blob: f31917a02f2b9719dce9ff6b6a4639ee8d2fabb9 [file] [log] [blame]
Jingning Han3ee6db62015-08-05 19:00:31 -07001/*
2 * Copyright (c) 2010 The WebM project authors. All Rights Reserved.
3 *
4 * Use of this source code is governed by a BSD-style license
5 * that can be found in the LICENSE file in the root of the source
6 * tree. An additional intellectual property rights grant can be found
7 * in the file PATENTS. All contributing project authors may
8 * be found in the AUTHORS file in the root of the source tree.
9 */
10
11#include <assert.h>
12#include <math.h>
13
14#include "./vp10_rtcd.h"
15#include "./vpx_dsp_rtcd.h"
16
Johannc5f11912015-08-31 14:36:35 -070017#include "vpx_dsp/vpx_dsp_common.h"
Jingning Han3ee6db62015-08-05 19:00:31 -070018#include "vpx_mem/vpx_mem.h"
19#include "vpx_ports/mem.h"
Jingning Han3acfe462015-08-12 09:20:31 -070020#include "vpx_ports/system_state.h"
Jingning Han3ee6db62015-08-05 19:00:31 -070021
Jingning Han54d66ef2015-08-06 21:14:07 -070022#include "vp10/common/common.h"
23#include "vp10/common/entropy.h"
24#include "vp10/common/entropymode.h"
25#include "vp10/common/idct.h"
26#include "vp10/common/mvref_common.h"
27#include "vp10/common/pred_common.h"
28#include "vp10/common/quant_common.h"
29#include "vp10/common/reconinter.h"
30#include "vp10/common/reconintra.h"
31#include "vp10/common/scan.h"
32#include "vp10/common/seg_common.h"
Jingning Han3ee6db62015-08-05 19:00:31 -070033
Jingning Han54d66ef2015-08-06 21:14:07 -070034#include "vp10/encoder/cost.h"
35#include "vp10/encoder/encodemb.h"
36#include "vp10/encoder/encodemv.h"
37#include "vp10/encoder/encoder.h"
38#include "vp10/encoder/mcomp.h"
hui su5d011cb2015-09-15 12:44:13 -070039#include "vp10/encoder/palette.h"
Jingning Han54d66ef2015-08-06 21:14:07 -070040#include "vp10/encoder/quantize.h"
41#include "vp10/encoder/ratectrl.h"
42#include "vp10/encoder/rd.h"
43#include "vp10/encoder/rdopt.h"
44#include "vp10/encoder/aq_variance.h"
Jingning Han3ee6db62015-08-05 19:00:31 -070045
// Reference-frame masks used by the mode-search speed features: each mask
// lists the frames that are *disabled* while searching modes for the named
// reference frame (the named frame itself stays enabled).
#define LAST_FRAME_MODE_MASK ((1 << GOLDEN_FRAME) | (1 << ALTREF_FRAME) | \
                              (1 << INTRA_FRAME))
#define GOLDEN_FRAME_MODE_MASK ((1 << LAST_FRAME) | (1 << ALTREF_FRAME) | \
                                (1 << INTRA_FRAME))
#define ALT_REF_MODE_MASK ((1 << LAST_FRAME) | (1 << GOLDEN_FRAME) | \
                           (1 << INTRA_FRAME))

// Frames permitted as the second reference of a compound prediction
// (bit 0x01 covers the "no second reference" / NONE slot).
#define SECOND_REF_FRAME_MASK ((1 << ALTREF_FRAME) | 0x01)

// NOTE(review): constants consumed by mode-search heuristics elsewhere in
// this file -- MIN_EARLY_TERM_INDEX gates early termination,
// NEW_MV_DISCOUNT_FACTOR scales a NEWMV rate discount; confirm at use sites.
#define MIN_EARLY_TERM_INDEX 3
#define NEW_MV_DISCOUNT_FACTOR 8

#if CONFIG_EXT_TX
// RD threshold factor (< 1) used in the transform-type search: a candidate
// must beat ext_tx_th * best_rd to displace DCT_DCT, biasing the search
// toward keeping DCT_DCT (see choose_largest_tx_size()).
const double ext_tx_th = 0.98;
#endif
61
// One entry of the inter mode search order: a prediction mode paired with
// up to two reference frames (second entry NONE for single reference).
typedef struct {
  PREDICTION_MODE mode;
  MV_REFERENCE_FRAME ref_frame[2];
} MODE_DEFINITION;

// A reference-frame pair on its own, used by the reference search order.
typedef struct {
  MV_REFERENCE_FRAME ref_frame[2];
} REF_DEFINITION;

// Accumulator threaded through block_rd_txfm() while rate-distortion
// costing every transform block of one plane.
struct rdcost_block_args {
#if CONFIG_VAR_TX
  const VP10_COMP *cpi;
#endif
  MACROBLOCK *x;
  ENTROPY_CONTEXT t_above[16];  // above entropy contexts, per 4x4 column
  ENTROPY_CONTEXT t_left[16];   // left entropy contexts, per 4x4 row
  int this_rate;                // accumulated coefficient rate
  int64_t this_dist;            // accumulated distortion
  int64_t this_sse;             // accumulated sum of squared errors
  int64_t this_rd;              // accumulated RD cost
  int64_t best_rd;              // early-exit threshold for this_rd
  int exit_early;               // set once this_rd exceeds best_rd
  int use_fast_coef_costing;    // use the approximate token costing path
  const scan_order *so;         // scan/neighbor tables for the tx type
  uint8_t skippable;            // stays 1 while every block has eob == 0
};
88
// NOTE(review): index into vp10_mode_order used by mode-search heuristics;
// the name suggests it marks a NEWMV/LAST_FRAME-related entry -- confirm
// against its users, which are outside this chunk.
#define LAST_NEW_MV_INDEX 6
// Fixed search order of (mode, reference pair) candidates for inter-frame
// RD mode selection: single-reference motion modes first, then compound
// modes, with intra modes interleaved near the end.
static const MODE_DEFINITION vp10_mode_order[MAX_MODES] = {
  {NEARESTMV, {LAST_FRAME,   NONE}},
  {NEARESTMV, {ALTREF_FRAME, NONE}},
  {NEARESTMV, {GOLDEN_FRAME, NONE}},

  {DC_PRED,   {INTRA_FRAME,  NONE}},

  {NEWMV,     {LAST_FRAME,   NONE}},
  {NEWMV,     {ALTREF_FRAME, NONE}},
  {NEWMV,     {GOLDEN_FRAME, NONE}},

  {NEARMV,    {LAST_FRAME,   NONE}},
  {NEARMV,    {ALTREF_FRAME, NONE}},
  {NEARMV,    {GOLDEN_FRAME, NONE}},

  {ZEROMV,    {LAST_FRAME,   NONE}},
  {ZEROMV,    {GOLDEN_FRAME, NONE}},
  {ZEROMV,    {ALTREF_FRAME, NONE}},

  {NEARESTMV, {LAST_FRAME,   ALTREF_FRAME}},
  {NEARESTMV, {GOLDEN_FRAME, ALTREF_FRAME}},

  {TM_PRED,   {INTRA_FRAME,  NONE}},

  {NEARMV,    {LAST_FRAME,   ALTREF_FRAME}},
  {NEWMV,     {LAST_FRAME,   ALTREF_FRAME}},
  {NEARMV,    {GOLDEN_FRAME, ALTREF_FRAME}},
  {NEWMV,     {GOLDEN_FRAME, ALTREF_FRAME}},

  {ZEROMV,    {LAST_FRAME,   ALTREF_FRAME}},
  {ZEROMV,    {GOLDEN_FRAME, ALTREF_FRAME}},

  {H_PRED,    {INTRA_FRAME,  NONE}},
  {V_PRED,    {INTRA_FRAME,  NONE}},
  {D135_PRED, {INTRA_FRAME,  NONE}},
  {D207_PRED, {INTRA_FRAME,  NONE}},
  {D153_PRED, {INTRA_FRAME,  NONE}},
  {D63_PRED,  {INTRA_FRAME,  NONE}},
  {D117_PRED, {INTRA_FRAME,  NONE}},
  {D45_PRED,  {INTRA_FRAME,  NONE}},
};

// Fixed search order of reference-frame pairs: singles, compounds, intra.
static const REF_DEFINITION vp10_ref_order[MAX_REFS] = {
  {{LAST_FRAME,   NONE}},
  {{GOLDEN_FRAME, NONE}},
  {{ALTREF_FRAME, NONE}},
  {{LAST_FRAME,   ALTREF_FRAME}},
  {{GOLDEN_FRAME, ALTREF_FRAME}},
  {{INTRA_FRAME,  NONE}},
};
140
hui su5d011cb2015-09-15 12:44:13 -0700141static INLINE int write_uniform_cost(int n, int v) {
142 int l = get_unsigned_bits(n), m = (1 << l) - n;
143 if (l == 0)
144 return 0;
145 if (v < m)
146 return (l - 1) * vp10_cost_bit(128, 0);
147 else
148 return l * vp10_cost_bit(128, 0);
149}
150
Jingning Han3ee6db62015-08-05 19:00:31 -0700151static void swap_block_ptr(MACROBLOCK *x, PICK_MODE_CONTEXT *ctx,
152 int m, int n, int min_plane, int max_plane) {
153 int i;
154
155 for (i = min_plane; i < max_plane; ++i) {
156 struct macroblock_plane *const p = &x->plane[i];
157 struct macroblockd_plane *const pd = &x->e_mbd.plane[i];
158
159 p->coeff = ctx->coeff_pbuf[i][m];
160 p->qcoeff = ctx->qcoeff_pbuf[i][m];
161 pd->dqcoeff = ctx->dqcoeff_pbuf[i][m];
162 p->eobs = ctx->eobs_pbuf[i][m];
163
164 ctx->coeff_pbuf[i][m] = ctx->coeff_pbuf[i][n];
165 ctx->qcoeff_pbuf[i][m] = ctx->qcoeff_pbuf[i][n];
166 ctx->dqcoeff_pbuf[i][m] = ctx->dqcoeff_pbuf[i][n];
167 ctx->eobs_pbuf[i][m] = ctx->eobs_pbuf[i][n];
168
169 ctx->coeff_pbuf[i][n] = p->coeff;
170 ctx->qcoeff_pbuf[i][n] = p->qcoeff;
171 ctx->dqcoeff_pbuf[i][n] = pd->dqcoeff;
172 ctx->eobs_pbuf[i][n] = p->eobs;
173 }
174}
175
// Model the rate and distortion of coding superblock `bsize` without doing
// the actual transform/quantization: per max-tx-size unit it measures the
// prediction error variance, marks units whose AC/DC coefficients would
// quantize to zero (x->skip_txfm), and feeds the per-plane SSE into either
// a fast linear model or vp10_model_rd_from_var_lapndz().
// Outputs: *out_rate_sum / *out_dist_sum (dist scaled by 16),
// *skip_txfm_sb (1 iff every unit had a low-error skip), *skip_sse_sb.
static void model_rd_for_sb(VP10_COMP *cpi, BLOCK_SIZE bsize,
                            MACROBLOCK *x, MACROBLOCKD *xd,
                            int *out_rate_sum, int64_t *out_dist_sum,
                            int *skip_txfm_sb, int64_t *skip_sse_sb) {
  // Note our transform coeffs are 8 times an orthogonal transform.
  // Hence quantizer step is also 8 times. To get effective quantizer
  // we need to divide by 8 before sending to modeling function.
  int i;
  int64_t rate_sum = 0;
  int64_t dist_sum = 0;
  const int ref = xd->mi[0]->mbmi.ref_frame[0];
  unsigned int sse;
  unsigned int var = 0;
  unsigned int sum_sse = 0;
  int64_t total_sse = 0;
  int skip_flag = 1;
  const int shift = 6;
  int rate;
  int64_t dist;
  // Dequant values carry extra precision in high-bit-depth builds.
  const int dequant_shift =
#if CONFIG_VP9_HIGHBITDEPTH
      (xd->cur_buf->flags & YV12_FLAG_HIGHBITDEPTH) ?
          xd->bd - 5 :
#endif  // CONFIG_VP9_HIGHBITDEPTH
          3;

  x->pred_sse[ref] = 0;

  for (i = 0; i < MAX_MB_PLANE; ++i) {
    struct macroblock_plane *const p = &x->plane[i];
    struct macroblockd_plane *const pd = &xd->plane[i];
    const BLOCK_SIZE bs = get_plane_block_size(bsize, pd);
    const TX_SIZE max_tx_size = max_txsize_lookup[bs];
    const BLOCK_SIZE unit_size = txsize_to_bsize[max_tx_size];
    const int64_t dc_thr = p->quant_thred[0] >> shift;
    const int64_t ac_thr = p->quant_thred[1] >> shift;
    // The low thresholds are used to measure if the prediction errors are
    // low enough so that we can skip the mode search.
    const int64_t low_dc_thr = VPXMIN(50, dc_thr >> 2);
    const int64_t low_ac_thr = VPXMIN(80, ac_thr >> 2);
    // NOTE(review): bh mixes b_height_log2 of bs with b_width_log2 of
    // unit_size; harmless only if unit_size is square -- confirm.
    int bw = 1 << (b_width_log2_lookup[bs] - b_width_log2_lookup[unit_size]);
    int bh = 1 << (b_height_log2_lookup[bs] - b_width_log2_lookup[unit_size]);
    int idx, idy;
    int lw = b_width_log2_lookup[unit_size] + 2;   // log2 unit width in pels
    int lh = b_height_log2_lookup[unit_size] + 2;  // log2 unit height in pels

    sum_sse = 0;

    for (idy = 0; idy < bh; ++idy) {
      for (idx = 0; idx < bw; ++idx) {
        uint8_t *src = p->src.buf + (idy * p->src.stride << lh) + (idx << lw);
        uint8_t *dst = pd->dst.buf + (idy * pd->dst.stride << lh) + (idx << lh);
        // NOTE(review): (idy << 1) + idx assumes at most a 2x2 grid of
        // units per plane (4 skip_txfm slots) -- verify for all bsizes.
        int block_idx = (idy << 1) + idx;
        int low_err_skip = 0;

        var = cpi->fn_ptr[unit_size].vf(src, p->src.stride,
                                        dst, pd->dst.stride, &sse);
        x->bsse[(i << 2) + block_idx] = sse;
        sum_sse += sse;

        x->skip_txfm[(i << 2) + block_idx] = SKIP_TXFM_NONE;
        if (!x->select_tx_size) {
          // Check if all ac coefficients can be quantized to zero.
          if (var < ac_thr || var == 0) {
            x->skip_txfm[(i << 2) + block_idx] = SKIP_TXFM_AC_ONLY;

            // Check if dc coefficient can be quantized to zero.
            if (sse - var < dc_thr || sse == var) {
              x->skip_txfm[(i << 2) + block_idx] = SKIP_TXFM_AC_DC;

              if (!sse || (var < low_ac_thr && sse - var < low_dc_thr))
                low_err_skip = 1;
            }
          }
        }

        // One unit above the low-error thresholds disables whole-SB skip.
        if (skip_flag && !low_err_skip)
          skip_flag = 0;

        if (i == 0)
          x->pred_sse[ref] += sse;
      }
    }

    total_sse += sum_sse;

    // Fast approximate the modelling function.
    if (cpi->sf.simple_model_rd_from_var) {
      int64_t rate;  // intentionally shadows the outer int rate
      const int64_t square_error = sum_sse;
      int quantizer = (pd->dequant[1] >> dequant_shift);

      if (quantizer < 120)
        rate = (square_error * (280 - quantizer)) >> 8;
      else
        rate = 0;
      dist = (square_error * quantizer) >> 8;
      rate_sum += rate;
      dist_sum += dist;
    } else {
      vp10_model_rd_from_var_lapndz(sum_sse, num_pels_log2_lookup[bs],
                                    pd->dequant[1] >> dequant_shift,
                                    &rate, &dist);
      rate_sum += rate;
      dist_sum += dist;
    }
  }

  *skip_txfm_sb = skip_flag;
  *skip_sse_sb = total_sse << 4;  // scale back to pixel domain
  *out_rate_sum = (int)rate_sum;
  *out_dist_sum = dist_sum << 4;
}
289
290int64_t vp10_block_error_c(const tran_low_t *coeff, const tran_low_t *dqcoeff,
291 intptr_t block_size, int64_t *ssz) {
292 int i;
293 int64_t error = 0, sqcoeff = 0;
294
295 for (i = 0; i < block_size; i++) {
296 const int diff = coeff[i] - dqcoeff[i];
297 error += diff * diff;
298 sqcoeff += coeff[i] * coeff[i];
299 }
300
301 *ssz = sqcoeff;
302 return error;
303}
304
// Fast-path block error: sum of squared differences between the source
// and dequantized coefficients (no ssz output).
//
// Fix: the difference of two int16_t values spans [-65535, 65535]; its
// square can exceed INT_MAX, so squaring in `int` is signed-overflow UB.
// Widen the difference to int64_t before squaring (same pattern as the
// high-bit-depth block error function).
int64_t vp10_block_error_fp_c(const int16_t *coeff, const int16_t *dqcoeff,
                              int block_size) {
  int i;
  int64_t error = 0;

  for (i = 0; i < block_size; i++) {
    const int64_t diff = coeff[i] - dqcoeff[i];
    error += diff * diff;
  }

  return error;
}
317
#if CONFIG_VP9_HIGHBITDEPTH
// High-bit-depth block error: accumulates squared quantization error and
// squared source energy in 64-bit, then rescales both back to the 8-bit
// domain (shift of 2*(bd-8)) with round-to-nearest.
int64_t vp10_highbd_block_error_c(const tran_low_t *coeff,
                                  const tran_low_t *dqcoeff,
                                  intptr_t block_size,
                                  int64_t *ssz, int bd) {
  const int shift = 2 * (bd - 8);
  const int rounding = shift > 0 ? 1 << (shift - 1) : 0;
  int64_t err_acc = 0;
  int64_t energy_acc = 0;
  intptr_t k;

  for (k = 0; k < block_size; k++) {
    const int64_t d = coeff[k] - dqcoeff[k];
    err_acc += d * d;
    energy_acc += (int64_t)coeff[k] * (int64_t)coeff[k];
  }
  assert(err_acc >= 0 && energy_acc >= 0);

  *ssz = (energy_acc + rounding) >> shift;
  return (err_acc + rounding) >> shift;
}
#endif  // CONFIG_VP9_HIGHBITDEPTH
341
/* The trailing '0' is a terminator which is used inside cost_coeffs() to
 * decide whether to include cost of a trailing EOB node or not (i.e. we
 * can skip this if the last coefficient in this transform block, e.g. the
 * 16th coefficient in a 4x4 block or the 64th coefficient in a 8x8 block,
 * were non-zero). */
static const int16_t band_counts[TX_SIZES][8] = {
  { 1, 2, 3, 4,  3,   16 - 13, 0 },
  { 1, 2, 3, 4, 11,   64 - 21, 0 },
  { 1, 2, 3, 4, 11,  256 - 21, 0 },
  { 1, 2, 3, 4, 11, 1024 - 21, 0 },
};
// Entropy-coding cost of the quantized coefficients of one transform block,
// walking the scan order and consulting the per-band token cost tables.
// Without CONFIG_VAR_TX it also writes the resulting entropy context back
// through A/L (set to "block had coefficients").
static int cost_coeffs(MACROBLOCK *x,
                       int plane, int block,
#if CONFIG_VAR_TX
                       int coeff_ctx,
#else
                       ENTROPY_CONTEXT *A, ENTROPY_CONTEXT *L,
#endif
                       TX_SIZE tx_size,
                       const int16_t *scan, const int16_t *nb,
                       int use_fast_coef_costing) {
  MACROBLOCKD *const xd = &x->e_mbd;
  MB_MODE_INFO *mbmi = &xd->mi[0]->mbmi;
  const struct macroblock_plane *p = &x->plane[plane];
  const struct macroblockd_plane *pd = &xd->plane[plane];
  const PLANE_TYPE type = pd->plane_type;
  // Skip band 0's count of 1: the DC coefficient is costed separately.
  const int16_t *band_count = &band_counts[tx_size][1];
  const int eob = p->eobs[block];
  const tran_low_t *const qcoeff = BLOCK_OFFSET(p->qcoeff, block);
  unsigned int (*token_costs)[2][COEFF_CONTEXTS][ENTROPY_TOKENS] =
      x->token_costs[tx_size][type][is_inter_block(mbmi)];
  // Energy classes of already-costed positions, indexed by scan position,
  // used to derive the coefficient context for later positions.
  uint8_t token_cache[32 * 32];
#if CONFIG_VAR_TX
  int pt = coeff_ctx;
#else
  int pt = combine_entropy_contexts(*A, *L);
#endif
  int c, cost;
#if CONFIG_VP9_HIGHBITDEPTH
  const int16_t *cat6_high_cost = vp10_get_high_cost_table(xd->bd);
#else
  const int16_t *cat6_high_cost = vp10_get_high_cost_table(8);
#endif

#if !CONFIG_VAR_TX
  // Check for consistency of tx_size with mode info
  assert(type == PLANE_TYPE_Y ? mbmi->tx_size == tx_size
                              : get_uv_tx_size(mbmi, pd) == tx_size);
#endif

  if (eob == 0) {
    // single eob token
    cost = token_costs[0][0][pt][EOB_TOKEN];
    c = 0;
  } else {
    int band_left = *band_count++;

    // dc token
    int v = qcoeff[0];
    int16_t prev_t;
    EXTRABIT e;
    vp10_get_token_extra(v, &prev_t, &e);
    cost = (*token_costs)[0][pt][prev_t] +
        vp10_get_cost(prev_t, e, cat6_high_cost);

    token_cache[0] = vp10_pt_energy_class[prev_t];
    ++token_costs;

    // ac tokens
    for (c = 1; c < eob; c++) {
      const int rc = scan[c];
      int16_t t;

      v = qcoeff[rc];
      vp10_get_token_extra(v, &t, &e);
      if (use_fast_coef_costing) {
        // Fast path: approximate the context by whether the previous
        // token was zero, skipping the token_cache bookkeeping.
        cost += (*token_costs)[!prev_t][!prev_t][t] +
            vp10_get_cost(t, e, cat6_high_cost);
      } else {
        pt = get_coef_context(nb, token_cache, c);
        cost += (*token_costs)[!prev_t][pt][t] +
            vp10_get_cost(t, e, cat6_high_cost);
        token_cache[rc] = vp10_pt_energy_class[t];
      }
      prev_t = t;
      if (!--band_left) {
        band_left = *band_count++;
        ++token_costs;
      }
    }

    // eob token
    if (band_left) {
      if (use_fast_coef_costing) {
        cost += (*token_costs)[0][!prev_t][EOB_TOKEN];
      } else {
        pt = get_coef_context(nb, token_cache, c);
        cost += (*token_costs)[0][pt][EOB_TOKEN];
      }
    }
  }

#if !CONFIG_VAR_TX
  // is eob first coefficient;
  *A = *L = (c > 0);
#endif

  return cost;
}
451
// Distortion (squared error between source and dequantized coefficients)
// and SSE (squared source coefficient energy) for one transform block,
// both scaled to the pixel domain: no shift for TX_32X32, >> 2 otherwise.
static void dist_block(MACROBLOCK *x, int plane, int block, TX_SIZE tx_size,
                       int64_t *out_dist, int64_t *out_sse) {
  const int ss_txfrm_size = tx_size << 1;  // 16 << this = coeffs per block
  MACROBLOCKD* const xd = &x->e_mbd;
  const struct macroblock_plane *const p = &x->plane[plane];
  const struct macroblockd_plane *const pd = &xd->plane[plane];
  int64_t this_sse;
  int shift = tx_size == TX_32X32 ? 0 : 2;
  tran_low_t *const coeff = BLOCK_OFFSET(p->coeff, block);
  tran_low_t *const dqcoeff = BLOCK_OFFSET(pd->dqcoeff, block);
#if CONFIG_VP9_HIGHBITDEPTH
  const int bd = (xd->cur_buf->flags & YV12_FLAG_HIGHBITDEPTH) ? xd->bd : 8;
  *out_dist = vp10_highbd_block_error(coeff, dqcoeff, 16 << ss_txfrm_size,
                                      &this_sse, bd) >> shift;
#else
  *out_dist = vp10_block_error(coeff, dqcoeff, 16 << ss_txfrm_size,
                               &this_sse) >> shift;
#endif  // CONFIG_VP9_HIGHBITDEPTH
  *out_sse = this_sse >> shift;
}
472
// Coefficient rate of one transform block. With CONFIG_VAR_TX the entropy
// context is combined here and written back explicitly (non-zero eob sets
// the above/left contexts); otherwise cost_coeffs() updates them itself.
static int rate_block(int plane, int block, int blk_row, int blk_col,
                      TX_SIZE tx_size, struct rdcost_block_args* args) {
#if CONFIG_VAR_TX
  int coeff_ctx = combine_entropy_contexts(*(args->t_above + blk_col),
                                           *(args->t_left + blk_row));
  int coeff_cost = cost_coeffs(args->x, plane, block, coeff_ctx,
                               tx_size, args->so->scan, args->so->neighbors,
                               args->use_fast_coef_costing);
  const struct macroblock_plane *p = &args->x->plane[plane];
  *(args->t_above + blk_col) = !(p->eobs[block] == 0);
  *(args->t_left + blk_row) = !(p->eobs[block] == 0);
  return coeff_cost;
#else
  return cost_coeffs(args->x, plane, block,
                     args->t_above + blk_col,
                     args->t_left + blk_row,
                     tx_size, args->so->scan, args->so->neighbors,
                     args->use_fast_coef_costing);
#endif
}
493
// Per-transform-block RD worker (callback for
// vp10_foreach_transformed_block_in_plane via txfm_rd_in_plane). Encodes or
// models the block to obtain distortion/SSE, costs the coefficients, and
// accumulates rate/dist/sse/rd into *args, setting exit_early once the
// running RD cost exceeds args->best_rd.
static void block_rd_txfm(int plane, int block, int blk_row, int blk_col,
                          BLOCK_SIZE plane_bsize,
                          TX_SIZE tx_size, void *arg) {
  struct rdcost_block_args *args = arg;
  MACROBLOCK *const x = args->x;
  MACROBLOCKD *const xd = &x->e_mbd;
  MB_MODE_INFO *const mbmi = &xd->mi[0]->mbmi;
  int64_t rd1, rd2, rd;
  int rate;
  int64_t dist;
  int64_t sse;

  if (args->exit_early)
    return;

  if (!is_inter_block(mbmi)) {
    // Intra: actually encode the block; no skip-txfm modelling applies.
#if CONFIG_VAR_TX
    struct encode_b_args arg = {x, NULL, &mbmi->skip};
    uint8_t *dst, *src;
    int src_stride = x->plane[plane].src.stride;
    int dst_stride = xd->plane[plane].dst.stride;
    unsigned int tmp_sse;
    PREDICTION_MODE mode = (plane == 0) ?
        get_y_mode(xd->mi[0], block) : mbmi->uv_mode;

#if CONFIG_VP9_HIGHBITDEPTH
    vp10_encode_block_intra(plane, block, blk_row, blk_col,
                            plane_bsize, tx_size, &arg);
    dist_block(x, plane, block, tx_size, &dist, &sse);
#else
    // SSE is measured against the prediction before coefficient coding,
    // distortion against the reconstruction afterwards.
    src = &x->plane[plane].src.buf[4 * (blk_row * src_stride + blk_col)];
    dst = &xd->plane[plane].dst.buf[4 * (blk_row * dst_stride + blk_col)];
    vp10_predict_intra_block(xd, b_width_log2_lookup[plane_bsize],
                             b_height_log2_lookup[plane_bsize],
                             tx_size, mode, dst, dst_stride,
                             dst, dst_stride, blk_col, blk_row, plane);
    args->cpi->fn_ptr[txsize_to_bsize[tx_size]].vf(src, src_stride,
                                                   dst, dst_stride, &tmp_sse);
    sse = (int64_t)tmp_sse * 16;
    vp10_encode_block_intra(plane, block, blk_row, blk_col,
                            plane_bsize, tx_size, &arg);
    args->cpi->fn_ptr[txsize_to_bsize[tx_size]].vf(src, src_stride,
                                                   dst, dst_stride, &tmp_sse);
    dist = (int64_t)tmp_sse * 16;
#endif  // CONFIG_VP9_HIGHBITDEPTH
#else
    struct encode_b_args arg = {x, NULL, &mbmi->skip};
    vp10_encode_block_intra(plane, block, blk_row, blk_col,
                            plane_bsize, tx_size, &arg);
    dist_block(x, plane, block, tx_size, &dist, &sse);
#endif
  } else if (max_txsize_lookup[plane_bsize] == tx_size) {
    // Inter at the maximum tx size: honor the skip-txfm decisions made by
    // model_rd_for_sb() in x->skip_txfm.
    if (x->skip_txfm[(plane << 2) + (block >> (tx_size << 1))] ==
        SKIP_TXFM_NONE) {
      // full forward transform and quantization
      vp10_xform_quant(x, plane, block, blk_row, blk_col,
                       plane_bsize, tx_size);
      dist_block(x, plane, block, tx_size, &dist, &sse);
    } else if (x->skip_txfm[(plane << 2) + (block >> (tx_size << 1))] ==
               SKIP_TXFM_AC_ONLY) {
      // compute DC coefficient
      tran_low_t *const coeff = BLOCK_OFFSET(x->plane[plane].coeff, block);
      tran_low_t *const dqcoeff = BLOCK_OFFSET(xd->plane[plane].dqcoeff, block);
      vp10_xform_quant_dc(x, plane, block, blk_row, blk_col,
                          plane_bsize, tx_size);
      sse = x->bsse[(plane << 2) + (block >> (tx_size << 1))] << 4;
      dist = sse;
      if (x->plane[plane].eobs[block]) {
        // Correct the modelled distortion by the energy the coded DC
        // coefficient removes from the residual.
        const int64_t orig_sse = (int64_t)coeff[0] * coeff[0];
        const int64_t resd_sse = coeff[0] - dqcoeff[0];
        int64_t dc_correct = orig_sse - resd_sse * resd_sse;
#if CONFIG_VP9_HIGHBITDEPTH
        dc_correct >>= ((xd->bd - 8) * 2);
#endif
        if (tx_size != TX_32X32)
          dc_correct >>= 2;

        dist = VPXMAX(0, sse - dc_correct);
      }
    } else {
      // SKIP_TXFM_AC_DC
      // skip forward transform
      x->plane[plane].eobs[block] = 0;
      sse = x->bsse[(plane << 2) + (block >> (tx_size << 1))] << 4;
      dist = sse;
    }
  } else {
    // full forward transform and quantization
    vp10_xform_quant(x, plane, block, blk_row, blk_col, plane_bsize, tx_size);
    dist_block(x, plane, block, tx_size, &dist, &sse);
  }

  // Early exit: even at zero rate this block would push us past best_rd.
  rd = RDCOST(x->rdmult, x->rddiv, 0, dist);
  if (args->this_rd + rd > args->best_rd) {
    args->exit_early = 1;
    return;
  }

  rate = rate_block(plane, block, blk_row, blk_col, tx_size, args);
  rd1 = RDCOST(x->rdmult, x->rddiv, rate, dist);  // code the coefficients
  rd2 = RDCOST(x->rdmult, x->rddiv, 0, sse);      // skip (all-zero) cost

  // TODO(jingning): temporarily enabled only for luma component
  rd = VPXMIN(rd1, rd2);
  if (plane == 0)
    x->zcoeff_blk[tx_size][block] = !x->plane[plane].eobs[block] ||
        (rd1 > rd2 && !xd->lossless[mbmi->segment_id]);

  args->this_rate += rate;
  args->this_dist += dist;
  args->this_sse += sse;
  args->this_rd += rd;

  if (args->this_rd > args->best_rd) {
    args->exit_early = 1;
    return;
  }

  args->skippable &= !x->plane[plane].eobs[block];
}
614
// Rate-distortion cost of coding one whole plane of `bsize` at the given
// tx_size: sets up a rdcost_block_args accumulator, iterates every
// transform block through block_rd_txfm(), and reports the totals. On an
// early exit (RD exceeded ref_best_rd) the outputs are saturated to
// INT_MAX / INT64_MAX so the caller discards this candidate.
static void txfm_rd_in_plane(MACROBLOCK *x,
#if CONFIG_VAR_TX
                             const VP10_COMP *cpi,
#endif
                             int *rate, int64_t *distortion,
                             int *skippable, int64_t *sse,
                             int64_t ref_best_rd, int plane,
                             BLOCK_SIZE bsize, TX_SIZE tx_size,
                             int use_fast_coef_casting) {
  MACROBLOCKD *const xd = &x->e_mbd;
  const struct macroblockd_plane *const pd = &xd->plane[plane];
  TX_TYPE tx_type;
  struct rdcost_block_args args;
  vp10_zero(args);
  args.x = x;
#if CONFIG_VAR_TX
  args.cpi = cpi;
#endif
  args.best_rd = ref_best_rd;
  args.use_fast_coef_costing = use_fast_coef_casting;
  args.skippable = 1;

  // Luma carries the tx size decision in the mode info.
  if (plane == 0)
    xd->mi[0]->mbmi.tx_size = tx_size;

  vp10_get_entropy_contexts(bsize, tx_size, pd, args.t_above, args.t_left);

  tx_type = get_tx_type(pd->plane_type, xd, 0, tx_size);
  args.so = get_scan(tx_size, tx_type, is_inter_block(&xd->mi[0]->mbmi));

  vp10_foreach_transformed_block_in_plane(xd, bsize, plane,
                                          block_rd_txfm, &args);
  if (args.exit_early) {
    *rate = INT_MAX;
    *distortion = INT64_MAX;
    *sse = INT64_MAX;
    *skippable = 0;
  } else {
    *distortion = args.this_dist;
    *rate = args.this_rate;
    *sse = args.this_sse;
    *skippable = args.skippable;
  }
}
659
// Rate-distortion for the block coded at the largest transform size allowed
// by the tx mode. With CONFIG_EXT_TX it additionally searches the permitted
// extended transform types for inter blocks (favoring DCT_DCT via
// ext_tx_th) and folds the chosen type's signalling cost into *rate.
static void choose_largest_tx_size(VP10_COMP *cpi, MACROBLOCK *x,
                                   int *rate, int64_t *distortion,
                                   int *skip, int64_t *sse,
                                   int64_t ref_best_rd,
                                   BLOCK_SIZE bs) {
  const TX_SIZE max_tx_size = max_txsize_lookup[bs];
  VP10_COMMON *const cm = &cpi->common;
  const TX_SIZE largest_tx_size = tx_mode_to_biggest_tx_size[cm->tx_mode];
  MACROBLOCKD *const xd = &x->e_mbd;
  MB_MODE_INFO *const mbmi = &xd->mi[0]->mbmi;
#if CONFIG_EXT_TX
  TX_TYPE tx_type, best_tx_type = DCT_DCT;
  int r, s;
  int64_t d, psse, this_rd, best_rd = INT64_MAX;
  vpx_prob skip_prob = vp10_get_skip_prob(cm, xd);
  int s0 = vp10_cost_bit(skip_prob, 0);  // cost of signalling "not skipped"
  int s1 = vp10_cost_bit(skip_prob, 1);  // cost of signalling "skipped"
  int ext_tx_set;
  const int is_inter = is_inter_block(mbmi);
#endif  // CONFIG_EXT_TX

  mbmi->tx_size = VPXMIN(max_tx_size, largest_tx_size);

#if CONFIG_EXT_TX
  ext_tx_set = get_ext_tx_set(mbmi->tx_size, bs, is_inter);

  if (is_inter &&
      get_ext_tx_types(mbmi->tx_size, bs, is_inter) > 1 &&
      !xd->lossless[mbmi->segment_id]) {
    for (tx_type = 0; tx_type < TX_TYPES; ++tx_type) {
      // Only consider types enabled for this set / prediction kind.
      if (is_inter) {
        if (!ext_tx_used_inter[ext_tx_set][tx_type])
          continue;
      } else {
        if (!ext_tx_used_intra[ext_tx_set][tx_type])
          continue;
      }

      mbmi->tx_type = tx_type;
      // In set 1, once no non-DCT type has won, jump past the DST_ADST..
      // IDTX range (tx_type is bumped to IDTX - 1, then ++ by the loop).
      if (ext_tx_set == 1 &&
          mbmi->tx_type >= DST_ADST && mbmi->tx_type < IDTX &&
          best_tx_type == DCT_DCT) {
        tx_type = IDTX - 1;
        continue;
      }

      txfm_rd_in_plane(x,
#if CONFIG_VAR_TX
                       cpi,
#endif
                       &r, &d, &s,
                       &psse, ref_best_rd, 0, bs, mbmi->tx_size,
                       cpi->sf.use_fast_coef_costing);

      if (r == INT_MAX)
        continue;
      // Add the cost of signalling this transform type.
      if (get_ext_tx_types(mbmi->tx_size, bs, is_inter) > 1) {
        if (is_inter) {
          if (ext_tx_set > 0)
            r += cpi->inter_tx_type_costs[ext_tx_set]
                                         [mbmi->tx_size][mbmi->tx_type];
        } else {
          if (ext_tx_set > 0)
            r += cpi->intra_tx_type_costs[ext_tx_set][mbmi->tx_size]
                                         [mbmi->mode][mbmi->tx_type];
        }
      }

      if (s)
        this_rd = RDCOST(x->rdmult, x->rddiv, s1, psse);
      else
        this_rd = RDCOST(x->rdmult, x->rddiv, r + s0, d);
      if (is_inter_block(mbmi) && !xd->lossless[mbmi->segment_id] && !s)
        this_rd = VPXMIN(this_rd, RDCOST(x->rdmult, x->rddiv, s1, psse));

      // ext_tx_th (< 1) makes it harder to displace DCT_DCT as the winner.
      if (this_rd < ((best_tx_type == DCT_DCT) ? ext_tx_th : 1) * best_rd) {
        best_rd = this_rd;
        best_tx_type = mbmi->tx_type;
      }
    }
  }

  mbmi->tx_type = best_tx_type;
#endif  // CONFIG_EXT_TX

  // Final costing with the selected tx size (and type) for the caller.
  txfm_rd_in_plane(x,
#if CONFIG_VAR_TX
                   cpi,
#endif
                   rate, distortion, skip,
                   sse, ref_best_rd, 0, bs,
                   mbmi->tx_size, cpi->sf.use_fast_coef_costing);

#if CONFIG_EXT_TX
  if (get_ext_tx_types(mbmi->tx_size, bs, is_inter) > 1 &&
      !xd->lossless[mbmi->segment_id] && *rate != INT_MAX) {
    int ext_tx_set = get_ext_tx_set(mbmi->tx_size, bs, is_inter);
    if (is_inter)
      *rate += cpi->inter_tx_type_costs[ext_tx_set][mbmi->tx_size]
                                       [mbmi->tx_type];
    else
      *rate += cpi->intra_tx_type_costs[ext_tx_set][mbmi->tx_size]
                                       [mbmi->mode][mbmi->tx_type];
  }
#endif  // CONFIG_EXT_TX
}
766
// Rate-distortion for the block forced to the smallest (4x4) transform
// size; counterpart of choose_largest_tx_size() for lossless/low-tx modes.
static void choose_smallest_tx_size(VP10_COMP *cpi, MACROBLOCK *x,
                                    int *rate, int64_t *distortion,
                                    int *skip, int64_t *sse,
                                    int64_t ref_best_rd,
                                    BLOCK_SIZE bs) {
  MACROBLOCKD *const xd = &x->e_mbd;
  MB_MODE_INFO *const mbmi = &xd->mi[0]->mbmi;

  mbmi->tx_size = TX_4X4;

  txfm_rd_in_plane(x,
#if CONFIG_VAR_TX
                   cpi,
#endif
                   rate, distortion, skip,
                   sse, ref_best_rd, 0, bs,
                   mbmi->tx_size, cpi->sf.use_fast_coef_costing);
}
785
// Rate-distortion search over transform sizes (and, when CONFIG_EXT_TX is
// enabled, transform types) for the luma plane of block size |bs|.
// On return, mbmi->tx_size (and mbmi->tx_type under CONFIG_EXT_TX) hold the
// winning choice, and *rate / *distortion / *skip / *psse describe its RD
// statistics.  If every candidate was rejected, the outputs keep their
// sentinel initial values (INT_MAX / INT64_MAX / 0 / INT64_MAX).
static void choose_tx_size_from_rd(VP10_COMP *cpi, MACROBLOCK *x,
                                   int *rate,
                                   int64_t *distortion,
                                   int *skip,
                                   int64_t *psse,
                                   int64_t ref_best_rd,
                                   BLOCK_SIZE bs) {
  const TX_SIZE max_tx_size = max_txsize_lookup[bs];
  VP10_COMMON *const cm = &cpi->common;
  MACROBLOCKD *const xd = &x->e_mbd;
  MB_MODE_INFO *const mbmi = &xd->mi[0]->mbmi;
  vpx_prob skip_prob = vp10_get_skip_prob(cm, xd);
  int r, s;            // rate and skip flag of the current candidate
  int64_t d, sse;      // distortion and SSE of the current candidate
  int64_t rd = INT64_MAX;
  int n, m;            // n: candidate tx size; m: tx-size tree level
  int s0, s1;          // cost of coding skip = 0 / skip = 1
  int64_t best_rd = INT64_MAX, last_rd = INT64_MAX;
  TX_SIZE best_tx = max_tx_size;
  int start_tx, end_tx;
  const int tx_select = cm->tx_mode == TX_MODE_SELECT;
#if CONFIG_EXT_TX
  TX_TYPE tx_type, best_tx_type = DCT_DCT;
  int ext_tx_set;
#endif  // CONFIG_EXT_TX
  const int is_inter = is_inter_block(mbmi);

  const vpx_prob *tx_probs = get_tx_probs2(max_tx_size, xd, &cm->fc->tx_probs);
  assert(skip_prob > 0);
  s0 = vp10_cost_bit(skip_prob, 0);
  s1 = vp10_cost_bit(skip_prob, 1);

  if (tx_select) {
    // TX_MODE_SELECT: search every size from the block's largest down to 4x4.
    start_tx = max_tx_size;
    end_tx = 0;
  } else {
    // Fixed tx mode: evaluate only the single size implied by cm->tx_mode.
    const TX_SIZE chosen_tx_size =
        VPXMIN(max_tx_size, tx_mode_to_biggest_tx_size[cm->tx_mode]);
    start_tx = chosen_tx_size;
    end_tx = chosen_tx_size;
  }

  // Sentinel outputs in case no candidate yields a valid rate.
  *distortion = INT64_MAX;
  *rate = INT_MAX;
  *skip = 0;
  *psse = INT64_MAX;

#if CONFIG_EXT_TX
  for (tx_type = DCT_DCT; tx_type < TX_TYPES; ++tx_type) {
#endif  // CONFIG_EXT_TX
    last_rd = INT64_MAX;
    for (n = start_tx; n >= end_tx; --n) {
      // Cost of signaling tx size |n| in the bitstream: one tree bit per
      // level, with the last bit omitted when n is the largest allowed size.
      int r_tx_size = 0;
      for (m = 0; m <= n - (n == (int) max_tx_size); ++m) {
        if (m == n)
          r_tx_size += vp10_cost_zero(tx_probs[m]);
        else
          r_tx_size += vp10_cost_one(tx_probs[m]);
      }

#if CONFIG_EXT_TX
      // Skip tx types that are unavailable for this size/block/inter-intra
      // combination.
      ext_tx_set = get_ext_tx_set(n, bs, is_inter);
      if (is_inter) {
        if (!ext_tx_used_inter[ext_tx_set][tx_type])
          continue;
      } else {
        if (!ext_tx_used_intra[ext_tx_set][tx_type])
          continue;
      }
      mbmi->tx_type = tx_type;
      // Pruning: in set 1, while DCT_DCT is still the best type, skip the
      // whole DST_* range by advancing the loop counter to just before IDTX.
      if (ext_tx_set == 1 &&
          mbmi->tx_type >= DST_ADST && mbmi->tx_type < IDTX &&
          best_tx_type == DCT_DCT) {
        tx_type = IDTX - 1;
        break;
      }
      txfm_rd_in_plane(x,
#if CONFIG_VAR_TX
                       cpi,
#endif
                       &r, &d, &s,
                       &sse, ref_best_rd, 0, bs, n,
                       cpi->sf.use_fast_coef_costing);
      // Add the cost of signaling the tx type when it is actually coded:
      // more than one type available, not lossless, and rate is valid.
      if (get_ext_tx_types(n, bs, is_inter) > 1 &&
          !xd->lossless[xd->mi[0]->mbmi.segment_id] &&
          r != INT_MAX) {
        if (is_inter) {
          if (ext_tx_set > 0)
            r += cpi->inter_tx_type_costs[ext_tx_set]
                                         [mbmi->tx_size][mbmi->tx_type];
        } else {
          if (ext_tx_set > 0)
            r += cpi->intra_tx_type_costs[ext_tx_set][mbmi->tx_size]
                                         [mbmi->mode][mbmi->tx_type];
        }
      }
#else  // CONFIG_EXT_TX
      txfm_rd_in_plane(x,
#if CONFIG_VAR_TX
                       cpi,
#endif
                       &r, &d, &s,
                       &sse, ref_best_rd, 0, bs, n,
                       cpi->sf.use_fast_coef_costing);
#endif  // CONFIG_EXT_TX

      if (r == INT_MAX)
        continue;

      // RD cost of this candidate.  A skipped block pays only the skip flag
      // (plus, for intra with selectable tx size, the tx-size signaling).
      if (s) {
        if (is_inter) {
          rd = RDCOST(x->rdmult, x->rddiv, s1, sse);
        } else {
          rd = RDCOST(x->rdmult, x->rddiv, s1 + r_tx_size * tx_select, sse);
        }
      } else {
        rd = RDCOST(x->rdmult, x->rddiv, r + s0 + r_tx_size * tx_select, d);
      }

      // Fold the tx-size signaling cost into the reported rate as well.
      if (tx_select && !(s && is_inter))
        r += r_tx_size;

      // For non-lossless inter blocks, also consider forcing skip
      // (coding no residual) and keep whichever cost is lower.
      if (is_inter && !xd->lossless[xd->mi[0]->mbmi.segment_id] && !s)
        rd = VPXMIN(rd, RDCOST(x->rdmult, x->rddiv, s1, sse));

      // Early termination in transform size search.
      if (cpi->sf.tx_size_search_breakout &&
          (rd == INT64_MAX ||
#if CONFIG_EXT_TX
           (s == 1 && tx_type != DCT_DCT && n < start_tx) ||
#else
           (s == 1 && n < start_tx) ||
#endif
           (n < (int) max_tx_size && rd > last_rd)))
        break;

      last_rd = rd;
      // Accept the candidate if it beats the best so far; under EXT_TX the
      // acceptance threshold is scaled by ext_tx_th while an inter block's
      // best type is still DCT_DCT.
      if (rd <
#if CONFIG_EXT_TX
          (is_inter && best_tx_type == DCT_DCT ? ext_tx_th : 1) *
#endif  // CONFIG_EXT_TX
          best_rd) {
        best_tx = n;
        best_rd = rd;
        *distortion = d;
        *rate = r;
        *skip = s;
        *psse = sse;
#if CONFIG_EXT_TX
        best_tx_type = mbmi->tx_type;
#endif  // CONFIG_EXT_TX
      }
    }
#if CONFIG_EXT_TX
  }
#endif  // CONFIG_EXT_TX

  mbmi->tx_size = best_tx;
#if CONFIG_EXT_TX
  mbmi->tx_type = best_tx_type;
  // Re-run the per-plane transform RD with the winning size/type.  The
  // r/d/s/sse results are discarded here -- NOTE(review): presumably only
  // the side effects on |x| are needed; confirm against txfm_rd_in_plane.
  txfm_rd_in_plane(x,
#if CONFIG_VAR_TX
                   cpi,
#endif
                   &r, &d, &s,
                   &sse, ref_best_rd, 0, bs, best_tx,
                   cpi->sf.use_fast_coef_costing);
#endif  // CONFIG_EXT_TX
}
955
Yaowu Xu26a9afc2015-08-13 09:42:27 -0700956static void super_block_yrd(VP10_COMP *cpi, MACROBLOCK *x, int *rate,
Jingning Han3ee6db62015-08-05 19:00:31 -0700957 int64_t *distortion, int *skip,
958 int64_t *psse, BLOCK_SIZE bs,
959 int64_t ref_best_rd) {
960 MACROBLOCKD *xd = &x->e_mbd;
961 int64_t sse;
962 int64_t *ret_sse = psse ? psse : &sse;
963
964 assert(bs == xd->mi[0]->mbmi.sb_type);
965
Ronald S. Bultje60c58b52015-10-12 17:54:25 -0400966 if (CONFIG_MISC_FIXES && xd->lossless[xd->mi[0]->mbmi.segment_id]) {
967 choose_smallest_tx_size(cpi, x, rate, distortion, skip, ret_sse,
968 ref_best_rd, bs);
969 } else if (cpi->sf.tx_size_search_method == USE_LARGESTALL ||
970 xd->lossless[xd->mi[0]->mbmi.segment_id]) {
Jingning Han3ee6db62015-08-05 19:00:31 -0700971 choose_largest_tx_size(cpi, x, rate, distortion, skip, ret_sse, ref_best_rd,
972 bs);
973 } else {
974 choose_tx_size_from_rd(cpi, x, rate, distortion, skip, ret_sse,
975 ref_best_rd, bs);
976 }
977}
978
979static int conditional_skipintra(PREDICTION_MODE mode,
980 PREDICTION_MODE best_intra_mode) {
981 if (mode == D117_PRED &&
982 best_intra_mode != V_PRED &&
983 best_intra_mode != D135_PRED)
984 return 1;
985 if (mode == D63_PRED &&
986 best_intra_mode != V_PRED &&
987 best_intra_mode != D45_PRED)
988 return 1;
989 if (mode == D207_PRED &&
990 best_intra_mode != H_PRED &&
991 best_intra_mode != D45_PRED)
992 return 1;
993 if (mode == D153_PRED &&
994 best_intra_mode != H_PRED &&
995 best_intra_mode != D135_PRED)
996 return 1;
997 return 0;
998}
999
// RD search for a luma palette mode (screen-content coding).  Builds
// candidate palettes of size 2..PALETTE_MAX_SIZE with k-means over the
// source pixels, evaluates each with super_block_yrd(), and updates
// *best_rd / *palette_mode_info / best_palette_color_map / *best_tx /
// *mode_selected whenever a palette candidate wins.  Does nothing unless
// the block has 2..64 distinct colors and screen-content tools are allowed.
void rd_pick_palette_intra_sby(VP10_COMP *cpi, MACROBLOCK *x, BLOCK_SIZE bsize,
                               int palette_ctx, int dc_mode_cost,
                               PALETTE_MODE_INFO *palette_mode_info,
                               uint8_t *best_palette_color_map,
                               TX_SIZE *best_tx, PREDICTION_MODE *mode_selected,
                               int64_t *best_rd) {
  MACROBLOCKD *const xd = &x->e_mbd;
  MODE_INFO *const mic = xd->mi[0];
  int rows = 4 * num_4x4_blocks_high_lookup[bsize];
  int cols = 4 * num_4x4_blocks_wide_lookup[bsize];
  int this_rate, this_rate_tokenonly, s;
  int64_t this_distortion, this_rd;
  int colors, n;
  int src_stride = x->plane[0].src.stride;
  uint8_t *src = x->plane[0].src.buf;

#if CONFIG_VP9_HIGHBITDEPTH
  if (cpi->common.use_highbitdepth)
    colors = vp10_count_colors_highbd(src, src_stride, rows, cols,
                                      cpi->common.bit_depth);
  else
#endif  // CONFIG_VP9_HIGHBITDEPTH
    colors = vp10_count_colors(src, src_stride, rows, cols);
  palette_mode_info->palette_size[0] = 0;

  if (colors > 1 && colors <= 64 && cpi->common.allow_screen_content_tools) {
    int r, c, i, j, k;
    int max_itr = 50;  // k-means iteration cap
    int color_ctx, color_idx = 0;
    int color_order[PALETTE_MAX_SIZE];
    double *data = x->palette_buffer->kmeans_data_buf;
    uint8_t *indices = x->palette_buffer->kmeans_indices_buf;
    uint8_t *pre_indices = x->palette_buffer->kmeans_pre_indices_buf;
    double centroids[PALETTE_MAX_SIZE];
    uint8_t *color_map;
    double lb, ub, val;  // lower/upper bound of the observed pixel range
    PALETTE_MODE_INFO *pmi = &mic->mbmi.palette_mode_info;
#if CONFIG_VP9_HIGHBITDEPTH
    uint16_t *src16 = CONVERT_TO_SHORTPTR(src);
    if (cpi->common.use_highbitdepth)
      lb = ub = src16[0];
    else
#endif  // CONFIG_VP9_HIGHBITDEPTH
      lb = ub = src[0];

    // Copy the source pixels into the (double) k-means data buffer while
    // tracking the min/max pixel values.
#if CONFIG_VP9_HIGHBITDEPTH
    if (cpi->common.use_highbitdepth) {
      for (r = 0; r < rows; ++r) {
        for (c = 0; c < cols; ++c) {
          val = src16[r * src_stride + c];
          data[r * cols + c] = val;
          if (val < lb)
            lb = val;
          else if (val > ub)
            ub = val;
        }
      }
    } else {
#endif  // CONFIG_VP9_HIGHBITDEPTH
      for (r = 0; r < rows; ++r) {
        for (c = 0; c < cols; ++c) {
          val = src[r * src_stride + c];
          data[r * cols + c] = val;
          if (val < lb)
            lb = val;
          else if (val > ub)
            ub = val;
        }
      }
#if CONFIG_VP9_HIGHBITDEPTH
    }
#endif  // CONFIG_VP9_HIGHBITDEPTH

    mic->mbmi.mode = DC_PRED;

    // Try palette sizes from min(colors, PALETTE_MAX_SIZE) down to 2.
    for (n = colors > PALETTE_MAX_SIZE ? PALETTE_MAX_SIZE : colors;
         n >= 2; --n) {
      // Seed centroids evenly across the [lb, ub] pixel range.
      for (i = 0; i < n; ++i)
        centroids[i] = lb + (2 * i + 1) * (ub - lb) / n / 2;
      vp10_k_means(data, centroids, indices, pre_indices, rows * cols,
                   n, 1, max_itr);
      vp10_insertion_sort(centroids, n);
      for (i = 0; i < n; ++i)
        centroids[i] = round(centroids[i]);
      // remove duplicates (centroids are sorted, so duplicates are adjacent;
      // k ends up as the number of distinct palette colors)
      i = 1;
      k = n;
      while (i < k) {
        if (centroids[i] == centroids[i - 1]) {
          j = i;
          while (j < k - 1) {
            centroids[j] = centroids[j + 1];
            ++j;
          }
          --k;
        } else {
          ++i;
        }
      }

#if CONFIG_VP9_HIGHBITDEPTH
      if (cpi->common.use_highbitdepth)
        for (i = 0; i < k; ++i)
          mic->mbmi.palette_mode_info.palette_colors[i] =
              clip_pixel_highbd(round(centroids[i]), cpi->common.bit_depth);
      else
#endif  // CONFIG_VP9_HIGHBITDEPTH
        for (i = 0; i < k; ++i)
          pmi->palette_colors[i] = clip_pixel((int)round(centroids[i]));
      pmi->palette_size[0] = k;

      // Map every pixel to its nearest centroid and store the color indices.
      vp10_calc_indices(data, centroids, indices, rows * cols, k, 1);
      for (r = 0; r < rows; ++r)
        for (c = 0; c < cols; ++c)
          xd->plane[0].color_index_map[r * cols + c] = indices[r * cols + c];

      super_block_yrd(cpi, x, &this_rate_tokenonly, &this_distortion,
                      &s, NULL, bsize, *best_rd);
      if (this_rate_tokenonly == INT_MAX)
        continue;

      // Total rate: token cost + DC mode cost + bit_depth raw bits per
      // palette color (vp10_cost_bit(128, 0) is the cost of one
      // equiprobable bit) + palette-size cost.
      this_rate = this_rate_tokenonly + dc_mode_cost +
          cpi->common.bit_depth * k * vp10_cost_bit(128, 0) +
          cpi->palette_y_size_cost[bsize - BLOCK_8X8][k - 2];
      // Cost of signaling "palette mode on" in this context.
      this_rate +=
          vp10_cost_bit(vp10_default_palette_y_mode_prob[bsize - BLOCK_8X8]
                                                        [palette_ctx], 1);
      color_map = xd->plane[0].color_index_map;
      // First index is coded uniformly; the rest are context coded.
      this_rate += write_uniform_cost(k, xd->plane[0].color_index_map[0]);
      for (i = 0; i < rows; ++i) {
        for (j = (i == 0 ? 1 : 0); j < cols; ++j) {
          color_ctx = vp10_get_palette_color_context(color_map, cols, i, j,
                                                     k, color_order);
          for (r = 0; r < k; ++r)
            if (color_map[i * cols + j] == color_order[r]) {
              color_idx = r;
              break;
            }
          assert(color_idx < k);
          this_rate +=
              cpi->palette_y_color_cost[k - 2][color_ctx][color_idx];
        }
      }
      this_rd = RDCOST(x->rdmult, x->rddiv, this_rate, this_distortion);

      if (this_rd < *best_rd) {
        *best_rd = this_rd;
        *palette_mode_info = mic->mbmi.palette_mode_info;
        memcpy(best_palette_color_map, xd->plane[0].color_index_map,
               rows * cols * sizeof(xd->plane[0].color_index_map[0]));
        *mode_selected = DC_PRED;
        *best_tx = mic->mbmi.tx_size;
      }
    }
  }
}
1156
Yaowu Xu26a9afc2015-08-13 09:42:27 -07001157static int64_t rd_pick_intra4x4block(VP10_COMP *cpi, MACROBLOCK *x,
Jingning Han3ee6db62015-08-05 19:00:31 -07001158 int row, int col,
1159 PREDICTION_MODE *best_mode,
1160 const int *bmode_costs,
1161 ENTROPY_CONTEXT *a, ENTROPY_CONTEXT *l,
1162 int *bestrate, int *bestratey,
1163 int64_t *bestdistortion,
1164 BLOCK_SIZE bsize, int64_t rd_thresh) {
1165 PREDICTION_MODE mode;
1166 MACROBLOCKD *const xd = &x->e_mbd;
1167 int64_t best_rd = rd_thresh;
1168 struct macroblock_plane *p = &x->plane[0];
1169 struct macroblockd_plane *pd = &xd->plane[0];
1170 const int src_stride = p->src.stride;
1171 const int dst_stride = pd->dst.stride;
1172 const uint8_t *src_init = &p->src.buf[row * 4 * src_stride + col * 4];
1173 uint8_t *dst_init = &pd->dst.buf[row * 4 * src_stride + col * 4];
1174 ENTROPY_CONTEXT ta[2], tempa[2];
1175 ENTROPY_CONTEXT tl[2], templ[2];
1176 const int num_4x4_blocks_wide = num_4x4_blocks_wide_lookup[bsize];
1177 const int num_4x4_blocks_high = num_4x4_blocks_high_lookup[bsize];
1178 int idx, idy;
1179 uint8_t best_dst[8 * 8];
1180#if CONFIG_VP9_HIGHBITDEPTH
1181 uint16_t best_dst16[8 * 8];
1182#endif
1183
1184 memcpy(ta, a, sizeof(ta));
1185 memcpy(tl, l, sizeof(tl));
1186 xd->mi[0]->mbmi.tx_size = TX_4X4;
hui su5d011cb2015-09-15 12:44:13 -07001187 xd->mi[0]->mbmi.palette_mode_info.palette_size[0] = 0;
Jingning Han3ee6db62015-08-05 19:00:31 -07001188
1189#if CONFIG_VP9_HIGHBITDEPTH
1190 if (xd->cur_buf->flags & YV12_FLAG_HIGHBITDEPTH) {
1191 for (mode = DC_PRED; mode <= TM_PRED; ++mode) {
1192 int64_t this_rd;
1193 int ratey = 0;
1194 int64_t distortion = 0;
1195 int rate = bmode_costs[mode];
1196
1197 if (!(cpi->sf.intra_y_mode_mask[TX_4X4] & (1 << mode)))
1198 continue;
1199
1200 // Only do the oblique modes if the best so far is
1201 // one of the neighboring directional modes
1202 if (cpi->sf.mode_search_skip_flags & FLAG_SKIP_INTRA_DIRMISMATCH) {
1203 if (conditional_skipintra(mode, *best_mode))
Debargha Mukherjee8a429242015-10-12 12:30:55 -07001204 continue;
Jingning Han3ee6db62015-08-05 19:00:31 -07001205 }
1206
1207 memcpy(tempa, ta, sizeof(ta));
1208 memcpy(templ, tl, sizeof(tl));
1209
1210 for (idy = 0; idy < num_4x4_blocks_high; ++idy) {
1211 for (idx = 0; idx < num_4x4_blocks_wide; ++idx) {
1212 const int block = (row + idy) * 2 + (col + idx);
1213 const uint8_t *const src = &src_init[idx * 4 + idy * 4 * src_stride];
1214 uint8_t *const dst = &dst_init[idx * 4 + idy * 4 * dst_stride];
1215 int16_t *const src_diff = vp10_raster_block_offset_int16(BLOCK_8X8,
Debargha Mukherjee8a429242015-10-12 12:30:55 -07001216 block,
1217 p->src_diff);
Jingning Han3ee6db62015-08-05 19:00:31 -07001218 tran_low_t *const coeff = BLOCK_OFFSET(x->plane[0].coeff, block);
1219 xd->mi[0]->bmi[block].as_mode = mode;
Ronald S. Bultjec7dc1d72015-10-12 10:35:46 -04001220 vp10_predict_intra_block(xd, 1, 1, TX_4X4, mode, dst, dst_stride,
Jingning Han3ee6db62015-08-05 19:00:31 -07001221 dst, dst_stride,
1222 col + idx, row + idy, 0);
1223 vpx_highbd_subtract_block(4, 4, src_diff, 8, src, src_stride,
1224 dst, dst_stride, xd->bd);
Ronald S. Bultje60c58b52015-10-12 17:54:25 -04001225 if (xd->lossless[xd->mi[0]->mbmi.segment_id]) {
hui sub3cc3a02015-08-24 14:37:54 -07001226 TX_TYPE tx_type = get_tx_type(PLANE_TYPE_Y, xd, block, TX_4X4);
Debargha Mukherjee9fc691e2015-09-03 02:58:12 -07001227 const scan_order *so = get_scan(TX_4X4, tx_type, 0);
Jingning Han20484042015-10-21 17:38:00 -07001228#if CONFIG_VAR_TX
1229 const int coeff_ctx = combine_entropy_contexts(*(tempa + idx),
1230 *(templ + idy));
1231#endif
Yaowu Xu7c514e22015-09-28 15:55:46 -07001232 vp10_highbd_fwd_txfm_4x4(src_diff, coeff, 8, DCT_DCT, 1);
Jingning Han3ee6db62015-08-05 19:00:31 -07001233 vp10_regular_quantize_b_4x4(x, 0, block, so->scan, so->iscan);
Jingning Han20484042015-10-21 17:38:00 -07001234 ratey += cost_coeffs(x, 0, block,
1235#if CONFIG_VAR_TX
1236 coeff_ctx,
1237#else
1238 tempa + idx, templ + idy,
1239#endif
1240 TX_4X4,
Jingning Han3ee6db62015-08-05 19:00:31 -07001241 so->scan, so->neighbors,
1242 cpi->sf.use_fast_coef_costing);
1243 if (RDCOST(x->rdmult, x->rddiv, ratey, distortion) >= best_rd)
1244 goto next_highbd;
hui sud76e5b32015-08-13 16:27:19 -07001245 vp10_highbd_inv_txfm_add_4x4(BLOCK_OFFSET(pd->dqcoeff, block),
1246 dst, dst_stride, p->eobs[block],
Yaowu Xu7c514e22015-09-28 15:55:46 -07001247 xd->bd, DCT_DCT, 1);
Jingning Han3ee6db62015-08-05 19:00:31 -07001248 } else {
1249 int64_t unused;
hui sub3cc3a02015-08-24 14:37:54 -07001250 TX_TYPE tx_type = get_tx_type(PLANE_TYPE_Y, xd, block, TX_4X4);
Debargha Mukherjee9fc691e2015-09-03 02:58:12 -07001251 const scan_order *so = get_scan(TX_4X4, tx_type, 0);
Jingning Han20484042015-10-21 17:38:00 -07001252#if CONFIG_VAR_TX
1253 const int coeff_ctx = combine_entropy_contexts(*(tempa + idx),
1254 *(templ + idy));
1255#endif
Yaowu Xu7c514e22015-09-28 15:55:46 -07001256 vp10_highbd_fwd_txfm_4x4(src_diff, coeff, 8, tx_type, 0);
Jingning Han3ee6db62015-08-05 19:00:31 -07001257 vp10_regular_quantize_b_4x4(x, 0, block, so->scan, so->iscan);
Jingning Han20484042015-10-21 17:38:00 -07001258 ratey += cost_coeffs(x, 0, block,
1259#if CONFIG_VAR_TX
1260 coeff_ctx,
1261#else
1262 tempa + idx, templ + idy,
1263#endif
1264 TX_4X4,
Jingning Han3ee6db62015-08-05 19:00:31 -07001265 so->scan, so->neighbors,
1266 cpi->sf.use_fast_coef_costing);
1267 distortion += vp10_highbd_block_error(
1268 coeff, BLOCK_OFFSET(pd->dqcoeff, block),
1269 16, &unused, xd->bd) >> 2;
1270 if (RDCOST(x->rdmult, x->rddiv, ratey, distortion) >= best_rd)
1271 goto next_highbd;
hui sud76e5b32015-08-13 16:27:19 -07001272 vp10_highbd_inv_txfm_add_4x4(BLOCK_OFFSET(pd->dqcoeff, block),
1273 dst, dst_stride, p->eobs[block],
Yaowu Xu7c514e22015-09-28 15:55:46 -07001274 xd->bd, tx_type, 0);
Jingning Han3ee6db62015-08-05 19:00:31 -07001275 }
1276 }
1277 }
1278
1279 rate += ratey;
1280 this_rd = RDCOST(x->rdmult, x->rddiv, rate, distortion);
1281
1282 if (this_rd < best_rd) {
1283 *bestrate = rate;
1284 *bestratey = ratey;
1285 *bestdistortion = distortion;
1286 best_rd = this_rd;
1287 *best_mode = mode;
1288 memcpy(a, tempa, sizeof(tempa));
1289 memcpy(l, templ, sizeof(templ));
1290 for (idy = 0; idy < num_4x4_blocks_high * 4; ++idy) {
1291 memcpy(best_dst16 + idy * 8,
1292 CONVERT_TO_SHORTPTR(dst_init + idy * dst_stride),
1293 num_4x4_blocks_wide * 4 * sizeof(uint16_t));
1294 }
1295 }
Debargha Mukherjee8a429242015-10-12 12:30:55 -07001296next_highbd:
Jingning Han3ee6db62015-08-05 19:00:31 -07001297 {}
1298 }
Jingning Han481b8342015-09-11 08:56:06 -07001299 if (best_rd >= rd_thresh)
Jingning Han3ee6db62015-08-05 19:00:31 -07001300 return best_rd;
1301
1302 for (idy = 0; idy < num_4x4_blocks_high * 4; ++idy) {
1303 memcpy(CONVERT_TO_SHORTPTR(dst_init + idy * dst_stride),
1304 best_dst16 + idy * 8,
1305 num_4x4_blocks_wide * 4 * sizeof(uint16_t));
1306 }
1307
1308 return best_rd;
1309 }
1310#endif // CONFIG_VP9_HIGHBITDEPTH
1311
1312 for (mode = DC_PRED; mode <= TM_PRED; ++mode) {
1313 int64_t this_rd;
1314 int ratey = 0;
1315 int64_t distortion = 0;
1316 int rate = bmode_costs[mode];
1317
1318 if (!(cpi->sf.intra_y_mode_mask[TX_4X4] & (1 << mode)))
1319 continue;
1320
1321 // Only do the oblique modes if the best so far is
1322 // one of the neighboring directional modes
1323 if (cpi->sf.mode_search_skip_flags & FLAG_SKIP_INTRA_DIRMISMATCH) {
1324 if (conditional_skipintra(mode, *best_mode))
Debargha Mukherjee8a429242015-10-12 12:30:55 -07001325 continue;
Jingning Han3ee6db62015-08-05 19:00:31 -07001326 }
1327
1328 memcpy(tempa, ta, sizeof(ta));
1329 memcpy(templ, tl, sizeof(tl));
1330
1331 for (idy = 0; idy < num_4x4_blocks_high; ++idy) {
1332 for (idx = 0; idx < num_4x4_blocks_wide; ++idx) {
1333 const int block = (row + idy) * 2 + (col + idx);
1334 const uint8_t *const src = &src_init[idx * 4 + idy * 4 * src_stride];
1335 uint8_t *const dst = &dst_init[idx * 4 + idy * 4 * dst_stride];
1336 int16_t *const src_diff =
1337 vp10_raster_block_offset_int16(BLOCK_8X8, block, p->src_diff);
1338 tran_low_t *const coeff = BLOCK_OFFSET(x->plane[0].coeff, block);
1339 xd->mi[0]->bmi[block].as_mode = mode;
Ronald S. Bultjec7dc1d72015-10-12 10:35:46 -04001340 vp10_predict_intra_block(xd, 1, 1, TX_4X4, mode, dst, dst_stride,
Jingning Han3ee6db62015-08-05 19:00:31 -07001341 dst, dst_stride, col + idx, row + idy, 0);
1342 vpx_subtract_block(4, 4, src_diff, 8, src, src_stride, dst, dst_stride);
1343
Ronald S. Bultje60c58b52015-10-12 17:54:25 -04001344 if (xd->lossless[xd->mi[0]->mbmi.segment_id]) {
hui sub3cc3a02015-08-24 14:37:54 -07001345 TX_TYPE tx_type = get_tx_type(PLANE_TYPE_Y, xd, block, TX_4X4);
Debargha Mukherjee9fc691e2015-09-03 02:58:12 -07001346 const scan_order *so = get_scan(TX_4X4, tx_type, 0);
Jingning Han2cdc1272015-10-09 09:57:42 -07001347#if CONFIG_VAR_TX
1348 int coeff_ctx = combine_entropy_contexts(*(tempa + idx),
1349 *(templ + idy));
1350#endif
Yaowu Xu7c514e22015-09-28 15:55:46 -07001351 vp10_fwd_txfm_4x4(src_diff, coeff, 8, DCT_DCT, 1);
Jingning Han3ee6db62015-08-05 19:00:31 -07001352 vp10_regular_quantize_b_4x4(x, 0, block, so->scan, so->iscan);
Jingning Han2cdc1272015-10-09 09:57:42 -07001353#if CONFIG_VAR_TX
1354 ratey += cost_coeffs(x, 0, block, coeff_ctx, TX_4X4, so->scan,
1355 so->neighbors, cpi->sf.use_fast_coef_costing);
1356 *(tempa + idx) = !(p->eobs[block] == 0);
1357 *(templ + idy) = !(p->eobs[block] == 0);
1358#else
1359 ratey += cost_coeffs(x, 0, block, tempa + idx, templ + idy,
1360 TX_4X4,
Jingning Han3ee6db62015-08-05 19:00:31 -07001361 so->scan, so->neighbors,
1362 cpi->sf.use_fast_coef_costing);
Jingning Han2cdc1272015-10-09 09:57:42 -07001363#endif
Jingning Han3ee6db62015-08-05 19:00:31 -07001364 if (RDCOST(x->rdmult, x->rddiv, ratey, distortion) >= best_rd)
1365 goto next;
hui sud76e5b32015-08-13 16:27:19 -07001366 vp10_inv_txfm_add_4x4(BLOCK_OFFSET(pd->dqcoeff, block),
Yaowu Xu7c514e22015-09-28 15:55:46 -07001367 dst, dst_stride, p->eobs[block], DCT_DCT, 1);
Jingning Han3ee6db62015-08-05 19:00:31 -07001368 } else {
1369 int64_t unused;
hui sub3cc3a02015-08-24 14:37:54 -07001370 TX_TYPE tx_type = get_tx_type(PLANE_TYPE_Y, xd, block, TX_4X4);
Debargha Mukherjee9fc691e2015-09-03 02:58:12 -07001371 const scan_order *so = get_scan(TX_4X4, tx_type, 0);
Jingning Han2cdc1272015-10-09 09:57:42 -07001372#if CONFIG_VAR_TX
1373 int coeff_ctx = combine_entropy_contexts(*(tempa + idx),
1374 *(templ + idy));
1375#endif
Yaowu Xu7c514e22015-09-28 15:55:46 -07001376 vp10_fwd_txfm_4x4(src_diff, coeff, 8, tx_type, 0);
Jingning Han3ee6db62015-08-05 19:00:31 -07001377 vp10_regular_quantize_b_4x4(x, 0, block, so->scan, so->iscan);
Jingning Han2cdc1272015-10-09 09:57:42 -07001378#if CONFIG_VAR_TX
1379 ratey += cost_coeffs(x, 0, block, coeff_ctx, TX_4X4, so->scan,
1380 so->neighbors, cpi->sf.use_fast_coef_costing);
1381 *(tempa + idx) = !(p->eobs[block] == 0);
1382 *(templ + idy) = !(p->eobs[block] == 0);
1383#else
1384 ratey += cost_coeffs(x, 0, block, tempa + idx, templ + idy,
1385 TX_4X4, so->scan, so->neighbors,
1386 cpi->sf.use_fast_coef_costing);
1387#endif
Jingning Han3ee6db62015-08-05 19:00:31 -07001388 distortion += vp10_block_error(coeff, BLOCK_OFFSET(pd->dqcoeff, block),
1389 16, &unused) >> 2;
1390 if (RDCOST(x->rdmult, x->rddiv, ratey, distortion) >= best_rd)
1391 goto next;
hui sud76e5b32015-08-13 16:27:19 -07001392 vp10_inv_txfm_add_4x4(BLOCK_OFFSET(pd->dqcoeff, block),
Yaowu Xu7c514e22015-09-28 15:55:46 -07001393 dst, dst_stride, p->eobs[block], tx_type, 0);
Jingning Han3ee6db62015-08-05 19:00:31 -07001394 }
1395 }
1396 }
1397
1398 rate += ratey;
1399 this_rd = RDCOST(x->rdmult, x->rddiv, rate, distortion);
1400
1401 if (this_rd < best_rd) {
1402 *bestrate = rate;
1403 *bestratey = ratey;
1404 *bestdistortion = distortion;
1405 best_rd = this_rd;
1406 *best_mode = mode;
1407 memcpy(a, tempa, sizeof(tempa));
1408 memcpy(l, templ, sizeof(templ));
1409 for (idy = 0; idy < num_4x4_blocks_high * 4; ++idy)
1410 memcpy(best_dst + idy * 8, dst_init + idy * dst_stride,
1411 num_4x4_blocks_wide * 4);
1412 }
1413 next:
1414 {}
1415 }
1416
Jingning Hanf1376972015-09-10 12:42:21 -07001417 if (best_rd >= rd_thresh)
Jingning Han3ee6db62015-08-05 19:00:31 -07001418 return best_rd;
1419
1420 for (idy = 0; idy < num_4x4_blocks_high * 4; ++idy)
1421 memcpy(dst_init + idy * dst_stride, best_dst + idy * 8,
1422 num_4x4_blocks_wide * 4);
1423
1424 return best_rd;
1425}
1426
Yaowu Xu26a9afc2015-08-13 09:42:27 -07001427static int64_t rd_pick_intra_sub_8x8_y_mode(VP10_COMP *cpi, MACROBLOCK *mb,
Jingning Han3ee6db62015-08-05 19:00:31 -07001428 int *rate, int *rate_y,
1429 int64_t *distortion,
1430 int64_t best_rd) {
1431 int i, j;
1432 const MACROBLOCKD *const xd = &mb->e_mbd;
1433 MODE_INFO *const mic = xd->mi[0];
1434 const MODE_INFO *above_mi = xd->above_mi;
1435 const MODE_INFO *left_mi = xd->left_mi;
1436 const BLOCK_SIZE bsize = xd->mi[0]->mbmi.sb_type;
1437 const int num_4x4_blocks_wide = num_4x4_blocks_wide_lookup[bsize];
1438 const int num_4x4_blocks_high = num_4x4_blocks_high_lookup[bsize];
1439 int idx, idy;
1440 int cost = 0;
1441 int64_t total_distortion = 0;
1442 int tot_rate_y = 0;
1443 int64_t total_rd = 0;
1444 ENTROPY_CONTEXT t_above[4], t_left[4];
1445 const int *bmode_costs = cpi->mbmode_cost;
1446
1447 memcpy(t_above, xd->plane[0].above_context, sizeof(t_above));
1448 memcpy(t_left, xd->plane[0].left_context, sizeof(t_left));
1449
hui sube3559b2015-10-07 09:29:02 -07001450#if CONFIG_EXT_INTRA
1451 mic->mbmi.ext_intra_mode_info.use_ext_intra_mode[0] = 0;
1452#endif // CONFIG_EXT_INTRA
1453
Jingning Han3ee6db62015-08-05 19:00:31 -07001454 // Pick modes for each sub-block (of size 4x4, 4x8, or 8x4) in an 8x8 block.
1455 for (idy = 0; idy < 2; idy += num_4x4_blocks_high) {
1456 for (idx = 0; idx < 2; idx += num_4x4_blocks_wide) {
1457 PREDICTION_MODE best_mode = DC_PRED;
1458 int r = INT_MAX, ry = INT_MAX;
1459 int64_t d = INT64_MAX, this_rd = INT64_MAX;
1460 i = idy * 2 + idx;
1461 if (cpi->common.frame_type == KEY_FRAME) {
1462 const PREDICTION_MODE A = vp10_above_block_mode(mic, above_mi, i);
1463 const PREDICTION_MODE L = vp10_left_block_mode(mic, left_mi, i);
1464
1465 bmode_costs = cpi->y_mode_costs[A][L];
1466 }
1467
1468 this_rd = rd_pick_intra4x4block(cpi, mb, idy, idx, &best_mode,
1469 bmode_costs, t_above + idx, t_left + idy,
1470 &r, &ry, &d, bsize, best_rd - total_rd);
1471 if (this_rd >= best_rd - total_rd)
1472 return INT64_MAX;
1473
1474 total_rd += this_rd;
1475 cost += r;
1476 total_distortion += d;
1477 tot_rate_y += ry;
1478
1479 mic->bmi[i].as_mode = best_mode;
1480 for (j = 1; j < num_4x4_blocks_high; ++j)
1481 mic->bmi[i + j * 2].as_mode = best_mode;
1482 for (j = 1; j < num_4x4_blocks_wide; ++j)
1483 mic->bmi[i + j].as_mode = best_mode;
1484
1485 if (total_rd >= best_rd)
1486 return INT64_MAX;
1487 }
1488 }
1489
1490 *rate = cost;
1491 *rate_y = tot_rate_y;
1492 *distortion = total_distortion;
1493 mic->mbmi.mode = mic->bmi[3].as_mode;
1494
1495 return RDCOST(mb->rdmult, mb->rddiv, cost, total_distortion);
1496}
1497
hui sube3559b2015-10-07 09:29:02 -07001498#if CONFIG_EXT_INTRA
// RD search over the ext intra (filter intra) modes for the luma plane,
// layered on top of DC_PRED.  Updates *best_rd and the rate/distortion/skip
// outputs when an ext intra mode wins, and restores the winning mode info
// into mbmi.  Returns 1 if an ext intra mode is selected; 0 otherwise.
static int rd_pick_ext_intra_sby(VP10_COMP *cpi, MACROBLOCK *x,
                                 int *rate, int *rate_tokenonly,
                                 int64_t *distortion, int *skippable,
                                 BLOCK_SIZE bsize, int mode_cost,
                                 int64_t *best_rd) {
  MACROBLOCKD *const xd = &x->e_mbd;
  MODE_INFO *const mic = xd->mi[0];
  MB_MODE_INFO *mbmi = &mic->mbmi;
  int this_rate, this_rate_tokenonly, s;
  int ext_intra_selected_flag = 0;
  int64_t this_distortion, this_rd;
  EXT_INTRA_MODE mode;
  TX_SIZE best_tx_size = TX_4X4;
  EXT_INTRA_MODE_INFO ext_intra_mode_info;
#if CONFIG_EXT_TX
  // Only assigned when a mode wins; reads are guarded by
  // ext_intra_selected_flag, so it is never used uninitialized.
  TX_TYPE best_tx_type;
#endif  // CONFIG_EXT_TX

  vp10_zero(ext_intra_mode_info);
  mbmi->ext_intra_mode_info.use_ext_intra_mode[0] = 1;
  mbmi->mode = DC_PRED;

  for (mode = 0; mode < FILTER_INTRA_MODES; ++mode) {
    mbmi->ext_intra_mode_info.ext_intra_mode[0] = mode;
    super_block_yrd(cpi, x, &this_rate_tokenonly, &this_distortion,
                    &s, NULL, bsize, *best_rd);
    if (this_rate_tokenonly == INT_MAX)
      continue;

    // Rate: tokens + "ext intra on" flag + uniform-coded mode index +
    // the caller-supplied base mode cost.
    this_rate = this_rate_tokenonly +
        vp10_cost_bit(cpi->common.fc->ext_intra_probs[0], 1) +
        write_uniform_cost(FILTER_INTRA_MODES, mode) + mode_cost;
    this_rd = RDCOST(x->rdmult, x->rddiv, this_rate, this_distortion);

    if (this_rd < *best_rd) {
      *best_rd = this_rd;
      // super_block_yrd may change tx_size/tx_type; remember the winners.
      best_tx_size = mic->mbmi.tx_size;
      ext_intra_mode_info = mbmi->ext_intra_mode_info;
#if CONFIG_EXT_TX
      best_tx_type = mic->mbmi.tx_type;
#endif  // CONFIG_EXT_TX
      *rate = this_rate;
      *rate_tokenonly = this_rate_tokenonly;
      *distortion = this_distortion;
      *skippable = s;
      ext_intra_selected_flag = 1;
    }
  }

  if (ext_intra_selected_flag) {
    // Restore the winning configuration into the mode info.
    mbmi->mode = DC_PRED;
    mbmi->tx_size = best_tx_size;
    mbmi->ext_intra_mode_info.use_ext_intra_mode[0] =
        ext_intra_mode_info.use_ext_intra_mode[0];
    mbmi->ext_intra_mode_info.ext_intra_mode[0] =
        ext_intra_mode_info.ext_intra_mode[0];
#if CONFIG_EXT_TX
    mbmi->tx_type = best_tx_type;
#endif  // CONFIG_EXT_TX
    return 1;
  } else {
    return 0;
  }
}
hui su4aa50c12015-11-10 12:09:59 -08001564
// RD search over the prediction-angle delta for a directional luma intra
// mode. mic->mbmi.mode must already hold the directional mode under test;
// rate_overhead is the signaling cost the caller charges for that mode plus
// a zero-delta angle code. With ANGLE_FAST_SEARCH, a coarse level-1 scan
// over deltas {0, -2, +2} is refined by a level-2 scan around the level-1
// winner; otherwise every delta in [-MAX_ANGLE_DELTAS, MAX_ANGLE_DELTAS] is
// evaluated. On return mbmi holds the best delta / tx size (/ tx type), and
// a final txfm_rd_in_plane() re-runs the transform search so encoder state
// matches the returned winner. Returns the best RD cost found (or the
// incoming best_rd if nothing beat it).
static int64_t rd_pick_intra_angle_sby(VP10_COMP *cpi, MACROBLOCK *x,
                                       int *rate, int *rate_tokenonly,
                                       int64_t *distortion, int *skippable,
                                       BLOCK_SIZE bsize, int rate_overhead,
                                       int64_t best_rd) {
  MACROBLOCKD *const xd = &x->e_mbd;
  MODE_INFO *const mic = xd->mi[0];
  MB_MODE_INFO *mbmi = &mic->mbmi;
  int this_rate, this_rate_tokenonly, s;
  int angle_delta, best_angle_delta = 0;
  // Slack factor for the first (zero-delta) probe: it is allowed to exceed
  // best_rd by 20% before the whole directional mode is abandoned.
  const double rd_adjust = 1.2;
  int64_t this_distortion, this_rd, sse_dummy;
  TX_SIZE best_tx_size = mic->mbmi.tx_size;
#if CONFIG_EXT_TX
  TX_TYPE best_tx_type = mbmi->tx_type;
#endif  // CONFIG_EXT_TX

  if (ANGLE_FAST_SEARCH) {
    // Two-level search: level 1 probes coarse deltas; level 2 refines
    // around whichever level-1 candidate won (deltas_level2[best_i]).
    int deltas_level1[3] = {0, -2, 2};
    int deltas_level2[3][2] = {
      {-1, 1}, {-3, -1}, {1, 3},
    };
    const int level1 = 3, level2 = 2;
    int i, j, best_i = -1;

    for (i = 0; i < level1; ++i) {
      mic->mbmi.angle_delta[0] = deltas_level1[i];
      // The i == 0 probe gets the relaxed budget so it is not pruned
      // before it can establish whether this direction is viable at all.
      super_block_yrd(cpi, x, &this_rate_tokenonly, &this_distortion,
                      &s, NULL, bsize,
                      (i == 0 && best_rd < INT64_MAX) ? best_rd * rd_adjust :
                      best_rd);
      if (this_rate_tokenonly == INT_MAX) {
        // Zero delta failed even the relaxed budget: give up on this mode.
        if (i == 0)
          break;
        else
          continue;
      }
      this_rate = this_rate_tokenonly + rate_overhead;
      this_rd = RDCOST(x->rdmult, x->rddiv, this_rate, this_distortion);
      if (i == 0 && best_rd < INT64_MAX && this_rd > best_rd * rd_adjust)
        break;
      if (this_rd < best_rd) {
        best_i = i;
        best_rd = this_rd;
        best_angle_delta = mbmi->angle_delta[0];
        best_tx_size = mbmi->tx_size;
#if CONFIG_EXT_TX
        best_tx_type = mbmi->tx_type;
#endif  // CONFIG_EXT_TX
        *rate = this_rate;
        *rate_tokenonly = this_rate_tokenonly;
        *distortion = this_distortion;
        *skippable = s;
      }
    }

    // Level-2 refinement only runs if some level-1 delta improved best_rd.
    if (best_i >= 0) {
      for (j = 0; j < level2; ++j) {
        mic->mbmi.angle_delta[0] = deltas_level2[best_i][j];
        super_block_yrd(cpi, x, &this_rate_tokenonly, &this_distortion,
                        &s, NULL, bsize, best_rd);
        if (this_rate_tokenonly == INT_MAX)
          continue;
        this_rate = this_rate_tokenonly + rate_overhead;
        this_rd = RDCOST(x->rdmult, x->rddiv, this_rate, this_distortion);
        if (this_rd < best_rd) {
          best_rd = this_rd;
          best_angle_delta = mbmi->angle_delta[0];
          best_tx_size = mbmi->tx_size;
#if CONFIG_EXT_TX
          best_tx_type = mbmi->tx_type;
#endif  // CONFIG_EXT_TX
          *rate = this_rate;
          *rate_tokenonly = this_rate_tokenonly;
          *distortion = this_distortion;
          *skippable = s;
        }
      }
    }
  } else {
    // Exhaustive search over the full delta range.
    for (angle_delta = -MAX_ANGLE_DELTAS; angle_delta <= MAX_ANGLE_DELTAS;
        ++angle_delta) {
      mic->mbmi.angle_delta[0] = angle_delta;

      super_block_yrd(cpi, x, &this_rate_tokenonly, &this_distortion,
                      &s, NULL, bsize, best_rd);
      if (this_rate_tokenonly == INT_MAX)
        continue;

      this_rate = this_rate_tokenonly + rate_overhead;
      this_rd = RDCOST(x->rdmult, x->rddiv, this_rate, this_distortion);

      if (this_rd < best_rd) {
        best_rd = this_rd;
        best_angle_delta = mbmi->angle_delta[0];
        best_tx_size = mbmi->tx_size;
#if CONFIG_EXT_TX
        best_tx_type = mbmi->tx_type;
#endif  // CONFIG_EXT_TX
        *rate = this_rate;
        *rate_tokenonly = this_rate_tokenonly;
        *distortion = this_distortion;
        *skippable = s;
      }
    }
  }

  // Restore the winning configuration into mbmi ...
  mbmi->tx_size = best_tx_size;
  mbmi->angle_delta[0] = best_angle_delta;
#if CONFIG_EXT_TX
  mbmi->tx_type = best_tx_type;
#endif  // CONFIG_EXT_TX

  // ... and re-run the transform RD for it so that coefficient/eob state
  // left in the encoder corresponds to the returned best configuration.
  if (*rate_tokenonly < INT_MAX) {
    txfm_rd_in_plane(x,
#if CONFIG_VAR_TX
                     cpi,
#endif
                     &this_rate_tokenonly, &this_distortion, &s,
                     &sse_dummy, INT64_MAX, 0, bsize, mbmi->tx_size,
                     cpi->sf.use_fast_coef_costing);
  }

  return best_rd;
}
hui sube3559b2015-10-07 09:29:02 -07001690#endif // CONFIG_EXT_INTRA
1691
Jingning Han3ee6db62015-08-05 19:00:31 -07001692// This function is used only for intra_only frames
Yaowu Xu26a9afc2015-08-13 09:42:27 -07001693static int64_t rd_pick_intra_sby_mode(VP10_COMP *cpi, MACROBLOCK *x,
Jingning Han3ee6db62015-08-05 19:00:31 -07001694 int *rate, int *rate_tokenonly,
1695 int64_t *distortion, int *skippable,
1696 BLOCK_SIZE bsize,
1697 int64_t best_rd) {
1698 PREDICTION_MODE mode;
1699 PREDICTION_MODE mode_selected = DC_PRED;
1700 MACROBLOCKD *const xd = &x->e_mbd;
1701 MODE_INFO *const mic = xd->mi[0];
1702 int this_rate, this_rate_tokenonly, s;
1703 int64_t this_distortion, this_rd;
1704 TX_SIZE best_tx = TX_4X4;
hui sube3559b2015-10-07 09:29:02 -07001705#if CONFIG_EXT_INTRA
1706 EXT_INTRA_MODE_INFO ext_intra_mode_info;
hui su4aa50c12015-11-10 12:09:59 -08001707 int is_directional_mode, rate_overhead, best_angle_delta = 0;
hui sube3559b2015-10-07 09:29:02 -07001708#endif // CONFIG_EXT_INTRA
hui su3fa01292015-09-28 18:38:00 -07001709#if CONFIG_EXT_TX
hui su4f16f112015-10-02 10:45:27 -07001710 TX_TYPE best_tx_type = DCT_DCT;
hui su3fa01292015-09-28 18:38:00 -07001711#endif // CONFIG_EXT_TX
Jingning Han3ee6db62015-08-05 19:00:31 -07001712 int *bmode_costs;
hui su5d011cb2015-09-15 12:44:13 -07001713 PALETTE_MODE_INFO palette_mode_info;
hui suaaf6f622015-10-14 20:18:18 -07001714 uint8_t *best_palette_color_map = cpi->common.allow_screen_content_tools ?
1715 x->palette_buffer->best_palette_color_map : NULL;
hui su5d011cb2015-09-15 12:44:13 -07001716 int rows = 4 * num_4x4_blocks_high_lookup[bsize];
1717 int cols = 4 * num_4x4_blocks_wide_lookup[bsize];
1718 int palette_ctx = 0;
Jingning Han3ee6db62015-08-05 19:00:31 -07001719 const MODE_INFO *above_mi = xd->above_mi;
1720 const MODE_INFO *left_mi = xd->left_mi;
1721 const PREDICTION_MODE A = vp10_above_block_mode(mic, above_mi, 0);
1722 const PREDICTION_MODE L = vp10_left_block_mode(mic, left_mi, 0);
1723 bmode_costs = cpi->y_mode_costs[A][L];
1724
hui sube3559b2015-10-07 09:29:02 -07001725#if CONFIG_EXT_INTRA
1726 ext_intra_mode_info.use_ext_intra_mode[0] = 0;
1727 mic->mbmi.ext_intra_mode_info.use_ext_intra_mode[0] = 0;
hui su4aa50c12015-11-10 12:09:59 -08001728 mic->mbmi.angle_delta[0] = 0;
hui sube3559b2015-10-07 09:29:02 -07001729#endif // CONFIG_EXT_INTRA
Jingning Han3ee6db62015-08-05 19:00:31 -07001730 memset(x->skip_txfm, SKIP_TXFM_NONE, sizeof(x->skip_txfm));
hui su5d011cb2015-09-15 12:44:13 -07001731 palette_mode_info.palette_size[0] = 0;
1732 mic->mbmi.palette_mode_info.palette_size[0] = 0;
1733 if (above_mi)
1734 palette_ctx += (above_mi->mbmi.palette_mode_info.palette_size[0] > 0);
1735 if (left_mi)
1736 palette_ctx += (left_mi->mbmi.palette_mode_info.palette_size[0] > 0);
1737
Jingning Han3ee6db62015-08-05 19:00:31 -07001738 /* Y Search for intra prediction mode */
hui sube3559b2015-10-07 09:29:02 -07001739 for (mode = DC_PRED; mode <= TM_PRED; ++mode) {
Jingning Han3ee6db62015-08-05 19:00:31 -07001740 mic->mbmi.mode = mode;
hui su4aa50c12015-11-10 12:09:59 -08001741#if CONFIG_EXT_INTRA
1742 is_directional_mode = (mode != DC_PRED && mode != TM_PRED);
1743 if (is_directional_mode) {
1744 rate_overhead = bmode_costs[mode] +
1745 write_uniform_cost(2 * MAX_ANGLE_DELTAS + 1, 0);
1746 this_rate_tokenonly = INT_MAX;
1747 this_rd =
1748 rd_pick_intra_angle_sby(cpi, x, &this_rate, &this_rate_tokenonly,
1749 &this_distortion, &s, bsize, rate_overhead,
1750 best_rd);
1751 } else {
1752 mic->mbmi.angle_delta[0] = 0;
1753 super_block_yrd(cpi, x, &this_rate_tokenonly, &this_distortion,
1754 &s, NULL, bsize, best_rd);
1755 }
1756#endif // CONFIG_EXT_INTRA
Jingning Han3ee6db62015-08-05 19:00:31 -07001757 super_block_yrd(cpi, x, &this_rate_tokenonly, &this_distortion,
hui su4aa50c12015-11-10 12:09:59 -08001758 &s, NULL, bsize, best_rd);
Jingning Han3ee6db62015-08-05 19:00:31 -07001759
1760 if (this_rate_tokenonly == INT_MAX)
1761 continue;
1762
1763 this_rate = this_rate_tokenonly + bmode_costs[mode];
hui su5d011cb2015-09-15 12:44:13 -07001764 if (cpi->common.allow_screen_content_tools && mode == DC_PRED)
1765 this_rate +=
1766 vp10_cost_bit(vp10_default_palette_y_mode_prob[bsize - BLOCK_8X8]
1767 [palette_ctx], 0);
hui sube3559b2015-10-07 09:29:02 -07001768#if CONFIG_EXT_INTRA
hui su4aa50c12015-11-10 12:09:59 -08001769 if (mode == DC_PRED && ALLOW_FILTER_INTRA_MODES)
hui sube3559b2015-10-07 09:29:02 -07001770 this_rate += vp10_cost_bit(cpi->common.fc->ext_intra_probs[0], 0);
hui su4aa50c12015-11-10 12:09:59 -08001771 if (is_directional_mode)
1772 this_rate += write_uniform_cost(2 * MAX_ANGLE_DELTAS + 1,
1773 MAX_ANGLE_DELTAS +
1774 mic->mbmi.angle_delta[0]);
hui sube3559b2015-10-07 09:29:02 -07001775#endif // CONFIG_EXT_INTRA
Jingning Han3ee6db62015-08-05 19:00:31 -07001776 this_rd = RDCOST(x->rdmult, x->rddiv, this_rate, this_distortion);
1777
1778 if (this_rd < best_rd) {
1779 mode_selected = mode;
1780 best_rd = this_rd;
1781 best_tx = mic->mbmi.tx_size;
hui su4aa50c12015-11-10 12:09:59 -08001782#if CONFIG_EXT_INTRA
1783 best_angle_delta = mic->mbmi.angle_delta[0];
1784#endif // CONFIG_EXT_INTRA
hui su3fa01292015-09-28 18:38:00 -07001785#if CONFIG_EXT_TX
hui su4f16f112015-10-02 10:45:27 -07001786 best_tx_type = mic->mbmi.tx_type;
hui su3fa01292015-09-28 18:38:00 -07001787#endif // CONFIG_EXT_TX
Jingning Han3ee6db62015-08-05 19:00:31 -07001788 *rate = this_rate;
1789 *rate_tokenonly = this_rate_tokenonly;
1790 *distortion = this_distortion;
1791 *skippable = s;
1792 }
1793 }
1794
hui su5d011cb2015-09-15 12:44:13 -07001795 if (cpi->common.allow_screen_content_tools)
1796 rd_pick_palette_intra_sby(cpi, x, bsize, palette_ctx, bmode_costs[DC_PRED],
1797 &palette_mode_info, best_palette_color_map,
1798 &best_tx, &mode_selected, &best_rd);
1799
hui sube3559b2015-10-07 09:29:02 -07001800#if CONFIG_EXT_INTRA
hui su4aa50c12015-11-10 12:09:59 -08001801 if (!palette_mode_info.palette_size[0] > 0 && ALLOW_FILTER_INTRA_MODES) {
hui sube3559b2015-10-07 09:29:02 -07001802 if (rd_pick_ext_intra_sby(cpi, x, rate, rate_tokenonly, distortion,
1803 skippable, bsize, bmode_costs[DC_PRED],
1804 &best_rd)) {
1805 mode_selected = mic->mbmi.mode;
1806 best_tx = mic->mbmi.tx_size;
1807 ext_intra_mode_info = mic->mbmi.ext_intra_mode_info;
1808#if CONFIG_EXT_TX
1809 best_tx_type = mic->mbmi.tx_type;
1810#endif // CONFIG_EXT_TX
1811 }
1812 }
1813
1814 mic->mbmi.ext_intra_mode_info.use_ext_intra_mode[0] =
1815 ext_intra_mode_info.use_ext_intra_mode[0];
1816 if (ext_intra_mode_info.use_ext_intra_mode[0]) {
1817 mic->mbmi.ext_intra_mode_info.ext_intra_mode[0] =
1818 ext_intra_mode_info.ext_intra_mode[0];
hui sube3559b2015-10-07 09:29:02 -07001819 }
1820#endif // CONFIG_EXT_INTRA
1821
Jingning Han3ee6db62015-08-05 19:00:31 -07001822 mic->mbmi.mode = mode_selected;
1823 mic->mbmi.tx_size = best_tx;
hui su4aa50c12015-11-10 12:09:59 -08001824#if CONFIG_EXT_INTRA
1825 mic->mbmi.angle_delta[0] = best_angle_delta;
1826#endif // CONFIG_EXT_INTRA
hui su3fa01292015-09-28 18:38:00 -07001827#if CONFIG_EXT_TX
hui su4f16f112015-10-02 10:45:27 -07001828 mic->mbmi.tx_type = best_tx_type;
hui su3fa01292015-09-28 18:38:00 -07001829#endif // CONFIG_EXT_TX
hui su5d011cb2015-09-15 12:44:13 -07001830 mic->mbmi.palette_mode_info.palette_size[0] =
1831 palette_mode_info.palette_size[0];
1832 if (palette_mode_info.palette_size[0] > 0) {
1833 memcpy(mic->mbmi.palette_mode_info.palette_colors,
1834 palette_mode_info.palette_colors,
1835 PALETTE_MAX_SIZE * sizeof(palette_mode_info.palette_colors[0]));
1836 memcpy(xd->plane[0].color_index_map, best_palette_color_map,
1837 rows * cols * sizeof(best_palette_color_map[0]));
1838 }
Jingning Han3ee6db62015-08-05 19:00:31 -07001839
1840 return best_rd;
1841}
1842
Jingning Hana8dad552015-10-08 16:46:10 -07001843#if CONFIG_VAR_TX
// Computes the RD statistics of a single transform block: forward
// transform + quantization, reconstruction into a local scratch buffer,
// pixel-domain SSE/distortion measurement, and the token cost of the
// quantized coefficients. Accumulates into *rate, *dist, *bsse and ANDs
// the block's skip status into *skip.
static void tx_block_rd_b(const VP10_COMP *cpi, MACROBLOCK *x, TX_SIZE tx_size,
                          int blk_row, int blk_col, int plane, int block,
                          int plane_bsize, int coeff_ctx,
                          int *rate, int64_t *dist, int64_t *bsse, int *skip) {
  MACROBLOCKD *xd = &x->e_mbd;
  const struct macroblock_plane *const p = &x->plane[plane];
  struct macroblockd_plane *const pd = &xd->plane[plane];
#if CONFIG_VP9_HIGHBITDEPTH
  // High-bitdepth path measures distortion in the transform domain from
  // coeff/dqcoeff instead of reconstructing pixels (no inverse transform
  // is applied below in this configuration).
  const int ss_txfrm_size = tx_size << 1;
  int64_t this_sse;
  int shift = tx_size == TX_32X32 ? 0 : 2;
  tran_low_t *const coeff = BLOCK_OFFSET(p->coeff, block);
#endif
  unsigned int tmp_sse = 0;
  tran_low_t *const dqcoeff = BLOCK_OFFSET(pd->dqcoeff, block);
  PLANE_TYPE plane_type = (plane == 0) ? PLANE_TYPE_Y : PLANE_TYPE_UV;
  TX_TYPE tx_type = get_tx_type(plane_type, xd, block, tx_size);
  const scan_order *const scan_order =
      get_scan(tx_size, tx_type, is_inter_block(&xd->mi[0]->mbmi));

  BLOCK_SIZE txm_bsize = txsize_to_bsize[tx_size];
  // bh: transform dimension in pixels (square transforms, so it serves as
  // both width and height below).
  int bh = 4 * num_4x4_blocks_wide_lookup[txm_bsize];
  int src_stride = p->src.stride;
  uint8_t *src = &p->src.buf[4 * blk_row * src_stride + 4 * blk_col];
  uint8_t *dst = &pd->dst.buf[4 * blk_row * pd->dst.stride + 4 * blk_col];
  // Scratch reconstruction buffer, fixed stride of 32.
  DECLARE_ALIGNED(16, uint8_t, rec_buffer[32 * 32]);

  int max_blocks_high = num_4x4_blocks_high_lookup[plane_bsize];
  int max_blocks_wide = num_4x4_blocks_wide_lookup[plane_bsize];

  // Clip the usable area for blocks straddling the frame boundary.
  if (xd->mb_to_bottom_edge < 0)
    max_blocks_high += xd->mb_to_bottom_edge >> (5 + pd->subsampling_y);
  if (xd->mb_to_right_edge < 0)
    max_blocks_wide += xd->mb_to_right_edge >> (5 + pd->subsampling_x);

  vp10_xform_quant(x, plane, block, blk_row, blk_col, plane_bsize, tx_size);

  // Seed the scratch buffer with the prediction so SSE against the source
  // is meaningful even before (or without) adding the residual.
  vpx_convolve_copy(dst, pd->dst.stride, rec_buffer, 32,
                    NULL, 0, NULL, 0, bh, bh);

  if (blk_row + (bh >> 2) > max_blocks_high ||
      blk_col + (bh >> 2) > max_blocks_wide) {
    // Partially visible block: accumulate SSE 8x8 at a time over the
    // visible area only (idx/idy step 2 in 4x4 units == 8 pixels).
    int idx, idy;
    unsigned int this_sse;
    int blocks_height = VPXMIN(bh >> 2, max_blocks_high - blk_row);
    int blocks_width = VPXMIN(bh >> 2, max_blocks_wide - blk_col);
    for (idy = 0; idy < blocks_height; idy += 2) {
      for (idx = 0; idx < blocks_width; idx += 2) {
        cpi->fn_ptr[BLOCK_8X8].vf(src + 4 * idy * src_stride + 4 * idx,
                                  src_stride,
                                  rec_buffer + 4 * idy * 32 + 4 * idx,
                                  32, &this_sse);
        tmp_sse += this_sse;
      }
    }
  } else {
    cpi->fn_ptr[txm_bsize].vf(src, src_stride, rec_buffer, 32, &tmp_sse);
  }

#if CONFIG_VP9_HIGHBITDEPTH
  *dist += vp10_highbd_block_error(coeff, dqcoeff, 16 << ss_txfrm_size,
                                   &this_sse, xd->bd) >> shift;
  *bsse += this_sse >> shift;
#else
  // Pixel-domain path: bsse is the prediction SSE; scale by 16 to match
  // the RD cost units used elsewhere.
  *bsse += (int64_t)tmp_sse * 16;

  if (p->eobs[block] > 0) {
    // Add the dequantized residual into the scratch buffer ...
    switch (tx_size) {
      case TX_32X32:
        vp10_inv_txfm_add_32x32(dqcoeff, rec_buffer, 32, p->eobs[block],
                                tx_type);
        break;
      case TX_16X16:
        vp10_inv_txfm_add_16x16(dqcoeff, rec_buffer, 32, p->eobs[block],
                                tx_type);
        break;
      case TX_8X8:
        vp10_inv_txfm_add_8x8(dqcoeff, rec_buffer, 32, p->eobs[block],
                              tx_type);
        break;
      case TX_4X4:
        vp10_inv_txfm_add_4x4(dqcoeff, rec_buffer, 32, p->eobs[block],
                              tx_type,
                              xd->lossless[xd->mi[0]->mbmi.segment_id]);
        break;
      default:
        assert(0 && "Invalid transform size");
        break;
    }

    // ... and re-measure SSE against the source for the true distortion.
    if ((bh >> 2) + blk_col > max_blocks_wide ||
        (bh >> 2) + blk_row > max_blocks_high) {
      int idx, idy;
      unsigned int this_sse;
      int blocks_height = VPXMIN(bh >> 2, max_blocks_high - blk_row);
      int blocks_width = VPXMIN(bh >> 2, max_blocks_wide - blk_col);
      tmp_sse = 0;
      for (idy = 0; idy < blocks_height; idy += 2) {
        for (idx = 0; idx < blocks_width; idx += 2) {
          cpi->fn_ptr[BLOCK_8X8].vf(src + 4 * idy * src_stride + 4 * idx,
                                    src_stride,
                                    rec_buffer + 4 * idy * 32 + 4 * idx,
                                    32, &this_sse);
          tmp_sse += this_sse;
        }
      }
    } else {
      cpi->fn_ptr[txm_bsize].vf(src, src_stride,
                                rec_buffer, 32, &tmp_sse);
    }
  }
  // If eob == 0 this is still the prediction-only SSE computed above.
  *dist += (int64_t)tmp_sse * 16;
#endif  // CONFIG_VP9_HIGHBITDEPTH

  *rate += cost_coeffs(x, plane, block, coeff_ctx, tx_size,
                       scan_order->scan, scan_order->neighbors, 0);
  *skip &= (p->eobs[block] == 0);
}
1962
// Recursive transform-size search for one (possibly split) transform
// block. Evaluates coding the block whole at tx_size against splitting it
// into four quadrants at tx_size - 1 (recursively), keeps whichever has
// the lower RD cost, and updates the entropy contexts (ta/tl) and the
// transform partition contexts (tx_above/tx_left) to match the winner.
// Sets *is_cost_valid = 0 when no coding within ref_best_rd was found.
static void select_tx_block(const VP10_COMP *cpi, MACROBLOCK *x,
                            int blk_row, int blk_col, int plane, int block,
                            TX_SIZE tx_size, BLOCK_SIZE plane_bsize,
                            ENTROPY_CONTEXT *ta, ENTROPY_CONTEXT *tl,
                            TXFM_CONTEXT *tx_above, TXFM_CONTEXT *tx_left,
                            int *rate, int64_t *dist,
                            int64_t *bsse, int *skip,
                            int64_t ref_best_rd, int *is_cost_valid) {
  MACROBLOCKD *const xd = &x->e_mbd;
  MB_MODE_INFO *const mbmi = &xd->mi[0]->mbmi;
  struct macroblock_plane *const p = &x->plane[plane];
  struct macroblockd_plane *const pd = &xd->plane[plane];
  // Index into the 8x8 inter_tx_size grid (in mi units for this plane).
  int tx_idx = (blk_row >> (1 - pd->subsampling_y)) * 8 +
               (blk_col >> (1 - pd->subsampling_x));
  int max_blocks_high = num_4x4_blocks_high_lookup[plane_bsize];
  int max_blocks_wide = num_4x4_blocks_wide_lookup[plane_bsize];
  int64_t this_rd = INT64_MAX;    // RD of coding the block un-split.
  ENTROPY_CONTEXT *pta = ta + blk_col;
  ENTROPY_CONTEXT *ptl = tl + blk_row;
  ENTROPY_CONTEXT stxa = 0, stxl = 0;
  int coeff_ctx, i;
  int ctx = txfm_partition_context(tx_above + (blk_col >> 1),
                                   tx_left + (blk_row >> 1), tx_size);

  int64_t sum_dist = 0, sum_bsse = 0;
  int64_t sum_rd = INT64_MAX;     // RD of coding the block split in four.
  // The split alternative starts out charged the "split" partition bit.
  int sum_rate = vp10_cost_bit(cpi->common.fc->txfm_partition_prob[ctx], 1);
  int all_skip = 1;
  int tmp_eob = 0;
  int zero_blk_rate;

  if (ref_best_rd < 0) {
    *is_cost_valid = 0;
    return;
  }

  // Collapse the per-4x4 entropy contexts covering this transform into a
  // single above/left pair (any nonzero byte => context 1).
  switch (tx_size) {
    case TX_4X4:
      stxa = pta[0];
      stxl = ptl[0];
      break;
    case TX_8X8:
      stxa = !!*(const uint16_t *)&pta[0];
      stxl = !!*(const uint16_t *)&ptl[0];
      break;
    case TX_16X16:
      stxa = !!*(const uint32_t *)&pta[0];
      stxl = !!*(const uint32_t *)&ptl[0];
      break;
    case TX_32X32:
      stxa = !!*(const uint64_t *)&pta[0];
      stxl = !!*(const uint64_t *)&ptl[0];
      break;
    default:
      assert(0 && "Invalid transform size.");
      break;
  }
  coeff_ctx = combine_entropy_contexts(stxa, stxl);

  // Clip the usable area for blocks straddling the frame boundary.
  if (xd->mb_to_bottom_edge < 0)
    max_blocks_high += xd->mb_to_bottom_edge >> (5 + pd->subsampling_y);
  if (xd->mb_to_right_edge < 0)
    max_blocks_wide += xd->mb_to_right_edge >> (5 + pd->subsampling_x);

  *rate = 0;
  *dist = 0;
  *bsse = 0;
  *skip = 1;

  // Entirely outside the visible frame: nothing to code.
  if (blk_row >= max_blocks_high || blk_col >= max_blocks_wide)
    return;

  // Cost of coding this block as all-zero (immediate EOB).
  zero_blk_rate =
      x->token_costs[tx_size][pd->plane_type][1][0][0][coeff_ctx][EOB_TOKEN];

  // Candidate 1: code the block whole at tx_size.
  if (cpi->common.tx_mode == TX_MODE_SELECT || tx_size == TX_4X4) {
    mbmi->inter_tx_size[tx_idx] = tx_size;
    tx_block_rd_b(cpi, x, tx_size, blk_row, blk_col, plane, block,
                  plane_bsize, coeff_ctx, rate, dist, bsse, skip);

    // Force the block to all-zero when that is no worse in RD terms
    // (not in lossless mode, where dropped residual is never acceptable).
    if ((RDCOST(x->rdmult, x->rddiv, *rate, *dist) >=
         RDCOST(x->rdmult, x->rddiv, zero_blk_rate, *bsse) || *skip == 1) &&
        !xd->lossless[mbmi->segment_id]) {
      *rate = zero_blk_rate;
      *dist = *bsse;
      *skip = 1;
      x->blk_skip[plane][blk_row * max_blocks_wide + blk_col] = 1;
      p->eobs[block] = 0;
    } else {
      x->blk_skip[plane][blk_row * max_blocks_wide + blk_col] = 0;
      *skip = 0;
    }

    // Non-leaf sizes also pay the "no split" partition bit.
    if (tx_size > TX_4X4)
      *rate += vp10_cost_bit(cpi->common.fc->txfm_partition_prob[ctx], 0);
    this_rd = RDCOST(x->rdmult, x->rddiv, *rate, *dist);
    tmp_eob = p->eobs[block];
  }

  // Candidate 2: split into four tx_size - 1 quadrants and recurse.
  if (tx_size > TX_4X4) {
    BLOCK_SIZE bsize = txsize_to_bsize[tx_size];
    int bsl = b_height_log2_lookup[bsize];
    int sub_step = 1 << (2 * (tx_size - 1));
    int i;
    int this_rate;
    int64_t this_dist;
    int64_t this_bsse;
    int this_skip;
    int this_cost_valid = 1;
    int64_t tmp_rd = 0;

    --bsl;
    for (i = 0; i < 4 && this_cost_valid; ++i) {
      int offsetr = (i >> 1) << bsl;
      int offsetc = (i & 0x01) << bsl;
      select_tx_block(cpi, x, blk_row + offsetr, blk_col + offsetc,
                      plane, block + i * sub_step, tx_size - 1,
                      plane_bsize, ta, tl, tx_above, tx_left,
                      &this_rate, &this_dist,
                      &this_bsse, &this_skip,
                      ref_best_rd - tmp_rd, &this_cost_valid);
      sum_rate += this_rate;
      sum_dist += this_dist;
      sum_bsse += this_bsse;
      all_skip &= this_skip;
      tmp_rd += RDCOST(x->rdmult, x->rddiv, this_rate, this_dist);
      // Early out: split already costs more than coding un-split.
      if (this_rd < tmp_rd)
        break;
    }
    if (this_cost_valid)
      sum_rd = tmp_rd;
  }

  if (this_rd < sum_rd) {
    // Un-split wins: mark contexts/eob state and stamp tx_size over the
    // covered portion of the inter_tx_size grid.
    int idx, idy;
    for (i = 0; i < (1 << tx_size); ++i)
      pta[i] = ptl[i] = !(tmp_eob == 0);
    txfm_partition_update(tx_above + (blk_col >> 1),
                          tx_left + (blk_row >> 1), tx_size);
    mbmi->inter_tx_size[tx_idx] = tx_size;

    for (idy = 0; idy < (1 << tx_size) / 2; ++idy)
      for (idx = 0; idx < (1 << tx_size) / 2; ++idx)
        mbmi->inter_tx_size[tx_idx + (idy << 3) + idx] = tx_size;
    mbmi->tx_size = tx_size;
    if (this_rd == INT64_MAX)
      *is_cost_valid = 0;
    x->blk_skip[plane][blk_row * max_blocks_wide + blk_col] = *skip;
  } else {
    // Split wins: report the accumulated stats from the recursion
    // (contexts were already updated by the recursive calls).
    *rate = sum_rate;
    *dist = sum_dist;
    *bsse = sum_bsse;
    *skip = all_skip;
    if (sum_rd == INT64_MAX)
      *is_cost_valid = 0;
  }
}
2120
2121static void inter_block_yrd(const VP10_COMP *cpi, MACROBLOCK *x,
2122 int *rate, int64_t *distortion, int *skippable,
2123 int64_t *sse, BLOCK_SIZE bsize,
2124 int64_t ref_best_rd) {
2125 MACROBLOCKD *const xd = &x->e_mbd;
2126 int is_cost_valid = 1;
Jingning Han1e48f742015-10-13 11:59:49 -07002127 int64_t this_rd = 0;
Jingning Han2cdc1272015-10-09 09:57:42 -07002128
2129 if (ref_best_rd < 0)
2130 is_cost_valid = 0;
2131
2132 *rate = 0;
2133 *distortion = 0;
2134 *sse = 0;
2135 *skippable = 1;
2136
2137 if (is_cost_valid) {
2138 const struct macroblockd_plane *const pd = &xd->plane[0];
2139 const BLOCK_SIZE plane_bsize = get_plane_block_size(bsize, pd);
2140 const int mi_width = num_4x4_blocks_wide_lookup[plane_bsize];
2141 const int mi_height = num_4x4_blocks_high_lookup[plane_bsize];
2142 BLOCK_SIZE txb_size = txsize_to_bsize[max_txsize_lookup[plane_bsize]];
2143 int bh = num_4x4_blocks_wide_lookup[txb_size];
2144 int idx, idy;
2145 int block = 0;
2146 int step = 1 << (max_txsize_lookup[plane_bsize] * 2);
2147 ENTROPY_CONTEXT ctxa[16], ctxl[16];
Jingning Han3edad6e2015-10-14 09:38:17 -07002148 TXFM_CONTEXT tx_above[8], tx_left[8];
Jingning Han2cdc1272015-10-09 09:57:42 -07002149
2150 int pnrate = 0, pnskip = 1;
2151 int64_t pndist = 0, pnsse = 0;
2152
2153 vp10_get_entropy_contexts(bsize, TX_4X4, pd, ctxa, ctxl);
Jingning Han3edad6e2015-10-14 09:38:17 -07002154 memcpy(tx_above, xd->above_txfm_context,
2155 sizeof(TXFM_CONTEXT) * (mi_width >> 1));
2156 memcpy(tx_left, xd->left_txfm_context,
2157 sizeof(TXFM_CONTEXT) * (mi_height >> 1));
Jingning Han2cdc1272015-10-09 09:57:42 -07002158
2159 for (idy = 0; idy < mi_height; idy += bh) {
2160 for (idx = 0; idx < mi_width; idx += bh) {
2161 select_tx_block(cpi, x, idy, idx, 0, block,
Jingning Han3a279612015-10-12 19:20:58 -07002162 max_txsize_lookup[plane_bsize], plane_bsize,
Jingning Han3edad6e2015-10-14 09:38:17 -07002163 ctxa, ctxl, tx_above, tx_left,
2164 &pnrate, &pndist, &pnsse, &pnskip,
Jingning Han1e48f742015-10-13 11:59:49 -07002165 ref_best_rd - this_rd, &is_cost_valid);
Jingning Han2cdc1272015-10-09 09:57:42 -07002166 *rate += pnrate;
2167 *distortion += pndist;
2168 *sse += pnsse;
2169 *skippable &= pnskip;
Jingning Han1e48f742015-10-13 11:59:49 -07002170 this_rd += VPXMIN(RDCOST(x->rdmult, x->rddiv, pnrate, pndist),
2171 RDCOST(x->rdmult, x->rddiv, 0, pnsse));
Jingning Han2cdc1272015-10-09 09:57:42 -07002172 block += step;
2173 }
2174 }
2175 }
2176
2177 this_rd = VPXMIN(RDCOST(x->rdmult, x->rddiv, *rate, *distortion),
2178 RDCOST(x->rdmult, x->rddiv, 0, *sse));
2179 if (this_rd > ref_best_rd)
2180 is_cost_valid = 0;
2181
2182 if (!is_cost_valid) {
2183 // reset cost value
2184 *rate = INT_MAX;
2185 *distortion = INT64_MAX;
2186 *sse = INT64_MAX;
2187 *skippable = 0;
2188 }
2189}
2190
Jingning Han4b594d32015-11-02 12:05:47 -08002191#if CONFIG_EXT_TX
2192static void select_tx_type_yrd(const VP10_COMP *cpi, MACROBLOCK *x,
2193 int *rate, int64_t *distortion, int *skippable,
2194 int64_t *sse, BLOCK_SIZE bsize,
2195 int64_t ref_best_rd) {
2196 const TX_SIZE max_tx_size = max_txsize_lookup[bsize];
2197 const VP10_COMMON *const cm = &cpi->common;
2198 MACROBLOCKD *const xd = &x->e_mbd;
2199 MB_MODE_INFO *const mbmi = &xd->mi[0]->mbmi;
2200 int64_t rd = INT64_MAX;
2201 int64_t best_rd = INT64_MAX;
2202 TX_TYPE tx_type, best_tx_type = DCT_DCT;
2203 int ext_tx_set;
2204 const int is_inter = is_inter_block(mbmi);
2205 vpx_prob skip_prob = vp10_get_skip_prob(cm, xd);
2206 int s0 = vp10_cost_bit(skip_prob, 0);
2207 int s1 = vp10_cost_bit(skip_prob, 1);
Jingning Han696ee002015-11-03 08:56:47 -08002208 TX_SIZE best_tx_size[64];
Jingning Han493d0232015-11-03 12:59:24 -08002209 TX_SIZE best_tx = TX_SIZES;
2210 uint8_t best_blk_skip[256];
2211 const int n4 = 1 << (num_pels_log2_lookup[bsize] - 4);
Jingning Han696ee002015-11-03 08:56:47 -08002212 int idx, idy;
Jingning Han4b594d32015-11-02 12:05:47 -08002213
2214 *distortion = INT64_MAX;
2215 *rate = INT_MAX;
2216 *skippable = 0;
2217 *sse = INT64_MAX;
2218
Jingning Han696ee002015-11-03 08:56:47 -08002219 ext_tx_set = get_ext_tx_set(max_tx_size, bsize, is_inter);
2220
Jingning Han4b594d32015-11-02 12:05:47 -08002221 for (tx_type = DCT_DCT; tx_type < TX_TYPES; ++tx_type) {
2222 int this_rate = 0;
2223 int this_skip = 1;
2224 int64_t this_dist = 0;
2225 int64_t this_sse = 0;
2226
Jingning Han4b594d32015-11-02 12:05:47 -08002227 if (is_inter) {
2228 if (!ext_tx_used_inter[ext_tx_set][tx_type])
2229 continue;
2230 } else {
2231 if (!ext_tx_used_intra[ext_tx_set][tx_type])
2232 continue;
2233 }
2234
2235 mbmi->tx_type = tx_type;
2236
2237 if (ext_tx_set == 1 &&
2238 mbmi->tx_type >= DST_ADST && mbmi->tx_type < IDTX &&
2239 best_tx_type == DCT_DCT) {
2240 tx_type = IDTX - 1;
2241 break;
2242 }
2243
2244 inter_block_yrd(cpi, x, &this_rate, &this_dist, &this_skip, &this_sse,
2245 bsize, ref_best_rd);
2246
2247 if (get_ext_tx_types(max_tx_size, bsize, is_inter) > 1 &&
2248 !xd->lossless[xd->mi[0]->mbmi.segment_id] &&
2249 this_rate != INT_MAX) {
2250 if (is_inter) {
2251 if (ext_tx_set > 0)
2252 this_rate += cpi->inter_tx_type_costs[ext_tx_set]
Jingning Han696ee002015-11-03 08:56:47 -08002253 [max_tx_size][mbmi->tx_type];
Jingning Han4b594d32015-11-02 12:05:47 -08002254 } else {
2255 if (ext_tx_set > 0)
Jingning Han696ee002015-11-03 08:56:47 -08002256 this_rate += cpi->intra_tx_type_costs[ext_tx_set][max_tx_size]
Jingning Han4b594d32015-11-02 12:05:47 -08002257 [mbmi->mode][mbmi->tx_type];
2258 }
2259 }
2260
2261 if (this_rate == INT_MAX)
2262 continue;
2263
2264 if (this_skip)
2265 rd = RDCOST(x->rdmult, x->rddiv, s1, this_sse);
2266 else
2267 rd = RDCOST(x->rdmult, x->rddiv, this_rate + s0, this_dist);
2268
2269 if (is_inter && !xd->lossless[xd->mi[0]->mbmi.segment_id] && !this_skip)
2270 rd = VPXMIN(rd, RDCOST(x->rdmult, x->rddiv, s1, this_sse));
2271
2272 if (rd <
2273 (is_inter && best_tx_type == DCT_DCT ? ext_tx_th : 1) *
2274 best_rd) {
2275 best_rd = rd;
2276 *distortion = this_dist;
2277 *rate = this_rate;
2278 *skippable = this_skip;
2279 *sse = this_sse;
2280 best_tx_type = mbmi->tx_type;
Jingning Han493d0232015-11-03 12:59:24 -08002281 best_tx = mbmi->tx_size;
2282 memcpy(best_blk_skip, x->blk_skip[0], sizeof(best_blk_skip[0]) * n4);
Jingning Han696ee002015-11-03 08:56:47 -08002283 for (idy = 0; idy < xd->n8_h; ++idy)
2284 for (idx = 0; idx < xd->n8_w; ++idx)
2285 best_tx_size[idy * 8 + idx] = mbmi->inter_tx_size[idy * 8 + idx];
Jingning Han4b594d32015-11-02 12:05:47 -08002286 }
2287 }
2288
2289 mbmi->tx_type = best_tx_type;
Jingning Han696ee002015-11-03 08:56:47 -08002290 for (idy = 0; idy < xd->n8_h; ++idy)
2291 for (idx = 0; idx < xd->n8_w; ++idx)
2292 mbmi->inter_tx_size[idy * 8 + idx] = best_tx_size[idy * 8 + idx];
Jingning Han493d0232015-11-03 12:59:24 -08002293 mbmi->tx_size = best_tx;
2294 memcpy(x->blk_skip[0], best_blk_skip, sizeof(best_blk_skip[0]) * n4);
Jingning Han4b594d32015-11-02 12:05:47 -08002295}
2296#endif
2297
// Accumulates RD statistics for one transform region using the transform
// sizes ALREADY chosen in mbmi->inter_tx_size (no searching): recurses
// until the current tx_size matches the selected size for the region,
// then evaluates that block with tx_block_rd_b and marks the entropy
// contexts from its eob. Used for the chroma planes, where the luma
// decision is mapped down via get_uv_tx_size_impl.
static void tx_block_rd(const VP10_COMP *cpi, MACROBLOCK *x,
                        int blk_row, int blk_col, int plane, int block,
                        TX_SIZE tx_size, BLOCK_SIZE plane_bsize,
                        ENTROPY_CONTEXT *above_ctx, ENTROPY_CONTEXT *left_ctx,
                        int *rate, int64_t *dist, int64_t *bsse, int *skip) {
  MACROBLOCKD *const xd = &x->e_mbd;
  MB_MODE_INFO *const mbmi = &xd->mi[0]->mbmi;
  struct macroblock_plane *const p = &x->plane[plane];
  struct macroblockd_plane *const pd = &xd->plane[plane];
  BLOCK_SIZE bsize = txsize_to_bsize[tx_size];
  // Index into the 8x8 inter_tx_size grid (in mi units for this plane).
  int tx_idx = (blk_row >> (1 - pd->subsampling_y)) * 8 +
               (blk_col >> (1 - pd->subsampling_x));
  // Target size for this region: the stored luma decision, mapped to the
  // chroma plane when plane != 0.
  TX_SIZE plane_tx_size = plane ?
      get_uv_tx_size_impl(mbmi->inter_tx_size[tx_idx], bsize,
                          0, 0) :
      mbmi->inter_tx_size[tx_idx];

  int max_blocks_high = num_4x4_blocks_high_lookup[plane_bsize];
  int max_blocks_wide = num_4x4_blocks_wide_lookup[plane_bsize];

  // Clip the usable area for blocks straddling the frame boundary.
  if (xd->mb_to_bottom_edge < 0)
    max_blocks_high += xd->mb_to_bottom_edge >> (5 + pd->subsampling_y);
  if (xd->mb_to_right_edge < 0)
    max_blocks_wide += xd->mb_to_right_edge >> (5 + pd->subsampling_x);

  if (blk_row >= max_blocks_high || blk_col >= max_blocks_wide)
    return;

  if (tx_size == plane_tx_size) {
    int coeff_ctx, i;
    ENTROPY_CONTEXT *ta = above_ctx + blk_col;
    ENTROPY_CONTEXT *tl = left_ctx + blk_row;
    // Collapse the per-4x4 contexts covering this transform into one
    // above/left pair (any nonzero byte => context 1).
    switch (tx_size) {
      case TX_4X4:
        break;
      case TX_8X8:
        ta[0] = !!*(const uint16_t *)&ta[0];
        tl[0] = !!*(const uint16_t *)&tl[0];
        break;
      case TX_16X16:
        ta[0] = !!*(const uint32_t *)&ta[0];
        tl[0] = !!*(const uint32_t *)&tl[0];
        break;
      case TX_32X32:
        ta[0] = !!*(const uint64_t *)&ta[0];
        tl[0] = !!*(const uint64_t *)&tl[0];
        break;
      default:
        assert(0 && "Invalid transform size.");
        break;
    }
    coeff_ctx = combine_entropy_contexts(ta[0], tl[0]);
    tx_block_rd_b(cpi, x, tx_size, blk_row, blk_col, plane, block,
                  plane_bsize, coeff_ctx, rate, dist, bsse, skip);
    // Propagate this block's coded/not-coded status across the contexts
    // it spans.
    for (i = 0; i < (1 << tx_size); ++i) {
      ta[i] = !(p->eobs[block] == 0);
      tl[i] = !(p->eobs[block] == 0);
    }
  } else {
    // Region is coded with smaller transforms: recurse into quadrants.
    int bsl = b_width_log2_lookup[bsize];
    int step = 1 << (2 * (tx_size - 1));
    int i;

    assert(bsl > 0);
    --bsl;

    for (i = 0; i < 4; ++i) {
      int offsetr = (i >> 1) << bsl;
      int offsetc = (i & 0x01) << bsl;
      tx_block_rd(cpi, x, blk_row + offsetr, blk_col + offsetc, plane,
                  block + i * step, tx_size - 1, plane_bsize,
                  above_ctx, left_ctx, rate, dist, bsse, skip);
    }
  }
}
2373
2374// Return value 0: early termination triggered, no valid rd cost available;
2375// 1: rd cost values are valid.
2376static int inter_block_uvrd(const VP10_COMP *cpi, MACROBLOCK *x,
2377 int *rate, int64_t *distortion, int *skippable,
2378 int64_t *sse, BLOCK_SIZE bsize,
2379 int64_t ref_best_rd) {
2380 MACROBLOCKD *const xd = &x->e_mbd;
2381 MB_MODE_INFO *const mbmi = &xd->mi[0]->mbmi;
2382 int plane;
2383 int is_cost_valid = 1;
2384 int64_t this_rd;
2385
2386 if (ref_best_rd < 0)
2387 is_cost_valid = 0;
2388
2389 if (is_inter_block(mbmi) && is_cost_valid) {
2390 int plane;
2391 for (plane = 1; plane < MAX_MB_PLANE; ++plane)
2392 vp10_subtract_plane(x, bsize, plane);
2393 }
2394
2395 *rate = 0;
2396 *distortion = 0;
2397 *sse = 0;
2398 *skippable = 1;
2399
2400 for (plane = 1; plane < MAX_MB_PLANE; ++plane) {
2401 const struct macroblockd_plane *const pd = &xd->plane[plane];
2402 const BLOCK_SIZE plane_bsize = get_plane_block_size(bsize, pd);
2403 const int mi_width = num_4x4_blocks_wide_lookup[plane_bsize];
2404 const int mi_height = num_4x4_blocks_high_lookup[plane_bsize];
2405 BLOCK_SIZE txb_size = txsize_to_bsize[max_txsize_lookup[plane_bsize]];
2406 int bh = num_4x4_blocks_wide_lookup[txb_size];
2407 int idx, idy;
2408 int block = 0;
2409 int step = 1 << (max_txsize_lookup[plane_bsize] * 2);
2410 int pnrate = 0, pnskip = 1;
2411 int64_t pndist = 0, pnsse = 0;
2412 ENTROPY_CONTEXT ta[16], tl[16];
2413
2414 vp10_get_entropy_contexts(bsize, TX_4X4, pd, ta, tl);
2415
2416 for (idy = 0; idy < mi_height; idy += bh) {
2417 for (idx = 0; idx < mi_width; idx += bh) {
2418 tx_block_rd(cpi, x, idy, idx, plane, block,
2419 max_txsize_lookup[plane_bsize], plane_bsize, ta, tl,
2420 &pnrate, &pndist, &pnsse, &pnskip);
2421 block += step;
2422 }
2423 }
2424
2425 if (pnrate == INT_MAX) {
2426 is_cost_valid = 0;
2427 break;
2428 }
2429
2430 *rate += pnrate;
2431 *distortion += pndist;
2432 *sse += pnsse;
2433 *skippable &= pnskip;
2434
2435 this_rd = VPXMIN(RDCOST(x->rdmult, x->rddiv, *rate, *distortion),
2436 RDCOST(x->rdmult, x->rddiv, 0, *sse));
2437
2438 if (this_rd > ref_best_rd) {
2439 is_cost_valid = 0;
2440 break;
2441 }
2442 }
2443
2444 if (!is_cost_valid) {
2445 // reset cost value
2446 *rate = INT_MAX;
2447 *distortion = INT64_MAX;
2448 *sse = INT64_MAX;
2449 *skippable = 0;
2450 }
2451
2452 return is_cost_valid;
2453}
2454#endif
2455
Jingning Han3ee6db62015-08-05 19:00:31 -07002456// Return value 0: early termination triggered, no valid rd cost available;
2457// 1: rd cost values are valid.
Yaowu Xu26a9afc2015-08-13 09:42:27 -07002458static int super_block_uvrd(const VP10_COMP *cpi, MACROBLOCK *x,
Jingning Han3ee6db62015-08-05 19:00:31 -07002459 int *rate, int64_t *distortion, int *skippable,
2460 int64_t *sse, BLOCK_SIZE bsize,
2461 int64_t ref_best_rd) {
2462 MACROBLOCKD *const xd = &x->e_mbd;
2463 MB_MODE_INFO *const mbmi = &xd->mi[0]->mbmi;
2464 const TX_SIZE uv_tx_size = get_uv_tx_size(mbmi, &xd->plane[1]);
2465 int plane;
2466 int pnrate = 0, pnskip = 1;
2467 int64_t pndist = 0, pnsse = 0;
2468 int is_cost_valid = 1;
2469
2470 if (ref_best_rd < 0)
2471 is_cost_valid = 0;
2472
2473 if (is_inter_block(mbmi) && is_cost_valid) {
2474 int plane;
2475 for (plane = 1; plane < MAX_MB_PLANE; ++plane)
2476 vp10_subtract_plane(x, bsize, plane);
2477 }
2478
2479 *rate = 0;
2480 *distortion = 0;
2481 *sse = 0;
2482 *skippable = 1;
2483
2484 for (plane = 1; plane < MAX_MB_PLANE; ++plane) {
Jingning Han71c15602015-10-13 12:40:39 -07002485 txfm_rd_in_plane(x,
2486#if CONFIG_VAR_TX
2487 cpi,
2488#endif
2489 &pnrate, &pndist, &pnskip, &pnsse,
Jingning Han3ee6db62015-08-05 19:00:31 -07002490 ref_best_rd, plane, bsize, uv_tx_size,
2491 cpi->sf.use_fast_coef_costing);
2492 if (pnrate == INT_MAX) {
2493 is_cost_valid = 0;
2494 break;
2495 }
2496 *rate += pnrate;
2497 *distortion += pndist;
2498 *sse += pnsse;
2499 *skippable &= pnskip;
2500 }
2501
2502 if (!is_cost_valid) {
2503 // reset cost value
2504 *rate = INT_MAX;
2505 *distortion = INT64_MAX;
2506 *sse = INT64_MAX;
2507 *skippable = 0;
2508 }
2509
2510 return is_cost_valid;
2511}
2512
hui sube3559b2015-10-07 09:29:02 -07002513#if CONFIG_EXT_INTRA
2514// Return 1 if an ext intra mode is selected; return 0 otherwise.
2515static int rd_pick_ext_intra_sbuv(VP10_COMP *cpi, MACROBLOCK *x,
2516 PICK_MODE_CONTEXT *ctx,
2517 int *rate, int *rate_tokenonly,
2518 int64_t *distortion, int *skippable,
2519 BLOCK_SIZE bsize, int64_t *best_rd) {
2520 MACROBLOCKD *const xd = &x->e_mbd;
2521 MB_MODE_INFO *mbmi = &xd->mi[0]->mbmi;
2522 int ext_intra_selected_flag = 0;
2523 int this_rate_tokenonly, this_rate, s;
hui su4aa50c12015-11-10 12:09:59 -08002524 int64_t this_distortion, this_sse, this_rd;
hui sube3559b2015-10-07 09:29:02 -07002525 EXT_INTRA_MODE mode;
hui sube3559b2015-10-07 09:29:02 -07002526 EXT_INTRA_MODE_INFO ext_intra_mode_info;
2527
2528 vp10_zero(ext_intra_mode_info);
2529 mbmi->ext_intra_mode_info.use_ext_intra_mode[1] = 1;
2530 mbmi->uv_mode = DC_PRED;
2531
hui su4aa50c12015-11-10 12:09:59 -08002532 for (mode = 0; mode < FILTER_INTRA_MODES; ++mode) {
2533 mbmi->ext_intra_mode_info.ext_intra_mode[1] = mode;
2534 if (!super_block_uvrd(cpi, x, &this_rate_tokenonly,
2535 &this_distortion, &s, &this_sse, bsize, *best_rd))
2536 continue;
hui sube3559b2015-10-07 09:29:02 -07002537
hui su4aa50c12015-11-10 12:09:59 -08002538 this_rate = this_rate_tokenonly +
2539 vp10_cost_bit(cpi->common.fc->ext_intra_probs[1], 1) +
2540 cpi->intra_uv_mode_cost[mbmi->mode][mbmi->uv_mode] +
2541 write_uniform_cost(FILTER_INTRA_MODES, mode);
2542 this_rd = RDCOST(x->rdmult, x->rddiv, this_rate, this_distortion);
2543 if (this_rd < *best_rd) {
2544 *best_rd = this_rd;
2545 *rate = this_rate;
2546 *rate_tokenonly = this_rate_tokenonly;
2547 *distortion = this_distortion;
2548 *skippable = s;
2549 ext_intra_mode_info = mbmi->ext_intra_mode_info;
2550 ext_intra_selected_flag = 1;
2551 if (!x->select_tx_size)
2552 swap_block_ptr(x, ctx, 2, 0, 1, MAX_MB_PLANE);
hui sube3559b2015-10-07 09:29:02 -07002553 }
2554 }
2555
hui sube3559b2015-10-07 09:29:02 -07002556
2557 if (ext_intra_selected_flag) {
2558 mbmi->uv_mode = DC_PRED;
2559 mbmi->ext_intra_mode_info.use_ext_intra_mode[1] =
2560 ext_intra_mode_info.use_ext_intra_mode[1];
2561 mbmi->ext_intra_mode_info.ext_intra_mode[1] =
2562 ext_intra_mode_info.ext_intra_mode[1];
hui sube3559b2015-10-07 09:29:02 -07002563 return 1;
2564 } else {
2565 return 0;
2566 }
2567}
hui su4aa50c12015-11-10 12:09:59 -08002568
2569static int rd_pick_intra_angle_sbuv(VP10_COMP *cpi, MACROBLOCK *x,
2570 PICK_MODE_CONTEXT *ctx,
2571 int *rate, int *rate_tokenonly,
2572 int64_t *distortion, int *skippable,
2573 BLOCK_SIZE bsize, int rate_overhead,
2574 int64_t best_rd) {
2575 MACROBLOCKD *const xd = &x->e_mbd;
2576 MB_MODE_INFO *mbmi = &xd->mi[0]->mbmi;
2577 int this_rate_tokenonly, this_rate, s;
2578 int64_t this_distortion, this_sse, this_rd;
2579 int angle_delta, best_angle_delta = 0;
2580 const double rd_adjust = 1.2;
2581
2582 (void)ctx;
2583 *rate_tokenonly = INT_MAX;
2584 if (ANGLE_FAST_SEARCH) {
2585 int deltas_level1[3] = {0, -2, 2};
2586 int deltas_level2[3][2] = {
2587 {-1, 1}, {-3, -1}, {1, 3},
2588 };
2589 const int level1 = 3, level2 = 2;
2590 int i, j, best_i = -1;
2591
2592 for (i = 0; i < level1; ++i) {
2593 mbmi->angle_delta[1] = deltas_level1[i];
2594 if (!super_block_uvrd(cpi, x, &this_rate_tokenonly,
2595 &this_distortion, &s, &this_sse, bsize,
2596 (i == 0 && best_rd < INT64_MAX) ?
2597 best_rd * rd_adjust : best_rd)) {
2598 if (i == 0)
2599 break;
2600 else
2601 continue;
2602 }
2603 this_rate = this_rate_tokenonly + rate_overhead;
2604 this_rd = RDCOST(x->rdmult, x->rddiv, this_rate, this_distortion);
2605 if (i == 0 && best_rd < INT64_MAX && this_rd > best_rd * rd_adjust)
2606 break;
2607 if (this_rd < best_rd) {
2608 best_i = i;
2609 best_rd = this_rd;
2610 best_angle_delta = mbmi->angle_delta[1];
2611 *rate = this_rate;
2612 *rate_tokenonly = this_rate_tokenonly;
2613 *distortion = this_distortion;
2614 *skippable = s;
2615 }
2616 }
2617
2618 if (best_i >= 0) {
2619 for (j = 0; j < level2; ++j) {
2620 mbmi->angle_delta[1] = deltas_level2[best_i][j];
2621 if (!super_block_uvrd(cpi, x, &this_rate_tokenonly,
2622 &this_distortion, &s, &this_sse, bsize, best_rd))
2623 continue;
2624 this_rate = this_rate_tokenonly + rate_overhead;
2625 this_rd = RDCOST(x->rdmult, x->rddiv, this_rate, this_distortion);
2626 if (this_rd < best_rd) {
2627 best_rd = this_rd;
2628 best_angle_delta = mbmi->angle_delta[1];
2629 *rate = this_rate;
2630 *rate_tokenonly = this_rate_tokenonly;
2631 *distortion = this_distortion;
2632 *skippable = s;
2633 }
2634 }
2635 }
2636 } else {
2637 for (angle_delta = -MAX_ANGLE_DELTAS; angle_delta <= MAX_ANGLE_DELTAS;
2638 ++angle_delta) {
2639 mbmi->angle_delta[1] = angle_delta;
2640 if (!super_block_uvrd(cpi, x, &this_rate_tokenonly,
2641 &this_distortion, &s, &this_sse, bsize, best_rd))
2642 continue;
2643 this_rate = this_rate_tokenonly + rate_overhead;
2644 this_rd = RDCOST(x->rdmult, x->rddiv, this_rate, this_distortion);
2645 if (this_rd < best_rd) {
2646 best_rd = this_rd;
2647 best_angle_delta = mbmi->angle_delta[1];
2648 *rate = this_rate;
2649 *rate_tokenonly = this_rate_tokenonly;
2650 *distortion = this_distortion;
2651 *skippable = s;
2652 }
2653 }
2654 }
2655
2656 mbmi->angle_delta[1] = best_angle_delta;
2657 if (*rate_tokenonly != INT_MAX)
2658 super_block_uvrd(cpi, x, &this_rate_tokenonly,
2659 &this_distortion, &s, &this_sse, bsize, INT_MAX);
2660 return *rate_tokenonly != INT_MAX;
2661}
hui sube3559b2015-10-07 09:29:02 -07002662#endif // CONFIG_EXT_INTRA
2663
// Full rd search over all chroma intra prediction modes (DC_PRED..TM_PRED),
// restricted by the per-tx-size speed-feature mask.  With CONFIG_EXT_INTRA,
// directional modes additionally search an angle delta, and filter-intra
// (ext intra) modes are tried last against the best conventional mode.
// Writes the winning mode (and, for EXT_INTRA, angle/ext-intra info) back
// into the mode info and returns the best rd cost.
static int64_t rd_pick_intra_sbuv_mode(VP10_COMP *cpi, MACROBLOCK *x,
                                       PICK_MODE_CONTEXT *ctx,
                                       int *rate, int *rate_tokenonly,
                                       int64_t *distortion, int *skippable,
                                       BLOCK_SIZE bsize, TX_SIZE max_tx_size) {
  MACROBLOCKD *xd = &x->e_mbd;
  MB_MODE_INFO *mbmi = &xd->mi[0]->mbmi;
  PREDICTION_MODE mode;
  PREDICTION_MODE mode_selected = DC_PRED;
  int64_t best_rd = INT64_MAX, this_rd;
  int this_rate_tokenonly, this_rate, s;
  int64_t this_distortion, this_sse;
#if CONFIG_EXT_INTRA
  int is_directional_mode, rate_overhead, best_angle_delta = 0;
  EXT_INTRA_MODE_INFO ext_intra_mode_info;

  ext_intra_mode_info.use_ext_intra_mode[1] = 0;
  mbmi->ext_intra_mode_info.use_ext_intra_mode[1] = 0;
#endif  // CONFIG_EXT_INTRA
  memset(x->skip_txfm, SKIP_TXFM_NONE, sizeof(x->skip_txfm));
  // Chroma palette is not used during this search.
  xd->mi[0]->mbmi.palette_mode_info.palette_size[1] = 0;
  for (mode = DC_PRED; mode <= TM_PRED; ++mode) {
    // Skip modes excluded by the speed-feature mask for this tx size.
    if (!(cpi->sf.intra_uv_mode_mask[max_tx_size] & (1 << mode)))
      continue;

    mbmi->uv_mode = mode;
#if CONFIG_EXT_INTRA
    is_directional_mode = (mode != DC_PRED && mode != TM_PRED);
    // Side-information cost assumed by the angle search: mode signaling
    // plus a zero angle delta (the actual delta cost is added below).
    rate_overhead = cpi->intra_uv_mode_cost[mbmi->mode][mode] +
        write_uniform_cost(2 * MAX_ANGLE_DELTAS + 1, 0);
    mbmi->angle_delta[1] = 0;
    if (mbmi->sb_type >= BLOCK_8X8 && is_directional_mode) {
      if (!rd_pick_intra_angle_sbuv(cpi, x, ctx, &this_rate,
                                    &this_rate_tokenonly, &this_distortion, &s,
                                    bsize, rate_overhead, best_rd))
        continue;
    } else {
      if (!super_block_uvrd(cpi, x, &this_rate_tokenonly,
                            &this_distortion, &s, &this_sse, bsize, best_rd))
        continue;
    }
    this_rate = this_rate_tokenonly +
        cpi->intra_uv_mode_cost[mbmi->mode][mode];
    if (mbmi->sb_type >= BLOCK_8X8 && is_directional_mode)
      this_rate += write_uniform_cost(2 * MAX_ANGLE_DELTAS + 1,
                                      MAX_ANGLE_DELTAS +
                                      mbmi->angle_delta[1]);
    // NOTE(review): `&& 0` deliberately disables charging the ext-intra
    // "off" flag to DC_PRED here — presumably left for experimentation;
    // confirm before enabling.
    if (mode == DC_PRED && 0)
      this_rate += vp10_cost_bit(cpi->common.fc->ext_intra_probs[1], 0);
#else
    if (!super_block_uvrd(cpi, x, &this_rate_tokenonly,
                          &this_distortion, &s, &this_sse, bsize, best_rd))
      continue;
    this_rate = this_rate_tokenonly +
        cpi->intra_uv_mode_cost[xd->mi[0]->mbmi.mode][mode];
#endif  // CONFIG_EXT_INTRA

    this_rd = RDCOST(x->rdmult, x->rddiv, this_rate, this_distortion);

    if (this_rd < best_rd) {
      mode_selected = mode;
#if CONFIG_EXT_INTRA
      best_angle_delta = mbmi->angle_delta[1];
#endif  // CONFIG_EXT_INTRA
      best_rd = this_rd;
      *rate = this_rate;
      *rate_tokenonly = this_rate_tokenonly;
      *distortion = this_distortion;
      *skippable = s;
      if (!x->select_tx_size)
        swap_block_ptr(x, ctx, 2, 0, 1, MAX_MB_PLANE);
    }
  }

#if CONFIG_EXT_INTRA
  // Finally, let filter-intra modes compete against the best found so far.
  if (mbmi->sb_type >= BLOCK_8X8 && ALLOW_FILTER_INTRA_MODES) {
    if (rd_pick_ext_intra_sbuv(cpi, x, ctx, rate, rate_tokenonly, distortion,
                               skippable, bsize, &best_rd)) {
      mode_selected = mbmi->uv_mode;
      ext_intra_mode_info = mbmi->ext_intra_mode_info;
    }
  }

  mbmi->ext_intra_mode_info.use_ext_intra_mode[1] =
      ext_intra_mode_info.use_ext_intra_mode[1];
  if (ext_intra_mode_info.use_ext_intra_mode[1])
    mbmi->ext_intra_mode_info.ext_intra_mode[1] =
        ext_intra_mode_info.ext_intra_mode[1];
  mbmi->angle_delta[1] = best_angle_delta;
#endif  // CONFIG_EXT_INTRA
  mbmi->uv_mode = mode_selected;
  return best_rd;
}
2757
Yaowu Xu26a9afc2015-08-13 09:42:27 -07002758static int64_t rd_sbuv_dcpred(const VP10_COMP *cpi, MACROBLOCK *x,
Jingning Han3ee6db62015-08-05 19:00:31 -07002759 int *rate, int *rate_tokenonly,
2760 int64_t *distortion, int *skippable,
2761 BLOCK_SIZE bsize) {
Jingning Han3ee6db62015-08-05 19:00:31 -07002762 int64_t unused;
2763
2764 x->e_mbd.mi[0]->mbmi.uv_mode = DC_PRED;
2765 memset(x->skip_txfm, SKIP_TXFM_NONE, sizeof(x->skip_txfm));
2766 super_block_uvrd(cpi, x, rate_tokenonly, distortion,
2767 skippable, &unused, bsize, INT64_MAX);
hui su6ab6ac42015-11-06 13:56:51 -08002768 *rate = *rate_tokenonly +
2769 cpi->intra_uv_mode_cost[x->e_mbd.mi[0]->mbmi.mode][DC_PRED];
Jingning Han3ee6db62015-08-05 19:00:31 -07002770 return RDCOST(x->rdmult, x->rddiv, *rate, *distortion);
2771}
2772
Yaowu Xu26a9afc2015-08-13 09:42:27 -07002773static void choose_intra_uv_mode(VP10_COMP *cpi, MACROBLOCK *const x,
Jingning Han3ee6db62015-08-05 19:00:31 -07002774 PICK_MODE_CONTEXT *ctx,
2775 BLOCK_SIZE bsize, TX_SIZE max_tx_size,
2776 int *rate_uv, int *rate_uv_tokenonly,
2777 int64_t *dist_uv, int *skip_uv,
2778 PREDICTION_MODE *mode_uv) {
2779 // Use an estimated rd for uv_intra based on DC_PRED if the
2780 // appropriate speed flag is set.
2781 if (cpi->sf.use_uv_intra_rd_estimate) {
2782 rd_sbuv_dcpred(cpi, x, rate_uv, rate_uv_tokenonly, dist_uv,
2783 skip_uv, bsize < BLOCK_8X8 ? BLOCK_8X8 : bsize);
2784 // Else do a proper rd search for each possible transform size that may
2785 // be considered in the main rd loop.
2786 } else {
2787 rd_pick_intra_sbuv_mode(cpi, x, ctx,
2788 rate_uv, rate_uv_tokenonly, dist_uv, skip_uv,
2789 bsize < BLOCK_8X8 ? BLOCK_8X8 : bsize, max_tx_size);
2790 }
2791 *mode_uv = x->e_mbd.mi[0]->mbmi.uv_mode;
2792}
2793
Yaowu Xu26a9afc2015-08-13 09:42:27 -07002794static int cost_mv_ref(const VP10_COMP *cpi, PREDICTION_MODE mode,
Jingning Han3ee6db62015-08-05 19:00:31 -07002795 int mode_context) {
2796 assert(is_inter_mode(mode));
2797 return cpi->inter_mode_cost[mode_context][INTER_OFFSET(mode)];
2798}
2799
Yaowu Xu26a9afc2015-08-13 09:42:27 -07002800static int set_and_cost_bmi_mvs(VP10_COMP *cpi, MACROBLOCK *x, MACROBLOCKD *xd,
Jingning Han3ee6db62015-08-05 19:00:31 -07002801 int i,
2802 PREDICTION_MODE mode, int_mv this_mv[2],
2803 int_mv frame_mv[MB_MODE_COUNT][MAX_REF_FRAMES],
2804 int_mv seg_mvs[MAX_REF_FRAMES],
2805 int_mv *best_ref_mv[2], const int *mvjcost,
2806 int *mvcost[2]) {
2807 MODE_INFO *const mic = xd->mi[0];
2808 const MB_MODE_INFO *const mbmi = &mic->mbmi;
2809 const MB_MODE_INFO_EXT *const mbmi_ext = x->mbmi_ext;
2810 int thismvcost = 0;
2811 int idx, idy;
2812 const int num_4x4_blocks_wide = num_4x4_blocks_wide_lookup[mbmi->sb_type];
2813 const int num_4x4_blocks_high = num_4x4_blocks_high_lookup[mbmi->sb_type];
2814 const int is_compound = has_second_ref(mbmi);
2815
2816 switch (mode) {
2817 case NEWMV:
2818 this_mv[0].as_int = seg_mvs[mbmi->ref_frame[0]].as_int;
2819 thismvcost += vp10_mv_bit_cost(&this_mv[0].as_mv, &best_ref_mv[0]->as_mv,
2820 mvjcost, mvcost, MV_COST_WEIGHT_SUB);
2821 if (is_compound) {
2822 this_mv[1].as_int = seg_mvs[mbmi->ref_frame[1]].as_int;
2823 thismvcost += vp10_mv_bit_cost(&this_mv[1].as_mv, &best_ref_mv[1]->as_mv,
2824 mvjcost, mvcost, MV_COST_WEIGHT_SUB);
2825 }
2826 break;
2827 case NEARMV:
2828 case NEARESTMV:
2829 this_mv[0].as_int = frame_mv[mode][mbmi->ref_frame[0]].as_int;
2830 if (is_compound)
2831 this_mv[1].as_int = frame_mv[mode][mbmi->ref_frame[1]].as_int;
2832 break;
2833 case ZEROMV:
2834 this_mv[0].as_int = 0;
2835 if (is_compound)
2836 this_mv[1].as_int = 0;
2837 break;
2838 default:
2839 break;
2840 }
2841
2842 mic->bmi[i].as_mv[0].as_int = this_mv[0].as_int;
2843 if (is_compound)
2844 mic->bmi[i].as_mv[1].as_int = this_mv[1].as_int;
2845
2846 mic->bmi[i].as_mode = mode;
2847
2848 for (idy = 0; idy < num_4x4_blocks_high; ++idy)
2849 for (idx = 0; idx < num_4x4_blocks_wide; ++idx)
2850 memmove(&mic->bmi[i + idy * 2 + idx], &mic->bmi[i], sizeof(mic->bmi[i]));
2851
2852 return cost_mv_ref(cpi, mode, mbmi_ext->mode_context[mbmi->ref_frame[0]]) +
2853 thismvcost;
2854}
2855
// Encode one sub8x8 inter partition: build the inter prediction, compute
// the residual, forward-transform/quantize each 4x4 sub-block, and
// accumulate rate/distortion/sse.  Aborts with INT64_MAX as soon as the
// running rd cost reaches best_yrd.  Distortion/sse are returned scaled
// down by 4 (>> 2) to match the pixel-domain convention used by callers.
static int64_t encode_inter_mb_segment(VP10_COMP *cpi,
                                       MACROBLOCK *x,
                                       int64_t best_yrd,
                                       int i,
                                       int *labelyrate,
                                       int64_t *distortion, int64_t *sse,
                                       ENTROPY_CONTEXT *ta,
                                       ENTROPY_CONTEXT *tl,
                                       int ir, int ic,
                                       int mi_row, int mi_col) {
  int k;
  MACROBLOCKD *xd = &x->e_mbd;
  struct macroblockd_plane *const pd = &xd->plane[0];
  struct macroblock_plane *const p = &x->plane[0];
  MODE_INFO *const mi = xd->mi[0];
  const BLOCK_SIZE plane_bsize = get_plane_block_size(mi->mbmi.sb_type, pd);
  const int width = 4 * num_4x4_blocks_wide_lookup[plane_bsize];
  const int height = 4 * num_4x4_blocks_high_lookup[plane_bsize];
  int idx, idy;
  // 4x4 forward transform, chosen below based on lossless/bit-depth.
  void (*fwd_txm4x4)(const int16_t *input, tran_low_t *output, int stride);

  const uint8_t *const src =
      &p->src.buf[vp10_raster_block_offset(BLOCK_8X8, i, p->src.stride)];
  uint8_t *const dst = &pd->dst.buf[vp10_raster_block_offset(BLOCK_8X8, i,
                                                             pd->dst.stride)];
  int64_t thisdistortion = 0, thissse = 0;
  int thisrate = 0;
  TX_TYPE tx_type = get_tx_type(PLANE_TYPE_Y, xd, i, TX_4X4);
  const scan_order *so = get_scan(TX_4X4, tx_type, 1);

  vp10_build_inter_predictor_sub8x8(xd, 0, i, ir, ic, mi_row, mi_col);

  // Lossless segments use the Walsh-Hadamard transform instead of the DCT.
#if CONFIG_VP9_HIGHBITDEPTH
  if (xd->cur_buf->flags & YV12_FLAG_HIGHBITDEPTH) {
    fwd_txm4x4 = xd->lossless[mi->mbmi.segment_id] ? vp10_highbd_fwht4x4
                                                   : vpx_highbd_fdct4x4;
  } else {
    fwd_txm4x4 = xd->lossless[mi->mbmi.segment_id] ? vp10_fwht4x4 : vpx_fdct4x4;
  }
#else
  fwd_txm4x4 = xd->lossless[mi->mbmi.segment_id] ? vp10_fwht4x4 : vpx_fdct4x4;
#endif  // CONFIG_VP9_HIGHBITDEPTH

  // Residual = source - prediction, written into p->src_diff (stride 8).
#if CONFIG_VP9_HIGHBITDEPTH
  if (xd->cur_buf->flags & YV12_FLAG_HIGHBITDEPTH) {
    vpx_highbd_subtract_block(
        height, width, vp10_raster_block_offset_int16(BLOCK_8X8, i, p->src_diff),
        8, src, p->src.stride, dst, pd->dst.stride, xd->bd);
  } else {
    vpx_subtract_block(
        height, width, vp10_raster_block_offset_int16(BLOCK_8X8, i, p->src_diff),
        8, src, p->src.stride, dst, pd->dst.stride);
  }
#else
  vpx_subtract_block(height, width,
                     vp10_raster_block_offset_int16(BLOCK_8X8, i, p->src_diff),
                     8, src, p->src.stride, dst, pd->dst.stride);
#endif  // CONFIG_VP9_HIGHBITDEPTH

  k = i;
  for (idy = 0; idy < height / 4; ++idy) {
    for (idx = 0; idx < width / 4; ++idx) {
      int64_t ssz, rd, rd1, rd2;
      tran_low_t* coeff;
#if CONFIG_VAR_TX
      int coeff_ctx;
#endif
      // k indexes the 4x4 unit within the 2x2 bmi layout of the 8x8 block.
      k += (idy * 2 + idx);
#if CONFIG_VAR_TX
      coeff_ctx = combine_entropy_contexts(*(ta + (k & 1)),
                                           *(tl + (k >> 1)));
#endif
      coeff = BLOCK_OFFSET(p->coeff, k);
      fwd_txm4x4(vp10_raster_block_offset_int16(BLOCK_8X8, k, p->src_diff),
                 coeff, 8);
      vp10_regular_quantize_b_4x4(x, 0, k, so->scan, so->iscan);
      // Transform-domain distortion against the quantized coefficients.
#if CONFIG_VP9_HIGHBITDEPTH
      if (xd->cur_buf->flags & YV12_FLAG_HIGHBITDEPTH) {
        thisdistortion += vp10_highbd_block_error(coeff,
                                                  BLOCK_OFFSET(pd->dqcoeff, k),
                                                  16, &ssz, xd->bd);
      } else {
        thisdistortion += vp10_block_error(coeff, BLOCK_OFFSET(pd->dqcoeff, k),
                                           16, &ssz);
      }
#else
      thisdistortion += vp10_block_error(coeff, BLOCK_OFFSET(pd->dqcoeff, k),
                                         16, &ssz);
#endif  // CONFIG_VP9_HIGHBITDEPTH
      thissse += ssz;
#if CONFIG_VAR_TX
      thisrate += cost_coeffs(x, 0, k, coeff_ctx,
                              TX_4X4,
                              so->scan, so->neighbors,
                              cpi->sf.use_fast_coef_costing);
      // Update entropy contexts: nonzero iff the block has coefficients.
      *(ta + (k & 1)) = !(p->eobs[k] == 0);
      *(tl + (k >> 1)) = !(p->eobs[k] == 0);
#else
      thisrate += cost_coeffs(x, 0, k, ta + (k & 1), tl + (k >> 1),
                              TX_4X4,
                              so->scan, so->neighbors,
                              cpi->sf.use_fast_coef_costing);
#endif
      // Early exit when the better of coded vs. skip cost already loses.
      rd1 = RDCOST(x->rdmult, x->rddiv, thisrate, thisdistortion >> 2);
      rd2 = RDCOST(x->rdmult, x->rddiv, 0, thissse >> 2);
      rd = VPXMIN(rd1, rd2);
      if (rd >= best_yrd)
        return INT64_MAX;
    }
  }

  *distortion = thisdistortion >> 2;
  *labelyrate = thisrate;
  *sse = thissse >> 2;

  return RDCOST(x->rdmult, x->rddiv, *labelyrate, *distortion);
}
2973
// Rate-distortion statistics for one inter mode tried on a sub8x8
// partition during the segmented motion search.
typedef struct {
  int eobs;              // end-of-block index of the partition's coefficients
  int brate;             // total rate (mode/mv signaling + coefficients)
  int byrate;            // coefficient (token-only) rate
  int64_t bdist;         // distortion
  int64_t bsse;          // sum of squared error
  int64_t brdcost;       // combined rd cost for this mode
  int_mv mvs[2];         // selected mv per reference (2nd used if compound)
  ENTROPY_CONTEXT ta[2]; // above entropy contexts after coding
  ENTROPY_CONTEXT tl[2]; // left entropy contexts after coding
} SEG_RDSTAT;
2985
2986typedef struct {
2987 int_mv *ref_mv[2];
2988 int_mv mvp;
2989
2990 int64_t segment_rd;
2991 int r;
2992 int64_t d;
2993 int64_t sse;
2994 int segment_yrate;
2995 PREDICTION_MODE modes[4];
2996 SEG_RDSTAT rdstat[4][INTER_MODES];
2997 int mvthresh;
2998} BEST_SEG_INFO;
2999
3000static INLINE int mv_check_bounds(const MACROBLOCK *x, const MV *mv) {
3001 return (mv->row >> 3) < x->mv_row_min ||
3002 (mv->row >> 3) > x->mv_row_max ||
3003 (mv->col >> 3) < x->mv_col_min ||
3004 (mv->col >> 3) > x->mv_col_max;
3005}
3006
// Advance the luma source and prediction buffer pointers so they address
// sub8x8 block `i` directly.  Pair with mi_buf_restore() to undo.
static INLINE void mi_buf_shift(MACROBLOCK *x, int i) {
  MB_MODE_INFO *const mbmi = &x->e_mbd.mi[0]->mbmi;
  struct macroblock_plane *const p = &x->plane[0];
  struct macroblockd_plane *const pd = &x->e_mbd.plane[0];

  p->src.buf = &p->src.buf[vp10_raster_block_offset(BLOCK_8X8, i,
                                                    p->src.stride)];
  // Prediction buffers are expected to stay 8-byte aligned.
  assert(((intptr_t)pd->pre[0].buf & 0x7) == 0);
  pd->pre[0].buf = &pd->pre[0].buf[vp10_raster_block_offset(BLOCK_8X8, i,
                                                            pd->pre[0].stride)];
  // Shift the second reference only for compound prediction.
  if (has_second_ref(mbmi))
    pd->pre[1].buf = &pd->pre[1].buf[vp10_raster_block_offset(BLOCK_8X8, i,
                                                              pd->pre[1].stride)];
}
3021
3022static INLINE void mi_buf_restore(MACROBLOCK *x, struct buf_2d orig_src,
3023 struct buf_2d orig_pre[2]) {
3024 MB_MODE_INFO *mbmi = &x->e_mbd.mi[0]->mbmi;
3025 x->plane[0].src = orig_src;
3026 x->e_mbd.plane[0].pre[0] = orig_pre[0];
3027 if (has_second_ref(mbmi))
3028 x->e_mbd.plane[0].pre[1] = orig_pre[1];
3029}
3030
Jingning Han3ee6db62015-08-05 19:00:31 -07003031// Check if NEARESTMV/NEARMV/ZEROMV is the cheapest way encode zero motion.
3032// TODO(aconverse): Find out if this is still productive then clean up or remove
3033static int check_best_zero_mv(
Yaowu Xu26a9afc2015-08-13 09:42:27 -07003034 const VP10_COMP *cpi, const uint8_t mode_context[MAX_REF_FRAMES],
Jingning Han3ee6db62015-08-05 19:00:31 -07003035 int_mv frame_mv[MB_MODE_COUNT][MAX_REF_FRAMES], int this_mode,
3036 const MV_REFERENCE_FRAME ref_frames[2]) {
3037 if ((this_mode == NEARMV || this_mode == NEARESTMV || this_mode == ZEROMV) &&
3038 frame_mv[this_mode][ref_frames[0]].as_int == 0 &&
3039 (ref_frames[1] == NONE ||
3040 frame_mv[this_mode][ref_frames[1]].as_int == 0)) {
3041 int rfc = mode_context[ref_frames[0]];
3042 int c1 = cost_mv_ref(cpi, NEARMV, rfc);
3043 int c2 = cost_mv_ref(cpi, NEARESTMV, rfc);
3044 int c3 = cost_mv_ref(cpi, ZEROMV, rfc);
3045
3046 if (this_mode == NEARMV) {
3047 if (c1 > c3) return 0;
3048 } else if (this_mode == NEARESTMV) {
3049 if (c2 > c3) return 0;
3050 } else {
3051 assert(this_mode == ZEROMV);
3052 if (ref_frames[1] == NONE) {
3053 if ((c3 >= c2 && frame_mv[NEARESTMV][ref_frames[0]].as_int == 0) ||
3054 (c3 >= c1 && frame_mv[NEARMV][ref_frames[0]].as_int == 0))
3055 return 0;
3056 } else {
3057 if ((c3 >= c2 && frame_mv[NEARESTMV][ref_frames[0]].as_int == 0 &&
3058 frame_mv[NEARESTMV][ref_frames[1]].as_int == 0) ||
3059 (c3 >= c1 && frame_mv[NEARMV][ref_frames[0]].as_int == 0 &&
3060 frame_mv[NEARMV][ref_frames[1]].as_int == 0))
3061 return 0;
3062 }
3063 }
3064 }
3065 return 1;
3066}
3067
Yaowu Xu26a9afc2015-08-13 09:42:27 -07003068static void joint_motion_search(VP10_COMP *cpi, MACROBLOCK *x,
Jingning Han3ee6db62015-08-05 19:00:31 -07003069 BLOCK_SIZE bsize,
3070 int_mv *frame_mv,
3071 int mi_row, int mi_col,
3072 int_mv single_newmv[MAX_REF_FRAMES],
3073 int *rate_mv) {
Yaowu Xufc7cbd12015-08-13 09:36:53 -07003074 const VP10_COMMON *const cm = &cpi->common;
Jingning Han3ee6db62015-08-05 19:00:31 -07003075 const int pw = 4 * num_4x4_blocks_wide_lookup[bsize];
3076 const int ph = 4 * num_4x4_blocks_high_lookup[bsize];
3077 MACROBLOCKD *xd = &x->e_mbd;
3078 MB_MODE_INFO *mbmi = &xd->mi[0]->mbmi;
3079 const int refs[2] = {mbmi->ref_frame[0],
3080 mbmi->ref_frame[1] < 0 ? 0 : mbmi->ref_frame[1]};
3081 int_mv ref_mv[2];
3082 int ite, ref;
3083 const InterpKernel *kernel = vp10_filter_kernels[mbmi->interp_filter];
3084 struct scale_factors sf;
3085
3086 // Do joint motion search in compound mode to get more accurate mv.
3087 struct buf_2d backup_yv12[2][MAX_MB_PLANE];
3088 int last_besterr[2] = {INT_MAX, INT_MAX};
3089 const YV12_BUFFER_CONFIG *const scaled_ref_frame[2] = {
3090 vp10_get_scaled_ref_frame(cpi, mbmi->ref_frame[0]),
3091 vp10_get_scaled_ref_frame(cpi, mbmi->ref_frame[1])
3092 };
3093
3094 // Prediction buffer from second frame.
3095#if CONFIG_VP9_HIGHBITDEPTH
3096 DECLARE_ALIGNED(16, uint16_t, second_pred_alloc_16[64 * 64]);
3097 uint8_t *second_pred;
3098#else
3099 DECLARE_ALIGNED(16, uint8_t, second_pred[64 * 64]);
3100#endif // CONFIG_VP9_HIGHBITDEPTH
3101
3102 for (ref = 0; ref < 2; ++ref) {
3103 ref_mv[ref] = x->mbmi_ext->ref_mvs[refs[ref]][0];
3104
3105 if (scaled_ref_frame[ref]) {
3106 int i;
3107 // Swap out the reference frame for a version that's been scaled to
3108 // match the resolution of the current frame, allowing the existing
3109 // motion search code to be used without additional modifications.
3110 for (i = 0; i < MAX_MB_PLANE; i++)
3111 backup_yv12[ref][i] = xd->plane[i].pre[ref];
3112 vp10_setup_pre_planes(xd, ref, scaled_ref_frame[ref], mi_row, mi_col,
3113 NULL);
3114 }
3115
3116 frame_mv[refs[ref]].as_int = single_newmv[refs[ref]].as_int;
3117 }
3118
3119 // Since we have scaled the reference frames to match the size of the current
3120 // frame we must use a unit scaling factor during mode selection.
3121#if CONFIG_VP9_HIGHBITDEPTH
3122 vp10_setup_scale_factors_for_frame(&sf, cm->width, cm->height,
Debargha Mukherjee85514c42015-10-30 09:19:36 -07003123 cm->width, cm->height,
3124 cm->use_highbitdepth);
Jingning Han3ee6db62015-08-05 19:00:31 -07003125#else
3126 vp10_setup_scale_factors_for_frame(&sf, cm->width, cm->height,
Debargha Mukherjee85514c42015-10-30 09:19:36 -07003127 cm->width, cm->height);
Jingning Han3ee6db62015-08-05 19:00:31 -07003128#endif // CONFIG_VP9_HIGHBITDEPTH
3129
3130 // Allow joint search multiple times iteratively for each reference frame
3131 // and break out of the search loop if it couldn't find a better mv.
3132 for (ite = 0; ite < 4; ite++) {
3133 struct buf_2d ref_yv12[2];
3134 int bestsme = INT_MAX;
3135 int sadpb = x->sadperbit16;
3136 MV tmp_mv;
3137 int search_range = 3;
3138
3139 int tmp_col_min = x->mv_col_min;
3140 int tmp_col_max = x->mv_col_max;
3141 int tmp_row_min = x->mv_row_min;
3142 int tmp_row_max = x->mv_row_max;
3143 int id = ite % 2; // Even iterations search in the first reference frame,
3144 // odd iterations search in the second. The predictor
3145 // found for the 'other' reference frame is factored in.
3146
3147 // Initialized here because of compiler problem in Visual Studio.
3148 ref_yv12[0] = xd->plane[0].pre[0];
3149 ref_yv12[1] = xd->plane[0].pre[1];
3150
3151 // Get the prediction block from the 'other' reference frame.
3152#if CONFIG_VP9_HIGHBITDEPTH
3153 if (xd->cur_buf->flags & YV12_FLAG_HIGHBITDEPTH) {
3154 second_pred = CONVERT_TO_BYTEPTR(second_pred_alloc_16);
3155 vp10_highbd_build_inter_predictor(ref_yv12[!id].buf,
3156 ref_yv12[!id].stride,
3157 second_pred, pw,
3158 &frame_mv[refs[!id]].as_mv,
3159 &sf, pw, ph, 0,
3160 kernel, MV_PRECISION_Q3,
3161 mi_col * MI_SIZE, mi_row * MI_SIZE,
3162 xd->bd);
3163 } else {
3164 second_pred = (uint8_t *)second_pred_alloc_16;
3165 vp10_build_inter_predictor(ref_yv12[!id].buf,
3166 ref_yv12[!id].stride,
3167 second_pred, pw,
3168 &frame_mv[refs[!id]].as_mv,
3169 &sf, pw, ph, 0,
3170 kernel, MV_PRECISION_Q3,
3171 mi_col * MI_SIZE, mi_row * MI_SIZE);
3172 }
3173#else
3174 vp10_build_inter_predictor(ref_yv12[!id].buf,
3175 ref_yv12[!id].stride,
3176 second_pred, pw,
3177 &frame_mv[refs[!id]].as_mv,
3178 &sf, pw, ph, 0,
3179 kernel, MV_PRECISION_Q3,
3180 mi_col * MI_SIZE, mi_row * MI_SIZE);
3181#endif // CONFIG_VP9_HIGHBITDEPTH
3182
3183 // Do compound motion search on the current reference frame.
3184 if (id)
3185 xd->plane[0].pre[0] = ref_yv12[id];
3186 vp10_set_mv_search_range(x, &ref_mv[id].as_mv);
3187
3188 // Use the mv result from the single mode as mv predictor.
3189 tmp_mv = frame_mv[refs[id]].as_mv;
3190
3191 tmp_mv.col >>= 3;
3192 tmp_mv.row >>= 3;
3193
3194 // Small-range full-pixel motion search.
3195 bestsme = vp10_refining_search_8p_c(x, &tmp_mv, sadpb,
3196 search_range,
3197 &cpi->fn_ptr[bsize],
3198 &ref_mv[id].as_mv, second_pred);
3199 if (bestsme < INT_MAX)
3200 bestsme = vp10_get_mvpred_av_var(x, &tmp_mv, &ref_mv[id].as_mv,
3201 second_pred, &cpi->fn_ptr[bsize], 1);
3202
3203 x->mv_col_min = tmp_col_min;
3204 x->mv_col_max = tmp_col_max;
3205 x->mv_row_min = tmp_row_min;
3206 x->mv_row_max = tmp_row_max;
3207
3208 if (bestsme < INT_MAX) {
3209 int dis; /* TODO: use dis in distortion calculation later. */
3210 unsigned int sse;
3211 bestsme = cpi->find_fractional_mv_step(
3212 x, &tmp_mv,
3213 &ref_mv[id].as_mv,
3214 cpi->common.allow_high_precision_mv,
3215 x->errorperbit,
3216 &cpi->fn_ptr[bsize],
3217 0, cpi->sf.mv.subpel_iters_per_step,
3218 NULL,
3219 x->nmvjointcost, x->mvcost,
3220 &dis, &sse, second_pred,
3221 pw, ph);
3222 }
3223
3224 // Restore the pointer to the first (possibly scaled) prediction buffer.
3225 if (id)
3226 xd->plane[0].pre[0] = ref_yv12[0];
3227
3228 if (bestsme < last_besterr[id]) {
3229 frame_mv[refs[id]].as_mv = tmp_mv;
3230 last_besterr[id] = bestsme;
3231 } else {
3232 break;
3233 }
3234 }
3235
3236 *rate_mv = 0;
3237
3238 for (ref = 0; ref < 2; ++ref) {
3239 if (scaled_ref_frame[ref]) {
3240 // Restore the prediction frame pointers to their unscaled versions.
3241 int i;
3242 for (i = 0; i < MAX_MB_PLANE; i++)
3243 xd->plane[i].pre[ref] = backup_yv12[ref][i];
3244 }
3245
3246 *rate_mv += vp10_mv_bit_cost(&frame_mv[refs[ref]].as_mv,
3247 &x->mbmi_ext->ref_mvs[refs[ref]][0].as_mv,
3248 x->nmvjointcost, x->mvcost, MV_COST_WEIGHT);
3249 }
3250}
3251
// Joint rate-distortion mode search over the 4x4/4x8/8x4 sub-blocks of an
// 8x8 partition. For each sub-block, the inter modes permitted by
// inter_mode_mask are evaluated (running a fresh motion search for NEWMV
// when no cached vector exists) and the cheapest mode is kept. Per
// (block, mode) statistics are accumulated in bsi_buf[filter_idx] so that
// later interpolation-filter passes can reuse earlier results when the mvs
// are integer-pel and identical.
// Returns the accumulated segment RD cost, or INT64_MAX if the search
// cannot beat best_rd.
static int64_t rd_pick_best_sub8x8_mode(VP10_COMP *cpi, MACROBLOCK *x,
                                        int_mv *best_ref_mv,
                                        int_mv *second_best_ref_mv,
                                        int64_t best_rd, int *returntotrate,
                                        int *returnyrate,
                                        int64_t *returndistortion,
                                        int *skippable, int64_t *psse,
                                        int mvthresh,
                                        int_mv seg_mvs[4][MAX_REF_FRAMES],
                                        BEST_SEG_INFO *bsi_buf, int filter_idx,
                                        int mi_row, int mi_col) {
  int i;
  BEST_SEG_INFO *bsi = bsi_buf + filter_idx;
  MACROBLOCKD *xd = &x->e_mbd;
  MODE_INFO *mi = xd->mi[0];
  MB_MODE_INFO *mbmi = &mi->mbmi;
  int mode_idx;
  int k, br = 0, idx, idy;
  int64_t bd = 0, block_sse = 0;
  PREDICTION_MODE this_mode;
  VP10_COMMON *cm = &cpi->common;
  struct macroblock_plane *const p = &x->plane[0];
  struct macroblockd_plane *const pd = &xd->plane[0];
  const int label_count = 4;
  int64_t this_segment_rd = 0;
  int label_mv_thresh;
  int segmentyrate = 0;
  const BLOCK_SIZE bsize = mbmi->sb_type;
  const int num_4x4_blocks_wide = num_4x4_blocks_wide_lookup[bsize];
  const int num_4x4_blocks_high = num_4x4_blocks_high_lookup[bsize];
  ENTROPY_CONTEXT t_above[2], t_left[2];
  int subpelmv = 1, have_ref = 0;
  const int has_second_rf = has_second_ref(mbmi);
  const int inter_mode_mask = cpi->sf.inter_mode_mask[bsize];
  MB_MODE_INFO_EXT *const mbmi_ext = x->mbmi_ext;

  vp10_zero(*bsi);

  bsi->segment_rd = best_rd;
  bsi->ref_mv[0] = best_ref_mv;
  bsi->ref_mv[1] = second_best_ref_mv;
  bsi->mvp.as_int = best_ref_mv->as_int;
  bsi->mvthresh = mvthresh;

  for (i = 0; i < 4; i++)
    bsi->modes[i] = ZEROMV;

  // Snapshot the entropy contexts; the per-mode trial copies below are
  // restored from / committed to these after each sub-block decision.
  memcpy(t_above, pd->above_context, sizeof(t_above));
  memcpy(t_left, pd->left_context, sizeof(t_left));

  // 64 makes this threshold really big effectively
  // making it so that we very rarely check mvs on
  // segments. setting this to 1 would make mv thresh
  // roughly equal to what it is for macroblocks
  label_mv_thresh = 1 * bsi->mvthresh / label_count;

  // Segmentation method overheads
  for (idy = 0; idy < 2; idy += num_4x4_blocks_high) {
    for (idx = 0; idx < 2; idx += num_4x4_blocks_wide) {
      // TODO(jingning,rbultje): rewrite the rate-distortion optimization
      // loop for 4x4/4x8/8x4 block coding. to be replaced with new rd loop
      int_mv mode_mv[MB_MODE_COUNT][2];
      int_mv frame_mv[MB_MODE_COUNT][MAX_REF_FRAMES];
      PREDICTION_MODE mode_selected = ZEROMV;
      int64_t best_rd = INT64_MAX;  // shadows the function argument on purpose
      const int i = idy * 2 + idx;  // linear sub-block index, 0..3
      int ref;

      // Seed the NEARESTMV/NEARMV candidates for each active reference.
      for (ref = 0; ref < 1 + has_second_rf; ++ref) {
        const MV_REFERENCE_FRAME frame = mbmi->ref_frame[ref];
        frame_mv[ZEROMV][frame].as_int = 0;
        vp10_append_sub8x8_mvs_for_idx(cm, xd, i, ref, mi_row, mi_col,
                                       &frame_mv[NEARESTMV][frame],
                                       &frame_mv[NEARMV][frame],
                                       mbmi_ext->mode_context);
      }

      // search for the best motion vector on this segment
      for (this_mode = NEARESTMV; this_mode <= NEWMV; ++this_mode) {
        const struct buf_2d orig_src = x->plane[0].src;
        struct buf_2d orig_pre[2];

        mode_idx = INTER_OFFSET(this_mode);
        bsi->rdstat[i][mode_idx].brdcost = INT64_MAX;
        if (!(inter_mode_mask & (1 << this_mode)))
          continue;

        if (!check_best_zero_mv(cpi, mbmi_ext->mode_context, frame_mv,
                                this_mode, mbmi->ref_frame))
          continue;

        memcpy(orig_pre, pd->pre, sizeof(orig_pre));
        memcpy(bsi->rdstat[i][mode_idx].ta, t_above,
               sizeof(bsi->rdstat[i][mode_idx].ta));
        memcpy(bsi->rdstat[i][mode_idx].tl, t_left,
               sizeof(bsi->rdstat[i][mode_idx].tl));

        // motion search for newmv (single predictor case only)
        if (!has_second_rf && this_mode == NEWMV &&
            seg_mvs[i][mbmi->ref_frame[0]].as_int == INVALID_MV) {
          MV *const new_mv = &mode_mv[NEWMV][0].as_mv;
          int step_param = 0;
          int thissme, bestsme = INT_MAX;
          int sadpb = x->sadperbit4;
          MV mvp_full;
          int max_mv;
          int cost_list[5];

          /* Is the best so far sufficiently good that we can't justify
           * doing a new motion search. */
          if (best_rd < label_mv_thresh)
            break;

          if (cpi->oxcf.mode != BEST) {
            // use previous block's result as next block's MV predictor.
            if (i > 0) {
              bsi->mvp.as_int = mi->bmi[i - 1].as_mv[0].as_int;
              if (i == 2)
                bsi->mvp.as_int = mi->bmi[i - 2].as_mv[0].as_int;
            }
          }
          if (i == 0)
            max_mv = x->max_mv_context[mbmi->ref_frame[0]];
          else
            max_mv =
                VPXMAX(abs(bsi->mvp.as_mv.row), abs(bsi->mvp.as_mv.col)) >> 3;

          if (cpi->sf.mv.auto_mv_step_size && cm->show_frame) {
            // Take wtd average of the step_params based on the last frame's
            // max mv magnitude and the best ref mvs of the current block for
            // the given reference.
            step_param = (vp10_init_search_range(max_mv) +
                              cpi->mv_step_param) / 2;
          } else {
            step_param = cpi->mv_step_param;
          }

          // Full-pel units for the full-pixel search below.
          mvp_full.row = bsi->mvp.as_mv.row >> 3;
          mvp_full.col = bsi->mvp.as_mv.col >> 3;

          if (cpi->sf.adaptive_motion_search) {
            mvp_full.row = x->pred_mv[mbmi->ref_frame[0]].row >> 3;
            mvp_full.col = x->pred_mv[mbmi->ref_frame[0]].col >> 3;
            step_param = VPXMAX(step_param, 8);
          }

          // adjust src pointer for this block
          mi_buf_shift(x, i);

          vp10_set_mv_search_range(x, &bsi->ref_mv[0]->as_mv);

          bestsme = vp10_full_pixel_search(
              cpi, x, bsize, &mvp_full, step_param, sadpb,
              cpi->sf.mv.subpel_search_method != SUBPEL_TREE ? cost_list : NULL,
              &bsi->ref_mv[0]->as_mv, new_mv,
              INT_MAX, 1);

          // Should we do a full search (best quality only)
          if (cpi->oxcf.mode == BEST) {
            int_mv *const best_mv = &mi->bmi[i].as_mv[0];
            /* Check if mvp_full is within the range. */
            clamp_mv(&mvp_full, x->mv_col_min, x->mv_col_max,
                     x->mv_row_min, x->mv_row_max);
            thissme = cpi->full_search_sad(x, &mvp_full,
                                           sadpb, 16, &cpi->fn_ptr[bsize],
                                           &bsi->ref_mv[0]->as_mv,
                                           &best_mv->as_mv);
            cost_list[1] = cost_list[2] = cost_list[3] = cost_list[4] = INT_MAX;
            if (thissme < bestsme) {
              bestsme = thissme;
              *new_mv = best_mv->as_mv;
            } else {
              // The full search result is actually worse so re-instate the
              // previous best vector
              best_mv->as_mv = *new_mv;
            }
          }

          if (bestsme < INT_MAX) {
            int distortion;
            cpi->find_fractional_mv_step(
                x,
                new_mv,
                &bsi->ref_mv[0]->as_mv,
                cm->allow_high_precision_mv,
                x->errorperbit, &cpi->fn_ptr[bsize],
                cpi->sf.mv.subpel_force_stop,
                cpi->sf.mv.subpel_iters_per_step,
                cond_cost_list(cpi, cost_list),
                x->nmvjointcost, x->mvcost,
                &distortion,
                &x->pred_sse[mbmi->ref_frame[0]],
                NULL, 0, 0);

            // save motion search result for use in compound prediction
            seg_mvs[i][mbmi->ref_frame[0]].as_mv = *new_mv;
          }

          if (cpi->sf.adaptive_motion_search)
            x->pred_mv[mbmi->ref_frame[0]] = *new_mv;

          // restore src pointers
          mi_buf_restore(x, orig_src, orig_pre);
        }

        if (has_second_rf) {
          // Compound prediction needs a valid cached mv for both references.
          if (seg_mvs[i][mbmi->ref_frame[1]].as_int == INVALID_MV ||
              seg_mvs[i][mbmi->ref_frame[0]].as_int == INVALID_MV)
            continue;
        }

        if (has_second_rf && this_mode == NEWMV &&
            mbmi->interp_filter == EIGHTTAP) {
          // adjust src pointers
          mi_buf_shift(x, i);
          if (cpi->sf.comp_inter_joint_search_thresh <= bsize) {
            int rate_mv;
            joint_motion_search(cpi, x, bsize, frame_mv[this_mode],
                                mi_row, mi_col, seg_mvs[i],
                                &rate_mv);
            seg_mvs[i][mbmi->ref_frame[0]].as_int =
                frame_mv[this_mode][mbmi->ref_frame[0]].as_int;
            seg_mvs[i][mbmi->ref_frame[1]].as_int =
                frame_mv[this_mode][mbmi->ref_frame[1]].as_int;
          }
          // restore src pointers
          mi_buf_restore(x, orig_src, orig_pre);
        }

        bsi->rdstat[i][mode_idx].brate =
            set_and_cost_bmi_mvs(cpi, x, xd, i, this_mode, mode_mv[this_mode],
                                 frame_mv, seg_mvs[i], bsi->ref_mv,
                                 x->nmvjointcost, x->mvcost);

        // Propagate the chosen mvs to the other 4x4 units covered by this
        // (possibly 4x8 or 8x4) block.
        for (ref = 0; ref < 1 + has_second_rf; ++ref) {
          bsi->rdstat[i][mode_idx].mvs[ref].as_int =
              mode_mv[this_mode][ref].as_int;
          if (num_4x4_blocks_wide > 1)
            bsi->rdstat[i + 1][mode_idx].mvs[ref].as_int =
                mode_mv[this_mode][ref].as_int;
          if (num_4x4_blocks_high > 1)
            bsi->rdstat[i + 2][mode_idx].mvs[ref].as_int =
                mode_mv[this_mode][ref].as_int;
        }

        // Trap vectors that reach beyond the UMV borders
        if (mv_check_bounds(x, &mode_mv[this_mode][0].as_mv) ||
            (has_second_rf &&
             mv_check_bounds(x, &mode_mv[this_mode][1].as_mv)))
          continue;

        if (filter_idx > 0) {
          // Try to reuse the RD stats computed for an earlier filter pass:
          // valid only when all mvs are integer-pel and match that pass.
          BEST_SEG_INFO *ref_bsi = bsi_buf;
          subpelmv = 0;
          have_ref = 1;

          for (ref = 0; ref < 1 + has_second_rf; ++ref) {
            subpelmv |= mv_has_subpel(&mode_mv[this_mode][ref].as_mv);
            have_ref &= mode_mv[this_mode][ref].as_int ==
                ref_bsi->rdstat[i][mode_idx].mvs[ref].as_int;
          }

          if (filter_idx > 1 && !subpelmv && !have_ref) {
            ref_bsi = bsi_buf + 1;
            have_ref = 1;
            for (ref = 0; ref < 1 + has_second_rf; ++ref)
              have_ref &= mode_mv[this_mode][ref].as_int ==
                  ref_bsi->rdstat[i][mode_idx].mvs[ref].as_int;
          }

          if (!subpelmv && have_ref &&
              ref_bsi->rdstat[i][mode_idx].brdcost < INT64_MAX) {
            memcpy(&bsi->rdstat[i][mode_idx], &ref_bsi->rdstat[i][mode_idx],
                   sizeof(SEG_RDSTAT));
            if (num_4x4_blocks_wide > 1)
              bsi->rdstat[i + 1][mode_idx].eobs =
                  ref_bsi->rdstat[i + 1][mode_idx].eobs;
            if (num_4x4_blocks_high > 1)
              bsi->rdstat[i + 2][mode_idx].eobs =
                  ref_bsi->rdstat[i + 2][mode_idx].eobs;

            if (bsi->rdstat[i][mode_idx].brdcost < best_rd) {
              mode_selected = this_mode;
              best_rd = bsi->rdstat[i][mode_idx].brdcost;
            }
            continue;
          }
        }

        bsi->rdstat[i][mode_idx].brdcost =
            encode_inter_mb_segment(cpi, x,
                                    bsi->segment_rd - this_segment_rd, i,
                                    &bsi->rdstat[i][mode_idx].byrate,
                                    &bsi->rdstat[i][mode_idx].bdist,
                                    &bsi->rdstat[i][mode_idx].bsse,
                                    bsi->rdstat[i][mode_idx].ta,
                                    bsi->rdstat[i][mode_idx].tl,
                                    idy, idx,
                                    mi_row, mi_col);
        if (bsi->rdstat[i][mode_idx].brdcost < INT64_MAX) {
          bsi->rdstat[i][mode_idx].brdcost += RDCOST(x->rdmult, x->rddiv,
                                              bsi->rdstat[i][mode_idx].brate, 0);
          bsi->rdstat[i][mode_idx].brate += bsi->rdstat[i][mode_idx].byrate;
          bsi->rdstat[i][mode_idx].eobs = p->eobs[i];
          if (num_4x4_blocks_wide > 1)
            bsi->rdstat[i + 1][mode_idx].eobs = p->eobs[i + 1];
          if (num_4x4_blocks_high > 1)
            bsi->rdstat[i + 2][mode_idx].eobs = p->eobs[i + 2];
        }

        if (bsi->rdstat[i][mode_idx].brdcost < best_rd) {
          mode_selected = this_mode;
          best_rd = bsi->rdstat[i][mode_idx].brdcost;
        }
      } /*for each 4x4 mode*/

      // No usable mode for this sub-block: invalidate the remaining stats
      // and abort the whole segment search.
      if (best_rd == INT64_MAX) {
        int iy, midx;
        for (iy = i + 1; iy < 4; ++iy)
          for (midx = 0; midx < INTER_MODES; ++midx)
            bsi->rdstat[iy][midx].brdcost = INT64_MAX;
        bsi->segment_rd = INT64_MAX;
        return INT64_MAX;
      }

      mode_idx = INTER_OFFSET(mode_selected);
      // Commit the winning mode's entropy contexts for subsequent blocks.
      memcpy(t_above, bsi->rdstat[i][mode_idx].ta, sizeof(t_above));
      memcpy(t_left, bsi->rdstat[i][mode_idx].tl, sizeof(t_left));

      set_and_cost_bmi_mvs(cpi, x, xd, i, mode_selected, mode_mv[mode_selected],
                           frame_mv, seg_mvs[i], bsi->ref_mv, x->nmvjointcost,
                           x->mvcost);

      br += bsi->rdstat[i][mode_idx].brate;
      bd += bsi->rdstat[i][mode_idx].bdist;
      block_sse += bsi->rdstat[i][mode_idx].bsse;
      segmentyrate += bsi->rdstat[i][mode_idx].byrate;
      this_segment_rd += bsi->rdstat[i][mode_idx].brdcost;

      // Early out as soon as the running total exceeds the budget.
      if (this_segment_rd > bsi->segment_rd) {
        int iy, midx;
        for (iy = i + 1; iy < 4; ++iy)
          for (midx = 0; midx < INTER_MODES; ++midx)
            bsi->rdstat[iy][midx].brdcost = INT64_MAX;
        bsi->segment_rd = INT64_MAX;
        return INT64_MAX;
      }
    }
  } /* for each label */

  bsi->r = br;
  bsi->d = bd;
  bsi->segment_yrate = segmentyrate;
  bsi->segment_rd = this_segment_rd;
  bsi->sse = block_sse;

  // update the coding decisions
  for (k = 0; k < 4; ++k)
    bsi->modes[k] = mi->bmi[k].as_mode;

  if (bsi->segment_rd > best_rd)
    return INT64_MAX;
  /* set it to the best */
  for (i = 0; i < 4; i++) {
    mode_idx = INTER_OFFSET(bsi->modes[i]);
    mi->bmi[i].as_mv[0].as_int = bsi->rdstat[i][mode_idx].mvs[0].as_int;
    if (has_second_ref(mbmi))
      mi->bmi[i].as_mv[1].as_int = bsi->rdstat[i][mode_idx].mvs[1].as_int;
    x->plane[0].eobs[i] = bsi->rdstat[i][mode_idx].eobs;
    mi->bmi[i].as_mode = bsi->modes[i];
  }

  /*
   * used to set mbmi->mv.as_int
   */
  *returntotrate = bsi->r;
  *returndistortion = bsi->d;
  *returnyrate = bsi->segment_yrate;
  *skippable = vp10_is_skippable_in_plane(x, BLOCK_8X8, 0);
  *psse = bsi->sse;
  mbmi->mode = bsi->modes[3];

  return bsi->segment_rd;
}
3636
Yaowu Xufc7cbd12015-08-13 09:36:53 -07003637static void estimate_ref_frame_costs(const VP10_COMMON *cm,
Jingning Han3ee6db62015-08-05 19:00:31 -07003638 const MACROBLOCKD *xd,
3639 int segment_id,
3640 unsigned int *ref_costs_single,
3641 unsigned int *ref_costs_comp,
3642 vpx_prob *comp_mode_p) {
3643 int seg_ref_active = segfeature_active(&cm->seg, segment_id,
3644 SEG_LVL_REF_FRAME);
3645 if (seg_ref_active) {
3646 memset(ref_costs_single, 0, MAX_REF_FRAMES * sizeof(*ref_costs_single));
3647 memset(ref_costs_comp, 0, MAX_REF_FRAMES * sizeof(*ref_costs_comp));
3648 *comp_mode_p = 128;
3649 } else {
3650 vpx_prob intra_inter_p = vp10_get_intra_inter_prob(cm, xd);
3651 vpx_prob comp_inter_p = 128;
3652
3653 if (cm->reference_mode == REFERENCE_MODE_SELECT) {
3654 comp_inter_p = vp10_get_reference_mode_prob(cm, xd);
3655 *comp_mode_p = comp_inter_p;
3656 } else {
3657 *comp_mode_p = 128;
3658 }
3659
3660 ref_costs_single[INTRA_FRAME] = vp10_cost_bit(intra_inter_p, 0);
3661
3662 if (cm->reference_mode != COMPOUND_REFERENCE) {
3663 vpx_prob ref_single_p1 = vp10_get_pred_prob_single_ref_p1(cm, xd);
3664 vpx_prob ref_single_p2 = vp10_get_pred_prob_single_ref_p2(cm, xd);
3665 unsigned int base_cost = vp10_cost_bit(intra_inter_p, 1);
3666
3667 if (cm->reference_mode == REFERENCE_MODE_SELECT)
3668 base_cost += vp10_cost_bit(comp_inter_p, 0);
3669
3670 ref_costs_single[LAST_FRAME] = ref_costs_single[GOLDEN_FRAME] =
3671 ref_costs_single[ALTREF_FRAME] = base_cost;
3672 ref_costs_single[LAST_FRAME] += vp10_cost_bit(ref_single_p1, 0);
3673 ref_costs_single[GOLDEN_FRAME] += vp10_cost_bit(ref_single_p1, 1);
3674 ref_costs_single[ALTREF_FRAME] += vp10_cost_bit(ref_single_p1, 1);
3675 ref_costs_single[GOLDEN_FRAME] += vp10_cost_bit(ref_single_p2, 0);
3676 ref_costs_single[ALTREF_FRAME] += vp10_cost_bit(ref_single_p2, 1);
3677 } else {
3678 ref_costs_single[LAST_FRAME] = 512;
3679 ref_costs_single[GOLDEN_FRAME] = 512;
3680 ref_costs_single[ALTREF_FRAME] = 512;
3681 }
3682 if (cm->reference_mode != SINGLE_REFERENCE) {
3683 vpx_prob ref_comp_p = vp10_get_pred_prob_comp_ref_p(cm, xd);
3684 unsigned int base_cost = vp10_cost_bit(intra_inter_p, 1);
3685
3686 if (cm->reference_mode == REFERENCE_MODE_SELECT)
3687 base_cost += vp10_cost_bit(comp_inter_p, 1);
3688
3689 ref_costs_comp[LAST_FRAME] = base_cost + vp10_cost_bit(ref_comp_p, 0);
3690 ref_costs_comp[GOLDEN_FRAME] = base_cost + vp10_cost_bit(ref_comp_p, 1);
3691 } else {
3692 ref_costs_comp[LAST_FRAME] = 512;
3693 ref_costs_comp[GOLDEN_FRAME] = 512;
3694 }
3695 }
3696}
3697
3698static void store_coding_context(MACROBLOCK *x, PICK_MODE_CONTEXT *ctx,
3699 int mode_index,
3700 int64_t comp_pred_diff[REFERENCE_MODES],
3701 int64_t best_filter_diff[SWITCHABLE_FILTER_CONTEXTS],
3702 int skippable) {
3703 MACROBLOCKD *const xd = &x->e_mbd;
3704
3705 // Take a snapshot of the coding context so it can be
3706 // restored if we decide to encode this way
3707 ctx->skip = x->skip;
3708 ctx->skippable = skippable;
3709 ctx->best_mode_index = mode_index;
3710 ctx->mic = *xd->mi[0];
3711 ctx->mbmi_ext = *x->mbmi_ext;
3712 ctx->single_pred_diff = (int)comp_pred_diff[SINGLE_REFERENCE];
3713 ctx->comp_pred_diff = (int)comp_pred_diff[COMPOUND_REFERENCE];
3714 ctx->hybrid_pred_diff = (int)comp_pred_diff[REFERENCE_MODE_SELECT];
3715
3716 memcpy(ctx->best_filter_diff, best_filter_diff,
3717 sizeof(*best_filter_diff) * SWITCHABLE_FILTER_CONTEXTS);
3718}
3719
Yaowu Xu26a9afc2015-08-13 09:42:27 -07003720static void setup_buffer_inter(VP10_COMP *cpi, MACROBLOCK *x,
Jingning Han3ee6db62015-08-05 19:00:31 -07003721 MV_REFERENCE_FRAME ref_frame,
3722 BLOCK_SIZE block_size,
3723 int mi_row, int mi_col,
3724 int_mv frame_nearest_mv[MAX_REF_FRAMES],
3725 int_mv frame_near_mv[MAX_REF_FRAMES],
3726 struct buf_2d yv12_mb[4][MAX_MB_PLANE]) {
Yaowu Xufc7cbd12015-08-13 09:36:53 -07003727 const VP10_COMMON *cm = &cpi->common;
Jingning Han3ee6db62015-08-05 19:00:31 -07003728 const YV12_BUFFER_CONFIG *yv12 = get_ref_frame_buffer(cpi, ref_frame);
3729 MACROBLOCKD *const xd = &x->e_mbd;
3730 MODE_INFO *const mi = xd->mi[0];
3731 int_mv *const candidates = x->mbmi_ext->ref_mvs[ref_frame];
3732 const struct scale_factors *const sf = &cm->frame_refs[ref_frame - 1].sf;
3733 MB_MODE_INFO_EXT *const mbmi_ext = x->mbmi_ext;
3734
3735 assert(yv12 != NULL);
3736
3737 // TODO(jkoleszar): Is the UV buffer ever used here? If so, need to make this
3738 // use the UV scaling factors.
3739 vp10_setup_pred_block(xd, yv12_mb[ref_frame], yv12, mi_row, mi_col, sf, sf);
3740
3741 // Gets an initial list of candidate vectors from neighbours and orders them
3742 vp10_find_mv_refs(cm, xd, mi, ref_frame, candidates, mi_row, mi_col,
3743 NULL, NULL, mbmi_ext->mode_context);
3744
3745 // Candidate refinement carried out at encoder and decoder
Ronald S. Bultje5b4805d2015-10-02 11:51:54 -04003746 vp10_find_best_ref_mvs(cm->allow_high_precision_mv, candidates,
3747 &frame_nearest_mv[ref_frame],
3748 &frame_near_mv[ref_frame]);
Jingning Han3ee6db62015-08-05 19:00:31 -07003749
3750 // Further refinement that is encode side only to test the top few candidates
3751 // in full and choose the best as the centre point for subsequent searches.
3752 // The current implementation doesn't support scaling.
3753 if (!vp10_is_scaled(sf) && block_size >= BLOCK_8X8)
3754 vp10_mv_pred(cpi, x, yv12_mb[ref_frame][0].buf, yv12->y_stride,
3755 ref_frame, block_size);
3756}
3757
// Single-reference NEWMV motion search: a full-pel search seeded from the
// best available mv predictor, followed by sub-pel refinement. On success
// *tmp_mv holds the resulting vector and *rate_mv its signalling cost; the
// search may bail out early with *tmp_mv == INVALID_MV (see below).
static void single_motion_search(VP10_COMP *cpi, MACROBLOCK *x,
                                 BLOCK_SIZE bsize,
                                 int mi_row, int mi_col,
                                 int_mv *tmp_mv, int *rate_mv) {
  MACROBLOCKD *xd = &x->e_mbd;
  const VP10_COMMON *cm = &cpi->common;
  MB_MODE_INFO *mbmi = &xd->mi[0]->mbmi;
  struct buf_2d backup_yv12[MAX_MB_PLANE] = {{0, 0}};
  int bestsme = INT_MAX;
  int step_param;
  int sadpb = x->sadperbit16;
  MV mvp_full;
  int ref = mbmi->ref_frame[0];
  MV ref_mv = x->mbmi_ext->ref_mvs[ref][0].as_mv;

  // Saved so the search-range clamp applied by vp10_set_mv_search_range can
  // be undone after the full-pixel search.
  int tmp_col_min = x->mv_col_min;
  int tmp_col_max = x->mv_col_max;
  int tmp_row_min = x->mv_row_min;
  int tmp_row_max = x->mv_row_max;
  int cost_list[5];

  const YV12_BUFFER_CONFIG *scaled_ref_frame = vp10_get_scaled_ref_frame(cpi,
                                                                         ref);

  // Candidate predictors; x->mv_best_ref_index selects among them below.
  MV pred_mv[3];
  pred_mv[0] = x->mbmi_ext->ref_mvs[ref][0].as_mv;
  pred_mv[1] = x->mbmi_ext->ref_mvs[ref][1].as_mv;
  pred_mv[2] = x->pred_mv[ref];

  if (scaled_ref_frame) {
    int i;
    // Swap out the reference frame for a version that's been scaled to
    // match the resolution of the current frame, allowing the existing
    // motion search code to be used without additional modifications.
    for (i = 0; i < MAX_MB_PLANE; i++)
      backup_yv12[i] = xd->plane[i].pre[0];

    vp10_setup_pre_planes(xd, 0, scaled_ref_frame, mi_row, mi_col, NULL);
  }

  vp10_set_mv_search_range(x, &ref_mv);

  // Work out the size of the first step in the mv step search.
  // 0 here is maximum length first step. 1 is VPXMAX >> 1 etc.
  if (cpi->sf.mv.auto_mv_step_size && cm->show_frame) {
    // Take wtd average of the step_params based on the last frame's
    // max mv magnitude and that based on the best ref mvs of the current
    // block for the given reference.
    step_param = (vp10_init_search_range(x->max_mv_context[ref]) +
                    cpi->mv_step_param) / 2;
  } else {
    step_param = cpi->mv_step_param;
  }

  if (cpi->sf.adaptive_motion_search && bsize < BLOCK_64X64) {
    // Coarser first step for smaller blocks.
    int boffset =
        2 * (b_width_log2_lookup[BLOCK_64X64] -
             VPXMIN(b_height_log2_lookup[bsize], b_width_log2_lookup[bsize]));
    step_param = VPXMAX(step_param, boffset);
  }

  if (cpi->sf.adaptive_motion_search) {
    int bwl = b_width_log2_lookup[bsize];
    int bhl = b_height_log2_lookup[bsize];
    int tlevel = x->pred_mv_sad[ref] >> (bwl + bhl + 4);

    if (tlevel < 5)
      step_param += 2;

    // prev_mv_sad is not setup for dynamically scaled frames.
    if (cpi->oxcf.resize_mode != RESIZE_DYNAMIC) {
      int i;
      for (i = LAST_FRAME; i <= ALTREF_FRAME && cm->show_frame; ++i) {
        if ((x->pred_mv_sad[ref] >> 3) > x->pred_mv_sad[i]) {
          // Another reference already predicts this block far better; skip
          // the search entirely and report an invalid mv to the caller.
          x->pred_mv[ref].row = 0;
          x->pred_mv[ref].col = 0;
          tmp_mv->as_int = INVALID_MV;

          if (scaled_ref_frame) {
            int i;
            for (i = 0; i < MAX_MB_PLANE; ++i)
              xd->plane[i].pre[0] = backup_yv12[i];
          }
          return;
        }
      }
    }
  }

  mvp_full = pred_mv[x->mv_best_ref_index[ref]];

  // Convert the predictor from 1/8-pel to full-pel units.
  mvp_full.col >>= 3;
  mvp_full.row >>= 3;

  bestsme = vp10_full_pixel_search(cpi, x, bsize, &mvp_full, step_param, sadpb,
                                   cond_cost_list(cpi, cost_list),
                                   &ref_mv, &tmp_mv->as_mv, INT_MAX, 1);

  // Undo the search-range clamp.
  x->mv_col_min = tmp_col_min;
  x->mv_col_max = tmp_col_max;
  x->mv_row_min = tmp_row_min;
  x->mv_row_max = tmp_row_max;

  if (bestsme < INT_MAX) {
    int dis;  /* TODO: use dis in distortion calculation later. */
    cpi->find_fractional_mv_step(x, &tmp_mv->as_mv, &ref_mv,
                                 cm->allow_high_precision_mv,
                                 x->errorperbit,
                                 &cpi->fn_ptr[bsize],
                                 cpi->sf.mv.subpel_force_stop,
                                 cpi->sf.mv.subpel_iters_per_step,
                                 cond_cost_list(cpi, cost_list),
                                 x->nmvjointcost, x->mvcost,
                                 &dis, &x->pred_sse[ref], NULL, 0, 0);
  }
  *rate_mv = vp10_mv_bit_cost(&tmp_mv->as_mv, &ref_mv,
                              x->nmvjointcost, x->mvcost, MV_COST_WEIGHT);

  if (cpi->sf.adaptive_motion_search)
    x->pred_mv[ref] = tmp_mv->as_mv;

  if (scaled_ref_frame) {
    int i;
    // Restore the unscaled reference planes.
    for (i = 0; i < MAX_MB_PLANE; i++)
      xd->plane[i].pre[0] = backup_yv12[i];
  }
}
3885
3886
3887
3888static INLINE void restore_dst_buf(MACROBLOCKD *xd,
3889 uint8_t *orig_dst[MAX_MB_PLANE],
3890 int orig_dst_stride[MAX_MB_PLANE]) {
3891 int i;
3892 for (i = 0; i < MAX_MB_PLANE; i++) {
3893 xd->plane[i].dst.buf = orig_dst[i];
3894 xd->plane[i].dst.stride = orig_dst_stride[i];
3895 }
3896}
3897
// In some situations we want to discount the apparent cost of a new motion
3899// vector. Where there is a subtle motion field and especially where there is
3900// low spatial complexity then it can be hard to cover the cost of a new motion
3901// vector in a single block, even if that motion vector reduces distortion.
3902// However, once established that vector may be usable through the nearest and
3903// near mv modes to reduce distortion in subsequent blocks and also improve
3904// visual quality.
Yaowu Xu26a9afc2015-08-13 09:42:27 -07003905static int discount_newmv_test(const VP10_COMP *cpi,
Jingning Han3ee6db62015-08-05 19:00:31 -07003906 int this_mode,
3907 int_mv this_mv,
3908 int_mv (*mode_mv)[MAX_REF_FRAMES],
3909 int ref_frame) {
3910 return (!cpi->rc.is_src_frame_alt_ref &&
3911 (this_mode == NEWMV) &&
3912 (this_mv.as_int != 0) &&
3913 ((mode_mv[NEARESTMV][ref_frame].as_int == 0) ||
3914 (mode_mv[NEARESTMV][ref_frame].as_int == INVALID_MV)) &&
3915 ((mode_mv[NEARMV][ref_frame].as_int == 0) ||
3916 (mode_mv[NEARMV][ref_frame].as_int == INVALID_MV)));
3917}
3918
// Motion-vector clamping margins in 1/8-pel units (hence the << 3): the
// encoder border width less the interpolation filter extension.
#define LEFT_TOP_MARGIN ((VP9_ENC_BORDER_IN_PIXELS - VP9_INTERP_EXTEND) << 3)
#define RIGHT_BOTTOM_MARGIN ((VP9_ENC_BORDER_IN_PIXELS -\
                              VP9_INTERP_EXTEND) << 3)
3922
3923// TODO(jingning): this mv clamping function should be block size dependent.
3924static INLINE void clamp_mv2(MV *mv, const MACROBLOCKD *xd) {
3925 clamp_mv(mv, xd->mb_to_left_edge - LEFT_TOP_MARGIN,
3926 xd->mb_to_right_edge + RIGHT_BOTTOM_MARGIN,
3927 xd->mb_to_top_edge - LEFT_TOP_MARGIN,
3928 xd->mb_to_bottom_edge + RIGHT_BOTTOM_MARGIN);
3929}
3930
Yaowu Xu26a9afc2015-08-13 09:42:27 -07003931static int64_t handle_inter_mode(VP10_COMP *cpi, MACROBLOCK *x,
Jingning Han3ee6db62015-08-05 19:00:31 -07003932 BLOCK_SIZE bsize,
3933 int *rate2, int64_t *distortion,
3934 int *skippable,
3935 int *rate_y, int *rate_uv,
3936 int *disable_skip,
3937 int_mv (*mode_mv)[MAX_REF_FRAMES],
3938 int mi_row, int mi_col,
3939 int_mv single_newmv[MAX_REF_FRAMES],
3940 INTERP_FILTER (*single_filter)[MAX_REF_FRAMES],
3941 int (*single_skippable)[MAX_REF_FRAMES],
3942 int64_t *psse,
3943 const int64_t ref_best_rd,
3944 int64_t *mask_filter,
3945 int64_t filter_cache[]) {
Yaowu Xufc7cbd12015-08-13 09:36:53 -07003946 VP10_COMMON *cm = &cpi->common;
Jingning Han3ee6db62015-08-05 19:00:31 -07003947 MACROBLOCKD *xd = &x->e_mbd;
3948 MB_MODE_INFO *mbmi = &xd->mi[0]->mbmi;
3949 MB_MODE_INFO_EXT *const mbmi_ext = x->mbmi_ext;
3950 const int is_comp_pred = has_second_ref(mbmi);
3951 const int this_mode = mbmi->mode;
3952 int_mv *frame_mv = mode_mv[this_mode];
3953 int i;
3954 int refs[2] = { mbmi->ref_frame[0],
3955 (mbmi->ref_frame[1] < 0 ? 0 : mbmi->ref_frame[1]) };
3956 int_mv cur_mv[2];
3957#if CONFIG_VP9_HIGHBITDEPTH
3958 DECLARE_ALIGNED(16, uint16_t, tmp_buf16[MAX_MB_PLANE * 64 * 64]);
3959 uint8_t *tmp_buf;
3960#else
3961 DECLARE_ALIGNED(16, uint8_t, tmp_buf[MAX_MB_PLANE * 64 * 64]);
3962#endif // CONFIG_VP9_HIGHBITDEPTH
3963 int pred_exists = 0;
3964 int intpel_mv;
3965 int64_t rd, tmp_rd, best_rd = INT64_MAX;
3966 int best_needs_copy = 0;
3967 uint8_t *orig_dst[MAX_MB_PLANE];
3968 int orig_dst_stride[MAX_MB_PLANE];
3969 int rs = 0;
3970 INTERP_FILTER best_filter = SWITCHABLE;
3971 uint8_t skip_txfm[MAX_MB_PLANE << 2] = {0};
3972 int64_t bsse[MAX_MB_PLANE << 2] = {0};
3973
3974 int bsl = mi_width_log2_lookup[bsize];
3975 int pred_filter_search = cpi->sf.cb_pred_filter_search ?
3976 (((mi_row + mi_col) >> bsl) +
3977 get_chessboard_index(cm->current_video_frame)) & 0x1 : 0;
3978
3979 int skip_txfm_sb = 0;
3980 int64_t skip_sse_sb = INT64_MAX;
3981 int64_t distortion_y = 0, distortion_uv = 0;
3982
3983#if CONFIG_VP9_HIGHBITDEPTH
3984 if (xd->cur_buf->flags & YV12_FLAG_HIGHBITDEPTH) {
3985 tmp_buf = CONVERT_TO_BYTEPTR(tmp_buf16);
3986 } else {
3987 tmp_buf = (uint8_t *)tmp_buf16;
3988 }
3989#endif // CONFIG_VP9_HIGHBITDEPTH
3990
3991 if (pred_filter_search) {
3992 INTERP_FILTER af = SWITCHABLE, lf = SWITCHABLE;
3993 if (xd->up_available)
3994 af = xd->mi[-xd->mi_stride]->mbmi.interp_filter;
3995 if (xd->left_available)
3996 lf = xd->mi[-1]->mbmi.interp_filter;
3997
3998 if ((this_mode != NEWMV) || (af == lf))
3999 best_filter = af;
4000 }
4001
4002 if (is_comp_pred) {
4003 if (frame_mv[refs[0]].as_int == INVALID_MV ||
4004 frame_mv[refs[1]].as_int == INVALID_MV)
4005 return INT64_MAX;
4006
4007 if (cpi->sf.adaptive_mode_search) {
4008 if (single_filter[this_mode][refs[0]] ==
4009 single_filter[this_mode][refs[1]])
4010 best_filter = single_filter[this_mode][refs[0]];
4011 }
4012 }
4013
4014 if (this_mode == NEWMV) {
4015 int rate_mv;
4016 if (is_comp_pred) {
4017 // Initialize mv using single prediction mode result.
4018 frame_mv[refs[0]].as_int = single_newmv[refs[0]].as_int;
4019 frame_mv[refs[1]].as_int = single_newmv[refs[1]].as_int;
4020
4021 if (cpi->sf.comp_inter_joint_search_thresh <= bsize) {
4022 joint_motion_search(cpi, x, bsize, frame_mv,
4023 mi_row, mi_col, single_newmv, &rate_mv);
4024 } else {
4025 rate_mv = vp10_mv_bit_cost(&frame_mv[refs[0]].as_mv,
4026 &x->mbmi_ext->ref_mvs[refs[0]][0].as_mv,
4027 x->nmvjointcost, x->mvcost, MV_COST_WEIGHT);
4028 rate_mv += vp10_mv_bit_cost(&frame_mv[refs[1]].as_mv,
4029 &x->mbmi_ext->ref_mvs[refs[1]][0].as_mv,
4030 x->nmvjointcost, x->mvcost, MV_COST_WEIGHT);
4031 }
4032 *rate2 += rate_mv;
4033 } else {
4034 int_mv tmp_mv;
4035 single_motion_search(cpi, x, bsize, mi_row, mi_col,
4036 &tmp_mv, &rate_mv);
4037 if (tmp_mv.as_int == INVALID_MV)
4038 return INT64_MAX;
4039
4040 frame_mv[refs[0]].as_int =
4041 xd->mi[0]->bmi[0].as_mv[0].as_int = tmp_mv.as_int;
4042 single_newmv[refs[0]].as_int = tmp_mv.as_int;
4043
4044 // Estimate the rate implications of a new mv but discount this
4045 // under certain circumstances where we want to help initiate a weak
4046 // motion field, where the distortion gain for a single block may not
4047 // be enough to overcome the cost of a new mv.
4048 if (discount_newmv_test(cpi, this_mode, tmp_mv, mode_mv, refs[0])) {
James Zern5e16d392015-08-17 18:19:22 -07004049 *rate2 += VPXMAX((rate_mv / NEW_MV_DISCOUNT_FACTOR), 1);
Jingning Han3ee6db62015-08-05 19:00:31 -07004050 } else {
4051 *rate2 += rate_mv;
4052 }
4053 }
4054 }
4055
4056 for (i = 0; i < is_comp_pred + 1; ++i) {
4057 cur_mv[i] = frame_mv[refs[i]];
4058 // Clip "next_nearest" so that it does not extend to far out of image
4059 if (this_mode != NEWMV)
4060 clamp_mv2(&cur_mv[i].as_mv, xd);
4061
4062 if (mv_check_bounds(x, &cur_mv[i].as_mv))
4063 return INT64_MAX;
4064 mbmi->mv[i].as_int = cur_mv[i].as_int;
4065 }
4066
4067 // do first prediction into the destination buffer. Do the next
4068 // prediction into a temporary buffer. Then keep track of which one
4069 // of these currently holds the best predictor, and use the other
4070 // one for future predictions. In the end, copy from tmp_buf to
4071 // dst if necessary.
4072 for (i = 0; i < MAX_MB_PLANE; i++) {
4073 orig_dst[i] = xd->plane[i].dst.buf;
4074 orig_dst_stride[i] = xd->plane[i].dst.stride;
4075 }
4076
4077 // We don't include the cost of the second reference here, because there
4078 // are only three options: Last/Golden, ARF/Last or Golden/ARF, or in other
4079 // words if you present them in that order, the second one is always known
4080 // if the first is known.
4081 //
4082 // Under some circumstances we discount the cost of new mv mode to encourage
4083 // initiation of a motion field.
4084 if (discount_newmv_test(cpi, this_mode, frame_mv[refs[0]],
4085 mode_mv, refs[0])) {
James Zern5e16d392015-08-17 18:19:22 -07004086 *rate2 += VPXMIN(cost_mv_ref(cpi, this_mode,
4087 mbmi_ext->mode_context[refs[0]]),
4088 cost_mv_ref(cpi, NEARESTMV,
4089 mbmi_ext->mode_context[refs[0]]));
Jingning Han3ee6db62015-08-05 19:00:31 -07004090 } else {
4091 *rate2 += cost_mv_ref(cpi, this_mode, mbmi_ext->mode_context[refs[0]]);
4092 }
4093
4094 if (RDCOST(x->rdmult, x->rddiv, *rate2, 0) > ref_best_rd &&
4095 mbmi->mode != NEARESTMV)
4096 return INT64_MAX;
4097
4098 pred_exists = 0;
4099 // Are all MVs integer pel for Y and UV
4100 intpel_mv = !mv_has_subpel(&mbmi->mv[0].as_mv);
4101 if (is_comp_pred)
4102 intpel_mv &= !mv_has_subpel(&mbmi->mv[1].as_mv);
4103
4104 // Search for best switchable filter by checking the variance of
4105 // pred error irrespective of whether the filter will be used
4106 for (i = 0; i < SWITCHABLE_FILTER_CONTEXTS; ++i)
4107 filter_cache[i] = INT64_MAX;
4108
4109 if (cm->interp_filter != BILINEAR) {
4110 if (x->source_variance < cpi->sf.disable_filter_search_var_thresh) {
4111 best_filter = EIGHTTAP;
Debargha Mukherjee85514c42015-10-30 09:19:36 -07004112#if CONFIG_EXT_INTERP
4113 } else if (!vp10_is_interp_needed(xd) && cm->interp_filter == SWITCHABLE) {
4114 best_filter = EIGHTTAP;
4115#endif
Jingning Han3ee6db62015-08-05 19:00:31 -07004116 } else if (best_filter == SWITCHABLE) {
4117 int newbest;
4118 int tmp_rate_sum = 0;
4119 int64_t tmp_dist_sum = 0;
4120
4121 for (i = 0; i < SWITCHABLE_FILTERS; ++i) {
4122 int j;
4123 int64_t rs_rd;
4124 int tmp_skip_sb = 0;
4125 int64_t tmp_skip_sse = INT64_MAX;
4126
4127 mbmi->interp_filter = i;
4128 rs = vp10_get_switchable_rate(cpi, xd);
4129 rs_rd = RDCOST(x->rdmult, x->rddiv, rs, 0);
4130
Debargha Mukherjee85514c42015-10-30 09:19:36 -07004131 if (i > 0 && intpel_mv && IsInterpolatingFilter(i)) {
Jingning Han3ee6db62015-08-05 19:00:31 -07004132 rd = RDCOST(x->rdmult, x->rddiv, tmp_rate_sum, tmp_dist_sum);
4133 filter_cache[i] = rd;
4134 filter_cache[SWITCHABLE_FILTERS] =
James Zern5e16d392015-08-17 18:19:22 -07004135 VPXMIN(filter_cache[SWITCHABLE_FILTERS], rd + rs_rd);
Jingning Han3ee6db62015-08-05 19:00:31 -07004136 if (cm->interp_filter == SWITCHABLE)
4137 rd += rs_rd;
James Zern5e16d392015-08-17 18:19:22 -07004138 *mask_filter = VPXMAX(*mask_filter, rd);
Jingning Han3ee6db62015-08-05 19:00:31 -07004139 } else {
4140 int rate_sum = 0;
4141 int64_t dist_sum = 0;
4142 if (i > 0 && cpi->sf.adaptive_interp_filter_search &&
4143 (cpi->sf.interp_filter_search_mask & (1 << i))) {
4144 rate_sum = INT_MAX;
4145 dist_sum = INT64_MAX;
4146 continue;
4147 }
4148
4149 if ((cm->interp_filter == SWITCHABLE &&
4150 (!i || best_needs_copy)) ||
4151 (cm->interp_filter != SWITCHABLE &&
4152 (cm->interp_filter == mbmi->interp_filter ||
Debargha Mukherjee85514c42015-10-30 09:19:36 -07004153 (i == 0 && intpel_mv && IsInterpolatingFilter(i))))) {
Jingning Han3ee6db62015-08-05 19:00:31 -07004154 restore_dst_buf(xd, orig_dst, orig_dst_stride);
4155 } else {
4156 for (j = 0; j < MAX_MB_PLANE; j++) {
4157 xd->plane[j].dst.buf = tmp_buf + j * 64 * 64;
4158 xd->plane[j].dst.stride = 64;
4159 }
4160 }
4161 vp10_build_inter_predictors_sb(xd, mi_row, mi_col, bsize);
4162 model_rd_for_sb(cpi, bsize, x, xd, &rate_sum, &dist_sum,
4163 &tmp_skip_sb, &tmp_skip_sse);
4164
4165 rd = RDCOST(x->rdmult, x->rddiv, rate_sum, dist_sum);
4166 filter_cache[i] = rd;
4167 filter_cache[SWITCHABLE_FILTERS] =
James Zern5e16d392015-08-17 18:19:22 -07004168 VPXMIN(filter_cache[SWITCHABLE_FILTERS], rd + rs_rd);
Jingning Han3ee6db62015-08-05 19:00:31 -07004169 if (cm->interp_filter == SWITCHABLE)
4170 rd += rs_rd;
James Zern5e16d392015-08-17 18:19:22 -07004171 *mask_filter = VPXMAX(*mask_filter, rd);
Jingning Han3ee6db62015-08-05 19:00:31 -07004172
Debargha Mukherjee85514c42015-10-30 09:19:36 -07004173 if (i == 0 && intpel_mv && IsInterpolatingFilter(i)) {
Jingning Han3ee6db62015-08-05 19:00:31 -07004174 tmp_rate_sum = rate_sum;
4175 tmp_dist_sum = dist_sum;
4176 }
4177 }
4178
4179 if (i == 0 && cpi->sf.use_rd_breakout && ref_best_rd < INT64_MAX) {
4180 if (rd / 2 > ref_best_rd) {
4181 restore_dst_buf(xd, orig_dst, orig_dst_stride);
4182 return INT64_MAX;
4183 }
4184 }
4185 newbest = i == 0 || rd < best_rd;
4186
4187 if (newbest) {
4188 best_rd = rd;
4189 best_filter = mbmi->interp_filter;
Debargha Mukherjee85514c42015-10-30 09:19:36 -07004190 if (cm->interp_filter == SWITCHABLE && i &&
4191 !(intpel_mv && IsInterpolatingFilter(i)))
Jingning Han3ee6db62015-08-05 19:00:31 -07004192 best_needs_copy = !best_needs_copy;
4193 }
4194
4195 if ((cm->interp_filter == SWITCHABLE && newbest) ||
4196 (cm->interp_filter != SWITCHABLE &&
4197 cm->interp_filter == mbmi->interp_filter)) {
4198 pred_exists = 1;
4199 tmp_rd = best_rd;
4200
4201 skip_txfm_sb = tmp_skip_sb;
4202 skip_sse_sb = tmp_skip_sse;
4203 memcpy(skip_txfm, x->skip_txfm, sizeof(skip_txfm));
4204 memcpy(bsse, x->bsse, sizeof(bsse));
4205 }
4206 }
4207 restore_dst_buf(xd, orig_dst, orig_dst_stride);
4208 }
4209 }
Debargha Mukherjee85514c42015-10-30 09:19:36 -07004210
Jingning Han3ee6db62015-08-05 19:00:31 -07004211 // Set the appropriate filter
4212 mbmi->interp_filter = cm->interp_filter != SWITCHABLE ?
4213 cm->interp_filter : best_filter;
4214 rs = cm->interp_filter == SWITCHABLE ? vp10_get_switchable_rate(cpi, xd) : 0;
4215
4216 if (pred_exists) {
4217 if (best_needs_copy) {
4218 // again temporarily set the buffers to local memory to prevent a memcpy
4219 for (i = 0; i < MAX_MB_PLANE; i++) {
4220 xd->plane[i].dst.buf = tmp_buf + i * 64 * 64;
4221 xd->plane[i].dst.stride = 64;
4222 }
4223 }
4224 rd = tmp_rd + RDCOST(x->rdmult, x->rddiv, rs, 0);
4225 } else {
4226 int tmp_rate;
4227 int64_t tmp_dist;
4228 // Handles the special case when a filter that is not in the
4229 // switchable list (ex. bilinear) is indicated at the frame level, or
4230 // skip condition holds.
4231 vp10_build_inter_predictors_sb(xd, mi_row, mi_col, bsize);
4232 model_rd_for_sb(cpi, bsize, x, xd, &tmp_rate, &tmp_dist,
4233 &skip_txfm_sb, &skip_sse_sb);
4234 rd = RDCOST(x->rdmult, x->rddiv, rs + tmp_rate, tmp_dist);
4235 memcpy(skip_txfm, x->skip_txfm, sizeof(skip_txfm));
4236 memcpy(bsse, x->bsse, sizeof(bsse));
4237 }
4238
4239 if (!is_comp_pred)
4240 single_filter[this_mode][refs[0]] = mbmi->interp_filter;
4241
4242 if (cpi->sf.adaptive_mode_search)
4243 if (is_comp_pred)
4244 if (single_skippable[this_mode][refs[0]] &&
4245 single_skippable[this_mode][refs[1]])
4246 memset(skip_txfm, SKIP_TXFM_AC_DC, sizeof(skip_txfm));
4247
4248 if (cpi->sf.use_rd_breakout && ref_best_rd < INT64_MAX) {
4249 // if current pred_error modeled rd is substantially more than the best
4250 // so far, do not bother doing full rd
4251 if (rd / 2 > ref_best_rd) {
4252 restore_dst_buf(xd, orig_dst, orig_dst_stride);
4253 return INT64_MAX;
4254 }
4255 }
4256
4257 if (cm->interp_filter == SWITCHABLE)
4258 *rate2 += rs;
4259
4260 memcpy(x->skip_txfm, skip_txfm, sizeof(skip_txfm));
4261 memcpy(x->bsse, bsse, sizeof(bsse));
4262
4263 if (!skip_txfm_sb) {
4264 int skippable_y, skippable_uv;
4265 int64_t sseuv = INT64_MAX;
4266 int64_t rdcosty = INT64_MAX;
4267
4268 // Y cost and distortion
4269 vp10_subtract_plane(x, bsize, 0);
Jingning Han2cdc1272015-10-09 09:57:42 -07004270#if CONFIG_VAR_TX
Jingning Hanf0dee772015-10-26 12:32:30 -07004271 if (cm->tx_mode == TX_MODE_SELECT || xd->lossless[mbmi->segment_id]) {
Jingning Han4b594d32015-11-02 12:05:47 -08004272#if CONFIG_EXT_TX
4273 select_tx_type_yrd(cpi, x, rate_y, &distortion_y, &skippable_y, psse,
4274 bsize, ref_best_rd);
4275#else
Jingning Han2cdc1272015-10-09 09:57:42 -07004276 inter_block_yrd(cpi, x, rate_y, &distortion_y, &skippable_y, psse,
4277 bsize, ref_best_rd);
Jingning Han4b594d32015-11-02 12:05:47 -08004278#endif
Jingning Han2cdc1272015-10-09 09:57:42 -07004279 } else {
Jingning Han0f34e352015-11-15 20:52:51 -08004280 int idx, idy;
Jingning Han2cdc1272015-10-09 09:57:42 -07004281 super_block_yrd(cpi, x, rate_y, &distortion_y, &skippable_y, psse,
4282 bsize, ref_best_rd);
Jingning Han0f34e352015-11-15 20:52:51 -08004283 for (idy = 0; idy < xd->n8_h; ++idy)
4284 for (idx = 0; idx < xd->n8_w; ++idx)
4285 mbmi->inter_tx_size[idy * 8 + idx] = mbmi->tx_size;
Jingning Han2cdc1272015-10-09 09:57:42 -07004286 }
4287#else
Jingning Han3ee6db62015-08-05 19:00:31 -07004288 super_block_yrd(cpi, x, rate_y, &distortion_y, &skippable_y, psse,
4289 bsize, ref_best_rd);
Jingning Han704985e2015-10-08 12:05:03 -07004290#endif
4291
Jingning Han3ee6db62015-08-05 19:00:31 -07004292 if (*rate_y == INT_MAX) {
4293 *rate2 = INT_MAX;
4294 *distortion = INT64_MAX;
4295 restore_dst_buf(xd, orig_dst, orig_dst_stride);
4296 return INT64_MAX;
4297 }
4298
4299 *rate2 += *rate_y;
4300 *distortion += distortion_y;
4301
4302 rdcosty = RDCOST(x->rdmult, x->rddiv, *rate2, *distortion);
James Zern5e16d392015-08-17 18:19:22 -07004303 rdcosty = VPXMIN(rdcosty, RDCOST(x->rdmult, x->rddiv, 0, *psse));
Jingning Han3ee6db62015-08-05 19:00:31 -07004304
Jingning Hana8dad552015-10-08 16:46:10 -07004305#if CONFIG_VAR_TX
4306 if (!inter_block_uvrd(cpi, x, rate_uv, &distortion_uv, &skippable_uv,
4307 &sseuv, bsize, ref_best_rd - rdcosty)) {
4308#else
Jingning Han3ee6db62015-08-05 19:00:31 -07004309 if (!super_block_uvrd(cpi, x, rate_uv, &distortion_uv, &skippable_uv,
4310 &sseuv, bsize, ref_best_rd - rdcosty)) {
Jingning Hana8dad552015-10-08 16:46:10 -07004311#endif
Jingning Han3ee6db62015-08-05 19:00:31 -07004312 *rate2 = INT_MAX;
4313 *distortion = INT64_MAX;
4314 restore_dst_buf(xd, orig_dst, orig_dst_stride);
4315 return INT64_MAX;
4316 }
4317
4318 *psse += sseuv;
4319 *rate2 += *rate_uv;
4320 *distortion += distortion_uv;
4321 *skippable = skippable_y && skippable_uv;
4322 } else {
4323 x->skip = 1;
4324 *disable_skip = 1;
4325
4326 // The cost of skip bit needs to be added.
4327 *rate2 += vp10_cost_bit(vp10_get_skip_prob(cm, xd), 1);
4328
4329 *distortion = skip_sse_sb;
4330 }
4331
4332 if (!is_comp_pred)
4333 single_skippable[this_mode][refs[0]] = *skippable;
4334
4335 restore_dst_buf(xd, orig_dst, orig_dst_stride);
4336 return 0; // The rate-distortion cost will be re-calculated by caller.
4337}
4338
Yaowu Xu26a9afc2015-08-13 09:42:27 -07004339void vp10_rd_pick_intra_mode_sb(VP10_COMP *cpi, MACROBLOCK *x,
Jingning Han3ee6db62015-08-05 19:00:31 -07004340 RD_COST *rd_cost, BLOCK_SIZE bsize,
4341 PICK_MODE_CONTEXT *ctx, int64_t best_rd) {
Yaowu Xufc7cbd12015-08-13 09:36:53 -07004342 VP10_COMMON *const cm = &cpi->common;
Jingning Han3ee6db62015-08-05 19:00:31 -07004343 MACROBLOCKD *const xd = &x->e_mbd;
4344 struct macroblockd_plane *const pd = xd->plane;
4345 int rate_y = 0, rate_uv = 0, rate_y_tokenonly = 0, rate_uv_tokenonly = 0;
4346 int y_skip = 0, uv_skip = 0;
4347 int64_t dist_y = 0, dist_uv = 0;
4348 TX_SIZE max_uv_tx_size;
Jingning Han3ee6db62015-08-05 19:00:31 -07004349 ctx->skip = 0;
4350 xd->mi[0]->mbmi.ref_frame[0] = INTRA_FRAME;
4351 xd->mi[0]->mbmi.ref_frame[1] = NONE;
4352
4353 if (bsize >= BLOCK_8X8) {
4354 if (rd_pick_intra_sby_mode(cpi, x, &rate_y, &rate_y_tokenonly,
4355 &dist_y, &y_skip, bsize,
4356 best_rd) >= best_rd) {
4357 rd_cost->rate = INT_MAX;
4358 return;
4359 }
4360 } else {
4361 y_skip = 0;
4362 if (rd_pick_intra_sub_8x8_y_mode(cpi, x, &rate_y, &rate_y_tokenonly,
4363 &dist_y, best_rd) >= best_rd) {
4364 rd_cost->rate = INT_MAX;
4365 return;
4366 }
4367 }
4368 max_uv_tx_size = get_uv_tx_size_impl(xd->mi[0]->mbmi.tx_size, bsize,
4369 pd[1].subsampling_x,
4370 pd[1].subsampling_y);
4371 rd_pick_intra_sbuv_mode(cpi, x, ctx, &rate_uv, &rate_uv_tokenonly,
James Zern5e16d392015-08-17 18:19:22 -07004372 &dist_uv, &uv_skip, VPXMAX(BLOCK_8X8, bsize),
Jingning Han3ee6db62015-08-05 19:00:31 -07004373 max_uv_tx_size);
4374
4375 if (y_skip && uv_skip) {
4376 rd_cost->rate = rate_y + rate_uv - rate_y_tokenonly - rate_uv_tokenonly +
4377 vp10_cost_bit(vp10_get_skip_prob(cm, xd), 1);
4378 rd_cost->dist = dist_y + dist_uv;
4379 } else {
4380 rd_cost->rate = rate_y + rate_uv +
4381 vp10_cost_bit(vp10_get_skip_prob(cm, xd), 0);
4382 rd_cost->dist = dist_y + dist_uv;
4383 }
4384
4385 ctx->mic = *xd->mi[0];
4386 ctx->mbmi_ext = *x->mbmi_ext;
4387 rd_cost->rdcost = RDCOST(x->rdmult, x->rddiv, rd_cost->rate, rd_cost->dist);
4388}
4389
4390// This function is designed to apply a bias or adjustment to an rd value based
4391// on the relative variance of the source and reconstruction.
4392#define LOW_VAR_THRESH 16
4393#define VLOW_ADJ_MAX 25
4394#define VHIGH_ADJ_MAX 8
Yaowu Xu26a9afc2015-08-13 09:42:27 -07004395static void rd_variance_adjustment(VP10_COMP *cpi,
Jingning Han3ee6db62015-08-05 19:00:31 -07004396 MACROBLOCK *x,
4397 BLOCK_SIZE bsize,
4398 int64_t *this_rd,
4399 MV_REFERENCE_FRAME ref_frame,
4400 unsigned int source_variance) {
4401 MACROBLOCKD *const xd = &x->e_mbd;
4402 unsigned int recon_variance;
4403 unsigned int absvar_diff = 0;
4404 int64_t var_error = 0;
4405 int64_t var_factor = 0;
4406
4407 if (*this_rd == INT64_MAX)
4408 return;
4409
4410#if CONFIG_VP9_HIGHBITDEPTH
4411 if (xd->cur_buf->flags & YV12_FLAG_HIGHBITDEPTH) {
4412 recon_variance =
4413 vp10_high_get_sby_perpixel_variance(cpi, &xd->plane[0].dst, bsize, xd->bd);
4414 } else {
4415 recon_variance =
4416 vp10_get_sby_perpixel_variance(cpi, &xd->plane[0].dst, bsize);
4417 }
4418#else
4419 recon_variance =
4420 vp10_get_sby_perpixel_variance(cpi, &xd->plane[0].dst, bsize);
4421#endif // CONFIG_VP9_HIGHBITDEPTH
4422
4423 if ((source_variance + recon_variance) > LOW_VAR_THRESH) {
4424 absvar_diff = (source_variance > recon_variance)
4425 ? (source_variance - recon_variance)
4426 : (recon_variance - source_variance);
4427
4428 var_error = (200 * source_variance * recon_variance) /
4429 ((source_variance * source_variance) +
4430 (recon_variance * recon_variance));
4431 var_error = 100 - var_error;
4432 }
4433
4434 // Source variance above a threshold and ref frame is intra.
4435 // This case is targeted mainly at discouraging intra modes that give rise
4436 // to a predictor with a low spatial complexity compared to the source.
4437 if ((source_variance > LOW_VAR_THRESH) && (ref_frame == INTRA_FRAME) &&
4438 (source_variance > recon_variance)) {
James Zern5e16d392015-08-17 18:19:22 -07004439 var_factor = VPXMIN(absvar_diff, VPXMIN(VLOW_ADJ_MAX, var_error));
Jingning Han3ee6db62015-08-05 19:00:31 -07004440 // A second possible case of interest is where the source variance
4441 // is very low and we wish to discourage false texture or motion trails.
4442 } else if ((source_variance < (LOW_VAR_THRESH >> 1)) &&
4443 (recon_variance > source_variance)) {
James Zern5e16d392015-08-17 18:19:22 -07004444 var_factor = VPXMIN(absvar_diff, VPXMIN(VHIGH_ADJ_MAX, var_error));
Jingning Han3ee6db62015-08-05 19:00:31 -07004445 }
4446 *this_rd += (*this_rd * var_factor) / 100;
4447}
4448
4449
4450// Do we have an internal image edge (e.g. formatting bars).
Yaowu Xu26a9afc2015-08-13 09:42:27 -07004451int vp10_internal_image_edge(VP10_COMP *cpi) {
Jingning Han3ee6db62015-08-05 19:00:31 -07004452 return (cpi->oxcf.pass == 2) &&
4453 ((cpi->twopass.this_frame_stats.inactive_zone_rows > 0) ||
4454 (cpi->twopass.this_frame_stats.inactive_zone_cols > 0));
4455}
4456
4457// Checks to see if a super block is on a horizontal image edge.
4458// In most cases this is the "real" edge unless there are formatting
4459// bars embedded in the stream.
Yaowu Xu26a9afc2015-08-13 09:42:27 -07004460int vp10_active_h_edge(VP10_COMP *cpi, int mi_row, int mi_step) {
Jingning Han3ee6db62015-08-05 19:00:31 -07004461 int top_edge = 0;
4462 int bottom_edge = cpi->common.mi_rows;
4463 int is_active_h_edge = 0;
4464
4465 // For two pass account for any formatting bars detected.
4466 if (cpi->oxcf.pass == 2) {
4467 TWO_PASS *twopass = &cpi->twopass;
4468
4469 // The inactive region is specified in MBs not mi units.
4470 // The image edge is in the following MB row.
4471 top_edge += (int)(twopass->this_frame_stats.inactive_zone_rows * 2);
4472
4473 bottom_edge -= (int)(twopass->this_frame_stats.inactive_zone_rows * 2);
James Zern5e16d392015-08-17 18:19:22 -07004474 bottom_edge = VPXMAX(top_edge, bottom_edge);
Jingning Han3ee6db62015-08-05 19:00:31 -07004475 }
4476
4477 if (((top_edge >= mi_row) && (top_edge < (mi_row + mi_step))) ||
4478 ((bottom_edge >= mi_row) && (bottom_edge < (mi_row + mi_step)))) {
4479 is_active_h_edge = 1;
4480 }
4481 return is_active_h_edge;
4482}
4483
4484// Checks to see if a super block is on a vertical image edge.
4485// In most cases this is the "real" edge unless there are formatting
4486// bars embedded in the stream.
Yaowu Xu26a9afc2015-08-13 09:42:27 -07004487int vp10_active_v_edge(VP10_COMP *cpi, int mi_col, int mi_step) {
Jingning Han3ee6db62015-08-05 19:00:31 -07004488 int left_edge = 0;
4489 int right_edge = cpi->common.mi_cols;
4490 int is_active_v_edge = 0;
4491
4492 // For two pass account for any formatting bars detected.
4493 if (cpi->oxcf.pass == 2) {
4494 TWO_PASS *twopass = &cpi->twopass;
4495
4496 // The inactive region is specified in MBs not mi units.
4497 // The image edge is in the following MB row.
4498 left_edge += (int)(twopass->this_frame_stats.inactive_zone_cols * 2);
4499
4500 right_edge -= (int)(twopass->this_frame_stats.inactive_zone_cols * 2);
James Zern5e16d392015-08-17 18:19:22 -07004501 right_edge = VPXMAX(left_edge, right_edge);
Jingning Han3ee6db62015-08-05 19:00:31 -07004502 }
4503
4504 if (((left_edge >= mi_col) && (left_edge < (mi_col + mi_step))) ||
4505 ((right_edge >= mi_col) && (right_edge < (mi_col + mi_step)))) {
4506 is_active_v_edge = 1;
4507 }
4508 return is_active_v_edge;
4509}
4510
4511// Checks to see if a super block is at the edge of the active image.
4512// In most cases this is the "real" edge unless there are formatting
4513// bars embedded in the stream.
Yaowu Xu26a9afc2015-08-13 09:42:27 -07004514int vp10_active_edge_sb(VP10_COMP *cpi,
Jingning Han3ee6db62015-08-05 19:00:31 -07004515 int mi_row, int mi_col) {
4516 return vp10_active_h_edge(cpi, mi_row, MI_BLOCK_SIZE) ||
4517 vp10_active_v_edge(cpi, mi_col, MI_BLOCK_SIZE);
4518}
4519
Yaowu Xu26a9afc2015-08-13 09:42:27 -07004520void vp10_rd_pick_inter_mode_sb(VP10_COMP *cpi,
Jingning Han4fa8e732015-09-10 12:24:06 -07004521 TileDataEnc *tile_data,
4522 MACROBLOCK *x,
4523 int mi_row, int mi_col,
4524 RD_COST *rd_cost, BLOCK_SIZE bsize,
4525 PICK_MODE_CONTEXT *ctx,
4526 int64_t best_rd_so_far) {
Yaowu Xufc7cbd12015-08-13 09:36:53 -07004527 VP10_COMMON *const cm = &cpi->common;
Jingning Han3ee6db62015-08-05 19:00:31 -07004528 RD_OPT *const rd_opt = &cpi->rd;
4529 SPEED_FEATURES *const sf = &cpi->sf;
4530 MACROBLOCKD *const xd = &x->e_mbd;
4531 MB_MODE_INFO *const mbmi = &xd->mi[0]->mbmi;
4532 MB_MODE_INFO_EXT *const mbmi_ext = x->mbmi_ext;
4533 const struct segmentation *const seg = &cm->seg;
4534 PREDICTION_MODE this_mode;
4535 MV_REFERENCE_FRAME ref_frame, second_ref_frame;
4536 unsigned char segment_id = mbmi->segment_id;
4537 int comp_pred, i, k;
4538 int_mv frame_mv[MB_MODE_COUNT][MAX_REF_FRAMES];
4539 struct buf_2d yv12_mb[4][MAX_MB_PLANE];
4540 int_mv single_newmv[MAX_REF_FRAMES] = { { 0 } };
4541 INTERP_FILTER single_inter_filter[MB_MODE_COUNT][MAX_REF_FRAMES];
4542 int single_skippable[MB_MODE_COUNT][MAX_REF_FRAMES];
4543 static const int flag_list[4] = { 0, VP9_LAST_FLAG, VP9_GOLD_FLAG,
4544 VP9_ALT_FLAG };
4545 int64_t best_rd = best_rd_so_far;
4546 int64_t best_pred_diff[REFERENCE_MODES];
4547 int64_t best_pred_rd[REFERENCE_MODES];
4548 int64_t best_filter_rd[SWITCHABLE_FILTER_CONTEXTS];
4549 int64_t best_filter_diff[SWITCHABLE_FILTER_CONTEXTS];
4550 MB_MODE_INFO best_mbmode;
4551 int best_mode_skippable = 0;
4552 int midx, best_mode_index = -1;
4553 unsigned int ref_costs_single[MAX_REF_FRAMES], ref_costs_comp[MAX_REF_FRAMES];
4554 vpx_prob comp_mode_p;
4555 int64_t best_intra_rd = INT64_MAX;
4556 unsigned int best_pred_sse = UINT_MAX;
4557 PREDICTION_MODE best_intra_mode = DC_PRED;
4558 int rate_uv_intra[TX_SIZES], rate_uv_tokenonly[TX_SIZES];
4559 int64_t dist_uv[TX_SIZES];
4560 int skip_uv[TX_SIZES];
4561 PREDICTION_MODE mode_uv[TX_SIZES];
hui sube3559b2015-10-07 09:29:02 -07004562#if CONFIG_EXT_INTRA
4563 EXT_INTRA_MODE_INFO ext_intra_mode_info_uv[TX_SIZES];
hui su4aa50c12015-11-10 12:09:59 -08004564 int8_t uv_angle_delta[TX_SIZES];
4565 int is_directional_mode;
4566 int rate_overhead, rate_dummy;
hui sube3559b2015-10-07 09:29:02 -07004567#endif // CONFIG_EXT_INTRA
Jingning Han3ee6db62015-08-05 19:00:31 -07004568 const int intra_cost_penalty = vp10_get_intra_cost_penalty(
4569 cm->base_qindex, cm->y_dc_delta_q, cm->bit_depth);
4570 int best_skip2 = 0;
4571 uint8_t ref_frame_skip_mask[2] = { 0 };
4572 uint16_t mode_skip_mask[MAX_REF_FRAMES] = { 0 };
4573 int mode_skip_start = sf->mode_skip_start + 1;
4574 const int *const rd_threshes = rd_opt->threshes[segment_id][bsize];
4575 const int *const rd_thresh_freq_fact = tile_data->thresh_freq_fact[bsize];
4576 int64_t mode_threshold[MAX_MODES];
4577 int *mode_map = tile_data->mode_map[bsize];
4578 const int mode_search_skip_flags = sf->mode_search_skip_flags;
4579 int64_t mask_filter = 0;
4580 int64_t filter_cache[SWITCHABLE_FILTER_CONTEXTS];
4581
4582 vp10_zero(best_mbmode);
4583
Jingning Han3ee6db62015-08-05 19:00:31 -07004584 for (i = 0; i < SWITCHABLE_FILTER_CONTEXTS; ++i)
4585 filter_cache[i] = INT64_MAX;
4586
4587 estimate_ref_frame_costs(cm, xd, segment_id, ref_costs_single, ref_costs_comp,
4588 &comp_mode_p);
4589
4590 for (i = 0; i < REFERENCE_MODES; ++i)
4591 best_pred_rd[i] = INT64_MAX;
4592 for (i = 0; i < SWITCHABLE_FILTER_CONTEXTS; i++)
4593 best_filter_rd[i] = INT64_MAX;
4594 for (i = 0; i < TX_SIZES; i++)
4595 rate_uv_intra[i] = INT_MAX;
4596 for (i = 0; i < MAX_REF_FRAMES; ++i)
4597 x->pred_sse[i] = INT_MAX;
4598 for (i = 0; i < MB_MODE_COUNT; ++i) {
4599 for (k = 0; k < MAX_REF_FRAMES; ++k) {
4600 single_inter_filter[i][k] = SWITCHABLE;
4601 single_skippable[i][k] = 0;
4602 }
4603 }
4604
4605 rd_cost->rate = INT_MAX;
4606
4607 for (ref_frame = LAST_FRAME; ref_frame <= ALTREF_FRAME; ++ref_frame) {
4608 x->pred_mv_sad[ref_frame] = INT_MAX;
4609 if (cpi->ref_frame_flags & flag_list[ref_frame]) {
4610 assert(get_ref_frame_buffer(cpi, ref_frame) != NULL);
4611 setup_buffer_inter(cpi, x, ref_frame, bsize, mi_row, mi_col,
4612 frame_mv[NEARESTMV], frame_mv[NEARMV], yv12_mb);
4613 }
4614 frame_mv[NEWMV][ref_frame].as_int = INVALID_MV;
4615 frame_mv[ZEROMV][ref_frame].as_int = 0;
4616 }
4617
4618 for (ref_frame = LAST_FRAME; ref_frame <= ALTREF_FRAME; ++ref_frame) {
4619 if (!(cpi->ref_frame_flags & flag_list[ref_frame])) {
4620 // Skip checking missing references in both single and compound reference
4621 // modes. Note that a mode will be skipped iff both reference frames
4622 // are masked out.
4623 ref_frame_skip_mask[0] |= (1 << ref_frame);
4624 ref_frame_skip_mask[1] |= SECOND_REF_FRAME_MASK;
Jingning Han1eb760e2015-09-10 12:56:41 -07004625 } else {
Jingning Han3ee6db62015-08-05 19:00:31 -07004626 for (i = LAST_FRAME; i <= ALTREF_FRAME; ++i) {
4627 // Skip fixed mv modes for poor references
4628 if ((x->pred_mv_sad[ref_frame] >> 2) > x->pred_mv_sad[i]) {
4629 mode_skip_mask[ref_frame] |= INTER_NEAREST_NEAR_ZERO;
4630 break;
4631 }
4632 }
4633 }
4634 // If the segment reference frame feature is enabled....
4635 // then do nothing if the current ref frame is not allowed..
4636 if (segfeature_active(seg, segment_id, SEG_LVL_REF_FRAME) &&
4637 get_segdata(seg, segment_id, SEG_LVL_REF_FRAME) != (int)ref_frame) {
4638 ref_frame_skip_mask[0] |= (1 << ref_frame);
4639 ref_frame_skip_mask[1] |= SECOND_REF_FRAME_MASK;
4640 }
4641 }
4642
4643 // Disable this drop out case if the ref frame
4644 // segment level feature is enabled for this segment. This is to
4645 // prevent the possibility that we end up unable to pick any mode.
4646 if (!segfeature_active(seg, segment_id, SEG_LVL_REF_FRAME)) {
4647 // Only consider ZEROMV/ALTREF_FRAME for alt ref frame,
4648 // unless ARNR filtering is enabled in which case we want
4649 // an unfiltered alternative. We allow near/nearest as well
4650 // because they may result in zero-zero MVs but be cheaper.
4651 if (cpi->rc.is_src_frame_alt_ref && (cpi->oxcf.arnr_max_frames == 0)) {
4652 ref_frame_skip_mask[0] = (1 << LAST_FRAME) | (1 << GOLDEN_FRAME);
4653 ref_frame_skip_mask[1] = SECOND_REF_FRAME_MASK;
4654 mode_skip_mask[ALTREF_FRAME] = ~INTER_NEAREST_NEAR_ZERO;
4655 if (frame_mv[NEARMV][ALTREF_FRAME].as_int != 0)
4656 mode_skip_mask[ALTREF_FRAME] |= (1 << NEARMV);
4657 if (frame_mv[NEARESTMV][ALTREF_FRAME].as_int != 0)
4658 mode_skip_mask[ALTREF_FRAME] |= (1 << NEARESTMV);
4659 }
4660 }
4661
4662 if (cpi->rc.is_src_frame_alt_ref) {
4663 if (sf->alt_ref_search_fp) {
4664 mode_skip_mask[ALTREF_FRAME] = 0;
4665 ref_frame_skip_mask[0] = ~(1 << ALTREF_FRAME);
4666 ref_frame_skip_mask[1] = SECOND_REF_FRAME_MASK;
4667 }
4668 }
4669
4670 if (sf->alt_ref_search_fp)
4671 if (!cm->show_frame && x->pred_mv_sad[GOLDEN_FRAME] < INT_MAX)
4672 if (x->pred_mv_sad[ALTREF_FRAME] > (x->pred_mv_sad[GOLDEN_FRAME] << 1))
4673 mode_skip_mask[ALTREF_FRAME] |= INTER_ALL;
4674
4675 if (sf->adaptive_mode_search) {
4676 if (cm->show_frame && !cpi->rc.is_src_frame_alt_ref &&
4677 cpi->rc.frames_since_golden >= 3)
4678 if (x->pred_mv_sad[GOLDEN_FRAME] > (x->pred_mv_sad[LAST_FRAME] << 1))
4679 mode_skip_mask[GOLDEN_FRAME] |= INTER_ALL;
4680 }
4681
4682 if (bsize > sf->max_intra_bsize) {
4683 ref_frame_skip_mask[0] |= (1 << INTRA_FRAME);
4684 ref_frame_skip_mask[1] |= (1 << INTRA_FRAME);
4685 }
4686
4687 mode_skip_mask[INTRA_FRAME] |=
4688 ~(sf->intra_y_mode_mask[max_txsize_lookup[bsize]]);
4689
4690 for (i = 0; i <= LAST_NEW_MV_INDEX; ++i)
4691 mode_threshold[i] = 0;
4692 for (i = LAST_NEW_MV_INDEX + 1; i < MAX_MODES; ++i)
4693 mode_threshold[i] = ((int64_t)rd_threshes[i] * rd_thresh_freq_fact[i]) >> 5;
4694
4695 midx = sf->schedule_mode_search ? mode_skip_start : 0;
4696 while (midx > 4) {
4697 uint8_t end_pos = 0;
4698 for (i = 5; i < midx; ++i) {
4699 if (mode_threshold[mode_map[i - 1]] > mode_threshold[mode_map[i]]) {
4700 uint8_t tmp = mode_map[i];
4701 mode_map[i] = mode_map[i - 1];
4702 mode_map[i - 1] = tmp;
4703 end_pos = i;
4704 }
4705 }
4706 midx = end_pos;
4707 }
4708
hui su5d011cb2015-09-15 12:44:13 -07004709 mbmi->palette_mode_info.palette_size[0] = 0;
4710 mbmi->palette_mode_info.palette_size[1] = 0;
Jingning Han3ee6db62015-08-05 19:00:31 -07004711 for (midx = 0; midx < MAX_MODES; ++midx) {
4712 int mode_index = mode_map[midx];
4713 int mode_excluded = 0;
4714 int64_t this_rd = INT64_MAX;
4715 int disable_skip = 0;
4716 int compmode_cost = 0;
4717 int rate2 = 0, rate_y = 0, rate_uv = 0;
4718 int64_t distortion2 = 0, distortion_y = 0, distortion_uv = 0;
4719 int skippable = 0;
4720 int this_skip2 = 0;
4721 int64_t total_sse = INT64_MAX;
4722 int early_term = 0;
4723
4724 this_mode = vp10_mode_order[mode_index].mode;
4725 ref_frame = vp10_mode_order[mode_index].ref_frame[0];
4726 second_ref_frame = vp10_mode_order[mode_index].ref_frame[1];
4727
4728 // Look at the reference frame of the best mode so far and set the
4729 // skip mask to look at a subset of the remaining modes.
4730 if (midx == mode_skip_start && best_mode_index >= 0) {
4731 switch (best_mbmode.ref_frame[0]) {
4732 case INTRA_FRAME:
4733 break;
4734 case LAST_FRAME:
4735 ref_frame_skip_mask[0] |= LAST_FRAME_MODE_MASK;
4736 ref_frame_skip_mask[1] |= SECOND_REF_FRAME_MASK;
4737 break;
4738 case GOLDEN_FRAME:
4739 ref_frame_skip_mask[0] |= GOLDEN_FRAME_MODE_MASK;
4740 ref_frame_skip_mask[1] |= SECOND_REF_FRAME_MASK;
4741 break;
4742 case ALTREF_FRAME:
4743 ref_frame_skip_mask[0] |= ALT_REF_MODE_MASK;
4744 break;
4745 case NONE:
4746 case MAX_REF_FRAMES:
4747 assert(0 && "Invalid Reference frame");
4748 break;
4749 }
4750 }
4751
4752 if ((ref_frame_skip_mask[0] & (1 << ref_frame)) &&
James Zern5e16d392015-08-17 18:19:22 -07004753 (ref_frame_skip_mask[1] & (1 << VPXMAX(0, second_ref_frame))))
Jingning Han3ee6db62015-08-05 19:00:31 -07004754 continue;
4755
4756 if (mode_skip_mask[ref_frame] & (1 << this_mode))
4757 continue;
4758
4759 // Test best rd so far against threshold for trying this mode.
4760 if (best_mode_skippable && sf->schedule_mode_search)
4761 mode_threshold[mode_index] <<= 1;
4762
4763 if (best_rd < mode_threshold[mode_index])
4764 continue;
4765
Jingning Han3ee6db62015-08-05 19:00:31 -07004766 comp_pred = second_ref_frame > INTRA_FRAME;
4767 if (comp_pred) {
4768 if (!cpi->allow_comp_inter_inter)
4769 continue;
4770
4771 // Skip compound inter modes if ARF is not available.
4772 if (!(cpi->ref_frame_flags & flag_list[second_ref_frame]))
4773 continue;
4774
4775 // Do not allow compound prediction if the segment level reference frame
4776 // feature is in use as in this case there can only be one reference.
4777 if (segfeature_active(seg, segment_id, SEG_LVL_REF_FRAME))
4778 continue;
4779
4780 if ((mode_search_skip_flags & FLAG_SKIP_COMP_BESTINTRA) &&
4781 best_mode_index >= 0 && best_mbmode.ref_frame[0] == INTRA_FRAME)
4782 continue;
4783
4784 mode_excluded = cm->reference_mode == SINGLE_REFERENCE;
4785 } else {
4786 if (ref_frame != INTRA_FRAME)
4787 mode_excluded = cm->reference_mode == COMPOUND_REFERENCE;
4788 }
4789
4790 if (ref_frame == INTRA_FRAME) {
4791 if (sf->adaptive_mode_search)
4792 if ((x->source_variance << num_pels_log2_lookup[bsize]) > best_pred_sse)
4793 continue;
4794
4795 if (this_mode != DC_PRED) {
4796 // Disable intra modes other than DC_PRED for blocks with low variance
4797 // Threshold for intra skipping based on source variance
4798 // TODO(debargha): Specialize the threshold for super block sizes
4799 const unsigned int skip_intra_var_thresh = 64;
4800 if ((mode_search_skip_flags & FLAG_SKIP_INTRA_LOWVAR) &&
4801 x->source_variance < skip_intra_var_thresh)
4802 continue;
4803 // Only search the oblique modes if the best so far is
4804 // one of the neighboring directional modes
4805 if ((mode_search_skip_flags & FLAG_SKIP_INTRA_BESTINTER) &&
4806 (this_mode >= D45_PRED && this_mode <= TM_PRED)) {
4807 if (best_mode_index >= 0 &&
4808 best_mbmode.ref_frame[0] > INTRA_FRAME)
4809 continue;
4810 }
4811 if (mode_search_skip_flags & FLAG_SKIP_INTRA_DIRMISMATCH) {
4812 if (conditional_skipintra(this_mode, best_intra_mode))
4813 continue;
4814 }
4815 }
4816 } else {
4817 const MV_REFERENCE_FRAME ref_frames[2] = {ref_frame, second_ref_frame};
4818 if (!check_best_zero_mv(cpi, mbmi_ext->mode_context, frame_mv,
4819 this_mode, ref_frames))
4820 continue;
4821 }
4822
4823 mbmi->mode = this_mode;
4824 mbmi->uv_mode = DC_PRED;
4825 mbmi->ref_frame[0] = ref_frame;
4826 mbmi->ref_frame[1] = second_ref_frame;
hui sube3559b2015-10-07 09:29:02 -07004827#if CONFIG_EXT_INTRA
4828 mbmi->ext_intra_mode_info.use_ext_intra_mode[0] = 0;
4829 mbmi->ext_intra_mode_info.use_ext_intra_mode[1] = 0;
4830#endif // CONFIG_EXT_INTRA
Jingning Han3ee6db62015-08-05 19:00:31 -07004831 // Evaluate all sub-pel filters irrespective of whether we can use
4832 // them for this frame.
4833 mbmi->interp_filter = cm->interp_filter == SWITCHABLE ? EIGHTTAP
4834 : cm->interp_filter;
4835 mbmi->mv[0].as_int = mbmi->mv[1].as_int = 0;
4836
4837 x->skip = 0;
4838 set_ref_ptrs(cm, xd, ref_frame, second_ref_frame);
4839
4840 // Select prediction reference frames.
4841 for (i = 0; i < MAX_MB_PLANE; i++) {
4842 xd->plane[i].pre[0] = yv12_mb[ref_frame][i];
4843 if (comp_pred)
4844 xd->plane[i].pre[1] = yv12_mb[second_ref_frame][i];
4845 }
4846
4847 if (ref_frame == INTRA_FRAME) {
4848 TX_SIZE uv_tx;
4849 struct macroblockd_plane *const pd = &xd->plane[1];
4850 memset(x->skip_txfm, 0, sizeof(x->skip_txfm));
hui su4aa50c12015-11-10 12:09:59 -08004851
hui sube3559b2015-10-07 09:29:02 -07004852#if CONFIG_EXT_INTRA
hui su4aa50c12015-11-10 12:09:59 -08004853 is_directional_mode = (mbmi->mode != DC_PRED && mbmi->mode != TM_PRED);
4854 if (is_directional_mode) {
4855 rate_overhead = write_uniform_cost(2 * MAX_ANGLE_DELTAS + 1, 0) +
4856 cpi->mbmode_cost[mbmi->mode];
4857 rate_y = INT_MAX;
4858 this_rd =
4859 rd_pick_intra_angle_sby(cpi, x, &rate_dummy, &rate_y, &distortion_y,
4860 &skippable, bsize, rate_overhead, best_rd);
4861 } else {
4862 mbmi->angle_delta[0] = 0;
4863 super_block_yrd(cpi, x, &rate_y, &distortion_y, &skippable,
4864 NULL, bsize, best_rd);
4865 }
4866
hui sube3559b2015-10-07 09:29:02 -07004867 // TODO(huisu): ext-intra is turned off in lossless mode for now to
4868 // avoid a unit test failure
hui su4aa50c12015-11-10 12:09:59 -08004869 if (mbmi->mode == DC_PRED && !xd->lossless[mbmi->segment_id] &&
4870 ALLOW_FILTER_INTRA_MODES) {
hui sube3559b2015-10-07 09:29:02 -07004871 MB_MODE_INFO mbmi_copy = *mbmi;
hui sube3559b2015-10-07 09:29:02 -07004872
4873 if (rate_y != INT_MAX) {
hui su4aa50c12015-11-10 12:09:59 -08004874 int this_rate = rate_y +
4875 cpi->mbmode_cost[mbmi->mode] +
hui sube3559b2015-10-07 09:29:02 -07004876 vp10_cost_bit(cm->fc->ext_intra_probs[0], 0);
4877 this_rd = RDCOST(x->rdmult, x->rddiv, this_rate, distortion_y);
4878 } else {
4879 this_rd = best_rd;
4880 }
4881
4882 if (!rd_pick_ext_intra_sby(cpi, x, &rate_dummy, &rate_y, &distortion_y,
4883 &skippable, bsize,
4884 cpi->mbmode_cost[mbmi->mode], &this_rd))
4885 *mbmi = mbmi_copy;
4886 }
hui su4aa50c12015-11-10 12:09:59 -08004887#else
4888 super_block_yrd(cpi, x, &rate_y, &distortion_y, &skippable,
4889 NULL, bsize, best_rd);
hui sube3559b2015-10-07 09:29:02 -07004890#endif // CONFIG_EXT_INTRA
hui su4aa50c12015-11-10 12:09:59 -08004891
Jingning Han3ee6db62015-08-05 19:00:31 -07004892 if (rate_y == INT_MAX)
4893 continue;
Jingning Han3ee6db62015-08-05 19:00:31 -07004894 uv_tx = get_uv_tx_size_impl(mbmi->tx_size, bsize, pd->subsampling_x,
4895 pd->subsampling_y);
4896 if (rate_uv_intra[uv_tx] == INT_MAX) {
4897 choose_intra_uv_mode(cpi, x, ctx, bsize, uv_tx,
4898 &rate_uv_intra[uv_tx], &rate_uv_tokenonly[uv_tx],
4899 &dist_uv[uv_tx], &skip_uv[uv_tx], &mode_uv[uv_tx]);
hui sube3559b2015-10-07 09:29:02 -07004900#if CONFIG_EXT_INTRA
4901 ext_intra_mode_info_uv[uv_tx] = mbmi->ext_intra_mode_info;
hui su4aa50c12015-11-10 12:09:59 -08004902 uv_angle_delta[uv_tx] = mbmi->angle_delta[1];
hui sube3559b2015-10-07 09:29:02 -07004903#endif // CONFIG_EXT_INTRA
Jingning Han3ee6db62015-08-05 19:00:31 -07004904 }
4905
4906 rate_uv = rate_uv_tokenonly[uv_tx];
4907 distortion_uv = dist_uv[uv_tx];
4908 skippable = skippable && skip_uv[uv_tx];
4909 mbmi->uv_mode = mode_uv[uv_tx];
hui sube3559b2015-10-07 09:29:02 -07004910#if CONFIG_EXT_INTRA
hui su4aa50c12015-11-10 12:09:59 -08004911 mbmi->angle_delta[1] = uv_angle_delta[uv_tx];
hui sube3559b2015-10-07 09:29:02 -07004912 mbmi->ext_intra_mode_info.use_ext_intra_mode[1] =
4913 ext_intra_mode_info_uv[uv_tx].use_ext_intra_mode[1];
4914 if (ext_intra_mode_info_uv[uv_tx].use_ext_intra_mode[1]) {
4915 mbmi->ext_intra_mode_info.ext_intra_mode[1] =
4916 ext_intra_mode_info_uv[uv_tx].ext_intra_mode[1];
hui sube3559b2015-10-07 09:29:02 -07004917 }
4918#endif // CONFIG_EXT_INTRA
Jingning Han3ee6db62015-08-05 19:00:31 -07004919
4920 rate2 = rate_y + cpi->mbmode_cost[mbmi->mode] + rate_uv_intra[uv_tx];
hui sube3559b2015-10-07 09:29:02 -07004921#if CONFIG_EXT_INTRA
hui su4aa50c12015-11-10 12:09:59 -08004922 if (is_directional_mode)
4923 rate2 += write_uniform_cost(2 * MAX_ANGLE_DELTAS + 1,
4924 MAX_ANGLE_DELTAS +
4925 mbmi->angle_delta[0]);
4926
4927 if (mbmi->mode == DC_PRED && ALLOW_FILTER_INTRA_MODES) {
hui sube3559b2015-10-07 09:29:02 -07004928 rate2 += vp10_cost_bit(cm->fc->ext_intra_probs[0],
4929 mbmi->ext_intra_mode_info.use_ext_intra_mode[0]);
4930 if (mbmi->ext_intra_mode_info.use_ext_intra_mode[0]) {
4931 EXT_INTRA_MODE ext_intra_mode =
4932 mbmi->ext_intra_mode_info.ext_intra_mode[0];
hui su4aa50c12015-11-10 12:09:59 -08004933 rate2 += write_uniform_cost(FILTER_INTRA_MODES, ext_intra_mode);
hui sube3559b2015-10-07 09:29:02 -07004934 }
4935 }
4936#endif // CONFIG_EXT_INTRA
Jingning Han3ee6db62015-08-05 19:00:31 -07004937 if (this_mode != DC_PRED && this_mode != TM_PRED)
4938 rate2 += intra_cost_penalty;
4939 distortion2 = distortion_y + distortion_uv;
4940 } else {
4941 this_rd = handle_inter_mode(cpi, x, bsize,
4942 &rate2, &distortion2, &skippable,
4943 &rate_y, &rate_uv,
4944 &disable_skip, frame_mv,
4945 mi_row, mi_col,
4946 single_newmv, single_inter_filter,
4947 single_skippable, &total_sse, best_rd,
4948 &mask_filter, filter_cache);
Debargha Mukherjee85514c42015-10-30 09:19:36 -07004949
Jingning Han3ee6db62015-08-05 19:00:31 -07004950 if (this_rd == INT64_MAX)
4951 continue;
4952
4953 compmode_cost = vp10_cost_bit(comp_mode_p, comp_pred);
4954
4955 if (cm->reference_mode == REFERENCE_MODE_SELECT)
4956 rate2 += compmode_cost;
4957 }
4958
4959 // Estimate the reference frame signaling cost and add it
4960 // to the rolling cost variable.
4961 if (comp_pred) {
4962 rate2 += ref_costs_comp[ref_frame];
4963 } else {
4964 rate2 += ref_costs_single[ref_frame];
4965 }
4966
4967 if (!disable_skip) {
4968 if (skippable) {
4969 // Back out the coefficient coding costs
4970 rate2 -= (rate_y + rate_uv);
4971
4972 // Cost the skip mb case
4973 rate2 += vp10_cost_bit(vp10_get_skip_prob(cm, xd), 1);
Debargha Mukherjee85514c42015-10-30 09:19:36 -07004974
Ronald S. Bultje60c58b52015-10-12 17:54:25 -04004975 } else if (ref_frame != INTRA_FRAME && !xd->lossless[mbmi->segment_id]) {
Jingning Han3ee6db62015-08-05 19:00:31 -07004976 if (RDCOST(x->rdmult, x->rddiv, rate_y + rate_uv, distortion2) <
4977 RDCOST(x->rdmult, x->rddiv, 0, total_sse)) {
4978 // Add in the cost of the no skip flag.
4979 rate2 += vp10_cost_bit(vp10_get_skip_prob(cm, xd), 0);
4980 } else {
4981 // FIXME(rbultje) make this work for splitmv also
4982 rate2 += vp10_cost_bit(vp10_get_skip_prob(cm, xd), 1);
4983 distortion2 = total_sse;
4984 assert(total_sse >= 0);
4985 rate2 -= (rate_y + rate_uv);
4986 this_skip2 = 1;
4987 }
4988 } else {
4989 // Add in the cost of the no skip flag.
4990 rate2 += vp10_cost_bit(vp10_get_skip_prob(cm, xd), 0);
4991 }
4992
4993 // Calculate the final RD estimate for this mode.
4994 this_rd = RDCOST(x->rdmult, x->rddiv, rate2, distortion2);
4995 }
4996
4997 // Apply an adjustment to the rd value based on the similarity of the
4998 // source variance and reconstructed variance.
4999 rd_variance_adjustment(cpi, x, bsize, &this_rd,
5000 ref_frame, x->source_variance);
5001
5002 if (ref_frame == INTRA_FRAME) {
5003 // Keep record of best intra rd
5004 if (this_rd < best_intra_rd) {
5005 best_intra_rd = this_rd;
5006 best_intra_mode = mbmi->mode;
5007 }
5008 }
5009
5010 if (!disable_skip && ref_frame == INTRA_FRAME) {
5011 for (i = 0; i < REFERENCE_MODES; ++i)
James Zern5e16d392015-08-17 18:19:22 -07005012 best_pred_rd[i] = VPXMIN(best_pred_rd[i], this_rd);
Jingning Han3ee6db62015-08-05 19:00:31 -07005013 for (i = 0; i < SWITCHABLE_FILTER_CONTEXTS; i++)
James Zern5e16d392015-08-17 18:19:22 -07005014 best_filter_rd[i] = VPXMIN(best_filter_rd[i], this_rd);
Jingning Han3ee6db62015-08-05 19:00:31 -07005015 }
5016
5017 // Did this mode help.. i.e. is it the new best mode
5018 if (this_rd < best_rd || x->skip) {
5019 int max_plane = MAX_MB_PLANE;
5020 if (!mode_excluded) {
5021 // Note index of best mode so far
5022 best_mode_index = mode_index;
5023
5024 if (ref_frame == INTRA_FRAME) {
5025 /* required for left and above block mv */
5026 mbmi->mv[0].as_int = 0;
5027 max_plane = 1;
5028 } else {
5029 best_pred_sse = x->pred_sse[ref_frame];
5030 }
5031
5032 rd_cost->rate = rate2;
5033 rd_cost->dist = distortion2;
5034 rd_cost->rdcost = this_rd;
5035 best_rd = this_rd;
5036 best_mbmode = *mbmi;
5037 best_skip2 = this_skip2;
5038 best_mode_skippable = skippable;
5039
5040 if (!x->select_tx_size)
5041 swap_block_ptr(x, ctx, 1, 0, 0, max_plane);
Jingning Hanbfeac5e2015-10-15 23:11:30 -07005042
5043#if CONFIG_VAR_TX
5044 for (i = 0; i < MAX_MB_PLANE; ++i)
5045 memcpy(ctx->blk_skip[i], x->blk_skip[i],
5046 sizeof(uint8_t) * ctx->num_4x4_blk);
5047#else
Jingning Han3ee6db62015-08-05 19:00:31 -07005048 memcpy(ctx->zcoeff_blk, x->zcoeff_blk[mbmi->tx_size],
hui su088b05f2015-08-12 10:41:51 -07005049 sizeof(ctx->zcoeff_blk[0]) * ctx->num_4x4_blk);
Jingning Hanbfeac5e2015-10-15 23:11:30 -07005050#endif
Jingning Han3ee6db62015-08-05 19:00:31 -07005051
5052 // TODO(debargha): enhance this test with a better distortion prediction
5053 // based on qp, activity mask and history
5054 if ((mode_search_skip_flags & FLAG_EARLY_TERMINATE) &&
5055 (mode_index > MIN_EARLY_TERM_INDEX)) {
5056 int qstep = xd->plane[0].dequant[1];
5057 // TODO(debargha): Enhance this by specializing for each mode_index
5058 int scale = 4;
5059#if CONFIG_VP9_HIGHBITDEPTH
5060 if (xd->cur_buf->flags & YV12_FLAG_HIGHBITDEPTH) {
5061 qstep >>= (xd->bd - 8);
5062 }
5063#endif // CONFIG_VP9_HIGHBITDEPTH
5064 if (x->source_variance < UINT_MAX) {
5065 const int var_adjust = (x->source_variance < 16);
5066 scale -= var_adjust;
5067 }
5068 if (ref_frame > INTRA_FRAME &&
5069 distortion2 * scale < qstep * qstep) {
5070 early_term = 1;
5071 }
5072 }
5073 }
5074 }
5075
5076 /* keep record of best compound/single-only prediction */
5077 if (!disable_skip && ref_frame != INTRA_FRAME) {
5078 int64_t single_rd, hybrid_rd, single_rate, hybrid_rate;
5079
5080 if (cm->reference_mode == REFERENCE_MODE_SELECT) {
5081 single_rate = rate2 - compmode_cost;
5082 hybrid_rate = rate2;
5083 } else {
5084 single_rate = rate2;
5085 hybrid_rate = rate2 + compmode_cost;
5086 }
5087
5088 single_rd = RDCOST(x->rdmult, x->rddiv, single_rate, distortion2);
5089 hybrid_rd = RDCOST(x->rdmult, x->rddiv, hybrid_rate, distortion2);
5090
5091 if (!comp_pred) {
5092 if (single_rd < best_pred_rd[SINGLE_REFERENCE])
5093 best_pred_rd[SINGLE_REFERENCE] = single_rd;
5094 } else {
5095 if (single_rd < best_pred_rd[COMPOUND_REFERENCE])
5096 best_pred_rd[COMPOUND_REFERENCE] = single_rd;
5097 }
5098 if (hybrid_rd < best_pred_rd[REFERENCE_MODE_SELECT])
5099 best_pred_rd[REFERENCE_MODE_SELECT] = hybrid_rd;
5100
5101 /* keep record of best filter type */
5102 if (!mode_excluded && cm->interp_filter != BILINEAR) {
5103 int64_t ref = filter_cache[cm->interp_filter == SWITCHABLE ?
5104 SWITCHABLE_FILTERS : cm->interp_filter];
5105
5106 for (i = 0; i < SWITCHABLE_FILTER_CONTEXTS; i++) {
5107 int64_t adj_rd;
5108 if (ref == INT64_MAX)
5109 adj_rd = 0;
5110 else if (filter_cache[i] == INT64_MAX)
5111 // when early termination is triggered, the encoder does not have
5112 // access to the rate-distortion cost. it only knows that the cost
5113 // should be above the maximum valid value. hence it takes the known
5114 // maximum plus an arbitrary constant as the rate-distortion cost.
5115 adj_rd = mask_filter - ref + 10;
5116 else
5117 adj_rd = filter_cache[i] - ref;
5118
5119 adj_rd += this_rd;
James Zern5e16d392015-08-17 18:19:22 -07005120 best_filter_rd[i] = VPXMIN(best_filter_rd[i], adj_rd);
Jingning Han3ee6db62015-08-05 19:00:31 -07005121 }
5122 }
5123 }
5124
5125 if (early_term)
5126 break;
5127
5128 if (x->skip && !comp_pred)
5129 break;
5130 }
5131
5132 // The inter modes' rate costs are not calculated precisely in some cases.
5133 // Therefore, sometimes, NEWMV is chosen instead of NEARESTMV, NEARMV, and
5134 // ZEROMV. Here, checks are added for those cases, and the mode decisions
5135 // are corrected.
5136 if (best_mbmode.mode == NEWMV) {
5137 const MV_REFERENCE_FRAME refs[2] = {best_mbmode.ref_frame[0],
5138 best_mbmode.ref_frame[1]};
5139 int comp_pred_mode = refs[1] > INTRA_FRAME;
5140
5141 if (frame_mv[NEARESTMV][refs[0]].as_int == best_mbmode.mv[0].as_int &&
5142 ((comp_pred_mode && frame_mv[NEARESTMV][refs[1]].as_int ==
5143 best_mbmode.mv[1].as_int) || !comp_pred_mode))
5144 best_mbmode.mode = NEARESTMV;
5145 else if (frame_mv[NEARMV][refs[0]].as_int == best_mbmode.mv[0].as_int &&
5146 ((comp_pred_mode && frame_mv[NEARMV][refs[1]].as_int ==
5147 best_mbmode.mv[1].as_int) || !comp_pred_mode))
5148 best_mbmode.mode = NEARMV;
5149 else if (best_mbmode.mv[0].as_int == 0 &&
5150 ((comp_pred_mode && best_mbmode.mv[1].as_int == 0) || !comp_pred_mode))
5151 best_mbmode.mode = ZEROMV;
5152 }
5153
5154 if (best_mode_index < 0 || best_rd >= best_rd_so_far) {
5155 rd_cost->rate = INT_MAX;
5156 rd_cost->rdcost = INT64_MAX;
5157 return;
5158 }
5159
5160 // If we used an estimate for the uv intra rd in the loop above...
5161 if (sf->use_uv_intra_rd_estimate) {
5162 // Do Intra UV best rd mode selection if best mode choice above was intra.
5163 if (best_mbmode.ref_frame[0] == INTRA_FRAME) {
5164 TX_SIZE uv_tx_size;
5165 *mbmi = best_mbmode;
5166 uv_tx_size = get_uv_tx_size(mbmi, &xd->plane[1]);
5167 rd_pick_intra_sbuv_mode(cpi, x, ctx, &rate_uv_intra[uv_tx_size],
5168 &rate_uv_tokenonly[uv_tx_size],
5169 &dist_uv[uv_tx_size],
5170 &skip_uv[uv_tx_size],
5171 bsize < BLOCK_8X8 ? BLOCK_8X8 : bsize,
5172 uv_tx_size);
5173 }
5174 }
5175
5176 assert((cm->interp_filter == SWITCHABLE) ||
5177 (cm->interp_filter == best_mbmode.interp_filter) ||
5178 !is_inter_block(&best_mbmode));
5179
5180 if (!cpi->rc.is_src_frame_alt_ref)
5181 vp10_update_rd_thresh_fact(tile_data->thresh_freq_fact,
5182 sf->adaptive_rd_thresh, bsize, best_mode_index);
5183
5184 // macroblock modes
5185 *mbmi = best_mbmode;
5186 x->skip |= best_skip2;
5187
5188 for (i = 0; i < REFERENCE_MODES; ++i) {
5189 if (best_pred_rd[i] == INT64_MAX)
5190 best_pred_diff[i] = INT_MIN;
5191 else
5192 best_pred_diff[i] = best_rd - best_pred_rd[i];
5193 }
5194
5195 if (!x->skip) {
5196 for (i = 0; i < SWITCHABLE_FILTER_CONTEXTS; i++) {
5197 if (best_filter_rd[i] == INT64_MAX)
5198 best_filter_diff[i] = 0;
5199 else
5200 best_filter_diff[i] = best_rd - best_filter_rd[i];
5201 }
5202 if (cm->interp_filter == SWITCHABLE)
5203 assert(best_filter_diff[SWITCHABLE_FILTERS] == 0);
5204 } else {
5205 vp10_zero(best_filter_diff);
5206 }
5207
5208 // TODO(yunqingwang): Moving this line in front of the above best_filter_diff
5209 // updating code causes PSNR loss. Need to figure out the confliction.
5210 x->skip |= best_mode_skippable;
5211
5212 if (!x->skip && !x->select_tx_size) {
5213 int has_high_freq_coeff = 0;
5214 int plane;
5215 int max_plane = is_inter_block(&xd->mi[0]->mbmi)
5216 ? MAX_MB_PLANE : 1;
5217 for (plane = 0; plane < max_plane; ++plane) {
5218 x->plane[plane].eobs = ctx->eobs_pbuf[plane][1];
5219 has_high_freq_coeff |= vp10_has_high_freq_in_plane(x, bsize, plane);
5220 }
5221
5222 for (plane = max_plane; plane < MAX_MB_PLANE; ++plane) {
5223 x->plane[plane].eobs = ctx->eobs_pbuf[plane][2];
5224 has_high_freq_coeff |= vp10_has_high_freq_in_plane(x, bsize, plane);
5225 }
5226
5227 best_mode_skippable |= !has_high_freq_coeff;
5228 }
5229
5230 assert(best_mode_index >= 0);
5231
5232 store_coding_context(x, ctx, best_mode_index, best_pred_diff,
5233 best_filter_diff, best_mode_skippable);
5234}
5235
Yaowu Xu26a9afc2015-08-13 09:42:27 -07005236void vp10_rd_pick_inter_mode_sb_seg_skip(VP10_COMP *cpi,
Jingning Han3ee6db62015-08-05 19:00:31 -07005237 TileDataEnc *tile_data,
5238 MACROBLOCK *x,
5239 RD_COST *rd_cost,
5240 BLOCK_SIZE bsize,
5241 PICK_MODE_CONTEXT *ctx,
5242 int64_t best_rd_so_far) {
Yaowu Xufc7cbd12015-08-13 09:36:53 -07005243 VP10_COMMON *const cm = &cpi->common;
Jingning Han3ee6db62015-08-05 19:00:31 -07005244 MACROBLOCKD *const xd = &x->e_mbd;
5245 MB_MODE_INFO *const mbmi = &xd->mi[0]->mbmi;
5246 unsigned char segment_id = mbmi->segment_id;
5247 const int comp_pred = 0;
5248 int i;
5249 int64_t best_pred_diff[REFERENCE_MODES];
5250 int64_t best_filter_diff[SWITCHABLE_FILTER_CONTEXTS];
5251 unsigned int ref_costs_single[MAX_REF_FRAMES], ref_costs_comp[MAX_REF_FRAMES];
5252 vpx_prob comp_mode_p;
5253 INTERP_FILTER best_filter = SWITCHABLE;
5254 int64_t this_rd = INT64_MAX;
5255 int rate2 = 0;
5256 const int64_t distortion2 = 0;
5257
Jingning Han3ee6db62015-08-05 19:00:31 -07005258 estimate_ref_frame_costs(cm, xd, segment_id, ref_costs_single, ref_costs_comp,
5259 &comp_mode_p);
5260
5261 for (i = 0; i < MAX_REF_FRAMES; ++i)
5262 x->pred_sse[i] = INT_MAX;
5263 for (i = LAST_FRAME; i < MAX_REF_FRAMES; ++i)
5264 x->pred_mv_sad[i] = INT_MAX;
5265
5266 rd_cost->rate = INT_MAX;
5267
5268 assert(segfeature_active(&cm->seg, segment_id, SEG_LVL_SKIP));
5269
hui su5d011cb2015-09-15 12:44:13 -07005270 mbmi->palette_mode_info.palette_size[0] = 0;
5271 mbmi->palette_mode_info.palette_size[1] = 0;
hui sube3559b2015-10-07 09:29:02 -07005272#if CONFIG_EXT_INTRA
5273 mbmi->ext_intra_mode_info.use_ext_intra_mode[0] = 0;
5274 mbmi->ext_intra_mode_info.use_ext_intra_mode[1] = 0;
5275#endif // CONFIG_EXT_INTRA
Jingning Han3ee6db62015-08-05 19:00:31 -07005276 mbmi->mode = ZEROMV;
5277 mbmi->uv_mode = DC_PRED;
5278 mbmi->ref_frame[0] = LAST_FRAME;
5279 mbmi->ref_frame[1] = NONE;
5280 mbmi->mv[0].as_int = 0;
5281 x->skip = 1;
5282
5283 if (cm->interp_filter != BILINEAR) {
5284 best_filter = EIGHTTAP;
5285 if (cm->interp_filter == SWITCHABLE &&
Debargha Mukherjee85514c42015-10-30 09:19:36 -07005286#if CONFIG_EXT_INTERP
5287 vp10_is_interp_needed(xd) &&
5288#endif // CONFIG_EXT_INTERP
Jingning Han3ee6db62015-08-05 19:00:31 -07005289 x->source_variance >= cpi->sf.disable_filter_search_var_thresh) {
5290 int rs;
5291 int best_rs = INT_MAX;
5292 for (i = 0; i < SWITCHABLE_FILTERS; ++i) {
5293 mbmi->interp_filter = i;
5294 rs = vp10_get_switchable_rate(cpi, xd);
5295 if (rs < best_rs) {
5296 best_rs = rs;
5297 best_filter = mbmi->interp_filter;
5298 }
5299 }
5300 }
5301 }
5302 // Set the appropriate filter
5303 if (cm->interp_filter == SWITCHABLE) {
5304 mbmi->interp_filter = best_filter;
5305 rate2 += vp10_get_switchable_rate(cpi, xd);
5306 } else {
5307 mbmi->interp_filter = cm->interp_filter;
5308 }
5309
5310 if (cm->reference_mode == REFERENCE_MODE_SELECT)
5311 rate2 += vp10_cost_bit(comp_mode_p, comp_pred);
5312
5313 // Estimate the reference frame signaling cost and add it
5314 // to the rolling cost variable.
5315 rate2 += ref_costs_single[LAST_FRAME];
5316 this_rd = RDCOST(x->rdmult, x->rddiv, rate2, distortion2);
5317
5318 rd_cost->rate = rate2;
5319 rd_cost->dist = distortion2;
5320 rd_cost->rdcost = this_rd;
5321
5322 if (this_rd >= best_rd_so_far) {
5323 rd_cost->rate = INT_MAX;
5324 rd_cost->rdcost = INT64_MAX;
5325 return;
5326 }
5327
5328 assert((cm->interp_filter == SWITCHABLE) ||
5329 (cm->interp_filter == mbmi->interp_filter));
5330
5331 vp10_update_rd_thresh_fact(tile_data->thresh_freq_fact,
5332 cpi->sf.adaptive_rd_thresh, bsize, THR_ZEROMV);
5333
5334 vp10_zero(best_pred_diff);
5335 vp10_zero(best_filter_diff);
5336
5337 if (!x->select_tx_size)
5338 swap_block_ptr(x, ctx, 1, 0, 0, MAX_MB_PLANE);
5339 store_coding_context(x, ctx, THR_ZEROMV,
5340 best_pred_diff, best_filter_diff, 0);
5341}
5342
Yaowu Xu26a9afc2015-08-13 09:42:27 -07005343void vp10_rd_pick_inter_mode_sub8x8(VP10_COMP *cpi,
Jingning Han3ee6db62015-08-05 19:00:31 -07005344 TileDataEnc *tile_data,
5345 MACROBLOCK *x,
5346 int mi_row, int mi_col,
5347 RD_COST *rd_cost,
5348 BLOCK_SIZE bsize,
5349 PICK_MODE_CONTEXT *ctx,
5350 int64_t best_rd_so_far) {
Yaowu Xufc7cbd12015-08-13 09:36:53 -07005351 VP10_COMMON *const cm = &cpi->common;
Jingning Han3ee6db62015-08-05 19:00:31 -07005352 RD_OPT *const rd_opt = &cpi->rd;
5353 SPEED_FEATURES *const sf = &cpi->sf;
5354 MACROBLOCKD *const xd = &x->e_mbd;
5355 MB_MODE_INFO *const mbmi = &xd->mi[0]->mbmi;
5356 const struct segmentation *const seg = &cm->seg;
5357 MV_REFERENCE_FRAME ref_frame, second_ref_frame;
5358 unsigned char segment_id = mbmi->segment_id;
5359 int comp_pred, i;
5360 int_mv frame_mv[MB_MODE_COUNT][MAX_REF_FRAMES];
5361 struct buf_2d yv12_mb[4][MAX_MB_PLANE];
5362 static const int flag_list[4] = { 0, VP9_LAST_FLAG, VP9_GOLD_FLAG,
5363 VP9_ALT_FLAG };
5364 int64_t best_rd = best_rd_so_far;
5365 int64_t best_yrd = best_rd_so_far; // FIXME(rbultje) more precise
5366 int64_t best_pred_diff[REFERENCE_MODES];
5367 int64_t best_pred_rd[REFERENCE_MODES];
5368 int64_t best_filter_rd[SWITCHABLE_FILTER_CONTEXTS];
5369 int64_t best_filter_diff[SWITCHABLE_FILTER_CONTEXTS];
5370 MB_MODE_INFO best_mbmode;
5371 int ref_index, best_ref_index = 0;
5372 unsigned int ref_costs_single[MAX_REF_FRAMES], ref_costs_comp[MAX_REF_FRAMES];
5373 vpx_prob comp_mode_p;
5374 INTERP_FILTER tmp_best_filter = SWITCHABLE;
5375 int rate_uv_intra, rate_uv_tokenonly;
5376 int64_t dist_uv;
5377 int skip_uv;
5378 PREDICTION_MODE mode_uv = DC_PRED;
5379 const int intra_cost_penalty = vp10_get_intra_cost_penalty(
5380 cm->base_qindex, cm->y_dc_delta_q, cm->bit_depth);
5381 int_mv seg_mvs[4][MAX_REF_FRAMES];
5382 b_mode_info best_bmodes[4];
5383 int best_skip2 = 0;
5384 int ref_frame_skip_mask[2] = { 0 };
5385 int64_t mask_filter = 0;
5386 int64_t filter_cache[SWITCHABLE_FILTER_CONTEXTS];
5387 int internal_active_edge =
5388 vp10_active_edge_sb(cpi, mi_row, mi_col) && vp10_internal_image_edge(cpi);
5389
Jingning Han3ee6db62015-08-05 19:00:31 -07005390 memset(x->zcoeff_blk[TX_4X4], 0, 4);
5391 vp10_zero(best_mbmode);
5392
hui sube3559b2015-10-07 09:29:02 -07005393#if CONFIG_EXT_INTRA
5394 mbmi->ext_intra_mode_info.use_ext_intra_mode[0] = 0;
5395 mbmi->ext_intra_mode_info.use_ext_intra_mode[1] = 0;
5396#endif // CONFIG_EXT_INTRA
5397
Jingning Han3ee6db62015-08-05 19:00:31 -07005398 for (i = 0; i < SWITCHABLE_FILTER_CONTEXTS; ++i)
5399 filter_cache[i] = INT64_MAX;
5400
5401 for (i = 0; i < 4; i++) {
5402 int j;
5403 for (j = 0; j < MAX_REF_FRAMES; j++)
5404 seg_mvs[i][j].as_int = INVALID_MV;
5405 }
5406
5407 estimate_ref_frame_costs(cm, xd, segment_id, ref_costs_single, ref_costs_comp,
5408 &comp_mode_p);
5409
5410 for (i = 0; i < REFERENCE_MODES; ++i)
5411 best_pred_rd[i] = INT64_MAX;
5412 for (i = 0; i < SWITCHABLE_FILTER_CONTEXTS; i++)
5413 best_filter_rd[i] = INT64_MAX;
5414 rate_uv_intra = INT_MAX;
5415
5416 rd_cost->rate = INT_MAX;
5417
5418 for (ref_frame = LAST_FRAME; ref_frame <= ALTREF_FRAME; ref_frame++) {
5419 if (cpi->ref_frame_flags & flag_list[ref_frame]) {
5420 setup_buffer_inter(cpi, x, ref_frame, bsize, mi_row, mi_col,
5421 frame_mv[NEARESTMV], frame_mv[NEARMV],
5422 yv12_mb);
5423 } else {
5424 ref_frame_skip_mask[0] |= (1 << ref_frame);
5425 ref_frame_skip_mask[1] |= SECOND_REF_FRAME_MASK;
5426 }
5427 frame_mv[NEWMV][ref_frame].as_int = INVALID_MV;
5428 frame_mv[ZEROMV][ref_frame].as_int = 0;
5429 }
5430
hui su5d011cb2015-09-15 12:44:13 -07005431 mbmi->palette_mode_info.palette_size[0] = 0;
5432 mbmi->palette_mode_info.palette_size[1] = 0;
5433
Jingning Han3ee6db62015-08-05 19:00:31 -07005434 for (ref_index = 0; ref_index < MAX_REFS; ++ref_index) {
5435 int mode_excluded = 0;
5436 int64_t this_rd = INT64_MAX;
5437 int disable_skip = 0;
5438 int compmode_cost = 0;
5439 int rate2 = 0, rate_y = 0, rate_uv = 0;
5440 int64_t distortion2 = 0, distortion_y = 0, distortion_uv = 0;
5441 int skippable = 0;
5442 int i;
5443 int this_skip2 = 0;
5444 int64_t total_sse = INT_MAX;
5445 int early_term = 0;
5446
5447 ref_frame = vp10_ref_order[ref_index].ref_frame[0];
5448 second_ref_frame = vp10_ref_order[ref_index].ref_frame[1];
5449
5450 // Look at the reference frame of the best mode so far and set the
5451 // skip mask to look at a subset of the remaining modes.
5452 if (ref_index > 2 && sf->mode_skip_start < MAX_MODES) {
5453 if (ref_index == 3) {
5454 switch (best_mbmode.ref_frame[0]) {
5455 case INTRA_FRAME:
5456 break;
5457 case LAST_FRAME:
5458 ref_frame_skip_mask[0] |= (1 << GOLDEN_FRAME) | (1 << ALTREF_FRAME);
5459 ref_frame_skip_mask[1] |= SECOND_REF_FRAME_MASK;
5460 break;
5461 case GOLDEN_FRAME:
5462 ref_frame_skip_mask[0] |= (1 << LAST_FRAME) | (1 << ALTREF_FRAME);
5463 ref_frame_skip_mask[1] |= SECOND_REF_FRAME_MASK;
5464 break;
5465 case ALTREF_FRAME:
5466 ref_frame_skip_mask[0] |= (1 << GOLDEN_FRAME) | (1 << LAST_FRAME);
5467 break;
5468 case NONE:
5469 case MAX_REF_FRAMES:
5470 assert(0 && "Invalid Reference frame");
5471 break;
5472 }
5473 }
5474 }
5475
5476 if ((ref_frame_skip_mask[0] & (1 << ref_frame)) &&
James Zern5e16d392015-08-17 18:19:22 -07005477 (ref_frame_skip_mask[1] & (1 << VPXMAX(0, second_ref_frame))))
Jingning Han3ee6db62015-08-05 19:00:31 -07005478 continue;
5479
5480 // Test best rd so far against threshold for trying this mode.
5481 if (!internal_active_edge &&
5482 rd_less_than_thresh(best_rd,
5483 rd_opt->threshes[segment_id][bsize][ref_index],
5484 tile_data->thresh_freq_fact[bsize][ref_index]))
5485 continue;
5486
5487 comp_pred = second_ref_frame > INTRA_FRAME;
5488 if (comp_pred) {
5489 if (!cpi->allow_comp_inter_inter)
5490 continue;
5491 if (!(cpi->ref_frame_flags & flag_list[second_ref_frame]))
5492 continue;
5493 // Do not allow compound prediction if the segment level reference frame
5494 // feature is in use as in this case there can only be one reference.
5495 if (segfeature_active(seg, segment_id, SEG_LVL_REF_FRAME))
5496 continue;
5497
5498 if ((sf->mode_search_skip_flags & FLAG_SKIP_COMP_BESTINTRA) &&
5499 best_mbmode.ref_frame[0] == INTRA_FRAME)
5500 continue;
5501 }
5502
5503 // TODO(jingning, jkoleszar): scaling reference frame not supported for
5504 // sub8x8 blocks.
5505 if (ref_frame > INTRA_FRAME &&
5506 vp10_is_scaled(&cm->frame_refs[ref_frame - 1].sf))
5507 continue;
5508
5509 if (second_ref_frame > INTRA_FRAME &&
5510 vp10_is_scaled(&cm->frame_refs[second_ref_frame - 1].sf))
5511 continue;
5512
5513 if (comp_pred)
5514 mode_excluded = cm->reference_mode == SINGLE_REFERENCE;
5515 else if (ref_frame != INTRA_FRAME)
5516 mode_excluded = cm->reference_mode == COMPOUND_REFERENCE;
5517
5518 // If the segment reference frame feature is enabled....
5519 // then do nothing if the current ref frame is not allowed..
5520 if (segfeature_active(seg, segment_id, SEG_LVL_REF_FRAME) &&
5521 get_segdata(seg, segment_id, SEG_LVL_REF_FRAME) != (int)ref_frame) {
5522 continue;
5523 // Disable this drop out case if the ref frame
5524 // segment level feature is enabled for this segment. This is to
5525 // prevent the possibility that we end up unable to pick any mode.
5526 } else if (!segfeature_active(seg, segment_id, SEG_LVL_REF_FRAME)) {
5527 // Only consider ZEROMV/ALTREF_FRAME for alt ref frame,
5528 // unless ARNR filtering is enabled in which case we want
5529 // an unfiltered alternative. We allow near/nearest as well
5530 // because they may result in zero-zero MVs but be cheaper.
5531 if (cpi->rc.is_src_frame_alt_ref && (cpi->oxcf.arnr_max_frames == 0))
5532 continue;
5533 }
5534
5535 mbmi->tx_size = TX_4X4;
5536 mbmi->uv_mode = DC_PRED;
5537 mbmi->ref_frame[0] = ref_frame;
5538 mbmi->ref_frame[1] = second_ref_frame;
5539 // Evaluate all sub-pel filters irrespective of whether we can use
5540 // them for this frame.
5541 mbmi->interp_filter = cm->interp_filter == SWITCHABLE ? EIGHTTAP
5542 : cm->interp_filter;
5543 x->skip = 0;
5544 set_ref_ptrs(cm, xd, ref_frame, second_ref_frame);
5545
5546 // Select prediction reference frames.
5547 for (i = 0; i < MAX_MB_PLANE; i++) {
5548 xd->plane[i].pre[0] = yv12_mb[ref_frame][i];
5549 if (comp_pred)
5550 xd->plane[i].pre[1] = yv12_mb[second_ref_frame][i];
5551 }
5552
Jingning Han704985e2015-10-08 12:05:03 -07005553#if CONFIG_VAR_TX
Jingning Han0f34e352015-11-15 20:52:51 -08005554 mbmi->inter_tx_size[0] = mbmi->tx_size;
Jingning Han704985e2015-10-08 12:05:03 -07005555#endif
5556
Jingning Han3ee6db62015-08-05 19:00:31 -07005557 if (ref_frame == INTRA_FRAME) {
5558 int rate;
5559 if (rd_pick_intra_sub_8x8_y_mode(cpi, x, &rate, &rate_y,
5560 &distortion_y, best_rd) >= best_rd)
5561 continue;
5562 rate2 += rate;
5563 rate2 += intra_cost_penalty;
5564 distortion2 += distortion_y;
5565
5566 if (rate_uv_intra == INT_MAX) {
5567 choose_intra_uv_mode(cpi, x, ctx, bsize, TX_4X4,
5568 &rate_uv_intra,
5569 &rate_uv_tokenonly,
5570 &dist_uv, &skip_uv,
5571 &mode_uv);
5572 }
5573 rate2 += rate_uv_intra;
5574 rate_uv = rate_uv_tokenonly;
5575 distortion2 += dist_uv;
5576 distortion_uv = dist_uv;
5577 mbmi->uv_mode = mode_uv;
5578 } else {
5579 int rate;
5580 int64_t distortion;
5581 int64_t this_rd_thresh;
5582 int64_t tmp_rd, tmp_best_rd = INT64_MAX, tmp_best_rdu = INT64_MAX;
5583 int tmp_best_rate = INT_MAX, tmp_best_ratey = INT_MAX;
5584 int64_t tmp_best_distortion = INT_MAX, tmp_best_sse, uv_sse;
5585 int tmp_best_skippable = 0;
5586 int switchable_filter_index;
5587 int_mv *second_ref = comp_pred ?
5588 &x->mbmi_ext->ref_mvs[second_ref_frame][0] : NULL;
5589 b_mode_info tmp_best_bmodes[16];
5590 MB_MODE_INFO tmp_best_mbmode;
5591 BEST_SEG_INFO bsi[SWITCHABLE_FILTERS];
5592 int pred_exists = 0;
5593 int uv_skippable;
5594
5595 this_rd_thresh = (ref_frame == LAST_FRAME) ?
5596 rd_opt->threshes[segment_id][bsize][THR_LAST] :
5597 rd_opt->threshes[segment_id][bsize][THR_ALTR];
5598 this_rd_thresh = (ref_frame == GOLDEN_FRAME) ?
5599 rd_opt->threshes[segment_id][bsize][THR_GOLD] : this_rd_thresh;
5600 for (i = 0; i < SWITCHABLE_FILTER_CONTEXTS; ++i)
5601 filter_cache[i] = INT64_MAX;
5602
5603 if (cm->interp_filter != BILINEAR) {
5604 tmp_best_filter = EIGHTTAP;
5605 if (x->source_variance < sf->disable_filter_search_var_thresh) {
5606 tmp_best_filter = EIGHTTAP;
5607 } else if (sf->adaptive_pred_interp_filter == 1 &&
5608 ctx->pred_interp_filter < SWITCHABLE) {
5609 tmp_best_filter = ctx->pred_interp_filter;
5610 } else if (sf->adaptive_pred_interp_filter == 2) {
5611 tmp_best_filter = ctx->pred_interp_filter < SWITCHABLE ?
5612 ctx->pred_interp_filter : 0;
5613 } else {
5614 for (switchable_filter_index = 0;
5615 switchable_filter_index < SWITCHABLE_FILTERS;
5616 ++switchable_filter_index) {
5617 int newbest, rs;
5618 int64_t rs_rd;
5619 MB_MODE_INFO_EXT *mbmi_ext = x->mbmi_ext;
5620 mbmi->interp_filter = switchable_filter_index;
5621 tmp_rd = rd_pick_best_sub8x8_mode(cpi, x,
5622 &mbmi_ext->ref_mvs[ref_frame][0],
5623 second_ref, best_yrd, &rate,
5624 &rate_y, &distortion,
5625 &skippable, &total_sse,
5626 (int) this_rd_thresh, seg_mvs,
5627 bsi, switchable_filter_index,
5628 mi_row, mi_col);
Debargha Mukherjee85514c42015-10-30 09:19:36 -07005629#if CONFIG_EXT_INTERP
5630 if (!vp10_is_interp_needed(xd) && cm->interp_filter == SWITCHABLE &&
5631 mbmi->interp_filter != EIGHTTAP) // invalid configuration
5632 continue;
5633#endif // CONFIG_EXT_INTERP
Jingning Han3ee6db62015-08-05 19:00:31 -07005634 if (tmp_rd == INT64_MAX)
5635 continue;
5636 rs = vp10_get_switchable_rate(cpi, xd);
5637 rs_rd = RDCOST(x->rdmult, x->rddiv, rs, 0);
5638 filter_cache[switchable_filter_index] = tmp_rd;
5639 filter_cache[SWITCHABLE_FILTERS] =
James Zern5e16d392015-08-17 18:19:22 -07005640 VPXMIN(filter_cache[SWITCHABLE_FILTERS], tmp_rd + rs_rd);
Jingning Han3ee6db62015-08-05 19:00:31 -07005641 if (cm->interp_filter == SWITCHABLE)
5642 tmp_rd += rs_rd;
5643
James Zern5e16d392015-08-17 18:19:22 -07005644 mask_filter = VPXMAX(mask_filter, tmp_rd);
Jingning Han3ee6db62015-08-05 19:00:31 -07005645
5646 newbest = (tmp_rd < tmp_best_rd);
5647 if (newbest) {
5648 tmp_best_filter = mbmi->interp_filter;
5649 tmp_best_rd = tmp_rd;
5650 }
5651 if ((newbest && cm->interp_filter == SWITCHABLE) ||
5652 (mbmi->interp_filter == cm->interp_filter &&
5653 cm->interp_filter != SWITCHABLE)) {
5654 tmp_best_rdu = tmp_rd;
5655 tmp_best_rate = rate;
5656 tmp_best_ratey = rate_y;
5657 tmp_best_distortion = distortion;
5658 tmp_best_sse = total_sse;
5659 tmp_best_skippable = skippable;
5660 tmp_best_mbmode = *mbmi;
5661 for (i = 0; i < 4; i++) {
5662 tmp_best_bmodes[i] = xd->mi[0]->bmi[i];
5663 x->zcoeff_blk[TX_4X4][i] = !x->plane[0].eobs[i];
5664 }
5665 pred_exists = 1;
5666 if (switchable_filter_index == 0 &&
5667 sf->use_rd_breakout &&
5668 best_rd < INT64_MAX) {
5669 if (tmp_best_rdu / 2 > best_rd) {
5670 // skip searching the other filters if the first is
5671 // already substantially larger than the best so far
5672 tmp_best_filter = mbmi->interp_filter;
5673 tmp_best_rdu = INT64_MAX;
5674 break;
5675 }
5676 }
5677 }
5678 } // switchable_filter_index loop
5679 }
5680 }
5681
5682 if (tmp_best_rdu == INT64_MAX && pred_exists)
5683 continue;
5684
5685 mbmi->interp_filter = (cm->interp_filter == SWITCHABLE ?
5686 tmp_best_filter : cm->interp_filter);
Debargha Mukherjee85514c42015-10-30 09:19:36 -07005687
5688
Jingning Han3ee6db62015-08-05 19:00:31 -07005689 if (!pred_exists) {
5690 // Handles the special case when a filter that is not in the
Debargha Mukherjee85514c42015-10-30 09:19:36 -07005691 // switchable list (bilinear) is indicated at the frame level
Jingning Han3ee6db62015-08-05 19:00:31 -07005692 tmp_rd = rd_pick_best_sub8x8_mode(cpi, x,
5693 &x->mbmi_ext->ref_mvs[ref_frame][0],
5694 second_ref, best_yrd, &rate, &rate_y,
5695 &distortion, &skippable, &total_sse,
5696 (int) this_rd_thresh, seg_mvs, bsi, 0,
5697 mi_row, mi_col);
Debargha Mukherjee85514c42015-10-30 09:19:36 -07005698#if CONFIG_EXT_INTERP
5699 if (!vp10_is_interp_needed(xd) && cm->interp_filter == SWITCHABLE &&
5700 mbmi->interp_filter != EIGHTTAP) {
5701 mbmi->interp_filter = EIGHTTAP;
5702 tmp_rd = rd_pick_best_sub8x8_mode(
5703 cpi, x,
5704 &x->mbmi_ext->ref_mvs[ref_frame][0],
5705 second_ref, best_yrd, &rate, &rate_y,
5706 &distortion, &skippable, &total_sse,
5707 (int) this_rd_thresh, seg_mvs, bsi, 0,
5708 mi_row, mi_col);
5709 }
5710#endif // CONFIG_EXT_INTERP
Jingning Han3ee6db62015-08-05 19:00:31 -07005711 if (tmp_rd == INT64_MAX)
5712 continue;
5713 } else {
5714 total_sse = tmp_best_sse;
5715 rate = tmp_best_rate;
5716 rate_y = tmp_best_ratey;
5717 distortion = tmp_best_distortion;
5718 skippable = tmp_best_skippable;
5719 *mbmi = tmp_best_mbmode;
5720 for (i = 0; i < 4; i++)
5721 xd->mi[0]->bmi[i] = tmp_best_bmodes[i];
5722 }
5723
5724 rate2 += rate;
5725 distortion2 += distortion;
5726
5727 if (cm->interp_filter == SWITCHABLE)
5728 rate2 += vp10_get_switchable_rate(cpi, xd);
5729
5730 if (!mode_excluded)
5731 mode_excluded = comp_pred ? cm->reference_mode == SINGLE_REFERENCE
5732 : cm->reference_mode == COMPOUND_REFERENCE;
5733
5734 compmode_cost = vp10_cost_bit(comp_mode_p, comp_pred);
5735
5736 tmp_best_rdu = best_rd -
James Zern5e16d392015-08-17 18:19:22 -07005737 VPXMIN(RDCOST(x->rdmult, x->rddiv, rate2, distortion2),
5738 RDCOST(x->rdmult, x->rddiv, 0, total_sse));
Jingning Han3ee6db62015-08-05 19:00:31 -07005739
5740 if (tmp_best_rdu > 0) {
5741 // If even the 'Y' rd value of split is higher than best so far
5742 // then dont bother looking at UV
5743 vp10_build_inter_predictors_sbuv(&x->e_mbd, mi_row, mi_col,
5744 BLOCK_8X8);
5745 memset(x->skip_txfm, SKIP_TXFM_NONE, sizeof(x->skip_txfm));
Jingning Hana8dad552015-10-08 16:46:10 -07005746#if CONFIG_VAR_TX
5747 if (!inter_block_uvrd(cpi, x, &rate_uv, &distortion_uv, &uv_skippable,
5748 &uv_sse, BLOCK_8X8, tmp_best_rdu))
5749 continue;
5750#else
Jingning Han3ee6db62015-08-05 19:00:31 -07005751 if (!super_block_uvrd(cpi, x, &rate_uv, &distortion_uv, &uv_skippable,
5752 &uv_sse, BLOCK_8X8, tmp_best_rdu))
5753 continue;
Jingning Hana8dad552015-10-08 16:46:10 -07005754#endif
Jingning Han3ee6db62015-08-05 19:00:31 -07005755 rate2 += rate_uv;
5756 distortion2 += distortion_uv;
5757 skippable = skippable && uv_skippable;
5758 total_sse += uv_sse;
5759 }
5760 }
5761
5762 if (cm->reference_mode == REFERENCE_MODE_SELECT)
5763 rate2 += compmode_cost;
5764
5765 // Estimate the reference frame signaling cost and add it
5766 // to the rolling cost variable.
5767 if (second_ref_frame > INTRA_FRAME) {
5768 rate2 += ref_costs_comp[ref_frame];
5769 } else {
5770 rate2 += ref_costs_single[ref_frame];
5771 }
5772
5773 if (!disable_skip) {
5774 // Skip is never coded at the segment level for sub8x8 blocks and instead
5775 // always coded in the bitstream at the mode info level.
5776
Ronald S. Bultje60c58b52015-10-12 17:54:25 -04005777 if (ref_frame != INTRA_FRAME && !xd->lossless[mbmi->segment_id]) {
Jingning Han3ee6db62015-08-05 19:00:31 -07005778 if (RDCOST(x->rdmult, x->rddiv, rate_y + rate_uv, distortion2) <
5779 RDCOST(x->rdmult, x->rddiv, 0, total_sse)) {
5780 // Add in the cost of the no skip flag.
5781 rate2 += vp10_cost_bit(vp10_get_skip_prob(cm, xd), 0);
5782 } else {
5783 // FIXME(rbultje) make this work for splitmv also
5784 rate2 += vp10_cost_bit(vp10_get_skip_prob(cm, xd), 1);
5785 distortion2 = total_sse;
5786 assert(total_sse >= 0);
5787 rate2 -= (rate_y + rate_uv);
5788 rate_y = 0;
5789 rate_uv = 0;
5790 this_skip2 = 1;
5791 }
5792 } else {
5793 // Add in the cost of the no skip flag.
5794 rate2 += vp10_cost_bit(vp10_get_skip_prob(cm, xd), 0);
5795 }
5796
5797 // Calculate the final RD estimate for this mode.
5798 this_rd = RDCOST(x->rdmult, x->rddiv, rate2, distortion2);
5799 }
5800
5801 if (!disable_skip && ref_frame == INTRA_FRAME) {
5802 for (i = 0; i < REFERENCE_MODES; ++i)
James Zern5e16d392015-08-17 18:19:22 -07005803 best_pred_rd[i] = VPXMIN(best_pred_rd[i], this_rd);
Jingning Han3ee6db62015-08-05 19:00:31 -07005804 for (i = 0; i < SWITCHABLE_FILTER_CONTEXTS; i++)
James Zern5e16d392015-08-17 18:19:22 -07005805 best_filter_rd[i] = VPXMIN(best_filter_rd[i], this_rd);
Jingning Han3ee6db62015-08-05 19:00:31 -07005806 }
5807
5808 // Did this mode help.. i.e. is it the new best mode
5809 if (this_rd < best_rd || x->skip) {
5810 if (!mode_excluded) {
5811 int max_plane = MAX_MB_PLANE;
5812 // Note index of best mode so far
5813 best_ref_index = ref_index;
5814
5815 if (ref_frame == INTRA_FRAME) {
5816 /* required for left and above block mv */
5817 mbmi->mv[0].as_int = 0;
5818 max_plane = 1;
5819 }
5820
5821 rd_cost->rate = rate2;
5822 rd_cost->dist = distortion2;
5823 rd_cost->rdcost = this_rd;
5824 best_rd = this_rd;
5825 best_yrd = best_rd -
5826 RDCOST(x->rdmult, x->rddiv, rate_uv, distortion_uv);
5827 best_mbmode = *mbmi;
5828 best_skip2 = this_skip2;
5829 if (!x->select_tx_size)
5830 swap_block_ptr(x, ctx, 1, 0, 0, max_plane);
Jingning Hanbfeac5e2015-10-15 23:11:30 -07005831
5832#if CONFIG_VAR_TX
5833 for (i = 0; i < MAX_MB_PLANE; ++i)
5834 memset(ctx->blk_skip[i], 0, sizeof(uint8_t) * ctx->num_4x4_blk);
5835#else
Jingning Han3ee6db62015-08-05 19:00:31 -07005836 memcpy(ctx->zcoeff_blk, x->zcoeff_blk[TX_4X4],
hui su088b05f2015-08-12 10:41:51 -07005837 sizeof(ctx->zcoeff_blk[0]) * ctx->num_4x4_blk);
Jingning Hanbfeac5e2015-10-15 23:11:30 -07005838#endif
Jingning Han3ee6db62015-08-05 19:00:31 -07005839
5840 for (i = 0; i < 4; i++)
5841 best_bmodes[i] = xd->mi[0]->bmi[i];
5842
5843 // TODO(debargha): enhance this test with a better distortion prediction
5844 // based on qp, activity mask and history
5845 if ((sf->mode_search_skip_flags & FLAG_EARLY_TERMINATE) &&
5846 (ref_index > MIN_EARLY_TERM_INDEX)) {
5847 int qstep = xd->plane[0].dequant[1];
5848 // TODO(debargha): Enhance this by specializing for each mode_index
5849 int scale = 4;
5850#if CONFIG_VP9_HIGHBITDEPTH
5851 if (xd->cur_buf->flags & YV12_FLAG_HIGHBITDEPTH) {
5852 qstep >>= (xd->bd - 8);
5853 }
5854#endif // CONFIG_VP9_HIGHBITDEPTH
5855 if (x->source_variance < UINT_MAX) {
5856 const int var_adjust = (x->source_variance < 16);
5857 scale -= var_adjust;
5858 }
5859 if (ref_frame > INTRA_FRAME &&
5860 distortion2 * scale < qstep * qstep) {
5861 early_term = 1;
5862 }
5863 }
5864 }
5865 }
5866
5867 /* keep record of best compound/single-only prediction */
5868 if (!disable_skip && ref_frame != INTRA_FRAME) {
5869 int64_t single_rd, hybrid_rd, single_rate, hybrid_rate;
5870
5871 if (cm->reference_mode == REFERENCE_MODE_SELECT) {
5872 single_rate = rate2 - compmode_cost;
5873 hybrid_rate = rate2;
5874 } else {
5875 single_rate = rate2;
5876 hybrid_rate = rate2 + compmode_cost;
5877 }
5878
5879 single_rd = RDCOST(x->rdmult, x->rddiv, single_rate, distortion2);
5880 hybrid_rd = RDCOST(x->rdmult, x->rddiv, hybrid_rate, distortion2);
5881
5882 if (!comp_pred && single_rd < best_pred_rd[SINGLE_REFERENCE])
5883 best_pred_rd[SINGLE_REFERENCE] = single_rd;
5884 else if (comp_pred && single_rd < best_pred_rd[COMPOUND_REFERENCE])
5885 best_pred_rd[COMPOUND_REFERENCE] = single_rd;
5886
5887 if (hybrid_rd < best_pred_rd[REFERENCE_MODE_SELECT])
5888 best_pred_rd[REFERENCE_MODE_SELECT] = hybrid_rd;
5889 }
5890
5891 /* keep record of best filter type */
5892 if (!mode_excluded && !disable_skip && ref_frame != INTRA_FRAME &&
5893 cm->interp_filter != BILINEAR) {
5894 int64_t ref = filter_cache[cm->interp_filter == SWITCHABLE ?
5895 SWITCHABLE_FILTERS : cm->interp_filter];
5896 int64_t adj_rd;
5897 for (i = 0; i < SWITCHABLE_FILTER_CONTEXTS; i++) {
5898 if (ref == INT64_MAX)
5899 adj_rd = 0;
5900 else if (filter_cache[i] == INT64_MAX)
5901 // when early termination is triggered, the encoder does not have
5902 // access to the rate-distortion cost. it only knows that the cost
5903 // should be above the maximum valid value. hence it takes the known
5904 // maximum plus an arbitrary constant as the rate-distortion cost.
5905 adj_rd = mask_filter - ref + 10;
5906 else
5907 adj_rd = filter_cache[i] - ref;
5908
5909 adj_rd += this_rd;
James Zern5e16d392015-08-17 18:19:22 -07005910 best_filter_rd[i] = VPXMIN(best_filter_rd[i], adj_rd);
Jingning Han3ee6db62015-08-05 19:00:31 -07005911 }
5912 }
5913
5914 if (early_term)
5915 break;
5916
5917 if (x->skip && !comp_pred)
5918 break;
5919 }
5920
5921 if (best_rd >= best_rd_so_far) {
5922 rd_cost->rate = INT_MAX;
5923 rd_cost->rdcost = INT64_MAX;
5924 return;
5925 }
5926
5927 // If we used an estimate for the uv intra rd in the loop above...
5928 if (sf->use_uv_intra_rd_estimate) {
5929 // Do Intra UV best rd mode selection if best mode choice above was intra.
5930 if (best_mbmode.ref_frame[0] == INTRA_FRAME) {
5931 *mbmi = best_mbmode;
5932 rd_pick_intra_sbuv_mode(cpi, x, ctx, &rate_uv_intra,
5933 &rate_uv_tokenonly,
5934 &dist_uv,
5935 &skip_uv,
5936 BLOCK_8X8, TX_4X4);
5937 }
5938 }
5939
5940 if (best_rd == INT64_MAX) {
5941 rd_cost->rate = INT_MAX;
5942 rd_cost->dist = INT64_MAX;
5943 rd_cost->rdcost = INT64_MAX;
5944 return;
5945 }
5946
5947 assert((cm->interp_filter == SWITCHABLE) ||
5948 (cm->interp_filter == best_mbmode.interp_filter) ||
5949 !is_inter_block(&best_mbmode));
5950
5951 vp10_update_rd_thresh_fact(tile_data->thresh_freq_fact,
5952 sf->adaptive_rd_thresh, bsize, best_ref_index);
5953
5954 // macroblock modes
5955 *mbmi = best_mbmode;
5956 x->skip |= best_skip2;
5957 if (!is_inter_block(&best_mbmode)) {
5958 for (i = 0; i < 4; i++)
5959 xd->mi[0]->bmi[i].as_mode = best_bmodes[i].as_mode;
5960 } else {
5961 for (i = 0; i < 4; ++i)
5962 memcpy(&xd->mi[0]->bmi[i], &best_bmodes[i], sizeof(b_mode_info));
5963
5964 mbmi->mv[0].as_int = xd->mi[0]->bmi[3].as_mv[0].as_int;
5965 mbmi->mv[1].as_int = xd->mi[0]->bmi[3].as_mv[1].as_int;
5966 }
5967
5968 for (i = 0; i < REFERENCE_MODES; ++i) {
5969 if (best_pred_rd[i] == INT64_MAX)
5970 best_pred_diff[i] = INT_MIN;
5971 else
5972 best_pred_diff[i] = best_rd - best_pred_rd[i];
5973 }
5974
5975 if (!x->skip) {
5976 for (i = 0; i < SWITCHABLE_FILTER_CONTEXTS; i++) {
5977 if (best_filter_rd[i] == INT64_MAX)
5978 best_filter_diff[i] = 0;
5979 else
5980 best_filter_diff[i] = best_rd - best_filter_rd[i];
5981 }
5982 if (cm->interp_filter == SWITCHABLE)
5983 assert(best_filter_diff[SWITCHABLE_FILTERS] == 0);
5984 } else {
5985 vp10_zero(best_filter_diff);
5986 }
5987
5988 store_coding_context(x, ctx, best_ref_index,
5989 best_pred_diff, best_filter_diff, 0);
5990}