blob: 63b2a972ef5ec4b642d0598efa3352a6fdf13a93 [file] [log] [blame]
Jingning Han3ee6db62015-08-05 19:00:31 -07001/*
2 * Copyright (c) 2010 The WebM project authors. All Rights Reserved.
3 *
4 * Use of this source code is governed by a BSD-style license
5 * that can be found in the LICENSE file in the root of the source
6 * tree. An additional intellectual property rights grant can be found
7 * in the file PATENTS. All contributing project authors may
8 * be found in the AUTHORS file in the root of the source tree.
9 */
10
11#include <assert.h>
12#include <math.h>
13
14#include "./vp10_rtcd.h"
15#include "./vpx_dsp_rtcd.h"
16
Johannc5f11912015-08-31 14:36:35 -070017#include "vpx_dsp/vpx_dsp_common.h"
Jingning Han3ee6db62015-08-05 19:00:31 -070018#include "vpx_mem/vpx_mem.h"
19#include "vpx_ports/mem.h"
Jingning Han3acfe462015-08-12 09:20:31 -070020#include "vpx_ports/system_state.h"
Jingning Han3ee6db62015-08-05 19:00:31 -070021
Jingning Han54d66ef2015-08-06 21:14:07 -070022#include "vp10/common/common.h"
23#include "vp10/common/entropy.h"
24#include "vp10/common/entropymode.h"
25#include "vp10/common/idct.h"
26#include "vp10/common/mvref_common.h"
27#include "vp10/common/pred_common.h"
28#include "vp10/common/quant_common.h"
29#include "vp10/common/reconinter.h"
30#include "vp10/common/reconintra.h"
31#include "vp10/common/scan.h"
32#include "vp10/common/seg_common.h"
Jingning Han3ee6db62015-08-05 19:00:31 -070033
Jingning Han54d66ef2015-08-06 21:14:07 -070034#include "vp10/encoder/cost.h"
35#include "vp10/encoder/encodemb.h"
36#include "vp10/encoder/encodemv.h"
37#include "vp10/encoder/encoder.h"
38#include "vp10/encoder/mcomp.h"
hui su5d011cb2015-09-15 12:44:13 -070039#include "vp10/encoder/palette.h"
Jingning Han54d66ef2015-08-06 21:14:07 -070040#include "vp10/encoder/quantize.h"
41#include "vp10/encoder/ratectrl.h"
42#include "vp10/encoder/rd.h"
43#include "vp10/encoder/rdopt.h"
44#include "vp10/encoder/aq_variance.h"
Jingning Han3ee6db62015-08-05 19:00:31 -070045
// Reference-frame bit masks (1 << frame). Each mask names the frames
// *other than* the one in the macro name; used elsewhere in this file
// (not all uses visible in this chunk) to filter mode candidates.
#define LAST_FRAME_MODE_MASK    ((1 << GOLDEN_FRAME) | (1 << ALTREF_FRAME) | \
                                 (1 << INTRA_FRAME))
#define GOLDEN_FRAME_MODE_MASK  ((1 << LAST_FRAME) | (1 << ALTREF_FRAME) | \
                                 (1 << INTRA_FRAME))
#define ALT_REF_MODE_MASK       ((1 << LAST_FRAME) | (1 << GOLDEN_FRAME) | \
                                 (1 << INTRA_FRAME))

// Valid second references: ALTREF or bit 0 (NONE/INTRA slot).
#define SECOND_REF_FRAME_MASK   ((1 << ALTREF_FRAME) | 0x01)

// NOTE(review): the two constants below are not referenced in this chunk;
// their semantics are defined by users elsewhere in the file.
#define MIN_EARLY_TERM_INDEX    3
#define NEW_MV_DISCOUNT_FACTOR  8

#if CONFIG_EXT_TX
// Multiplicative threshold (< 1) applied to best_rd while best_tx_type is
// still DCT_DCT: a non-DCT transform type must beat the current best rd by
// ~2% before it replaces DCT_DCT (see choose_largest_tx_size).
const double ext_tx_th = 0.98;
#endif
61
// A mode-search candidate: a prediction mode plus up to two reference
// frames (ref_frame[1] == NONE for single-reference candidates).
typedef struct {
  PREDICTION_MODE mode;
  MV_REFERENCE_FRAME ref_frame[2];
} MODE_DEFINITION;

// A reference-frame combination on its own (mode-agnostic).
typedef struct {
  MV_REFERENCE_FRAME ref_frame[2];
} REF_DEFINITION;

// Accumulator threaded through the per-transform-block RD loop
// (block_rd_txfm via vp10_foreach_transformed_block_in_plane).
struct rdcost_block_args {
#if CONFIG_VAR_TX
  const VP10_COMP *cpi;
#endif
  MACROBLOCK *x;
  ENTROPY_CONTEXT t_above[16];  // above context, indexed by block column
  ENTROPY_CONTEXT t_left[16];   // left context, indexed by block row
  int this_rate;       // running rate total
  int64_t this_dist;   // running distortion total
  int64_t this_sse;    // running sum-of-squared-error total
  int64_t this_rd;     // running rd cost, compared against best_rd
  int64_t best_rd;     // early-exit threshold supplied by the caller
  int exit_early;      // set once this_rd (+ pending rd) exceeds best_rd
  int use_fast_coef_costing;
  const scan_order *so;
  uint8_t skippable;   // stays 1 only while every visited block has eob == 0
};
88
// Index of the last single-reference NEWMV entry ({NEWMV, GOLDEN_FRAME})
// in vp10_mode_order below (entries 4..6 are the NEWMV modes).
#define LAST_NEW_MV_INDEX 6
// Full candidate list for the inter/intra mode search, ordered so that
// statistically likelier candidates are evaluated first.
static const MODE_DEFINITION vp10_mode_order[MAX_MODES] = {
  {NEARESTMV, {LAST_FRAME,   NONE}},
  {NEARESTMV, {ALTREF_FRAME, NONE}},
  {NEARESTMV, {GOLDEN_FRAME, NONE}},

  {DC_PRED,   {INTRA_FRAME,  NONE}},

  {NEWMV,     {LAST_FRAME,   NONE}},
  {NEWMV,     {ALTREF_FRAME, NONE}},
  {NEWMV,     {GOLDEN_FRAME, NONE}},

  {NEARMV,    {LAST_FRAME,   NONE}},
  {NEARMV,    {ALTREF_FRAME, NONE}},
  {NEARMV,    {GOLDEN_FRAME, NONE}},

  {ZEROMV,    {LAST_FRAME,   NONE}},
  {ZEROMV,    {GOLDEN_FRAME, NONE}},
  {ZEROMV,    {ALTREF_FRAME, NONE}},

  // Compound (two-reference) candidates.
  {NEARESTMV, {LAST_FRAME,   ALTREF_FRAME}},
  {NEARESTMV, {GOLDEN_FRAME, ALTREF_FRAME}},

  {TM_PRED,   {INTRA_FRAME,  NONE}},

  {NEARMV,    {LAST_FRAME,   ALTREF_FRAME}},
  {NEWMV,     {LAST_FRAME,   ALTREF_FRAME}},
  {NEARMV,    {GOLDEN_FRAME, ALTREF_FRAME}},
  {NEWMV,     {GOLDEN_FRAME, ALTREF_FRAME}},

  {ZEROMV,    {LAST_FRAME,   ALTREF_FRAME}},
  {ZEROMV,    {GOLDEN_FRAME, ALTREF_FRAME}},

  // Remaining directional intra modes.
  {H_PRED,    {INTRA_FRAME,  NONE}},
  {V_PRED,    {INTRA_FRAME,  NONE}},
  {D135_PRED, {INTRA_FRAME,  NONE}},
  {D207_PRED, {INTRA_FRAME,  NONE}},
  {D153_PRED, {INTRA_FRAME,  NONE}},
  {D63_PRED,  {INTRA_FRAME,  NONE}},
  {D117_PRED, {INTRA_FRAME,  NONE}},
  {D45_PRED,  {INTRA_FRAME,  NONE}},
};

// Reference-frame combinations in search order (intra last).
static const REF_DEFINITION vp10_ref_order[MAX_REFS] = {
  {{LAST_FRAME,   NONE}},
  {{GOLDEN_FRAME, NONE}},
  {{ALTREF_FRAME, NONE}},
  {{LAST_FRAME,   ALTREF_FRAME}},
  {{GOLDEN_FRAME, ALTREF_FRAME}},
  {{INTRA_FRAME,  NONE}},
};
140
hui su5d011cb2015-09-15 12:44:13 -0700141static INLINE int write_uniform_cost(int n, int v) {
142 int l = get_unsigned_bits(n), m = (1 << l) - n;
143 if (l == 0)
144 return 0;
145 if (v < m)
146 return (l - 1) * vp10_cost_bit(128, 0);
147 else
148 return l * vp10_cost_bit(128, 0);
149}
150
Jingning Han3ee6db62015-08-05 19:00:31 -0700151static void swap_block_ptr(MACROBLOCK *x, PICK_MODE_CONTEXT *ctx,
152 int m, int n, int min_plane, int max_plane) {
153 int i;
154
155 for (i = min_plane; i < max_plane; ++i) {
156 struct macroblock_plane *const p = &x->plane[i];
157 struct macroblockd_plane *const pd = &x->e_mbd.plane[i];
158
159 p->coeff = ctx->coeff_pbuf[i][m];
160 p->qcoeff = ctx->qcoeff_pbuf[i][m];
161 pd->dqcoeff = ctx->dqcoeff_pbuf[i][m];
162 p->eobs = ctx->eobs_pbuf[i][m];
163
164 ctx->coeff_pbuf[i][m] = ctx->coeff_pbuf[i][n];
165 ctx->qcoeff_pbuf[i][m] = ctx->qcoeff_pbuf[i][n];
166 ctx->dqcoeff_pbuf[i][m] = ctx->dqcoeff_pbuf[i][n];
167 ctx->eobs_pbuf[i][m] = ctx->eobs_pbuf[i][n];
168
169 ctx->coeff_pbuf[i][n] = p->coeff;
170 ctx->qcoeff_pbuf[i][n] = p->qcoeff;
171 ctx->dqcoeff_pbuf[i][n] = pd->dqcoeff;
172 ctx->eobs_pbuf[i][n] = p->eobs;
173 }
174}
175
Yaowu Xu26a9afc2015-08-13 09:42:27 -0700176static void model_rd_for_sb(VP10_COMP *cpi, BLOCK_SIZE bsize,
Jingning Han3ee6db62015-08-05 19:00:31 -0700177 MACROBLOCK *x, MACROBLOCKD *xd,
178 int *out_rate_sum, int64_t *out_dist_sum,
179 int *skip_txfm_sb, int64_t *skip_sse_sb) {
180 // Note our transform coeffs are 8 times an orthogonal transform.
181 // Hence quantizer step is also 8 times. To get effective quantizer
182 // we need to divide by 8 before sending to modeling function.
183 int i;
184 int64_t rate_sum = 0;
185 int64_t dist_sum = 0;
186 const int ref = xd->mi[0]->mbmi.ref_frame[0];
187 unsigned int sse;
188 unsigned int var = 0;
189 unsigned int sum_sse = 0;
190 int64_t total_sse = 0;
191 int skip_flag = 1;
192 const int shift = 6;
193 int rate;
194 int64_t dist;
195 const int dequant_shift =
196#if CONFIG_VP9_HIGHBITDEPTH
197 (xd->cur_buf->flags & YV12_FLAG_HIGHBITDEPTH) ?
198 xd->bd - 5 :
199#endif // CONFIG_VP9_HIGHBITDEPTH
200 3;
201
202 x->pred_sse[ref] = 0;
203
204 for (i = 0; i < MAX_MB_PLANE; ++i) {
205 struct macroblock_plane *const p = &x->plane[i];
206 struct macroblockd_plane *const pd = &xd->plane[i];
207 const BLOCK_SIZE bs = get_plane_block_size(bsize, pd);
208 const TX_SIZE max_tx_size = max_txsize_lookup[bs];
209 const BLOCK_SIZE unit_size = txsize_to_bsize[max_tx_size];
210 const int64_t dc_thr = p->quant_thred[0] >> shift;
211 const int64_t ac_thr = p->quant_thred[1] >> shift;
212 // The low thresholds are used to measure if the prediction errors are
213 // low enough so that we can skip the mode search.
James Zern5e16d392015-08-17 18:19:22 -0700214 const int64_t low_dc_thr = VPXMIN(50, dc_thr >> 2);
215 const int64_t low_ac_thr = VPXMIN(80, ac_thr >> 2);
Jingning Han3ee6db62015-08-05 19:00:31 -0700216 int bw = 1 << (b_width_log2_lookup[bs] - b_width_log2_lookup[unit_size]);
217 int bh = 1 << (b_height_log2_lookup[bs] - b_width_log2_lookup[unit_size]);
218 int idx, idy;
219 int lw = b_width_log2_lookup[unit_size] + 2;
220 int lh = b_height_log2_lookup[unit_size] + 2;
221
222 sum_sse = 0;
223
224 for (idy = 0; idy < bh; ++idy) {
225 for (idx = 0; idx < bw; ++idx) {
226 uint8_t *src = p->src.buf + (idy * p->src.stride << lh) + (idx << lw);
227 uint8_t *dst = pd->dst.buf + (idy * pd->dst.stride << lh) + (idx << lh);
228 int block_idx = (idy << 1) + idx;
229 int low_err_skip = 0;
230
231 var = cpi->fn_ptr[unit_size].vf(src, p->src.stride,
232 dst, pd->dst.stride, &sse);
233 x->bsse[(i << 2) + block_idx] = sse;
234 sum_sse += sse;
235
236 x->skip_txfm[(i << 2) + block_idx] = SKIP_TXFM_NONE;
237 if (!x->select_tx_size) {
238 // Check if all ac coefficients can be quantized to zero.
239 if (var < ac_thr || var == 0) {
240 x->skip_txfm[(i << 2) + block_idx] = SKIP_TXFM_AC_ONLY;
241
242 // Check if dc coefficient can be quantized to zero.
243 if (sse - var < dc_thr || sse == var) {
244 x->skip_txfm[(i << 2) + block_idx] = SKIP_TXFM_AC_DC;
245
246 if (!sse || (var < low_ac_thr && sse - var < low_dc_thr))
247 low_err_skip = 1;
248 }
249 }
250 }
251
252 if (skip_flag && !low_err_skip)
253 skip_flag = 0;
254
255 if (i == 0)
256 x->pred_sse[ref] += sse;
257 }
258 }
259
260 total_sse += sum_sse;
261
262 // Fast approximate the modelling function.
263 if (cpi->sf.simple_model_rd_from_var) {
264 int64_t rate;
265 const int64_t square_error = sum_sse;
266 int quantizer = (pd->dequant[1] >> dequant_shift);
267
268 if (quantizer < 120)
269 rate = (square_error * (280 - quantizer)) >> 8;
270 else
271 rate = 0;
272 dist = (square_error * quantizer) >> 8;
273 rate_sum += rate;
274 dist_sum += dist;
275 } else {
276 vp10_model_rd_from_var_lapndz(sum_sse, num_pels_log2_lookup[bs],
277 pd->dequant[1] >> dequant_shift,
278 &rate, &dist);
279 rate_sum += rate;
280 dist_sum += dist;
281 }
282 }
283
284 *skip_txfm_sb = skip_flag;
285 *skip_sse_sb = total_sse << 4;
286 *out_rate_sum = (int)rate_sum;
287 *out_dist_sum = dist_sum << 4;
288}
289
290int64_t vp10_block_error_c(const tran_low_t *coeff, const tran_low_t *dqcoeff,
291 intptr_t block_size, int64_t *ssz) {
292 int i;
293 int64_t error = 0, sqcoeff = 0;
294
295 for (i = 0; i < block_size; i++) {
296 const int diff = coeff[i] - dqcoeff[i];
297 error += diff * diff;
298 sqcoeff += coeff[i] * coeff[i];
299 }
300
301 *ssz = sqcoeff;
302 return error;
303}
304
/* Fast-path block error: sum of squared differences between coeff and
 * dqcoeff over block_size int16 entries.
 * Fix: the per-entry product is widened to int64_t before multiplying.
 * The previous int-typed `diff * diff` overflows (undefined behavior)
 * for extreme int16 differences, e.g. 32767 - (-32768) = 65535 whose
 * square (4294836225) exceeds INT_MAX. */
int64_t vp10_block_error_fp_c(const int16_t *coeff, const int16_t *dqcoeff,
                              int block_size) {
  int i;
  int64_t error = 0;

  for (i = 0; i < block_size; i++) {
    const int diff = coeff[i] - dqcoeff[i];
    error += (int64_t)diff * diff;
  }

  return error;
}
317
#if CONFIG_VP9_HIGHBITDEPTH
// High-bit-depth block error: like vp10_block_error_c but accumulates in
// 64 bits throughout and rescales the results back to 8-bit units by
// shifting right 2*(bd-8) with round-to-nearest.
int64_t vp10_highbd_block_error_c(const tran_low_t *coeff,
                                  const tran_low_t *dqcoeff,
                                  intptr_t block_size,
                                  int64_t *ssz, int bd) {
  int i;
  int64_t error = 0, sqcoeff = 0;
  int shift = 2 * (bd - 8);  // squared-error scale gap vs. 8-bit depth
  int rounding = shift > 0 ? 1 << (shift - 1) : 0;

  for (i = 0; i < block_size; i++) {
    const int64_t diff = coeff[i] - dqcoeff[i];
    error += diff * diff;
    sqcoeff += (int64_t)coeff[i] * (int64_t)coeff[i];
  }
  assert(error >= 0 && sqcoeff >= 0);
  // Normalize both sums to the 8-bit squared-error scale.
  error = (error + rounding) >> shift;
  sqcoeff = (sqcoeff + rounding) >> shift;

  *ssz = sqcoeff;
  return error;
}
#endif  // CONFIG_VP9_HIGHBITDEPTH
341
/* The trailing '0' is a terminator which is used inside cost_coeffs() to
 * decide whether to include cost of a trailing EOB node or not (i.e. we
 * can skip this if the last coefficient in this transform block, e.g. the
 * 16th coefficient in a 4x4 block or the 64th coefficient in a 8x8 block,
 * were non-zero). */
// Rows are indexed by TX_SIZE (4x4 .. 32x32); entries are the number of
// coefficient positions in each entropy band, summing to the block size.
static const int16_t band_counts[TX_SIZES][8] = {
  { 1, 2, 3, 4,  3,   16 - 13, 0 },
  { 1, 2, 3, 4, 11,   64 - 21, 0 },
  { 1, 2, 3, 4, 11,  256 - 21, 0 },
  { 1, 2, 3, 4, 11, 1024 - 21, 0 },
};
// Entropy-coding cost (in rate units) of the quantized coefficients of
// one transform block, walking them in scan order and pricing each token
// from x->token_costs. With CONFIG_VAR_TX the caller supplies the
// combined entropy context; otherwise it is derived from *A/*L, which
// are updated on exit to "block had any nonzero coefficient".
static int cost_coeffs(MACROBLOCK *x,
                       int plane, int block,
#if CONFIG_VAR_TX
                       int coeff_ctx,
#else
                       ENTROPY_CONTEXT *A, ENTROPY_CONTEXT *L,
#endif
                       TX_SIZE tx_size,
                       const int16_t *scan, const int16_t *nb,
                       int use_fast_coef_costing) {
  MACROBLOCKD *const xd = &x->e_mbd;
  MB_MODE_INFO *mbmi = &xd->mi[0]->mbmi;
  const struct macroblock_plane *p = &x->plane[plane];
  const struct macroblockd_plane *pd = &xd->plane[plane];
  const PLANE_TYPE type = pd->plane_type;
  // Skip band 0 (the DC position), which is costed separately below.
  const int16_t *band_count = &band_counts[tx_size][1];
  const int eob = p->eobs[block];
  const tran_low_t *const qcoeff = BLOCK_OFFSET(p->qcoeff, block);
  unsigned int (*token_costs)[2][COEFF_CONTEXTS][ENTROPY_TOKENS] =
      x->token_costs[tx_size][type][is_inter_block(mbmi)];
  uint8_t token_cache[32 * 32];
#if CONFIG_VAR_TX
  int pt = coeff_ctx;
#else
  int pt = combine_entropy_contexts(*A, *L);
#endif
  int c, cost;
#if CONFIG_VP9_HIGHBITDEPTH
  const int16_t *cat6_high_cost = vp10_get_high_cost_table(xd->bd);
#else
  const int16_t *cat6_high_cost = vp10_get_high_cost_table(8);
#endif

#if !CONFIG_VAR_TX
  // Check for consistency of tx_size with mode info
  assert(type == PLANE_TYPE_Y ? mbmi->tx_size == tx_size
                              : get_uv_tx_size(mbmi, pd) == tx_size);
#endif

  if (eob == 0) {
    // single eob token
    cost = token_costs[0][0][pt][EOB_TOKEN];
    c = 0;
  } else {
    int band_left = *band_count++;

    // dc token
    int v = qcoeff[0];
    int16_t prev_t;
    EXTRABIT e;
    vp10_get_token_extra(v, &prev_t, &e);
    cost = (*token_costs)[0][pt][prev_t] +
        vp10_get_cost(prev_t, e, cat6_high_cost);

    token_cache[0] = vp10_pt_energy_class[prev_t];
    ++token_costs;

    // ac tokens
    for (c = 1; c < eob; c++) {
      const int rc = scan[c];
      int16_t t;

      v = qcoeff[rc];
      vp10_get_token_extra(v, &t, &e);
      if (use_fast_coef_costing) {
        // Fast path: approximate the context by the previous token's
        // zero/nonzero status instead of the true neighbor context.
        cost += (*token_costs)[!prev_t][!prev_t][t] +
            vp10_get_cost(t, e, cat6_high_cost);
      } else {
        pt = get_coef_context(nb, token_cache, c);
        cost += (*token_costs)[!prev_t][pt][t] +
            vp10_get_cost(t, e, cat6_high_cost);
        token_cache[rc] = vp10_pt_energy_class[t];
      }
      prev_t = t;
      if (!--band_left) {
        band_left = *band_count++;
        ++token_costs;
      }
    }

    // eob token: only costed if the block ended before its final
    // position (band_counts' trailing 0 makes band_left == 0 otherwise).
    if (band_left) {
      if (use_fast_coef_costing) {
        cost += (*token_costs)[0][!prev_t][EOB_TOKEN];
      } else {
        pt = get_coef_context(nb, token_cache, c);
        cost += (*token_costs)[0][pt][EOB_TOKEN];
      }
    }
  }

#if !CONFIG_VAR_TX
  // is eob first coefficient;
  *A = *L = (c > 0);
#endif

  return cost;
}
451
// Compute distortion (*out_dist) and coefficient SSE (*out_sse) for one
// transform block from its transform-domain coefficients. The >> shift
// compensates for transform scaling (TX_32X32 is scaled differently from
// the smaller sizes, hence shift 0 vs 2).
static void dist_block(MACROBLOCK *x, int plane, int block, TX_SIZE tx_size,
                       int64_t *out_dist, int64_t *out_sse) {
  const int ss_txfrm_size = tx_size << 1;  // 16 << this == coeff count
  MACROBLOCKD* const xd = &x->e_mbd;
  const struct macroblock_plane *const p = &x->plane[plane];
  const struct macroblockd_plane *const pd = &xd->plane[plane];
  int64_t this_sse;
  int shift = tx_size == TX_32X32 ? 0 : 2;
  tran_low_t *const coeff = BLOCK_OFFSET(p->coeff, block);
  tran_low_t *const dqcoeff = BLOCK_OFFSET(pd->dqcoeff, block);
#if CONFIG_VP9_HIGHBITDEPTH
  const int bd = (xd->cur_buf->flags & YV12_FLAG_HIGHBITDEPTH) ? xd->bd : 8;
  *out_dist = vp10_highbd_block_error(coeff, dqcoeff, 16 << ss_txfrm_size,
                                      &this_sse, bd) >> shift;
#else
  *out_dist = vp10_block_error(coeff, dqcoeff, 16 << ss_txfrm_size,
                               &this_sse) >> shift;
#endif  // CONFIG_VP9_HIGHBITDEPTH
  *out_sse = this_sse >> shift;
}
472
// Coefficient-coding rate for one transform block. With CONFIG_VAR_TX the
// entropy contexts are combined here and written back as eob!=0 flags;
// otherwise cost_coeffs manages the context pointers itself.
static int rate_block(int plane, int block, int blk_row, int blk_col,
                      TX_SIZE tx_size, struct rdcost_block_args* args) {
#if CONFIG_VAR_TX
  int coeff_ctx = combine_entropy_contexts(*(args->t_above + blk_col),
                                           *(args->t_left + blk_row));
  int coeff_cost = cost_coeffs(args->x, plane, block, coeff_ctx,
                               tx_size, args->so->scan, args->so->neighbors,
                               args->use_fast_coef_costing);
  const struct macroblock_plane *p = &args->x->plane[plane];
  // Update contexts: nonzero iff the block produced any coefficients.
  *(args->t_above + blk_col) = !(p->eobs[block] == 0);
  *(args->t_left + blk_row) = !(p->eobs[block] == 0);
  return coeff_cost;
#else
  return cost_coeffs(args->x, plane, block,
                     args->t_above + blk_col,
                     args->t_left + blk_row,
                     tx_size, args->so->scan, args->so->neighbors,
                     args->use_fast_coef_costing);
#endif
}
493
// Per-transform-block RD worker (callback for
// vp10_foreach_transformed_block_in_plane). Computes rate/dist/sse for
// one block — re-encoding intra blocks, and for inter blocks optionally
// reusing the model_rd_for_sb skip decisions — then accumulates into
// *args with early exit once the running rd exceeds args->best_rd.
static void block_rd_txfm(int plane, int block, int blk_row, int blk_col,
                          BLOCK_SIZE plane_bsize,
                          TX_SIZE tx_size, void *arg) {
  struct rdcost_block_args *args = arg;
  MACROBLOCK *const x = args->x;
  MACROBLOCKD *const xd = &x->e_mbd;
  MB_MODE_INFO *const mbmi = &xd->mi[0]->mbmi;
  int64_t rd1, rd2, rd;
  int rate;
  int64_t dist;
  int64_t sse;

  if (args->exit_early)
    return;

  if (!is_inter_block(mbmi)) {
#if CONFIG_VAR_TX
    struct encode_b_args arg = {x, NULL, &mbmi->skip};
    uint8_t *dst, *src;
    int src_stride = x->plane[plane].src.stride;
    int dst_stride = xd->plane[plane].dst.stride;
    unsigned int tmp_sse;
    PREDICTION_MODE mode = (plane == 0) ?
        get_y_mode(xd->mi[0], block) : mbmi->uv_mode;

#if CONFIG_VP9_HIGHBITDEPTH
    vp10_encode_block_intra(plane, block, blk_row, blk_col,
                            plane_bsize, tx_size, &arg);
    dist_block(x, plane, block, tx_size, &dist, &sse);
#else
    // Pixel-domain measurement: sse from prediction error before
    // encoding, dist from reconstruction error after encoding.
    src = &x->plane[plane].src.buf[4 * (blk_row * src_stride + blk_col)];
    dst = &xd->plane[plane].dst.buf[4 * (blk_row * dst_stride + blk_col)];
    vp10_predict_intra_block(xd, b_width_log2_lookup[plane_bsize],
                             b_height_log2_lookup[plane_bsize],
                             tx_size, mode, dst, dst_stride,
                             dst, dst_stride, blk_col, blk_row, plane);
    args->cpi->fn_ptr[txsize_to_bsize[tx_size]].vf(src, src_stride,
                                                   dst, dst_stride, &tmp_sse);
    sse = (int64_t)tmp_sse * 16;
    vp10_encode_block_intra(plane, block, blk_row, blk_col,
                            plane_bsize, tx_size, &arg);
    args->cpi->fn_ptr[txsize_to_bsize[tx_size]].vf(src, src_stride,
                                                   dst, dst_stride, &tmp_sse);
    dist = (int64_t)tmp_sse * 16;
#endif  // CONFIG_VP9_HIGHBITDEPTH
#else
    struct encode_b_args arg = {x, NULL, &mbmi->skip};
    vp10_encode_block_intra(plane, block, blk_row, blk_col,
                            plane_bsize, tx_size, &arg);
    dist_block(x, plane, block, tx_size, &dist, &sse);
#endif
  } else if (max_txsize_lookup[plane_bsize] == tx_size) {
    // Inter at the max tx size: consult the skip decision recorded by
    // model_rd_for_sb (indexing assumes at most 4 units per plane).
    if (x->skip_txfm[(plane << 2) + (block >> (tx_size << 1))] ==
        SKIP_TXFM_NONE) {
      // full forward transform and quantization
      vp10_xform_quant(x, plane, block, blk_row, blk_col,
                       plane_bsize, tx_size);
      dist_block(x, plane, block, tx_size, &dist, &sse);
    } else if (x->skip_txfm[(plane << 2) + (block >> (tx_size << 1))] ==
               SKIP_TXFM_AC_ONLY) {
      // compute DC coefficient
      tran_low_t *const coeff = BLOCK_OFFSET(x->plane[plane].coeff, block);
      tran_low_t *const dqcoeff = BLOCK_OFFSET(xd->plane[plane].dqcoeff, block);
      vp10_xform_quant_dc(x, plane, block, blk_row, blk_col,
                          plane_bsize, tx_size);
      sse = x->bsse[(plane << 2) + (block >> (tx_size << 1))] << 4;
      dist = sse;
      if (x->plane[plane].eobs[block]) {
        // Correct the modeled distortion by the energy the quantized DC
        // coefficient actually removes.
        const int64_t orig_sse = (int64_t)coeff[0] * coeff[0];
        const int64_t resd_sse = coeff[0] - dqcoeff[0];
        int64_t dc_correct = orig_sse - resd_sse * resd_sse;
#if CONFIG_VP9_HIGHBITDEPTH
        dc_correct >>= ((xd->bd - 8) * 2);
#endif
        if (tx_size != TX_32X32)
          dc_correct >>= 2;

        dist = VPXMAX(0, sse - dc_correct);
      }
    } else {
      // SKIP_TXFM_AC_DC
      // skip forward transform
      x->plane[plane].eobs[block] = 0;
      sse = x->bsse[(plane << 2) + (block >> (tx_size << 1))] << 4;
      dist = sse;
    }
  } else {
    // full forward transform and quantization
    vp10_xform_quant(x, plane, block, blk_row, blk_col, plane_bsize, tx_size);
    dist_block(x, plane, block, tx_size, &dist, &sse);
  }

  // Early exit on distortion alone (rate can only make rd larger).
  rd = RDCOST(x->rdmult, x->rddiv, 0, dist);
  if (args->this_rd + rd > args->best_rd) {
    args->exit_early = 1;
    return;
  }

  rate = rate_block(plane, block, blk_row, blk_col, tx_size, args);
  rd1 = RDCOST(x->rdmult, x->rddiv, rate, dist);  // code coefficients
  rd2 = RDCOST(x->rdmult, x->rddiv, 0, sse);      // skip coefficients

  // TODO(jingning): temporarily enabled only for luma component
  rd = VPXMIN(rd1, rd2);
  if (plane == 0)
    x->zcoeff_blk[tx_size][block] = !x->plane[plane].eobs[block] ||
        (rd1 > rd2 && !xd->lossless[mbmi->segment_id]);

  args->this_rate += rate;
  args->this_dist += dist;
  args->this_sse += sse;
  args->this_rd += rd;

  if (args->this_rd > args->best_rd) {
    args->exit_early = 1;
    return;
  }

  args->skippable &= !x->plane[plane].eobs[block];
}
614
// RD cost of coding one whole plane at a fixed tx_size: runs
// block_rd_txfm over every transform block and returns the totals, or
// INT_MAX/INT64_MAX sentinels if the search early-exited past
// ref_best_rd.
// NOTE(review): parameter `use_fast_coef_casting` looks like a typo for
// `use_fast_coef_costing` (the field it is assigned to) — rename upstream.
static void txfm_rd_in_plane(MACROBLOCK *x,
#if CONFIG_VAR_TX
                             const VP10_COMP *cpi,
#endif
                             int *rate, int64_t *distortion,
                             int *skippable, int64_t *sse,
                             int64_t ref_best_rd, int plane,
                             BLOCK_SIZE bsize, TX_SIZE tx_size,
                             int use_fast_coef_casting) {
  MACROBLOCKD *const xd = &x->e_mbd;
  const struct macroblockd_plane *const pd = &xd->plane[plane];
  TX_TYPE tx_type;
  struct rdcost_block_args args;
  vp10_zero(args);
  args.x = x;
#if CONFIG_VAR_TX
  args.cpi = cpi;
#endif
  args.best_rd = ref_best_rd;
  args.use_fast_coef_costing = use_fast_coef_casting;
  args.skippable = 1;

  // Luma carries the block's tx_size in mode info.
  if (plane == 0)
    xd->mi[0]->mbmi.tx_size = tx_size;

  vp10_get_entropy_contexts(bsize, tx_size, pd, args.t_above, args.t_left);

  tx_type = get_tx_type(pd->plane_type, xd, 0, tx_size);
  args.so = get_scan(tx_size, tx_type, is_inter_block(&xd->mi[0]->mbmi));

  vp10_foreach_transformed_block_in_plane(xd, bsize, plane,
                                          block_rd_txfm, &args);
  if (args.exit_early) {
    // Sentinels signal "worse than ref_best_rd" to the caller.
    *rate = INT_MAX;
    *distortion = INT64_MAX;
    *sse = INT64_MAX;
    *skippable = 0;
  } else {
    *distortion = args.this_dist;
    *rate = args.this_rate;
    *sse = args.this_sse;
    *skippable = args.skippable;
  }
}
659
// Evaluate the block at the largest transform size permitted by the
// current tx_mode. With CONFIG_EXT_TX, additionally searches transform
// types for inter blocks (biased toward DCT_DCT via ext_tx_th) before
// producing the final rate/dist/skip/sse at the chosen type.
static void choose_largest_tx_size(VP10_COMP *cpi, MACROBLOCK *x,
                                   int *rate, int64_t *distortion,
                                   int *skip, int64_t *sse,
                                   int64_t ref_best_rd,
                                   BLOCK_SIZE bs) {
  const TX_SIZE max_tx_size = max_txsize_lookup[bs];
  VP10_COMMON *const cm = &cpi->common;
  const TX_SIZE largest_tx_size = tx_mode_to_biggest_tx_size[cm->tx_mode];
  MACROBLOCKD *const xd = &x->e_mbd;
  MB_MODE_INFO *const mbmi = &xd->mi[0]->mbmi;
#if CONFIG_EXT_TX
  TX_TYPE tx_type, best_tx_type = DCT_DCT;
  int r, s;
  int64_t d, psse, this_rd, best_rd = INT64_MAX;
  vpx_prob skip_prob = vp10_get_skip_prob(cm, xd);
  int s0 = vp10_cost_bit(skip_prob, 0);
  int s1 = vp10_cost_bit(skip_prob, 1);
  int ext_tx_set;
  const int is_inter = is_inter_block(mbmi);
#endif  // CONFIG_EXT_TX

  mbmi->tx_size = VPXMIN(max_tx_size, largest_tx_size);

#if CONFIG_EXT_TX
  ext_tx_set = get_ext_tx_set(mbmi->tx_size, bs, is_inter);

  if (is_inter &&
      get_ext_tx_types(mbmi->tx_size, bs, is_inter) > 1 &&
      !xd->lossless[mbmi->segment_id]) {
    for (tx_type = 0; tx_type < TX_TYPES; ++tx_type) {
      // NOTE(review): the loop is only entered when is_inter is true, so
      // every `else` (intra) branch inside it is unreachable dead code.
      if (is_inter) {
        if (!ext_tx_used_inter[ext_tx_set][tx_type])
          continue;
      } else {
        if (!ext_tx_used_intra[ext_tx_set][tx_type])
          continue;
      }

      mbmi->tx_type = tx_type;
      // In set 1, once DCT_DCT is still the best after the DST range
      // starts, jump ahead to IDTX (tx_type = IDTX - 1, then ++).
      if (ext_tx_set == 1 &&
          mbmi->tx_type >= DST_ADST && mbmi->tx_type < IDTX &&
          best_tx_type == DCT_DCT) {
        tx_type = IDTX - 1;
        continue;
      }

      txfm_rd_in_plane(x,
#if CONFIG_VAR_TX
                       cpi,
#endif
                       &r, &d, &s,
                       &psse, ref_best_rd, 0, bs, mbmi->tx_size,
                       cpi->sf.use_fast_coef_costing);

      if (r == INT_MAX)
        continue;
      // Add the side cost of signalling the transform type itself.
      if (get_ext_tx_types(mbmi->tx_size, bs, is_inter) > 1) {
        if (is_inter) {
          if (ext_tx_set > 0)
            r += cpi->inter_tx_type_costs[ext_tx_set]
                                         [mbmi->tx_size][mbmi->tx_type];
        } else {
          if (ext_tx_set > 0)
            r += cpi->intra_tx_type_costs[ext_tx_set][mbmi->tx_size]
                                         [mbmi->mode][mbmi->tx_type];
        }
      }

      if (s)
        this_rd = RDCOST(x->rdmult, x->rddiv, s1, psse);
      else
        this_rd = RDCOST(x->rdmult, x->rddiv, r + s0, d);
      if (is_inter_block(mbmi) && !xd->lossless[mbmi->segment_id] && !s)
        this_rd = VPXMIN(this_rd, RDCOST(x->rdmult, x->rddiv, s1, psse));

      // Require a ~2% win (ext_tx_th) to displace DCT_DCT.
      if (this_rd < ((best_tx_type == DCT_DCT) ? ext_tx_th : 1) * best_rd) {
        best_rd = this_rd;
        best_tx_type = mbmi->tx_type;
      }
    }
  }

  mbmi->tx_type = best_tx_type;
#endif  // CONFIG_EXT_TX

  // Final pass at the selected size (and, with EXT_TX, selected type).
  txfm_rd_in_plane(x,
#if CONFIG_VAR_TX
                   cpi,
#endif
                   rate, distortion, skip,
                   sse, ref_best_rd, 0, bs,
                   mbmi->tx_size, cpi->sf.use_fast_coef_costing);

#if CONFIG_EXT_TX
  if (get_ext_tx_types(mbmi->tx_size, bs, is_inter) > 1 &&
      !xd->lossless[mbmi->segment_id] && *rate != INT_MAX) {
    int ext_tx_set = get_ext_tx_set(mbmi->tx_size, bs, is_inter);
    if (is_inter)
      *rate += cpi->inter_tx_type_costs[ext_tx_set][mbmi->tx_size]
                                       [mbmi->tx_type];
    else
      *rate += cpi->intra_tx_type_costs[ext_tx_set][mbmi->tx_size]
                                       [mbmi->mode][mbmi->tx_type];
  }
#endif  // CONFIG_EXT_TX
}
766
Ronald S. Bultje60c58b52015-10-12 17:54:25 -0400767static void choose_smallest_tx_size(VP10_COMP *cpi, MACROBLOCK *x,
768 int *rate, int64_t *distortion,
769 int *skip, int64_t *sse,
770 int64_t ref_best_rd,
771 BLOCK_SIZE bs) {
772 MACROBLOCKD *const xd = &x->e_mbd;
773 MB_MODE_INFO *const mbmi = &xd->mi[0]->mbmi;
774
775 mbmi->tx_size = TX_4X4;
776
Jingning Han71c15602015-10-13 12:40:39 -0700777 txfm_rd_in_plane(x,
778#if CONFIG_VAR_TX
779 cpi,
780#endif
781 rate, distortion, skip,
Ronald S. Bultje60c58b52015-10-12 17:54:25 -0400782 sse, ref_best_rd, 0, bs,
783 mbmi->tx_size, cpi->sf.use_fast_coef_costing);
784}
785
// Rate-distortion search over transform sizes (and, under CONFIG_EXT_TX,
// transform types) for the luma plane of block size |bs|.
// On return the winning choice is left in mbmi->tx_size (and mbmi->tx_type),
// and its statistics are written through |rate|, |distortion|, |skip| and
// |psse|.  The outputs are pre-set to INT_MAX / INT64_MAX so a caller can
// detect that no candidate survived the |ref_best_rd| pruning.
static void choose_tx_size_from_rd(VP10_COMP *cpi, MACROBLOCK *x,
                                   int *rate,
                                   int64_t *distortion,
                                   int *skip,
                                   int64_t *psse,
                                   int64_t ref_best_rd,
                                   BLOCK_SIZE bs) {
  const TX_SIZE max_tx_size = max_txsize_lookup[bs];
  VP10_COMMON *const cm = &cpi->common;
  MACROBLOCKD *const xd = &x->e_mbd;
  MB_MODE_INFO *const mbmi = &xd->mi[0]->mbmi;
  vpx_prob skip_prob = vp10_get_skip_prob(cm, xd);
  int r, s;        // rate and skip flag of the current candidate
  int64_t d, sse;  // distortion and sse of the current candidate
  int64_t rd = INT64_MAX;
  int n, m;
  int s0, s1;      // cost of coding the skip flag as 0 / as 1
  int64_t best_rd = INT64_MAX, last_rd = INT64_MAX;
  TX_SIZE best_tx = max_tx_size;
  int start_tx, end_tx;
  // With TX_MODE_SELECT the tx size is signalled per block, so every size
  // from max_tx_size down to TX_4X4 is searched; otherwise only the size
  // implied by the frame-level tx mode is evaluated.
  const int tx_select = cm->tx_mode == TX_MODE_SELECT;
#if CONFIG_EXT_TX
  TX_TYPE tx_type, best_tx_type = DCT_DCT;
  int ext_tx_set;
#endif  // CONFIG_EXT_TX
  const int is_inter = is_inter_block(mbmi);

  const vpx_prob *tx_probs = get_tx_probs2(max_tx_size, xd, &cm->fc->tx_probs);
  assert(skip_prob > 0);
  s0 = vp10_cost_bit(skip_prob, 0);
  s1 = vp10_cost_bit(skip_prob, 1);

  if (tx_select) {
    start_tx = max_tx_size;
    end_tx = 0;
  } else {
    const TX_SIZE chosen_tx_size =
        VPXMIN(max_tx_size, tx_mode_to_biggest_tx_size[cm->tx_mode]);
    start_tx = chosen_tx_size;
    end_tx = chosen_tx_size;
  }

  *distortion = INT64_MAX;
  *rate = INT_MAX;
  *skip = 0;
  *psse = INT64_MAX;

#if CONFIG_EXT_TX
  for (tx_type = DCT_DCT; tx_type < TX_TYPES; ++tx_type) {
#endif  // CONFIG_EXT_TX
  last_rd = INT64_MAX;
  for (n = start_tx; n >= end_tx; --n) {
    // Cost of signalling tx size |n| through the tx-size probability tree.
    int r_tx_size = 0;
    for (m = 0; m <= n - (n == (int) max_tx_size); ++m) {
      if (m == n)
        r_tx_size += vp10_cost_zero(tx_probs[m]);
      else
        r_tx_size += vp10_cost_one(tx_probs[m]);
    }

#if CONFIG_EXT_TX
    // Skip tx types not usable for this size/block/prediction combination.
    ext_tx_set = get_ext_tx_set(n, bs, is_inter);
    if (is_inter) {
      if (!ext_tx_used_inter[ext_tx_set][tx_type])
        continue;
    } else {
      if (!ext_tx_used_intra[ext_tx_set][tx_type])
        continue;
    }
    mbmi->tx_type = tx_type;
    // In set 1, if DCT_DCT is still the running best when the DST_* range
    // begins, skip ahead: break out of the size loop with tx_type set to
    // IDTX - 1 so the outer loop's increment resumes the search at IDTX.
    if (ext_tx_set == 1 &&
        mbmi->tx_type >= DST_ADST && mbmi->tx_type < IDTX &&
        best_tx_type == DCT_DCT) {
      tx_type = IDTX - 1;
      break;
    }
    txfm_rd_in_plane(x,
#if CONFIG_VAR_TX
                     cpi,
#endif
                     &r, &d, &s,
                     &sse, ref_best_rd, 0, bs, n,
                     cpi->sf.use_fast_coef_costing);
    // Add the cost of signalling the tx type when more than one is possible.
    if (get_ext_tx_types(n, bs, is_inter) > 1 &&
        !xd->lossless[xd->mi[0]->mbmi.segment_id] &&
        r != INT_MAX) {
      if (is_inter) {
        if (ext_tx_set > 0)
          r += cpi->inter_tx_type_costs[ext_tx_set]
                                       [mbmi->tx_size][mbmi->tx_type];
      } else {
        if (ext_tx_set > 0)
          r += cpi->intra_tx_type_costs[ext_tx_set][mbmi->tx_size]
                                       [mbmi->mode][mbmi->tx_type];
      }
    }
#else   // CONFIG_EXT_TX
    txfm_rd_in_plane(x,
#if CONFIG_VAR_TX
                     cpi,
#endif
                     &r, &d, &s,
                     &sse, ref_best_rd, 0, bs, n,
                     cpi->sf.use_fast_coef_costing);
#endif  // CONFIG_EXT_TX

    if (r == INT_MAX)
      continue;

    if (s) {
      // All-skip candidate: rate is just the skip flag (plus the tx-size
      // signalling cost for intra blocks under tx_select).
      if (is_inter) {
        rd = RDCOST(x->rdmult, x->rddiv, s1, sse);
      } else {
        rd = RDCOST(x->rdmult, x->rddiv, s1 + r_tx_size * tx_select, sse);
      }
    } else {
      rd = RDCOST(x->rdmult, x->rddiv, r + s0 + r_tx_size * tx_select, d);
    }

    if (tx_select && !(s && is_inter))
      r += r_tx_size;

    // For non-skip inter blocks, also consider coding the block as all-skip.
    if (is_inter && !xd->lossless[xd->mi[0]->mbmi.segment_id] && !s)
      rd = VPXMIN(rd, RDCOST(x->rdmult, x->rddiv, s1, sse));

    // Early termination in transform size search.
    if (cpi->sf.tx_size_search_breakout &&
        (rd == INT64_MAX ||
#if CONFIG_EXT_TX
         (s == 1 && tx_type != DCT_DCT) ||
#else
         (s == 1) ||
#endif
         (n < (int) max_tx_size && rd > last_rd)))
      break;

    last_rd = rd;
    // Under CONFIG_EXT_TX, while the running best for an inter block is
    // still DCT_DCT, best_rd is scaled by ext_tx_th before the comparison.
    if (rd <
#if CONFIG_EXT_TX
        (is_inter && best_tx_type == DCT_DCT ? ext_tx_th : 1) *
#endif  // CONFIG_EXT_TX
        best_rd) {
      best_tx = n;
      best_rd = rd;
      *distortion = d;
      *rate = r;
      *skip = s;
      *psse = sse;
#if CONFIG_EXT_TX
      best_tx_type = mbmi->tx_type;
#endif  // CONFIG_EXT_TX
    }
  }
#if CONFIG_EXT_TX
  }
#endif  // CONFIG_EXT_TX

  mbmi->tx_size = best_tx;
#if CONFIG_EXT_TX
  mbmi->tx_type = best_tx_type;
  // Re-evaluate the winning size/type pair; the recomputed r/d/s/sse are not
  // stored back to the out params (presumably run for its side effects on the
  // encoder state — verify against txfm_rd_in_plane).
  txfm_rd_in_plane(x,
#if CONFIG_VAR_TX
                   cpi,
#endif
                   &r, &d, &s,
                   &sse, ref_best_rd, 0, bs, best_tx,
                   cpi->sf.use_fast_coef_costing);
#endif  // CONFIG_EXT_TX
}
955
Yaowu Xu26a9afc2015-08-13 09:42:27 -0700956static void super_block_yrd(VP10_COMP *cpi, MACROBLOCK *x, int *rate,
Jingning Han3ee6db62015-08-05 19:00:31 -0700957 int64_t *distortion, int *skip,
958 int64_t *psse, BLOCK_SIZE bs,
959 int64_t ref_best_rd) {
960 MACROBLOCKD *xd = &x->e_mbd;
961 int64_t sse;
962 int64_t *ret_sse = psse ? psse : &sse;
963
964 assert(bs == xd->mi[0]->mbmi.sb_type);
965
Ronald S. Bultje60c58b52015-10-12 17:54:25 -0400966 if (CONFIG_MISC_FIXES && xd->lossless[xd->mi[0]->mbmi.segment_id]) {
967 choose_smallest_tx_size(cpi, x, rate, distortion, skip, ret_sse,
968 ref_best_rd, bs);
969 } else if (cpi->sf.tx_size_search_method == USE_LARGESTALL ||
970 xd->lossless[xd->mi[0]->mbmi.segment_id]) {
Jingning Han3ee6db62015-08-05 19:00:31 -0700971 choose_largest_tx_size(cpi, x, rate, distortion, skip, ret_sse, ref_best_rd,
972 bs);
973 } else {
974 choose_tx_size_from_rd(cpi, x, rate, distortion, skip, ret_sse,
975 ref_best_rd, bs);
976 }
977}
978
979static int conditional_skipintra(PREDICTION_MODE mode,
980 PREDICTION_MODE best_intra_mode) {
981 if (mode == D117_PRED &&
982 best_intra_mode != V_PRED &&
983 best_intra_mode != D135_PRED)
984 return 1;
985 if (mode == D63_PRED &&
986 best_intra_mode != V_PRED &&
987 best_intra_mode != D45_PRED)
988 return 1;
989 if (mode == D207_PRED &&
990 best_intra_mode != H_PRED &&
991 best_intra_mode != D45_PRED)
992 return 1;
993 if (mode == D153_PRED &&
994 best_intra_mode != H_PRED &&
995 best_intra_mode != D135_PRED)
996 return 1;
997 return 0;
998}
999
// Palette RD search for the luma plane.  Only attempted when the frame
// allows screen content tools and the block has between 2 and 64 distinct
// colors.  For each palette size n (from min(colors, PALETTE_MAX_SIZE) down
// to 2) the source pixels are quantized with k-means, the block is re-coded
// with the resulting color index map, and the configuration is kept whenever
// it improves *best_rd (outputs: palette_mode_info, best_palette_color_map,
// best_tx, mode_selected, best_rd).
void rd_pick_palette_intra_sby(VP10_COMP *cpi, MACROBLOCK *x, BLOCK_SIZE bsize,
                               int palette_ctx, int dc_mode_cost,
                               PALETTE_MODE_INFO *palette_mode_info,
                               uint8_t *best_palette_color_map,
                               TX_SIZE *best_tx, PREDICTION_MODE *mode_selected,
                               int64_t *best_rd) {
  MACROBLOCKD *const xd = &x->e_mbd;
  MODE_INFO *const mic = xd->mi[0];
  int rows = 4 * num_4x4_blocks_high_lookup[bsize];
  int cols = 4 * num_4x4_blocks_wide_lookup[bsize];
  int this_rate, this_rate_tokenonly, s;
  int64_t this_distortion, this_rd;
  int colors, n;
  int src_stride = x->plane[0].src.stride;
  uint8_t *src = x->plane[0].src.buf;

#if CONFIG_VP9_HIGHBITDEPTH
  if (cpi->common.use_highbitdepth)
    colors = vp10_count_colors_highbd(src, src_stride, rows, cols,
                                      cpi->common.bit_depth);
  else
#endif  // CONFIG_VP9_HIGHBITDEPTH
    colors = vp10_count_colors(src, src_stride, rows, cols);
  // Default: no palette (overwritten below if a palette candidate wins).
  palette_mode_info->palette_size[0] = 0;

  if (colors > 1 && colors <= 64 && cpi->common.allow_screen_content_tools) {
    int r, c, i, j, k;
    int max_itr = 50;  // k-means iteration cap
    int color_ctx, color_idx = 0;
    int color_order[PALETTE_MAX_SIZE];
    double *data = x->palette_buffer->kmeans_data_buf;
    uint8_t *indices = x->palette_buffer->kmeans_indices_buf;
    uint8_t *pre_indices = x->palette_buffer->kmeans_pre_indices_buf;
    double centroids[PALETTE_MAX_SIZE];
    uint8_t *color_map;
    double lb, ub, val;  // lower/upper bound of the pixel values seen
    PALETTE_MODE_INFO *pmi = &mic->mbmi.palette_mode_info;
#if CONFIG_VP9_HIGHBITDEPTH
    uint16_t *src16 = CONVERT_TO_SHORTPTR(src);
    if (cpi->common.use_highbitdepth)
      lb = ub = src16[0];
    else
#endif  // CONFIG_VP9_HIGHBITDEPTH
      lb = ub = src[0];

    // Copy the block's pixels into the double-valued k-means buffer while
    // tracking the min/max pixel value.
#if CONFIG_VP9_HIGHBITDEPTH
    if (cpi->common.use_highbitdepth) {
      for (r = 0; r < rows; ++r) {
        for (c = 0; c < cols; ++c) {
          val = src16[r * src_stride + c];
          data[r * cols + c] = val;
          if (val < lb)
            lb = val;
          else if (val > ub)
            ub = val;
        }
      }
    } else {
#endif  // CONFIG_VP9_HIGHBITDEPTH
      for (r = 0; r < rows; ++r) {
        for (c = 0; c < cols; ++c) {
          val = src[r * src_stride + c];
          data[r * cols + c] = val;
          if (val < lb)
            lb = val;
          else if (val > ub)
            ub = val;
        }
      }
#if CONFIG_VP9_HIGHBITDEPTH
    }
#endif  // CONFIG_VP9_HIGHBITDEPTH

    mic->mbmi.mode = DC_PRED;

    for (n = colors > PALETTE_MAX_SIZE ? PALETTE_MAX_SIZE : colors;
         n >= 2; --n) {
      // Seed centroids evenly across the [lb, ub] value range.
      for (i = 0; i < n; ++i)
        centroids[i] = lb + (2 * i + 1) * (ub - lb) / n / 2;
      vp10_k_means(data, centroids, indices, pre_indices, rows * cols,
                   n, 1, max_itr);
      vp10_insertion_sort(centroids, n);
      for (i = 0; i < n; ++i)
        centroids[i] = round(centroids[i]);
      // remove duplicates (centroids are sorted, so duplicates are adjacent;
      // k ends up as the deduplicated palette size)
      i = 1;
      k = n;
      while (i < k) {
        if (centroids[i] == centroids[i - 1]) {
          j = i;
          while (j < k - 1) {
            centroids[j] = centroids[j + 1];
            ++j;
          }
          --k;
        } else {
          ++i;
        }
      }

      // Store the clipped palette colors (both branches write the same
      // object: pmi aliases mic->mbmi.palette_mode_info).
#if CONFIG_VP9_HIGHBITDEPTH
      if (cpi->common.use_highbitdepth)
        for (i = 0; i < k; ++i)
          mic->mbmi.palette_mode_info.palette_colors[i] =
              clip_pixel_highbd(round(centroids[i]), cpi->common.bit_depth);
      else
#endif  // CONFIG_VP9_HIGHBITDEPTH
        for (i = 0; i < k; ++i)
          pmi->palette_colors[i] = clip_pixel((int)round(centroids[i]));
      pmi->palette_size[0] = k;

      // Map every pixel to its nearest centroid to build the index map.
      vp10_calc_indices(data, centroids, indices, rows * cols, k, 1);
      for (r = 0; r < rows; ++r)
        for (c = 0; c < cols; ++c)
          xd->plane[0].color_index_map[r * cols + c] = indices[r * cols + c];

      super_block_yrd(cpi, x, &this_rate_tokenonly, &this_distortion,
                      &s, NULL, bsize, *best_rd);
      if (this_rate_tokenonly == INT_MAX)
        continue;

      // Rate = token rate + DC mode cost + k raw palette colors at
      // bit_depth bits each (vp10_cost_bit(128, 0) is one even-probability
      // bit) + palette size cost.
      this_rate = this_rate_tokenonly + dc_mode_cost +
          cpi->common.bit_depth * k * vp10_cost_bit(128, 0) +
          cpi->palette_y_size_cost[bsize - BLOCK_8X8][k - 2];
      this_rate +=
          vp10_cost_bit(vp10_default_palette_y_mode_prob[bsize - BLOCK_8X8]
                                                        [palette_ctx], 1);
      color_map = xd->plane[0].color_index_map;
      this_rate += write_uniform_cost(k, xd->plane[0].color_index_map[0]);
      // Cost of the remaining index map entries, each coded with a context
      // derived from its already-coded neighbors.
      for (i = 0; i < rows; ++i) {
        for (j = (i == 0 ? 1 : 0); j < cols; ++j) {
          color_ctx = vp10_get_palette_color_context(color_map, cols, i, j,
                                                     k, color_order);
          for (r = 0; r < k; ++r)
            if (color_map[i * cols + j] == color_order[r]) {
              color_idx = r;
              break;
            }
          assert(color_idx < k);
          this_rate +=
              cpi->palette_y_color_cost[k - 2][color_ctx][color_idx];
        }
      }
      this_rd = RDCOST(x->rdmult, x->rddiv, this_rate, this_distortion);

      if (this_rd < *best_rd) {
        *best_rd = this_rd;
        *palette_mode_info = mic->mbmi.palette_mode_info;
        memcpy(best_palette_color_map, xd->plane[0].color_index_map,
               rows * cols * sizeof(xd->plane[0].color_index_map[0]));
        *mode_selected = DC_PRED;
        *best_tx = mic->mbmi.tx_size;
      }
    }
  }
}
1156
Yaowu Xu26a9afc2015-08-13 09:42:27 -07001157static int64_t rd_pick_intra4x4block(VP10_COMP *cpi, MACROBLOCK *x,
Jingning Han3ee6db62015-08-05 19:00:31 -07001158 int row, int col,
1159 PREDICTION_MODE *best_mode,
1160 const int *bmode_costs,
1161 ENTROPY_CONTEXT *a, ENTROPY_CONTEXT *l,
1162 int *bestrate, int *bestratey,
1163 int64_t *bestdistortion,
1164 BLOCK_SIZE bsize, int64_t rd_thresh) {
1165 PREDICTION_MODE mode;
1166 MACROBLOCKD *const xd = &x->e_mbd;
1167 int64_t best_rd = rd_thresh;
1168 struct macroblock_plane *p = &x->plane[0];
1169 struct macroblockd_plane *pd = &xd->plane[0];
1170 const int src_stride = p->src.stride;
1171 const int dst_stride = pd->dst.stride;
1172 const uint8_t *src_init = &p->src.buf[row * 4 * src_stride + col * 4];
1173 uint8_t *dst_init = &pd->dst.buf[row * 4 * src_stride + col * 4];
1174 ENTROPY_CONTEXT ta[2], tempa[2];
1175 ENTROPY_CONTEXT tl[2], templ[2];
1176 const int num_4x4_blocks_wide = num_4x4_blocks_wide_lookup[bsize];
1177 const int num_4x4_blocks_high = num_4x4_blocks_high_lookup[bsize];
1178 int idx, idy;
1179 uint8_t best_dst[8 * 8];
1180#if CONFIG_VP9_HIGHBITDEPTH
1181 uint16_t best_dst16[8 * 8];
1182#endif
1183
1184 memcpy(ta, a, sizeof(ta));
1185 memcpy(tl, l, sizeof(tl));
1186 xd->mi[0]->mbmi.tx_size = TX_4X4;
hui su5d011cb2015-09-15 12:44:13 -07001187 xd->mi[0]->mbmi.palette_mode_info.palette_size[0] = 0;
Jingning Han3ee6db62015-08-05 19:00:31 -07001188
1189#if CONFIG_VP9_HIGHBITDEPTH
1190 if (xd->cur_buf->flags & YV12_FLAG_HIGHBITDEPTH) {
1191 for (mode = DC_PRED; mode <= TM_PRED; ++mode) {
1192 int64_t this_rd;
1193 int ratey = 0;
1194 int64_t distortion = 0;
1195 int rate = bmode_costs[mode];
1196
1197 if (!(cpi->sf.intra_y_mode_mask[TX_4X4] & (1 << mode)))
1198 continue;
1199
1200 // Only do the oblique modes if the best so far is
1201 // one of the neighboring directional modes
1202 if (cpi->sf.mode_search_skip_flags & FLAG_SKIP_INTRA_DIRMISMATCH) {
1203 if (conditional_skipintra(mode, *best_mode))
Debargha Mukherjee8a429242015-10-12 12:30:55 -07001204 continue;
Jingning Han3ee6db62015-08-05 19:00:31 -07001205 }
1206
1207 memcpy(tempa, ta, sizeof(ta));
1208 memcpy(templ, tl, sizeof(tl));
1209
1210 for (idy = 0; idy < num_4x4_blocks_high; ++idy) {
1211 for (idx = 0; idx < num_4x4_blocks_wide; ++idx) {
1212 const int block = (row + idy) * 2 + (col + idx);
1213 const uint8_t *const src = &src_init[idx * 4 + idy * 4 * src_stride];
1214 uint8_t *const dst = &dst_init[idx * 4 + idy * 4 * dst_stride];
1215 int16_t *const src_diff = vp10_raster_block_offset_int16(BLOCK_8X8,
Debargha Mukherjee8a429242015-10-12 12:30:55 -07001216 block,
1217 p->src_diff);
Jingning Han3ee6db62015-08-05 19:00:31 -07001218 tran_low_t *const coeff = BLOCK_OFFSET(x->plane[0].coeff, block);
1219 xd->mi[0]->bmi[block].as_mode = mode;
Ronald S. Bultjec7dc1d72015-10-12 10:35:46 -04001220 vp10_predict_intra_block(xd, 1, 1, TX_4X4, mode, dst, dst_stride,
Jingning Han3ee6db62015-08-05 19:00:31 -07001221 dst, dst_stride,
1222 col + idx, row + idy, 0);
1223 vpx_highbd_subtract_block(4, 4, src_diff, 8, src, src_stride,
1224 dst, dst_stride, xd->bd);
Ronald S. Bultje60c58b52015-10-12 17:54:25 -04001225 if (xd->lossless[xd->mi[0]->mbmi.segment_id]) {
hui sub3cc3a02015-08-24 14:37:54 -07001226 TX_TYPE tx_type = get_tx_type(PLANE_TYPE_Y, xd, block, TX_4X4);
Debargha Mukherjee9fc691e2015-09-03 02:58:12 -07001227 const scan_order *so = get_scan(TX_4X4, tx_type, 0);
Jingning Han20484042015-10-21 17:38:00 -07001228#if CONFIG_VAR_TX
1229 const int coeff_ctx = combine_entropy_contexts(*(tempa + idx),
1230 *(templ + idy));
1231#endif
Yaowu Xu7c514e22015-09-28 15:55:46 -07001232 vp10_highbd_fwd_txfm_4x4(src_diff, coeff, 8, DCT_DCT, 1);
Jingning Han3ee6db62015-08-05 19:00:31 -07001233 vp10_regular_quantize_b_4x4(x, 0, block, so->scan, so->iscan);
Jingning Han20484042015-10-21 17:38:00 -07001234 ratey += cost_coeffs(x, 0, block,
1235#if CONFIG_VAR_TX
1236 coeff_ctx,
1237#else
1238 tempa + idx, templ + idy,
1239#endif
1240 TX_4X4,
Jingning Han3ee6db62015-08-05 19:00:31 -07001241 so->scan, so->neighbors,
1242 cpi->sf.use_fast_coef_costing);
1243 if (RDCOST(x->rdmult, x->rddiv, ratey, distortion) >= best_rd)
1244 goto next_highbd;
hui sud76e5b32015-08-13 16:27:19 -07001245 vp10_highbd_inv_txfm_add_4x4(BLOCK_OFFSET(pd->dqcoeff, block),
1246 dst, dst_stride, p->eobs[block],
Yaowu Xu7c514e22015-09-28 15:55:46 -07001247 xd->bd, DCT_DCT, 1);
Jingning Han3ee6db62015-08-05 19:00:31 -07001248 } else {
1249 int64_t unused;
hui sub3cc3a02015-08-24 14:37:54 -07001250 TX_TYPE tx_type = get_tx_type(PLANE_TYPE_Y, xd, block, TX_4X4);
Debargha Mukherjee9fc691e2015-09-03 02:58:12 -07001251 const scan_order *so = get_scan(TX_4X4, tx_type, 0);
Jingning Han20484042015-10-21 17:38:00 -07001252#if CONFIG_VAR_TX
1253 const int coeff_ctx = combine_entropy_contexts(*(tempa + idx),
1254 *(templ + idy));
1255#endif
Yaowu Xu7c514e22015-09-28 15:55:46 -07001256 vp10_highbd_fwd_txfm_4x4(src_diff, coeff, 8, tx_type, 0);
Jingning Han3ee6db62015-08-05 19:00:31 -07001257 vp10_regular_quantize_b_4x4(x, 0, block, so->scan, so->iscan);
Jingning Han20484042015-10-21 17:38:00 -07001258 ratey += cost_coeffs(x, 0, block,
1259#if CONFIG_VAR_TX
1260 coeff_ctx,
1261#else
1262 tempa + idx, templ + idy,
1263#endif
1264 TX_4X4,
Jingning Han3ee6db62015-08-05 19:00:31 -07001265 so->scan, so->neighbors,
1266 cpi->sf.use_fast_coef_costing);
1267 distortion += vp10_highbd_block_error(
1268 coeff, BLOCK_OFFSET(pd->dqcoeff, block),
1269 16, &unused, xd->bd) >> 2;
1270 if (RDCOST(x->rdmult, x->rddiv, ratey, distortion) >= best_rd)
1271 goto next_highbd;
hui sud76e5b32015-08-13 16:27:19 -07001272 vp10_highbd_inv_txfm_add_4x4(BLOCK_OFFSET(pd->dqcoeff, block),
1273 dst, dst_stride, p->eobs[block],
Yaowu Xu7c514e22015-09-28 15:55:46 -07001274 xd->bd, tx_type, 0);
Jingning Han3ee6db62015-08-05 19:00:31 -07001275 }
1276 }
1277 }
1278
1279 rate += ratey;
1280 this_rd = RDCOST(x->rdmult, x->rddiv, rate, distortion);
1281
1282 if (this_rd < best_rd) {
1283 *bestrate = rate;
1284 *bestratey = ratey;
1285 *bestdistortion = distortion;
1286 best_rd = this_rd;
1287 *best_mode = mode;
1288 memcpy(a, tempa, sizeof(tempa));
1289 memcpy(l, templ, sizeof(templ));
1290 for (idy = 0; idy < num_4x4_blocks_high * 4; ++idy) {
1291 memcpy(best_dst16 + idy * 8,
1292 CONVERT_TO_SHORTPTR(dst_init + idy * dst_stride),
1293 num_4x4_blocks_wide * 4 * sizeof(uint16_t));
1294 }
1295 }
Debargha Mukherjee8a429242015-10-12 12:30:55 -07001296next_highbd:
Jingning Han3ee6db62015-08-05 19:00:31 -07001297 {}
1298 }
Jingning Han481b8342015-09-11 08:56:06 -07001299 if (best_rd >= rd_thresh)
Jingning Han3ee6db62015-08-05 19:00:31 -07001300 return best_rd;
1301
1302 for (idy = 0; idy < num_4x4_blocks_high * 4; ++idy) {
1303 memcpy(CONVERT_TO_SHORTPTR(dst_init + idy * dst_stride),
1304 best_dst16 + idy * 8,
1305 num_4x4_blocks_wide * 4 * sizeof(uint16_t));
1306 }
1307
1308 return best_rd;
1309 }
1310#endif // CONFIG_VP9_HIGHBITDEPTH
1311
1312 for (mode = DC_PRED; mode <= TM_PRED; ++mode) {
1313 int64_t this_rd;
1314 int ratey = 0;
1315 int64_t distortion = 0;
1316 int rate = bmode_costs[mode];
1317
1318 if (!(cpi->sf.intra_y_mode_mask[TX_4X4] & (1 << mode)))
1319 continue;
1320
1321 // Only do the oblique modes if the best so far is
1322 // one of the neighboring directional modes
1323 if (cpi->sf.mode_search_skip_flags & FLAG_SKIP_INTRA_DIRMISMATCH) {
1324 if (conditional_skipintra(mode, *best_mode))
Debargha Mukherjee8a429242015-10-12 12:30:55 -07001325 continue;
Jingning Han3ee6db62015-08-05 19:00:31 -07001326 }
1327
1328 memcpy(tempa, ta, sizeof(ta));
1329 memcpy(templ, tl, sizeof(tl));
1330
1331 for (idy = 0; idy < num_4x4_blocks_high; ++idy) {
1332 for (idx = 0; idx < num_4x4_blocks_wide; ++idx) {
1333 const int block = (row + idy) * 2 + (col + idx);
1334 const uint8_t *const src = &src_init[idx * 4 + idy * 4 * src_stride];
1335 uint8_t *const dst = &dst_init[idx * 4 + idy * 4 * dst_stride];
1336 int16_t *const src_diff =
1337 vp10_raster_block_offset_int16(BLOCK_8X8, block, p->src_diff);
1338 tran_low_t *const coeff = BLOCK_OFFSET(x->plane[0].coeff, block);
1339 xd->mi[0]->bmi[block].as_mode = mode;
Ronald S. Bultjec7dc1d72015-10-12 10:35:46 -04001340 vp10_predict_intra_block(xd, 1, 1, TX_4X4, mode, dst, dst_stride,
Jingning Han3ee6db62015-08-05 19:00:31 -07001341 dst, dst_stride, col + idx, row + idy, 0);
1342 vpx_subtract_block(4, 4, src_diff, 8, src, src_stride, dst, dst_stride);
1343
Ronald S. Bultje60c58b52015-10-12 17:54:25 -04001344 if (xd->lossless[xd->mi[0]->mbmi.segment_id]) {
hui sub3cc3a02015-08-24 14:37:54 -07001345 TX_TYPE tx_type = get_tx_type(PLANE_TYPE_Y, xd, block, TX_4X4);
Debargha Mukherjee9fc691e2015-09-03 02:58:12 -07001346 const scan_order *so = get_scan(TX_4X4, tx_type, 0);
Jingning Han2cdc1272015-10-09 09:57:42 -07001347#if CONFIG_VAR_TX
1348 int coeff_ctx = combine_entropy_contexts(*(tempa + idx),
1349 *(templ + idy));
1350#endif
Yaowu Xu7c514e22015-09-28 15:55:46 -07001351 vp10_fwd_txfm_4x4(src_diff, coeff, 8, DCT_DCT, 1);
Jingning Han3ee6db62015-08-05 19:00:31 -07001352 vp10_regular_quantize_b_4x4(x, 0, block, so->scan, so->iscan);
Jingning Han2cdc1272015-10-09 09:57:42 -07001353#if CONFIG_VAR_TX
1354 ratey += cost_coeffs(x, 0, block, coeff_ctx, TX_4X4, so->scan,
1355 so->neighbors, cpi->sf.use_fast_coef_costing);
1356 *(tempa + idx) = !(p->eobs[block] == 0);
1357 *(templ + idy) = !(p->eobs[block] == 0);
1358#else
1359 ratey += cost_coeffs(x, 0, block, tempa + idx, templ + idy,
1360 TX_4X4,
Jingning Han3ee6db62015-08-05 19:00:31 -07001361 so->scan, so->neighbors,
1362 cpi->sf.use_fast_coef_costing);
Jingning Han2cdc1272015-10-09 09:57:42 -07001363#endif
Jingning Han3ee6db62015-08-05 19:00:31 -07001364 if (RDCOST(x->rdmult, x->rddiv, ratey, distortion) >= best_rd)
1365 goto next;
hui sud76e5b32015-08-13 16:27:19 -07001366 vp10_inv_txfm_add_4x4(BLOCK_OFFSET(pd->dqcoeff, block),
Yaowu Xu7c514e22015-09-28 15:55:46 -07001367 dst, dst_stride, p->eobs[block], DCT_DCT, 1);
Jingning Han3ee6db62015-08-05 19:00:31 -07001368 } else {
1369 int64_t unused;
hui sub3cc3a02015-08-24 14:37:54 -07001370 TX_TYPE tx_type = get_tx_type(PLANE_TYPE_Y, xd, block, TX_4X4);
Debargha Mukherjee9fc691e2015-09-03 02:58:12 -07001371 const scan_order *so = get_scan(TX_4X4, tx_type, 0);
Jingning Han2cdc1272015-10-09 09:57:42 -07001372#if CONFIG_VAR_TX
1373 int coeff_ctx = combine_entropy_contexts(*(tempa + idx),
1374 *(templ + idy));
1375#endif
Yaowu Xu7c514e22015-09-28 15:55:46 -07001376 vp10_fwd_txfm_4x4(src_diff, coeff, 8, tx_type, 0);
Jingning Han3ee6db62015-08-05 19:00:31 -07001377 vp10_regular_quantize_b_4x4(x, 0, block, so->scan, so->iscan);
Jingning Han2cdc1272015-10-09 09:57:42 -07001378#if CONFIG_VAR_TX
1379 ratey += cost_coeffs(x, 0, block, coeff_ctx, TX_4X4, so->scan,
1380 so->neighbors, cpi->sf.use_fast_coef_costing);
1381 *(tempa + idx) = !(p->eobs[block] == 0);
1382 *(templ + idy) = !(p->eobs[block] == 0);
1383#else
1384 ratey += cost_coeffs(x, 0, block, tempa + idx, templ + idy,
1385 TX_4X4, so->scan, so->neighbors,
1386 cpi->sf.use_fast_coef_costing);
1387#endif
Jingning Han3ee6db62015-08-05 19:00:31 -07001388 distortion += vp10_block_error(coeff, BLOCK_OFFSET(pd->dqcoeff, block),
1389 16, &unused) >> 2;
1390 if (RDCOST(x->rdmult, x->rddiv, ratey, distortion) >= best_rd)
1391 goto next;
hui sud76e5b32015-08-13 16:27:19 -07001392 vp10_inv_txfm_add_4x4(BLOCK_OFFSET(pd->dqcoeff, block),
Yaowu Xu7c514e22015-09-28 15:55:46 -07001393 dst, dst_stride, p->eobs[block], tx_type, 0);
Jingning Han3ee6db62015-08-05 19:00:31 -07001394 }
1395 }
1396 }
1397
1398 rate += ratey;
1399 this_rd = RDCOST(x->rdmult, x->rddiv, rate, distortion);
1400
1401 if (this_rd < best_rd) {
1402 *bestrate = rate;
1403 *bestratey = ratey;
1404 *bestdistortion = distortion;
1405 best_rd = this_rd;
1406 *best_mode = mode;
1407 memcpy(a, tempa, sizeof(tempa));
1408 memcpy(l, templ, sizeof(templ));
1409 for (idy = 0; idy < num_4x4_blocks_high * 4; ++idy)
1410 memcpy(best_dst + idy * 8, dst_init + idy * dst_stride,
1411 num_4x4_blocks_wide * 4);
1412 }
1413 next:
1414 {}
1415 }
1416
Jingning Hanf1376972015-09-10 12:42:21 -07001417 if (best_rd >= rd_thresh)
Jingning Han3ee6db62015-08-05 19:00:31 -07001418 return best_rd;
1419
1420 for (idy = 0; idy < num_4x4_blocks_high * 4; ++idy)
1421 memcpy(dst_init + idy * dst_stride, best_dst + idy * 8,
1422 num_4x4_blocks_wide * 4);
1423
1424 return best_rd;
1425}
1426
// Intra mode RD search for partitions smaller than 8x8: picks the best intra
// mode independently for each 4x4/4x8/8x4 sub-block of the 8x8 block via
// rd_pick_intra4x4block.  Returns the combined rd cost, or INT64_MAX as an
// early out when the running total can no longer beat |best_rd|.
static int64_t rd_pick_intra_sub_8x8_y_mode(VP10_COMP *cpi, MACROBLOCK *mb,
                                            int *rate, int *rate_y,
                                            int64_t *distortion,
                                            int64_t best_rd) {
  int i, j;
  const MACROBLOCKD *const xd = &mb->e_mbd;
  MODE_INFO *const mic = xd->mi[0];
  const MODE_INFO *above_mi = xd->above_mi;
  const MODE_INFO *left_mi = xd->left_mi;
  const BLOCK_SIZE bsize = xd->mi[0]->mbmi.sb_type;
  const int num_4x4_blocks_wide = num_4x4_blocks_wide_lookup[bsize];
  const int num_4x4_blocks_high = num_4x4_blocks_high_lookup[bsize];
  int idx, idy;
  int cost = 0;
  int64_t total_distortion = 0;
  int tot_rate_y = 0;
  int64_t total_rd = 0;
  ENTROPY_CONTEXT t_above[4], t_left[4];
  const int *bmode_costs = cpi->mbmode_cost;

  // Local copies of the entropy contexts, advanced per sub-block.
  memcpy(t_above, xd->plane[0].above_context, sizeof(t_above));
  memcpy(t_left, xd->plane[0].left_context, sizeof(t_left));

#if CONFIG_EXT_INTRA
  mic->mbmi.ext_intra_mode_info.use_ext_intra_mode[0] = 0;
#endif  // CONFIG_EXT_INTRA

  // Pick modes for each sub-block (of size 4x4, 4x8, or 8x4) in an 8x8 block.
  for (idy = 0; idy < 2; idy += num_4x4_blocks_high) {
    for (idx = 0; idx < 2; idx += num_4x4_blocks_wide) {
      PREDICTION_MODE best_mode = DC_PRED;
      int r = INT_MAX, ry = INT_MAX;
      int64_t d = INT64_MAX, this_rd = INT64_MAX;
      i = idy * 2 + idx;
      if (cpi->common.frame_type == KEY_FRAME) {
        // On key frames, mode costs are conditioned on the above/left modes.
        const PREDICTION_MODE A = vp10_above_block_mode(mic, above_mi, i);
        const PREDICTION_MODE L = vp10_left_block_mode(mic, left_mi, i);

        bmode_costs = cpi->y_mode_costs[A][L];
      }

      // Budget for this sub-block is whatever remains of best_rd.
      this_rd = rd_pick_intra4x4block(cpi, mb, idy, idx, &best_mode,
                                      bmode_costs, t_above + idx, t_left + idy,
                                      &r, &ry, &d, bsize, best_rd - total_rd);
      if (this_rd >= best_rd - total_rd)
        return INT64_MAX;

      total_rd += this_rd;
      cost += r;
      total_distortion += d;
      tot_rate_y += ry;

      // Replicate the chosen mode to every 4x4 unit this sub-block covers.
      mic->bmi[i].as_mode = best_mode;
      for (j = 1; j < num_4x4_blocks_high; ++j)
        mic->bmi[i + j * 2].as_mode = best_mode;
      for (j = 1; j < num_4x4_blocks_wide; ++j)
        mic->bmi[i + j].as_mode = best_mode;

      if (total_rd >= best_rd)
        return INT64_MAX;
    }
  }

  *rate = cost;
  *rate_y = tot_rate_y;
  *distortion = total_distortion;
  // The block-level mode field is taken from the bottom-right sub-block.
  mic->mbmi.mode = mic->bmi[3].as_mode;

  return RDCOST(mb->rdmult, mb->rddiv, cost, total_distortion);
}
1497
#if CONFIG_EXT_INTRA
// Rate-distortion search over the ext intra modes (filter intra modes and
// directional prediction angles) for the luma plane.
// Return 1 if an ext intra mode is selected (updating the out params,
// *best_rd and the mode info); return 0 otherwise.
//
// Fix: |best_angle_dir| could be read uninitialized (or reuse a stale value
// from a previous refinement step) in "best_angle += best_angle_dir * delta"
// whenever every candidate in a step was pruned or returned INT_MAX rate.
// It is now reset to 0 at the start of each step, so best_angle is left
// unchanged in that case.
static int rd_pick_ext_intra_sby(VP10_COMP *cpi, MACROBLOCK *x,
                                 int *rate, int *rate_tokenonly,
                                 int64_t *distortion, int *skippable,
                                 BLOCK_SIZE bsize, int mode_cost,
                                 int64_t *best_rd) {
  MACROBLOCKD *const xd = &x->e_mbd;
  MODE_INFO *const mic = xd->mi[0];
  MB_MODE_INFO *mbmi = &mic->mbmi;
  int this_rate, this_rate_tokenonly, s;
  int ext_intra_selected_flag = 0;
  // Coarse-to-fine angle search state: each step evaluates the angles
  // best_angle + i * delta for i in [-branches[step], branches[step]].
  int i, step, delta, angle, best_angle;
  int best_angle_dir = 0;  // initialized: see fix note above
  int deltas[3] = {25, 5, 1};
  int branches[3] = {2, 2, 2};
  int64_t this_distortion, this_rd, best_angle_rd = INT64_MAX;
  EXT_INTRA_MODE mode;
  TX_SIZE best_tx_size = TX_4X4;
  EXT_INTRA_MODE_INFO ext_intra_mode_info;
#if CONFIG_EXT_TX
  TX_TYPE best_tx_type;
#endif  // CONFIG_EXT_TX

  vp10_zero(ext_intra_mode_info);
  mbmi->ext_intra_mode_info.use_ext_intra_mode[0] = 1;
  mbmi->mode = DC_PRED;

  // Pass 1: filter intra modes (skipped when only DR modes are enabled).
  if (!DR_ONLY) {
    for (mode = 0; mode < FILTER_INTRA_MODES; ++mode) {
      mbmi->ext_intra_mode_info.ext_intra_mode[0] = mode;
      super_block_yrd(cpi, x, &this_rate_tokenonly, &this_distortion,
                      &s, NULL, bsize, *best_rd);
      if (this_rate_tokenonly == INT_MAX)
        continue;

      this_rate = this_rate_tokenonly +
          vp10_cost_bit(cpi->common.fc->ext_intra_probs[0], 1) +
          vp10_cost_bit(DR_EXT_INTRA_PROB, 0) +
          write_uniform_cost(FILTER_INTRA_MODES, mode) + mode_cost;
      this_rd = RDCOST(x->rdmult, x->rddiv, this_rate, this_distortion);

      if (this_rd < *best_rd) {
        *best_rd = this_rd;
        best_tx_size = mic->mbmi.tx_size;
        ext_intra_mode_info = mbmi->ext_intra_mode_info;
#if CONFIG_EXT_TX
        best_tx_type = mic->mbmi.tx_type;
#endif  // CONFIG_EXT_TX
        *rate = this_rate;
        *rate_tokenonly = this_rate_tokenonly;
        *distortion = this_distortion;
        *skippable = s;
        ext_intra_selected_flag = 1;
      }
    }
  }

  // Pass 2: directional (DR) prediction angles.
  mbmi->ext_intra_mode_info.ext_intra_mode[0] = EXT_DR_PRED;
  if (ANGLE_FAST_SEARCH) {
    // Coarse-to-fine refinement around the middle angle.
    best_angle = EXT_INTRA_ANGLES / 2;
    for (step = 0; step < 3; ++step) {
      delta = deltas[step];
      // If no candidate below completes, keep best_angle where it is.
      best_angle_dir = 0;
      for (i = -branches[step]; i <= branches[step]; ++i) {
        int64_t rd_thresh;
        if (i == 0 && step != 0)
          continue;
        angle = best_angle + i * delta;
        if (angle < 0)
          angle = 0;
        if (angle >= EXT_INTRA_ANGLES)
          angle = EXT_INTRA_ANGLES - 1;
        if (angle == best_angle && step != 0)
          continue;
        mbmi->ext_intra_mode_info.ext_intra_angle[0] = angle;
        if (*best_rd == INT64_MAX)
          rd_thresh = best_angle_rd;
        else
          rd_thresh = VPXMIN(best_angle_rd, *best_rd * RD_ADJUSTER);
        super_block_yrd(cpi, x, &this_rate_tokenonly, &this_distortion,
                        &s, NULL, bsize, rd_thresh);
        if (this_rate_tokenonly == INT_MAX)
          continue;
        this_rate = this_rate_tokenonly +
            vp10_cost_bit(cpi->common.fc->ext_intra_probs[0], 1) +
            (DR_ONLY ? 0 : vp10_cost_bit(DR_EXT_INTRA_PROB, 1)) +
            write_uniform_cost(EXT_INTRA_ANGLES, angle) + mode_cost;
        this_rd = RDCOST(x->rdmult, x->rddiv, this_rate, this_distortion);
        if (this_rd < *best_rd) {
          *best_rd = this_rd;
          best_tx_size = mic->mbmi.tx_size;
          ext_intra_mode_info = mbmi->ext_intra_mode_info;
#if CONFIG_EXT_TX
          best_tx_type = mic->mbmi.tx_type;
#endif  // CONFIG_EXT_TX
          *rate = this_rate;
          *rate_tokenonly = this_rate_tokenonly;
          *distortion = this_distortion;
          *skippable = s;
          ext_intra_selected_flag = 1;
        }
        if (this_rd < best_angle_rd) {
          best_angle_rd = this_rd;
          best_angle_dir = i;
        }
      }

      // Recenter the search on the best direction found in this step.
      best_angle += best_angle_dir * delta;
      if (best_angle < 0)
        best_angle = 0;
      if (best_angle >= EXT_INTRA_ANGLES)
        best_angle = EXT_INTRA_ANGLES - 1;
      // Stop refining once the overall best rd is clearly better than the
      // best angle rd (scaled by RD_ADJUSTER).
      if (*best_rd < best_angle_rd / RD_ADJUSTER)
        break;
    }
  } else {
    // Exhaustive angle search.  Angles mapping to 90/180 degrees are
    // skipped (presumably covered by the regular V/H intra modes — verify).
    for (angle = 0; angle < EXT_INTRA_ANGLES; ++angle) {
      mbmi->ext_intra_mode_info.ext_intra_angle[0] = angle;
      if (prediction_angle_map(angle) == 90 ||
          prediction_angle_map(angle) == 180)
        continue;
      super_block_yrd(cpi, x, &this_rate_tokenonly, &this_distortion,
                      &s, NULL, bsize, *best_rd);
      if (this_rate_tokenonly == INT_MAX)
        continue;

      this_rate = this_rate_tokenonly +
          vp10_cost_bit(cpi->common.fc->ext_intra_probs[0], 1) +
          (DR_ONLY ? 0 : vp10_cost_bit(DR_EXT_INTRA_PROB, 1)) +
          write_uniform_cost(EXT_INTRA_ANGLES, angle) + mode_cost;
      this_rd = RDCOST(x->rdmult, x->rddiv, this_rate, this_distortion);

      if (this_rd < *best_rd) {
        *best_rd = this_rd;
        best_tx_size = mic->mbmi.tx_size;
        ext_intra_mode_info = mbmi->ext_intra_mode_info;
#if CONFIG_EXT_TX
        best_tx_type = mic->mbmi.tx_type;
#endif  // CONFIG_EXT_TX
        *rate = this_rate;
        *rate_tokenonly = this_rate_tokenonly;
        *distortion = this_distortion;
        *skippable = s;
        ext_intra_selected_flag = 1;
      }
    }
  }

  if (ext_intra_selected_flag) {
    // Restore the winning configuration into the mode info.
    mbmi->mode = DC_PRED;
    mbmi->tx_size = best_tx_size;
    mbmi->ext_intra_mode_info.use_ext_intra_mode[0] =
        ext_intra_mode_info.use_ext_intra_mode[0];
    mbmi->ext_intra_mode_info.ext_intra_mode[0] =
        ext_intra_mode_info.ext_intra_mode[0];
    mbmi->ext_intra_mode_info.ext_intra_angle[0] =
        ext_intra_mode_info.ext_intra_angle[0];
#if CONFIG_EXT_TX
    mbmi->tx_type = best_tx_type;
#endif  // CONFIG_EXT_TX
    return 1;
  } else {
    return 0;
  }
}
#endif  // CONFIG_EXT_INTRA
1663
Jingning Han3ee6db62015-08-05 19:00:31 -07001664// This function is used only for intra_only frames
Yaowu Xu26a9afc2015-08-13 09:42:27 -07001665static int64_t rd_pick_intra_sby_mode(VP10_COMP *cpi, MACROBLOCK *x,
Jingning Han3ee6db62015-08-05 19:00:31 -07001666 int *rate, int *rate_tokenonly,
1667 int64_t *distortion, int *skippable,
1668 BLOCK_SIZE bsize,
1669 int64_t best_rd) {
1670 PREDICTION_MODE mode;
1671 PREDICTION_MODE mode_selected = DC_PRED;
1672 MACROBLOCKD *const xd = &x->e_mbd;
1673 MODE_INFO *const mic = xd->mi[0];
1674 int this_rate, this_rate_tokenonly, s;
1675 int64_t this_distortion, this_rd;
1676 TX_SIZE best_tx = TX_4X4;
hui sube3559b2015-10-07 09:29:02 -07001677#if CONFIG_EXT_INTRA
1678 EXT_INTRA_MODE_INFO ext_intra_mode_info;
1679#endif // CONFIG_EXT_INTRA
hui su3fa01292015-09-28 18:38:00 -07001680#if CONFIG_EXT_TX
hui su4f16f112015-10-02 10:45:27 -07001681 TX_TYPE best_tx_type = DCT_DCT;
hui su3fa01292015-09-28 18:38:00 -07001682#endif // CONFIG_EXT_TX
Jingning Han3ee6db62015-08-05 19:00:31 -07001683 int *bmode_costs;
hui su5d011cb2015-09-15 12:44:13 -07001684 PALETTE_MODE_INFO palette_mode_info;
hui suaaf6f622015-10-14 20:18:18 -07001685 uint8_t *best_palette_color_map = cpi->common.allow_screen_content_tools ?
1686 x->palette_buffer->best_palette_color_map : NULL;
hui su5d011cb2015-09-15 12:44:13 -07001687 int rows = 4 * num_4x4_blocks_high_lookup[bsize];
1688 int cols = 4 * num_4x4_blocks_wide_lookup[bsize];
1689 int palette_ctx = 0;
Jingning Han3ee6db62015-08-05 19:00:31 -07001690 const MODE_INFO *above_mi = xd->above_mi;
1691 const MODE_INFO *left_mi = xd->left_mi;
1692 const PREDICTION_MODE A = vp10_above_block_mode(mic, above_mi, 0);
1693 const PREDICTION_MODE L = vp10_left_block_mode(mic, left_mi, 0);
1694 bmode_costs = cpi->y_mode_costs[A][L];
1695
hui sube3559b2015-10-07 09:29:02 -07001696#if CONFIG_EXT_INTRA
1697 ext_intra_mode_info.use_ext_intra_mode[0] = 0;
1698 mic->mbmi.ext_intra_mode_info.use_ext_intra_mode[0] = 0;
1699#endif // CONFIG_EXT_INTRA
Jingning Han3ee6db62015-08-05 19:00:31 -07001700 memset(x->skip_txfm, SKIP_TXFM_NONE, sizeof(x->skip_txfm));
hui su5d011cb2015-09-15 12:44:13 -07001701 palette_mode_info.palette_size[0] = 0;
1702 mic->mbmi.palette_mode_info.palette_size[0] = 0;
1703 if (above_mi)
1704 palette_ctx += (above_mi->mbmi.palette_mode_info.palette_size[0] > 0);
1705 if (left_mi)
1706 palette_ctx += (left_mi->mbmi.palette_mode_info.palette_size[0] > 0);
1707
Jingning Han3ee6db62015-08-05 19:00:31 -07001708 /* Y Search for intra prediction mode */
hui sube3559b2015-10-07 09:29:02 -07001709 for (mode = DC_PRED; mode <= TM_PRED; ++mode) {
Jingning Han3ee6db62015-08-05 19:00:31 -07001710 mic->mbmi.mode = mode;
1711
1712 super_block_yrd(cpi, x, &this_rate_tokenonly, &this_distortion,
1713 &s, NULL, bsize, best_rd);
1714
1715 if (this_rate_tokenonly == INT_MAX)
1716 continue;
1717
1718 this_rate = this_rate_tokenonly + bmode_costs[mode];
hui su5d011cb2015-09-15 12:44:13 -07001719 if (cpi->common.allow_screen_content_tools && mode == DC_PRED)
1720 this_rate +=
1721 vp10_cost_bit(vp10_default_palette_y_mode_prob[bsize - BLOCK_8X8]
1722 [palette_ctx], 0);
hui sube3559b2015-10-07 09:29:02 -07001723#if CONFIG_EXT_INTRA
1724 if (mode == DC_PRED)
1725 this_rate += vp10_cost_bit(cpi->common.fc->ext_intra_probs[0], 0);
1726#endif // CONFIG_EXT_INTRA
Jingning Han3ee6db62015-08-05 19:00:31 -07001727 this_rd = RDCOST(x->rdmult, x->rddiv, this_rate, this_distortion);
1728
1729 if (this_rd < best_rd) {
1730 mode_selected = mode;
1731 best_rd = this_rd;
1732 best_tx = mic->mbmi.tx_size;
hui su3fa01292015-09-28 18:38:00 -07001733#if CONFIG_EXT_TX
hui su4f16f112015-10-02 10:45:27 -07001734 best_tx_type = mic->mbmi.tx_type;
hui su3fa01292015-09-28 18:38:00 -07001735#endif // CONFIG_EXT_TX
Jingning Han3ee6db62015-08-05 19:00:31 -07001736 *rate = this_rate;
1737 *rate_tokenonly = this_rate_tokenonly;
1738 *distortion = this_distortion;
1739 *skippable = s;
1740 }
1741 }
1742
hui su5d011cb2015-09-15 12:44:13 -07001743 if (cpi->common.allow_screen_content_tools)
1744 rd_pick_palette_intra_sby(cpi, x, bsize, palette_ctx, bmode_costs[DC_PRED],
1745 &palette_mode_info, best_palette_color_map,
1746 &best_tx, &mode_selected, &best_rd);
1747
hui sube3559b2015-10-07 09:29:02 -07001748#if CONFIG_EXT_INTRA
1749 if (!palette_mode_info.palette_size[0] > 0) {
1750 if (rd_pick_ext_intra_sby(cpi, x, rate, rate_tokenonly, distortion,
1751 skippable, bsize, bmode_costs[DC_PRED],
1752 &best_rd)) {
1753 mode_selected = mic->mbmi.mode;
1754 best_tx = mic->mbmi.tx_size;
1755 ext_intra_mode_info = mic->mbmi.ext_intra_mode_info;
1756#if CONFIG_EXT_TX
1757 best_tx_type = mic->mbmi.tx_type;
1758#endif // CONFIG_EXT_TX
1759 }
1760 }
1761
1762 mic->mbmi.ext_intra_mode_info.use_ext_intra_mode[0] =
1763 ext_intra_mode_info.use_ext_intra_mode[0];
1764 if (ext_intra_mode_info.use_ext_intra_mode[0]) {
1765 mic->mbmi.ext_intra_mode_info.ext_intra_mode[0] =
1766 ext_intra_mode_info.ext_intra_mode[0];
1767 mic->mbmi.ext_intra_mode_info.ext_intra_angle[0] =
1768 ext_intra_mode_info.ext_intra_angle[0];
1769 }
1770#endif // CONFIG_EXT_INTRA
1771
Jingning Han3ee6db62015-08-05 19:00:31 -07001772 mic->mbmi.mode = mode_selected;
1773 mic->mbmi.tx_size = best_tx;
hui su3fa01292015-09-28 18:38:00 -07001774#if CONFIG_EXT_TX
hui su4f16f112015-10-02 10:45:27 -07001775 mic->mbmi.tx_type = best_tx_type;
hui su3fa01292015-09-28 18:38:00 -07001776#endif // CONFIG_EXT_TX
hui su5d011cb2015-09-15 12:44:13 -07001777 mic->mbmi.palette_mode_info.palette_size[0] =
1778 palette_mode_info.palette_size[0];
1779 if (palette_mode_info.palette_size[0] > 0) {
1780 memcpy(mic->mbmi.palette_mode_info.palette_colors,
1781 palette_mode_info.palette_colors,
1782 PALETTE_MAX_SIZE * sizeof(palette_mode_info.palette_colors[0]));
1783 memcpy(xd->plane[0].color_index_map, best_palette_color_map,
1784 rows * cols * sizeof(best_palette_color_map[0]));
1785 }
Jingning Han3ee6db62015-08-05 19:00:31 -07001786
1787 return best_rd;
1788}
1789
#if CONFIG_VAR_TX
// Rate/distortion cost of coding a single transform block of size tx_size
// located at 4x4-unit offset (blk_row, blk_col) within the plane block.
// Quantizes the block, reconstructs it into a local 32x32 scratch buffer
// and accumulates into *rate, *dist, *bsse and *skip (skip is ANDed with
// "this block has no coefficients").
static void tx_block_rd_b(const VP10_COMP *cpi, MACROBLOCK *x, TX_SIZE tx_size,
                          int blk_row, int blk_col, int plane, int block,
                          int plane_bsize, int coeff_ctx,
                          int *rate, int64_t *dist, int64_t *bsse, int *skip) {
  MACROBLOCKD *xd = &x->e_mbd;
  const struct macroblock_plane *const p = &x->plane[plane];
  struct macroblockd_plane *const pd = &xd->plane[plane];
#if CONFIG_VP9_HIGHBITDEPTH
  // High-bitdepth build measures distortion in the transform domain via
  // vp10_highbd_block_error instead of reconstructing pixels.
  const int ss_txfrm_size = tx_size << 1;
  int64_t this_sse;
  int shift = tx_size == TX_32X32 ? 0 : 2;
  tran_low_t *const coeff = BLOCK_OFFSET(p->coeff, block);
#endif
  unsigned int tmp_sse = 0;
  tran_low_t *const dqcoeff = BLOCK_OFFSET(pd->dqcoeff, block);
  PLANE_TYPE plane_type = (plane == 0) ? PLANE_TYPE_Y : PLANE_TYPE_UV;
  TX_TYPE tx_type = get_tx_type(plane_type, xd, block, tx_size);
  const scan_order *const scan_order =
      get_scan(tx_size, tx_type, is_inter_block(&xd->mi[0]->mbmi));

  BLOCK_SIZE txm_bsize = txsize_to_bsize[tx_size];
  // bh: transform block dimension in pixels (blocks are square here).
  int bh = 4 * num_4x4_blocks_wide_lookup[txm_bsize];
  int src_stride = p->src.stride;
  uint8_t *src = &p->src.buf[4 * blk_row * src_stride + 4 * blk_col];
  uint8_t *dst = &pd->dst.buf[4 * blk_row * pd->dst.stride + 4 * blk_col];
  // Scratch reconstruction buffer, fixed stride of 32 pixels.
  DECLARE_ALIGNED(16, uint8_t, rec_buffer[32 * 32]);

  int max_blocks_high = num_4x4_blocks_high_lookup[plane_bsize];
  int max_blocks_wide = num_4x4_blocks_wide_lookup[plane_bsize];

  // Clamp the usable extent for blocks overlapping the frame boundary.
  if (xd->mb_to_bottom_edge < 0)
    max_blocks_high += xd->mb_to_bottom_edge >> (5 + pd->subsampling_y);
  if (xd->mb_to_right_edge < 0)
    max_blocks_wide += xd->mb_to_right_edge >> (5 + pd->subsampling_x);

  vp10_xform_quant(x, plane, block, blk_row, blk_col, plane_bsize, tx_size);

  // Seed the scratch buffer with the current prediction.
  vpx_convolve_copy(dst, pd->dst.stride, rec_buffer, 32,
                    NULL, 0, NULL, 0, bh, bh);

  if (blk_row + (bh >> 2) > max_blocks_high ||
      blk_col + (bh >> 2) > max_blocks_wide) {
    // Partially visible block: accumulate prediction SSE in 8x8 tiles over
    // the visible region only.
    // NOTE(review): this inner this_sse shadows the outer int64_t this_sse
    // in the CONFIG_VP9_HIGHBITDEPTH build — confirm intended.
    int idx, idy;
    unsigned int this_sse;
    int blocks_height = VPXMIN(bh >> 2, max_blocks_high - blk_row);
    int blocks_width = VPXMIN(bh >> 2, max_blocks_wide - blk_col);
    for (idy = 0; idy < blocks_height; idy += 2) {
      for (idx = 0; idx < blocks_width; idx += 2) {
        cpi->fn_ptr[BLOCK_8X8].vf(src + 4 * idy * src_stride + 4 * idx,
                                  src_stride,
                                  rec_buffer + 4 * idy * 32 + 4 * idx,
                                  32, &this_sse);
        tmp_sse += this_sse;
      }
    }
  } else {
    // Fully visible: one variance-function call covers the whole block.
    cpi->fn_ptr[txm_bsize].vf(src, src_stride, rec_buffer, 32, &tmp_sse);
  }

#if CONFIG_VP9_HIGHBITDEPTH
  *dist += vp10_highbd_block_error(coeff, dqcoeff, 16 << ss_txfrm_size,
                                   &this_sse, xd->bd) >> shift;
  *bsse += this_sse >> shift;
#else
  // bsse: SSE of prediction vs source, scaled by 16 to match RD units.
  *bsse += (int64_t)tmp_sse * 16;

  if (p->eobs[block] > 0) {
    // Reconstruct into the scratch buffer and re-measure SSE so *dist
    // reflects the actual coded result.
    switch (tx_size) {
      case TX_32X32:
        vp10_inv_txfm_add_32x32(dqcoeff, rec_buffer, 32, p->eobs[block],
                                tx_type);
        break;
      case TX_16X16:
        vp10_inv_txfm_add_16x16(dqcoeff, rec_buffer, 32, p->eobs[block],
                                tx_type);
        break;
      case TX_8X8:
        vp10_inv_txfm_add_8x8(dqcoeff, rec_buffer, 32, p->eobs[block],
                              tx_type);
        break;
      case TX_4X4:
        vp10_inv_txfm_add_4x4(dqcoeff, rec_buffer, 32, p->eobs[block],
                              tx_type,
                              xd->lossless[xd->mi[0]->mbmi.segment_id]);
        break;
      default:
        assert(0 && "Invalid transform size");
        break;
    }

    if ((bh >> 2) + blk_col > max_blocks_wide ||
        (bh >> 2) + blk_row > max_blocks_high) {
      // Same boundary-aware tiling as above, now against the reconstruction.
      int idx, idy;
      unsigned int this_sse;
      int blocks_height = VPXMIN(bh >> 2, max_blocks_high - blk_row);
      int blocks_width = VPXMIN(bh >> 2, max_blocks_wide - blk_col);
      tmp_sse = 0;
      for (idy = 0; idy < blocks_height; idy += 2) {
        for (idx = 0; idx < blocks_width; idx += 2) {
          cpi->fn_ptr[BLOCK_8X8].vf(src + 4 * idy * src_stride + 4 * idx,
                                    src_stride,
                                    rec_buffer + 4 * idy * 32 + 4 * idx,
                                    32, &this_sse);
          tmp_sse += this_sse;
        }
      }
    } else {
      cpi->fn_ptr[txm_bsize].vf(src, src_stride,
                                rec_buffer, 32, &tmp_sse);
    }
  }
  // If eob == 0 this is still the prediction-only SSE (dist == bsse).
  *dist += (int64_t)tmp_sse * 16;
#endif  // CONFIG_VP9_HIGHBITDEPTH

  *rate += cost_coeffs(x, plane, block, coeff_ctx, tx_size,
                       scan_order->scan, scan_order->neighbors, 0);
  *skip &= (p->eobs[block] == 0);
}
1909
// Recursive transform-partition search for one transform block. Compares the
// RD cost of coding (blk_row, blk_col) with the current tx_size against the
// cost of splitting it into four quadrants at tx_size - 1, and records the
// better choice in mbmi->inter_tx_size / mbmi->tx_size, the entropy contexts
// (ta/tl), and the txfm partition contexts (tx_above/tx_left).
// Outputs *rate/*dist/*bsse/*skip for the chosen coding; sets *is_cost_valid
// to 0 when the search ran out of rd budget (ref_best_rd).
static void select_tx_block(const VP10_COMP *cpi, MACROBLOCK *x,
                            int blk_row, int blk_col, int plane, int block,
                            TX_SIZE tx_size, BLOCK_SIZE plane_bsize,
                            ENTROPY_CONTEXT *ta, ENTROPY_CONTEXT *tl,
                            TXFM_CONTEXT *tx_above, TXFM_CONTEXT *tx_left,
                            int *rate, int64_t *dist,
                            int64_t *bsse, int *skip,
                            int64_t ref_best_rd, int *is_cost_valid) {
  MACROBLOCKD *const xd = &x->e_mbd;
  MB_MODE_INFO *const mbmi = &xd->mi[0]->mbmi;
  struct macroblock_plane *const p = &x->plane[plane];
  struct macroblockd_plane *const pd = &xd->plane[plane];
  // Index into the 8x8 inter_tx_size grid (units of 8x8 luma blocks).
  int tx_idx = (blk_row >> (1 - pd->subsampling_y)) * 8 +
               (blk_col >> (1 - pd->subsampling_x));
  int max_blocks_high = num_4x4_blocks_high_lookup[plane_bsize];
  int max_blocks_wide = num_4x4_blocks_wide_lookup[plane_bsize];
  int64_t this_rd = INT64_MAX;
  ENTROPY_CONTEXT *pta = ta + blk_col;
  ENTROPY_CONTEXT *ptl = tl + blk_row;
  ENTROPY_CONTEXT stxa = 0, stxl = 0;
  int coeff_ctx, i;
  int ctx = txfm_partition_context(tx_above + (blk_col >> 1),
                                   tx_left + (blk_row >> 1), tx_size);

  // Accumulators for the "split into four sub-blocks" alternative.
  int64_t sum_dist = 0, sum_bsse = 0;
  int64_t sum_rd = INT64_MAX;
  int sum_rate = vp10_cost_bit(cpi->common.fc->txfm_partition_prob[ctx], 1);
  int all_skip = 1;
  int tmp_eob = 0;
  int zero_blk_rate;

  // Budget already exhausted by previously coded blocks.
  if (ref_best_rd < 0) {
    *is_cost_valid = 0;
    return;
  }

  // Collapse the per-4x4 entropy contexts spanned by this transform size
  // into single above/left flags.
  // NOTE(review): the multi-byte reads reinterpret ENTROPY_CONTEXT arrays
  // as wider integers (strict-aliasing gray area) — long-standing idiom in
  // this codebase, kept as-is.
  switch (tx_size) {
    case TX_4X4:
      stxa = pta[0];
      stxl = ptl[0];
      break;
    case TX_8X8:
      stxa = !!*(const uint16_t *)&pta[0];
      stxl = !!*(const uint16_t *)&ptl[0];
      break;
    case TX_16X16:
      stxa = !!*(const uint32_t *)&pta[0];
      stxl = !!*(const uint32_t *)&ptl[0];
      break;
    case TX_32X32:
      stxa = !!*(const uint64_t *)&pta[0];
      stxl = !!*(const uint64_t *)&ptl[0];
      break;
    default:
      assert(0 && "Invalid transform size.");
      break;
  }
  coeff_ctx = combine_entropy_contexts(stxa, stxl);

  // Clamp the usable extent at frame boundaries.
  if (xd->mb_to_bottom_edge < 0)
    max_blocks_high += xd->mb_to_bottom_edge >> (5 + pd->subsampling_y);
  if (xd->mb_to_right_edge < 0)
    max_blocks_wide += xd->mb_to_right_edge >> (5 + pd->subsampling_x);

  *rate = 0;
  *dist = 0;
  *bsse = 0;
  *skip = 1;

  // Entirely outside the visible area: nothing to code.
  if (blk_row >= max_blocks_high || blk_col >= max_blocks_wide)
    return;

  // Cost of signaling an all-zero (EOB at position 0) block.
  zero_blk_rate =
      x->token_costs[tx_size][pd->plane_type][1][0][0][coeff_ctx][EOB_TOKEN];

  // Alternative 1: code this block at the current tx_size.
  if (cpi->common.tx_mode == TX_MODE_SELECT || tx_size == TX_4X4) {
    mbmi->inter_tx_size[tx_idx] = tx_size;
    tx_block_rd_b(cpi, x, tx_size, blk_row, blk_col, plane, block,
                  plane_bsize, coeff_ctx, rate, dist, bsse, skip);

    // Force the block to all-zero when that is RD-cheaper (not allowed in
    // lossless mode, which must reproduce the residual exactly).
    if ((RDCOST(x->rdmult, x->rddiv, *rate, *dist) >=
         RDCOST(x->rdmult, x->rddiv, zero_blk_rate, *bsse) || *skip == 1) &&
        !xd->lossless[mbmi->segment_id]) {
      *rate = zero_blk_rate;
      *dist = *bsse;
      *skip = 1;
      x->blk_skip[plane][blk_row * max_blocks_wide + blk_col] = 1;
      p->eobs[block] = 0;
    } else {
      x->blk_skip[plane][blk_row * max_blocks_wide + blk_col] = 0;
      *skip = 0;
    }

    // Non-leaf sizes also pay for the "no split" partition bit.
    if (tx_size > TX_4X4)
      *rate += vp10_cost_bit(cpi->common.fc->txfm_partition_prob[ctx], 0);
    this_rd = RDCOST(x->rdmult, x->rddiv, *rate, *dist);
    tmp_eob = p->eobs[block];
  }

  // Alternative 2: split into four tx_size-1 quadrants and recurse.
  if (tx_size > TX_4X4) {
    BLOCK_SIZE bsize = txsize_to_bsize[tx_size];
    int bsl = b_height_log2_lookup[bsize];
    int sub_step = 1 << (2 * (tx_size - 1));
    int i;
    int this_rate;
    int64_t this_dist;
    int64_t this_bsse;
    int this_skip;
    int this_cost_valid = 1;
    int64_t tmp_rd = 0;

    --bsl;
    for (i = 0; i < 4 && this_cost_valid; ++i) {
      int offsetr = (i >> 1) << bsl;
      int offsetc = (i & 0x01) << bsl;
      select_tx_block(cpi, x, blk_row + offsetr, blk_col + offsetc,
                      plane, block + i * sub_step, tx_size - 1,
                      plane_bsize, ta, tl, tx_above, tx_left,
                      &this_rate, &this_dist,
                      &this_bsse, &this_skip,
                      ref_best_rd - tmp_rd, &this_cost_valid);
      sum_rate += this_rate;
      sum_dist += this_dist;
      sum_bsse += this_bsse;
      all_skip &= this_skip;
      tmp_rd += RDCOST(x->rdmult, x->rddiv, this_rate, this_dist);
      // Prune: the split already costs more than coding whole.
      if (this_rd < tmp_rd)
        break;
    }
    if (this_cost_valid)
      sum_rd = tmp_rd;
  }

  if (this_rd < sum_rd) {
    // Keep the un-split coding: refresh contexts and propagate tx_size to
    // every 8x8 cell this transform covers.
    int idx, idy;
    for (i = 0; i < (1 << tx_size); ++i)
      pta[i] = ptl[i] = !(tmp_eob == 0);
    txfm_partition_update(tx_above + (blk_col >> 1),
                          tx_left + (blk_row >> 1), tx_size);
    mbmi->inter_tx_size[tx_idx] = tx_size;

    for (idy = 0; idy < (1 << tx_size) / 2; ++idy)
      for (idx = 0; idx < (1 << tx_size) / 2; ++idx)
        mbmi->inter_tx_size[tx_idx + (idy << 3) + idx] = tx_size;
    mbmi->tx_size = tx_size;
    if (this_rd == INT64_MAX)
      *is_cost_valid = 0;
    x->blk_skip[plane][blk_row * max_blocks_wide + blk_col] = *skip;
  } else {
    // Keep the split: recursion already updated contexts/tx sizes; just
    // report the accumulated totals.
    *rate = sum_rate;
    *dist = sum_dist;
    *bsse = sum_bsse;
    *skip = all_skip;
    if (sum_rd == INT64_MAX)
      *is_cost_valid = 0;
  }
}
2067
2068static void inter_block_yrd(const VP10_COMP *cpi, MACROBLOCK *x,
2069 int *rate, int64_t *distortion, int *skippable,
2070 int64_t *sse, BLOCK_SIZE bsize,
2071 int64_t ref_best_rd) {
2072 MACROBLOCKD *const xd = &x->e_mbd;
2073 int is_cost_valid = 1;
Jingning Han1e48f742015-10-13 11:59:49 -07002074 int64_t this_rd = 0;
Jingning Han2cdc1272015-10-09 09:57:42 -07002075
2076 if (ref_best_rd < 0)
2077 is_cost_valid = 0;
2078
2079 *rate = 0;
2080 *distortion = 0;
2081 *sse = 0;
2082 *skippable = 1;
2083
2084 if (is_cost_valid) {
2085 const struct macroblockd_plane *const pd = &xd->plane[0];
2086 const BLOCK_SIZE plane_bsize = get_plane_block_size(bsize, pd);
2087 const int mi_width = num_4x4_blocks_wide_lookup[plane_bsize];
2088 const int mi_height = num_4x4_blocks_high_lookup[plane_bsize];
2089 BLOCK_SIZE txb_size = txsize_to_bsize[max_txsize_lookup[plane_bsize]];
2090 int bh = num_4x4_blocks_wide_lookup[txb_size];
2091 int idx, idy;
2092 int block = 0;
2093 int step = 1 << (max_txsize_lookup[plane_bsize] * 2);
2094 ENTROPY_CONTEXT ctxa[16], ctxl[16];
Jingning Han3edad6e2015-10-14 09:38:17 -07002095 TXFM_CONTEXT tx_above[8], tx_left[8];
Jingning Han2cdc1272015-10-09 09:57:42 -07002096
2097 int pnrate = 0, pnskip = 1;
2098 int64_t pndist = 0, pnsse = 0;
2099
2100 vp10_get_entropy_contexts(bsize, TX_4X4, pd, ctxa, ctxl);
Jingning Han3edad6e2015-10-14 09:38:17 -07002101 memcpy(tx_above, xd->above_txfm_context,
2102 sizeof(TXFM_CONTEXT) * (mi_width >> 1));
2103 memcpy(tx_left, xd->left_txfm_context,
2104 sizeof(TXFM_CONTEXT) * (mi_height >> 1));
Jingning Han2cdc1272015-10-09 09:57:42 -07002105
2106 for (idy = 0; idy < mi_height; idy += bh) {
2107 for (idx = 0; idx < mi_width; idx += bh) {
2108 select_tx_block(cpi, x, idy, idx, 0, block,
Jingning Han3a279612015-10-12 19:20:58 -07002109 max_txsize_lookup[plane_bsize], plane_bsize,
Jingning Han3edad6e2015-10-14 09:38:17 -07002110 ctxa, ctxl, tx_above, tx_left,
2111 &pnrate, &pndist, &pnsse, &pnskip,
Jingning Han1e48f742015-10-13 11:59:49 -07002112 ref_best_rd - this_rd, &is_cost_valid);
Jingning Han2cdc1272015-10-09 09:57:42 -07002113 *rate += pnrate;
2114 *distortion += pndist;
2115 *sse += pnsse;
2116 *skippable &= pnskip;
Jingning Han1e48f742015-10-13 11:59:49 -07002117 this_rd += VPXMIN(RDCOST(x->rdmult, x->rddiv, pnrate, pndist),
2118 RDCOST(x->rdmult, x->rddiv, 0, pnsse));
Jingning Han2cdc1272015-10-09 09:57:42 -07002119 block += step;
2120 }
2121 }
2122 }
2123
2124 this_rd = VPXMIN(RDCOST(x->rdmult, x->rddiv, *rate, *distortion),
2125 RDCOST(x->rdmult, x->rddiv, 0, *sse));
2126 if (this_rd > ref_best_rd)
2127 is_cost_valid = 0;
2128
2129 if (!is_cost_valid) {
2130 // reset cost value
2131 *rate = INT_MAX;
2132 *distortion = INT64_MAX;
2133 *sse = INT64_MAX;
2134 *skippable = 0;
2135 }
2136}
2137
#if CONFIG_EXT_TX
// Outer loop of the luma RD search over extended transform types: for each
// allowed tx_type it reruns the recursive partition search (inter_block_yrd)
// and keeps the cheapest combination of tx_type, per-block tx sizes and
// block-skip flags, restoring the winner into mbmi/x at the end.
static void select_tx_type_yrd(const VP10_COMP *cpi, MACROBLOCK *x,
                               int *rate, int64_t *distortion, int *skippable,
                               int64_t *sse, BLOCK_SIZE bsize,
                               int64_t ref_best_rd) {
  const TX_SIZE max_tx_size = max_txsize_lookup[bsize];
  const VP10_COMMON *const cm = &cpi->common;
  MACROBLOCKD *const xd = &x->e_mbd;
  MB_MODE_INFO *const mbmi = &xd->mi[0]->mbmi;
  int64_t rd = INT64_MAX;
  int64_t best_rd = INT64_MAX;
  TX_TYPE tx_type, best_tx_type = DCT_DCT;
  int ext_tx_set;
  const int is_inter = is_inter_block(mbmi);
  vpx_prob skip_prob = vp10_get_skip_prob(cm, xd);
  int s0 = vp10_cost_bit(skip_prob, 0);
  int s1 = vp10_cost_bit(skip_prob, 1);
  // Snapshot of the best candidate's per-8x8 tx sizes and block-skip map.
  // NOTE(review): if no tx_type ever wins (all searches terminate early),
  // these are copied back below while still uninitialized — confirm callers
  // always reject the INT_MAX outputs before using mbmi state.
  TX_SIZE best_tx_size[64];
  TX_SIZE best_tx = TX_SIZES;
  uint8_t best_blk_skip[256];
  // Number of 4x4 blocks in this block size.
  const int n4 = 1 << (num_pels_log2_lookup[bsize] - 4);
  int idx, idy;

  *distortion = INT64_MAX;
  *rate = INT_MAX;
  *skippable = 0;
  *sse = INT64_MAX;

  ext_tx_set = get_ext_tx_set(max_tx_size, bsize, is_inter);

  for (tx_type = DCT_DCT; tx_type < TX_TYPES; ++tx_type) {
    int this_rate = 0;
    int this_skip = 1;
    int64_t this_dist = 0;
    int64_t this_sse = 0;

    // Skip transform types not allowed in the current ext-tx set.
    if (is_inter) {
      if (!ext_tx_used_inter[ext_tx_set][tx_type])
        continue;
    } else {
      if (!ext_tx_used_intra[ext_tx_set][tx_type])
        continue;
    }

    mbmi->tx_type = tx_type;

    // Pruning: if DCT_DCT is still the best by the time we reach the
    // DST_ADST..IDTX-1 range, stop considering the remaining types.
    // NOTE(review): the assignment to tx_type is dead before a break;
    // sibling loops in this file use "tx_type = IDTX - 1; continue;" so
    // that ++tx_type resumes at IDTX — confirm break vs continue intent.
    if (ext_tx_set == 1 &&
        mbmi->tx_type >= DST_ADST && mbmi->tx_type < IDTX &&
        best_tx_type == DCT_DCT) {
      tx_type = IDTX - 1;
      break;
    }

    inter_block_yrd(cpi, x, &this_rate, &this_dist, &this_skip, &this_sse,
                    bsize, ref_best_rd);

    // Add the cost of signaling this tx_type (only when a choice exists
    // and the block is not lossless).
    if (get_ext_tx_types(max_tx_size, bsize, is_inter) > 1 &&
        !xd->lossless[xd->mi[0]->mbmi.segment_id] &&
        this_rate != INT_MAX) {
      if (is_inter) {
        if (ext_tx_set > 0)
          this_rate += cpi->inter_tx_type_costs[ext_tx_set]
                                               [max_tx_size][mbmi->tx_type];
      } else {
        if (ext_tx_set > 0)
          this_rate += cpi->intra_tx_type_costs[ext_tx_set][max_tx_size]
                                               [mbmi->mode][mbmi->tx_type];
      }
    }

    if (this_rate == INT_MAX)
      continue;

    // RD with the frame-level skip flag folded in: either signal skip (s1)
    // and pay sse, or signal no-skip (s0) and pay rate + dist.
    if (this_skip)
      rd = RDCOST(x->rdmult, x->rddiv, s1, this_sse);
    else
      rd = RDCOST(x->rdmult, x->rddiv, this_rate + s0, this_dist);

    if (is_inter && !xd->lossless[xd->mi[0]->mbmi.segment_id] && !this_skip)
      rd = VPXMIN(rd, RDCOST(x->rdmult, x->rddiv, s1, this_sse));

    // ext_tx_th biases the comparison while DCT_DCT is still the incumbent
    // (presumably a small >1 threshold constant — see its definition).
    if (rd <
        (is_inter && best_tx_type == DCT_DCT ? ext_tx_th : 1) *
        best_rd) {
      best_rd = rd;
      *distortion = this_dist;
      *rate = this_rate;
      *skippable = this_skip;
      *sse = this_sse;
      best_tx_type = mbmi->tx_type;
      best_tx = mbmi->tx_size;
      memcpy(best_blk_skip, x->blk_skip[0], sizeof(best_blk_skip[0]) * n4);
      for (idy = 0; idy < xd->n8_h; ++idy)
        for (idx = 0; idx < xd->n8_w; ++idx)
          best_tx_size[idy * 8 + idx] = mbmi->inter_tx_size[idy * 8 + idx];
    }
  }

  // Restore the winning configuration (the loop leaves mbmi/x holding the
  // last candidate tried, not necessarily the best).
  mbmi->tx_type = best_tx_type;
  for (idy = 0; idy < xd->n8_h; ++idy)
    for (idx = 0; idx < xd->n8_w; ++idx)
      mbmi->inter_tx_size[idy * 8 + idx] = best_tx_size[idy * 8 + idx];
  mbmi->tx_size = best_tx;
  memcpy(x->blk_skip[0], best_blk_skip, sizeof(best_blk_skip[0]) * n4);
}
#endif
2244
// Recursively walks the transform partition that was already decided (stored
// in mbmi->inter_tx_size) and accumulates the RD stats of coding each leaf
// block. Unlike select_tx_block() this makes no decisions — it only measures.
// Used for chroma planes (and any plane whose tx size derives from the luma
// decision via get_uv_tx_size_impl).
static void tx_block_rd(const VP10_COMP *cpi, MACROBLOCK *x,
                        int blk_row, int blk_col, int plane, int block,
                        TX_SIZE tx_size, BLOCK_SIZE plane_bsize,
                        ENTROPY_CONTEXT *above_ctx, ENTROPY_CONTEXT *left_ctx,
                        int *rate, int64_t *dist, int64_t *bsse, int *skip) {
  MACROBLOCKD *const xd = &x->e_mbd;
  MB_MODE_INFO *const mbmi = &xd->mi[0]->mbmi;
  struct macroblock_plane *const p = &x->plane[plane];
  struct macroblockd_plane *const pd = &xd->plane[plane];
  BLOCK_SIZE bsize = txsize_to_bsize[tx_size];
  // Index into the 8x8 inter_tx_size grid (units of 8x8 luma blocks).
  int tx_idx = (blk_row >> (1 - pd->subsampling_y)) * 8 +
               (blk_col >> (1 - pd->subsampling_x));
  // Chroma tx size is derived from the co-located luma decision.
  TX_SIZE plane_tx_size = plane ?
      get_uv_tx_size_impl(mbmi->inter_tx_size[tx_idx], bsize,
                          0, 0) :
      mbmi->inter_tx_size[tx_idx];

  int max_blocks_high = num_4x4_blocks_high_lookup[plane_bsize];
  int max_blocks_wide = num_4x4_blocks_wide_lookup[plane_bsize];

  // Clamp the usable extent at frame boundaries.
  if (xd->mb_to_bottom_edge < 0)
    max_blocks_high += xd->mb_to_bottom_edge >> (5 + pd->subsampling_y);
  if (xd->mb_to_right_edge < 0)
    max_blocks_wide += xd->mb_to_right_edge >> (5 + pd->subsampling_x);

  // Entirely outside the visible area: nothing to code.
  if (blk_row >= max_blocks_high || blk_col >= max_blocks_wide)
    return;

  if (tx_size == plane_tx_size) {
    // Leaf: fold the spanned per-4x4 entropy contexts into single flags,
    // cost the block, then propagate its eob state back into the contexts.
    // NOTE(review): the multi-byte reads reinterpret ENTROPY_CONTEXT arrays
    // as wider integers (strict-aliasing gray area) — long-standing idiom
    // in this codebase, kept as-is.
    int coeff_ctx, i;
    ENTROPY_CONTEXT *ta = above_ctx + blk_col;
    ENTROPY_CONTEXT *tl = left_ctx + blk_row;
    switch (tx_size) {
      case TX_4X4:
        break;
      case TX_8X8:
        ta[0] = !!*(const uint16_t *)&ta[0];
        tl[0] = !!*(const uint16_t *)&tl[0];
        break;
      case TX_16X16:
        ta[0] = !!*(const uint32_t *)&ta[0];
        tl[0] = !!*(const uint32_t *)&tl[0];
        break;
      case TX_32X32:
        ta[0] = !!*(const uint64_t *)&ta[0];
        tl[0] = !!*(const uint64_t *)&tl[0];
        break;
      default:
        assert(0 && "Invalid transform size.");
        break;
    }
    coeff_ctx = combine_entropy_contexts(ta[0], tl[0]);
    tx_block_rd_b(cpi, x, tx_size, blk_row, blk_col, plane, block,
                  plane_bsize, coeff_ctx, rate, dist, bsse, skip);
    for (i = 0; i < (1 << tx_size); ++i) {
      ta[i] = !(p->eobs[block] == 0);
      tl[i] = !(p->eobs[block] == 0);
    }
  } else {
    // Not a leaf yet: recurse into the four tx_size-1 quadrants.
    int bsl = b_width_log2_lookup[bsize];
    int step = 1 << (2 * (tx_size - 1));
    int i;

    assert(bsl > 0);
    --bsl;

    for (i = 0; i < 4; ++i) {
      int offsetr = (i >> 1) << bsl;
      int offsetc = (i & 0x01) << bsl;
      tx_block_rd(cpi, x, blk_row + offsetr, blk_col + offsetc, plane,
                  block + i * step, tx_size - 1, plane_bsize,
                  above_ctx, left_ctx, rate, dist, bsse, skip);
    }
  }
}
2320
2321// Return value 0: early termination triggered, no valid rd cost available;
2322// 1: rd cost values are valid.
2323static int inter_block_uvrd(const VP10_COMP *cpi, MACROBLOCK *x,
2324 int *rate, int64_t *distortion, int *skippable,
2325 int64_t *sse, BLOCK_SIZE bsize,
2326 int64_t ref_best_rd) {
2327 MACROBLOCKD *const xd = &x->e_mbd;
2328 MB_MODE_INFO *const mbmi = &xd->mi[0]->mbmi;
2329 int plane;
2330 int is_cost_valid = 1;
2331 int64_t this_rd;
2332
2333 if (ref_best_rd < 0)
2334 is_cost_valid = 0;
2335
2336 if (is_inter_block(mbmi) && is_cost_valid) {
2337 int plane;
2338 for (plane = 1; plane < MAX_MB_PLANE; ++plane)
2339 vp10_subtract_plane(x, bsize, plane);
2340 }
2341
2342 *rate = 0;
2343 *distortion = 0;
2344 *sse = 0;
2345 *skippable = 1;
2346
2347 for (plane = 1; plane < MAX_MB_PLANE; ++plane) {
2348 const struct macroblockd_plane *const pd = &xd->plane[plane];
2349 const BLOCK_SIZE plane_bsize = get_plane_block_size(bsize, pd);
2350 const int mi_width = num_4x4_blocks_wide_lookup[plane_bsize];
2351 const int mi_height = num_4x4_blocks_high_lookup[plane_bsize];
2352 BLOCK_SIZE txb_size = txsize_to_bsize[max_txsize_lookup[plane_bsize]];
2353 int bh = num_4x4_blocks_wide_lookup[txb_size];
2354 int idx, idy;
2355 int block = 0;
2356 int step = 1 << (max_txsize_lookup[plane_bsize] * 2);
2357 int pnrate = 0, pnskip = 1;
2358 int64_t pndist = 0, pnsse = 0;
2359 ENTROPY_CONTEXT ta[16], tl[16];
2360
2361 vp10_get_entropy_contexts(bsize, TX_4X4, pd, ta, tl);
2362
2363 for (idy = 0; idy < mi_height; idy += bh) {
2364 for (idx = 0; idx < mi_width; idx += bh) {
2365 tx_block_rd(cpi, x, idy, idx, plane, block,
2366 max_txsize_lookup[plane_bsize], plane_bsize, ta, tl,
2367 &pnrate, &pndist, &pnsse, &pnskip);
2368 block += step;
2369 }
2370 }
2371
2372 if (pnrate == INT_MAX) {
2373 is_cost_valid = 0;
2374 break;
2375 }
2376
2377 *rate += pnrate;
2378 *distortion += pndist;
2379 *sse += pnsse;
2380 *skippable &= pnskip;
2381
2382 this_rd = VPXMIN(RDCOST(x->rdmult, x->rddiv, *rate, *distortion),
2383 RDCOST(x->rdmult, x->rddiv, 0, *sse));
2384
2385 if (this_rd > ref_best_rd) {
2386 is_cost_valid = 0;
2387 break;
2388 }
2389 }
2390
2391 if (!is_cost_valid) {
2392 // reset cost value
2393 *rate = INT_MAX;
2394 *distortion = INT64_MAX;
2395 *sse = INT64_MAX;
2396 *skippable = 0;
2397 }
2398
2399 return is_cost_valid;
2400}
2401#endif
2402
Jingning Han3ee6db62015-08-05 19:00:31 -07002403// Return value 0: early termination triggered, no valid rd cost available;
2404// 1: rd cost values are valid.
Yaowu Xu26a9afc2015-08-13 09:42:27 -07002405static int super_block_uvrd(const VP10_COMP *cpi, MACROBLOCK *x,
Jingning Han3ee6db62015-08-05 19:00:31 -07002406 int *rate, int64_t *distortion, int *skippable,
2407 int64_t *sse, BLOCK_SIZE bsize,
2408 int64_t ref_best_rd) {
2409 MACROBLOCKD *const xd = &x->e_mbd;
2410 MB_MODE_INFO *const mbmi = &xd->mi[0]->mbmi;
2411 const TX_SIZE uv_tx_size = get_uv_tx_size(mbmi, &xd->plane[1]);
2412 int plane;
2413 int pnrate = 0, pnskip = 1;
2414 int64_t pndist = 0, pnsse = 0;
2415 int is_cost_valid = 1;
2416
2417 if (ref_best_rd < 0)
2418 is_cost_valid = 0;
2419
2420 if (is_inter_block(mbmi) && is_cost_valid) {
2421 int plane;
2422 for (plane = 1; plane < MAX_MB_PLANE; ++plane)
2423 vp10_subtract_plane(x, bsize, plane);
2424 }
2425
2426 *rate = 0;
2427 *distortion = 0;
2428 *sse = 0;
2429 *skippable = 1;
2430
2431 for (plane = 1; plane < MAX_MB_PLANE; ++plane) {
Jingning Han71c15602015-10-13 12:40:39 -07002432 txfm_rd_in_plane(x,
2433#if CONFIG_VAR_TX
2434 cpi,
2435#endif
2436 &pnrate, &pndist, &pnskip, &pnsse,
Jingning Han3ee6db62015-08-05 19:00:31 -07002437 ref_best_rd, plane, bsize, uv_tx_size,
2438 cpi->sf.use_fast_coef_costing);
2439 if (pnrate == INT_MAX) {
2440 is_cost_valid = 0;
2441 break;
2442 }
2443 *rate += pnrate;
2444 *distortion += pndist;
2445 *sse += pnsse;
2446 *skippable &= pnskip;
2447 }
2448
2449 if (!is_cost_valid) {
2450 // reset cost value
2451 *rate = INT_MAX;
2452 *distortion = INT64_MAX;
2453 *sse = INT64_MAX;
2454 *skippable = 0;
2455 }
2456
2457 return is_cost_valid;
2458}
2459
hui sube3559b2015-10-07 09:29:02 -07002460#if CONFIG_EXT_INTRA
2461// Return 1 if an ext intra mode is selected; return 0 otherwise.
2462static int rd_pick_ext_intra_sbuv(VP10_COMP *cpi, MACROBLOCK *x,
2463 PICK_MODE_CONTEXT *ctx,
2464 int *rate, int *rate_tokenonly,
2465 int64_t *distortion, int *skippable,
2466 BLOCK_SIZE bsize, int64_t *best_rd) {
2467 MACROBLOCKD *const xd = &x->e_mbd;
2468 MB_MODE_INFO *mbmi = &xd->mi[0]->mbmi;
2469 int ext_intra_selected_flag = 0;
2470 int this_rate_tokenonly, this_rate, s;
2471 int64_t this_distortion, this_sse, this_rd, best_angle_rd = INT64_MAX;
2472 EXT_INTRA_MODE mode;
2473 int i, step, delta, angle, best_angle, best_angle_dir;
2474 int deltas[3] = {25, 5, 1};
2475 int branches[3] = {2, 2, 2};
2476 EXT_INTRA_MODE_INFO ext_intra_mode_info;
2477
2478 vp10_zero(ext_intra_mode_info);
2479 mbmi->ext_intra_mode_info.use_ext_intra_mode[1] = 1;
2480 mbmi->uv_mode = DC_PRED;
2481
2482 if (!DR_ONLY) {
2483 for (mode = 0; mode < FILTER_INTRA_MODES; ++mode) {
2484 mbmi->ext_intra_mode_info.ext_intra_mode[1] = mode;
2485 if (!super_block_uvrd(cpi, x, &this_rate_tokenonly,
2486 &this_distortion, &s, &this_sse, bsize, *best_rd))
2487 continue;
2488
2489 this_rate = this_rate_tokenonly +
2490 vp10_cost_bit(cpi->common.fc->ext_intra_probs[1], 1) +
2491 vp10_cost_bit(DR_EXT_INTRA_PROB, 0) +
2492 cpi->intra_uv_mode_cost[mbmi->uv_mode] +
2493 write_uniform_cost(FILTER_INTRA_MODES, mode);
2494 this_rd = RDCOST(x->rdmult, x->rddiv, this_rate, this_distortion);
2495 if (this_rd < *best_rd) {
2496 *best_rd = this_rd;
2497 *rate = this_rate;
2498 *rate_tokenonly = this_rate_tokenonly;
2499 *distortion = this_distortion;
2500 *skippable = s;
2501 ext_intra_mode_info = mbmi->ext_intra_mode_info;
2502 ext_intra_selected_flag = 1;
2503 if (!x->select_tx_size)
2504 swap_block_ptr(x, ctx, 2, 0, 1, MAX_MB_PLANE);
2505 }
2506 }
2507 }
2508
2509 mbmi->ext_intra_mode_info.ext_intra_mode[1] = EXT_DR_PRED;
2510 if (ANGLE_FAST_SEARCH) {
2511 best_angle = EXT_INTRA_ANGLES / 2;
2512 for (step = 0; step < 3; ++step) {
2513 delta = deltas[step];
2514 for (i = -branches[step]; i <= branches[step]; ++i) {
2515 int64_t rd_thresh;
2516 if (i == 0 && step != 0)
2517 continue;
2518 angle = best_angle + i * delta;
2519 if (angle < 0)
2520 angle = 0;
2521 if (angle >= EXT_INTRA_ANGLES)
2522 angle = EXT_INTRA_ANGLES - 1;
2523 if (angle == best_angle && step != 0)
2524 continue;
2525 mbmi->ext_intra_mode_info.ext_intra_angle[1] = angle;
2526 if (*best_rd == INT64_MAX)
2527 rd_thresh = best_angle_rd;
2528 else
2529 rd_thresh = VPXMIN(best_angle_rd, *best_rd * RD_ADJUSTER);
2530 if (!super_block_uvrd(cpi, x, &this_rate_tokenonly, &this_distortion,
2531 &s, &this_sse, bsize, rd_thresh))
2532 continue;
2533 this_rate = this_rate_tokenonly +
2534 vp10_cost_bit(cpi->common.fc->ext_intra_probs[1], 1) +
2535 (DR_ONLY ? 0: vp10_cost_bit(DR_EXT_INTRA_PROB, 1)) +
2536 cpi->intra_uv_mode_cost[mbmi->uv_mode] +
2537 write_uniform_cost(EXT_INTRA_ANGLES, angle);
2538 this_rd = RDCOST(x->rdmult, x->rddiv, this_rate, this_distortion);
2539 if (this_rd < *best_rd) {
2540 *best_rd = this_rd;
2541 *rate = this_rate;
2542 *rate_tokenonly = this_rate_tokenonly;
2543 *distortion = this_distortion;
2544 *skippable = s;
2545 ext_intra_mode_info = mbmi->ext_intra_mode_info;
2546 ext_intra_selected_flag = 1;
2547 if (!x->select_tx_size)
2548 swap_block_ptr(x, ctx, 2, 0, 1, MAX_MB_PLANE);
2549 }
2550 if (this_rd < best_angle_rd) {
2551 best_angle_rd = this_rd;
2552 best_angle_dir = i;
2553 }
2554 }
2555 best_angle += best_angle_dir * delta;
2556 if (best_angle < 0)
2557 best_angle = 0;
2558 if (best_angle >= EXT_INTRA_ANGLES)
2559 best_angle = EXT_INTRA_ANGLES - 1;
2560 if (*best_rd < best_angle_rd / RD_ADJUSTER)
2561 break;
2562 }
2563 } else {
2564 for (angle = 0; angle < EXT_INTRA_ANGLES; ++angle) {
2565 mbmi->ext_intra_mode_info.ext_intra_angle[1] = angle;
2566 if (prediction_angle_map(angle) == 90 ||
2567 prediction_angle_map(angle) == 180)
2568 continue;
2569 if (!super_block_uvrd(cpi, x, &this_rate_tokenonly,
2570 &this_distortion, &s, &this_sse, bsize, *best_rd))
2571 continue;
2572
2573 this_rate = this_rate_tokenonly +
2574 vp10_cost_bit(cpi->common.fc->ext_intra_probs[1], 1) +
2575 (DR_ONLY ? 0: vp10_cost_bit(DR_EXT_INTRA_PROB, 1)) +
2576 cpi->intra_uv_mode_cost[mbmi->uv_mode] +
2577 write_uniform_cost(EXT_INTRA_ANGLES, angle);
2578 this_rd = RDCOST(x->rdmult, x->rddiv, this_rate, this_distortion);
2579 if (this_rd < *best_rd) {
2580 *best_rd = this_rd;
2581 *rate = this_rate;
2582 *rate_tokenonly = this_rate_tokenonly;
2583 *distortion = this_distortion;
2584 *skippable = s;
2585 ext_intra_mode_info = mbmi->ext_intra_mode_info;
2586 ext_intra_selected_flag = 1;
2587 if (!x->select_tx_size)
2588 swap_block_ptr(x, ctx, 2, 0, 1, MAX_MB_PLANE);
2589 }
2590 }
2591 }
2592
2593 if (ext_intra_selected_flag) {
2594 mbmi->uv_mode = DC_PRED;
2595 mbmi->ext_intra_mode_info.use_ext_intra_mode[1] =
2596 ext_intra_mode_info.use_ext_intra_mode[1];
2597 mbmi->ext_intra_mode_info.ext_intra_mode[1] =
2598 ext_intra_mode_info.ext_intra_mode[1];
2599 mbmi->ext_intra_mode_info.ext_intra_angle[1] =
2600 ext_intra_mode_info.ext_intra_angle[1];
2601
2602 return 1;
2603 } else {
2604 return 0;
2605 }
2606}
2607#endif // CONFIG_EXT_INTRA
2608
// Search all allowed chroma intra prediction modes (DC_PRED..TM_PRED), and
// optionally ext-intra on top, returning the best rd cost.  The winning mode
// is left in mbmi->uv_mode; rate/distortion/skip results go to the
// out-parameters.
static int64_t rd_pick_intra_sbuv_mode(VP10_COMP *cpi, MACROBLOCK *x,
                                       PICK_MODE_CONTEXT *ctx,
                                       int *rate, int *rate_tokenonly,
                                       int64_t *distortion, int *skippable,
                                       BLOCK_SIZE bsize, TX_SIZE max_tx_size) {
  MACROBLOCKD *xd = &x->e_mbd;
  MB_MODE_INFO *mbmi = &xd->mi[0]->mbmi;
  PREDICTION_MODE mode;
  PREDICTION_MODE mode_selected = DC_PRED;
  int64_t best_rd = INT64_MAX, this_rd;
  int this_rate_tokenonly, this_rate, s;
  int64_t this_distortion, this_sse;
#if CONFIG_EXT_INTRA
  EXT_INTRA_MODE_INFO ext_intra_mode_info;

  // Start with ext-intra disabled both in the tracked best and in mbmi, so
  // the regular mode loop below costs plain intra prediction.
  ext_intra_mode_info.use_ext_intra_mode[1] = 0;
  mbmi->ext_intra_mode_info.use_ext_intra_mode[1] = 0;
#endif  // CONFIG_EXT_INTRA
  // Clear stale per-block skip-txfm decisions from earlier evaluations.
  memset(x->skip_txfm, SKIP_TXFM_NONE, sizeof(x->skip_txfm));
  // Disable the uv palette while searching the regular intra modes.
  xd->mi[0]->mbmi.palette_mode_info.palette_size[1] = 0;
  for (mode = DC_PRED; mode <= TM_PRED; ++mode) {
    // Speed feature: modes masked out for this transform size are skipped.
    if (!(cpi->sf.intra_uv_mode_mask[max_tx_size] & (1 << mode)))
      continue;

    mbmi->uv_mode = mode;

    // super_block_uvrd returns 0 on early termination (no valid cost).
    if (!super_block_uvrd(cpi, x, &this_rate_tokenonly,
                          &this_distortion, &s, &this_sse, bsize, best_rd))
      continue;
    this_rate = this_rate_tokenonly + cpi->intra_uv_mode_cost[mode];
#if CONFIG_EXT_INTRA
    // DC_PRED additionally signals that ext-intra is not used.
    if (mode == DC_PRED)
      this_rate += vp10_cost_bit(cpi->common.fc->ext_intra_probs[1], 0);
#endif  // CONFIG_EXT_INTRA
    this_rd = RDCOST(x->rdmult, x->rddiv, this_rate, this_distortion);

    if (this_rd < best_rd) {
      mode_selected = mode;
      best_rd = this_rd;
      *rate = this_rate;
      *rate_tokenonly = this_rate_tokenonly;
      *distortion = this_distortion;
      *skippable = s;
      if (!x->select_tx_size)
        swap_block_ptr(x, ctx, 2, 0, 1, MAX_MB_PLANE);
    }
  }

#if CONFIG_EXT_INTRA
  // Try ext-intra modes against the best regular mode (8x8 and larger only).
  if (mbmi->sb_type >= BLOCK_8X8) {
    if (rd_pick_ext_intra_sbuv(cpi, x, ctx, rate, rate_tokenonly, distortion,
                               skippable, bsize, &best_rd)) {
      mode_selected = mbmi->uv_mode;
      ext_intra_mode_info = mbmi->ext_intra_mode_info;
    }
  }

  // Restore the winning ext-intra configuration (or disabled state).
  mbmi->ext_intra_mode_info.use_ext_intra_mode[1] =
      ext_intra_mode_info.use_ext_intra_mode[1];
  if (ext_intra_mode_info.use_ext_intra_mode[1])
    mbmi->ext_intra_mode_info.ext_intra_mode[1] =
        ext_intra_mode_info.ext_intra_mode[1];
#endif  // CONFIG_EXT_INTRA
  mbmi->uv_mode = mode_selected;
  return best_rd;
}
2675
Yaowu Xu26a9afc2015-08-13 09:42:27 -07002676static int64_t rd_sbuv_dcpred(const VP10_COMP *cpi, MACROBLOCK *x,
Jingning Han3ee6db62015-08-05 19:00:31 -07002677 int *rate, int *rate_tokenonly,
2678 int64_t *distortion, int *skippable,
2679 BLOCK_SIZE bsize) {
Jingning Han3ee6db62015-08-05 19:00:31 -07002680 int64_t unused;
2681
2682 x->e_mbd.mi[0]->mbmi.uv_mode = DC_PRED;
2683 memset(x->skip_txfm, SKIP_TXFM_NONE, sizeof(x->skip_txfm));
2684 super_block_uvrd(cpi, x, rate_tokenonly, distortion,
2685 skippable, &unused, bsize, INT64_MAX);
Ronald S. Bultjed8f3bb12015-10-13 14:07:47 -04002686 *rate = *rate_tokenonly + cpi->intra_uv_mode_cost[DC_PRED];
Jingning Han3ee6db62015-08-05 19:00:31 -07002687 return RDCOST(x->rdmult, x->rddiv, *rate, *distortion);
2688}
2689
Yaowu Xu26a9afc2015-08-13 09:42:27 -07002690static void choose_intra_uv_mode(VP10_COMP *cpi, MACROBLOCK *const x,
Jingning Han3ee6db62015-08-05 19:00:31 -07002691 PICK_MODE_CONTEXT *ctx,
2692 BLOCK_SIZE bsize, TX_SIZE max_tx_size,
2693 int *rate_uv, int *rate_uv_tokenonly,
2694 int64_t *dist_uv, int *skip_uv,
2695 PREDICTION_MODE *mode_uv) {
2696 // Use an estimated rd for uv_intra based on DC_PRED if the
2697 // appropriate speed flag is set.
2698 if (cpi->sf.use_uv_intra_rd_estimate) {
2699 rd_sbuv_dcpred(cpi, x, rate_uv, rate_uv_tokenonly, dist_uv,
2700 skip_uv, bsize < BLOCK_8X8 ? BLOCK_8X8 : bsize);
2701 // Else do a proper rd search for each possible transform size that may
2702 // be considered in the main rd loop.
2703 } else {
2704 rd_pick_intra_sbuv_mode(cpi, x, ctx,
2705 rate_uv, rate_uv_tokenonly, dist_uv, skip_uv,
2706 bsize < BLOCK_8X8 ? BLOCK_8X8 : bsize, max_tx_size);
2707 }
2708 *mode_uv = x->e_mbd.mi[0]->mbmi.uv_mode;
2709}
2710
Yaowu Xu26a9afc2015-08-13 09:42:27 -07002711static int cost_mv_ref(const VP10_COMP *cpi, PREDICTION_MODE mode,
Jingning Han3ee6db62015-08-05 19:00:31 -07002712 int mode_context) {
2713 assert(is_inter_mode(mode));
2714 return cpi->inter_mode_cost[mode_context][INTER_OFFSET(mode)];
2715}
2716
Yaowu Xu26a9afc2015-08-13 09:42:27 -07002717static int set_and_cost_bmi_mvs(VP10_COMP *cpi, MACROBLOCK *x, MACROBLOCKD *xd,
Jingning Han3ee6db62015-08-05 19:00:31 -07002718 int i,
2719 PREDICTION_MODE mode, int_mv this_mv[2],
2720 int_mv frame_mv[MB_MODE_COUNT][MAX_REF_FRAMES],
2721 int_mv seg_mvs[MAX_REF_FRAMES],
2722 int_mv *best_ref_mv[2], const int *mvjcost,
2723 int *mvcost[2]) {
2724 MODE_INFO *const mic = xd->mi[0];
2725 const MB_MODE_INFO *const mbmi = &mic->mbmi;
2726 const MB_MODE_INFO_EXT *const mbmi_ext = x->mbmi_ext;
2727 int thismvcost = 0;
2728 int idx, idy;
2729 const int num_4x4_blocks_wide = num_4x4_blocks_wide_lookup[mbmi->sb_type];
2730 const int num_4x4_blocks_high = num_4x4_blocks_high_lookup[mbmi->sb_type];
2731 const int is_compound = has_second_ref(mbmi);
2732
2733 switch (mode) {
2734 case NEWMV:
2735 this_mv[0].as_int = seg_mvs[mbmi->ref_frame[0]].as_int;
2736 thismvcost += vp10_mv_bit_cost(&this_mv[0].as_mv, &best_ref_mv[0]->as_mv,
2737 mvjcost, mvcost, MV_COST_WEIGHT_SUB);
2738 if (is_compound) {
2739 this_mv[1].as_int = seg_mvs[mbmi->ref_frame[1]].as_int;
2740 thismvcost += vp10_mv_bit_cost(&this_mv[1].as_mv, &best_ref_mv[1]->as_mv,
2741 mvjcost, mvcost, MV_COST_WEIGHT_SUB);
2742 }
2743 break;
2744 case NEARMV:
2745 case NEARESTMV:
2746 this_mv[0].as_int = frame_mv[mode][mbmi->ref_frame[0]].as_int;
2747 if (is_compound)
2748 this_mv[1].as_int = frame_mv[mode][mbmi->ref_frame[1]].as_int;
2749 break;
2750 case ZEROMV:
2751 this_mv[0].as_int = 0;
2752 if (is_compound)
2753 this_mv[1].as_int = 0;
2754 break;
2755 default:
2756 break;
2757 }
2758
2759 mic->bmi[i].as_mv[0].as_int = this_mv[0].as_int;
2760 if (is_compound)
2761 mic->bmi[i].as_mv[1].as_int = this_mv[1].as_int;
2762
2763 mic->bmi[i].as_mode = mode;
2764
2765 for (idy = 0; idy < num_4x4_blocks_high; ++idy)
2766 for (idx = 0; idx < num_4x4_blocks_wide; ++idx)
2767 memmove(&mic->bmi[i + idy * 2 + idx], &mic->bmi[i], sizeof(mic->bmi[i]));
2768
2769 return cost_mv_ref(cpi, mode, mbmi_ext->mode_context[mbmi->ref_frame[0]]) +
2770 thismvcost;
2771}
2772
// Build the inter prediction for sub8x8 block i, then forward-transform,
// quantize and cost its luma residual, 4x4 unit by 4x4 unit.  Returns the rd
// cost, or INT64_MAX as soon as the running cost exceeds best_yrd (early
// termination).
static int64_t encode_inter_mb_segment(VP10_COMP *cpi,
                                       MACROBLOCK *x,
                                       int64_t best_yrd,
                                       int i,
                                       int *labelyrate,
                                       int64_t *distortion, int64_t *sse,
                                       ENTROPY_CONTEXT *ta,
                                       ENTROPY_CONTEXT *tl,
                                       int ir, int ic,
                                       int mi_row, int mi_col) {
  int k;
  MACROBLOCKD *xd = &x->e_mbd;
  struct macroblockd_plane *const pd = &xd->plane[0];
  struct macroblock_plane *const p = &x->plane[0];
  MODE_INFO *const mi = xd->mi[0];
  const BLOCK_SIZE plane_bsize = get_plane_block_size(mi->mbmi.sb_type, pd);
  // Sub8x8 partition dimensions in pixels (4x4, 4x8 or 8x4).
  const int width = 4 * num_4x4_blocks_wide_lookup[plane_bsize];
  const int height = 4 * num_4x4_blocks_high_lookup[plane_bsize];
  int idx, idy;
  void (*fwd_txm4x4)(const int16_t *input, tran_low_t *output, int stride);

  const uint8_t *const src =
      &p->src.buf[vp10_raster_block_offset(BLOCK_8X8, i, p->src.stride)];
  uint8_t *const dst = &pd->dst.buf[vp10_raster_block_offset(BLOCK_8X8, i,
                                                             pd->dst.stride)];
  int64_t thisdistortion = 0, thissse = 0;
  int thisrate = 0;
  TX_TYPE tx_type = get_tx_type(PLANE_TYPE_Y, xd, i, TX_4X4);
  const scan_order *so = get_scan(TX_4X4, tx_type, 1);

  vp10_build_inter_predictor_sub8x8(xd, 0, i, ir, ic, mi_row, mi_col);

  // Select the 4x4 forward transform: Walsh-Hadamard for lossless segments,
  // DCT otherwise (high-bitdepth variants when the frame buffer is HBD).
#if CONFIG_VP9_HIGHBITDEPTH
  if (xd->cur_buf->flags & YV12_FLAG_HIGHBITDEPTH) {
    fwd_txm4x4 = xd->lossless[mi->mbmi.segment_id] ? vp10_highbd_fwht4x4
                                                   : vpx_highbd_fdct4x4;
  } else {
    fwd_txm4x4 = xd->lossless[mi->mbmi.segment_id] ? vp10_fwht4x4 : vpx_fdct4x4;
  }
#else
  fwd_txm4x4 = xd->lossless[mi->mbmi.segment_id] ? vp10_fwht4x4 : vpx_fdct4x4;
#endif  // CONFIG_VP9_HIGHBITDEPTH

  // Residual = source - prediction, written into p->src_diff (stride 8).
#if CONFIG_VP9_HIGHBITDEPTH
  if (xd->cur_buf->flags & YV12_FLAG_HIGHBITDEPTH) {
    vpx_highbd_subtract_block(
        height, width, vp10_raster_block_offset_int16(BLOCK_8X8, i, p->src_diff),
        8, src, p->src.stride, dst, pd->dst.stride, xd->bd);
  } else {
    vpx_subtract_block(
        height, width, vp10_raster_block_offset_int16(BLOCK_8X8, i, p->src_diff),
        8, src, p->src.stride, dst, pd->dst.stride);
  }
#else
  vpx_subtract_block(height, width,
                     vp10_raster_block_offset_int16(BLOCK_8X8, i, p->src_diff),
                     8, src, p->src.stride, dst, pd->dst.stride);
#endif  // CONFIG_VP9_HIGHBITDEPTH

  k = i;
  for (idy = 0; idy < height / 4; ++idy) {
    for (idx = 0; idx < width / 4; ++idx) {
      int64_t ssz, rd, rd1, rd2;
      tran_low_t* coeff;
#if CONFIG_VAR_TX
      int coeff_ctx;
#endif
      // Advance k to the raster index of the current 4x4 unit inside the
      // 8x8 block (valid for the at-most-two iterations a sub8x8 partition
      // produces in each direction).
      k += (idy * 2 + idx);
#if CONFIG_VAR_TX
      coeff_ctx = combine_entropy_contexts(*(ta + (k & 1)),
                                           *(tl + (k >> 1)));
#endif
      coeff = BLOCK_OFFSET(p->coeff, k);
      fwd_txm4x4(vp10_raster_block_offset_int16(BLOCK_8X8, k, p->src_diff),
                 coeff, 8);
      vp10_regular_quantize_b_4x4(x, 0, k, so->scan, so->iscan);
#if CONFIG_VP9_HIGHBITDEPTH
      if (xd->cur_buf->flags & YV12_FLAG_HIGHBITDEPTH) {
        thisdistortion += vp10_highbd_block_error(coeff,
                                                  BLOCK_OFFSET(pd->dqcoeff, k),
                                                  16, &ssz, xd->bd);
      } else {
        thisdistortion += vp10_block_error(coeff, BLOCK_OFFSET(pd->dqcoeff, k),
                                           16, &ssz);
      }
#else
      thisdistortion += vp10_block_error(coeff, BLOCK_OFFSET(pd->dqcoeff, k),
                                         16, &ssz);
#endif  // CONFIG_VP9_HIGHBITDEPTH
      thissse += ssz;
#if CONFIG_VAR_TX
      thisrate += cost_coeffs(x, 0, k, coeff_ctx,
                              TX_4X4,
                              so->scan, so->neighbors,
                              cpi->sf.use_fast_coef_costing);
      // Update above/left entropy contexts: nonzero iff the block has coeffs.
      *(ta + (k & 1)) = !(p->eobs[k] == 0);
      *(tl + (k >> 1)) = !(p->eobs[k] == 0);
#else
      thisrate += cost_coeffs(x, 0, k, ta + (k & 1), tl + (k >> 1),
                              TX_4X4,
                              so->scan, so->neighbors,
                              cpi->sf.use_fast_coef_costing);
#endif
      // NOTE(review): ">> 2" rescales transform-domain error to pixel-domain
      // units -- matches the final *distortion/*sse scaling below.
      rd1 = RDCOST(x->rdmult, x->rddiv, thisrate, thisdistortion >> 2);
      rd2 = RDCOST(x->rdmult, x->rddiv, 0, thissse >> 2);
      rd = VPXMIN(rd1, rd2);
      if (rd >= best_yrd)
        return INT64_MAX;
    }
  }

  *distortion = thisdistortion >> 2;
  *labelyrate = thisrate;
  *sse = thissse >> 2;

  return RDCOST(x->rdmult, x->rddiv, *labelyrate, *distortion);
}
2890
// Rate-distortion statistics for one sub8x8 label under one candidate
// inter mode (see rd_pick_best_sub8x8_mode).
typedef struct {
  int eobs;         // eob of the label's block -- NOTE(review): presumably
                    // copied from p->eobs; confirm at the fill site
  int brate;        // total rate for the label (mode + mv + coefficients)
  int byrate;       // coefficient (luma) rate only -- TODO confirm split
  int64_t bdist;    // distortion
  int64_t bsse;     // sum of squared error
  int64_t brdcost;  // combined rd cost
  int_mv mvs[2];    // chosen motion vectors (second entry for compound)
  ENTROPY_CONTEXT ta[2];  // above entropy contexts after coding (from t_above)
  ENTROPY_CONTEXT tl[2];  // left entropy contexts after coding (from t_left)
} SEG_RDSTAT;
2902
// Running best result of the sub8x8 (4x4/4x8/8x4) mode search; one instance
// per interpolation-filter candidate (see bsi_buf/filter_idx usage).
typedef struct {
  int_mv *ref_mv[2];  // best reference mvs, one per reference frame
  int_mv mvp;         // mv predictor (initialized from *ref_mv[0])

  int64_t segment_rd;     // best total rd cost found so far
  int r;                  // total rate -- NOTE(review): confirm at use site
  int64_t d;              // total distortion
  int64_t sse;            // total sse
  int segment_yrate;      // luma coefficient rate
  PREDICTION_MODE modes[4];          // winning mode per label
  SEG_RDSTAT rdstat[4][INTER_MODES]; // per-label, per-mode statistics
  int mvthresh;           // mv magnitude threshold for the label search
} BEST_SEG_INFO;
2916
2917static INLINE int mv_check_bounds(const MACROBLOCK *x, const MV *mv) {
2918 return (mv->row >> 3) < x->mv_row_min ||
2919 (mv->row >> 3) > x->mv_row_max ||
2920 (mv->col >> 3) < x->mv_col_min ||
2921 (mv->col >> 3) > x->mv_col_max;
2922}
2923
2924static INLINE void mi_buf_shift(MACROBLOCK *x, int i) {
2925 MB_MODE_INFO *const mbmi = &x->e_mbd.mi[0]->mbmi;
2926 struct macroblock_plane *const p = &x->plane[0];
2927 struct macroblockd_plane *const pd = &x->e_mbd.plane[0];
2928
2929 p->src.buf = &p->src.buf[vp10_raster_block_offset(BLOCK_8X8, i,
2930 p->src.stride)];
2931 assert(((intptr_t)pd->pre[0].buf & 0x7) == 0);
2932 pd->pre[0].buf = &pd->pre[0].buf[vp10_raster_block_offset(BLOCK_8X8, i,
2933 pd->pre[0].stride)];
2934 if (has_second_ref(mbmi))
2935 pd->pre[1].buf = &pd->pre[1].buf[vp10_raster_block_offset(BLOCK_8X8, i,
2936 pd->pre[1].stride)];
2937}
2938
2939static INLINE void mi_buf_restore(MACROBLOCK *x, struct buf_2d orig_src,
2940 struct buf_2d orig_pre[2]) {
2941 MB_MODE_INFO *mbmi = &x->e_mbd.mi[0]->mbmi;
2942 x->plane[0].src = orig_src;
2943 x->e_mbd.plane[0].pre[0] = orig_pre[0];
2944 if (has_second_ref(mbmi))
2945 x->e_mbd.plane[0].pre[1] = orig_pre[1];
2946}
2947
Jingning Han3ee6db62015-08-05 19:00:31 -07002948// Check if NEARESTMV/NEARMV/ZEROMV is the cheapest way encode zero motion.
2949// TODO(aconverse): Find out if this is still productive then clean up or remove
2950static int check_best_zero_mv(
Yaowu Xu26a9afc2015-08-13 09:42:27 -07002951 const VP10_COMP *cpi, const uint8_t mode_context[MAX_REF_FRAMES],
Jingning Han3ee6db62015-08-05 19:00:31 -07002952 int_mv frame_mv[MB_MODE_COUNT][MAX_REF_FRAMES], int this_mode,
2953 const MV_REFERENCE_FRAME ref_frames[2]) {
2954 if ((this_mode == NEARMV || this_mode == NEARESTMV || this_mode == ZEROMV) &&
2955 frame_mv[this_mode][ref_frames[0]].as_int == 0 &&
2956 (ref_frames[1] == NONE ||
2957 frame_mv[this_mode][ref_frames[1]].as_int == 0)) {
2958 int rfc = mode_context[ref_frames[0]];
2959 int c1 = cost_mv_ref(cpi, NEARMV, rfc);
2960 int c2 = cost_mv_ref(cpi, NEARESTMV, rfc);
2961 int c3 = cost_mv_ref(cpi, ZEROMV, rfc);
2962
2963 if (this_mode == NEARMV) {
2964 if (c1 > c3) return 0;
2965 } else if (this_mode == NEARESTMV) {
2966 if (c2 > c3) return 0;
2967 } else {
2968 assert(this_mode == ZEROMV);
2969 if (ref_frames[1] == NONE) {
2970 if ((c3 >= c2 && frame_mv[NEARESTMV][ref_frames[0]].as_int == 0) ||
2971 (c3 >= c1 && frame_mv[NEARMV][ref_frames[0]].as_int == 0))
2972 return 0;
2973 } else {
2974 if ((c3 >= c2 && frame_mv[NEARESTMV][ref_frames[0]].as_int == 0 &&
2975 frame_mv[NEARESTMV][ref_frames[1]].as_int == 0) ||
2976 (c3 >= c1 && frame_mv[NEARMV][ref_frames[0]].as_int == 0 &&
2977 frame_mv[NEARMV][ref_frames[1]].as_int == 0))
2978 return 0;
2979 }
2980 }
2981 }
2982 return 1;
2983}
2984
// Iterative joint motion search for compound (two-reference) prediction.
// Starting from the single-prediction mvs in single_newmv, the mv for one
// reference is refined while the other reference's prediction is held fixed
// as the "second_pred", alternating references until no further error
// reduction is found.  frame_mv[] is updated in place; *rate_mv receives the
// total mv signaling cost for both references.
static void joint_motion_search(VP10_COMP *cpi, MACROBLOCK *x,
                                BLOCK_SIZE bsize,
                                int_mv *frame_mv,
                                int mi_row, int mi_col,
                                int_mv single_newmv[MAX_REF_FRAMES],
                                int *rate_mv) {
  const VP10_COMMON *const cm = &cpi->common;
  const int pw = 4 * num_4x4_blocks_wide_lookup[bsize];
  const int ph = 4 * num_4x4_blocks_high_lookup[bsize];
  MACROBLOCKD *xd = &x->e_mbd;
  MB_MODE_INFO *mbmi = &xd->mi[0]->mbmi;
  // ref_frame[1] may be negative for single prediction; clamp for indexing.
  const int refs[2] = {mbmi->ref_frame[0],
                       mbmi->ref_frame[1] < 0 ? 0 : mbmi->ref_frame[1]};
  int_mv ref_mv[2];
  int ite, ref;
  const InterpKernel *kernel = vp10_filter_kernels[mbmi->interp_filter];
  struct scale_factors sf;

  // Do joint motion search in compound mode to get more accurate mv.
  struct buf_2d backup_yv12[2][MAX_MB_PLANE];
  int last_besterr[2] = {INT_MAX, INT_MAX};
  const YV12_BUFFER_CONFIG *const scaled_ref_frame[2] = {
    vp10_get_scaled_ref_frame(cpi, mbmi->ref_frame[0]),
    vp10_get_scaled_ref_frame(cpi, mbmi->ref_frame[1])
  };

  // Prediction buffer from second frame.
#if CONFIG_VP9_HIGHBITDEPTH
  DECLARE_ALIGNED(16, uint16_t, second_pred_alloc_16[64 * 64]);
  uint8_t *second_pred;
#else
  DECLARE_ALIGNED(16, uint8_t, second_pred[64 * 64]);
#endif  // CONFIG_VP9_HIGHBITDEPTH

  for (ref = 0; ref < 2; ++ref) {
    ref_mv[ref] = x->mbmi_ext->ref_mvs[refs[ref]][0];

    if (scaled_ref_frame[ref]) {
      int i;
      // Swap out the reference frame for a version that's been scaled to
      // match the resolution of the current frame, allowing the existing
      // motion search code to be used without additional modifications.
      for (i = 0; i < MAX_MB_PLANE; i++)
        backup_yv12[ref][i] = xd->plane[i].pre[ref];
      vp10_setup_pre_planes(xd, ref, scaled_ref_frame[ref], mi_row, mi_col,
                            NULL);
    }

    frame_mv[refs[ref]].as_int = single_newmv[refs[ref]].as_int;
  }

  // Since we have scaled the reference frames to match the size of the current
  // frame we must use a unit scaling factor during mode selection.
#if CONFIG_VP9_HIGHBITDEPTH
  vp10_setup_scale_factors_for_frame(&sf, cm->width, cm->height,
                                     cm->width, cm->height,
                                     cm->use_highbitdepth);
#else
  vp10_setup_scale_factors_for_frame(&sf, cm->width, cm->height,
                                     cm->width, cm->height);
#endif  // CONFIG_VP9_HIGHBITDEPTH

  // Allow joint search multiple times iteratively for each reference frame
  // and break out of the search loop if it couldn't find a better mv.
  for (ite = 0; ite < 4; ite++) {
    struct buf_2d ref_yv12[2];
    int bestsme = INT_MAX;
    int sadpb = x->sadperbit16;
    MV tmp_mv;
    int search_range = 3;

    int tmp_col_min = x->mv_col_min;
    int tmp_col_max = x->mv_col_max;
    int tmp_row_min = x->mv_row_min;
    int tmp_row_max = x->mv_row_max;
    int id = ite % 2;  // Even iterations search in the first reference frame,
                       // odd iterations search in the second. The predictor
                       // found for the 'other' reference frame is factored in.

    // Initialized here because of compiler problem in Visual Studio.
    ref_yv12[0] = xd->plane[0].pre[0];
    ref_yv12[1] = xd->plane[0].pre[1];

    // Get the prediction block from the 'other' reference frame.
#if CONFIG_VP9_HIGHBITDEPTH
    if (xd->cur_buf->flags & YV12_FLAG_HIGHBITDEPTH) {
      second_pred = CONVERT_TO_BYTEPTR(second_pred_alloc_16);
      vp10_highbd_build_inter_predictor(ref_yv12[!id].buf,
                                        ref_yv12[!id].stride,
                                        second_pred, pw,
                                        &frame_mv[refs[!id]].as_mv,
                                        &sf, pw, ph, 0,
                                        kernel, MV_PRECISION_Q3,
                                        mi_col * MI_SIZE, mi_row * MI_SIZE,
                                        xd->bd);
    } else {
      second_pred = (uint8_t *)second_pred_alloc_16;
      vp10_build_inter_predictor(ref_yv12[!id].buf,
                                 ref_yv12[!id].stride,
                                 second_pred, pw,
                                 &frame_mv[refs[!id]].as_mv,
                                 &sf, pw, ph, 0,
                                 kernel, MV_PRECISION_Q3,
                                 mi_col * MI_SIZE, mi_row * MI_SIZE);
    }
#else
    vp10_build_inter_predictor(ref_yv12[!id].buf,
                               ref_yv12[!id].stride,
                               second_pred, pw,
                               &frame_mv[refs[!id]].as_mv,
                               &sf, pw, ph, 0,
                               kernel, MV_PRECISION_Q3,
                               mi_col * MI_SIZE, mi_row * MI_SIZE);
#endif  // CONFIG_VP9_HIGHBITDEPTH

    // Do compound motion search on the current reference frame.
    if (id)
      xd->plane[0].pre[0] = ref_yv12[id];
    vp10_set_mv_search_range(x, &ref_mv[id].as_mv);

    // Use the mv result from the single mode as mv predictor.
    tmp_mv = frame_mv[refs[id]].as_mv;

    // Convert 1/8-pel mv to full-pel for the integer search.
    tmp_mv.col >>= 3;
    tmp_mv.row >>= 3;

    // Small-range full-pixel motion search.
    bestsme = vp10_refining_search_8p_c(x, &tmp_mv, sadpb,
                                        search_range,
                                        &cpi->fn_ptr[bsize],
                                        &ref_mv[id].as_mv, second_pred);
    if (bestsme < INT_MAX)
      bestsme = vp10_get_mvpred_av_var(x, &tmp_mv, &ref_mv[id].as_mv,
                                       second_pred, &cpi->fn_ptr[bsize], 1);

    // Restore the search window clobbered by vp10_set_mv_search_range().
    x->mv_col_min = tmp_col_min;
    x->mv_col_max = tmp_col_max;
    x->mv_row_min = tmp_row_min;
    x->mv_row_max = tmp_row_max;

    if (bestsme < INT_MAX) {
      int dis; /* TODO: use dis in distortion calculation later. */
      unsigned int sse;
      // Sub-pixel refinement around the full-pel winner.
      bestsme = cpi->find_fractional_mv_step(
          x, &tmp_mv,
          &ref_mv[id].as_mv,
          cpi->common.allow_high_precision_mv,
          x->errorperbit,
          &cpi->fn_ptr[bsize],
          0, cpi->sf.mv.subpel_iters_per_step,
          NULL,
          x->nmvjointcost, x->mvcost,
          &dis, &sse, second_pred,
          pw, ph);
    }

    // Restore the pointer to the first (possibly scaled) prediction buffer.
    if (id)
      xd->plane[0].pre[0] = ref_yv12[0];

    if (bestsme < last_besterr[id]) {
      frame_mv[refs[id]].as_mv = tmp_mv;
      last_besterr[id] = bestsme;
    } else {
      break;
    }
  }

  *rate_mv = 0;

  for (ref = 0; ref < 2; ++ref) {
    if (scaled_ref_frame[ref]) {
      // Restore the prediction frame pointers to their unscaled versions.
      int i;
      for (i = 0; i < MAX_MB_PLANE; i++)
        xd->plane[i].pre[ref] = backup_yv12[ref][i];
    }

    *rate_mv += vp10_mv_bit_cost(&frame_mv[refs[ref]].as_mv,
                                 &x->mbmi_ext->ref_mvs[refs[ref]][0].as_mv,
                                 x->nmvjointcost, x->mvcost, MV_COST_WEIGHT);
  }
}
3168
// Rate-distortion search for the best per-label inter modes of a sub-8x8
// partition (4x4 / 4x8 / 8x4).  For each of the (up to four) 4x4 labels the
// function evaluates NEARESTMV/NEARMV/ZEROMV/NEWMV — subject to the speed
// feature mask — running a fresh motion search for NEWMV where needed, and
// accumulates per-label rate/distortion into *bsi (= bsi_buf[filter_idx]).
//
// Outputs: *returntotrate / *returnyrate / *returndistortion / *skippable /
// *psse are filled from the winning configuration; the winning modes and MVs
// are written into mi->bmi[] and mbmi->mode.  Returns the total segment RD
// cost, or INT64_MAX if nothing beats `best_rd`.
//
// seg_mvs caches single-prediction NEWMV results per label/reference so a
// later compound (joint) search can reuse them.  bsi_buf holds one
// BEST_SEG_INFO per interpolation filter tried; for filter_idx > 0 the
// rd stats from earlier filters are reused when the MVs are full-pel and
// identical (interpolation then does not change the prediction).
static int64_t rd_pick_best_sub8x8_mode(VP10_COMP *cpi, MACROBLOCK *x,
                                        int_mv *best_ref_mv,
                                        int_mv *second_best_ref_mv,
                                        int64_t best_rd, int *returntotrate,
                                        int *returnyrate,
                                        int64_t *returndistortion,
                                        int *skippable, int64_t *psse,
                                        int mvthresh,
                                        int_mv seg_mvs[4][MAX_REF_FRAMES],
                                        BEST_SEG_INFO *bsi_buf, int filter_idx,
                                        int mi_row, int mi_col) {
  int i;
  BEST_SEG_INFO *bsi = bsi_buf + filter_idx;
  MACROBLOCKD *xd = &x->e_mbd;
  MODE_INFO *mi = xd->mi[0];
  MB_MODE_INFO *mbmi = &mi->mbmi;
  int mode_idx;
  int k, br = 0, idx, idy;
  int64_t bd = 0, block_sse = 0;
  PREDICTION_MODE this_mode;
  VP10_COMMON *cm = &cpi->common;
  struct macroblock_plane *const p = &x->plane[0];
  struct macroblockd_plane *const pd = &xd->plane[0];
  const int label_count = 4;
  int64_t this_segment_rd = 0;
  int label_mv_thresh;
  int segmentyrate = 0;
  const BLOCK_SIZE bsize = mbmi->sb_type;
  const int num_4x4_blocks_wide = num_4x4_blocks_wide_lookup[bsize];
  const int num_4x4_blocks_high = num_4x4_blocks_high_lookup[bsize];
  ENTROPY_CONTEXT t_above[2], t_left[2];
  int subpelmv = 1, have_ref = 0;
  const int has_second_rf = has_second_ref(mbmi);
  const int inter_mode_mask = cpi->sf.inter_mode_mask[bsize];
  MB_MODE_INFO_EXT *const mbmi_ext = x->mbmi_ext;

  vp10_zero(*bsi);

  bsi->segment_rd = best_rd;
  bsi->ref_mv[0] = best_ref_mv;
  bsi->ref_mv[1] = second_best_ref_mv;
  bsi->mvp.as_int = best_ref_mv->as_int;
  bsi->mvthresh = mvthresh;

  for (i = 0; i < 4; i++)
    bsi->modes[i] = ZEROMV;

  // Local copies of the entropy contexts; the per-mode rdstat entries are
  // seeded from these and the winner's contexts are copied back below.
  memcpy(t_above, pd->above_context, sizeof(t_above));
  memcpy(t_left, pd->left_context, sizeof(t_left));

  // 64 makes this threshold really big effectively
  // making it so that we very rarely check mvs on
  // segments.   setting this to 1 would make mv thresh
  // roughly equal to what it is for macroblocks
  label_mv_thresh = 1 * bsi->mvthresh / label_count;

  // Segmentation method overheads
  // The idy/idx strides skip labels that are covered by a wider/taller
  // sub-block (e.g. for 4x8 only idx == 0 is visited).
  for (idy = 0; idy < 2; idy += num_4x4_blocks_high) {
    for (idx = 0; idx < 2; idx += num_4x4_blocks_wide) {
      // TODO(jingning,rbultje): rewrite the rate-distortion optimization
      // loop for 4x4/4x8/8x4 block coding. to be replaced with new rd loop
      int_mv mode_mv[MB_MODE_COUNT][2];
      int_mv frame_mv[MB_MODE_COUNT][MAX_REF_FRAMES];
      PREDICTION_MODE mode_selected = ZEROMV;
      int64_t best_rd = INT64_MAX;  // NOTE: shadows the function parameter;
                                    // this is the per-label best rd.
      const int i = idy * 2 + idx;  // label index, 0..3 (shadows outer i).
      int ref;

      // Build the nearest/near candidate MVs for each active reference.
      for (ref = 0; ref < 1 + has_second_rf; ++ref) {
        const MV_REFERENCE_FRAME frame = mbmi->ref_frame[ref];
        frame_mv[ZEROMV][frame].as_int = 0;
        vp10_append_sub8x8_mvs_for_idx(cm, xd, i, ref, mi_row, mi_col,
                                      &frame_mv[NEARESTMV][frame],
                                      &frame_mv[NEARMV][frame],
                                      mbmi_ext->mode_context);
      }

      // search for the best motion vector on this segment
      for (this_mode = NEARESTMV; this_mode <= NEWMV; ++this_mode) {
        const struct buf_2d orig_src = x->plane[0].src;
        struct buf_2d orig_pre[2];

        mode_idx = INTER_OFFSET(this_mode);
        bsi->rdstat[i][mode_idx].brdcost = INT64_MAX;
        if (!(inter_mode_mask & (1 << this_mode)))
          continue;

        if (!check_best_zero_mv(cpi, mbmi_ext->mode_context, frame_mv,
                                this_mode, mbmi->ref_frame))
          continue;

        memcpy(orig_pre, pd->pre, sizeof(orig_pre));
        // Seed this mode's entropy contexts from the running contexts.
        memcpy(bsi->rdstat[i][mode_idx].ta, t_above,
               sizeof(bsi->rdstat[i][mode_idx].ta));
        memcpy(bsi->rdstat[i][mode_idx].tl, t_left,
               sizeof(bsi->rdstat[i][mode_idx].tl));

        // motion search for newmv (single predictor case only)
        if (!has_second_rf && this_mode == NEWMV &&
            seg_mvs[i][mbmi->ref_frame[0]].as_int == INVALID_MV) {
          MV *const new_mv = &mode_mv[NEWMV][0].as_mv;
          int step_param = 0;
          int thissme, bestsme = INT_MAX;
          int sadpb = x->sadperbit4;
          MV mvp_full;
          int max_mv;
          int cost_list[5];

          /* Is the best so far sufficiently good that we cant justify doing
           * and new motion search. */
          if (best_rd < label_mv_thresh)
            break;

          if (cpi->oxcf.mode != BEST) {
            // use previous block's result as next block's MV predictor.
            if (i > 0) {
              bsi->mvp.as_int = mi->bmi[i - 1].as_mv[0].as_int;
              if (i == 2)
                bsi->mvp.as_int = mi->bmi[i - 2].as_mv[0].as_int;
            }
          }
          if (i == 0)
            max_mv = x->max_mv_context[mbmi->ref_frame[0]];
          else
            // Full-pel magnitude of the predictor (>> 3 drops subpel bits).
            max_mv =
                VPXMAX(abs(bsi->mvp.as_mv.row), abs(bsi->mvp.as_mv.col)) >> 3;

          if (cpi->sf.mv.auto_mv_step_size && cm->show_frame) {
            // Take wtd average of the step_params based on the last frame's
            // max mv magnitude and the best ref mvs of the current block for
            // the given reference.
            step_param = (vp10_init_search_range(max_mv) +
                              cpi->mv_step_param) / 2;
          } else {
            step_param = cpi->mv_step_param;
          }

          mvp_full.row = bsi->mvp.as_mv.row >> 3;
          mvp_full.col = bsi->mvp.as_mv.col >> 3;

          if (cpi->sf.adaptive_motion_search) {
            mvp_full.row = x->pred_mv[mbmi->ref_frame[0]].row >> 3;
            mvp_full.col = x->pred_mv[mbmi->ref_frame[0]].col >> 3;
            step_param = VPXMAX(step_param, 8);
          }

          // adjust src pointer for this block
          mi_buf_shift(x, i);

          vp10_set_mv_search_range(x, &bsi->ref_mv[0]->as_mv);

          bestsme = vp10_full_pixel_search(
              cpi, x, bsize, &mvp_full, step_param, sadpb,
              cpi->sf.mv.subpel_search_method != SUBPEL_TREE ? cost_list : NULL,
              &bsi->ref_mv[0]->as_mv, new_mv,
              INT_MAX, 1);

          // Should we do a full search (best quality only)
          if (cpi->oxcf.mode == BEST) {
            int_mv *const best_mv = &mi->bmi[i].as_mv[0];
            /* Check if mvp_full is within the range. */
            clamp_mv(&mvp_full, x->mv_col_min, x->mv_col_max,
                     x->mv_row_min, x->mv_row_max);
            thissme = cpi->full_search_sad(x, &mvp_full,
                                           sadpb, 16, &cpi->fn_ptr[bsize],
                                           &bsi->ref_mv[0]->as_mv,
                                           &best_mv->as_mv);
            cost_list[1] = cost_list[2] = cost_list[3] = cost_list[4] = INT_MAX;
            if (thissme < bestsme) {
              bestsme = thissme;
              *new_mv = best_mv->as_mv;
            } else {
              // The full search result is actually worse so re-instate the
              // previous best vector
              best_mv->as_mv = *new_mv;
            }
          }

          if (bestsme < INT_MAX) {
            int distortion;
            cpi->find_fractional_mv_step(
                x,
                new_mv,
                &bsi->ref_mv[0]->as_mv,
                cm->allow_high_precision_mv,
                x->errorperbit, &cpi->fn_ptr[bsize],
                cpi->sf.mv.subpel_force_stop,
                cpi->sf.mv.subpel_iters_per_step,
                cond_cost_list(cpi, cost_list),
                x->nmvjointcost, x->mvcost,
                &distortion,
                &x->pred_sse[mbmi->ref_frame[0]],
                NULL, 0, 0);

            // save motion search result for use in compound prediction
            seg_mvs[i][mbmi->ref_frame[0]].as_mv = *new_mv;
          }

          if (cpi->sf.adaptive_motion_search)
            x->pred_mv[mbmi->ref_frame[0]] = *new_mv;

          // restore src pointers
          mi_buf_restore(x, orig_src, orig_pre);
        }

        if (has_second_rf) {
          // Compound NEWMV needs valid cached single-prediction MVs for
          // both references; skip the mode if either is missing.
          if (seg_mvs[i][mbmi->ref_frame[1]].as_int == INVALID_MV ||
              seg_mvs[i][mbmi->ref_frame[0]].as_int == INVALID_MV)
            continue;
        }

        if (has_second_rf && this_mode == NEWMV &&
            mbmi->interp_filter == EIGHTTAP) {
          // adjust src pointers
          mi_buf_shift(x, i);
          if (cpi->sf.comp_inter_joint_search_thresh <= bsize) {
            int rate_mv;
            joint_motion_search(cpi, x, bsize, frame_mv[this_mode],
                                mi_row, mi_col, seg_mvs[i],
                                &rate_mv);
            seg_mvs[i][mbmi->ref_frame[0]].as_int =
                frame_mv[this_mode][mbmi->ref_frame[0]].as_int;
            seg_mvs[i][mbmi->ref_frame[1]].as_int =
                frame_mv[this_mode][mbmi->ref_frame[1]].as_int;
          }
          // restore src pointers
          mi_buf_restore(x, orig_src, orig_pre);
        }

        bsi->rdstat[i][mode_idx].brate =
            set_and_cost_bmi_mvs(cpi, x, xd, i, this_mode, mode_mv[this_mode],
                                 frame_mv, seg_mvs[i], bsi->ref_mv,
                                 x->nmvjointcost, x->mvcost);

        // Replicate the MVs into the labels this sub-block spans (4x8/8x4).
        for (ref = 0; ref < 1 + has_second_rf; ++ref) {
          bsi->rdstat[i][mode_idx].mvs[ref].as_int =
              mode_mv[this_mode][ref].as_int;
          if (num_4x4_blocks_wide > 1)
            bsi->rdstat[i + 1][mode_idx].mvs[ref].as_int =
                mode_mv[this_mode][ref].as_int;
          if (num_4x4_blocks_high > 1)
            bsi->rdstat[i + 2][mode_idx].mvs[ref].as_int =
                mode_mv[this_mode][ref].as_int;
        }

        // Trap vectors that reach beyond the UMV borders
        if (mv_check_bounds(x, &mode_mv[this_mode][0].as_mv) ||
            (has_second_rf &&
             mv_check_bounds(x, &mode_mv[this_mode][1].as_mv)))
          continue;

        if (filter_idx > 0) {
          // Try to reuse rd stats computed for an earlier filter: valid only
          // when all MVs are full-pel (prediction is filter-independent) and
          // identical to the earlier filter's MVs.
          BEST_SEG_INFO *ref_bsi = bsi_buf;
          subpelmv = 0;
          have_ref = 1;

          for (ref = 0; ref < 1 + has_second_rf; ++ref) {
            subpelmv |= mv_has_subpel(&mode_mv[this_mode][ref].as_mv);
            have_ref &= mode_mv[this_mode][ref].as_int ==
                ref_bsi->rdstat[i][mode_idx].mvs[ref].as_int;
          }

          if (filter_idx > 1 && !subpelmv && !have_ref) {
            // First filter's MVs didn't match; try the second filter's.
            ref_bsi = bsi_buf + 1;
            have_ref = 1;
            for (ref = 0; ref < 1 + has_second_rf; ++ref)
              have_ref &= mode_mv[this_mode][ref].as_int ==
                  ref_bsi->rdstat[i][mode_idx].mvs[ref].as_int;
          }

          if (!subpelmv && have_ref &&
              ref_bsi->rdstat[i][mode_idx].brdcost < INT64_MAX) {
            memcpy(&bsi->rdstat[i][mode_idx], &ref_bsi->rdstat[i][mode_idx],
                   sizeof(SEG_RDSTAT));
            if (num_4x4_blocks_wide > 1)
              bsi->rdstat[i + 1][mode_idx].eobs =
                  ref_bsi->rdstat[i + 1][mode_idx].eobs;
            if (num_4x4_blocks_high > 1)
              bsi->rdstat[i + 2][mode_idx].eobs =
                  ref_bsi->rdstat[i + 2][mode_idx].eobs;

            if (bsi->rdstat[i][mode_idx].brdcost < best_rd) {
              mode_selected = this_mode;
              best_rd = bsi->rdstat[i][mode_idx].brdcost;
            }
            continue;
          }
        }

        bsi->rdstat[i][mode_idx].brdcost =
            encode_inter_mb_segment(cpi, x,
                                    bsi->segment_rd - this_segment_rd, i,
                                    &bsi->rdstat[i][mode_idx].byrate,
                                    &bsi->rdstat[i][mode_idx].bdist,
                                    &bsi->rdstat[i][mode_idx].bsse,
                                    bsi->rdstat[i][mode_idx].ta,
                                    bsi->rdstat[i][mode_idx].tl,
                                    idy, idx,
                                    mi_row, mi_col);
        if (bsi->rdstat[i][mode_idx].brdcost < INT64_MAX) {
          // Add the mode/MV rate on top of the residual-coding rd cost.
          bsi->rdstat[i][mode_idx].brdcost += RDCOST(x->rdmult, x->rddiv,
                                            bsi->rdstat[i][mode_idx].brate, 0);
          bsi->rdstat[i][mode_idx].brate += bsi->rdstat[i][mode_idx].byrate;
          bsi->rdstat[i][mode_idx].eobs = p->eobs[i];
          if (num_4x4_blocks_wide > 1)
            bsi->rdstat[i + 1][mode_idx].eobs = p->eobs[i + 1];
          if (num_4x4_blocks_high > 1)
            bsi->rdstat[i + 2][mode_idx].eobs = p->eobs[i + 2];
        }

        if (bsi->rdstat[i][mode_idx].brdcost < best_rd) {
          mode_selected = this_mode;
          best_rd = bsi->rdstat[i][mode_idx].brdcost;
        }
      } /*for each 4x4 mode*/

      if (best_rd == INT64_MAX) {
        // No usable mode for this label: poison the remaining labels'
        // costs so callers see a failed segmentation and bail out.
        int iy, midx;
        for (iy = i + 1; iy < 4; ++iy)
          for (midx = 0; midx < INTER_MODES; ++midx)
            bsi->rdstat[iy][midx].brdcost = INT64_MAX;
        bsi->segment_rd = INT64_MAX;
        return INT64_MAX;
      }

      mode_idx = INTER_OFFSET(mode_selected);
      // Commit the winner's entropy contexts for the next label.
      memcpy(t_above, bsi->rdstat[i][mode_idx].ta, sizeof(t_above));
      memcpy(t_left, bsi->rdstat[i][mode_idx].tl, sizeof(t_left));

      // Re-apply the winning mode/MVs to mi->bmi[] (the mode loop above may
      // have left a different candidate's state in place).
      set_and_cost_bmi_mvs(cpi, x, xd, i, mode_selected, mode_mv[mode_selected],
                           frame_mv, seg_mvs[i], bsi->ref_mv, x->nmvjointcost,
                           x->mvcost);

      br += bsi->rdstat[i][mode_idx].brate;
      bd += bsi->rdstat[i][mode_idx].bdist;
      block_sse += bsi->rdstat[i][mode_idx].bsse;
      segmentyrate += bsi->rdstat[i][mode_idx].byrate;
      this_segment_rd += bsi->rdstat[i][mode_idx].brdcost;

      if (this_segment_rd > bsi->segment_rd) {
        int iy, midx;
        for (iy = i + 1; iy < 4; ++iy)
          for (midx = 0; midx < INTER_MODES; ++midx)
            bsi->rdstat[iy][midx].brdcost = INT64_MAX;
        bsi->segment_rd = INT64_MAX;
        return INT64_MAX;
      }
    }
  } /* for each label */

  bsi->r = br;
  bsi->d = bd;
  bsi->segment_yrate = segmentyrate;
  bsi->segment_rd = this_segment_rd;
  bsi->sse = block_sse;

  // update the coding decisions
  for (k = 0; k < 4; ++k)
    bsi->modes[k] = mi->bmi[k].as_mode;

  if (bsi->segment_rd > best_rd)
    return INT64_MAX;
  /* set it to the best */
  for (i = 0; i < 4; i++) {
    mode_idx = INTER_OFFSET(bsi->modes[i]);
    mi->bmi[i].as_mv[0].as_int = bsi->rdstat[i][mode_idx].mvs[0].as_int;
    if (has_second_ref(mbmi))
      mi->bmi[i].as_mv[1].as_int = bsi->rdstat[i][mode_idx].mvs[1].as_int;
    x->plane[0].eobs[i] = bsi->rdstat[i][mode_idx].eobs;
    mi->bmi[i].as_mode = bsi->modes[i];
  }

  /*
   * used to set mbmi->mv.as_int
   */
  *returntotrate = bsi->r;
  *returndistortion = bsi->d;
  *returnyrate = bsi->segment_yrate;
  *skippable = vp10_is_skippable_in_plane(x, BLOCK_8X8, 0);
  *psse = bsi->sse;
  mbmi->mode = bsi->modes[3];

  return bsi->segment_rd;
}
3553
Yaowu Xufc7cbd12015-08-13 09:36:53 -07003554static void estimate_ref_frame_costs(const VP10_COMMON *cm,
Jingning Han3ee6db62015-08-05 19:00:31 -07003555 const MACROBLOCKD *xd,
3556 int segment_id,
3557 unsigned int *ref_costs_single,
3558 unsigned int *ref_costs_comp,
3559 vpx_prob *comp_mode_p) {
3560 int seg_ref_active = segfeature_active(&cm->seg, segment_id,
3561 SEG_LVL_REF_FRAME);
3562 if (seg_ref_active) {
3563 memset(ref_costs_single, 0, MAX_REF_FRAMES * sizeof(*ref_costs_single));
3564 memset(ref_costs_comp, 0, MAX_REF_FRAMES * sizeof(*ref_costs_comp));
3565 *comp_mode_p = 128;
3566 } else {
3567 vpx_prob intra_inter_p = vp10_get_intra_inter_prob(cm, xd);
3568 vpx_prob comp_inter_p = 128;
3569
3570 if (cm->reference_mode == REFERENCE_MODE_SELECT) {
3571 comp_inter_p = vp10_get_reference_mode_prob(cm, xd);
3572 *comp_mode_p = comp_inter_p;
3573 } else {
3574 *comp_mode_p = 128;
3575 }
3576
3577 ref_costs_single[INTRA_FRAME] = vp10_cost_bit(intra_inter_p, 0);
3578
3579 if (cm->reference_mode != COMPOUND_REFERENCE) {
3580 vpx_prob ref_single_p1 = vp10_get_pred_prob_single_ref_p1(cm, xd);
3581 vpx_prob ref_single_p2 = vp10_get_pred_prob_single_ref_p2(cm, xd);
3582 unsigned int base_cost = vp10_cost_bit(intra_inter_p, 1);
3583
3584 if (cm->reference_mode == REFERENCE_MODE_SELECT)
3585 base_cost += vp10_cost_bit(comp_inter_p, 0);
3586
3587 ref_costs_single[LAST_FRAME] = ref_costs_single[GOLDEN_FRAME] =
3588 ref_costs_single[ALTREF_FRAME] = base_cost;
3589 ref_costs_single[LAST_FRAME] += vp10_cost_bit(ref_single_p1, 0);
3590 ref_costs_single[GOLDEN_FRAME] += vp10_cost_bit(ref_single_p1, 1);
3591 ref_costs_single[ALTREF_FRAME] += vp10_cost_bit(ref_single_p1, 1);
3592 ref_costs_single[GOLDEN_FRAME] += vp10_cost_bit(ref_single_p2, 0);
3593 ref_costs_single[ALTREF_FRAME] += vp10_cost_bit(ref_single_p2, 1);
3594 } else {
3595 ref_costs_single[LAST_FRAME] = 512;
3596 ref_costs_single[GOLDEN_FRAME] = 512;
3597 ref_costs_single[ALTREF_FRAME] = 512;
3598 }
3599 if (cm->reference_mode != SINGLE_REFERENCE) {
3600 vpx_prob ref_comp_p = vp10_get_pred_prob_comp_ref_p(cm, xd);
3601 unsigned int base_cost = vp10_cost_bit(intra_inter_p, 1);
3602
3603 if (cm->reference_mode == REFERENCE_MODE_SELECT)
3604 base_cost += vp10_cost_bit(comp_inter_p, 1);
3605
3606 ref_costs_comp[LAST_FRAME] = base_cost + vp10_cost_bit(ref_comp_p, 0);
3607 ref_costs_comp[GOLDEN_FRAME] = base_cost + vp10_cost_bit(ref_comp_p, 1);
3608 } else {
3609 ref_costs_comp[LAST_FRAME] = 512;
3610 ref_costs_comp[GOLDEN_FRAME] = 512;
3611 }
3612 }
3613}
3614
3615static void store_coding_context(MACROBLOCK *x, PICK_MODE_CONTEXT *ctx,
3616 int mode_index,
3617 int64_t comp_pred_diff[REFERENCE_MODES],
3618 int64_t best_filter_diff[SWITCHABLE_FILTER_CONTEXTS],
3619 int skippable) {
3620 MACROBLOCKD *const xd = &x->e_mbd;
3621
3622 // Take a snapshot of the coding context so it can be
3623 // restored if we decide to encode this way
3624 ctx->skip = x->skip;
3625 ctx->skippable = skippable;
3626 ctx->best_mode_index = mode_index;
3627 ctx->mic = *xd->mi[0];
3628 ctx->mbmi_ext = *x->mbmi_ext;
3629 ctx->single_pred_diff = (int)comp_pred_diff[SINGLE_REFERENCE];
3630 ctx->comp_pred_diff = (int)comp_pred_diff[COMPOUND_REFERENCE];
3631 ctx->hybrid_pred_diff = (int)comp_pred_diff[REFERENCE_MODE_SELECT];
3632
3633 memcpy(ctx->best_filter_diff, best_filter_diff,
3634 sizeof(*best_filter_diff) * SWITCHABLE_FILTER_CONTEXTS);
3635}
3636
Yaowu Xu26a9afc2015-08-13 09:42:27 -07003637static void setup_buffer_inter(VP10_COMP *cpi, MACROBLOCK *x,
Jingning Han3ee6db62015-08-05 19:00:31 -07003638 MV_REFERENCE_FRAME ref_frame,
3639 BLOCK_SIZE block_size,
3640 int mi_row, int mi_col,
3641 int_mv frame_nearest_mv[MAX_REF_FRAMES],
3642 int_mv frame_near_mv[MAX_REF_FRAMES],
3643 struct buf_2d yv12_mb[4][MAX_MB_PLANE]) {
Yaowu Xufc7cbd12015-08-13 09:36:53 -07003644 const VP10_COMMON *cm = &cpi->common;
Jingning Han3ee6db62015-08-05 19:00:31 -07003645 const YV12_BUFFER_CONFIG *yv12 = get_ref_frame_buffer(cpi, ref_frame);
3646 MACROBLOCKD *const xd = &x->e_mbd;
3647 MODE_INFO *const mi = xd->mi[0];
3648 int_mv *const candidates = x->mbmi_ext->ref_mvs[ref_frame];
3649 const struct scale_factors *const sf = &cm->frame_refs[ref_frame - 1].sf;
3650 MB_MODE_INFO_EXT *const mbmi_ext = x->mbmi_ext;
3651
3652 assert(yv12 != NULL);
3653
3654 // TODO(jkoleszar): Is the UV buffer ever used here? If so, need to make this
3655 // use the UV scaling factors.
3656 vp10_setup_pred_block(xd, yv12_mb[ref_frame], yv12, mi_row, mi_col, sf, sf);
3657
3658 // Gets an initial list of candidate vectors from neighbours and orders them
3659 vp10_find_mv_refs(cm, xd, mi, ref_frame, candidates, mi_row, mi_col,
3660 NULL, NULL, mbmi_ext->mode_context);
3661
3662 // Candidate refinement carried out at encoder and decoder
Ronald S. Bultje5b4805d2015-10-02 11:51:54 -04003663 vp10_find_best_ref_mvs(cm->allow_high_precision_mv, candidates,
3664 &frame_nearest_mv[ref_frame],
3665 &frame_near_mv[ref_frame]);
Jingning Han3ee6db62015-08-05 19:00:31 -07003666
3667 // Further refinement that is encode side only to test the top few candidates
3668 // in full and choose the best as the centre point for subsequent searches.
3669 // The current implementation doesn't support scaling.
3670 if (!vp10_is_scaled(sf) && block_size >= BLOCK_8X8)
3671 vp10_mv_pred(cpi, x, yv12_mb[ref_frame][0].buf, yv12->y_stride,
3672 ref_frame, block_size);
3673}
3674
// Single-reference motion search for the current block.  Runs a full-pel
// search around a predictor MV, then (if a candidate was found) a sub-pel
// refinement.  Outputs the winning vector in *tmp_mv (INVALID_MV on early
// bailout) and its signalling cost in *rate_mv.  If the reference must be
// scaled, the pre planes are temporarily swapped for the scaled version and
// restored on every exit path.
static void single_motion_search(VP10_COMP *cpi, MACROBLOCK *x,
                                 BLOCK_SIZE bsize,
                                 int mi_row, int mi_col,
                                 int_mv *tmp_mv, int *rate_mv) {
  MACROBLOCKD *xd = &x->e_mbd;
  const VP10_COMMON *cm = &cpi->common;
  MB_MODE_INFO *mbmi = &xd->mi[0]->mbmi;
  struct buf_2d backup_yv12[MAX_MB_PLANE] = {{0, 0}};
  int bestsme = INT_MAX;
  int step_param;
  int sadpb = x->sadperbit16;
  MV mvp_full;
  int ref = mbmi->ref_frame[0];
  MV ref_mv = x->mbmi_ext->ref_mvs[ref][0].as_mv;

  // Save the MV search window; vp10_set_mv_search_range below narrows it
  // and it is restored after the full-pel search.
  int tmp_col_min = x->mv_col_min;
  int tmp_col_max = x->mv_col_max;
  int tmp_row_min = x->mv_row_min;
  int tmp_row_max = x->mv_row_max;
  int cost_list[5];

  const YV12_BUFFER_CONFIG *scaled_ref_frame = vp10_get_scaled_ref_frame(cpi,
                                                                         ref);

  // Predictor candidates; x->mv_best_ref_index[ref] selects one below.
  MV pred_mv[3];
  pred_mv[0] = x->mbmi_ext->ref_mvs[ref][0].as_mv;
  pred_mv[1] = x->mbmi_ext->ref_mvs[ref][1].as_mv;
  pred_mv[2] = x->pred_mv[ref];

  if (scaled_ref_frame) {
    int i;
    // Swap out the reference frame for a version that's been scaled to
    // match the resolution of the current frame, allowing the existing
    // motion search code to be used without additional modifications.
    for (i = 0; i < MAX_MB_PLANE; i++)
      backup_yv12[i] = xd->plane[i].pre[0];

    vp10_setup_pre_planes(xd, 0, scaled_ref_frame, mi_row, mi_col, NULL);
  }

  vp10_set_mv_search_range(x, &ref_mv);

  // Work out the size of the first step in the mv step search.
  // 0 here is maximum length first step. 1 is VPXMAX >> 1 etc.
  if (cpi->sf.mv.auto_mv_step_size && cm->show_frame) {
    // Take wtd average of the step_params based on the last frame's
    // max mv magnitude and that based on the best ref mvs of the current
    // block for the given reference.
    step_param = (vp10_init_search_range(x->max_mv_context[ref]) +
                    cpi->mv_step_param) / 2;
  } else {
    step_param = cpi->mv_step_param;
  }

  if (cpi->sf.adaptive_motion_search && bsize < BLOCK_64X64) {
    // Smaller blocks start with a coarser first step.
    int boffset =
        2 * (b_width_log2_lookup[BLOCK_64X64] -
             VPXMIN(b_height_log2_lookup[bsize], b_width_log2_lookup[bsize]));
    step_param = VPXMAX(step_param, boffset);
  }

  if (cpi->sf.adaptive_motion_search) {
    int bwl = b_width_log2_lookup[bsize];
    int bhl = b_height_log2_lookup[bsize];
    // Normalized prediction SAD; low values mean the predictor is already
    // good, so the search can start coarser.
    int tlevel = x->pred_mv_sad[ref] >> (bwl + bhl + 4);

    if (tlevel < 5)
      step_param += 2;

    // prev_mv_sad is not setup for dynamically scaled frames.
    if (cpi->oxcf.resize_mode != RESIZE_DYNAMIC) {
      int i;
      for (i = LAST_FRAME; i <= ALTREF_FRAME && cm->show_frame; ++i) {
        // Bail out early if another reference predicts this block much
        // better (8x smaller SAD) than the one being searched.
        if ((x->pred_mv_sad[ref] >> 3) > x->pred_mv_sad[i]) {
          x->pred_mv[ref].row = 0;
          x->pred_mv[ref].col = 0;
          tmp_mv->as_int = INVALID_MV;

          if (scaled_ref_frame) {
            int i;
            for (i = 0; i < MAX_MB_PLANE; ++i)
              xd->plane[i].pre[0] = backup_yv12[i];
          }
          return;
        }
      }
    }
  }

  mvp_full = pred_mv[x->mv_best_ref_index[ref]];

  // Full-pel units for the integer search (drop the 3 sub-pel bits).
  mvp_full.col >>= 3;
  mvp_full.row >>= 3;

  bestsme = vp10_full_pixel_search(cpi, x, bsize, &mvp_full, step_param, sadpb,
                                   cond_cost_list(cpi, cost_list),
                                   &ref_mv, &tmp_mv->as_mv, INT_MAX, 1);

  // Restore the MV search window narrowed by vp10_set_mv_search_range.
  x->mv_col_min = tmp_col_min;
  x->mv_col_max = tmp_col_max;
  x->mv_row_min = tmp_row_min;
  x->mv_row_max = tmp_row_max;

  if (bestsme < INT_MAX) {
    int dis;  /* TODO: use dis in distortion calculation later. */
    cpi->find_fractional_mv_step(x, &tmp_mv->as_mv, &ref_mv,
                                 cm->allow_high_precision_mv,
                                 x->errorperbit,
                                 &cpi->fn_ptr[bsize],
                                 cpi->sf.mv.subpel_force_stop,
                                 cpi->sf.mv.subpel_iters_per_step,
                                 cond_cost_list(cpi, cost_list),
                                 x->nmvjointcost, x->mvcost,
                                 &dis, &x->pred_sse[ref], NULL, 0, 0);
  }
  *rate_mv = vp10_mv_bit_cost(&tmp_mv->as_mv, &ref_mv,
                              x->nmvjointcost, x->mvcost, MV_COST_WEIGHT);

  // Remember the result as the predictor for the next block.
  if (cpi->sf.adaptive_motion_search)
    x->pred_mv[ref] = tmp_mv->as_mv;

  if (scaled_ref_frame) {
    int i;
    for (i = 0; i < MAX_MB_PLANE; i++)
      xd->plane[i].pre[0] = backup_yv12[i];
  }
}
3802
3803
3804
3805static INLINE void restore_dst_buf(MACROBLOCKD *xd,
3806 uint8_t *orig_dst[MAX_MB_PLANE],
3807 int orig_dst_stride[MAX_MB_PLANE]) {
3808 int i;
3809 for (i = 0; i < MAX_MB_PLANE; i++) {
3810 xd->plane[i].dst.buf = orig_dst[i];
3811 xd->plane[i].dst.stride = orig_dst_stride[i];
3812 }
3813}
3814
3815// In some situations we want to discount tha pparent cost of a new motion
3816// vector. Where there is a subtle motion field and especially where there is
3817// low spatial complexity then it can be hard to cover the cost of a new motion
3818// vector in a single block, even if that motion vector reduces distortion.
3819// However, once established that vector may be usable through the nearest and
3820// near mv modes to reduce distortion in subsequent blocks and also improve
3821// visual quality.
Yaowu Xu26a9afc2015-08-13 09:42:27 -07003822static int discount_newmv_test(const VP10_COMP *cpi,
Jingning Han3ee6db62015-08-05 19:00:31 -07003823 int this_mode,
3824 int_mv this_mv,
3825 int_mv (*mode_mv)[MAX_REF_FRAMES],
3826 int ref_frame) {
3827 return (!cpi->rc.is_src_frame_alt_ref &&
3828 (this_mode == NEWMV) &&
3829 (this_mv.as_int != 0) &&
3830 ((mode_mv[NEARESTMV][ref_frame].as_int == 0) ||
3831 (mode_mv[NEARESTMV][ref_frame].as_int == INVALID_MV)) &&
3832 ((mode_mv[NEARMV][ref_frame].as_int == 0) ||
3833 (mode_mv[NEARMV][ref_frame].as_int == INVALID_MV)));
3834}
3835
// Margins, in 1/8-pel units (hence the << 3), by which a motion vector may
// point past the visible frame edge before exhausting the extended border
// the encoder keeps around reference frames (border minus filter extension).
#define LEFT_TOP_MARGIN ((VP9_ENC_BORDER_IN_PIXELS - VP9_INTERP_EXTEND) << 3)
#define RIGHT_BOTTOM_MARGIN ((VP9_ENC_BORDER_IN_PIXELS -\
                                 VP9_INTERP_EXTEND) << 3)

// Clamp *mv so its prediction stays inside the reference frame's padded
// border for the current block position (edges taken from xd).
// TODO(jingning): this mv clamping function should be block size dependent.
static INLINE void clamp_mv2(MV *mv, const MACROBLOCKD *xd) {
  clamp_mv(mv, xd->mb_to_left_edge - LEFT_TOP_MARGIN,
           xd->mb_to_right_edge + RIGHT_BOTTOM_MARGIN,
           xd->mb_to_top_edge - LEFT_TOP_MARGIN,
           xd->mb_to_bottom_edge + RIGHT_BOTTOM_MARGIN);
}
3847
Yaowu Xu26a9afc2015-08-13 09:42:27 -07003848static int64_t handle_inter_mode(VP10_COMP *cpi, MACROBLOCK *x,
Jingning Han3ee6db62015-08-05 19:00:31 -07003849 BLOCK_SIZE bsize,
3850 int *rate2, int64_t *distortion,
3851 int *skippable,
3852 int *rate_y, int *rate_uv,
3853 int *disable_skip,
3854 int_mv (*mode_mv)[MAX_REF_FRAMES],
3855 int mi_row, int mi_col,
3856 int_mv single_newmv[MAX_REF_FRAMES],
3857 INTERP_FILTER (*single_filter)[MAX_REF_FRAMES],
3858 int (*single_skippable)[MAX_REF_FRAMES],
3859 int64_t *psse,
3860 const int64_t ref_best_rd,
3861 int64_t *mask_filter,
3862 int64_t filter_cache[]) {
Yaowu Xufc7cbd12015-08-13 09:36:53 -07003863 VP10_COMMON *cm = &cpi->common;
Jingning Han3ee6db62015-08-05 19:00:31 -07003864 MACROBLOCKD *xd = &x->e_mbd;
3865 MB_MODE_INFO *mbmi = &xd->mi[0]->mbmi;
3866 MB_MODE_INFO_EXT *const mbmi_ext = x->mbmi_ext;
3867 const int is_comp_pred = has_second_ref(mbmi);
3868 const int this_mode = mbmi->mode;
3869 int_mv *frame_mv = mode_mv[this_mode];
3870 int i;
3871 int refs[2] = { mbmi->ref_frame[0],
3872 (mbmi->ref_frame[1] < 0 ? 0 : mbmi->ref_frame[1]) };
3873 int_mv cur_mv[2];
3874#if CONFIG_VP9_HIGHBITDEPTH
3875 DECLARE_ALIGNED(16, uint16_t, tmp_buf16[MAX_MB_PLANE * 64 * 64]);
3876 uint8_t *tmp_buf;
3877#else
3878 DECLARE_ALIGNED(16, uint8_t, tmp_buf[MAX_MB_PLANE * 64 * 64]);
3879#endif // CONFIG_VP9_HIGHBITDEPTH
3880 int pred_exists = 0;
3881 int intpel_mv;
3882 int64_t rd, tmp_rd, best_rd = INT64_MAX;
3883 int best_needs_copy = 0;
3884 uint8_t *orig_dst[MAX_MB_PLANE];
3885 int orig_dst_stride[MAX_MB_PLANE];
3886 int rs = 0;
3887 INTERP_FILTER best_filter = SWITCHABLE;
3888 uint8_t skip_txfm[MAX_MB_PLANE << 2] = {0};
3889 int64_t bsse[MAX_MB_PLANE << 2] = {0};
3890
3891 int bsl = mi_width_log2_lookup[bsize];
3892 int pred_filter_search = cpi->sf.cb_pred_filter_search ?
3893 (((mi_row + mi_col) >> bsl) +
3894 get_chessboard_index(cm->current_video_frame)) & 0x1 : 0;
3895
3896 int skip_txfm_sb = 0;
3897 int64_t skip_sse_sb = INT64_MAX;
3898 int64_t distortion_y = 0, distortion_uv = 0;
3899
3900#if CONFIG_VP9_HIGHBITDEPTH
3901 if (xd->cur_buf->flags & YV12_FLAG_HIGHBITDEPTH) {
3902 tmp_buf = CONVERT_TO_BYTEPTR(tmp_buf16);
3903 } else {
3904 tmp_buf = (uint8_t *)tmp_buf16;
3905 }
3906#endif // CONFIG_VP9_HIGHBITDEPTH
3907
3908 if (pred_filter_search) {
3909 INTERP_FILTER af = SWITCHABLE, lf = SWITCHABLE;
3910 if (xd->up_available)
3911 af = xd->mi[-xd->mi_stride]->mbmi.interp_filter;
3912 if (xd->left_available)
3913 lf = xd->mi[-1]->mbmi.interp_filter;
3914
3915 if ((this_mode != NEWMV) || (af == lf))
3916 best_filter = af;
3917 }
3918
3919 if (is_comp_pred) {
3920 if (frame_mv[refs[0]].as_int == INVALID_MV ||
3921 frame_mv[refs[1]].as_int == INVALID_MV)
3922 return INT64_MAX;
3923
3924 if (cpi->sf.adaptive_mode_search) {
3925 if (single_filter[this_mode][refs[0]] ==
3926 single_filter[this_mode][refs[1]])
3927 best_filter = single_filter[this_mode][refs[0]];
3928 }
3929 }
3930
3931 if (this_mode == NEWMV) {
3932 int rate_mv;
3933 if (is_comp_pred) {
3934 // Initialize mv using single prediction mode result.
3935 frame_mv[refs[0]].as_int = single_newmv[refs[0]].as_int;
3936 frame_mv[refs[1]].as_int = single_newmv[refs[1]].as_int;
3937
3938 if (cpi->sf.comp_inter_joint_search_thresh <= bsize) {
3939 joint_motion_search(cpi, x, bsize, frame_mv,
3940 mi_row, mi_col, single_newmv, &rate_mv);
3941 } else {
3942 rate_mv = vp10_mv_bit_cost(&frame_mv[refs[0]].as_mv,
3943 &x->mbmi_ext->ref_mvs[refs[0]][0].as_mv,
3944 x->nmvjointcost, x->mvcost, MV_COST_WEIGHT);
3945 rate_mv += vp10_mv_bit_cost(&frame_mv[refs[1]].as_mv,
3946 &x->mbmi_ext->ref_mvs[refs[1]][0].as_mv,
3947 x->nmvjointcost, x->mvcost, MV_COST_WEIGHT);
3948 }
3949 *rate2 += rate_mv;
3950 } else {
3951 int_mv tmp_mv;
3952 single_motion_search(cpi, x, bsize, mi_row, mi_col,
3953 &tmp_mv, &rate_mv);
3954 if (tmp_mv.as_int == INVALID_MV)
3955 return INT64_MAX;
3956
3957 frame_mv[refs[0]].as_int =
3958 xd->mi[0]->bmi[0].as_mv[0].as_int = tmp_mv.as_int;
3959 single_newmv[refs[0]].as_int = tmp_mv.as_int;
3960
3961 // Estimate the rate implications of a new mv but discount this
3962 // under certain circumstances where we want to help initiate a weak
3963 // motion field, where the distortion gain for a single block may not
3964 // be enough to overcome the cost of a new mv.
3965 if (discount_newmv_test(cpi, this_mode, tmp_mv, mode_mv, refs[0])) {
James Zern5e16d392015-08-17 18:19:22 -07003966 *rate2 += VPXMAX((rate_mv / NEW_MV_DISCOUNT_FACTOR), 1);
Jingning Han3ee6db62015-08-05 19:00:31 -07003967 } else {
3968 *rate2 += rate_mv;
3969 }
3970 }
3971 }
3972
3973 for (i = 0; i < is_comp_pred + 1; ++i) {
3974 cur_mv[i] = frame_mv[refs[i]];
3975 // Clip "next_nearest" so that it does not extend to far out of image
3976 if (this_mode != NEWMV)
3977 clamp_mv2(&cur_mv[i].as_mv, xd);
3978
3979 if (mv_check_bounds(x, &cur_mv[i].as_mv))
3980 return INT64_MAX;
3981 mbmi->mv[i].as_int = cur_mv[i].as_int;
3982 }
3983
3984 // do first prediction into the destination buffer. Do the next
3985 // prediction into a temporary buffer. Then keep track of which one
3986 // of these currently holds the best predictor, and use the other
3987 // one for future predictions. In the end, copy from tmp_buf to
3988 // dst if necessary.
3989 for (i = 0; i < MAX_MB_PLANE; i++) {
3990 orig_dst[i] = xd->plane[i].dst.buf;
3991 orig_dst_stride[i] = xd->plane[i].dst.stride;
3992 }
3993
3994 // We don't include the cost of the second reference here, because there
3995 // are only three options: Last/Golden, ARF/Last or Golden/ARF, or in other
3996 // words if you present them in that order, the second one is always known
3997 // if the first is known.
3998 //
3999 // Under some circumstances we discount the cost of new mv mode to encourage
4000 // initiation of a motion field.
4001 if (discount_newmv_test(cpi, this_mode, frame_mv[refs[0]],
4002 mode_mv, refs[0])) {
James Zern5e16d392015-08-17 18:19:22 -07004003 *rate2 += VPXMIN(cost_mv_ref(cpi, this_mode,
4004 mbmi_ext->mode_context[refs[0]]),
4005 cost_mv_ref(cpi, NEARESTMV,
4006 mbmi_ext->mode_context[refs[0]]));
Jingning Han3ee6db62015-08-05 19:00:31 -07004007 } else {
4008 *rate2 += cost_mv_ref(cpi, this_mode, mbmi_ext->mode_context[refs[0]]);
4009 }
4010
4011 if (RDCOST(x->rdmult, x->rddiv, *rate2, 0) > ref_best_rd &&
4012 mbmi->mode != NEARESTMV)
4013 return INT64_MAX;
4014
4015 pred_exists = 0;
4016 // Are all MVs integer pel for Y and UV
4017 intpel_mv = !mv_has_subpel(&mbmi->mv[0].as_mv);
4018 if (is_comp_pred)
4019 intpel_mv &= !mv_has_subpel(&mbmi->mv[1].as_mv);
4020
4021 // Search for best switchable filter by checking the variance of
4022 // pred error irrespective of whether the filter will be used
4023 for (i = 0; i < SWITCHABLE_FILTER_CONTEXTS; ++i)
4024 filter_cache[i] = INT64_MAX;
4025
4026 if (cm->interp_filter != BILINEAR) {
4027 if (x->source_variance < cpi->sf.disable_filter_search_var_thresh) {
4028 best_filter = EIGHTTAP;
Debargha Mukherjee85514c42015-10-30 09:19:36 -07004029#if CONFIG_EXT_INTERP
4030 } else if (!vp10_is_interp_needed(xd) && cm->interp_filter == SWITCHABLE) {
4031 best_filter = EIGHTTAP;
4032#endif
Jingning Han3ee6db62015-08-05 19:00:31 -07004033 } else if (best_filter == SWITCHABLE) {
4034 int newbest;
4035 int tmp_rate_sum = 0;
4036 int64_t tmp_dist_sum = 0;
4037
4038 for (i = 0; i < SWITCHABLE_FILTERS; ++i) {
4039 int j;
4040 int64_t rs_rd;
4041 int tmp_skip_sb = 0;
4042 int64_t tmp_skip_sse = INT64_MAX;
4043
4044 mbmi->interp_filter = i;
4045 rs = vp10_get_switchable_rate(cpi, xd);
4046 rs_rd = RDCOST(x->rdmult, x->rddiv, rs, 0);
4047
Debargha Mukherjee85514c42015-10-30 09:19:36 -07004048 if (i > 0 && intpel_mv && IsInterpolatingFilter(i)) {
Jingning Han3ee6db62015-08-05 19:00:31 -07004049 rd = RDCOST(x->rdmult, x->rddiv, tmp_rate_sum, tmp_dist_sum);
4050 filter_cache[i] = rd;
4051 filter_cache[SWITCHABLE_FILTERS] =
James Zern5e16d392015-08-17 18:19:22 -07004052 VPXMIN(filter_cache[SWITCHABLE_FILTERS], rd + rs_rd);
Jingning Han3ee6db62015-08-05 19:00:31 -07004053 if (cm->interp_filter == SWITCHABLE)
4054 rd += rs_rd;
James Zern5e16d392015-08-17 18:19:22 -07004055 *mask_filter = VPXMAX(*mask_filter, rd);
Jingning Han3ee6db62015-08-05 19:00:31 -07004056 } else {
4057 int rate_sum = 0;
4058 int64_t dist_sum = 0;
4059 if (i > 0 && cpi->sf.adaptive_interp_filter_search &&
4060 (cpi->sf.interp_filter_search_mask & (1 << i))) {
4061 rate_sum = INT_MAX;
4062 dist_sum = INT64_MAX;
4063 continue;
4064 }
4065
4066 if ((cm->interp_filter == SWITCHABLE &&
4067 (!i || best_needs_copy)) ||
4068 (cm->interp_filter != SWITCHABLE &&
4069 (cm->interp_filter == mbmi->interp_filter ||
Debargha Mukherjee85514c42015-10-30 09:19:36 -07004070 (i == 0 && intpel_mv && IsInterpolatingFilter(i))))) {
Jingning Han3ee6db62015-08-05 19:00:31 -07004071 restore_dst_buf(xd, orig_dst, orig_dst_stride);
4072 } else {
4073 for (j = 0; j < MAX_MB_PLANE; j++) {
4074 xd->plane[j].dst.buf = tmp_buf + j * 64 * 64;
4075 xd->plane[j].dst.stride = 64;
4076 }
4077 }
4078 vp10_build_inter_predictors_sb(xd, mi_row, mi_col, bsize);
4079 model_rd_for_sb(cpi, bsize, x, xd, &rate_sum, &dist_sum,
4080 &tmp_skip_sb, &tmp_skip_sse);
4081
4082 rd = RDCOST(x->rdmult, x->rddiv, rate_sum, dist_sum);
4083 filter_cache[i] = rd;
4084 filter_cache[SWITCHABLE_FILTERS] =
James Zern5e16d392015-08-17 18:19:22 -07004085 VPXMIN(filter_cache[SWITCHABLE_FILTERS], rd + rs_rd);
Jingning Han3ee6db62015-08-05 19:00:31 -07004086 if (cm->interp_filter == SWITCHABLE)
4087 rd += rs_rd;
James Zern5e16d392015-08-17 18:19:22 -07004088 *mask_filter = VPXMAX(*mask_filter, rd);
Jingning Han3ee6db62015-08-05 19:00:31 -07004089
Debargha Mukherjee85514c42015-10-30 09:19:36 -07004090 if (i == 0 && intpel_mv && IsInterpolatingFilter(i)) {
Jingning Han3ee6db62015-08-05 19:00:31 -07004091 tmp_rate_sum = rate_sum;
4092 tmp_dist_sum = dist_sum;
4093 }
4094 }
4095
4096 if (i == 0 && cpi->sf.use_rd_breakout && ref_best_rd < INT64_MAX) {
4097 if (rd / 2 > ref_best_rd) {
4098 restore_dst_buf(xd, orig_dst, orig_dst_stride);
4099 return INT64_MAX;
4100 }
4101 }
4102 newbest = i == 0 || rd < best_rd;
4103
4104 if (newbest) {
4105 best_rd = rd;
4106 best_filter = mbmi->interp_filter;
Debargha Mukherjee85514c42015-10-30 09:19:36 -07004107 if (cm->interp_filter == SWITCHABLE && i &&
4108 !(intpel_mv && IsInterpolatingFilter(i)))
Jingning Han3ee6db62015-08-05 19:00:31 -07004109 best_needs_copy = !best_needs_copy;
4110 }
4111
4112 if ((cm->interp_filter == SWITCHABLE && newbest) ||
4113 (cm->interp_filter != SWITCHABLE &&
4114 cm->interp_filter == mbmi->interp_filter)) {
4115 pred_exists = 1;
4116 tmp_rd = best_rd;
4117
4118 skip_txfm_sb = tmp_skip_sb;
4119 skip_sse_sb = tmp_skip_sse;
4120 memcpy(skip_txfm, x->skip_txfm, sizeof(skip_txfm));
4121 memcpy(bsse, x->bsse, sizeof(bsse));
4122 }
4123 }
4124 restore_dst_buf(xd, orig_dst, orig_dst_stride);
4125 }
4126 }
Debargha Mukherjee85514c42015-10-30 09:19:36 -07004127
Jingning Han3ee6db62015-08-05 19:00:31 -07004128 // Set the appropriate filter
4129 mbmi->interp_filter = cm->interp_filter != SWITCHABLE ?
4130 cm->interp_filter : best_filter;
4131 rs = cm->interp_filter == SWITCHABLE ? vp10_get_switchable_rate(cpi, xd) : 0;
4132
4133 if (pred_exists) {
4134 if (best_needs_copy) {
4135 // again temporarily set the buffers to local memory to prevent a memcpy
4136 for (i = 0; i < MAX_MB_PLANE; i++) {
4137 xd->plane[i].dst.buf = tmp_buf + i * 64 * 64;
4138 xd->plane[i].dst.stride = 64;
4139 }
4140 }
4141 rd = tmp_rd + RDCOST(x->rdmult, x->rddiv, rs, 0);
4142 } else {
4143 int tmp_rate;
4144 int64_t tmp_dist;
4145 // Handles the special case when a filter that is not in the
4146 // switchable list (ex. bilinear) is indicated at the frame level, or
4147 // skip condition holds.
4148 vp10_build_inter_predictors_sb(xd, mi_row, mi_col, bsize);
4149 model_rd_for_sb(cpi, bsize, x, xd, &tmp_rate, &tmp_dist,
4150 &skip_txfm_sb, &skip_sse_sb);
4151 rd = RDCOST(x->rdmult, x->rddiv, rs + tmp_rate, tmp_dist);
4152 memcpy(skip_txfm, x->skip_txfm, sizeof(skip_txfm));
4153 memcpy(bsse, x->bsse, sizeof(bsse));
4154 }
4155
4156 if (!is_comp_pred)
4157 single_filter[this_mode][refs[0]] = mbmi->interp_filter;
4158
4159 if (cpi->sf.adaptive_mode_search)
4160 if (is_comp_pred)
4161 if (single_skippable[this_mode][refs[0]] &&
4162 single_skippable[this_mode][refs[1]])
4163 memset(skip_txfm, SKIP_TXFM_AC_DC, sizeof(skip_txfm));
4164
4165 if (cpi->sf.use_rd_breakout && ref_best_rd < INT64_MAX) {
4166 // if current pred_error modeled rd is substantially more than the best
4167 // so far, do not bother doing full rd
4168 if (rd / 2 > ref_best_rd) {
4169 restore_dst_buf(xd, orig_dst, orig_dst_stride);
4170 return INT64_MAX;
4171 }
4172 }
4173
4174 if (cm->interp_filter == SWITCHABLE)
4175 *rate2 += rs;
4176
4177 memcpy(x->skip_txfm, skip_txfm, sizeof(skip_txfm));
4178 memcpy(x->bsse, bsse, sizeof(bsse));
4179
4180 if (!skip_txfm_sb) {
4181 int skippable_y, skippable_uv;
4182 int64_t sseuv = INT64_MAX;
4183 int64_t rdcosty = INT64_MAX;
4184
4185 // Y cost and distortion
4186 vp10_subtract_plane(x, bsize, 0);
Jingning Han2cdc1272015-10-09 09:57:42 -07004187#if CONFIG_VAR_TX
Jingning Hanf0dee772015-10-26 12:32:30 -07004188 if (cm->tx_mode == TX_MODE_SELECT || xd->lossless[mbmi->segment_id]) {
Jingning Han4b594d32015-11-02 12:05:47 -08004189#if CONFIG_EXT_TX
4190 select_tx_type_yrd(cpi, x, rate_y, &distortion_y, &skippable_y, psse,
4191 bsize, ref_best_rd);
4192#else
Jingning Han2cdc1272015-10-09 09:57:42 -07004193 inter_block_yrd(cpi, x, rate_y, &distortion_y, &skippable_y, psse,
4194 bsize, ref_best_rd);
Jingning Han4b594d32015-11-02 12:05:47 -08004195#endif
Jingning Han2cdc1272015-10-09 09:57:42 -07004196 } else {
4197 super_block_yrd(cpi, x, rate_y, &distortion_y, &skippable_y, psse,
4198 bsize, ref_best_rd);
4199 for (i = 0; i < 64; ++i)
4200 mbmi->inter_tx_size[i] = mbmi->tx_size;
4201 }
4202#else
Jingning Han3ee6db62015-08-05 19:00:31 -07004203 super_block_yrd(cpi, x, rate_y, &distortion_y, &skippable_y, psse,
4204 bsize, ref_best_rd);
Jingning Han704985e2015-10-08 12:05:03 -07004205#endif
4206
Jingning Han3ee6db62015-08-05 19:00:31 -07004207 if (*rate_y == INT_MAX) {
4208 *rate2 = INT_MAX;
4209 *distortion = INT64_MAX;
4210 restore_dst_buf(xd, orig_dst, orig_dst_stride);
4211 return INT64_MAX;
4212 }
4213
4214 *rate2 += *rate_y;
4215 *distortion += distortion_y;
4216
4217 rdcosty = RDCOST(x->rdmult, x->rddiv, *rate2, *distortion);
James Zern5e16d392015-08-17 18:19:22 -07004218 rdcosty = VPXMIN(rdcosty, RDCOST(x->rdmult, x->rddiv, 0, *psse));
Jingning Han3ee6db62015-08-05 19:00:31 -07004219
Jingning Hana8dad552015-10-08 16:46:10 -07004220#if CONFIG_VAR_TX
4221 if (!inter_block_uvrd(cpi, x, rate_uv, &distortion_uv, &skippable_uv,
4222 &sseuv, bsize, ref_best_rd - rdcosty)) {
4223#else
Jingning Han3ee6db62015-08-05 19:00:31 -07004224 if (!super_block_uvrd(cpi, x, rate_uv, &distortion_uv, &skippable_uv,
4225 &sseuv, bsize, ref_best_rd - rdcosty)) {
Jingning Hana8dad552015-10-08 16:46:10 -07004226#endif
Jingning Han3ee6db62015-08-05 19:00:31 -07004227 *rate2 = INT_MAX;
4228 *distortion = INT64_MAX;
4229 restore_dst_buf(xd, orig_dst, orig_dst_stride);
4230 return INT64_MAX;
4231 }
4232
4233 *psse += sseuv;
4234 *rate2 += *rate_uv;
4235 *distortion += distortion_uv;
4236 *skippable = skippable_y && skippable_uv;
4237 } else {
4238 x->skip = 1;
4239 *disable_skip = 1;
4240
4241 // The cost of skip bit needs to be added.
4242 *rate2 += vp10_cost_bit(vp10_get_skip_prob(cm, xd), 1);
4243
4244 *distortion = skip_sse_sb;
4245 }
4246
4247 if (!is_comp_pred)
4248 single_skippable[this_mode][refs[0]] = *skippable;
4249
4250 restore_dst_buf(xd, orig_dst, orig_dst_stride);
4251 return 0; // The rate-distortion cost will be re-calculated by caller.
4252}
4253
Yaowu Xu26a9afc2015-08-13 09:42:27 -07004254void vp10_rd_pick_intra_mode_sb(VP10_COMP *cpi, MACROBLOCK *x,
Jingning Han3ee6db62015-08-05 19:00:31 -07004255 RD_COST *rd_cost, BLOCK_SIZE bsize,
4256 PICK_MODE_CONTEXT *ctx, int64_t best_rd) {
Yaowu Xufc7cbd12015-08-13 09:36:53 -07004257 VP10_COMMON *const cm = &cpi->common;
Jingning Han3ee6db62015-08-05 19:00:31 -07004258 MACROBLOCKD *const xd = &x->e_mbd;
4259 struct macroblockd_plane *const pd = xd->plane;
4260 int rate_y = 0, rate_uv = 0, rate_y_tokenonly = 0, rate_uv_tokenonly = 0;
4261 int y_skip = 0, uv_skip = 0;
4262 int64_t dist_y = 0, dist_uv = 0;
4263 TX_SIZE max_uv_tx_size;
Jingning Han3ee6db62015-08-05 19:00:31 -07004264 ctx->skip = 0;
4265 xd->mi[0]->mbmi.ref_frame[0] = INTRA_FRAME;
4266 xd->mi[0]->mbmi.ref_frame[1] = NONE;
4267
4268 if (bsize >= BLOCK_8X8) {
4269 if (rd_pick_intra_sby_mode(cpi, x, &rate_y, &rate_y_tokenonly,
4270 &dist_y, &y_skip, bsize,
4271 best_rd) >= best_rd) {
4272 rd_cost->rate = INT_MAX;
4273 return;
4274 }
4275 } else {
4276 y_skip = 0;
4277 if (rd_pick_intra_sub_8x8_y_mode(cpi, x, &rate_y, &rate_y_tokenonly,
4278 &dist_y, best_rd) >= best_rd) {
4279 rd_cost->rate = INT_MAX;
4280 return;
4281 }
4282 }
4283 max_uv_tx_size = get_uv_tx_size_impl(xd->mi[0]->mbmi.tx_size, bsize,
4284 pd[1].subsampling_x,
4285 pd[1].subsampling_y);
4286 rd_pick_intra_sbuv_mode(cpi, x, ctx, &rate_uv, &rate_uv_tokenonly,
James Zern5e16d392015-08-17 18:19:22 -07004287 &dist_uv, &uv_skip, VPXMAX(BLOCK_8X8, bsize),
Jingning Han3ee6db62015-08-05 19:00:31 -07004288 max_uv_tx_size);
4289
4290 if (y_skip && uv_skip) {
4291 rd_cost->rate = rate_y + rate_uv - rate_y_tokenonly - rate_uv_tokenonly +
4292 vp10_cost_bit(vp10_get_skip_prob(cm, xd), 1);
4293 rd_cost->dist = dist_y + dist_uv;
4294 } else {
4295 rd_cost->rate = rate_y + rate_uv +
4296 vp10_cost_bit(vp10_get_skip_prob(cm, xd), 0);
4297 rd_cost->dist = dist_y + dist_uv;
4298 }
4299
4300 ctx->mic = *xd->mi[0];
4301 ctx->mbmi_ext = *x->mbmi_ext;
4302 rd_cost->rdcost = RDCOST(x->rdmult, x->rddiv, rd_cost->rate, rd_cost->dist);
4303}
4304
4305// This function is designed to apply a bias or adjustment to an rd value based
4306// on the relative variance of the source and reconstruction.
4307#define LOW_VAR_THRESH 16
4308#define VLOW_ADJ_MAX 25
4309#define VHIGH_ADJ_MAX 8
Yaowu Xu26a9afc2015-08-13 09:42:27 -07004310static void rd_variance_adjustment(VP10_COMP *cpi,
Jingning Han3ee6db62015-08-05 19:00:31 -07004311 MACROBLOCK *x,
4312 BLOCK_SIZE bsize,
4313 int64_t *this_rd,
4314 MV_REFERENCE_FRAME ref_frame,
4315 unsigned int source_variance) {
4316 MACROBLOCKD *const xd = &x->e_mbd;
4317 unsigned int recon_variance;
4318 unsigned int absvar_diff = 0;
4319 int64_t var_error = 0;
4320 int64_t var_factor = 0;
4321
4322 if (*this_rd == INT64_MAX)
4323 return;
4324
4325#if CONFIG_VP9_HIGHBITDEPTH
4326 if (xd->cur_buf->flags & YV12_FLAG_HIGHBITDEPTH) {
4327 recon_variance =
4328 vp10_high_get_sby_perpixel_variance(cpi, &xd->plane[0].dst, bsize, xd->bd);
4329 } else {
4330 recon_variance =
4331 vp10_get_sby_perpixel_variance(cpi, &xd->plane[0].dst, bsize);
4332 }
4333#else
4334 recon_variance =
4335 vp10_get_sby_perpixel_variance(cpi, &xd->plane[0].dst, bsize);
4336#endif // CONFIG_VP9_HIGHBITDEPTH
4337
4338 if ((source_variance + recon_variance) > LOW_VAR_THRESH) {
4339 absvar_diff = (source_variance > recon_variance)
4340 ? (source_variance - recon_variance)
4341 : (recon_variance - source_variance);
4342
4343 var_error = (200 * source_variance * recon_variance) /
4344 ((source_variance * source_variance) +
4345 (recon_variance * recon_variance));
4346 var_error = 100 - var_error;
4347 }
4348
4349 // Source variance above a threshold and ref frame is intra.
4350 // This case is targeted mainly at discouraging intra modes that give rise
4351 // to a predictor with a low spatial complexity compared to the source.
4352 if ((source_variance > LOW_VAR_THRESH) && (ref_frame == INTRA_FRAME) &&
4353 (source_variance > recon_variance)) {
James Zern5e16d392015-08-17 18:19:22 -07004354 var_factor = VPXMIN(absvar_diff, VPXMIN(VLOW_ADJ_MAX, var_error));
Jingning Han3ee6db62015-08-05 19:00:31 -07004355 // A second possible case of interest is where the source variance
4356 // is very low and we wish to discourage false texture or motion trails.
4357 } else if ((source_variance < (LOW_VAR_THRESH >> 1)) &&
4358 (recon_variance > source_variance)) {
James Zern5e16d392015-08-17 18:19:22 -07004359 var_factor = VPXMIN(absvar_diff, VPXMIN(VHIGH_ADJ_MAX, var_error));
Jingning Han3ee6db62015-08-05 19:00:31 -07004360 }
4361 *this_rd += (*this_rd * var_factor) / 100;
4362}
4363
4364
4365// Do we have an internal image edge (e.g. formatting bars).
Yaowu Xu26a9afc2015-08-13 09:42:27 -07004366int vp10_internal_image_edge(VP10_COMP *cpi) {
Jingning Han3ee6db62015-08-05 19:00:31 -07004367 return (cpi->oxcf.pass == 2) &&
4368 ((cpi->twopass.this_frame_stats.inactive_zone_rows > 0) ||
4369 (cpi->twopass.this_frame_stats.inactive_zone_cols > 0));
4370}
4371
4372// Checks to see if a super block is on a horizontal image edge.
4373// In most cases this is the "real" edge unless there are formatting
4374// bars embedded in the stream.
Yaowu Xu26a9afc2015-08-13 09:42:27 -07004375int vp10_active_h_edge(VP10_COMP *cpi, int mi_row, int mi_step) {
Jingning Han3ee6db62015-08-05 19:00:31 -07004376 int top_edge = 0;
4377 int bottom_edge = cpi->common.mi_rows;
4378 int is_active_h_edge = 0;
4379
4380 // For two pass account for any formatting bars detected.
4381 if (cpi->oxcf.pass == 2) {
4382 TWO_PASS *twopass = &cpi->twopass;
4383
4384 // The inactive region is specified in MBs not mi units.
4385 // The image edge is in the following MB row.
4386 top_edge += (int)(twopass->this_frame_stats.inactive_zone_rows * 2);
4387
4388 bottom_edge -= (int)(twopass->this_frame_stats.inactive_zone_rows * 2);
James Zern5e16d392015-08-17 18:19:22 -07004389 bottom_edge = VPXMAX(top_edge, bottom_edge);
Jingning Han3ee6db62015-08-05 19:00:31 -07004390 }
4391
4392 if (((top_edge >= mi_row) && (top_edge < (mi_row + mi_step))) ||
4393 ((bottom_edge >= mi_row) && (bottom_edge < (mi_row + mi_step)))) {
4394 is_active_h_edge = 1;
4395 }
4396 return is_active_h_edge;
4397}
4398
4399// Checks to see if a super block is on a vertical image edge.
4400// In most cases this is the "real" edge unless there are formatting
4401// bars embedded in the stream.
Yaowu Xu26a9afc2015-08-13 09:42:27 -07004402int vp10_active_v_edge(VP10_COMP *cpi, int mi_col, int mi_step) {
Jingning Han3ee6db62015-08-05 19:00:31 -07004403 int left_edge = 0;
4404 int right_edge = cpi->common.mi_cols;
4405 int is_active_v_edge = 0;
4406
4407 // For two pass account for any formatting bars detected.
4408 if (cpi->oxcf.pass == 2) {
4409 TWO_PASS *twopass = &cpi->twopass;
4410
4411 // The inactive region is specified in MBs not mi units.
4412 // The image edge is in the following MB row.
4413 left_edge += (int)(twopass->this_frame_stats.inactive_zone_cols * 2);
4414
4415 right_edge -= (int)(twopass->this_frame_stats.inactive_zone_cols * 2);
James Zern5e16d392015-08-17 18:19:22 -07004416 right_edge = VPXMAX(left_edge, right_edge);
Jingning Han3ee6db62015-08-05 19:00:31 -07004417 }
4418
4419 if (((left_edge >= mi_col) && (left_edge < (mi_col + mi_step))) ||
4420 ((right_edge >= mi_col) && (right_edge < (mi_col + mi_step)))) {
4421 is_active_v_edge = 1;
4422 }
4423 return is_active_v_edge;
4424}
4425
4426// Checks to see if a super block is at the edge of the active image.
4427// In most cases this is the "real" edge unless there are formatting
4428// bars embedded in the stream.
Yaowu Xu26a9afc2015-08-13 09:42:27 -07004429int vp10_active_edge_sb(VP10_COMP *cpi,
Jingning Han3ee6db62015-08-05 19:00:31 -07004430 int mi_row, int mi_col) {
4431 return vp10_active_h_edge(cpi, mi_row, MI_BLOCK_SIZE) ||
4432 vp10_active_v_edge(cpi, mi_col, MI_BLOCK_SIZE);
4433}
4434
Yaowu Xu26a9afc2015-08-13 09:42:27 -07004435void vp10_rd_pick_inter_mode_sb(VP10_COMP *cpi,
Jingning Han4fa8e732015-09-10 12:24:06 -07004436 TileDataEnc *tile_data,
4437 MACROBLOCK *x,
4438 int mi_row, int mi_col,
4439 RD_COST *rd_cost, BLOCK_SIZE bsize,
4440 PICK_MODE_CONTEXT *ctx,
4441 int64_t best_rd_so_far) {
Yaowu Xufc7cbd12015-08-13 09:36:53 -07004442 VP10_COMMON *const cm = &cpi->common;
Jingning Han3ee6db62015-08-05 19:00:31 -07004443 RD_OPT *const rd_opt = &cpi->rd;
4444 SPEED_FEATURES *const sf = &cpi->sf;
4445 MACROBLOCKD *const xd = &x->e_mbd;
4446 MB_MODE_INFO *const mbmi = &xd->mi[0]->mbmi;
4447 MB_MODE_INFO_EXT *const mbmi_ext = x->mbmi_ext;
4448 const struct segmentation *const seg = &cm->seg;
4449 PREDICTION_MODE this_mode;
4450 MV_REFERENCE_FRAME ref_frame, second_ref_frame;
4451 unsigned char segment_id = mbmi->segment_id;
4452 int comp_pred, i, k;
4453 int_mv frame_mv[MB_MODE_COUNT][MAX_REF_FRAMES];
4454 struct buf_2d yv12_mb[4][MAX_MB_PLANE];
4455 int_mv single_newmv[MAX_REF_FRAMES] = { { 0 } };
4456 INTERP_FILTER single_inter_filter[MB_MODE_COUNT][MAX_REF_FRAMES];
4457 int single_skippable[MB_MODE_COUNT][MAX_REF_FRAMES];
4458 static const int flag_list[4] = { 0, VP9_LAST_FLAG, VP9_GOLD_FLAG,
4459 VP9_ALT_FLAG };
4460 int64_t best_rd = best_rd_so_far;
4461 int64_t best_pred_diff[REFERENCE_MODES];
4462 int64_t best_pred_rd[REFERENCE_MODES];
4463 int64_t best_filter_rd[SWITCHABLE_FILTER_CONTEXTS];
4464 int64_t best_filter_diff[SWITCHABLE_FILTER_CONTEXTS];
4465 MB_MODE_INFO best_mbmode;
4466 int best_mode_skippable = 0;
4467 int midx, best_mode_index = -1;
4468 unsigned int ref_costs_single[MAX_REF_FRAMES], ref_costs_comp[MAX_REF_FRAMES];
4469 vpx_prob comp_mode_p;
4470 int64_t best_intra_rd = INT64_MAX;
4471 unsigned int best_pred_sse = UINT_MAX;
4472 PREDICTION_MODE best_intra_mode = DC_PRED;
4473 int rate_uv_intra[TX_SIZES], rate_uv_tokenonly[TX_SIZES];
4474 int64_t dist_uv[TX_SIZES];
4475 int skip_uv[TX_SIZES];
4476 PREDICTION_MODE mode_uv[TX_SIZES];
hui sube3559b2015-10-07 09:29:02 -07004477#if CONFIG_EXT_INTRA
4478 EXT_INTRA_MODE_INFO ext_intra_mode_info_uv[TX_SIZES];
4479#endif // CONFIG_EXT_INTRA
Jingning Han3ee6db62015-08-05 19:00:31 -07004480 const int intra_cost_penalty = vp10_get_intra_cost_penalty(
4481 cm->base_qindex, cm->y_dc_delta_q, cm->bit_depth);
4482 int best_skip2 = 0;
4483 uint8_t ref_frame_skip_mask[2] = { 0 };
4484 uint16_t mode_skip_mask[MAX_REF_FRAMES] = { 0 };
4485 int mode_skip_start = sf->mode_skip_start + 1;
4486 const int *const rd_threshes = rd_opt->threshes[segment_id][bsize];
4487 const int *const rd_thresh_freq_fact = tile_data->thresh_freq_fact[bsize];
4488 int64_t mode_threshold[MAX_MODES];
4489 int *mode_map = tile_data->mode_map[bsize];
4490 const int mode_search_skip_flags = sf->mode_search_skip_flags;
4491 int64_t mask_filter = 0;
4492 int64_t filter_cache[SWITCHABLE_FILTER_CONTEXTS];
4493
4494 vp10_zero(best_mbmode);
4495
Jingning Han3ee6db62015-08-05 19:00:31 -07004496 for (i = 0; i < SWITCHABLE_FILTER_CONTEXTS; ++i)
4497 filter_cache[i] = INT64_MAX;
4498
4499 estimate_ref_frame_costs(cm, xd, segment_id, ref_costs_single, ref_costs_comp,
4500 &comp_mode_p);
4501
4502 for (i = 0; i < REFERENCE_MODES; ++i)
4503 best_pred_rd[i] = INT64_MAX;
4504 for (i = 0; i < SWITCHABLE_FILTER_CONTEXTS; i++)
4505 best_filter_rd[i] = INT64_MAX;
4506 for (i = 0; i < TX_SIZES; i++)
4507 rate_uv_intra[i] = INT_MAX;
4508 for (i = 0; i < MAX_REF_FRAMES; ++i)
4509 x->pred_sse[i] = INT_MAX;
4510 for (i = 0; i < MB_MODE_COUNT; ++i) {
4511 for (k = 0; k < MAX_REF_FRAMES; ++k) {
4512 single_inter_filter[i][k] = SWITCHABLE;
4513 single_skippable[i][k] = 0;
4514 }
4515 }
4516
4517 rd_cost->rate = INT_MAX;
4518
4519 for (ref_frame = LAST_FRAME; ref_frame <= ALTREF_FRAME; ++ref_frame) {
4520 x->pred_mv_sad[ref_frame] = INT_MAX;
4521 if (cpi->ref_frame_flags & flag_list[ref_frame]) {
4522 assert(get_ref_frame_buffer(cpi, ref_frame) != NULL);
4523 setup_buffer_inter(cpi, x, ref_frame, bsize, mi_row, mi_col,
4524 frame_mv[NEARESTMV], frame_mv[NEARMV], yv12_mb);
4525 }
4526 frame_mv[NEWMV][ref_frame].as_int = INVALID_MV;
4527 frame_mv[ZEROMV][ref_frame].as_int = 0;
4528 }
4529
4530 for (ref_frame = LAST_FRAME; ref_frame <= ALTREF_FRAME; ++ref_frame) {
4531 if (!(cpi->ref_frame_flags & flag_list[ref_frame])) {
4532 // Skip checking missing references in both single and compound reference
4533 // modes. Note that a mode will be skipped iff both reference frames
4534 // are masked out.
4535 ref_frame_skip_mask[0] |= (1 << ref_frame);
4536 ref_frame_skip_mask[1] |= SECOND_REF_FRAME_MASK;
Jingning Han1eb760e2015-09-10 12:56:41 -07004537 } else {
Jingning Han3ee6db62015-08-05 19:00:31 -07004538 for (i = LAST_FRAME; i <= ALTREF_FRAME; ++i) {
4539 // Skip fixed mv modes for poor references
4540 if ((x->pred_mv_sad[ref_frame] >> 2) > x->pred_mv_sad[i]) {
4541 mode_skip_mask[ref_frame] |= INTER_NEAREST_NEAR_ZERO;
4542 break;
4543 }
4544 }
4545 }
4546 // If the segment reference frame feature is enabled....
4547 // then do nothing if the current ref frame is not allowed..
4548 if (segfeature_active(seg, segment_id, SEG_LVL_REF_FRAME) &&
4549 get_segdata(seg, segment_id, SEG_LVL_REF_FRAME) != (int)ref_frame) {
4550 ref_frame_skip_mask[0] |= (1 << ref_frame);
4551 ref_frame_skip_mask[1] |= SECOND_REF_FRAME_MASK;
4552 }
4553 }
4554
4555 // Disable this drop out case if the ref frame
4556 // segment level feature is enabled for this segment. This is to
4557 // prevent the possibility that we end up unable to pick any mode.
4558 if (!segfeature_active(seg, segment_id, SEG_LVL_REF_FRAME)) {
4559 // Only consider ZEROMV/ALTREF_FRAME for alt ref frame,
4560 // unless ARNR filtering is enabled in which case we want
4561 // an unfiltered alternative. We allow near/nearest as well
4562 // because they may result in zero-zero MVs but be cheaper.
4563 if (cpi->rc.is_src_frame_alt_ref && (cpi->oxcf.arnr_max_frames == 0)) {
4564 ref_frame_skip_mask[0] = (1 << LAST_FRAME) | (1 << GOLDEN_FRAME);
4565 ref_frame_skip_mask[1] = SECOND_REF_FRAME_MASK;
4566 mode_skip_mask[ALTREF_FRAME] = ~INTER_NEAREST_NEAR_ZERO;
4567 if (frame_mv[NEARMV][ALTREF_FRAME].as_int != 0)
4568 mode_skip_mask[ALTREF_FRAME] |= (1 << NEARMV);
4569 if (frame_mv[NEARESTMV][ALTREF_FRAME].as_int != 0)
4570 mode_skip_mask[ALTREF_FRAME] |= (1 << NEARESTMV);
4571 }
4572 }
4573
4574 if (cpi->rc.is_src_frame_alt_ref) {
4575 if (sf->alt_ref_search_fp) {
4576 mode_skip_mask[ALTREF_FRAME] = 0;
4577 ref_frame_skip_mask[0] = ~(1 << ALTREF_FRAME);
4578 ref_frame_skip_mask[1] = SECOND_REF_FRAME_MASK;
4579 }
4580 }
4581
4582 if (sf->alt_ref_search_fp)
4583 if (!cm->show_frame && x->pred_mv_sad[GOLDEN_FRAME] < INT_MAX)
4584 if (x->pred_mv_sad[ALTREF_FRAME] > (x->pred_mv_sad[GOLDEN_FRAME] << 1))
4585 mode_skip_mask[ALTREF_FRAME] |= INTER_ALL;
4586
4587 if (sf->adaptive_mode_search) {
4588 if (cm->show_frame && !cpi->rc.is_src_frame_alt_ref &&
4589 cpi->rc.frames_since_golden >= 3)
4590 if (x->pred_mv_sad[GOLDEN_FRAME] > (x->pred_mv_sad[LAST_FRAME] << 1))
4591 mode_skip_mask[GOLDEN_FRAME] |= INTER_ALL;
4592 }
4593
4594 if (bsize > sf->max_intra_bsize) {
4595 ref_frame_skip_mask[0] |= (1 << INTRA_FRAME);
4596 ref_frame_skip_mask[1] |= (1 << INTRA_FRAME);
4597 }
4598
4599 mode_skip_mask[INTRA_FRAME] |=
4600 ~(sf->intra_y_mode_mask[max_txsize_lookup[bsize]]);
4601
4602 for (i = 0; i <= LAST_NEW_MV_INDEX; ++i)
4603 mode_threshold[i] = 0;
4604 for (i = LAST_NEW_MV_INDEX + 1; i < MAX_MODES; ++i)
4605 mode_threshold[i] = ((int64_t)rd_threshes[i] * rd_thresh_freq_fact[i]) >> 5;
4606
4607 midx = sf->schedule_mode_search ? mode_skip_start : 0;
4608 while (midx > 4) {
4609 uint8_t end_pos = 0;
4610 for (i = 5; i < midx; ++i) {
4611 if (mode_threshold[mode_map[i - 1]] > mode_threshold[mode_map[i]]) {
4612 uint8_t tmp = mode_map[i];
4613 mode_map[i] = mode_map[i - 1];
4614 mode_map[i - 1] = tmp;
4615 end_pos = i;
4616 }
4617 }
4618 midx = end_pos;
4619 }
4620
hui su5d011cb2015-09-15 12:44:13 -07004621 mbmi->palette_mode_info.palette_size[0] = 0;
4622 mbmi->palette_mode_info.palette_size[1] = 0;
Jingning Han3ee6db62015-08-05 19:00:31 -07004623 for (midx = 0; midx < MAX_MODES; ++midx) {
4624 int mode_index = mode_map[midx];
4625 int mode_excluded = 0;
4626 int64_t this_rd = INT64_MAX;
4627 int disable_skip = 0;
4628 int compmode_cost = 0;
4629 int rate2 = 0, rate_y = 0, rate_uv = 0;
4630 int64_t distortion2 = 0, distortion_y = 0, distortion_uv = 0;
4631 int skippable = 0;
4632 int this_skip2 = 0;
4633 int64_t total_sse = INT64_MAX;
4634 int early_term = 0;
4635
4636 this_mode = vp10_mode_order[mode_index].mode;
4637 ref_frame = vp10_mode_order[mode_index].ref_frame[0];
4638 second_ref_frame = vp10_mode_order[mode_index].ref_frame[1];
4639
4640 // Look at the reference frame of the best mode so far and set the
4641 // skip mask to look at a subset of the remaining modes.
4642 if (midx == mode_skip_start && best_mode_index >= 0) {
4643 switch (best_mbmode.ref_frame[0]) {
4644 case INTRA_FRAME:
4645 break;
4646 case LAST_FRAME:
4647 ref_frame_skip_mask[0] |= LAST_FRAME_MODE_MASK;
4648 ref_frame_skip_mask[1] |= SECOND_REF_FRAME_MASK;
4649 break;
4650 case GOLDEN_FRAME:
4651 ref_frame_skip_mask[0] |= GOLDEN_FRAME_MODE_MASK;
4652 ref_frame_skip_mask[1] |= SECOND_REF_FRAME_MASK;
4653 break;
4654 case ALTREF_FRAME:
4655 ref_frame_skip_mask[0] |= ALT_REF_MODE_MASK;
4656 break;
4657 case NONE:
4658 case MAX_REF_FRAMES:
4659 assert(0 && "Invalid Reference frame");
4660 break;
4661 }
4662 }
4663
4664 if ((ref_frame_skip_mask[0] & (1 << ref_frame)) &&
James Zern5e16d392015-08-17 18:19:22 -07004665 (ref_frame_skip_mask[1] & (1 << VPXMAX(0, second_ref_frame))))
Jingning Han3ee6db62015-08-05 19:00:31 -07004666 continue;
4667
4668 if (mode_skip_mask[ref_frame] & (1 << this_mode))
4669 continue;
4670
4671 // Test best rd so far against threshold for trying this mode.
4672 if (best_mode_skippable && sf->schedule_mode_search)
4673 mode_threshold[mode_index] <<= 1;
4674
4675 if (best_rd < mode_threshold[mode_index])
4676 continue;
4677
Jingning Han3ee6db62015-08-05 19:00:31 -07004678 comp_pred = second_ref_frame > INTRA_FRAME;
4679 if (comp_pred) {
4680 if (!cpi->allow_comp_inter_inter)
4681 continue;
4682
4683 // Skip compound inter modes if ARF is not available.
4684 if (!(cpi->ref_frame_flags & flag_list[second_ref_frame]))
4685 continue;
4686
4687 // Do not allow compound prediction if the segment level reference frame
4688 // feature is in use as in this case there can only be one reference.
4689 if (segfeature_active(seg, segment_id, SEG_LVL_REF_FRAME))
4690 continue;
4691
4692 if ((mode_search_skip_flags & FLAG_SKIP_COMP_BESTINTRA) &&
4693 best_mode_index >= 0 && best_mbmode.ref_frame[0] == INTRA_FRAME)
4694 continue;
4695
4696 mode_excluded = cm->reference_mode == SINGLE_REFERENCE;
4697 } else {
4698 if (ref_frame != INTRA_FRAME)
4699 mode_excluded = cm->reference_mode == COMPOUND_REFERENCE;
4700 }
4701
4702 if (ref_frame == INTRA_FRAME) {
4703 if (sf->adaptive_mode_search)
4704 if ((x->source_variance << num_pels_log2_lookup[bsize]) > best_pred_sse)
4705 continue;
4706
4707 if (this_mode != DC_PRED) {
4708 // Disable intra modes other than DC_PRED for blocks with low variance
4709 // Threshold for intra skipping based on source variance
4710 // TODO(debargha): Specialize the threshold for super block sizes
4711 const unsigned int skip_intra_var_thresh = 64;
4712 if ((mode_search_skip_flags & FLAG_SKIP_INTRA_LOWVAR) &&
4713 x->source_variance < skip_intra_var_thresh)
4714 continue;
4715 // Only search the oblique modes if the best so far is
4716 // one of the neighboring directional modes
4717 if ((mode_search_skip_flags & FLAG_SKIP_INTRA_BESTINTER) &&
4718 (this_mode >= D45_PRED && this_mode <= TM_PRED)) {
4719 if (best_mode_index >= 0 &&
4720 best_mbmode.ref_frame[0] > INTRA_FRAME)
4721 continue;
4722 }
4723 if (mode_search_skip_flags & FLAG_SKIP_INTRA_DIRMISMATCH) {
4724 if (conditional_skipintra(this_mode, best_intra_mode))
4725 continue;
4726 }
4727 }
4728 } else {
4729 const MV_REFERENCE_FRAME ref_frames[2] = {ref_frame, second_ref_frame};
4730 if (!check_best_zero_mv(cpi, mbmi_ext->mode_context, frame_mv,
4731 this_mode, ref_frames))
4732 continue;
4733 }
4734
4735 mbmi->mode = this_mode;
4736 mbmi->uv_mode = DC_PRED;
4737 mbmi->ref_frame[0] = ref_frame;
4738 mbmi->ref_frame[1] = second_ref_frame;
hui sube3559b2015-10-07 09:29:02 -07004739#if CONFIG_EXT_INTRA
4740 mbmi->ext_intra_mode_info.use_ext_intra_mode[0] = 0;
4741 mbmi->ext_intra_mode_info.use_ext_intra_mode[1] = 0;
4742#endif // CONFIG_EXT_INTRA
Jingning Han3ee6db62015-08-05 19:00:31 -07004743 // Evaluate all sub-pel filters irrespective of whether we can use
4744 // them for this frame.
4745 mbmi->interp_filter = cm->interp_filter == SWITCHABLE ? EIGHTTAP
4746 : cm->interp_filter;
4747 mbmi->mv[0].as_int = mbmi->mv[1].as_int = 0;
4748
4749 x->skip = 0;
4750 set_ref_ptrs(cm, xd, ref_frame, second_ref_frame);
4751
4752 // Select prediction reference frames.
4753 for (i = 0; i < MAX_MB_PLANE; i++) {
4754 xd->plane[i].pre[0] = yv12_mb[ref_frame][i];
4755 if (comp_pred)
4756 xd->plane[i].pre[1] = yv12_mb[second_ref_frame][i];
4757 }
4758
4759 if (ref_frame == INTRA_FRAME) {
4760 TX_SIZE uv_tx;
4761 struct macroblockd_plane *const pd = &xd->plane[1];
4762 memset(x->skip_txfm, 0, sizeof(x->skip_txfm));
4763 super_block_yrd(cpi, x, &rate_y, &distortion_y, &skippable,
4764 NULL, bsize, best_rd);
hui sube3559b2015-10-07 09:29:02 -07004765#if CONFIG_EXT_INTRA
4766 // TODO(huisu): ext-intra is turned off in lossless mode for now to
4767 // avoid a unit test failure
4768 if (mbmi->mode == DC_PRED && !xd->lossless[mbmi->segment_id]) {
4769 MB_MODE_INFO mbmi_copy = *mbmi;
4770 int rate_dummy;
4771
4772 if (rate_y != INT_MAX) {
4773 int this_rate = rate_y + cpi->mbmode_cost[mbmi->mode] +
4774 vp10_cost_bit(cm->fc->ext_intra_probs[0], 0);
4775 this_rd = RDCOST(x->rdmult, x->rddiv, this_rate, distortion_y);
4776 } else {
4777 this_rd = best_rd;
4778 }
4779
4780 if (!rd_pick_ext_intra_sby(cpi, x, &rate_dummy, &rate_y, &distortion_y,
4781 &skippable, bsize,
4782 cpi->mbmode_cost[mbmi->mode], &this_rd))
4783 *mbmi = mbmi_copy;
4784 }
4785#endif // CONFIG_EXT_INTRA
Jingning Han3ee6db62015-08-05 19:00:31 -07004786 if (rate_y == INT_MAX)
4787 continue;
Jingning Han3ee6db62015-08-05 19:00:31 -07004788 uv_tx = get_uv_tx_size_impl(mbmi->tx_size, bsize, pd->subsampling_x,
4789 pd->subsampling_y);
4790 if (rate_uv_intra[uv_tx] == INT_MAX) {
4791 choose_intra_uv_mode(cpi, x, ctx, bsize, uv_tx,
4792 &rate_uv_intra[uv_tx], &rate_uv_tokenonly[uv_tx],
4793 &dist_uv[uv_tx], &skip_uv[uv_tx], &mode_uv[uv_tx]);
hui sube3559b2015-10-07 09:29:02 -07004794#if CONFIG_EXT_INTRA
4795 ext_intra_mode_info_uv[uv_tx] = mbmi->ext_intra_mode_info;
4796#endif // CONFIG_EXT_INTRA
Jingning Han3ee6db62015-08-05 19:00:31 -07004797 }
4798
4799 rate_uv = rate_uv_tokenonly[uv_tx];
4800 distortion_uv = dist_uv[uv_tx];
4801 skippable = skippable && skip_uv[uv_tx];
4802 mbmi->uv_mode = mode_uv[uv_tx];
hui sube3559b2015-10-07 09:29:02 -07004803#if CONFIG_EXT_INTRA
4804 mbmi->ext_intra_mode_info.use_ext_intra_mode[1] =
4805 ext_intra_mode_info_uv[uv_tx].use_ext_intra_mode[1];
4806 if (ext_intra_mode_info_uv[uv_tx].use_ext_intra_mode[1]) {
4807 mbmi->ext_intra_mode_info.ext_intra_mode[1] =
4808 ext_intra_mode_info_uv[uv_tx].ext_intra_mode[1];
4809 mbmi->ext_intra_mode_info.ext_intra_angle[1] =
4810 ext_intra_mode_info_uv[uv_tx].ext_intra_angle[1];
4811 }
4812#endif // CONFIG_EXT_INTRA
Jingning Han3ee6db62015-08-05 19:00:31 -07004813
4814 rate2 = rate_y + cpi->mbmode_cost[mbmi->mode] + rate_uv_intra[uv_tx];
hui sube3559b2015-10-07 09:29:02 -07004815#if CONFIG_EXT_INTRA
4816 if (mbmi->mode == DC_PRED) {
4817 rate2 += vp10_cost_bit(cm->fc->ext_intra_probs[0],
4818 mbmi->ext_intra_mode_info.use_ext_intra_mode[0]);
4819 if (mbmi->ext_intra_mode_info.use_ext_intra_mode[0]) {
4820 EXT_INTRA_MODE ext_intra_mode =
4821 mbmi->ext_intra_mode_info.ext_intra_mode[0];
4822 int angle = mbmi->ext_intra_mode_info.ext_intra_angle[0];
4823 if (!DR_ONLY)
4824 rate2 += vp10_cost_bit(DR_EXT_INTRA_PROB,
4825 ext_intra_mode > FILTER_TM_PRED);
4826 if (ext_intra_mode > FILTER_TM_PRED)
4827 rate2 += write_uniform_cost(EXT_INTRA_ANGLES, angle);
4828 else
4829 rate2 += write_uniform_cost(FILTER_INTRA_MODES, ext_intra_mode);
4830 }
4831 }
4832#endif // CONFIG_EXT_INTRA
Jingning Han3ee6db62015-08-05 19:00:31 -07004833 if (this_mode != DC_PRED && this_mode != TM_PRED)
4834 rate2 += intra_cost_penalty;
4835 distortion2 = distortion_y + distortion_uv;
4836 } else {
4837 this_rd = handle_inter_mode(cpi, x, bsize,
4838 &rate2, &distortion2, &skippable,
4839 &rate_y, &rate_uv,
4840 &disable_skip, frame_mv,
4841 mi_row, mi_col,
4842 single_newmv, single_inter_filter,
4843 single_skippable, &total_sse, best_rd,
4844 &mask_filter, filter_cache);
Debargha Mukherjee85514c42015-10-30 09:19:36 -07004845
Jingning Han3ee6db62015-08-05 19:00:31 -07004846 if (this_rd == INT64_MAX)
4847 continue;
4848
4849 compmode_cost = vp10_cost_bit(comp_mode_p, comp_pred);
4850
4851 if (cm->reference_mode == REFERENCE_MODE_SELECT)
4852 rate2 += compmode_cost;
4853 }
4854
4855 // Estimate the reference frame signaling cost and add it
4856 // to the rolling cost variable.
4857 if (comp_pred) {
4858 rate2 += ref_costs_comp[ref_frame];
4859 } else {
4860 rate2 += ref_costs_single[ref_frame];
4861 }
4862
4863 if (!disable_skip) {
4864 if (skippable) {
4865 // Back out the coefficient coding costs
4866 rate2 -= (rate_y + rate_uv);
4867
4868 // Cost the skip mb case
4869 rate2 += vp10_cost_bit(vp10_get_skip_prob(cm, xd), 1);
Debargha Mukherjee85514c42015-10-30 09:19:36 -07004870
Ronald S. Bultje60c58b52015-10-12 17:54:25 -04004871 } else if (ref_frame != INTRA_FRAME && !xd->lossless[mbmi->segment_id]) {
Jingning Han3ee6db62015-08-05 19:00:31 -07004872 if (RDCOST(x->rdmult, x->rddiv, rate_y + rate_uv, distortion2) <
4873 RDCOST(x->rdmult, x->rddiv, 0, total_sse)) {
4874 // Add in the cost of the no skip flag.
4875 rate2 += vp10_cost_bit(vp10_get_skip_prob(cm, xd), 0);
4876 } else {
4877 // FIXME(rbultje) make this work for splitmv also
4878 rate2 += vp10_cost_bit(vp10_get_skip_prob(cm, xd), 1);
4879 distortion2 = total_sse;
4880 assert(total_sse >= 0);
4881 rate2 -= (rate_y + rate_uv);
4882 this_skip2 = 1;
4883 }
4884 } else {
4885 // Add in the cost of the no skip flag.
4886 rate2 += vp10_cost_bit(vp10_get_skip_prob(cm, xd), 0);
4887 }
4888
4889 // Calculate the final RD estimate for this mode.
4890 this_rd = RDCOST(x->rdmult, x->rddiv, rate2, distortion2);
4891 }
4892
4893 // Apply an adjustment to the rd value based on the similarity of the
4894 // source variance and reconstructed variance.
4895 rd_variance_adjustment(cpi, x, bsize, &this_rd,
4896 ref_frame, x->source_variance);
4897
4898 if (ref_frame == INTRA_FRAME) {
4899 // Keep record of best intra rd
4900 if (this_rd < best_intra_rd) {
4901 best_intra_rd = this_rd;
4902 best_intra_mode = mbmi->mode;
4903 }
4904 }
4905
4906 if (!disable_skip && ref_frame == INTRA_FRAME) {
4907 for (i = 0; i < REFERENCE_MODES; ++i)
James Zern5e16d392015-08-17 18:19:22 -07004908 best_pred_rd[i] = VPXMIN(best_pred_rd[i], this_rd);
Jingning Han3ee6db62015-08-05 19:00:31 -07004909 for (i = 0; i < SWITCHABLE_FILTER_CONTEXTS; i++)
James Zern5e16d392015-08-17 18:19:22 -07004910 best_filter_rd[i] = VPXMIN(best_filter_rd[i], this_rd);
Jingning Han3ee6db62015-08-05 19:00:31 -07004911 }
4912
4913 // Did this mode help.. i.e. is it the new best mode
4914 if (this_rd < best_rd || x->skip) {
4915 int max_plane = MAX_MB_PLANE;
4916 if (!mode_excluded) {
4917 // Note index of best mode so far
4918 best_mode_index = mode_index;
4919
4920 if (ref_frame == INTRA_FRAME) {
4921 /* required for left and above block mv */
4922 mbmi->mv[0].as_int = 0;
4923 max_plane = 1;
4924 } else {
4925 best_pred_sse = x->pred_sse[ref_frame];
4926 }
4927
4928 rd_cost->rate = rate2;
4929 rd_cost->dist = distortion2;
4930 rd_cost->rdcost = this_rd;
4931 best_rd = this_rd;
4932 best_mbmode = *mbmi;
4933 best_skip2 = this_skip2;
4934 best_mode_skippable = skippable;
4935
4936 if (!x->select_tx_size)
4937 swap_block_ptr(x, ctx, 1, 0, 0, max_plane);
Jingning Hanbfeac5e2015-10-15 23:11:30 -07004938
4939#if CONFIG_VAR_TX
4940 for (i = 0; i < MAX_MB_PLANE; ++i)
4941 memcpy(ctx->blk_skip[i], x->blk_skip[i],
4942 sizeof(uint8_t) * ctx->num_4x4_blk);
4943#else
Jingning Han3ee6db62015-08-05 19:00:31 -07004944 memcpy(ctx->zcoeff_blk, x->zcoeff_blk[mbmi->tx_size],
hui su088b05f2015-08-12 10:41:51 -07004945 sizeof(ctx->zcoeff_blk[0]) * ctx->num_4x4_blk);
Jingning Hanbfeac5e2015-10-15 23:11:30 -07004946#endif
Jingning Han3ee6db62015-08-05 19:00:31 -07004947
4948 // TODO(debargha): enhance this test with a better distortion prediction
4949 // based on qp, activity mask and history
4950 if ((mode_search_skip_flags & FLAG_EARLY_TERMINATE) &&
4951 (mode_index > MIN_EARLY_TERM_INDEX)) {
4952 int qstep = xd->plane[0].dequant[1];
4953 // TODO(debargha): Enhance this by specializing for each mode_index
4954 int scale = 4;
4955#if CONFIG_VP9_HIGHBITDEPTH
4956 if (xd->cur_buf->flags & YV12_FLAG_HIGHBITDEPTH) {
4957 qstep >>= (xd->bd - 8);
4958 }
4959#endif // CONFIG_VP9_HIGHBITDEPTH
4960 if (x->source_variance < UINT_MAX) {
4961 const int var_adjust = (x->source_variance < 16);
4962 scale -= var_adjust;
4963 }
4964 if (ref_frame > INTRA_FRAME &&
4965 distortion2 * scale < qstep * qstep) {
4966 early_term = 1;
4967 }
4968 }
4969 }
4970 }
4971
4972 /* keep record of best compound/single-only prediction */
4973 if (!disable_skip && ref_frame != INTRA_FRAME) {
4974 int64_t single_rd, hybrid_rd, single_rate, hybrid_rate;
4975
4976 if (cm->reference_mode == REFERENCE_MODE_SELECT) {
4977 single_rate = rate2 - compmode_cost;
4978 hybrid_rate = rate2;
4979 } else {
4980 single_rate = rate2;
4981 hybrid_rate = rate2 + compmode_cost;
4982 }
4983
4984 single_rd = RDCOST(x->rdmult, x->rddiv, single_rate, distortion2);
4985 hybrid_rd = RDCOST(x->rdmult, x->rddiv, hybrid_rate, distortion2);
4986
4987 if (!comp_pred) {
4988 if (single_rd < best_pred_rd[SINGLE_REFERENCE])
4989 best_pred_rd[SINGLE_REFERENCE] = single_rd;
4990 } else {
4991 if (single_rd < best_pred_rd[COMPOUND_REFERENCE])
4992 best_pred_rd[COMPOUND_REFERENCE] = single_rd;
4993 }
4994 if (hybrid_rd < best_pred_rd[REFERENCE_MODE_SELECT])
4995 best_pred_rd[REFERENCE_MODE_SELECT] = hybrid_rd;
4996
4997 /* keep record of best filter type */
4998 if (!mode_excluded && cm->interp_filter != BILINEAR) {
4999 int64_t ref = filter_cache[cm->interp_filter == SWITCHABLE ?
5000 SWITCHABLE_FILTERS : cm->interp_filter];
5001
5002 for (i = 0; i < SWITCHABLE_FILTER_CONTEXTS; i++) {
5003 int64_t adj_rd;
5004 if (ref == INT64_MAX)
5005 adj_rd = 0;
5006 else if (filter_cache[i] == INT64_MAX)
5007 // when early termination is triggered, the encoder does not have
5008 // access to the rate-distortion cost. it only knows that the cost
5009 // should be above the maximum valid value. hence it takes the known
5010 // maximum plus an arbitrary constant as the rate-distortion cost.
5011 adj_rd = mask_filter - ref + 10;
5012 else
5013 adj_rd = filter_cache[i] - ref;
5014
5015 adj_rd += this_rd;
James Zern5e16d392015-08-17 18:19:22 -07005016 best_filter_rd[i] = VPXMIN(best_filter_rd[i], adj_rd);
Jingning Han3ee6db62015-08-05 19:00:31 -07005017 }
5018 }
5019 }
5020
5021 if (early_term)
5022 break;
5023
5024 if (x->skip && !comp_pred)
5025 break;
5026 }
5027
5028 // The inter modes' rate costs are not calculated precisely in some cases.
5029 // Therefore, sometimes, NEWMV is chosen instead of NEARESTMV, NEARMV, and
5030 // ZEROMV. Here, checks are added for those cases, and the mode decisions
5031 // are corrected.
5032 if (best_mbmode.mode == NEWMV) {
5033 const MV_REFERENCE_FRAME refs[2] = {best_mbmode.ref_frame[0],
5034 best_mbmode.ref_frame[1]};
5035 int comp_pred_mode = refs[1] > INTRA_FRAME;
5036
5037 if (frame_mv[NEARESTMV][refs[0]].as_int == best_mbmode.mv[0].as_int &&
5038 ((comp_pred_mode && frame_mv[NEARESTMV][refs[1]].as_int ==
5039 best_mbmode.mv[1].as_int) || !comp_pred_mode))
5040 best_mbmode.mode = NEARESTMV;
5041 else if (frame_mv[NEARMV][refs[0]].as_int == best_mbmode.mv[0].as_int &&
5042 ((comp_pred_mode && frame_mv[NEARMV][refs[1]].as_int ==
5043 best_mbmode.mv[1].as_int) || !comp_pred_mode))
5044 best_mbmode.mode = NEARMV;
5045 else if (best_mbmode.mv[0].as_int == 0 &&
5046 ((comp_pred_mode && best_mbmode.mv[1].as_int == 0) || !comp_pred_mode))
5047 best_mbmode.mode = ZEROMV;
5048 }
5049
5050 if (best_mode_index < 0 || best_rd >= best_rd_so_far) {
5051 rd_cost->rate = INT_MAX;
5052 rd_cost->rdcost = INT64_MAX;
5053 return;
5054 }
5055
5056 // If we used an estimate for the uv intra rd in the loop above...
5057 if (sf->use_uv_intra_rd_estimate) {
5058 // Do Intra UV best rd mode selection if best mode choice above was intra.
5059 if (best_mbmode.ref_frame[0] == INTRA_FRAME) {
5060 TX_SIZE uv_tx_size;
5061 *mbmi = best_mbmode;
5062 uv_tx_size = get_uv_tx_size(mbmi, &xd->plane[1]);
5063 rd_pick_intra_sbuv_mode(cpi, x, ctx, &rate_uv_intra[uv_tx_size],
5064 &rate_uv_tokenonly[uv_tx_size],
5065 &dist_uv[uv_tx_size],
5066 &skip_uv[uv_tx_size],
5067 bsize < BLOCK_8X8 ? BLOCK_8X8 : bsize,
5068 uv_tx_size);
5069 }
5070 }
5071
5072 assert((cm->interp_filter == SWITCHABLE) ||
5073 (cm->interp_filter == best_mbmode.interp_filter) ||
5074 !is_inter_block(&best_mbmode));
5075
5076 if (!cpi->rc.is_src_frame_alt_ref)
5077 vp10_update_rd_thresh_fact(tile_data->thresh_freq_fact,
5078 sf->adaptive_rd_thresh, bsize, best_mode_index);
5079
5080 // macroblock modes
5081 *mbmi = best_mbmode;
5082 x->skip |= best_skip2;
5083
5084 for (i = 0; i < REFERENCE_MODES; ++i) {
5085 if (best_pred_rd[i] == INT64_MAX)
5086 best_pred_diff[i] = INT_MIN;
5087 else
5088 best_pred_diff[i] = best_rd - best_pred_rd[i];
5089 }
5090
5091 if (!x->skip) {
5092 for (i = 0; i < SWITCHABLE_FILTER_CONTEXTS; i++) {
5093 if (best_filter_rd[i] == INT64_MAX)
5094 best_filter_diff[i] = 0;
5095 else
5096 best_filter_diff[i] = best_rd - best_filter_rd[i];
5097 }
5098 if (cm->interp_filter == SWITCHABLE)
5099 assert(best_filter_diff[SWITCHABLE_FILTERS] == 0);
5100 } else {
5101 vp10_zero(best_filter_diff);
5102 }
5103
5104 // TODO(yunqingwang): Moving this line in front of the above best_filter_diff
5105 // updating code causes PSNR loss. Need to figure out the confliction.
5106 x->skip |= best_mode_skippable;
5107
5108 if (!x->skip && !x->select_tx_size) {
5109 int has_high_freq_coeff = 0;
5110 int plane;
5111 int max_plane = is_inter_block(&xd->mi[0]->mbmi)
5112 ? MAX_MB_PLANE : 1;
5113 for (plane = 0; plane < max_plane; ++plane) {
5114 x->plane[plane].eobs = ctx->eobs_pbuf[plane][1];
5115 has_high_freq_coeff |= vp10_has_high_freq_in_plane(x, bsize, plane);
5116 }
5117
5118 for (plane = max_plane; plane < MAX_MB_PLANE; ++plane) {
5119 x->plane[plane].eobs = ctx->eobs_pbuf[plane][2];
5120 has_high_freq_coeff |= vp10_has_high_freq_in_plane(x, bsize, plane);
5121 }
5122
5123 best_mode_skippable |= !has_high_freq_coeff;
5124 }
5125
5126 assert(best_mode_index >= 0);
5127
5128 store_coding_context(x, ctx, best_mode_index, best_pred_diff,
5129 best_filter_diff, best_mode_skippable);
5130}
5131
// Mode decision for a superblock whose segment has the SEG_LVL_SKIP feature
// active (asserted below): no mode search is performed. The block is forced
// to ZEROMV / LAST_FRAME single prediction with the skip flag set, so the
// only real decision left is the interpolation filter. The resulting rate
// consists purely of signaling overhead (filter, compound-mode bit, and
// reference-frame costs); distortion is taken as zero.
//
// Outputs: *rd_cost holds the rate/dist/rdcost of the forced mode, or
// {INT_MAX, INT64_MAX} if it does not beat best_rd_so_far. mbmi (in xd) and
// ctx are updated with the chosen coding context on success.
void vp10_rd_pick_inter_mode_sb_seg_skip(VP10_COMP *cpi,
                                         TileDataEnc *tile_data,
                                         MACROBLOCK *x,
                                         RD_COST *rd_cost,
                                         BLOCK_SIZE bsize,
                                         PICK_MODE_CONTEXT *ctx,
                                         int64_t best_rd_so_far) {
  VP10_COMMON *const cm = &cpi->common;
  MACROBLOCKD *const xd = &x->e_mbd;
  MB_MODE_INFO *const mbmi = &xd->mi[0]->mbmi;
  unsigned char segment_id = mbmi->segment_id;
  const int comp_pred = 0;  // skip segments always use single prediction
  int i;
  int64_t best_pred_diff[REFERENCE_MODES];
  int64_t best_filter_diff[SWITCHABLE_FILTER_CONTEXTS];
  unsigned int ref_costs_single[MAX_REF_FRAMES], ref_costs_comp[MAX_REF_FRAMES];
  vpx_prob comp_mode_p;
  INTERP_FILTER best_filter = SWITCHABLE;
  int64_t this_rd = INT64_MAX;
  int rate2 = 0;
  // Distortion is defined to be zero for the forced-skip mode.
  const int64_t distortion2 = 0;

  estimate_ref_frame_costs(cm, xd, segment_id, ref_costs_single, ref_costs_comp,
                           &comp_mode_p);

  // Invalidate per-reference prediction statistics; nothing is searched here.
  for (i = 0; i < MAX_REF_FRAMES; ++i)
    x->pred_sse[i] = INT_MAX;
  for (i = LAST_FRAME; i < MAX_REF_FRAMES; ++i)
    x->pred_mv_sad[i] = INT_MAX;

  rd_cost->rate = INT_MAX;

  // This path is only valid when the segment forces skip.
  assert(segfeature_active(&cm->seg, segment_id, SEG_LVL_SKIP));

  mbmi->palette_mode_info.palette_size[0] = 0;
  mbmi->palette_mode_info.palette_size[1] = 0;
#if CONFIG_EXT_INTRA
  mbmi->ext_intra_mode_info.use_ext_intra_mode[0] = 0;
  mbmi->ext_intra_mode_info.use_ext_intra_mode[1] = 0;
#endif  // CONFIG_EXT_INTRA
  // Force the one allowed mode: zero motion off the last frame, skipped.
  mbmi->mode = ZEROMV;
  mbmi->uv_mode = DC_PRED;
  mbmi->ref_frame[0] = LAST_FRAME;
  mbmi->ref_frame[1] = NONE;
  mbmi->mv[0].as_int = 0;
  x->skip = 1;

  if (cm->interp_filter != BILINEAR) {
    best_filter = EIGHTTAP;
    // With a SWITCHABLE frame filter, search filters purely by signaling
    // cost (distortion is zero, so rate alone decides). The variance
    // threshold mirrors the speed feature used by the full mode search.
    if (cm->interp_filter == SWITCHABLE &&
#if CONFIG_EXT_INTERP
        vp10_is_interp_needed(xd) &&
#endif  // CONFIG_EXT_INTERP
        x->source_variance >= cpi->sf.disable_filter_search_var_thresh) {
      int rs;
      int best_rs = INT_MAX;
      for (i = 0; i < SWITCHABLE_FILTERS; ++i) {
        mbmi->interp_filter = i;
        rs = vp10_get_switchable_rate(cpi, xd);
        if (rs < best_rs) {
          best_rs = rs;
          best_filter = mbmi->interp_filter;
        }
      }
    }
  }
  // Set the appropriate filter
  if (cm->interp_filter == SWITCHABLE) {
    mbmi->interp_filter = best_filter;
    rate2 += vp10_get_switchable_rate(cpi, xd);
  } else {
    mbmi->interp_filter = cm->interp_filter;
  }

  if (cm->reference_mode == REFERENCE_MODE_SELECT)
    rate2 += vp10_cost_bit(comp_mode_p, comp_pred);

  // Estimate the reference frame signaling cost and add it
  // to the rolling cost variable.
  rate2 += ref_costs_single[LAST_FRAME];
  this_rd = RDCOST(x->rdmult, x->rddiv, rate2, distortion2);

  rd_cost->rate = rate2;
  rd_cost->dist = distortion2;
  rd_cost->rdcost = this_rd;

  // If even the forced-skip mode cannot beat the incoming best rd, signal
  // failure the same way the full search does.
  if (this_rd >= best_rd_so_far) {
    rd_cost->rate = INT_MAX;
    rd_cost->rdcost = INT64_MAX;
    return;
  }

  assert((cm->interp_filter == SWITCHABLE) ||
         (cm->interp_filter == mbmi->interp_filter));

  // ZEROMV was chosen; relax its adaptive rd threshold for future blocks.
  vp10_update_rd_thresh_fact(tile_data->thresh_freq_fact,
                             cpi->sf.adaptive_rd_thresh, bsize, THR_ZEROMV);

  // No per-mode rd comparisons were made, so all diffs are zero.
  vp10_zero(best_pred_diff);
  vp10_zero(best_filter_diff);

  if (!x->select_tx_size)
    swap_block_ptr(x, ctx, 1, 0, 0, MAX_MB_PLANE);
  store_coding_context(x, ctx, THR_ZEROMV,
                       best_pred_diff, best_filter_diff, 0);
}
5238
Yaowu Xu26a9afc2015-08-13 09:42:27 -07005239void vp10_rd_pick_inter_mode_sub8x8(VP10_COMP *cpi,
Jingning Han3ee6db62015-08-05 19:00:31 -07005240 TileDataEnc *tile_data,
5241 MACROBLOCK *x,
5242 int mi_row, int mi_col,
5243 RD_COST *rd_cost,
5244 BLOCK_SIZE bsize,
5245 PICK_MODE_CONTEXT *ctx,
5246 int64_t best_rd_so_far) {
Yaowu Xufc7cbd12015-08-13 09:36:53 -07005247 VP10_COMMON *const cm = &cpi->common;
Jingning Han3ee6db62015-08-05 19:00:31 -07005248 RD_OPT *const rd_opt = &cpi->rd;
5249 SPEED_FEATURES *const sf = &cpi->sf;
5250 MACROBLOCKD *const xd = &x->e_mbd;
5251 MB_MODE_INFO *const mbmi = &xd->mi[0]->mbmi;
5252 const struct segmentation *const seg = &cm->seg;
5253 MV_REFERENCE_FRAME ref_frame, second_ref_frame;
5254 unsigned char segment_id = mbmi->segment_id;
5255 int comp_pred, i;
5256 int_mv frame_mv[MB_MODE_COUNT][MAX_REF_FRAMES];
5257 struct buf_2d yv12_mb[4][MAX_MB_PLANE];
5258 static const int flag_list[4] = { 0, VP9_LAST_FLAG, VP9_GOLD_FLAG,
5259 VP9_ALT_FLAG };
5260 int64_t best_rd = best_rd_so_far;
5261 int64_t best_yrd = best_rd_so_far; // FIXME(rbultje) more precise
5262 int64_t best_pred_diff[REFERENCE_MODES];
5263 int64_t best_pred_rd[REFERENCE_MODES];
5264 int64_t best_filter_rd[SWITCHABLE_FILTER_CONTEXTS];
5265 int64_t best_filter_diff[SWITCHABLE_FILTER_CONTEXTS];
5266 MB_MODE_INFO best_mbmode;
5267 int ref_index, best_ref_index = 0;
5268 unsigned int ref_costs_single[MAX_REF_FRAMES], ref_costs_comp[MAX_REF_FRAMES];
5269 vpx_prob comp_mode_p;
5270 INTERP_FILTER tmp_best_filter = SWITCHABLE;
5271 int rate_uv_intra, rate_uv_tokenonly;
5272 int64_t dist_uv;
5273 int skip_uv;
5274 PREDICTION_MODE mode_uv = DC_PRED;
5275 const int intra_cost_penalty = vp10_get_intra_cost_penalty(
5276 cm->base_qindex, cm->y_dc_delta_q, cm->bit_depth);
5277 int_mv seg_mvs[4][MAX_REF_FRAMES];
5278 b_mode_info best_bmodes[4];
5279 int best_skip2 = 0;
5280 int ref_frame_skip_mask[2] = { 0 };
5281 int64_t mask_filter = 0;
5282 int64_t filter_cache[SWITCHABLE_FILTER_CONTEXTS];
5283 int internal_active_edge =
5284 vp10_active_edge_sb(cpi, mi_row, mi_col) && vp10_internal_image_edge(cpi);
5285
Jingning Han3ee6db62015-08-05 19:00:31 -07005286 memset(x->zcoeff_blk[TX_4X4], 0, 4);
5287 vp10_zero(best_mbmode);
5288
hui sube3559b2015-10-07 09:29:02 -07005289#if CONFIG_EXT_INTRA
5290 mbmi->ext_intra_mode_info.use_ext_intra_mode[0] = 0;
5291 mbmi->ext_intra_mode_info.use_ext_intra_mode[1] = 0;
5292#endif // CONFIG_EXT_INTRA
5293
Jingning Han3ee6db62015-08-05 19:00:31 -07005294 for (i = 0; i < SWITCHABLE_FILTER_CONTEXTS; ++i)
5295 filter_cache[i] = INT64_MAX;
5296
5297 for (i = 0; i < 4; i++) {
5298 int j;
5299 for (j = 0; j < MAX_REF_FRAMES; j++)
5300 seg_mvs[i][j].as_int = INVALID_MV;
5301 }
5302
5303 estimate_ref_frame_costs(cm, xd, segment_id, ref_costs_single, ref_costs_comp,
5304 &comp_mode_p);
5305
5306 for (i = 0; i < REFERENCE_MODES; ++i)
5307 best_pred_rd[i] = INT64_MAX;
5308 for (i = 0; i < SWITCHABLE_FILTER_CONTEXTS; i++)
5309 best_filter_rd[i] = INT64_MAX;
5310 rate_uv_intra = INT_MAX;
5311
5312 rd_cost->rate = INT_MAX;
5313
5314 for (ref_frame = LAST_FRAME; ref_frame <= ALTREF_FRAME; ref_frame++) {
5315 if (cpi->ref_frame_flags & flag_list[ref_frame]) {
5316 setup_buffer_inter(cpi, x, ref_frame, bsize, mi_row, mi_col,
5317 frame_mv[NEARESTMV], frame_mv[NEARMV],
5318 yv12_mb);
5319 } else {
5320 ref_frame_skip_mask[0] |= (1 << ref_frame);
5321 ref_frame_skip_mask[1] |= SECOND_REF_FRAME_MASK;
5322 }
5323 frame_mv[NEWMV][ref_frame].as_int = INVALID_MV;
5324 frame_mv[ZEROMV][ref_frame].as_int = 0;
5325 }
5326
hui su5d011cb2015-09-15 12:44:13 -07005327 mbmi->palette_mode_info.palette_size[0] = 0;
5328 mbmi->palette_mode_info.palette_size[1] = 0;
5329
Jingning Han3ee6db62015-08-05 19:00:31 -07005330 for (ref_index = 0; ref_index < MAX_REFS; ++ref_index) {
5331 int mode_excluded = 0;
5332 int64_t this_rd = INT64_MAX;
5333 int disable_skip = 0;
5334 int compmode_cost = 0;
5335 int rate2 = 0, rate_y = 0, rate_uv = 0;
5336 int64_t distortion2 = 0, distortion_y = 0, distortion_uv = 0;
5337 int skippable = 0;
5338 int i;
5339 int this_skip2 = 0;
5340 int64_t total_sse = INT_MAX;
5341 int early_term = 0;
5342
5343 ref_frame = vp10_ref_order[ref_index].ref_frame[0];
5344 second_ref_frame = vp10_ref_order[ref_index].ref_frame[1];
5345
5346 // Look at the reference frame of the best mode so far and set the
5347 // skip mask to look at a subset of the remaining modes.
5348 if (ref_index > 2 && sf->mode_skip_start < MAX_MODES) {
5349 if (ref_index == 3) {
5350 switch (best_mbmode.ref_frame[0]) {
5351 case INTRA_FRAME:
5352 break;
5353 case LAST_FRAME:
5354 ref_frame_skip_mask[0] |= (1 << GOLDEN_FRAME) | (1 << ALTREF_FRAME);
5355 ref_frame_skip_mask[1] |= SECOND_REF_FRAME_MASK;
5356 break;
5357 case GOLDEN_FRAME:
5358 ref_frame_skip_mask[0] |= (1 << LAST_FRAME) | (1 << ALTREF_FRAME);
5359 ref_frame_skip_mask[1] |= SECOND_REF_FRAME_MASK;
5360 break;
5361 case ALTREF_FRAME:
5362 ref_frame_skip_mask[0] |= (1 << GOLDEN_FRAME) | (1 << LAST_FRAME);
5363 break;
5364 case NONE:
5365 case MAX_REF_FRAMES:
5366 assert(0 && "Invalid Reference frame");
5367 break;
5368 }
5369 }
5370 }
5371
5372 if ((ref_frame_skip_mask[0] & (1 << ref_frame)) &&
James Zern5e16d392015-08-17 18:19:22 -07005373 (ref_frame_skip_mask[1] & (1 << VPXMAX(0, second_ref_frame))))
Jingning Han3ee6db62015-08-05 19:00:31 -07005374 continue;
5375
5376 // Test best rd so far against threshold for trying this mode.
5377 if (!internal_active_edge &&
5378 rd_less_than_thresh(best_rd,
5379 rd_opt->threshes[segment_id][bsize][ref_index],
5380 tile_data->thresh_freq_fact[bsize][ref_index]))
5381 continue;
5382
5383 comp_pred = second_ref_frame > INTRA_FRAME;
5384 if (comp_pred) {
5385 if (!cpi->allow_comp_inter_inter)
5386 continue;
5387 if (!(cpi->ref_frame_flags & flag_list[second_ref_frame]))
5388 continue;
5389 // Do not allow compound prediction if the segment level reference frame
5390 // feature is in use as in this case there can only be one reference.
5391 if (segfeature_active(seg, segment_id, SEG_LVL_REF_FRAME))
5392 continue;
5393
5394 if ((sf->mode_search_skip_flags & FLAG_SKIP_COMP_BESTINTRA) &&
5395 best_mbmode.ref_frame[0] == INTRA_FRAME)
5396 continue;
5397 }
5398
5399 // TODO(jingning, jkoleszar): scaling reference frame not supported for
5400 // sub8x8 blocks.
5401 if (ref_frame > INTRA_FRAME &&
5402 vp10_is_scaled(&cm->frame_refs[ref_frame - 1].sf))
5403 continue;
5404
5405 if (second_ref_frame > INTRA_FRAME &&
5406 vp10_is_scaled(&cm->frame_refs[second_ref_frame - 1].sf))
5407 continue;
5408
5409 if (comp_pred)
5410 mode_excluded = cm->reference_mode == SINGLE_REFERENCE;
5411 else if (ref_frame != INTRA_FRAME)
5412 mode_excluded = cm->reference_mode == COMPOUND_REFERENCE;
5413
5414 // If the segment reference frame feature is enabled....
5415 // then do nothing if the current ref frame is not allowed..
5416 if (segfeature_active(seg, segment_id, SEG_LVL_REF_FRAME) &&
5417 get_segdata(seg, segment_id, SEG_LVL_REF_FRAME) != (int)ref_frame) {
5418 continue;
5419 // Disable this drop out case if the ref frame
5420 // segment level feature is enabled for this segment. This is to
5421 // prevent the possibility that we end up unable to pick any mode.
5422 } else if (!segfeature_active(seg, segment_id, SEG_LVL_REF_FRAME)) {
5423 // Only consider ZEROMV/ALTREF_FRAME for alt ref frame,
5424 // unless ARNR filtering is enabled in which case we want
5425 // an unfiltered alternative. We allow near/nearest as well
5426 // because they may result in zero-zero MVs but be cheaper.
5427 if (cpi->rc.is_src_frame_alt_ref && (cpi->oxcf.arnr_max_frames == 0))
5428 continue;
5429 }
5430
5431 mbmi->tx_size = TX_4X4;
5432 mbmi->uv_mode = DC_PRED;
5433 mbmi->ref_frame[0] = ref_frame;
5434 mbmi->ref_frame[1] = second_ref_frame;
5435 // Evaluate all sub-pel filters irrespective of whether we can use
5436 // them for this frame.
5437 mbmi->interp_filter = cm->interp_filter == SWITCHABLE ? EIGHTTAP
5438 : cm->interp_filter;
5439 x->skip = 0;
5440 set_ref_ptrs(cm, xd, ref_frame, second_ref_frame);
5441
5442 // Select prediction reference frames.
5443 for (i = 0; i < MAX_MB_PLANE; i++) {
5444 xd->plane[i].pre[0] = yv12_mb[ref_frame][i];
5445 if (comp_pred)
5446 xd->plane[i].pre[1] = yv12_mb[second_ref_frame][i];
5447 }
5448
Jingning Han704985e2015-10-08 12:05:03 -07005449#if CONFIG_VAR_TX
5450 for (i = 0; i < 64; ++i)
5451 mbmi->inter_tx_size[i] = mbmi->tx_size;
5452#endif
5453
Jingning Han3ee6db62015-08-05 19:00:31 -07005454 if (ref_frame == INTRA_FRAME) {
5455 int rate;
5456 if (rd_pick_intra_sub_8x8_y_mode(cpi, x, &rate, &rate_y,
5457 &distortion_y, best_rd) >= best_rd)
5458 continue;
5459 rate2 += rate;
5460 rate2 += intra_cost_penalty;
5461 distortion2 += distortion_y;
5462
5463 if (rate_uv_intra == INT_MAX) {
5464 choose_intra_uv_mode(cpi, x, ctx, bsize, TX_4X4,
5465 &rate_uv_intra,
5466 &rate_uv_tokenonly,
5467 &dist_uv, &skip_uv,
5468 &mode_uv);
5469 }
5470 rate2 += rate_uv_intra;
5471 rate_uv = rate_uv_tokenonly;
5472 distortion2 += dist_uv;
5473 distortion_uv = dist_uv;
5474 mbmi->uv_mode = mode_uv;
5475 } else {
5476 int rate;
5477 int64_t distortion;
5478 int64_t this_rd_thresh;
5479 int64_t tmp_rd, tmp_best_rd = INT64_MAX, tmp_best_rdu = INT64_MAX;
5480 int tmp_best_rate = INT_MAX, tmp_best_ratey = INT_MAX;
5481 int64_t tmp_best_distortion = INT_MAX, tmp_best_sse, uv_sse;
5482 int tmp_best_skippable = 0;
5483 int switchable_filter_index;
5484 int_mv *second_ref = comp_pred ?
5485 &x->mbmi_ext->ref_mvs[second_ref_frame][0] : NULL;
5486 b_mode_info tmp_best_bmodes[16];
5487 MB_MODE_INFO tmp_best_mbmode;
5488 BEST_SEG_INFO bsi[SWITCHABLE_FILTERS];
5489 int pred_exists = 0;
5490 int uv_skippable;
5491
5492 this_rd_thresh = (ref_frame == LAST_FRAME) ?
5493 rd_opt->threshes[segment_id][bsize][THR_LAST] :
5494 rd_opt->threshes[segment_id][bsize][THR_ALTR];
5495 this_rd_thresh = (ref_frame == GOLDEN_FRAME) ?
5496 rd_opt->threshes[segment_id][bsize][THR_GOLD] : this_rd_thresh;
5497 for (i = 0; i < SWITCHABLE_FILTER_CONTEXTS; ++i)
5498 filter_cache[i] = INT64_MAX;
5499
5500 if (cm->interp_filter != BILINEAR) {
5501 tmp_best_filter = EIGHTTAP;
5502 if (x->source_variance < sf->disable_filter_search_var_thresh) {
5503 tmp_best_filter = EIGHTTAP;
5504 } else if (sf->adaptive_pred_interp_filter == 1 &&
5505 ctx->pred_interp_filter < SWITCHABLE) {
5506 tmp_best_filter = ctx->pred_interp_filter;
5507 } else if (sf->adaptive_pred_interp_filter == 2) {
5508 tmp_best_filter = ctx->pred_interp_filter < SWITCHABLE ?
5509 ctx->pred_interp_filter : 0;
5510 } else {
5511 for (switchable_filter_index = 0;
5512 switchable_filter_index < SWITCHABLE_FILTERS;
5513 ++switchable_filter_index) {
5514 int newbest, rs;
5515 int64_t rs_rd;
5516 MB_MODE_INFO_EXT *mbmi_ext = x->mbmi_ext;
5517 mbmi->interp_filter = switchable_filter_index;
5518 tmp_rd = rd_pick_best_sub8x8_mode(cpi, x,
5519 &mbmi_ext->ref_mvs[ref_frame][0],
5520 second_ref, best_yrd, &rate,
5521 &rate_y, &distortion,
5522 &skippable, &total_sse,
5523 (int) this_rd_thresh, seg_mvs,
5524 bsi, switchable_filter_index,
5525 mi_row, mi_col);
Debargha Mukherjee85514c42015-10-30 09:19:36 -07005526#if CONFIG_EXT_INTERP
5527 if (!vp10_is_interp_needed(xd) && cm->interp_filter == SWITCHABLE &&
5528 mbmi->interp_filter != EIGHTTAP) // invalid configuration
5529 continue;
5530#endif // CONFIG_EXT_INTERP
Jingning Han3ee6db62015-08-05 19:00:31 -07005531 if (tmp_rd == INT64_MAX)
5532 continue;
5533 rs = vp10_get_switchable_rate(cpi, xd);
5534 rs_rd = RDCOST(x->rdmult, x->rddiv, rs, 0);
5535 filter_cache[switchable_filter_index] = tmp_rd;
5536 filter_cache[SWITCHABLE_FILTERS] =
James Zern5e16d392015-08-17 18:19:22 -07005537 VPXMIN(filter_cache[SWITCHABLE_FILTERS], tmp_rd + rs_rd);
Jingning Han3ee6db62015-08-05 19:00:31 -07005538 if (cm->interp_filter == SWITCHABLE)
5539 tmp_rd += rs_rd;
5540
James Zern5e16d392015-08-17 18:19:22 -07005541 mask_filter = VPXMAX(mask_filter, tmp_rd);
Jingning Han3ee6db62015-08-05 19:00:31 -07005542
5543 newbest = (tmp_rd < tmp_best_rd);
5544 if (newbest) {
5545 tmp_best_filter = mbmi->interp_filter;
5546 tmp_best_rd = tmp_rd;
5547 }
5548 if ((newbest && cm->interp_filter == SWITCHABLE) ||
5549 (mbmi->interp_filter == cm->interp_filter &&
5550 cm->interp_filter != SWITCHABLE)) {
5551 tmp_best_rdu = tmp_rd;
5552 tmp_best_rate = rate;
5553 tmp_best_ratey = rate_y;
5554 tmp_best_distortion = distortion;
5555 tmp_best_sse = total_sse;
5556 tmp_best_skippable = skippable;
5557 tmp_best_mbmode = *mbmi;
5558 for (i = 0; i < 4; i++) {
5559 tmp_best_bmodes[i] = xd->mi[0]->bmi[i];
5560 x->zcoeff_blk[TX_4X4][i] = !x->plane[0].eobs[i];
5561 }
5562 pred_exists = 1;
5563 if (switchable_filter_index == 0 &&
5564 sf->use_rd_breakout &&
5565 best_rd < INT64_MAX) {
5566 if (tmp_best_rdu / 2 > best_rd) {
5567 // skip searching the other filters if the first is
5568 // already substantially larger than the best so far
5569 tmp_best_filter = mbmi->interp_filter;
5570 tmp_best_rdu = INT64_MAX;
5571 break;
5572 }
5573 }
5574 }
5575 } // switchable_filter_index loop
5576 }
5577 }
5578
5579 if (tmp_best_rdu == INT64_MAX && pred_exists)
5580 continue;
5581
5582 mbmi->interp_filter = (cm->interp_filter == SWITCHABLE ?
5583 tmp_best_filter : cm->interp_filter);
Debargha Mukherjee85514c42015-10-30 09:19:36 -07005584
5585
Jingning Han3ee6db62015-08-05 19:00:31 -07005586 if (!pred_exists) {
5587 // Handles the special case when a filter that is not in the
Debargha Mukherjee85514c42015-10-30 09:19:36 -07005588 // switchable list (bilinear) is indicated at the frame level
Jingning Han3ee6db62015-08-05 19:00:31 -07005589 tmp_rd = rd_pick_best_sub8x8_mode(cpi, x,
5590 &x->mbmi_ext->ref_mvs[ref_frame][0],
5591 second_ref, best_yrd, &rate, &rate_y,
5592 &distortion, &skippable, &total_sse,
5593 (int) this_rd_thresh, seg_mvs, bsi, 0,
5594 mi_row, mi_col);
Debargha Mukherjee85514c42015-10-30 09:19:36 -07005595#if CONFIG_EXT_INTERP
5596 if (!vp10_is_interp_needed(xd) && cm->interp_filter == SWITCHABLE &&
5597 mbmi->interp_filter != EIGHTTAP) {
5598 mbmi->interp_filter = EIGHTTAP;
5599 tmp_rd = rd_pick_best_sub8x8_mode(
5600 cpi, x,
5601 &x->mbmi_ext->ref_mvs[ref_frame][0],
5602 second_ref, best_yrd, &rate, &rate_y,
5603 &distortion, &skippable, &total_sse,
5604 (int) this_rd_thresh, seg_mvs, bsi, 0,
5605 mi_row, mi_col);
5606 }
5607#endif // CONFIG_EXT_INTERP
Jingning Han3ee6db62015-08-05 19:00:31 -07005608 if (tmp_rd == INT64_MAX)
5609 continue;
5610 } else {
5611 total_sse = tmp_best_sse;
5612 rate = tmp_best_rate;
5613 rate_y = tmp_best_ratey;
5614 distortion = tmp_best_distortion;
5615 skippable = tmp_best_skippable;
5616 *mbmi = tmp_best_mbmode;
5617 for (i = 0; i < 4; i++)
5618 xd->mi[0]->bmi[i] = tmp_best_bmodes[i];
5619 }
5620
5621 rate2 += rate;
5622 distortion2 += distortion;
5623
5624 if (cm->interp_filter == SWITCHABLE)
5625 rate2 += vp10_get_switchable_rate(cpi, xd);
5626
5627 if (!mode_excluded)
5628 mode_excluded = comp_pred ? cm->reference_mode == SINGLE_REFERENCE
5629 : cm->reference_mode == COMPOUND_REFERENCE;
5630
5631 compmode_cost = vp10_cost_bit(comp_mode_p, comp_pred);
5632
5633 tmp_best_rdu = best_rd -
James Zern5e16d392015-08-17 18:19:22 -07005634 VPXMIN(RDCOST(x->rdmult, x->rddiv, rate2, distortion2),
5635 RDCOST(x->rdmult, x->rddiv, 0, total_sse));
Jingning Han3ee6db62015-08-05 19:00:31 -07005636
5637 if (tmp_best_rdu > 0) {
5638 // If even the 'Y' rd value of split is higher than best so far
5639 // then dont bother looking at UV
5640 vp10_build_inter_predictors_sbuv(&x->e_mbd, mi_row, mi_col,
5641 BLOCK_8X8);
5642 memset(x->skip_txfm, SKIP_TXFM_NONE, sizeof(x->skip_txfm));
Jingning Hana8dad552015-10-08 16:46:10 -07005643#if CONFIG_VAR_TX
5644 if (!inter_block_uvrd(cpi, x, &rate_uv, &distortion_uv, &uv_skippable,
5645 &uv_sse, BLOCK_8X8, tmp_best_rdu))
5646 continue;
5647#else
Jingning Han3ee6db62015-08-05 19:00:31 -07005648 if (!super_block_uvrd(cpi, x, &rate_uv, &distortion_uv, &uv_skippable,
5649 &uv_sse, BLOCK_8X8, tmp_best_rdu))
5650 continue;
Jingning Hana8dad552015-10-08 16:46:10 -07005651#endif
Jingning Han3ee6db62015-08-05 19:00:31 -07005652 rate2 += rate_uv;
5653 distortion2 += distortion_uv;
5654 skippable = skippable && uv_skippable;
5655 total_sse += uv_sse;
5656 }
5657 }
5658
5659 if (cm->reference_mode == REFERENCE_MODE_SELECT)
5660 rate2 += compmode_cost;
5661
5662 // Estimate the reference frame signaling cost and add it
5663 // to the rolling cost variable.
5664 if (second_ref_frame > INTRA_FRAME) {
5665 rate2 += ref_costs_comp[ref_frame];
5666 } else {
5667 rate2 += ref_costs_single[ref_frame];
5668 }
5669
5670 if (!disable_skip) {
5671 // Skip is never coded at the segment level for sub8x8 blocks and instead
5672 // always coded in the bitstream at the mode info level.
5673
Ronald S. Bultje60c58b52015-10-12 17:54:25 -04005674 if (ref_frame != INTRA_FRAME && !xd->lossless[mbmi->segment_id]) {
Jingning Han3ee6db62015-08-05 19:00:31 -07005675 if (RDCOST(x->rdmult, x->rddiv, rate_y + rate_uv, distortion2) <
5676 RDCOST(x->rdmult, x->rddiv, 0, total_sse)) {
5677 // Add in the cost of the no skip flag.
5678 rate2 += vp10_cost_bit(vp10_get_skip_prob(cm, xd), 0);
5679 } else {
5680 // FIXME(rbultje) make this work for splitmv also
5681 rate2 += vp10_cost_bit(vp10_get_skip_prob(cm, xd), 1);
5682 distortion2 = total_sse;
5683 assert(total_sse >= 0);
5684 rate2 -= (rate_y + rate_uv);
5685 rate_y = 0;
5686 rate_uv = 0;
5687 this_skip2 = 1;
5688 }
5689 } else {
5690 // Add in the cost of the no skip flag.
5691 rate2 += vp10_cost_bit(vp10_get_skip_prob(cm, xd), 0);
5692 }
5693
5694 // Calculate the final RD estimate for this mode.
5695 this_rd = RDCOST(x->rdmult, x->rddiv, rate2, distortion2);
5696 }
5697
5698 if (!disable_skip && ref_frame == INTRA_FRAME) {
5699 for (i = 0; i < REFERENCE_MODES; ++i)
James Zern5e16d392015-08-17 18:19:22 -07005700 best_pred_rd[i] = VPXMIN(best_pred_rd[i], this_rd);
Jingning Han3ee6db62015-08-05 19:00:31 -07005701 for (i = 0; i < SWITCHABLE_FILTER_CONTEXTS; i++)
James Zern5e16d392015-08-17 18:19:22 -07005702 best_filter_rd[i] = VPXMIN(best_filter_rd[i], this_rd);
Jingning Han3ee6db62015-08-05 19:00:31 -07005703 }
5704
5705 // Did this mode help.. i.e. is it the new best mode
5706 if (this_rd < best_rd || x->skip) {
5707 if (!mode_excluded) {
5708 int max_plane = MAX_MB_PLANE;
5709 // Note index of best mode so far
5710 best_ref_index = ref_index;
5711
5712 if (ref_frame == INTRA_FRAME) {
5713 /* required for left and above block mv */
5714 mbmi->mv[0].as_int = 0;
5715 max_plane = 1;
5716 }
5717
5718 rd_cost->rate = rate2;
5719 rd_cost->dist = distortion2;
5720 rd_cost->rdcost = this_rd;
5721 best_rd = this_rd;
5722 best_yrd = best_rd -
5723 RDCOST(x->rdmult, x->rddiv, rate_uv, distortion_uv);
5724 best_mbmode = *mbmi;
5725 best_skip2 = this_skip2;
5726 if (!x->select_tx_size)
5727 swap_block_ptr(x, ctx, 1, 0, 0, max_plane);
Jingning Hanbfeac5e2015-10-15 23:11:30 -07005728
5729#if CONFIG_VAR_TX
5730 for (i = 0; i < MAX_MB_PLANE; ++i)
5731 memset(ctx->blk_skip[i], 0, sizeof(uint8_t) * ctx->num_4x4_blk);
5732#else
Jingning Han3ee6db62015-08-05 19:00:31 -07005733 memcpy(ctx->zcoeff_blk, x->zcoeff_blk[TX_4X4],
hui su088b05f2015-08-12 10:41:51 -07005734 sizeof(ctx->zcoeff_blk[0]) * ctx->num_4x4_blk);
Jingning Hanbfeac5e2015-10-15 23:11:30 -07005735#endif
Jingning Han3ee6db62015-08-05 19:00:31 -07005736
5737 for (i = 0; i < 4; i++)
5738 best_bmodes[i] = xd->mi[0]->bmi[i];
5739
5740 // TODO(debargha): enhance this test with a better distortion prediction
5741 // based on qp, activity mask and history
5742 if ((sf->mode_search_skip_flags & FLAG_EARLY_TERMINATE) &&
5743 (ref_index > MIN_EARLY_TERM_INDEX)) {
5744 int qstep = xd->plane[0].dequant[1];
5745 // TODO(debargha): Enhance this by specializing for each mode_index
5746 int scale = 4;
5747#if CONFIG_VP9_HIGHBITDEPTH
5748 if (xd->cur_buf->flags & YV12_FLAG_HIGHBITDEPTH) {
5749 qstep >>= (xd->bd - 8);
5750 }
5751#endif // CONFIG_VP9_HIGHBITDEPTH
5752 if (x->source_variance < UINT_MAX) {
5753 const int var_adjust = (x->source_variance < 16);
5754 scale -= var_adjust;
5755 }
5756 if (ref_frame > INTRA_FRAME &&
5757 distortion2 * scale < qstep * qstep) {
5758 early_term = 1;
5759 }
5760 }
5761 }
5762 }
5763
5764 /* keep record of best compound/single-only prediction */
5765 if (!disable_skip && ref_frame != INTRA_FRAME) {
5766 int64_t single_rd, hybrid_rd, single_rate, hybrid_rate;
5767
5768 if (cm->reference_mode == REFERENCE_MODE_SELECT) {
5769 single_rate = rate2 - compmode_cost;
5770 hybrid_rate = rate2;
5771 } else {
5772 single_rate = rate2;
5773 hybrid_rate = rate2 + compmode_cost;
5774 }
5775
5776 single_rd = RDCOST(x->rdmult, x->rddiv, single_rate, distortion2);
5777 hybrid_rd = RDCOST(x->rdmult, x->rddiv, hybrid_rate, distortion2);
5778
5779 if (!comp_pred && single_rd < best_pred_rd[SINGLE_REFERENCE])
5780 best_pred_rd[SINGLE_REFERENCE] = single_rd;
5781 else if (comp_pred && single_rd < best_pred_rd[COMPOUND_REFERENCE])
5782 best_pred_rd[COMPOUND_REFERENCE] = single_rd;
5783
5784 if (hybrid_rd < best_pred_rd[REFERENCE_MODE_SELECT])
5785 best_pred_rd[REFERENCE_MODE_SELECT] = hybrid_rd;
5786 }
5787
5788 /* keep record of best filter type */
5789 if (!mode_excluded && !disable_skip && ref_frame != INTRA_FRAME &&
5790 cm->interp_filter != BILINEAR) {
5791 int64_t ref = filter_cache[cm->interp_filter == SWITCHABLE ?
5792 SWITCHABLE_FILTERS : cm->interp_filter];
5793 int64_t adj_rd;
5794 for (i = 0; i < SWITCHABLE_FILTER_CONTEXTS; i++) {
5795 if (ref == INT64_MAX)
5796 adj_rd = 0;
5797 else if (filter_cache[i] == INT64_MAX)
5798 // when early termination is triggered, the encoder does not have
5799 // access to the rate-distortion cost. it only knows that the cost
5800 // should be above the maximum valid value. hence it takes the known
5801 // maximum plus an arbitrary constant as the rate-distortion cost.
5802 adj_rd = mask_filter - ref + 10;
5803 else
5804 adj_rd = filter_cache[i] - ref;
5805
5806 adj_rd += this_rd;
James Zern5e16d392015-08-17 18:19:22 -07005807 best_filter_rd[i] = VPXMIN(best_filter_rd[i], adj_rd);
Jingning Han3ee6db62015-08-05 19:00:31 -07005808 }
5809 }
5810
5811 if (early_term)
5812 break;
5813
5814 if (x->skip && !comp_pred)
5815 break;
5816 }
5817
5818 if (best_rd >= best_rd_so_far) {
5819 rd_cost->rate = INT_MAX;
5820 rd_cost->rdcost = INT64_MAX;
5821 return;
5822 }
5823
5824 // If we used an estimate for the uv intra rd in the loop above...
5825 if (sf->use_uv_intra_rd_estimate) {
5826 // Do Intra UV best rd mode selection if best mode choice above was intra.
5827 if (best_mbmode.ref_frame[0] == INTRA_FRAME) {
5828 *mbmi = best_mbmode;
5829 rd_pick_intra_sbuv_mode(cpi, x, ctx, &rate_uv_intra,
5830 &rate_uv_tokenonly,
5831 &dist_uv,
5832 &skip_uv,
5833 BLOCK_8X8, TX_4X4);
5834 }
5835 }
5836
5837 if (best_rd == INT64_MAX) {
5838 rd_cost->rate = INT_MAX;
5839 rd_cost->dist = INT64_MAX;
5840 rd_cost->rdcost = INT64_MAX;
5841 return;
5842 }
5843
5844 assert((cm->interp_filter == SWITCHABLE) ||
5845 (cm->interp_filter == best_mbmode.interp_filter) ||
5846 !is_inter_block(&best_mbmode));
5847
5848 vp10_update_rd_thresh_fact(tile_data->thresh_freq_fact,
5849 sf->adaptive_rd_thresh, bsize, best_ref_index);
5850
5851 // macroblock modes
5852 *mbmi = best_mbmode;
5853 x->skip |= best_skip2;
5854 if (!is_inter_block(&best_mbmode)) {
5855 for (i = 0; i < 4; i++)
5856 xd->mi[0]->bmi[i].as_mode = best_bmodes[i].as_mode;
5857 } else {
5858 for (i = 0; i < 4; ++i)
5859 memcpy(&xd->mi[0]->bmi[i], &best_bmodes[i], sizeof(b_mode_info));
5860
5861 mbmi->mv[0].as_int = xd->mi[0]->bmi[3].as_mv[0].as_int;
5862 mbmi->mv[1].as_int = xd->mi[0]->bmi[3].as_mv[1].as_int;
5863 }
5864
5865 for (i = 0; i < REFERENCE_MODES; ++i) {
5866 if (best_pred_rd[i] == INT64_MAX)
5867 best_pred_diff[i] = INT_MIN;
5868 else
5869 best_pred_diff[i] = best_rd - best_pred_rd[i];
5870 }
5871
5872 if (!x->skip) {
5873 for (i = 0; i < SWITCHABLE_FILTER_CONTEXTS; i++) {
5874 if (best_filter_rd[i] == INT64_MAX)
5875 best_filter_diff[i] = 0;
5876 else
5877 best_filter_diff[i] = best_rd - best_filter_rd[i];
5878 }
5879 if (cm->interp_filter == SWITCHABLE)
5880 assert(best_filter_diff[SWITCHABLE_FILTERS] == 0);
5881 } else {
5882 vp10_zero(best_filter_diff);
5883 }
5884
5885 store_coding_context(x, ctx, best_ref_index,
5886 best_pred_diff, best_filter_diff, 0);
5887}