blob: 8d9393582b8cdde8f8519b5ebbad6bdde81df899 [file] [log] [blame]
Jingning Han3ee6db62015-08-05 19:00:31 -07001/*
2 * Copyright (c) 2010 The WebM project authors. All Rights Reserved.
3 *
4 * Use of this source code is governed by a BSD-style license
5 * that can be found in the LICENSE file in the root of the source
6 * tree. An additional intellectual property rights grant can be found
7 * in the file PATENTS. All contributing project authors may
8 * be found in the AUTHORS file in the root of the source tree.
9 */
10
11#include <assert.h>
12#include <math.h>
13
14#include "./vp10_rtcd.h"
15#include "./vpx_dsp_rtcd.h"
16
Johannc5f11912015-08-31 14:36:35 -070017#include "vpx_dsp/vpx_dsp_common.h"
Jingning Han3ee6db62015-08-05 19:00:31 -070018#include "vpx_mem/vpx_mem.h"
19#include "vpx_ports/mem.h"
Jingning Han3acfe462015-08-12 09:20:31 -070020#include "vpx_ports/system_state.h"
Jingning Han3ee6db62015-08-05 19:00:31 -070021
Jingning Han54d66ef2015-08-06 21:14:07 -070022#include "vp10/common/common.h"
23#include "vp10/common/entropy.h"
24#include "vp10/common/entropymode.h"
25#include "vp10/common/idct.h"
26#include "vp10/common/mvref_common.h"
27#include "vp10/common/pred_common.h"
28#include "vp10/common/quant_common.h"
29#include "vp10/common/reconinter.h"
30#include "vp10/common/reconintra.h"
31#include "vp10/common/scan.h"
32#include "vp10/common/seg_common.h"
Jingning Han3ee6db62015-08-05 19:00:31 -070033
Jingning Han54d66ef2015-08-06 21:14:07 -070034#include "vp10/encoder/cost.h"
35#include "vp10/encoder/encodemb.h"
36#include "vp10/encoder/encodemv.h"
37#include "vp10/encoder/encoder.h"
38#include "vp10/encoder/mcomp.h"
hui su5d011cb2015-09-15 12:44:13 -070039#include "vp10/encoder/palette.h"
Jingning Han54d66ef2015-08-06 21:14:07 -070040#include "vp10/encoder/quantize.h"
41#include "vp10/encoder/ratectrl.h"
42#include "vp10/encoder/rd.h"
43#include "vp10/encoder/rdopt.h"
44#include "vp10/encoder/aq_variance.h"
Jingning Han3ee6db62015-08-05 19:00:31 -070045
46#define LAST_FRAME_MODE_MASK ((1 << GOLDEN_FRAME) | (1 << ALTREF_FRAME) | \
47 (1 << INTRA_FRAME))
48#define GOLDEN_FRAME_MODE_MASK ((1 << LAST_FRAME) | (1 << ALTREF_FRAME) | \
49 (1 << INTRA_FRAME))
50#define ALT_REF_MODE_MASK ((1 << LAST_FRAME) | (1 << GOLDEN_FRAME) | \
51 (1 << INTRA_FRAME))
52
53#define SECOND_REF_FRAME_MASK ((1 << ALTREF_FRAME) | 0x01)
54
55#define MIN_EARLY_TERM_INDEX 3
56#define NEW_MV_DISCOUNT_FACTOR 8
57
#if CONFIG_EXT_TX
// Scale factor applied to the best rd cost when the current best transform
// type is DCT_DCT: a competing tx type must beat 98% of it to be adopted
// (see choose_largest_tx_size), biasing the search toward DCT_DCT.
const double ext_tx_th = 0.98;
#endif
61
// One entry of the mode search order: a prediction mode paired with its
// reference frames. ref_frame[1] is NONE for single-reference and intra
// modes (see vp10_mode_order below).
typedef struct {
  PREDICTION_MODE mode;
  MV_REFERENCE_FRAME ref_frame[2];
} MODE_DEFINITION;
66
// A reference-frame pair on its own (no prediction mode); the second entry
// is NONE for single-reference prediction (see vp10_ref_order below).
typedef struct {
  MV_REFERENCE_FRAME ref_frame[2];
} REF_DEFINITION;
70
// Accumulator threaded through block_rd_txfm() while rate-distortion
// costing every transform block of a plane (see txfm_rd_in_plane()).
struct rdcost_block_args {
#if CONFIG_VAR_TX
  const VP10_COMP *cpi;  // encoder context; used for fn_ptr variance calls
#endif
  MACROBLOCK *x;
  ENTROPY_CONTEXT t_above[16];  // above entropy contexts for coeff costing
  ENTROPY_CONTEXT t_left[16];   // left entropy contexts for coeff costing
  int this_rate;      // accumulated rate over visited blocks
  int64_t this_dist;  // accumulated distortion
  int64_t this_sse;   // accumulated sum of squared errors
  int64_t this_rd;    // accumulated rd cost
  int64_t best_rd;    // rd budget; once exceeded the walk aborts
  int exit_early;     // set when this_rd exceeds best_rd; outputs invalid
  int use_fast_coef_costing;  // use approximate coefficient costing
  const scan_order *so;       // scan order for the chosen transform type
  uint8_t skippable;  // stays 1 only if every visited block has eob == 0
};
88
#define LAST_NEW_MV_INDEX 6
// Search order for the full rd mode loop: nearest-mv and DC first, then
// new/near/zero mv per reference, compound references, and the remaining
// intra directional modes last.
// NOTE(review): {NEWMV, {LAST_FRAME, NONE}} sits at index 4 in this table,
// not 6 — confirm LAST_NEW_MV_INDEX against its use site.
static const MODE_DEFINITION vp10_mode_order[MAX_MODES] = {
  {NEARESTMV, {LAST_FRAME,   NONE}},
  {NEARESTMV, {ALTREF_FRAME, NONE}},
  {NEARESTMV, {GOLDEN_FRAME, NONE}},

  {DC_PRED,   {INTRA_FRAME,  NONE}},

  {NEWMV,     {LAST_FRAME,   NONE}},
  {NEWMV,     {ALTREF_FRAME, NONE}},
  {NEWMV,     {GOLDEN_FRAME, NONE}},

  {NEARMV,    {LAST_FRAME,   NONE}},
  {NEARMV,    {ALTREF_FRAME, NONE}},
  {NEARMV,    {GOLDEN_FRAME, NONE}},

  {ZEROMV,    {LAST_FRAME,   NONE}},
  {ZEROMV,    {GOLDEN_FRAME, NONE}},
  {ZEROMV,    {ALTREF_FRAME, NONE}},

  {NEARESTMV, {LAST_FRAME,   ALTREF_FRAME}},
  {NEARESTMV, {GOLDEN_FRAME, ALTREF_FRAME}},

  {TM_PRED,   {INTRA_FRAME,  NONE}},

  {NEARMV,    {LAST_FRAME,   ALTREF_FRAME}},
  {NEWMV,     {LAST_FRAME,   ALTREF_FRAME}},
  {NEARMV,    {GOLDEN_FRAME, ALTREF_FRAME}},
  {NEWMV,     {GOLDEN_FRAME, ALTREF_FRAME}},

  {ZEROMV,    {LAST_FRAME,   ALTREF_FRAME}},
  {ZEROMV,    {GOLDEN_FRAME, ALTREF_FRAME}},

  {H_PRED,    {INTRA_FRAME,  NONE}},
  {V_PRED,    {INTRA_FRAME,  NONE}},
  {D135_PRED, {INTRA_FRAME,  NONE}},
  {D207_PRED, {INTRA_FRAME,  NONE}},
  {D153_PRED, {INTRA_FRAME,  NONE}},
  {D63_PRED,  {INTRA_FRAME,  NONE}},
  {D117_PRED, {INTRA_FRAME,  NONE}},
  {D45_PRED,  {INTRA_FRAME,  NONE}},
};
131
// Search order over reference-frame combinations: single references first,
// then compound pairs, intra last.
static const REF_DEFINITION vp10_ref_order[MAX_REFS] = {
  {{LAST_FRAME,   NONE}},
  {{GOLDEN_FRAME, NONE}},
  {{ALTREF_FRAME, NONE}},
  {{LAST_FRAME,   ALTREF_FRAME}},
  {{GOLDEN_FRAME, ALTREF_FRAME}},
  {{INTRA_FRAME,  NONE}},
};
140
hui su5d011cb2015-09-15 12:44:13 -0700141static INLINE int write_uniform_cost(int n, int v) {
142 int l = get_unsigned_bits(n), m = (1 << l) - n;
143 if (l == 0)
144 return 0;
145 if (v < m)
146 return (l - 1) * vp10_cost_bit(128, 0);
147 else
148 return l * vp10_cost_bit(128, 0);
149}
150
Jingning Han3ee6db62015-08-05 19:00:31 -0700151static void swap_block_ptr(MACROBLOCK *x, PICK_MODE_CONTEXT *ctx,
152 int m, int n, int min_plane, int max_plane) {
153 int i;
154
155 for (i = min_plane; i < max_plane; ++i) {
156 struct macroblock_plane *const p = &x->plane[i];
157 struct macroblockd_plane *const pd = &x->e_mbd.plane[i];
158
159 p->coeff = ctx->coeff_pbuf[i][m];
160 p->qcoeff = ctx->qcoeff_pbuf[i][m];
161 pd->dqcoeff = ctx->dqcoeff_pbuf[i][m];
162 p->eobs = ctx->eobs_pbuf[i][m];
163
164 ctx->coeff_pbuf[i][m] = ctx->coeff_pbuf[i][n];
165 ctx->qcoeff_pbuf[i][m] = ctx->qcoeff_pbuf[i][n];
166 ctx->dqcoeff_pbuf[i][m] = ctx->dqcoeff_pbuf[i][n];
167 ctx->eobs_pbuf[i][m] = ctx->eobs_pbuf[i][n];
168
169 ctx->coeff_pbuf[i][n] = p->coeff;
170 ctx->qcoeff_pbuf[i][n] = p->qcoeff;
171 ctx->dqcoeff_pbuf[i][n] = pd->dqcoeff;
172 ctx->eobs_pbuf[i][n] = p->eobs;
173 }
174}
175
// Model-based rate/distortion estimate for a whole block: per plane, the
// prediction error (variance/sse) is measured for each max-size transform
// unit and mapped to rate and distortion, either by a cheap linear model
// (sf.simple_model_rd_from_var) or by vp10_model_rd_from_var_lapndz().
// Side effects: fills x->bsse and x->skip_txfm per unit, accumulates
// x->pred_sse[ref] for plane 0, and reports through skip_txfm_sb whether
// every unit's error was low enough to skip the transform entirely.
static void model_rd_for_sb(VP10_COMP *cpi, BLOCK_SIZE bsize,
                            MACROBLOCK *x, MACROBLOCKD *xd,
                            int *out_rate_sum, int64_t *out_dist_sum,
                            int *skip_txfm_sb, int64_t *skip_sse_sb) {
  // Note our transform coeffs are 8 times an orthogonal transform.
  // Hence quantizer step is also 8 times. To get effective quantizer
  // we need to divide by 8 before sending to modeling function.
  int i;
  int64_t rate_sum = 0;
  int64_t dist_sum = 0;
  const int ref = xd->mi[0]->mbmi.ref_frame[0];
  unsigned int sse;
  unsigned int var = 0;
  unsigned int sum_sse = 0;
  int64_t total_sse = 0;
  int skip_flag = 1;
  const int shift = 6;
  int rate;
  int64_t dist;
  // Dequant values carry extra precision in high-bitdepth builds, hence
  // the depth-dependent shift.
  const int dequant_shift =
#if CONFIG_VP9_HIGHBITDEPTH
      (xd->cur_buf->flags & YV12_FLAG_HIGHBITDEPTH) ?
          xd->bd - 5 :
#endif  // CONFIG_VP9_HIGHBITDEPTH
          3;

  x->pred_sse[ref] = 0;

  for (i = 0; i < MAX_MB_PLANE; ++i) {
    struct macroblock_plane *const p = &x->plane[i];
    struct macroblockd_plane *const pd = &xd->plane[i];
    const BLOCK_SIZE bs = get_plane_block_size(bsize, pd);
    const TX_SIZE max_tx_size = max_txsize_lookup[bs];
    const BLOCK_SIZE unit_size = txsize_to_bsize[max_tx_size];
    const int64_t dc_thr = p->quant_thred[0] >> shift;
    const int64_t ac_thr = p->quant_thred[1] >> shift;
    // The low thresholds are used to measure if the prediction errors are
    // low enough so that we can skip the mode search.
    const int64_t low_dc_thr = VPXMIN(50, dc_thr >> 2);
    const int64_t low_ac_thr = VPXMIN(80, ac_thr >> 2);
    // Number of max-size transform units covering this plane block.
    int bw = 1 << (b_width_log2_lookup[bs] - b_width_log2_lookup[unit_size]);
    int bh = 1 << (b_height_log2_lookup[bs] - b_width_log2_lookup[unit_size]);
    int idx, idy;
    // Pixel strides (log2) of one transform unit.
    int lw = b_width_log2_lookup[unit_size] + 2;
    int lh = b_height_log2_lookup[unit_size] + 2;

    sum_sse = 0;

    for (idy = 0; idy < bh; ++idy) {
      for (idx = 0; idx < bw; ++idx) {
        uint8_t *src = p->src.buf + (idy * p->src.stride << lh) + (idx << lw);
        uint8_t *dst = pd->dst.buf + (idy * pd->dst.stride << lh) + (idx << lh);
        int block_idx = (idy << 1) + idx;
        int low_err_skip = 0;

        var = cpi->fn_ptr[unit_size].vf(src, p->src.stride,
                                        dst, pd->dst.stride, &sse);
        x->bsse[(i << 2) + block_idx] = sse;
        sum_sse += sse;

        x->skip_txfm[(i << 2) + block_idx] = SKIP_TXFM_NONE;
        if (!x->select_tx_size) {
          // Check if all ac coefficients can be quantized to zero.
          if (var < ac_thr || var == 0) {
            x->skip_txfm[(i << 2) + block_idx] = SKIP_TXFM_AC_ONLY;

            // Check if dc coefficient can be quantized to zero.
            if (sse - var < dc_thr || sse == var) {
              x->skip_txfm[(i << 2) + block_idx] = SKIP_TXFM_AC_DC;

              if (!sse || (var < low_ac_thr && sse - var < low_dc_thr))
                low_err_skip = 1;
            }
          }
        }

        // Any unit above the low-error thresholds disables whole-block skip.
        if (skip_flag && !low_err_skip)
          skip_flag = 0;

        if (i == 0)
          x->pred_sse[ref] += sse;
      }
    }

    total_sse += sum_sse;

    // Fast approximate the modelling function.
    if (cpi->sf.simple_model_rd_from_var) {
      // NOTE: this inner int64_t 'rate' intentionally shadows the outer
      // int 'rate' used by the Laplacian model branch.
      int64_t rate;
      const int64_t square_error = sum_sse;
      int quantizer = (pd->dequant[1] >> dequant_shift);

      if (quantizer < 120)
        rate = (square_error * (280 - quantizer)) >> 8;
      else
        rate = 0;
      dist = (square_error * quantizer) >> 8;
      rate_sum += rate;
      dist_sum += dist;
    } else {
      vp10_model_rd_from_var_lapndz(sum_sse, num_pels_log2_lookup[bs],
                                    pd->dequant[1] >> dequant_shift,
                                    &rate, &dist);
      rate_sum += rate;
      dist_sum += dist;
    }
  }

  *skip_txfm_sb = skip_flag;
  // sse/dist are kept in transform-domain units internally; << 4 converts
  // to the pixel-domain scale used by the rd code.
  *skip_sse_sb = total_sse << 4;
  *out_rate_sum = (int)rate_sum;
  *out_dist_sum = dist_sum << 4;
}
289
290int64_t vp10_block_error_c(const tran_low_t *coeff, const tran_low_t *dqcoeff,
291 intptr_t block_size, int64_t *ssz) {
292 int i;
293 int64_t error = 0, sqcoeff = 0;
294
295 for (i = 0; i < block_size; i++) {
296 const int diff = coeff[i] - dqcoeff[i];
297 error += diff * diff;
298 sqcoeff += coeff[i] * coeff[i];
299 }
300
301 *ssz = sqcoeff;
302 return error;
303}
304
// Sum of squared error between original and dequantized coefficients
// (fast-path variant: no source-energy output, int16 coefficients).
int64_t vp10_block_error_fp_c(const int16_t *coeff, const int16_t *dqcoeff,
                              int block_size) {
  int64_t acc = 0;
  const int16_t *const end = coeff + block_size;

  while (coeff < end) {
    const int residual = *coeff++ - *dqcoeff++;
    acc += residual * residual;
  }

  return acc;
}
317
#if CONFIG_VP9_HIGHBITDEPTH
// High-bitdepth block error: as vp10_block_error_c, but accumulates in
// 64-bit and rescales the result back to the 8-bit-equivalent scale
// (>> 2*(bd-8)) with round-to-nearest.
int64_t vp10_highbd_block_error_c(const tran_low_t *coeff,
                                  const tran_low_t *dqcoeff,
                                  intptr_t block_size,
                                  int64_t *ssz, int bd) {
  int i;
  int64_t error = 0, sqcoeff = 0;
  int shift = 2 * (bd - 8);
  int rounding = shift > 0 ? 1 << (shift - 1) : 0;

  for (i = 0; i < block_size; i++) {
    const int64_t diff = coeff[i] - dqcoeff[i];
    error += diff * diff;
    sqcoeff += (int64_t)coeff[i] * (int64_t)coeff[i];
  }
  assert(error >= 0 && sqcoeff >= 0);
  error = (error + rounding) >> shift;
  sqcoeff = (sqcoeff + rounding) >> shift;

  *ssz = sqcoeff;
  return error;
}
#endif  // CONFIG_VP9_HIGHBITDEPTH
341
/* The trailing '0' is a terminator which is used inside cost_coeffs() to
 * decide whether to include cost of a trailing EOB node or not (i.e. we
 * can skip this if the last coefficient in this transform block, e.g. the
 * 16th coefficient in a 4x4 block or the 64th coefficient in a 8x8 block,
 * were non-zero). */
// Per-transform-size coefficient counts for each entropy coding band;
// each row's entries sum to the block's coefficient count (16/64/256/1024).
static const int16_t band_counts[TX_SIZES][8] = {
  { 1, 2, 3, 4,  3,   16 - 13, 0 },
  { 1, 2, 3, 4, 11,   64 - 21, 0 },
  { 1, 2, 3, 4, 11,  256 - 21, 0 },
  { 1, 2, 3, 4, 11, 1024 - 21, 0 },
};
// Token cost (in vp10 cost units) of one transform block's quantized
// coefficients: walks the scan order, costing the DC token, the AC tokens
// per coding band, and (if the block ends before the last band position)
// the trailing EOB token. With use_fast_coef_costing the context is
// approximated by the previous token instead of get_coef_context().
// Non-VAR_TX builds update the A/L entropy contexts in place.
static int cost_coeffs(MACROBLOCK *x,
                       int plane, int block,
#if CONFIG_VAR_TX
                       int coeff_ctx,
#else
                       ENTROPY_CONTEXT *A, ENTROPY_CONTEXT *L,
#endif
                       TX_SIZE tx_size,
                       const int16_t *scan, const int16_t *nb,
                       int use_fast_coef_costing) {
  MACROBLOCKD *const xd = &x->e_mbd;
  MB_MODE_INFO *mbmi = &xd->mi[0]->mbmi;
  const struct macroblock_plane *p = &x->plane[plane];
  const struct macroblockd_plane *pd = &xd->plane[plane];
  const PLANE_TYPE type = pd->plane_type;
  const int16_t *band_count = &band_counts[tx_size][1];
  const int eob = p->eobs[block];
  const tran_low_t *const qcoeff = BLOCK_OFFSET(p->qcoeff, block);
  unsigned int (*token_costs)[2][COEFF_CONTEXTS][ENTROPY_TOKENS] =
      x->token_costs[tx_size][type][is_inter_block(mbmi)];
  uint8_t token_cache[32 * 32];
#if CONFIG_VAR_TX
  int pt = coeff_ctx;
#else
  int pt = combine_entropy_contexts(*A, *L);
#endif
  int c, cost;
#if CONFIG_VP9_HIGHBITDEPTH
  const int16_t *cat6_high_cost = vp10_get_high_cost_table(xd->bd);
#else
  const int16_t *cat6_high_cost = vp10_get_high_cost_table(8);
#endif

#if !CONFIG_VAR_TX
  // Check for consistency of tx_size with mode info
  assert(type == PLANE_TYPE_Y ? mbmi->tx_size == tx_size
                              : get_uv_tx_size(mbmi, pd) == tx_size);
#endif

  if (eob == 0) {
    // single eob token
    cost = token_costs[0][0][pt][EOB_TOKEN];
    c = 0;
  } else {
    int band_left = *band_count++;

    // dc token
    int v = qcoeff[0];
    int16_t prev_t;
    EXTRABIT e;
    vp10_get_token_extra(v, &prev_t, &e);
    cost = (*token_costs)[0][pt][prev_t] +
        vp10_get_cost(prev_t, e, cat6_high_cost);

    token_cache[0] = vp10_pt_energy_class[prev_t];
    ++token_costs;

    // ac tokens
    for (c = 1; c < eob; c++) {
      const int rc = scan[c];
      int16_t t;

      v = qcoeff[rc];
      vp10_get_token_extra(v, &t, &e);
      if (use_fast_coef_costing) {
        // Fast path: previous token's zero/non-zero state stands in for
        // the full neighbor-derived context.
        cost += (*token_costs)[!prev_t][!prev_t][t] +
            vp10_get_cost(t, e, cat6_high_cost);
      } else {
        pt = get_coef_context(nb, token_cache, c);
        cost += (*token_costs)[!prev_t][pt][t] +
            vp10_get_cost(t, e, cat6_high_cost);
        token_cache[rc] = vp10_pt_energy_class[t];
      }
      prev_t = t;
      if (!--band_left) {
        // Advance to the next coding band's cost table.
        band_left = *band_count++;
        ++token_costs;
      }
    }

    // eob token
    if (band_left) {
      if (use_fast_coef_costing) {
        cost += (*token_costs)[0][!prev_t][EOB_TOKEN];
      } else {
        pt = get_coef_context(nb, token_cache, c);
        cost += (*token_costs)[0][pt][EOB_TOKEN];
      }
    }
  }

#if !CONFIG_VAR_TX
  // is eob first coefficient;
  *A = *L = (c > 0);
#endif

  return cost;
}
451
// Transform-domain distortion of one block: squared error between the
// original and dequantized coefficients, rescaled (>> shift) toward
// pixel-domain units; the source coefficient energy comes back in
// *out_sse at the same scale.
static void dist_block(MACROBLOCK *x, int plane, int block, TX_SIZE tx_size,
                       int64_t *out_dist, int64_t *out_sse) {
  const int ss_txfrm_size = tx_size << 1;
  MACROBLOCKD* const xd = &x->e_mbd;
  const struct macroblock_plane *const p = &x->plane[plane];
  const struct macroblockd_plane *const pd = &xd->plane[plane];
  int64_t this_sse;
  // 32x32 transforms carry different internal scaling, hence shift 0.
  int shift = tx_size == TX_32X32 ? 0 : 2;
  tran_low_t *const coeff = BLOCK_OFFSET(p->coeff, block);
  tran_low_t *const dqcoeff = BLOCK_OFFSET(pd->dqcoeff, block);
#if CONFIG_VP9_HIGHBITDEPTH
  const int bd = (xd->cur_buf->flags & YV12_FLAG_HIGHBITDEPTH) ? xd->bd : 8;
  *out_dist = vp10_highbd_block_error(coeff, dqcoeff, 16 << ss_txfrm_size,
                                      &this_sse, bd) >> shift;
#else
  *out_dist = vp10_block_error(coeff, dqcoeff, 16 << ss_txfrm_size,
                               &this_sse) >> shift;
#endif  // CONFIG_VP9_HIGHBITDEPTH
  *out_sse = this_sse >> shift;
}
472
// Coefficient cost of one transform block. In VAR_TX builds the entropy
// context is combined here from the above/left arrays and written back
// (non-zero eob => context 1) after costing; otherwise cost_coeffs()
// updates the contexts itself.
static int rate_block(int plane, int block, int blk_row, int blk_col,
                      TX_SIZE tx_size, struct rdcost_block_args* args) {
#if CONFIG_VAR_TX
  int coeff_ctx = combine_entropy_contexts(*(args->t_above + blk_col),
                                           *(args->t_left + blk_row));
  int coeff_cost = cost_coeffs(args->x, plane, block, coeff_ctx,
                               tx_size, args->so->scan, args->so->neighbors,
                               args->use_fast_coef_costing);
  const struct macroblock_plane *p = &args->x->plane[plane];
  *(args->t_above + blk_col) = !(p->eobs[block] == 0);
  *(args->t_left + blk_row) = !(p->eobs[block] == 0);
  return coeff_cost;
#else
  return cost_coeffs(args->x, plane, block,
                     args->t_above + blk_col,
                     args->t_left + blk_row,
                     tx_size, args->so->scan, args->so->neighbors,
                     args->use_fast_coef_costing);
#endif
}
493
// Per-transform-block rd worker invoked by
// vp10_foreach_transformed_block_in_plane(). Computes distortion/sse for
// the block (encoding intra blocks in place; reusing model_rd_for_sb's
// skip_txfm decisions for inter blocks at the max tx size), then adds
// rate and accumulates rate/dist/sse/rd into args. Sets args->exit_early
// as soon as the accumulated rd exceeds args->best_rd.
static void block_rd_txfm(int plane, int block, int blk_row, int blk_col,
                          BLOCK_SIZE plane_bsize,
                          TX_SIZE tx_size, void *arg) {
  struct rdcost_block_args *args = arg;
  MACROBLOCK *const x = args->x;
  MACROBLOCKD *const xd = &x->e_mbd;
  MB_MODE_INFO *const mbmi = &xd->mi[0]->mbmi;
  int64_t rd1, rd2, rd;
  int rate;
  int64_t dist;
  int64_t sse;

  if (args->exit_early)
    return;

  if (!is_inter_block(mbmi)) {
#if CONFIG_VAR_TX
    // NOTE: this local 'arg' shadows the void* parameter of the same name.
    struct encode_b_args arg = {x, NULL, &mbmi->skip};
    uint8_t *dst, *src;
    int src_stride = x->plane[plane].src.stride;
    int dst_stride = xd->plane[plane].dst.stride;
    unsigned int tmp_sse;
    PREDICTION_MODE mode = (plane == 0) ?
        get_y_mode(xd->mi[0], block) : mbmi->uv_mode;

#if CONFIG_VP9_HIGHBITDEPTH
    vp10_encode_block_intra(plane, block, blk_row, blk_col,
                            plane_bsize, tx_size, &arg);
    dist_block(x, plane, block, tx_size, &dist, &sse);
#else
    // Pixel-domain sse/dist: variance of src vs. prediction before and
    // after encoding the block.
    src = &x->plane[plane].src.buf[4 * (blk_row * src_stride + blk_col)];
    dst = &xd->plane[plane].dst.buf[4 * (blk_row * dst_stride + blk_col)];
    vp10_predict_intra_block(xd, b_width_log2_lookup[plane_bsize],
                             b_height_log2_lookup[plane_bsize],
                             tx_size, mode, dst, dst_stride,
                             dst, dst_stride, blk_col, blk_row, plane);
    args->cpi->fn_ptr[txsize_to_bsize[tx_size]].vf(src, src_stride,
                                                   dst, dst_stride, &tmp_sse);
    sse = (int64_t)tmp_sse * 16;
    vp10_encode_block_intra(plane, block, blk_row, blk_col,
                            plane_bsize, tx_size, &arg);
    args->cpi->fn_ptr[txsize_to_bsize[tx_size]].vf(src, src_stride,
                                                   dst, dst_stride, &tmp_sse);
    dist = (int64_t)tmp_sse * 16;
#endif  // CONFIG_VP9_HIGHBITDEPTH
#else
    struct encode_b_args arg = {x, NULL, &mbmi->skip};
    vp10_encode_block_intra(plane, block, blk_row, blk_col,
                            plane_bsize, tx_size, &arg);
    dist_block(x, plane, block, tx_size, &dist, &sse);
#endif
  } else if (max_txsize_lookup[plane_bsize] == tx_size) {
    // Inter block at the maximum tx size: honor the skip_txfm decision
    // made by model_rd_for_sb().
    if (x->skip_txfm[(plane << 2) + (block >> (tx_size << 1))] ==
        SKIP_TXFM_NONE) {
      // full forward transform and quantization
      vp10_xform_quant(x, plane, block, blk_row, blk_col,
                       plane_bsize, tx_size);
      dist_block(x, plane, block, tx_size, &dist, &sse);
    } else if (x->skip_txfm[(plane << 2) + (block >> (tx_size << 1))] ==
               SKIP_TXFM_AC_ONLY) {
      // compute DC coefficient
      tran_low_t *const coeff = BLOCK_OFFSET(x->plane[plane].coeff, block);
      tran_low_t *const dqcoeff = BLOCK_OFFSET(xd->plane[plane].dqcoeff, block);
      vp10_xform_quant_dc(x, plane, block, blk_row, blk_col,
                          plane_bsize, tx_size);
      sse = x->bsse[(plane << 2) + (block >> (tx_size << 1))] << 4;
      dist = sse;
      if (x->plane[plane].eobs[block]) {
        // Correct the distortion by the energy the coded DC removes.
        const int64_t orig_sse = (int64_t)coeff[0] * coeff[0];
        const int64_t resd_sse = coeff[0] - dqcoeff[0];
        int64_t dc_correct = orig_sse - resd_sse * resd_sse;
#if CONFIG_VP9_HIGHBITDEPTH
        dc_correct >>= ((xd->bd - 8) * 2);
#endif
        if (tx_size != TX_32X32)
          dc_correct >>= 2;

        dist = VPXMAX(0, sse - dc_correct);
      }
    } else {
      // SKIP_TXFM_AC_DC
      // skip forward transform
      x->plane[plane].eobs[block] = 0;
      sse = x->bsse[(plane << 2) + (block >> (tx_size << 1))] << 4;
      dist = sse;
    }
  } else {
    // full forward transform and quantization
    vp10_xform_quant(x, plane, block, blk_row, blk_col, plane_bsize, tx_size);
    dist_block(x, plane, block, tx_size, &dist, &sse);
  }

  // Early exit on distortion alone: rate can only make rd larger.
  rd = RDCOST(x->rdmult, x->rddiv, 0, dist);
  if (args->this_rd + rd > args->best_rd) {
    args->exit_early = 1;
    return;
  }

  rate = rate_block(plane, block, blk_row, blk_col, tx_size, args);
  rd1 = RDCOST(x->rdmult, x->rddiv, rate, dist);
  rd2 = RDCOST(x->rdmult, x->rddiv, 0, sse);

  // TODO(jingning): temporarily enabled only for luma component
  rd = VPXMIN(rd1, rd2);
  if (plane == 0)
    x->zcoeff_blk[tx_size][block] = !x->plane[plane].eobs[block] ||
        (rd1 > rd2 && !xd->lossless[mbmi->segment_id]);

  args->this_rate += rate;
  args->this_dist += dist;
  args->this_sse += sse;
  args->this_rd += rd;

  if (args->this_rd > args->best_rd) {
    args->exit_early = 1;
    return;
  }

  args->skippable &= !x->plane[plane].eobs[block];
}
614
// Rate-distortion cost of coding one plane at a fixed transform size:
// sets up the rdcost_block_args accumulator, derives scan order from the
// block's tx type, and walks every transform block via block_rd_txfm().
// On early exit (rd budget exceeded) the outputs are set to the
// INT_MAX/INT64_MAX sentinels with *skippable = 0.
static void txfm_rd_in_plane(MACROBLOCK *x,
#if CONFIG_VAR_TX
                             const VP10_COMP *cpi,
#endif
                             int *rate, int64_t *distortion,
                             int *skippable, int64_t *sse,
                             int64_t ref_best_rd, int plane,
                             BLOCK_SIZE bsize, TX_SIZE tx_size,
                             int use_fast_coef_casting) {
  MACROBLOCKD *const xd = &x->e_mbd;
  const struct macroblockd_plane *const pd = &xd->plane[plane];
  TX_TYPE tx_type;
  struct rdcost_block_args args;
  vp10_zero(args);
  args.x = x;
#if CONFIG_VAR_TX
  args.cpi = cpi;
#endif
  args.best_rd = ref_best_rd;
  args.use_fast_coef_costing = use_fast_coef_casting;
  args.skippable = 1;

  // Luma carries the tx size in the mode info.
  if (plane == 0)
    xd->mi[0]->mbmi.tx_size = tx_size;

  vp10_get_entropy_contexts(bsize, tx_size, pd, args.t_above, args.t_left);

  tx_type = get_tx_type(pd->plane_type, xd, 0, tx_size);
  args.so = get_scan(tx_size, tx_type, is_inter_block(&xd->mi[0]->mbmi));

  vp10_foreach_transformed_block_in_plane(xd, bsize, plane,
                                          block_rd_txfm, &args);
  if (args.exit_early) {
    *rate = INT_MAX;
    *distortion = INT64_MAX;
    *sse = INT64_MAX;
    *skippable = 0;
  } else {
    *distortion = args.this_dist;
    *rate = args.this_rate;
    *sse = args.this_sse;
    *skippable = args.skippable;
  }
}
659
// Uses the largest transform size allowed by the frame's tx_mode (capped
// by the block size) and computes the luma rd stats for it. With
// CONFIG_EXT_TX and an inter block, additionally searches the allowed
// extended transform types for the lowest rd cost; a challenger must beat
// ext_tx_th (98%) of the best rd while DCT_DCT leads, biasing toward
// DCT_DCT. The chosen tx type's signalling cost is added to *rate.
static void choose_largest_tx_size(VP10_COMP *cpi, MACROBLOCK *x,
                                   int *rate, int64_t *distortion,
                                   int *skip, int64_t *sse,
                                   int64_t ref_best_rd,
                                   BLOCK_SIZE bs) {
  const TX_SIZE max_tx_size = max_txsize_lookup[bs];
  VP10_COMMON *const cm = &cpi->common;
  const TX_SIZE largest_tx_size = tx_mode_to_biggest_tx_size[cm->tx_mode];
  MACROBLOCKD *const xd = &x->e_mbd;
  MB_MODE_INFO *const mbmi = &xd->mi[0]->mbmi;
#if CONFIG_EXT_TX
  TX_TYPE tx_type, best_tx_type = DCT_DCT;
  int r, s;
  int64_t d, psse, this_rd, best_rd = INT64_MAX;
  vpx_prob skip_prob = vp10_get_skip_prob(cm, xd);
  int s0 = vp10_cost_bit(skip_prob, 0);
  int s1 = vp10_cost_bit(skip_prob, 1);
  int ext_tx_set;
  const int is_inter = is_inter_block(mbmi);
#endif  // CONFIG_EXT_TX

  mbmi->tx_size = VPXMIN(max_tx_size, largest_tx_size);

#if CONFIG_EXT_TX
  ext_tx_set = get_ext_tx_set(mbmi->tx_size, bs, is_inter);

  // NOTE(review): the loop is only entered when is_inter is true, so the
  // intra branches inside it look unreachable here — confirm intent.
  if (is_inter &&
      get_ext_tx_types(mbmi->tx_size, bs, is_inter) > 1 &&
      !xd->lossless[mbmi->segment_id]) {
    for (tx_type = 0; tx_type < TX_TYPES; ++tx_type) {
      if (is_inter) {
        if (!ext_tx_used_inter[ext_tx_set][tx_type])
          continue;
      } else {
        if (!ext_tx_used_intra[ext_tx_set][tx_type])
          continue;
      }

      mbmi->tx_type = tx_type;
      // In set 1, skip the DST_ADST..IDTX range while DCT_DCT still leads.
      if (ext_tx_set == 1 &&
          mbmi->tx_type >= DST_ADST && mbmi->tx_type < IDTX &&
          best_tx_type == DCT_DCT) {
        tx_type = IDTX - 1;
        continue;
      }

      txfm_rd_in_plane(x,
#if CONFIG_VAR_TX
                       cpi,
#endif
                       &r, &d, &s,
                       &psse, ref_best_rd, 0, bs, mbmi->tx_size,
                       cpi->sf.use_fast_coef_costing);

      if (r == INT_MAX)
        continue;
      // Add the cost of signalling this tx type.
      if (get_ext_tx_types(mbmi->tx_size, bs, is_inter) > 1) {
        if (is_inter) {
          if (ext_tx_set > 0)
            r += cpi->inter_tx_type_costs[ext_tx_set]
                                         [mbmi->tx_size][mbmi->tx_type];
        } else {
          if (ext_tx_set > 0)
            r += cpi->intra_tx_type_costs[ext_tx_set][mbmi->tx_size]
                                         [mbmi->mode][mbmi->tx_type];
        }
      }

      if (s)
        this_rd = RDCOST(x->rdmult, x->rddiv, s1, psse);
      else
        this_rd = RDCOST(x->rdmult, x->rddiv, r + s0, d);
      if (is_inter_block(mbmi) && !xd->lossless[mbmi->segment_id] && !s)
        this_rd = VPXMIN(this_rd, RDCOST(x->rdmult, x->rddiv, s1, psse));

      if (this_rd < ((best_tx_type == DCT_DCT) ? ext_tx_th : 1) * best_rd) {
        best_rd = this_rd;
        best_tx_type = mbmi->tx_type;
      }
    }
  }

  mbmi->tx_type = best_tx_type;
#endif  // CONFIG_EXT_TX

  // Final pass with the winning tx type (or the default in non-EXT_TX
  // builds) to produce the returned rd stats.
  txfm_rd_in_plane(x,
#if CONFIG_VAR_TX
                   cpi,
#endif
                   rate, distortion, skip,
                   sse, ref_best_rd, 0, bs,
                   mbmi->tx_size, cpi->sf.use_fast_coef_costing);

#if CONFIG_EXT_TX
  if (get_ext_tx_types(mbmi->tx_size, bs, is_inter) > 1 &&
      !xd->lossless[mbmi->segment_id] && *rate != INT_MAX) {
    int ext_tx_set = get_ext_tx_set(mbmi->tx_size, bs, is_inter);
    if (is_inter)
      *rate += cpi->inter_tx_type_costs[ext_tx_set][mbmi->tx_size]
                                       [mbmi->tx_type];
    else
      *rate += cpi->intra_tx_type_costs[ext_tx_set][mbmi->tx_size]
                                       [mbmi->mode][mbmi->tx_type];
  }
#endif  // CONFIG_EXT_TX
}
766
// Forces the smallest transform size (4x4) and returns the luma rd stats
// for it; counterpart of choose_largest_tx_size for callers that require
// the minimum tx size.
static void choose_smallest_tx_size(VP10_COMP *cpi, MACROBLOCK *x,
                                    int *rate, int64_t *distortion,
                                    int *skip, int64_t *sse,
                                    int64_t ref_best_rd,
                                    BLOCK_SIZE bs) {
  MACROBLOCKD *const xd = &x->e_mbd;
  MB_MODE_INFO *const mbmi = &xd->mi[0]->mbmi;

  mbmi->tx_size = TX_4X4;

  txfm_rd_in_plane(x,
#if CONFIG_VAR_TX
                   cpi,
#endif
                   rate, distortion, skip,
                   sse, ref_best_rd, 0, bs,
                   mbmi->tx_size, cpi->sf.use_fast_coef_costing);
}
785
// Search all allowed transform sizes (and, under CONFIG_EXT_TX, all usable
// transform types) for the current block and keep the combination with the
// lowest rate-distortion cost.
//
// Outputs (written only when a candidate beats the running best):
//   *rate       - coefficient + side-info rate of the winner
//   *distortion - reconstruction distortion of the winner
//   *skip       - 1 if the winner codes the block as all-skip
//   *psse       - source SSE of the winner
// Side effects: leaves the winning tx_size (and tx_type) in the block's
// MB_MODE_INFO; ref_best_rd is a pruning bound passed down to
// txfm_rd_in_plane.
static void choose_tx_size_from_rd(VP10_COMP *cpi, MACROBLOCK *x,
                                   int *rate,
                                   int64_t *distortion,
                                   int *skip,
                                   int64_t *psse,
                                   int64_t ref_best_rd,
                                   BLOCK_SIZE bs) {
  const TX_SIZE max_tx_size = max_txsize_lookup[bs];
  VP10_COMMON *const cm = &cpi->common;
  MACROBLOCKD *const xd = &x->e_mbd;
  MB_MODE_INFO *const mbmi = &xd->mi[0]->mbmi;
  vpx_prob skip_prob = vp10_get_skip_prob(cm, xd);
  int r, s;
  int64_t d, sse;
  int64_t rd = INT64_MAX;
  int n, m;
  int s0, s1;
  int64_t best_rd = INT64_MAX, last_rd = INT64_MAX;
  TX_SIZE best_tx = max_tx_size;
  int start_tx, end_tx;
  // When the frame signals TX_MODE_SELECT the tx size is coded per block,
  // so its signalling cost must be folded into the RD comparison below.
  const int tx_select = cm->tx_mode == TX_MODE_SELECT;
#if CONFIG_EXT_TX
  TX_TYPE tx_type, best_tx_type = DCT_DCT;
  int ext_tx_set;
#endif  // CONFIG_EXT_TX
  const int is_inter = is_inter_block(mbmi);

  const vpx_prob *tx_probs = get_tx_probs2(max_tx_size, xd, &cm->fc->tx_probs);
  assert(skip_prob > 0);
  // Rates of coding the skip flag as 0 (code coefficients) or 1 (skip).
  s0 = vp10_cost_bit(skip_prob, 0);
  s1 = vp10_cost_bit(skip_prob, 1);

  if (tx_select) {
    // Full search from the largest size allowed by the block down to 4x4.
    start_tx = max_tx_size;
    end_tx = 0;
  } else {
    // Fixed tx mode: evaluate exactly one size.
    const TX_SIZE chosen_tx_size =
        VPXMIN(max_tx_size, tx_mode_to_biggest_tx_size[cm->tx_mode]);
    start_tx = chosen_tx_size;
    end_tx = chosen_tx_size;
  }

  *distortion = INT64_MAX;
  *rate = INT_MAX;
  *skip = 0;
  *psse = INT64_MAX;

#if CONFIG_EXT_TX
  for (tx_type = DCT_DCT; tx_type < TX_TYPES; ++tx_type) {
#endif  // CONFIG_EXT_TX
  last_rd = INT64_MAX;
  for (n = start_tx; n >= end_tx; --n) {
    // Rate of signalling tx size n: a run of one-bits down the tree,
    // terminated by a zero unless n is already the deepest choice.
    int r_tx_size = 0;
    for (m = 0; m <= n - (n == (int) max_tx_size); ++m) {
      if (m == n)
        r_tx_size += vp10_cost_zero(tx_probs[m]);
      else
        r_tx_size += vp10_cost_one(tx_probs[m]);
    }

#if CONFIG_EXT_TX
    // Skip transform types that are not part of the set usable at this
    // size / block / prediction mode combination.
    ext_tx_set = get_ext_tx_set(n, bs, is_inter);
    if (is_inter) {
      if (!ext_tx_used_inter[ext_tx_set][tx_type])
        continue;
    } else {
      if (!ext_tx_used_intra[ext_tx_set][tx_type])
        continue;
    }
    mbmi->tx_type = tx_type;
    // Pruning: if plain DCT is still the best, skip the remaining DST_*
    // variants of set 1 — the outer loop's ++tx_type resumes at IDTX.
    if (ext_tx_set == 1 &&
        mbmi->tx_type >= DST_ADST && mbmi->tx_type < IDTX &&
        best_tx_type == DCT_DCT) {
      tx_type = IDTX - 1;
      break;
    }
    txfm_rd_in_plane(x,
#if CONFIG_VAR_TX
                     cpi,
#endif
                     &r, &d, &s,
                     &sse, ref_best_rd, 0, bs, n,
                     cpi->sf.use_fast_coef_costing);
    // Add the cost of signalling the transform type when it is actually
    // coded (more than one candidate, not lossless, rate is valid).
    if (get_ext_tx_types(n, bs, is_inter) > 1 &&
        !xd->lossless[xd->mi[0]->mbmi.segment_id] &&
        r != INT_MAX) {
      if (is_inter) {
        if (ext_tx_set > 0)
          r += cpi->inter_tx_type_costs[ext_tx_set]
                                       [mbmi->tx_size][mbmi->tx_type];
      } else {
        if (ext_tx_set > 0)
          r += cpi->intra_tx_type_costs[ext_tx_set][mbmi->tx_size]
                                       [mbmi->mode][mbmi->tx_type];
      }
    }
#else  // CONFIG_EXT_TX
    txfm_rd_in_plane(x,
#if CONFIG_VAR_TX
                     cpi,
#endif
                     &r, &d, &s,
                     &sse, ref_best_rd, 0, bs, n,
                     cpi->sf.use_fast_coef_costing);
#endif  // CONFIG_EXT_TX

    if (r == INT_MAX)
      continue;

    if (s) {
      // Block would be coded as skip: rate is just the skip flag, plus the
      // tx-size signal for intra (inter skip does not code a tx size).
      if (is_inter) {
        rd = RDCOST(x->rdmult, x->rddiv, s1, sse);
      } else {
        rd = RDCOST(x->rdmult, x->rddiv, s1 + r_tx_size * tx_select, sse);
      }
    } else {
      rd = RDCOST(x->rdmult, x->rddiv, r + s0 + r_tx_size * tx_select, d);
    }

    // Fold the tx-size signalling cost into the reported rate (rd above
    // already accounted for it); inter-skip blocks never signal it.
    if (tx_select && !(s && is_inter))
      r += r_tx_size;

    // For inter non-skip, also consider coding the block as skip anyway.
    if (is_inter && !xd->lossless[xd->mi[0]->mbmi.segment_id] && !s)
      rd = VPXMIN(rd, RDCOST(x->rdmult, x->rddiv, s1, sse));

    // Early termination in transform size search.
    if (cpi->sf.tx_size_search_breakout &&
        (rd == INT64_MAX ||
#if CONFIG_EXT_TX
         (s == 1 && tx_type != DCT_DCT) ||
#else
         (s == 1) ||
#endif
         (n < (int) max_tx_size && rd > last_rd)))
      break;

    last_rd = rd;
    // NOTE(review): with EXT_TX, a non-DCT inter candidate must beat the
    // DCT best by the ext_tx_th factor before it is accepted.
    if (rd <
#if CONFIG_EXT_TX
        (is_inter && best_tx_type == DCT_DCT ? ext_tx_th : 1) *
#endif  // CONFIG_EXT_TX
        best_rd) {
      best_tx = n;
      best_rd = rd;
      *distortion = d;
      *rate = r;
      *skip = s;
      *psse = sse;
#if CONFIG_EXT_TX
      best_tx_type = mbmi->tx_type;
#endif  // CONFIG_EXT_TX
    }
  }
#if CONFIG_EXT_TX
  }
#endif  // CONFIG_EXT_TX

  mbmi->tx_size = best_tx;
#if CONFIG_EXT_TX
  mbmi->tx_type = best_tx_type;
  // Re-run the transform RD for the winning size/type: the loop above left
  // the coefficient buffers in the state of the last candidate evaluated.
  txfm_rd_in_plane(x,
#if CONFIG_VAR_TX
                   cpi,
#endif
                   &r, &d, &s,
                   &sse, ref_best_rd, 0, bs, best_tx,
                   cpi->sf.use_fast_coef_costing);
#endif  // CONFIG_EXT_TX
}
955
Yaowu Xu26a9afc2015-08-13 09:42:27 -0700956static void super_block_yrd(VP10_COMP *cpi, MACROBLOCK *x, int *rate,
Jingning Han3ee6db62015-08-05 19:00:31 -0700957 int64_t *distortion, int *skip,
958 int64_t *psse, BLOCK_SIZE bs,
959 int64_t ref_best_rd) {
960 MACROBLOCKD *xd = &x->e_mbd;
961 int64_t sse;
962 int64_t *ret_sse = psse ? psse : &sse;
963
964 assert(bs == xd->mi[0]->mbmi.sb_type);
965
Ronald S. Bultje60c58b52015-10-12 17:54:25 -0400966 if (CONFIG_MISC_FIXES && xd->lossless[xd->mi[0]->mbmi.segment_id]) {
967 choose_smallest_tx_size(cpi, x, rate, distortion, skip, ret_sse,
968 ref_best_rd, bs);
969 } else if (cpi->sf.tx_size_search_method == USE_LARGESTALL ||
970 xd->lossless[xd->mi[0]->mbmi.segment_id]) {
Jingning Han3ee6db62015-08-05 19:00:31 -0700971 choose_largest_tx_size(cpi, x, rate, distortion, skip, ret_sse, ref_best_rd,
972 bs);
973 } else {
974 choose_tx_size_from_rd(cpi, x, rate, distortion, skip, ret_sse,
975 ref_best_rd, bs);
976 }
977}
978
979static int conditional_skipintra(PREDICTION_MODE mode,
980 PREDICTION_MODE best_intra_mode) {
981 if (mode == D117_PRED &&
982 best_intra_mode != V_PRED &&
983 best_intra_mode != D135_PRED)
984 return 1;
985 if (mode == D63_PRED &&
986 best_intra_mode != V_PRED &&
987 best_intra_mode != D45_PRED)
988 return 1;
989 if (mode == D207_PRED &&
990 best_intra_mode != H_PRED &&
991 best_intra_mode != D45_PRED)
992 return 1;
993 if (mode == D153_PRED &&
994 best_intra_mode != H_PRED &&
995 best_intra_mode != D135_PRED)
996 return 1;
997 return 0;
998}
999
// Palette search for the luma plane. Counts the distinct colors in the
// source block; if palette coding is plausible (2..64 colors and screen
// content tools enabled), runs k-means for every palette size from
// min(colors, PALETTE_MAX_SIZE) down to 2, prices the resulting palette +
// index map, and records the winner in the output parameters whenever it
// beats *best_rd. Leaves palette_mode_info->palette_size[0] == 0 when no
// palette candidate wins.
void rd_pick_palette_intra_sby(VP10_COMP *cpi, MACROBLOCK *x, BLOCK_SIZE bsize,
                               int palette_ctx, int dc_mode_cost,
                               PALETTE_MODE_INFO *palette_mode_info,
                               uint8_t *best_palette_color_map,
                               TX_SIZE *best_tx, PREDICTION_MODE *mode_selected,
                               int64_t *best_rd) {
  MACROBLOCKD *const xd = &x->e_mbd;
  MODE_INFO *const mic = xd->mi[0];
  // Block dimensions in pixels.
  int rows = 4 * num_4x4_blocks_high_lookup[bsize];
  int cols = 4 * num_4x4_blocks_wide_lookup[bsize];
  int this_rate, this_rate_tokenonly, s;
  int64_t this_distortion, this_rd;
  int colors, n;
  int src_stride = x->plane[0].src.stride;
  uint8_t *src = x->plane[0].src.buf;

#if CONFIG_VP9_HIGHBITDEPTH
  if (cpi->common.use_highbitdepth)
    colors = vp10_count_colors_highbd(src, src_stride, rows, cols,
                                      cpi->common.bit_depth);
  else
#endif  // CONFIG_VP9_HIGHBITDEPTH
    colors = vp10_count_colors(src, src_stride, rows, cols);
  palette_mode_info->palette_size[0] = 0;

  if (colors > 1 && colors <= 64 && cpi->common.allow_screen_content_tools) {
    int r, c, i, j, k;
    int max_itr = 50;  // k-means iteration cap
    int color_ctx, color_idx = 0;
    int color_order[PALETTE_MAX_SIZE];
    double *data = x->palette_buffer->kmeans_data_buf;
    uint8_t *indices = x->palette_buffer->kmeans_indices_buf;
    uint8_t *pre_indices = x->palette_buffer->kmeans_pre_indices_buf;
    double centroids[PALETTE_MAX_SIZE];
    uint8_t *color_map;
    double lb, ub, val;  // lower/upper bound of the pixel values seen
    // pmi aliases mic->mbmi.palette_mode_info; both names write the same
    // storage below.
    PALETTE_MODE_INFO *pmi = &mic->mbmi.palette_mode_info;
#if CONFIG_VP9_HIGHBITDEPTH
    uint16_t *src16 = CONVERT_TO_SHORTPTR(src);
    if (cpi->common.use_highbitdepth)
      lb = ub = src16[0];
    else
#endif  // CONFIG_VP9_HIGHBITDEPTH
      lb = ub = src[0];

    // Copy the block into the (double) k-means buffer, tracking min/max.
#if CONFIG_VP9_HIGHBITDEPTH
    if (cpi->common.use_highbitdepth) {
      for (r = 0; r < rows; ++r) {
        for (c = 0; c < cols; ++c) {
          val = src16[r * src_stride + c];
          data[r * cols + c] = val;
          if (val < lb)
            lb = val;
          else if (val > ub)
            ub = val;
        }
      }
    } else {
#endif  // CONFIG_VP9_HIGHBITDEPTH
      for (r = 0; r < rows; ++r) {
        for (c = 0; c < cols; ++c) {
          val = src[r * src_stride + c];
          data[r * cols + c] = val;
          if (val < lb)
            lb = val;
          else if (val > ub)
            ub = val;
        }
      }
#if CONFIG_VP9_HIGHBITDEPTH
    }
#endif  // CONFIG_VP9_HIGHBITDEPTH

    // Palette mode is signalled as DC_PRED.
    mic->mbmi.mode = DC_PRED;

    for (n = colors > PALETTE_MAX_SIZE ? PALETTE_MAX_SIZE : colors;
         n >= 2; --n) {
      // Seed centroids evenly across the observed [lb, ub] range.
      for (i = 0; i < n; ++i)
        centroids[i] = lb + (2 * i + 1) * (ub - lb) / n / 2;
      vp10_k_means(data, centroids, indices, pre_indices, rows * cols,
                   n, 1, max_itr);
      vp10_insertion_sort(centroids, n);
      for (i = 0; i < n; ++i)
        centroids[i] = round(centroids[i]);
      // remove duplicates (centroids are sorted, so equal values are
      // adjacent); k is the palette size actually kept
      i = 1;
      k = n;
      while (i < k) {
        if (centroids[i] == centroids[i - 1]) {
          j = i;
          while (j < k - 1) {
            centroids[j] = centroids[j + 1];
            ++j;
          }
          --k;
        } else {
          ++i;
        }
      }

#if CONFIG_VP9_HIGHBITDEPTH
      if (cpi->common.use_highbitdepth)
        for (i = 0; i < k; ++i)
          mic->mbmi.palette_mode_info.palette_colors[i] =
              clip_pixel_highbd(round(centroids[i]), cpi->common.bit_depth);
      else
#endif  // CONFIG_VP9_HIGHBITDEPTH
        for (i = 0; i < k; ++i)
          pmi->palette_colors[i] = clip_pixel((int)round(centroids[i]));
      pmi->palette_size[0] = k;

      // Quantize every pixel to its nearest centroid and materialize the
      // color index map for this candidate.
      vp10_calc_indices(data, centroids, indices, rows * cols, k, 1);
      for (r = 0; r < rows; ++r)
        for (c = 0; c < cols; ++c)
          xd->plane[0].color_index_map[r * cols + c] = indices[r * cols + c];

      super_block_yrd(cpi, x, &this_rate_tokenonly, &this_distortion,
                      &s, NULL, bsize, *best_rd);
      if (this_rate_tokenonly == INT_MAX)
        continue;

      // Total rate: token rate + DC mode + raw palette colors
      // (bit_depth bits each; vp10_cost_bit(128, 0) is the cost of one raw
      // bit) + palette size signalling.
      this_rate = this_rate_tokenonly + dc_mode_cost +
          cpi->common.bit_depth * k * vp10_cost_bit(128, 0) +
          cpi->palette_y_size_cost[bsize - BLOCK_8X8][k - 2];
      this_rate +=
          vp10_cost_bit(vp10_default_palette_y_mode_prob[bsize - BLOCK_8X8]
                                                        [palette_ctx], 1);
      color_map = xd->plane[0].color_index_map;
      // First index is coded uniformly; the rest are context-coded.
      this_rate += write_uniform_cost(k, xd->plane[0].color_index_map[0]);
      for (i = 0; i < rows; ++i) {
        for (j = (i == 0 ? 1 : 0); j < cols; ++j) {
          color_ctx = vp10_get_palette_color_context(color_map, cols, i, j,
                                                     k, color_order);
          for (r = 0; r < k; ++r)
            if (color_map[i * cols + j] == color_order[r]) {
              color_idx = r;
              break;
            }
          assert(color_idx < k);
          this_rate +=
              cpi->palette_y_color_cost[k - 2][color_ctx][color_idx];
        }
      }
      this_rd = RDCOST(x->rdmult, x->rddiv, this_rate, this_distortion);

      if (this_rd < *best_rd) {
        *best_rd = this_rd;
        *palette_mode_info = mic->mbmi.palette_mode_info;
        memcpy(best_palette_color_map, xd->plane[0].color_index_map,
               rows * cols * sizeof(xd->plane[0].color_index_map[0]));
        *mode_selected = DC_PRED;
        *best_tx = mic->mbmi.tx_size;
      }
    }
  }
}
1156
Yaowu Xu26a9afc2015-08-13 09:42:27 -07001157static int64_t rd_pick_intra4x4block(VP10_COMP *cpi, MACROBLOCK *x,
Jingning Han3ee6db62015-08-05 19:00:31 -07001158 int row, int col,
1159 PREDICTION_MODE *best_mode,
1160 const int *bmode_costs,
1161 ENTROPY_CONTEXT *a, ENTROPY_CONTEXT *l,
1162 int *bestrate, int *bestratey,
1163 int64_t *bestdistortion,
1164 BLOCK_SIZE bsize, int64_t rd_thresh) {
1165 PREDICTION_MODE mode;
1166 MACROBLOCKD *const xd = &x->e_mbd;
1167 int64_t best_rd = rd_thresh;
1168 struct macroblock_plane *p = &x->plane[0];
1169 struct macroblockd_plane *pd = &xd->plane[0];
1170 const int src_stride = p->src.stride;
1171 const int dst_stride = pd->dst.stride;
1172 const uint8_t *src_init = &p->src.buf[row * 4 * src_stride + col * 4];
1173 uint8_t *dst_init = &pd->dst.buf[row * 4 * src_stride + col * 4];
1174 ENTROPY_CONTEXT ta[2], tempa[2];
1175 ENTROPY_CONTEXT tl[2], templ[2];
1176 const int num_4x4_blocks_wide = num_4x4_blocks_wide_lookup[bsize];
1177 const int num_4x4_blocks_high = num_4x4_blocks_high_lookup[bsize];
1178 int idx, idy;
1179 uint8_t best_dst[8 * 8];
1180#if CONFIG_VP9_HIGHBITDEPTH
1181 uint16_t best_dst16[8 * 8];
1182#endif
1183
1184 memcpy(ta, a, sizeof(ta));
1185 memcpy(tl, l, sizeof(tl));
1186 xd->mi[0]->mbmi.tx_size = TX_4X4;
hui su5d011cb2015-09-15 12:44:13 -07001187 xd->mi[0]->mbmi.palette_mode_info.palette_size[0] = 0;
Jingning Han3ee6db62015-08-05 19:00:31 -07001188
1189#if CONFIG_VP9_HIGHBITDEPTH
1190 if (xd->cur_buf->flags & YV12_FLAG_HIGHBITDEPTH) {
1191 for (mode = DC_PRED; mode <= TM_PRED; ++mode) {
1192 int64_t this_rd;
1193 int ratey = 0;
1194 int64_t distortion = 0;
1195 int rate = bmode_costs[mode];
1196
1197 if (!(cpi->sf.intra_y_mode_mask[TX_4X4] & (1 << mode)))
1198 continue;
1199
1200 // Only do the oblique modes if the best so far is
1201 // one of the neighboring directional modes
1202 if (cpi->sf.mode_search_skip_flags & FLAG_SKIP_INTRA_DIRMISMATCH) {
1203 if (conditional_skipintra(mode, *best_mode))
Debargha Mukherjee8a429242015-10-12 12:30:55 -07001204 continue;
Jingning Han3ee6db62015-08-05 19:00:31 -07001205 }
1206
1207 memcpy(tempa, ta, sizeof(ta));
1208 memcpy(templ, tl, sizeof(tl));
1209
1210 for (idy = 0; idy < num_4x4_blocks_high; ++idy) {
1211 for (idx = 0; idx < num_4x4_blocks_wide; ++idx) {
1212 const int block = (row + idy) * 2 + (col + idx);
1213 const uint8_t *const src = &src_init[idx * 4 + idy * 4 * src_stride];
1214 uint8_t *const dst = &dst_init[idx * 4 + idy * 4 * dst_stride];
1215 int16_t *const src_diff = vp10_raster_block_offset_int16(BLOCK_8X8,
Debargha Mukherjee8a429242015-10-12 12:30:55 -07001216 block,
1217 p->src_diff);
Jingning Han3ee6db62015-08-05 19:00:31 -07001218 tran_low_t *const coeff = BLOCK_OFFSET(x->plane[0].coeff, block);
1219 xd->mi[0]->bmi[block].as_mode = mode;
Ronald S. Bultjec7dc1d72015-10-12 10:35:46 -04001220 vp10_predict_intra_block(xd, 1, 1, TX_4X4, mode, dst, dst_stride,
Jingning Han3ee6db62015-08-05 19:00:31 -07001221 dst, dst_stride,
1222 col + idx, row + idy, 0);
1223 vpx_highbd_subtract_block(4, 4, src_diff, 8, src, src_stride,
1224 dst, dst_stride, xd->bd);
Ronald S. Bultje60c58b52015-10-12 17:54:25 -04001225 if (xd->lossless[xd->mi[0]->mbmi.segment_id]) {
hui sub3cc3a02015-08-24 14:37:54 -07001226 TX_TYPE tx_type = get_tx_type(PLANE_TYPE_Y, xd, block, TX_4X4);
Debargha Mukherjee9fc691e2015-09-03 02:58:12 -07001227 const scan_order *so = get_scan(TX_4X4, tx_type, 0);
Jingning Han20484042015-10-21 17:38:00 -07001228#if CONFIG_VAR_TX
1229 const int coeff_ctx = combine_entropy_contexts(*(tempa + idx),
1230 *(templ + idy));
1231#endif
Yaowu Xu7c514e22015-09-28 15:55:46 -07001232 vp10_highbd_fwd_txfm_4x4(src_diff, coeff, 8, DCT_DCT, 1);
Jingning Han3ee6db62015-08-05 19:00:31 -07001233 vp10_regular_quantize_b_4x4(x, 0, block, so->scan, so->iscan);
Jingning Han20484042015-10-21 17:38:00 -07001234 ratey += cost_coeffs(x, 0, block,
1235#if CONFIG_VAR_TX
1236 coeff_ctx,
1237#else
1238 tempa + idx, templ + idy,
1239#endif
1240 TX_4X4,
Jingning Han3ee6db62015-08-05 19:00:31 -07001241 so->scan, so->neighbors,
1242 cpi->sf.use_fast_coef_costing);
1243 if (RDCOST(x->rdmult, x->rddiv, ratey, distortion) >= best_rd)
1244 goto next_highbd;
hui sud76e5b32015-08-13 16:27:19 -07001245 vp10_highbd_inv_txfm_add_4x4(BLOCK_OFFSET(pd->dqcoeff, block),
1246 dst, dst_stride, p->eobs[block],
Yaowu Xu7c514e22015-09-28 15:55:46 -07001247 xd->bd, DCT_DCT, 1);
Jingning Han3ee6db62015-08-05 19:00:31 -07001248 } else {
1249 int64_t unused;
hui sub3cc3a02015-08-24 14:37:54 -07001250 TX_TYPE tx_type = get_tx_type(PLANE_TYPE_Y, xd, block, TX_4X4);
Debargha Mukherjee9fc691e2015-09-03 02:58:12 -07001251 const scan_order *so = get_scan(TX_4X4, tx_type, 0);
Jingning Han20484042015-10-21 17:38:00 -07001252#if CONFIG_VAR_TX
1253 const int coeff_ctx = combine_entropy_contexts(*(tempa + idx),
1254 *(templ + idy));
1255#endif
Yaowu Xu7c514e22015-09-28 15:55:46 -07001256 vp10_highbd_fwd_txfm_4x4(src_diff, coeff, 8, tx_type, 0);
Jingning Han3ee6db62015-08-05 19:00:31 -07001257 vp10_regular_quantize_b_4x4(x, 0, block, so->scan, so->iscan);
Jingning Han20484042015-10-21 17:38:00 -07001258 ratey += cost_coeffs(x, 0, block,
1259#if CONFIG_VAR_TX
1260 coeff_ctx,
1261#else
1262 tempa + idx, templ + idy,
1263#endif
1264 TX_4X4,
Jingning Han3ee6db62015-08-05 19:00:31 -07001265 so->scan, so->neighbors,
1266 cpi->sf.use_fast_coef_costing);
1267 distortion += vp10_highbd_block_error(
1268 coeff, BLOCK_OFFSET(pd->dqcoeff, block),
1269 16, &unused, xd->bd) >> 2;
1270 if (RDCOST(x->rdmult, x->rddiv, ratey, distortion) >= best_rd)
1271 goto next_highbd;
hui sud76e5b32015-08-13 16:27:19 -07001272 vp10_highbd_inv_txfm_add_4x4(BLOCK_OFFSET(pd->dqcoeff, block),
1273 dst, dst_stride, p->eobs[block],
Yaowu Xu7c514e22015-09-28 15:55:46 -07001274 xd->bd, tx_type, 0);
Jingning Han3ee6db62015-08-05 19:00:31 -07001275 }
1276 }
1277 }
1278
1279 rate += ratey;
1280 this_rd = RDCOST(x->rdmult, x->rddiv, rate, distortion);
1281
1282 if (this_rd < best_rd) {
1283 *bestrate = rate;
1284 *bestratey = ratey;
1285 *bestdistortion = distortion;
1286 best_rd = this_rd;
1287 *best_mode = mode;
1288 memcpy(a, tempa, sizeof(tempa));
1289 memcpy(l, templ, sizeof(templ));
1290 for (idy = 0; idy < num_4x4_blocks_high * 4; ++idy) {
1291 memcpy(best_dst16 + idy * 8,
1292 CONVERT_TO_SHORTPTR(dst_init + idy * dst_stride),
1293 num_4x4_blocks_wide * 4 * sizeof(uint16_t));
1294 }
1295 }
Debargha Mukherjee8a429242015-10-12 12:30:55 -07001296next_highbd:
Jingning Han3ee6db62015-08-05 19:00:31 -07001297 {}
1298 }
Jingning Han481b8342015-09-11 08:56:06 -07001299 if (best_rd >= rd_thresh)
Jingning Han3ee6db62015-08-05 19:00:31 -07001300 return best_rd;
1301
1302 for (idy = 0; idy < num_4x4_blocks_high * 4; ++idy) {
1303 memcpy(CONVERT_TO_SHORTPTR(dst_init + idy * dst_stride),
1304 best_dst16 + idy * 8,
1305 num_4x4_blocks_wide * 4 * sizeof(uint16_t));
1306 }
1307
1308 return best_rd;
1309 }
1310#endif // CONFIG_VP9_HIGHBITDEPTH
1311
1312 for (mode = DC_PRED; mode <= TM_PRED; ++mode) {
1313 int64_t this_rd;
1314 int ratey = 0;
1315 int64_t distortion = 0;
1316 int rate = bmode_costs[mode];
1317
1318 if (!(cpi->sf.intra_y_mode_mask[TX_4X4] & (1 << mode)))
1319 continue;
1320
1321 // Only do the oblique modes if the best so far is
1322 // one of the neighboring directional modes
1323 if (cpi->sf.mode_search_skip_flags & FLAG_SKIP_INTRA_DIRMISMATCH) {
1324 if (conditional_skipintra(mode, *best_mode))
Debargha Mukherjee8a429242015-10-12 12:30:55 -07001325 continue;
Jingning Han3ee6db62015-08-05 19:00:31 -07001326 }
1327
1328 memcpy(tempa, ta, sizeof(ta));
1329 memcpy(templ, tl, sizeof(tl));
1330
1331 for (idy = 0; idy < num_4x4_blocks_high; ++idy) {
1332 for (idx = 0; idx < num_4x4_blocks_wide; ++idx) {
1333 const int block = (row + idy) * 2 + (col + idx);
1334 const uint8_t *const src = &src_init[idx * 4 + idy * 4 * src_stride];
1335 uint8_t *const dst = &dst_init[idx * 4 + idy * 4 * dst_stride];
1336 int16_t *const src_diff =
1337 vp10_raster_block_offset_int16(BLOCK_8X8, block, p->src_diff);
1338 tran_low_t *const coeff = BLOCK_OFFSET(x->plane[0].coeff, block);
1339 xd->mi[0]->bmi[block].as_mode = mode;
Ronald S. Bultjec7dc1d72015-10-12 10:35:46 -04001340 vp10_predict_intra_block(xd, 1, 1, TX_4X4, mode, dst, dst_stride,
Jingning Han3ee6db62015-08-05 19:00:31 -07001341 dst, dst_stride, col + idx, row + idy, 0);
1342 vpx_subtract_block(4, 4, src_diff, 8, src, src_stride, dst, dst_stride);
1343
Ronald S. Bultje60c58b52015-10-12 17:54:25 -04001344 if (xd->lossless[xd->mi[0]->mbmi.segment_id]) {
hui sub3cc3a02015-08-24 14:37:54 -07001345 TX_TYPE tx_type = get_tx_type(PLANE_TYPE_Y, xd, block, TX_4X4);
Debargha Mukherjee9fc691e2015-09-03 02:58:12 -07001346 const scan_order *so = get_scan(TX_4X4, tx_type, 0);
Jingning Han2cdc1272015-10-09 09:57:42 -07001347#if CONFIG_VAR_TX
1348 int coeff_ctx = combine_entropy_contexts(*(tempa + idx),
1349 *(templ + idy));
1350#endif
Yaowu Xu7c514e22015-09-28 15:55:46 -07001351 vp10_fwd_txfm_4x4(src_diff, coeff, 8, DCT_DCT, 1);
Jingning Han3ee6db62015-08-05 19:00:31 -07001352 vp10_regular_quantize_b_4x4(x, 0, block, so->scan, so->iscan);
Jingning Han2cdc1272015-10-09 09:57:42 -07001353#if CONFIG_VAR_TX
1354 ratey += cost_coeffs(x, 0, block, coeff_ctx, TX_4X4, so->scan,
1355 so->neighbors, cpi->sf.use_fast_coef_costing);
1356 *(tempa + idx) = !(p->eobs[block] == 0);
1357 *(templ + idy) = !(p->eobs[block] == 0);
1358#else
1359 ratey += cost_coeffs(x, 0, block, tempa + idx, templ + idy,
1360 TX_4X4,
Jingning Han3ee6db62015-08-05 19:00:31 -07001361 so->scan, so->neighbors,
1362 cpi->sf.use_fast_coef_costing);
Jingning Han2cdc1272015-10-09 09:57:42 -07001363#endif
Jingning Han3ee6db62015-08-05 19:00:31 -07001364 if (RDCOST(x->rdmult, x->rddiv, ratey, distortion) >= best_rd)
1365 goto next;
hui sud76e5b32015-08-13 16:27:19 -07001366 vp10_inv_txfm_add_4x4(BLOCK_OFFSET(pd->dqcoeff, block),
Yaowu Xu7c514e22015-09-28 15:55:46 -07001367 dst, dst_stride, p->eobs[block], DCT_DCT, 1);
Jingning Han3ee6db62015-08-05 19:00:31 -07001368 } else {
1369 int64_t unused;
hui sub3cc3a02015-08-24 14:37:54 -07001370 TX_TYPE tx_type = get_tx_type(PLANE_TYPE_Y, xd, block, TX_4X4);
Debargha Mukherjee9fc691e2015-09-03 02:58:12 -07001371 const scan_order *so = get_scan(TX_4X4, tx_type, 0);
Jingning Han2cdc1272015-10-09 09:57:42 -07001372#if CONFIG_VAR_TX
1373 int coeff_ctx = combine_entropy_contexts(*(tempa + idx),
1374 *(templ + idy));
1375#endif
Yaowu Xu7c514e22015-09-28 15:55:46 -07001376 vp10_fwd_txfm_4x4(src_diff, coeff, 8, tx_type, 0);
Jingning Han3ee6db62015-08-05 19:00:31 -07001377 vp10_regular_quantize_b_4x4(x, 0, block, so->scan, so->iscan);
Jingning Han2cdc1272015-10-09 09:57:42 -07001378#if CONFIG_VAR_TX
1379 ratey += cost_coeffs(x, 0, block, coeff_ctx, TX_4X4, so->scan,
1380 so->neighbors, cpi->sf.use_fast_coef_costing);
1381 *(tempa + idx) = !(p->eobs[block] == 0);
1382 *(templ + idy) = !(p->eobs[block] == 0);
1383#else
1384 ratey += cost_coeffs(x, 0, block, tempa + idx, templ + idy,
1385 TX_4X4, so->scan, so->neighbors,
1386 cpi->sf.use_fast_coef_costing);
1387#endif
Jingning Han3ee6db62015-08-05 19:00:31 -07001388 distortion += vp10_block_error(coeff, BLOCK_OFFSET(pd->dqcoeff, block),
1389 16, &unused) >> 2;
1390 if (RDCOST(x->rdmult, x->rddiv, ratey, distortion) >= best_rd)
1391 goto next;
hui sud76e5b32015-08-13 16:27:19 -07001392 vp10_inv_txfm_add_4x4(BLOCK_OFFSET(pd->dqcoeff, block),
Yaowu Xu7c514e22015-09-28 15:55:46 -07001393 dst, dst_stride, p->eobs[block], tx_type, 0);
Jingning Han3ee6db62015-08-05 19:00:31 -07001394 }
1395 }
1396 }
1397
1398 rate += ratey;
1399 this_rd = RDCOST(x->rdmult, x->rddiv, rate, distortion);
1400
1401 if (this_rd < best_rd) {
1402 *bestrate = rate;
1403 *bestratey = ratey;
1404 *bestdistortion = distortion;
1405 best_rd = this_rd;
1406 *best_mode = mode;
1407 memcpy(a, tempa, sizeof(tempa));
1408 memcpy(l, templ, sizeof(templ));
1409 for (idy = 0; idy < num_4x4_blocks_high * 4; ++idy)
1410 memcpy(best_dst + idy * 8, dst_init + idy * dst_stride,
1411 num_4x4_blocks_wide * 4);
1412 }
1413 next:
1414 {}
1415 }
1416
Jingning Hanf1376972015-09-10 12:42:21 -07001417 if (best_rd >= rd_thresh)
Jingning Han3ee6db62015-08-05 19:00:31 -07001418 return best_rd;
1419
1420 for (idy = 0; idy < num_4x4_blocks_high * 4; ++idy)
1421 memcpy(dst_init + idy * dst_stride, best_dst + idy * 8,
1422 num_4x4_blocks_wide * 4);
1423
1424 return best_rd;
1425}
1426
// Intra mode selection for sub-8x8 partitions: picks the best mode for each
// 4x4 / 4x8 / 8x4 sub-block of an 8x8 block via rd_pick_intra4x4block and
// accumulates the totals. Returns the combined RD cost, or INT64_MAX as
// soon as the accumulated cost can no longer beat best_rd.
static int64_t rd_pick_intra_sub_8x8_y_mode(VP10_COMP *cpi, MACROBLOCK *mb,
                                            int *rate, int *rate_y,
                                            int64_t *distortion,
                                            int64_t best_rd) {
  int i, j;
  const MACROBLOCKD *const xd = &mb->e_mbd;
  MODE_INFO *const mic = xd->mi[0];
  const MODE_INFO *above_mi = xd->above_mi;
  const MODE_INFO *left_mi = xd->left_mi;
  const BLOCK_SIZE bsize = xd->mi[0]->mbmi.sb_type;
  const int num_4x4_blocks_wide = num_4x4_blocks_wide_lookup[bsize];
  const int num_4x4_blocks_high = num_4x4_blocks_high_lookup[bsize];
  int idx, idy;
  int cost = 0;
  int64_t total_distortion = 0;
  int tot_rate_y = 0;
  int64_t total_rd = 0;
  // Local entropy-context copies, updated in place by each sub-block pick.
  ENTROPY_CONTEXT t_above[4], t_left[4];
  const int *bmode_costs = cpi->mbmode_cost;

  memcpy(t_above, xd->plane[0].above_context, sizeof(t_above));
  memcpy(t_left, xd->plane[0].left_context, sizeof(t_left));

  // Pick modes for each sub-block (of size 4x4, 4x8, or 8x4) in an 8x8 block.
  for (idy = 0; idy < 2; idy += num_4x4_blocks_high) {
    for (idx = 0; idx < 2; idx += num_4x4_blocks_wide) {
      PREDICTION_MODE best_mode = DC_PRED;
      int r = INT_MAX, ry = INT_MAX;
      int64_t d = INT64_MAX, this_rd = INT64_MAX;
      i = idy * 2 + idx;
      if (cpi->common.frame_type == KEY_FRAME) {
        // Key frames condition the mode cost on the above/left block modes.
        const PREDICTION_MODE A = vp10_above_block_mode(mic, above_mi, i);
        const PREDICTION_MODE L = vp10_left_block_mode(mic, left_mi, i);

        bmode_costs = cpi->y_mode_costs[A][L];
      }

      this_rd = rd_pick_intra4x4block(cpi, mb, idy, idx, &best_mode,
                                      bmode_costs, t_above + idx, t_left + idy,
                                      &r, &ry, &d, bsize, best_rd - total_rd);
      if (this_rd >= best_rd - total_rd)
        return INT64_MAX;

      total_rd += this_rd;
      cost += r;
      total_distortion += d;
      tot_rate_y += ry;

      // Replicate the chosen mode across all 4x4 units the sub-block spans.
      mic->bmi[i].as_mode = best_mode;
      for (j = 1; j < num_4x4_blocks_high; ++j)
        mic->bmi[i + j * 2].as_mode = best_mode;
      for (j = 1; j < num_4x4_blocks_wide; ++j)
        mic->bmi[i + j].as_mode = best_mode;

      if (total_rd >= best_rd)
        return INT64_MAX;
    }
  }

  *rate = cost;
  *rate_y = tot_rate_y;
  *distortion = total_distortion;
  // The block-level mode mirrors the last (bottom-right) sub-block's mode.
  mic->mbmi.mode = mic->bmi[3].as_mode;

  return RDCOST(mb->rdmult, mb->rddiv, cost, total_distortion);
}
1493
// This function is used only for intra_only frames
// Full luma intra mode search for a whole block: evaluates DC_PRED..TM_PRED
// (and a palette candidate when screen content tools are enabled), keeping
// the mode/tx-size (and EXT_TX type) with the lowest RD cost. Outputs go to
// rate/rate_tokenonly/distortion/skippable; the winner is also written into
// the block's MODE_INFO. Returns the best RD cost found (or the incoming
// best_rd if nothing beat it).
static int64_t rd_pick_intra_sby_mode(VP10_COMP *cpi, MACROBLOCK *x,
                                      int *rate, int *rate_tokenonly,
                                      int64_t *distortion, int *skippable,
                                      BLOCK_SIZE bsize,
                                      int64_t best_rd) {
  PREDICTION_MODE mode;
  PREDICTION_MODE mode_selected = DC_PRED;
  MACROBLOCKD *const xd = &x->e_mbd;
  MODE_INFO *const mic = xd->mi[0];
  int this_rate, this_rate_tokenonly, s;
  int64_t this_distortion, this_rd;
  TX_SIZE best_tx = TX_4X4;
#if CONFIG_EXT_TX
  TX_TYPE best_tx_type = DCT_DCT;
#endif  // CONFIG_EXT_TX
  int *bmode_costs;
  PALETTE_MODE_INFO palette_mode_info;
  uint8_t *best_palette_color_map = cpi->common.allow_screen_content_tools ?
      x->palette_buffer->best_palette_color_map : NULL;
  int rows = 4 * num_4x4_blocks_high_lookup[bsize];
  int cols = 4 * num_4x4_blocks_wide_lookup[bsize];
  int palette_ctx = 0;
  const MODE_INFO *above_mi = xd->above_mi;
  const MODE_INFO *left_mi = xd->left_mi;
  // Mode rates are conditioned on the above/left block modes.
  const PREDICTION_MODE A = vp10_above_block_mode(mic, above_mi, 0);
  const PREDICTION_MODE L = vp10_left_block_mode(mic, left_mi, 0);
  bmode_costs = cpi->y_mode_costs[A][L];

  memset(x->skip_txfm, SKIP_TXFM_NONE, sizeof(x->skip_txfm));
  palette_mode_info.palette_size[0] = 0;
  mic->mbmi.palette_mode_info.palette_size[0] = 0;
  // Palette signalling context: how many neighbors use palette mode.
  if (above_mi)
    palette_ctx += (above_mi->mbmi.palette_mode_info.palette_size[0] > 0);
  if (left_mi)
    palette_ctx += (left_mi->mbmi.palette_mode_info.palette_size[0] > 0);

  /* Y Search for intra prediction mode */
  for (mode = DC_PRED; mode <= TM_PRED; mode++) {
    mic->mbmi.mode = mode;

    super_block_yrd(cpi, x, &this_rate_tokenonly, &this_distortion,
                    &s, NULL, bsize, best_rd);

    if (this_rate_tokenonly == INT_MAX)
      continue;

    this_rate = this_rate_tokenonly + bmode_costs[mode];
    // DC_PRED must additionally signal "not palette" when palette is allowed.
    if (cpi->common.allow_screen_content_tools && mode == DC_PRED)
      this_rate +=
          vp10_cost_bit(vp10_default_palette_y_mode_prob[bsize - BLOCK_8X8]
                                                        [palette_ctx], 0);
    this_rd = RDCOST(x->rdmult, x->rddiv, this_rate, this_distortion);

    if (this_rd < best_rd) {
      mode_selected = mode;
      best_rd = this_rd;
      best_tx = mic->mbmi.tx_size;
#if CONFIG_EXT_TX
      best_tx_type = mic->mbmi.tx_type;
#endif  // CONFIG_EXT_TX
      *rate = this_rate;
      *rate_tokenonly = this_rate_tokenonly;
      *distortion = this_distortion;
      *skippable = s;
    }
  }

  // Try palette coding as an additional candidate; updates the outputs via
  // pointers only when it wins.
  if (cpi->common.allow_screen_content_tools)
    rd_pick_palette_intra_sby(cpi, x, bsize, palette_ctx, bmode_costs[DC_PRED],
                              &palette_mode_info, best_palette_color_map,
                              &best_tx, &mode_selected, &best_rd);

  // Commit the overall winner to the block's mode info.
  mic->mbmi.mode = mode_selected;
  mic->mbmi.tx_size = best_tx;
#if CONFIG_EXT_TX
  mic->mbmi.tx_type = best_tx_type;
#endif  // CONFIG_EXT_TX
  mic->mbmi.palette_mode_info.palette_size[0] =
      palette_mode_info.palette_size[0];
  if (palette_mode_info.palette_size[0] > 0) {
    memcpy(mic->mbmi.palette_mode_info.palette_colors,
           palette_mode_info.palette_colors,
           PALETTE_MAX_SIZE * sizeof(palette_mode_info.palette_colors[0]));
    memcpy(xd->plane[0].color_index_map, best_palette_color_map,
           rows * cols * sizeof(best_palette_color_map[0]));
  }

  return best_rd;
}
1584
Jingning Hana8dad552015-10-08 16:46:10 -07001585#if CONFIG_VAR_TX
// Compute the RD statistics (rate, distortion, sse, skip) contributed by a
// single transform block of size tx_size at (blk_row, blk_col) within the
// plane, accumulating into the output pointers:
//   - forward transform + quantize the residual,
//   - measure prediction SSE against the source (split into 8x8 calls when
//     the block extends past the visible frame edge),
//   - (non-HBD path) inverse transform into a local 32x32 buffer and measure
//     reconstruction SSE for the distortion term,
//   - add the coefficient token cost under coeff_ctx.
static void tx_block_rd_b(const VP10_COMP *cpi, MACROBLOCK *x, TX_SIZE tx_size,
                          int blk_row, int blk_col, int plane, int block,
                          int plane_bsize, int coeff_ctx,
                          int *rate, int64_t *dist, int64_t *bsse, int *skip) {
  MACROBLOCKD *xd = &x->e_mbd;
  const struct macroblock_plane *const p = &x->plane[plane];
  struct macroblockd_plane *const pd = &xd->plane[plane];
#if CONFIG_VP9_HIGHBITDEPTH
  const int ss_txfrm_size = tx_size << 1;
  int64_t this_sse;
  // 32x32 transform output is already scaled; smaller sizes need >> 2.
  int shift = tx_size == TX_32X32 ? 0 : 2;
  tran_low_t *const coeff = BLOCK_OFFSET(p->coeff, block);
#endif
  unsigned int tmp_sse = 0;
  tran_low_t *const dqcoeff = BLOCK_OFFSET(pd->dqcoeff, block);
  PLANE_TYPE plane_type = (plane == 0) ? PLANE_TYPE_Y : PLANE_TYPE_UV;
  TX_TYPE tx_type = get_tx_type(plane_type, xd, block, tx_size);
  const scan_order *const scan_order =
      get_scan(tx_size, tx_type, is_inter_block(&xd->mi[0]->mbmi));

  BLOCK_SIZE txm_bsize = txsize_to_bsize[tx_size];
  // Square transform: bh is both width and height in pixels.
  int bh = 4 * num_4x4_blocks_wide_lookup[txm_bsize];
  int src_stride = p->src.stride;
  uint8_t *src = &p->src.buf[4 * blk_row * src_stride + 4 * blk_col];
  uint8_t *dst = &pd->dst.buf[4 * blk_row * pd->dst.stride + 4 * blk_col];
  // Scratch reconstruction buffer, stride 32, large enough for TX_32X32.
  DECLARE_ALIGNED(16, uint8_t, rec_buffer[32 * 32]);

  int max_blocks_high = num_4x4_blocks_high_lookup[plane_bsize];
  int max_blocks_wide = num_4x4_blocks_wide_lookup[plane_bsize];

  // Clamp the 4x4-unit extent to the visible frame area at the borders.
  if (xd->mb_to_bottom_edge < 0)
    max_blocks_high += xd->mb_to_bottom_edge >> (5 + pd->subsampling_y);
  if (xd->mb_to_right_edge < 0)
    max_blocks_wide += xd->mb_to_right_edge >> (5 + pd->subsampling_x);

  vp10_xform_quant(x, plane, block, blk_row, blk_col, plane_bsize, tx_size);

  // Snapshot the prediction into rec_buffer (stride 32) so it can be
  // reconstructed locally without touching the frame buffer.
  vpx_convolve_copy(dst, pd->dst.stride, rec_buffer, 32,
                    NULL, 0, NULL, 0, bh, bh);

  if (blk_row + (bh >> 2) > max_blocks_high ||
      blk_col + (bh >> 2) > max_blocks_wide) {
    // Partial block at the frame edge: accumulate SSE over visible 8x8 tiles.
    int idx, idy;
    unsigned int this_sse;
    int blocks_height = VPXMIN(bh >> 2, max_blocks_high - blk_row);
    int blocks_width = VPXMIN(bh >> 2, max_blocks_wide - blk_col);
    for (idy = 0; idy < blocks_height; idy += 2) {
      for (idx = 0; idx < blocks_width; idx += 2) {
        cpi->fn_ptr[BLOCK_8X8].vf(src + 4 * idy * src_stride + 4 * idx,
                                  src_stride,
                                  rec_buffer + 4 * idy * 32 + 4 * idx,
                                  32, &this_sse);
        tmp_sse += this_sse;
      }
    }
  } else {
    cpi->fn_ptr[txm_bsize].vf(src, src_stride, rec_buffer, 32, &tmp_sse);
  }

#if CONFIG_VP9_HIGHBITDEPTH
  *dist += vp10_highbd_block_error(coeff, dqcoeff, 16 << ss_txfrm_size,
                                   &this_sse, xd->bd) >> shift;
  *bsse += this_sse >> shift;
#else
  *bsse += (int64_t)tmp_sse * 16;

  if (p->eobs[block] > 0) {
    // TODO(jingning): integrate multiple transform type experiment
    TX_TYPE tx_type = DCT_DCT;
    switch (tx_size) {
      case TX_32X32:
        vp10_inv_txfm_add_32x32(dqcoeff, rec_buffer, 32, p->eobs[block],
                                tx_type);
        break;
      case TX_16X16:
        vp10_inv_txfm_add_16x16(dqcoeff, rec_buffer, 32, p->eobs[block],
                                tx_type);
        break;
      case TX_8X8:
        vp10_inv_txfm_add_8x8(dqcoeff, rec_buffer, 32, p->eobs[block],
                              tx_type);
        break;
      case TX_4X4:
        vp10_inv_txfm_add_4x4(dqcoeff, rec_buffer, 32, p->eobs[block],
                              tx_type,
                              xd->lossless[xd->mi[0]->mbmi.segment_id]);
        break;
      default:
        assert(0 && "Invalid transform size");
        break;
    }

    // Re-measure SSE against the now-reconstructed block for distortion.
    if ((bh >> 2) + blk_col > max_blocks_wide ||
        (bh >> 2) + blk_row > max_blocks_high) {
      int idx, idy;
      unsigned int this_sse;
      int blocks_height = VPXMIN(bh >> 2, max_blocks_high - blk_row);
      int blocks_width = VPXMIN(bh >> 2, max_blocks_wide - blk_col);
      tmp_sse = 0;
      for (idy = 0; idy < blocks_height; idy += 2) {
        for (idx = 0; idx < blocks_width; idx += 2) {
          cpi->fn_ptr[BLOCK_8X8].vf(src + 4 * idy * src_stride + 4 * idx,
                                    src_stride,
                                    rec_buffer + 4 * idy * 32 + 4 * idx,
                                    32, &this_sse);
          tmp_sse += this_sse;
        }
      }
    } else {
      cpi->fn_ptr[txm_bsize].vf(src, src_stride,
                                rec_buffer, 32, &tmp_sse);
    }
  }
  // If eob == 0 this is still the prediction SSE (skip distortion).
  *dist += (int64_t)tmp_sse * 16;
#endif  // CONFIG_VP9_HIGHBITDEPTH

  *rate += cost_coeffs(x, plane, block, coeff_ctx, tx_size,
                       scan_order->scan, scan_order->neighbors, 0);
  *skip &= (p->eobs[block] == 0);
}
1706
// Recursively choose the transform partition for the block at
// (blk_row, blk_col): compare the RD cost of coding at tx_size against the
// best cost of splitting into four (tx_size - 1) sub-blocks, then commit the
// winner into mbmi->inter_tx_size / mbmi->tx_size and the entropy and
// txfm-partition contexts. Outputs rate/dist/bsse/skip for the chosen
// arrangement; *is_cost_valid is cleared when no cost below ref_best_rd
// could be found.
static void select_tx_block(const VP10_COMP *cpi, MACROBLOCK *x,
                            int blk_row, int blk_col, int plane, int block,
                            TX_SIZE tx_size, BLOCK_SIZE plane_bsize,
                            ENTROPY_CONTEXT *ta, ENTROPY_CONTEXT *tl,
                            TXFM_CONTEXT *tx_above, TXFM_CONTEXT *tx_left,
                            int *rate, int64_t *dist,
                            int64_t *bsse, int *skip,
                            int64_t ref_best_rd, int *is_cost_valid) {
  MACROBLOCKD *const xd = &x->e_mbd;
  MB_MODE_INFO *const mbmi = &xd->mi[0]->mbmi;
  struct macroblock_plane *const p = &x->plane[plane];
  struct macroblockd_plane *const pd = &xd->plane[plane];
  // Index into the 8x8-grid inter_tx_size map for this block position.
  int tx_idx = (blk_row >> (1 - pd->subsampling_y)) * 8 +
               (blk_col >> (1 - pd->subsampling_x));
  int max_blocks_high = num_4x4_blocks_high_lookup[plane_bsize];
  int max_blocks_wide = num_4x4_blocks_wide_lookup[plane_bsize];
  int64_t this_rd = INT64_MAX;  // cost of coding at tx_size (no split)
  ENTROPY_CONTEXT *pta = ta + blk_col;
  ENTROPY_CONTEXT *ptl = tl + blk_row;
  ENTROPY_CONTEXT stxa = 0, stxl = 0;
  int coeff_ctx, i;
  int ctx = txfm_partition_context(tx_above + (blk_col >> 1),
                                   tx_left + (blk_row >> 1), tx_size);

  int64_t sum_dist = 0, sum_bsse = 0;
  int64_t sum_rd = INT64_MAX;  // cost of the four-way split
  // Split cost starts with the bit signalling "partition".
  int sum_rate = vp10_cost_bit(cpi->common.fc->txfm_partition_prob[ctx], 1);
  int all_skip = 1;
  int tmp_eob = 0;
  int zero_blk_rate;

  if (ref_best_rd < 0) {
    *is_cost_valid = 0;
    return;
  }

  // Collapse the per-4x4 entropy contexts covered by this transform into a
  // single above/left pair (any nonzero byte => context set).
  switch (tx_size) {
    case TX_4X4:
      stxa = pta[0];
      stxl = ptl[0];
      break;
    case TX_8X8:
      stxa = !!*(const uint16_t *)&pta[0];
      stxl = !!*(const uint16_t *)&ptl[0];
      break;
    case TX_16X16:
      stxa = !!*(const uint32_t *)&pta[0];
      stxl = !!*(const uint32_t *)&ptl[0];
      break;
    case TX_32X32:
      stxa = !!*(const uint64_t *)&pta[0];
      stxl = !!*(const uint64_t *)&ptl[0];
      break;
    default:
      assert(0 && "Invalid transform size.");
      break;
  }
  coeff_ctx = combine_entropy_contexts(stxa, stxl);

  if (xd->mb_to_bottom_edge < 0)
    max_blocks_high += xd->mb_to_bottom_edge >> (5 + pd->subsampling_y);
  if (xd->mb_to_right_edge < 0)
    max_blocks_wide += xd->mb_to_right_edge >> (5 + pd->subsampling_x);

  *rate = 0;
  *dist = 0;
  *bsse = 0;
  *skip = 1;

  // Entirely outside the visible frame: nothing to code.
  if (blk_row >= max_blocks_high || blk_col >= max_blocks_wide)
    return;

  // Cost of coding this block as all-zero (a single EOB token).
  zero_blk_rate =
      x->token_costs[tx_size][pd->plane_type][1][0][0][coeff_ctx][EOB_TOKEN];

  if (cpi->common.tx_mode == TX_MODE_SELECT || tx_size == TX_4X4) {
    mbmi->inter_tx_size[tx_idx] = tx_size;
    tx_block_rd_b(cpi, x, tx_size, blk_row, blk_col, plane, block,
                  plane_bsize, coeff_ctx, rate, dist, bsse, skip);

    // Force the block to all-zero when that is RD-cheaper (not in lossless).
    if ((RDCOST(x->rdmult, x->rddiv, *rate, *dist) >=
         RDCOST(x->rdmult, x->rddiv, zero_blk_rate, *bsse) || *skip == 1) &&
        !xd->lossless[mbmi->segment_id]) {
      *rate = zero_blk_rate;
      *dist = *bsse;
      *skip = 1;
      x->blk_skip[plane][blk_row * max_blocks_wide + blk_col] = 1;
      p->eobs[block] = 0;
    } else {
      x->blk_skip[plane][blk_row * max_blocks_wide + blk_col] = 0;
      *skip = 0;
    }

    // Add the "no partition" signalling bit (TX_4X4 cannot split further).
    if (tx_size > TX_4X4)
      *rate += vp10_cost_bit(cpi->common.fc->txfm_partition_prob[ctx], 0);
    this_rd = RDCOST(x->rdmult, x->rddiv, *rate, *dist);
    tmp_eob = p->eobs[block];
  }

  if (tx_size > TX_4X4) {
    // Evaluate the four-way split into (tx_size - 1) blocks.
    BLOCK_SIZE bsize = txsize_to_bsize[tx_size];
    int bsl = b_height_log2_lookup[bsize];
    int sub_step = 1 << (2 * (tx_size - 1));  // 4x4 units per sub-block
    int i;
    int this_rate;
    int64_t this_dist;
    int64_t this_bsse;
    int this_skip;
    int this_cost_valid = 1;
    int64_t tmp_rd = 0;

    --bsl;
    for (i = 0; i < 4 && this_cost_valid; ++i) {
      int offsetr = (i >> 1) << bsl;
      int offsetc = (i & 0x01) << bsl;
      select_tx_block(cpi, x, blk_row + offsetr, blk_col + offsetc,
                      plane, block + i * sub_step, tx_size - 1,
                      plane_bsize, ta, tl, tx_above, tx_left,
                      &this_rate, &this_dist,
                      &this_bsse, &this_skip,
                      ref_best_rd - tmp_rd, &this_cost_valid);
      sum_rate += this_rate;
      sum_dist += this_dist;
      sum_bsse += this_bsse;
      all_skip &= this_skip;
      tmp_rd += RDCOST(x->rdmult, x->rddiv, this_rate, this_dist);
      // Early out: split already costs more than coding at tx_size.
      if (this_rd < tmp_rd)
        break;
    }
    if (this_cost_valid)
      sum_rd = tmp_rd;
  }

  if (this_rd < sum_rd) {
    // Keep this transform size: update contexts and the tx-size maps.
    int idx, idy;
    for (i = 0; i < (1 << tx_size); ++i)
      pta[i] = ptl[i] = !(tmp_eob == 0);
    txfm_partition_update(tx_above + (blk_col >> 1),
                          tx_left + (blk_row >> 1), tx_size);
    mbmi->inter_tx_size[tx_idx] = tx_size;

    // Fill the whole covered region of the 8-wide inter_tx_size grid.
    for (idy = 0; idy < (1 << tx_size) / 2; ++idy)
      for (idx = 0; idx < (1 << tx_size) / 2; ++idx)
        mbmi->inter_tx_size[tx_idx + (idy << 3) + idx] = tx_size;
    mbmi->tx_size = tx_size;
    if (this_rd == INT64_MAX)
      *is_cost_valid = 0;
    x->blk_skip[plane][blk_row * max_blocks_wide + blk_col] = *skip;
  } else {
    // Split wins: report the accumulated sub-block statistics.
    *rate = sum_rate;
    *dist = sum_dist;
    *bsse = sum_bsse;
    *skip = all_skip;
    if (sum_rd == INT64_MAX)
      *is_cost_valid = 0;
  }
}
1864
// Compute the luma RD cost of an inter block, running the recursive
// transform-size selection (select_tx_block) over every max-tx-size tile of
// the plane. On early termination (accumulated cost exceeds ref_best_rd or a
// sub-search failed) the outputs are reset to the INT_MAX / INT64_MAX
// "invalid" sentinels.
static void inter_block_yrd(const VP10_COMP *cpi, MACROBLOCK *x,
                            int *rate, int64_t *distortion, int *skippable,
                            int64_t *sse, BLOCK_SIZE bsize,
                            int64_t ref_best_rd) {
  MACROBLOCKD *const xd = &x->e_mbd;
  int is_cost_valid = 1;
  int64_t this_rd = 0;

  if (ref_best_rd < 0)
    is_cost_valid = 0;

  *rate = 0;
  *distortion = 0;
  *sse = 0;
  *skippable = 1;

#if CONFIG_EXT_TX
  xd->mi[0]->mbmi.tx_type = DCT_DCT;
#endif

  if (is_cost_valid) {
    const struct macroblockd_plane *const pd = &xd->plane[0];
    const BLOCK_SIZE plane_bsize = get_plane_block_size(bsize, pd);
    const int mi_width = num_4x4_blocks_wide_lookup[plane_bsize];
    const int mi_height = num_4x4_blocks_high_lookup[plane_bsize];
    BLOCK_SIZE txb_size = txsize_to_bsize[max_txsize_lookup[plane_bsize]];
    int bh = num_4x4_blocks_wide_lookup[txb_size];
    int idx, idy;
    int block = 0;
    int step = 1 << (max_txsize_lookup[plane_bsize] * 2);
    ENTROPY_CONTEXT ctxa[16], ctxl[16];
    TXFM_CONTEXT tx_above[8], tx_left[8];

    int pnrate = 0, pnskip = 1;
    int64_t pndist = 0, pnsse = 0;

    // Work on local copies of the entropy / txfm-partition contexts so the
    // search does not disturb the frame-level state.
    vp10_get_entropy_contexts(bsize, TX_4X4, pd, ctxa, ctxl);
    memcpy(tx_above, xd->above_txfm_context,
           sizeof(TXFM_CONTEXT) * (mi_width >> 1));
    memcpy(tx_left, xd->left_txfm_context,
           sizeof(TXFM_CONTEXT) * (mi_height >> 1));

    // Visit each max-transform-size tile in raster order.
    for (idy = 0; idy < mi_height; idy += bh) {
      for (idx = 0; idx < mi_width; idx += bh) {
        select_tx_block(cpi, x, idy, idx, 0, block,
                        max_txsize_lookup[plane_bsize], plane_bsize,
                        ctxa, ctxl, tx_above, tx_left,
                        &pnrate, &pndist, &pnsse, &pnskip,
                        ref_best_rd - this_rd, &is_cost_valid);
        *rate += pnrate;
        *distortion += pndist;
        *sse += pnsse;
        *skippable &= pnskip;
        // Track the better of coded vs. skip cost to tighten the rd budget
        // handed to the remaining tiles.
        this_rd += VPXMIN(RDCOST(x->rdmult, x->rddiv, pnrate, pndist),
                          RDCOST(x->rdmult, x->rddiv, 0, pnsse));
        block += step;
      }
    }
  }

  this_rd = VPXMIN(RDCOST(x->rdmult, x->rddiv, *rate, *distortion),
                   RDCOST(x->rdmult, x->rddiv, 0, *sse));
  if (this_rd > ref_best_rd)
    is_cost_valid = 0;

  if (!is_cost_valid) {
    // reset cost value
    *rate = INT_MAX;
    *distortion = INT64_MAX;
    *sse = INT64_MAX;
    *skippable = 0;
  }
}
1938
// Accumulate the RD cost of a transform block using the transform sizes
// already chosen in mbmi->inter_tx_size (no searching): recurse down the
// partition tree until tx_size matches the stored per-block size, then cost
// that block with tx_block_rd_b and propagate the eob flag into the
// above/left entropy contexts.
static void tx_block_rd(const VP10_COMP *cpi, MACROBLOCK *x,
                        int blk_row, int blk_col, int plane, int block,
                        TX_SIZE tx_size, BLOCK_SIZE plane_bsize,
                        ENTROPY_CONTEXT *above_ctx, ENTROPY_CONTEXT *left_ctx,
                        int *rate, int64_t *dist, int64_t *bsse, int *skip) {
  MACROBLOCKD *const xd = &x->e_mbd;
  MB_MODE_INFO *const mbmi = &xd->mi[0]->mbmi;
  struct macroblock_plane *const p = &x->plane[plane];
  struct macroblockd_plane *const pd = &xd->plane[plane];
  BLOCK_SIZE bsize = txsize_to_bsize[tx_size];
  // Index into the 8x8-grid inter_tx_size map for this block position.
  int tx_idx = (blk_row >> (1 - pd->subsampling_y)) * 8 +
               (blk_col >> (1 - pd->subsampling_x));
  // Chroma derives its transform size from the stored luma size.
  TX_SIZE plane_tx_size = plane ?
      get_uv_tx_size_impl(mbmi->inter_tx_size[tx_idx], bsize,
                          0, 0) :
      mbmi->inter_tx_size[tx_idx];

  int max_blocks_high = num_4x4_blocks_high_lookup[plane_bsize];
  int max_blocks_wide = num_4x4_blocks_wide_lookup[plane_bsize];

  if (xd->mb_to_bottom_edge < 0)
    max_blocks_high += xd->mb_to_bottom_edge >> (5 + pd->subsampling_y);
  if (xd->mb_to_right_edge < 0)
    max_blocks_wide += xd->mb_to_right_edge >> (5 + pd->subsampling_x);

  // Entirely outside the visible frame: nothing to cost.
  if (blk_row >= max_blocks_high || blk_col >= max_blocks_wide)
    return;

  if (tx_size == plane_tx_size) {
    int coeff_ctx, i;
    ENTROPY_CONTEXT *ta = above_ctx + blk_col;
    ENTROPY_CONTEXT *tl = left_ctx + blk_row;
    // Collapse the per-4x4 contexts spanned by this transform into one flag.
    switch (tx_size) {
      case TX_4X4:
        break;
      case TX_8X8:
        ta[0] = !!*(const uint16_t *)&ta[0];
        tl[0] = !!*(const uint16_t *)&tl[0];
        break;
      case TX_16X16:
        ta[0] = !!*(const uint32_t *)&ta[0];
        tl[0] = !!*(const uint32_t *)&tl[0];
        break;
      case TX_32X32:
        ta[0] = !!*(const uint64_t *)&ta[0];
        tl[0] = !!*(const uint64_t *)&tl[0];
        break;
      default:
        assert(0 && "Invalid transform size.");
        break;
    }
    coeff_ctx = combine_entropy_contexts(ta[0], tl[0]);
    tx_block_rd_b(cpi, x, tx_size, blk_row, blk_col, plane, block,
                  plane_bsize, coeff_ctx, rate, dist, bsse, skip);
    // Broadcast the eob flag across all 4x4 context slots this block covers.
    for (i = 0; i < (1 << tx_size); ++i) {
      ta[i] = !(p->eobs[block] == 0);
      tl[i] = !(p->eobs[block] == 0);
    }
  } else {
    // Stored size is smaller: recurse into the four quadrants.
    int bsl = b_width_log2_lookup[bsize];
    int step = 1 << (2 * (tx_size - 1));  // 4x4 units per quadrant
    int i;

    assert(bsl > 0);
    --bsl;

    for (i = 0; i < 4; ++i) {
      int offsetr = (i >> 1) << bsl;
      int offsetc = (i & 0x01) << bsl;
      tx_block_rd(cpi, x, blk_row + offsetr, blk_col + offsetc, plane,
                  block + i * step, tx_size - 1, plane_bsize,
                  above_ctx, left_ctx, rate, dist, bsse, skip);
    }
  }
}
2014
2015// Return value 0: early termination triggered, no valid rd cost available;
2016// 1: rd cost values are valid.
2017static int inter_block_uvrd(const VP10_COMP *cpi, MACROBLOCK *x,
2018 int *rate, int64_t *distortion, int *skippable,
2019 int64_t *sse, BLOCK_SIZE bsize,
2020 int64_t ref_best_rd) {
2021 MACROBLOCKD *const xd = &x->e_mbd;
2022 MB_MODE_INFO *const mbmi = &xd->mi[0]->mbmi;
2023 int plane;
2024 int is_cost_valid = 1;
2025 int64_t this_rd;
2026
2027 if (ref_best_rd < 0)
2028 is_cost_valid = 0;
2029
2030 if (is_inter_block(mbmi) && is_cost_valid) {
2031 int plane;
2032 for (plane = 1; plane < MAX_MB_PLANE; ++plane)
2033 vp10_subtract_plane(x, bsize, plane);
2034 }
2035
2036 *rate = 0;
2037 *distortion = 0;
2038 *sse = 0;
2039 *skippable = 1;
2040
2041 for (plane = 1; plane < MAX_MB_PLANE; ++plane) {
2042 const struct macroblockd_plane *const pd = &xd->plane[plane];
2043 const BLOCK_SIZE plane_bsize = get_plane_block_size(bsize, pd);
2044 const int mi_width = num_4x4_blocks_wide_lookup[plane_bsize];
2045 const int mi_height = num_4x4_blocks_high_lookup[plane_bsize];
2046 BLOCK_SIZE txb_size = txsize_to_bsize[max_txsize_lookup[plane_bsize]];
2047 int bh = num_4x4_blocks_wide_lookup[txb_size];
2048 int idx, idy;
2049 int block = 0;
2050 int step = 1 << (max_txsize_lookup[plane_bsize] * 2);
2051 int pnrate = 0, pnskip = 1;
2052 int64_t pndist = 0, pnsse = 0;
2053 ENTROPY_CONTEXT ta[16], tl[16];
2054
2055 vp10_get_entropy_contexts(bsize, TX_4X4, pd, ta, tl);
2056
2057 for (idy = 0; idy < mi_height; idy += bh) {
2058 for (idx = 0; idx < mi_width; idx += bh) {
2059 tx_block_rd(cpi, x, idy, idx, plane, block,
2060 max_txsize_lookup[plane_bsize], plane_bsize, ta, tl,
2061 &pnrate, &pndist, &pnsse, &pnskip);
2062 block += step;
2063 }
2064 }
2065
2066 if (pnrate == INT_MAX) {
2067 is_cost_valid = 0;
2068 break;
2069 }
2070
2071 *rate += pnrate;
2072 *distortion += pndist;
2073 *sse += pnsse;
2074 *skippable &= pnskip;
2075
2076 this_rd = VPXMIN(RDCOST(x->rdmult, x->rddiv, *rate, *distortion),
2077 RDCOST(x->rdmult, x->rddiv, 0, *sse));
2078
2079 if (this_rd > ref_best_rd) {
2080 is_cost_valid = 0;
2081 break;
2082 }
2083 }
2084
2085 if (!is_cost_valid) {
2086 // reset cost value
2087 *rate = INT_MAX;
2088 *distortion = INT64_MAX;
2089 *sse = INT64_MAX;
2090 *skippable = 0;
2091 }
2092
2093 return is_cost_valid;
2094}
2095#endif
2096
Jingning Han3ee6db62015-08-05 19:00:31 -07002097// Return value 0: early termination triggered, no valid rd cost available;
2098// 1: rd cost values are valid.
Yaowu Xu26a9afc2015-08-13 09:42:27 -07002099static int super_block_uvrd(const VP10_COMP *cpi, MACROBLOCK *x,
Jingning Han3ee6db62015-08-05 19:00:31 -07002100 int *rate, int64_t *distortion, int *skippable,
2101 int64_t *sse, BLOCK_SIZE bsize,
2102 int64_t ref_best_rd) {
2103 MACROBLOCKD *const xd = &x->e_mbd;
2104 MB_MODE_INFO *const mbmi = &xd->mi[0]->mbmi;
2105 const TX_SIZE uv_tx_size = get_uv_tx_size(mbmi, &xd->plane[1]);
2106 int plane;
2107 int pnrate = 0, pnskip = 1;
2108 int64_t pndist = 0, pnsse = 0;
2109 int is_cost_valid = 1;
2110
2111 if (ref_best_rd < 0)
2112 is_cost_valid = 0;
2113
2114 if (is_inter_block(mbmi) && is_cost_valid) {
2115 int plane;
2116 for (plane = 1; plane < MAX_MB_PLANE; ++plane)
2117 vp10_subtract_plane(x, bsize, plane);
2118 }
2119
2120 *rate = 0;
2121 *distortion = 0;
2122 *sse = 0;
2123 *skippable = 1;
2124
2125 for (plane = 1; plane < MAX_MB_PLANE; ++plane) {
Jingning Han71c15602015-10-13 12:40:39 -07002126 txfm_rd_in_plane(x,
2127#if CONFIG_VAR_TX
2128 cpi,
2129#endif
2130 &pnrate, &pndist, &pnskip, &pnsse,
Jingning Han3ee6db62015-08-05 19:00:31 -07002131 ref_best_rd, plane, bsize, uv_tx_size,
2132 cpi->sf.use_fast_coef_costing);
2133 if (pnrate == INT_MAX) {
2134 is_cost_valid = 0;
2135 break;
2136 }
2137 *rate += pnrate;
2138 *distortion += pndist;
2139 *sse += pnsse;
2140 *skippable &= pnskip;
2141 }
2142
2143 if (!is_cost_valid) {
2144 // reset cost value
2145 *rate = INT_MAX;
2146 *distortion = INT64_MAX;
2147 *sse = INT64_MAX;
2148 *skippable = 0;
2149 }
2150
2151 return is_cost_valid;
2152}
2153
Yaowu Xu26a9afc2015-08-13 09:42:27 -07002154static int64_t rd_pick_intra_sbuv_mode(VP10_COMP *cpi, MACROBLOCK *x,
Jingning Han3ee6db62015-08-05 19:00:31 -07002155 PICK_MODE_CONTEXT *ctx,
2156 int *rate, int *rate_tokenonly,
2157 int64_t *distortion, int *skippable,
2158 BLOCK_SIZE bsize, TX_SIZE max_tx_size) {
2159 MACROBLOCKD *xd = &x->e_mbd;
2160 PREDICTION_MODE mode;
2161 PREDICTION_MODE mode_selected = DC_PRED;
2162 int64_t best_rd = INT64_MAX, this_rd;
2163 int this_rate_tokenonly, this_rate, s;
2164 int64_t this_distortion, this_sse;
2165
2166 memset(x->skip_txfm, SKIP_TXFM_NONE, sizeof(x->skip_txfm));
hui su5d011cb2015-09-15 12:44:13 -07002167 xd->mi[0]->mbmi.palette_mode_info.palette_size[1] = 0;
Jingning Han3ee6db62015-08-05 19:00:31 -07002168 for (mode = DC_PRED; mode <= TM_PRED; ++mode) {
2169 if (!(cpi->sf.intra_uv_mode_mask[max_tx_size] & (1 << mode)))
2170 continue;
2171
2172 xd->mi[0]->mbmi.uv_mode = mode;
2173
2174 if (!super_block_uvrd(cpi, x, &this_rate_tokenonly,
2175 &this_distortion, &s, &this_sse, bsize, best_rd))
2176 continue;
Ronald S. Bultjed8f3bb12015-10-13 14:07:47 -04002177 this_rate = this_rate_tokenonly + cpi->intra_uv_mode_cost[mode];
Jingning Han3ee6db62015-08-05 19:00:31 -07002178 this_rd = RDCOST(x->rdmult, x->rddiv, this_rate, this_distortion);
2179
2180 if (this_rd < best_rd) {
2181 mode_selected = mode;
2182 best_rd = this_rd;
2183 *rate = this_rate;
2184 *rate_tokenonly = this_rate_tokenonly;
2185 *distortion = this_distortion;
2186 *skippable = s;
2187 if (!x->select_tx_size)
2188 swap_block_ptr(x, ctx, 2, 0, 1, MAX_MB_PLANE);
2189 }
2190 }
2191
2192 xd->mi[0]->mbmi.uv_mode = mode_selected;
2193 return best_rd;
2194}
2195
Yaowu Xu26a9afc2015-08-13 09:42:27 -07002196static int64_t rd_sbuv_dcpred(const VP10_COMP *cpi, MACROBLOCK *x,
Jingning Han3ee6db62015-08-05 19:00:31 -07002197 int *rate, int *rate_tokenonly,
2198 int64_t *distortion, int *skippable,
2199 BLOCK_SIZE bsize) {
Jingning Han3ee6db62015-08-05 19:00:31 -07002200 int64_t unused;
2201
2202 x->e_mbd.mi[0]->mbmi.uv_mode = DC_PRED;
2203 memset(x->skip_txfm, SKIP_TXFM_NONE, sizeof(x->skip_txfm));
2204 super_block_uvrd(cpi, x, rate_tokenonly, distortion,
2205 skippable, &unused, bsize, INT64_MAX);
Ronald S. Bultjed8f3bb12015-10-13 14:07:47 -04002206 *rate = *rate_tokenonly + cpi->intra_uv_mode_cost[DC_PRED];
Jingning Han3ee6db62015-08-05 19:00:31 -07002207 return RDCOST(x->rdmult, x->rddiv, *rate, *distortion);
2208}
2209
Yaowu Xu26a9afc2015-08-13 09:42:27 -07002210static void choose_intra_uv_mode(VP10_COMP *cpi, MACROBLOCK *const x,
Jingning Han3ee6db62015-08-05 19:00:31 -07002211 PICK_MODE_CONTEXT *ctx,
2212 BLOCK_SIZE bsize, TX_SIZE max_tx_size,
2213 int *rate_uv, int *rate_uv_tokenonly,
2214 int64_t *dist_uv, int *skip_uv,
2215 PREDICTION_MODE *mode_uv) {
2216 // Use an estimated rd for uv_intra based on DC_PRED if the
2217 // appropriate speed flag is set.
2218 if (cpi->sf.use_uv_intra_rd_estimate) {
2219 rd_sbuv_dcpred(cpi, x, rate_uv, rate_uv_tokenonly, dist_uv,
2220 skip_uv, bsize < BLOCK_8X8 ? BLOCK_8X8 : bsize);
2221 // Else do a proper rd search for each possible transform size that may
2222 // be considered in the main rd loop.
2223 } else {
2224 rd_pick_intra_sbuv_mode(cpi, x, ctx,
2225 rate_uv, rate_uv_tokenonly, dist_uv, skip_uv,
2226 bsize < BLOCK_8X8 ? BLOCK_8X8 : bsize, max_tx_size);
2227 }
2228 *mode_uv = x->e_mbd.mi[0]->mbmi.uv_mode;
2229}
2230
Yaowu Xu26a9afc2015-08-13 09:42:27 -07002231static int cost_mv_ref(const VP10_COMP *cpi, PREDICTION_MODE mode,
Jingning Han3ee6db62015-08-05 19:00:31 -07002232 int mode_context) {
2233 assert(is_inter_mode(mode));
2234 return cpi->inter_mode_cost[mode_context][INTER_OFFSET(mode)];
2235}
2236
// Set the motion vector(s) for sub-8x8 partition i according to 'mode'
// (NEWMV takes seg_mvs and pays mv bit cost; NEAR/NEARESTMV take frame_mv;
// ZEROMV is zero), replicate the bmi entry across the 4x4 blocks the
// partition covers, and return the mode signalling cost plus any mv cost.
static int set_and_cost_bmi_mvs(VP10_COMP *cpi, MACROBLOCK *x, MACROBLOCKD *xd,
                                int i,
                                PREDICTION_MODE mode, int_mv this_mv[2],
                                int_mv frame_mv[MB_MODE_COUNT][MAX_REF_FRAMES],
                                int_mv seg_mvs[MAX_REF_FRAMES],
                                int_mv *best_ref_mv[2], const int *mvjcost,
                                int *mvcost[2]) {
  MODE_INFO *const mic = xd->mi[0];
  const MB_MODE_INFO *const mbmi = &mic->mbmi;
  const MB_MODE_INFO_EXT *const mbmi_ext = x->mbmi_ext;
  int thismvcost = 0;
  int idx, idy;
  const int num_4x4_blocks_wide = num_4x4_blocks_wide_lookup[mbmi->sb_type];
  const int num_4x4_blocks_high = num_4x4_blocks_high_lookup[mbmi->sb_type];
  const int is_compound = has_second_ref(mbmi);

  switch (mode) {
    case NEWMV:
      // New motion vector: take it from seg_mvs and add its coding cost
      // relative to the best reference mv (second ref too when compound).
      this_mv[0].as_int = seg_mvs[mbmi->ref_frame[0]].as_int;
      thismvcost += vp10_mv_bit_cost(&this_mv[0].as_mv, &best_ref_mv[0]->as_mv,
                                     mvjcost, mvcost, MV_COST_WEIGHT_SUB);
      if (is_compound) {
        this_mv[1].as_int = seg_mvs[mbmi->ref_frame[1]].as_int;
        thismvcost += vp10_mv_bit_cost(&this_mv[1].as_mv, &best_ref_mv[1]->as_mv,
                                       mvjcost, mvcost, MV_COST_WEIGHT_SUB);
      }
      break;
    case NEARMV:
    case NEARESTMV:
      // Predicted mv: no extra mv cost, only the mode cost below.
      this_mv[0].as_int = frame_mv[mode][mbmi->ref_frame[0]].as_int;
      if (is_compound)
        this_mv[1].as_int = frame_mv[mode][mbmi->ref_frame[1]].as_int;
      break;
    case ZEROMV:
      this_mv[0].as_int = 0;
      if (is_compound)
        this_mv[1].as_int = 0;
      break;
    default:
      break;
  }

  mic->bmi[i].as_mv[0].as_int = this_mv[0].as_int;
  if (is_compound)
    mic->bmi[i].as_mv[1].as_int = this_mv[1].as_int;

  mic->bmi[i].as_mode = mode;

  // Replicate this bmi entry to every 4x4 sub-block the partition covers
  // (bmi is laid out on a 2x2 grid within the 8x8 block).
  for (idy = 0; idy < num_4x4_blocks_high; ++idy)
    for (idx = 0; idx < num_4x4_blocks_wide; ++idx)
      memmove(&mic->bmi[i + idy * 2 + idx], &mic->bmi[i], sizeof(mic->bmi[i]));

  return cost_mv_ref(cpi, mode, mbmi_ext->mode_context[mbmi->ref_frame[0]]) +
             thismvcost;
}
2292
Yaowu Xu26a9afc2015-08-13 09:42:27 -07002293static int64_t encode_inter_mb_segment(VP10_COMP *cpi,
Jingning Han3ee6db62015-08-05 19:00:31 -07002294 MACROBLOCK *x,
2295 int64_t best_yrd,
2296 int i,
2297 int *labelyrate,
2298 int64_t *distortion, int64_t *sse,
2299 ENTROPY_CONTEXT *ta,
2300 ENTROPY_CONTEXT *tl,
Yaowu Xu7c514e22015-09-28 15:55:46 -07002301 int ir, int ic,
Jingning Han3ee6db62015-08-05 19:00:31 -07002302 int mi_row, int mi_col) {
2303 int k;
2304 MACROBLOCKD *xd = &x->e_mbd;
2305 struct macroblockd_plane *const pd = &xd->plane[0];
2306 struct macroblock_plane *const p = &x->plane[0];
2307 MODE_INFO *const mi = xd->mi[0];
2308 const BLOCK_SIZE plane_bsize = get_plane_block_size(mi->mbmi.sb_type, pd);
2309 const int width = 4 * num_4x4_blocks_wide_lookup[plane_bsize];
2310 const int height = 4 * num_4x4_blocks_high_lookup[plane_bsize];
2311 int idx, idy;
Yaowu Xu7c514e22015-09-28 15:55:46 -07002312 void (*fwd_txm4x4)(const int16_t *input, tran_low_t *output, int stride);
Jingning Han3ee6db62015-08-05 19:00:31 -07002313
2314 const uint8_t *const src =
2315 &p->src.buf[vp10_raster_block_offset(BLOCK_8X8, i, p->src.stride)];
2316 uint8_t *const dst = &pd->dst.buf[vp10_raster_block_offset(BLOCK_8X8, i,
2317 pd->dst.stride)];
2318 int64_t thisdistortion = 0, thissse = 0;
Yaowu Xu7c514e22015-09-28 15:55:46 -07002319 int thisrate = 0;
hui sub3cc3a02015-08-24 14:37:54 -07002320 TX_TYPE tx_type = get_tx_type(PLANE_TYPE_Y, xd, i, TX_4X4);
Debargha Mukherjee9fc691e2015-09-03 02:58:12 -07002321 const scan_order *so = get_scan(TX_4X4, tx_type, 1);
Jingning Han3ee6db62015-08-05 19:00:31 -07002322
Yaowu Xu7c514e22015-09-28 15:55:46 -07002323 vp10_build_inter_predictor_sub8x8(xd, 0, i, ir, ic, mi_row, mi_col);
2324
Jingning Han3ee6db62015-08-05 19:00:31 -07002325#if CONFIG_VP9_HIGHBITDEPTH
2326 if (xd->cur_buf->flags & YV12_FLAG_HIGHBITDEPTH) {
Ronald S. Bultje60c58b52015-10-12 17:54:25 -04002327 fwd_txm4x4 = xd->lossless[mi->mbmi.segment_id] ? vp10_highbd_fwht4x4
2328 : vpx_highbd_fdct4x4;
Jingning Han3ee6db62015-08-05 19:00:31 -07002329 } else {
Ronald S. Bultje60c58b52015-10-12 17:54:25 -04002330 fwd_txm4x4 = xd->lossless[mi->mbmi.segment_id] ? vp10_fwht4x4 : vpx_fdct4x4;
Jingning Han3ee6db62015-08-05 19:00:31 -07002331 }
2332#else
Ronald S. Bultje60c58b52015-10-12 17:54:25 -04002333 fwd_txm4x4 = xd->lossless[mi->mbmi.segment_id] ? vp10_fwht4x4 : vpx_fdct4x4;
Jingning Han3ee6db62015-08-05 19:00:31 -07002334#endif // CONFIG_VP9_HIGHBITDEPTH
Jingning Han3ee6db62015-08-05 19:00:31 -07002335
2336#if CONFIG_VP9_HIGHBITDEPTH
2337 if (xd->cur_buf->flags & YV12_FLAG_HIGHBITDEPTH) {
2338 vpx_highbd_subtract_block(
2339 height, width, vp10_raster_block_offset_int16(BLOCK_8X8, i, p->src_diff),
2340 8, src, p->src.stride, dst, pd->dst.stride, xd->bd);
2341 } else {
2342 vpx_subtract_block(
2343 height, width, vp10_raster_block_offset_int16(BLOCK_8X8, i, p->src_diff),
2344 8, src, p->src.stride, dst, pd->dst.stride);
2345 }
2346#else
2347 vpx_subtract_block(height, width,
2348 vp10_raster_block_offset_int16(BLOCK_8X8, i, p->src_diff),
2349 8, src, p->src.stride, dst, pd->dst.stride);
2350#endif // CONFIG_VP9_HIGHBITDEPTH
2351
2352 k = i;
2353 for (idy = 0; idy < height / 4; ++idy) {
2354 for (idx = 0; idx < width / 4; ++idx) {
2355 int64_t ssz, rd, rd1, rd2;
2356 tran_low_t* coeff;
Jingning Han2cdc1272015-10-09 09:57:42 -07002357#if CONFIG_VAR_TX
2358 int coeff_ctx;
2359#endif
Jingning Han3ee6db62015-08-05 19:00:31 -07002360 k += (idy * 2 + idx);
Jingning Han2cdc1272015-10-09 09:57:42 -07002361#if CONFIG_VAR_TX
2362 coeff_ctx = combine_entropy_contexts(*(ta + (k & 1)),
2363 *(tl + (k >> 1)));
2364#endif
Jingning Han3ee6db62015-08-05 19:00:31 -07002365 coeff = BLOCK_OFFSET(p->coeff, k);
Yaowu Xu7c514e22015-09-28 15:55:46 -07002366 fwd_txm4x4(vp10_raster_block_offset_int16(BLOCK_8X8, k, p->src_diff),
2367 coeff, 8);
Jingning Han3ee6db62015-08-05 19:00:31 -07002368 vp10_regular_quantize_b_4x4(x, 0, k, so->scan, so->iscan);
2369#if CONFIG_VP9_HIGHBITDEPTH
2370 if (xd->cur_buf->flags & YV12_FLAG_HIGHBITDEPTH) {
2371 thisdistortion += vp10_highbd_block_error(coeff,
2372 BLOCK_OFFSET(pd->dqcoeff, k),
2373 16, &ssz, xd->bd);
2374 } else {
2375 thisdistortion += vp10_block_error(coeff, BLOCK_OFFSET(pd->dqcoeff, k),
2376 16, &ssz);
2377 }
2378#else
2379 thisdistortion += vp10_block_error(coeff, BLOCK_OFFSET(pd->dqcoeff, k),
2380 16, &ssz);
2381#endif // CONFIG_VP9_HIGHBITDEPTH
2382 thissse += ssz;
Jingning Han2cdc1272015-10-09 09:57:42 -07002383#if CONFIG_VAR_TX
2384 thisrate += cost_coeffs(x, 0, k, coeff_ctx,
2385 TX_4X4,
Jingning Han3ee6db62015-08-05 19:00:31 -07002386 so->scan, so->neighbors,
2387 cpi->sf.use_fast_coef_costing);
Jingning Han2cdc1272015-10-09 09:57:42 -07002388 *(ta + (k & 1)) = !(p->eobs[k] == 0);
2389 *(tl + (k >> 1)) = !(p->eobs[k] == 0);
2390#else
2391 thisrate += cost_coeffs(x, 0, k, ta + (k & 1), tl + (k >> 1),
2392 TX_4X4,
2393 so->scan, so->neighbors,
2394 cpi->sf.use_fast_coef_costing);
2395#endif
Jingning Han3ee6db62015-08-05 19:00:31 -07002396 rd1 = RDCOST(x->rdmult, x->rddiv, thisrate, thisdistortion >> 2);
2397 rd2 = RDCOST(x->rdmult, x->rddiv, 0, thissse >> 2);
James Zern5e16d392015-08-17 18:19:22 -07002398 rd = VPXMIN(rd1, rd2);
Jingning Han3ee6db62015-08-05 19:00:31 -07002399 if (rd >= best_yrd)
2400 return INT64_MAX;
2401 }
2402 }
2403
2404 *distortion = thisdistortion >> 2;
2405 *labelyrate = thisrate;
2406 *sse = thissse >> 2;
2407
2408 return RDCOST(x->rdmult, x->rddiv, *labelyrate, *distortion);
2409}
2410
// Rate-distortion statistics for one (sub-8x8 block, inter mode) pair,
// cached so results can be reused across interpolation-filter passes.
typedef struct {
  int eobs;               // end-of-block index copied from p->eobs[]
  int brate;              // mode/mv signaling rate; later includes byrate
  int byrate;             // luma coefficient rate from encode_inter_mb_segment
  int64_t bdist;          // distortion of the block
  int64_t bsse;           // sum of squared error of the block
  int64_t brdcost;        // combined RD cost (INT64_MAX if not evaluated)
  int_mv mvs[2];          // motion vectors; [1] only used with a second ref
  ENTROPY_CONTEXT ta[2];  // above entropy context after coding the block
  ENTROPY_CONTEXT tl[2];  // left entropy context after coding the block
} SEG_RDSTAT;
2422
// Running best result of the sub-8x8 partition search
// (see rd_pick_best_sub8x8_mode); one instance per filter pass.
typedef struct {
  int_mv *ref_mv[2];  // best reference MVs, one per reference frame
  int_mv mvp;         // motion vector predictor for the full-pel search

  int64_t segment_rd;       // accumulated RD cost of the whole segmentation
  int r;                    // total rate over all sub-blocks
  int64_t d;                // total distortion over all sub-blocks
  int64_t sse;              // total SSE over all sub-blocks
  int segment_yrate;        // total luma coefficient rate
  PREDICTION_MODE modes[4];            // selected mode per sub-block
  SEG_RDSTAT rdstat[4][INTER_MODES];   // per-block, per-mode RD statistics
  int mvthresh;             // threshold gating new motion searches
} BEST_SEG_INFO;
2436
2437static INLINE int mv_check_bounds(const MACROBLOCK *x, const MV *mv) {
2438 return (mv->row >> 3) < x->mv_row_min ||
2439 (mv->row >> 3) > x->mv_row_max ||
2440 (mv->col >> 3) < x->mv_col_min ||
2441 (mv->col >> 3) > x->mv_col_max;
2442}
2443
2444static INLINE void mi_buf_shift(MACROBLOCK *x, int i) {
2445 MB_MODE_INFO *const mbmi = &x->e_mbd.mi[0]->mbmi;
2446 struct macroblock_plane *const p = &x->plane[0];
2447 struct macroblockd_plane *const pd = &x->e_mbd.plane[0];
2448
2449 p->src.buf = &p->src.buf[vp10_raster_block_offset(BLOCK_8X8, i,
2450 p->src.stride)];
2451 assert(((intptr_t)pd->pre[0].buf & 0x7) == 0);
2452 pd->pre[0].buf = &pd->pre[0].buf[vp10_raster_block_offset(BLOCK_8X8, i,
2453 pd->pre[0].stride)];
2454 if (has_second_ref(mbmi))
2455 pd->pre[1].buf = &pd->pre[1].buf[vp10_raster_block_offset(BLOCK_8X8, i,
2456 pd->pre[1].stride)];
2457}
2458
2459static INLINE void mi_buf_restore(MACROBLOCK *x, struct buf_2d orig_src,
2460 struct buf_2d orig_pre[2]) {
2461 MB_MODE_INFO *mbmi = &x->e_mbd.mi[0]->mbmi;
2462 x->plane[0].src = orig_src;
2463 x->e_mbd.plane[0].pre[0] = orig_pre[0];
2464 if (has_second_ref(mbmi))
2465 x->e_mbd.plane[0].pre[1] = orig_pre[1];
2466}
2467
2468static INLINE int mv_has_subpel(const MV *mv) {
2469 return (mv->row & 0x0F) || (mv->col & 0x0F);
2470}
2471
2472// Check if NEARESTMV/NEARMV/ZEROMV is the cheapest way encode zero motion.
2473// TODO(aconverse): Find out if this is still productive then clean up or remove
2474static int check_best_zero_mv(
Yaowu Xu26a9afc2015-08-13 09:42:27 -07002475 const VP10_COMP *cpi, const uint8_t mode_context[MAX_REF_FRAMES],
Jingning Han3ee6db62015-08-05 19:00:31 -07002476 int_mv frame_mv[MB_MODE_COUNT][MAX_REF_FRAMES], int this_mode,
2477 const MV_REFERENCE_FRAME ref_frames[2]) {
2478 if ((this_mode == NEARMV || this_mode == NEARESTMV || this_mode == ZEROMV) &&
2479 frame_mv[this_mode][ref_frames[0]].as_int == 0 &&
2480 (ref_frames[1] == NONE ||
2481 frame_mv[this_mode][ref_frames[1]].as_int == 0)) {
2482 int rfc = mode_context[ref_frames[0]];
2483 int c1 = cost_mv_ref(cpi, NEARMV, rfc);
2484 int c2 = cost_mv_ref(cpi, NEARESTMV, rfc);
2485 int c3 = cost_mv_ref(cpi, ZEROMV, rfc);
2486
2487 if (this_mode == NEARMV) {
2488 if (c1 > c3) return 0;
2489 } else if (this_mode == NEARESTMV) {
2490 if (c2 > c3) return 0;
2491 } else {
2492 assert(this_mode == ZEROMV);
2493 if (ref_frames[1] == NONE) {
2494 if ((c3 >= c2 && frame_mv[NEARESTMV][ref_frames[0]].as_int == 0) ||
2495 (c3 >= c1 && frame_mv[NEARMV][ref_frames[0]].as_int == 0))
2496 return 0;
2497 } else {
2498 if ((c3 >= c2 && frame_mv[NEARESTMV][ref_frames[0]].as_int == 0 &&
2499 frame_mv[NEARESTMV][ref_frames[1]].as_int == 0) ||
2500 (c3 >= c1 && frame_mv[NEARMV][ref_frames[0]].as_int == 0 &&
2501 frame_mv[NEARMV][ref_frames[1]].as_int == 0))
2502 return 0;
2503 }
2504 }
2505 }
2506 return 1;
2507}
2508
// Iterative joint (compound-prediction) motion search. Starting from the
// single-prediction NEWMV results in |single_newmv|, alternately refines the
// MV of one reference frame while holding the other reference's prediction
// fixed, until an iteration fails to improve. On return |frame_mv| holds the
// refined MVs for both references and |*rate_mv| their total signaling cost.
static void joint_motion_search(VP10_COMP *cpi, MACROBLOCK *x,
                                BLOCK_SIZE bsize,
                                int_mv *frame_mv,
                                int mi_row, int mi_col,
                                int_mv single_newmv[MAX_REF_FRAMES],
                                int *rate_mv) {
  const VP10_COMMON *const cm = &cpi->common;
  const int pw = 4 * num_4x4_blocks_wide_lookup[bsize];  // width in pixels
  const int ph = 4 * num_4x4_blocks_high_lookup[bsize];  // height in pixels
  MACROBLOCKD *xd = &x->e_mbd;
  MB_MODE_INFO *mbmi = &xd->mi[0]->mbmi;
  const int refs[2] = {mbmi->ref_frame[0],
                       mbmi->ref_frame[1] < 0 ? 0 : mbmi->ref_frame[1]};
  int_mv ref_mv[2];
  int ite, ref;
  const InterpKernel *kernel = vp10_filter_kernels[mbmi->interp_filter];
  struct scale_factors sf;

  // Do joint motion search in compound mode to get more accurate mv.
  struct buf_2d backup_yv12[2][MAX_MB_PLANE];
  int last_besterr[2] = {INT_MAX, INT_MAX};
  const YV12_BUFFER_CONFIG *const scaled_ref_frame[2] = {
    vp10_get_scaled_ref_frame(cpi, mbmi->ref_frame[0]),
    vp10_get_scaled_ref_frame(cpi, mbmi->ref_frame[1])
  };

  // Prediction buffer from second frame.
#if CONFIG_VP9_HIGHBITDEPTH
  DECLARE_ALIGNED(16, uint16_t, second_pred_alloc_16[64 * 64]);
  uint8_t *second_pred;
#else
  DECLARE_ALIGNED(16, uint8_t, second_pred[64 * 64]);
#endif  // CONFIG_VP9_HIGHBITDEPTH

  for (ref = 0; ref < 2; ++ref) {
    ref_mv[ref] = x->mbmi_ext->ref_mvs[refs[ref]][0];

    if (scaled_ref_frame[ref]) {
      int i;
      // Swap out the reference frame for a version that's been scaled to
      // match the resolution of the current frame, allowing the existing
      // motion search code to be used without additional modifications.
      for (i = 0; i < MAX_MB_PLANE; i++)
        backup_yv12[ref][i] = xd->plane[i].pre[ref];
      vp10_setup_pre_planes(xd, ref, scaled_ref_frame[ref], mi_row, mi_col,
                            NULL);
    }

    // Seed the joint search with the single-prediction NEWMV result.
    frame_mv[refs[ref]].as_int = single_newmv[refs[ref]].as_int;
  }

  // Since we have scaled the reference frames to match the size of the current
  // frame we must use a unit scaling factor during mode selection.
#if CONFIG_VP9_HIGHBITDEPTH
  vp10_setup_scale_factors_for_frame(&sf, cm->width, cm->height,
                                     cm->width, cm->height,
                                     cm->use_highbitdepth);
#else
  vp10_setup_scale_factors_for_frame(&sf, cm->width, cm->height,
                                     cm->width, cm->height);
#endif  // CONFIG_VP9_HIGHBITDEPTH

  // Allow joint search multiple times iteratively for each reference frame
  // and break out of the search loop if it couldn't find a better mv.
  for (ite = 0; ite < 4; ite++) {
    struct buf_2d ref_yv12[2];
    int bestsme = INT_MAX;
    int sadpb = x->sadperbit16;
    MV tmp_mv;
    int search_range = 3;

    // Saved so the range clamped by vp10_set_mv_search_range() below can be
    // restored after this iteration's search.
    int tmp_col_min = x->mv_col_min;
    int tmp_col_max = x->mv_col_max;
    int tmp_row_min = x->mv_row_min;
    int tmp_row_max = x->mv_row_max;
    int id = ite % 2;  // Even iterations search in the first reference frame,
                       // odd iterations search in the second. The predictor
                       // found for the 'other' reference frame is factored in.

    // Initialized here because of compiler problem in Visual Studio.
    ref_yv12[0] = xd->plane[0].pre[0];
    ref_yv12[1] = xd->plane[0].pre[1];

    // Get the prediction block from the 'other' reference frame.
#if CONFIG_VP9_HIGHBITDEPTH
    if (xd->cur_buf->flags & YV12_FLAG_HIGHBITDEPTH) {
      second_pred = CONVERT_TO_BYTEPTR(second_pred_alloc_16);
      vp10_highbd_build_inter_predictor(ref_yv12[!id].buf,
                                        ref_yv12[!id].stride,
                                        second_pred, pw,
                                        &frame_mv[refs[!id]].as_mv,
                                        &sf, pw, ph, 0,
                                        kernel, MV_PRECISION_Q3,
                                        mi_col * MI_SIZE, mi_row * MI_SIZE,
                                        xd->bd);
    } else {
      second_pred = (uint8_t *)second_pred_alloc_16;
      vp10_build_inter_predictor(ref_yv12[!id].buf,
                                 ref_yv12[!id].stride,
                                 second_pred, pw,
                                 &frame_mv[refs[!id]].as_mv,
                                 &sf, pw, ph, 0,
                                 kernel, MV_PRECISION_Q3,
                                 mi_col * MI_SIZE, mi_row * MI_SIZE);
    }
#else
    vp10_build_inter_predictor(ref_yv12[!id].buf,
                               ref_yv12[!id].stride,
                               second_pred, pw,
                               &frame_mv[refs[!id]].as_mv,
                               &sf, pw, ph, 0,
                               kernel, MV_PRECISION_Q3,
                               mi_col * MI_SIZE, mi_row * MI_SIZE);
#endif  // CONFIG_VP9_HIGHBITDEPTH

    // Do compound motion search on the current reference frame.
    if (id)
      xd->plane[0].pre[0] = ref_yv12[id];
    vp10_set_mv_search_range(x, &ref_mv[id].as_mv);

    // Use the mv result from the single mode as mv predictor.
    tmp_mv = frame_mv[refs[id]].as_mv;

    // Convert from 1/8-pel to full-pel units for the full-pixel search.
    tmp_mv.col >>= 3;
    tmp_mv.row >>= 3;

    // Small-range full-pixel motion search.
    bestsme = vp10_refining_search_8p_c(x, &tmp_mv, sadpb,
                                        search_range,
                                        &cpi->fn_ptr[bsize],
                                        &ref_mv[id].as_mv, second_pred);
    if (bestsme < INT_MAX)
      bestsme = vp10_get_mvpred_av_var(x, &tmp_mv, &ref_mv[id].as_mv,
                                       second_pred, &cpi->fn_ptr[bsize], 1);

    // Restore the MV search range for the next iteration.
    x->mv_col_min = tmp_col_min;
    x->mv_col_max = tmp_col_max;
    x->mv_row_min = tmp_row_min;
    x->mv_row_max = tmp_row_max;

    if (bestsme < INT_MAX) {
      int dis; /* TODO: use dis in distortion calculation later. */
      unsigned int sse;
      // Sub-pixel refinement around the full-pel winner.
      bestsme = cpi->find_fractional_mv_step(
          x, &tmp_mv,
          &ref_mv[id].as_mv,
          cpi->common.allow_high_precision_mv,
          x->errorperbit,
          &cpi->fn_ptr[bsize],
          0, cpi->sf.mv.subpel_iters_per_step,
          NULL,
          x->nmvjointcost, x->mvcost,
          &dis, &sse, second_pred,
          pw, ph);
    }

    // Restore the pointer to the first (possibly scaled) prediction buffer.
    if (id)
      xd->plane[0].pre[0] = ref_yv12[0];

    if (bestsme < last_besterr[id]) {
      // Keep the improvement and continue iterating.
      frame_mv[refs[id]].as_mv = tmp_mv;
      last_besterr[id] = bestsme;
    } else {
      // No improvement for this reference: stop the joint refinement.
      break;
    }
  }

  *rate_mv = 0;

  for (ref = 0; ref < 2; ++ref) {
    if (scaled_ref_frame[ref]) {
      // Restore the prediction frame pointers to their unscaled versions.
      int i;
      for (i = 0; i < MAX_MB_PLANE; i++)
        xd->plane[i].pre[ref] = backup_yv12[ref][i];
    }

    // Accumulate the bit cost of signaling both refined motion vectors.
    *rate_mv += vp10_mv_bit_cost(&frame_mv[refs[ref]].as_mv,
                                 &x->mbmi_ext->ref_mvs[refs[ref]][0].as_mv,
                                 x->nmvjointcost, x->mvcost, MV_COST_WEIGHT);
  }
}
2692
// Mode search for sub-8x8 partitions. For each of the (up to four) sub-blocks
// of an 8x8 region, evaluates NEARESTMV/NEARMV/ZEROMV/NEWMV — running new
// motion searches (single or joint/compound) where needed — accumulates the
// per-block winners in |bsi| = bsi_buf[filter_idx], and writes the selected
// modes and MVs back into the block mode info. Results from earlier filter
// passes (filter_idx > 0) are reused when the MVs match and are full-pel.
// Returns the total RD cost, or INT64_MAX when the search exceeds |best_rd|.
static int64_t rd_pick_best_sub8x8_mode(VP10_COMP *cpi, MACROBLOCK *x,
                                        int_mv *best_ref_mv,
                                        int_mv *second_best_ref_mv,
                                        int64_t best_rd, int *returntotrate,
                                        int *returnyrate,
                                        int64_t *returndistortion,
                                        int *skippable, int64_t *psse,
                                        int mvthresh,
                                        int_mv seg_mvs[4][MAX_REF_FRAMES],
                                        BEST_SEG_INFO *bsi_buf, int filter_idx,
                                        int mi_row, int mi_col) {
  int i;
  BEST_SEG_INFO *bsi = bsi_buf + filter_idx;
  MACROBLOCKD *xd = &x->e_mbd;
  MODE_INFO *mi = xd->mi[0];
  MB_MODE_INFO *mbmi = &mi->mbmi;
  int mode_idx;
  int k, br = 0, idx, idy;
  int64_t bd = 0, block_sse = 0;
  PREDICTION_MODE this_mode;
  VP10_COMMON *cm = &cpi->common;
  struct macroblock_plane *const p = &x->plane[0];
  struct macroblockd_plane *const pd = &xd->plane[0];
  const int label_count = 4;
  int64_t this_segment_rd = 0;
  int label_mv_thresh;
  int segmentyrate = 0;
  const BLOCK_SIZE bsize = mbmi->sb_type;
  const int num_4x4_blocks_wide = num_4x4_blocks_wide_lookup[bsize];
  const int num_4x4_blocks_high = num_4x4_blocks_high_lookup[bsize];
  ENTROPY_CONTEXT t_above[2], t_left[2];
  int subpelmv = 1, have_ref = 0;
  const int has_second_rf = has_second_ref(mbmi);
  const int inter_mode_mask = cpi->sf.inter_mode_mask[bsize];
  MB_MODE_INFO_EXT *const mbmi_ext = x->mbmi_ext;

  vp10_zero(*bsi);

  bsi->segment_rd = best_rd;
  bsi->ref_mv[0] = best_ref_mv;
  bsi->ref_mv[1] = second_best_ref_mv;
  bsi->mvp.as_int = best_ref_mv->as_int;
  bsi->mvthresh = mvthresh;

  for (i = 0; i < 4; i++)
    bsi->modes[i] = ZEROMV;

  // Snapshot the entropy contexts; they are advanced per sub-block below.
  memcpy(t_above, pd->above_context, sizeof(t_above));
  memcpy(t_left, pd->left_context, sizeof(t_left));

  // 64 makes this threshold really big effectively
  // making it so that we very rarely check mvs on
  // segments.   setting this to 1 would make mv thresh
  // roughly equal to what it is for macroblocks
  label_mv_thresh = 1 * bsi->mvthresh / label_count;

  // Segmentation method overheads
  for (idy = 0; idy < 2; idy += num_4x4_blocks_high) {
    for (idx = 0; idx < 2; idx += num_4x4_blocks_wide) {
      // TODO(jingning,rbultje): rewrite the rate-distortion optimization
      // loop for 4x4/4x8/8x4 block coding. to be replaced with new rd loop
      int_mv mode_mv[MB_MODE_COUNT][2];
      int_mv frame_mv[MB_MODE_COUNT][MAX_REF_FRAMES];
      PREDICTION_MODE mode_selected = ZEROMV;
      // NOTE: shadows the |best_rd| parameter; tracks the best RD cost for
      // this sub-block only.
      int64_t best_rd = INT64_MAX;
      const int i = idy * 2 + idx;
      int ref;

      // Gather NEAREST/NEAR candidates for each reference of this sub-block.
      for (ref = 0; ref < 1 + has_second_rf; ++ref) {
        const MV_REFERENCE_FRAME frame = mbmi->ref_frame[ref];
        frame_mv[ZEROMV][frame].as_int = 0;
        vp10_append_sub8x8_mvs_for_idx(cm, xd, i, ref, mi_row, mi_col,
                                       &frame_mv[NEARESTMV][frame],
                                       &frame_mv[NEARMV][frame],
                                       mbmi_ext->mode_context);
      }

      // search for the best motion vector on this segment
      for (this_mode = NEARESTMV; this_mode <= NEWMV; ++this_mode) {
        const struct buf_2d orig_src = x->plane[0].src;
        struct buf_2d orig_pre[2];

        mode_idx = INTER_OFFSET(this_mode);
        bsi->rdstat[i][mode_idx].brdcost = INT64_MAX;
        // Speed feature: skip modes masked out for this block size.
        if (!(inter_mode_mask & (1 << this_mode)))
          continue;

        if (!check_best_zero_mv(cpi, mbmi_ext->mode_context, frame_mv,
                                this_mode, mbmi->ref_frame))
          continue;

        memcpy(orig_pre, pd->pre, sizeof(orig_pre));
        // Start this mode from the entropy contexts left by previous blocks.
        memcpy(bsi->rdstat[i][mode_idx].ta, t_above,
               sizeof(bsi->rdstat[i][mode_idx].ta));
        memcpy(bsi->rdstat[i][mode_idx].tl, t_left,
               sizeof(bsi->rdstat[i][mode_idx].tl));

        // motion search for newmv (single predictor case only)
        if (!has_second_rf && this_mode == NEWMV &&
            seg_mvs[i][mbmi->ref_frame[0]].as_int == INVALID_MV) {
          MV *const new_mv = &mode_mv[NEWMV][0].as_mv;
          int step_param = 0;
          int thissme, bestsme = INT_MAX;
          int sadpb = x->sadperbit4;
          MV mvp_full;
          int max_mv;
          int cost_list[5];

          /* Is the best so far sufficiently good that we can't justify doing
           * a new motion search. */
          if (best_rd < label_mv_thresh)
            break;

          if (cpi->oxcf.mode != BEST) {
            // use previous block's result as next block's MV predictor.
            if (i > 0) {
              bsi->mvp.as_int = mi->bmi[i - 1].as_mv[0].as_int;
              if (i == 2)
                bsi->mvp.as_int = mi->bmi[i - 2].as_mv[0].as_int;
            }
          }
          if (i == 0)
            max_mv = x->max_mv_context[mbmi->ref_frame[0]];
          else
            max_mv =
                VPXMAX(abs(bsi->mvp.as_mv.row), abs(bsi->mvp.as_mv.col)) >> 3;

          if (cpi->sf.mv.auto_mv_step_size && cm->show_frame) {
            // Take wtd average of the step_params based on the last frame's
            // max mv magnitude and the best ref mvs of the current block for
            // the given reference.
            step_param = (vp10_init_search_range(max_mv) +
                          cpi->mv_step_param) / 2;
          } else {
            step_param = cpi->mv_step_param;
          }

          // Full-pel starting point from the MV predictor.
          mvp_full.row = bsi->mvp.as_mv.row >> 3;
          mvp_full.col = bsi->mvp.as_mv.col >> 3;

          if (cpi->sf.adaptive_motion_search) {
            mvp_full.row = x->pred_mv[mbmi->ref_frame[0]].row >> 3;
            mvp_full.col = x->pred_mv[mbmi->ref_frame[0]].col >> 3;
            step_param = VPXMAX(step_param, 8);
          }

          // adjust src pointer for this block
          mi_buf_shift(x, i);

          vp10_set_mv_search_range(x, &bsi->ref_mv[0]->as_mv);

          bestsme = vp10_full_pixel_search(
              cpi, x, bsize, &mvp_full, step_param, sadpb,
              cpi->sf.mv.subpel_search_method != SUBPEL_TREE ? cost_list : NULL,
              &bsi->ref_mv[0]->as_mv, new_mv,
              INT_MAX, 1);

          // Should we do a full search (best quality only)
          if (cpi->oxcf.mode == BEST) {
            int_mv *const best_mv = &mi->bmi[i].as_mv[0];
            /* Check if mvp_full is within the range. */
            clamp_mv(&mvp_full, x->mv_col_min, x->mv_col_max,
                     x->mv_row_min, x->mv_row_max);
            thissme = cpi->full_search_sad(x, &mvp_full,
                                           sadpb, 16, &cpi->fn_ptr[bsize],
                                           &bsi->ref_mv[0]->as_mv,
                                           &best_mv->as_mv);
            cost_list[1] = cost_list[2] = cost_list[3] = cost_list[4] = INT_MAX;
            if (thissme < bestsme) {
              bestsme = thissme;
              *new_mv = best_mv->as_mv;
            } else {
              // The full search result is actually worse so re-instate the
              // previous best vector
              best_mv->as_mv = *new_mv;
            }
          }

          if (bestsme < INT_MAX) {
            int distortion;
            // Sub-pixel refinement of the full-pel winner.
            cpi->find_fractional_mv_step(
                x,
                new_mv,
                &bsi->ref_mv[0]->as_mv,
                cm->allow_high_precision_mv,
                x->errorperbit, &cpi->fn_ptr[bsize],
                cpi->sf.mv.subpel_force_stop,
                cpi->sf.mv.subpel_iters_per_step,
                cond_cost_list(cpi, cost_list),
                x->nmvjointcost, x->mvcost,
                &distortion,
                &x->pred_sse[mbmi->ref_frame[0]],
                NULL, 0, 0);

            // save motion search result for use in compound prediction
            seg_mvs[i][mbmi->ref_frame[0]].as_mv = *new_mv;
          }

          if (cpi->sf.adaptive_motion_search)
            x->pred_mv[mbmi->ref_frame[0]] = *new_mv;

          // restore src pointers
          mi_buf_restore(x, orig_src, orig_pre);
        }

        // Compound NEWMV needs valid single-prediction MVs for both refs.
        if (has_second_rf) {
          if (seg_mvs[i][mbmi->ref_frame[1]].as_int == INVALID_MV ||
              seg_mvs[i][mbmi->ref_frame[0]].as_int == INVALID_MV)
            continue;
        }

        if (has_second_rf && this_mode == NEWMV &&
            mbmi->interp_filter == EIGHTTAP) {
          // adjust src pointers
          mi_buf_shift(x, i);
          if (cpi->sf.comp_inter_joint_search_thresh <= bsize) {
            int rate_mv;
            joint_motion_search(cpi, x, bsize, frame_mv[this_mode],
                                mi_row, mi_col, seg_mvs[i],
                                &rate_mv);
            seg_mvs[i][mbmi->ref_frame[0]].as_int =
                frame_mv[this_mode][mbmi->ref_frame[0]].as_int;
            seg_mvs[i][mbmi->ref_frame[1]].as_int =
                frame_mv[this_mode][mbmi->ref_frame[1]].as_int;
          }
          // restore src pointers
          mi_buf_restore(x, orig_src, orig_pre);
        }

        bsi->rdstat[i][mode_idx].brate =
            set_and_cost_bmi_mvs(cpi, x, xd, i, this_mode, mode_mv[this_mode],
                                 frame_mv, seg_mvs[i], bsi->ref_mv,
                                 x->nmvjointcost, x->mvcost);

        // Propagate the MVs to the other 4x4 positions covered by this
        // sub-block (for 4x8/8x4/8x8 shapes).
        for (ref = 0; ref < 1 + has_second_rf; ++ref) {
          bsi->rdstat[i][mode_idx].mvs[ref].as_int =
              mode_mv[this_mode][ref].as_int;
          if (num_4x4_blocks_wide > 1)
            bsi->rdstat[i + 1][mode_idx].mvs[ref].as_int =
                mode_mv[this_mode][ref].as_int;
          if (num_4x4_blocks_high > 1)
            bsi->rdstat[i + 2][mode_idx].mvs[ref].as_int =
                mode_mv[this_mode][ref].as_int;
        }

        // Trap vectors that reach beyond the UMV borders
        if (mv_check_bounds(x, &mode_mv[this_mode][0].as_mv) ||
            (has_second_rf &&
             mv_check_bounds(x, &mode_mv[this_mode][1].as_mv)))
          continue;

        if (filter_idx > 0) {
          // Try to reuse the RD result from an earlier filter pass: valid
          // when the MVs match that pass and are full-pel (filter-agnostic).
          BEST_SEG_INFO *ref_bsi = bsi_buf;
          subpelmv = 0;
          have_ref = 1;

          for (ref = 0; ref < 1 + has_second_rf; ++ref) {
            subpelmv |= mv_has_subpel(&mode_mv[this_mode][ref].as_mv);
            have_ref &= mode_mv[this_mode][ref].as_int ==
                ref_bsi->rdstat[i][mode_idx].mvs[ref].as_int;
          }

          if (filter_idx > 1 && !subpelmv && !have_ref) {
            ref_bsi = bsi_buf + 1;
            have_ref = 1;
            for (ref = 0; ref < 1 + has_second_rf; ++ref)
              have_ref &= mode_mv[this_mode][ref].as_int ==
                  ref_bsi->rdstat[i][mode_idx].mvs[ref].as_int;
          }

          if (!subpelmv && have_ref &&
              ref_bsi->rdstat[i][mode_idx].brdcost < INT64_MAX) {
            memcpy(&bsi->rdstat[i][mode_idx], &ref_bsi->rdstat[i][mode_idx],
                   sizeof(SEG_RDSTAT));
            if (num_4x4_blocks_wide > 1)
              bsi->rdstat[i + 1][mode_idx].eobs =
                  ref_bsi->rdstat[i + 1][mode_idx].eobs;
            if (num_4x4_blocks_high > 1)
              bsi->rdstat[i + 2][mode_idx].eobs =
                  ref_bsi->rdstat[i + 2][mode_idx].eobs;

            if (bsi->rdstat[i][mode_idx].brdcost < best_rd) {
              mode_selected = this_mode;
              best_rd = bsi->rdstat[i][mode_idx].brdcost;
            }
            continue;
          }
        }

        // No cached result: encode the sub-block to measure rate/distortion.
        bsi->rdstat[i][mode_idx].brdcost =
            encode_inter_mb_segment(cpi, x,
                                    bsi->segment_rd - this_segment_rd, i,
                                    &bsi->rdstat[i][mode_idx].byrate,
                                    &bsi->rdstat[i][mode_idx].bdist,
                                    &bsi->rdstat[i][mode_idx].bsse,
                                    bsi->rdstat[i][mode_idx].ta,
                                    bsi->rdstat[i][mode_idx].tl,
                                    idy, idx,
                                    mi_row, mi_col);
        if (bsi->rdstat[i][mode_idx].brdcost < INT64_MAX) {
          // Add the mode/mv signaling cost on top of the coefficient cost.
          bsi->rdstat[i][mode_idx].brdcost += RDCOST(x->rdmult, x->rddiv,
                                                     bsi->rdstat[i][mode_idx].brate, 0);
          bsi->rdstat[i][mode_idx].brate += bsi->rdstat[i][mode_idx].byrate;
          bsi->rdstat[i][mode_idx].eobs = p->eobs[i];
          if (num_4x4_blocks_wide > 1)
            bsi->rdstat[i + 1][mode_idx].eobs = p->eobs[i + 1];
          if (num_4x4_blocks_high > 1)
            bsi->rdstat[i + 2][mode_idx].eobs = p->eobs[i + 2];
        }

        if (bsi->rdstat[i][mode_idx].brdcost < best_rd) {
          mode_selected = this_mode;
          best_rd = bsi->rdstat[i][mode_idx].brdcost;
        }
      } /*for each 4x4 mode*/

      if (best_rd == INT64_MAX) {
        // No mode worked for this sub-block: invalidate the remaining
        // entries and abort the whole segmentation.
        int iy, midx;
        for (iy = i + 1; iy < 4; ++iy)
          for (midx = 0; midx < INTER_MODES; ++midx)
            bsi->rdstat[iy][midx].brdcost = INT64_MAX;
        bsi->segment_rd = INT64_MAX;
        return INT64_MAX;
      }

      mode_idx = INTER_OFFSET(mode_selected);
      // Advance entropy contexts past the winning mode's coding.
      memcpy(t_above, bsi->rdstat[i][mode_idx].ta, sizeof(t_above));
      memcpy(t_left, bsi->rdstat[i][mode_idx].tl, sizeof(t_left));

      // Re-apply the winning mode's MVs to the block mode info.
      set_and_cost_bmi_mvs(cpi, x, xd, i, mode_selected, mode_mv[mode_selected],
                           frame_mv, seg_mvs[i], bsi->ref_mv, x->nmvjointcost,
                           x->mvcost);

      br += bsi->rdstat[i][mode_idx].brate;
      bd += bsi->rdstat[i][mode_idx].bdist;
      block_sse += bsi->rdstat[i][mode_idx].bsse;
      segmentyrate += bsi->rdstat[i][mode_idx].byrate;
      this_segment_rd += bsi->rdstat[i][mode_idx].brdcost;

      if (this_segment_rd > bsi->segment_rd) {
        // Running total already exceeds the budget: abort early.
        int iy, midx;
        for (iy = i + 1; iy < 4; ++iy)
          for (midx = 0; midx < INTER_MODES; ++midx)
            bsi->rdstat[iy][midx].brdcost = INT64_MAX;
        bsi->segment_rd = INT64_MAX;
        return INT64_MAX;
      }
    }
  } /* for each label */

  bsi->r = br;
  bsi->d = bd;
  bsi->segment_yrate = segmentyrate;
  bsi->segment_rd = this_segment_rd;
  bsi->sse = block_sse;

  // update the coding decisions
  for (k = 0; k < 4; ++k)
    bsi->modes[k] = mi->bmi[k].as_mode;

  if (bsi->segment_rd > best_rd)
    return INT64_MAX;
  /* set it to the best */
  for (i = 0; i < 4; i++) {
    mode_idx = INTER_OFFSET(bsi->modes[i]);
    mi->bmi[i].as_mv[0].as_int = bsi->rdstat[i][mode_idx].mvs[0].as_int;
    if (has_second_ref(mbmi))
      mi->bmi[i].as_mv[1].as_int = bsi->rdstat[i][mode_idx].mvs[1].as_int;
    x->plane[0].eobs[i] = bsi->rdstat[i][mode_idx].eobs;
    mi->bmi[i].as_mode = bsi->modes[i];
  }

  /*
   * used to set mbmi->mv.as_int
   */
  *returntotrate = bsi->r;
  *returndistortion = bsi->d;
  *returnyrate = bsi->segment_yrate;
  *skippable = vp10_is_skippable_in_plane(x, BLOCK_8X8, 0);
  *psse = bsi->sse;
  mbmi->mode = bsi->modes[3];

  return bsi->segment_rd;
}
3077
// Fill |ref_costs_single| and |ref_costs_comp| with the bit costs of
// signaling each reference-frame choice in the current context, and set
// |*comp_mode_p| to the probability used to code the single-vs-compound
// prediction decision. When the segment pins the reference frame, all
// costs are zero (nothing is signaled).
static void estimate_ref_frame_costs(const VP10_COMMON *cm,
                                     const MACROBLOCKD *xd,
                                     int segment_id,
                                     unsigned int *ref_costs_single,
                                     unsigned int *ref_costs_comp,
                                     vpx_prob *comp_mode_p) {
  int seg_ref_active = segfeature_active(&cm->seg, segment_id,
                                         SEG_LVL_REF_FRAME);
  if (seg_ref_active) {
    // Reference frame is dictated by the segment: no signaling cost.
    memset(ref_costs_single, 0, MAX_REF_FRAMES * sizeof(*ref_costs_single));
    memset(ref_costs_comp, 0, MAX_REF_FRAMES * sizeof(*ref_costs_comp));
    *comp_mode_p = 128;
  } else {
    vpx_prob intra_inter_p = vp10_get_intra_inter_prob(cm, xd);
    vpx_prob comp_inter_p = 128;

    if (cm->reference_mode == REFERENCE_MODE_SELECT) {
      comp_inter_p = vp10_get_reference_mode_prob(cm, xd);
      *comp_mode_p = comp_inter_p;
    } else {
      *comp_mode_p = 128;
    }

    ref_costs_single[INTRA_FRAME] = vp10_cost_bit(intra_inter_p, 0);

    if (cm->reference_mode != COMPOUND_REFERENCE) {
      vpx_prob ref_single_p1 = vp10_get_pred_prob_single_ref_p1(cm, xd);
      vpx_prob ref_single_p2 = vp10_get_pred_prob_single_ref_p2(cm, xd);
      // All single-ref choices pay for the inter bit (and, under mode
      // select, the "not compound" bit); per-reference bits are added below.
      unsigned int base_cost = vp10_cost_bit(intra_inter_p, 1);

      if (cm->reference_mode == REFERENCE_MODE_SELECT)
        base_cost += vp10_cost_bit(comp_inter_p, 0);

      ref_costs_single[LAST_FRAME] = ref_costs_single[GOLDEN_FRAME] =
          ref_costs_single[ALTREF_FRAME] = base_cost;
      ref_costs_single[LAST_FRAME] += vp10_cost_bit(ref_single_p1, 0);
      ref_costs_single[GOLDEN_FRAME] += vp10_cost_bit(ref_single_p1, 1);
      ref_costs_single[ALTREF_FRAME] += vp10_cost_bit(ref_single_p1, 1);
      ref_costs_single[GOLDEN_FRAME] += vp10_cost_bit(ref_single_p2, 0);
      ref_costs_single[ALTREF_FRAME] += vp10_cost_bit(ref_single_p2, 1);
    } else {
      ref_costs_single[LAST_FRAME] = 512;
      ref_costs_single[GOLDEN_FRAME] = 512;
      ref_costs_single[ALTREF_FRAME] = 512;
    }
    if (cm->reference_mode != SINGLE_REFERENCE) {
      vpx_prob ref_comp_p = vp10_get_pred_prob_comp_ref_p(cm, xd);
      unsigned int base_cost = vp10_cost_bit(intra_inter_p, 1);

      if (cm->reference_mode == REFERENCE_MODE_SELECT)
        base_cost += vp10_cost_bit(comp_inter_p, 1);

      ref_costs_comp[LAST_FRAME] = base_cost + vp10_cost_bit(ref_comp_p, 0);
      ref_costs_comp[GOLDEN_FRAME] = base_cost + vp10_cost_bit(ref_comp_p, 1);
    } else {
      ref_costs_comp[LAST_FRAME] = 512;
      ref_costs_comp[GOLDEN_FRAME] = 512;
    }
  }
}
3138
3139static void store_coding_context(MACROBLOCK *x, PICK_MODE_CONTEXT *ctx,
3140 int mode_index,
3141 int64_t comp_pred_diff[REFERENCE_MODES],
3142 int64_t best_filter_diff[SWITCHABLE_FILTER_CONTEXTS],
3143 int skippable) {
3144 MACROBLOCKD *const xd = &x->e_mbd;
3145
3146 // Take a snapshot of the coding context so it can be
3147 // restored if we decide to encode this way
3148 ctx->skip = x->skip;
3149 ctx->skippable = skippable;
3150 ctx->best_mode_index = mode_index;
3151 ctx->mic = *xd->mi[0];
3152 ctx->mbmi_ext = *x->mbmi_ext;
3153 ctx->single_pred_diff = (int)comp_pred_diff[SINGLE_REFERENCE];
3154 ctx->comp_pred_diff = (int)comp_pred_diff[COMPOUND_REFERENCE];
3155 ctx->hybrid_pred_diff = (int)comp_pred_diff[REFERENCE_MODE_SELECT];
3156
3157 memcpy(ctx->best_filter_diff, best_filter_diff,
3158 sizeof(*best_filter_diff) * SWITCHABLE_FILTER_CONTEXTS);
3159}
3160
// Prepare prediction data for |ref_frame|: point the plane prediction
// buffers at the reference (with its scale factors), collect and refine
// nearest/near MV candidates, and — for unscaled references of size >=8x8 —
// rank MV predictors for the subsequent motion search.
static void setup_buffer_inter(VP10_COMP *cpi, MACROBLOCK *x,
                               MV_REFERENCE_FRAME ref_frame,
                               BLOCK_SIZE block_size,
                               int mi_row, int mi_col,
                               int_mv frame_nearest_mv[MAX_REF_FRAMES],
                               int_mv frame_near_mv[MAX_REF_FRAMES],
                               struct buf_2d yv12_mb[4][MAX_MB_PLANE]) {
  const VP10_COMMON *cm = &cpi->common;
  const YV12_BUFFER_CONFIG *yv12 = get_ref_frame_buffer(cpi, ref_frame);
  MACROBLOCKD *const xd = &x->e_mbd;
  MODE_INFO *const mi = xd->mi[0];
  int_mv *const candidates = x->mbmi_ext->ref_mvs[ref_frame];
  const struct scale_factors *const sf = &cm->frame_refs[ref_frame - 1].sf;
  MB_MODE_INFO_EXT *const mbmi_ext = x->mbmi_ext;

  assert(yv12 != NULL);

  // TODO(jkoleszar): Is the UV buffer ever used here? If so, need to make this
  // use the UV scaling factors.
  vp10_setup_pred_block(xd, yv12_mb[ref_frame], yv12, mi_row, mi_col, sf, sf);

  // Gets an initial list of candidate vectors from neighbours and orders them
  vp10_find_mv_refs(cm, xd, mi, ref_frame, candidates, mi_row, mi_col,
                    NULL, NULL, mbmi_ext->mode_context);

  // Candidate refinement carried out at encoder and decoder
  vp10_find_best_ref_mvs(cm->allow_high_precision_mv, candidates,
                         &frame_nearest_mv[ref_frame],
                         &frame_near_mv[ref_frame]);

  // Further refinement that is encode side only to test the top few candidates
  // in full and choose the best as the centre point for subsequent searches.
  // The current implementation doesn't support scaling.
  if (!vp10_is_scaled(sf) && block_size >= BLOCK_8X8)
    vp10_mv_pred(cpi, x, yv12_mb[ref_frame][0].buf, yv12->y_stride,
                 ref_frame, block_size);
}
3198
// Perform a single-reference (NEWMV) motion search for the current block:
// a full-pixel search seeded from the best predicted MV centre, followed
// by a sub-pixel refinement. Outputs the selected vector in *tmp_mv
// (1/8-pel units) and its estimated signalling cost in *rate_mv. When the
// adaptive speed feature decides this reference predicts much worse than
// another, the search is skipped and tmp_mv->as_int is set to INVALID_MV.
static void single_motion_search(VP10_COMP *cpi, MACROBLOCK *x,
                                 BLOCK_SIZE bsize,
                                 int mi_row, int mi_col,
                                 int_mv *tmp_mv, int *rate_mv) {
  MACROBLOCKD *xd = &x->e_mbd;
  const VP10_COMMON *cm = &cpi->common;
  MB_MODE_INFO *mbmi = &xd->mi[0]->mbmi;
  struct buf_2d backup_yv12[MAX_MB_PLANE] = {{0, 0}};
  int bestsme = INT_MAX;
  int step_param;
  int sadpb = x->sadperbit16;
  MV mvp_full;
  int ref = mbmi->ref_frame[0];
  MV ref_mv = x->mbmi_ext->ref_mvs[ref][0].as_mv;

  // Save the current MV search window; vp10_set_mv_search_range() below
  // narrows it around ref_mv and it must be restored before returning.
  int tmp_col_min = x->mv_col_min;
  int tmp_col_max = x->mv_col_max;
  int tmp_row_min = x->mv_row_min;
  int tmp_row_max = x->mv_row_max;
  int cost_list[5];

  const YV12_BUFFER_CONFIG *scaled_ref_frame = vp10_get_scaled_ref_frame(cpi,
                                                                         ref);

  // Candidate search centres: the two best reference MVs for this ref
  // frame plus x->pred_mv[ref] (updated at the end of this function).
  MV pred_mv[3];
  pred_mv[0] = x->mbmi_ext->ref_mvs[ref][0].as_mv;
  pred_mv[1] = x->mbmi_ext->ref_mvs[ref][1].as_mv;
  pred_mv[2] = x->pred_mv[ref];

  if (scaled_ref_frame) {
    int i;
    // Swap out the reference frame for a version that's been scaled to
    // match the resolution of the current frame, allowing the existing
    // motion search code to be used without additional modifications.
    for (i = 0; i < MAX_MB_PLANE; i++)
      backup_yv12[i] = xd->plane[i].pre[0];

    vp10_setup_pre_planes(xd, 0, scaled_ref_frame, mi_row, mi_col, NULL);
  }

  vp10_set_mv_search_range(x, &ref_mv);

  // Work out the size of the first step in the mv step search.
  // 0 here is maximum length first step. 1 is VPXMAX >> 1 etc.
  if (cpi->sf.mv.auto_mv_step_size && cm->show_frame) {
    // Take wtd average of the step_params based on the last frame's
    // max mv magnitude and that based on the best ref mvs of the current
    // block for the given reference.
    step_param = (vp10_init_search_range(x->max_mv_context[ref]) +
                    cpi->mv_step_param) / 2;
  } else {
    step_param = cpi->mv_step_param;
  }

  if (cpi->sf.adaptive_motion_search && bsize < BLOCK_64X64) {
    // Larger first step for smaller blocks: scale by how much smaller than
    // 64x64 the block's narrow dimension is.
    int boffset =
        2 * (b_width_log2_lookup[BLOCK_64X64] -
             VPXMIN(b_height_log2_lookup[bsize], b_width_log2_lookup[bsize]));
    step_param = VPXMAX(step_param, boffset);
  }

  if (cpi->sf.adaptive_motion_search) {
    int bwl = b_width_log2_lookup[bsize];
    int bhl = b_height_log2_lookup[bsize];
    // Per-pixel prediction SAD level for this reference.
    int tlevel = x->pred_mv_sad[ref] >> (bwl + bhl + 4);

    if (tlevel < 5)
      step_param += 2;

    // prev_mv_sad is not setup for dynamically scaled frames.
    if (cpi->oxcf.resize_mode != RESIZE_DYNAMIC) {
      int i;
      for (i = LAST_FRAME; i <= ALTREF_FRAME && cm->show_frame; ++i) {
        if ((x->pred_mv_sad[ref] >> 3) > x->pred_mv_sad[i]) {
          // Another reference predicts far better: skip the search for
          // this one and report an invalid MV to the caller.
          x->pred_mv[ref].row = 0;
          x->pred_mv[ref].col = 0;
          tmp_mv->as_int = INVALID_MV;

          if (scaled_ref_frame) {
            int i;
            for (i = 0; i < MAX_MB_PLANE; ++i)
              xd->plane[i].pre[0] = backup_yv12[i];
          }
          return;
        }
      }
    }
  }

  // Seed the full-pixel search from the best predicted centre, converted
  // from 1/8-pel to full-pel units.
  mvp_full = pred_mv[x->mv_best_ref_index[ref]];

  mvp_full.col >>= 3;
  mvp_full.row >>= 3;

  bestsme = vp10_full_pixel_search(cpi, x, bsize, &mvp_full, step_param, sadpb,
                                   cond_cost_list(cpi, cost_list),
                                   &ref_mv, &tmp_mv->as_mv, INT_MAX, 1);

  // Restore the original search window.
  x->mv_col_min = tmp_col_min;
  x->mv_col_max = tmp_col_max;
  x->mv_row_min = tmp_row_min;
  x->mv_row_max = tmp_row_max;

  if (bestsme < INT_MAX) {
    int dis;  /* TODO: use dis in distortion calculation later. */
    cpi->find_fractional_mv_step(x, &tmp_mv->as_mv, &ref_mv,
                                 cm->allow_high_precision_mv,
                                 x->errorperbit,
                                 &cpi->fn_ptr[bsize],
                                 cpi->sf.mv.subpel_force_stop,
                                 cpi->sf.mv.subpel_iters_per_step,
                                 cond_cost_list(cpi, cost_list),
                                 x->nmvjointcost, x->mvcost,
                                 &dis, &x->pred_sse[ref], NULL, 0, 0);
  }
  *rate_mv = vp10_mv_bit_cost(&tmp_mv->as_mv, &ref_mv,
                              x->nmvjointcost, x->mvcost, MV_COST_WEIGHT);

  if (cpi->sf.adaptive_motion_search)
    x->pred_mv[ref] = tmp_mv->as_mv;

  if (scaled_ref_frame) {
    int i;
    // Undo the pre-plane swap performed at function entry.
    for (i = 0; i < MAX_MB_PLANE; i++)
      xd->plane[i].pre[0] = backup_yv12[i];
  }
}
3326
3327
3328
3329static INLINE void restore_dst_buf(MACROBLOCKD *xd,
3330 uint8_t *orig_dst[MAX_MB_PLANE],
3331 int orig_dst_stride[MAX_MB_PLANE]) {
3332 int i;
3333 for (i = 0; i < MAX_MB_PLANE; i++) {
3334 xd->plane[i].dst.buf = orig_dst[i];
3335 xd->plane[i].dst.stride = orig_dst_stride[i];
3336 }
3337}
3338
// In some situations we want to discount the apparent cost of a new motion
3340// vector. Where there is a subtle motion field and especially where there is
3341// low spatial complexity then it can be hard to cover the cost of a new motion
3342// vector in a single block, even if that motion vector reduces distortion.
3343// However, once established that vector may be usable through the nearest and
3344// near mv modes to reduce distortion in subsequent blocks and also improve
3345// visual quality.
Yaowu Xu26a9afc2015-08-13 09:42:27 -07003346static int discount_newmv_test(const VP10_COMP *cpi,
Jingning Han3ee6db62015-08-05 19:00:31 -07003347 int this_mode,
3348 int_mv this_mv,
3349 int_mv (*mode_mv)[MAX_REF_FRAMES],
3350 int ref_frame) {
3351 return (!cpi->rc.is_src_frame_alt_ref &&
3352 (this_mode == NEWMV) &&
3353 (this_mv.as_int != 0) &&
3354 ((mode_mv[NEARESTMV][ref_frame].as_int == 0) ||
3355 (mode_mv[NEARESTMV][ref_frame].as_int == INVALID_MV)) &&
3356 ((mode_mv[NEARMV][ref_frame].as_int == 0) ||
3357 (mode_mv[NEARMV][ref_frame].as_int == INVALID_MV)));
3358}
3359
// Margins by which a motion vector may point beyond the visible frame:
// encoder border minus the interpolation filter extension, converted from
// pixels with << 3 (the MV/edge coordinate space is 8x finer than pixels).
#define LEFT_TOP_MARGIN ((VP9_ENC_BORDER_IN_PIXELS - VP9_INTERP_EXTEND) << 3)
#define RIGHT_BOTTOM_MARGIN ((VP9_ENC_BORDER_IN_PIXELS -\
                              VP9_INTERP_EXTEND) << 3)

// TODO(jingning): this mv clamping function should be block size dependent.
// Clamp *mv so that the referenced prediction area stays within the
// padded frame border recorded in xd->mb_to_*_edge.
static INLINE void clamp_mv2(MV *mv, const MACROBLOCKD *xd) {
  clamp_mv(mv, xd->mb_to_left_edge - LEFT_TOP_MARGIN,
           xd->mb_to_right_edge + RIGHT_BOTTOM_MARGIN,
           xd->mb_to_top_edge - LEFT_TOP_MARGIN,
           xd->mb_to_bottom_edge + RIGHT_BOTTOM_MARGIN);
}
3371
// Full rate-distortion evaluation of one inter mode (mbmi->mode and the
// reference frame(s) are already set by the caller). Runs the NEWMV motion
// search when needed, selects the interpolation filter, builds the
// prediction and accumulates Y/UV transform RD results into *rate2,
// *distortion, *rate_y, *rate_uv, *skippable and *psse. Returns 0 on
// success (the caller recomputes the final RD cost) or INT64_MAX when the
// mode can be pruned early.
static int64_t handle_inter_mode(VP10_COMP *cpi, MACROBLOCK *x,
                                 BLOCK_SIZE bsize,
                                 int *rate2, int64_t *distortion,
                                 int *skippable,
                                 int *rate_y, int *rate_uv,
                                 int *disable_skip,
                                 int_mv (*mode_mv)[MAX_REF_FRAMES],
                                 int mi_row, int mi_col,
                                 int_mv single_newmv[MAX_REF_FRAMES],
                                 INTERP_FILTER (*single_filter)[MAX_REF_FRAMES],
                                 int (*single_skippable)[MAX_REF_FRAMES],
                                 int64_t *psse,
                                 const int64_t ref_best_rd,
                                 int64_t *mask_filter,
                                 int64_t filter_cache[]) {
  VP10_COMMON *cm = &cpi->common;
  MACROBLOCKD *xd = &x->e_mbd;
  MB_MODE_INFO *mbmi = &xd->mi[0]->mbmi;
  MB_MODE_INFO_EXT *const mbmi_ext = x->mbmi_ext;
  const int is_comp_pred = has_second_ref(mbmi);
  const int this_mode = mbmi->mode;
  int_mv *frame_mv = mode_mv[this_mode];
  int i;
  int refs[2] = { mbmi->ref_frame[0],
    (mbmi->ref_frame[1] < 0 ? 0 : mbmi->ref_frame[1]) };
  int_mv cur_mv[2];
#if CONFIG_VP9_HIGHBITDEPTH
  DECLARE_ALIGNED(16, uint16_t, tmp_buf16[MAX_MB_PLANE * 64 * 64]);
  uint8_t *tmp_buf;
#else
  DECLARE_ALIGNED(16, uint8_t, tmp_buf[MAX_MB_PLANE * 64 * 64]);
#endif  // CONFIG_VP9_HIGHBITDEPTH
  int pred_exists = 0;
  int intpel_mv;
  int64_t rd, tmp_rd, best_rd = INT64_MAX;
  int best_needs_copy = 0;
  uint8_t *orig_dst[MAX_MB_PLANE];
  int orig_dst_stride[MAX_MB_PLANE];
  int rs = 0;
  INTERP_FILTER best_filter = SWITCHABLE;
  uint8_t skip_txfm[MAX_MB_PLANE << 2] = {0};
  int64_t bsse[MAX_MB_PLANE << 2] = {0};

  int bsl = mi_width_log2_lookup[bsize];
  // Chessboard pattern speed feature: alternate doing the filter search
  // per block position and frame count.
  int pred_filter_search = cpi->sf.cb_pred_filter_search ?
      (((mi_row + mi_col) >> bsl) +
       get_chessboard_index(cm->current_video_frame)) & 0x1 : 0;

  int skip_txfm_sb = 0;
  int64_t skip_sse_sb = INT64_MAX;
  int64_t distortion_y = 0, distortion_uv = 0;

#if CONFIG_VP9_HIGHBITDEPTH
  if (xd->cur_buf->flags & YV12_FLAG_HIGHBITDEPTH) {
    tmp_buf = CONVERT_TO_BYTEPTR(tmp_buf16);
  } else {
    tmp_buf = (uint8_t *)tmp_buf16;
  }
#endif  // CONFIG_VP9_HIGHBITDEPTH

  if (pred_filter_search) {
    // Reuse the filter of the above/left neighbours when available.
    INTERP_FILTER af = SWITCHABLE, lf = SWITCHABLE;
    if (xd->up_available)
      af = xd->mi[-xd->mi_stride]->mbmi.interp_filter;
    if (xd->left_available)
      lf = xd->mi[-1]->mbmi.interp_filter;

    if ((this_mode != NEWMV) || (af == lf))
      best_filter = af;
  }

  if (is_comp_pred) {
    // Compound prediction requires both references to have a valid MV.
    if (frame_mv[refs[0]].as_int == INVALID_MV ||
        frame_mv[refs[1]].as_int == INVALID_MV)
      return INT64_MAX;

    if (cpi->sf.adaptive_mode_search) {
      if (single_filter[this_mode][refs[0]] ==
          single_filter[this_mode][refs[1]])
        best_filter = single_filter[this_mode][refs[0]];
    }
  }

  if (this_mode == NEWMV) {
    int rate_mv;
    if (is_comp_pred) {
      // Initialize mv using single prediction mode result.
      frame_mv[refs[0]].as_int = single_newmv[refs[0]].as_int;
      frame_mv[refs[1]].as_int = single_newmv[refs[1]].as_int;

      if (cpi->sf.comp_inter_joint_search_thresh <= bsize) {
        joint_motion_search(cpi, x, bsize, frame_mv,
                            mi_row, mi_col, single_newmv, &rate_mv);
      } else {
        rate_mv = vp10_mv_bit_cost(&frame_mv[refs[0]].as_mv,
                                   &x->mbmi_ext->ref_mvs[refs[0]][0].as_mv,
                                   x->nmvjointcost, x->mvcost, MV_COST_WEIGHT);
        rate_mv += vp10_mv_bit_cost(&frame_mv[refs[1]].as_mv,
                                    &x->mbmi_ext->ref_mvs[refs[1]][0].as_mv,
                                    x->nmvjointcost, x->mvcost, MV_COST_WEIGHT);
      }
      *rate2 += rate_mv;
    } else {
      int_mv tmp_mv;
      single_motion_search(cpi, x, bsize, mi_row, mi_col,
                           &tmp_mv, &rate_mv);
      if (tmp_mv.as_int == INVALID_MV)
        return INT64_MAX;

      frame_mv[refs[0]].as_int =
          xd->mi[0]->bmi[0].as_mv[0].as_int = tmp_mv.as_int;
      single_newmv[refs[0]].as_int = tmp_mv.as_int;

      // Estimate the rate implications of a new mv but discount this
      // under certain circumstances where we want to help initiate a weak
      // motion field, where the distortion gain for a single block may not
      // be enough to overcome the cost of a new mv.
      if (discount_newmv_test(cpi, this_mode, tmp_mv, mode_mv, refs[0])) {
        *rate2 += VPXMAX((rate_mv / NEW_MV_DISCOUNT_FACTOR), 1);
      } else {
        *rate2 += rate_mv;
      }
    }
  }

  for (i = 0; i < is_comp_pred + 1; ++i) {
    cur_mv[i] = frame_mv[refs[i]];
    // Clip "next_nearest" so that it does not extend too far out of image
    if (this_mode != NEWMV)
      clamp_mv2(&cur_mv[i].as_mv, xd);

    if (mv_check_bounds(x, &cur_mv[i].as_mv))
      return INT64_MAX;
    mbmi->mv[i].as_int = cur_mv[i].as_int;
  }

  // do first prediction into the destination buffer. Do the next
  // prediction into a temporary buffer. Then keep track of which one
  // of these currently holds the best predictor, and use the other
  // one for future predictions. In the end, copy from tmp_buf to
  // dst if necessary.
  for (i = 0; i < MAX_MB_PLANE; i++) {
    orig_dst[i] = xd->plane[i].dst.buf;
    orig_dst_stride[i] = xd->plane[i].dst.stride;
  }

  // We don't include the cost of the second reference here, because there
  // are only three options: Last/Golden, ARF/Last or Golden/ARF, or in other
  // words if you present them in that order, the second one is always known
  // if the first is known.
  //
  // Under some circumstances we discount the cost of new mv mode to encourage
  // initiation of a motion field.
  if (discount_newmv_test(cpi, this_mode, frame_mv[refs[0]],
                          mode_mv, refs[0])) {
    *rate2 += VPXMIN(cost_mv_ref(cpi, this_mode,
                                 mbmi_ext->mode_context[refs[0]]),
                     cost_mv_ref(cpi, NEARESTMV,
                                 mbmi_ext->mode_context[refs[0]]));
  } else {
    *rate2 += cost_mv_ref(cpi, this_mode, mbmi_ext->mode_context[refs[0]]);
  }

  // Early exit when the mode-signalling rate alone already exceeds the
  // best RD found so far (NEARESTMV is always fully evaluated).
  if (RDCOST(x->rdmult, x->rddiv, *rate2, 0) > ref_best_rd &&
      mbmi->mode != NEARESTMV)
    return INT64_MAX;

  pred_exists = 0;
  // Are all MVs integer pel for Y and UV
  intpel_mv = !mv_has_subpel(&mbmi->mv[0].as_mv);
  if (is_comp_pred)
    intpel_mv &= !mv_has_subpel(&mbmi->mv[1].as_mv);

  // Search for best switchable filter by checking the variance of
  // pred error irrespective of whether the filter will be used
  for (i = 0; i < SWITCHABLE_FILTER_CONTEXTS; ++i)
    filter_cache[i] = INT64_MAX;

  if (cm->interp_filter != BILINEAR) {
    if (x->source_variance < cpi->sf.disable_filter_search_var_thresh) {
      best_filter = EIGHTTAP;
    } else if (best_filter == SWITCHABLE) {
      int newbest;
      int tmp_rate_sum = 0;
      int64_t tmp_dist_sum = 0;

      for (i = 0; i < SWITCHABLE_FILTERS; ++i) {
        int j;
        int64_t rs_rd;
        int tmp_skip_sb = 0;
        int64_t tmp_skip_sse = INT64_MAX;

        mbmi->interp_filter = i;
        rs = vp10_get_switchable_rate(cpi, xd);
        rs_rd = RDCOST(x->rdmult, x->rddiv, rs, 0);

        if (i > 0 && intpel_mv) {
          // With integer-pel MVs every filter produces the same
          // prediction, so reuse the rate/distortion from filter 0.
          rd = RDCOST(x->rdmult, x->rddiv, tmp_rate_sum, tmp_dist_sum);
          filter_cache[i] = rd;
          filter_cache[SWITCHABLE_FILTERS] =
              VPXMIN(filter_cache[SWITCHABLE_FILTERS], rd + rs_rd);
          if (cm->interp_filter == SWITCHABLE)
            rd += rs_rd;
          *mask_filter = VPXMAX(*mask_filter, rd);
        } else {
          int rate_sum = 0;
          int64_t dist_sum = 0;
          if (i > 0 && cpi->sf.adaptive_interp_filter_search &&
              (cpi->sf.interp_filter_search_mask & (1 << i))) {
            rate_sum = INT_MAX;
            dist_sum = INT64_MAX;
            continue;
          }

          if ((cm->interp_filter == SWITCHABLE &&
               (!i || best_needs_copy)) ||
              (cm->interp_filter != SWITCHABLE &&
               (cm->interp_filter == mbmi->interp_filter ||
                (i == 0 && intpel_mv)))) {
            restore_dst_buf(xd, orig_dst, orig_dst_stride);
          } else {
            // Predict into the scratch buffer so the current best
            // prediction in dst is preserved.
            for (j = 0; j < MAX_MB_PLANE; j++) {
              xd->plane[j].dst.buf = tmp_buf + j * 64 * 64;
              xd->plane[j].dst.stride = 64;
            }
          }
          vp10_build_inter_predictors_sb(xd, mi_row, mi_col, bsize);
          model_rd_for_sb(cpi, bsize, x, xd, &rate_sum, &dist_sum,
                          &tmp_skip_sb, &tmp_skip_sse);

          rd = RDCOST(x->rdmult, x->rddiv, rate_sum, dist_sum);
          filter_cache[i] = rd;
          filter_cache[SWITCHABLE_FILTERS] =
              VPXMIN(filter_cache[SWITCHABLE_FILTERS], rd + rs_rd);
          if (cm->interp_filter == SWITCHABLE)
            rd += rs_rd;
          *mask_filter = VPXMAX(*mask_filter, rd);

          if (i == 0 && intpel_mv) {
            tmp_rate_sum = rate_sum;
            tmp_dist_sum = dist_sum;
          }
        }

        if (i == 0 && cpi->sf.use_rd_breakout && ref_best_rd < INT64_MAX) {
          if (rd / 2 > ref_best_rd) {
            restore_dst_buf(xd, orig_dst, orig_dst_stride);
            return INT64_MAX;
          }
        }
        newbest = i == 0 || rd < best_rd;

        if (newbest) {
          best_rd = rd;
          best_filter = mbmi->interp_filter;
          // Track which of dst/tmp_buf holds the best prediction.
          if (cm->interp_filter == SWITCHABLE && i && !intpel_mv)
            best_needs_copy = !best_needs_copy;
        }

        if ((cm->interp_filter == SWITCHABLE && newbest) ||
            (cm->interp_filter != SWITCHABLE &&
             cm->interp_filter == mbmi->interp_filter)) {
          pred_exists = 1;
          tmp_rd = best_rd;

          skip_txfm_sb = tmp_skip_sb;
          skip_sse_sb = tmp_skip_sse;
          memcpy(skip_txfm, x->skip_txfm, sizeof(skip_txfm));
          memcpy(bsse, x->bsse, sizeof(bsse));
        }
      }
      restore_dst_buf(xd, orig_dst, orig_dst_stride);
    }
  }
  // Set the appropriate filter
  mbmi->interp_filter = cm->interp_filter != SWITCHABLE ?
      cm->interp_filter : best_filter;
  rs = cm->interp_filter == SWITCHABLE ? vp10_get_switchable_rate(cpi, xd) : 0;

  if (pred_exists) {
    if (best_needs_copy) {
      // again temporarily set the buffers to local memory to prevent a memcpy
      for (i = 0; i < MAX_MB_PLANE; i++) {
        xd->plane[i].dst.buf = tmp_buf + i * 64 * 64;
        xd->plane[i].dst.stride = 64;
      }
    }
    rd = tmp_rd + RDCOST(x->rdmult, x->rddiv, rs, 0);
  } else {
    int tmp_rate;
    int64_t tmp_dist;
    // Handles the special case when a filter that is not in the
    // switchable list (ex. bilinear) is indicated at the frame level, or
    // skip condition holds.
    vp10_build_inter_predictors_sb(xd, mi_row, mi_col, bsize);
    model_rd_for_sb(cpi, bsize, x, xd, &tmp_rate, &tmp_dist,
                    &skip_txfm_sb, &skip_sse_sb);
    rd = RDCOST(x->rdmult, x->rddiv, rs + tmp_rate, tmp_dist);
    memcpy(skip_txfm, x->skip_txfm, sizeof(skip_txfm));
    memcpy(bsse, x->bsse, sizeof(bsse));
  }

  if (!is_comp_pred)
    single_filter[this_mode][refs[0]] = mbmi->interp_filter;

  if (cpi->sf.adaptive_mode_search)
    if (is_comp_pred)
      if (single_skippable[this_mode][refs[0]] &&
          single_skippable[this_mode][refs[1]])
        memset(skip_txfm, SKIP_TXFM_AC_DC, sizeof(skip_txfm));

  if (cpi->sf.use_rd_breakout && ref_best_rd < INT64_MAX) {
    // if current pred_error modeled rd is substantially more than the best
    // so far, do not bother doing full rd
    if (rd / 2 > ref_best_rd) {
      restore_dst_buf(xd, orig_dst, orig_dst_stride);
      return INT64_MAX;
    }
  }

  if (cm->interp_filter == SWITCHABLE)
    *rate2 += rs;

  memcpy(x->skip_txfm, skip_txfm, sizeof(skip_txfm));
  memcpy(x->bsse, bsse, sizeof(bsse));

  if (!skip_txfm_sb) {
    int skippable_y, skippable_uv;
    int64_t sseuv = INT64_MAX;
    int64_t rdcosty = INT64_MAX;

    // Y cost and distortion
    vp10_subtract_plane(x, bsize, 0);
#if CONFIG_VAR_TX
    if (cm->tx_mode == TX_MODE_SELECT || xd->lossless[mbmi->segment_id]) {
      inter_block_yrd(cpi, x, rate_y, &distortion_y, &skippable_y, psse,
                      bsize, ref_best_rd);
    } else {
      super_block_yrd(cpi, x, rate_y, &distortion_y, &skippable_y, psse,
                      bsize, ref_best_rd);
      for (i = 0; i < 64; ++i)
        mbmi->inter_tx_size[i] = mbmi->tx_size;
    }
#else
    super_block_yrd(cpi, x, rate_y, &distortion_y, &skippable_y, psse,
                    bsize, ref_best_rd);
#endif

    if (*rate_y == INT_MAX) {
      *rate2 = INT_MAX;
      *distortion = INT64_MAX;
      restore_dst_buf(xd, orig_dst, orig_dst_stride);
      return INT64_MAX;
    }

    *rate2 += *rate_y;
    *distortion += distortion_y;

    // Bound the remaining RD budget for UV by the cheaper of the current
    // accumulated cost and the pure-SSE (skip) cost.
    rdcosty = RDCOST(x->rdmult, x->rddiv, *rate2, *distortion);
    rdcosty = VPXMIN(rdcosty, RDCOST(x->rdmult, x->rddiv, 0, *psse));

#if CONFIG_VAR_TX
    if (!inter_block_uvrd(cpi, x, rate_uv, &distortion_uv, &skippable_uv,
                          &sseuv, bsize, ref_best_rd - rdcosty)) {
#else
    if (!super_block_uvrd(cpi, x, rate_uv, &distortion_uv, &skippable_uv,
                          &sseuv, bsize, ref_best_rd - rdcosty)) {
#endif
      *rate2 = INT_MAX;
      *distortion = INT64_MAX;
      restore_dst_buf(xd, orig_dst, orig_dst_stride);
      return INT64_MAX;
    }

    *psse += sseuv;
    *rate2 += *rate_uv;
    *distortion += distortion_uv;
    *skippable = skippable_y && skippable_uv;
  } else {
    // Model predicts the whole SB can be skipped: take the modeled SSE
    // as distortion and signal skip.
    x->skip = 1;
    *disable_skip = 1;

    // The cost of skip bit needs to be added.
    *rate2 += vp10_cost_bit(vp10_get_skip_prob(cm, xd), 1);

    *distortion = skip_sse_sb;
  }

  if (!is_comp_pred)
    single_skippable[this_mode][refs[0]] = *skippable;

  restore_dst_buf(xd, orig_dst, orig_dst_stride);
  return 0;  // The rate-distortion cost will be re-calculated by caller.
}
3766
Yaowu Xu26a9afc2015-08-13 09:42:27 -07003767void vp10_rd_pick_intra_mode_sb(VP10_COMP *cpi, MACROBLOCK *x,
Jingning Han3ee6db62015-08-05 19:00:31 -07003768 RD_COST *rd_cost, BLOCK_SIZE bsize,
3769 PICK_MODE_CONTEXT *ctx, int64_t best_rd) {
Yaowu Xufc7cbd12015-08-13 09:36:53 -07003770 VP10_COMMON *const cm = &cpi->common;
Jingning Han3ee6db62015-08-05 19:00:31 -07003771 MACROBLOCKD *const xd = &x->e_mbd;
3772 struct macroblockd_plane *const pd = xd->plane;
3773 int rate_y = 0, rate_uv = 0, rate_y_tokenonly = 0, rate_uv_tokenonly = 0;
3774 int y_skip = 0, uv_skip = 0;
3775 int64_t dist_y = 0, dist_uv = 0;
3776 TX_SIZE max_uv_tx_size;
Jingning Han3ee6db62015-08-05 19:00:31 -07003777 ctx->skip = 0;
3778 xd->mi[0]->mbmi.ref_frame[0] = INTRA_FRAME;
3779 xd->mi[0]->mbmi.ref_frame[1] = NONE;
3780
3781 if (bsize >= BLOCK_8X8) {
3782 if (rd_pick_intra_sby_mode(cpi, x, &rate_y, &rate_y_tokenonly,
3783 &dist_y, &y_skip, bsize,
3784 best_rd) >= best_rd) {
3785 rd_cost->rate = INT_MAX;
3786 return;
3787 }
3788 } else {
3789 y_skip = 0;
3790 if (rd_pick_intra_sub_8x8_y_mode(cpi, x, &rate_y, &rate_y_tokenonly,
3791 &dist_y, best_rd) >= best_rd) {
3792 rd_cost->rate = INT_MAX;
3793 return;
3794 }
3795 }
3796 max_uv_tx_size = get_uv_tx_size_impl(xd->mi[0]->mbmi.tx_size, bsize,
3797 pd[1].subsampling_x,
3798 pd[1].subsampling_y);
3799 rd_pick_intra_sbuv_mode(cpi, x, ctx, &rate_uv, &rate_uv_tokenonly,
James Zern5e16d392015-08-17 18:19:22 -07003800 &dist_uv, &uv_skip, VPXMAX(BLOCK_8X8, bsize),
Jingning Han3ee6db62015-08-05 19:00:31 -07003801 max_uv_tx_size);
3802
3803 if (y_skip && uv_skip) {
3804 rd_cost->rate = rate_y + rate_uv - rate_y_tokenonly - rate_uv_tokenonly +
3805 vp10_cost_bit(vp10_get_skip_prob(cm, xd), 1);
3806 rd_cost->dist = dist_y + dist_uv;
3807 } else {
3808 rd_cost->rate = rate_y + rate_uv +
3809 vp10_cost_bit(vp10_get_skip_prob(cm, xd), 0);
3810 rd_cost->dist = dist_y + dist_uv;
3811 }
3812
3813 ctx->mic = *xd->mi[0];
3814 ctx->mbmi_ext = *x->mbmi_ext;
3815 rd_cost->rdcost = RDCOST(x->rdmult, x->rddiv, rd_cost->rate, rd_cost->dist);
3816}
3817
// This function is designed to apply a bias or adjustment to an rd value based
// on the relative variance of the source and reconstruction.
#define LOW_VAR_THRESH 16   // combined-variance floor below which no error term is computed
#define VLOW_ADJ_MAX 25     // max % RD penalty for low-complexity intra predictors
#define VHIGH_ADJ_MAX 8     // max % RD penalty for false texture on flat sources
static void rd_variance_adjustment(VP10_COMP *cpi,
                                   MACROBLOCK *x,
                                   BLOCK_SIZE bsize,
                                   int64_t *this_rd,
                                   MV_REFERENCE_FRAME ref_frame,
                                   unsigned int source_variance) {
  MACROBLOCKD *const xd = &x->e_mbd;
  unsigned int recon_variance;
  unsigned int absvar_diff = 0;
  int64_t var_error = 0;
  int64_t var_factor = 0;

  // Nothing to adjust for an already-pruned mode.
  if (*this_rd == INT64_MAX)
    return;

#if CONFIG_VP9_HIGHBITDEPTH
  if (xd->cur_buf->flags & YV12_FLAG_HIGHBITDEPTH) {
    recon_variance =
        vp10_high_get_sby_perpixel_variance(cpi, &xd->plane[0].dst, bsize, xd->bd);
  } else {
    recon_variance =
        vp10_get_sby_perpixel_variance(cpi, &xd->plane[0].dst, bsize);
  }
#else
  recon_variance =
      vp10_get_sby_perpixel_variance(cpi, &xd->plane[0].dst, bsize);
#endif  // CONFIG_VP9_HIGHBITDEPTH

  if ((source_variance + recon_variance) > LOW_VAR_THRESH) {
    absvar_diff = (source_variance > recon_variance)
        ? (source_variance - recon_variance)
        : (recon_variance - source_variance);

    // var_error in [0, 100]: 0 when the variances match exactly, growing
    // toward 100 as they diverge.
    var_error = (200 * source_variance * recon_variance) /
        ((source_variance * source_variance) +
         (recon_variance * recon_variance));
    var_error = 100 - var_error;
  }

  // Source variance above a threshold and ref frame is intra.
  // This case is targeted mainly at discouraging intra modes that give rise
  // to a predictor with a low spatial complexity compared to the source.
  if ((source_variance > LOW_VAR_THRESH) && (ref_frame == INTRA_FRAME) &&
      (source_variance > recon_variance)) {
    var_factor = VPXMIN(absvar_diff, VPXMIN(VLOW_ADJ_MAX, var_error));
  // A second possible case of interest is where the source variance
  // is very low and we wish to discourage false texture or motion trails.
  } else if ((source_variance < (LOW_VAR_THRESH >> 1)) &&
             (recon_variance > source_variance)) {
    var_factor = VPXMIN(absvar_diff, VPXMIN(VHIGH_ADJ_MAX, var_error));
  }
  // Apply the penalty as a percentage increase of the RD cost.
  *this_rd += (*this_rd * var_factor) / 100;
}
3876
3877
3878// Do we have an internal image edge (e.g. formatting bars).
Yaowu Xu26a9afc2015-08-13 09:42:27 -07003879int vp10_internal_image_edge(VP10_COMP *cpi) {
Jingning Han3ee6db62015-08-05 19:00:31 -07003880 return (cpi->oxcf.pass == 2) &&
3881 ((cpi->twopass.this_frame_stats.inactive_zone_rows > 0) ||
3882 (cpi->twopass.this_frame_stats.inactive_zone_cols > 0));
3883}
3884
3885// Checks to see if a super block is on a horizontal image edge.
3886// In most cases this is the "real" edge unless there are formatting
3887// bars embedded in the stream.
Yaowu Xu26a9afc2015-08-13 09:42:27 -07003888int vp10_active_h_edge(VP10_COMP *cpi, int mi_row, int mi_step) {
Jingning Han3ee6db62015-08-05 19:00:31 -07003889 int top_edge = 0;
3890 int bottom_edge = cpi->common.mi_rows;
3891 int is_active_h_edge = 0;
3892
3893 // For two pass account for any formatting bars detected.
3894 if (cpi->oxcf.pass == 2) {
3895 TWO_PASS *twopass = &cpi->twopass;
3896
3897 // The inactive region is specified in MBs not mi units.
3898 // The image edge is in the following MB row.
3899 top_edge += (int)(twopass->this_frame_stats.inactive_zone_rows * 2);
3900
3901 bottom_edge -= (int)(twopass->this_frame_stats.inactive_zone_rows * 2);
James Zern5e16d392015-08-17 18:19:22 -07003902 bottom_edge = VPXMAX(top_edge, bottom_edge);
Jingning Han3ee6db62015-08-05 19:00:31 -07003903 }
3904
3905 if (((top_edge >= mi_row) && (top_edge < (mi_row + mi_step))) ||
3906 ((bottom_edge >= mi_row) && (bottom_edge < (mi_row + mi_step)))) {
3907 is_active_h_edge = 1;
3908 }
3909 return is_active_h_edge;
3910}
3911
3912// Checks to see if a super block is on a vertical image edge.
3913// In most cases this is the "real" edge unless there are formatting
3914// bars embedded in the stream.
Yaowu Xu26a9afc2015-08-13 09:42:27 -07003915int vp10_active_v_edge(VP10_COMP *cpi, int mi_col, int mi_step) {
Jingning Han3ee6db62015-08-05 19:00:31 -07003916 int left_edge = 0;
3917 int right_edge = cpi->common.mi_cols;
3918 int is_active_v_edge = 0;
3919
3920 // For two pass account for any formatting bars detected.
3921 if (cpi->oxcf.pass == 2) {
3922 TWO_PASS *twopass = &cpi->twopass;
3923
3924 // The inactive region is specified in MBs not mi units.
3925 // The image edge is in the following MB row.
3926 left_edge += (int)(twopass->this_frame_stats.inactive_zone_cols * 2);
3927
3928 right_edge -= (int)(twopass->this_frame_stats.inactive_zone_cols * 2);
James Zern5e16d392015-08-17 18:19:22 -07003929 right_edge = VPXMAX(left_edge, right_edge);
Jingning Han3ee6db62015-08-05 19:00:31 -07003930 }
3931
3932 if (((left_edge >= mi_col) && (left_edge < (mi_col + mi_step))) ||
3933 ((right_edge >= mi_col) && (right_edge < (mi_col + mi_step)))) {
3934 is_active_v_edge = 1;
3935 }
3936 return is_active_v_edge;
3937}
3938
3939// Checks to see if a super block is at the edge of the active image.
3940// In most cases this is the "real" edge unless there are formatting
3941// bars embedded in the stream.
Yaowu Xu26a9afc2015-08-13 09:42:27 -07003942int vp10_active_edge_sb(VP10_COMP *cpi,
Jingning Han3ee6db62015-08-05 19:00:31 -07003943 int mi_row, int mi_col) {
3944 return vp10_active_h_edge(cpi, mi_row, MI_BLOCK_SIZE) ||
3945 vp10_active_v_edge(cpi, mi_col, MI_BLOCK_SIZE);
3946}
3947
Yaowu Xu26a9afc2015-08-13 09:42:27 -07003948void vp10_rd_pick_inter_mode_sb(VP10_COMP *cpi,
Jingning Han4fa8e732015-09-10 12:24:06 -07003949 TileDataEnc *tile_data,
3950 MACROBLOCK *x,
3951 int mi_row, int mi_col,
3952 RD_COST *rd_cost, BLOCK_SIZE bsize,
3953 PICK_MODE_CONTEXT *ctx,
3954 int64_t best_rd_so_far) {
Yaowu Xufc7cbd12015-08-13 09:36:53 -07003955 VP10_COMMON *const cm = &cpi->common;
Jingning Han3ee6db62015-08-05 19:00:31 -07003956 RD_OPT *const rd_opt = &cpi->rd;
3957 SPEED_FEATURES *const sf = &cpi->sf;
3958 MACROBLOCKD *const xd = &x->e_mbd;
3959 MB_MODE_INFO *const mbmi = &xd->mi[0]->mbmi;
3960 MB_MODE_INFO_EXT *const mbmi_ext = x->mbmi_ext;
3961 const struct segmentation *const seg = &cm->seg;
3962 PREDICTION_MODE this_mode;
3963 MV_REFERENCE_FRAME ref_frame, second_ref_frame;
3964 unsigned char segment_id = mbmi->segment_id;
3965 int comp_pred, i, k;
3966 int_mv frame_mv[MB_MODE_COUNT][MAX_REF_FRAMES];
3967 struct buf_2d yv12_mb[4][MAX_MB_PLANE];
3968 int_mv single_newmv[MAX_REF_FRAMES] = { { 0 } };
3969 INTERP_FILTER single_inter_filter[MB_MODE_COUNT][MAX_REF_FRAMES];
3970 int single_skippable[MB_MODE_COUNT][MAX_REF_FRAMES];
3971 static const int flag_list[4] = { 0, VP9_LAST_FLAG, VP9_GOLD_FLAG,
3972 VP9_ALT_FLAG };
3973 int64_t best_rd = best_rd_so_far;
3974 int64_t best_pred_diff[REFERENCE_MODES];
3975 int64_t best_pred_rd[REFERENCE_MODES];
3976 int64_t best_filter_rd[SWITCHABLE_FILTER_CONTEXTS];
3977 int64_t best_filter_diff[SWITCHABLE_FILTER_CONTEXTS];
3978 MB_MODE_INFO best_mbmode;
3979 int best_mode_skippable = 0;
3980 int midx, best_mode_index = -1;
3981 unsigned int ref_costs_single[MAX_REF_FRAMES], ref_costs_comp[MAX_REF_FRAMES];
3982 vpx_prob comp_mode_p;
3983 int64_t best_intra_rd = INT64_MAX;
3984 unsigned int best_pred_sse = UINT_MAX;
3985 PREDICTION_MODE best_intra_mode = DC_PRED;
3986 int rate_uv_intra[TX_SIZES], rate_uv_tokenonly[TX_SIZES];
3987 int64_t dist_uv[TX_SIZES];
3988 int skip_uv[TX_SIZES];
3989 PREDICTION_MODE mode_uv[TX_SIZES];
3990 const int intra_cost_penalty = vp10_get_intra_cost_penalty(
3991 cm->base_qindex, cm->y_dc_delta_q, cm->bit_depth);
3992 int best_skip2 = 0;
3993 uint8_t ref_frame_skip_mask[2] = { 0 };
3994 uint16_t mode_skip_mask[MAX_REF_FRAMES] = { 0 };
3995 int mode_skip_start = sf->mode_skip_start + 1;
3996 const int *const rd_threshes = rd_opt->threshes[segment_id][bsize];
3997 const int *const rd_thresh_freq_fact = tile_data->thresh_freq_fact[bsize];
3998 int64_t mode_threshold[MAX_MODES];
3999 int *mode_map = tile_data->mode_map[bsize];
4000 const int mode_search_skip_flags = sf->mode_search_skip_flags;
4001 int64_t mask_filter = 0;
4002 int64_t filter_cache[SWITCHABLE_FILTER_CONTEXTS];
4003
4004 vp10_zero(best_mbmode);
4005
Jingning Han3ee6db62015-08-05 19:00:31 -07004006 for (i = 0; i < SWITCHABLE_FILTER_CONTEXTS; ++i)
4007 filter_cache[i] = INT64_MAX;
4008
4009 estimate_ref_frame_costs(cm, xd, segment_id, ref_costs_single, ref_costs_comp,
4010 &comp_mode_p);
4011
4012 for (i = 0; i < REFERENCE_MODES; ++i)
4013 best_pred_rd[i] = INT64_MAX;
4014 for (i = 0; i < SWITCHABLE_FILTER_CONTEXTS; i++)
4015 best_filter_rd[i] = INT64_MAX;
4016 for (i = 0; i < TX_SIZES; i++)
4017 rate_uv_intra[i] = INT_MAX;
4018 for (i = 0; i < MAX_REF_FRAMES; ++i)
4019 x->pred_sse[i] = INT_MAX;
4020 for (i = 0; i < MB_MODE_COUNT; ++i) {
4021 for (k = 0; k < MAX_REF_FRAMES; ++k) {
4022 single_inter_filter[i][k] = SWITCHABLE;
4023 single_skippable[i][k] = 0;
4024 }
4025 }
4026
4027 rd_cost->rate = INT_MAX;
4028
4029 for (ref_frame = LAST_FRAME; ref_frame <= ALTREF_FRAME; ++ref_frame) {
4030 x->pred_mv_sad[ref_frame] = INT_MAX;
4031 if (cpi->ref_frame_flags & flag_list[ref_frame]) {
4032 assert(get_ref_frame_buffer(cpi, ref_frame) != NULL);
4033 setup_buffer_inter(cpi, x, ref_frame, bsize, mi_row, mi_col,
4034 frame_mv[NEARESTMV], frame_mv[NEARMV], yv12_mb);
4035 }
4036 frame_mv[NEWMV][ref_frame].as_int = INVALID_MV;
4037 frame_mv[ZEROMV][ref_frame].as_int = 0;
4038 }
4039
4040 for (ref_frame = LAST_FRAME; ref_frame <= ALTREF_FRAME; ++ref_frame) {
4041 if (!(cpi->ref_frame_flags & flag_list[ref_frame])) {
4042 // Skip checking missing references in both single and compound reference
4043 // modes. Note that a mode will be skipped iff both reference frames
4044 // are masked out.
4045 ref_frame_skip_mask[0] |= (1 << ref_frame);
4046 ref_frame_skip_mask[1] |= SECOND_REF_FRAME_MASK;
Jingning Han1eb760e2015-09-10 12:56:41 -07004047 } else {
Jingning Han3ee6db62015-08-05 19:00:31 -07004048 for (i = LAST_FRAME; i <= ALTREF_FRAME; ++i) {
4049 // Skip fixed mv modes for poor references
4050 if ((x->pred_mv_sad[ref_frame] >> 2) > x->pred_mv_sad[i]) {
4051 mode_skip_mask[ref_frame] |= INTER_NEAREST_NEAR_ZERO;
4052 break;
4053 }
4054 }
4055 }
4056 // If the segment reference frame feature is enabled....
4057 // then do nothing if the current ref frame is not allowed..
4058 if (segfeature_active(seg, segment_id, SEG_LVL_REF_FRAME) &&
4059 get_segdata(seg, segment_id, SEG_LVL_REF_FRAME) != (int)ref_frame) {
4060 ref_frame_skip_mask[0] |= (1 << ref_frame);
4061 ref_frame_skip_mask[1] |= SECOND_REF_FRAME_MASK;
4062 }
4063 }
4064
4065 // Disable this drop out case if the ref frame
4066 // segment level feature is enabled for this segment. This is to
4067 // prevent the possibility that we end up unable to pick any mode.
4068 if (!segfeature_active(seg, segment_id, SEG_LVL_REF_FRAME)) {
4069 // Only consider ZEROMV/ALTREF_FRAME for alt ref frame,
4070 // unless ARNR filtering is enabled in which case we want
4071 // an unfiltered alternative. We allow near/nearest as well
4072 // because they may result in zero-zero MVs but be cheaper.
4073 if (cpi->rc.is_src_frame_alt_ref && (cpi->oxcf.arnr_max_frames == 0)) {
4074 ref_frame_skip_mask[0] = (1 << LAST_FRAME) | (1 << GOLDEN_FRAME);
4075 ref_frame_skip_mask[1] = SECOND_REF_FRAME_MASK;
4076 mode_skip_mask[ALTREF_FRAME] = ~INTER_NEAREST_NEAR_ZERO;
4077 if (frame_mv[NEARMV][ALTREF_FRAME].as_int != 0)
4078 mode_skip_mask[ALTREF_FRAME] |= (1 << NEARMV);
4079 if (frame_mv[NEARESTMV][ALTREF_FRAME].as_int != 0)
4080 mode_skip_mask[ALTREF_FRAME] |= (1 << NEARESTMV);
4081 }
4082 }
4083
4084 if (cpi->rc.is_src_frame_alt_ref) {
4085 if (sf->alt_ref_search_fp) {
4086 mode_skip_mask[ALTREF_FRAME] = 0;
4087 ref_frame_skip_mask[0] = ~(1 << ALTREF_FRAME);
4088 ref_frame_skip_mask[1] = SECOND_REF_FRAME_MASK;
4089 }
4090 }
4091
4092 if (sf->alt_ref_search_fp)
4093 if (!cm->show_frame && x->pred_mv_sad[GOLDEN_FRAME] < INT_MAX)
4094 if (x->pred_mv_sad[ALTREF_FRAME] > (x->pred_mv_sad[GOLDEN_FRAME] << 1))
4095 mode_skip_mask[ALTREF_FRAME] |= INTER_ALL;
4096
4097 if (sf->adaptive_mode_search) {
4098 if (cm->show_frame && !cpi->rc.is_src_frame_alt_ref &&
4099 cpi->rc.frames_since_golden >= 3)
4100 if (x->pred_mv_sad[GOLDEN_FRAME] > (x->pred_mv_sad[LAST_FRAME] << 1))
4101 mode_skip_mask[GOLDEN_FRAME] |= INTER_ALL;
4102 }
4103
4104 if (bsize > sf->max_intra_bsize) {
4105 ref_frame_skip_mask[0] |= (1 << INTRA_FRAME);
4106 ref_frame_skip_mask[1] |= (1 << INTRA_FRAME);
4107 }
4108
4109 mode_skip_mask[INTRA_FRAME] |=
4110 ~(sf->intra_y_mode_mask[max_txsize_lookup[bsize]]);
4111
4112 for (i = 0; i <= LAST_NEW_MV_INDEX; ++i)
4113 mode_threshold[i] = 0;
4114 for (i = LAST_NEW_MV_INDEX + 1; i < MAX_MODES; ++i)
4115 mode_threshold[i] = ((int64_t)rd_threshes[i] * rd_thresh_freq_fact[i]) >> 5;
4116
4117 midx = sf->schedule_mode_search ? mode_skip_start : 0;
4118 while (midx > 4) {
4119 uint8_t end_pos = 0;
4120 for (i = 5; i < midx; ++i) {
4121 if (mode_threshold[mode_map[i - 1]] > mode_threshold[mode_map[i]]) {
4122 uint8_t tmp = mode_map[i];
4123 mode_map[i] = mode_map[i - 1];
4124 mode_map[i - 1] = tmp;
4125 end_pos = i;
4126 }
4127 }
4128 midx = end_pos;
4129 }
4130
hui su5d011cb2015-09-15 12:44:13 -07004131 mbmi->palette_mode_info.palette_size[0] = 0;
4132 mbmi->palette_mode_info.palette_size[1] = 0;
Jingning Han3ee6db62015-08-05 19:00:31 -07004133 for (midx = 0; midx < MAX_MODES; ++midx) {
4134 int mode_index = mode_map[midx];
4135 int mode_excluded = 0;
4136 int64_t this_rd = INT64_MAX;
4137 int disable_skip = 0;
4138 int compmode_cost = 0;
4139 int rate2 = 0, rate_y = 0, rate_uv = 0;
4140 int64_t distortion2 = 0, distortion_y = 0, distortion_uv = 0;
4141 int skippable = 0;
4142 int this_skip2 = 0;
4143 int64_t total_sse = INT64_MAX;
4144 int early_term = 0;
4145
4146 this_mode = vp10_mode_order[mode_index].mode;
4147 ref_frame = vp10_mode_order[mode_index].ref_frame[0];
4148 second_ref_frame = vp10_mode_order[mode_index].ref_frame[1];
4149
4150 // Look at the reference frame of the best mode so far and set the
4151 // skip mask to look at a subset of the remaining modes.
4152 if (midx == mode_skip_start && best_mode_index >= 0) {
4153 switch (best_mbmode.ref_frame[0]) {
4154 case INTRA_FRAME:
4155 break;
4156 case LAST_FRAME:
4157 ref_frame_skip_mask[0] |= LAST_FRAME_MODE_MASK;
4158 ref_frame_skip_mask[1] |= SECOND_REF_FRAME_MASK;
4159 break;
4160 case GOLDEN_FRAME:
4161 ref_frame_skip_mask[0] |= GOLDEN_FRAME_MODE_MASK;
4162 ref_frame_skip_mask[1] |= SECOND_REF_FRAME_MASK;
4163 break;
4164 case ALTREF_FRAME:
4165 ref_frame_skip_mask[0] |= ALT_REF_MODE_MASK;
4166 break;
4167 case NONE:
4168 case MAX_REF_FRAMES:
4169 assert(0 && "Invalid Reference frame");
4170 break;
4171 }
4172 }
4173
4174 if ((ref_frame_skip_mask[0] & (1 << ref_frame)) &&
James Zern5e16d392015-08-17 18:19:22 -07004175 (ref_frame_skip_mask[1] & (1 << VPXMAX(0, second_ref_frame))))
Jingning Han3ee6db62015-08-05 19:00:31 -07004176 continue;
4177
4178 if (mode_skip_mask[ref_frame] & (1 << this_mode))
4179 continue;
4180
4181 // Test best rd so far against threshold for trying this mode.
4182 if (best_mode_skippable && sf->schedule_mode_search)
4183 mode_threshold[mode_index] <<= 1;
4184
4185 if (best_rd < mode_threshold[mode_index])
4186 continue;
4187
Jingning Han3ee6db62015-08-05 19:00:31 -07004188 comp_pred = second_ref_frame > INTRA_FRAME;
4189 if (comp_pred) {
4190 if (!cpi->allow_comp_inter_inter)
4191 continue;
4192
4193 // Skip compound inter modes if ARF is not available.
4194 if (!(cpi->ref_frame_flags & flag_list[second_ref_frame]))
4195 continue;
4196
4197 // Do not allow compound prediction if the segment level reference frame
4198 // feature is in use as in this case there can only be one reference.
4199 if (segfeature_active(seg, segment_id, SEG_LVL_REF_FRAME))
4200 continue;
4201
4202 if ((mode_search_skip_flags & FLAG_SKIP_COMP_BESTINTRA) &&
4203 best_mode_index >= 0 && best_mbmode.ref_frame[0] == INTRA_FRAME)
4204 continue;
4205
4206 mode_excluded = cm->reference_mode == SINGLE_REFERENCE;
4207 } else {
4208 if (ref_frame != INTRA_FRAME)
4209 mode_excluded = cm->reference_mode == COMPOUND_REFERENCE;
4210 }
4211
4212 if (ref_frame == INTRA_FRAME) {
4213 if (sf->adaptive_mode_search)
4214 if ((x->source_variance << num_pels_log2_lookup[bsize]) > best_pred_sse)
4215 continue;
4216
4217 if (this_mode != DC_PRED) {
4218 // Disable intra modes other than DC_PRED for blocks with low variance
4219 // Threshold for intra skipping based on source variance
4220 // TODO(debargha): Specialize the threshold for super block sizes
4221 const unsigned int skip_intra_var_thresh = 64;
4222 if ((mode_search_skip_flags & FLAG_SKIP_INTRA_LOWVAR) &&
4223 x->source_variance < skip_intra_var_thresh)
4224 continue;
4225 // Only search the oblique modes if the best so far is
4226 // one of the neighboring directional modes
4227 if ((mode_search_skip_flags & FLAG_SKIP_INTRA_BESTINTER) &&
4228 (this_mode >= D45_PRED && this_mode <= TM_PRED)) {
4229 if (best_mode_index >= 0 &&
4230 best_mbmode.ref_frame[0] > INTRA_FRAME)
4231 continue;
4232 }
4233 if (mode_search_skip_flags & FLAG_SKIP_INTRA_DIRMISMATCH) {
4234 if (conditional_skipintra(this_mode, best_intra_mode))
4235 continue;
4236 }
4237 }
4238 } else {
4239 const MV_REFERENCE_FRAME ref_frames[2] = {ref_frame, second_ref_frame};
4240 if (!check_best_zero_mv(cpi, mbmi_ext->mode_context, frame_mv,
4241 this_mode, ref_frames))
4242 continue;
4243 }
4244
4245 mbmi->mode = this_mode;
4246 mbmi->uv_mode = DC_PRED;
4247 mbmi->ref_frame[0] = ref_frame;
4248 mbmi->ref_frame[1] = second_ref_frame;
4249 // Evaluate all sub-pel filters irrespective of whether we can use
4250 // them for this frame.
4251 mbmi->interp_filter = cm->interp_filter == SWITCHABLE ? EIGHTTAP
4252 : cm->interp_filter;
4253 mbmi->mv[0].as_int = mbmi->mv[1].as_int = 0;
4254
4255 x->skip = 0;
4256 set_ref_ptrs(cm, xd, ref_frame, second_ref_frame);
4257
4258 // Select prediction reference frames.
4259 for (i = 0; i < MAX_MB_PLANE; i++) {
4260 xd->plane[i].pre[0] = yv12_mb[ref_frame][i];
4261 if (comp_pred)
4262 xd->plane[i].pre[1] = yv12_mb[second_ref_frame][i];
4263 }
4264
4265 if (ref_frame == INTRA_FRAME) {
4266 TX_SIZE uv_tx;
4267 struct macroblockd_plane *const pd = &xd->plane[1];
4268 memset(x->skip_txfm, 0, sizeof(x->skip_txfm));
4269 super_block_yrd(cpi, x, &rate_y, &distortion_y, &skippable,
4270 NULL, bsize, best_rd);
4271 if (rate_y == INT_MAX)
4272 continue;
Jingning Han3ee6db62015-08-05 19:00:31 -07004273 uv_tx = get_uv_tx_size_impl(mbmi->tx_size, bsize, pd->subsampling_x,
4274 pd->subsampling_y);
4275 if (rate_uv_intra[uv_tx] == INT_MAX) {
4276 choose_intra_uv_mode(cpi, x, ctx, bsize, uv_tx,
4277 &rate_uv_intra[uv_tx], &rate_uv_tokenonly[uv_tx],
4278 &dist_uv[uv_tx], &skip_uv[uv_tx], &mode_uv[uv_tx]);
4279 }
4280
4281 rate_uv = rate_uv_tokenonly[uv_tx];
4282 distortion_uv = dist_uv[uv_tx];
4283 skippable = skippable && skip_uv[uv_tx];
4284 mbmi->uv_mode = mode_uv[uv_tx];
4285
4286 rate2 = rate_y + cpi->mbmode_cost[mbmi->mode] + rate_uv_intra[uv_tx];
4287 if (this_mode != DC_PRED && this_mode != TM_PRED)
4288 rate2 += intra_cost_penalty;
4289 distortion2 = distortion_y + distortion_uv;
4290 } else {
4291 this_rd = handle_inter_mode(cpi, x, bsize,
4292 &rate2, &distortion2, &skippable,
4293 &rate_y, &rate_uv,
4294 &disable_skip, frame_mv,
4295 mi_row, mi_col,
4296 single_newmv, single_inter_filter,
4297 single_skippable, &total_sse, best_rd,
4298 &mask_filter, filter_cache);
4299 if (this_rd == INT64_MAX)
4300 continue;
4301
4302 compmode_cost = vp10_cost_bit(comp_mode_p, comp_pred);
4303
4304 if (cm->reference_mode == REFERENCE_MODE_SELECT)
4305 rate2 += compmode_cost;
4306 }
4307
4308 // Estimate the reference frame signaling cost and add it
4309 // to the rolling cost variable.
4310 if (comp_pred) {
4311 rate2 += ref_costs_comp[ref_frame];
4312 } else {
4313 rate2 += ref_costs_single[ref_frame];
4314 }
4315
4316 if (!disable_skip) {
4317 if (skippable) {
4318 // Back out the coefficient coding costs
4319 rate2 -= (rate_y + rate_uv);
4320
4321 // Cost the skip mb case
4322 rate2 += vp10_cost_bit(vp10_get_skip_prob(cm, xd), 1);
Ronald S. Bultje60c58b52015-10-12 17:54:25 -04004323 } else if (ref_frame != INTRA_FRAME && !xd->lossless[mbmi->segment_id]) {
Jingning Han3ee6db62015-08-05 19:00:31 -07004324 if (RDCOST(x->rdmult, x->rddiv, rate_y + rate_uv, distortion2) <
4325 RDCOST(x->rdmult, x->rddiv, 0, total_sse)) {
4326 // Add in the cost of the no skip flag.
4327 rate2 += vp10_cost_bit(vp10_get_skip_prob(cm, xd), 0);
4328 } else {
4329 // FIXME(rbultje) make this work for splitmv also
4330 rate2 += vp10_cost_bit(vp10_get_skip_prob(cm, xd), 1);
4331 distortion2 = total_sse;
4332 assert(total_sse >= 0);
4333 rate2 -= (rate_y + rate_uv);
4334 this_skip2 = 1;
4335 }
4336 } else {
4337 // Add in the cost of the no skip flag.
4338 rate2 += vp10_cost_bit(vp10_get_skip_prob(cm, xd), 0);
4339 }
4340
4341 // Calculate the final RD estimate for this mode.
4342 this_rd = RDCOST(x->rdmult, x->rddiv, rate2, distortion2);
4343 }
4344
4345 // Apply an adjustment to the rd value based on the similarity of the
4346 // source variance and reconstructed variance.
4347 rd_variance_adjustment(cpi, x, bsize, &this_rd,
4348 ref_frame, x->source_variance);
4349
4350 if (ref_frame == INTRA_FRAME) {
4351 // Keep record of best intra rd
4352 if (this_rd < best_intra_rd) {
4353 best_intra_rd = this_rd;
4354 best_intra_mode = mbmi->mode;
4355 }
4356 }
4357
4358 if (!disable_skip && ref_frame == INTRA_FRAME) {
4359 for (i = 0; i < REFERENCE_MODES; ++i)
James Zern5e16d392015-08-17 18:19:22 -07004360 best_pred_rd[i] = VPXMIN(best_pred_rd[i], this_rd);
Jingning Han3ee6db62015-08-05 19:00:31 -07004361 for (i = 0; i < SWITCHABLE_FILTER_CONTEXTS; i++)
James Zern5e16d392015-08-17 18:19:22 -07004362 best_filter_rd[i] = VPXMIN(best_filter_rd[i], this_rd);
Jingning Han3ee6db62015-08-05 19:00:31 -07004363 }
4364
4365 // Did this mode help.. i.e. is it the new best mode
4366 if (this_rd < best_rd || x->skip) {
4367 int max_plane = MAX_MB_PLANE;
4368 if (!mode_excluded) {
4369 // Note index of best mode so far
4370 best_mode_index = mode_index;
4371
4372 if (ref_frame == INTRA_FRAME) {
4373 /* required for left and above block mv */
4374 mbmi->mv[0].as_int = 0;
4375 max_plane = 1;
4376 } else {
4377 best_pred_sse = x->pred_sse[ref_frame];
4378 }
4379
4380 rd_cost->rate = rate2;
4381 rd_cost->dist = distortion2;
4382 rd_cost->rdcost = this_rd;
4383 best_rd = this_rd;
4384 best_mbmode = *mbmi;
4385 best_skip2 = this_skip2;
4386 best_mode_skippable = skippable;
4387
4388 if (!x->select_tx_size)
4389 swap_block_ptr(x, ctx, 1, 0, 0, max_plane);
Jingning Hanbfeac5e2015-10-15 23:11:30 -07004390
4391#if CONFIG_VAR_TX
4392 for (i = 0; i < MAX_MB_PLANE; ++i)
4393 memcpy(ctx->blk_skip[i], x->blk_skip[i],
4394 sizeof(uint8_t) * ctx->num_4x4_blk);
4395#else
Jingning Han3ee6db62015-08-05 19:00:31 -07004396 memcpy(ctx->zcoeff_blk, x->zcoeff_blk[mbmi->tx_size],
hui su088b05f2015-08-12 10:41:51 -07004397 sizeof(ctx->zcoeff_blk[0]) * ctx->num_4x4_blk);
Jingning Hanbfeac5e2015-10-15 23:11:30 -07004398#endif
Jingning Han3ee6db62015-08-05 19:00:31 -07004399
4400 // TODO(debargha): enhance this test with a better distortion prediction
4401 // based on qp, activity mask and history
4402 if ((mode_search_skip_flags & FLAG_EARLY_TERMINATE) &&
4403 (mode_index > MIN_EARLY_TERM_INDEX)) {
4404 int qstep = xd->plane[0].dequant[1];
4405 // TODO(debargha): Enhance this by specializing for each mode_index
4406 int scale = 4;
4407#if CONFIG_VP9_HIGHBITDEPTH
4408 if (xd->cur_buf->flags & YV12_FLAG_HIGHBITDEPTH) {
4409 qstep >>= (xd->bd - 8);
4410 }
4411#endif // CONFIG_VP9_HIGHBITDEPTH
4412 if (x->source_variance < UINT_MAX) {
4413 const int var_adjust = (x->source_variance < 16);
4414 scale -= var_adjust;
4415 }
4416 if (ref_frame > INTRA_FRAME &&
4417 distortion2 * scale < qstep * qstep) {
4418 early_term = 1;
4419 }
4420 }
4421 }
4422 }
4423
4424 /* keep record of best compound/single-only prediction */
4425 if (!disable_skip && ref_frame != INTRA_FRAME) {
4426 int64_t single_rd, hybrid_rd, single_rate, hybrid_rate;
4427
4428 if (cm->reference_mode == REFERENCE_MODE_SELECT) {
4429 single_rate = rate2 - compmode_cost;
4430 hybrid_rate = rate2;
4431 } else {
4432 single_rate = rate2;
4433 hybrid_rate = rate2 + compmode_cost;
4434 }
4435
4436 single_rd = RDCOST(x->rdmult, x->rddiv, single_rate, distortion2);
4437 hybrid_rd = RDCOST(x->rdmult, x->rddiv, hybrid_rate, distortion2);
4438
4439 if (!comp_pred) {
4440 if (single_rd < best_pred_rd[SINGLE_REFERENCE])
4441 best_pred_rd[SINGLE_REFERENCE] = single_rd;
4442 } else {
4443 if (single_rd < best_pred_rd[COMPOUND_REFERENCE])
4444 best_pred_rd[COMPOUND_REFERENCE] = single_rd;
4445 }
4446 if (hybrid_rd < best_pred_rd[REFERENCE_MODE_SELECT])
4447 best_pred_rd[REFERENCE_MODE_SELECT] = hybrid_rd;
4448
4449 /* keep record of best filter type */
4450 if (!mode_excluded && cm->interp_filter != BILINEAR) {
4451 int64_t ref = filter_cache[cm->interp_filter == SWITCHABLE ?
4452 SWITCHABLE_FILTERS : cm->interp_filter];
4453
4454 for (i = 0; i < SWITCHABLE_FILTER_CONTEXTS; i++) {
4455 int64_t adj_rd;
4456 if (ref == INT64_MAX)
4457 adj_rd = 0;
4458 else if (filter_cache[i] == INT64_MAX)
4459 // when early termination is triggered, the encoder does not have
4460 // access to the rate-distortion cost. it only knows that the cost
4461 // should be above the maximum valid value. hence it takes the known
4462 // maximum plus an arbitrary constant as the rate-distortion cost.
4463 adj_rd = mask_filter - ref + 10;
4464 else
4465 adj_rd = filter_cache[i] - ref;
4466
4467 adj_rd += this_rd;
James Zern5e16d392015-08-17 18:19:22 -07004468 best_filter_rd[i] = VPXMIN(best_filter_rd[i], adj_rd);
Jingning Han3ee6db62015-08-05 19:00:31 -07004469 }
4470 }
4471 }
4472
4473 if (early_term)
4474 break;
4475
4476 if (x->skip && !comp_pred)
4477 break;
4478 }
4479
4480 // The inter modes' rate costs are not calculated precisely in some cases.
4481 // Therefore, sometimes, NEWMV is chosen instead of NEARESTMV, NEARMV, and
4482 // ZEROMV. Here, checks are added for those cases, and the mode decisions
4483 // are corrected.
4484 if (best_mbmode.mode == NEWMV) {
4485 const MV_REFERENCE_FRAME refs[2] = {best_mbmode.ref_frame[0],
4486 best_mbmode.ref_frame[1]};
4487 int comp_pred_mode = refs[1] > INTRA_FRAME;
4488
4489 if (frame_mv[NEARESTMV][refs[0]].as_int == best_mbmode.mv[0].as_int &&
4490 ((comp_pred_mode && frame_mv[NEARESTMV][refs[1]].as_int ==
4491 best_mbmode.mv[1].as_int) || !comp_pred_mode))
4492 best_mbmode.mode = NEARESTMV;
4493 else if (frame_mv[NEARMV][refs[0]].as_int == best_mbmode.mv[0].as_int &&
4494 ((comp_pred_mode && frame_mv[NEARMV][refs[1]].as_int ==
4495 best_mbmode.mv[1].as_int) || !comp_pred_mode))
4496 best_mbmode.mode = NEARMV;
4497 else if (best_mbmode.mv[0].as_int == 0 &&
4498 ((comp_pred_mode && best_mbmode.mv[1].as_int == 0) || !comp_pred_mode))
4499 best_mbmode.mode = ZEROMV;
4500 }
4501
4502 if (best_mode_index < 0 || best_rd >= best_rd_so_far) {
4503 rd_cost->rate = INT_MAX;
4504 rd_cost->rdcost = INT64_MAX;
4505 return;
4506 }
4507
4508 // If we used an estimate for the uv intra rd in the loop above...
4509 if (sf->use_uv_intra_rd_estimate) {
4510 // Do Intra UV best rd mode selection if best mode choice above was intra.
4511 if (best_mbmode.ref_frame[0] == INTRA_FRAME) {
4512 TX_SIZE uv_tx_size;
4513 *mbmi = best_mbmode;
4514 uv_tx_size = get_uv_tx_size(mbmi, &xd->plane[1]);
4515 rd_pick_intra_sbuv_mode(cpi, x, ctx, &rate_uv_intra[uv_tx_size],
4516 &rate_uv_tokenonly[uv_tx_size],
4517 &dist_uv[uv_tx_size],
4518 &skip_uv[uv_tx_size],
4519 bsize < BLOCK_8X8 ? BLOCK_8X8 : bsize,
4520 uv_tx_size);
4521 }
4522 }
4523
4524 assert((cm->interp_filter == SWITCHABLE) ||
4525 (cm->interp_filter == best_mbmode.interp_filter) ||
4526 !is_inter_block(&best_mbmode));
4527
4528 if (!cpi->rc.is_src_frame_alt_ref)
4529 vp10_update_rd_thresh_fact(tile_data->thresh_freq_fact,
4530 sf->adaptive_rd_thresh, bsize, best_mode_index);
4531
4532 // macroblock modes
4533 *mbmi = best_mbmode;
4534 x->skip |= best_skip2;
4535
4536 for (i = 0; i < REFERENCE_MODES; ++i) {
4537 if (best_pred_rd[i] == INT64_MAX)
4538 best_pred_diff[i] = INT_MIN;
4539 else
4540 best_pred_diff[i] = best_rd - best_pred_rd[i];
4541 }
4542
4543 if (!x->skip) {
4544 for (i = 0; i < SWITCHABLE_FILTER_CONTEXTS; i++) {
4545 if (best_filter_rd[i] == INT64_MAX)
4546 best_filter_diff[i] = 0;
4547 else
4548 best_filter_diff[i] = best_rd - best_filter_rd[i];
4549 }
4550 if (cm->interp_filter == SWITCHABLE)
4551 assert(best_filter_diff[SWITCHABLE_FILTERS] == 0);
4552 } else {
4553 vp10_zero(best_filter_diff);
4554 }
4555
4556 // TODO(yunqingwang): Moving this line in front of the above best_filter_diff
4557 // updating code causes PSNR loss. Need to figure out the confliction.
4558 x->skip |= best_mode_skippable;
4559
4560 if (!x->skip && !x->select_tx_size) {
4561 int has_high_freq_coeff = 0;
4562 int plane;
4563 int max_plane = is_inter_block(&xd->mi[0]->mbmi)
4564 ? MAX_MB_PLANE : 1;
4565 for (plane = 0; plane < max_plane; ++plane) {
4566 x->plane[plane].eobs = ctx->eobs_pbuf[plane][1];
4567 has_high_freq_coeff |= vp10_has_high_freq_in_plane(x, bsize, plane);
4568 }
4569
4570 for (plane = max_plane; plane < MAX_MB_PLANE; ++plane) {
4571 x->plane[plane].eobs = ctx->eobs_pbuf[plane][2];
4572 has_high_freq_coeff |= vp10_has_high_freq_in_plane(x, bsize, plane);
4573 }
4574
4575 best_mode_skippable |= !has_high_freq_coeff;
4576 }
4577
4578 assert(best_mode_index >= 0);
4579
4580 store_coding_context(x, ctx, best_mode_index, best_pred_diff,
4581 best_filter_diff, best_mode_skippable);
4582}
4583
Yaowu Xu26a9afc2015-08-13 09:42:27 -07004584void vp10_rd_pick_inter_mode_sb_seg_skip(VP10_COMP *cpi,
Jingning Han3ee6db62015-08-05 19:00:31 -07004585 TileDataEnc *tile_data,
4586 MACROBLOCK *x,
4587 RD_COST *rd_cost,
4588 BLOCK_SIZE bsize,
4589 PICK_MODE_CONTEXT *ctx,
4590 int64_t best_rd_so_far) {
Yaowu Xufc7cbd12015-08-13 09:36:53 -07004591 VP10_COMMON *const cm = &cpi->common;
Jingning Han3ee6db62015-08-05 19:00:31 -07004592 MACROBLOCKD *const xd = &x->e_mbd;
4593 MB_MODE_INFO *const mbmi = &xd->mi[0]->mbmi;
4594 unsigned char segment_id = mbmi->segment_id;
4595 const int comp_pred = 0;
4596 int i;
4597 int64_t best_pred_diff[REFERENCE_MODES];
4598 int64_t best_filter_diff[SWITCHABLE_FILTER_CONTEXTS];
4599 unsigned int ref_costs_single[MAX_REF_FRAMES], ref_costs_comp[MAX_REF_FRAMES];
4600 vpx_prob comp_mode_p;
4601 INTERP_FILTER best_filter = SWITCHABLE;
4602 int64_t this_rd = INT64_MAX;
4603 int rate2 = 0;
4604 const int64_t distortion2 = 0;
4605
Jingning Han3ee6db62015-08-05 19:00:31 -07004606 estimate_ref_frame_costs(cm, xd, segment_id, ref_costs_single, ref_costs_comp,
4607 &comp_mode_p);
4608
4609 for (i = 0; i < MAX_REF_FRAMES; ++i)
4610 x->pred_sse[i] = INT_MAX;
4611 for (i = LAST_FRAME; i < MAX_REF_FRAMES; ++i)
4612 x->pred_mv_sad[i] = INT_MAX;
4613
4614 rd_cost->rate = INT_MAX;
4615
4616 assert(segfeature_active(&cm->seg, segment_id, SEG_LVL_SKIP));
4617
hui su5d011cb2015-09-15 12:44:13 -07004618 mbmi->palette_mode_info.palette_size[0] = 0;
4619 mbmi->palette_mode_info.palette_size[1] = 0;
Jingning Han3ee6db62015-08-05 19:00:31 -07004620 mbmi->mode = ZEROMV;
4621 mbmi->uv_mode = DC_PRED;
4622 mbmi->ref_frame[0] = LAST_FRAME;
4623 mbmi->ref_frame[1] = NONE;
4624 mbmi->mv[0].as_int = 0;
4625 x->skip = 1;
4626
4627 if (cm->interp_filter != BILINEAR) {
4628 best_filter = EIGHTTAP;
4629 if (cm->interp_filter == SWITCHABLE &&
4630 x->source_variance >= cpi->sf.disable_filter_search_var_thresh) {
4631 int rs;
4632 int best_rs = INT_MAX;
4633 for (i = 0; i < SWITCHABLE_FILTERS; ++i) {
4634 mbmi->interp_filter = i;
4635 rs = vp10_get_switchable_rate(cpi, xd);
4636 if (rs < best_rs) {
4637 best_rs = rs;
4638 best_filter = mbmi->interp_filter;
4639 }
4640 }
4641 }
4642 }
4643 // Set the appropriate filter
4644 if (cm->interp_filter == SWITCHABLE) {
4645 mbmi->interp_filter = best_filter;
4646 rate2 += vp10_get_switchable_rate(cpi, xd);
4647 } else {
4648 mbmi->interp_filter = cm->interp_filter;
4649 }
4650
4651 if (cm->reference_mode == REFERENCE_MODE_SELECT)
4652 rate2 += vp10_cost_bit(comp_mode_p, comp_pred);
4653
4654 // Estimate the reference frame signaling cost and add it
4655 // to the rolling cost variable.
4656 rate2 += ref_costs_single[LAST_FRAME];
4657 this_rd = RDCOST(x->rdmult, x->rddiv, rate2, distortion2);
4658
4659 rd_cost->rate = rate2;
4660 rd_cost->dist = distortion2;
4661 rd_cost->rdcost = this_rd;
4662
4663 if (this_rd >= best_rd_so_far) {
4664 rd_cost->rate = INT_MAX;
4665 rd_cost->rdcost = INT64_MAX;
4666 return;
4667 }
4668
4669 assert((cm->interp_filter == SWITCHABLE) ||
4670 (cm->interp_filter == mbmi->interp_filter));
4671
4672 vp10_update_rd_thresh_fact(tile_data->thresh_freq_fact,
4673 cpi->sf.adaptive_rd_thresh, bsize, THR_ZEROMV);
4674
4675 vp10_zero(best_pred_diff);
4676 vp10_zero(best_filter_diff);
4677
4678 if (!x->select_tx_size)
4679 swap_block_ptr(x, ctx, 1, 0, 0, MAX_MB_PLANE);
4680 store_coding_context(x, ctx, THR_ZEROMV,
4681 best_pred_diff, best_filter_diff, 0);
4682}
4683
Yaowu Xu26a9afc2015-08-13 09:42:27 -07004684void vp10_rd_pick_inter_mode_sub8x8(VP10_COMP *cpi,
Jingning Han3ee6db62015-08-05 19:00:31 -07004685 TileDataEnc *tile_data,
4686 MACROBLOCK *x,
4687 int mi_row, int mi_col,
4688 RD_COST *rd_cost,
4689 BLOCK_SIZE bsize,
4690 PICK_MODE_CONTEXT *ctx,
4691 int64_t best_rd_so_far) {
Yaowu Xufc7cbd12015-08-13 09:36:53 -07004692 VP10_COMMON *const cm = &cpi->common;
Jingning Han3ee6db62015-08-05 19:00:31 -07004693 RD_OPT *const rd_opt = &cpi->rd;
4694 SPEED_FEATURES *const sf = &cpi->sf;
4695 MACROBLOCKD *const xd = &x->e_mbd;
4696 MB_MODE_INFO *const mbmi = &xd->mi[0]->mbmi;
4697 const struct segmentation *const seg = &cm->seg;
4698 MV_REFERENCE_FRAME ref_frame, second_ref_frame;
4699 unsigned char segment_id = mbmi->segment_id;
4700 int comp_pred, i;
4701 int_mv frame_mv[MB_MODE_COUNT][MAX_REF_FRAMES];
4702 struct buf_2d yv12_mb[4][MAX_MB_PLANE];
4703 static const int flag_list[4] = { 0, VP9_LAST_FLAG, VP9_GOLD_FLAG,
4704 VP9_ALT_FLAG };
4705 int64_t best_rd = best_rd_so_far;
4706 int64_t best_yrd = best_rd_so_far; // FIXME(rbultje) more precise
4707 int64_t best_pred_diff[REFERENCE_MODES];
4708 int64_t best_pred_rd[REFERENCE_MODES];
4709 int64_t best_filter_rd[SWITCHABLE_FILTER_CONTEXTS];
4710 int64_t best_filter_diff[SWITCHABLE_FILTER_CONTEXTS];
4711 MB_MODE_INFO best_mbmode;
4712 int ref_index, best_ref_index = 0;
4713 unsigned int ref_costs_single[MAX_REF_FRAMES], ref_costs_comp[MAX_REF_FRAMES];
4714 vpx_prob comp_mode_p;
4715 INTERP_FILTER tmp_best_filter = SWITCHABLE;
4716 int rate_uv_intra, rate_uv_tokenonly;
4717 int64_t dist_uv;
4718 int skip_uv;
4719 PREDICTION_MODE mode_uv = DC_PRED;
4720 const int intra_cost_penalty = vp10_get_intra_cost_penalty(
4721 cm->base_qindex, cm->y_dc_delta_q, cm->bit_depth);
4722 int_mv seg_mvs[4][MAX_REF_FRAMES];
4723 b_mode_info best_bmodes[4];
4724 int best_skip2 = 0;
4725 int ref_frame_skip_mask[2] = { 0 };
4726 int64_t mask_filter = 0;
4727 int64_t filter_cache[SWITCHABLE_FILTER_CONTEXTS];
4728 int internal_active_edge =
4729 vp10_active_edge_sb(cpi, mi_row, mi_col) && vp10_internal_image_edge(cpi);
4730
Jingning Han3ee6db62015-08-05 19:00:31 -07004731 memset(x->zcoeff_blk[TX_4X4], 0, 4);
4732 vp10_zero(best_mbmode);
4733
4734 for (i = 0; i < SWITCHABLE_FILTER_CONTEXTS; ++i)
4735 filter_cache[i] = INT64_MAX;
4736
4737 for (i = 0; i < 4; i++) {
4738 int j;
4739 for (j = 0; j < MAX_REF_FRAMES; j++)
4740 seg_mvs[i][j].as_int = INVALID_MV;
4741 }
4742
4743 estimate_ref_frame_costs(cm, xd, segment_id, ref_costs_single, ref_costs_comp,
4744 &comp_mode_p);
4745
4746 for (i = 0; i < REFERENCE_MODES; ++i)
4747 best_pred_rd[i] = INT64_MAX;
4748 for (i = 0; i < SWITCHABLE_FILTER_CONTEXTS; i++)
4749 best_filter_rd[i] = INT64_MAX;
4750 rate_uv_intra = INT_MAX;
4751
4752 rd_cost->rate = INT_MAX;
4753
4754 for (ref_frame = LAST_FRAME; ref_frame <= ALTREF_FRAME; ref_frame++) {
4755 if (cpi->ref_frame_flags & flag_list[ref_frame]) {
4756 setup_buffer_inter(cpi, x, ref_frame, bsize, mi_row, mi_col,
4757 frame_mv[NEARESTMV], frame_mv[NEARMV],
4758 yv12_mb);
4759 } else {
4760 ref_frame_skip_mask[0] |= (1 << ref_frame);
4761 ref_frame_skip_mask[1] |= SECOND_REF_FRAME_MASK;
4762 }
4763 frame_mv[NEWMV][ref_frame].as_int = INVALID_MV;
4764 frame_mv[ZEROMV][ref_frame].as_int = 0;
4765 }
4766
hui su5d011cb2015-09-15 12:44:13 -07004767 mbmi->palette_mode_info.palette_size[0] = 0;
4768 mbmi->palette_mode_info.palette_size[1] = 0;
4769
Jingning Han3ee6db62015-08-05 19:00:31 -07004770 for (ref_index = 0; ref_index < MAX_REFS; ++ref_index) {
4771 int mode_excluded = 0;
4772 int64_t this_rd = INT64_MAX;
4773 int disable_skip = 0;
4774 int compmode_cost = 0;
4775 int rate2 = 0, rate_y = 0, rate_uv = 0;
4776 int64_t distortion2 = 0, distortion_y = 0, distortion_uv = 0;
4777 int skippable = 0;
4778 int i;
4779 int this_skip2 = 0;
4780 int64_t total_sse = INT_MAX;
4781 int early_term = 0;
4782
4783 ref_frame = vp10_ref_order[ref_index].ref_frame[0];
4784 second_ref_frame = vp10_ref_order[ref_index].ref_frame[1];
4785
4786 // Look at the reference frame of the best mode so far and set the
4787 // skip mask to look at a subset of the remaining modes.
4788 if (ref_index > 2 && sf->mode_skip_start < MAX_MODES) {
4789 if (ref_index == 3) {
4790 switch (best_mbmode.ref_frame[0]) {
4791 case INTRA_FRAME:
4792 break;
4793 case LAST_FRAME:
4794 ref_frame_skip_mask[0] |= (1 << GOLDEN_FRAME) | (1 << ALTREF_FRAME);
4795 ref_frame_skip_mask[1] |= SECOND_REF_FRAME_MASK;
4796 break;
4797 case GOLDEN_FRAME:
4798 ref_frame_skip_mask[0] |= (1 << LAST_FRAME) | (1 << ALTREF_FRAME);
4799 ref_frame_skip_mask[1] |= SECOND_REF_FRAME_MASK;
4800 break;
4801 case ALTREF_FRAME:
4802 ref_frame_skip_mask[0] |= (1 << GOLDEN_FRAME) | (1 << LAST_FRAME);
4803 break;
4804 case NONE:
4805 case MAX_REF_FRAMES:
4806 assert(0 && "Invalid Reference frame");
4807 break;
4808 }
4809 }
4810 }
4811
4812 if ((ref_frame_skip_mask[0] & (1 << ref_frame)) &&
James Zern5e16d392015-08-17 18:19:22 -07004813 (ref_frame_skip_mask[1] & (1 << VPXMAX(0, second_ref_frame))))
Jingning Han3ee6db62015-08-05 19:00:31 -07004814 continue;
4815
4816 // Test best rd so far against threshold for trying this mode.
4817 if (!internal_active_edge &&
4818 rd_less_than_thresh(best_rd,
4819 rd_opt->threshes[segment_id][bsize][ref_index],
4820 tile_data->thresh_freq_fact[bsize][ref_index]))
4821 continue;
4822
4823 comp_pred = second_ref_frame > INTRA_FRAME;
4824 if (comp_pred) {
4825 if (!cpi->allow_comp_inter_inter)
4826 continue;
4827 if (!(cpi->ref_frame_flags & flag_list[second_ref_frame]))
4828 continue;
4829 // Do not allow compound prediction if the segment level reference frame
4830 // feature is in use as in this case there can only be one reference.
4831 if (segfeature_active(seg, segment_id, SEG_LVL_REF_FRAME))
4832 continue;
4833
4834 if ((sf->mode_search_skip_flags & FLAG_SKIP_COMP_BESTINTRA) &&
4835 best_mbmode.ref_frame[0] == INTRA_FRAME)
4836 continue;
4837 }
4838
4839 // TODO(jingning, jkoleszar): scaling reference frame not supported for
4840 // sub8x8 blocks.
4841 if (ref_frame > INTRA_FRAME &&
4842 vp10_is_scaled(&cm->frame_refs[ref_frame - 1].sf))
4843 continue;
4844
4845 if (second_ref_frame > INTRA_FRAME &&
4846 vp10_is_scaled(&cm->frame_refs[second_ref_frame - 1].sf))
4847 continue;
4848
4849 if (comp_pred)
4850 mode_excluded = cm->reference_mode == SINGLE_REFERENCE;
4851 else if (ref_frame != INTRA_FRAME)
4852 mode_excluded = cm->reference_mode == COMPOUND_REFERENCE;
4853
4854 // If the segment reference frame feature is enabled....
4855 // then do nothing if the current ref frame is not allowed..
4856 if (segfeature_active(seg, segment_id, SEG_LVL_REF_FRAME) &&
4857 get_segdata(seg, segment_id, SEG_LVL_REF_FRAME) != (int)ref_frame) {
4858 continue;
4859 // Disable this drop out case if the ref frame
4860 // segment level feature is enabled for this segment. This is to
4861 // prevent the possibility that we end up unable to pick any mode.
4862 } else if (!segfeature_active(seg, segment_id, SEG_LVL_REF_FRAME)) {
4863 // Only consider ZEROMV/ALTREF_FRAME for alt ref frame,
4864 // unless ARNR filtering is enabled in which case we want
4865 // an unfiltered alternative. We allow near/nearest as well
4866 // because they may result in zero-zero MVs but be cheaper.
4867 if (cpi->rc.is_src_frame_alt_ref && (cpi->oxcf.arnr_max_frames == 0))
4868 continue;
4869 }
4870
4871 mbmi->tx_size = TX_4X4;
4872 mbmi->uv_mode = DC_PRED;
4873 mbmi->ref_frame[0] = ref_frame;
4874 mbmi->ref_frame[1] = second_ref_frame;
4875 // Evaluate all sub-pel filters irrespective of whether we can use
4876 // them for this frame.
4877 mbmi->interp_filter = cm->interp_filter == SWITCHABLE ? EIGHTTAP
4878 : cm->interp_filter;
4879 x->skip = 0;
4880 set_ref_ptrs(cm, xd, ref_frame, second_ref_frame);
4881
4882 // Select prediction reference frames.
4883 for (i = 0; i < MAX_MB_PLANE; i++) {
4884 xd->plane[i].pre[0] = yv12_mb[ref_frame][i];
4885 if (comp_pred)
4886 xd->plane[i].pre[1] = yv12_mb[second_ref_frame][i];
4887 }
4888
Jingning Han704985e2015-10-08 12:05:03 -07004889#if CONFIG_VAR_TX
4890 for (i = 0; i < 64; ++i)
4891 mbmi->inter_tx_size[i] = mbmi->tx_size;
4892#endif
4893
Jingning Han3ee6db62015-08-05 19:00:31 -07004894 if (ref_frame == INTRA_FRAME) {
4895 int rate;
4896 if (rd_pick_intra_sub_8x8_y_mode(cpi, x, &rate, &rate_y,
4897 &distortion_y, best_rd) >= best_rd)
4898 continue;
4899 rate2 += rate;
4900 rate2 += intra_cost_penalty;
4901 distortion2 += distortion_y;
4902
4903 if (rate_uv_intra == INT_MAX) {
4904 choose_intra_uv_mode(cpi, x, ctx, bsize, TX_4X4,
4905 &rate_uv_intra,
4906 &rate_uv_tokenonly,
4907 &dist_uv, &skip_uv,
4908 &mode_uv);
4909 }
4910 rate2 += rate_uv_intra;
4911 rate_uv = rate_uv_tokenonly;
4912 distortion2 += dist_uv;
4913 distortion_uv = dist_uv;
4914 mbmi->uv_mode = mode_uv;
4915 } else {
4916 int rate;
4917 int64_t distortion;
4918 int64_t this_rd_thresh;
4919 int64_t tmp_rd, tmp_best_rd = INT64_MAX, tmp_best_rdu = INT64_MAX;
4920 int tmp_best_rate = INT_MAX, tmp_best_ratey = INT_MAX;
4921 int64_t tmp_best_distortion = INT_MAX, tmp_best_sse, uv_sse;
4922 int tmp_best_skippable = 0;
4923 int switchable_filter_index;
4924 int_mv *second_ref = comp_pred ?
4925 &x->mbmi_ext->ref_mvs[second_ref_frame][0] : NULL;
4926 b_mode_info tmp_best_bmodes[16];
4927 MB_MODE_INFO tmp_best_mbmode;
4928 BEST_SEG_INFO bsi[SWITCHABLE_FILTERS];
4929 int pred_exists = 0;
4930 int uv_skippable;
4931
4932 this_rd_thresh = (ref_frame == LAST_FRAME) ?
4933 rd_opt->threshes[segment_id][bsize][THR_LAST] :
4934 rd_opt->threshes[segment_id][bsize][THR_ALTR];
4935 this_rd_thresh = (ref_frame == GOLDEN_FRAME) ?
4936 rd_opt->threshes[segment_id][bsize][THR_GOLD] : this_rd_thresh;
4937 for (i = 0; i < SWITCHABLE_FILTER_CONTEXTS; ++i)
4938 filter_cache[i] = INT64_MAX;
4939
4940 if (cm->interp_filter != BILINEAR) {
4941 tmp_best_filter = EIGHTTAP;
4942 if (x->source_variance < sf->disable_filter_search_var_thresh) {
4943 tmp_best_filter = EIGHTTAP;
4944 } else if (sf->adaptive_pred_interp_filter == 1 &&
4945 ctx->pred_interp_filter < SWITCHABLE) {
4946 tmp_best_filter = ctx->pred_interp_filter;
4947 } else if (sf->adaptive_pred_interp_filter == 2) {
4948 tmp_best_filter = ctx->pred_interp_filter < SWITCHABLE ?
4949 ctx->pred_interp_filter : 0;
4950 } else {
4951 for (switchable_filter_index = 0;
4952 switchable_filter_index < SWITCHABLE_FILTERS;
4953 ++switchable_filter_index) {
4954 int newbest, rs;
4955 int64_t rs_rd;
4956 MB_MODE_INFO_EXT *mbmi_ext = x->mbmi_ext;
4957 mbmi->interp_filter = switchable_filter_index;
4958 tmp_rd = rd_pick_best_sub8x8_mode(cpi, x,
4959 &mbmi_ext->ref_mvs[ref_frame][0],
4960 second_ref, best_yrd, &rate,
4961 &rate_y, &distortion,
4962 &skippable, &total_sse,
4963 (int) this_rd_thresh, seg_mvs,
4964 bsi, switchable_filter_index,
4965 mi_row, mi_col);
4966
4967 if (tmp_rd == INT64_MAX)
4968 continue;
4969 rs = vp10_get_switchable_rate(cpi, xd);
4970 rs_rd = RDCOST(x->rdmult, x->rddiv, rs, 0);
4971 filter_cache[switchable_filter_index] = tmp_rd;
4972 filter_cache[SWITCHABLE_FILTERS] =
James Zern5e16d392015-08-17 18:19:22 -07004973 VPXMIN(filter_cache[SWITCHABLE_FILTERS], tmp_rd + rs_rd);
Jingning Han3ee6db62015-08-05 19:00:31 -07004974 if (cm->interp_filter == SWITCHABLE)
4975 tmp_rd += rs_rd;
4976
James Zern5e16d392015-08-17 18:19:22 -07004977 mask_filter = VPXMAX(mask_filter, tmp_rd);
Jingning Han3ee6db62015-08-05 19:00:31 -07004978
4979 newbest = (tmp_rd < tmp_best_rd);
4980 if (newbest) {
4981 tmp_best_filter = mbmi->interp_filter;
4982 tmp_best_rd = tmp_rd;
4983 }
4984 if ((newbest && cm->interp_filter == SWITCHABLE) ||
4985 (mbmi->interp_filter == cm->interp_filter &&
4986 cm->interp_filter != SWITCHABLE)) {
4987 tmp_best_rdu = tmp_rd;
4988 tmp_best_rate = rate;
4989 tmp_best_ratey = rate_y;
4990 tmp_best_distortion = distortion;
4991 tmp_best_sse = total_sse;
4992 tmp_best_skippable = skippable;
4993 tmp_best_mbmode = *mbmi;
4994 for (i = 0; i < 4; i++) {
4995 tmp_best_bmodes[i] = xd->mi[0]->bmi[i];
4996 x->zcoeff_blk[TX_4X4][i] = !x->plane[0].eobs[i];
4997 }
4998 pred_exists = 1;
4999 if (switchable_filter_index == 0 &&
5000 sf->use_rd_breakout &&
5001 best_rd < INT64_MAX) {
5002 if (tmp_best_rdu / 2 > best_rd) {
5003 // skip searching the other filters if the first is
5004 // already substantially larger than the best so far
5005 tmp_best_filter = mbmi->interp_filter;
5006 tmp_best_rdu = INT64_MAX;
5007 break;
5008 }
5009 }
5010 }
5011 } // switchable_filter_index loop
5012 }
5013 }
5014
5015 if (tmp_best_rdu == INT64_MAX && pred_exists)
5016 continue;
5017
5018 mbmi->interp_filter = (cm->interp_filter == SWITCHABLE ?
5019 tmp_best_filter : cm->interp_filter);
5020 if (!pred_exists) {
5021 // Handles the special case when a filter that is not in the
5022 // switchable list (bilinear, 6-tap) is indicated at the frame level
5023 tmp_rd = rd_pick_best_sub8x8_mode(cpi, x,
5024 &x->mbmi_ext->ref_mvs[ref_frame][0],
5025 second_ref, best_yrd, &rate, &rate_y,
5026 &distortion, &skippable, &total_sse,
5027 (int) this_rd_thresh, seg_mvs, bsi, 0,
5028 mi_row, mi_col);
5029 if (tmp_rd == INT64_MAX)
5030 continue;
5031 } else {
5032 total_sse = tmp_best_sse;
5033 rate = tmp_best_rate;
5034 rate_y = tmp_best_ratey;
5035 distortion = tmp_best_distortion;
5036 skippable = tmp_best_skippable;
5037 *mbmi = tmp_best_mbmode;
5038 for (i = 0; i < 4; i++)
5039 xd->mi[0]->bmi[i] = tmp_best_bmodes[i];
5040 }
5041
5042 rate2 += rate;
5043 distortion2 += distortion;
5044
5045 if (cm->interp_filter == SWITCHABLE)
5046 rate2 += vp10_get_switchable_rate(cpi, xd);
5047
5048 if (!mode_excluded)
5049 mode_excluded = comp_pred ? cm->reference_mode == SINGLE_REFERENCE
5050 : cm->reference_mode == COMPOUND_REFERENCE;
5051
5052 compmode_cost = vp10_cost_bit(comp_mode_p, comp_pred);
5053
5054 tmp_best_rdu = best_rd -
James Zern5e16d392015-08-17 18:19:22 -07005055 VPXMIN(RDCOST(x->rdmult, x->rddiv, rate2, distortion2),
5056 RDCOST(x->rdmult, x->rddiv, 0, total_sse));
Jingning Han3ee6db62015-08-05 19:00:31 -07005057
5058 if (tmp_best_rdu > 0) {
5059 // If even the 'Y' rd value of split is higher than best so far
5060 // then dont bother looking at UV
5061 vp10_build_inter_predictors_sbuv(&x->e_mbd, mi_row, mi_col,
5062 BLOCK_8X8);
5063 memset(x->skip_txfm, SKIP_TXFM_NONE, sizeof(x->skip_txfm));
Jingning Hana8dad552015-10-08 16:46:10 -07005064#if CONFIG_VAR_TX
5065 if (!inter_block_uvrd(cpi, x, &rate_uv, &distortion_uv, &uv_skippable,
5066 &uv_sse, BLOCK_8X8, tmp_best_rdu))
5067 continue;
5068#else
Jingning Han3ee6db62015-08-05 19:00:31 -07005069 if (!super_block_uvrd(cpi, x, &rate_uv, &distortion_uv, &uv_skippable,
5070 &uv_sse, BLOCK_8X8, tmp_best_rdu))
5071 continue;
Jingning Hana8dad552015-10-08 16:46:10 -07005072#endif
Jingning Han3ee6db62015-08-05 19:00:31 -07005073 rate2 += rate_uv;
5074 distortion2 += distortion_uv;
5075 skippable = skippable && uv_skippable;
5076 total_sse += uv_sse;
5077 }
5078 }
5079
5080 if (cm->reference_mode == REFERENCE_MODE_SELECT)
5081 rate2 += compmode_cost;
5082
5083 // Estimate the reference frame signaling cost and add it
5084 // to the rolling cost variable.
5085 if (second_ref_frame > INTRA_FRAME) {
5086 rate2 += ref_costs_comp[ref_frame];
5087 } else {
5088 rate2 += ref_costs_single[ref_frame];
5089 }
5090
5091 if (!disable_skip) {
5092 // Skip is never coded at the segment level for sub8x8 blocks and instead
5093 // always coded in the bitstream at the mode info level.
5094
Ronald S. Bultje60c58b52015-10-12 17:54:25 -04005095 if (ref_frame != INTRA_FRAME && !xd->lossless[mbmi->segment_id]) {
Jingning Han3ee6db62015-08-05 19:00:31 -07005096 if (RDCOST(x->rdmult, x->rddiv, rate_y + rate_uv, distortion2) <
5097 RDCOST(x->rdmult, x->rddiv, 0, total_sse)) {
5098 // Add in the cost of the no skip flag.
5099 rate2 += vp10_cost_bit(vp10_get_skip_prob(cm, xd), 0);
5100 } else {
5101 // FIXME(rbultje) make this work for splitmv also
5102 rate2 += vp10_cost_bit(vp10_get_skip_prob(cm, xd), 1);
5103 distortion2 = total_sse;
5104 assert(total_sse >= 0);
5105 rate2 -= (rate_y + rate_uv);
5106 rate_y = 0;
5107 rate_uv = 0;
5108 this_skip2 = 1;
5109 }
5110 } else {
5111 // Add in the cost of the no skip flag.
5112 rate2 += vp10_cost_bit(vp10_get_skip_prob(cm, xd), 0);
5113 }
5114
5115 // Calculate the final RD estimate for this mode.
5116 this_rd = RDCOST(x->rdmult, x->rddiv, rate2, distortion2);
5117 }
5118
5119 if (!disable_skip && ref_frame == INTRA_FRAME) {
5120 for (i = 0; i < REFERENCE_MODES; ++i)
James Zern5e16d392015-08-17 18:19:22 -07005121 best_pred_rd[i] = VPXMIN(best_pred_rd[i], this_rd);
Jingning Han3ee6db62015-08-05 19:00:31 -07005122 for (i = 0; i < SWITCHABLE_FILTER_CONTEXTS; i++)
James Zern5e16d392015-08-17 18:19:22 -07005123 best_filter_rd[i] = VPXMIN(best_filter_rd[i], this_rd);
Jingning Han3ee6db62015-08-05 19:00:31 -07005124 }
5125
5126 // Did this mode help.. i.e. is it the new best mode
5127 if (this_rd < best_rd || x->skip) {
5128 if (!mode_excluded) {
5129 int max_plane = MAX_MB_PLANE;
5130 // Note index of best mode so far
5131 best_ref_index = ref_index;
5132
5133 if (ref_frame == INTRA_FRAME) {
5134 /* required for left and above block mv */
5135 mbmi->mv[0].as_int = 0;
5136 max_plane = 1;
5137 }
5138
5139 rd_cost->rate = rate2;
5140 rd_cost->dist = distortion2;
5141 rd_cost->rdcost = this_rd;
5142 best_rd = this_rd;
5143 best_yrd = best_rd -
5144 RDCOST(x->rdmult, x->rddiv, rate_uv, distortion_uv);
5145 best_mbmode = *mbmi;
5146 best_skip2 = this_skip2;
5147 if (!x->select_tx_size)
5148 swap_block_ptr(x, ctx, 1, 0, 0, max_plane);
Jingning Hanbfeac5e2015-10-15 23:11:30 -07005149
5150#if CONFIG_VAR_TX
5151 for (i = 0; i < MAX_MB_PLANE; ++i)
5152 memset(ctx->blk_skip[i], 0, sizeof(uint8_t) * ctx->num_4x4_blk);
5153#else
Jingning Han3ee6db62015-08-05 19:00:31 -07005154 memcpy(ctx->zcoeff_blk, x->zcoeff_blk[TX_4X4],
hui su088b05f2015-08-12 10:41:51 -07005155 sizeof(ctx->zcoeff_blk[0]) * ctx->num_4x4_blk);
Jingning Hanbfeac5e2015-10-15 23:11:30 -07005156#endif
Jingning Han3ee6db62015-08-05 19:00:31 -07005157
5158 for (i = 0; i < 4; i++)
5159 best_bmodes[i] = xd->mi[0]->bmi[i];
5160
5161 // TODO(debargha): enhance this test with a better distortion prediction
5162 // based on qp, activity mask and history
5163 if ((sf->mode_search_skip_flags & FLAG_EARLY_TERMINATE) &&
5164 (ref_index > MIN_EARLY_TERM_INDEX)) {
5165 int qstep = xd->plane[0].dequant[1];
5166 // TODO(debargha): Enhance this by specializing for each mode_index
5167 int scale = 4;
5168#if CONFIG_VP9_HIGHBITDEPTH
5169 if (xd->cur_buf->flags & YV12_FLAG_HIGHBITDEPTH) {
5170 qstep >>= (xd->bd - 8);
5171 }
5172#endif // CONFIG_VP9_HIGHBITDEPTH
5173 if (x->source_variance < UINT_MAX) {
5174 const int var_adjust = (x->source_variance < 16);
5175 scale -= var_adjust;
5176 }
5177 if (ref_frame > INTRA_FRAME &&
5178 distortion2 * scale < qstep * qstep) {
5179 early_term = 1;
5180 }
5181 }
5182 }
5183 }
5184
5185 /* keep record of best compound/single-only prediction */
5186 if (!disable_skip && ref_frame != INTRA_FRAME) {
5187 int64_t single_rd, hybrid_rd, single_rate, hybrid_rate;
5188
5189 if (cm->reference_mode == REFERENCE_MODE_SELECT) {
5190 single_rate = rate2 - compmode_cost;
5191 hybrid_rate = rate2;
5192 } else {
5193 single_rate = rate2;
5194 hybrid_rate = rate2 + compmode_cost;
5195 }
5196
5197 single_rd = RDCOST(x->rdmult, x->rddiv, single_rate, distortion2);
5198 hybrid_rd = RDCOST(x->rdmult, x->rddiv, hybrid_rate, distortion2);
5199
5200 if (!comp_pred && single_rd < best_pred_rd[SINGLE_REFERENCE])
5201 best_pred_rd[SINGLE_REFERENCE] = single_rd;
5202 else if (comp_pred && single_rd < best_pred_rd[COMPOUND_REFERENCE])
5203 best_pred_rd[COMPOUND_REFERENCE] = single_rd;
5204
5205 if (hybrid_rd < best_pred_rd[REFERENCE_MODE_SELECT])
5206 best_pred_rd[REFERENCE_MODE_SELECT] = hybrid_rd;
5207 }
5208
5209 /* keep record of best filter type */
5210 if (!mode_excluded && !disable_skip && ref_frame != INTRA_FRAME &&
5211 cm->interp_filter != BILINEAR) {
5212 int64_t ref = filter_cache[cm->interp_filter == SWITCHABLE ?
5213 SWITCHABLE_FILTERS : cm->interp_filter];
5214 int64_t adj_rd;
5215 for (i = 0; i < SWITCHABLE_FILTER_CONTEXTS; i++) {
5216 if (ref == INT64_MAX)
5217 adj_rd = 0;
5218 else if (filter_cache[i] == INT64_MAX)
5219 // when early termination is triggered, the encoder does not have
5220 // access to the rate-distortion cost. it only knows that the cost
5221 // should be above the maximum valid value. hence it takes the known
5222 // maximum plus an arbitrary constant as the rate-distortion cost.
5223 adj_rd = mask_filter - ref + 10;
5224 else
5225 adj_rd = filter_cache[i] - ref;
5226
5227 adj_rd += this_rd;
James Zern5e16d392015-08-17 18:19:22 -07005228 best_filter_rd[i] = VPXMIN(best_filter_rd[i], adj_rd);
Jingning Han3ee6db62015-08-05 19:00:31 -07005229 }
5230 }
5231
5232 if (early_term)
5233 break;
5234
5235 if (x->skip && !comp_pred)
5236 break;
5237 }
5238
5239 if (best_rd >= best_rd_so_far) {
5240 rd_cost->rate = INT_MAX;
5241 rd_cost->rdcost = INT64_MAX;
5242 return;
5243 }
5244
5245 // If we used an estimate for the uv intra rd in the loop above...
5246 if (sf->use_uv_intra_rd_estimate) {
5247 // Do Intra UV best rd mode selection if best mode choice above was intra.
5248 if (best_mbmode.ref_frame[0] == INTRA_FRAME) {
5249 *mbmi = best_mbmode;
5250 rd_pick_intra_sbuv_mode(cpi, x, ctx, &rate_uv_intra,
5251 &rate_uv_tokenonly,
5252 &dist_uv,
5253 &skip_uv,
5254 BLOCK_8X8, TX_4X4);
5255 }
5256 }
5257
5258 if (best_rd == INT64_MAX) {
5259 rd_cost->rate = INT_MAX;
5260 rd_cost->dist = INT64_MAX;
5261 rd_cost->rdcost = INT64_MAX;
5262 return;
5263 }
5264
5265 assert((cm->interp_filter == SWITCHABLE) ||
5266 (cm->interp_filter == best_mbmode.interp_filter) ||
5267 !is_inter_block(&best_mbmode));
5268
5269 vp10_update_rd_thresh_fact(tile_data->thresh_freq_fact,
5270 sf->adaptive_rd_thresh, bsize, best_ref_index);
5271
5272 // macroblock modes
5273 *mbmi = best_mbmode;
5274 x->skip |= best_skip2;
5275 if (!is_inter_block(&best_mbmode)) {
5276 for (i = 0; i < 4; i++)
5277 xd->mi[0]->bmi[i].as_mode = best_bmodes[i].as_mode;
5278 } else {
5279 for (i = 0; i < 4; ++i)
5280 memcpy(&xd->mi[0]->bmi[i], &best_bmodes[i], sizeof(b_mode_info));
5281
5282 mbmi->mv[0].as_int = xd->mi[0]->bmi[3].as_mv[0].as_int;
5283 mbmi->mv[1].as_int = xd->mi[0]->bmi[3].as_mv[1].as_int;
5284 }
5285
5286 for (i = 0; i < REFERENCE_MODES; ++i) {
5287 if (best_pred_rd[i] == INT64_MAX)
5288 best_pred_diff[i] = INT_MIN;
5289 else
5290 best_pred_diff[i] = best_rd - best_pred_rd[i];
5291 }
5292
5293 if (!x->skip) {
5294 for (i = 0; i < SWITCHABLE_FILTER_CONTEXTS; i++) {
5295 if (best_filter_rd[i] == INT64_MAX)
5296 best_filter_diff[i] = 0;
5297 else
5298 best_filter_diff[i] = best_rd - best_filter_rd[i];
5299 }
5300 if (cm->interp_filter == SWITCHABLE)
5301 assert(best_filter_diff[SWITCHABLE_FILTERS] == 0);
5302 } else {
5303 vp10_zero(best_filter_diff);
5304 }
5305
5306 store_coding_context(x, ctx, best_ref_index,
5307 best_pred_diff, best_filter_diff, 0);
5308}