blob: 223c67ae28e84b40e295633cbfe0d821f2f281f0 [file] [log] [blame]
Jingning Han3ee6db62015-08-05 19:00:31 -07001/*
2 * Copyright (c) 2010 The WebM project authors. All Rights Reserved.
3 *
4 * Use of this source code is governed by a BSD-style license
5 * that can be found in the LICENSE file in the root of the source
6 * tree. An additional intellectual property rights grant can be found
7 * in the file PATENTS. All contributing project authors may
8 * be found in the AUTHORS file in the root of the source tree.
9 */
10
11#include <assert.h>
12#include <math.h>
13
14#include "./vp10_rtcd.h"
15#include "./vpx_dsp_rtcd.h"
16
Johannc5f11912015-08-31 14:36:35 -070017#include "vpx_dsp/vpx_dsp_common.h"
Jingning Han3ee6db62015-08-05 19:00:31 -070018#include "vpx_mem/vpx_mem.h"
19#include "vpx_ports/mem.h"
Jingning Han3acfe462015-08-12 09:20:31 -070020#include "vpx_ports/system_state.h"
Jingning Han3ee6db62015-08-05 19:00:31 -070021
Jingning Han54d66ef2015-08-06 21:14:07 -070022#include "vp10/common/common.h"
23#include "vp10/common/entropy.h"
24#include "vp10/common/entropymode.h"
25#include "vp10/common/idct.h"
26#include "vp10/common/mvref_common.h"
27#include "vp10/common/pred_common.h"
28#include "vp10/common/quant_common.h"
29#include "vp10/common/reconinter.h"
30#include "vp10/common/reconintra.h"
31#include "vp10/common/scan.h"
32#include "vp10/common/seg_common.h"
Jingning Han3ee6db62015-08-05 19:00:31 -070033
Jingning Han54d66ef2015-08-06 21:14:07 -070034#include "vp10/encoder/cost.h"
35#include "vp10/encoder/encodemb.h"
36#include "vp10/encoder/encodemv.h"
37#include "vp10/encoder/encoder.h"
38#include "vp10/encoder/mcomp.h"
hui su5d011cb2015-09-15 12:44:13 -070039#include "vp10/encoder/palette.h"
Jingning Han54d66ef2015-08-06 21:14:07 -070040#include "vp10/encoder/quantize.h"
41#include "vp10/encoder/ratectrl.h"
42#include "vp10/encoder/rd.h"
43#include "vp10/encoder/rdopt.h"
44#include "vp10/encoder/aq_variance.h"
Jingning Han3ee6db62015-08-05 19:00:31 -070045
46#define LAST_FRAME_MODE_MASK ((1 << GOLDEN_FRAME) | (1 << ALTREF_FRAME) | \
47 (1 << INTRA_FRAME))
48#define GOLDEN_FRAME_MODE_MASK ((1 << LAST_FRAME) | (1 << ALTREF_FRAME) | \
49 (1 << INTRA_FRAME))
50#define ALT_REF_MODE_MASK ((1 << LAST_FRAME) | (1 << GOLDEN_FRAME) | \
51 (1 << INTRA_FRAME))
52
53#define SECOND_REF_FRAME_MASK ((1 << ALTREF_FRAME) | 0x01)
54
55#define MIN_EARLY_TERM_INDEX 3
56#define NEW_MV_DISCOUNT_FACTOR 8
57
Debargha Mukherjee9fc691e2015-09-03 02:58:12 -070058#if CONFIG_EXT_TX
Debargha Mukherjeeb8bc0262015-09-11 08:32:56 -070059const double ext_tx_th = 0.98;
Debargha Mukherjee9fc691e2015-09-03 02:58:12 -070060#endif
61
Jingning Han3ee6db62015-08-05 19:00:31 -070062typedef struct {
63 PREDICTION_MODE mode;
64 MV_REFERENCE_FRAME ref_frame[2];
65} MODE_DEFINITION;
66
67typedef struct {
68 MV_REFERENCE_FRAME ref_frame[2];
69} REF_DEFINITION;
70
71struct rdcost_block_args {
Jingning Han71c15602015-10-13 12:40:39 -070072#if CONFIG_VAR_TX
73 const VP10_COMP *cpi;
74#endif
Jingning Han3ee6db62015-08-05 19:00:31 -070075 MACROBLOCK *x;
76 ENTROPY_CONTEXT t_above[16];
77 ENTROPY_CONTEXT t_left[16];
78 int this_rate;
79 int64_t this_dist;
80 int64_t this_sse;
81 int64_t this_rd;
82 int64_t best_rd;
83 int exit_early;
84 int use_fast_coef_costing;
85 const scan_order *so;
86 uint8_t skippable;
87};
88
89#define LAST_NEW_MV_INDEX 6
90static const MODE_DEFINITION vp10_mode_order[MAX_MODES] = {
91 {NEARESTMV, {LAST_FRAME, NONE}},
92 {NEARESTMV, {ALTREF_FRAME, NONE}},
93 {NEARESTMV, {GOLDEN_FRAME, NONE}},
94
95 {DC_PRED, {INTRA_FRAME, NONE}},
96
97 {NEWMV, {LAST_FRAME, NONE}},
98 {NEWMV, {ALTREF_FRAME, NONE}},
99 {NEWMV, {GOLDEN_FRAME, NONE}},
100
101 {NEARMV, {LAST_FRAME, NONE}},
102 {NEARMV, {ALTREF_FRAME, NONE}},
103 {NEARMV, {GOLDEN_FRAME, NONE}},
104
105 {ZEROMV, {LAST_FRAME, NONE}},
106 {ZEROMV, {GOLDEN_FRAME, NONE}},
107 {ZEROMV, {ALTREF_FRAME, NONE}},
108
109 {NEARESTMV, {LAST_FRAME, ALTREF_FRAME}},
110 {NEARESTMV, {GOLDEN_FRAME, ALTREF_FRAME}},
111
112 {TM_PRED, {INTRA_FRAME, NONE}},
113
114 {NEARMV, {LAST_FRAME, ALTREF_FRAME}},
115 {NEWMV, {LAST_FRAME, ALTREF_FRAME}},
116 {NEARMV, {GOLDEN_FRAME, ALTREF_FRAME}},
117 {NEWMV, {GOLDEN_FRAME, ALTREF_FRAME}},
118
119 {ZEROMV, {LAST_FRAME, ALTREF_FRAME}},
120 {ZEROMV, {GOLDEN_FRAME, ALTREF_FRAME}},
121
122 {H_PRED, {INTRA_FRAME, NONE}},
123 {V_PRED, {INTRA_FRAME, NONE}},
124 {D135_PRED, {INTRA_FRAME, NONE}},
125 {D207_PRED, {INTRA_FRAME, NONE}},
126 {D153_PRED, {INTRA_FRAME, NONE}},
127 {D63_PRED, {INTRA_FRAME, NONE}},
128 {D117_PRED, {INTRA_FRAME, NONE}},
129 {D45_PRED, {INTRA_FRAME, NONE}},
130};
131
132static const REF_DEFINITION vp10_ref_order[MAX_REFS] = {
133 {{LAST_FRAME, NONE}},
134 {{GOLDEN_FRAME, NONE}},
135 {{ALTREF_FRAME, NONE}},
136 {{LAST_FRAME, ALTREF_FRAME}},
137 {{GOLDEN_FRAME, ALTREF_FRAME}},
138 {{INTRA_FRAME, NONE}},
139};
140
hui su5d011cb2015-09-15 12:44:13 -0700141static INLINE int write_uniform_cost(int n, int v) {
142 int l = get_unsigned_bits(n), m = (1 << l) - n;
143 if (l == 0)
144 return 0;
145 if (v < m)
146 return (l - 1) * vp10_cost_bit(128, 0);
147 else
148 return l * vp10_cost_bit(128, 0);
149}
150
Jingning Han3ee6db62015-08-05 19:00:31 -0700151static void swap_block_ptr(MACROBLOCK *x, PICK_MODE_CONTEXT *ctx,
152 int m, int n, int min_plane, int max_plane) {
153 int i;
154
155 for (i = min_plane; i < max_plane; ++i) {
156 struct macroblock_plane *const p = &x->plane[i];
157 struct macroblockd_plane *const pd = &x->e_mbd.plane[i];
158
159 p->coeff = ctx->coeff_pbuf[i][m];
160 p->qcoeff = ctx->qcoeff_pbuf[i][m];
161 pd->dqcoeff = ctx->dqcoeff_pbuf[i][m];
162 p->eobs = ctx->eobs_pbuf[i][m];
163
164 ctx->coeff_pbuf[i][m] = ctx->coeff_pbuf[i][n];
165 ctx->qcoeff_pbuf[i][m] = ctx->qcoeff_pbuf[i][n];
166 ctx->dqcoeff_pbuf[i][m] = ctx->dqcoeff_pbuf[i][n];
167 ctx->eobs_pbuf[i][m] = ctx->eobs_pbuf[i][n];
168
169 ctx->coeff_pbuf[i][n] = p->coeff;
170 ctx->qcoeff_pbuf[i][n] = p->qcoeff;
171 ctx->dqcoeff_pbuf[i][n] = pd->dqcoeff;
172 ctx->eobs_pbuf[i][n] = p->eobs;
173 }
174}
175
Yaowu Xu26a9afc2015-08-13 09:42:27 -0700176static void model_rd_for_sb(VP10_COMP *cpi, BLOCK_SIZE bsize,
Jingning Han3ee6db62015-08-05 19:00:31 -0700177 MACROBLOCK *x, MACROBLOCKD *xd,
178 int *out_rate_sum, int64_t *out_dist_sum,
179 int *skip_txfm_sb, int64_t *skip_sse_sb) {
180 // Note our transform coeffs are 8 times an orthogonal transform.
181 // Hence quantizer step is also 8 times. To get effective quantizer
182 // we need to divide by 8 before sending to modeling function.
183 int i;
184 int64_t rate_sum = 0;
185 int64_t dist_sum = 0;
186 const int ref = xd->mi[0]->mbmi.ref_frame[0];
187 unsigned int sse;
188 unsigned int var = 0;
189 unsigned int sum_sse = 0;
190 int64_t total_sse = 0;
191 int skip_flag = 1;
192 const int shift = 6;
193 int rate;
194 int64_t dist;
195 const int dequant_shift =
196#if CONFIG_VP9_HIGHBITDEPTH
197 (xd->cur_buf->flags & YV12_FLAG_HIGHBITDEPTH) ?
198 xd->bd - 5 :
199#endif // CONFIG_VP9_HIGHBITDEPTH
200 3;
201
202 x->pred_sse[ref] = 0;
203
204 for (i = 0; i < MAX_MB_PLANE; ++i) {
205 struct macroblock_plane *const p = &x->plane[i];
206 struct macroblockd_plane *const pd = &xd->plane[i];
207 const BLOCK_SIZE bs = get_plane_block_size(bsize, pd);
208 const TX_SIZE max_tx_size = max_txsize_lookup[bs];
209 const BLOCK_SIZE unit_size = txsize_to_bsize[max_tx_size];
210 const int64_t dc_thr = p->quant_thred[0] >> shift;
211 const int64_t ac_thr = p->quant_thred[1] >> shift;
212 // The low thresholds are used to measure if the prediction errors are
213 // low enough so that we can skip the mode search.
James Zern5e16d392015-08-17 18:19:22 -0700214 const int64_t low_dc_thr = VPXMIN(50, dc_thr >> 2);
215 const int64_t low_ac_thr = VPXMIN(80, ac_thr >> 2);
Jingning Han3ee6db62015-08-05 19:00:31 -0700216 int bw = 1 << (b_width_log2_lookup[bs] - b_width_log2_lookup[unit_size]);
217 int bh = 1 << (b_height_log2_lookup[bs] - b_width_log2_lookup[unit_size]);
218 int idx, idy;
219 int lw = b_width_log2_lookup[unit_size] + 2;
220 int lh = b_height_log2_lookup[unit_size] + 2;
221
222 sum_sse = 0;
223
224 for (idy = 0; idy < bh; ++idy) {
225 for (idx = 0; idx < bw; ++idx) {
226 uint8_t *src = p->src.buf + (idy * p->src.stride << lh) + (idx << lw);
227 uint8_t *dst = pd->dst.buf + (idy * pd->dst.stride << lh) + (idx << lh);
228 int block_idx = (idy << 1) + idx;
229 int low_err_skip = 0;
230
231 var = cpi->fn_ptr[unit_size].vf(src, p->src.stride,
232 dst, pd->dst.stride, &sse);
233 x->bsse[(i << 2) + block_idx] = sse;
234 sum_sse += sse;
235
236 x->skip_txfm[(i << 2) + block_idx] = SKIP_TXFM_NONE;
237 if (!x->select_tx_size) {
238 // Check if all ac coefficients can be quantized to zero.
239 if (var < ac_thr || var == 0) {
240 x->skip_txfm[(i << 2) + block_idx] = SKIP_TXFM_AC_ONLY;
241
242 // Check if dc coefficient can be quantized to zero.
243 if (sse - var < dc_thr || sse == var) {
244 x->skip_txfm[(i << 2) + block_idx] = SKIP_TXFM_AC_DC;
245
246 if (!sse || (var < low_ac_thr && sse - var < low_dc_thr))
247 low_err_skip = 1;
248 }
249 }
250 }
251
252 if (skip_flag && !low_err_skip)
253 skip_flag = 0;
254
255 if (i == 0)
256 x->pred_sse[ref] += sse;
257 }
258 }
259
260 total_sse += sum_sse;
261
262 // Fast approximate the modelling function.
263 if (cpi->sf.simple_model_rd_from_var) {
264 int64_t rate;
265 const int64_t square_error = sum_sse;
266 int quantizer = (pd->dequant[1] >> dequant_shift);
267
268 if (quantizer < 120)
269 rate = (square_error * (280 - quantizer)) >> 8;
270 else
271 rate = 0;
272 dist = (square_error * quantizer) >> 8;
273 rate_sum += rate;
274 dist_sum += dist;
275 } else {
276 vp10_model_rd_from_var_lapndz(sum_sse, num_pels_log2_lookup[bs],
277 pd->dequant[1] >> dequant_shift,
278 &rate, &dist);
279 rate_sum += rate;
280 dist_sum += dist;
281 }
282 }
283
284 *skip_txfm_sb = skip_flag;
285 *skip_sse_sb = total_sse << 4;
286 *out_rate_sum = (int)rate_sum;
287 *out_dist_sum = dist_sum << 4;
288}
289
290int64_t vp10_block_error_c(const tran_low_t *coeff, const tran_low_t *dqcoeff,
291 intptr_t block_size, int64_t *ssz) {
292 int i;
293 int64_t error = 0, sqcoeff = 0;
294
295 for (i = 0; i < block_size; i++) {
296 const int diff = coeff[i] - dqcoeff[i];
297 error += diff * diff;
298 sqcoeff += coeff[i] * coeff[i];
299 }
300
301 *ssz = sqcoeff;
302 return error;
303}
304
/* Fast-path block error: sum of squared differences between the quantized
 * and dequantized coefficients, without the coefficient-energy side output.
 * Returns the total squared error over block_size coefficients.
 */
int64_t vp10_block_error_fp_c(const int16_t *coeff, const int16_t *dqcoeff,
                              int block_size) {
  int i;
  int64_t error = 0;

  for (i = 0; i < block_size; i++) {
    // Widen before squaring: two int16_t values can differ by up to 65535,
    // whose square exceeds INT_MAX (signed overflow is undefined behavior).
    const int64_t diff = coeff[i] - dqcoeff[i];
    error += diff * diff;
  }

  return error;
}
317
#if CONFIG_VP9_HIGHBITDEPTH
// High-bitdepth block error: sum of squared coefficient differences and,
// via *ssz, the sum of squared original coefficients, both rescaled to
// 8-bit-equivalent units by shifting right 2*(bd-8) bits with rounding.
int64_t vp10_highbd_block_error_c(const tran_low_t *coeff,
                                  const tran_low_t *dqcoeff,
                                  intptr_t block_size,
                                  int64_t *ssz, int bd) {
  int64_t sse = 0;
  int64_t energy = 0;
  const int shift = 2 * (bd - 8);
  const int rounding = (shift > 0) ? 1 << (shift - 1) : 0;
  int i;

  for (i = 0; i < block_size; i++) {
    const int64_t d = coeff[i] - dqcoeff[i];
    sse += d * d;
    energy += (int64_t)coeff[i] * (int64_t)coeff[i];
  }
  assert(sse >= 0 && energy >= 0);

  *ssz = (energy + rounding) >> shift;
  return (sse + rounding) >> shift;
}
#endif  // CONFIG_VP9_HIGHBITDEPTH
341
342/* The trailing '0' is a terminator which is used inside cost_coeffs() to
343 * decide whether to include cost of a trailing EOB node or not (i.e. we
344 * can skip this if the last coefficient in this transform block, e.g. the
345 * 16th coefficient in a 4x4 block or the 64th coefficient in a 8x8 block,
346 * were non-zero). */
347static const int16_t band_counts[TX_SIZES][8] = {
348 { 1, 2, 3, 4, 3, 16 - 13, 0 },
349 { 1, 2, 3, 4, 11, 64 - 21, 0 },
350 { 1, 2, 3, 4, 11, 256 - 21, 0 },
351 { 1, 2, 3, 4, 11, 1024 - 21, 0 },
352};
/* Entropy-coding cost, in vp10 bit-cost units, of the quantized
 * coefficients of one transform block, walking them in scan order and
 * pricing each token against the per-band/per-context tables in
 * x->token_costs.  With CONFIG_VAR_TX the entropy context arrives
 * pre-combined as coeff_ctx; otherwise it is derived from, and written back
 * to, the above/left context entries A and L. */
static int cost_coeffs(MACROBLOCK *x,
                       int plane, int block,
#if CONFIG_VAR_TX
                       int coeff_ctx,
#else
                       ENTROPY_CONTEXT *A, ENTROPY_CONTEXT *L,
#endif
                       TX_SIZE tx_size,
                       const int16_t *scan, const int16_t *nb,
                       int use_fast_coef_costing) {
  MACROBLOCKD *const xd = &x->e_mbd;
  MB_MODE_INFO *mbmi = &xd->mi[0]->mbmi;
  const struct macroblock_plane *p = &x->plane[plane];
  const struct macroblockd_plane *pd = &xd->plane[plane];
  const PLANE_TYPE type = pd->plane_type;
  // Skip band_counts[tx_size][0]; band 0 holds only the DC coefficient,
  // which is costed separately below.
  const int16_t *band_count = &band_counts[tx_size][1];
  const int eob = p->eobs[block];
  const tran_low_t *const qcoeff = BLOCK_OFFSET(p->qcoeff, block);
  unsigned int (*token_costs)[2][COEFF_CONTEXTS][ENTROPY_TOKENS] =
      x->token_costs[tx_size][type][is_inter_block(mbmi)];
  uint8_t token_cache[32 * 32];
#if CONFIG_VAR_TX
  int pt = coeff_ctx;
#else
  int pt = combine_entropy_contexts(*A, *L);
#endif
  int c, cost;
#if CONFIG_VP9_HIGHBITDEPTH
  const int16_t *cat6_high_cost = vp10_get_high_cost_table(xd->bd);
#else
  const int16_t *cat6_high_cost = vp10_get_high_cost_table(8);
#endif

#if !CONFIG_VAR_TX
  // Check for consistency of tx_size with mode info
  assert(type == PLANE_TYPE_Y ? mbmi->tx_size == tx_size
                              : get_uv_tx_size(mbmi, pd) == tx_size);
#endif

  if (eob == 0) {
    // single eob token
    cost = token_costs[0][0][pt][EOB_TOKEN];
    c = 0;
  } else {
    int band_left = *band_count++;

    // dc token
    int v = qcoeff[0];
    int16_t prev_t;
    EXTRABIT e;
    vp10_get_token_extra(v, &prev_t, &e);
    cost = (*token_costs)[0][pt][prev_t] +
        vp10_get_cost(prev_t, e, cat6_high_cost);

    token_cache[0] = vp10_pt_energy_class[prev_t];
    ++token_costs;

    // ac tokens
    for (c = 1; c < eob; c++) {
      const int rc = scan[c];
      int16_t t;

      v = qcoeff[rc];
      vp10_get_token_extra(v, &t, &e);
      if (use_fast_coef_costing) {
        // Fast mode: approximate the coefficient context from the previous
        // token only, skipping the neighbor-based context derivation and
        // the token-cache update.
        cost += (*token_costs)[!prev_t][!prev_t][t] +
            vp10_get_cost(t, e, cat6_high_cost);
      } else {
        pt = get_coef_context(nb, token_cache, c);
        cost += (*token_costs)[!prev_t][pt][t] +
            vp10_get_cost(t, e, cat6_high_cost);
        token_cache[rc] = vp10_pt_energy_class[t];
      }
      prev_t = t;
      if (!--band_left) {
        band_left = *band_count++;
        ++token_costs;
      }
    }

    // eob token
    // band_left == 0 means the band table's trailing 0 terminator was
    // reached, i.e. the very last coefficient was non-zero and no explicit
    // EOB token is coded (see band_counts comment above).
    if (band_left) {
      if (use_fast_coef_costing) {
        cost += (*token_costs)[0][!prev_t][EOB_TOKEN];
      } else {
        pt = get_coef_context(nb, token_cache, c);
        cost += (*token_costs)[0][pt][EOB_TOKEN];
      }
    }
  }

#if !CONFIG_VAR_TX
  // is eob first coefficient;
  *A = *L = (c > 0);
#endif

  return cost;
}
451
452static void dist_block(MACROBLOCK *x, int plane, int block, TX_SIZE tx_size,
453 int64_t *out_dist, int64_t *out_sse) {
454 const int ss_txfrm_size = tx_size << 1;
455 MACROBLOCKD* const xd = &x->e_mbd;
456 const struct macroblock_plane *const p = &x->plane[plane];
457 const struct macroblockd_plane *const pd = &xd->plane[plane];
458 int64_t this_sse;
459 int shift = tx_size == TX_32X32 ? 0 : 2;
460 tran_low_t *const coeff = BLOCK_OFFSET(p->coeff, block);
461 tran_low_t *const dqcoeff = BLOCK_OFFSET(pd->dqcoeff, block);
462#if CONFIG_VP9_HIGHBITDEPTH
463 const int bd = (xd->cur_buf->flags & YV12_FLAG_HIGHBITDEPTH) ? xd->bd : 8;
464 *out_dist = vp10_highbd_block_error(coeff, dqcoeff, 16 << ss_txfrm_size,
465 &this_sse, bd) >> shift;
466#else
467 *out_dist = vp10_block_error(coeff, dqcoeff, 16 << ss_txfrm_size,
468 &this_sse) >> shift;
469#endif // CONFIG_VP9_HIGHBITDEPTH
470 *out_sse = this_sse >> shift;
Jingning Han3ee6db62015-08-05 19:00:31 -0700471}
472
Jingning Hanebc48ef2015-10-07 11:43:48 -0700473static int rate_block(int plane, int block, int blk_row, int blk_col,
Jingning Han3ee6db62015-08-05 19:00:31 -0700474 TX_SIZE tx_size, struct rdcost_block_args* args) {
Jingning Han2cdc1272015-10-09 09:57:42 -0700475#if CONFIG_VAR_TX
476 int coeff_ctx = combine_entropy_contexts(*(args->t_above + blk_col),
477 *(args->t_left + blk_row));
478 int coeff_cost = cost_coeffs(args->x, plane, block, coeff_ctx,
479 tx_size, args->so->scan, args->so->neighbors,
480 args->use_fast_coef_costing);
481 const struct macroblock_plane *p = &args->x->plane[plane];
482 *(args->t_above + blk_col) = !(p->eobs[block] == 0);
483 *(args->t_left + blk_row) = !(p->eobs[block] == 0);
484 return coeff_cost;
485#else
486 return cost_coeffs(args->x, plane, block,
487 args->t_above + blk_col,
488 args->t_left + blk_row,
489 tx_size, args->so->scan, args->so->neighbors,
Jingning Han3ee6db62015-08-05 19:00:31 -0700490 args->use_fast_coef_costing);
Jingning Han2cdc1272015-10-09 09:57:42 -0700491#endif
Jingning Han3ee6db62015-08-05 19:00:31 -0700492}
493
// Per-transform-block RD callback (driven by
// vp10_foreach_transformed_block_in_plane via struct rdcost_block_args):
// computes this block's rate, distortion and SSE, accumulates them into
// args, and sets args->exit_early once the running RD cost exceeds
// args->best_rd.
static void block_rd_txfm(int plane, int block, int blk_row, int blk_col,
                          BLOCK_SIZE plane_bsize,
                          TX_SIZE tx_size, void *arg) {
  struct rdcost_block_args *args = arg;
  MACROBLOCK *const x = args->x;
  MACROBLOCKD *const xd = &x->e_mbd;
  MB_MODE_INFO *const mbmi = &xd->mi[0]->mbmi;
  int64_t rd1, rd2, rd;
  int rate;
  int64_t dist;
  int64_t sse;

  if (args->exit_early)
    return;

  if (!is_inter_block(mbmi)) {
    // Intra: encode the block and measure distortion.
#if CONFIG_VAR_TX
    struct encode_b_args arg = {x, NULL, &mbmi->skip};
    uint8_t *dst, *src;
    int src_stride = x->plane[plane].src.stride;
    int dst_stride = xd->plane[plane].dst.stride;
    unsigned int tmp_sse;
    PREDICTION_MODE mode = (plane == 0) ?
        get_y_mode(xd->mi[0], block) : mbmi->uv_mode;

#if CONFIG_VP9_HIGHBITDEPTH
    vp10_encode_block_intra(plane, block, blk_row, blk_col,
                            plane_bsize, tx_size, &arg);
    dist_block(x, plane, block, tx_size, &dist, &sse);
#else
    // Pixel-domain measurement: sse against the prediction before
    // encoding, dist against the reconstruction after encoding.
    src = &x->plane[plane].src.buf[4 * (blk_row * src_stride + blk_col)];
    dst = &xd->plane[plane].dst.buf[4 * (blk_row * dst_stride + blk_col)];
    vp10_predict_intra_block(xd, b_width_log2_lookup[plane_bsize],
                             b_height_log2_lookup[plane_bsize],
                             tx_size, mode, dst, dst_stride,
                             dst, dst_stride, blk_col, blk_row, plane);
    args->cpi->fn_ptr[txsize_to_bsize[tx_size]].vf(src, src_stride,
                                                   dst, dst_stride, &tmp_sse);
    sse = (int64_t)tmp_sse * 16;
    vp10_encode_block_intra(plane, block, blk_row, blk_col,
                            plane_bsize, tx_size, &arg);
    args->cpi->fn_ptr[txsize_to_bsize[tx_size]].vf(src, src_stride,
                                                   dst, dst_stride, &tmp_sse);
    dist = (int64_t)tmp_sse * 16;
#endif  // CONFIG_VP9_HIGHBITDEPTH
#else
    struct encode_b_args arg = {x, NULL, &mbmi->skip};
    vp10_encode_block_intra(plane, block, blk_row, blk_col,
                            plane_bsize, tx_size, &arg);
    dist_block(x, plane, block, tx_size, &dist, &sse);
#endif
  } else if (max_txsize_lookup[plane_bsize] == tx_size) {
    // Inter at the maximum transform size: honor the per-unit skip-txfm
    // decision precomputed by model_rd_for_sb (x->skip_txfm / x->bsse).
    if (x->skip_txfm[(plane << 2) + (block >> (tx_size << 1))] ==
        SKIP_TXFM_NONE) {
      // full forward transform and quantization
      vp10_xform_quant(x, plane, block, blk_row, blk_col,
                       plane_bsize, tx_size);
      dist_block(x, plane, block, tx_size, &dist, &sse);
    } else if (x->skip_txfm[(plane << 2) + (block >> (tx_size << 1))] ==
               SKIP_TXFM_AC_ONLY) {
      // compute DC coefficient
      tran_low_t *const coeff = BLOCK_OFFSET(x->plane[plane].coeff, block);
      tran_low_t *const dqcoeff = BLOCK_OFFSET(xd->plane[plane].dqcoeff, block);
      vp10_xform_quant_dc(x, plane, block, blk_row, blk_col,
                          plane_bsize, tx_size);
      sse = x->bsse[(plane << 2) + (block >> (tx_size << 1))] << 4;
      dist = sse;
      if (x->plane[plane].eobs[block]) {
        // Correct the distortion for the energy removed by coding the DC
        // coefficient: original DC energy minus residual DC energy.
        const int64_t orig_sse = (int64_t)coeff[0] * coeff[0];
        const int64_t resd_sse = coeff[0] - dqcoeff[0];
        int64_t dc_correct = orig_sse - resd_sse * resd_sse;
#if CONFIG_VP9_HIGHBITDEPTH
        dc_correct >>= ((xd->bd - 8) * 2);
#endif
        if (tx_size != TX_32X32)
          dc_correct >>= 2;

        dist = VPXMAX(0, sse - dc_correct);
      }
    } else {
      // SKIP_TXFM_AC_DC
      // skip forward transform
      x->plane[plane].eobs[block] = 0;
      sse = x->bsse[(plane << 2) + (block >> (tx_size << 1))] << 4;
      dist = sse;
    }
  } else {
    // full forward transform and quantization
    vp10_xform_quant(x, plane, block, blk_row, blk_col, plane_bsize, tx_size);
    dist_block(x, plane, block, tx_size, &dist, &sse);
  }

  // Early exit on distortion alone before paying for the rate computation.
  rd = RDCOST(x->rdmult, x->rddiv, 0, dist);
  if (args->this_rd + rd > args->best_rd) {
    args->exit_early = 1;
    return;
  }

  rate = rate_block(plane, block, blk_row, blk_col, tx_size, args);
  rd1 = RDCOST(x->rdmult, x->rddiv, rate, dist);  // cost of coding the block
  rd2 = RDCOST(x->rdmult, x->rddiv, 0, sse);      // cost of skipping it

  // TODO(jingning): temporarily enabled only for luma component
  rd = VPXMIN(rd1, rd2);
  if (plane == 0)
    x->zcoeff_blk[tx_size][block] = !x->plane[plane].eobs[block] ||
        (rd1 > rd2 && !xd->lossless[mbmi->segment_id]);

  args->this_rate += rate;
  args->this_dist += dist;
  args->this_sse += sse;
  args->this_rd += rd;

  if (args->this_rd > args->best_rd) {
    args->exit_early = 1;
    return;
  }

  args->skippable &= !x->plane[plane].eobs[block];
}
614
// Rate-distortion for one whole plane at a fixed transform size: sets up a
// rdcost_block_args accumulator and runs block_rd_txfm over every transform
// block in the plane.  On early exit (RD exceeded ref_best_rd) the outputs
// are set to sentinel values (INT_MAX / INT64_MAX / skippable 0).
// Side effect: for plane 0 this writes tx_size into xd->mi[0]->mbmi.
static void txfm_rd_in_plane(MACROBLOCK *x,
#if CONFIG_VAR_TX
                             const VP10_COMP *cpi,
#endif
                             int *rate, int64_t *distortion,
                             int *skippable, int64_t *sse,
                             int64_t ref_best_rd, int plane,
                             BLOCK_SIZE bsize, TX_SIZE tx_size,
                             int use_fast_coef_casting) {
  MACROBLOCKD *const xd = &x->e_mbd;
  const struct macroblockd_plane *const pd = &xd->plane[plane];
  TX_TYPE tx_type;
  struct rdcost_block_args args;
  vp10_zero(args);
  args.x = x;
#if CONFIG_VAR_TX
  args.cpi = cpi;
#endif
  args.best_rd = ref_best_rd;
  args.use_fast_coef_costing = use_fast_coef_casting;
  args.skippable = 1;

  if (plane == 0)
    xd->mi[0]->mbmi.tx_size = tx_size;

  vp10_get_entropy_contexts(bsize, tx_size, pd, args.t_above, args.t_left);

  // Pick the scan order from the transform type of the first block.
  tx_type = get_tx_type(pd->plane_type, xd, 0, tx_size);
  args.so = get_scan(tx_size, tx_type, is_inter_block(&xd->mi[0]->mbmi));

  vp10_foreach_transformed_block_in_plane(xd, bsize, plane,
                                          block_rd_txfm, &args);
  if (args.exit_early) {
    *rate = INT_MAX;
    *distortion = INT64_MAX;
    *sse = INT64_MAX;
    *skippable = 0;
  } else {
    *distortion = args.this_dist;
    *rate = args.this_rate;
    *sse = args.this_sse;
    *skippable = args.skippable;
  }
}
659
// Rate-distortion for the luma plane using the largest transform size
// permitted by both the block size and the frame's tx_mode.  With
// CONFIG_EXT_TX, additionally searches the extended transform types for
// inter blocks (>= 8x8, non-lossless), keeping the best in mbmi->tx_type,
// and adds the tx_type signaling cost to *rate.
static void choose_largest_tx_size(VP10_COMP *cpi, MACROBLOCK *x,
                                   int *rate, int64_t *distortion,
                                   int *skip, int64_t *sse,
                                   int64_t ref_best_rd,
                                   BLOCK_SIZE bs) {
  const TX_SIZE max_tx_size = max_txsize_lookup[bs];
  VP10_COMMON *const cm = &cpi->common;
  const TX_SIZE largest_tx_size = tx_mode_to_biggest_tx_size[cm->tx_mode];
  MACROBLOCKD *const xd = &x->e_mbd;
  MB_MODE_INFO *const mbmi = &xd->mi[0]->mbmi;
#if CONFIG_EXT_TX
  int tx_type, best_tx_type = DCT_DCT;
  int r, s;
  int64_t d, psse, this_rd, best_rd = INT64_MAX;
  vpx_prob skip_prob = vp10_get_skip_prob(cm, xd);
  int s0 = vp10_cost_bit(skip_prob, 0);  // cost of signaling "not skipped"
  int s1 = vp10_cost_bit(skip_prob, 1);  // cost of signaling "skipped"
#endif  // CONFIG_EXT_TX

  mbmi->tx_size = VPXMIN(max_tx_size, largest_tx_size);
#if CONFIG_EXT_TX
  if (is_inter_block(mbmi) && bs >= BLOCK_8X8 &&
      !xd->lossless[mbmi->segment_id]) {
    for (tx_type = DCT_DCT; tx_type < TX_TYPES - 1; ++tx_type) {
      // If the DST_* group is reached without DCT_DCT having been beaten,
      // jump ahead to the IDTX group (prunes the remaining DST types).
      if (mbmi->tx_type >= DST_ADST && mbmi->tx_type < IDTX &&
          best_tx_type == DCT_DCT) {
        tx_type = IDTX - 1;
        continue;
      }
      // Skip transform types not available at this transform size.
      if (tx_type >= GET_TX_TYPES(mbmi->tx_size))
        continue;

      mbmi->tx_type = tx_type;
      txfm_rd_in_plane(x,
#if CONFIG_VAR_TX
                       cpi,
#endif
                       &r, &d, &s,
                       &psse, ref_best_rd, 0, bs, mbmi->tx_size,
                       cpi->sf.use_fast_coef_costing);

      if (r == INT_MAX)
        continue;
      // tx_type is only signaled for transform sizes up to 16x16.
      if (mbmi->tx_size <= TX_16X16) {
        if (is_inter_block(mbmi))
          r += cpi->inter_tx_type_costs[mbmi->tx_size][mbmi->tx_type];
        else
          r += cpi->intra_tx_type_costs[mbmi->tx_size]
                                       [mbmi->mode][mbmi->tx_type];
      }

      if (s)
        this_rd = RDCOST(x->rdmult, x->rddiv, s1, psse);
      else
        this_rd = RDCOST(x->rdmult, x->rddiv, r + s0, d);
      if (is_inter_block(mbmi) && !xd->lossless[mbmi->segment_id] && !s)
        this_rd = VPXMIN(this_rd, RDCOST(x->rdmult, x->rddiv, s1, psse));

      // A non-DCT type must beat the running best by the ext_tx_th margin
      // to displace DCT_DCT.
      if (this_rd < ((best_tx_type == DCT_DCT) ? ext_tx_th : 1) * best_rd) {
        best_rd = this_rd;
        best_tx_type = mbmi->tx_type;
      }
    }
  }

  mbmi->tx_type = best_tx_type;
#endif  // CONFIG_EXT_TX

  // Final pass with the chosen (or default) transform type.
  txfm_rd_in_plane(x,
#if CONFIG_VAR_TX
                   cpi,
#endif
                   rate, distortion, skip,
                   sse, ref_best_rd, 0, bs,
                   mbmi->tx_size, cpi->sf.use_fast_coef_costing);

#if CONFIG_EXT_TX
  // Account for the tx_type signaling cost in the reported rate.
  if (bs >= BLOCK_8X8 && mbmi->tx_size <= TX_16X16 &&
      !xd->lossless[mbmi->segment_id] && *rate != INT_MAX) {
    if (is_inter_block(mbmi))
      *rate += cpi->inter_tx_type_costs[mbmi->tx_size][mbmi->tx_type];
    else
      *rate += cpi->intra_tx_type_costs[mbmi->tx_size]
                                       [mbmi->mode][mbmi->tx_type];
  }
#endif  // CONFIG_EXT_TX
}
747
Ronald S. Bultje60c58b52015-10-12 17:54:25 -0400748static void choose_smallest_tx_size(VP10_COMP *cpi, MACROBLOCK *x,
749 int *rate, int64_t *distortion,
750 int *skip, int64_t *sse,
751 int64_t ref_best_rd,
752 BLOCK_SIZE bs) {
753 MACROBLOCKD *const xd = &x->e_mbd;
754 MB_MODE_INFO *const mbmi = &xd->mi[0]->mbmi;
755
756 mbmi->tx_size = TX_4X4;
757
Jingning Han71c15602015-10-13 12:40:39 -0700758 txfm_rd_in_plane(x,
759#if CONFIG_VAR_TX
760 cpi,
761#endif
762 rate, distortion, skip,
Ronald S. Bultje60c58b52015-10-12 17:54:25 -0400763 sse, ref_best_rd, 0, bs,
764 mbmi->tx_size, cpi->sf.use_fast_coef_costing);
765}
766
Yaowu Xu26a9afc2015-08-13 09:42:27 -0700767static void choose_tx_size_from_rd(VP10_COMP *cpi, MACROBLOCK *x,
Jingning Han3ee6db62015-08-05 19:00:31 -0700768 int *rate,
769 int64_t *distortion,
770 int *skip,
771 int64_t *psse,
772 int64_t ref_best_rd,
773 BLOCK_SIZE bs) {
774 const TX_SIZE max_tx_size = max_txsize_lookup[bs];
Yaowu Xufc7cbd12015-08-13 09:36:53 -0700775 VP10_COMMON *const cm = &cpi->common;
Jingning Han3ee6db62015-08-05 19:00:31 -0700776 MACROBLOCKD *const xd = &x->e_mbd;
777 MB_MODE_INFO *const mbmi = &xd->mi[0]->mbmi;
778 vpx_prob skip_prob = vp10_get_skip_prob(cm, xd);
hui su38debe52015-09-20 19:18:00 -0700779 int r, s;
780 int64_t d, sse;
781 int64_t rd = INT64_MAX;
Jingning Han3ee6db62015-08-05 19:00:31 -0700782 int n, m;
783 int s0, s1;
hui su38debe52015-09-20 19:18:00 -0700784 int64_t best_rd = INT64_MAX, last_rd = INT64_MAX;
Jingning Han3ee6db62015-08-05 19:00:31 -0700785 TX_SIZE best_tx = max_tx_size;
786 int start_tx, end_tx;
hui su38debe52015-09-20 19:18:00 -0700787 const int tx_select = cm->tx_mode == TX_MODE_SELECT;
hui su07154b02015-09-22 10:34:18 -0700788#if CONFIG_EXT_TX
hui su4f16f112015-10-02 10:45:27 -0700789 int tx_type, best_tx_type = DCT_DCT;
hui su07154b02015-09-22 10:34:18 -0700790 int start_tx_type, end_tx_type;
791#endif // CONFIG_EXT_TX
792
Jingning Han3ee6db62015-08-05 19:00:31 -0700793 const vpx_prob *tx_probs = get_tx_probs2(max_tx_size, xd, &cm->fc->tx_probs);
794 assert(skip_prob > 0);
795 s0 = vp10_cost_bit(skip_prob, 0);
796 s1 = vp10_cost_bit(skip_prob, 1);
797
hui su38debe52015-09-20 19:18:00 -0700798 if (tx_select) {
Jingning Han3ee6db62015-08-05 19:00:31 -0700799 start_tx = max_tx_size;
800 end_tx = 0;
801 } else {
hui su38debe52015-09-20 19:18:00 -0700802 const TX_SIZE chosen_tx_size = VPXMIN(max_tx_size,
James Zern5e16d392015-08-17 18:19:22 -0700803 tx_mode_to_biggest_tx_size[cm->tx_mode]);
Jingning Han3ee6db62015-08-05 19:00:31 -0700804 start_tx = chosen_tx_size;
805 end_tx = chosen_tx_size;
806 }
807
hui su38debe52015-09-20 19:18:00 -0700808 *distortion = INT64_MAX;
809 *rate = INT_MAX;
810 *skip = 0;
811 *psse = INT64_MAX;
812
Debargha Mukherjee9fc691e2015-09-03 02:58:12 -0700813#if CONFIG_EXT_TX
hui su4f16f112015-10-02 10:45:27 -0700814 start_tx_type = DCT_DCT;
Yaowu Xu5a27b3b2015-10-22 12:18:52 -0700815 if (bs >= BLOCK_8X8 && !xd->lossless[mbmi->segment_id])
hui su4f16f112015-10-02 10:45:27 -0700816 end_tx_type = TX_TYPES - 1;
hui su07154b02015-09-22 10:34:18 -0700817 else
hui su4f16f112015-10-02 10:45:27 -0700818 end_tx_type = DCT_DCT;
hui su07154b02015-09-22 10:34:18 -0700819
820 for (tx_type = start_tx_type; tx_type <= end_tx_type; ++tx_type) {
hui su4f16f112015-10-02 10:45:27 -0700821 mbmi->tx_type = tx_type;
hui su2afe7322015-09-23 18:06:48 -0700822 // TODO(huisu): clean up the logic.
hui su4f16f112015-10-02 10:45:27 -0700823 if (mbmi->tx_type >= DST_ADST && mbmi->tx_type < IDTX &&
824 best_tx_type == DCT_DCT) {
825 tx_type = IDTX - 1;
hui su2afe7322015-09-23 18:06:48 -0700826 continue;
827 }
Debargha Mukherjee9fc691e2015-09-03 02:58:12 -0700828#endif // CONFIG_EXT_TX
hui su07154b02015-09-22 10:34:18 -0700829 for (n = start_tx; n >= end_tx; --n) {
830 int r_tx_size = 0;
831
Debargha Mukherjee9fc691e2015-09-03 02:58:12 -0700832#if CONFIG_EXT_TX
hui su4f16f112015-10-02 10:45:27 -0700833 if (mbmi->tx_type >= GET_TX_TYPES(n))
hui su2afe7322015-09-23 18:06:48 -0700834 continue;
Debargha Mukherjee9fc691e2015-09-03 02:58:12 -0700835#endif // CONFIG_EXT_TX
836
hui su07154b02015-09-22 10:34:18 -0700837 for (m = 0; m <= n - (n == (int) max_tx_size); ++m) {
838 if (m == n)
839 r_tx_size += vp10_cost_zero(tx_probs[m]);
840 else
841 r_tx_size += vp10_cost_one(tx_probs[m]);
Shunyao Liaa006d72015-08-19 12:04:56 -0700842 }
hui su07154b02015-09-22 10:34:18 -0700843
Jingning Han71c15602015-10-13 12:40:39 -0700844 txfm_rd_in_plane(x,
845#if CONFIG_VAR_TX
846 cpi,
847#endif
848 &r, &d, &s,
hui su07154b02015-09-22 10:34:18 -0700849 &sse, ref_best_rd, 0, bs, n,
850 cpi->sf.use_fast_coef_costing);
851#if CONFIG_EXT_TX
Yaowu Xu5a27b3b2015-10-22 12:18:52 -0700852 if (bs >= BLOCK_8X8 && !xd->lossless[mbmi->segment_id] &&
853 r != INT_MAX && n < TX_32X32) {
hui su3fa01292015-09-28 18:38:00 -0700854 if (is_inter_block(mbmi))
hui su4f16f112015-10-02 10:45:27 -0700855 r += cpi->inter_tx_type_costs[n][mbmi->tx_type];
hui su3fa01292015-09-28 18:38:00 -0700856 else
hui su4f16f112015-10-02 10:45:27 -0700857 r += cpi->intra_tx_type_costs[n][mbmi->mode][mbmi->tx_type];
hui su3fa01292015-09-28 18:38:00 -0700858 }
hui su07154b02015-09-22 10:34:18 -0700859#endif // CONFIG_EXT_TX
860
861 if (r == INT_MAX)
862 continue;
863
864 if (tx_select)
865 r += r_tx_size;
866
867 if (s) {
868 if (is_inter_block(mbmi)) {
869 rd = RDCOST(x->rdmult, x->rddiv, s1, sse);
870 if (tx_select)
871 r -= r_tx_size;
872 } else {
873 rd = RDCOST(x->rdmult, x->rddiv, s1 + r_tx_size * tx_select, sse);
874 }
875 } else {
876 rd = RDCOST(x->rdmult, x->rddiv, r + s0, d);
877 }
878
Yaowu Xu5a27b3b2015-10-22 12:18:52 -0700879 if (is_inter_block(mbmi) && !xd->lossless[mbmi->segment_id] && !s)
hui su07154b02015-09-22 10:34:18 -0700880 rd = VPXMIN(rd, RDCOST(x->rdmult, x->rddiv, s1, sse));
881
882 // Early termination in transform size search.
Yaowu Xu5a27b3b2015-10-22 12:18:52 -0700883 if (0 && cpi->sf.tx_size_search_breakout &&
hui su07154b02015-09-22 10:34:18 -0700884 (rd== INT64_MAX ||
885 (n < (int) max_tx_size && rd > last_rd) ||
886 s == 1))
887 break;
888
889 last_rd = rd;
890#if CONFIG_EXT_TX
hui su4f16f112015-10-02 10:45:27 -0700891 if (rd <
892 (is_inter_block(mbmi) && best_tx_type == DCT_DCT ? ext_tx_th : 1) *
Debargha Mukherjee3e8cceb2015-09-29 04:03:39 -0700893 best_rd) {
hui su07154b02015-09-22 10:34:18 -0700894#else
895 if (rd < best_rd) {
896#endif // CONFIG_EXT_TX
897 best_tx = n;
898 best_rd = rd;
899 *distortion = d;
900 *rate = r;
901 *skip = s;
902 *psse = sse;
903#if CONFIG_EXT_TX
hui su4f16f112015-10-02 10:45:27 -0700904 best_tx_type = mbmi->tx_type;
hui su07154b02015-09-22 10:34:18 -0700905#endif // CONFIG_EXT_TX
906 }
Jingning Han3ee6db62015-08-05 19:00:31 -0700907 }
hui su07154b02015-09-22 10:34:18 -0700908#if CONFIG_EXT_TX
Jingning Han3ee6db62015-08-05 19:00:31 -0700909 }
hui su07154b02015-09-22 10:34:18 -0700910#endif // CONFIG_EXT_TX
Debargha Mukherjee9fc691e2015-09-03 02:58:12 -0700911
Jingning Han3ee6db62015-08-05 19:00:31 -0700912 mbmi->tx_size = best_tx;
hui su07154b02015-09-22 10:34:18 -0700913#if CONFIG_EXT_TX
hui su4f16f112015-10-02 10:45:27 -0700914 mbmi->tx_type = best_tx_type;
Jingning Han71c15602015-10-13 12:40:39 -0700915 txfm_rd_in_plane(x,
916#if CONFIG_VAR_TX
917 cpi,
918#endif
919 &r, &d, &s,
hui su07154b02015-09-22 10:34:18 -0700920 &sse, ref_best_rd, 0, bs, best_tx,
921 cpi->sf.use_fast_coef_costing);
922#endif // CONFIG_EXT_TX
Jingning Han3ee6db62015-08-05 19:00:31 -0700923}
924
Yaowu Xu26a9afc2015-08-13 09:42:27 -0700925static void super_block_yrd(VP10_COMP *cpi, MACROBLOCK *x, int *rate,
Jingning Han3ee6db62015-08-05 19:00:31 -0700926 int64_t *distortion, int *skip,
927 int64_t *psse, BLOCK_SIZE bs,
928 int64_t ref_best_rd) {
929 MACROBLOCKD *xd = &x->e_mbd;
930 int64_t sse;
931 int64_t *ret_sse = psse ? psse : &sse;
932
933 assert(bs == xd->mi[0]->mbmi.sb_type);
934
Ronald S. Bultje60c58b52015-10-12 17:54:25 -0400935 if (CONFIG_MISC_FIXES && xd->lossless[xd->mi[0]->mbmi.segment_id]) {
936 choose_smallest_tx_size(cpi, x, rate, distortion, skip, ret_sse,
937 ref_best_rd, bs);
938 } else if (cpi->sf.tx_size_search_method == USE_LARGESTALL ||
939 xd->lossless[xd->mi[0]->mbmi.segment_id]) {
Jingning Han3ee6db62015-08-05 19:00:31 -0700940 choose_largest_tx_size(cpi, x, rate, distortion, skip, ret_sse, ref_best_rd,
941 bs);
942 } else {
943 choose_tx_size_from_rd(cpi, x, rate, distortion, skip, ret_sse,
944 ref_best_rd, bs);
945 }
946}
947
948static int conditional_skipintra(PREDICTION_MODE mode,
949 PREDICTION_MODE best_intra_mode) {
950 if (mode == D117_PRED &&
951 best_intra_mode != V_PRED &&
952 best_intra_mode != D135_PRED)
953 return 1;
954 if (mode == D63_PRED &&
955 best_intra_mode != V_PRED &&
956 best_intra_mode != D45_PRED)
957 return 1;
958 if (mode == D207_PRED &&
959 best_intra_mode != H_PRED &&
960 best_intra_mode != D45_PRED)
961 return 1;
962 if (mode == D153_PRED &&
963 best_intra_mode != H_PRED &&
964 best_intra_mode != D135_PRED)
965 return 1;
966 return 0;
967}
968
hui su5d011cb2015-09-15 12:44:13 -0700969void rd_pick_palette_intra_sby(VP10_COMP *cpi, MACROBLOCK *x, BLOCK_SIZE bsize,
970 int palette_ctx, int dc_mode_cost,
971 PALETTE_MODE_INFO *palette_mode_info,
972 uint8_t *best_palette_color_map,
973 TX_SIZE *best_tx, PREDICTION_MODE *mode_selected,
974 int64_t *best_rd) {
975 MACROBLOCKD *const xd = &x->e_mbd;
976 MODE_INFO *const mic = xd->mi[0];
977 int rows = 4 * num_4x4_blocks_high_lookup[bsize];
978 int cols = 4 * num_4x4_blocks_wide_lookup[bsize];
979 int this_rate, this_rate_tokenonly, s;
980 int64_t this_distortion, this_rd;
981 int colors, n;
982 int src_stride = x->plane[0].src.stride;
983 uint8_t *src = x->plane[0].src.buf;
984
985#if CONFIG_VP9_HIGHBITDEPTH
986 if (cpi->common.use_highbitdepth)
987 colors = vp10_count_colors_highbd(src, src_stride, rows, cols,
988 cpi->common.bit_depth);
989 else
990#endif // CONFIG_VP9_HIGHBITDEPTH
991 colors = vp10_count_colors(src, src_stride, rows, cols);
992 palette_mode_info->palette_size[0] = 0;
993
994 if (colors > 1 && colors <= 64 && cpi->common.allow_screen_content_tools) {
995 int r, c, i, j, k;
996 int max_itr = 50;
997 int color_ctx, color_idx = 0;
998 int color_order[PALETTE_MAX_SIZE];
999 double *data = x->palette_buffer->kmeans_data_buf;
1000 uint8_t *indices = x->palette_buffer->kmeans_indices_buf;
1001 uint8_t *pre_indices = x->palette_buffer->kmeans_pre_indices_buf;
1002 double centroids[PALETTE_MAX_SIZE];
1003 uint8_t *color_map;
1004 double lb, ub, val;
1005 PALETTE_MODE_INFO *pmi = &mic->mbmi.palette_mode_info;
1006#if CONFIG_VP9_HIGHBITDEPTH
1007 uint16_t *src16 = CONVERT_TO_SHORTPTR(src);
1008 if (cpi->common.use_highbitdepth)
1009 lb = ub = src16[0];
1010 else
1011#endif // CONFIG_VP9_HIGHBITDEPTH
1012 lb = ub = src[0];
1013
1014#if CONFIG_VP9_HIGHBITDEPTH
1015 if (cpi->common.use_highbitdepth) {
1016 for (r = 0; r < rows; ++r) {
1017 for (c = 0; c < cols; ++c) {
1018 val = src16[r * src_stride + c];
1019 data[r * cols + c] = val;
1020 if (val < lb)
1021 lb = val;
1022 else if (val > ub)
1023 ub = val;
1024 }
1025 }
1026 } else {
1027#endif // CONFIG_VP9_HIGHBITDEPTH
1028 for (r = 0; r < rows; ++r) {
1029 for (c = 0; c < cols; ++c) {
1030 val = src[r * src_stride + c];
1031 data[r * cols + c] = val;
1032 if (val < lb)
1033 lb = val;
1034 else if (val > ub)
1035 ub = val;
1036 }
1037 }
1038#if CONFIG_VP9_HIGHBITDEPTH
1039 }
1040#endif // CONFIG_VP9_HIGHBITDEPTH
1041
1042 mic->mbmi.mode = DC_PRED;
1043
1044 for (n = colors > PALETTE_MAX_SIZE ? PALETTE_MAX_SIZE : colors;
1045 n >= 2; --n) {
1046 for (i = 0; i < n; ++i)
1047 centroids[i] = lb + (2 * i + 1) * (ub - lb) / n / 2;
1048 vp10_k_means(data, centroids, indices, pre_indices, rows * cols,
1049 n, 1, max_itr);
1050 vp10_insertion_sort(centroids, n);
hui su17c817a2015-10-15 18:04:50 -07001051 for (i = 0; i < n; ++i)
1052 centroids[i] = round(centroids[i]);
hui su5d011cb2015-09-15 12:44:13 -07001053 // remove duplicates
1054 i = 1;
1055 k = n;
1056 while (i < k) {
1057 if (centroids[i] == centroids[i - 1]) {
1058 j = i;
1059 while (j < k - 1) {
1060 centroids[j] = centroids[j + 1];
1061 ++j;
1062 }
1063 --k;
1064 } else {
1065 ++i;
1066 }
1067 }
1068
1069#if CONFIG_VP9_HIGHBITDEPTH
1070 if (cpi->common.use_highbitdepth)
1071 for (i = 0; i < k; ++i)
1072 mic->mbmi.palette_mode_info.palette_colors[i] =
1073 clip_pixel_highbd(round(centroids[i]), cpi->common.bit_depth);
1074 else
1075#endif // CONFIG_VP9_HIGHBITDEPTH
1076 for (i = 0; i < k; ++i)
Yaowu Xu8ced62f2015-10-14 08:10:05 -07001077 pmi->palette_colors[i] = clip_pixel((int)round(centroids[i]));
hui su5d011cb2015-09-15 12:44:13 -07001078 pmi->palette_size[0] = k;
1079
1080 vp10_calc_indices(data, centroids, indices, rows * cols, k, 1);
1081 for (r = 0; r < rows; ++r)
1082 for (c = 0; c < cols; ++c)
1083 xd->plane[0].color_index_map[r * cols + c] = indices[r * cols + c];
1084
1085 super_block_yrd(cpi, x, &this_rate_tokenonly, &this_distortion,
1086 &s, NULL, bsize, *best_rd);
1087 if (this_rate_tokenonly == INT_MAX)
1088 continue;
1089
1090 this_rate = this_rate_tokenonly + dc_mode_cost +
1091 cpi->common.bit_depth * k * vp10_cost_bit(128, 0) +
1092 cpi->palette_y_size_cost[bsize - BLOCK_8X8][k - 2];
1093 this_rate +=
1094 vp10_cost_bit(vp10_default_palette_y_mode_prob[bsize - BLOCK_8X8]
1095 [palette_ctx], 1);
1096 color_map = xd->plane[0].color_index_map;
1097 this_rate += write_uniform_cost(k, xd->plane[0].color_index_map[0]);
1098 for (i = 0; i < rows; ++i) {
1099 for (j = (i == 0 ? 1 : 0); j < cols; ++j) {
1100 color_ctx = vp10_get_palette_color_context(color_map, cols, i, j,
1101 k, color_order);
1102 for (r = 0; r < k; ++r)
1103 if (color_map[i * cols + j] == color_order[r]) {
1104 color_idx = r;
1105 break;
1106 }
1107 assert(color_idx < k);
1108 this_rate +=
1109 cpi->palette_y_color_cost[k - 2][color_ctx][color_idx];
1110 }
1111 }
1112 this_rd = RDCOST(x->rdmult, x->rddiv, this_rate, this_distortion);
1113
1114 if (this_rd < *best_rd) {
1115 *best_rd = this_rd;
1116 *palette_mode_info = mic->mbmi.palette_mode_info;
1117 memcpy(best_palette_color_map, xd->plane[0].color_index_map,
1118 rows * cols * sizeof(xd->plane[0].color_index_map[0]));
1119 *mode_selected = DC_PRED;
1120 *best_tx = mic->mbmi.tx_size;
1121 }
1122 }
1123 }
1124}
1125
Yaowu Xu26a9afc2015-08-13 09:42:27 -07001126static int64_t rd_pick_intra4x4block(VP10_COMP *cpi, MACROBLOCK *x,
Jingning Han3ee6db62015-08-05 19:00:31 -07001127 int row, int col,
1128 PREDICTION_MODE *best_mode,
1129 const int *bmode_costs,
1130 ENTROPY_CONTEXT *a, ENTROPY_CONTEXT *l,
1131 int *bestrate, int *bestratey,
1132 int64_t *bestdistortion,
1133 BLOCK_SIZE bsize, int64_t rd_thresh) {
1134 PREDICTION_MODE mode;
1135 MACROBLOCKD *const xd = &x->e_mbd;
1136 int64_t best_rd = rd_thresh;
1137 struct macroblock_plane *p = &x->plane[0];
1138 struct macroblockd_plane *pd = &xd->plane[0];
1139 const int src_stride = p->src.stride;
1140 const int dst_stride = pd->dst.stride;
1141 const uint8_t *src_init = &p->src.buf[row * 4 * src_stride + col * 4];
1142 uint8_t *dst_init = &pd->dst.buf[row * 4 * src_stride + col * 4];
1143 ENTROPY_CONTEXT ta[2], tempa[2];
1144 ENTROPY_CONTEXT tl[2], templ[2];
1145 const int num_4x4_blocks_wide = num_4x4_blocks_wide_lookup[bsize];
1146 const int num_4x4_blocks_high = num_4x4_blocks_high_lookup[bsize];
1147 int idx, idy;
1148 uint8_t best_dst[8 * 8];
1149#if CONFIG_VP9_HIGHBITDEPTH
1150 uint16_t best_dst16[8 * 8];
1151#endif
1152
1153 memcpy(ta, a, sizeof(ta));
1154 memcpy(tl, l, sizeof(tl));
1155 xd->mi[0]->mbmi.tx_size = TX_4X4;
hui su5d011cb2015-09-15 12:44:13 -07001156 xd->mi[0]->mbmi.palette_mode_info.palette_size[0] = 0;
Jingning Han3ee6db62015-08-05 19:00:31 -07001157
1158#if CONFIG_VP9_HIGHBITDEPTH
1159 if (xd->cur_buf->flags & YV12_FLAG_HIGHBITDEPTH) {
1160 for (mode = DC_PRED; mode <= TM_PRED; ++mode) {
1161 int64_t this_rd;
1162 int ratey = 0;
1163 int64_t distortion = 0;
1164 int rate = bmode_costs[mode];
1165
1166 if (!(cpi->sf.intra_y_mode_mask[TX_4X4] & (1 << mode)))
1167 continue;
1168
1169 // Only do the oblique modes if the best so far is
1170 // one of the neighboring directional modes
1171 if (cpi->sf.mode_search_skip_flags & FLAG_SKIP_INTRA_DIRMISMATCH) {
1172 if (conditional_skipintra(mode, *best_mode))
1173 continue;
1174 }
1175
1176 memcpy(tempa, ta, sizeof(ta));
1177 memcpy(templ, tl, sizeof(tl));
1178
1179 for (idy = 0; idy < num_4x4_blocks_high; ++idy) {
1180 for (idx = 0; idx < num_4x4_blocks_wide; ++idx) {
1181 const int block = (row + idy) * 2 + (col + idx);
1182 const uint8_t *const src = &src_init[idx * 4 + idy * 4 * src_stride];
1183 uint8_t *const dst = &dst_init[idx * 4 + idy * 4 * dst_stride];
1184 int16_t *const src_diff = vp10_raster_block_offset_int16(BLOCK_8X8,
1185 block,
1186 p->src_diff);
1187 tran_low_t *const coeff = BLOCK_OFFSET(x->plane[0].coeff, block);
1188 xd->mi[0]->bmi[block].as_mode = mode;
Ronald S. Bultjec7dc1d72015-10-12 10:35:46 -04001189 vp10_predict_intra_block(xd, 1, 1, TX_4X4, mode, dst, dst_stride,
Jingning Han3ee6db62015-08-05 19:00:31 -07001190 dst, dst_stride,
1191 col + idx, row + idy, 0);
1192 vpx_highbd_subtract_block(4, 4, src_diff, 8, src, src_stride,
1193 dst, dst_stride, xd->bd);
Ronald S. Bultje60c58b52015-10-12 17:54:25 -04001194 if (xd->lossless[xd->mi[0]->mbmi.segment_id]) {
hui sub3cc3a02015-08-24 14:37:54 -07001195 TX_TYPE tx_type = get_tx_type(PLANE_TYPE_Y, xd, block, TX_4X4);
Debargha Mukherjee9fc691e2015-09-03 02:58:12 -07001196 const scan_order *so = get_scan(TX_4X4, tx_type, 0);
Jingning Han20484042015-10-21 17:38:00 -07001197#if CONFIG_VAR_TX
1198 const int coeff_ctx = combine_entropy_contexts(*(tempa + idx),
1199 *(templ + idy));
1200#endif
Yaowu Xu7c514e22015-09-28 15:55:46 -07001201 vp10_highbd_fwd_txfm_4x4(src_diff, coeff, 8, DCT_DCT, 1);
Jingning Han3ee6db62015-08-05 19:00:31 -07001202 vp10_regular_quantize_b_4x4(x, 0, block, so->scan, so->iscan);
Jingning Han20484042015-10-21 17:38:00 -07001203 ratey += cost_coeffs(x, 0, block,
1204#if CONFIG_VAR_TX
1205 coeff_ctx,
1206#else
1207 tempa + idx, templ + idy,
1208#endif
1209 TX_4X4,
Jingning Han3ee6db62015-08-05 19:00:31 -07001210 so->scan, so->neighbors,
1211 cpi->sf.use_fast_coef_costing);
1212 if (RDCOST(x->rdmult, x->rddiv, ratey, distortion) >= best_rd)
1213 goto next_highbd;
hui sud76e5b32015-08-13 16:27:19 -07001214 vp10_highbd_inv_txfm_add_4x4(BLOCK_OFFSET(pd->dqcoeff, block),
1215 dst, dst_stride, p->eobs[block],
Yaowu Xu7c514e22015-09-28 15:55:46 -07001216 xd->bd, DCT_DCT, 1);
Jingning Han3ee6db62015-08-05 19:00:31 -07001217 } else {
1218 int64_t unused;
hui sub3cc3a02015-08-24 14:37:54 -07001219 TX_TYPE tx_type = get_tx_type(PLANE_TYPE_Y, xd, block, TX_4X4);
Debargha Mukherjee9fc691e2015-09-03 02:58:12 -07001220 const scan_order *so = get_scan(TX_4X4, tx_type, 0);
Jingning Han20484042015-10-21 17:38:00 -07001221#if CONFIG_VAR_TX
1222 const int coeff_ctx = combine_entropy_contexts(*(tempa + idx),
1223 *(templ + idy));
1224#endif
Yaowu Xu7c514e22015-09-28 15:55:46 -07001225 vp10_highbd_fwd_txfm_4x4(src_diff, coeff, 8, tx_type, 0);
Jingning Han3ee6db62015-08-05 19:00:31 -07001226 vp10_regular_quantize_b_4x4(x, 0, block, so->scan, so->iscan);
Jingning Han20484042015-10-21 17:38:00 -07001227 ratey += cost_coeffs(x, 0, block,
1228#if CONFIG_VAR_TX
1229 coeff_ctx,
1230#else
1231 tempa + idx, templ + idy,
1232#endif
1233 TX_4X4,
Jingning Han3ee6db62015-08-05 19:00:31 -07001234 so->scan, so->neighbors,
1235 cpi->sf.use_fast_coef_costing);
1236 distortion += vp10_highbd_block_error(
1237 coeff, BLOCK_OFFSET(pd->dqcoeff, block),
1238 16, &unused, xd->bd) >> 2;
1239 if (RDCOST(x->rdmult, x->rddiv, ratey, distortion) >= best_rd)
1240 goto next_highbd;
hui sud76e5b32015-08-13 16:27:19 -07001241 vp10_highbd_inv_txfm_add_4x4(BLOCK_OFFSET(pd->dqcoeff, block),
1242 dst, dst_stride, p->eobs[block],
Yaowu Xu7c514e22015-09-28 15:55:46 -07001243 xd->bd, tx_type, 0);
Jingning Han3ee6db62015-08-05 19:00:31 -07001244 }
1245 }
1246 }
1247
1248 rate += ratey;
1249 this_rd = RDCOST(x->rdmult, x->rddiv, rate, distortion);
1250
1251 if (this_rd < best_rd) {
1252 *bestrate = rate;
1253 *bestratey = ratey;
1254 *bestdistortion = distortion;
1255 best_rd = this_rd;
1256 *best_mode = mode;
1257 memcpy(a, tempa, sizeof(tempa));
1258 memcpy(l, templ, sizeof(templ));
1259 for (idy = 0; idy < num_4x4_blocks_high * 4; ++idy) {
1260 memcpy(best_dst16 + idy * 8,
1261 CONVERT_TO_SHORTPTR(dst_init + idy * dst_stride),
1262 num_4x4_blocks_wide * 4 * sizeof(uint16_t));
1263 }
1264 }
1265 next_highbd:
1266 {}
1267 }
Jingning Han481b8342015-09-11 08:56:06 -07001268 if (best_rd >= rd_thresh)
Jingning Han3ee6db62015-08-05 19:00:31 -07001269 return best_rd;
1270
1271 for (idy = 0; idy < num_4x4_blocks_high * 4; ++idy) {
1272 memcpy(CONVERT_TO_SHORTPTR(dst_init + idy * dst_stride),
1273 best_dst16 + idy * 8,
1274 num_4x4_blocks_wide * 4 * sizeof(uint16_t));
1275 }
1276
1277 return best_rd;
1278 }
1279#endif // CONFIG_VP9_HIGHBITDEPTH
1280
1281 for (mode = DC_PRED; mode <= TM_PRED; ++mode) {
1282 int64_t this_rd;
1283 int ratey = 0;
1284 int64_t distortion = 0;
1285 int rate = bmode_costs[mode];
1286
1287 if (!(cpi->sf.intra_y_mode_mask[TX_4X4] & (1 << mode)))
1288 continue;
1289
1290 // Only do the oblique modes if the best so far is
1291 // one of the neighboring directional modes
1292 if (cpi->sf.mode_search_skip_flags & FLAG_SKIP_INTRA_DIRMISMATCH) {
1293 if (conditional_skipintra(mode, *best_mode))
1294 continue;
1295 }
1296
1297 memcpy(tempa, ta, sizeof(ta));
1298 memcpy(templ, tl, sizeof(tl));
1299
1300 for (idy = 0; idy < num_4x4_blocks_high; ++idy) {
1301 for (idx = 0; idx < num_4x4_blocks_wide; ++idx) {
1302 const int block = (row + idy) * 2 + (col + idx);
1303 const uint8_t *const src = &src_init[idx * 4 + idy * 4 * src_stride];
1304 uint8_t *const dst = &dst_init[idx * 4 + idy * 4 * dst_stride];
1305 int16_t *const src_diff =
1306 vp10_raster_block_offset_int16(BLOCK_8X8, block, p->src_diff);
1307 tran_low_t *const coeff = BLOCK_OFFSET(x->plane[0].coeff, block);
1308 xd->mi[0]->bmi[block].as_mode = mode;
Ronald S. Bultjec7dc1d72015-10-12 10:35:46 -04001309 vp10_predict_intra_block(xd, 1, 1, TX_4X4, mode, dst, dst_stride,
Jingning Han3ee6db62015-08-05 19:00:31 -07001310 dst, dst_stride, col + idx, row + idy, 0);
1311 vpx_subtract_block(4, 4, src_diff, 8, src, src_stride, dst, dst_stride);
1312
Ronald S. Bultje60c58b52015-10-12 17:54:25 -04001313 if (xd->lossless[xd->mi[0]->mbmi.segment_id]) {
hui sub3cc3a02015-08-24 14:37:54 -07001314 TX_TYPE tx_type = get_tx_type(PLANE_TYPE_Y, xd, block, TX_4X4);
Debargha Mukherjee9fc691e2015-09-03 02:58:12 -07001315 const scan_order *so = get_scan(TX_4X4, tx_type, 0);
Jingning Han2cdc1272015-10-09 09:57:42 -07001316#if CONFIG_VAR_TX
1317 int coeff_ctx = combine_entropy_contexts(*(tempa + idx),
1318 *(templ + idy));
1319#endif
Yaowu Xu7c514e22015-09-28 15:55:46 -07001320 vp10_fwd_txfm_4x4(src_diff, coeff, 8, DCT_DCT, 1);
Jingning Han3ee6db62015-08-05 19:00:31 -07001321 vp10_regular_quantize_b_4x4(x, 0, block, so->scan, so->iscan);
Jingning Han2cdc1272015-10-09 09:57:42 -07001322#if CONFIG_VAR_TX
1323 ratey += cost_coeffs(x, 0, block, coeff_ctx, TX_4X4, so->scan,
1324 so->neighbors, cpi->sf.use_fast_coef_costing);
1325 *(tempa + idx) = !(p->eobs[block] == 0);
1326 *(templ + idy) = !(p->eobs[block] == 0);
1327#else
1328 ratey += cost_coeffs(x, 0, block, tempa + idx, templ + idy,
1329 TX_4X4,
Jingning Han3ee6db62015-08-05 19:00:31 -07001330 so->scan, so->neighbors,
1331 cpi->sf.use_fast_coef_costing);
Jingning Han2cdc1272015-10-09 09:57:42 -07001332#endif
Jingning Han3ee6db62015-08-05 19:00:31 -07001333 if (RDCOST(x->rdmult, x->rddiv, ratey, distortion) >= best_rd)
1334 goto next;
hui sud76e5b32015-08-13 16:27:19 -07001335 vp10_inv_txfm_add_4x4(BLOCK_OFFSET(pd->dqcoeff, block),
Yaowu Xu7c514e22015-09-28 15:55:46 -07001336 dst, dst_stride, p->eobs[block], DCT_DCT, 1);
Jingning Han3ee6db62015-08-05 19:00:31 -07001337 } else {
1338 int64_t unused;
hui sub3cc3a02015-08-24 14:37:54 -07001339 TX_TYPE tx_type = get_tx_type(PLANE_TYPE_Y, xd, block, TX_4X4);
Debargha Mukherjee9fc691e2015-09-03 02:58:12 -07001340 const scan_order *so = get_scan(TX_4X4, tx_type, 0);
Jingning Han2cdc1272015-10-09 09:57:42 -07001341#if CONFIG_VAR_TX
1342 int coeff_ctx = combine_entropy_contexts(*(tempa + idx),
1343 *(templ + idy));
1344#endif
Yaowu Xu7c514e22015-09-28 15:55:46 -07001345 vp10_fwd_txfm_4x4(src_diff, coeff, 8, tx_type, 0);
Jingning Han3ee6db62015-08-05 19:00:31 -07001346 vp10_regular_quantize_b_4x4(x, 0, block, so->scan, so->iscan);
Jingning Han2cdc1272015-10-09 09:57:42 -07001347#if CONFIG_VAR_TX
1348 ratey += cost_coeffs(x, 0, block, coeff_ctx, TX_4X4, so->scan,
1349 so->neighbors, cpi->sf.use_fast_coef_costing);
1350 *(tempa + idx) = !(p->eobs[block] == 0);
1351 *(templ + idy) = !(p->eobs[block] == 0);
1352#else
1353 ratey += cost_coeffs(x, 0, block, tempa + idx, templ + idy,
1354 TX_4X4, so->scan, so->neighbors,
1355 cpi->sf.use_fast_coef_costing);
1356#endif
Jingning Han3ee6db62015-08-05 19:00:31 -07001357 distortion += vp10_block_error(coeff, BLOCK_OFFSET(pd->dqcoeff, block),
1358 16, &unused) >> 2;
1359 if (RDCOST(x->rdmult, x->rddiv, ratey, distortion) >= best_rd)
1360 goto next;
hui sud76e5b32015-08-13 16:27:19 -07001361 vp10_inv_txfm_add_4x4(BLOCK_OFFSET(pd->dqcoeff, block),
Yaowu Xu7c514e22015-09-28 15:55:46 -07001362 dst, dst_stride, p->eobs[block], tx_type, 0);
Jingning Han3ee6db62015-08-05 19:00:31 -07001363 }
1364 }
1365 }
1366
1367 rate += ratey;
1368 this_rd = RDCOST(x->rdmult, x->rddiv, rate, distortion);
1369
1370 if (this_rd < best_rd) {
1371 *bestrate = rate;
1372 *bestratey = ratey;
1373 *bestdistortion = distortion;
1374 best_rd = this_rd;
1375 *best_mode = mode;
1376 memcpy(a, tempa, sizeof(tempa));
1377 memcpy(l, templ, sizeof(templ));
1378 for (idy = 0; idy < num_4x4_blocks_high * 4; ++idy)
1379 memcpy(best_dst + idy * 8, dst_init + idy * dst_stride,
1380 num_4x4_blocks_wide * 4);
1381 }
1382 next:
1383 {}
1384 }
1385
Jingning Hanf1376972015-09-10 12:42:21 -07001386 if (best_rd >= rd_thresh)
Jingning Han3ee6db62015-08-05 19:00:31 -07001387 return best_rd;
1388
1389 for (idy = 0; idy < num_4x4_blocks_high * 4; ++idy)
1390 memcpy(dst_init + idy * dst_stride, best_dst + idy * 8,
1391 num_4x4_blocks_wide * 4);
1392
1393 return best_rd;
1394}
1395
Yaowu Xu26a9afc2015-08-13 09:42:27 -07001396static int64_t rd_pick_intra_sub_8x8_y_mode(VP10_COMP *cpi, MACROBLOCK *mb,
Jingning Han3ee6db62015-08-05 19:00:31 -07001397 int *rate, int *rate_y,
1398 int64_t *distortion,
1399 int64_t best_rd) {
1400 int i, j;
1401 const MACROBLOCKD *const xd = &mb->e_mbd;
1402 MODE_INFO *const mic = xd->mi[0];
1403 const MODE_INFO *above_mi = xd->above_mi;
1404 const MODE_INFO *left_mi = xd->left_mi;
1405 const BLOCK_SIZE bsize = xd->mi[0]->mbmi.sb_type;
1406 const int num_4x4_blocks_wide = num_4x4_blocks_wide_lookup[bsize];
1407 const int num_4x4_blocks_high = num_4x4_blocks_high_lookup[bsize];
1408 int idx, idy;
1409 int cost = 0;
1410 int64_t total_distortion = 0;
1411 int tot_rate_y = 0;
1412 int64_t total_rd = 0;
1413 ENTROPY_CONTEXT t_above[4], t_left[4];
1414 const int *bmode_costs = cpi->mbmode_cost;
1415
1416 memcpy(t_above, xd->plane[0].above_context, sizeof(t_above));
1417 memcpy(t_left, xd->plane[0].left_context, sizeof(t_left));
1418
1419 // Pick modes for each sub-block (of size 4x4, 4x8, or 8x4) in an 8x8 block.
1420 for (idy = 0; idy < 2; idy += num_4x4_blocks_high) {
1421 for (idx = 0; idx < 2; idx += num_4x4_blocks_wide) {
1422 PREDICTION_MODE best_mode = DC_PRED;
1423 int r = INT_MAX, ry = INT_MAX;
1424 int64_t d = INT64_MAX, this_rd = INT64_MAX;
1425 i = idy * 2 + idx;
1426 if (cpi->common.frame_type == KEY_FRAME) {
1427 const PREDICTION_MODE A = vp10_above_block_mode(mic, above_mi, i);
1428 const PREDICTION_MODE L = vp10_left_block_mode(mic, left_mi, i);
1429
1430 bmode_costs = cpi->y_mode_costs[A][L];
1431 }
1432
1433 this_rd = rd_pick_intra4x4block(cpi, mb, idy, idx, &best_mode,
1434 bmode_costs, t_above + idx, t_left + idy,
1435 &r, &ry, &d, bsize, best_rd - total_rd);
1436 if (this_rd >= best_rd - total_rd)
1437 return INT64_MAX;
1438
1439 total_rd += this_rd;
1440 cost += r;
1441 total_distortion += d;
1442 tot_rate_y += ry;
1443
1444 mic->bmi[i].as_mode = best_mode;
1445 for (j = 1; j < num_4x4_blocks_high; ++j)
1446 mic->bmi[i + j * 2].as_mode = best_mode;
1447 for (j = 1; j < num_4x4_blocks_wide; ++j)
1448 mic->bmi[i + j].as_mode = best_mode;
1449
1450 if (total_rd >= best_rd)
1451 return INT64_MAX;
1452 }
1453 }
1454
1455 *rate = cost;
1456 *rate_y = tot_rate_y;
1457 *distortion = total_distortion;
1458 mic->mbmi.mode = mic->bmi[3].as_mode;
1459
1460 return RDCOST(mb->rdmult, mb->rddiv, cost, total_distortion);
1461}
1462
// This function is used only for intra_only frames.
// Full-block intra luma mode search: evaluates DC_PRED..TM_PRED (plus a
// palette candidate when screen-content tools are enabled), keeping the
// mode/tx-size (and, under EXT_TX, tx-type) of the RD winner.  Writes the
// winner back into the block's mode info and the output pointers, and
// returns the best RD cost found (or the incoming best_rd if none won).
static int64_t rd_pick_intra_sby_mode(VP10_COMP *cpi, MACROBLOCK *x,
                                      int *rate, int *rate_tokenonly,
                                      int64_t *distortion, int *skippable,
                                      BLOCK_SIZE bsize,
                                      int64_t best_rd) {
  PREDICTION_MODE mode;
  PREDICTION_MODE mode_selected = DC_PRED;
  MACROBLOCKD *const xd = &x->e_mbd;
  MODE_INFO *const mic = xd->mi[0];
  int this_rate, this_rate_tokenonly, s;
  int64_t this_distortion, this_rd;
  TX_SIZE best_tx = TX_4X4;
#if CONFIG_EXT_TX
  TX_TYPE best_tx_type = DCT_DCT;
#endif  // CONFIG_EXT_TX
  int *bmode_costs;
  PALETTE_MODE_INFO palette_mode_info;
  uint8_t *best_palette_color_map = cpi->common.allow_screen_content_tools ?
      x->palette_buffer->best_palette_color_map : NULL;
  int rows = 4 * num_4x4_blocks_high_lookup[bsize];
  int cols = 4 * num_4x4_blocks_wide_lookup[bsize];
  int palette_ctx = 0;
  const MODE_INFO *above_mi = xd->above_mi;
  const MODE_INFO *left_mi = xd->left_mi;
  const PREDICTION_MODE A = vp10_above_block_mode(mic, above_mi, 0);
  const PREDICTION_MODE L = vp10_left_block_mode(mic, left_mi, 0);
  // Mode costs are conditioned on the above/left neighbors' modes.
  bmode_costs = cpi->y_mode_costs[A][L];

  memset(x->skip_txfm, SKIP_TXFM_NONE, sizeof(x->skip_txfm));
  palette_mode_info.palette_size[0] = 0;
  mic->mbmi.palette_mode_info.palette_size[0] = 0;
  // Palette signaling context: number of neighbors using palette mode.
  if (above_mi)
    palette_ctx += (above_mi->mbmi.palette_mode_info.palette_size[0] > 0);
  if (left_mi)
    palette_ctx += (left_mi->mbmi.palette_mode_info.palette_size[0] > 0);

  /* Y Search for intra prediction mode */
  for (mode = DC_PRED; mode <= TM_PRED; mode++) {
    mic->mbmi.mode = mode;

    super_block_yrd(cpi, x, &this_rate_tokenonly, &this_distortion,
                    &s, NULL, bsize, best_rd);

    if (this_rate_tokenonly == INT_MAX)
      continue;

    this_rate = this_rate_tokenonly + bmode_costs[mode];
    // DC_PRED additionally pays the "no palette" flag when palette is legal.
    if (cpi->common.allow_screen_content_tools && mode == DC_PRED)
      this_rate +=
          vp10_cost_bit(vp10_default_palette_y_mode_prob[bsize - BLOCK_8X8]
                                                        [palette_ctx], 0);
    this_rd = RDCOST(x->rdmult, x->rddiv, this_rate, this_distortion);

    if (this_rd < best_rd) {
      mode_selected = mode;
      best_rd = this_rd;
      best_tx = mic->mbmi.tx_size;
#if CONFIG_EXT_TX
      best_tx_type = mic->mbmi.tx_type;
#endif  // CONFIG_EXT_TX
      *rate = this_rate;
      *rate_tokenonly = this_rate_tokenonly;
      *distortion = this_distortion;
      *skippable = s;
    }
  }

  // Let the palette search try to beat the best regular intra mode; it
  // updates best_tx/mode_selected/best_rd in place when it wins.
  if (cpi->common.allow_screen_content_tools)
    rd_pick_palette_intra_sby(cpi, x, bsize, palette_ctx, bmode_costs[DC_PRED],
                              &palette_mode_info, best_palette_color_map,
                              &best_tx, &mode_selected, &best_rd);

  // Commit the overall winner back into the block's mode info.
  mic->mbmi.mode = mode_selected;
  mic->mbmi.tx_size = best_tx;
#if CONFIG_EXT_TX
  mic->mbmi.tx_type = best_tx_type;
#endif  // CONFIG_EXT_TX
  mic->mbmi.palette_mode_info.palette_size[0] =
      palette_mode_info.palette_size[0];
  if (palette_mode_info.palette_size[0] > 0) {
    memcpy(mic->mbmi.palette_mode_info.palette_colors,
           palette_mode_info.palette_colors,
           PALETTE_MAX_SIZE * sizeof(palette_mode_info.palette_colors[0]));
    memcpy(xd->plane[0].color_index_map, best_palette_color_map,
           rows * cols * sizeof(best_palette_color_map[0]));
  }

  return best_rd;
}
1553
Jingning Hana8dad552015-10-08 16:46:10 -07001554#if CONFIG_VAR_TX
// Compute the RD statistics (rate, distortion, sse, skip) of one transform
// block of size tx_size located at (blk_row, blk_col) (4x4-block units)
// inside the plane block plane_bsize, accumulating into the output pointers.
// The block is forward-transformed/quantized, reconstructed into a local
// 32x32 buffer, and the pixel-domain SSE against the source is measured.
static void tx_block_rd_b(const VP10_COMP *cpi, MACROBLOCK *x, TX_SIZE tx_size,
                          int blk_row, int blk_col, int plane, int block,
                          int plane_bsize, int coeff_ctx,
                          int *rate, int64_t *dist, int64_t *bsse, int *skip) {
  MACROBLOCKD *xd = &x->e_mbd;
  const struct macroblock_plane *const p = &x->plane[plane];
  struct macroblockd_plane *const pd = &xd->plane[plane];
#if CONFIG_VP9_HIGHBITDEPTH
  const int ss_txfrm_size = tx_size << 1;
  int64_t this_sse;
  // Transform-domain error is scaled by 4 except for 32x32 (which the
  // transform itself already scales differently).
  int shift = tx_size == TX_32X32 ? 0 : 2;
  tran_low_t *const coeff = BLOCK_OFFSET(p->coeff, block);
#endif
  unsigned int tmp_sse = 0;
  tran_low_t *const dqcoeff = BLOCK_OFFSET(pd->dqcoeff, block);
  PLANE_TYPE plane_type = (plane == 0) ? PLANE_TYPE_Y : PLANE_TYPE_UV;
  TX_TYPE tx_type = get_tx_type(plane_type, xd, block, tx_size);
  const scan_order *const scan_order =
      get_scan(tx_size, tx_type, is_inter_block(&xd->mi[0]->mbmi));

  BLOCK_SIZE txm_bsize = txsize_to_bsize[tx_size];
  // bh: transform block dimension in pixels (square transforms only).
  int bh = 4 * num_4x4_blocks_wide_lookup[txm_bsize];
  int src_stride = p->src.stride;
  uint8_t *src = &p->src.buf[4 * blk_row * src_stride + 4 * blk_col];
  uint8_t *dst = &pd->dst.buf[4 * blk_row * pd->dst.stride + 4 * blk_col];
  // Local reconstruction scratch; stride is fixed at 32 throughout.
  DECLARE_ALIGNED(16, uint8_t, rec_buffer[32 * 32]);

  int max_blocks_high = num_4x4_blocks_high_lookup[plane_bsize];
  int max_blocks_wide = num_4x4_blocks_wide_lookup[plane_bsize];

  // Clip the 4x4-block extent against the frame boundary (edge shifts are
  // in 1/8-pel-derived units; >> 5 converts to 4x4 block units).
  if (xd->mb_to_bottom_edge < 0)
    max_blocks_high += xd->mb_to_bottom_edge >> (5 + pd->subsampling_y);
  if (xd->mb_to_right_edge < 0)
    max_blocks_wide += xd->mb_to_right_edge >> (5 + pd->subsampling_x);

  vp10_xform_quant(x, plane, block, blk_row, blk_col, plane_bsize, tx_size);

  // Seed the scratch buffer with the current prediction.
  vpx_convolve_copy(dst, pd->dst.stride, rec_buffer, 32,
                    NULL, 0, NULL, 0, bh, bh);

  if (blk_row + (bh >> 2) > max_blocks_high ||
      blk_col + (bh >> 2) > max_blocks_wide) {
    // Partially-visible block: accumulate SSE over visible 8x8 sub-blocks
    // only, stepping 2 4x4 units at a time.
    int idx, idy;
    unsigned int this_sse;
    int blocks_height = VPXMIN(bh >> 2, max_blocks_high - blk_row);
    int blocks_width = VPXMIN(bh >> 2, max_blocks_wide - blk_col);
    for (idy = 0; idy < blocks_height; idy += 2) {
      for (idx = 0; idx < blocks_width; idx += 2) {
        cpi->fn_ptr[BLOCK_8X8].vf(src + 4 * idy * src_stride + 4 * idx,
                                  src_stride,
                                  rec_buffer + 4 * idy * 32 + 4 * idx,
                                  32, &this_sse);
        tmp_sse += this_sse;
      }
    }
  } else {
    cpi->fn_ptr[txm_bsize].vf(src, src_stride, rec_buffer, 32, &tmp_sse);
  }

#if CONFIG_VP9_HIGHBITDEPTH
  // High bit depth path measures error in the transform domain.
  // NOTE(review): tmp_sse computed above is unused here, and no inverse
  // transform/reconstruction is performed in this path — looks like an
  // unfinished port; confirm against upstream.
  *dist += vp10_highbd_block_error(coeff, dqcoeff, 16 << ss_txfrm_size,
                                   &this_sse, xd->bd) >> shift;
  *bsse += this_sse >> shift;
#else
  *bsse += (int64_t)tmp_sse * 16;

  if (p->eobs[block] > 0) {
    // TODO(jingning): integrate multiple transform type experiment
    // NOTE(review): this shadows the outer tx_type computed via
    // get_tx_type() above, forcing DCT_DCT for reconstruction.
    TX_TYPE tx_type = DCT_DCT;
    switch (tx_size) {
      case TX_32X32:
        vp10_inv_txfm_add_32x32(dqcoeff, rec_buffer, 32, p->eobs[block],
                                tx_type);
        break;
      case TX_16X16:
        vp10_inv_txfm_add_16x16(dqcoeff, rec_buffer, 32, p->eobs[block],
                                tx_type);
        break;
      case TX_8X8:
        vp10_inv_txfm_add_8x8(dqcoeff, rec_buffer, 32, p->eobs[block],
                              tx_type);
        break;
      case TX_4X4:
        vp10_inv_txfm_add_4x4(dqcoeff, rec_buffer, 32, p->eobs[block],
                              tx_type,
                              xd->lossless[xd->mi[0]->mbmi.segment_id]);
        break;
      default:
        assert(0 && "Invalid transform size");
        break;
    }

    // Re-measure SSE against the now-reconstructed pixels (same boundary
    // handling as above).
    if ((bh >> 2) + blk_col > max_blocks_wide ||
        (bh >> 2) + blk_row > max_blocks_high) {
      int idx, idy;
      unsigned int this_sse;
      int blocks_height = VPXMIN(bh >> 2, max_blocks_high - blk_row);
      int blocks_width = VPXMIN(bh >> 2, max_blocks_wide - blk_col);
      tmp_sse = 0;
      for (idy = 0; idy < blocks_height; idy += 2) {
        for (idx = 0; idx < blocks_width; idx += 2) {
          cpi->fn_ptr[BLOCK_8X8].vf(src + 4 * idy * src_stride + 4 * idx,
                                    src_stride,
                                    rec_buffer + 4 * idy * 32 + 4 * idx,
                                    32, &this_sse);
          tmp_sse += this_sse;
        }
      }
    } else {
      cpi->fn_ptr[txm_bsize].vf(src, src_stride,
                                rec_buffer, 32, &tmp_sse);
    }
  }
  *dist += (int64_t)tmp_sse * 16;
#endif  // CONFIG_VP9_HIGHBITDEPTH

  *rate += cost_coeffs(x, plane, block, coeff_ctx, tx_size,
                       scan_order->scan, scan_order->neighbors, 0);
  // Block is skippable only if every contributing transform block has eob 0.
  *skip &= (p->eobs[block] == 0);
}
1675
// Recursively choose the transform partitioning for the block at
// (blk_row, blk_col): RD-cost coding it whole at tx_size versus splitting
// into four quadrants at tx_size-1, keep the cheaper option, and record the
// winner in mbmi->inter_tx_size / mbmi->tx_size and the entropy contexts.
// Sets *is_cost_valid = 0 when the search exceeds ref_best_rd.
static void select_tx_block(const VP10_COMP *cpi, MACROBLOCK *x,
                            int blk_row, int blk_col, int plane, int block,
                            TX_SIZE tx_size, BLOCK_SIZE plane_bsize,
                            ENTROPY_CONTEXT *ta, ENTROPY_CONTEXT *tl,
                            int *rate, int64_t *dist,
                            int64_t *bsse, int *skip,
                            int64_t ref_best_rd, int *is_cost_valid) {
  MACROBLOCKD *const xd = &x->e_mbd;
  MB_MODE_INFO *const mbmi = &xd->mi[0]->mbmi;
  struct macroblock_plane *const p = &x->plane[plane];
  struct macroblockd_plane *const pd = &xd->plane[plane];
  // Index into the 8x8-entry inter_tx_size grid (8 entries per row).
  int tx_idx = (blk_row >> (1 - pd->subsampling_y)) * 8 +
               (blk_col >> (1 - pd->subsampling_x));
  int max_blocks_high = num_4x4_blocks_high_lookup[plane_bsize];
  int max_blocks_wide = num_4x4_blocks_wide_lookup[plane_bsize];
  int64_t this_rd = INT64_MAX;  // RD cost of coding at tx_size (no split)
  ENTROPY_CONTEXT *pta = ta + blk_col;
  ENTROPY_CONTEXT *ptl = tl + blk_row;
  ENTROPY_CONTEXT stxa = 0, stxl = 0;
  int coeff_ctx, i;
  int64_t sum_dist = 0, sum_bsse = 0;
  int64_t sum_rd = INT64_MAX;  // RD cost of the 4-way split
  // Split rate starts with the cost of signaling the split bit
  // (even probability 128/256).
  int sum_rate = vp10_cost_bit(128, 1);
  int all_skip = 1;
  int tmp_eob = 0;

  if (ref_best_rd < 0) {
    *is_cost_valid = 0;
    return;
  }

  // Collapse the per-4x4 entropy contexts covered by this transform into a
  // single above/left flag (wider transforms read 2/4/8 bytes at once).
  switch (tx_size) {
    case TX_4X4:
      stxa = pta[0];
      stxl = ptl[0];
      break;
    case TX_8X8:
      stxa = !!*(const uint16_t *)&pta[0];
      stxl = !!*(const uint16_t *)&ptl[0];
      break;
    case TX_16X16:
      stxa = !!*(const uint32_t *)&pta[0];
      stxl = !!*(const uint32_t *)&ptl[0];
      break;
    case TX_32X32:
      stxa = !!*(const uint64_t *)&pta[0];
      stxl = !!*(const uint64_t *)&ptl[0];
      break;
    default:
      assert(0 && "Invalid transform size.");
      break;
  }
  coeff_ctx = combine_entropy_contexts(stxa, stxl);

  if (xd->mb_to_bottom_edge < 0)
    max_blocks_high += xd->mb_to_bottom_edge >> (5 + pd->subsampling_y);
  if (xd->mb_to_right_edge < 0)
    max_blocks_wide += xd->mb_to_right_edge >> (5 + pd->subsampling_x);

  *rate = 0;
  *dist = 0;
  *bsse = 0;
  *skip = 1;

  // Entirely outside the visible frame: contributes nothing.
  if (blk_row >= max_blocks_high || blk_col >= max_blocks_wide)
    return;

  // Option 1: code the whole block at tx_size.
  if (cpi->common.tx_mode == TX_MODE_SELECT || tx_size == TX_4X4) {
    mbmi->inter_tx_size[tx_idx] = tx_size;
    tx_block_rd_b(cpi, x, tx_size, blk_row, blk_col, plane, block,
                  plane_bsize, coeff_ctx, rate, dist, bsse, skip);
    if (tx_size > TX_4X4)
      *rate += vp10_cost_bit(128, 0);  // cost of the "no split" bit
    this_rd = RDCOST(x->rdmult, x->rddiv, *rate, *dist);
    tmp_eob = p->eobs[block];
  }

  // Option 2: split into four tx_size-1 quadrants and recurse.
  if (tx_size > TX_4X4) {
    BLOCK_SIZE bsize = txsize_to_bsize[tx_size];
    int bsl = b_height_log2_lookup[bsize];
    int sub_step = 1 << (2 * (tx_size - 1));  // coeff blocks per quadrant
    int i;
    int this_rate;
    int64_t this_dist;
    int64_t this_bsse;
    int this_skip;
    int this_cost_valid = 1;
    int64_t tmp_rd = 0;

    --bsl;
    for (i = 0; i < 4 && this_cost_valid; ++i) {
      int offsetr = (i >> 1) << bsl;
      int offsetc = (i & 0x01) << bsl;
      select_tx_block(cpi, x, blk_row + offsetr, blk_col + offsetc,
                      plane, block + i * sub_step, tx_size - 1,
                      plane_bsize, ta, tl, &this_rate, &this_dist,
                      &this_bsse, &this_skip,
                      ref_best_rd - tmp_rd, &this_cost_valid);
      sum_rate += this_rate;
      sum_dist += this_dist;
      sum_bsse += this_bsse;
      all_skip &= this_skip;
      tmp_rd += RDCOST(x->rdmult, x->rddiv, this_rate, this_dist);
      // Early out: split already costs more than coding whole.
      if (this_rd < tmp_rd)
        break;
    }
    if (this_cost_valid)
      sum_rd = tmp_rd;
  }

  if (this_rd < sum_rd) {
    // Keep the un-split transform: refresh contexts and propagate tx_size
    // to every inter_tx_size grid cell this transform covers.
    int idx, idy;
    for (i = 0; i < (1 << tx_size); ++i)
      pta[i] = ptl[i] = !(tmp_eob == 0);
    mbmi->inter_tx_size[tx_idx] = tx_size;

    for (idy = 0; idy < (1 << tx_size) / 2; ++idy)
      for (idx = 0; idx < (1 << tx_size) / 2; ++idx)
        mbmi->inter_tx_size[tx_idx + (idy << 3) + idx] = tx_size;
    mbmi->tx_size = tx_size;
    if (this_rd == INT64_MAX)
      *is_cost_valid = 0;
  } else {
    // Keep the split: report the accumulated quadrant statistics
    // (recursion already updated contexts and inter_tx_size).
    *rate = sum_rate;
    *dist = sum_dist;
    *bsse = sum_bsse;
    *skip = all_skip;
    if (sum_rd == INT64_MAX)
      *is_cost_valid = 0;
  }
}
1807
// RD cost of the luma plane of an inter block with per-region transform-size
// selection (CONFIG_VAR_TX): walks the plane in max-transform-size steps and
// lets select_tx_block() choose the partitioning for each region.  On early
// termination (cost exceeds ref_best_rd) the outputs are set to the
// INT_MAX/INT64_MAX "invalid" sentinels.
static void inter_block_yrd(const VP10_COMP *cpi, MACROBLOCK *x,
                            int *rate, int64_t *distortion, int *skippable,
                            int64_t *sse, BLOCK_SIZE bsize,
                            int64_t ref_best_rd) {
  MACROBLOCKD *const xd = &x->e_mbd;
  int is_cost_valid = 1;
  int64_t this_rd = 0;

  if (ref_best_rd < 0)
    is_cost_valid = 0;

  *rate = 0;
  *distortion = 0;
  *sse = 0;
  *skippable = 1;

#if CONFIG_EXT_TX
  xd->mi[0]->mbmi.tx_type = DCT_DCT;
#endif

  if (is_cost_valid) {
    const struct macroblockd_plane *const pd = &xd->plane[0];
    const BLOCK_SIZE plane_bsize = get_plane_block_size(bsize, pd);
    const int mi_width = num_4x4_blocks_wide_lookup[plane_bsize];
    const int mi_height = num_4x4_blocks_high_lookup[plane_bsize];
    BLOCK_SIZE txb_size = txsize_to_bsize[max_txsize_lookup[plane_bsize]];
    // bh: stride of the scan in 4x4-block units (one max-size transform).
    int bh = num_4x4_blocks_wide_lookup[txb_size];
    int idx, idy;
    int block = 0;
    int step = 1 << (max_txsize_lookup[plane_bsize] * 2);
    ENTROPY_CONTEXT ctxa[16], ctxl[16];

    int pnrate = 0, pnskip = 1;
    int64_t pndist = 0, pnsse = 0;

    vp10_get_entropy_contexts(bsize, TX_4X4, pd, ctxa, ctxl);

    for (idy = 0; idy < mi_height; idy += bh) {
      for (idx = 0; idx < mi_width; idx += bh) {
        select_tx_block(cpi, x, idy, idx, 0, block,
                        max_txsize_lookup[plane_bsize], plane_bsize,
                        ctxa, ctxl, &pnrate, &pndist, &pnsse, &pnskip,
                        ref_best_rd - this_rd, &is_cost_valid);
        *rate += pnrate;
        *distortion += pndist;
        *sse += pnsse;
        *skippable &= pnskip;
        // Running best-case RD: min(coded cost, skip cost) per region,
        // used to tighten the budget handed to the next region.
        this_rd += VPXMIN(RDCOST(x->rdmult, x->rddiv, pnrate, pndist),
                          RDCOST(x->rdmult, x->rddiv, 0, pnsse));
        block += step;
      }
    }
  }

  this_rd = VPXMIN(RDCOST(x->rdmult, x->rddiv, *rate, *distortion),
                   RDCOST(x->rdmult, x->rddiv, 0, *sse));
  if (this_rd > ref_best_rd)
    is_cost_valid = 0;

  if (!is_cost_valid) {
    // reset cost value
    *rate = INT_MAX;
    *distortion = INT64_MAX;
    *sse = INT64_MAX;
    *skippable = 0;
  }
}
1875
// Accumulate the RD statistics of the block at (blk_row, blk_col) using the
// transform sizes ALREADY chosen and stored in mbmi->inter_tx_size (no
// search): recurse down until the recorded plane transform size is reached,
// then cost that transform via tx_block_rd_b().
static void tx_block_rd(const VP10_COMP *cpi, MACROBLOCK *x,
                        int blk_row, int blk_col, int plane, int block,
                        TX_SIZE tx_size, BLOCK_SIZE plane_bsize,
                        ENTROPY_CONTEXT *above_ctx, ENTROPY_CONTEXT *left_ctx,
                        int *rate, int64_t *dist, int64_t *bsse, int *skip) {
  MACROBLOCKD *const xd = &x->e_mbd;
  MB_MODE_INFO *const mbmi = &xd->mi[0]->mbmi;
  struct macroblock_plane *const p = &x->plane[plane];
  struct macroblockd_plane *const pd = &xd->plane[plane];
  BLOCK_SIZE bsize = txsize_to_bsize[tx_size];
  // Index into the 8x8-entry inter_tx_size grid (8 entries per row).
  int tx_idx = (blk_row >> (1 - pd->subsampling_y)) * 8 +
               (blk_col >> (1 - pd->subsampling_x));
  // Chroma derives its transform size from the luma decision.
  TX_SIZE plane_tx_size = plane ?
      get_uv_tx_size_impl(mbmi->inter_tx_size[tx_idx], bsize,
                          0, 0) :
      mbmi->inter_tx_size[tx_idx];

  int max_blocks_high = num_4x4_blocks_high_lookup[plane_bsize];
  int max_blocks_wide = num_4x4_blocks_wide_lookup[plane_bsize];

  if (xd->mb_to_bottom_edge < 0)
    max_blocks_high += xd->mb_to_bottom_edge >> (5 + pd->subsampling_y);
  if (xd->mb_to_right_edge < 0)
    max_blocks_wide += xd->mb_to_right_edge >> (5 + pd->subsampling_x);

  // Entirely outside the visible frame: contributes nothing.
  if (blk_row >= max_blocks_high || blk_col >= max_blocks_wide)
    return;

  if (tx_size == plane_tx_size) {
    int coeff_ctx, i;
    ENTROPY_CONTEXT *ta = above_ctx + blk_col;
    ENTROPY_CONTEXT *tl = left_ctx + blk_row;
    // Collapse the covered per-4x4 contexts into one flag each
    // (wider transforms read 2/4/8 bytes at once).
    switch (tx_size) {
      case TX_4X4:
        break;
      case TX_8X8:
        ta[0] = !!*(const uint16_t *)&ta[0];
        tl[0] = !!*(const uint16_t *)&tl[0];
        break;
      case TX_16X16:
        ta[0] = !!*(const uint32_t *)&ta[0];
        tl[0] = !!*(const uint32_t *)&tl[0];
        break;
      case TX_32X32:
        ta[0] = !!*(const uint64_t *)&ta[0];
        tl[0] = !!*(const uint64_t *)&tl[0];
        break;
      default:
        assert(0 && "Invalid transform size.");
        break;
    }
    coeff_ctx = combine_entropy_contexts(ta[0], tl[0]);
    tx_block_rd_b(cpi, x, tx_size, blk_row, blk_col, plane, block,
                  plane_bsize, coeff_ctx, rate, dist, bsse, skip);
    // Refresh contexts with whether this transform produced coefficients.
    for (i = 0; i < (1 << tx_size); ++i) {
      ta[i] = !(p->eobs[block] == 0);
      tl[i] = !(p->eobs[block] == 0);
    }
  } else {
    // Recorded size is smaller: recurse into the four quadrants.
    int bsl = b_width_log2_lookup[bsize];
    int step = 1 << (2 * (tx_size - 1));
    int i;

    assert(bsl > 0);
    --bsl;

    for (i = 0; i < 4; ++i) {
      int offsetr = (i >> 1) << bsl;
      int offsetc = (i & 0x01) << bsl;
      tx_block_rd(cpi, x, blk_row + offsetr, blk_col + offsetc, plane,
                  block + i * step, tx_size - 1, plane_bsize,
                  above_ctx, left_ctx, rate, dist, bsse, skip);
    }
  }
}
1951
// RD cost of the chroma planes of an inter block under CONFIG_VAR_TX,
// reusing the transform partitioning recorded by the luma search
// (via tx_block_rd).
// Return value 0: early termination triggered, no valid rd cost available;
//              1: rd cost values are valid.
static int inter_block_uvrd(const VP10_COMP *cpi, MACROBLOCK *x,
                            int *rate, int64_t *distortion, int *skippable,
                            int64_t *sse, BLOCK_SIZE bsize,
                            int64_t ref_best_rd) {
  MACROBLOCKD *const xd = &x->e_mbd;
  MB_MODE_INFO *const mbmi = &xd->mi[0]->mbmi;
  int plane;
  int is_cost_valid = 1;
  int64_t this_rd;

  if (ref_best_rd < 0)
    is_cost_valid = 0;

  if (is_inter_block(mbmi) && is_cost_valid) {
    // NOTE(review): this inner declaration shadows the outer `plane`.
    int plane;
    for (plane = 1; plane < MAX_MB_PLANE; ++plane)
      vp10_subtract_plane(x, bsize, plane);
  }

  *rate = 0;
  *distortion = 0;
  *sse = 0;
  *skippable = 1;

  for (plane = 1; plane < MAX_MB_PLANE; ++plane) {
    const struct macroblockd_plane *const pd = &xd->plane[plane];
    const BLOCK_SIZE plane_bsize = get_plane_block_size(bsize, pd);
    const int mi_width = num_4x4_blocks_wide_lookup[plane_bsize];
    const int mi_height = num_4x4_blocks_high_lookup[plane_bsize];
    BLOCK_SIZE txb_size = txsize_to_bsize[max_txsize_lookup[plane_bsize]];
    int bh = num_4x4_blocks_wide_lookup[txb_size];
    int idx, idy;
    int block = 0;
    int step = 1 << (max_txsize_lookup[plane_bsize] * 2);
    int pnrate = 0, pnskip = 1;
    int64_t pndist = 0, pnsse = 0;
    ENTROPY_CONTEXT ta[16], tl[16];

    vp10_get_entropy_contexts(bsize, TX_4X4, pd, ta, tl);

    // Walk the plane in max-transform-size steps; tx_block_rd recurses to
    // the sizes recorded in mbmi->inter_tx_size.
    for (idy = 0; idy < mi_height; idy += bh) {
      for (idx = 0; idx < mi_width; idx += bh) {
        tx_block_rd(cpi, x, idy, idx, plane, block,
                    max_txsize_lookup[plane_bsize], plane_bsize, ta, tl,
                    &pnrate, &pndist, &pnsse, &pnskip);
        block += step;
      }
    }

    if (pnrate == INT_MAX) {
      is_cost_valid = 0;
      break;
    }

    *rate += pnrate;
    *distortion += pndist;
    *sse += pnsse;
    *skippable &= pnskip;

    // Budget check after each plane: best of coded cost vs skip cost.
    this_rd = VPXMIN(RDCOST(x->rdmult, x->rddiv, *rate, *distortion),
                     RDCOST(x->rdmult, x->rddiv, 0, *sse));

    if (this_rd > ref_best_rd) {
      is_cost_valid = 0;
      break;
    }
  }

  if (!is_cost_valid) {
    // reset cost value
    *rate = INT_MAX;
    *distortion = INT64_MAX;
    *sse = INT64_MAX;
    *skippable = 0;
  }

  return is_cost_valid;
}
2032#endif
2033
// RD cost of the chroma planes of a superblock using a single uniform
// transform size per plane (the non-VAR_TX path).
// Return value 0: early termination triggered, no valid rd cost available;
//              1: rd cost values are valid.
static int super_block_uvrd(const VP10_COMP *cpi, MACROBLOCK *x,
                            int *rate, int64_t *distortion, int *skippable,
                            int64_t *sse, BLOCK_SIZE bsize,
                            int64_t ref_best_rd) {
  MACROBLOCKD *const xd = &x->e_mbd;
  MB_MODE_INFO *const mbmi = &xd->mi[0]->mbmi;
  const TX_SIZE uv_tx_size = get_uv_tx_size(mbmi, &xd->plane[1]);
  int plane;
  int pnrate = 0, pnskip = 1;
  int64_t pndist = 0, pnsse = 0;
  int is_cost_valid = 1;

  if (ref_best_rd < 0)
    is_cost_valid = 0;

  // Inter blocks need the residual computed before costing; intra blocks
  // produce it inside txfm_rd_in_plane.
  if (is_inter_block(mbmi) && is_cost_valid) {
    // NOTE(review): this inner declaration shadows the outer `plane`.
    int plane;
    for (plane = 1; plane < MAX_MB_PLANE; ++plane)
      vp10_subtract_plane(x, bsize, plane);
  }

  *rate = 0;
  *distortion = 0;
  *sse = 0;
  *skippable = 1;

  for (plane = 1; plane < MAX_MB_PLANE; ++plane) {
    txfm_rd_in_plane(x,
#if CONFIG_VAR_TX
                     cpi,
#endif
                     &pnrate, &pndist, &pnskip, &pnsse,
                     ref_best_rd, plane, bsize, uv_tx_size,
                     cpi->sf.use_fast_coef_costing);
    if (pnrate == INT_MAX) {
      is_cost_valid = 0;
      break;
    }
    *rate += pnrate;
    *distortion += pndist;
    *sse += pnsse;
    *skippable &= pnskip;
  }

  if (!is_cost_valid) {
    // reset cost value
    *rate = INT_MAX;
    *distortion = INT64_MAX;
    *sse = INT64_MAX;
    *skippable = 0;
  }

  return is_cost_valid;
}
2090
// Full RD search over all chroma intra prediction modes (DC_PRED..TM_PRED),
// restricted by the speed feature's intra_uv_mode_mask.  Writes the chosen
// mode into xd->mi[0]->mbmi.uv_mode and the winning statistics into the
// output pointers; returns the best RD cost (INT64_MAX if none evaluated).
static int64_t rd_pick_intra_sbuv_mode(VP10_COMP *cpi, MACROBLOCK *x,
                                       PICK_MODE_CONTEXT *ctx,
                                       int *rate, int *rate_tokenonly,
                                       int64_t *distortion, int *skippable,
                                       BLOCK_SIZE bsize, TX_SIZE max_tx_size) {
  MACROBLOCKD *xd = &x->e_mbd;
  PREDICTION_MODE mode;
  PREDICTION_MODE mode_selected = DC_PRED;
  int64_t best_rd = INT64_MAX, this_rd;
  int this_rate_tokenonly, this_rate, s;
  int64_t this_distortion, this_sse;

  memset(x->skip_txfm, SKIP_TXFM_NONE, sizeof(x->skip_txfm));
  // Palette coding is disabled for chroma while searching regular modes.
  xd->mi[0]->mbmi.palette_mode_info.palette_size[1] = 0;
  for (mode = DC_PRED; mode <= TM_PRED; ++mode) {
    if (!(cpi->sf.intra_uv_mode_mask[max_tx_size] & (1 << mode)))
      continue;

    xd->mi[0]->mbmi.uv_mode = mode;

    // best_rd acts as a pruning bound inside super_block_uvrd.
    if (!super_block_uvrd(cpi, x, &this_rate_tokenonly,
                          &this_distortion, &s, &this_sse, bsize, best_rd))
      continue;
    this_rate = this_rate_tokenonly + cpi->intra_uv_mode_cost[mode];
    this_rd = RDCOST(x->rdmult, x->rddiv, this_rate, this_distortion);

    if (this_rd < best_rd) {
      mode_selected = mode;
      best_rd = this_rd;
      *rate = this_rate;
      *rate_tokenonly = this_rate_tokenonly;
      *distortion = this_distortion;
      *skippable = s;
      if (!x->select_tx_size)
        swap_block_ptr(x, ctx, 2, 0, 1, MAX_MB_PLANE);
    }
  }

  // Leave the winning mode in place for the caller.
  xd->mi[0]->mbmi.uv_mode = mode_selected;
  return best_rd;
}
2132
Yaowu Xu26a9afc2015-08-13 09:42:27 -07002133static int64_t rd_sbuv_dcpred(const VP10_COMP *cpi, MACROBLOCK *x,
Jingning Han3ee6db62015-08-05 19:00:31 -07002134 int *rate, int *rate_tokenonly,
2135 int64_t *distortion, int *skippable,
2136 BLOCK_SIZE bsize) {
Jingning Han3ee6db62015-08-05 19:00:31 -07002137 int64_t unused;
2138
2139 x->e_mbd.mi[0]->mbmi.uv_mode = DC_PRED;
2140 memset(x->skip_txfm, SKIP_TXFM_NONE, sizeof(x->skip_txfm));
2141 super_block_uvrd(cpi, x, rate_tokenonly, distortion,
2142 skippable, &unused, bsize, INT64_MAX);
Ronald S. Bultjed8f3bb12015-10-13 14:07:47 -04002143 *rate = *rate_tokenonly + cpi->intra_uv_mode_cost[DC_PRED];
Jingning Han3ee6db62015-08-05 19:00:31 -07002144 return RDCOST(x->rdmult, x->rddiv, *rate, *distortion);
2145}
2146
Yaowu Xu26a9afc2015-08-13 09:42:27 -07002147static void choose_intra_uv_mode(VP10_COMP *cpi, MACROBLOCK *const x,
Jingning Han3ee6db62015-08-05 19:00:31 -07002148 PICK_MODE_CONTEXT *ctx,
2149 BLOCK_SIZE bsize, TX_SIZE max_tx_size,
2150 int *rate_uv, int *rate_uv_tokenonly,
2151 int64_t *dist_uv, int *skip_uv,
2152 PREDICTION_MODE *mode_uv) {
2153 // Use an estimated rd for uv_intra based on DC_PRED if the
2154 // appropriate speed flag is set.
2155 if (cpi->sf.use_uv_intra_rd_estimate) {
2156 rd_sbuv_dcpred(cpi, x, rate_uv, rate_uv_tokenonly, dist_uv,
2157 skip_uv, bsize < BLOCK_8X8 ? BLOCK_8X8 : bsize);
2158 // Else do a proper rd search for each possible transform size that may
2159 // be considered in the main rd loop.
2160 } else {
2161 rd_pick_intra_sbuv_mode(cpi, x, ctx,
2162 rate_uv, rate_uv_tokenonly, dist_uv, skip_uv,
2163 bsize < BLOCK_8X8 ? BLOCK_8X8 : bsize, max_tx_size);
2164 }
2165 *mode_uv = x->e_mbd.mi[0]->mbmi.uv_mode;
2166}
2167
Yaowu Xu26a9afc2015-08-13 09:42:27 -07002168static int cost_mv_ref(const VP10_COMP *cpi, PREDICTION_MODE mode,
Jingning Han3ee6db62015-08-05 19:00:31 -07002169 int mode_context) {
2170 assert(is_inter_mode(mode));
2171 return cpi->inter_mode_cost[mode_context][INTER_OFFSET(mode)];
2172}
2173
// Resolve the motion vector(s) for sub-8x8 block i under prediction mode
// `mode`, store mode and MVs into mic->bmi (replicated over all covered 4x4
// positions), and return the rate of coding the mode plus any MV residual.
static int set_and_cost_bmi_mvs(VP10_COMP *cpi, MACROBLOCK *x, MACROBLOCKD *xd,
                                int i,
                                PREDICTION_MODE mode, int_mv this_mv[2],
                                int_mv frame_mv[MB_MODE_COUNT][MAX_REF_FRAMES],
                                int_mv seg_mvs[MAX_REF_FRAMES],
                                int_mv *best_ref_mv[2], const int *mvjcost,
                                int *mvcost[2]) {
  MODE_INFO *const mic = xd->mi[0];
  const MB_MODE_INFO *const mbmi = &mic->mbmi;
  const MB_MODE_INFO_EXT *const mbmi_ext = x->mbmi_ext;
  int thismvcost = 0;
  int idx, idy;
  const int num_4x4_blocks_wide = num_4x4_blocks_wide_lookup[mbmi->sb_type];
  const int num_4x4_blocks_high = num_4x4_blocks_high_lookup[mbmi->sb_type];
  const int is_compound = has_second_ref(mbmi);

  switch (mode) {
    case NEWMV:
      // New MV: take the searched per-segment MV and pay its bit cost
      // relative to the best reference MV.
      this_mv[0].as_int = seg_mvs[mbmi->ref_frame[0]].as_int;
      thismvcost += vp10_mv_bit_cost(&this_mv[0].as_mv, &best_ref_mv[0]->as_mv,
                                     mvjcost, mvcost, MV_COST_WEIGHT_SUB);
      if (is_compound) {
        this_mv[1].as_int = seg_mvs[mbmi->ref_frame[1]].as_int;
        thismvcost += vp10_mv_bit_cost(&this_mv[1].as_mv, &best_ref_mv[1]->as_mv,
                                       mvjcost, mvcost, MV_COST_WEIGHT_SUB);
      }
      break;
    case NEARMV:
    case NEARESTMV:
      // Predicted modes: MV comes from the reference MV stack, no MV cost.
      this_mv[0].as_int = frame_mv[mode][mbmi->ref_frame[0]].as_int;
      if (is_compound)
        this_mv[1].as_int = frame_mv[mode][mbmi->ref_frame[1]].as_int;
      break;
    case ZEROMV:
      this_mv[0].as_int = 0;
      if (is_compound)
        this_mv[1].as_int = 0;
      break;
    default:
      break;
  }

  mic->bmi[i].as_mv[0].as_int = this_mv[0].as_int;
  if (is_compound)
    mic->bmi[i].as_mv[1].as_int = this_mv[1].as_int;

  mic->bmi[i].as_mode = mode;

  // Replicate the decision to every 4x4 position this sub-block spans
  // (bmi is laid out as a 2x2 grid inside the 8x8 block).
  for (idy = 0; idy < num_4x4_blocks_high; ++idy)
    for (idx = 0; idx < num_4x4_blocks_wide; ++idx)
      memmove(&mic->bmi[i + idy * 2 + idx], &mic->bmi[i], sizeof(mic->bmi[i]));

  return cost_mv_ref(cpi, mode, mbmi_ext->mode_context[mbmi->ref_frame[0]]) +
         thismvcost;
}
2229
// Build the inter prediction for sub-8x8 block i, forward-transform and
// quantize its 4x4 residual blocks, and accumulate token rate / distortion /
// SSE.  Returns the RD cost, or INT64_MAX as soon as the running cost meets
// or exceeds best_yrd (early termination).  (ir, ic) index the sub-block
// within the 8x8; (mi_row, mi_col) locate the mode-info unit in the frame.
static int64_t encode_inter_mb_segment(VP10_COMP *cpi,
                                       MACROBLOCK *x,
                                       int64_t best_yrd,
                                       int i,
                                       int *labelyrate,
                                       int64_t *distortion, int64_t *sse,
                                       ENTROPY_CONTEXT *ta,
                                       ENTROPY_CONTEXT *tl,
                                       int ir, int ic,
                                       int mi_row, int mi_col) {
  int k;
  MACROBLOCKD *xd = &x->e_mbd;
  struct macroblockd_plane *const pd = &xd->plane[0];
  struct macroblock_plane *const p = &x->plane[0];
  MODE_INFO *const mi = xd->mi[0];
  const BLOCK_SIZE plane_bsize = get_plane_block_size(mi->mbmi.sb_type, pd);
  const int width = 4 * num_4x4_blocks_wide_lookup[plane_bsize];
  const int height = 4 * num_4x4_blocks_high_lookup[plane_bsize];
  int idx, idy;
  // 4x4 forward transform, selected below based on lossless/bit depth.
  void (*fwd_txm4x4)(const int16_t *input, tran_low_t *output, int stride);

  const uint8_t *const src =
      &p->src.buf[vp10_raster_block_offset(BLOCK_8X8, i, p->src.stride)];
  uint8_t *const dst = &pd->dst.buf[vp10_raster_block_offset(BLOCK_8X8, i,
                                                             pd->dst.stride)];
  int64_t thisdistortion = 0, thissse = 0;
  int thisrate = 0;
  TX_TYPE tx_type = get_tx_type(PLANE_TYPE_Y, xd, i, TX_4X4);
  const scan_order *so = get_scan(TX_4X4, tx_type, 1);

  vp10_build_inter_predictor_sub8x8(xd, 0, i, ir, ic, mi_row, mi_col);

  // Lossless segments use the Walsh-Hadamard transform, lossy the DCT.
#if CONFIG_VP9_HIGHBITDEPTH
  if (xd->cur_buf->flags & YV12_FLAG_HIGHBITDEPTH) {
    fwd_txm4x4 = xd->lossless[mi->mbmi.segment_id] ? vp10_highbd_fwht4x4
                                                   : vpx_highbd_fdct4x4;
  } else {
    fwd_txm4x4 = xd->lossless[mi->mbmi.segment_id] ? vp10_fwht4x4 : vpx_fdct4x4;
  }
#else
  fwd_txm4x4 = xd->lossless[mi->mbmi.segment_id] ? vp10_fwht4x4 : vpx_fdct4x4;
#endif  // CONFIG_VP9_HIGHBITDEPTH

  // Residual = source - prediction, written into p->src_diff (stride 8).
#if CONFIG_VP9_HIGHBITDEPTH
  if (xd->cur_buf->flags & YV12_FLAG_HIGHBITDEPTH) {
    vpx_highbd_subtract_block(
        height, width, vp10_raster_block_offset_int16(BLOCK_8X8, i, p->src_diff),
        8, src, p->src.stride, dst, pd->dst.stride, xd->bd);
  } else {
    vpx_subtract_block(
        height, width, vp10_raster_block_offset_int16(BLOCK_8X8, i, p->src_diff),
        8, src, p->src.stride, dst, pd->dst.stride);
  }
#else
  vpx_subtract_block(height, width,
                     vp10_raster_block_offset_int16(BLOCK_8X8, i, p->src_diff),
                     8, src, p->src.stride, dst, pd->dst.stride);
#endif  // CONFIG_VP9_HIGHBITDEPTH

  k = i;
  for (idy = 0; idy < height / 4; ++idy) {
    for (idx = 0; idx < width / 4; ++idx) {
      int64_t ssz, rd, rd1, rd2;
      tran_low_t* coeff;
#if CONFIG_VAR_TX
      int coeff_ctx;
#endif
      // k walks the 4x4 raster positions covered by this sub-block.
      k += (idy * 2 + idx);
#if CONFIG_VAR_TX
      coeff_ctx = combine_entropy_contexts(*(ta + (k & 1)),
                                           *(tl + (k >> 1)));
#endif
      coeff = BLOCK_OFFSET(p->coeff, k);
      fwd_txm4x4(vp10_raster_block_offset_int16(BLOCK_8X8, k, p->src_diff),
                 coeff, 8);
      vp10_regular_quantize_b_4x4(x, 0, k, so->scan, so->iscan);
#if CONFIG_VP9_HIGHBITDEPTH
      if (xd->cur_buf->flags & YV12_FLAG_HIGHBITDEPTH) {
        thisdistortion += vp10_highbd_block_error(coeff,
                                                  BLOCK_OFFSET(pd->dqcoeff, k),
                                                  16, &ssz, xd->bd);
      } else {
        thisdistortion += vp10_block_error(coeff, BLOCK_OFFSET(pd->dqcoeff, k),
                                           16, &ssz);
      }
#else
      thisdistortion += vp10_block_error(coeff, BLOCK_OFFSET(pd->dqcoeff, k),
                                         16, &ssz);
#endif  // CONFIG_VP9_HIGHBITDEPTH
      thissse += ssz;
#if CONFIG_VAR_TX
      thisrate += cost_coeffs(x, 0, k, coeff_ctx,
                              TX_4X4,
                              so->scan, so->neighbors,
                              cpi->sf.use_fast_coef_costing);
      *(ta + (k & 1)) = !(p->eobs[k] == 0);
      *(tl + (k >> 1)) = !(p->eobs[k] == 0);
#else
      thisrate += cost_coeffs(x, 0, k, ta + (k & 1), tl + (k >> 1),
                              TX_4X4,
                              so->scan, so->neighbors,
                              cpi->sf.use_fast_coef_costing);
#endif
      // Early exit once even the cheaper of coded vs skipped cost can no
      // longer beat the current best (>> 2 undoes the transform scaling).
      rd1 = RDCOST(x->rdmult, x->rddiv, thisrate, thisdistortion >> 2);
      rd2 = RDCOST(x->rdmult, x->rddiv, 0, thissse >> 2);
      rd = VPXMIN(rd1, rd2);
      if (rd >= best_yrd)
        return INT64_MAX;
    }
  }

  *distortion = thisdistortion >> 2;
  *labelyrate = thisrate;
  *sse = thissse >> 2;

  return RDCOST(x->rdmult, x->rddiv, *labelyrate, *distortion);
}
2347
// Per-mode RD statistics for one sub-8x8 block, recorded during the
// sub-8x8 inter mode search.  NOTE(review): field semantics inferred from
// the b* naming convention used by that search — confirm against the
// consumer in rd_pick_best_sub8x8_mode.
typedef struct {
  int eobs;               // end-of-block position of the coded coefficients
  int brate;              // total rate (mode/MV + tokens) for this block
  int byrate;             // token-only rate — presumably; TODO confirm
  int64_t bdist;          // distortion
  int64_t bsse;           // sum of squared error of the prediction
  int64_t brdcost;        // combined RD cost
  int_mv mvs[2];          // motion vector per reference (2 for compound)
  ENTROPY_CONTEXT ta[2];  // saved above entropy contexts after coding
  ENTROPY_CONTEXT tl[2];  // saved left entropy contexts after coding
} SEG_RDSTAT;

// Best-so-far bookkeeping for the whole sub-8x8 partition search.
typedef struct {
  int_mv *ref_mv[2];      // reference MVs used as prediction anchors
  int_mv mvp;             // MV predictor used to seed the motion search
  int64_t segment_rd;     // best total RD cost found so far
  int r;                  // rate of the best configuration
  int64_t d;              // distortion of the best configuration
  int64_t sse;
  int segment_yrate;
  PREDICTION_MODE modes[4];          // chosen mode per sub-8x8 block
  SEG_RDSTAT rdstat[4][INTER_MODES]; // per-block, per-mode statistics
  int mvthresh;           // MV magnitude threshold — TODO confirm usage
} BEST_SEG_INFO;
2373
2374static INLINE int mv_check_bounds(const MACROBLOCK *x, const MV *mv) {
2375 return (mv->row >> 3) < x->mv_row_min ||
2376 (mv->row >> 3) > x->mv_row_max ||
2377 (mv->col >> 3) < x->mv_col_min ||
2378 (mv->col >> 3) > x->mv_col_max;
2379}
2380
// Advance the plane-0 source and prediction buffer pointers so they address
// sub-8x8 block i of the current 8x8 block.  Callers save the original
// buf_2d structs first and undo this with mi_buf_restore().
static INLINE void mi_buf_shift(MACROBLOCK *x, int i) {
  MB_MODE_INFO *const mbmi = &x->e_mbd.mi[0]->mbmi;
  struct macroblock_plane *const p = &x->plane[0];
  struct macroblockd_plane *const pd = &x->e_mbd.plane[0];

  p->src.buf = &p->src.buf[vp10_raster_block_offset(BLOCK_8X8, i,
                                                    p->src.stride)];
  // Prediction buffers are expected to stay 8-byte aligned.
  assert(((intptr_t)pd->pre[0].buf & 0x7) == 0);
  pd->pre[0].buf = &pd->pre[0].buf[vp10_raster_block_offset(BLOCK_8X8, i,
                                                            pd->pre[0].stride)];
  if (has_second_ref(mbmi))
    pd->pre[1].buf = &pd->pre[1].buf[vp10_raster_block_offset(BLOCK_8X8, i,
                                                              pd->pre[1].stride)];
}
2395
2396static INLINE void mi_buf_restore(MACROBLOCK *x, struct buf_2d orig_src,
2397 struct buf_2d orig_pre[2]) {
2398 MB_MODE_INFO *mbmi = &x->e_mbd.mi[0]->mbmi;
2399 x->plane[0].src = orig_src;
2400 x->e_mbd.plane[0].pre[0] = orig_pre[0];
2401 if (has_second_ref(mbmi))
2402 x->e_mbd.plane[0].pre[1] = orig_pre[1];
2403}
2404
2405static INLINE int mv_has_subpel(const MV *mv) {
2406 return (mv->row & 0x0F) || (mv->col & 0x0F);
2407}
2408
// Check if NEARESTMV/NEARMV/ZEROMV is the cheapest way encode zero motion.
// Returns 0 when |this_mode| should be skipped because an equivalent
// zero-motion vector can be signalled more cheaply by another mode;
// returns 1 when the mode is worth evaluating.
// TODO(aconverse): Find out if this is still productive then clean up or remove
static int check_best_zero_mv(
    const VP10_COMP *cpi, const uint8_t mode_context[MAX_REF_FRAMES],
    int_mv frame_mv[MB_MODE_COUNT][MAX_REF_FRAMES], int this_mode,
    const MV_REFERENCE_FRAME ref_frames[2]) {
  // Only relevant when this mode's MV(s) are exactly zero for all refs used.
  if ((this_mode == NEARMV || this_mode == NEARESTMV || this_mode == ZEROMV) &&
      frame_mv[this_mode][ref_frames[0]].as_int == 0 &&
      (ref_frames[1] == NONE ||
       frame_mv[this_mode][ref_frames[1]].as_int == 0)) {
    // Signalling cost of each candidate mode in the current mode context.
    int rfc = mode_context[ref_frames[0]];
    int c1 = cost_mv_ref(cpi, NEARMV, rfc);
    int c2 = cost_mv_ref(cpi, NEARESTMV, rfc);
    int c3 = cost_mv_ref(cpi, ZEROMV, rfc);

    if (this_mode == NEARMV) {
      if (c1 > c3) return 0;  // ZEROMV signals the same motion for less.
    } else if (this_mode == NEARESTMV) {
      if (c2 > c3) return 0;  // ZEROMV signals the same motion for less.
    } else {
      assert(this_mode == ZEROMV);
      // Skip ZEROMV if NEARESTMV/NEARMV would also be zero and no dearer.
      if (ref_frames[1] == NONE) {
        if ((c3 >= c2 && frame_mv[NEARESTMV][ref_frames[0]].as_int == 0) ||
            (c3 >= c1 && frame_mv[NEARMV][ref_frames[0]].as_int == 0))
          return 0;
      } else {
        // Compound: both references must be zero for the cheaper mode too.
        if ((c3 >= c2 && frame_mv[NEARESTMV][ref_frames[0]].as_int == 0 &&
             frame_mv[NEARESTMV][ref_frames[1]].as_int == 0) ||
            (c3 >= c1 && frame_mv[NEARMV][ref_frames[0]].as_int == 0 &&
             frame_mv[NEARMV][ref_frames[1]].as_int == 0))
          return 0;
      }
    }
  }
  return 1;
}
2445
Yaowu Xu26a9afc2015-08-13 09:42:27 -07002446static void joint_motion_search(VP10_COMP *cpi, MACROBLOCK *x,
Jingning Han3ee6db62015-08-05 19:00:31 -07002447 BLOCK_SIZE bsize,
2448 int_mv *frame_mv,
2449 int mi_row, int mi_col,
2450 int_mv single_newmv[MAX_REF_FRAMES],
2451 int *rate_mv) {
Yaowu Xufc7cbd12015-08-13 09:36:53 -07002452 const VP10_COMMON *const cm = &cpi->common;
Jingning Han3ee6db62015-08-05 19:00:31 -07002453 const int pw = 4 * num_4x4_blocks_wide_lookup[bsize];
2454 const int ph = 4 * num_4x4_blocks_high_lookup[bsize];
2455 MACROBLOCKD *xd = &x->e_mbd;
2456 MB_MODE_INFO *mbmi = &xd->mi[0]->mbmi;
2457 const int refs[2] = {mbmi->ref_frame[0],
2458 mbmi->ref_frame[1] < 0 ? 0 : mbmi->ref_frame[1]};
2459 int_mv ref_mv[2];
2460 int ite, ref;
2461 const InterpKernel *kernel = vp10_filter_kernels[mbmi->interp_filter];
2462 struct scale_factors sf;
2463
2464 // Do joint motion search in compound mode to get more accurate mv.
2465 struct buf_2d backup_yv12[2][MAX_MB_PLANE];
2466 int last_besterr[2] = {INT_MAX, INT_MAX};
2467 const YV12_BUFFER_CONFIG *const scaled_ref_frame[2] = {
2468 vp10_get_scaled_ref_frame(cpi, mbmi->ref_frame[0]),
2469 vp10_get_scaled_ref_frame(cpi, mbmi->ref_frame[1])
2470 };
2471
2472 // Prediction buffer from second frame.
2473#if CONFIG_VP9_HIGHBITDEPTH
2474 DECLARE_ALIGNED(16, uint16_t, second_pred_alloc_16[64 * 64]);
2475 uint8_t *second_pred;
2476#else
2477 DECLARE_ALIGNED(16, uint8_t, second_pred[64 * 64]);
2478#endif // CONFIG_VP9_HIGHBITDEPTH
2479
2480 for (ref = 0; ref < 2; ++ref) {
2481 ref_mv[ref] = x->mbmi_ext->ref_mvs[refs[ref]][0];
2482
2483 if (scaled_ref_frame[ref]) {
2484 int i;
2485 // Swap out the reference frame for a version that's been scaled to
2486 // match the resolution of the current frame, allowing the existing
2487 // motion search code to be used without additional modifications.
2488 for (i = 0; i < MAX_MB_PLANE; i++)
2489 backup_yv12[ref][i] = xd->plane[i].pre[ref];
2490 vp10_setup_pre_planes(xd, ref, scaled_ref_frame[ref], mi_row, mi_col,
2491 NULL);
2492 }
2493
2494 frame_mv[refs[ref]].as_int = single_newmv[refs[ref]].as_int;
2495 }
2496
2497 // Since we have scaled the reference frames to match the size of the current
2498 // frame we must use a unit scaling factor during mode selection.
2499#if CONFIG_VP9_HIGHBITDEPTH
2500 vp10_setup_scale_factors_for_frame(&sf, cm->width, cm->height,
2501 cm->width, cm->height,
2502 cm->use_highbitdepth);
2503#else
2504 vp10_setup_scale_factors_for_frame(&sf, cm->width, cm->height,
2505 cm->width, cm->height);
2506#endif // CONFIG_VP9_HIGHBITDEPTH
2507
2508 // Allow joint search multiple times iteratively for each reference frame
2509 // and break out of the search loop if it couldn't find a better mv.
2510 for (ite = 0; ite < 4; ite++) {
2511 struct buf_2d ref_yv12[2];
2512 int bestsme = INT_MAX;
2513 int sadpb = x->sadperbit16;
2514 MV tmp_mv;
2515 int search_range = 3;
2516
2517 int tmp_col_min = x->mv_col_min;
2518 int tmp_col_max = x->mv_col_max;
2519 int tmp_row_min = x->mv_row_min;
2520 int tmp_row_max = x->mv_row_max;
2521 int id = ite % 2; // Even iterations search in the first reference frame,
2522 // odd iterations search in the second. The predictor
2523 // found for the 'other' reference frame is factored in.
2524
2525 // Initialized here because of compiler problem in Visual Studio.
2526 ref_yv12[0] = xd->plane[0].pre[0];
2527 ref_yv12[1] = xd->plane[0].pre[1];
2528
2529 // Get the prediction block from the 'other' reference frame.
2530#if CONFIG_VP9_HIGHBITDEPTH
2531 if (xd->cur_buf->flags & YV12_FLAG_HIGHBITDEPTH) {
2532 second_pred = CONVERT_TO_BYTEPTR(second_pred_alloc_16);
2533 vp10_highbd_build_inter_predictor(ref_yv12[!id].buf,
2534 ref_yv12[!id].stride,
2535 second_pred, pw,
2536 &frame_mv[refs[!id]].as_mv,
2537 &sf, pw, ph, 0,
2538 kernel, MV_PRECISION_Q3,
2539 mi_col * MI_SIZE, mi_row * MI_SIZE,
2540 xd->bd);
2541 } else {
2542 second_pred = (uint8_t *)second_pred_alloc_16;
2543 vp10_build_inter_predictor(ref_yv12[!id].buf,
2544 ref_yv12[!id].stride,
2545 second_pred, pw,
2546 &frame_mv[refs[!id]].as_mv,
2547 &sf, pw, ph, 0,
2548 kernel, MV_PRECISION_Q3,
2549 mi_col * MI_SIZE, mi_row * MI_SIZE);
2550 }
2551#else
2552 vp10_build_inter_predictor(ref_yv12[!id].buf,
2553 ref_yv12[!id].stride,
2554 second_pred, pw,
2555 &frame_mv[refs[!id]].as_mv,
2556 &sf, pw, ph, 0,
2557 kernel, MV_PRECISION_Q3,
2558 mi_col * MI_SIZE, mi_row * MI_SIZE);
2559#endif // CONFIG_VP9_HIGHBITDEPTH
2560
2561 // Do compound motion search on the current reference frame.
2562 if (id)
2563 xd->plane[0].pre[0] = ref_yv12[id];
2564 vp10_set_mv_search_range(x, &ref_mv[id].as_mv);
2565
2566 // Use the mv result from the single mode as mv predictor.
2567 tmp_mv = frame_mv[refs[id]].as_mv;
2568
2569 tmp_mv.col >>= 3;
2570 tmp_mv.row >>= 3;
2571
2572 // Small-range full-pixel motion search.
2573 bestsme = vp10_refining_search_8p_c(x, &tmp_mv, sadpb,
2574 search_range,
2575 &cpi->fn_ptr[bsize],
2576 &ref_mv[id].as_mv, second_pred);
2577 if (bestsme < INT_MAX)
2578 bestsme = vp10_get_mvpred_av_var(x, &tmp_mv, &ref_mv[id].as_mv,
2579 second_pred, &cpi->fn_ptr[bsize], 1);
2580
2581 x->mv_col_min = tmp_col_min;
2582 x->mv_col_max = tmp_col_max;
2583 x->mv_row_min = tmp_row_min;
2584 x->mv_row_max = tmp_row_max;
2585
2586 if (bestsme < INT_MAX) {
2587 int dis; /* TODO: use dis in distortion calculation later. */
2588 unsigned int sse;
2589 bestsme = cpi->find_fractional_mv_step(
2590 x, &tmp_mv,
2591 &ref_mv[id].as_mv,
2592 cpi->common.allow_high_precision_mv,
2593 x->errorperbit,
2594 &cpi->fn_ptr[bsize],
2595 0, cpi->sf.mv.subpel_iters_per_step,
2596 NULL,
2597 x->nmvjointcost, x->mvcost,
2598 &dis, &sse, second_pred,
2599 pw, ph);
2600 }
2601
2602 // Restore the pointer to the first (possibly scaled) prediction buffer.
2603 if (id)
2604 xd->plane[0].pre[0] = ref_yv12[0];
2605
2606 if (bestsme < last_besterr[id]) {
2607 frame_mv[refs[id]].as_mv = tmp_mv;
2608 last_besterr[id] = bestsme;
2609 } else {
2610 break;
2611 }
2612 }
2613
2614 *rate_mv = 0;
2615
2616 for (ref = 0; ref < 2; ++ref) {
2617 if (scaled_ref_frame[ref]) {
2618 // Restore the prediction frame pointers to their unscaled versions.
2619 int i;
2620 for (i = 0; i < MAX_MB_PLANE; i++)
2621 xd->plane[i].pre[ref] = backup_yv12[ref][i];
2622 }
2623
2624 *rate_mv += vp10_mv_bit_cost(&frame_mv[refs[ref]].as_mv,
2625 &x->mbmi_ext->ref_mvs[refs[ref]][0].as_mv,
2626 x->nmvjointcost, x->mvcost, MV_COST_WEIGHT);
2627 }
2628}
2629
Yaowu Xu26a9afc2015-08-13 09:42:27 -07002630static int64_t rd_pick_best_sub8x8_mode(VP10_COMP *cpi, MACROBLOCK *x,
Jingning Han3ee6db62015-08-05 19:00:31 -07002631 int_mv *best_ref_mv,
2632 int_mv *second_best_ref_mv,
2633 int64_t best_rd, int *returntotrate,
2634 int *returnyrate,
2635 int64_t *returndistortion,
2636 int *skippable, int64_t *psse,
2637 int mvthresh,
2638 int_mv seg_mvs[4][MAX_REF_FRAMES],
2639 BEST_SEG_INFO *bsi_buf, int filter_idx,
2640 int mi_row, int mi_col) {
2641 int i;
2642 BEST_SEG_INFO *bsi = bsi_buf + filter_idx;
2643 MACROBLOCKD *xd = &x->e_mbd;
2644 MODE_INFO *mi = xd->mi[0];
2645 MB_MODE_INFO *mbmi = &mi->mbmi;
2646 int mode_idx;
2647 int k, br = 0, idx, idy;
2648 int64_t bd = 0, block_sse = 0;
2649 PREDICTION_MODE this_mode;
Yaowu Xufc7cbd12015-08-13 09:36:53 -07002650 VP10_COMMON *cm = &cpi->common;
Jingning Han3ee6db62015-08-05 19:00:31 -07002651 struct macroblock_plane *const p = &x->plane[0];
2652 struct macroblockd_plane *const pd = &xd->plane[0];
2653 const int label_count = 4;
2654 int64_t this_segment_rd = 0;
2655 int label_mv_thresh;
2656 int segmentyrate = 0;
2657 const BLOCK_SIZE bsize = mbmi->sb_type;
2658 const int num_4x4_blocks_wide = num_4x4_blocks_wide_lookup[bsize];
2659 const int num_4x4_blocks_high = num_4x4_blocks_high_lookup[bsize];
2660 ENTROPY_CONTEXT t_above[2], t_left[2];
2661 int subpelmv = 1, have_ref = 0;
2662 const int has_second_rf = has_second_ref(mbmi);
2663 const int inter_mode_mask = cpi->sf.inter_mode_mask[bsize];
2664 MB_MODE_INFO_EXT *const mbmi_ext = x->mbmi_ext;
2665
2666 vp10_zero(*bsi);
2667
2668 bsi->segment_rd = best_rd;
2669 bsi->ref_mv[0] = best_ref_mv;
2670 bsi->ref_mv[1] = second_best_ref_mv;
2671 bsi->mvp.as_int = best_ref_mv->as_int;
2672 bsi->mvthresh = mvthresh;
2673
2674 for (i = 0; i < 4; i++)
2675 bsi->modes[i] = ZEROMV;
2676
2677 memcpy(t_above, pd->above_context, sizeof(t_above));
2678 memcpy(t_left, pd->left_context, sizeof(t_left));
2679
2680 // 64 makes this threshold really big effectively
2681 // making it so that we very rarely check mvs on
2682 // segments. setting this to 1 would make mv thresh
2683 // roughly equal to what it is for macroblocks
2684 label_mv_thresh = 1 * bsi->mvthresh / label_count;
2685
2686 // Segmentation method overheads
2687 for (idy = 0; idy < 2; idy += num_4x4_blocks_high) {
2688 for (idx = 0; idx < 2; idx += num_4x4_blocks_wide) {
2689 // TODO(jingning,rbultje): rewrite the rate-distortion optimization
2690 // loop for 4x4/4x8/8x4 block coding. to be replaced with new rd loop
2691 int_mv mode_mv[MB_MODE_COUNT][2];
2692 int_mv frame_mv[MB_MODE_COUNT][MAX_REF_FRAMES];
2693 PREDICTION_MODE mode_selected = ZEROMV;
2694 int64_t best_rd = INT64_MAX;
2695 const int i = idy * 2 + idx;
2696 int ref;
2697
2698 for (ref = 0; ref < 1 + has_second_rf; ++ref) {
2699 const MV_REFERENCE_FRAME frame = mbmi->ref_frame[ref];
2700 frame_mv[ZEROMV][frame].as_int = 0;
2701 vp10_append_sub8x8_mvs_for_idx(cm, xd, i, ref, mi_row, mi_col,
2702 &frame_mv[NEARESTMV][frame],
2703 &frame_mv[NEARMV][frame],
2704 mbmi_ext->mode_context);
2705 }
2706
2707 // search for the best motion vector on this segment
2708 for (this_mode = NEARESTMV; this_mode <= NEWMV; ++this_mode) {
2709 const struct buf_2d orig_src = x->plane[0].src;
2710 struct buf_2d orig_pre[2];
2711
2712 mode_idx = INTER_OFFSET(this_mode);
2713 bsi->rdstat[i][mode_idx].brdcost = INT64_MAX;
2714 if (!(inter_mode_mask & (1 << this_mode)))
2715 continue;
2716
2717 if (!check_best_zero_mv(cpi, mbmi_ext->mode_context, frame_mv,
2718 this_mode, mbmi->ref_frame))
2719 continue;
2720
2721 memcpy(orig_pre, pd->pre, sizeof(orig_pre));
2722 memcpy(bsi->rdstat[i][mode_idx].ta, t_above,
2723 sizeof(bsi->rdstat[i][mode_idx].ta));
2724 memcpy(bsi->rdstat[i][mode_idx].tl, t_left,
2725 sizeof(bsi->rdstat[i][mode_idx].tl));
2726
2727 // motion search for newmv (single predictor case only)
2728 if (!has_second_rf && this_mode == NEWMV &&
2729 seg_mvs[i][mbmi->ref_frame[0]].as_int == INVALID_MV) {
2730 MV *const new_mv = &mode_mv[NEWMV][0].as_mv;
2731 int step_param = 0;
2732 int thissme, bestsme = INT_MAX;
2733 int sadpb = x->sadperbit4;
2734 MV mvp_full;
2735 int max_mv;
2736 int cost_list[5];
2737
2738 /* Is the best so far sufficiently good that we cant justify doing
2739 * and new motion search. */
2740 if (best_rd < label_mv_thresh)
2741 break;
2742
2743 if (cpi->oxcf.mode != BEST) {
2744 // use previous block's result as next block's MV predictor.
2745 if (i > 0) {
2746 bsi->mvp.as_int = mi->bmi[i - 1].as_mv[0].as_int;
2747 if (i == 2)
2748 bsi->mvp.as_int = mi->bmi[i - 2].as_mv[0].as_int;
2749 }
2750 }
2751 if (i == 0)
2752 max_mv = x->max_mv_context[mbmi->ref_frame[0]];
2753 else
James Zern5e16d392015-08-17 18:19:22 -07002754 max_mv =
2755 VPXMAX(abs(bsi->mvp.as_mv.row), abs(bsi->mvp.as_mv.col)) >> 3;
Jingning Han3ee6db62015-08-05 19:00:31 -07002756
2757 if (cpi->sf.mv.auto_mv_step_size && cm->show_frame) {
2758 // Take wtd average of the step_params based on the last frame's
2759 // max mv magnitude and the best ref mvs of the current block for
2760 // the given reference.
2761 step_param = (vp10_init_search_range(max_mv) +
2762 cpi->mv_step_param) / 2;
2763 } else {
2764 step_param = cpi->mv_step_param;
2765 }
2766
2767 mvp_full.row = bsi->mvp.as_mv.row >> 3;
2768 mvp_full.col = bsi->mvp.as_mv.col >> 3;
2769
2770 if (cpi->sf.adaptive_motion_search) {
2771 mvp_full.row = x->pred_mv[mbmi->ref_frame[0]].row >> 3;
2772 mvp_full.col = x->pred_mv[mbmi->ref_frame[0]].col >> 3;
James Zern5e16d392015-08-17 18:19:22 -07002773 step_param = VPXMAX(step_param, 8);
Jingning Han3ee6db62015-08-05 19:00:31 -07002774 }
2775
2776 // adjust src pointer for this block
2777 mi_buf_shift(x, i);
2778
2779 vp10_set_mv_search_range(x, &bsi->ref_mv[0]->as_mv);
2780
2781 bestsme = vp10_full_pixel_search(
2782 cpi, x, bsize, &mvp_full, step_param, sadpb,
2783 cpi->sf.mv.subpel_search_method != SUBPEL_TREE ? cost_list : NULL,
2784 &bsi->ref_mv[0]->as_mv, new_mv,
2785 INT_MAX, 1);
2786
2787 // Should we do a full search (best quality only)
2788 if (cpi->oxcf.mode == BEST) {
2789 int_mv *const best_mv = &mi->bmi[i].as_mv[0];
2790 /* Check if mvp_full is within the range. */
2791 clamp_mv(&mvp_full, x->mv_col_min, x->mv_col_max,
2792 x->mv_row_min, x->mv_row_max);
2793 thissme = cpi->full_search_sad(x, &mvp_full,
2794 sadpb, 16, &cpi->fn_ptr[bsize],
2795 &bsi->ref_mv[0]->as_mv,
2796 &best_mv->as_mv);
2797 cost_list[1] = cost_list[2] = cost_list[3] = cost_list[4] = INT_MAX;
2798 if (thissme < bestsme) {
2799 bestsme = thissme;
2800 *new_mv = best_mv->as_mv;
2801 } else {
2802 // The full search result is actually worse so re-instate the
2803 // previous best vector
2804 best_mv->as_mv = *new_mv;
2805 }
2806 }
2807
2808 if (bestsme < INT_MAX) {
2809 int distortion;
2810 cpi->find_fractional_mv_step(
2811 x,
2812 new_mv,
2813 &bsi->ref_mv[0]->as_mv,
2814 cm->allow_high_precision_mv,
2815 x->errorperbit, &cpi->fn_ptr[bsize],
2816 cpi->sf.mv.subpel_force_stop,
2817 cpi->sf.mv.subpel_iters_per_step,
2818 cond_cost_list(cpi, cost_list),
2819 x->nmvjointcost, x->mvcost,
2820 &distortion,
2821 &x->pred_sse[mbmi->ref_frame[0]],
2822 NULL, 0, 0);
2823
2824 // save motion search result for use in compound prediction
2825 seg_mvs[i][mbmi->ref_frame[0]].as_mv = *new_mv;
2826 }
2827
2828 if (cpi->sf.adaptive_motion_search)
2829 x->pred_mv[mbmi->ref_frame[0]] = *new_mv;
2830
2831 // restore src pointers
2832 mi_buf_restore(x, orig_src, orig_pre);
2833 }
2834
2835 if (has_second_rf) {
2836 if (seg_mvs[i][mbmi->ref_frame[1]].as_int == INVALID_MV ||
2837 seg_mvs[i][mbmi->ref_frame[0]].as_int == INVALID_MV)
2838 continue;
2839 }
2840
2841 if (has_second_rf && this_mode == NEWMV &&
2842 mbmi->interp_filter == EIGHTTAP) {
2843 // adjust src pointers
2844 mi_buf_shift(x, i);
2845 if (cpi->sf.comp_inter_joint_search_thresh <= bsize) {
2846 int rate_mv;
2847 joint_motion_search(cpi, x, bsize, frame_mv[this_mode],
2848 mi_row, mi_col, seg_mvs[i],
2849 &rate_mv);
2850 seg_mvs[i][mbmi->ref_frame[0]].as_int =
2851 frame_mv[this_mode][mbmi->ref_frame[0]].as_int;
2852 seg_mvs[i][mbmi->ref_frame[1]].as_int =
2853 frame_mv[this_mode][mbmi->ref_frame[1]].as_int;
2854 }
2855 // restore src pointers
2856 mi_buf_restore(x, orig_src, orig_pre);
2857 }
2858
2859 bsi->rdstat[i][mode_idx].brate =
2860 set_and_cost_bmi_mvs(cpi, x, xd, i, this_mode, mode_mv[this_mode],
2861 frame_mv, seg_mvs[i], bsi->ref_mv,
2862 x->nmvjointcost, x->mvcost);
2863
2864 for (ref = 0; ref < 1 + has_second_rf; ++ref) {
2865 bsi->rdstat[i][mode_idx].mvs[ref].as_int =
2866 mode_mv[this_mode][ref].as_int;
2867 if (num_4x4_blocks_wide > 1)
2868 bsi->rdstat[i + 1][mode_idx].mvs[ref].as_int =
2869 mode_mv[this_mode][ref].as_int;
2870 if (num_4x4_blocks_high > 1)
2871 bsi->rdstat[i + 2][mode_idx].mvs[ref].as_int =
2872 mode_mv[this_mode][ref].as_int;
2873 }
2874
2875 // Trap vectors that reach beyond the UMV borders
2876 if (mv_check_bounds(x, &mode_mv[this_mode][0].as_mv) ||
2877 (has_second_rf &&
2878 mv_check_bounds(x, &mode_mv[this_mode][1].as_mv)))
2879 continue;
2880
2881 if (filter_idx > 0) {
2882 BEST_SEG_INFO *ref_bsi = bsi_buf;
2883 subpelmv = 0;
2884 have_ref = 1;
2885
2886 for (ref = 0; ref < 1 + has_second_rf; ++ref) {
2887 subpelmv |= mv_has_subpel(&mode_mv[this_mode][ref].as_mv);
2888 have_ref &= mode_mv[this_mode][ref].as_int ==
2889 ref_bsi->rdstat[i][mode_idx].mvs[ref].as_int;
2890 }
2891
2892 if (filter_idx > 1 && !subpelmv && !have_ref) {
2893 ref_bsi = bsi_buf + 1;
2894 have_ref = 1;
2895 for (ref = 0; ref < 1 + has_second_rf; ++ref)
2896 have_ref &= mode_mv[this_mode][ref].as_int ==
2897 ref_bsi->rdstat[i][mode_idx].mvs[ref].as_int;
2898 }
2899
2900 if (!subpelmv && have_ref &&
2901 ref_bsi->rdstat[i][mode_idx].brdcost < INT64_MAX) {
2902 memcpy(&bsi->rdstat[i][mode_idx], &ref_bsi->rdstat[i][mode_idx],
2903 sizeof(SEG_RDSTAT));
2904 if (num_4x4_blocks_wide > 1)
2905 bsi->rdstat[i + 1][mode_idx].eobs =
2906 ref_bsi->rdstat[i + 1][mode_idx].eobs;
2907 if (num_4x4_blocks_high > 1)
2908 bsi->rdstat[i + 2][mode_idx].eobs =
2909 ref_bsi->rdstat[i + 2][mode_idx].eobs;
2910
2911 if (bsi->rdstat[i][mode_idx].brdcost < best_rd) {
2912 mode_selected = this_mode;
2913 best_rd = bsi->rdstat[i][mode_idx].brdcost;
2914 }
2915 continue;
2916 }
2917 }
2918
2919 bsi->rdstat[i][mode_idx].brdcost =
2920 encode_inter_mb_segment(cpi, x,
2921 bsi->segment_rd - this_segment_rd, i,
2922 &bsi->rdstat[i][mode_idx].byrate,
2923 &bsi->rdstat[i][mode_idx].bdist,
2924 &bsi->rdstat[i][mode_idx].bsse,
2925 bsi->rdstat[i][mode_idx].ta,
2926 bsi->rdstat[i][mode_idx].tl,
Yaowu Xu7c514e22015-09-28 15:55:46 -07002927 idy, idx,
Jingning Han3ee6db62015-08-05 19:00:31 -07002928 mi_row, mi_col);
2929 if (bsi->rdstat[i][mode_idx].brdcost < INT64_MAX) {
2930 bsi->rdstat[i][mode_idx].brdcost += RDCOST(x->rdmult, x->rddiv,
2931 bsi->rdstat[i][mode_idx].brate, 0);
2932 bsi->rdstat[i][mode_idx].brate += bsi->rdstat[i][mode_idx].byrate;
2933 bsi->rdstat[i][mode_idx].eobs = p->eobs[i];
2934 if (num_4x4_blocks_wide > 1)
2935 bsi->rdstat[i + 1][mode_idx].eobs = p->eobs[i + 1];
2936 if (num_4x4_blocks_high > 1)
2937 bsi->rdstat[i + 2][mode_idx].eobs = p->eobs[i + 2];
2938 }
2939
2940 if (bsi->rdstat[i][mode_idx].brdcost < best_rd) {
2941 mode_selected = this_mode;
2942 best_rd = bsi->rdstat[i][mode_idx].brdcost;
2943 }
2944 } /*for each 4x4 mode*/
2945
2946 if (best_rd == INT64_MAX) {
2947 int iy, midx;
2948 for (iy = i + 1; iy < 4; ++iy)
2949 for (midx = 0; midx < INTER_MODES; ++midx)
2950 bsi->rdstat[iy][midx].brdcost = INT64_MAX;
2951 bsi->segment_rd = INT64_MAX;
2952 return INT64_MAX;
2953 }
2954
2955 mode_idx = INTER_OFFSET(mode_selected);
2956 memcpy(t_above, bsi->rdstat[i][mode_idx].ta, sizeof(t_above));
2957 memcpy(t_left, bsi->rdstat[i][mode_idx].tl, sizeof(t_left));
2958
2959 set_and_cost_bmi_mvs(cpi, x, xd, i, mode_selected, mode_mv[mode_selected],
2960 frame_mv, seg_mvs[i], bsi->ref_mv, x->nmvjointcost,
2961 x->mvcost);
2962
2963 br += bsi->rdstat[i][mode_idx].brate;
2964 bd += bsi->rdstat[i][mode_idx].bdist;
2965 block_sse += bsi->rdstat[i][mode_idx].bsse;
2966 segmentyrate += bsi->rdstat[i][mode_idx].byrate;
2967 this_segment_rd += bsi->rdstat[i][mode_idx].brdcost;
2968
2969 if (this_segment_rd > bsi->segment_rd) {
2970 int iy, midx;
2971 for (iy = i + 1; iy < 4; ++iy)
2972 for (midx = 0; midx < INTER_MODES; ++midx)
2973 bsi->rdstat[iy][midx].brdcost = INT64_MAX;
2974 bsi->segment_rd = INT64_MAX;
2975 return INT64_MAX;
2976 }
2977 }
2978 } /* for each label */
2979
2980 bsi->r = br;
2981 bsi->d = bd;
2982 bsi->segment_yrate = segmentyrate;
2983 bsi->segment_rd = this_segment_rd;
2984 bsi->sse = block_sse;
2985
2986 // update the coding decisions
2987 for (k = 0; k < 4; ++k)
2988 bsi->modes[k] = mi->bmi[k].as_mode;
2989
2990 if (bsi->segment_rd > best_rd)
2991 return INT64_MAX;
2992 /* set it to the best */
2993 for (i = 0; i < 4; i++) {
2994 mode_idx = INTER_OFFSET(bsi->modes[i]);
2995 mi->bmi[i].as_mv[0].as_int = bsi->rdstat[i][mode_idx].mvs[0].as_int;
2996 if (has_second_ref(mbmi))
2997 mi->bmi[i].as_mv[1].as_int = bsi->rdstat[i][mode_idx].mvs[1].as_int;
2998 x->plane[0].eobs[i] = bsi->rdstat[i][mode_idx].eobs;
2999 mi->bmi[i].as_mode = bsi->modes[i];
3000 }
3001
3002 /*
3003 * used to set mbmi->mv.as_int
3004 */
3005 *returntotrate = bsi->r;
3006 *returndistortion = bsi->d;
3007 *returnyrate = bsi->segment_yrate;
3008 *skippable = vp10_is_skippable_in_plane(x, BLOCK_8X8, 0);
3009 *psse = bsi->sse;
3010 mbmi->mode = bsi->modes[3];
3011
3012 return bsi->segment_rd;
3013}
3014
Yaowu Xufc7cbd12015-08-13 09:36:53 -07003015static void estimate_ref_frame_costs(const VP10_COMMON *cm,
Jingning Han3ee6db62015-08-05 19:00:31 -07003016 const MACROBLOCKD *xd,
3017 int segment_id,
3018 unsigned int *ref_costs_single,
3019 unsigned int *ref_costs_comp,
3020 vpx_prob *comp_mode_p) {
3021 int seg_ref_active = segfeature_active(&cm->seg, segment_id,
3022 SEG_LVL_REF_FRAME);
3023 if (seg_ref_active) {
3024 memset(ref_costs_single, 0, MAX_REF_FRAMES * sizeof(*ref_costs_single));
3025 memset(ref_costs_comp, 0, MAX_REF_FRAMES * sizeof(*ref_costs_comp));
3026 *comp_mode_p = 128;
3027 } else {
3028 vpx_prob intra_inter_p = vp10_get_intra_inter_prob(cm, xd);
3029 vpx_prob comp_inter_p = 128;
3030
3031 if (cm->reference_mode == REFERENCE_MODE_SELECT) {
3032 comp_inter_p = vp10_get_reference_mode_prob(cm, xd);
3033 *comp_mode_p = comp_inter_p;
3034 } else {
3035 *comp_mode_p = 128;
3036 }
3037
3038 ref_costs_single[INTRA_FRAME] = vp10_cost_bit(intra_inter_p, 0);
3039
3040 if (cm->reference_mode != COMPOUND_REFERENCE) {
3041 vpx_prob ref_single_p1 = vp10_get_pred_prob_single_ref_p1(cm, xd);
3042 vpx_prob ref_single_p2 = vp10_get_pred_prob_single_ref_p2(cm, xd);
3043 unsigned int base_cost = vp10_cost_bit(intra_inter_p, 1);
3044
3045 if (cm->reference_mode == REFERENCE_MODE_SELECT)
3046 base_cost += vp10_cost_bit(comp_inter_p, 0);
3047
3048 ref_costs_single[LAST_FRAME] = ref_costs_single[GOLDEN_FRAME] =
3049 ref_costs_single[ALTREF_FRAME] = base_cost;
3050 ref_costs_single[LAST_FRAME] += vp10_cost_bit(ref_single_p1, 0);
3051 ref_costs_single[GOLDEN_FRAME] += vp10_cost_bit(ref_single_p1, 1);
3052 ref_costs_single[ALTREF_FRAME] += vp10_cost_bit(ref_single_p1, 1);
3053 ref_costs_single[GOLDEN_FRAME] += vp10_cost_bit(ref_single_p2, 0);
3054 ref_costs_single[ALTREF_FRAME] += vp10_cost_bit(ref_single_p2, 1);
3055 } else {
3056 ref_costs_single[LAST_FRAME] = 512;
3057 ref_costs_single[GOLDEN_FRAME] = 512;
3058 ref_costs_single[ALTREF_FRAME] = 512;
3059 }
3060 if (cm->reference_mode != SINGLE_REFERENCE) {
3061 vpx_prob ref_comp_p = vp10_get_pred_prob_comp_ref_p(cm, xd);
3062 unsigned int base_cost = vp10_cost_bit(intra_inter_p, 1);
3063
3064 if (cm->reference_mode == REFERENCE_MODE_SELECT)
3065 base_cost += vp10_cost_bit(comp_inter_p, 1);
3066
3067 ref_costs_comp[LAST_FRAME] = base_cost + vp10_cost_bit(ref_comp_p, 0);
3068 ref_costs_comp[GOLDEN_FRAME] = base_cost + vp10_cost_bit(ref_comp_p, 1);
3069 } else {
3070 ref_costs_comp[LAST_FRAME] = 512;
3071 ref_costs_comp[GOLDEN_FRAME] = 512;
3072 }
3073 }
3074}
3075
3076static void store_coding_context(MACROBLOCK *x, PICK_MODE_CONTEXT *ctx,
3077 int mode_index,
3078 int64_t comp_pred_diff[REFERENCE_MODES],
3079 int64_t best_filter_diff[SWITCHABLE_FILTER_CONTEXTS],
3080 int skippable) {
3081 MACROBLOCKD *const xd = &x->e_mbd;
3082
3083 // Take a snapshot of the coding context so it can be
3084 // restored if we decide to encode this way
3085 ctx->skip = x->skip;
3086 ctx->skippable = skippable;
3087 ctx->best_mode_index = mode_index;
3088 ctx->mic = *xd->mi[0];
3089 ctx->mbmi_ext = *x->mbmi_ext;
3090 ctx->single_pred_diff = (int)comp_pred_diff[SINGLE_REFERENCE];
3091 ctx->comp_pred_diff = (int)comp_pred_diff[COMPOUND_REFERENCE];
3092 ctx->hybrid_pred_diff = (int)comp_pred_diff[REFERENCE_MODE_SELECT];
3093
3094 memcpy(ctx->best_filter_diff, best_filter_diff,
3095 sizeof(*best_filter_diff) * SWITCHABLE_FILTER_CONTEXTS);
3096}
3097
Yaowu Xu26a9afc2015-08-13 09:42:27 -07003098static void setup_buffer_inter(VP10_COMP *cpi, MACROBLOCK *x,
Jingning Han3ee6db62015-08-05 19:00:31 -07003099 MV_REFERENCE_FRAME ref_frame,
3100 BLOCK_SIZE block_size,
3101 int mi_row, int mi_col,
3102 int_mv frame_nearest_mv[MAX_REF_FRAMES],
3103 int_mv frame_near_mv[MAX_REF_FRAMES],
3104 struct buf_2d yv12_mb[4][MAX_MB_PLANE]) {
Yaowu Xufc7cbd12015-08-13 09:36:53 -07003105 const VP10_COMMON *cm = &cpi->common;
Jingning Han3ee6db62015-08-05 19:00:31 -07003106 const YV12_BUFFER_CONFIG *yv12 = get_ref_frame_buffer(cpi, ref_frame);
3107 MACROBLOCKD *const xd = &x->e_mbd;
3108 MODE_INFO *const mi = xd->mi[0];
3109 int_mv *const candidates = x->mbmi_ext->ref_mvs[ref_frame];
3110 const struct scale_factors *const sf = &cm->frame_refs[ref_frame - 1].sf;
3111 MB_MODE_INFO_EXT *const mbmi_ext = x->mbmi_ext;
3112
3113 assert(yv12 != NULL);
3114
3115 // TODO(jkoleszar): Is the UV buffer ever used here? If so, need to make this
3116 // use the UV scaling factors.
3117 vp10_setup_pred_block(xd, yv12_mb[ref_frame], yv12, mi_row, mi_col, sf, sf);
3118
3119 // Gets an initial list of candidate vectors from neighbours and orders them
3120 vp10_find_mv_refs(cm, xd, mi, ref_frame, candidates, mi_row, mi_col,
3121 NULL, NULL, mbmi_ext->mode_context);
3122
3123 // Candidate refinement carried out at encoder and decoder
Ronald S. Bultje5b4805d2015-10-02 11:51:54 -04003124 vp10_find_best_ref_mvs(cm->allow_high_precision_mv, candidates,
3125 &frame_nearest_mv[ref_frame],
3126 &frame_near_mv[ref_frame]);
Jingning Han3ee6db62015-08-05 19:00:31 -07003127
3128 // Further refinement that is encode side only to test the top few candidates
3129 // in full and choose the best as the centre point for subsequent searches.
3130 // The current implementation doesn't support scaling.
3131 if (!vp10_is_scaled(sf) && block_size >= BLOCK_8X8)
3132 vp10_mv_pred(cpi, x, yv12_mb[ref_frame][0].buf, yv12->y_stride,
3133 ref_frame, block_size);
3134}
3135
Yaowu Xu26a9afc2015-08-13 09:42:27 -07003136static void single_motion_search(VP10_COMP *cpi, MACROBLOCK *x,
Jingning Han3ee6db62015-08-05 19:00:31 -07003137 BLOCK_SIZE bsize,
3138 int mi_row, int mi_col,
3139 int_mv *tmp_mv, int *rate_mv) {
3140 MACROBLOCKD *xd = &x->e_mbd;
Yaowu Xufc7cbd12015-08-13 09:36:53 -07003141 const VP10_COMMON *cm = &cpi->common;
Jingning Han3ee6db62015-08-05 19:00:31 -07003142 MB_MODE_INFO *mbmi = &xd->mi[0]->mbmi;
3143 struct buf_2d backup_yv12[MAX_MB_PLANE] = {{0, 0}};
3144 int bestsme = INT_MAX;
3145 int step_param;
3146 int sadpb = x->sadperbit16;
3147 MV mvp_full;
3148 int ref = mbmi->ref_frame[0];
3149 MV ref_mv = x->mbmi_ext->ref_mvs[ref][0].as_mv;
3150
3151 int tmp_col_min = x->mv_col_min;
3152 int tmp_col_max = x->mv_col_max;
3153 int tmp_row_min = x->mv_row_min;
3154 int tmp_row_max = x->mv_row_max;
3155 int cost_list[5];
3156
3157 const YV12_BUFFER_CONFIG *scaled_ref_frame = vp10_get_scaled_ref_frame(cpi,
3158 ref);
3159
3160 MV pred_mv[3];
3161 pred_mv[0] = x->mbmi_ext->ref_mvs[ref][0].as_mv;
3162 pred_mv[1] = x->mbmi_ext->ref_mvs[ref][1].as_mv;
3163 pred_mv[2] = x->pred_mv[ref];
3164
3165 if (scaled_ref_frame) {
3166 int i;
3167 // Swap out the reference frame for a version that's been scaled to
3168 // match the resolution of the current frame, allowing the existing
3169 // motion search code to be used without additional modifications.
3170 for (i = 0; i < MAX_MB_PLANE; i++)
3171 backup_yv12[i] = xd->plane[i].pre[0];
3172
3173 vp10_setup_pre_planes(xd, 0, scaled_ref_frame, mi_row, mi_col, NULL);
3174 }
3175
3176 vp10_set_mv_search_range(x, &ref_mv);
3177
3178 // Work out the size of the first step in the mv step search.
James Zern5e16d392015-08-17 18:19:22 -07003179 // 0 here is maximum length first step. 1 is VPXMAX >> 1 etc.
Jingning Han3ee6db62015-08-05 19:00:31 -07003180 if (cpi->sf.mv.auto_mv_step_size && cm->show_frame) {
3181 // Take wtd average of the step_params based on the last frame's
3182 // max mv magnitude and that based on the best ref mvs of the current
3183 // block for the given reference.
3184 step_param = (vp10_init_search_range(x->max_mv_context[ref]) +
3185 cpi->mv_step_param) / 2;
3186 } else {
3187 step_param = cpi->mv_step_param;
3188 }
3189
3190 if (cpi->sf.adaptive_motion_search && bsize < BLOCK_64X64) {
James Zern5e16d392015-08-17 18:19:22 -07003191 int boffset =
3192 2 * (b_width_log2_lookup[BLOCK_64X64] -
3193 VPXMIN(b_height_log2_lookup[bsize], b_width_log2_lookup[bsize]));
3194 step_param = VPXMAX(step_param, boffset);
Jingning Han3ee6db62015-08-05 19:00:31 -07003195 }
3196
3197 if (cpi->sf.adaptive_motion_search) {
3198 int bwl = b_width_log2_lookup[bsize];
3199 int bhl = b_height_log2_lookup[bsize];
3200 int tlevel = x->pred_mv_sad[ref] >> (bwl + bhl + 4);
3201
3202 if (tlevel < 5)
3203 step_param += 2;
3204
3205 // prev_mv_sad is not setup for dynamically scaled frames.
3206 if (cpi->oxcf.resize_mode != RESIZE_DYNAMIC) {
3207 int i;
3208 for (i = LAST_FRAME; i <= ALTREF_FRAME && cm->show_frame; ++i) {
3209 if ((x->pred_mv_sad[ref] >> 3) > x->pred_mv_sad[i]) {
3210 x->pred_mv[ref].row = 0;
3211 x->pred_mv[ref].col = 0;
3212 tmp_mv->as_int = INVALID_MV;
3213
3214 if (scaled_ref_frame) {
3215 int i;
3216 for (i = 0; i < MAX_MB_PLANE; ++i)
3217 xd->plane[i].pre[0] = backup_yv12[i];
3218 }
3219 return;
3220 }
3221 }
3222 }
3223 }
3224
3225 mvp_full = pred_mv[x->mv_best_ref_index[ref]];
3226
3227 mvp_full.col >>= 3;
3228 mvp_full.row >>= 3;
3229
3230 bestsme = vp10_full_pixel_search(cpi, x, bsize, &mvp_full, step_param, sadpb,
3231 cond_cost_list(cpi, cost_list),
3232 &ref_mv, &tmp_mv->as_mv, INT_MAX, 1);
3233
3234 x->mv_col_min = tmp_col_min;
3235 x->mv_col_max = tmp_col_max;
3236 x->mv_row_min = tmp_row_min;
3237 x->mv_row_max = tmp_row_max;
3238
3239 if (bestsme < INT_MAX) {
3240 int dis; /* TODO: use dis in distortion calculation later. */
3241 cpi->find_fractional_mv_step(x, &tmp_mv->as_mv, &ref_mv,
3242 cm->allow_high_precision_mv,
3243 x->errorperbit,
3244 &cpi->fn_ptr[bsize],
3245 cpi->sf.mv.subpel_force_stop,
3246 cpi->sf.mv.subpel_iters_per_step,
3247 cond_cost_list(cpi, cost_list),
3248 x->nmvjointcost, x->mvcost,
3249 &dis, &x->pred_sse[ref], NULL, 0, 0);
3250 }
3251 *rate_mv = vp10_mv_bit_cost(&tmp_mv->as_mv, &ref_mv,
3252 x->nmvjointcost, x->mvcost, MV_COST_WEIGHT);
3253
3254 if (cpi->sf.adaptive_motion_search)
3255 x->pred_mv[ref] = tmp_mv->as_mv;
3256
3257 if (scaled_ref_frame) {
3258 int i;
3259 for (i = 0; i < MAX_MB_PLANE; i++)
3260 xd->plane[i].pre[0] = backup_yv12[i];
3261 }
3262}
3263
3264
3265
3266static INLINE void restore_dst_buf(MACROBLOCKD *xd,
3267 uint8_t *orig_dst[MAX_MB_PLANE],
3268 int orig_dst_stride[MAX_MB_PLANE]) {
3269 int i;
3270 for (i = 0; i < MAX_MB_PLANE; i++) {
3271 xd->plane[i].dst.buf = orig_dst[i];
3272 xd->plane[i].dst.stride = orig_dst_stride[i];
3273 }
3274}
3275
3276// In some situations we want to discount tha pparent cost of a new motion
3277// vector. Where there is a subtle motion field and especially where there is
3278// low spatial complexity then it can be hard to cover the cost of a new motion
3279// vector in a single block, even if that motion vector reduces distortion.
3280// However, once established that vector may be usable through the nearest and
3281// near mv modes to reduce distortion in subsequent blocks and also improve
3282// visual quality.
Yaowu Xu26a9afc2015-08-13 09:42:27 -07003283static int discount_newmv_test(const VP10_COMP *cpi,
Jingning Han3ee6db62015-08-05 19:00:31 -07003284 int this_mode,
3285 int_mv this_mv,
3286 int_mv (*mode_mv)[MAX_REF_FRAMES],
3287 int ref_frame) {
3288 return (!cpi->rc.is_src_frame_alt_ref &&
3289 (this_mode == NEWMV) &&
3290 (this_mv.as_int != 0) &&
3291 ((mode_mv[NEARESTMV][ref_frame].as_int == 0) ||
3292 (mode_mv[NEARESTMV][ref_frame].as_int == INVALID_MV)) &&
3293 ((mode_mv[NEARMV][ref_frame].as_int == 0) ||
3294 (mode_mv[NEARMV][ref_frame].as_int == INVALID_MV)));
3295}
3296
Ronald S. Bultje5b4805d2015-10-02 11:51:54 -04003297#define LEFT_TOP_MARGIN ((VP9_ENC_BORDER_IN_PIXELS - VP9_INTERP_EXTEND) << 3)
3298#define RIGHT_BOTTOM_MARGIN ((VP9_ENC_BORDER_IN_PIXELS -\
3299 VP9_INTERP_EXTEND) << 3)
3300
3301// TODO(jingning): this mv clamping function should be block size dependent.
3302static INLINE void clamp_mv2(MV *mv, const MACROBLOCKD *xd) {
3303 clamp_mv(mv, xd->mb_to_left_edge - LEFT_TOP_MARGIN,
3304 xd->mb_to_right_edge + RIGHT_BOTTOM_MARGIN,
3305 xd->mb_to_top_edge - LEFT_TOP_MARGIN,
3306 xd->mb_to_bottom_edge + RIGHT_BOTTOM_MARGIN);
3307}
3308
Yaowu Xu26a9afc2015-08-13 09:42:27 -07003309static int64_t handle_inter_mode(VP10_COMP *cpi, MACROBLOCK *x,
Jingning Han3ee6db62015-08-05 19:00:31 -07003310 BLOCK_SIZE bsize,
3311 int *rate2, int64_t *distortion,
3312 int *skippable,
3313 int *rate_y, int *rate_uv,
3314 int *disable_skip,
3315 int_mv (*mode_mv)[MAX_REF_FRAMES],
3316 int mi_row, int mi_col,
3317 int_mv single_newmv[MAX_REF_FRAMES],
3318 INTERP_FILTER (*single_filter)[MAX_REF_FRAMES],
3319 int (*single_skippable)[MAX_REF_FRAMES],
3320 int64_t *psse,
3321 const int64_t ref_best_rd,
3322 int64_t *mask_filter,
3323 int64_t filter_cache[]) {
Yaowu Xufc7cbd12015-08-13 09:36:53 -07003324 VP10_COMMON *cm = &cpi->common;
Jingning Han3ee6db62015-08-05 19:00:31 -07003325 MACROBLOCKD *xd = &x->e_mbd;
3326 MB_MODE_INFO *mbmi = &xd->mi[0]->mbmi;
3327 MB_MODE_INFO_EXT *const mbmi_ext = x->mbmi_ext;
3328 const int is_comp_pred = has_second_ref(mbmi);
3329 const int this_mode = mbmi->mode;
3330 int_mv *frame_mv = mode_mv[this_mode];
3331 int i;
3332 int refs[2] = { mbmi->ref_frame[0],
3333 (mbmi->ref_frame[1] < 0 ? 0 : mbmi->ref_frame[1]) };
3334 int_mv cur_mv[2];
3335#if CONFIG_VP9_HIGHBITDEPTH
3336 DECLARE_ALIGNED(16, uint16_t, tmp_buf16[MAX_MB_PLANE * 64 * 64]);
3337 uint8_t *tmp_buf;
3338#else
3339 DECLARE_ALIGNED(16, uint8_t, tmp_buf[MAX_MB_PLANE * 64 * 64]);
3340#endif // CONFIG_VP9_HIGHBITDEPTH
3341 int pred_exists = 0;
3342 int intpel_mv;
3343 int64_t rd, tmp_rd, best_rd = INT64_MAX;
3344 int best_needs_copy = 0;
3345 uint8_t *orig_dst[MAX_MB_PLANE];
3346 int orig_dst_stride[MAX_MB_PLANE];
3347 int rs = 0;
3348 INTERP_FILTER best_filter = SWITCHABLE;
3349 uint8_t skip_txfm[MAX_MB_PLANE << 2] = {0};
3350 int64_t bsse[MAX_MB_PLANE << 2] = {0};
3351
3352 int bsl = mi_width_log2_lookup[bsize];
3353 int pred_filter_search = cpi->sf.cb_pred_filter_search ?
3354 (((mi_row + mi_col) >> bsl) +
3355 get_chessboard_index(cm->current_video_frame)) & 0x1 : 0;
3356
3357 int skip_txfm_sb = 0;
3358 int64_t skip_sse_sb = INT64_MAX;
3359 int64_t distortion_y = 0, distortion_uv = 0;
3360
3361#if CONFIG_VP9_HIGHBITDEPTH
3362 if (xd->cur_buf->flags & YV12_FLAG_HIGHBITDEPTH) {
3363 tmp_buf = CONVERT_TO_BYTEPTR(tmp_buf16);
3364 } else {
3365 tmp_buf = (uint8_t *)tmp_buf16;
3366 }
3367#endif // CONFIG_VP9_HIGHBITDEPTH
3368
3369 if (pred_filter_search) {
3370 INTERP_FILTER af = SWITCHABLE, lf = SWITCHABLE;
3371 if (xd->up_available)
3372 af = xd->mi[-xd->mi_stride]->mbmi.interp_filter;
3373 if (xd->left_available)
3374 lf = xd->mi[-1]->mbmi.interp_filter;
3375
3376 if ((this_mode != NEWMV) || (af == lf))
3377 best_filter = af;
3378 }
3379
3380 if (is_comp_pred) {
3381 if (frame_mv[refs[0]].as_int == INVALID_MV ||
3382 frame_mv[refs[1]].as_int == INVALID_MV)
3383 return INT64_MAX;
3384
3385 if (cpi->sf.adaptive_mode_search) {
3386 if (single_filter[this_mode][refs[0]] ==
3387 single_filter[this_mode][refs[1]])
3388 best_filter = single_filter[this_mode][refs[0]];
3389 }
3390 }
3391
3392 if (this_mode == NEWMV) {
3393 int rate_mv;
3394 if (is_comp_pred) {
3395 // Initialize mv using single prediction mode result.
3396 frame_mv[refs[0]].as_int = single_newmv[refs[0]].as_int;
3397 frame_mv[refs[1]].as_int = single_newmv[refs[1]].as_int;
3398
3399 if (cpi->sf.comp_inter_joint_search_thresh <= bsize) {
3400 joint_motion_search(cpi, x, bsize, frame_mv,
3401 mi_row, mi_col, single_newmv, &rate_mv);
3402 } else {
3403 rate_mv = vp10_mv_bit_cost(&frame_mv[refs[0]].as_mv,
3404 &x->mbmi_ext->ref_mvs[refs[0]][0].as_mv,
3405 x->nmvjointcost, x->mvcost, MV_COST_WEIGHT);
3406 rate_mv += vp10_mv_bit_cost(&frame_mv[refs[1]].as_mv,
3407 &x->mbmi_ext->ref_mvs[refs[1]][0].as_mv,
3408 x->nmvjointcost, x->mvcost, MV_COST_WEIGHT);
3409 }
3410 *rate2 += rate_mv;
3411 } else {
3412 int_mv tmp_mv;
3413 single_motion_search(cpi, x, bsize, mi_row, mi_col,
3414 &tmp_mv, &rate_mv);
3415 if (tmp_mv.as_int == INVALID_MV)
3416 return INT64_MAX;
3417
3418 frame_mv[refs[0]].as_int =
3419 xd->mi[0]->bmi[0].as_mv[0].as_int = tmp_mv.as_int;
3420 single_newmv[refs[0]].as_int = tmp_mv.as_int;
3421
3422 // Estimate the rate implications of a new mv but discount this
3423 // under certain circumstances where we want to help initiate a weak
3424 // motion field, where the distortion gain for a single block may not
3425 // be enough to overcome the cost of a new mv.
3426 if (discount_newmv_test(cpi, this_mode, tmp_mv, mode_mv, refs[0])) {
James Zern5e16d392015-08-17 18:19:22 -07003427 *rate2 += VPXMAX((rate_mv / NEW_MV_DISCOUNT_FACTOR), 1);
Jingning Han3ee6db62015-08-05 19:00:31 -07003428 } else {
3429 *rate2 += rate_mv;
3430 }
3431 }
3432 }
3433
3434 for (i = 0; i < is_comp_pred + 1; ++i) {
3435 cur_mv[i] = frame_mv[refs[i]];
3436 // Clip "next_nearest" so that it does not extend to far out of image
3437 if (this_mode != NEWMV)
3438 clamp_mv2(&cur_mv[i].as_mv, xd);
3439
3440 if (mv_check_bounds(x, &cur_mv[i].as_mv))
3441 return INT64_MAX;
3442 mbmi->mv[i].as_int = cur_mv[i].as_int;
3443 }
3444
3445 // do first prediction into the destination buffer. Do the next
3446 // prediction into a temporary buffer. Then keep track of which one
3447 // of these currently holds the best predictor, and use the other
3448 // one for future predictions. In the end, copy from tmp_buf to
3449 // dst if necessary.
3450 for (i = 0; i < MAX_MB_PLANE; i++) {
3451 orig_dst[i] = xd->plane[i].dst.buf;
3452 orig_dst_stride[i] = xd->plane[i].dst.stride;
3453 }
3454
3455 // We don't include the cost of the second reference here, because there
3456 // are only three options: Last/Golden, ARF/Last or Golden/ARF, or in other
3457 // words if you present them in that order, the second one is always known
3458 // if the first is known.
3459 //
3460 // Under some circumstances we discount the cost of new mv mode to encourage
3461 // initiation of a motion field.
3462 if (discount_newmv_test(cpi, this_mode, frame_mv[refs[0]],
3463 mode_mv, refs[0])) {
James Zern5e16d392015-08-17 18:19:22 -07003464 *rate2 += VPXMIN(cost_mv_ref(cpi, this_mode,
3465 mbmi_ext->mode_context[refs[0]]),
3466 cost_mv_ref(cpi, NEARESTMV,
3467 mbmi_ext->mode_context[refs[0]]));
Jingning Han3ee6db62015-08-05 19:00:31 -07003468 } else {
3469 *rate2 += cost_mv_ref(cpi, this_mode, mbmi_ext->mode_context[refs[0]]);
3470 }
3471
3472 if (RDCOST(x->rdmult, x->rddiv, *rate2, 0) > ref_best_rd &&
3473 mbmi->mode != NEARESTMV)
3474 return INT64_MAX;
3475
3476 pred_exists = 0;
3477 // Are all MVs integer pel for Y and UV
3478 intpel_mv = !mv_has_subpel(&mbmi->mv[0].as_mv);
3479 if (is_comp_pred)
3480 intpel_mv &= !mv_has_subpel(&mbmi->mv[1].as_mv);
3481
3482 // Search for best switchable filter by checking the variance of
3483 // pred error irrespective of whether the filter will be used
3484 for (i = 0; i < SWITCHABLE_FILTER_CONTEXTS; ++i)
3485 filter_cache[i] = INT64_MAX;
3486
3487 if (cm->interp_filter != BILINEAR) {
3488 if (x->source_variance < cpi->sf.disable_filter_search_var_thresh) {
3489 best_filter = EIGHTTAP;
3490 } else if (best_filter == SWITCHABLE) {
3491 int newbest;
3492 int tmp_rate_sum = 0;
3493 int64_t tmp_dist_sum = 0;
3494
3495 for (i = 0; i < SWITCHABLE_FILTERS; ++i) {
3496 int j;
3497 int64_t rs_rd;
3498 int tmp_skip_sb = 0;
3499 int64_t tmp_skip_sse = INT64_MAX;
3500
3501 mbmi->interp_filter = i;
3502 rs = vp10_get_switchable_rate(cpi, xd);
3503 rs_rd = RDCOST(x->rdmult, x->rddiv, rs, 0);
3504
3505 if (i > 0 && intpel_mv) {
3506 rd = RDCOST(x->rdmult, x->rddiv, tmp_rate_sum, tmp_dist_sum);
3507 filter_cache[i] = rd;
3508 filter_cache[SWITCHABLE_FILTERS] =
James Zern5e16d392015-08-17 18:19:22 -07003509 VPXMIN(filter_cache[SWITCHABLE_FILTERS], rd + rs_rd);
Jingning Han3ee6db62015-08-05 19:00:31 -07003510 if (cm->interp_filter == SWITCHABLE)
3511 rd += rs_rd;
James Zern5e16d392015-08-17 18:19:22 -07003512 *mask_filter = VPXMAX(*mask_filter, rd);
Jingning Han3ee6db62015-08-05 19:00:31 -07003513 } else {
3514 int rate_sum = 0;
3515 int64_t dist_sum = 0;
3516 if (i > 0 && cpi->sf.adaptive_interp_filter_search &&
3517 (cpi->sf.interp_filter_search_mask & (1 << i))) {
3518 rate_sum = INT_MAX;
3519 dist_sum = INT64_MAX;
3520 continue;
3521 }
3522
3523 if ((cm->interp_filter == SWITCHABLE &&
3524 (!i || best_needs_copy)) ||
3525 (cm->interp_filter != SWITCHABLE &&
3526 (cm->interp_filter == mbmi->interp_filter ||
3527 (i == 0 && intpel_mv)))) {
3528 restore_dst_buf(xd, orig_dst, orig_dst_stride);
3529 } else {
3530 for (j = 0; j < MAX_MB_PLANE; j++) {
3531 xd->plane[j].dst.buf = tmp_buf + j * 64 * 64;
3532 xd->plane[j].dst.stride = 64;
3533 }
3534 }
3535 vp10_build_inter_predictors_sb(xd, mi_row, mi_col, bsize);
3536 model_rd_for_sb(cpi, bsize, x, xd, &rate_sum, &dist_sum,
3537 &tmp_skip_sb, &tmp_skip_sse);
3538
3539 rd = RDCOST(x->rdmult, x->rddiv, rate_sum, dist_sum);
3540 filter_cache[i] = rd;
3541 filter_cache[SWITCHABLE_FILTERS] =
James Zern5e16d392015-08-17 18:19:22 -07003542 VPXMIN(filter_cache[SWITCHABLE_FILTERS], rd + rs_rd);
Jingning Han3ee6db62015-08-05 19:00:31 -07003543 if (cm->interp_filter == SWITCHABLE)
3544 rd += rs_rd;
James Zern5e16d392015-08-17 18:19:22 -07003545 *mask_filter = VPXMAX(*mask_filter, rd);
Jingning Han3ee6db62015-08-05 19:00:31 -07003546
3547 if (i == 0 && intpel_mv) {
3548 tmp_rate_sum = rate_sum;
3549 tmp_dist_sum = dist_sum;
3550 }
3551 }
3552
3553 if (i == 0 && cpi->sf.use_rd_breakout && ref_best_rd < INT64_MAX) {
3554 if (rd / 2 > ref_best_rd) {
3555 restore_dst_buf(xd, orig_dst, orig_dst_stride);
3556 return INT64_MAX;
3557 }
3558 }
3559 newbest = i == 0 || rd < best_rd;
3560
3561 if (newbest) {
3562 best_rd = rd;
3563 best_filter = mbmi->interp_filter;
3564 if (cm->interp_filter == SWITCHABLE && i && !intpel_mv)
3565 best_needs_copy = !best_needs_copy;
3566 }
3567
3568 if ((cm->interp_filter == SWITCHABLE && newbest) ||
3569 (cm->interp_filter != SWITCHABLE &&
3570 cm->interp_filter == mbmi->interp_filter)) {
3571 pred_exists = 1;
3572 tmp_rd = best_rd;
3573
3574 skip_txfm_sb = tmp_skip_sb;
3575 skip_sse_sb = tmp_skip_sse;
3576 memcpy(skip_txfm, x->skip_txfm, sizeof(skip_txfm));
3577 memcpy(bsse, x->bsse, sizeof(bsse));
3578 }
3579 }
3580 restore_dst_buf(xd, orig_dst, orig_dst_stride);
3581 }
3582 }
3583 // Set the appropriate filter
3584 mbmi->interp_filter = cm->interp_filter != SWITCHABLE ?
3585 cm->interp_filter : best_filter;
3586 rs = cm->interp_filter == SWITCHABLE ? vp10_get_switchable_rate(cpi, xd) : 0;
3587
3588 if (pred_exists) {
3589 if (best_needs_copy) {
3590 // again temporarily set the buffers to local memory to prevent a memcpy
3591 for (i = 0; i < MAX_MB_PLANE; i++) {
3592 xd->plane[i].dst.buf = tmp_buf + i * 64 * 64;
3593 xd->plane[i].dst.stride = 64;
3594 }
3595 }
3596 rd = tmp_rd + RDCOST(x->rdmult, x->rddiv, rs, 0);
3597 } else {
3598 int tmp_rate;
3599 int64_t tmp_dist;
3600 // Handles the special case when a filter that is not in the
3601 // switchable list (ex. bilinear) is indicated at the frame level, or
3602 // skip condition holds.
3603 vp10_build_inter_predictors_sb(xd, mi_row, mi_col, bsize);
3604 model_rd_for_sb(cpi, bsize, x, xd, &tmp_rate, &tmp_dist,
3605 &skip_txfm_sb, &skip_sse_sb);
3606 rd = RDCOST(x->rdmult, x->rddiv, rs + tmp_rate, tmp_dist);
3607 memcpy(skip_txfm, x->skip_txfm, sizeof(skip_txfm));
3608 memcpy(bsse, x->bsse, sizeof(bsse));
3609 }
3610
3611 if (!is_comp_pred)
3612 single_filter[this_mode][refs[0]] = mbmi->interp_filter;
3613
3614 if (cpi->sf.adaptive_mode_search)
3615 if (is_comp_pred)
3616 if (single_skippable[this_mode][refs[0]] &&
3617 single_skippable[this_mode][refs[1]])
3618 memset(skip_txfm, SKIP_TXFM_AC_DC, sizeof(skip_txfm));
3619
3620 if (cpi->sf.use_rd_breakout && ref_best_rd < INT64_MAX) {
3621 // if current pred_error modeled rd is substantially more than the best
3622 // so far, do not bother doing full rd
3623 if (rd / 2 > ref_best_rd) {
3624 restore_dst_buf(xd, orig_dst, orig_dst_stride);
3625 return INT64_MAX;
3626 }
3627 }
3628
3629 if (cm->interp_filter == SWITCHABLE)
3630 *rate2 += rs;
3631
3632 memcpy(x->skip_txfm, skip_txfm, sizeof(skip_txfm));
3633 memcpy(x->bsse, bsse, sizeof(bsse));
3634
3635 if (!skip_txfm_sb) {
3636 int skippable_y, skippable_uv;
3637 int64_t sseuv = INT64_MAX;
3638 int64_t rdcosty = INT64_MAX;
3639
3640 // Y cost and distortion
3641 vp10_subtract_plane(x, bsize, 0);
Jingning Han2cdc1272015-10-09 09:57:42 -07003642#if CONFIG_VAR_TX
Jingning Hanf0dee772015-10-26 12:32:30 -07003643 if (cm->tx_mode == TX_MODE_SELECT || xd->lossless[mbmi->segment_id]) {
Jingning Han2cdc1272015-10-09 09:57:42 -07003644 inter_block_yrd(cpi, x, rate_y, &distortion_y, &skippable_y, psse,
3645 bsize, ref_best_rd);
3646 } else {
3647 super_block_yrd(cpi, x, rate_y, &distortion_y, &skippable_y, psse,
3648 bsize, ref_best_rd);
3649 for (i = 0; i < 64; ++i)
3650 mbmi->inter_tx_size[i] = mbmi->tx_size;
3651 }
3652#else
Jingning Han3ee6db62015-08-05 19:00:31 -07003653 super_block_yrd(cpi, x, rate_y, &distortion_y, &skippable_y, psse,
3654 bsize, ref_best_rd);
Jingning Han704985e2015-10-08 12:05:03 -07003655#endif
3656
Jingning Han3ee6db62015-08-05 19:00:31 -07003657 if (*rate_y == INT_MAX) {
3658 *rate2 = INT_MAX;
3659 *distortion = INT64_MAX;
3660 restore_dst_buf(xd, orig_dst, orig_dst_stride);
3661 return INT64_MAX;
3662 }
3663
3664 *rate2 += *rate_y;
3665 *distortion += distortion_y;
3666
3667 rdcosty = RDCOST(x->rdmult, x->rddiv, *rate2, *distortion);
James Zern5e16d392015-08-17 18:19:22 -07003668 rdcosty = VPXMIN(rdcosty, RDCOST(x->rdmult, x->rddiv, 0, *psse));
Jingning Han3ee6db62015-08-05 19:00:31 -07003669
Jingning Hana8dad552015-10-08 16:46:10 -07003670#if CONFIG_VAR_TX
3671 if (!inter_block_uvrd(cpi, x, rate_uv, &distortion_uv, &skippable_uv,
3672 &sseuv, bsize, ref_best_rd - rdcosty)) {
3673#else
Jingning Han3ee6db62015-08-05 19:00:31 -07003674 if (!super_block_uvrd(cpi, x, rate_uv, &distortion_uv, &skippable_uv,
3675 &sseuv, bsize, ref_best_rd - rdcosty)) {
Jingning Hana8dad552015-10-08 16:46:10 -07003676#endif
Jingning Han3ee6db62015-08-05 19:00:31 -07003677 *rate2 = INT_MAX;
3678 *distortion = INT64_MAX;
3679 restore_dst_buf(xd, orig_dst, orig_dst_stride);
3680 return INT64_MAX;
3681 }
3682
3683 *psse += sseuv;
3684 *rate2 += *rate_uv;
3685 *distortion += distortion_uv;
3686 *skippable = skippable_y && skippable_uv;
3687 } else {
3688 x->skip = 1;
3689 *disable_skip = 1;
3690
3691 // The cost of skip bit needs to be added.
3692 *rate2 += vp10_cost_bit(vp10_get_skip_prob(cm, xd), 1);
3693
3694 *distortion = skip_sse_sb;
3695 }
3696
3697 if (!is_comp_pred)
3698 single_skippable[this_mode][refs[0]] = *skippable;
3699
3700 restore_dst_buf(xd, orig_dst, orig_dst_stride);
3701 return 0; // The rate-distortion cost will be re-calculated by caller.
3702}
3703
Yaowu Xu26a9afc2015-08-13 09:42:27 -07003704void vp10_rd_pick_intra_mode_sb(VP10_COMP *cpi, MACROBLOCK *x,
Jingning Han3ee6db62015-08-05 19:00:31 -07003705 RD_COST *rd_cost, BLOCK_SIZE bsize,
3706 PICK_MODE_CONTEXT *ctx, int64_t best_rd) {
Yaowu Xufc7cbd12015-08-13 09:36:53 -07003707 VP10_COMMON *const cm = &cpi->common;
Jingning Han3ee6db62015-08-05 19:00:31 -07003708 MACROBLOCKD *const xd = &x->e_mbd;
3709 struct macroblockd_plane *const pd = xd->plane;
3710 int rate_y = 0, rate_uv = 0, rate_y_tokenonly = 0, rate_uv_tokenonly = 0;
3711 int y_skip = 0, uv_skip = 0;
3712 int64_t dist_y = 0, dist_uv = 0;
3713 TX_SIZE max_uv_tx_size;
Jingning Han3ee6db62015-08-05 19:00:31 -07003714 ctx->skip = 0;
3715 xd->mi[0]->mbmi.ref_frame[0] = INTRA_FRAME;
3716 xd->mi[0]->mbmi.ref_frame[1] = NONE;
3717
3718 if (bsize >= BLOCK_8X8) {
3719 if (rd_pick_intra_sby_mode(cpi, x, &rate_y, &rate_y_tokenonly,
3720 &dist_y, &y_skip, bsize,
3721 best_rd) >= best_rd) {
3722 rd_cost->rate = INT_MAX;
3723 return;
3724 }
3725 } else {
3726 y_skip = 0;
3727 if (rd_pick_intra_sub_8x8_y_mode(cpi, x, &rate_y, &rate_y_tokenonly,
3728 &dist_y, best_rd) >= best_rd) {
3729 rd_cost->rate = INT_MAX;
3730 return;
3731 }
3732 }
3733 max_uv_tx_size = get_uv_tx_size_impl(xd->mi[0]->mbmi.tx_size, bsize,
3734 pd[1].subsampling_x,
3735 pd[1].subsampling_y);
3736 rd_pick_intra_sbuv_mode(cpi, x, ctx, &rate_uv, &rate_uv_tokenonly,
James Zern5e16d392015-08-17 18:19:22 -07003737 &dist_uv, &uv_skip, VPXMAX(BLOCK_8X8, bsize),
Jingning Han3ee6db62015-08-05 19:00:31 -07003738 max_uv_tx_size);
3739
3740 if (y_skip && uv_skip) {
3741 rd_cost->rate = rate_y + rate_uv - rate_y_tokenonly - rate_uv_tokenonly +
3742 vp10_cost_bit(vp10_get_skip_prob(cm, xd), 1);
3743 rd_cost->dist = dist_y + dist_uv;
3744 } else {
3745 rd_cost->rate = rate_y + rate_uv +
3746 vp10_cost_bit(vp10_get_skip_prob(cm, xd), 0);
3747 rd_cost->dist = dist_y + dist_uv;
3748 }
3749
3750 ctx->mic = *xd->mi[0];
3751 ctx->mbmi_ext = *x->mbmi_ext;
3752 rd_cost->rdcost = RDCOST(x->rdmult, x->rddiv, rd_cost->rate, rd_cost->dist);
3753}
3754
3755// This function is designed to apply a bias or adjustment to an rd value based
3756// on the relative variance of the source and reconstruction.
3757#define LOW_VAR_THRESH 16
3758#define VLOW_ADJ_MAX 25
3759#define VHIGH_ADJ_MAX 8
Yaowu Xu26a9afc2015-08-13 09:42:27 -07003760static void rd_variance_adjustment(VP10_COMP *cpi,
Jingning Han3ee6db62015-08-05 19:00:31 -07003761 MACROBLOCK *x,
3762 BLOCK_SIZE bsize,
3763 int64_t *this_rd,
3764 MV_REFERENCE_FRAME ref_frame,
3765 unsigned int source_variance) {
3766 MACROBLOCKD *const xd = &x->e_mbd;
3767 unsigned int recon_variance;
3768 unsigned int absvar_diff = 0;
3769 int64_t var_error = 0;
3770 int64_t var_factor = 0;
3771
3772 if (*this_rd == INT64_MAX)
3773 return;
3774
3775#if CONFIG_VP9_HIGHBITDEPTH
3776 if (xd->cur_buf->flags & YV12_FLAG_HIGHBITDEPTH) {
3777 recon_variance =
3778 vp10_high_get_sby_perpixel_variance(cpi, &xd->plane[0].dst, bsize, xd->bd);
3779 } else {
3780 recon_variance =
3781 vp10_get_sby_perpixel_variance(cpi, &xd->plane[0].dst, bsize);
3782 }
3783#else
3784 recon_variance =
3785 vp10_get_sby_perpixel_variance(cpi, &xd->plane[0].dst, bsize);
3786#endif // CONFIG_VP9_HIGHBITDEPTH
3787
3788 if ((source_variance + recon_variance) > LOW_VAR_THRESH) {
3789 absvar_diff = (source_variance > recon_variance)
3790 ? (source_variance - recon_variance)
3791 : (recon_variance - source_variance);
3792
3793 var_error = (200 * source_variance * recon_variance) /
3794 ((source_variance * source_variance) +
3795 (recon_variance * recon_variance));
3796 var_error = 100 - var_error;
3797 }
3798
3799 // Source variance above a threshold and ref frame is intra.
3800 // This case is targeted mainly at discouraging intra modes that give rise
3801 // to a predictor with a low spatial complexity compared to the source.
3802 if ((source_variance > LOW_VAR_THRESH) && (ref_frame == INTRA_FRAME) &&
3803 (source_variance > recon_variance)) {
James Zern5e16d392015-08-17 18:19:22 -07003804 var_factor = VPXMIN(absvar_diff, VPXMIN(VLOW_ADJ_MAX, var_error));
Jingning Han3ee6db62015-08-05 19:00:31 -07003805 // A second possible case of interest is where the source variance
3806 // is very low and we wish to discourage false texture or motion trails.
3807 } else if ((source_variance < (LOW_VAR_THRESH >> 1)) &&
3808 (recon_variance > source_variance)) {
James Zern5e16d392015-08-17 18:19:22 -07003809 var_factor = VPXMIN(absvar_diff, VPXMIN(VHIGH_ADJ_MAX, var_error));
Jingning Han3ee6db62015-08-05 19:00:31 -07003810 }
3811 *this_rd += (*this_rd * var_factor) / 100;
3812}
3813
3814
3815// Do we have an internal image edge (e.g. formatting bars).
Yaowu Xu26a9afc2015-08-13 09:42:27 -07003816int vp10_internal_image_edge(VP10_COMP *cpi) {
Jingning Han3ee6db62015-08-05 19:00:31 -07003817 return (cpi->oxcf.pass == 2) &&
3818 ((cpi->twopass.this_frame_stats.inactive_zone_rows > 0) ||
3819 (cpi->twopass.this_frame_stats.inactive_zone_cols > 0));
3820}
3821
3822// Checks to see if a super block is on a horizontal image edge.
3823// In most cases this is the "real" edge unless there are formatting
3824// bars embedded in the stream.
Yaowu Xu26a9afc2015-08-13 09:42:27 -07003825int vp10_active_h_edge(VP10_COMP *cpi, int mi_row, int mi_step) {
Jingning Han3ee6db62015-08-05 19:00:31 -07003826 int top_edge = 0;
3827 int bottom_edge = cpi->common.mi_rows;
3828 int is_active_h_edge = 0;
3829
3830 // For two pass account for any formatting bars detected.
3831 if (cpi->oxcf.pass == 2) {
3832 TWO_PASS *twopass = &cpi->twopass;
3833
3834 // The inactive region is specified in MBs not mi units.
3835 // The image edge is in the following MB row.
3836 top_edge += (int)(twopass->this_frame_stats.inactive_zone_rows * 2);
3837
3838 bottom_edge -= (int)(twopass->this_frame_stats.inactive_zone_rows * 2);
James Zern5e16d392015-08-17 18:19:22 -07003839 bottom_edge = VPXMAX(top_edge, bottom_edge);
Jingning Han3ee6db62015-08-05 19:00:31 -07003840 }
3841
3842 if (((top_edge >= mi_row) && (top_edge < (mi_row + mi_step))) ||
3843 ((bottom_edge >= mi_row) && (bottom_edge < (mi_row + mi_step)))) {
3844 is_active_h_edge = 1;
3845 }
3846 return is_active_h_edge;
3847}
3848
3849// Checks to see if a super block is on a vertical image edge.
3850// In most cases this is the "real" edge unless there are formatting
3851// bars embedded in the stream.
Yaowu Xu26a9afc2015-08-13 09:42:27 -07003852int vp10_active_v_edge(VP10_COMP *cpi, int mi_col, int mi_step) {
Jingning Han3ee6db62015-08-05 19:00:31 -07003853 int left_edge = 0;
3854 int right_edge = cpi->common.mi_cols;
3855 int is_active_v_edge = 0;
3856
3857 // For two pass account for any formatting bars detected.
3858 if (cpi->oxcf.pass == 2) {
3859 TWO_PASS *twopass = &cpi->twopass;
3860
3861 // The inactive region is specified in MBs not mi units.
3862 // The image edge is in the following MB row.
3863 left_edge += (int)(twopass->this_frame_stats.inactive_zone_cols * 2);
3864
3865 right_edge -= (int)(twopass->this_frame_stats.inactive_zone_cols * 2);
James Zern5e16d392015-08-17 18:19:22 -07003866 right_edge = VPXMAX(left_edge, right_edge);
Jingning Han3ee6db62015-08-05 19:00:31 -07003867 }
3868
3869 if (((left_edge >= mi_col) && (left_edge < (mi_col + mi_step))) ||
3870 ((right_edge >= mi_col) && (right_edge < (mi_col + mi_step)))) {
3871 is_active_v_edge = 1;
3872 }
3873 return is_active_v_edge;
3874}
3875
3876// Checks to see if a super block is at the edge of the active image.
3877// In most cases this is the "real" edge unless there are formatting
3878// bars embedded in the stream.
Yaowu Xu26a9afc2015-08-13 09:42:27 -07003879int vp10_active_edge_sb(VP10_COMP *cpi,
Jingning Han3ee6db62015-08-05 19:00:31 -07003880 int mi_row, int mi_col) {
3881 return vp10_active_h_edge(cpi, mi_row, MI_BLOCK_SIZE) ||
3882 vp10_active_v_edge(cpi, mi_col, MI_BLOCK_SIZE);
3883}
3884
Yaowu Xu26a9afc2015-08-13 09:42:27 -07003885void vp10_rd_pick_inter_mode_sb(VP10_COMP *cpi,
Jingning Han4fa8e732015-09-10 12:24:06 -07003886 TileDataEnc *tile_data,
3887 MACROBLOCK *x,
3888 int mi_row, int mi_col,
3889 RD_COST *rd_cost, BLOCK_SIZE bsize,
3890 PICK_MODE_CONTEXT *ctx,
3891 int64_t best_rd_so_far) {
Yaowu Xufc7cbd12015-08-13 09:36:53 -07003892 VP10_COMMON *const cm = &cpi->common;
Jingning Han3ee6db62015-08-05 19:00:31 -07003893 RD_OPT *const rd_opt = &cpi->rd;
3894 SPEED_FEATURES *const sf = &cpi->sf;
3895 MACROBLOCKD *const xd = &x->e_mbd;
3896 MB_MODE_INFO *const mbmi = &xd->mi[0]->mbmi;
3897 MB_MODE_INFO_EXT *const mbmi_ext = x->mbmi_ext;
3898 const struct segmentation *const seg = &cm->seg;
3899 PREDICTION_MODE this_mode;
3900 MV_REFERENCE_FRAME ref_frame, second_ref_frame;
3901 unsigned char segment_id = mbmi->segment_id;
3902 int comp_pred, i, k;
3903 int_mv frame_mv[MB_MODE_COUNT][MAX_REF_FRAMES];
3904 struct buf_2d yv12_mb[4][MAX_MB_PLANE];
3905 int_mv single_newmv[MAX_REF_FRAMES] = { { 0 } };
3906 INTERP_FILTER single_inter_filter[MB_MODE_COUNT][MAX_REF_FRAMES];
3907 int single_skippable[MB_MODE_COUNT][MAX_REF_FRAMES];
3908 static const int flag_list[4] = { 0, VP9_LAST_FLAG, VP9_GOLD_FLAG,
3909 VP9_ALT_FLAG };
3910 int64_t best_rd = best_rd_so_far;
3911 int64_t best_pred_diff[REFERENCE_MODES];
3912 int64_t best_pred_rd[REFERENCE_MODES];
3913 int64_t best_filter_rd[SWITCHABLE_FILTER_CONTEXTS];
3914 int64_t best_filter_diff[SWITCHABLE_FILTER_CONTEXTS];
3915 MB_MODE_INFO best_mbmode;
3916 int best_mode_skippable = 0;
3917 int midx, best_mode_index = -1;
3918 unsigned int ref_costs_single[MAX_REF_FRAMES], ref_costs_comp[MAX_REF_FRAMES];
3919 vpx_prob comp_mode_p;
3920 int64_t best_intra_rd = INT64_MAX;
3921 unsigned int best_pred_sse = UINT_MAX;
3922 PREDICTION_MODE best_intra_mode = DC_PRED;
3923 int rate_uv_intra[TX_SIZES], rate_uv_tokenonly[TX_SIZES];
3924 int64_t dist_uv[TX_SIZES];
3925 int skip_uv[TX_SIZES];
3926 PREDICTION_MODE mode_uv[TX_SIZES];
3927 const int intra_cost_penalty = vp10_get_intra_cost_penalty(
3928 cm->base_qindex, cm->y_dc_delta_q, cm->bit_depth);
3929 int best_skip2 = 0;
3930 uint8_t ref_frame_skip_mask[2] = { 0 };
3931 uint16_t mode_skip_mask[MAX_REF_FRAMES] = { 0 };
3932 int mode_skip_start = sf->mode_skip_start + 1;
3933 const int *const rd_threshes = rd_opt->threshes[segment_id][bsize];
3934 const int *const rd_thresh_freq_fact = tile_data->thresh_freq_fact[bsize];
3935 int64_t mode_threshold[MAX_MODES];
3936 int *mode_map = tile_data->mode_map[bsize];
3937 const int mode_search_skip_flags = sf->mode_search_skip_flags;
3938 int64_t mask_filter = 0;
3939 int64_t filter_cache[SWITCHABLE_FILTER_CONTEXTS];
3940
3941 vp10_zero(best_mbmode);
3942
Jingning Han3ee6db62015-08-05 19:00:31 -07003943 for (i = 0; i < SWITCHABLE_FILTER_CONTEXTS; ++i)
3944 filter_cache[i] = INT64_MAX;
3945
3946 estimate_ref_frame_costs(cm, xd, segment_id, ref_costs_single, ref_costs_comp,
3947 &comp_mode_p);
3948
3949 for (i = 0; i < REFERENCE_MODES; ++i)
3950 best_pred_rd[i] = INT64_MAX;
3951 for (i = 0; i < SWITCHABLE_FILTER_CONTEXTS; i++)
3952 best_filter_rd[i] = INT64_MAX;
3953 for (i = 0; i < TX_SIZES; i++)
3954 rate_uv_intra[i] = INT_MAX;
3955 for (i = 0; i < MAX_REF_FRAMES; ++i)
3956 x->pred_sse[i] = INT_MAX;
3957 for (i = 0; i < MB_MODE_COUNT; ++i) {
3958 for (k = 0; k < MAX_REF_FRAMES; ++k) {
3959 single_inter_filter[i][k] = SWITCHABLE;
3960 single_skippable[i][k] = 0;
3961 }
3962 }
3963
3964 rd_cost->rate = INT_MAX;
3965
3966 for (ref_frame = LAST_FRAME; ref_frame <= ALTREF_FRAME; ++ref_frame) {
3967 x->pred_mv_sad[ref_frame] = INT_MAX;
3968 if (cpi->ref_frame_flags & flag_list[ref_frame]) {
3969 assert(get_ref_frame_buffer(cpi, ref_frame) != NULL);
3970 setup_buffer_inter(cpi, x, ref_frame, bsize, mi_row, mi_col,
3971 frame_mv[NEARESTMV], frame_mv[NEARMV], yv12_mb);
3972 }
3973 frame_mv[NEWMV][ref_frame].as_int = INVALID_MV;
3974 frame_mv[ZEROMV][ref_frame].as_int = 0;
3975 }
3976
3977 for (ref_frame = LAST_FRAME; ref_frame <= ALTREF_FRAME; ++ref_frame) {
3978 if (!(cpi->ref_frame_flags & flag_list[ref_frame])) {
3979 // Skip checking missing references in both single and compound reference
3980 // modes. Note that a mode will be skipped iff both reference frames
3981 // are masked out.
3982 ref_frame_skip_mask[0] |= (1 << ref_frame);
3983 ref_frame_skip_mask[1] |= SECOND_REF_FRAME_MASK;
Jingning Han1eb760e2015-09-10 12:56:41 -07003984 } else {
Jingning Han3ee6db62015-08-05 19:00:31 -07003985 for (i = LAST_FRAME; i <= ALTREF_FRAME; ++i) {
3986 // Skip fixed mv modes for poor references
3987 if ((x->pred_mv_sad[ref_frame] >> 2) > x->pred_mv_sad[i]) {
3988 mode_skip_mask[ref_frame] |= INTER_NEAREST_NEAR_ZERO;
3989 break;
3990 }
3991 }
3992 }
3993 // If the segment reference frame feature is enabled....
3994 // then do nothing if the current ref frame is not allowed..
3995 if (segfeature_active(seg, segment_id, SEG_LVL_REF_FRAME) &&
3996 get_segdata(seg, segment_id, SEG_LVL_REF_FRAME) != (int)ref_frame) {
3997 ref_frame_skip_mask[0] |= (1 << ref_frame);
3998 ref_frame_skip_mask[1] |= SECOND_REF_FRAME_MASK;
3999 }
4000 }
4001
4002 // Disable this drop out case if the ref frame
4003 // segment level feature is enabled for this segment. This is to
4004 // prevent the possibility that we end up unable to pick any mode.
4005 if (!segfeature_active(seg, segment_id, SEG_LVL_REF_FRAME)) {
4006 // Only consider ZEROMV/ALTREF_FRAME for alt ref frame,
4007 // unless ARNR filtering is enabled in which case we want
4008 // an unfiltered alternative. We allow near/nearest as well
4009 // because they may result in zero-zero MVs but be cheaper.
4010 if (cpi->rc.is_src_frame_alt_ref && (cpi->oxcf.arnr_max_frames == 0)) {
4011 ref_frame_skip_mask[0] = (1 << LAST_FRAME) | (1 << GOLDEN_FRAME);
4012 ref_frame_skip_mask[1] = SECOND_REF_FRAME_MASK;
4013 mode_skip_mask[ALTREF_FRAME] = ~INTER_NEAREST_NEAR_ZERO;
4014 if (frame_mv[NEARMV][ALTREF_FRAME].as_int != 0)
4015 mode_skip_mask[ALTREF_FRAME] |= (1 << NEARMV);
4016 if (frame_mv[NEARESTMV][ALTREF_FRAME].as_int != 0)
4017 mode_skip_mask[ALTREF_FRAME] |= (1 << NEARESTMV);
4018 }
4019 }
4020
4021 if (cpi->rc.is_src_frame_alt_ref) {
4022 if (sf->alt_ref_search_fp) {
4023 mode_skip_mask[ALTREF_FRAME] = 0;
4024 ref_frame_skip_mask[0] = ~(1 << ALTREF_FRAME);
4025 ref_frame_skip_mask[1] = SECOND_REF_FRAME_MASK;
4026 }
4027 }
4028
4029 if (sf->alt_ref_search_fp)
4030 if (!cm->show_frame && x->pred_mv_sad[GOLDEN_FRAME] < INT_MAX)
4031 if (x->pred_mv_sad[ALTREF_FRAME] > (x->pred_mv_sad[GOLDEN_FRAME] << 1))
4032 mode_skip_mask[ALTREF_FRAME] |= INTER_ALL;
4033
4034 if (sf->adaptive_mode_search) {
4035 if (cm->show_frame && !cpi->rc.is_src_frame_alt_ref &&
4036 cpi->rc.frames_since_golden >= 3)
4037 if (x->pred_mv_sad[GOLDEN_FRAME] > (x->pred_mv_sad[LAST_FRAME] << 1))
4038 mode_skip_mask[GOLDEN_FRAME] |= INTER_ALL;
4039 }
4040
4041 if (bsize > sf->max_intra_bsize) {
4042 ref_frame_skip_mask[0] |= (1 << INTRA_FRAME);
4043 ref_frame_skip_mask[1] |= (1 << INTRA_FRAME);
4044 }
4045
4046 mode_skip_mask[INTRA_FRAME] |=
4047 ~(sf->intra_y_mode_mask[max_txsize_lookup[bsize]]);
4048
4049 for (i = 0; i <= LAST_NEW_MV_INDEX; ++i)
4050 mode_threshold[i] = 0;
4051 for (i = LAST_NEW_MV_INDEX + 1; i < MAX_MODES; ++i)
4052 mode_threshold[i] = ((int64_t)rd_threshes[i] * rd_thresh_freq_fact[i]) >> 5;
4053
4054 midx = sf->schedule_mode_search ? mode_skip_start : 0;
4055 while (midx > 4) {
4056 uint8_t end_pos = 0;
4057 for (i = 5; i < midx; ++i) {
4058 if (mode_threshold[mode_map[i - 1]] > mode_threshold[mode_map[i]]) {
4059 uint8_t tmp = mode_map[i];
4060 mode_map[i] = mode_map[i - 1];
4061 mode_map[i - 1] = tmp;
4062 end_pos = i;
4063 }
4064 }
4065 midx = end_pos;
4066 }
4067
hui su5d011cb2015-09-15 12:44:13 -07004068 mbmi->palette_mode_info.palette_size[0] = 0;
4069 mbmi->palette_mode_info.palette_size[1] = 0;
Jingning Han3ee6db62015-08-05 19:00:31 -07004070 for (midx = 0; midx < MAX_MODES; ++midx) {
4071 int mode_index = mode_map[midx];
4072 int mode_excluded = 0;
4073 int64_t this_rd = INT64_MAX;
4074 int disable_skip = 0;
4075 int compmode_cost = 0;
4076 int rate2 = 0, rate_y = 0, rate_uv = 0;
4077 int64_t distortion2 = 0, distortion_y = 0, distortion_uv = 0;
4078 int skippable = 0;
4079 int this_skip2 = 0;
4080 int64_t total_sse = INT64_MAX;
4081 int early_term = 0;
4082
4083 this_mode = vp10_mode_order[mode_index].mode;
4084 ref_frame = vp10_mode_order[mode_index].ref_frame[0];
4085 second_ref_frame = vp10_mode_order[mode_index].ref_frame[1];
4086
4087 // Look at the reference frame of the best mode so far and set the
4088 // skip mask to look at a subset of the remaining modes.
4089 if (midx == mode_skip_start && best_mode_index >= 0) {
4090 switch (best_mbmode.ref_frame[0]) {
4091 case INTRA_FRAME:
4092 break;
4093 case LAST_FRAME:
4094 ref_frame_skip_mask[0] |= LAST_FRAME_MODE_MASK;
4095 ref_frame_skip_mask[1] |= SECOND_REF_FRAME_MASK;
4096 break;
4097 case GOLDEN_FRAME:
4098 ref_frame_skip_mask[0] |= GOLDEN_FRAME_MODE_MASK;
4099 ref_frame_skip_mask[1] |= SECOND_REF_FRAME_MASK;
4100 break;
4101 case ALTREF_FRAME:
4102 ref_frame_skip_mask[0] |= ALT_REF_MODE_MASK;
4103 break;
4104 case NONE:
4105 case MAX_REF_FRAMES:
4106 assert(0 && "Invalid Reference frame");
4107 break;
4108 }
4109 }
4110
4111 if ((ref_frame_skip_mask[0] & (1 << ref_frame)) &&
James Zern5e16d392015-08-17 18:19:22 -07004112 (ref_frame_skip_mask[1] & (1 << VPXMAX(0, second_ref_frame))))
Jingning Han3ee6db62015-08-05 19:00:31 -07004113 continue;
4114
4115 if (mode_skip_mask[ref_frame] & (1 << this_mode))
4116 continue;
4117
4118 // Test best rd so far against threshold for trying this mode.
4119 if (best_mode_skippable && sf->schedule_mode_search)
4120 mode_threshold[mode_index] <<= 1;
4121
4122 if (best_rd < mode_threshold[mode_index])
4123 continue;
4124
Jingning Han3ee6db62015-08-05 19:00:31 -07004125 comp_pred = second_ref_frame > INTRA_FRAME;
4126 if (comp_pred) {
4127 if (!cpi->allow_comp_inter_inter)
4128 continue;
4129
4130 // Skip compound inter modes if ARF is not available.
4131 if (!(cpi->ref_frame_flags & flag_list[second_ref_frame]))
4132 continue;
4133
4134 // Do not allow compound prediction if the segment level reference frame
4135 // feature is in use as in this case there can only be one reference.
4136 if (segfeature_active(seg, segment_id, SEG_LVL_REF_FRAME))
4137 continue;
4138
4139 if ((mode_search_skip_flags & FLAG_SKIP_COMP_BESTINTRA) &&
4140 best_mode_index >= 0 && best_mbmode.ref_frame[0] == INTRA_FRAME)
4141 continue;
4142
4143 mode_excluded = cm->reference_mode == SINGLE_REFERENCE;
4144 } else {
4145 if (ref_frame != INTRA_FRAME)
4146 mode_excluded = cm->reference_mode == COMPOUND_REFERENCE;
4147 }
4148
4149 if (ref_frame == INTRA_FRAME) {
4150 if (sf->adaptive_mode_search)
4151 if ((x->source_variance << num_pels_log2_lookup[bsize]) > best_pred_sse)
4152 continue;
4153
4154 if (this_mode != DC_PRED) {
4155 // Disable intra modes other than DC_PRED for blocks with low variance
4156 // Threshold for intra skipping based on source variance
4157 // TODO(debargha): Specialize the threshold for super block sizes
4158 const unsigned int skip_intra_var_thresh = 64;
4159 if ((mode_search_skip_flags & FLAG_SKIP_INTRA_LOWVAR) &&
4160 x->source_variance < skip_intra_var_thresh)
4161 continue;
4162 // Only search the oblique modes if the best so far is
4163 // one of the neighboring directional modes
4164 if ((mode_search_skip_flags & FLAG_SKIP_INTRA_BESTINTER) &&
4165 (this_mode >= D45_PRED && this_mode <= TM_PRED)) {
4166 if (best_mode_index >= 0 &&
4167 best_mbmode.ref_frame[0] > INTRA_FRAME)
4168 continue;
4169 }
4170 if (mode_search_skip_flags & FLAG_SKIP_INTRA_DIRMISMATCH) {
4171 if (conditional_skipintra(this_mode, best_intra_mode))
4172 continue;
4173 }
4174 }
4175 } else {
4176 const MV_REFERENCE_FRAME ref_frames[2] = {ref_frame, second_ref_frame};
4177 if (!check_best_zero_mv(cpi, mbmi_ext->mode_context, frame_mv,
4178 this_mode, ref_frames))
4179 continue;
4180 }
4181
4182 mbmi->mode = this_mode;
4183 mbmi->uv_mode = DC_PRED;
4184 mbmi->ref_frame[0] = ref_frame;
4185 mbmi->ref_frame[1] = second_ref_frame;
4186 // Evaluate all sub-pel filters irrespective of whether we can use
4187 // them for this frame.
4188 mbmi->interp_filter = cm->interp_filter == SWITCHABLE ? EIGHTTAP
4189 : cm->interp_filter;
4190 mbmi->mv[0].as_int = mbmi->mv[1].as_int = 0;
4191
4192 x->skip = 0;
4193 set_ref_ptrs(cm, xd, ref_frame, second_ref_frame);
4194
4195 // Select prediction reference frames.
4196 for (i = 0; i < MAX_MB_PLANE; i++) {
4197 xd->plane[i].pre[0] = yv12_mb[ref_frame][i];
4198 if (comp_pred)
4199 xd->plane[i].pre[1] = yv12_mb[second_ref_frame][i];
4200 }
4201
4202 if (ref_frame == INTRA_FRAME) {
4203 TX_SIZE uv_tx;
4204 struct macroblockd_plane *const pd = &xd->plane[1];
4205 memset(x->skip_txfm, 0, sizeof(x->skip_txfm));
4206 super_block_yrd(cpi, x, &rate_y, &distortion_y, &skippable,
4207 NULL, bsize, best_rd);
4208 if (rate_y == INT_MAX)
4209 continue;
Jingning Han3ee6db62015-08-05 19:00:31 -07004210 uv_tx = get_uv_tx_size_impl(mbmi->tx_size, bsize, pd->subsampling_x,
4211 pd->subsampling_y);
4212 if (rate_uv_intra[uv_tx] == INT_MAX) {
4213 choose_intra_uv_mode(cpi, x, ctx, bsize, uv_tx,
4214 &rate_uv_intra[uv_tx], &rate_uv_tokenonly[uv_tx],
4215 &dist_uv[uv_tx], &skip_uv[uv_tx], &mode_uv[uv_tx]);
4216 }
4217
4218 rate_uv = rate_uv_tokenonly[uv_tx];
4219 distortion_uv = dist_uv[uv_tx];
4220 skippable = skippable && skip_uv[uv_tx];
4221 mbmi->uv_mode = mode_uv[uv_tx];
4222
4223 rate2 = rate_y + cpi->mbmode_cost[mbmi->mode] + rate_uv_intra[uv_tx];
4224 if (this_mode != DC_PRED && this_mode != TM_PRED)
4225 rate2 += intra_cost_penalty;
4226 distortion2 = distortion_y + distortion_uv;
4227 } else {
4228 this_rd = handle_inter_mode(cpi, x, bsize,
4229 &rate2, &distortion2, &skippable,
4230 &rate_y, &rate_uv,
4231 &disable_skip, frame_mv,
4232 mi_row, mi_col,
4233 single_newmv, single_inter_filter,
4234 single_skippable, &total_sse, best_rd,
4235 &mask_filter, filter_cache);
4236 if (this_rd == INT64_MAX)
4237 continue;
4238
4239 compmode_cost = vp10_cost_bit(comp_mode_p, comp_pred);
4240
4241 if (cm->reference_mode == REFERENCE_MODE_SELECT)
4242 rate2 += compmode_cost;
4243 }
4244
4245 // Estimate the reference frame signaling cost and add it
4246 // to the rolling cost variable.
4247 if (comp_pred) {
4248 rate2 += ref_costs_comp[ref_frame];
4249 } else {
4250 rate2 += ref_costs_single[ref_frame];
4251 }
4252
4253 if (!disable_skip) {
4254 if (skippable) {
4255 // Back out the coefficient coding costs
4256 rate2 -= (rate_y + rate_uv);
4257
4258 // Cost the skip mb case
4259 rate2 += vp10_cost_bit(vp10_get_skip_prob(cm, xd), 1);
Ronald S. Bultje60c58b52015-10-12 17:54:25 -04004260 } else if (ref_frame != INTRA_FRAME && !xd->lossless[mbmi->segment_id]) {
Jingning Han3ee6db62015-08-05 19:00:31 -07004261 if (RDCOST(x->rdmult, x->rddiv, rate_y + rate_uv, distortion2) <
4262 RDCOST(x->rdmult, x->rddiv, 0, total_sse)) {
4263 // Add in the cost of the no skip flag.
4264 rate2 += vp10_cost_bit(vp10_get_skip_prob(cm, xd), 0);
4265 } else {
4266 // FIXME(rbultje) make this work for splitmv also
4267 rate2 += vp10_cost_bit(vp10_get_skip_prob(cm, xd), 1);
4268 distortion2 = total_sse;
4269 assert(total_sse >= 0);
4270 rate2 -= (rate_y + rate_uv);
4271 this_skip2 = 1;
4272 }
4273 } else {
4274 // Add in the cost of the no skip flag.
4275 rate2 += vp10_cost_bit(vp10_get_skip_prob(cm, xd), 0);
4276 }
4277
4278 // Calculate the final RD estimate for this mode.
4279 this_rd = RDCOST(x->rdmult, x->rddiv, rate2, distortion2);
4280 }
4281
4282 // Apply an adjustment to the rd value based on the similarity of the
4283 // source variance and reconstructed variance.
4284 rd_variance_adjustment(cpi, x, bsize, &this_rd,
4285 ref_frame, x->source_variance);
4286
4287 if (ref_frame == INTRA_FRAME) {
4288 // Keep record of best intra rd
4289 if (this_rd < best_intra_rd) {
4290 best_intra_rd = this_rd;
4291 best_intra_mode = mbmi->mode;
4292 }
4293 }
4294
4295 if (!disable_skip && ref_frame == INTRA_FRAME) {
4296 for (i = 0; i < REFERENCE_MODES; ++i)
James Zern5e16d392015-08-17 18:19:22 -07004297 best_pred_rd[i] = VPXMIN(best_pred_rd[i], this_rd);
Jingning Han3ee6db62015-08-05 19:00:31 -07004298 for (i = 0; i < SWITCHABLE_FILTER_CONTEXTS; i++)
James Zern5e16d392015-08-17 18:19:22 -07004299 best_filter_rd[i] = VPXMIN(best_filter_rd[i], this_rd);
Jingning Han3ee6db62015-08-05 19:00:31 -07004300 }
4301
4302 // Did this mode help.. i.e. is it the new best mode
4303 if (this_rd < best_rd || x->skip) {
4304 int max_plane = MAX_MB_PLANE;
4305 if (!mode_excluded) {
4306 // Note index of best mode so far
4307 best_mode_index = mode_index;
4308
4309 if (ref_frame == INTRA_FRAME) {
4310 /* required for left and above block mv */
4311 mbmi->mv[0].as_int = 0;
4312 max_plane = 1;
4313 } else {
4314 best_pred_sse = x->pred_sse[ref_frame];
4315 }
4316
4317 rd_cost->rate = rate2;
4318 rd_cost->dist = distortion2;
4319 rd_cost->rdcost = this_rd;
4320 best_rd = this_rd;
4321 best_mbmode = *mbmi;
4322 best_skip2 = this_skip2;
4323 best_mode_skippable = skippable;
4324
4325 if (!x->select_tx_size)
4326 swap_block_ptr(x, ctx, 1, 0, 0, max_plane);
4327 memcpy(ctx->zcoeff_blk, x->zcoeff_blk[mbmi->tx_size],
hui su088b05f2015-08-12 10:41:51 -07004328 sizeof(ctx->zcoeff_blk[0]) * ctx->num_4x4_blk);
Jingning Han3ee6db62015-08-05 19:00:31 -07004329
4330 // TODO(debargha): enhance this test with a better distortion prediction
4331 // based on qp, activity mask and history
4332 if ((mode_search_skip_flags & FLAG_EARLY_TERMINATE) &&
4333 (mode_index > MIN_EARLY_TERM_INDEX)) {
4334 int qstep = xd->plane[0].dequant[1];
4335 // TODO(debargha): Enhance this by specializing for each mode_index
4336 int scale = 4;
4337#if CONFIG_VP9_HIGHBITDEPTH
4338 if (xd->cur_buf->flags & YV12_FLAG_HIGHBITDEPTH) {
4339 qstep >>= (xd->bd - 8);
4340 }
4341#endif // CONFIG_VP9_HIGHBITDEPTH
4342 if (x->source_variance < UINT_MAX) {
4343 const int var_adjust = (x->source_variance < 16);
4344 scale -= var_adjust;
4345 }
4346 if (ref_frame > INTRA_FRAME &&
4347 distortion2 * scale < qstep * qstep) {
4348 early_term = 1;
4349 }
4350 }
4351 }
4352 }
4353
4354 /* keep record of best compound/single-only prediction */
4355 if (!disable_skip && ref_frame != INTRA_FRAME) {
4356 int64_t single_rd, hybrid_rd, single_rate, hybrid_rate;
4357
4358 if (cm->reference_mode == REFERENCE_MODE_SELECT) {
4359 single_rate = rate2 - compmode_cost;
4360 hybrid_rate = rate2;
4361 } else {
4362 single_rate = rate2;
4363 hybrid_rate = rate2 + compmode_cost;
4364 }
4365
4366 single_rd = RDCOST(x->rdmult, x->rddiv, single_rate, distortion2);
4367 hybrid_rd = RDCOST(x->rdmult, x->rddiv, hybrid_rate, distortion2);
4368
4369 if (!comp_pred) {
4370 if (single_rd < best_pred_rd[SINGLE_REFERENCE])
4371 best_pred_rd[SINGLE_REFERENCE] = single_rd;
4372 } else {
4373 if (single_rd < best_pred_rd[COMPOUND_REFERENCE])
4374 best_pred_rd[COMPOUND_REFERENCE] = single_rd;
4375 }
4376 if (hybrid_rd < best_pred_rd[REFERENCE_MODE_SELECT])
4377 best_pred_rd[REFERENCE_MODE_SELECT] = hybrid_rd;
4378
4379 /* keep record of best filter type */
4380 if (!mode_excluded && cm->interp_filter != BILINEAR) {
4381 int64_t ref = filter_cache[cm->interp_filter == SWITCHABLE ?
4382 SWITCHABLE_FILTERS : cm->interp_filter];
4383
4384 for (i = 0; i < SWITCHABLE_FILTER_CONTEXTS; i++) {
4385 int64_t adj_rd;
4386 if (ref == INT64_MAX)
4387 adj_rd = 0;
4388 else if (filter_cache[i] == INT64_MAX)
4389 // when early termination is triggered, the encoder does not have
4390 // access to the rate-distortion cost. it only knows that the cost
4391 // should be above the maximum valid value. hence it takes the known
4392 // maximum plus an arbitrary constant as the rate-distortion cost.
4393 adj_rd = mask_filter - ref + 10;
4394 else
4395 adj_rd = filter_cache[i] - ref;
4396
4397 adj_rd += this_rd;
James Zern5e16d392015-08-17 18:19:22 -07004398 best_filter_rd[i] = VPXMIN(best_filter_rd[i], adj_rd);
Jingning Han3ee6db62015-08-05 19:00:31 -07004399 }
4400 }
4401 }
4402
4403 if (early_term)
4404 break;
4405
4406 if (x->skip && !comp_pred)
4407 break;
4408 }
4409
4410 // The inter modes' rate costs are not calculated precisely in some cases.
4411 // Therefore, sometimes, NEWMV is chosen instead of NEARESTMV, NEARMV, and
4412 // ZEROMV. Here, checks are added for those cases, and the mode decisions
4413 // are corrected.
4414 if (best_mbmode.mode == NEWMV) {
4415 const MV_REFERENCE_FRAME refs[2] = {best_mbmode.ref_frame[0],
4416 best_mbmode.ref_frame[1]};
4417 int comp_pred_mode = refs[1] > INTRA_FRAME;
4418
4419 if (frame_mv[NEARESTMV][refs[0]].as_int == best_mbmode.mv[0].as_int &&
4420 ((comp_pred_mode && frame_mv[NEARESTMV][refs[1]].as_int ==
4421 best_mbmode.mv[1].as_int) || !comp_pred_mode))
4422 best_mbmode.mode = NEARESTMV;
4423 else if (frame_mv[NEARMV][refs[0]].as_int == best_mbmode.mv[0].as_int &&
4424 ((comp_pred_mode && frame_mv[NEARMV][refs[1]].as_int ==
4425 best_mbmode.mv[1].as_int) || !comp_pred_mode))
4426 best_mbmode.mode = NEARMV;
4427 else if (best_mbmode.mv[0].as_int == 0 &&
4428 ((comp_pred_mode && best_mbmode.mv[1].as_int == 0) || !comp_pred_mode))
4429 best_mbmode.mode = ZEROMV;
4430 }
4431
4432 if (best_mode_index < 0 || best_rd >= best_rd_so_far) {
4433 rd_cost->rate = INT_MAX;
4434 rd_cost->rdcost = INT64_MAX;
4435 return;
4436 }
4437
4438 // If we used an estimate for the uv intra rd in the loop above...
4439 if (sf->use_uv_intra_rd_estimate) {
4440 // Do Intra UV best rd mode selection if best mode choice above was intra.
4441 if (best_mbmode.ref_frame[0] == INTRA_FRAME) {
4442 TX_SIZE uv_tx_size;
4443 *mbmi = best_mbmode;
4444 uv_tx_size = get_uv_tx_size(mbmi, &xd->plane[1]);
4445 rd_pick_intra_sbuv_mode(cpi, x, ctx, &rate_uv_intra[uv_tx_size],
4446 &rate_uv_tokenonly[uv_tx_size],
4447 &dist_uv[uv_tx_size],
4448 &skip_uv[uv_tx_size],
4449 bsize < BLOCK_8X8 ? BLOCK_8X8 : bsize,
4450 uv_tx_size);
4451 }
4452 }
4453
4454 assert((cm->interp_filter == SWITCHABLE) ||
4455 (cm->interp_filter == best_mbmode.interp_filter) ||
4456 !is_inter_block(&best_mbmode));
4457
4458 if (!cpi->rc.is_src_frame_alt_ref)
4459 vp10_update_rd_thresh_fact(tile_data->thresh_freq_fact,
4460 sf->adaptive_rd_thresh, bsize, best_mode_index);
4461
4462 // macroblock modes
4463 *mbmi = best_mbmode;
4464 x->skip |= best_skip2;
4465
4466 for (i = 0; i < REFERENCE_MODES; ++i) {
4467 if (best_pred_rd[i] == INT64_MAX)
4468 best_pred_diff[i] = INT_MIN;
4469 else
4470 best_pred_diff[i] = best_rd - best_pred_rd[i];
4471 }
4472
4473 if (!x->skip) {
4474 for (i = 0; i < SWITCHABLE_FILTER_CONTEXTS; i++) {
4475 if (best_filter_rd[i] == INT64_MAX)
4476 best_filter_diff[i] = 0;
4477 else
4478 best_filter_diff[i] = best_rd - best_filter_rd[i];
4479 }
4480 if (cm->interp_filter == SWITCHABLE)
4481 assert(best_filter_diff[SWITCHABLE_FILTERS] == 0);
4482 } else {
4483 vp10_zero(best_filter_diff);
4484 }
4485
4486 // TODO(yunqingwang): Moving this line in front of the above best_filter_diff
4487 // updating code causes PSNR loss. Need to figure out the confliction.
4488 x->skip |= best_mode_skippable;
4489
4490 if (!x->skip && !x->select_tx_size) {
4491 int has_high_freq_coeff = 0;
4492 int plane;
4493 int max_plane = is_inter_block(&xd->mi[0]->mbmi)
4494 ? MAX_MB_PLANE : 1;
4495 for (plane = 0; plane < max_plane; ++plane) {
4496 x->plane[plane].eobs = ctx->eobs_pbuf[plane][1];
4497 has_high_freq_coeff |= vp10_has_high_freq_in_plane(x, bsize, plane);
4498 }
4499
4500 for (plane = max_plane; plane < MAX_MB_PLANE; ++plane) {
4501 x->plane[plane].eobs = ctx->eobs_pbuf[plane][2];
4502 has_high_freq_coeff |= vp10_has_high_freq_in_plane(x, bsize, plane);
4503 }
4504
4505 best_mode_skippable |= !has_high_freq_coeff;
4506 }
4507
4508 assert(best_mode_index >= 0);
4509
4510 store_coding_context(x, ctx, best_mode_index, best_pred_diff,
4511 best_filter_diff, best_mode_skippable);
4512}
4513
Yaowu Xu26a9afc2015-08-13 09:42:27 -07004514void vp10_rd_pick_inter_mode_sb_seg_skip(VP10_COMP *cpi,
Jingning Han3ee6db62015-08-05 19:00:31 -07004515 TileDataEnc *tile_data,
4516 MACROBLOCK *x,
4517 RD_COST *rd_cost,
4518 BLOCK_SIZE bsize,
4519 PICK_MODE_CONTEXT *ctx,
4520 int64_t best_rd_so_far) {
Yaowu Xufc7cbd12015-08-13 09:36:53 -07004521 VP10_COMMON *const cm = &cpi->common;
Jingning Han3ee6db62015-08-05 19:00:31 -07004522 MACROBLOCKD *const xd = &x->e_mbd;
4523 MB_MODE_INFO *const mbmi = &xd->mi[0]->mbmi;
4524 unsigned char segment_id = mbmi->segment_id;
4525 const int comp_pred = 0;
4526 int i;
4527 int64_t best_pred_diff[REFERENCE_MODES];
4528 int64_t best_filter_diff[SWITCHABLE_FILTER_CONTEXTS];
4529 unsigned int ref_costs_single[MAX_REF_FRAMES], ref_costs_comp[MAX_REF_FRAMES];
4530 vpx_prob comp_mode_p;
4531 INTERP_FILTER best_filter = SWITCHABLE;
4532 int64_t this_rd = INT64_MAX;
4533 int rate2 = 0;
4534 const int64_t distortion2 = 0;
4535
Jingning Han3ee6db62015-08-05 19:00:31 -07004536 estimate_ref_frame_costs(cm, xd, segment_id, ref_costs_single, ref_costs_comp,
4537 &comp_mode_p);
4538
4539 for (i = 0; i < MAX_REF_FRAMES; ++i)
4540 x->pred_sse[i] = INT_MAX;
4541 for (i = LAST_FRAME; i < MAX_REF_FRAMES; ++i)
4542 x->pred_mv_sad[i] = INT_MAX;
4543
4544 rd_cost->rate = INT_MAX;
4545
4546 assert(segfeature_active(&cm->seg, segment_id, SEG_LVL_SKIP));
4547
hui su5d011cb2015-09-15 12:44:13 -07004548 mbmi->palette_mode_info.palette_size[0] = 0;
4549 mbmi->palette_mode_info.palette_size[1] = 0;
Jingning Han3ee6db62015-08-05 19:00:31 -07004550 mbmi->mode = ZEROMV;
4551 mbmi->uv_mode = DC_PRED;
4552 mbmi->ref_frame[0] = LAST_FRAME;
4553 mbmi->ref_frame[1] = NONE;
4554 mbmi->mv[0].as_int = 0;
4555 x->skip = 1;
4556
4557 if (cm->interp_filter != BILINEAR) {
4558 best_filter = EIGHTTAP;
4559 if (cm->interp_filter == SWITCHABLE &&
4560 x->source_variance >= cpi->sf.disable_filter_search_var_thresh) {
4561 int rs;
4562 int best_rs = INT_MAX;
4563 for (i = 0; i < SWITCHABLE_FILTERS; ++i) {
4564 mbmi->interp_filter = i;
4565 rs = vp10_get_switchable_rate(cpi, xd);
4566 if (rs < best_rs) {
4567 best_rs = rs;
4568 best_filter = mbmi->interp_filter;
4569 }
4570 }
4571 }
4572 }
4573 // Set the appropriate filter
4574 if (cm->interp_filter == SWITCHABLE) {
4575 mbmi->interp_filter = best_filter;
4576 rate2 += vp10_get_switchable_rate(cpi, xd);
4577 } else {
4578 mbmi->interp_filter = cm->interp_filter;
4579 }
4580
4581 if (cm->reference_mode == REFERENCE_MODE_SELECT)
4582 rate2 += vp10_cost_bit(comp_mode_p, comp_pred);
4583
4584 // Estimate the reference frame signaling cost and add it
4585 // to the rolling cost variable.
4586 rate2 += ref_costs_single[LAST_FRAME];
4587 this_rd = RDCOST(x->rdmult, x->rddiv, rate2, distortion2);
4588
4589 rd_cost->rate = rate2;
4590 rd_cost->dist = distortion2;
4591 rd_cost->rdcost = this_rd;
4592
4593 if (this_rd >= best_rd_so_far) {
4594 rd_cost->rate = INT_MAX;
4595 rd_cost->rdcost = INT64_MAX;
4596 return;
4597 }
4598
4599 assert((cm->interp_filter == SWITCHABLE) ||
4600 (cm->interp_filter == mbmi->interp_filter));
4601
4602 vp10_update_rd_thresh_fact(tile_data->thresh_freq_fact,
4603 cpi->sf.adaptive_rd_thresh, bsize, THR_ZEROMV);
4604
4605 vp10_zero(best_pred_diff);
4606 vp10_zero(best_filter_diff);
4607
4608 if (!x->select_tx_size)
4609 swap_block_ptr(x, ctx, 1, 0, 0, MAX_MB_PLANE);
4610 store_coding_context(x, ctx, THR_ZEROMV,
4611 best_pred_diff, best_filter_diff, 0);
4612}
4613
Yaowu Xu26a9afc2015-08-13 09:42:27 -07004614void vp10_rd_pick_inter_mode_sub8x8(VP10_COMP *cpi,
Jingning Han3ee6db62015-08-05 19:00:31 -07004615 TileDataEnc *tile_data,
4616 MACROBLOCK *x,
4617 int mi_row, int mi_col,
4618 RD_COST *rd_cost,
4619 BLOCK_SIZE bsize,
4620 PICK_MODE_CONTEXT *ctx,
4621 int64_t best_rd_so_far) {
Yaowu Xufc7cbd12015-08-13 09:36:53 -07004622 VP10_COMMON *const cm = &cpi->common;
Jingning Han3ee6db62015-08-05 19:00:31 -07004623 RD_OPT *const rd_opt = &cpi->rd;
4624 SPEED_FEATURES *const sf = &cpi->sf;
4625 MACROBLOCKD *const xd = &x->e_mbd;
4626 MB_MODE_INFO *const mbmi = &xd->mi[0]->mbmi;
4627 const struct segmentation *const seg = &cm->seg;
4628 MV_REFERENCE_FRAME ref_frame, second_ref_frame;
4629 unsigned char segment_id = mbmi->segment_id;
4630 int comp_pred, i;
4631 int_mv frame_mv[MB_MODE_COUNT][MAX_REF_FRAMES];
4632 struct buf_2d yv12_mb[4][MAX_MB_PLANE];
4633 static const int flag_list[4] = { 0, VP9_LAST_FLAG, VP9_GOLD_FLAG,
4634 VP9_ALT_FLAG };
4635 int64_t best_rd = best_rd_so_far;
4636 int64_t best_yrd = best_rd_so_far; // FIXME(rbultje) more precise
4637 int64_t best_pred_diff[REFERENCE_MODES];
4638 int64_t best_pred_rd[REFERENCE_MODES];
4639 int64_t best_filter_rd[SWITCHABLE_FILTER_CONTEXTS];
4640 int64_t best_filter_diff[SWITCHABLE_FILTER_CONTEXTS];
4641 MB_MODE_INFO best_mbmode;
4642 int ref_index, best_ref_index = 0;
4643 unsigned int ref_costs_single[MAX_REF_FRAMES], ref_costs_comp[MAX_REF_FRAMES];
4644 vpx_prob comp_mode_p;
4645 INTERP_FILTER tmp_best_filter = SWITCHABLE;
4646 int rate_uv_intra, rate_uv_tokenonly;
4647 int64_t dist_uv;
4648 int skip_uv;
4649 PREDICTION_MODE mode_uv = DC_PRED;
4650 const int intra_cost_penalty = vp10_get_intra_cost_penalty(
4651 cm->base_qindex, cm->y_dc_delta_q, cm->bit_depth);
4652 int_mv seg_mvs[4][MAX_REF_FRAMES];
4653 b_mode_info best_bmodes[4];
4654 int best_skip2 = 0;
4655 int ref_frame_skip_mask[2] = { 0 };
4656 int64_t mask_filter = 0;
4657 int64_t filter_cache[SWITCHABLE_FILTER_CONTEXTS];
4658 int internal_active_edge =
4659 vp10_active_edge_sb(cpi, mi_row, mi_col) && vp10_internal_image_edge(cpi);
4660
Jingning Han3ee6db62015-08-05 19:00:31 -07004661 memset(x->zcoeff_blk[TX_4X4], 0, 4);
4662 vp10_zero(best_mbmode);
4663
4664 for (i = 0; i < SWITCHABLE_FILTER_CONTEXTS; ++i)
4665 filter_cache[i] = INT64_MAX;
4666
4667 for (i = 0; i < 4; i++) {
4668 int j;
4669 for (j = 0; j < MAX_REF_FRAMES; j++)
4670 seg_mvs[i][j].as_int = INVALID_MV;
4671 }
4672
4673 estimate_ref_frame_costs(cm, xd, segment_id, ref_costs_single, ref_costs_comp,
4674 &comp_mode_p);
4675
4676 for (i = 0; i < REFERENCE_MODES; ++i)
4677 best_pred_rd[i] = INT64_MAX;
4678 for (i = 0; i < SWITCHABLE_FILTER_CONTEXTS; i++)
4679 best_filter_rd[i] = INT64_MAX;
4680 rate_uv_intra = INT_MAX;
4681
4682 rd_cost->rate = INT_MAX;
4683
4684 for (ref_frame = LAST_FRAME; ref_frame <= ALTREF_FRAME; ref_frame++) {
4685 if (cpi->ref_frame_flags & flag_list[ref_frame]) {
4686 setup_buffer_inter(cpi, x, ref_frame, bsize, mi_row, mi_col,
4687 frame_mv[NEARESTMV], frame_mv[NEARMV],
4688 yv12_mb);
4689 } else {
4690 ref_frame_skip_mask[0] |= (1 << ref_frame);
4691 ref_frame_skip_mask[1] |= SECOND_REF_FRAME_MASK;
4692 }
4693 frame_mv[NEWMV][ref_frame].as_int = INVALID_MV;
4694 frame_mv[ZEROMV][ref_frame].as_int = 0;
4695 }
4696
hui su5d011cb2015-09-15 12:44:13 -07004697 mbmi->palette_mode_info.palette_size[0] = 0;
4698 mbmi->palette_mode_info.palette_size[1] = 0;
4699
Jingning Han3ee6db62015-08-05 19:00:31 -07004700 for (ref_index = 0; ref_index < MAX_REFS; ++ref_index) {
4701 int mode_excluded = 0;
4702 int64_t this_rd = INT64_MAX;
4703 int disable_skip = 0;
4704 int compmode_cost = 0;
4705 int rate2 = 0, rate_y = 0, rate_uv = 0;
4706 int64_t distortion2 = 0, distortion_y = 0, distortion_uv = 0;
4707 int skippable = 0;
4708 int i;
4709 int this_skip2 = 0;
4710 int64_t total_sse = INT_MAX;
4711 int early_term = 0;
4712
4713 ref_frame = vp10_ref_order[ref_index].ref_frame[0];
4714 second_ref_frame = vp10_ref_order[ref_index].ref_frame[1];
4715
4716 // Look at the reference frame of the best mode so far and set the
4717 // skip mask to look at a subset of the remaining modes.
4718 if (ref_index > 2 && sf->mode_skip_start < MAX_MODES) {
4719 if (ref_index == 3) {
4720 switch (best_mbmode.ref_frame[0]) {
4721 case INTRA_FRAME:
4722 break;
4723 case LAST_FRAME:
4724 ref_frame_skip_mask[0] |= (1 << GOLDEN_FRAME) | (1 << ALTREF_FRAME);
4725 ref_frame_skip_mask[1] |= SECOND_REF_FRAME_MASK;
4726 break;
4727 case GOLDEN_FRAME:
4728 ref_frame_skip_mask[0] |= (1 << LAST_FRAME) | (1 << ALTREF_FRAME);
4729 ref_frame_skip_mask[1] |= SECOND_REF_FRAME_MASK;
4730 break;
4731 case ALTREF_FRAME:
4732 ref_frame_skip_mask[0] |= (1 << GOLDEN_FRAME) | (1 << LAST_FRAME);
4733 break;
4734 case NONE:
4735 case MAX_REF_FRAMES:
4736 assert(0 && "Invalid Reference frame");
4737 break;
4738 }
4739 }
4740 }
4741
4742 if ((ref_frame_skip_mask[0] & (1 << ref_frame)) &&
James Zern5e16d392015-08-17 18:19:22 -07004743 (ref_frame_skip_mask[1] & (1 << VPXMAX(0, second_ref_frame))))
Jingning Han3ee6db62015-08-05 19:00:31 -07004744 continue;
4745
4746 // Test best rd so far against threshold for trying this mode.
4747 if (!internal_active_edge &&
4748 rd_less_than_thresh(best_rd,
4749 rd_opt->threshes[segment_id][bsize][ref_index],
4750 tile_data->thresh_freq_fact[bsize][ref_index]))
4751 continue;
4752
4753 comp_pred = second_ref_frame > INTRA_FRAME;
4754 if (comp_pred) {
4755 if (!cpi->allow_comp_inter_inter)
4756 continue;
4757 if (!(cpi->ref_frame_flags & flag_list[second_ref_frame]))
4758 continue;
4759 // Do not allow compound prediction if the segment level reference frame
4760 // feature is in use as in this case there can only be one reference.
4761 if (segfeature_active(seg, segment_id, SEG_LVL_REF_FRAME))
4762 continue;
4763
4764 if ((sf->mode_search_skip_flags & FLAG_SKIP_COMP_BESTINTRA) &&
4765 best_mbmode.ref_frame[0] == INTRA_FRAME)
4766 continue;
4767 }
4768
4769 // TODO(jingning, jkoleszar): scaling reference frame not supported for
4770 // sub8x8 blocks.
4771 if (ref_frame > INTRA_FRAME &&
4772 vp10_is_scaled(&cm->frame_refs[ref_frame - 1].sf))
4773 continue;
4774
4775 if (second_ref_frame > INTRA_FRAME &&
4776 vp10_is_scaled(&cm->frame_refs[second_ref_frame - 1].sf))
4777 continue;
4778
4779 if (comp_pred)
4780 mode_excluded = cm->reference_mode == SINGLE_REFERENCE;
4781 else if (ref_frame != INTRA_FRAME)
4782 mode_excluded = cm->reference_mode == COMPOUND_REFERENCE;
4783
4784 // If the segment reference frame feature is enabled....
4785 // then do nothing if the current ref frame is not allowed..
4786 if (segfeature_active(seg, segment_id, SEG_LVL_REF_FRAME) &&
4787 get_segdata(seg, segment_id, SEG_LVL_REF_FRAME) != (int)ref_frame) {
4788 continue;
4789 // Disable this drop out case if the ref frame
4790 // segment level feature is enabled for this segment. This is to
4791 // prevent the possibility that we end up unable to pick any mode.
4792 } else if (!segfeature_active(seg, segment_id, SEG_LVL_REF_FRAME)) {
4793 // Only consider ZEROMV/ALTREF_FRAME for alt ref frame,
4794 // unless ARNR filtering is enabled in which case we want
4795 // an unfiltered alternative. We allow near/nearest as well
4796 // because they may result in zero-zero MVs but be cheaper.
4797 if (cpi->rc.is_src_frame_alt_ref && (cpi->oxcf.arnr_max_frames == 0))
4798 continue;
4799 }
4800
4801 mbmi->tx_size = TX_4X4;
4802 mbmi->uv_mode = DC_PRED;
4803 mbmi->ref_frame[0] = ref_frame;
4804 mbmi->ref_frame[1] = second_ref_frame;
4805 // Evaluate all sub-pel filters irrespective of whether we can use
4806 // them for this frame.
4807 mbmi->interp_filter = cm->interp_filter == SWITCHABLE ? EIGHTTAP
4808 : cm->interp_filter;
4809 x->skip = 0;
4810 set_ref_ptrs(cm, xd, ref_frame, second_ref_frame);
4811
4812 // Select prediction reference frames.
4813 for (i = 0; i < MAX_MB_PLANE; i++) {
4814 xd->plane[i].pre[0] = yv12_mb[ref_frame][i];
4815 if (comp_pred)
4816 xd->plane[i].pre[1] = yv12_mb[second_ref_frame][i];
4817 }
4818
Jingning Han704985e2015-10-08 12:05:03 -07004819#if CONFIG_VAR_TX
4820 for (i = 0; i < 64; ++i)
4821 mbmi->inter_tx_size[i] = mbmi->tx_size;
4822#endif
4823
Jingning Han3ee6db62015-08-05 19:00:31 -07004824 if (ref_frame == INTRA_FRAME) {
4825 int rate;
4826 if (rd_pick_intra_sub_8x8_y_mode(cpi, x, &rate, &rate_y,
4827 &distortion_y, best_rd) >= best_rd)
4828 continue;
4829 rate2 += rate;
4830 rate2 += intra_cost_penalty;
4831 distortion2 += distortion_y;
4832
4833 if (rate_uv_intra == INT_MAX) {
4834 choose_intra_uv_mode(cpi, x, ctx, bsize, TX_4X4,
4835 &rate_uv_intra,
4836 &rate_uv_tokenonly,
4837 &dist_uv, &skip_uv,
4838 &mode_uv);
4839 }
4840 rate2 += rate_uv_intra;
4841 rate_uv = rate_uv_tokenonly;
4842 distortion2 += dist_uv;
4843 distortion_uv = dist_uv;
4844 mbmi->uv_mode = mode_uv;
4845 } else {
4846 int rate;
4847 int64_t distortion;
4848 int64_t this_rd_thresh;
4849 int64_t tmp_rd, tmp_best_rd = INT64_MAX, tmp_best_rdu = INT64_MAX;
4850 int tmp_best_rate = INT_MAX, tmp_best_ratey = INT_MAX;
4851 int64_t tmp_best_distortion = INT_MAX, tmp_best_sse, uv_sse;
4852 int tmp_best_skippable = 0;
4853 int switchable_filter_index;
4854 int_mv *second_ref = comp_pred ?
4855 &x->mbmi_ext->ref_mvs[second_ref_frame][0] : NULL;
4856 b_mode_info tmp_best_bmodes[16];
4857 MB_MODE_INFO tmp_best_mbmode;
4858 BEST_SEG_INFO bsi[SWITCHABLE_FILTERS];
4859 int pred_exists = 0;
4860 int uv_skippable;
4861
4862 this_rd_thresh = (ref_frame == LAST_FRAME) ?
4863 rd_opt->threshes[segment_id][bsize][THR_LAST] :
4864 rd_opt->threshes[segment_id][bsize][THR_ALTR];
4865 this_rd_thresh = (ref_frame == GOLDEN_FRAME) ?
4866 rd_opt->threshes[segment_id][bsize][THR_GOLD] : this_rd_thresh;
4867 for (i = 0; i < SWITCHABLE_FILTER_CONTEXTS; ++i)
4868 filter_cache[i] = INT64_MAX;
4869
4870 if (cm->interp_filter != BILINEAR) {
4871 tmp_best_filter = EIGHTTAP;
4872 if (x->source_variance < sf->disable_filter_search_var_thresh) {
4873 tmp_best_filter = EIGHTTAP;
4874 } else if (sf->adaptive_pred_interp_filter == 1 &&
4875 ctx->pred_interp_filter < SWITCHABLE) {
4876 tmp_best_filter = ctx->pred_interp_filter;
4877 } else if (sf->adaptive_pred_interp_filter == 2) {
4878 tmp_best_filter = ctx->pred_interp_filter < SWITCHABLE ?
4879 ctx->pred_interp_filter : 0;
4880 } else {
4881 for (switchable_filter_index = 0;
4882 switchable_filter_index < SWITCHABLE_FILTERS;
4883 ++switchable_filter_index) {
4884 int newbest, rs;
4885 int64_t rs_rd;
4886 MB_MODE_INFO_EXT *mbmi_ext = x->mbmi_ext;
4887 mbmi->interp_filter = switchable_filter_index;
4888 tmp_rd = rd_pick_best_sub8x8_mode(cpi, x,
4889 &mbmi_ext->ref_mvs[ref_frame][0],
4890 second_ref, best_yrd, &rate,
4891 &rate_y, &distortion,
4892 &skippable, &total_sse,
4893 (int) this_rd_thresh, seg_mvs,
4894 bsi, switchable_filter_index,
4895 mi_row, mi_col);
4896
4897 if (tmp_rd == INT64_MAX)
4898 continue;
4899 rs = vp10_get_switchable_rate(cpi, xd);
4900 rs_rd = RDCOST(x->rdmult, x->rddiv, rs, 0);
4901 filter_cache[switchable_filter_index] = tmp_rd;
4902 filter_cache[SWITCHABLE_FILTERS] =
James Zern5e16d392015-08-17 18:19:22 -07004903 VPXMIN(filter_cache[SWITCHABLE_FILTERS], tmp_rd + rs_rd);
Jingning Han3ee6db62015-08-05 19:00:31 -07004904 if (cm->interp_filter == SWITCHABLE)
4905 tmp_rd += rs_rd;
4906
James Zern5e16d392015-08-17 18:19:22 -07004907 mask_filter = VPXMAX(mask_filter, tmp_rd);
Jingning Han3ee6db62015-08-05 19:00:31 -07004908
4909 newbest = (tmp_rd < tmp_best_rd);
4910 if (newbest) {
4911 tmp_best_filter = mbmi->interp_filter;
4912 tmp_best_rd = tmp_rd;
4913 }
4914 if ((newbest && cm->interp_filter == SWITCHABLE) ||
4915 (mbmi->interp_filter == cm->interp_filter &&
4916 cm->interp_filter != SWITCHABLE)) {
4917 tmp_best_rdu = tmp_rd;
4918 tmp_best_rate = rate;
4919 tmp_best_ratey = rate_y;
4920 tmp_best_distortion = distortion;
4921 tmp_best_sse = total_sse;
4922 tmp_best_skippable = skippable;
4923 tmp_best_mbmode = *mbmi;
4924 for (i = 0; i < 4; i++) {
4925 tmp_best_bmodes[i] = xd->mi[0]->bmi[i];
4926 x->zcoeff_blk[TX_4X4][i] = !x->plane[0].eobs[i];
4927 }
4928 pred_exists = 1;
4929 if (switchable_filter_index == 0 &&
4930 sf->use_rd_breakout &&
4931 best_rd < INT64_MAX) {
4932 if (tmp_best_rdu / 2 > best_rd) {
4933 // skip searching the other filters if the first is
4934 // already substantially larger than the best so far
4935 tmp_best_filter = mbmi->interp_filter;
4936 tmp_best_rdu = INT64_MAX;
4937 break;
4938 }
4939 }
4940 }
4941 } // switchable_filter_index loop
4942 }
4943 }
4944
4945 if (tmp_best_rdu == INT64_MAX && pred_exists)
4946 continue;
4947
4948 mbmi->interp_filter = (cm->interp_filter == SWITCHABLE ?
4949 tmp_best_filter : cm->interp_filter);
4950 if (!pred_exists) {
4951 // Handles the special case when a filter that is not in the
4952 // switchable list (bilinear, 6-tap) is indicated at the frame level
4953 tmp_rd = rd_pick_best_sub8x8_mode(cpi, x,
4954 &x->mbmi_ext->ref_mvs[ref_frame][0],
4955 second_ref, best_yrd, &rate, &rate_y,
4956 &distortion, &skippable, &total_sse,
4957 (int) this_rd_thresh, seg_mvs, bsi, 0,
4958 mi_row, mi_col);
4959 if (tmp_rd == INT64_MAX)
4960 continue;
4961 } else {
4962 total_sse = tmp_best_sse;
4963 rate = tmp_best_rate;
4964 rate_y = tmp_best_ratey;
4965 distortion = tmp_best_distortion;
4966 skippable = tmp_best_skippable;
4967 *mbmi = tmp_best_mbmode;
4968 for (i = 0; i < 4; i++)
4969 xd->mi[0]->bmi[i] = tmp_best_bmodes[i];
4970 }
4971
4972 rate2 += rate;
4973 distortion2 += distortion;
4974
4975 if (cm->interp_filter == SWITCHABLE)
4976 rate2 += vp10_get_switchable_rate(cpi, xd);
4977
4978 if (!mode_excluded)
4979 mode_excluded = comp_pred ? cm->reference_mode == SINGLE_REFERENCE
4980 : cm->reference_mode == COMPOUND_REFERENCE;
4981
4982 compmode_cost = vp10_cost_bit(comp_mode_p, comp_pred);
4983
4984 tmp_best_rdu = best_rd -
James Zern5e16d392015-08-17 18:19:22 -07004985 VPXMIN(RDCOST(x->rdmult, x->rddiv, rate2, distortion2),
4986 RDCOST(x->rdmult, x->rddiv, 0, total_sse));
Jingning Han3ee6db62015-08-05 19:00:31 -07004987
4988 if (tmp_best_rdu > 0) {
4989 // If even the 'Y' rd value of split is higher than best so far
4990 // then dont bother looking at UV
4991 vp10_build_inter_predictors_sbuv(&x->e_mbd, mi_row, mi_col,
4992 BLOCK_8X8);
4993 memset(x->skip_txfm, SKIP_TXFM_NONE, sizeof(x->skip_txfm));
Jingning Hana8dad552015-10-08 16:46:10 -07004994#if CONFIG_VAR_TX
4995 if (!inter_block_uvrd(cpi, x, &rate_uv, &distortion_uv, &uv_skippable,
4996 &uv_sse, BLOCK_8X8, tmp_best_rdu))
4997 continue;
4998#else
Jingning Han3ee6db62015-08-05 19:00:31 -07004999 if (!super_block_uvrd(cpi, x, &rate_uv, &distortion_uv, &uv_skippable,
5000 &uv_sse, BLOCK_8X8, tmp_best_rdu))
5001 continue;
Jingning Hana8dad552015-10-08 16:46:10 -07005002#endif
Jingning Han3ee6db62015-08-05 19:00:31 -07005003 rate2 += rate_uv;
5004 distortion2 += distortion_uv;
5005 skippable = skippable && uv_skippable;
5006 total_sse += uv_sse;
5007 }
5008 }
5009
5010 if (cm->reference_mode == REFERENCE_MODE_SELECT)
5011 rate2 += compmode_cost;
5012
5013 // Estimate the reference frame signaling cost and add it
5014 // to the rolling cost variable.
5015 if (second_ref_frame > INTRA_FRAME) {
5016 rate2 += ref_costs_comp[ref_frame];
5017 } else {
5018 rate2 += ref_costs_single[ref_frame];
5019 }
5020
5021 if (!disable_skip) {
5022 // Skip is never coded at the segment level for sub8x8 blocks and instead
5023 // always coded in the bitstream at the mode info level.
5024
Ronald S. Bultje60c58b52015-10-12 17:54:25 -04005025 if (ref_frame != INTRA_FRAME && !xd->lossless[mbmi->segment_id]) {
Jingning Han3ee6db62015-08-05 19:00:31 -07005026 if (RDCOST(x->rdmult, x->rddiv, rate_y + rate_uv, distortion2) <
5027 RDCOST(x->rdmult, x->rddiv, 0, total_sse)) {
5028 // Add in the cost of the no skip flag.
5029 rate2 += vp10_cost_bit(vp10_get_skip_prob(cm, xd), 0);
5030 } else {
5031 // FIXME(rbultje) make this work for splitmv also
5032 rate2 += vp10_cost_bit(vp10_get_skip_prob(cm, xd), 1);
5033 distortion2 = total_sse;
5034 assert(total_sse >= 0);
5035 rate2 -= (rate_y + rate_uv);
5036 rate_y = 0;
5037 rate_uv = 0;
5038 this_skip2 = 1;
5039 }
5040 } else {
5041 // Add in the cost of the no skip flag.
5042 rate2 += vp10_cost_bit(vp10_get_skip_prob(cm, xd), 0);
5043 }
5044
5045 // Calculate the final RD estimate for this mode.
5046 this_rd = RDCOST(x->rdmult, x->rddiv, rate2, distortion2);
5047 }
5048
5049 if (!disable_skip && ref_frame == INTRA_FRAME) {
5050 for (i = 0; i < REFERENCE_MODES; ++i)
James Zern5e16d392015-08-17 18:19:22 -07005051 best_pred_rd[i] = VPXMIN(best_pred_rd[i], this_rd);
Jingning Han3ee6db62015-08-05 19:00:31 -07005052 for (i = 0; i < SWITCHABLE_FILTER_CONTEXTS; i++)
James Zern5e16d392015-08-17 18:19:22 -07005053 best_filter_rd[i] = VPXMIN(best_filter_rd[i], this_rd);
Jingning Han3ee6db62015-08-05 19:00:31 -07005054 }
5055
5056 // Did this mode help.. i.e. is it the new best mode
5057 if (this_rd < best_rd || x->skip) {
5058 if (!mode_excluded) {
5059 int max_plane = MAX_MB_PLANE;
5060 // Note index of best mode so far
5061 best_ref_index = ref_index;
5062
5063 if (ref_frame == INTRA_FRAME) {
5064 /* required for left and above block mv */
5065 mbmi->mv[0].as_int = 0;
5066 max_plane = 1;
5067 }
5068
5069 rd_cost->rate = rate2;
5070 rd_cost->dist = distortion2;
5071 rd_cost->rdcost = this_rd;
5072 best_rd = this_rd;
5073 best_yrd = best_rd -
5074 RDCOST(x->rdmult, x->rddiv, rate_uv, distortion_uv);
5075 best_mbmode = *mbmi;
5076 best_skip2 = this_skip2;
5077 if (!x->select_tx_size)
5078 swap_block_ptr(x, ctx, 1, 0, 0, max_plane);
5079 memcpy(ctx->zcoeff_blk, x->zcoeff_blk[TX_4X4],
hui su088b05f2015-08-12 10:41:51 -07005080 sizeof(ctx->zcoeff_blk[0]) * ctx->num_4x4_blk);
Jingning Han3ee6db62015-08-05 19:00:31 -07005081
5082 for (i = 0; i < 4; i++)
5083 best_bmodes[i] = xd->mi[0]->bmi[i];
5084
5085 // TODO(debargha): enhance this test with a better distortion prediction
5086 // based on qp, activity mask and history
5087 if ((sf->mode_search_skip_flags & FLAG_EARLY_TERMINATE) &&
5088 (ref_index > MIN_EARLY_TERM_INDEX)) {
5089 int qstep = xd->plane[0].dequant[1];
5090 // TODO(debargha): Enhance this by specializing for each mode_index
5091 int scale = 4;
5092#if CONFIG_VP9_HIGHBITDEPTH
5093 if (xd->cur_buf->flags & YV12_FLAG_HIGHBITDEPTH) {
5094 qstep >>= (xd->bd - 8);
5095 }
5096#endif // CONFIG_VP9_HIGHBITDEPTH
5097 if (x->source_variance < UINT_MAX) {
5098 const int var_adjust = (x->source_variance < 16);
5099 scale -= var_adjust;
5100 }
5101 if (ref_frame > INTRA_FRAME &&
5102 distortion2 * scale < qstep * qstep) {
5103 early_term = 1;
5104 }
5105 }
5106 }
5107 }
5108
5109 /* keep record of best compound/single-only prediction */
5110 if (!disable_skip && ref_frame != INTRA_FRAME) {
5111 int64_t single_rd, hybrid_rd, single_rate, hybrid_rate;
5112
5113 if (cm->reference_mode == REFERENCE_MODE_SELECT) {
5114 single_rate = rate2 - compmode_cost;
5115 hybrid_rate = rate2;
5116 } else {
5117 single_rate = rate2;
5118 hybrid_rate = rate2 + compmode_cost;
5119 }
5120
5121 single_rd = RDCOST(x->rdmult, x->rddiv, single_rate, distortion2);
5122 hybrid_rd = RDCOST(x->rdmult, x->rddiv, hybrid_rate, distortion2);
5123
5124 if (!comp_pred && single_rd < best_pred_rd[SINGLE_REFERENCE])
5125 best_pred_rd[SINGLE_REFERENCE] = single_rd;
5126 else if (comp_pred && single_rd < best_pred_rd[COMPOUND_REFERENCE])
5127 best_pred_rd[COMPOUND_REFERENCE] = single_rd;
5128
5129 if (hybrid_rd < best_pred_rd[REFERENCE_MODE_SELECT])
5130 best_pred_rd[REFERENCE_MODE_SELECT] = hybrid_rd;
5131 }
5132
5133 /* keep record of best filter type */
5134 if (!mode_excluded && !disable_skip && ref_frame != INTRA_FRAME &&
5135 cm->interp_filter != BILINEAR) {
5136 int64_t ref = filter_cache[cm->interp_filter == SWITCHABLE ?
5137 SWITCHABLE_FILTERS : cm->interp_filter];
5138 int64_t adj_rd;
5139 for (i = 0; i < SWITCHABLE_FILTER_CONTEXTS; i++) {
5140 if (ref == INT64_MAX)
5141 adj_rd = 0;
5142 else if (filter_cache[i] == INT64_MAX)
5143 // when early termination is triggered, the encoder does not have
5144 // access to the rate-distortion cost. it only knows that the cost
5145 // should be above the maximum valid value. hence it takes the known
5146 // maximum plus an arbitrary constant as the rate-distortion cost.
5147 adj_rd = mask_filter - ref + 10;
5148 else
5149 adj_rd = filter_cache[i] - ref;
5150
5151 adj_rd += this_rd;
James Zern5e16d392015-08-17 18:19:22 -07005152 best_filter_rd[i] = VPXMIN(best_filter_rd[i], adj_rd);
Jingning Han3ee6db62015-08-05 19:00:31 -07005153 }
5154 }
5155
5156 if (early_term)
5157 break;
5158
5159 if (x->skip && !comp_pred)
5160 break;
5161 }
5162
5163 if (best_rd >= best_rd_so_far) {
5164 rd_cost->rate = INT_MAX;
5165 rd_cost->rdcost = INT64_MAX;
5166 return;
5167 }
5168
5169 // If we used an estimate for the uv intra rd in the loop above...
5170 if (sf->use_uv_intra_rd_estimate) {
5171 // Do Intra UV best rd mode selection if best mode choice above was intra.
5172 if (best_mbmode.ref_frame[0] == INTRA_FRAME) {
5173 *mbmi = best_mbmode;
5174 rd_pick_intra_sbuv_mode(cpi, x, ctx, &rate_uv_intra,
5175 &rate_uv_tokenonly,
5176 &dist_uv,
5177 &skip_uv,
5178 BLOCK_8X8, TX_4X4);
5179 }
5180 }
5181
5182 if (best_rd == INT64_MAX) {
5183 rd_cost->rate = INT_MAX;
5184 rd_cost->dist = INT64_MAX;
5185 rd_cost->rdcost = INT64_MAX;
5186 return;
5187 }
5188
5189 assert((cm->interp_filter == SWITCHABLE) ||
5190 (cm->interp_filter == best_mbmode.interp_filter) ||
5191 !is_inter_block(&best_mbmode));
5192
5193 vp10_update_rd_thresh_fact(tile_data->thresh_freq_fact,
5194 sf->adaptive_rd_thresh, bsize, best_ref_index);
5195
5196 // macroblock modes
5197 *mbmi = best_mbmode;
5198 x->skip |= best_skip2;
5199 if (!is_inter_block(&best_mbmode)) {
5200 for (i = 0; i < 4; i++)
5201 xd->mi[0]->bmi[i].as_mode = best_bmodes[i].as_mode;
5202 } else {
5203 for (i = 0; i < 4; ++i)
5204 memcpy(&xd->mi[0]->bmi[i], &best_bmodes[i], sizeof(b_mode_info));
5205
5206 mbmi->mv[0].as_int = xd->mi[0]->bmi[3].as_mv[0].as_int;
5207 mbmi->mv[1].as_int = xd->mi[0]->bmi[3].as_mv[1].as_int;
5208 }
5209
5210 for (i = 0; i < REFERENCE_MODES; ++i) {
5211 if (best_pred_rd[i] == INT64_MAX)
5212 best_pred_diff[i] = INT_MIN;
5213 else
5214 best_pred_diff[i] = best_rd - best_pred_rd[i];
5215 }
5216
5217 if (!x->skip) {
5218 for (i = 0; i < SWITCHABLE_FILTER_CONTEXTS; i++) {
5219 if (best_filter_rd[i] == INT64_MAX)
5220 best_filter_diff[i] = 0;
5221 else
5222 best_filter_diff[i] = best_rd - best_filter_rd[i];
5223 }
5224 if (cm->interp_filter == SWITCHABLE)
5225 assert(best_filter_diff[SWITCHABLE_FILTERS] == 0);
5226 } else {
5227 vp10_zero(best_filter_diff);
5228 }
5229
5230 store_coding_context(x, ctx, best_ref_index,
5231 best_pred_diff, best_filter_diff, 0);
5232}