blob: adb8eac5ebe6589ad004df07cd92856573e31c9a [file] [log] [blame]
Yaowu Xuc27fc142016-08-22 16:08:15 -07001/*
Yaowu Xu2ab7ff02016-09-02 12:04:54 -07002 * Copyright (c) 2016, Alliance for Open Media. All rights reserved
Yaowu Xuc27fc142016-08-22 16:08:15 -07003 *
Yaowu Xu2ab7ff02016-09-02 12:04:54 -07004 * This source code is subject to the terms of the BSD 2 Clause License and
5 * the Alliance for Open Media Patent License 1.0. If the BSD 2 Clause License
6 * was not distributed with this source code in the LICENSE file, you can
7 * obtain it at www.aomedia.org/license/software. If the Alliance for Open
8 * Media Patent License 1.0 was not distributed with this source code in the
9 * PATENTS file, you can obtain it at www.aomedia.org/license/patent.
Yaowu Xuc27fc142016-08-22 16:08:15 -070010 */
11
Yaowu Xuf883b422016-08-30 14:01:10 -070012#include "./av1_rtcd.h"
13#include "./aom_config.h"
14#include "./aom_dsp_rtcd.h"
Yaowu Xuc27fc142016-08-22 16:08:15 -070015
Nathan E. Egge6675be02016-12-21 13:02:43 -050016#include "aom_dsp/bitwriter.h"
Yaowu Xuc27fc142016-08-22 16:08:15 -070017#include "aom_dsp/quantize.h"
Yaowu Xuf883b422016-08-30 14:01:10 -070018#include "aom_mem/aom_mem.h"
Yaowu Xuc27fc142016-08-22 16:08:15 -070019#include "aom_ports/mem.h"
20
21#include "av1/common/idct.h"
22#include "av1/common/reconinter.h"
23#include "av1/common/reconintra.h"
24#include "av1/common/scan.h"
25
Tom Finegan17ce8b12017-02-08 12:46:31 -080026#include "av1/encoder/av1_quantize.h"
Yaowu Xuc27fc142016-08-22 16:08:15 -070027#include "av1/encoder/encodemb.h"
Angie Chiang74e23072017-03-24 14:54:23 -070028#if CONFIG_LV_MAP
29#include "av1/encoder/encodetxb.h"
30#endif
Yaowu Xuc27fc142016-08-22 16:08:15 -070031#include "av1/encoder/hybrid_fwd_txfm.h"
Yaowu Xuc27fc142016-08-22 16:08:15 -070032#include "av1/encoder/rd.h"
33#include "av1/encoder/tokenize.h"
34
Yushin Cho77bba8d2016-11-04 16:36:56 -070035#if CONFIG_PVQ
36#include "av1/encoder/encint.h"
37#include "av1/common/partition.h"
38#include "av1/encoder/pvq_encoder.h"
39#endif
40
Luc Trudeaue3980282017-04-25 23:17:21 -040041#if CONFIG_CFL
42#include "av1/common/cfl.h"
43#endif
44
// Decide whether the plain C subtraction path must be used: any block whose
// smaller dimension is below 4 takes the C implementation.
static int check_subtract_block_size(int w, int h) {
  const int min_dim = (w < h) ? w : h;
  return min_dim < 4;
}
// Compute diff = src - pred for a rows x cols block.
// Blocks with either dimension < 4 are routed to the C implementations
// (aom_*_subtract_block_c); presumably the optimized kernels assume at least
// a 4x4 block — confirm against aom_dsp. When CONFIG_HIGHBITDEPTH is set and
// the current buffer is flagged high bitdepth, the highbd variants are used
// with the frame bit depth (xd->bd).
static void subtract_block(const MACROBLOCKD *xd, int rows, int cols,
                           int16_t *diff, ptrdiff_t diff_stride,
                           const uint8_t *src8, ptrdiff_t src_stride,
                           const uint8_t *pred8, ptrdiff_t pred_stride) {
#if !CONFIG_HIGHBITDEPTH
  (void)xd;  // xd only consulted for the high-bitdepth flag/bit depth
#endif

  if (check_subtract_block_size(rows, cols)) {
    // Small-block path: force the C implementation.
#if CONFIG_HIGHBITDEPTH
    if (xd->cur_buf->flags & YV12_FLAG_HIGHBITDEPTH) {
      aom_highbd_subtract_block_c(rows, cols, diff, diff_stride, src8,
                                  src_stride, pred8, pred_stride, xd->bd);
      return;
    }
#endif  // CONFIG_HIGHBITDEPTH
    aom_subtract_block_c(rows, cols, diff, diff_stride, src8, src_stride, pred8,
                         pred_stride);

    return;
  }

  // Regular path: dispatch to the (possibly SIMD-accelerated) RTCD entry.
#if CONFIG_HIGHBITDEPTH
  if (xd->cur_buf->flags & YV12_FLAG_HIGHBITDEPTH) {
    aom_highbd_subtract_block(rows, cols, diff, diff_stride, src8, src_stride,
                              pred8, pred_stride, xd->bd);
    return;
  }
#endif  // CONFIG_HIGHBITDEPTH
  aom_subtract_block(rows, cols, diff, diff_stride, src8, src_stride, pred8,
                     pred_stride);
}
80
Angie Chiangf87e43f2017-04-02 16:51:19 -070081void av1_subtract_txb(MACROBLOCK *x, int plane, BLOCK_SIZE plane_bsize,
82 int blk_col, int blk_row, TX_SIZE tx_size) {
83 MACROBLOCKD *const xd = &x->e_mbd;
84 struct macroblock_plane *const p = &x->plane[plane];
85 const struct macroblockd_plane *const pd = &x->e_mbd.plane[plane];
86 const int diff_stride = block_size_wide[plane_bsize];
87 const int src_stride = p->src.stride;
88 const int dst_stride = pd->dst.stride;
89 const int tx1d_width = tx_size_wide[tx_size];
90 const int tx1d_height = tx_size_high[tx_size];
91 uint8_t *dst =
92 &pd->dst.buf[(blk_row * dst_stride + blk_col) << tx_size_wide_log2[0]];
93 uint8_t *src =
94 &p->src.buf[(blk_row * src_stride + blk_col) << tx_size_wide_log2[0]];
95 int16_t *src_diff =
96 &p->src_diff[(blk_row * diff_stride + blk_col) << tx_size_wide_log2[0]];
97 subtract_block(xd, tx1d_height, tx1d_width, src_diff, diff_stride, src,
98 src_stride, dst, dst_stride);
99}
100
Yaowu Xuf883b422016-08-30 14:01:10 -0700101void av1_subtract_plane(MACROBLOCK *x, BLOCK_SIZE bsize, int plane) {
Yaowu Xuc27fc142016-08-22 16:08:15 -0700102 struct macroblock_plane *const p = &x->plane[plane];
103 const struct macroblockd_plane *const pd = &x->e_mbd.plane[plane];
104 const BLOCK_SIZE plane_bsize = get_plane_block_size(bsize, pd);
Jingning Hanae5cfde2016-11-30 12:01:44 -0800105 const int bw = block_size_wide[plane_bsize];
106 const int bh = block_size_high[plane_bsize];
Angie Chiang19407b52017-04-02 15:27:57 -0700107 const MACROBLOCKD *xd = &x->e_mbd;
Yaowu Xuc27fc142016-08-22 16:08:15 -0700108
Angie Chiang19407b52017-04-02 15:27:57 -0700109 subtract_block(xd, bh, bw, p->src_diff, bw, p->src.buf, p->src.stride,
110 pd->dst.buf, pd->dst.stride);
Yaowu Xuc27fc142016-08-22 16:08:15 -0700111}
112
// Per-plane rate-distortion multiplier scaling, indexed as
// [ref (inter?)][plane_type (luma/chroma)]. Used by the optimize_b_*
// functions to scale mb->rdmult (the product is then >> 1).
// These numbers are empirically obtained.
static const int plane_rd_mult[REF_TYPES][PLANE_TYPES] = {
#if CONFIG_EC_ADAPT
  { 10, 7 }, { 8, 5 },
#else
  { 10, 6 }, { 8, 6 },
#endif
};
121
// Recompute the RD cost of the two candidate trellis paths from the
// enclosing scope's rdmult/rddiv, rate0/rate1 and error0/error1 into
// rd_cost0/rd_cost1.
// Wrapped in do { } while (0) so the expansion is a single statement and the
// macro is safe to invoke (with the trailing ';') inside unbraced
// if/else bodies — the bare `{ ... }` form would otherwise produce a
// compound statement followed by a stray ';'.
#define UPDATE_RD_COST()                             \
  do {                                               \
    rd_cost0 = RDCOST(rdmult, rddiv, rate0, error0); \
    rd_cost1 = RDCOST(rdmult, rddiv, rate1, error1); \
  } while (0)
127
// Look up the bit cost of coding `token` in coefficient context `ctx` from a
// per-band cost table. With CONFIG_NEW_TOKENSET the first table dimension is
// selected by whether the token is ZERO_TOKEN/EOB_TOKEN (the caller's
// skip_eob flag is ignored); otherwise the caller-provided skip_eob flag
// selects it.
static INLINE unsigned int get_token_bit_costs(
    unsigned int token_costs[2][COEFF_CONTEXTS][ENTROPY_TOKENS], int skip_eob,
    int ctx, int token) {
#if CONFIG_NEW_TOKENSET
  (void)skip_eob;
  return token_costs[token == ZERO_TOKEN || token == EOB_TOKEN][ctx][token];
#else
  return token_costs[skip_eob][ctx][token];
#endif
}
138
// Compile-time switch between the two coefficient optimizers below:
// 0 selects the Viterbi trellis version (optimize_b_org), nonzero selects
// the single-pass greedy version (optimize_b_greedy).
#define USE_GREEDY_OPTIMIZE_B 0
140
141#if USE_GREEDY_OPTIMIZE_B
142
// Per-coefficient candidate state for the greedy optimizer. Index [i][0]
// holds the original (input) values; [i][1] holds the values chosen while
// scanning, used when the end-of-block is moved to position i+1.
typedef struct av1_token_state_greedy {
  int16_t token;   // entropy-coding token for this coefficient
  tran_low_t qc;   // quantized coefficient value
  tran_low_t dqc;  // dequantized coefficient value
} av1_token_state_greedy;
Dake He97f56642017-03-29 16:46:51 -0700148
// Greedy coefficient optimization: a single forward pass over the scan order.
// For each nonzero quantized coefficient x it evaluates one alternative,
// x_a = x - 2*sz - 1 (magnitude reduced by one, sign preserved), picks
// whichever has the lower rate-distortion cost, and simultaneously tracks
// the best position to truncate the block (final_eob). qcoeff/dqcoeff are
// rewritten in place and the plane's eob is updated.
// Returns the new end-of-block position.
static int optimize_b_greedy(const AV1_COMMON *cm, MACROBLOCK *mb, int plane,
                             int block, TX_SIZE tx_size, int ctx) {
  MACROBLOCKD *const xd = &mb->e_mbd;
  struct macroblock_plane *const p = &mb->plane[plane];
  struct macroblockd_plane *const pd = &xd->plane[plane];
  const int ref = is_inter_block(&xd->mi[0]->mbmi);
  av1_token_state_greedy tokens[MAX_TX_SQUARE + 1][2];
  uint8_t token_cache[MAX_TX_SQUARE];
  const tran_low_t *const coeff = BLOCK_OFFSET(p->coeff, block);
  tran_low_t *const qcoeff = BLOCK_OFFSET(p->qcoeff, block);
  tran_low_t *const dqcoeff = BLOCK_OFFSET(pd->dqcoeff, block);
  const int eob = p->eobs[block];
  const PLANE_TYPE plane_type = pd->plane_type;
  const int16_t *const dequant_ptr = pd->dequant;
  const uint8_t *const band_translate = get_band_translate(tx_size);
  TX_TYPE tx_type = get_tx_type(plane_type, xd, block, tx_size);
  const SCAN_ORDER *const scan_order =
      get_scan(cm, tx_size, tx_type, is_inter_block(&xd->mi[0]->mbmi));
  const int16_t *const scan = scan_order->scan;
  const int16_t *const nb = scan_order->neighbors;
  int dqv;
  const int shift = av1_get_tx_scale(tx_size);
#if CONFIG_AOM_QM
  int seg_id = xd->mi[0]->mbmi.segment_id;
  const qm_val_t *iqmatrix = pd->seg_iqmatrix[seg_id][!ref][tx_size];
#endif
#if CONFIG_NEW_QUANT
  int dq = get_dq_profile_from_ctx(mb->qindex, ctx, ref, plane_type);
  const dequant_val_type_nuq *dequant_val = pd->dequant_val_nuq[dq];
#elif !CONFIG_AOM_QM
  const int dq_step[2] = { dequant_ptr[0] >> shift, dequant_ptr[1] >> shift };
#endif  // CONFIG_NEW_QUANT
  int sz = 0;
  const int64_t rddiv = mb->rddiv;
  int64_t rd_cost0, rd_cost1;
  int16_t t0, t1;
  int i, final_eob;
  const int cat6_bits = av1_get_cat6_extrabits_size(tx_size, xd->bd);
  unsigned int(*token_costs)[2][COEFF_CONTEXTS][ENTROPY_TOKENS] =
      mb->token_costs[txsize_sqr_map[tx_size]][plane_type][ref];
  const int default_eob = tx_size_2d[tx_size];

  // Lossless (qindex == 0) blocks must not be optimized.
  assert(mb->qindex > 0);

  assert((!plane_type && !plane) || (plane_type && plane));
  assert(eob <= default_eob);

  int64_t rdmult = (mb->rdmult * plane_rd_mult[ref][plane_type]) >> 1;

  int64_t rate0, rate1;
  // Seed per-coefficient token state and the energy-class context cache
  // from the current quantized values.
  for (i = 0; i < eob; i++) {
    const int rc = scan[i];
    int x = qcoeff[rc];
    t0 = av1_get_token(x);

    tokens[i][0].qc = x;
    tokens[i][0].token = t0;
    tokens[i][0].dqc = dqcoeff[rc];

    token_cache[rc] = av1_pt_energy_class[t0];
  }
  // Sentinel entry at the end-of-block position.
  tokens[eob][0].token = EOB_TOKEN;
  tokens[eob][0].qc = 0;
  tokens[eob][0].dqc = 0;
  tokens[eob][1] = tokens[eob][0];

  unsigned int(*token_costs_ptr)[2][COEFF_CONTEXTS][ENTROPY_TOKENS] =
      token_costs;

  final_eob = 0;

  int64_t eob_cost0, eob_cost1;

  const int ctx0 = ctx;
  /* Record the r-d cost */
  int64_t accu_rate = 0;
  int64_t accu_error = 0;

  // Baseline: cost of coding an immediate EOB (empty block).
  rate0 = get_token_bit_costs(*(token_costs_ptr + band_translate[0]), 0, ctx0,
                              EOB_TOKEN);
  int64_t best_block_rd_cost = RDCOST(rdmult, rddiv, rate0, accu_error);

  // int64_t best_block_rd_cost_all0 = best_block_rd_cost;

  int x_prev = 1;

  for (i = 0; i < eob; i++) {
    const int rc = scan[i];
    int x = qcoeff[rc];
    sz = -(x < 0);

    int band_cur = band_translate[i];
    int ctx_cur = (i == 0) ? ctx : get_coef_context(nb, token_cache, i);
    int token_tree_sel_cur = (x_prev == 0);

    if (x == 0) {
      // no need to search when x == 0
      rate0 =
          get_token_bit_costs(*(token_costs_ptr + band_cur), token_tree_sel_cur,
                              ctx_cur, tokens[i][0].token);
      accu_rate += rate0;
      x_prev = 0;
      // accu_error does not change when x==0
    } else {
      /* Computing distortion
       */
      // compute the distortion for the first candidate
      // and the distortion for quantizing to 0.
      int dx0 = (-coeff[rc]) * (1 << shift);
#if CONFIG_HIGHBITDEPTH
      if (xd->cur_buf->flags & YV12_FLAG_HIGHBITDEPTH) {
        dx0 >>= xd->bd - 8;
      }
#endif
      int64_t d0 = (int64_t)dx0 * dx0;

      int x_a = x - 2 * sz - 1;
      int64_t d2, d2_a;

      int dx;

#if CONFIG_AOM_QM
      int iwt = iqmatrix[rc];
      dqv = dequant_ptr[rc != 0];
      dqv = ((iwt * (int)dqv) + (1 << (AOM_QM_BITS - 1))) >> AOM_QM_BITS;
#else
      dqv = dequant_ptr[rc != 0];
#endif

      dx = (dqcoeff[rc] - coeff[rc]) * (1 << shift);
#if CONFIG_HIGHBITDEPTH
      if (xd->cur_buf->flags & YV12_FLAG_HIGHBITDEPTH) {
        dx >>= xd->bd - 8;
      }
#endif  // CONFIG_HIGHBITDEPTH
      d2 = (int64_t)dx * dx;

      /* compute the distortion for the second candidate
       * x_a = x - 2 * sz + 1;
       */
      if (x_a != 0) {
#if CONFIG_NEW_QUANT
        dx = av1_dequant_coeff_nuq(x, dqv, dequant_val[band_translate[i]]) -
             (coeff[rc] << shift);
#if CONFIG_HIGHBITDEPTH
        if (xd->cur_buf->flags & YV12_FLAG_HIGHBITDEPTH) {
          dx >>= xd->bd - 8;
        }
#endif  // CONFIG_HIGHBITDEPTH
#else   // CONFIG_NEW_QUANT
#if CONFIG_HIGHBITDEPTH
        if (xd->cur_buf->flags & YV12_FLAG_HIGHBITDEPTH) {
          dx -= ((dqv >> (xd->bd - 8)) + sz) ^ sz;
        } else {
          dx -= (dqv + sz) ^ sz;
        }
#else
        dx -= (dqv + sz) ^ sz;
#endif  // CONFIG_HIGHBITDEPTH
#endif  // CONFIG_NEW_QUANT
        d2_a = (int64_t)dx * dx;
      } else {
        d2_a = d0;
      }
      /* Computing rates and r-d cost
       */

      int best_x, best_eob_x;
      int64_t base_bits, next_bits0, next_bits1;
      int64_t next_eob_bits0, next_eob_bits1;

      // rate cost of x
      base_bits = av1_get_token_cost(x, &t0, cat6_bits);
      rate0 = base_bits + get_token_bit_costs(*(token_costs_ptr + band_cur),
                                              token_tree_sel_cur, ctx_cur, t0);

      base_bits = av1_get_token_cost(x_a, &t1, cat6_bits);
      rate1 = base_bits + get_token_bit_costs(*(token_costs_ptr + band_cur),
                                              token_tree_sel_cur, ctx_cur, t1);

      next_bits0 = 0;
      next_bits1 = 0;
      next_eob_bits0 = 0;
      next_eob_bits1 = 0;

      // Account for each candidate's influence on the context (and thus the
      // rate) of the next coefficient in scan order.
      if (i < default_eob - 1) {
        int ctx_next, token_tree_sel_next;
        int band_next = band_translate[i + 1];

        token_cache[rc] = av1_pt_energy_class[t0];
        ctx_next = get_coef_context(nb, token_cache, i + 1);
        token_tree_sel_next = (x == 0);

        next_bits0 = get_token_bit_costs(*(token_costs_ptr + band_next),
                                         token_tree_sel_next, ctx_next,
                                         tokens[i + 1][0].token);
        next_eob_bits0 =
            get_token_bit_costs(*(token_costs_ptr + band_next),
                                token_tree_sel_next, ctx_next, EOB_TOKEN);

        token_cache[rc] = av1_pt_energy_class[t1];
        ctx_next = get_coef_context(nb, token_cache, i + 1);
        token_tree_sel_next = (x_a == 0);

        next_bits1 = get_token_bit_costs(*(token_costs_ptr + band_next),
                                         token_tree_sel_next, ctx_next,
                                         tokens[i + 1][0].token);

        if (x_a != 0) {
          next_eob_bits1 =
              get_token_bit_costs(*(token_costs_ptr + band_next),
                                  token_tree_sel_next, ctx_next, EOB_TOKEN);
        }
      }

      rd_cost0 = RDCOST(rdmult, rddiv, (rate0 + next_bits0), d2);
      rd_cost1 = RDCOST(rdmult, rddiv, (rate1 + next_bits1), d2_a);

      best_x = (rd_cost1 < rd_cost0);

      // Cost of ending the block right after this coefficient, for each
      // candidate value.
      eob_cost0 = RDCOST(rdmult, rddiv, (accu_rate + rate0 + next_eob_bits0),
                         (accu_error + d2 - d0));
      eob_cost1 = eob_cost0;
      if (x_a != 0) {
        eob_cost1 = RDCOST(rdmult, rddiv, (accu_rate + rate1 + next_eob_bits1),
                           (accu_error + d2_a - d0));
        best_eob_x = (eob_cost1 < eob_cost0);
      } else {
        best_eob_x = 0;
      }

      int dqc, dqc_a = 0;

      dqc = dqcoeff[rc];
      if (best_x + best_eob_x) {
        if (x_a != 0) {
#if CONFIG_NEW_QUANT
          dqc_a = av1_dequant_abscoeff_nuq(abs(x_a), dqv,
                                           dequant_val[band_translate[i]]);
          dqc_a = shift ? ROUND_POWER_OF_TWO(dqc_a, shift) : dqc_a;
          if (sz) dqc_a = -dqc_a;
#else
// The 32x32 transform coefficient uses half quantization step size.
// Account for the rounding difference in the dequantized coefficeint
// value when the quantization index is dropped from an even number
// to an odd number.

#if CONFIG_AOM_QM
          tran_low_t offset = dqv >> shift;
#else
          tran_low_t offset = dq_step[rc != 0];
#endif
          if (shift & x_a) offset += (dqv & 0x01);

          if (sz == 0)
            dqc_a = dqcoeff[rc] - offset;
          else
            dqc_a = dqcoeff[rc] + offset;
#endif  // CONFIG_NEW_QUANT
        } else {
          dqc_a = 0;
        }  // if (x_a != 0)
      }

      // record the better quantized value
      if (best_x) {
        qcoeff[rc] = x_a;
        dqcoeff[rc] = dqc_a;

        accu_rate += rate1;
        accu_error += d2_a - d0;
        assert(d2_a <= d0);

        token_cache[rc] = av1_pt_energy_class[t1];
      } else {
        accu_rate += rate0;
        accu_error += d2 - d0;
        assert(d2 <= d0);

        token_cache[rc] = av1_pt_energy_class[t0];
      }

      x_prev = qcoeff[rc];

      // determine whether to move the eob position to i+1
      int64_t best_eob_cost_i = eob_cost0;

      tokens[i][1].token = t0;
      tokens[i][1].qc = x;
      tokens[i][1].dqc = dqc;

      if ((x_a != 0) && (best_eob_x)) {
        best_eob_cost_i = eob_cost1;

        tokens[i][1].token = t1;
        tokens[i][1].qc = x_a;
        tokens[i][1].dqc = dqc_a;
      }

      if (best_eob_cost_i < best_block_rd_cost) {
        best_block_rd_cost = best_eob_cost_i;
        final_eob = i + 1;
      }
    }  // if (x==0)
  }    // for (i)

  assert(final_eob <= eob);
  // Restore the candidate stored for the chosen last position (the [1] slot
  // holds the value that was selected when ending the block there).
  if (final_eob > 0) {
    assert(tokens[final_eob - 1][1].qc != 0);
    i = final_eob - 1;
    int rc = scan[i];
    qcoeff[rc] = tokens[i][1].qc;
    dqcoeff[rc] = tokens[i][1].dqc;
  }

  // Zero out every coefficient at or beyond the chosen end-of-block.
  for (i = final_eob; i < eob; i++) {
    int rc = scan[i];
    qcoeff[rc] = 0;
    dqcoeff[rc] = 0;
  }

  mb->plane[plane].eobs[block] = final_eob;
  return final_eob;
}
473
474#else // USE_GREEDY_OPTIMIZE_B
475
// One trellis node for the Viterbi optimizer: each scan position carries two
// such states ([0] = keep the quantized value, [1] = the alternative).
typedef struct av1_token_state_org {
  int64_t error;       // accumulated distortion along this path
  int rate;            // accumulated rate (bit cost) along this path
  int16_t next;        // scan index of the successor node on this path
  int16_t token;       // entropy-coding token for this coefficient
  tran_low_t qc;       // quantized coefficient value
  tran_low_t dqc;      // dequantized coefficient value
  uint8_t best_index;  // which successor state (0 or 1) this path follows
} av1_token_state_org;
Dake He97f56642017-03-29 16:46:51 -0700485
// Trellis (Viterbi) coefficient optimization. Walks the scan order backwards
// building a two-state trellis per nonzero coefficient (state 0: keep the
// quantized value; state 1: magnitude reduced by one, possibly moving the
// EOB), accumulating rate and distortion per path, then traces the cheapest
// path forward, rewriting qcoeff/dqcoeff and the block's eob in place.
// Returns the new end-of-block position.
static int optimize_b_org(const AV1_COMMON *cm, MACROBLOCK *mb, int plane,
                          int block, TX_SIZE tx_size, int ctx) {
  MACROBLOCKD *const xd = &mb->e_mbd;
  struct macroblock_plane *const p = &mb->plane[plane];
  struct macroblockd_plane *const pd = &xd->plane[plane];
  const int ref = is_inter_block(&xd->mi[0]->mbmi);
  av1_token_state_org tokens[MAX_TX_SQUARE + 1][2];
  uint8_t token_cache[MAX_TX_SQUARE];
  const tran_low_t *const coeff = BLOCK_OFFSET(p->coeff, block);
  tran_low_t *const qcoeff = BLOCK_OFFSET(p->qcoeff, block);
  tran_low_t *const dqcoeff = BLOCK_OFFSET(pd->dqcoeff, block);
  const int eob = p->eobs[block];
  const PLANE_TYPE plane_type = pd->plane_type;
  const int default_eob = tx_size_2d[tx_size];
  const int16_t *const dequant_ptr = pd->dequant;
  const uint8_t *const band_translate = get_band_translate(tx_size);
  TX_TYPE tx_type = get_tx_type(plane_type, xd, block, tx_size);
  const SCAN_ORDER *const scan_order =
      get_scan(cm, tx_size, tx_type, is_inter_block(&xd->mi[0]->mbmi));
  const int16_t *const scan = scan_order->scan;
  const int16_t *const nb = scan_order->neighbors;
  int dqv;
  const int shift = av1_get_tx_scale(tx_size);
#if CONFIG_AOM_QM
  int seg_id = xd->mi[0]->mbmi.segment_id;
  const qm_val_t *iqmatrix = pd->seg_iqmatrix[seg_id][!ref][tx_size];
#endif
#if CONFIG_NEW_QUANT
  int dq = get_dq_profile_from_ctx(mb->qindex, ctx, ref, plane_type);
  const dequant_val_type_nuq *dequant_val = pd->dequant_val_nuq[dq];
#elif !CONFIG_AOM_QM
  const int dq_step[2] = { dequant_ptr[0] >> shift, dequant_ptr[1] >> shift };
#endif  // CONFIG_NEW_QUANT
  int next = eob, sz = 0;
  const int64_t rdmult = (mb->rdmult * plane_rd_mult[ref][plane_type]) >> 1;
  const int64_t rddiv = mb->rddiv;
  int64_t rd_cost0, rd_cost1;
  int rate0, rate1;
  int64_t error0, error1;
  int16_t t0, t1;
  int best, band = (eob < default_eob) ? band_translate[eob]
                                       : band_translate[eob - 1];
  int pt, i, final_eob;
  const int cat6_bits = av1_get_cat6_extrabits_size(tx_size, xd->bd);
  unsigned int(*token_costs)[2][COEFF_CONTEXTS][ENTROPY_TOKENS] =
      mb->token_costs[txsize_sqr_map[tx_size]][plane_type][ref];
  const uint16_t *band_counts = &band_count_table[tx_size][band];
  uint16_t band_left = eob - band_cum_count_table[tx_size][band] + 1;
  int shortcut = 0;
  int next_shortcut = 0;

  // Lossless (qindex == 0) blocks must not be optimized.
#if CONFIG_EXT_DELTA_Q
  const int qindex = cm->seg.enabled
                         ? av1_get_qindex(&cm->seg, xd->mi[0]->mbmi.segment_id,
                                          cm->base_qindex)
                         : cm->base_qindex;
  assert(qindex > 0);
  (void)qindex;
#else
  assert(mb->qindex > 0);
#endif

  token_costs += band;

  assert((!plane_type && !plane) || (plane_type && plane));
  assert(eob <= default_eob);

  /* Now set up a Viterbi trellis to evaluate alternative roundings. */
  /* Initialize the sentinel node of the trellis. */
  tokens[eob][0].rate = 0;
  tokens[eob][0].error = 0;
  tokens[eob][0].next = default_eob;
  tokens[eob][0].token = EOB_TOKEN;
  tokens[eob][0].qc = 0;
  tokens[eob][1] = tokens[eob][0];

  // Seed state-0 rates/tokens and the energy-class context cache.
  for (i = 0; i < eob; i++) {
    const int rc = scan[i];
    tokens[i][0].rate = av1_get_token_cost(qcoeff[rc], &t0, cat6_bits);
    tokens[i][0].token = t0;
    token_cache[rc] = av1_pt_energy_class[t0];
  }

  // Backward pass: build the trellis from the last coefficient to the first.
  for (i = eob; i-- > 0;) {
    int base_bits, dx;
    int64_t d2;
    const int rc = scan[i];
    int x = qcoeff[rc];
#if CONFIG_AOM_QM
    int iwt = iqmatrix[rc];
    dqv = dequant_ptr[rc != 0];
    dqv = ((iwt * (int)dqv) + (1 << (AOM_QM_BITS - 1))) >> AOM_QM_BITS;
#else
    dqv = dequant_ptr[rc != 0];
#endif
    next_shortcut = shortcut;

    /* Only add a trellis state for non-zero coefficients. */
    if (UNLIKELY(x)) {
      error0 = tokens[next][0].error;
      error1 = tokens[next][1].error;
      /* Evaluate the first possibility for this state. */
      rate0 = tokens[next][0].rate;
      rate1 = tokens[next][1].rate;

      if (next_shortcut) {
        /* Consider both possible successor states. */
        if (next < default_eob) {
          pt = get_coef_context(nb, token_cache, i + 1);
          rate0 +=
              get_token_bit_costs(*token_costs, 0, pt, tokens[next][0].token);
          rate1 +=
              get_token_bit_costs(*token_costs, 0, pt, tokens[next][1].token);
        }
        UPDATE_RD_COST();
        /* And pick the best. */
        best = rd_cost1 < rd_cost0;
      } else {
        if (next < default_eob) {
          pt = get_coef_context(nb, token_cache, i + 1);
          rate0 +=
              get_token_bit_costs(*token_costs, 0, pt, tokens[next][0].token);
        }
        best = 0;
      }

      dx = (dqcoeff[rc] - coeff[rc]) * (1 << shift);
#if CONFIG_HIGHBITDEPTH
      if (xd->cur_buf->flags & YV12_FLAG_HIGHBITDEPTH) {
        dx >>= xd->bd - 8;
      }
#endif  // CONFIG_HIGHBITDEPTH
      d2 = (int64_t)dx * dx;
      tokens[i][0].rate += (best ? rate1 : rate0);
      tokens[i][0].error = d2 + (best ? error1 : error0);
      tokens[i][0].next = next;
      tokens[i][0].qc = x;
      tokens[i][0].dqc = dqcoeff[rc];
      tokens[i][0].best_index = best;

      /* Evaluate the second possibility for this state. */
      rate0 = tokens[next][0].rate;
      rate1 = tokens[next][1].rate;

      // Only consider reducing the magnitude when the coefficient is small
      // and the reduced value still straddles the unquantized coefficient.
      // The threshold of 3 is empirically obtained.
      if (UNLIKELY(abs(x) > 3)) {
        shortcut = 0;
      } else {
#if CONFIG_NEW_QUANT
        shortcut = ((av1_dequant_abscoeff_nuq(abs(x), dqv,
                                              dequant_val[band_translate[i]]) >
                     (abs(coeff[rc]) << shift)) &&
                    (av1_dequant_abscoeff_nuq(abs(x) - 1, dqv,
                                              dequant_val[band_translate[i]]) <
                     (abs(coeff[rc]) << shift)));
#else   // CONFIG_NEW_QUANT
#if CONFIG_AOM_QM
        if ((abs(x) * dequant_ptr[rc != 0] * iwt >
             ((abs(coeff[rc]) << shift) << AOM_QM_BITS)) &&
            (abs(x) * dequant_ptr[rc != 0] * iwt <
             (((abs(coeff[rc]) << shift) + dequant_ptr[rc != 0])
              << AOM_QM_BITS)))
#else
        if ((abs(x) * dequant_ptr[rc != 0] > (abs(coeff[rc]) << shift)) &&
            (abs(x) * dequant_ptr[rc != 0] <
             (abs(coeff[rc]) << shift) + dequant_ptr[rc != 0]))
#endif  // CONFIG_AOM_QM
          shortcut = 1;
        else
          shortcut = 0;
#endif  // CONFIG_NEW_QUANT
      }

      if (shortcut) {
        sz = -(x < 0);
        x -= 2 * sz + 1;
      } else {
        // No alternative to evaluate: duplicate state 0 and advance the head.
        tokens[i][1] = tokens[i][0];
        next = i;

        if (UNLIKELY(!(--band_left))) {
          --band_counts;
          band_left = *band_counts;
          --token_costs;
        }
        continue;
      }

      /* Consider both possible successor states. */
      if (!x) {
        /* If we reduced this coefficient to zero, check to see if
         * we need to move the EOB back here.
         */
        t0 = tokens[next][0].token == EOB_TOKEN ? EOB_TOKEN : ZERO_TOKEN;
        t1 = tokens[next][1].token == EOB_TOKEN ? EOB_TOKEN : ZERO_TOKEN;
        base_bits = 0;
      } else {
        base_bits = av1_get_token_cost(x, &t0, cat6_bits);
        t1 = t0;
      }

      if (next_shortcut) {
        if (LIKELY(next < default_eob)) {
          if (t0 != EOB_TOKEN) {
            token_cache[rc] = av1_pt_energy_class[t0];
            pt = get_coef_context(nb, token_cache, i + 1);
            rate0 += get_token_bit_costs(*token_costs, !x, pt,
                                         tokens[next][0].token);
          }
          if (t1 != EOB_TOKEN) {
            token_cache[rc] = av1_pt_energy_class[t1];
            pt = get_coef_context(nb, token_cache, i + 1);
            rate1 += get_token_bit_costs(*token_costs, !x, pt,
                                         tokens[next][1].token);
          }
        }

        UPDATE_RD_COST();
        /* And pick the best. */
        best = rd_cost1 < rd_cost0;
      } else {
        // The two states in next stage are identical.
        if (next < default_eob && t0 != EOB_TOKEN) {
          token_cache[rc] = av1_pt_energy_class[t0];
          pt = get_coef_context(nb, token_cache, i + 1);
          rate0 +=
              get_token_bit_costs(*token_costs, !x, pt, tokens[next][0].token);
        }
        best = 0;
      }

#if CONFIG_NEW_QUANT
      dx = av1_dequant_coeff_nuq(x, dqv, dequant_val[band_translate[i]]) -
           (coeff[rc] << shift);
#if CONFIG_HIGHBITDEPTH
      if (xd->cur_buf->flags & YV12_FLAG_HIGHBITDEPTH) {
        dx >>= xd->bd - 8;
      }
#endif  // CONFIG_HIGHBITDEPTH
#else   // CONFIG_NEW_QUANT
#if CONFIG_HIGHBITDEPTH
      if (xd->cur_buf->flags & YV12_FLAG_HIGHBITDEPTH) {
        dx -= ((dqv >> (xd->bd - 8)) + sz) ^ sz;
      } else {
        dx -= (dqv + sz) ^ sz;
      }
#else
      dx -= (dqv + sz) ^ sz;
#endif  // CONFIG_HIGHBITDEPTH
#endif  // CONFIG_NEW_QUANT
      d2 = (int64_t)dx * dx;

      tokens[i][1].rate = base_bits + (best ? rate1 : rate0);
      tokens[i][1].error = d2 + (best ? error1 : error0);
      tokens[i][1].next = next;
      tokens[i][1].token = best ? t1 : t0;
      tokens[i][1].qc = x;

      if (x) {
#if CONFIG_NEW_QUANT
        tokens[i][1].dqc = av1_dequant_abscoeff_nuq(
            abs(x), dqv, dequant_val[band_translate[i]]);
        tokens[i][1].dqc = shift ? ROUND_POWER_OF_TWO(tokens[i][1].dqc, shift)
                                 : tokens[i][1].dqc;
        if (sz) tokens[i][1].dqc = -tokens[i][1].dqc;
#else
// The 32x32 transform coefficient uses half quantization step size.
// Account for the rounding difference in the dequantized coefficeint
// value when the quantization index is dropped from an even number
// to an odd number.

#if CONFIG_AOM_QM
        tran_low_t offset = dqv >> shift;
#else
        tran_low_t offset = dq_step[rc != 0];
#endif
        if (shift & x) offset += (dqv & 0x01);

        if (sz == 0)
          tokens[i][1].dqc = dqcoeff[rc] - offset;
        else
          tokens[i][1].dqc = dqcoeff[rc] + offset;
#endif  // CONFIG_NEW_QUANT
      } else {
        tokens[i][1].dqc = 0;
      }

      tokens[i][1].best_index = best;
      /* Finally, make this the new head of the trellis. */
      next = i;
    } else {
      /* There's no choice to make for a zero coefficient, so we don't
       * add a new trellis node, but we do need to update the costs.
       */
      t0 = tokens[next][0].token;
      t1 = tokens[next][1].token;
      pt = get_coef_context(nb, token_cache, i + 1);
      /* Update the cost of each path if we're past the EOB token. */
      if (t0 != EOB_TOKEN) {
        tokens[next][0].rate += get_token_bit_costs(*token_costs, 1, pt, t0);
        tokens[next][0].token = ZERO_TOKEN;
      }
      if (t1 != EOB_TOKEN) {
        tokens[next][1].rate += get_token_bit_costs(*token_costs, 1, pt, t1);
        tokens[next][1].token = ZERO_TOKEN;
      }
      tokens[i][0].best_index = tokens[i][1].best_index = 0;
      shortcut = (tokens[next][0].rate != tokens[next][1].rate);
      /* Don't update next, because we didn't add a new node. */
    }

    // Step to the previous coefficient band when this one is exhausted.
    if (UNLIKELY(!(--band_left))) {
      --band_counts;
      band_left = *band_counts;
      --token_costs;
    }
  }

  /* Now pick the best path through the whole trellis. */
  rate0 = tokens[next][0].rate;
  rate1 = tokens[next][1].rate;
  error0 = tokens[next][0].error;
  error1 = tokens[next][1].error;
  t0 = tokens[next][0].token;
  t1 = tokens[next][1].token;
  rate0 += get_token_bit_costs(*token_costs, 0, ctx, t0);
  rate1 += get_token_bit_costs(*token_costs, 0, ctx, t1);
  UPDATE_RD_COST();
  best = rd_cost1 < rd_cost0;

  final_eob = -1;

  // Forward pass: follow the winning path and write back its coefficients.
  for (i = next; i < eob; i = next) {
    const int x = tokens[i][best].qc;
    const int rc = scan[i];
    if (x) final_eob = i;
    qcoeff[rc] = x;
    dqcoeff[rc] = tokens[i][best].dqc;

    next = tokens[i][best].next;
    best = tokens[i][best].best_index;
  }
  final_eob++;

  mb->plane[plane].eobs[block] = final_eob;
  assert(final_eob <= default_eob);
  return final_eob;
}
834
Dake He97f56642017-03-29 16:46:51 -0700835#endif // USE_GREEDY_OPTIMIZE_B
836
// Trellis-optimize the quantized coefficients of one transform block to
// improve the rate-distortion trade-off, dispatching to either the greedy
// or the original optimizer depending on the build configuration.
// Returns the (possibly reduced) end-of-block position for the block.
int av1_optimize_b(const AV1_COMMON *cm, MACROBLOCK *mb, int plane, int block,
                   TX_SIZE tx_size, int ctx) {
  MACROBLOCKD *const xd = &mb->e_mbd;
  struct macroblock_plane *const p = &mb->plane[plane];
  const int eob = p->eobs[block];
  // Sanity check: qindex==0 must coincide with lossless coding for this
  // segment (exactly one of the two conditions holds per the XOR).
  assert((mb->qindex == 0) ^ (xd->lossless[xd->mi[0]->mbmi.segment_id] == 0));
  // Nothing to optimize when the block is already all-zero.
  if (eob == 0) return eob;
  // Lossless blocks must not be altered; coefficients are exact.
  if (xd->lossless[xd->mi[0]->mbmi.segment_id]) return eob;
#if CONFIG_PVQ
  // PVQ performs its own rate-distortion search; skip trellis optimization.
  (void)cm;
  (void)tx_size;
  (void)ctx;
  return eob;
#endif

#if USE_GREEDY_OPTIMIZE_B
  return optimize_b_greedy(cm, mb, plane, block, tx_size, ctx);
#else   // USE_GREEDY_OPTIMIZE_B
  return optimize_b_org(cm, mb, plane, block, tx_size, ctx);
#endif  // USE_GREEDY_OPTIMIZE_B
}
858
#if !CONFIG_PVQ
#if CONFIG_HIGHBITDEPTH
// Index into the second dimension of quant_func_list: selects the
// low-bit-depth or high-bit-depth quantizer implementation.
typedef enum QUANT_FUNC {
  QUANT_FUNC_LOWBD = 0,
  QUANT_FUNC_HIGHBD = 1,
  QUANT_FUNC_TYPES = 2
} QUANT_FUNC;

// Dispatch table mapping an AV1_XFORM_QUANT method (FP, B, DC, SKIP) and a
// bit-depth class to the concrete quantization facade. The trailing
// { NULL, NULL } row corresponds to AV1_XFORM_QUANT_SKIP_QUANT, which is
// handled before this table is consulted.
static AV1_QUANT_FACADE
    quant_func_list[AV1_XFORM_QUANT_TYPES][QUANT_FUNC_TYPES] = {
#if !CONFIG_NEW_QUANT
      { av1_quantize_fp_facade, av1_highbd_quantize_fp_facade },
      { av1_quantize_b_facade, av1_highbd_quantize_b_facade },
      { av1_quantize_dc_facade, av1_highbd_quantize_dc_facade },
#else   // !CONFIG_NEW_QUANT
      // Non-uniform-quantization (NUQ) variants replace the standard ones.
      { av1_quantize_fp_nuq_facade, av1_highbd_quantize_fp_nuq_facade },
      { av1_quantize_b_nuq_facade, av1_highbd_quantize_b_nuq_facade },
      { av1_quantize_dc_nuq_facade, av1_highbd_quantize_dc_nuq_facade },
#endif  // !CONFIG_NEW_QUANT
      { NULL, NULL }
    };

#else

// Without CONFIG_HIGHBITDEPTH only the low-bit-depth quantizers exist.
typedef enum QUANT_FUNC {
  QUANT_FUNC_LOWBD = 0,
  QUANT_FUNC_TYPES = 1
} QUANT_FUNC;

// Single-column dispatch table (low bit depth only); see the HBD variant
// above for the row layout.
static AV1_QUANT_FACADE quant_func_list[AV1_XFORM_QUANT_TYPES]
                                       [QUANT_FUNC_TYPES] = {
#if !CONFIG_NEW_QUANT
                                         { av1_quantize_fp_facade },
                                         { av1_quantize_b_facade },
                                         { av1_quantize_dc_facade },
#else   // !CONFIG_NEW_QUANT
                                         { av1_quantize_fp_nuq_facade },
                                         { av1_quantize_b_nuq_facade },
                                         { av1_quantize_dc_nuq_facade },
#endif  // !CONFIG_NEW_QUANT
                                         { NULL }
                                       };
#endif  // CONFIG_HIGHBITDEPTH
#endif  // CONFIG_PVQ
Yaowu Xuc27fc142016-08-22 16:08:15 -0700903
// Forward-transform and quantize one transform block.
//
// Computes the forward transform of the prediction residual for the block at
// (blk_row, blk_col) and quantizes the result into p->qcoeff / pd->dqcoeff,
// updating p->eobs[block]. The quantization method is chosen by
// xform_quant_idx via quant_func_list; AV1_XFORM_QUANT_SKIP_QUANT leaves the
// coefficients unquantized. Under CONFIG_PVQ the residual path is replaced
// entirely by the PVQ encoder (av1_pvq_encode_helper), which also transforms
// the prediction to obtain a reference vector.
void av1_xform_quant(const AV1_COMMON *cm, MACROBLOCK *x, int plane, int block,
                     int blk_row, int blk_col, BLOCK_SIZE plane_bsize,
                     TX_SIZE tx_size, int ctx,
                     AV1_XFORM_QUANT xform_quant_idx) {
  MACROBLOCKD *const xd = &x->e_mbd;
  MB_MODE_INFO *const mbmi = &xd->mi[0]->mbmi;
#if !(CONFIG_PVQ || CONFIG_DAALA_DIST)
  const struct macroblock_plane *const p = &x->plane[plane];
  const struct macroblockd_plane *const pd = &xd->plane[plane];
#else
  // PVQ/Daala-dist paths write into plane scratch buffers, so non-const.
  struct macroblock_plane *const p = &x->plane[plane];
  struct macroblockd_plane *const pd = &xd->plane[plane];
#endif
  PLANE_TYPE plane_type = get_plane_type(plane);
  TX_TYPE tx_type = get_tx_type(plane_type, xd, block, tx_size);
  const int is_inter = is_inter_block(mbmi);
  const SCAN_ORDER *const scan_order = get_scan(cm, tx_size, tx_type, is_inter);
  tran_low_t *const coeff = BLOCK_OFFSET(p->coeff, block);
  tran_low_t *const qcoeff = BLOCK_OFFSET(p->qcoeff, block);
  tran_low_t *const dqcoeff = BLOCK_OFFSET(pd->dqcoeff, block);
  uint16_t *const eob = &p->eobs[block];
  const int diff_stride = block_size_wide[plane_bsize];
#if CONFIG_AOM_QM
  // Per-segment quantization matrices (forward and inverse).
  int seg_id = mbmi->segment_id;
  const qm_val_t *qmatrix = pd->seg_qmatrix[seg_id][!is_inter][tx_size];
  const qm_val_t *iqmatrix = pd->seg_iqmatrix[seg_id][!is_inter][tx_size];
#endif

  FWD_TXFM_PARAM fwd_txfm_param;

#if CONFIG_PVQ || CONFIG_DAALA_DIST
  uint8_t *dst;
  int16_t *pred;
  const int dst_stride = pd->dst.stride;
  int tx_blk_size;
  int i, j;
#endif

#if !CONFIG_PVQ
  const int tx2d_size = tx_size_2d[tx_size];
  QUANT_PARAM qparam;
  const int16_t *src_diff;

  src_diff =
      &p->src_diff[(blk_row * diff_stride + blk_col) << tx_size_wide_log2[0]];
  qparam.log_scale = av1_get_tx_scale(tx_size);
#if CONFIG_NEW_QUANT
  qparam.tx_size = tx_size;
  // Context-dependent dequantization profile for non-uniform quantization.
  qparam.dq = get_dq_profile_from_ctx(x->qindex, ctx, is_inter, plane_type);
#endif  // CONFIG_NEW_QUANT
#if CONFIG_AOM_QM
  qparam.qmatrix = qmatrix;
  qparam.iqmatrix = iqmatrix;
#endif  // CONFIG_AOM_QM
#else
  tran_low_t *ref_coeff = BLOCK_OFFSET(pd->pvq_ref_coeff, block);
  int skip = 1;
  PVQ_INFO *pvq_info = NULL;
  uint8_t *src;
  int16_t *src_int16;
  const int src_stride = p->src.stride;

  (void)ctx;
  (void)scan_order;
  (void)qcoeff;

  if (x->pvq_coded) {
    assert(block < MAX_PVQ_BLOCKS_IN_SB);
    pvq_info = &x->pvq[block][plane];
  }
  src = &p->src.buf[(blk_row * src_stride + blk_col) << tx_size_wide_log2[0]];
  src_int16 =
      &p->src_int16[(blk_row * diff_stride + blk_col) << tx_size_wide_log2[0]];

  // transform block size in pixels
  tx_blk_size = tx_size_wide[tx_size];
  // Copy the source block into an int16 buffer so the shared forward
  // transform routines can be reused on it.
#if CONFIG_HIGHBITDEPTH
  if (xd->cur_buf->flags & YV12_FLAG_HIGHBITDEPTH) {
    for (j = 0; j < tx_blk_size; j++)
      for (i = 0; i < tx_blk_size; i++)
        src_int16[diff_stride * j + i] =
            CONVERT_TO_SHORTPTR(src)[src_stride * j + i];
  } else {
#endif  // CONFIG_HIGHBITDEPTH
    for (j = 0; j < tx_blk_size; j++)
      for (i = 0; i < tx_blk_size; i++)
        src_int16[diff_stride * j + i] = src[src_stride * j + i];
#if CONFIG_HIGHBITDEPTH
  }
#endif  // CONFIG_HIGHBITDEPTH
#endif

#if CONFIG_PVQ || CONFIG_DAALA_DIST
  dst = &pd->dst.buf[(blk_row * dst_stride + blk_col) << tx_size_wide_log2[0]];
  pred = &pd->pred[(blk_row * diff_stride + blk_col) << tx_size_wide_log2[0]];

  // transform block size in pixels
  tx_blk_size = tx_size_wide[tx_size];

// copy uint8 orig and predicted block to int16 buffer
// in order to use existing VP10 transform functions
#if CONFIG_HIGHBITDEPTH
  if (xd->cur_buf->flags & YV12_FLAG_HIGHBITDEPTH) {
    for (j = 0; j < tx_blk_size; j++)
      for (i = 0; i < tx_blk_size; i++)
        pred[diff_stride * j + i] =
            CONVERT_TO_SHORTPTR(dst)[dst_stride * j + i];
  } else {
#endif  // CONFIG_HIGHBITDEPTH
    for (j = 0; j < tx_blk_size; j++)
      for (i = 0; i < tx_blk_size; i++)
        pred[diff_stride * j + i] = dst[dst_stride * j + i];
#if CONFIG_HIGHBITDEPTH
  }
#endif  // CONFIG_HIGHBITDEPTH
#endif

  // NOTE(review): redundant with the (void)ctx above in the PVQ arm; ctx is
  // otherwise used only under CONFIG_NEW_QUANT.
  (void)ctx;

  fwd_txfm_param.tx_type = tx_type;
  fwd_txfm_param.tx_size = tx_size;
  fwd_txfm_param.lossless = xd->lossless[mbmi->segment_id];

#if !CONFIG_PVQ
#if CONFIG_HIGHBITDEPTH
  fwd_txfm_param.bd = xd->bd;
  if (xd->cur_buf->flags & YV12_FLAG_HIGHBITDEPTH) {
    // High-bit-depth path: transform, quantize (unless skipped), then return.
    av1_highbd_fwd_txfm(src_diff, coeff, diff_stride, &fwd_txfm_param);
    if (xform_quant_idx != AV1_XFORM_QUANT_SKIP_QUANT) {
      if (LIKELY(!x->skip_block)) {
        quant_func_list[xform_quant_idx][QUANT_FUNC_HIGHBD](
            coeff, tx2d_size, p, qcoeff, pd, dqcoeff, eob, scan_order, &qparam);
      } else {
        av1_quantize_skip(tx2d_size, qcoeff, dqcoeff, eob);
      }
    }
#if CONFIG_LV_MAP
    p->txb_entropy_ctx[block] =
        (uint8_t)av1_get_txb_entropy_context(qcoeff, scan_order, *eob);
#endif  // CONFIG_LV_MAP
    return;
  }
#endif  // CONFIG_HIGHBITDEPTH
  // Low-bit-depth path: same transform/quantize sequence as above.
  av1_fwd_txfm(src_diff, coeff, diff_stride, &fwd_txfm_param);
  if (xform_quant_idx != AV1_XFORM_QUANT_SKIP_QUANT) {
    if (LIKELY(!x->skip_block)) {
      quant_func_list[xform_quant_idx][QUANT_FUNC_LOWBD](
          coeff, tx2d_size, p, qcoeff, pd, dqcoeff, eob, scan_order, &qparam);
    } else {
      av1_quantize_skip(tx2d_size, qcoeff, dqcoeff, eob);
    }
  }
#if CONFIG_LV_MAP
  p->txb_entropy_ctx[block] =
      (uint8_t)av1_get_txb_entropy_context(qcoeff, scan_order, *eob);
#endif  // CONFIG_LV_MAP
#else   // #if !CONFIG_PVQ
  (void)xform_quant_idx;
#if CONFIG_HIGHBITDEPTH
  fwd_txfm_param.bd = xd->bd;
  if (xd->cur_buf->flags & YV12_FLAG_HIGHBITDEPTH) {
    // Transform both the source and the prediction: PVQ codes the source
    // coefficients relative to the prediction's coefficients (ref_coeff).
    av1_highbd_fwd_txfm(src_int16, coeff, diff_stride, &fwd_txfm_param);
    av1_highbd_fwd_txfm(pred, ref_coeff, diff_stride, &fwd_txfm_param);
  } else {
#endif
    av1_fwd_txfm(src_int16, coeff, diff_stride, &fwd_txfm_param);
    av1_fwd_txfm(pred, ref_coeff, diff_stride, &fwd_txfm_param);
#if CONFIG_HIGHBITDEPTH
  }
#endif

  // PVQ for inter mode block
  if (!x->skip_block) {
    PVQ_SKIP_TYPE ac_dc_coded =
        av1_pvq_encode_helper(x,
                              coeff,        // target original vector
                              ref_coeff,    // reference vector
                              dqcoeff,      // de-quantized vector
                              eob,          // End of Block marker
                              pd->dequant,  // aom's quantizers
                              plane,        // image plane
                              tx_size,      // block size in log_2 - 2
                              tx_type,
                              &x->rate,  // rate measured
                              x->pvq_speed,
                              pvq_info);  // PVQ info for a block
    skip = ac_dc_coded == PVQ_SKIP;
  }
  x->pvq_skip[plane] = skip;

  // Any coded (non-skipped) PVQ block forces the macroblock skip flag off.
  if (!skip) mbmi->skip = 0;
#endif  // #if !CONFIG_PVQ
}
1097
// Encode one transform block: forward-transform + quantize, trellis-optimize
// the coefficients (non-PVQ), update the entropy contexts and the
// macroblock skip flag, and reconstruct the block by adding the inverse
// transform of the dequantized coefficients onto the prediction in-place.
// Used as a callback for av1_foreach_transformed_block_in_plane.
static void encode_block(int plane, int block, int blk_row, int blk_col,
                         BLOCK_SIZE plane_bsize, TX_SIZE tx_size, void *arg) {
  struct encode_b_args *const args = arg;
  AV1_COMMON *cm = args->cm;
  MACROBLOCK *const x = args->x;
  MACROBLOCKD *const xd = &x->e_mbd;
  int ctx;
  struct macroblock_plane *const p = &x->plane[plane];
  struct macroblockd_plane *const pd = &xd->plane[plane];
  tran_low_t *const dqcoeff = BLOCK_OFFSET(pd->dqcoeff, block);
  uint8_t *dst;
#if !CONFIG_PVQ
  ENTROPY_CONTEXT *a, *l;
#endif
#if CONFIG_VAR_TX
  // Block-skip map width in 4x4 units for this plane.
  int bw = block_size_wide[plane_bsize] >> tx_size_wide_log2[0];
#endif
  dst = &pd->dst
             .buf[(blk_row * pd->dst.stride + blk_col) << tx_size_wide_log2[0]];

#if !CONFIG_PVQ
  a = &args->ta[blk_col];
  l = &args->tl[blk_row];
#if CONFIG_VAR_TX
  ctx = get_entropy_context(tx_size, a, l);
#else
  ctx = combine_entropy_contexts(*a, *l);
#endif
#else
  ctx = 0;
#endif  // CONFIG_PVQ

#if CONFIG_VAR_TX
  // Assert not magic number (uninitialized).
  assert(x->blk_skip[plane][blk_row * bw + blk_col] != 234);

  if (x->blk_skip[plane][blk_row * bw + blk_col] == 0) {
#else
  {
#endif
    av1_xform_quant(cm, x, plane, block, blk_row, blk_col, plane_bsize, tx_size,
                    ctx, AV1_XFORM_QUANT_FP);
  }
#if CONFIG_VAR_TX
  else {
    // RD search decided to skip this block; make sure eob reflects that.
    p->eobs[block] = 0;
  }
#endif

#if !CONFIG_PVQ
  av1_optimize_b(cm, x, plane, block, tx_size, ctx);

  av1_set_txb_context(x, plane, block, tx_size, a, l);

  // Any block with coded coefficients clears the macroblock skip flag.
  if (p->eobs[block]) *(args->skip) = 0;

  if (p->eobs[block] == 0) return;
#else
  (void)ctx;
  if (!x->pvq_skip[plane]) *(args->skip) = 0;

  if (x->pvq_skip[plane]) return;
#endif
  // Reconstruct: inverse-transform dqcoeff and add onto the prediction.
  TX_TYPE tx_type = get_tx_type(pd->plane_type, xd, block, tx_size);
  av1_inverse_transform_block(xd, dqcoeff, tx_type, tx_size, dst,
                              pd->dst.stride, p->eobs[block]);
}
1165
#if CONFIG_VAR_TX
// Recursively walk the variable transform-size partition tree for an inter
// block. When the current square matches the transform size selected for
// this position, encode it via encode_block; otherwise split into four
// quadrants of the next-smaller square transform size and recurse.
static void encode_block_inter(int plane, int block, int blk_row, int blk_col,
                               BLOCK_SIZE plane_bsize, TX_SIZE tx_size,
                               void *arg) {
  struct encode_b_args *const args = arg;
  MACROBLOCK *const x = args->x;
  MACROBLOCKD *const xd = &x->e_mbd;
  MB_MODE_INFO *const mbmi = &xd->mi[0]->mbmi;
  const BLOCK_SIZE bsize = txsize_to_bsize[tx_size];
  const struct macroblockd_plane *const pd = &xd->plane[plane];
  // Map luma block coordinates to the inter_tx_size grid (chroma subsampled).
  const int tx_row = blk_row >> (1 - pd->subsampling_y);
  const int tx_col = blk_col >> (1 - pd->subsampling_x);
  TX_SIZE plane_tx_size;
  const int max_blocks_high = max_block_high(xd, plane_bsize, plane);
  const int max_blocks_wide = max_block_wide(xd, plane_bsize, plane);

  // Skip positions that fall outside the visible frame area.
  if (blk_row >= max_blocks_high || blk_col >= max_blocks_wide) return;

  plane_tx_size =
      plane ? uv_txsize_lookup[bsize][mbmi->inter_tx_size[tx_row][tx_col]][0][0]
            : mbmi->inter_tx_size[tx_row][tx_col];

  if (tx_size == plane_tx_size) {
    encode_block(plane, block, blk_row, blk_col, plane_bsize, tx_size, arg);
  } else {
    const TX_SIZE sub_txs = sub_tx_size_map[tx_size];
    // This is the square transform block partition entry point.
    int bsl = tx_size_wide_unit[sub_txs];
    int i;
    assert(bsl > 0);
    assert(tx_size < TX_SIZES_ALL);

    // Visit the four quadrants in raster order.
    for (i = 0; i < 4; ++i) {
      const int offsetr = blk_row + ((i >> 1) * bsl);
      const int offsetc = blk_col + ((i & 0x01) * bsl);
      int step = tx_size_wide_unit[sub_txs] * tx_size_high_unit[sub_txs];

      if (offsetr >= max_blocks_high || offsetc >= max_blocks_wide) continue;

      encode_block_inter(plane, block, offsetr, offsetc, plane_bsize, sub_txs,
                         arg);
      block += step;
    }
  }
}
#endif
1212
// Callback payload for encode_block_pass1 (first-pass encoding), carrying
// the frame-level state and the macroblock being coded.
typedef struct encode_block_pass1_args {
  AV1_COMMON *cm;
  MACROBLOCK *x;
} encode_block_pass1_args;
1217
// First-pass variant of block encoding: transform + quantize with the "B"
// quantizer, then reconstruct with the fixed 4x4 inverse transform
// (WHT when lossless, DCT otherwise). No trellis optimization is done.
// Used as a callback for av1_foreach_transformed_block_in_plane.
static void encode_block_pass1(int plane, int block, int blk_row, int blk_col,
                               BLOCK_SIZE plane_bsize, TX_SIZE tx_size,
                               void *arg) {
  encode_block_pass1_args *args = (encode_block_pass1_args *)arg;
  AV1_COMMON *cm = args->cm;
  MACROBLOCK *const x = args->x;
  MACROBLOCKD *const xd = &x->e_mbd;
  struct macroblock_plane *const p = &x->plane[plane];
  struct macroblockd_plane *const pd = &xd->plane[plane];
  tran_low_t *const dqcoeff = BLOCK_OFFSET(pd->dqcoeff, block);
  uint8_t *dst;
  int ctx = 0;  // First pass uses a fixed (zero) entropy context.
  dst = &pd->dst
             .buf[(blk_row * pd->dst.stride + blk_col) << tx_size_wide_log2[0]];

  av1_xform_quant(cm, x, plane, block, blk_row, blk_col, plane_bsize, tx_size,
                  ctx, AV1_XFORM_QUANT_B);
#if !CONFIG_PVQ
  if (p->eobs[block] > 0) {
#else
  if (!x->pvq_skip[plane]) {
    {
      int tx_blk_size;
      int i, j;
      // transform block size in pixels
      tx_blk_size = tx_size_wide[tx_size];

// Since av1 does not have separate function which does inverse transform
// but av1_inv_txfm_add_*x*() also does addition of predicted image to
// inverse transformed image,
// pass blank dummy image to av1_inv_txfm_add_*x*(), i.e. set dst as zeros
#if CONFIG_HIGHBITDEPTH
      if (xd->cur_buf->flags & YV12_FLAG_HIGHBITDEPTH) {
        for (j = 0; j < tx_blk_size; j++)
          for (i = 0; i < tx_blk_size; i++)
            CONVERT_TO_SHORTPTR(dst)[j * pd->dst.stride + i] = 0;
      } else {
#endif  // CONFIG_HIGHBITDEPTH
        for (j = 0; j < tx_blk_size; j++)
          for (i = 0; i < tx_blk_size; i++) dst[j * pd->dst.stride + i] = 0;
#if CONFIG_HIGHBITDEPTH
      }
#endif  // CONFIG_HIGHBITDEPTH
    }
#endif  // !CONFIG_PVQ
#if CONFIG_HIGHBITDEPTH
    if (xd->cur_buf->flags & YV12_FLAG_HIGHBITDEPTH) {
      if (xd->lossless[xd->mi[0]->mbmi.segment_id]) {
        av1_highbd_iwht4x4_add(dqcoeff, dst, pd->dst.stride, p->eobs[block],
                               xd->bd);
      } else {
        av1_highbd_idct4x4_add(dqcoeff, dst, pd->dst.stride, p->eobs[block],
                               xd->bd);
      }
      return;
    }
#endif  // CONFIG_HIGHBITDEPTH
    if (xd->lossless[xd->mi[0]->mbmi.segment_id]) {
      av1_iwht4x4_add(dqcoeff, dst, pd->dst.stride, p->eobs[block]);
    } else {
      av1_idct4x4_add(dqcoeff, dst, pd->dst.stride, p->eobs[block]);
    }
  }
}
1282
Angie Chiangff6d8902016-10-21 11:02:09 -07001283void av1_encode_sby_pass1(AV1_COMMON *cm, MACROBLOCK *x, BLOCK_SIZE bsize) {
1284 encode_block_pass1_args args = { cm, x };
Yaowu Xuf883b422016-08-30 14:01:10 -07001285 av1_subtract_plane(x, bsize, 0);
1286 av1_foreach_transformed_block_in_plane(&x->e_mbd, bsize, 0,
Angie Chiangff6d8902016-10-21 11:02:09 -07001287 encode_block_pass1, &args);
Yaowu Xuc27fc142016-08-22 16:08:15 -07001288}
1289
// Encode (transform, quantize, optimize, reconstruct) every plane of one
// superblock. Initializes the per-plane entropy contexts, subtracts the
// prediction to form the residual (non-PVQ), and iterates the transform
// blocks either through the variable-tx partition walker or the fixed-size
// per-plane iterator. mbmi->skip starts at 1 and is cleared by any block
// that produces coded coefficients.
void av1_encode_sb(AV1_COMMON *cm, MACROBLOCK *x, BLOCK_SIZE bsize, int mi_row,
                   int mi_col) {
  MACROBLOCKD *const xd = &x->e_mbd;
  struct optimize_ctx ctx;
  MB_MODE_INFO *mbmi = &xd->mi[0]->mbmi;
  struct encode_b_args arg = { cm, x, &ctx, &mbmi->skip, NULL, NULL, 1 };
  int plane;

  mbmi->skip = 1;

  if (x->skip) return;

  for (plane = 0; plane < MAX_MB_PLANE; ++plane) {
#if CONFIG_CB4X4 && !CONFIG_CHROMA_2X2
    const int subsampling_x = xd->plane[plane].subsampling_x;
    const int subsampling_y = xd->plane[plane].subsampling_y;

    // Chroma of sub-4x4 partitions may be coded with a neighboring block;
    // skip planes that are not a chroma reference at this position.
    if (!is_chroma_reference(mi_row, mi_col, bsize, subsampling_x,
                             subsampling_y))
      continue;

    bsize = scale_chroma_bsize(bsize, subsampling_x, subsampling_y);
#else
    (void)mi_row;
    (void)mi_col;
#endif

#if CONFIG_VAR_TX
    // TODO(jingning): Clean this up.
    const struct macroblockd_plane *const pd = &xd->plane[plane];
    const BLOCK_SIZE plane_bsize = get_plane_block_size(bsize, pd);
    const int mi_width = block_size_wide[plane_bsize] >> tx_size_wide_log2[0];
    const int mi_height = block_size_high[plane_bsize] >> tx_size_wide_log2[0];
    const TX_SIZE max_tx_size = get_vartx_max_txsize(mbmi, plane_bsize);
    const BLOCK_SIZE txb_size = txsize_to_bsize[max_tx_size];
    const int bw = block_size_wide[txb_size] >> tx_size_wide_log2[0];
    const int bh = block_size_high[txb_size] >> tx_size_wide_log2[0];
    int idx, idy;
    int block = 0;
    int step = tx_size_wide_unit[max_tx_size] * tx_size_high_unit[max_tx_size];
    av1_get_entropy_contexts(bsize, 0, pd, ctx.ta[plane], ctx.tl[plane]);
#else
    const struct macroblockd_plane *const pd = &xd->plane[plane];
    const TX_SIZE tx_size = get_tx_size(plane, xd);
    av1_get_entropy_contexts(bsize, tx_size, pd, ctx.ta[plane], ctx.tl[plane]);
#endif

#if !CONFIG_PVQ
    // PVQ works on source/prediction directly; no residual is needed.
    av1_subtract_plane(x, bsize, plane);
#endif
    arg.ta = ctx.ta[plane];
    arg.tl = ctx.tl[plane];

#if CONFIG_VAR_TX
    // Walk max-size transform blocks; each may recursively split further.
    for (idy = 0; idy < mi_height; idy += bh) {
      for (idx = 0; idx < mi_width; idx += bw) {
        encode_block_inter(plane, block, idy, idx, plane_bsize, max_tx_size,
                           &arg);
        block += step;
      }
    }
#else
    av1_foreach_transformed_block_in_plane(xd, bsize, plane, encode_block,
                                           &arg);
#endif
  }
}
1357
#if CONFIG_SUPERTX
// Supertx variant of av1_encode_sb: encodes all planes of a superblock using
// a single transform configuration per plane (TX_4X4 under CONFIG_VAR_TX),
// always iterating with the fixed-size per-plane block walker.
void av1_encode_sb_supertx(AV1_COMMON *cm, MACROBLOCK *x, BLOCK_SIZE bsize) {
  MACROBLOCKD *const xd = &x->e_mbd;
  struct optimize_ctx ctx;
  MB_MODE_INFO *mbmi = &xd->mi[0]->mbmi;
  struct encode_b_args arg = { cm, x, &ctx, &mbmi->skip, NULL, NULL, 1 };
  int plane;

  mbmi->skip = 1;
  if (x->skip) return;

  for (plane = 0; plane < MAX_MB_PLANE; ++plane) {
    const struct macroblockd_plane *const pd = &xd->plane[plane];
#if CONFIG_VAR_TX
    const TX_SIZE tx_size = TX_4X4;
#else
    const TX_SIZE tx_size = get_tx_size(plane, xd);
#endif
    av1_subtract_plane(x, bsize, plane);
    av1_get_entropy_contexts(bsize, tx_size, pd, ctx.ta[plane], ctx.tl[plane]);
    arg.ta = ctx.ta[plane];
    arg.tl = ctx.tl[plane];
    av1_foreach_transformed_block_in_plane(xd, bsize, plane, encode_block,
                                           &arg);
  }
}
#endif  // CONFIG_SUPERTX
1385
#if !CONFIG_PVQ
// Update the above (a) and left (l) entropy-context arrays for one coded
// transform block: the context value is whether the block has nonzero
// coefficients (or, with CONFIG_LV_MAP, the per-block txb entropy context),
// replicated across the block's width/height in 4x4 units.
void av1_set_txb_context(MACROBLOCK *x, int plane, int block, TX_SIZE tx_size,
                         ENTROPY_CONTEXT *a, ENTROPY_CONTEXT *l) {
  (void)tx_size;
  struct macroblock_plane *p = &x->plane[plane];

#if !CONFIG_LV_MAP
  *a = *l = p->eobs[block] > 0;
#else   // !CONFIG_LV_MAP
  *a = *l = p->txb_entropy_ctx[block];
#endif  // !CONFIG_LV_MAP

#if CONFIG_VAR_TX || CONFIG_LV_MAP
  // Replicate the context to cover the whole transform block extent.
  int i;
  for (i = 0; i < tx_size_wide_unit[tx_size]; ++i) a[i] = a[0];

  for (i = 0; i < tx_size_high_unit[tx_size]; ++i) l[i] = l[0];
#endif
}
#endif
Angie Chiang36aca332017-03-23 14:16:24 -07001406
1407static void encode_block_intra_and_set_context(int plane, int block,
1408 int blk_row, int blk_col,
1409 BLOCK_SIZE plane_bsize,
1410 TX_SIZE tx_size, void *arg) {
1411 av1_encode_block_intra(plane, block, blk_row, blk_col, plane_bsize, tx_size,
1412 arg);
Yushin Cho6341f5c2017-03-24 14:36:28 -07001413#if !CONFIG_PVQ
Angie Chiang36aca332017-03-23 14:16:24 -07001414 struct encode_b_args *const args = arg;
1415 MACROBLOCK *x = args->x;
1416 ENTROPY_CONTEXT *a = &args->ta[blk_col];
1417 ENTROPY_CONTEXT *l = &args->tl[blk_row];
Angie Chiangdb0c7d42017-03-23 16:05:37 -07001418 av1_set_txb_context(x, plane, block, tx_size, a, l);
Yushin Cho6341f5c2017-03-24 14:36:28 -07001419#endif
Angie Chiang36aca332017-03-23 14:16:24 -07001420}
1421
// Encode a single intra transform block: predict, subtract, transform,
// quantize (optionally trellis-optimize), and reconstruct into the frame
// buffer. Also clears the block-level skip flag when coefficients survive,
// and performs CfL bookkeeping when CONFIG_CFL is enabled.
void av1_encode_block_intra(int plane, int block, int blk_row, int blk_col,
                            BLOCK_SIZE plane_bsize, TX_SIZE tx_size,
                            void *arg) {
  struct encode_b_args *const args = arg;
  AV1_COMMON *cm = args->cm;
  MACROBLOCK *const x = args->x;
  MACROBLOCKD *const xd = &x->e_mbd;
  struct macroblock_plane *const p = &x->plane[plane];
  struct macroblockd_plane *const pd = &xd->plane[plane];
  tran_low_t *dqcoeff = BLOCK_OFFSET(pd->dqcoeff, block);
  PLANE_TYPE plane_type = get_plane_type(plane);
  const TX_TYPE tx_type = get_tx_type(plane_type, xd, block, tx_size);
  uint16_t *eob = &p->eobs[block];
  const int dst_stride = pd->dst.stride;
  // blk_row/blk_col are in 4x4 units; scale by the 4x4 pixel width.
  uint8_t *dst =
      &pd->dst.buf[(blk_row * dst_stride + blk_col) << tx_size_wide_log2[0]];
#if CONFIG_CFL
  // Encoder-side facade additionally searches the CfL alpha index.
  av1_predict_intra_block_encoder_facade(x, plane, block, blk_col, blk_row,
                                         tx_size, plane_bsize);
#else
  av1_predict_intra_block_facade(xd, plane, block, blk_col, blk_row, tx_size);
#endif
  av1_subtract_txb(x, plane, plane_bsize, blk_col, blk_row, tx_size);

  const ENTROPY_CONTEXT *a = &args->ta[blk_col];
  const ENTROPY_CONTEXT *l = &args->tl[blk_row];
  int ctx = combine_entropy_contexts(*a, *l);
  if (args->enable_optimize_b) {
    // FP quantization followed by trellis optimization of the coefficients.
    av1_xform_quant(cm, x, plane, block, blk_row, blk_col, plane_bsize, tx_size,
                    ctx, AV1_XFORM_QUANT_FP);
    av1_optimize_b(cm, x, plane, block, tx_size, ctx);
  } else {
    av1_xform_quant(cm, x, plane, block, blk_row, blk_col, plane_bsize, tx_size,
                    ctx, AV1_XFORM_QUANT_B);
  }

#if CONFIG_PVQ
  // *(args->skip) == mbmi->skip
  if (!x->pvq_skip[plane]) *(args->skip) = 0;

  // PVQ-skipped blocks have nothing to reconstruct.
  if (x->pvq_skip[plane]) return;
#endif  // CONFIG_PVQ
  av1_inverse_transform_block(xd, dqcoeff, tx_type, tx_size, dst, dst_stride,
                              *eob);
#if !CONFIG_PVQ
  // Any surviving coefficient means the block cannot be skipped.
  if (*eob) *(args->skip) = 0;
#else
// Note : *(args->skip) == mbmi->skip
#endif
#if CONFIG_CFL
  MB_MODE_INFO *mbmi = &xd->mi[0]->mbmi;
  // Save the reconstructed luma so chroma can later be predicted from it.
  if (plane == AOM_PLANE_Y && x->cfl_store_y) {
    cfl_store(xd->cfl, dst, dst_stride, blk_row, blk_col, tx_size);
  }

  if (mbmi->uv_mode == DC_PRED) {
    // TODO(ltrudeau) find a cleaner way to detect last transform block
    if (plane == AOM_PLANE_U) {
      xd->cfl->num_tx_blk[CFL_PRED_U] =
          (blk_row == 0 && blk_col == 0) ? 1
                                         : xd->cfl->num_tx_blk[CFL_PRED_U] + 1;
    }

    if (plane == AOM_PLANE_V) {
      xd->cfl->num_tx_blk[CFL_PRED_V] =
          (blk_row == 0 && blk_col == 0) ? 1
                                         : xd->cfl->num_tx_blk[CFL_PRED_V] + 1;

      // Matching U/V block counts imply the last chroma transform block.
      if (mbmi->skip &&
          xd->cfl->num_tx_blk[CFL_PRED_U] == xd->cfl->num_tx_blk[CFL_PRED_V]) {
        assert(plane_bsize != BLOCK_INVALID);
        const int block_width = block_size_wide[plane_bsize];
        const int block_height = block_size_high[plane_bsize];

        // if SKIP is chosen at the block level, and ind != 0, we must change
        // the prediction
        if (mbmi->cfl_alpha_ind != 0) {
          const struct macroblockd_plane *const pd_cb = &xd->plane[AOM_PLANE_U];
          uint8_t *const dst_cb = pd_cb->dst.buf;
          const int dst_stride_cb = pd_cb->dst.stride;
          uint8_t *const dst_cr = pd->dst.buf;
          const int dst_stride_cr = pd->dst.stride;
          // Rewrite both chroma reconstructions to plain DC (alpha = 0).
          for (int j = 0; j < block_height; j++) {
            for (int i = 0; i < block_width; i++) {
              dst_cb[dst_stride_cb * j + i] =
                  (uint8_t)(xd->cfl->dc_pred[CFL_PRED_U] + 0.5);
              dst_cr[dst_stride_cr * j + i] =
                  (uint8_t)(xd->cfl->dc_pred[CFL_PRED_V] + 0.5);
            }
          }
          mbmi->cfl_alpha_ind = 0;
          mbmi->cfl_alpha_signs[CFL_PRED_U] = CFL_SIGN_POS;
          mbmi->cfl_alpha_signs[CFL_PRED_V] = CFL_SIGN_POS;
        }
      }
    }
  }
#endif
}
1521
Luc Trudeauf5334002017-04-25 12:21:26 -04001522#if CONFIG_CFL
// Sum-of-squared-error between the chroma source and the CfL prediction
// built from the luma block y_pix with scaling factor alpha.
// Returns the SSE for +alpha; when dist_neg_out is non-NULL it also reports
// the SSE for -alpha (identical to the positive one when alpha == 0).
static int cfl_alpha_dist(const uint8_t *y_pix, int y_stride, double y_avg,
                          const uint8_t *src, int src_stride, int blk_width,
                          int blk_height, double dc_pred, double alpha,
                          int *dist_neg_out) {
  const double dc_pred_bias = dc_pred + 0.5;
  int sse = 0;

  // Fast path: with zero alpha the prediction collapses to the rounded DC
  // value and both signs yield the same distortion.
  if (alpha == 0.0) {
    const int pred = (int)dc_pred_bias;
    for (int row = 0; row < blk_height; row++) {
      for (int col = 0; col < blk_width; col++) {
        const int err = src[col] - pred;
        sse += err * err;
      }
      src += src_stride;
    }

    if (dist_neg_out) *dist_neg_out = sse;

    return sse;
  }

  // General path: accumulate distortion for +alpha and -alpha in one pass.
  int sse_neg = 0;
  for (int row = 0; row < blk_height; row++) {
    for (int col = 0; col < blk_width; col++) {
      const double scaled_luma = alpha * (y_pix[col] - y_avg);
      const int uv = src[col];
      const int err_pos = uv - (int)(scaled_luma + dc_pred_bias);
      const int err_neg = uv + (int)(scaled_luma - dc_pred_bias);
      sse += err_pos * err_pos;
      sse_neg += err_neg * err_neg;
    }
    y_pix += y_stride;
    src += src_stride;
  }

  if (dist_neg_out) *dist_neg_out = sse_neg;

  return sse;
}
1564
// Rate-distortion search over the CfL alphabet: returns the alpha index that
// minimizes RDCOST over both chroma planes, and writes the chosen signs for
// U and V into signs[]. cfl_cost[c] is the precomputed rate of coding index c.
static int cfl_compute_alpha_ind(MACROBLOCK *const x, const CFL_CTX *const cfl,
                                 BLOCK_SIZE bsize, int *const cfl_cost,
                                 CFL_SIGN_TYPE *signs) {
  const struct macroblock_plane *const p_u = &x->plane[AOM_PLANE_U];
  const struct macroblock_plane *const p_v = &x->plane[AOM_PLANE_V];
  const uint8_t *const src_u = p_u->src.buf;
  const uint8_t *const src_v = p_v->src.buf;
  const int src_stride_u = p_u->src.stride;
  const int src_stride_v = p_v->src.stride;
  const int block_width = block_size_wide[bsize];
  const int block_height = block_size_high[bsize];
  const double dc_pred_u = cfl->dc_pred[CFL_PRED_U];
  const double dc_pred_v = cfl->dc_pred[CFL_PRED_V];

  // Temporary pixel buffer used to store the CfL prediction when we compute the
  // alpha index.
  uint8_t tmp_pix[MAX_SB_SQUARE];
  // Load CfL Prediction over the entire block
  const double y_avg =
      cfl_load(cfl, tmp_pix, MAX_SB_SIZE, 0, 0, block_width, block_height);

  int dist_u, dist_v;
  int dist_u_neg, dist_v_neg;
  int dist;
  int64_t cost;
  int64_t best_cost;

  // Compute least squares parameter of the entire block
  // IMPORTANT: We assume that the first code is 0,0
  int ind = 0;
  signs[CFL_PRED_U] = CFL_SIGN_POS;
  signs[CFL_PRED_V] = CFL_SIGN_POS;

  // Baseline: index 0 (alpha 0,0), i.e. pure DC prediction on both planes.
  dist = cfl_alpha_dist(tmp_pix, MAX_SB_SIZE, y_avg, src_u, src_stride_u,
                        block_width, block_height, dc_pred_u, 0, NULL) +
         cfl_alpha_dist(tmp_pix, MAX_SB_SIZE, y_avg, src_v, src_stride_v,
                        block_width, block_height, dc_pred_v, 0, NULL);
  dist *= 16;  // scale distortion to match RDCOST's expected units
  best_cost = RDCOST(x->rdmult, x->rddiv, *cfl_cost, dist);

  for (int c = 1; c < CFL_ALPHABET_SIZE; c++) {
    // One distortion pass per plane yields both the +alpha and -alpha SSE.
    dist_u = cfl_alpha_dist(tmp_pix, MAX_SB_SIZE, y_avg, src_u, src_stride_u,
                            block_width, block_height, dc_pred_u,
                            cfl_alpha_codes[c][CFL_PRED_U], &dist_u_neg);
    dist_v = cfl_alpha_dist(tmp_pix, MAX_SB_SIZE, y_avg, src_v, src_stride_v,
                            block_width, block_height, dc_pred_v,
                            cfl_alpha_codes[c][CFL_PRED_V], &dist_v_neg);
    // A zero alpha has no sign; the loop start skips the redundant sign in
    // that case (== 0.0 evaluates to 1, bypassing the first sign value).
    for (int sign_u = cfl_alpha_codes[c][CFL_PRED_U] == 0.0; sign_u < CFL_SIGNS;
         sign_u++) {
      for (int sign_v = cfl_alpha_codes[c][CFL_PRED_V] == 0.0;
           sign_v < CFL_SIGNS; sign_v++) {
        dist = (sign_u == CFL_SIGN_POS ? dist_u : dist_u_neg) +
               (sign_v == CFL_SIGN_POS ? dist_v : dist_v_neg);
        dist *= 16;
        cost = RDCOST(x->rdmult, x->rddiv, cfl_cost[c], dist);
        if (cost < best_cost) {
          best_cost = cost;
          ind = c;
          signs[CFL_PRED_U] = sign_u;
          signs[CFL_PRED_V] = sign_v;
        }
      }
    }
  }

  return ind;
}
1632
// Encoder-side wrapper around intra prediction that, for CfL (chroma
// DC_PRED), first estimates per-index rates from the adapted CDF and runs
// the alpha search before producing the prediction. Resets the CfL alpha
// state at the first luma block of each macroblock.
void av1_predict_intra_block_encoder_facade(MACROBLOCK *x, int plane,
                                            int block_idx, int blk_col,
                                            int blk_row, TX_SIZE tx_size,
                                            BLOCK_SIZE plane_bsize) {
  MACROBLOCKD *const xd = &x->e_mbd;
  MB_MODE_INFO *mbmi = &xd->mi[0]->mbmi;
  // First luma transform block: reset CfL parameters for this macroblock.
  if (blk_col == 0 && blk_row == 0 && plane == AOM_PLANE_Y) {
    mbmi->cfl_alpha_ind = 0;
    mbmi->cfl_alpha_signs[CFL_PRED_U] = CFL_SIGN_POS;
    mbmi->cfl_alpha_signs[CFL_PRED_V] = CFL_SIGN_POS;
  }
  if (plane != AOM_PLANE_Y && mbmi->uv_mode == DC_PRED) {
    // Run the alpha search once, at the first U transform block.
    if (blk_col == 0 && blk_row == 0 && plane == AOM_PLANE_U) {
#if !CONFIG_EC_ADAPT
#error "CfL rate estimation requires ec_adapt."
#endif
      FRAME_CONTEXT *const ec_ctx = xd->tile_ctx;
      assert(ec_ctx->cfl_alpha_cdf[CFL_ALPHABET_SIZE - 1] == AOM_ICDF(32768U));
      const int prob_den = 32768U;

      CFL_CTX *const cfl = xd->cfl;
      // Convert each alphabet entry's CDF interval into a bit-cost estimate;
      // nonzero alphas also pay one literal sign bit per plane.
      int cfl_costs[CFL_ALPHABET_SIZE];
      for (int c = 0; c < CFL_ALPHABET_SIZE; c++) {
        int sign_bit_cost = (cfl_alpha_codes[c][CFL_PRED_U] != 0.0) +
                            (cfl_alpha_codes[c][CFL_PRED_V] != 0.0);
        int prob_num = AOM_ICDF(ec_ctx->cfl_alpha_cdf[c]);
        if (c > 0) prob_num -= AOM_ICDF(ec_ctx->cfl_alpha_cdf[c - 1]);
        cfl_costs[c] = av1_cost_zero(get_prob(prob_num, prob_den)) +
                       av1_cost_literal(sign_bit_cost);
      }
      cfl_dc_pred(xd, plane_bsize, tx_size);
      mbmi->cfl_alpha_ind = cfl_compute_alpha_ind(
          x, cfl, plane_bsize, cfl_costs, mbmi->cfl_alpha_signs);
    }
  }
  av1_predict_intra_block_facade(xd, plane, block_idx, blk_col, blk_row,
                                 tx_size);
}
1671#endif
1672
// Intra-encode every transform block of one plane of the given block size:
// sets up entropy contexts (when trellis optimization is on) and walks all
// transform blocks with encode_block_intra_and_set_context.
void av1_encode_intra_block_plane(AV1_COMMON *cm, MACROBLOCK *x,
                                  BLOCK_SIZE bsize, int plane,
                                  int enable_optimize_b, int mi_row,
                                  int mi_col) {
  const MACROBLOCKD *const xd = &x->e_mbd;
  ENTROPY_CONTEXT ta[2 * MAX_MIB_SIZE] = { 0 };
  ENTROPY_CONTEXT tl[2 * MAX_MIB_SIZE] = { 0 };

  struct encode_b_args arg = {
    cm, x, NULL, &xd->mi[0]->mbmi.skip, ta, tl, enable_optimize_b
  };

#if CONFIG_CB4X4
  // With 4x4 coding blocks, subsampled chroma is coded at a coarser grid;
  // skip positions that are not chroma reference points.
  if (!is_chroma_reference(mi_row, mi_col, bsize,
                           xd->plane[plane].subsampling_x,
                           xd->plane[plane].subsampling_y))
    return;
#else
  (void)mi_row;
  (void)mi_col;
#endif

  if (enable_optimize_b) {
    // Trellis optimization needs real above/left contexts; otherwise the
    // zero-initialized arrays suffice.
    const struct macroblockd_plane *const pd = &xd->plane[plane];
    const TX_SIZE tx_size = get_tx_size(plane, xd);
    av1_get_entropy_contexts(bsize, tx_size, pd, ta, tl);
  }
  av1_foreach_transformed_block_in_plane(
      xd, bsize, plane, encode_block_intra_and_set_context, &arg);
}
Yushin Cho77bba8d2016-11-04 16:36:56 -07001703
1704#if CONFIG_PVQ
// Quantize and code one transform block with Daala's PVQ (perceptual vector
// quantization) instead of scalar quantization. Converts AV1 coefficients to
// Daala's fixed-point domain, PVQ-codes the AC bands and the DC residue,
// reconstructs the dequantized coefficients into dqcoeff, and reports the
// bit cost in *rate. Returns which of AC/DC were actually coded.
PVQ_SKIP_TYPE av1_pvq_encode_helper(MACROBLOCK *x, tran_low_t *const coeff,
                                    tran_low_t *ref_coeff,
                                    tran_low_t *const dqcoeff, uint16_t *eob,
                                    const int16_t *quant, int plane,
                                    int tx_size, TX_TYPE tx_type, int *rate,
                                    int speed, PVQ_INFO *pvq_info) {
  const int tx_blk_size = tx_size_wide[tx_size];
  daala_enc_ctx *daala_enc = &x->daala_enc;
  PVQ_SKIP_TYPE ac_dc_coded;
  int coeff_shift = 3 - av1_get_tx_scale(tx_size);
  int hbd_downshift = 0;
  int rounding_mask;
  int pvq_dc_quant;
  int use_activity_masking = daala_enc->use_activity_masking;
  int tell;
  int has_dc_skip = 1;
  int i;
  int off = od_qm_offset(tx_size, plane ? 1 : 0);

  DECLARE_ALIGNED(16, tran_low_t, coeff_pvq[OD_TXSIZE_MAX * OD_TXSIZE_MAX]);
  DECLARE_ALIGNED(16, tran_low_t, ref_coeff_pvq[OD_TXSIZE_MAX * OD_TXSIZE_MAX]);
  DECLARE_ALIGNED(16, tran_low_t, dqcoeff_pvq[OD_TXSIZE_MAX * OD_TXSIZE_MAX]);

  DECLARE_ALIGNED(16, int32_t, in_int32[OD_TXSIZE_MAX * OD_TXSIZE_MAX]);
  DECLARE_ALIGNED(16, int32_t, ref_int32[OD_TXSIZE_MAX * OD_TXSIZE_MAX]);
  DECLARE_ALIGNED(16, int32_t, out_int32[OD_TXSIZE_MAX * OD_TXSIZE_MAX]);

  // High-bit-depth input is scaled down to keep the PVQ domain in range.
  hbd_downshift = x->e_mbd.bd - 8;

  assert(OD_COEFF_SHIFT >= 4);
  // DC quantizer for PVQ
  if (use_activity_masking)
    pvq_dc_quant =
        OD_MAXI(1, (quant[0] << (OD_COEFF_SHIFT - 3) >> hbd_downshift) *
                           daala_enc->state
                               .pvq_qm_q4[plane][od_qm_get_index(tx_size, 0)] >>
                       4);
  else
    pvq_dc_quant =
        OD_MAXI(1, quant[0] << (OD_COEFF_SHIFT - 3) >> hbd_downshift);

  *eob = 0;

#if CONFIG_DAALA_EC
  // Bit position before coding, in 1/8th-bit units, to measure this
  // block's rate afterwards.
  tell = od_ec_enc_tell_frac(&daala_enc->w.ec);
#else
#error "CONFIG_PVQ currently requires CONFIG_DAALA_EC."
#endif

  // Change coefficient ordering for pvq encoding.
  od_raster_to_coding_order(coeff_pvq, tx_blk_size, tx_type, coeff,
                            tx_blk_size);
  od_raster_to_coding_order(ref_coeff_pvq, tx_blk_size, tx_type, ref_coeff,
                            tx_blk_size);

  // copy int16 inputs to int32
  for (i = 0; i < tx_blk_size * tx_blk_size; i++) {
    ref_int32[i] =
        AOM_SIGNED_SHL(ref_coeff_pvq[i], OD_COEFF_SHIFT - coeff_shift) >>
        hbd_downshift;
    in_int32[i] = AOM_SIGNED_SHL(coeff_pvq[i], OD_COEFF_SHIFT - coeff_shift) >>
                  hbd_downshift;
  }

  // Quantize the DC residue with a widened deadzone (threshold ~0.55 * step).
  if (abs(in_int32[0] - ref_int32[0]) < pvq_dc_quant * 141 / 256) { /* 0.55 */
    out_int32[0] = 0;
  } else {
    out_int32[0] = OD_DIV_R0(in_int32[0] - ref_int32[0], pvq_dc_quant);
  }

  ac_dc_coded =
      od_pvq_encode(daala_enc, ref_int32, in_int32, out_int32,
                    OD_MAXI(1, quant[0] << (OD_COEFF_SHIFT - 3) >>
                                   hbd_downshift),  // scale/quantizer
                    OD_MAXI(1, quant[1] << (OD_COEFF_SHIFT - 3) >>
                                   hbd_downshift),  // scale/quantizer
                    plane,
                    tx_size, OD_PVQ_BETA[use_activity_masking][plane][tx_size],
                    0,  // is_keyframe,
                    daala_enc->state.qm + off, daala_enc->state.qm_inv + off,
                    speed,  // speed
                    pvq_info);

  // Encode residue of DC coeff, if required.
  if (!has_dc_skip || out_int32[0]) {
    generic_encode(&daala_enc->w, &daala_enc->state.adapt->model_dc[plane],
                   abs(out_int32[0]) - has_dc_skip,
                   &daala_enc->state.adapt->ex_dc[plane][tx_size][0], 2);
  }
  if (out_int32[0]) {
    aom_write_bit(&daala_enc->w, out_int32[0] < 0);
  }

  // need to save quantized residue of DC coeff
  // so that final pvq bitstream writing can know whether DC is coded.
  if (pvq_info) pvq_info->dq_dc_residue = out_int32[0];

  // Dequantize DC and add back the prediction.
  out_int32[0] = out_int32[0] * pvq_dc_quant;
  out_int32[0] += ref_int32[0];

  // copy int32 result back to int16
  assert(OD_COEFF_SHIFT > coeff_shift);
  rounding_mask = (1 << (OD_COEFF_SHIFT - coeff_shift - 1)) - 1;
  for (i = 0; i < tx_blk_size * tx_blk_size; i++) {
    out_int32[i] = AOM_SIGNED_SHL(out_int32[i], hbd_downshift);
    // Round to nearest with ties handled symmetrically for negatives.
    dqcoeff_pvq[i] = (out_int32[i] + (out_int32[i] < 0) + rounding_mask) >>
                     (OD_COEFF_SHIFT - coeff_shift);
  }

  // Back to original coefficient order
  od_coding_order_to_raster(dqcoeff, tx_blk_size, tx_type, dqcoeff_pvq,
                            tx_blk_size);

  // PVQ codes the whole block; report a full eob.
  *eob = tx_blk_size * tx_blk_size;

#if CONFIG_DAALA_EC
  *rate = (od_ec_enc_tell_frac(&daala_enc->w.ec) - tell)
          << (AV1_PROB_COST_SHIFT - OD_BITRES);
#else
#error "CONFIG_PVQ currently requires CONFIG_DAALA_EC."
#endif
  assert(*rate >= 0);

  return ac_dc_coded;
}
1830
Timothy B. Terriberry44bb6d02017-04-07 15:44:14 -07001831void av1_store_pvq_enc_info(PVQ_INFO *pvq_info, int *qg, int *theta, int *k,
1832 od_coeff *y, int nb_bands, const int *off,
1833 int *size, int skip_rest, int skip_dir,
Yushin Cho77bba8d2016-11-04 16:36:56 -07001834 int bs) { // block size in log_2 -2
1835 int i;
1836 const int tx_blk_size = tx_size_wide[bs];
1837
1838 for (i = 0; i < nb_bands; i++) {
1839 pvq_info->qg[i] = qg[i];
1840 pvq_info->theta[i] = theta[i];
Yushin Cho77bba8d2016-11-04 16:36:56 -07001841 pvq_info->k[i] = k[i];
1842 pvq_info->off[i] = off[i];
1843 pvq_info->size[i] = size[i];
1844 }
1845
1846 memcpy(pvq_info->y, y, tx_blk_size * tx_blk_size * sizeof(od_coeff));
1847
1848 pvq_info->nb_bands = nb_bands;
1849 pvq_info->skip_rest = skip_rest;
1850 pvq_info->skip_dir = skip_dir;
1851 pvq_info->bs = bs;
1852}
1853#endif