blob: 6ab5ad9b8c810691b71d2a2f7e58819d72acf1a5 [file] [log] [blame]
/*
 * Copyright (c) 2016, Alliance for Open Media. All rights reserved
 *
 * This source code is subject to the terms of the BSD 2 Clause License and
 * the Alliance for Open Media Patent License 1.0. If the BSD 2 Clause License
 * was not distributed with this source code in the LICENSE file, you can
 * obtain it at www.aomedia.org/license/software. If the Alliance for Open
 * Media Patent License 1.0 was not distributed with this source code in the
 * PATENTS file, you can obtain it at www.aomedia.org/license/patent.
 */
11
12#include "av1/common/cfl.h"
13#include "av1/common/common_data.h"
Luc Trudeaubaeb3752017-04-24 11:19:25 -040014#include "av1/common/onyxc_int.h"
15
Luc Trudeaub4faea72017-12-15 16:44:01 -050016#include "./av1_rtcd.h"
17
Luc Trudeaudac5e392017-06-05 15:52:02 -040018void cfl_init(CFL_CTX *cfl, AV1_COMMON *cm) {
Luc Trudeau467205a2017-12-12 23:23:47 -050019 assert(block_size_wide[CFL_MAX_BLOCK_SIZE] == CFL_BUF_LINE);
20 assert(block_size_high[CFL_MAX_BLOCK_SIZE] == CFL_BUF_LINE);
Luc Trudeau06b47082017-10-31 10:42:36 -040021 if ((cm->subsampling_x != 0 && cm->subsampling_x != 1) ||
22 (cm->subsampling_y != 0 && cm->subsampling_y != 1)) {
23 aom_internal_error(&cm->error, AOM_CODEC_UNSUP_BITSTREAM,
24 "Only 4:4:4, 4:4:0, 4:2:2 and 4:2:0 are currently "
25 "supported by CfL, %d %d "
26 "subsampling is not supported.\n",
27 cm->subsampling_x, cm->subsampling_y);
Luc Trudeaubaeb3752017-04-24 11:19:25 -040028 }
Luc Trudeau4e26d662017-09-11 13:08:40 -040029 memset(&cfl->pred_buf_q3, 0, sizeof(cfl->pred_buf_q3));
Luc Trudeaudac5e392017-06-05 15:52:02 -040030 cfl->subsampling_x = cm->subsampling_x;
31 cfl->subsampling_y = cm->subsampling_y;
Luc Trudeau3dc55e02017-06-22 14:03:47 -040032 cfl->are_parameters_computed = 0;
Luc Trudeaufcca37a2017-08-14 15:05:07 -040033 cfl->store_y = 0;
Luc Trudeau467205a2017-12-12 23:23:47 -050034 // The DC_PRED cache is disabled by default and is only enabled in
35 // cfl_rd_pick_alpha
36 cfl->use_dc_pred_cache = 0;
37 cfl->dc_pred_is_cached[CFL_PRED_U] = 0;
38 cfl->dc_pred_is_cached[CFL_PRED_V] = 0;
David Michael Barrd27f1e62018-01-11 23:03:30 +090039#if CONFIG_DEBUG && !CONFIG_RECT_TX_EXT_INTRA
Luc Trudeauc84c21c2017-07-25 19:40:34 -040040 cfl_clear_sub8x8_val(cfl);
Luc Trudeauc7af36d2017-10-11 21:01:00 -040041 cfl->store_counter = 0;
42 cfl->last_compute_counter = 0;
David Michael Barrd27f1e62018-01-11 23:03:30 +090043#endif // CONFIG_DEBUG && !CONFIG_RECT_TX_EXT_INTRA
Luc Trudeau3dc55e02017-06-22 14:03:47 -040044}
45
Luc Trudeau467205a2017-12-12 23:23:47 -050046void cfl_store_dc_pred(MACROBLOCKD *const xd, const uint8_t *input,
47 CFL_PRED_TYPE pred_plane, int width) {
48 assert(pred_plane < CFL_PRED_PLANES);
49 assert(width <= CFL_BUF_LINE);
Yaowu Xud3e7c682017-12-21 14:08:25 -080050
Luc Trudeau467205a2017-12-12 23:23:47 -050051 if (get_bitdepth_data_path_index(xd)) {
52 uint16_t *const input_16 = CONVERT_TO_SHORTPTR(input);
53 memcpy(xd->cfl.dc_pred_cache[pred_plane], input_16, width << 1);
54 return;
55 }
Yaowu Xud3e7c682017-12-21 14:08:25 -080056
Luc Trudeau467205a2017-12-12 23:23:47 -050057 memcpy(xd->cfl.dc_pred_cache[pred_plane], input, width);
58}
59
// Fills a width x height region of dst by copying the first `width` bytes of
// dc_pred_cache into every row. Rows of dst are dst_stride bytes apart.
static void cfl_load_dc_pred_lbd(const int16_t *dc_pred_cache, uint8_t *dst,
                                 int dst_stride, int width, int height) {
  uint8_t *row = dst;
  for (int rows_left = height; rows_left > 0; rows_left--) {
    memcpy(row, dc_pred_cache, width);
    row += dst_stride;
  }
}
67
// Fills a width x height region of 16-bit dst by copying the first `width`
// 16-bit entries of dc_pred_cache into every row. Rows of dst are dst_stride
// elements apart.
static void cfl_load_dc_pred_hbd(const int16_t *dc_pred_cache, uint16_t *dst,
                                 int dst_stride, int width, int height) {
  // Two bytes per pixel on the high bit depth path.
  const size_t row_bytes = width << 1;
  uint16_t *row = dst;
  for (int rows_left = height; rows_left > 0; rows_left--) {
    memcpy(row, dc_pred_cache, row_bytes);
    row += dst_stride;
  }
}
Luc Trudeau467205a2017-12-12 23:23:47 -050076void cfl_load_dc_pred(MACROBLOCKD *const xd, uint8_t *dst, int dst_stride,
77 TX_SIZE tx_size, CFL_PRED_TYPE pred_plane) {
78 const int width = tx_size_wide[tx_size];
79 const int height = tx_size_high[tx_size];
80 assert(pred_plane < CFL_PRED_PLANES);
81 assert(width <= CFL_BUF_LINE);
82 assert(height <= CFL_BUF_LINE);
Luc Trudeau467205a2017-12-12 23:23:47 -050083 if (get_bitdepth_data_path_index(xd)) {
84 uint16_t *dst_16 = CONVERT_TO_SHORTPTR(dst);
85 cfl_load_dc_pred_hbd(xd->cfl.dc_pred_cache[pred_plane], dst_16, dst_stride,
86 width, height);
87 return;
88 }
Luc Trudeau467205a2017-12-12 23:23:47 -050089 cfl_load_dc_pred_lbd(xd->cfl.dc_pred_cache[pred_plane], dst, dst_stride,
90 width, height);
91}
92
Luc Trudeau4e26d662017-09-11 13:08:40 -040093// Due to frame boundary issues, it is possible that the total area covered by
94// chroma exceeds that of luma. When this happens, we fill the missing pixels by
95// repeating the last columns and/or rows.
96static INLINE void cfl_pad(CFL_CTX *cfl, int width, int height) {
97 const int diff_width = width - cfl->buf_width;
98 const int diff_height = height - cfl->buf_height;
Luc Trudeau3dc55e02017-06-22 14:03:47 -040099
100 if (diff_width > 0) {
Luc Trudeau4e26d662017-09-11 13:08:40 -0400101 const int min_height = height - diff_height;
102 int16_t *pred_buf_q3 = cfl->pred_buf_q3 + (width - diff_width);
103 for (int j = 0; j < min_height; j++) {
Luc Trudeaue67377b2017-10-31 16:08:05 -0400104 const int16_t last_pixel = pred_buf_q3[-1];
Luc Trudeau467205a2017-12-12 23:23:47 -0500105 assert(pred_buf_q3 + diff_width <= cfl->pred_buf_q3 + CFL_BUF_SQUARE);
Luc Trudeau3dc55e02017-06-22 14:03:47 -0400106 for (int i = 0; i < diff_width; i++) {
Luc Trudeau4e26d662017-09-11 13:08:40 -0400107 pred_buf_q3[i] = last_pixel;
Luc Trudeau3dc55e02017-06-22 14:03:47 -0400108 }
Luc Trudeau467205a2017-12-12 23:23:47 -0500109 pred_buf_q3 += CFL_BUF_LINE;
Luc Trudeau3dc55e02017-06-22 14:03:47 -0400110 }
Luc Trudeau4e26d662017-09-11 13:08:40 -0400111 cfl->buf_width = width;
Luc Trudeau3dc55e02017-06-22 14:03:47 -0400112 }
Luc Trudeau3dc55e02017-06-22 14:03:47 -0400113 if (diff_height > 0) {
Luc Trudeau4e26d662017-09-11 13:08:40 -0400114 int16_t *pred_buf_q3 =
Luc Trudeau467205a2017-12-12 23:23:47 -0500115 cfl->pred_buf_q3 + ((height - diff_height) * CFL_BUF_LINE);
Luc Trudeau3dc55e02017-06-22 14:03:47 -0400116 for (int j = 0; j < diff_height; j++) {
Luc Trudeau467205a2017-12-12 23:23:47 -0500117 const int16_t *last_row_q3 = pred_buf_q3 - CFL_BUF_LINE;
118 assert(pred_buf_q3 + width <= cfl->pred_buf_q3 + CFL_BUF_SQUARE);
Luc Trudeau3dc55e02017-06-22 14:03:47 -0400119 for (int i = 0; i < width; i++) {
Luc Trudeau4e26d662017-09-11 13:08:40 -0400120 pred_buf_q3[i] = last_row_q3[i];
Luc Trudeau3dc55e02017-06-22 14:03:47 -0400121 }
Luc Trudeau467205a2017-12-12 23:23:47 -0500122 pred_buf_q3 += CFL_BUF_LINE;
Luc Trudeau3dc55e02017-06-22 14:03:47 -0400123 }
Luc Trudeau4e26d662017-09-11 13:08:40 -0400124 cfl->buf_height = height;
Luc Trudeau3dc55e02017-06-22 14:03:47 -0400125 }
Luc Trudeaubaeb3752017-04-24 11:19:25 -0400126}
Luc Trudeauf8164152017-04-11 16:20:51 -0400127
Luc Trudeaub4faea72017-12-15 16:44:01 -0500128void av1_cfl_subtract_c(int16_t *pred_buf_q3, int width, int height,
129 int16_t avg_q3) {
130 for (int j = 0; j < height; j++) {
131 for (int i = 0; i < width; i++) {
132 pred_buf_q3[i] -= avg_q3;
133 }
134 pred_buf_q3 += CFL_BUF_LINE;
135 }
136}
137
Luc Trudeau3ae12352017-12-06 14:11:34 -0500138static void cfl_subtract_average(CFL_CTX *cfl, TX_SIZE tx_size) {
Yaowu Xua73bdb02017-11-23 15:31:38 -0800139 const int tx_height = tx_size_high[tx_size];
140 const int tx_width = tx_size_wide[tx_size];
Yaowu Xua73bdb02017-11-23 15:31:38 -0800141 const int num_pel_log2 =
Luc Trudeau3ae12352017-12-06 14:11:34 -0500142 tx_size_high_log2[tx_size] + tx_size_wide_log2[tx_size];
143
Luc Trudeau4e26d662017-09-11 13:08:40 -0400144 int16_t *pred_buf_q3 = cfl->pred_buf_q3;
Luc Trudeau3ae12352017-12-06 14:11:34 -0500145 int sum_q3 = 0;
Luc Trudeau3dc55e02017-06-22 14:03:47 -0400146
Luc Trudeau3ae12352017-12-06 14:11:34 -0500147 cfl_pad(cfl, tx_width, tx_height);
Luc Trudeau4e26d662017-09-11 13:08:40 -0400148
Luc Trudeau3ae12352017-12-06 14:11:34 -0500149 for (int j = 0; j < tx_height; j++) {
Luc Trudeau467205a2017-12-12 23:23:47 -0500150 assert(pred_buf_q3 + tx_width <= cfl->pred_buf_q3 + CFL_BUF_SQUARE);
Luc Trudeau3ae12352017-12-06 14:11:34 -0500151 for (int i = 0; i < tx_width; i++) {
152 sum_q3 += pred_buf_q3[i];
Luc Trudeau3e18e4a2017-06-13 13:54:14 -0400153 }
Luc Trudeau467205a2017-12-12 23:23:47 -0500154 pred_buf_q3 += CFL_BUF_LINE;
Luc Trudeau3ae12352017-12-06 14:11:34 -0500155 }
156 const int avg_q3 = (sum_q3 + (1 << (num_pel_log2 - 1))) >> num_pel_log2;
157 // Loss is never more than 1/2 (in Q3)
158 assert(abs((avg_q3 * (1 << num_pel_log2)) - sum_q3) <= 1 << num_pel_log2 >>
159 1);
Luc Trudeaub4faea72017-12-15 16:44:01 -0500160 av1_cfl_subtract(cfl->pred_buf_q3, tx_width, tx_height, avg_q3);
Luc Trudeau3dc55e02017-06-22 14:03:47 -0400161}
162
David Michael Barrf6eaa152017-07-19 19:42:28 +0900163static INLINE int cfl_idx_to_alpha(int alpha_idx, int joint_sign,
Luc Trudeau4e81d922017-07-05 17:17:06 -0400164 CFL_PRED_TYPE pred_type) {
David Michael Barrf6eaa152017-07-19 19:42:28 +0900165 const int alpha_sign = (pred_type == CFL_PRED_U) ? CFL_SIGN_U(joint_sign)
166 : CFL_SIGN_V(joint_sign);
167 if (alpha_sign == CFL_SIGN_ZERO) return 0;
168 const int abs_alpha_q3 =
169 (pred_type == CFL_PRED_U) ? CFL_IDX_U(alpha_idx) : CFL_IDX_V(alpha_idx);
170 return (alpha_sign == CFL_SIGN_POS) ? abs_alpha_q3 + 1 : -abs_alpha_q3 - 1;
Luc Trudeau3e18e4a2017-06-13 13:54:14 -0400171}
172
Luc Trudeau056d1f42017-09-15 17:38:14 -0400173static void cfl_build_prediction_lbd(const int16_t *pred_buf_q3, uint8_t *dst,
David Michael Barr16f38c22017-12-21 01:56:03 +0900174 int dst_stride, TX_SIZE tx_size,
Luc Trudeau8e232aa2017-10-18 12:12:05 -0400175 int alpha_q3) {
David Michael Barr16f38c22017-12-21 01:56:03 +0900176 const int height = tx_size_high[tx_size];
177 const int width = tx_size_wide[tx_size];
Luc Trudeau67914b52017-09-14 17:13:28 -0400178 for (int j = 0; j < height; j++) {
179 for (int i = 0; i < width; i++) {
180 dst[i] =
Luc Trudeau8e232aa2017-10-18 12:12:05 -0400181 clip_pixel(get_scaled_luma_q0(alpha_q3, pred_buf_q3[i]) + dst[i]);
Luc Trudeau67914b52017-09-14 17:13:28 -0400182 }
183 dst += dst_stride;
Luc Trudeau467205a2017-12-12 23:23:47 -0500184 pred_buf_q3 += CFL_BUF_LINE;
Luc Trudeau67914b52017-09-14 17:13:28 -0400185 }
186}
187
Luc Trudeau056d1f42017-09-15 17:38:14 -0400188static void cfl_build_prediction_hbd(const int16_t *pred_buf_q3, uint16_t *dst,
David Michael Barrc363ab72018-01-12 16:53:38 +0900189 int dst_stride, TX_SIZE tx_size,
Luc Trudeau8e232aa2017-10-18 12:12:05 -0400190 int alpha_q3, int bit_depth) {
David Michael Barrc363ab72018-01-12 16:53:38 +0900191 const int height = tx_size_high[tx_size];
192 const int width = tx_size_wide[tx_size];
Luc Trudeau056d1f42017-09-15 17:38:14 -0400193 for (int j = 0; j < height; j++) {
194 for (int i = 0; i < width; i++) {
195 dst[i] = clip_pixel_highbd(
Luc Trudeau8e232aa2017-10-18 12:12:05 -0400196 get_scaled_luma_q0(alpha_q3, pred_buf_q3[i]) + dst[i], bit_depth);
Luc Trudeau056d1f42017-09-15 17:38:14 -0400197 }
198 dst += dst_stride;
Luc Trudeau467205a2017-12-12 23:23:47 -0500199 pred_buf_q3 += CFL_BUF_LINE;
Luc Trudeau056d1f42017-09-15 17:38:14 -0400200 }
201}
Luc Trudeau056d1f42017-09-15 17:38:14 -0400202
Luc Trudeau8e232aa2017-10-18 12:12:05 -0400203static void cfl_compute_parameters(MACROBLOCKD *const xd, TX_SIZE tx_size) {
Luc Trudeau1e84af52017-11-25 15:00:28 -0500204 CFL_CTX *const cfl = &xd->cfl;
Luc Trudeau8e232aa2017-10-18 12:12:05 -0400205 // Do not call cfl_compute_parameters multiple time on the same values.
206 assert(cfl->are_parameters_computed == 0);
207
David Michael Barrd27f1e62018-01-11 23:03:30 +0900208#if CONFIG_DEBUG && !CONFIG_RECT_TX_EXT_INTRA
Luc Trudeau3ae12352017-12-06 14:11:34 -0500209 BLOCK_SIZE bsize = xd->mi[0]->mbmi.sb_type;
Luc Trudeau8e232aa2017-10-18 12:12:05 -0400210 if (block_size_high[bsize] == 4 || block_size_wide[bsize] == 4) {
211 const uint16_t compute_counter = cfl->sub8x8_val[0];
212 assert(compute_counter != cfl->last_compute_counter);
213 bsize = scale_chroma_bsize(bsize, cfl->subsampling_x, cfl->subsampling_y);
214 const int val_wide = mi_size_wide[bsize];
215 const int val_high = mi_size_high[bsize];
216 assert(val_wide <= CFL_SUB8X8_VAL_MI_SIZE);
217 assert(val_high <= CFL_SUB8X8_VAL_MI_SIZE);
218 for (int val_r = 0; val_r < val_high; val_r++) {
219 for (int val_c = 0; val_c < val_wide; val_c++) {
220 // If all counters in the validation buffer are equal then they are all
221 // related to the same chroma reference block.
222 assert(cfl->sub8x8_val[val_r * CFL_SUB8X8_VAL_MI_SIZE + val_c] ==
223 compute_counter);
224 }
225 }
226 cfl->last_compute_counter = compute_counter;
227 }
David Michael Barrd27f1e62018-01-11 23:03:30 +0900228#endif // CONFIG_DEBUG && !CONFIG_RECT_TX_EXT_INTRA
Luc Trudeau8e232aa2017-10-18 12:12:05 -0400229
Luc Trudeau3ae12352017-12-06 14:11:34 -0500230 cfl_subtract_average(cfl, tx_size);
Luc Trudeau8e232aa2017-10-18 12:12:05 -0400231 cfl->are_parameters_computed = 1;
232}
233
David Michael Barr16f38c22017-12-21 01:56:03 +0900234cfl_predict_lbd_fn get_predict_lbd_fn_c(TX_SIZE tx_size) {
235 (void)tx_size;
236 return cfl_build_prediction_lbd;
237}
238
David Michael Barrc363ab72018-01-12 16:53:38 +0900239cfl_predict_hbd_fn get_predict_hbd_fn_c(TX_SIZE tx_size) {
240 (void)tx_size;
241 return cfl_build_prediction_hbd;
242}
243
Luc Trudeau3dc55e02017-06-22 14:03:47 -0400244void cfl_predict_block(MACROBLOCKD *const xd, uint8_t *dst, int dst_stride,
Luc Trudeaud1941f32017-11-22 14:17:21 -0500245 TX_SIZE tx_size, int plane) {
Luc Trudeau1e84af52017-11-25 15:00:28 -0500246 CFL_CTX *const cfl = &xd->cfl;
Luc Trudeau3dc55e02017-06-22 14:03:47 -0400247 MB_MODE_INFO *mbmi = &xd->mi[0]->mbmi;
Luc Trudeaue425f472017-12-08 14:48:46 -0500248 assert(is_cfl_allowed(mbmi));
Luc Trudeau3dc55e02017-06-22 14:03:47 -0400249
Luc Trudeau8e232aa2017-10-18 12:12:05 -0400250 if (!cfl->are_parameters_computed) cfl_compute_parameters(xd, tx_size);
Luc Trudeau3dc55e02017-06-22 14:03:47 -0400251
David Michael Barrf6eaa152017-07-19 19:42:28 +0900252 const int alpha_q3 =
253 cfl_idx_to_alpha(mbmi->cfl_alpha_idx, mbmi->cfl_alpha_signs, plane - 1);
David Michael Barrc363ab72018-01-12 16:53:38 +0900254 assert((tx_size_high[tx_size] - 1) * CFL_BUF_LINE + tx_size_wide[tx_size] <=
255 CFL_BUF_SQUARE);
Luc Trudeau6d3befb2017-10-02 13:52:22 -0400256 if (get_bitdepth_data_path_index(xd)) {
257 uint16_t *dst_16 = CONVERT_TO_SHORTPTR(dst);
David Michael Barrc363ab72018-01-12 16:53:38 +0900258 get_predict_hbd_fn(tx_size)(cfl->pred_buf_q3, dst_16, dst_stride, tx_size,
259 alpha_q3, xd->bd);
Luc Trudeau6d3befb2017-10-02 13:52:22 -0400260 return;
261 }
David Michael Barr16f38c22017-12-21 01:56:03 +0900262 get_predict_lbd_fn(tx_size)(cfl->pred_buf_q3, dst, dst_stride, tx_size,
263 alpha_q3);
Luc Trudeau4e26d662017-09-11 13:08:40 -0400264}
265
Luc Trudeau056d1f42017-09-15 17:38:14 -0400266static void cfl_luma_subsampling_420_lbd(const uint8_t *input, int input_stride,
267 int16_t *output_q3, int width,
268 int height) {
Luc Trudeau4e26d662017-09-11 13:08:40 -0400269 for (int j = 0; j < height; j++) {
270 for (int i = 0; i < width; i++) {
271 int top = i << 1;
272 int bot = top + input_stride;
273 output_q3[i] = (input[top] + input[top + 1] + input[bot] + input[bot + 1])
274 << 1;
275 }
276 input += input_stride << 1;
Luc Trudeau467205a2017-12-12 23:23:47 -0500277 output_q3 += CFL_BUF_LINE;
Luc Trudeau4e26d662017-09-11 13:08:40 -0400278 }
279}
280
Luc Trudeau9bd42782017-12-19 23:15:28 -0500281void cfl_luma_subsampling_422_lbd(const uint8_t *input, int input_stride,
282 int16_t *output_q3, int width, int height) {
Luc Trudeau467205a2017-12-12 23:23:47 -0500283 assert((height - 1) * CFL_BUF_LINE + width <= CFL_BUF_SQUARE);
Luc Trudeauc8323c02017-10-11 21:05:54 -0400284 for (int j = 0; j < height; j++) {
285 for (int i = 0; i < width; i++) {
286 int left = i << 1;
287 output_q3[i] = (input[left] + input[left + 1]) << 2;
288 }
289 input += input_stride;
Luc Trudeau467205a2017-12-12 23:23:47 -0500290 output_q3 += CFL_BUF_LINE;
Luc Trudeauc8323c02017-10-11 21:05:54 -0400291 }
292}
293
Luc Trudeau9bd42782017-12-19 23:15:28 -0500294void cfl_luma_subsampling_440_lbd(const uint8_t *input, int input_stride,
295 int16_t *output_q3, int width, int height) {
Luc Trudeau467205a2017-12-12 23:23:47 -0500296 assert((height - 1) * CFL_BUF_LINE + width <= CFL_BUF_SQUARE);
Luc Trudeau06b47082017-10-31 10:42:36 -0400297 for (int j = 0; j < height; j++) {
298 for (int i = 0; i < width; i++) {
299 output_q3[i] = (input[i] + input[i + input_stride]) << 2;
300 }
301 input += input_stride << 1;
Luc Trudeau467205a2017-12-12 23:23:47 -0500302 output_q3 += CFL_BUF_LINE;
Luc Trudeau06b47082017-10-31 10:42:36 -0400303 }
304}
305
Luc Trudeau9bd42782017-12-19 23:15:28 -0500306void cfl_luma_subsampling_444_lbd(const uint8_t *input, int input_stride,
307 int16_t *output_q3, int width, int height) {
Luc Trudeau467205a2017-12-12 23:23:47 -0500308 assert((height - 1) * CFL_BUF_LINE + width <= CFL_BUF_SQUARE);
Luc Trudeau69d9e872017-09-15 20:40:47 -0400309 for (int j = 0; j < height; j++) {
310 for (int i = 0; i < width; i++) {
311 output_q3[i] = input[i] << 3;
312 }
313 input += input_stride;
Luc Trudeau467205a2017-12-12 23:23:47 -0500314 output_q3 += CFL_BUF_LINE;
Luc Trudeau69d9e872017-09-15 20:40:47 -0400315 }
316}
317
Luc Trudeau9bd42782017-12-19 23:15:28 -0500318cfl_subsample_lbd_fn get_subsample_lbd_fn_c(int sub_x, int sub_y) {
319 static const cfl_subsample_lbd_fn subsample_lbd[2][2] = {
320 // (sub_y == 0, sub_x == 0) (sub_y == 0, sub_x == 1)
321 // (sub_y == 1, sub_x == 0) (sub_y == 1, sub_x == 1)
322 { cfl_luma_subsampling_444_lbd, cfl_luma_subsampling_422_lbd },
323 { cfl_luma_subsampling_440_lbd, cfl_luma_subsampling_420_lbd },
324 };
325 // AND sub_x and sub_y with 1 to ensures that an attacker won't be able to
326 // index the function pointer array out of bounds.
327 return subsample_lbd[sub_y & 1][sub_x & 1];
328}
Luc Trudeau43ed5712017-10-31 12:29:28 -0400329
Luc Trudeau056d1f42017-09-15 17:38:14 -0400330static void cfl_luma_subsampling_420_hbd(const uint16_t *input,
331 int input_stride, int16_t *output_q3,
332 int width, int height) {
Luc Trudeau056d1f42017-09-15 17:38:14 -0400333 for (int j = 0; j < height; j++) {
334 for (int i = 0; i < width; i++) {
335 int top = i << 1;
336 int bot = top + input_stride;
337 output_q3[i] = (input[top] + input[top + 1] + input[bot] + input[bot + 1])
338 << 1;
339 }
340 input += input_stride << 1;
Luc Trudeau467205a2017-12-12 23:23:47 -0500341 output_q3 += CFL_BUF_LINE;
Luc Trudeau056d1f42017-09-15 17:38:14 -0400342 }
343}
Luc Trudeau69d9e872017-09-15 20:40:47 -0400344
Luc Trudeauc8323c02017-10-11 21:05:54 -0400345static void cfl_luma_subsampling_422_hbd(const uint16_t *input,
346 int input_stride, int16_t *output_q3,
347 int width, int height) {
Luc Trudeau467205a2017-12-12 23:23:47 -0500348 assert((height - 1) * CFL_BUF_LINE + width <= CFL_BUF_SQUARE);
Luc Trudeauc8323c02017-10-11 21:05:54 -0400349 for (int j = 0; j < height; j++) {
350 for (int i = 0; i < width; i++) {
351 int left = i << 1;
352 output_q3[i] = (input[left] + input[left + 1]) << 2;
353 }
354 input += input_stride;
Luc Trudeau467205a2017-12-12 23:23:47 -0500355 output_q3 += CFL_BUF_LINE;
Luc Trudeauc8323c02017-10-11 21:05:54 -0400356 }
357}
358
Luc Trudeau06b47082017-10-31 10:42:36 -0400359static void cfl_luma_subsampling_440_hbd(const uint16_t *input,
360 int input_stride, int16_t *output_q3,
361 int width, int height) {
Luc Trudeau467205a2017-12-12 23:23:47 -0500362 assert((height - 1) * CFL_BUF_LINE + width <= CFL_BUF_SQUARE);
Luc Trudeau06b47082017-10-31 10:42:36 -0400363 for (int j = 0; j < height; j++) {
364 for (int i = 0; i < width; i++) {
Luc Trudeau6acb3002017-11-02 14:09:28 -0400365 output_q3[i] = (input[i] + input[i + input_stride]) << 2;
Luc Trudeau06b47082017-10-31 10:42:36 -0400366 }
367 input += input_stride << 1;
Luc Trudeau467205a2017-12-12 23:23:47 -0500368 output_q3 += CFL_BUF_LINE;
Luc Trudeau06b47082017-10-31 10:42:36 -0400369 }
370}
371
Luc Trudeau69d9e872017-09-15 20:40:47 -0400372static void cfl_luma_subsampling_444_hbd(const uint16_t *input,
373 int input_stride, int16_t *output_q3,
374 int width, int height) {
Luc Trudeau467205a2017-12-12 23:23:47 -0500375 assert((height - 1) * CFL_BUF_LINE + width <= CFL_BUF_SQUARE);
Luc Trudeau69d9e872017-09-15 20:40:47 -0400376 for (int j = 0; j < height; j++) {
377 for (int i = 0; i < width; i++) {
378 output_q3[i] = input[i] << 3;
379 }
380 input += input_stride;
Luc Trudeau467205a2017-12-12 23:23:47 -0500381 output_q3 += CFL_BUF_LINE;
Luc Trudeau69d9e872017-09-15 20:40:47 -0400382 }
383}
Luc Trudeau43ed5712017-10-31 12:29:28 -0400384
385typedef void (*cfl_subsample_hbd_fn)(const uint16_t *input, int input_stride,
386 int16_t *output_q3, int width, int height);
387
388static const cfl_subsample_hbd_fn subsample_hbd[2][2] = {
389 // (sub_y == 0, sub_x == 0) (sub_y == 0, sub_x == 1)
390 // (sub_y == 1, sub_x == 0) (sub_y == 1, sub_x == 1)
391 { cfl_luma_subsampling_444_hbd, cfl_luma_subsampling_422_hbd },
392 { cfl_luma_subsampling_440_hbd, cfl_luma_subsampling_420_hbd },
393};
Luc Trudeau056d1f42017-09-15 17:38:14 -0400394
Luc Trudeau43ed5712017-10-31 12:29:28 -0400395static void cfl_store(CFL_CTX *cfl, const uint8_t *input, int input_stride,
396 int row, int col, int width, int height, int use_hbd) {
Luc Trudeaue3980282017-04-25 23:17:21 -0400397 const int tx_off_log2 = tx_size_wide_log2[0];
Luc Trudeau4e26d662017-09-11 13:08:40 -0400398 const int sub_x = cfl->subsampling_x;
399 const int sub_y = cfl->subsampling_y;
400 const int store_row = row << (tx_off_log2 - sub_y);
401 const int store_col = col << (tx_off_log2 - sub_x);
402 const int store_height = height >> sub_y;
403 const int store_width = width >> sub_x;
Luc Trudeaue3980282017-04-25 23:17:21 -0400404
Luc Trudeau780d2492017-06-15 22:26:41 -0400405 // Invalidate current parameters
406 cfl->are_parameters_computed = 0;
Luc Trudeaue3980282017-04-25 23:17:21 -0400407
408 // Store the surface of the pixel buffer that was written to, this way we
409 // can manage chroma overrun (e.g. when the chroma surfaces goes beyond the
410 // frame boundary)
411 if (col == 0 && row == 0) {
Luc Trudeau4e26d662017-09-11 13:08:40 -0400412 cfl->buf_width = store_width;
413 cfl->buf_height = store_height;
Luc Trudeaue3980282017-04-25 23:17:21 -0400414 } else {
Luc Trudeau4e26d662017-09-11 13:08:40 -0400415 cfl->buf_width = OD_MAXI(store_col + store_width, cfl->buf_width);
416 cfl->buf_height = OD_MAXI(store_row + store_height, cfl->buf_height);
Luc Trudeaue3980282017-04-25 23:17:21 -0400417 }
Luc Trudeau3dc55e02017-06-22 14:03:47 -0400418
Luc Trudeau780d2492017-06-15 22:26:41 -0400419 // Check that we will remain inside the pixel buffer.
Luc Trudeau467205a2017-12-12 23:23:47 -0500420 assert(store_row + store_height <= CFL_BUF_LINE);
421 assert(store_col + store_width <= CFL_BUF_LINE);
Luc Trudeau780d2492017-06-15 22:26:41 -0400422
423 // Store the input into the CfL pixel buffer
Luc Trudeau4e26d662017-09-11 13:08:40 -0400424 int16_t *pred_buf_q3 =
Luc Trudeau467205a2017-12-12 23:23:47 -0500425 cfl->pred_buf_q3 + (store_row * CFL_BUF_LINE + store_col);
Luc Trudeau780d2492017-06-15 22:26:41 -0400426
Luc Trudeau43ed5712017-10-31 12:29:28 -0400427 if (use_hbd) {
428 const uint16_t *input_16 = CONVERT_TO_SHORTPTR(input);
429 // AND sub_x and sub_y with 1 to ensures that an attacker won't be able to
430 // index the function pointer array out of bounds.
431 subsample_hbd[sub_y & 1][sub_x & 1](input_16, input_stride, pred_buf_q3,
432 store_width, store_height);
433 return;
Luc Trudeau780d2492017-06-15 22:26:41 -0400434 }
Luc Trudeau43ed5712017-10-31 12:29:28 -0400435 (void)use_hbd;
Luc Trudeau9bd42782017-12-19 23:15:28 -0500436 get_subsample_lbd_fn(sub_x, sub_y)(input, input_stride, pred_buf_q3,
437 store_width, store_height);
Luc Trudeaue3980282017-04-25 23:17:21 -0400438}
Luc Trudeau4e26d662017-09-11 13:08:40 -0400439
Luc Trudeaub05eeae2017-08-18 15:14:30 -0400440// Adjust the row and column of blocks smaller than 8X8, as chroma-referenced
441// and non-chroma-referenced blocks are stored together in the CfL buffer.
442static INLINE void sub8x8_adjust_offset(const CFL_CTX *cfl, int *row_out,
443 int *col_out) {
444 // Increment row index for bottom: 8x4, 16x4 or both bottom 4x4s.
445 if ((cfl->mi_row & 0x01) && cfl->subsampling_y) {
446 assert(*row_out == 0);
447 (*row_out)++;
448 }
449
450 // Increment col index for right: 4x8, 4x16 or both right 4x4s.
451 if ((cfl->mi_col & 0x01) && cfl->subsampling_x) {
452 assert(*col_out == 0);
453 (*col_out)++;
454 }
455}
#if CONFIG_DEBUG && !CONFIG_RECT_TX_EXT_INTRA
// The chroma surface of a sub8x8 block spans several luma blocks. This
// function supports validating that all the reconstructed luma needed to
// predict the chroma block with CfL was stored during the preceding luma
// encodes.
//
// Issue 1: Chroma intra prediction is not always performed after luma. One
// example is when the luma RD cost is very high and the mode decision
// algorithm terminates instead of evaluating chroma.
//
// Issue 2: Multiple CfL predictions may be computed for a given sub8x8
// block. The reconstructed luma belonging to the non-reference sub8x8 blocks
// must remain in the buffer (the buffer cannot be cleared when the CfL
// prediction is computed).
//
// To resolve these issues, store_counter is incremented on each store. If
// other sub8x8 blocks have already been coded and their counter equals the
// previous value, they are also set to the current value. If a sub8x8 block
// is not stored, its counter will not match, which is detected when the CfL
// parameters are computed.
static void sub8x8_set_val(CFL_CTX *cfl, int row, int col, TX_SIZE y_tx_size) {
  const int wide_units = tx_size_wide_unit[y_tx_size];
  const int high_units = tx_size_high_unit[y_tx_size];

  // Number of 4x4 units covered by the transform.
  const int unit_count = wide_units * high_units;
  assert(unit_count == 1 || unit_count == 2 || unit_count == 4);

  // A store at (0,0) skips a counter value, which invalidates every other
  // entry currently in the validation buffer.
  const int is_first = (row + col == 0);
  if (is_first) {
    cfl->store_counter += 2;
  } else {
    cfl->store_counter += 1;
  }

  // Stamp the units covered by this transform: step along the row unless the
  // transform is taller than it is wide, in which case step down the column.
  const int step = (wide_units >= high_units) ? 1 : CFL_SUB8X8_VAL_MI_SIZE;
  const int first_idx = row * CFL_SUB8X8_VAL_MI_SIZE + col;
  for (int k = 0; k < unit_count; k++) {
    cfl->sub8x8_val[first_idx + k * step] = cfl->store_counter;
  }

  if (is_first) return;

  // Promote every entry still holding the previous counter value.
  const uint16_t prev_store_counter = cfl->store_counter - 1;
  int found = 0;
  (void)found;
  const int total = CFL_SUB8X8_VAL_MI_SIZE * CFL_SUB8X8_VAL_MI_SIZE;
  for (int idx = 0; idx < total; idx++) {
    if (cfl->sub8x8_val[idx] == prev_store_counter) {
      cfl->sub8x8_val[idx] = cfl->store_counter;
      found = 1;
    }
  }
  // Something is wrong if (0,0) is missing.
  assert(found);
}
#endif  // CONFIG_DEBUG && !CONFIG_RECT_TX_EXT_INTRA
Luc Trudeaub05eeae2017-08-18 15:14:30 -0400514
515void cfl_store_tx(MACROBLOCKD *const xd, int row, int col, TX_SIZE tx_size,
516 BLOCK_SIZE bsize) {
Luc Trudeau1e84af52017-11-25 15:00:28 -0500517 CFL_CTX *const cfl = &xd->cfl;
Luc Trudeaub05eeae2017-08-18 15:14:30 -0400518 struct macroblockd_plane *const pd = &xd->plane[AOM_PLANE_Y];
519 uint8_t *dst =
520 &pd->dst.buf[(row * pd->dst.stride + col) << tx_size_wide_log2[0]];
Luc Trudeaua8474b62017-12-07 12:13:45 -0500521
Luc Trudeaue425f472017-12-08 14:48:46 -0500522 assert(is_cfl_allowed(&xd->mi[0]->mbmi));
Luc Trudeaub05eeae2017-08-18 15:14:30 -0400523 if (block_size_high[bsize] == 4 || block_size_wide[bsize] == 4) {
524 // Only dimensions of size 4 can have an odd offset.
525 assert(!((col & 1) && tx_size_wide[tx_size] != 4));
526 assert(!((row & 1) && tx_size_high[tx_size] != 4));
527 sub8x8_adjust_offset(cfl, &row, &col);
David Michael Barrd27f1e62018-01-11 23:03:30 +0900528#if CONFIG_DEBUG && !CONFIG_RECT_TX_EXT_INTRA
Luc Trudeauc7af36d2017-10-11 21:01:00 -0400529 sub8x8_set_val(cfl, row, col, tx_size);
David Michael Barrd27f1e62018-01-11 23:03:30 +0900530#endif // CONFIG_DEBUG && !CONFIG_RECT_TX_EXT_INTRA
Luc Trudeaub05eeae2017-08-18 15:14:30 -0400531 }
Luc Trudeaub05eeae2017-08-18 15:14:30 -0400532 cfl_store(cfl, dst, pd->dst.stride, row, col, tx_size_wide[tx_size],
Luc Trudeau056d1f42017-09-15 17:38:14 -0400533 tx_size_high[tx_size], get_bitdepth_data_path_index(xd));
Luc Trudeaub05eeae2017-08-18 15:14:30 -0400534}
535
536void cfl_store_block(MACROBLOCKD *const xd, BLOCK_SIZE bsize, TX_SIZE tx_size) {
Luc Trudeau1e84af52017-11-25 15:00:28 -0500537 CFL_CTX *const cfl = &xd->cfl;
Luc Trudeaub05eeae2017-08-18 15:14:30 -0400538 struct macroblockd_plane *const pd = &xd->plane[AOM_PLANE_Y];
539 int row = 0;
540 int col = 0;
Luc Trudeaua8474b62017-12-07 12:13:45 -0500541
Luc Trudeaue425f472017-12-08 14:48:46 -0500542 assert(is_cfl_allowed(&xd->mi[0]->mbmi));
Luc Trudeaub05eeae2017-08-18 15:14:30 -0400543 if (block_size_high[bsize] == 4 || block_size_wide[bsize] == 4) {
544 sub8x8_adjust_offset(cfl, &row, &col);
David Michael Barrd27f1e62018-01-11 23:03:30 +0900545#if CONFIG_DEBUG && !CONFIG_RECT_TX_EXT_INTRA
Luc Trudeauc7af36d2017-10-11 21:01:00 -0400546 // Point to the last transform block inside the partition.
547 const int off_row =
548 row + (mi_size_high[bsize] - tx_size_high_unit[tx_size]);
549 const int off_col =
550 col + (mi_size_wide[bsize] - tx_size_wide_unit[tx_size]);
551 sub8x8_set_val(cfl, off_row, off_col, tx_size);
David Michael Barrd27f1e62018-01-11 23:03:30 +0900552#endif // CONFIG_DEBUG && !CONFIG_RECT_TX_EXT_INTRA
Luc Trudeaub05eeae2017-08-18 15:14:30 -0400553 }
Luc Trudeaub05eeae2017-08-18 15:14:30 -0400554 const int width = max_intra_block_width(xd, bsize, AOM_PLANE_Y, tx_size);
555 const int height = max_intra_block_height(xd, bsize, AOM_PLANE_Y, tx_size);
Luc Trudeau056d1f42017-09-15 17:38:14 -0400556 cfl_store(cfl, pd->dst.buf, pd->dst.stride, row, col, width, height,
557 get_bitdepth_data_path_index(xd));
Luc Trudeaub05eeae2017-08-18 15:14:30 -0400558}