/*
 * Copyright (c) 2016, Alliance for Open Media. All rights reserved
 *
 * This source code is subject to the terms of the BSD 2 Clause License and
 * the Alliance for Open Media Patent License 1.0. If the BSD 2 Clause License
 * was not distributed with this source code in the LICENSE file, you can
 * obtain it at www.aomedia.org/license/software. If the Alliance for Open
 * Media Patent License 1.0 was not distributed with this source code in the
 * PATENTS file, you can obtain it at www.aomedia.org/license/patent.
 */
11
12#include "av1/common/cfl.h"
13#include "av1/common/common_data.h"
Luc Trudeaubaeb3752017-04-24 11:19:25 -040014#include "av1/common/onyxc_int.h"
15
Luc Trudeaudac5e392017-06-05 15:52:02 -040016void cfl_init(CFL_CTX *cfl, AV1_COMMON *cm) {
Luc Trudeau06b47082017-10-31 10:42:36 -040017 if ((cm->subsampling_x != 0 && cm->subsampling_x != 1) ||
18 (cm->subsampling_y != 0 && cm->subsampling_y != 1)) {
19 aom_internal_error(&cm->error, AOM_CODEC_UNSUP_BITSTREAM,
20 "Only 4:4:4, 4:4:0, 4:2:2 and 4:2:0 are currently "
21 "supported by CfL, %d %d "
22 "subsampling is not supported.\n",
23 cm->subsampling_x, cm->subsampling_y);
Luc Trudeaubaeb3752017-04-24 11:19:25 -040024 }
Luc Trudeau4e26d662017-09-11 13:08:40 -040025 memset(&cfl->pred_buf_q3, 0, sizeof(cfl->pred_buf_q3));
Luc Trudeaudac5e392017-06-05 15:52:02 -040026 cfl->subsampling_x = cm->subsampling_x;
27 cfl->subsampling_y = cm->subsampling_y;
Luc Trudeau3dc55e02017-06-22 14:03:47 -040028 cfl->are_parameters_computed = 0;
Luc Trudeaufcca37a2017-08-14 15:05:07 -040029 cfl->store_y = 0;
Hui Su9fa96232017-10-23 15:46:04 -070030#if CONFIG_DEBUG
Luc Trudeauc84c21c2017-07-25 19:40:34 -040031 cfl_clear_sub8x8_val(cfl);
Luc Trudeauc7af36d2017-10-11 21:01:00 -040032 cfl->store_counter = 0;
33 cfl->last_compute_counter = 0;
Hui Su9fa96232017-10-23 15:46:04 -070034#endif // CONFIG_DEBUG
Luc Trudeau3dc55e02017-06-22 14:03:47 -040035}
36
Luc Trudeau4e26d662017-09-11 13:08:40 -040037// Due to frame boundary issues, it is possible that the total area covered by
38// chroma exceeds that of luma. When this happens, we fill the missing pixels by
39// repeating the last columns and/or rows.
40static INLINE void cfl_pad(CFL_CTX *cfl, int width, int height) {
41 const int diff_width = width - cfl->buf_width;
42 const int diff_height = height - cfl->buf_height;
Luc Trudeau3dc55e02017-06-22 14:03:47 -040043
44 if (diff_width > 0) {
Luc Trudeau4e26d662017-09-11 13:08:40 -040045 const int min_height = height - diff_height;
46 int16_t *pred_buf_q3 = cfl->pred_buf_q3 + (width - diff_width);
47 for (int j = 0; j < min_height; j++) {
Luc Trudeaue67377b2017-10-31 16:08:05 -040048 const int16_t last_pixel = pred_buf_q3[-1];
Luc Trudeau3dc55e02017-06-22 14:03:47 -040049 for (int i = 0; i < diff_width; i++) {
Luc Trudeau4e26d662017-09-11 13:08:40 -040050 pred_buf_q3[i] = last_pixel;
Luc Trudeau3dc55e02017-06-22 14:03:47 -040051 }
Luc Trudeau4e26d662017-09-11 13:08:40 -040052 pred_buf_q3 += MAX_SB_SIZE;
Luc Trudeau3dc55e02017-06-22 14:03:47 -040053 }
Luc Trudeau4e26d662017-09-11 13:08:40 -040054 cfl->buf_width = width;
Luc Trudeau3dc55e02017-06-22 14:03:47 -040055 }
Luc Trudeau3dc55e02017-06-22 14:03:47 -040056 if (diff_height > 0) {
Luc Trudeau4e26d662017-09-11 13:08:40 -040057 int16_t *pred_buf_q3 =
58 cfl->pred_buf_q3 + ((height - diff_height) * MAX_SB_SIZE);
Luc Trudeau3dc55e02017-06-22 14:03:47 -040059 for (int j = 0; j < diff_height; j++) {
Luc Trudeau4e26d662017-09-11 13:08:40 -040060 const int16_t *last_row_q3 = pred_buf_q3 - MAX_SB_SIZE;
Luc Trudeau3dc55e02017-06-22 14:03:47 -040061 for (int i = 0; i < width; i++) {
Luc Trudeau4e26d662017-09-11 13:08:40 -040062 pred_buf_q3[i] = last_row_q3[i];
Luc Trudeau3dc55e02017-06-22 14:03:47 -040063 }
Luc Trudeau4e26d662017-09-11 13:08:40 -040064 pred_buf_q3 += MAX_SB_SIZE;
Luc Trudeau3dc55e02017-06-22 14:03:47 -040065 }
Luc Trudeau4e26d662017-09-11 13:08:40 -040066 cfl->buf_height = height;
Luc Trudeau3dc55e02017-06-22 14:03:47 -040067 }
Luc Trudeaubaeb3752017-04-24 11:19:25 -040068}
Luc Trudeauf8164152017-04-11 16:20:51 -040069
Luc Trudeau593d02c2017-09-08 11:29:37 -040070static void cfl_subtract_averages(CFL_CTX *cfl, TX_SIZE tx_size) {
Luc Trudeau3dc55e02017-06-22 14:03:47 -040071 const int width = cfl->uv_width;
72 const int height = cfl->uv_height;
Yaowu Xua73bdb02017-11-23 15:31:38 -080073 const int tx_height = tx_size_high[tx_size];
74 const int tx_width = tx_size_wide[tx_size];
75 const int block_row_stride = MAX_SB_SIZE << tx_size_high_log2[tx_size];
76 const int num_pel_log2 =
77 (tx_size_high_log2[tx_size] + tx_size_wide_log2[tx_size]);
Luc Trudeau4e26d662017-09-11 13:08:40 -040078 int16_t *pred_buf_q3 = cfl->pred_buf_q3;
Luc Trudeau3dc55e02017-06-22 14:03:47 -040079
Luc Trudeau4e26d662017-09-11 13:08:40 -040080 cfl_pad(cfl, width, height);
81
Yaowu Xua73bdb02017-11-23 15:31:38 -080082 for (int b_j = 0; b_j < height; b_j += tx_height) {
83 for (int b_i = 0; b_i < width; b_i += tx_width) {
Luc Trudeaua0af3b52017-09-06 13:37:33 -040084 int sum_q3 = 0;
Yaowu Xua73bdb02017-11-23 15:31:38 -080085 int16_t *tx_pred_buf_q3 = pred_buf_q3;
86 for (int t_j = 0; t_j < tx_height; t_j++) {
87 for (int t_i = b_i; t_i < b_i + tx_width; t_i++) {
88 sum_q3 += tx_pred_buf_q3[t_i];
Luc Trudeau03678942017-06-12 17:33:19 -040089 }
Yaowu Xua73bdb02017-11-23 15:31:38 -080090 tx_pred_buf_q3 += MAX_SB_SIZE;
Luc Trudeau03678942017-06-12 17:33:19 -040091 }
Luc Trudeau593d02c2017-09-08 11:29:37 -040092 int avg_q3 = (sum_q3 + (1 << (num_pel_log2 - 1))) >> num_pel_log2;
Luc Trudeau475fc9d2017-07-04 16:51:14 -040093 // Loss is never more than 1/2 (in Q3)
Yaowu Xu2a91ab72017-11-06 15:12:17 -080094 assert(abs((avg_q3 * (1 << num_pel_log2)) - sum_q3) <=
95 1 << num_pel_log2 >> 1);
Luc Trudeau593d02c2017-09-08 11:29:37 -040096
Yaowu Xua73bdb02017-11-23 15:31:38 -080097 tx_pred_buf_q3 = pred_buf_q3;
98 for (int t_j = 0; t_j < tx_height; t_j++) {
99 for (int t_i = b_i; t_i < b_i + tx_width; t_i++) {
100 tx_pred_buf_q3[t_i] -= avg_q3;
Luc Trudeau593d02c2017-09-08 11:29:37 -0400101 }
102
Yaowu Xua73bdb02017-11-23 15:31:38 -0800103 tx_pred_buf_q3 += MAX_SB_SIZE;
Luc Trudeau593d02c2017-09-08 11:29:37 -0400104 }
Luc Trudeau3e18e4a2017-06-13 13:54:14 -0400105 }
Luc Trudeau4e26d662017-09-11 13:08:40 -0400106 pred_buf_q3 += block_row_stride;
Luc Trudeau3e18e4a2017-06-13 13:54:14 -0400107 }
Luc Trudeau3dc55e02017-06-22 14:03:47 -0400108}
109
David Michael Barrf6eaa152017-07-19 19:42:28 +0900110static INLINE int cfl_idx_to_alpha(int alpha_idx, int joint_sign,
Luc Trudeau4e81d922017-07-05 17:17:06 -0400111 CFL_PRED_TYPE pred_type) {
David Michael Barrf6eaa152017-07-19 19:42:28 +0900112 const int alpha_sign = (pred_type == CFL_PRED_U) ? CFL_SIGN_U(joint_sign)
113 : CFL_SIGN_V(joint_sign);
114 if (alpha_sign == CFL_SIGN_ZERO) return 0;
115 const int abs_alpha_q3 =
116 (pred_type == CFL_PRED_U) ? CFL_IDX_U(alpha_idx) : CFL_IDX_V(alpha_idx);
117 return (alpha_sign == CFL_SIGN_POS) ? abs_alpha_q3 + 1 : -abs_alpha_q3 - 1;
Luc Trudeau3e18e4a2017-06-13 13:54:14 -0400118}
119
Luc Trudeau056d1f42017-09-15 17:38:14 -0400120static void cfl_build_prediction_lbd(const int16_t *pred_buf_q3, uint8_t *dst,
121 int dst_stride, int width, int height,
Luc Trudeau8e232aa2017-10-18 12:12:05 -0400122 int alpha_q3) {
Luc Trudeau67914b52017-09-14 17:13:28 -0400123 for (int j = 0; j < height; j++) {
124 for (int i = 0; i < width; i++) {
125 dst[i] =
Luc Trudeau8e232aa2017-10-18 12:12:05 -0400126 clip_pixel(get_scaled_luma_q0(alpha_q3, pred_buf_q3[i]) + dst[i]);
Luc Trudeau67914b52017-09-14 17:13:28 -0400127 }
128 dst += dst_stride;
129 pred_buf_q3 += MAX_SB_SIZE;
130 }
131}
132
#if CONFIG_HIGHBITDEPTH
// High-bitdepth CfL prediction. Same procedure as the low-bitdepth variant,
// but `dst` holds 16-bit samples and the result is clipped to `bit_depth` with
// clip_pixel_highbd().
static void cfl_build_prediction_hbd(const int16_t *pred_buf_q3, uint16_t *dst,
                                     int dst_stride, int width, int height,
                                     int alpha_q3, int bit_depth) {
  const int16_t *luma_row = pred_buf_q3;
  uint16_t *dst_row = dst;
  for (int y = 0; y < height; y++) {
    for (int x = 0; x < width; x++) {
      dst_row[x] = clip_pixel_highbd(
          get_scaled_luma_q0(alpha_q3, luma_row[x]) + dst_row[x], bit_depth);
    }
    luma_row += MAX_SB_SIZE;
    dst_row += dst_stride;
  }
}
#endif  // CONFIG_HIGHBITDEPTH
147
Luc Trudeau8e232aa2017-10-18 12:12:05 -0400148static void cfl_compute_parameters(MACROBLOCKD *const xd, TX_SIZE tx_size) {
Luc Trudeau1e84af52017-11-25 15:00:28 -0500149 CFL_CTX *const cfl = &xd->cfl;
Luc Trudeau8e232aa2017-10-18 12:12:05 -0400150 MB_MODE_INFO *mbmi = &xd->mi[0]->mbmi;
151
152 // Do not call cfl_compute_parameters multiple time on the same values.
153 assert(cfl->are_parameters_computed == 0);
154
155 const BLOCK_SIZE plane_bsize = AOMMAX(
156 BLOCK_4X4, get_plane_block_size(mbmi->sb_type, &xd->plane[AOM_PLANE_U]));
157#if CONFIG_DEBUG
158 BLOCK_SIZE bsize = mbmi->sb_type;
159 if (block_size_high[bsize] == 4 || block_size_wide[bsize] == 4) {
160 const uint16_t compute_counter = cfl->sub8x8_val[0];
161 assert(compute_counter != cfl->last_compute_counter);
162 bsize = scale_chroma_bsize(bsize, cfl->subsampling_x, cfl->subsampling_y);
163 const int val_wide = mi_size_wide[bsize];
164 const int val_high = mi_size_high[bsize];
165 assert(val_wide <= CFL_SUB8X8_VAL_MI_SIZE);
166 assert(val_high <= CFL_SUB8X8_VAL_MI_SIZE);
167 for (int val_r = 0; val_r < val_high; val_r++) {
168 for (int val_c = 0; val_c < val_wide; val_c++) {
169 // If all counters in the validation buffer are equal then they are all
170 // related to the same chroma reference block.
171 assert(cfl->sub8x8_val[val_r * CFL_SUB8X8_VAL_MI_SIZE + val_c] ==
172 compute_counter);
173 }
174 }
175 cfl->last_compute_counter = compute_counter;
176 }
177#endif // CONFIG_DEBUG
178
179 // AOM_PLANE_U is used, but both planes will have the same sizes.
180 cfl->uv_width = max_intra_block_width(xd, plane_bsize, AOM_PLANE_U, tx_size);
181 cfl->uv_height =
182 max_intra_block_height(xd, plane_bsize, AOM_PLANE_U, tx_size);
183
184 cfl_subtract_averages(cfl, tx_size);
185 cfl->are_parameters_computed = 1;
186}
187
Luc Trudeau3dc55e02017-06-22 14:03:47 -0400188void cfl_predict_block(MACROBLOCKD *const xd, uint8_t *dst, int dst_stride,
Luc Trudeaud1941f32017-11-22 14:17:21 -0500189 TX_SIZE tx_size, int plane) {
Luc Trudeau1e84af52017-11-25 15:00:28 -0500190 CFL_CTX *const cfl = &xd->cfl;
Luc Trudeau3dc55e02017-06-22 14:03:47 -0400191 MB_MODE_INFO *mbmi = &xd->mi[0]->mbmi;
Luc Trudeaud1941f32017-11-22 14:17:21 -0500192 assert(is_cfl_allowed(xd));
Luc Trudeau3dc55e02017-06-22 14:03:47 -0400193
Luc Trudeau8e232aa2017-10-18 12:12:05 -0400194 if (!cfl->are_parameters_computed) cfl_compute_parameters(xd, tx_size);
Luc Trudeau3dc55e02017-06-22 14:03:47 -0400195
David Michael Barrf6eaa152017-07-19 19:42:28 +0900196 const int alpha_q3 =
197 cfl_idx_to_alpha(mbmi->cfl_alpha_idx, mbmi->cfl_alpha_signs, plane - 1);
Luc Trudeau6d3befb2017-10-02 13:52:22 -0400198#if CONFIG_HIGHBITDEPTH
199 if (get_bitdepth_data_path_index(xd)) {
200 uint16_t *dst_16 = CONVERT_TO_SHORTPTR(dst);
Luc Trudeaud1941f32017-11-22 14:17:21 -0500201 cfl_build_prediction_hbd(cfl->pred_buf_q3, dst_16, dst_stride,
Luc Trudeau6d3befb2017-10-02 13:52:22 -0400202 tx_size_wide[tx_size], tx_size_high[tx_size],
Luc Trudeau8e232aa2017-10-18 12:12:05 -0400203 alpha_q3, xd->bd);
Luc Trudeau6d3befb2017-10-02 13:52:22 -0400204 return;
205 }
206#endif // CONFIG_HIGHBITDEPTH
Luc Trudeaud1941f32017-11-22 14:17:21 -0500207 cfl_build_prediction_lbd(cfl->pred_buf_q3, dst, dst_stride,
208 tx_size_wide[tx_size], tx_size_high[tx_size],
209 alpha_q3);
Luc Trudeau4e26d662017-09-11 13:08:40 -0400210}
211
Luc Trudeau056d1f42017-09-15 17:38:14 -0400212static void cfl_luma_subsampling_420_lbd(const uint8_t *input, int input_stride,
213 int16_t *output_q3, int width,
214 int height) {
Luc Trudeau4e26d662017-09-11 13:08:40 -0400215 for (int j = 0; j < height; j++) {
216 for (int i = 0; i < width; i++) {
217 int top = i << 1;
218 int bot = top + input_stride;
219 output_q3[i] = (input[top] + input[top + 1] + input[bot] + input[bot + 1])
220 << 1;
221 }
222 input += input_stride << 1;
223 output_q3 += MAX_SB_SIZE;
224 }
225}
226
Luc Trudeauc8323c02017-10-11 21:05:54 -0400227static void cfl_luma_subsampling_422_lbd(const uint8_t *input, int input_stride,
228 int16_t *output_q3, int width,
229 int height) {
230 for (int j = 0; j < height; j++) {
231 for (int i = 0; i < width; i++) {
232 int left = i << 1;
233 output_q3[i] = (input[left] + input[left + 1]) << 2;
234 }
235 input += input_stride;
236 output_q3 += MAX_SB_SIZE;
237 }
238}
239
Luc Trudeau06b47082017-10-31 10:42:36 -0400240static void cfl_luma_subsampling_440_lbd(const uint8_t *input, int input_stride,
241 int16_t *output_q3, int width,
242 int height) {
243 for (int j = 0; j < height; j++) {
244 for (int i = 0; i < width; i++) {
245 output_q3[i] = (input[i] + input[i + input_stride]) << 2;
246 }
247 input += input_stride << 1;
248 output_q3 += MAX_SB_SIZE;
249 }
250}
251
Luc Trudeau69d9e872017-09-15 20:40:47 -0400252static void cfl_luma_subsampling_444_lbd(const uint8_t *input, int input_stride,
253 int16_t *output_q3, int width,
254 int height) {
255 for (int j = 0; j < height; j++) {
256 for (int i = 0; i < width; i++) {
257 output_q3[i] = input[i] << 3;
258 }
259 input += input_stride;
260 output_q3 += MAX_SB_SIZE;
261 }
262}
263
Luc Trudeau43ed5712017-10-31 12:29:28 -0400264typedef void (*cfl_subsample_lbd_fn)(const uint8_t *input, int input_stride,
265 int16_t *output_q3, int width, int height);
266
267static const cfl_subsample_lbd_fn subsample_lbd[2][2] = {
268 // (sub_y == 0, sub_x == 0) (sub_y == 0, sub_x == 1)
269 // (sub_y == 1, sub_x == 0) (sub_y == 1, sub_x == 1)
270 { cfl_luma_subsampling_444_lbd, cfl_luma_subsampling_422_lbd },
271 { cfl_luma_subsampling_440_lbd, cfl_luma_subsampling_420_lbd },
272};
273
Luc Trudeau056d1f42017-09-15 17:38:14 -0400274#if CONFIG_HIGHBITDEPTH
275static void cfl_luma_subsampling_420_hbd(const uint16_t *input,
276 int input_stride, int16_t *output_q3,
277 int width, int height) {
278 for (int j = 0; j < height; j++) {
279 for (int i = 0; i < width; i++) {
280 int top = i << 1;
281 int bot = top + input_stride;
282 output_q3[i] = (input[top] + input[top + 1] + input[bot] + input[bot + 1])
283 << 1;
284 }
285 input += input_stride << 1;
286 output_q3 += MAX_SB_SIZE;
287 }
288}
Luc Trudeau69d9e872017-09-15 20:40:47 -0400289
Luc Trudeauc8323c02017-10-11 21:05:54 -0400290static void cfl_luma_subsampling_422_hbd(const uint16_t *input,
291 int input_stride, int16_t *output_q3,
292 int width, int height) {
293 for (int j = 0; j < height; j++) {
294 for (int i = 0; i < width; i++) {
295 int left = i << 1;
296 output_q3[i] = (input[left] + input[left + 1]) << 2;
297 }
298 input += input_stride;
299 output_q3 += MAX_SB_SIZE;
300 }
301}
302
Luc Trudeau06b47082017-10-31 10:42:36 -0400303static void cfl_luma_subsampling_440_hbd(const uint16_t *input,
304 int input_stride, int16_t *output_q3,
305 int width, int height) {
306 for (int j = 0; j < height; j++) {
307 for (int i = 0; i < width; i++) {
Luc Trudeau6acb3002017-11-02 14:09:28 -0400308 output_q3[i] = (input[i] + input[i + input_stride]) << 2;
Luc Trudeau06b47082017-10-31 10:42:36 -0400309 }
310 input += input_stride << 1;
311 output_q3 += MAX_SB_SIZE;
312 }
313}
314
Luc Trudeau69d9e872017-09-15 20:40:47 -0400315static void cfl_luma_subsampling_444_hbd(const uint16_t *input,
316 int input_stride, int16_t *output_q3,
317 int width, int height) {
318 for (int j = 0; j < height; j++) {
319 for (int i = 0; i < width; i++) {
320 output_q3[i] = input[i] << 3;
321 }
322 input += input_stride;
323 output_q3 += MAX_SB_SIZE;
324 }
325}
Luc Trudeau43ed5712017-10-31 12:29:28 -0400326
327typedef void (*cfl_subsample_hbd_fn)(const uint16_t *input, int input_stride,
328 int16_t *output_q3, int width, int height);
329
330static const cfl_subsample_hbd_fn subsample_hbd[2][2] = {
331 // (sub_y == 0, sub_x == 0) (sub_y == 0, sub_x == 1)
332 // (sub_y == 1, sub_x == 0) (sub_y == 1, sub_x == 1)
333 { cfl_luma_subsampling_444_hbd, cfl_luma_subsampling_422_hbd },
334 { cfl_luma_subsampling_440_hbd, cfl_luma_subsampling_420_hbd },
335};
Luc Trudeau056d1f42017-09-15 17:38:14 -0400336#endif // CONFIG_HIGHBITDEPTH
337
Luc Trudeau43ed5712017-10-31 12:29:28 -0400338static void cfl_store(CFL_CTX *cfl, const uint8_t *input, int input_stride,
339 int row, int col, int width, int height, int use_hbd) {
Luc Trudeaue3980282017-04-25 23:17:21 -0400340 const int tx_off_log2 = tx_size_wide_log2[0];
Luc Trudeau4e26d662017-09-11 13:08:40 -0400341 const int sub_x = cfl->subsampling_x;
342 const int sub_y = cfl->subsampling_y;
343 const int store_row = row << (tx_off_log2 - sub_y);
344 const int store_col = col << (tx_off_log2 - sub_x);
345 const int store_height = height >> sub_y;
346 const int store_width = width >> sub_x;
Luc Trudeaue3980282017-04-25 23:17:21 -0400347
Luc Trudeau780d2492017-06-15 22:26:41 -0400348 // Invalidate current parameters
349 cfl->are_parameters_computed = 0;
Luc Trudeaue3980282017-04-25 23:17:21 -0400350
351 // Store the surface of the pixel buffer that was written to, this way we
352 // can manage chroma overrun (e.g. when the chroma surfaces goes beyond the
353 // frame boundary)
354 if (col == 0 && row == 0) {
Luc Trudeau4e26d662017-09-11 13:08:40 -0400355 cfl->buf_width = store_width;
356 cfl->buf_height = store_height;
Luc Trudeaue3980282017-04-25 23:17:21 -0400357 } else {
Luc Trudeau4e26d662017-09-11 13:08:40 -0400358 cfl->buf_width = OD_MAXI(store_col + store_width, cfl->buf_width);
359 cfl->buf_height = OD_MAXI(store_row + store_height, cfl->buf_height);
Luc Trudeaue3980282017-04-25 23:17:21 -0400360 }
Luc Trudeau3dc55e02017-06-22 14:03:47 -0400361
Luc Trudeau780d2492017-06-15 22:26:41 -0400362 // Check that we will remain inside the pixel buffer.
Luc Trudeau4e26d662017-09-11 13:08:40 -0400363 assert(store_row + store_height <= MAX_SB_SIZE);
364 assert(store_col + store_width <= MAX_SB_SIZE);
Luc Trudeau780d2492017-06-15 22:26:41 -0400365
366 // Store the input into the CfL pixel buffer
Luc Trudeau4e26d662017-09-11 13:08:40 -0400367 int16_t *pred_buf_q3 =
368 cfl->pred_buf_q3 + (store_row * MAX_SB_SIZE + store_col);
Luc Trudeau780d2492017-06-15 22:26:41 -0400369
Luc Trudeau43ed5712017-10-31 12:29:28 -0400370#if CONFIG_HIGHBITDEPTH
371 if (use_hbd) {
372 const uint16_t *input_16 = CONVERT_TO_SHORTPTR(input);
373 // AND sub_x and sub_y with 1 to ensures that an attacker won't be able to
374 // index the function pointer array out of bounds.
375 subsample_hbd[sub_y & 1][sub_x & 1](input_16, input_stride, pred_buf_q3,
376 store_width, store_height);
377 return;
Luc Trudeau780d2492017-06-15 22:26:41 -0400378 }
Luc Trudeau43ed5712017-10-31 12:29:28 -0400379#endif // CONFIG_HIGHBITDEPTH
380 (void)use_hbd;
381 // AND sub_x and sub_y with 1 to ensures that an attacker won't be able to
382 // index the function pointer array out of bounds.
383 subsample_lbd[sub_y & 1][sub_x & 1](input, input_stride, pred_buf_q3,
384 store_width, store_height);
Luc Trudeaue3980282017-04-25 23:17:21 -0400385}
Luc Trudeau4e26d662017-09-11 13:08:40 -0400386
Luc Trudeaub05eeae2017-08-18 15:14:30 -0400387// Adjust the row and column of blocks smaller than 8X8, as chroma-referenced
388// and non-chroma-referenced blocks are stored together in the CfL buffer.
389static INLINE void sub8x8_adjust_offset(const CFL_CTX *cfl, int *row_out,
390 int *col_out) {
391 // Increment row index for bottom: 8x4, 16x4 or both bottom 4x4s.
392 if ((cfl->mi_row & 0x01) && cfl->subsampling_y) {
393 assert(*row_out == 0);
394 (*row_out)++;
395 }
396
397 // Increment col index for right: 4x8, 4x16 or both right 4x4s.
398 if ((cfl->mi_col & 0x01) && cfl->subsampling_x) {
399 assert(*col_out == 0);
400 (*col_out)++;
401 }
402}
#if CONFIG_DEBUG
// The chroma surface of a sub8x8 block spans multiple luma blocks. This
// function maintains the bookkeeping used to validate that the reconstructed
// luma area needed to predict the chroma block with CfL was stored during the
// previous luma encode.
//
// Issue 1: Chroma intra prediction is not always performed after luma. One
// example is when the luma RD cost is very high and the mode decision
// algorithm terminates instead of evaluating chroma.
//
// Issue 2: Multiple CfL predictions may be computed for a given sub8x8 block.
// The reconstructed luma belonging to the non-reference sub8x8 blocks must
// remain in the buffer (the buffer cannot be cleared when the CfL prediction
// is computed).
//
// To handle both, store_counter is incremented on every store. If other
// sub8x8 blocks have already been coded and their counter equals the previous
// value, they are bumped to the current value too. A sub8x8 block that was not
// stored ends up with a mismatching counter, which is detected when the CfL
// parameters are computed.
static void sub8x8_set_val(CFL_CTX *cfl, int row, int col, TX_SIZE y_tx_size) {
  const int wide_units = tx_size_wide_unit[y_tx_size];
  const int high_units = tx_size_high_unit[y_tx_size];

  // Number of 4x4 units covered by the transform.
  const int num_units = wide_units * high_units;
  assert(num_units == 1 || num_units == 2 || num_units == 4);

  // A store at (0,0) skips a counter value, which invalidates every entry
  // written before it.
  const int at_origin = (row + col == 0);
  if (at_origin) {
    cfl->store_counter += 2;
  } else {
    cfl->store_counter += 1;
  }

  // Mark the units covered by this transform, walking horizontally unless the
  // transform is taller than it is wide.
  const int step = (wide_units >= high_units) ? 1 : CFL_SUB8X8_VAL_MI_SIZE;
  uint16_t *const first = cfl->sub8x8_val + (row * CFL_SUB8X8_VAL_MI_SIZE + col);
  for (int u = 0; u < num_units; u++) {
    first[u * step] = cfl->store_counter;
  }

  if (at_origin) return;

  // Bring every entry that carried the previous counter up to date.
  const uint16_t stale_counter = cfl->store_counter - 1;
  int found = 0;
  (void)found;
  const int num_entries = CFL_SUB8X8_VAL_MI_SIZE * CFL_SUB8X8_VAL_MI_SIZE;
  for (int e = 0; e < num_entries; e++) {
    if (cfl->sub8x8_val[e] == stale_counter) {
      cfl->sub8x8_val[e] = cfl->store_counter;
      found = 1;
    }
  }
  // Something is wrong if (0,0) is missing
  assert(found);
}
#endif  // CONFIG_DEBUG
Luc Trudeaub05eeae2017-08-18 15:14:30 -0400461
462void cfl_store_tx(MACROBLOCKD *const xd, int row, int col, TX_SIZE tx_size,
463 BLOCK_SIZE bsize) {
Luc Trudeau1e84af52017-11-25 15:00:28 -0500464 CFL_CTX *const cfl = &xd->cfl;
Luc Trudeaub05eeae2017-08-18 15:14:30 -0400465 struct macroblockd_plane *const pd = &xd->plane[AOM_PLANE_Y];
466 uint8_t *dst =
467 &pd->dst.buf[(row * pd->dst.stride + col) << tx_size_wide_log2[0]];
Luc Trudeaua8474b62017-12-07 12:13:45 -0500468
Luc Trudeaud1941f32017-11-22 14:17:21 -0500469 assert(is_cfl_allowed(xd));
Luc Trudeaub05eeae2017-08-18 15:14:30 -0400470 if (block_size_high[bsize] == 4 || block_size_wide[bsize] == 4) {
471 // Only dimensions of size 4 can have an odd offset.
472 assert(!((col & 1) && tx_size_wide[tx_size] != 4));
473 assert(!((row & 1) && tx_size_high[tx_size] != 4));
474 sub8x8_adjust_offset(cfl, &row, &col);
475#if CONFIG_DEBUG
Luc Trudeauc7af36d2017-10-11 21:01:00 -0400476 sub8x8_set_val(cfl, row, col, tx_size);
Luc Trudeaub05eeae2017-08-18 15:14:30 -0400477#endif // CONFIG_DEBUG
478 }
Luc Trudeaub05eeae2017-08-18 15:14:30 -0400479 cfl_store(cfl, dst, pd->dst.stride, row, col, tx_size_wide[tx_size],
Luc Trudeau056d1f42017-09-15 17:38:14 -0400480 tx_size_high[tx_size], get_bitdepth_data_path_index(xd));
Luc Trudeaub05eeae2017-08-18 15:14:30 -0400481}
482
483void cfl_store_block(MACROBLOCKD *const xd, BLOCK_SIZE bsize, TX_SIZE tx_size) {
Luc Trudeau1e84af52017-11-25 15:00:28 -0500484 CFL_CTX *const cfl = &xd->cfl;
Luc Trudeaub05eeae2017-08-18 15:14:30 -0400485 struct macroblockd_plane *const pd = &xd->plane[AOM_PLANE_Y];
486 int row = 0;
487 int col = 0;
Luc Trudeaub05eeae2017-08-18 15:14:30 -0400488 bsize = AOMMAX(BLOCK_4X4, bsize);
Luc Trudeaua8474b62017-12-07 12:13:45 -0500489
Luc Trudeaud1941f32017-11-22 14:17:21 -0500490 assert(is_cfl_allowed(xd));
Luc Trudeaub05eeae2017-08-18 15:14:30 -0400491 if (block_size_high[bsize] == 4 || block_size_wide[bsize] == 4) {
492 sub8x8_adjust_offset(cfl, &row, &col);
493#if CONFIG_DEBUG
Luc Trudeauc7af36d2017-10-11 21:01:00 -0400494 // Point to the last transform block inside the partition.
495 const int off_row =
496 row + (mi_size_high[bsize] - tx_size_high_unit[tx_size]);
497 const int off_col =
498 col + (mi_size_wide[bsize] - tx_size_wide_unit[tx_size]);
499 sub8x8_set_val(cfl, off_row, off_col, tx_size);
Luc Trudeaub05eeae2017-08-18 15:14:30 -0400500#endif // CONFIG_DEBUG
501 }
Luc Trudeaub05eeae2017-08-18 15:14:30 -0400502 const int width = max_intra_block_width(xd, bsize, AOM_PLANE_Y, tx_size);
503 const int height = max_intra_block_height(xd, bsize, AOM_PLANE_Y, tx_size);
Luc Trudeau056d1f42017-09-15 17:38:14 -0400504 cfl_store(cfl, pd->dst.buf, pd->dst.stride, row, col, width, height,
505 get_bitdepth_data_path_index(xd));
Luc Trudeaub05eeae2017-08-18 15:14:30 -0400506}