/*
 * Copyright (c) 2016, Alliance for Open Media. All rights reserved
 *
 * This source code is subject to the terms of the BSD 2 Clause License and
 * the Alliance for Open Media Patent License 1.0. If the BSD 2 Clause License
 * was not distributed with this source code in the LICENSE file, you can
 * obtain it at www.aomedia.org/license/software. If the Alliance for Open
 * Media Patent License 1.0 was not distributed with this source code in the
 * PATENTS file, you can obtain it at www.aomedia.org/license/patent.
 */
#include <stdlib.h>
#include <string.h>
#include <assert.h>

#include "./aom_config.h"
#include "./aom_dsp_rtcd.h"

#include "aom_ports/mem.h"
#include "aom/aom_integer.h"

#include "aom_dsp/variance.h"
#include "aom_dsp/aom_filter.h"
#include "aom_dsp/blend.h"

#include "./av1_rtcd.h"
#include "av1/common/filter.h"

uint32_t aom_get4x4sse_cs_c(const uint8_t *a, int a_stride, const uint8_t *b,
                            int b_stride) {
  int distortion = 0;
  int r, c;

  for (r = 0; r < 4; ++r) {
    for (c = 0; c < 4; ++c) {
      int diff = a[c] - b[c];
      distortion += diff * diff;
    }

    a += a_stride;
    b += b_stride;
  }

  return distortion;
}

uint32_t aom_get_mb_ss_c(const int16_t *a) {
  unsigned int i, sum = 0;

  for (i = 0; i < 256; ++i) {
    sum += a[i] * a[i];
  }

  return sum;
}

uint32_t aom_variance_halfpixvar16x16_h_c(const uint8_t *a, int a_stride,
                                          const uint8_t *b, int b_stride,
                                          uint32_t *sse) {
  return aom_sub_pixel_variance16x16_c(a, a_stride, 4, 0, b, b_stride, sse);
}

uint32_t aom_variance_halfpixvar16x16_v_c(const uint8_t *a, int a_stride,
                                          const uint8_t *b, int b_stride,
                                          uint32_t *sse) {
  return aom_sub_pixel_variance16x16_c(a, a_stride, 0, 4, b, b_stride, sse);
}

uint32_t aom_variance_halfpixvar16x16_hv_c(const uint8_t *a, int a_stride,
                                           const uint8_t *b, int b_stride,
                                           uint32_t *sse) {
  return aom_sub_pixel_variance16x16_c(a, a_stride, 4, 4, b, b_stride, sse);
}

static void variance(const uint8_t *a, int a_stride, const uint8_t *b,
                     int b_stride, int w, int h, uint32_t *sse, int *sum) {
  int i, j;

  *sum = 0;
  *sse = 0;

  for (i = 0; i < h; ++i) {
    for (j = 0; j < w; ++j) {
      const int diff = a[j] - b[j];
      *sum += diff;
      *sse += diff * diff;
    }

    a += a_stride;
    b += b_stride;
  }
}
92
Alex Converse29608d82017-03-30 09:36:58 -070093uint32_t aom_sse_odd_size(const uint8_t *a, int a_stride, const uint8_t *b,
94 int b_stride, int w, int h) {
95 uint32_t sse;
96 int sum;
97 variance(a, a_stride, b, b_stride, w, h, &sse, &sum);
98 return sse;
99}
100
Yaowu Xuc27fc142016-08-22 16:08:15 -0700101// Applies a 1-D 2-tap bilinear filter to the source block in either horizontal
102// or vertical direction to produce the filtered output block. Used to implement
103// the first-pass of 2-D separable filter.
104//
105// Produces int16_t output to retain precision for the next pass. Two filter
106// taps should sum to FILTER_WEIGHT. pixel_step defines whether the filter is
107// applied horizontally (pixel_step = 1) or vertically (pixel_step = stride).
108// It defines the offset required to move from one input to the next.
Cheng Chend2864432017-11-17 17:59:24 -0800109void aom_var_filter_block2d_bil_first_pass_c(const uint8_t *a, uint16_t *b,
110 unsigned int src_pixels_per_line,
111 unsigned int pixel_step,
112 unsigned int output_height,
113 unsigned int output_width,
114 const uint8_t *filter) {
Yaowu Xuc27fc142016-08-22 16:08:15 -0700115 unsigned int i, j;
116
117 for (i = 0; i < output_height; ++i) {
118 for (j = 0; j < output_width; ++j) {
119 b[j] = ROUND_POWER_OF_TWO(
120 (int)a[0] * filter[0] + (int)a[pixel_step] * filter[1], FILTER_BITS);
121
122 ++a;
123 }
124
125 a += src_pixels_per_line - output_width;
126 b += output_width;
127 }
128}

// Applies a 1-D 2-tap bilinear filter to the source block in either the
// horizontal or the vertical direction to produce the filtered output block.
// Used to implement the second pass of the 2-D separable filter.
//
// Requires 16-bit input as produced by filter_block2d_bil_first_pass. The two
// filter taps should sum to FILTER_WEIGHT. pixel_step defines whether the
// filter is applied horizontally (pixel_step = 1) or vertically
// (pixel_step = stride). It defines the offset required to move from one
// input to the next. Output is 8-bit.
void aom_var_filter_block2d_bil_second_pass_c(const uint16_t *a, uint8_t *b,
                                              unsigned int src_pixels_per_line,
                                              unsigned int pixel_step,
                                              unsigned int output_height,
                                              unsigned int output_width,
                                              const uint8_t *filter) {
  unsigned int i, j;

  for (i = 0; i < output_height; ++i) {
    for (j = 0; j < output_width; ++j) {
      b[j] = ROUND_POWER_OF_TWO(
          (int)a[0] * filter[0] + (int)a[pixel_step] * filter[1], FILTER_BITS);
      ++a;
    }

    a += src_pixels_per_line - output_width;
    b += output_width;
  }
}

#define VAR(W, H)                                                    \
  uint32_t aom_variance##W##x##H##_c(const uint8_t *a, int a_stride, \
                                     const uint8_t *b, int b_stride, \
                                     uint32_t *sse) {                \
    int sum;                                                         \
    variance(a, a_stride, b, b_stride, W, H, sse, &sum);             \
    return *sse - (uint32_t)(((int64_t)sum * sum) / (W * H));        \
  }

#define SUBPIX_VAR(W, H)                                                      \
  uint32_t aom_sub_pixel_variance##W##x##H##_c(                               \
      const uint8_t *a, int a_stride, int xoffset, int yoffset,               \
      const uint8_t *b, int b_stride, uint32_t *sse) {                        \
    uint16_t fdata3[(H + 1) * W];                                             \
    uint8_t temp2[H * W];                                                     \
                                                                              \
    aom_var_filter_block2d_bil_first_pass_c(a, fdata3, a_stride, 1, H + 1, W, \
                                            bilinear_filters_2t[xoffset]);    \
    aom_var_filter_block2d_bil_second_pass_c(fdata3, temp2, W, W, H, W,       \
                                             bilinear_filters_2t[yoffset]);   \
                                                                              \
    return aom_variance##W##x##H##_c(temp2, W, b, b_stride, sse);             \
  }
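
// Note on the temporary buffers in SUBPIX_VAR (and the related macros below):
// the horizontal first pass produces H + 1 rows in fdata3 because the
// vertical second pass filters between vertically adjacent intermediate rows
// and therefore needs one extra row of context below the block.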

#define SUBPIX_AVG_VAR(W, H)                                                  \
  uint32_t aom_sub_pixel_avg_variance##W##x##H##_c(                           \
      const uint8_t *a, int a_stride, int xoffset, int yoffset,               \
      const uint8_t *b, int b_stride, uint32_t *sse,                          \
      const uint8_t *second_pred) {                                           \
    uint16_t fdata3[(H + 1) * W];                                             \
    uint8_t temp2[H * W];                                                     \
    DECLARE_ALIGNED(16, uint8_t, temp3[H * W]);                               \
                                                                              \
    aom_var_filter_block2d_bil_first_pass_c(a, fdata3, a_stride, 1, H + 1, W, \
                                            bilinear_filters_2t[xoffset]);    \
    aom_var_filter_block2d_bil_second_pass_c(fdata3, temp2, W, W, H, W,       \
                                             bilinear_filters_2t[yoffset]);   \
                                                                              \
    aom_comp_avg_pred(temp3, second_pred, W, H, temp2, W);                    \
                                                                              \
    return aom_variance##W##x##H##_c(temp3, W, b, b_stride, sse);             \
  }                                                                           \
  uint32_t aom_jnt_sub_pixel_avg_variance##W##x##H##_c(                       \
      const uint8_t *a, int a_stride, int xoffset, int yoffset,               \
      const uint8_t *b, int b_stride, uint32_t *sse,                          \
      const uint8_t *second_pred, const JNT_COMP_PARAMS *jcp_param) {         \
    uint16_t fdata3[(H + 1) * W];                                             \
    uint8_t temp2[H * W];                                                     \
    DECLARE_ALIGNED(16, uint8_t, temp3[H * W]);                               \
                                                                              \
    aom_var_filter_block2d_bil_first_pass_c(a, fdata3, a_stride, 1, H + 1, W, \
                                            bilinear_filters_2t[xoffset]);    \
    aom_var_filter_block2d_bil_second_pass_c(fdata3, temp2, W, W, H, W,       \
                                             bilinear_filters_2t[yoffset]);   \
                                                                              \
    aom_jnt_comp_avg_pred(temp3, second_pred, W, H, temp2, W, jcp_param);     \
                                                                              \
    return aom_variance##W##x##H(temp3, W, b, b_stride, sse);                 \
  }

/* Identical to the variance call except it takes an additional parameter,
 * sum, and returns that value using pass-by-reference instead of returning
 * sse - sum^2 / (w * h)
 */
#define GET_VAR(W, H)                                                         \
  void aom_get##W##x##H##var_c(const uint8_t *a, int a_stride,                \
                               const uint8_t *b, int b_stride, uint32_t *sse, \
                               int *sum) {                                    \
    variance(a, a_stride, b, b_stride, W, H, sse, sum);                       \
  }

/* Identical to the variance call except it does not calculate
 * sse - sum^2 / (w * h) and returns sse in addition to modifying the
 * passed-in variable.
 */
#define MSE(W, H)                                               \
  uint32_t aom_mse##W##x##H##_c(const uint8_t *a, int a_stride, \
                                const uint8_t *b, int b_stride, \
                                uint32_t *sse) {                \
    int sum;                                                    \
    variance(a, a_stride, b, b_stride, W, H, sse, &sum);        \
    return *sse;                                                \
  }

/* All three forms of the variance are available in the same sizes. */
#define VARIANCES(W, H) \
  VAR(W, H)             \
  SUBPIX_VAR(W, H)      \
  SUBPIX_AVG_VAR(W, H)

#if CONFIG_AV1
VARIANCES(128, 128)
VARIANCES(128, 64)
VARIANCES(64, 128)
#endif  // CONFIG_AV1
VARIANCES(64, 64)
VARIANCES(64, 32)
VARIANCES(32, 64)
VARIANCES(32, 32)
VARIANCES(32, 16)
VARIANCES(16, 32)
VARIANCES(16, 16)
VARIANCES(16, 8)
VARIANCES(8, 16)
VARIANCES(8, 8)
VARIANCES(8, 4)
VARIANCES(4, 8)
VARIANCES(4, 4)
VARIANCES(4, 2)
VARIANCES(2, 4)
VARIANCES(2, 2)

#if CONFIG_AV1
VARIANCES(4, 16)
VARIANCES(16, 4)
VARIANCES(8, 32)
VARIANCES(32, 8)
VARIANCES(16, 64)
VARIANCES(64, 16)
VARIANCES(32, 128)
VARIANCES(128, 32)
#endif  // CONFIG_AV1

GET_VAR(16, 16)
GET_VAR(8, 8)

MSE(16, 16)
MSE(16, 8)
MSE(8, 16)
MSE(8, 8)

void aom_comp_avg_pred_c(uint8_t *comp_pred, const uint8_t *pred, int width,
                         int height, const uint8_t *ref, int ref_stride) {
  int i, j;

  for (i = 0; i < height; ++i) {
    for (j = 0; j < width; ++j) {
      const int tmp = pred[j] + ref[j];
      comp_pred[j] = ROUND_POWER_OF_TWO(tmp, 1);
    }
    comp_pred += width;
    pred += width;
    ref += ref_stride;
  }
}

// Get pred block from up-sampled reference.
void aom_upsampled_pred_c(uint8_t *comp_pred, int width, int height,
                          int subpel_x_q3, int subpel_y_q3, const uint8_t *ref,
                          int ref_stride) {
  if (!subpel_x_q3 && !subpel_y_q3) {
    int i;
    for (i = 0; i < height; i++) {
      memcpy(comp_pred, ref, width * sizeof(*comp_pred));
      comp_pred += width;
      ref += ref_stride;
    }
  } else {
    InterpFilterParams filter;
    filter = av1_get_interp_filter_params(EIGHTTAP_REGULAR);
    if (!subpel_y_q3) {
      const int16_t *kernel;
      kernel = av1_get_interp_filter_subpel_kernel(filter, subpel_x_q3 << 1);
      /*Directly call C version to allow this to work for small (2x2) sizes.*/
      aom_convolve8_horiz_c(ref, ref_stride, comp_pred, width, kernel, 16, NULL,
                            -1, width, height);
    } else if (!subpel_x_q3) {
      const int16_t *kernel;
      kernel = av1_get_interp_filter_subpel_kernel(filter, subpel_y_q3 << 1);
      /*Directly call C version to allow this to work for small (2x2) sizes.*/
      aom_convolve8_vert_c(ref, ref_stride, comp_pred, width, NULL, -1, kernel,
                           16, width, height);
    } else {
      DECLARE_ALIGNED(16, uint8_t,
                      temp[((MAX_SB_SIZE * 2 + 16) + 16) * MAX_SB_SIZE]);
      const int16_t *kernel_x;
      const int16_t *kernel_y;
      int intermediate_height;
      kernel_x = av1_get_interp_filter_subpel_kernel(filter, subpel_x_q3 << 1);
      kernel_y = av1_get_interp_filter_subpel_kernel(filter, subpel_y_q3 << 1);
      intermediate_height =
          (((height - 1) * 8 + subpel_y_q3) >> 3) + filter.taps;
      assert(intermediate_height <= (MAX_SB_SIZE * 2 + 16) + 16);
      /*Directly call C versions to allow this to work for small (2x2) sizes.*/
      aom_convolve8_horiz_c(ref - ref_stride * ((filter.taps >> 1) - 1),
                            ref_stride, temp, MAX_SB_SIZE, kernel_x, 16, NULL,
                            -1, width, intermediate_height);
      aom_convolve8_vert_c(temp + MAX_SB_SIZE * ((filter.taps >> 1) - 1),
                           MAX_SB_SIZE, comp_pred, width, NULL, -1, kernel_y,
                           16, width, height);
    }
  }
}
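
// The fully sub-pel branch above is a standard separable 2-D convolution:
// the horizontal pass writes into `temp`, starting (filter.taps / 2 - 1) rows
// above the block so the vertical pass has the context its taps need, which
// is also why intermediate_height exceeds the block height by roughly
// filter.taps - 1 rows.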

void aom_comp_avg_upsampled_pred_c(uint8_t *comp_pred, const uint8_t *pred,
                                   int width, int height, int subpel_x_q3,
                                   int subpel_y_q3, const uint8_t *ref,
                                   int ref_stride) {
  int i, j;

  aom_upsampled_pred(comp_pred, width, height, subpel_x_q3, subpel_y_q3, ref,
                     ref_stride);
  for (i = 0; i < height; i++) {
    for (j = 0; j < width; j++) {
      comp_pred[j] = ROUND_POWER_OF_TWO(comp_pred[j] + pred[j], 1);
    }
    comp_pred += width;
    pred += width;
  }
}

void aom_jnt_comp_avg_pred_c(uint8_t *comp_pred, const uint8_t *pred, int width,
                             int height, const uint8_t *ref, int ref_stride,
                             const JNT_COMP_PARAMS *jcp_param) {
  int i, j;
  const int fwd_offset = jcp_param->fwd_offset;
  const int bck_offset = jcp_param->bck_offset;

  for (i = 0; i < height; ++i) {
    for (j = 0; j < width; ++j) {
      int tmp = pred[j] * bck_offset + ref[j] * fwd_offset;
      tmp = ROUND_POWER_OF_TWO(tmp, DIST_PRECISION_BITS);
      comp_pred[j] = (uint8_t)tmp;
    }
    comp_pred += width;
    pred += width;
    ref += ref_stride;
  }
}
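
// A sketch of the weighting above, assuming (as in the AV1 distance-weighted
// compound mode) that fwd_offset + bck_offset == (1 << DIST_PRECISION_BITS):
// the output is then the rounded weighted average
//   (bck_offset * pred[j] + fwd_offset * ref[j]) >> DIST_PRECISION_BITS,
// which stays within 8-bit range, so the final cast cannot clip.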

void aom_jnt_comp_avg_upsampled_pred_c(uint8_t *comp_pred, const uint8_t *pred,
                                       int width, int height, int subpel_x_q3,
                                       int subpel_y_q3, const uint8_t *ref,
                                       int ref_stride,
                                       const JNT_COMP_PARAMS *jcp_param) {
  int i, j;
  const int fwd_offset = jcp_param->fwd_offset;
  const int bck_offset = jcp_param->bck_offset;

  aom_upsampled_pred(comp_pred, width, height, subpel_x_q3, subpel_y_q3, ref,
                     ref_stride);

  for (i = 0; i < height; i++) {
    for (j = 0; j < width; j++) {
      int tmp = pred[j] * bck_offset + comp_pred[j] * fwd_offset;
      tmp = ROUND_POWER_OF_TWO(tmp, DIST_PRECISION_BITS);
      comp_pred[j] = (uint8_t)tmp;
    }
    comp_pred += width;
    pred += width;
  }
}

static void highbd_variance64(const uint8_t *a8, int a_stride,
                              const uint8_t *b8, int b_stride, int w, int h,
                              uint64_t *sse, int64_t *sum) {
  int i, j;

  uint16_t *a = CONVERT_TO_SHORTPTR(a8);
  uint16_t *b = CONVERT_TO_SHORTPTR(b8);
  *sum = 0;
  *sse = 0;

  for (i = 0; i < h; ++i) {
    for (j = 0; j < w; ++j) {
      const int diff = a[j] - b[j];
      *sum += diff;
      *sse += (int64_t)diff * (int64_t)diff;
    }
    a += a_stride;
    b += b_stride;
  }
}

uint64_t aom_highbd_sse_odd_size(const uint8_t *a, int a_stride,
                                 const uint8_t *b, int b_stride, int w, int h) {
  uint64_t sse;
  int64_t sum;
  highbd_variance64(a, a_stride, b, b_stride, w, h, &sse, &sum);
  return sse;
}

static void highbd_8_variance(const uint8_t *a8, int a_stride,
                              const uint8_t *b8, int b_stride, int w, int h,
                              uint32_t *sse, int *sum) {
  uint64_t sse_long = 0;
  int64_t sum_long = 0;
  highbd_variance64(a8, a_stride, b8, b_stride, w, h, &sse_long, &sum_long);
  *sse = (uint32_t)sse_long;
  *sum = (int)sum_long;
}

static void highbd_10_variance(const uint8_t *a8, int a_stride,
                               const uint8_t *b8, int b_stride, int w, int h,
                               uint32_t *sse, int *sum) {
  uint64_t sse_long = 0;
  int64_t sum_long = 0;
  highbd_variance64(a8, a_stride, b8, b_stride, w, h, &sse_long, &sum_long);
  *sse = (uint32_t)ROUND_POWER_OF_TWO(sse_long, 4);
  *sum = (int)ROUND_POWER_OF_TWO(sum_long, 2);
}

static void highbd_12_variance(const uint8_t *a8, int a_stride,
                               const uint8_t *b8, int b_stride, int w, int h,
                               uint32_t *sse, int *sum) {
  uint64_t sse_long = 0;
  int64_t sum_long = 0;
  highbd_variance64(a8, a_stride, b8, b_stride, w, h, &sse_long, &sum_long);
  *sse = (uint32_t)ROUND_POWER_OF_TWO(sse_long, 8);
  *sum = (int)ROUND_POWER_OF_TWO(sum_long, 4);
}
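
// The shifts above normalize 10- and 12-bit statistics back to the 8-bit
// scale before truncating to 32 bits: each extra bit of depth doubles the sum
// and quadruples the sse, so bit depth 8 + d is compensated by shifting sum
// right by d and sse right by 2 * d (d = 2 for 10-bit, d = 4 for 12-bit).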

#define HIGHBD_VAR(W, H)                                                       \
  uint32_t aom_highbd_8_variance##W##x##H##_c(const uint8_t *a, int a_stride,  \
                                              const uint8_t *b, int b_stride,  \
                                              uint32_t *sse) {                 \
    int sum;                                                                   \
    highbd_8_variance(a, a_stride, b, b_stride, W, H, sse, &sum);              \
    return *sse - (uint32_t)(((int64_t)sum * sum) / (W * H));                  \
  }                                                                            \
                                                                               \
  uint32_t aom_highbd_10_variance##W##x##H##_c(const uint8_t *a, int a_stride, \
                                               const uint8_t *b, int b_stride, \
                                               uint32_t *sse) {                \
    int sum;                                                                   \
    int64_t var;                                                               \
    highbd_10_variance(a, a_stride, b, b_stride, W, H, sse, &sum);             \
    var = (int64_t)(*sse) - (((int64_t)sum * sum) / (W * H));                  \
    return (var >= 0) ? (uint32_t)var : 0;                                     \
  }                                                                            \
                                                                               \
  uint32_t aom_highbd_12_variance##W##x##H##_c(const uint8_t *a, int a_stride, \
                                               const uint8_t *b, int b_stride, \
                                               uint32_t *sse) {                \
    int sum;                                                                   \
    int64_t var;                                                               \
    highbd_12_variance(a, a_stride, b, b_stride, W, H, sse, &sum);             \
    var = (int64_t)(*sse) - (((int64_t)sum * sum) / (W * H));                  \
    return (var >= 0) ? (uint32_t)var : 0;                                     \
  }

#define HIGHBD_GET_VAR(S)                                                    \
  void aom_highbd_8_get##S##x##S##var_c(const uint8_t *src, int src_stride,  \
                                        const uint8_t *ref, int ref_stride,  \
                                        uint32_t *sse, int *sum) {           \
    highbd_8_variance(src, src_stride, ref, ref_stride, S, S, sse, sum);     \
  }                                                                          \
                                                                             \
  void aom_highbd_10_get##S##x##S##var_c(const uint8_t *src, int src_stride, \
                                         const uint8_t *ref, int ref_stride, \
                                         uint32_t *sse, int *sum) {          \
    highbd_10_variance(src, src_stride, ref, ref_stride, S, S, sse, sum);    \
  }                                                                          \
                                                                             \
  void aom_highbd_12_get##S##x##S##var_c(const uint8_t *src, int src_stride, \
                                         const uint8_t *ref, int ref_stride, \
                                         uint32_t *sse, int *sum) {          \
    highbd_12_variance(src, src_stride, ref, ref_stride, S, S, sse, sum);    \
  }

#define HIGHBD_MSE(W, H)                                                      \
  uint32_t aom_highbd_8_mse##W##x##H##_c(const uint8_t *src, int src_stride,  \
                                         const uint8_t *ref, int ref_stride,  \
                                         uint32_t *sse) {                     \
    int sum;                                                                  \
    highbd_8_variance(src, src_stride, ref, ref_stride, W, H, sse, &sum);     \
    return *sse;                                                              \
  }                                                                           \
                                                                              \
  uint32_t aom_highbd_10_mse##W##x##H##_c(const uint8_t *src, int src_stride, \
                                          const uint8_t *ref, int ref_stride, \
                                          uint32_t *sse) {                    \
    int sum;                                                                  \
    highbd_10_variance(src, src_stride, ref, ref_stride, W, H, sse, &sum);    \
    return *sse;                                                              \
  }                                                                           \
                                                                              \
  uint32_t aom_highbd_12_mse##W##x##H##_c(const uint8_t *src, int src_stride, \
                                          const uint8_t *ref, int ref_stride, \
                                          uint32_t *sse) {                    \
    int sum;                                                                  \
    highbd_12_variance(src, src_stride, ref, ref_stride, W, H, sse, &sum);    \
    return *sse;                                                              \
  }

void aom_highbd_var_filter_block2d_bil_first_pass(
    const uint8_t *src_ptr8, uint16_t *output_ptr,
    unsigned int src_pixels_per_line, int pixel_step,
    unsigned int output_height, unsigned int output_width,
    const uint8_t *filter) {
  unsigned int i, j;
  uint16_t *src_ptr = CONVERT_TO_SHORTPTR(src_ptr8);
  for (i = 0; i < output_height; ++i) {
    for (j = 0; j < output_width; ++j) {
      output_ptr[j] = ROUND_POWER_OF_TWO(
          (int)src_ptr[0] * filter[0] + (int)src_ptr[pixel_step] * filter[1],
          FILTER_BITS);

      ++src_ptr;
    }

    // Next row...
    src_ptr += src_pixels_per_line - output_width;
    output_ptr += output_width;
  }
}

void aom_highbd_var_filter_block2d_bil_second_pass(
    const uint16_t *src_ptr, uint16_t *output_ptr,
    unsigned int src_pixels_per_line, unsigned int pixel_step,
    unsigned int output_height, unsigned int output_width,
    const uint8_t *filter) {
  unsigned int i, j;

  for (i = 0; i < output_height; ++i) {
    for (j = 0; j < output_width; ++j) {
      output_ptr[j] = ROUND_POWER_OF_TWO(
          (int)src_ptr[0] * filter[0] + (int)src_ptr[pixel_step] * filter[1],
          FILTER_BITS);
      ++src_ptr;
    }

    src_ptr += src_pixels_per_line - output_width;
    output_ptr += output_width;
  }
}

#define HIGHBD_SUBPIX_VAR(W, H)                                              \
  uint32_t aom_highbd_8_sub_pixel_variance##W##x##H##_c(                     \
      const uint8_t *src, int src_stride, int xoffset, int yoffset,          \
      const uint8_t *dst, int dst_stride, uint32_t *sse) {                   \
    uint16_t fdata3[(H + 1) * W];                                            \
    uint16_t temp2[H * W];                                                   \
                                                                             \
    aom_highbd_var_filter_block2d_bil_first_pass(                            \
        src, fdata3, src_stride, 1, H + 1, W, bilinear_filters_2t[xoffset]); \
    aom_highbd_var_filter_block2d_bil_second_pass(                           \
        fdata3, temp2, W, W, H, W, bilinear_filters_2t[yoffset]);            \
                                                                             \
    return aom_highbd_8_variance##W##x##H##_c(CONVERT_TO_BYTEPTR(temp2), W,  \
                                              dst, dst_stride, sse);         \
  }                                                                          \
                                                                             \
  uint32_t aom_highbd_10_sub_pixel_variance##W##x##H##_c(                    \
      const uint8_t *src, int src_stride, int xoffset, int yoffset,          \
      const uint8_t *dst, int dst_stride, uint32_t *sse) {                   \
    uint16_t fdata3[(H + 1) * W];                                            \
    uint16_t temp2[H * W];                                                   \
                                                                             \
    aom_highbd_var_filter_block2d_bil_first_pass(                            \
        src, fdata3, src_stride, 1, H + 1, W, bilinear_filters_2t[xoffset]); \
    aom_highbd_var_filter_block2d_bil_second_pass(                           \
        fdata3, temp2, W, W, H, W, bilinear_filters_2t[yoffset]);            \
                                                                             \
    return aom_highbd_10_variance##W##x##H##_c(CONVERT_TO_BYTEPTR(temp2), W, \
                                               dst, dst_stride, sse);        \
  }                                                                          \
                                                                             \
  uint32_t aom_highbd_12_sub_pixel_variance##W##x##H##_c(                    \
      const uint8_t *src, int src_stride, int xoffset, int yoffset,          \
      const uint8_t *dst, int dst_stride, uint32_t *sse) {                   \
    uint16_t fdata3[(H + 1) * W];                                            \
    uint16_t temp2[H * W];                                                   \
                                                                             \
    aom_highbd_var_filter_block2d_bil_first_pass(                            \
        src, fdata3, src_stride, 1, H + 1, W, bilinear_filters_2t[xoffset]); \
    aom_highbd_var_filter_block2d_bil_second_pass(                           \
        fdata3, temp2, W, W, H, W, bilinear_filters_2t[yoffset]);            \
                                                                             \
    return aom_highbd_12_variance##W##x##H##_c(CONVERT_TO_BYTEPTR(temp2), W, \
                                               dst, dst_stride, sse);        \
  }

#define HIGHBD_SUBPIX_AVG_VAR(W, H)                                          \
  uint32_t aom_highbd_8_sub_pixel_avg_variance##W##x##H##_c(                 \
      const uint8_t *src, int src_stride, int xoffset, int yoffset,          \
      const uint8_t *dst, int dst_stride, uint32_t *sse,                     \
      const uint8_t *second_pred) {                                          \
    uint16_t fdata3[(H + 1) * W];                                            \
    uint16_t temp2[H * W];                                                   \
    DECLARE_ALIGNED(16, uint16_t, temp3[H * W]);                             \
                                                                             \
    aom_highbd_var_filter_block2d_bil_first_pass(                            \
        src, fdata3, src_stride, 1, H + 1, W, bilinear_filters_2t[xoffset]); \
    aom_highbd_var_filter_block2d_bil_second_pass(                           \
        fdata3, temp2, W, W, H, W, bilinear_filters_2t[yoffset]);            \
                                                                             \
    aom_highbd_comp_avg_pred_c(temp3, second_pred, W, H,                     \
                               CONVERT_TO_BYTEPTR(temp2), W);                \
                                                                             \
    return aom_highbd_8_variance##W##x##H##_c(CONVERT_TO_BYTEPTR(temp3), W,  \
                                              dst, dst_stride, sse);         \
  }                                                                          \
                                                                             \
  uint32_t aom_highbd_10_sub_pixel_avg_variance##W##x##H##_c(                \
      const uint8_t *src, int src_stride, int xoffset, int yoffset,          \
      const uint8_t *dst, int dst_stride, uint32_t *sse,                     \
      const uint8_t *second_pred) {                                          \
    uint16_t fdata3[(H + 1) * W];                                            \
    uint16_t temp2[H * W];                                                   \
    DECLARE_ALIGNED(16, uint16_t, temp3[H * W]);                             \
                                                                             \
    aom_highbd_var_filter_block2d_bil_first_pass(                            \
        src, fdata3, src_stride, 1, H + 1, W, bilinear_filters_2t[xoffset]); \
    aom_highbd_var_filter_block2d_bil_second_pass(                           \
        fdata3, temp2, W, W, H, W, bilinear_filters_2t[yoffset]);            \
                                                                             \
    aom_highbd_comp_avg_pred_c(temp3, second_pred, W, H,                     \
                               CONVERT_TO_BYTEPTR(temp2), W);                \
                                                                             \
    return aom_highbd_10_variance##W##x##H##_c(CONVERT_TO_BYTEPTR(temp3), W, \
                                               dst, dst_stride, sse);        \
  }                                                                          \
                                                                             \
  uint32_t aom_highbd_12_sub_pixel_avg_variance##W##x##H##_c(                \
      const uint8_t *src, int src_stride, int xoffset, int yoffset,          \
      const uint8_t *dst, int dst_stride, uint32_t *sse,                     \
      const uint8_t *second_pred) {                                          \
    uint16_t fdata3[(H + 1) * W];                                            \
    uint16_t temp2[H * W];                                                   \
    DECLARE_ALIGNED(16, uint16_t, temp3[H * W]);                             \
                                                                             \
    aom_highbd_var_filter_block2d_bil_first_pass(                            \
        src, fdata3, src_stride, 1, H + 1, W, bilinear_filters_2t[xoffset]); \
    aom_highbd_var_filter_block2d_bil_second_pass(                           \
        fdata3, temp2, W, W, H, W, bilinear_filters_2t[yoffset]);            \
                                                                             \
    aom_highbd_comp_avg_pred_c(temp3, second_pred, W, H,                     \
                               CONVERT_TO_BYTEPTR(temp2), W);                \
                                                                             \
    return aom_highbd_12_variance##W##x##H##_c(CONVERT_TO_BYTEPTR(temp3), W, \
                                               dst, dst_stride, sse);        \
  }                                                                          \
                                                                             \
  uint32_t aom_highbd_8_jnt_sub_pixel_avg_variance##W##x##H##_c(             \
      const uint8_t *src, int src_stride, int xoffset, int yoffset,          \
      const uint8_t *dst, int dst_stride, uint32_t *sse,                     \
      const uint8_t *second_pred, const JNT_COMP_PARAMS *jcp_param) {        \
    uint16_t fdata3[(H + 1) * W];                                            \
    uint16_t temp2[H * W];                                                   \
    DECLARE_ALIGNED(16, uint16_t, temp3[H * W]);                             \
                                                                             \
    aom_highbd_var_filter_block2d_bil_first_pass(                            \
        src, fdata3, src_stride, 1, H + 1, W, bilinear_filters_2t[xoffset]); \
    aom_highbd_var_filter_block2d_bil_second_pass(                           \
        fdata3, temp2, W, W, H, W, bilinear_filters_2t[yoffset]);            \
                                                                             \
    aom_highbd_jnt_comp_avg_pred(temp3, second_pred, W, H,                   \
                                 CONVERT_TO_BYTEPTR(temp2), W, jcp_param);   \
                                                                             \
    return aom_highbd_8_variance##W##x##H(CONVERT_TO_BYTEPTR(temp3), W, dst, \
                                          dst_stride, sse);                  \
  }                                                                          \
                                                                             \
  uint32_t aom_highbd_10_jnt_sub_pixel_avg_variance##W##x##H##_c(            \
      const uint8_t *src, int src_stride, int xoffset, int yoffset,          \
      const uint8_t *dst, int dst_stride, uint32_t *sse,                     \
      const uint8_t *second_pred, const JNT_COMP_PARAMS *jcp_param) {        \
    uint16_t fdata3[(H + 1) * W];                                            \
    uint16_t temp2[H * W];                                                   \
    DECLARE_ALIGNED(16, uint16_t, temp3[H * W]);                             \
                                                                             \
    aom_highbd_var_filter_block2d_bil_first_pass(                            \
        src, fdata3, src_stride, 1, H + 1, W, bilinear_filters_2t[xoffset]); \
    aom_highbd_var_filter_block2d_bil_second_pass(                           \
        fdata3, temp2, W, W, H, W, bilinear_filters_2t[yoffset]);            \
                                                                             \
    aom_highbd_jnt_comp_avg_pred(temp3, second_pred, W, H,                   \
                                 CONVERT_TO_BYTEPTR(temp2), W, jcp_param);   \
                                                                             \
    return aom_highbd_10_variance##W##x##H(CONVERT_TO_BYTEPTR(temp3), W,     \
                                           dst, dst_stride, sse);            \
  }                                                                          \
                                                                             \
  uint32_t aom_highbd_12_jnt_sub_pixel_avg_variance##W##x##H##_c(            \
      const uint8_t *src, int src_stride, int xoffset, int yoffset,          \
      const uint8_t *dst, int dst_stride, uint32_t *sse,                     \
      const uint8_t *second_pred, const JNT_COMP_PARAMS *jcp_param) {        \
    uint16_t fdata3[(H + 1) * W];                                            \
    uint16_t temp2[H * W];                                                   \
    DECLARE_ALIGNED(16, uint16_t, temp3[H * W]);                             \
                                                                             \
    aom_highbd_var_filter_block2d_bil_first_pass(                            \
        src, fdata3, src_stride, 1, H + 1, W, bilinear_filters_2t[xoffset]); \
    aom_highbd_var_filter_block2d_bil_second_pass(                           \
        fdata3, temp2, W, W, H, W, bilinear_filters_2t[yoffset]);            \
                                                                             \
    aom_highbd_jnt_comp_avg_pred(temp3, second_pred, W, H,                   \
                                 CONVERT_TO_BYTEPTR(temp2), W, jcp_param);   \
                                                                             \
    return aom_highbd_12_variance##W##x##H(CONVERT_TO_BYTEPTR(temp3), W,     \
                                           dst, dst_stride, sse);            \
  }

/* All three forms of the variance are available in the same sizes. */
#define HIGHBD_VARIANCES(W, H) \
  HIGHBD_VAR(W, H)             \
  HIGHBD_SUBPIX_VAR(W, H)      \
  HIGHBD_SUBPIX_AVG_VAR(W, H)

#if CONFIG_AV1
HIGHBD_VARIANCES(128, 128)
HIGHBD_VARIANCES(128, 64)
HIGHBD_VARIANCES(64, 128)
#endif  // CONFIG_AV1
HIGHBD_VARIANCES(64, 64)
HIGHBD_VARIANCES(64, 32)
HIGHBD_VARIANCES(32, 64)
HIGHBD_VARIANCES(32, 32)
HIGHBD_VARIANCES(32, 16)
HIGHBD_VARIANCES(16, 32)
HIGHBD_VARIANCES(16, 16)
HIGHBD_VARIANCES(16, 8)
HIGHBD_VARIANCES(8, 16)
HIGHBD_VARIANCES(8, 8)
HIGHBD_VARIANCES(8, 4)
HIGHBD_VARIANCES(4, 8)
HIGHBD_VARIANCES(4, 4)
HIGHBD_VARIANCES(4, 2)
HIGHBD_VARIANCES(2, 4)
HIGHBD_VARIANCES(2, 2)

#if CONFIG_AV1
HIGHBD_VARIANCES(4, 16)
HIGHBD_VARIANCES(16, 4)
HIGHBD_VARIANCES(8, 32)
HIGHBD_VARIANCES(32, 8)
HIGHBD_VARIANCES(16, 64)
HIGHBD_VARIANCES(64, 16)
HIGHBD_VARIANCES(32, 128)
HIGHBD_VARIANCES(128, 32)
#endif  // CONFIG_AV1

HIGHBD_GET_VAR(8)
HIGHBD_GET_VAR(16)

HIGHBD_MSE(16, 16)
HIGHBD_MSE(16, 8)
HIGHBD_MSE(8, 16)
HIGHBD_MSE(8, 8)

void aom_highbd_comp_avg_pred_c(uint16_t *comp_pred, const uint8_t *pred8,
                                int width, int height, const uint8_t *ref8,
                                int ref_stride) {
  int i, j;
  uint16_t *pred = CONVERT_TO_SHORTPTR(pred8);
  uint16_t *ref = CONVERT_TO_SHORTPTR(ref8);
  for (i = 0; i < height; ++i) {
    for (j = 0; j < width; ++j) {
      const int tmp = pred[j] + ref[j];
      comp_pred[j] = ROUND_POWER_OF_TWO(tmp, 1);
    }
    comp_pred += width;
    pred += width;
    ref += ref_stride;
  }
}

void aom_highbd_upsampled_pred_c(uint16_t *comp_pred, int width, int height,
                                 int subpel_x_q3, int subpel_y_q3,
                                 const uint8_t *ref8, int ref_stride, int bd) {
  if (!subpel_x_q3 && !subpel_y_q3) {
    const uint16_t *ref;
    int i;
    ref = CONVERT_TO_SHORTPTR(ref8);
    for (i = 0; i < height; i++) {
      memcpy(comp_pred, ref, width * sizeof(*comp_pred));
      comp_pred += width;
      ref += ref_stride;
    }
  } else {
    InterpFilterParams filter;
    filter = av1_get_interp_filter_params(EIGHTTAP_REGULAR);
    if (!subpel_y_q3) {
      const int16_t *kernel;
      kernel = av1_get_interp_filter_subpel_kernel(filter, subpel_x_q3 << 1);
      /*Directly call C version to allow this to work for small (2x2) sizes.*/
      aom_highbd_convolve8_horiz_c(ref8, ref_stride,
                                   CONVERT_TO_BYTEPTR(comp_pred), width, kernel,
                                   16, NULL, -1, width, height, bd);
    } else if (!subpel_x_q3) {
      const int16_t *kernel;
      kernel = av1_get_interp_filter_subpel_kernel(filter, subpel_y_q3 << 1);
      /*Directly call C version to allow this to work for small (2x2) sizes.*/
      aom_highbd_convolve8_vert_c(ref8, ref_stride,
                                  CONVERT_TO_BYTEPTR(comp_pred), width, NULL,
                                  -1, kernel, 16, width, height, bd);
    } else {
      DECLARE_ALIGNED(16, uint16_t,
                      temp[((MAX_SB_SIZE + 16) + 16) * MAX_SB_SIZE]);
      const int16_t *kernel_x;
      const int16_t *kernel_y;
      int intermediate_height;
      kernel_x = av1_get_interp_filter_subpel_kernel(filter, subpel_x_q3 << 1);
      kernel_y = av1_get_interp_filter_subpel_kernel(filter, subpel_y_q3 << 1);
      intermediate_height =
          (((height - 1) * 8 + subpel_y_q3) >> 3) + filter.taps;
      assert(intermediate_height <= (MAX_SB_SIZE * 2 + 16) + 16);
      /*Directly call C versions to allow this to work for small (2x2) sizes.*/
      aom_highbd_convolve8_horiz_c(ref8 - ref_stride * ((filter.taps >> 1) - 1),
                                   ref_stride, CONVERT_TO_BYTEPTR(temp),
                                   MAX_SB_SIZE, kernel_x, 16, NULL, -1, width,
                                   intermediate_height, bd);
      aom_highbd_convolve8_vert_c(
          CONVERT_TO_BYTEPTR(temp + MAX_SB_SIZE * ((filter.taps >> 1) - 1)),
          MAX_SB_SIZE, CONVERT_TO_BYTEPTR(comp_pred), width, NULL, -1, kernel_y,
          16, width, height, bd);
    }
  }
}

void aom_highbd_comp_avg_upsampled_pred_c(uint16_t *comp_pred,
                                          const uint8_t *pred8, int width,
                                          int height, int subpel_x_q3,
                                          int subpel_y_q3, const uint8_t *ref8,
                                          int ref_stride, int bd) {
  int i, j;

  const uint16_t *pred = CONVERT_TO_SHORTPTR(pred8);
  aom_highbd_upsampled_pred(comp_pred, width, height, subpel_x_q3, subpel_y_q3,
                            ref8, ref_stride, bd);
  for (i = 0; i < height; ++i) {
    for (j = 0; j < width; ++j) {
      comp_pred[j] = ROUND_POWER_OF_TWO(pred[j] + comp_pred[j], 1);
    }
    comp_pred += width;
    pred += width;
  }
}

void aom_highbd_jnt_comp_avg_pred_c(uint16_t *comp_pred, const uint8_t *pred8,
                                    int width, int height, const uint8_t *ref8,
                                    int ref_stride,
                                    const JNT_COMP_PARAMS *jcp_param) {
  int i, j;
  const int fwd_offset = jcp_param->fwd_offset;
  const int bck_offset = jcp_param->bck_offset;
  uint16_t *pred = CONVERT_TO_SHORTPTR(pred8);
  uint16_t *ref = CONVERT_TO_SHORTPTR(ref8);

  for (i = 0; i < height; ++i) {
    for (j = 0; j < width; ++j) {
      int tmp = pred[j] * bck_offset + ref[j] * fwd_offset;
      tmp = ROUND_POWER_OF_TWO(tmp, DIST_PRECISION_BITS);
      comp_pred[j] = (uint16_t)tmp;
    }
    comp_pred += width;
    pred += width;
    ref += ref_stride;
  }
}

void aom_highbd_jnt_comp_avg_upsampled_pred_c(
    uint16_t *comp_pred, const uint8_t *pred8, int width, int height,
    int subpel_x_q3, int subpel_y_q3, const uint8_t *ref8, int ref_stride,
    int bd, const JNT_COMP_PARAMS *jcp_param) {
  int i, j;
  const int fwd_offset = jcp_param->fwd_offset;
  const int bck_offset = jcp_param->bck_offset;
  const uint16_t *pred = CONVERT_TO_SHORTPTR(pred8);

  aom_highbd_upsampled_pred(comp_pred, width, height, subpel_x_q3, subpel_y_q3,
                            ref8, ref_stride, bd);

  for (i = 0; i < height; i++) {
    for (j = 0; j < width; j++) {
      int tmp = pred[j] * bck_offset + comp_pred[j] * fwd_offset;
      tmp = ROUND_POWER_OF_TWO(tmp, DIST_PRECISION_BITS);
      comp_pred[j] = (uint16_t)tmp;
    }
    comp_pred += width;
    pred += width;
  }
}

#if CONFIG_AV1
void aom_comp_mask_pred_c(uint8_t *comp_pred, const uint8_t *pred, int width,
                          int height, const uint8_t *ref, int ref_stride,
                          const uint8_t *mask, int mask_stride,
                          int invert_mask) {
  int i, j;
  const uint8_t *src0 = invert_mask ? pred : ref;
  const uint8_t *src1 = invert_mask ? ref : pred;
  const int stride0 = invert_mask ? width : ref_stride;
  const int stride1 = invert_mask ? ref_stride : width;
  for (i = 0; i < height; ++i) {
    for (j = 0; j < width; ++j) {
      comp_pred[j] = AOM_BLEND_A64(mask[j], src0[j], src1[j]);
    }
    comp_pred += width;
    src0 += stride0;
    src1 += stride1;
    mask += mask_stride;
  }
}

void aom_comp_mask_upsampled_pred(uint8_t *comp_pred, const uint8_t *pred,
                                  int width, int height, int subpel_x_q3,
                                  int subpel_y_q3, const uint8_t *ref,
                                  int ref_stride, const uint8_t *mask,
                                  int mask_stride, int invert_mask) {
  if (subpel_x_q3 | subpel_y_q3) {
    aom_upsampled_pred(comp_pred, width, height, subpel_x_q3, subpel_y_q3, ref,
                       ref_stride);
    ref = comp_pred;
    ref_stride = width;
  }
  aom_comp_mask_pred(comp_pred, pred, width, height, ref, ref_stride, mask,
                     mask_stride, invert_mask);
}
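
// Flow note for aom_comp_mask_upsampled_pred: `subpel_x_q3 | subpel_y_q3` is
// just a compact test for "any sub-pel offset"; when it is nonzero the
// reference is first up-sampled into comp_pred, and comp_pred is then reused
// in place as the reference input of the masked blend.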

#define MASK_SUBPIX_VAR(W, H)                                                  \
  unsigned int aom_masked_sub_pixel_variance##W##x##H##_c(                     \
      const uint8_t *src, int src_stride, int xoffset, int yoffset,            \
      const uint8_t *ref, int ref_stride, const uint8_t *second_pred,          \
      const uint8_t *msk, int msk_stride, int invert_mask,                     \
      unsigned int *sse) {                                                     \
    uint16_t fdata3[(H + 1) * W];                                              \
    uint8_t temp2[H * W];                                                      \
    DECLARE_ALIGNED(16, uint8_t, temp3[H * W]);                                \
                                                                               \
    aom_var_filter_block2d_bil_first_pass_c(src, fdata3, src_stride, 1, H + 1, \
                                            W, bilinear_filters_2t[xoffset]);  \
    aom_var_filter_block2d_bil_second_pass_c(fdata3, temp2, W, W, H, W,        \
                                             bilinear_filters_2t[yoffset]);    \
                                                                               \
    aom_comp_mask_pred_c(temp3, second_pred, W, H, temp2, W, msk, msk_stride,  \
                         invert_mask);                                         \
    return aom_variance##W##x##H##_c(temp3, W, ref, ref_stride, sse);          \
  }

MASK_SUBPIX_VAR(4, 4)
MASK_SUBPIX_VAR(4, 8)
MASK_SUBPIX_VAR(8, 4)
MASK_SUBPIX_VAR(8, 8)
MASK_SUBPIX_VAR(8, 16)
MASK_SUBPIX_VAR(16, 8)
MASK_SUBPIX_VAR(16, 16)
MASK_SUBPIX_VAR(16, 32)
MASK_SUBPIX_VAR(32, 16)
MASK_SUBPIX_VAR(32, 32)
MASK_SUBPIX_VAR(32, 64)
MASK_SUBPIX_VAR(64, 32)
MASK_SUBPIX_VAR(64, 64)
MASK_SUBPIX_VAR(64, 128)
MASK_SUBPIX_VAR(128, 64)
MASK_SUBPIX_VAR(128, 128)
MASK_SUBPIX_VAR(4, 16)
MASK_SUBPIX_VAR(16, 4)
MASK_SUBPIX_VAR(8, 32)
MASK_SUBPIX_VAR(32, 8)
MASK_SUBPIX_VAR(16, 64)
MASK_SUBPIX_VAR(64, 16)
MASK_SUBPIX_VAR(32, 128)
MASK_SUBPIX_VAR(128, 32)

void aom_highbd_comp_mask_pred_c(uint16_t *comp_pred, const uint8_t *pred8,
                                 int width, int height, const uint8_t *ref8,
                                 int ref_stride, const uint8_t *mask,
                                 int mask_stride, int invert_mask) {
  int i, j;
  uint16_t *pred = CONVERT_TO_SHORTPTR(pred8);
  uint16_t *ref = CONVERT_TO_SHORTPTR(ref8);
  for (i = 0; i < height; ++i) {
    for (j = 0; j < width; ++j) {
      if (!invert_mask)
        comp_pred[j] = AOM_BLEND_A64(mask[j], ref[j], pred[j]);
      else
        comp_pred[j] = AOM_BLEND_A64(mask[j], pred[j], ref[j]);
    }
    comp_pred += width;
    pred += width;
    ref += ref_stride;
    mask += mask_stride;
  }
}

void aom_highbd_comp_mask_upsampled_pred_c(
    uint16_t *comp_pred, const uint8_t *pred8, int width, int height,
    int subpel_x_q3, int subpel_y_q3, const uint8_t *ref8, int ref_stride,
    const uint8_t *mask, int mask_stride, int invert_mask, int bd) {
  int i, j;

  uint16_t *pred = CONVERT_TO_SHORTPTR(pred8);
  aom_highbd_upsampled_pred(comp_pred, width, height, subpel_x_q3, subpel_y_q3,
                            ref8, ref_stride, bd);
  for (i = 0; i < height; ++i) {
    for (j = 0; j < width; ++j) {
      if (!invert_mask)
        comp_pred[j] = AOM_BLEND_A64(mask[j], comp_pred[j], pred[j]);
      else
        comp_pred[j] = AOM_BLEND_A64(mask[j], pred[j], comp_pred[j]);
    }
    comp_pred += width;
    pred += width;
    mask += mask_stride;
  }
}

#define HIGHBD_MASK_SUBPIX_VAR(W, H)                                         \
  unsigned int aom_highbd_8_masked_sub_pixel_variance##W##x##H##_c(          \
      const uint8_t *src, int src_stride, int xoffset, int yoffset,          \
      const uint8_t *ref, int ref_stride, const uint8_t *second_pred,        \
      const uint8_t *msk, int msk_stride, int invert_mask,                   \
      unsigned int *sse) {                                                   \
    uint16_t fdata3[(H + 1) * W];                                            \
    uint16_t temp2[H * W];                                                   \
    DECLARE_ALIGNED(16, uint16_t, temp3[H * W]);                             \
                                                                             \
    aom_highbd_var_filter_block2d_bil_first_pass(                            \
        src, fdata3, src_stride, 1, H + 1, W, bilinear_filters_2t[xoffset]); \
    aom_highbd_var_filter_block2d_bil_second_pass(                           \
        fdata3, temp2, W, W, H, W, bilinear_filters_2t[yoffset]);            \
                                                                             \
    aom_highbd_comp_mask_pred_c(temp3, second_pred, W, H,                    \
                                CONVERT_TO_BYTEPTR(temp2), W, msk,           \
                                msk_stride, invert_mask);                    \
                                                                             \
    return aom_highbd_8_variance##W##x##H##_c(CONVERT_TO_BYTEPTR(temp3), W,  \
                                              ref, ref_stride, sse);         \
  }                                                                          \
                                                                             \
  unsigned int aom_highbd_10_masked_sub_pixel_variance##W##x##H##_c(         \
      const uint8_t *src, int src_stride, int xoffset, int yoffset,          \
      const uint8_t *ref, int ref_stride, const uint8_t *second_pred,        \
      const uint8_t *msk, int msk_stride, int invert_mask,                   \
      unsigned int *sse) {                                                   \
    uint16_t fdata3[(H + 1) * W];                                            \
    uint16_t temp2[H * W];                                                   \
    DECLARE_ALIGNED(16, uint16_t, temp3[H * W]);                             \
                                                                             \
    aom_highbd_var_filter_block2d_bil_first_pass(                            \
        src, fdata3, src_stride, 1, H + 1, W, bilinear_filters_2t[xoffset]); \
    aom_highbd_var_filter_block2d_bil_second_pass(                           \
        fdata3, temp2, W, W, H, W, bilinear_filters_2t[yoffset]);            \
                                                                             \
    aom_highbd_comp_mask_pred_c(temp3, second_pred, W, H,                    \
                                CONVERT_TO_BYTEPTR(temp2), W, msk,           \
                                msk_stride, invert_mask);                    \
                                                                             \
    return aom_highbd_10_variance##W##x##H##_c(CONVERT_TO_BYTEPTR(temp3), W, \
                                               ref, ref_stride, sse);        \
  }                                                                          \
                                                                             \
  unsigned int aom_highbd_12_masked_sub_pixel_variance##W##x##H##_c(         \
      const uint8_t *src, int src_stride, int xoffset, int yoffset,          \
      const uint8_t *ref, int ref_stride, const uint8_t *second_pred,        \
      const uint8_t *msk, int msk_stride, int invert_mask,                   \
      unsigned int *sse) {                                                   \
    uint16_t fdata3[(H + 1) * W];                                            \
    uint16_t temp2[H * W];                                                   \
    DECLARE_ALIGNED(16, uint16_t, temp3[H * W]);                             \
                                                                             \
    aom_highbd_var_filter_block2d_bil_first_pass(                            \
        src, fdata3, src_stride, 1, H + 1, W, bilinear_filters_2t[xoffset]); \
    aom_highbd_var_filter_block2d_bil_second_pass(                           \
        fdata3, temp2, W, W, H, W, bilinear_filters_2t[yoffset]);            \
                                                                             \
    aom_highbd_comp_mask_pred_c(temp3, second_pred, W, H,                    \
                                CONVERT_TO_BYTEPTR(temp2), W, msk,           \
                                msk_stride, invert_mask);                    \
                                                                             \
    return aom_highbd_12_variance##W##x##H##_c(CONVERT_TO_BYTEPTR(temp3), W, \
                                               ref, ref_stride, sse);        \
  }

HIGHBD_MASK_SUBPIX_VAR(4, 4)
HIGHBD_MASK_SUBPIX_VAR(4, 8)
HIGHBD_MASK_SUBPIX_VAR(8, 4)
HIGHBD_MASK_SUBPIX_VAR(8, 8)
HIGHBD_MASK_SUBPIX_VAR(8, 16)
HIGHBD_MASK_SUBPIX_VAR(16, 8)
HIGHBD_MASK_SUBPIX_VAR(16, 16)
HIGHBD_MASK_SUBPIX_VAR(16, 32)
HIGHBD_MASK_SUBPIX_VAR(32, 16)
HIGHBD_MASK_SUBPIX_VAR(32, 32)
HIGHBD_MASK_SUBPIX_VAR(32, 64)
HIGHBD_MASK_SUBPIX_VAR(64, 32)
HIGHBD_MASK_SUBPIX_VAR(64, 64)
HIGHBD_MASK_SUBPIX_VAR(64, 128)
HIGHBD_MASK_SUBPIX_VAR(128, 64)
HIGHBD_MASK_SUBPIX_VAR(128, 128)
HIGHBD_MASK_SUBPIX_VAR(4, 16)
HIGHBD_MASK_SUBPIX_VAR(16, 4)
HIGHBD_MASK_SUBPIX_VAR(8, 32)
HIGHBD_MASK_SUBPIX_VAR(32, 8)
HIGHBD_MASK_SUBPIX_VAR(16, 64)
HIGHBD_MASK_SUBPIX_VAR(64, 16)
HIGHBD_MASK_SUBPIX_VAR(32, 128)
HIGHBD_MASK_SUBPIX_VAR(128, 32)
#endif  // CONFIG_AV1

#if CONFIG_AV1
static INLINE void obmc_variance(const uint8_t *pre, int pre_stride,
                                 const int32_t *wsrc, const int32_t *mask,
                                 int w, int h, unsigned int *sse, int *sum) {
  int i, j;

  *sse = 0;
  *sum = 0;

  for (i = 0; i < h; i++) {
    for (j = 0; j < w; j++) {
      int diff = ROUND_POWER_OF_TWO_SIGNED(wsrc[j] - pre[j] * mask[j], 12);
      *sum += diff;
      *sse += diff * diff;
    }

    pre += pre_stride;
    wsrc += w;
    mask += w;
  }
}
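
// Note on the scaling above: the OBMC "source" is already a weighted signal.
// wsrc holds source pixels pre-multiplied by blending weights and mask holds
// the matching per-pixel weights for the predictor, so the residual is formed
// as wsrc[j] - pre[j] * mask[j] and the signed shift by 12 rounds the extra
// weight precision (which appears to come from two cascaded 6-bit
// AOM_BLEND_A64 stages) back out to pixel scale.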

#define OBMC_VAR(W, H)                                            \
  unsigned int aom_obmc_variance##W##x##H##_c(                    \
      const uint8_t *pre, int pre_stride, const int32_t *wsrc,    \
      const int32_t *mask, unsigned int *sse) {                   \
    int sum;                                                      \
    obmc_variance(pre, pre_stride, wsrc, mask, W, H, sse, &sum);  \
    return *sse - (unsigned int)(((int64_t)sum * sum) / (W * H)); \
  }

#define OBMC_SUBPIX_VAR(W, H)                                                  \
  unsigned int aom_obmc_sub_pixel_variance##W##x##H##_c(                       \
      const uint8_t *pre, int pre_stride, int xoffset, int yoffset,            \
      const int32_t *wsrc, const int32_t *mask, unsigned int *sse) {           \
    uint16_t fdata3[(H + 1) * W];                                              \
    uint8_t temp2[H * W];                                                      \
                                                                               \
    aom_var_filter_block2d_bil_first_pass_c(pre, fdata3, pre_stride, 1, H + 1, \
                                            W, bilinear_filters_2t[xoffset]);  \
    aom_var_filter_block2d_bil_second_pass_c(fdata3, temp2, W, W, H, W,        \
                                             bilinear_filters_2t[yoffset]);    \
                                                                               \
    return aom_obmc_variance##W##x##H##_c(temp2, W, wsrc, mask, sse);          \
  }

OBMC_VAR(4, 4)
OBMC_SUBPIX_VAR(4, 4)

OBMC_VAR(4, 8)
OBMC_SUBPIX_VAR(4, 8)

OBMC_VAR(8, 4)
OBMC_SUBPIX_VAR(8, 4)

OBMC_VAR(8, 8)
OBMC_SUBPIX_VAR(8, 8)

OBMC_VAR(8, 16)
OBMC_SUBPIX_VAR(8, 16)

OBMC_VAR(16, 8)
OBMC_SUBPIX_VAR(16, 8)

OBMC_VAR(16, 16)
OBMC_SUBPIX_VAR(16, 16)

OBMC_VAR(16, 32)
OBMC_SUBPIX_VAR(16, 32)

OBMC_VAR(32, 16)
OBMC_SUBPIX_VAR(32, 16)

OBMC_VAR(32, 32)
OBMC_SUBPIX_VAR(32, 32)

OBMC_VAR(32, 64)
OBMC_SUBPIX_VAR(32, 64)

OBMC_VAR(64, 32)
OBMC_SUBPIX_VAR(64, 32)

OBMC_VAR(64, 64)
OBMC_SUBPIX_VAR(64, 64)

OBMC_VAR(64, 128)
OBMC_SUBPIX_VAR(64, 128)

OBMC_VAR(128, 64)
OBMC_SUBPIX_VAR(128, 64)

OBMC_VAR(128, 128)
OBMC_SUBPIX_VAR(128, 128)

OBMC_VAR(4, 16)
OBMC_SUBPIX_VAR(4, 16)
OBMC_VAR(16, 4)
OBMC_SUBPIX_VAR(16, 4)
OBMC_VAR(8, 32)
OBMC_SUBPIX_VAR(8, 32)
OBMC_VAR(32, 8)
OBMC_SUBPIX_VAR(32, 8)
OBMC_VAR(16, 64)
OBMC_SUBPIX_VAR(16, 64)
OBMC_VAR(64, 16)
OBMC_SUBPIX_VAR(64, 16)
OBMC_VAR(32, 128)
OBMC_SUBPIX_VAR(32, 128)
OBMC_VAR(128, 32)
OBMC_SUBPIX_VAR(128, 32)

static INLINE void highbd_obmc_variance64(const uint8_t *pre8, int pre_stride,
                                          const int32_t *wsrc,
                                          const int32_t *mask, int w, int h,
                                          uint64_t *sse, int64_t *sum) {
  int i, j;
  uint16_t *pre = CONVERT_TO_SHORTPTR(pre8);

  *sse = 0;
  *sum = 0;

  for (i = 0; i < h; i++) {
    for (j = 0; j < w; j++) {
      int diff = ROUND_POWER_OF_TWO_SIGNED(wsrc[j] - pre[j] * mask[j], 12);
      *sum += diff;
      *sse += diff * diff;
    }

    pre += pre_stride;
    wsrc += w;
    mask += w;
  }
}

static INLINE void highbd_obmc_variance(const uint8_t *pre8, int pre_stride,
                                        const int32_t *wsrc,
                                        const int32_t *mask, int w, int h,
                                        unsigned int *sse, int *sum) {
  int64_t sum64;
  uint64_t sse64;
  highbd_obmc_variance64(pre8, pre_stride, wsrc, mask, w, h, &sse64, &sum64);
  *sum = (int)sum64;
  *sse = (unsigned int)sse64;
}

static INLINE void highbd_10_obmc_variance(const uint8_t *pre8, int pre_stride,
                                           const int32_t *wsrc,
                                           const int32_t *mask, int w, int h,
                                           unsigned int *sse, int *sum) {
  int64_t sum64;
  uint64_t sse64;
  highbd_obmc_variance64(pre8, pre_stride, wsrc, mask, w, h, &sse64, &sum64);
  *sum = (int)ROUND_POWER_OF_TWO(sum64, 2);
  *sse = (unsigned int)ROUND_POWER_OF_TWO(sse64, 4);
}

static INLINE void highbd_12_obmc_variance(const uint8_t *pre8, int pre_stride,
                                           const int32_t *wsrc,
                                           const int32_t *mask, int w, int h,
                                           unsigned int *sse, int *sum) {
  int64_t sum64;
  uint64_t sse64;
  highbd_obmc_variance64(pre8, pre_stride, wsrc, mask, w, h, &sse64, &sum64);
  *sum = (int)ROUND_POWER_OF_TWO(sum64, 4);
  *sse = (unsigned int)ROUND_POWER_OF_TWO(sse64, 8);
}

#define HIGHBD_OBMC_VAR(W, H)                                              \
  unsigned int aom_highbd_obmc_variance##W##x##H##_c(                      \
      const uint8_t *pre, int pre_stride, const int32_t *wsrc,             \
      const int32_t *mask, unsigned int *sse) {                            \
    int sum;                                                               \
    highbd_obmc_variance(pre, pre_stride, wsrc, mask, W, H, sse, &sum);    \
    return *sse - (unsigned int)(((int64_t)sum * sum) / (W * H));          \
  }                                                                        \
                                                                           \
  unsigned int aom_highbd_10_obmc_variance##W##x##H##_c(                   \
      const uint8_t *pre, int pre_stride, const int32_t *wsrc,             \
      const int32_t *mask, unsigned int *sse) {                            \
    int sum;                                                               \
    int64_t var;                                                           \
    highbd_10_obmc_variance(pre, pre_stride, wsrc, mask, W, H, sse, &sum); \
    var = (int64_t)(*sse) - (((int64_t)sum * sum) / (W * H));              \
    return (var >= 0) ? (uint32_t)var : 0;                                 \
  }                                                                        \
                                                                           \
  unsigned int aom_highbd_12_obmc_variance##W##x##H##_c(                   \
      const uint8_t *pre, int pre_stride, const int32_t *wsrc,             \
      const int32_t *mask, unsigned int *sse) {                            \
    int sum;                                                               \
    int64_t var;                                                           \
    highbd_12_obmc_variance(pre, pre_stride, wsrc, mask, W, H, sse, &sum); \
    var = (int64_t)(*sse) - (((int64_t)sum * sum) / (W * H));              \
    return (var >= 0) ? (uint32_t)var : 0;                                 \
  }

#define HIGHBD_OBMC_SUBPIX_VAR(W, H)                                           \
  unsigned int aom_highbd_obmc_sub_pixel_variance##W##x##H##_c(                \
      const uint8_t *pre, int pre_stride, int xoffset, int yoffset,            \
      const int32_t *wsrc, const int32_t *mask, unsigned int *sse) {           \
    uint16_t fdata3[(H + 1) * W];                                              \
    uint16_t temp2[H * W];                                                     \
                                                                               \
    aom_highbd_var_filter_block2d_bil_first_pass(                              \
        pre, fdata3, pre_stride, 1, H + 1, W, bilinear_filters_2t[xoffset]);   \
    aom_highbd_var_filter_block2d_bil_second_pass(                             \
        fdata3, temp2, W, W, H, W, bilinear_filters_2t[yoffset]);              \
                                                                               \
    return aom_highbd_obmc_variance##W##x##H##_c(CONVERT_TO_BYTEPTR(temp2), W, \
                                                 wsrc, mask, sse);             \
  }                                                                            \
                                                                               \
  unsigned int aom_highbd_10_obmc_sub_pixel_variance##W##x##H##_c(             \
      const uint8_t *pre, int pre_stride, int xoffset, int yoffset,            \
      const int32_t *wsrc, const int32_t *mask, unsigned int *sse) {           \
    uint16_t fdata3[(H + 1) * W];                                              \
    uint16_t temp2[H * W];                                                     \
                                                                               \
    aom_highbd_var_filter_block2d_bil_first_pass(                              \
        pre, fdata3, pre_stride, 1, H + 1, W, bilinear_filters_2t[xoffset]);   \
    aom_highbd_var_filter_block2d_bil_second_pass(                             \
        fdata3, temp2, W, W, H, W, bilinear_filters_2t[yoffset]);              \
                                                                               \
    return aom_highbd_10_obmc_variance##W##x##H##_c(CONVERT_TO_BYTEPTR(temp2), \
                                                    W, wsrc, mask, sse);       \
  }                                                                            \
                                                                               \
  unsigned int aom_highbd_12_obmc_sub_pixel_variance##W##x##H##_c(             \
      const uint8_t *pre, int pre_stride, int xoffset, int yoffset,            \
      const int32_t *wsrc, const int32_t *mask, unsigned int *sse) {           \
    uint16_t fdata3[(H + 1) * W];                                              \
    uint16_t temp2[H * W];                                                     \
                                                                               \
    aom_highbd_var_filter_block2d_bil_first_pass(                              \
        pre, fdata3, pre_stride, 1, H + 1, W, bilinear_filters_2t[xoffset]);   \
    aom_highbd_var_filter_block2d_bil_second_pass(                             \
        fdata3, temp2, W, W, H, W, bilinear_filters_2t[yoffset]);              \
                                                                               \
    return aom_highbd_12_obmc_variance##W##x##H##_c(CONVERT_TO_BYTEPTR(temp2), \
                                                    W, wsrc, mask, sse);       \
  }

HIGHBD_OBMC_VAR(4, 4)
HIGHBD_OBMC_SUBPIX_VAR(4, 4)

HIGHBD_OBMC_VAR(4, 8)
HIGHBD_OBMC_SUBPIX_VAR(4, 8)

HIGHBD_OBMC_VAR(8, 4)
HIGHBD_OBMC_SUBPIX_VAR(8, 4)

HIGHBD_OBMC_VAR(8, 8)
HIGHBD_OBMC_SUBPIX_VAR(8, 8)

HIGHBD_OBMC_VAR(8, 16)
HIGHBD_OBMC_SUBPIX_VAR(8, 16)

HIGHBD_OBMC_VAR(16, 8)
HIGHBD_OBMC_SUBPIX_VAR(16, 8)

HIGHBD_OBMC_VAR(16, 16)
HIGHBD_OBMC_SUBPIX_VAR(16, 16)

HIGHBD_OBMC_VAR(16, 32)
HIGHBD_OBMC_SUBPIX_VAR(16, 32)

HIGHBD_OBMC_VAR(32, 16)
HIGHBD_OBMC_SUBPIX_VAR(32, 16)

HIGHBD_OBMC_VAR(32, 32)
HIGHBD_OBMC_SUBPIX_VAR(32, 32)

HIGHBD_OBMC_VAR(32, 64)
HIGHBD_OBMC_SUBPIX_VAR(32, 64)

HIGHBD_OBMC_VAR(64, 32)
HIGHBD_OBMC_SUBPIX_VAR(64, 32)

HIGHBD_OBMC_VAR(64, 64)
HIGHBD_OBMC_SUBPIX_VAR(64, 64)

HIGHBD_OBMC_VAR(64, 128)
HIGHBD_OBMC_SUBPIX_VAR(64, 128)

HIGHBD_OBMC_VAR(128, 64)
HIGHBD_OBMC_SUBPIX_VAR(128, 64)

HIGHBD_OBMC_VAR(128, 128)
HIGHBD_OBMC_SUBPIX_VAR(128, 128)

HIGHBD_OBMC_VAR(4, 16)
HIGHBD_OBMC_SUBPIX_VAR(4, 16)
HIGHBD_OBMC_VAR(16, 4)
HIGHBD_OBMC_SUBPIX_VAR(16, 4)
HIGHBD_OBMC_VAR(8, 32)
HIGHBD_OBMC_SUBPIX_VAR(8, 32)
HIGHBD_OBMC_VAR(32, 8)
HIGHBD_OBMC_SUBPIX_VAR(32, 8)
HIGHBD_OBMC_VAR(16, 64)
HIGHBD_OBMC_SUBPIX_VAR(16, 64)
HIGHBD_OBMC_VAR(64, 16)
HIGHBD_OBMC_SUBPIX_VAR(64, 16)
HIGHBD_OBMC_VAR(32, 128)
HIGHBD_OBMC_SUBPIX_VAR(32, 128)
HIGHBD_OBMC_VAR(128, 32)
HIGHBD_OBMC_SUBPIX_VAR(128, 32)
#endif  // CONFIG_AV1
Sebastien Alaiwan1bc94fc2017-10-31 10:25:17 +01001457#endif // CONFIG_AV1