blob: 2172b6cdef47408dc8d07878231ada9556334131 [file] [log] [blame]
/*
 * Copyright (c) 2016, Alliance for Open Media. All rights reserved
 *
 * This source code is subject to the terms of the BSD 2 Clause License and
 * the Alliance for Open Media Patent License 1.0. If the BSD 2 Clause License
 * was not distributed with this source code in the LICENSE file, you can
 * obtain it at www.aomedia.org/license/software. If the Alliance for Open
 * Media Patent License 1.0 was not distributed with this source code in the
 * PATENTS file, you can obtain it at www.aomedia.org/license/patent.
 */

#include <stdlib.h>

#include "./aom_config.h"
#include "./aom_dsp_rtcd.h"

#include "aom/aom_integer.h"
#include "aom_ports/mem.h"

/*
 * Sum of absolute differences (SAD) between two blocks of 8-bit samples.
 *
 *   a, b                 first row of each block
 *   a_stride, b_stride   element distance between consecutive rows of a / b
 *   width, height        block dimensions in samples
 *
 * Returns the sum of |a[x] - b[x]| over the width x height window. A
 * non-positive width or height yields 0.
 */
static INLINE unsigned int sad(const uint8_t *a, int a_stride, const uint8_t *b,
                               int b_stride, int width, int height) {
  /* Deliberately not named `sad`: that would shadow this function's name. */
  unsigned int total = 0;
  int y, x;

  for (y = 0; y < height; y++) {
    for (x = 0; x < width; x++) {
      total += abs(a[x] - b[x]);
    }

    /* Step both blocks to their next row. */
    a += a_stride;
    b += b_stride;
  }
  return total;
}

/*
 * sadMxN(m, n) defines two functions for an m-wide, n-high 8-bit block:
 *
 *   aom_sad<m>x<n>_c      SAD of src against ref.
 *   aom_sad<m>x<n>_avg_c  SAD of src against the block that
 *                         aom_comp_avg_pred_c() writes into a local m * n
 *                         scratch buffer from second_pred and ref. The scratch
 *                         buffer is packed, so it is read back with stride m.
 *
 * m and n must be integer constants: they are pasted into the function names.
 */
#define sadMxN(m, n)                                                          \
  unsigned int aom_sad##m##x##n##_c(const uint8_t *src, int src_stride,       \
                                    const uint8_t *ref, int ref_stride) {     \
    return sad(src, src_stride, ref, ref_stride, m, n);                       \
  }                                                                           \
  unsigned int aom_sad##m##x##n##_avg_c(const uint8_t *src, int src_stride,   \
                                        const uint8_t *ref, int ref_stride,   \
                                        const uint8_t *second_pred) {         \
    uint8_t comp_pred[(m) * (n)];                                             \
    aom_comp_avg_pred_c(comp_pred, second_pred, m, n, ref, ref_stride);       \
    return sad(src, src_stride, comp_pred, m, m, n);                          \
  }

// sadMxNxK(m, n, k) defines aom_sad<m>x<n>x<k>_c, which stores into
// sad_array[i], for i in [0, k), the SAD of src against the block that starts
// at ref_array + i (i.e. k candidate positions at consecutive offsets).
//
// TODO: depending on call sites, pass **ref_array to avoid the & in the call
// below and de-duplicate with the x4d variant.
#define sadMxNxK(m, n, k)                                                     \
  void aom_sad##m##x##n##x##k##_c(const uint8_t *src, int src_stride,         \
                                  const uint8_t *ref_array, int ref_stride,   \
                                  uint32_t *sad_array) {                      \
    int i;                                                                    \
    for (i = 0; i < (k); ++i)                                                 \
      sad_array[i] =                                                          \
          aom_sad##m##x##n##_c(src, src_stride, &ref_array[i], ref_stride);   \
  }

// sadMxNx4D(m, n) defines aom_sad<m>x<n>x4d_c, which stores into
// sad_array[0..3] the SAD of src against each of the four reference blocks
// ref_array[0..3]. It has the same loop shape as sadMxNxK with k == 4, except
// that the references are four independent pointers rather than consecutive
// offsets from a single base pointer.
#define sadMxNx4D(m, n)                                                       \
  void aom_sad##m##x##n##x4d_c(const uint8_t *src, int src_stride,            \
                               const uint8_t *const ref_array[],              \
                               int ref_stride, uint32_t *sad_array) {         \
    int i;                                                                    \
    for (i = 0; i < 4; ++i)                                                   \
      sad_array[i] =                                                          \
          aom_sad##m##x##n##_c(src, src_stride, ref_array[i], ref_stride);    \
  }

71/* clang-format off */
Yaowu Xuf883b422016-08-30 14:01:10 -070072#if CONFIG_AV1 && CONFIG_EXT_PARTITION
Yaowu Xuc27fc142016-08-22 16:08:15 -070073// 128x128
74sadMxN(128, 128)
75sadMxNxK(128, 128, 3)
76sadMxNxK(128, 128, 8)
77sadMxNx4D(128, 128)
78
79// 128x64
80sadMxN(128, 64)
81sadMxNx4D(128, 64)
82
83// 64x128
84sadMxN(64, 128)
85sadMxNx4D(64, 128)
Yaowu Xuf883b422016-08-30 14:01:10 -070086#endif // CONFIG_AV1 && CONFIG_EXT_PARTITION
Yaowu Xuc27fc142016-08-22 16:08:15 -070087
88// 64x64
89sadMxN(64, 64)
90sadMxNxK(64, 64, 3)
91sadMxNxK(64, 64, 8)
92sadMxNx4D(64, 64)
93
94// 64x32
95sadMxN(64, 32)
96sadMxNx4D(64, 32)
97
98// 32x64
99sadMxN(32, 64)
100sadMxNx4D(32, 64)
101
102// 32x32
103sadMxN(32, 32)
104sadMxNxK(32, 32, 3)
105sadMxNxK(32, 32, 8)
106sadMxNx4D(32, 32)
107
108// 32x16
109sadMxN(32, 16)
110sadMxNx4D(32, 16)
111
112// 16x32
113sadMxN(16, 32)
114sadMxNx4D(16, 32)
115
116// 16x16
117sadMxN(16, 16)
118sadMxNxK(16, 16, 3)
119sadMxNxK(16, 16, 8)
120sadMxNx4D(16, 16)
121
122// 16x8
123sadMxN(16, 8)
124sadMxNxK(16, 8, 3)
125sadMxNxK(16, 8, 8)
126sadMxNx4D(16, 8)
127
128// 8x16
129sadMxN(8, 16)
130sadMxNxK(8, 16, 3)
131sadMxNxK(8, 16, 8)
132sadMxNx4D(8, 16)
133
134// 8x8
135sadMxN(8, 8)
136sadMxNxK(8, 8, 3)
137sadMxNxK(8, 8, 8)
138sadMxNx4D(8, 8)
139
140// 8x4
141sadMxN(8, 4)
142sadMxNxK(8, 4, 8)
143sadMxNx4D(8, 4)
144
145// 4x8
146sadMxN(4, 8)
147sadMxNxK(4, 8, 8)
148sadMxNx4D(4, 8)
149
150// 4x4
151sadMxN(4, 4)
152sadMxNxK(4, 4, 3)
153sadMxNxK(4, 4, 8)
154sadMxNx4D(4, 4)
155/* clang-format on */
156
#if CONFIG_AOM_HIGHBITDEPTH
Yaowu Xuc27fc142016-08-22 16:08:15 -0700158 static INLINE
159 unsigned int highbd_sad(const uint8_t *a8, int a_stride, const uint8_t *b8,
160 int b_stride, int width, int height) {
161 int y, x;
162 unsigned int sad = 0;
163 const uint16_t *a = CONVERT_TO_SHORTPTR(a8);
164 const uint16_t *b = CONVERT_TO_SHORTPTR(b8);
165 for (y = 0; y < height; y++) {
166 for (x = 0; x < width; x++) sad += abs(a[x] - b[x]);
167
168 a += a_stride;
169 b += b_stride;
170 }
171 return sad;
172}
173
174static INLINE unsigned int highbd_sadb(const uint8_t *a8, int a_stride,
175 const uint16_t *b, int b_stride,
176 int width, int height) {
177 int y, x;
178 unsigned int sad = 0;
179 const uint16_t *a = CONVERT_TO_SHORTPTR(a8);
180 for (y = 0; y < height; y++) {
181 for (x = 0; x < width; x++) sad += abs(a[x] - b[x]);
182
183 a += a_stride;
184 b += b_stride;
185 }
186 return sad;
187}
188
/*
 * highbd_sadMxN(m, n) defines two functions for an m-wide, n-high block of
 * high-bitdepth samples:
 *
 *   aom_highbd_sad<m>x<n>_c      SAD of src against ref via highbd_sad().
 *   aom_highbd_sad<m>x<n>_avg_c  SAD of src against the block that
 *                                aom_highbd_comp_avg_pred_c() writes into a
 *                                local uint16_t[m * n] scratch buffer from
 *                                second_pred and ref. The buffer is packed
 *                                (stride m) and already 16-bit, hence
 *                                highbd_sadb().
 */
#define highbd_sadMxN(m, n)                                                    \
  unsigned int aom_highbd_sad##m##x##n##_c(const uint8_t *src, int src_stride, \
                                           const uint8_t *ref,                 \
                                           int ref_stride) {                   \
    return highbd_sad(src, src_stride, ref, ref_stride, m, n);                 \
  }                                                                            \
  unsigned int aom_highbd_sad##m##x##n##_avg_c(                                \
      const uint8_t *src, int src_stride, const uint8_t *ref, int ref_stride,  \
      const uint8_t *second_pred) {                                            \
    uint16_t comp_pred[(m) * (n)];                                             \
    aom_highbd_comp_avg_pred_c(comp_pred, second_pred, m, n, ref, ref_stride); \
    return highbd_sadb(src, src_stride, comp_pred, m, m, n);                   \
  }

// highbd_sadMxNxK(m, n, k) defines aom_highbd_sad<m>x<n>x<k>_c, which stores
// into sad_array[i], for i in [0, k), the high-bitdepth SAD of src against the
// reference addressed by &ref_array[i].
// NOTE(review): the offset is applied to the uint8_t-typed handle before
// CONVERT_TO_SHORTPTR() runs inside highbd_sad(); presumably that maps to a
// one-sample step -- confirm against the CONVERT_TO_SHORTPTR definition.
#define highbd_sadMxNxK(m, n, k)                                             \
  void aom_highbd_sad##m##x##n##x##k##_c(                                    \
      const uint8_t *src, int src_stride, const uint8_t *ref_array,          \
      int ref_stride, uint32_t *sad_array) {                                 \
    int i;                                                                   \
    for (i = 0; i < (k); ++i) {                                              \
      sad_array[i] = aom_highbd_sad##m##x##n##_c(src, src_stride,            \
                                                 &ref_array[i], ref_stride); \
    }                                                                        \
  }

// highbd_sadMxNx4D(m, n) defines aom_highbd_sad<m>x<n>x4d_c, which stores into
// sad_array[0..3] the high-bitdepth SAD of src against each of the four
// independent reference blocks ref_array[0..3].
#define highbd_sadMxNx4D(m, n)                                               \
  void aom_highbd_sad##m##x##n##x4d_c(const uint8_t *src, int src_stride,    \
                                      const uint8_t *const ref_array[],      \
                                      int ref_stride, uint32_t *sad_array) { \
    int i;                                                                   \
    for (i = 0; i < 4; ++i) {                                                \
      sad_array[i] = aom_highbd_sad##m##x##n##_c(src, src_stride,            \
                                                 ref_array[i], ref_stride);  \
    }                                                                        \
  }

225/* clang-format off */
Yaowu Xuf883b422016-08-30 14:01:10 -0700226#if CONFIG_AV1 && CONFIG_EXT_PARTITION
Yaowu Xuc27fc142016-08-22 16:08:15 -0700227// 128x128
228highbd_sadMxN(128, 128)
229highbd_sadMxNxK(128, 128, 3)
230highbd_sadMxNxK(128, 128, 8)
231highbd_sadMxNx4D(128, 128)
232
233// 128x64
234highbd_sadMxN(128, 64)
235highbd_sadMxNx4D(128, 64)
236
237// 64x128
238highbd_sadMxN(64, 128)
239highbd_sadMxNx4D(64, 128)
Yaowu Xuf883b422016-08-30 14:01:10 -0700240#endif // CONFIG_AV1 && CONFIG_EXT_PARTITION
Yaowu Xuc27fc142016-08-22 16:08:15 -0700241
242// 64x64
243highbd_sadMxN(64, 64)
244highbd_sadMxNxK(64, 64, 3)
245highbd_sadMxNxK(64, 64, 8)
246highbd_sadMxNx4D(64, 64)
247
248// 64x32
249highbd_sadMxN(64, 32)
250highbd_sadMxNx4D(64, 32)
251
252// 32x64
253highbd_sadMxN(32, 64)
254highbd_sadMxNx4D(32, 64)
255
256// 32x32
257highbd_sadMxN(32, 32)
258highbd_sadMxNxK(32, 32, 3)
259highbd_sadMxNxK(32, 32, 8)
260highbd_sadMxNx4D(32, 32)
261
262// 32x16
263highbd_sadMxN(32, 16)
264highbd_sadMxNx4D(32, 16)
265
266// 16x32
267highbd_sadMxN(16, 32)
268highbd_sadMxNx4D(16, 32)
269
270// 16x16
271highbd_sadMxN(16, 16)
272highbd_sadMxNxK(16, 16, 3)
273highbd_sadMxNxK(16, 16, 8)
274highbd_sadMxNx4D(16, 16)
275
276// 16x8
277highbd_sadMxN(16, 8)
278highbd_sadMxNxK(16, 8, 3)
279highbd_sadMxNxK(16, 8, 8)
280highbd_sadMxNx4D(16, 8)
281
282// 8x16
283highbd_sadMxN(8, 16)
284highbd_sadMxNxK(8, 16, 3)
285highbd_sadMxNxK(8, 16, 8)
286highbd_sadMxNx4D(8, 16)
287
288// 8x8
289highbd_sadMxN(8, 8)
290highbd_sadMxNxK(8, 8, 3)
291highbd_sadMxNxK(8, 8, 8)
292highbd_sadMxNx4D(8, 8)
293
294// 8x4
295highbd_sadMxN(8, 4)
296highbd_sadMxNxK(8, 4, 8)
297highbd_sadMxNx4D(8, 4)
298
299// 4x8
300highbd_sadMxN(4, 8)
301highbd_sadMxNxK(4, 8, 8)
302highbd_sadMxNx4D(4, 8)
303
304// 4x4
305highbd_sadMxN(4, 4)
306highbd_sadMxNxK(4, 4, 3)
307highbd_sadMxNxK(4, 4, 8)
308highbd_sadMxNx4D(4, 4)
309/* clang-format on */
#endif  // CONFIG_AOM_HIGHBITDEPTH

#if CONFIG_AV1 && CONFIG_EXT_INTER
/*
 * Mask-weighted SAD between two blocks of 8-bit samples.
 *
 *   a, b, m      first row of the two sample blocks and of the weight mask
 *   *_stride     element distance between consecutive rows of each buffer
 *   width/height block dimensions in samples
 *
 * Accumulates m[x] * |a[x] - b[x]| over the window and returns
 * (sum + 31) >> 6.
 *
 * NOTE(review): the >> 6 suggests mask weights on a 0..64 scale, and the
 * offset of 31 is one less than the half-step (32) that round-to-nearest
 * would use. It is kept as-is because other implementations of this function
 * presumably have to match it bit-exactly -- confirm before changing.
 */
static INLINE unsigned int masked_sad(const uint8_t *a, int a_stride,
                                      const uint8_t *b, int b_stride,
                                      const uint8_t *m, int m_stride,
                                      int width, int height) {
  unsigned int weighted = 0;
  int y, x;

  for (y = 0; y < height; y++) {
    for (x = 0; x < width; x++) {
      weighted += m[x] * abs(a[x] - b[x]);
    }

    a += a_stride;
    b += b_stride;
    m += m_stride;
  }

  /* Scale the weighted sum back down by the mask precision. */
  return (weighted + 31) >> 6;
}

// MASKSADMxN(m, n) defines aom_masked_sad<m>x<n>_c, a fixed-size wrapper that
// forwards to masked_sad() with width m and height n.
#define MASKSADMxN(m, n)                                                      \
  unsigned int aom_masked_sad##m##x##n##_c(                                   \
      const uint8_t *src, int src_stride, const uint8_t *ref, int ref_stride, \
      const uint8_t *msk, int msk_stride) {                                   \
    return masked_sad(src, src_stride, ref, ref_stride, msk, msk_stride, m,   \
                      n);                                                     \
  }

340/* clang-format off */
341#if CONFIG_EXT_PARTITION
342MASKSADMxN(128, 128)
343MASKSADMxN(128, 64)
344MASKSADMxN(64, 128)
345#endif // CONFIG_EXT_PARTITION
346MASKSADMxN(64, 64)
347MASKSADMxN(64, 32)
348MASKSADMxN(32, 64)
349MASKSADMxN(32, 32)
350MASKSADMxN(32, 16)
351MASKSADMxN(16, 32)
352MASKSADMxN(16, 16)
353MASKSADMxN(16, 8)
354MASKSADMxN(8, 16)
355MASKSADMxN(8, 8)
356MASKSADMxN(8, 4)
357MASKSADMxN(4, 8)
358MASKSADMxN(4, 4)
359/* clang-format on */
360
#if CONFIG_AOM_HIGHBITDEPTH
Yaowu Xuc27fc142016-08-22 16:08:15 -0700362 static INLINE
363 unsigned int highbd_masked_sad(const uint8_t *a8, int a_stride,
364 const uint8_t *b8, int b_stride,
365 const uint8_t *m, int m_stride, int width,
366 int height) {
367 int y, x;
368 unsigned int sad = 0;
369 const uint16_t *a = CONVERT_TO_SHORTPTR(a8);
370 const uint16_t *b = CONVERT_TO_SHORTPTR(b8);
371
372 for (y = 0; y < height; y++) {
373 for (x = 0; x < width; x++) sad += m[x] * abs(a[x] - b[x]);
374
375 a += a_stride;
376 b += b_stride;
377 m += m_stride;
378 }
379 sad = (sad + 31) >> 6;
380
381 return sad;
382}
383
// HIGHBD_MASKSADMXN(m, n) defines aom_highbd_masked_sad<m>x<n>_c, a fixed-size
// wrapper that forwards to highbd_masked_sad() with width m and height n.
#define HIGHBD_MASKSADMXN(m, n)                                               \
  unsigned int aom_highbd_masked_sad##m##x##n##_c(                            \
      const uint8_t *src, int src_stride, const uint8_t *ref, int ref_stride, \
      const uint8_t *msk, int msk_stride) {                                   \
    return highbd_masked_sad(src, src_stride, ref, ref_stride, msk,           \
                             msk_stride, m, n);                               \
  }

392#if CONFIG_EXT_PARTITION
393HIGHBD_MASKSADMXN(128, 128)
394HIGHBD_MASKSADMXN(128, 64)
395HIGHBD_MASKSADMXN(64, 128)
396#endif // CONFIG_EXT_PARTITION
397HIGHBD_MASKSADMXN(64, 64)
398HIGHBD_MASKSADMXN(64, 32)
399HIGHBD_MASKSADMXN(32, 64)
400HIGHBD_MASKSADMXN(32, 32)
401HIGHBD_MASKSADMXN(32, 16)
402HIGHBD_MASKSADMXN(16, 32)
403HIGHBD_MASKSADMXN(16, 16)
404HIGHBD_MASKSADMXN(16, 8)
405HIGHBD_MASKSADMXN(8, 16)
406HIGHBD_MASKSADMXN(8, 8)
407HIGHBD_MASKSADMXN(8, 4)
408HIGHBD_MASKSADMXN(4, 8)
409HIGHBD_MASKSADMXN(4, 4)
#endif  // CONFIG_AOM_HIGHBITDEPTH
#endif  // CONFIG_AV1 && CONFIG_EXT_INTER

#if CONFIG_AV1 && CONFIG_MOTION_VAR
Yaowu Xuc27fc142016-08-22 16:08:15 -0700414// pre: predictor being evaluated
415// wsrc: target weighted prediction (has been *4096 to keep precision)
416// mask: 2d weights (scaled by 4096)
417static INLINE unsigned int obmc_sad(const uint8_t *pre, int pre_stride,
418 const int32_t *wsrc, const int32_t *mask,
419 int width, int height) {
420 int y, x;
421 unsigned int sad = 0;
422
423 for (y = 0; y < height; y++) {
424 for (x = 0; x < width; x++)
425 sad += ROUND_POWER_OF_TWO(abs(wsrc[x] - pre[x] * mask[x]), 12);
426
427 pre += pre_stride;
428 wsrc += width;
429 mask += width;
430 }
431
432 return sad;
433}
434
// OBMCSADMxN(m, n) defines aom_obmc_sad<m>x<n>_c, a fixed-size wrapper that
// forwards to obmc_sad() with width m and height n.
#define OBMCSADMxN(m, n)                                                     \
  unsigned int aom_obmc_sad##m##x##n##_c(const uint8_t *ref, int ref_stride, \
                                         const int32_t *wsrc,                \
                                         const int32_t *mask) {              \
    return obmc_sad(ref, ref_stride, wsrc, mask, m, n);                      \
  }

442/* clang-format off */
443#if CONFIG_EXT_PARTITION
444OBMCSADMxN(128, 128)
445OBMCSADMxN(128, 64)
446OBMCSADMxN(64, 128)
447#endif // CONFIG_EXT_PARTITION
448OBMCSADMxN(64, 64)
449OBMCSADMxN(64, 32)
450OBMCSADMxN(32, 64)
451OBMCSADMxN(32, 32)
452OBMCSADMxN(32, 16)
453OBMCSADMxN(16, 32)
454OBMCSADMxN(16, 16)
455OBMCSADMxN(16, 8)
456OBMCSADMxN(8, 16)
457OBMCSADMxN(8, 8)
458OBMCSADMxN(8, 4)
459OBMCSADMxN(4, 8)
460OBMCSADMxN(4, 4)
461/* clang-format on */
462
#if CONFIG_AOM_HIGHBITDEPTH
Yaowu Xuc27fc142016-08-22 16:08:15 -0700464 static INLINE
465 unsigned int highbd_obmc_sad(const uint8_t *pre8, int pre_stride,
466 const int32_t *wsrc, const int32_t *mask,
467 int width, int height) {
468 int y, x;
469 unsigned int sad = 0;
470 const uint16_t *pre = CONVERT_TO_SHORTPTR(pre8);
471
472 for (y = 0; y < height; y++) {
473 for (x = 0; x < width; x++)
474 sad += ROUND_POWER_OF_TWO(abs(wsrc[x] - pre[x] * mask[x]), 12);
475
476 pre += pre_stride;
477 wsrc += width;
478 mask += width;
479 }
480
481 return sad;
482}
483
// HIGHBD_OBMCSADMXN(m, n) defines aom_highbd_obmc_sad<m>x<n>_c, a fixed-size
// wrapper that forwards to highbd_obmc_sad() with width m and height n.
#define HIGHBD_OBMCSADMXN(m, n)                                \
  unsigned int aom_highbd_obmc_sad##m##x##n##_c(               \
      const uint8_t *ref, int ref_stride, const int32_t *wsrc, \
      const int32_t *mask) {                                   \
    return highbd_obmc_sad(ref, ref_stride, wsrc, mask, m, n); \
  }

491/* clang-format off */
492#if CONFIG_EXT_PARTITION
493HIGHBD_OBMCSADMXN(128, 128)
494HIGHBD_OBMCSADMXN(128, 64)
495HIGHBD_OBMCSADMXN(64, 128)
496#endif // CONFIG_EXT_PARTITION
497HIGHBD_OBMCSADMXN(64, 64)
498HIGHBD_OBMCSADMXN(64, 32)
499HIGHBD_OBMCSADMXN(32, 64)
500HIGHBD_OBMCSADMXN(32, 32)
501HIGHBD_OBMCSADMXN(32, 16)
502HIGHBD_OBMCSADMXN(16, 32)
503HIGHBD_OBMCSADMXN(16, 16)
504HIGHBD_OBMCSADMXN(16, 8)
505HIGHBD_OBMCSADMXN(8, 16)
506HIGHBD_OBMCSADMXN(8, 8)
507HIGHBD_OBMCSADMXN(8, 4)
508HIGHBD_OBMCSADMXN(4, 8)
509HIGHBD_OBMCSADMXN(4, 4)
510/* clang-format on */
#endif  // CONFIG_AOM_HIGHBITDEPTH
#endif  // CONFIG_AV1 && CONFIG_MOTION_VAR