/*
 *  Copyright (c) 2015 The WebM project authors. All Rights Reserved.
 *
 *  Use of this source code is governed by a BSD-style license
 *  that can be found in the LICENSE file in the root of the source
 *  tree. An additional intellectual property rights grant can be found
 *  in the file PATENTS.  All contributing project authors may
 *  be found in the AUTHORS file in the root of the source tree.
 */

#include <stdlib.h>

#include "./vpx_config.h"
#include "./vpx_dsp_rtcd.h"

#include "vpx/vpx_integer.h"
#include "vpx_ports/mem.h"

/* Sum the difference between every corresponding element of the buffers. */
static INLINE unsigned int sad(const uint8_t *a, int a_stride,
                               const uint8_t *b, int b_stride,
                               int width, int height) {
  int y, x;
  unsigned int sad = 0;

  for (y = 0; y < height; y++) {
    for (x = 0; x < width; x++)
      sad += abs(a[x] - b[x]);

    a += a_stride;
    b += b_stride;
  }
  return sad;
}
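
/* Worked example: with width = 2, height = 2, a = { 1, 4, 7, 2 } and
 * b = { 3, 3, 5, 0 } (both using stride 2), the result is
 * |1-3| + |4-3| + |7-5| + |2-0| = 7. */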

// TODO(johannkoenig): this moved to vpx_dsp, should be able to clean this up.
/* Remove dependency on vp9 variance function by duplicating vp9_comp_avg_pred.
 * The function averages every corresponding element of the buffers and stores
 * the value in a third buffer, comp_pred.
 * pred and comp_pred are assumed to have stride = width
 * In the usage below comp_pred is a local array.
 */
static INLINE void avg_pred(uint8_t *comp_pred, const uint8_t *pred, int width,
                            int height, const uint8_t *ref, int ref_stride) {
  int i, j;

  for (i = 0; i < height; i++) {
    for (j = 0; j < width; j++) {
      const int tmp = pred[j] + ref[j];
      comp_pred[j] = ROUND_POWER_OF_TWO(tmp, 1);
    }
    comp_pred += width;
    pred += width;
    ref += ref_stride;
  }
}
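
/* ROUND_POWER_OF_TWO(tmp, 1) computes (tmp + 1) >> 1, so the average rounds
 * up on ties: pred[j] = 3, ref[j] = 4 gives (3 + 4 + 1) >> 1 = 4. */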

#if CONFIG_VP9_HIGHBITDEPTH
static INLINE void highbd_avg_pred(uint16_t *comp_pred, const uint8_t *pred8,
                                   int width, int height, const uint8_t *ref8,
                                   int ref_stride) {
  int i, j;
  uint16_t *pred = CONVERT_TO_SHORTPTR(pred8);
  uint16_t *ref = CONVERT_TO_SHORTPTR(ref8);
  for (i = 0; i < height; i++) {
    for (j = 0; j < width; j++) {
      const int tmp = pred[j] + ref[j];
      comp_pred[j] = ROUND_POWER_OF_TWO(tmp, 1);
    }
    comp_pred += width;
    pred += width;
    ref += ref_stride;
  }
}
#endif  // CONFIG_VP9_HIGHBITDEPTH

#define sadMxN(m, n) \
unsigned int vpx_sad##m##x##n##_c(const uint8_t *src, int src_stride, \
                                  const uint8_t *ref, int ref_stride) { \
  return sad(src, src_stride, ref, ref_stride, m, n); \
} \
unsigned int vpx_sad##m##x##n##_avg_c(const uint8_t *src, int src_stride, \
                                      const uint8_t *ref, int ref_stride, \
                                      const uint8_t *second_pred) { \
  uint8_t comp_pred[m * n]; \
  avg_pred(comp_pred, second_pred, m, n, ref, ref_stride); \
  return sad(src, src_stride, comp_pred, m, m, n); \
}
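
/* As an illustration, sadMxN(4, 4) below expands to:
 *
 *   unsigned int vpx_sad4x4_c(const uint8_t *src, int src_stride,
 *                             const uint8_t *ref, int ref_stride) {
 *     return sad(src, src_stride, ref, ref_stride, 4, 4);
 *   }
 *
 * plus the matching vpx_sad4x4_avg_c, which averages second_pred into a
 * local comp_pred buffer (stride 4) before taking the SAD. */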

// depending on call sites, pass **ref_array to avoid & in subsequent call and
// de-dup with 4D below.
#define sadMxNxK(m, n, k) \
void vpx_sad##m##x##n##x##k##_c(const uint8_t *src, int src_stride, \
                                const uint8_t *ref_array, int ref_stride, \
                                uint32_t *sad_array) { \
  int i; \
  for (i = 0; i < k; ++i) \
    sad_array[i] = vpx_sad##m##x##n##_c(src, src_stride, &ref_array[i], \
                                        ref_stride); \
}
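
/* Candidate i starts at &ref_array[i], one pixel past candidate i - 1, so
 * the k SADs cover k consecutive horizontal positions in the reference. */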

// This appears to be equivalent to the above when k == 4 and refs is const
#define sadMxNx4D(m, n) \
void vpx_sad##m##x##n##x4d_c(const uint8_t *src, int src_stride, \
                             const uint8_t *const ref_array[], int ref_stride, \
                             uint32_t *sad_array) { \
  int i; \
  for (i = 0; i < 4; ++i) \
    sad_array[i] = vpx_sad##m##x##n##_c(src, src_stride, ref_array[i], \
                                        ref_stride); \
}
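
/* Unlike the xK variant, which derives its candidates from a single base
 * pointer, the 4D variant receives four independent reference pointers. */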

#if CONFIG_VP10 && CONFIG_EXT_PARTITION
// 128x128
sadMxN(128, 128)
sadMxNxK(128, 128, 3)
sadMxNxK(128, 128, 8)
sadMxNx4D(128, 128)

// 128x64
sadMxN(128, 64)
sadMxNx4D(128, 64)

// 64x128
sadMxN(64, 128)
sadMxNx4D(64, 128)
#endif  // CONFIG_VP10 && CONFIG_EXT_PARTITION

// 64x64
sadMxN(64, 64)
sadMxNxK(64, 64, 3)
sadMxNxK(64, 64, 8)
sadMxNx4D(64, 64)

// 64x32
sadMxN(64, 32)
sadMxNx4D(64, 32)

// 32x64
sadMxN(32, 64)
sadMxNx4D(32, 64)

// 32x32
sadMxN(32, 32)
sadMxNxK(32, 32, 3)
sadMxNxK(32, 32, 8)
sadMxNx4D(32, 32)

// 32x16
sadMxN(32, 16)
sadMxNx4D(32, 16)

// 16x32
sadMxN(16, 32)
sadMxNx4D(16, 32)

// 16x16
sadMxN(16, 16)
sadMxNxK(16, 16, 3)
sadMxNxK(16, 16, 8)
sadMxNx4D(16, 16)

// 16x8
sadMxN(16, 8)
sadMxNxK(16, 8, 3)
sadMxNxK(16, 8, 8)
sadMxNx4D(16, 8)

// 8x16
sadMxN(8, 16)
sadMxNxK(8, 16, 3)
sadMxNxK(8, 16, 8)
sadMxNx4D(8, 16)

// 8x8
sadMxN(8, 8)
sadMxNxK(8, 8, 3)
sadMxNxK(8, 8, 8)
sadMxNx4D(8, 8)

// 8x4
sadMxN(8, 4)
sadMxNxK(8, 4, 8)
sadMxNx4D(8, 4)

// 4x8
sadMxN(4, 8)
sadMxNxK(4, 8, 8)
sadMxNx4D(4, 8)

// 4x4
sadMxN(4, 4)
sadMxNxK(4, 4, 3)
sadMxNxK(4, 4, 8)
sadMxNx4D(4, 4)

#if CONFIG_VP9_HIGHBITDEPTH
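/* In high bit depth builds the sample buffers hold uint16_t values but are
 * passed through the common uint8_t * interface; CONVERT_TO_SHORTPTR
 * recovers the real uint16_t pointer. */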
static INLINE unsigned int highbd_sad(const uint8_t *a8, int a_stride,
                                      const uint8_t *b8, int b_stride,
                                      int width, int height) {
  int y, x;
  unsigned int sad = 0;
  const uint16_t *a = CONVERT_TO_SHORTPTR(a8);
  const uint16_t *b = CONVERT_TO_SHORTPTR(b8);
  for (y = 0; y < height; y++) {
    for (x = 0; x < width; x++)
      sad += abs(a[x] - b[x]);

    a += a_stride;
    b += b_stride;
  }
  return sad;
}

static INLINE unsigned int highbd_sadb(const uint8_t *a8, int a_stride,
                                       const uint16_t *b, int b_stride,
                                       int width, int height) {
  int y, x;
  unsigned int sad = 0;
  const uint16_t *a = CONVERT_TO_SHORTPTR(a8);
  for (y = 0; y < height; y++) {
    for (x = 0; x < width; x++)
      sad += abs(a[x] - b[x]);

    a += a_stride;
    b += b_stride;
  }
  return sad;
}

#define highbd_sadMxN(m, n) \
unsigned int vpx_highbd_sad##m##x##n##_c(const uint8_t *src, int src_stride, \
                                         const uint8_t *ref, int ref_stride) { \
  return highbd_sad(src, src_stride, ref, ref_stride, m, n); \
} \
unsigned int vpx_highbd_sad##m##x##n##_avg_c(const uint8_t *src, \
                                             int src_stride, \
                                             const uint8_t *ref, \
                                             int ref_stride, \
                                             const uint8_t *second_pred) { \
  uint16_t comp_pred[m * n]; \
  highbd_avg_pred(comp_pred, second_pred, m, n, ref, ref_stride); \
  return highbd_sadb(src, src_stride, comp_pred, m, m, n); \
}
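
/* The _avg variant builds comp_pred as a plain uint16_t array, so the SAD is
 * taken with highbd_sadb, which skips the pointer conversion for its second
 * argument. */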

#define highbd_sadMxNxK(m, n, k) \
void vpx_highbd_sad##m##x##n##x##k##_c(const uint8_t *src, int src_stride, \
                                       const uint8_t *ref_array, \
                                       int ref_stride, uint32_t *sad_array) { \
  int i; \
  for (i = 0; i < k; ++i) { \
    sad_array[i] = vpx_highbd_sad##m##x##n##_c(src, src_stride, \
                                               &ref_array[i], ref_stride); \
  } \
}

#define highbd_sadMxNx4D(m, n) \
void vpx_highbd_sad##m##x##n##x4d_c(const uint8_t *src, int src_stride, \
                                    const uint8_t *const ref_array[], \
                                    int ref_stride, uint32_t *sad_array) { \
  int i; \
  for (i = 0; i < 4; ++i) { \
    sad_array[i] = vpx_highbd_sad##m##x##n##_c(src, src_stride, \
                                               ref_array[i], ref_stride); \
  } \
}

#if CONFIG_VP10 && CONFIG_EXT_PARTITION
// 128x128
highbd_sadMxN(128, 128)
highbd_sadMxNxK(128, 128, 3)
highbd_sadMxNxK(128, 128, 8)
highbd_sadMxNx4D(128, 128)

// 128x64
highbd_sadMxN(128, 64)
highbd_sadMxNx4D(128, 64)

// 64x128
highbd_sadMxN(64, 128)
highbd_sadMxNx4D(64, 128)
#endif  // CONFIG_VP10 && CONFIG_EXT_PARTITION

// 64x64
highbd_sadMxN(64, 64)
highbd_sadMxNxK(64, 64, 3)
highbd_sadMxNxK(64, 64, 8)
highbd_sadMxNx4D(64, 64)

// 64x32
highbd_sadMxN(64, 32)
highbd_sadMxNx4D(64, 32)

// 32x64
highbd_sadMxN(32, 64)
highbd_sadMxNx4D(32, 64)

// 32x32
highbd_sadMxN(32, 32)
highbd_sadMxNxK(32, 32, 3)
highbd_sadMxNxK(32, 32, 8)
highbd_sadMxNx4D(32, 32)

// 32x16
highbd_sadMxN(32, 16)
highbd_sadMxNx4D(32, 16)

// 16x32
highbd_sadMxN(16, 32)
highbd_sadMxNx4D(16, 32)

// 16x16
highbd_sadMxN(16, 16)
highbd_sadMxNxK(16, 16, 3)
highbd_sadMxNxK(16, 16, 8)
highbd_sadMxNx4D(16, 16)

// 16x8
highbd_sadMxN(16, 8)
highbd_sadMxNxK(16, 8, 3)
highbd_sadMxNxK(16, 8, 8)
highbd_sadMxNx4D(16, 8)

// 8x16
highbd_sadMxN(8, 16)
highbd_sadMxNxK(8, 16, 3)
highbd_sadMxNxK(8, 16, 8)
highbd_sadMxNx4D(8, 16)

// 8x8
highbd_sadMxN(8, 8)
highbd_sadMxNxK(8, 8, 3)
highbd_sadMxNxK(8, 8, 8)
highbd_sadMxNx4D(8, 8)

// 8x4
highbd_sadMxN(8, 4)
highbd_sadMxNxK(8, 4, 8)
highbd_sadMxNx4D(8, 4)

// 4x8
highbd_sadMxN(4, 8)
highbd_sadMxNxK(4, 8, 8)
highbd_sadMxNx4D(4, 8)

// 4x4
highbd_sadMxN(4, 4)
highbd_sadMxNxK(4, 4, 3)
highbd_sadMxNxK(4, 4, 8)
highbd_sadMxNx4D(4, 4)

#endif  // CONFIG_VP9_HIGHBITDEPTH

#if CONFIG_VP10 && CONFIG_EXT_INTER
static INLINE unsigned int masked_sad(const uint8_t *a, int a_stride,
                                      const uint8_t *b, int b_stride,
                                      const uint8_t *m, int m_stride,
                                      int width, int height) {
  int y, x;
  unsigned int sad = 0;

  for (y = 0; y < height; y++) {
    for (x = 0; x < width; x++)
      sad += m[x] * abs(a[x] - b[x]);

    a += a_stride;
    b += b_stride;
    m += m_stride;
  }
  sad = (sad + 31) >> 6;

  return sad;
}
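
/* Each absolute difference is weighted by a per-pixel mask value, and the
 * total is scaled back with a rounding shift by 6, i.e. divided by 64. This
 * matches mask weights on a 0..64 scale; the exact range is defined by the
 * ext-inter mask generation code, not here. */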

#define MASKSADMxN(m, n) \
unsigned int vpx_masked_sad##m##x##n##_c(const uint8_t *src, int src_stride, \
                                         const uint8_t *ref, int ref_stride, \
                                         const uint8_t *msk, int msk_stride) { \
  return masked_sad(src, src_stride, ref, ref_stride, msk, msk_stride, m, n); \
}

#if CONFIG_EXT_PARTITION
MASKSADMxN(128, 128)
MASKSADMxN(128, 64)
MASKSADMxN(64, 128)
#endif  // CONFIG_EXT_PARTITION
MASKSADMxN(64, 64)
MASKSADMxN(64, 32)
MASKSADMxN(32, 64)
MASKSADMxN(32, 32)
MASKSADMxN(32, 16)
MASKSADMxN(16, 32)
MASKSADMxN(16, 16)
MASKSADMxN(16, 8)
MASKSADMxN(8, 16)
MASKSADMxN(8, 8)
MASKSADMxN(8, 4)
MASKSADMxN(4, 8)
MASKSADMxN(4, 4)

#if CONFIG_VP9_HIGHBITDEPTH
static INLINE unsigned int highbd_masked_sad(const uint8_t *a8, int a_stride,
                                             const uint8_t *b8, int b_stride,
                                             const uint8_t *m, int m_stride,
                                             int width, int height) {
  int y, x;
  unsigned int sad = 0;
  const uint16_t *a = CONVERT_TO_SHORTPTR(a8);
  const uint16_t *b = CONVERT_TO_SHORTPTR(b8);

  for (y = 0; y < height; y++) {
    for (x = 0; x < width; x++)
      sad += m[x] * abs(a[x] - b[x]);

    a += a_stride;
    b += b_stride;
    m += m_stride;
  }
  sad = (sad + 31) >> 6;

  return sad;
}

#define HIGHBD_MASKSADMXN(m, n) \
unsigned int vpx_highbd_masked_sad##m##x##n##_c(const uint8_t *src, \
                                                int src_stride, \
                                                const uint8_t *ref, \
                                                int ref_stride, \
                                                const uint8_t *msk, \
                                                int msk_stride) { \
  return highbd_masked_sad(src, src_stride, ref, ref_stride, \
                           msk, msk_stride, m, n); \
}

#if CONFIG_EXT_PARTITION
HIGHBD_MASKSADMXN(128, 128)
HIGHBD_MASKSADMXN(128, 64)
HIGHBD_MASKSADMXN(64, 128)
#endif  // CONFIG_EXT_PARTITION
HIGHBD_MASKSADMXN(64, 64)
HIGHBD_MASKSADMXN(64, 32)
HIGHBD_MASKSADMXN(32, 64)
HIGHBD_MASKSADMXN(32, 32)
HIGHBD_MASKSADMXN(32, 16)
HIGHBD_MASKSADMXN(16, 32)
HIGHBD_MASKSADMXN(16, 16)
HIGHBD_MASKSADMXN(16, 8)
HIGHBD_MASKSADMXN(8, 16)
HIGHBD_MASKSADMXN(8, 8)
HIGHBD_MASKSADMXN(8, 4)
HIGHBD_MASKSADMXN(4, 8)
HIGHBD_MASKSADMXN(4, 4)
#endif  // CONFIG_VP9_HIGHBITDEPTH
#endif  // CONFIG_VP10 && CONFIG_EXT_INTER