blob: 9add3d5f623c47ef0d1d24a35c10332c34e11192 [file] [log] [blame]
Yaowu Xuc27fc142016-08-22 16:08:15 -07001/*
Yaowu Xu9c01aa12016-09-01 14:32:49 -07002 * Copyright (c) 2016, Alliance for Open Media. All rights reserved
Yaowu Xuc27fc142016-08-22 16:08:15 -07003 *
Yaowu Xu9c01aa12016-09-01 14:32:49 -07004 * This source code is subject to the terms of the BSD 2 Clause License and
5 * the Alliance for Open Media Patent License 1.0. If the BSD 2 Clause License
6 * was not distributed with this source code in the LICENSE file, you can
7 * obtain it at www.aomedia.org/license/software. If the Alliance for Open
8 * Media Patent License 1.0 was not distributed with this source code in the
9 * PATENTS file, you can obtain it at www.aomedia.org/license/patent.
Yaowu Xuc27fc142016-08-22 16:08:15 -070010 */
11#include <stdlib.h>
12
Yaowu Xuf883b422016-08-30 14:01:10 -070013#include "./aom_dsp_rtcd.h"
Yaowu Xuc27fc142016-08-22 16:08:15 -070014#include "aom_ports/mem.h"
15
// Average of an 8x8 block of 8-bit samples.
//   src:    top-left sample of the block.
//   stride: distance, in samples, between the starts of consecutive rows.
// Returns the sum of the 64 samples normalised with ROUND_POWER_OF_TWO(sum, 6).
unsigned int aom_avg_8x8_c(const uint8_t *src, int stride) {
  int total = 0;
  int row;

  for (row = 0; row < 8; ++row) {
    const uint8_t *const line = src + row * stride;
    int col;
    for (col = 0; col < 8; ++col) {
      total += line[col];
    }
  }

  // 64 samples -> shift of 6. The macro is defined outside this file.
  return ROUND_POWER_OF_TWO(total, 6);
}
25
// Average of a 4x4 block of 8-bit samples.
//   src:    top-left sample of the block.
//   stride: distance, in samples, between the starts of consecutive rows.
// Returns the sum of the 16 samples normalised with ROUND_POWER_OF_TWO(sum, 4).
unsigned int aom_avg_4x4_c(const uint8_t *src, int stride) {
  int total = 0;
  int row;

  for (row = 0; row < 4; ++row) {
    const uint8_t *const line = src + row * stride;
    int col;
    for (col = 0; col < 4; ++col) {
      total += line[col];
    }
  }

  // 16 samples -> shift of 4. The macro is defined outside this file.
  return ROUND_POWER_OF_TWO(total, 4);
}
35
// 8-point Hadamard butterfly over one strided vector.
//   src_diff:   first of 8 inputs; input k is read from src_diff[k * src_stride].
//   src_stride: spacing between consecutive inputs, in int16_t elements.
//   coeff:      receives the 8 outputs (in the permuted order written below).
// Input range per the callers in this file:
//   first pass:  9 bit, dynamic range [-255, 255]
//   second pass: 12 bit, dynamic range [-2040, 2040]
// All intermediates are kept in int16_t.
static void hadamard_col8(const int16_t *src_diff, int src_stride,
                          int16_t *coeff) {
  int16_t in[8];
  int16_t pair[8];
  int16_t quad[8];
  int k;

  // Read every input before anything is written to coeff.
  for (k = 0; k < 8; ++k) {
    in[k] = src_diff[k * src_stride];
  }

  // Stage 1: sum and difference of each adjacent pair.
  for (k = 0; k < 8; k += 2) {
    pair[k] = in[k] + in[k + 1];
    pair[k + 1] = in[k] - in[k + 1];
  }

  // Stage 2: combine the two pairs inside each group of four.
  for (k = 0; k < 8; k += 4) {
    quad[k] = pair[k] + pair[k + 2];
    quad[k + 1] = pair[k + 1] + pair[k + 3];
    quad[k + 2] = pair[k] - pair[k + 2];
    quad[k + 3] = pair[k + 1] - pair[k + 3];
  }

  // Stage 3: combine the two groups of four; outputs land in permuted slots.
  coeff[0] = quad[0] + quad[4];
  coeff[1] = quad[2] - quad[6];
  coeff[2] = quad[0] - quad[4];
  coeff[3] = quad[2] + quad[6];
  coeff[4] = quad[3] + quad[7];
  coeff[5] = quad[3] - quad[7];
  coeff[6] = quad[1] - quad[5];
  coeff[7] = quad[1] + quad[5];
}
67
// 8x8 2D Hadamard transform built from two passes of hadamard_col8.
//   src_diff:   top-left of the 8x8 input; rows are src_stride elements apart.
//   src_stride: row pitch of src_diff, in int16_t elements.
//   coeff:      receives the 64 output coefficients.
// The order of the output coefficients is not important; for optimization
// purposes the final transpose may be skipped.
void aom_hadamard_8x8_c(const int16_t *src_diff, int src_stride,
                        int16_t *coeff) {
  int16_t scratch[64];
  int k;

  // Pass 1: transform input column k (9 bit, [-255, 255]) into
  // scratch[8 * k .. 8 * k + 7] (12 bit, [-2040, 2040]).
  for (k = 0; k < 8; ++k) {
    hadamard_col8(src_diff + k, src_stride, scratch + 8 * k);
  }

  // Pass 2: transform scratch column k (stride 8) into
  // coeff[8 * k .. 8 * k + 7] (15 bit, [-16320, 16320]).
  for (k = 0; k < 8; ++k) {
    hadamard_col8(scratch + k, 8, coeff + 8 * k);
  }
}
91
// 16x16 2D Hadamard transform.
//   src_diff:   top-left of the 16x16 input (9 bit, [-255, 255]).
//   src_stride: row pitch of src_diff, in int16_t elements.
//   coeff:      receives 256 coefficients.
// Each 8x8 quadrant is transformed into its own 64-entry slice of coeff, then
// the four slices are combined in place with a halving butterfly.
void aom_hadamard_16x16_c(const int16_t *src_diff, int src_stride,
                          int16_t *coeff) {
  int quad;
  int pos;

  // Quadrants in raster order: 0 = top-left, 1 = top-right,
  // 2 = bottom-left, 3 = bottom-right.
  for (quad = 0; quad < 4; ++quad) {
    const int row_off = (quad >> 1) * 8;
    const int col_off = (quad & 0x01) * 8;
    aom_hadamard_8x8_c(src_diff + row_off * src_stride + col_off, src_stride,
                       coeff + quad * 64);
  }

  // Each slice entry is 15 bit, [-16320, 16320] at this point.
  for (pos = 0; pos < 64; ++pos) {
    const int16_t q0 = coeff[pos];
    const int16_t q1 = coeff[pos + 64];
    const int16_t q2 = coeff[pos + 128];
    const int16_t q3 = coeff[pos + 192];

    // The sums and differences are 16 bit, [-32640, 32640]; halving brings
    // them back to 15 bit. Note the >> is applied to possibly negative ints.
    const int16_t half_sum01 = (q0 + q1) >> 1;
    const int16_t half_dif01 = (q0 - q1) >> 1;
    const int16_t half_sum23 = (q2 + q3) >> 1;
    const int16_t half_dif23 = (q2 - q3) >> 1;

    // Results: 16 bit, [-32640, 32640].
    coeff[pos] = half_sum01 + half_sum23;
    coeff[pos + 64] = half_dif01 + half_dif23;
    coeff[pos + 128] = half_sum01 - half_sum23;
    coeff[pos + 192] = half_dif01 - half_dif23;
  }
}
123
// Sum of absolute values of the first `length` coefficients.
//   coeff:  16 bits, dynamic range [-32640, 32640].
//   length: value range {16, 64, 256, 1024}.
// Returns a non-negative total of at most 32640 * 1024, which fits in 26 bits
// including the sign.
int aom_satd_c(const int16_t *coeff, int length) {
  int total = 0;
  int pos = 0;

  while (pos < length) {
    const int value = coeff[pos];
    total += (value < 0) ? -value : value;
    ++pos;
  }

  return total;
}
134
// Integer projection onto row vectors: for each of 16 columns, sums `height`
// samples down the column and divides by (height >> 1).
//   hbuf:       receives the 16 normalised column sums.
//   ref:        top-left sample of the 16-wide region.
//   ref_stride: distance, in samples, between consecutive rows of ref.
//   height:     value range {16, 32, 64}.
void aom_int_pro_row_c(int16_t hbuf[16], const uint8_t *ref,
                       const int ref_stride, const int height) {
  const int norm_factor = height >> 1;
  int col;

  for (col = 0; col < 16; ++col) {
    // Column sum: 14 bit, dynamic range [0, 16320].
    int16_t acc = 0;
    int row;
    for (row = 0; row < height; ++row) {
      acc += ref[row * ref_stride + col];
    }
    // After normalisation: 9 bit, dynamic range [0, 510].
    hbuf[col] = acc / norm_factor;
  }
}
151
// Sums `width` consecutive 8-bit samples starting at ref.
//   width: value range {16, 32, 64}.
// Returns the sum: 14 bit, dynamic range [0, 16320].
int16_t aom_int_pro_col_c(const uint8_t *ref, const int width) {
  int16_t total = 0;
  int pos = 0;

  while (pos < width) {
    total += ref[pos];
    ++pos;
  }

  return total;
}
160
// Variance-style measure of the element-wise difference of two vectors:
// sum(d^2) - (sum(d))^2 / N, with N = 4 << bwl and the division done as a
// right shift by (bwl + 2).
//   ref: values in [0, 510]
//   src: values in [0, 510]
//   bwl: {2, 3, 4}
int aom_vector_var_c(const int16_t *ref, const int16_t *src, const int bwl) {
  const int count = 4 << bwl;
  int sum = 0;
  int sum_sq = 0;
  int k;

  for (k = 0; k < count; ++k) {
    // d: dynamic range [-510, 510], 10 bits.
    const int d = ref[k] - src[k];
    sum += d;         // dynamic range 16 bits.
    sum_sq += d * d;  // dynamic range 26 bits.
  }

  // (sum * sum): dynamic range 31 bits.
  return sum_sq - ((sum * sum) >> (bwl + 2));
}
179
// Smallest and largest absolute sample difference between two 8x8 blocks of
// 8-bit samples.
//   src, src_stride: first block and its row pitch in samples.
//   ref, ref_stride: second block and its row pitch in samples.
//   min, max:        outputs; seeded with 255 and 0 (the 8-bit extremes).
void aom_minmax_8x8_c(const uint8_t *src, int src_stride, const uint8_t *ref,
                      int ref_stride, int *min, int *max) {
  int row;

  *min = 255;
  *max = 0;

  for (row = 0; row < 8; ++row) {
    const uint8_t *const s = src + row * src_stride;
    const uint8_t *const r = ref + row * ref_stride;
    int col;
    for (col = 0; col < 8; ++col) {
      const int delta = s[col] - r[col];
      const int mag = (delta < 0) ? -delta : delta;
      if (mag < *min) *min = mag;
      if (mag > *max) *max = mag;
    }
  }
}
193
Yaowu Xuf883b422016-08-30 14:01:10 -0700194#if CONFIG_AOM_HIGHBITDEPTH
// High-bitdepth variant of the 8x8 block average.
//   src:    passed through CONVERT_TO_SHORTPTR to obtain the uint16_t sample
//           pointer (macro defined outside this file).
//   stride: distance between consecutive rows, in uint16_t samples.
// Returns the sum of the 64 samples normalised with ROUND_POWER_OF_TWO(sum, 6).
unsigned int aom_highbd_avg_8x8_c(const uint8_t *src, int stride) {
  const uint16_t *const base = CONVERT_TO_SHORTPTR(src);
  int total = 0;
  int row;

  for (row = 0; row < 8; ++row) {
    const uint16_t *const line = base + row * stride;
    int col;
    for (col = 0; col < 8; ++col) {
      total += line[col];
    }
  }

  return ROUND_POWER_OF_TWO(total, 6);
}
205
// High-bitdepth variant of the 4x4 block average.
//   src:    passed through CONVERT_TO_SHORTPTR to obtain the uint16_t sample
//           pointer (macro defined outside this file).
//   stride: distance between consecutive rows, in uint16_t samples.
// Returns the sum of the 16 samples normalised with ROUND_POWER_OF_TWO(sum, 4).
unsigned int aom_highbd_avg_4x4_c(const uint8_t *src, int stride) {
  const uint16_t *const base = CONVERT_TO_SHORTPTR(src);
  int total = 0;
  int row;

  for (row = 0; row < 4; ++row) {
    const uint16_t *const line = base + row * stride;
    int col;
    for (col = 0; col < 4; ++col) {
      total += line[col];
    }
  }

  return ROUND_POWER_OF_TWO(total, 4);
}
216
// Smallest and largest absolute sample difference between two 8x8 blocks of
// high-bitdepth (uint16_t) samples.
//   s8, p:    first block (converted with CONVERT_TO_SHORTPTR, a macro defined
//             outside this file) and its row pitch in uint16_t samples.
//   d8, dp:   second block, converted the same way, and its row pitch.
//   min, max: outputs.
// The minimum is seeded with 65535, the largest possible |uint16 - uint16|.
// Seeding with 255 (the 8-bit ceiling) would clamp the reported minimum to 255
// whenever every difference in the block is larger than that, which is
// reachable with 10- and 12-bit content.
void aom_highbd_minmax_8x8_c(const uint8_t *s8, int p, const uint8_t *d8,
                             int dp, int *min, int *max) {
  int i, j;
  const uint16_t *s = CONVERT_TO_SHORTPTR(s8);
  const uint16_t *d = CONVERT_TO_SHORTPTR(d8);
  *min = 65535;
  *max = 0;
  for (i = 0; i < 8; ++i, s += p, d += dp) {
    for (j = 0; j < 8; ++j) {
      // uint16_t operands promote to int, so the subtraction cannot wrap.
      int diff = abs(s[j] - d[j]);
      *min = diff < *min ? diff : *min;
      *max = diff > *max ? diff : *max;
    }
  }
}
Yaowu Xuf883b422016-08-30 14:01:10 -0700232#endif // CONFIG_AOM_HIGHBITDEPTH