/*
 * Copyright (c) 2016, Alliance for Open Media. All rights reserved
 *
 * This source code is subject to the terms of the BSD 2 Clause License and
 * the Alliance for Open Media Patent License 1.0. If the BSD 2 Clause License
 * was not distributed with this source code in the LICENSE file, you can
 * obtain it at www.aomedia.org/license/software. If the Alliance for Open
 * Media Patent License 1.0 was not distributed with this source code in the
 * PATENTS file, you can obtain it at www.aomedia.org/license/patent.
 */

Tom Finegan7a07ece2017-02-07 17:14:05 -080012#include "third_party/googletest/src/googletest/include/gtest/gtest.h"
Angie Chiangd5349112016-02-04 16:13:18 -080013
Yaowu Xuf883b422016-08-30 14:01:10 -070014#include "./av1_rtcd.h"
15#include "./aom_dsp_rtcd.h"
Angie Chiangd5349112016-02-04 16:13:18 -080016#include "test/acm_random.h"
Yaowu Xuc27fc142016-08-22 16:08:15 -070017#include "av1/common/filter.h"
Yaowu Xu6557ea92016-10-31 16:33:36 -070018#include "av1/common/convolve.h"
Yaowu Xuf883b422016-08-30 14:01:10 -070019#include "aom_dsp/aom_dsp_common.h"
Yaowu Xuc27fc142016-08-22 16:08:15 -070020#include "aom_ports/mem.h"
Angie Chiangd5349112016-02-04 16:13:18 -080021
Yaowu Xuc27fc142016-08-22 16:08:15 -070022using libaom_test::ACMRandom;
Angie Chiangd5349112016-02-04 16:13:18 -080023
24namespace {
// Points av1_convolve_horiz and av1_convolve_vert at their C implementations
// so the tests below exercise the C code path. The assignments are only
// compiled when both HAVE_SSSE3 and CONFIG_RUNTIME_CPU_DETECT are set;
// otherwise this function does nothing.
void setup_convolve() {
#if CONFIG_RUNTIME_CPU_DETECT && HAVE_SSSE3
  av1_convolve_vert = av1_convolve_vert_c;
  av1_convolve_horiz = av1_convolve_horiz_c;
#endif
}

Yaowu Xuf883b422016-08-30 14:01:10 -070032TEST(AV1ConvolveTest, av1_convolve8) {
Angie Chiangd5349112016-02-04 16:13:18 -080033 ACMRandom rnd(ACMRandom::DeterministicSeed());
Jingning Hanbd333262016-05-02 10:52:05 -070034#if CONFIG_DUAL_FILTER
James Zern7b9407a2016-05-18 23:48:05 -070035 InterpFilter interp_filter[4] = { EIGHTTAP_REGULAR, EIGHTTAP_REGULAR,
36 EIGHTTAP_REGULAR, EIGHTTAP_REGULAR };
Jingning Hanbd333262016-05-02 10:52:05 -070037 InterpFilterParams filter_params =
Yaowu Xuf883b422016-08-30 14:01:10 -070038 av1_get_interp_filter_params(interp_filter[0]);
Jingning Hanbd333262016-05-02 10:52:05 -070039#else
James Zern7b9407a2016-05-18 23:48:05 -070040 InterpFilter interp_filter = EIGHTTAP_REGULAR;
Angie Chiangd5349112016-02-04 16:13:18 -080041 InterpFilterParams filter_params =
Yaowu Xuf883b422016-08-30 14:01:10 -070042 av1_get_interp_filter_params(interp_filter);
Jingning Hanbd333262016-05-02 10:52:05 -070043#endif
Yunqing Wanged070562016-08-30 09:13:16 -070044 int filter_size = filter_params.taps;
Angie Chiangd5349112016-02-04 16:13:18 -080045 int filter_center = filter_size / 2 - 1;
46 uint8_t src[12 * 12];
47 int src_stride = filter_size;
clang-format3a826f12016-08-11 17:46:05 -070048 uint8_t dst[1] = { 0 };
49 uint8_t dst1[1] = { 0 };
Angie Chiangd5349112016-02-04 16:13:18 -080050 int dst_stride = 1;
51 int x_step_q4 = 16;
52 int y_step_q4 = 16;
53 int subpel_x_q4 = 3;
54 int subpel_y_q4 = 2;
Angie Chiange3a4c1c2017-02-10 16:26:49 -080055 const int plane = 0;
Angie Chiangd5349112016-02-04 16:13:18 -080056
57 int w = 1;
58 int h = 1;
59
Angie Chiange3a4c1c2017-02-10 16:26:49 -080060 ConvolveParams conv_params = get_conv_params(0, plane);
Angie Chiang674bffd2017-01-11 16:15:55 -080061
Yi Luo81ad9532016-06-21 12:17:39 -070062 setup_convolve();
Yi Luo229690a2016-06-13 17:01:17 -070063
Angie Chiangd5349112016-02-04 16:13:18 -080064 for (int i = 0; i < filter_size * filter_size; i++) {
65 src[i] = rnd.Rand16() % (1 << 8);
66 }
67
Yaowu Xuf883b422016-08-30 14:01:10 -070068 av1_convolve(src + src_stride * filter_center + filter_center, src_stride,
69 dst, dst_stride, w, h, interp_filter, subpel_x_q4, x_step_q4,
Angie Chiang674bffd2017-01-11 16:15:55 -080070 subpel_y_q4, y_step_q4, &conv_params);
Angie Chiangd5349112016-02-04 16:13:18 -080071
clang-format3a826f12016-08-11 17:46:05 -070072 const int16_t *x_filter =
Yaowu Xuf883b422016-08-30 14:01:10 -070073 av1_get_interp_filter_subpel_kernel(filter_params, subpel_x_q4);
clang-format3a826f12016-08-11 17:46:05 -070074 const int16_t *y_filter =
Yaowu Xuf883b422016-08-30 14:01:10 -070075 av1_get_interp_filter_subpel_kernel(filter_params, subpel_y_q4);
Angie Chiangd5349112016-02-04 16:13:18 -080076
Yaowu Xuf883b422016-08-30 14:01:10 -070077 aom_convolve8_c(src + src_stride * filter_center + filter_center, src_stride,
Angie Chiangd5349112016-02-04 16:13:18 -080078 dst1, dst_stride, x_filter, 16, y_filter, 16, w, h);
79 EXPECT_EQ(dst[0], dst1[0]);
80}
Yaowu Xuf883b422016-08-30 14:01:10 -070081TEST(AV1ConvolveTest, av1_convolve) {
Angie Chiangd5349112016-02-04 16:13:18 -080082 ACMRandom rnd(ACMRandom::DeterministicSeed());
Jingning Hanbd333262016-05-02 10:52:05 -070083#if CONFIG_DUAL_FILTER
James Zern7b9407a2016-05-18 23:48:05 -070084 InterpFilter interp_filter[4] = { EIGHTTAP_REGULAR, EIGHTTAP_REGULAR,
85 EIGHTTAP_REGULAR, EIGHTTAP_REGULAR };
Jingning Hanbd333262016-05-02 10:52:05 -070086 InterpFilterParams filter_params =
Yaowu Xuf883b422016-08-30 14:01:10 -070087 av1_get_interp_filter_params(interp_filter[0]);
Jingning Hanbd333262016-05-02 10:52:05 -070088#else
James Zern7b9407a2016-05-18 23:48:05 -070089 InterpFilter interp_filter = EIGHTTAP_REGULAR;
Angie Chiangd5349112016-02-04 16:13:18 -080090 InterpFilterParams filter_params =
Yaowu Xuf883b422016-08-30 14:01:10 -070091 av1_get_interp_filter_params(interp_filter);
Jingning Hanbd333262016-05-02 10:52:05 -070092#endif
Yunqing Wanged070562016-08-30 09:13:16 -070093 int filter_size = filter_params.taps;
Angie Chiangd5349112016-02-04 16:13:18 -080094 int filter_center = filter_size / 2 - 1;
95 uint8_t src[12 * 12];
96 int src_stride = filter_size;
clang-format3a826f12016-08-11 17:46:05 -070097 uint8_t dst[1] = { 0 };
Angie Chiangd5349112016-02-04 16:13:18 -080098 int dst_stride = 1;
99 int x_step_q4 = 16;
100 int y_step_q4 = 16;
Angie Chiangd5349112016-02-04 16:13:18 -0800101 int w = 1;
102 int h = 1;
103
Angie Chiang1e403062016-02-19 19:31:38 -0800104 int subpel_x_q4;
105 int subpel_y_q4;
Angie Chiange3a4c1c2017-02-10 16:26:49 -0800106 const int plane = 0;
Angie Chiang1e403062016-02-19 19:31:38 -0800107
Angie Chiange3a4c1c2017-02-10 16:26:49 -0800108 ConvolveParams conv_params = get_conv_params(0, plane);
Angie Chiang674bffd2017-01-11 16:15:55 -0800109
Yaowu Xu637590c2016-11-16 15:15:46 -0800110 ASSERT_LE(filter_size, 12);
Yi Luo81ad9532016-06-21 12:17:39 -0700111 setup_convolve();
Yi Luo229690a2016-06-13 17:01:17 -0700112
Yaowu Xu637590c2016-11-16 15:15:46 -0800113 for (int i = 0; i < static_cast<int>(sizeof(src) / sizeof(src[0])); i++) {
Angie Chiangd5349112016-02-04 16:13:18 -0800114 src[i] = rnd.Rand16() % (1 << 8);
115 }
116
Angie Chiangb968d462016-11-29 17:49:29 -0800117 for (subpel_x_q4 = 0; subpel_x_q4 < SUBPEL_SHIFTS; subpel_x_q4++) {
118 for (subpel_y_q4 = 0; subpel_y_q4 < SUBPEL_SHIFTS; subpel_y_q4++) {
Yaowu Xuf883b422016-08-30 14:01:10 -0700119 av1_convolve(src + src_stride * filter_center + filter_center, src_stride,
120 dst, dst_stride, w, h, interp_filter, subpel_x_q4, x_step_q4,
Angie Chiang674bffd2017-01-11 16:15:55 -0800121 subpel_y_q4, y_step_q4, &conv_params);
Angie Chiangd5349112016-02-04 16:13:18 -0800122
clang-format3a826f12016-08-11 17:46:05 -0700123 const int16_t *x_filter =
Yaowu Xuf883b422016-08-30 14:01:10 -0700124 av1_get_interp_filter_subpel_kernel(filter_params, subpel_x_q4);
clang-format3a826f12016-08-11 17:46:05 -0700125 const int16_t *y_filter =
Yaowu Xuf883b422016-08-30 14:01:10 -0700126 av1_get_interp_filter_subpel_kernel(filter_params, subpel_y_q4);
Angie Chiangd5349112016-02-04 16:13:18 -0800127
Angie Chiang1e403062016-02-19 19:31:38 -0800128 int temp[12];
129 int dst_ref = 0;
130 for (int r = 0; r < filter_size; r++) {
131 temp[r] = 0;
132 for (int c = 0; c < filter_size; c++) {
133 temp[r] += x_filter[c] * src[r * filter_size + c];
134 }
135 temp[r] = clip_pixel(ROUND_POWER_OF_TWO(temp[r], FILTER_BITS));
136 dst_ref += temp[r] * y_filter[r];
137 }
138 dst_ref = clip_pixel(ROUND_POWER_OF_TWO(dst_ref, FILTER_BITS));
139 EXPECT_EQ(dst[0], dst_ref);
Angie Chiangd5349112016-02-04 16:13:18 -0800140 }
Angie Chiangd5349112016-02-04 16:13:18 -0800141 }
Angie Chiangd5349112016-02-04 16:13:18 -0800142}
143
#if CONFIG_DUAL_FILTER
// Dual-filter variant: the x direction uses interp_filter[1] (MULTITAP_SHARP)
// and the y direction uses interp_filter[0] (EIGHTTAP_REGULAR). For every
// non-zero sub-pixel pair, the pixel from av1_convolve() is checked against a
// reference that filters each column with the y kernel first and then
// combines the column results with the x kernel.
TEST(AV1ConvolveTest, av1_convolve_vert_first) {
  ACMRandom rnd(ACMRandom::DeterministicSeed());
  InterpFilter interp_filter[4] = { EIGHTTAP_REGULAR, MULTITAP_SHARP,
                                    EIGHTTAP_REGULAR, MULTITAP_SHARP };
  InterpFilterParams filter_params_x =
      av1_get_interp_filter_params(interp_filter[1]);
  InterpFilterParams filter_params_y =
      av1_get_interp_filter_params(interp_filter[0]);
  int filter_size_x = filter_params_x.taps;
  int filter_size_y = filter_params_y.taps;
  int filter_center_x = filter_size_x / 2 - 1;
  int filter_center_y = filter_size_y / 2 - 1;
  uint8_t src[12 * 12];
  int src_stride = filter_size_x;
  uint8_t dst[1] = { 0 };
  int dst_stride = 1;
  int x_step_q4 = 16;
  int y_step_q4 = 16;
  int w = 1;
  int h = 1;
  const int plane = 0;

  ConvolveParams conv_params = get_conv_params(0, plane);

  ASSERT_LE(filter_size_x, 12);
  ASSERT_LE(filter_size_y, 12);
  setup_convolve();

  const int src_count = static_cast<int>(sizeof(src) / sizeof(src[0]));
  for (int idx = 0; idx < src_count; idx++) {
    src[idx] = rnd.Rand16() % (1 << 8);
  }

  // Sub-pixel position 0 is deliberately excluded in both directions.
  for (int subpel_x_q4 = 1; subpel_x_q4 < SUBPEL_SHIFTS; subpel_x_q4++) {
    for (int subpel_y_q4 = 1; subpel_y_q4 < SUBPEL_SHIFTS; subpel_y_q4++) {
      av1_convolve(src + src_stride * filter_center_y + filter_center_x,
                   src_stride, dst, dst_stride, w, h, interp_filter,
                   subpel_x_q4, x_step_q4, subpel_y_q4, y_step_q4,
                   &conv_params);

      const int16_t *x_filter =
          av1_get_interp_filter_subpel_kernel(filter_params_x, subpel_x_q4);
      const int16_t *y_filter =
          av1_get_interp_filter_subpel_kernel(filter_params_y, subpel_y_q4);

      // Reference: vertical pass per column, then horizontal combination.
      int dst_ref = 0;
      for (int col = 0; col < filter_size_x; col++) {
        int col_sum = 0;
        for (int row = 0; row < filter_size_y; row++) {
          col_sum += y_filter[row] * src[row * filter_size_x + col];
        }
        col_sum = clip_pixel(ROUND_POWER_OF_TWO(col_sum, FILTER_BITS));
        dst_ref += col_sum * x_filter[col];
      }
      dst_ref = clip_pixel(ROUND_POWER_OF_TWO(dst_ref, FILTER_BITS));
      EXPECT_EQ(dst[0], dst_ref);
    }
  }
}
#endif

Yaowu Xuf883b422016-08-30 14:01:10 -0700209TEST(AV1ConvolveTest, av1_convolve_avg) {
Angie Chiangd5349112016-02-04 16:13:18 -0800210 ACMRandom rnd(ACMRandom::DeterministicSeed());
Jingning Hanbd333262016-05-02 10:52:05 -0700211#if CONFIG_DUAL_FILTER
James Zern7b9407a2016-05-18 23:48:05 -0700212 InterpFilter interp_filter[4] = { EIGHTTAP_REGULAR, EIGHTTAP_REGULAR,
213 EIGHTTAP_REGULAR, EIGHTTAP_REGULAR };
Jingning Hanbd333262016-05-02 10:52:05 -0700214 InterpFilterParams filter_params =
Yaowu Xuf883b422016-08-30 14:01:10 -0700215 av1_get_interp_filter_params(interp_filter[0]);
Jingning Hanbd333262016-05-02 10:52:05 -0700216#else
James Zern7b9407a2016-05-18 23:48:05 -0700217 InterpFilter interp_filter = EIGHTTAP_REGULAR;
Angie Chiangd5349112016-02-04 16:13:18 -0800218 InterpFilterParams filter_params =
Yaowu Xuf883b422016-08-30 14:01:10 -0700219 av1_get_interp_filter_params(interp_filter);
Jingning Hanbd333262016-05-02 10:52:05 -0700220#endif
Yunqing Wanged070562016-08-30 09:13:16 -0700221 int filter_size = filter_params.taps;
Angie Chiangd5349112016-02-04 16:13:18 -0800222 int filter_center = filter_size / 2 - 1;
223 uint8_t src0[12 * 12];
224 uint8_t src1[12 * 12];
225 int src_stride = filter_size;
clang-format3a826f12016-08-11 17:46:05 -0700226 uint8_t dst0[1] = { 0 };
227 uint8_t dst1[1] = { 0 };
228 uint8_t dst[1] = { 0 };
Angie Chiangd5349112016-02-04 16:13:18 -0800229 int dst_stride = 1;
230 int x_step_q4 = 16;
231 int y_step_q4 = 16;
Angie Chiangd5349112016-02-04 16:13:18 -0800232
233 int w = 1;
234 int h = 1;
Angie Chiange3a4c1c2017-02-10 16:26:49 -0800235 const int plane = 0;
Angie Chiangd5349112016-02-04 16:13:18 -0800236
Angie Chiang1e403062016-02-19 19:31:38 -0800237 int subpel_x_q4;
238 int subpel_y_q4;
239
Angie Chiange3a4c1c2017-02-10 16:26:49 -0800240 ConvolveParams conv_params = get_conv_params(0, plane);
Angie Chiang674bffd2017-01-11 16:15:55 -0800241
Yi Luo81ad9532016-06-21 12:17:39 -0700242 setup_convolve();
Yi Luo229690a2016-06-13 17:01:17 -0700243
Angie Chiangd5349112016-02-04 16:13:18 -0800244 for (int i = 0; i < filter_size * filter_size; i++) {
245 src0[i] = rnd.Rand16() % (1 << 8);
246 src1[i] = rnd.Rand16() % (1 << 8);
247 }
248
249 int offset = filter_size * filter_center + filter_center;
250
Angie Chiangb968d462016-11-29 17:49:29 -0800251 for (subpel_x_q4 = 0; subpel_x_q4 < SUBPEL_SHIFTS; subpel_x_q4++) {
252 for (subpel_y_q4 = 0; subpel_y_q4 < SUBPEL_SHIFTS; subpel_y_q4++) {
Angie Chiang674bffd2017-01-11 16:15:55 -0800253 conv_params.ref = 0;
Yaowu Xuf883b422016-08-30 14:01:10 -0700254 av1_convolve(src0 + offset, src_stride, dst0, dst_stride, w, h,
255 interp_filter, subpel_x_q4, x_step_q4, subpel_y_q4,
Angie Chiang674bffd2017-01-11 16:15:55 -0800256 y_step_q4, &conv_params);
257 conv_params.ref = 0;
Yaowu Xuf883b422016-08-30 14:01:10 -0700258 av1_convolve(src1 + offset, src_stride, dst1, dst_stride, w, h,
259 interp_filter, subpel_x_q4, x_step_q4, subpel_y_q4,
Angie Chiang674bffd2017-01-11 16:15:55 -0800260 y_step_q4, &conv_params);
Angie Chiangd5349112016-02-04 16:13:18 -0800261
Angie Chiang674bffd2017-01-11 16:15:55 -0800262 conv_params.ref = 0;
Yaowu Xuf883b422016-08-30 14:01:10 -0700263 av1_convolve(src0 + offset, src_stride, dst, dst_stride, w, h,
264 interp_filter, subpel_x_q4, x_step_q4, subpel_y_q4,
Angie Chiang674bffd2017-01-11 16:15:55 -0800265 y_step_q4, &conv_params);
266 conv_params.ref = 1;
Yaowu Xuf883b422016-08-30 14:01:10 -0700267 av1_convolve(src1 + offset, src_stride, dst, dst_stride, w, h,
268 interp_filter, subpel_x_q4, x_step_q4, subpel_y_q4,
Angie Chiang674bffd2017-01-11 16:15:55 -0800269 y_step_q4, &conv_params);
Angie Chiangd5349112016-02-04 16:13:18 -0800270
Angie Chiang1e403062016-02-19 19:31:38 -0800271 EXPECT_EQ(dst[0], ROUND_POWER_OF_TWO(dst0[0] + dst1[0], 1));
272 }
273 }
Angie Chiangd5349112016-02-04 16:13:18 -0800274}
275
Yaowu Xuf883b422016-08-30 14:01:10 -0700276#if CONFIG_AOM_HIGHBITDEPTH
277TEST(AV1ConvolveTest, av1_highbd_convolve) {
Angie Chiangd5349112016-02-04 16:13:18 -0800278 ACMRandom rnd(ACMRandom::DeterministicSeed());
Jingning Han9de916e2016-05-08 10:36:51 -0700279#if CONFIG_DUAL_FILTER
James Zern7b9407a2016-05-18 23:48:05 -0700280 InterpFilter interp_filter[4] = { EIGHTTAP_REGULAR, EIGHTTAP_REGULAR,
281 EIGHTTAP_REGULAR, EIGHTTAP_REGULAR };
Jingning Han9de916e2016-05-08 10:36:51 -0700282 InterpFilterParams filter_params =
Yaowu Xuf883b422016-08-30 14:01:10 -0700283 av1_get_interp_filter_params(interp_filter[0]);
Jingning Han9de916e2016-05-08 10:36:51 -0700284#else
James Zern7b9407a2016-05-18 23:48:05 -0700285 InterpFilter interp_filter = EIGHTTAP_REGULAR;
Angie Chiangd5349112016-02-04 16:13:18 -0800286 InterpFilterParams filter_params =
Yaowu Xuf883b422016-08-30 14:01:10 -0700287 av1_get_interp_filter_params(interp_filter);
Jingning Han9de916e2016-05-08 10:36:51 -0700288#endif
Yunqing Wanged070562016-08-30 09:13:16 -0700289 int filter_size = filter_params.taps;
Angie Chiangd5349112016-02-04 16:13:18 -0800290 int filter_center = filter_size / 2 - 1;
291 uint16_t src[12 * 12];
292 int src_stride = filter_size;
clang-format3a826f12016-08-11 17:46:05 -0700293 uint16_t dst[1] = { 0 };
Angie Chiangd5349112016-02-04 16:13:18 -0800294 int dst_stride = 1;
295 int x_step_q4 = 16;
296 int y_step_q4 = 16;
Angie Chiangd5349112016-02-04 16:13:18 -0800297 int avg = 0;
298 int bd = 10;
Angie Chiangd5349112016-02-04 16:13:18 -0800299 int w = 1;
300 int h = 1;
301
Angie Chiang1e403062016-02-19 19:31:38 -0800302 int subpel_x_q4;
303 int subpel_y_q4;
304
Angie Chiangd5349112016-02-04 16:13:18 -0800305 for (int i = 0; i < filter_size * filter_size; i++) {
306 src[i] = rnd.Rand16() % (1 << bd);
307 }
308
Angie Chiangb968d462016-11-29 17:49:29 -0800309 for (subpel_x_q4 = 0; subpel_x_q4 < SUBPEL_SHIFTS; subpel_x_q4++) {
310 for (subpel_y_q4 = 0; subpel_y_q4 < SUBPEL_SHIFTS; subpel_y_q4++) {
Yaowu Xuf883b422016-08-30 14:01:10 -0700311 av1_highbd_convolve(
Angie Chiang1e403062016-02-19 19:31:38 -0800312 CONVERT_TO_BYTEPTR(src + src_stride * filter_center + filter_center),
Jingning Han9de916e2016-05-08 10:36:51 -0700313 src_stride, CONVERT_TO_BYTEPTR(dst), dst_stride, w, h, interp_filter,
Angie Chiang1e403062016-02-19 19:31:38 -0800314 subpel_x_q4, x_step_q4, subpel_y_q4, y_step_q4, avg, bd);
Angie Chiangd5349112016-02-04 16:13:18 -0800315
clang-format3a826f12016-08-11 17:46:05 -0700316 const int16_t *x_filter =
Yaowu Xuf883b422016-08-30 14:01:10 -0700317 av1_get_interp_filter_subpel_kernel(filter_params, subpel_x_q4);
clang-format3a826f12016-08-11 17:46:05 -0700318 const int16_t *y_filter =
Yaowu Xuf883b422016-08-30 14:01:10 -0700319 av1_get_interp_filter_subpel_kernel(filter_params, subpel_y_q4);
Angie Chiangd5349112016-02-04 16:13:18 -0800320
Angie Chiang1e403062016-02-19 19:31:38 -0800321 int temp[12];
322 int dst_ref = 0;
323 for (int r = 0; r < filter_size; r++) {
324 temp[r] = 0;
325 for (int c = 0; c < filter_size; c++) {
326 temp[r] += x_filter[c] * src[r * filter_size + c];
327 }
328 temp[r] =
329 clip_pixel_highbd(ROUND_POWER_OF_TWO(temp[r], FILTER_BITS), bd);
330 dst_ref += temp[r] * y_filter[r];
331 }
332 dst_ref = clip_pixel_highbd(ROUND_POWER_OF_TWO(dst_ref, FILTER_BITS), bd);
333 EXPECT_EQ(dst[0], dst_ref);
Angie Chiangd5349112016-02-04 16:13:18 -0800334 }
Angie Chiangd5349112016-02-04 16:13:18 -0800335 }
Angie Chiangd5349112016-02-04 16:13:18 -0800336}
337
Yaowu Xuf883b422016-08-30 14:01:10 -0700338TEST(AV1ConvolveTest, av1_highbd_convolve_avg) {
Angie Chiangd5349112016-02-04 16:13:18 -0800339 ACMRandom rnd(ACMRandom::DeterministicSeed());
Jingning Han9de916e2016-05-08 10:36:51 -0700340#if CONFIG_DUAL_FILTER
James Zern7b9407a2016-05-18 23:48:05 -0700341 InterpFilter interp_filter[4] = { EIGHTTAP_REGULAR, EIGHTTAP_REGULAR,
342 EIGHTTAP_REGULAR, EIGHTTAP_REGULAR };
Jingning Han9de916e2016-05-08 10:36:51 -0700343 InterpFilterParams filter_params =
Yaowu Xuf883b422016-08-30 14:01:10 -0700344 av1_get_interp_filter_params(interp_filter[0]);
Jingning Han9de916e2016-05-08 10:36:51 -0700345#else
James Zern7b9407a2016-05-18 23:48:05 -0700346 InterpFilter interp_filter = EIGHTTAP_REGULAR;
Angie Chiangd5349112016-02-04 16:13:18 -0800347 InterpFilterParams filter_params =
Yaowu Xuf883b422016-08-30 14:01:10 -0700348 av1_get_interp_filter_params(interp_filter);
Jingning Han9de916e2016-05-08 10:36:51 -0700349#endif
Yunqing Wanged070562016-08-30 09:13:16 -0700350 int filter_size = filter_params.taps;
Angie Chiangd5349112016-02-04 16:13:18 -0800351 int filter_center = filter_size / 2 - 1;
352 uint16_t src0[12 * 12];
353 uint16_t src1[12 * 12];
354 int src_stride = filter_size;
clang-format3a826f12016-08-11 17:46:05 -0700355 uint16_t dst0[1] = { 0 };
356 uint16_t dst1[1] = { 0 };
357 uint16_t dst[1] = { 0 };
Angie Chiangd5349112016-02-04 16:13:18 -0800358 int dst_stride = 1;
359 int x_step_q4 = 16;
360 int y_step_q4 = 16;
Angie Chiangd5349112016-02-04 16:13:18 -0800361 int avg = 0;
362 int bd = 10;
363
364 int w = 1;
365 int h = 1;
366
Angie Chiang1e403062016-02-19 19:31:38 -0800367 int subpel_x_q4;
368 int subpel_y_q4;
369
Angie Chiangd5349112016-02-04 16:13:18 -0800370 for (int i = 0; i < filter_size * filter_size; i++) {
371 src0[i] = rnd.Rand16() % (1 << bd);
372 src1[i] = rnd.Rand16() % (1 << bd);
373 }
374
Angie Chiangb968d462016-11-29 17:49:29 -0800375 for (subpel_x_q4 = 0; subpel_x_q4 < SUBPEL_SHIFTS; subpel_x_q4++) {
376 for (subpel_y_q4 = 0; subpel_y_q4 < SUBPEL_SHIFTS; subpel_y_q4++) {
Angie Chiang1e403062016-02-19 19:31:38 -0800377 int offset = filter_size * filter_center + filter_center;
Angie Chiangd5349112016-02-04 16:13:18 -0800378
Angie Chiang1e403062016-02-19 19:31:38 -0800379 avg = 0;
Yaowu Xuf883b422016-08-30 14:01:10 -0700380 av1_highbd_convolve(CONVERT_TO_BYTEPTR(src0 + offset), src_stride,
381 CONVERT_TO_BYTEPTR(dst0), dst_stride, w, h,
382 interp_filter, subpel_x_q4, x_step_q4, subpel_y_q4,
383 y_step_q4, avg, bd);
Angie Chiang1e403062016-02-19 19:31:38 -0800384 avg = 0;
Yaowu Xuf883b422016-08-30 14:01:10 -0700385 av1_highbd_convolve(CONVERT_TO_BYTEPTR(src1 + offset), src_stride,
386 CONVERT_TO_BYTEPTR(dst1), dst_stride, w, h,
387 interp_filter, subpel_x_q4, x_step_q4, subpel_y_q4,
388 y_step_q4, avg, bd);
Angie Chiangd5349112016-02-04 16:13:18 -0800389
Angie Chiang1e403062016-02-19 19:31:38 -0800390 avg = 0;
Yaowu Xuf883b422016-08-30 14:01:10 -0700391 av1_highbd_convolve(CONVERT_TO_BYTEPTR(src0 + offset), src_stride,
392 CONVERT_TO_BYTEPTR(dst), dst_stride, w, h,
393 interp_filter, subpel_x_q4, x_step_q4, subpel_y_q4,
394 y_step_q4, avg, bd);
Angie Chiang1e403062016-02-19 19:31:38 -0800395 avg = 1;
Yaowu Xuf883b422016-08-30 14:01:10 -0700396 av1_highbd_convolve(CONVERT_TO_BYTEPTR(src1 + offset), src_stride,
397 CONVERT_TO_BYTEPTR(dst), dst_stride, w, h,
398 interp_filter, subpel_x_q4, x_step_q4, subpel_y_q4,
399 y_step_q4, avg, bd);
Angie Chiangd5349112016-02-04 16:13:18 -0800400
Angie Chiang1e403062016-02-19 19:31:38 -0800401 EXPECT_EQ(dst[0], ROUND_POWER_OF_TWO(dst0[0] + dst1[0], 1));
402 }
403 }
Angie Chiangd5349112016-02-04 16:13:18 -0800404}
Yaowu Xuf883b422016-08-30 14:01:10 -0700405#endif // CONFIG_AOM_HIGHBITDEPTH
Angie Chiang8878fa42016-02-22 14:11:05 -0800406
407#define CONVOLVE_SPEED_TEST 0
408#if CONVOLVE_SPEED_TEST
409#define highbd_convolve_speed(func, block_size, frame_size) \
Yaowu Xuf883b422016-08-30 14:01:10 -0700410 TEST(AV1ConvolveTest, func##_speed_##block_size##_##frame_size) { \
Angie Chiang8878fa42016-02-22 14:11:05 -0800411 ACMRandom rnd(ACMRandom::DeterministicSeed()); \
James Zern7b9407a2016-05-18 23:48:05 -0700412 InterpFilter interp_filter = EIGHTTAP; \
Angie Chiang8878fa42016-02-22 14:11:05 -0800413 InterpFilterParams filter_params = \
Yaowu Xuf883b422016-08-30 14:01:10 -0700414 av1_get_interp_filter_params(interp_filter); \
Yunqing Wanged070562016-08-30 09:13:16 -0700415 int filter_size = filter_params.tap; \
Angie Chiang8878fa42016-02-22 14:11:05 -0800416 int filter_center = filter_size / 2 - 1; \
417 DECLARE_ALIGNED(16, uint16_t, \
clang-format3a826f12016-08-11 17:46:05 -0700418 src[(frame_size + 7) * (frame_size + 7)]) = { 0 }; \
Angie Chiang8878fa42016-02-22 14:11:05 -0800419 int src_stride = frame_size + 7; \
clang-format3a826f12016-08-11 17:46:05 -0700420 DECLARE_ALIGNED(16, uint16_t, dst[frame_size * frame_size]) = { 0 }; \
Angie Chiang8878fa42016-02-22 14:11:05 -0800421 int dst_stride = frame_size; \
422 int x_step_q4 = 16; \
423 int y_step_q4 = 16; \
424 int subpel_x_q4 = 8; \
425 int subpel_y_q4 = 6; \
426 int bd = 10; \
427 \
428 int w = block_size; \
429 int h = block_size; \
430 \
clang-format3a826f12016-08-11 17:46:05 -0700431 const int16_t *filter_x = \
Yaowu Xuf883b422016-08-30 14:01:10 -0700432 av1_get_interp_filter_kernel(filter_params, subpel_x_q4); \
clang-format3a826f12016-08-11 17:46:05 -0700433 const int16_t *filter_y = \
Yaowu Xuf883b422016-08-30 14:01:10 -0700434 av1_get_interp_filter_kernel(filter_params, subpel_y_q4); \
Angie Chiang8878fa42016-02-22 14:11:05 -0800435 \
436 for (int i = 0; i < src_stride * src_stride; i++) { \
437 src[i] = rnd.Rand16() % (1 << bd); \
438 } \
439 \
440 int offset = filter_center * src_stride + filter_center; \
441 int row_offset = 0; \
442 int col_offset = 0; \
443 for (int i = 0; i < 100000; i++) { \
444 int src_total_offset = offset + col_offset * src_stride + row_offset; \
445 int dst_total_offset = col_offset * dst_stride + row_offset; \
446 func(CONVERT_TO_BYTEPTR(src + src_total_offset), src_stride, \
447 CONVERT_TO_BYTEPTR(dst + dst_total_offset), dst_stride, filter_x, \
448 x_step_q4, filter_y, y_step_q4, w, h, bd); \
449 if (offset + w + w < frame_size) { \
450 row_offset += w; \
451 } else { \
452 row_offset = 0; \
453 col_offset += h; \
454 } \
455 if (col_offset + h >= frame_size) { \
456 col_offset = 0; \
457 } \
458 } \
459 }
460
// Defines a speed test that calls the 8-bit convolve function `func` 100000
// times on block_size x block_size blocks, stepping the block position across
// a frame_size x frame_size frame. The source buffer carries 7 extra rows and
// columns so the filter taps around each block stay inside it. Identifiers
// match the ones used by the live tests in this file (EIGHTTAP_REGULAR,
// filter_params.taps, av1_get_interp_filter_subpel_kernel). The computed
// row/column offsets are applied to the source and destination pointers,
// mirroring the high bit-depth speed macro, and the horizontal advance is
// driven by row_offset.
// NOTE(review): both buffers are locals, so frame_size = 1024 places about
// two megabytes on the stack.
#define lowbd_convolve_speed(func, block_size, frame_size)                   \
  TEST(AV1ConvolveTest, func##_speed_l_##block_size##_##frame_size) {        \
    ACMRandom rnd(ACMRandom::DeterministicSeed());                           \
    InterpFilter interp_filter = EIGHTTAP_REGULAR;                           \
    InterpFilterParams filter_params =                                       \
        av1_get_interp_filter_params(interp_filter);                         \
    int filter_size = filter_params.taps;                                    \
    int filter_center = filter_size / 2 - 1;                                 \
    DECLARE_ALIGNED(16, uint8_t, src[(frame_size + 7) * (frame_size + 7)]);  \
    int src_stride = frame_size + 7;                                         \
    DECLARE_ALIGNED(16, uint8_t, dst[frame_size * frame_size]);              \
    int dst_stride = frame_size;                                             \
    int x_step_q4 = 16;                                                      \
    int y_step_q4 = 16;                                                      \
    int subpel_x_q4 = 8;                                                     \
    int subpel_y_q4 = 6;                                                     \
    int bd = 8;                                                              \
                                                                             \
    int w = block_size;                                                      \
    int h = block_size;                                                      \
                                                                             \
    const int16_t *filter_x =                                                \
        av1_get_interp_filter_subpel_kernel(filter_params, subpel_x_q4);     \
    const int16_t *filter_y =                                                \
        av1_get_interp_filter_subpel_kernel(filter_params, subpel_y_q4);     \
                                                                             \
    for (int i = 0; i < src_stride * src_stride; i++) {                      \
      src[i] = rnd.Rand16() % (1 << bd);                                     \
    }                                                                        \
                                                                             \
    int offset = filter_center * src_stride + filter_center;                 \
    int row_offset = 0;                                                      \
    int col_offset = 0;                                                      \
    for (int i = 0; i < 100000; i++) {                                       \
      int src_total_offset = offset + col_offset * src_stride + row_offset;  \
      int dst_total_offset = col_offset * dst_stride + row_offset;           \
      func(src + src_total_offset, src_stride, dst + dst_total_offset,       \
           dst_stride, filter_x, x_step_q4, filter_y, y_step_q4, w, h);      \
      if (row_offset + w + w < frame_size) {                                 \
        row_offset += w;                                                     \
      } else {                                                               \
        row_offset = 0;                                                      \
        col_offset += h;                                                     \
      }                                                                      \
      if (col_offset + h >= frame_size) {                                    \
        col_offset = 0;                                                      \
      }                                                                      \
    }                                                                        \
  }

509// This experiment shows that when frame size is 64x64
Yaowu Xuf883b422016-08-30 14:01:10 -0700510// aom_highbd_convolve8_sse2 and aom_convolve8_sse2's speed are similar.
Angie Chiang8878fa42016-02-22 14:11:05 -0800511// However when frame size becomes 1024x1024
Yaowu Xuf883b422016-08-30 14:01:10 -0700512// aom_highbd_convolve8_sse2 is around 50% slower than aom_convolve8_sse2
Angie Chiang8878fa42016-02-22 14:11:05 -0800513// we think the bottleneck is from memory IO
Yaowu Xuf883b422016-08-30 14:01:10 -0700514highbd_convolve_speed(aom_highbd_convolve8_sse2, 8, 64);
515highbd_convolve_speed(aom_highbd_convolve8_sse2, 16, 64);
516highbd_convolve_speed(aom_highbd_convolve8_sse2, 32, 64);
517highbd_convolve_speed(aom_highbd_convolve8_sse2, 64, 64);
Angie Chiang8878fa42016-02-22 14:11:05 -0800518
Yaowu Xuf883b422016-08-30 14:01:10 -0700519lowbd_convolve_speed(aom_convolve8_sse2, 8, 64);
520lowbd_convolve_speed(aom_convolve8_sse2, 16, 64);
521lowbd_convolve_speed(aom_convolve8_sse2, 32, 64);
522lowbd_convolve_speed(aom_convolve8_sse2, 64, 64);
Angie Chiang8878fa42016-02-22 14:11:05 -0800523
Yaowu Xuf883b422016-08-30 14:01:10 -0700524highbd_convolve_speed(aom_highbd_convolve8_sse2, 8, 1024);
525highbd_convolve_speed(aom_highbd_convolve8_sse2, 16, 1024);
526highbd_convolve_speed(aom_highbd_convolve8_sse2, 32, 1024);
527highbd_convolve_speed(aom_highbd_convolve8_sse2, 64, 1024);
Angie Chiang8878fa42016-02-22 14:11:05 -0800528
Yaowu Xuf883b422016-08-30 14:01:10 -0700529lowbd_convolve_speed(aom_convolve8_sse2, 8, 1024);
530lowbd_convolve_speed(aom_convolve8_sse2, 16, 1024);
531lowbd_convolve_speed(aom_convolve8_sse2, 32, 1024);
532lowbd_convolve_speed(aom_convolve8_sse2, 64, 1024);
Angie Chiang8878fa42016-02-22 14:11:05 -0800533#endif // CONVOLVE_SPEED_TEST
Angie Chiangd5349112016-02-04 16:13:18 -0800534} // namespace