blob: 0324b8e9ce33665142096b3c545a785c056680f1 [file] [log] [blame]
/*
 * Copyright (c) 2016, Alliance for Open Media. All rights reserved
 *
 * This source code is subject to the terms of the BSD 2 Clause License and
 * the Alliance for Open Media Patent License 1.0. If the BSD 2 Clause License
 * was not distributed with this source code in the LICENSE file, you can
 * obtain it at www.aomedia.org/license/software. If the Alliance for Open
 * Media Patent License 1.0 was not distributed with this source code in the
 * PATENTS file, you can obtain it at www.aomedia.org/license/patent.
 */
11
Angie Chiangd5349112016-02-04 16:13:18 -080012#include "third_party/googletest/src/include/gtest/gtest.h"
13
Yaowu Xuf883b422016-08-30 14:01:10 -070014#include "./av1_rtcd.h"
15#include "./aom_dsp_rtcd.h"
Angie Chiangd5349112016-02-04 16:13:18 -080016#include "test/acm_random.h"
Yaowu Xuc27fc142016-08-22 16:08:15 -070017#include "av1/common/filter.h"
Yaowu Xu6557ea92016-10-31 16:33:36 -070018#include "av1/common/convolve.h"
Yaowu Xuf883b422016-08-30 14:01:10 -070019#include "aom_dsp/aom_dsp_common.h"
Yaowu Xuc27fc142016-08-22 16:08:15 -070020#include "aom_ports/mem.h"
Angie Chiangd5349112016-02-04 16:13:18 -080021
Yaowu Xuc27fc142016-08-22 16:08:15 -070022using libaom_test::ACMRandom;
Angie Chiangd5349112016-02-04 16:13:18 -080023
24namespace {
// Redirects av1_convolve_horiz and av1_convolve_vert to their plain C
// implementations. Compiles to a no-op unless both HAVE_SSSE3 and
// CONFIG_RUNTIME_CPU_DETECT are set.
// NOTE(review): presumably this keeps the low bit-depth tests below on the
// reference C path regardless of the host CPU -- confirm.
void setup_convolve() {
#if HAVE_SSSE3 && CONFIG_RUNTIME_CPU_DETECT
  av1_convolve_vert = av1_convolve_vert_c;
  av1_convolve_horiz = av1_convolve_horiz_c;
#endif
}
31
Yaowu Xuf883b422016-08-30 14:01:10 -070032TEST(AV1ConvolveTest, av1_convolve8) {
Angie Chiangd5349112016-02-04 16:13:18 -080033 ACMRandom rnd(ACMRandom::DeterministicSeed());
Jingning Hanbd333262016-05-02 10:52:05 -070034#if CONFIG_DUAL_FILTER
James Zern7b9407a2016-05-18 23:48:05 -070035 InterpFilter interp_filter[4] = { EIGHTTAP_REGULAR, EIGHTTAP_REGULAR,
36 EIGHTTAP_REGULAR, EIGHTTAP_REGULAR };
Jingning Hanbd333262016-05-02 10:52:05 -070037 InterpFilterParams filter_params =
Yaowu Xuf883b422016-08-30 14:01:10 -070038 av1_get_interp_filter_params(interp_filter[0]);
Jingning Hanbd333262016-05-02 10:52:05 -070039#else
James Zern7b9407a2016-05-18 23:48:05 -070040 InterpFilter interp_filter = EIGHTTAP_REGULAR;
Angie Chiangd5349112016-02-04 16:13:18 -080041 InterpFilterParams filter_params =
Yaowu Xuf883b422016-08-30 14:01:10 -070042 av1_get_interp_filter_params(interp_filter);
Jingning Hanbd333262016-05-02 10:52:05 -070043#endif
Yunqing Wanged070562016-08-30 09:13:16 -070044 int filter_size = filter_params.taps;
Angie Chiangd5349112016-02-04 16:13:18 -080045 int filter_center = filter_size / 2 - 1;
46 uint8_t src[12 * 12];
47 int src_stride = filter_size;
clang-format3a826f12016-08-11 17:46:05 -070048 uint8_t dst[1] = { 0 };
49 uint8_t dst1[1] = { 0 };
Angie Chiangd5349112016-02-04 16:13:18 -080050 int dst_stride = 1;
51 int x_step_q4 = 16;
52 int y_step_q4 = 16;
53 int subpel_x_q4 = 3;
54 int subpel_y_q4 = 2;
55 int avg = 0;
56
57 int w = 1;
58 int h = 1;
59
Yi Luo81ad9532016-06-21 12:17:39 -070060 setup_convolve();
Yi Luo229690a2016-06-13 17:01:17 -070061
Angie Chiangd5349112016-02-04 16:13:18 -080062 for (int i = 0; i < filter_size * filter_size; i++) {
63 src[i] = rnd.Rand16() % (1 << 8);
64 }
65
Yaowu Xuf883b422016-08-30 14:01:10 -070066 av1_convolve(src + src_stride * filter_center + filter_center, src_stride,
67 dst, dst_stride, w, h, interp_filter, subpel_x_q4, x_step_q4,
68 subpel_y_q4, y_step_q4, avg);
Angie Chiangd5349112016-02-04 16:13:18 -080069
clang-format3a826f12016-08-11 17:46:05 -070070 const int16_t *x_filter =
Yaowu Xuf883b422016-08-30 14:01:10 -070071 av1_get_interp_filter_subpel_kernel(filter_params, subpel_x_q4);
clang-format3a826f12016-08-11 17:46:05 -070072 const int16_t *y_filter =
Yaowu Xuf883b422016-08-30 14:01:10 -070073 av1_get_interp_filter_subpel_kernel(filter_params, subpel_y_q4);
Angie Chiangd5349112016-02-04 16:13:18 -080074
Yaowu Xuf883b422016-08-30 14:01:10 -070075 aom_convolve8_c(src + src_stride * filter_center + filter_center, src_stride,
Angie Chiangd5349112016-02-04 16:13:18 -080076 dst1, dst_stride, x_filter, 16, y_filter, 16, w, h);
77 EXPECT_EQ(dst[0], dst1[0]);
78}
Yaowu Xuf883b422016-08-30 14:01:10 -070079TEST(AV1ConvolveTest, av1_convolve) {
Angie Chiangd5349112016-02-04 16:13:18 -080080 ACMRandom rnd(ACMRandom::DeterministicSeed());
Jingning Hanbd333262016-05-02 10:52:05 -070081#if CONFIG_DUAL_FILTER
James Zern7b9407a2016-05-18 23:48:05 -070082 InterpFilter interp_filter[4] = { EIGHTTAP_REGULAR, EIGHTTAP_REGULAR,
83 EIGHTTAP_REGULAR, EIGHTTAP_REGULAR };
Jingning Hanbd333262016-05-02 10:52:05 -070084 InterpFilterParams filter_params =
Yaowu Xuf883b422016-08-30 14:01:10 -070085 av1_get_interp_filter_params(interp_filter[0]);
Jingning Hanbd333262016-05-02 10:52:05 -070086#else
James Zern7b9407a2016-05-18 23:48:05 -070087 InterpFilter interp_filter = EIGHTTAP_REGULAR;
Angie Chiangd5349112016-02-04 16:13:18 -080088 InterpFilterParams filter_params =
Yaowu Xuf883b422016-08-30 14:01:10 -070089 av1_get_interp_filter_params(interp_filter);
Jingning Hanbd333262016-05-02 10:52:05 -070090#endif
Yunqing Wanged070562016-08-30 09:13:16 -070091 int filter_size = filter_params.taps;
Angie Chiangd5349112016-02-04 16:13:18 -080092 int filter_center = filter_size / 2 - 1;
93 uint8_t src[12 * 12];
94 int src_stride = filter_size;
clang-format3a826f12016-08-11 17:46:05 -070095 uint8_t dst[1] = { 0 };
Angie Chiangd5349112016-02-04 16:13:18 -080096 int dst_stride = 1;
97 int x_step_q4 = 16;
98 int y_step_q4 = 16;
Angie Chiangd5349112016-02-04 16:13:18 -080099 int avg = 0;
Angie Chiangd5349112016-02-04 16:13:18 -0800100 int w = 1;
101 int h = 1;
102
Angie Chiang1e403062016-02-19 19:31:38 -0800103 int subpel_x_q4;
104 int subpel_y_q4;
105
Yi Luo81ad9532016-06-21 12:17:39 -0700106 setup_convolve();
Yi Luo229690a2016-06-13 17:01:17 -0700107
Angie Chiangd5349112016-02-04 16:13:18 -0800108 for (int i = 0; i < filter_size * filter_size; i++) {
109 src[i] = rnd.Rand16() % (1 << 8);
110 }
111
Angie Chiang1e403062016-02-19 19:31:38 -0800112 for (subpel_x_q4 = 0; subpel_x_q4 < 16; subpel_x_q4++) {
113 for (subpel_y_q4 = 0; subpel_y_q4 < 16; subpel_y_q4++) {
Yaowu Xuf883b422016-08-30 14:01:10 -0700114 av1_convolve(src + src_stride * filter_center + filter_center, src_stride,
115 dst, dst_stride, w, h, interp_filter, subpel_x_q4, x_step_q4,
116 subpel_y_q4, y_step_q4, avg);
Angie Chiangd5349112016-02-04 16:13:18 -0800117
clang-format3a826f12016-08-11 17:46:05 -0700118 const int16_t *x_filter =
Yaowu Xuf883b422016-08-30 14:01:10 -0700119 av1_get_interp_filter_subpel_kernel(filter_params, subpel_x_q4);
clang-format3a826f12016-08-11 17:46:05 -0700120 const int16_t *y_filter =
Yaowu Xuf883b422016-08-30 14:01:10 -0700121 av1_get_interp_filter_subpel_kernel(filter_params, subpel_y_q4);
Angie Chiangd5349112016-02-04 16:13:18 -0800122
Angie Chiang1e403062016-02-19 19:31:38 -0800123 int temp[12];
124 int dst_ref = 0;
125 for (int r = 0; r < filter_size; r++) {
126 temp[r] = 0;
127 for (int c = 0; c < filter_size; c++) {
128 temp[r] += x_filter[c] * src[r * filter_size + c];
129 }
130 temp[r] = clip_pixel(ROUND_POWER_OF_TWO(temp[r], FILTER_BITS));
131 dst_ref += temp[r] * y_filter[r];
132 }
133 dst_ref = clip_pixel(ROUND_POWER_OF_TWO(dst_ref, FILTER_BITS));
134 EXPECT_EQ(dst[0], dst_ref);
Angie Chiangd5349112016-02-04 16:13:18 -0800135 }
Angie Chiangd5349112016-02-04 16:13:18 -0800136 }
Angie Chiangd5349112016-02-04 16:13:18 -0800137}
138
Yaowu Xuf883b422016-08-30 14:01:10 -0700139TEST(AV1ConvolveTest, av1_convolve_avg) {
Angie Chiangd5349112016-02-04 16:13:18 -0800140 ACMRandom rnd(ACMRandom::DeterministicSeed());
Jingning Hanbd333262016-05-02 10:52:05 -0700141#if CONFIG_DUAL_FILTER
James Zern7b9407a2016-05-18 23:48:05 -0700142 InterpFilter interp_filter[4] = { EIGHTTAP_REGULAR, EIGHTTAP_REGULAR,
143 EIGHTTAP_REGULAR, EIGHTTAP_REGULAR };
Jingning Hanbd333262016-05-02 10:52:05 -0700144 InterpFilterParams filter_params =
Yaowu Xuf883b422016-08-30 14:01:10 -0700145 av1_get_interp_filter_params(interp_filter[0]);
Jingning Hanbd333262016-05-02 10:52:05 -0700146#else
James Zern7b9407a2016-05-18 23:48:05 -0700147 InterpFilter interp_filter = EIGHTTAP_REGULAR;
Angie Chiangd5349112016-02-04 16:13:18 -0800148 InterpFilterParams filter_params =
Yaowu Xuf883b422016-08-30 14:01:10 -0700149 av1_get_interp_filter_params(interp_filter);
Jingning Hanbd333262016-05-02 10:52:05 -0700150#endif
Yunqing Wanged070562016-08-30 09:13:16 -0700151 int filter_size = filter_params.taps;
Angie Chiangd5349112016-02-04 16:13:18 -0800152 int filter_center = filter_size / 2 - 1;
153 uint8_t src0[12 * 12];
154 uint8_t src1[12 * 12];
155 int src_stride = filter_size;
clang-format3a826f12016-08-11 17:46:05 -0700156 uint8_t dst0[1] = { 0 };
157 uint8_t dst1[1] = { 0 };
158 uint8_t dst[1] = { 0 };
Angie Chiangd5349112016-02-04 16:13:18 -0800159 int dst_stride = 1;
160 int x_step_q4 = 16;
161 int y_step_q4 = 16;
Angie Chiangd5349112016-02-04 16:13:18 -0800162 int avg = 0;
163
164 int w = 1;
165 int h = 1;
166
Angie Chiang1e403062016-02-19 19:31:38 -0800167 int subpel_x_q4;
168 int subpel_y_q4;
169
Yi Luo81ad9532016-06-21 12:17:39 -0700170 setup_convolve();
Yi Luo229690a2016-06-13 17:01:17 -0700171
Angie Chiangd5349112016-02-04 16:13:18 -0800172 for (int i = 0; i < filter_size * filter_size; i++) {
173 src0[i] = rnd.Rand16() % (1 << 8);
174 src1[i] = rnd.Rand16() % (1 << 8);
175 }
176
177 int offset = filter_size * filter_center + filter_center;
178
Angie Chiang1e403062016-02-19 19:31:38 -0800179 for (subpel_x_q4 = 0; subpel_x_q4 < 16; subpel_x_q4++) {
180 for (subpel_y_q4 = 0; subpel_y_q4 < 16; subpel_y_q4++) {
181 avg = 0;
Yaowu Xuf883b422016-08-30 14:01:10 -0700182 av1_convolve(src0 + offset, src_stride, dst0, dst_stride, w, h,
183 interp_filter, subpel_x_q4, x_step_q4, subpel_y_q4,
184 y_step_q4, avg);
Angie Chiang1e403062016-02-19 19:31:38 -0800185 avg = 0;
Yaowu Xuf883b422016-08-30 14:01:10 -0700186 av1_convolve(src1 + offset, src_stride, dst1, dst_stride, w, h,
187 interp_filter, subpel_x_q4, x_step_q4, subpel_y_q4,
188 y_step_q4, avg);
Angie Chiangd5349112016-02-04 16:13:18 -0800189
Angie Chiang1e403062016-02-19 19:31:38 -0800190 avg = 0;
Yaowu Xuf883b422016-08-30 14:01:10 -0700191 av1_convolve(src0 + offset, src_stride, dst, dst_stride, w, h,
192 interp_filter, subpel_x_q4, x_step_q4, subpel_y_q4,
193 y_step_q4, avg);
Angie Chiang1e403062016-02-19 19:31:38 -0800194 avg = 1;
Yaowu Xuf883b422016-08-30 14:01:10 -0700195 av1_convolve(src1 + offset, src_stride, dst, dst_stride, w, h,
196 interp_filter, subpel_x_q4, x_step_q4, subpel_y_q4,
197 y_step_q4, avg);
Angie Chiangd5349112016-02-04 16:13:18 -0800198
Angie Chiang1e403062016-02-19 19:31:38 -0800199 EXPECT_EQ(dst[0], ROUND_POWER_OF_TWO(dst0[0] + dst1[0], 1));
200 }
201 }
Angie Chiangd5349112016-02-04 16:13:18 -0800202}
203
Yaowu Xuf883b422016-08-30 14:01:10 -0700204#if CONFIG_AOM_HIGHBITDEPTH
205TEST(AV1ConvolveTest, av1_highbd_convolve) {
Angie Chiangd5349112016-02-04 16:13:18 -0800206 ACMRandom rnd(ACMRandom::DeterministicSeed());
Jingning Han9de916e2016-05-08 10:36:51 -0700207#if CONFIG_DUAL_FILTER
James Zern7b9407a2016-05-18 23:48:05 -0700208 InterpFilter interp_filter[4] = { EIGHTTAP_REGULAR, EIGHTTAP_REGULAR,
209 EIGHTTAP_REGULAR, EIGHTTAP_REGULAR };
Jingning Han9de916e2016-05-08 10:36:51 -0700210 InterpFilterParams filter_params =
Yaowu Xuf883b422016-08-30 14:01:10 -0700211 av1_get_interp_filter_params(interp_filter[0]);
Jingning Han9de916e2016-05-08 10:36:51 -0700212#else
James Zern7b9407a2016-05-18 23:48:05 -0700213 InterpFilter interp_filter = EIGHTTAP_REGULAR;
Angie Chiangd5349112016-02-04 16:13:18 -0800214 InterpFilterParams filter_params =
Yaowu Xuf883b422016-08-30 14:01:10 -0700215 av1_get_interp_filter_params(interp_filter);
Jingning Han9de916e2016-05-08 10:36:51 -0700216#endif
Yunqing Wanged070562016-08-30 09:13:16 -0700217 int filter_size = filter_params.taps;
Angie Chiangd5349112016-02-04 16:13:18 -0800218 int filter_center = filter_size / 2 - 1;
219 uint16_t src[12 * 12];
220 int src_stride = filter_size;
clang-format3a826f12016-08-11 17:46:05 -0700221 uint16_t dst[1] = { 0 };
Angie Chiangd5349112016-02-04 16:13:18 -0800222 int dst_stride = 1;
223 int x_step_q4 = 16;
224 int y_step_q4 = 16;
Angie Chiangd5349112016-02-04 16:13:18 -0800225 int avg = 0;
226 int bd = 10;
Angie Chiangd5349112016-02-04 16:13:18 -0800227 int w = 1;
228 int h = 1;
229
Angie Chiang1e403062016-02-19 19:31:38 -0800230 int subpel_x_q4;
231 int subpel_y_q4;
232
Angie Chiangd5349112016-02-04 16:13:18 -0800233 for (int i = 0; i < filter_size * filter_size; i++) {
234 src[i] = rnd.Rand16() % (1 << bd);
235 }
236
Angie Chiang1e403062016-02-19 19:31:38 -0800237 for (subpel_x_q4 = 0; subpel_x_q4 < 16; subpel_x_q4++) {
238 for (subpel_y_q4 = 0; subpel_y_q4 < 16; subpel_y_q4++) {
Yaowu Xuf883b422016-08-30 14:01:10 -0700239 av1_highbd_convolve(
Angie Chiang1e403062016-02-19 19:31:38 -0800240 CONVERT_TO_BYTEPTR(src + src_stride * filter_center + filter_center),
Jingning Han9de916e2016-05-08 10:36:51 -0700241 src_stride, CONVERT_TO_BYTEPTR(dst), dst_stride, w, h, interp_filter,
Angie Chiang1e403062016-02-19 19:31:38 -0800242 subpel_x_q4, x_step_q4, subpel_y_q4, y_step_q4, avg, bd);
Angie Chiangd5349112016-02-04 16:13:18 -0800243
clang-format3a826f12016-08-11 17:46:05 -0700244 const int16_t *x_filter =
Yaowu Xuf883b422016-08-30 14:01:10 -0700245 av1_get_interp_filter_subpel_kernel(filter_params, subpel_x_q4);
clang-format3a826f12016-08-11 17:46:05 -0700246 const int16_t *y_filter =
Yaowu Xuf883b422016-08-30 14:01:10 -0700247 av1_get_interp_filter_subpel_kernel(filter_params, subpel_y_q4);
Angie Chiangd5349112016-02-04 16:13:18 -0800248
Angie Chiang1e403062016-02-19 19:31:38 -0800249 int temp[12];
250 int dst_ref = 0;
251 for (int r = 0; r < filter_size; r++) {
252 temp[r] = 0;
253 for (int c = 0; c < filter_size; c++) {
254 temp[r] += x_filter[c] * src[r * filter_size + c];
255 }
256 temp[r] =
257 clip_pixel_highbd(ROUND_POWER_OF_TWO(temp[r], FILTER_BITS), bd);
258 dst_ref += temp[r] * y_filter[r];
259 }
260 dst_ref = clip_pixel_highbd(ROUND_POWER_OF_TWO(dst_ref, FILTER_BITS), bd);
261 EXPECT_EQ(dst[0], dst_ref);
Angie Chiangd5349112016-02-04 16:13:18 -0800262 }
Angie Chiangd5349112016-02-04 16:13:18 -0800263 }
Angie Chiangd5349112016-02-04 16:13:18 -0800264}
265
Yaowu Xuf883b422016-08-30 14:01:10 -0700266TEST(AV1ConvolveTest, av1_highbd_convolve_avg) {
Angie Chiangd5349112016-02-04 16:13:18 -0800267 ACMRandom rnd(ACMRandom::DeterministicSeed());
Jingning Han9de916e2016-05-08 10:36:51 -0700268#if CONFIG_DUAL_FILTER
James Zern7b9407a2016-05-18 23:48:05 -0700269 InterpFilter interp_filter[4] = { EIGHTTAP_REGULAR, EIGHTTAP_REGULAR,
270 EIGHTTAP_REGULAR, EIGHTTAP_REGULAR };
Jingning Han9de916e2016-05-08 10:36:51 -0700271 InterpFilterParams filter_params =
Yaowu Xuf883b422016-08-30 14:01:10 -0700272 av1_get_interp_filter_params(interp_filter[0]);
Jingning Han9de916e2016-05-08 10:36:51 -0700273#else
James Zern7b9407a2016-05-18 23:48:05 -0700274 InterpFilter interp_filter = EIGHTTAP_REGULAR;
Angie Chiangd5349112016-02-04 16:13:18 -0800275 InterpFilterParams filter_params =
Yaowu Xuf883b422016-08-30 14:01:10 -0700276 av1_get_interp_filter_params(interp_filter);
Jingning Han9de916e2016-05-08 10:36:51 -0700277#endif
Yunqing Wanged070562016-08-30 09:13:16 -0700278 int filter_size = filter_params.taps;
Angie Chiangd5349112016-02-04 16:13:18 -0800279 int filter_center = filter_size / 2 - 1;
280 uint16_t src0[12 * 12];
281 uint16_t src1[12 * 12];
282 int src_stride = filter_size;
clang-format3a826f12016-08-11 17:46:05 -0700283 uint16_t dst0[1] = { 0 };
284 uint16_t dst1[1] = { 0 };
285 uint16_t dst[1] = { 0 };
Angie Chiangd5349112016-02-04 16:13:18 -0800286 int dst_stride = 1;
287 int x_step_q4 = 16;
288 int y_step_q4 = 16;
Angie Chiangd5349112016-02-04 16:13:18 -0800289 int avg = 0;
290 int bd = 10;
291
292 int w = 1;
293 int h = 1;
294
Angie Chiang1e403062016-02-19 19:31:38 -0800295 int subpel_x_q4;
296 int subpel_y_q4;
297
Angie Chiangd5349112016-02-04 16:13:18 -0800298 for (int i = 0; i < filter_size * filter_size; i++) {
299 src0[i] = rnd.Rand16() % (1 << bd);
300 src1[i] = rnd.Rand16() % (1 << bd);
301 }
302
Angie Chiang1e403062016-02-19 19:31:38 -0800303 for (subpel_x_q4 = 0; subpel_x_q4 < 16; subpel_x_q4++) {
304 for (subpel_y_q4 = 0; subpel_y_q4 < 16; subpel_y_q4++) {
305 int offset = filter_size * filter_center + filter_center;
Angie Chiangd5349112016-02-04 16:13:18 -0800306
Angie Chiang1e403062016-02-19 19:31:38 -0800307 avg = 0;
Yaowu Xuf883b422016-08-30 14:01:10 -0700308 av1_highbd_convolve(CONVERT_TO_BYTEPTR(src0 + offset), src_stride,
309 CONVERT_TO_BYTEPTR(dst0), dst_stride, w, h,
310 interp_filter, subpel_x_q4, x_step_q4, subpel_y_q4,
311 y_step_q4, avg, bd);
Angie Chiang1e403062016-02-19 19:31:38 -0800312 avg = 0;
Yaowu Xuf883b422016-08-30 14:01:10 -0700313 av1_highbd_convolve(CONVERT_TO_BYTEPTR(src1 + offset), src_stride,
314 CONVERT_TO_BYTEPTR(dst1), dst_stride, w, h,
315 interp_filter, subpel_x_q4, x_step_q4, subpel_y_q4,
316 y_step_q4, avg, bd);
Angie Chiangd5349112016-02-04 16:13:18 -0800317
Angie Chiang1e403062016-02-19 19:31:38 -0800318 avg = 0;
Yaowu Xuf883b422016-08-30 14:01:10 -0700319 av1_highbd_convolve(CONVERT_TO_BYTEPTR(src0 + offset), src_stride,
320 CONVERT_TO_BYTEPTR(dst), dst_stride, w, h,
321 interp_filter, subpel_x_q4, x_step_q4, subpel_y_q4,
322 y_step_q4, avg, bd);
Angie Chiang1e403062016-02-19 19:31:38 -0800323 avg = 1;
Yaowu Xuf883b422016-08-30 14:01:10 -0700324 av1_highbd_convolve(CONVERT_TO_BYTEPTR(src1 + offset), src_stride,
325 CONVERT_TO_BYTEPTR(dst), dst_stride, w, h,
326 interp_filter, subpel_x_q4, x_step_q4, subpel_y_q4,
327 y_step_q4, avg, bd);
Angie Chiangd5349112016-02-04 16:13:18 -0800328
Angie Chiang1e403062016-02-19 19:31:38 -0800329 EXPECT_EQ(dst[0], ROUND_POWER_OF_TWO(dst0[0] + dst1[0], 1));
330 }
331 }
Angie Chiangd5349112016-02-04 16:13:18 -0800332}
Yaowu Xuf883b422016-08-30 14:01:10 -0700333#endif // CONFIG_AOM_HIGHBITDEPTH
Angie Chiang8878fa42016-02-22 14:11:05 -0800334
335#define CONVOLVE_SPEED_TEST 0
336#if CONVOLVE_SPEED_TEST
// Defines a throughput test named <func>_speed_<block_size>_<frame_size> that
// calls the high bit-depth convolve function |func| 100000 times on
// block_size x block_size blocks (bd = 10, subpel_x_q4 = 8, subpel_y_q4 = 6),
// walking block by block across a frame_size x frame_size destination and
// wrapping back to the top-left when the bottom is reached.
//
//   func       - convolve function taking (src, src_stride, dst, dst_stride,
//                filter_x, x_step_q4, filter_y, y_step_q4, w, h, bd).
//   block_size - width and height of each convolved block.
//   frame_size - width and height of the destination frame; the source is
//                (frame_size + 7) samples on each side.
//                NOTE(review): the extra 7 samples presumably cover the border
//                an 8-tap filter reads around the frame -- confirm.
//
// Both buffers live on the stack, so large frame sizes need a correspondingly
// large stack. Comments inside the macro body must use /* */ because a //
// comment would swallow the continued lines.
#define highbd_convolve_speed(func, block_size, frame_size)                  \
  TEST(AV1ConvolveTest, func##_speed_##block_size##_##frame_size) {          \
    ACMRandom rnd(ACMRandom::DeterministicSeed());                           \
    InterpFilter interp_filter = EIGHTTAP_REGULAR;                           \
    InterpFilterParams filter_params =                                       \
        av1_get_interp_filter_params(interp_filter);                         \
    int filter_size = filter_params.taps;                                    \
    int filter_center = filter_size / 2 - 1;                                 \
    DECLARE_ALIGNED(16, uint16_t,                                            \
                    src[(frame_size + 7) * (frame_size + 7)]) = { 0 };       \
    int src_stride = frame_size + 7;                                         \
    DECLARE_ALIGNED(16, uint16_t, dst[frame_size * frame_size]) = { 0 };     \
    int dst_stride = frame_size;                                             \
    int x_step_q4 = 16;                                                      \
    int y_step_q4 = 16;                                                      \
    int subpel_x_q4 = 8;                                                     \
    int subpel_y_q4 = 6;                                                     \
    int bd = 10;                                                             \
                                                                             \
    int w = block_size;                                                      \
    int h = block_size;                                                      \
                                                                             \
    const int16_t *filter_x =                                                \
        av1_get_interp_filter_subpel_kernel(filter_params, subpel_x_q4);     \
    const int16_t *filter_y =                                                \
        av1_get_interp_filter_subpel_kernel(filter_params, subpel_y_q4);     \
                                                                             \
    for (int i = 0; i < src_stride * src_stride; i++) {                      \
      src[i] = rnd.Rand16() % (1 << bd);                                     \
    }                                                                        \
                                                                             \
    /* offset skips the top/left filter border of the source buffer. */      \
    /* row_offset is the horizontal position of the current block, */        \
    /* col_offset the vertical one (it is scaled by the stride). */          \
    int offset = filter_center * src_stride + filter_center;                 \
    int row_offset = 0;                                                      \
    int col_offset = 0;                                                      \
    for (int i = 0; i < 100000; i++) {                                       \
      int src_total_offset = offset + col_offset * src_stride + row_offset;  \
      int dst_total_offset = col_offset * dst_stride + row_offset;           \
      func(CONVERT_TO_BYTEPTR(src + src_total_offset), src_stride,           \
           CONVERT_TO_BYTEPTR(dst + dst_total_offset), dst_stride, filter_x, \
           x_step_q4, filter_y, y_step_q4, w, h, bd);                        \
      /* Step right while another block still fits, else start the next */   \
      /* row of blocks. The test must use row_offset: offset is a */         \
      /* constant larger than frame_size, so comparing it never advanced. */ \
      if (row_offset + w + w < frame_size) {                                 \
        row_offset += w;                                                     \
      } else {                                                               \
        row_offset = 0;                                                      \
        col_offset += h;                                                     \
      }                                                                      \
      if (col_offset + h >= frame_size) {                                    \
        col_offset = 0;                                                      \
      }                                                                      \
    }                                                                        \
  }
388
// Defines a throughput test named <func>_speed_l_<block_size>_<frame_size>
// that calls the 8-bit convolve function |func| 100000 times on
// block_size x block_size blocks (subpel_x_q4 = 8, subpel_y_q4 = 6), walking
// block by block across a frame_size x frame_size destination and wrapping
// back to the top-left when the bottom is reached, in the same way as
// highbd_convolve_speed so the two measurements are comparable.
//
//   func       - convolve function taking (src, src_stride, dst, dst_stride,
//                filter_x, x_step_q4, filter_y, y_step_q4, w, h).
//   block_size - width and height of each convolved block.
//   frame_size - width and height of the destination frame; the source is
//                (frame_size + 7) samples on each side.
//                NOTE(review): the extra 7 samples presumably cover the border
//                an 8-tap filter reads around the frame -- confirm.
//
// Both buffers live on the stack, so large frame sizes need a correspondingly
// large stack. Comments inside the macro body must use /* */ because a //
// comment would swallow the continued lines.
#define lowbd_convolve_speed(func, block_size, frame_size)                   \
  TEST(AV1ConvolveTest, func##_speed_l_##block_size##_##frame_size) {        \
    ACMRandom rnd(ACMRandom::DeterministicSeed());                           \
    InterpFilter interp_filter = EIGHTTAP_REGULAR;                           \
    InterpFilterParams filter_params =                                       \
        av1_get_interp_filter_params(interp_filter);                         \
    int filter_size = filter_params.taps;                                    \
    int filter_center = filter_size / 2 - 1;                                 \
    DECLARE_ALIGNED(16, uint8_t, src[(frame_size + 7) * (frame_size + 7)]);  \
    int src_stride = frame_size + 7;                                         \
    DECLARE_ALIGNED(16, uint8_t, dst[frame_size * frame_size]);              \
    int dst_stride = frame_size;                                             \
    int x_step_q4 = 16;                                                      \
    int y_step_q4 = 16;                                                      \
    int subpel_x_q4 = 8;                                                     \
    int subpel_y_q4 = 6;                                                     \
    int bd = 8;                                                              \
                                                                             \
    int w = block_size;                                                      \
    int h = block_size;                                                      \
                                                                             \
    const int16_t *filter_x =                                                \
        av1_get_interp_filter_subpel_kernel(filter_params, subpel_x_q4);     \
    const int16_t *filter_y =                                                \
        av1_get_interp_filter_subpel_kernel(filter_params, subpel_y_q4);     \
                                                                             \
    for (int i = 0; i < src_stride * src_stride; i++) {                      \
      src[i] = rnd.Rand16() % (1 << bd);                                     \
    }                                                                        \
                                                                             \
    /* offset skips the top/left filter border of the source buffer. */      \
    /* row_offset is the horizontal position of the current block, */        \
    /* col_offset the vertical one (it is scaled by the stride). */          \
    int offset = filter_center * src_stride + filter_center;                 \
    int row_offset = 0;                                                      \
    int col_offset = 0;                                                      \
    for (int i = 0; i < 100000; i++) {                                       \
      /* Apply the block position; previously the offsets were computed */   \
      /* but every call convolved the same top-left block. */                \
      int src_total_offset = offset + col_offset * src_stride + row_offset;  \
      int dst_total_offset = col_offset * dst_stride + row_offset;           \
      func(src + src_total_offset, src_stride, dst + dst_total_offset,       \
           dst_stride, filter_x, x_step_q4, filter_y, y_step_q4, w, h);      \
      /* Step right while another block still fits, else start the next */   \
      /* row of blocks. The test must use row_offset: offset is a */         \
      /* constant larger than frame_size, so comparing it never advanced. */ \
      if (row_offset + w + w < frame_size) {                                 \
        row_offset += w;                                                     \
      } else {                                                               \
        row_offset = 0;                                                      \
        col_offset += h;                                                     \
      }                                                                      \
      if (col_offset + h >= frame_size) {                                    \
        col_offset = 0;                                                      \
      }                                                                      \
    }                                                                        \
  }
436
437// This experiment shows that when frame size is 64x64
Yaowu Xuf883b422016-08-30 14:01:10 -0700438// aom_highbd_convolve8_sse2 and aom_convolve8_sse2's speed are similar.
Angie Chiang8878fa42016-02-22 14:11:05 -0800439// However when frame size becomes 1024x1024
Yaowu Xuf883b422016-08-30 14:01:10 -0700440// aom_highbd_convolve8_sse2 is around 50% slower than aom_convolve8_sse2
Angie Chiang8878fa42016-02-22 14:11:05 -0800441// we think the bottleneck is from memory IO
Yaowu Xuf883b422016-08-30 14:01:10 -0700442highbd_convolve_speed(aom_highbd_convolve8_sse2, 8, 64);
443highbd_convolve_speed(aom_highbd_convolve8_sse2, 16, 64);
444highbd_convolve_speed(aom_highbd_convolve8_sse2, 32, 64);
445highbd_convolve_speed(aom_highbd_convolve8_sse2, 64, 64);
Angie Chiang8878fa42016-02-22 14:11:05 -0800446
Yaowu Xuf883b422016-08-30 14:01:10 -0700447lowbd_convolve_speed(aom_convolve8_sse2, 8, 64);
448lowbd_convolve_speed(aom_convolve8_sse2, 16, 64);
449lowbd_convolve_speed(aom_convolve8_sse2, 32, 64);
450lowbd_convolve_speed(aom_convolve8_sse2, 64, 64);
Angie Chiang8878fa42016-02-22 14:11:05 -0800451
Yaowu Xuf883b422016-08-30 14:01:10 -0700452highbd_convolve_speed(aom_highbd_convolve8_sse2, 8, 1024);
453highbd_convolve_speed(aom_highbd_convolve8_sse2, 16, 1024);
454highbd_convolve_speed(aom_highbd_convolve8_sse2, 32, 1024);
455highbd_convolve_speed(aom_highbd_convolve8_sse2, 64, 1024);
Angie Chiang8878fa42016-02-22 14:11:05 -0800456
Yaowu Xuf883b422016-08-30 14:01:10 -0700457lowbd_convolve_speed(aom_convolve8_sse2, 8, 1024);
458lowbd_convolve_speed(aom_convolve8_sse2, 16, 1024);
459lowbd_convolve_speed(aom_convolve8_sse2, 32, 1024);
460lowbd_convolve_speed(aom_convolve8_sse2, 64, 1024);
Angie Chiang8878fa42016-02-22 14:11:05 -0800461#endif // CONVOLVE_SPEED_TEST
Angie Chiangd5349112016-02-04 16:13:18 -0800462} // namespace