blob: 58875f6f349c68b2e02164a25ceea8d9c245a5d0 [file] [log] [blame]
/*
* Copyright (c) 2023, Alliance for Open Media. All rights reserved
*
* This source code is subject to the terms of the BSD 2 Clause License and
* the Alliance for Open Media Patent License 1.0. If the BSD 2 Clause License
* was not distributed with this source code in the LICENSE file, you can
* obtain it at www.aomedia.org/license/software. If the Alliance for Open
* Media Patent License 1.0 was not distributed with this source code in the
* PATENTS file, you can obtain it at www.aomedia.org/license/patent.
*/
#include <arm_neon.h>
#include <assert.h>
#include <stdlib.h>
#include "config/aom_config.h"
#include "config/aom_dsp_rtcd.h"
static const uint16_t error_measure_lut_diff[257] = {
339, 211, 181, 163, 151, 142, 136, 129, 125, 121, 117, 114, 111, 108, 107,
104, 102, 101, 99, 97, 96, 94, 93, 92, 91, 90, 88, 88, 87, 86,
85, 84, 84, 82, 82, 82, 80, 80, 79, 79, 78, 78, 77, 76, 76,
76, 74, 75, 74, 73, 74, 72, 73, 71, 72, 71, 71, 70, 70, 69,
70, 69, 68, 68, 68, 68, 67, 67, 67, 66, 67, 65, 66, 65, 65,
65, 65, 64, 64, 64, 63, 64, 63, 63, 62, 63, 62, 62, 62, 61,
62, 61, 61, 61, 60, 61, 60, 60, 60, 59, 60, 59, 59, 59, 59,
58, 59, 58, 58, 58, 58, 58, 57, 58, 57, 57, 57, 56, 57, 56,
57, 56, 56, 56, 56, 55, 56, 55, 55, 56, 55, 54, 55, 55, 54,
55, 54, 54, 54, 54, 54, 53, 54, 53, 54, 53, 53, 53, 53, 53,
52, 53, 52, 53, 52, 52, 52, 52, 52, 52, 52, 51, 52, 51, 51,
52, 51, 51, 51, 50, 51, 51, 50, 51, 50, 51, 50, 50, 50, 50,
50, 50, 49, 50, 50, 49, 50, 49, 49, 49, 49, 49, 49, 49, 49,
49, 48, 49, 48, 49, 48, 48, 49, 48, 48, 48, 48, 47, 48, 48,
48, 47, 48, 47, 47, 48, 47, 47, 47, 47, 47, 47, 47, 47, 47,
46, 47, 46, 47, 46, 47, 46, 46, 46, 47, 46, 46, 46, 45, 46,
46, 46, 45, 46, 46, 45, 45, 46, 45, 45, 46, 45, 45, 45, 45,
0
};
static const int hbd_12_error_measure_lut[257] = {
0, 5424, 8800, 11696, 14304, 16720, 18992, 21168, 23232,
25232, 27168, 29040, 30864, 32640, 34368, 36080, 37744, 39376,
40992, 42576, 44128, 45664, 47168, 48656, 50128, 51584, 53024,
54432, 55840, 57232, 58608, 59968, 61312, 62656, 63968, 65280,
66592, 67872, 69152, 70416, 71680, 72928, 74176, 75408, 76624,
77840, 79056, 80240, 81440, 82624, 83792, 84976, 86128, 87296,
88432, 89584, 90720, 91856, 92976, 94096, 95200, 96320, 97424,
98512, 99600, 100688, 101776, 102848, 103920, 104992, 106048, 107120,
108160, 109216, 110256, 111296, 112336, 113376, 114400, 115424, 116448,
117456, 118480, 119488, 120496, 121488, 122496, 123488, 124480, 125472,
126448, 127440, 128416, 129392, 130368, 131328, 132304, 133264, 134224,
135184, 136128, 137088, 138032, 138976, 139920, 140864, 141792, 142736,
143664, 144592, 145520, 146448, 147376, 148288, 149216, 150128, 151040,
151952, 152848, 153760, 154656, 155568, 156464, 157360, 158256, 159152,
160032, 160928, 161808, 162688, 163584, 164464, 165328, 166208, 167088,
167952, 168832, 169696, 170560, 171424, 172288, 173152, 174000, 174864,
175712, 176576, 177424, 178272, 179120, 179968, 180816, 181648, 182496,
183328, 184176, 185008, 185840, 186672, 187504, 188336, 189168, 190000,
190816, 191648, 192464, 193280, 194112, 194928, 195744, 196560, 197360,
198176, 198992, 199792, 200608, 201408, 202224, 203024, 203824, 204624,
205424, 206224, 207024, 207808, 208608, 209408, 210192, 210992, 211776,
212560, 213344, 214128, 214912, 215696, 216480, 217264, 218048, 218816,
219600, 220368, 221152, 221920, 222688, 223472, 224240, 225008, 225776,
226544, 227296, 228064, 228832, 229600, 230352, 231120, 231872, 232624,
233392, 234144, 234896, 235648, 236400, 237152, 237904, 238656, 239408,
240160, 240896, 241648, 242384, 243136, 243872, 244624, 245360, 246096,
246832, 247584, 248320, 249056, 249792, 250512, 251248, 251984, 252720,
253440, 254176, 254912, 255632, 256352, 257088, 257808, 258528, 259264,
259984, 260704, 261424, 262144, 262144,
};
static const int hbd_10_error_measure_lut[257] = {
0, 1356, 2200, 2924, 3576, 4180, 4748, 5292, 5808, 6308, 6792,
7260, 7716, 8160, 8592, 9020, 9436, 9844, 10248, 10644, 11032, 11416,
11792, 12164, 12532, 12896, 13256, 13608, 13960, 14308, 14652, 14992, 15328,
15664, 15992, 16320, 16648, 16968, 17288, 17604, 17920, 18232, 18544, 18852,
19156, 19460, 19764, 20060, 20360, 20656, 20948, 21244, 21532, 21824, 22108,
22396, 22680, 22964, 23244, 23524, 23800, 24080, 24356, 24628, 24900, 25172,
25444, 25712, 25980, 26248, 26512, 26780, 27040, 27304, 27564, 27824, 28084,
28344, 28600, 28856, 29112, 29364, 29620, 29872, 30124, 30372, 30624, 30872,
31120, 31368, 31612, 31860, 32104, 32348, 32592, 32832, 33076, 33316, 33556,
33796, 34032, 34272, 34508, 34744, 34980, 35216, 35448, 35684, 35916, 36148,
36380, 36612, 36844, 37072, 37304, 37532, 37760, 37988, 38212, 38440, 38664,
38892, 39116, 39340, 39564, 39788, 40008, 40232, 40452, 40672, 40896, 41116,
41332, 41552, 41772, 41988, 42208, 42424, 42640, 42856, 43072, 43288, 43500,
43716, 43928, 44144, 44356, 44568, 44780, 44992, 45204, 45412, 45624, 45832,
46044, 46252, 46460, 46668, 46876, 47084, 47292, 47500, 47704, 47912, 48116,
48320, 48528, 48732, 48936, 49140, 49340, 49544, 49748, 49948, 50152, 50352,
50556, 50756, 50956, 51156, 51356, 51556, 51756, 51952, 52152, 52352, 52548,
52748, 52944, 53140, 53336, 53532, 53728, 53924, 54120, 54316, 54512, 54704,
54900, 55092, 55288, 55480, 55672, 55868, 56060, 56252, 56444, 56636, 56824,
57016, 57208, 57400, 57588, 57780, 57968, 58156, 58348, 58536, 58724, 58912,
59100, 59288, 59476, 59664, 59852, 60040, 60224, 60412, 60596, 60784, 60968,
61156, 61340, 61524, 61708, 61896, 62080, 62264, 62448, 62628, 62812, 62996,
63180, 63360, 63544, 63728, 63908, 64088, 64272, 64452, 64632, 64816, 64996,
65176, 65356, 65536, 65536,
};
static const int hbd_8_error_measure_lut[257] = {
0, 339, 550, 731, 894, 1045, 1187, 1323, 1452, 1577, 1698,
1815, 1929, 2040, 2148, 2255, 2359, 2461, 2562, 2661, 2758, 2854,
2948, 3041, 3133, 3224, 3314, 3402, 3490, 3577, 3663, 3748, 3832,
3916, 3998, 4080, 4162, 4242, 4322, 4401, 4480, 4558, 4636, 4713,
4789, 4865, 4941, 5015, 5090, 5164, 5237, 5311, 5383, 5456, 5527,
5599, 5670, 5741, 5811, 5881, 5950, 6020, 6089, 6157, 6225, 6293,
6361, 6428, 6495, 6562, 6628, 6695, 6760, 6826, 6891, 6956, 7021,
7086, 7150, 7214, 7278, 7341, 7405, 7468, 7531, 7593, 7656, 7718,
7780, 7842, 7903, 7965, 8026, 8087, 8148, 8208, 8269, 8329, 8389,
8449, 8508, 8568, 8627, 8686, 8745, 8804, 8862, 8921, 8979, 9037,
9095, 9153, 9211, 9268, 9326, 9383, 9440, 9497, 9553, 9610, 9666,
9723, 9779, 9835, 9891, 9947, 10002, 10058, 10113, 10168, 10224, 10279,
10333, 10388, 10443, 10497, 10552, 10606, 10660, 10714, 10768, 10822, 10875,
10929, 10982, 11036, 11089, 11142, 11195, 11248, 11301, 11353, 11406, 11458,
11511, 11563, 11615, 11667, 11719, 11771, 11823, 11875, 11926, 11978, 12029,
12080, 12132, 12183, 12234, 12285, 12335, 12386, 12437, 12487, 12538, 12588,
12639, 12689, 12739, 12789, 12839, 12889, 12939, 12988, 13038, 13088, 13137,
13187, 13236, 13285, 13334, 13383, 13432, 13481, 13530, 13579, 13628, 13676,
13725, 13773, 13822, 13870, 13918, 13967, 14015, 14063, 14111, 14159, 14206,
14254, 14302, 14350, 14397, 14445, 14492, 14539, 14587, 14634, 14681, 14728,
14775, 14822, 14869, 14916, 14963, 15010, 15056, 15103, 15149, 15196, 15242,
15289, 15335, 15381, 15427, 15474, 15520, 15566, 15612, 15657, 15703, 15749,
15795, 15840, 15886, 15932, 15977, 16022, 16068, 16113, 16158, 16204, 16249,
16294, 16339, 16384, 16384,
};
// Split error into two parts and do an interpolated table lookup.
// To compute the table index and interpolation value, we want to calculate
// the quotient and remainder of (dst - ref) / 2^(bd - 8).
#define HBD_CALC_FRAME_ERROR(bd, offset, mask) \
static INLINE int highbd_##bd##_error_measure(int q, int r) { \
return (hbd_##bd##_error_measure_lut[q]) + \
(error_measure_lut_diff[q]) * r; \
} \
\
int64_t av1_calc_highbd_##bd##_frame_error_neon( \
const uint16_t *const ref, int ref_stride, const uint16_t *const dst, \
int dst_stride, int width, int height) { \
int64_t sum_error[4] = { 0, 0, 0, 0 }; \
int r = 0; \
int d = 0; \
\
do { \
int w = width; \
int rr = r; \
int dd = d; \
\
do { \
uint16x8_t dst_v = vld1q_u16(&dst[dd]); \
uint16x8_t ref_v = vld1q_u16(&ref[rr]); \
\
uint64x2_t abs_v = vreinterpretq_u64_u16(vabdq_u16(dst_v, ref_v)); \
\
uint64_t abs0 = vgetq_lane_u64(abs_v, 0); \
uint64_t abs1 = vgetq_lane_u64(abs_v, 1); \
\
sum_error[0] += highbd_##bd##_error_measure( \
(abs0 >> (0 + offset)) & 0xFF, (abs0 >> 0) & mask); \
sum_error[1] += highbd_##bd##_error_measure( \
(abs0 >> (16 + offset)) & 0xFF, (abs0 >> 16) & mask); \
sum_error[2] += highbd_##bd##_error_measure( \
(abs0 >> (32 + offset)) & 0xFF, (abs0 >> 32) & mask); \
sum_error[3] += highbd_##bd##_error_measure( \
(abs0 >> (48 + offset)) & 0xFF, (abs0 >> 48) & mask); \
\
sum_error[0] += highbd_##bd##_error_measure( \
(abs1 >> (0 + offset)) & 0xFF, (abs1 >> 0) & mask); \
sum_error[1] += highbd_##bd##_error_measure( \
(abs1 >> (16 + offset)) & 0xFF, (abs1 >> 16) & mask); \
sum_error[2] += highbd_##bd##_error_measure( \
(abs1 >> (32 + offset)) & 0xFF, (abs1 >> 32) & mask); \
sum_error[3] += highbd_##bd##_error_measure( \
(abs1 >> (48 + offset)) & 0xFF, (abs1 >> 48) & mask); \
\
dd += 8; \
rr += 8; \
w -= 8; \
} while (w >= 8); \
\
while (w-- != 0) { \
uint16_t abs_u16 = abs(dst[dd] - ref[rr]); \
sum_error[0] += \
highbd_##bd##_error_measure(abs_u16 >> offset, abs_u16 & mask); \
dd++; \
rr++; \
} \
\
r += ref_stride; \
d += dst_stride; \
} while (--height != 0); \
\
return sum_error[0] + sum_error[1] + sum_error[2] + sum_error[3]; \
}
// 12 bitdepth
HBD_CALC_FRAME_ERROR(12, 4, 0xF)
// 10 bitdepth
HBD_CALC_FRAME_ERROR(10, 2, 0x3)
// 8 bitdepth
HBD_CALC_FRAME_ERROR(8, 0, 0x0)
int64_t av1_calc_highbd_frame_error_neon(const uint16_t *const ref,
int ref_stride,
const uint16_t *const dst,
int dst_stride, int width, int height,
int bd) {
switch (bd) {
case 8:
default:
return av1_calc_highbd_8_frame_error_neon(ref, ref_stride, dst,
dst_stride, width, height);
case 10:
return av1_calc_highbd_10_frame_error_neon(ref, ref_stride, dst,
dst_stride, width, height);
case 12:
return av1_calc_highbd_12_frame_error_neon(ref, ref_stride, dst,
dst_stride, width, height);
}
}