Johann | d5d9289 | 2015-04-17 16:11:38 -0400 | [diff] [blame] | 1 | /* |
| 2 | * Copyright (c) 2015 The WebM project authors. All Rights Reserved. |
| 3 | * |
| 4 | * Use of this source code is governed by a BSD-style license |
| 5 | * that can be found in the LICENSE file in the root of the source |
| 6 | * tree. An additional intellectual property rights grant can be found |
| 7 | * in the file PATENTS. All contributing project authors may |
| 8 | * be found in the AUTHORS file in the root of the source tree. |
| 9 | */ |
| 10 | |
| 11 | #include <stdlib.h> |
| 12 | |
| 13 | #include "./vpx_config.h" |
| 14 | #include "./vpx_dsp_rtcd.h" |
| 15 | |
| 16 | #include "vpx/vpx_integer.h" |
Johann | 1d7ccd5 | 2015-05-11 19:09:22 -0700 | [diff] [blame] | 17 | #include "vpx_ports/mem.h" |
Johann | d5d9289 | 2015-04-17 16:11:38 -0400 | [diff] [blame] | 18 | |
| 19 | /* Sum the difference between every corresponding element of the buffers. */ |
| 20 | static INLINE unsigned int sad(const uint8_t *a, int a_stride, |
| 21 | const uint8_t *b, int b_stride, |
| 22 | int width, int height) { |
| 23 | int y, x; |
| 24 | unsigned int sad = 0; |
| 25 | |
| 26 | for (y = 0; y < height; y++) { |
| 27 | for (x = 0; x < width; x++) |
| 28 | sad += abs(a[x] - b[x]); |
| 29 | |
| 30 | a += a_stride; |
| 31 | b += b_stride; |
| 32 | } |
| 33 | return sad; |
| 34 | } |
| 35 | |
Johann | c3bdffb | 2015-05-15 11:52:03 -0700 | [diff] [blame] | 36 | // TODO(johannkoenig): this moved to vpx_dsp, should be able to clean this up. |
Johann | d5d9289 | 2015-04-17 16:11:38 -0400 | [diff] [blame] | 37 | /* Remove dependency on vp9 variance function by duplicating vp9_comp_avg_pred. |
| 38 | * The function averages every corresponding element of the buffers and stores |
| 39 | * the value in a third buffer, comp_pred. |
| 40 | * pred and comp_pred are assumed to have stride = width |
| 41 | * In the usage below comp_pred is a local array. |
| 42 | */ |
| 43 | static INLINE void avg_pred(uint8_t *comp_pred, const uint8_t *pred, int width, |
| 44 | int height, const uint8_t *ref, int ref_stride) { |
| 45 | int i, j; |
| 46 | |
| 47 | for (i = 0; i < height; i++) { |
| 48 | for (j = 0; j < width; j++) { |
| 49 | const int tmp = pred[j] + ref[j]; |
| 50 | comp_pred[j] = ROUND_POWER_OF_TWO(tmp, 1); |
| 51 | } |
| 52 | comp_pred += width; |
| 53 | pred += width; |
| 54 | ref += ref_stride; |
| 55 | } |
| 56 | } |
| 57 | |
#if CONFIG_VP9_HIGHBITDEPTH
/* High-bitdepth counterpart of avg_pred(): pred8/ref8 carry uint16_t
 * samples behind uint8_t pointers (CONVERT_TO_SHORTPTR), and the rounded
 * averages land in the plain uint16_t array comp_pred (stride = width). */
static INLINE void highbd_avg_pred(uint16_t *comp_pred, const uint8_t *pred8,
                                   int width, int height, const uint8_t *ref8,
                                   int ref_stride) {
  int row, col;
  uint16_t *pred = CONVERT_TO_SHORTPTR(pred8);
  uint16_t *ref = CONVERT_TO_SHORTPTR(ref8);

  for (row = 0; row < height; ++row) {
    for (col = 0; col < width; ++col) {
      comp_pred[col] = ROUND_POWER_OF_TWO(pred[col] + ref[col], 1);
    }
    comp_pred += width;
    pred += width;
    ref += ref_stride;
  }
}
#endif  // CONFIG_VP9_HIGHBITDEPTH
| 76 | |
/* Generate vpx_sadMxN_c() and vpx_sadMxN_avg_c() for one block size.
 * The _avg_ variant first averages second_pred with ref into a local
 * comp_pred buffer (stride = m) and then takes the SAD of src against
 * that averaged prediction — hence the `comp_pred, m` stride argument. */
#define sadMxN(m, n) \
unsigned int vpx_sad##m##x##n##_c(const uint8_t *src, int src_stride, \
                                  const uint8_t *ref, int ref_stride) { \
  return sad(src, src_stride, ref, ref_stride, m, n); \
} \
unsigned int vpx_sad##m##x##n##_avg_c(const uint8_t *src, int src_stride, \
                                      const uint8_t *ref, int ref_stride, \
                                      const uint8_t *second_pred) { \
  uint8_t comp_pred[m * n]; \
  avg_pred(comp_pred, second_pred, m, n, ref, ref_stride); \
  return sad(src, src_stride, comp_pred, m, m, n); \
}
| 89 | |
// depending on call sites, pass **ref_array to avoid & in subsequent call and
// de-dup with 4D below.
/* Generate vpx_sadMxNxK_c(): K SADs against K consecutive one-byte
 * offsets into ref_array (note &ref_array[i], not ref_array[i]). */
#define sadMxNxK(m, n, k) \
void vpx_sad##m##x##n##x##k##_c(const uint8_t *src, int src_stride, \
                                const uint8_t *ref_array, int ref_stride, \
                                uint32_t *sad_array) { \
  int i; \
  for (i = 0; i < k; ++i) \
    sad_array[i] = vpx_sad##m##x##n##_c(src, src_stride, &ref_array[i], ref_stride); \
}
| 100 | |
// This appears to be equivalent to the above when k == 4 and refs is const
/* Generate vpx_sadMxNx4d_c(): unlike the xK variant, each of the four
 * references is an independent pointer (ref_array[i]), not an offset. */
#define sadMxNx4D(m, n) \
void vpx_sad##m##x##n##x4d_c(const uint8_t *src, int src_stride, \
                             const uint8_t *const ref_array[], int ref_stride, \
                             uint32_t *sad_array) { \
  int i; \
  for (i = 0; i < 4; ++i) \
    sad_array[i] = vpx_sad##m##x##n##_c(src, src_stride, ref_array[i], ref_stride); \
}
| 110 | |
Geza Lore | 697bf5b | 2016-03-02 11:12:52 +0000 | [diff] [blame] | 111 | #if CONFIG_VP10 && CONFIG_EXT_PARTITION |
| 112 | // 128x128 |
| 113 | sadMxN(128, 128) |
| 114 | sadMxNxK(128, 128, 3) |
| 115 | sadMxNxK(128, 128, 8) |
| 116 | sadMxNx4D(128, 128) |
| 117 | |
| 118 | // 128x64 |
| 119 | sadMxN(128, 64) |
| 120 | sadMxNx4D(128, 64) |
| 121 | |
| 122 | // 64x128 |
| 123 | sadMxN(64, 128) |
| 124 | sadMxNx4D(64, 128) |
| 125 | #endif // CONFIG_VP10 && CONFIG_EXT_PARTITION |
| 126 | |
Johann | d5d9289 | 2015-04-17 16:11:38 -0400 | [diff] [blame] | 127 | // 64x64 |
| 128 | sadMxN(64, 64) |
| 129 | sadMxNxK(64, 64, 3) |
| 130 | sadMxNxK(64, 64, 8) |
| 131 | sadMxNx4D(64, 64) |
| 132 | |
| 133 | // 64x32 |
| 134 | sadMxN(64, 32) |
| 135 | sadMxNx4D(64, 32) |
| 136 | |
| 137 | // 32x64 |
| 138 | sadMxN(32, 64) |
| 139 | sadMxNx4D(32, 64) |
| 140 | |
| 141 | // 32x32 |
| 142 | sadMxN(32, 32) |
| 143 | sadMxNxK(32, 32, 3) |
| 144 | sadMxNxK(32, 32, 8) |
| 145 | sadMxNx4D(32, 32) |
| 146 | |
| 147 | // 32x16 |
| 148 | sadMxN(32, 16) |
| 149 | sadMxNx4D(32, 16) |
| 150 | |
| 151 | // 16x32 |
| 152 | sadMxN(16, 32) |
| 153 | sadMxNx4D(16, 32) |
| 154 | |
| 155 | // 16x16 |
| 156 | sadMxN(16, 16) |
| 157 | sadMxNxK(16, 16, 3) |
| 158 | sadMxNxK(16, 16, 8) |
| 159 | sadMxNx4D(16, 16) |
| 160 | |
| 161 | // 16x8 |
| 162 | sadMxN(16, 8) |
| 163 | sadMxNxK(16, 8, 3) |
| 164 | sadMxNxK(16, 8, 8) |
| 165 | sadMxNx4D(16, 8) |
| 166 | |
| 167 | // 8x16 |
| 168 | sadMxN(8, 16) |
| 169 | sadMxNxK(8, 16, 3) |
| 170 | sadMxNxK(8, 16, 8) |
| 171 | sadMxNx4D(8, 16) |
| 172 | |
| 173 | // 8x8 |
| 174 | sadMxN(8, 8) |
| 175 | sadMxNxK(8, 8, 3) |
| 176 | sadMxNxK(8, 8, 8) |
| 177 | sadMxNx4D(8, 8) |
| 178 | |
| 179 | // 8x4 |
| 180 | sadMxN(8, 4) |
| 181 | sadMxNxK(8, 4, 8) |
| 182 | sadMxNx4D(8, 4) |
| 183 | |
| 184 | // 4x8 |
| 185 | sadMxN(4, 8) |
| 186 | sadMxNxK(4, 8, 8) |
| 187 | sadMxNx4D(4, 8) |
| 188 | |
| 189 | // 4x4 |
| 190 | sadMxN(4, 4) |
| 191 | sadMxNxK(4, 4, 3) |
| 192 | sadMxNxK(4, 4, 8) |
| 193 | sadMxNx4D(4, 4) |
| 194 | |
| 195 | #if CONFIG_VP9_HIGHBITDEPTH |
| 196 | static INLINE unsigned int highbd_sad(const uint8_t *a8, int a_stride, |
| 197 | const uint8_t *b8, int b_stride, |
| 198 | int width, int height) { |
| 199 | int y, x; |
| 200 | unsigned int sad = 0; |
| 201 | const uint16_t *a = CONVERT_TO_SHORTPTR(a8); |
| 202 | const uint16_t *b = CONVERT_TO_SHORTPTR(b8); |
| 203 | for (y = 0; y < height; y++) { |
| 204 | for (x = 0; x < width; x++) |
| 205 | sad += abs(a[x] - b[x]); |
| 206 | |
| 207 | a += a_stride; |
| 208 | b += b_stride; |
| 209 | } |
| 210 | return sad; |
| 211 | } |
| 212 | |
| 213 | static INLINE unsigned int highbd_sadb(const uint8_t *a8, int a_stride, |
| 214 | const uint16_t *b, int b_stride, |
| 215 | int width, int height) { |
| 216 | int y, x; |
| 217 | unsigned int sad = 0; |
| 218 | const uint16_t *a = CONVERT_TO_SHORTPTR(a8); |
| 219 | for (y = 0; y < height; y++) { |
| 220 | for (x = 0; x < width; x++) |
| 221 | sad += abs(a[x] - b[x]); |
| 222 | |
| 223 | a += a_stride; |
| 224 | b += b_stride; |
| 225 | } |
| 226 | return sad; |
| 227 | } |
| 228 | |
/* Generate vpx_highbd_sadMxN_c() and vpx_highbd_sadMxN_avg_c().
 * Mirrors sadMxN() above; comp_pred is a local uint16_t array with
 * stride = m, measured via highbd_sadb(). */
#define highbd_sadMxN(m, n) \
unsigned int vpx_highbd_sad##m##x##n##_c(const uint8_t *src, int src_stride, \
                                         const uint8_t *ref, int ref_stride) { \
  return highbd_sad(src, src_stride, ref, ref_stride, m, n); \
} \
unsigned int vpx_highbd_sad##m##x##n##_avg_c(const uint8_t *src, \
                                             int src_stride, \
                                             const uint8_t *ref, \
                                             int ref_stride, \
                                             const uint8_t *second_pred) { \
  uint16_t comp_pred[m * n]; \
  highbd_avg_pred(comp_pred, second_pred, m, n, ref, ref_stride); \
  return highbd_sadb(src, src_stride, comp_pred, m, m, n); \
}
| 243 | |
/* Generate vpx_highbd_sadMxNxK_c(): K SADs against K consecutive
 * one-element offsets into ref_array (note &ref_array[i]). */
#define highbd_sadMxNxK(m, n, k) \
void vpx_highbd_sad##m##x##n##x##k##_c(const uint8_t *src, int src_stride, \
                                       const uint8_t *ref_array, int ref_stride, \
                                       uint32_t *sad_array) { \
  int i; \
  for (i = 0; i < k; ++i) { \
    sad_array[i] = vpx_highbd_sad##m##x##n##_c(src, src_stride, &ref_array[i], \
                                               ref_stride); \
  } \
}
| 254 | |
/* Generate vpx_highbd_sadMxNx4d_c(): four SADs against four independent
 * reference pointers (ref_array[i], not an offset). */
#define highbd_sadMxNx4D(m, n) \
void vpx_highbd_sad##m##x##n##x4d_c(const uint8_t *src, int src_stride, \
                                    const uint8_t *const ref_array[], \
                                    int ref_stride, uint32_t *sad_array) { \
  int i; \
  for (i = 0; i < 4; ++i) { \
    sad_array[i] = vpx_highbd_sad##m##x##n##_c(src, src_stride, ref_array[i], \
                                               ref_stride); \
  } \
}
| 265 | |
Geza Lore | 697bf5b | 2016-03-02 11:12:52 +0000 | [diff] [blame] | 266 | #if CONFIG_VP10 && CONFIG_EXT_PARTITION |
| 267 | // 128x128 |
| 268 | highbd_sadMxN(128, 128) |
| 269 | highbd_sadMxNxK(128, 128, 3) |
| 270 | highbd_sadMxNxK(128, 128, 8) |
| 271 | highbd_sadMxNx4D(128, 128) |
| 272 | |
| 273 | // 128x64 |
| 274 | highbd_sadMxN(128, 64) |
| 275 | highbd_sadMxNx4D(128, 64) |
| 276 | |
| 277 | // 64x128 |
| 278 | highbd_sadMxN(64, 128) |
| 279 | highbd_sadMxNx4D(64, 128) |
| 280 | #endif // CONFIG_VP10 && CONFIG_EXT_PARTITION |
| 281 | |
Johann | d5d9289 | 2015-04-17 16:11:38 -0400 | [diff] [blame] | 282 | // 64x64 |
| 283 | highbd_sadMxN(64, 64) |
| 284 | highbd_sadMxNxK(64, 64, 3) |
| 285 | highbd_sadMxNxK(64, 64, 8) |
| 286 | highbd_sadMxNx4D(64, 64) |
| 287 | |
| 288 | // 64x32 |
| 289 | highbd_sadMxN(64, 32) |
| 290 | highbd_sadMxNx4D(64, 32) |
| 291 | |
| 292 | // 32x64 |
| 293 | highbd_sadMxN(32, 64) |
| 294 | highbd_sadMxNx4D(32, 64) |
| 295 | |
| 296 | // 32x32 |
| 297 | highbd_sadMxN(32, 32) |
| 298 | highbd_sadMxNxK(32, 32, 3) |
| 299 | highbd_sadMxNxK(32, 32, 8) |
| 300 | highbd_sadMxNx4D(32, 32) |
| 301 | |
| 302 | // 32x16 |
| 303 | highbd_sadMxN(32, 16) |
| 304 | highbd_sadMxNx4D(32, 16) |
| 305 | |
| 306 | // 16x32 |
| 307 | highbd_sadMxN(16, 32) |
| 308 | highbd_sadMxNx4D(16, 32) |
| 309 | |
| 310 | // 16x16 |
| 311 | highbd_sadMxN(16, 16) |
| 312 | highbd_sadMxNxK(16, 16, 3) |
| 313 | highbd_sadMxNxK(16, 16, 8) |
| 314 | highbd_sadMxNx4D(16, 16) |
| 315 | |
| 316 | // 16x8 |
| 317 | highbd_sadMxN(16, 8) |
| 318 | highbd_sadMxNxK(16, 8, 3) |
| 319 | highbd_sadMxNxK(16, 8, 8) |
| 320 | highbd_sadMxNx4D(16, 8) |
| 321 | |
| 322 | // 8x16 |
| 323 | highbd_sadMxN(8, 16) |
| 324 | highbd_sadMxNxK(8, 16, 3) |
| 325 | highbd_sadMxNxK(8, 16, 8) |
| 326 | highbd_sadMxNx4D(8, 16) |
| 327 | |
| 328 | // 8x8 |
| 329 | highbd_sadMxN(8, 8) |
| 330 | highbd_sadMxNxK(8, 8, 3) |
| 331 | highbd_sadMxNxK(8, 8, 8) |
| 332 | highbd_sadMxNx4D(8, 8) |
| 333 | |
| 334 | // 8x4 |
| 335 | highbd_sadMxN(8, 4) |
| 336 | highbd_sadMxNxK(8, 4, 8) |
| 337 | highbd_sadMxNx4D(8, 4) |
| 338 | |
| 339 | // 4x8 |
| 340 | highbd_sadMxN(4, 8) |
| 341 | highbd_sadMxNxK(4, 8, 8) |
| 342 | highbd_sadMxNx4D(4, 8) |
| 343 | |
| 344 | // 4x4 |
| 345 | highbd_sadMxN(4, 4) |
| 346 | highbd_sadMxNxK(4, 4, 3) |
| 347 | highbd_sadMxNxK(4, 4, 8) |
| 348 | highbd_sadMxNx4D(4, 4) |
| 349 | |
| 350 | #endif // CONFIG_VP9_HIGHBITDEPTH |
Debargha Mukherjee | 1d69cee | 2016-02-29 16:08:07 -0800 | [diff] [blame] | 351 | |
| 352 | #if CONFIG_VP10 && CONFIG_EXT_INTER |
| 353 | static INLINE unsigned int masked_sad(const uint8_t *a, int a_stride, |
| 354 | const uint8_t *b, int b_stride, |
| 355 | const uint8_t *m, int m_stride, |
| 356 | int width, int height) { |
| 357 | int y, x; |
| 358 | unsigned int sad = 0; |
| 359 | |
| 360 | for (y = 0; y < height; y++) { |
| 361 | for (x = 0; x < width; x++) |
| 362 | sad += m[x] * abs(a[x] - b[x]); |
| 363 | |
| 364 | a += a_stride; |
| 365 | b += b_stride; |
| 366 | m += m_stride; |
| 367 | } |
| 368 | sad = (sad + 31) >> 6; |
| 369 | |
| 370 | return sad; |
| 371 | } |
| 372 | |
/* Generate vpx_masked_sadMxN_c() for one block size. */
#define MASKSADMxN(m, n) \
unsigned int vpx_masked_sad##m##x##n##_c(const uint8_t *src, int src_stride, \
                                         const uint8_t *ref, int ref_stride, \
                                         const uint8_t *msk, int msk_stride) { \
  return masked_sad(src, src_stride, ref, ref_stride, msk, msk_stride, m, n); \
}
| 379 | |
// Instantiate the masked-SAD C functions; the 128-wide/tall sizes exist
// only when extended partitions are enabled.
#if CONFIG_EXT_PARTITION
MASKSADMxN(128, 128)
MASKSADMxN(128, 64)
MASKSADMxN(64, 128)
#endif  // CONFIG_EXT_PARTITION
MASKSADMxN(64, 64)
MASKSADMxN(64, 32)
MASKSADMxN(32, 64)
MASKSADMxN(32, 32)
MASKSADMxN(32, 16)
MASKSADMxN(16, 32)
MASKSADMxN(16, 16)
MASKSADMxN(16, 8)
MASKSADMxN(8, 16)
MASKSADMxN(8, 8)
MASKSADMxN(8, 4)
MASKSADMxN(4, 8)
MASKSADMxN(4, 4)
| 398 | |
| 399 | #if CONFIG_VP9_HIGHBITDEPTH |
| 400 | static INLINE unsigned int highbd_masked_sad(const uint8_t *a8, int a_stride, |
| 401 | const uint8_t *b8, int b_stride, |
| 402 | const uint8_t *m, int m_stride, |
| 403 | int width, int height) { |
| 404 | int y, x; |
| 405 | unsigned int sad = 0; |
| 406 | const uint16_t *a = CONVERT_TO_SHORTPTR(a8); |
| 407 | const uint16_t *b = CONVERT_TO_SHORTPTR(b8); |
| 408 | |
| 409 | for (y = 0; y < height; y++) { |
| 410 | for (x = 0; x < width; x++) |
| 411 | sad += m[x] * abs(a[x] - b[x]); |
| 412 | |
| 413 | a += a_stride; |
| 414 | b += b_stride; |
| 415 | m += m_stride; |
| 416 | } |
| 417 | sad = (sad + 31) >> 6; |
| 418 | |
| 419 | return sad; |
| 420 | } |
| 421 | |
/* Generate vpx_highbd_masked_sadMxN_c() for one block size. */
#define HIGHBD_MASKSADMXN(m, n) \
unsigned int vpx_highbd_masked_sad##m##x##n##_c(const uint8_t *src, \
                                                int src_stride, \
                                                const uint8_t *ref, \
                                                int ref_stride, \
                                                const uint8_t *msk, \
                                                int msk_stride) { \
  return highbd_masked_sad(src, src_stride, ref, ref_stride, \
                           msk, msk_stride, m, n); \
}
| 432 | |
// Instantiate the high-bitdepth masked-SAD C functions, mirroring the
// low-bitdepth list above.
#if CONFIG_EXT_PARTITION
HIGHBD_MASKSADMXN(128, 128)
HIGHBD_MASKSADMXN(128, 64)
HIGHBD_MASKSADMXN(64, 128)
#endif  // CONFIG_EXT_PARTITION
HIGHBD_MASKSADMXN(64, 64)
HIGHBD_MASKSADMXN(64, 32)
HIGHBD_MASKSADMXN(32, 64)
HIGHBD_MASKSADMXN(32, 32)
HIGHBD_MASKSADMXN(32, 16)
HIGHBD_MASKSADMXN(16, 32)
HIGHBD_MASKSADMXN(16, 16)
HIGHBD_MASKSADMXN(16, 8)
HIGHBD_MASKSADMXN(8, 16)
HIGHBD_MASKSADMXN(8, 8)
HIGHBD_MASKSADMXN(8, 4)
HIGHBD_MASKSADMXN(4, 8)
HIGHBD_MASKSADMXN(4, 4)
#endif  // CONFIG_VP9_HIGHBITDEPTH
#endif  // CONFIG_VP10 && CONFIG_EXT_INTER