Blame - vp10/common/od_dering.c - avm

blob: af89b80dbac1c76a0d06ac0c6907c53aa0a5864d [file] [log] [blame]

Yaowu Xu	253c001	2016-08-15 10:27:19 -0700	[diff] [blame^]	1	/*Daala video codec
				2	Copyright (c) 2014-2016 Daala project contributors. All rights reserved.
				3
				4	Redistribution and use in source and binary forms, with or without
				5	modification, are permitted provided that the following conditions are met:
				6
				7	- Redistributions of source code must retain the above copyright notice, this
				8	list of conditions and the following disclaimer.
				9
				10	- Redistributions in binary form must reproduce the above copyright notice,
				11	this list of conditions and the following disclaimer in the documentation
				12	and/or other materials provided with the distribution.
				13
				14	THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
				15	AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
				16	IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
				17	DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE
				18	FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
				19	DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
				20	SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
				21	CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
				22	OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
				23	OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.*/
				24
				25	#ifdef HAVE_CONFIG_H
				26	# include "config.h"
				27	#endif
				28
				29	#include <stdlib.h>
				30	#include <math.h>
				31	#include "dering.h"
				32
				33	const od_dering_opt_vtbl OD_DERING_VTBL_C = {
				34	{od_filter_dering_direction_4x4_c, od_filter_dering_direction_8x8_c},
				35	{od_filter_dering_orthogonal_4x4_c, od_filter_dering_orthogonal_8x8_c}
				36	};
				37
				38	/* Generated from gen_filter_tables.c. */
				39	const int OD_DIRECTION_OFFSETS_TABLE[8][3] = {
				40	{-1OD_FILT_BSTRIDE + 1, -2OD_FILT_BSTRIDE + 2, -3*OD_FILT_BSTRIDE + 3 },
				41	{ 0OD_FILT_BSTRIDE + 1, -1OD_FILT_BSTRIDE + 2, -1*OD_FILT_BSTRIDE + 3 },
				42	{ 0OD_FILT_BSTRIDE + 1, 0OD_FILT_BSTRIDE + 2, 0*OD_FILT_BSTRIDE + 3 },
				43	{ 0OD_FILT_BSTRIDE + 1, 1OD_FILT_BSTRIDE + 2, 1*OD_FILT_BSTRIDE + 3 },
				44	{ 1OD_FILT_BSTRIDE + 1, 2OD_FILT_BSTRIDE + 2, 3*OD_FILT_BSTRIDE + 3 },
				45	{ 1OD_FILT_BSTRIDE + 0, 2OD_FILT_BSTRIDE + 1, 3*OD_FILT_BSTRIDE + 1 },
				46	{ 1OD_FILT_BSTRIDE + 0, 2OD_FILT_BSTRIDE + 0, 3*OD_FILT_BSTRIDE + 0 },
				47	{ 1OD_FILT_BSTRIDE + 0, 2OD_FILT_BSTRIDE - 1, 3*OD_FILT_BSTRIDE - 1 },
				48	};
				49
				50	const double OD_DERING_GAIN_TABLE[OD_DERING_LEVELS] = {
				51	0, 0.5, 0.707, 1, 1.41, 2
				52	};
				53
				54	/* Detect direction. 0 means 45-degree up-right, 2 is horizontal, and so on.
				55	The search minimizes the weighted variance along all the lines in a
				56	particular direction, i.e. the squared error between the input and a
				57	"predicted" block where each pixel is replaced by the average along a line
				58	in a particular direction. Since each direction have the same sum(x^2) term,
				59	that term is never computed. See Section 2, step 2, of:
				60	http://jmvalin.ca/notes/intra_paint.pdf */
				61	static int od_dir_find8(const od_dering_in img, int stride, int32_t var,
				62	int coeff_shift) {
				63	int i;
				64	int32_t cost[8] = {0};
				65	int partial[8][15] = {{0}};
				66	int32_t best_cost = 0;
				67	int best_dir = 0;
				68	/* Instead of dividing by n between 2 and 8, we multiply by 357*8/n.
				69	The output is then 840 times larger, but we don't care for finding
				70	the max. */
				71	static const int div_table[] = {0, 840, 420, 280, 210, 168, 140, 120, 105};
				72	for (i = 0; i < 8; i++) {
				73	int j;
				74	for (j = 0; j < 8; j++) {
				75	int x;
				76	/* We subtract 128 here to reduce the maximum range of the squared
				77	partial sums. */
				78	x = (img[i*stride + j] >> coeff_shift) - 128;
				79	partial[0][i + j] += x;
				80	partial[1][i + j/2] += x;
				81	partial[2][i] += x;
				82	partial[3][3 + i - j/2] += x;
				83	partial[4][7 + i - j] += x;
				84	partial[5][3 - i/2 + j] += x;
				85	partial[6][j] += x;
				86	partial[7][i/2 + j] += x;
				87	}
				88	}
				89	for (i = 0; i < 8; i++) {
				90	cost[2] += partial[2][i]*partial[2][i];
				91	cost[6] += partial[6][i]*partial[6][i];
				92	}
				93	cost[2] *= div_table[8];
				94	cost[6] *= div_table[8];
				95	for (i = 0; i < 7; i++) {
				96	cost[0] += (partial[0][i]*partial[0][i]
				97	+ partial[0][14 - i]partial[0][14 - i])div_table[i + 1];
				98	cost[4] += (partial[4][i]*partial[4][i]
				99	+ partial[4][14 - i]partial[4][14 - i])div_table[i + 1];
				100	}
				101	cost[0] += partial[0][7]partial[0][7]div_table[8];
				102	cost[4] += partial[4][7]partial[4][7]div_table[8];
				103	for (i = 1; i < 8; i += 2) {
				104	int j;
				105	for (j = 0; j < 4 + 1; j++) {
				106	cost[i] += partial[i][3 + j]*partial[i][3 + j];
				107	}
				108	cost[i] *= div_table[8];
				109	for (j = 0; j < 4 - 1; j++) {
				110	cost[i] += (partial[i][j]*partial[i][j]
				111	+ partial[i][10 - j]partial[i][10 - j])div_table[2*j + 2];
				112	}
				113	}
				114	for (i = 0; i < 8; i++) {
				115	if (cost[i] > best_cost) {
				116	best_cost = cost[i];
				117	best_dir = i;
				118	}
				119	}
				120	/* Difference between the optimal variance and the variance along the
				121	orthogonal direction. Again, the sum(x^2) terms cancel out. */
				122	*var = best_cost - cost[(best_dir + 4) & 7];
				123	/* We'd normally divide by 840, but dividing by 1024 is close enough
				124	for what we're going to do with this. */
				125	*var >>= 10;
				126	return best_dir;
				127	}
				128
				129	#define OD_DERING_VERY_LARGE (30000)
				130	#define OD_DERING_INBUF_SIZE ((OD_BSIZE_MAX + 2OD_FILT_BORDER)\
				131	(OD_BSIZE_MAX + 2*OD_FILT_BORDER))
				132
				133	/* Smooth in the direction detected. */
				134	void od_filter_dering_direction_c(int16_t y, int ystride, const int16_t in,
				135	int ln, int threshold, int dir) {
				136	int i;
				137	int j;
				138	int k;
				139	static const int taps[3] = {3, 2, 2};
				140	for (i = 0; i < 1 << ln; i++) {
				141	for (j = 0; j < 1 << ln; j++) {
				142	int16_t sum;
				143	int16_t xx;
				144	int16_t yy;
				145	xx = in[i*OD_FILT_BSTRIDE + j];
				146	sum= 0;
				147	for (k = 0; k < 3; k++) {
				148	int16_t p0;
				149	int16_t p1;
				150	p0 = in[i*OD_FILT_BSTRIDE + j + OD_DIRECTION_OFFSETS_TABLE[dir][k]]
				151	- xx;
				152	p1 = in[i*OD_FILT_BSTRIDE + j - OD_DIRECTION_OFFSETS_TABLE[dir][k]]
				153	- xx;
				154	if (abs(p0) < threshold) sum += taps[k]*p0;
				155	if (abs(p1) < threshold) sum += taps[k]*p1;
				156	}
				157	yy = xx + ((sum + 8) >> 4);
				158	y[i*ystride + j] = yy;
				159	}
				160	}
				161	}
				162
				163	void od_filter_dering_direction_4x4_c(int16_t *y, int ystride,
				164	const int16_t *in, int threshold, int dir) {
				165	od_filter_dering_direction_c(y, ystride, in, 2, threshold, dir);
				166	}
				167
				168	void od_filter_dering_direction_8x8_c(int16_t *y, int ystride,
				169	const int16_t *in, int threshold, int dir) {
				170	od_filter_dering_direction_c(y, ystride, in, 3, threshold, dir);
				171	}
				172
				173	/* Smooth in the direction orthogonal to what was detected. */
				174	void od_filter_dering_orthogonal_c(int16_t y, int ystride, const int16_t in,
				175	const od_dering_in *x, int xstride, int ln, int threshold, int dir) {
				176	int i;
				177	int j;
				178	int offset;
				179	if (dir > 0 && dir < 4) offset = OD_FILT_BSTRIDE;
				180	else offset = 1;
				181	for (i = 0; i < 1 << ln; i++) {
				182	for (j = 0; j < 1 << ln; j++) {
				183	int16_t athresh;
				184	int16_t yy;
				185	int16_t sum;
				186	int16_t p;
				187	/* Deringing orthogonal to the direction uses a tighter threshold
				188	because we want to be conservative. We've presumably already
				189	achieved some deringing, so the amount of change is expected
				190	to be low. Also, since we might be filtering across an edge, we
				191	want to make sure not to blur it. That being said, we might want
				192	to be a little bit more aggressive on pure horizontal/vertical
				193	since the ringing there tends to be directional, so it doesn't
				194	get removed by the directional filtering. */
				195	athresh = OD_MINI(threshold, threshold/3
				196	+ abs(in[iOD_FILT_BSTRIDE + j] - x[ixstride + j]));
				197	yy = in[i*OD_FILT_BSTRIDE + j];
				198	sum = 0;
				199	p = in[i*OD_FILT_BSTRIDE + j + offset] - yy;
				200	if (abs(p) < athresh) sum += p;
				201	p = in[i*OD_FILT_BSTRIDE + j - offset] - yy;
				202	if (abs(p) < athresh) sum += p;
				203	p = in[iOD_FILT_BSTRIDE + j + 2offset] - yy;
				204	if (abs(p) < athresh) sum += p;
				205	p = in[iOD_FILT_BSTRIDE + j - 2offset] - yy;
				206	if (abs(p) < athresh) sum += p;
				207	y[iystride + j] = yy + ((3sum + 8) >> 4);
				208	}
				209	}
				210	}
				211
				212	void od_filter_dering_orthogonal_4x4_c(int16_t *y, int ystride,
				213	const int16_t in, const od_dering_in x, int xstride, int threshold,
				214	int dir) {
				215	od_filter_dering_orthogonal_c(y, ystride, in, x, xstride, 2, threshold, dir);
				216	}
				217
				218	void od_filter_dering_orthogonal_8x8_c(int16_t *y, int ystride,
				219	const int16_t in, const od_dering_in x, int xstride, int threshold,
				220	int dir) {
				221	od_filter_dering_orthogonal_c(y, ystride, in, x, xstride, 3, threshold, dir);
				222	}
				223
				224	/* This table approximates x^0.16 with the index being log2(x). It is clamped
				225	to [-.5, 3]. The table is computed as:
				226	round(256min(3, max(.5, 1.08(sqrt(2)2.^([0:17]+8)/256/256).^.16))) /
				227	static const int16_t OD_THRESH_TABLE_Q8[18] = {
				228	128, 134, 150, 168, 188, 210, 234, 262,
				229	292, 327, 365, 408, 455, 509, 569, 635,
				230	710, 768,
				231	};
				232
				233	/* Compute deringing filter threshold for each 8x8 block based on the
				234	directional variance difference. A high variance difference means that we
				235	have a highly directional pattern (e.g. a high contrast edge), so we can
				236	apply more deringing. A low variance means that we either have a low
				237	contrast edge, or a non-directional texture, so we want to be careful not
				238	to blur. */
				239	static void od_compute_thresh(int thresh[OD_DERING_NBLOCKS][OD_DERING_NBLOCKS],
				240	int threshold, int32_t var[OD_DERING_NBLOCKS][OD_DERING_NBLOCKS],
				241	int nhb, int nvb) {
				242	int bx;
				243	int by;
				244	for (by = 0; by < nvb; by++) {
				245	for (bx = 0; bx < nhb; bx++) {
				246	int v1;
				247	/* We use the variance of 8x8 blocks to adjust the threshold. */
				248	v1 = OD_MINI(32767, var[by][bx] >> 6);
				249	thresh[by][bx] = (threshold*OD_THRESH_TABLE_Q8[OD_ILOG(v1)] + 128) >> 8;
				250	}
				251	}
				252	}
				253
				254	void od_dering(const od_dering_opt_vtbl vtbl, int16_t y, int ystride,
				255	const od_dering_in *x, int xstride, int nhb, int nvb, int sbx, int sby,
				256	int nhsb, int nvsb, int xdec, int dir[OD_DERING_NBLOCKS][OD_DERING_NBLOCKS],
				257	int pli, unsigned char *bskip, int skip_stride, int threshold, int overlap,
				258	int coeff_shift) {
				259	int i;
				260	int j;
				261	int bx;
				262	int by;
				263	int16_t inbuf[OD_DERING_INBUF_SIZE];
				264	int16_t *in;
				265	int bsize;
				266	int32_t var[OD_DERING_NBLOCKS][OD_DERING_NBLOCKS];
				267	int thresh[OD_DERING_NBLOCKS][OD_DERING_NBLOCKS];
				268	bsize = 3 - xdec;
				269	in = inbuf + OD_FILT_BORDER*OD_FILT_BSTRIDE + OD_FILT_BORDER;
				270	/* We avoid filtering the pixels for which some of the pixels to average
				271	are outside the frame. We could change the filter instead, but it would
				272	add special cases for any future vectorization. */
				273	for (i = 0; i < OD_DERING_INBUF_SIZE; i++) inbuf[i] = OD_DERING_VERY_LARGE;
				274	for (i = -OD_FILT_BORDER*(sby != 0); i < (nvb << bsize)
				275	+ OD_FILT_BORDER*(sby != nvsb - 1); i++) {
				276	for (j = -OD_FILT_BORDER*(sbx != 0); j < (nhb << bsize)
				277	+ OD_FILT_BORDER*(sbx != nhsb - 1); j++) {
				278	in[iOD_FILT_BSTRIDE + j] = x[ixstride + j];
				279	}
				280	}
				281	if (pli == 0) {
				282	for (by = 0; by < nvb; by++) {
				283	for (bx = 0; bx < nhb; bx++) {
				284	dir[by][bx] = od_dir_find8(&x[8byxstride + 8*bx], xstride,
				285	&var[by][bx], coeff_shift);
				286	}
				287	}
				288	od_compute_thresh(thresh, threshold, var, nhb, nvb);
				289	}
				290	else {
				291	for (by = 0; by < nvb; by++) {
				292	for (bx = 0; bx < nhb; bx++) {
				293	thresh[by][bx] = threshold;
				294	}
				295	}
				296	}
				297	for (by = 0; by < nvb; by++) {
				298	for (bx = 0; bx < nhb; bx++) {
				299	int skip;
				300	# if defined(DAALA_ODINTRIN)
				301	int xstart;
				302	int ystart;
				303	int xend;
				304	int yend;
				305	xstart = ystart = 0;
				306	xend = yend = (2 >> xdec);
				307	if (overlap) {
				308	xstart -= (sbx != 0);
				309	ystart -= (sby != 0);
				310	xend += (sbx != nhsb - 1);
				311	yend += (sby != nvsb - 1);
				312	}
				313	skip = 1;
				314	/* We look at whether the current block and its 4x4 surrounding (due to
				315	lapping) are skipped to avoid filtering the same content multiple
				316	times. */
				317	for (i = ystart; i < yend; i++) {
				318	for (j = xstart; j < xend; j++) {
				319	skip = skip && bskip[((by << 1 >> xdec) + i)*skip_stride
				320	+ (bx << 1 >> xdec) + j];
				321	}
				322	}
				323	#else
				324	(void)overlap;
				325	skip = bskip[by*skip_stride + bx];
				326	#endif
				327	if (skip) thresh[by][bx] = 0;
				328	}
				329	}
				330	for (by = 0; by < nvb; by++) {
				331	for (bx = 0; bx < nhb; bx++) {
				332	(vtbl->filter_dering_direction[bsize - OD_LOG_BSIZE0])(
				333	&y[(by*ystride << bsize) + (bx << bsize)], ystride,
				334	&in[(by*OD_FILT_BSTRIDE << bsize) + (bx << bsize)],
				335	thresh[by][bx], dir[by][bx]);
				336	}
				337	}
				338	for (i = 0; i < nvb << bsize; i++) {
				339	for (j = 0; j < nhb << bsize; j++) {
				340	in[iOD_FILT_BSTRIDE + j] = y[iystride + j];
				341	}
				342	}
				343	for (by = 0; by < nvb; by++) {
				344	for (bx = 0; bx < nhb; bx++) {
				345	(vtbl->filter_dering_orthogonal[bsize - OD_LOG_BSIZE0])(
				346	&y[(by*ystride << bsize) + (bx << bsize)], ystride,
				347	&in[(by*OD_FILT_BSTRIDE << bsize) + (bx << bsize)],
				348	&x[(by*xstride << bsize) + (bx << bsize)], xstride,
				349	thresh[by][bx], dir[by][bx]);
				350	}
				351	}
				352	}