Blame - av1/encoder/wedge_utils.c - aom

blob: e6edbb6af00d93e6e78f669bf68cfcfc79883786 [file] [log] [blame]

Yaowu Xu	c27fc14	2016-08-22 16:08:15 -0700	[diff] [blame]	1	/*
Yaowu Xu	bde4ac8	2016-11-28 15:26:06 -0800	[diff] [blame]	2	* Copyright (c) 2016, Alliance for Open Media. All rights reserved
Yaowu Xu	c27fc14	2016-08-22 16:08:15 -0700	[diff] [blame]	3	*
Yaowu Xu	bde4ac8	2016-11-28 15:26:06 -0800	[diff] [blame]	4	* This source code is subject to the terms of the BSD 2 Clause License and
				5	* the Alliance for Open Media Patent License 1.0. If the BSD 2 Clause License
				6	* was not distributed with this source code in the LICENSE file, you can
				7	* obtain it at www.aomedia.org/license/software. If the Alliance for Open
				8	* Media Patent License 1.0 was not distributed with this source code in the
				9	* PATENTS file, you can obtain it at www.aomedia.org/license/patent.
Yaowu Xu	c27fc14	2016-08-22 16:08:15 -0700	[diff] [blame]	10	*/
				11
				12	#include <assert.h>
				13
Yaowu Xu	f883b42	2016-08-30 14:01:10 -0700	[diff] [blame]	14	#include "aom/aom_integer.h"
Yaowu Xu	c27fc14	2016-08-22 16:08:15 -0700	[diff] [blame]	15
				16	#include "aom_ports/mem.h"
				17
Yaowu Xu	f883b42	2016-08-30 14:01:10 -0700	[diff] [blame]	18	#include "aom_dsp/aom_dsp_common.h"
Yaowu Xu	c27fc14	2016-08-22 16:08:15 -0700	[diff] [blame]	19
				20	#include "av1/common/reconinter.h"
				21
				22	#define MAX_MASK_VALUE (1 << WEDGE_WEIGHT_BITS)
				23
				24	/**
				25	* Computes SSE of a compound predictor constructed from 2 fundamental
				26	* predictors p0 and p1 using blending with mask.
				27	*
				28	* r1: Residuals of p1.
				29	* (source - p1)
				30	* d: Difference of p1 and p0.
				31	* (p1 - p0)
				32	* m: The blending mask
				33	* N: Number of pixels
				34	*
				35	* 'r1', 'd', and 'm' are contiguous.
				36	*
				37	* Computes:
				38	* Sum((MAX_MASK_VALUEr1 + maskd)**2), which is equivalent to:
				39	* Sum((maskr0 + (MAX_MASK_VALUE-mask)r1)**2),
				40	* where r0 is (source - p0), and r1 is (source - p1), which is in turn
				41	* is equivalent to:
				42	* Sum((sourceMAX_MASK_VALUE - (maskp0 + (MAX_MASK_VALUE-mask)p1))*2),
				43	* which is the SSE of the residuals of the compound predictor scaled up by
				44	* MAX_MASK_VALUE**2.
				45	*
				46	* Note that we clamp the partial term in the loop to 16 bits signed. This is
				47	* to facilitate equivalent SIMD implementation. It should have no effect if
				48	* residuals are within 16 - WEDGE_WEIGHT_BITS (=10) signed, which always
				49	* holds for 8 bit input, and on real input, it should hold practically always,
				50	* as residuals are expected to be small.
				51	*/
Yaowu Xu	f883b42	2016-08-30 14:01:10 -0700	[diff] [blame]	52	uint64_t av1_wedge_sse_from_residuals_c(const int16_t r1, const int16_t d,
				53	const uint8_t *m, int N) {
Yaowu Xu	c27fc14	2016-08-22 16:08:15 -0700	[diff] [blame]	54	uint64_t csse = 0;
				55	int i;
Jingning Han	61418bb	2017-01-23 17:12:48 -0800	[diff] [blame]	56
Yaowu Xu	c27fc14	2016-08-22 16:08:15 -0700	[diff] [blame]	57	for (i = 0; i < N; i++) {
				58	int32_t t = MAX_MASK_VALUE * r1[i] + m[i] * d[i];
				59	t = clamp(t, INT16_MIN, INT16_MAX);
				60	csse += t * t;
				61	}
				62	return ROUND_POWER_OF_TWO(csse, 2 * WEDGE_WEIGHT_BITS);
				63	}
				64
				65	/**
				66	* Choose the mask sign for a compound predictor.
				67	*
				68	* ds: Difference of the squares of the residuals.
				69	* r02 - r12
				70	* m: The blending mask
				71	* N: Number of pixels
				72	* limit: Pre-computed threshold value.
				73	* MAX_MASK_VALUE/2 * (sum(r02) - sum(r12))
				74	*
				75	* 'ds' and 'm' are contiguous.
				76	*
				77	* Returns true if the negated mask has lower SSE compared to the positive
				78	* mask. Computation is based on:
				79	* Sum((maskr0 + (MAX_MASK_VALUE-mask)r1)**2)
				80	* >
				81	* Sum(((MAX_MASK_VALUE-mask)r0 + maskr1)**2)
				82	*
				83	* which can be simplified to:
				84	*
				85	* Sum(mask(r02 - r12)) > MAX_MASK_VALUE/2 (sum(r02) - sum(r12))
				86	*
				87	* The right hand side does not depend on the mask, and needs to be passed as
				88	* the 'limit' parameter.
				89	*
				90	* After pre-computing (r02 - r12), which is passed in as 'ds', the left
				91	* hand side is simply a scalar product between an int16_t and uint8_t vector.
				92	*
				93	* Note that for efficiency, ds is stored on 16 bits. Real input residuals
				94	* being small, this should not cause a noticeable issue.
				95	*/
Yaowu Xu	f883b42	2016-08-30 14:01:10 -0700	[diff] [blame]	96	int av1_wedge_sign_from_residuals_c(const int16_t ds, const uint8_t m, int N,
				97	int64_t limit) {
Yaowu Xu	c27fc14	2016-08-22 16:08:15 -0700	[diff] [blame]	98	int64_t acc = 0;
				99
Yaowu Xu	c27fc14	2016-08-22 16:08:15 -0700	[diff] [blame]	100	do {
				101	acc += ds++ *m++;
				102	} while (--N);
				103
				104	return acc > limit;
				105	}
				106
				107	/**
				108	* Compute the element-wise difference of the squares of 2 arrays.
				109	*
				110	* d: Difference of the squares of the inputs: a2 - b2
				111	* a: First input array
				112	* b: Second input array
				113	* N: Number of elements
				114	*
				115	* 'd', 'a', and 'b' are contiguous.
				116	*
				117	* The result is saturated to signed 16 bits.
				118	*/
Yaowu Xu	f883b42	2016-08-30 14:01:10 -0700	[diff] [blame]	119	void av1_wedge_compute_delta_squares_c(int16_t d, const int16_t a,
				120	const int16_t *b, int N) {
Yaowu Xu	c27fc14	2016-08-22 16:08:15 -0700	[diff] [blame]	121	int i;
				122
Yaowu Xu	c27fc14	2016-08-22 16:08:15 -0700	[diff] [blame]	123	for (i = 0; i < N; i++)
				124	d[i] = clamp(a[i] * a[i] - b[i] * b[i], INT16_MIN, INT16_MAX);
				125	}