Yaowu Xu | c27fc14 | 2016-08-22 16:08:15 -0700 | [diff] [blame] | 1 | /* |
Yaowu Xu | bde4ac8 | 2016-11-28 15:26:06 -0800 | [diff] [blame] | 2 | * Copyright (c) 2016, Alliance for Open Media. All rights reserved |
Yaowu Xu | c27fc14 | 2016-08-22 16:08:15 -0700 | [diff] [blame] | 3 | * |
Yaowu Xu | bde4ac8 | 2016-11-28 15:26:06 -0800 | [diff] [blame] | 4 | * This source code is subject to the terms of the BSD 2 Clause License and |
| 5 | * the Alliance for Open Media Patent License 1.0. If the BSD 2 Clause License |
| 6 | * was not distributed with this source code in the LICENSE file, you can |
| 7 | * obtain it at www.aomedia.org/license/software. If the Alliance for Open |
| 8 | * Media Patent License 1.0 was not distributed with this source code in the |
| 9 | * PATENTS file, you can obtain it at www.aomedia.org/license/patent. |
Yaowu Xu | c27fc14 | 2016-08-22 16:08:15 -0700 | [diff] [blame] | 10 | */ |
| 11 | |
| 12 | #include <assert.h> |
| 13 | |
Yaowu Xu | f883b42 | 2016-08-30 14:01:10 -0700 | [diff] [blame] | 14 | #include "aom/aom_integer.h" |
Yaowu Xu | c27fc14 | 2016-08-22 16:08:15 -0700 | [diff] [blame] | 15 | |
| 16 | #include "aom_ports/mem.h" |
| 17 | |
Yaowu Xu | f883b42 | 2016-08-30 14:01:10 -0700 | [diff] [blame] | 18 | #include "aom_dsp/aom_dsp_common.h" |
Yaowu Xu | c27fc14 | 2016-08-22 16:08:15 -0700 | [diff] [blame] | 19 | |
| 20 | #include "av1/common/reconinter.h" |
| 21 | |
| 22 | #define MAX_MASK_VALUE (1 << WEDGE_WEIGHT_BITS) /* Blend weight scale: a mask value m is paired with (MAX_MASK_VALUE - m). */ |
| 23 | |
| 24 | /** |
| 25 | * Computes SSE of a compound predictor constructed from 2 fundamental |
| 26 | * predictors p0 and p1 using blending with mask. |
| 27 | * |
| 28 | * r1: Residuals of p1. |
| 29 | * (source - p1) |
| 30 | * d: Difference of p1 and p0. |
| 31 | * (p1 - p0) |
| 32 | * m: The blending mask |
| 33 | * N: Number of pixels |
| 34 | * |
| 35 | * 'r1', 'd', and 'm' are contiguous. |
| 36 | * |
| 37 | * Computes: |
| 38 | * Sum((MAX_MASK_VALUE*r1 + mask*d)**2), which is equivalent to: |
| 39 | * Sum((mask*r0 + (MAX_MASK_VALUE-mask)*r1)**2), |
| 40 | * where r0 is (source - p0), and r1 is (source - p1), which in turn |
| 41 | * is equivalent to: |
| 42 | * Sum((source*MAX_MASK_VALUE - (mask*p0 + (MAX_MASK_VALUE-mask)*p1))**2), |
| 43 | * which is the SSE of the residuals of the compound predictor scaled up by |
| 44 | * MAX_MASK_VALUE**2. |
| 45 | * |
| 46 | * Note that we clamp the partial term in the loop to 16 bits signed. This is |
| 47 | * to facilitate equivalent SIMD implementation. It should have no effect if |
| 48 | * residuals are within 16 - WEDGE_WEIGHT_BITS (=10) signed, which always |
| 49 | * holds for 8 bit input, and on real input, it should hold practically always, |
| 50 | * as residuals are expected to be small. |
| 51 | */ |
Yaowu Xu | f883b42 | 2016-08-30 14:01:10 -0700 | [diff] [blame] | 52 | uint64_t av1_wedge_sse_from_residuals_c(const int16_t *r1, const int16_t *d, |
| 53 | const uint8_t *m, int N) { |
Yaowu Xu | c27fc14 | 2016-08-22 16:08:15 -0700 | [diff] [blame] | 54 | uint64_t csse = 0; |
| 55 | int i; |
Jingning Han | 61418bb | 2017-01-23 17:12:48 -0800 | [diff] [blame] | 56 | |
Yaowu Xu | c27fc14 | 2016-08-22 16:08:15 -0700 | [diff] [blame] | 57 | for (i = 0; i < N; i++) { |
| 58 | int32_t t = MAX_MASK_VALUE * r1[i] + m[i] * d[i]; |
| 59 | t = clamp(t, INT16_MIN, INT16_MAX); |
| 60 | csse += t * t; |
| 61 | } |
| 62 | return ROUND_POWER_OF_TWO(csse, 2 * WEDGE_WEIGHT_BITS); |
| 63 | } |
| 64 | |
| 65 | /** |
| 66 | * Choose the mask sign for a compound predictor. |
| 67 | * |
| 68 | * ds: Difference of the squares of the residuals. |
| 69 | * r0**2 - r1**2 |
| 70 | * m: The blending mask |
| 71 | * N: Number of pixels |
| 72 | * limit: Pre-computed threshold value. |
| 73 | * MAX_MASK_VALUE/2 * (sum(r0**2) - sum(r1**2)) |
| 74 | * |
| 75 | * 'ds' and 'm' are contiguous. |
| 76 | * |
| 77 | * Returns true if the negated mask has lower SSE compared to the positive |
| 78 | * mask. Computation is based on: |
| 79 | * Sum((mask*r0 + (MAX_MASK_VALUE-mask)*r1)**2) |
| 80 | * > |
| 81 | * Sum(((MAX_MASK_VALUE-mask)*r0 + mask*r1)**2) |
| 82 | * |
| 83 | * which can be simplified to: |
| 84 | * |
| 85 | * Sum(mask*(r0**2 - r1**2)) > MAX_MASK_VALUE/2 * (sum(r0**2) - sum(r1**2)) |
| 86 | * |
| 87 | * The right hand side does not depend on the mask, and needs to be passed as |
| 88 | * the 'limit' parameter. |
| 89 | * |
| 90 | * After pre-computing (r0**2 - r1**2), which is passed in as 'ds', the left |
| 91 | * hand side is simply a scalar product between an int16_t and uint8_t vector. |
| 92 | * |
| 93 | * Note that for efficiency, ds is stored on 16 bits. Real input residuals |
| 94 | * being small, this should not cause a noticeable issue. |
| 95 | */ |
int av1_wedge_sign_from_residuals_c(const int16_t *ds, const uint8_t *m, int N,
                                    int64_t limit) {
  int64_t acc = 0;
  int i;

  /* Counted loop instead of do { ... } while (--N): with N <= 0 the sum is
   * simply empty, rather than reading ds[0]/m[0] and then running past the
   * arrays until N wraps around. Results for N > 0 are unchanged. */
  for (i = 0; i < N; i++) {
    /* int16_t * uint8_t is evaluated in int and cannot overflow; the
     * running sum is kept in 64 bits. */
    acc += ds[i] * m[i];
  }

  /* Non-zero when the mask-weighted sum exceeds the caller's threshold. */
  return acc > limit;
}
| 106 | |
| 107 | /** |
| 108 | * Compute the element-wise difference of the squares of 2 arrays. |
| 109 | * |
| 110 | * d: Difference of the squares of the inputs: a**2 - b**2 |
| 111 | * a: First input array |
| 112 | * b: Second input array |
| 113 | * N: Number of elements |
| 114 | * |
| 115 | * 'd', 'a', and 'b' are contiguous. |
| 116 | * |
| 117 | * The result is saturated to signed 16 bits. |
| 118 | */ |
void av1_wedge_compute_delta_squares_c(int16_t *d, const int16_t *a,
                                       const int16_t *b, int N) {
  int idx;

  for (idx = 0; idx < N; ++idx) {
    /* Squares are formed in int, so neither product nor their difference
     * can overflow for int16_t inputs. */
    const int a_sq = a[idx] * a[idx];
    const int b_sq = b[idx] * b[idx];

    /* Saturate the difference to the int16_t range before storing it. */
    d[idx] = clamp(a_sq - b_sq, INT16_MIN, INT16_MAX);
  }
}