Yaowu Xu | c27fc14 | 2016-08-22 16:08:15 -0700 | [diff] [blame] | 1 | /* |
Yaowu Xu | 2ab7ff0 | 2016-09-02 12:04:54 -0700 | [diff] [blame] | 2 | * Copyright (c) 2016, Alliance for Open Media. All rights reserved |
Yaowu Xu | c27fc14 | 2016-08-22 16:08:15 -0700 | [diff] [blame] | 3 | * |
Yaowu Xu | 2ab7ff0 | 2016-09-02 12:04:54 -0700 | [diff] [blame] | 4 | * This source code is subject to the terms of the BSD 2 Clause License and |
| 5 | * the Alliance for Open Media Patent License 1.0. If the BSD 2 Clause License |
| 6 | * was not distributed with this source code in the LICENSE file, you can |
| 7 | * obtain it at www.aomedia.org/license/software. If the Alliance for Open |
| 8 | * Media Patent License 1.0 was not distributed with this source code in the |
| 9 | * PATENTS file, you can obtain it at www.aomedia.org/license/patent. |
| 10 | * |
Yaowu Xu | c27fc14 | 2016-08-22 16:08:15 -0700 | [diff] [blame] | 11 | */ |
| 12 | |
| 13 | #include <math.h> |
| 14 | |
Yaowu Xu | f883b42 | 2016-08-30 14:01:10 -0700 | [diff] [blame] | 15 | #include "./aom_config.h" |
| 16 | #include "./aom_dsp_rtcd.h" |
David Barker | 9666e75 | 2016-12-08 11:25:47 +0000 | [diff] [blame] | 17 | #include "./aom_scale_rtcd.h" |
Yaowu Xu | c27fc14 | 2016-08-22 16:08:15 -0700 | [diff] [blame] | 18 | #include "av1/common/onyxc_int.h" |
| 19 | #include "av1/common/restoration.h" |
Yaowu Xu | f883b42 | 2016-08-30 14:01:10 -0700 | [diff] [blame] | 20 | #include "aom_dsp/aom_dsp_common.h" |
| 21 | #include "aom_mem/aom_mem.h" |
Yaowu Xu | c27fc14 | 2016-08-22 16:08:15 -0700 | [diff] [blame] | 22 | #include "aom_ports/mem.h" |
| 23 | |
Debargha Mukherjee | 8f209a8 | 2016-10-12 10:47:01 -0700 | [diff] [blame] | 24 | const sgr_params_type sgr_params[SGRPROJ_PARAMS] = { |
Debargha Mukherjee | b7bb097 | 2017-03-09 06:47:43 -0800 | [diff] [blame] | 25 | #if USE_HIGHPASS_IN_SGRPROJ |
| 26 | // corner, edge, r2, eps2 |
| 27 | { -1, 2, 1, 1 }, { -1, 2, 1, 2 }, { -1, 2, 1, 3 }, { -1, 2, 1, 4 }, |
| 28 | { -1, 2, 1, 5 }, { -2, 3, 1, 2 }, { -2, 3, 1, 3 }, { -2, 3, 1, 4 }, |
| 29 | { -2, 3, 1, 5 }, { -2, 3, 1, 6 }, { -3, 4, 1, 3 }, { -3, 4, 1, 4 }, |
| 30 | { -3, 4, 1, 5 }, { -3, 4, 1, 6 }, { -3, 4, 1, 7 }, { -3, 4, 1, 8 } |
| 31 | #else |
Debargha Mukherjee | 4bfd72e | 2017-03-08 22:20:31 -0800 | [diff] [blame] | 32 | // r1, eps1, r2, eps2 |
Debargha Mukherjee | b3c43bc | 2017-02-01 13:09:03 -0800 | [diff] [blame] | 33 | { 2, 12, 1, 4 }, { 2, 15, 1, 6 }, { 2, 18, 1, 8 }, { 2, 20, 1, 9 }, |
| 34 | { 2, 22, 1, 10 }, { 2, 25, 1, 11 }, { 2, 35, 1, 12 }, { 2, 45, 1, 13 }, |
| 35 | { 2, 55, 1, 14 }, { 2, 65, 1, 15 }, { 2, 75, 1, 16 }, { 3, 30, 1, 10 }, |
| 36 | { 3, 50, 1, 12 }, { 3, 50, 2, 25 }, { 3, 60, 2, 35 }, { 3, 70, 2, 45 }, |
Debargha Mukherjee | b7bb097 | 2017-03-09 06:47:43 -0800 | [diff] [blame] | 37 | #endif |
Debargha Mukherjee | 8f209a8 | 2016-10-12 10:47:01 -0700 | [diff] [blame] | 38 | }; |
| 39 | |
Yaowu Xu | c27fc14 | 2016-08-22 16:08:15 -0700 | [diff] [blame] | 40 | typedef void (*restore_func_type)(uint8_t *data8, int width, int height, |
| 41 | int stride, RestorationInternal *rst, |
David Barker | 9666e75 | 2016-12-08 11:25:47 +0000 | [diff] [blame] | 42 | uint8_t *dst8, int dst_stride); |
Sebastien Alaiwan | 71e8784 | 2017-04-12 16:03:28 +0200 | [diff] [blame] | 43 | #if CONFIG_HIGHBITDEPTH |
Yaowu Xu | c27fc14 | 2016-08-22 16:08:15 -0700 | [diff] [blame] | 44 | typedef void (*restore_func_highbd_type)(uint8_t *data8, int width, int height, |
| 45 | int stride, RestorationInternal *rst, |
David Barker | 9666e75 | 2016-12-08 11:25:47 +0000 | [diff] [blame] | 46 | int bit_depth, uint8_t *dst8, |
| 47 | int dst_stride); |
Sebastien Alaiwan | 71e8784 | 2017-04-12 16:03:28 +0200 | [diff] [blame] | 48 | #endif // CONFIG_HIGHBITDEPTH |
Yaowu Xu | c27fc14 | 2016-08-22 16:08:15 -0700 | [diff] [blame] | 49 | |
David Barker | befcc42 | 2017-01-31 09:42:10 +0000 | [diff] [blame] | 50 | int av1_alloc_restoration_struct(AV1_COMMON *cm, RestorationInfo *rst_info, |
| 51 | int width, int height) { |
Debargha Mukherjee | 1008c1e | 2017-03-06 19:18:43 -0800 | [diff] [blame] | 52 | const int ntiles = av1_get_rest_ntiles( |
| 53 | width, height, rst_info->restoration_tilesize, NULL, NULL, NULL, NULL); |
Alex Converse | 7f094f1 | 2017-02-23 17:29:40 -0800 | [diff] [blame] | 54 | aom_free(rst_info->restoration_type); |
Alex Converse | 232e384 | 2017-02-24 12:24:36 -0800 | [diff] [blame] | 55 | CHECK_MEM_ERROR(cm, rst_info->restoration_type, |
Alex Converse | 7f094f1 | 2017-02-23 17:29:40 -0800 | [diff] [blame] | 56 | (RestorationType *)aom_malloc( |
Alex Converse | 232e384 | 2017-02-24 12:24:36 -0800 | [diff] [blame] | 57 | sizeof(*rst_info->restoration_type) * ntiles)); |
David Barker | befcc42 | 2017-01-31 09:42:10 +0000 | [diff] [blame] | 58 | aom_free(rst_info->wiener_info); |
| 59 | CHECK_MEM_ERROR( |
| 60 | cm, rst_info->wiener_info, |
| 61 | (WienerInfo *)aom_memalign(16, sizeof(*rst_info->wiener_info) * ntiles)); |
David Barker | 1e8e6b9 | 2017-01-13 13:45:51 +0000 | [diff] [blame] | 62 | memset(rst_info->wiener_info, 0, sizeof(*rst_info->wiener_info) * ntiles); |
Alex Converse | 7f094f1 | 2017-02-23 17:29:40 -0800 | [diff] [blame] | 63 | aom_free(rst_info->sgrproj_info); |
David Barker | befcc42 | 2017-01-31 09:42:10 +0000 | [diff] [blame] | 64 | CHECK_MEM_ERROR( |
| 65 | cm, rst_info->sgrproj_info, |
Alex Converse | 7f094f1 | 2017-02-23 17:29:40 -0800 | [diff] [blame] | 66 | (SgrprojInfo *)aom_malloc(sizeof(*rst_info->sgrproj_info) * ntiles)); |
Debargha Mukherjee | 874d36d | 2016-12-14 16:53:17 -0800 | [diff] [blame] | 67 | return ntiles; |
| 68 | } |
| 69 | |
| 70 | void av1_free_restoration_struct(RestorationInfo *rst_info) { |
| 71 | aom_free(rst_info->restoration_type); |
| 72 | rst_info->restoration_type = NULL; |
| 73 | aom_free(rst_info->wiener_info); |
| 74 | rst_info->wiener_info = NULL; |
| 75 | aom_free(rst_info->sgrproj_info); |
| 76 | rst_info->sgrproj_info = NULL; |
Debargha Mukherjee | 874d36d | 2016-12-14 16:53:17 -0800 | [diff] [blame] | 77 | } |
| 78 | |
#define MAX_RADIUS 3  // Only 1, 2, 3 allowed
#define MAX_EPS 80    // Max value of eps
// Largest possible window population: (2 * MAX_RADIUS + 1)^2.
#define MAX_NELEM ((2 * MAX_RADIUS + 1) * (2 * MAX_RADIUS + 1))
// Fixed-point precision (in bits) of the entries of sgrproj_mtable.
#define SGRPROJ_MTABLE_BITS 20
#define SGRPROJ_RECIP_BITS 12

// TODO(debargha): This table can be substantially reduced since only a few
// values are actually used.
// sgrproj_mtable[e - 1][n - 1] holds round(2^SGRPROJ_MTABLE_BITS / (n * n * e))
// for e in 1..MAX_EPS and n in 1..MAX_NELEM. It is filled by
// GenSgrprojVtable() and is all zeros until that has run.
int sgrproj_mtable[MAX_EPS][MAX_NELEM];

// Fills sgrproj_mtable. The largest divisor is 49 * 49 * 80 = 192080 and the
// largest entry is 1 << 20, so all intermediate values fit comfortably in int.
static void GenSgrprojVtable(void) {
  int eps_idx, n_idx;
  for (eps_idx = 0; eps_idx < MAX_EPS; ++eps_idx) {
    const int eps = eps_idx + 1;
    for (n_idx = 0; n_idx < MAX_NELEM; ++n_idx) {
      const int n = n_idx + 1;
      const int n2e = n * n * eps;
      // Adding half the divisor before dividing rounds to nearest.
      sgrproj_mtable[eps_idx][n_idx] =
          ((1 << SGRPROJ_MTABLE_BITS) + n2e / 2) / n2e;
    }
  }
}
Debargha Mukherjee | 4be1262 | 2017-02-15 21:38:02 -0800 | [diff] [blame] | 98 | |
// Precomputes the lookup data used by loop restoration. At present this only
// builds sgrproj_mtable via GenSgrprojVtable().
void av1_loop_restoration_precal() {
  GenSgrprojVtable();
}
Yaowu Xu | c27fc14 | 2016-08-22 16:08:15 -0700 | [diff] [blame] | 100 | |
Debargha Mukherjee | d748914 | 2017-01-05 13:58:16 -0800 | [diff] [blame] | 101 | static void loop_restoration_init(RestorationInternal *rst, int kf) { |
Debargha Mukherjee | 5cd2ab9 | 2016-09-08 15:15:17 -0700 | [diff] [blame] | 102 | rst->keyframe = kf; |
Yaowu Xu | c27fc14 | 2016-08-22 16:08:15 -0700 | [diff] [blame] | 103 | } |
| 104 | |
David Barker | 33f3bfd | 2017-01-06 15:34:50 +0000 | [diff] [blame] | 105 | void extend_frame(uint8_t *data, int width, int height, int stride) { |
David Barker | 025b254 | 2016-12-08 11:50:42 +0000 | [diff] [blame] | 106 | uint8_t *data_p; |
| 107 | int i; |
| 108 | for (i = 0; i < height; ++i) { |
| 109 | data_p = data + i * stride; |
Debargha Mukherjee | 999d2f6 | 2016-12-15 13:23:21 -0800 | [diff] [blame] | 110 | memset(data_p - WIENER_HALFWIN, data_p[0], WIENER_HALFWIN); |
| 111 | memset(data_p + width, data_p[width - 1], WIENER_HALFWIN); |
David Barker | 025b254 | 2016-12-08 11:50:42 +0000 | [diff] [blame] | 112 | } |
Debargha Mukherjee | 999d2f6 | 2016-12-15 13:23:21 -0800 | [diff] [blame] | 113 | data_p = data - WIENER_HALFWIN; |
| 114 | for (i = -WIENER_HALFWIN; i < 0; ++i) { |
| 115 | memcpy(data_p + i * stride, data_p, width + 2 * WIENER_HALFWIN); |
David Barker | 025b254 | 2016-12-08 11:50:42 +0000 | [diff] [blame] | 116 | } |
Debargha Mukherjee | 999d2f6 | 2016-12-15 13:23:21 -0800 | [diff] [blame] | 117 | for (i = height; i < height + WIENER_HALFWIN; ++i) { |
David Barker | 025b254 | 2016-12-08 11:50:42 +0000 | [diff] [blame] | 118 | memcpy(data_p + i * stride, data_p + (height - 1) * stride, |
Debargha Mukherjee | 999d2f6 | 2016-12-15 13:23:21 -0800 | [diff] [blame] | 119 | width + 2 * WIENER_HALFWIN); |
David Barker | 025b254 | 2016-12-08 11:50:42 +0000 | [diff] [blame] | 120 | } |
| 121 | } |
| 122 | |
David Barker | 9666e75 | 2016-12-08 11:25:47 +0000 | [diff] [blame] | 123 | static void loop_copy_tile(uint8_t *data, int tile_idx, int subtile_idx, |
| 124 | int subtile_bits, int width, int height, int stride, |
| 125 | RestorationInternal *rst, uint8_t *dst, |
| 126 | int dst_stride) { |
Debargha Mukherjee | d748914 | 2017-01-05 13:58:16 -0800 | [diff] [blame] | 127 | const int tile_width = rst->tile_width; |
| 128 | const int tile_height = rst->tile_height; |
David Barker | 9666e75 | 2016-12-08 11:25:47 +0000 | [diff] [blame] | 129 | int i; |
| 130 | int h_start, h_end, v_start, v_end; |
| 131 | av1_get_rest_tile_limits(tile_idx, subtile_idx, subtile_bits, rst->nhtiles, |
| 132 | rst->nvtiles, tile_width, tile_height, width, height, |
| 133 | 0, 0, &h_start, &h_end, &v_start, &v_end); |
| 134 | for (i = v_start; i < v_end; ++i) |
| 135 | memcpy(dst + i * dst_stride + h_start, data + i * stride + h_start, |
| 136 | h_end - h_start); |
| 137 | } |
| 138 | |
Debargha Mukherjee | 5cd2ab9 | 2016-09-08 15:15:17 -0700 | [diff] [blame] | 139 | static void loop_wiener_filter_tile(uint8_t *data, int tile_idx, int width, |
| 140 | int height, int stride, |
David Barker | 025b254 | 2016-12-08 11:50:42 +0000 | [diff] [blame] | 141 | RestorationInternal *rst, uint8_t *dst, |
David Barker | 9666e75 | 2016-12-08 11:25:47 +0000 | [diff] [blame] | 142 | int dst_stride) { |
Debargha Mukherjee | d748914 | 2017-01-05 13:58:16 -0800 | [diff] [blame] | 143 | const int tile_width = rst->tile_width; |
| 144 | const int tile_height = rst->tile_height; |
Debargha Mukherjee | 5cd2ab9 | 2016-09-08 15:15:17 -0700 | [diff] [blame] | 145 | int i, j; |
| 146 | int h_start, h_end, v_start, v_end; |
Debargha Mukherjee | 994ccd7 | 2017-01-06 11:18:23 -0800 | [diff] [blame] | 147 | if (rst->rsi->restoration_type[tile_idx] == RESTORE_NONE) { |
David Barker | 9666e75 | 2016-12-08 11:25:47 +0000 | [diff] [blame] | 148 | loop_copy_tile(data, tile_idx, 0, 0, width, height, stride, rst, dst, |
| 149 | dst_stride); |
| 150 | return; |
| 151 | } |
Debargha Mukherjee | 5cd2ab9 | 2016-09-08 15:15:17 -0700 | [diff] [blame] | 152 | av1_get_rest_tile_limits(tile_idx, 0, 0, rst->nhtiles, rst->nvtiles, |
David Barker | 025b254 | 2016-12-08 11:50:42 +0000 | [diff] [blame] | 153 | tile_width, tile_height, width, height, 0, 0, |
Debargha Mukherjee | 5cd2ab9 | 2016-09-08 15:15:17 -0700 | [diff] [blame] | 154 | &h_start, &h_end, &v_start, &v_end); |
David Barker | 025b254 | 2016-12-08 11:50:42 +0000 | [diff] [blame] | 155 | // Convolve the whole tile (done in blocks here to match the requirements |
| 156 | // of the vectorized convolve functions, but the result is equivalent) |
| 157 | for (i = v_start; i < v_end; i += MAX_SB_SIZE) |
| 158 | for (j = h_start; j < h_end; j += MAX_SB_SIZE) { |
| 159 | int w = AOMMIN(MAX_SB_SIZE, (h_end - j + 15) & ~15); |
| 160 | int h = AOMMIN(MAX_SB_SIZE, (v_end - i + 15) & ~15); |
| 161 | const uint8_t *data_p = data + i * stride + j; |
| 162 | uint8_t *dst_p = dst + i * dst_stride + j; |
Debargha Mukherjee | 28d15c7 | 2017-05-12 10:44:03 -0700 | [diff] [blame] | 163 | #if USE_WIENER_HIGH_INTERMEDIATE_PRECISION |
| 164 | aom_convolve8_add_src_hip(data_p, stride, dst_p, dst_stride, |
| 165 | rst->rsi->wiener_info[tile_idx].hfilter, 16, |
| 166 | rst->rsi->wiener_info[tile_idx].vfilter, 16, w, |
| 167 | h); |
| 168 | #else |
David Barker | 1e8e6b9 | 2017-01-13 13:45:51 +0000 | [diff] [blame] | 169 | aom_convolve8_add_src(data_p, stride, dst_p, dst_stride, |
| 170 | rst->rsi->wiener_info[tile_idx].hfilter, 16, |
| 171 | rst->rsi->wiener_info[tile_idx].vfilter, 16, w, h); |
Debargha Mukherjee | 28d15c7 | 2017-05-12 10:44:03 -0700 | [diff] [blame] | 172 | #endif // USE_WIENER_HIGH_INTERMEDIATE_PRECISION |
Debargha Mukherjee | 5cd2ab9 | 2016-09-08 15:15:17 -0700 | [diff] [blame] | 173 | } |
Debargha Mukherjee | 5cd2ab9 | 2016-09-08 15:15:17 -0700 | [diff] [blame] | 174 | } |
| 175 | |
Yaowu Xu | c27fc14 | 2016-08-22 16:08:15 -0700 | [diff] [blame] | 176 | static void loop_wiener_filter(uint8_t *data, int width, int height, int stride, |
David Barker | 025b254 | 2016-12-08 11:50:42 +0000 | [diff] [blame] | 177 | RestorationInternal *rst, uint8_t *dst, |
| 178 | int dst_stride) { |
| 179 | int tile_idx; |
| 180 | extend_frame(data, width, height, stride); |
Yaowu Xu | c27fc14 | 2016-08-22 16:08:15 -0700 | [diff] [blame] | 181 | for (tile_idx = 0; tile_idx < rst->ntiles; ++tile_idx) { |
David Barker | 025b254 | 2016-12-08 11:50:42 +0000 | [diff] [blame] | 182 | loop_wiener_filter_tile(data, tile_idx, width, height, stride, rst, dst, |
| 183 | dst_stride); |
Yaowu Xu | c27fc14 | 2016-08-22 16:08:15 -0700 | [diff] [blame] | 184 | } |
Debargha Mukherjee | 5cd2ab9 | 2016-09-08 15:15:17 -0700 | [diff] [blame] | 185 | } |
Yaowu Xu | c27fc14 | 2016-08-22 16:08:15 -0700 | [diff] [blame] | 186 | |
David Barker | 6928a5d | 2017-01-05 11:29:22 +0000 | [diff] [blame] | 187 | /* Calculate windowed sums (if sqr=0) or sums of squares (if sqr=1) |
| 188 | over the input. The window is of size (2r + 1)x(2r + 1), and we |
Debargha Mukherjee | 8a70919 | 2017-01-10 11:29:31 -0800 | [diff] [blame] | 189 | specialize to r = 1, 2, 3. A default function is used for r > 3. |
Debargha Mukherjee | 8f209a8 | 2016-10-12 10:47:01 -0700 | [diff] [blame] | 190 | |
David Barker | 6928a5d | 2017-01-05 11:29:22 +0000 | [diff] [blame] | 191 | Each loop follows the same format: We keep a window's worth of input |
| 192 | in individual variables and select data out of that as appropriate. |
| 193 | */ |
// Value that one input sample contributes to the box sum: the sample itself
// for plain sums (sqr == 0) or its square for sums of squares (sqr != 0).
static int boxsum1_term(int32_t v, int sqr) { return sqr ? v * v : v; }

// 3x3 (r = 1) windowed sum of src, or of src squared when sqr is non-zero,
// written to dst. Windows are clipped at the image border, so border outputs
// sum fewer samples. The code assumes width >= 3 and height >= 3.
static void boxsum1(int32_t *src, int width, int height, int src_stride,
                    int sqr, int32_t *dst, int dst_stride) {
  int x, y;
  int t0, t1, t2;

  // Pass 1: vertical 3-tap sums, one column at a time, from src into dst.
  for (x = 0; x < width; ++x) {
    t0 = boxsum1_term(src[x], sqr);
    t1 = boxsum1_term(src[src_stride + x], sqr);
    t2 = boxsum1_term(src[2 * src_stride + x], sqr);

    // Top row: only two samples are inside the image.
    dst[x] = t0 + t1;
    // Here t0, t1, t2 are the terms of rows y - 1, y, y + 1.
    for (y = 1; y < height - 2; ++y) {
      dst[y * dst_stride + x] = t0 + t1 + t2;
      t0 = t1;
      t1 = t2;
      t2 = boxsum1_term(src[(y + 2) * src_stride + x], sqr);
    }
    // Last full window, then the clipped bottom row.
    dst[y * dst_stride + x] = t0 + t1 + t2;
    dst[(y + 1) * dst_stride + x] = t1 + t2;
  }

  // Pass 2: horizontal 3-tap sums, in place on dst. The sliding window keeps
  // the pre-update values, so overwriting dst as we go is safe.
  for (y = 0; y < height; ++y) {
    int32_t *const line = dst + y * dst_stride;
    t0 = line[0];
    t1 = line[1];
    t2 = line[2];

    line[0] = t0 + t1;
    // Here t0, t1, t2 are the vertical sums at columns x - 1, x, x + 1.
    for (x = 1; x < width - 2; ++x) {
      line[x] = t0 + t1 + t2;
      t0 = t1;
      t1 = t2;
      t2 = line[x + 2];
    }
    line[x] = t0 + t1 + t2;
    line[x + 1] = t1 + t2;
  }
}
| 258 | |
// Value that one input sample contributes to the box sum: the sample itself
// for plain sums (sqr == 0) or its square for sums of squares (sqr != 0).
static int boxsum2_term(int32_t v, int sqr) { return sqr ? v * v : v; }

// 5x5 (r = 2) windowed sum of src, or of src squared when sqr is non-zero,
// written to dst. Windows are clipped at the image border, so border outputs
// sum fewer samples. The code assumes width >= 5 and height >= 5.
static void boxsum2(int32_t *src, int width, int height, int src_stride,
                    int sqr, int32_t *dst, int dst_stride) {
  int x, y;
  int t0, t1, t2, t3, t4;

  // Pass 1: vertical 5-tap sums, one column at a time, from src into dst.
  for (x = 0; x < width; ++x) {
    t0 = boxsum2_term(src[x], sqr);
    t1 = boxsum2_term(src[src_stride + x], sqr);
    t2 = boxsum2_term(src[2 * src_stride + x], sqr);
    t3 = boxsum2_term(src[3 * src_stride + x], sqr);
    t4 = boxsum2_term(src[4 * src_stride + x], sqr);

    // First two rows: the window is clipped at the top.
    dst[x] = t0 + t1 + t2;
    dst[dst_stride + x] = t0 + t1 + t2 + t3;
    // Here t0..t4 are the terms of rows y - 2 .. y + 2.
    for (y = 2; y < height - 3; ++y) {
      dst[y * dst_stride + x] = t0 + t1 + t2 + t3 + t4;
      t0 = t1;
      t1 = t2;
      t2 = t3;
      t3 = t4;
      t4 = boxsum2_term(src[(y + 3) * src_stride + x], sqr);
    }
    // Last full window, then the two rows clipped at the bottom.
    dst[y * dst_stride + x] = t0 + t1 + t2 + t3 + t4;
    dst[(y + 1) * dst_stride + x] = t1 + t2 + t3 + t4;
    dst[(y + 2) * dst_stride + x] = t2 + t3 + t4;
  }

  // Pass 2: horizontal 5-tap sums, in place on dst. The sliding window keeps
  // the pre-update values, so overwriting dst as we go is safe.
  for (y = 0; y < height; ++y) {
    int32_t *const line = dst + y * dst_stride;
    t0 = line[0];
    t1 = line[1];
    t2 = line[2];
    t3 = line[3];
    t4 = line[4];

    line[0] = t0 + t1 + t2;
    line[1] = t0 + t1 + t2 + t3;
    // Here t0..t4 are the vertical sums at columns x - 2 .. x + 2.
    for (x = 2; x < width - 3; ++x) {
      line[x] = t0 + t1 + t2 + t3 + t4;
      t0 = t1;
      t1 = t2;
      t2 = t3;
      t3 = t4;
      t4 = line[x + 3];
    }
    line[x] = t0 + t1 + t2 + t3 + t4;
    line[x + 1] = t1 + t2 + t3 + t4;
    line[x + 2] = t2 + t3 + t4;
  }
}
| 345 | |
// Value that one input sample contributes to the box sum: the sample itself
// for plain sums (sqr == 0) or its square for sums of squares (sqr != 0).
static int boxsum3_term(int32_t v, int sqr) { return sqr ? v * v : v; }

// 7x7 (r = 3) windowed sum of src, or of src squared when sqr is non-zero,
// written to dst. Windows are clipped at the image border, so border outputs
// sum fewer samples. The code assumes width >= 7 and height >= 7: it reads
// rows/columns 0..6 unconditionally.
static void boxsum3(int32_t *src, int width, int height, int src_stride,
                    int sqr, int32_t *dst, int dst_stride) {
  int x, y;
  int t0, t1, t2, t3, t4, t5, t6;

  // Pass 1: vertical 7-tap sums, one column at a time, from src into dst.
  for (x = 0; x < width; ++x) {
    t0 = boxsum3_term(src[x], sqr);
    t1 = boxsum3_term(src[1 * src_stride + x], sqr);
    t2 = boxsum3_term(src[2 * src_stride + x], sqr);
    t3 = boxsum3_term(src[3 * src_stride + x], sqr);
    t4 = boxsum3_term(src[4 * src_stride + x], sqr);
    t5 = boxsum3_term(src[5 * src_stride + x], sqr);
    t6 = boxsum3_term(src[6 * src_stride + x], sqr);

    // First three rows: the window is clipped at the top.
    dst[x] = t0 + t1 + t2 + t3;
    dst[dst_stride + x] = t0 + t1 + t2 + t3 + t4;
    dst[2 * dst_stride + x] = t0 + t1 + t2 + t3 + t4 + t5;
    // Here t0..t6 are the terms of rows y - 3 .. y + 3.
    for (y = 3; y < height - 4; ++y) {
      dst[y * dst_stride + x] = t0 + t1 + t2 + t3 + t4 + t5 + t6;
      t0 = t1;
      t1 = t2;
      t2 = t3;
      t3 = t4;
      t4 = t5;
      t5 = t6;
      t6 = boxsum3_term(src[(y + 4) * src_stride + x], sqr);
    }
    // Last full window, then the three rows clipped at the bottom.
    dst[y * dst_stride + x] = t0 + t1 + t2 + t3 + t4 + t5 + t6;
    dst[(y + 1) * dst_stride + x] = t1 + t2 + t3 + t4 + t5 + t6;
    dst[(y + 2) * dst_stride + x] = t2 + t3 + t4 + t5 + t6;
    dst[(y + 3) * dst_stride + x] = t3 + t4 + t5 + t6;
  }

  // Pass 2: horizontal 7-tap sums, in place on dst. The sliding window keeps
  // the pre-update values, so overwriting dst as we go is safe.
  for (y = 0; y < height; ++y) {
    int32_t *const line = dst + y * dst_stride;
    t0 = line[0];
    t1 = line[1];
    t2 = line[2];
    t3 = line[3];
    t4 = line[4];
    t5 = line[5];
    t6 = line[6];

    line[0] = t0 + t1 + t2 + t3;
    line[1] = t0 + t1 + t2 + t3 + t4;
    line[2] = t0 + t1 + t2 + t3 + t4 + t5;
    // Here t0..t6 are the vertical sums at columns x - 3 .. x + 3.
    for (x = 3; x < width - 4; ++x) {
      line[x] = t0 + t1 + t2 + t3 + t4 + t5 + t6;
      t0 = t1;
      t1 = t2;
      t2 = t3;
      t3 = t4;
      t4 = t5;
      t5 = t6;
      t6 = line[x + 4];
    }
    line[x] = t0 + t1 + t2 + t3 + t4 + t5 + t6;
    line[x + 1] = t1 + t2 + t3 + t4 + t5 + t6;
    line[x + 2] = t2 + t3 + t4 + t5 + t6;
    line[x + 3] = t3 + t4 + t5 + t6;
  }
}
| 438 | |
// Generic version for any r. To be removed after experiments are done.
//
// Computes the (2r + 1) x (2r + 1) windowed sum of src (or of src squared when
// sqr is non-zero) into dst, with windows clipped at the image border. It
// works from running (prefix) sums held in a temporary width x height buffer:
// first down each column, then along each row.
//
// The indexing assumes width >= 2r + 1 and height >= 2r + 1.
static void boxsumr(int32_t *src, int width, int height, int src_stride, int r,
                    int sqr, int32_t *dst, int dst_stride) {
  // The size is computed in size_t to avoid an int overflow in width * height.
  int32_t *tmp = aom_malloc(sizeof(*tmp) * width * height);
  const int tmp_stride = width;
  int i, j;

  // NOTE(review): the allocation result is not checked. This function returns
  // void, so a failure cannot be reported to the caller from here.

  // tmp(i, j) <- sum of the terms in column j of src, rows 0..i.
  if (sqr) {
    for (j = 0; j < width; ++j) tmp[j] = src[j] * src[j];
    for (j = 0; j < width; ++j)
      for (i = 1; i < height; ++i)
        tmp[i * tmp_stride + j] =
            tmp[(i - 1) * tmp_stride + j] +
            src[i * src_stride + j] * src[i * src_stride + j];
  } else {
    memcpy(tmp, src, sizeof(*tmp) * width);
    for (j = 0; j < width; ++j)
      for (i = 1; i < height; ++i)
        tmp[i * tmp_stride + j] =
            tmp[(i - 1) * tmp_stride + j] + src[i * src_stride + j];
  }

  // Vertical window sums as differences of column prefix sums.
  // Rows 0..r: the window starts at row 0, so the prefix sum is the answer.
  for (i = 0; i <= r; ++i)
    memcpy(&dst[i * dst_stride], &tmp[(i + r) * tmp_stride],
           sizeof(*tmp) * width);
  // Interior rows: full window.
  for (i = r + 1; i < height - r; ++i)
    for (j = 0; j < width; ++j)
      dst[i * dst_stride + j] =
          tmp[(i + r) * tmp_stride + j] - tmp[(i - r - 1) * tmp_stride + j];
  // Last r rows: the window ends at the final row.
  for (i = height - r; i < height; ++i)
    for (j = 0; j < width; ++j)
      dst[i * dst_stride + j] = tmp[(height - 1) * tmp_stride + j] -
                                tmp[(i - r - 1) * tmp_stride + j];

  // tmp(i, j) <- sum of the vertical sums in row i of dst, columns 0..j.
  // dst must be indexed with dst_stride here: using src_stride reads the wrong
  // elements whenever the two strides differ.
  for (i = 0; i < height; ++i) tmp[i * tmp_stride] = dst[i * dst_stride];
  for (i = 0; i < height; ++i)
    for (j = 1; j < width; ++j)
      tmp[i * tmp_stride + j] =
          tmp[i * tmp_stride + j - 1] + dst[i * dst_stride + j];

  // Horizontal window sums as differences of row prefix sums, with the same
  // three regions as in the vertical pass.
  for (j = 0; j <= r; ++j)
    for (i = 0; i < height; ++i)
      dst[i * dst_stride + j] = tmp[i * tmp_stride + j + r];
  for (j = r + 1; j < width - r; ++j)
    for (i = 0; i < height; ++i)
      dst[i * dst_stride + j] =
          tmp[i * tmp_stride + j + r] - tmp[i * tmp_stride + j - r - 1];
  for (j = width - r; j < width; ++j)
    for (i = 0; i < height; ++i)
      dst[i * dst_stride + j] =
          tmp[i * tmp_stride + width - 1] - tmp[i * tmp_stride + j - r - 1];
  aom_free(tmp);
}
| 490 | |
// Windowed (2r + 1) x (2r + 1) sum of src (sqr == 0) or of src squared
// (sqr != 0) into dst. Radii 1, 2 and 3 use the specialised routines; any
// other radius falls back to the generic boxsumr().
static void boxsum(int32_t *src, int width, int height, int src_stride, int r,
                   int sqr, int32_t *dst, int dst_stride) {
  switch (r) {
    case 1:
      boxsum1(src, width, height, src_stride, sqr, dst, dst_stride);
      break;
    case 2:
      boxsum2(src, width, height, src_stride, sqr, dst, dst_stride);
      break;
    case 3:
      boxsum3(src, width, height, src_stride, sqr, dst, dst_stride);
      break;
    default:
      boxsumr(src, width, height, src_stride, r, sqr, dst, dst_stride);
      break;
  }
}
| 502 | |
// Number of positions of the window [pos - r, pos + r] that fall inside
// [0, len), i.e. the window length after clipping to the image.
static int boxnum_extent(int pos, int len, int r) {
  const int lo = (pos - r < 0) ? 0 : pos - r;
  const int hi = (pos + r > len - 1) ? len - 1 : pos + r;
  return hi - lo + 1;
}

// Fills num (row pitch num_stride) with, for every pixel of a width x height
// image, the number of pixels of the (2r + 1) x (2r + 1) window centred there
// that lie inside the image.
//
// The count is the product of the clipped vertical and horizontal extents.
// For width, height >= 2r + 1 this gives exactly the same values as filling
// the corner, edge and interior regions separately. It is also correct for
// smaller images, where the region-fill approach produced wrong counts (or
// negative indices when a dimension was <= r).
//
// Entries are stored as int8_t; the largest value is (2r + 1)^2, which is 49
// for r = 3.
static void boxnum(int width, int height, int r, int8_t *num, int num_stride) {
  int i, j;
  for (i = 0; i < height; ++i) {
    const int rows = boxnum_extent(i, height, r);
    int8_t *const line = num + i * num_stride;
    for (j = 0; j < width; ++j) {
      line[j] = (int8_t)(rows * boxnum_extent(j, width, r));
    }
  }
}
| 534 | |
| 535 | void decode_xq(int *xqd, int *xq) { |
Debargha Mukherjee | b7bb097 | 2017-03-09 06:47:43 -0800 | [diff] [blame] | 536 | xq[0] = xqd[0]; |
Debargha Mukherjee | 8f209a8 | 2016-10-12 10:47:01 -0700 | [diff] [blame] | 537 | xq[1] = (1 << SGRPROJ_PRJ_BITS) - xq[0] - xqd[1]; |
| 538 | } |
| 539 | |
// Lookup table for x / (x + 1) scaled by 256: for x in 0..254 the entry is
// 256 * x / (x + 1) rounded to the nearest integer. The final entry (x = 255)
// is 256 rather than the 255 the formula would give.
// Laid out 16 entries per row; the comment on each row gives its first index.
const int32_t x_by_xplus1[256] = {
  // 0
  0, 128, 171, 192, 205, 213, 219, 224, 228, 230, 233, 235, 236, 238, 239, 240,
  // 16
  241, 242, 243, 243, 244, 244, 245, 245, 246, 246, 247, 247, 247, 247, 248,
  248,
  // 32
  248, 248, 249, 249, 249, 249, 249, 250, 250, 250, 250, 250, 250, 250, 251,
  251,
  // 48
  251, 251, 251, 251, 251, 251, 251, 251, 252, 252, 252, 252, 252, 252, 252,
  252,
  // 64
  252, 252, 252, 252, 252, 252, 252, 252, 252, 253, 253, 253, 253, 253, 253,
  253,
  // 80
  253, 253, 253, 253, 253, 253, 253, 253, 253, 253, 253, 253, 253, 253, 253,
  253,
  // 96
  253, 253, 253, 253, 253, 253, 254, 254, 254, 254, 254, 254, 254, 254, 254,
  254,
  // 112
  254, 254, 254, 254, 254, 254, 254, 254, 254, 254, 254, 254, 254, 254, 254,
  254,
  // 128
  254, 254, 254, 254, 254, 254, 254, 254, 254, 254, 254, 254, 254, 254, 254,
  254,
  // 144
  254, 254, 254, 254, 254, 254, 254, 254, 254, 254, 254, 254, 254, 254, 254,
  254,
  // 160
  254, 254, 254, 254, 254, 254, 254, 254, 254, 254, 255, 255, 255, 255, 255,
  255,
  // 176
  255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255,
  255,
  // 192
  255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255,
  255,
  // 208
  255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255,
  255,
  // 224
  255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255,
  255,
  // 240
  255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255,
  256,
};
| 560 | |
David Barker | ce110cc | 2017-02-22 10:38:59 +0000 | [diff] [blame] | 561 | const int32_t one_by_x[MAX_NELEM] = { |
David Barker | 9198d13 | 2017-02-17 14:27:05 +0000 | [diff] [blame] | 562 | 4096, 2048, 1365, 1024, 819, 683, 585, 512, 455, 410, 372, 341, 315, |
| 563 | 293, 273, 256, 241, 228, 216, 205, 195, 186, 178, 171, 164, 158, |
| 564 | 152, 146, 141, 137, 132, 128, 124, 120, 117, 114, 111, 108, 105, |
| 565 | 102, 100, 98, 95, 93, 91, 89, 87, 85, 84 |
Debargha Mukherjee | 4be1262 | 2017-02-15 21:38:02 -0800 | [diff] [blame] | 566 | }; |
Debargha Mukherjee | 4be1262 | 2017-02-15 21:38:02 -0800 | [diff] [blame] | 567 | |
David Barker | 506eb72 | 2017-03-08 13:35:49 +0000 | [diff] [blame] | 568 | static void av1_selfguided_restoration_internal(int32_t *dgd, int width, |
| 569 | int height, int stride, |
| 570 | int bit_depth, int r, int eps, |
| 571 | int32_t *tmpbuf) { |
David Barker | 3a0df18 | 2016-12-21 10:44:52 +0000 | [diff] [blame] | 572 | int32_t *A = tmpbuf; |
David Barker | cff43bb | 2017-03-08 13:15:17 +0000 | [diff] [blame] | 573 | int32_t *B = A + SGRPROJ_OUTBUF_SIZE; |
Debargha Mukherjee | 8f209a8 | 2016-10-12 10:47:01 -0700 | [diff] [blame] | 574 | int8_t num[RESTORATION_TILEPELS_MAX]; |
| 575 | int i, j; |
David Barker | ce110cc | 2017-02-22 10:38:59 +0000 | [diff] [blame] | 576 | // Adjusting the stride of A and B here appears to avoid bad cache effects, |
| 577 | // leading to a significant speed improvement. |
| 578 | // We also align the stride to a multiple of 16 bytes, for consistency |
| 579 | // with the SIMD version of this function. |
| 580 | int buf_stride = ((width + 3) & ~3) + 16; |
Debargha Mukherjee | 8f209a8 | 2016-10-12 10:47:01 -0700 | [diff] [blame] | 581 | |
David Barker | 6928a5d | 2017-01-05 11:29:22 +0000 | [diff] [blame] | 582 | // Don't filter tiles with dimensions < 5 on any axis |
| 583 | if ((width < 5) || (height < 5)) return; |
| 584 | |
David Barker | ce110cc | 2017-02-22 10:38:59 +0000 | [diff] [blame] | 585 | boxsum(dgd, width, height, stride, r, 0, B, buf_stride); |
| 586 | boxsum(dgd, width, height, stride, r, 1, A, buf_stride); |
Debargha Mukherjee | 8f209a8 | 2016-10-12 10:47:01 -0700 | [diff] [blame] | 587 | boxnum(width, height, r, num, width); |
Debargha Mukherjee | 8a70919 | 2017-01-10 11:29:31 -0800 | [diff] [blame] | 588 | assert(r <= 3); |
Debargha Mukherjee | 8f209a8 | 2016-10-12 10:47:01 -0700 | [diff] [blame] | 589 | for (i = 0; i < height; ++i) { |
| 590 | for (j = 0; j < width; ++j) { |
David Barker | ce110cc | 2017-02-22 10:38:59 +0000 | [diff] [blame] | 591 | const int k = i * buf_stride + j; |
| 592 | const int n = num[i * width + j]; |
Debargha Mukherjee | 4bfd72e | 2017-03-08 22:20:31 -0800 | [diff] [blame] | 593 | |
David Barker | 9198d13 | 2017-02-17 14:27:05 +0000 | [diff] [blame] | 594 | // a < 2^16 * n < 2^22 regardless of bit depth |
| 595 | uint32_t a = ROUND_POWER_OF_TWO(A[k], 2 * (bit_depth - 8)); |
| 596 | // b < 2^8 * n < 2^14 regardless of bit depth |
| 597 | uint32_t b = ROUND_POWER_OF_TWO(B[k], bit_depth - 8); |
| 598 | |
| 599 | // Each term in calculating p = a * n - b * b is < 2^16 * n^2 < 2^28, |
| 600 | // and p itself satisfies p < 2^14 * n^2 < 2^26. |
| 601 | // Note: Sometimes, in high bit depth, we can end up with a*n < b*b. |
| 602 | // This is an artefact of rounding, and can only happen if all pixels |
| 603 | // are (almost) identical, so in this case we saturate to p=0. |
| 604 | uint32_t p = (a * n < b * b) ? 0 : a * n - b * b; |
| 605 | uint32_t s = sgrproj_mtable[eps - 1][n - 1]; |
| 606 | |
| 607 | // p * s < (2^14 * n^2) * round(2^20 / n^2 eps) < 2^34 / eps < 2^32 |
| 608 | // as long as eps >= 4. So p * s fits into a uint32_t, and z < 2^12 |
| 609 | // (this holds even after accounting for the rounding in s) |
| 610 | const uint32_t z = ROUND_POWER_OF_TWO(p * s, SGRPROJ_MTABLE_BITS); |
| 611 | |
| 612 | A[k] = x_by_xplus1[AOMMIN(z, 255)]; // < 2^8 |
| 613 | |
| 614 | // SGRPROJ_SGR - A[k] < 2^8, B[k] < 2^(bit_depth) * n, |
| 615 | // one_by_x[n - 1] = round(2^12 / n) |
| 616 | // => the product here is < 2^(20 + bit_depth) <= 2^32, |
| 617 | // and B[k] is set to a value < 2^(8 + bit depth) |
| 618 | B[k] = (int32_t)ROUND_POWER_OF_TWO((uint32_t)(SGRPROJ_SGR - A[k]) * |
| 619 | (uint32_t)B[k] * |
| 620 | (uint32_t)one_by_x[n - 1], |
| 621 | SGRPROJ_RECIP_BITS); |
Debargha Mukherjee | 8f209a8 | 2016-10-12 10:47:01 -0700 | [diff] [blame] | 622 | } |
| 623 | } |
Debargha Mukherjee | 8f209a8 | 2016-10-12 10:47:01 -0700 | [diff] [blame] | 624 | i = 0; |
| 625 | j = 0; |
| 626 | { |
David Barker | ce110cc | 2017-02-22 10:38:59 +0000 | [diff] [blame] | 627 | const int k = i * buf_stride + j; |
Debargha Mukherjee | 8f209a8 | 2016-10-12 10:47:01 -0700 | [diff] [blame] | 628 | const int l = i * stride + j; |
| 629 | const int nb = 3; |
Debargha Mukherjee | 519dbcf | 2016-12-16 03:13:02 -0800 | [diff] [blame] | 630 | const int32_t a = |
David Barker | ce110cc | 2017-02-22 10:38:59 +0000 | [diff] [blame] | 631 | 3 * A[k] + 2 * A[k + 1] + 2 * A[k + buf_stride] + A[k + buf_stride + 1]; |
Debargha Mukherjee | 519dbcf | 2016-12-16 03:13:02 -0800 | [diff] [blame] | 632 | const int32_t b = |
David Barker | ce110cc | 2017-02-22 10:38:59 +0000 | [diff] [blame] | 633 | 3 * B[k] + 2 * B[k + 1] + 2 * B[k + buf_stride] + B[k + buf_stride + 1]; |
David Barker | 7dcd7f5 | 2017-03-01 12:53:00 +0000 | [diff] [blame] | 634 | const int32_t v = a * dgd[l] + b; |
| 635 | dgd[l] = ROUND_POWER_OF_TWO(v, SGRPROJ_SGR_BITS + nb - SGRPROJ_RST_BITS); |
Debargha Mukherjee | 8f209a8 | 2016-10-12 10:47:01 -0700 | [diff] [blame] | 636 | } |
| 637 | i = 0; |
| 638 | j = width - 1; |
| 639 | { |
David Barker | ce110cc | 2017-02-22 10:38:59 +0000 | [diff] [blame] | 640 | const int k = i * buf_stride + j; |
Debargha Mukherjee | 8f209a8 | 2016-10-12 10:47:01 -0700 | [diff] [blame] | 641 | const int l = i * stride + j; |
| 642 | const int nb = 3; |
Debargha Mukherjee | 519dbcf | 2016-12-16 03:13:02 -0800 | [diff] [blame] | 643 | const int32_t a = |
David Barker | ce110cc | 2017-02-22 10:38:59 +0000 | [diff] [blame] | 644 | 3 * A[k] + 2 * A[k - 1] + 2 * A[k + buf_stride] + A[k + buf_stride - 1]; |
Debargha Mukherjee | 519dbcf | 2016-12-16 03:13:02 -0800 | [diff] [blame] | 645 | const int32_t b = |
David Barker | ce110cc | 2017-02-22 10:38:59 +0000 | [diff] [blame] | 646 | 3 * B[k] + 2 * B[k - 1] + 2 * B[k + buf_stride] + B[k + buf_stride - 1]; |
David Barker | 7dcd7f5 | 2017-03-01 12:53:00 +0000 | [diff] [blame] | 647 | const int32_t v = a * dgd[l] + b; |
| 648 | dgd[l] = ROUND_POWER_OF_TWO(v, SGRPROJ_SGR_BITS + nb - SGRPROJ_RST_BITS); |
Debargha Mukherjee | 8f209a8 | 2016-10-12 10:47:01 -0700 | [diff] [blame] | 649 | } |
| 650 | i = height - 1; |
| 651 | j = 0; |
| 652 | { |
David Barker | ce110cc | 2017-02-22 10:38:59 +0000 | [diff] [blame] | 653 | const int k = i * buf_stride + j; |
Debargha Mukherjee | 8f209a8 | 2016-10-12 10:47:01 -0700 | [diff] [blame] | 654 | const int l = i * stride + j; |
| 655 | const int nb = 3; |
Debargha Mukherjee | 519dbcf | 2016-12-16 03:13:02 -0800 | [diff] [blame] | 656 | const int32_t a = |
David Barker | ce110cc | 2017-02-22 10:38:59 +0000 | [diff] [blame] | 657 | 3 * A[k] + 2 * A[k + 1] + 2 * A[k - buf_stride] + A[k - buf_stride + 1]; |
Debargha Mukherjee | 519dbcf | 2016-12-16 03:13:02 -0800 | [diff] [blame] | 658 | const int32_t b = |
David Barker | ce110cc | 2017-02-22 10:38:59 +0000 | [diff] [blame] | 659 | 3 * B[k] + 2 * B[k + 1] + 2 * B[k - buf_stride] + B[k - buf_stride + 1]; |
David Barker | 7dcd7f5 | 2017-03-01 12:53:00 +0000 | [diff] [blame] | 660 | const int32_t v = a * dgd[l] + b; |
| 661 | dgd[l] = ROUND_POWER_OF_TWO(v, SGRPROJ_SGR_BITS + nb - SGRPROJ_RST_BITS); |
Debargha Mukherjee | 8f209a8 | 2016-10-12 10:47:01 -0700 | [diff] [blame] | 662 | } |
| 663 | i = height - 1; |
| 664 | j = width - 1; |
| 665 | { |
David Barker | ce110cc | 2017-02-22 10:38:59 +0000 | [diff] [blame] | 666 | const int k = i * buf_stride + j; |
Debargha Mukherjee | 8f209a8 | 2016-10-12 10:47:01 -0700 | [diff] [blame] | 667 | const int l = i * stride + j; |
| 668 | const int nb = 3; |
Debargha Mukherjee | 519dbcf | 2016-12-16 03:13:02 -0800 | [diff] [blame] | 669 | const int32_t a = |
David Barker | ce110cc | 2017-02-22 10:38:59 +0000 | [diff] [blame] | 670 | 3 * A[k] + 2 * A[k - 1] + 2 * A[k - buf_stride] + A[k - buf_stride - 1]; |
Debargha Mukherjee | 519dbcf | 2016-12-16 03:13:02 -0800 | [diff] [blame] | 671 | const int32_t b = |
David Barker | ce110cc | 2017-02-22 10:38:59 +0000 | [diff] [blame] | 672 | 3 * B[k] + 2 * B[k - 1] + 2 * B[k - buf_stride] + B[k - buf_stride - 1]; |
David Barker | 7dcd7f5 | 2017-03-01 12:53:00 +0000 | [diff] [blame] | 673 | const int32_t v = a * dgd[l] + b; |
| 674 | dgd[l] = ROUND_POWER_OF_TWO(v, SGRPROJ_SGR_BITS + nb - SGRPROJ_RST_BITS); |
Debargha Mukherjee | 8f209a8 | 2016-10-12 10:47:01 -0700 | [diff] [blame] | 675 | } |
| 676 | i = 0; |
| 677 | for (j = 1; j < width - 1; ++j) { |
David Barker | ce110cc | 2017-02-22 10:38:59 +0000 | [diff] [blame] | 678 | const int k = i * buf_stride + j; |
Debargha Mukherjee | 8f209a8 | 2016-10-12 10:47:01 -0700 | [diff] [blame] | 679 | const int l = i * stride + j; |
| 680 | const int nb = 3; |
David Barker | ce110cc | 2017-02-22 10:38:59 +0000 | [diff] [blame] | 681 | const int32_t a = A[k] + 2 * (A[k - 1] + A[k + 1]) + A[k + buf_stride] + |
| 682 | A[k + buf_stride - 1] + A[k + buf_stride + 1]; |
| 683 | const int32_t b = B[k] + 2 * (B[k - 1] + B[k + 1]) + B[k + buf_stride] + |
| 684 | B[k + buf_stride - 1] + B[k + buf_stride + 1]; |
David Barker | 7dcd7f5 | 2017-03-01 12:53:00 +0000 | [diff] [blame] | 685 | const int32_t v = a * dgd[l] + b; |
| 686 | dgd[l] = ROUND_POWER_OF_TWO(v, SGRPROJ_SGR_BITS + nb - SGRPROJ_RST_BITS); |
Debargha Mukherjee | 8f209a8 | 2016-10-12 10:47:01 -0700 | [diff] [blame] | 687 | } |
| 688 | i = height - 1; |
| 689 | for (j = 1; j < width - 1; ++j) { |
David Barker | ce110cc | 2017-02-22 10:38:59 +0000 | [diff] [blame] | 690 | const int k = i * buf_stride + j; |
Debargha Mukherjee | 8f209a8 | 2016-10-12 10:47:01 -0700 | [diff] [blame] | 691 | const int l = i * stride + j; |
| 692 | const int nb = 3; |
David Barker | ce110cc | 2017-02-22 10:38:59 +0000 | [diff] [blame] | 693 | const int32_t a = A[k] + 2 * (A[k - 1] + A[k + 1]) + A[k - buf_stride] + |
| 694 | A[k - buf_stride - 1] + A[k - buf_stride + 1]; |
| 695 | const int32_t b = B[k] + 2 * (B[k - 1] + B[k + 1]) + B[k - buf_stride] + |
| 696 | B[k - buf_stride - 1] + B[k - buf_stride + 1]; |
David Barker | 7dcd7f5 | 2017-03-01 12:53:00 +0000 | [diff] [blame] | 697 | const int32_t v = a * dgd[l] + b; |
| 698 | dgd[l] = ROUND_POWER_OF_TWO(v, SGRPROJ_SGR_BITS + nb - SGRPROJ_RST_BITS); |
Debargha Mukherjee | 8f209a8 | 2016-10-12 10:47:01 -0700 | [diff] [blame] | 699 | } |
| 700 | j = 0; |
| 701 | for (i = 1; i < height - 1; ++i) { |
David Barker | ce110cc | 2017-02-22 10:38:59 +0000 | [diff] [blame] | 702 | const int k = i * buf_stride + j; |
Debargha Mukherjee | 8f209a8 | 2016-10-12 10:47:01 -0700 | [diff] [blame] | 703 | const int l = i * stride + j; |
| 704 | const int nb = 3; |
David Barker | ce110cc | 2017-02-22 10:38:59 +0000 | [diff] [blame] | 705 | const int32_t a = A[k] + 2 * (A[k - buf_stride] + A[k + buf_stride]) + |
| 706 | A[k + 1] + A[k - buf_stride + 1] + A[k + buf_stride + 1]; |
| 707 | const int32_t b = B[k] + 2 * (B[k - buf_stride] + B[k + buf_stride]) + |
| 708 | B[k + 1] + B[k - buf_stride + 1] + B[k + buf_stride + 1]; |
David Barker | 7dcd7f5 | 2017-03-01 12:53:00 +0000 | [diff] [blame] | 709 | const int32_t v = a * dgd[l] + b; |
| 710 | dgd[l] = ROUND_POWER_OF_TWO(v, SGRPROJ_SGR_BITS + nb - SGRPROJ_RST_BITS); |
Debargha Mukherjee | 8f209a8 | 2016-10-12 10:47:01 -0700 | [diff] [blame] | 711 | } |
| 712 | j = width - 1; |
| 713 | for (i = 1; i < height - 1; ++i) { |
David Barker | ce110cc | 2017-02-22 10:38:59 +0000 | [diff] [blame] | 714 | const int k = i * buf_stride + j; |
Debargha Mukherjee | 8f209a8 | 2016-10-12 10:47:01 -0700 | [diff] [blame] | 715 | const int l = i * stride + j; |
| 716 | const int nb = 3; |
David Barker | ce110cc | 2017-02-22 10:38:59 +0000 | [diff] [blame] | 717 | const int32_t a = A[k] + 2 * (A[k - buf_stride] + A[k + buf_stride]) + |
| 718 | A[k - 1] + A[k - buf_stride - 1] + A[k + buf_stride - 1]; |
| 719 | const int32_t b = B[k] + 2 * (B[k - buf_stride] + B[k + buf_stride]) + |
| 720 | B[k - 1] + B[k - buf_stride - 1] + B[k + buf_stride - 1]; |
David Barker | 7dcd7f5 | 2017-03-01 12:53:00 +0000 | [diff] [blame] | 721 | const int32_t v = a * dgd[l] + b; |
| 722 | dgd[l] = ROUND_POWER_OF_TWO(v, SGRPROJ_SGR_BITS + nb - SGRPROJ_RST_BITS); |
Debargha Mukherjee | 8f209a8 | 2016-10-12 10:47:01 -0700 | [diff] [blame] | 723 | } |
| 724 | for (i = 1; i < height - 1; ++i) { |
| 725 | for (j = 1; j < width - 1; ++j) { |
David Barker | ce110cc | 2017-02-22 10:38:59 +0000 | [diff] [blame] | 726 | const int k = i * buf_stride + j; |
Debargha Mukherjee | 8f209a8 | 2016-10-12 10:47:01 -0700 | [diff] [blame] | 727 | const int l = i * stride + j; |
| 728 | const int nb = 5; |
Debargha Mukherjee | 519dbcf | 2016-12-16 03:13:02 -0800 | [diff] [blame] | 729 | const int32_t a = |
David Barker | ce110cc | 2017-02-22 10:38:59 +0000 | [diff] [blame] | 730 | (A[k] + A[k - 1] + A[k + 1] + A[k - buf_stride] + A[k + buf_stride]) * |
| 731 | 4 + |
| 732 | (A[k - 1 - buf_stride] + A[k - 1 + buf_stride] + |
| 733 | A[k + 1 - buf_stride] + A[k + 1 + buf_stride]) * |
Debargha Mukherjee | 8f209a8 | 2016-10-12 10:47:01 -0700 | [diff] [blame] | 734 | 3; |
Debargha Mukherjee | 519dbcf | 2016-12-16 03:13:02 -0800 | [diff] [blame] | 735 | const int32_t b = |
David Barker | ce110cc | 2017-02-22 10:38:59 +0000 | [diff] [blame] | 736 | (B[k] + B[k - 1] + B[k + 1] + B[k - buf_stride] + B[k + buf_stride]) * |
| 737 | 4 + |
| 738 | (B[k - 1 - buf_stride] + B[k - 1 + buf_stride] + |
| 739 | B[k + 1 - buf_stride] + B[k + 1 + buf_stride]) * |
Debargha Mukherjee | 8f209a8 | 2016-10-12 10:47:01 -0700 | [diff] [blame] | 740 | 3; |
David Barker | 7dcd7f5 | 2017-03-01 12:53:00 +0000 | [diff] [blame] | 741 | const int32_t v = a * dgd[l] + b; |
| 742 | dgd[l] = ROUND_POWER_OF_TWO(v, SGRPROJ_SGR_BITS + nb - SGRPROJ_RST_BITS); |
Debargha Mukherjee | 8f209a8 | 2016-10-12 10:47:01 -0700 | [diff] [blame] | 743 | } |
| 744 | } |
Debargha Mukherjee | 8f209a8 | 2016-10-12 10:47:01 -0700 | [diff] [blame] | 745 | } |
| 746 | |
// 8-bit entry point for the self-guided filter: widens the width x height
// region of dgd into the int32_t buffer dst (row pitch dst_stride), then runs
// the internal filter in place on dst with bit_depth fixed at 8.
void av1_selfguided_restoration_c(uint8_t *dgd, int width, int height,
                                  int stride, int32_t *dst, int dst_stride,
                                  int r, int eps, int32_t *tmpbuf) {
  int row, col;

  for (row = 0; row < height; ++row) {
    const uint8_t *const src_row = dgd + row * stride;
    int32_t *const dst_row = dst + row * dst_stride;
    for (col = 0; col < width; ++col) dst_row[col] = src_row[col];
  }

  av1_selfguided_restoration_internal(dst, width, height, dst_stride, 8, r, eps,
                                      tmpbuf);
}
| 759 | |
Urvang Joshi | 0c45941 | 2017-04-21 18:10:09 +0000 | [diff] [blame] | 760 | void av1_highpass_filter_c(uint8_t *dgd, int width, int height, int stride, |
| 761 | int32_t *dst, int dst_stride, int corner, int edge) { |
Debargha Mukherjee | b7bb097 | 2017-03-09 06:47:43 -0800 | [diff] [blame] | 762 | int i, j; |
David Barker | eed824e | 2017-03-10 11:35:22 +0000 | [diff] [blame] | 763 | const int center = (1 << SGRPROJ_RST_BITS) - 4 * (corner + edge); |
Debargha Mukherjee | b7bb097 | 2017-03-09 06:47:43 -0800 | [diff] [blame] | 764 | |
| 765 | i = 0; |
| 766 | j = 0; |
| 767 | { |
| 768 | const int k = i * stride + j; |
David Barker | eed824e | 2017-03-10 11:35:22 +0000 | [diff] [blame] | 769 | const int l = i * dst_stride + j; |
| 770 | dst[l] = |
| 771 | center * dgd[k] + edge * (dgd[k + 1] + dgd[k + stride] + dgd[k] * 2) + |
| 772 | corner * (dgd[k + stride + 1] + dgd[k + 1] + dgd[k + stride] + dgd[k]); |
Debargha Mukherjee | b7bb097 | 2017-03-09 06:47:43 -0800 | [diff] [blame] | 773 | } |
| 774 | i = 0; |
| 775 | j = width - 1; |
| 776 | { |
| 777 | const int k = i * stride + j; |
David Barker | eed824e | 2017-03-10 11:35:22 +0000 | [diff] [blame] | 778 | const int l = i * dst_stride + j; |
| 779 | dst[l] = |
| 780 | center * dgd[k] + edge * (dgd[k - 1] + dgd[k + stride] + dgd[k] * 2) + |
| 781 | corner * (dgd[k + stride - 1] + dgd[k - 1] + dgd[k + stride] + dgd[k]); |
Debargha Mukherjee | b7bb097 | 2017-03-09 06:47:43 -0800 | [diff] [blame] | 782 | } |
| 783 | i = height - 1; |
| 784 | j = 0; |
| 785 | { |
| 786 | const int k = i * stride + j; |
David Barker | eed824e | 2017-03-10 11:35:22 +0000 | [diff] [blame] | 787 | const int l = i * dst_stride + j; |
| 788 | dst[l] = |
| 789 | center * dgd[k] + edge * (dgd[k + 1] + dgd[k - stride] + dgd[k] * 2) + |
| 790 | corner * (dgd[k - stride + 1] + dgd[k + 1] + dgd[k - stride] + dgd[k]); |
Debargha Mukherjee | b7bb097 | 2017-03-09 06:47:43 -0800 | [diff] [blame] | 791 | } |
| 792 | i = height - 1; |
| 793 | j = width - 1; |
| 794 | { |
| 795 | const int k = i * stride + j; |
David Barker | eed824e | 2017-03-10 11:35:22 +0000 | [diff] [blame] | 796 | const int l = i * dst_stride + j; |
| 797 | dst[l] = |
| 798 | center * dgd[k] + edge * (dgd[k - 1] + dgd[k - stride] + dgd[k] * 2) + |
| 799 | corner * (dgd[k - stride - 1] + dgd[k - 1] + dgd[k - stride] + dgd[k]); |
Debargha Mukherjee | b7bb097 | 2017-03-09 06:47:43 -0800 | [diff] [blame] | 800 | } |
| 801 | i = 0; |
| 802 | for (j = 1; j < width - 1; ++j) { |
| 803 | const int k = i * stride + j; |
David Barker | eed824e | 2017-03-10 11:35:22 +0000 | [diff] [blame] | 804 | const int l = i * dst_stride + j; |
| 805 | dst[l] = center * dgd[k] + |
| 806 | edge * (dgd[k - 1] + dgd[k + stride] + dgd[k + 1] + dgd[k]) + |
| 807 | corner * (dgd[k + stride - 1] + dgd[k + stride + 1] + dgd[k - 1] + |
| 808 | dgd[k + 1]); |
Debargha Mukherjee | b7bb097 | 2017-03-09 06:47:43 -0800 | [diff] [blame] | 809 | } |
| 810 | i = height - 1; |
| 811 | for (j = 1; j < width - 1; ++j) { |
| 812 | const int k = i * stride + j; |
David Barker | eed824e | 2017-03-10 11:35:22 +0000 | [diff] [blame] | 813 | const int l = i * dst_stride + j; |
| 814 | dst[l] = center * dgd[k] + |
| 815 | edge * (dgd[k - 1] + dgd[k - stride] + dgd[k + 1] + dgd[k]) + |
| 816 | corner * (dgd[k - stride - 1] + dgd[k - stride + 1] + dgd[k - 1] + |
| 817 | dgd[k + 1]); |
Debargha Mukherjee | b7bb097 | 2017-03-09 06:47:43 -0800 | [diff] [blame] | 818 | } |
| 819 | j = 0; |
| 820 | for (i = 1; i < height - 1; ++i) { |
| 821 | const int k = i * stride + j; |
David Barker | eed824e | 2017-03-10 11:35:22 +0000 | [diff] [blame] | 822 | const int l = i * dst_stride + j; |
| 823 | dst[l] = center * dgd[k] + |
| 824 | edge * (dgd[k - stride] + dgd[k + 1] + dgd[k + stride] + dgd[k]) + |
| 825 | corner * (dgd[k + stride + 1] + dgd[k - stride + 1] + |
| 826 | dgd[k - stride] + dgd[k + stride]); |
Debargha Mukherjee | b7bb097 | 2017-03-09 06:47:43 -0800 | [diff] [blame] | 827 | } |
| 828 | j = width - 1; |
| 829 | for (i = 1; i < height - 1; ++i) { |
| 830 | const int k = i * stride + j; |
David Barker | eed824e | 2017-03-10 11:35:22 +0000 | [diff] [blame] | 831 | const int l = i * dst_stride + j; |
| 832 | dst[l] = center * dgd[k] + |
| 833 | edge * (dgd[k - stride] + dgd[k - 1] + dgd[k + stride] + dgd[k]) + |
| 834 | corner * (dgd[k + stride - 1] + dgd[k - stride - 1] + |
| 835 | dgd[k - stride] + dgd[k + stride]); |
Debargha Mukherjee | b7bb097 | 2017-03-09 06:47:43 -0800 | [diff] [blame] | 836 | } |
| 837 | for (i = 1; i < height - 1; ++i) { |
| 838 | for (j = 1; j < width - 1; ++j) { |
| 839 | const int k = i * stride + j; |
David Barker | eed824e | 2017-03-10 11:35:22 +0000 | [diff] [blame] | 840 | const int l = i * dst_stride + j; |
| 841 | dst[l] = |
| 842 | center * dgd[k] + |
| 843 | edge * (dgd[k - stride] + dgd[k - 1] + dgd[k + stride] + dgd[k + 1]) + |
| 844 | corner * (dgd[k + stride - 1] + dgd[k - stride - 1] + |
| 845 | dgd[k - stride + 1] + dgd[k + stride + 1]); |
Debargha Mukherjee | b7bb097 | 2017-03-09 06:47:43 -0800 | [diff] [blame] | 846 | } |
| 847 | } |
Debargha Mukherjee | b7bb097 | 2017-03-09 06:47:43 -0800 | [diff] [blame] | 848 | } |
| 849 | |
David Barker | ce110cc | 2017-02-22 10:38:59 +0000 | [diff] [blame] | 850 | void apply_selfguided_restoration_c(uint8_t *dat, int width, int height, |
David Barker | 4d2af5d | 2017-03-09 11:46:50 +0000 | [diff] [blame] | 851 | int stride, int eps, int *xqd, uint8_t *dst, |
| 852 | int dst_stride, int32_t *tmpbuf) { |
Debargha Mukherjee | 8f209a8 | 2016-10-12 10:47:01 -0700 | [diff] [blame] | 853 | int xq[2]; |
David Barker | 3a0df18 | 2016-12-21 10:44:52 +0000 | [diff] [blame] | 854 | int32_t *flt1 = tmpbuf; |
Debargha Mukherjee | 519dbcf | 2016-12-16 03:13:02 -0800 | [diff] [blame] | 855 | int32_t *flt2 = flt1 + RESTORATION_TILEPELS_MAX; |
David Barker | 3a0df18 | 2016-12-21 10:44:52 +0000 | [diff] [blame] | 856 | int32_t *tmpbuf2 = flt2 + RESTORATION_TILEPELS_MAX; |
Debargha Mukherjee | 8f209a8 | 2016-10-12 10:47:01 -0700 | [diff] [blame] | 857 | int i, j; |
David Barker | 3a0df18 | 2016-12-21 10:44:52 +0000 | [diff] [blame] | 858 | assert(width * height <= RESTORATION_TILEPELS_MAX); |
Debargha Mukherjee | b7bb097 | 2017-03-09 06:47:43 -0800 | [diff] [blame] | 859 | #if USE_HIGHPASS_IN_SGRPROJ |
| 860 | av1_highpass_filter_c(dat, width, height, stride, flt1, width, |
David Barker | eed824e | 2017-03-10 11:35:22 +0000 | [diff] [blame] | 861 | sgr_params[eps].corner, sgr_params[eps].edge); |
Debargha Mukherjee | b7bb097 | 2017-03-09 06:47:43 -0800 | [diff] [blame] | 862 | #else |
David Barker | 506eb72 | 2017-03-08 13:35:49 +0000 | [diff] [blame] | 863 | av1_selfguided_restoration_c(dat, width, height, stride, flt1, width, |
David Barker | 4d2af5d | 2017-03-09 11:46:50 +0000 | [diff] [blame] | 864 | sgr_params[eps].r1, sgr_params[eps].e1, tmpbuf2); |
Debargha Mukherjee | b7bb097 | 2017-03-09 06:47:43 -0800 | [diff] [blame] | 865 | #endif // USE_HIGHPASS_IN_SGRPROJ |
David Barker | 506eb72 | 2017-03-08 13:35:49 +0000 | [diff] [blame] | 866 | av1_selfguided_restoration_c(dat, width, height, stride, flt2, width, |
David Barker | 4d2af5d | 2017-03-09 11:46:50 +0000 | [diff] [blame] | 867 | sgr_params[eps].r2, sgr_params[eps].e2, tmpbuf2); |
Debargha Mukherjee | 8f209a8 | 2016-10-12 10:47:01 -0700 | [diff] [blame] | 868 | decode_xq(xqd, xq); |
| 869 | for (i = 0; i < height; ++i) { |
| 870 | for (j = 0; j < width; ++j) { |
| 871 | const int k = i * width + j; |
| 872 | const int l = i * stride + j; |
Debargha Mukherjee | 519dbcf | 2016-12-16 03:13:02 -0800 | [diff] [blame] | 873 | const int m = i * dst_stride + j; |
| 874 | const int32_t u = ((int32_t)dat[l] << SGRPROJ_RST_BITS); |
| 875 | const int32_t f1 = (int32_t)flt1[k] - u; |
| 876 | const int32_t f2 = (int32_t)flt2[k] - u; |
David Barker | ce110cc | 2017-02-22 10:38:59 +0000 | [diff] [blame] | 877 | const int32_t v = xq[0] * f1 + xq[1] * f2 + (u << SGRPROJ_PRJ_BITS); |
Debargha Mukherjee | 8f209a8 | 2016-10-12 10:47:01 -0700 | [diff] [blame] | 878 | const int16_t w = |
| 879 | (int16_t)ROUND_POWER_OF_TWO(v, SGRPROJ_PRJ_BITS + SGRPROJ_RST_BITS); |
Debargha Mukherjee | 519dbcf | 2016-12-16 03:13:02 -0800 | [diff] [blame] | 880 | dst[m] = clip_pixel(w); |
Debargha Mukherjee | 8f209a8 | 2016-10-12 10:47:01 -0700 | [diff] [blame] | 881 | } |
| 882 | } |
| 883 | } |
| 884 | |
| 885 | static void loop_sgrproj_filter_tile(uint8_t *data, int tile_idx, int width, |
| 886 | int height, int stride, |
Debargha Mukherjee | 874d36d | 2016-12-14 16:53:17 -0800 | [diff] [blame] | 887 | RestorationInternal *rst, uint8_t *dst, |
| 888 | int dst_stride) { |
Debargha Mukherjee | d748914 | 2017-01-05 13:58:16 -0800 | [diff] [blame] | 889 | const int tile_width = rst->tile_width; |
| 890 | const int tile_height = rst->tile_height; |
Debargha Mukherjee | 8f209a8 | 2016-10-12 10:47:01 -0700 | [diff] [blame] | 891 | int h_start, h_end, v_start, v_end; |
David Barker | 9666e75 | 2016-12-08 11:25:47 +0000 | [diff] [blame] | 892 | uint8_t *data_p, *dst_p; |
Debargha Mukherjee | 8f209a8 | 2016-10-12 10:47:01 -0700 | [diff] [blame] | 893 | |
Debargha Mukherjee | 994ccd7 | 2017-01-06 11:18:23 -0800 | [diff] [blame] | 894 | if (rst->rsi->restoration_type[tile_idx] == RESTORE_NONE) { |
David Barker | 9666e75 | 2016-12-08 11:25:47 +0000 | [diff] [blame] | 895 | loop_copy_tile(data, tile_idx, 0, 0, width, height, stride, rst, dst, |
| 896 | dst_stride); |
| 897 | return; |
| 898 | } |
Debargha Mukherjee | 8f209a8 | 2016-10-12 10:47:01 -0700 | [diff] [blame] | 899 | av1_get_rest_tile_limits(tile_idx, 0, 0, rst->nhtiles, rst->nvtiles, |
| 900 | tile_width, tile_height, width, height, 0, 0, |
| 901 | &h_start, &h_end, &v_start, &v_end); |
| 902 | data_p = data + h_start + v_start * stride; |
David Barker | 9666e75 | 2016-12-08 11:25:47 +0000 | [diff] [blame] | 903 | dst_p = dst + h_start + v_start * dst_stride; |
Debargha Mukherjee | 519dbcf | 2016-12-16 03:13:02 -0800 | [diff] [blame] | 904 | apply_selfguided_restoration(data_p, h_end - h_start, v_end - v_start, stride, |
David Barker | 4d2af5d | 2017-03-09 11:46:50 +0000 | [diff] [blame] | 905 | rst->rsi->sgrproj_info[tile_idx].ep, |
Debargha Mukherjee | 519dbcf | 2016-12-16 03:13:02 -0800 | [diff] [blame] | 906 | rst->rsi->sgrproj_info[tile_idx].xqd, dst_p, |
David Barker | 3a0df18 | 2016-12-21 10:44:52 +0000 | [diff] [blame] | 907 | dst_stride, rst->tmpbuf); |
Debargha Mukherjee | 8f209a8 | 2016-10-12 10:47:01 -0700 | [diff] [blame] | 908 | } |
| 909 | |
| 910 | static void loop_sgrproj_filter(uint8_t *data, int width, int height, |
| 911 | int stride, RestorationInternal *rst, |
David Barker | 025b254 | 2016-12-08 11:50:42 +0000 | [diff] [blame] | 912 | uint8_t *dst, int dst_stride) { |
Debargha Mukherjee | 8f209a8 | 2016-10-12 10:47:01 -0700 | [diff] [blame] | 913 | int tile_idx; |
Debargha Mukherjee | 8f209a8 | 2016-10-12 10:47:01 -0700 | [diff] [blame] | 914 | for (tile_idx = 0; tile_idx < rst->ntiles; ++tile_idx) { |
Debargha Mukherjee | 874d36d | 2016-12-14 16:53:17 -0800 | [diff] [blame] | 915 | loop_sgrproj_filter_tile(data, tile_idx, width, height, stride, rst, dst, |
| 916 | dst_stride); |
Debargha Mukherjee | 8f209a8 | 2016-10-12 10:47:01 -0700 | [diff] [blame] | 917 | } |
Debargha Mukherjee | 8f209a8 | 2016-10-12 10:47:01 -0700 | [diff] [blame] | 918 | } |
| 919 | |
Debargha Mukherjee | 5cd2ab9 | 2016-09-08 15:15:17 -0700 | [diff] [blame] | 920 | static void loop_switchable_filter(uint8_t *data, int width, int height, |
| 921 | int stride, RestorationInternal *rst, |
David Barker | 9666e75 | 2016-12-08 11:25:47 +0000 | [diff] [blame] | 922 | uint8_t *dst, int dst_stride) { |
David Barker | 025b254 | 2016-12-08 11:50:42 +0000 | [diff] [blame] | 923 | int tile_idx; |
David Barker | 025b254 | 2016-12-08 11:50:42 +0000 | [diff] [blame] | 924 | extend_frame(data, width, height, stride); |
Yaowu Xu | c27fc14 | 2016-08-22 16:08:15 -0700 | [diff] [blame] | 925 | for (tile_idx = 0; tile_idx < rst->ntiles; ++tile_idx) { |
David Barker | 9666e75 | 2016-12-08 11:25:47 +0000 | [diff] [blame] | 926 | if (rst->rsi->restoration_type[tile_idx] == RESTORE_NONE) { |
| 927 | loop_copy_tile(data, tile_idx, 0, 0, width, height, stride, rst, dst, |
| 928 | dst_stride); |
| 929 | } else if (rst->rsi->restoration_type[tile_idx] == RESTORE_WIENER) { |
David Barker | 025b254 | 2016-12-08 11:50:42 +0000 | [diff] [blame] | 930 | loop_wiener_filter_tile(data, tile_idx, width, height, stride, rst, dst, |
| 931 | dst_stride); |
Debargha Mukherjee | 8f209a8 | 2016-10-12 10:47:01 -0700 | [diff] [blame] | 932 | } else if (rst->rsi->restoration_type[tile_idx] == RESTORE_SGRPROJ) { |
Debargha Mukherjee | 874d36d | 2016-12-14 16:53:17 -0800 | [diff] [blame] | 933 | loop_sgrproj_filter_tile(data, tile_idx, width, height, stride, rst, dst, |
| 934 | dst_stride); |
Yaowu Xu | c27fc14 | 2016-08-22 16:08:15 -0700 | [diff] [blame] | 935 | } |
| 936 | } |
| 937 | } |
| 938 | |
Sebastien Alaiwan | 71e8784 | 2017-04-12 16:03:28 +0200 | [diff] [blame] | 939 | #if CONFIG_HIGHBITDEPTH |
David Barker | 33f3bfd | 2017-01-06 15:34:50 +0000 | [diff] [blame] | 940 | void extend_frame_highbd(uint16_t *data, int width, int height, int stride) { |
David Barker | 025b254 | 2016-12-08 11:50:42 +0000 | [diff] [blame] | 941 | uint16_t *data_p; |
| 942 | int i, j; |
| 943 | for (i = 0; i < height; ++i) { |
| 944 | data_p = data + i * stride; |
Debargha Mukherjee | 999d2f6 | 2016-12-15 13:23:21 -0800 | [diff] [blame] | 945 | for (j = -WIENER_HALFWIN; j < 0; ++j) data_p[j] = data_p[0]; |
| 946 | for (j = width; j < width + WIENER_HALFWIN; ++j) |
David Barker | 025b254 | 2016-12-08 11:50:42 +0000 | [diff] [blame] | 947 | data_p[j] = data_p[width - 1]; |
| 948 | } |
Debargha Mukherjee | 999d2f6 | 2016-12-15 13:23:21 -0800 | [diff] [blame] | 949 | data_p = data - WIENER_HALFWIN; |
| 950 | for (i = -WIENER_HALFWIN; i < 0; ++i) { |
David Barker | 025b254 | 2016-12-08 11:50:42 +0000 | [diff] [blame] | 951 | memcpy(data_p + i * stride, data_p, |
Debargha Mukherjee | 999d2f6 | 2016-12-15 13:23:21 -0800 | [diff] [blame] | 952 | (width + 2 * WIENER_HALFWIN) * sizeof(uint16_t)); |
David Barker | 025b254 | 2016-12-08 11:50:42 +0000 | [diff] [blame] | 953 | } |
Debargha Mukherjee | 999d2f6 | 2016-12-15 13:23:21 -0800 | [diff] [blame] | 954 | for (i = height; i < height + WIENER_HALFWIN; ++i) { |
David Barker | 025b254 | 2016-12-08 11:50:42 +0000 | [diff] [blame] | 955 | memcpy(data_p + i * stride, data_p + (height - 1) * stride, |
Debargha Mukherjee | 999d2f6 | 2016-12-15 13:23:21 -0800 | [diff] [blame] | 956 | (width + 2 * WIENER_HALFWIN) * sizeof(uint16_t)); |
David Barker | 025b254 | 2016-12-08 11:50:42 +0000 | [diff] [blame] | 957 | } |
| 958 | } |
| 959 | |
David Barker | 9666e75 | 2016-12-08 11:25:47 +0000 | [diff] [blame] | 960 | static void loop_copy_tile_highbd(uint16_t *data, int tile_idx, int subtile_idx, |
| 961 | int subtile_bits, int width, int height, |
| 962 | int stride, RestorationInternal *rst, |
| 963 | uint16_t *dst, int dst_stride) { |
Debargha Mukherjee | d748914 | 2017-01-05 13:58:16 -0800 | [diff] [blame] | 964 | const int tile_width = rst->tile_width; |
| 965 | const int tile_height = rst->tile_height; |
David Barker | 9666e75 | 2016-12-08 11:25:47 +0000 | [diff] [blame] | 966 | int i; |
| 967 | int h_start, h_end, v_start, v_end; |
| 968 | av1_get_rest_tile_limits(tile_idx, subtile_idx, subtile_bits, rst->nhtiles, |
| 969 | rst->nvtiles, tile_width, tile_height, width, height, |
| 970 | 0, 0, &h_start, &h_end, &v_start, &v_end); |
| 971 | for (i = v_start; i < v_end; ++i) |
| 972 | memcpy(dst + i * dst_stride + h_start, data + i * stride + h_start, |
| 973 | (h_end - h_start) * sizeof(*dst)); |
| 974 | } |
| 975 | |
Debargha Mukherjee | 5cd2ab9 | 2016-09-08 15:15:17 -0700 | [diff] [blame] | 976 | static void loop_wiener_filter_tile_highbd(uint16_t *data, int tile_idx, |
| 977 | int width, int height, int stride, |
| 978 | RestorationInternal *rst, |
David Barker | 9666e75 | 2016-12-08 11:25:47 +0000 | [diff] [blame] | 979 | int bit_depth, uint16_t *dst, |
| 980 | int dst_stride) { |
Debargha Mukherjee | d748914 | 2017-01-05 13:58:16 -0800 | [diff] [blame] | 981 | const int tile_width = rst->tile_width; |
| 982 | const int tile_height = rst->tile_height; |
Debargha Mukherjee | 5cd2ab9 | 2016-09-08 15:15:17 -0700 | [diff] [blame] | 983 | int h_start, h_end, v_start, v_end; |
| 984 | int i, j; |
Debargha Mukherjee | 5cd2ab9 | 2016-09-08 15:15:17 -0700 | [diff] [blame] | 985 | |
Debargha Mukherjee | 994ccd7 | 2017-01-06 11:18:23 -0800 | [diff] [blame] | 986 | if (rst->rsi->restoration_type[tile_idx] == RESTORE_NONE) { |
David Barker | 9666e75 | 2016-12-08 11:25:47 +0000 | [diff] [blame] | 987 | loop_copy_tile_highbd(data, tile_idx, 0, 0, width, height, stride, rst, dst, |
| 988 | dst_stride); |
| 989 | return; |
| 990 | } |
Debargha Mukherjee | 5cd2ab9 | 2016-09-08 15:15:17 -0700 | [diff] [blame] | 991 | av1_get_rest_tile_limits(tile_idx, 0, 0, rst->nhtiles, rst->nvtiles, |
David Barker | 025b254 | 2016-12-08 11:50:42 +0000 | [diff] [blame] | 992 | tile_width, tile_height, width, height, 0, 0, |
Debargha Mukherjee | 5cd2ab9 | 2016-09-08 15:15:17 -0700 | [diff] [blame] | 993 | &h_start, &h_end, &v_start, &v_end); |
David Barker | 025b254 | 2016-12-08 11:50:42 +0000 | [diff] [blame] | 994 | // Convolve the whole tile (done in blocks here to match the requirements |
| 995 | // of the vectorized convolve functions, but the result is equivalent) |
| 996 | for (i = v_start; i < v_end; i += MAX_SB_SIZE) |
| 997 | for (j = h_start; j < h_end; j += MAX_SB_SIZE) { |
| 998 | int w = AOMMIN(MAX_SB_SIZE, (h_end - j + 15) & ~15); |
| 999 | int h = AOMMIN(MAX_SB_SIZE, (v_end - i + 15) & ~15); |
| 1000 | const uint16_t *data_p = data + i * stride + j; |
| 1001 | uint16_t *dst_p = dst + i * dst_stride + j; |
Debargha Mukherjee | 28d15c7 | 2017-05-12 10:44:03 -0700 | [diff] [blame] | 1002 | #if USE_WIENER_HIGH_INTERMEDIATE_PRECISION |
| 1003 | aom_highbd_convolve8_add_src_hip( |
| 1004 | CONVERT_TO_BYTEPTR(data_p), stride, CONVERT_TO_BYTEPTR(dst_p), |
| 1005 | dst_stride, rst->rsi->wiener_info[tile_idx].hfilter, 16, |
| 1006 | rst->rsi->wiener_info[tile_idx].vfilter, 16, w, h, bit_depth); |
| 1007 | #else |
David Barker | 1e8e6b9 | 2017-01-13 13:45:51 +0000 | [diff] [blame] | 1008 | aom_highbd_convolve8_add_src( |
| 1009 | CONVERT_TO_BYTEPTR(data_p), stride, CONVERT_TO_BYTEPTR(dst_p), |
| 1010 | dst_stride, rst->rsi->wiener_info[tile_idx].hfilter, 16, |
| 1011 | rst->rsi->wiener_info[tile_idx].vfilter, 16, w, h, bit_depth); |
Debargha Mukherjee | 28d15c7 | 2017-05-12 10:44:03 -0700 | [diff] [blame] | 1012 | #endif // USE_WIENER_HIGH_INTERMEDIATE_PRECISION |
Debargha Mukherjee | 5cd2ab9 | 2016-09-08 15:15:17 -0700 | [diff] [blame] | 1013 | } |
Debargha Mukherjee | 5cd2ab9 | 2016-09-08 15:15:17 -0700 | [diff] [blame] | 1014 | } |
| 1015 | |
Yaowu Xu | c27fc14 | 2016-08-22 16:08:15 -0700 | [diff] [blame] | 1016 | static void loop_wiener_filter_highbd(uint8_t *data8, int width, int height, |
| 1017 | int stride, RestorationInternal *rst, |
David Barker | 9666e75 | 2016-12-08 11:25:47 +0000 | [diff] [blame] | 1018 | int bit_depth, uint8_t *dst8, |
| 1019 | int dst_stride) { |
Yaowu Xu | c27fc14 | 2016-08-22 16:08:15 -0700 | [diff] [blame] | 1020 | uint16_t *data = CONVERT_TO_SHORTPTR(data8); |
David Barker | 9666e75 | 2016-12-08 11:25:47 +0000 | [diff] [blame] | 1021 | uint16_t *dst = CONVERT_TO_SHORTPTR(dst8); |
Debargha Mukherjee | 874d36d | 2016-12-14 16:53:17 -0800 | [diff] [blame] | 1022 | int tile_idx; |
David Barker | 025b254 | 2016-12-08 11:50:42 +0000 | [diff] [blame] | 1023 | extend_frame_highbd(data, width, height, stride); |
Yaowu Xu | c27fc14 | 2016-08-22 16:08:15 -0700 | [diff] [blame] | 1024 | for (tile_idx = 0; tile_idx < rst->ntiles; ++tile_idx) { |
Debargha Mukherjee | 5cd2ab9 | 2016-09-08 15:15:17 -0700 | [diff] [blame] | 1025 | loop_wiener_filter_tile_highbd(data, tile_idx, width, height, stride, rst, |
David Barker | 025b254 | 2016-12-08 11:50:42 +0000 | [diff] [blame] | 1026 | bit_depth, dst, dst_stride); |
Yaowu Xu | c27fc14 | 2016-08-22 16:08:15 -0700 | [diff] [blame] | 1027 | } |
Debargha Mukherjee | 5cd2ab9 | 2016-09-08 15:15:17 -0700 | [diff] [blame] | 1028 | } |
Yaowu Xu | c27fc14 | 2016-08-22 16:08:15 -0700 | [diff] [blame] | 1029 | |
// Self-guided restoration for high bit-depth input. The width x height block
// of 16-bit pixels in `dgd` is first widened into the 32-bit buffer `dst`,
// which av1_selfguided_restoration_internal() then processes in place.
void av1_selfguided_restoration_highbd_c(uint16_t *dgd, int width, int height,
                                         int stride, int32_t *dst,
                                         int dst_stride, int bit_depth, int r,
                                         int eps, int32_t *tmpbuf) {
  int row;

  for (row = 0; row < height; ++row) {
    const uint16_t *src_row = dgd + row * stride;
    int32_t *dst_row = dst + row * dst_stride;
    int col;
    for (col = 0; col < width; ++col) dst_row[col] = src_row[col];
  }

  av1_selfguided_restoration_internal(dst, width, height, dst_stride, bit_depth,
                                      r, eps, tmpbuf);
}
| 1043 | |
Debargha Mukherjee | b7bb097 | 2017-03-09 06:47:43 -0800 | [diff] [blame] | 1044 | void av1_highpass_filter_highbd_c(uint16_t *dgd, int width, int height, |
| 1045 | int stride, int32_t *dst, int dst_stride, |
David Barker | eed824e | 2017-03-10 11:35:22 +0000 | [diff] [blame] | 1046 | int corner, int edge) { |
Urvang Joshi | 0c45941 | 2017-04-21 18:10:09 +0000 | [diff] [blame] | 1047 | int i, j; |
| 1048 | const int center = (1 << SGRPROJ_RST_BITS) - 4 * (corner + edge); |
| 1049 | |
| 1050 | i = 0; |
| 1051 | j = 0; |
| 1052 | { |
| 1053 | const int k = i * stride + j; |
| 1054 | const int l = i * dst_stride + j; |
| 1055 | dst[l] = |
| 1056 | center * dgd[k] + edge * (dgd[k + 1] + dgd[k + stride] + dgd[k] * 2) + |
| 1057 | corner * (dgd[k + stride + 1] + dgd[k + 1] + dgd[k + stride] + dgd[k]); |
| 1058 | } |
| 1059 | i = 0; |
| 1060 | j = width - 1; |
| 1061 | { |
| 1062 | const int k = i * stride + j; |
| 1063 | const int l = i * dst_stride + j; |
| 1064 | dst[l] = |
| 1065 | center * dgd[k] + edge * (dgd[k - 1] + dgd[k + stride] + dgd[k] * 2) + |
| 1066 | corner * (dgd[k + stride - 1] + dgd[k - 1] + dgd[k + stride] + dgd[k]); |
| 1067 | } |
| 1068 | i = height - 1; |
| 1069 | j = 0; |
| 1070 | { |
| 1071 | const int k = i * stride + j; |
| 1072 | const int l = i * dst_stride + j; |
| 1073 | dst[l] = |
| 1074 | center * dgd[k] + edge * (dgd[k + 1] + dgd[k - stride] + dgd[k] * 2) + |
| 1075 | corner * (dgd[k - stride + 1] + dgd[k + 1] + dgd[k - stride] + dgd[k]); |
| 1076 | } |
| 1077 | i = height - 1; |
| 1078 | j = width - 1; |
| 1079 | { |
| 1080 | const int k = i * stride + j; |
| 1081 | const int l = i * dst_stride + j; |
| 1082 | dst[l] = |
| 1083 | center * dgd[k] + edge * (dgd[k - 1] + dgd[k - stride] + dgd[k] * 2) + |
| 1084 | corner * (dgd[k - stride - 1] + dgd[k - 1] + dgd[k - stride] + dgd[k]); |
| 1085 | } |
| 1086 | i = 0; |
| 1087 | for (j = 1; j < width - 1; ++j) { |
| 1088 | const int k = i * stride + j; |
| 1089 | const int l = i * dst_stride + j; |
| 1090 | dst[l] = center * dgd[k] + |
| 1091 | edge * (dgd[k - 1] + dgd[k + stride] + dgd[k + 1] + dgd[k]) + |
| 1092 | corner * (dgd[k + stride - 1] + dgd[k + stride + 1] + dgd[k - 1] + |
| 1093 | dgd[k + 1]); |
| 1094 | } |
| 1095 | i = height - 1; |
| 1096 | for (j = 1; j < width - 1; ++j) { |
| 1097 | const int k = i * stride + j; |
| 1098 | const int l = i * dst_stride + j; |
| 1099 | dst[l] = center * dgd[k] + |
| 1100 | edge * (dgd[k - 1] + dgd[k - stride] + dgd[k + 1] + dgd[k]) + |
| 1101 | corner * (dgd[k - stride - 1] + dgd[k - stride + 1] + dgd[k - 1] + |
| 1102 | dgd[k + 1]); |
| 1103 | } |
| 1104 | j = 0; |
| 1105 | for (i = 1; i < height - 1; ++i) { |
| 1106 | const int k = i * stride + j; |
| 1107 | const int l = i * dst_stride + j; |
| 1108 | dst[l] = center * dgd[k] + |
| 1109 | edge * (dgd[k - stride] + dgd[k + 1] + dgd[k + stride] + dgd[k]) + |
| 1110 | corner * (dgd[k + stride + 1] + dgd[k - stride + 1] + |
| 1111 | dgd[k - stride] + dgd[k + stride]); |
| 1112 | } |
| 1113 | j = width - 1; |
| 1114 | for (i = 1; i < height - 1; ++i) { |
| 1115 | const int k = i * stride + j; |
| 1116 | const int l = i * dst_stride + j; |
| 1117 | dst[l] = center * dgd[k] + |
| 1118 | edge * (dgd[k - stride] + dgd[k - 1] + dgd[k + stride] + dgd[k]) + |
| 1119 | corner * (dgd[k + stride - 1] + dgd[k - stride - 1] + |
| 1120 | dgd[k - stride] + dgd[k + stride]); |
| 1121 | } |
| 1122 | for (i = 1; i < height - 1; ++i) { |
| 1123 | for (j = 1; j < width - 1; ++j) { |
| 1124 | const int k = i * stride + j; |
| 1125 | const int l = i * dst_stride + j; |
| 1126 | dst[l] = |
| 1127 | center * dgd[k] + |
| 1128 | edge * (dgd[k - stride] + dgd[k - 1] + dgd[k + stride] + dgd[k + 1]) + |
| 1129 | corner * (dgd[k + stride - 1] + dgd[k - stride - 1] + |
| 1130 | dgd[k - stride + 1] + dgd[k + stride + 1]); |
| 1131 | } |
| 1132 | } |
Debargha Mukherjee | b7bb097 | 2017-03-09 06:47:43 -0800 | [diff] [blame] | 1133 | } |
| 1134 | |
David Barker | 506eb72 | 2017-03-08 13:35:49 +0000 | [diff] [blame] | 1135 | void apply_selfguided_restoration_highbd_c(uint16_t *dat, int width, int height, |
| 1136 | int stride, int bit_depth, int eps, |
| 1137 | int *xqd, uint16_t *dst, |
| 1138 | int dst_stride, int32_t *tmpbuf) { |
Debargha Mukherjee | 519dbcf | 2016-12-16 03:13:02 -0800 | [diff] [blame] | 1139 | int xq[2]; |
David Barker | 3a0df18 | 2016-12-21 10:44:52 +0000 | [diff] [blame] | 1140 | int32_t *flt1 = tmpbuf; |
Debargha Mukherjee | 519dbcf | 2016-12-16 03:13:02 -0800 | [diff] [blame] | 1141 | int32_t *flt2 = flt1 + RESTORATION_TILEPELS_MAX; |
David Barker | 3a0df18 | 2016-12-21 10:44:52 +0000 | [diff] [blame] | 1142 | int32_t *tmpbuf2 = flt2 + RESTORATION_TILEPELS_MAX; |
Debargha Mukherjee | 519dbcf | 2016-12-16 03:13:02 -0800 | [diff] [blame] | 1143 | int i, j; |
David Barker | 0b04e9b | 2017-01-18 15:29:20 +0000 | [diff] [blame] | 1144 | assert(width * height <= RESTORATION_TILEPELS_MAX); |
Debargha Mukherjee | b7bb097 | 2017-03-09 06:47:43 -0800 | [diff] [blame] | 1145 | #if USE_HIGHPASS_IN_SGRPROJ |
| 1146 | av1_highpass_filter_highbd_c(dat, width, height, stride, flt1, width, |
David Barker | eed824e | 2017-03-10 11:35:22 +0000 | [diff] [blame] | 1147 | sgr_params[eps].corner, sgr_params[eps].edge); |
Debargha Mukherjee | b7bb097 | 2017-03-09 06:47:43 -0800 | [diff] [blame] | 1148 | #else |
David Barker | 506eb72 | 2017-03-08 13:35:49 +0000 | [diff] [blame] | 1149 | av1_selfguided_restoration_highbd_c(dat, width, height, stride, flt1, width, |
| 1150 | bit_depth, sgr_params[eps].r1, |
| 1151 | sgr_params[eps].e1, tmpbuf2); |
Debargha Mukherjee | b7bb097 | 2017-03-09 06:47:43 -0800 | [diff] [blame] | 1152 | #endif // USE_HIGHPASS_IN_SGRPROJ |
David Barker | 506eb72 | 2017-03-08 13:35:49 +0000 | [diff] [blame] | 1153 | av1_selfguided_restoration_highbd_c(dat, width, height, stride, flt2, width, |
| 1154 | bit_depth, sgr_params[eps].r2, |
| 1155 | sgr_params[eps].e2, tmpbuf2); |
Debargha Mukherjee | 519dbcf | 2016-12-16 03:13:02 -0800 | [diff] [blame] | 1156 | decode_xq(xqd, xq); |
| 1157 | for (i = 0; i < height; ++i) { |
| 1158 | for (j = 0; j < width; ++j) { |
| 1159 | const int k = i * width + j; |
| 1160 | const int l = i * stride + j; |
| 1161 | const int m = i * dst_stride + j; |
| 1162 | const int32_t u = ((int32_t)dat[l] << SGRPROJ_RST_BITS); |
| 1163 | const int32_t f1 = (int32_t)flt1[k] - u; |
| 1164 | const int32_t f2 = (int32_t)flt2[k] - u; |
David Barker | ce110cc | 2017-02-22 10:38:59 +0000 | [diff] [blame] | 1165 | const int32_t v = xq[0] * f1 + xq[1] * f2 + (u << SGRPROJ_PRJ_BITS); |
Debargha Mukherjee | 519dbcf | 2016-12-16 03:13:02 -0800 | [diff] [blame] | 1166 | const int16_t w = |
| 1167 | (int16_t)ROUND_POWER_OF_TWO(v, SGRPROJ_PRJ_BITS + SGRPROJ_RST_BITS); |
| 1168 | dst[m] = (uint16_t)clip_pixel_highbd(w, bit_depth); |
| 1169 | } |
| 1170 | } |
| 1171 | } |
| 1172 | |
Debargha Mukherjee | 8f209a8 | 2016-10-12 10:47:01 -0700 | [diff] [blame] | 1173 | static void loop_sgrproj_filter_tile_highbd(uint16_t *data, int tile_idx, |
| 1174 | int width, int height, int stride, |
| 1175 | RestorationInternal *rst, |
Debargha Mukherjee | 874d36d | 2016-12-14 16:53:17 -0800 | [diff] [blame] | 1176 | int bit_depth, uint16_t *dst, |
| 1177 | int dst_stride) { |
Debargha Mukherjee | d748914 | 2017-01-05 13:58:16 -0800 | [diff] [blame] | 1178 | const int tile_width = rst->tile_width; |
| 1179 | const int tile_height = rst->tile_height; |
Debargha Mukherjee | 8f209a8 | 2016-10-12 10:47:01 -0700 | [diff] [blame] | 1180 | int h_start, h_end, v_start, v_end; |
David Barker | 9666e75 | 2016-12-08 11:25:47 +0000 | [diff] [blame] | 1181 | uint16_t *data_p, *dst_p; |
Debargha Mukherjee | 8f209a8 | 2016-10-12 10:47:01 -0700 | [diff] [blame] | 1182 | |
Debargha Mukherjee | 994ccd7 | 2017-01-06 11:18:23 -0800 | [diff] [blame] | 1183 | if (rst->rsi->restoration_type[tile_idx] == RESTORE_NONE) { |
David Barker | 9666e75 | 2016-12-08 11:25:47 +0000 | [diff] [blame] | 1184 | loop_copy_tile_highbd(data, tile_idx, 0, 0, width, height, stride, rst, dst, |
| 1185 | dst_stride); |
| 1186 | return; |
| 1187 | } |
Debargha Mukherjee | 8f209a8 | 2016-10-12 10:47:01 -0700 | [diff] [blame] | 1188 | av1_get_rest_tile_limits(tile_idx, 0, 0, rst->nhtiles, rst->nvtiles, |
| 1189 | tile_width, tile_height, width, height, 0, 0, |
| 1190 | &h_start, &h_end, &v_start, &v_end); |
| 1191 | data_p = data + h_start + v_start * stride; |
David Barker | 9666e75 | 2016-12-08 11:25:47 +0000 | [diff] [blame] | 1192 | dst_p = dst + h_start + v_start * dst_stride; |
Debargha Mukherjee | 519dbcf | 2016-12-16 03:13:02 -0800 | [diff] [blame] | 1193 | apply_selfguided_restoration_highbd( |
| 1194 | data_p, h_end - h_start, v_end - v_start, stride, bit_depth, |
| 1195 | rst->rsi->sgrproj_info[tile_idx].ep, rst->rsi->sgrproj_info[tile_idx].xqd, |
David Barker | 3a0df18 | 2016-12-21 10:44:52 +0000 | [diff] [blame] | 1196 | dst_p, dst_stride, rst->tmpbuf); |
Debargha Mukherjee | 8f209a8 | 2016-10-12 10:47:01 -0700 | [diff] [blame] | 1197 | } |
| 1198 | |
| 1199 | static void loop_sgrproj_filter_highbd(uint8_t *data8, int width, int height, |
| 1200 | int stride, RestorationInternal *rst, |
David Barker | 9666e75 | 2016-12-08 11:25:47 +0000 | [diff] [blame] | 1201 | int bit_depth, uint8_t *dst8, |
| 1202 | int dst_stride) { |
Debargha Mukherjee | 8f209a8 | 2016-10-12 10:47:01 -0700 | [diff] [blame] | 1203 | int tile_idx; |
| 1204 | uint16_t *data = CONVERT_TO_SHORTPTR(data8); |
David Barker | 9666e75 | 2016-12-08 11:25:47 +0000 | [diff] [blame] | 1205 | uint16_t *dst = CONVERT_TO_SHORTPTR(dst8); |
Debargha Mukherjee | 8f209a8 | 2016-10-12 10:47:01 -0700 | [diff] [blame] | 1206 | for (tile_idx = 0; tile_idx < rst->ntiles; ++tile_idx) { |
| 1207 | loop_sgrproj_filter_tile_highbd(data, tile_idx, width, height, stride, rst, |
Debargha Mukherjee | 874d36d | 2016-12-14 16:53:17 -0800 | [diff] [blame] | 1208 | bit_depth, dst, dst_stride); |
Debargha Mukherjee | 8f209a8 | 2016-10-12 10:47:01 -0700 | [diff] [blame] | 1209 | } |
Debargha Mukherjee | 8f209a8 | 2016-10-12 10:47:01 -0700 | [diff] [blame] | 1210 | } |
| 1211 | |
Debargha Mukherjee | 5cd2ab9 | 2016-09-08 15:15:17 -0700 | [diff] [blame] | 1212 | static void loop_switchable_filter_highbd(uint8_t *data8, int width, int height, |
| 1213 | int stride, RestorationInternal *rst, |
David Barker | 9666e75 | 2016-12-08 11:25:47 +0000 | [diff] [blame] | 1214 | int bit_depth, uint8_t *dst8, |
| 1215 | int dst_stride) { |
Debargha Mukherjee | 5cd2ab9 | 2016-09-08 15:15:17 -0700 | [diff] [blame] | 1216 | uint16_t *data = CONVERT_TO_SHORTPTR(data8); |
David Barker | 9666e75 | 2016-12-08 11:25:47 +0000 | [diff] [blame] | 1217 | uint16_t *dst = CONVERT_TO_SHORTPTR(dst8); |
Debargha Mukherjee | 874d36d | 2016-12-14 16:53:17 -0800 | [diff] [blame] | 1218 | int tile_idx; |
David Barker | 025b254 | 2016-12-08 11:50:42 +0000 | [diff] [blame] | 1219 | extend_frame_highbd(data, width, height, stride); |
Yaowu Xu | c27fc14 | 2016-08-22 16:08:15 -0700 | [diff] [blame] | 1220 | for (tile_idx = 0; tile_idx < rst->ntiles; ++tile_idx) { |
David Barker | 9666e75 | 2016-12-08 11:25:47 +0000 | [diff] [blame] | 1221 | if (rst->rsi->restoration_type[tile_idx] == RESTORE_NONE) { |
| 1222 | loop_copy_tile_highbd(data, tile_idx, 0, 0, width, height, stride, rst, |
| 1223 | dst, dst_stride); |
| 1224 | } else if (rst->rsi->restoration_type[tile_idx] == RESTORE_WIENER) { |
Debargha Mukherjee | 5cd2ab9 | 2016-09-08 15:15:17 -0700 | [diff] [blame] | 1225 | loop_wiener_filter_tile_highbd(data, tile_idx, width, height, stride, rst, |
David Barker | 025b254 | 2016-12-08 11:50:42 +0000 | [diff] [blame] | 1226 | bit_depth, dst, dst_stride); |
Debargha Mukherjee | 8f209a8 | 2016-10-12 10:47:01 -0700 | [diff] [blame] | 1227 | } else if (rst->rsi->restoration_type[tile_idx] == RESTORE_SGRPROJ) { |
| 1228 | loop_sgrproj_filter_tile_highbd(data, tile_idx, width, height, stride, |
Debargha Mukherjee | 874d36d | 2016-12-14 16:53:17 -0800 | [diff] [blame] | 1229 | rst, bit_depth, dst, dst_stride); |
Yaowu Xu | c27fc14 | 2016-08-22 16:08:15 -0700 | [diff] [blame] | 1230 | } |
| 1231 | } |
| 1232 | } |
Sebastien Alaiwan | 71e8784 | 2017-04-12 16:03:28 +0200 | [diff] [blame] | 1233 | #endif // CONFIG_HIGHBITDEPTH |
Yaowu Xu | c27fc14 | 2016-08-22 16:08:15 -0700 | [diff] [blame] | 1234 | |
Debargha Mukherjee | a43a2d9 | 2017-01-03 15:14:57 -0800 | [diff] [blame] | 1235 | static void loop_restoration_rows(YV12_BUFFER_CONFIG *frame, AV1_COMMON *cm, |
| 1236 | int start_mi_row, int end_mi_row, |
| 1237 | int components_pattern, RestorationInfo *rsi, |
| 1238 | YV12_BUFFER_CONFIG *dst) { |
Yaowu Xu | c27fc14 | 2016-08-22 16:08:15 -0700 | [diff] [blame] | 1239 | const int ywidth = frame->y_crop_width; |
Debargha Mukherjee | 2dd982e | 2017-06-05 13:55:12 -0700 | [diff] [blame] | 1240 | const int yheight = frame->y_crop_height; |
Yaowu Xu | c27fc14 | 2016-08-22 16:08:15 -0700 | [diff] [blame] | 1241 | const int uvwidth = frame->uv_crop_width; |
Debargha Mukherjee | 2dd982e | 2017-06-05 13:55:12 -0700 | [diff] [blame] | 1242 | const int uvheight = frame->uv_crop_height; |
| 1243 | const int ystride = frame->y_stride; |
Yaowu Xu | c27fc14 | 2016-08-22 16:08:15 -0700 | [diff] [blame] | 1244 | const int uvstride = frame->uv_stride; |
| 1245 | const int ystart = start_mi_row << MI_SIZE_LOG2; |
| 1246 | const int uvstart = ystart >> cm->subsampling_y; |
| 1247 | int yend = end_mi_row << MI_SIZE_LOG2; |
| 1248 | int uvend = yend >> cm->subsampling_y; |
Debargha Mukherjee | b3c43bc | 2017-02-01 13:09:03 -0800 | [diff] [blame] | 1249 | restore_func_type restore_funcs[RESTORE_TYPES] = { |
Debargha Mukherjee | 4bfd72e | 2017-03-08 22:20:31 -0800 | [diff] [blame] | 1250 | NULL, loop_wiener_filter, loop_sgrproj_filter, loop_switchable_filter |
Debargha Mukherjee | b3c43bc | 2017-02-01 13:09:03 -0800 | [diff] [blame] | 1251 | }; |
Sebastien Alaiwan | 71e8784 | 2017-04-12 16:03:28 +0200 | [diff] [blame] | 1252 | #if CONFIG_HIGHBITDEPTH |
Debargha Mukherjee | 8f209a8 | 2016-10-12 10:47:01 -0700 | [diff] [blame] | 1253 | restore_func_highbd_type restore_funcs_highbd[RESTORE_TYPES] = { |
Debargha Mukherjee | 4bfd72e | 2017-03-08 22:20:31 -0800 | [diff] [blame] | 1254 | NULL, loop_wiener_filter_highbd, loop_sgrproj_filter_highbd, |
Debargha Mukherjee | b3c43bc | 2017-02-01 13:09:03 -0800 | [diff] [blame] | 1255 | loop_switchable_filter_highbd |
Debargha Mukherjee | 8f209a8 | 2016-10-12 10:47:01 -0700 | [diff] [blame] | 1256 | }; |
Sebastien Alaiwan | 71e8784 | 2017-04-12 16:03:28 +0200 | [diff] [blame] | 1257 | #endif // CONFIG_HIGHBITDEPTH |
Debargha Mukherjee | a43a2d9 | 2017-01-03 15:14:57 -0800 | [diff] [blame] | 1258 | restore_func_type restore_func; |
Sebastien Alaiwan | 71e8784 | 2017-04-12 16:03:28 +0200 | [diff] [blame] | 1259 | #if CONFIG_HIGHBITDEPTH |
Debargha Mukherjee | a43a2d9 | 2017-01-03 15:14:57 -0800 | [diff] [blame] | 1260 | restore_func_highbd_type restore_func_highbd; |
Sebastien Alaiwan | 71e8784 | 2017-04-12 16:03:28 +0200 | [diff] [blame] | 1261 | #endif // CONFIG_HIGHBITDEPTH |
David Barker | 9666e75 | 2016-12-08 11:25:47 +0000 | [diff] [blame] | 1262 | YV12_BUFFER_CONFIG dst_; |
Yaowu Xu | c27fc14 | 2016-08-22 16:08:15 -0700 | [diff] [blame] | 1263 | |
Fergus Simpson | 9cd57cf | 2017-06-12 17:02:03 -0700 | [diff] [blame] | 1264 | yend = AOMMIN(yend, yheight); |
| 1265 | uvend = AOMMIN(uvend, uvheight); |
Yaowu Xu | c27fc14 | 2016-08-22 16:08:15 -0700 | [diff] [blame] | 1266 | |
Debargha Mukherjee | a43a2d9 | 2017-01-03 15:14:57 -0800 | [diff] [blame] | 1267 | if (components_pattern == (1 << AOM_PLANE_Y)) { |
| 1268 | // Only y |
| 1269 | if (rsi[0].frame_restoration_type == RESTORE_NONE) { |
| 1270 | if (dst) aom_yv12_copy_y(frame, dst); |
| 1271 | return; |
David Barker | 9666e75 | 2016-12-08 11:25:47 +0000 | [diff] [blame] | 1272 | } |
Debargha Mukherjee | a43a2d9 | 2017-01-03 15:14:57 -0800 | [diff] [blame] | 1273 | } else if (components_pattern == (1 << AOM_PLANE_U)) { |
| 1274 | // Only U |
| 1275 | if (rsi[1].frame_restoration_type == RESTORE_NONE) { |
| 1276 | if (dst) aom_yv12_copy_u(frame, dst); |
| 1277 | return; |
| 1278 | } |
| 1279 | } else if (components_pattern == (1 << AOM_PLANE_V)) { |
| 1280 | // Only V |
| 1281 | if (rsi[2].frame_restoration_type == RESTORE_NONE) { |
| 1282 | if (dst) aom_yv12_copy_v(frame, dst); |
| 1283 | return; |
| 1284 | } |
| 1285 | } else if (components_pattern == |
| 1286 | ((1 << AOM_PLANE_Y) | (1 << AOM_PLANE_U) | (1 << AOM_PLANE_V))) { |
| 1287 | // All components |
| 1288 | if (rsi[0].frame_restoration_type == RESTORE_NONE && |
| 1289 | rsi[1].frame_restoration_type == RESTORE_NONE && |
| 1290 | rsi[2].frame_restoration_type == RESTORE_NONE) { |
| 1291 | if (dst) aom_yv12_copy_frame(frame, dst); |
| 1292 | return; |
| 1293 | } |
David Barker | 9666e75 | 2016-12-08 11:25:47 +0000 | [diff] [blame] | 1294 | } |
| 1295 | |
David Barker | 9666e75 | 2016-12-08 11:25:47 +0000 | [diff] [blame] | 1296 | if (!dst) { |
| 1297 | dst = &dst_; |
| 1298 | memset(dst, 0, sizeof(YV12_BUFFER_CONFIG)); |
| 1299 | if (aom_realloc_frame_buffer( |
Debargha Mukherjee | 2dd982e | 2017-06-05 13:55:12 -0700 | [diff] [blame] | 1300 | dst, ywidth, yheight, cm->subsampling_x, cm->subsampling_y, |
Sebastien Alaiwan | 71e8784 | 2017-04-12 16:03:28 +0200 | [diff] [blame] | 1301 | #if CONFIG_HIGHBITDEPTH |
David Barker | 9666e75 | 2016-12-08 11:25:47 +0000 | [diff] [blame] | 1302 | cm->use_highbitdepth, |
| 1303 | #endif |
| 1304 | AOM_BORDER_IN_PIXELS, cm->byte_alignment, NULL, NULL, NULL) < 0) |
| 1305 | aom_internal_error(&cm->error, AOM_CODEC_MEM_ERROR, |
| 1306 | "Failed to allocate restoration dst buffer"); |
| 1307 | } |
Debargha Mukherjee | 818e42a | 2016-12-12 11:52:56 -0800 | [diff] [blame] | 1308 | |
Debargha Mukherjee | a43a2d9 | 2017-01-03 15:14:57 -0800 | [diff] [blame] | 1309 | if ((components_pattern >> AOM_PLANE_Y) & 1) { |
| 1310 | if (rsi[0].frame_restoration_type != RESTORE_NONE) { |
Debargha Mukherjee | d748914 | 2017-01-05 13:58:16 -0800 | [diff] [blame] | 1311 | cm->rst_internal.ntiles = av1_get_rest_ntiles( |
Debargha Mukherjee | 2dd982e | 2017-06-05 13:55:12 -0700 | [diff] [blame] | 1312 | ywidth, yheight, cm->rst_info[AOM_PLANE_Y].restoration_tilesize, |
Debargha Mukherjee | 1008c1e | 2017-03-06 19:18:43 -0800 | [diff] [blame] | 1313 | &cm->rst_internal.tile_width, &cm->rst_internal.tile_height, |
| 1314 | &cm->rst_internal.nhtiles, &cm->rst_internal.nvtiles); |
Debargha Mukherjee | a43a2d9 | 2017-01-03 15:14:57 -0800 | [diff] [blame] | 1315 | cm->rst_internal.rsi = &rsi[0]; |
| 1316 | restore_func = |
| 1317 | restore_funcs[cm->rst_internal.rsi->frame_restoration_type]; |
Sebastien Alaiwan | 71e8784 | 2017-04-12 16:03:28 +0200 | [diff] [blame] | 1318 | #if CONFIG_HIGHBITDEPTH |
Debargha Mukherjee | a43a2d9 | 2017-01-03 15:14:57 -0800 | [diff] [blame] | 1319 | restore_func_highbd = |
| 1320 | restore_funcs_highbd[cm->rst_internal.rsi->frame_restoration_type]; |
| 1321 | if (cm->use_highbitdepth) |
| 1322 | restore_func_highbd( |
| 1323 | frame->y_buffer + ystart * ystride, ywidth, yend - ystart, ystride, |
| 1324 | &cm->rst_internal, cm->bit_depth, |
| 1325 | dst->y_buffer + ystart * dst->y_stride, dst->y_stride); |
| 1326 | else |
Sebastien Alaiwan | 71e8784 | 2017-04-12 16:03:28 +0200 | [diff] [blame] | 1327 | #endif // CONFIG_HIGHBITDEPTH |
Debargha Mukherjee | a43a2d9 | 2017-01-03 15:14:57 -0800 | [diff] [blame] | 1328 | restore_func(frame->y_buffer + ystart * ystride, ywidth, yend - ystart, |
| 1329 | ystride, &cm->rst_internal, |
| 1330 | dst->y_buffer + ystart * dst->y_stride, dst->y_stride); |
| 1331 | } else { |
| 1332 | aom_yv12_copy_y(frame, dst); |
| 1333 | } |
| 1334 | } |
| 1335 | |
| 1336 | if ((components_pattern >> AOM_PLANE_U) & 1) { |
Debargha Mukherjee | d748914 | 2017-01-05 13:58:16 -0800 | [diff] [blame] | 1337 | if (rsi[AOM_PLANE_U].frame_restoration_type != RESTORE_NONE) { |
| 1338 | cm->rst_internal.ntiles = av1_get_rest_ntiles( |
Debargha Mukherjee | 2dd982e | 2017-06-05 13:55:12 -0700 | [diff] [blame] | 1339 | uvwidth, uvheight, cm->rst_info[AOM_PLANE_U].restoration_tilesize, |
Debargha Mukherjee | d748914 | 2017-01-05 13:58:16 -0800 | [diff] [blame] | 1340 | &cm->rst_internal.tile_width, &cm->rst_internal.tile_height, |
| 1341 | &cm->rst_internal.nhtiles, &cm->rst_internal.nvtiles); |
| 1342 | cm->rst_internal.rsi = &rsi[AOM_PLANE_U]; |
Debargha Mukherjee | a43a2d9 | 2017-01-03 15:14:57 -0800 | [diff] [blame] | 1343 | restore_func = |
| 1344 | restore_funcs[cm->rst_internal.rsi->frame_restoration_type]; |
Sebastien Alaiwan | 71e8784 | 2017-04-12 16:03:28 +0200 | [diff] [blame] | 1345 | #if CONFIG_HIGHBITDEPTH |
Debargha Mukherjee | a43a2d9 | 2017-01-03 15:14:57 -0800 | [diff] [blame] | 1346 | restore_func_highbd = |
| 1347 | restore_funcs_highbd[cm->rst_internal.rsi->frame_restoration_type]; |
| 1348 | if (cm->use_highbitdepth) |
| 1349 | restore_func_highbd( |
| 1350 | frame->u_buffer + uvstart * uvstride, uvwidth, uvend - uvstart, |
| 1351 | uvstride, &cm->rst_internal, cm->bit_depth, |
| 1352 | dst->u_buffer + uvstart * dst->uv_stride, dst->uv_stride); |
| 1353 | else |
Sebastien Alaiwan | 71e8784 | 2017-04-12 16:03:28 +0200 | [diff] [blame] | 1354 | #endif // CONFIG_HIGHBITDEPTH |
Debargha Mukherjee | a43a2d9 | 2017-01-03 15:14:57 -0800 | [diff] [blame] | 1355 | restore_func(frame->u_buffer + uvstart * uvstride, uvwidth, |
| 1356 | uvend - uvstart, uvstride, &cm->rst_internal, |
| 1357 | dst->u_buffer + uvstart * dst->uv_stride, dst->uv_stride); |
Yaowu Xu | c27fc14 | 2016-08-22 16:08:15 -0700 | [diff] [blame] | 1358 | } else { |
Debargha Mukherjee | a43a2d9 | 2017-01-03 15:14:57 -0800 | [diff] [blame] | 1359 | aom_yv12_copy_u(frame, dst); |
Yaowu Xu | c27fc14 | 2016-08-22 16:08:15 -0700 | [diff] [blame] | 1360 | } |
Debargha Mukherjee | a43a2d9 | 2017-01-03 15:14:57 -0800 | [diff] [blame] | 1361 | } |
| 1362 | |
| 1363 | if ((components_pattern >> AOM_PLANE_V) & 1) { |
Debargha Mukherjee | d748914 | 2017-01-05 13:58:16 -0800 | [diff] [blame] | 1364 | if (rsi[AOM_PLANE_V].frame_restoration_type != RESTORE_NONE) { |
| 1365 | cm->rst_internal.ntiles = av1_get_rest_ntiles( |
Debargha Mukherjee | 2dd982e | 2017-06-05 13:55:12 -0700 | [diff] [blame] | 1366 | uvwidth, uvheight, cm->rst_info[AOM_PLANE_V].restoration_tilesize, |
Debargha Mukherjee | d748914 | 2017-01-05 13:58:16 -0800 | [diff] [blame] | 1367 | &cm->rst_internal.tile_width, &cm->rst_internal.tile_height, |
| 1368 | &cm->rst_internal.nhtiles, &cm->rst_internal.nvtiles); |
| 1369 | cm->rst_internal.rsi = &rsi[AOM_PLANE_V]; |
Debargha Mukherjee | a43a2d9 | 2017-01-03 15:14:57 -0800 | [diff] [blame] | 1370 | restore_func = |
| 1371 | restore_funcs[cm->rst_internal.rsi->frame_restoration_type]; |
Sebastien Alaiwan | 71e8784 | 2017-04-12 16:03:28 +0200 | [diff] [blame] | 1372 | #if CONFIG_HIGHBITDEPTH |
Debargha Mukherjee | a43a2d9 | 2017-01-03 15:14:57 -0800 | [diff] [blame] | 1373 | restore_func_highbd = |
| 1374 | restore_funcs_highbd[cm->rst_internal.rsi->frame_restoration_type]; |
| 1375 | if (cm->use_highbitdepth) |
| 1376 | restore_func_highbd( |
| 1377 | frame->v_buffer + uvstart * uvstride, uvwidth, uvend - uvstart, |
| 1378 | uvstride, &cm->rst_internal, cm->bit_depth, |
| 1379 | dst->v_buffer + uvstart * dst->uv_stride, dst->uv_stride); |
| 1380 | else |
Sebastien Alaiwan | 71e8784 | 2017-04-12 16:03:28 +0200 | [diff] [blame] | 1381 | #endif // CONFIG_HIGHBITDEPTH |
Debargha Mukherjee | a43a2d9 | 2017-01-03 15:14:57 -0800 | [diff] [blame] | 1382 | restore_func(frame->v_buffer + uvstart * uvstride, uvwidth, |
| 1383 | uvend - uvstart, uvstride, &cm->rst_internal, |
| 1384 | dst->v_buffer + uvstart * dst->uv_stride, dst->uv_stride); |
| 1385 | } else { |
| 1386 | aom_yv12_copy_v(frame, dst); |
| 1387 | } |
Yaowu Xu | c27fc14 | 2016-08-22 16:08:15 -0700 | [diff] [blame] | 1388 | } |
David Barker | 9666e75 | 2016-12-08 11:25:47 +0000 | [diff] [blame] | 1389 | |
David Barker | 9666e75 | 2016-12-08 11:25:47 +0000 | [diff] [blame] | 1390 | if (dst == &dst_) { |
Debargha Mukherjee | a43a2d9 | 2017-01-03 15:14:57 -0800 | [diff] [blame] | 1391 | if ((components_pattern >> AOM_PLANE_Y) & 1) aom_yv12_copy_y(dst, frame); |
| 1392 | if ((components_pattern >> AOM_PLANE_U) & 1) aom_yv12_copy_u(dst, frame); |
| 1393 | if ((components_pattern >> AOM_PLANE_V) & 1) aom_yv12_copy_v(dst, frame); |
David Barker | 9666e75 | 2016-12-08 11:25:47 +0000 | [diff] [blame] | 1394 | aom_free_frame_buffer(dst); |
| 1395 | } |
Yaowu Xu | c27fc14 | 2016-08-22 16:08:15 -0700 | [diff] [blame] | 1396 | } |
| 1397 | |
Yaowu Xu | f883b42 | 2016-08-30 14:01:10 -0700 | [diff] [blame] | 1398 | void av1_loop_restoration_frame(YV12_BUFFER_CONFIG *frame, AV1_COMMON *cm, |
Debargha Mukherjee | a43a2d9 | 2017-01-03 15:14:57 -0800 | [diff] [blame] | 1399 | RestorationInfo *rsi, int components_pattern, |
David Barker | 9666e75 | 2016-12-08 11:25:47 +0000 | [diff] [blame] | 1400 | int partial_frame, YV12_BUFFER_CONFIG *dst) { |
Yaowu Xu | c27fc14 | 2016-08-22 16:08:15 -0700 | [diff] [blame] | 1401 | int start_mi_row, end_mi_row, mi_rows_to_filter; |
Debargha Mukherjee | a43a2d9 | 2017-01-03 15:14:57 -0800 | [diff] [blame] | 1402 | start_mi_row = 0; |
Fergus Simpson | 9cd57cf | 2017-06-12 17:02:03 -0700 | [diff] [blame] | 1403 | #if CONFIG_FRAME_SUPERRES |
| 1404 | mi_rows_to_filter = |
| 1405 | ALIGN_POWER_OF_TWO(cm->superres_upscaled_height, 3) >> MI_SIZE_LOG2; |
| 1406 | #else |
Debargha Mukherjee | a43a2d9 | 2017-01-03 15:14:57 -0800 | [diff] [blame] | 1407 | mi_rows_to_filter = cm->mi_rows; |
Fergus Simpson | 9cd57cf | 2017-06-12 17:02:03 -0700 | [diff] [blame] | 1408 | #endif // CONFIG_FRAME_SUPERRES |
| 1409 | if (partial_frame && mi_rows_to_filter > 8) { |
| 1410 | start_mi_row = mi_rows_to_filter >> 1; |
Debargha Mukherjee | a43a2d9 | 2017-01-03 15:14:57 -0800 | [diff] [blame] | 1411 | start_mi_row &= 0xfffffff8; |
Fergus Simpson | 9cd57cf | 2017-06-12 17:02:03 -0700 | [diff] [blame] | 1412 | mi_rows_to_filter = AOMMAX(mi_rows_to_filter / 8, 8); |
Yaowu Xu | c27fc14 | 2016-08-22 16:08:15 -0700 | [diff] [blame] | 1413 | } |
Debargha Mukherjee | a43a2d9 | 2017-01-03 15:14:57 -0800 | [diff] [blame] | 1414 | end_mi_row = start_mi_row + mi_rows_to_filter; |
Debargha Mukherjee | d748914 | 2017-01-05 13:58:16 -0800 | [diff] [blame] | 1415 | loop_restoration_init(&cm->rst_internal, cm->frame_type == KEY_FRAME); |
Debargha Mukherjee | a43a2d9 | 2017-01-03 15:14:57 -0800 | [diff] [blame] | 1416 | loop_restoration_rows(frame, cm, start_mi_row, end_mi_row, components_pattern, |
| 1417 | rsi, dst); |
Yaowu Xu | c27fc14 | 2016-08-22 16:08:15 -0700 | [diff] [blame] | 1418 | } |