/*
 * Copyright (c) 2016, Alliance for Open Media. All rights reserved
 *
 * This source code is subject to the terms of the BSD 2 Clause License and
 * the Alliance for Open Media Patent License 1.0. If the BSD 2 Clause License
 * was not distributed with this source code in the LICENSE file, you can
 * obtain it at www.aomedia.org/license/software. If the Alliance for Open
 * Media Patent License 1.0 was not distributed with this source code in the
 * PATENTS file, you can obtain it at www.aomedia.org/license/patent.
 *
 */

#include <assert.h>
#include <math.h>
#include <string.h>

#include "./aom_config.h"
#include "./aom_dsp_rtcd.h"
#include "./aom_scale_rtcd.h"
#include "av1/common/onyxc_int.h"
#include "av1/common/restoration.h"
#include "aom_dsp/aom_dsp_common.h"
#include "aom_mem/aom_mem.h"

#include "aom_ports/mem.h"

Debargha Mukherjee | 8f209a8 | 2016-10-12 10:47:01 -0700 | [diff] [blame] | 25 | const sgr_params_type sgr_params[SGRPROJ_PARAMS] = { |
Debargha Mukherjee | b7bb097 | 2017-03-09 06:47:43 -0800 | [diff] [blame] | 26 | #if USE_HIGHPASS_IN_SGRPROJ |
| 27 | // corner, edge, r2, eps2 |
| 28 | { -1, 2, 1, 1 }, { -1, 2, 1, 2 }, { -1, 2, 1, 3 }, { -1, 2, 1, 4 }, |
| 29 | { -1, 2, 1, 5 }, { -2, 3, 1, 2 }, { -2, 3, 1, 3 }, { -2, 3, 1, 4 }, |
| 30 | { -2, 3, 1, 5 }, { -2, 3, 1, 6 }, { -3, 4, 1, 3 }, { -3, 4, 1, 4 }, |
| 31 | { -3, 4, 1, 5 }, { -3, 4, 1, 6 }, { -3, 4, 1, 7 }, { -3, 4, 1, 8 } |
| 32 | #else |
Debargha Mukherjee | 76be32d | 2017-08-15 16:45:13 -0700 | [diff] [blame] | 33 | // r1, eps1, r2, eps2 |
Debargha Mukherjee | 1330dfd | 2017-09-03 22:22:27 -0700 | [diff] [blame] | 34 | #if MAX_RADIUS == 2 |
Debargha Mukherjee | 76be32d | 2017-08-15 16:45:13 -0700 | [diff] [blame] | 35 | { 2, 12, 1, 4 }, { 2, 15, 1, 6 }, { 2, 18, 1, 8 }, { 2, 20, 1, 9 }, |
| 36 | { 2, 22, 1, 10 }, { 2, 25, 1, 11 }, { 2, 35, 1, 12 }, { 2, 45, 1, 13 }, |
Debargha Mukherjee | e5fabfb | 2017-10-10 09:10:24 -0700 | [diff] [blame] | 37 | { 2, 55, 1, 14 }, { 2, 65, 1, 15 }, { 2, 75, 1, 16 }, { 2, 30, 1, 6 }, |
Debargha Mukherjee | 76be32d | 2017-08-15 16:45:13 -0700 | [diff] [blame] | 38 | { 2, 50, 1, 12 }, { 2, 60, 1, 13 }, { 2, 70, 1, 14 }, { 2, 80, 1, 15 }, |
| 39 | #else |
Debargha Mukherjee | b3c43bc | 2017-02-01 13:09:03 -0800 | [diff] [blame] | 40 | { 2, 12, 1, 4 }, { 2, 15, 1, 6 }, { 2, 18, 1, 8 }, { 2, 20, 1, 9 }, |
| 41 | { 2, 22, 1, 10 }, { 2, 25, 1, 11 }, { 2, 35, 1, 12 }, { 2, 45, 1, 13 }, |
| 42 | { 2, 55, 1, 14 }, { 2, 65, 1, 15 }, { 2, 75, 1, 16 }, { 3, 30, 1, 10 }, |
| 43 | { 3, 50, 1, 12 }, { 3, 50, 2, 25 }, { 3, 60, 2, 35 }, { 3, 70, 2, 45 }, |
Debargha Mukherjee | 1330dfd | 2017-09-03 22:22:27 -0700 | [diff] [blame] | 44 | #endif // MAX_RADIUS == 2 |
Debargha Mukherjee | b7bb097 | 2017-03-09 06:47:43 -0800 | [diff] [blame] | 45 | #endif |
Debargha Mukherjee | 8f209a8 | 2016-10-12 10:47:01 -0700 | [diff] [blame] | 46 | }; |
| 47 | |
David Barker | befcc42 | 2017-01-31 09:42:10 +0000 | [diff] [blame] | 48 | int av1_alloc_restoration_struct(AV1_COMMON *cm, RestorationInfo *rst_info, |
| 49 | int width, int height) { |
Debargha Mukherjee | 1008c1e | 2017-03-06 19:18:43 -0800 | [diff] [blame] | 50 | const int ntiles = av1_get_rest_ntiles( |
Rupert Swarbrick | 64b8bbd | 2017-10-16 15:53:07 +0100 | [diff] [blame] | 51 | width, height, rst_info->restoration_tilesize, NULL, NULL); |
Rupert Swarbrick | dd6f09a | 2017-10-19 16:10:23 +0100 | [diff] [blame] | 52 | aom_free(rst_info->unit_info); |
David Barker | befcc42 | 2017-01-31 09:42:10 +0000 | [diff] [blame] | 53 | CHECK_MEM_ERROR( |
Rupert Swarbrick | dd6f09a | 2017-10-19 16:10:23 +0100 | [diff] [blame] | 54 | cm, rst_info->unit_info, |
| 55 | (RestorationUnitInfo *)aom_malloc(sizeof(*rst_info->unit_info) * ntiles)); |
Debargha Mukherjee | 874d36d | 2016-12-14 16:53:17 -0800 | [diff] [blame] | 56 | return ntiles; |
| 57 | } |
| 58 | |
| 59 | void av1_free_restoration_struct(RestorationInfo *rst_info) { |
Rupert Swarbrick | dd6f09a | 2017-10-19 16:10:23 +0100 | [diff] [blame] | 60 | aom_free(rst_info->unit_info); |
| 61 | rst_info->unit_info = NULL; |
Debargha Mukherjee | 874d36d | 2016-12-14 16:53:17 -0800 | [diff] [blame] | 62 | } |
| 63 | |
Debargha Mukherjee | 4be1262 | 2017-02-15 21:38:02 -0800 | [diff] [blame] | 64 | // TODO(debargha): This table can be substantially reduced since only a few |
| 65 | // values are actually used. |
David Barker | ce110cc | 2017-02-22 10:38:59 +0000 | [diff] [blame] | 66 | int sgrproj_mtable[MAX_EPS][MAX_NELEM]; |
Debargha Mukherjee | 4be1262 | 2017-02-15 21:38:02 -0800 | [diff] [blame] | 67 | |
| 68 | static void GenSgrprojVtable() { |
| 69 | int e, n; |
| 70 | for (e = 1; e <= MAX_EPS; ++e) |
| 71 | for (n = 1; n <= MAX_NELEM; ++n) { |
| 72 | const int n2e = n * n * e; |
| 73 | sgrproj_mtable[e - 1][n - 1] = |
| 74 | (((1 << SGRPROJ_MTABLE_BITS) + n2e / 2) / n2e); |
| 75 | } |
| 76 | } |
Debargha Mukherjee | 4be1262 | 2017-02-15 21:38:02 -0800 | [diff] [blame] | 77 | |
// Precomputes the lookup table used by the self-guided restoration filter.
void av1_loop_restoration_precal(void) { GenSgrprojVtable(); }

// Pads an 8-bit image in place by replicating its edge pixels.
//
// data points at the top-left pixel of a width x height image whose rows are
// stride bytes apart. border_horz columns are written on the left and right of
// every row, and border_vert rows are written above and below the image; the
// caller must make sure that memory exists around the image.
static void extend_frame_lowbd(uint8_t *data, int width, int height, int stride,
                               int border_horz, int border_vert) {
  uint8_t *data_p;
  int i;
  // Left/right borders: repeat the first and last pixel of each row.
  for (i = 0; i < height; ++i) {
    data_p = data + i * stride;
    memset(data_p - border_horz, data_p[0], border_horz);
    memset(data_p + width, data_p[width - 1], border_horz);
  }
  // Top/bottom borders: copy the (already widened) first and last rows, so the
  // corner regions are filled as well.
  data_p = data - border_horz;
  for (i = -border_vert; i < 0; ++i) {
    memcpy(data_p + i * stride, data_p, width + 2 * border_horz);
  }
  for (i = height; i < height + border_vert; ++i) {
    memcpy(data_p + i * stride, data_p + (height - 1) * stride,
           width + 2 * border_horz);
  }
}

#if CONFIG_HIGHBITDEPTH
// 16-bit counterpart of extend_frame_lowbd: pads the image in place by
// replicating edge pixels. width, stride and border_horz are counted in
// uint16_t samples, not bytes.
static void extend_frame_highbd(uint16_t *data, int width, int height,
                                int stride, int border_horz, int border_vert) {
  uint16_t *data_p;
  int i, j;
  // Left/right borders: repeat the first and last sample of each row.
  for (i = 0; i < height; ++i) {
    data_p = data + i * stride;
    for (j = -border_horz; j < 0; ++j) data_p[j] = data_p[0];
    for (j = width; j < width + border_horz; ++j) data_p[j] = data_p[width - 1];
  }
  // Top/bottom borders: copy the widened first and last rows.
  data_p = data - border_horz;
  for (i = -border_vert; i < 0; ++i) {
    memcpy(data_p + i * stride, data_p,
           (width + 2 * border_horz) * sizeof(uint16_t));
  }
  for (i = height; i < height + border_vert; ++i) {
    memcpy(data_p + i * stride, data_p + (height - 1) * stride,
           (width + 2 * border_horz) * sizeof(uint16_t));
  }
}
#endif  // CONFIG_HIGHBITDEPTH

// Pads an image in place by replicating its edge pixels, dispatching on bit
// depth. When highbd is non-zero, data is converted with CONVERT_TO_SHORTPTR
// and handled as 16-bit samples; otherwise it is handled as 8-bit samples.
// Builds without CONFIG_HIGHBITDEPTH only accept highbd == 0.
void extend_frame(uint8_t *data, int width, int height, int stride,
                  int border_horz, int border_vert, int highbd) {
#if !CONFIG_HIGHBITDEPTH
  assert(highbd == 0);
  (void)highbd;
#else
  if (highbd)
    extend_frame_highbd(CONVERT_TO_SHORTPTR(data), width, height, stride,
                        border_horz, border_vert);
  else
#endif
  // Note: in high-bitdepth builds this call is the body of the 'else' above.
  extend_frame_lowbd(data, width, height, stride, border_horz, border_vert);
}

// Copies a width x height rectangle of 8-bit pixels from src to dst, one row
// at a time. The two buffers may have different strides.
static void copy_tile_lowbd(int width, int height, const uint8_t *src,
                            int src_stride, uint8_t *dst, int dst_stride) {
  for (int i = 0; i < height; ++i)
    memcpy(dst + i * dst_stride, src + i * src_stride, width);
}

#if CONFIG_HIGHBITDEPTH
// Copies a width x height rectangle of 16-bit samples from src to dst, one row
// at a time. width and both strides are counted in samples.
static void copy_tile_highbd(int width, int height, const uint16_t *src,
                             int src_stride, uint16_t *dst, int dst_stride) {
  for (int i = 0; i < height; ++i)
    memcpy(dst + i * dst_stride, src + i * src_stride, width * sizeof(*dst));
}
#endif  // CONFIG_HIGHBITDEPTH

// Copies a width x height rectangle from src to dst, dispatching on bit depth.
// When highbd is non-zero, both pointers are converted with
// CONVERT_TO_SHORTPTR and copied as 16-bit samples; otherwise they are copied
// as 8-bit pixels. Builds without CONFIG_HIGHBITDEPTH only accept highbd == 0.
static void copy_tile(int width, int height, const uint8_t *src, int src_stride,
                      uint8_t *dst, int dst_stride, int highbd) {
#if !CONFIG_HIGHBITDEPTH
  assert(highbd == 0);
  (void)highbd;
#else
  if (highbd)
    copy_tile_highbd(width, height, CONVERT_TO_SHORTPTR(src), src_stride,
                     CONVERT_TO_SHORTPTR(dst), dst_stride);
  else
#endif
  // Note: in high-bitdepth builds this call is the body of the 'else' above.
  copy_tile_lowbd(width, height, src, src_stride, dst, dst_stride);
}

| 163 | #if CONFIG_STRIPED_LOOP_RESTORATION |
| 164 | #define REAL_PTR(hbd, d) ((hbd) ? (uint8_t *)CONVERT_TO_SHORTPTR(d) : (d)) |
| 165 | |
Rupert Swarbrick | 9af0cf3 | 2017-10-19 10:43:42 +0100 | [diff] [blame] | 166 | // With striped loop restoration, the filtering for each 64-pixel stripe gets |
| 167 | // most of its input from the output of CDEF (stored in data8), but pixels just |
| 168 | // above and below the stripe come straight from the deblocker. These have been |
| 169 | // stored away in separate buffers. |
Rupert Swarbrick | d3d0615 | 2017-10-19 10:31:57 +0100 | [diff] [blame] | 170 | // |
Rupert Swarbrick | 9af0cf3 | 2017-10-19 10:43:42 +0100 | [diff] [blame] | 171 | // This function modifies data8 (which was the output from CDEF) by copying in |
| 172 | // the boundary pixels. Before doing so, it saves the pixels that get |
| 173 | // overwritten into a temporary buffer. They will be restored again by |
| 174 | // restore_processing_stripe_boundary. |
| 175 | // |
| 176 | // limits gives the rectangular limits of the remaining stripes for the current |
Rupert Swarbrick | dd6f09a | 2017-10-19 16:10:23 +0100 | [diff] [blame] | 177 | // restoration unit. rsb is the stored stripe boundaries (the saved output from |
| 178 | // the deblocker). stripe_height is the height of each stripe. ss_y is true if |
| 179 | // we're on a chroma plane with vertical subsampling. use_highbd is true if the |
| 180 | // data has 2 bytes per pixel. rlbs contain scratch buffers to hold the CDEF |
| 181 | // data (written back to the frame by restore_processing_stripe_boundary) |
| 182 | static int setup_processing_stripe_boundary( |
| 183 | const RestorationTileLimits *limits, const RestorationStripeBoundaries *rsb, |
| 184 | int stripe_height, int ss_y, int use_highbd, uint8_t *data8, int stride, |
| 185 | RestorationLineBuffers *rlbs) { |
Rupert Swarbrick | 9af0cf3 | 2017-10-19 10:43:42 +0100 | [diff] [blame] | 186 | // Which stripe is this? limits->v_start is the top of the stripe in pixel |
| 187 | // units, but we add tile_offset to get the number of pixels from the top of |
| 188 | // the first stripe, which lies off the image. |
Rupert Swarbrick | dd6f09a | 2017-10-19 16:10:23 +0100 | [diff] [blame] | 189 | const int tile_offset = RESTORATION_TILE_OFFSET >> ss_y; |
Rupert Swarbrick | 9af0cf3 | 2017-10-19 10:43:42 +0100 | [diff] [blame] | 190 | const int stripe_index = (limits->v_start + tile_offset) / stripe_height; |
Ola Hugosson | 1e7f2d0 | 2017-09-22 21:36:26 +0200 | [diff] [blame] | 191 | |
Rupert Swarbrick | 9af0cf3 | 2017-10-19 10:43:42 +0100 | [diff] [blame] | 192 | // Horizontal offsets within the line buffers. The buffer logically starts at |
| 193 | // column -RESTORATION_EXTRA_HORZ. We'll start our copy from the column |
| 194 | // limits->h_start - RESTORATION_EXTRA_HORZ and copy up to the column |
| 195 | // limits->h_end + RESTORATION_EXTRA_HORZ. |
Rupert Swarbrick | dd6f09a | 2017-10-19 16:10:23 +0100 | [diff] [blame] | 196 | const int buf_stride = rsb->stripe_boundary_stride; |
Rupert Swarbrick | 9af0cf3 | 2017-10-19 10:43:42 +0100 | [diff] [blame] | 197 | const int buf_x0_off = limits->h_start; |
| 198 | const int line_width = |
| 199 | (limits->h_end - limits->h_start) + 2 * RESTORATION_EXTRA_HORZ; |
| 200 | const int line_size = line_width << use_highbd; |
| 201 | const int data_x0_off = limits->h_start - RESTORATION_EXTRA_HORZ; |
Ola Hugosson | 1e7f2d0 | 2017-09-22 21:36:26 +0200 | [diff] [blame] | 202 | |
Rupert Swarbrick | d3d0615 | 2017-10-19 10:31:57 +0100 | [diff] [blame] | 203 | assert(CONFIG_HIGHBITDEPTH || !use_highbd); |
| 204 | |
Rupert Swarbrick | 9af0cf3 | 2017-10-19 10:43:42 +0100 | [diff] [blame] | 205 | // Replace the pixels above the top of the stripe, unless this is the top of |
| 206 | // the image. |
| 207 | if (stripe_index > 0) { |
| 208 | const int above_buf_y = 2 * (stripe_index - 1); |
| 209 | uint8_t *data8_tl = data8 + (limits->v_start - 2) * stride + data_x0_off; |
| 210 | |
| 211 | for (int i = 0; i < 2; ++i) { |
| 212 | const int buf_off = buf_x0_off + (above_buf_y + i) * buf_stride; |
Rupert Swarbrick | dd6f09a | 2017-10-19 16:10:23 +0100 | [diff] [blame] | 213 | const uint8_t *src = rsb->stripe_boundary_above + (buf_off << use_highbd); |
Rupert Swarbrick | 9af0cf3 | 2017-10-19 10:43:42 +0100 | [diff] [blame] | 214 | uint8_t *dst8 = data8_tl + i * stride; |
| 215 | // Save old pixels, then replace with data from boundary_above_buf |
Rupert Swarbrick | dd6f09a | 2017-10-19 16:10:23 +0100 | [diff] [blame] | 216 | memcpy(rlbs->tmp_save_above[i], REAL_PTR(use_highbd, dst8), line_size); |
Rupert Swarbrick | 9af0cf3 | 2017-10-19 10:43:42 +0100 | [diff] [blame] | 217 | memcpy(REAL_PTR(use_highbd, dst8), src, line_size); |
Ola Hugosson | 1e7f2d0 | 2017-09-22 21:36:26 +0200 | [diff] [blame] | 218 | } |
| 219 | } |
Rupert Swarbrick | d3d0615 | 2017-10-19 10:31:57 +0100 | [diff] [blame] | 220 | |
Rupert Swarbrick | 9af0cf3 | 2017-10-19 10:43:42 +0100 | [diff] [blame] | 221 | // Replace the pixels below the bottom of the stripe if necessary. This might |
| 222 | // not be needed if the stripe is less than stripe_height high (which might |
| 223 | // happen on the bottom of a loop restoration unit), in which case |
| 224 | // rows_needed_below might be negative. |
| 225 | const int stripe_bottom = stripe_height * (1 + stripe_index) - tile_offset; |
| 226 | const int rows_needed_below = AOMMIN(limits->v_end + 2 - stripe_bottom, 2); |
| 227 | |
| 228 | const int below_buf_y = 2 * stripe_index; |
| 229 | uint8_t *data8_bl = data8 + stripe_bottom * stride + data_x0_off; |
| 230 | |
| 231 | for (int i = 0; i < rows_needed_below; ++i) { |
| 232 | const int buf_off = buf_x0_off + (below_buf_y + i) * buf_stride; |
Rupert Swarbrick | dd6f09a | 2017-10-19 16:10:23 +0100 | [diff] [blame] | 233 | const uint8_t *src = rsb->stripe_boundary_below + (buf_off << use_highbd); |
Rupert Swarbrick | 9af0cf3 | 2017-10-19 10:43:42 +0100 | [diff] [blame] | 234 | uint8_t *dst8 = data8_bl + i * stride; |
| 235 | // Save old pixels, then replace with data from boundary_below_buf |
Rupert Swarbrick | dd6f09a | 2017-10-19 16:10:23 +0100 | [diff] [blame] | 236 | memcpy(rlbs->tmp_save_below[i], REAL_PTR(use_highbd, dst8), line_size); |
Rupert Swarbrick | 9af0cf3 | 2017-10-19 10:43:42 +0100 | [diff] [blame] | 237 | memcpy(REAL_PTR(use_highbd, dst8), src, line_size); |
| 238 | } |
| 239 | |
| 240 | // Finally, return the actual height of this stripe. |
| 241 | return AOMMIN(limits->v_end, stripe_bottom) - limits->v_start; |
Ola Hugosson | 1e7f2d0 | 2017-09-22 21:36:26 +0200 | [diff] [blame] | 242 | } |
| 243 | |
| 244 | // This function restores the boundary lines modified by |
| 245 | // setup_processing_stripe_boundary. |
Rupert Swarbrick | 9af0cf3 | 2017-10-19 10:43:42 +0100 | [diff] [blame] | 246 | static void restore_processing_stripe_boundary( |
Rupert Swarbrick | dd6f09a | 2017-10-19 16:10:23 +0100 | [diff] [blame] | 247 | const RestorationTileLimits *limits, const RestorationLineBuffers *rlbs, |
| 248 | int stripe_height, int ss_y, int use_highbd, uint8_t *data8, int stride) { |
| 249 | const int tile_offset = RESTORATION_TILE_OFFSET >> ss_y; |
Rupert Swarbrick | 9af0cf3 | 2017-10-19 10:43:42 +0100 | [diff] [blame] | 250 | const int stripe_index = (limits->v_start + tile_offset) / stripe_height; |
Ola Hugosson | 1e7f2d0 | 2017-09-22 21:36:26 +0200 | [diff] [blame] | 251 | |
Rupert Swarbrick | 9af0cf3 | 2017-10-19 10:43:42 +0100 | [diff] [blame] | 252 | const int line_width = |
| 253 | (limits->h_end - limits->h_start) + 2 * RESTORATION_EXTRA_HORZ; |
| 254 | const int line_size = line_width << use_highbd; |
| 255 | const int data_x0_off = limits->h_start - RESTORATION_EXTRA_HORZ; |
Ola Hugosson | 1e7f2d0 | 2017-09-22 21:36:26 +0200 | [diff] [blame] | 256 | |
Rupert Swarbrick | d3d0615 | 2017-10-19 10:31:57 +0100 | [diff] [blame] | 257 | assert(CONFIG_HIGHBITDEPTH || !use_highbd); |
| 258 | |
Rupert Swarbrick | 9af0cf3 | 2017-10-19 10:43:42 +0100 | [diff] [blame] | 259 | if (stripe_index > 0) { |
| 260 | uint8_t *data8_tl = data8 + (limits->v_start - 2) * stride + data_x0_off; |
| 261 | for (int i = 0; i < 2; ++i) { |
| 262 | uint8_t *dst8 = data8_tl + i * stride; |
| 263 | // Save old pixels, then replace with data from boundary_above_buf |
Rupert Swarbrick | dd6f09a | 2017-10-19 16:10:23 +0100 | [diff] [blame] | 264 | memcpy(REAL_PTR(use_highbd, dst8), rlbs->tmp_save_above[i], line_size); |
Ola Hugosson | 1e7f2d0 | 2017-09-22 21:36:26 +0200 | [diff] [blame] | 265 | } |
| 266 | } |
Rupert Swarbrick | 9af0cf3 | 2017-10-19 10:43:42 +0100 | [diff] [blame] | 267 | |
| 268 | const int stripe_bottom = stripe_height * (1 + stripe_index) - tile_offset; |
| 269 | const int rows_needed_below = AOMMIN(limits->v_end + 2 - stripe_bottom, 2); |
| 270 | |
| 271 | uint8_t *data8_bl = data8 + stripe_bottom * stride + data_x0_off; |
| 272 | |
| 273 | for (int i = 0; i < rows_needed_below; ++i) { |
| 274 | uint8_t *dst8 = data8_bl + i * stride; |
| 275 | // Save old pixels, then replace with data from boundary_below_buf |
Rupert Swarbrick | dd6f09a | 2017-10-19 16:10:23 +0100 | [diff] [blame] | 276 | memcpy(REAL_PTR(use_highbd, dst8), rlbs->tmp_save_below[i], line_size); |
Ola Hugosson | 1e7f2d0 | 2017-09-22 21:36:26 +0200 | [diff] [blame] | 277 | } |
| 278 | } |
Rupert Swarbrick | d3d0615 | 2017-10-19 10:31:57 +0100 | [diff] [blame] | 279 | #undef REAL_PTR |
Ola Hugosson | 1e7f2d0 | 2017-09-22 21:36:26 +0200 | [diff] [blame] | 280 | #endif |
| 281 | |
Debargha Mukherjee | e168a78 | 2017-08-31 12:30:10 -0700 | [diff] [blame] | 282 | static void stepdown_wiener_kernel(const InterpKernel orig, InterpKernel vert, |
| 283 | int boundary_dist, int istop) { |
Debargha Mukherjee | 22bbe4c | 2017-08-31 12:30:10 -0700 | [diff] [blame] | 284 | memcpy(vert, orig, sizeof(InterpKernel)); |
Debargha Mukherjee | e168a78 | 2017-08-31 12:30:10 -0700 | [diff] [blame] | 285 | switch (boundary_dist) { |
| 286 | case 0: |
| 287 | vert[WIENER_HALFWIN] += vert[2] + vert[1] + vert[0]; |
| 288 | vert[2] = vert[1] = vert[0] = 0; |
| 289 | break; |
| 290 | case 1: |
| 291 | vert[2] += vert[1] + vert[0]; |
| 292 | vert[1] = vert[0] = 0; |
| 293 | break; |
| 294 | case 2: |
| 295 | vert[1] += vert[0]; |
| 296 | vert[0] = 0; |
| 297 | break; |
| 298 | default: break; |
| 299 | } |
| 300 | if (!istop) { |
| 301 | int tmp; |
| 302 | tmp = vert[0]; |
| 303 | vert[0] = vert[WIENER_WIN - 1]; |
| 304 | vert[WIENER_WIN - 1] = tmp; |
| 305 | tmp = vert[1]; |
| 306 | vert[1] = vert[WIENER_WIN - 2]; |
| 307 | vert[WIENER_WIN - 2] = tmp; |
| 308 | tmp = vert[2]; |
| 309 | vert[2] = vert[WIENER_WIN - 3]; |
| 310 | vert[WIENER_WIN - 3] = tmp; |
| 311 | } |
Debargha Mukherjee | 22bbe4c | 2017-08-31 12:30:10 -0700 | [diff] [blame] | 312 | } |
| 313 | |
// Selects the convolution routine used by wiener_filter_stripe: the "_hip"
// variant when USE_WIENER_HIGH_INTERMEDIATE_PRECISION is set, the plain
// variant otherwise.
#if USE_WIENER_HIGH_INTERMEDIATE_PRECISION
#define wiener_convolve8_add_src aom_convolve8_add_src_hip
#else
#define wiener_convolve8_add_src aom_convolve8_add_src
#endif  // USE_WIENER_HIGH_INTERMEDIATE_PRECISION

Rupert Swarbrick | dd6f09a | 2017-10-19 16:10:23 +0100 | [diff] [blame] | 320 | static void wiener_filter_stripe(const RestorationUnitInfo *rui, |
| 321 | int stripe_width, int stripe_height, |
| 322 | int procunit_width, const uint8_t *src, |
| 323 | int src_stride, uint8_t *dst, int dst_stride, |
| 324 | int32_t *tmpbuf, int bit_depth) { |
| 325 | (void)tmpbuf; |
| 326 | (void)bit_depth; |
| 327 | assert(bit_depth == 8); |
| 328 | |
Rupert Swarbrick | d3d0615 | 2017-10-19 10:31:57 +0100 | [diff] [blame] | 329 | const int mid_height = |
| 330 | stripe_height - (WIENER_HALFWIN - WIENER_BORDER_VERT) * 2; |
| 331 | assert(mid_height > 0); |
Rupert Swarbrick | dd6f09a | 2017-10-19 16:10:23 +0100 | [diff] [blame] | 332 | for (int j = 0; j < stripe_width; j += procunit_width) { |
| 333 | int w = AOMMIN(procunit_width, (stripe_width - j + 15) & ~15); |
Rupert Swarbrick | d3d0615 | 2017-10-19 10:31:57 +0100 | [diff] [blame] | 334 | const uint8_t *src_p = src + j; |
| 335 | uint8_t *dst_p = dst + j; |
| 336 | for (int b = 0; b < WIENER_HALFWIN - WIENER_BORDER_VERT; ++b) { |
| 337 | InterpKernel vertical_top; |
Rupert Swarbrick | dd6f09a | 2017-10-19 16:10:23 +0100 | [diff] [blame] | 338 | stepdown_wiener_kernel(rui->wiener_info.vfilter, vertical_top, |
Rupert Swarbrick | d3d0615 | 2017-10-19 10:31:57 +0100 | [diff] [blame] | 339 | WIENER_BORDER_VERT + b, 1); |
| 340 | wiener_convolve8_add_src(src_p, src_stride, dst_p, dst_stride, |
Rupert Swarbrick | dd6f09a | 2017-10-19 16:10:23 +0100 | [diff] [blame] | 341 | rui->wiener_info.hfilter, 16, vertical_top, 16, |
| 342 | w, 1); |
Rupert Swarbrick | d3d0615 | 2017-10-19 10:31:57 +0100 | [diff] [blame] | 343 | src_p += src_stride; |
| 344 | dst_p += dst_stride; |
Debargha Mukherjee | 5cd2ab9 | 2016-09-08 15:15:17 -0700 | [diff] [blame] | 345 | } |
Debargha Mukherjee | 5cd2ab9 | 2016-09-08 15:15:17 -0700 | [diff] [blame] | 346 | |
Rupert Swarbrick | d3d0615 | 2017-10-19 10:31:57 +0100 | [diff] [blame] | 347 | wiener_convolve8_add_src(src_p, src_stride, dst_p, dst_stride, |
Rupert Swarbrick | dd6f09a | 2017-10-19 16:10:23 +0100 | [diff] [blame] | 348 | rui->wiener_info.hfilter, 16, |
| 349 | rui->wiener_info.vfilter, 16, w, mid_height); |
Rupert Swarbrick | d3d0615 | 2017-10-19 10:31:57 +0100 | [diff] [blame] | 350 | src_p += src_stride * mid_height; |
| 351 | dst_p += dst_stride * mid_height; |
| 352 | |
| 353 | for (int b = WIENER_HALFWIN - WIENER_BORDER_VERT - 1; b >= 0; --b) { |
| 354 | InterpKernel vertical_bot; |
Rupert Swarbrick | dd6f09a | 2017-10-19 16:10:23 +0100 | [diff] [blame] | 355 | stepdown_wiener_kernel(rui->wiener_info.vfilter, vertical_bot, |
Rupert Swarbrick | d3d0615 | 2017-10-19 10:31:57 +0100 | [diff] [blame] | 356 | WIENER_BORDER_VERT + b, 0); |
| 357 | wiener_convolve8_add_src(src_p, src_stride, dst_p, dst_stride, |
Rupert Swarbrick | dd6f09a | 2017-10-19 16:10:23 +0100 | [diff] [blame] | 358 | rui->wiener_info.hfilter, 16, vertical_bot, 16, |
| 359 | w, 1); |
Rupert Swarbrick | d3d0615 | 2017-10-19 10:31:57 +0100 | [diff] [blame] | 360 | src_p += src_stride; |
| 361 | dst_p += dst_stride; |
| 362 | } |
Yaowu Xu | c27fc14 | 2016-08-22 16:08:15 -0700 | [diff] [blame] | 363 | } |
Debargha Mukherjee | 5cd2ab9 | 2016-09-08 15:15:17 -0700 | [diff] [blame] | 364 | } |
Yaowu Xu | c27fc14 | 2016-08-22 16:08:15 -0700 | [diff] [blame] | 365 | |
/* Calculate windowed sums (if sqr=0) or sums of squares (if sqr=1)
   over the input. The window is of size (2r + 1)x(2r + 1), and we
   specialize to r = 1, 2, 3. A default function is used for r > 3.

   Each loop follows the same format: We keep a window's worth of input
   in individual variables and select data out of that as appropriate.
*/
// Box sum for r = 1 (3x3 window). At the image edges the window is truncated
// to the part that lies inside the width x height region. The vertical pass
// writes column sums into dst; the horizontal pass then sums dst in place.
// Both passes read three rows/columns unconditionally, so width and height
// must each be at least 3.
static void boxsum1(int32_t *src, int width, int height, int src_stride,
                    int sqr, int32_t *dst, int dst_stride) {
  int i, j, a, b, c;

  // Vertical sum over 3-pixel regions, from src into dst.
  if (!sqr) {
    for (j = 0; j < width; ++j) {
      a = src[j];
      b = src[src_stride + j];
      c = src[2 * src_stride + j];

      dst[j] = a + b;
      for (i = 1; i < height - 2; ++i) {
        // Loop invariant: At the start of each iteration,
        // a = src[(i - 1) * src_stride + j]
        // b = src[(i    ) * src_stride + j]
        // c = src[(i + 1) * src_stride + j]
        dst[i * dst_stride + j] = a + b + c;
        a = b;
        b = c;
        c = src[(i + 2) * src_stride + j];
      }
      // Last two rows: i == height - 2 here (for height >= 3).
      dst[i * dst_stride + j] = a + b + c;
      dst[(i + 1) * dst_stride + j] = b + c;
    }
  } else {
    // Same sliding window as above, but over the squares of the inputs.
    for (j = 0; j < width; ++j) {
      a = src[j] * src[j];
      b = src[src_stride + j] * src[src_stride + j];
      c = src[2 * src_stride + j] * src[2 * src_stride + j];

      dst[j] = a + b;
      for (i = 1; i < height - 2; ++i) {
        dst[i * dst_stride + j] = a + b + c;
        a = b;
        b = c;
        c = src[(i + 2) * src_stride + j] * src[(i + 2) * src_stride + j];
      }
      dst[i * dst_stride + j] = a + b + c;
      dst[(i + 1) * dst_stride + j] = b + c;
    }
  }

  // Horizontal sum over 3-pixel regions of dst
  for (i = 0; i < height; ++i) {
    a = dst[i * dst_stride];
    b = dst[i * dst_stride + 1];
    c = dst[i * dst_stride + 2];

    dst[i * dst_stride] = a + b;
    for (j = 1; j < width - 2; ++j) {
      // Loop invariant: At the start of each iteration, a, b and c hold the
      // vertical-pass values (not yet overwritten) of
      // dst[i * dst_stride + (j - 1)]
      // dst[i * dst_stride + (j    )]
      // dst[i * dst_stride + (j + 1)]
      dst[i * dst_stride + j] = a + b + c;
      a = b;
      b = c;
      c = dst[i * dst_stride + (j + 2)];
    }
    // Last two columns: j == width - 2 here (for width >= 3).
    dst[i * dst_stride + j] = a + b + c;
    dst[i * dst_stride + (j + 1)] = b + c;
  }
}

// Box sum for r = 2 (5x5 window): windowed sums (sqr == 0) or sums of squares
// (sqr != 0) of src, written to dst. At the image edges the window is
// truncated to the part that lies inside the width x height region. The
// vertical pass writes column sums into dst; the horizontal pass then sums dst
// in place. Both passes read five rows/columns unconditionally, so width and
// height must each be at least 5.
static void boxsum2(int32_t *src, int width, int height, int src_stride,
                    int sqr, int32_t *dst, int dst_stride) {
  int i, j, a, b, c, d, e;

  // Vertical sum over 5-pixel regions, from src into dst.
  if (!sqr) {
    for (j = 0; j < width; ++j) {
      a = src[j];
      b = src[src_stride + j];
      c = src[2 * src_stride + j];
      d = src[3 * src_stride + j];
      e = src[4 * src_stride + j];

      dst[j] = a + b + c;
      dst[dst_stride + j] = a + b + c + d;
      for (i = 2; i < height - 3; ++i) {
        // Loop invariant: At the start of each iteration,
        // a = src[(i - 2) * src_stride + j]
        // b = src[(i - 1) * src_stride + j]
        // c = src[(i    ) * src_stride + j]
        // d = src[(i + 1) * src_stride + j]
        // e = src[(i + 2) * src_stride + j]
        dst[i * dst_stride + j] = a + b + c + d + e;
        a = b;
        b = c;
        c = d;
        d = e;
        e = src[(i + 3) * src_stride + j];
      }
      // Last three rows: i == height - 3 here (for height >= 5).
      dst[i * dst_stride + j] = a + b + c + d + e;
      dst[(i + 1) * dst_stride + j] = b + c + d + e;
      dst[(i + 2) * dst_stride + j] = c + d + e;
    }
  } else {
    // Same sliding window as above, but over the squares of the inputs.
    for (j = 0; j < width; ++j) {
      a = src[j] * src[j];
      b = src[src_stride + j] * src[src_stride + j];
      c = src[2 * src_stride + j] * src[2 * src_stride + j];
      d = src[3 * src_stride + j] * src[3 * src_stride + j];
      e = src[4 * src_stride + j] * src[4 * src_stride + j];

      dst[j] = a + b + c;
      dst[dst_stride + j] = a + b + c + d;
      for (i = 2; i < height - 3; ++i) {
        dst[i * dst_stride + j] = a + b + c + d + e;
        a = b;
        b = c;
        c = d;
        d = e;
        e = src[(i + 3) * src_stride + j] * src[(i + 3) * src_stride + j];
      }
      dst[i * dst_stride + j] = a + b + c + d + e;
      dst[(i + 1) * dst_stride + j] = b + c + d + e;
      dst[(i + 2) * dst_stride + j] = c + d + e;
    }
  }

  // Horizontal sum over 5-pixel regions of dst
  for (i = 0; i < height; ++i) {
    a = dst[i * dst_stride];
    b = dst[i * dst_stride + 1];
    c = dst[i * dst_stride + 2];
    d = dst[i * dst_stride + 3];
    e = dst[i * dst_stride + 4];

    dst[i * dst_stride] = a + b + c;
    dst[i * dst_stride + 1] = a + b + c + d;
    for (j = 2; j < width - 3; ++j) {
      // Loop invariant: At the start of each iteration, a..e hold the
      // vertical-pass values (not yet overwritten) of
      // dst[i * dst_stride + (j - 2)]
      // dst[i * dst_stride + (j - 1)]
      // dst[i * dst_stride + (j    )]
      // dst[i * dst_stride + (j + 1)]
      // dst[i * dst_stride + (j + 2)]
      dst[i * dst_stride + j] = a + b + c + d + e;
      a = b;
      b = c;
      c = d;
      d = e;
      e = dst[i * dst_stride + (j + 3)];
    }
    // Last three columns: j == width - 3 here (for width >= 5).
    dst[i * dst_stride + j] = a + b + c + d + e;
    dst[i * dst_stride + (j + 1)] = b + c + d + e;
    dst[i * dst_stride + (j + 2)] = c + d + e;
  }
}

// Box sum with radius 3: for every position, adds up the samples of src (or
// their squares when sqr is nonzero) inside the 7x7 window centred on it and
// stores the result in dst. Windows are clipped at the edges of the
// width x height area, so positions near the border sum fewer samples.
//
// The sum is computed separably: a vertical 7-tap pass goes from src into
// dst, then a horizontal 7-tap pass is run in place over dst.
// Seven rows and seven columns are read up front, so both width and height
// have to be at least 7.
static void boxsum3(int32_t *src, int width, int height, int src_stride,
                    int sqr, int32_t *dst, int dst_stride) {
  int win[7];  // the seven taps currently under the sliding window
  int row, col, k;

  // Pass 1: vertical sums, one column at a time, from src into dst.
  for (col = 0; col < width; ++col) {
    const int32_t *const in = src + col;
    int32_t *const out = dst + col;

    for (k = 0; k < 7; ++k) {
      const int v = in[k * src_stride];
      win[k] = sqr ? v * v : v;
    }

    // The first three rows only see 4, 5 and 6 taps respectively.
    out[0] = win[0] + win[1] + win[2] + win[3];
    out[dst_stride] = win[0] + win[1] + win[2] + win[3] + win[4];
    out[2 * dst_stride] = win[0] + win[1] + win[2] + win[3] + win[4] + win[5];

    // At the top of each iteration, win[k] holds the tap of row (row - 3 + k).
    for (row = 3; row < height - 4; ++row) {
      int v;
      out[row * dst_stride] =
          win[0] + win[1] + win[2] + win[3] + win[4] + win[5] + win[6];
      for (k = 0; k < 6; ++k) win[k] = win[k + 1];
      v = in[(row + 4) * src_stride];
      win[6] = sqr ? v * v : v;
    }

    // Last four rows: the window runs off the bottom, dropping one tap each.
    out[row * dst_stride] =
        win[0] + win[1] + win[2] + win[3] + win[4] + win[5] + win[6];
    out[(row + 1) * dst_stride] =
        win[1] + win[2] + win[3] + win[4] + win[5] + win[6];
    out[(row + 2) * dst_stride] = win[2] + win[3] + win[4] + win[5] + win[6];
    out[(row + 3) * dst_stride] = win[3] + win[4] + win[5] + win[6];
  }

  // Pass 2: horizontal sums, one row at a time, in place over dst.
  for (row = 0; row < height; ++row) {
    int32_t *const line = dst + row * dst_stride;

    for (k = 0; k < 7; ++k) win[k] = line[k];

    line[0] = win[0] + win[1] + win[2] + win[3];
    line[1] = win[0] + win[1] + win[2] + win[3] + win[4];
    line[2] = win[0] + win[1] + win[2] + win[3] + win[4] + win[5];

    // The tap at (col + 4) is fetched before it is overwritten, which is what
    // makes the in-place update safe.
    for (col = 3; col < width - 4; ++col) {
      line[col] = win[0] + win[1] + win[2] + win[3] + win[4] + win[5] + win[6];
      for (k = 0; k < 6; ++k) win[k] = win[k + 1];
      win[6] = line[col + 4];
    }

    line[col] = win[0] + win[1] + win[2] + win[3] + win[4] + win[5] + win[6];
    line[col + 1] = win[1] + win[2] + win[3] + win[4] + win[5] + win[6];
    line[col + 2] = win[2] + win[3] + win[4] + win[5] + win[6];
    line[col + 3] = win[3] + win[4] + win[5] + win[6];
  }
}
| 617 | |
// Generic version for any r. To be removed after experiments are done.
//
// Computes, for every position of a width x height area, the sum of the
// samples of src (or of their squares when sqr is nonzero) inside the
// (2r+1) x (2r+1) window centred on that position, clipped to the area, and
// writes it to dst.
//
// The work is done with running (prefix) sums held in a scratch buffer:
//   1. tmp = column-wise prefix sums of src; dst = difference of two prefix
//      rows, i.e. the vertical box sum.
//   2. tmp = row-wise prefix sums of dst; dst = difference of two prefix
//      columns, i.e. the full 2-D box sum.
//
// Rows/columns (i + r) are read for every i <= r, so the caller must pass
// height > 2 * r and width > 2 * r.
// NOTE(review): the result of aom_malloc() is used unchecked, exactly as in
// the original; there is no error channel to report a failed allocation.
static void boxsumr(int32_t *src, int width, int height, int src_stride, int r,
                    int sqr, int32_t *dst, int dst_stride) {
  // sizeof first, so the size is computed in size_t rather than int.
  int32_t *tmp = aom_malloc(sizeof(*tmp) * width * height);
  int tmp_stride = width;
  int i, j;

  // Step 1a: vertical prefix sums of src (or src^2) into tmp.
  if (sqr) {
    for (j = 0; j < width; ++j) tmp[j] = src[j] * src[j];
    for (j = 0; j < width; ++j)
      for (i = 1; i < height; ++i)
        tmp[i * tmp_stride + j] =
            tmp[(i - 1) * tmp_stride + j] +
            src[i * src_stride + j] * src[i * src_stride + j];
  } else {
    memcpy(tmp, src, sizeof(*tmp) * width);
    for (j = 0; j < width; ++j)
      for (i = 1; i < height; ++i)
        tmp[i * tmp_stride + j] =
            tmp[(i - 1) * tmp_stride + j] + src[i * src_stride + j];
  }

  // Step 1b: vertical box sums into dst.
  // Top rows: the window starts at row 0, so the prefix sum is the answer.
  for (i = 0; i <= r; ++i)
    memcpy(&dst[i * dst_stride], &tmp[(i + r) * tmp_stride],
           sizeof(*tmp) * width);
  // Middle rows: full window, difference of two prefix rows.
  for (i = r + 1; i < height - r; ++i)
    for (j = 0; j < width; ++j)
      dst[i * dst_stride + j] =
          tmp[(i + r) * tmp_stride + j] - tmp[(i - r - 1) * tmp_stride + j];
  // Bottom rows: the window is clipped at the last row.
  for (i = height - r; i < height; ++i)
    for (j = 0; j < width; ++j)
      dst[i * dst_stride + j] = tmp[(height - 1) * tmp_stride + j] -
                                tmp[(i - r - 1) * tmp_stride + j];

  // Step 2a: horizontal prefix sums of dst into tmp.
  for (i = 0; i < height; ++i) tmp[i * tmp_stride] = dst[i * dst_stride];
  for (i = 0; i < height; ++i)
    for (j = 1; j < width; ++j)
      // dst must be addressed with dst_stride here; the previous code used
      // src_stride, which read the wrong rows whenever the strides differed.
      tmp[i * tmp_stride + j] =
          tmp[i * tmp_stride + j - 1] + dst[i * dst_stride + j];

  // Step 2b: horizontal box sums back into dst (left, middle, right columns).
  for (j = 0; j <= r; ++j)
    for (i = 0; i < height; ++i)
      dst[i * dst_stride + j] = tmp[i * tmp_stride + j + r];
  for (j = r + 1; j < width - r; ++j)
    for (i = 0; i < height; ++i)
      dst[i * dst_stride + j] =
          tmp[i * tmp_stride + j + r] - tmp[i * tmp_stride + j - r - 1];
  for (j = width - r; j < width; ++j)
    for (i = 0; i < height; ++i)
      dst[i * dst_stride + j] =
          tmp[i * tmp_stride + width - 1] - tmp[i * tmp_stride + j - r - 1];
  aom_free(tmp);
}
| 669 | |
// Box-sum front end: picks the implementation that matches the radius r.
// Radii 1, 2 and 3 go to the dedicated boxsum1/boxsum2/boxsum3 routines;
// every other radius is handled by the generic boxsumr.
// All remaining arguments are forwarded unchanged.
static void boxsum(int32_t *src, int width, int height, int src_stride, int r,
                   int sqr, int32_t *dst, int dst_stride) {
  switch (r) {
    case 1:
      boxsum1(src, width, height, src_stride, sqr, dst, dst_stride);
      break;
    case 2:
      boxsum2(src, width, height, src_stride, sqr, dst, dst_stride);
      break;
    case 3:
      boxsum3(src, width, height, src_stride, sqr, dst, dst_stride);
      break;
    default:
      boxsumr(src, width, height, src_stride, r, sqr, dst, dst_stride);
      break;
  }
}
| 681 | |
// Fills num with the number of samples that fall inside the (2r+1) x (2r+1)
// window centred on each position of a width x height area, when the window
// is clipped to that area.
//
// A position at distance d <= r from an edge keeps (r + 1 + d) taps along
// that axis; positions further in keep all (2r + 1). The table is built in
// four steps: the four corner regions, the left/right bands, the top/bottom
// bands and finally the interior.
static void boxnum(int width, int height, int r, int8_t *num, int num_stride) {
  const int full = 2 * r + 1;  // taps along an axis for an unclipped window
  int y, x;

  // Corner regions: the window is clipped on both axes. Each value is
  // computed once for the top-left corner and mirrored to the other three.
  for (y = 0; y <= r; ++y) {
    int8_t *const top = num + y * num_stride;
    int8_t *const bottom = num + (height - 1 - y) * num_stride;
    for (x = 0; x <= r; ++x) {
      const int8_t count = (int8_t)((r + 1 + y) * (r + 1 + x));
      top[x] = count;
      top[width - 1 - x] = count;
      bottom[x] = count;
      bottom[width - 1 - x] = count;
    }
  }

  // Left and right bands: clipped horizontally only.
  for (x = 0; x <= r; ++x) {
    const int count = full * (r + 1 + x);
    for (y = r + 1; y < height - r; ++y) {
      int8_t *const line = num + y * num_stride;
      line[x] = count;
      line[width - 1 - x] = count;
    }
  }

  // Top and bottom bands: clipped vertically only.
  for (y = 0; y <= r; ++y) {
    const int count = full * (r + 1 + y);
    int8_t *const top = num + y * num_stride;
    int8_t *const bottom = num + (height - 1 - y) * num_stride;
    for (x = r + 1; x < width - r; ++x) {
      top[x] = count;
      bottom[x] = count;
    }
  }

  // Interior: the whole window fits.
  for (y = r + 1; y < height - r; ++y) {
    int8_t *const line = num + y * num_stride;
    for (x = r + 1; x < width - r; ++x) {
      line[x] = full * full;
    }
  }
}
| 713 | |
Rupert Swarbrick | dd6f09a | 2017-10-19 16:10:23 +0100 | [diff] [blame] | 714 | void decode_xq(const int *xqd, int *xq) { |
Debargha Mukherjee | b7bb097 | 2017-03-09 06:47:43 -0800 | [diff] [blame] | 715 | xq[0] = xqd[0]; |
Debargha Mukherjee | 8f209a8 | 2016-10-12 10:47:01 -0700 | [diff] [blame] | 716 | xq[1] = (1 << SGRPROJ_PRJ_BITS) - xq[0] - xqd[1]; |
| 717 | } |
| 718 | |
// Lookup table for x / (x + 1) in fixed point with a scale of 256:
// entry x holds round(256 * x / (x + 1)) for x = 0..254. The final entry
// (index 255) is 256 rather than the 255 the formula would give; the
// self-guided filter in this file clamps its index to 255 before the lookup,
// so that entry is the one every larger input maps to.
David Barker | ce110cc | 2017-02-22 10:38:59 +0000 | [diff] [blame] | 719 | const int32_t x_by_xplus1[256] = { |
Debargha Mukherjee | 4be1262 | 2017-02-15 21:38:02 -0800 | [diff] [blame] | 720 | 0, 128, 171, 192, 205, 213, 219, 224, 228, 230, 233, 235, 236, 238, 239, |
| 721 | 240, 241, 242, 243, 243, 244, 244, 245, 245, 246, 246, 247, 247, 247, 247, |
| 722 | 248, 248, 248, 248, 249, 249, 249, 249, 249, 250, 250, 250, 250, 250, 250, |
| 723 | 250, 251, 251, 251, 251, 251, 251, 251, 251, 251, 251, 252, 252, 252, 252, |
| 724 | 252, 252, 252, 252, 252, 252, 252, 252, 252, 252, 252, 252, 252, 253, 253, |
| 725 | 253, 253, 253, 253, 253, 253, 253, 253, 253, 253, 253, 253, 253, 253, 253, |
| 726 | 253, 253, 253, 253, 253, 253, 253, 253, 253, 253, 253, 253, 254, 254, 254, |
| 727 | 254, 254, 254, 254, 254, 254, 254, 254, 254, 254, 254, 254, 254, 254, 254, |
| 728 | 254, 254, 254, 254, 254, 254, 254, 254, 254, 254, 254, 254, 254, 254, 254, |
| 729 | 254, 254, 254, 254, 254, 254, 254, 254, 254, 254, 254, 254, 254, 254, 254, |
| 730 | 254, 254, 254, 254, 254, 254, 254, 254, 254, 254, 254, 254, 254, 254, 254, |
| 731 | 254, 254, 254, 254, 254, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, |
| 732 | 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, |
| 733 | 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, |
| 734 | 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, |
| 735 | 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, |
| 736 | 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, |
| 737 | 256, |
| 738 | }; |
| 739 | |
// Reciprocal lookup table in fixed point with a scale of 2^12:
// one_by_x[n - 1] = round(4096 / n). The self-guided filter in this file
// indexes it with (n - 1), where n is the sample count of a box window.
// The first 25 entries cover n = 1..25 (windows up to 5x5); the 24 entries
// for n = 26..49 (windows up to 7x7) are only compiled in when
// MAX_RADIUS > 2.
// NOTE(review): MAX_NELEM is defined outside this chunk; presumably it equals
// 25 or 49 to match the number of initializers -- confirm.
David Barker | ce110cc | 2017-02-22 10:38:59 +0000 | [diff] [blame] | 740 | const int32_t one_by_x[MAX_NELEM] = { |
David Barker | 9198d13 | 2017-02-17 14:27:05 +0000 | [diff] [blame] | 741 | 4096, 2048, 1365, 1024, 819, 683, 585, 512, 455, 410, 372, 341, 315, |
Debargha Mukherjee | 1330dfd | 2017-09-03 22:22:27 -0700 | [diff] [blame] | 742 | 293, 273, 256, 241, 228, 216, 205, 195, 186, 178, 171, 164, |
| 743 | #if MAX_RADIUS > 2 |
| 744 | 158, 152, 146, 141, 137, 132, 128, 124, 120, 117, 114, 111, 108, |
| 745 | 105, 102, 100, 98, 95, 93, 91, 89, 87, 85, 84 |
| 746 | #endif // MAX_RADIUS > 2 |
Debargha Mukherjee | 4be1262 | 2017-02-15 21:38:02 -0800 | [diff] [blame] | 747 | }; |
Debargha Mukherjee | 4be1262 | 2017-02-15 21:38:02 -0800 | [diff] [blame] | 748 | |
// Self-guided filter core, operating on 32-bit samples.
//
// Stage 1: box sums of the samples (B) and of the squared samples (A) are
// taken over the block extended by SGRPROJ_BORDER_VERT rows and
// SGRPROJ_BORDER_HORZ columns on every side. For each pixel of the
// width x height block these sums are then overwritten with two per-pixel
// coefficients: A[k] is looked up in x_by_xplus1, and B[k] becomes
// (SGRPROJ_SGR - A[k]) * B[k] * one_by_x[n - 1], scaled down by
// SGRPROJ_RECIP_BITS.
// Stage 2: each output sample is (a * dgd + b) scaled down by
// (SGRPROJ_SGR_BITS + nb - SGRPROJ_RST_BITS), where a and b are weighted sums
// of A and B over the pixel's neighbours inside the block. The weights add up
// to 2^nb: 8 (nb = 3) on the block's corners and edges, 32 (nb = 5) inside.
//
//   dgd, dgd_stride  input samples; dgd points at the block's top-left sample
//                    and the border rows/columns around it are read as well
//   width, height    block size; nothing is written when either is < 5
//   dst, dst_stride  output buffer, written for the width x height block
//   bit_depth        used to scale A and B down by 2 * (bit_depth - 8) and
//                    (bit_depth - 8) bits respectively
//   r                box radius passed to boxsum() and boxnum()
//   eps              selects the row sgrproj_mtable[eps - 1]
//
// NOTE(review): A_, B_ and num_ are fixed-size stack arrays of
// RESTORATION_PROC_UNIT_PELS entries; this presumably relies on the caller
// keeping buf_stride * height_ext within that size -- confirm.
David Barker | 506eb72 | 2017-03-08 13:35:49 +0000 | [diff] [blame] | 749 | static void av1_selfguided_restoration_internal(int32_t *dgd, int width, |
Debargha Mukherjee | e168a78 | 2017-08-31 12:30:10 -0700 | [diff] [blame] | 750 | int height, int dgd_stride, |
| 751 | int32_t *dst, int dst_stride, |
Debargha Mukherjee | 1330dfd | 2017-09-03 22:22:27 -0700 | [diff] [blame] | 752 | int bit_depth, int r, int eps) { |
Debargha Mukherjee | e168a78 | 2017-08-31 12:30:10 -0700 | [diff] [blame] | 753 | const int width_ext = width + 2 * SGRPROJ_BORDER_HORZ; |
| 754 | const int height_ext = height + 2 * SGRPROJ_BORDER_VERT; |
| 755 | const int num_stride = width_ext; |
David Barker | ce110cc | 2017-02-22 10:38:59 +0000 | [diff] [blame] | 756 | // Adjusting the stride of A and B here appears to avoid bad cache effects, |
| 757 | // leading to a significant speed improvement. |
| 758 | // We also align the stride to a multiple of 16 bytes, for consistency |
| 759 | // with the SIMD version of this function. |
Debargha Mukherjee | e168a78 | 2017-08-31 12:30:10 -0700 | [diff] [blame] | 760 | int buf_stride = ((width_ext + 3) & ~3) + 16; |
Debargha Mukherjee | 1330dfd | 2017-09-03 22:22:27 -0700 | [diff] [blame] | 761 | int32_t A_[RESTORATION_PROC_UNIT_PELS]; |
| 762 | int32_t B_[RESTORATION_PROC_UNIT_PELS]; |
| 763 | int32_t *A = A_; |
| 764 | int32_t *B = B_; |
| 765 | int8_t num_[RESTORATION_PROC_UNIT_PELS]; |
// num addresses the sample-count table relative to the block's top-left
// pixel, skipping the border rows and columns stored in num_.
Debargha Mukherjee | e168a78 | 2017-08-31 12:30:10 -0700 | [diff] [blame] | 766 | int8_t *num = num_ + SGRPROJ_BORDER_VERT * num_stride + SGRPROJ_BORDER_HORZ; |
| 767 | int i, j; |
Debargha Mukherjee | 8f209a8 | 2016-10-12 10:47:01 -0700 | [diff] [blame] | 768 | |
David Barker | 6928a5d | 2017-01-05 11:29:22 +0000 | [diff] [blame] | 769 | // Don't filter tiles with dimensions < 5 on any axis |
| 770 | if ((width < 5) || (height < 5)) return; |
| 771 | |
// Box sums over the border-extended block, starting at its top-left border
// sample: B receives sums of samples (sqr = 0), A sums of squares (sqr = 1).
Debargha Mukherjee | e168a78 | 2017-08-31 12:30:10 -0700 | [diff] [blame] | 772 | boxsum(dgd - dgd_stride * SGRPROJ_BORDER_VERT - SGRPROJ_BORDER_HORZ, |
| 773 | width_ext, height_ext, dgd_stride, r, 0, B, buf_stride); |
| 774 | boxsum(dgd - dgd_stride * SGRPROJ_BORDER_VERT - SGRPROJ_BORDER_HORZ, |
| 775 | width_ext, height_ext, dgd_stride, r, 1, A, buf_stride); |
// Number of samples contributing to each (clipped) box window.
| 776 | boxnum(width_ext, height_ext, r, num_, num_stride); |
// NOTE(review): this check runs only after boxsum()/boxnum() have already
// been called with r.
Debargha Mukherjee | 8a70919 | 2017-01-10 11:29:31 -0800 | [diff] [blame] | 777 | assert(r <= 3); |
// Re-base A and B so that index 0 is the block's top-left pixel; negative
// offsets are still inside A_ / B_.
Debargha Mukherjee | e168a78 | 2017-08-31 12:30:10 -0700 | [diff] [blame] | 778 | A += SGRPROJ_BORDER_VERT * buf_stride + SGRPROJ_BORDER_HORZ; |
| 779 | B += SGRPROJ_BORDER_VERT * buf_stride + SGRPROJ_BORDER_HORZ; |
// Stage 1: turn the box sums into the per-pixel coefficients A[k] and B[k].
Debargha Mukherjee | 8f209a8 | 2016-10-12 10:47:01 -0700 | [diff] [blame] | 780 | for (i = 0; i < height; ++i) { |
| 781 | for (j = 0; j < width; ++j) { |
David Barker | ce110cc | 2017-02-22 10:38:59 +0000 | [diff] [blame] | 782 | const int k = i * buf_stride + j; |
Debargha Mukherjee | e168a78 | 2017-08-31 12:30:10 -0700 | [diff] [blame] | 783 | const int n = num[i * num_stride + j]; |
Debargha Mukherjee | 4bfd72e | 2017-03-08 22:20:31 -0800 | [diff] [blame] | 784 | |
David Barker | 9198d13 | 2017-02-17 14:27:05 +0000 | [diff] [blame] | 785 | // a < 2^16 * n < 2^22 regardless of bit depth |
| 786 | uint32_t a = ROUND_POWER_OF_TWO(A[k], 2 * (bit_depth - 8)); |
| 787 | // b < 2^8 * n < 2^14 regardless of bit depth |
| 788 | uint32_t b = ROUND_POWER_OF_TWO(B[k], bit_depth - 8); |
| 789 | |
| 790 | // Each term in calculating p = a * n - b * b is < 2^16 * n^2 < 2^28, |
| 791 | // and p itself satisfies p < 2^14 * n^2 < 2^26. |
| 792 | // Note: Sometimes, in high bit depth, we can end up with a*n < b*b. |
| 793 | // This is an artefact of rounding, and can only happen if all pixels |
| 794 | // are (almost) identical, so in this case we saturate to p=0. |
| 795 | uint32_t p = (a * n < b * b) ? 0 : a * n - b * b; |
| 796 | uint32_t s = sgrproj_mtable[eps - 1][n - 1]; |
| 797 | |
| 798 | // p * s < (2^14 * n^2) * round(2^20 / n^2 eps) < 2^34 / eps < 2^32 |
| 799 | // as long as eps >= 4. So p * s fits into a uint32_t, and z < 2^12 |
| 800 | // (this holds even after accounting for the rounding in s) |
| 801 | const uint32_t z = ROUND_POWER_OF_TWO(p * s, SGRPROJ_MTABLE_BITS); |
| 802 | |
| 803 | A[k] = x_by_xplus1[AOMMIN(z, 255)]; // < 2^8 |
| 804 | |
| 805 | // SGRPROJ_SGR - A[k] < 2^8, B[k] < 2^(bit_depth) * n, |
| 806 | // one_by_x[n - 1] = round(2^12 / n) |
| 807 | // => the product here is < 2^(20 + bit_depth) <= 2^32, |
| 808 | // and B[k] is set to a value < 2^(8 + bit depth) |
| 809 | B[k] = (int32_t)ROUND_POWER_OF_TWO((uint32_t)(SGRPROJ_SGR - A[k]) * |
| 810 | (uint32_t)B[k] * |
| 811 | (uint32_t)one_by_x[n - 1], |
| 812 | SGRPROJ_RECIP_BITS); |
Debargha Mukherjee | 8f209a8 | 2016-10-12 10:47:01 -0700 | [diff] [blame] | 813 | } |
| 814 | } |
// Stage 2: weighted neighbourhood sums of A and B, applied to dgd.
// Only coefficients inside the block are used, so the four corners, the four
// edges and the interior each get their own weight pattern.
// Top-left corner (weights 3, 2, 2, 1).
Debargha Mukherjee | 8f209a8 | 2016-10-12 10:47:01 -0700 | [diff] [blame] | 815 | i = 0; |
| 816 | j = 0; |
| 817 | { |
David Barker | ce110cc | 2017-02-22 10:38:59 +0000 | [diff] [blame] | 818 | const int k = i * buf_stride + j; |
Debargha Mukherjee | e168a78 | 2017-08-31 12:30:10 -0700 | [diff] [blame] | 819 | const int l = i * dgd_stride + j; |
| 820 | const int m = i * dst_stride + j; |
Debargha Mukherjee | 8f209a8 | 2016-10-12 10:47:01 -0700 | [diff] [blame] | 821 | const int nb = 3; |
Debargha Mukherjee | 519dbcf | 2016-12-16 03:13:02 -0800 | [diff] [blame] | 822 | const int32_t a = |
David Barker | ce110cc | 2017-02-22 10:38:59 +0000 | [diff] [blame] | 823 | 3 * A[k] + 2 * A[k + 1] + 2 * A[k + buf_stride] + A[k + buf_stride + 1]; |
Debargha Mukherjee | 519dbcf | 2016-12-16 03:13:02 -0800 | [diff] [blame] | 824 | const int32_t b = |
David Barker | ce110cc | 2017-02-22 10:38:59 +0000 | [diff] [blame] | 825 | 3 * B[k] + 2 * B[k + 1] + 2 * B[k + buf_stride] + B[k + buf_stride + 1]; |
David Barker | 7dcd7f5 | 2017-03-01 12:53:00 +0000 | [diff] [blame] | 826 | const int32_t v = a * dgd[l] + b; |
Debargha Mukherjee | e168a78 | 2017-08-31 12:30:10 -0700 | [diff] [blame] | 827 | dst[m] = ROUND_POWER_OF_TWO(v, SGRPROJ_SGR_BITS + nb - SGRPROJ_RST_BITS); |
Debargha Mukherjee | 8f209a8 | 2016-10-12 10:47:01 -0700 | [diff] [blame] | 828 | } |
// Top-right corner.
| 829 | i = 0; |
| 830 | j = width - 1; |
| 831 | { |
David Barker | ce110cc | 2017-02-22 10:38:59 +0000 | [diff] [blame] | 832 | const int k = i * buf_stride + j; |
Debargha Mukherjee | e168a78 | 2017-08-31 12:30:10 -0700 | [diff] [blame] | 833 | const int l = i * dgd_stride + j; |
| 834 | const int m = i * dst_stride + j; |
Debargha Mukherjee | 8f209a8 | 2016-10-12 10:47:01 -0700 | [diff] [blame] | 835 | const int nb = 3; |
Debargha Mukherjee | 519dbcf | 2016-12-16 03:13:02 -0800 | [diff] [blame] | 836 | const int32_t a = |
David Barker | ce110cc | 2017-02-22 10:38:59 +0000 | [diff] [blame] | 837 | 3 * A[k] + 2 * A[k - 1] + 2 * A[k + buf_stride] + A[k + buf_stride - 1]; |
Debargha Mukherjee | 519dbcf | 2016-12-16 03:13:02 -0800 | [diff] [blame] | 838 | const int32_t b = |
David Barker | ce110cc | 2017-02-22 10:38:59 +0000 | [diff] [blame] | 839 | 3 * B[k] + 2 * B[k - 1] + 2 * B[k + buf_stride] + B[k + buf_stride - 1]; |
David Barker | 7dcd7f5 | 2017-03-01 12:53:00 +0000 | [diff] [blame] | 840 | const int32_t v = a * dgd[l] + b; |
Debargha Mukherjee | e168a78 | 2017-08-31 12:30:10 -0700 | [diff] [blame] | 841 | dst[m] = ROUND_POWER_OF_TWO(v, SGRPROJ_SGR_BITS + nb - SGRPROJ_RST_BITS); |
Debargha Mukherjee | 8f209a8 | 2016-10-12 10:47:01 -0700 | [diff] [blame] | 842 | } |
// Bottom-left corner.
| 843 | i = height - 1; |
| 844 | j = 0; |
| 845 | { |
David Barker | ce110cc | 2017-02-22 10:38:59 +0000 | [diff] [blame] | 846 | const int k = i * buf_stride + j; |
Debargha Mukherjee | e168a78 | 2017-08-31 12:30:10 -0700 | [diff] [blame] | 847 | const int l = i * dgd_stride + j; |
| 848 | const int m = i * dst_stride + j; |
Debargha Mukherjee | 8f209a8 | 2016-10-12 10:47:01 -0700 | [diff] [blame] | 849 | const int nb = 3; |
Debargha Mukherjee | 519dbcf | 2016-12-16 03:13:02 -0800 | [diff] [blame] | 850 | const int32_t a = |
David Barker | ce110cc | 2017-02-22 10:38:59 +0000 | [diff] [blame] | 851 | 3 * A[k] + 2 * A[k + 1] + 2 * A[k - buf_stride] + A[k - buf_stride + 1]; |
Debargha Mukherjee | 519dbcf | 2016-12-16 03:13:02 -0800 | [diff] [blame] | 852 | const int32_t b = |
David Barker | ce110cc | 2017-02-22 10:38:59 +0000 | [diff] [blame] | 853 | 3 * B[k] + 2 * B[k + 1] + 2 * B[k - buf_stride] + B[k - buf_stride + 1]; |
David Barker | 7dcd7f5 | 2017-03-01 12:53:00 +0000 | [diff] [blame] | 854 | const int32_t v = a * dgd[l] + b; |
Debargha Mukherjee | e168a78 | 2017-08-31 12:30:10 -0700 | [diff] [blame] | 855 | dst[m] = ROUND_POWER_OF_TWO(v, SGRPROJ_SGR_BITS + nb - SGRPROJ_RST_BITS); |
Debargha Mukherjee | 8f209a8 | 2016-10-12 10:47:01 -0700 | [diff] [blame] | 856 | } |
// Bottom-right corner.
| 857 | i = height - 1; |
| 858 | j = width - 1; |
| 859 | { |
David Barker | ce110cc | 2017-02-22 10:38:59 +0000 | [diff] [blame] | 860 | const int k = i * buf_stride + j; |
Debargha Mukherjee | e168a78 | 2017-08-31 12:30:10 -0700 | [diff] [blame] | 861 | const int l = i * dgd_stride + j; |
| 862 | const int m = i * dst_stride + j; |
Debargha Mukherjee | 8f209a8 | 2016-10-12 10:47:01 -0700 | [diff] [blame] | 863 | const int nb = 3; |
Debargha Mukherjee | 519dbcf | 2016-12-16 03:13:02 -0800 | [diff] [blame] | 864 | const int32_t a = |
David Barker | ce110cc | 2017-02-22 10:38:59 +0000 | [diff] [blame] | 865 | 3 * A[k] + 2 * A[k - 1] + 2 * A[k - buf_stride] + A[k - buf_stride - 1]; |
Debargha Mukherjee | 519dbcf | 2016-12-16 03:13:02 -0800 | [diff] [blame] | 866 | const int32_t b = |
David Barker | ce110cc | 2017-02-22 10:38:59 +0000 | [diff] [blame] | 867 | 3 * B[k] + 2 * B[k - 1] + 2 * B[k - buf_stride] + B[k - buf_stride - 1]; |
David Barker | 7dcd7f5 | 2017-03-01 12:53:00 +0000 | [diff] [blame] | 868 | const int32_t v = a * dgd[l] + b; |
Debargha Mukherjee | e168a78 | 2017-08-31 12:30:10 -0700 | [diff] [blame] | 869 | dst[m] = ROUND_POWER_OF_TWO(v, SGRPROJ_SGR_BITS + nb - SGRPROJ_RST_BITS); |
Debargha Mukherjee | 8f209a8 | 2016-10-12 10:47:01 -0700 | [diff] [blame] | 870 | } |
// Top row, excluding the corners (uses the current row and the row below).
| 871 | i = 0; |
| 872 | for (j = 1; j < width - 1; ++j) { |
David Barker | ce110cc | 2017-02-22 10:38:59 +0000 | [diff] [blame] | 873 | const int k = i * buf_stride + j; |
Debargha Mukherjee | e168a78 | 2017-08-31 12:30:10 -0700 | [diff] [blame] | 874 | const int l = i * dgd_stride + j; |
| 875 | const int m = i * dst_stride + j; |
Debargha Mukherjee | 8f209a8 | 2016-10-12 10:47:01 -0700 | [diff] [blame] | 876 | const int nb = 3; |
David Barker | ce110cc | 2017-02-22 10:38:59 +0000 | [diff] [blame] | 877 | const int32_t a = A[k] + 2 * (A[k - 1] + A[k + 1]) + A[k + buf_stride] + |
| 878 | A[k + buf_stride - 1] + A[k + buf_stride + 1]; |
| 879 | const int32_t b = B[k] + 2 * (B[k - 1] + B[k + 1]) + B[k + buf_stride] + |
| 880 | B[k + buf_stride - 1] + B[k + buf_stride + 1]; |
David Barker | 7dcd7f5 | 2017-03-01 12:53:00 +0000 | [diff] [blame] | 881 | const int32_t v = a * dgd[l] + b; |
Debargha Mukherjee | e168a78 | 2017-08-31 12:30:10 -0700 | [diff] [blame] | 882 | dst[m] = ROUND_POWER_OF_TWO(v, SGRPROJ_SGR_BITS + nb - SGRPROJ_RST_BITS); |
Debargha Mukherjee | 8f209a8 | 2016-10-12 10:47:01 -0700 | [diff] [blame] | 883 | } |
// Bottom row, excluding the corners (uses the current row and the row above).
| 884 | i = height - 1; |
| 885 | for (j = 1; j < width - 1; ++j) { |
David Barker | ce110cc | 2017-02-22 10:38:59 +0000 | [diff] [blame] | 886 | const int k = i * buf_stride + j; |
Debargha Mukherjee | e168a78 | 2017-08-31 12:30:10 -0700 | [diff] [blame] | 887 | const int l = i * dgd_stride + j; |
| 888 | const int m = i * dst_stride + j; |
Debargha Mukherjee | 8f209a8 | 2016-10-12 10:47:01 -0700 | [diff] [blame] | 889 | const int nb = 3; |
David Barker | ce110cc | 2017-02-22 10:38:59 +0000 | [diff] [blame] | 890 | const int32_t a = A[k] + 2 * (A[k - 1] + A[k + 1]) + A[k - buf_stride] + |
| 891 | A[k - buf_stride - 1] + A[k - buf_stride + 1]; |
| 892 | const int32_t b = B[k] + 2 * (B[k - 1] + B[k + 1]) + B[k - buf_stride] + |
| 893 | B[k - buf_stride - 1] + B[k - buf_stride + 1]; |
David Barker | 7dcd7f5 | 2017-03-01 12:53:00 +0000 | [diff] [blame] | 894 | const int32_t v = a * dgd[l] + b; |
Debargha Mukherjee | e168a78 | 2017-08-31 12:30:10 -0700 | [diff] [blame] | 895 | dst[m] = ROUND_POWER_OF_TWO(v, SGRPROJ_SGR_BITS + nb - SGRPROJ_RST_BITS); |
Debargha Mukherjee | 8f209a8 | 2016-10-12 10:47:01 -0700 | [diff] [blame] | 896 | } |
// Left column, excluding the corners (uses the current column and the one to
// its right).
| 897 | j = 0; |
| 898 | for (i = 1; i < height - 1; ++i) { |
David Barker | ce110cc | 2017-02-22 10:38:59 +0000 | [diff] [blame] | 899 | const int k = i * buf_stride + j; |
Debargha Mukherjee | e168a78 | 2017-08-31 12:30:10 -0700 | [diff] [blame] | 900 | const int l = i * dgd_stride + j; |
| 901 | const int m = i * dst_stride + j; |
Debargha Mukherjee | 8f209a8 | 2016-10-12 10:47:01 -0700 | [diff] [blame] | 902 | const int nb = 3; |
David Barker | ce110cc | 2017-02-22 10:38:59 +0000 | [diff] [blame] | 903 | const int32_t a = A[k] + 2 * (A[k - buf_stride] + A[k + buf_stride]) + |
| 904 | A[k + 1] + A[k - buf_stride + 1] + A[k + buf_stride + 1]; |
| 905 | const int32_t b = B[k] + 2 * (B[k - buf_stride] + B[k + buf_stride]) + |
| 906 | B[k + 1] + B[k - buf_stride + 1] + B[k + buf_stride + 1]; |
David Barker | 7dcd7f5 | 2017-03-01 12:53:00 +0000 | [diff] [blame] | 907 | const int32_t v = a * dgd[l] + b; |
Debargha Mukherjee | e168a78 | 2017-08-31 12:30:10 -0700 | [diff] [blame] | 908 | dst[m] = ROUND_POWER_OF_TWO(v, SGRPROJ_SGR_BITS + nb - SGRPROJ_RST_BITS); |
Debargha Mukherjee | 8f209a8 | 2016-10-12 10:47:01 -0700 | [diff] [blame] | 909 | } |
// Right column, excluding the corners (uses the current column and the one to
// its left).
| 910 | j = width - 1; |
| 911 | for (i = 1; i < height - 1; ++i) { |
David Barker | ce110cc | 2017-02-22 10:38:59 +0000 | [diff] [blame] | 912 | const int k = i * buf_stride + j; |
Debargha Mukherjee | e168a78 | 2017-08-31 12:30:10 -0700 | [diff] [blame] | 913 | const int l = i * dgd_stride + j; |
| 914 | const int m = i * dst_stride + j; |
Debargha Mukherjee | 8f209a8 | 2016-10-12 10:47:01 -0700 | [diff] [blame] | 915 | const int nb = 3; |
David Barker | ce110cc | 2017-02-22 10:38:59 +0000 | [diff] [blame] | 916 | const int32_t a = A[k] + 2 * (A[k - buf_stride] + A[k + buf_stride]) + |
| 917 | A[k - 1] + A[k - buf_stride - 1] + A[k + buf_stride - 1]; |
| 918 | const int32_t b = B[k] + 2 * (B[k - buf_stride] + B[k + buf_stride]) + |
| 919 | B[k - 1] + B[k - buf_stride - 1] + B[k + buf_stride - 1]; |
David Barker | 7dcd7f5 | 2017-03-01 12:53:00 +0000 | [diff] [blame] | 920 | const int32_t v = a * dgd[l] + b; |
Debargha Mukherjee | e168a78 | 2017-08-31 12:30:10 -0700 | [diff] [blame] | 921 | dst[m] = ROUND_POWER_OF_TWO(v, SGRPROJ_SGR_BITS + nb - SGRPROJ_RST_BITS); |
Debargha Mukherjee | 8f209a8 | 2016-10-12 10:47:01 -0700 | [diff] [blame] | 922 | } |
// Interior: full 3x3 neighbourhood, weight 4 on the centre and its four
// direct neighbours, weight 3 on the four diagonals (total 32, hence nb = 5).
| 923 | for (i = 1; i < height - 1; ++i) { |
| 924 | for (j = 1; j < width - 1; ++j) { |
David Barker | ce110cc | 2017-02-22 10:38:59 +0000 | [diff] [blame] | 925 | const int k = i * buf_stride + j; |
Debargha Mukherjee | e168a78 | 2017-08-31 12:30:10 -0700 | [diff] [blame] | 926 | const int l = i * dgd_stride + j; |
| 927 | const int m = i * dst_stride + j; |
Debargha Mukherjee | 8f209a8 | 2016-10-12 10:47:01 -0700 | [diff] [blame] | 928 | const int nb = 5; |
Debargha Mukherjee | 519dbcf | 2016-12-16 03:13:02 -0800 | [diff] [blame] | 929 | const int32_t a = |
David Barker | ce110cc | 2017-02-22 10:38:59 +0000 | [diff] [blame] | 930 | (A[k] + A[k - 1] + A[k + 1] + A[k - buf_stride] + A[k + buf_stride]) * |
| 931 | 4 + |
| 932 | (A[k - 1 - buf_stride] + A[k - 1 + buf_stride] + |
| 933 | A[k + 1 - buf_stride] + A[k + 1 + buf_stride]) * |
Debargha Mukherjee | 8f209a8 | 2016-10-12 10:47:01 -0700 | [diff] [blame] | 934 | 3; |
Debargha Mukherjee | 519dbcf | 2016-12-16 03:13:02 -0800 | [diff] [blame] | 935 | const int32_t b = |
David Barker | ce110cc | 2017-02-22 10:38:59 +0000 | [diff] [blame] | 936 | (B[k] + B[k - 1] + B[k + 1] + B[k - buf_stride] + B[k + buf_stride]) * |
| 937 | 4 + |
| 938 | (B[k - 1 - buf_stride] + B[k - 1 + buf_stride] + |
| 939 | B[k + 1 - buf_stride] + B[k + 1 + buf_stride]) * |
Debargha Mukherjee | 8f209a8 | 2016-10-12 10:47:01 -0700 | [diff] [blame] | 940 | 3; |
David Barker | 7dcd7f5 | 2017-03-01 12:53:00 +0000 | [diff] [blame] | 941 | const int32_t v = a * dgd[l] + b; |
Debargha Mukherjee | e168a78 | 2017-08-31 12:30:10 -0700 | [diff] [blame] | 942 | dst[m] = ROUND_POWER_OF_TWO(v, SGRPROJ_SGR_BITS + nb - SGRPROJ_RST_BITS); |
Debargha Mukherjee | 8f209a8 | 2016-10-12 10:47:01 -0700 | [diff] [blame] | 943 | } |
| 944 | } |
Debargha Mukherjee | 8f209a8 | 2016-10-12 10:47:01 -0700 | [diff] [blame] | 945 | } |
| 946 | |
Rupert Swarbrick | d3d0615 | 2017-10-19 10:31:57 +0100 | [diff] [blame] | 947 | void av1_selfguided_restoration_c(const uint8_t *dgd, int width, int height, |
David Barker | 506eb72 | 2017-03-08 13:35:49 +0000 | [diff] [blame] | 948 | int stride, int32_t *dst, int dst_stride, |
Debargha Mukherjee | 1330dfd | 2017-09-03 22:22:27 -0700 | [diff] [blame] | 949 | int r, int eps) { |
| 950 | int32_t dgd32_[RESTORATION_PROC_UNIT_PELS]; |
Debargha Mukherjee | e168a78 | 2017-08-31 12:30:10 -0700 | [diff] [blame] | 951 | const int dgd32_stride = width + 2 * SGRPROJ_BORDER_HORZ; |
| 952 | int32_t *dgd32 = |
Debargha Mukherjee | 1330dfd | 2017-09-03 22:22:27 -0700 | [diff] [blame] | 953 | dgd32_ + dgd32_stride * SGRPROJ_BORDER_VERT + SGRPROJ_BORDER_HORZ; |
David Barker | 506eb72 | 2017-03-08 13:35:49 +0000 | [diff] [blame] | 954 | int i, j; |
Debargha Mukherjee | e168a78 | 2017-08-31 12:30:10 -0700 | [diff] [blame] | 955 | for (i = -SGRPROJ_BORDER_VERT; i < height + SGRPROJ_BORDER_VERT; ++i) { |
| 956 | for (j = -SGRPROJ_BORDER_HORZ; j < width + SGRPROJ_BORDER_HORZ; ++j) { |
| 957 | dgd32[i * dgd32_stride + j] = dgd[i * stride + j]; |
David Barker | 506eb72 | 2017-03-08 13:35:49 +0000 | [diff] [blame] | 958 | } |
| 959 | } |
Debargha Mukherjee | e168a78 | 2017-08-31 12:30:10 -0700 | [diff] [blame] | 960 | av1_selfguided_restoration_internal(dgd32, width, height, dgd32_stride, dst, |
Debargha Mukherjee | 1330dfd | 2017-09-03 22:22:27 -0700 | [diff] [blame] | 961 | dst_stride, 8, r, eps); |
David Barker | 506eb72 | 2017-03-08 13:35:49 +0000 | [diff] [blame] | 962 | } |
| 963 | |
Rupert Swarbrick | d3d0615 | 2017-10-19 10:31:57 +0100 | [diff] [blame] | 964 | void av1_highpass_filter_c(const uint8_t *dgd, int width, int height, |
| 965 | int stride, int32_t *dst, int dst_stride, int corner, |
| 966 | int edge) { |
Debargha Mukherjee | b7bb097 | 2017-03-09 06:47:43 -0800 | [diff] [blame] | 967 | int i, j; |
David Barker | eed824e | 2017-03-10 11:35:22 +0000 | [diff] [blame] | 968 | const int center = (1 << SGRPROJ_RST_BITS) - 4 * (corner + edge); |
Debargha Mukherjee | b7bb097 | 2017-03-09 06:47:43 -0800 | [diff] [blame] | 969 | |
| 970 | i = 0; |
| 971 | j = 0; |
| 972 | { |
| 973 | const int k = i * stride + j; |
David Barker | eed824e | 2017-03-10 11:35:22 +0000 | [diff] [blame] | 974 | const int l = i * dst_stride + j; |
| 975 | dst[l] = |
| 976 | center * dgd[k] + edge * (dgd[k + 1] + dgd[k + stride] + dgd[k] * 2) + |
| 977 | corner * (dgd[k + stride + 1] + dgd[k + 1] + dgd[k + stride] + dgd[k]); |
Debargha Mukherjee | b7bb097 | 2017-03-09 06:47:43 -0800 | [diff] [blame] | 978 | } |
| 979 | i = 0; |
| 980 | j = width - 1; |
| 981 | { |
| 982 | const int k = i * stride + j; |
David Barker | eed824e | 2017-03-10 11:35:22 +0000 | [diff] [blame] | 983 | const int l = i * dst_stride + j; |
| 984 | dst[l] = |
| 985 | center * dgd[k] + edge * (dgd[k - 1] + dgd[k + stride] + dgd[k] * 2) + |
| 986 | corner * (dgd[k + stride - 1] + dgd[k - 1] + dgd[k + stride] + dgd[k]); |
Debargha Mukherjee | b7bb097 | 2017-03-09 06:47:43 -0800 | [diff] [blame] | 987 | } |
| 988 | i = height - 1; |
| 989 | j = 0; |
| 990 | { |
| 991 | const int k = i * stride + j; |
David Barker | eed824e | 2017-03-10 11:35:22 +0000 | [diff] [blame] | 992 | const int l = i * dst_stride + j; |
| 993 | dst[l] = |
| 994 | center * dgd[k] + edge * (dgd[k + 1] + dgd[k - stride] + dgd[k] * 2) + |
| 995 | corner * (dgd[k - stride + 1] + dgd[k + 1] + dgd[k - stride] + dgd[k]); |
Debargha Mukherjee | b7bb097 | 2017-03-09 06:47:43 -0800 | [diff] [blame] | 996 | } |
| 997 | i = height - 1; |
| 998 | j = width - 1; |
| 999 | { |
| 1000 | const int k = i * stride + j; |
David Barker | eed824e | 2017-03-10 11:35:22 +0000 | [diff] [blame] | 1001 | const int l = i * dst_stride + j; |
| 1002 | dst[l] = |
| 1003 | center * dgd[k] + edge * (dgd[k - 1] + dgd[k - stride] + dgd[k] * 2) + |
| 1004 | corner * (dgd[k - stride - 1] + dgd[k - 1] + dgd[k - stride] + dgd[k]); |
Debargha Mukherjee | b7bb097 | 2017-03-09 06:47:43 -0800 | [diff] [blame] | 1005 | } |
| 1006 | i = 0; |
| 1007 | for (j = 1; j < width - 1; ++j) { |
| 1008 | const int k = i * stride + j; |
David Barker | eed824e | 2017-03-10 11:35:22 +0000 | [diff] [blame] | 1009 | const int l = i * dst_stride + j; |
| 1010 | dst[l] = center * dgd[k] + |
| 1011 | edge * (dgd[k - 1] + dgd[k + stride] + dgd[k + 1] + dgd[k]) + |
| 1012 | corner * (dgd[k + stride - 1] + dgd[k + stride + 1] + dgd[k - 1] + |
| 1013 | dgd[k + 1]); |
Debargha Mukherjee | b7bb097 | 2017-03-09 06:47:43 -0800 | [diff] [blame] | 1014 | } |
| 1015 | i = height - 1; |
| 1016 | for (j = 1; j < width - 1; ++j) { |
| 1017 | const int k = i * stride + j; |
David Barker | eed824e | 2017-03-10 11:35:22 +0000 | [diff] [blame] | 1018 | const int l = i * dst_stride + j; |
| 1019 | dst[l] = center * dgd[k] + |
| 1020 | edge * (dgd[k - 1] + dgd[k - stride] + dgd[k + 1] + dgd[k]) + |
| 1021 | corner * (dgd[k - stride - 1] + dgd[k - stride + 1] + dgd[k - 1] + |
| 1022 | dgd[k + 1]); |
Debargha Mukherjee | b7bb097 | 2017-03-09 06:47:43 -0800 | [diff] [blame] | 1023 | } |
| 1024 | j = 0; |
| 1025 | for (i = 1; i < height - 1; ++i) { |
| 1026 | const int k = i * stride + j; |
David Barker | eed824e | 2017-03-10 11:35:22 +0000 | [diff] [blame] | 1027 | const int l = i * dst_stride + j; |
| 1028 | dst[l] = center * dgd[k] + |
| 1029 | edge * (dgd[k - stride] + dgd[k + 1] + dgd[k + stride] + dgd[k]) + |
| 1030 | corner * (dgd[k + stride + 1] + dgd[k - stride + 1] + |
| 1031 | dgd[k - stride] + dgd[k + stride]); |
Debargha Mukherjee | b7bb097 | 2017-03-09 06:47:43 -0800 | [diff] [blame] | 1032 | } |
| 1033 | j = width - 1; |
| 1034 | for (i = 1; i < height - 1; ++i) { |
| 1035 | const int k = i * stride + j; |
David Barker | eed824e | 2017-03-10 11:35:22 +0000 | [diff] [blame] | 1036 | const int l = i * dst_stride + j; |
| 1037 | dst[l] = center * dgd[k] + |
| 1038 | edge * (dgd[k - stride] + dgd[k - 1] + dgd[k + stride] + dgd[k]) + |
| 1039 | corner * (dgd[k + stride - 1] + dgd[k - stride - 1] + |
| 1040 | dgd[k - stride] + dgd[k + stride]); |
Debargha Mukherjee | b7bb097 | 2017-03-09 06:47:43 -0800 | [diff] [blame] | 1041 | } |
| 1042 | for (i = 1; i < height - 1; ++i) { |
| 1043 | for (j = 1; j < width - 1; ++j) { |
| 1044 | const int k = i * stride + j; |
David Barker | eed824e | 2017-03-10 11:35:22 +0000 | [diff] [blame] | 1045 | const int l = i * dst_stride + j; |
| 1046 | dst[l] = |
| 1047 | center * dgd[k] + |
| 1048 | edge * (dgd[k - stride] + dgd[k - 1] + dgd[k + stride] + dgd[k + 1]) + |
| 1049 | corner * (dgd[k + stride - 1] + dgd[k - stride - 1] + |
| 1050 | dgd[k - stride + 1] + dgd[k + stride + 1]); |
Debargha Mukherjee | b7bb097 | 2017-03-09 06:47:43 -0800 | [diff] [blame] | 1051 | } |
| 1052 | } |
Debargha Mukherjee | b7bb097 | 2017-03-09 06:47:43 -0800 | [diff] [blame] | 1053 | } |
| 1054 | |
Rupert Swarbrick | d3d0615 | 2017-10-19 10:31:57 +0100 | [diff] [blame] | 1055 | void apply_selfguided_restoration_c(const uint8_t *dat, int width, int height, |
Rupert Swarbrick | dd6f09a | 2017-10-19 16:10:23 +0100 | [diff] [blame] | 1056 | int stride, int eps, const int *xqd, |
| 1057 | uint8_t *dst, int dst_stride, |
| 1058 | int32_t *tmpbuf) { |
Debargha Mukherjee | 8f209a8 | 2016-10-12 10:47:01 -0700 | [diff] [blame] | 1059 | int xq[2]; |
David Barker | 3a0df18 | 2016-12-21 10:44:52 +0000 | [diff] [blame] | 1060 | int32_t *flt1 = tmpbuf; |
Debargha Mukherjee | 519dbcf | 2016-12-16 03:13:02 -0800 | [diff] [blame] | 1061 | int32_t *flt2 = flt1 + RESTORATION_TILEPELS_MAX; |
Debargha Mukherjee | 8f209a8 | 2016-10-12 10:47:01 -0700 | [diff] [blame] | 1062 | int i, j; |
David Barker | 3a0df18 | 2016-12-21 10:44:52 +0000 | [diff] [blame] | 1063 | assert(width * height <= RESTORATION_TILEPELS_MAX); |
Debargha Mukherjee | b7bb097 | 2017-03-09 06:47:43 -0800 | [diff] [blame] | 1064 | #if USE_HIGHPASS_IN_SGRPROJ |
| 1065 | av1_highpass_filter_c(dat, width, height, stride, flt1, width, |
David Barker | eed824e | 2017-03-10 11:35:22 +0000 | [diff] [blame] | 1066 | sgr_params[eps].corner, sgr_params[eps].edge); |
Debargha Mukherjee | b7bb097 | 2017-03-09 06:47:43 -0800 | [diff] [blame] | 1067 | #else |
David Barker | 506eb72 | 2017-03-08 13:35:49 +0000 | [diff] [blame] | 1068 | av1_selfguided_restoration_c(dat, width, height, stride, flt1, width, |
Debargha Mukherjee | 1330dfd | 2017-09-03 22:22:27 -0700 | [diff] [blame] | 1069 | sgr_params[eps].r1, sgr_params[eps].e1); |
Debargha Mukherjee | b7bb097 | 2017-03-09 06:47:43 -0800 | [diff] [blame] | 1070 | #endif // USE_HIGHPASS_IN_SGRPROJ |
David Barker | 506eb72 | 2017-03-08 13:35:49 +0000 | [diff] [blame] | 1071 | av1_selfguided_restoration_c(dat, width, height, stride, flt2, width, |
Debargha Mukherjee | 1330dfd | 2017-09-03 22:22:27 -0700 | [diff] [blame] | 1072 | sgr_params[eps].r2, sgr_params[eps].e2); |
Debargha Mukherjee | 8f209a8 | 2016-10-12 10:47:01 -0700 | [diff] [blame] | 1073 | decode_xq(xqd, xq); |
| 1074 | for (i = 0; i < height; ++i) { |
| 1075 | for (j = 0; j < width; ++j) { |
| 1076 | const int k = i * width + j; |
| 1077 | const int l = i * stride + j; |
Debargha Mukherjee | 519dbcf | 2016-12-16 03:13:02 -0800 | [diff] [blame] | 1078 | const int m = i * dst_stride + j; |
| 1079 | const int32_t u = ((int32_t)dat[l] << SGRPROJ_RST_BITS); |
| 1080 | const int32_t f1 = (int32_t)flt1[k] - u; |
| 1081 | const int32_t f2 = (int32_t)flt2[k] - u; |
David Barker | ce110cc | 2017-02-22 10:38:59 +0000 | [diff] [blame] | 1082 | const int32_t v = xq[0] * f1 + xq[1] * f2 + (u << SGRPROJ_PRJ_BITS); |
Debargha Mukherjee | 8f209a8 | 2016-10-12 10:47:01 -0700 | [diff] [blame] | 1083 | const int16_t w = |
| 1084 | (int16_t)ROUND_POWER_OF_TWO(v, SGRPROJ_PRJ_BITS + SGRPROJ_RST_BITS); |
Debargha Mukherjee | 519dbcf | 2016-12-16 03:13:02 -0800 | [diff] [blame] | 1085 | dst[m] = clip_pixel(w); |
Debargha Mukherjee | 8f209a8 | 2016-10-12 10:47:01 -0700 | [diff] [blame] | 1086 | } |
| 1087 | } |
| 1088 | } |
| 1089 | |
Rupert Swarbrick | dd6f09a | 2017-10-19 16:10:23 +0100 | [diff] [blame] | 1090 | static void sgrproj_filter_stripe(const RestorationUnitInfo *rui, |
| 1091 | int stripe_width, int stripe_height, |
| 1092 | int procunit_width, const uint8_t *src, |
| 1093 | int src_stride, uint8_t *dst, int dst_stride, |
| 1094 | int32_t *tmpbuf, int bit_depth) { |
| 1095 | (void)bit_depth; |
| 1096 | assert(bit_depth == 8); |
| 1097 | |
| 1098 | for (int j = 0; j < stripe_width; j += procunit_width) { |
| 1099 | int w = AOMMIN(procunit_width, stripe_width - j); |
| 1100 | apply_selfguided_restoration(src + j, w, stripe_height, src_stride, |
| 1101 | rui->sgrproj_info.ep, rui->sgrproj_info.xqd, |
| 1102 | dst + j, dst_stride, tmpbuf); |
Yaowu Xu | c27fc14 | 2016-08-22 16:08:15 -0700 | [diff] [blame] | 1103 | } |
| 1104 | } |
| 1105 | |
Sebastien Alaiwan | 71e8784 | 2017-04-12 16:03:28 +0200 | [diff] [blame] | 1106 | #if CONFIG_HIGHBITDEPTH |
// Pick the high bit-depth convolution routine used by the Wiener stripe
// filter: the "_hip" variant when USE_WIENER_HIGH_INTERMEDIATE_PRECISION is
// set, the regular one otherwise.
#if !USE_WIENER_HIGH_INTERMEDIATE_PRECISION
#define wiener_highbd_convolve8_add_src aom_highbd_convolve8_add_src
#else
#define wiener_highbd_convolve8_add_src aom_highbd_convolve8_add_src_hip
#endif
Debargha Mukherjee | 5cd2ab9 | 2016-09-08 15:15:17 -0700 | [diff] [blame] | 1112 | |
Rupert Swarbrick | dd6f09a | 2017-10-19 16:10:23 +0100 | [diff] [blame] | 1113 | static void wiener_filter_stripe_highbd(const RestorationUnitInfo *rui, |
| 1114 | int stripe_width, int stripe_height, |
| 1115 | int procunit_width, const uint8_t *src8, |
Rupert Swarbrick | d3d0615 | 2017-10-19 10:31:57 +0100 | [diff] [blame] | 1116 | int src_stride, uint8_t *dst8, |
Rupert Swarbrick | dd6f09a | 2017-10-19 16:10:23 +0100 | [diff] [blame] | 1117 | int dst_stride, int32_t *tmpbuf, |
| 1118 | int bit_depth) { |
| 1119 | (void)tmpbuf; |
| 1120 | |
Rupert Swarbrick | d3d0615 | 2017-10-19 10:31:57 +0100 | [diff] [blame] | 1121 | const int mid_height = |
| 1122 | stripe_height - (WIENER_HALFWIN - WIENER_BORDER_VERT) * 2; |
| 1123 | assert(mid_height > 0); |
Ola Hugosson | 1e7f2d0 | 2017-09-22 21:36:26 +0200 | [diff] [blame] | 1124 | |
Rupert Swarbrick | dd6f09a | 2017-10-19 16:10:23 +0100 | [diff] [blame] | 1125 | for (int j = 0; j < stripe_width; j += procunit_width) { |
| 1126 | int w = AOMMIN(procunit_width, (stripe_width - j + 15) & ~15); |
Rupert Swarbrick | d3d0615 | 2017-10-19 10:31:57 +0100 | [diff] [blame] | 1127 | const uint8_t *src8_p = src8 + j; |
| 1128 | uint8_t *dst8_p = dst8 + j; |
| 1129 | |
| 1130 | for (int b = 0; b < WIENER_HALFWIN - WIENER_BORDER_VERT; ++b) { |
| 1131 | InterpKernel vertical_top; |
Rupert Swarbrick | dd6f09a | 2017-10-19 16:10:23 +0100 | [diff] [blame] | 1132 | stepdown_wiener_kernel(rui->wiener_info.vfilter, vertical_top, |
Rupert Swarbrick | d3d0615 | 2017-10-19 10:31:57 +0100 | [diff] [blame] | 1133 | WIENER_BORDER_VERT + b, 1); |
| 1134 | wiener_highbd_convolve8_add_src(src8_p, src_stride, dst8_p, dst_stride, |
Rupert Swarbrick | dd6f09a | 2017-10-19 16:10:23 +0100 | [diff] [blame] | 1135 | rui->wiener_info.hfilter, 16, |
| 1136 | vertical_top, 16, w, 1, bit_depth); |
Rupert Swarbrick | d3d0615 | 2017-10-19 10:31:57 +0100 | [diff] [blame] | 1137 | src8_p += src_stride; |
| 1138 | dst8_p += dst_stride; |
Debargha Mukherjee | 5cd2ab9 | 2016-09-08 15:15:17 -0700 | [diff] [blame] | 1139 | } |
Rupert Swarbrick | d3d0615 | 2017-10-19 10:31:57 +0100 | [diff] [blame] | 1140 | assert(stripe_height > (WIENER_HALFWIN - WIENER_BORDER_VERT) * 2); |
Rupert Swarbrick | dd6f09a | 2017-10-19 16:10:23 +0100 | [diff] [blame] | 1141 | wiener_highbd_convolve8_add_src( |
| 1142 | src8_p, src_stride, dst8_p, dst_stride, rui->wiener_info.hfilter, 16, |
| 1143 | rui->wiener_info.vfilter, 16, w, mid_height, bit_depth); |
Rupert Swarbrick | d3d0615 | 2017-10-19 10:31:57 +0100 | [diff] [blame] | 1144 | src8_p += src_stride * (mid_height); |
| 1145 | dst8_p += dst_stride * (mid_height); |
| 1146 | for (int b = WIENER_HALFWIN - WIENER_BORDER_VERT - 1; b >= 0; --b) { |
| 1147 | InterpKernel vertical_bot; |
Rupert Swarbrick | dd6f09a | 2017-10-19 16:10:23 +0100 | [diff] [blame] | 1148 | stepdown_wiener_kernel(rui->wiener_info.vfilter, vertical_bot, |
Rupert Swarbrick | d3d0615 | 2017-10-19 10:31:57 +0100 | [diff] [blame] | 1149 | WIENER_BORDER_VERT + b, 0); |
| 1150 | wiener_highbd_convolve8_add_src(src8_p, src_stride, dst8_p, dst_stride, |
Rupert Swarbrick | dd6f09a | 2017-10-19 16:10:23 +0100 | [diff] [blame] | 1151 | rui->wiener_info.hfilter, 16, |
| 1152 | vertical_bot, 16, w, 1, bit_depth); |
Rupert Swarbrick | d3d0615 | 2017-10-19 10:31:57 +0100 | [diff] [blame] | 1153 | src8_p += src_stride; |
| 1154 | dst8_p += dst_stride; |
| 1155 | } |
Ola Hugosson | 1e7f2d0 | 2017-09-22 21:36:26 +0200 | [diff] [blame] | 1156 | } |
Debargha Mukherjee | 5cd2ab9 | 2016-09-08 15:15:17 -0700 | [diff] [blame] | 1157 | } |
| 1158 | |
Rupert Swarbrick | d3d0615 | 2017-10-19 10:31:57 +0100 | [diff] [blame] | 1159 | void av1_selfguided_restoration_highbd_c(const uint16_t *dgd, int width, |
| 1160 | int height, int stride, int32_t *dst, |
David Barker | 506eb72 | 2017-03-08 13:35:49 +0000 | [diff] [blame] | 1161 | int dst_stride, int bit_depth, int r, |
Debargha Mukherjee | 1330dfd | 2017-09-03 22:22:27 -0700 | [diff] [blame] | 1162 | int eps) { |
| 1163 | int32_t dgd32_[RESTORATION_PROC_UNIT_PELS]; |
Debargha Mukherjee | e168a78 | 2017-08-31 12:30:10 -0700 | [diff] [blame] | 1164 | const int dgd32_stride = width + 2 * SGRPROJ_BORDER_HORZ; |
| 1165 | int32_t *dgd32 = |
Debargha Mukherjee | 1330dfd | 2017-09-03 22:22:27 -0700 | [diff] [blame] | 1166 | dgd32_ + dgd32_stride * SGRPROJ_BORDER_VERT + SGRPROJ_BORDER_HORZ; |
David Barker | 506eb72 | 2017-03-08 13:35:49 +0000 | [diff] [blame] | 1167 | int i, j; |
Debargha Mukherjee | e168a78 | 2017-08-31 12:30:10 -0700 | [diff] [blame] | 1168 | for (i = -SGRPROJ_BORDER_VERT; i < height + SGRPROJ_BORDER_VERT; ++i) { |
| 1169 | for (j = -SGRPROJ_BORDER_HORZ; j < width + SGRPROJ_BORDER_HORZ; ++j) { |
| 1170 | dgd32[i * dgd32_stride + j] = dgd[i * stride + j]; |
David Barker | 506eb72 | 2017-03-08 13:35:49 +0000 | [diff] [blame] | 1171 | } |
| 1172 | } |
Debargha Mukherjee | e168a78 | 2017-08-31 12:30:10 -0700 | [diff] [blame] | 1173 | av1_selfguided_restoration_internal(dgd32, width, height, dgd32_stride, dst, |
Debargha Mukherjee | 1330dfd | 2017-09-03 22:22:27 -0700 | [diff] [blame] | 1174 | dst_stride, bit_depth, r, eps); |
David Barker | 506eb72 | 2017-03-08 13:35:49 +0000 | [diff] [blame] | 1175 | } |
| 1176 | |
Rupert Swarbrick | d3d0615 | 2017-10-19 10:31:57 +0100 | [diff] [blame] | 1177 | void av1_highpass_filter_highbd_c(const uint16_t *dgd, int width, int height, |
Debargha Mukherjee | b7bb097 | 2017-03-09 06:47:43 -0800 | [diff] [blame] | 1178 | int stride, int32_t *dst, int dst_stride, |
David Barker | eed824e | 2017-03-10 11:35:22 +0000 | [diff] [blame] | 1179 | int corner, int edge) { |
Urvang Joshi | 0c45941 | 2017-04-21 18:10:09 +0000 | [diff] [blame] | 1180 | int i, j; |
| 1181 | const int center = (1 << SGRPROJ_RST_BITS) - 4 * (corner + edge); |
| 1182 | |
| 1183 | i = 0; |
| 1184 | j = 0; |
| 1185 | { |
| 1186 | const int k = i * stride + j; |
| 1187 | const int l = i * dst_stride + j; |
| 1188 | dst[l] = |
| 1189 | center * dgd[k] + edge * (dgd[k + 1] + dgd[k + stride] + dgd[k] * 2) + |
| 1190 | corner * (dgd[k + stride + 1] + dgd[k + 1] + dgd[k + stride] + dgd[k]); |
| 1191 | } |
| 1192 | i = 0; |
| 1193 | j = width - 1; |
| 1194 | { |
| 1195 | const int k = i * stride + j; |
| 1196 | const int l = i * dst_stride + j; |
| 1197 | dst[l] = |
| 1198 | center * dgd[k] + edge * (dgd[k - 1] + dgd[k + stride] + dgd[k] * 2) + |
| 1199 | corner * (dgd[k + stride - 1] + dgd[k - 1] + dgd[k + stride] + dgd[k]); |
| 1200 | } |
| 1201 | i = height - 1; |
| 1202 | j = 0; |
| 1203 | { |
| 1204 | const int k = i * stride + j; |
| 1205 | const int l = i * dst_stride + j; |
| 1206 | dst[l] = |
| 1207 | center * dgd[k] + edge * (dgd[k + 1] + dgd[k - stride] + dgd[k] * 2) + |
| 1208 | corner * (dgd[k - stride + 1] + dgd[k + 1] + dgd[k - stride] + dgd[k]); |
| 1209 | } |
| 1210 | i = height - 1; |
| 1211 | j = width - 1; |
| 1212 | { |
| 1213 | const int k = i * stride + j; |
| 1214 | const int l = i * dst_stride + j; |
| 1215 | dst[l] = |
| 1216 | center * dgd[k] + edge * (dgd[k - 1] + dgd[k - stride] + dgd[k] * 2) + |
| 1217 | corner * (dgd[k - stride - 1] + dgd[k - 1] + dgd[k - stride] + dgd[k]); |
| 1218 | } |
| 1219 | i = 0; |
| 1220 | for (j = 1; j < width - 1; ++j) { |
| 1221 | const int k = i * stride + j; |
| 1222 | const int l = i * dst_stride + j; |
| 1223 | dst[l] = center * dgd[k] + |
| 1224 | edge * (dgd[k - 1] + dgd[k + stride] + dgd[k + 1] + dgd[k]) + |
| 1225 | corner * (dgd[k + stride - 1] + dgd[k + stride + 1] + dgd[k - 1] + |
| 1226 | dgd[k + 1]); |
| 1227 | } |
| 1228 | i = height - 1; |
| 1229 | for (j = 1; j < width - 1; ++j) { |
| 1230 | const int k = i * stride + j; |
| 1231 | const int l = i * dst_stride + j; |
| 1232 | dst[l] = center * dgd[k] + |
| 1233 | edge * (dgd[k - 1] + dgd[k - stride] + dgd[k + 1] + dgd[k]) + |
| 1234 | corner * (dgd[k - stride - 1] + dgd[k - stride + 1] + dgd[k - 1] + |
| 1235 | dgd[k + 1]); |
| 1236 | } |
| 1237 | j = 0; |
| 1238 | for (i = 1; i < height - 1; ++i) { |
| 1239 | const int k = i * stride + j; |
| 1240 | const int l = i * dst_stride + j; |
| 1241 | dst[l] = center * dgd[k] + |
| 1242 | edge * (dgd[k - stride] + dgd[k + 1] + dgd[k + stride] + dgd[k]) + |
| 1243 | corner * (dgd[k + stride + 1] + dgd[k - stride + 1] + |
| 1244 | dgd[k - stride] + dgd[k + stride]); |
| 1245 | } |
| 1246 | j = width - 1; |
| 1247 | for (i = 1; i < height - 1; ++i) { |
| 1248 | const int k = i * stride + j; |
| 1249 | const int l = i * dst_stride + j; |
| 1250 | dst[l] = center * dgd[k] + |
| 1251 | edge * (dgd[k - stride] + dgd[k - 1] + dgd[k + stride] + dgd[k]) + |
| 1252 | corner * (dgd[k + stride - 1] + dgd[k - stride - 1] + |
| 1253 | dgd[k - stride] + dgd[k + stride]); |
| 1254 | } |
| 1255 | for (i = 1; i < height - 1; ++i) { |
| 1256 | for (j = 1; j < width - 1; ++j) { |
| 1257 | const int k = i * stride + j; |
| 1258 | const int l = i * dst_stride + j; |
| 1259 | dst[l] = |
| 1260 | center * dgd[k] + |
| 1261 | edge * (dgd[k - stride] + dgd[k - 1] + dgd[k + stride] + dgd[k + 1]) + |
| 1262 | corner * (dgd[k + stride - 1] + dgd[k - stride - 1] + |
| 1263 | dgd[k - stride + 1] + dgd[k + stride + 1]); |
| 1264 | } |
| 1265 | } |
Debargha Mukherjee | b7bb097 | 2017-03-09 06:47:43 -0800 | [diff] [blame] | 1266 | } |
| 1267 | |
Rupert Swarbrick | d3d0615 | 2017-10-19 10:31:57 +0100 | [diff] [blame] | 1268 | void apply_selfguided_restoration_highbd_c(const uint16_t *dat, int width, |
| 1269 | int height, int stride, |
Rupert Swarbrick | dd6f09a | 2017-10-19 16:10:23 +0100 | [diff] [blame] | 1270 | int bit_depth, int eps, |
| 1271 | const int *xqd, uint16_t *dst, |
| 1272 | int dst_stride, int32_t *tmpbuf) { |
Debargha Mukherjee | 519dbcf | 2016-12-16 03:13:02 -0800 | [diff] [blame] | 1273 | int xq[2]; |
David Barker | 3a0df18 | 2016-12-21 10:44:52 +0000 | [diff] [blame] | 1274 | int32_t *flt1 = tmpbuf; |
Debargha Mukherjee | 519dbcf | 2016-12-16 03:13:02 -0800 | [diff] [blame] | 1275 | int32_t *flt2 = flt1 + RESTORATION_TILEPELS_MAX; |
Debargha Mukherjee | 519dbcf | 2016-12-16 03:13:02 -0800 | [diff] [blame] | 1276 | int i, j; |
David Barker | 0b04e9b | 2017-01-18 15:29:20 +0000 | [diff] [blame] | 1277 | assert(width * height <= RESTORATION_TILEPELS_MAX); |
Debargha Mukherjee | b7bb097 | 2017-03-09 06:47:43 -0800 | [diff] [blame] | 1278 | #if USE_HIGHPASS_IN_SGRPROJ |
| 1279 | av1_highpass_filter_highbd_c(dat, width, height, stride, flt1, width, |
David Barker | eed824e | 2017-03-10 11:35:22 +0000 | [diff] [blame] | 1280 | sgr_params[eps].corner, sgr_params[eps].edge); |
Debargha Mukherjee | b7bb097 | 2017-03-09 06:47:43 -0800 | [diff] [blame] | 1281 | #else |
David Barker | 506eb72 | 2017-03-08 13:35:49 +0000 | [diff] [blame] | 1282 | av1_selfguided_restoration_highbd_c(dat, width, height, stride, flt1, width, |
| 1283 | bit_depth, sgr_params[eps].r1, |
Debargha Mukherjee | 1330dfd | 2017-09-03 22:22:27 -0700 | [diff] [blame] | 1284 | sgr_params[eps].e1); |
Debargha Mukherjee | b7bb097 | 2017-03-09 06:47:43 -0800 | [diff] [blame] | 1285 | #endif // USE_HIGHPASS_IN_SGRPROJ |
David Barker | 506eb72 | 2017-03-08 13:35:49 +0000 | [diff] [blame] | 1286 | av1_selfguided_restoration_highbd_c(dat, width, height, stride, flt2, width, |
| 1287 | bit_depth, sgr_params[eps].r2, |
Debargha Mukherjee | 1330dfd | 2017-09-03 22:22:27 -0700 | [diff] [blame] | 1288 | sgr_params[eps].e2); |
Debargha Mukherjee | 519dbcf | 2016-12-16 03:13:02 -0800 | [diff] [blame] | 1289 | decode_xq(xqd, xq); |
| 1290 | for (i = 0; i < height; ++i) { |
| 1291 | for (j = 0; j < width; ++j) { |
| 1292 | const int k = i * width + j; |
| 1293 | const int l = i * stride + j; |
| 1294 | const int m = i * dst_stride + j; |
| 1295 | const int32_t u = ((int32_t)dat[l] << SGRPROJ_RST_BITS); |
| 1296 | const int32_t f1 = (int32_t)flt1[k] - u; |
| 1297 | const int32_t f2 = (int32_t)flt2[k] - u; |
David Barker | ce110cc | 2017-02-22 10:38:59 +0000 | [diff] [blame] | 1298 | const int32_t v = xq[0] * f1 + xq[1] * f2 + (u << SGRPROJ_PRJ_BITS); |
Debargha Mukherjee | 519dbcf | 2016-12-16 03:13:02 -0800 | [diff] [blame] | 1299 | const int16_t w = |
| 1300 | (int16_t)ROUND_POWER_OF_TWO(v, SGRPROJ_PRJ_BITS + SGRPROJ_RST_BITS); |
| 1301 | dst[m] = (uint16_t)clip_pixel_highbd(w, bit_depth); |
| 1302 | } |
| 1303 | } |
| 1304 | } |
| 1305 | |
Rupert Swarbrick | dd6f09a | 2017-10-19 16:10:23 +0100 | [diff] [blame] | 1306 | static void sgrproj_filter_stripe_highbd(const RestorationUnitInfo *rui, |
| 1307 | int stripe_width, int stripe_height, |
| 1308 | int procunit_width, |
| 1309 | const uint8_t *src8, int src_stride, |
| 1310 | uint8_t *dst8, int dst_stride, |
| 1311 | int32_t *tmpbuf, int bit_depth) { |
| 1312 | for (int j = 0; j < stripe_width; j += procunit_width) { |
| 1313 | int w = AOMMIN(procunit_width, stripe_width - j); |
Rupert Swarbrick | d3d0615 | 2017-10-19 10:31:57 +0100 | [diff] [blame] | 1314 | const uint16_t *data_p = CONVERT_TO_SHORTPTR(src8) + j; |
| 1315 | uint16_t *dst_p = CONVERT_TO_SHORTPTR(dst8) + j; |
| 1316 | apply_selfguided_restoration_highbd( |
Rupert Swarbrick | dd6f09a | 2017-10-19 16:10:23 +0100 | [diff] [blame] | 1317 | data_p, w, stripe_height, src_stride, bit_depth, rui->sgrproj_info.ep, |
| 1318 | rui->sgrproj_info.xqd, dst_p, dst_stride, tmpbuf); |
Yaowu Xu | c27fc14 | 2016-08-22 16:08:15 -0700 | [diff] [blame] | 1319 | } |
| 1320 | } |
Sebastien Alaiwan | 71e8784 | 2017-04-12 16:03:28 +0200 | [diff] [blame] | 1321 | #endif // CONFIG_HIGHBITDEPTH |
Yaowu Xu | c27fc14 | 2016-08-22 16:08:15 -0700 | [diff] [blame] | 1322 | |
Rupert Swarbrick | dd6f09a | 2017-10-19 16:10:23 +0100 | [diff] [blame] | 1323 | typedef void (*stripe_filter_fun)(const RestorationUnitInfo *rui, |
| 1324 | int stripe_width, int stripe_height, |
| 1325 | int procunit_width, const uint8_t *src, |
| 1326 | int src_stride, uint8_t *dst, int dst_stride, |
| 1327 | int32_t *tmpbuf, int bit_depth); |
Rupert Swarbrick | d3d0615 | 2017-10-19 10:31:57 +0100 | [diff] [blame] | 1328 | |
| 1329 | #if CONFIG_HIGHBITDEPTH |
| 1330 | #define NUM_STRIPE_FILTERS 4 |
| 1331 | #else |
| 1332 | #define NUM_STRIPE_FILTERS 2 |
| 1333 | #endif |
| 1334 | |
| 1335 | static const stripe_filter_fun stripe_filters[NUM_STRIPE_FILTERS] = { |
| 1336 | wiener_filter_stripe, sgrproj_filter_stripe, |
| 1337 | #if CONFIG_HIGHBITDEPTH |
| 1338 | wiener_filter_stripe_highbd, sgrproj_filter_stripe_highbd |
| 1339 | #endif // CONFIG_HIGHBITDEPTH |
| 1340 | }; |
| 1341 | |
Rupert Swarbrick | dd6f09a | 2017-10-19 16:10:23 +0100 | [diff] [blame] | 1342 | void av1_loop_restoration_filter_unit(const RestorationTileLimits *limits, |
| 1343 | const RestorationUnitInfo *rui, |
| 1344 | #if CONFIG_STRIPED_LOOP_RESTORATION |
| 1345 | const RestorationStripeBoundaries *rsb, |
| 1346 | RestorationLineBuffers *rlbs, int ss_y, |
| 1347 | #endif |
| 1348 | int procunit_width, int procunit_height, |
| 1349 | int highbd, int bit_depth, uint8_t *data8, |
| 1350 | int stride, uint8_t *dst8, int dst_stride, |
| 1351 | int32_t *tmpbuf) { |
| 1352 | RestorationType unit_rtype = rui->restoration_type; |
| 1353 | |
| 1354 | int unit_h = limits->v_end - limits->v_start; |
| 1355 | int unit_w = limits->h_end - limits->h_start; |
| 1356 | uint8_t *data8_tl = data8 + limits->v_start * stride + limits->h_start; |
| 1357 | uint8_t *dst8_tl = dst8 + limits->v_start * dst_stride + limits->h_start; |
| 1358 | |
| 1359 | if (unit_rtype == RESTORE_NONE) { |
| 1360 | copy_tile(unit_w, unit_h, data8_tl, stride, dst8_tl, dst_stride, highbd); |
Rupert Swarbrick | d3d0615 | 2017-10-19 10:31:57 +0100 | [diff] [blame] | 1361 | return; |
| 1362 | } |
| 1363 | |
Rupert Swarbrick | dd6f09a | 2017-10-19 16:10:23 +0100 | [diff] [blame] | 1364 | const int filter_idx = 2 * highbd + (unit_rtype == RESTORE_SGRPROJ); |
Rupert Swarbrick | d3d0615 | 2017-10-19 10:31:57 +0100 | [diff] [blame] | 1365 | assert(filter_idx < NUM_STRIPE_FILTERS); |
| 1366 | const stripe_filter_fun stripe_filter = stripe_filters[filter_idx]; |
| 1367 | |
Rupert Swarbrick | 9af0cf3 | 2017-10-19 10:43:42 +0100 | [diff] [blame] | 1368 | // Convolve the whole tile one stripe at a time |
| 1369 | #if CONFIG_STRIPED_LOOP_RESTORATION |
| 1370 | RestorationTileLimits remaining_stripes = *limits; |
| 1371 | #endif |
Rupert Swarbrick | dd6f09a | 2017-10-19 16:10:23 +0100 | [diff] [blame] | 1372 | int i = 0; |
| 1373 | while (i < unit_h) { |
Rupert Swarbrick | d3d0615 | 2017-10-19 10:31:57 +0100 | [diff] [blame] | 1374 | #if CONFIG_STRIPED_LOOP_RESTORATION |
Rupert Swarbrick | dd6f09a | 2017-10-19 16:10:23 +0100 | [diff] [blame] | 1375 | remaining_stripes.v_start = limits->v_start + i; |
| 1376 | int h = setup_processing_stripe_boundary(&remaining_stripes, rsb, |
| 1377 | procunit_height, ss_y, highbd, |
| 1378 | data8, stride, rlbs); |
| 1379 | if (unit_rtype == RESTORE_WIENER) h = ALIGN_POWER_OF_TWO(h, 1); |
Rupert Swarbrick | d3d0615 | 2017-10-19 10:31:57 +0100 | [diff] [blame] | 1380 | #else |
Rupert Swarbrick | dd6f09a | 2017-10-19 16:10:23 +0100 | [diff] [blame] | 1381 | const int h = AOMMIN(procunit_height, (unit_h - i + 15) & ~15); |
Rupert Swarbrick | d3d0615 | 2017-10-19 10:31:57 +0100 | [diff] [blame] | 1382 | #endif |
| 1383 | |
Rupert Swarbrick | dd6f09a | 2017-10-19 16:10:23 +0100 | [diff] [blame] | 1384 | stripe_filter(rui, unit_w, h, procunit_width, data8_tl + i * stride, stride, |
| 1385 | dst8_tl + i * dst_stride, dst_stride, tmpbuf, bit_depth); |
Rupert Swarbrick | d3d0615 | 2017-10-19 10:31:57 +0100 | [diff] [blame] | 1386 | |
| 1387 | #if CONFIG_STRIPED_LOOP_RESTORATION |
Rupert Swarbrick | dd6f09a | 2017-10-19 16:10:23 +0100 | [diff] [blame] | 1388 | restore_processing_stripe_boundary( |
| 1389 | &remaining_stripes, rlbs, procunit_height, ss_y, highbd, data8, stride); |
Rupert Swarbrick | d3d0615 | 2017-10-19 10:31:57 +0100 | [diff] [blame] | 1390 | #endif |
| 1391 | |
| 1392 | i += h; |
| 1393 | } |
| 1394 | } |
| 1395 | |
// Horizontal and vertical border sizes used when extending a plane before it
// is filtered.
struct restore_borders {
  int hborder;
  int vborder;
};
| 1399 | |
| 1400 | static const struct restore_borders restore_borders[RESTORE_TYPES] = { |
| 1401 | { 0, 0 }, |
| 1402 | { WIENER_BORDER_HORZ, WIENER_BORDER_VERT }, |
| 1403 | { SGRPROJ_BORDER_HORZ, SGRPROJ_BORDER_VERT }, |
| 1404 | { RESTORATION_BORDER_HORZ, RESTORATION_BORDER_VERT } |
| 1405 | }; |
| 1406 | |
Rupert Swarbrick | dd6f09a | 2017-10-19 16:10:23 +0100 | [diff] [blame] | 1407 | void av1_loop_restoration_filter_frame(YV12_BUFFER_CONFIG *frame, |
| 1408 | AV1_COMMON *cm, RestorationInfo *rsi, |
| 1409 | int components_pattern, |
| 1410 | YV12_BUFFER_CONFIG *dst) { |
David Barker | 9666e75 | 2016-12-08 11:25:47 +0000 | [diff] [blame] | 1411 | YV12_BUFFER_CONFIG dst_; |
Yaowu Xu | c27fc14 | 2016-08-22 16:08:15 -0700 | [diff] [blame] | 1412 | |
Rupert Swarbrick | f88bc04 | 2017-10-18 10:45:51 +0100 | [diff] [blame] | 1413 | typedef void (*copy_fun)(const YV12_BUFFER_CONFIG *src, |
| 1414 | YV12_BUFFER_CONFIG *dst); |
| 1415 | static const copy_fun copy_funs[3] = { aom_yv12_copy_y, aom_yv12_copy_u, |
| 1416 | aom_yv12_copy_v }; |
| 1417 | |
| 1418 | for (int plane = 0; plane < 3; ++plane) { |
| 1419 | if ((components_pattern == 1 << plane) && |
| 1420 | (rsi[plane].frame_restoration_type == RESTORE_NONE)) { |
| 1421 | if (dst) copy_funs[plane](frame, dst); |
Debargha Mukherjee | a43a2d9 | 2017-01-03 15:14:57 -0800 | [diff] [blame] | 1422 | return; |
David Barker | 9666e75 | 2016-12-08 11:25:47 +0000 | [diff] [blame] | 1423 | } |
Rupert Swarbrick | f88bc04 | 2017-10-18 10:45:51 +0100 | [diff] [blame] | 1424 | } |
| 1425 | if (components_pattern == |
| 1426 | ((1 << AOM_PLANE_Y) | (1 << AOM_PLANE_U) | (1 << AOM_PLANE_V))) { |
Debargha Mukherjee | a43a2d9 | 2017-01-03 15:14:57 -0800 | [diff] [blame] | 1427 | // All components |
| 1428 | if (rsi[0].frame_restoration_type == RESTORE_NONE && |
| 1429 | rsi[1].frame_restoration_type == RESTORE_NONE && |
| 1430 | rsi[2].frame_restoration_type == RESTORE_NONE) { |
| 1431 | if (dst) aom_yv12_copy_frame(frame, dst); |
| 1432 | return; |
| 1433 | } |
David Barker | 9666e75 | 2016-12-08 11:25:47 +0000 | [diff] [blame] | 1434 | } |
| 1435 | |
David Barker | 9666e75 | 2016-12-08 11:25:47 +0000 | [diff] [blame] | 1436 | if (!dst) { |
| 1437 | dst = &dst_; |
| 1438 | memset(dst, 0, sizeof(YV12_BUFFER_CONFIG)); |
Rupert Swarbrick | 146a060 | 2017-10-17 16:52:20 +0100 | [diff] [blame] | 1439 | if (aom_realloc_frame_buffer(dst, frame->y_crop_width, frame->y_crop_height, |
| 1440 | cm->subsampling_x, cm->subsampling_y, |
Sebastien Alaiwan | 71e8784 | 2017-04-12 16:03:28 +0200 | [diff] [blame] | 1441 | #if CONFIG_HIGHBITDEPTH |
Rupert Swarbrick | 146a060 | 2017-10-17 16:52:20 +0100 | [diff] [blame] | 1442 | cm->use_highbitdepth, |
David Barker | 9666e75 | 2016-12-08 11:25:47 +0000 | [diff] [blame] | 1443 | #endif |
Rupert Swarbrick | 146a060 | 2017-10-17 16:52:20 +0100 | [diff] [blame] | 1444 | AOM_BORDER_IN_PIXELS, cm->byte_alignment, NULL, |
| 1445 | NULL, NULL) < 0) |
David Barker | 9666e75 | 2016-12-08 11:25:47 +0000 | [diff] [blame] | 1446 | aom_internal_error(&cm->error, AOM_CODEC_MEM_ERROR, |
| 1447 | "Failed to allocate restoration dst buffer"); |
| 1448 | } |
Debargha Mukherjee | 818e42a | 2016-12-12 11:52:56 -0800 | [diff] [blame] | 1449 | |
Rupert Swarbrick | dd6f09a | 2017-10-19 16:10:23 +0100 | [diff] [blame] | 1450 | #if CONFIG_STRIPED_LOOP_RESTORATION |
| 1451 | RestorationLineBuffers rlbs; |
| 1452 | #endif |
| 1453 | #if CONFIG_HIGHBITDEPTH |
| 1454 | const int bit_depth = cm->bit_depth; |
| 1455 | const int highbd = cm->use_highbitdepth; |
| 1456 | #else |
| 1457 | const int bit_depth = 8; |
| 1458 | const int highbd = 0; |
| 1459 | #endif |
| 1460 | |
Rupert Swarbrick | f88bc04 | 2017-10-18 10:45:51 +0100 | [diff] [blame] | 1461 | for (int plane = 0; plane < 3; ++plane) { |
| 1462 | if (!((components_pattern >> plane) & 1)) continue; |
Rupert Swarbrick | dd6f09a | 2017-10-19 16:10:23 +0100 | [diff] [blame] | 1463 | const RestorationInfo *prsi = &rsi[plane]; |
| 1464 | RestorationType rtype = prsi->frame_restoration_type; |
Rupert Swarbrick | f88bc04 | 2017-10-18 10:45:51 +0100 | [diff] [blame] | 1465 | if (rtype == RESTORE_NONE) { |
| 1466 | copy_funs[plane](frame, dst); |
| 1467 | continue; |
Yaowu Xu | c27fc14 | 2016-08-22 16:08:15 -0700 | [diff] [blame] | 1468 | } |
Debargha Mukherjee | a43a2d9 | 2017-01-03 15:14:57 -0800 | [diff] [blame] | 1469 | |
Rupert Swarbrick | f88bc04 | 2017-10-18 10:45:51 +0100 | [diff] [blame] | 1470 | const int is_uv = plane > 0; |
| 1471 | const int ss_y = is_uv && cm->subsampling_y; |
| 1472 | |
| 1473 | const int plane_width = frame->crop_widths[is_uv]; |
| 1474 | const int plane_height = frame->crop_heights[is_uv]; |
Rupert Swarbrick | f88bc04 | 2017-10-18 10:45:51 +0100 | [diff] [blame] | 1475 | |
Rupert Swarbrick | dd6f09a | 2017-10-19 16:10:23 +0100 | [diff] [blame] | 1476 | int nhtiles, nvtiles; |
| 1477 | const int ntiles = |
| 1478 | av1_get_rest_ntiles(plane_width, plane_height, |
| 1479 | prsi->restoration_tilesize, &nhtiles, &nvtiles); |
Rupert Swarbrick | d3d0615 | 2017-10-19 10:31:57 +0100 | [diff] [blame] | 1480 | |
Rupert Swarbrick | dd6f09a | 2017-10-19 16:10:23 +0100 | [diff] [blame] | 1481 | const struct restore_borders *borders = |
| 1482 | &restore_borders[prsi->frame_restoration_type]; |
| 1483 | extend_frame(frame->buffers[plane], plane_width, plane_height, |
| 1484 | frame->strides[is_uv], borders->hborder, borders->vborder, |
| 1485 | highbd); |
| 1486 | |
| 1487 | for (int tile_idx = 0; tile_idx < ntiles; ++tile_idx) { |
| 1488 | RestorationTileLimits limits = av1_get_rest_tile_limits( |
| 1489 | tile_idx, nhtiles, nvtiles, prsi->restoration_tilesize, plane_width, |
| 1490 | plane_height, ss_y); |
| 1491 | |
| 1492 | av1_loop_restoration_filter_unit( |
| 1493 | &limits, &prsi->unit_info[tile_idx], |
| 1494 | #if CONFIG_STRIPED_LOOP_RESTORATION |
| 1495 | &prsi->boundaries, &rlbs, ss_y, |
| 1496 | #endif |
| 1497 | prsi->procunit_width, prsi->procunit_height, highbd, bit_depth, |
| 1498 | frame->buffers[plane], frame->strides[is_uv], dst->buffers[plane], |
| 1499 | dst->strides[is_uv], cm->rst_tmpbuf); |
| 1500 | } |
Yaowu Xu | c27fc14 | 2016-08-22 16:08:15 -0700 | [diff] [blame] | 1501 | } |
David Barker | 9666e75 | 2016-12-08 11:25:47 +0000 | [diff] [blame] | 1502 | |
David Barker | 9666e75 | 2016-12-08 11:25:47 +0000 | [diff] [blame] | 1503 | if (dst == &dst_) { |
Rupert Swarbrick | f88bc04 | 2017-10-18 10:45:51 +0100 | [diff] [blame] | 1504 | for (int plane = 0; plane < 3; ++plane) { |
| 1505 | if ((components_pattern >> plane) & 1) { |
| 1506 | copy_funs[plane](dst, frame); |
| 1507 | } |
| 1508 | } |
David Barker | 9666e75 | 2016-12-08 11:25:47 +0000 | [diff] [blame] | 1509 | aom_free_frame_buffer(dst); |
| 1510 | } |
Yaowu Xu | c27fc14 | 2016-08-22 16:08:15 -0700 | [diff] [blame] | 1511 | } |
| 1512 | |
Rupert Swarbrick | 6c54521 | 2017-09-01 17:17:25 +0100 | [diff] [blame] | 1513 | int av1_loop_restoration_corners_in_sb(const struct AV1Common *cm, int plane, |
| 1514 | int mi_row, int mi_col, BLOCK_SIZE bsize, |
| 1515 | int *rcol0, int *rcol1, int *rrow0, |
| 1516 | int *rrow1, int *nhtiles) { |
| 1517 | assert(rcol0 && rcol1 && rrow0 && rrow1 && nhtiles); |
| 1518 | |
| 1519 | if (bsize != cm->sb_size) return 0; |
| 1520 | |
| 1521 | #if CONFIG_FRAME_SUPERRES |
| 1522 | const int frame_w = cm->superres_upscaled_width; |
| 1523 | const int frame_h = cm->superres_upscaled_height; |
Urvang Joshi | de71d14 | 2017-10-05 12:12:15 -0700 | [diff] [blame] | 1524 | const int mi_to_px = MI_SIZE * SCALE_NUMERATOR; |
| 1525 | const int denom = cm->superres_scale_denominator; |
Rupert Swarbrick | 6c54521 | 2017-09-01 17:17:25 +0100 | [diff] [blame] | 1526 | #else |
| 1527 | const int frame_w = cm->width; |
| 1528 | const int frame_h = cm->height; |
| 1529 | const int mi_to_px = MI_SIZE; |
| 1530 | const int denom = 1; |
| 1531 | #endif // CONFIG_FRAME_SUPERRES |
| 1532 | |
Rupert Swarbrick | 7380b25 | 2017-09-05 10:18:21 +0100 | [diff] [blame] | 1533 | const int ss_x = plane > 0 && cm->subsampling_x != 0; |
| 1534 | const int ss_y = plane > 0 && cm->subsampling_y != 0; |
| 1535 | |
| 1536 | const int ss_frame_w = (frame_w + ss_x) >> ss_x; |
| 1537 | const int ss_frame_h = (frame_h + ss_y) >> ss_y; |
Rupert Swarbrick | 6c54521 | 2017-09-01 17:17:25 +0100 | [diff] [blame] | 1538 | |
Rupert Swarbrick | 64b8bbd | 2017-10-16 15:53:07 +0100 | [diff] [blame] | 1539 | const int rtile_size = cm->rst_info[plane].restoration_tilesize; |
Rupert Swarbrick | 6c54521 | 2017-09-01 17:17:25 +0100 | [diff] [blame] | 1540 | |
Rupert Swarbrick | 64b8bbd | 2017-10-16 15:53:07 +0100 | [diff] [blame] | 1541 | int nvtiles; |
| 1542 | av1_get_rest_ntiles(ss_frame_w, ss_frame_h, rtile_size, nhtiles, &nvtiles); |
| 1543 | |
| 1544 | const int rnd = rtile_size * denom - 1; |
Rupert Swarbrick | 6c54521 | 2017-09-01 17:17:25 +0100 | [diff] [blame] | 1545 | |
| 1546 | // rcol0/rrow0 should be the first column/row of rtiles that doesn't start |
| 1547 | // left/below of mi_col/mi_row. For this calculation, we need to round up the |
| 1548 | // division (if the sb starts at rtile column 10.1, the first matching rtile |
| 1549 | // has column index 11) |
Rupert Swarbrick | 64b8bbd | 2017-10-16 15:53:07 +0100 | [diff] [blame] | 1550 | *rcol0 = (mi_col * mi_to_px + rnd) / (rtile_size * denom); |
| 1551 | *rrow0 = (mi_row * mi_to_px + rnd) / (rtile_size * denom); |
Rupert Swarbrick | 6c54521 | 2017-09-01 17:17:25 +0100 | [diff] [blame] | 1552 | |
| 1553 | // rcol1/rrow1 is the equivalent calculation, but for the superblock |
| 1554 | // below-right. There are some slightly strange boundary effects. First, we |
| 1555 | // need to clamp to nhtiles/nvtiles for the case where it appears there are, |
| 1556 | // say, 2.4 restoration tiles horizontally. There we need a maximum mi_row1 |
| 1557 | // of 2 because tile 1 gets extended. |
| 1558 | // |
| 1559 | // Second, if mi_col1 >= cm->mi_cols then we must manually set *rcol1 to |
| 1560 | // nhtiles. This is needed whenever the frame's width rounded up to the next |
| 1561 | // toplevel superblock is smaller than nhtiles * rtile_w. The same logic is |
| 1562 | // needed for rows. |
| 1563 | const int mi_row1 = mi_row + mi_size_high[bsize]; |
| 1564 | const int mi_col1 = mi_col + mi_size_wide[bsize]; |
| 1565 | |
| 1566 | if (mi_col1 >= cm->mi_cols) |
| 1567 | *rcol1 = *nhtiles; |
| 1568 | else |
Rupert Swarbrick | 64b8bbd | 2017-10-16 15:53:07 +0100 | [diff] [blame] | 1569 | *rcol1 = |
| 1570 | AOMMIN(*nhtiles, (mi_col1 * mi_to_px + rnd) / (rtile_size * denom)); |
Rupert Swarbrick | 6c54521 | 2017-09-01 17:17:25 +0100 | [diff] [blame] | 1571 | |
| 1572 | if (mi_row1 >= cm->mi_rows) |
| 1573 | *rrow1 = nvtiles; |
| 1574 | else |
Rupert Swarbrick | 64b8bbd | 2017-10-16 15:53:07 +0100 | [diff] [blame] | 1575 | *rrow1 = AOMMIN(nvtiles, (mi_row1 * mi_to_px + rnd) / (rtile_size * denom)); |
Rupert Swarbrick | 6c54521 | 2017-09-01 17:17:25 +0100 | [diff] [blame] | 1576 | |
| 1577 | return *rcol0 < *rcol1 && *rrow0 < *rrow1; |
| 1578 | } |
Ola Hugosson | 1e7f2d0 | 2017-09-22 21:36:26 +0200 | [diff] [blame] | 1579 | |
| 1580 | #if CONFIG_STRIPED_LOOP_RESTORATION |
| 1581 | |
// Pads one row of samples horizontally by replicating its edge samples.
//
//   buf               points at the first sample of the row; there must be
//                     writable room for `extend` samples before it and after
//                     its last sample
//   width             number of samples in the row
//   extend            number of samples to write on each side
//   use_highbitdepth  nonzero if buf actually holds uint16_t samples
static void extend_line(uint8_t *buf, int width, int extend,
                        int use_highbitdepth) {
  if (!use_highbitdepth) {
    // 8-bit samples: left border first, then right border.
    const uint8_t left_px = buf[0];
    for (int k = 1; k <= extend; ++k) buf[-k] = left_px;

    const uint8_t right_px = buf[width - 1];
    uint8_t *const right_edge = buf + width;
    for (int k = 0; k < extend; ++k) right_edge[k] = right_px;
    return;
  }

  // 16-bit samples: same procedure on the reinterpreted buffer.
  uint16_t *const row16 = (uint16_t *)buf;
  const uint16_t left_px = row16[0];
  for (int k = 1; k <= extend; ++k) row16[-k] = left_px;

  const uint16_t right_px = row16[width - 1];
  uint16_t *const right_edge = row16 + width;
  for (int k = 0; k < extend; ++k) right_edge[k] = right_px;
}
| 1600 | |
| 1601 | // For each 64 pixel high stripe, save 4 scan lines to be used as boundary in |
| 1602 | // the loop restoration process. The lines are saved in |
| 1603 | // rst_internal.stripe_boundary_lines |
Rupert Swarbrick | dd6f09a | 2017-10-19 16:10:23 +0100 | [diff] [blame] | 1604 | void av1_loop_restoration_save_boundary_lines(const YV12_BUFFER_CONFIG *frame, |
Ola Hugosson | 1e7f2d0 | 2017-09-22 21:36:26 +0200 | [diff] [blame] | 1605 | AV1_COMMON *cm) { |
Rupert Swarbrick | f88bc04 | 2017-10-18 10:45:51 +0100 | [diff] [blame] | 1606 | for (int p = 0; p < MAX_MB_PLANE; ++p) { |
| 1607 | const int is_uv = p > 0; |
| 1608 | const uint8_t *src_buf = frame->buffers[p]; |
| 1609 | const int src_width = frame->crop_widths[is_uv]; |
| 1610 | const int src_height = frame->crop_heights[is_uv]; |
| 1611 | const int src_stride = frame->strides[is_uv]; |
| 1612 | const int stripe_height = 64 >> (is_uv && cm->subsampling_y); |
| 1613 | const int stripe_offset = (56 >> (is_uv && cm->subsampling_y)) - 2; |
| 1614 | |
Rupert Swarbrick | dd6f09a | 2017-10-19 16:10:23 +0100 | [diff] [blame] | 1615 | RestorationStripeBoundaries *boundaries = &cm->rst_info[p].boundaries; |
| 1616 | uint8_t *boundary_above_buf = boundaries->stripe_boundary_above; |
| 1617 | uint8_t *boundary_below_buf = boundaries->stripe_boundary_below; |
| 1618 | const int boundary_stride = boundaries->stripe_boundary_stride; |
Ola Hugosson | 1e7f2d0 | 2017-09-22 21:36:26 +0200 | [diff] [blame] | 1619 | #if CONFIG_HIGHBITDEPTH |
Rupert Swarbrick | f88bc04 | 2017-10-18 10:45:51 +0100 | [diff] [blame] | 1620 | const int use_highbitdepth = cm->use_highbitdepth; |
Ola Hugosson | 1e7f2d0 | 2017-09-22 21:36:26 +0200 | [diff] [blame] | 1621 | if (use_highbitdepth) { |
| 1622 | src_buf = (uint8_t *)CONVERT_TO_SHORTPTR(src_buf); |
| 1623 | } |
Rupert Swarbrick | f88bc04 | 2017-10-18 10:45:51 +0100 | [diff] [blame] | 1624 | #else |
| 1625 | const int use_highbitdepth = 0; |
Ola Hugosson | 1e7f2d0 | 2017-09-22 21:36:26 +0200 | [diff] [blame] | 1626 | #endif |
| 1627 | src_buf += (stripe_offset * src_stride) << use_highbitdepth; |
| 1628 | boundary_above_buf += RESTORATION_EXTRA_HORZ << use_highbitdepth; |
| 1629 | boundary_below_buf += RESTORATION_EXTRA_HORZ << use_highbitdepth; |
| 1630 | // Loop over stripes |
Rupert Swarbrick | f88bc04 | 2017-10-18 10:45:51 +0100 | [diff] [blame] | 1631 | for (int stripe_y = stripe_offset; stripe_y < src_height; |
Ola Hugosson | 1e7f2d0 | 2017-09-22 21:36:26 +0200 | [diff] [blame] | 1632 | stripe_y += stripe_height) { |
| 1633 | // Save 2 lines above the LR stripe (offset -9, -10) |
Rupert Swarbrick | f88bc04 | 2017-10-18 10:45:51 +0100 | [diff] [blame] | 1634 | for (int yy = 0; yy < 2; yy++) { |
Ola Hugosson | 1e7f2d0 | 2017-09-22 21:36:26 +0200 | [diff] [blame] | 1635 | if (stripe_y + yy < src_height) { |
| 1636 | memcpy(boundary_above_buf, src_buf, src_width << use_highbitdepth); |
| 1637 | extend_line(boundary_above_buf, src_width, RESTORATION_EXTRA_HORZ, |
| 1638 | use_highbitdepth); |
| 1639 | src_buf += src_stride << use_highbitdepth; |
| 1640 | boundary_above_buf += boundary_stride << use_highbitdepth; |
| 1641 | } |
| 1642 | } |
| 1643 | // Save 2 lines below the LR stripe (offset 56,57) |
Rupert Swarbrick | f88bc04 | 2017-10-18 10:45:51 +0100 | [diff] [blame] | 1644 | for (int yy = 2; yy < 4; yy++) { |
Ola Hugosson | 1e7f2d0 | 2017-09-22 21:36:26 +0200 | [diff] [blame] | 1645 | if (stripe_y + yy < src_height) { |
| 1646 | memcpy(boundary_below_buf, src_buf, src_width << use_highbitdepth); |
| 1647 | extend_line(boundary_below_buf, src_width, RESTORATION_EXTRA_HORZ, |
| 1648 | use_highbitdepth); |
| 1649 | src_buf += src_stride << use_highbitdepth; |
| 1650 | boundary_below_buf += boundary_stride << use_highbitdepth; |
| 1651 | } |
| 1652 | } |
| 1653 | // jump to next stripe |
| 1654 | src_buf += ((stripe_height - 4) * src_stride) << use_highbitdepth; |
| 1655 | } |
| 1656 | } |
| 1657 | } |
| 1658 | |
| 1659 | #endif // CONFIG_STRIPED_LOOP_RESTORATION |