Add striped_loop_restoration experiment
This experiment offsets the filter tile grid 8 pixels upwards.
Deblocked pixels (rather than CDEFed pixels) are used for the
2 lines above and below the filter processing unit. The 8 pixel
offset is the offset produced by deblock/cdef. This way the
loop_restoration does not need additional line buffers in a
single pass hardware implementation.
Change-Id: I89e0831dc28413a5d3e02d7a426ce2885ab629d7
diff --git a/av1/common/alloccommon.c b/av1/common/alloccommon.c
index fe22667..20f3074 100644
--- a/av1/common/alloccommon.c
+++ b/av1/common/alloccommon.c
@@ -135,6 +135,33 @@
aom_free(cm->rst_internal.tmpbuf);
CHECK_MEM_ERROR(cm, cm->rst_internal.tmpbuf,
(int32_t *)aom_memalign(16, RESTORATION_TMPBUF_SIZE));
+
+#if CONFIG_STRIPED_LOOP_RESTORATION
+ // Allocate internal storage for the loop restoration stripe boundary lines
+ for (p = 0; p < MAX_MB_PLANE; ++p) {
+ int w = p == 0 ? width : ROUND_POWER_OF_TWO(width, cm->subsampling_x);
+ int align_bits = 5; // align for efficiency
+ int stride = ALIGN_POWER_OF_TWO(w, align_bits);
+ int num_stripes = (height + 63) / 64;
+ // for each processing stripe: 2 lines above, 2 below
+ int buf_size = num_stripes * 2 * stride;
+ uint8_t *above_buf, *below_buf;
+
+ aom_free(cm->rst_internal.stripe_boundary_above[p]);
+ aom_free(cm->rst_internal.stripe_boundary_below[p]);
+
+#if CONFIG_HIGHBITDEPTH
+ if (cm->use_highbitdepth) buf_size = buf_size * 2;
+#endif
+ CHECK_MEM_ERROR(cm, above_buf,
+ (uint8_t *)aom_memalign(1 << align_bits, buf_size));
+ CHECK_MEM_ERROR(cm, below_buf,
+ (uint8_t *)aom_memalign(1 << align_bits, buf_size));
+ cm->rst_internal.stripe_boundary_above[p] = above_buf;
+ cm->rst_internal.stripe_boundary_below[p] = below_buf;
+ cm->rst_internal.stripe_boundary_stride[p] = stride;
+ }
+#endif // CONFIG_STRIPED_LOOP_RESTORATION
}
void av1_free_restoration_buffers(AV1_COMMON *cm) {
diff --git a/av1/common/restoration.c b/av1/common/restoration.c
index c703660..613ad83 100644
--- a/av1/common/restoration.c
+++ b/av1/common/restoration.c
@@ -123,6 +123,111 @@
}
}
+#if CONFIG_STRIPED_LOOP_RESTORATION
+
+// Sets up a processing stripe for filtering: the 2 rows directly above and
+// the 2 rows directly below the stripe are overwritten in the frame with rows
+// taken from the stripe_boundary_above/below buffers. The frame rows being
+// overwritten are first copied to rst->tmp_save_above/tmp_save_below so that
+// restore_processing_stripe_boundary can put them back once the stripe has
+// been filtered. use_highbd scales every offset and length (<< use_highbd).
+// Returns the height of the processing stripe
+static int setup_processing_stripe_boundary(int y0, int v_end, int h_start,
+ int h_end, uint8_t *data,
+ int stride,
+ RestorationInternal *rst,
+ int use_highbd) {
+ int y, y_stripe_topmost, stripe_index, i;
+ int tile_offset = RESTORATION_TILE_OFFSET >> rst->subsampling_y;
+ int stripe_height = rst->rsi->procunit_height;
+ int comp = rst->component;
+ uint8_t *boundary_above_buf = rst->stripe_boundary_above[comp];
+ uint8_t *boundary_below_buf = rst->stripe_boundary_below[comp];
+ int boundary_stride = rst->stripe_boundary_stride[comp];
+ int x0 = h_start - RESTORATION_EXTRA_HORZ;
+ int x1 = h_end + RESTORATION_EXTRA_HORZ;
+ // Locate the stripe containing row y0 on the grid shifted up by tile_offset
+ stripe_index = (y0 + tile_offset) / stripe_height;
+ y_stripe_topmost = stripe_index * stripe_height - tile_offset;
+ boundary_above_buf +=
+ ((stripe_index - 1) * 2 * boundary_stride + RESTORATION_EXTRA_HORZ)
+ << use_highbd;
+ boundary_below_buf +=
+ (stripe_index * 2 * boundary_stride + RESTORATION_EXTRA_HORZ)
+ << use_highbd;
+
+ // setup the 2 lines above the stripe (skipped when y < 0, e.g. stripe 0)
+ for (i = 0; i < 2; i++) {
+ y = y_stripe_topmost - 2 + i;
+ if (y >= 0 && y < y0 && y >= y0 - 2) {
+ uint8_t *p = data + ((y * stride + x0) << use_highbd);
+ uint8_t *new_data =
+ boundary_above_buf + ((i * boundary_stride + x0) << use_highbd);
+ // The copy spans RESTORATION_EXTRA_HORZ extra samples on each side of
+ // [h_start, h_end), so rows are touched slightly outside the tile.
+ // NOTE(review): tmp_save_above must hold x1 - x0 samples - confirm sizing.
+ // Save old pixels
+ memcpy(rst->tmp_save_above[i], p, (x1 - x0) << use_highbd);
+ // Replace width pixels from boundary_above_buf
+ memcpy(p, new_data, (x1 - x0) << use_highbd);
+ }
+ }
+ // setup the 2 lines below the stripe
+ for (i = 0; i < 2; i++) {
+ y = y_stripe_topmost + stripe_height + i;
+ if (y < v_end + 2) {
+ uint8_t *p = data + ((y * stride + x0) << use_highbd);
+ uint8_t *new_data =
+ boundary_below_buf + ((i * boundary_stride + x0) << use_highbd);
+ // Same save-then-overwrite sequence as for the rows above the stripe.
+ // NOTE(review): y may be at or past v_end here (guard is y < v_end + 2);
+ // confirm those frame rows and boundary_below_buf rows are always valid.
+ // Save old pixels
+ memcpy(rst->tmp_save_below[i], p, (x1 - x0) << use_highbd);
+ // Replace width pixels from boundary_below_buf
+ memcpy(p, new_data, (x1 - x0) << use_highbd);
+ }
+ }
+ // Return actual stripe height (clipped at v_end, measured from y0)
+ return AOMMIN(v_end, y_stripe_topmost + stripe_height) - y0;
+}
+
+// Undoes setup_processing_stripe_boundary: copies the rows saved in
+// rst->tmp_save_above / rst->tmp_save_below back into the frame. Must be
+// called with the same arguments as the matching setup call, since the
+// stripe position and the row guards are recomputed from them.
+static void restore_processing_stripe_boundary(int y0, int v_end, int h_start,
+                                               int h_end, uint8_t *data,
+                                               int stride,
+                                               RestorationInternal *rst,
+                                               int use_highbd) {
+  int y, y_stripe_topmost, i, stripe_index;
+  // Same grid offset as the setup function, so both agree on which rows move.
+  int tile_offset = RESTORATION_TILE_OFFSET >> rst->subsampling_y;
+  int stripe_height = rst->rsi->procunit_height;
+  int x0 = h_start - RESTORATION_EXTRA_HORZ;
+  int x1 = h_end + RESTORATION_EXTRA_HORZ;
+
+  stripe_index = (y0 + tile_offset) / stripe_height;
+  y_stripe_topmost = stripe_index * stripe_height - tile_offset;
+  // restore the 2 lines above the stripe
+  for (i = 0; i < 2; i++) {
+    y = y_stripe_topmost - 2 + i;
+    if (y >= 0 && y < y0 && y >= y0 - 2) {
+      uint8_t *p = data + ((y * stride + x0) << use_highbd);
+      memcpy(p, rst->tmp_save_above[i], (x1 - x0) << use_highbd);
+    }
+  }
+  // restore the 2 lines below the stripe
+  for (i = 0; i < 2; i++) {
+    y = y_stripe_topmost + stripe_height + i;
+    if (y < v_end + 2) {
+      uint8_t *p = data + ((y * stride + x0) << use_highbd);
+      memcpy(p, rst->tmp_save_below[i], (x1 - x0) << use_highbd);
+    }
+  }
+}
+
+#endif
+
static void loop_copy_tile(uint8_t *data, int tile_idx, int width, int height,
int stride, RestorationInternal *rst, uint8_t *dst,
int dst_stride) {
@@ -130,7 +235,11 @@
const int tile_height = rst->tile_height;
RestorationTileLimits limits =
av1_get_rest_tile_limits(tile_idx, rst->nhtiles, rst->nvtiles, tile_width,
+#if CONFIG_STRIPED_LOOP_RESTORATION
+ tile_height, width, height, rst->subsampling_y);
+#else
tile_height, width, height);
+#endif
for (int i = limits.v_start; i < limits.v_end; ++i)
memcpy(dst + i * dst_stride + limits.h_start,
data + i * stride + limits.h_start, limits.h_end - limits.h_start);
@@ -173,7 +282,11 @@
RestorationInternal *rst, uint8_t *dst,
int dst_stride) {
const int procunit_width = rst->rsi->procunit_width;
+#if CONFIG_STRIPED_LOOP_RESTORATION
+ int procunit_height;
+#else
const int procunit_height = rst->rsi->procunit_height;
+#endif
const int tile_width = rst->tile_width;
const int tile_height = rst->tile_height;
if (rst->rsi->restoration_type[tile_idx] == RESTORE_NONE) {
@@ -183,13 +296,25 @@
InterpKernel vertical_topbot;
RestorationTileLimits limits =
av1_get_rest_tile_limits(tile_idx, rst->nhtiles, rst->nvtiles, tile_width,
+#if CONFIG_STRIPED_LOOP_RESTORATION
+ tile_height, width, height, rst->subsampling_y);
+#else
tile_height, width, height);
+#endif
+
// Convolve the whole tile (done in blocks here to match the requirements
// of the vectorized convolve functions, but the result is equivalent)
- for (int i = limits.v_start; i < limits.v_end; i += procunit_height)
+ for (int i = limits.v_start; i < limits.v_end; i += procunit_height) {
+#if CONFIG_STRIPED_LOOP_RESTORATION
+ int h = setup_processing_stripe_boundary(
+ i, limits.v_end, limits.h_start, limits.h_end, data, stride, rst, 0);
+ h = ALIGN_POWER_OF_TWO(h, 1);
+ procunit_height = h;
+#else
+ int h = AOMMIN(procunit_height, (limits.v_end - i + 15) & ~15);
+#endif
for (int j = limits.h_start; j < limits.h_end; j += procunit_width) {
int w = AOMMIN(procunit_width, (limits.h_end - j + 15) & ~15);
- int h = AOMMIN(procunit_height, (limits.v_end - i + 15) & ~15);
const uint8_t *data_p = data + i * stride + j;
uint8_t *dst_p = dst + i * dst_stride + j;
// Note h is at least 16
@@ -237,6 +362,11 @@
dst_p += dst_stride;
}
}
+#if CONFIG_STRIPED_LOOP_RESTORATION
+ restore_processing_stripe_boundary(i, limits.v_end, limits.h_start,
+ limits.h_end, data, stride, rst, 0);
+#endif
+ }
}
static void loop_wiener_filter(uint8_t *data, int width, int height, int stride,
@@ -978,7 +1108,11 @@
RestorationInternal *rst, uint8_t *dst,
int dst_stride) {
const int procunit_width = rst->rsi->procunit_width;
+#if CONFIG_STRIPED_LOOP_RESTORATION
+ int procunit_height;
+#else
const int procunit_height = rst->rsi->procunit_height;
+#endif
const int tile_width = rst->tile_width;
const int tile_height = rst->tile_height;
if (rst->rsi->restoration_type[tile_idx] == RESTORE_NONE) {
@@ -987,17 +1121,32 @@
}
RestorationTileLimits limits =
av1_get_rest_tile_limits(tile_idx, rst->nhtiles, rst->nvtiles, tile_width,
+#if CONFIG_STRIPED_LOOP_RESTORATION
+ tile_height, width, height, rst->subsampling_y);
+#else
tile_height, width, height);
- for (int i = limits.v_start; i < limits.v_end; i += procunit_height)
+#endif
+ for (int i = limits.v_start; i < limits.v_end; i += procunit_height) {
+#if CONFIG_STRIPED_LOOP_RESTORATION
+ int h = setup_processing_stripe_boundary(
+ i, limits.v_end, limits.h_start, limits.h_end, data, stride, rst, 0);
+ procunit_height = h;
+#else
+ int h = AOMMIN(procunit_height, limits.v_end - i);
+#endif
for (int j = limits.h_start; j < limits.h_end; j += procunit_width) {
int w = AOMMIN(procunit_width, limits.h_end - j);
- int h = AOMMIN(procunit_height, limits.v_end - i);
uint8_t *data_p = data + i * stride + j;
uint8_t *dst_p = dst + i * dst_stride + j;
apply_selfguided_restoration(
data_p, w, h, stride, rst->rsi->sgrproj_info[tile_idx].ep,
rst->rsi->sgrproj_info[tile_idx].xqd, dst_p, dst_stride, rst->tmpbuf);
}
+#if CONFIG_STRIPED_LOOP_RESTORATION
+ restore_processing_stripe_boundary(i, limits.v_end, limits.h_start,
+ limits.h_end, data, stride, rst, 0);
+#endif
+ }
}
static void loop_sgrproj_filter(uint8_t *data, int width, int height,
@@ -1061,7 +1210,11 @@
const int tile_height = rst->tile_height;
RestorationTileLimits limits =
av1_get_rest_tile_limits(tile_idx, rst->nhtiles, rst->nvtiles, tile_width,
+#if CONFIG_STRIPED_LOOP_RESTORATION
+ tile_height, width, height, rst->subsampling_y);
+#else
tile_height, width, height);
+#endif
for (int i = limits.v_start; i < limits.v_end; ++i)
memcpy(dst + i * dst_stride + limits.h_start,
data + i * stride + limits.h_start,
@@ -1074,7 +1227,11 @@
int bit_depth, uint16_t *dst,
int dst_stride) {
const int procunit_width = rst->rsi->procunit_width;
+#if CONFIG_STRIPED_LOOP_RESTORATION
+ int procunit_height;
+#else
const int procunit_height = rst->rsi->procunit_height;
+#endif
const int tile_width = rst->tile_width;
const int tile_height = rst->tile_height;
@@ -1085,14 +1242,27 @@
}
RestorationTileLimits limits =
av1_get_rest_tile_limits(tile_idx, rst->nhtiles, rst->nvtiles, tile_width,
+#if CONFIG_STRIPED_LOOP_RESTORATION
+ tile_height, width, height, rst->subsampling_y);
+#else
tile_height, width, height);
+#endif
InterpKernel vertical_topbot;
+
// Convolve the whole tile (done in blocks here to match the requirements
// of the vectorized convolve functions, but the result is equivalent)
- for (int i = limits.v_start; i < limits.v_end; i += procunit_height)
+ for (int i = limits.v_start; i < limits.v_end; i += procunit_height) {
+#if CONFIG_STRIPED_LOOP_RESTORATION
+ int h = setup_processing_stripe_boundary(i, limits.v_end, limits.h_start,
+ limits.h_end, (uint8_t *)data,
+ stride, rst, 1);
+ h = ALIGN_POWER_OF_TWO(h, 1);
+ procunit_height = h;
+#else
+ int h = AOMMIN(procunit_height, (limits.v_end - i + 15) & ~15);
+#endif
for (int j = limits.h_start; j < limits.h_end; j += procunit_width) {
int w = AOMMIN(procunit_width, (limits.h_end - j + 15) & ~15);
- int h = AOMMIN(procunit_height, (limits.v_end - i + 15) & ~15);
const uint16_t *data_p = data + i * stride + j;
uint16_t *dst_p = dst + i * dst_stride + j;
// Note h is at least 16
@@ -1146,6 +1316,12 @@
dst_p += dst_stride;
}
}
+#if CONFIG_STRIPED_LOOP_RESTORATION
+ restore_processing_stripe_boundary(i, limits.v_end, limits.h_start,
+ limits.h_end, (uint8_t *)data, stride,
+ rst, 1);
+#endif
+ }
}
static void loop_wiener_filter_highbd(uint8_t *data8, int width, int height,
@@ -1315,7 +1491,11 @@
int bit_depth, uint16_t *dst,
int dst_stride) {
const int procunit_width = rst->rsi->procunit_width;
+#if CONFIG_STRIPED_LOOP_RESTORATION
+ int procunit_height;
+#else
const int procunit_height = rst->rsi->procunit_height;
+#endif
const int tile_width = rst->tile_width;
const int tile_height = rst->tile_height;
@@ -1326,17 +1506,34 @@
}
RestorationTileLimits limits =
av1_get_rest_tile_limits(tile_idx, rst->nhtiles, rst->nvtiles, tile_width,
+#if CONFIG_STRIPED_LOOP_RESTORATION
+ tile_height, width, height, rst->subsampling_y);
+#else
tile_height, width, height);
- for (int i = limits.v_start; i < limits.v_end; i += procunit_height)
+#endif
+ for (int i = limits.v_start; i < limits.v_end; i += procunit_height) {
+#if CONFIG_STRIPED_LOOP_RESTORATION
+ int h = setup_processing_stripe_boundary(i, limits.v_end, limits.h_start,
+ limits.h_end, (uint8_t *)data,
+ stride, rst, 1);
+ procunit_height = h;
+#else
+ int h = AOMMIN(procunit_height, limits.v_end - i);
+#endif
for (int j = limits.h_start; j < limits.h_end; j += procunit_width) {
int w = AOMMIN(procunit_width, limits.h_end - j);
- int h = AOMMIN(procunit_height, limits.v_end - i);
uint16_t *data_p = data + i * stride + j;
uint16_t *dst_p = dst + i * dst_stride + j;
apply_selfguided_restoration_highbd(
data_p, w, h, stride, bit_depth, rst->rsi->sgrproj_info[tile_idx].ep,
rst->rsi->sgrproj_info[tile_idx].xqd, dst_p, dst_stride, rst->tmpbuf);
}
+#if CONFIG_STRIPED_LOOP_RESTORATION
+ restore_processing_stripe_boundary(i, limits.v_end, limits.h_start,
+ limits.h_end, (uint8_t *)data, stride,
+ rst, 1);
+#endif
+ }
}
static void loop_sgrproj_filter_highbd(uint8_t *data8, int width, int height,
@@ -1409,7 +1606,6 @@
yend = AOMMIN(yend, yheight);
uvend = AOMMIN(uvend, uvheight);
-
if (components_pattern == (1 << AOM_PLANE_Y)) {
// Only y
if (rsi[0].frame_restoration_type == RESTORE_NONE) {
@@ -1459,6 +1655,10 @@
&cm->rst_internal.tile_width, &cm->rst_internal.tile_height,
&cm->rst_internal.nhtiles, &cm->rst_internal.nvtiles);
cm->rst_internal.rsi = &rsi[0];
+#if CONFIG_STRIPED_LOOP_RESTORATION
+ cm->rst_internal.component = AOM_PLANE_Y;
+ cm->rst_internal.subsampling_y = 0;
+#endif
restore_func =
restore_funcs[cm->rst_internal.rsi->frame_restoration_type];
#if CONFIG_HIGHBITDEPTH
@@ -1486,6 +1686,10 @@
&cm->rst_internal.tile_width, &cm->rst_internal.tile_height,
&cm->rst_internal.nhtiles, &cm->rst_internal.nvtiles);
cm->rst_internal.rsi = &rsi[AOM_PLANE_U];
+#if CONFIG_STRIPED_LOOP_RESTORATION
+ cm->rst_internal.component = AOM_PLANE_U;
+ cm->rst_internal.subsampling_y = cm->subsampling_y;
+#endif
restore_func =
restore_funcs[cm->rst_internal.rsi->frame_restoration_type];
#if CONFIG_HIGHBITDEPTH
@@ -1513,6 +1717,10 @@
&cm->rst_internal.tile_width, &cm->rst_internal.tile_height,
&cm->rst_internal.nhtiles, &cm->rst_internal.nvtiles);
cm->rst_internal.rsi = &rsi[AOM_PLANE_V];
+#if CONFIG_STRIPED_LOOP_RESTORATION
+ cm->rst_internal.component = AOM_PLANE_V;
+ cm->rst_internal.subsampling_y = cm->subsampling_y;
+#endif
restore_func =
restore_funcs[cm->rst_internal.rsi->frame_restoration_type];
#if CONFIG_HIGHBITDEPTH
@@ -1629,3 +1837,93 @@
return *rcol0 < *rcol1 && *rrow0 < *rrow1;
}
+
+#if CONFIG_STRIPED_LOOP_RESTORATION
+
+// Pads a row horizontally: the first sample is replicated into the `extend`
+// positions before buf[0] and the last sample into the `extend` positions
+// after buf[width - 1]. Samples are 16-bit when use_highbitdepth is nonzero.
+static void extend_line(uint8_t *buf, int width, int extend,
+                        int use_highbitdepth) {
+  if (!use_highbitdepth) {
+    const uint8_t first = buf[0];
+    for (int k = 1; k <= extend; ++k) buf[-k] = first;
+    const uint8_t last = buf[width - 1];
+    for (int k = 0; k < extend; ++k) buf[width + k] = last;
+    return;
+  }
+  uint16_t *const row = (uint16_t *)buf;
+  const uint16_t first = row[0];
+  for (int k = 1; k <= extend; ++k) row[-k] = first;
+  const uint16_t last = row[width - 1];
+  for (int k = 0; k < extend; ++k) row[width + k] = last;
+}
+
+// For each stripe (64 luma rows, 64 >> subsampling_y chroma rows), save 4 rows
+// of `frame` for later use as stripe boundaries by loop restoration: 2 rows go
+// to rst_internal.stripe_boundary_above[p], 2 to stripe_boundary_below[p].
+void av1_loop_restoration_save_boundary_lines(YV12_BUFFER_CONFIG *frame,
+ AV1_COMMON *cm) {
+ int p, boundary_stride;
+ int src_width, src_height, src_stride, stripe_height, stripe_offset, stripe_y,
+ yy;
+ uint8_t *src_buf, *boundary_below_buf, *boundary_above_buf;
+ int use_highbitdepth = 0;
+ for (p = 0; p < MAX_MB_PLANE; ++p) {
+ if (p == 0) {
+ src_buf = frame->y_buffer;
+ src_width = frame->y_crop_width;
+ src_height = frame->y_crop_height;
+ src_stride = frame->y_stride;
+ stripe_height = 64;
+ stripe_offset = 56 - 2; // first row to copy: 2 rows before row 56
+ } else {
+ src_buf = p == 1 ? frame->u_buffer : frame->v_buffer;
+ src_width = frame->uv_crop_width;
+ src_height = frame->uv_crop_height;
+ src_stride = frame->uv_stride;
+ stripe_height = 64 >> cm->subsampling_y;
+ stripe_offset = (56 >> cm->subsampling_y) - 2;
+ }
+ boundary_above_buf = cm->rst_internal.stripe_boundary_above[p];
+ boundary_below_buf = cm->rst_internal.stripe_boundary_below[p];
+ boundary_stride = cm->rst_internal.stripe_boundary_stride[p];
+#if CONFIG_HIGHBITDEPTH
+ use_highbitdepth = cm->use_highbitdepth;
+ if (use_highbitdepth) {
+ src_buf = (uint8_t *)CONVERT_TO_SHORTPTR(src_buf);
+ }
+#endif
+ src_buf += (stripe_offset * src_stride) << use_highbitdepth;
+ boundary_above_buf += RESTORATION_EXTRA_HORZ << use_highbitdepth;
+ boundary_below_buf += RESTORATION_EXTRA_HORZ << use_highbitdepth;
+ // NOTE(review): rows get RESTORATION_EXTRA_HORZ padding; confirm stride fits
+ for (stripe_y = stripe_offset; stripe_y < src_height;
+ stripe_y += stripe_height) {
+ // Rows stripe_y, stripe_y + 1: the 2 rows above the following stripe
+ for (yy = 0; yy < 2; yy++) {
+ if (stripe_y + yy < src_height) {
+ memcpy(boundary_above_buf, src_buf, src_width << use_highbitdepth);
+ extend_line(boundary_above_buf, src_width, RESTORATION_EXTRA_HORZ,
+ use_highbitdepth);
+ src_buf += src_stride << use_highbitdepth;
+ boundary_above_buf += boundary_stride << use_highbitdepth;
+ }
+ }
+ // Rows stripe_y + 2, stripe_y + 3: the 2 rows below the current stripe
+ for (yy = 2; yy < 4; yy++) {
+ if (stripe_y + yy < src_height) {
+ memcpy(boundary_below_buf, src_buf, src_width << use_highbitdepth);
+ extend_line(boundary_below_buf, src_width, RESTORATION_EXTRA_HORZ,
+ use_highbitdepth);
+ src_buf += src_stride << use_highbitdepth;
+ boundary_below_buf += boundary_stride << use_highbitdepth;
+ }
+ }
+ // jump to next stripe (the 4 rows handled above were already stepped over)
+ src_buf += ((stripe_height - 4) * src_stride) << use_highbitdepth;
+ }
+ }
+}
+
+#endif // CONFIG_STRIPED_LOOP_RESTORATION
diff --git a/av1/common/restoration.h b/av1/common/restoration.h
index 75fcefb..23a5387 100644
--- a/av1/common/restoration.h
+++ b/av1/common/restoration.h
@@ -26,10 +26,23 @@
#define RESTORATION_PROC_UNIT_SIZE 64
+#if CONFIG_STRIPED_LOOP_RESTORATION
+// Filter tile grid offset upwards compared to the superblock grid
+#define RESTORATION_TILE_OFFSET 8
+#endif
+
+#if CONFIG_STRIPED_LOOP_RESTORATION
+#define SGRPROJ_BORDER_VERT 2 // Vertical border used for Sgr
+#else
#define SGRPROJ_BORDER_VERT 1 // Vertical border used for Sgr
+#endif
#define SGRPROJ_BORDER_HORZ 2 // Horizontal border used for Sgr
+#if CONFIG_STRIPED_LOOP_RESTORATION
+#define WIENER_BORDER_VERT 2 // Vertical border used for Wiener
+#else
#define WIENER_BORDER_VERT 1 // Vertical border used for Wiener
+#endif
#define WIENER_HALFWIN 3
#define WIENER_BORDER_HORZ (WIENER_HALFWIN) // Horizontal border for Wiener
@@ -48,6 +61,12 @@
#define RESTORATION_BORDER_HORZ (WIENER_BORDER_HORZ)
#endif // SGRPROJ_BORDER_VERT >= WIENER_BORDER_VERT
+#if CONFIG_STRIPED_LOOP_RESTORATION
+// Additional pixels to the left and right in above/below buffers
+// It is RESTORATION_BORDER_HORZ rounded up to get nicer buffer alignment
+#define RESTORATION_EXTRA_HORZ 4
+#endif
+
// Pad up to 20 more (may be much less is needed)
#define RESTORATION_PADDING 20
#define RESTORATION_PROC_UNIT_PELS \
@@ -57,9 +76,19 @@
RESTORATION_PADDING))
#define RESTORATION_TILESIZE_MAX 256
+#if CONFIG_STRIPED_LOOP_RESTORATION
+#define RESTORATION_TILEPELS_HORZ_MAX \
+ (RESTORATION_TILESIZE_MAX * 3 / 2 + 2 * RESTORATION_BORDER_HORZ + 16)
+#define RESTORATION_TILEPELS_VERT_MAX \
+ ((RESTORATION_TILESIZE_MAX * 3 / 2 + 2 * RESTORATION_BORDER_VERT + \
+ RESTORATION_TILE_OFFSET))
+#define RESTORATION_TILEPELS_MAX \
+ (RESTORATION_TILEPELS_HORZ_MAX * RESTORATION_TILEPELS_VERT_MAX)
+#else
#define RESTORATION_TILEPELS_MAX \
((RESTORATION_TILESIZE_MAX * 3 / 2 + 2 * RESTORATION_BORDER_HORZ + 16) * \
(RESTORATION_TILESIZE_MAX * 3 / 2 + 2 * RESTORATION_BORDER_VERT))
+#endif
// Two 32-bit buffers needed for the restored versions from two filters
// TODO(debargha, rupert): Refactor to not need the large tilesize to be stored
@@ -195,6 +224,20 @@
int tile_width, tile_height;
int nhtiles, nvtiles;
int32_t *tmpbuf;
+#if CONFIG_STRIPED_LOOP_RESTORATION
+ int component;
+ int subsampling_y;
+ uint8_t *stripe_boundary_above[MAX_MB_PLANE];
+ uint8_t *stripe_boundary_below[MAX_MB_PLANE];
+ int stripe_boundary_stride[MAX_MB_PLANE];
+ // Temporary buffers to save/restore 2 lines above/below the restoration
+ // stripe
+ // Allow for filter margin to left and right
+ uint16_t
+ tmp_save_above[2][RESTORATION_TILESIZE_MAX + 2 * RESTORATION_EXTRA_HORZ];
+ uint16_t
+ tmp_save_below[2][RESTORATION_TILESIZE_MAX + 2 * RESTORATION_EXTRA_HORZ];
+#endif
} RestorationInternal;
static INLINE void set_default_sgrproj(SgrprojInfo *sgrproj_info) {
@@ -236,7 +279,12 @@
static INLINE RestorationTileLimits
av1_get_rest_tile_limits(int tile_idx, int nhtiles, int nvtiles, int tile_width,
- int tile_height, int im_width, int im_height) {
+ int tile_height, int im_width,
+#if CONFIG_STRIPED_LOOP_RESTORATION
+ int im_height, int subsampling_y) {
+#else
+ int im_height) {
+#endif
const int htile_idx = tile_idx % nhtiles;
const int vtile_idx = tile_idx / nhtiles;
RestorationTileLimits limits;
@@ -246,6 +294,13 @@
(htile_idx < nhtiles - 1) ? limits.h_start + tile_width : im_width;
limits.v_end =
(vtile_idx < nvtiles - 1) ? limits.v_start + tile_height : im_height;
+#if CONFIG_STRIPED_LOOP_RESTORATION
+ // Offset the tile upwards to align with the restoration processing stripe
+ limits.v_start -= RESTORATION_TILE_OFFSET >> subsampling_y;
+ if (limits.v_start < 0) limits.v_start = 0;
+ if (limits.v_end < im_height)
+ limits.v_end -= RESTORATION_TILE_OFFSET >> subsampling_y;
+#endif
return limits;
}
@@ -284,6 +339,9 @@
int mi_row, int mi_col, BLOCK_SIZE bsize,
int *rcol0, int *rcol1, int *rrow0,
int *rrow1, int *nhtiles);
+
+void av1_loop_restoration_save_boundary_lines(YV12_BUFFER_CONFIG *frame,
+ struct AV1Common *cm);
#ifdef __cplusplus
} // extern "C"
#endif
diff --git a/av1/decoder/decodeframe.c b/av1/decoder/decodeframe.c
index b4259c7..3054618 100644
--- a/av1/decoder/decodeframe.c
+++ b/av1/decoder/decodeframe.c
@@ -86,6 +86,10 @@
#include "av1/common/cfl.h"
#endif
+#if CONFIG_STRIPED_LOOP_RESTORATION && !CONFIG_LOOP_RESTORATION
+#error "striped_loop_restoration requires loop_restoration"
+#endif
+
#if CONFIG_LOOP_RESTORATION
static void loop_restoration_read_sb_coeffs(const AV1_COMMON *const cm,
MACROBLOCKD *xd,
@@ -5589,6 +5593,14 @@
cm->tile_rows * cm->tile_cols - 1);
}
+#if CONFIG_STRIPED_LOOP_RESTORATION
+ if (cm->rst_info[0].frame_restoration_type != RESTORE_NONE ||
+ cm->rst_info[1].frame_restoration_type != RESTORE_NONE ||
+ cm->rst_info[2].frame_restoration_type != RESTORE_NONE) {
+ av1_loop_restoration_save_boundary_lines(&pbi->cur_buf->buf, cm);
+ }
+#endif
+
#if CONFIG_CDEF
if (!cm->skip_loop_filter && !cm->all_lossless) {
av1_cdef_frame(&pbi->cur_buf->buf, cm, &pbi->mb);
@@ -5905,6 +5917,14 @@
return;
}
+#if CONFIG_STRIPED_LOOP_RESTORATION
+ if (cm->rst_info[0].frame_restoration_type != RESTORE_NONE ||
+ cm->rst_info[1].frame_restoration_type != RESTORE_NONE ||
+ cm->rst_info[2].frame_restoration_type != RESTORE_NONE) {
+ av1_loop_restoration_save_boundary_lines(&pbi->cur_buf->buf, cm);
+ }
+#endif
+
#if CONFIG_CDEF
if (!cm->skip_loop_filter && !cm->all_lossless) {
av1_cdef_frame(&pbi->cur_buf->buf, cm, &pbi->mb);
diff --git a/av1/encoder/encoder.c b/av1/encoder/encoder.c
index db09c40..118377b 100644
--- a/av1/encoder/encoder.c
+++ b/av1/encoder/encoder.c
@@ -4525,6 +4525,10 @@
#endif
}
+#if CONFIG_STRIPED_LOOP_RESTORATION
+ av1_loop_restoration_save_boundary_lines(cm->frame_to_show, cm);
+#endif
+
#if CONFIG_CDEF
if (is_lossless_requested(&cpi->oxcf)) {
cm->cdef_bits = 0;
diff --git a/av1/encoder/pickrst.c b/av1/encoder/pickrst.c
index 4be322b..bffc92c 100644
--- a/av1/encoder/pickrst.c
+++ b/av1/encoder/pickrst.c
@@ -150,7 +150,12 @@
av1_loop_restoration_frame(cm->frame_to_show, cm, rsi, components_pattern,
partial_frame, dst_frame);
RestorationTileLimits limits = av1_get_rest_tile_limits(
- tile_idx, nhtiles, nvtiles, tile_width, tile_height, width, height);
+ tile_idx, nhtiles, nvtiles, tile_width, tile_height, width,
+#if CONFIG_STRIPED_LOOP_RESTORATION
+ height, components_pattern > 1 ? cm->subsampling_y : 0);
+#else
+ height);
+#endif
filt_err = sse_restoration_tile(
src, dst_frame, cm, limits.h_start, limits.h_end - limits.h_start,
limits.v_start, limits.v_end - limits.v_start, components_pattern);
@@ -550,7 +555,12 @@
const int rtile_idx = rtile_row * ctxt->nrtiles_x + rtile_col;
RestorationTileLimits limits = av1_get_rest_tile_limits(
rtile_idx, ctxt->nrtiles_x, ctxt->nrtiles_y, rtile_width,
- rtile_height, ctxt->plane_width, ctxt->plane_height);
+ rtile_height, ctxt->plane_width,
+#if CONFIG_STRIPED_LOOP_RESTORATION
+ ctxt->plane_height, ctxt->plane > 0 ? cm->subsampling_y : 0);
+#else
+ ctxt->plane_height);
+#endif
fun(ctxt, rtile_idx, &limits, arg);
}
}
@@ -1324,7 +1334,12 @@
info->frame_restoration_type = RESTORE_NONE;
for (tile_idx = 0; tile_idx < ntiles; ++tile_idx) {
RestorationTileLimits limits = av1_get_rest_tile_limits(
- tile_idx, nhtiles, nvtiles, tile_width, tile_height, width, height);
+ tile_idx, nhtiles, nvtiles, tile_width, tile_height, width,
+#if CONFIG_STRIPED_LOOP_RESTORATION
+ height, plane != AOM_PLANE_Y ? cm->subsampling_y : 0);
+#else
+ height);
+#endif
err = sse_restoration_tile(src, cm->frame_to_show, cm, limits.h_start,
limits.h_end - limits.h_start, limits.v_start,
limits.v_end - limits.v_start, 1 << plane);
diff --git a/build/cmake/aom_config_defaults.cmake b/build/cmake/aom_config_defaults.cmake
index 8ba2048..3d8a004 100644
--- a/build/cmake/aom_config_defaults.cmake
+++ b/build/cmake/aom_config_defaults.cmake
@@ -196,6 +196,7 @@
set(CONFIG_SEGMENT_ZEROMV 0 CACHE NUMBER "AV1 experiment flag.")
set(CONFIG_SMOOTH_HV 1 CACHE NUMBER "AV1 experiment flag.")
set(CONFIG_SPEED_REFS 0 CACHE NUMBER "AV1 experiment flag.")
+set(CONFIG_STRIPED_LOOP_RESTORATION 0 CACHE NUMBER "AV1 experiment flag.")
set(CONFIG_SUPERTX 0 CACHE NUMBER "AV1 experiment flag.")
set(CONFIG_TEMPMV_SIGNALING 1 CACHE NUMBER "AV1 experiment flag.")
set(CONFIG_TPL_MV 0 CACHE NUMBER "AV1 experiment flag.")
diff --git a/build/cmake/aom_configure.cmake b/build/cmake/aom_configure.cmake
index 38aa027..2d783dc 100644
--- a/build/cmake/aom_configure.cmake
+++ b/build/cmake/aom_configure.cmake
@@ -220,6 +220,12 @@
endif()
endif()
+if (CONFIG_STRIPED_LOOP_RESTORATION)
+ if (NOT CONFIG_LOOP_RESTORATION)
+ change_config_and_warn(CONFIG_LOOP_RESTORATION 1 CONFIG_STRIPED_LOOP_RESTORATION)
+ endif()
+endif()
+
if (CONFIG_WARPED_MOTION)
if (CONFIG_NCOBMC)
change_config_and_warn(CONFIG_NCOBMC 0 CONFIG_WARPED_MOTION)
diff --git a/configure b/configure
index c59cd2c..a8c5f4f 100755
--- a/configure
+++ b/configure
@@ -276,6 +276,7 @@
supertx
ans
loop_restoration
+ striped_loop_restoration
ext_partition
ext_partition_types
ext_partition_types_ab
@@ -626,6 +627,17 @@
log_echo "amvr requires hash_me"
enable_feature hash_me
fi
+
+ if enabled striped_loop_restoration && ! enabled loop_restoration ; then
+ log_echo "striped_loop_restoration requires loop_restoration"
+ log_echo "enable loop_restoration"
+ enable_feature loop_restoration
+ fi
+ if enabled striped_loop_restoration && enabled frame_superres ; then
+ log_echo "striped_loop_restoration not compatible with frame_superres"
+ log_echo "disabling striped_loop_restoration"
+ disable_feature striped_loop_restoration
+ fi
}
process_targets() {