Make pickrst.c more efficient
The try_restoration_tile function now calls
av1_loop_restoration_filter_unit instead of
av1_loop_restoration_filter_frame. This is more efficient because it
processes only the unit in question, rather than also memcpy-ing the
rest of the frame.
Change-Id: I7cda078523202bec77e933e7b5bd2f34daae5865
diff --git a/av1/encoder/pickrst.c b/av1/encoder/pickrst.c
index 4cf657e..1299982 100644
--- a/av1/encoder/pickrst.c
+++ b/av1/encoder/pickrst.c
@@ -49,119 +49,93 @@
const int frame_level_restore_bits[RESTORE_TYPES] = { 2, 2, 2, 2 };
-static int64_t sse_restoration_tile(const YV12_BUFFER_CONFIG *src,
- const YV12_BUFFER_CONFIG *dst,
- const AV1_COMMON *cm, int h_start,
- int width, int v_start, int height,
- int components_pattern) {
- int64_t filt_err = 0;
- (void)cm;
- // Y and UV components cannot be mixed
- assert(components_pattern == 1 || components_pattern == 2 ||
- components_pattern == 4 || components_pattern == 6);
+typedef int64_t (*sse_extractor_type)(const YV12_BUFFER_CONFIG *a,
+ const YV12_BUFFER_CONFIG *b);
+typedef int64_t (*sse_part_extractor_type)(const YV12_BUFFER_CONFIG *a,
+ const YV12_BUFFER_CONFIG *b,
+ int hstart, int width, int vstart,
+ int height);
+
+#define NUM_EXTRACTORS (3 * (1 + CONFIG_HIGHBITDEPTH))
+
+static const sse_part_extractor_type sse_part_extractors[NUM_EXTRACTORS] = {
+ aom_get_y_sse_part, aom_get_u_sse_part,
+ aom_get_v_sse_part,
#if CONFIG_HIGHBITDEPTH
- if (cm->use_highbitdepth) {
- if ((components_pattern >> AOM_PLANE_Y) & 1) {
- filt_err +=
- aom_highbd_get_y_sse_part(src, dst, h_start, width, v_start, height);
- }
- if ((components_pattern >> AOM_PLANE_U) & 1) {
- filt_err +=
- aom_highbd_get_u_sse_part(src, dst, h_start, width, v_start, height);
- }
- if ((components_pattern >> AOM_PLANE_V) & 1) {
- filt_err +=
- aom_highbd_get_v_sse_part(src, dst, h_start, width, v_start, height);
- }
- return filt_err;
- }
+ aom_highbd_get_y_sse_part, aom_highbd_get_u_sse_part,
+ aom_highbd_get_v_sse_part,
#endif // CONFIG_HIGHBITDEPTH
- if ((components_pattern >> AOM_PLANE_Y) & 1) {
- filt_err += aom_get_y_sse_part(src, dst, h_start, width, v_start, height);
- }
- if ((components_pattern >> AOM_PLANE_U) & 1) {
- filt_err += aom_get_u_sse_part(src, dst, h_start, width, v_start, height);
- }
- if ((components_pattern >> AOM_PLANE_V) & 1) {
- filt_err += aom_get_v_sse_part(src, dst, h_start, width, v_start, height);
- }
- return filt_err;
+};
+
+static const sse_extractor_type sse_extractors[NUM_EXTRACTORS] = {
+ aom_get_y_sse, aom_get_u_sse, aom_get_v_sse,
+#if CONFIG_HIGHBITDEPTH
+ aom_highbd_get_y_sse, aom_highbd_get_u_sse, aom_highbd_get_v_sse,
+#endif // CONFIG_HIGHBITDEPTH
+};
+
+static int64_t sse_restoration_tile(const RestorationTileLimits *limits,
+ const YV12_BUFFER_CONFIG *src,
+ const YV12_BUFFER_CONFIG *dst, int plane,
+ int highbd) {
+ assert(CONFIG_HIGHBITDEPTH || !highbd);
+ return sse_part_extractors[3 * highbd + plane](
+ src, dst, limits->h_start, limits->h_end - limits->h_start,
+ limits->v_start, limits->v_end - limits->v_start);
}
-static int64_t sse_restoration_frame(AV1_COMMON *const cm,
- const YV12_BUFFER_CONFIG *src,
- const YV12_BUFFER_CONFIG *dst,
- int components_pattern) {
- int64_t filt_err = 0;
-#if CONFIG_HIGHBITDEPTH
- if (cm->use_highbitdepth) {
- if ((components_pattern >> AOM_PLANE_Y) & 1) {
- filt_err += aom_highbd_get_y_sse(src, dst);
- }
- if ((components_pattern >> AOM_PLANE_U) & 1) {
- filt_err += aom_highbd_get_u_sse(src, dst);
- }
- if ((components_pattern >> AOM_PLANE_V) & 1) {
- filt_err += aom_highbd_get_v_sse(src, dst);
- }
- return filt_err;
- }
-#else
- (void)cm;
-#endif // CONFIG_HIGHBITDEPTH
- if ((components_pattern >> AOM_PLANE_Y) & 1) {
- filt_err = aom_get_y_sse(src, dst);
- }
- if ((components_pattern >> AOM_PLANE_U) & 1) {
- filt_err += aom_get_u_sse(src, dst);
- }
- if ((components_pattern >> AOM_PLANE_V) & 1) {
- filt_err += aom_get_v_sse(src, dst);
- }
- return filt_err;
+static int64_t sse_restoration_frame(const YV12_BUFFER_CONFIG *src,
+ const YV12_BUFFER_CONFIG *dst, int plane,
+ int highbd) {
+ assert(CONFIG_HIGHBITDEPTH || !highbd);
+ return sse_extractors[3 * highbd + plane](src, dst);
}
-static int64_t try_restoration_tile(const YV12_BUFFER_CONFIG *src,
- AV1_COMP *const cpi, RestorationInfo *rsi,
- int components_pattern, int tile_idx,
- YV12_BUFFER_CONFIG *dst_frame) {
- AV1_COMMON *const cm = &cpi->common;
-
- // Y and UV components cannot be mixed
- assert(components_pattern == 1 || components_pattern == 2 ||
- components_pattern == 4 || components_pattern == 6);
-
- const int is_uv = components_pattern > 1;
- const int width = src->crop_widths[is_uv];
- const int height = src->crop_heights[is_uv];
-
- const int rtile_size = cm->rst_info[is_uv].restoration_tilesize;
+static int64_t try_restoration_tile(const AV1_COMMON *cm,
+ const YV12_BUFFER_CONFIG *src,
+ const RestorationTileLimits *limits,
+ const RestorationUnitInfo *rui,
+ YV12_BUFFER_CONFIG *dst, int plane) {
+ const RestorationInfo *prsi = &cm->rst_info[plane];
+ const int is_uv = plane > 0;
+#if CONFIG_STRIPED_LOOP_RESTORATION
+ RestorationLineBuffers rlbs;
const int ss_y = is_uv && cm->subsampling_y;
+#endif
+#if CONFIG_HIGHBITDEPTH
+ const int bit_depth = cm->bit_depth;
+ const int highbd = cm->use_highbitdepth;
+#else
+ const int bit_depth = 8;
+ const int highbd = 0;
+#endif
- int nhtiles, nvtiles;
- av1_get_rest_ntiles(width, height, rtile_size, &nhtiles, &nvtiles);
+ const YV12_BUFFER_CONFIG *fts = cm->frame_to_show;
- av1_loop_restoration_filter_frame(cm->frame_to_show, cm, rsi,
- components_pattern, dst_frame);
- RestorationTileLimits limits = av1_get_rest_tile_limits(
- tile_idx, nhtiles, nvtiles, rtile_size, width, height, ss_y);
- int64_t filt_err = sse_restoration_tile(
- src, dst_frame, cm, limits.h_start, limits.h_end - limits.h_start,
- limits.v_start, limits.v_end - limits.v_start, components_pattern);
+ av1_loop_restoration_filter_unit(limits, rui,
+#if CONFIG_STRIPED_LOOP_RESTORATION
+ &prsi->boundaries, &rlbs, ss_y,
+#endif
+ prsi->procunit_width, prsi->procunit_height,
+ highbd, bit_depth, fts->buffers[plane],
+ fts->strides[is_uv], dst->buffers[plane],
+ dst->strides[is_uv], cm->rst_tmpbuf);
- return filt_err;
+ return sse_restoration_tile(limits, src, dst, plane, highbd);
}
static int64_t try_restoration_frame(const YV12_BUFFER_CONFIG *src,
- AV1_COMP *const cpi, RestorationInfo *rsi,
- int components_pattern,
- YV12_BUFFER_CONFIG *dst_frame) {
- AV1_COMMON *const cm = &cpi->common;
- int64_t filt_err;
- av1_loop_restoration_filter_frame(cm->frame_to_show, cm, rsi,
- components_pattern, dst_frame);
- filt_err = sse_restoration_frame(cm, src, dst_frame, components_pattern);
- return filt_err;
+ AV1_COMMON *cm, RestorationInfo *rsi,
+ YV12_BUFFER_CONFIG *dst, int plane) {
+#if CONFIG_HIGHBITDEPTH
+ const int highbd = cm->use_highbitdepth;
+#else
+ const int highbd = 0;
+#endif // CONFIG_HIGHBITDEPTH
+
+ av1_loop_restoration_filter_frame(cm->frame_to_show, cm, rsi, 1 << plane,
+ dst);
+ return sse_restoration_frame(src, dst, plane, highbd);
}
static int64_t get_pixel_proj_error(const uint8_t *src8, int width, int height,
@@ -559,10 +533,16 @@
RestorationInfo *rsi = ctxt->cpi->rst_search;
SgrprojInfo *ref_sgrproj_info = (SgrprojInfo *)arg;
- int64_t err =
- sse_restoration_tile(ctxt->src, cm->frame_to_show, cm, limits->h_start,
- limits->h_end - limits->h_start, limits->v_start,
- limits->v_end - limits->v_start, (1 << ctxt->plane));
+#if CONFIG_HIGHBITDEPTH
+ const int highbd = cm->use_highbitdepth;
+ const int bit_depth = cm->bit_depth;
+#else
+ const int highbd = 0;
+ const int bit_depth = 8;
+#endif // CONFIG_HIGHBITDEPTH
+
+ int64_t err = sse_restoration_tile(limits, ctxt->src, cm->frame_to_show,
+ ctxt->plane, highbd);
// #bits when a tile is not restored
int bits = x->sgrproj_restore_cost[0];
double cost_norestore = RDCOST_DBL(x->rdmult, (bits >> 4), err);
@@ -578,17 +558,14 @@
search_selfguided_restoration(
dgd_start, limits->h_end - limits->h_start,
limits->v_end - limits->v_start, ctxt->dgd_stride, src_start,
- ctxt->src_stride,
-#if CONFIG_HIGHBITDEPTH
- cm->use_highbitdepth, cm->bit_depth,
-#else
- 0, 8,
-#endif // CONFIG_HIGHBITDEPTH
- rsi[ctxt->plane].procunit_width, rsi[ctxt->plane].procunit_height,
- &rtile_sgrproj_info->ep, rtile_sgrproj_info->xqd, cm->rst_tmpbuf);
+ ctxt->src_stride, highbd, bit_depth, rsi[ctxt->plane].procunit_width,
+ rsi[ctxt->plane].procunit_height, &rtile_sgrproj_info->ep,
+ rtile_sgrproj_info->xqd, cm->rst_tmpbuf);
+
plane_rui->restoration_type = RESTORE_SGRPROJ;
- err = try_restoration_tile(ctxt->src, ctxt->cpi, rsi, (1 << ctxt->plane),
- rtile_idx, ctxt->dst_frame);
+ err = try_restoration_tile(cm, ctxt->src, limits, plane_rui, ctxt->dst_frame,
+ ctxt->plane);
+
bits = count_sgrproj_bits(rtile_sgrproj_info, ref_sgrproj_info)
<< AV1_PROB_COST_SHIFT;
bits += x->sgrproj_restore_cost[1];
@@ -621,7 +598,7 @@
// Compute best Sgrproj filters for each rtile, one (encoder/decoder)
// tile at a time.
- const AV1_COMMON *const cm = &cpi->common;
+ AV1_COMMON *const cm = &cpi->common;
#if CONFIG_HIGHBITDEPTH
const int highbd = cm->use_highbitdepth;
#else
@@ -660,7 +637,7 @@
plane_rui->restoration_type = type[rtile_idx];
}
int64_t err =
- try_restoration_frame(src, cpi, cpi->rst_search, (1 << plane), dst_frame);
+ try_restoration_frame(src, cm, cpi->rst_search, dst_frame, plane);
double cost_sgrproj = RDCOST_DBL(cpi->td.mb.rdmult, (bits >> 4), err);
return cost_sgrproj;
}
@@ -1021,14 +998,12 @@
}
#define USE_WIENER_REFINEMENT_SEARCH 1
-static int64_t finer_tile_search_wiener(const YV12_BUFFER_CONFIG *src,
- AV1_COMP *cpi, RestorationInfo *rsi,
- int start_step, int plane,
- int wiener_win, int tile_idx,
- YV12_BUFFER_CONFIG *dst_frame) {
+static int64_t finer_tile_search_wiener(
+ const AV1_COMMON *cm, const YV12_BUFFER_CONFIG *src,
+ const RestorationTileLimits *limits, RestorationUnitInfo *rui,
+ int start_step, int plane, int wiener_win, YV12_BUFFER_CONFIG *dst_frame) {
const int plane_off = (WIENER_WIN - wiener_win) >> 1;
- int64_t err =
- try_restoration_tile(src, cpi, rsi, 1 << plane, tile_idx, dst_frame);
+ int64_t err = try_restoration_tile(cm, src, limits, rui, dst_frame, plane);
(void)start_step;
#if USE_WIENER_REFINEMENT_SEARCH
int64_t err2;
@@ -1037,8 +1012,7 @@
int tap_max[] = { WIENER_FILT_TAP0_MAXV, WIENER_FILT_TAP1_MAXV,
WIENER_FILT_TAP2_MAXV };
- RestorationUnitInfo *plane_rui = &rsi[plane].unit_info[tile_idx];
- WienerInfo *plane_wiener = &plane_rui->wiener_info;
+ WienerInfo *plane_wiener = &rui->wiener_info;
// printf("err pre = %"PRId64"\n", err);
for (int s = start_step; s >= 1; s >>= 1) {
@@ -1049,8 +1023,7 @@
plane_wiener->hfilter[p] -= s;
plane_wiener->hfilter[WIENER_WIN - p - 1] -= s;
plane_wiener->hfilter[WIENER_HALFWIN] += 2 * s;
- err2 = try_restoration_tile(src, cpi, rsi, 1 << plane, tile_idx,
- dst_frame);
+ err2 = try_restoration_tile(cm, src, limits, rui, dst_frame, plane);
if (err2 > err) {
plane_wiener->hfilter[p] += s;
plane_wiener->hfilter[WIENER_WIN - p - 1] += s;
@@ -1070,8 +1043,7 @@
plane_wiener->hfilter[p] += s;
plane_wiener->hfilter[WIENER_WIN - p - 1] += s;
plane_wiener->hfilter[WIENER_HALFWIN] -= 2 * s;
- err2 = try_restoration_tile(src, cpi, rsi, 1 << plane, tile_idx,
- dst_frame);
+ err2 = try_restoration_tile(cm, src, limits, rui, dst_frame, plane);
if (err2 > err) {
plane_wiener->hfilter[p] -= s;
plane_wiener->hfilter[WIENER_WIN - p - 1] -= s;
@@ -1092,8 +1064,7 @@
plane_wiener->vfilter[p] -= s;
plane_wiener->vfilter[WIENER_WIN - p - 1] -= s;
plane_wiener->vfilter[WIENER_HALFWIN] += 2 * s;
- err2 = try_restoration_tile(src, cpi, rsi, 1 << plane, tile_idx,
- dst_frame);
+ err2 = try_restoration_tile(cm, src, limits, rui, dst_frame, plane);
if (err2 > err) {
plane_wiener->vfilter[p] += s;
plane_wiener->vfilter[WIENER_WIN - p - 1] += s;
@@ -1113,8 +1084,7 @@
plane_wiener->vfilter[p] += s;
plane_wiener->vfilter[WIENER_WIN - p - 1] += s;
plane_wiener->vfilter[WIENER_HALFWIN] -= 2 * s;
- err2 = try_restoration_tile(src, cpi, rsi, 1 << plane, tile_idx,
- dst_frame);
+ err2 = try_restoration_tile(cm, src, limits, rui, dst_frame, plane);
if (err2 > err) {
plane_wiener->vfilter[p] -= s;
plane_wiener->vfilter[WIENER_WIN - p - 1] -= s;
@@ -1151,10 +1121,14 @@
WienerInfo *ref_wiener_info = (WienerInfo *)arg;
- int64_t err =
- sse_restoration_tile(ctxt->src, cm->frame_to_show, cm, limits->h_start,
- limits->h_end - limits->h_start, limits->v_start,
- limits->v_end - limits->v_start, (1 << ctxt->plane));
+#if CONFIG_HIGHBITDEPTH
+ const int highbd = cm->use_highbitdepth;
+#else
+ const int highbd = 0;
+#endif // CONFIG_HIGHBITDEPTH
+
+ int64_t err = sse_restoration_tile(limits, ctxt->src, cm->frame_to_show,
+ ctxt->plane, highbd);
// #bits when a tile is not restored
int bits = x->wiener_restore_cost[0];
double cost_norestore = RDCOST_DBL(x->rdmult, (bits >> 4), err);
@@ -1196,8 +1170,9 @@
aom_clear_system_state();
plane_rui->restoration_type = RESTORE_WIENER;
- err = finer_tile_search_wiener(ctxt->src, ctxt->cpi, rsi, 4, ctxt->plane,
- wiener_win, rtile_idx, ctxt->dst_frame);
+ err =
+ finer_tile_search_wiener(&ctxt->cpi->common, ctxt->src, limits, plane_rui,
+ 4, ctxt->plane, wiener_win, ctxt->dst_frame);
if (wiener_win != WIENER_WIN) {
assert(rtile_wiener_info->vfilter[0] == 0 &&
rtile_wiener_info->vfilter[WIENER_WIN - 1] == 0);
@@ -1280,7 +1255,7 @@
plane_rui->restoration_type = type[tile_idx];
}
int64_t err =
- try_restoration_frame(src, cpi, cpi->rst_search, 1 << plane, dst_frame);
+ try_restoration_frame(src, cm, cpi->rst_search, dst_frame, plane);
double cost_wiener = RDCOST_DBL(cpi->td.mb.rdmult, (bits >> 4), err);
return cost_wiener;
@@ -1297,6 +1272,12 @@
AV1_COMMON *const cm = &cpi->common;
int tile_idx, nhtiles, nvtiles;
+#if CONFIG_HIGHBITDEPTH
+ const int highbd = cm->use_highbitdepth;
+#else
+ const int highbd = 0;
+#endif // CONFIG_HIGHBITDEPTH
+
const int is_uv = plane > 0;
const int ss_y = plane > 0 && cm->subsampling_y;
const int width = src->crop_widths[is_uv];
@@ -1306,21 +1287,18 @@
const int ntiles =
av1_get_rest_ntiles(width, height, rtile_size, &nhtiles, &nvtiles);
- (void)info;
(void)dst_frame;
info->frame_restoration_type = RESTORE_NONE;
for (tile_idx = 0; tile_idx < ntiles; ++tile_idx) {
RestorationTileLimits limits = av1_get_rest_tile_limits(
tile_idx, nhtiles, nvtiles, rtile_size, width, height, ss_y);
- err = sse_restoration_tile(src, cm->frame_to_show, cm, limits.h_start,
- limits.h_end - limits.h_start, limits.v_start,
- limits.v_end - limits.v_start, 1 << plane);
+ err = sse_restoration_tile(&limits, src, cm->frame_to_show, plane, highbd);
type[tile_idx] = RESTORE_NONE;
best_tile_cost[tile_idx] = err;
}
// RD cost associated with no restoration
- err = sse_restoration_frame(cm, src, cm->frame_to_show, (1 << plane));
+ err = sse_restoration_frame(src, cm->frame_to_show, plane, highbd);
bits = frame_level_restore_bits[RESTORE_NONE] << AV1_PROB_COST_SHIFT;
cost_norestore = RDCOST_DBL(x->rdmult, (bits >> 4), err);
return cost_norestore;