Prune Wiener search

Wiener filter search is bypassed based on source variance and
reconstruction error, for speed presets 3 and 4.

STATS_CHANGED

Change-Id: Ie668cf58cfe298c6d3fc052a07052e3abfc258c4
diff --git a/aom_dsp/aom_dsp_rtcd_defs.pl b/aom_dsp/aom_dsp_rtcd_defs.pl index 1680d5f..6006367 100755 --- a/aom_dsp/aom_dsp_rtcd_defs.pl +++ b/aom_dsp/aom_dsp_rtcd_defs.pl
@@ -613,6 +613,10 @@ add_proto qw/uint64_t aom_sum_squares_i16/, "const int16_t *src, uint32_t N"; specialize qw/aom_sum_squares_i16 sse2/; + + add_proto qw/uint64_t aom_var_2d_u8/, "uint8_t *src, int src_stride, int width, int height"; + + add_proto qw/uint64_t aom_var_2d_u16/, "uint8_t *src, int src_stride, int width, int height"; } #
diff --git a/aom_dsp/psnr.c b/aom_dsp/psnr.c index 35f8fde..370bd75 100644 --- a/aom_dsp/psnr.c +++ b/aom_dsp/psnr.c
@@ -176,6 +176,27 @@ } #endif // CONFIG_AV1_HIGHBITDEPTH +uint64_t aom_get_y_var(const YV12_BUFFER_CONFIG *a, int hstart, int width, + int vstart, int height) { + return aom_var_2d_u8_c(a->y_buffer + vstart * a->y_stride + hstart, + a->y_stride, width, height) / + (width * height); +} + +uint64_t aom_get_u_var(const YV12_BUFFER_CONFIG *a, int hstart, int width, + int vstart, int height) { + return aom_var_2d_u8_c(a->u_buffer + vstart * a->uv_stride + hstart, + a->uv_stride, width, height) / + (width * height); +} + +uint64_t aom_get_v_var(const YV12_BUFFER_CONFIG *a, int hstart, int width, + int vstart, int height) { + return aom_var_2d_u8_c(a->v_buffer + vstart * a->uv_stride + hstart, + a->uv_stride, width, height) / + (width * height); +} + int64_t aom_get_y_sse_part(const YV12_BUFFER_CONFIG *a, const YV12_BUFFER_CONFIG *b, int hstart, int width, int vstart, int height) { @@ -228,6 +249,27 @@ } #if CONFIG_AV1_HIGHBITDEPTH +uint64_t aom_highbd_get_y_var(const YV12_BUFFER_CONFIG *a, int hstart, + int width, int vstart, int height) { + return aom_var_2d_u16_c(a->y_buffer + vstart * a->y_stride + hstart, + a->y_stride, width, height) / + (width * height); +} + +uint64_t aom_highbd_get_u_var(const YV12_BUFFER_CONFIG *a, int hstart, + int width, int vstart, int height) { + return aom_var_2d_u16_c(a->u_buffer + vstart * a->uv_stride + hstart, + a->uv_stride, width, height) / + (width * height); +} + +uint64_t aom_highbd_get_v_var(const YV12_BUFFER_CONFIG *a, int hstart, + int width, int vstart, int height) { + return aom_var_2d_u16_c(a->v_buffer + vstart * a->uv_stride + hstart, + a->uv_stride, width, height) / + (width * height); +} + int64_t aom_highbd_get_y_sse_part(const YV12_BUFFER_CONFIG *a, const YV12_BUFFER_CONFIG *b, int hstart, int width, int vstart, int height) {
diff --git a/aom_dsp/psnr.h b/aom_dsp/psnr.h index 99aa54c..7f40b8b 100644 --- a/aom_dsp/psnr.h +++ b/aom_dsp/psnr.h
@@ -35,6 +35,12 @@ * \param[in] sse Sum of squared errors */ double aom_sse_to_psnr(double samples, double peak, double sse); +uint64_t aom_get_y_var(const YV12_BUFFER_CONFIG *a, int hstart, int width, + int vstart, int height); +uint64_t aom_get_u_var(const YV12_BUFFER_CONFIG *a, int hstart, int width, + int vstart, int height); +uint64_t aom_get_v_var(const YV12_BUFFER_CONFIG *a, int hstart, int width, + int vstart, int height); int64_t aom_get_y_sse_part(const YV12_BUFFER_CONFIG *a, const YV12_BUFFER_CONFIG *b, int hstart, int width, int vstart, int height); @@ -50,6 +56,12 @@ int64_t aom_get_sse_plane(const YV12_BUFFER_CONFIG *a, const YV12_BUFFER_CONFIG *b, int plane, int highbd); #if CONFIG_AV1_HIGHBITDEPTH +uint64_t aom_highbd_get_y_var(const YV12_BUFFER_CONFIG *a, int hstart, + int width, int vstart, int height); +uint64_t aom_highbd_get_u_var(const YV12_BUFFER_CONFIG *a, int hstart, + int width, int vstart, int height); +uint64_t aom_highbd_get_v_var(const YV12_BUFFER_CONFIG *a, int hstart, + int width, int vstart, int height); int64_t aom_highbd_get_y_sse_part(const YV12_BUFFER_CONFIG *a, const YV12_BUFFER_CONFIG *b, int hstart, int width, int vstart, int height);
diff --git a/aom_dsp/sum_squares.c b/aom_dsp/sum_squares.c index 44ec41f..d739a60 100644 --- a/aom_dsp/sum_squares.c +++ b/aom_dsp/sum_squares.c
@@ -38,3 +38,36 @@ return ss; } + +uint64_t aom_var_2d_u8_c(uint8_t *src, int src_stride, int width, int height) { + int r, c; + uint64_t ss = 0, s = 0; + + for (r = 0; r < height; r++) { + for (c = 0; c < width; c++) { + const uint8_t v = src[c]; + ss += v * v; + s += v; + } + src += src_stride; + } + + return (ss - s * s / (width * height)); +} + +uint64_t aom_var_2d_u16_c(uint8_t *src, int src_stride, int width, int height) { + uint16_t *srcp = CONVERT_TO_SHORTPTR(src); + int r, c; + uint64_t ss = 0, s = 0; + + for (r = 0; r < height; r++) { + for (c = 0; c < width; c++) { + const uint16_t v = srcp[c]; + ss += v * v; + s += v; + } + srcp += src_stride; + } + + return (ss - s * s / (width * height)); +}
diff --git a/av1/encoder/pickrst.c b/av1/encoder/pickrst.c index 7b9d874..46b697c 100644 --- a/av1/encoder/pickrst.c +++ b/av1/encoder/pickrst.c
@@ -63,6 +63,9 @@ const YV12_BUFFER_CONFIG *b, int hstart, int width, int vstart, int height); +typedef uint64_t (*var_part_extractor_type)(const YV12_BUFFER_CONFIG *a, + int hstart, int width, int vstart, + int height); #if CONFIG_AV1_HIGHBITDEPTH #define NUM_EXTRACTORS (3 * (1 + 1)) @@ -71,11 +74,18 @@ aom_get_v_sse_part, aom_highbd_get_y_sse_part, aom_highbd_get_u_sse_part, aom_highbd_get_v_sse_part, }; +static const var_part_extractor_type var_part_extractors[NUM_EXTRACTORS] = { + aom_get_y_var, aom_get_u_var, aom_get_v_var, + aom_highbd_get_y_var, aom_highbd_get_u_var, aom_highbd_get_v_var, +}; #else #define NUM_EXTRACTORS 3 static const sse_part_extractor_type sse_part_extractors[NUM_EXTRACTORS] = { aom_get_y_sse_part, aom_get_u_sse_part, aom_get_v_sse_part }; +static const var_part_extractor_type var_part_extractors[NUM_EXTRACTORS] = { + aom_get_y_var, aom_get_u_var, aom_get_v_var +}; #endif static int64_t sse_restoration_unit(const RestorationTileLimits *limits, @@ -87,6 +97,14 @@ limits->v_start, limits->v_end - limits->v_start); } +static uint64_t var_restoration_unit(const RestorationTileLimits *limits, + const YV12_BUFFER_CONFIG *src, int plane, + int highbd) { + return var_part_extractors[3 * highbd + plane]( + src, limits->h_start, limits->h_end - limits->h_start, limits->v_start, + limits->v_end - limits->v_start); +} + typedef struct { // The best coefficients for Wiener or Sgrproj restoration WienerInfo wiener; @@ -1440,6 +1458,36 @@ RestSearchCtxt *rsc = (RestSearchCtxt *)priv; RestUnitSearchInfo *rusi = &rsc->rusi[rest_unit_idx]; + const MACROBLOCK *const x = rsc->x; + const int64_t bits_none = x->wiener_restore_cost[0]; + + // Skip Wiener search for low variance contents + if (rsc->sf->lpf_sf.prune_wiener_based_on_src_var) { + const int scale[3] = { 0, 1, 2 }; + // Obtain the normalized Qscale + const int qs = av1_dc_quant_QTX(rsc->cm->base_qindex, 0, + rsc->cm->seq_params.bit_depth) >> + 3; + // Derive threshold as sqr(normalized Qscale) * scale / 16
+ const uint64_t thresh = + (qs * qs * scale[rsc->sf->lpf_sf.prune_wiener_based_on_src_var]) >> 4; + const int highbd = rsc->cm->seq_params.use_highbitdepth; + const uint64_t src_var = + var_restoration_unit(limits, rsc->src, rsc->plane, highbd); + // Do not perform Wiener search if source variance is lower than threshold + // or if the reconstruction error is zero + int prune_wiener = (src_var < thresh) || (rusi->sse[RESTORE_NONE] == 0); + if (prune_wiener) { + rsc->bits += bits_none; + rsc->sse += rusi->sse[RESTORE_NONE]; + rusi->best_rtype[RESTORE_WIENER - 1] = RESTORE_NONE; + rusi->sse[RESTORE_WIENER] = INT64_MAX; + if (rsc->sf->lpf_sf.prune_sgr_based_on_wiener == 2) + rusi->skip_sgr_eval = 1; + return; + } + } + const int wiener_win = (rsc->plane == AOM_PLANE_Y) ? WIENER_WIN : WIENER_WIN_CHROMA; @@ -1470,8 +1518,6 @@ limits->h_start, limits->h_end, limits->v_start, limits->v_end, rsc->dgd_stride, rsc->src_stride, M, H); #endif - const MACROBLOCK *const x = rsc->x; - const int64_t bits_none = x->wiener_restore_cost[0]; if (!wiener_decompose_sep_sym(reduced_wiener_win, M, H, vfilter, hfilter)) { rsc->bits += bits_none;
diff --git a/av1/encoder/speed_features.c b/av1/encoder/speed_features.c index 52a6a8a..6424f30 100644 --- a/av1/encoder/speed_features.c +++ b/av1/encoder/speed_features.c
@@ -456,6 +456,7 @@ : gf_group->update_type[gf_group->index] == INTNL_ARF_UPDATE ? 1 : 2; + sf->lpf_sf.prune_wiener_based_on_src_var = 1; sf->lpf_sf.prune_sgr_based_on_wiener = cm->allow_screen_content_tools ? 0 : 2; sf->lpf_sf.reduce_wiener_window_size = is_boosted_arf2_bwd_type ? 0 : 1; @@ -507,6 +508,7 @@ sf->lpf_sf.disable_loop_restoration_chroma = (boosted || cm->allow_screen_content_tools) ? 0 : 1; sf->lpf_sf.reduce_wiener_window_size = !boosted; + sf->lpf_sf.prune_wiener_based_on_src_var = 2; // TODO(any): The following features have no impact on quality and speed, // and are disabled. @@ -1034,6 +1036,7 @@ static AOM_INLINE void init_lpf_sf(LOOP_FILTER_SPEED_FEATURES *lpf_sf) { lpf_sf->disable_loop_restoration_chroma = 0; + lpf_sf->prune_wiener_based_on_src_var = 0; lpf_sf->prune_sgr_based_on_wiener = 0; lpf_sf->enable_sgr_ep_pruning = 0; lpf_sf->reduce_wiener_window_size = 0;
diff --git a/av1/encoder/speed_features.h b/av1/encoder/speed_features.h index f5e7677..2e896a0 100644 --- a/av1/encoder/speed_features.h +++ b/av1/encoder/speed_features.h
@@ -825,6 +825,12 @@ // Disable loop restoration for Chroma plane int disable_loop_restoration_chroma; + // Prune RESTORE_WIENER evaluation based on source variance + // 0 : no pruning + // 1 : conservative pruning + // 2 : aggressive pruning + int prune_wiener_based_on_src_var; + // Prune self-guided loop restoration based on wiener search results // 0 : no pruning // 1 : pruning based on rdcost ratio of RESTORE_WIENER and RESTORE_NONE