Prune Wiener search
Wiener filter search is bypassed based on source variance and
reconstruction error, for speed presets 3 and 4.
STATS_CHANGED
Change-Id: Ie668cf58cfe298c6d3fc052a07052e3abfc258c4
diff --git a/aom_dsp/aom_dsp_rtcd_defs.pl b/aom_dsp/aom_dsp_rtcd_defs.pl
index 1680d5f..6006367 100755
--- a/aom_dsp/aom_dsp_rtcd_defs.pl
+++ b/aom_dsp/aom_dsp_rtcd_defs.pl
@@ -613,6 +613,10 @@
add_proto qw/uint64_t aom_sum_squares_i16/, "const int16_t *src, uint32_t N";
specialize qw/aom_sum_squares_i16 sse2/;
+
+ add_proto qw/uint64_t aom_var_2d_u8/, "uint8_t *src, int src_stride, int width, int height";
+
+ add_proto qw/uint64_t aom_var_2d_u16/, "uint8_t *src, int src_stride, int width, int height";
}
#
diff --git a/aom_dsp/psnr.c b/aom_dsp/psnr.c
index 35f8fde..370bd75 100644
--- a/aom_dsp/psnr.c
+++ b/aom_dsp/psnr.c
@@ -176,6 +176,27 @@
}
#endif // CONFIG_AV1_HIGHBITDEPTH
+uint64_t aom_get_y_var(const YV12_BUFFER_CONFIG *a, int hstart, int width,
+ int vstart, int height) {
+  return aom_var_2d_u8(a->y_buffer + vstart * a->y_stride + hstart,
+                       a->y_stride, width, height) /
+ (width * height);
+}
+
+uint64_t aom_get_u_var(const YV12_BUFFER_CONFIG *a, int hstart, int width,
+ int vstart, int height) {
+  return aom_var_2d_u8(a->u_buffer + vstart * a->uv_stride + hstart,
+                       a->uv_stride, width, height) /
+ (width * height);
+}
+
+uint64_t aom_get_v_var(const YV12_BUFFER_CONFIG *a, int hstart, int width,
+ int vstart, int height) {
+  return aom_var_2d_u8(a->v_buffer + vstart * a->uv_stride + hstart,
+                       a->uv_stride, width, height) /
+ (width * height);
+}
+
int64_t aom_get_y_sse_part(const YV12_BUFFER_CONFIG *a,
const YV12_BUFFER_CONFIG *b, int hstart, int width,
int vstart, int height) {
@@ -228,6 +249,27 @@
}
#if CONFIG_AV1_HIGHBITDEPTH
+uint64_t aom_highbd_get_y_var(const YV12_BUFFER_CONFIG *a, int hstart,
+ int width, int vstart, int height) {
+  return aom_var_2d_u16(a->y_buffer + vstart * a->y_stride + hstart,
+                        a->y_stride, width, height) /
+ (width * height);
+}
+
+uint64_t aom_highbd_get_u_var(const YV12_BUFFER_CONFIG *a, int hstart,
+ int width, int vstart, int height) {
+  return aom_var_2d_u16(a->u_buffer + vstart * a->uv_stride + hstart,
+                        a->uv_stride, width, height) /
+ (width * height);
+}
+
+uint64_t aom_highbd_get_v_var(const YV12_BUFFER_CONFIG *a, int hstart,
+ int width, int vstart, int height) {
+  return aom_var_2d_u16(a->v_buffer + vstart * a->uv_stride + hstart,
+                        a->uv_stride, width, height) /
+ (width * height);
+}
+
int64_t aom_highbd_get_y_sse_part(const YV12_BUFFER_CONFIG *a,
const YV12_BUFFER_CONFIG *b, int hstart,
int width, int vstart, int height) {
diff --git a/aom_dsp/psnr.h b/aom_dsp/psnr.h
index 99aa54c..7f40b8b 100644
--- a/aom_dsp/psnr.h
+++ b/aom_dsp/psnr.h
@@ -35,6 +35,12 @@
* \param[in] sse Sum of squared errors
*/
double aom_sse_to_psnr(double samples, double peak, double sse);
+uint64_t aom_get_y_var(const YV12_BUFFER_CONFIG *a, int hstart, int width,
+ int vstart, int height);
+uint64_t aom_get_u_var(const YV12_BUFFER_CONFIG *a, int hstart, int width,
+ int vstart, int height);
+uint64_t aom_get_v_var(const YV12_BUFFER_CONFIG *a, int hstart, int width,
+ int vstart, int height);
int64_t aom_get_y_sse_part(const YV12_BUFFER_CONFIG *a,
const YV12_BUFFER_CONFIG *b, int hstart, int width,
int vstart, int height);
@@ -50,6 +56,12 @@
int64_t aom_get_sse_plane(const YV12_BUFFER_CONFIG *a,
const YV12_BUFFER_CONFIG *b, int plane, int highbd);
#if CONFIG_AV1_HIGHBITDEPTH
+uint64_t aom_highbd_get_y_var(const YV12_BUFFER_CONFIG *a, int hstart,
+ int width, int vstart, int height);
+uint64_t aom_highbd_get_u_var(const YV12_BUFFER_CONFIG *a, int hstart,
+ int width, int vstart, int height);
+uint64_t aom_highbd_get_v_var(const YV12_BUFFER_CONFIG *a, int hstart,
+ int width, int vstart, int height);
int64_t aom_highbd_get_y_sse_part(const YV12_BUFFER_CONFIG *a,
const YV12_BUFFER_CONFIG *b, int hstart,
int width, int vstart, int height);
diff --git a/aom_dsp/sum_squares.c b/aom_dsp/sum_squares.c
index 44ec41f..d739a60 100644
--- a/aom_dsp/sum_squares.c
+++ b/aom_dsp/sum_squares.c
@@ -38,3 +38,36 @@
return ss;
}
+
+uint64_t aom_var_2d_u8_c(uint8_t *src, int src_stride, int width, int height) {
+ int r, c;
+ uint64_t ss = 0, s = 0;
+
+ for (r = 0; r < height; r++) {
+ for (c = 0; c < width; c++) {
+ const uint8_t v = src[c];
+ ss += v * v;
+ s += v;
+ }
+ src += src_stride;
+ }
+
+ return (ss - s * s / (width * height));
+}
+
+uint64_t aom_var_2d_u16_c(uint8_t *src, int src_stride, int width, int height) {
+ uint16_t *srcp = CONVERT_TO_SHORTPTR(src);
+ int r, c;
+ uint64_t ss = 0, s = 0;
+
+ for (r = 0; r < height; r++) {
+ for (c = 0; c < width; c++) {
+ const uint16_t v = srcp[c];
+      ss += (uint64_t)v * v;
+ s += v;
+ }
+ srcp += src_stride;
+ }
+
+ return (ss - s * s / (width * height));
+}
diff --git a/av1/encoder/pickrst.c b/av1/encoder/pickrst.c
index 7b9d874..46b697c 100644
--- a/av1/encoder/pickrst.c
+++ b/av1/encoder/pickrst.c
@@ -63,6 +63,9 @@
const YV12_BUFFER_CONFIG *b,
int hstart, int width, int vstart,
int height);
+typedef uint64_t (*var_part_extractor_type)(const YV12_BUFFER_CONFIG *a,
+ int hstart, int width, int vstart,
+ int height);
#if CONFIG_AV1_HIGHBITDEPTH
#define NUM_EXTRACTORS (3 * (1 + 1))
@@ -71,11 +74,18 @@
aom_get_v_sse_part, aom_highbd_get_y_sse_part,
aom_highbd_get_u_sse_part, aom_highbd_get_v_sse_part,
};
+static const var_part_extractor_type var_part_extractors[NUM_EXTRACTORS] = {
+ aom_get_y_var, aom_get_u_var, aom_get_v_var,
+ aom_highbd_get_y_var, aom_highbd_get_u_var, aom_highbd_get_v_var,
+};
#else
#define NUM_EXTRACTORS 3
static const sse_part_extractor_type sse_part_extractors[NUM_EXTRACTORS] = {
aom_get_y_sse_part, aom_get_u_sse_part, aom_get_v_sse_part
};
+static const var_part_extractor_type var_part_extractors[NUM_EXTRACTORS] = {
+ aom_get_y_var, aom_get_u_var, aom_get_v_var
+};
#endif
static int64_t sse_restoration_unit(const RestorationTileLimits *limits,
@@ -87,6 +97,14 @@
limits->v_start, limits->v_end - limits->v_start);
}
+static uint64_t var_restoration_unit(const RestorationTileLimits *limits,
+ const YV12_BUFFER_CONFIG *src, int plane,
+ int highbd) {
+ return var_part_extractors[3 * highbd + plane](
+ src, limits->h_start, limits->h_end - limits->h_start, limits->v_start,
+ limits->v_end - limits->v_start);
+}
+
typedef struct {
// The best coefficients for Wiener or Sgrproj restoration
WienerInfo wiener;
@@ -1440,6 +1458,36 @@
RestSearchCtxt *rsc = (RestSearchCtxt *)priv;
RestUnitSearchInfo *rusi = &rsc->rusi[rest_unit_idx];
+ const MACROBLOCK *const x = rsc->x;
+ const int64_t bits_none = x->wiener_restore_cost[0];
+
+ // Skip Wiener search for low variance contents
+ if (rsc->sf->lpf_sf.prune_wiener_based_on_src_var) {
+ const int scale[3] = { 0, 1, 2 };
+ // Obtain the normalized Qscale
+ const int qs = av1_dc_quant_QTX(rsc->cm->base_qindex, 0,
+ rsc->cm->seq_params.bit_depth) >>
+ 3;
+    // Derive threshold as sqr(normalized Qscale) * scale / 16.
+ const uint64_t thresh =
+ (qs * qs * scale[rsc->sf->lpf_sf.prune_wiener_based_on_src_var]) >> 4;
+ const int highbd = rsc->cm->seq_params.use_highbitdepth;
+ const uint64_t src_var =
+ var_restoration_unit(limits, rsc->src, rsc->plane, highbd);
+ // Do not perform Wiener search if source variance is lower than threshold
+ // or if the reconstruction error is zero
+ int prune_wiener = (src_var < thresh) || (rusi->sse[RESTORE_NONE] == 0);
+ if (prune_wiener) {
+ rsc->bits += bits_none;
+ rsc->sse += rusi->sse[RESTORE_NONE];
+ rusi->best_rtype[RESTORE_WIENER - 1] = RESTORE_NONE;
+ rusi->sse[RESTORE_WIENER] = INT64_MAX;
+ if (rsc->sf->lpf_sf.prune_sgr_based_on_wiener == 2)
+ rusi->skip_sgr_eval = 1;
+ return;
+ }
+ }
+
const int wiener_win =
(rsc->plane == AOM_PLANE_Y) ? WIENER_WIN : WIENER_WIN_CHROMA;
@@ -1470,8 +1518,6 @@
limits->h_start, limits->h_end, limits->v_start,
limits->v_end, rsc->dgd_stride, rsc->src_stride, M, H);
#endif
- const MACROBLOCK *const x = rsc->x;
- const int64_t bits_none = x->wiener_restore_cost[0];
if (!wiener_decompose_sep_sym(reduced_wiener_win, M, H, vfilter, hfilter)) {
rsc->bits += bits_none;
diff --git a/av1/encoder/speed_features.c b/av1/encoder/speed_features.c
index 52a6a8a..6424f30 100644
--- a/av1/encoder/speed_features.c
+++ b/av1/encoder/speed_features.c
@@ -456,6 +456,7 @@
: gf_group->update_type[gf_group->index] == INTNL_ARF_UPDATE ? 1
: 2;
+ sf->lpf_sf.prune_wiener_based_on_src_var = 1;
sf->lpf_sf.prune_sgr_based_on_wiener =
cm->allow_screen_content_tools ? 0 : 2;
sf->lpf_sf.reduce_wiener_window_size = is_boosted_arf2_bwd_type ? 0 : 1;
@@ -507,6 +508,7 @@
sf->lpf_sf.disable_loop_restoration_chroma =
(boosted || cm->allow_screen_content_tools) ? 0 : 1;
sf->lpf_sf.reduce_wiener_window_size = !boosted;
+ sf->lpf_sf.prune_wiener_based_on_src_var = 2;
// TODO(any): The following features have no impact on quality and speed,
// and are disabled.
@@ -1034,6 +1036,7 @@
static AOM_INLINE void init_lpf_sf(LOOP_FILTER_SPEED_FEATURES *lpf_sf) {
lpf_sf->disable_loop_restoration_chroma = 0;
+ lpf_sf->prune_wiener_based_on_src_var = 0;
lpf_sf->prune_sgr_based_on_wiener = 0;
lpf_sf->enable_sgr_ep_pruning = 0;
lpf_sf->reduce_wiener_window_size = 0;
diff --git a/av1/encoder/speed_features.h b/av1/encoder/speed_features.h
index f5e7677..2e896a0 100644
--- a/av1/encoder/speed_features.h
+++ b/av1/encoder/speed_features.h
@@ -825,6 +825,12 @@
// Disable loop restoration for Chroma plane
int disable_loop_restoration_chroma;
+ // Prune RESTORE_WIENER evaluation based on source variance
+ // 0 : no pruning
+ // 1 : conservative pruning
+ // 2 : aggressive pruning
+ int prune_wiener_based_on_src_var;
+
// Prune self-guided loop restoration based on wiener search results
// 0 : no pruning
// 1 : pruning based on rdcost ratio of RESTORE_WIENER and RESTORE_NONE