Enable Loop restoration with Wiener filter for speed 5 and 6
In the parent version, Loop restoration (i.e., both the Wiener and
Self-guided filters) is disabled for speed 5 and 6. This CL replaces
the sf 'disable_lr_filter' with 'disable_wiener_filter' and
'disable_sgr_filter' so that the Wiener Loop restoration filter
alone can be enabled for speed 5 and 6. It also introduces the speed
feature 'disable_wiener_coeff_refine_search' to disable the
refinement stage around the Wiener filter coefficients, as it does
not have much impact on quality w.r.t. speed.
For 'good' encoding mode,
           Instruction Count        BD-Rate Loss(%)
cpu-used     Reduction(%)     avg.psnr  ovr.psnr   ssim
   5           -0.668         -0.5327   -0.5322   -0.5161
   6           -0.926         -0.5467   -0.5441   -0.5223
STATS_CHANGED
Change-Id: I82bc416c3532f263bf837259c4b72d7fe622f176
diff --git a/av1/common/alloccommon.c b/av1/common/alloccommon.c
index 8117caf..6e95f70 100644
--- a/av1/common/alloccommon.c
+++ b/av1/common/alloccommon.c
@@ -289,12 +289,12 @@
}
// Assumes cm->rst_info[p].restoration_unit_size is already initialized
-void av1_alloc_restoration_buffers(AV1_COMMON *cm) {
+void av1_alloc_restoration_buffers(AV1_COMMON *cm, bool is_sgr_enabled) {
const int num_planes = av1_num_planes(cm);
for (int p = 0; p < num_planes; ++p)
av1_alloc_restoration_struct(cm, &cm->rst_info[p], p > 0);
- if (cm->rst_tmpbuf == NULL) {
+ if (cm->rst_tmpbuf == NULL && is_sgr_enabled) {
CHECK_MEM_ERROR(cm, cm->rst_tmpbuf,
(int32_t *)aom_memalign(16, RESTORATION_TMPBUF_SIZE));
}
diff --git a/av1/common/alloccommon.h b/av1/common/alloccommon.h
index fc4a8ba..d31b4c5 100644
--- a/av1/common/alloccommon.h
+++ b/av1/common/alloccommon.h
@@ -14,6 +14,8 @@
#define INVALID_IDX -1 // Invalid buffer index.
+#include <stdbool.h>
+
#include "config/aom_config.h"
#include "av1/common/enums.h"
@@ -48,7 +50,7 @@
void av1_free_cdef_buffers(struct AV1Common *const cm,
struct AV1CdefWorker **cdef_worker,
struct AV1CdefSyncData *cdef_sync);
-void av1_alloc_restoration_buffers(struct AV1Common *cm);
+void av1_alloc_restoration_buffers(struct AV1Common *cm, bool is_sgr_enabled);
void av1_free_restoration_buffers(struct AV1Common *cm);
int av1_alloc_state_buffers(struct AV1Common *cm, int width, int height);
diff --git a/av1/decoder/decodeframe.c b/av1/decoder/decodeframe.c
index d273c79..5b76de8 100644
--- a/av1/decoder/decodeframe.c
+++ b/av1/decoder/decodeframe.c
@@ -10,6 +10,7 @@
*/
#include <assert.h>
+#include <stdbool.h>
#include <stddef.h>
#include "config/aom_config.h"
@@ -5217,7 +5218,7 @@
if (cm->rst_info[0].frame_restoration_type != RESTORE_NONE ||
cm->rst_info[1].frame_restoration_type != RESTORE_NONE ||
cm->rst_info[2].frame_restoration_type != RESTORE_NONE) {
- av1_alloc_restoration_buffers(cm);
+ av1_alloc_restoration_buffers(cm, /*is_sgr_enabled =*/true);
}
const int use_highbd = cm->seq_params->use_highbitdepth;
diff --git a/av1/encoder/encoder.c b/av1/encoder/encoder.c
index f183e15..3493de8 100644
--- a/av1/encoder/encoder.c
+++ b/av1/encoder/encoder.c
@@ -2253,7 +2253,8 @@
for (int i = 0; i < num_planes; ++i)
cm->rst_info[i].frame_restoration_type = RESTORE_NONE;
- av1_alloc_restoration_buffers(cm);
+ const bool is_sgr_enabled = !cpi->sf.lpf_sf.disable_sgr_filter;
+ av1_alloc_restoration_buffers(cm, is_sgr_enabled);
// Store the allocated restoration buffers in MT object.
if (cpi->ppi->p_mt_info.num_workers > 1) {
av1_init_lr_mt_buffers(cpi);
diff --git a/av1/encoder/pickrst.c b/av1/encoder/pickrst.c
index c558ee6..f2fc6ab 100644
--- a/av1/encoder/pickrst.c
+++ b/av1/encoder/pickrst.c
@@ -32,10 +32,6 @@
#include "av1/encoder/picklpf.h"
#include "av1/encoder/pickrst.h"
-// When set to RESTORE_WIENER or RESTORE_SGRPROJ only those are allowed.
-// When set to RESTORE_TYPES we allow switchable.
-static const RestorationType force_restore_type = RESTORE_TYPES;
-
// Number of Wiener iterations
#define NUM_WIENER_ITERS 5
@@ -1459,7 +1455,6 @@
return bits;
}
-#define USE_WIENER_REFINEMENT_SEARCH 1
static int64_t finer_tile_search_wiener(const RestSearchCtxt *rsc,
const RestorationTileLimits *limits,
const PixelRect *tile,
@@ -1467,7 +1462,10 @@
int wiener_win) {
const int plane_off = (WIENER_WIN - wiener_win) >> 1;
int64_t err = try_restoration_unit(rsc, limits, tile, rui);
-#if USE_WIENER_REFINEMENT_SEARCH
+
+ if (rsc->lpf_sf->disable_wiener_coeff_refine_search) return err;
+
+ // Refinement search around the wiener filter coefficients.
int64_t err2;
int tap_min[] = { WIENER_FILT_TAP0_MINV, WIENER_FILT_TAP1_MINV,
WIENER_FILT_TAP2_MINV };
@@ -1563,7 +1561,6 @@
}
}
// printf("err post = %"PRId64"\n", err);
-#endif // USE_WIENER_REFINEMENT_SEARCH
return err;
}
@@ -1818,6 +1815,24 @@
return rsi->units_per_tile;
}
+static INLINE void av1_derive_flags_for_lr_processing(
+ const LOOP_FILTER_SPEED_FEATURES *lpf_sf, bool *disable_lr_filter) {
+ const bool is_wiener_disabled = lpf_sf->disable_wiener_filter;
+ const bool is_sgr_disabled = lpf_sf->disable_sgr_filter;
+
+ // RESTORE_NONE stays enabled while at least one of the Wiener or Self-guided
+ // filters is enabled; it is disabled only when both filters are disabled.
+ disable_lr_filter[RESTORE_NONE] = (is_wiener_disabled && is_sgr_disabled);
+
+ disable_lr_filter[RESTORE_WIENER] = is_wiener_disabled;
+ disable_lr_filter[RESTORE_SGRPROJ] = is_sgr_disabled;
+
+ // Enable Switchable Loop restoration filter only if both the Wiener and
+ // Self-guided filters are enabled.
+ disable_lr_filter[RESTORE_SWITCHABLE] =
+ (is_wiener_disabled || is_sgr_disabled);
+}
+
void av1_pick_filter_restoration(const YV12_BUFFER_CONFIG *src, AV1_COMP *cpi) {
AV1_COMMON *const cm = &cpi->common;
MACROBLOCK *const x = &cpi->td.mb;
@@ -1858,9 +1873,6 @@
RestSearchCtxt rsc;
- // TODO(Diksha): The buffers allocated below are used during Wiener filter
- // processing. Hence, allocate the same when Wiener filter is enabled.
- //
// The buffers 'src_avg' and 'dgd_avg' are used to compute H and M buffers.
// These buffers are required for AVX2 SIMD purpose only. Hence, allocated the
// same if AVX2 variant of SIMD for av1_compute_stats() is enabled. The buffer
@@ -1871,27 +1883,39 @@
rsc.dgd_avg = NULL;
rsc.src_avg = NULL;
#if HAVE_AVX2
- int16_t *buf;
- const int buf_size =
- sizeof(*buf) * 6 * RESTORATION_UNITSIZE_MAX * RESTORATION_UNITSIZE_MAX;
- CHECK_MEM_ERROR(cm, buf, (int16_t *)aom_memalign(32, buf_size));
+ // The buffers allocated below are used during Wiener filter processing of low
+ // bitdepth path. Hence, allocate the same when Wiener filter is enabled in
+ // low bitdepth path.
+ if (!cpi->sf.lpf_sf.disable_wiener_filter &&
+ !cm->seq_params->use_highbitdepth) {
+ const int buf_size = sizeof(*rsc.dgd_avg) * 6 * RESTORATION_UNITSIZE_MAX *
+ RESTORATION_UNITSIZE_MAX;
+ CHECK_MEM_ERROR(cm, rsc.dgd_avg, (int16_t *)aom_memalign(32, buf_size));
- // When LRU width isn't multiple of 16, the 256 bits load instruction used in
- // AVX2 intrinsic can read data beyond valid LRU. Hence, in order to silence
- // Valgrind warning this buffer is initialized with zero. Overhead due to this
- // initialization is negligible since it is done at frame level.
- memset(buf, 0, buf_size);
- rsc.dgd_avg = buf;
- rsc.src_avg = buf + 3 * RESTORATION_UNITSIZE_MAX * RESTORATION_UNITSIZE_MAX;
- // Asserts the starting address of src_avg is always 32-bytes aligned.
- assert(!((intptr_t)rsc.src_avg % 32));
+ // When LRU width isn't multiple of 16, the 256 bits load instruction used
+ // in AVX2 intrinsic can read data beyond valid LRU. Hence, in order to
+ // silence Valgrind warning this buffer is initialized with zero. Overhead
+ // due to this initialization is negligible since it is done at frame level.
+ memset(rsc.dgd_avg, 0, buf_size);
+ rsc.src_avg =
+ rsc.dgd_avg + 3 * RESTORATION_UNITSIZE_MAX * RESTORATION_UNITSIZE_MAX;
+ // Asserts the starting address of src_avg is always 32-bytes aligned.
+ assert(!((intptr_t)rsc.src_avg % 32));
+ }
#endif
const int plane_start = AOM_PLANE_Y;
const int plane_end = num_planes > 1 ? AOM_PLANE_V : AOM_PLANE_Y;
+
+ // Derive the flags to enable/disable Loop restoration filters based on the
+ // speed features 'disable_wiener_filter' and 'disable_sgr_filter'.
+ bool disable_lr_filter[RESTORE_TYPES] = { false };
+ const LOOP_FILTER_SPEED_FEATURES *lpf_sf = &cpi->sf.lpf_sf;
+ av1_derive_flags_for_lr_processing(lpf_sf, disable_lr_filter);
+
for (int plane = plane_start; plane <= plane_end; ++plane) {
- init_rsc(src, &cpi->common, x, &cpi->sf.lpf_sf, plane, rusi,
- &cpi->trial_frame_rst, &rsc);
+ init_rsc(src, &cpi->common, x, lpf_sf, plane, rusi, &cpi->trial_frame_rst,
+ &rsc);
const int plane_ntiles = ntiles[plane > 0];
const RestorationType num_rtypes =
@@ -1901,16 +1925,16 @@
RestorationType best_rtype = RESTORE_NONE;
const int highbd = rsc.cm->seq_params->use_highbitdepth;
- if ((plane && !cpi->sf.lpf_sf.disable_loop_restoration_chroma) ||
- (!plane && !cpi->sf.lpf_sf.disable_loop_restoration_luma)) {
+ if ((plane && !lpf_sf->disable_loop_restoration_chroma) ||
+ (!plane && !lpf_sf->disable_loop_restoration_luma)) {
av1_extend_frame(rsc.dgd_buffer, rsc.plane_width, rsc.plane_height,
rsc.dgd_stride, RESTORATION_BORDER, RESTORATION_BORDER,
highbd);
for (RestorationType r = 0; r < num_rtypes; ++r) {
- if ((force_restore_type != RESTORE_TYPES) && (r != RESTORE_NONE) &&
- (r != force_restore_type))
- continue;
+ // Skip the Loop restoration types disabled by the flags derived from the
+ // speed features 'disable_wiener_filter' and 'disable_sgr_filter'.
+ if (disable_lr_filter[r]) continue;
double cost = search_rest_type(&rsc, r);
@@ -1922,9 +1946,6 @@
}
cm->rst_info[plane].frame_restoration_type = best_rtype;
- if (force_restore_type != RESTORE_TYPES)
- assert(best_rtype == force_restore_type || best_rtype == RESTORE_NONE);
-
if (best_rtype != RESTORE_NONE) {
for (int u = 0; u < plane_ntiles; ++u) {
copy_unit_info(best_rtype, &rusi[u], &cm->rst_info[plane].unit_info[u]);
@@ -1932,7 +1953,10 @@
}
}
#if HAVE_AVX2
- aom_free(buf);
+ if (!cpi->sf.lpf_sf.disable_wiener_filter &&
+ !cm->seq_params->use_highbitdepth) {
+ aom_free(rsc.dgd_avg);
+ }
#endif
aom_free(rusi);
}
diff --git a/av1/encoder/speed_features.c b/av1/encoder/speed_features.c
index e05452a..1447f24 100644
--- a/av1/encoder/speed_features.c
+++ b/av1/encoder/speed_features.c
@@ -484,7 +484,9 @@
sf->intra_sf.chroma_intra_pruning_with_hog = 3;
sf->lpf_sf.use_coarse_filter_level_search = 0;
- sf->lpf_sf.disable_lr_filter = 1;
+ // Disable Wiener and Self-guided Loop restoration filters.
+ sf->lpf_sf.disable_wiener_filter = true;
+ sf->lpf_sf.disable_sgr_filter = true;
sf->mv_sf.prune_mesh_search = PRUNE_MESH_SEARCH_LVL_2;
@@ -1211,7 +1213,9 @@
frame_is_intra_only(&cpi->common) ? MULTI_WINNER_MODE_FAST
: MULTI_WINNER_MODE_OFF;
- sf->lpf_sf.disable_lr_filter = 1;
+ // Disable Self-guided Loop restoration filter.
+ sf->lpf_sf.disable_sgr_filter = true;
+ sf->lpf_sf.disable_wiener_coeff_refine_search = true;
sf->tpl_sf.prune_starting_mv = 3;
sf->tpl_sf.use_y_only_rate_distortion = 1;
@@ -1580,7 +1584,9 @@
sf->intra_sf.dv_cost_upd_level = INTERNAL_COST_UPD_OFF;
sf->tx_sf.model_based_prune_tx_search_level = 0;
sf->lpf_sf.dual_sgr_penalty_level = 1;
- sf->lpf_sf.disable_lr_filter = 1;
+ // Disable Wiener and Self-guided Loop restoration filters.
+ sf->lpf_sf.disable_wiener_filter = true;
+ sf->lpf_sf.disable_sgr_filter = true;
sf->rt_sf.skip_interp_filter_search = 1;
sf->intra_sf.prune_palette_search_level = 2;
sf->intra_sf.prune_luma_palette_size_search_level = 2;
@@ -2110,7 +2116,10 @@
lpf_sf->cdef_pick_method = CDEF_FULL_SEARCH;
// Set decoder side speed feature to use less dual sgr modes
lpf_sf->dual_sgr_penalty_level = 0;
- lpf_sf->disable_lr_filter = 0;
+ // Enable Wiener and Self-guided Loop restoration filters by default.
+ lpf_sf->disable_wiener_filter = false;
+ lpf_sf->disable_sgr_filter = false;
+ lpf_sf->disable_wiener_coeff_refine_search = false;
lpf_sf->use_downsampled_wiener_stats = 0;
}
@@ -2291,7 +2300,10 @@
(sf->inter_sf.use_dist_wtd_comp_flag != DIST_WTD_COMP_DISABLED);
cpi->common.seq_params->enable_dual_filter &=
!sf->interp_sf.disable_dual_filter;
- cpi->common.seq_params->enable_restoration &= !sf->lpf_sf.disable_lr_filter;
+ // Set the flag 'enable_restoration', if one of the Loop restoration filters
+ // (i.e., Wiener or Self-guided) is enabled.
+ cpi->common.seq_params->enable_restoration &=
+ (!sf->lpf_sf.disable_wiener_filter || !sf->lpf_sf.disable_sgr_filter);
cpi->common.seq_params->enable_interintra_compound &=
(sf->inter_sf.disable_interintra_wedge_var_thresh != UINT_MAX);
diff --git a/av1/encoder/speed_features.h b/av1/encoder/speed_features.h
index e4c7c70..13b8903 100644
--- a/av1/encoder/speed_features.h
+++ b/av1/encoder/speed_features.h
@@ -1432,8 +1432,14 @@
// Reduce the wiener filter win size for luma
int reduce_wiener_window_size;
- // Disable loop restoration filter
- int disable_lr_filter;
+ // Flag to disable Wiener Loop restoration filter.
+ bool disable_wiener_filter;
+
+ // Flag to disable Self-guided Loop restoration filter.
+ bool disable_sgr_filter;
+
+ // Disable the refinement search around the wiener filter coefficients.
+ bool disable_wiener_coeff_refine_search;
// Whether to downsample the rows in computation of wiener stats.
int use_downsampled_wiener_stats;