Temporal filtering: apply filtering adaptively based on qindex
At low bit rates, a stronger filtering reduces more noise and leads
to a higher rate saving.
At high bit rates, a weaker filtering is demanded to faithfully
encode the frame at a high quality.
Here, we adjust the smoothing weight adaptively according to qindex, to
apply slightly stronger filtering at low bit rates.
Performance on 150 frames, vbr mode:
speed_4 Ovr_psnr ssim vmaf_neg
Lowres -0.206 -0.367 -0.549
Ugc360p -0.329 -0.379 -0.236
Ugc480p -0.494 -0.809 -1.171
Hdres -0.293 -0.344 -0.237
Hdres2 -0.489 -0.503 0.091
speed_1 Ovr_psnr ssim vmaf_neg
Lowres -0.152 -0.269 -0.377
Ugc360p -0.257 -0.249 -0.109
Ugc480p -0.574 -0.890 -0.991
Hdres -0.335 -0.421 -0.217
Hdres2 -0.374 -0.347 0.036
STATS_CHANGED
Change-Id: Iaff961ef3a7fcb729be8d880de14f363a8e6ead9
diff --git a/av1/encoder/temporal_filter.c b/av1/encoder/temporal_filter.c
index 3c9d311..9689e08 100644
--- a/av1/encoder/temporal_filter.c
+++ b/av1/encoder/temporal_filter.c
@@ -561,9 +561,16 @@
(double)TF_WINDOW_BLOCK_BALANCE_WEIGHT * inv_factor;
// Decay factors for non-local mean approach.
double decay_factor[MAX_MB_PLANE] = { 0 };
- // Smaller q -> smaller filtering weight.
+ // Adjust filtering based on q.
+ // Larger q -> stronger filtering -> larger weight.
+ // Smaller q -> weaker filtering -> smaller weight.
double q_decay = pow((double)q_factor / TF_Q_DECAY_THRESHOLD, 2);
q_decay = CLIP(q_decay, 1e-5, 1);
+ if (q_factor >= TF_QINDEX_CUTOFF) {
+ // Max q_factor is 255, therefore the upper bound of q_decay is 8.
+ // We do not need a clip here.
+ q_decay = 0.5 * pow((double)q_factor / 64, 2);
+ }
// Smaller strength -> smaller filtering weight.
double s_decay = pow((double)filter_strength / TF_STRENGTH_THRESHOLD, 2);
s_decay = CLIP(s_decay, 1e-5, 1);
@@ -871,7 +878,7 @@
frame_to_filter->y_buffer + source_offset, source_y_stride,
cpi->alt_ref_buffer.y_buffer + filter_offset, filter_y_stride, &sse);
diff->sum += sse;
- diff->sse += sse * sse;
+ diff->sse += sse * (int64_t)sse;
}
}
}
diff --git a/av1/encoder/temporal_filter.h b/av1/encoder/temporal_filter.h
index 2ae7dd4..d2f728d 100644
--- a/av1/encoder/temporal_filter.h
+++ b/av1/encoder/temporal_filter.h
@@ -64,6 +64,14 @@
// then the actual threshold will be 720 * 0.1 = 72. Similarly, the threshold
// for 360p videos will be 360 * 0.1 = 36.
#define TF_SEARCH_DISTANCE_THRESHOLD 0.1
+// 6. Threshold to identify if the q is in a relatively high range.
+// Above this cutoff q, a stronger filtering is applied.
+// For a high q, the quantization throws away more information, and thus a
+// stronger filtering is less likely to distort the encoded quality, while a
+// stronger filtering could reduce bit rates.
+// For a low q, more details are expected to be retained. Filtering is thus
+// more conservative.
+#define TF_QINDEX_CUTOFF 128
#define NOISE_ESTIMATION_EDGE_THRESHOLD 50
diff --git a/av1/encoder/x86/highbd_temporal_filter_avx2.c b/av1/encoder/x86/highbd_temporal_filter_avx2.c
index b5477ec..68509fa 100644
--- a/av1/encoder/x86/highbd_temporal_filter_avx2.c
+++ b/av1/encoder/x86/highbd_temporal_filter_avx2.c
@@ -352,10 +352,16 @@
TF_SEARCH_ERROR_NORM_WEIGHT);
const double weight_factor =
(double)TF_WINDOW_BLOCK_BALANCE_WEIGHT * inv_factor;
- // Decay factors for non-local mean approach.
- // Smaller q -> smaller filtering weight.
+ // Adjust filtering based on q.
+ // Larger q -> stronger filtering -> larger weight.
+ // Smaller q -> weaker filtering -> smaller weight.
double q_decay = pow((double)q_factor / TF_Q_DECAY_THRESHOLD, 2);
q_decay = CLIP(q_decay, 1e-5, 1);
+ if (q_factor >= TF_QINDEX_CUTOFF) {
+ // Max q_factor is 255, therefore the upper bound of q_decay is 8.
+ // We do not need a clip here.
+ q_decay = 0.5 * pow((double)q_factor / 64, 2);
+ }
// Smaller strength -> smaller filtering weight.
double s_decay = pow((double)filter_strength / TF_STRENGTH_THRESHOLD, 2);
s_decay = CLIP(s_decay, 1e-5, 1);
@@ -393,6 +399,7 @@
const double inv_num_ref_pixels = 1.0 / num_ref_pixels;
// Larger noise -> larger filtering weight.
const double n_decay = 0.5 + log(2 * noise_levels[plane] + 5.0);
+ // Decay factors for non-local mean approach.
const double decay_factor = 1 / (n_decay * q_decay * s_decay);
// Filter U-plane and V-plane using Y-plane. This is because motion
diff --git a/av1/encoder/x86/highbd_temporal_filter_sse2.c b/av1/encoder/x86/highbd_temporal_filter_sse2.c
index bbb3771..1bfdaf7 100644
--- a/av1/encoder/x86/highbd_temporal_filter_sse2.c
+++ b/av1/encoder/x86/highbd_temporal_filter_sse2.c
@@ -227,10 +227,16 @@
TF_SEARCH_ERROR_NORM_WEIGHT);
const double weight_factor =
(double)TF_WINDOW_BLOCK_BALANCE_WEIGHT * inv_factor;
- // Decay factors for non-local mean approach.
- // Smaller q -> smaller filtering weight.
+ // Adjust filtering based on q.
+ // Larger q -> stronger filtering -> larger weight.
+ // Smaller q -> weaker filtering -> smaller weight.
double q_decay = pow((double)q_factor / TF_Q_DECAY_THRESHOLD, 2);
q_decay = CLIP(q_decay, 1e-5, 1);
+ if (q_factor >= TF_QINDEX_CUTOFF) {
+ // Max q_factor is 255, therefore the upper bound of q_decay is 8.
+ // We do not need a clip here.
+ q_decay = 0.5 * pow((double)q_factor / 64, 2);
+ }
// Smaller strength -> smaller filtering weight.
double s_decay = pow((double)filter_strength / TF_STRENGTH_THRESHOLD, 2);
s_decay = CLIP(s_decay, 1e-5, 1);
@@ -268,6 +274,7 @@
const double inv_num_ref_pixels = 1.0 / num_ref_pixels;
// Larger noise -> larger filtering weight.
const double n_decay = 0.5 + log(2 * noise_levels[plane] + 5.0);
+ // Decay factors for non-local mean approach.
const double decay_factor = 1 / (n_decay * q_decay * s_decay);
// Filter U-plane and V-plane using Y-plane. This is because motion
diff --git a/av1/encoder/x86/temporal_filter_avx2.c b/av1/encoder/x86/temporal_filter_avx2.c
index 72914e1..8aa0764 100644
--- a/av1/encoder/x86/temporal_filter_avx2.c
+++ b/av1/encoder/x86/temporal_filter_avx2.c
@@ -238,10 +238,16 @@
TF_SEARCH_ERROR_NORM_WEIGHT);
const double weight_factor =
(double)TF_WINDOW_BLOCK_BALANCE_WEIGHT * inv_factor;
- // Decay factors for non-local mean approach.
- // Smaller q -> smaller filtering weight.
+ // Adjust filtering based on q.
+ // Larger q -> stronger filtering -> larger weight.
+ // Smaller q -> weaker filtering -> smaller weight.
double q_decay = pow((double)q_factor / TF_Q_DECAY_THRESHOLD, 2);
q_decay = CLIP(q_decay, 1e-5, 1);
+ if (q_factor >= TF_QINDEX_CUTOFF) {
+ // Max q_factor is 255, therefore the upper bound of q_decay is 8.
+ // We do not need a clip here.
+ q_decay = 0.5 * pow((double)q_factor / 64, 2);
+ }
// Smaller strength -> smaller filtering weight.
double s_decay = pow((double)filter_strength / TF_STRENGTH_THRESHOLD, 2);
s_decay = CLIP(s_decay, 1e-5, 1);
@@ -277,6 +283,7 @@
const double inv_num_ref_pixels = 1.0 / num_ref_pixels;
// Larger noise -> larger filtering weight.
const double n_decay = 0.5 + log(2 * noise_levels[plane] + 5.0);
+ // Decay factors for non-local mean approach.
const double decay_factor = 1 / (n_decay * q_decay * s_decay);
// Filter U-plane and V-plane using Y-plane. This is because motion
diff --git a/av1/encoder/x86/temporal_filter_sse2.c b/av1/encoder/x86/temporal_filter_sse2.c
index d70792c..26c3926 100644
--- a/av1/encoder/x86/temporal_filter_sse2.c
+++ b/av1/encoder/x86/temporal_filter_sse2.c
@@ -215,10 +215,16 @@
TF_SEARCH_ERROR_NORM_WEIGHT);
const double weight_factor =
(double)TF_WINDOW_BLOCK_BALANCE_WEIGHT * inv_factor;
- // Decay factors for non-local mean approach.
- // Smaller q -> smaller filtering weight.
+ // Adjust filtering based on q.
+ // Larger q -> stronger filtering -> larger weight.
+ // Smaller q -> weaker filtering -> smaller weight.
double q_decay = pow((double)q_factor / TF_Q_DECAY_THRESHOLD, 2);
q_decay = CLIP(q_decay, 1e-5, 1);
+ if (q_factor >= TF_QINDEX_CUTOFF) {
+ // Max q_factor is 255, therefore the upper bound of q_decay is 8.
+ // We do not need a clip here.
+ q_decay = 0.5 * pow((double)q_factor / 64, 2);
+ }
// Smaller strength -> smaller filtering weight.
double s_decay = pow((double)filter_strength / TF_STRENGTH_THRESHOLD, 2);
s_decay = CLIP(s_decay, 1e-5, 1);
@@ -254,6 +260,7 @@
const double inv_num_ref_pixels = 1.0 / num_ref_pixels;
// Larger noise -> larger filtering weight.
const double n_decay = 0.5 + log(2 * noise_levels[plane] + 5.0);
+ // Decay factors for non-local mean approach.
const double decay_factor = 1 / (n_decay * q_decay * s_decay);
// Filter U-plane and V-plane using Y-plane. This is because motion