Temporal filtering: apply filtering adaptively based on qindex

At low bit rates, stronger filtering removes more noise and leads to
a higher rate saving. At high bit rates, weaker filtering is needed to
faithfully encode the frame at a high quality.

Here, we adjust the smoothing weight adaptively according to qindex, to
apply slightly stronger filtering at low bit rates.

Performance on 150 frames, vbr mode:

speed_4   Ovr_psnr   ssim     vmaf_neg
Lowres    -0.206     -0.367   -0.549
Ugc360p   -0.329     -0.379   -0.236
Ugc480p   -0.494     -0.809   -1.171
Hdres     -0.293     -0.344   -0.237
Hdres2    -0.489     -0.503    0.091

speed_1   Ovr_psnr   ssim     vmaf_neg
Lowres    -0.152     -0.269   -0.377
Ugc360p   -0.257     -0.249   -0.109
Ugc480p   -0.574     -0.890   -0.991
Hdres     -0.335     -0.421   -0.217
Hdres2    -0.374     -0.347    0.036

STATS_CHANGED

Change-Id: Iaff961ef3a7fcb729be8d880de14f363a8e6ead9

commit: cea1c66479e32792fba4c927efd859aa0beaea17 [log] [tgz]
author: Cheng Chen <chengchen@google.com> Tue Apr 06 23:02:51 2021 -0700
committer: Cheng Chen <chengchen@google.com> Tue Apr 13 17:18:08 2021 +0000
tree: ab354a763ddcdce5f3a7b234f6a7374a311815d5
parent: 23a7cb3db94d00a9672843056479523d603de394 [diff]
diff --git a/av1/encoder/temporal_filter.c b/av1/encoder/temporal_filter.c
index 3c9d311..9689e08 100644
--- a/av1/encoder/temporal_filter.c
+++ b/av1/encoder/temporal_filter.c

@@ -561,9 +561,16 @@
       (double)TF_WINDOW_BLOCK_BALANCE_WEIGHT * inv_factor;
   // Decay factors for non-local mean approach.
   double decay_factor[MAX_MB_PLANE] = { 0 };
-  // Smaller q -> smaller filtering weight.
+  // Adjust filtering based on q.
+  // Larger q -> stronger filtering -> larger weight.
+  // Smaller q -> weaker filtering -> smaller weight.
   double q_decay = pow((double)q_factor / TF_Q_DECAY_THRESHOLD, 2);
   q_decay = CLIP(q_decay, 1e-5, 1);
+  if (q_factor >= TF_QINDEX_CUTOFF) {
+    // Max q_factor is 255, therefore the upper bound of q_decay is 8.
+    // We do not need a clip here.
+    q_decay = 0.5 * pow((double)q_factor / 64, 2);
+  }
   // Smaller strength -> smaller filtering weight.
   double s_decay = pow((double)filter_strength / TF_STRENGTH_THRESHOLD, 2);
   s_decay = CLIP(s_decay, 1e-5, 1);
@@ -871,7 +878,7 @@
           frame_to_filter->y_buffer + source_offset, source_y_stride,
           cpi->alt_ref_buffer.y_buffer + filter_offset, filter_y_stride, &sse);
       diff->sum += sse;
-      diff->sse += sse * sse;
+      diff->sse += sse * (int64_t)sse;
     }
   }
 }

diff --git a/av1/encoder/temporal_filter.h b/av1/encoder/temporal_filter.h
index 2ae7dd4..d2f728d 100644
--- a/av1/encoder/temporal_filter.h
+++ b/av1/encoder/temporal_filter.h

@@ -64,6 +64,14 @@
 //    then the actual threshold will be 720 * 0.1 = 72. Similarly, the threshold
 //    for 360p videos will be 360 * 0.1 = 36.
 #define TF_SEARCH_DISTANCE_THRESHOLD 0.1
+// 6. Threshold to identify if the q is in a relatively high range.
+//    Above this cutoff q, a stronger filtering is applied.
+//    For a high q, the quantization throws away more information, and thus a
+//    stronger filtering is less likely to distort the encoded quality, while a
+//    stronger filtering could reduce bit rates.
+//    For a low q, more details are expected to be retained. Filtering is thus
+//    more conservative.
+#define TF_QINDEX_CUTOFF 128
 
 #define NOISE_ESTIMATION_EDGE_THRESHOLD 50
 

diff --git a/av1/encoder/x86/highbd_temporal_filter_avx2.c b/av1/encoder/x86/highbd_temporal_filter_avx2.c
index b5477ec..68509fa 100644
--- a/av1/encoder/x86/highbd_temporal_filter_avx2.c
+++ b/av1/encoder/x86/highbd_temporal_filter_avx2.c

@@ -352,10 +352,16 @@
                                    TF_SEARCH_ERROR_NORM_WEIGHT);
   const double weight_factor =
       (double)TF_WINDOW_BLOCK_BALANCE_WEIGHT * inv_factor;
-  // Decay factors for non-local mean approach.
-  // Smaller q -> smaller filtering weight.
+  // Adjust filtering based on q.
+  // Larger q -> stronger filtering -> larger weight.
+  // Smaller q -> weaker filtering -> smaller weight.
   double q_decay = pow((double)q_factor / TF_Q_DECAY_THRESHOLD, 2);
   q_decay = CLIP(q_decay, 1e-5, 1);
+  if (q_factor >= TF_QINDEX_CUTOFF) {
+    // Max q_factor is 255, therefore the upper bound of q_decay is 8.
+    // We do not need a clip here.
+    q_decay = 0.5 * pow((double)q_factor / 64, 2);
+  }
   // Smaller strength -> smaller filtering weight.
   double s_decay = pow((double)filter_strength / TF_STRENGTH_THRESHOLD, 2);
   s_decay = CLIP(s_decay, 1e-5, 1);
@@ -393,6 +399,7 @@
     const double inv_num_ref_pixels = 1.0 / num_ref_pixels;
     // Larger noise -> larger filtering weight.
     const double n_decay = 0.5 + log(2 * noise_levels[plane] + 5.0);
+    // Decay factors for non-local mean approach.
     const double decay_factor = 1 / (n_decay * q_decay * s_decay);
 
     // Filter U-plane and V-plane using Y-plane. This is because motion

diff --git a/av1/encoder/x86/highbd_temporal_filter_sse2.c b/av1/encoder/x86/highbd_temporal_filter_sse2.c
index bbb3771..1bfdaf7 100644
--- a/av1/encoder/x86/highbd_temporal_filter_sse2.c
+++ b/av1/encoder/x86/highbd_temporal_filter_sse2.c

@@ -227,10 +227,16 @@
                                    TF_SEARCH_ERROR_NORM_WEIGHT);
   const double weight_factor =
       (double)TF_WINDOW_BLOCK_BALANCE_WEIGHT * inv_factor;
-  // Decay factors for non-local mean approach.
-  // Smaller q -> smaller filtering weight.
+  // Adjust filtering based on q.
+  // Larger q -> stronger filtering -> larger weight.
+  // Smaller q -> weaker filtering -> smaller weight.
   double q_decay = pow((double)q_factor / TF_Q_DECAY_THRESHOLD, 2);
   q_decay = CLIP(q_decay, 1e-5, 1);
+  if (q_factor >= TF_QINDEX_CUTOFF) {
+    // Max q_factor is 255, therefore the upper bound of q_decay is 8.
+    // We do not need a clip here.
+    q_decay = 0.5 * pow((double)q_factor / 64, 2);
+  }
   // Smaller strength -> smaller filtering weight.
   double s_decay = pow((double)filter_strength / TF_STRENGTH_THRESHOLD, 2);
   s_decay = CLIP(s_decay, 1e-5, 1);
@@ -268,6 +274,7 @@
     const double inv_num_ref_pixels = 1.0 / num_ref_pixels;
     // Larger noise -> larger filtering weight.
     const double n_decay = 0.5 + log(2 * noise_levels[plane] + 5.0);
+    // Decay factors for non-local mean approach.
     const double decay_factor = 1 / (n_decay * q_decay * s_decay);
 
     // Filter U-plane and V-plane using Y-plane. This is because motion

diff --git a/av1/encoder/x86/temporal_filter_avx2.c b/av1/encoder/x86/temporal_filter_avx2.c
index 72914e1..8aa0764 100644
--- a/av1/encoder/x86/temporal_filter_avx2.c
+++ b/av1/encoder/x86/temporal_filter_avx2.c

@@ -238,10 +238,16 @@
                                    TF_SEARCH_ERROR_NORM_WEIGHT);
   const double weight_factor =
       (double)TF_WINDOW_BLOCK_BALANCE_WEIGHT * inv_factor;
-  // Decay factors for non-local mean approach.
-  // Smaller q -> smaller filtering weight.
+  // Adjust filtering based on q.
+  // Larger q -> stronger filtering -> larger weight.
+  // Smaller q -> weaker filtering -> smaller weight.
   double q_decay = pow((double)q_factor / TF_Q_DECAY_THRESHOLD, 2);
   q_decay = CLIP(q_decay, 1e-5, 1);
+  if (q_factor >= TF_QINDEX_CUTOFF) {
+    // Max q_factor is 255, therefore the upper bound of q_decay is 8.
+    // We do not need a clip here.
+    q_decay = 0.5 * pow((double)q_factor / 64, 2);
+  }
   // Smaller strength -> smaller filtering weight.
   double s_decay = pow((double)filter_strength / TF_STRENGTH_THRESHOLD, 2);
   s_decay = CLIP(s_decay, 1e-5, 1);
@@ -277,6 +283,7 @@
     const double inv_num_ref_pixels = 1.0 / num_ref_pixels;
     // Larger noise -> larger filtering weight.
     const double n_decay = 0.5 + log(2 * noise_levels[plane] + 5.0);
+    // Decay factors for non-local mean approach.
     const double decay_factor = 1 / (n_decay * q_decay * s_decay);
 
     // Filter U-plane and V-plane using Y-plane. This is because motion

diff --git a/av1/encoder/x86/temporal_filter_sse2.c b/av1/encoder/x86/temporal_filter_sse2.c
index d70792c..26c3926 100644
--- a/av1/encoder/x86/temporal_filter_sse2.c
+++ b/av1/encoder/x86/temporal_filter_sse2.c

@@ -215,10 +215,16 @@
                                    TF_SEARCH_ERROR_NORM_WEIGHT);
   const double weight_factor =
       (double)TF_WINDOW_BLOCK_BALANCE_WEIGHT * inv_factor;
-  // Decay factors for non-local mean approach.
-  // Smaller q -> smaller filtering weight.
+  // Adjust filtering based on q.
+  // Larger q -> stronger filtering -> larger weight.
+  // Smaller q -> weaker filtering -> smaller weight.
   double q_decay = pow((double)q_factor / TF_Q_DECAY_THRESHOLD, 2);
   q_decay = CLIP(q_decay, 1e-5, 1);
+  if (q_factor >= TF_QINDEX_CUTOFF) {
+    // Max q_factor is 255, therefore the upper bound of q_decay is 8.
+    // We do not need a clip here.
+    q_decay = 0.5 * pow((double)q_factor / 64, 2);
+  }
   // Smaller strength -> smaller filtering weight.
   double s_decay = pow((double)filter_strength / TF_STRENGTH_THRESHOLD, 2);
   s_decay = CLIP(s_decay, 1e-5, 1);
@@ -254,6 +260,7 @@
     const double inv_num_ref_pixels = 1.0 / num_ref_pixels;
     // Larger noise -> larger filtering weight.
     const double n_decay = 0.5 + log(2 * noise_levels[plane] + 5.0);
+    // Decay factors for non-local mean approach.
     const double decay_factor = 1 / (n_decay * q_decay * s_decay);
 
     // Filter U-plane and V-plane using Y-plane. This is because motion
commit	cea1c66479e32792fba4c927efd859aa0beaea17	[log] [tgz]
author	Cheng Chen <chengchen@google.com>	Tue Apr 06 23:02:51 2021 -0700
committer	Cheng Chen <chengchen@google.com>	Tue Apr 13 17:18:08 2021 +0000
tree	ab354a763ddcdce5f3a7b234f6a7374a311815d5
parent	23a7cb3db94d00a9672843056479523d603de394 [diff]