Modify the temporal filter strength

Modify the temporal filter strength and the related logic to
improve temporal consistency.

BUG=b/410661330

Change-Id: I9661aeb6b402fdcc1a5235ac63c74066cc07e57a
diff --git a/av1/encoder/gop_structure.c b/av1/encoder/gop_structure.c
index 308290f..15ec0eb 100644
--- a/av1/encoder/gop_structure.c
+++ b/av1/encoder/gop_structure.c
@@ -858,6 +858,10 @@
     first_frame_update_type = GF_UPDATE;
   }
 
+  if (cpi->oxcf.algo_cfg.sharpness == 3)
+    gf_group->max_layer_depth_allowed =
+        AOMMIN(gf_group->max_layer_depth_allowed, 2);
+
   gf_group->size = construct_multi_layer_gf_structure(
       cpi, twopass, gf_group, rc, frame_info, p_rc->baseline_gf_interval,
       first_frame_update_type);
diff --git a/av1/encoder/rdopt.c b/av1/encoder/rdopt.c
index c8209dc..5788df6 100644
--- a/av1/encoder/rdopt.c
+++ b/av1/encoder/rdopt.c
@@ -611,41 +611,69 @@
   }
 }
 
-static void get_variance_stats(const AV1_COMP *cpi, const MACROBLOCK *x,
-                               int num_planes, int64_t *src_var,
-                               int64_t *rec_var) {
-  const MACROBLOCKD *xd = &x->e_mbd;
-  const MB_MODE_INFO *mbmi = xd->mi[0];
-
-  DECLARE_ALIGNED(16, uint8_t, dclevel[MAX_SB_SQUARE]);
-  memset(dclevel, 128, sizeof(dclevel));
-  int dclevel_stride = block_size_wide[mbmi->bsize];
-
-  *src_var = 0;
-  *rec_var = 0;
-
-  for (int plane = 0; plane < num_planes; ++plane) {
-    if (plane && !xd->is_chroma_ref) break;
-
-    const struct macroblock_plane *const p = &x->plane[plane];
-    const struct macroblockd_plane *const pd = &xd->plane[plane];
-    const BLOCK_SIZE bs =
-        get_plane_block_size(mbmi->bsize, pd->subsampling_x, pd->subsampling_y);
-    unsigned int sse;
-
-    int64_t var = cpi->ppi->fn_ptr[bs].vf(p->src.buf, p->src.stride, dclevel,
-                                          dclevel_stride, &sse);
-
-    *src_var += var;
-
-    var = cpi->ppi->fn_ptr[bs].vf(pd->dst.buf, pd->dst.stride, dclevel,
-                                  dclevel_stride, &sse);
-
-    *rec_var += var;
-  }
-
-  *src_var <<= 4;
-  *rec_var <<= 4;
+// Returns the high-frequency energy of a bw x bh block of 8-bit samples: the
+// sum over all pixels of the squared difference between the pixel and its 3x3
+// Gaussian-smoothed value. Taps that fall outside the block reuse the nearest
+// sample inside the block.
+static int64_t get_highpass_energy(const uint8_t *buf, int stride, int bw,
+                                   int bh) {
+  static const int gau_filter[3][3] = {
+    { 1, 2, 1 },
+    { 2, 4, 2 },
+    { 1, 2, 1 },
+  };
+
+  // The block is copied with a one-pixel border on every side, so each row of
+  // the padded copy is bw + 2 samples wide.
+  DECLARE_ALIGNED(16, uint8_t, padded[(MAX_SB_SIZE + 2) * (MAX_SB_SIZE + 2)]);
+  const int pad_stride = bw + 2;
+  uint8_t *const blk = &padded[pad_stride + 1];
+
+  for (int idy = -1; idy < bh + 1; ++idy) {
+    const int src_y = idy < 0 ? 0 : (idy >= bh ? bh - 1 : idy);
+    for (int idx = -1; idx < bw + 1; ++idx) {
+      const int src_x = idx < 0 ? 0 : (idx >= bw ? bw - 1 : idx);
+      blk[idy * pad_stride + idx] = buf[src_y * stride + src_x];
+    }
+  }
+
+  int64_t energy = 0;
+  for (int idy = 0; idy < bh; ++idy) {
+    for (int idx = 0; idx < bw; ++idx) {
+      int sum = 0;
+      for (int iy = 0; iy < 3; ++iy)
+        for (int ix = 0; ix < 3; ++ix)
+          sum += blk[(idy + iy - 1) * pad_stride + (idx + ix - 1)] *
+                 gau_filter[iy][ix];
+
+      // The kernel weights add up to 16.
+      sum >>= 4;
+
+      const int diff = blk[idy * pad_stride + idx] - sum;
+      energy += diff * diff;
+    }
+  }
+  return energy;
+}
+
+// Computes the luma high-frequency energy of the source block (*src_var) and
+// of the block currently held in the luma dst buffer (*rec_var), each scaled
+// by 16.
+// NOTE(review): both buffers are read as 8-bit samples; confirm this is never
+// reached with high bit-depth buffers.
+static void get_variance_stats(const MACROBLOCK *x, int64_t *src_var,
+                               int64_t *rec_var) {
+  const MACROBLOCKD *xd = &x->e_mbd;
+  const MB_MODE_INFO *mbmi = xd->mi[0];
+  const struct macroblockd_plane *const pd = &xd->plane[AOM_PLANE_Y];
+  const struct macroblock_plane *const p = &x->plane[AOM_PLANE_Y];
+
+  const BLOCK_SIZE bsize = mbmi->bsize;
+  const int bw = block_size_wide[bsize];
+  const int bh = block_size_high[bsize];
+
+  *rec_var = get_highpass_energy(pd->dst.buf, pd->dst.stride, bw, bh) << 4;
+  *src_var = get_highpass_energy(p->src.buf, p->src.stride, bw, bh) << 4;
 }
 
 static void adjust_rdcost(const AV1_COMP *cpi, const MACROBLOCK *x,
@@ -655,7 +683,7 @@
   if (frame_is_kf_gf_arf(cpi)) return;
 
   int64_t src_var, rec_var;
-  get_variance_stats(cpi, x, 1, &src_var, &rec_var);
+  get_variance_stats(x, &src_var, &rec_var);
 
   if (src_var <= rec_var) return;
 
@@ -673,7 +701,7 @@
   if (frame_is_kf_gf_arf(cpi)) return;
 
   int64_t src_var, rec_var;
-  get_variance_stats(cpi, x, 1, &src_var, &rec_var);
+  get_variance_stats(x, &src_var, &rec_var);
 
   if (src_var <= rec_var) return;
 
diff --git a/av1/encoder/temporal_filter.c b/av1/encoder/temporal_filter.c
index feb6a67..92fd87f 100644
--- a/av1/encoder/temporal_filter.c
+++ b/av1/encoder/temporal_filter.c
@@ -969,7 +969,7 @@
         filter_strength = AOMMIN(filter_strength, 1);
 
       if (cpi->oxcf.algo_cfg.sharpness == 3 && is_low_cntras)
-        filter_strength = AOMMIN(filter_strength, 1);
+        filter_strength = AOMMIN(filter_strength, 3);
 
       // Perform weighted averaging.
       if (frame == filter_frame_idx) {  // Frame to be filtered.