Prune sgr based on wiener search results

Introduce a speed feature, 'prune_sgr_based_on_wiener', that skips the
evaluation of self-guided (SGRPROJ) loop restoration when the wiener search
selected RESTORE_NONE (no loop restoration). The speed feature is enabled for
speed presets >= 3 for non-screen content.

           Instruction Count
cpu-used       Reduction        Quality Loss
   3              3.80%              0.05%
   4              3.13%              0%

STATS_CHANGED

Change-Id: I8db43db6bbf64ba320aabee7b7d92c250203f704
diff --git a/av1/encoder/pickrst.c b/av1/encoder/pickrst.c
index 5409f96..e0a4d69 100644
--- a/av1/encoder/pickrst.c
+++ b/av1/encoder/pickrst.c
@@ -857,6 +857,18 @@
   const int highbd = cm->seq_params.use_highbitdepth;
   const int bit_depth = cm->seq_params.bit_depth;
 
+  const int64_t bits_none = x->sgrproj_restore_cost[0];
+  // Prune evaluation of RESTORE_SGRPROJ if RESTORE_NONE was the winner of the
+  // wiener search (i.e. no loop restoration was chosen there)
+  if (rsc->sf->prune_sgr_based_on_wiener &&
+      rusi->best_rtype[RESTORE_WIENER - 1] == RESTORE_NONE) {
+    rsc->bits += bits_none;
+    rsc->sse += rusi->sse[RESTORE_NONE];
+    rusi->best_rtype[RESTORE_SGRPROJ - 1] = RESTORE_NONE;
+    rusi->sse[RESTORE_SGRPROJ] = INT64_MAX;
+    return;
+  }
+
   uint8_t *dgd_start =
       rsc->dgd_buffer + limits->v_start * rsc->dgd_stride + limits->h_start;
   const uint8_t *src_start =
@@ -880,7 +892,6 @@
 
   rusi->sse[RESTORE_SGRPROJ] = try_restoration_unit(rsc, limits, tile, &rui);
 
-  const int64_t bits_none = x->sgrproj_restore_cost[0];
   const int64_t bits_sgr = x->sgrproj_restore_cost[1] +
                            (count_sgrproj_bits(&rusi->sgrproj, &rsc->sgrproj)
                             << AV1_PROB_COST_SHIFT);
diff --git a/av1/encoder/speed_features.c b/av1/encoder/speed_features.c
index dff9905..e47b7ff 100644
--- a/av1/encoder/speed_features.c
+++ b/av1/encoder/speed_features.c
@@ -405,6 +405,7 @@
     sf->enable_winner_mode_for_use_tx_domain_dist =
         cm->allow_screen_content_tools ? 0 : 1;
     sf->reduce_wiener_window_size = is_boosted_arf2_bwd_type ? 0 : 1;
+    sf->prune_sgr_based_on_wiener = cm->allow_screen_content_tools ? 0 : 1;
     sf->mv.subpel_search_method = SUBPEL_TREE_PRUNED;
     sf->simple_motion_search_prune_agg = 1;
     sf->disable_sb_level_mv_cost_upd = 1;
@@ -842,6 +843,7 @@
   sf->disable_wedge_search_edge_thresh = 0;
   sf->disable_wedge_search_var_thresh = 0;
   sf->disable_loop_restoration_chroma = 0;
+  sf->prune_sgr_based_on_wiener = 0;
   sf->enable_sgr_ep_pruning = 0;
   sf->reduce_wiener_window_size = 0;
   sf->fast_wedge_sign_estimate = 0;
diff --git a/av1/encoder/speed_features.h b/av1/encoder/speed_features.h
index bfbbef8..9dc3226 100644
--- a/av1/encoder/speed_features.h
+++ b/av1/encoder/speed_features.h
@@ -803,6 +803,9 @@
   // Disable loop restoration for Chroma plane
   int disable_loop_restoration_chroma;
 
+  // Prune self-guided loop restoration based on wiener search results
+  int prune_sgr_based_on_wiener;
+
   // Reduce the wiener filter win size for luma
   int reduce_wiener_window_size;