Extend prune_sgr_based_on_wiener for preset 2

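Extend the speed feature 'prune_sgr_based_on_wiener' to speed preset 2.
At this preset, evaluation of RESTORE_SGRPROJ is skipped when the rdcost
of RESTORE_WIENER exceeds 1.01 times the rdcost of RESTORE_NONE. The
previous pruning, based on the winning restoration type among
RESTORE_WIENER and RESTORE_NONE, is retained as level 2 of the feature.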

The speed feature is enabled only for non-screen content (i.e. when
allow_screen_content_tools is not set).

          Instruction Count
cpu-used       Reduction        Quality Loss
   2             1.36%              0.01%

STATS_CHANGED

Change-Id: I8ada2a62a039f4df33c0ba544be34add00dfae80
diff --git a/av1/encoder/pickrst.c b/av1/encoder/pickrst.c
index e371b0e..520649bd 100644
--- a/av1/encoder/pickrst.c
+++ b/av1/encoder/pickrst.c
@@ -862,8 +862,7 @@
   const int bit_depth = cm->seq_params.bit_depth;
 
   const int64_t bits_none = x->sgrproj_restore_cost[0];
-  // Prune evaluation of RESTORE_SGRPROJ if RESTORE_NONE was the winner (no loop
-  // restoration)
+  // Prune evaluation of RESTORE_SGRPROJ if 'skip_sgr_eval' is set
   if (rusi->skip_sgr_eval) {
     rsc->bits += bits_none;
     rsc->sse += rusi->sse[RESTORE_NONE];
@@ -1478,7 +1477,7 @@
     rsc->sse += rusi->sse[RESTORE_NONE];
     rusi->best_rtype[RESTORE_WIENER - 1] = RESTORE_NONE;
     rusi->sse[RESTORE_WIENER] = INT64_MAX;
-    if (rsc->sf->prune_sgr_based_on_wiener) rusi->skip_sgr_eval = 1;
+    if (rsc->sf->prune_sgr_based_on_wiener == 2) rusi->skip_sgr_eval = 1;
     return;
   }
 
@@ -1497,7 +1496,7 @@
     rsc->sse += rusi->sse[RESTORE_NONE];
     rusi->best_rtype[RESTORE_WIENER - 1] = RESTORE_NONE;
     rusi->sse[RESTORE_WIENER] = INT64_MAX;
-    if (rsc->sf->prune_sgr_based_on_wiener) rusi->skip_sgr_eval = 1;
+    if (rsc->sf->prune_sgr_based_on_wiener == 2) rusi->skip_sgr_eval = 1;
     return;
   }
 
@@ -1528,7 +1527,11 @@
       (cost_wiener < cost_none) ? RESTORE_WIENER : RESTORE_NONE;
   rusi->best_rtype[RESTORE_WIENER - 1] = rtype;
 
-  if (rsc->sf->prune_sgr_based_on_wiener) {
+  // Set 'skip_sgr_eval' based on rdcost ratio of RESTORE_WIENER and
+  // RESTORE_NONE or based on best_rtype
+  if (rsc->sf->prune_sgr_based_on_wiener == 1) {
+    rusi->skip_sgr_eval = cost_wiener > (1.01 * cost_none);
+  } else if (rsc->sf->prune_sgr_based_on_wiener == 2) {
     rusi->skip_sgr_eval = rusi->best_rtype[RESTORE_WIENER - 1] == RESTORE_NONE;
   }
 
diff --git a/av1/encoder/speed_features.c b/av1/encoder/speed_features.c
index be7b4e0..3babf45 100644
--- a/av1/encoder/speed_features.c
+++ b/av1/encoder/speed_features.c
@@ -349,6 +349,7 @@
     sf->gm_erroradv_type = GM_ERRORADV_TR_2;
 
     sf->selective_ref_frame = 3;
+    sf->prune_sgr_based_on_wiener = cm->allow_screen_content_tools ? 0 : 1;
 
     // TODO(chiyotsai@google.com): We can get 10% speed up if we move
     // adaptive_rd_thresh to speed 1. But currently it performs poorly on some
@@ -405,7 +406,7 @@
     sf->enable_winner_mode_for_use_tx_domain_dist =
         cm->allow_screen_content_tools ? 0 : 1;
     sf->reduce_wiener_window_size = is_boosted_arf2_bwd_type ? 0 : 1;
-    sf->prune_sgr_based_on_wiener = cm->allow_screen_content_tools ? 0 : 1;
+    sf->prune_sgr_based_on_wiener = cm->allow_screen_content_tools ? 0 : 2;
     sf->mv.subpel_search_method = SUBPEL_TREE_PRUNED;
     sf->simple_motion_search_prune_agg = 1;
     sf->disable_sb_level_mv_cost_upd = 1;
diff --git a/av1/encoder/speed_features.h b/av1/encoder/speed_features.h
index 9dc3226..88e1d03 100644
--- a/av1/encoder/speed_features.h
+++ b/av1/encoder/speed_features.h
@@ -804,6 +804,10 @@
   int disable_loop_restoration_chroma;
 
   // Prune self-guided loop restoration based on wiener search results
+  // 0 : no pruning
+  // 1 : pruning based on rdcost ratio of RESTORE_WIENER and RESTORE_NONE
+  // 2 : pruning based on winner restoration type among RESTORE_WIENER and
+  // RESTORE_NONE
   int prune_sgr_based_on_wiener;
 
   // Reduce the wiener filter win size for luma