AV1 RT: Introduce speed 9

With this change, speed 9 gives an overall 5.8% BDRate decrease (4% on low
bitrates) with a 30-45% speed-up compared to speed 8.
On the rtc (midres) set, the BDRate decrease is 14%.

Change-Id: I1e53e963c410a7a3ccd284acf128e6e4c2ed61be
diff --git a/apps/aomenc.c b/apps/aomenc.c
index 3863c8a..432009f 100644
--- a/apps/aomenc.c
+++ b/apps/aomenc.c
@@ -408,7 +408,7 @@
 #if CONFIG_AV1_ENCODER
 static const arg_def_t cpu_used_av1 =
     ARG_DEF(NULL, "cpu-used", 1,
-            "Speed setting (0..6 in good mode, 6..8 in realtime mode)");
+            "Speed setting (0..6 in good mode, 6..9 in realtime mode)");
 static const arg_def_t rowmtarg =
     ARG_DEF(NULL, "row-mt", 1,
             "Enable row based multi-threading (0: off, 1: on (default))");
diff --git a/av1/av1_cx_iface.c b/av1/av1_cx_iface.c
index faa7999..712a0eb 100644
--- a/av1/av1_cx_iface.c
+++ b/av1/av1_cx_iface.c
@@ -423,7 +423,7 @@
   RANGE_CHECK_HI(extra_cfg, ext_tile_debug, 1);
   RANGE_CHECK_HI(extra_cfg, enable_auto_alt_ref, 1);
   RANGE_CHECK_HI(extra_cfg, enable_auto_bwd_ref, 2);
-  RANGE_CHECK(extra_cfg, cpu_used, 0, 8);
+  RANGE_CHECK(extra_cfg, cpu_used, 0, 9);
   RANGE_CHECK_HI(extra_cfg, noise_sensitivity, 6);
   RANGE_CHECK(extra_cfg, superblock_size, AOM_SUPERBLOCK_SIZE_64X64,
               AOM_SUPERBLOCK_SIZE_DYNAMIC);
diff --git a/av1/encoder/nonrd_pickmode.c b/av1/encoder/nonrd_pickmode.c
index af5f16b..3d45e27 100644
--- a/av1/encoder/nonrd_pickmode.c
+++ b/av1/encoder/nonrd_pickmode.c
@@ -1621,7 +1621,9 @@
     const int mode_rd_thresh = rd_threshes[mode_index];
 
     // Only check DC for blocks >= 32X32.
-    if (this_mode > 0 && bsize >= BLOCK_32X32) continue;
+    if (this_mode > 0 &&
+        (bsize >= BLOCK_32X32 || cpi->sf.rt_sf.nonrd_intra_dc_only))
+      continue;
 
     if (rd_less_than_thresh(best_rdc->rdcost, mode_rd_thresh,
                             rd_thresh_freq_fact[mode_index]) &&
diff --git a/av1/encoder/speed_features.c b/av1/encoder/speed_features.c
index 8f66b60..e495d7e 100644
--- a/av1/encoder/speed_features.c
+++ b/av1/encoder/speed_features.c
@@ -279,6 +279,9 @@
       sf->rt_sf.use_modeled_non_rd_cost = 0;
       sf->rt_sf.use_nonrd_filter_search = 0;
     }
+    if (speed >= 9) {
+      sf->rt_sf.use_modeled_non_rd_cost = 1;
+    }
   }
   if (!is_480p_or_larger) {
     if (speed == 7) {
@@ -286,9 +289,12 @@
     }
     if (speed >= 8) {
       sf->mv_sf.subpel_search_method = SUBPEL_TREE;
-
       sf->rt_sf.estimate_motion_for_var_based_partition = 1;
     }
+    if (speed >= 9) {
+      sf->mv_sf.subpel_search_method = SUBPEL_TREE_PRUNED;
+      sf->rt_sf.estimate_motion_for_var_based_partition = 0;
+    }
   }
 }
 
@@ -903,6 +909,10 @@
     sf->rt_sf.skip_intra_pred_if_tx_skip = 0;
     sf->interp_sf.cb_pred_filter_search = 1;
   }
+  if (speed >= 9) {
+    sf->rt_sf.force_large_partition_blocks = 1;
+    sf->rt_sf.nonrd_intra_dc_only = 1;
+  }
 }
 
 static AOM_INLINE void init_hl_sf(HIGH_LEVEL_SPEED_FEATURES *hl_sf) {
diff --git a/av1/encoder/speed_features.h b/av1/encoder/speed_features.h
index 44910e7..2798fce 100644
--- a/av1/encoder/speed_features.h
+++ b/av1/encoder/speed_features.h
@@ -973,6 +973,12 @@
 
   // Check for scene/content change detection on every frame before encoding.
   int check_scene_detection;
+
+  // Forces larger partition blocks in variance based partitioning
+  int force_large_partition_blocks;
+
+  // Only checks intra DCPRED mode in nonrd_pick_inter_mode
+  int nonrd_intra_dc_only;
 } REAL_TIME_SPEED_FEATURES;
 
 typedef struct SPEED_FEATURES {
diff --git a/av1/encoder/var_based_part.c b/av1/encoder/var_based_part.c
index b3fc0e3..4b94a29 100644
--- a/av1/encoder/var_based_part.c
+++ b/av1/encoder/var_based_part.c
@@ -403,6 +403,11 @@
     } else {
       thresholds[2] = (5 * threshold_base) >> 1;
     }
+    if (cpi->sf.rt_sf.force_large_partition_blocks) {
+      thresholds[1] <<= 2;
+      thresholds[2] <<= 3;
+      thresholds[3] <<= 2;
+    }
   }
 }