AV1 RT: Introducing speed 5 for RT

Speed 5 uses the same features as speed 6, with an extra step in the
adjustment of var-based partitioning. This gives a ~40% slowdown and a 5%
BDRate improvement on lowres, with some clips showing more than 10% BDRate
improvement.

Change-Id: I2d911baee71f04ccdbd52a802c865bb2cc567989
diff --git a/av1/encoder/encodeframe.c b/av1/encoder/encodeframe.c
index c1c740e..2f23751 100644
--- a/av1/encoder/encodeframe.c
+++ b/av1/encoder/encodeframe.c
@@ -2069,7 +2069,7 @@
   }
 
   if ((cpi->sf.part_sf.partition_search_type == VAR_BASED_PARTITION &&
-       cpi->sf.part_sf.adjust_var_based_rd_partitioning == 2) &&
+       cpi->sf.part_sf.adjust_var_based_rd_partitioning > 2) &&
       partition != PARTITION_SPLIT && bsize > BLOCK_8X8 &&
       (mi_row + bs < mi_params->mi_rows ||
        mi_row + hbs == mi_params->mi_rows) &&
diff --git a/av1/encoder/speed_features.c b/av1/encoder/speed_features.c
index 59cfd50..81b3f17 100644
--- a/av1/encoder/speed_features.c
+++ b/av1/encoder/speed_features.c
@@ -763,26 +763,13 @@
   if (speed >= 5) {
     sf->hl_sf.recode_loop = ALLOW_RECODE_KFMAXBW;
 
-    sf->part_sf.partition_search_breakout_rate_thr = 300;
-
-    sf->mv_sf.search_method = BIGDIA;
-    sf->mv_sf.subpel_search_method = SUBPEL_TREE_PRUNED_MORE;
-
     sf->inter_sf.adaptive_rd_thresh = 4;
     sf->interp_sf.disable_filter_search_var_thresh = 200;
 
-    sf->intra_sf.intra_y_mode_mask[TX_64X64] = INTRA_DC_H_V;
-    sf->intra_sf.intra_uv_mode_mask[TX_64X64] = UV_INTRA_DC_H_V_CFL;
-    sf->intra_sf.intra_y_mode_mask[TX_32X32] = INTRA_DC_H_V;
-    sf->intra_sf.intra_uv_mode_mask[TX_32X32] = UV_INTRA_DC_H_V_CFL;
-    sf->intra_sf.intra_y_mode_mask[TX_16X16] = INTRA_DC_H_V;
-    sf->intra_sf.intra_uv_mode_mask[TX_16X16] = UV_INTRA_DC_H_V_CFL;
-
     sf->rd_sf.use_fast_coef_costing = 1;
     sf->rd_sf.tx_domain_dist_level = 2;
     sf->rd_sf.tx_domain_dist_thres_level = 2;
-
-    sf->winner_mode_sf.tx_size_search_level = 2;
+    sf->winner_mode_sf.tx_size_search_level = 1;
 
     sf->rt_sf.mode_search_skip_flags =
         (cm->current_frame.frame_type == KEY_FRAME)
@@ -790,9 +777,6 @@
             : FLAG_SKIP_INTRA_DIRMISMATCH | FLAG_SKIP_INTRA_BESTINTER |
                   FLAG_SKIP_COMP_BESTINTRA | FLAG_SKIP_INTRA_LOWVAR |
                   FLAG_EARLY_TERMINATE;
-  }
-
-  if (speed >= 6) {
     sf->hl_sf.frame_parameter_update = 0;
 
     sf->part_sf.default_max_partition_size = BLOCK_128X128;
@@ -800,11 +784,12 @@
     sf->part_sf.max_intra_bsize = BLOCK_32X32;
     sf->part_sf.partition_search_breakout_rate_thr = 500;
     sf->part_sf.partition_search_type = VAR_BASED_PARTITION;
-    sf->part_sf.adjust_var_based_rd_partitioning = 1;
+    sf->part_sf.adjust_var_based_rd_partitioning = 2;
 
     sf->mv_sf.search_method = FAST_DIAMOND;
     sf->mv_sf.subpel_force_stop = QUARTER_PEL;
     sf->mv_sf.use_fullpel_costlist = 1;
+    sf->mv_sf.subpel_search_method = SUBPEL_TREE_PRUNED_MORE;
 
     sf->inter_sf.adaptive_mode_search = 2;
     sf->inter_sf.inter_mode_rd_model_estimation = 2;
@@ -821,8 +806,6 @@
     sf->rd_sf.optimize_coefficients = NO_TRELLIS_OPT;
     sf->rd_sf.simple_model_rd_from_var = 1;
 
-    sf->winner_mode_sf.tx_size_search_level = 1;
-
     sf->lpf_sf.cdef_pick_method = CDEF_PICK_FROM_Q;
     sf->lpf_sf.lpf_pick = LPF_PICK_FROM_Q;
 
@@ -834,6 +817,10 @@
     sf->rt_sf.use_simple_rd_model = 1;
   }
 
+  if (speed >= 6) {
+    sf->part_sf.adjust_var_based_rd_partitioning = 1;
+  }
+
   if (speed >= 7) {
     sf->hl_sf.frame_parameter_update = 0;
 
diff --git a/av1/encoder/speed_features.h b/av1/encoder/speed_features.h
index 9174df5..e43fb10 100644
--- a/av1/encoder/speed_features.h
+++ b/av1/encoder/speed_features.h
@@ -368,8 +368,9 @@
   BLOCK_SIZE default_max_partition_size;
 
   // Sets level of adjustmet of variace-based partitioning during
-  // rd_use_partition 0 - no partition adjusment 1 - adjust partition for small
-  // blocks and high QP 2 - always adjust partitioning
+  // rd_use_partition 0 - no partition adjustment, 1 - try to merge partitions
+  // for small blocks and high QP, 2 - always try to merge leaf partitions, 3 -
+  // try to merge and split leaf partitions
   int adjust_var_based_rd_partitioning;
 
   // Partition search early breakout thresholds.
diff --git a/test/rt_end_to_end_test.cc b/test/rt_end_to_end_test.cc
index 28fade9..5cd0382 100644
--- a/test/rt_end_to_end_test.cc
+++ b/test/rt_end_to_end_test.cc
@@ -32,15 +32,18 @@
 std::unordered_map<std::string,
                    std::unordered_map<int, std::unordered_map<int, double>>>
     kPsnrThreshold = { { "park_joy_90p_8_420.y4m",
-                         { { 6, { { 0, 35.3 }, { 3, 36.2 } } },
+                         { { 5, { { 0, 35.4 }, { 3, 36.4 } } },
+                           { 6, { { 0, 35.3 }, { 3, 36.2 } } },
                            { 7, { { 0, 34.9 }, { 3, 35.8 } } },
                            { 8, { { 0, 35.0 }, { 3, 35.8 } } } } },
                        { "paris_352_288_30.y4m",
-                         { { 6, { { 0, 36.1 }, { 3, 36.6 } } },
+                         { { 5, { { 0, 36.2 }, { 3, 36.7 } } },
+                           { 6, { { 0, 36.1 }, { 3, 36.6 } } },
                            { 7, { { 0, 35.5 }, { 3, 36.0 } } },
                            { 8, { { 0, 36.0 }, { 3, 36.5 } } } } },
                        { "niklas_1280_720_30.y4m",
-                         { { 6, { { 0, 34.2 }, { 3, 34.2 } } },
+                         { { 5, { { 0, 34.6 }, { 3, 34.6 } } },
+                           { 6, { { 0, 34.2 }, { 3, 34.2 } } },
                            { 7, { { 0, 33.7 }, { 3, 33.7 } } },
                            { 8, { { 0, 33.6 }, { 3, 33.4 } } } } } };
 
@@ -159,13 +162,13 @@
 TEST_P(RTEndToEndTestThreaded, EndtoEndPSNRTest) { DoTest(); }
 
 AV1_INSTANTIATE_TEST_CASE(RTEndToEndTest, ::testing::ValuesIn(kTestVectors),
-                          ::testing::Range(6, 9),
+                          ::testing::Range(5, 9),
                           ::testing::Values<unsigned int>(0, 3),
                           ::testing::Values(1), ::testing::Values(1));
 
 AV1_INSTANTIATE_TEST_CASE(RTEndToEndTestThreaded,
                           ::testing::ValuesIn(kTestVectors),
-                          ::testing::Range(6, 9),
+                          ::testing::Range(5, 9),
                           ::testing::Values<unsigned int>(0, 3),
                           ::testing::Range(2, 5), ::testing::Range(2, 5));
 }  // namespace