Rework the less_rectangular_check speed feature

Rename less_rectangular_check to less_rectangular_check_level and add a
new level (1), which is less aggressive and can be used at speed 0 and 1.
Level 2 keeps the previous behavior.

Tested encoding speed over 15 sequences with QP=40.
Overall speed gains:
4.8% for speed 0; 2.9% for speed 1.

Compression performance (30 frames, ovr_psnr):
             lowres       midres
speed 0:      0.00%        0.00%
speed 1:     -0.01%       -0.02%

STATS_CHANGED

Change-Id: I974111393b8a13baf5f594539f85ec48ab3cd2fb
diff --git a/av1/encoder/encodeframe.c b/av1/encoder/encodeframe.c
index ae87427..3e714b5 100644
--- a/av1/encoder/encodeframe.c
+++ b/av1/encoder/encodeframe.c
@@ -3511,10 +3511,11 @@
         best_rdc = sum_rdc;
         pc_tree->partitioning = PARTITION_SPLIT;
       }
-    } else if (cpi->sf.less_rectangular_check) {
+    } else if (cpi->sf.less_rectangular_check_level > 0) {
       // skip rectangular partition test when larger block size
       // gives better rd cost
-      do_rectangular_split &= !partition_none_allowed;
+      if (cpi->sf.less_rectangular_check_level == 2 || idx <= 2)
+        do_rectangular_split &= !partition_none_allowed;
     }
 
     restore_context(x, &x_ctx, mi_row, mi_col, bsize, num_planes);
diff --git a/av1/encoder/speed_features.c b/av1/encoder/speed_features.c
index 68da931..1e97bfb 100644
--- a/av1/encoder/speed_features.c
+++ b/av1/encoder/speed_features.c
@@ -175,6 +175,7 @@
   sf->inter_mode_rd_model_estimation = 1;
   sf->prune_ref_frame_for_rect_partitions =
       !(boosted || cpi->refresh_bwd_ref_frame || cpi->refresh_alt2_ref_frame);
+  sf->less_rectangular_check_level = 1;
 
   if (speed >= 1) {
     sf->gm_erroradv_type = GM_ERRORADV_TR_1;
@@ -227,7 +228,7 @@
 
   if (speed >= 3) {
     sf->tx_size_search_method = boosted ? USE_FULL_RD : USE_LARGESTALL;
-    sf->less_rectangular_check = 1;
+    sf->less_rectangular_check_level = 2;
     sf->mode_skip_start = 10;
     sf->adaptive_pred_interp_filter = 1;
     // adaptive_motion_search breaks encoder multi-thread tests.
@@ -416,7 +417,7 @@
   sf->tx_type_search.fast_inter_tx_type_search = 0;
   sf->tx_type_search.skip_tx_search = 0;
   sf->selective_ref_frame = 0;
-  sf->less_rectangular_check = 0;
+  sf->less_rectangular_check_level = 0;
   sf->use_square_partition_only_threshold = BLOCK_128X128;
   sf->prune_ref_frame_for_rect_partitions = 0;
   sf->auto_min_max_partition_size = NOT_IN_USE;
diff --git a/av1/encoder/speed_features.h b/av1/encoder/speed_features.h
index 408663c..2f3c574 100644
--- a/av1/encoder/speed_features.h
+++ b/av1/encoder/speed_features.h
@@ -413,8 +413,9 @@
   int mode_pruning_based_on_two_pass_partition_search;
 
   // Skip rectangular partition test when partition type none gives better
-  // rd than partition type split.
-  int less_rectangular_check;
+  // rd than partition type split. Can take values 0 - 2: 0 disables the
+  // skipping, and levels 1 - 2 skip with increasing aggressiveness.
+  int less_rectangular_check_level;
 
   // Use square partition only beyond this block size.
   BLOCK_SIZE use_square_partition_only_threshold;