Add a new adaptive max partition selection type

Uses the variance of the superblock (sb) to adaptively choose the
probability thresholds for the max partition decision. For smoother
blocks, starting from a larger max partition is favored.
The new mode is turned on for HD content (720p or larger).

About 4-5% speedup at speed 0 (tested on dinner and night).

hdres (20 frames, --end-usage=q): +0.092% loss, but most of the loss
comes from one video (dinner_1080p).

Change-Id: I85c1e7ef071cebaad7c257980e67b245547b968a
diff --git a/av1/encoder/encodeframe.c b/av1/encoder/encodeframe.c
index 08fffb9..fb08a12 100644
--- a/av1/encoder/encodeframe.c
+++ b/av1/encoder/encodeframe.c
@@ -4864,10 +4864,8 @@
         float features[FEATURE_SIZE_MAX_MIN_PART_PRED] = { 0.0f };
 
         av1_get_max_min_partition_features(cpi, x, mi_row, mi_col, features);
-        max_sq_size = AOMMIN(
-            av1_predict_max_partition(
-                cpi->sf.auto_max_partition_based_on_simple_motion, features),
-            max_sq_size);
+        max_sq_size =
+            AOMMIN(av1_predict_max_partition(cpi, x, features), max_sq_size);
       }
 
       min_sq_size = AOMMIN(min_sq_size, max_sq_size);
diff --git a/av1/encoder/partition_strategy.c b/av1/encoder/partition_strategy.c
index 292f570..2dace6c 100644
--- a/av1/encoder/partition_strategy.c
+++ b/av1/encoder/partition_strategy.c
@@ -19,6 +19,7 @@
 #include "av1/encoder/encoder.h"
 #include "av1/encoder/partition_model_weights.h"
 #include "av1/encoder/partition_strategy.h"
+#include "av1/encoder/rdopt.h"
 
 // Performs a simple_motion_search with a single reference frame and extract
 // the variance of residues. Here features is assumed to be a length 6 array.
@@ -664,20 +665,20 @@
   assert(f_idx == FEATURE_SIZE_MAX_MIN_PART_PRED);
 }
 
-BLOCK_SIZE av1_predict_max_partition(
-    const MAX_PART_PRED_MODE max_part_pred_mode, const float *features) {
+BLOCK_SIZE av1_predict_max_partition(AV1_COMP *const cpi, MACROBLOCK *const x,
+                                     const float *features) {
   float scores[MAX_NUM_CLASSES_MAX_MIN_PART_PRED] = { 0.0f },
         probs[MAX_NUM_CLASSES_MAX_MIN_PART_PRED] = { 0.0f };
   const NN_CONFIG *nn_config = &av1_max_part_pred_nn_config;
 
-  assert(max_part_pred_mode != NOT_IN_USE);
+  assert(cpi->sf.auto_max_partition_based_on_simple_motion != NOT_IN_USE);
 
   aom_clear_system_state();
   av1_nn_predict(features, nn_config, scores);
   av1_nn_softmax(scores, probs, MAX_NUM_CLASSES_MAX_MIN_PART_PRED);
 
   int result = MAX_NUM_CLASSES_MAX_MIN_PART_PRED - 1;
-  if (max_part_pred_mode == DIRECT_PRED) {
+  if (cpi->sf.auto_max_partition_based_on_simple_motion == DIRECT_PRED) {
     result = 0;
     float max_prob = probs[0];
     for (int i = 1; i < MAX_NUM_CLASSES_MAX_MIN_PART_PRED; ++i) {
@@ -686,7 +687,8 @@
         result = i;
       }
     }
-  } else if (max_part_pred_mode == RELAXED_PRED) {
+  } else if (cpi->sf.auto_max_partition_based_on_simple_motion ==
+             RELAXED_PRED) {
     for (result = MAX_NUM_CLASSES_MAX_MIN_PART_PRED - 1; result >= 0;
          --result) {
       if (result < MAX_NUM_CLASSES_MAX_MIN_PART_PRED - 1) {
@@ -694,6 +696,26 @@
       }
       if (probs[result] > 0.2) break;
     }
+  } else if (cpi->sf.auto_max_partition_based_on_simple_motion == ADAPT_PRED) {
+    const BLOCK_SIZE sb_size = cpi->common.seq_params.sb_size;
+    MACROBLOCKD *const xd = &x->e_mbd;
+    // TODO(debargha): x->source_variance is unavailable at this point,
+    // so compute. The redundant recomputation later can be removed.
+    const unsigned int source_variance =
+        is_cur_buf_hbd(xd)
+            ? av1_high_get_sby_perpixel_variance(cpi, &x->plane[0].src, sb_size,
+                                                 xd->bd)
+            : av1_get_sby_perpixel_variance(cpi, &x->plane[0].src, sb_size);
+    if (source_variance > 16) {
+      const double thresh = source_variance < 128 ? 0.05 : 0.1;
+      for (result = MAX_NUM_CLASSES_MAX_MIN_PART_PRED - 1; result >= 0;
+           --result) {
+        if (result < MAX_NUM_CLASSES_MAX_MIN_PART_PRED - 1) {
+          probs[result] += probs[result + 1];
+        }
+        if (probs[result] > thresh) break;
+      }
+    }
   }
 
   return (BLOCK_SIZE)((result + 2) * 3);
diff --git a/av1/encoder/partition_strategy.h b/av1/encoder/partition_strategy.h
index 401ef48..36b1e95 100644
--- a/av1/encoder/partition_strategy.h
+++ b/av1/encoder/partition_strategy.h
@@ -70,8 +70,8 @@
                                         float *features);
 
 // Predict the maximum BLOCK_SIZE to be used to encoder the current superblock.
-BLOCK_SIZE av1_predict_max_partition(
-    const MAX_PART_PRED_MODE max_part_pred_mode, const float *features);
+BLOCK_SIZE av1_predict_max_partition(AV1_COMP *const cpi, MACROBLOCK *const x,
+                                     const float *features);
 
 // A simplified version of set_offsets meant to be used for
 // simple_motion_search.
diff --git a/av1/encoder/speed_features.c b/av1/encoder/speed_features.c
index 231220d..059befb 100644
--- a/av1/encoder/speed_features.c
+++ b/av1/encoder/speed_features.c
@@ -99,7 +99,7 @@
   if (is_480p_or_larger) {
     sf->use_square_partition_only_threshold = BLOCK_128X128;
     if (is_720p_or_larger)
-      sf->auto_max_partition_based_on_simple_motion = NOT_IN_USE;
+      sf->auto_max_partition_based_on_simple_motion = ADAPT_PRED;
     else
       sf->auto_max_partition_based_on_simple_motion = RELAXED_PRED;
   } else {
diff --git a/av1/encoder/speed_features.h b/av1/encoder/speed_features.h
index 9284d0e..f0a2324 100644
--- a/av1/encoder/speed_features.h
+++ b/av1/encoder/speed_features.h
@@ -198,7 +198,12 @@
   FULL_PEL
 } UENUM1BYTE(SUBPEL_FORCE_STOP);
 
-enum { NOT_IN_USE, DIRECT_PRED, RELAXED_PRED } UENUM1BYTE(MAX_PART_PRED_MODE);
+enum {
+  NOT_IN_USE,
+  DIRECT_PRED,
+  RELAXED_PRED,
+  ADAPT_PRED
+} UENUM1BYTE(MAX_PART_PRED_MODE);
 
 typedef struct MV_SPEED_FEATURES {
   // Motion search method (Diamond, NSTEP, Hex, Big Diamond, Square, etc).