Add a new adaptive max partition selection type. Uses the variance of the superblock (sb) to adaptively choose the thresholds for the max partition decision. For smoother blocks, starting from a larger max partition is favored. The new mode is turned on for HD content. About 4-5% speedup at speed 0 (tested on dinner and night). hdres (20 frames, --end-usage=q) +0.092%, but most of the loss comes from one video (dinner_1080p). Change-Id: I85c1e7ef071cebaad7c257980e67b245547b968a

commit: 1c141bce538648af70999f51bd0a6f27d1801c3a [log] [tgz]
author: Debargha Mukherjee <debargha@google.com> Thu Mar 21 14:20:56 2019 -0700
committer: Debargha Mukherjee <debargha@google.com> Sun Mar 24 12:10:50 2019 -0700
tree: c5487bf04a40820494b0945831742ab356633bbb
parent: ecea12780382063036e7e3da41c1b70695c9b746 [diff]
diff --git a/av1/encoder/encodeframe.c b/av1/encoder/encodeframe.c
index 08fffb9..fb08a12 100644
--- a/av1/encoder/encodeframe.c
+++ b/av1/encoder/encodeframe.c

@@ -4864,10 +4864,8 @@
         float features[FEATURE_SIZE_MAX_MIN_PART_PRED] = { 0.0f };
 
         av1_get_max_min_partition_features(cpi, x, mi_row, mi_col, features);
-        max_sq_size = AOMMIN(
-            av1_predict_max_partition(
-                cpi->sf.auto_max_partition_based_on_simple_motion, features),
-            max_sq_size);
+        max_sq_size =
+            AOMMIN(av1_predict_max_partition(cpi, x, features), max_sq_size);
       }
 
       min_sq_size = AOMMIN(min_sq_size, max_sq_size);

diff --git a/av1/encoder/partition_strategy.c b/av1/encoder/partition_strategy.c
index 292f570..2dace6c 100644
--- a/av1/encoder/partition_strategy.c
+++ b/av1/encoder/partition_strategy.c

@@ -19,6 +19,7 @@
 #include "av1/encoder/encoder.h"
 #include "av1/encoder/partition_model_weights.h"
 #include "av1/encoder/partition_strategy.h"
+#include "av1/encoder/rdopt.h"
 
 // Performs a simple_motion_search with a single reference frame and extract
 // the variance of residues. Here features is assumed to be a length 6 array.
@@ -664,20 +665,20 @@
   assert(f_idx == FEATURE_SIZE_MAX_MIN_PART_PRED);
 }
 
-BLOCK_SIZE av1_predict_max_partition(
-    const MAX_PART_PRED_MODE max_part_pred_mode, const float *features) {
+BLOCK_SIZE av1_predict_max_partition(AV1_COMP *const cpi, MACROBLOCK *const x,
+                                     const float *features) {
   float scores[MAX_NUM_CLASSES_MAX_MIN_PART_PRED] = { 0.0f },
         probs[MAX_NUM_CLASSES_MAX_MIN_PART_PRED] = { 0.0f };
   const NN_CONFIG *nn_config = &av1_max_part_pred_nn_config;
 
-  assert(max_part_pred_mode != NOT_IN_USE);
+  assert(cpi->sf.auto_max_partition_based_on_simple_motion != NOT_IN_USE);
 
   aom_clear_system_state();
   av1_nn_predict(features, nn_config, scores);
   av1_nn_softmax(scores, probs, MAX_NUM_CLASSES_MAX_MIN_PART_PRED);
 
   int result = MAX_NUM_CLASSES_MAX_MIN_PART_PRED - 1;
-  if (max_part_pred_mode == DIRECT_PRED) {
+  if (cpi->sf.auto_max_partition_based_on_simple_motion == DIRECT_PRED) {
     result = 0;
     float max_prob = probs[0];
     for (int i = 1; i < MAX_NUM_CLASSES_MAX_MIN_PART_PRED; ++i) {
@@ -686,7 +687,8 @@
         result = i;
       }
     }
-  } else if (max_part_pred_mode == RELAXED_PRED) {
+  } else if (cpi->sf.auto_max_partition_based_on_simple_motion ==
+             RELAXED_PRED) {
     for (result = MAX_NUM_CLASSES_MAX_MIN_PART_PRED - 1; result >= 0;
          --result) {
       if (result < MAX_NUM_CLASSES_MAX_MIN_PART_PRED - 1) {
@@ -694,6 +696,26 @@
       }
       if (probs[result] > 0.2) break;
     }
+  } else if (cpi->sf.auto_max_partition_based_on_simple_motion == ADAPT_PRED) {
+    const BLOCK_SIZE sb_size = cpi->common.seq_params.sb_size;
+    MACROBLOCKD *const xd = &x->e_mbd;
+    // TODO(debargha): x->source_variance is unavailable at this point,
+    // so compute. The redundant recomputation later can be removed.
+    const unsigned int source_variance =
+        is_cur_buf_hbd(xd)
+            ? av1_high_get_sby_perpixel_variance(cpi, &x->plane[0].src, sb_size,
+                                                 xd->bd)
+            : av1_get_sby_perpixel_variance(cpi, &x->plane[0].src, sb_size);
+    if (source_variance > 16) {
+      const double thresh = source_variance < 128 ? 0.05 : 0.1;
+      for (result = MAX_NUM_CLASSES_MAX_MIN_PART_PRED - 1; result >= 0;
+           --result) {
+        if (result < MAX_NUM_CLASSES_MAX_MIN_PART_PRED - 1) {
+          probs[result] += probs[result + 1];
+        }
+        if (probs[result] > thresh) break;
+      }
+    }
   }
 
   return (BLOCK_SIZE)((result + 2) * 3);

diff --git a/av1/encoder/partition_strategy.h b/av1/encoder/partition_strategy.h
index 401ef48..36b1e95 100644
--- a/av1/encoder/partition_strategy.h
+++ b/av1/encoder/partition_strategy.h

@@ -70,8 +70,8 @@
                                         float *features);
 
 // Predict the maximum BLOCK_SIZE to be used to encoder the current superblock.
-BLOCK_SIZE av1_predict_max_partition(
-    const MAX_PART_PRED_MODE max_part_pred_mode, const float *features);
+BLOCK_SIZE av1_predict_max_partition(AV1_COMP *const cpi, MACROBLOCK *const x,
+                                     const float *features);
 
 // A simplified version of set_offsets meant to be used for
 // simple_motion_search.

diff --git a/av1/encoder/speed_features.c b/av1/encoder/speed_features.c
index 231220d..059befb 100644
--- a/av1/encoder/speed_features.c
+++ b/av1/encoder/speed_features.c

@@ -99,7 +99,7 @@
   if (is_480p_or_larger) {
     sf->use_square_partition_only_threshold = BLOCK_128X128;
     if (is_720p_or_larger)
-      sf->auto_max_partition_based_on_simple_motion = NOT_IN_USE;
+      sf->auto_max_partition_based_on_simple_motion = ADAPT_PRED;
     else
       sf->auto_max_partition_based_on_simple_motion = RELAXED_PRED;
   } else {

diff --git a/av1/encoder/speed_features.h b/av1/encoder/speed_features.h
index 9284d0e..f0a2324 100644
--- a/av1/encoder/speed_features.h
+++ b/av1/encoder/speed_features.h

@@ -198,7 +198,12 @@
   FULL_PEL
 } UENUM1BYTE(SUBPEL_FORCE_STOP);
 
-enum { NOT_IN_USE, DIRECT_PRED, RELAXED_PRED } UENUM1BYTE(MAX_PART_PRED_MODE);
+enum {
+  NOT_IN_USE,
+  DIRECT_PRED,
+  RELAXED_PRED,
+  ADAPT_PRED
+} UENUM1BYTE(MAX_PART_PRED_MODE);
 
 typedef struct MV_SPEED_FEATURES {
   // Motion search method (Diamond, NSTEP, Hex, Big Diamond, Square, etc).
commit	1c141bce538648af70999f51bd0a6f27d1801c3a	[log] [tgz]
author	Debargha Mukherjee <debargha@google.com>	Thu Mar 21 14:20:56 2019 -0700
committer	Debargha Mukherjee <debargha@google.com>	Sun Mar 24 12:10:50 2019 -0700
tree	c5487bf04a40820494b0945831742ab356633bbb
parent	ecea12780382063036e7e3da41c1b70695c9b746 [diff]