Introduce simple_motion_search_early_term_none

This model uses features from simple_motion_search and the rd results
of PARTITION_NONE to decide whether we want to terminate early.
Currently this is only enabled at speed >= 1. Benchmarks at speed 0 are
pending.

Performance:
  AVG_PSNR | SPEED_UP | SPEED_UP:AVG_PSNR
   0.004%  |  1.717%  |   459.21:1

Performance is evaluated at speed 1 over 30 frames.
Quality is measured on the midres set without sintel_trailer. Speed is
measured using instruction counts on 5 midres clips at 4 bitrates.

STATS_CHANGED

Change-Id: I96547c30261d3de793e10a97380b4e51468112dc
diff --git a/av1/encoder/encodeframe.c b/av1/encoder/encodeframe.c
index 5a5f63d..9180ac1 100644
--- a/av1/encoder/encodeframe.c
+++ b/av1/encoder/encodeframe.c
@@ -2814,6 +2814,7 @@
 
 // split_score indicates confidence of picking split partition;
 // none_score indicates confidence of picking none partition;
+#define FEATURE_SIZE 19
 static int ml_prune_2pass_split_partition(const PC_TREE_STATS *pc_tree_stats,
                                           BLOCK_SIZE bsize, int *split_score,
                                           int *none_score) {
@@ -3596,7 +3597,7 @@
     AV1_COMP *const cpi, MACROBLOCK *x, PC_TREE *pc_tree, int mi_row,
     int mi_col, BLOCK_SIZE bsize, float *features) {
   // TODO(chiyotsai@google.com): Cache the result of the motion search from the
-  // larger bbsize.
+  // larger bsize.
   const int w_mi = mi_size_wide[bsize];
   const int h_mi = mi_size_high[bsize];
   int f_idx = 0;
@@ -3689,12 +3690,13 @@
     int mi_col, BLOCK_SIZE bsize, int *partition_none_allowed,
     int *partition_horz_allowed, int *partition_vert_allowed,
     int *do_square_split, int *do_rectangular_split, int *prune_horz,
-    int *prune_vert) {
+    int *prune_vert, float *features, int *valid) {
   const AV1_COMMON *const cm = &cpi->common;
   // Get model parameters
   const NN_CONFIG *nn_config = NULL;
   const float *prune_thresh = NULL, *only_thresh = NULL;
   const float *ml_mean = NULL, *ml_std = NULL;
+  float normalized_features[NUM_FEATURES] = { 0.0f };
 
   if (bsize == BLOCK_128X128) {
     nn_config = &av1_simple_motion_search_prune_part_nn_config_128;
@@ -3740,11 +3742,12 @@
   }
 
   // Get features
-  float features[NUM_FEATURES] = { 0.0f };
   simple_motion_search_prune_part_features(cpi, x, pc_tree, mi_row, mi_col,
                                            bsize, features);
+  *valid = 1;
   for (int f_idx = 0; f_idx < NUM_FEATURES; f_idx++) {
-    features[f_idx] = (features[f_idx] - ml_mean[f_idx]) / ml_std[f_idx];
+    normalized_features[f_idx] =
+        (features[f_idx] - ml_mean[f_idx]) / ml_std[f_idx];
   }
 
   // Get probabilities
@@ -3752,7 +3755,7 @@
   const int num_classes =
       (bsize == BLOCK_128X128 || bsize == BLOCK_8X8) ? 4 : 10;
 
-  av1_nn_predict(features, nn_config, scores);
+  av1_nn_predict(normalized_features, nn_config, scores);
   aom_clear_system_state();
 
   av1_nn_softmax(scores, probs, num_classes);
@@ -3916,9 +3919,83 @@
 }
 #undef MAX_NUM_CLASSES
 
-// TODO(jingning,jimbankoski,rbultje): properly skip partition types that are
+// Early terminates PARTITION_NONE using simple_motion_search features and the
+// rate, distortion, and rdcost of PARTITION_NONE. This is only called when:
+//  - The frame is a show frame
+//  - The frame is not intra only
+//  - The current bsize is >= BLOCK_8X8
+//  - blk_row + blk_height/2 < total_rows and blk_col + blk_width/2 < total_cols
+#define NUM_FEATURES 28
+static void av1_simple_motion_search_early_term_none(
+    AV1_COMP *const cpi, MACROBLOCK *x, PC_TREE *pc_tree, int mi_row,
+    int mi_col, BLOCK_SIZE bsize, const RD_STATS *none_rdc,
+    int *early_terminate, float *simple_motion_features,
+    int *simple_motion_features_are_valid) {
+  // TODO(chiyotsai@google.com): There are other features we can extract from
+  // PARTITION_NONE. Play with this later.
+  int f_idx = 0;
+  if (!*simple_motion_features_are_valid) {
+    simple_motion_search_prune_part_features(cpi, x, pc_tree, mi_row, mi_col,
+                                             bsize, simple_motion_features);
+    *simple_motion_features_are_valid = 1;
+  }
+  f_idx = 25;
+
+  simple_motion_features[f_idx++] = logf(1.0f + (float)none_rdc->rate);
+  simple_motion_features[f_idx++] = logf(1.0f + (float)none_rdc->dist);
+  simple_motion_features[f_idx++] = logf(1.0f + (float)none_rdc->rdcost);
+
+  assert(f_idx == NUM_FEATURES);
+
+  const float *ml_mean = NULL;
+  const float *ml_std = NULL;
+  const float *ml_model = NULL;
+
+  if (bsize == BLOCK_128X128) {
+    ml_mean = av1_simple_motion_search_term_none_mean_128;
+    ml_std = av1_simple_motion_search_term_none_std_128;
+    ml_model = av1_simple_motion_search_term_none_model_128;
+  } else if (bsize == BLOCK_64X64) {
+    ml_mean = av1_simple_motion_search_term_none_mean_64;
+    ml_std = av1_simple_motion_search_term_none_std_64;
+    ml_model = av1_simple_motion_search_term_none_model_64;
+  } else if (bsize == BLOCK_32X32) {
+    ml_mean = av1_simple_motion_search_term_none_mean_32;
+    ml_std = av1_simple_motion_search_term_none_std_32;
+    ml_model = av1_simple_motion_search_term_none_model_32;
+  } else if (bsize == BLOCK_16X16) {
+    ml_mean = av1_simple_motion_search_term_none_mean_16;
+    ml_std = av1_simple_motion_search_term_none_std_16;
+    ml_model = av1_simple_motion_search_term_none_model_16;
+  } else if (bsize == BLOCK_8X8) {
+    ml_mean = av1_simple_motion_search_term_none_mean_8;
+    ml_std = av1_simple_motion_search_term_none_std_8;
+    ml_model = av1_simple_motion_search_term_none_model_8;
+  } else {
+    assert(0 && "Unexpected block size in simple_motion_term_none");
+  }
+
+  if (ml_model) {
+    float score = 0.0f;
+    for (f_idx = 0; f_idx < NUM_FEATURES; f_idx++) {
+      score += ml_model[f_idx] *
+               (simple_motion_features[f_idx] - ml_mean[f_idx]) / ml_std[f_idx];
+    }
+    score += ml_model[NUM_FEATURES];
+
+    if (score >= 0.0f) {
+      *early_terminate = 1;
+    }
+  }
+}
+#undef NUM_FEATURES
+
+// TODO(jingning,jimbankoski,rbultje): properly skip partition types that are
 // unlikely to be selected depending on previous rate-distortion optimization
 // results, for encoding speed-up.
+// TODO(chiyotsai@google.com): Move these ml related variables to a separate
+// file to decouple low level ml logic from partition logic
+#define NUM_SIMPLE_MOTION_FEATURES 28
 static void rd_pick_partition(AV1_COMP *const cpi, ThreadData *td,
                               TileDataEnc *tile_data, TOKENEXTRA **tp,
                               int mi_row, int mi_col, BLOCK_SIZE bsize,
@@ -3952,6 +4029,7 @@
   int64_t vert_rd[2] = { 0, 0 };
   int prune_horz = 0;
   int prune_vert = 0;
+  int terminate_partition_search = 0;
 
   int split_ctx_is_ready[2] = { 0, 0 };
   int horz_ctx_is_ready = 0;
@@ -4200,11 +4278,15 @@
        (prune_horz && prune_vert)) &&
       (partition_horz_allowed || partition_vert_allowed) && bsize >= BLOCK_8X8;
 
+  float simple_motion_features[NUM_SIMPLE_MOTION_FEATURES] = { 0.0f };
+  int simple_motion_features_are_valid = 0;
+
   if (try_prune_rect) {
     simple_motion_search_prune_part(
         cpi, x, pc_tree, mi_row, mi_col, bsize, &partition_none_allowed,
         &partition_horz_allowed, &partition_vert_allowed, &do_square_split,
-        &do_rectangular_split, &prune_horz, &prune_vert);
+        &do_rectangular_split, &prune_horz, &prune_vert, simple_motion_features,
+        &simple_motion_features_are_valid);
   }
 
 BEGIN_PARTITION_SEARCH:
@@ -4214,6 +4296,7 @@
                              cpi->oxcf.enable_rect_partitions;
     partition_vert_allowed = has_rows && xss <= yss && bsize_at_least_8x8 &&
                              cpi->oxcf.enable_rect_partitions;
+    terminate_partition_search = 0;
   }
 
   // Partition block source pixel variance.
@@ -4235,7 +4318,8 @@
 
   // PARTITION_NONE
   if (is_eq_min_sq_part) partition_none_allowed = 1;
-  if (partition_none_allowed && !is_gt_max_sq_part) {
+  if (!terminate_partition_search && partition_none_allowed &&
+      !is_gt_max_sq_part) {
     int pt_cost = 0;
     if (bsize_at_least_8x8) {
       pt_cost = partition_cost[PARTITION_NONE] < INT_MAX
@@ -4326,6 +4410,18 @@
           }
         }
 
+        if (cpi->sf.simple_motion_search_early_term_none && cm->show_frame &&
+            !frame_is_intra_only(cm) && bsize >= BLOCK_8X8 &&
+            mi_row + mi_step < cm->mi_rows && mi_col + mi_step < cm->mi_cols &&
+            this_rdc.rdcost < INT64_MAX && this_rdc.rdcost >= 0 &&
+            this_rdc.rate < INT_MAX && this_rdc.rate >= 0 &&
+            (do_square_split || do_rectangular_split)) {
+          av1_simple_motion_search_early_term_none(
+              cpi, x, pc_tree, mi_row, mi_col, bsize, &this_rdc,
+              &terminate_partition_search, simple_motion_features,
+              &simple_motion_features_are_valid);
+        }
+
 #if CONFIG_FP_MB_STATS
         // Check if every 16x16 first pass block statistics has zero
         // motion and the corresponding first pass residue is small enough.
@@ -4382,7 +4478,7 @@
 
   // PARTITION_SPLIT
   if (is_eq_min_sq_part) do_square_split = 0;
-  if (do_square_split || is_gt_max_sq_part) {
+  if ((!terminate_partition_search && do_square_split) || is_gt_max_sq_part) {
     av1_init_rd_stats(&sum_rdc);
     subsize = get_partition_subsize(bsize, PARTITION_SPLIT);
     sum_rdc.rate = partition_cost[PARTITION_SPLIT];
@@ -4561,7 +4657,7 @@
 
   // PARTITION_HORZ
   assert(IMPLIES(!cpi->oxcf.enable_rect_partitions, !partition_horz_allowed));
-  if (partition_horz_allowed && !prune_horz &&
+  if (!terminate_partition_search && partition_horz_allowed && !prune_horz &&
       (do_rectangular_split || active_h_edge(cpi, mi_row, mi_step)) &&
       !is_gt_max_sq_part) {
     av1_init_rd_stats(&sum_rdc);
@@ -4640,7 +4736,7 @@
 
   // PARTITION_VERT
   assert(IMPLIES(!cpi->oxcf.enable_rect_partitions, !partition_vert_allowed));
-  if (partition_vert_allowed && !prune_vert &&
+  if (!terminate_partition_search && partition_vert_allowed && !prune_vert &&
       (do_rectangular_split || active_v_edge(cpi, mi_col, mi_step)) &&
       !is_gt_max_sq_part) {
     av1_init_rd_stats(&sum_rdc);
@@ -4833,7 +4929,8 @@
   }
 
   // PARTITION_HORZ_A
-  if (partition_horz_allowed && horza_partition_allowed && !is_gt_max_sq_part) {
+  if (!terminate_partition_search && partition_horz_allowed &&
+      horza_partition_allowed && !is_gt_max_sq_part) {
     subsize = get_partition_subsize(bsize, PARTITION_HORZ_A);
     pc_tree->horizontala[0].rd_mode_is_ready = 0;
     pc_tree->horizontala[1].rd_mode_is_ready = 0;
@@ -4910,7 +5007,8 @@
     restore_context(x, &x_ctx, mi_row, mi_col, bsize, num_planes);
   }
   // PARTITION_HORZ_B
-  if (partition_horz_allowed && horzb_partition_allowed && !is_gt_max_sq_part) {
+  if (!terminate_partition_search && partition_horz_allowed &&
+      horzb_partition_allowed && !is_gt_max_sq_part) {
     subsize = get_partition_subsize(bsize, PARTITION_HORZ_B);
     pc_tree->horizontalb[0].rd_mode_is_ready = 0;
     pc_tree->horizontalb[1].rd_mode_is_ready = 0;
@@ -4982,7 +5080,8 @@
   }
 
   // PARTITION_VERT_A
-  if (partition_vert_allowed && verta_partition_allowed && !is_gt_max_sq_part) {
+  if (!terminate_partition_search && partition_vert_allowed &&
+      verta_partition_allowed && !is_gt_max_sq_part) {
     subsize = get_partition_subsize(bsize, PARTITION_VERT_A);
     pc_tree->verticala[0].rd_mode_is_ready = 0;
     pc_tree->verticala[1].rd_mode_is_ready = 0;
@@ -5050,7 +5149,8 @@
     restore_context(x, &x_ctx, mi_row, mi_col, bsize, num_planes);
   }
   // PARTITION_VERT_B
-  if (partition_vert_allowed && vertb_partition_allowed && !is_gt_max_sq_part) {
+  if (!terminate_partition_search && partition_vert_allowed &&
+      vertb_partition_allowed && !is_gt_max_sq_part) {
     subsize = get_partition_subsize(bsize, PARTITION_VERT_B);
     pc_tree->verticalb[0].rd_mode_is_ready = 0;
     pc_tree->verticalb[1].rd_mode_is_ready = 0;
@@ -5157,7 +5257,7 @@
 
   // PARTITION_HORZ_4
   assert(IMPLIES(!cpi->oxcf.enable_rect_partitions, !partition_horz4_allowed));
-  if (partition_horz4_allowed && has_rows &&
+  if (!terminate_partition_search && partition_horz4_allowed && has_rows &&
       (do_rectangular_split || active_h_edge(cpi, mi_row, mi_step)) &&
       !is_gt_max_sq_part) {
     av1_init_rd_stats(&sum_rdc);
@@ -5208,7 +5308,7 @@
 
   // PARTITION_VERT_4
   assert(IMPLIES(!cpi->oxcf.enable_rect_partitions, !partition_vert4_allowed));
-  if (partition_vert4_allowed && has_cols &&
+  if (!terminate_partition_search && partition_vert4_allowed && has_cols &&
       (do_rectangular_split || active_v_edge(cpi, mi_row, mi_step)) &&
       !is_gt_max_sq_part) {
     av1_init_rd_stats(&sum_rdc);
@@ -5302,6 +5402,7 @@
     assert(tp_orig == *tp);
   }
 }
+#undef NUM_SIMPLE_MOTION_FEATURES
 
 // Set all the counters as max.
 static void init_first_partition_pass_stats_tables(
@@ -5788,7 +5889,9 @@
     PC_TREE *const pc_root = td->pc_root[mib_size_log2 - MIN_MIB_SIZE_LOG2];
     pc_root->index = 0;
 
-    if (sf->simple_motion_search_prune_rect && !frame_is_intra_only(cm)) {
+    if ((sf->simple_motion_search_prune_rect ||
+         sf->simple_motion_search_early_term_none) &&
+        !frame_is_intra_only(cm)) {
       init_simple_motion_search_mvs(pc_root);
     }
 
diff --git a/av1/encoder/partition_model_weights.h b/av1/encoder/partition_model_weights.h
index d3e50cb..318831e 100644
--- a/av1/encoder/partition_model_weights.h
+++ b/av1/encoder/partition_model_weights.h
@@ -4182,6 +4182,7 @@
   0.083288f,  0.000000f,  0.143105f, 0.438012f, 0.073238f,
   0.000000f,  -0.278137f, 0.186134f, 0.073737f, -1.6494962f,
 };
+#undef FEATURE_SIZE
 
 // nn model for predicting max square partition level of a superblock
 #define NUM_HIDDEN_LAYERS 1
@@ -4395,6 +4396,124 @@
 #undef NUM_LAYER_0_UNITS
 #undef NUM_LOGITS
 
+static const float av1_simple_motion_search_term_none_mean_128[28] = {
+  12.661922f, 12.638062f, 10.896497f, 10.865719f, 10.978963f, 10.940105f,
+  11.012235f, 10.972760f, 11.069924f, 11.018533f, 11.773865f, 11.747426f,
+  11.891315f, 11.858107f, 11.793916f, 11.766356f, 11.874997f, 11.840164f,
+  5.940535f,  0.770746f,  4.292692f,  4.309581f,  0.848423f,  4.292334f,
+  4.298179f,  8.514713f,  14.911736f, 19.825352f,
+};
+
+static const float av1_simple_motion_search_term_none_std_128[28] = {
+  1.796731f, 1.797056f, 1.898383f, 1.900753f, 1.846624f, 1.846953f, 1.906632f,
+  1.908089f, 1.836533f, 1.835967f, 1.840262f, 1.840671f, 1.816836f, 1.817103f,
+  1.879846f, 1.881333f, 1.803102f, 1.802654f, 2.263402f, 0.420354f, 1.117165f,
+  1.083779f, 0.358611f, 1.101183f, 1.084938f, 2.462638f, 1.577009f, 1.574711f,
+};
+
+static const float av1_simple_motion_search_term_none_mean_64[28] = {
+  10.904455f, 10.853546f, 9.247903f,  9.184479f,  9.251985f,  9.186686f,
+  9.253490f,  9.190190f,  9.270079f,  9.204357f,  10.086511f, 10.031060f,
+  10.100875f, 10.045429f, 10.069688f, 10.013173f, 10.082980f, 10.024640f,
+  4.888378f,  0.878113f,  3.598450f,  3.628491f,  0.925833f,  3.560971f,
+  3.573322f,  8.807137f,  13.348477f, 18.269117f,
+};
+
+static const float av1_simple_motion_search_term_none_std_64[28] = {
+  1.789300f, 1.787061f, 1.823519f, 1.820226f, 1.794643f, 1.788620f, 1.797194f,
+  1.795135f, 1.777795f, 1.773634f, 1.794000f, 1.790377f, 1.772197f, 1.769692f,
+  1.819050f, 1.817139f, 1.793577f, 1.789333f, 1.998251f, 0.327156f, 0.885748f,
+  0.853767f, 0.262043f, 0.902435f, 0.860033f, 1.224865f, 1.603411f, 1.589296f,
+};
+
+static const float av1_simple_motion_search_term_none_mean_32[28] = {
+  9.818970f, 9.751199f, 8.015079f, 7.927318f, 8.029113f, 7.938330f,  8.012570f,
+  7.923719f, 8.033508f, 7.941911f, 8.933057f, 8.857422f, 8.935639f,  8.859187f,
+  8.905495f, 8.829741f, 8.929428f, 8.851351f, 4.114069f, 0.954752f,  2.645082f,
+  2.709703f, 0.964678f, 2.652077f, 2.673393f, 9.430499f, 11.922798f, 16.942251f,
+};
+
+static const float av1_simple_motion_search_term_none_std_32[28] = {
+  1.737107f, 1.734327f, 1.727923f, 1.720244f, 1.721570f, 1.712775f, 1.718028f,
+  1.710370f, 1.711612f, 1.702596f, 1.754856f, 1.748855f, 1.741871f, 1.736304f,
+  1.722428f, 1.717380f, 1.713563f, 1.707582f, 1.761170f, 0.207847f, 0.900058f,
+  0.862356f, 0.184593f, 0.903822f, 0.856120f, 1.529199f, 1.412085f, 1.453153f,
+};
+
+static const float av1_simple_motion_search_term_none_mean_16[28] = {
+  8.998877f, 8.912468f, 7.085255f, 6.953476f, 7.086386f, 6.954091f,  7.088727f,
+  6.955747f, 7.093955f, 6.960635f, 8.065050f, 7.961432f, 8.071631f,  7.967233f,
+  8.041699f, 7.937715f, 8.046791f, 7.942183f, 3.833521f, 0.978421f,  1.901347f,
+  1.950124f, 0.979418f, 1.928000f, 1.936727f, 9.773951f, 10.735227f, 15.949769f,
+};
+
+static const float av1_simple_motion_search_term_none_std_16[28] = {
+  1.641193f, 1.640172f, 1.614794f, 1.608906f, 1.609571f, 1.603580f, 1.606928f,
+  1.601246f, 1.599230f, 1.593529f, 1.633747f, 1.630219f, 1.625695f, 1.622547f,
+  1.633827f, 1.630182f, 1.626607f, 1.622777f, 1.548838f, 0.145303f, 0.744550f,
+  0.736552f, 0.141980f, 0.742979f, 0.736977f, 1.366255f, 1.258794f, 1.294309f,
+};
+
+static const float av1_simple_motion_search_term_none_mean_8[28] = {
+  7.703941f, 7.561147f, 5.662838f, 5.390986f, 5.664201f, 5.389009f, 5.668180f,
+  5.392634f, 5.668345f, 5.390042f, 6.697586f, 6.509071f, 6.706533f, 6.515802f,
+  6.694291f, 6.506516f, 6.700662f, 6.511125f, 3.767833f, 0.989015f, 1.375188f,
+  1.416131f, 0.987493f, 1.387296f, 1.382117f, 9.499444f, 9.347361f, 14.847057f,
+};
+
+static const float av1_simple_motion_search_term_none_std_8[28] = {
+  1.540080f, 1.547904f, 1.541767f, 1.542958f, 1.541041f, 1.544120f, 1.529233f,
+  1.531208f, 1.529302f, 1.532606f, 1.553474f, 1.559303f, 1.537755f, 1.544269f,
+  1.541482f, 1.547625f, 1.536721f, 1.543942f, 1.377474f, 0.104230f, 0.705954f,
+  0.706923f, 0.111133f, 0.684572f, 0.683705f, 0.955327f, 1.182330f, 1.157021f,
+};
+
+static const float av1_simple_motion_search_term_none_model_128[] = {
+  -0.6106842357f, -1.0402954455f, 0.6054417656f,  -0.2116623578f,
+  0.2447714930f,  0.3782256209f,  0.5095592479f,  -0.3275620904f,
+  0.3886188013f,  0.2629499420f,  -0.1979599415f, -0.5389565605f,
+  0.1209207902f,  -0.4913347466f, 0.3798542731f,  -0.2812861709f,
+  -0.1049824167f, -0.1088672020f, 0.4059596517f,  -0.1347896613f,
+  0.2276868621f,  0.0506386970f,  0.0071088411f,  0.0467952100f,
+  0.2091247458f,  -0.7371964736f, 0.1368935545f,  0.3175247786f,
+  -0.5493146094f,
+};
+
+static const float av1_simple_motion_search_term_none_model_64[] = {
+  -0.4150046575f, -0.3954358561f, 0.1997997444f,  0.3395826831f,
+  0.2827215753f,  0.3395683652f,  0.2483140395f,  0.2722216476f,
+  0.2610308009f,  0.3724974359f,  -0.0551479654f, -0.1721616359f,
+  -0.3459358629f, -0.0952524186f, -0.1428993840f, -0.0415654914f,
+  -0.3169539902f, -0.0269429900f, 0.9891530919f,  -0.0125084982f,
+  0.0972182377f,  0.0008889801f,  0.0205418050f,  0.0057237854f,
+  0.1005222691f,  -0.2851321920f, -1.5150336445f, 0.1893942436f,
+  -0.4337360901f,
+};
+
+static const float av1_simple_motion_search_term_none_model_32[] = {
+  -0.4667392852f, -0.3893302767f, 0.1603498635f,  0.2304974726f,
+  0.1404975592f,  0.2505516225f,  0.1423053884f,  0.2189318406f,
+  0.1379765409f,  0.2638241296f,  -0.1342865463f, -0.0549054345f,
+  -0.1925223436f, -0.1142702769f, 0.0127811659f,  0.0868639997f,
+  -0.0643197251f, 0.0279496470f,  0.9904395769f,  -0.0095178685f,
+  0.1179410649f,  -0.0013411972f, 0.0095060660f,  0.0195730400f,
+  0.0779717771f,  -0.2498860763f, -0.8168817125f, -0.4798397348f,
+  -0.6609679881f,
+};
+
+static const float av1_simple_motion_search_term_none_model_16[] = {
+  -0.3021081992f, -0.4620153673f, 0.0448577479f,  0.1738455035f,
+  0.0663209177f,  0.1629614573f,  0.0555168744f,  0.1631870212f,
+  0.0425805150f,  0.1688564954f,  0.0434083772f,  -0.0046603915f,
+  -0.0271580056f, -0.0183879127f, 0.1073730471f,  0.0314201476f,
+  0.0576891756f,  0.0119723753f,  0.9084332022f,  -0.0188429077f,
+  0.0755089811f,  -0.0172550234f, 0.0037663075f,  0.0022094472f,
+  0.0500247894f,  -0.2944572004f, -0.8908521199f, -0.2555515792f,
+  -0.5396254205f,
+};
+
+static const float *av1_simple_motion_search_term_none_model_8 = NULL;
+
 #ifdef __cplusplus
 }  // extern "C"
 #endif
diff --git a/av1/encoder/speed_features.c b/av1/encoder/speed_features.c
index 200452d..db0664b 100644
--- a/av1/encoder/speed_features.c
+++ b/av1/encoder/speed_features.c
@@ -269,6 +269,7 @@
     sf->prune_single_motion_modes_by_simple_trans = 1;
 
     sf->simple_motion_search_split_only = 1;
+    sf->simple_motion_search_early_term_none = 1;
 
     sf->disable_wedge_search_var_thresh = 0;
     sf->disable_wedge_search_edge_thresh = 0;
@@ -765,6 +766,7 @@
   }
   sf->simple_motion_search_split_only = 0;
   sf->simple_motion_search_prune_rect = 0;
+  sf->simple_motion_search_early_term_none = 0;
 
   // Set this at the appropriate speed levels
   sf->use_transform_domain_distortion = 0;
diff --git a/av1/encoder/speed_features.h b/av1/encoder/speed_features.h
index b90ea15..0cd5288 100644
--- a/av1/encoder/speed_features.h
+++ b/av1/encoder/speed_features.h
@@ -643,6 +643,10 @@
   // want to split directly without trying other partition types.
   int simple_motion_search_split_only;
 
+  // Use features from simple_motion_search to terminate the prediction block
+  // partition search early after PARTITION_NONE
+  int simple_motion_search_early_term_none;
+
   int cb_pred_filter_search;
 
   // adaptive interp_filter search to allow skip of certain filter types.