Use a smaller subset of intra modes in tpl speed 5

The quality change is negligible, but speed improves by up to 3% when
the bitrate is low.

STATS_CHANGED

Change-Id: I46ce8963f155493f0eb4a721ed318e9c01063aef
diff --git a/av1/common/enums.h b/av1/common/enums.h
index d5a9151..a91c45e 100644
--- a/av1/common/enums.h
+++ b/av1/common/enums.h
@@ -411,6 +411,8 @@
   MB_MODE_COUNT,
   INTRA_MODE_START = DC_PRED,
   INTRA_MODE_END = NEARESTMV,
+  DIR_MODE_START = V_PRED,
+  DIR_MODE_END = D67_PRED + 1,
   INTRA_MODE_NUM = INTRA_MODE_END - INTRA_MODE_START,
   SINGLE_INTER_MODE_START = NEARESTMV,
   SINGLE_INTER_MODE_END = NEAREST_NEARESTMV,
diff --git a/av1/encoder/speed_features.c b/av1/encoder/speed_features.c
index b5858ec..b1c5eae 100644
--- a/av1/encoder/speed_features.c
+++ b/av1/encoder/speed_features.c
@@ -493,6 +493,7 @@
     sf->disable_interinter_wedge = 1;
     sf->disable_smooth_interintra = 1;
     sf->disable_onesided_comp = 1;
+    sf->tpl_sf.prune_intra_modes = 1;
   }
 }
 
@@ -922,6 +923,7 @@
   sf->skip_repeated_newmv = 0;
   // TODO(any) Cleanup this speed feature
   sf->prune_single_motion_modes_by_simple_trans = 0;
+  sf->tpl_sf.prune_intra_modes = 0;
 
   // Set decoder side speed feature to use less dual sgr modes
   sf->dual_sgr_penalty_level = 0;
diff --git a/av1/encoder/speed_features.h b/av1/encoder/speed_features.h
index 5043382..bbf358f 100644
--- a/av1/encoder/speed_features.h
+++ b/av1/encoder/speed_features.h
@@ -247,6 +247,13 @@
   SUBPEL_FORCE_STOP subpel_force_stop;
 } MV_SPEED_FEATURES;
 
+typedef struct TPL_SPEED_FEATURES {
+  // Prune tpl's intra mode search. If set to 0, tpl searches all intra modes
+  // from DC_PRED to PAETH_PRED. If set to 1, tpl searches only DC_PRED and the
+  // directional modes (V_PRED through D67_PRED).
+  int prune_intra_modes;
+} TPL_SPEED_FEATURES;
+
 #define MAX_MESH_STEP 4
 
 typedef struct MESH_PATTERN {
@@ -320,6 +327,9 @@
   // adds overhead.
   int static_segmentation;
 
+  // Speed features related to how tpl's searches are done.
+  TPL_SPEED_FEATURES tpl_sf;
+
   /*
    * Global motion speed features:
    */
diff --git a/av1/encoder/tpl_model.c b/av1/encoder/tpl_model.c
index 92cca91..3d5e909 100644
--- a/av1/encoder/tpl_model.c
+++ b/av1/encoder/tpl_model.c
@@ -187,7 +187,6 @@
 
   int64_t best_intra_cost = INT64_MAX;
   int64_t intra_cost;
-  PREDICTION_MODE mode;
   PREDICTION_MODE best_mode = DC_PRED;
 
   int mb_y_offset = mi_row * MI_SIZE * xd->cur_buf->y_stride + mi_col * MI_SIZE;
@@ -232,7 +231,10 @@
 #endif
   }
 
-  for (mode = DC_PRED; mode <= PAETH_PRED; ++mode) {
+  const PREDICTION_MODE last_intra_mode =
+      cpi->sf.tpl_sf.prune_intra_modes ? DIR_MODE_END : INTRA_MODE_END;
+  for (PREDICTION_MODE mode = INTRA_MODE_START; mode < last_intra_mode;
+       ++mode) {
     uint8_t *src;
     uint8_t *dst;
     int dst_stride;