Reduced intra tx set

With this speed feature, the two least frequently selected
transform types for each intra mode are pruned from the
transform type search.

BD rate:
           lowres   midres
speed 0    0.008%   0.021%
speed 1   -0.012%   0.036%
speed 2    0.017%   0.031%

Encoding time reduction:
speed 0    4.00%
speed 1    5.25%
speed 2    7.17%

Instruction count reduction: 4%

STATS_CHANGED

Change-Id: I1be41ea54c8bf1d255db2e4b6f9659a9729cf5d0
diff --git a/av1/common/blockd.h b/av1/common/blockd.h
index 0ca7c19..028a919 100644
--- a/av1/common/blockd.h
+++ b/av1/common/blockd.h
@@ -669,6 +669,22 @@
   { 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1 },
 };
 
+static const uint16_t av1_reduced_intra_tx_used_flag[INTRA_MODES] = {
+  0x080F,  // DC_PRED:       0000 1000 0000 1111
+  0x040F,  // V_PRED:        0000 0100 0000 1111
+  0x080F,  // H_PRED:        0000 1000 0000 1111
+  0x020F,  // D45_PRED:      0000 0010 0000 1111
+  0x080F,  // D135_PRED:     0000 1000 0000 1111
+  0x040F,  // D113_PRED:     0000 0100 0000 1111
+  0x080F,  // D157_PRED:     0000 1000 0000 1111
+  0x080F,  // D203_PRED:     0000 1000 0000 1111
+  0x040F,  // D67_PRED:      0000 0100 0000 1111
+  0x080F,  // SMOOTH_PRED:   0000 1000 0000 1111
+  0x040F,  // SMOOTH_V_PRED: 0000 0100 0000 1111
+  0x080F,  // SMOOTH_H_PRED: 0000 1000 0000 1111
+  0x0C0E,  // PAETH_PRED:    0000 1100 0000 1110
+};
+
 static const uint16_t av1_ext_tx_used_flag[EXT_TX_SET_TYPES] = {
   0x0001,  // 0000 0000 0000 0001
   0x0201,  // 0000 0010 0000 0001
diff --git a/av1/encoder/rdopt.c b/av1/encoder/rdopt.c
index e3f4099..05e06e8 100644
--- a/av1/encoder/rdopt.c
+++ b/av1/encoder/rdopt.c
@@ -3051,7 +3051,15 @@
         av1_get_tx_type(get_plane_type(plane), xd, blk_row, blk_col, tx_size,
                         cm->reduced_tx_set_used);
   }
-  const uint16_t ext_tx_used_flag = av1_ext_tx_used_flag[tx_set_type];
+  PREDICTION_MODE intra_dir =
+      mbmi->filter_intra_mode_info.use_filter_intra
+          ? fimode_to_intradir[mbmi->filter_intra_mode_info.filter_intra_mode]
+          : mbmi->mode;
+  const uint16_t ext_tx_used_flag =
+      cpi->sf.tx_type_search.use_reduced_intra_txset &&
+              tx_set_type == EXT_TX_SET_DTT4_IDTX_1DDCT
+          ? av1_reduced_intra_tx_used_flag[intra_dir]
+          : av1_ext_tx_used_flag[tx_set_type];
   if (xd->lossless[mbmi->segment_id] || txsize_sqr_up_map[tx_size] > TX_32X32 ||
       ext_tx_used_flag == 0x0001 ||
       (is_inter && cpi->oxcf.use_inter_dct_only) ||
diff --git a/av1/encoder/speed_features.c b/av1/encoder/speed_features.c
index 4c59fd5..6d72084 100644
--- a/av1/encoder/speed_features.c
+++ b/av1/encoder/speed_features.c
@@ -223,6 +223,7 @@
   sf->use_fast_interpolation_filter_search = 1;
   sf->intra_tx_size_search_init_depth_sqr = 1;
   sf->intra_angle_estimation = 1;
+  sf->tx_type_search.use_reduced_intra_txset = 1;
   sf->selective_ref_frame = 1;
   sf->prune_wedge_pred_diff_based = 1;
   sf->disable_wedge_search_var_thresh = 0;
@@ -432,6 +433,7 @@
   sf->use_fast_interpolation_filter_search = 1;
   sf->intra_tx_size_search_init_depth_sqr = 1;
   sf->intra_angle_estimation = 1;
+  sf->tx_type_search.use_reduced_intra_txset = 1;
   sf->selective_ref_frame = 1;
   sf->prune_wedge_pred_diff_based = 1;
   sf->disable_wedge_search_var_thresh = 0;
@@ -677,6 +679,7 @@
   sf->tx_type_search.prune_mode = PRUNE_2D_ACCURATE;
   sf->tx_type_search.ml_tx_split_thresh = 30;
   sf->tx_type_search.use_skip_flag_prediction = 1;
+  sf->tx_type_search.use_reduced_intra_txset = 0;
   sf->tx_type_search.fast_intra_tx_type_search = 0;
   sf->tx_type_search.fast_inter_tx_type_search = 0;
   sf->tx_type_search.skip_tx_search = 0;
diff --git a/av1/encoder/speed_features.h b/av1/encoder/speed_features.h
index 4d02d56..d787f42 100644
--- a/av1/encoder/speed_features.h
+++ b/av1/encoder/speed_features.h
@@ -169,6 +169,9 @@
   int fast_intra_tx_type_search;
   int fast_inter_tx_type_search;
 
+  // Prune the two least frequently chosen transform types for each intra mode.
+  int use_reduced_intra_txset;
+
   // Use a skip flag prediction model to detect blocks with skip = 1 early
   // and avoid doing full TX type search for such blocks.
   int use_skip_flag_prediction;