Skip txfm search

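Add a speed feature, tx_type_search.skip_tx_search, that skips the
remaining transform type search once the transform is found to
quantize to all zeros (eob = 0).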

Without txk_sel:
Skip remaining transform type search when all transform blocks inside
the coding block have eob = 0.

With txk_sel:
For each transform block, whenever eob = 0, we skip remaining
transform type search.

Speed impact:
On low bitrate, 25% speed up.
On high bitrate, 15-20% speed up.

Coding performance impact (Google test lowres, 30 frames):
With txk_sel: 0.15% drop
Without txk_sel: 0.30% drop

Change-Id: I5e8db730a19feec22e378611046b1ce1ab001c85
diff --git a/av1/encoder/rdopt.c b/av1/encoder/rdopt.c
index b0aa4e3..40ab03d 100644
--- a/av1/encoder/rdopt.c
+++ b/av1/encoder/rdopt.c
@@ -1884,6 +1884,10 @@
       best_txb_ctx = x->plane[plane].txb_entropy_ctx[block];
       best_eob = x->plane[plane].eobs[block];
     }
+
+    // Skip the remaining transform type search when the block has been
+    // quantized to all zeros and that beats applying a transform in rdcost.
+    if (cpi->sf.tx_type_search.skip_tx_search && !best_eob) break;
   }
 
   av1_merge_rd_stats(rd_stats, &best_rd_stats);
@@ -2575,6 +2579,13 @@
       const int is_inter = is_inter_block(mbmi);
       if (mbmi->sb_type < BLOCK_8X8 && is_inter) break;
 #endif  // !USE_TXTYPE_SEARCH_FOR_SUB8X8_IN_CB4X4
+
+#if !CONFIG_TXK_SEL
+      // Stop searching other tx types if skipping has a better rdcost than
+      // transforming all tx blocks.
+      if (cpi->sf.tx_type_search.skip_tx_search && !is_inter && rd_stats->skip)
+        break;
+#endif
     }
     if (n == TX_4X4) break;
   }
@@ -4812,7 +4823,7 @@
   int64_t best_rd = INT64_MAX;
   TX_TYPE tx_type, best_tx_type = DCT_DCT;
   const int is_inter = is_inter_block(mbmi);
-  TX_SIZE best_tx_size[MAX_MIB_SIZE][MAX_MIB_SIZE];
+  TX_SIZE best_tx_size[MAX_MIB_SIZE][MAX_MIB_SIZE] = { { 0 } };
   TX_SIZE best_tx = max_txsize_rect_lookup[1][bsize];
   TX_SIZE best_min_tx_size = TX_SIZES_ALL;
   uint8_t best_blk_skip[MAX_MIB_SIZE * MAX_MIB_SIZE * 8];
@@ -4941,6 +4952,13 @@
         for (idx = 0; idx < xd->n8_w; ++idx)
           best_tx_size[idy][idx] = mbmi->inter_tx_size[idy][idx];
     }
+
+#if !CONFIG_TXK_SEL
+    // Stop searching other tx types if skipping has a better rdcost than
+    // DCT for all tx blocks.
+    if (cpi->sf.tx_type_search.skip_tx_search && is_inter && this_rd_stats.skip)
+      break;
+#endif
   }
 
   // We should always find at least one candidate unless ref_best_rd is less
diff --git a/av1/encoder/speed_features.c b/av1/encoder/speed_features.c
index ea0e5b8..2d19300 100644
--- a/av1/encoder/speed_features.c
+++ b/av1/encoder/speed_features.c
@@ -157,6 +157,7 @@
     // TODO(mfo): Activate feature once it gives positive results.
     sf->use_hash_based_trellis = 1;
 #endif  // CONFIG_HASH_ME && CONFIG_LV_MAP
+    sf->tx_type_search.skip_tx_search = 1;
   }
 
   if (speed >= 2) {
@@ -471,6 +472,7 @@
   sf->tx_type_search.use_skip_flag_prediction = 1;
   sf->tx_type_search.fast_intra_tx_type_search = 0;
   sf->tx_type_search.fast_inter_tx_type_search = 0;
+  sf->tx_type_search.skip_tx_search = 0;
   sf->selective_ref_frame = 0;
   sf->less_rectangular_check = 0;
   sf->use_square_partition_only = 0;
diff --git a/av1/encoder/speed_features.h b/av1/encoder/speed_features.h
index 7ad8c3c..dc7d08f 100644
--- a/av1/encoder/speed_features.h
+++ b/av1/encoder/speed_features.h
@@ -213,6 +213,10 @@
   // RD search (currently works only when prune_mode equals to PRUNE_2D_ACCURATE
   // or PRUNE_2D_FAST).
   int use_tx_size_pruning;
+
+  // Skip the remaining transform type search when the rdcost of skipping is
+  // found to be better than that of applying a transform.
+  int skip_tx_search;
 } TX_TYPE_SEARCH;
 
 typedef enum {