Conditionally skip transform block partition search

Speed up the recursive transform block partition search. When a
transform block is selected with all-zero coefficients, skip the
search over its further split partitions.

Tested with txk-sel on, this makes speed 0 and speed 1 both 10-15%
faster in the medium-to-high target bit-rate range. The coding
performance change is neutral: 0.011% better for the lowres set.

Change-Id: I1247f3d5a33d15bf4bc5f0bcbac2bf1f3e1aca2e
diff --git a/av1/encoder/rdopt.c b/av1/encoder/rdopt.c
index 52de3af..ac2bd16 100644
--- a/av1/encoder/rdopt.c
+++ b/av1/encoder/rdopt.c
@@ -3947,6 +3947,10 @@
   int tx_split_prune_flag = 0;
   if (cpi->sf.tx_type_search.prune_mode >= PRUNE_2D_ACCURATE)
     tx_split_prune_flag = ((x->tx_search_prune[0] >> TX_TYPES) & 1);
+
+  if (cpi->sf.txb_split_cap)
+    if (p->eobs[block] == 0) tx_split_prune_flag = 1;
+
   if (tx_size > TX_4X4 && depth < MAX_VARTX_DEPTH && tx_split_prune_flag == 0) {
     const TX_SIZE sub_txs = sub_tx_size_map[1][tx_size];
     const int bsw = tx_size_wide_unit[sub_txs];
diff --git a/av1/encoder/speed_features.c b/av1/encoder/speed_features.c
index da52789..54e31d7 100644
--- a/av1/encoder/speed_features.c
+++ b/av1/encoder/speed_features.c
@@ -494,6 +494,7 @@
   sf->fast_wedge_sign_estimate = 0;
   sf->drop_ref = 0;
   sf->skip_intra_in_interframe = 1;
+  sf->txb_split_cap = 1;
 
   for (i = 0; i < TX_SIZES; i++) {
     sf->intra_y_mode_mask[i] = INTRA_ALL;
diff --git a/av1/encoder/speed_features.h b/av1/encoder/speed_features.h
index 6a3ce90..be4eca8 100644
--- a/av1/encoder/speed_features.h
+++ b/av1/encoder/speed_features.h
@@ -349,6 +349,10 @@
 
   TX_TYPE_SEARCH tx_type_search;
 
+  // Skip the split transform block partition search when the collocated larger
+  // block is selected with all-zero coefficients.
+  int txb_split_cap;
+
   // Used if partition_search_type = FIXED_SIZE_PARTITION
   BLOCK_SIZE always_this_block_size;