Enable adaptive txb search. Search the transform block size and kernel type adaptively, conditioned on the relative rate-distortion cost target. When the rate-distortion target is lower, the need for extensive transform block partition and type search is relatively lower, which allows the finer searches to be skipped. Enable this speed feature in speed 1. Local tests on bus_cif and city_cif, both at 1000 kbps at speed 1, showed that the encoder runs 40% - 50% faster. bus_cif 30 frames: 1494s -> 1053s; city_cif 50 frames: 3210s -> 2328s. The compression performance changes are 0.048% for lowres and 0.091% for midres. Change-Id: I9ac4b23d33488de0e4c3a089267aa08dab4ade5f

commit: b91a0e7aa03f9bd8196fc8a2a1160e974f3c118b [log] [tgz]
author: Jingning Han <jingning@google.com> Wed Feb 28 10:43:44 2018 -0800
committer: Jingning Han <jingning@google.com> Thu Mar 01 14:59:06 2018 +0000
tree: 5a4cb5facde515287c4c6294d820ae77031fc260
parent: 3fffb88b0b52b299cb75d09934914ab858d542b6 [diff]
diff --git a/av1/encoder/rdopt.c b/av1/encoder/rdopt.c
index 42c6fcf..1415ff5 100644
--- a/av1/encoder/rdopt.c
+++ b/av1/encoder/rdopt.c

@@ -1869,7 +1869,7 @@
                                BLOCK_SIZE plane_bsize, TX_SIZE tx_size,
                                const ENTROPY_CONTEXT *a,
                                const ENTROPY_CONTEXT *l, int fast_tx_search,
-                               int use_fast_coef_costing,
+                               int use_fast_coef_costing, int64_t ref_best_rd,
                                RD_STATS *best_rd_stats) {
   const AV1_COMMON *cm = &cpi->common;
   MACROBLOCKD *xd = &x->e_mbd;
@@ -1968,6 +1968,9 @@
       best_eob = x->plane[plane].eobs[block];
     }
 
+    if (cpi->sf.adaptive_txb_search)
+      if ((best_rd - (best_rd >> 2)) > ref_best_rd) break;
+
     // Skip transform type search when we found the block has been quantized to
     // all zero and at the same time, it has better rdcost than doing transform.
     if (cpi->sf.tx_type_search.skip_tx_search && !best_eob) break;
@@ -2042,7 +2045,8 @@
   }
 
   search_txk_type(cpi, x, plane, block, blk_row, blk_col, plane_bsize, tx_size,
-                  a, l, 0, args->use_fast_coef_costing, &this_rd_stats);
+                  a, l, 0, args->use_fast_coef_costing, INT64_MAX,
+                  &this_rd_stats);
 
 #if CONFIG_CFL
   if (plane == AOM_PLANE_Y && xd->cfl.store_y && is_cfl_allowed(mbmi)) {
@@ -3467,7 +3471,8 @@
                        int blk_row, int blk_col, int plane, int block,
                        int plane_bsize, const ENTROPY_CONTEXT *a,
                        const ENTROPY_CONTEXT *l, RD_STATS *rd_stats,
-                       int fast_tx_search, TX_SIZE_RD_INFO *rd_info_array) {
+                       int fast_tx_search, int64_t ref_rdcost,
+                       TX_SIZE_RD_INFO *rd_info_array) {
   const struct macroblock_plane *const p = &x->plane[plane];
   TXB_CTX txb_ctx;
   get_txb_ctx(plane_bsize, tx_size, plane, a, l, &txb_ctx);
@@ -3494,7 +3499,7 @@
 
   RD_STATS this_rd_stats;
   search_txk_type(cpi, x, plane, block, blk_row, blk_col, plane_bsize, tx_size,
-                  a, l, fast_tx_search, 0, &this_rd_stats);
+                  a, l, fast_tx_search, 0, ref_rdcost, &this_rd_stats);
 
   av1_merge_rd_stats(rd_stats, &this_rd_stats);
 
@@ -3566,7 +3571,7 @@
     mbmi->inter_tx_size[index] = tx_size;
     av1_tx_block_rd_b(
         cpi, x, tx_size, blk_row, blk_col, plane, block, plane_bsize, pta, ptl,
-        rd_stats, fast_tx_search,
+        rd_stats, fast_tx_search, ref_best_rd,
         rd_info_node != NULL ? rd_info_node->rd_info_array : NULL);
     if (rd_stats->rate == INT_MAX) return;
 
@@ -3599,6 +3604,9 @@
     best_tx_type = mbmi->txk_type[txk_type_idx];
   }
 
+  if (cpi->sf.adaptive_txb_search)
+    if (this_rd > ref_best_rd) return;
+
   int tx_split_prune_flag = 0;
   if (cpi->sf.tx_type_search.prune_mode >= PRUNE_2D_ACCURATE)
     tx_split_prune_flag = ((x->tx_search_prune[0] >> TX_TYPES) & 1);
@@ -3938,7 +3946,7 @@
     rd_stats->zero_rate = zero_blk_rate;
     rd_stats->ref_rdcost = ref_best_rd;
     av1_tx_block_rd_b(cpi, x, tx_size, blk_row, blk_col, plane, block,
-                      plane_bsize, ta, tl, rd_stats, fast, NULL);
+                      plane_bsize, ta, tl, rd_stats, fast, ref_best_rd, NULL);
     const int mi_width = block_size_wide[plane_bsize] >> tx_size_wide_log2[0];
     if (RDCOST(x->rdmult, rd_stats->rate, rd_stats->dist) >=
             RDCOST(x->rdmult, zero_blk_rate, rd_stats->sse) ||
@@ -4490,7 +4498,7 @@
     ENTROPY_CONTEXT *ta = above_ctx + blk_col;
     ENTROPY_CONTEXT *tl = left_ctx + blk_row;
     av1_tx_block_rd_b(cpi, x, tx_size, blk_row, blk_col, plane, block,
-                      plane_bsize, ta, tl, rd_stats, fast, NULL);
+                      plane_bsize, ta, tl, rd_stats, fast, INT64_MAX, NULL);
     av1_set_txb_context(x, plane, block, tx_size, ta, tl);
   } else {
     const TX_SIZE sub_txs = sub_tx_size_map[1][tx_size];

diff --git a/av1/encoder/speed_features.c b/av1/encoder/speed_features.c
index 7c3adb4..59eadeb 100644
--- a/av1/encoder/speed_features.c
+++ b/av1/encoder/speed_features.c

@@ -153,6 +153,7 @@
     sf->use_hash_based_trellis = 1;
 #endif  // CONFIG_HASH_ME
     sf->tx_type_search.skip_tx_search = 1;
+    sf->adaptive_txb_search = 1;
   }
 
   if (speed >= 2) {
@@ -467,6 +468,7 @@
   sf->drop_ref = 0;
   sf->skip_intra_in_interframe = 1;
   sf->txb_split_cap = 1;
+  sf->adaptive_txb_search = 0;
   sf->two_pass_partition_search = 0;
 
   for (i = 0; i < TX_SIZES; i++) {

diff --git a/av1/encoder/speed_features.h b/av1/encoder/speed_features.h
index c0d0cb7..b779cc2 100644
--- a/av1/encoder/speed_features.h
+++ b/av1/encoder/speed_features.h

@@ -353,6 +353,10 @@
   // is selected as all zero coefficients.
   int txb_split_cap;
 
+  // Shortcut the transform block partition and type search when the target
+  // rdcost is relatively lower.
+  int adaptive_txb_search;
+
   // Used if partition_search_type = FIXED_SIZE_PARTITION
   BLOCK_SIZE always_this_block_size;
commit	b91a0e7aa03f9bd8196fc8a2a1160e974f3c118b	[log] [tgz]
author	Jingning Han <jingning@google.com>	Wed Feb 28 10:43:44 2018 -0800
committer	Jingning Han <jingning@google.com>	Thu Mar 01 14:59:06 2018 +0000
tree	5a4cb5facde515287c4c6294d820ae77031fc260
parent	3fffb88b0b52b299cb75d09934914ab858d542b6 [diff]