Add speed feature to reduce tx size search depth

The speed feature simply restricts the number of tx size depths
searched. It is currently turned on by default for speeds >= 1.
The coding efficiency impact (tested on lowres, 30 frames) seems
to be ~0.15%, and the speedup is on the order of 15%.

Change-Id: I514832bd7df937292875f73d9c9026e49ac576f2
diff --git a/av1/encoder/bitstream.c b/av1/encoder/bitstream.c
index 56c263a..d8f231e 100644
--- a/av1/encoder/bitstream.c
+++ b/av1/encoder/bitstream.c
@@ -1536,13 +1536,10 @@
       const int bw = tx_size_wide_unit[max_tx_size];
       const int width = block_size_wide[bsize] >> tx_size_wide_log2[0];
       const int height = block_size_high[bsize] >> tx_size_wide_log2[0];
-      int init_depth =
-          (height != width) ? RECT_VARTX_DEPTH_INIT : SQR_VARTX_DEPTH_INIT;
       int idx, idy;
       for (idy = 0; idy < height; idy += bh)
         for (idx = 0; idx < width; idx += bw)
-          write_tx_size_vartx(cm, xd, mbmi, max_tx_size, init_depth, idy, idx,
-                              w);
+          write_tx_size_vartx(cm, xd, mbmi, max_tx_size, 0, idy, idx, w);
 #if CONFIG_RECT_TX_EXT
       if (is_quarter_tx_allowed(xd, mbmi, is_inter_block(mbmi)) &&
           quarter_txsize_lookup[bsize] != max_tx_size &&
@@ -1778,13 +1775,10 @@
       const int bw = tx_size_wide_unit[max_tx_size];
       const int width = block_size_wide[bsize] >> tx_size_wide_log2[0];
       const int height = block_size_high[bsize] >> tx_size_wide_log2[0];
-      int init_depth =
-          (height != width) ? RECT_VARTX_DEPTH_INIT : SQR_VARTX_DEPTH_INIT;
       int idx, idy;
       for (idy = 0; idy < height; idy += bh) {
         for (idx = 0; idx < width; idx += bw) {
-          write_tx_size_vartx(cm, xd, mbmi, max_tx_size, init_depth, idy, idx,
-                              w);
+          write_tx_size_vartx(cm, xd, mbmi, max_tx_size, 0, idy, idx, w);
         }
       }
     } else {