Add support for recursive transform block coding

This commit redesigns the bitstream syntax to support recursive
transform block partitioning. The initial stage targets the inter
prediction residuals.

Change-Id: I556ab3c68c198387a2fd2d02e2b475e83cd417c3
diff --git a/configure b/configure
index 456f7c2..8bb20b7 100755
--- a/configure
+++ b/configure
@@ -264,6 +264,7 @@
     spatial_svc
     fp_mb_stats
     emulate_hardware
+    var_tx
     ext_tx
     misc_fixes
     ext_intra
diff --git a/vp10/common/blockd.h b/vp10/common/blockd.h
index ef7daaf..4d8b700 100644
--- a/vp10/common/blockd.h
+++ b/vp10/common/blockd.h
@@ -74,6 +74,11 @@
   BLOCK_SIZE sb_type;
   PREDICTION_MODE mode;
   TX_SIZE tx_size;
+#if CONFIG_VAR_TX
+  // TODO(jingning): This effectively assigns 64 entries to each 8x8 block,
+  // which takes much more space than needed.
+  TX_SIZE inter_tx_size[64];
+#endif
   int8_t skip;
 #if CONFIG_MISC_FIXES
   int8_t has_no_coeffs;
diff --git a/vp10/decoder/decodemv.c b/vp10/decoder/decodemv.c
index 5404e8a..92ebbb6 100644
--- a/vp10/decoder/decodemv.c
+++ b/vp10/decoder/decodemv.c
@@ -64,6 +64,36 @@
   return vpx_read_tree(r, vp10_segment_tree, seg->tree_probs);
 }
 
+#if CONFIG_VAR_TX  // Recursive transform-size parsing for inter blocks.
+static void read_tx_size_inter(VP10_COMMON *cm, MB_MODE_INFO *mbmi,
+                               TX_SIZE tx_size, int mi_row, int mi_col,
+                               vpx_reader *r) {
+  int is_split = vpx_read_bit(r);  // One bit per node: 1 = split further.
+
+  if (is_split) {
+    BLOCK_SIZE bsize = txsize_to_bsize[tx_size];
+    int bsl = mi_width_log2_lookup[bsize];
+    int i;
+    if (tx_size == TX_8X8) {  // Split at 8x8 gives 4x4; no deeper bits read.
+      mbmi->tx_size = TX_4X4;
+      return;
+    }
+
+    assert(bsl > 0);
+    --bsl;  // Shift amount for the offsets of the four quadrants.
+    for (i = 0; i < 4; ++i) {  // Visit the four quadrants.
+      int offsetr = mi_row + ((i >> 1) << bsl);
+      int offsetc = mi_col + ((i & 0x01) << bsl);
+      if (offsetr >= cm->mi_rows || offsetc >= cm->mi_cols)
+        continue;  // Quadrant origin is beyond mi_rows/mi_cols: skip it.
+      read_tx_size_inter(cm, mbmi, tx_size - 1, offsetr, offsetc, r);
+    }
+  } else {
+    mbmi->tx_size = tx_size;  // Leaf: all leaves write this one field.
+  }
+}
+#endif
+
 static TX_SIZE read_selected_tx_size(VP10_COMMON *cm, MACROBLOCKD *xd,
                                      TX_SIZE max_tx_size, vpx_reader *r) {
   FRAME_COUNTS *counts = xd->counts;
@@ -600,13 +630,39 @@
   MODE_INFO *const mi = xd->mi[0];
   MB_MODE_INFO *const mbmi = &mi->mbmi;
   int inter_block;
+#if CONFIG_VAR_TX
+  BLOCK_SIZE bsize = mbmi->sb_type;
+#endif
 
   mbmi->mv[0].as_int = 0;
   mbmi->mv[1].as_int = 0;
   mbmi->segment_id = read_inter_segment_id(cm, xd, mi_row, mi_col, r);
   mbmi->skip = read_skip(cm, xd, mbmi->segment_id, r);
   inter_block = read_is_inter_block(cm, xd, mbmi->segment_id, r);
+
+#if CONFIG_VAR_TX
+  if (bsize >= BLOCK_8X8 && cm->tx_mode == TX_MODE_SELECT &&
+      !mbmi->skip && inter_block) {
+    const TX_SIZE max_tx_size = max_txsize_lookup[bsize];
+    const int txb_size = txsize_to_bsize[max_tx_size];
+    const int bs = num_8x8_blocks_wide_lookup[txb_size];
+    const int width  = num_8x8_blocks_wide_lookup[bsize];
+    const int height = num_8x8_blocks_high_lookup[bsize];
+    int idx, idy;
+    for (idy = 0; idy < height; idy += bs)
+      for (idx = 0; idx < width; idx += bs)
+        read_tx_size_inter(cm, mbmi, max_tx_size,
+                           mi_row + idy, mi_col + idx, r);
+    if (xd->counts) {
+      const int ctx = get_tx_size_context(xd);
+      ++get_tx_counts(max_tx_size, ctx, &xd->counts->tx)[mbmi->tx_size];
+    }
+  } else {
+    mbmi->tx_size = read_tx_size(cm, xd, !mbmi->skip || !inter_block, r);
+  }
+#else
   mbmi->tx_size = read_tx_size(cm, xd, !mbmi->skip || !inter_block, r);
+#endif
 
   if (inter_block)
     read_inter_block_mode_info(pbi, xd, mi, mi_row, mi_col, r);
diff --git a/vp10/encoder/bitstream.c b/vp10/encoder/bitstream.c
index ccb289d2..e0db0c3 100644
--- a/vp10/encoder/bitstream.c
+++ b/vp10/encoder/bitstream.c
@@ -117,6 +117,35 @@
   return savings;
 }
 
+#if CONFIG_VAR_TX  // Recursive transform-size signaling for inter blocks.
+static void write_tx_size_inter(const VP10_COMMON *cm,
+                                const MB_MODE_INFO *mbmi,
+                                TX_SIZE tx_size, int mi_row, int mi_col,
+                                vpx_writer *w) {
+  if (tx_size == mbmi->tx_size) {
+    vpx_write_bit(w, 0);  // No split: this node uses the block's tx_size.
+  } else {
+    const BLOCK_SIZE bsize = txsize_to_bsize[tx_size];
+    int bsl = mi_width_log2_lookup[bsize];
+    int i;
+    vpx_write_bit(w, 1);  // Split flag.
+
+    if (tx_size == TX_8X8)
+      return;  // No further bits are written below 8x8.
+
+    assert(bsl > 0);
+    --bsl;  // Shift amount for the offsets of the four quadrants.
+    for (i = 0; i < 4; ++i) {  // Visit the four quadrants.
+      int offsetr = mi_row + ((i >> 1) << bsl);
+      int offsetc = mi_col + ((i & 0x01) << bsl);
+      if (offsetr >= cm->mi_rows || offsetc >= cm->mi_cols)
+        continue;  // Quadrant origin is beyond mi_rows/mi_cols: skip it.
+      write_tx_size_inter(cm, mbmi, tx_size - 1, offsetr, offsetc, w);
+    }
+  }
+}
+#endif
+
 static void write_selected_tx_size(const VP10_COMMON *cm,
                                    const MACROBLOCKD *xd, vpx_writer *w) {
   TX_SIZE tx_size = xd->mi[0]->mbmi.tx_size;
@@ -316,6 +345,9 @@
 }
 
 static void pack_inter_mode_mvs(VP10_COMP *cpi, const MODE_INFO *mi,
+#if CONFIG_VAR_TX
+                                int mi_row, int mi_col,
+#endif
                                 vpx_writer *w) {
   VP10_COMMON *const cm = &cpi->common;
   const nmv_context *nmvc = &cm->fc->nmvc;
@@ -351,7 +383,24 @@
 
   if (bsize >= BLOCK_8X8 && cm->tx_mode == TX_MODE_SELECT &&
       !(is_inter && skip)) {
+#if CONFIG_VAR_TX
+    if (is_inter) {  // This implies skip flag is 0.
+      const TX_SIZE max_tx_size = max_txsize_lookup[bsize];
+      const int txb_size = txsize_to_bsize[max_tx_size];
+      const int bs = num_8x8_blocks_wide_lookup[txb_size];
+      const int width  = num_8x8_blocks_wide_lookup[bsize];
+      const int height = num_8x8_blocks_high_lookup[bsize];
+      int idx, idy;
+      for (idy = 0; idy < height; idy += bs)
+        for (idx = 0; idx < width; idx += bs)
+          write_tx_size_inter(cm, mbmi, max_tx_size,
+                              mi_row + idy, mi_col + idx, w);
+    } else {
+      write_selected_tx_size(cm, xd, w);
+    }
+#else
     write_selected_tx_size(cm, xd, w);
+#endif
   }
 
   if (!is_inter) {
@@ -500,7 +549,11 @@
   if (frame_is_intra_only(cm)) {
     write_mb_modes_kf(cm, xd, xd->mi, w);
   } else {
+#if CONFIG_VAR_TX
+    pack_inter_mode_mvs(cpi, m, mi_row, mi_col, w);
+#else
     pack_inter_mode_mvs(cpi, m, w);
+#endif
   }
 
   assert(*tok < tok_end);
diff --git a/vp10/encoder/encodeframe.c b/vp10/encoder/encodeframe.c
index c8d7f41..0f823e9 100644
--- a/vp10/encoder/encodeframe.c
+++ b/vp10/encoder/encodeframe.c
@@ -2577,6 +2577,7 @@
 static TX_MODE select_tx_mode(const VP10_COMP *cpi, MACROBLOCKD *const xd) {
   if (xd->lossless)
     return ONLY_4X4;
+
   if (cpi->sf.tx_size_search_method == USE_LARGESTALL)
     return ALLOW_32X32;
   else if (cpi->sf.tx_size_search_method == USE_FULL_RD||