Incorporate flexible tx type and tx partition in RD scheme

This commit hooks up the rate-distortion optimization system to
fully exploit recursive transform block partition and multiple
transform type. The compression performance of the two experiments
largely adds up. For derf set, ext-tx provides additional 2.1%
coding gains on top of the gains due to recursive transform block
partition (0.69%).

Change-Id: I1091fb9545f74e489a6a2489dc3c12f5abd05043
diff --git a/vp10/encoder/rdopt.c b/vp10/encoder/rdopt.c
index 942303b..dc15f99 100644
--- a/vp10/encoder/rdopt.c
+++ b/vp10/encoder/rdopt.c
@@ -1650,8 +1650,6 @@
   *bsse += (int64_t)tmp_sse * 16;
 
   if (p->eobs[block] > 0) {
-    // TODO(jingning): integrate multiple transform type experiment
-    TX_TYPE tx_type = DCT_DCT;
     switch (tx_size) {
       case TX_32X32:
         vp10_inv_txfm_add_32x32(dqcoeff, rec_buffer, 32, p->eobs[block],
@@ -1878,10 +1876,6 @@
   *sse = 0;
   *skippable = 1;
 
-#if CONFIG_EXT_TX
-  xd->mi[0]->mbmi.tx_type = DCT_DCT;
-#endif
-
   if (is_cost_valid) {
     const struct macroblockd_plane *const pd = &xd->plane[0];
     const BLOCK_SIZE plane_bsize = get_plane_block_size(bsize, pd);
@@ -1936,6 +1930,99 @@
   }
 }
 
+#if CONFIG_EXT_TX
+static void select_tx_type_yrd(const VP10_COMP *cpi, MACROBLOCK *x,
+                               int *rate, int64_t *distortion, int *skippable,
+                               int64_t *sse, BLOCK_SIZE bsize,
+                               int64_t ref_best_rd) {
+  const TX_SIZE max_tx_size = max_txsize_lookup[bsize];
+  const VP10_COMMON *const cm = &cpi->common;
+  MACROBLOCKD *const xd = &x->e_mbd;
+  MB_MODE_INFO *const mbmi = &xd->mi[0]->mbmi;
+  int64_t rd = INT64_MAX;
+  int64_t best_rd = INT64_MAX;
+  TX_TYPE tx_type, best_tx_type = DCT_DCT;
+  int ext_tx_set;
+  const int is_inter = is_inter_block(mbmi);
+  vpx_prob skip_prob = vp10_get_skip_prob(cm, xd);
+  int s0 = vp10_cost_bit(skip_prob, 0);
+  int s1 = vp10_cost_bit(skip_prob, 1);
+
+  *distortion = INT64_MAX;
+  *rate       = INT_MAX;
+  *skippable  = 0;
+  *sse        = INT64_MAX;
+
+  for (tx_type = DCT_DCT; tx_type < TX_TYPES; ++tx_type) {
+    int this_rate = 0;
+    int this_skip = 1;
+    int64_t this_dist = 0;
+    int64_t this_sse  = 0;
+
+    ext_tx_set = get_ext_tx_set(max_tx_size, bsize, is_inter);
+    if (is_inter) {
+      if (!ext_tx_used_inter[ext_tx_set][tx_type])
+        continue;
+    } else {
+      if (!ext_tx_used_intra[ext_tx_set][tx_type])
+        continue;
+    }
+
+    mbmi->tx_type = tx_type;
+
+    if (ext_tx_set == 1 &&
+        mbmi->tx_type >= DST_ADST && mbmi->tx_type < IDTX &&
+        best_tx_type == DCT_DCT) {
+      tx_type = IDTX - 1;
+      break;
+    }
+
+    inter_block_yrd(cpi, x, &this_rate, &this_dist, &this_skip, &this_sse,
+                    bsize, ref_best_rd);
+
+    if (get_ext_tx_types(max_tx_size, bsize, is_inter) > 1 &&
+        !xd->lossless[xd->mi[0]->mbmi.segment_id] &&
+        this_rate != INT_MAX) {
+      if (is_inter) {
+        if (ext_tx_set > 0)
+          this_rate += cpi->inter_tx_type_costs[ext_tx_set]
+                                       [mbmi->tx_size][mbmi->tx_type];
+      } else {
+        if (ext_tx_set > 0)
+          this_rate += cpi->intra_tx_type_costs[ext_tx_set][mbmi->tx_size]
+                                       [mbmi->mode][mbmi->tx_type];
+      }
+    }
+
+    if (this_rate == INT_MAX)
+      continue;
+
+    if (this_skip)
+      rd = RDCOST(x->rdmult, x->rddiv, s1, this_sse);
+    else
+      rd = RDCOST(x->rdmult, x->rddiv, this_rate + s0, this_dist);
+
+    if (is_inter && !xd->lossless[xd->mi[0]->mbmi.segment_id] && !this_skip)
+      rd = VPXMIN(rd, RDCOST(x->rdmult, x->rddiv, s1, this_sse));
+
+    if (rd <
+        (is_inter && best_tx_type == DCT_DCT ? ext_tx_th : 1) *
+        best_rd) {
+      best_rd = rd;
+      *distortion = this_dist;
+      *rate       = this_rate;
+      *skippable  = this_skip;
+      *sse        = this_sse;
+      best_tx_type = mbmi->tx_type;
+    }
+  }
+
+  mbmi->tx_type = best_tx_type;
+  inter_block_yrd(cpi, x, rate, distortion, skippable, sse,
+                  bsize, ref_best_rd);
+}
+#endif
+
 static void tx_block_rd(const VP10_COMP *cpi, MACROBLOCK *x,
                         int blk_row, int blk_col, int plane, int block,
                         TX_SIZE tx_size, BLOCK_SIZE plane_bsize,
@@ -3704,8 +3791,13 @@
     vp10_subtract_plane(x, bsize, 0);
 #if CONFIG_VAR_TX
     if (cm->tx_mode == TX_MODE_SELECT || xd->lossless[mbmi->segment_id]) {
+#if CONFIG_EXT_TX
+      select_tx_type_yrd(cpi, x, rate_y, &distortion_y, &skippable_y, psse,
+                         bsize, ref_best_rd);
+#else
       inter_block_yrd(cpi, x, rate_y, &distortion_y, &skippable_y, psse,
                       bsize, ref_best_rd);
+#endif
     } else {
       super_block_yrd(cpi, x, rate_y, &distortion_y, &skippable_y, psse,
                       bsize, ref_best_rd);