Extend ext_tx experiment to intra blocks

ext_tx gain on derflr: +2.30% (was +1.84%)

Change-Id: Ic91565cacc38e7a8e1200d054ed7bf99295fe19e
diff --git a/vp10/encoder/bitstream.c b/vp10/encoder/bitstream.c
index 8505725..f8c274e 100644
--- a/vp10/encoder/bitstream.c
+++ b/vp10/encoder/bitstream.c
@@ -164,22 +164,39 @@
 static void update_ext_tx_probs(VP10_COMMON *cm, vpx_writer *w) {
   const int savings_thresh = vp10_cost_one(GROUP_DIFF_UPDATE_PROB) -
                              vp10_cost_zero(GROUP_DIFF_UPDATE_PROB);
-  int i;
+  int i, j;
   int savings = 0;
   int do_update = 0;
   for (i = TX_4X4; i <= TX_16X16; ++i) {
     savings += prob_diff_update_savings(
-        vp10_ext_tx_tree, cm->fc->ext_tx_prob[i],
-        cm->counts.ext_tx[i], EXT_TX_TYPES);
+        vp10_ext_tx_tree, cm->fc->inter_ext_tx_prob[i],
+        cm->counts.inter_ext_tx[i], EXT_TX_TYPES);
   }
   do_update = savings > savings_thresh;
   vpx_write(w, do_update, GROUP_DIFF_UPDATE_PROB);
   if (do_update) {
     for (i = TX_4X4; i <= TX_16X16; ++i) {
-      prob_diff_update(vp10_ext_tx_tree, cm->fc->ext_tx_prob[i],
-                       cm->counts.ext_tx[i], EXT_TX_TYPES, w);
+      prob_diff_update(vp10_ext_tx_tree, cm->fc->inter_ext_tx_prob[i],
+                       cm->counts.inter_ext_tx[i], EXT_TX_TYPES, w);
     }
   }
+
+  savings = 0;
+  do_update = 0;
+
+  for (i = TX_4X4; i <= TX_16X16; ++i)
+    for (j = 0; j < INTRA_MODES; ++j)
+      savings += prob_diff_update_savings(
+          vp10_ext_tx_tree, cm->fc->intra_ext_tx_prob[i][j],
+          cm->counts.intra_ext_tx[i][j], EXT_TX_TYPES);
+  do_update = savings > savings_thresh;
+  vpx_write(w, do_update, GROUP_DIFF_UPDATE_PROB);
+  if (do_update) {
+    for (i = TX_4X4; i <= TX_16X16; ++i)
+      for (j = 0; j < INTRA_MODES; ++j)
+        prob_diff_update(vp10_ext_tx_tree, cm->fc->intra_ext_tx_prob[i][j],
+                         cm->counts.intra_ext_tx[i][j], EXT_TX_TYPES, w);
+  }
 }
 #endif  // CONFIG_EXT_TX
 
@@ -337,18 +354,6 @@
     write_selected_tx_size(cm, xd, w);
   }
 
-#if CONFIG_EXT_TX
-  if (is_inter &&
-      mbmi->tx_size <= TX_16X16 &&
-      cm->base_qindex > 0 &&
-      bsize >= BLOCK_8X8 &&
-      !mbmi->skip &&
-      !segfeature_active(&cm->seg, mbmi->segment_id, SEG_LVL_SKIP)) {
-    vp10_write_token(w, vp10_ext_tx_tree, cm->fc->ext_tx_prob[mbmi->tx_size],
-                     &ext_tx_encodings[mbmi->ext_txfrm]);
-  }
-#endif  // CONFIG_EXT_TX
-
   if (!is_inter) {
     if (bsize >= BLOCK_8X8) {
       write_intra_mode(w, mode, cm->fc->y_mode_prob[size_group_lookup[bsize]]);
@@ -412,6 +417,21 @@
       }
     }
   }
+
+#if CONFIG_EXT_TX
+  if (mbmi->tx_size <= TX_16X16 && cm->base_qindex > 0 &&
+      bsize >= BLOCK_8X8 && !mbmi->skip &&
+      !segfeature_active(&cm->seg, mbmi->segment_id, SEG_LVL_SKIP)) {
+    if (is_inter)
+      vp10_write_token(w, vp10_ext_tx_tree,
+                       cm->fc->inter_ext_tx_prob[mbmi->tx_size],
+                       &ext_tx_encodings[mbmi->ext_txfrm]);
+    else
+      vp10_write_token(w, vp10_ext_tx_tree,
+                       cm->fc->intra_ext_tx_prob[mbmi->tx_size][mbmi->mode],
+                       &ext_tx_encodings[mbmi->ext_txfrm]);
+  }
+#endif  // CONFIG_EXT_TX
 }
 
 static void write_mb_modes_kf(const VP10_COMMON *cm, const MACROBLOCKD *xd,
@@ -448,6 +468,16 @@
   }
 
   write_intra_mode(w, mbmi->uv_mode, vp10_kf_uv_mode_prob[mbmi->mode]);
+
+#if CONFIG_EXT_TX
+  if (mbmi->tx_size <= TX_16X16 && cm->base_qindex > 0 &&
+      bsize >= BLOCK_8X8 && !mbmi->skip &&
+      !segfeature_active(&cm->seg, mbmi->segment_id, SEG_LVL_SKIP)) {
+    vp10_write_token(w, vp10_ext_tx_tree,
+                     cm->fc->intra_ext_tx_prob[mbmi->tx_size][mbmi->mode],
+                     &ext_tx_encodings[mbmi->ext_txfrm]);
+  }
+#endif  // CONFIG_EXT_TX
 }
 
 static void write_modes_b(VP10_COMP *cpi, const TileInfo *const tile,
diff --git a/vp10/encoder/encodeframe.c b/vp10/encoder/encodeframe.c
index ceb9eb4..356baf5 100644
--- a/vp10/encoder/encodeframe.c
+++ b/vp10/encoder/encodeframe.c
@@ -2989,13 +2989,13 @@
     ++td->counts->tx.tx_totals[mbmi->tx_size];
     ++td->counts->tx.tx_totals[get_uv_tx_size(mbmi, &xd->plane[1])];
 #if CONFIG_EXT_TX
-    if (is_inter_block(mbmi) &&
-        mbmi->tx_size <= TX_16X16 &&
-        cm->base_qindex > 0 &&
-        bsize >= BLOCK_8X8 &&
-        !mbmi->skip &&
+    if (mbmi->tx_size <= TX_16X16 && cm->base_qindex > 0 &&
+        bsize >= BLOCK_8X8 && !mbmi->skip &&
         !segfeature_active(&cm->seg, mbmi->segment_id, SEG_LVL_SKIP)) {
-      ++td->counts->ext_tx[mbmi->tx_size][mbmi->ext_txfrm];
+      if (is_inter_block(mbmi))
+        ++td->counts->inter_ext_tx[mbmi->tx_size][mbmi->ext_txfrm];
+      else
+        ++td->counts->intra_ext_tx[mbmi->tx_size][mbmi->mode][mbmi->ext_txfrm];
     }
 #endif  // CONFIG_EXT_TX
   }
diff --git a/vp10/encoder/encoder.h b/vp10/encoder/encoder.h
index e977910..0f8076d 100644
--- a/vp10/encoder/encoder.h
+++ b/vp10/encoder/encoder.h
@@ -459,7 +459,8 @@
   int switchable_interp_costs[SWITCHABLE_FILTER_CONTEXTS][SWITCHABLE_FILTERS];
   int partition_cost[PARTITION_CONTEXTS][PARTITION_TYPES];
 #if CONFIG_EXT_TX
-  int ext_tx_costs[EXT_TX_SIZES][EXT_TX_TYPES];
+  int inter_ext_tx_costs[EXT_TX_SIZES][EXT_TX_TYPES];
+  int intra_ext_tx_costs[EXT_TX_SIZES][INTRA_MODES][EXT_TX_TYPES];
 #endif  // CONFIG_EXT_TX
 
   int multi_arf_allowed;
diff --git a/vp10/encoder/rd.c b/vp10/encoder/rd.c
index 82dfd23..2b549af 100644
--- a/vp10/encoder/rd.c
+++ b/vp10/encoder/rd.c
@@ -85,9 +85,13 @@
     vp10_cost_tokens(cpi->switchable_interp_costs[i],
                     fc->switchable_interp_prob[i], vp10_switchable_interp_tree);
 #if CONFIG_EXT_TX
-  for (i = TX_4X4; i <= TX_16X16; ++i)
-    vp10_cost_tokens(cpi->ext_tx_costs[i], fc->ext_tx_prob[i],
+  for (i = TX_4X4; i <= TX_16X16; ++i) {
+    vp10_cost_tokens(cpi->inter_ext_tx_costs[i], fc->inter_ext_tx_prob[i],
                      vp10_ext_tx_tree);
+    for (j = 0; j < INTRA_MODES; ++j)
+      vp10_cost_tokens(cpi->intra_ext_tx_costs[i][j],
+                       fc->intra_ext_tx_prob[i][j], vp10_ext_tx_tree);
+  }
 #endif  // CONFIG_EXT_TX
 }
 
diff --git a/vp10/encoder/rdopt.c b/vp10/encoder/rdopt.c
index 1f1405e..bec1103 100644
--- a/vp10/encoder/rdopt.c
+++ b/vp10/encoder/rdopt.c
@@ -603,6 +603,11 @@
 #if CONFIG_EXT_TX
   if (is_inter_block(mbmi) && bs >= BLOCK_8X8 && !xd->lossless) {
     for (tx_type = NORM; tx_type < EXT_TX_TYPES - 1; ++tx_type) {
+      if (mbmi->ext_txfrm >= ALT11 && mbmi->ext_txfrm < ALT16 &&
+          best_tx_type == NORM) {
+        tx_type = ALT16 - 1;
+        continue;
+      }
       if (tx_type >= GET_EXT_TX_TYPES(mbmi->tx_size))
         continue;
 
@@ -613,8 +618,13 @@
 
       if (r == INT_MAX)
         continue;
-      if (mbmi->tx_size <= TX_16X16)
-        r += cpi->ext_tx_costs[mbmi->tx_size][mbmi->ext_txfrm];
+      if (mbmi->tx_size <= TX_16X16) {
+        if (is_inter_block(mbmi))
+          r += cpi->inter_ext_tx_costs[mbmi->tx_size][mbmi->ext_txfrm];
+        else
+          r += cpi->intra_ext_tx_costs[mbmi->tx_size]
+                                       [mbmi->mode][mbmi->ext_txfrm];
+      }
 
       if (s)
         this_rd = RDCOST(x->rdmult, x->rddiv, s1, psse);
@@ -638,10 +648,14 @@
                    mbmi->tx_size, cpi->sf.use_fast_coef_costing);
 
 #if CONFIG_EXT_TX
-  if (is_inter_block(mbmi) && bs >= BLOCK_8X8 &&
-      mbmi->tx_size <= TX_16X16 &&
-      !xd->lossless && *rate != INT_MAX)
-    *rate += cpi->ext_tx_costs[mbmi->tx_size][mbmi->ext_txfrm];
+  if (bs >= BLOCK_8X8 && mbmi->tx_size <= TX_16X16 &&
+      !xd->lossless && *rate != INT_MAX) {
+    if (is_inter_block(mbmi))
+      *rate += cpi->inter_ext_tx_costs[mbmi->tx_size][mbmi->ext_txfrm];
+    else
+      *rate += cpi->intra_ext_tx_costs[mbmi->tx_size]
+                                       [mbmi->mode][mbmi->ext_txfrm];
+  }
 #endif  // CONFIG_EXT_TX
 }
 
@@ -693,7 +707,7 @@
 
 #if CONFIG_EXT_TX
   start_tx_type = NORM;
-  if (is_inter_block(mbmi) && bs >= BLOCK_8X8 && !xd->lossless)
+  if (bs >= BLOCK_8X8 && !xd->lossless)
     end_tx_type = EXT_TX_TYPES - 1;
   else
     end_tx_type = NORM;
@@ -726,9 +740,12 @@
                        &sse, ref_best_rd, 0, bs, n,
                        cpi->sf.use_fast_coef_costing);
 #if CONFIG_EXT_TX
-      if (is_inter_block(mbmi) && bs >= BLOCK_8X8 &&
-          !xd->lossless && r != INT_MAX && n < TX_32X32)
-        r += cpi->ext_tx_costs[n][mbmi->ext_txfrm];
+      if (bs >= BLOCK_8X8 && !xd->lossless && r != INT_MAX && n < TX_32X32) {
+        if (is_inter_block(mbmi))
+          r += cpi->inter_ext_tx_costs[n][mbmi->ext_txfrm];
+        else
+          r += cpi->intra_ext_tx_costs[n][mbmi->mode][mbmi->ext_txfrm];
+      }
 #endif  // CONFIG_EXT_TX
 
       if (r == INT_MAX)
@@ -1136,6 +1153,9 @@
   int this_rate, this_rate_tokenonly, s;
   int64_t this_distortion, this_rd;
   TX_SIZE best_tx = TX_4X4;
+#if CONFIG_EXT_TX
+  EXT_TX_TYPE best_tx_type = NORM;
+#endif  // CONFIG_EXT_TX
   int *bmode_costs;
   const MODE_INFO *above_mi = xd->above_mi;
   const MODE_INFO *left_mi = xd->left_mi;
@@ -1161,6 +1181,9 @@
       mode_selected   = mode;
       best_rd         = this_rd;
       best_tx         = mic->mbmi.tx_size;
+#if CONFIG_EXT_TX
+      best_tx_type    = mic->mbmi.ext_txfrm;
+#endif  // CONFIG_EXT_TX
       *rate           = this_rate;
       *rate_tokenonly = this_rate_tokenonly;
       *distortion     = this_distortion;
@@ -1170,6 +1193,9 @@
 
   mic->mbmi.mode = mode_selected;
   mic->mbmi.tx_size = best_tx;
+#if CONFIG_EXT_TX
+  mic->mbmi.ext_txfrm = best_tx_type;
+#endif  // CONFIG_EXT_TX
 
   return best_rd;
 }
@@ -3295,9 +3321,6 @@
     if (ref_frame == INTRA_FRAME) {
       TX_SIZE uv_tx;
       struct macroblockd_plane *const pd = &xd->plane[1];
-#if CONFIG_EXT_TX
-      mbmi->ext_txfrm = NORM;
-#endif  // CONFIG_EXT_TX
       memset(x->skip_txfm, 0, sizeof(x->skip_txfm));
       super_block_yrd(cpi, x, &rate_y, &distortion_y, &skippable,
                       NULL, bsize, best_rd);
@@ -3322,9 +3345,6 @@
         rate2 += intra_cost_penalty;
       distortion2 = distortion_y + distortion_uv;
     } else {
-#if CONFIG_EXT_TX
-      mbmi->ext_txfrm = NORM;
-#endif
       this_rd = handle_inter_mode(cpi, x, bsize,
                                   &rate2, &distortion2, &skippable,
                                   &rate_y, &rate_uv,
@@ -3755,9 +3775,6 @@
   int64_t filter_cache[SWITCHABLE_FILTER_CONTEXTS];
   int internal_active_edge =
     vp10_active_edge_sb(cpi, mi_row, mi_col) && vp10_internal_image_edge(cpi);
-#if CONFIG_EXT_TX
-  mbmi->ext_txfrm = NORM;
-#endif
 
   memset(x->zcoeff_blk[TX_4X4], 0, 4);
   vp10_zero(best_mbmode);
@@ -3916,9 +3933,6 @@
 
     if (ref_frame == INTRA_FRAME) {
       int rate;
-#if CONFIG_EXT_TX
-      mbmi->ext_txfrm = NORM;
-#endif
       if (rd_pick_intra_sub_8x8_y_mode(cpi, x, &rate, &rate_y,
                                        &distortion_y, best_rd) >= best_rd)
         continue;