Merge branch 'master' into nextgenv2

Manually resolved the following conflicts:
	vp10/common/blockd.h
	vp10/common/entropy.h
	vp10/common/entropymode.c
	vp10/common/entropymode.h
	vp10/common/enums.h
	vp10/common/thread_common.c
	vp10/decoder/decodeframe.c
	vp10/decoder/decodemv.c
	vp10/encoder/bitstream.c
	vp10/encoder/encodeframe.c
	vp10/encoder/rd.c
	vp10/encoder/rdopt.c

Change-Id: I15d20ce5292b70f0c2b4ba55c1f1318181481596
diff --git a/vp10/common/blockd.h b/vp10/common/blockd.h
index 66e29d4..dd5c2d1 100644
--- a/vp10/common/blockd.h
+++ b/vp10/common/blockd.h
@@ -128,9 +128,7 @@
   // Only for INTER blocks
   INTERP_FILTER interp_filter;
   MV_REFERENCE_FRAME ref_frame[2];
-#if CONFIG_EXT_TX
   TX_TYPE tx_type;
-#endif  // CONFIG_EXT_TX
 
 #if CONFIG_EXT_INTRA
   EXT_INTRA_MODE_INFO ext_intra_mode_info;
@@ -280,7 +278,7 @@
   return subsize_lookup[partition][bsize];
 }
 
-static const TX_TYPE intra_mode_to_tx_type_lookup[INTRA_MODES] = {
+static const TX_TYPE intra_mode_to_tx_type_context[INTRA_MODES] = {
   DCT_DCT,    // DC
   ADST_DCT,   // V
   DCT_ADST,   // H
@@ -455,13 +453,14 @@
   if (is_inter_block(mbmi))  // Sub8x8-Inter
     return DCT_DCT;
   else  // Sub8x8 Intra OR UV-Intra
-    return intra_mode_to_tx_type_lookup[plane_type == PLANE_TYPE_Y ?
+    return intra_mode_to_tx_type_context[plane_type == PLANE_TYPE_Y ?
         get_y_mode(mi, block_idx) : mbmi->uv_mode];
 #else
+  (void) block_idx;
   if (plane_type != PLANE_TYPE_Y || xd->lossless[mbmi->segment_id] ||
-       is_inter_block(mbmi) || tx_size >= TX_32X32)
+      tx_size >= TX_32X32)
     return DCT_DCT;
-  return intra_mode_to_tx_type_lookup[get_y_mode(mi, block_idx)];
+  return mbmi->tx_type;
 #endif  // CONFIG_EXT_TX
 }
 
diff --git a/vp10/common/entropy.h b/vp10/common/entropy.h
index c1de3b2..747d1ad 100644
--- a/vp10/common/entropy.h
+++ b/vp10/common/entropy.h
@@ -21,7 +21,7 @@
 extern "C" {
 #endif
 
-#define DIFF_UPDATE_PROB 252
+#define DIFF_UPDATE_PROB       252
 #define GROUP_DIFF_UPDATE_PROB 252
 
 // Coefficient token alphabet
diff --git a/vp10/common/entropymode.c b/vp10/common/entropymode.c
index 0048c7c..1b4fd26 100644
--- a/vp10/common/entropymode.c
+++ b/vp10/common/entropymode.c
@@ -1192,6 +1192,28 @@
 static  const vpx_prob default_ext_intra_probs[2] = {230, 230};
 #endif  // CONFIG_EXT_INTRA
 
+#if !CONFIG_EXT_TX
+const vpx_tree_index vp10_ext_tx_tree[TREE_SIZE(TX_TYPES)] = {
+  -DCT_DCT, 2,
+  -ADST_ADST, 4,
+  -ADST_DCT, -DCT_ADST
+};
+
+static const vpx_prob default_intra_ext_tx_prob[EXT_TX_SIZES]
+                                               [TX_TYPES][TX_TYPES - 1] = {
+  {{240, 85, 128}, {4, 1, 248}, {4, 1, 8}, {4, 248, 128}},
+  {{244, 85, 128}, {8, 2, 248}, {8, 2, 8}, {8, 248, 128}},
+  {{248, 85, 128}, {16, 4, 248}, {16, 4, 8}, {16, 248, 128}},
+};
+
+static const vpx_prob default_inter_ext_tx_prob[EXT_TX_SIZES]
+                                               [TX_TYPES - 1] = {
+  {160, 85, 128},
+  {176, 85, 128},
+  {192, 85, 128},
+};
+#endif
+
 static void init_mode_probs(FRAME_CONTEXT *fc) {
   vp10_copy(fc->uv_mode_prob, default_uv_probs);
   vp10_copy(fc->y_mode_prob, default_if_y_probs);
@@ -1212,10 +1234,6 @@
   vp10_copy(fc->refmv_prob, default_refmv_prob);
 #endif
   vp10_copy(fc->inter_mode_probs, default_inter_mode_probs);
-#if CONFIG_EXT_TX
-  vp10_copy(fc->inter_ext_tx_prob, default_inter_ext_tx_prob);
-  vp10_copy(fc->intra_ext_tx_prob, default_intra_ext_tx_prob);
-#endif  // CONFIG_EXT_TX
 #if CONFIG_SUPERTX
   vp10_copy(fc->supertx_prob, default_supertx_prob);
 #endif  // CONFIG_SUPERTX
@@ -1224,6 +1242,8 @@
 #if CONFIG_EXT_INTRA
   vp10_copy(fc->ext_intra_probs, default_ext_intra_probs);
 #endif  // CONFIG_EXT_INTRA
+  vp10_copy(fc->inter_ext_tx_prob, default_inter_ext_tx_prob);
+  vp10_copy(fc->intra_ext_tx_prob, default_intra_ext_tx_prob);
 }
 
 #if CONFIG_EXT_INTERP && SWITCHABLE_FILTERS == 4
@@ -1355,6 +1375,21 @@
       }
     }
   }
+#else
+  for (i = TX_4X4; i < EXT_TX_SIZES; ++i) {
+    int j;
+    for (j = 0; j < TX_TYPES; ++j)
+      vpx_tree_merge_probs(vp10_ext_tx_tree,
+                           pre_fc->intra_ext_tx_prob[i][j],
+                           counts->intra_ext_tx[i][j],
+                           fc->intra_ext_tx_prob[i][j]);
+  }
+  for (i = TX_4X4; i < EXT_TX_SIZES; ++i) {
+    vpx_tree_merge_probs(vp10_ext_tx_tree,
+                         pre_fc->inter_ext_tx_prob[i],
+                         counts->inter_ext_tx[i],
+                         fc->inter_ext_tx_prob[i]);
+  }
 #endif  // CONFIG_EXT_TX
 
 #if CONFIG_SUPERTX
diff --git a/vp10/common/entropymode.h b/vp10/common/entropymode.h
index 11ba12f..a1ad2c4 100644
--- a/vp10/common/entropymode.h
+++ b/vp10/common/entropymode.h
@@ -83,6 +83,9 @@
   vpx_prob inter_ext_tx_prob[EXT_TX_SETS_INTER][EXT_TX_SIZES][TX_TYPES - 1];
   vpx_prob intra_ext_tx_prob[EXT_TX_SETS_INTRA][EXT_TX_SIZES][INTRA_MODES]
                             [TX_TYPES - 1];
+#else
+  vpx_prob intra_ext_tx_prob[EXT_TX_SIZES][TX_TYPES][TX_TYPES - 1];
+  vpx_prob inter_ext_tx_prob[EXT_TX_SIZES][TX_TYPES - 1];
 #endif  // CONFIG_EXT_TX
 #if CONFIG_SUPERTX
   vpx_prob supertx_prob[PARTITION_SUPERTX_CONTEXTS][TX_SIZES];
@@ -124,6 +127,9 @@
   unsigned int inter_ext_tx[EXT_TX_SETS_INTER][EXT_TX_SIZES][TX_TYPES];
   unsigned int intra_ext_tx[EXT_TX_SETS_INTRA][EXT_TX_SIZES][INTRA_MODES]
                            [TX_TYPES];
+#else
+  unsigned int intra_ext_tx[EXT_TX_SIZES][TX_TYPES][TX_TYPES];
+  unsigned int inter_ext_tx[EXT_TX_SIZES][TX_TYPES];
 #endif  // CONFIG_EXT_TX
 #if CONFIG_SUPERTX
   unsigned int supertx[PARTITION_SUPERTX_CONTEXTS][TX_SIZES][2];
@@ -175,6 +181,9 @@
     vp10_ext_tx_inter_tree[EXT_TX_SETS_INTER][TREE_SIZE(TX_TYPES)];
 extern const vpx_tree_index
     vp10_ext_tx_intra_tree[EXT_TX_SETS_INTRA][TREE_SIZE(TX_TYPES)];
+#else
+extern const vpx_tree_index
+    vp10_ext_tx_tree[TREE_SIZE(TX_TYPES)];
 #endif  // CONFIG_EXT_TX
 
 static INLINE int vp10_ceil_log2(int n) {
diff --git a/vp10/common/enums.h b/vp10/common/enums.h
index 44de0a2..f0d1ba2 100644
--- a/vp10/common/enums.h
+++ b/vp10/common/enums.h
@@ -112,9 +112,10 @@
   TX_TYPES,
 } TX_TYPE;
 
+#define EXT_TX_SIZES       3  // number of sizes that use extended transforms
+
 #if CONFIG_EXT_TX
 #define USE_DST2           1
-#define EXT_TX_SIZES       3  // number of sizes that use extended transforms
 #define EXT_TX_SETS_INTER  4  // Sets of transform selections for INTER
 #define EXT_TX_SETS_INTRA  3  // Sets of transform selections for INTRA
 #endif  // CONFIG_EXT_TX
diff --git a/vp10/common/thread_common.c b/vp10/common/thread_common.c
index 3098b36..a1f17e9 100644
--- a/vp10/common/thread_common.c
+++ b/vp10/common/thread_common.c
@@ -475,6 +475,17 @@
       }
     }
   }
+#else
+  for (i = 0; i < EXT_TX_SIZES; i++) {
+    int j;
+    for (j = 0; j < TX_TYPES; ++j)
+      for (k = 0; k < TX_TYPES; k++)
+        cm->counts.intra_ext_tx[i][j][k] += counts->intra_ext_tx[i][j][k];
+  }
+  for (i = 0; i < EXT_TX_SIZES; i++) {
+    for (k = 0; k < TX_TYPES; k++)
+      cm->counts.inter_ext_tx[i][k] += counts->inter_ext_tx[i][k];
+  }
 #endif  // CONFIG_EXT_TX
 
 #if CONFIG_SUPERTX
diff --git a/vp10/decoder/decodeframe.c b/vp10/decoder/decodeframe.c
index 44b5bc4..1bb569d 100644
--- a/vp10/decoder/decodeframe.c
+++ b/vp10/decoder/decodeframe.c
@@ -270,7 +270,7 @@
     if (eob == 1) {
       dqcoeff[0] = 0;
     } else {
-      if (tx_size <= TX_16X16 && eob <= 10)
+      if (tx_type == DCT_DCT && tx_size <= TX_16X16 && eob <= 10)
         memset(dqcoeff, 0, 4 * (4 << tx_size) * sizeof(dqcoeff[0]));
       else if (tx_size == TX_32X32 && eob <= 34)
         memset(dqcoeff, 0, 256 * sizeof(dqcoeff[0]));
@@ -3016,6 +3016,24 @@
     }
   }
 }
+#else
+static void read_ext_tx_probs(FRAME_CONTEXT *fc, vpx_reader *r) {
+  int i, j, k;
+  if (vpx_read(r, GROUP_DIFF_UPDATE_PROB)) {
+    for (i = TX_4X4; i < EXT_TX_SIZES; ++i) {
+      for (j = 0; j < TX_TYPES; ++j)
+        for (k = 0; k < TX_TYPES - 1; ++k)
+          vp10_diff_update_prob(r, &fc->intra_ext_tx_prob[i][j][k]);
+    }
+  }
+  if (vpx_read(r, GROUP_DIFF_UPDATE_PROB)) {
+    for (i = TX_4X4; i < EXT_TX_SIZES; ++i) {
+      for (k = 0; k < TX_TYPES - 1; ++k)
+        vp10_diff_update_prob(r, &fc->inter_ext_tx_prob[i][k]);
+    }
+  }
+}
+
 #endif  // CONFIG_EXT_TX
 
 #if CONFIG_SUPERTX
@@ -3101,9 +3119,7 @@
         vp10_diff_update_prob(&r, &fc->y_mode_prob[j][i]);
 
     read_mv_probs(nmvc, cm->allow_high_precision_mv, &r);
-#if CONFIG_EXT_TX
     read_ext_tx_probs(fc, &r);
-#endif
 #if CONFIG_SUPERTX
     if (!xd->lossless[0])
       read_supertx_probs(fc, &r);
@@ -3148,12 +3164,13 @@
   assert(!memcmp(&cm->counts.tx, &zero_counts.tx, sizeof(cm->counts.tx)));
   assert(!memcmp(cm->counts.skip, zero_counts.skip, sizeof(cm->counts.skip)));
   assert(!memcmp(&cm->counts.mv, &zero_counts.mv, sizeof(cm->counts.mv)));
-
 #if CONFIG_EXT_TX
   assert(!memcmp(cm->counts.inter_ext_tx, zero_counts.inter_ext_tx,
                  sizeof(cm->counts.inter_ext_tx)));
   assert(!memcmp(cm->counts.intra_ext_tx, zero_counts.intra_ext_tx,
                  sizeof(cm->counts.intra_ext_tx)));
+#else
+  assert(!memcmp(cm->counts.intra_ext_tx, zero_counts.intra_ext_tx,
 #endif  // CONFIG_EXT_TX
 }
 #endif  // NDEBUG
diff --git a/vp10/decoder/decodemv.c b/vp10/decoder/decodemv.c
index f4386e4..ec93453 100644
--- a/vp10/decoder/decodemv.c
+++ b/vp10/decoder/decodemv.c
@@ -492,6 +492,20 @@
     } else {
       mbmi->tx_type = DCT_DCT;
     }
+#else
+  if (mbmi->tx_size < TX_32X32 &&
+      cm->base_qindex > 0 && !mbmi->skip &&
+      !segfeature_active(&cm->seg, mbmi->segment_id, SEG_LVL_SKIP)) {
+    FRAME_COUNTS *counts = xd->counts;
+    TX_TYPE tx_type_nom = intra_mode_to_tx_type_context[mbmi->mode];
+    mbmi->tx_type = vpx_read_tree(
+        r, vp10_ext_tx_tree,
+        cm->fc->intra_ext_tx_prob[mbmi->tx_size][tx_type_nom]);
+    if (counts)
+      ++counts->intra_ext_tx[mbmi->tx_size][tx_type_nom][mbmi->tx_type];
+  } else {
+    mbmi->tx_type = DCT_DCT;
+  }
 #endif  // CONFIG_EXT_TX
 
 #if CONFIG_EXT_INTRA
@@ -1047,6 +1061,28 @@
   } else {
     mbmi->tx_type = DCT_DCT;
   }
+#else
+  if (mbmi->tx_size < TX_32X32 &&
+      cm->base_qindex > 0 && !mbmi->skip &&
+      !segfeature_active(&cm->seg, mbmi->segment_id, SEG_LVL_SKIP)) {
+    FRAME_COUNTS *counts = xd->counts;
+    if (inter_block) {
+      mbmi->tx_type = vpx_read_tree(
+          r, vp10_ext_tx_tree,
+          cm->fc->inter_ext_tx_prob[mbmi->tx_size]);
+      if (counts)
+        ++counts->inter_ext_tx[mbmi->tx_size][mbmi->tx_type];
+    } else {
+      const TX_TYPE tx_type_nom = intra_mode_to_tx_type_context[mbmi->mode];
+      mbmi->tx_type = vpx_read_tree(
+          r, vp10_ext_tx_tree,
+          cm->fc->intra_ext_tx_prob[mbmi->tx_size][tx_type_nom]);
+      if (counts)
+        ++counts->intra_ext_tx[mbmi->tx_size][tx_type_nom][mbmi->tx_type];
+    }
+  } else {
+    mbmi->tx_type = DCT_DCT;
+  }
 #endif  // CONFIG_EXT_TX
 }
 
diff --git a/vp10/encoder/bitstream.c b/vp10/encoder/bitstream.c
index 4c0c6af..344f63d 100644
--- a/vp10/encoder/bitstream.c
+++ b/vp10/encoder/bitstream.c
@@ -82,6 +82,8 @@
 #if CONFIG_EXT_TX
 static struct vp10_token ext_tx_inter_encodings[EXT_TX_SETS_INTER][TX_TYPES];
 static struct vp10_token ext_tx_intra_encodings[EXT_TX_SETS_INTRA][TX_TYPES];
+#else
+static struct vp10_token ext_tx_encodings[TX_TYPES];
 #endif  // CONFIG_EXT_TX
 
 void vp10_encode_token_init() {
@@ -93,6 +95,8 @@
   for (s = 1; s < EXT_TX_SETS_INTRA; ++s) {
     vp10_tokens_from_tree(ext_tx_intra_encodings[s], vp10_ext_tx_intra_tree[s]);
   }
+#else
+  vp10_tokens_from_tree(ext_tx_encodings, vp10_ext_tx_tree);
 #endif  // CONFIG_EXT_TX
 }
 
@@ -306,6 +310,7 @@
                      counts->switchable_interp[j], SWITCHABLE_FILTERS, w);
 }
 
+
 #if CONFIG_EXT_TX
 static void update_ext_tx_probs(VP10_COMMON *cm, vpx_writer *w) {
   const int savings_thresh = vp10_cost_one(GROUP_DIFF_UPDATE_PROB) -
@@ -358,6 +363,49 @@
     }
   }
 }
+#else
+static void update_ext_tx_probs(VP10_COMMON *cm, vpx_writer *w) {
+  const int savings_thresh = vp10_cost_one(GROUP_DIFF_UPDATE_PROB) -
+                             vp10_cost_zero(GROUP_DIFF_UPDATE_PROB);
+  int i, j;
+
+  int savings = 0;
+  int do_update = 0;
+  for (i = TX_4X4; i < EXT_TX_SIZES; ++i) {
+    for (j = 0; j < TX_TYPES; ++j)
+      savings += prob_diff_update_savings(
+          vp10_ext_tx_tree, cm->fc->intra_ext_tx_prob[i][j],
+          cm->counts.intra_ext_tx[i][j], TX_TYPES);
+  }
+  do_update = savings > savings_thresh;
+  vpx_write(w, do_update, GROUP_DIFF_UPDATE_PROB);
+  if (do_update) {
+    for (i = TX_4X4; i < EXT_TX_SIZES; ++i) {
+      for (j = 0; j < TX_TYPES; ++j)
+        prob_diff_update(vp10_ext_tx_tree,
+                         cm->fc->intra_ext_tx_prob[i][j],
+                         cm->counts.intra_ext_tx[i][j],
+                         TX_TYPES, w);
+    }
+  }
+  savings = 0;
+  do_update = 0;
+  for (i = TX_4X4; i < EXT_TX_SIZES; ++i) {
+    savings += prob_diff_update_savings(
+        vp10_ext_tx_tree, cm->fc->inter_ext_tx_prob[i],
+        cm->counts.inter_ext_tx[i], TX_TYPES);
+  }
+  do_update = savings > savings_thresh;
+  vpx_write(w, do_update, GROUP_DIFF_UPDATE_PROB);
+  if (do_update) {
+    for (i = TX_4X4; i < EXT_TX_SIZES; ++i) {
+      prob_diff_update(vp10_ext_tx_tree,
+                       cm->fc->inter_ext_tx_prob[i],
+                       cm->counts.inter_ext_tx[i],
+                       TX_TYPES, w);
+    }
+  }
+}
 #endif  // CONFIG_EXT_TX
 
 static void pack_palette_tokens(vpx_writer *w, TOKENEXTRA **tp,
@@ -855,6 +903,29 @@
             &ext_tx_intra_encodings[eset][mbmi->tx_type]);
     }
   }
+#else
+  if (mbmi->tx_size < TX_32X32 &&
+      cm->base_qindex > 0 && !mbmi->skip &&
+#if CONFIG_SUPERTX
+      !supertx_enabled &&
+#endif  // CONFIG_SUPERTX
+      !segfeature_active(&cm->seg, mbmi->segment_id, SEG_LVL_SKIP)) {
+    if (is_inter) {
+      vp10_write_token(
+          w, vp10_ext_tx_tree,
+          cm->fc->inter_ext_tx_prob[mbmi->tx_size],
+          &ext_tx_encodings[mbmi->tx_type]);
+    } else {
+      vp10_write_token(
+          w, vp10_ext_tx_tree,
+          cm->fc->intra_ext_tx_prob[mbmi->tx_size]
+                                   [intra_mode_to_tx_type_context[mbmi->mode]],
+          &ext_tx_encodings[mbmi->tx_type]);
+    }
+  } else {
+    if (!mbmi->skip)
+      assert(mbmi->tx_type == DCT_DCT);
+  }
 #endif  // CONFIG_EXT_TX
 }
 
@@ -954,6 +1025,16 @@
           cm->fc->intra_ext_tx_prob[eset][mbmi->tx_size][mbmi->mode],
           &ext_tx_intra_encodings[eset][mbmi->tx_type]);
   }
+#else
+  if (mbmi->tx_size < TX_32X32 &&
+      cm->base_qindex > 0 && !mbmi->skip &&
+      !segfeature_active(&cm->seg, mbmi->segment_id, SEG_LVL_SKIP)) {
+    vp10_write_token(
+        w, vp10_ext_tx_tree,
+        cm->fc->intra_ext_tx_prob[mbmi->tx_size]
+                                 [intra_mode_to_tx_type_context[mbmi->mode]],
+        &ext_tx_encodings[mbmi->tx_type]);
+  }
 #endif  // CONFIG_EXT_TX
 
 #if CONFIG_EXT_INTRA
diff --git a/vp10/encoder/bitstream.h b/vp10/encoder/bitstream.h
index 0284920..9df03da 100644
--- a/vp10/encoder/bitstream.h
+++ b/vp10/encoder/bitstream.h
@@ -18,6 +18,7 @@
 
 #include "vp10/encoder/encoder.h"
 
+void vp10_encode_token_init();
 void vp10_pack_bitstream(VP10_COMP *const cpi, uint8_t *dest, size_t *size);
 
 void vp10_encode_token_init();
diff --git a/vp10/encoder/encodeframe.c b/vp10/encoder/encodeframe.c
index e516a5f..c1301f9 100644
--- a/vp10/encoder/encodeframe.c
+++ b/vp10/encoder/encodeframe.c
@@ -4381,6 +4381,18 @@
         }
       }
     }
+#else
+    if (mbmi->tx_size < TX_32X32 &&
+        cm->base_qindex > 0 && !mbmi->skip &&
+        !segfeature_active(&cm->seg, mbmi->segment_id, SEG_LVL_SKIP)) {
+      if (is_inter_block(mbmi)) {
+        ++td->counts->inter_ext_tx[mbmi->tx_size][mbmi->tx_type];
+      } else {
+        ++td->counts->intra_ext_tx[mbmi->tx_size]
+                                  [intra_mode_to_tx_type_context[mbmi->mode]]
+                                  [mbmi->tx_type];
+      }
+    }
 #endif  // CONFIG_EXT_TX
 #if CONFIG_EXT_INTRA
     if (bsize >= BLOCK_8X8 && !is_inter_block(mbmi)) {
diff --git a/vp10/encoder/encoder.c b/vp10/encoder/encoder.c
index 6d0fd19..a90d0c9 100644
--- a/vp10/encoder/encoder.c
+++ b/vp10/encoder/encoder.c
@@ -2732,7 +2732,7 @@
   MACROBLOCKD *xd = &cpi->td.mb.e_mbd;
   struct loopfilter *lf = &cm->lf;
   if (is_lossless_requested(&cpi->oxcf)) {
-      lf->filter_level = 0;
+    lf->filter_level = 0;
   } else {
     struct vpx_usec_timer timer;
 
diff --git a/vp10/encoder/encoder.h b/vp10/encoder/encoder.h
index 149e21a..707255d 100644
--- a/vp10/encoder/encoder.h
+++ b/vp10/encoder/encoder.h
@@ -486,16 +486,18 @@
                                                  [PALETTE_COLORS];
   int palette_uv_color_cost[PALETTE_MAX_SIZE - 1][PALETTE_COLOR_CONTEXTS]
                                                   [PALETTE_COLORS];
-#if CONFIG_EXT_TX
-  int inter_tx_type_costs[EXT_TX_SETS_INTER][EXT_TX_SIZES][TX_TYPES];
-  int intra_tx_type_costs[EXT_TX_SETS_INTRA][EXT_TX_SIZES][INTRA_MODES]
-                                                          [TX_TYPES];
-#endif  // CONFIG_EXT_TX
 
   int multi_arf_allowed;
   int multi_arf_enabled;
   int multi_arf_last_grp_enabled;
-
+#if CONFIG_EXT_TX
+  int inter_tx_type_costs[EXT_TX_SETS_INTER][EXT_TX_SIZES][TX_TYPES];
+  int intra_tx_type_costs[EXT_TX_SETS_INTRA][EXT_TX_SIZES][INTRA_MODES]
+                                                          [TX_TYPES];
+#else
+  int intra_tx_type_costs[EXT_TX_SIZES][TX_TYPES][TX_TYPES];
+  int inter_tx_type_costs[EXT_TX_SIZES][TX_TYPES];
+#endif  // CONFIG_EXT_TX
 #if CONFIG_VP9_TEMPORAL_DENOISING
   VP9_DENOISER denoiser;
 #endif
diff --git a/vp10/encoder/rd.c b/vp10/encoder/rd.c
index a1fd00d..5dcfa55 100644
--- a/vp10/encoder/rd.c
+++ b/vp10/encoder/rd.c
@@ -124,6 +124,18 @@
       }
     }
   }
+#else
+  for (i = TX_4X4; i < EXT_TX_SIZES; ++i) {
+    for (j = 0; j < TX_TYPES; ++j)
+      vp10_cost_tokens(cpi->intra_tx_type_costs[i][j],
+                       fc->intra_ext_tx_prob[i][j],
+                       vp10_ext_tx_tree);
+  }
+  for (i = TX_4X4; i < EXT_TX_SIZES; ++i) {
+    vp10_cost_tokens(cpi->inter_tx_type_costs[i],
+                     fc->inter_ext_tx_prob[i],
+                     vp10_ext_tx_tree);
+  }
 #endif  // CONFIG_EXT_TX
 }
 
diff --git a/vp10/encoder/rdopt.c b/vp10/encoder/rdopt.c
index d457199..ca978ba 100644
--- a/vp10/encoder/rdopt.c
+++ b/vp10/encoder/rdopt.c
@@ -83,8 +83,11 @@
 
 #if CONFIG_EXT_TX
 const double ext_tx_th = 0.98;
+#else
+const double ext_tx_th = 0.99;
 #endif
 
+
 typedef struct {
   PREDICTION_MODE mode;
   MV_REFERENCE_FRAME ref_frame[2];
@@ -790,16 +793,16 @@
   const TX_SIZE largest_tx_size = tx_mode_to_biggest_tx_size[cm->tx_mode];
   MACROBLOCKD *const xd = &x->e_mbd;
   MB_MODE_INFO *const mbmi = &xd->mi[0]->mbmi;
-#if CONFIG_EXT_TX
   TX_TYPE tx_type, best_tx_type = DCT_DCT;
   int r, s;
   int64_t d, psse, this_rd, best_rd = INT64_MAX;
   vpx_prob skip_prob = vp10_get_skip_prob(cm, xd);
   int  s0 = vp10_cost_bit(skip_prob, 0);
   int  s1 = vp10_cost_bit(skip_prob, 1);
+#if CONFIG_EXT_TX
   int ext_tx_set;
-  const int is_inter = is_inter_block(mbmi);
 #endif  // CONFIG_EXT_TX
+  const int is_inter = is_inter_block(mbmi);
 
   mbmi->tx_size = VPXMIN(max_tx_size, largest_tx_size);
 
@@ -815,7 +818,7 @@
           continue;
       } else {
         if (!ALLOW_INTRA_EXT_TX && bs >= BLOCK_8X8) {
-          if (tx_type != intra_mode_to_tx_type_lookup[mbmi->mode])
+          if (tx_type != intra_mode_to_tx_type_context[mbmi->mode])
             continue;
         }
         if (!ext_tx_used_intra[ext_tx_set][tx_type])
@@ -866,8 +869,41 @@
     }
   }
 
-  mbmi->tx_type = best_tx_type;
+#else  // CONFIG_EXT_TX
+  if (mbmi->tx_size < TX_32X32 &&
+      !xd->lossless[mbmi->segment_id]) {
+    for (tx_type = 0; tx_type < TX_TYPES; ++tx_type) {
+      mbmi->tx_type = tx_type;
+      txfm_rd_in_plane(x,
+#if CONFIG_VAR_TX
+                       cpi,
+#endif
+                       &r, &d, &s,
+                       &psse, ref_best_rd, 0, bs, mbmi->tx_size,
+                       cpi->sf.use_fast_coef_costing);
+      if (r == INT_MAX)
+        continue;
+      if (is_inter)
+        r += cpi->inter_tx_type_costs[mbmi->tx_size][mbmi->tx_type];
+      else
+        r += cpi->intra_tx_type_costs[mbmi->tx_size]
+                                     [intra_mode_to_tx_type_context[mbmi->mode]]
+                                     [mbmi->tx_type];
+      if (s)
+        this_rd = RDCOST(x->rdmult, x->rddiv, s1, psse);
+      else
+        this_rd = RDCOST(x->rdmult, x->rddiv, r + s0, d);
+      if (is_inter && !xd->lossless[mbmi->segment_id] && !s)
+        this_rd = VPXMIN(this_rd, RDCOST(x->rdmult, x->rddiv, s1, psse));
+
+      if (this_rd < ((best_tx_type == DCT_DCT) ? ext_tx_th : 1) * best_rd) {
+        best_rd = this_rd;
+        best_tx_type = mbmi->tx_type;
+      }
+    }
+  }
 #endif  // CONFIG_EXT_TX
+  mbmi->tx_type = best_tx_type;
 
   txfm_rd_in_plane(x,
 #if CONFIG_VAR_TX
@@ -892,6 +928,16 @@
                                                  [mbmi->mode][mbmi->tx_type];
     }
   }
+#else
+  if (mbmi->tx_size < TX_32X32 && !xd->lossless[mbmi->segment_id] &&
+      *rate != INT_MAX) {
+    if (is_inter)
+      *rate += cpi->inter_tx_type_costs[mbmi->tx_size][mbmi->tx_type];
+    else
+      *rate += cpi->intra_tx_type_costs[mbmi->tx_size]
+          [intra_mode_to_tx_type_context[mbmi->mode]]
+          [mbmi->tx_type];
+  }
 #endif  // CONFIG_EXT_TX
 }
 
@@ -935,11 +981,11 @@
   TX_SIZE best_tx = max_tx_size;
   int start_tx, end_tx;
   const int tx_select = cm->tx_mode == TX_MODE_SELECT;
-#if CONFIG_EXT_TX
   TX_TYPE tx_type, best_tx_type = DCT_DCT;
+  const int is_inter = is_inter_block(mbmi);
+#if CONFIG_EXT_TX
   int ext_tx_set;
 #endif  // CONFIG_EXT_TX
-  const int is_inter = is_inter_block(mbmi);
 
   const vpx_prob *tx_probs = get_tx_probs2(max_tx_size, xd, &cm->fc->tx_probs);
   assert(skip_prob > 0);
@@ -961,9 +1007,7 @@
   *skip       = 0;
   *psse       = INT64_MAX;
 
-#if CONFIG_EXT_TX
   for (tx_type = DCT_DCT; tx_type < TX_TYPES; ++tx_type) {
-#endif  // CONFIG_EXT_TX
     last_rd = INT64_MAX;
     for (n = start_tx; n >= end_tx; --n) {
       int r_tx_size = 0;
@@ -981,7 +1025,7 @@
           continue;
       } else {
         if (!ALLOW_INTRA_EXT_TX && bs >= BLOCK_8X8) {
-          if (tx_type != intra_mode_to_tx_type_lookup[mbmi->mode])
+          if (tx_type != intra_mode_to_tx_type_context[mbmi->mode])
             continue;
         }
         if (!ext_tx_used_intra[ext_tx_set][tx_type])
@@ -1015,6 +1059,10 @@
         }
       }
 #else  // CONFIG_EXT_TX
+      if (n >= TX_32X32 && tx_type != DCT_DCT) {
+        continue;
+      }
+      mbmi->tx_type = tx_type;
       txfm_rd_in_plane(x,
 #if CONFIG_VAR_TX
                        cpi,
@@ -1022,6 +1070,16 @@
                        &r, &d, &s,
                        &sse, ref_best_rd, 0, bs, n,
                        cpi->sf.use_fast_coef_costing);
+      if (n < TX_32X32 &&
+          !xd->lossless[xd->mi[0]->mbmi.segment_id] &&
+          r != INT_MAX) {
+        if (is_inter)
+          r += cpi->inter_tx_type_costs[mbmi->tx_size][mbmi->tx_type];
+        else
+          r += cpi->intra_tx_type_costs[mbmi->tx_size]
+              [intra_mode_to_tx_type_context[mbmi->mode]]
+              [mbmi->tx_type];
+      }
 #endif  // CONFIG_EXT_TX
 
       if (r == INT_MAX)
@@ -1046,19 +1104,13 @@
       // Early termination in transform size search.
       if (cpi->sf.tx_size_search_breakout &&
           (rd == INT64_MAX ||
-#if CONFIG_EXT_TX
            (s == 1 && tx_type != DCT_DCT && n < start_tx) ||
-#else
-           (s == 1 && n < start_tx) ||
-#endif
            (n < (int) max_tx_size && rd > last_rd)))
         break;
 
       last_rd = rd;
       if (rd <
-#if CONFIG_EXT_TX
           (is_inter && best_tx_type == DCT_DCT ? ext_tx_th : 1) *
-#endif  // CONFIG_EXT_TX
           best_rd) {
         best_tx = n;
         best_rd = rd;
@@ -1066,17 +1118,12 @@
         *rate       = r;
         *skip       = s;
         *psse       = sse;
-#if CONFIG_EXT_TX
         best_tx_type = mbmi->tx_type;
-#endif  // CONFIG_EXT_TX
       }
     }
-#if CONFIG_EXT_TX
   }
-#endif  // CONFIG_EXT_TX
 
   mbmi->tx_size = best_tx;
-#if CONFIG_EXT_TX
   mbmi->tx_type = best_tx_type;
   txfm_rd_in_plane(x,
 #if CONFIG_VAR_TX
@@ -1085,7 +1132,6 @@
                    &r, &d, &s,
                    &sse, ref_best_rd, 0, bs, best_tx,
                    cpi->sf.use_fast_coef_costing);
-#endif  // CONFIG_EXT_TX
 }
 
 static void super_block_yrd(VP10_COMP *cpi, MACROBLOCK *x, int *rate,
@@ -1938,9 +1984,7 @@
   const uint8_t *src = x->plane[0].src.buf;
   double hist[DIRECTIONAL_MODES];
 #endif  // CONFIG_EXT_INTRA
-#if CONFIG_EXT_TX
   TX_TYPE best_tx_type = DCT_DCT;
-#endif  // CONFIG_EXT_TX
   int *bmode_costs;
   PALETTE_MODE_INFO palette_mode_info;
   uint8_t *best_palette_color_map = cpi->common.allow_screen_content_tools ?
@@ -2043,9 +2087,7 @@
 #if CONFIG_EXT_INTRA
       best_angle_delta = mic->mbmi.angle_delta[0];
 #endif  // CONFIG_EXT_INTRA
-#if CONFIG_EXT_TX
       best_tx_type    = mic->mbmi.tx_type;
-#endif  // CONFIG_EXT_TX
       *rate           = this_rate;
       *rate_tokenonly = this_rate_tokenonly;
       *distortion     = this_distortion;
@@ -2066,9 +2108,7 @@
       mode_selected       = mic->mbmi.mode;
       best_tx             = mic->mbmi.tx_size;
       ext_intra_mode_info = mic->mbmi.ext_intra_mode_info;
-#if CONFIG_EXT_TX
       best_tx_type        = mic->mbmi.tx_type;
-#endif  // CONFIG_EXT_TX
     }
   }
 
@@ -2085,9 +2125,7 @@
 #if CONFIG_EXT_INTRA
   mic->mbmi.angle_delta[0] = best_angle_delta;
 #endif  // CONFIG_EXT_INTRA
-#if CONFIG_EXT_TX
   mic->mbmi.tx_type = best_tx_type;
-#endif  // CONFIG_EXT_TX
   mic->mbmi.palette_mode_info.palette_size[0] =
       palette_mode_info.palette_size[0];
   if (palette_mode_info.palette_size[0] > 0) {
@@ -2508,7 +2546,7 @@
         continue;
     } else {
       if (!ALLOW_INTRA_EXT_TX && bsize >= BLOCK_8X8) {
-        if (tx_type != intra_mode_to_tx_type_lookup[mbmi->mode])
+        if (tx_type != intra_mode_to_tx_type_context[mbmi->mode])
           continue;
       }
       if (!ext_tx_used_intra[ext_tx_set][tx_type])
diff --git a/vp10/encoder/subexp.h b/vp10/encoder/subexp.h
index bad23d6..64eb275 100644
--- a/vp10/encoder/subexp.h
+++ b/vp10/encoder/subexp.h
@@ -38,7 +38,6 @@
                                               int stepsize);
 int vp10_cond_prob_diff_update_savings(vpx_prob *oldp,
                                        const unsigned int ct[2]);
-
 #ifdef __cplusplus
 }  // extern "C"
 #endif