Comprehensive support for symmetric DST

Creates new hybrid transforms combining symmetric DST with
ADST, FLIPADST and DCT, for a total of 16 supported transform types.

derfl: +1.659% (up about 0.2%)

Change-Id: Idde1cecdb59527890bf05da740099c3f6a5b9764
diff --git a/vp10/common/blockd.h b/vp10/common/blockd.h
index 70983ca..597cf12 100644
--- a/vp10/common/blockd.h
+++ b/vp10/common/blockd.h
@@ -239,6 +239,13 @@
   DCT_ADST,
   FLIPADST_DCT,
   DCT_FLIPADST,
+  DST_DST,
+  DST_DCT,
+  DCT_DST,
+  DST_ADST,
+  ADST_DST,
+  DST_FLIPADST,
+  FLIPADST_DST,
 };
 #endif  // CONFIG_EXT_TX
 
diff --git a/vp10/common/entropymode.c b/vp10/common/entropymode.c
index b9d5af9..c638423 100644
--- a/vp10/common/entropymode.c
+++ b/vp10/common/entropymode.c
@@ -317,19 +317,26 @@
 #if CONFIG_EXT_TX
 const vpx_tree_index vp10_ext_tx_tree[TREE_SIZE(EXT_TX_TYPES)] = {
   -NORM, 2,
-  4, 10,
-  6, 8,
-  -ALT1, -ALT2,
-  -ALT3, -ALT4,
+  -ALT9, 4,  // array entries [2],[3]
+  6, 16,  // [4],[5]
+  8, 10,  // [6],[7]
+  -ALT10, -ALT11,  // [8],[9]
   12, 14,
-  -ALT5, -ALT6,
-  -ALT7, -ALT8,
+  -ALT1, -ALT2,  // [12],[13]
+  -ALT4, -ALT5,  // [14],[15]
+  18, 24,  // [16],[17]
+  20, 22,  // [18],[19]
+  -ALT12, -ALT13,  // [20],[21]
+  -ALT14, -ALT15,  // [22],[23]
+  26, 28,  // [24],[25]
+  -ALT3, -ALT6,  // [26],[27]
+  -ALT7, -ALT8  // [28],[29]
 };
 
 static const vpx_prob default_ext_tx_prob[EXT_TX_SIZES][EXT_TX_TYPES - 1] = {
-  { 240, 128, 128, 128, 128, 128, 128, 128 },
-  { 208, 128, 128, 128, 128, 128, 128, 128 },
-  { 176, 128, 128, 128, 128, 128, 128, 128 },
+  { 216, 20, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128 },
+  { 192, 20, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128 },
+  { 168, 20, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128 },
 };
 #endif  // CONFIG_EXT_TX
 
diff --git a/vp10/common/enums.h b/vp10/common/enums.h
index a63b493..a4a5440 100644
--- a/vp10/common/enums.h
+++ b/vp10/common/enums.h
@@ -100,6 +100,13 @@
   FLIPADST_FLIPADST = 6,
   ADST_FLIPADST = 7,
   FLIPADST_ADST = 8,
+  DST_DST = 9,
+  DST_DCT = 10,
+  DCT_DST = 11,
+  DST_ADST = 12,
+  ADST_DST = 13,
+  DST_FLIPADST = 14,
+  FLIPADST_DST = 15,
 #endif  // CONFIG_EXT_TX
   TX_TYPES,
 } TX_TYPE;
@@ -116,6 +123,13 @@
   ALT6 = 6,
   ALT7 = 7,
   ALT8 = 8,
+  ALT9 = 9,
+  ALT10 = 10,
+  ALT11 = 11,
+  ALT12 = 12,
+  ALT13 = 13,
+  ALT14 = 14,
+  ALT15 = 15,
   EXT_TX_TYPES
 } EXT_TX_TYPE;
 #endif  // CONFIG_EXT_TX
diff --git a/vp10/common/idct.c b/vp10/common/idct.c
index 6533690..9e1f870 100644
--- a/vp10/common/idct.c
+++ b/vp10/common/idct.c
@@ -17,45 +17,170 @@
 #include "vpx_dsp/inv_txfm.h"
 #include "vpx_ports/mem.h"
 
-void vp10_iht4x4_16_add_c(const tran_low_t *input, uint8_t *dest, int stride,
-                         int tx_type) {
-  const transform_2d IHT_4[] = {
-    { idct4_c, idct4_c  },  // DCT_DCT  = 0
-    { iadst4_c, idct4_c  },   // ADST_DCT = 1
-    { idct4_c, iadst4_c },    // DCT_ADST = 2
-    { iadst4_c, iadst4_c }      // ADST_ADST = 3
+#if CONFIG_EXT_TX
+void idst4_c(const tran_low_t *input, tran_low_t *output) {
+  static const int N = 4;  // 1-D sine transform: N inputs -> N outputs
+  static const int sinvalue_lookup_table[] = {  // ~16384*sin(k*pi/5), k=1..2
+    9630, 15582
   };
-
+  static const int mult = 14654;  // ~16384 * sqrt(4/5)
   int i, j;
-  tran_low_t out[4 * 4];
-  tran_low_t *outptr = out;
-  tran_low_t temp_in[4], temp_out[4];
-
-  // inverse transform row vectors
-  for (i = 0; i < 4; ++i) {
-    IHT_4[tx_type].rows(input, outptr);
-    input  += 4;
-    outptr += 4;
-  }
-
-  // inverse transform column vectors
-  for (i = 0; i < 4; ++i) {
-    for (j = 0; j < 4; ++j)
-      temp_in[j] = out[j * 4 + i];
-    IHT_4[tx_type].cols(temp_in, temp_out);
-    for (j = 0; j < 4; ++j) {
-      dest[j * stride + i] = clip_pixel_add(dest[j * stride + i],
-                                            ROUND_POWER_OF_TWO(temp_out[j], 4));
+  for (i = 0; i < N; i++) {  // one output coefficient per pass
+    int64_t sum = 0;
+    for (j = 0; j < N; j++) {
+      int idx = (i + 1) * (j + 1);  // sine argument in units of pi/(N+1)
+      int sign = 0;
+      if (idx > N + 1) {  // reduce mod N+1; odd quotient negates the term
+        sign = (idx / (N + 1)) & 1;
+        idx %= (N + 1);
+      }
+      idx = idx > N + 1 - idx ? N + 1 - idx : idx;  // mirror into 0..(N+1)/2
+      if (idx == 0) continue;  // no table entry: term contributes nothing
+      idx--;  // table is 0-based and starts at k = 1
+      sum += (int64_t)input[j] * sinvalue_lookup_table[idx] * (sign ? -1 : 1);
     }
+    sum = (sum * mult) >> (2 * DCT_CONST_BITS);  // unscale; 14-bit assumed
+    output[i] = WRAPLOW(sum, 8);  // NOTE(review): 8 presumably bit depth
   }
 }
 
-static const transform_2d IHT_8[] = {
-  { idct8_c,  idct8_c  },  // DCT_DCT  = 0
-  { iadst8_c, idct8_c  },  // ADST_DCT = 1
-  { idct8_c,  iadst8_c },  // DCT_ADST = 2
-  { iadst8_c, iadst8_c }   // ADST_ADST = 3
-};
+void idst8_c(const tran_low_t *input, tran_low_t *output) {
+  static const int N = 8;  // 1-D sine transform: N inputs -> N outputs
+  static const int sinvalue_lookup_table[] = {  // ~16384*sin(k*pi/9), k=1..4
+    5604, 10531, 14189, 16135
+  };
+  static const int mult = 15447;  // ~16384 * 2*sqrt(2/9)
+  int i, j;
+  for (i = 0; i < N; i++) {  // one output coefficient per pass
+    int64_t sum = 0;
+    for (j = 0; j < N; j++) {
+      int idx = (i + 1) * (j + 1);  // sine argument in units of pi/(N+1)
+      int sign = 0;
+      if (idx > N + 1) {  // reduce mod N+1; odd quotient negates the term
+        sign = (idx / (N + 1)) & 1;
+        idx %= (N + 1);
+      }
+      idx = idx > N + 1 - idx ? N + 1 - idx : idx;  // mirror into 0..(N+1)/2
+      if (idx == 0) continue;  // no table entry: term contributes nothing
+      idx--;  // table is 0-based and starts at k = 1
+      sum += (int64_t)input[j] * sinvalue_lookup_table[idx] * (sign ? -1 : 1);
+    }
+    sum = (sum * mult) >> (2 * DCT_CONST_BITS);  // unscale; 14-bit assumed
+    output[i] = WRAPLOW(sum, 8);  // NOTE(review): 8 presumably bit depth
+  }
+}
+
+void idst16_c(const tran_low_t *input, tran_low_t *output) {
+  static const int N = 16;  // 1-D sine transform: N inputs -> N outputs
+  static const int sinvalue_lookup_table[] = {  // ~16384*sin(k*pi/17), k=1..8
+    3011,  5919,  8625, 11038,
+    13075, 14666, 15759, 16314
+  };
+  static const int mult = 15895;  // ~16384 * 2*sqrt(4/17)
+  int i, j;
+  for (i = 0; i < N; i++) {  // one output coefficient per pass
+    int64_t sum = 0;
+    for (j = 0; j < N; j++) {
+      int idx = (i + 1) * (j + 1);  // sine argument in units of pi/(N+1)
+      int sign = 0;
+      if (idx > N + 1) {  // reduce mod N+1; odd quotient negates the term
+        sign = (idx / (N + 1)) & 1;
+        idx %= (N + 1);
+      }
+      idx = idx > N + 1 - idx ? N + 1 - idx : idx;  // mirror into 0..(N+1)/2
+      if (idx == 0) continue;  // no table entry: term contributes nothing
+      idx--;  // table is 0-based and starts at k = 1
+      sum += (int64_t)input[j] * sinvalue_lookup_table[idx] * (sign ? -1 : 1);
+    }
+    sum = (sum * mult) >> (2 * DCT_CONST_BITS);  // unscale; 14-bit assumed
+    output[i] = WRAPLOW(sum, 8);  // NOTE(review): 8 presumably bit depth
+  }
+}
+
+#if CONFIG_VP9_HIGHBITDEPTH
+void highbd_idst4_c(const tran_low_t *input, tran_low_t *output, int bd) {
+  static const int N = 4;  // 1-D sine transform: N inputs -> N outputs
+  static const int sinvalue_lookup_table[] = {  // ~16384*sin(k*pi/5), k=1..2
+    9630, 15582
+  };
+  static const int mult = 14654;  // ~16384 * sqrt(4/5)
+  int i, j;
+  (void) bd;  // NOTE(review): bd is still passed to WRAPLOW below
+  for (i = 0; i < N; i++) {  // one output coefficient per pass
+    int64_t sum = 0;
+    for (j = 0; j < N; j++) {
+      int idx = (i + 1) * (j + 1);  // sine argument in units of pi/(N+1)
+      int sign = 0;
+      if (idx > N + 1) {  // reduce mod N+1; odd quotient negates the term
+        sign = (idx / (N + 1)) & 1;
+        idx %= (N + 1);
+      }
+      idx = idx > N + 1 - idx ? N + 1 - idx : idx;  // mirror into 0..(N+1)/2
+      if (idx == 0) continue;  // no table entry: term contributes nothing
+      idx--;  // table is 0-based and starts at k = 1
+      sum += (int64_t)input[j] * sinvalue_lookup_table[idx] * (sign ? -1 : 1);
+    }
+    sum = (sum * mult) >> (2 * DCT_CONST_BITS);  // unscale; 14-bit assumed
+    output[i] = WRAPLOW(sum, bd);
+  }
+}
+
+void highbd_idst8_c(const tran_low_t *input, tran_low_t *output, int bd) {
+  static const int N = 8;  // 1-D sine transform: N inputs -> N outputs
+  static const int sinvalue_lookup_table[] = {  // ~16384*sin(k*pi/9), k=1..4
+    5604, 10531, 14189, 16135
+  };
+  static const int mult = 15447;  // ~16384 * 2*sqrt(2/9)
+  int i, j;
+  (void) bd;  // NOTE(review): bd is still passed to WRAPLOW below
+  for (i = 0; i < N; i++) {  // one output coefficient per pass
+    int64_t sum = 0;
+    for (j = 0; j < N; j++) {
+      int idx = (i + 1) * (j + 1);  // sine argument in units of pi/(N+1)
+      int sign = 0;
+      if (idx > N + 1) {  // reduce mod N+1; odd quotient negates the term
+        sign = (idx / (N + 1)) & 1;
+        idx %= (N + 1);
+      }
+      idx = idx > N + 1 - idx ? N + 1 - idx : idx;  // mirror into 0..(N+1)/2
+      if (idx == 0) continue;  // no table entry: term contributes nothing
+      idx--;  // table is 0-based and starts at k = 1
+      sum += (int64_t)input[j] * sinvalue_lookup_table[idx] * (sign ? -1 : 1);
+    }
+    sum = (sum * mult) >> (2 * DCT_CONST_BITS);  // unscale; 14-bit assumed
+    output[i] = WRAPLOW(sum, bd);
+  }
+}
+
+void highbd_idst16_c(const tran_low_t *input, tran_low_t *output, int bd) {
+  static const int N = 16;  // 1-D sine transform: N inputs -> N outputs
+  static const int sinvalue_lookup_table[] = {  // ~16384*sin(k*pi/17), k=1..8
+    3011,  5919,  8625, 11038,
+    13075, 14666, 15759, 16314
+  };
+  static const int mult = 15895;  // ~16384 * 2*sqrt(4/17)
+  int i, j;
+  (void) bd;  // NOTE(review): bd is still passed to WRAPLOW below
+  for (i = 0; i < N; i++) {  // one output coefficient per pass
+    int64_t sum = 0;
+    for (j = 0; j < N; j++) {
+      int idx = (i + 1) * (j + 1);  // sine argument in units of pi/(N+1)
+      int sign = 0;
+      if (idx > N + 1) {  // reduce mod N+1; odd quotient negates the term
+        sign = (idx / (N + 1)) & 1;
+        idx %= (N + 1);
+      }
+      idx = idx > N + 1 - idx ? N + 1 - idx : idx;  // mirror into 0..(N+1)/2
+      if (idx == 0) continue;  // no table entry: term contributes nothing
+      idx--;  // table is 0-based and starts at k = 1
+      sum += (int64_t)input[j] * sinvalue_lookup_table[idx] * (sign ? -1 : 1);
+    }
+    sum = (sum * mult) >> (2 * DCT_CONST_BITS);  // unscale; 14-bit assumed
+    output[i] = WRAPLOW(sum, bd);
+  }
+}
+#endif  // CONFIG_VP9_HIGHBITDEPTH
+#endif  // CONFIG_EXT_TX
 
 #if CONFIG_EXT_TX
 void fliplr(uint8_t *dest, int stride, int l) {
@@ -125,8 +250,76 @@
 }
 #endif  // CONFIG_EXT_TX
 
+void vp10_iht4x4_16_add_c(const tran_low_t *input, uint8_t *dest, int stride,
+                         int tx_type) {  // tx_type indexes IHT_4 directly
+  const transform_2d IHT_4[] = {  // pair of 1-D kernels per tx_type
+    { idct4_c, idct4_c  },   // DCT_DCT  = 0
+    { iadst4_c, idct4_c  },  // ADST_DCT = 1
+    { idct4_c, iadst4_c },   // DCT_ADST = 2
+    { iadst4_c, iadst4_c },  // ADST_ADST = 3
+#if CONFIG_EXT_TX  // FLIPADST rows below reuse the plain iadst4_c kernel
+    { iadst4_c, idct4_c },   // FLIPADST_DCT = 4
+    { idct4_c,  iadst4_c },  // DCT_FLIPADST = 5
+    { iadst4_c, iadst4_c },  // FLIPADST_FLIPADST = 6
+    { iadst4_c, iadst4_c },  // ADST_FLIPADST = 7
+    { iadst4_c, iadst4_c },  // FLIPADST_ADST = 8
+    { idst4_c,  idst4_c },   // DST_DST = 9
+    { idst4_c,  idct4_c  },  // DST_DCT = 10
+    { idct4_c,  idst4_c  },  // DCT_DST = 11
+    { idst4_c,  iadst4_c },  // DST_ADST = 12
+    { iadst4_c, idst4_c  },  // ADST_DST = 13
+    { idst4_c,  iadst4_c },  // DST_FLIPADST = 14
+    { iadst4_c, idst4_c  },  // FLIPADST_DST = 15
+#endif  // CONFIG_EXT_TX
+  };
+
+  int i, j;
+  tran_low_t out[4 * 4];  // intermediate block written by the row pass
+  tran_low_t *outptr = out;
+  tran_low_t temp_in[4], temp_out[4];  // one column in / out
+
+  // inverse transform row vectors
+  for (i = 0; i < 4; ++i) {
+    IHT_4[tx_type].rows(input, outptr);
+    input  += 4;  // advance to the next 4 input coefficients
+    outptr += 4;
+  }
+
+  // inverse transform column vectors
+  for (i = 0; i < 4; ++i) {
+    for (j = 0; j < 4; ++j)
+      temp_in[j] = out[j * 4 + i];  // gather column i of the row-pass result
+    IHT_4[tx_type].cols(temp_in, temp_out);
+    for (j = 0; j < 4; ++j) {  // round, then combine with the pixel in dest
+      dest[j * stride + i] = clip_pixel_add(dest[j * stride + i],
+                                            ROUND_POWER_OF_TWO(temp_out[j], 4));
+    }
+  }
+}
+
 void vp10_iht8x8_64_add_c(const tran_low_t *input, uint8_t *dest, int stride,
                          int tx_type) {
+  static const transform_2d IHT_8[] = {
+    { idct8_c,  idct8_c  },  // DCT_DCT  = 0
+    { iadst8_c, idct8_c  },  // ADST_DCT = 1
+    { idct8_c,  iadst8_c },  // DCT_ADST = 2
+    { iadst8_c, iadst8_c },  // ADST_ADST = 3
+#if CONFIG_EXT_TX
+    { iadst8_c, idct8_c },   // FLIPADST_DCT = 4
+    { idct8_c,  iadst8_c },  // DCT_FLIPADST = 5
+    { iadst8_c, iadst8_c },  // FLIPADST_FLIPADST = 6
+    { iadst8_c, iadst8_c },  // ADST_FLIPADST = 7
+    { iadst8_c, iadst8_c },  // FLIPADST_ADST = 8
+    { idst8_c,  idst8_c },   // DST_DST = 9
+    { idst8_c,  idct8_c  },  // DST_DCT = 10
+    { idct8_c,  idst8_c  },  // DCT_DST = 11
+    { idst8_c,  iadst8_c },  // DST_ADST = 12
+    { iadst8_c, idst8_c  },  // ADST_DST = 13
+    { idst8_c,  iadst8_c },  // DST_FLIPADST = 14
+    { iadst8_c, idst8_c  },  // FLIPADST_DST = 15
+#endif  // CONFIG_EXT_TX
+  };
+
   int i, j;
   tran_low_t out[8 * 8];
   tran_low_t *outptr = out;
@@ -152,15 +345,29 @@
   }
 }
 
-static const transform_2d IHT_16[] = {
-  { idct16_c,  idct16_c  },  // DCT_DCT  = 0
-  { iadst16_c, idct16_c  },  // ADST_DCT = 1
-  { idct16_c,  iadst16_c },  // DCT_ADST = 2
-  { iadst16_c, iadst16_c }   // ADST_ADST = 3
-};
-
 void vp10_iht16x16_256_add_c(const tran_low_t *input, uint8_t *dest, int stride,
                             int tx_type) {
+  static const transform_2d IHT_16[] = {
+    { idct16_c,  idct16_c  },  // DCT_DCT  = 0
+    { iadst16_c, idct16_c  },  // ADST_DCT = 1
+    { idct16_c,  iadst16_c },  // DCT_ADST = 2
+    { iadst16_c, iadst16_c },  // ADST_ADST = 3
+#if CONFIG_EXT_TX
+    { iadst16_c, idct16_c  },  // FLIPADST_DCT = 4
+    { idct16_c,  iadst16_c },  // DCT_FLIPADST = 5
+    { iadst16_c, iadst16_c },  // FLIPADST_FLIPADST = 6
+    { iadst16_c, iadst16_c },  // ADST_FLIPADST = 7
+    { iadst16_c, iadst16_c },  // FLIPADST_ADST = 8
+    { idst16_c,  idst16_c  },  // DST_DST = 9
+    { idst16_c,  idct16_c  },  // DST_DCT = 10
+    { idct16_c,  idst16_c  },  // DCT_DST = 11
+    { idst16_c,  iadst16_c },  // DST_ADST = 12
+    { iadst16_c, idst16_c  },  // ADST_DST = 13
+    { idst16_c,  iadst16_c },  // DST_FLIPADST = 14
+    { iadst16_c, idst16_c  },  // FLIPADST_DST = 15
+#endif  // CONFIG_EXT_TX
+  };
+
   int i, j;
   tran_low_t out[16 * 16];
   tran_low_t *outptr = out;
@@ -286,6 +493,24 @@
       vp10_iht4x4_16_add(input, dest, stride, ADST_ADST);
       flipud(dest, stride, 4);
       break;
+    case DST_DST:
+    case DST_DCT:
+    case DCT_DST:
+    case DST_ADST:
+    case ADST_DST:
+      // Use C version since DST only exists in C code
+      vp10_iht4x4_16_add_c(input, dest, stride, tx_type);
+      break;
+    case FLIPADST_DST:
+      flipud(dest, stride, 4);
+      vp10_iht4x4_16_add_c(input, dest, stride, ADST_DST);
+      flipud(dest, stride, 4);
+      break;
+    case DST_FLIPADST:
+      fliplr(dest, stride, 4);
+      vp10_iht4x4_16_add_c(input, dest, stride, DST_ADST);
+      fliplr(dest, stride, 4);
+      break;
 #endif  // CONFIG_EXT_TX
     default:
       assert(0);
@@ -330,6 +555,24 @@
       vp10_iht8x8_64_add(input, dest, stride, ADST_ADST);
       flipud(dest, stride, 8);
       break;
+    case DST_DST:
+    case DST_DCT:
+    case DCT_DST:
+    case DST_ADST:
+    case ADST_DST:
+      // Use C version since DST only exists in C code
+      vp10_iht8x8_64_add_c(input, dest, stride, tx_type);
+      break;
+    case FLIPADST_DST:
+      flipud(dest, stride, 8);
+      vp10_iht8x8_64_add_c(input, dest, stride, ADST_DST);
+      flipud(dest, stride, 8);
+      break;
+    case DST_FLIPADST:
+      fliplr(dest, stride, 8);
+      vp10_iht8x8_64_add_c(input, dest, stride, DST_ADST);
+      fliplr(dest, stride, 8);
+      break;
 #endif  // CONFIG_EXT_TX
     default:
       assert(0);
@@ -374,6 +617,24 @@
       vp10_iht16x16_256_add(input, dest, stride, ADST_ADST);
       flipud(dest, stride, 16);
       break;
+    case DST_DST:
+    case DST_DCT:
+    case DCT_DST:
+    case DST_ADST:
+    case ADST_DST:
+      // Use C version since DST only exists in C code
+      vp10_iht16x16_256_add_c(input, dest, stride, tx_type);
+      break;
+    case FLIPADST_DST:
+      flipud(dest, stride, 16);
+      vp10_iht16x16_256_add_c(input, dest, stride, ADST_DST);
+      flipud(dest, stride, 16);
+      break;
+    case DST_FLIPADST:
+      fliplr(dest, stride, 16);
+      vp10_iht16x16_256_add_c(input, dest, stride, DST_ADST);
+      fliplr(dest, stride, 16);
+      break;
 #endif  // CONFIG_EXT_TX
     default:
       assert(0);
@@ -402,10 +663,24 @@
 void vp10_highbd_iht4x4_16_add_c(const tran_low_t *input, uint8_t *dest8,
                                 int stride, int tx_type, int bd) {
   const highbd_transform_2d IHT_4[] = {
-    { vpx_highbd_idct4_c, vpx_highbd_idct4_c  },    // DCT_DCT  = 0
-    { vpx_highbd_iadst4_c, vpx_highbd_idct4_c },    // ADST_DCT = 1
-    { vpx_highbd_idct4_c, vpx_highbd_iadst4_c },    // DCT_ADST = 2
-    { vpx_highbd_iadst4_c, vpx_highbd_iadst4_c }    // ADST_ADST = 3
+    { vpx_highbd_idct4_c,  vpx_highbd_idct4_c  },  // DCT_DCT  = 0
+    { vpx_highbd_iadst4_c, vpx_highbd_idct4_c  },  // ADST_DCT = 1
+    { vpx_highbd_idct4_c,  vpx_highbd_iadst4_c },  // DCT_ADST = 2
+    { vpx_highbd_iadst4_c, vpx_highbd_iadst4_c },  // ADST_ADST = 3
+#if CONFIG_EXT_TX
+    { vpx_highbd_iadst4_c, vpx_highbd_idct4_c  },  // FLIPADST_DCT = 4
+    { vpx_highbd_idct4_c,  vpx_highbd_iadst4_c },  // DCT_FLIPADST = 5
+    { vpx_highbd_iadst4_c, vpx_highbd_iadst4_c },  // FLIPADST_FLIPADST = 6
+    { vpx_highbd_iadst4_c, vpx_highbd_iadst4_c },  // ADST_FLIPADST = 7
+    { vpx_highbd_iadst4_c, vpx_highbd_iadst4_c },  // FLIPADST_ADST = 8
+    { highbd_idst4_c,      highbd_idst4_c      },  // DST_DST = 9
+    { highbd_idst4_c,      vpx_highbd_idct4_c  },  // DST_DCT = 10
+    { vpx_highbd_idct4_c,  highbd_idst4_c      },  // DCT_DST = 11
+    { highbd_idst4_c,      vpx_highbd_iadst4_c },  // DST_ADST = 12
+    { vpx_highbd_iadst4_c, highbd_idst4_c      },  // ADST_DST = 13
+    { highbd_idst4_c,      vpx_highbd_iadst4_c },  // DST_FLIPADST = 14
+    { vpx_highbd_iadst4_c, highbd_idst4_c      },  // FLIPADST_DST = 15
+#endif  // CONFIG_EXT_TX
   };
   uint16_t *dest = CONVERT_TO_SHORTPTR(dest8);
 
@@ -433,15 +708,29 @@
   }
 }
 
-static const highbd_transform_2d HIGH_IHT_8[] = {
-  { vpx_highbd_idct8_c,  vpx_highbd_idct8_c  },  // DCT_DCT  = 0
-  { vpx_highbd_iadst8_c, vpx_highbd_idct8_c  },  // ADST_DCT = 1
-  { vpx_highbd_idct8_c,  vpx_highbd_iadst8_c },  // DCT_ADST = 2
-  { vpx_highbd_iadst8_c, vpx_highbd_iadst8_c }   // ADST_ADST = 3
-};
-
 void vp10_highbd_iht8x8_64_add_c(const tran_low_t *input, uint8_t *dest8,
                                 int stride, int tx_type, int bd) {
+  static const highbd_transform_2d HIGH_IHT_8[] = {
+    { vpx_highbd_idct8_c,  vpx_highbd_idct8_c  },  // DCT_DCT  = 0
+    { vpx_highbd_iadst8_c, vpx_highbd_idct8_c  },  // ADST_DCT = 1
+    { vpx_highbd_idct8_c,  vpx_highbd_iadst8_c },  // DCT_ADST = 2
+    { vpx_highbd_iadst8_c, vpx_highbd_iadst8_c },  // ADST_ADST = 3
+#if CONFIG_EXT_TX
+    { vpx_highbd_iadst8_c, vpx_highbd_idct8_c  },  // FLIPADST_DCT = 4
+    { vpx_highbd_idct8_c,  vpx_highbd_iadst8_c },  // DCT_FLIPADST = 5
+    { vpx_highbd_iadst8_c, vpx_highbd_iadst8_c },  // FLIPADST_FLIPADST = 6
+    { vpx_highbd_iadst8_c, vpx_highbd_iadst8_c },  // ADST_FLIPADST = 7
+    { vpx_highbd_iadst8_c, vpx_highbd_iadst8_c },  // FLIPADST_ADST = 8
+    { highbd_idst8_c,      highbd_idst8_c      },  // DST_DST = 9
+    { highbd_idst8_c,      vpx_highbd_idct8_c  },  // DST_DCT = 10
+    { vpx_highbd_idct8_c,  highbd_idst8_c      },  // DCT_DST = 11
+    { highbd_idst8_c,      vpx_highbd_iadst8_c },  // DST_ADST = 12
+    { vpx_highbd_iadst8_c, highbd_idst8_c      },  // ADST_DST = 13
+    { highbd_idst8_c,      vpx_highbd_iadst8_c },  // DST_FLIPADST = 14
+    { vpx_highbd_iadst8_c, highbd_idst8_c      },  // FLIPADST_DST = 15
+#endif  // CONFIG_EXT_TX
+  };
+
   int i, j;
   tran_low_t out[8 * 8];
   tran_low_t *outptr = out;
@@ -468,15 +757,29 @@
   }
 }
 
-static const highbd_transform_2d HIGH_IHT_16[] = {
-  { vpx_highbd_idct16_c,  vpx_highbd_idct16_c  },  // DCT_DCT  = 0
-  { vpx_highbd_iadst16_c, vpx_highbd_idct16_c  },  // ADST_DCT = 1
-  { vpx_highbd_idct16_c,  vpx_highbd_iadst16_c },  // DCT_ADST = 2
-  { vpx_highbd_iadst16_c, vpx_highbd_iadst16_c }   // ADST_ADST = 3
-};
-
 void vp10_highbd_iht16x16_256_add_c(const tran_low_t *input, uint8_t *dest8,
                                    int stride, int tx_type, int bd) {
+  static const highbd_transform_2d HIGH_IHT_16[] = {
+    { vpx_highbd_idct16_c,  vpx_highbd_idct16_c  },  // DCT_DCT  = 0
+    { vpx_highbd_iadst16_c, vpx_highbd_idct16_c  },  // ADST_DCT = 1
+    { vpx_highbd_idct16_c,  vpx_highbd_iadst16_c },  // DCT_ADST = 2
+    { vpx_highbd_iadst16_c, vpx_highbd_iadst16_c },  // ADST_ADST = 3
+#if CONFIG_EXT_TX
+    { vpx_highbd_iadst16_c, vpx_highbd_idct16_c  },  // FLIPADST_DCT = 4
+    { vpx_highbd_idct16_c,  vpx_highbd_iadst16_c },  // DCT_FLIPADST = 5
+    { vpx_highbd_iadst16_c, vpx_highbd_iadst16_c },  // FLIPADST_FLIPADST = 6
+    { vpx_highbd_iadst16_c, vpx_highbd_iadst16_c },  // ADST_FLIPADST = 7
+    { vpx_highbd_iadst16_c, vpx_highbd_iadst16_c },  // FLIPADST_ADST = 8
+    { highbd_idst16_c,      highbd_idst16_c      },  // DST_DST = 9
+    { highbd_idst16_c,      vpx_highbd_idct16_c  },  // DST_DCT = 10
+    { vpx_highbd_idct16_c,  highbd_idst16_c      },  // DCT_DST = 11
+    { highbd_idst16_c,      vpx_highbd_iadst16_c },  // DST_ADST = 12
+    { vpx_highbd_iadst16_c, highbd_idst16_c      },  // ADST_DST = 13
+    { highbd_idst16_c,      vpx_highbd_iadst16_c },  // DST_FLIPADST = 14
+    { vpx_highbd_iadst16_c, highbd_idst16_c      },  // FLIPADST_DST = 15
+#endif  // CONFIG_EXT_TX
+  };
+
   int i, j;
   tran_low_t out[16 * 16];
   tran_low_t *outptr = out;
@@ -606,6 +909,24 @@
       vp10_highbd_iht4x4_16_add(input, dest, stride, ADST_ADST, bd);
       flipud16(CONVERT_TO_SHORTPTR(dest), stride, 4);
       break;
+    case DST_DST:
+    case DST_DCT:
+    case DCT_DST:
+    case DST_ADST:
+    case ADST_DST:
+      // Use C version since DST only exists in C code
+      vp10_highbd_iht4x4_16_add_c(input, dest, stride, tx_type, bd);
+      break;
+    case FLIPADST_DST:
+      flipud16(CONVERT_TO_SHORTPTR(dest), stride, 4);
+      vp10_highbd_iht4x4_16_add_c(input, dest, stride, ADST_DST, bd);
+      flipud16(CONVERT_TO_SHORTPTR(dest), stride, 4);
+      break;
+    case DST_FLIPADST:
+      fliplr16(CONVERT_TO_SHORTPTR(dest), stride, 4);
+      vp10_highbd_iht4x4_16_add_c(input, dest, stride, DST_ADST, bd);
+      fliplr16(CONVERT_TO_SHORTPTR(dest), stride, 4);
+      break;
 #endif  // CONFIG_EXT_TX
     default:
       assert(0);
@@ -651,6 +972,24 @@
       vp10_highbd_iht8x8_64_add(input, dest, stride, ADST_ADST, bd);
       flipud16(CONVERT_TO_SHORTPTR(dest), stride, 8);
       break;
+    case DST_DST:
+    case DST_DCT:
+    case DCT_DST:
+    case DST_ADST:
+    case ADST_DST:
+      // Use C version since DST only exists in C code
+      vp10_highbd_iht8x8_64_add_c(input, dest, stride, tx_type, bd);
+      break;
+    case FLIPADST_DST:
+      flipud16(CONVERT_TO_SHORTPTR(dest), stride, 8);
+      vp10_highbd_iht8x8_64_add_c(input, dest, stride, ADST_DST, bd);
+      flipud16(CONVERT_TO_SHORTPTR(dest), stride, 8);
+      break;
+    case DST_FLIPADST:
+      fliplr16(CONVERT_TO_SHORTPTR(dest), stride, 8);
+      vp10_highbd_iht8x8_64_add_c(input, dest, stride, DST_ADST, bd);
+      fliplr16(CONVERT_TO_SHORTPTR(dest), stride, 8);
+      break;
 #endif  // CONFIG_EXT_TX
     default:
       assert(0);
@@ -696,6 +1035,24 @@
       vp10_highbd_iht16x16_256_add(input, dest, stride, ADST_ADST, bd);
       flipud16(CONVERT_TO_SHORTPTR(dest), stride, 16);
       break;
+    case DST_DST:
+    case DST_DCT:
+    case DCT_DST:
+    case DST_ADST:
+    case ADST_DST:
+      // Use C version since DST only exists in C code
+      vp10_highbd_iht16x16_256_add_c(input, dest, stride, tx_type, bd);
+      break;
+    case FLIPADST_DST:
+      flipud16(CONVERT_TO_SHORTPTR(dest), stride, 16);
+      vp10_highbd_iht16x16_256_add_c(input, dest, stride, ADST_DST, bd);
+      flipud16(CONVERT_TO_SHORTPTR(dest), stride, 16);
+      break;
+    case DST_FLIPADST:
+      fliplr16(CONVERT_TO_SHORTPTR(dest), stride, 16);
+      vp10_highbd_iht16x16_256_add_c(input, dest, stride, DST_ADST, bd);
+      fliplr16(CONVERT_TO_SHORTPTR(dest), stride, 16);
+      break;
 #endif  // CONFIG_EXT_TX
     default:
       assert(0);
diff --git a/vp10/common/scan.c b/vp10/common/scan.c
index 4cb78e8..57095d9 100644
--- a/vp10/common/scan.c
+++ b/vp10/common/scan.c
@@ -696,7 +696,6 @@
 };
 
 #if CONFIG_EXT_TX
-
 const scan_order vp10_intra_scan_orders[TX_SIZES][TX_TYPES] = {
   {  // TX_4X4
     {default_scan_4x4, vp10_default_iscan_4x4, default_scan_4x4_neighbors},
@@ -708,6 +707,13 @@
     {default_scan_4x4, vp10_default_iscan_4x4, default_scan_4x4_neighbors},
     {default_scan_4x4, vp10_default_iscan_4x4, default_scan_4x4_neighbors},
     {default_scan_4x4, vp10_default_iscan_4x4, default_scan_4x4_neighbors},
+    {default_scan_4x4, vp10_default_iscan_4x4, default_scan_4x4_neighbors},
+    {default_scan_4x4, vp10_default_iscan_4x4, default_scan_4x4_neighbors},
+    {default_scan_4x4, vp10_default_iscan_4x4, default_scan_4x4_neighbors},
+    {default_scan_4x4, vp10_default_iscan_4x4, default_scan_4x4_neighbors},
+    {default_scan_4x4, vp10_default_iscan_4x4, default_scan_4x4_neighbors},
+    {default_scan_4x4, vp10_default_iscan_4x4, default_scan_4x4_neighbors},
+    {default_scan_4x4, vp10_default_iscan_4x4, default_scan_4x4_neighbors},
   }, {  // TX_8X8
     {default_scan_8x8, vp10_default_iscan_8x8, default_scan_8x8_neighbors},
     {row_scan_8x8,     vp10_row_iscan_8x8,     row_scan_8x8_neighbors},
@@ -718,6 +724,13 @@
     {default_scan_8x8, vp10_default_iscan_8x8, default_scan_8x8_neighbors},
     {default_scan_8x8, vp10_default_iscan_8x8, default_scan_8x8_neighbors},
     {default_scan_8x8, vp10_default_iscan_8x8, default_scan_8x8_neighbors},
+    {default_scan_8x8, vp10_default_iscan_8x8, default_scan_8x8_neighbors},
+    {default_scan_8x8, vp10_default_iscan_8x8, default_scan_8x8_neighbors},
+    {default_scan_8x8, vp10_default_iscan_8x8, default_scan_8x8_neighbors},
+    {default_scan_8x8, vp10_default_iscan_8x8, default_scan_8x8_neighbors},
+    {default_scan_8x8, vp10_default_iscan_8x8, default_scan_8x8_neighbors},
+    {default_scan_8x8, vp10_default_iscan_8x8, default_scan_8x8_neighbors},
+    {default_scan_8x8, vp10_default_iscan_8x8, default_scan_8x8_neighbors},
   }, {  // TX_16X16
     {default_scan_16x16, vp10_default_iscan_16x16, default_scan_16x16_neighbors},
     {row_scan_16x16,     vp10_row_iscan_16x16,     row_scan_16x16_neighbors},
@@ -728,6 +741,13 @@
     {default_scan_16x16, vp10_default_iscan_16x16, default_scan_16x16_neighbors},
     {default_scan_16x16, vp10_default_iscan_16x16, default_scan_16x16_neighbors},
     {default_scan_16x16, vp10_default_iscan_16x16, default_scan_16x16_neighbors},
+    {default_scan_16x16, vp10_default_iscan_16x16, default_scan_16x16_neighbors},
+    {default_scan_16x16, vp10_default_iscan_16x16, default_scan_16x16_neighbors},
+    {default_scan_16x16, vp10_default_iscan_16x16, default_scan_16x16_neighbors},
+    {default_scan_16x16, vp10_default_iscan_16x16, default_scan_16x16_neighbors},
+    {default_scan_16x16, vp10_default_iscan_16x16, default_scan_16x16_neighbors},
+    {default_scan_16x16, vp10_default_iscan_16x16, default_scan_16x16_neighbors},
+    {default_scan_16x16, vp10_default_iscan_16x16, default_scan_16x16_neighbors},
   }, {  // TX_32X32
     {default_scan_32x32, vp10_default_iscan_32x32, default_scan_32x32_neighbors},
     {default_scan_32x32, vp10_default_iscan_32x32, default_scan_32x32_neighbors},
@@ -738,6 +758,13 @@
     {default_scan_32x32, vp10_default_iscan_32x32, default_scan_32x32_neighbors},
     {default_scan_32x32, vp10_default_iscan_32x32, default_scan_32x32_neighbors},
     {default_scan_32x32, vp10_default_iscan_32x32, default_scan_32x32_neighbors},
+    {default_scan_32x32, vp10_default_iscan_32x32, default_scan_32x32_neighbors},
+    {default_scan_32x32, vp10_default_iscan_32x32, default_scan_32x32_neighbors},
+    {default_scan_32x32, vp10_default_iscan_32x32, default_scan_32x32_neighbors},
+    {default_scan_32x32, vp10_default_iscan_32x32, default_scan_32x32_neighbors},
+    {default_scan_32x32, vp10_default_iscan_32x32, default_scan_32x32_neighbors},
+    {default_scan_32x32, vp10_default_iscan_32x32, default_scan_32x32_neighbors},
+    {default_scan_32x32, vp10_default_iscan_32x32, default_scan_32x32_neighbors},
   }
 };
 
@@ -752,6 +779,13 @@
     {default_scan_4x4, vp10_default_iscan_4x4, default_scan_4x4_neighbors},
     {default_scan_4x4, vp10_default_iscan_4x4, default_scan_4x4_neighbors},
     {default_scan_4x4, vp10_default_iscan_4x4, default_scan_4x4_neighbors},
+    {default_scan_4x4, vp10_default_iscan_4x4, default_scan_4x4_neighbors},
+    {default_scan_4x4, vp10_default_iscan_4x4, default_scan_4x4_neighbors},
+    {default_scan_4x4, vp10_default_iscan_4x4, default_scan_4x4_neighbors},
+    {default_scan_4x4, vp10_default_iscan_4x4, default_scan_4x4_neighbors},
+    {default_scan_4x4, vp10_default_iscan_4x4, default_scan_4x4_neighbors},
+    {default_scan_4x4, vp10_default_iscan_4x4, default_scan_4x4_neighbors},
+    {default_scan_4x4, vp10_default_iscan_4x4, default_scan_4x4_neighbors},
   }, {  // TX_8X8
     {default_scan_8x8, vp10_default_iscan_8x8, default_scan_8x8_neighbors},
     {default_scan_8x8, vp10_default_iscan_8x8, default_scan_8x8_neighbors},
@@ -762,6 +796,13 @@
     {default_scan_8x8, vp10_default_iscan_8x8, default_scan_8x8_neighbors},
     {default_scan_8x8, vp10_default_iscan_8x8, default_scan_8x8_neighbors},
     {default_scan_8x8, vp10_default_iscan_8x8, default_scan_8x8_neighbors},
+    {default_scan_8x8, vp10_default_iscan_8x8, default_scan_8x8_neighbors},
+    {default_scan_8x8, vp10_default_iscan_8x8, default_scan_8x8_neighbors},
+    {default_scan_8x8, vp10_default_iscan_8x8, default_scan_8x8_neighbors},
+    {default_scan_8x8, vp10_default_iscan_8x8, default_scan_8x8_neighbors},
+    {default_scan_8x8, vp10_default_iscan_8x8, default_scan_8x8_neighbors},
+    {default_scan_8x8, vp10_default_iscan_8x8, default_scan_8x8_neighbors},
+    {default_scan_8x8, vp10_default_iscan_8x8, default_scan_8x8_neighbors},
   }, {  // TX_16X16
     {default_scan_16x16, vp10_default_iscan_16x16, default_scan_16x16_neighbors},
     {default_scan_16x16, vp10_default_iscan_16x16, default_scan_16x16_neighbors},
@@ -772,6 +813,13 @@
     {default_scan_16x16, vp10_default_iscan_16x16, default_scan_16x16_neighbors},
     {default_scan_16x16, vp10_default_iscan_16x16, default_scan_16x16_neighbors},
     {default_scan_16x16, vp10_default_iscan_16x16, default_scan_16x16_neighbors},
+    {default_scan_16x16, vp10_default_iscan_16x16, default_scan_16x16_neighbors},
+    {default_scan_16x16, vp10_default_iscan_16x16, default_scan_16x16_neighbors},
+    {default_scan_16x16, vp10_default_iscan_16x16, default_scan_16x16_neighbors},
+    {default_scan_16x16, vp10_default_iscan_16x16, default_scan_16x16_neighbors},
+    {default_scan_16x16, vp10_default_iscan_16x16, default_scan_16x16_neighbors},
+    {default_scan_16x16, vp10_default_iscan_16x16, default_scan_16x16_neighbors},
+    {default_scan_16x16, vp10_default_iscan_16x16, default_scan_16x16_neighbors},
   }, {  // TX_32X32
     {default_scan_32x32, vp10_default_iscan_32x32, default_scan_32x32_neighbors},
     {default_scan_32x32, vp10_default_iscan_32x32, default_scan_32x32_neighbors},
@@ -782,6 +830,13 @@
     {default_scan_32x32, vp10_default_iscan_32x32, default_scan_32x32_neighbors},
     {default_scan_32x32, vp10_default_iscan_32x32, default_scan_32x32_neighbors},
     {default_scan_32x32, vp10_default_iscan_32x32, default_scan_32x32_neighbors},
+    {default_scan_32x32, vp10_default_iscan_32x32, default_scan_32x32_neighbors},
+    {default_scan_32x32, vp10_default_iscan_32x32, default_scan_32x32_neighbors},
+    {default_scan_32x32, vp10_default_iscan_32x32, default_scan_32x32_neighbors},
+    {default_scan_32x32, vp10_default_iscan_32x32, default_scan_32x32_neighbors},
+    {default_scan_32x32, vp10_default_iscan_32x32, default_scan_32x32_neighbors},
+    {default_scan_32x32, vp10_default_iscan_32x32, default_scan_32x32_neighbors},
+    {default_scan_32x32, vp10_default_iscan_32x32, default_scan_32x32_neighbors},
   }
 };
 
@@ -810,5 +865,4 @@
     {default_scan_32x32, vp10_default_iscan_32x32, default_scan_32x32_neighbors},
   }
 };
-
 #endif  // CONFIG_EXT_TX
diff --git a/vp10/encoder/dct.c b/vp10/encoder/dct.c
index 288d5d0..cb1ede2 100644
--- a/vp10/encoder/dct.c
+++ b/vp10/encoder/dct.c
@@ -20,6 +20,84 @@
 #include "vpx_dsp/fwd_txfm.h"
 #include "vpx_ports/mem.h"
 
+#if CONFIG_EXT_TX
+// 4-point symmetric DST, rounded to nearest:
+//   output[i] = mult * sum_j input[j] * sin(pi * (i + 1) * (j + 1) / 5)
+// Table entries (k = 1, 2) and mult are 2^14 fixed point.
+void fdst4(const tran_low_t *input, tran_low_t *output) {
+  static const int N = 4;
+  static const int sinvalue_lookup_table[] = { 9630, 15582 };
+  static const int mult = 14654;  // sqrt(4/5)
+  const int shift = 2 * DCT_CONST_BITS;  // presumably 28; TODO confirm
+  int i, j;
+  for (i = 0; i < N; i++) {
+    int64_t sum = 0;
+    for (j = 0; j < N; j++) {
+      // sin(pi * phase / (N + 1)): mirror the remainder about (N + 1) / 2
+      // into the table; odd multiples of N + 1 negate the term.
+      const int phase = (i + 1) * (j + 1);
+      int idx = phase % (N + 1);
+      if (idx > N + 1 - idx) idx = N + 1 - idx;
+      if (idx == 0) continue;  // sine of a multiple of pi is zero
+      sum += (int64_t)input[j] * sinvalue_lookup_table[idx - 1] *
+             (((phase / (N + 1)) & 1) ? -1 : 1);
+    }
+    output[i] = (sum * mult + ((int64_t)1 << (shift - 1))) >> shift;
+  }
+}
+
+// 8-point symmetric DST, rounded to nearest:
+//   output[i] = mult * sum_j input[j] * sin(pi * (i + 1) * (j + 1) / 9)
+// Table entries (k = 1..4) and mult are 2^14 fixed point.
+void fdst8(const tran_low_t *input, tran_low_t *output) {
+  static const int N = 8;
+  static const int sinvalue_lookup_table[] = { 5604, 10531, 14189, 16135 };
+  static const int mult = 15447;  // 2*sqrt(2/9)
+  const int shift = 2 * DCT_CONST_BITS;  // presumably 28; TODO confirm
+  int i, j;
+  for (i = 0; i < N; i++) {
+    int64_t sum = 0;
+    for (j = 0; j < N; j++) {
+      // sin(pi * phase / (N + 1)): mirror the remainder about (N + 1) / 2
+      // into the table; odd multiples of N + 1 negate the term.
+      const int phase = (i + 1) * (j + 1);
+      int idx = phase % (N + 1);
+      if (idx > N + 1 - idx) idx = N + 1 - idx;
+      if (idx == 0) continue;  // sine of a multiple of pi is zero
+      sum += (int64_t)input[j] * sinvalue_lookup_table[idx - 1] *
+             (((phase / (N + 1)) & 1) ? -1 : 1);
+    }
+    output[i] = (sum * mult + ((int64_t)1 << (shift - 1))) >> shift;
+  }
+}
+
+// 16-point symmetric DST, rounded to nearest:
+//   output[i] = mult * sum_j input[j] * sin(pi * (i + 1) * (j + 1) / 17)
+// Table entries (k = 1..8) and mult are 2^14 fixed point.
+void fdst16(const tran_low_t *input, tran_low_t *output) {
+  static const int N = 16;
+  static const int sinvalue_lookup_table[] = { 3011,  5919,  8625,  11038,
+                                               13075, 14666, 15759, 16314 };
+  static const int mult = 15895;  // 2*sqrt(4/17)
+  const int shift = 2 * DCT_CONST_BITS;  // presumably 28; TODO confirm
+  int i, j;
+  for (i = 0; i < N; i++) {
+    int64_t sum = 0;
+    for (j = 0; j < N; j++) {
+      // sin(pi * phase / (N + 1)): mirror the remainder about (N + 1) / 2
+      // into the table; odd multiples of N + 1 negate the term.
+      const int phase = (i + 1) * (j + 1);
+      int idx = phase % (N + 1);
+      if (idx > N + 1 - idx) idx = N + 1 - idx;
+      if (idx == 0) continue;  // sine of a multiple of pi is zero
+      sum += (int64_t)input[j] * sinvalue_lookup_table[idx - 1] *
+             (((phase / (N + 1)) & 1) ? -1 : 1);
+    }
+    output[i] = (sum * mult + ((int64_t)1 << (shift - 1))) >> shift;
+  }
+}
+#endif  // CONFIG_EXT_TX
+
 static void fdct4(const tran_low_t *input, tran_low_t *output) {
   tran_high_t step[4];
   tran_high_t temp1, temp2;
@@ -510,25 +588,67 @@
   { fdct4,  fdct4  },  // DCT_DCT  = 0
   { fadst4, fdct4  },  // ADST_DCT = 1
   { fdct4,  fadst4 },  // DCT_ADST = 2
-  { fadst4, fadst4 }   // ADST_ADST = 3
+  { fadst4, fadst4 },  // ADST_ADST = 3
+#if CONFIG_EXT_TX
+  { fadst4, fdct4  },  // FLIPADST_DCT = 4
+  { fdct4,  fadst4 },  // DCT_FLIPADST = 5
+  { fadst4, fadst4 },  // FLIPADST_FLIPADST = 6
+  { fadst4, fadst4 },  // ADST_FLIPADST = 7
+  { fadst4, fadst4 },  // FLIPADST_ADST = 8
+  { fdst4,  fdst4  },  // DST_DST = 9
+  { fdst4,  fdct4  },  // DST_DCT = 10
+  { fdct4,  fdst4  },  // DCT_DST = 11
+  { fdst4,  fadst4 },  // DST_ADST = 12
+  { fadst4, fdst4  },  // ADST_DST = 13
+  { fdst4,  fadst4 },  // DST_FLIPADST = 14
+  { fadst4, fdst4  },  // FLIPADST_DST = 15
+#endif  // CONFIG_EXT_TX
 };
 
 static const transform_2d FHT_8[] = {
   { fdct8,  fdct8  },  // DCT_DCT  = 0
   { fadst8, fdct8  },  // ADST_DCT = 1
   { fdct8,  fadst8 },  // DCT_ADST = 2
-  { fadst8, fadst8 }   // ADST_ADST = 3
+  { fadst8, fadst8 },  // ADST_ADST = 3
+#if CONFIG_EXT_TX  // rows 4-15; FLIPADST rows carry no flip of their own
+  { fadst8, fdct8  },  // FLIPADST_DCT = 4 (same pair as ADST_DCT)
+  { fdct8,  fadst8 },  // DCT_FLIPADST = 5 (same pair as DCT_ADST)
+  { fadst8, fadst8 },  // FLIPADST_FLIPADST = 6 (same pair as ADST_ADST)
+  { fadst8, fadst8 },  // ADST_FLIPADST = 7 (same pair as ADST_ADST)
+  { fadst8, fadst8 },  // FLIPADST_ADST = 8 (same pair as ADST_ADST)
+  { fdst8,  fdst8  },  // DST_DST = 9
+  { fdst8,  fdct8  },  // DST_DCT = 10
+  { fdct8,  fdst8  },  // DCT_DST = 11
+  { fdst8,  fadst8 },  // DST_ADST = 12
+  { fadst8, fdst8  },  // ADST_DST = 13
+  { fdst8,  fadst8 },  // DST_FLIPADST = 14 (same pair as DST_ADST)
+  { fadst8, fdst8  },  // FLIPADST_DST = 15 (same pair as ADST_DST)
+#endif  // CONFIG_EXT_TX
 };
 
 static const transform_2d FHT_16[] = {
   { fdct16,  fdct16  },  // DCT_DCT  = 0
   { fadst16, fdct16  },  // ADST_DCT = 1
   { fdct16,  fadst16 },  // DCT_ADST = 2
-  { fadst16, fadst16 }   // ADST_ADST = 3
+  { fadst16, fadst16 },  // ADST_ADST = 3
+#if CONFIG_EXT_TX  // rows 4-15; FLIPADST rows carry no flip of their own
+  { fadst16, fdct16  },  // FLIPADST_DCT = 4 (same pair as ADST_DCT)
+  { fdct16,  fadst16 },  // DCT_FLIPADST = 5 (same pair as DCT_ADST)
+  { fadst16, fadst16 },  // FLIPADST_FLIPADST = 6 (same pair as ADST_ADST)
+  { fadst16, fadst16 },  // ADST_FLIPADST = 7 (same pair as ADST_ADST)
+  { fadst16, fadst16 },  // FLIPADST_ADST = 8 (same pair as ADST_ADST)
+  { fdst16,  fdst16  },  // DST_DST = 9
+  { fdst16,  fdct16  },  // DST_DCT = 10
+  { fdct16,  fdst16  },  // DCT_DST = 11
+  { fdst16,  fadst16 },  // DST_ADST = 12
+  { fadst16, fdst16  },  // ADST_DST = 13
+  { fdst16,  fadst16 },  // DST_FLIPADST = 14 (same pair as DST_ADST)
+  { fadst16, fdst16  },  // FLIPADST_DST = 15 (same pair as ADST_DST)
+#endif  // CONFIG_EXT_TX
 };
 
 void vp10_fht4x4_c(const int16_t *input, tran_low_t *output,
-                  int stride, int tx_type) {
+                   int stride, int tx_type) {
   if (tx_type == DCT_DCT) {
     vpx_fdct4x4_c(input, output, stride);
   } else {
@@ -560,15 +680,15 @@
 }
 
 void vp10_fdct8x8_quant_c(const int16_t *input, int stride,
-                         tran_low_t *coeff_ptr, intptr_t n_coeffs,
-                         int skip_block,
-                         const int16_t *zbin_ptr, const int16_t *round_ptr,
-                         const int16_t *quant_ptr,
-                         const int16_t *quant_shift_ptr,
-                         tran_low_t *qcoeff_ptr, tran_low_t *dqcoeff_ptr,
-                         const int16_t *dequant_ptr,
-                         uint16_t *eob_ptr,
-                         const int16_t *scan, const int16_t *iscan) {
+                          tran_low_t *coeff_ptr, intptr_t n_coeffs,
+                          int skip_block,
+                          const int16_t *zbin_ptr, const int16_t *round_ptr,
+                          const int16_t *quant_ptr,
+                          const int16_t *quant_shift_ptr,
+                          tran_low_t *qcoeff_ptr, tran_low_t *dqcoeff_ptr,
+                          const int16_t *dequant_ptr,
+                          uint16_t *eob_ptr,
+                          const int16_t *scan, const int16_t *iscan) {
   int eob = -1;
 
   int i, j;
@@ -672,7 +792,7 @@
 }
 
 void vp10_fht8x8_c(const int16_t *input, tran_low_t *output,
-                  int stride, int tx_type) {
+                   int stride, int tx_type) {
   if (tx_type == DCT_DCT) {
     vpx_fdct8x8_c(input, output, stride);
   } else {
@@ -758,7 +878,7 @@
 }
 
 void vp10_fht16x16_c(const int16_t *input, tran_low_t *output,
-                    int stride, int tx_type) {
+                     int stride, int tx_type) {
   if (tx_type == DCT_DCT) {
     vpx_fdct16x16_c(input, output, stride);
   } else {
diff --git a/vp10/encoder/encodemb.c b/vp10/encoder/encodemb.c
index 561835f..418a629 100644
--- a/vp10/encoder/encodemb.c
+++ b/vp10/encoder/encodemb.c
@@ -424,6 +424,22 @@
       copy_flipud(src_diff, diff_stride, 4, src_diff2, 4);
       vp10_fht4x4(src_diff2, coeff, 4, ADST_ADST);
       break;
+    case DST_DST:
+    case DCT_DST:
+    case DST_DCT:
+    case DST_ADST:
+    case ADST_DST:
+      // Use C version since DST exists only in C
+      vp10_fht4x4_c(src_diff, coeff, diff_stride, tx_type);
+      break;
+    case DST_FLIPADST:
+      copy_fliplr(src_diff, diff_stride, 4, src_diff2, 4);
+      vp10_fht4x4_c(src_diff2, coeff, 4, DST_ADST);
+      break;
+    case FLIPADST_DST:
+      copy_flipud(src_diff, diff_stride, 4, src_diff2, 4);
+      vp10_fht4x4_c(src_diff2, coeff, 4, ADST_DST);
+      break;
 #endif  // CONFIG_EXT_TX
     default:
       assert(0);
@@ -464,6 +480,22 @@
       copy_flipud(src_diff, diff_stride, 8, src_diff2, 8);
       vp10_fht8x8(src_diff2, coeff, 8, ADST_ADST);
       break;
+    case DST_DST:
+    case DCT_DST:
+    case DST_DCT:
+    case DST_ADST:
+    case ADST_DST:
+      // Use C version since DST exists only in C
+      vp10_fht8x8_c(src_diff, coeff, diff_stride, tx_type);
+      break;
+    case DST_FLIPADST:
+      copy_fliplr(src_diff, diff_stride, 8, src_diff2, 8);
+      vp10_fht8x8_c(src_diff2, coeff, 8, DST_ADST);
+      break;
+    case FLIPADST_DST:
+      copy_flipud(src_diff, diff_stride, 8, src_diff2, 8);
+      vp10_fht8x8_c(src_diff2, coeff, 8, ADST_DST);
+      break;
 #endif  // CONFIG_EXT_TX
     default:
       assert(0);
@@ -504,6 +536,22 @@
       copy_flipud(src_diff, diff_stride, 8, src_diff2, 8);
       vp10_fht8x8(src_diff2, coeff, 8, ADST_ADST);
       break;
+    case DST_DST:
+    case DCT_DST:
+    case DST_DCT:
+    case DST_ADST:
+    case ADST_DST:
+      // Use C version since DST exists only in C
+      vp10_fht8x8_c(src_diff, coeff, diff_stride, tx_type);
+      break;
+    case DST_FLIPADST:
+      copy_fliplr(src_diff, diff_stride, 8, src_diff2, 8);
+      vp10_fht8x8_c(src_diff2, coeff, 8, DST_ADST);
+      break;
+    case FLIPADST_DST:
+      copy_flipud(src_diff, diff_stride, 8, src_diff2, 8);
+      vp10_fht8x8_c(src_diff2, coeff, 8, ADST_DST);
+      break;
 #endif  // CONFIG_EXT_TX
     default:
       assert(0);
@@ -544,6 +592,22 @@
       copy_flipud(src_diff, diff_stride, 16, src_diff2, 16);
       vp10_fht16x16(src_diff2, coeff, 16, ADST_ADST);
       break;
+    case DST_DST:
+    case DCT_DST:
+    case DST_DCT:
+    case DST_ADST:
+    case ADST_DST:
+      // Use C version since DST exists only in C
+      vp10_fht16x16_c(src_diff, coeff, diff_stride, tx_type);
+      break;
+    case DST_FLIPADST:
+      copy_fliplr(src_diff, diff_stride, 16, src_diff2, 16);
+      vp10_fht16x16_c(src_diff2, coeff, 16, DST_ADST);
+      break;
+    case FLIPADST_DST:
+      copy_flipud(src_diff, diff_stride, 16, src_diff2, 16);
+      vp10_fht16x16_c(src_diff2, coeff, 16, ADST_DST);
+      break;
 #endif  // CONFIG_EXT_TX
     default:
       assert(0);
@@ -584,6 +648,22 @@
       copy_flipud(src_diff, diff_stride, 16, src_diff2, 16);
       vp10_fht16x16(src_diff2, coeff, 16, ADST_ADST);
       break;
+    case DST_DST:
+    case DCT_DST:
+    case DST_DCT:
+    case DST_ADST:
+    case ADST_DST:
+      // Use C version since DST exists only in C
+      vp10_fht16x16_c(src_diff, coeff, diff_stride, tx_type);
+      break;
+    case DST_FLIPADST:
+      copy_fliplr(src_diff, diff_stride, 16, src_diff2, 16);
+      vp10_fht16x16_c(src_diff2, coeff, 16, DST_ADST);
+      break;
+    case FLIPADST_DST:
+      copy_flipud(src_diff, diff_stride, 16, src_diff2, 16);
+      vp10_fht16x16_c(src_diff2, coeff, 16, ADST_DST);
+      break;
 #endif  // CONFIG_EXT_TX
     default:
       assert(0);
@@ -665,6 +745,22 @@
       copy_flipud(src_diff, diff_stride, 4, src_diff2, 4);
       vp10_highbd_fht4x4(src_diff2, coeff, 4, ADST_ADST);
       break;
+    case DST_DST:
+    case DCT_DST:
+    case DST_DCT:
+    case DST_ADST:
+    case ADST_DST:
+      // Use C version since DST exists only in C
+      vp10_highbd_fht4x4_c(src_diff, coeff, diff_stride, tx_type);
+      break;
+    case DST_FLIPADST:
+      copy_fliplr(src_diff, diff_stride, 4, src_diff2, 4);
+      vp10_highbd_fht4x4_c(src_diff2, coeff, 4, DST_ADST);
+      break;
+    case FLIPADST_DST:
+      copy_flipud(src_diff, diff_stride, 4, src_diff2, 4);
+      vp10_highbd_fht4x4_c(src_diff2, coeff, 4, ADST_DST);
+      break;
 #endif  // CONFIG_EXT_TX
     default:
       assert(0);
@@ -707,6 +803,22 @@
       copy_flipud(src_diff, diff_stride, 8, src_diff2, 8);
       vp10_highbd_fht8x8(src_diff2, coeff, 8, ADST_ADST);
       break;
+    case DST_DST:
+    case DCT_DST:
+    case DST_DCT:
+    case DST_ADST:
+    case ADST_DST:
+      // Use C version since DST exists only in C
+      vp10_highbd_fht8x8_c(src_diff, coeff, diff_stride, tx_type);
+      break;
+    case DST_FLIPADST:
+      copy_fliplr(src_diff, diff_stride, 8, src_diff2, 8);
+      vp10_highbd_fht8x8_c(src_diff2, coeff, 8, DST_ADST);
+      break;
+    case FLIPADST_DST:
+      copy_flipud(src_diff, diff_stride, 8, src_diff2, 8);
+      vp10_highbd_fht8x8_c(src_diff2, coeff, 8, ADST_DST);
+      break;
 #endif  // CONFIG_EXT_TX
     default:
       assert(0);
@@ -749,6 +861,22 @@
       copy_flipud(src_diff, diff_stride, 8, src_diff2, 8);
       vp10_highbd_fht8x8(src_diff2, coeff, 8, ADST_ADST);
       break;
+    case DST_DST:
+    case DCT_DST:
+    case DST_DCT:
+    case DST_ADST:
+    case ADST_DST:
+      // Use C version since DST exists only in C
+      vp10_highbd_fht8x8_c(src_diff, coeff, diff_stride, tx_type);
+      break;
+    case DST_FLIPADST:
+      copy_fliplr(src_diff, diff_stride, 8, src_diff2, 8);
+      vp10_highbd_fht8x8_c(src_diff2, coeff, 8, DST_ADST);
+      break;
+    case FLIPADST_DST:
+      copy_flipud(src_diff, diff_stride, 8, src_diff2, 8);
+      vp10_highbd_fht8x8_c(src_diff2, coeff, 8, ADST_DST);
+      break;
 #endif  // CONFIG_EXT_TX
     default:
       assert(0);
@@ -791,6 +919,22 @@
       copy_flipud(src_diff, diff_stride, 16, src_diff2, 16);
       vp10_highbd_fht16x16(src_diff2, coeff, 16, ADST_ADST);
       break;
+    case DST_DST:
+    case DCT_DST:
+    case DST_DCT:
+    case DST_ADST:
+    case ADST_DST:
+      // Use C version since DST exists only in C
+      vp10_highbd_fht16x16_c(src_diff, coeff, diff_stride, tx_type);
+      break;
+    case DST_FLIPADST:
+      copy_fliplr(src_diff, diff_stride, 16, src_diff2, 16);
+      vp10_highbd_fht16x16_c(src_diff2, coeff, 16, DST_ADST);
+      break;
+    case FLIPADST_DST:
+      copy_flipud(src_diff, diff_stride, 16, src_diff2, 16);
+      vp10_highbd_fht16x16_c(src_diff2, coeff, 16, ADST_DST);
+      break;
 #endif  // CONFIG_EXT_TX
     default:
       assert(0);
@@ -833,6 +977,22 @@
       copy_flipud(src_diff, diff_stride, 16, src_diff2, 16);
       vp10_highbd_fht16x16(src_diff2, coeff, 16, ADST_ADST);
       break;
+    case DST_DST:
+    case DCT_DST:
+    case DST_DCT:
+    case DST_ADST:
+    case ADST_DST:
+      // Use C version since DST exists only in C
+      vp10_highbd_fht16x16_c(src_diff, coeff, diff_stride, tx_type);
+      break;
+    case DST_FLIPADST:
+      copy_fliplr(src_diff, diff_stride, 16, src_diff2, 16);
+      vp10_highbd_fht16x16_c(src_diff2, coeff, 16, DST_ADST);
+      break;
+    case FLIPADST_DST:
+      copy_flipud(src_diff, diff_stride, 16, src_diff2, 16);
+      vp10_highbd_fht16x16_c(src_diff2, coeff, 16, ADST_DST);
+      break;
 #endif  // CONFIG_EXT_TX
     default:
       assert(0);