Merge changes I92819356,I50b5a313,I807e60c6,I8a8df9fd into nextgenv2

* changes:
  Branch dct to new implementation for bd12
  Change dct32x32's range
  Fit dct's stage range into 32-bit when bitdepth is 12
  Pass tx_type into get_tx_scale
diff --git a/vp10/common/idct.c b/vp10/common/idct.c
index b06a5e9..da50c03 100644
--- a/vp10/common/idct.c
+++ b/vp10/common/idct.c
@@ -24,11 +24,7 @@
   (void) tx_type;
 #if CONFIG_VP9_HIGHBITDEPTH
   if (xd->cur_buf->flags & YV12_FLAG_HIGHBITDEPTH) {
-    if (xd->bd == BITDEPTH_10) {
-      return 0;
-    } else {
-      return tx_size == TX_32X32;
-    }
+    return tx_size == TX_32X32;
   }
 #else
   (void)xd;
@@ -1306,11 +1302,8 @@
 
   switch (tx_type) {
     case DCT_DCT:
-      if (bd == BITDEPTH_10)
-        vp10_inv_txfm2d_add_4x4(input, CONVERT_TO_SHORTPTR(dest), stride,
-                                &inv_txfm_2d_cfg_dct_dct_4, bd);
-      else
-        vp10_highbd_idct4x4_add(input, dest, stride, eob, bd);
+      vp10_inv_txfm2d_add_4x4(input, CONVERT_TO_SHORTPTR(dest), stride,
+                              &inv_txfm_2d_cfg_dct_dct_4, bd);
       break;
     case ADST_DCT:
     case DCT_ADST:
@@ -1347,13 +1340,11 @@
 void vp10_highbd_inv_txfm_add_8x8(const tran_low_t *input, uint8_t *dest,
                                   int stride, int eob, int bd,
                                   TX_TYPE tx_type) {
+  (void)eob;
   switch (tx_type) {
     case DCT_DCT:
-      if (bd == BITDEPTH_10)
-        vp10_inv_txfm2d_add_8x8(input, CONVERT_TO_SHORTPTR(dest), stride,
-                                &inv_txfm_2d_cfg_dct_dct_8, bd);
-      else
-        vp10_highbd_idct8x8_add(input, dest, stride, eob, bd);
+      vp10_inv_txfm2d_add_8x8(input, CONVERT_TO_SHORTPTR(dest), stride,
+                              &inv_txfm_2d_cfg_dct_dct_8, bd);
       break;
     case ADST_DCT:
     case DCT_ADST:
@@ -1390,13 +1381,11 @@
 void vp10_highbd_inv_txfm_add_16x16(const tran_low_t *input, uint8_t *dest,
                                     int stride, int eob, int bd,
                                     TX_TYPE tx_type) {
+  (void)eob;
   switch (tx_type) {
     case DCT_DCT:
-      if (bd == BITDEPTH_10)
-        vp10_inv_txfm2d_add_16x16(input, CONVERT_TO_SHORTPTR(dest), stride,
-                                  &inv_txfm_2d_cfg_dct_dct_16, bd);
-      else
-        vp10_highbd_idct16x16_add(input, dest, stride, eob, bd);
+      vp10_inv_txfm2d_add_16x16(input, CONVERT_TO_SHORTPTR(dest), stride,
+                                &inv_txfm_2d_cfg_dct_dct_16, bd);
       break;
     case ADST_DCT:
     case DCT_ADST:
@@ -1433,13 +1422,11 @@
 void vp10_highbd_inv_txfm_add_32x32(const tran_low_t *input, uint8_t *dest,
                                     int stride, int eob, int bd,
                                     TX_TYPE tx_type) {
+  (void)eob;
   switch (tx_type) {
     case DCT_DCT:
-      if (bd == BITDEPTH_10)
-        vp10_inv_txfm2d_add_32x32(input, CONVERT_TO_SHORTPTR(dest), stride,
-                                  &inv_txfm_2d_cfg_dct_dct_32, bd);
-      else
-        vp10_highbd_idct32x32_add(input, dest, stride, eob, bd);
+      vp10_inv_txfm2d_add_32x32(input, CONVERT_TO_SHORTPTR(dest), stride,
+                                &inv_txfm_2d_cfg_dct_dct_32, bd);
       break;
 #if CONFIG_EXT_TX
     case ADST_DCT:
diff --git a/vp10/common/vp10_fwd_txfm2d_cfg.h b/vp10/common/vp10_fwd_txfm2d_cfg.h
index 3c0a906..10050b7 100644
--- a/vp10/common/vp10_fwd_txfm2d_cfg.h
+++ b/vp10/common/vp10_fwd_txfm2d_cfg.h
@@ -12,16 +12,17 @@
 #define VP10_FWD_TXFM2D_CFG_H_
 #include "vp10/common/vp10_fwd_txfm1d.h"
 //  ---------------- config fwd_dct_dct_4 ----------------
-static const int8_t fwd_shift_dct_dct_4[3] = {4, 0, -2};
+static const int8_t fwd_shift_dct_dct_4[3] = {2, 0, 0};
 static const int8_t fwd_stage_range_col_dct_dct_4[4] = {15, 16, 17, 17};
 static const int8_t fwd_stage_range_row_dct_dct_4[4] = {17, 18, 18, 18};
-static const int8_t fwd_cos_bit_col_dct_dct_4[4] = {15, 15, 15, 15};
-static const int8_t fwd_cos_bit_row_dct_dct_4[4] = {15, 14, 14, 14};
+static const int8_t fwd_cos_bit_col_dct_dct_4[4] = {13, 13, 13, 13};
+static const int8_t fwd_cos_bit_row_dct_dct_4[4] = {13, 13, 13, 13};
 
 static const TXFM_2D_CFG fwd_txfm_2d_cfg_dct_dct_4 = {
-    4,                              // .txfm_size
-    4,                              // .stage_num_col
-    4,                              // .stage_num_row
+    4,  // .txfm_size
+    4,  // .stage_num_col
+    4,  // .stage_num_row
+    // 0,  // .log_scale
     fwd_shift_dct_dct_4,            // .shift
     fwd_stage_range_col_dct_dct_4,  // .stage_range_col
     fwd_stage_range_row_dct_dct_4,  // .stage_range_row
@@ -31,16 +32,17 @@
     TXFM_TYPE_DCT4};                // .txfm_type_row
 
 //  ---------------- config fwd_dct_dct_8 ----------------
-static const int8_t fwd_shift_dct_dct_8[3] = {5, -3, -1};
+static const int8_t fwd_shift_dct_dct_8[3] = {3, -1, -1};
 static const int8_t fwd_stage_range_col_dct_dct_8[6] = {16, 17, 18, 19, 19, 19};
-static const int8_t fwd_stage_range_row_dct_dct_8[6] = {16, 17, 18, 18, 18, 18};
-static const int8_t fwd_cos_bit_col_dct_dct_8[6] = {15, 15, 14, 13, 13, 13};
-static const int8_t fwd_cos_bit_row_dct_dct_8[6] = {15, 15, 14, 14, 14, 14};
+static const int8_t fwd_stage_range_row_dct_dct_8[6] = {18, 19, 20, 20, 20, 20};
+static const int8_t fwd_cos_bit_col_dct_dct_8[6] = {13, 13, 13, 13, 13, 13};
+static const int8_t fwd_cos_bit_row_dct_dct_8[6] = {13, 13, 12, 12, 12, 12};
 
 static const TXFM_2D_CFG fwd_txfm_2d_cfg_dct_dct_8 = {
-    8,                              // .txfm_size
-    6,                              // .stage_num_col
-    6,                              // .stage_num_row
+    8,  // .txfm_size
+    6,  // .stage_num_col
+    6,  // .stage_num_row
+    // 0,  // .log_scale
     fwd_shift_dct_dct_8,            // .shift
     fwd_stage_range_col_dct_dct_8,  // .stage_range_col
     fwd_stage_range_row_dct_dct_8,  // .stage_range_row
@@ -50,20 +52,21 @@
     TXFM_TYPE_DCT8};                // .txfm_type_row
 
 //  ---------------- config fwd_dct_dct_16 ----------------
-static const int8_t fwd_shift_dct_dct_16[3] = {4, -3, -1};
+static const int8_t fwd_shift_dct_dct_16[3] = {2, -2, 0};
 static const int8_t fwd_stage_range_col_dct_dct_16[8] = {15, 16, 17, 18,
                                                          19, 19, 19, 19};
-static const int8_t fwd_stage_range_row_dct_dct_16[8] = {16, 17, 18, 19,
-                                                         19, 19, 19, 19};
-static const int8_t fwd_cos_bit_col_dct_dct_16[8] = {15, 15, 15, 14,
+static const int8_t fwd_stage_range_row_dct_dct_16[8] = {17, 18, 19, 20,
+                                                         20, 20, 20, 20};
+static const int8_t fwd_cos_bit_col_dct_dct_16[8] = {13, 13, 13, 13,
                                                      13, 13, 13, 13};
-static const int8_t fwd_cos_bit_row_dct_dct_16[8] = {15, 15, 14, 13,
-                                                     13, 13, 13, 13};
+static const int8_t fwd_cos_bit_row_dct_dct_16[8] = {13, 13, 13, 12,
+                                                     12, 12, 12, 12};
 
 static const TXFM_2D_CFG fwd_txfm_2d_cfg_dct_dct_16 = {
-    16,                              // .txfm_size
-    8,                               // .stage_num_col
-    8,                               // .stage_num_row
+    16,  // .txfm_size
+    8,   // .stage_num_col
+    8,   // .stage_num_row
+    // 0,  // .log_scale
     fwd_shift_dct_dct_16,            // .shift
     fwd_stage_range_col_dct_dct_16,  // .stage_range_col
     fwd_stage_range_row_dct_dct_16,  // .stage_range_row
@@ -73,20 +76,21 @@
     TXFM_TYPE_DCT16};                // .txfm_type_row
 
 //  ---------------- config fwd_dct_dct_32 ----------------
-static const int8_t fwd_shift_dct_dct_32[3] = {3, -3, -1};
-static const int8_t fwd_stage_range_col_dct_dct_32[10] = {14, 15, 16, 17, 18,
-                                                          19, 19, 19, 19, 19};
+static const int8_t fwd_shift_dct_dct_32[3] = {2, -4, 0};
+static const int8_t fwd_stage_range_col_dct_dct_32[10] = {15, 16, 17, 18, 19,
+                                                          20, 20, 20, 20, 20};
 static const int8_t fwd_stage_range_row_dct_dct_32[10] = {16, 17, 18, 19, 20,
                                                           20, 20, 20, 20, 20};
-static const int8_t fwd_cos_bit_col_dct_dct_32[10] = {15, 15, 15, 15, 14,
-                                                      13, 13, 13, 13, 13};
-static const int8_t fwd_cos_bit_row_dct_dct_32[10] = {15, 15, 14, 13, 12,
+static const int8_t fwd_cos_bit_col_dct_dct_32[10] = {13, 13, 13, 13, 13,
+                                                      12, 12, 12, 12, 12};
+static const int8_t fwd_cos_bit_row_dct_dct_32[10] = {13, 13, 13, 13, 12,
                                                       12, 12, 12, 12, 12};
 
 static const TXFM_2D_CFG fwd_txfm_2d_cfg_dct_dct_32 = {
-    32,                              // .txfm_size
-    10,                              // .stage_num_col
-    10,                              // .stage_num_row
+    32,  // .txfm_size
+    10,  // .stage_num_col
+    10,  // .stage_num_row
+    // 1,  // .log_scale
     fwd_shift_dct_dct_32,            // .shift
     fwd_stage_range_col_dct_dct_32,  // .stage_range_col
     fwd_stage_range_row_dct_dct_32,  // .stage_range_row
diff --git a/vp10/common/vp10_inv_txfm2d_cfg.h b/vp10/common/vp10_inv_txfm2d_cfg.h
index ee965ba..f1bb2c7 100644
--- a/vp10/common/vp10_inv_txfm2d_cfg.h
+++ b/vp10/common/vp10_inv_txfm2d_cfg.h
@@ -12,16 +12,17 @@
 #define VP10_INV_TXFM2D_CFG_H_
 #include "vp10/common/vp10_inv_txfm1d.h"
 //  ---------------- config inv_dct_dct_4 ----------------
-static const int8_t inv_shift_dct_dct_4[2] = {1, -5};
-static const int8_t inv_stage_range_col_dct_dct_4[4] = {17, 17, 16, 16};
-static const int8_t inv_stage_range_row_dct_dct_4[4] = {16, 16, 16, 16};
-static const int8_t inv_cos_bit_col_dct_dct_4[4] = {15, 15, 15, 15};
-static const int8_t inv_cos_bit_row_dct_dct_4[4] = {15, 15, 15, 15};
+static const int8_t inv_shift_dct_dct_4[2] = {0, -4};
+static const int8_t inv_stage_range_col_dct_dct_4[4] = {18, 18, 17, 17};
+static const int8_t inv_stage_range_row_dct_dct_4[4] = {18, 18, 18, 18};
+static const int8_t inv_cos_bit_col_dct_dct_4[4] = {13, 13, 13, 13};
+static const int8_t inv_cos_bit_row_dct_dct_4[4] = {13, 13, 13, 13};
 
 static const TXFM_2D_CFG inv_txfm_2d_cfg_dct_dct_4 = {
-    4,                              // .txfm_size
-    4,                              // .stage_num_col
-    4,                              // .stage_num_row
+    4,  // .txfm_size
+    4,  // .stage_num_col
+    4,  // .stage_num_row
+    // 0,  // .log_scale
     inv_shift_dct_dct_4,            // .shift
     inv_stage_range_col_dct_dct_4,  // .stage_range_col
     inv_stage_range_row_dct_dct_4,  // .stage_range_row
@@ -31,16 +32,17 @@
     TXFM_TYPE_DCT4};                // .txfm_type_row
 
 //  ---------------- config inv_dct_dct_8 ----------------
-static const int8_t inv_shift_dct_dct_8[2] = {0, -5};
-static const int8_t inv_stage_range_col_dct_dct_8[6] = {17, 17, 17, 17, 16, 16};
-static const int8_t inv_stage_range_row_dct_dct_8[6] = {17, 17, 17, 17, 17, 17};
-static const int8_t inv_cos_bit_col_dct_dct_8[6] = {15, 15, 15, 15, 15, 15};
-static const int8_t inv_cos_bit_row_dct_dct_8[6] = {15, 15, 15, 15, 15, 15};
+static const int8_t inv_shift_dct_dct_8[2] = {1, -6};
+static const int8_t inv_stage_range_col_dct_dct_8[6] = {20, 20, 20, 20, 19, 19};
+static const int8_t inv_stage_range_row_dct_dct_8[6] = {19, 19, 19, 19, 19, 19};
+static const int8_t inv_cos_bit_col_dct_dct_8[6] = {12, 12, 12, 12, 12, 13};
+static const int8_t inv_cos_bit_row_dct_dct_8[6] = {13, 13, 13, 13, 13, 13};
 
 static const TXFM_2D_CFG inv_txfm_2d_cfg_dct_dct_8 = {
-    8,                              // .txfm_size
-    6,                              // .stage_num_col
-    6,                              // .stage_num_row
+    8,  // .txfm_size
+    6,  // .stage_num_col
+    6,  // .stage_num_row
+    // 0,  // .log_scale
     inv_shift_dct_dct_8,            // .shift
     inv_stage_range_col_dct_dct_8,  // .stage_range_col
     inv_stage_range_row_dct_dct_8,  // .stage_range_row
@@ -50,20 +52,21 @@
     TXFM_TYPE_DCT8};                // .txfm_type_row
 
 //  ---------------- config inv_dct_dct_16 ----------------
-static const int8_t inv_shift_dct_dct_16[2] = {0, -6};
-static const int8_t inv_stage_range_col_dct_dct_16[8] = {18, 18, 18, 18,
-                                                         18, 18, 17, 17};
-static const int8_t inv_stage_range_row_dct_dct_16[8] = {18, 18, 18, 18,
-                                                         18, 18, 18, 18};
-static const int8_t inv_cos_bit_col_dct_dct_16[8] = {14, 14, 14, 14,
-                                                     14, 14, 14, 15};
-static const int8_t inv_cos_bit_row_dct_dct_16[8] = {14, 14, 14, 14,
-                                                     14, 14, 14, 14};
+static const int8_t inv_shift_dct_dct_16[2] = {-1, -5};
+static const int8_t inv_stage_range_col_dct_dct_16[8] = {19, 19, 19, 19,
+                                                         19, 19, 18, 18};
+static const int8_t inv_stage_range_row_dct_dct_16[8] = {20, 20, 20, 20,
+                                                         20, 20, 20, 20};
+static const int8_t inv_cos_bit_col_dct_dct_16[8] = {13, 13, 13, 13,
+                                                     13, 13, 13, 13};
+static const int8_t inv_cos_bit_row_dct_dct_16[8] = {12, 12, 12, 12,
+                                                     12, 12, 12, 12};
 
 static const TXFM_2D_CFG inv_txfm_2d_cfg_dct_dct_16 = {
-    16,                              // .txfm_size
-    8,                               // .stage_num_col
-    8,                               // .stage_num_row
+    16,  // .txfm_size
+    8,   // .stage_num_col
+    8,   // .stage_num_row
+    // 0,  // .log_scale
     inv_shift_dct_dct_16,            // .shift
     inv_stage_range_col_dct_dct_16,  // .stage_range_col
     inv_stage_range_row_dct_dct_16,  // .stage_range_row
@@ -73,20 +76,21 @@
     TXFM_TYPE_DCT16};                // .txfm_type_row
 
 //  ---------------- config inv_dct_dct_32 ----------------
-static const int8_t inv_shift_dct_dct_32[2] = {-1, -6};
-static const int8_t inv_stage_range_col_dct_dct_32[10] = {18, 18, 18, 18, 18,
-                                                          18, 18, 18, 17, 17};
-static const int8_t inv_stage_range_row_dct_dct_32[10] = {19, 19, 19, 19, 19,
-                                                          19, 19, 19, 19, 19};
-static const int8_t inv_cos_bit_col_dct_dct_32[10] = {14, 14, 14, 14, 14,
-                                                      14, 14, 14, 14, 15};
-static const int8_t inv_cos_bit_row_dct_dct_32[10] = {13, 13, 13, 13, 13,
+static const int8_t inv_shift_dct_dct_32[2] = {-1, -5};
+static const int8_t inv_stage_range_col_dct_dct_32[10] = {19, 19, 19, 19, 19,
+                                                          19, 19, 19, 18, 18};
+static const int8_t inv_stage_range_row_dct_dct_32[10] = {20, 20, 20, 20, 20,
+                                                          20, 20, 20, 20, 20};
+static const int8_t inv_cos_bit_col_dct_dct_32[10] = {13, 13, 13, 13, 13,
                                                       13, 13, 13, 13, 13};
+static const int8_t inv_cos_bit_row_dct_dct_32[10] = {12, 12, 12, 12, 12,
+                                                      12, 12, 12, 12, 12};
 
 static const TXFM_2D_CFG inv_txfm_2d_cfg_dct_dct_32 = {
-    32,                              // .txfm_size
-    10,                              // .stage_num_col
-    10,                              // .stage_num_row
+    32,  // .txfm_size
+    10,  // .stage_num_col
+    10,  // .stage_num_row
+    // 1,  // .log_scale
     inv_shift_dct_dct_32,            // .shift
     inv_stage_range_col_dct_dct_32,  // .stage_range_col
     inv_stage_range_row_dct_dct_32,  // .stage_range_row
diff --git a/vp10/decoder/decodeframe.c b/vp10/decoder/decodeframe.c
index 17e7401..0e51b15 100644
--- a/vp10/decoder/decodeframe.c
+++ b/vp10/decoder/decodeframe.c
@@ -291,7 +291,7 @@
                                              token_tab,
 #endif  // CONFIG_ANS
                                              plane, sc, col, row, tx_size,
-                                             r, mbmi->segment_id);
+                                             tx_type, r, mbmi->segment_id);
     inverse_transform_block(xd, plane, tx_type, tx_size,
                             dst, pd->dst.stride, eob);
   }
@@ -327,7 +327,7 @@
     const scan_order *sc = get_scan(tx_size, tx_type, 1);
     const int eob = vp10_decode_block_tokens(xd, plane, sc,
                                              blk_col, blk_row, tx_size,
-                                             r, mbmi->segment_id);
+                                             tx_type, r, mbmi->segment_id);
     inverse_transform_block(xd, plane, tx_type, tx_size,
         &pd->dst.buf[4 * blk_row * pd->dst.stride + 4 * blk_col],
         pd->dst.stride, eob);
@@ -373,7 +373,8 @@
 #if CONFIG_ANS
                                            token_tab,
 #endif
-                                           plane, sc, col, row, tx_size, r,
+                                           plane, sc, col, row,
+                                           tx_size, tx_type, r,
                                            mbmi->segment_id);
 
   inverse_transform_block(xd, plane, tx_type, tx_size,
diff --git a/vp10/decoder/detokenize.c b/vp10/decoder/detokenize.c
index 58cd9e6..def3d7a 100644
--- a/vp10/decoder/detokenize.c
+++ b/vp10/decoder/detokenize.c
@@ -47,7 +47,8 @@
 
 static int decode_coefs(const MACROBLOCKD *xd,
                         PLANE_TYPE type,
-                        tran_low_t *dqcoeff, TX_SIZE tx_size, const int16_t *dq,
+                        tran_low_t *dqcoeff, TX_SIZE tx_size, TX_TYPE tx_type,
+                        const int16_t *dq,
                         int ctx, const int16_t *scan, const int16_t *nb,
                         vp10_reader *r) {
   FRAME_COUNTS *counts = xd->counts;
@@ -111,7 +112,7 @@
   cat6_prob = vp10_cat6_prob;
 #endif
 
-  dq_shift = get_tx_scale(xd, 0, tx_size);
+  dq_shift = get_tx_scale(xd, tx_type, tx_size);
 
   while (c < max_eob) {
     int val = -1;
@@ -222,6 +223,7 @@
                             const rans_dec_lut *const token_tab,
                             PLANE_TYPE type,
                             tran_low_t *dqcoeff, TX_SIZE tx_size,
+                            TX_TYPE tx_type,
                             const int16_t *dq,
                             int ctx, const int16_t *scan, const int16_t *nb,
                             struct AnsDecoder *const ans) {
@@ -247,7 +249,7 @@
   const uint8_t *cat5_prob;
   const uint8_t *cat6_prob;
 
-  dq_shift = get_tx_scale(xd, 0, tx_size);
+  dq_shift = get_tx_scale(xd, tx_type, tx_size);
 
   if (counts) {
     coef_counts = counts->coef[tx_size][type][ref];
@@ -462,6 +464,7 @@
                              int plane, const scan_order *sc,
                              int x, int y,
                              TX_SIZE tx_size,
+                             TX_TYPE tx_type,
 #if CONFIG_ANS
                              struct AnsDecoder *const r,
 #else
@@ -474,11 +477,11 @@
                                                pd->left_context + y);
 #if !CONFIG_ANS
   const int eob = decode_coefs(xd, pd->plane_type,
-                               pd->dqcoeff, tx_size,
+                               pd->dqcoeff, tx_size, tx_type,
                                dequant, ctx, sc->scan, sc->neighbors, r);
 #else
   const int eob = decode_coefs_ans(xd, token_tab, pd->plane_type,
-                                   pd->dqcoeff, tx_size,
+                                   pd->dqcoeff, tx_size, tx_type,
                                    dequant, ctx, sc->scan, sc->neighbors, r);
 #endif  // !CONFIG_ANS
   dec_set_contexts(xd, pd, tx_size, eob > 0, x, y);
diff --git a/vp10/decoder/detokenize.h b/vp10/decoder/detokenize.h
index 4688d0a..a19d90f 100644
--- a/vp10/decoder/detokenize.h
+++ b/vp10/decoder/detokenize.h
@@ -29,6 +29,7 @@
                              int plane, const scan_order *sc,
                              int x, int y,
                              TX_SIZE tx_size,
+                             TX_TYPE tx_type,
 #if CONFIG_ANS
                              struct AnsDecoder *const r,
 #else
diff --git a/vp10/encoder/hybrid_fwd_txfm.c b/vp10/encoder/hybrid_fwd_txfm.c
index 3ccaf39..07c6ba0 100644
--- a/vp10/encoder/hybrid_fwd_txfm.c
+++ b/vp10/encoder/hybrid_fwd_txfm.c
@@ -195,12 +195,8 @@
 
   switch (tx_type) {
     case DCT_DCT:
-      if (bd == BITDEPTH_10) {
-        vp10_fwd_txfm2d_4x4(src_diff, coeff, diff_stride,
-                            &fwd_txfm_2d_cfg_dct_dct_4, bd);
-      } else {
-        vp10_highbd_fht4x4(src_diff, coeff, diff_stride, tx_type);
-      }
+      vp10_fwd_txfm2d_4x4(src_diff, coeff, diff_stride,
+                          &fwd_txfm_2d_cfg_dct_dct_4, bd);
       break;
     case ADST_DCT:
     case DCT_ADST:
@@ -239,11 +235,9 @@
   (void)fwd_txfm_opt;
   switch (tx_type) {
     case DCT_DCT:
-      if (bd == BITDEPTH_10) {
-        vp10_fwd_txfm2d_8x8(src_diff, coeff, diff_stride,
-                            &fwd_txfm_2d_cfg_dct_dct_8, bd);
-        break;
-      }
+      vp10_fwd_txfm2d_8x8(src_diff, coeff, diff_stride,
+                          &fwd_txfm_2d_cfg_dct_dct_8, bd);
+      break;
     case ADST_DCT:
     case DCT_ADST:
     case ADST_ADST:
@@ -285,11 +279,9 @@
   (void)fwd_txfm_opt;
   switch (tx_type) {
     case DCT_DCT:
-      if (bd == BITDEPTH_10) {
-        vp10_fwd_txfm2d_16x16(src_diff, coeff, diff_stride,
-                              &fwd_txfm_2d_cfg_dct_dct_16, bd);
-        break;
-      }
+      vp10_fwd_txfm2d_16x16(src_diff, coeff, diff_stride,
+                            &fwd_txfm_2d_cfg_dct_dct_16, bd);
+      break;
     case ADST_DCT:
     case DCT_ADST:
     case ADST_ADST:
@@ -329,18 +321,12 @@
                                   tran_low_t *coeff, int diff_stride,
                                   TX_TYPE tx_type, FWD_TXFM_OPT fwd_txfm_opt,
                                   const int bd) {
-  (void)bd;
+  (void)rd_transform;
+  (void)fwd_txfm_opt;
   switch (tx_type) {
     case DCT_DCT:
-      if (bd == BITDEPTH_10) {
-        vp10_fwd_txfm2d_32x32(src_diff, coeff, diff_stride,
-                              &fwd_txfm_2d_cfg_dct_dct_32, bd);
-      } else {
-        if (fwd_txfm_opt == FWD_TXFM_OPT_NORMAL)
-          highbd_fdct32x32(rd_transform, src_diff, coeff, diff_stride);
-        else  // FWD_TXFM_OPT_DC
-          vpx_highbd_fdct32x32_1(src_diff, coeff, diff_stride);
-      }
+      vp10_fwd_txfm2d_32x32(src_diff, coeff, diff_stride,
+                            &fwd_txfm_2d_cfg_dct_dct_32, bd);
       break;
 #if CONFIG_EXT_TX
     case ADST_DCT:
diff --git a/vp10/encoder/rdopt.c b/vp10/encoder/rdopt.c
index b16a829..b3f8336 100644
--- a/vp10/encoder/rdopt.c
+++ b/vp10/encoder/rdopt.c
@@ -1001,7 +1001,8 @@
     const struct macroblock_plane *const p = &x->plane[plane];
     const struct macroblockd_plane *const pd = &xd->plane[plane];
     int64_t this_sse;
-    int shift = (MAX_TX_SCALE - get_tx_scale(xd, 0, tx_size)) * 2;
+    int tx_type = get_tx_type(pd->plane_type, xd, block, tx_size);
+    int shift = (MAX_TX_SCALE - get_tx_scale(xd, tx_type, tx_size)) * 2;
     tran_low_t *const coeff = BLOCK_OFFSET(p->coeff, block);
     tran_low_t *const dqcoeff = BLOCK_OFFSET(pd->dqcoeff, block);
 #if CONFIG_VP9_HIGHBITDEPTH
@@ -1175,7 +1176,9 @@
         const int64_t orig_sse = (int64_t)coeff[0] * coeff[0];
         const int64_t resd_sse = coeff[0] - dqcoeff[0];
         int64_t dc_correct = orig_sse - resd_sse * resd_sse;
-        int shift = (MAX_TX_SCALE - get_tx_scale(xd, 0, tx_size)) * 2;
+        const struct macroblockd_plane *const pd = &xd->plane[plane];
+        TX_TYPE tx_type = get_tx_type(pd->plane_type, xd, block, tx_size);
+        int shift = (MAX_TX_SCALE - get_tx_scale(xd, tx_type, tx_size)) * 2;
 #if CONFIG_VP9_HIGHBITDEPTH
         dc_correct >>= ((xd->bd - 8) * 2);
 #endif