Fix bitrot in LBD Daala inverse TX

Cleanup/optimizations of the low-bitdepth inverse TX path for AV1 TX
broke Daala TX in several places; this patch cleans up the cleanup.

Tested against the new Daala TX code that unified LBD/HBD; this patch
restores bit-identical TX behavior.

monty-daalaTX-invzerotest-LBD-s1-2@2017-11-10T08:46:01.822Z ->
  monty-daalaTX-invzerotest-test-s1@2017-11-09T05:09:05.483Z

  PSNR | PSNR Cb | PSNR Cr | PSNR HVS |   SSIM | MS SSIM | CIEDE 2000
0.0000 |  0.0000 |  0.0000 |   0.0000 | 0.0000 |  0.0000 |     0.0000

Change-Id: I58e4de4c71ec5251138ff7816f77777db6f869a3
diff --git a/av1/common/idct.c b/av1/common/idct.c
index c3bde4c..51c35cd 100644
--- a/av1/common/idct.c
+++ b/av1/common/idct.c
@@ -56,7 +56,7 @@
   }
 }
 
-#if CONFIG_TX64X64
+#if CONFIG_TX64X64 && !CONFIG_DAALA_TX64
 static void iidtx64_c(const tran_low_t *input, tran_low_t *output) {
   for (int i = 0; i < 64; ++i) {
     output[i] = (tran_low_t)dct_const_round_shift(input[i] * 4 * Sqrt2);
@@ -78,7 +78,7 @@
   // Note overall scaling factor is 4 times orthogonal
 }
 
-#if CONFIG_TX64X64
+#if CONFIG_TX64X64 && !CONFIG_DAALA_TX64
 static void idct64_col_c(const tran_low_t *input, tran_low_t *output) {
   int32_t in[64], out[64];
 
@@ -111,6 +111,8 @@
 #endif  // CONFIG_TX64X64
 
 // Inverse identity transform and add.
+#if !(CONFIG_DAALA_TX4 && CONFIG_DAALA_TX8 && CONFIG_DAALA_TX16 && \
+      CONFIG_DAALA_TX32 && (!CONFIG_TX64X64 || CONFIG_DAALA_TX64))
 static void inv_idtx_add_c(const tran_low_t *input, uint8_t *dest, int stride,
                            int bsx, int bsy, TX_TYPE tx_type) {
   const int pels = bsx * bsy;
@@ -124,6 +126,7 @@
     }
   }
 }
+#endif
 
 #define FLIPUD_PTR(dest, stride, size)       \
   do {                                       \
@@ -1813,13 +1816,12 @@
     av1_iwht4x4_add(input, dest, stride, txfm_param);
     return;
   }
-
-  switch (tx_type) {
-#if !CONFIG_DAALA_TX4
-    case DCT_DCT: av1_idct4x4_add(input, dest, stride, txfm_param); break;
+#if CONFIG_DAALA_TX4
+  (void)tx_type;
+  av1_iht4x4_16_add_c(input, dest, stride, txfm_param);
 #else
-    case DCT_DCT:
-#endif
+  switch (tx_type) {
+    case DCT_DCT: av1_idct4x4_add(input, dest, stride, txfm_param); break;
     case ADST_DCT:
     case DCT_ADST:
     case ADST_ADST: av1_iht4x4_16_add(input, dest, stride, txfm_param); break;
@@ -1842,6 +1844,7 @@
     case IDTX: inv_idtx_add_c(input, dest, stride, 4, 4, tx_type); break;
     default: assert(0); break;
   }
+#endif
 }
 
 static void inv_txfm_add_4x8(const tran_low_t *input, uint8_t *dest, int stride,
@@ -1943,13 +1946,12 @@
 
 static void inv_txfm_add_8x8(const tran_low_t *input, uint8_t *dest, int stride,
                              const TxfmParam *txfm_param) {
+#if CONFIG_DAALA_TX8
+  av1_iht8x8_64_add_c(input, dest, stride, txfm_param);
+#else
   const TX_TYPE tx_type = txfm_param->tx_type;
   switch (tx_type) {
-#if !CONFIG_DAALA_TX8
     case DCT_DCT: idct8x8_add(input, dest, stride, txfm_param); break;
-#else
-    case DCT_DCT:
-#endif
     case ADST_DCT:
     case DCT_ADST:
     case ADST_ADST: av1_iht8x8_64_add(input, dest, stride, txfm_param); break;
@@ -1972,17 +1974,17 @@
     case IDTX: inv_idtx_add_c(input, dest, stride, 8, 8, tx_type); break;
     default: assert(0); break;
   }
+#endif
 }
 
 static void inv_txfm_add_16x16(const tran_low_t *input, uint8_t *dest,
                                int stride, const TxfmParam *txfm_param) {
+#if CONFIG_DAALA_TX16
+  av1_iht16x16_256_add_c(input, dest, stride, txfm_param);
+#else
   const TX_TYPE tx_type = txfm_param->tx_type;
   switch (tx_type) {
-#if !CONFIG_DAALA_TX16
     case DCT_DCT: idct16x16_add(input, dest, stride, txfm_param); break;
-#else
-    case DCT_DCT:
-#endif
     case ADST_DCT:
     case DCT_ADST:
     case ADST_ADST:
@@ -2007,19 +2009,17 @@
 #endif  // CONFIG_MRC_TX
     default: assert(0); break;
   }
+#endif
 }
 
 static void inv_txfm_add_32x32(const tran_low_t *input, uint8_t *dest,
                                int stride, const TxfmParam *txfm_param) {
+#if CONFIG_DAALA_TX32
+  av1_iht32x32_1024_add_c(input, dest, stride, txfm_param);
+#else
   const TX_TYPE tx_type = txfm_param->tx_type;
   switch (tx_type) {
-#if !CONFIG_DAALA_TX32
     case DCT_DCT: idct32x32_add(input, dest, stride, txfm_param); break;
-#else
-    case DCT_DCT:
-      av1_iht32x32_1024_add_c(input, dest, stride, txfm_param);
-      break;
-#endif
     case ADST_DCT:
     case DCT_ADST:
     case ADST_ADST:
@@ -2042,19 +2042,19 @@
 #endif  // CONFIG_MRC_TX
     default: assert(0); break;
   }
+#endif
 }
 
 #if CONFIG_TX64X64
 static void inv_txfm_add_64x64(const tran_low_t *input, uint8_t *dest,
                                int stride, const TxfmParam *txfm_param) {
+#if CONFIG_DAALA_TX64
+  av1_iht64x64_4096_add_c(input, dest, stride, txfm_param);
+#else
   const TX_TYPE tx_type = txfm_param->tx_type;
   assert(tx_type == DCT_DCT);
   switch (tx_type) {
-#if !CONFIG_DAALA_TX64
     case DCT_DCT: idct64x64_add(input, dest, stride, txfm_param); break;
-#else
-    case DCT_DCT:
-#endif
     case ADST_DCT:
     case DCT_ADST:
     case ADST_ADST:
@@ -2077,6 +2077,7 @@
 #endif  // CONFIG_MRC_TX
     default: assert(0); break;
   }
+#endif
 }
 #endif  // CONFIG_TX64X64
 
diff --git a/av1/encoder/dct.c b/av1/encoder/dct.c
index 43d059a..ff9e0fb 100644
--- a/av1/encoder/dct.c
+++ b/av1/encoder/dct.c
@@ -2401,7 +2401,7 @@
   }
 }
 
-#if CONFIG_TX64X64
+#if CONFIG_TX64X64 && !(CONFIG_DAALA_TX64 && CONFIG_DAALA_TX32)
 static void fidtx64(const tran_low_t *input, tran_low_t *output) {
   int i;
   for (i = 0; i < 64; ++i)