Add CONFIG_DAALA_DCT4 experiment.

This experiment replaces the 4-point Type-II scaled-output VP9 DCT with
the 4-point Type-II orthonormal Daala DCT. For now, CONFIG_DAALA_DCT4
depends on CONFIG_DCT_ONLY because it does not add an orthonormal
4-point DST.

subset-1:

monty-baseline-dctonly-squaretx-subset1 ->
  monty-dct4-dctonly-squaretx-subset1-rerun

  PSNR | PSNR Cb | PSNR Cr | PSNR HVS |   SSIM | MS SSIM | CIEDE 2000
0.0055 | -0.0132 | -0.0405 |   0.0261 | 0.0005 |  0.0246 |     0.0226

objective-1-fast:

monty-baseline-dctonly-squaretx-o1f ->
  monty-dct4-dctonly-squaretx-o1f

   PSNR | PSNR Cb | PSNR Cr | PSNR HVS |    SSIM | MS SSIM | CIEDE 2000
-0.0215 | -0.1573 |     N/A |  -0.0131 | -0.0347 | -0.0390 |    -0.1121

Change-Id: Idef8f6e5525037d5bbb2d0927675c21d1922d69a
diff --git a/av1/common/idct.c b/av1/common/idct.c
index 49a91fb..80a671d 100644
--- a/av1/common/idct.c
+++ b/av1/common/idct.c
@@ -36,8 +36,13 @@
 #if CONFIG_EXT_TX
 static void iidtx4_c(const tran_low_t *input, tran_low_t *output) {
   int i;
-  for (i = 0; i < 4; ++i)
+  for (i = 0; i < 4; ++i) {
+#if CONFIG_DAALA_DCT4
+    output[i] = input[i];
+#else
     output[i] = (tran_low_t)dct_const_round_shift(input[i] * Sqrt2);
+#endif
+  }
 }
 
 static void iidtx8_c(const tran_low_t *input, tran_low_t *output) {
@@ -249,10 +254,12 @@
 void av1_iht4x4_16_add_c(const tran_low_t *input, uint8_t *dest, int stride,
                          const INV_TXFM_PARAM *param) {
   int tx_type = param->tx_type;
+#if !CONFIG_DAALA_DCT4
   if (tx_type == DCT_DCT) {
     aom_idct4x4_16_add(input, dest, stride);
     return;
   }
+#endif
   static const transform_2d IHT_4[] = {
     { aom_idct4_c, aom_idct4_c },    // DCT_DCT  = 0
     { aom_iadst4_c, aom_idct4_c },   // ADST_DCT = 1
@@ -293,12 +300,18 @@
 
   // inverse transform row vectors
   for (i = 0; i < 4; ++i) {
+#if CONFIG_DAALA_DCT4
+    tran_low_t temp_in[4];
+    for (j = 0; j < 4; j++) temp_in[j] = input[j] << 1;
+    IHT_4[tx_type].rows(temp_in, out[i]);
+#else
 #if CONFIG_LGT
     if (use_lgt_row)
       ilgt4(input, out[i], lgtmtx_row[i]);
     else
 #endif
       IHT_4[tx_type].rows(input, out[i]);
+#endif
     input += 4;
   }
 
@@ -328,7 +341,11 @@
     for (j = 0; j < 4; ++j) {
       int d = i * stride + j;
       int s = j * outstride + i;
+#if CONFIG_DAALA_DCT4
       dest[d] = clip_pixel_add(dest[d], ROUND_POWER_OF_TWO(outp[s], 4));
+#else
+      dest[d] = clip_pixel_add(dest[d], ROUND_POWER_OF_TWO(outp[s], 4));
+#endif
     }
   }
 }
@@ -1440,7 +1457,11 @@
   }
 
   switch (tx_type) {
+#if !CONFIG_DAALA_DCT4
     case DCT_DCT: av1_idct4x4_add(input, dest, stride, param); break;
+#else
+    case DCT_DCT:
+#endif
     case ADST_DCT:
     case DCT_ADST:
     case ADST_ADST: