Remove more dead transforms

Change-Id: Ieeeed1cec754abaab26ab60076795351403b38e7
diff --git a/av1/common/av1_rtcd_defs.pl b/av1/common/av1_rtcd_defs.pl
index a1c02d6..4dcda55 100755
--- a/av1/common/av1_rtcd_defs.pl
+++ b/av1/common/av1_rtcd_defs.pl
@@ -189,21 +189,6 @@
 
   add_proto qw/void av1_fwht4x4/, "const int16_t *input, tran_low_t *output, int stride";
 
-  add_proto qw/void av1_fht32x32/, "const int16_t *input, tran_low_t *output, int stride, struct txfm_param *param";
-  specialize qw/av1_fht32x32 sse2 avx2/;
-
-  add_proto qw/void av1_fht4x8/, "const int16_t *input, tran_low_t *output, int stride, struct txfm_param *param";
-  specialize qw/av1_fht4x8 sse2/;
-
-  add_proto qw/void av1_fht4x16/, "const int16_t *input, tran_low_t *output, int stride, struct txfm_param *param";
-
-  add_proto qw/void av1_fht16x4/, "const int16_t *input, tran_low_t *output, int stride, struct txfm_param *param";
-
-  add_proto qw/void av1_fht8x32/, "const int16_t *input, tran_low_t *output, int stride, struct txfm_param *param";
-
-  add_proto qw/void av1_fht32x8/, "const int16_t *input, tran_low_t *output, int stride, struct txfm_param *param";
-
-
   add_proto qw/void av1_fwd_idtx/, "const int16_t *src_diff, tran_low_t *coeff, int stride, int bsx, int bsy, TX_TYPE tx_type";
 
   #fwd txfm
diff --git a/av1/encoder/dct.c b/av1/encoder/dct.c
index 79f3eee..1ed9442 100644
--- a/av1/encoder/dct.c
+++ b/av1/encoder/dct.c
@@ -1198,606 +1198,6 @@
   }
 }
 
-void av1_fht4x8_c(const int16_t *input, tran_low_t *output, int stride,
-                  TxfmParam *txfm_param) {
-  const TX_TYPE tx_type = txfm_param->tx_type;
-  static const transform_2d FHT[] = {
-    { fdct8, fdct4 },    // DCT_DCT
-    { fadst8, fdct4 },   // ADST_DCT
-    { fdct8, fadst4 },   // DCT_ADST
-    { fadst8, fadst4 },  // ADST_ADST
-    { fadst8, fdct4 },   // FLIPADST_DCT
-    { fdct8, fadst4 },   // DCT_FLIPADST
-    { fadst8, fadst4 },  // FLIPADST_FLIPADST
-    { fadst8, fadst4 },  // ADST_FLIPADST
-    { fadst8, fadst4 },  // FLIPADST_ADST
-    { fidtx8, fidtx4 },  // IDTX
-    { fdct8, fidtx4 },   // V_DCT
-    { fidtx8, fdct4 },   // H_DCT
-    { fadst8, fidtx4 },  // V_ADST
-    { fidtx8, fadst4 },  // H_ADST
-    { fadst8, fidtx4 },  // V_FLIPADST
-    { fidtx8, fadst4 },  // H_FLIPADST
-  };
-  const transform_2d ht = FHT[tx_type];
-  const int n = 4;
-  const int n2 = 8;
-  tran_low_t out[8 * 4];
-  tran_low_t temp_in[8], temp_out[8];
-  int i, j;
-  int16_t flipped_input[8 * 4];
-  maybe_flip_input(&input, &stride, n2, n, flipped_input, tx_type);
-
-  // Multi-way scaling matrix (bits):
-  // LGT/AV1 row,col     input+2.5, rowTX+.5, mid+0, colTX+1, out-1 == 3
-  // LGT row, Daala col  input+3.5, rowTX+.5, mid+0, colTX+0, out-1 == 3
-  // Daala row, LGT col  input+3,   rowTX+0,  mid+0, colTX+1, out-1 == 3
-  // Daala row,col       input+4,   rowTX+0,  mid+0, colTX+0, out-1 == 3
-
-  // Rows
-  for (i = 0; i < n2; ++i) {
-    // Input scaling
-    for (j = 0; j < n; ++j) {
-      // Input scaling when Daala is not possible, LGT/AV1 only (1 above)
-      temp_in[j] =
-          (tran_low_t)fdct_round_shift(input[i * stride + j] * 4 * Sqrt2);
-    }
-    // Row transform (AV1/LGT scale up .5 bit, Daala does not scale)
-    ht.rows(temp_in, temp_out);
-    // No mid scaling
-    for (j = 0; j < n; ++j) out[j * n2 + i] = temp_out[j];
-  }
-
-  // Columns
-  for (i = 0; i < n; ++i) {
-    for (j = 0; j < n2; ++j) temp_in[j] = out[j + i * n2];
-    // Column transform (AV1/LGT scale up 1 bit, Daala does not scale)
-    ht.cols(temp_in, temp_out);
-    // Output scaling is always a downshift of 1
-    for (j = 0; j < n2; ++j)
-      output[i + j * n] = (temp_out[j] + (temp_out[j] < 0)) >> 1;
-  }
-  // Note: overall scale factor of transform is 8 times unitary
-}
-
-void av1_fht8x4_c(const int16_t *input, tran_low_t *output, int stride,
-                  TxfmParam *txfm_param) {
-  const TX_TYPE tx_type = txfm_param->tx_type;
-  static const transform_2d FHT[] = {
-    { fdct4, fdct8 },    // DCT_DCT
-    { fadst4, fdct8 },   // ADST_DCT
-    { fdct4, fadst8 },   // DCT_ADST
-    { fadst4, fadst8 },  // ADST_ADST
-    { fadst4, fdct8 },   // FLIPADST_DCT
-    { fdct4, fadst8 },   // DCT_FLIPADST
-    { fadst4, fadst8 },  // FLIPADST_FLIPADST
-    { fadst4, fadst8 },  // ADST_FLIPADST
-    { fadst4, fadst8 },  // FLIPADST_ADST
-    { fidtx4, fidtx8 },  // IDTX
-    { fdct4, fidtx8 },   // V_DCT
-    { fidtx4, fdct8 },   // H_DCT
-    { fadst4, fidtx8 },  // V_ADST
-    { fidtx4, fadst8 },  // H_ADST
-    { fadst4, fidtx8 },  // V_FLIPADST
-    { fidtx4, fadst8 },  // H_FLIPADST
-  };
-  const transform_2d ht = FHT[tx_type];
-  const int n = 4;
-  const int n2 = 8;
-  tran_low_t out[8 * 4];
-  tran_low_t temp_in[8], temp_out[8];
-  int i, j;
-  int16_t flipped_input[8 * 4];
-  maybe_flip_input(&input, &stride, n, n2, flipped_input, tx_type);
-
-  // Multi-way scaling matrix (bits):
-  // LGT/AV1 row,col     input+2.5, rowTX+1, mid+0, colTX+.5, out-1 == 3
-  // LGT row, Daala col  input+3,   rowTX+1, mid+0, colTX+0,  out-1 == 3
-  // Daala row, LGT col  input+3.5  rowTX+0, mid+0, colTX+.5, out-1 == 3
-  // Daala row,col       input+4,   rowTX+0, mid+0, colTX+0,  out-1 == 3
-
-  // Columns
-  for (i = 0; i < n2; ++i) {
-    for (j = 0; j < n; ++j) {
-      // Input scaling when Daala is not possible, AV1/LGT only (1 above)
-      temp_in[j] =
-          (tran_low_t)fdct_round_shift(input[j * stride + i] * 4 * Sqrt2);
-    }
-    // Column transform (AV1/LGT scale up .5 bit, Daala does not scale)
-    ht.cols(temp_in, temp_out);
-    // No scaling between transforms
-    for (j = 0; j < n; ++j) out[j * n2 + i] = temp_out[j];
-  }
-
-  // Rows
-  for (i = 0; i < n; ++i) {
-    for (j = 0; j < n2; ++j) temp_in[j] = out[j + i * n2];
-    // Row transform (AV1/LGT scale up 1 bit, Daala does not scale)
-    ht.rows(temp_in, temp_out);
-    // Output scaling is always a downshift of 1
-    for (j = 0; j < n2; ++j)
-      output[j + i * n2] = (temp_out[j] + (temp_out[j] < 0)) >> 1;
-  }
-  // Note: overall scale factor of transform is 8 times unitary
-}
-
-void av1_fht4x16_c(const int16_t *input, tran_low_t *output, int stride,
-                   TxfmParam *txfm_param) {
-  const TX_TYPE tx_type = txfm_param->tx_type;
-  static const transform_2d FHT[] = {
-    { fdct16, fdct4 },    // DCT_DCT
-    { fadst16, fdct4 },   // ADST_DCT
-    { fdct16, fadst4 },   // DCT_ADST
-    { fadst16, fadst4 },  // ADST_ADST
-    { fadst16, fdct4 },   // FLIPADST_DCT
-    { fdct16, fadst4 },   // DCT_FLIPADST
-    { fadst16, fadst4 },  // FLIPADST_FLIPADST
-    { fadst16, fadst4 },  // ADST_FLIPADST
-    { fadst16, fadst4 },  // FLIPADST_ADST
-    { fidtx16, fidtx4 },  // IDTX
-    { fdct16, fidtx4 },   // V_DCT
-    { fidtx16, fdct4 },   // H_DCT
-    { fadst16, fidtx4 },  // V_ADST
-    { fidtx16, fadst4 },  // H_ADST
-    { fadst16, fidtx4 },  // V_FLIPADST
-    { fidtx16, fadst4 },  // H_FLIPADST
-  };
-  const transform_2d ht = FHT[tx_type];
-  const int n = 4;
-  const int n4 = 16;
-  tran_low_t out[16 * 4];
-  tran_low_t temp_in[16], temp_out[16];
-  int i, j;
-  int16_t flipped_input[16 * 4];
-  maybe_flip_input(&input, &stride, n4, n, flipped_input, tx_type);
-
-  // Rows
-  for (i = 0; i < n4; ++i) {
-    for (j = 0; j < n; ++j) temp_in[j] = input[i * stride + j] * 4;
-    ht.rows(temp_in, temp_out);
-    for (j = 0; j < n; ++j) out[j * n4 + i] = temp_out[j];
-  }
-
-  // Columns
-  for (i = 0; i < n; ++i) {
-    for (j = 0; j < n4; ++j) temp_in[j] = out[j + i * n4];
-    ht.cols(temp_in, temp_out);
-    for (j = 0; j < n4; ++j)
-      output[i + j * n] = (temp_out[j] + (temp_out[j] < 0)) >> 1;
-  }
-  // Note: overall scale factor of transform is 8 times unitary
-}
-
-void av1_fht16x4_c(const int16_t *input, tran_low_t *output, int stride,
-                   TxfmParam *txfm_param) {
-  const TX_TYPE tx_type = txfm_param->tx_type;
-  static const transform_2d FHT[] = {
-    { fdct4, fdct16 },    // DCT_DCT
-    { fadst4, fdct16 },   // ADST_DCT
-    { fdct4, fadst16 },   // DCT_ADST
-    { fadst4, fadst16 },  // ADST_ADST
-    { fadst4, fdct16 },   // FLIPADST_DCT
-    { fdct4, fadst16 },   // DCT_FLIPADST
-    { fadst4, fadst16 },  // FLIPADST_FLIPADST
-    { fadst4, fadst16 },  // ADST_FLIPADST
-    { fadst4, fadst16 },  // FLIPADST_ADST
-    { fidtx4, fidtx16 },  // IDTX
-    { fdct4, fidtx16 },   // V_DCT
-    { fidtx4, fdct16 },   // H_DCT
-    { fadst4, fidtx16 },  // V_ADST
-    { fidtx4, fadst16 },  // H_ADST
-    { fadst4, fidtx16 },  // V_FLIPADST
-    { fidtx4, fadst16 },  // H_FLIPADST
-  };
-  const transform_2d ht = FHT[tx_type];
-  const int n = 4;
-  const int n4 = 16;
-  tran_low_t out[16 * 4];
-  tran_low_t temp_in[16], temp_out[16];
-  int i, j;
-  int16_t flipped_input[16 * 4];
-  maybe_flip_input(&input, &stride, n, n4, flipped_input, tx_type);
-
-  // Columns
-  for (i = 0; i < n4; ++i) {
-    for (j = 0; j < n; ++j) temp_in[j] = input[j * stride + i] * 4;
-    ht.cols(temp_in, temp_out);
-    for (j = 0; j < n; ++j) out[j * n4 + i] = temp_out[j];
-  }
-
-  // Rows
-  for (i = 0; i < n; ++i) {
-    for (j = 0; j < n4; ++j) temp_in[j] = out[j + i * n4];
-    ht.rows(temp_in, temp_out);
-    for (j = 0; j < n4; ++j)
-      output[j + i * n4] = (temp_out[j] + (temp_out[j] < 0)) >> 1;
-  }
-  // Note: overall scale factor of transform is 8 times unitary
-}
-
-void av1_fht8x16_c(const int16_t *input, tran_low_t *output, int stride,
-                   TxfmParam *txfm_param) {
-  const TX_TYPE tx_type = txfm_param->tx_type;
-  static const transform_2d FHT[] = {
-    { fdct16, fdct8 },    // DCT_DCT
-    { fadst16, fdct8 },   // ADST_DCT
-    { fdct16, fadst8 },   // DCT_ADST
-    { fadst16, fadst8 },  // ADST_ADST
-    { fadst16, fdct8 },   // FLIPADST_DCT
-    { fdct16, fadst8 },   // DCT_FLIPADST
-    { fadst16, fadst8 },  // FLIPADST_FLIPADST
-    { fadst16, fadst8 },  // ADST_FLIPADST
-    { fadst16, fadst8 },  // FLIPADST_ADST
-    { fidtx16, fidtx8 },  // IDTX
-    { fdct16, fidtx8 },   // V_DCT
-    { fidtx16, fdct8 },   // H_DCT
-    { fadst16, fidtx8 },  // V_ADST
-    { fidtx16, fadst8 },  // H_ADST
-    { fadst16, fidtx8 },  // V_FLIPADST
-    { fidtx16, fadst8 },  // H_FLIPADST
-  };
-  const transform_2d ht = FHT[tx_type];
-  const int n = 8;
-  const int n2 = 16;
-  tran_low_t out[16 * 8];
-  tran_low_t temp_in[16], temp_out[16];
-  int i, j;
-  int16_t flipped_input[16 * 8];
-  maybe_flip_input(&input, &stride, n2, n, flipped_input, tx_type);
-
-  // Multi-way scaling matrix (bits):
-  // LGT/AV1 row, AV1 col  input+2.5, rowTX+1, mid-2, colTX+1.5, out+0 == 3
-  // LGT row, Daala col    input+3,   rowTX+1, mid+0, colTX+0,   out-1 == 3
-  // Daala row, LGT col    N/A (no 16-point LGT)
-  // Daala row, col        input+4,   rowTX+0, mid+0, colTX+0,   out-1 == 3
-
-  // Rows
-  for (i = 0; i < n2; ++i) {
-    // Input scaling
-    for (j = 0; j < n; ++j) {
-      // Input scaling when Daala is not possible, LGT/AV1 only (case 1 above)
-      temp_in[j] =
-          (tran_low_t)fdct_round_shift(input[i * stride + j] * 4 * Sqrt2);
-    }
-
-    // Row transform (AV1/LGT scale up 1 bit, Daala does not scale)
-    ht.rows(temp_in, temp_out);
-
-    // Mid scaling
-    for (j = 0; j < n; ++j) {
-      // mid scaling: only case 1 possible
-      out[j * n2 + i] = ROUND_POWER_OF_TWO_SIGNED(temp_out[j], 2);
-    }
-  }
-
-  // Columns
-  for (i = 0; i < n; ++i) {
-    for (j = 0; j < n2; ++j) temp_in[j] = out[j + i * n2];
-    // Column transform (AV1/LGT scale up 1.5 bits, Daala does not scale)
-    ht.cols(temp_in, temp_out);
-    for (j = 0; j < n2; ++j) {
-      // Output scaling (case 1 above)
-      output[i + j * n] = temp_out[j];
-    }
-  }
-  // Note: overall scale factor of transform is 8 times unitary
-}
-
-void av1_fht16x8_c(const int16_t *input, tran_low_t *output, int stride,
-                   TxfmParam *txfm_param) {
-  const TX_TYPE tx_type = txfm_param->tx_type;
-  static const transform_2d FHT[] = {
-    { fdct8, fdct16 },    // DCT_DCT
-    { fadst8, fdct16 },   // ADST_DCT
-    { fdct8, fadst16 },   // DCT_ADST
-    { fadst8, fadst16 },  // ADST_ADST
-    { fadst8, fdct16 },   // FLIPADST_DCT
-    { fdct8, fadst16 },   // DCT_FLIPADST
-    { fadst8, fadst16 },  // FLIPADST_FLIPADST
-    { fadst8, fadst16 },  // ADST_FLIPADST
-    { fadst8, fadst16 },  // FLIPADST_ADST
-    { fidtx8, fidtx16 },  // IDTX
-    { fdct8, fidtx16 },   // V_DCT
-    { fidtx8, fdct16 },   // H_DCT
-    { fadst8, fidtx16 },  // V_ADST
-    { fidtx8, fadst16 },  // H_ADST
-    { fadst8, fidtx16 },  // V_FLIPADST
-    { fidtx8, fadst16 },  // H_FLIPADST
-  };
-  const transform_2d ht = FHT[tx_type];
-  const int n = 8;
-  const int n2 = 16;
-  tran_low_t out[16 * 8];
-  tran_low_t temp_in[16], temp_out[16];
-  int i, j;
-  int16_t flipped_input[16 * 8];
-  maybe_flip_input(&input, &stride, n, n2, flipped_input, tx_type);
-
-  // Multi-way scaling matrix (bits):
-  // LGT/AV1 col, AV1 row  input+2.5, colTX+1, mid-2, rowTX+1.5, out+0 == 3
-  // LGT col, Daala row    input+3,   colTX+1, mid+0, rowTX+0,   out-1 == 3
-  // Daala col, LGT row   N/A (no 16-point LGT)
-  // Daala col, row        input+4,   colTX+0, mid+0, rowTX+0,   out-1 == 3
-
-  // Columns
-  for (i = 0; i < n2; ++i) {
-    // Input scaling
-    for (j = 0; j < n; ++j) {
-      // Input scaling when Daala is not possible, AV1/LGT only (1 above)
-      temp_in[j] =
-          (tran_low_t)fdct_round_shift(input[j * stride + i] * 4 * Sqrt2);
-    }
-
-    // Column transform (AV1/LGT scale up 1 bit, Daala does not scale)
-    ht.cols(temp_in, temp_out);
-
-    // Mid scaling
-    for (j = 0; j < n; ++j) {
-      // Scaling case 1 above
-      out[j * n2 + i] = ROUND_POWER_OF_TWO_SIGNED(temp_out[j], 2);
-    }
-  }
-
-  // Rows
-  for (i = 0; i < n; ++i) {
-    for (j = 0; j < n2; ++j) temp_in[j] = out[j + i * n2];
-    // Row transform (AV1 scales up 1.5 bits, Daala does not scale)
-    ht.rows(temp_in, temp_out);
-    for (j = 0; j < n2; ++j) {
-      // Ouptut scaling case 1 above
-      output[j + i * n2] = temp_out[j];
-    }
-  }
-  // Note: overall scale factor of transform is 8 times unitary
-}
-
-void av1_fht8x32_c(const int16_t *input, tran_low_t *output, int stride,
-                   TxfmParam *txfm_param) {
-  const TX_TYPE tx_type = txfm_param->tx_type;
-  static const transform_2d FHT[] = {
-    { fdct32, fdct8 },         // DCT_DCT
-    { fhalfright32, fdct8 },   // ADST_DCT
-    { fdct32, fadst8 },        // DCT_ADST
-    { fhalfright32, fadst8 },  // ADST_ADST
-    { fhalfright32, fdct8 },   // FLIPADST_DCT
-    { fdct32, fadst8 },        // DCT_FLIPADST
-    { fhalfright32, fadst8 },  // FLIPADST_FLIPADST
-    { fhalfright32, fadst8 },  // ADST_FLIPADST
-    { fhalfright32, fadst8 },  // FLIPADST_ADST
-    { fidtx32, fidtx8 },       // IDTX
-    { fdct32, fidtx8 },        // V_DCT
-    { fidtx32, fdct8 },        // H_DCT
-    { fhalfright32, fidtx8 },  // V_ADST
-    { fidtx32, fadst8 },       // H_ADST
-    { fhalfright32, fidtx8 },  // V_FLIPADST
-    { fidtx32, fadst8 },       // H_FLIPADST
-  };
-  const transform_2d ht = FHT[tx_type];
-  const int n = 8;
-  const int n4 = 32;
-  tran_low_t out[32 * 8];
-  tran_low_t temp_in[32], temp_out[32];
-  int i, j;
-  int16_t flipped_input[32 * 8];
-  maybe_flip_input(&input, &stride, n4, n, flipped_input, tx_type);
-
-  // Rows
-  for (i = 0; i < n4; ++i) {
-    for (j = 0; j < n; ++j) temp_in[j] = input[i * stride + j] * 4;
-    ht.rows(temp_in, temp_out);
-    for (j = 0; j < n; ++j) out[j * n4 + i] = temp_out[j];
-  }
-
-  // Columns
-  for (i = 0; i < n; ++i) {
-    for (j = 0; j < n4; ++j) temp_in[j] = out[j + i * n4];
-    ht.cols(temp_in, temp_out);
-    for (j = 0; j < n4; ++j)
-      output[i + j * n] = ROUND_POWER_OF_TWO_SIGNED(temp_out[j], 2);
-  }
-  // Note: overall scale factor of transform is 8 times unitary
-}
-
-void av1_fht32x8_c(const int16_t *input, tran_low_t *output, int stride,
-                   TxfmParam *txfm_param) {
-  const TX_TYPE tx_type = txfm_param->tx_type;
-  static const transform_2d FHT[] = {
-    { fdct8, fdct32 },         // DCT_DCT
-    { fadst8, fdct32 },        // ADST_DCT
-    { fdct8, fhalfright32 },   // DCT_ADST
-    { fadst8, fhalfright32 },  // ADST_ADST
-    { fadst8, fdct32 },        // FLIPADST_DCT
-    { fdct8, fhalfright32 },   // DCT_FLIPADST
-    { fadst8, fhalfright32 },  // FLIPADST_FLIPADST
-    { fadst8, fhalfright32 },  // ADST_FLIPADST
-    { fadst8, fhalfright32 },  // FLIPADST_ADST
-    { fidtx8, fidtx32 },       // IDTX
-    { fdct8, fidtx32 },        // V_DCT
-    { fidtx8, fdct32 },        // H_DCT
-    { fadst8, fidtx32 },       // V_ADST
-    { fidtx8, fhalfright32 },  // H_ADST
-    { fadst8, fidtx32 },       // V_FLIPADST
-    { fidtx8, fhalfright32 },  // H_FLIPADST
-  };
-  const transform_2d ht = FHT[tx_type];
-  const int n = 8;
-  const int n4 = 32;
-  tran_low_t out[32 * 8];
-  tran_low_t temp_in[32], temp_out[32];
-  int i, j;
-  int16_t flipped_input[32 * 8];
-  maybe_flip_input(&input, &stride, n, n4, flipped_input, tx_type);
-
-  // Columns
-  for (i = 0; i < n4; ++i) {
-    for (j = 0; j < n; ++j) temp_in[j] = input[j * stride + i] * 4;
-    ht.cols(temp_in, temp_out);
-    for (j = 0; j < n; ++j) out[j * n4 + i] = temp_out[j];
-  }
-
-  // Rows
-  for (i = 0; i < n; ++i) {
-    for (j = 0; j < n4; ++j) temp_in[j] = out[j + i * n4];
-    ht.rows(temp_in, temp_out);
-    for (j = 0; j < n4; ++j)
-      output[j + i * n4] = ROUND_POWER_OF_TWO_SIGNED(temp_out[j], 2);
-  }
-  // Note: overall scale factor of transform is 8 times unitary
-}
-
-void av1_fht16x32_c(const int16_t *input, tran_low_t *output, int stride,
-                    TxfmParam *txfm_param) {
-  const TX_TYPE tx_type = txfm_param->tx_type;
-  static const transform_2d FHT[] = {
-    { fdct32, fdct16 },         // DCT_DCT
-    { fhalfright32, fdct16 },   // ADST_DCT
-    { fdct32, fadst16 },        // DCT_ADST
-    { fhalfright32, fadst16 },  // ADST_ADST
-    { fhalfright32, fdct16 },   // FLIPADST_DCT
-    { fdct32, fadst16 },        // DCT_FLIPADST
-    { fhalfright32, fadst16 },  // FLIPADST_FLIPADST
-    { fhalfright32, fadst16 },  // ADST_FLIPADST
-    { fhalfright32, fadst16 },  // FLIPADST_ADST
-    { fidtx32, fidtx16 },       // IDTX
-    { fdct32, fidtx16 },        // V_DCT
-    { fidtx32, fdct16 },        // H_DCT
-    { fhalfright32, fidtx16 },  // V_ADST
-    { fidtx32, fadst16 },       // H_ADST
-    { fhalfright32, fidtx16 },  // V_FLIPADST
-    { fidtx32, fadst16 },       // H_FLIPADST
-  };
-  const transform_2d ht = FHT[tx_type];
-  const int n = 16;
-  const int n2 = 32;
-  tran_low_t out[32 * 16];
-  tran_low_t temp_in[32], temp_out[32];
-  int i, j;
-  int16_t flipped_input[32 * 16];
-  maybe_flip_input(&input, &stride, n2, n, flipped_input, tx_type);
-
-  // Rows
-  for (i = 0; i < n2; ++i) {
-    for (j = 0; j < n; ++j) {
-      temp_in[j] =
-          (tran_low_t)fdct_round_shift(input[i * stride + j] * 4 * Sqrt2);
-    }
-    ht.rows(temp_in, temp_out);
-    for (j = 0; j < n; ++j) {
-      out[j * n2 + i] = ROUND_POWER_OF_TWO_SIGNED(temp_out[j], 4);
-    }
-  }
-
-  // Columns
-  for (i = 0; i < n; ++i) {
-    for (j = 0; j < n2; ++j) temp_in[j] = out[j + i * n2];
-    ht.cols(temp_in, temp_out);
-    for (j = 0; j < n2; ++j) output[i + j * n] = temp_out[j];
-  }
-  // Note: overall scale factor of transform is 4 times unitary
-}
-
-void av1_fht32x16_c(const int16_t *input, tran_low_t *output, int stride,
-                    TxfmParam *txfm_param) {
-  const TX_TYPE tx_type = txfm_param->tx_type;
-  static const transform_2d FHT[] = {
-    { fdct16, fdct32 },         // DCT_DCT
-    { fadst16, fdct32 },        // ADST_DCT
-    { fdct16, fhalfright32 },   // DCT_ADST
-    { fadst16, fhalfright32 },  // ADST_ADST
-    { fadst16, fdct32 },        // FLIPADST_DCT
-    { fdct16, fhalfright32 },   // DCT_FLIPADST
-    { fadst16, fhalfright32 },  // FLIPADST_FLIPADST
-    { fadst16, fhalfright32 },  // ADST_FLIPADST
-    { fadst16, fhalfright32 },  // FLIPADST_ADST
-    { fidtx16, fidtx32 },       // IDTX
-    { fdct16, fidtx32 },        // V_DCT
-    { fidtx16, fdct32 },        // H_DCT
-    { fadst16, fidtx32 },       // V_ADST
-    { fidtx16, fhalfright32 },  // H_ADST
-    { fadst16, fidtx32 },       // V_FLIPADST
-    { fidtx16, fhalfright32 },  // H_FLIPADST
-  };
-  const transform_2d ht = FHT[tx_type];
-  const int n = 16;
-  const int n2 = 32;
-  tran_low_t out[32 * 16];
-  tran_low_t temp_in[32], temp_out[32];
-  int i, j;
-  int16_t flipped_input[32 * 16];
-  maybe_flip_input(&input, &stride, n, n2, flipped_input, tx_type);
-
-  // Columns
-  for (i = 0; i < n2; ++i) {
-    for (j = 0; j < n; ++j) {
-      temp_in[j] =
-          (tran_low_t)fdct_round_shift(input[j * stride + i] * 4 * Sqrt2);
-    }
-    ht.cols(temp_in, temp_out);
-    for (j = 0; j < n; ++j) {
-      out[j * n2 + i] = ROUND_POWER_OF_TWO_SIGNED(temp_out[j], 4);
-    }
-  }
-
-  // Rows
-  for (i = 0; i < n; ++i) {
-    for (j = 0; j < n2; ++j) temp_in[j] = out[j + i * n2];
-    ht.rows(temp_in, temp_out);
-    for (j = 0; j < n2; ++j) output[j + i * n2] = temp_out[j];
-  }
-  // Note: overall scale factor of transform is 4 times unitary
-}
-
-void av1_fht8x8_c(const int16_t *input, tran_low_t *output, int stride,
-                  TxfmParam *txfm_param) {
-  const TX_TYPE tx_type = txfm_param->tx_type;
-  if (tx_type == DCT_DCT) {
-    aom_fdct8x8_c(input, output, stride);
-    return;
-  }
-  {
-    static const transform_2d FHT[] = {
-      { fdct8, fdct8 },    // DCT_DCT
-      { fadst8, fdct8 },   // ADST_DCT
-      { fdct8, fadst8 },   // DCT_ADST
-      { fadst8, fadst8 },  // ADST_ADST
-      { fadst8, fdct8 },   // FLIPADST_DCT
-      { fdct8, fadst8 },   // DCT_FLIPADST
-      { fadst8, fadst8 },  // FLIPADST_FLIPADST
-      { fadst8, fadst8 },  // ADST_FLIPADST
-      { fadst8, fadst8 },  // FLIPADST_ADST
-      { fidtx8, fidtx8 },  // IDTX
-      { fdct8, fidtx8 },   // V_DCT
-      { fidtx8, fdct8 },   // H_DCT
-      { fadst8, fidtx8 },  // V_ADST
-      { fidtx8, fadst8 },  // H_ADST
-      { fadst8, fidtx8 },  // V_FLIPADST
-      { fidtx8, fadst8 },  // H_FLIPADST
-    };
-    const transform_2d ht = FHT[tx_type];
-    tran_low_t out[64];
-    int i, j;
-    tran_low_t temp_in[8], temp_out[8];
-
-    int16_t flipped_input[8 * 8];
-    maybe_flip_input(&input, &stride, 8, 8, flipped_input, tx_type);
-
-    // Columns
-    for (i = 0; i < 8; ++i) {
-      for (j = 0; j < 8; ++j) temp_in[j] = input[j * stride + i] * 4;
-      ht.cols(temp_in, temp_out);
-      for (j = 0; j < 8; ++j) out[j * 8 + i] = temp_out[j];
-    }
-
-    // Rows
-    for (i = 0; i < 8; ++i) {
-      for (j = 0; j < 8; ++j) temp_in[j] = out[j + i * 8];
-      ht.rows(temp_in, temp_out);
-      for (j = 0; j < 8; ++j)
-        output[j + i * 8] = (temp_out[j] + (temp_out[j] < 0)) >> 1;
-    }
-  }
-}
-
 /* 4-point reversible, orthonormal Walsh-Hadamard in 3.5 adds, 0.5 shifts per
    pixel. */
 void av1_fwht4x4_c(const int16_t *input, tran_low_t *output, int stride) {
@@ -1909,103 +1309,6 @@
   av1_fwht4x4_c(input, output, stride);
 }
 
-void av1_fht32x32_c(const int16_t *input, tran_low_t *output, int stride,
-                    TxfmParam *txfm_param) {
-  const TX_TYPE tx_type = txfm_param->tx_type;
-  static const transform_2d FHT[] = {
-    { fdct32, fdct32 },              // DCT_DCT
-    { fhalfright32, fdct32 },        // ADST_DCT
-    { fdct32, fhalfright32 },        // DCT_ADST
-    { fhalfright32, fhalfright32 },  // ADST_ADST
-    { fhalfright32, fdct32 },        // FLIPADST_DCT
-    { fdct32, fhalfright32 },        // DCT_FLIPADST
-    { fhalfright32, fhalfright32 },  // FLIPADST_FLIPADST
-    { fhalfright32, fhalfright32 },  // ADST_FLIPADST
-    { fhalfright32, fhalfright32 },  // FLIPADST_ADST
-    { fidtx32, fidtx32 },            // IDTX
-    { fdct32, fidtx32 },             // V_DCT
-    { fidtx32, fdct32 },             // H_DCT
-    { fhalfright32, fidtx32 },       // V_ADST
-    { fidtx32, fhalfright32 },       // H_ADST
-    { fhalfright32, fidtx32 },       // V_FLIPADST
-    { fidtx32, fhalfright32 },       // H_FLIPADST
-  };
-  const transform_2d ht = FHT[tx_type];
-  tran_low_t out[1024];
-  int i, j;
-  tran_low_t temp_in[32], temp_out[32];
-
-  int16_t flipped_input[32 * 32];
-  maybe_flip_input(&input, &stride, 32, 32, flipped_input, tx_type);
-
-  // Columns
-  for (i = 0; i < 32; ++i) {
-    for (j = 0; j < 32; ++j) {
-      temp_in[j] = input[j * stride + i] * 4;
-    }
-    ht.cols(temp_in, temp_out);
-    for (j = 0; j < 32; ++j) {
-      out[j * 32 + i] = ROUND_POWER_OF_TWO_SIGNED(temp_out[j], 4);
-    }
-  }
-
-  // Rows
-  for (i = 0; i < 32; ++i) {
-    for (j = 0; j < 32; ++j) temp_in[j] = out[j + i * 32];
-    ht.rows(temp_in, temp_out);
-    for (j = 0; j < 32; ++j) {
-      output[j + i * 32] = temp_out[j];
-    }
-  }
-}
-
-static void fidtx64(const tran_low_t *input, tran_low_t *output) {
-  int i;
-  for (i = 0; i < 64; ++i)
-    output[i] = (tran_low_t)fdct_round_shift(input[i] * 4 * Sqrt2);
-}
-
-// For use in lieu of ADST
-static void fhalfright64(const tran_low_t *input, tran_low_t *output) {
-  int i;
-  tran_low_t inputhalf[32];
-  for (i = 0; i < 32; ++i) {
-    output[32 + i] = (tran_low_t)fdct_round_shift(input[i] * 4 * Sqrt2);
-  }
-  // Multiply input by sqrt(2)
-  for (i = 0; i < 32; ++i) {
-    inputhalf[i] = (tran_low_t)fdct_round_shift(input[i + 32] * Sqrt2);
-  }
-  fdct32(inputhalf, output);
-  // Note overall scaling factor is 2 times unitary
-}
-// stage range
-static const int8_t fwd_stage_range_col_dct_64[12] = { 0, 1, 2, 3, 4, 5,
-                                                       6, 6, 6, 6, 6, 6 };
-static const int8_t fwd_stage_range_row_dct_64[12] = { 6,  7,  8,  9,  10, 11,
-                                                       11, 11, 11, 11, 11, 11 };
-static void fdct64_col(const tran_low_t *input, tran_low_t *output) {
-  int32_t in[64], out[64];
-  int i;
-  const int txw_idx = get_txw_idx(TX_64X64);
-  const int txh_idx = get_txh_idx(TX_64X64);
-  for (i = 0; i < 64; ++i) in[i] = (int32_t)input[i];
-  av1_fdct64_new(in, out, fwd_cos_bit_col[txw_idx][txh_idx],
-                 fwd_stage_range_col_dct_64);
-  for (i = 0; i < 64; ++i) output[i] = (tran_low_t)out[i];
-}
-
-static void fdct64_row(const tran_low_t *input, tran_low_t *output) {
-  int32_t in[64], out[64];
-  int i;
-  const int txw_idx = get_txw_idx(TX_64X64);
-  const int txh_idx = get_txh_idx(TX_64X64);
-  for (i = 0; i < 64; ++i) in[i] = (int32_t)input[i];
-  av1_fdct64_new(in, out, fwd_cos_bit_row[txw_idx][txh_idx],
-                 fwd_stage_range_row_dct_64);
-  for (i = 0; i < 64; ++i) output[i] = (tran_low_t)out[i];
-}
-
 // Forward identity transform.
 void av1_fwd_idtx_c(const int16_t *src_diff, tran_low_t *coeff, int stride,
                     int bsx, int bsy, TX_TYPE tx_type) {
diff --git a/test/av1_fht4x4_test.cc b/test/av1_fht4x4_test.cc
deleted file mode 100644
index ef61bce..0000000
--- a/test/av1_fht4x4_test.cc
+++ /dev/null
@@ -1,227 +0,0 @@
-/*
- * Copyright (c) 2016, Alliance for Open Media. All rights reserved
- *
- * This source code is subject to the terms of the BSD 2 Clause License and
- * the Alliance for Open Media Patent License 1.0. If the BSD 2 Clause License
- * was not distributed with this source code in the LICENSE file, you can
- * obtain it at www.aomedia.org/license/software. If the Alliance for Open
- * Media Patent License 1.0 was not distributed with this source code in the
- * PATENTS file, you can obtain it at www.aomedia.org/license/patent.
- */
-
-#include "third_party/googletest/src/googletest/include/gtest/gtest.h"
-
-#include "./av1_rtcd.h"
-#include "./aom_dsp_rtcd.h"
-
-#include "test/acm_random.h"
-#include "test/clear_system_state.h"
-#include "test/register_state_check.h"
-#include "test/transform_test_base.h"
-#include "test/util.h"
-#include "aom_ports/mem.h"
-
-using libaom_test::ACMRandom;
-
-namespace {
-typedef void (*IhtFunc)(const tran_low_t *in, uint8_t *out, int stride,
-                        const TxfmParam *txfm_param);
-using libaom_test::FhtFunc;
-using std::tr1::tuple;
-typedef tuple<FhtFunc, IhtFunc, TX_TYPE, aom_bit_depth_t, int> Ht4x4Param;
-
-void fht4x4_ref(const int16_t *in, tran_low_t *out, int stride,
-                TxfmParam *txfm_param) {
-  av1_fht4x4_c(in, out, stride, txfm_param);
-}
-
-void iht4x4_ref(const tran_low_t *in, uint8_t *out, int stride,
-                const TxfmParam *txfm_param) {
-  av1_iht4x4_16_add_c(in, out, stride, txfm_param);
-}
-
-typedef void (*IhighbdHtFunc)(const tran_low_t *in, uint8_t *out, int stride,
-                              TX_TYPE tx_type, int bd);
-typedef void (*HBDFhtFunc)(const int16_t *input, int32_t *output, int stride,
-                           TX_TYPE tx_type, int bd);
-
-// HighbdHt4x4Param argument list:
-// <Target optimized function, tx_type, bit depth>
-typedef tuple<HBDFhtFunc, TX_TYPE, int> HighbdHt4x4Param;
-
-void highbe_fht4x4_ref(const int16_t *in, int32_t *out, int stride,
-                       TX_TYPE tx_type, int bd) {
-  av1_fwd_txfm2d_4x4_c(in, out, stride, tx_type, bd);
-}
-
-class AV1Trans4x4HT : public libaom_test::TransformTestBase,
-                      public ::testing::TestWithParam<Ht4x4Param> {
- public:
-  virtual ~AV1Trans4x4HT() {}
-
-  virtual void SetUp() {
-    fwd_txfm_ = GET_PARAM(0);
-    inv_txfm_ = GET_PARAM(1);
-    pitch_ = 4;
-    height_ = 4;
-    fwd_txfm_ref = fht4x4_ref;
-    inv_txfm_ref = iht4x4_ref;
-    bit_depth_ = GET_PARAM(3);
-    mask_ = (1 << bit_depth_) - 1;
-    num_coeffs_ = GET_PARAM(4);
-    txfm_param_.tx_type = GET_PARAM(2);
-  }
-  virtual void TearDown() { libaom_test::ClearSystemState(); }
-
- protected:
-  void RunFwdTxfm(const int16_t *in, tran_low_t *out, int stride) {
-    fwd_txfm_(in, out, stride, &txfm_param_);
-  }
-
-  void RunInvTxfm(const tran_low_t *out, uint8_t *dst, int stride) {
-    inv_txfm_(out, dst, stride, &txfm_param_);
-  }
-
-  FhtFunc fwd_txfm_;
-  IhtFunc inv_txfm_;
-};
-
-TEST_P(AV1Trans4x4HT, MemCheck) { RunMemCheck(); }
-TEST_P(AV1Trans4x4HT, CoeffCheck) { RunCoeffCheck(); }
-// Note:
-//  TODO(luoyi): Add tx_type, 9-15 for inverse transform.
-//  Need cleanup since same tests may be done in fdct4x4_test.cc
-// TEST_P(AV1Trans4x4HT, AccuracyCheck) { RunAccuracyCheck(0); }
-// TEST_P(AV1Trans4x4HT, InvAccuracyCheck) { RunInvAccuracyCheck(0); }
-// TEST_P(AV1Trans4x4HT, InvCoeffCheck) { RunInvCoeffCheck(); }
-
-class AV1HighbdTrans4x4HT : public ::testing::TestWithParam<HighbdHt4x4Param> {
- public:
-  virtual ~AV1HighbdTrans4x4HT() {}
-
-  virtual void SetUp() {
-    fwd_txfm_ = GET_PARAM(0);
-    fwd_txfm_ref_ = highbe_fht4x4_ref;
-    tx_type_ = GET_PARAM(1);
-    bit_depth_ = GET_PARAM(2);
-    mask_ = (1 << bit_depth_) - 1;
-    num_coeffs_ = 16;
-
-    input_ = reinterpret_cast<int16_t *>(
-        aom_memalign(16, sizeof(int16_t) * num_coeffs_));
-    output_ = reinterpret_cast<int32_t *>(
-        aom_memalign(16, sizeof(int32_t) * num_coeffs_));
-    output_ref_ = reinterpret_cast<int32_t *>(
-        aom_memalign(16, sizeof(int32_t) * num_coeffs_));
-  }
-
-  virtual void TearDown() {
-    aom_free(input_);
-    aom_free(output_);
-    aom_free(output_ref_);
-    libaom_test::ClearSystemState();
-  }
-
- protected:
-  void RunBitexactCheck();
-
- private:
-  HBDFhtFunc fwd_txfm_;
-  HBDFhtFunc fwd_txfm_ref_;
-  TX_TYPE tx_type_;
-  int bit_depth_;
-  int mask_;
-  int num_coeffs_;
-  int16_t *input_;
-  int32_t *output_;
-  int32_t *output_ref_;
-};
-
-void AV1HighbdTrans4x4HT::RunBitexactCheck() {
-  ACMRandom rnd(ACMRandom::DeterministicSeed());
-  int i, j;
-  const int stride = 4;
-  const int num_tests = 1000;
-  const int num_coeffs = 16;
-
-  for (i = 0; i < num_tests; ++i) {
-    for (j = 0; j < num_coeffs; ++j) {
-      input_[j] = (rnd.Rand16() & mask_) - (rnd.Rand16() & mask_);
-    }
-
-    fwd_txfm_ref_(input_, output_ref_, stride, tx_type_, bit_depth_);
-    fwd_txfm_(input_, output_, stride, tx_type_, bit_depth_);
-
-    for (j = 0; j < num_coeffs; ++j) {
-      EXPECT_EQ(output_[j], output_ref_[j])
-          << "Not bit-exact result at index: " << j << " at test block: " << i;
-    }
-  }
-}
-
-TEST_P(AV1HighbdTrans4x4HT, HighbdCoeffCheck) { RunBitexactCheck(); }
-
-using std::tr1::make_tuple;
-
-#if HAVE_SSE2
-const Ht4x4Param kArrayHt4x4Param_sse2[] = {
-  make_tuple(&av1_fht4x4_sse2, &av1_iht4x4_16_add_sse2, DCT_DCT, AOM_BITS_8,
-             16),
-  make_tuple(&av1_fht4x4_sse2, &av1_iht4x4_16_add_sse2, ADST_DCT, AOM_BITS_8,
-             16),
-  make_tuple(&av1_fht4x4_sse2, &av1_iht4x4_16_add_sse2, DCT_ADST, AOM_BITS_8,
-             16),
-  make_tuple(&av1_fht4x4_sse2, &av1_iht4x4_16_add_sse2, ADST_ADST, AOM_BITS_8,
-             16),
-  make_tuple(&av1_fht4x4_sse2, &av1_iht4x4_16_add_sse2, FLIPADST_DCT,
-             AOM_BITS_8, 16),
-  make_tuple(&av1_fht4x4_sse2, &av1_iht4x4_16_add_sse2, DCT_FLIPADST,
-             AOM_BITS_8, 16),
-  make_tuple(&av1_fht4x4_sse2, &av1_iht4x4_16_add_sse2, FLIPADST_FLIPADST,
-             AOM_BITS_8, 16),
-  make_tuple(&av1_fht4x4_sse2, &av1_iht4x4_16_add_sse2, ADST_FLIPADST,
-             AOM_BITS_8, 16),
-  make_tuple(&av1_fht4x4_sse2, &av1_iht4x4_16_add_sse2, FLIPADST_ADST,
-             AOM_BITS_8, 16),
-  make_tuple(&av1_fht4x4_sse2, &av1_iht4x4_16_add_sse2, IDTX, AOM_BITS_8, 16),
-  make_tuple(&av1_fht4x4_sse2, &av1_iht4x4_16_add_sse2, V_DCT, AOM_BITS_8, 16),
-  make_tuple(&av1_fht4x4_sse2, &av1_iht4x4_16_add_sse2, H_DCT, AOM_BITS_8, 16),
-  make_tuple(&av1_fht4x4_sse2, &av1_iht4x4_16_add_sse2, V_ADST, AOM_BITS_8, 16),
-  make_tuple(&av1_fht4x4_sse2, &av1_iht4x4_16_add_sse2, H_ADST, AOM_BITS_8, 16),
-  make_tuple(&av1_fht4x4_sse2, &av1_iht4x4_16_add_sse2, V_FLIPADST, AOM_BITS_8,
-             16),
-  make_tuple(&av1_fht4x4_sse2, &av1_iht4x4_16_add_sse2, H_FLIPADST, AOM_BITS_8,
-             16)
-};
-INSTANTIATE_TEST_CASE_P(SSE2, AV1Trans4x4HT,
-                        ::testing::ValuesIn(kArrayHt4x4Param_sse2));
-#endif  // HAVE_SSE2
-
-#if HAVE_SSE4_1
-const HighbdHt4x4Param kArrayHighbdHt4x4Param[] = {
-  make_tuple(&av1_fwd_txfm2d_4x4_sse4_1, DCT_DCT, 10),
-  make_tuple(&av1_fwd_txfm2d_4x4_sse4_1, DCT_DCT, 12),
-  make_tuple(&av1_fwd_txfm2d_4x4_sse4_1, ADST_DCT, 10),
-  make_tuple(&av1_fwd_txfm2d_4x4_sse4_1, ADST_DCT, 12),
-  make_tuple(&av1_fwd_txfm2d_4x4_sse4_1, DCT_ADST, 10),
-  make_tuple(&av1_fwd_txfm2d_4x4_sse4_1, DCT_ADST, 12),
-  make_tuple(&av1_fwd_txfm2d_4x4_sse4_1, ADST_ADST, 10),
-  make_tuple(&av1_fwd_txfm2d_4x4_sse4_1, ADST_ADST, 12),
-  make_tuple(&av1_fwd_txfm2d_4x4_sse4_1, FLIPADST_DCT, 10),
-  make_tuple(&av1_fwd_txfm2d_4x4_sse4_1, FLIPADST_DCT, 12),
-  make_tuple(&av1_fwd_txfm2d_4x4_sse4_1, DCT_FLIPADST, 10),
-  make_tuple(&av1_fwd_txfm2d_4x4_sse4_1, DCT_FLIPADST, 12),
-  make_tuple(&av1_fwd_txfm2d_4x4_sse4_1, FLIPADST_FLIPADST, 10),
-  make_tuple(&av1_fwd_txfm2d_4x4_sse4_1, FLIPADST_FLIPADST, 12),
-  make_tuple(&av1_fwd_txfm2d_4x4_sse4_1, ADST_FLIPADST, 10),
-  make_tuple(&av1_fwd_txfm2d_4x4_sse4_1, ADST_FLIPADST, 12),
-  make_tuple(&av1_fwd_txfm2d_4x4_sse4_1, FLIPADST_ADST, 10),
-  make_tuple(&av1_fwd_txfm2d_4x4_sse4_1, FLIPADST_ADST, 12),
-};
-
-INSTANTIATE_TEST_CASE_P(SSE4_1, AV1HighbdTrans4x4HT,
-                        ::testing::ValuesIn(kArrayHighbdHt4x4Param));
-
-#endif  // HAVE_SSE4_1
-
-}  // namespace
diff --git a/test/test.cmake b/test/test.cmake
index 7669fff..092b31c 100644
--- a/test/test.cmake
+++ b/test/test.cmake
@@ -265,10 +265,6 @@
           "${AOM_ROOT}/test/hash_test.cc")
     endif ()
 
-    set(AOM_UNIT_TEST_ENCODER_SOURCES
-        ${AOM_UNIT_TEST_ENCODER_SOURCES}
-        "${AOM_ROOT}/test/av1_fht4x4_test.cc")
-
     set(AOM_UNIT_TEST_ENCODER_INTRIN_SSE4_1
         ${AOM_UNIT_TEST_ENCODER_INTRIN_SSE4_1}
         "${AOM_ROOT}/test/corner_match_test.cc")