Remove dead tx: iht16x32_512_add and fht16x32

Change-Id: I048858dba4d574e9632176515f118d1a1a81bd9b
diff --git a/av1/common/av1_rtcd_defs.pl b/av1/common/av1_rtcd_defs.pl
index de07b27..5dc4179 100755
--- a/av1/common/av1_rtcd_defs.pl
+++ b/av1/common/av1_rtcd_defs.pl
@@ -103,9 +103,6 @@
 add_proto qw/void av1_iht16x8_128_add/, "const tran_low_t *input, uint8_t *dest, int dest_stride, const struct txfm_param *param";
 specialize qw/av1_iht16x8_128_add sse2/;
 
-add_proto qw/void av1_iht16x32_512_add/, "const tran_low_t *input, uint8_t *dest, int dest_stride, const struct txfm_param *param";
-specialize qw/av1_iht16x32_512_add sse2/;
-
 add_proto qw/void av1_iht4x16_64_add/, "const tran_low_t *input, uint8_t *dest, int dest_stride, const struct txfm_param *param";
 
 add_proto qw/void av1_iht16x4_64_add/, "const tran_low_t *input, uint8_t *dest, int dest_stride, const struct txfm_param *param";
@@ -165,8 +162,6 @@
 
 add_proto qw/void av1_highbd_iht16x8_128_add/, "const tran_low_t *input, uint8_t *dest, int dest_stride, const struct txfm_param *param";
 
-add_proto qw/void av1_highbd_iht16x32_512_add/, "const tran_low_t *input, uint8_t *dest, int dest_stride, const struct txfm_param *param";
-
 add_proto qw/void av1_highbd_iht4x16_64_add/, "const tran_low_t *input, uint8_t *dest, int dest_stride, const struct txfm_param *param";
 
 add_proto qw/void av1_highbd_iht16x4_64_add/, "const tran_low_t *input, uint8_t *dest, int dest_stride, const struct txfm_param *param";
@@ -265,9 +260,6 @@
   add_proto qw/void av1_fht16x8/, "const int16_t *input, tran_low_t *output, int stride, struct txfm_param *param";
   specialize qw/av1_fht16x8 sse2/;
 
-  add_proto qw/void av1_fht16x32/, "const int16_t *input, tran_low_t *output, int stride, struct txfm_param *param";
-  specialize qw/av1_fht16x32 sse2/;
-
   add_proto qw/void av1_fht4x16/, "const int16_t *input, tran_low_t *output, int stride, struct txfm_param *param";
 
   add_proto qw/void av1_fht16x4/, "const int16_t *input, tran_low_t *output, int stride, struct txfm_param *param";
diff --git a/av1/common/idct.c b/av1/common/idct.c
index 719fb6d..db8c410 100644
--- a/av1/common/idct.c
+++ b/av1/common/idct.c
@@ -601,58 +601,6 @@
   }
 }
 
-void av1_iht16x32_512_add_c(const tran_low_t *input, uint8_t *dest, int stride,
-                            const TxfmParam *txfm_param) {
-  const TX_TYPE tx_type = txfm_param->tx_type;
-  static const transform_2d IHT_16x32[] = {
-    { aom_idct32_c, aom_idct16_c },     // DCT_DCT
-    { ihalfright32_c, aom_idct16_c },   // ADST_DCT
-    { aom_idct32_c, aom_iadst16_c },    // DCT_ADST
-    { ihalfright32_c, aom_iadst16_c },  // ADST_ADST
-    { ihalfright32_c, aom_idct16_c },   // FLIPADST_DCT
-    { aom_idct32_c, aom_iadst16_c },    // DCT_FLIPADST
-    { ihalfright32_c, aom_iadst16_c },  // FLIPADST_FLIPADST
-    { ihalfright32_c, aom_iadst16_c },  // ADST_FLIPADST
-    { ihalfright32_c, aom_iadst16_c },  // FLIPADST_ADST
-    { iidtx32_c, iidtx16_c },           // IDTX
-    { aom_idct32_c, iidtx16_c },        // V_DCT
-    { iidtx32_c, aom_idct16_c },        // H_DCT
-    { ihalfright32_c, iidtx16_c },      // V_ADST
-    { iidtx32_c, aom_iadst16_c },       // H_ADST
-    { ihalfright32_c, iidtx16_c },      // V_FLIPADST
-    { iidtx32_c, aom_iadst16_c },       // H_FLIPADST
-  };
-
-  const int n = 16;
-  const int n2 = 32;
-
-  tran_low_t out[16][32], tmp[16][32], outtmp[16];
-  tran_low_t *outp = &out[0][0];
-  int outstride = n2;
-
-  // inverse transform row vectors and transpose
-  for (int i = 0; i < n2; ++i) {
-    IHT_16x32[tx_type].rows(input, outtmp);
-    for (int j = 0; j < n; ++j)
-      tmp[j][i] = (tran_low_t)dct_const_round_shift(outtmp[j] * Sqrt2);
-    input += n;
-  }
-
-  // inverse transform column vectors
-  for (int i = 0; i < n; ++i) IHT_16x32[tx_type].cols(tmp[i], out[i]);
-
-  maybe_flip_strides(&dest, &stride, &outp, &outstride, tx_type, n2, n);
-
-  // Sum with the destination
-  for (int i = 0; i < n2; ++i) {
-    for (int j = 0; j < n; ++j) {
-      int d = i * stride + j;
-      int s = j * outstride + i;
-      dest[d] = clip_pixel_add(dest[d], ROUND_POWER_OF_TWO(outp[s], 6));
-    }
-  }
-}
-
 void av1_iht8x8_64_add_c(const tran_low_t *input, uint8_t *dest, int stride,
                          const TxfmParam *txfm_param) {
   const TX_TYPE tx_type = txfm_param->tx_type;
diff --git a/av1/common/x86/idct_intrin_sse2.c b/av1/common/x86/idct_intrin_sse2.c
index e09ce8e..3e558d0 100644
--- a/av1/common/x86/idct_intrin_sse2.c
+++ b/av1/common/x86/idct_intrin_sse2.c
@@ -963,190 +963,3 @@
   in[3] = _mm_unpacklo_epi64(in[6], in[7]);
   write_buffer_4x8_round5(dest, in, stride);
 }
-
-// Note: The 16-column 32-element transforms take input in the form of four
-// 8x16 blocks (each stored as a __m128i[16]), which are the four quadrants
-// of the overall 16x32 input buffer.
-static INLINE void idct32_16col(__m128i *tl, __m128i *tr, __m128i *bl,
-                                __m128i *br) {
-  array_transpose_16x16(tl, tr);
-  array_transpose_16x16(bl, br);
-  idct32_8col(tl, bl);
-  idct32_8col(tr, br);
-}
-
-static INLINE void ihalfright32_16col(__m128i *tl, __m128i *tr, __m128i *bl,
-                                      __m128i *br) {
-  __m128i tmpl[16], tmpr[16];
-  int i;
-
-  // Copy the top half of the input to temporary storage
-  for (i = 0; i < 16; ++i) {
-    tmpl[i] = tl[i];
-    tmpr[i] = tr[i];
-  }
-
-  // Generate the top half of the output
-  for (i = 0; i < 16; ++i) {
-    tl[i] = _mm_slli_epi16(bl[i], 2);
-    tr[i] = _mm_slli_epi16(br[i], 2);
-  }
-  array_transpose_16x16(tl, tr);
-
-  // Copy the temporary storage back to the bottom half of the input
-  for (i = 0; i < 16; ++i) {
-    bl[i] = tmpl[i];
-    br[i] = tmpr[i];
-  }
-
-  // Generate the bottom half of the output
-  scale_sqrt2_8x16(bl);
-  scale_sqrt2_8x16(br);
-  aom_idct16_sse2(bl, br);  // Includes a transposition
-}
-
-static INLINE void iidtx32_16col(__m128i *tl, __m128i *tr, __m128i *bl,
-                                 __m128i *br) {
-  int i;
-  array_transpose_16x16(tl, tr);
-  array_transpose_16x16(bl, br);
-  for (i = 0; i < 16; ++i) {
-    tl[i] = _mm_slli_epi16(tl[i], 2);
-    tr[i] = _mm_slli_epi16(tr[i], 2);
-    bl[i] = _mm_slli_epi16(bl[i], 2);
-    br[i] = _mm_slli_epi16(br[i], 2);
-  }
-}
-
-static INLINE void write_buffer_16x32_round6(uint8_t *dest, __m128i *intl,
-                                             __m128i *intr, __m128i *inbl,
-                                             __m128i *inbr, int stride) {
-  const __m128i zero = _mm_setzero_si128();
-  const __m128i final_rounding = _mm_set1_epi16(1 << 5);
-  int i;
-
-  for (i = 0; i < 16; ++i) {
-    intl[i] = _mm_adds_epi16(intl[i], final_rounding);
-    intr[i] = _mm_adds_epi16(intr[i], final_rounding);
-    inbl[i] = _mm_adds_epi16(inbl[i], final_rounding);
-    inbr[i] = _mm_adds_epi16(inbr[i], final_rounding);
-    intl[i] = _mm_srai_epi16(intl[i], 6);
-    intr[i] = _mm_srai_epi16(intr[i], 6);
-    inbl[i] = _mm_srai_epi16(inbl[i], 6);
-    inbr[i] = _mm_srai_epi16(inbr[i], 6);
-    RECON_AND_STORE(dest + i * stride + 0, intl[i]);
-    RECON_AND_STORE(dest + i * stride + 8, intr[i]);
-    RECON_AND_STORE(dest + (i + 16) * stride + 0, inbl[i]);
-    RECON_AND_STORE(dest + (i + 16) * stride + 8, inbr[i]);
-  }
-}
-
-void av1_iht16x32_512_add_sse2(const tran_low_t *input, uint8_t *dest,
-                               int stride, const TxfmParam *txfm_param) {
-  __m128i intl[16], intr[16], inbl[16], inbr[16];
-  const TX_TYPE tx_type = txfm_param->tx_type;
-
-  int i;
-  for (i = 0; i < 16; ++i) {
-    intl[i] = load_input_data(input + i * 16 + 0);
-    intr[i] = load_input_data(input + i * 16 + 8);
-    inbl[i] = load_input_data(input + (i + 16) * 16 + 0);
-    inbr[i] = load_input_data(input + (i + 16) * 16 + 8);
-  }
-
-  // Row transform
-  switch (tx_type) {
-    case DCT_DCT:
-    case ADST_DCT:
-    case FLIPADST_DCT:
-    case H_DCT:
-      aom_idct16_sse2(intl, intr);
-      aom_idct16_sse2(inbl, inbr);
-      break;
-    case DCT_ADST:
-    case ADST_ADST:
-    case DCT_FLIPADST:
-    case FLIPADST_FLIPADST:
-    case ADST_FLIPADST:
-    case FLIPADST_ADST:
-    case H_ADST:
-    case H_FLIPADST:
-      aom_iadst16_sse2(intl, intr);
-      aom_iadst16_sse2(inbl, inbr);
-      break;
-    case V_FLIPADST:
-    case V_ADST:
-    case V_DCT:
-    case IDTX:
-      iidtx16_sse2(intl, intr);
-      iidtx16_sse2(inbl, inbr);
-      break;
-    default: assert(0); break;
-  }
-
-  scale_sqrt2_8x16(intl);
-  scale_sqrt2_8x16(intr);
-  scale_sqrt2_8x16(inbl);
-  scale_sqrt2_8x16(inbr);
-
-  // Column transform
-  switch (tx_type) {
-    case DCT_DCT:
-    case DCT_ADST:
-    case DCT_FLIPADST:
-    case V_DCT: idct32_16col(intl, intr, inbl, inbr); break;
-    case ADST_DCT:
-    case ADST_ADST:
-    case FLIPADST_ADST:
-    case ADST_FLIPADST:
-    case FLIPADST_FLIPADST:
-    case FLIPADST_DCT:
-    case V_ADST:
-    case V_FLIPADST: ihalfright32_16col(intl, intr, inbl, inbr); break;
-    case H_DCT:
-    case H_ADST:
-    case H_FLIPADST:
-    case IDTX: iidtx32_16col(intl, intr, inbl, inbr); break;
-    default: assert(0); break;
-  }
-
-  switch (tx_type) {
-    case DCT_DCT:
-    case ADST_DCT:
-    case DCT_ADST:
-    case ADST_ADST:
-    case H_DCT:
-    case H_ADST:
-    case V_ADST:
-    case V_DCT:
-    case IDTX: break;
-    case FLIPADST_DCT:
-    case FLIPADST_ADST:
-    case V_FLIPADST: FLIPUD_PTR(dest, stride, 32); break;
-    case DCT_FLIPADST:
-    case ADST_FLIPADST:
-    case H_FLIPADST:
-      for (i = 0; i < 16; ++i) {
-        __m128i tmp = intl[i];
-        intl[i] = mm_reverse_epi16(intr[i]);
-        intr[i] = mm_reverse_epi16(tmp);
-        tmp = inbl[i];
-        inbl[i] = mm_reverse_epi16(inbr[i]);
-        inbr[i] = mm_reverse_epi16(tmp);
-      }
-      break;
-    case FLIPADST_FLIPADST:
-      for (i = 0; i < 16; ++i) {
-        __m128i tmp = intl[i];
-        intl[i] = mm_reverse_epi16(intr[i]);
-        intr[i] = mm_reverse_epi16(tmp);
-        tmp = inbl[i];
-        inbl[i] = mm_reverse_epi16(inbr[i]);
-        inbr[i] = mm_reverse_epi16(tmp);
-      }
-      FLIPUD_PTR(dest, stride, 32);
-      break;
-    default: assert(0); break;
-  }
-  write_buffer_16x32_round6(dest, intl, intr, inbl, inbr, stride);
-}
diff --git a/test/av1_fht16x32_test.cc b/test/av1_fht16x32_test.cc
deleted file mode 100644
index ac89d54..0000000
--- a/test/av1_fht16x32_test.cc
+++ /dev/null
@@ -1,153 +0,0 @@
-/*
- * Copyright (c) 2016, Alliance for Open Media. All rights reserved
- *
- * This source code is subject to the terms of the BSD 2 Clause License and
- * the Alliance for Open Media Patent License 1.0. If the BSD 2 Clause License
- * was not distributed with this source code in the LICENSE file, you can
- * obtain it at www.aomedia.org/license/software. If the Alliance for Open
- * Media Patent License 1.0 was not distributed with this source code in the
- * PATENTS file, you can obtain it at www.aomedia.org/license/patent.
- */
-
-#include "third_party/googletest/src/googletest/include/gtest/gtest.h"
-
-#include "./aom_dsp_rtcd.h"
-#include "./av1_rtcd.h"
-
-#include "aom_ports/mem.h"
-#include "test/acm_random.h"
-#include "test/clear_system_state.h"
-#include "test/register_state_check.h"
-#include "test/transform_test_base.h"
-#include "test/util.h"
-
-using libaom_test::ACMRandom;
-
-namespace {
-typedef void (*IhtFunc)(const tran_low_t *in, uint8_t *out, int stride,
-                        const TxfmParam *txfm_param);
-using libaom_test::FhtFunc;
-using std::tr1::tuple;
-typedef tuple<FhtFunc, IhtFunc, TX_TYPE, aom_bit_depth_t, int> Ht16x32Param;
-
-void fht16x32_ref(const int16_t *in, tran_low_t *out, int stride,
-                  TxfmParam *txfm_param) {
-  av1_fht16x32_c(in, out, stride, txfm_param);
-}
-
-void iht16x32_ref(const tran_low_t *in, uint8_t *out, int stride,
-                  const TxfmParam *txfm_param) {
-  av1_iht16x32_512_add_c(in, out, stride, txfm_param);
-}
-
-class AV1Trans16x32HT : public libaom_test::TransformTestBase,
-                        public ::testing::TestWithParam<Ht16x32Param> {
- public:
-  virtual ~AV1Trans16x32HT() {}
-
-  virtual void SetUp() {
-    fwd_txfm_ = GET_PARAM(0);
-    inv_txfm_ = GET_PARAM(1);
-    pitch_ = 16;
-    height_ = 32;
-    fwd_txfm_ref = fht16x32_ref;
-    inv_txfm_ref = iht16x32_ref;
-    bit_depth_ = GET_PARAM(3);
-    mask_ = (1 << bit_depth_) - 1;
-    num_coeffs_ = GET_PARAM(4);
-    txfm_param_.tx_type = GET_PARAM(2);
-  }
-  virtual void TearDown() { libaom_test::ClearSystemState(); }
-
- protected:
-  void RunFwdTxfm(const int16_t *in, tran_low_t *out, int stride) {
-    fwd_txfm_(in, out, stride, &txfm_param_);
-  }
-
-  void RunInvTxfm(const tran_low_t *out, uint8_t *dst, int stride) {
-    inv_txfm_(out, dst, stride, &txfm_param_);
-  }
-
-  FhtFunc fwd_txfm_;
-  IhtFunc inv_txfm_;
-};
-
-TEST_P(AV1Trans16x32HT, AccuracyCheck) { RunAccuracyCheck(4, 0.2); }
-TEST_P(AV1Trans16x32HT, CoeffCheck) { RunCoeffCheck(); }
-TEST_P(AV1Trans16x32HT, MemCheck) { RunMemCheck(); }
-TEST_P(AV1Trans16x32HT, InvCoeffCheck) { RunInvCoeffCheck(); }
-TEST_P(AV1Trans16x32HT, InvAccuracyCheck) { RunInvAccuracyCheck(4); }
-
-using std::tr1::make_tuple;
-const Ht16x32Param kArrayHt16x32Param_c[] = {
-  make_tuple(&av1_fht16x32_c, &av1_iht16x32_512_add_c, DCT_DCT, AOM_BITS_8,
-             512),
-  make_tuple(&av1_fht16x32_c, &av1_iht16x32_512_add_c, ADST_DCT, AOM_BITS_8,
-             512),
-  make_tuple(&av1_fht16x32_c, &av1_iht16x32_512_add_c, DCT_ADST, AOM_BITS_8,
-             512),
-  make_tuple(&av1_fht16x32_c, &av1_iht16x32_512_add_c, ADST_ADST, AOM_BITS_8,
-             512),
-  make_tuple(&av1_fht16x32_c, &av1_iht16x32_512_add_c, FLIPADST_DCT, AOM_BITS_8,
-             512),
-  make_tuple(&av1_fht16x32_c, &av1_iht16x32_512_add_c, DCT_FLIPADST, AOM_BITS_8,
-             512),
-  make_tuple(&av1_fht16x32_c, &av1_iht16x32_512_add_c, FLIPADST_FLIPADST,
-             AOM_BITS_8, 512),
-  make_tuple(&av1_fht16x32_c, &av1_iht16x32_512_add_c, ADST_FLIPADST,
-             AOM_BITS_8, 512),
-  make_tuple(&av1_fht16x32_c, &av1_iht16x32_512_add_c, FLIPADST_ADST,
-             AOM_BITS_8, 512),
-  make_tuple(&av1_fht16x32_c, &av1_iht16x32_512_add_c, IDTX, AOM_BITS_8, 512),
-  make_tuple(&av1_fht16x32_c, &av1_iht16x32_512_add_c, V_DCT, AOM_BITS_8, 512),
-  make_tuple(&av1_fht16x32_c, &av1_iht16x32_512_add_c, H_DCT, AOM_BITS_8, 512),
-  make_tuple(&av1_fht16x32_c, &av1_iht16x32_512_add_c, V_ADST, AOM_BITS_8, 512),
-  make_tuple(&av1_fht16x32_c, &av1_iht16x32_512_add_c, H_ADST, AOM_BITS_8, 512),
-  make_tuple(&av1_fht16x32_c, &av1_iht16x32_512_add_c, V_FLIPADST, AOM_BITS_8,
-             512),
-  make_tuple(&av1_fht16x32_c, &av1_iht16x32_512_add_c, H_FLIPADST, AOM_BITS_8,
-             512)
-};
-INSTANTIATE_TEST_CASE_P(C, AV1Trans16x32HT,
-                        ::testing::ValuesIn(kArrayHt16x32Param_c));
-
-#if HAVE_SSE2
-const Ht16x32Param kArrayHt16x32Param_sse2[] = {
-  make_tuple(&av1_fht16x32_sse2, &av1_iht16x32_512_add_sse2, DCT_DCT,
-             AOM_BITS_8, 512),
-  make_tuple(&av1_fht16x32_sse2, &av1_iht16x32_512_add_sse2, ADST_DCT,
-             AOM_BITS_8, 512),
-  make_tuple(&av1_fht16x32_sse2, &av1_iht16x32_512_add_sse2, DCT_ADST,
-             AOM_BITS_8, 512),
-  make_tuple(&av1_fht16x32_sse2, &av1_iht16x32_512_add_sse2, ADST_ADST,
-             AOM_BITS_8, 512),
-  make_tuple(&av1_fht16x32_sse2, &av1_iht16x32_512_add_sse2, FLIPADST_DCT,
-             AOM_BITS_8, 512),
-  make_tuple(&av1_fht16x32_sse2, &av1_iht16x32_512_add_sse2, DCT_FLIPADST,
-             AOM_BITS_8, 512),
-  make_tuple(&av1_fht16x32_sse2, &av1_iht16x32_512_add_sse2, FLIPADST_FLIPADST,
-             AOM_BITS_8, 512),
-  make_tuple(&av1_fht16x32_sse2, &av1_iht16x32_512_add_sse2, ADST_FLIPADST,
-             AOM_BITS_8, 512),
-  make_tuple(&av1_fht16x32_sse2, &av1_iht16x32_512_add_sse2, FLIPADST_ADST,
-             AOM_BITS_8, 512),
-  make_tuple(&av1_fht16x32_sse2, &av1_iht16x32_512_add_sse2, IDTX, AOM_BITS_8,
-             512),
-  make_tuple(&av1_fht16x32_sse2, &av1_iht16x32_512_add_sse2, V_DCT, AOM_BITS_8,
-             512),
-  make_tuple(&av1_fht16x32_sse2, &av1_iht16x32_512_add_sse2, H_DCT, AOM_BITS_8,
-             512),
-  make_tuple(&av1_fht16x32_sse2, &av1_iht16x32_512_add_sse2, V_ADST, AOM_BITS_8,
-             512),
-  make_tuple(&av1_fht16x32_sse2, &av1_iht16x32_512_add_sse2, H_ADST, AOM_BITS_8,
-             512),
-  make_tuple(&av1_fht16x32_sse2, &av1_iht16x32_512_add_sse2, V_FLIPADST,
-             AOM_BITS_8, 512),
-  make_tuple(&av1_fht16x32_sse2, &av1_iht16x32_512_add_sse2, H_FLIPADST,
-             AOM_BITS_8, 512)
-};
-INSTANTIATE_TEST_CASE_P(SSE2, AV1Trans16x32HT,
-                        ::testing::ValuesIn(kArrayHt16x32Param_sse2));
-#endif  // HAVE_SSE2
-
-}  // namespace
diff --git a/test/test.cmake b/test/test.cmake
index b201c63..5dd9b09 100644
--- a/test/test.cmake
+++ b/test/test.cmake
@@ -270,7 +270,6 @@
 
     set(AOM_UNIT_TEST_ENCODER_SOURCES
         ${AOM_UNIT_TEST_ENCODER_SOURCES}
-        "${AOM_ROOT}/test/av1_fht16x32_test.cc"
         "${AOM_ROOT}/test/av1_fht16x8_test.cc"
         "${AOM_ROOT}/test/av1_fht4x4_test.cc"
         "${AOM_ROOT}/test/av1_fht4x8_test.cc"