Remove dead tx: iht4x8_32_add
Change-Id: I44df34985b5c6220d5897249fd88892594fbde3f
diff --git a/av1/common/av1_rtcd_defs.pl b/av1/common/av1_rtcd_defs.pl
index 5dc4179..6c602fd 100755
--- a/av1/common/av1_rtcd_defs.pl
+++ b/av1/common/av1_rtcd_defs.pl
@@ -91,9 +91,6 @@
add_proto qw/void av1_iht4x4_16_add/, "const tran_low_t *input, uint8_t *dest, int dest_stride, const struct txfm_param *param";
specialize qw/av1_iht4x4_16_add sse2/;
-add_proto qw/void av1_iht4x8_32_add/, "const tran_low_t *input, uint8_t *dest, int dest_stride, const struct txfm_param *param";
-specialize qw/av1_iht4x8_32_add sse2/;
-
add_proto qw/void av1_iht8x4_32_add/, "const tran_low_t *input, uint8_t *dest, int dest_stride, const struct txfm_param *param";
specialize qw/av1_iht8x4_32_add sse2/;
@@ -154,8 +151,6 @@
#
add_proto qw/void av1_highbd_iht4x4_16_add/, "const tran_low_t *input, uint8_t *dest, int dest_stride, const struct txfm_param *param";
-add_proto qw/void av1_highbd_iht4x8_32_add/, "const tran_low_t *input, uint8_t *dest, int dest_stride, const struct txfm_param *param";
-
add_proto qw/void av1_highbd_iht8x4_32_add/, "const tran_low_t *input, uint8_t *dest, int dest_stride, const struct txfm_param *param";
add_proto qw/void av1_highbd_iht8x16_128_add/, "const tran_low_t *input, uint8_t *dest, int dest_stride, const struct txfm_param *param";
diff --git a/av1/common/idct.c b/av1/common/idct.c
index db8c410..9c01a52 100644
--- a/av1/common/idct.c
+++ b/av1/common/idct.c
@@ -233,71 +233,6 @@
}
}
-void av1_iht4x8_32_add_c(const tran_low_t *input, uint8_t *dest, int stride,
- const TxfmParam *txfm_param) {
- const TX_TYPE tx_type = txfm_param->tx_type;
- static const transform_2d IHT_4x8[] = {
- { aom_idct8_c, aom_idct4_c }, // DCT_DCT
- { aom_iadst8_c, aom_idct4_c }, // ADST_DCT
- { aom_idct8_c, aom_iadst4_c }, // DCT_ADST
- { aom_iadst8_c, aom_iadst4_c }, // ADST_ADST
- { aom_iadst8_c, aom_idct4_c }, // FLIPADST_DCT
- { aom_idct8_c, aom_iadst4_c }, // DCT_FLIPADST
- { aom_iadst8_c, aom_iadst4_c }, // FLIPADST_FLIPADST
- { aom_iadst8_c, aom_iadst4_c }, // ADST_FLIPADST
- { aom_iadst8_c, aom_iadst4_c }, // FLIPADST_ADST
- { iidtx8_c, iidtx4_c }, // IDTX
- { aom_idct8_c, iidtx4_c }, // V_DCT
- { iidtx8_c, aom_idct4_c }, // H_DCT
- { aom_iadst8_c, iidtx4_c }, // V_ADST
- { iidtx8_c, aom_iadst4_c }, // H_ADST
- { aom_iadst8_c, iidtx4_c }, // V_FLIPADST
- { iidtx8_c, aom_iadst4_c }, // H_FLIPADST
- };
-
- const int n = 4;
- const int n2 = 8;
-
- tran_low_t out[4][8], tmp[4][8], outtmp[4];
- tran_low_t *outp = &out[0][0];
- int outstride = n2;
-
- // Multi-way scaling matrix (bits):
- // LGT/AV1 row,col input+0, rowTX+.5, mid+.5, colTX+1, out-5 == -3
- // LGT row, Daala col input+0, rowTX+.5, mid+.5, colTX+0, out-4 == -3
- // Daala row, LGT col input+1, rowTX+0, mid+0, colTX+1, out-5 == -3
- // Daala row,col input+1, rowTX+0, mid+0, colTX+0, out-4 == -3
-
- // inverse transform row vectors and transpose
- for (int i = 0; i < n2; ++i) {
- // AV1 row transform; Scaling case 1 only
- // Row transform (AV1 scales up .5 bits)
- IHT_4x8[tx_type].rows(input, outtmp);
- // Transpose and mid scaling up by .5 bit
- for (int j = 0; j < n; ++j)
- tmp[j][i] = (tran_low_t)dct_const_round_shift(outtmp[j] * Sqrt2);
- input += n;
- }
-
- // inverse transform column vectors
- // AV1/LGT column TX scales up by 1 bit, Daala does not scale
- for (int i = 0; i < n; ++i) {
- IHT_4x8[tx_type].cols(tmp[i], out[i]);
- }
-
- maybe_flip_strides(&dest, &stride, &outp, &outstride, tx_type, n2, n);
-
- // Sum with the destination
- for (int i = 0; i < n2; ++i) {
- for (int j = 0; j < n; ++j) {
- int d = i * stride + j;
- int s = j * outstride + i;
- // Output scaling case 1 only
- dest[d] = clip_pixel_add(dest[d], ROUND_POWER_OF_TWO(outp[s], 5));
- }
- }
-}
-
void av1_iht8x4_32_add_c(const tran_low_t *input, uint8_t *dest, int stride,
const TxfmParam *txfm_param) {
const TX_TYPE tx_type = txfm_param->tx_type;
diff --git a/av1/common/x86/idct_intrin_sse2.c b/av1/common/x86/idct_intrin_sse2.c
index 3e558d0..c2f1131 100644
--- a/av1/common/x86/idct_intrin_sse2.c
+++ b/av1/common/x86/idct_intrin_sse2.c
@@ -782,184 +782,3 @@
}
write_buffer_8x4_round5(dest, in, stride);
}
-
-static INLINE void write_buffer_4x8_round5(uint8_t *dest, __m128i *in,
- int stride) {
- const __m128i final_rounding = _mm_set1_epi16(1 << 4);
- const __m128i zero = _mm_setzero_si128();
- // Final rounding and shift
- in[0] = _mm_adds_epi16(in[0], final_rounding);
- in[1] = _mm_adds_epi16(in[1], final_rounding);
- in[2] = _mm_adds_epi16(in[2], final_rounding);
- in[3] = _mm_adds_epi16(in[3], final_rounding);
-
- in[0] = _mm_srai_epi16(in[0], 5);
- in[1] = _mm_srai_epi16(in[1], 5);
- in[2] = _mm_srai_epi16(in[2], 5);
- in[3] = _mm_srai_epi16(in[3], 5);
-
- // Reconstruction and Store
- {
- __m128i d0 = _mm_cvtsi32_si128(*(const int *)(dest + stride * 0));
- __m128i d1 = _mm_cvtsi32_si128(*(const int *)(dest + stride * 1));
- __m128i d2 = _mm_cvtsi32_si128(*(const int *)(dest + stride * 2));
- __m128i d3 = _mm_cvtsi32_si128(*(const int *)(dest + stride * 3));
- __m128i d4 = _mm_cvtsi32_si128(*(const int *)(dest + stride * 4));
- __m128i d5 = _mm_cvtsi32_si128(*(const int *)(dest + stride * 5));
- __m128i d6 = _mm_cvtsi32_si128(*(const int *)(dest + stride * 6));
- __m128i d7 = _mm_cvtsi32_si128(*(const int *)(dest + stride * 7));
-
- d0 = _mm_unpacklo_epi32(d0, d1);
- d2 = _mm_unpacklo_epi32(d2, d3);
- d4 = _mm_unpacklo_epi32(d4, d5);
- d6 = _mm_unpacklo_epi32(d6, d7);
- d0 = _mm_unpacklo_epi8(d0, zero);
- d2 = _mm_unpacklo_epi8(d2, zero);
- d4 = _mm_unpacklo_epi8(d4, zero);
- d6 = _mm_unpacklo_epi8(d6, zero);
- d0 = _mm_add_epi16(d0, in[0]);
- d2 = _mm_add_epi16(d2, in[1]);
- d4 = _mm_add_epi16(d4, in[2]);
- d6 = _mm_add_epi16(d6, in[3]);
-
- d0 = _mm_packus_epi16(d0, d2);
- *(int *)dest = _mm_cvtsi128_si32(d0);
- d0 = _mm_srli_si128(d0, 4);
- *(int *)(dest + stride) = _mm_cvtsi128_si32(d0);
- d0 = _mm_srli_si128(d0, 4);
- *(int *)(dest + stride * 2) = _mm_cvtsi128_si32(d0);
- d0 = _mm_srli_si128(d0, 4);
- *(int *)(dest + stride * 3) = _mm_cvtsi128_si32(d0);
- d0 = _mm_packus_epi16(d4, d6);
- *(int *)(dest + stride * 4) = _mm_cvtsi128_si32(d0);
- d0 = _mm_srli_si128(d0, 4);
- *(int *)(dest + stride * 5) = _mm_cvtsi128_si32(d0);
- d0 = _mm_srli_si128(d0, 4);
- *(int *)(dest + stride * 6) = _mm_cvtsi128_si32(d0);
- d0 = _mm_srli_si128(d0, 4);
- *(int *)(dest + stride * 7) = _mm_cvtsi128_si32(d0);
- }
-}
-
-void av1_iht4x8_32_add_sse2(const tran_low_t *input, uint8_t *dest, int stride,
- const TxfmParam *txfm_param) {
- __m128i in[8];
- const TX_TYPE tx_type = txfm_param->tx_type;
-
- // Load rows, packed two per element of 'in'.
- // We pack into the bottom half of 'in' so that the
- // later repacking stage can pack into the
- // top half without overwriting anything
- in[4] = load_input_data(input + 0 * 8);
- in[5] = load_input_data(input + 1 * 8);
- in[6] = load_input_data(input + 2 * 8);
- in[7] = load_input_data(input + 3 * 8);
-
- // Row transform
- switch (tx_type) {
- case DCT_DCT:
- case ADST_DCT:
- case FLIPADST_DCT:
- case H_DCT:
- aom_idct4_sse2(in + 4);
- aom_idct4_sse2(in + 6);
- break;
- case DCT_ADST:
- case ADST_ADST:
- case DCT_FLIPADST:
- case FLIPADST_FLIPADST:
- case ADST_FLIPADST:
- case FLIPADST_ADST:
- case H_ADST:
- case H_FLIPADST:
- aom_iadst4_sse2(in + 4);
- aom_iadst4_sse2(in + 6);
- break;
- case V_FLIPADST:
- case V_ADST:
- case V_DCT:
- case IDTX:
- iidtx4_sse2(in + 4);
- array_transpose_4x4(in + 4);
- iidtx4_sse2(in + 6);
- array_transpose_4x4(in + 6);
- break;
- default: assert(0); break;
- }
-
- scale_sqrt2_8x4(in + 4);
-
- // Repack data
- in[0] = _mm_unpacklo_epi64(in[4], in[6]);
- in[1] = _mm_unpackhi_epi64(in[4], in[6]);
- in[2] = _mm_unpacklo_epi64(in[5], in[7]);
- in[3] = _mm_unpackhi_epi64(in[5], in[7]);
-
- // Column transform
- switch (tx_type) {
- case DCT_DCT:
- case DCT_ADST:
- case DCT_FLIPADST:
- case V_DCT: aom_idct8_sse2(in); break;
- case ADST_DCT:
- case ADST_ADST:
- case FLIPADST_ADST:
- case ADST_FLIPADST:
- case FLIPADST_FLIPADST:
- case FLIPADST_DCT:
- case V_ADST:
- case V_FLIPADST: aom_iadst8_sse2(in); break;
- case H_DCT:
- case H_ADST:
- case H_FLIPADST:
- case IDTX:
- iidtx8_sse2(in);
- array_transpose_8x8(in, in);
- break;
- default: assert(0); break;
- }
-
- switch (tx_type) {
- case DCT_DCT:
- case ADST_DCT:
- case DCT_ADST:
- case ADST_ADST:
- case H_DCT:
- case H_ADST:
- case V_ADST:
- case V_DCT:
- case IDTX: break;
- case FLIPADST_DCT:
- case FLIPADST_ADST:
- case V_FLIPADST: FLIPUD_PTR(dest, stride, 8); break;
- case DCT_FLIPADST:
- case ADST_FLIPADST:
- case H_FLIPADST:
- in[0] = _mm_shufflelo_epi16(in[0], 0x1b);
- in[1] = _mm_shufflelo_epi16(in[1], 0x1b);
- in[2] = _mm_shufflelo_epi16(in[2], 0x1b);
- in[3] = _mm_shufflelo_epi16(in[3], 0x1b);
- in[4] = _mm_shufflelo_epi16(in[4], 0x1b);
- in[5] = _mm_shufflelo_epi16(in[5], 0x1b);
- in[6] = _mm_shufflelo_epi16(in[6], 0x1b);
- in[7] = _mm_shufflelo_epi16(in[7], 0x1b);
- break;
- case FLIPADST_FLIPADST:
- in[0] = _mm_shufflelo_epi16(in[0], 0x1b);
- in[1] = _mm_shufflelo_epi16(in[1], 0x1b);
- in[2] = _mm_shufflelo_epi16(in[2], 0x1b);
- in[3] = _mm_shufflelo_epi16(in[3], 0x1b);
- in[4] = _mm_shufflelo_epi16(in[4], 0x1b);
- in[5] = _mm_shufflelo_epi16(in[5], 0x1b);
- in[6] = _mm_shufflelo_epi16(in[6], 0x1b);
- in[7] = _mm_shufflelo_epi16(in[7], 0x1b);
- FLIPUD_PTR(dest, stride, 8);
- break;
- default: assert(0); break;
- }
- in[0] = _mm_unpacklo_epi64(in[0], in[1]);
- in[1] = _mm_unpacklo_epi64(in[2], in[3]);
- in[2] = _mm_unpacklo_epi64(in[4], in[5]);
- in[3] = _mm_unpacklo_epi64(in[6], in[7]);
- write_buffer_4x8_round5(dest, in, stride);
-}
diff --git a/test/av1_fht4x8_test.cc b/test/av1_fht4x8_test.cc
deleted file mode 100644
index 8be6aa0..0000000
--- a/test/av1_fht4x8_test.cc
+++ /dev/null
@@ -1,141 +0,0 @@
-/*
- * Copyright (c) 2016, Alliance for Open Media. All rights reserved
- *
- * This source code is subject to the terms of the BSD 2 Clause License and
- * the Alliance for Open Media Patent License 1.0. If the BSD 2 Clause License
- * was not distributed with this source code in the LICENSE file, you can
- * obtain it at www.aomedia.org/license/software. If the Alliance for Open
- * Media Patent License 1.0 was not distributed with this source code in the
- * PATENTS file, you can obtain it at www.aomedia.org/license/patent.
- */
-
-#include "third_party/googletest/src/googletest/include/gtest/gtest.h"
-
-#include "./aom_dsp_rtcd.h"
-#include "./av1_rtcd.h"
-
-#include "aom_ports/mem.h"
-#include "test/acm_random.h"
-#include "test/clear_system_state.h"
-#include "test/register_state_check.h"
-#include "test/transform_test_base.h"
-#include "test/util.h"
-
-using libaom_test::ACMRandom;
-
-namespace {
-typedef void (*IhtFunc)(const tran_low_t *in, uint8_t *out, int stride,
- const TxfmParam *txfm_param);
-using libaom_test::FhtFunc;
-using std::tr1::tuple;
-typedef tuple<FhtFunc, IhtFunc, TX_TYPE, aom_bit_depth_t, int> Ht4x8Param;
-
-void fht4x8_ref(const int16_t *in, tran_low_t *out, int stride,
- TxfmParam *txfm_param) {
- av1_fht4x8_c(in, out, stride, txfm_param);
-}
-
-void iht4x8_ref(const tran_low_t *in, uint8_t *out, int stride,
- const TxfmParam *txfm_param) {
- av1_iht4x8_32_add_c(in, out, stride, txfm_param);
-}
-
-class AV1Trans4x8HT : public libaom_test::TransformTestBase,
- public ::testing::TestWithParam<Ht4x8Param> {
- public:
- virtual ~AV1Trans4x8HT() {}
-
- virtual void SetUp() {
- fwd_txfm_ = GET_PARAM(0);
- inv_txfm_ = GET_PARAM(1);
- pitch_ = 4;
- height_ = 8;
- fwd_txfm_ref = fht4x8_ref;
- inv_txfm_ref = iht4x8_ref;
- bit_depth_ = GET_PARAM(3);
- mask_ = (1 << bit_depth_) - 1;
- num_coeffs_ = GET_PARAM(4);
- txfm_param_.tx_type = GET_PARAM(2);
- }
- virtual void TearDown() { libaom_test::ClearSystemState(); }
-
- protected:
- void RunFwdTxfm(const int16_t *in, tran_low_t *out, int stride) {
- fwd_txfm_(in, out, stride, &txfm_param_);
- }
-
- void RunInvTxfm(const tran_low_t *out, uint8_t *dst, int stride) {
- inv_txfm_(out, dst, stride, &txfm_param_);
- }
-
- FhtFunc fwd_txfm_;
- IhtFunc inv_txfm_;
-};
-
-TEST_P(AV1Trans4x8HT, AccuracyCheck) { RunAccuracyCheck(0, 0.00001); }
-TEST_P(AV1Trans4x8HT, CoeffCheck) { RunCoeffCheck(); }
-TEST_P(AV1Trans4x8HT, MemCheck) { RunMemCheck(); }
-TEST_P(AV1Trans4x8HT, InvCoeffCheck) { RunInvCoeffCheck(); }
-TEST_P(AV1Trans4x8HT, InvAccuracyCheck) { RunInvAccuracyCheck(0); }
-
-using std::tr1::make_tuple;
-
-const Ht4x8Param kArrayHt4x8Param_c[] = {
- make_tuple(&av1_fht4x8_c, &av1_iht4x8_32_add_c, DCT_DCT, AOM_BITS_8, 32),
- make_tuple(&av1_fht4x8_c, &av1_iht4x8_32_add_c, ADST_DCT, AOM_BITS_8, 32),
- make_tuple(&av1_fht4x8_c, &av1_iht4x8_32_add_c, DCT_ADST, AOM_BITS_8, 32),
- make_tuple(&av1_fht4x8_c, &av1_iht4x8_32_add_c, ADST_ADST, AOM_BITS_8, 32),
- make_tuple(&av1_fht4x8_c, &av1_iht4x8_32_add_c, FLIPADST_DCT, AOM_BITS_8, 32),
- make_tuple(&av1_fht4x8_c, &av1_iht4x8_32_add_c, DCT_FLIPADST, AOM_BITS_8, 32),
- make_tuple(&av1_fht4x8_c, &av1_iht4x8_32_add_c, FLIPADST_FLIPADST, AOM_BITS_8,
- 32),
- make_tuple(&av1_fht4x8_c, &av1_iht4x8_32_add_c, ADST_FLIPADST, AOM_BITS_8,
- 32),
- make_tuple(&av1_fht4x8_c, &av1_iht4x8_32_add_c, FLIPADST_ADST, AOM_BITS_8,
- 32),
- make_tuple(&av1_fht4x8_c, &av1_iht4x8_32_add_c, IDTX, AOM_BITS_8, 32),
- make_tuple(&av1_fht4x8_c, &av1_iht4x8_32_add_c, V_DCT, AOM_BITS_8, 32),
- make_tuple(&av1_fht4x8_c, &av1_iht4x8_32_add_c, H_DCT, AOM_BITS_8, 32),
- make_tuple(&av1_fht4x8_c, &av1_iht4x8_32_add_c, V_ADST, AOM_BITS_8, 32),
- make_tuple(&av1_fht4x8_c, &av1_iht4x8_32_add_c, H_ADST, AOM_BITS_8, 32),
- make_tuple(&av1_fht4x8_c, &av1_iht4x8_32_add_c, V_FLIPADST, AOM_BITS_8, 32),
- make_tuple(&av1_fht4x8_c, &av1_iht4x8_32_add_c, H_FLIPADST, AOM_BITS_8, 32)
-};
-INSTANTIATE_TEST_CASE_P(C, AV1Trans4x8HT,
- ::testing::ValuesIn(kArrayHt4x8Param_c));
-
-#if HAVE_SSE2
-const Ht4x8Param kArrayHt4x8Param_sse2[] = {
- make_tuple(&av1_fht4x8_sse2, &av1_iht4x8_32_add_sse2, DCT_DCT, AOM_BITS_8,
- 32),
- make_tuple(&av1_fht4x8_sse2, &av1_iht4x8_32_add_sse2, ADST_DCT, AOM_BITS_8,
- 32),
- make_tuple(&av1_fht4x8_sse2, &av1_iht4x8_32_add_sse2, DCT_ADST, AOM_BITS_8,
- 32),
- make_tuple(&av1_fht4x8_sse2, &av1_iht4x8_32_add_sse2, ADST_ADST, AOM_BITS_8,
- 32),
- make_tuple(&av1_fht4x8_sse2, &av1_iht4x8_32_add_sse2, FLIPADST_DCT,
- AOM_BITS_8, 32),
- make_tuple(&av1_fht4x8_sse2, &av1_iht4x8_32_add_sse2, DCT_FLIPADST,
- AOM_BITS_8, 32),
- make_tuple(&av1_fht4x8_sse2, &av1_iht4x8_32_add_sse2, FLIPADST_FLIPADST,
- AOM_BITS_8, 32),
- make_tuple(&av1_fht4x8_sse2, &av1_iht4x8_32_add_sse2, ADST_FLIPADST,
- AOM_BITS_8, 32),
- make_tuple(&av1_fht4x8_sse2, &av1_iht4x8_32_add_sse2, FLIPADST_ADST,
- AOM_BITS_8, 32),
- make_tuple(&av1_fht4x8_sse2, &av1_iht4x8_32_add_sse2, IDTX, AOM_BITS_8, 32),
- make_tuple(&av1_fht4x8_sse2, &av1_iht4x8_32_add_sse2, V_DCT, AOM_BITS_8, 32),
- make_tuple(&av1_fht4x8_sse2, &av1_iht4x8_32_add_sse2, H_DCT, AOM_BITS_8, 32),
- make_tuple(&av1_fht4x8_sse2, &av1_iht4x8_32_add_sse2, V_ADST, AOM_BITS_8, 32),
- make_tuple(&av1_fht4x8_sse2, &av1_iht4x8_32_add_sse2, H_ADST, AOM_BITS_8, 32),
- make_tuple(&av1_fht4x8_sse2, &av1_iht4x8_32_add_sse2, V_FLIPADST, AOM_BITS_8,
- 32),
- make_tuple(&av1_fht4x8_sse2, &av1_iht4x8_32_add_sse2, H_FLIPADST, AOM_BITS_8,
- 32)
-};
-INSTANTIATE_TEST_CASE_P(SSE2, AV1Trans4x8HT,
- ::testing::ValuesIn(kArrayHt4x8Param_sse2));
-#endif // HAVE_SSE2
-
-} // namespace
diff --git a/test/test.cmake b/test/test.cmake
index 5dd9b09..49c6297 100644
--- a/test/test.cmake
+++ b/test/test.cmake
@@ -272,7 +272,6 @@
${AOM_UNIT_TEST_ENCODER_SOURCES}
"${AOM_ROOT}/test/av1_fht16x8_test.cc"
"${AOM_ROOT}/test/av1_fht4x4_test.cc"
- "${AOM_ROOT}/test/av1_fht4x8_test.cc"
"${AOM_ROOT}/test/av1_fht8x16_test.cc"
"${AOM_ROOT}/test/av1_fht8x4_test.cc")