Remove more dead transforms
Change-Id: Ieeeed1cec754abaab26ab60076795351403b38e7
diff --git a/av1/common/av1_rtcd_defs.pl b/av1/common/av1_rtcd_defs.pl
index a1c02d6..4dcda55 100755
--- a/av1/common/av1_rtcd_defs.pl
+++ b/av1/common/av1_rtcd_defs.pl
@@ -189,21 +189,6 @@
add_proto qw/void av1_fwht4x4/, "const int16_t *input, tran_low_t *output, int stride";
- add_proto qw/void av1_fht32x32/, "const int16_t *input, tran_low_t *output, int stride, struct txfm_param *param";
- specialize qw/av1_fht32x32 sse2 avx2/;
-
- add_proto qw/void av1_fht4x8/, "const int16_t *input, tran_low_t *output, int stride, struct txfm_param *param";
- specialize qw/av1_fht4x8 sse2/;
-
- add_proto qw/void av1_fht4x16/, "const int16_t *input, tran_low_t *output, int stride, struct txfm_param *param";
-
- add_proto qw/void av1_fht16x4/, "const int16_t *input, tran_low_t *output, int stride, struct txfm_param *param";
-
- add_proto qw/void av1_fht8x32/, "const int16_t *input, tran_low_t *output, int stride, struct txfm_param *param";
-
- add_proto qw/void av1_fht32x8/, "const int16_t *input, tran_low_t *output, int stride, struct txfm_param *param";
-
-
add_proto qw/void av1_fwd_idtx/, "const int16_t *src_diff, tran_low_t *coeff, int stride, int bsx, int bsy, TX_TYPE tx_type";
#fwd txfm
diff --git a/av1/encoder/dct.c b/av1/encoder/dct.c
index 79f3eee..1ed9442 100644
--- a/av1/encoder/dct.c
+++ b/av1/encoder/dct.c
@@ -1198,606 +1198,6 @@
}
}
-void av1_fht4x8_c(const int16_t *input, tran_low_t *output, int stride,
- TxfmParam *txfm_param) {
- const TX_TYPE tx_type = txfm_param->tx_type;
- static const transform_2d FHT[] = {
- { fdct8, fdct4 }, // DCT_DCT
- { fadst8, fdct4 }, // ADST_DCT
- { fdct8, fadst4 }, // DCT_ADST
- { fadst8, fadst4 }, // ADST_ADST
- { fadst8, fdct4 }, // FLIPADST_DCT
- { fdct8, fadst4 }, // DCT_FLIPADST
- { fadst8, fadst4 }, // FLIPADST_FLIPADST
- { fadst8, fadst4 }, // ADST_FLIPADST
- { fadst8, fadst4 }, // FLIPADST_ADST
- { fidtx8, fidtx4 }, // IDTX
- { fdct8, fidtx4 }, // V_DCT
- { fidtx8, fdct4 }, // H_DCT
- { fadst8, fidtx4 }, // V_ADST
- { fidtx8, fadst4 }, // H_ADST
- { fadst8, fidtx4 }, // V_FLIPADST
- { fidtx8, fadst4 }, // H_FLIPADST
- };
- const transform_2d ht = FHT[tx_type];
- const int n = 4;
- const int n2 = 8;
- tran_low_t out[8 * 4];
- tran_low_t temp_in[8], temp_out[8];
- int i, j;
- int16_t flipped_input[8 * 4];
- maybe_flip_input(&input, &stride, n2, n, flipped_input, tx_type);
-
- // Multi-way scaling matrix (bits):
- // LGT/AV1 row,col input+2.5, rowTX+.5, mid+0, colTX+1, out-1 == 3
- // LGT row, Daala col input+3.5, rowTX+.5, mid+0, colTX+0, out-1 == 3
- // Daala row, LGT col input+3, rowTX+0, mid+0, colTX+1, out-1 == 3
- // Daala row,col input+4, rowTX+0, mid+0, colTX+0, out-1 == 3
-
- // Rows
- for (i = 0; i < n2; ++i) {
- // Input scaling
- for (j = 0; j < n; ++j) {
- // Input scaling when Daala is not possible, LGT/AV1 only (1 above)
- temp_in[j] =
- (tran_low_t)fdct_round_shift(input[i * stride + j] * 4 * Sqrt2);
- }
- // Row transform (AV1/LGT scale up .5 bit, Daala does not scale)
- ht.rows(temp_in, temp_out);
- // No mid scaling
- for (j = 0; j < n; ++j) out[j * n2 + i] = temp_out[j];
- }
-
- // Columns
- for (i = 0; i < n; ++i) {
- for (j = 0; j < n2; ++j) temp_in[j] = out[j + i * n2];
- // Column transform (AV1/LGT scale up 1 bit, Daala does not scale)
- ht.cols(temp_in, temp_out);
- // Output scaling is always a downshift of 1
- for (j = 0; j < n2; ++j)
- output[i + j * n] = (temp_out[j] + (temp_out[j] < 0)) >> 1;
- }
- // Note: overall scale factor of transform is 8 times unitary
-}
-
-void av1_fht8x4_c(const int16_t *input, tran_low_t *output, int stride,
- TxfmParam *txfm_param) {
- const TX_TYPE tx_type = txfm_param->tx_type;
- static const transform_2d FHT[] = {
- { fdct4, fdct8 }, // DCT_DCT
- { fadst4, fdct8 }, // ADST_DCT
- { fdct4, fadst8 }, // DCT_ADST
- { fadst4, fadst8 }, // ADST_ADST
- { fadst4, fdct8 }, // FLIPADST_DCT
- { fdct4, fadst8 }, // DCT_FLIPADST
- { fadst4, fadst8 }, // FLIPADST_FLIPADST
- { fadst4, fadst8 }, // ADST_FLIPADST
- { fadst4, fadst8 }, // FLIPADST_ADST
- { fidtx4, fidtx8 }, // IDTX
- { fdct4, fidtx8 }, // V_DCT
- { fidtx4, fdct8 }, // H_DCT
- { fadst4, fidtx8 }, // V_ADST
- { fidtx4, fadst8 }, // H_ADST
- { fadst4, fidtx8 }, // V_FLIPADST
- { fidtx4, fadst8 }, // H_FLIPADST
- };
- const transform_2d ht = FHT[tx_type];
- const int n = 4;
- const int n2 = 8;
- tran_low_t out[8 * 4];
- tran_low_t temp_in[8], temp_out[8];
- int i, j;
- int16_t flipped_input[8 * 4];
- maybe_flip_input(&input, &stride, n, n2, flipped_input, tx_type);
-
- // Multi-way scaling matrix (bits):
- // LGT/AV1 row,col input+2.5, rowTX+1, mid+0, colTX+.5, out-1 == 3
- // LGT row, Daala col input+3, rowTX+1, mid+0, colTX+0, out-1 == 3
- // Daala row, LGT col input+3.5 rowTX+0, mid+0, colTX+.5, out-1 == 3
- // Daala row,col input+4, rowTX+0, mid+0, colTX+0, out-1 == 3
-
- // Columns
- for (i = 0; i < n2; ++i) {
- for (j = 0; j < n; ++j) {
- // Input scaling when Daala is not possible, AV1/LGT only (1 above)
- temp_in[j] =
- (tran_low_t)fdct_round_shift(input[j * stride + i] * 4 * Sqrt2);
- }
- // Column transform (AV1/LGT scale up .5 bit, Daala does not scale)
- ht.cols(temp_in, temp_out);
- // No scaling between transforms
- for (j = 0; j < n; ++j) out[j * n2 + i] = temp_out[j];
- }
-
- // Rows
- for (i = 0; i < n; ++i) {
- for (j = 0; j < n2; ++j) temp_in[j] = out[j + i * n2];
- // Row transform (AV1/LGT scale up 1 bit, Daala does not scale)
- ht.rows(temp_in, temp_out);
- // Output scaling is always a downshift of 1
- for (j = 0; j < n2; ++j)
- output[j + i * n2] = (temp_out[j] + (temp_out[j] < 0)) >> 1;
- }
- // Note: overall scale factor of transform is 8 times unitary
-}
-
-void av1_fht4x16_c(const int16_t *input, tran_low_t *output, int stride,
- TxfmParam *txfm_param) {
- const TX_TYPE tx_type = txfm_param->tx_type;
- static const transform_2d FHT[] = {
- { fdct16, fdct4 }, // DCT_DCT
- { fadst16, fdct4 }, // ADST_DCT
- { fdct16, fadst4 }, // DCT_ADST
- { fadst16, fadst4 }, // ADST_ADST
- { fadst16, fdct4 }, // FLIPADST_DCT
- { fdct16, fadst4 }, // DCT_FLIPADST
- { fadst16, fadst4 }, // FLIPADST_FLIPADST
- { fadst16, fadst4 }, // ADST_FLIPADST
- { fadst16, fadst4 }, // FLIPADST_ADST
- { fidtx16, fidtx4 }, // IDTX
- { fdct16, fidtx4 }, // V_DCT
- { fidtx16, fdct4 }, // H_DCT
- { fadst16, fidtx4 }, // V_ADST
- { fidtx16, fadst4 }, // H_ADST
- { fadst16, fidtx4 }, // V_FLIPADST
- { fidtx16, fadst4 }, // H_FLIPADST
- };
- const transform_2d ht = FHT[tx_type];
- const int n = 4;
- const int n4 = 16;
- tran_low_t out[16 * 4];
- tran_low_t temp_in[16], temp_out[16];
- int i, j;
- int16_t flipped_input[16 * 4];
- maybe_flip_input(&input, &stride, n4, n, flipped_input, tx_type);
-
- // Rows
- for (i = 0; i < n4; ++i) {
- for (j = 0; j < n; ++j) temp_in[j] = input[i * stride + j] * 4;
- ht.rows(temp_in, temp_out);
- for (j = 0; j < n; ++j) out[j * n4 + i] = temp_out[j];
- }
-
- // Columns
- for (i = 0; i < n; ++i) {
- for (j = 0; j < n4; ++j) temp_in[j] = out[j + i * n4];
- ht.cols(temp_in, temp_out);
- for (j = 0; j < n4; ++j)
- output[i + j * n] = (temp_out[j] + (temp_out[j] < 0)) >> 1;
- }
- // Note: overall scale factor of transform is 8 times unitary
-}
-
-void av1_fht16x4_c(const int16_t *input, tran_low_t *output, int stride,
- TxfmParam *txfm_param) {
- const TX_TYPE tx_type = txfm_param->tx_type;
- static const transform_2d FHT[] = {
- { fdct4, fdct16 }, // DCT_DCT
- { fadst4, fdct16 }, // ADST_DCT
- { fdct4, fadst16 }, // DCT_ADST
- { fadst4, fadst16 }, // ADST_ADST
- { fadst4, fdct16 }, // FLIPADST_DCT
- { fdct4, fadst16 }, // DCT_FLIPADST
- { fadst4, fadst16 }, // FLIPADST_FLIPADST
- { fadst4, fadst16 }, // ADST_FLIPADST
- { fadst4, fadst16 }, // FLIPADST_ADST
- { fidtx4, fidtx16 }, // IDTX
- { fdct4, fidtx16 }, // V_DCT
- { fidtx4, fdct16 }, // H_DCT
- { fadst4, fidtx16 }, // V_ADST
- { fidtx4, fadst16 }, // H_ADST
- { fadst4, fidtx16 }, // V_FLIPADST
- { fidtx4, fadst16 }, // H_FLIPADST
- };
- const transform_2d ht = FHT[tx_type];
- const int n = 4;
- const int n4 = 16;
- tran_low_t out[16 * 4];
- tran_low_t temp_in[16], temp_out[16];
- int i, j;
- int16_t flipped_input[16 * 4];
- maybe_flip_input(&input, &stride, n, n4, flipped_input, tx_type);
-
- // Columns
- for (i = 0; i < n4; ++i) {
- for (j = 0; j < n; ++j) temp_in[j] = input[j * stride + i] * 4;
- ht.cols(temp_in, temp_out);
- for (j = 0; j < n; ++j) out[j * n4 + i] = temp_out[j];
- }
-
- // Rows
- for (i = 0; i < n; ++i) {
- for (j = 0; j < n4; ++j) temp_in[j] = out[j + i * n4];
- ht.rows(temp_in, temp_out);
- for (j = 0; j < n4; ++j)
- output[j + i * n4] = (temp_out[j] + (temp_out[j] < 0)) >> 1;
- }
- // Note: overall scale factor of transform is 8 times unitary
-}
-
-void av1_fht8x16_c(const int16_t *input, tran_low_t *output, int stride,
- TxfmParam *txfm_param) {
- const TX_TYPE tx_type = txfm_param->tx_type;
- static const transform_2d FHT[] = {
- { fdct16, fdct8 }, // DCT_DCT
- { fadst16, fdct8 }, // ADST_DCT
- { fdct16, fadst8 }, // DCT_ADST
- { fadst16, fadst8 }, // ADST_ADST
- { fadst16, fdct8 }, // FLIPADST_DCT
- { fdct16, fadst8 }, // DCT_FLIPADST
- { fadst16, fadst8 }, // FLIPADST_FLIPADST
- { fadst16, fadst8 }, // ADST_FLIPADST
- { fadst16, fadst8 }, // FLIPADST_ADST
- { fidtx16, fidtx8 }, // IDTX
- { fdct16, fidtx8 }, // V_DCT
- { fidtx16, fdct8 }, // H_DCT
- { fadst16, fidtx8 }, // V_ADST
- { fidtx16, fadst8 }, // H_ADST
- { fadst16, fidtx8 }, // V_FLIPADST
- { fidtx16, fadst8 }, // H_FLIPADST
- };
- const transform_2d ht = FHT[tx_type];
- const int n = 8;
- const int n2 = 16;
- tran_low_t out[16 * 8];
- tran_low_t temp_in[16], temp_out[16];
- int i, j;
- int16_t flipped_input[16 * 8];
- maybe_flip_input(&input, &stride, n2, n, flipped_input, tx_type);
-
- // Multi-way scaling matrix (bits):
- // LGT/AV1 row, AV1 col input+2.5, rowTX+1, mid-2, colTX+1.5, out+0 == 3
- // LGT row, Daala col input+3, rowTX+1, mid+0, colTX+0, out-1 == 3
- // Daala row, LGT col N/A (no 16-point LGT)
- // Daala row, col input+4, rowTX+0, mid+0, colTX+0, out-1 == 3
-
- // Rows
- for (i = 0; i < n2; ++i) {
- // Input scaling
- for (j = 0; j < n; ++j) {
- // Input scaling when Daala is not possible, LGT/AV1 only (case 1 above)
- temp_in[j] =
- (tran_low_t)fdct_round_shift(input[i * stride + j] * 4 * Sqrt2);
- }
-
- // Row transform (AV1/LGT scale up 1 bit, Daala does not scale)
- ht.rows(temp_in, temp_out);
-
- // Mid scaling
- for (j = 0; j < n; ++j) {
- // mid scaling: only case 1 possible
- out[j * n2 + i] = ROUND_POWER_OF_TWO_SIGNED(temp_out[j], 2);
- }
- }
-
- // Columns
- for (i = 0; i < n; ++i) {
- for (j = 0; j < n2; ++j) temp_in[j] = out[j + i * n2];
- // Column transform (AV1/LGT scale up 1.5 bits, Daala does not scale)
- ht.cols(temp_in, temp_out);
- for (j = 0; j < n2; ++j) {
- // Output scaling (case 1 above)
- output[i + j * n] = temp_out[j];
- }
- }
- // Note: overall scale factor of transform is 8 times unitary
-}
-
-void av1_fht16x8_c(const int16_t *input, tran_low_t *output, int stride,
- TxfmParam *txfm_param) {
- const TX_TYPE tx_type = txfm_param->tx_type;
- static const transform_2d FHT[] = {
- { fdct8, fdct16 }, // DCT_DCT
- { fadst8, fdct16 }, // ADST_DCT
- { fdct8, fadst16 }, // DCT_ADST
- { fadst8, fadst16 }, // ADST_ADST
- { fadst8, fdct16 }, // FLIPADST_DCT
- { fdct8, fadst16 }, // DCT_FLIPADST
- { fadst8, fadst16 }, // FLIPADST_FLIPADST
- { fadst8, fadst16 }, // ADST_FLIPADST
- { fadst8, fadst16 }, // FLIPADST_ADST
- { fidtx8, fidtx16 }, // IDTX
- { fdct8, fidtx16 }, // V_DCT
- { fidtx8, fdct16 }, // H_DCT
- { fadst8, fidtx16 }, // V_ADST
- { fidtx8, fadst16 }, // H_ADST
- { fadst8, fidtx16 }, // V_FLIPADST
- { fidtx8, fadst16 }, // H_FLIPADST
- };
- const transform_2d ht = FHT[tx_type];
- const int n = 8;
- const int n2 = 16;
- tran_low_t out[16 * 8];
- tran_low_t temp_in[16], temp_out[16];
- int i, j;
- int16_t flipped_input[16 * 8];
- maybe_flip_input(&input, &stride, n, n2, flipped_input, tx_type);
-
- // Multi-way scaling matrix (bits):
- // LGT/AV1 col, AV1 row input+2.5, colTX+1, mid-2, rowTX+1.5, out+0 == 3
- // LGT col, Daala row input+3, colTX+1, mid+0, rowTX+0, out-1 == 3
- // Daala col, LGT row N/A (no 16-point LGT)
- // Daala col, row input+4, colTX+0, mid+0, rowTX+0, out-1 == 3
-
- // Columns
- for (i = 0; i < n2; ++i) {
- // Input scaling
- for (j = 0; j < n; ++j) {
- // Input scaling when Daala is not possible, AV1/LGT only (1 above)
- temp_in[j] =
- (tran_low_t)fdct_round_shift(input[j * stride + i] * 4 * Sqrt2);
- }
-
- // Column transform (AV1/LGT scale up 1 bit, Daala does not scale)
- ht.cols(temp_in, temp_out);
-
- // Mid scaling
- for (j = 0; j < n; ++j) {
- // Scaling case 1 above
- out[j * n2 + i] = ROUND_POWER_OF_TWO_SIGNED(temp_out[j], 2);
- }
- }
-
- // Rows
- for (i = 0; i < n; ++i) {
- for (j = 0; j < n2; ++j) temp_in[j] = out[j + i * n2];
- // Row transform (AV1 scales up 1.5 bits, Daala does not scale)
- ht.rows(temp_in, temp_out);
- for (j = 0; j < n2; ++j) {
- // Ouptut scaling case 1 above
- output[j + i * n2] = temp_out[j];
- }
- }
- // Note: overall scale factor of transform is 8 times unitary
-}
-
-void av1_fht8x32_c(const int16_t *input, tran_low_t *output, int stride,
- TxfmParam *txfm_param) {
- const TX_TYPE tx_type = txfm_param->tx_type;
- static const transform_2d FHT[] = {
- { fdct32, fdct8 }, // DCT_DCT
- { fhalfright32, fdct8 }, // ADST_DCT
- { fdct32, fadst8 }, // DCT_ADST
- { fhalfright32, fadst8 }, // ADST_ADST
- { fhalfright32, fdct8 }, // FLIPADST_DCT
- { fdct32, fadst8 }, // DCT_FLIPADST
- { fhalfright32, fadst8 }, // FLIPADST_FLIPADST
- { fhalfright32, fadst8 }, // ADST_FLIPADST
- { fhalfright32, fadst8 }, // FLIPADST_ADST
- { fidtx32, fidtx8 }, // IDTX
- { fdct32, fidtx8 }, // V_DCT
- { fidtx32, fdct8 }, // H_DCT
- { fhalfright32, fidtx8 }, // V_ADST
- { fidtx32, fadst8 }, // H_ADST
- { fhalfright32, fidtx8 }, // V_FLIPADST
- { fidtx32, fadst8 }, // H_FLIPADST
- };
- const transform_2d ht = FHT[tx_type];
- const int n = 8;
- const int n4 = 32;
- tran_low_t out[32 * 8];
- tran_low_t temp_in[32], temp_out[32];
- int i, j;
- int16_t flipped_input[32 * 8];
- maybe_flip_input(&input, &stride, n4, n, flipped_input, tx_type);
-
- // Rows
- for (i = 0; i < n4; ++i) {
- for (j = 0; j < n; ++j) temp_in[j] = input[i * stride + j] * 4;
- ht.rows(temp_in, temp_out);
- for (j = 0; j < n; ++j) out[j * n4 + i] = temp_out[j];
- }
-
- // Columns
- for (i = 0; i < n; ++i) {
- for (j = 0; j < n4; ++j) temp_in[j] = out[j + i * n4];
- ht.cols(temp_in, temp_out);
- for (j = 0; j < n4; ++j)
- output[i + j * n] = ROUND_POWER_OF_TWO_SIGNED(temp_out[j], 2);
- }
- // Note: overall scale factor of transform is 8 times unitary
-}
-
-void av1_fht32x8_c(const int16_t *input, tran_low_t *output, int stride,
- TxfmParam *txfm_param) {
- const TX_TYPE tx_type = txfm_param->tx_type;
- static const transform_2d FHT[] = {
- { fdct8, fdct32 }, // DCT_DCT
- { fadst8, fdct32 }, // ADST_DCT
- { fdct8, fhalfright32 }, // DCT_ADST
- { fadst8, fhalfright32 }, // ADST_ADST
- { fadst8, fdct32 }, // FLIPADST_DCT
- { fdct8, fhalfright32 }, // DCT_FLIPADST
- { fadst8, fhalfright32 }, // FLIPADST_FLIPADST
- { fadst8, fhalfright32 }, // ADST_FLIPADST
- { fadst8, fhalfright32 }, // FLIPADST_ADST
- { fidtx8, fidtx32 }, // IDTX
- { fdct8, fidtx32 }, // V_DCT
- { fidtx8, fdct32 }, // H_DCT
- { fadst8, fidtx32 }, // V_ADST
- { fidtx8, fhalfright32 }, // H_ADST
- { fadst8, fidtx32 }, // V_FLIPADST
- { fidtx8, fhalfright32 }, // H_FLIPADST
- };
- const transform_2d ht = FHT[tx_type];
- const int n = 8;
- const int n4 = 32;
- tran_low_t out[32 * 8];
- tran_low_t temp_in[32], temp_out[32];
- int i, j;
- int16_t flipped_input[32 * 8];
- maybe_flip_input(&input, &stride, n, n4, flipped_input, tx_type);
-
- // Columns
- for (i = 0; i < n4; ++i) {
- for (j = 0; j < n; ++j) temp_in[j] = input[j * stride + i] * 4;
- ht.cols(temp_in, temp_out);
- for (j = 0; j < n; ++j) out[j * n4 + i] = temp_out[j];
- }
-
- // Rows
- for (i = 0; i < n; ++i) {
- for (j = 0; j < n4; ++j) temp_in[j] = out[j + i * n4];
- ht.rows(temp_in, temp_out);
- for (j = 0; j < n4; ++j)
- output[j + i * n4] = ROUND_POWER_OF_TWO_SIGNED(temp_out[j], 2);
- }
- // Note: overall scale factor of transform is 8 times unitary
-}
-
-void av1_fht16x32_c(const int16_t *input, tran_low_t *output, int stride,
- TxfmParam *txfm_param) {
- const TX_TYPE tx_type = txfm_param->tx_type;
- static const transform_2d FHT[] = {
- { fdct32, fdct16 }, // DCT_DCT
- { fhalfright32, fdct16 }, // ADST_DCT
- { fdct32, fadst16 }, // DCT_ADST
- { fhalfright32, fadst16 }, // ADST_ADST
- { fhalfright32, fdct16 }, // FLIPADST_DCT
- { fdct32, fadst16 }, // DCT_FLIPADST
- { fhalfright32, fadst16 }, // FLIPADST_FLIPADST
- { fhalfright32, fadst16 }, // ADST_FLIPADST
- { fhalfright32, fadst16 }, // FLIPADST_ADST
- { fidtx32, fidtx16 }, // IDTX
- { fdct32, fidtx16 }, // V_DCT
- { fidtx32, fdct16 }, // H_DCT
- { fhalfright32, fidtx16 }, // V_ADST
- { fidtx32, fadst16 }, // H_ADST
- { fhalfright32, fidtx16 }, // V_FLIPADST
- { fidtx32, fadst16 }, // H_FLIPADST
- };
- const transform_2d ht = FHT[tx_type];
- const int n = 16;
- const int n2 = 32;
- tran_low_t out[32 * 16];
- tran_low_t temp_in[32], temp_out[32];
- int i, j;
- int16_t flipped_input[32 * 16];
- maybe_flip_input(&input, &stride, n2, n, flipped_input, tx_type);
-
- // Rows
- for (i = 0; i < n2; ++i) {
- for (j = 0; j < n; ++j) {
- temp_in[j] =
- (tran_low_t)fdct_round_shift(input[i * stride + j] * 4 * Sqrt2);
- }
- ht.rows(temp_in, temp_out);
- for (j = 0; j < n; ++j) {
- out[j * n2 + i] = ROUND_POWER_OF_TWO_SIGNED(temp_out[j], 4);
- }
- }
-
- // Columns
- for (i = 0; i < n; ++i) {
- for (j = 0; j < n2; ++j) temp_in[j] = out[j + i * n2];
- ht.cols(temp_in, temp_out);
- for (j = 0; j < n2; ++j) output[i + j * n] = temp_out[j];
- }
- // Note: overall scale factor of transform is 4 times unitary
-}
-
-void av1_fht32x16_c(const int16_t *input, tran_low_t *output, int stride,
- TxfmParam *txfm_param) {
- const TX_TYPE tx_type = txfm_param->tx_type;
- static const transform_2d FHT[] = {
- { fdct16, fdct32 }, // DCT_DCT
- { fadst16, fdct32 }, // ADST_DCT
- { fdct16, fhalfright32 }, // DCT_ADST
- { fadst16, fhalfright32 }, // ADST_ADST
- { fadst16, fdct32 }, // FLIPADST_DCT
- { fdct16, fhalfright32 }, // DCT_FLIPADST
- { fadst16, fhalfright32 }, // FLIPADST_FLIPADST
- { fadst16, fhalfright32 }, // ADST_FLIPADST
- { fadst16, fhalfright32 }, // FLIPADST_ADST
- { fidtx16, fidtx32 }, // IDTX
- { fdct16, fidtx32 }, // V_DCT
- { fidtx16, fdct32 }, // H_DCT
- { fadst16, fidtx32 }, // V_ADST
- { fidtx16, fhalfright32 }, // H_ADST
- { fadst16, fidtx32 }, // V_FLIPADST
- { fidtx16, fhalfright32 }, // H_FLIPADST
- };
- const transform_2d ht = FHT[tx_type];
- const int n = 16;
- const int n2 = 32;
- tran_low_t out[32 * 16];
- tran_low_t temp_in[32], temp_out[32];
- int i, j;
- int16_t flipped_input[32 * 16];
- maybe_flip_input(&input, &stride, n, n2, flipped_input, tx_type);
-
- // Columns
- for (i = 0; i < n2; ++i) {
- for (j = 0; j < n; ++j) {
- temp_in[j] =
- (tran_low_t)fdct_round_shift(input[j * stride + i] * 4 * Sqrt2);
- }
- ht.cols(temp_in, temp_out);
- for (j = 0; j < n; ++j) {
- out[j * n2 + i] = ROUND_POWER_OF_TWO_SIGNED(temp_out[j], 4);
- }
- }
-
- // Rows
- for (i = 0; i < n; ++i) {
- for (j = 0; j < n2; ++j) temp_in[j] = out[j + i * n2];
- ht.rows(temp_in, temp_out);
- for (j = 0; j < n2; ++j) output[j + i * n2] = temp_out[j];
- }
- // Note: overall scale factor of transform is 4 times unitary
-}
-
-void av1_fht8x8_c(const int16_t *input, tran_low_t *output, int stride,
- TxfmParam *txfm_param) {
- const TX_TYPE tx_type = txfm_param->tx_type;
- if (tx_type == DCT_DCT) {
- aom_fdct8x8_c(input, output, stride);
- return;
- }
- {
- static const transform_2d FHT[] = {
- { fdct8, fdct8 }, // DCT_DCT
- { fadst8, fdct8 }, // ADST_DCT
- { fdct8, fadst8 }, // DCT_ADST
- { fadst8, fadst8 }, // ADST_ADST
- { fadst8, fdct8 }, // FLIPADST_DCT
- { fdct8, fadst8 }, // DCT_FLIPADST
- { fadst8, fadst8 }, // FLIPADST_FLIPADST
- { fadst8, fadst8 }, // ADST_FLIPADST
- { fadst8, fadst8 }, // FLIPADST_ADST
- { fidtx8, fidtx8 }, // IDTX
- { fdct8, fidtx8 }, // V_DCT
- { fidtx8, fdct8 }, // H_DCT
- { fadst8, fidtx8 }, // V_ADST
- { fidtx8, fadst8 }, // H_ADST
- { fadst8, fidtx8 }, // V_FLIPADST
- { fidtx8, fadst8 }, // H_FLIPADST
- };
- const transform_2d ht = FHT[tx_type];
- tran_low_t out[64];
- int i, j;
- tran_low_t temp_in[8], temp_out[8];
-
- int16_t flipped_input[8 * 8];
- maybe_flip_input(&input, &stride, 8, 8, flipped_input, tx_type);
-
- // Columns
- for (i = 0; i < 8; ++i) {
- for (j = 0; j < 8; ++j) temp_in[j] = input[j * stride + i] * 4;
- ht.cols(temp_in, temp_out);
- for (j = 0; j < 8; ++j) out[j * 8 + i] = temp_out[j];
- }
-
- // Rows
- for (i = 0; i < 8; ++i) {
- for (j = 0; j < 8; ++j) temp_in[j] = out[j + i * 8];
- ht.rows(temp_in, temp_out);
- for (j = 0; j < 8; ++j)
- output[j + i * 8] = (temp_out[j] + (temp_out[j] < 0)) >> 1;
- }
- }
-}
-
/* 4-point reversible, orthonormal Walsh-Hadamard in 3.5 adds, 0.5 shifts per
pixel. */
void av1_fwht4x4_c(const int16_t *input, tran_low_t *output, int stride) {
@@ -1909,103 +1309,6 @@
av1_fwht4x4_c(input, output, stride);
}
-void av1_fht32x32_c(const int16_t *input, tran_low_t *output, int stride,
- TxfmParam *txfm_param) {
- const TX_TYPE tx_type = txfm_param->tx_type;
- static const transform_2d FHT[] = {
- { fdct32, fdct32 }, // DCT_DCT
- { fhalfright32, fdct32 }, // ADST_DCT
- { fdct32, fhalfright32 }, // DCT_ADST
- { fhalfright32, fhalfright32 }, // ADST_ADST
- { fhalfright32, fdct32 }, // FLIPADST_DCT
- { fdct32, fhalfright32 }, // DCT_FLIPADST
- { fhalfright32, fhalfright32 }, // FLIPADST_FLIPADST
- { fhalfright32, fhalfright32 }, // ADST_FLIPADST
- { fhalfright32, fhalfright32 }, // FLIPADST_ADST
- { fidtx32, fidtx32 }, // IDTX
- { fdct32, fidtx32 }, // V_DCT
- { fidtx32, fdct32 }, // H_DCT
- { fhalfright32, fidtx32 }, // V_ADST
- { fidtx32, fhalfright32 }, // H_ADST
- { fhalfright32, fidtx32 }, // V_FLIPADST
- { fidtx32, fhalfright32 }, // H_FLIPADST
- };
- const transform_2d ht = FHT[tx_type];
- tran_low_t out[1024];
- int i, j;
- tran_low_t temp_in[32], temp_out[32];
-
- int16_t flipped_input[32 * 32];
- maybe_flip_input(&input, &stride, 32, 32, flipped_input, tx_type);
-
- // Columns
- for (i = 0; i < 32; ++i) {
- for (j = 0; j < 32; ++j) {
- temp_in[j] = input[j * stride + i] * 4;
- }
- ht.cols(temp_in, temp_out);
- for (j = 0; j < 32; ++j) {
- out[j * 32 + i] = ROUND_POWER_OF_TWO_SIGNED(temp_out[j], 4);
- }
- }
-
- // Rows
- for (i = 0; i < 32; ++i) {
- for (j = 0; j < 32; ++j) temp_in[j] = out[j + i * 32];
- ht.rows(temp_in, temp_out);
- for (j = 0; j < 32; ++j) {
- output[j + i * 32] = temp_out[j];
- }
- }
-}
-
-static void fidtx64(const tran_low_t *input, tran_low_t *output) {
- int i;
- for (i = 0; i < 64; ++i)
- output[i] = (tran_low_t)fdct_round_shift(input[i] * 4 * Sqrt2);
-}
-
-// For use in lieu of ADST
-static void fhalfright64(const tran_low_t *input, tran_low_t *output) {
- int i;
- tran_low_t inputhalf[32];
- for (i = 0; i < 32; ++i) {
- output[32 + i] = (tran_low_t)fdct_round_shift(input[i] * 4 * Sqrt2);
- }
- // Multiply input by sqrt(2)
- for (i = 0; i < 32; ++i) {
- inputhalf[i] = (tran_low_t)fdct_round_shift(input[i + 32] * Sqrt2);
- }
- fdct32(inputhalf, output);
- // Note overall scaling factor is 2 times unitary
-}
-// stage range
-static const int8_t fwd_stage_range_col_dct_64[12] = { 0, 1, 2, 3, 4, 5,
- 6, 6, 6, 6, 6, 6 };
-static const int8_t fwd_stage_range_row_dct_64[12] = { 6, 7, 8, 9, 10, 11,
- 11, 11, 11, 11, 11, 11 };
-static void fdct64_col(const tran_low_t *input, tran_low_t *output) {
- int32_t in[64], out[64];
- int i;
- const int txw_idx = get_txw_idx(TX_64X64);
- const int txh_idx = get_txh_idx(TX_64X64);
- for (i = 0; i < 64; ++i) in[i] = (int32_t)input[i];
- av1_fdct64_new(in, out, fwd_cos_bit_col[txw_idx][txh_idx],
- fwd_stage_range_col_dct_64);
- for (i = 0; i < 64; ++i) output[i] = (tran_low_t)out[i];
-}
-
-static void fdct64_row(const tran_low_t *input, tran_low_t *output) {
- int32_t in[64], out[64];
- int i;
- const int txw_idx = get_txw_idx(TX_64X64);
- const int txh_idx = get_txh_idx(TX_64X64);
- for (i = 0; i < 64; ++i) in[i] = (int32_t)input[i];
- av1_fdct64_new(in, out, fwd_cos_bit_row[txw_idx][txh_idx],
- fwd_stage_range_row_dct_64);
- for (i = 0; i < 64; ++i) output[i] = (tran_low_t)out[i];
-}
-
// Forward identity transform.
void av1_fwd_idtx_c(const int16_t *src_diff, tran_low_t *coeff, int stride,
int bsx, int bsy, TX_TYPE tx_type) {
diff --git a/test/av1_fht4x4_test.cc b/test/av1_fht4x4_test.cc
deleted file mode 100644
index ef61bce..0000000
--- a/test/av1_fht4x4_test.cc
+++ /dev/null
@@ -1,227 +0,0 @@
-/*
- * Copyright (c) 2016, Alliance for Open Media. All rights reserved
- *
- * This source code is subject to the terms of the BSD 2 Clause License and
- * the Alliance for Open Media Patent License 1.0. If the BSD 2 Clause License
- * was not distributed with this source code in the LICENSE file, you can
- * obtain it at www.aomedia.org/license/software. If the Alliance for Open
- * Media Patent License 1.0 was not distributed with this source code in the
- * PATENTS file, you can obtain it at www.aomedia.org/license/patent.
- */
-
-#include "third_party/googletest/src/googletest/include/gtest/gtest.h"
-
-#include "./av1_rtcd.h"
-#include "./aom_dsp_rtcd.h"
-
-#include "test/acm_random.h"
-#include "test/clear_system_state.h"
-#include "test/register_state_check.h"
-#include "test/transform_test_base.h"
-#include "test/util.h"
-#include "aom_ports/mem.h"
-
-using libaom_test::ACMRandom;
-
-namespace {
-typedef void (*IhtFunc)(const tran_low_t *in, uint8_t *out, int stride,
- const TxfmParam *txfm_param);
-using libaom_test::FhtFunc;
-using std::tr1::tuple;
-typedef tuple<FhtFunc, IhtFunc, TX_TYPE, aom_bit_depth_t, int> Ht4x4Param;
-
-void fht4x4_ref(const int16_t *in, tran_low_t *out, int stride,
- TxfmParam *txfm_param) {
- av1_fht4x4_c(in, out, stride, txfm_param);
-}
-
-void iht4x4_ref(const tran_low_t *in, uint8_t *out, int stride,
- const TxfmParam *txfm_param) {
- av1_iht4x4_16_add_c(in, out, stride, txfm_param);
-}
-
-typedef void (*IhighbdHtFunc)(const tran_low_t *in, uint8_t *out, int stride,
- TX_TYPE tx_type, int bd);
-typedef void (*HBDFhtFunc)(const int16_t *input, int32_t *output, int stride,
- TX_TYPE tx_type, int bd);
-
-// HighbdHt4x4Param argument list:
-// <Target optimized function, tx_type, bit depth>
-typedef tuple<HBDFhtFunc, TX_TYPE, int> HighbdHt4x4Param;
-
-void highbe_fht4x4_ref(const int16_t *in, int32_t *out, int stride,
- TX_TYPE tx_type, int bd) {
- av1_fwd_txfm2d_4x4_c(in, out, stride, tx_type, bd);
-}
-
-class AV1Trans4x4HT : public libaom_test::TransformTestBase,
- public ::testing::TestWithParam<Ht4x4Param> {
- public:
- virtual ~AV1Trans4x4HT() {}
-
- virtual void SetUp() {
- fwd_txfm_ = GET_PARAM(0);
- inv_txfm_ = GET_PARAM(1);
- pitch_ = 4;
- height_ = 4;
- fwd_txfm_ref = fht4x4_ref;
- inv_txfm_ref = iht4x4_ref;
- bit_depth_ = GET_PARAM(3);
- mask_ = (1 << bit_depth_) - 1;
- num_coeffs_ = GET_PARAM(4);
- txfm_param_.tx_type = GET_PARAM(2);
- }
- virtual void TearDown() { libaom_test::ClearSystemState(); }
-
- protected:
- void RunFwdTxfm(const int16_t *in, tran_low_t *out, int stride) {
- fwd_txfm_(in, out, stride, &txfm_param_);
- }
-
- void RunInvTxfm(const tran_low_t *out, uint8_t *dst, int stride) {
- inv_txfm_(out, dst, stride, &txfm_param_);
- }
-
- FhtFunc fwd_txfm_;
- IhtFunc inv_txfm_;
-};
-
-TEST_P(AV1Trans4x4HT, MemCheck) { RunMemCheck(); }
-TEST_P(AV1Trans4x4HT, CoeffCheck) { RunCoeffCheck(); }
-// Note:
-// TODO(luoyi): Add tx_type, 9-15 for inverse transform.
-// Need cleanup since same tests may be done in fdct4x4_test.cc
-// TEST_P(AV1Trans4x4HT, AccuracyCheck) { RunAccuracyCheck(0); }
-// TEST_P(AV1Trans4x4HT, InvAccuracyCheck) { RunInvAccuracyCheck(0); }
-// TEST_P(AV1Trans4x4HT, InvCoeffCheck) { RunInvCoeffCheck(); }
-
-class AV1HighbdTrans4x4HT : public ::testing::TestWithParam<HighbdHt4x4Param> {
- public:
- virtual ~AV1HighbdTrans4x4HT() {}
-
- virtual void SetUp() {
- fwd_txfm_ = GET_PARAM(0);
- fwd_txfm_ref_ = highbe_fht4x4_ref;
- tx_type_ = GET_PARAM(1);
- bit_depth_ = GET_PARAM(2);
- mask_ = (1 << bit_depth_) - 1;
- num_coeffs_ = 16;
-
- input_ = reinterpret_cast<int16_t *>(
- aom_memalign(16, sizeof(int16_t) * num_coeffs_));
- output_ = reinterpret_cast<int32_t *>(
- aom_memalign(16, sizeof(int32_t) * num_coeffs_));
- output_ref_ = reinterpret_cast<int32_t *>(
- aom_memalign(16, sizeof(int32_t) * num_coeffs_));
- }
-
- virtual void TearDown() {
- aom_free(input_);
- aom_free(output_);
- aom_free(output_ref_);
- libaom_test::ClearSystemState();
- }
-
- protected:
- void RunBitexactCheck();
-
- private:
- HBDFhtFunc fwd_txfm_;
- HBDFhtFunc fwd_txfm_ref_;
- TX_TYPE tx_type_;
- int bit_depth_;
- int mask_;
- int num_coeffs_;
- int16_t *input_;
- int32_t *output_;
- int32_t *output_ref_;
-};
-
-void AV1HighbdTrans4x4HT::RunBitexactCheck() {
- ACMRandom rnd(ACMRandom::DeterministicSeed());
- int i, j;
- const int stride = 4;
- const int num_tests = 1000;
- const int num_coeffs = 16;
-
- for (i = 0; i < num_tests; ++i) {
- for (j = 0; j < num_coeffs; ++j) {
- input_[j] = (rnd.Rand16() & mask_) - (rnd.Rand16() & mask_);
- }
-
- fwd_txfm_ref_(input_, output_ref_, stride, tx_type_, bit_depth_);
- fwd_txfm_(input_, output_, stride, tx_type_, bit_depth_);
-
- for (j = 0; j < num_coeffs; ++j) {
- EXPECT_EQ(output_[j], output_ref_[j])
- << "Not bit-exact result at index: " << j << " at test block: " << i;
- }
- }
-}
-
-TEST_P(AV1HighbdTrans4x4HT, HighbdCoeffCheck) { RunBitexactCheck(); }
-
-using std::tr1::make_tuple;
-
-#if HAVE_SSE2
-const Ht4x4Param kArrayHt4x4Param_sse2[] = {
- make_tuple(&av1_fht4x4_sse2, &av1_iht4x4_16_add_sse2, DCT_DCT, AOM_BITS_8,
- 16),
- make_tuple(&av1_fht4x4_sse2, &av1_iht4x4_16_add_sse2, ADST_DCT, AOM_BITS_8,
- 16),
- make_tuple(&av1_fht4x4_sse2, &av1_iht4x4_16_add_sse2, DCT_ADST, AOM_BITS_8,
- 16),
- make_tuple(&av1_fht4x4_sse2, &av1_iht4x4_16_add_sse2, ADST_ADST, AOM_BITS_8,
- 16),
- make_tuple(&av1_fht4x4_sse2, &av1_iht4x4_16_add_sse2, FLIPADST_DCT,
- AOM_BITS_8, 16),
- make_tuple(&av1_fht4x4_sse2, &av1_iht4x4_16_add_sse2, DCT_FLIPADST,
- AOM_BITS_8, 16),
- make_tuple(&av1_fht4x4_sse2, &av1_iht4x4_16_add_sse2, FLIPADST_FLIPADST,
- AOM_BITS_8, 16),
- make_tuple(&av1_fht4x4_sse2, &av1_iht4x4_16_add_sse2, ADST_FLIPADST,
- AOM_BITS_8, 16),
- make_tuple(&av1_fht4x4_sse2, &av1_iht4x4_16_add_sse2, FLIPADST_ADST,
- AOM_BITS_8, 16),
- make_tuple(&av1_fht4x4_sse2, &av1_iht4x4_16_add_sse2, IDTX, AOM_BITS_8, 16),
- make_tuple(&av1_fht4x4_sse2, &av1_iht4x4_16_add_sse2, V_DCT, AOM_BITS_8, 16),
- make_tuple(&av1_fht4x4_sse2, &av1_iht4x4_16_add_sse2, H_DCT, AOM_BITS_8, 16),
- make_tuple(&av1_fht4x4_sse2, &av1_iht4x4_16_add_sse2, V_ADST, AOM_BITS_8, 16),
- make_tuple(&av1_fht4x4_sse2, &av1_iht4x4_16_add_sse2, H_ADST, AOM_BITS_8, 16),
- make_tuple(&av1_fht4x4_sse2, &av1_iht4x4_16_add_sse2, V_FLIPADST, AOM_BITS_8,
- 16),
- make_tuple(&av1_fht4x4_sse2, &av1_iht4x4_16_add_sse2, H_FLIPADST, AOM_BITS_8,
- 16)
-};
-INSTANTIATE_TEST_CASE_P(SSE2, AV1Trans4x4HT,
- ::testing::ValuesIn(kArrayHt4x4Param_sse2));
-#endif // HAVE_SSE2
-
-#if HAVE_SSE4_1
-const HighbdHt4x4Param kArrayHighbdHt4x4Param[] = {
- make_tuple(&av1_fwd_txfm2d_4x4_sse4_1, DCT_DCT, 10),
- make_tuple(&av1_fwd_txfm2d_4x4_sse4_1, DCT_DCT, 12),
- make_tuple(&av1_fwd_txfm2d_4x4_sse4_1, ADST_DCT, 10),
- make_tuple(&av1_fwd_txfm2d_4x4_sse4_1, ADST_DCT, 12),
- make_tuple(&av1_fwd_txfm2d_4x4_sse4_1, DCT_ADST, 10),
- make_tuple(&av1_fwd_txfm2d_4x4_sse4_1, DCT_ADST, 12),
- make_tuple(&av1_fwd_txfm2d_4x4_sse4_1, ADST_ADST, 10),
- make_tuple(&av1_fwd_txfm2d_4x4_sse4_1, ADST_ADST, 12),
- make_tuple(&av1_fwd_txfm2d_4x4_sse4_1, FLIPADST_DCT, 10),
- make_tuple(&av1_fwd_txfm2d_4x4_sse4_1, FLIPADST_DCT, 12),
- make_tuple(&av1_fwd_txfm2d_4x4_sse4_1, DCT_FLIPADST, 10),
- make_tuple(&av1_fwd_txfm2d_4x4_sse4_1, DCT_FLIPADST, 12),
- make_tuple(&av1_fwd_txfm2d_4x4_sse4_1, FLIPADST_FLIPADST, 10),
- make_tuple(&av1_fwd_txfm2d_4x4_sse4_1, FLIPADST_FLIPADST, 12),
- make_tuple(&av1_fwd_txfm2d_4x4_sse4_1, ADST_FLIPADST, 10),
- make_tuple(&av1_fwd_txfm2d_4x4_sse4_1, ADST_FLIPADST, 12),
- make_tuple(&av1_fwd_txfm2d_4x4_sse4_1, FLIPADST_ADST, 10),
- make_tuple(&av1_fwd_txfm2d_4x4_sse4_1, FLIPADST_ADST, 12),
-};
-
-INSTANTIATE_TEST_CASE_P(SSE4_1, AV1HighbdTrans4x4HT,
- ::testing::ValuesIn(kArrayHighbdHt4x4Param));
-
-#endif // HAVE_SSE4_1
-
-} // namespace
diff --git a/test/test.cmake b/test/test.cmake
index 7669fff..092b31c 100644
--- a/test/test.cmake
+++ b/test/test.cmake
@@ -265,10 +265,6 @@
"${AOM_ROOT}/test/hash_test.cc")
endif ()
- set(AOM_UNIT_TEST_ENCODER_SOURCES
- ${AOM_UNIT_TEST_ENCODER_SOURCES}
- "${AOM_ROOT}/test/av1_fht4x4_test.cc")
-
set(AOM_UNIT_TEST_ENCODER_INTRIN_SSE4_1
${AOM_UNIT_TEST_ENCODER_INTRIN_SSE4_1}
"${AOM_ROOT}/test/corner_match_test.cc")