Code cleanup in highbd inv_txfm module
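Remove the redundant per-transform-size SSE4_1 and AVX2 wrapper functions (and their RTCD prototypes) from the highbd inv_txfm path. The affected transform sizes are now handled by the default case of the top-level dispatch, which calls av1_highbd_inv_txfm2d_add_universe_{sse4_1,avx2} directly.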
This change has no impact on speed numbers; this has been verified with multiple test cases.
Change-Id: If23a3a386e5f7ced1ae11136578b9ac4ff86c561
diff --git a/av1/common/av1_rtcd_defs.pl b/av1/common/av1_rtcd_defs.pl
index d9000c7..5b46865 100755
--- a/av1/common/av1_rtcd_defs.pl
+++ b/av1/common/av1_rtcd_defs.pl
@@ -116,23 +116,7 @@
add_proto qw/void av1_highbd_inv_txfm_add_4x4/, "const tran_low_t *dqcoeff, uint8_t *dst, int stride, const TxfmParam *txfm_param";
specialize qw/av1_highbd_inv_txfm_add_4x4 sse4_1/;
add_proto qw/void av1_highbd_inv_txfm_add_8x8/, "const tran_low_t *dqcoeff, uint8_t *dst, int stride, const TxfmParam *txfm_param";
-specialize qw/av1_highbd_inv_txfm_add_8x8 sse4_1 avx2/;
-add_proto qw/void av1_highbd_inv_txfm_add_16x8/, "const tran_low_t *dqcoeff, uint8_t *dst, int stride, const TxfmParam *txfm_param";
-specialize qw/av1_highbd_inv_txfm_add_16x8 sse4_1 avx2/;
-add_proto qw/void av1_highbd_inv_txfm_add_8x16/, "const tran_low_t *dqcoeff, uint8_t *dst, int stride, const TxfmParam *txfm_param";
-specialize qw/av1_highbd_inv_txfm_add_8x16 sse4_1 avx2/;
-add_proto qw/void av1_highbd_inv_txfm_add_16x16/, "const tran_low_t *dqcoeff, uint8_t *dst, int stride, const TxfmParam *txfm_param";
-specialize qw/av1_highbd_inv_txfm_add_16x16 sse4_1 avx2/;
-add_proto qw/void av1_highbd_inv_txfm_add_32x32/, "const tran_low_t *dqcoeff, uint8_t *dst, int stride, const TxfmParam *txfm_param";
-specialize qw/av1_highbd_inv_txfm_add_32x32 sse4_1 avx2/;
-add_proto qw/void av1_highbd_inv_txfm_add_16x32/, "const tran_low_t *dqcoeff, uint8_t *dst, int stride, const TxfmParam *txfm_param";
-specialize qw/av1_highbd_inv_txfm_add_16x32 sse4_1 avx2/;
-add_proto qw/void av1_highbd_inv_txfm_add_32x16/, "const tran_low_t *dqcoeff, uint8_t *dst, int stride, const TxfmParam *txfm_param";
-specialize qw/av1_highbd_inv_txfm_add_32x16 sse4_1 avx2/;
-add_proto qw/void av1_highbd_inv_txfm_add_8x32/, "const tran_low_t *dqcoeff, uint8_t *dst, int stride, const TxfmParam *txfm_param";
-specialize qw/av1_highbd_inv_txfm_add_8x32 sse4_1 avx2/;
-add_proto qw/void av1_highbd_inv_txfm_add_32x8/, "const tran_low_t *dqcoeff, uint8_t *dst, int stride, const TxfmParam *txfm_param";
-specialize qw/av1_highbd_inv_txfm_add_32x8 sse4_1 avx2/;
+specialize qw/av1_highbd_inv_txfm_add_8x8 sse4_1/;
add_proto qw/void av1_highbd_inv_txfm_add_4x8/, "const tran_low_t *dqcoeff, uint8_t *dst, int stride, const TxfmParam *txfm_param";
specialize qw/av1_highbd_inv_txfm_add_4x8 sse4_1/;
add_proto qw/void av1_highbd_inv_txfm_add_8x4/, "const tran_low_t *dqcoeff, uint8_t *dst, int stride, const TxfmParam *txfm_param";
diff --git a/av1/common/x86/highbd_inv_txfm_avx2.c b/av1/common/x86/highbd_inv_txfm_avx2.c
index cc9e90b..fe22465 100644
--- a/av1/common/x86/highbd_inv_txfm_avx2.c
+++ b/av1/common/x86/highbd_inv_txfm_avx2.c
@@ -4309,238 +4309,30 @@
highbd_inv_txfm2d_add_no_identity_avx2(input, CONVERT_TO_SHORTPTR(output),
stride, tx_type, tx_size, eob, bd);
break;
+ case IDTX:
+ case H_DCT:
+ case H_ADST:
+ case H_FLIPADST:
+ case V_DCT:
+ case V_ADST:
+ case V_FLIPADST:
+ av1_highbd_inv_txfm2d_add_universe_sse4_1(input, output, stride, tx_type,
+ tx_size, eob, bd);
+ break;
default: assert(0); break;
}
}
-
-void av1_highbd_inv_txfm_add_16x16_avx2(const tran_low_t *input, uint8_t *dest,
- int stride,
- const TxfmParam *txfm_param) {
- int bd = txfm_param->bd;
- const TX_TYPE tx_type = txfm_param->tx_type;
- switch (tx_type) {
- case IDTX:
- case H_DCT:
- case H_ADST:
- case H_FLIPADST:
- case V_DCT:
- case V_ADST:
- case V_FLIPADST:
- av1_highbd_inv_txfm2d_add_universe_sse4_1(input, dest, stride, tx_type,
- txfm_param->tx_size,
- txfm_param->eob, bd);
- break;
- default:
- av1_highbd_inv_txfm2d_add_universe_avx2(input, dest, stride, tx_type,
- txfm_param->tx_size,
- txfm_param->eob, bd);
- break;
- }
-}
-
-void av1_highbd_inv_txfm_add_32x32_avx2(const tran_low_t *input, uint8_t *dest,
- int stride,
- const TxfmParam *txfm_param) {
- const int bd = txfm_param->bd;
- const TX_TYPE tx_type = txfm_param->tx_type;
- switch (tx_type) {
- case DCT_DCT:
- av1_highbd_inv_txfm2d_add_universe_avx2(input, dest, stride, tx_type,
- txfm_param->tx_size,
- txfm_param->eob, bd);
- break;
- case IDTX:
- av1_highbd_inv_txfm2d_add_universe_sse4_1(input, dest, stride, tx_type,
- txfm_param->tx_size,
- txfm_param->eob, bd);
- break;
-
- default: assert(0);
- }
-}
-
-void av1_highbd_inv_txfm_add_16x32_avx2(const tran_low_t *input, uint8_t *dest,
- int stride,
- const TxfmParam *txfm_param) {
- int bd = txfm_param->bd;
- const TX_TYPE tx_type = txfm_param->tx_type;
- switch (tx_type) {
- case DCT_DCT:
- av1_highbd_inv_txfm2d_add_universe_avx2(input, dest, stride, tx_type,
- txfm_param->tx_size,
- txfm_param->eob, bd);
- break;
- case IDTX:
- av1_highbd_inv_txfm2d_add_universe_sse4_1(input, dest, stride, tx_type,
- txfm_param->tx_size,
- txfm_param->eob, bd);
- break;
- default: assert(0);
- }
-}
-
-void av1_highbd_inv_txfm_add_32x16_avx2(const tran_low_t *input, uint8_t *dest,
- int stride,
- const TxfmParam *txfm_param) {
- int bd = txfm_param->bd;
- const TX_TYPE tx_type = txfm_param->tx_type;
- switch (tx_type) {
- case DCT_DCT:
- av1_highbd_inv_txfm2d_add_universe_avx2(input, dest, stride, tx_type,
- txfm_param->tx_size,
- txfm_param->eob, bd);
- break;
- case IDTX:
- av1_highbd_inv_txfm2d_add_universe_sse4_1(input, dest, stride, tx_type,
- txfm_param->tx_size,
- txfm_param->eob, bd);
- break;
- default: assert(0);
- }
-}
-void av1_highbd_inv_txfm_add_8x8_avx2(const tran_low_t *input, uint8_t *dest,
- int stride, const TxfmParam *txfm_param) {
- int bd = txfm_param->bd;
- const TX_TYPE tx_type = txfm_param->tx_type;
- switch (tx_type) {
- case IDTX:
- case H_DCT:
- case H_ADST:
- case H_FLIPADST:
- case V_DCT:
- case V_ADST:
- case V_FLIPADST:
- av1_highbd_inv_txfm2d_add_universe_sse4_1(input, dest, stride, tx_type,
- txfm_param->tx_size,
- txfm_param->eob, bd);
- break;
- default:
- av1_highbd_inv_txfm2d_add_universe_avx2(input, dest, stride, tx_type,
- txfm_param->tx_size,
- txfm_param->eob, bd);
- break;
- }
-}
-void av1_highbd_inv_txfm_add_8x32_avx2(const tran_low_t *input, uint8_t *dest,
- int stride,
- const TxfmParam *txfm_param) {
- int bd = txfm_param->bd;
- const TX_TYPE tx_type = txfm_param->tx_type;
- switch (tx_type) {
- case DCT_DCT:
- av1_highbd_inv_txfm2d_add_universe_avx2(input, dest, stride, tx_type,
- txfm_param->tx_size,
- txfm_param->eob, bd);
- break;
- case IDTX:
- av1_highbd_inv_txfm2d_add_universe_sse4_1(input, dest, stride, tx_type,
- txfm_param->tx_size,
- txfm_param->eob, bd);
- break;
- default: assert(0);
- }
-}
-
-void av1_highbd_inv_txfm_add_32x8_avx2(const tran_low_t *input, uint8_t *dest,
- int stride,
- const TxfmParam *txfm_param) {
- int bd = txfm_param->bd;
- const TX_TYPE tx_type = txfm_param->tx_type;
- switch (tx_type) {
- case DCT_DCT:
- av1_highbd_inv_txfm2d_add_universe_avx2(input, dest, stride, tx_type,
- txfm_param->tx_size,
- txfm_param->eob, bd);
- break;
- case IDTX:
- av1_highbd_inv_txfm2d_add_universe_sse4_1(input, dest, stride, tx_type,
- txfm_param->tx_size,
- txfm_param->eob, bd);
- break;
- default: assert(0);
- }
-}
-void av1_highbd_inv_txfm_add_16x8_avx2(const tran_low_t *input, uint8_t *dest,
- int stride,
- const TxfmParam *txfm_param) {
- int bd = txfm_param->bd;
- const TX_TYPE tx_type = txfm_param->tx_type;
- switch (tx_type) {
- case IDTX:
- case H_DCT:
- case H_ADST:
- case H_FLIPADST:
- case V_DCT:
- case V_ADST:
- case V_FLIPADST:
- av1_highbd_inv_txfm2d_add_universe_sse4_1(input, dest, stride, tx_type,
- txfm_param->tx_size,
- txfm_param->eob, bd);
- break;
- default:
- av1_highbd_inv_txfm2d_add_universe_avx2(input, dest, stride, tx_type,
- txfm_param->tx_size,
- txfm_param->eob, bd);
- break;
- }
-}
-
-void av1_highbd_inv_txfm_add_8x16_avx2(const tran_low_t *input, uint8_t *dest,
- int stride,
- const TxfmParam *txfm_param) {
- int bd = txfm_param->bd;
- const TX_TYPE tx_type = txfm_param->tx_type;
- switch (tx_type) {
- case IDTX:
- case H_DCT:
- case H_ADST:
- case H_FLIPADST:
- case V_DCT:
- case V_ADST:
- case V_FLIPADST:
- av1_highbd_inv_txfm2d_add_universe_sse4_1(input, dest, stride, tx_type,
- txfm_param->tx_size,
- txfm_param->eob, bd);
- break;
- default:
- av1_highbd_inv_txfm2d_add_universe_avx2(input, dest, stride, tx_type,
- txfm_param->tx_size,
- txfm_param->eob, bd);
- break;
- }
-}
void av1_highbd_inv_txfm_add_avx2(const tran_low_t *input, uint8_t *dest,
int stride, const TxfmParam *txfm_param) {
assert(av1_ext_tx_used[txfm_param->tx_set_type][txfm_param->tx_type]);
const TX_SIZE tx_size = txfm_param->tx_size;
switch (tx_size) {
- case TX_32X32:
- av1_highbd_inv_txfm_add_32x32_avx2(input, dest, stride, txfm_param);
- break;
- case TX_16X16:
- av1_highbd_inv_txfm_add_16x16_avx2(input, dest, stride, txfm_param);
- break;
- case TX_8X8:
- av1_highbd_inv_txfm_add_8x8_avx2(input, dest, stride, txfm_param);
- break;
case TX_4X8:
av1_highbd_inv_txfm_add_4x8_sse4_1(input, dest, stride, txfm_param);
break;
case TX_8X4:
av1_highbd_inv_txfm_add_8x4_sse4_1(input, dest, stride, txfm_param);
break;
- case TX_8X16:
- av1_highbd_inv_txfm_add_8x16_avx2(input, dest, stride, txfm_param);
- break;
- case TX_16X8:
- av1_highbd_inv_txfm_add_16x8_avx2(input, dest, stride, txfm_param);
- break;
- case TX_16X32:
- av1_highbd_inv_txfm_add_16x32_avx2(input, dest, stride, txfm_param);
- break;
- case TX_32X16:
- av1_highbd_inv_txfm_add_32x16_avx2(input, dest, stride, txfm_param);
- break;
case TX_4X4:
av1_highbd_inv_txfm_add_4x4_sse4_1(input, dest, stride, txfm_param);
break;
@@ -4550,21 +4342,10 @@
case TX_4X16:
av1_highbd_inv_txfm_add_4x16_sse4_1(input, dest, stride, txfm_param);
break;
- case TX_8X32:
- av1_highbd_inv_txfm_add_8x32_avx2(input, dest, stride, txfm_param);
- break;
- case TX_32X8:
- av1_highbd_inv_txfm_add_32x8_avx2(input, dest, stride, txfm_param);
- break;
- case TX_64X64:
- case TX_32X64:
- case TX_64X32:
- case TX_16X64:
- case TX_64X16:
+ default:
av1_highbd_inv_txfm2d_add_universe_avx2(
input, dest, stride, txfm_param->tx_type, txfm_param->tx_size,
txfm_param->eob, txfm_param->bd);
break;
- default: assert(0 && "Invalid transform size"); break;
}
}
diff --git a/av1/common/x86/highbd_inv_txfm_sse4.c b/av1/common/x86/highbd_inv_txfm_sse4.c
index 39f3548..2f0eecb 100644
--- a/av1/common/x86/highbd_inv_txfm_sse4.c
+++ b/av1/common/x86/highbd_inv_txfm_sse4.c
@@ -5245,79 +5245,6 @@
break;
}
}
-
-void av1_highbd_inv_txfm_add_16x8_sse4_1(const tran_low_t *input, uint8_t *dest,
- int stride,
- const TxfmParam *txfm_param) {
- int bd = txfm_param->bd;
- const TX_TYPE tx_type = txfm_param->tx_type;
- av1_highbd_inv_txfm2d_add_universe_sse4_1(
- input, dest, stride, tx_type, txfm_param->tx_size, txfm_param->eob, bd);
-}
-
-void av1_highbd_inv_txfm_add_8x16_sse4_1(const tran_low_t *input, uint8_t *dest,
- int stride,
- const TxfmParam *txfm_param) {
- int bd = txfm_param->bd;
- const TX_TYPE tx_type = txfm_param->tx_type;
- av1_highbd_inv_txfm2d_add_universe_sse4_1(
- input, dest, stride, tx_type, txfm_param->tx_size, txfm_param->eob, bd);
-}
-
-void av1_highbd_inv_txfm_add_16x16_sse4_1(const tran_low_t *input,
- uint8_t *dest, int stride,
- const TxfmParam *txfm_param) {
- int bd = txfm_param->bd;
- const TX_TYPE tx_type = txfm_param->tx_type;
- av1_highbd_inv_txfm2d_add_universe_sse4_1(
- input, dest, stride, tx_type, txfm_param->tx_size, txfm_param->eob, bd);
-}
-
-void av1_highbd_inv_txfm_add_32x32_sse4_1(const tran_low_t *input,
- uint8_t *dest, int stride,
- const TxfmParam *txfm_param) {
- int bd = txfm_param->bd;
- const TX_TYPE tx_type = txfm_param->tx_type;
- av1_highbd_inv_txfm2d_add_universe_sse4_1(
- input, dest, stride, tx_type, txfm_param->tx_size, txfm_param->eob, bd);
-}
-
-void av1_highbd_inv_txfm_add_16x32_sse4_1(const tran_low_t *input,
- uint8_t *dest, int stride,
- const TxfmParam *txfm_param) {
- int bd = txfm_param->bd;
- const TX_TYPE tx_type = txfm_param->tx_type;
- av1_highbd_inv_txfm2d_add_universe_sse4_1(
- input, dest, stride, tx_type, txfm_param->tx_size, txfm_param->eob, bd);
-}
-
-void av1_highbd_inv_txfm_add_32x16_sse4_1(const tran_low_t *input,
- uint8_t *dest, int stride,
- const TxfmParam *txfm_param) {
- int bd = txfm_param->bd;
- const TX_TYPE tx_type = txfm_param->tx_type;
- av1_highbd_inv_txfm2d_add_universe_sse4_1(
- input, dest, stride, tx_type, txfm_param->tx_size, txfm_param->eob, bd);
-}
-
-void av1_highbd_inv_txfm_add_8x32_sse4_1(const tran_low_t *input, uint8_t *dest,
- int stride,
- const TxfmParam *txfm_param) {
- int bd = txfm_param->bd;
- const TX_TYPE tx_type = txfm_param->tx_type;
- av1_highbd_inv_txfm2d_add_universe_sse4_1(
- input, dest, stride, tx_type, txfm_param->tx_size, txfm_param->eob, bd);
-}
-
-void av1_highbd_inv_txfm_add_32x8_sse4_1(const tran_low_t *input, uint8_t *dest,
- int stride,
- const TxfmParam *txfm_param) {
- int bd = txfm_param->bd;
- const TX_TYPE tx_type = txfm_param->tx_type;
- av1_highbd_inv_txfm2d_add_universe_sse4_1(
- input, dest, stride, tx_type, txfm_param->tx_size, txfm_param->eob, bd);
-}
-
void av1_highbd_inv_txfm_add_4x4_sse4_1(const tran_low_t *input, uint8_t *dest,
int stride,
const TxfmParam *txfm_param) {
@@ -5999,12 +5926,6 @@
assert(av1_ext_tx_used[txfm_param->tx_set_type][txfm_param->tx_type]);
const TX_SIZE tx_size = txfm_param->tx_size;
switch (tx_size) {
- case TX_32X32:
- av1_highbd_inv_txfm_add_32x32_sse4_1(input, dest, stride, txfm_param);
- break;
- case TX_16X16:
- av1_highbd_inv_txfm_add_16x16_sse4_1(input, dest, stride, txfm_param);
- break;
case TX_8X8:
av1_highbd_inv_txfm_add_8x8_sse4_1(input, dest, stride, txfm_param);
break;
@@ -6014,18 +5935,6 @@
case TX_8X4:
av1_highbd_inv_txfm_add_8x4_sse4_1(input, dest, stride, txfm_param);
break;
- case TX_8X16:
- av1_highbd_inv_txfm_add_8x16_sse4_1(input, dest, stride, txfm_param);
- break;
- case TX_16X8:
- av1_highbd_inv_txfm_add_16x8_sse4_1(input, dest, stride, txfm_param);
- break;
- case TX_16X32:
- av1_highbd_inv_txfm_add_16x32_sse4_1(input, dest, stride, txfm_param);
- break;
- case TX_32X16:
- av1_highbd_inv_txfm_add_32x16_sse4_1(input, dest, stride, txfm_param);
- break;
case TX_4X4:
av1_highbd_inv_txfm_add_4x4_sse4_1(input, dest, stride, txfm_param);
break;
@@ -6035,21 +5944,10 @@
case TX_4X16:
av1_highbd_inv_txfm_add_4x16_sse4_1(input, dest, stride, txfm_param);
break;
- case TX_8X32:
- av1_highbd_inv_txfm_add_8x32_sse4_1(input, dest, stride, txfm_param);
- break;
- case TX_32X8:
- av1_highbd_inv_txfm_add_32x8_sse4_1(input, dest, stride, txfm_param);
- break;
- case TX_64X64:
- case TX_32X64:
- case TX_64X32:
- case TX_16X64:
- case TX_64X16:
+ default:
av1_highbd_inv_txfm2d_add_universe_sse4_1(
- input, dest, stride, txfm_param->tx_type, txfm_param->tx_size,
- txfm_param->eob, txfm_param->bd);
+ input, dest, stride, txfm_param->tx_type, tx_size, txfm_param->eob,
+ txfm_param->bd);
break;
- default: assert(0 && "Invalid transform size"); break;
}
}