Hook in av1_lowbd_inv_txfm2d_add_8x8_sse2
Change-Id: Ib385a62aa269fd4eae883080bf1172ef58f9bf2d
diff --git a/av1/common/av1_rtcd_defs.pl b/av1/common/av1_rtcd_defs.pl
index f3fd840..64c052e 100755
--- a/av1/common/av1_rtcd_defs.pl
+++ b/av1/common/av1_rtcd_defs.pl
@@ -227,6 +227,9 @@
add_proto qw/void av1_highbd_iht16x16_256_add/, "const tran_low_t *input, uint8_t *output, int pitch, const struct txfm_param *param";
#inv txfm
+add_proto qw/void av1_inv_txfm_add/, "const tran_low_t *dqcoeff, uint8_t *dst, int stride, const TxfmParam *txfm_param";
+specialize qw/av1_inv_txfm_add sse2/;  # implementations: av1_inv_txfm_add_c (idct.c), av1_inv_txfm_add_sse2 (x86/av1_inv_txfm_sse2.c)
+
add_proto qw/void av1_inv_txfm2d_add_4x8/, "const int32_t *input, uint16_t *output, int stride, TX_TYPE tx_type, int bd";
add_proto qw/void av1_inv_txfm2d_add_8x4/, "const int32_t *input, uint16_t *output, int stride, TX_TYPE tx_type, int bd";
add_proto qw/void av1_inv_txfm2d_add_8x16/, "const int32_t *input, uint16_t *output, int stride, TX_TYPE tx_type, int bd";
diff --git a/av1/common/idct.c b/av1/common/idct.c
index 07fef41..e939795 100644
--- a/av1/common/idct.c
+++ b/av1/common/idct.c
@@ -1625,7 +1625,7 @@
}
static void av1_highbd_inv_txfm_add(const tran_low_t *input, uint8_t *dest,
- int stride, TxfmParam *txfm_param) {
+ int stride, const TxfmParam *txfm_param) {
assert(av1_ext_tx_used[txfm_param->tx_set_type][txfm_param->tx_type]);
const TX_SIZE tx_size = txfm_param->tx_size;
switch (tx_size) {
@@ -1695,8 +1695,8 @@
}
}
-static void av1_inv_txfm_add(const tran_low_t *dqcoeff, uint8_t *dst,
- int stride, TxfmParam *txfm_param) {
+void av1_inv_txfm_add_c(const tran_low_t *dqcoeff, uint8_t *dst, int stride,
+ const TxfmParam *txfm_param) {
const TX_SIZE tx_size = txfm_param->tx_size;
DECLARE_ALIGNED(16, uint16_t, tmp[MAX_TX_SQUARE]);
int tmp_stride = MAX_TX_SIZE;
@@ -1719,7 +1719,7 @@
}
typedef void (*InvTxfmFunc)(const tran_low_t *dqcoeff, uint8_t *dst, int stride,
- TxfmParam *txfm_param);
+ const TxfmParam *txfm_param);
void av1_inverse_transform_block(const MACROBLOCKD *xd,
const tran_low_t *dqcoeff, int plane,
TX_TYPE tx_type, TX_SIZE tx_size, uint8_t *dst,
@@ -1733,9 +1733,8 @@
&txfm_param);
assert(av1_ext_tx_used[txfm_param.tx_set_type][txfm_param.tx_type]);
- static const InvTxfmFunc inv_txfm_func[2] = {
- av1_inv_txfm_add, av1_highbd_inv_txfm_add,
- };
-
- inv_txfm_func[txfm_param.is_hbd](dqcoeff, dst, stride, &txfm_param);
+ if (txfm_param.is_hbd)
+ av1_highbd_inv_txfm_add(dqcoeff, dst, stride, &txfm_param);
+ else
+ av1_inv_txfm_add(dqcoeff, dst, stride, &txfm_param);
}
diff --git a/av1/common/x86/av1_inv_txfm_sse2.c b/av1/common/x86/av1_inv_txfm_sse2.c
index 1122316..269e2fc 100644
--- a/av1/common/x86/av1_inv_txfm_sse2.c
+++ b/av1/common/x86/av1_inv_txfm_sse2.c
@@ -1849,3 +1849,45 @@
round_shift_16bit(buf0, buf_size_h, shift[1]);
lowbd_write_buffer_8xn_sse2(buf0, output, stride, ud_flip, buf_size_h);
}
+
+typedef void (*inv_txfm_func)(const int32_t *input, uint8_t *output, int stride,
+                              TX_TYPE tx_type, int bd);
+// SSE2 kernels indexed by TX_SIZE; NULL means no SSE2 version is hooked in.
+static const inv_txfm_func inv_txfm_func_ls[TX_SIZES_ALL] = {
+  NULL,                               // 4x4 transform
+  av1_lowbd_inv_txfm2d_add_8x8_sse2,  // 8x8 transform
+  NULL,                               // 16x16 transform
+  NULL,                               // 32x32 transform
+#if CONFIG_TX64X64
+  NULL,  // 64x64 transform
+#endif   // CONFIG_TX64X64
+  NULL,  // 4x8 transform
+  NULL,  // 8x4 transform
+  NULL,  // 8x16 transform
+  NULL,  // 16x8 transform
+  NULL,  // 16x32 transform
+  NULL,  // 32x16 transform
+#if CONFIG_TX64X64
+  NULL,  // 32x64 transform
+  NULL,  // 64x32 transform
+#endif   // CONFIG_TX64X64
+  NULL,  // 4x16 transform
+  NULL,  // 16x4 transform
+  NULL,  // 8x32 transform
+  NULL,  // 32x8 transform
+#if CONFIG_TX64X64
+  NULL,  // 16x64 transform
+  NULL,  // 64x16 transform
+#endif   // CONFIG_TX64X64
+};
+
+void av1_inv_txfm_add_sse2(const tran_low_t *dqcoeff, uint8_t *dst, int stride,
+                           const TxfmParam *txfm_param) {
+  // Dispatch to the SSE2 kernel for this tx_size, or to the C version if none.
+  const inv_txfm_func sse2_kernel = inv_txfm_func_ls[txfm_param->tx_size];
+  if (sse2_kernel == NULL) {
+    av1_inv_txfm_add_c(dqcoeff, dst, stride, txfm_param);
+    return;
+  }
+  sse2_kernel(dqcoeff, dst, stride, txfm_param->tx_type, txfm_param->bd);
+}