daala_tx: Add inverse TX SIMD dispatch
This just adds a top-level daala_inv_txfm_add_avx2(), but no actual
SIMD functions yet. It dispatches back to the C version for all TX
types and sizes for the moment.
Change-Id: I7a578a4af363f989615d01ea67ce031d8ceff977
diff --git a/av1/av1.cmake b/av1/av1.cmake
index 18249a6..1b59bf4 100644
--- a/av1/av1.cmake
+++ b/av1/av1.cmake
@@ -185,6 +185,11 @@
set(AOM_AV1_COMMON_INTRIN_AVX2
"${AOM_ROOT}/av1/common/x86/highbd_inv_txfm_avx2.c"
"${AOM_ROOT}/av1/common/x86/hybrid_inv_txfm_avx2.c")
+if (CONFIG_DAALA_TX)
+  # Build the Daala TX AVX2 entry point only when CONFIG_DAALA_TX is set.
+  list(APPEND AOM_AV1_COMMON_INTRIN_AVX2
+       "${AOM_ROOT}/av1/common/x86/daala_inv_txfm_avx2.c")
+endif ()
set(AOM_AV1_COMMON_INTRIN_MSA
"${AOM_ROOT}/av1/common/mips/msa/av1_idct16x16_msa.c"
diff --git a/av1/av1_common.mk b/av1/av1_common.mk
index 6fcb4d9..40694a5 100644
--- a/av1/av1_common.mk
+++ b/av1/av1_common.mk
@@ -28,6 +28,7 @@
AV1_COMMON_SRCS-yes += common/daala_tx.h
AV1_COMMON_SRCS-yes += common/daala_inv_txfm.c
AV1_COMMON_SRCS-yes += common/daala_inv_txfm.h
+AV1_COMMON_SRCS-$(HAVE_AVX2) += common/x86/daala_inv_txfm_avx2.c
AV1_COMMON_SRCS-yes += common/entropy.h
AV1_COMMON_SRCS-yes += common/entropymode.h
AV1_COMMON_SRCS-yes += common/entropymv.h
diff --git a/av1/common/av1_rtcd_defs.pl b/av1/common/av1_rtcd_defs.pl
index 826884b..cbf79b8 100755
--- a/av1/common/av1_rtcd_defs.pl
+++ b/av1/common/av1_rtcd_defs.pl
@@ -18,6 +18,10 @@
#include "av1/common/restoration.h"
#endif
+#if CONFIG_DAALA_TX
+#include "av1/common/daala_inv_txfm.h"
+#endif
+
struct macroblockd;
/* Encoder forward decls */
@@ -624,4 +628,10 @@
}
}
+# DAALA_TX functions: prototype for daala_inv_txfm_add plus its avx2 variant
+if (aom_config("CONFIG_DAALA_TX") eq "yes") {
+ add_proto qw/void daala_inv_txfm_add/, "const tran_low_t *input_coeffs, void *output_pixels, int output_stride, TxfmParam *txfm_param";
+ specialize qw/daala_inv_txfm_add avx2/;
+}
+
1;
diff --git a/av1/common/daala_inv_txfm.c b/av1/common/daala_inv_txfm.c
index ceb5961..6b75cf6 100644
--- a/av1/common/daala_inv_txfm.c
+++ b/av1/common/daala_inv_txfm.c
@@ -46,8 +46,8 @@
// intended for both low and high bitdepth cases with a tran_low_t of
// 32 bits (matching od_coeff), and a passed-in pixel buffer of either
// bytes (hbd=0) or shorts (hbd=1).
-void daala_inv_txfm_add(const tran_low_t *input_coeffs, void *output_pixels,
- int output_stride, TxfmParam *txfm_param) {
+void daala_inv_txfm_add_c(const tran_low_t *input_coeffs, void *output_pixels,
+ int output_stride, TxfmParam *txfm_param) {
const TX_SIZE tx_size = txfm_param->tx_size;
const TX_TYPE tx_type = txfm_param->tx_type;
const int px_depth = txfm_param->bd;
diff --git a/av1/common/daala_inv_txfm.h b/av1/common/daala_inv_txfm.h
index aa79a14..3e0df30 100644
--- a/av1/common/daala_inv_txfm.h
+++ b/av1/common/daala_inv_txfm.h
@@ -18,8 +18,8 @@
extern "C" {
#endif
-void daala_inv_txfm_add(const tran_low_t *input_coeffs, void *output_pixels,
- int output_stride, TxfmParam *txfm_param);
+void daala_inv_txfm_add_c(const tran_low_t *input_coeffs, void *output_pixels,
+ int output_stride, TxfmParam *txfm_param);
#ifdef __cplusplus
} // extern "C"
diff --git a/av1/common/x86/daala_inv_txfm_avx2.c b/av1/common/x86/daala_inv_txfm_avx2.c
new file mode 100644
index 0000000..6765bc7
--- /dev/null
+++ b/av1/common/x86/daala_inv_txfm_avx2.c
@@ -0,0 +1,142 @@
+/*
+ * Copyright (c) 2017, Alliance for Open Media. All rights reserved
+ *
+ * This source code is subject to the terms of the BSD 2 Clause License and
+ * the Alliance for Open Media Patent License 1.0. If the BSD 2 Clause License
+ * was not distributed with this source code in the LICENSE file, you can
+ * obtain it at www.aomedia.org/license/software. If the Alliance for Open
+ * Media Patent License 1.0 was not distributed with this source code in the
+ * PATENTS file, you can obtain it at www.aomedia.org/license/patent.
+ */
+
+#include "./av1_rtcd.h"
+#include "./aom_config.h"
+#include "./aom_dsp_rtcd.h"
+#include "av1/common/daala_tx.h"
+#include "av1/common/daala_inv_txfm.h"
+#include "av1/common/idct.h"
+
+#if CONFIG_DAALA_TX
+
+// Row pass: inverse-transforms `rows` rows of coefficients from `in` into the
+// 16-bit intermediate buffer `out`.
+typedef void (*daala_row_itx)(int16_t *out, int rows, const tran_low_t *in);
+// Column pass: inverse-transforms `cols` columns of the intermediate buffer
+// `in` and sums the result with the destination pixels.
+typedef void (*daala_col_itx_add)(unsigned char *output_pixels,
+                                  int output_stride, int cols,
+                                  const int16_t *in, int bd);
+
+// Row kernels, indexed by [1-D TX size][1-D TX type]. A NULL entry means no
+// SIMD kernel exists for that combination and the C version is used instead.
+// All entries are NULL for now.
+static const daala_row_itx TX_ROW_MAP[TX_SIZES][TX_TYPES] = {
+  // 4-point transforms
+  { NULL, NULL, NULL, NULL },
+  // 8-point transforms
+  { NULL, NULL, NULL, NULL },
+  // 16-point transforms
+  { NULL, NULL, NULL, NULL },
+  // 32-point transforms
+  { NULL, NULL, NULL, NULL },
+#if CONFIG_TX64X64
+  // 64-point transforms
+  { NULL, NULL, NULL, NULL },
+#endif
+};
+
+// Column kernels, indexed by [is_hbd][1-D TX size][1-D TX type]. NULL entries
+// fall back to the C version, as for TX_ROW_MAP.
+static const daala_col_itx_add TX_COL_MAP[2][TX_SIZES][TX_TYPES] = {
+  // Low bit depth output
+  {
+      // 4-point transforms
+      { NULL, NULL, NULL, NULL },
+      // 8-point transforms
+      { NULL, NULL, NULL, NULL },
+      // 16-point transforms
+      { NULL, NULL, NULL, NULL },
+      // 32-point transforms
+      { NULL, NULL, NULL, NULL },
+#if CONFIG_TX64X64
+      // 64-point transforms
+      { NULL, NULL, NULL, NULL },
+#endif
+  },
+  // High bit depth output
+  {
+      // 4-point transforms
+      { NULL, NULL, NULL, NULL },
+      // 8-point transforms
+      { NULL, NULL, NULL, NULL },
+      // 16-point transforms
+      { NULL, NULL, NULL, NULL },
+      // 32-point transforms
+      { NULL, NULL, NULL, NULL },
+#if CONFIG_TX64X64
+      // 64-point transforms
+      { NULL, NULL, NULL, NULL },
+#endif
+  }
+};
+
+// AVX2 entry point for the Daala inverse transform + add. Selects a row and a
+// column kernel for the given TX size/type; whenever either is missing (which
+// is currently always the case) or the block is lossless, the call is
+// forwarded unchanged to daala_inv_txfm_add_c().
+//
+// input_coeffs:  transform coefficients to invert.
+// output_pixels: destination buffer the result is added to.
+// output_stride: stride of output_pixels.
+// txfm_param:    supplies tx_size, tx_type, lossless, is_hbd and bd.
+void daala_inv_txfm_add_avx2(const tran_low_t *input_coeffs,
+                             void *output_pixels, int output_stride,
+                             TxfmParam *txfm_param) {
+  const TX_SIZE tx_size = txfm_param->tx_size;
+  const TX_TYPE tx_type = txfm_param->tx_type;
+  // Both values index lookup tables below, so they must be strictly less than
+  // the respective enum counts.
+  assert(tx_size < TX_SIZES_ALL);
+  assert(tx_type < TX_TYPES);
+
+  if (txfm_param->lossless) {
+    daala_inv_txfm_add_c(input_coeffs, output_pixels, output_stride,
+                         txfm_param);
+    return;
+  }
+
+  // General TX case
+  assert(sizeof(tran_low_t) == sizeof(od_coeff));
+  assert(sizeof(tran_low_t) >= 4);
+
+  // Hook into existing map translation infrastructure to select
+  // appropriate TX functions
+  const TX_SIZE col_idx = txsize_vert_map[tx_size];
+  const TX_SIZE row_idx = txsize_horz_map[tx_size];
+  assert(col_idx < TX_SIZES);
+  assert(row_idx < TX_SIZES);
+  assert(vtx_tab[tx_type] < (int)TX_TYPES_1D);
+  assert(htx_tab[tx_type] < (int)TX_TYPES_1D);
+  const daala_row_itx row_tx = TX_ROW_MAP[row_idx][htx_tab[tx_type]];
+  // Normalize is_hbd to 0/1 so it is always a valid first index.
+  const daala_col_itx_add col_tx =
+      TX_COL_MAP[txfm_param->is_hbd != 0][col_idx][vtx_tab[tx_type]];
+
+  if (row_tx == NULL || col_tx == NULL) {
+    // No SIMD kernel for this size/type yet: use the C implementation.
+    daala_inv_txfm_add_c(input_coeffs, output_pixels, output_stride,
+                         txfm_param);
+    return;
+  }
+
+  const int cols = tx_size_wide[tx_size];
+  const int rows = tx_size_high[tx_size];
+  // Intermediate row-pass output; only needed on the SIMD path.
+  int16_t tmpsq[MAX_TX_SQUARE];
+  // Inverse-transform rows
+  row_tx(tmpsq, rows, input_coeffs);
+  // Inverse-transform columns and sum with destination
+  col_tx(output_pixels, output_stride, cols, tmpsq, txfm_param->bd);
+}
+
+#endif