Simplify Daala inverse TX toplevel for constant shift
Rather than backing out all the LGT-related shifting matrices
throughout the existing TX code, separate out and simplify Daala
inverse TX into a single dedicated entry point. When DAALA_TX is
enabled, CONFIG_HIGHBITDEPTH is also forced, and all of Daala TX
(lowbd and highbd) uses this single TX dispatch.
This patch makes no functional changes.
subset 1:
monty-TXtesting-fwd-s1@2017-11-12T05:25:09.557Z ->
monty-TXtesting-inv-s1@2017-11-12T05:25:43.878Z
PSNR | PSNR Cb | PSNR Cr | PSNR HVS | SSIM | MS SSIM | CIEDE 2000
0.0000 | 0.0000 | 0.0000 | 0.0000 | 0.0000 | 0.0000 | 0.0000
objective-1-fast:
monty-TXtesting-fwd-o1f@2017-11-12T05:25:29.386Z ->
monty-TXtesting-inv-o1f@2017-11-12T05:25:58.897Z
PSNR | PSNR Cb | PSNR Cr | PSNR HVS | SSIM | MS SSIM | CIEDE 2000
0.0000 | 0.0000 | 0.0000 | 0.0000 | 0.0000 | 0.0000 | 0.0000
Change-Id: I790e8d7ac08eb214eb712f5441d6e5f76ebddf17
diff --git a/av1/av1.cmake b/av1/av1.cmake
index b0a6943..f0b8541 100644
--- a/av1/av1.cmake
+++ b/av1/av1.cmake
@@ -35,6 +35,8 @@
"${AOM_ROOT}/av1/common/convolve.h"
"${AOM_ROOT}/av1/common/daala_tx.c"
"${AOM_ROOT}/av1/common/daala_tx.h"
+ "${AOM_ROOT}/av1/common/daala_inv_txfm.c"
+ "${AOM_ROOT}/av1/common/daala_inv_txfm.h"
"${AOM_ROOT}/av1/common/debugmodes.c"
"${AOM_ROOT}/av1/common/entropy.c"
"${AOM_ROOT}/av1/common/entropy.h"
diff --git a/av1/av1_common.mk b/av1/av1_common.mk
index 6c3e25c..f0c65c2 100644
--- a/av1/av1_common.mk
+++ b/av1/av1_common.mk
@@ -26,6 +26,8 @@
AV1_COMMON_SRCS-yes += common/common.h
AV1_COMMON_SRCS-yes += common/daala_tx.c
AV1_COMMON_SRCS-yes += common/daala_tx.h
+AV1_COMMON_SRCS-yes += common/daala_inv_txfm.c
+AV1_COMMON_SRCS-yes += common/daala_inv_txfm.h
AV1_COMMON_SRCS-yes += common/entropy.h
AV1_COMMON_SRCS-yes += common/entropymode.h
AV1_COMMON_SRCS-yes += common/entropymv.h
diff --git a/av1/common/av1_rtcd_defs.pl b/av1/common/av1_rtcd_defs.pl
index efacab2..39a7f29 100755
--- a/av1/common/av1_rtcd_defs.pl
+++ b/av1/common/av1_rtcd_defs.pl
@@ -78,62 +78,64 @@
}
}
-add_proto qw/void av1_iht4x8_32_add/, "const tran_low_t *input, uint8_t *dest, int dest_stride, const struct txfm_param *param";
-specialize qw/av1_iht4x8_32_add sse2/;
+if (aom_config("CONFIG_DAALA_TX") ne "yes") {
+ add_proto qw/void av1_iht4x8_32_add/, "const tran_low_t *input, uint8_t *dest, int dest_stride, const struct txfm_param *param";
+ specialize qw/av1_iht4x8_32_add sse2/;
-add_proto qw/void av1_iht8x4_32_add/, "const tran_low_t *input, uint8_t *dest, int dest_stride, const struct txfm_param *param";
-specialize qw/av1_iht8x4_32_add sse2/;
+ add_proto qw/void av1_iht8x4_32_add/, "const tran_low_t *input, uint8_t *dest, int dest_stride, const struct txfm_param *param";
+ specialize qw/av1_iht8x4_32_add sse2/;
-add_proto qw/void av1_iht8x16_128_add/, "const tran_low_t *input, uint8_t *dest, int dest_stride, const struct txfm_param *param";
-specialize qw/av1_iht8x16_128_add sse2/;
+ add_proto qw/void av1_iht8x16_128_add/, "const tran_low_t *input, uint8_t *dest, int dest_stride, const struct txfm_param *param";
+ specialize qw/av1_iht8x16_128_add sse2/;
-add_proto qw/void av1_iht16x8_128_add/, "const tran_low_t *input, uint8_t *dest, int dest_stride, const struct txfm_param *param";
-specialize qw/av1_iht16x8_128_add sse2/;
+ add_proto qw/void av1_iht16x8_128_add/, "const tran_low_t *input, uint8_t *dest, int dest_stride, const struct txfm_param *param";
+ specialize qw/av1_iht16x8_128_add sse2/;
-add_proto qw/void av1_iht16x32_512_add/, "const tran_low_t *input, uint8_t *dest, int dest_stride, const struct txfm_param *param";
-specialize qw/av1_iht16x32_512_add sse2/;
+ add_proto qw/void av1_iht16x32_512_add/, "const tran_low_t *input, uint8_t *dest, int dest_stride, const struct txfm_param *param";
+ specialize qw/av1_iht16x32_512_add sse2/;
-add_proto qw/void av1_iht32x16_512_add/, "const tran_low_t *input, uint8_t *dest, int dest_stride, const struct txfm_param *param";
-specialize qw/av1_iht32x16_512_add sse2/;
+ add_proto qw/void av1_iht32x16_512_add/, "const tran_low_t *input, uint8_t *dest, int dest_stride, const struct txfm_param *param";
+ specialize qw/av1_iht32x16_512_add sse2/;
-add_proto qw/void av1_iht4x16_64_add/, "const tran_low_t *input, uint8_t *dest, int dest_stride, const struct txfm_param *param";
+ add_proto qw/void av1_iht4x16_64_add/, "const tran_low_t *input, uint8_t *dest, int dest_stride, const struct txfm_param *param";
-add_proto qw/void av1_iht16x4_64_add/, "const tran_low_t *input, uint8_t *dest, int dest_stride, const struct txfm_param *param";
+ add_proto qw/void av1_iht16x4_64_add/, "const tran_low_t *input, uint8_t *dest, int dest_stride, const struct txfm_param *param";
-add_proto qw/void av1_iht8x32_256_add/, "const tran_low_t *input, uint8_t *dest, int dest_stride, const struct txfm_param *param";
+ add_proto qw/void av1_iht8x32_256_add/, "const tran_low_t *input, uint8_t *dest, int dest_stride, const struct txfm_param *param";
-add_proto qw/void av1_iht32x8_256_add/, "const tran_low_t *input, uint8_t *dest, int dest_stride, const struct txfm_param *param";
+ add_proto qw/void av1_iht32x8_256_add/, "const tran_low_t *input, uint8_t *dest, int dest_stride, const struct txfm_param *param";
-add_proto qw/void av1_iht8x8_64_add/, "const tran_low_t *input, uint8_t *dest, int dest_stride, const struct txfm_param *param";
-if (aom_config("CONFIG_DAALA_TX8") ne "yes") {
- if (aom_config("CONFIG_HIGHBITDEPTH") eq "yes") {
- specialize qw/av1_iht8x8_64_add sse2/;
- } else {
- specialize qw/av1_iht8x8_64_add sse2 neon/;
+ add_proto qw/void av1_iht8x8_64_add/, "const tran_low_t *input, uint8_t *dest, int dest_stride, const struct txfm_param *param";
+ if (aom_config("CONFIG_DAALA_TX8") ne "yes") {
+ if (aom_config("CONFIG_HIGHBITDEPTH") eq "yes") {
+ specialize qw/av1_iht8x8_64_add sse2/;
+ } else {
+ specialize qw/av1_iht8x8_64_add sse2 neon/;
+ }
}
-}
-add_proto qw/void av1_iht16x16_256_add/, "const tran_low_t *input, uint8_t *output, int pitch, const struct txfm_param *param";
+ add_proto qw/void av1_iht16x16_256_add/, "const tran_low_t *input, uint8_t *output, int pitch, const struct txfm_param *param";
-if (aom_config("CONFIG_DAALA_TX16") ne "yes") {
- if (aom_config("CONFIG_HIGHBITDEPTH") eq "yes") {
- specialize qw/av1_iht16x16_256_add sse2 avx2/;
- } else {
- specialize qw/av1_iht16x16_256_add sse2 avx2/;
+ if (aom_config("CONFIG_DAALA_TX16") ne "yes") {
+ if (aom_config("CONFIG_HIGHBITDEPTH") eq "yes") {
+ specialize qw/av1_iht16x16_256_add sse2 avx2/;
+ } else {
+ specialize qw/av1_iht16x16_256_add sse2 avx2/;
+ }
}
-}
-add_proto qw/void av1_iht32x32_1024_add/, "const tran_low_t *input, uint8_t *output, int pitch, const struct txfm_param *param";
+ add_proto qw/void av1_iht32x32_1024_add/, "const tran_low_t *input, uint8_t *output, int pitch, const struct txfm_param *param";
-if (aom_config("CONFIG_HIGHBITDEPTH") ne "yes") {
-}
+ if (aom_config("CONFIG_HIGHBITDEPTH") ne "yes") {
+ }
-add_proto qw/void av1_iht32x32_1024_add/, "const tran_low_t *input, uint8_t *output, int pitch, const struct txfm_param *param";
+ add_proto qw/void av1_iht32x32_1024_add/, "const tran_low_t *input, uint8_t *output, int pitch, const struct txfm_param *param";
-if (aom_config("CONFIG_TX64X64") eq "yes") {
- add_proto qw/void av1_iht64x64_4096_add/, "const tran_low_t *input, uint8_t *output, int pitch, const struct txfm_param *param";
- add_proto qw/void av1_iht32x64_2048_add/, "const tran_low_t *input, uint8_t *output, int pitch, const struct txfm_param *param";
- add_proto qw/void av1_iht64x32_2048_add/, "const tran_low_t *input, uint8_t *output, int pitch, const struct txfm_param *param";
+ if (aom_config("CONFIG_TX64X64") eq "yes") {
+ add_proto qw/void av1_iht64x64_4096_add/, "const tran_low_t *input, uint8_t *output, int pitch, const struct txfm_param *param";
+ add_proto qw/void av1_iht32x64_2048_add/, "const tran_low_t *input, uint8_t *output, int pitch, const struct txfm_param *param";
+ add_proto qw/void av1_iht64x32_2048_add/, "const tran_low_t *input, uint8_t *output, int pitch, const struct txfm_param *param";
+ }
}
if (aom_config("CONFIG_NEW_QUANT") eq "yes") {
diff --git a/av1/common/daala_inv_txfm.c b/av1/common/daala_inv_txfm.c
new file mode 100644
index 0000000..031cf14
--- /dev/null
+++ b/av1/common/daala_inv_txfm.c
@@ -0,0 +1,150 @@
+/*
+ * Copyright (c) 2017, Alliance for Open Media. All rights reserved
+ *
+ * This source code is subject to the terms of the BSD 2 Clause License and
+ * the Alliance for Open Media Patent License 1.0. If the BSD 2 Clause License
+ * was not distributed with this source code in the LICENSE file, you can
+ * obtain it at www.aomedia.org/license/software. If the Alliance for Open
+ * Media Patent License 1.0 was not distributed with this source code in the
+ * PATENTS file, you can obtain it at www.aomedia.org/license/patent.
+ */
+
+#include "./av1_rtcd.h"
+#include "./aom_config.h"
+#include "./aom_dsp_rtcd.h"
+#include "av1/common/daala_tx.h"
+#include "av1/common/daala_inv_txfm.h"
+
+#if CONFIG_DAALA_TX
+
+// Temporary while we still need av1_get_tx_scale() for testing
+#include "av1/common/idct.h"
+
+// Signature shared by the 1-D Daala inverse transforms: (output, output
+// stride, input coefficients).
+typedef void (*daala_itx)(od_coeff *, int, const od_coeff[]);
+
+// Complete Daala TX map, sans lossless which is special cased.  Rows are
+// indexed by 1-D transform size and columns by the 1-D transform type from
+// vtx_tab/htx_tab; slots left NULL have no transform and must not be used.
+static const daala_itx tx_map[TX_SIZES][TX_TYPES] = {
+  //  4-point transforms
+  { od_bin_idct4, od_bin_idst4, od_bin_idst4, od_bin_iidtx4 },
+  //  8-point transforms
+  { od_bin_idct8, od_bin_idst8, od_bin_idst8, od_bin_iidtx8 },
+  //  16-point transforms
+  { od_bin_idct16, od_bin_idst16, od_bin_idst16, od_bin_iidtx16 },
+  //  32-point transforms
+  { od_bin_idct32, od_bin_idst32, od_bin_idst32, od_bin_iidtx32 },
+#if CONFIG_TX64X64
+  //  64-point transforms
+  { od_bin_idct64, NULL, NULL, od_bin_iidtx64 },
+#endif
+};
+// Nonzero iff the 1-D type is FLIPADST_1D (that pass writes output reversed).
+static int tx_flip(TX_TYPE_1D type_1d) { return FLIPADST_1D == type_1d; }
+
+// Daala TX toplevel inverse entry point for both low and high bitdepth:
+// inverse-transforms input_coeffs and sums the result into output_pixels.
+//   input_coeffs:  rows * cols values packed row-major; tran_low_t must match
+//                  od_coeff in size (asserted below).
+//   output_pixels: uint8_t samples if is_hbd is 0, otherwise a pointer that
+//                  CONVERT_TO_SHORTPTR() maps to uint16_t samples.
+//   output_stride: offset, in samples, from one output row to the next.
+//   txfm_param:    supplies tx_size, tx_type, bd, lossless, is_hbd and eob.
+void daala_inv_txfm_add(const tran_low_t *input_coeffs, void *output_pixels,
+                        int output_stride, TxfmParam *txfm_param) {
+  const TX_SIZE tx_size = txfm_param->tx_size;
+  const TX_TYPE tx_type = txfm_param->tx_type;
+  const int px_depth = txfm_param->bd;
+  assert(tx_size < TX_SIZES_ALL);
+  assert(tx_type < TX_TYPES);
+
+  if (txfm_param->lossless) {
+    // Transform function special-cased for lossless
+    assert(tx_type == DCT_DCT);
+    assert(tx_size == TX_4X4);
+    if (txfm_param->is_hbd)
+      // Note that the output pointer in the prototype is uint8, but the
+      // function converts to short internally
+      av1_highbd_iwht4x4_add(input_coeffs, output_pixels, output_stride,
+                             txfm_param->eob, px_depth);
+    else
+      av1_iwht4x4_add(input_coeffs, output_pixels, output_stride, txfm_param);
+  } else {
+    // General TX case: Q3 coeff Q4 TX compatibility mode (av1_get_tx_scale)
+    const int downshift = 4;
+    assert(sizeof(tran_low_t) == sizeof(od_coeff));
+    assert(sizeof(tran_low_t) >= 4);
+
+    // Select the 1-D transforms through the existing map translation tables
+    const int cols = tx_size_wide[tx_size];
+    const int rows = tx_size_high[tx_size];
+    const TX_SIZE col_idx = txsize_vert_map[tx_size];
+    const TX_SIZE row_idx = txsize_horz_map[tx_size];
+    assert(col_idx < TX_SIZES);
+    assert(row_idx < TX_SIZES);
+    assert(vtx_tab[tx_type] < (int)TX_TYPES_1D);
+    assert(htx_tab[tx_type] < (int)TX_TYPES_1D);
+    daala_itx col_tx = tx_map[col_idx][vtx_tab[tx_type]];
+    daala_itx row_tx = tx_map[row_idx][htx_tab[tx_type]];
+    int col_flip = tx_flip(vtx_tab[tx_type]);
+    int row_flip = tx_flip(htx_tab[tx_type]);
+    od_coeff tmpsq[MAX_TX_SQUARE];
+    int r, c;
+    // tx_map leaves unsupported slots NULL (see its 64-point row)
+    assert(col_tx);
+    assert(row_tx);
+
+    // Temporary while testing against existing behavior: preshift up by one
+    // plus av1_get_tx_scale(). Done as a multiply because << on a negative
+    // value is undefined behavior. Remove before flight
+    od_coeff tmp[MAX_TX_SQUARE];
+    int upshift = 1 + av1_get_tx_scale(tx_size);
+    for (r = 0; r < rows; ++r)
+      for (c = 0; c < cols; ++c)
+        tmp[r * cols + c] = input_coeffs[r * cols + c] * (1 << upshift);
+    input_coeffs = tmp;
+
+    // Inverse-transform rows
+    for (r = 0; r < rows; ++r) {
+      // The output addressing transposes
+      if (row_flip)
+        row_tx(tmpsq + r + (rows * cols) - rows, -rows,
+               input_coeffs + r * cols);
+      else
+        row_tx(tmpsq + r, rows, input_coeffs + r * cols);
+    }
+
+    // Inverse-transform columns
+    for (c = 0; c < cols; ++c) {
+      // Above transposed, so our cols are now rows
+      if (col_flip)
+        col_tx(tmpsq + c * rows + rows - 1, -1, tmpsq + c * rows);
+      else
+        col_tx(tmpsq + c * rows, 1, tmpsq + c * rows);
+    }
+
+    // Sum with destination according to bit depth; tmpsq is still transposed
+    if (txfm_param->is_hbd) {
+      // Destination array is shorts
+      uint16_t *out16 = CONVERT_TO_SHORTPTR(output_pixels);
+      for (r = 0; r < rows; ++r)
+        for (c = 0; c < cols; ++c)
+          out16[r * output_stride + c] = highbd_clip_pixel_add(
+              out16[r * output_stride + c],
+              (tmpsq[c * rows + r] + (1 << downshift >> 1)) >> downshift,
+              px_depth);
+    } else {
+      // Destination array is bytes
+      uint8_t *out8 = (uint8_t *)output_pixels;
+      for (r = 0; r < rows; ++r)
+        for (c = 0; c < cols; ++c)
+          out8[r * output_stride + c] = clip_pixel_add(
+              out8[r * output_stride + c],
+              (tmpsq[c * rows + r] + (1 << downshift >> 1)) >> downshift);
+    }
+  }
+}
+
+#endif
diff --git a/av1/common/daala_inv_txfm.h b/av1/common/daala_inv_txfm.h
new file mode 100644
index 0000000..aa79a14
--- /dev/null
+++ b/av1/common/daala_inv_txfm.h
@@ -0,0 +1,28 @@
+/*
+ * Copyright (c) 2017, Alliance for Open Media. All rights reserved
+ *
+ * This source code is subject to the terms of the BSD 2 Clause License and
+ * the Alliance for Open Media Patent License 1.0. If the BSD 2 Clause License
+ * was not distributed with this source code in the LICENSE file, you can
+ * obtain it at www.aomedia.org/license/software. If the Alliance for Open
+ * Media Patent License 1.0 was not distributed with this source code in the
+ * PATENTS file, you can obtain it at www.aomedia.org/license/patent.
+ */
+
+#ifndef AV1_COMMON_DAALA_INV_TXFM_H_
+#define AV1_COMMON_DAALA_INV_TXFM_H_
+
+#include "./aom_config.h"
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+// Daala inverse transform entry point; sums the result into output_pixels.
+// NOTE(review): presumably relies on the includer for tran_low_t/TxfmParam.
+void daala_inv_txfm_add(const tran_low_t *input_coeffs, void *output_pixels,
+                        int output_stride, TxfmParam *txfm_param);
+#ifdef __cplusplus
+}  // extern "C"
+#endif
+
+#endif  // AV1_COMMON_DAALA_INV_TXFM_H_
diff --git a/av1/common/daala_tx.c b/av1/common/daala_tx.c
index ed2095b..e5c3c60 100644
--- a/av1/common/daala_tx.c
+++ b/av1/common/daala_tx.c
@@ -5218,6 +5218,38 @@
}
#endif
+void od_bin_iidtx4(od_coeff *x, int xstride, const od_coeff y[4]) {
+  // Inverse identity: copy each y[k] unchanged to x[k * xstride].
+  int k;
+  for (k = 0; k != 4; ++k) x[k * xstride] = y[k];
+}
+
+void od_bin_iidtx8(od_coeff *x, int xstride, const od_coeff y[8]) {
+  // Inverse identity: copy each y[k] unchanged to x[k * xstride].
+  int k;
+  for (k = 0; k != 8; ++k) x[k * xstride] = y[k];
+}
+
+void od_bin_iidtx16(od_coeff *x, int xstride, const od_coeff y[16]) {
+  // Inverse identity: copy each y[k] unchanged to x[k * xstride].
+  int k;
+  for (k = 0; k != 16; ++k) x[k * xstride] = y[k];
+}
+
+void od_bin_iidtx32(od_coeff *x, int xstride, const od_coeff y[32]) {
+  // Inverse identity: copy each y[k] unchanged to x[k * xstride].
+  int k;
+  for (k = 0; k != 32; ++k) x[k * xstride] = y[k];
+}
+
+#if CONFIG_TX64X64
+void od_bin_iidtx64(od_coeff *x, int xstride, const od_coeff y[64]) {
+  // Inverse identity: copy each y[k] unchanged to x[k * xstride].
+  int k;
+  for (k = 0; k != 64; ++k) x[k * xstride] = y[k];
+}
+#endif
+
// Below are intermediate wrappers that handle the case when
// tran_low_t is a smaller type than od_coeff
void daala_fdct4(const tran_low_t *input, tran_low_t *output) {
diff --git a/av1/common/daala_tx.h b/av1/common/daala_tx.h
index e482ed1..2943802 100644
--- a/av1/common/daala_tx.h
+++ b/av1/common/daala_tx.h
@@ -35,26 +35,31 @@
void od_bin_fdct4(od_coeff y[4], const od_coeff *x, int xstride);
void od_bin_idct4(od_coeff *x, int xstride, const od_coeff y[4]);
void od_bin_fdst4(od_coeff y[4], const od_coeff *x, int xstride);
-void od_bin_fidtx4(od_coeff y[4], const od_coeff *x, int xstride);
void od_bin_idst4(od_coeff *x, int xstride, const od_coeff y[4]);
+void od_bin_fidtx4(od_coeff y[4], const od_coeff *x, int xstride);
+void od_bin_iidtx4(od_coeff *x, int xstride, const od_coeff y[4]);
void od_bin_fdct8(od_coeff y[8], const od_coeff *x, int xstride);
void od_bin_idct8(od_coeff *x, int xstride, const od_coeff y[8]);
void od_bin_fdst8(od_coeff y[8], const od_coeff *x, int xstride);
-void od_bin_fidtx8(od_coeff y[8], const od_coeff *x, int xstride);
void od_bin_idst8(od_coeff *x, int xstride, const od_coeff y[8]);
+void od_bin_fidtx8(od_coeff y[8], const od_coeff *x, int xstride);
+void od_bin_iidtx8(od_coeff *x, int xstride, const od_coeff y[8]);
void od_bin_fdct16(od_coeff y[16], const od_coeff *x, int xstride);
void od_bin_idct16(od_coeff *x, int xstride, const od_coeff y[16]);
void od_bin_fdst16(od_coeff y[16], const od_coeff *x, int xstride);
-void od_bin_fidtx16(od_coeff y[16], const od_coeff *x, int xstride);
void od_bin_idst16(od_coeff *x, int xstride, const od_coeff y[16]);
+void od_bin_fidtx16(od_coeff y[16], const od_coeff *x, int xstride);
+void od_bin_iidtx16(od_coeff *x, int xstride, const od_coeff y[16]);
void od_bin_fdct32(od_coeff y[32], const od_coeff *x, int xstride);
void od_bin_idct32(od_coeff *x, int xstride, const od_coeff y[32]);
void od_bin_fdst32(od_coeff y[32], const od_coeff *x, int xstride);
+void od_bin_idst32(od_coeff *x, int xstride, const od_coeff y[32]);
void od_bin_fidtx32(od_coeff y[32], const od_coeff *x, int xstride);
+void od_bin_iidtx32(od_coeff *x, int xstride, const od_coeff y[32]);
#if CONFIG_TX64X64
void od_bin_fdct64(od_coeff y[64], const od_coeff *x, int xstride);
void od_bin_idct64(od_coeff *x, int xstride, const od_coeff y[64]);
void od_bin_fidtx64(od_coeff y[64], const od_coeff *x, int xstride);
-
+void od_bin_iidtx64(od_coeff *x, int xstride, const od_coeff y[64]);
#endif
#endif
diff --git a/av1/common/idct.c b/av1/common/idct.c
index f5e4b7a..7279f5e 100644
--- a/av1/common/idct.c
+++ b/av1/common/idct.c
@@ -22,6 +22,9 @@
#if CONFIG_DAALA_TX4 || CONFIG_DAALA_TX8 || CONFIG_DAALA_TX16 || \
CONFIG_DAALA_TX32 || CONFIG_DAALA_TX64
#include "av1/common/daala_tx.h"
+#if CONFIG_DAALA_TX
+#include "av1/common/daala_inv_txfm.h"
+#endif
#endif
int av1_get_tx_scale(const TX_SIZE tx_size) {
@@ -29,6 +32,8 @@
return (pels > 256) + (pels > 1024) + (pels > 4096);
}
+#if !CONFIG_DAALA_TX
+
// NOTE: The implementation of all inverses need to be aware of the fact
// that input and output could be the same buffer.
@@ -1673,6 +1678,7 @@
else
aom_idct4x4_1_add(input, dest, stride);
}
+#endif // !CONFIG_DAALA_TX
void av1_iwht4x4_add(const tran_low_t *input, uint8_t *dest, int stride,
const TxfmParam *txfm_param) {
@@ -1684,6 +1690,7 @@
aom_iwht4x4_1_add(input, dest, stride);
}
+#if !CONFIG_DAALA_TX
#if !CONFIG_DAALA_TX8
static void idct8x8_add(const tran_low_t *input, uint8_t *dest, int stride,
const TxfmParam *txfm_param) {
@@ -2080,6 +2087,7 @@
#endif
}
#endif // CONFIG_TX64X64
+#endif // !CONFIG_DAALA_TX
void av1_highbd_iwht4x4_add(const tran_low_t *input, uint8_t *dest, int stride,
int eob, int bd) {
@@ -2089,6 +2097,7 @@
aom_highbd_iwht4x4_1_add(input, dest, stride, bd);
}
+#if !CONFIG_DAALA_TX
static const int32_t *cast_to_int32(const tran_low_t *input) {
assert(sizeof(int32_t) == sizeof(tran_low_t));
return (const int32_t *)input;
@@ -2377,10 +2386,15 @@
}
}
#endif // CONFIG_TX64X64
+#endif // !CONFIG_DAALA_TX
void av1_inv_txfm_add(const tran_low_t *input, uint8_t *dest, int stride,
TxfmParam *txfm_param) {
assert(av1_ext_tx_used[txfm_param->tx_set_type][txfm_param->tx_type]);
+#if CONFIG_DAALA_TX
+ assert(!txfm_param->is_hbd);
+ daala_inv_txfm_add(input, dest, stride, txfm_param);
+#else
const TX_SIZE tx_size = txfm_param->tx_size;
switch (tx_size) {
#if CONFIG_TX64X64
@@ -2413,6 +2427,7 @@
#endif
default: assert(0 && "Invalid transform size"); break;
}
+#endif
}
#if CONFIG_TXMG
@@ -2524,8 +2539,11 @@
void av1_highbd_inv_txfm_add(const tran_low_t *input, uint8_t *dest, int stride,
TxfmParam *txfm_param) {
- const TX_SIZE tx_size = txfm_param->tx_size;
assert(av1_ext_tx_used[txfm_param->tx_set_type][txfm_param->tx_type]);
+#if CONFIG_DAALA_TX
+ daala_inv_txfm_add(input, dest, stride, txfm_param);
+#else
+ const TX_SIZE tx_size = txfm_param->tx_size;
switch (tx_size) {
#if CONFIG_TX64X64
case TX_64X64:
@@ -2589,4 +2607,5 @@
#endif
default: assert(0 && "Invalid transform size"); break;
}
+#endif
}
diff --git a/av1/encoder/encodemb.c b/av1/encoder/encodemb.c
index a95fa6f..de7010c 100644
--- a/av1/encoder/encodemb.c
+++ b/av1/encoder/encodemb.c
@@ -29,6 +29,9 @@
#include "av1/encoder/encodetxb.h"
#endif
#include "av1/encoder/hybrid_fwd_txfm.h"
+#if CONFIG_DAALA_TX
+#include "av1/common/daala_inv_txfm.h"
+#endif
#include "av1/encoder/rd.h"
#include "av1/encoder/tokenize.h"
@@ -724,6 +727,9 @@
txfm_param.tx_set_type = get_ext_tx_set_type(
txfm_param.tx_size, plane_bsize, is_inter_block(&xd->mi[0]->mbmi),
cm->reduced_tx_set_used);
+#if CONFIG_DAALA_TX
+ daala_inv_txfm_add(dqcoeff, dst, pd->dst.stride, &txfm_param);
+#else
#if CONFIG_HIGHBITDEPTH
if (txfm_param.is_hbd) {
av1_highbd_inv_txfm_add_4x4(dqcoeff, dst, pd->dst.stride, &txfm_param);
@@ -735,6 +741,7 @@
} else {
av1_idct4x4_add(dqcoeff, dst, pd->dst.stride, &txfm_param);
}
+#endif
}
}
diff --git a/test/av1_fht16x16_test.cc b/test/av1_fht16x16_test.cc
index fefdab9..d428332 100644
--- a/test/av1_fht16x16_test.cc
+++ b/test/av1_fht16x16_test.cc
@@ -23,6 +23,7 @@
using libaom_test::ACMRandom;
+#if !CONFIG_DAALA_TX
namespace {
typedef void (*IhtFunc)(const tran_low_t *in, uint8_t *out, int stride,
const TxfmParam *txfm_param);
@@ -268,3 +269,4 @@
#endif // HAVE_SSE4_1 && CONFIG_HIGHBITDEPTH && !CONFIG_DAALA_TX16
} // namespace
+#endif // !CONFIG_DAALA_TX
diff --git a/test/av1_fht16x32_test.cc b/test/av1_fht16x32_test.cc
index ae37e2d..e6b960a 100644
--- a/test/av1_fht16x32_test.cc
+++ b/test/av1_fht16x32_test.cc
@@ -23,6 +23,8 @@
using libaom_test::ACMRandom;
+#if !CONFIG_DAALA_TX
+
namespace {
typedef void (*IhtFunc)(const tran_low_t *in, uint8_t *out, int stride,
const TxfmParam *txfm_param);
@@ -151,3 +153,5 @@
#endif // HAVE_SSE2
} // namespace
+
+#endif // !CONFIG_DAALA_TX
diff --git a/test/av1_fht16x8_test.cc b/test/av1_fht16x8_test.cc
index 8c49993..91cb69c 100644
--- a/test/av1_fht16x8_test.cc
+++ b/test/av1_fht16x8_test.cc
@@ -23,6 +23,7 @@
using libaom_test::ACMRandom;
+#if !CONFIG_DAALA_TX
namespace {
typedef void (*IhtFunc)(const tran_low_t *in, uint8_t *out, int stride,
const TxfmParam *txfm_param);
@@ -149,3 +150,5 @@
#endif // HAVE_SSE2
} // namespace
+
+#endif // !CONFIG_DAALA_TX
diff --git a/test/av1_fht32x16_test.cc b/test/av1_fht32x16_test.cc
index 97f564d..603f711 100644
--- a/test/av1_fht32x16_test.cc
+++ b/test/av1_fht32x16_test.cc
@@ -23,6 +23,8 @@
using libaom_test::ACMRandom;
+#if !CONFIG_DAALA_TX
+
namespace {
typedef void (*IhtFunc)(const tran_low_t *in, uint8_t *out, int stride,
const TxfmParam *txfm_param);
@@ -151,3 +153,4 @@
#endif // HAVE_SSE2
} // namespace
+#endif // !CONFIG_DAALA_TX
diff --git a/test/av1_fht4x4_test.cc b/test/av1_fht4x4_test.cc
index df7b03c..f11bfe5 100644
--- a/test/av1_fht4x4_test.cc
+++ b/test/av1_fht4x4_test.cc
@@ -23,6 +23,7 @@
using libaom_test::ACMRandom;
+#if !CONFIG_DAALA_TX
namespace {
typedef void (*IhtFunc)(const tran_low_t *in, uint8_t *out, int stride,
const TxfmParam *txfm_param);
@@ -229,3 +230,4 @@
#endif // HAVE_SSE4_1 && CONFIG_HIGHBITDEPTH && !CONFIG_DAALA_TX4
} // namespace
+#endif // !CONFIG_DAALA_TX
diff --git a/test/av1_fht4x8_test.cc b/test/av1_fht4x8_test.cc
index 2b13fcd..6adada5 100644
--- a/test/av1_fht4x8_test.cc
+++ b/test/av1_fht4x8_test.cc
@@ -23,6 +23,7 @@
using libaom_test::ACMRandom;
+#if !CONFIG_DAALA_TX
namespace {
typedef void (*IhtFunc)(const tran_low_t *in, uint8_t *out, int stride,
const TxfmParam *txfm_param);
@@ -139,3 +140,4 @@
#endif // HAVE_SSE2
} // namespace
+#endif // !CONFIG_DAALA_TX
diff --git a/test/av1_fht8x16_test.cc b/test/av1_fht8x16_test.cc
index 9490a32..c7e6160 100644
--- a/test/av1_fht8x16_test.cc
+++ b/test/av1_fht8x16_test.cc
@@ -22,6 +22,7 @@
using libaom_test::ACMRandom;
+#if !CONFIG_DAALA_TX
namespace {
typedef void (*IhtFunc)(const tran_low_t *in, uint8_t *out, int stride,
const TxfmParam *txfm_param);
@@ -148,3 +149,4 @@
#endif // HAVE_SSE2
} // namespace
+#endif // !CONFIG_DAALA_TX
diff --git a/test/av1_fht8x4_test.cc b/test/av1_fht8x4_test.cc
index b891031..b221e61 100644
--- a/test/av1_fht8x4_test.cc
+++ b/test/av1_fht8x4_test.cc
@@ -22,6 +22,7 @@
using libaom_test::ACMRandom;
+#if !CONFIG_DAALA_TX
namespace {
typedef void (*IhtFunc)(const tran_low_t *in, uint8_t *out, int stride,
const TxfmParam *txfm_param);
@@ -138,3 +139,4 @@
#endif // HAVE_SSE2
} // namespace
+#endif // !CONFIG_DAALA_TX
diff --git a/test/av1_fht8x8_test.cc b/test/av1_fht8x8_test.cc
index 1bffe4e..ec8baac 100644
--- a/test/av1_fht8x8_test.cc
+++ b/test/av1_fht8x8_test.cc
@@ -23,6 +23,7 @@
using libaom_test::ACMRandom;
+#if !CONFIG_DAALA_TX
namespace {
typedef void (*IhtFunc)(const tran_low_t *in, uint8_t *out, int stride,
const TxfmParam *txfm_param);
@@ -227,3 +228,4 @@
#endif // HAVE_SSE4_1 && CONFIG_HIGHBITDEPTH && !CONFIG_DAALA_TX8
} // namespace
+#endif // !CONFIG_DAALA_TX
diff --git a/test/dct16x16_test.cc b/test/dct16x16_test.cc
index 5adc296..2962ffb 100644
--- a/test/dct16x16_test.cc
+++ b/test/dct16x16_test.cc
@@ -30,6 +30,7 @@
using libaom_test::ACMRandom;
+#if !CONFIG_DAALA_TX
namespace {
const int kNumCoeffs = 256;
@@ -873,3 +874,4 @@
DCT_DCT, AOM_BITS_8)));
#endif // HAVE_MSA && !CONFIG_HIGHBITDEPTH
} // namespace
+#endif // !CONFIG_DAALA_TX
diff --git a/test/fdct4x4_test.cc b/test/fdct4x4_test.cc
index 5ce263c..bcb2fbe 100644
--- a/test/fdct4x4_test.cc
+++ b/test/fdct4x4_test.cc
@@ -29,6 +29,7 @@
using libaom_test::ACMRandom;
+#if !CONFIG_DAALA_TX
namespace {
typedef void (*FdctFunc)(const int16_t *in, tran_low_t *out, int stride);
typedef void (*IdctFunc)(const tran_low_t *in, uint8_t *out, int stride);
@@ -336,3 +337,4 @@
DCT_DCT, AOM_BITS_8, 16)));
#endif // HAVE_MSA && !CONFIG_HIGHBITDEPTH
} // namespace
+#endif // !CONFIG_DAALA_TX
diff --git a/test/fdct8x8_test.cc b/test/fdct8x8_test.cc
index 2b8f524..2ad1afc 100644
--- a/test/fdct8x8_test.cc
+++ b/test/fdct8x8_test.cc
@@ -29,6 +29,7 @@
using libaom_test::ACMRandom;
+#if !CONFIG_DAALA_TX
namespace {
const int kNumCoeffs = 64;
@@ -724,3 +725,4 @@
DCT_DCT, AOM_BITS_8)));
#endif // HAVE_MSA && !CONFIG_HIGHBITDEPTH
} // namespace
+#endif // !CONFIG_DAALA_TX