Simplify Daala forward TX toplevel for constant shift
Rather than backing out all the LGT-related shifting matrices
throughout the existing TX code, separate out and simplify Daala
forward TX into a single dedicated entry point. When DAALA_TX is
enabled, CONFIG_HIGHBITDEPTH is also forced, and all of Daala TX
(lowbd and highbd) uses this single TX dispatch.
At present, this should result in no effective functional change;
however, rectangular transforms are now always computed column-first,
which has minor rounding effects.
subset 1:
monty-daalaTX-fulltest-DaalaRDO-s1@2017-11-07T00:02:56.282Z ->
monty-daalaTX-fulltest-fwd-s1@2017-11-07T03:08:55.478Z
PSNR | PSNR Cb | PSNR Cr | PSNR HVS | SSIM | MS SSIM | CIEDE 2000
-0.0576 | N/A | -0.2646 | -0.0125 | -0.0439 | -0.0479 | -0.1798
objective 1 fast:
monty-daalaTX-fulltest-DaalaRDO-o1f4@2017-11-07T05:59:50.180Z ->
monty-daalaTX-fulltest-fwd-o1f4@2017-11-07T06:00:08.500Z
PSNR | PSNR Cb | PSNR Cr | PSNR HVS | SSIM | MS SSIM | CIEDE 2000
0.0036 | 0.0477 | 0.1132 | 0.0863 | -0.0017 | 0.0209 | 0.0240
Change-Id: I182a5c4388c410cbea8810e2f9e36fd37a4a46e5
diff --git a/av1/av1.cmake b/av1/av1.cmake
index f930b6e..5d2b641 100644
--- a/av1/av1.cmake
+++ b/av1/av1.cmake
@@ -109,6 +109,8 @@
"${AOM_ROOT}/av1/encoder/context_tree.h"
"${AOM_ROOT}/av1/encoder/cost.c"
"${AOM_ROOT}/av1/encoder/cost.h"
+ "${AOM_ROOT}/av1/encoder/daala_fwd_txfm.c"
+ "${AOM_ROOT}/av1/encoder/daala_fwd_txfm.h"
"${AOM_ROOT}/av1/encoder/dct.c"
"${AOM_ROOT}/av1/encoder/encodeframe.c"
"${AOM_ROOT}/av1/encoder/encodeframe.h"
diff --git a/av1/av1_cx.mk b/av1/av1_cx.mk
index 9c53be6..3ae1772 100644
--- a/av1/av1_cx.mk
+++ b/av1/av1_cx.mk
@@ -27,6 +27,8 @@
AV1_CX_SRCS-yes += encoder/context_tree.h
AV1_CX_SRCS-yes += encoder/cost.h
AV1_CX_SRCS-yes += encoder/cost.c
+AV1_CX_SRCS-yes += encoder/daala_fwd_txfm.c
+AV1_CX_SRCS-yes += encoder/daala_fwd_txfm.h
AV1_CX_SRCS-yes += encoder/dct.c
AV1_CX_SRCS-yes += encoder/hybrid_fwd_txfm.c
AV1_CX_SRCS-yes += encoder/hybrid_fwd_txfm.h
diff --git a/av1/common/daala_tx.c b/av1/common/daala_tx.c
index 8a47d64..ed2095b 100644
--- a/av1/common/daala_tx.c
+++ b/av1/common/daala_tx.c
@@ -5186,6 +5186,40 @@
}
#endif
+// 4-point forward identity: y[k] = x[k * xstride], values passed unchanged.
+void od_bin_fidtx4(od_coeff y[4], const od_coeff *x, int xstride) {
+  int k;
+  for (k = 0; k < 4; k++) y[k] = x[k * xstride];
+}
+
+// 8-point forward identity: y[k] = x[k * xstride], values passed unchanged.
+void od_bin_fidtx8(od_coeff y[8], const od_coeff *x, int xstride) {
+  int k;
+  for (k = 0; k < 8; k++) y[k] = x[k * xstride];
+}
+
+// 16-point forward identity: y[k] = x[k * xstride], values passed unchanged.
+void od_bin_fidtx16(od_coeff y[16], const od_coeff *x, int xstride) {
+  int k;
+  for (k = 0; k < 16; k++) y[k] = x[k * xstride];
+}
+
+// 32-point forward identity: y[k] = x[k * xstride], values passed unchanged.
+void od_bin_fidtx32(od_coeff y[32], const od_coeff *x, int xstride) {
+  int k;
+  for (k = 0; k < 32; k++) y[k] = x[k * xstride];
+}
+
+#if CONFIG_TX64X64
+// 64-point forward identity: y[k] = x[k * xstride], values passed unchanged.
+void od_bin_fidtx64(od_coeff y[64], const od_coeff *x, int xstride) {
+  int k;
+  for (k = 0; k < 64; k++) y[k] = x[k * xstride];
+}
+#endif
+
+// Below are intermediate wrappers that handle the case when
+// tran_low_t is a smaller type than od_coeff
void daala_fdct4(const tran_low_t *input, tran_low_t *output) {
int i;
od_coeff x[4];
diff --git a/av1/common/daala_tx.h b/av1/common/daala_tx.h
index 7145b66..e482ed1 100644
--- a/av1/common/daala_tx.h
+++ b/av1/common/daala_tx.h
@@ -35,19 +35,26 @@
void od_bin_fdct4(od_coeff y[4], const od_coeff *x, int xstride);
void od_bin_idct4(od_coeff *x, int xstride, const od_coeff y[4]);
void od_bin_fdst4(od_coeff y[4], const od_coeff *x, int xstride);
+void od_bin_fidtx4(od_coeff y[4], const od_coeff *x, int xstride);
void od_bin_idst4(od_coeff *x, int xstride, const od_coeff y[4]);
void od_bin_fdct8(od_coeff y[8], const od_coeff *x, int xstride);
void od_bin_idct8(od_coeff *x, int xstride, const od_coeff y[8]);
void od_bin_fdst8(od_coeff y[8], const od_coeff *x, int xstride);
+void od_bin_fidtx8(od_coeff y[8], const od_coeff *x, int xstride);
void od_bin_idst8(od_coeff *x, int xstride, const od_coeff y[8]);
void od_bin_fdct16(od_coeff y[16], const od_coeff *x, int xstride);
void od_bin_idct16(od_coeff *x, int xstride, const od_coeff y[16]);
void od_bin_fdst16(od_coeff y[16], const od_coeff *x, int xstride);
+void od_bin_fidtx16(od_coeff y[16], const od_coeff *x, int xstride);
void od_bin_idst16(od_coeff *x, int xstride, const od_coeff y[16]);
void od_bin_fdct32(od_coeff y[32], const od_coeff *x, int xstride);
void od_bin_idct32(od_coeff *x, int xstride, const od_coeff y[32]);
+void od_bin_fdst32(od_coeff y[32], const od_coeff *x, int xstride);
+void od_bin_fidtx32(od_coeff y[32], const od_coeff *x, int xstride);
#if CONFIG_TX64X64
void od_bin_fdct64(od_coeff y[64], const od_coeff *x, int xstride);
void od_bin_idct64(od_coeff *x, int xstride, const od_coeff y[64]);
+void od_bin_fidtx64(od_coeff y[64], const od_coeff *x, int xstride);
+
#endif
#endif
diff --git a/av1/encoder/daala_fwd_txfm.c b/av1/encoder/daala_fwd_txfm.c
new file mode 100644
index 0000000..37be8cd
--- /dev/null
+++ b/av1/encoder/daala_fwd_txfm.c
@@ -0,0 +1,127 @@
+/*
+ * Copyright (c) 2017, Alliance for Open Media. All rights reserved
+ *
+ * This source code is subject to the terms of the BSD 2 Clause License and
+ * the Alliance for Open Media Patent License 1.0. If the BSD 2 Clause License
+ * was not distributed with this source code in the LICENSE file, you can
+ * obtain it at www.aomedia.org/license/software. If the Alliance for Open
+ * Media Patent License 1.0 was not distributed with this source code in the
+ * PATENTS file, you can obtain it at www.aomedia.org/license/patent.
+ */
+
+#include "./av1_rtcd.h"
+#include "./aom_config.h"
+#include "./aom_dsp_rtcd.h"
+#include "av1/common/daala_tx.h"
+#include "av1/encoder/daala_fwd_txfm.h"
+
+#if CONFIG_DAALA_TX
+
+// Temporary while we still need av1_get_tx_scale() for testing
+#include "av1/common/idct.h"
+
+// Complete Daala TX map, sans lossless which is special cased
+typedef void (*daala_ftx)(od_coeff[], const od_coeff *, int);
+
+// Forward 1-D kernels, indexed [1-D size][1-D type]. Columns 1 and 2 share
+// the DST kernel; column 2 is the flipped variant, which the caller realises
+// by feeding the kernel its input backwards (see tx_flip). NULL means no
+// kernel exists for that size/type pair. The table is read-only, hence const.
+static const daala_ftx tx_map[TX_SIZES][TX_TYPES_1D] = {
+  // 4-point transforms
+  { od_bin_fdct4, od_bin_fdst4, od_bin_fdst4, od_bin_fidtx4 },
+  // 8-point transforms
+  { od_bin_fdct8, od_bin_fdst8, od_bin_fdst8, od_bin_fidtx8 },
+  // 16-point transforms
+  { od_bin_fdct16, od_bin_fdst16, od_bin_fdst16, od_bin_fidtx16 },
+  // 32-point transforms
+  { od_bin_fdct32, od_bin_fdst32, od_bin_fdst32, od_bin_fidtx32 },
+#if CONFIG_TX64X64
+  // 64-point transforms
+  { od_bin_fdct64, NULL, NULL, od_bin_fidtx64 },
+#endif
+};
+
+static int tx_flip(TX_TYPE_1D t) { return t == 2; }  // flip iff 1-D type is 2
+
+// Daala TX toplevel entry point, same interface as av1 low-bitdepth
+// and high-bitdepth TX (av1_fwd_txfm and av1_highbd_fwd_txfm). This
+// same function is intended for both low and high bitdepth cases with
+// a tran_low_t of 32 bits (matching od_coeff).
+// input_pixels is read with row pitch input_stride; in the non-lossless
+// path output_coeffs is written densely, row pitch == transform width.
+void daala_fwd_txfm(const int16_t *input_pixels, tran_low_t *output_coeffs,
+                    int input_stride, TxfmParam *txfm_param) {
+  const TX_SIZE tx_size = txfm_param->tx_size;
+  const TX_TYPE tx_type = txfm_param->tx_type;
+  assert(tx_size < TX_SIZES_ALL);
+  assert(tx_type < TX_TYPES);
+
+  if (txfm_param->lossless) {
+    // Transform function special-cased for lossless
+    assert(tx_type == DCT_DCT);
+    assert(tx_size == TX_4X4);
+    av1_fwht4x4(input_pixels, output_coeffs, input_stride);
+  } else {
+    // General TX case
+    // up 4, down 1 compatibility mode with av1_get_tx_scale
+    const int upshift = 4;
+    assert(upshift >= 0);
+    assert(sizeof(tran_low_t) == sizeof(od_coeff));
+    assert(sizeof(tran_low_t) >= 4);
+
+    // Hook into existing map translation infrastructure to select
+    // appropriate TX functions; every index must be strictly in range
+    const int cols = tx_size_wide[tx_size];
+    const int rows = tx_size_high[tx_size];
+    const TX_SIZE col_idx = txsize_vert_map[tx_size];
+    const TX_SIZE row_idx = txsize_horz_map[tx_size];
+    assert(col_idx < TX_SIZES);
+    assert(row_idx < TX_SIZES);
+    assert(vtx_tab[tx_type] < (int)TX_TYPES_1D);
+    assert(htx_tab[tx_type] < (int)TX_TYPES_1D);
+    daala_ftx col_tx = tx_map[col_idx][vtx_tab[tx_type]];
+    daala_ftx row_tx = tx_map[row_idx][htx_tab[tx_type]];
+    int col_flip = tx_flip(vtx_tab[tx_type]);
+    int row_flip = tx_flip(htx_tab[tx_type]);
+    od_coeff tmp[MAX_TX_SIZE];
+    int r;
+    int c;
+    assert(col_tx);
+    assert(row_tx);
+
+    // Transform columns
+    for (c = 0; c < cols; ++c) {
+      // Cast and shift
+      for (r = 0; r < rows; ++r)
+        tmp[r] =
+            ((od_coeff)(input_pixels[r * input_stride + c])) * (1 << upshift);
+      if (col_flip)
+        col_tx(tmp, tmp + (rows - 1), -1);
+      else
+        col_tx(tmp, tmp, 1);
+      // No ystride in daala_tx lowlevel functions, store output vector
+      // into column the long way
+      for (r = 0; r < rows; ++r) output_coeffs[r * cols + c] = tmp[r];
+    }
+
+    // Transform rows (in place within output_coeffs)
+    for (r = 0; r < rows; ++r) {
+      if (row_flip)
+        row_tx(output_coeffs + r * cols, output_coeffs + r * cols + cols - 1,
+               -1);
+      else
+        row_tx(output_coeffs + r * cols, output_coeffs + r * cols, 1);
+    }
+
+    // This is temporary while we're testing against existing
+    // behavior (preshift up 4, then downshift by one plus av1_get_tx_scale)
+    int downshift = 1 + av1_get_tx_scale(tx_size);
+    for (r = 0; r < rows; ++r)
+      for (c = 0; c < cols; ++c)
+        output_coeffs[r * cols + c] =
+            ROUND_POWER_OF_TWO_SIGNED(output_coeffs[r * cols + c], downshift);
+  }
+}
+
+#endif
diff --git a/av1/encoder/daala_fwd_txfm.h b/av1/encoder/daala_fwd_txfm.h
new file mode 100644
index 0000000..e8f777a
--- /dev/null
+++ b/av1/encoder/daala_fwd_txfm.h
@@ -0,0 +1,28 @@
+/*
+ * Copyright (c) 2017, Alliance for Open Media. All rights reserved
+ *
+ * This source code is subject to the terms of the BSD 2 Clause License and
+ * the Alliance for Open Media Patent License 1.0. If the BSD 2 Clause License
+ * was not distributed with this source code in the LICENSE file, you can
+ * obtain it at www.aomedia.org/license/software. If the Alliance for Open
+ * Media Patent License 1.0 was not distributed with this source code in the
+ * PATENTS file, you can obtain it at www.aomedia.org/license/patent.
+ */
+
+#ifndef AV1_ENCODER_DAALA_FWD_TXFM_H_
+#define AV1_ENCODER_DAALA_FWD_TXFM_H_
+
+#include "./aom_config.h"
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+void daala_fwd_txfm(const int16_t *src_diff, tran_low_t *coeff, int diff_stride,
+ TxfmParam *txfm_param);
+
+#ifdef __cplusplus
+} // extern "C"
+#endif
+
+#endif // AV1_ENCODER_DAALA_FWD_TXFM_H_
diff --git a/av1/encoder/hybrid_fwd_txfm.c b/av1/encoder/hybrid_fwd_txfm.c
index 0028edc..76b417c 100644
--- a/av1/encoder/hybrid_fwd_txfm.c
+++ b/av1/encoder/hybrid_fwd_txfm.c
@@ -15,6 +15,9 @@
#include "av1/common/idct.h"
#include "av1/encoder/hybrid_fwd_txfm.h"
+#if CONFIG_DAALA_TX
+#include "av1/encoder/daala_fwd_txfm.h"
+#else
static void fwd_txfm_4x4(const int16_t *src_diff, tran_low_t *coeff,
int diff_stride, TxfmParam *txfm_param) {
@@ -457,10 +460,14 @@
}
}
#endif // CONFIG_TX64X64
+#endif // CONFIG_DAALA_TX
void av1_fwd_txfm(const int16_t *src_diff, tran_low_t *coeff, int diff_stride,
TxfmParam *txfm_param) {
assert(av1_ext_tx_used[txfm_param->tx_set_type][txfm_param->tx_type]);
+#if CONFIG_DAALA_TX
+ daala_fwd_txfm(src_diff, coeff, diff_stride, txfm_param);
+#else
const TX_SIZE tx_size = txfm_param->tx_size;
switch (tx_size) {
#if CONFIG_TX64X64
@@ -512,11 +519,15 @@
#endif
default: assert(0); break;
}
+#endif
}
void av1_highbd_fwd_txfm(const int16_t *src_diff, tran_low_t *coeff,
int diff_stride, TxfmParam *txfm_param) {
assert(av1_ext_tx_used[txfm_param->tx_set_type][txfm_param->tx_type]);
+#if CONFIG_DAALA_TX
+ daala_fwd_txfm(src_diff, coeff, diff_stride, txfm_param);
+#else
const TX_SIZE tx_size = txfm_param->tx_size;
switch (tx_size) {
#if CONFIG_TX64X64
@@ -562,4 +573,5 @@
break;
default: assert(0); break;
}
+#endif
}
diff --git a/build/cmake/aom_experiment_deps.cmake b/build/cmake/aom_experiment_deps.cmake
index 693108c..6e0c07d 100644
--- a/build/cmake/aom_experiment_deps.cmake
+++ b/build/cmake/aom_experiment_deps.cmake
@@ -39,6 +39,7 @@
endif ()
if (CONFIG_DAALA_TX)
+ set(CONFIG_HIGHBITDEPTH 1)
set(CONFIG_DAALA_TX4 1)
set(CONFIG_DAALA_TX8 1)
set(CONFIG_DAALA_TX16 1)
diff --git a/configure b/configure
index 0bab2a6..0b07c71 100755
--- a/configure
+++ b/configure
@@ -560,6 +560,7 @@
disable_feature rawbits
fi
if enabled daala_tx; then
+ enable_feature highbitdepth
enable_feature daala_tx4
enable_feature daala_tx8
enable_feature daala_tx16
diff --git a/test/av1_fht64x64_test.cc b/test/av1_fht64x64_test.cc
index 3414d00..adf3ccb 100644
--- a/test/av1_fht64x64_test.cc
+++ b/test/av1_fht64x64_test.cc
@@ -20,7 +20,7 @@
#include "test/transform_test_base.h"
#include "test/util.h"
-#if CONFIG_TX64X64
+#if CONFIG_TX64X64 && !CONFIG_DAALA_TX
using libaom_test::ACMRandom;