Simplify Daala forward TX toplevel for constant shift

Rather than backing out all the LGT-related shifting matrices throughout the existing TX code, separate out and simplify Daala forward TX into a single dedicated entry point. When DAALA_TX is enabled, CONFIG_HIGHBITDEPTH is also forced, and all of Daala TX (lowbd and highbd) uses this single TX dispatch.

At present, this should result in no effective functional change; however, rectangular transforms are now always column-first, which has minor rounding effects.

subset 1:
monty-daalaTX-fulltest-DaalaRDO-s1@2017-11-07T00:02:56.282Z -> monty-daalaTX-fulltest-fwd-s1@2017-11-07T03:08:55.478Z

PSNR | PSNR Cb | PSNR Cr | PSNR HVS | SSIM | MS SSIM | CIEDE 2000
-0.0576 | N/A | -0.2646 | -0.0125 | -0.0439 | -0.0479 | -0.1798

objective 1 fast:
monty-daalaTX-fulltest-DaalaRDO-o1f4@2017-11-07T05:59:50.180Z -> monty-daalaTX-fulltest-fwd-o1f4@2017-11-07T06:00:08.500Z

PSNR | PSNR Cb | PSNR Cr | PSNR HVS | SSIM | MS SSIM | CIEDE 2000
0.0036 | 0.0477 | 0.1132 | 0.0863 | -0.0017 | 0.0209 | 0.0240

Change-Id: I182a5c4388c410cbea8810e2f9e36fd37a4a46e5

commit: a2d40a398958df6d6c9742e97aebf2fba671a43e [log] [tgz]
author: Monty Montgomery <cmontgomery@mozilla.com> Mon Oct 30 23:32:03 2017 -0400
committer: Christopher Montgomery <cmontgomery@mozilla.com> Sun Nov 12 05:18:31 2017 +0000
tree: 4b556932323fee80ac152c36e845702a3be96a5f
parent: 1aeee2e9687b431e7d32c907a7a0237445636780 [diff]
diff --git a/av1/av1.cmake b/av1/av1.cmake
index f930b6e..5d2b641 100644
--- a/av1/av1.cmake
+++ b/av1/av1.cmake

@@ -109,6 +109,8 @@
     "${AOM_ROOT}/av1/encoder/context_tree.h"
     "${AOM_ROOT}/av1/encoder/cost.c"
     "${AOM_ROOT}/av1/encoder/cost.h"
+    "${AOM_ROOT}/av1/encoder/daala_fwd_txfm.c"
+    "${AOM_ROOT}/av1/encoder/daala_fwd_txfm.h"
     "${AOM_ROOT}/av1/encoder/dct.c"
     "${AOM_ROOT}/av1/encoder/encodeframe.c"
     "${AOM_ROOT}/av1/encoder/encodeframe.h"

diff --git a/av1/av1_cx.mk b/av1/av1_cx.mk
index 9c53be6..3ae1772 100644
--- a/av1/av1_cx.mk
+++ b/av1/av1_cx.mk

@@ -27,6 +27,8 @@
 AV1_CX_SRCS-yes += encoder/context_tree.h
 AV1_CX_SRCS-yes += encoder/cost.h
 AV1_CX_SRCS-yes += encoder/cost.c
+AV1_CX_SRCS-yes += encoder/daala_fwd_txfm.c
+AV1_CX_SRCS-yes += encoder/daala_fwd_txfm.h
 AV1_CX_SRCS-yes += encoder/dct.c
 AV1_CX_SRCS-yes += encoder/hybrid_fwd_txfm.c
 AV1_CX_SRCS-yes += encoder/hybrid_fwd_txfm.h

diff --git a/av1/common/daala_tx.c b/av1/common/daala_tx.c
index 8a47d64..ed2095b 100644
--- a/av1/common/daala_tx.c
+++ b/av1/common/daala_tx.c

@@ -5186,6 +5186,40 @@
 }
 #endif
 
+// 4-point identity transform: gathers x[k * xstride] into y[k], unscaled.
+void od_bin_fidtx4(od_coeff y[4], const od_coeff *x, int xstride) {
+  int k;
+  for (k = 0; k < 4; ++k) y[k] = x[k * xstride];
+}
+
+// 8-point identity transform: gathers x[k * xstride] into y[k], unscaled.
+void od_bin_fidtx8(od_coeff y[8], const od_coeff *x, int xstride) {
+  int k;
+  for (k = 0; k < 8; ++k) y[k] = x[k * xstride];
+}
+
+// 16-point identity transform: gathers x[k * xstride] into y[k], unscaled.
+void od_bin_fidtx16(od_coeff y[16], const od_coeff *x, int xstride) {
+  int k;
+  for (k = 0; k < 16; ++k) y[k] = x[k * xstride];
+}
+
+// 32-point identity transform: gathers x[k * xstride] into y[k], unscaled.
+void od_bin_fidtx32(od_coeff y[32], const od_coeff *x, int xstride) {
+  int k;
+  for (k = 0; k < 32; ++k) y[k] = x[k * xstride];
+}
+
+#if CONFIG_TX64X64
+// 64-point identity transform: gathers x[k * xstride] into y[k], unscaled.
+void od_bin_fidtx64(od_coeff y[64], const od_coeff *x, int xstride) {
+  int k;
+  for (k = 0; k < 64; ++k) y[k] = x[k * xstride];
+}
+#endif
+
+// Below are intermediate wrappers that handle the case when
+// tran_low_t is a smaller type than od_coeff
 void daala_fdct4(const tran_low_t *input, tran_low_t *output) {
   int i;
   od_coeff x[4];

diff --git a/av1/common/daala_tx.h b/av1/common/daala_tx.h
index 7145b66..e482ed1 100644
--- a/av1/common/daala_tx.h
+++ b/av1/common/daala_tx.h

@@ -35,19 +35,26 @@
 void od_bin_fdct4(od_coeff y[4], const od_coeff *x, int xstride);
 void od_bin_idct4(od_coeff *x, int xstride, const od_coeff y[4]);
 void od_bin_fdst4(od_coeff y[4], const od_coeff *x, int xstride);
+void od_bin_fidtx4(od_coeff y[4], const od_coeff *x, int xstride);
 void od_bin_idst4(od_coeff *x, int xstride, const od_coeff y[4]);
 void od_bin_fdct8(od_coeff y[8], const od_coeff *x, int xstride);
 void od_bin_idct8(od_coeff *x, int xstride, const od_coeff y[8]);
 void od_bin_fdst8(od_coeff y[8], const od_coeff *x, int xstride);
+void od_bin_fidtx8(od_coeff y[8], const od_coeff *x, int xstride);
 void od_bin_idst8(od_coeff *x, int xstride, const od_coeff y[8]);
 void od_bin_fdct16(od_coeff y[16], const od_coeff *x, int xstride);
 void od_bin_idct16(od_coeff *x, int xstride, const od_coeff y[16]);
 void od_bin_fdst16(od_coeff y[16], const od_coeff *x, int xstride);
+void od_bin_fidtx16(od_coeff y[16], const od_coeff *x, int xstride);
 void od_bin_idst16(od_coeff *x, int xstride, const od_coeff y[16]);
 void od_bin_fdct32(od_coeff y[32], const od_coeff *x, int xstride);
 void od_bin_idct32(od_coeff *x, int xstride, const od_coeff y[32]);
+void od_bin_fdst32(od_coeff y[32], const od_coeff *x, int xstride);
+void od_bin_fidtx32(od_coeff y[32], const od_coeff *x, int xstride);
 #if CONFIG_TX64X64
 void od_bin_fdct64(od_coeff y[64], const od_coeff *x, int xstride);
 void od_bin_idct64(od_coeff *x, int xstride, const od_coeff y[64]);
+void od_bin_fidtx64(od_coeff y[64], const od_coeff *x, int xstride);
+
 #endif
 #endif

diff --git a/av1/encoder/daala_fwd_txfm.c b/av1/encoder/daala_fwd_txfm.c
new file mode 100644
index 0000000..37be8cd
--- /dev/null
+++ b/av1/encoder/daala_fwd_txfm.c

@@ -0,0 +1,127 @@
+/*
+ * Copyright (c) 2017, Alliance for Open Media. All rights reserved
+ *
+ * This source code is subject to the terms of the BSD 2 Clause License and
+ * the Alliance for Open Media Patent License 1.0. If the BSD 2 Clause License
+ * was not distributed with this source code in the LICENSE file, you can
+ * obtain it at www.aomedia.org/license/software. If the Alliance for Open
+ * Media Patent License 1.0 was not distributed with this source code in the
+ * PATENTS file, you can obtain it at www.aomedia.org/license/patent.
+ */
+
+#include "./av1_rtcd.h"
+#include "./aom_config.h"
+#include "./aom_dsp_rtcd.h"
+#include "av1/common/daala_tx.h"
+#include "av1/encoder/daala_fwd_txfm.h"
+
+#if CONFIG_DAALA_TX
+
+// Temporary while we still need av1_get_tx_scale() for testing
+#include "av1/common/idct.h"
+
+// Daala forward TX map [1-D size][TX_TYPE_1D]; lossless is special cased
+typedef void (*daala_ftx)(od_coeff[], const od_coeff *, int);
+
+static const daala_ftx tx_map[TX_SIZES][TX_TYPES_1D] = {
+  //  4-point transforms (type slot 2 reuses fdst; see tx_flip)
+  { od_bin_fdct4, od_bin_fdst4, od_bin_fdst4, od_bin_fidtx4 },
+
+  //  8-point transforms
+  { od_bin_fdct8, od_bin_fdst8, od_bin_fdst8, od_bin_fidtx8 },
+
+  //  16-point transforms
+  { od_bin_fdct16, od_bin_fdst16, od_bin_fdst16, od_bin_fidtx16 },
+
+  //  32-point transforms
+  { od_bin_fdct32, od_bin_fdst32, od_bin_fdst32, od_bin_fidtx32 },
+
+#if CONFIG_TX64X64
+  //  64-point transforms (no DST variants; NULL slots must not be used)
+  { od_bin_fdct64, NULL, NULL, od_bin_fidtx64 },
+#endif
+};
+// Nonzero for the flipped ADST type, whose input is read in reverse order
+static int tx_flip(TX_TYPE_1D t) { return t == FLIPADST_1D; }
+
+// Daala forward TX toplevel entry point, same interface as av1_fwd_txfm
+// and av1_highbd_fwd_txfm; shared by the low- and high-bitdepth paths,
+// which requires a 32-bit tran_low_t (matching od_coeff).  Non-lossless
+// output is packed with a row pitch equal to the transform width.
+void daala_fwd_txfm(const int16_t *input_pixels, tran_low_t *output_coeffs,
+                    int input_stride, TxfmParam *txfm_param) {
+  const TX_SIZE tx_size = txfm_param->tx_size;
+  const TX_TYPE tx_type = txfm_param->tx_type;
+  assert(tx_size < TX_SIZES_ALL);
+  assert(tx_type < TX_TYPES);
+
+  if (txfm_param->lossless) {
+    // Transform function special-cased for lossless
+    assert(tx_type == DCT_DCT);
+    assert(tx_size == TX_4X4);
+    av1_fwht4x4(input_pixels, output_coeffs, input_stride);
+  } else {
+    // General TX case
+    // up 4, down 1 compatibility mode with av1_get_tx_scale
+    const int upshift = 4;
+
+    assert(upshift >= 0);
+    assert(sizeof(tran_low_t) == sizeof(od_coeff));
+    assert(sizeof(tran_low_t) >= 4);
+
+    // Hook into existing map translation infrastructure to select
+    // appropriate TX functions
+    const int cols = tx_size_wide[tx_size];
+    const int rows = tx_size_high[tx_size];
+    const TX_SIZE col_idx = txsize_vert_map[tx_size];
+    const TX_SIZE row_idx = txsize_horz_map[tx_size];
+    assert(col_idx < TX_SIZES);
+    assert(row_idx < TX_SIZES);
+    assert(vtx_tab[tx_type] < (int)TX_TYPES_1D);
+    assert(htx_tab[tx_type] < (int)TX_TYPES_1D);
+    daala_ftx col_tx = tx_map[col_idx][vtx_tab[tx_type]];
+    daala_ftx row_tx = tx_map[row_idx][htx_tab[tx_type]];
+    int col_flip = tx_flip(vtx_tab[tx_type]);
+    int row_flip = tx_flip(htx_tab[tx_type]);
+    od_coeff tmp[MAX_TX_SIZE];
+    int r;
+    int c;
+
+    assert(col_tx);
+    assert(row_tx);
+
+    // Transform columns
+    for (c = 0; c < cols; ++c) {
+      // Cast and shift
+      for (r = 0; r < rows; ++r)
+        tmp[r] =
+            ((od_coeff)(input_pixels[r * input_stride + c])) * (1 << upshift);
+      if (col_flip)
+        col_tx(tmp, tmp + (rows - 1), -1);
+      else
+        col_tx(tmp, tmp, 1);
+      // No ystride in daala_tx lowlevel functions, store output vector
+      // into column the long way
+      for (r = 0; r < rows; ++r) output_coeffs[r * cols + c] = tmp[r];
+    }
+
+    // Transform rows
+    for (r = 0; r < rows; ++r) {
+      if (row_flip)
+        row_tx(output_coeffs + r * cols, output_coeffs + r * cols + cols - 1,
+               -1);
+      else
+        row_tx(output_coeffs + r * cols, output_coeffs + r * cols, 1);
+    }
+
+    // This is temporary while we're testing against existing
+    // behavior (preshift up 4, then downshift by one plus av1_get_tx_scale)
+    int downshift = 1 + av1_get_tx_scale(tx_size);
+    for (r = 0; r < rows; ++r)
+      for (c = 0; c < cols; ++c)
+        output_coeffs[r * cols + c] =
+            ROUND_POWER_OF_TWO_SIGNED(output_coeffs[r * cols + c], downshift);
+  }
+}
+
+#endif

diff --git a/av1/encoder/daala_fwd_txfm.h b/av1/encoder/daala_fwd_txfm.h
new file mode 100644
index 0000000..e8f777a
--- /dev/null
+++ b/av1/encoder/daala_fwd_txfm.h

@@ -0,0 +1,28 @@
+/*
+ * Copyright (c) 2017, Alliance for Open Media. All rights reserved
+ *
+ * This source code is subject to the terms of the BSD 2 Clause License and
+ * the Alliance for Open Media Patent License 1.0. If the BSD 2 Clause License
+ * was not distributed with this source code in the LICENSE file, you can
+ * obtain it at www.aomedia.org/license/software. If the Alliance for Open
+ * Media Patent License 1.0 was not distributed with this source code in the
+ * PATENTS file, you can obtain it at www.aomedia.org/license/patent.
+ */
+
+#ifndef AV1_ENCODER_DAALA_FWD_TXFM_H_
+#define AV1_ENCODER_DAALA_FWD_TXFM_H_
+
+#include "./aom_config.h"
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+void daala_fwd_txfm(const int16_t *src_diff, tran_low_t *coeff, int diff_stride,
+                    TxfmParam *txfm_param);
+
+#ifdef __cplusplus
+}  // extern "C"
+#endif
+
+#endif  // AV1_ENCODER_DAALA_FWD_TXFM_H_

diff --git a/av1/encoder/hybrid_fwd_txfm.c b/av1/encoder/hybrid_fwd_txfm.c
index 0028edc..76b417c 100644
--- a/av1/encoder/hybrid_fwd_txfm.c
+++ b/av1/encoder/hybrid_fwd_txfm.c

@@ -15,6 +15,9 @@
 
 #include "av1/common/idct.h"
 #include "av1/encoder/hybrid_fwd_txfm.h"
+#if CONFIG_DAALA_TX
+#include "av1/encoder/daala_fwd_txfm.h"
+#else
 
 static void fwd_txfm_4x4(const int16_t *src_diff, tran_low_t *coeff,
                          int diff_stride, TxfmParam *txfm_param) {
@@ -457,10 +460,14 @@
   }
 }
 #endif  // CONFIG_TX64X64
+#endif  // CONFIG_DAALA_TX
 
 void av1_fwd_txfm(const int16_t *src_diff, tran_low_t *coeff, int diff_stride,
                   TxfmParam *txfm_param) {
   assert(av1_ext_tx_used[txfm_param->tx_set_type][txfm_param->tx_type]);
+#if CONFIG_DAALA_TX
+  daala_fwd_txfm(src_diff, coeff, diff_stride, txfm_param);
+#else
   const TX_SIZE tx_size = txfm_param->tx_size;
   switch (tx_size) {
 #if CONFIG_TX64X64
@@ -512,11 +519,15 @@
 #endif
     default: assert(0); break;
   }
+#endif
 }
 
 void av1_highbd_fwd_txfm(const int16_t *src_diff, tran_low_t *coeff,
                          int diff_stride, TxfmParam *txfm_param) {
   assert(av1_ext_tx_used[txfm_param->tx_set_type][txfm_param->tx_type]);
+#if CONFIG_DAALA_TX
+  daala_fwd_txfm(src_diff, coeff, diff_stride, txfm_param);
+#else
   const TX_SIZE tx_size = txfm_param->tx_size;
   switch (tx_size) {
 #if CONFIG_TX64X64
@@ -562,4 +573,5 @@
       break;
     default: assert(0); break;
   }
+#endif
 }

diff --git a/build/cmake/aom_experiment_deps.cmake b/build/cmake/aom_experiment_deps.cmake
index 693108c..6e0c07d 100644
--- a/build/cmake/aom_experiment_deps.cmake
+++ b/build/cmake/aom_experiment_deps.cmake

@@ -39,6 +39,7 @@
   endif ()
 
   if (CONFIG_DAALA_TX)
+     set(CONFIG_HIGHBITDEPTH 1)
      set(CONFIG_DAALA_TX4 1)
      set(CONFIG_DAALA_TX8 1)
      set(CONFIG_DAALA_TX16 1)

diff --git a/configure b/configure
index 0bab2a6..0b07c71 100755
--- a/configure
+++ b/configure

@@ -560,6 +560,7 @@
       disable_feature rawbits
     fi
     if enabled daala_tx; then
+      enable_feature highbitdepth
       enable_feature daala_tx4
       enable_feature daala_tx8
       enable_feature daala_tx16

diff --git a/test/av1_fht64x64_test.cc b/test/av1_fht64x64_test.cc
index 3414d00..adf3ccb 100644
--- a/test/av1_fht64x64_test.cc
+++ b/test/av1_fht64x64_test.cc

@@ -20,7 +20,7 @@
 #include "test/transform_test_base.h"
 #include "test/util.h"
 
-#if CONFIG_TX64X64
+#if CONFIG_TX64X64 && !CONFIG_DAALA_TX
 
 using libaom_test::ACMRandom;
commit	a2d40a398958df6d6c9742e97aebf2fba671a43e	[log] [tgz]
author	Monty Montgomery <cmontgomery@mozilla.com>	Mon Oct 30 23:32:03 2017 -0400
committer	Christopher Montgomery <cmontgomery@mozilla.com>	Sun Nov 12 05:18:31 2017 +0000
tree	4b556932323fee80ac152c36e845702a3be96a5f
parent	1aeee2e9687b431e7d32c907a7a0237445636780 [diff]