blob: 04092e0272cd6241e4cdea4c8226adf57ed941ac [file] [log] [blame]
/*
* Copyright (c) 2017, Alliance for Open Media. All rights reserved
*
* This source code is subject to the terms of the BSD 2 Clause License and
* the Alliance for Open Media Patent License 1.0. If the BSD 2 Clause License
* was not distributed with this source code in the LICENSE file, you can
* obtain it at www.aomedia.org/license/software. If the Alliance for Open
* Media Patent License 1.0 was not distributed with this source code in the
* PATENTS file, you can obtain it at www.aomedia.org/license/patent.
*/
#include "./av1_rtcd.h"
#include "./aom_config.h"
#include "./aom_dsp_rtcd.h"
#include "av1/common/daala_tx.h"
#include "av1/common/daala_inv_txfm.h"
#include "av1/common/idct.h"
#if CONFIG_DAALA_TX
// Complete Daala TX map, sans lossless which is special cased
typedef void (*daala_itx)(od_coeff *, int, const od_coeff[]);
static daala_itx tx_map[TX_SIZES][TX_TYPES] = {
// 4-point transforms
{ od_bin_idct4, od_bin_idst4, od_bin_idst4, od_bin_iidtx4 },
// 8-point transforms
{ od_bin_idct8, od_bin_idst8, od_bin_idst8, od_bin_iidtx8 },
// 16-point transforms
{ od_bin_idct16, od_bin_idst16, od_bin_idst16, od_bin_iidtx16 },
// 32-point transforms
{ od_bin_idct32, od_bin_idst32, od_bin_idst32, od_bin_iidtx32 },
#if CONFIG_TX64X64
// 64-point transforms
{ od_bin_idct64, NULL, NULL, od_bin_iidtx64 },
#endif
};
// Returns 1 when the given 1-D transform type is the flipped ADST and 0
// otherwise. The inverse transform uses the result to decide whether a pass
// should store its output in reverse order.
static int tx_flip(TX_TYPE_1D t) {
  const int is_flipped = (FLIPADST_1D == t);
  return is_flipped;
}
// Daala TX toplevel inverse entry point. This same function is
// intended for both low and high bitdepth cases with a tran_low_t of
// 32 bits (matching od_coeff), and a passed-in pixel buffer of either
// bytes (hbd=0) or shorts (hbd=1).
//
// Parameters:
//   input_coeffs   Coefficients of one transform block, read row by row.
//                  When either dimension exceeds 32 (CONFIG_TX64X64) only
//                  the top-left min(rows,32) x min(cols,32) region is read,
//                  packed with a row stride of min(cols,32); the remainder
//                  of the block is treated as zero.
//   output_pixels  Destination pixels. The reconstructed residual is added
//                  to the values already there and the sum is clipped.
//                  Interpreted as uint8_t* when txfm_param->is_hbd is 0 and
//                  passed through CONVERT_TO_SHORTPTR() otherwise.
//   output_stride  Distance, in pixels, between consecutive output rows.
//   txfm_param     Supplies tx_size, tx_type, bd, lossless, is_hbd and eob.
//
// In the lossless case the work is delegated to the 4x4 inverse WHT
// routines. Otherwise a separable row/column inverse transform is run
// through an intermediate transposed buffer, and the result is rounded and
// shifted down by (TX_COEFF_DEPTH - bd) bits before being added to the
// destination.
void daala_inv_txfm_add_c(const tran_low_t *input_coeffs, void *output_pixels,
                          int output_stride, TxfmParam *txfm_param) {
  const TX_SIZE tx_size = txfm_param->tx_size;
  const TX_TYPE tx_type = txfm_param->tx_type;
  const int px_depth = txfm_param->bd;
  // TX_SIZES_ALL and TX_TYPES are element counts, so a valid value is
  // strictly less than them.
  assert(tx_size < TX_SIZES_ALL);
  assert(tx_type < TX_TYPES);
  if (txfm_param->lossless) {
    // Transform function special-cased for lossless
    assert(tx_type == DCT_DCT);
    assert(tx_size == TX_4X4);
    if (txfm_param->is_hbd) {
      // Note that the output pointer in the prototype is uint8, but the
      // function converts to short internally
      av1_highbd_iwht4x4_add(input_coeffs, output_pixels, output_stride,
                             txfm_param->eob, px_depth);
    } else {
      av1_iwht4x4_add(input_coeffs, output_pixels, output_stride, txfm_param);
    }
  } else {
    // General TX case
    const int downshift = TX_COEFF_DEPTH - px_depth;
    assert(downshift >= 0);
    assert(sizeof(tran_low_t) == sizeof(od_coeff));
    assert(sizeof(tran_low_t) >= 4);
    // Round-to-nearest offset for the final downshift; zero when no shift
    // is needed.
    const int round_offset = (1 << downshift) >> 1;

    // Hook into existing map translation infrastructure to select
    // appropriate TX functions
    const int cols = tx_size_wide[tx_size];
    const int rows = tx_size_high[tx_size];
    const TX_SIZE col_idx = txsize_vert_map[tx_size];
    const TX_SIZE row_idx = txsize_horz_map[tx_size];
    // These values index tx_map, whose dimensions are counts: anything
    // equal to the count is already out of range.
    assert(col_idx < TX_SIZES);
    assert(row_idx < TX_SIZES);
    assert(vtx_tab[tx_type] < (int)TX_TYPES_1D);
    assert(htx_tab[tx_type] < (int)TX_TYPES_1D);
    daala_itx col_tx = tx_map[col_idx][vtx_tab[tx_type]];
    daala_itx row_tx = tx_map[row_idx][htx_tab[tx_type]];
    int col_flip = tx_flip(vtx_tab[tx_type]);
    int row_flip = tx_flip(htx_tab[tx_type]);
    od_coeff tmpsq[MAX_TX_SQUARE];
#if CONFIG_TX64X64
    tran_low_t pad_input[MAX_TX_SQUARE];
#endif
    int r;
    int c;

    // tx_map holds NULL for combinations that have no kernel.
    assert(col_tx);
    assert(row_tx);

#if CONFIG_TX64X64
    if (rows > 32 || cols > 32) {
      int avail_rows;
      int avail_cols;
      // TODO(urvang): Can the same array be reused, instead of using a new
      // array?
      // Remap 32x32 input into a modified input by:
      // - Copying over these values in top-left 32x32 locations.
      // - Setting the rest of the locations to 0.
      avail_rows = AOMMIN(rows, 32);
      avail_cols = AOMMIN(cols, 32);
      for (r = 0; r < avail_rows; r++) {
        // Source rows are packed avail_cols apart; destination rows are the
        // full block width apart.
        memcpy(pad_input + r * cols, input_coeffs + r * avail_cols,
               avail_cols * sizeof(*pad_input));
        if (cols > avail_cols) {
          memset(pad_input + r * cols + avail_cols, 0,
                 (cols - avail_cols) * sizeof(*pad_input));
        }
      }
      if (rows > avail_rows) {
        memset(pad_input + avail_rows * cols, 0,
               (rows - avail_rows) * cols * sizeof(*pad_input));
      }
      input_coeffs = pad_input;
    }
#endif

    // Inverse-transform rows
    for (r = 0; r < rows; ++r) {
      // The output addressing transposes: output element i of row r lands
      // at tmpsq[i * rows + r], or at tmpsq[(cols - 1 - i) * rows + r] when
      // the row transform is flipped.
      if (row_flip) {
        row_tx(tmpsq + r + (rows * cols) - rows, -rows,
               input_coeffs + r * cols);
      } else {
        row_tx(tmpsq + r, rows, input_coeffs + r * cols);
      }
    }

    // Inverse-transform columns
    for (c = 0; c < cols; ++c) {
      // Above transposed, so our cols are now rows. The transform runs in
      // place; in the flipped case the output is stored in reverse over the
      // same span it reads from.
      // NOTE(review): this relies on the kernels reading all of their input
      // before storing any output -- confirm in the od_bin_* kernels.
      if (col_flip) {
        col_tx(tmpsq + c * rows + rows - 1, -1, tmpsq + c * rows);
      } else {
        col_tx(tmpsq + c * rows, 1, tmpsq + c * rows);
      }
    }

    // Sum with destination according to bit depth
    // The tmpsq array is currently transposed relative to output
    if (txfm_param->is_hbd) {
      // Destination array is shorts
      uint16_t *out16 = CONVERT_TO_SHORTPTR(output_pixels);
      for (r = 0; r < rows; ++r) {
        for (c = 0; c < cols; ++c) {
          out16[r * output_stride + c] = highbd_clip_pixel_add(
              out16[r * output_stride + c],
              (tmpsq[c * rows + r] + round_offset) >> downshift, px_depth);
        }
      }
    } else {
      // Destination array is bytes
      uint8_t *out8 = (uint8_t *)output_pixels;
      for (r = 0; r < rows; ++r) {
        for (c = 0; c < cols; ++c) {
          out8[r * output_stride + c] = clip_pixel_add(
              out8[r * output_stride + c],
              (tmpsq[c * rows + r] + round_offset) >> downshift);
        }
      }
    }
  }
}
#endif