| /* |
| * Copyright (c) 2017, Alliance for Open Media. All rights reserved |
| * |
| * This source code is subject to the terms of the BSD 2 Clause License and |
| * the Alliance for Open Media Patent License 1.0. If the BSD 2 Clause License |
| * was not distributed with this source code in the LICENSE file, you can |
| * obtain it at www.aomedia.org/license/software. If the Alliance for Open |
| * Media Patent License 1.0 was not distributed with this source code in the |
| * PATENTS file, you can obtain it at www.aomedia.org/license/patent. |
| */ |
| |
| #include "./av1_rtcd.h" |
| #include "./aom_config.h" |
| #include "./aom_dsp_rtcd.h" |
| #include "av1/common/daala_tx.h" |
| #include "av1/common/daala_inv_txfm.h" |
| #include "av1/common/idct.h" |
| |
| #if CONFIG_DAALA_TX |
| |
| // Complete Daala TX map, sans lossless, which is special-cased below |
| typedef void (*daala_itx)(od_coeff *, int, const od_coeff[]); |
| |
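| // The rows of tx_map are indexed by square TX_SIZE (4-, 8-, 16-, 32- and, |
| // under CONFIG_TX64X64, 64-point); the columns are indexed by TX_TYPE_1D |
| // in the order DCT_1D, ADST_1D, FLIPADST_1D, IDTX_1D. FLIPADST reuses the |
| // ADST kernel; the flip is applied via output addressing in |
| // daala_inv_txfm_add_c() below. |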
| static daala_itx tx_map[TX_SIZES][TX_TYPES] = { |
| // 4-point transforms |
| { od_bin_idct4, od_bin_idst4, od_bin_idst4, od_bin_iidtx4 }, |
| |
| // 8-point transforms |
| { od_bin_idct8, od_bin_idst8, od_bin_idst8, od_bin_iidtx8 }, |
| |
| // 16-point transforms |
| { od_bin_idct16, od_bin_idst16, od_bin_idst16, od_bin_iidtx16 }, |
| |
| // 32-point transforms |
| { od_bin_idct32, od_bin_idst32, od_bin_idst32, od_bin_iidtx32 }, |
| |
| #if CONFIG_TX64X64 |
| // 64-point transforms (there is no 64-point ADST kernel, hence the NULL |
| // entries; these are caught by the asserts on col_tx/row_tx below) |
| { od_bin_idct64, NULL, NULL, od_bin_iidtx64 }, |
| #endif |
| }; |
| |
| static int tx_flip(TX_TYPE_1D t) { return t == FLIPADST_1D; } |
| |
| // Daala TX top-level inverse entry point. The same function handles both |
| // the low and high bit-depth cases: tran_low_t is 32 bits (matching |
| // od_coeff), and the passed-in pixel buffer holds either bytes |
| // (is_hbd == 0) or shorts (is_hbd == 1). |
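| // |
| // The non-lossless path proceeds in four steps: (1) for 64-point sizes, |
| // zero-pad the (at most 32x32) coded coefficients; (2) run the 1-D row |
| // transforms, writing a transposed intermediate into tmpsq; (3) run the |
| // 1-D column transforms in place; (4) round, downshift and clip-add the |
| // result into the destination buffer. |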
| void daala_inv_txfm_add_c(const tran_low_t *input_coeffs, void *output_pixels, |
| int output_stride, TxfmParam *txfm_param) { |
| const TX_SIZE tx_size = txfm_param->tx_size; |
| const TX_TYPE tx_type = txfm_param->tx_type; |
| const int px_depth = txfm_param->bd; |
| assert(tx_size < TX_SIZES_ALL); |
| assert(tx_type < TX_TYPES); |
| |
| if (txfm_param->lossless) { |
| // Lossless blocks are special-cased to the 4x4 inverse Walsh-Hadamard |
| // transform |
| assert(tx_type == DCT_DCT); |
| assert(tx_size == TX_4X4); |
| if (txfm_param->is_hbd) |
| // Note that the output pointer in the prototype is uint8, but the |
| // function converts to short internally |
| av1_highbd_iwht4x4_add(input_coeffs, output_pixels, output_stride, |
| txfm_param->eob, px_depth); |
| else |
| av1_iwht4x4_add(input_coeffs, output_pixels, output_stride, txfm_param); |
| } else { |
| // General TX case |
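| // Daala transform kernels operate at TX_COEFF_DEPTH bits of precision; |
| // the extra precision over the pixel bit depth is removed with a rounding |
| // right-shift by `downshift` when the result is added to the destination. |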
| const int downshift = TX_COEFF_DEPTH - px_depth; |
| assert(downshift >= 0); |
| assert(sizeof(tran_low_t) == sizeof(od_coeff)); |
| assert(sizeof(tran_low_t) >= 4); |
| |
| // Hook into existing map translation infrastructure to select |
| // appropriate TX functions |
| const int cols = tx_size_wide[tx_size]; |
| const int rows = tx_size_high[tx_size]; |
| const TX_SIZE col_idx = txsize_vert_map[tx_size]; |
| const TX_SIZE row_idx = txsize_horz_map[tx_size]; |
| assert(col_idx < TX_SIZES); |
| assert(row_idx < TX_SIZES); |
| assert(vtx_tab[tx_type] < (int)TX_TYPES_1D); |
| assert(htx_tab[tx_type] < (int)TX_TYPES_1D); |
| daala_itx col_tx = tx_map[col_idx][vtx_tab[tx_type]]; |
| daala_itx row_tx = tx_map[row_idx][htx_tab[tx_type]]; |
| int col_flip = tx_flip(vtx_tab[tx_type]); |
| int row_flip = tx_flip(htx_tab[tx_type]); |
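| // Scratch buffer for the transposed intermediate; sized for the largest |
| // square transform, of which a rows * cols prefix is used here. |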
| od_coeff tmpsq[MAX_TX_SQUARE]; |
| #if CONFIG_TX64X64 |
| tran_low_t pad_input[MAX_TX_SQUARE]; |
| #endif |
| int r; |
| int c; |
| |
| assert(col_tx); |
| assert(row_tx); |
| |
| #if CONFIG_TX64X64 |
| if (rows > 32 || cols > 32) { |
| int avail_rows; |
| int avail_cols; |
| // TODO(urvang): Can the same array be reused, instead of using a new |
| // array? |
| // Remap the input into a zero-padded rows x cols buffer: |
| // - Copy the coded values (at most 32 in each dimension) into the |
| // top-left locations. |
| // - Set the remaining locations to 0. |
| avail_rows = AOMMIN(rows, 32); |
| avail_cols = AOMMIN(cols, 32); |
| for (r = 0; r < avail_rows; r++) { |
| memcpy(pad_input + r * cols, input_coeffs + r * avail_cols, |
| avail_cols * sizeof(*pad_input)); |
| if (cols > avail_cols) { |
| memset(pad_input + r * cols + avail_cols, 0, |
| (cols - avail_cols) * sizeof(*pad_input)); |
| } |
| } |
| if (rows > avail_rows) { |
| memset(pad_input + avail_rows * cols, 0, |
| (rows - avail_rows) * cols * sizeof(*pad_input)); |
| } |
| input_coeffs = pad_input; |
| } |
| #endif |
| |
| // Inverse-transform rows |
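| // Each row transform reads `cols` input coefficients and writes its |
| // output with stride `rows`, producing a transposed (column-major) |
| // intermediate: output i of row r lands at tmpsq[i * rows + r], or at |
| // tmpsq[(cols - 1 - i) * rows + r] when row_flip is set. For example, |
| // with cols = 8 and rows = 4, the flipped outputs of row r land at |
| // tmpsq[(7 - i) * 4 + r]. |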
| for (r = 0; r < rows; ++r) { |
| // The output addressing transposes the result |
| if (row_flip) |
| row_tx(tmpsq + r + (rows * cols) - rows, -rows, |
| input_coeffs + r * cols); |
| else |
| row_tx(tmpsq + r, rows, input_coeffs + r * cols); |
| } |
| |
| // Inverse-transform columns |
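| // After the row pass each output column occupies a contiguous run of |
| // `rows` entries in tmpsq, so the column transform runs in place with |
| // unit stride; col_flip reverses the write order for FLIPADST. |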
| for (c = 0; c < cols; ++c) { |
| // The intermediate was transposed above, so output columns are now rows |
| if (col_flip) |
| col_tx(tmpsq + c * rows + rows - 1, -1, tmpsq + c * rows); |
| else |
| col_tx(tmpsq + c * rows, 1, tmpsq + c * rows); |
| } |
| |
| // Sum with destination according to bit depth |
| // The tmpsq array is currently transposed relative to output |
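| // (1 << downshift >> 1) is half of the shift divisor, so the shift |
| // rounds to nearest; it evaluates to 0 when downshift == 0. |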
| if (txfm_param->is_hbd) { |
| // Destination array is shorts |
| uint16_t *out16 = CONVERT_TO_SHORTPTR(output_pixels); |
| for (r = 0; r < rows; ++r) |
| for (c = 0; c < cols; ++c) |
| out16[r * output_stride + c] = highbd_clip_pixel_add( |
| out16[r * output_stride + c], |
| (tmpsq[c * rows + r] + (1 << downshift >> 1)) >> downshift, |
| px_depth); |
| } else { |
| // Destination array is bytes |
| uint8_t *out8 = (uint8_t *)output_pixels; |
| for (r = 0; r < rows; ++r) |
| for (c = 0; c < cols; ++c) |
| out8[r * output_stride + c] = clip_pixel_add( |
| out8[r * output_stride + c], |
| (tmpsq[c * rows + r] + (1 << downshift >> 1)) >> downshift); |
| } |
| } |
| } |
| |
| #endif |