av1/common/av1_txfm.h - aom - Git at Google

 /*
  * Copyright (c) 2016, Alliance for Open Media. All rights reserved
  *
  * This source code is subject to the terms of the BSD 2 Clause License and
  * the Alliance for Open Media Patent License 1.0. If the BSD 2 Clause License
  * was not distributed with this source code in the LICENSE file, you can
  * obtain it at www.aomedia.org/license/software. If the Alliance for Open
  * Media Patent License 1.0 was not distributed with this source code in the
  * PATENTS file, you can obtain it at www.aomedia.org/license/patent.
  */

 #ifndef AOM_AV1_COMMON_AV1_TXFM_H_
 #define AOM_AV1_COMMON_AV1_TXFM_H_

 #include <assert.h>
 #include <math.h>
 #include <stdio.h>

 #include "config/aom_config.h"

 #include "av1/common/enums.h"
 #include "av1/common/blockd.h"
 #include "aom/aom_integer.h"
 #include "aom_dsp/aom_dsp_common.h"

 #ifdef __cplusplus
 extern "C" {
 #endif

 // Build-time switch consulted by range_check_value(): defaults to 0 unless the
 // build already defined it. A non-zero value makes range_check_value() clamp.
 #if !defined(DO_RANGE_CHECK_CLAMP)
 #define DO_RANGE_CHECK_CLAMP 0
 #endif

 // Lookup tables defined outside this header. The first dimension is indexed
 // by (n - cos_bit_min); see cospi_arr() and sinpi_arr().
 extern const int32_t av1_cospi_arr_data[4][64];
 extern const int32_t av1_sinpi_arr_data[4][5];

 // Capacity of the per-stage range arrays in TXFM_2D_FLIP_CFG.
 #define MAX_TXFM_STAGE_NUM 12

 // Value of `n` that selects row 0 of the cospi/sinpi tables.
 static const int cos_bit_min = 10;

 // Number of fractional bits in the fixed-point constants NewSqrt2 and
 // NewInvSqrt2 below.
 #define NewSqrt2Bits ((int32_t)12)
 // 2^12 * sqrt(2)
 static const int32_t NewSqrt2 = 5793;
 // 2^12 / sqrt(2)
 static const int32_t NewInvSqrt2 = 2896;

 // Returns the row of av1_cospi_arr_data selected by `n`. Row 0 corresponds to
 // n == cos_bit_min; the index is not range-checked here.
 static INLINE const int32_t *cospi_arr(int n) {
   const int row = n - cos_bit_min;
   return av1_cospi_arr_data[row];
 }

 // Returns the row of av1_sinpi_arr_data selected by `n`. Row 0 corresponds to
 // n == cos_bit_min; the index is not range-checked here.
 static INLINE const int32_t *sinpi_arr(int n) {
   const int row = n - cos_bit_min;
   return av1_sinpi_arr_data[row];
 }

 // The reduced bit-width and permuted arrays are only used in the Arm Neon
 // implementations in av1_fwd_txfm2d_neon.c and highbd_fwd_txfm_neon.c for now.
 #if HAVE_NEON
 // Store cospi/sinpi constants in Q2.13 format.
 // See: https://en.wikipedia.org/wiki/Q_(number_format)
 extern const int16_t av1_cospi_arr_q13_data[4][128];
 extern const int16_t av1_sinpi_arr_q13_data[4][4];

 extern const int32_t av1_cospi_arr_s32_data[4][66];

 // Returns the row of av1_cospi_arr_q13_data selected by `n`; row 0
 // corresponds to n == cos_bit_min. The index is not range-checked here.
 static INLINE const int16_t *cospi_arr_q13(int n) {
   const int row = n - cos_bit_min;
   return av1_cospi_arr_q13_data[row];
 }

 // Returns the row of av1_sinpi_arr_q13_data selected by `n`; row 0
 // corresponds to n == cos_bit_min. The index is not range-checked here.
 static INLINE const int16_t *sinpi_arr_q13(int n) {
   const int row = n - cos_bit_min;
   return av1_sinpi_arr_q13_data[row];
 }

 // Returns the row of av1_cospi_arr_s32_data selected by `n`; row 0
 // corresponds to n == cos_bit_min. The index is not range-checked here.
 static INLINE const int32_t *cospi_arr_s32(int n) {
   const int row = n - cos_bit_min;
   return av1_cospi_arr_s32_data[row];
 }
 #endif  // HAVE_NEON

 // Validates and/or clamps `value` against the signed range representable in
 // `bit` bits, depending on build flags:
 //  - CONFIG_COEFFICIENT_RANGE_CHECKING: report out-of-range values on stderr
 //    (and assert unless CONFIG_AV1_ENCODER is set).
 //  - DO_RANGE_CHECK_CLAMP: return `value` clamped to the range, with `bit`
 //    capped at 31.
 // With neither flag enabled, `value` is returned unchanged.
 static INLINE int32_t range_check_value(int32_t value, int8_t bit) {
 #if CONFIG_COEFFICIENT_RANGE_CHECKING
   // Valid values lie in [-bound, bound - 1].
   const int64_t bound = 1LL << (bit - 1);
   const int in_range = value >= -bound && value <= bound - 1;
   if (!in_range) {
     fprintf(stderr, "coeff out of bit range, value: %d bit %d\n", value, bit);
 #if !CONFIG_AV1_ENCODER
     assert(0);
 #endif
   }
 #endif  // CONFIG_COEFFICIENT_RANGE_CHECKING
 #if DO_RANGE_CHECK_CLAMP
   bit = AOMMIN(bit, 31);
   const int32_t limit = 1 << (bit - 1);
   return clamp(value, -limit, limit - 1);
 #else
   (void)bit;
   return value;
 #endif  // DO_RANGE_CHECK_CLAMP
 }

 // Divides `value` by 2^bit with rounding: adds half of the divisor, shifts
 // right by `bit`, and narrows the result to 32 bits. Requires bit >= 1.
 static INLINE int32_t round_shift(int64_t value, int bit) {
   assert(bit >= 1);
   const int64_t rounding_offset = 1ll << (bit - 1);
   const int64_t biased = value + rounding_offset;
   return (int32_t)(biased >> bit);
 }

 // Half butterfly: returns (w0 * in0 + w1 * in1 + 2^(bit - 1)) >> bit,
 // narrowed to 32 bits. Requires bit >= 1.
 static INLINE int32_t half_btf(int32_t w0, int32_t in0, int32_t w1, int32_t in1,
                                int bit) {
   assert(bit >= 1);
   // Widen each operand *before* multiplying. Multiplying in 32 bits and
   // casting afterwards overflows (undefined behavior) whenever a single
   // product exceeds the int32 range, even if the final sum would fit.
   const int64_t result_64 = (int64_t)w0 * in0 + (int64_t)w1 * in1;
   const int64_t intermediate = result_64 + (1LL << (bit - 1));
   // NOTE(rachelbarker): The value 'result_64' may not necessarily fit
   // into 32 bits. However, the result of this function is nominally
   // ROUND_POWER_OF_TWO_64(result_64, bit)
   // and that is required to fit into stage_range[stage] many bits
   // (checked by range_check_buf()).
   //
   // Here we've unpacked that rounding operation, and it can be shown
   // that the value of 'intermediate' here *does* fit into 32 bits
   // for any conformant bitstream.
   // The upshot is that, if you do all this calculation using
   // wrapping 32-bit arithmetic instead of (non-wrapping) 64-bit arithmetic,
   // then you'll still get the correct result.
   // To provide a check on this logic, we assert that 'intermediate'
   // would fit into an int32 if range checking is enabled.
 #if CONFIG_COEFFICIENT_RANGE_CHECKING
   assert(intermediate >= INT32_MIN && intermediate <= INT32_MAX);
 #endif
   return (int32_t)(intermediate >> bit);
 }

 // Adds `trans` (narrowed to int) to the pixel value `dest` and returns the
 // sum after passing it, together with `bd`, through clip_pixel_highbd().
 static INLINE uint16_t highbd_clip_pixel_add(uint16_t dest, tran_high_t trans,
                                              int bd) {
   const int residual = (int)trans;
   const int sum = dest + residual;
   return clip_pixel_highbd(sum, bd);
 }

 // Pointer to a transform routine that reads from `input` and writes to
 // `output`, parameterized by `cos_bit` and a `stage_range` array.
 // NOTE(review): presumably a 1-D kernel whose cos_bit selects a cospi_arr()
 // row -- confirm against the implementations.
 typedef void (*TxfmFunc)(const int32_t *input, int32_t *output, int8_t cos_bit,
                          const int8_t *stage_range);

 // Pointer to a routine taking 16-bit `input` with a `stride`, a transform
 // type and `bd`, and writing 32-bit values to `output`.
 // NOTE(review): presumably a 2-D forward transform with bd = bit depth --
 // confirm against the implementations.
 typedef void (*FwdTxfm2dFunc)(const int16_t *input, int32_t *output, int stride,
                               TX_TYPE tx_type, int bd);

 // One enumerator per 1-D transform kernel. The names encode the kernel family
 // (DCT / ADST / IDENTITY) and, presumably, its length.
 // NOTE(review): UENUM1BYTE is a project macro defined outside this header; it
 // supplies the TXFM_TYPE type name used below.
 enum {
   TXFM_TYPE_DCT4,
   TXFM_TYPE_DCT8,
   TXFM_TYPE_DCT16,
   TXFM_TYPE_DCT32,
   TXFM_TYPE_DCT64,
   TXFM_TYPE_ADST4,
   TXFM_TYPE_ADST8,
   TXFM_TYPE_ADST16,
   TXFM_TYPE_IDENTITY4,
   TXFM_TYPE_IDENTITY8,
   TXFM_TYPE_IDENTITY16,
   TXFM_TYPE_IDENTITY32,
   // Count of the kernel types above; sizes av1_txfm_stage_num_list.
   TXFM_TYPES,
   // Sentinel that follows the count; not a valid index into tables of
   // TXFM_TYPES entries.
   TXFM_TYPE_INVALID,
 } UENUM1BYTE(TXFM_TYPE);

 // Configuration record for a 2-D transform with optional flips. The flip
 // fields are filled by set_flip_cfg(); the remaining fields are populated by
 // code outside this header.
 typedef struct TXFM_2D_FLIP_CFG {
   TX_SIZE tx_size;
   int ud_flip;  // flip upside down
   int lr_flip;  // flip left to right
   // NOTE(review): presumably per-stage shift amounts stored elsewhere; this
   // struct only holds a pointer -- confirm.
   const int8_t *shift;
   // Separate cos-bit settings for the column and row passes.
   int8_t cos_bit_col;
   int8_t cos_bit_row;
   // Per-stage range values for each pass; capacity is MAX_TXFM_STAGE_NUM.
   int8_t stage_range_col[MAX_TXFM_STAGE_NUM];
   int8_t stage_range_row[MAX_TXFM_STAGE_NUM];
   // 1-D kernel used for each pass.
   TXFM_TYPE txfm_type_col;
   TXFM_TYPE txfm_type_row;
   // NOTE(review): presumably the number of stage_range_* entries in use for
   // each pass -- confirm.
   int stage_num_col;
   int stage_num_row;
 } TXFM_2D_FLIP_CFG;

 // Reports through `ud_flip` and `lr_flip` (1 = flip, 0 = no flip) which flips
 // transform type `tx_type` requires. An unrecognized type asserts in debug
 // builds and reports no flips otherwise.
 static INLINE void get_flip_cfg(TX_TYPE tx_type, int *ud_flip, int *lr_flip) {
   int flip_ud = 0;
   int flip_lr = 0;
   switch (tx_type) {
     // No flip in either direction.
     case DCT_DCT:
     case ADST_DCT:
     case DCT_ADST:
     case ADST_ADST:
     case IDTX:
     case V_DCT:
     case H_DCT:
     case V_ADST:
     case H_ADST: break;
     // Upside-down flip only.
     case FLIPADST_DCT:
     case FLIPADST_ADST:
     case V_FLIPADST: flip_ud = 1; break;
     // Left-to-right flip only.
     case DCT_FLIPADST:
     case ADST_FLIPADST:
     case H_FLIPADST: flip_lr = 1; break;
     // Both flips.
     case FLIPADST_FLIPADST:
       flip_ud = 1;
       flip_lr = 1;
       break;
     default: assert(0); break;
   }
   *ud_flip = flip_ud;
   *lr_flip = flip_lr;
 }

 // Fills cfg->ud_flip and cfg->lr_flip for `tx_type` by delegating to
 // get_flip_cfg().
 static INLINE void set_flip_cfg(TX_TYPE tx_type, TXFM_2D_FLIP_CFG *cfg) {
   int *const ud = &cfg->ud_flip;
   int *const lr = &cfg->lr_flip;
   get_flip_cfg(tx_type, ud, lr);
 }

 // Returns log2(col / row) for the supported aspect ratios: 0 for square,
 // +1 / +2 when col is 2x / 4x row, and -1 / -2 when row is 2x / 4x col.
 // Any other ratio asserts in debug builds and yields 0.
 static INLINE int get_rect_tx_log_ratio(int col, int row) {
   if (col == row) return 0;
   const int col_is_longer = col > row;
   const int longer = col_is_longer ? col : row;
   const int shorter = col_is_longer ? row : col;
   const int sign = col_is_longer ? 1 : -1;
   if (longer == shorter * 2) return sign;
   if (longer == shorter * 4) return 2 * sign;
   assert(0 && "Unsupported transform size");
   return 0;  // Invalid
 }

 // Declared here, defined outside this header.
 // NOTE(review): presumably fills stage_range_col / stage_range_row for the
 // forward transform described by `cfg` at bit depth `bd` -- confirm against
 // the definition.
 void av1_gen_fwd_stage_range(int8_t *stage_range_col, int8_t *stage_range_row,
                              const TXFM_2D_FLIP_CFG *cfg, int bd);

 // Inverse-transform counterpart of the declaration above; additionally takes
 // the transform size. Defined outside this header.
 void av1_gen_inv_stage_range(int8_t *stage_range_col, int8_t *stage_range_row,
                              const TXFM_2D_FLIP_CFG *cfg, TX_SIZE tx_size,
                              int bd);

 // Forward / inverse configuration builders, defined outside this header.
 // NOTE(review): presumably populate *cfg for the given transform type and
 // size -- confirm against the definitions.
 void av1_get_fwd_txfm_cfg(TX_TYPE tx_type, TX_SIZE tx_size,
                           TXFM_2D_FLIP_CFG *cfg);
 void av1_get_inv_txfm_cfg(TX_TYPE tx_type, TX_SIZE tx_size,
                           TXFM_2D_FLIP_CFG *cfg);
 // Tables defined outside this header. av1_txfm_stage_num_list has one entry
 // per valid TXFM_TYPE.
 // NOTE(review): av1_txfm_type_ls is presumably indexed by
 // [size index][1-D transform type] -- confirm.
 extern const TXFM_TYPE av1_txfm_type_ls[5][TX_TYPES_1D];
 extern const int8_t av1_txfm_stage_num_list[TXFM_TYPES];
 // Returns the width index of `tx_size`: its entry in tx_size_wide_log2 minus
 // the entry stored at index 0 of that table.
 static INLINE int get_txw_idx(TX_SIZE tx_size) {
   const int base_log2 = tx_size_wide_log2[0];
   const int width_log2 = tx_size_wide_log2[tx_size];
   return width_log2 - base_log2;
 }
 // Returns the height index of `tx_size`: its entry in tx_size_high_log2 minus
 // the entry stored at index 0 of that table.
 static INLINE int get_txh_idx(TX_SIZE tx_size) {
   const int base_log2 = tx_size_high_log2[0];
   const int height_log2 = tx_size_high_log2[tx_size];
   return height_log2 - base_log2;
 }

 // Declared here, defined outside this header.
 // NOTE(review): presumably verifies that the `size` entries of `buf` at
 // `stage` fit in `bit` bits, with `input` available for diagnostics --
 // confirm against the definition.
 void av1_range_check_buf(int32_t stage, const int32_t *input,
                          const int32_t *buf, int32_t size, int8_t bit);
 // NOTE(review): presumably the number of distinct values returned by
 // get_txw_idx() / get_txh_idx() -- confirm.
 #define MAX_TXWH_IDX 5
 #ifdef __cplusplus
 }
 #endif  // __cplusplus

 #endif  // AOM_AV1_COMMON_AV1_TXFM_H_
	/*
	* Copyright (c) 2016, Alliance for Open Media. All rights reserved
	*
	* This source code is subject to the terms of the BSD 2 Clause License and
	* the Alliance for Open Media Patent License 1.0. If the BSD 2 Clause License
	* was not distributed with this source code in the LICENSE file, you can
	* obtain it at www.aomedia.org/license/software. If the Alliance for Open
	* Media Patent License 1.0 was not distributed with this source code in the
	* PATENTS file, you can obtain it at www.aomedia.org/license/patent.
	*/

	#ifndef AOM_AV1_COMMON_AV1_TXFM_H_
	#define AOM_AV1_COMMON_AV1_TXFM_H_

	#include <assert.h>
	#include <math.h>
	#include <stdio.h>

	#include "config/aom_config.h"

	#include "av1/common/enums.h"
	#include "av1/common/blockd.h"
	#include "aom/aom_integer.h"
	#include "aom_dsp/aom_dsp_common.h"

	#ifdef __cplusplus
	extern "C" {
	#endif

	// Build-time switch consulted by range_check_value(): defaults to 0 unless the
	// build already defined it. A non-zero value makes range_check_value() clamp.
	#if !defined(DO_RANGE_CHECK_CLAMP)
	#define DO_RANGE_CHECK_CLAMP 0
	#endif

	// Lookup tables defined outside this header. The first dimension is indexed
	// by (n - cos_bit_min); see cospi_arr() and sinpi_arr().
	extern const int32_t av1_cospi_arr_data[4][64];
	extern const int32_t av1_sinpi_arr_data[4][5];

	// Capacity of the per-stage range arrays in TXFM_2D_FLIP_CFG.
	#define MAX_TXFM_STAGE_NUM 12

	// Value of `n` that selects row 0 of the cospi/sinpi tables.
	static const int cos_bit_min = 10;

	// Number of fractional bits in the fixed-point constants NewSqrt2 and
	// NewInvSqrt2 below.
	#define NewSqrt2Bits ((int32_t)12)
	// 2^12 * sqrt(2)
	static const int32_t NewSqrt2 = 5793;
	// 2^12 / sqrt(2)
	static const int32_t NewInvSqrt2 = 2896;

	// Returns the row of av1_cospi_arr_data selected by `n`. Row 0 corresponds to
	// n == cos_bit_min; the index is not range-checked here.
	static INLINE const int32_t *cospi_arr(int n) {
	  const int row = n - cos_bit_min;
	  return av1_cospi_arr_data[row];
	}

	// Returns the row of av1_sinpi_arr_data selected by `n`. Row 0 corresponds to
	// n == cos_bit_min; the index is not range-checked here.
	static INLINE const int32_t *sinpi_arr(int n) {
	  const int row = n - cos_bit_min;
	  return av1_sinpi_arr_data[row];
	}

	// The reduced bit-width and permuted arrays are only used in the Arm Neon
	// implementations in av1_fwd_txfm2d_neon.c and highbd_fwd_txfm_neon.c for now.
	#if HAVE_NEON
	// Store cospi/sinpi constants in Q2.13 format.
	// See: https://en.wikipedia.org/wiki/Q_(number_format)
	extern const int16_t av1_cospi_arr_q13_data[4][128];
	extern const int16_t av1_sinpi_arr_q13_data[4][4];

	extern const int32_t av1_cospi_arr_s32_data[4][66];

	// Returns the row of av1_cospi_arr_q13_data selected by `n`; row 0
	// corresponds to n == cos_bit_min. The index is not range-checked here.
	static INLINE const int16_t *cospi_arr_q13(int n) {
	  const int row = n - cos_bit_min;
	  return av1_cospi_arr_q13_data[row];
	}

	// Returns the row of av1_sinpi_arr_q13_data selected by `n`; row 0
	// corresponds to n == cos_bit_min. The index is not range-checked here.
	static INLINE const int16_t *sinpi_arr_q13(int n) {
	  const int row = n - cos_bit_min;
	  return av1_sinpi_arr_q13_data[row];
	}

	// Returns the row of av1_cospi_arr_s32_data selected by `n`; row 0
	// corresponds to n == cos_bit_min. The index is not range-checked here.
	static INLINE const int32_t *cospi_arr_s32(int n) {
	  const int row = n - cos_bit_min;
	  return av1_cospi_arr_s32_data[row];
	}
	#endif // HAVE_NEON

	// Validates and/or clamps `value` against the signed range representable in
	// `bit` bits, depending on build flags:
	//  - CONFIG_COEFFICIENT_RANGE_CHECKING: report out-of-range values on stderr
	//    (and assert unless CONFIG_AV1_ENCODER is set).
	//  - DO_RANGE_CHECK_CLAMP: return `value` clamped to the range, with `bit`
	//    capped at 31.
	// With neither flag enabled, `value` is returned unchanged.
	static INLINE int32_t range_check_value(int32_t value, int8_t bit) {
	#if CONFIG_COEFFICIENT_RANGE_CHECKING
	  const int64_t max_value = (1LL << (bit - 1)) - 1;
	  const int64_t min_value = -(1LL << (bit - 1));
	  // Logical OR of the two bound checks (the escaped form "\|\|" is not
	  // valid C and broke range-checking builds).
	  if (value < min_value || value > max_value) {
	    fprintf(stderr, "coeff out of bit range, value: %d bit %d\n", value, bit);
	#if !CONFIG_AV1_ENCODER
	    assert(0);
	#endif
	  }
	#endif  // CONFIG_COEFFICIENT_RANGE_CHECKING
	#if DO_RANGE_CHECK_CLAMP
	  bit = AOMMIN(bit, 31);
	  return clamp(value, -(1 << (bit - 1)), (1 << (bit - 1)) - 1);
	#endif  // DO_RANGE_CHECK_CLAMP
	  (void)bit;
	  return value;
	}

	// Divides `value` by 2^bit with rounding: adds half of the divisor, shifts
	// right by `bit`, and narrows the result to 32 bits. Requires bit >= 1.
	static INLINE int32_t round_shift(int64_t value, int bit) {
	  assert(bit >= 1);
	  const int64_t rounding_offset = 1ll << (bit - 1);
	  const int64_t biased = value + rounding_offset;
	  return (int32_t)(biased >> bit);
	}

	// Half butterfly: returns (w0 * in0 + w1 * in1 + 2^(bit - 1)) >> bit,
	// narrowed to 32 bits. Requires bit >= 1.
	static INLINE int32_t half_btf(int32_t w0, int32_t in0, int32_t w1, int32_t in1,
	                               int bit) {
	  assert(bit >= 1);
	  // Widen each operand *before* multiplying. Multiplying in 32 bits and
	  // casting afterwards overflows (undefined behavior) whenever a single
	  // product exceeds the int32 range, even if the final sum would fit.
	  const int64_t result_64 = (int64_t)w0 * in0 + (int64_t)w1 * in1;
	  const int64_t intermediate = result_64 + (1LL << (bit - 1));
	  // NOTE(rachelbarker): The value 'result_64' may not necessarily fit
	  // into 32 bits. However, the result of this function is nominally
	  // ROUND_POWER_OF_TWO_64(result_64, bit)
	  // and that is required to fit into stage_range[stage] many bits
	  // (checked by range_check_buf()).
	  //
	  // Here we've unpacked that rounding operation, and it can be shown
	  // that the value of 'intermediate' here *does* fit into 32 bits
	  // for any conformant bitstream.
	  // The upshot is that, if you do all this calculation using
	  // wrapping 32-bit arithmetic instead of (non-wrapping) 64-bit arithmetic,
	  // then you'll still get the correct result.
	  // To provide a check on this logic, we assert that 'intermediate'
	  // would fit into an int32 if range checking is enabled.
	#if CONFIG_COEFFICIENT_RANGE_CHECKING
	  assert(intermediate >= INT32_MIN && intermediate <= INT32_MAX);
	#endif
	  return (int32_t)(intermediate >> bit);
	}

	// Adds `trans` (narrowed to int) to the pixel value `dest` and returns the
	// sum after passing it, together with `bd`, through clip_pixel_highbd().
	static INLINE uint16_t highbd_clip_pixel_add(uint16_t dest, tran_high_t trans,
	                                             int bd) {
	  const int residual = (int)trans;
	  const int sum = dest + residual;
	  return clip_pixel_highbd(sum, bd);
	}

	// Pointer to a transform routine that reads from the `input` buffer and
	// writes to `output`, parameterized by `cos_bit` and a `stage_range` array.
	// The pointer declarators are required: without them this would name a
	// function type taking a scalar input rather than a callable pointer type.
	typedef void (*TxfmFunc)(const int32_t *input, int32_t *output, int8_t cos_bit,
	                         const int8_t *stage_range);

	// Pointer to a routine taking a 16-bit `input` buffer with a `stride`, a
	// transform type and `bd`, and writing 32-bit values to `output`.
	// The pointer declarators are required: without them this would name a
	// function type taking a scalar input rather than a callable pointer type.
	// NOTE(review): presumably a 2-D forward transform with bd = bit depth --
	// confirm against the implementations.
	typedef void (*FwdTxfm2dFunc)(const int16_t *input, int32_t *output, int stride,
	                              TX_TYPE tx_type, int bd);

	// One enumerator per 1-D transform kernel. The names encode the kernel family
	// (DCT / ADST / IDENTITY) and, presumably, its length.
	// NOTE(review): UENUM1BYTE is a project macro defined outside this header; it
	// supplies the TXFM_TYPE type name used below.
	enum {
	TXFM_TYPE_DCT4,
	TXFM_TYPE_DCT8,
	TXFM_TYPE_DCT16,
	TXFM_TYPE_DCT32,
	TXFM_TYPE_DCT64,
	TXFM_TYPE_ADST4,
	TXFM_TYPE_ADST8,
	TXFM_TYPE_ADST16,
	TXFM_TYPE_IDENTITY4,
	TXFM_TYPE_IDENTITY8,
	TXFM_TYPE_IDENTITY16,
	TXFM_TYPE_IDENTITY32,
	// Count of the kernel types above; sizes av1_txfm_stage_num_list.
	TXFM_TYPES,
	// Sentinel that follows the count; not a valid index into tables of
	// TXFM_TYPES entries.
	TXFM_TYPE_INVALID,
	} UENUM1BYTE(TXFM_TYPE);

	// Configuration record for a 2-D transform with optional flips. The flip
	// fields are filled by set_flip_cfg(); the remaining fields are populated by
	// code outside this header.
	typedef struct TXFM_2D_FLIP_CFG {
	TX_SIZE tx_size;
	int ud_flip; // flip upside down
	int lr_flip; // flip left to right
	// NOTE(review): presumably per-stage shift amounts stored elsewhere; this
	// struct only holds a pointer -- confirm.
	const int8_t *shift;
	// Separate cos-bit settings for the column and row passes.
	int8_t cos_bit_col;
	int8_t cos_bit_row;
	// Per-stage range values for each pass; capacity is MAX_TXFM_STAGE_NUM.
	int8_t stage_range_col[MAX_TXFM_STAGE_NUM];
	int8_t stage_range_row[MAX_TXFM_STAGE_NUM];
	// 1-D kernel used for each pass.
	TXFM_TYPE txfm_type_col;
	TXFM_TYPE txfm_type_row;
	// NOTE(review): presumably the number of stage_range_* entries in use for
	// each pass -- confirm.
	int stage_num_col;
	int stage_num_row;
	} TXFM_2D_FLIP_CFG;

	// Reports through `ud_flip` and `lr_flip` (1 = flip, 0 = no flip) which flips
	// transform type `tx_type` requires. Both outputs are pointers: the body
	// stores through them and set_flip_cfg() passes field addresses.
	// An unrecognized type reports no flips and asserts in debug builds.
	static INLINE void get_flip_cfg(TX_TYPE tx_type, int *ud_flip, int *lr_flip) {
	  switch (tx_type) {
	    // No flip in either direction.
	    case DCT_DCT:
	    case ADST_DCT:
	    case DCT_ADST:
	    case ADST_ADST:
	    case IDTX:
	    case V_DCT:
	    case H_DCT:
	    case V_ADST:
	    case H_ADST:
	      *ud_flip = 0;
	      *lr_flip = 0;
	      break;
	    // Upside-down flip only.
	    case FLIPADST_DCT:
	    case FLIPADST_ADST:
	    case V_FLIPADST:
	      *ud_flip = 1;
	      *lr_flip = 0;
	      break;
	    // Left-to-right flip only.
	    case DCT_FLIPADST:
	    case ADST_FLIPADST:
	    case H_FLIPADST:
	      *ud_flip = 0;
	      *lr_flip = 1;
	      break;
	    // Both flips.
	    case FLIPADST_FLIPADST:
	      *ud_flip = 1;
	      *lr_flip = 1;
	      break;
	    default:
	      *ud_flip = 0;
	      *lr_flip = 0;
	      assert(0);
	  }
	}

	// Fills cfg->ud_flip and cfg->lr_flip for `tx_type` by delegating to
	// get_flip_cfg().
	static INLINE void set_flip_cfg(TX_TYPE tx_type, TXFM_2D_FLIP_CFG *cfg) {
	  int *const ud = &cfg->ud_flip;
	  int *const lr = &cfg->lr_flip;
	  get_flip_cfg(tx_type, ud, lr);
	}

	// Returns log2(col / row) for the supported aspect ratios: 0 for square,
	// +1 / +2 when col is 2x / 4x row, and -1 / -2 when row is 2x / 4x col.
	// Any other ratio asserts in debug builds and yields 0.
	static INLINE int get_rect_tx_log_ratio(int col, int row) {
	  if (col == row) return 0;
	  const int col_is_longer = col > row;
	  const int longer = col_is_longer ? col : row;
	  const int shorter = col_is_longer ? row : col;
	  const int sign = col_is_longer ? 1 : -1;
	  if (longer == shorter * 2) return sign;
	  if (longer == shorter * 4) return 2 * sign;
	  assert(0 && "Unsupported transform size");
	  return 0;  // Invalid
	}

	// Declared here, defined outside this header. The two stage-range arguments
	// are pointers to int8_t buffers (TXFM_2D_FLIP_CFG stores them as int8_t
	// arrays), not scalar values.
	// NOTE(review): presumably fills stage_range_col / stage_range_row for the
	// forward transform described by `cfg` at bit depth `bd` -- confirm against
	// the definition.
	void av1_gen_fwd_stage_range(int8_t *stage_range_col, int8_t *stage_range_row,
	                             const TXFM_2D_FLIP_CFG *cfg, int bd);

	// Inverse-transform counterpart of the declaration above; additionally takes
	// the transform size. Defined outside this header.
	void av1_gen_inv_stage_range(int8_t *stage_range_col, int8_t *stage_range_row,
	                             const TXFM_2D_FLIP_CFG *cfg, TX_SIZE tx_size,
	                             int bd);

	// Forward / inverse configuration builders, defined outside this header.
	// NOTE(review): presumably populate *cfg for the given transform type and
	// size -- confirm against the definitions.
	void av1_get_fwd_txfm_cfg(TX_TYPE tx_type, TX_SIZE tx_size,
	TXFM_2D_FLIP_CFG *cfg);
	void av1_get_inv_txfm_cfg(TX_TYPE tx_type, TX_SIZE tx_size,
	TXFM_2D_FLIP_CFG *cfg);
	// Tables defined outside this header. av1_txfm_stage_num_list has one entry
	// per valid TXFM_TYPE.
	// NOTE(review): av1_txfm_type_ls is presumably indexed by
	// [size index][1-D transform type] -- confirm.
	extern const TXFM_TYPE av1_txfm_type_ls[5][TX_TYPES_1D];
	extern const int8_t av1_txfm_stage_num_list[TXFM_TYPES];
	// Returns the width index of `tx_size`: its entry in tx_size_wide_log2 minus
	// the entry stored at index 0 of that table.
	static INLINE int get_txw_idx(TX_SIZE tx_size) {
	  const int base_log2 = tx_size_wide_log2[0];
	  const int width_log2 = tx_size_wide_log2[tx_size];
	  return width_log2 - base_log2;
	}
	// Returns the height index of `tx_size`: its entry in tx_size_high_log2 minus
	// the entry stored at index 0 of that table.
	static INLINE int get_txh_idx(TX_SIZE tx_size) {
	  const int base_log2 = tx_size_high_log2[0];
	  const int height_log2 = tx_size_high_log2[tx_size];
	  return height_log2 - base_log2;
	}

	// Declared here, defined outside this header.
	// NOTE(review): presumably verifies that the `size` entries of `buf` at
	// `stage` fit in `bit` bits, with `input` available for diagnostics --
	// confirm against the definition.
	void av1_range_check_buf(int32_t stage, const int32_t *input,
	const int32_t *buf, int32_t size, int8_t bit);
	// NOTE(review): presumably the number of distinct values returned by
	// get_txw_idx() / get_txh_idx() -- confirm.
	#define MAX_TXWH_IDX 5
	#ifdef __cplusplus
	}
	#endif // __cplusplus

	#endif // AOM_AV1_COMMON_AV1_TXFM_H_