Misc fixes for 32x64 and 64x32 transforms
Change-Id: Ic843e99bd9b79cb9a0a26b95e3a48717ff2ec2a5
diff --git a/av1/common/av1_fwd_txfm1d.c b/av1/common/av1_fwd_txfm1d.c
index cfe2741..c9c7f43 100644
--- a/av1/common/av1_fwd_txfm1d.c
+++ b/av1/common/av1_fwd_txfm1d.c
@@ -1547,6 +1547,16 @@
for (int i = 0; i < 32; ++i) output[i] = input[i] * 4;
range_check(0, input, output, 32, stage_range[0]);
}
+
+#if CONFIG_TX64X64
+// 64-point forward identity "transform": an element-wise rescale with no
+// mixing between coefficients. Each input is multiplied by 4 * Sqrt2 and
+// passed through dct_const_round_shift() before being narrowed to int32_t.
+// cos_bit is unused; stage_range[0] is forwarded to range_check().
+void av1_fidentity64_c(const int32_t *input, int32_t *output,
+ const int8_t *cos_bit, const int8_t *stage_range) {
+ (void)cos_bit;
+ for (int i = 0; i < 64; ++i)
+ output[i] = (int32_t)dct_const_round_shift(input[i] * 4 * Sqrt2);
+ range_check(0, input, output, 64, stage_range[0]);
+}
+#endif // CONFIG_TX64X64
#endif // CONFIG_EXT_TX
#if CONFIG_TX64X64
diff --git a/av1/common/av1_fwd_txfm1d.h b/av1/common/av1_fwd_txfm1d.h
index f641930..f880239 100644
--- a/av1/common/av1_fwd_txfm1d.h
+++ b/av1/common/av1_fwd_txfm1d.h
@@ -26,8 +26,10 @@
const int8_t *cos_bit, const int8_t *stage_range);
void av1_fdct32_new(const int32_t *input, int32_t *output,
const int8_t *cos_bit, const int8_t *stage_range);
+#if CONFIG_TX64X64
void av1_fdct64_new(const int32_t *input, int32_t *output,
const int8_t *cos_bit, const int8_t *stage_range);
+#endif // CONFIG_TX64X64
void av1_fadst4_new(const int32_t *input, int32_t *output,
const int8_t *cos_bit, const int8_t *stage_range);
@@ -46,6 +48,10 @@
const int8_t *cos_bit, const int8_t *stage_range);
void av1_fidentity32_c(const int32_t *input, int32_t *output,
const int8_t *cos_bit, const int8_t *stage_range);
+#if CONFIG_TX64X64
+void av1_fidentity64_c(const int32_t *input, int32_t *output,
+ const int8_t *cos_bit, const int8_t *stage_range);
+#endif // CONFIG_TX64X64
#endif // CONFIG_EXT_TX
#ifdef __cplusplus
diff --git a/av1/common/av1_fwd_txfm2d.c b/av1/common/av1_fwd_txfm2d.c
index 650b5bd..d4ff86b 100644
--- a/av1/common/av1_fwd_txfm2d.c
+++ b/av1/common/av1_fwd_txfm2d.c
@@ -36,6 +36,9 @@
case TXFM_TYPE_IDENTITY8: return av1_fidentity8_c;
case TXFM_TYPE_IDENTITY16: return av1_fidentity16_c;
case TXFM_TYPE_IDENTITY32: return av1_fidentity32_c;
+#if CONFIG_TX64X64
+ case TXFM_TYPE_IDENTITY64: return av1_fidentity64_c;
+#endif // CONFIG_TX64X64
#endif // CONFIG_EXT_TX
default: assert(0); return NULL;
}
diff --git a/av1/common/av1_inv_txfm1d.c b/av1/common/av1_inv_txfm1d.c
index 3bd8686..51f4b63 100644
--- a/av1/common/av1_inv_txfm1d.c
+++ b/av1/common/av1_inv_txfm1d.c
@@ -1593,6 +1593,16 @@
for (int i = 0; i < 32; ++i) output[i] = input[i] * 4;
range_check(0, input, output, 32, stage_range[0]);
}
+
+#if CONFIG_TX64X64
+// 64-point inverse identity "transform": an element-wise rescale with no
+// mixing between coefficients. Each input is multiplied by 4 * Sqrt2 and
+// passed through dct_const_round_shift() before being narrowed to int32_t.
+// cos_bit is unused; stage_range[0] is forwarded to range_check().
+void av1_iidentity64_c(const int32_t *input, int32_t *output,
+ const int8_t *cos_bit, const int8_t *stage_range) {
+ (void)cos_bit;
+ for (int i = 0; i < 64; ++i)
+ output[i] = (int32_t)dct_const_round_shift(input[i] * 4 * Sqrt2);
+ range_check(0, input, output, 64, stage_range[0]);
+}
+#endif // CONFIG_TX64X64
#endif // CONFIG_EXT_TX
#if CONFIG_TX64X64
diff --git a/av1/common/av1_inv_txfm1d.h b/av1/common/av1_inv_txfm1d.h
index 037a3c6..8996f7c 100644
--- a/av1/common/av1_inv_txfm1d.h
+++ b/av1/common/av1_inv_txfm1d.h
@@ -26,8 +26,10 @@
const int8_t *cos_bit, const int8_t *stage_range);
void av1_idct32_new(const int32_t *input, int32_t *output,
const int8_t *cos_bit, const int8_t *stage_range);
+#if CONFIG_TX64X64
void av1_idct64_new(const int32_t *input, int32_t *output,
const int8_t *cos_bit, const int8_t *stage_range);
+#endif // CONFIG_TX64X64
void av1_iadst4_new(const int32_t *input, int32_t *output,
const int8_t *cos_bit, const int8_t *stage_range);
@@ -46,6 +48,10 @@
const int8_t *cos_bit, const int8_t *stage_range);
void av1_iidentity32_c(const int32_t *input, int32_t *output,
const int8_t *cos_bit, const int8_t *stage_range);
+#if CONFIG_TX64X64
+void av1_iidentity64_c(const int32_t *input, int32_t *output,
+ const int8_t *cos_bit, const int8_t *stage_range);
+#endif // CONFIG_TX64X64
#endif // CONFIG_EXT_TX
#ifdef __cplusplus
diff --git a/av1/common/av1_inv_txfm1d_cfg.h b/av1/common/av1_inv_txfm1d_cfg.h
index 78c9585..aeb2aec 100644
--- a/av1/common/av1_inv_txfm1d_cfg.h
+++ b/av1/common/av1_inv_txfm1d_cfg.h
@@ -173,6 +173,7 @@
TXFM_TYPE_DCT32 // .txfm_type
};
+#if CONFIG_TX64X64
// ---------------- row config inv_dct_64 ----------------
static const TXFM_1D_CFG inv_txfm_1d_row_cfg_dct_64 = {
64, // .txfm_size
@@ -182,6 +183,7 @@
inv_cos_bit_row_dct_64, // .cos_bit
TXFM_TYPE_DCT64, // .txfm_type_col
};
+#endif // CONFIG_TX64X64
// ---------------- row config inv_adst_4 ----------------
static const TXFM_1D_CFG inv_txfm_1d_row_cfg_adst_4 = {
@@ -353,5 +355,17 @@
NULL, // .cos_bit
TXFM_TYPE_IDENTITY32, // .txfm_type
};
+
+#if CONFIG_TX64X64
+// ---------------- row/col config inv_identity_64 ----------------
+static const TXFM_1D_CFG inv_txfm_1d_cfg_identity_64 = {
+ 64, // .txfm_size
+ 1, // .stage_num
+ inv_shift_64, // .shift
+ inv_stage_range_idx_64, // .stage_range
+ NULL, // .cos_bit
+ TXFM_TYPE_IDENTITY64, // .txfm_type
+};
+#endif // CONFIG_TX64X64
#endif // CONFIG_EXT_TX
#endif // AV1_INV_TXFM2D_CFG_H_
diff --git a/av1/common/av1_inv_txfm2d.c b/av1/common/av1_inv_txfm2d.c
index 86d16b3..2c01f46 100644
--- a/av1/common/av1_inv_txfm2d.c
+++ b/av1/common/av1_inv_txfm2d.c
@@ -34,6 +34,9 @@
case TXFM_TYPE_IDENTITY8: return av1_iidentity8_c;
case TXFM_TYPE_IDENTITY16: return av1_iidentity16_c;
case TXFM_TYPE_IDENTITY32: return av1_iidentity32_c;
+#if CONFIG_TX64X64
+ case TXFM_TYPE_IDENTITY64: return av1_iidentity64_c;
+#endif // CONFIG_TX64X64
#endif // CONFIG_EXT_TX
default: assert(0); return NULL;
}
@@ -46,14 +49,22 @@
NULL,
#endif
&inv_txfm_1d_col_cfg_dct_4, &inv_txfm_1d_col_cfg_dct_8,
- &inv_txfm_1d_col_cfg_dct_16, &inv_txfm_1d_col_cfg_dct_32 },
+ &inv_txfm_1d_col_cfg_dct_16, &inv_txfm_1d_col_cfg_dct_32,
+#if CONFIG_TX64X64
+ &inv_txfm_1d_col_cfg_dct_64
+#endif // CONFIG_TX64X64
+ },
// ADST
{
#if CONFIG_CHROMA_2X2
NULL,
#endif
&inv_txfm_1d_col_cfg_adst_4, &inv_txfm_1d_col_cfg_adst_8,
- &inv_txfm_1d_col_cfg_adst_16, &inv_txfm_1d_col_cfg_adst_32 },
+ &inv_txfm_1d_col_cfg_adst_16, &inv_txfm_1d_col_cfg_adst_32,
+#if CONFIG_TX64X64
+ NULL
+#endif // CONFIG_TX64X64
+ },
#if CONFIG_EXT_TX
// FLIPADST
{
@@ -61,14 +72,22 @@
NULL,
#endif
&inv_txfm_1d_col_cfg_adst_4, &inv_txfm_1d_col_cfg_adst_8,
- &inv_txfm_1d_col_cfg_adst_16, &inv_txfm_1d_col_cfg_adst_32 },
+ &inv_txfm_1d_col_cfg_adst_16, &inv_txfm_1d_col_cfg_adst_32,
+#if CONFIG_TX64X64
+ NULL
+#endif // CONFIG_TX64X64
+ },
// IDENTITY
{
#if CONFIG_CHROMA_2X2
NULL,
#endif
&inv_txfm_1d_cfg_identity_4, &inv_txfm_1d_cfg_identity_8,
- &inv_txfm_1d_cfg_identity_16, &inv_txfm_1d_cfg_identity_32 },
+ &inv_txfm_1d_cfg_identity_16, &inv_txfm_1d_cfg_identity_32,
+#if CONFIG_TX64X64
+ &inv_txfm_1d_cfg_identity_64
+#endif // CONFIG_TX64X64
+ },
#endif // CONFIG_EXT_TX
};
@@ -79,14 +98,22 @@
NULL,
#endif
&inv_txfm_1d_row_cfg_dct_4, &inv_txfm_1d_row_cfg_dct_8,
- &inv_txfm_1d_row_cfg_dct_16, &inv_txfm_1d_row_cfg_dct_32 },
+ &inv_txfm_1d_row_cfg_dct_16, &inv_txfm_1d_row_cfg_dct_32,
+#if CONFIG_TX64X64
+ &inv_txfm_1d_row_cfg_dct_64,
+#endif // CONFIG_TX64X64
+ },
// ADST
{
#if CONFIG_CHROMA_2X2
NULL,
#endif
&inv_txfm_1d_row_cfg_adst_4, &inv_txfm_1d_row_cfg_adst_8,
- &inv_txfm_1d_row_cfg_adst_16, &inv_txfm_1d_row_cfg_adst_32 },
+ &inv_txfm_1d_row_cfg_adst_16, &inv_txfm_1d_row_cfg_adst_32,
+#if CONFIG_TX64X64
+ NULL
+#endif // CONFIG_TX64X64
+ },
#if CONFIG_EXT_TX
// FLIPADST
{
@@ -94,14 +121,22 @@
NULL,
#endif
&inv_txfm_1d_row_cfg_adst_4, &inv_txfm_1d_row_cfg_adst_8,
- &inv_txfm_1d_row_cfg_adst_16, &inv_txfm_1d_row_cfg_adst_32 },
+ &inv_txfm_1d_row_cfg_adst_16, &inv_txfm_1d_row_cfg_adst_32,
+#if CONFIG_TX64X64
+ NULL
+#endif // CONFIG_TX64X64
+ },
// IDENTITY
{
#if CONFIG_CHROMA_2X2
NULL,
#endif
&inv_txfm_1d_cfg_identity_4, &inv_txfm_1d_cfg_identity_8,
- &inv_txfm_1d_cfg_identity_16, &inv_txfm_1d_cfg_identity_32 },
+ &inv_txfm_1d_cfg_identity_16, &inv_txfm_1d_cfg_identity_32,
+#if CONFIG_TX64X64
+ &inv_txfm_1d_cfg_identity_64
+#endif // CONFIG_TX64X64
+ },
#endif // CONFIG_EXT_TX
};
@@ -117,6 +152,7 @@
return cfg;
}
+#if CONFIG_TX64X64
TXFM_2D_FLIP_CFG av1_get_inv_txfm_64x64_cfg(int tx_type) {
TXFM_2D_FLIP_CFG cfg = { 0, 0, NULL, NULL };
switch (tx_type) {
@@ -130,6 +166,33 @@
return cfg;
}
+// Returns the inverse 2D transform config for the 32x64 size: the 64-point
+// DCT column config paired with the 32-point DCT row config, with flip
+// settings filled in by set_flip_cfg(). Only DCT_DCT is handled; any other
+// tx_type hits assert(0) and, when asserts are compiled out, returns cfg as
+// initialized ({ 0, 0, NULL, NULL }).
+TXFM_2D_FLIP_CFG av1_get_inv_txfm_32x64_cfg(int tx_type) {
+ TXFM_2D_FLIP_CFG cfg = { 0, 0, NULL, NULL };
+ switch (tx_type) {
+ case DCT_DCT:
+ cfg.col_cfg = &inv_txfm_1d_col_cfg_dct_64;
+ cfg.row_cfg = &inv_txfm_1d_row_cfg_dct_32;
+ set_flip_cfg(tx_type, &cfg);
+ break;
+ default: assert(0);
+ }
+ return cfg;
+}
+
+// Returns the inverse 2D transform config for the 64x32 size: the 32-point
+// DCT column config paired with the 64-point DCT row config, with flip
+// settings filled in by set_flip_cfg(). Only DCT_DCT is handled; any other
+// tx_type hits assert(0) and, when asserts are compiled out, returns cfg as
+// initialized ({ 0, 0, NULL, NULL }).
+TXFM_2D_FLIP_CFG av1_get_inv_txfm_64x32_cfg(int tx_type) {
+ TXFM_2D_FLIP_CFG cfg = { 0, 0, NULL, NULL };
+ switch (tx_type) {
+ case DCT_DCT:
+ cfg.col_cfg = &inv_txfm_1d_col_cfg_dct_32;
+ cfg.row_cfg = &inv_txfm_1d_row_cfg_dct_64;
+ set_flip_cfg(tx_type, &cfg);
+ break;
+ default: assert(0);
+ }
+ return cfg;
+}
+#endif // CONFIG_TX64X64
+
void av1_gen_inv_stage_range(int8_t *stage_range_col, int8_t *stage_range_row,
const TXFM_2D_FLIP_CFG *cfg, int8_t fwd_shift,
int bd) {
@@ -353,15 +416,30 @@
void av1_inv_txfm2d_add_64x64_c(const int32_t *input, uint16_t *output,
int stride, int tx_type, int bd) {
int txfm_buf[64 * 64 + 64 + 64];
- TXFM_2D_FLIP_CFG cfg = av1_get_inv_txfm_64x64_cfg(tx_type);
- inv_txfm2d_add_c(input, output, stride, &cfg, txfm_buf, -4, bd);
- assert(fwd_shift_sum[TX_64X64] == -4);
+ // Go through inv_txfm2d_add_facade(), keyed by TX_64X64, instead of
+ // building the 64x64 cfg and passing the -4 shift by hand; this is the
+ // same entry point the 64x32 variant uses.
+ inv_txfm2d_add_facade(input, output, stride, txfm_buf, tx_type, TX_64X64, bd);
}
void av1_inv_txfm2d_add_64x32_c(const int32_t *input, uint16_t *output,
int stride, int tx_type, int bd) {
+#if CONFIG_TXMG
int txfm_buf[64 * 32 + 64 + 64];
- inv_txfm2d_add_facade(input, output, stride, txfm_buf, tx_type, TX_32X64, bd);
+ // CONFIG_TXMG path: run the transform on transposed scratch copies using
+ // the rotated size/type, then transpose the result back into output.
+ int32_t rinput[64 * 32];
+ uint16_t routput[64 * 32];
+ int tx_size = TX_64X32;
+ int rtx_size = av1_rotate_tx_size(tx_size);
+ int rtx_type = av1_rotate_tx_type(tx_type);
+ int w = tx_size_wide[tx_size];
+ int h = tx_size_high[tx_size];
+ // Rotated block dimensions: width and height swap.
+ int rw = h;
+ int rh = w;
+ // NOTE(review): transpose_* argument order is presumably
+ // (dst, dst_stride, src, src_stride, w, h) -- helpers are not visible in
+ // this patch; confirm against their definitions.
+ transpose_int32(rinput, rw, input, w, w, h);
+ transpose_uint16(routput, rw, output, stride, w, h);
+ inv_txfm2d_add_facade(rinput, routput, rw, txfm_buf, rtx_type, rtx_size, bd);
+ transpose_uint16(output, stride, routput, rw, rw, rh);
+#else
+ // Without CONFIG_TXMG: transform in place at TX_64X32 (the removed line
+ // above passed TX_32X64 here).
+ int txfm_buf[64 * 32 + 64 + 64];
+ inv_txfm2d_add_facade(input, output, stride, txfm_buf, tx_type, TX_64X32, bd);
+#endif
}
void av1_inv_txfm2d_add_32x64_c(const int32_t *input, uint16_t *output,
diff --git a/av1/common/av1_txfm.h b/av1/common/av1_txfm.h
index a61e25d..4c0a2d1 100644
--- a/av1/common/av1_txfm.h
+++ b/av1/common/av1_txfm.h
@@ -134,6 +134,7 @@
TXFM_TYPE_IDENTITY8,
TXFM_TYPE_IDENTITY16,
TXFM_TYPE_IDENTITY32,
+ TXFM_TYPE_IDENTITY64,
} TXFM_TYPE;
typedef struct TXFM_1D_CFG {
diff --git a/av1/common/blockd.h b/av1/common/blockd.h
index 01214f3..eadd4eb 100644
--- a/av1/common/blockd.h
+++ b/av1/common/blockd.h
@@ -988,9 +988,9 @@
const TX_SIZE tx_size_sqr = txsize_sqr_map[tx_size];
#if CONFIG_CB4X4 && USE_TXTYPE_SEARCH_FOR_SUB8X8_IN_CB4X4
(void)bs;
- if (tx_size_sqr > TX_32X32) return EXT_TX_SET_DCTONLY;
+ if (tx_size_sqr_up > TX_32X32) return EXT_TX_SET_DCTONLY;
#else
- if (tx_size_sqr > TX_32X32 || bs < BLOCK_8X8) return EXT_TX_SET_DCTONLY;
+ if (tx_size_sqr_up > TX_32X32 || bs < BLOCK_8X8) return EXT_TX_SET_DCTONLY;
#endif
if (use_reduced_set)
return is_inter ? EXT_TX_SET_DCT_IDTX : EXT_TX_SET_DTT4_IDTX;
diff --git a/av1/common/common_data.h b/av1/common/common_data.h
index ec28000..d063122 100644
--- a/av1/common/common_data.h
+++ b/av1/common/common_data.h
@@ -665,9 +665,9 @@
TX_8X16, TX_16X8, TX_16X16,
// 16X32, 32X16, 32X32
TX_16X32, TX_32X16, TX_32X32,
+#if CONFIG_TX64X64
// 32X64, 64X32,
TX_32X32, TX_32X32,
-#if CONFIG_TX64X64
// 64X64
TX_64X64,
#if CONFIG_EXT_PARTITION
@@ -675,6 +675,8 @@
TX_64X64, TX_64X64, TX_64X64,
#endif // CONFIG_EXT_PARTITION
#else
+ // 32X64, 64X32,
+ TX_32X32, TX_32X32,
// 64X64
TX_32X32,
#if CONFIG_EXT_PARTITION
@@ -775,9 +777,9 @@
TX_16X16 - TX_8X8, TX_16X16 - TX_8X8, TX_16X16 - TX_8X8,
// 16X32, 32X16, 32X32
TX_32X32 - TX_8X8, TX_32X32 - TX_8X8, TX_32X32 - TX_8X8,
- // 32X64, 64X32,
- TX_32X32 - TX_8X8, TX_32X32 - TX_8X8,
#if CONFIG_TX64X64
+ // 32X64, 64X32,
+ TX_64X64 - TX_8X8, TX_64X64 - TX_8X8,
// 64X64
TX_64X64 - TX_8X8,
#if CONFIG_EXT_PARTITION
@@ -785,6 +787,8 @@
TX_64X64 - TX_8X8, TX_64X64 - TX_8X8, TX_64X64 - TX_8X8,
#endif // CONFIG_EXT_PARTITION
#else
+ // 32X64, 64X32,
+ TX_32X32 - TX_8X8, TX_32X32 - TX_8X8,
// 64X64
TX_32X32 - TX_8X8,
#if CONFIG_EXT_PARTITION
@@ -818,9 +822,9 @@
TX_8X8 - TX_8X8, TX_8X8 - TX_8X8, TX_16X16 - TX_8X8,
// 16X32, 32X16, 32X32
TX_16X16 - TX_8X8, TX_16X16 - TX_8X8, TX_32X32 - TX_8X8,
- // 32X64, 64X32,
- TX_32X32 - TX_8X8, TX_32X32 - TX_8X8,
#if CONFIG_TX64X64
+ // 32X64, 64X32,
+ TX_64X64 - TX_8X8, TX_64X64 - TX_8X8,
// 64X64
TX_64X64 - TX_8X8,
#if CONFIG_EXT_PARTITION
@@ -828,6 +832,8 @@
TX_64X64 - TX_8X8, TX_64X64 - TX_8X8, TX_64X64 - TX_8X8,
#endif // CONFIG_EXT_PARTITION
#else
+ // 32X64, 64X32,
+ TX_32X32 - TX_8X8, TX_32X32 - TX_8X8,
// 64X64
TX_32X32 - TX_8X8,
#if CONFIG_EXT_PARTITION
diff --git a/av1/common/idct.c b/av1/common/idct.c
index ca1c361..5cde3b7 100644
--- a/av1/common/idct.c
+++ b/av1/common/idct.c
@@ -1566,7 +1566,7 @@
for (i = 0; i < n; ++i) {
IHT_64x32[tx_type].rows(input, outtmp);
for (j = 0; j < n2; ++j)
- tmp[j][i] = (tran_low_t)dct_const_round_shift(outtmp[j] * Sqrt2);
+ tmp[j][i] = (tran_low_t)dct_const_round_shift(outtmp[j] * InvSqrt2);
input += n2;
}
@@ -1628,7 +1628,7 @@
for (i = 0; i < n2; ++i) {
IHT_32x64[tx_type].rows(input, outtmp);
for (j = 0; j < n; ++j)
- tmp[j][i] = (tran_low_t)dct_const_round_shift(outtmp[j] * Sqrt2);
+ tmp[j][i] = (tran_low_t)dct_const_round_shift(outtmp[j] * InvSqrt2);
input += n;
}
@@ -2107,6 +2107,7 @@
static void inv_txfm_add_64x64(const tran_low_t *input, uint8_t *dest,
int stride, const TxfmParam *txfm_param) {
const TX_TYPE tx_type = txfm_param->tx_type;
+ assert(tx_type == DCT_DCT);
switch (tx_type) {
#if !CONFIG_DAALA_DCT64
case DCT_DCT: idct64x64_add(input, dest, stride, txfm_param); break;
diff --git a/av1/common/scan.c b/av1/common/scan.c
index 146e748..a5a1a9f 100644
--- a/av1/common/scan.c
+++ b/av1/common/scan.c
@@ -4572,7 +4572,7 @@
1726, 1789, 1789, 1852, 1852, 1915, 1915, 1978, 1978, 2041, 1727, 1790, 1790,
1853, 1853, 1916, 1916, 1979, 1979, 2042, 1791, 1854, 1854, 1917, 1917, 1980,
1980, 2043, 1855, 1918, 1918, 1981, 1981, 2044, 1919, 1982, 1982, 2045, 1983,
- 2046,
+ 2046, 0, 0
};
DECLARE_ALIGNED(16, static const int16_t,