DST7 16p & 32p experiment (No SIMD)
Change-Id: Ia8cd6da9bf885da9ae8f8722f42862b0eb52eebc
diff --git a/av1/common/av1_inv_txfm1d.c b/av1/common/av1_inv_txfm1d.c
index 8d69efc..f081fbb 100644
--- a/av1/common/av1_inv_txfm1d.c
+++ b/av1/common/av1_inv_txfm1d.c
@@ -818,6 +818,21 @@
bf1[7] = -bf0[1];
}
+#if CONFIG_DST7_16X16
+// Inverse 16-point DST-7 done as a direct matrix multiply: output[n] is the
+// dot product of input[] with column n of dst7_16x16, rounded down by
+// DST_16X16_PREC_BITS. cos_bit and stage_range are unused. input and output
+// must be distinct buffers (asserted).
+void av1_iadst16(const int32_t *input, int32_t *output, int8_t cos_bit,
+                 const int8_t *stage_range) {
+  (void)stage_range;
+  (void)cos_bit;
+  assert(output != input);
+  for (int32_t n = 0; n < 16; ++n) {
+    int32_t acc = 0;
+    for (int32_t k = 0; k < 16; ++k) acc += input[k] * dst7_16x16[k][n];
+    output[n] = ROUND_POWER_OF_TWO_SIGNED(acc, DST_16X16_PREC_BITS);
+  }
+}
+#else
void av1_iadst16(const int32_t *input, int32_t *output, int8_t cos_bit,
const int8_t *stage_range) {
assert(output != input);
@@ -1025,6 +1040,22 @@
bf1[14] = bf0[9];
bf1[15] = -bf0[1];
}
+#endif // CONFIG_DST7_16X16
+
+#if CONFIG_DST_32X32
+// Inverse 32-point DST-7 done as a direct matrix multiply: output[i] is the
+// dot product of input[] with column i of dst7_32x32, rounded down by
+// DST_32X32_PREC_BITS. cos_bit and stage_range are accepted to match the
+// common 1-D transform signature and are ignored.
+void av1_iadst32(const int32_t *input, int32_t *output, int8_t cos_bit,
+                 const int8_t *stage_range) {
+  // output[i] is stored while later iterations still read every input[j], so
+  // an in-place call would corrupt the result; the DST7 av1_iadst16 carries
+  // the same guard.
+  assert(output != input);
+  (void)cos_bit;
+  (void)stage_range;
+  for (int32_t i = 0; i < 32; i++) {
+    int32_t sum = 0;
+    for (int32_t j = 0; j < 32; j++) {
+      sum += input[j] * dst7_32x32[j][i];
+    }
+    output[i] = ROUND_POWER_OF_TWO_SIGNED(sum, DST_32X32_PREC_BITS);
+  }
+}
+#endif
void av1_iidentity4_c(const int32_t *input, int32_t *output, int8_t cos_bit,
const int8_t *stage_range) {
diff --git a/av1/common/av1_inv_txfm1d.h b/av1/common/av1_inv_txfm1d.h
index e1d5d98..b90643e 100644
--- a/av1/common/av1_inv_txfm1d.h
+++ b/av1/common/av1_inv_txfm1d.h
@@ -45,6 +45,10 @@
const int8_t *stage_range);
void av1_iadst16(const int32_t *input, int32_t *output, int8_t cos_bit,
const int8_t *stage_range);
+#if CONFIG_DST_32X32
+void av1_iadst32(const int32_t *input, int32_t *output, int8_t cos_bit,
+ const int8_t *stage_range);
+#endif
void av1_iidentity4_c(const int32_t *input, int32_t *output, int8_t cos_bit,
const int8_t *stage_range);
void av1_iidentity8_c(const int32_t *input, int32_t *output, int8_t cos_bit,
diff --git a/av1/common/av1_inv_txfm2d.c b/av1/common/av1_inv_txfm2d.c
index 559d121..3b5ae91 100644
--- a/av1/common/av1_inv_txfm2d.c
+++ b/av1/common/av1_inv_txfm2d.c
@@ -121,6 +121,9 @@
case TXFM_TYPE_ADST4: return av1_iadst4;
case TXFM_TYPE_ADST8: return av1_iadst8;
case TXFM_TYPE_ADST16: return av1_iadst16;
+#if CONFIG_DST_32X32
+ case TXFM_TYPE_ADST32: return av1_iadst32;
+#endif // CONFIG_DST_32X32
case TXFM_TYPE_IDENTITY4: return av1_iidentity4_c;
case TXFM_TYPE_IDENTITY8: return av1_iidentity8_c;
case TXFM_TYPE_IDENTITY16: return av1_iidentity16_c;
diff --git a/av1/common/av1_txfm.c b/av1/common/av1_txfm.c
index ac43402..498600d 100644
--- a/av1/common/av1_txfm.c
+++ b/av1/common/av1_txfm.c
@@ -59,6 +59,142 @@
14359, 12785, 11204, 9616, 8022, 6424, 4821, 3216, 1608 }
};
+#if CONFIG_DST7_16X16
+const int16_t dst7_16x16[16][16] = {  // integer 16-point DST-7 kernel
+ { 12, 24, 36, 47, 57, 69, 78, 87, 94, 103, 109, 115, 118, 123, 125, 126 },  // row = output coefficient of av1_fadst16, column = input sample
+ { 36, 69, 94, 115, 125, 125, 115, 94, 69, 36, 0, -36, -69, -94, -115, -125 },  // av1_iadst16 reads the same table transposed ([j][i])
+ { 57, 103, 125, 118, 87, 36, -24, -78, -115, -126, -109, -69, -12, 47, 94,
+ 123 },
+ { 78, 123, 115, 57, -24, -94, -126, -103, -36, 47, 109, 125, 87, 12, -69,
+ -118 },
+ { 94, 125, 69, -36, -115, -115, -36, 69, 125, 94, 0, -94, -125, -69, 36,
+ 115 },
+ { 109, 109, 0, -109, -109, 0, 109, 109, 0, -109, -109, 0, 109, 109, 0, -109 },
+ { 118, 78, -69, -123, -12, 115, 87, -57, -125, -24, 109, 94, -47, -126, -36,
+ 103 },
+ { 125, 36, -115, -69, 94, 94, -69, -115, 36, 125, 0, -125, -36, 115, 69,
+ -94 },
+ { 126, -12, -125, 24, 123, -36, -118, 47, 115, -57, -109, 69, 103, -78, -94,
+ 87 },
+ { 123, -57, -94, 103, 47, -125, 12, 118, -69, -87, 109, 36, -126, 24, 115,
+ -78 },
+ { 115, -94, -36, 125, -69, -69, 125, -36, -94, 115, 0, -115, 94, 36, -125,
+ 69 },
+ { 103, -118, 36, 78, -126, 69, 47, -123, 94, 12, -109, 115, -24, -87, 125,
+ -57 },
+ { 87, -126, 94, -12, -78, 125, -103, 24, 69, -123, 109, -36, -57, 118, -115,
+ 47 },
+ { 69, -115, 125, -94, 36, 36, -94, 125, -115, 69, 0, -69, 115, -125, 94,
+ -36 },
+ { 47, -87, 115, -126, 118, -94, 57, -12, -36, 78, -109, 125, -123, 103, -69,
+ 24 },
+ { 24, -47, 69, -87, 103, -115, 123, -126, 125, -118, 109, -94, 78, -57, 36,
+ -12 },
+};  // dot products with this table are rounded down by DST_16X16_PREC_BITS
+#endif
+
+#if CONFIG_DST_32X32
+const int16_t  // integer 32-point DST-7 kernel
+ dst7_32x32[32][32] = {  // row = output coefficient of av1_fadst32, column = input sample; av1_iadst32 reads it transposed
+ { 6, 12, 18, 24, 30, 36, 42, 48, 54, 59, 64,
+ 70, 75, 80, 84, 89, 93, 97, 101, 105, 108, 111,
+ 114, 116, 119, 121, 123, 124, 125, 126, 127, 127 },
+ { 18, 36, 54, 70, 84, 97, 108, 116, 123, 126, 127,
+ 125, 121, 114, 105, 93, 80, 64, 48, 30, 12, -6,
+ -24, -42, -59, -75, -89, -101, -111, -119, -124, -127 },
+ { 30, 59, 84, 105, 119, 126, 126, 119, 105, 84, 59,
+ 30, 0, -30, -59, -84, -105, -119, -126, -126, -119, -105,
+ -84, -59, -30, 0, 30, 59, 84, 105, 119, 126 },
+ { 42, 80, 108, 124, 126, 114, 89, 54, 12, -30, -70,
+ -101, -121, -127, -119, -97, -64, -24, 18, 59, 93, 116,
+ 127, 123, 105, 75, 36, -6, -48, -84, -111, -125 },
+ { 54, 97, 123, 125, 105, 64, 12, -42, -89, -119, -127,
+ -111, -75, -24, 30, 80, 114, 127, 116, 84, 36, -18,
+ -70, -108, -126, -121, -93, -48, 6, 59, 101, 124 },
+ { 64, 111, 127, 108, 59, -6, -70, -114, -127, -105, -54,
+ 12, 75, 116, 126, 101, 48, -18, -80, -119, -125, -97,
+ -42, 24, 84, 121, 124, 93, 36, -30, -89, -123 },
+ { 75, 121, 121, 75, 0, -75, -121, -121, -75, 0, 75,
+ 121, 121, 75, 0, -75, -121, -121, -75, 0, 75, 121,
+ 121, 75, 0, -75, -121, -121, -75, 0, 75, 121 },
+ { 84, 126, 105, 30, -59, -119, -119, -59, 30, 105, 126,
+ 84, 0, -84, -126, -105, -30, 59, 119, 119, 59, -30,
+ -105, -126, -84, 0, 84, 126, 105, 30, -59, -119 },
+ { 93, 127, 80, -18, -105, -124, -64, 36, 114, 119, 48,
+ -54, -121, -111, -30, 70, 125, 101, 12, -84, -127, -89,
+ 6, 97, 126, 75, -24, -108, -123, -59, 42, 116 },
+ { 101, 123, 48, -64, -126, -89, 18, 111, 116, 30, -80,
+ -127, -75, 36, 119, 108, 12, -93, -125, -59, 54, 124,
+ 97, -6, -105, -121, -42, 70, 127, 84, -24, -114 },
+ { 108, 114, 12, -101, -119, -24, 93, 123, 36, -84, -125,
+ -48, 75, 127, 59, -64, -127, -70, 54, 126, 80, -42,
+ -124, -89, 30, 121, 97, -18, -116, -105, 6, 111 },
+ { 114, 101, -24, -123, -84, 48, 127, 64, -70, -126, -42,
+ 89, 121, 18, -105, -111, 6, 116, 97, -30, -124, -80,
+ 54, 127, 59, -75, -125, -36, 93, 119, 12, -108 },
+ { 119, 84, -59, -126, -30, 105, 105, -30, -126, -59, 84,
+ 119, 0, -119, -84, 59, 126, 30, -105, -105, 30, 126,
+ 59, -84, -119, 0, 119, 84, -59, -126, -30, 105 },
+ { 123, 64, -89, -111, 30, 127, 36, -108, -93, 59, 124,
+ 6, -121, -70, 84, 114, -24, -127, -42, 105, 97, -54,
+ -125, -12, 119, 75, -80, -116, 18, 126, 48, -101 },
+ { 125, 42, -111, -80, 84, 108, -48, -124, 6, 126, 36,
+ -114, -75, 89, 105, -54, -123, 12, 127, 30, -116, -70,
+ 93, 101, -59, -121, 18, 127, 24, -119, -64, 97 },
+ { 127, 18, -124, -36, 119, 54, -111, -70, 101, 84, -89,
+ -97, 75, 108, -59, -116, 42, 123, -24, -126, 6, 127,
+ 12, -125, -30, 121, 48, -114, -64, 105, 80, -93 },
+ { 127, -6, -127, 12, 126, -18, -125, 24, 124, -30, -123,
+ 36, 121, -42, -119, 48, 116, -54, -114, 59, 111, -64,
+ -108, 70, 105, -75, -101, 80, 97, -84, -93, 89 },
+ { 126, -30, -119, 59, 105, -84, -84, 105, 59, -119, -30,
+ 126, 0, -126, 30, 119, -59, -105, 84, 84, -105, -59,
+ 119, 30, -126, 0, 126, -30, -119, 59, 105, -84 },
+ { 124, -54, -101, 97, 59, -123, -6, 125, -48, -105, 93,
+ 64, -121, -12, 126, -42, -108, 89, 70, -119, -18, 127,
+ -36, -111, 84, 75, -116, -24, 127, -30, -114, 80 },
+ { 121, -75, -75, 121, 0, -121, 75, 75, -121, 0, 121,
+ -75, -75, 121, 0, -121, 75, 75, -121, 0, 121, -75,
+ -75, 121, 0, -121, 75, 75, -121, 0, 121, -75 },
+ { 116, -93, -42, 127, -59, -80, 123, -18, -108, 105, 24,
+ -124, 75, 64, -126, 36, 97, -114, -6, 119, -89, -48,
+ 127, -54, -84, 121, -12, -111, 101, 30, -125, 70 },
+ { 111, -108, -6, 114, -105, -12, 116, -101, -18, 119, -97,
+ -24, 121, -93, -30, 123, -89, -36, 124, -84, -42, 125,
+ -80, -48, 126, -75, -54, 127, -70, -59, 127, -64 },
+ { 105, -119, 30, 84, -126, 59, 59, -126, 84, 30, -119,
+ 105, 0, -105, 119, -30, -84, 126, -59, -59, 126, -84,
+ -30, 119, -105, 0, 105, -119, 30, 84, -126, 59 },
+ { 97, -125, 64, 42, -119, 111, -24, -80, 127, -84, -18,
+ 108, -121, 48, 59, -124, 101, -6, -93, 126, -70, -36,
+ 116, -114, 30, 75, -127, 89, 12, -105, 123, -54 },
+ { 89, -127, 93, -6, -84, 127, -97, 12, 80, -126, 101,
+ -18, -75, 125, -105, 24, 70, -124, 108, -30, -64, 123,
+ -111, 36, 59, -121, 114, -42, -54, 119, -116, 48 },
+ { 80, -124, 114, -54, -30, 101, -127, 97, -24, -59, 116,
+ -123, 75, 6, -84, 125, -111, 48, 36, -105, 127, -93,
+ 18, 64, -119, 121, -70, -12, 89, -126, 108, -42 },
+ { 70, -116, 125, -93, 30, 42, -101, 127, -111, 59, 12,
+ -80, 121, -123, 84, -18, -54, 108, -127, 105, -48, -24,
+ 89, -124, 119, -75, 6, 64, -114, 126, -97, 36 },
+ { 59, -105, 126, -119, 84, -30, -30, 84, -119, 126, -105,
+ 59, 0, -59, 105, -126, 119, -84, 30, 30, -84, 119,
+ -126, 105, -59, 0, 59, -105, 126, -119, 84, -30 },
+ { 48, -89, 116, -127, 119, -93, 54, -6, -42, 84, -114,
+ 127, -121, 97, -59, 12, 36, -80, 111, -126, 123, -101,
+ 64, -18, -30, 75, -108, 125, -124, 105, -70, 24 },
+ { 36, -70, 97, -116, 126, -125, 114, -93, 64, -30, -6,
+ 42, -75, 101, -119, 127, -124, 111, -89, 59, -24, -12,
+ 48, -80, 105, -121, 127, -123, 108, -84, 54, -18 },
+ { 24, -48, 70, -89, 105, -116, 124, -127, 125, -119, 108,
+ -93, 75, -54, 30, -6, -18, 42, -64, 84, -101, 114,
+ -123, 127, -126, 121, -111, 97, -80, 59, -36, 12 },
+ { 12, -24, 36, -48, 59, -70, 80, -89, 97, -105, 111,
+ -116, 121, -124, 126, -127, 127, -125, 123, -119, 114, -108,
+ 101, -93, 84, -75, 64, -54, 42, -30, 18, -6 }
+ };  // dot products with this table are rounded down by DST_32X32_PREC_BITS
+#endif // CONFIG_DST_32X32
+
// av1_sinpi_arr_data[i][j] = (int)round((sqrt(2) * sin(j*Pi/9) * 2 / 3) * (1
// << (cos_bit_min + i))) modified so that elements j=1,2 sum to element j=4.
const int32_t av1_sinpi_arr_data[7][5] = {
@@ -90,8 +226,12 @@
{ TXFM_TYPE_DCT4, TXFM_TYPE_ADST4, TXFM_TYPE_ADST4, TXFM_TYPE_IDENTITY4 },
{ TXFM_TYPE_DCT8, TXFM_TYPE_ADST8, TXFM_TYPE_ADST8, TXFM_TYPE_IDENTITY8 },
{ TXFM_TYPE_DCT16, TXFM_TYPE_ADST16, TXFM_TYPE_ADST16, TXFM_TYPE_IDENTITY16 },
+#if CONFIG_DST_32X32
+ { TXFM_TYPE_DCT32, TXFM_TYPE_ADST32, TXFM_TYPE_ADST32, TXFM_TYPE_IDENTITY32 },
+#else
{ TXFM_TYPE_DCT32, TXFM_TYPE_INVALID, TXFM_TYPE_INVALID,
TXFM_TYPE_IDENTITY32 },
+#endif // CONFIG_DST_32X32
{ TXFM_TYPE_DCT64, TXFM_TYPE_INVALID, TXFM_TYPE_INVALID, TXFM_TYPE_INVALID }
};
@@ -108,6 +248,9 @@
1, // TXFM_TYPE_IDENTITY8
1, // TXFM_TYPE_IDENTITY16
1, // TXFM_TYPE_IDENTITY32
+#if CONFIG_DST_32X32
+ 1, // TXFM_TYPE_ADST32
+#endif
};
void av1_range_check_buf(int32_t stage, const int32_t *input,
diff --git a/av1/common/av1_txfm.h b/av1/common/av1_txfm.h
index 20049b6..77bb19e 100644
--- a/av1/common/av1_txfm.h
+++ b/av1/common/av1_txfm.h
@@ -34,6 +34,16 @@
extern const int32_t av1_cospi_arr_data[7][64];
extern const int32_t av1_sinpi_arr_data[7][5];
+#if CONFIG_DST7_16X16
+extern const int16_t dst7_16x16[16][16];  // 16-point DST-7 kernel, defined in av1_txfm.c
+#define DST_16X16_PREC_BITS 7  // rounding shift applied to each dst7_16x16 dot product
+#endif
+
+#if CONFIG_DST_32X32
+extern const int16_t dst7_32x32[32][32];  // 32-point DST-7 kernel, defined in av1_txfm.c
+#define DST_32X32_PREC_BITS 7  // rounding shift applied to each dst7_32x32 dot product
+#endif // CONFIG_DST_32X32
+
#define MAX_TXFM_STAGE_NUM 12
static const int cos_bit_min = 10;
@@ -125,6 +135,9 @@
TXFM_TYPE_IDENTITY8,
TXFM_TYPE_IDENTITY16,
TXFM_TYPE_IDENTITY32,
+#if CONFIG_DST_32X32
+ TXFM_TYPE_ADST32,
+#endif
TXFM_TYPES,
TXFM_TYPE_INVALID,
} UENUM1BYTE(TXFM_TYPE);
diff --git a/av1/common/blockd.h b/av1/common/blockd.h
index d5f3ff9..c535ca8 100644
--- a/av1/common/blockd.h
+++ b/av1/common/blockd.h
@@ -1026,6 +1026,18 @@
{ EXT_TX_SET_ALL16, EXT_TX_SET_DTT9_IDTX_1DDCT },
};
+#if CONFIG_DST_32X32
+// CONFIG_DST_32X32 variant of the transform-set selector. Sizes whose
+// square-up size is 64x64 get DCT only; sizes whose square-up size is 32x32
+// get EXT_TX_SET_DTT4_IDTX for intra and inter alike. Otherwise the reduced
+// set (when requested) or the av1_ext_tx_set_lookup entry is returned.
+static INLINE TxSetType av1_get_ext_tx_set_type(TX_SIZE tx_size, int is_inter,
+                                                int use_reduced_set) {
+  const TX_SIZE sqr_up = txsize_sqr_up_map[tx_size];
+  if (sqr_up == TX_64X64) {
+    return EXT_TX_SET_DCTONLY;
+  } else if (sqr_up == TX_32X32) {
+    return EXT_TX_SET_DTT4_IDTX;
+  } else if (use_reduced_set) {
+    if (is_inter) return EXT_TX_SET_DCT_IDTX;
+    return EXT_TX_SET_DTT4_IDTX;
+  }
+  const int is_16x16 = (txsize_sqr_map[tx_size] == TX_16X16);
+  return av1_ext_tx_set_lookup[is_inter][is_16x16];
+}
+#else
static INLINE TxSetType av1_get_ext_tx_set_type(TX_SIZE tx_size, int is_inter,
int use_reduced_set) {
const TX_SIZE tx_size_sqr_up = txsize_sqr_up_map[tx_size];
@@ -1037,6 +1049,7 @@
const TX_SIZE tx_size_sqr = txsize_sqr_map[tx_size];
return av1_ext_tx_set_lookup[is_inter][tx_size_sqr == TX_16X16];
}
+#endif // CONFIG_DST_32X32
// Maps tx set types to the indices.
static const int ext_tx_set_index[2][EXT_TX_SET_TYPES] = {
diff --git a/av1/common/idct.c b/av1/common/idct.c
index bff438f..0e9c727 100644
--- a/av1/common/idct.c
+++ b/av1/common/idct.c
@@ -290,9 +290,32 @@
tmp[r * tmp_stride + c] = dst[r * stride + c];
}
}
-
+#if CONFIG_DST7_16X16 && CONFIG_DST_32X32
+ if (tx_size_wide[tx_size] == 32 || tx_size_high[tx_size] == 32 ||
+ tx_size_wide[tx_size] == 16 || tx_size_high[tx_size] == 16)
+ av1_highbd_inv_txfm_add_c(dqcoeff, CONVERT_TO_BYTEPTR(tmp), tmp_stride,
+ txfm_param);
+ else
+ av1_highbd_inv_txfm_add(dqcoeff, CONVERT_TO_BYTEPTR(tmp), tmp_stride,
+ txfm_param);
+#elif CONFIG_DST7_16X16
+ if (tx_size_wide[tx_size] == 16 || tx_size_high[tx_size] == 16)
+ av1_highbd_inv_txfm_add_c(dqcoeff, CONVERT_TO_BYTEPTR(tmp), tmp_stride,
+ txfm_param);
+ else
+ av1_highbd_inv_txfm_add(dqcoeff, CONVERT_TO_BYTEPTR(tmp), tmp_stride,
+ txfm_param);
+#elif CONFIG_DST_32X32
+ if (tx_size_wide[tx_size] == 32 || tx_size_high[tx_size] == 32)
+ av1_highbd_inv_txfm_add_c(dqcoeff, CONVERT_TO_BYTEPTR(tmp), tmp_stride,
+ txfm_param);
+ else
+ av1_highbd_inv_txfm_add(dqcoeff, CONVERT_TO_BYTEPTR(tmp), tmp_stride,
+ txfm_param);
+#else
av1_highbd_inv_txfm_add(dqcoeff, CONVERT_TO_BYTEPTR(tmp), tmp_stride,
txfm_param);
+#endif
for (int r = 0; r < h; ++r) {
for (int c = 0; c < w; ++c) {
@@ -314,9 +337,56 @@
&txfm_param);
assert(av1_ext_tx_used[txfm_param.tx_set_type][txfm_param.tx_type]);
+#if CONFIG_DST7_16X16 || CONFIG_DST_32X32
+ uint16_t allowed_tx_mask = 0xF1FE;
+ allowed_tx_mask &= (1 << tx_type);
+#endif
+
if (txfm_param.is_hbd) {
+#if CONFIG_DST7_16X16 && CONFIG_DST_32X32
+ if ((tx_size_wide[tx_size] == 16 || tx_size_high[tx_size] == 16 ||
+ tx_size_wide[tx_size] == 32 || tx_size_high[tx_size] == 32) &&
+ allowed_tx_mask)
+ av1_highbd_inv_txfm_add_c(dqcoeff, dst, stride, &txfm_param);
+ else
+ av1_highbd_inv_txfm_add(dqcoeff, dst, stride, &txfm_param);
+#elif CONFIG_DST7_16X16
+ if ((tx_size_wide[tx_size] == 16 || tx_size_high[tx_size] == 16) &&
+ allowed_tx_mask)
+ av1_highbd_inv_txfm_add_c(dqcoeff, dst, stride, &txfm_param);
+ else
+ av1_highbd_inv_txfm_add(dqcoeff, dst, stride, &txfm_param);
+#elif CONFIG_DST_32X32
+ if ((tx_size_wide[tx_size] == 32 || tx_size_high[tx_size] == 32) &&
+ allowed_tx_mask)
+ av1_highbd_inv_txfm_add_c(dqcoeff, dst, stride, &txfm_param);
+ else
+ av1_highbd_inv_txfm_add(dqcoeff, dst, stride, &txfm_param);
+#else
av1_highbd_inv_txfm_add(dqcoeff, dst, stride, &txfm_param);
+#endif // CONFIG_DST7_16X16 && CONFIG_DST_32X32
} else {
+#if CONFIG_DST7_16X16 && CONFIG_DST_32X32
+ if ((tx_size_wide[tx_size] == 16 || tx_size_high[tx_size] == 16 ||
+ tx_size_wide[tx_size] == 32 || tx_size_high[tx_size] == 32) &&
+ allowed_tx_mask)
+ av1_inv_txfm_add_c(dqcoeff, dst, stride, &txfm_param);
+ else
+ av1_inv_txfm_add(dqcoeff, dst, stride, &txfm_param);
+#elif CONFIG_DST7_16X16
+ if ((tx_size_wide[tx_size] == 16 || tx_size_high[tx_size] == 16) &&
+ allowed_tx_mask)
+ av1_inv_txfm_add_c(dqcoeff, dst, stride, &txfm_param);
+ else
+ av1_inv_txfm_add(dqcoeff, dst, stride, &txfm_param);
+#elif CONFIG_DST_32X32
+ if ((tx_size_wide[tx_size] == 32 || tx_size_high[tx_size] == 32) &&
+ allowed_tx_mask)
+ av1_inv_txfm_add_c(dqcoeff, dst, stride, &txfm_param);
+ else
+ av1_inv_txfm_add(dqcoeff, dst, stride, &txfm_param);
+#else
av1_inv_txfm_add(dqcoeff, dst, stride, &txfm_param);
+#endif // CONFIG_DST7_16X16 && CONFIG_DST_32X32
}
}
diff --git a/av1/encoder/av1_fwd_txfm1d.c b/av1/encoder/av1_fwd_txfm1d.c
index 6601c19..854acfa 100644
--- a/av1/encoder/av1_fwd_txfm1d.c
+++ b/av1/encoder/av1_fwd_txfm1d.c
@@ -846,6 +846,20 @@
av1_range_check_buf(stage, input, bf1, size, stage_range[stage]);
}
+#if CONFIG_DST7_16X16
+// Forward 16-point DST-7 done as a direct matrix multiply: output[k] is the
+// dot product of input[] with row k of dst7_16x16, rounded down by
+// DST_16X16_PREC_BITS. cos_bit and stage_range are unused.
+void av1_fadst16(const int32_t *input, int32_t *output, int8_t cos_bit,
+                 const int8_t *stage_range) {
+  (void)stage_range;
+  (void)cos_bit;
+  for (int32_t k = 0; k < 16; ++k) {
+    const int16_t *basis = dst7_16x16[k];
+    int32_t acc = 0;
+    for (int32_t n = 0; n < 16; ++n) acc += input[n] * basis[n];
+    output[k] = ROUND_POWER_OF_TWO_SIGNED(acc, DST_16X16_PREC_BITS);
+  }
+}
+#else
void av1_fadst16(const int32_t *input, int32_t *output, int8_t cos_bit,
const int8_t *stage_range) {
const int32_t size = 16;
@@ -1060,6 +1074,22 @@
bf1[15] = bf0[0];
av1_range_check_buf(stage, input, bf1, size, stage_range[stage]);
}
+#endif // CONFIG_DST7_16X16
+
+#if CONFIG_DST_32X32
+// Forward 32-point DST-7 done as a direct matrix multiply: output[k] is the
+// dot product of input[] with row k of dst7_32x32, rounded down by
+// DST_32X32_PREC_BITS. cos_bit and stage_range are unused.
+void av1_fadst32(const int32_t *input, int32_t *output, int8_t cos_bit,
+                 const int8_t *stage_range) {
+  (void)stage_range;
+  (void)cos_bit;
+  for (int32_t k = 0; k < 32; ++k) {
+    const int16_t *basis = dst7_32x32[k];
+    int32_t acc = 0;
+    for (int32_t n = 0; n < 32; ++n) acc += input[n] * basis[n];
+    output[k] = ROUND_POWER_OF_TWO_SIGNED(acc, DST_32X32_PREC_BITS);
+  }
+}
+#endif
void av1_fidentity4_c(const int32_t *input, int32_t *output, int8_t cos_bit,
const int8_t *stage_range) {
diff --git a/av1/encoder/av1_fwd_txfm1d.h b/av1/encoder/av1_fwd_txfm1d.h
index 9ef54fe..877b40b 100644
--- a/av1/encoder/av1_fwd_txfm1d.h
+++ b/av1/encoder/av1_fwd_txfm1d.h
@@ -34,6 +34,10 @@
const int8_t *stage_range);
void av1_fadst16(const int32_t *input, int32_t *output, int8_t cos_bit,
const int8_t *stage_range);
+#if CONFIG_DST_32X32
+void av1_fadst32(const int32_t *input, int32_t *output, int8_t cos_bit,
+ const int8_t *stage_range);
+#endif
void av1_fidentity4_c(const int32_t *input, int32_t *output, int8_t cos_bit,
const int8_t *stage_range);
void av1_fidentity8_c(const int32_t *input, int32_t *output, int8_t cos_bit,
diff --git a/av1/encoder/av1_fwd_txfm2d.c b/av1/encoder/av1_fwd_txfm2d.c
index bcb829d..98d2887 100644
--- a/av1/encoder/av1_fwd_txfm2d.c
+++ b/av1/encoder/av1_fwd_txfm2d.c
@@ -30,6 +30,9 @@
case TXFM_TYPE_ADST4: return av1_fadst4;
case TXFM_TYPE_ADST8: return av1_fadst8;
case TXFM_TYPE_ADST16: return av1_fadst16;
+#if CONFIG_DST_32X32
+ case TXFM_TYPE_ADST32: return av1_fadst32;
+#endif // CONFIG_DST_32X32
case TXFM_TYPE_IDENTITY4: return av1_fidentity4_c;
case TXFM_TYPE_IDENTITY8: return av1_fidentity8_c;
case TXFM_TYPE_IDENTITY16: return av1_fidentity16_c;
@@ -364,6 +367,10 @@
static const int8_t fadst8_range_mult2[8] = { 0, 0, 1, 3, 3, 5, 5, 5 };
static const int8_t fadst16_range_mult2[10] = { 0, 0, 1, 3, 3, 5, 5, 7, 7, 7 };
+#if CONFIG_DST_32X32
+static const int8_t fadst32_range_mult2[1] = { 9 };  // single-entry range table (like the fidtx ones) placed in the TXFM_TYPE_ADST32 slot of fwd_txfm_range_mult2_list
+#endif
+
static const int8_t fidtx4_range_mult2[1] = { 1 };
static const int8_t fidtx8_range_mult2[1] = { 2 };
static const int8_t fidtx16_range_mult2[1] = { 3 };
@@ -380,10 +387,22 @@
#endif
static const int8_t *fwd_txfm_range_mult2_list[TXFM_TYPES] = {
- fdct4_range_mult2, fdct8_range_mult2, fdct16_range_mult2,
- fdct32_range_mult2, fdct64_range_mult2, fadst4_range_mult2,
- fadst8_range_mult2, fadst16_range_mult2, fidtx4_range_mult2,
- fidtx8_range_mult2, fidtx16_range_mult2, fidtx32_range_mult2
+  // One entry per TXFM_TYPE value (the array is sized TXFM_TYPES); the ADST32
+  // entry exists only when CONFIG_DST_32X32 is enabled.
+  fdct4_range_mult2,   fdct8_range_mult2,   fdct16_range_mult2,
+  fdct32_range_mult2,  fdct64_range_mult2,  fadst4_range_mult2,
+  fadst8_range_mult2,  fadst16_range_mult2, fidtx4_range_mult2,
+  fidtx8_range_mult2,  fidtx16_range_mult2, fidtx32_range_mult2,
+#if CONFIG_DST_32X32
+  fadst32_range_mult2,
+#endif
};
static INLINE void set_fwd_txfm_non_scale_range(TXFM_2D_FLIP_CFG *cfg) {
diff --git a/av1/encoder/hybrid_fwd_txfm.c b/av1/encoder/hybrid_fwd_txfm.c
index 0699085..bd09770 100644
--- a/av1/encoder/hybrid_fwd_txfm.c
+++ b/av1/encoder/hybrid_fwd_txfm.c
@@ -109,7 +109,16 @@
int32_t *dst_coeff = (int32_t *)coeff;
const TX_TYPE tx_type = txfm_param->tx_type;
const int bd = txfm_param->bd;
+#if CONFIG_DST7_16X16
+ uint16_t allowed_tx_mask = 0xF1FE;
+ allowed_tx_mask &= (1 << tx_type);
+ if (allowed_tx_mask)
+ av1_fwd_txfm2d_8x16_c(src_diff, dst_coeff, diff_stride, tx_type, bd);
+ else
+ av1_fwd_txfm2d_8x16(src_diff, dst_coeff, diff_stride, tx_type, bd);
+#else
av1_fwd_txfm2d_8x16(src_diff, dst_coeff, diff_stride, tx_type, bd);
+#endif // CONFIG_DST7_16X16
}
static void highbd_fwd_txfm_16x8(const int16_t *src_diff, tran_low_t *coeff,
@@ -117,49 +126,132 @@
int32_t *dst_coeff = (int32_t *)coeff;
const TX_TYPE tx_type = txfm_param->tx_type;
const int bd = txfm_param->bd;
+#if CONFIG_DST7_16X16
+ uint16_t allowed_tx_mask = 0xF1FE;
+ allowed_tx_mask &= (1 << tx_type);
+ if (allowed_tx_mask)
+ av1_fwd_txfm2d_16x8_c(src_diff, dst_coeff, diff_stride, tx_type, bd);
+ else
+ av1_fwd_txfm2d_16x8(src_diff, dst_coeff, diff_stride, tx_type, bd);
+#else
av1_fwd_txfm2d_16x8(src_diff, dst_coeff, diff_stride, tx_type, bd);
+#endif // CONFIG_DST7_16X16
}
static void highbd_fwd_txfm_16x32(const int16_t *src_diff, tran_low_t *coeff,
int diff_stride, TxfmParam *txfm_param) {
int32_t *dst_coeff = (int32_t *)coeff;
+#if CONFIG_DST_32X32
+  // Bit tx_type of 0xF1FE (bits 1-8 and 12-15 are set) selects the `_c`
+  // implementation; all other tx types keep using av1_fwd_txfm2d_16x32.
+  const TX_TYPE tx_type = txfm_param->tx_type;
+  const int bd = txfm_param->bd;
+  if ((0xF1FE >> tx_type) & 1) {
+    av1_fwd_txfm2d_16x32_c(src_diff, dst_coeff, diff_stride, tx_type, bd);
+  } else {
+    av1_fwd_txfm2d_16x32(src_diff, dst_coeff, diff_stride, tx_type, bd);
+  }
+#else
+  assert(txfm_param->tx_type == DCT_DCT || txfm_param->tx_type == IDTX);
av1_fwd_txfm2d_16x32(src_diff, dst_coeff, diff_stride, txfm_param->tx_type,
txfm_param->bd);
+#endif  // CONFIG_DST_32X32
}
static void highbd_fwd_txfm_32x16(const int16_t *src_diff, tran_low_t *coeff,
int diff_stride, TxfmParam *txfm_param) {
int32_t *dst_coeff = (int32_t *)coeff;
+#if CONFIG_DST_32X32
+  // Bit tx_type of 0xF1FE (bits 1-8 and 12-15 are set) selects the `_c`
+  // implementation; all other tx types keep using av1_fwd_txfm2d_32x16.
+  const TX_TYPE tx_type = txfm_param->tx_type;
+  const int bd = txfm_param->bd;
+  if ((0xF1FE >> tx_type) & 1) {
+    av1_fwd_txfm2d_32x16_c(src_diff, dst_coeff, diff_stride, tx_type, bd);
+  } else {
+    av1_fwd_txfm2d_32x16(src_diff, dst_coeff, diff_stride, tx_type, bd);
+  }
+#else
+  assert(txfm_param->tx_type == DCT_DCT || txfm_param->tx_type == IDTX);
av1_fwd_txfm2d_32x16(src_diff, dst_coeff, diff_stride, txfm_param->tx_type,
txfm_param->bd);
+#endif  // CONFIG_DST_32X32
}
static void highbd_fwd_txfm_16x4(const int16_t *src_diff, tran_low_t *coeff,
int diff_stride, TxfmParam *txfm_param) {
int32_t *dst_coeff = (int32_t *)coeff;
+#if CONFIG_DST7_16X16
+  // Bit tx_type of 0xF1FE (bits 1-8 and 12-15 are set) selects the `_c`
+  // implementation; all other tx types keep using av1_fwd_txfm2d_16x4.
+  const TX_TYPE tx_type = txfm_param->tx_type;
+  const int bd = txfm_param->bd;
+  if ((0xF1FE >> tx_type) & 1) {
+    av1_fwd_txfm2d_16x4_c(src_diff, dst_coeff, diff_stride, tx_type, bd);
+  } else {
+    av1_fwd_txfm2d_16x4(src_diff, dst_coeff, diff_stride, tx_type, bd);
+  }
+#else
av1_fwd_txfm2d_16x4(src_diff, dst_coeff, diff_stride, txfm_param->tx_type,
txfm_param->bd);
+#endif  // CONFIG_DST7_16X16
}
static void highbd_fwd_txfm_4x16(const int16_t *src_diff, tran_low_t *coeff,
int diff_stride, TxfmParam *txfm_param) {
int32_t *dst_coeff = (int32_t *)coeff;
+#if CONFIG_DST7_16X16
+  // Bit tx_type of 0xF1FE (bits 1-8 and 12-15 are set) selects the `_c`
+  // implementation; all other tx types keep using av1_fwd_txfm2d_4x16.
+  const TX_TYPE tx_type = txfm_param->tx_type;
+  const int bd = txfm_param->bd;
+  if ((0xF1FE >> tx_type) & 1) {
+    av1_fwd_txfm2d_4x16_c(src_diff, dst_coeff, diff_stride, tx_type, bd);
+  } else {
+    av1_fwd_txfm2d_4x16(src_diff, dst_coeff, diff_stride, tx_type, bd);
+  }
+#else
av1_fwd_txfm2d_4x16(src_diff, dst_coeff, diff_stride, txfm_param->tx_type,
txfm_param->bd);
+#endif  // CONFIG_DST7_16X16
}
static void highbd_fwd_txfm_32x8(const int16_t *src_diff, tran_low_t *coeff,
int diff_stride, TxfmParam *txfm_param) {
int32_t *dst_coeff = (int32_t *)coeff;
+#if CONFIG_DST_32X32
+  // Bit tx_type of 0xF1FE (bits 1-8 and 12-15 are set) selects the `_c`
+  // implementation; all other tx types keep using av1_fwd_txfm2d_32x8.
+  const TX_TYPE tx_type = txfm_param->tx_type;
+  const int bd = txfm_param->bd;
+  if ((0xF1FE >> tx_type) & 1) {
+    av1_fwd_txfm2d_32x8_c(src_diff, dst_coeff, diff_stride, tx_type, bd);
+  } else {
+    av1_fwd_txfm2d_32x8(src_diff, dst_coeff, diff_stride, tx_type, bd);
+  }
+#else
av1_fwd_txfm2d_32x8(src_diff, dst_coeff, diff_stride, txfm_param->tx_type,
txfm_param->bd);
+#endif  // CONFIG_DST_32X32
}
static void highbd_fwd_txfm_8x32(const int16_t *src_diff, tran_low_t *coeff,
int diff_stride, TxfmParam *txfm_param) {
int32_t *dst_coeff = (int32_t *)coeff;
+#if CONFIG_DST_32X32
+  // Bit tx_type of 0xF1FE (bits 1-8 and 12-15 are set) selects the `_c`
+  // implementation; all other tx types keep using av1_fwd_txfm2d_8x32.
+  const TX_TYPE tx_type = txfm_param->tx_type;
+  const int bd = txfm_param->bd;
+  if ((0xF1FE >> tx_type) & 1) {
+    av1_fwd_txfm2d_8x32_c(src_diff, dst_coeff, diff_stride, tx_type, bd);
+  } else {
+    av1_fwd_txfm2d_8x32(src_diff, dst_coeff, diff_stride, tx_type, bd);
+  }
+#else
av1_fwd_txfm2d_8x32(src_diff, dst_coeff, diff_stride, txfm_param->tx_type,
txfm_param->bd);
+#endif  // CONFIG_DST_32X32
}
static void highbd_fwd_txfm_8x8(const int16_t *src_diff, tran_low_t *coeff,
@@ -175,7 +267,16 @@
int32_t *dst_coeff = (int32_t *)coeff;
const TX_TYPE tx_type = txfm_param->tx_type;
const int bd = txfm_param->bd;
+#if CONFIG_DST7_16X16
+ uint16_t allowed_tx_mask = 0xF1FE;
+ allowed_tx_mask &= (1 << tx_type);
+ if (allowed_tx_mask)
+ av1_fwd_txfm2d_16x16_c(src_diff, dst_coeff, diff_stride, tx_type, bd);
+ else
+ av1_fwd_txfm2d_16x16(src_diff, dst_coeff, diff_stride, tx_type, bd);
+#else
av1_fwd_txfm2d_16x16(src_diff, dst_coeff, diff_stride, tx_type, bd);
+#endif // CONFIG_DST7_16X16
}
static void highbd_fwd_txfm_32x32(const int16_t *src_diff, tran_low_t *coeff,
@@ -183,7 +284,16 @@
int32_t *dst_coeff = (int32_t *)coeff;
const TX_TYPE tx_type = txfm_param->tx_type;
const int bd = txfm_param->bd;
+#if CONFIG_DST_32X32
+ uint16_t allowed_tx_mask = 0xF1FE;
+ allowed_tx_mask &= (1 << tx_type);
+ if (allowed_tx_mask)
+ av1_fwd_txfm2d_32x32_c(src_diff, dst_coeff, diff_stride, tx_type, bd);
+ else
+ av1_fwd_txfm2d_32x32(src_diff, dst_coeff, diff_stride, tx_type, bd);
+#else
av1_fwd_txfm2d_32x32(src_diff, dst_coeff, diff_stride, tx_type, bd);
+#endif // CONFIG_DST_32X32
}
static void highbd_fwd_txfm_32x64(const int16_t *src_diff, tran_low_t *coeff,
@@ -230,10 +340,41 @@
void av1_fwd_txfm(const int16_t *src_diff, tran_low_t *coeff, int diff_stride,
TxfmParam *txfm_param) {
- if (txfm_param->bd == 8)
+  if (txfm_param->bd == 8) {
+#if CONFIG_DST7_16X16 || CONFIG_DST_32X32
+    // 8-bit path: call av1_lowbd_fwd_txfm_c instead of av1_lowbd_fwd_txfm when
+    // (a) either block dimension equals a size whose DST7 experiment is
+    // enabled (16 and/or 32) and (b) bit tx_type of 0xF1FE is set (bits 1-8
+    // and 12-15).
+    const int tx_w = tx_size_wide[txfm_param->tx_size];
+    const int tx_h = tx_size_high[txfm_param->tx_size];
+    int has_dst7_dim = 0;
+#if CONFIG_DST7_16X16
+    has_dst7_dim |= (tx_w == 16) | (tx_h == 16);
+#endif
+#if CONFIG_DST_32X32
+    has_dst7_dim |= (tx_w == 32) | (tx_h == 32);
+#endif
+    if (has_dst7_dim && ((0xF1FE >> txfm_param->tx_type) & 1)) {
+      av1_lowbd_fwd_txfm_c(src_diff, coeff, diff_stride, txfm_param);
+    } else {
+      av1_lowbd_fwd_txfm(src_diff, coeff, diff_stride, txfm_param);
+    }
+#else
av1_lowbd_fwd_txfm(src_diff, coeff, diff_stride, txfm_param);
- else
+#endif  // CONFIG_DST7_16X16 || CONFIG_DST_32X32
+  } else {
av1_highbd_fwd_txfm(src_diff, coeff, diff_stride, txfm_param);
+  }
}
void av1_lowbd_fwd_txfm_c(const int16_t *src_diff, tran_low_t *coeff,
diff --git a/av1/encoder/tx_search.c b/av1/encoder/tx_search.c
index 81646ed..f3a69cf 100644
--- a/av1/encoder/tx_search.c
+++ b/av1/encoder/tx_search.c
@@ -1995,6 +1995,11 @@
if (cpi->oxcf.txfm_cfg.enable_flip_idtx == 0)
ext_tx_used_flag &= DCT_ADST_TX_MASK;
+#if CONFIG_DST_32X32
+ if (!is_inter && (txsize_sqr_up_map[tx_size] == TX_32X32))
+ ext_tx_used_flag &= DCT_ADST_TX_MASK;
+#endif
+
uint16_t allowed_tx_mask = 0; // 1: allow; 0: skip.
if (txk_allowed < TX_TYPES) {
allowed_tx_mask = 1 << txk_allowed;
diff --git a/build/cmake/aom_config_defaults.cmake b/build/cmake/aom_config_defaults.cmake
index fa53112..b4e03d3 100644
--- a/build/cmake/aom_config_defaults.cmake
+++ b/build/cmake/aom_config_defaults.cmake
@@ -139,7 +139,9 @@
"AV2 experiment flag to remove dual filter.")
# Partitioning
set_aom_config_var(CONFIG_SDP 0 NUMBER "AV2 Semi-Decoupled Partitioning.")
-
+# Primary Transforms
+set_aom_config_var(CONFIG_DST7_16X16 0 NUMBER "AV2 DST7 16x16 experiment flag.")
+set_aom_config_var(CONFIG_DST_32X32 0 NUMBER "AV2 DST7 32x32 experiment flag.")
#
# Variables in this section control optional features of the build system.
#