DST7 16-point and 32-point transform experiment (C only, no SIMD)

Change-Id: Ia8cd6da9bf885da9ae8f8722f42862b0eb52eebc
diff --git a/av1/common/av1_inv_txfm1d.c b/av1/common/av1_inv_txfm1d.c index 8d69efc..f081fbb 100644 --- a/av1/common/av1_inv_txfm1d.c +++ b/av1/common/av1_inv_txfm1d.c
@@ -818,6 +818,26 @@
   bf1[7] = -bf0[1];
 }
 
+#if CONFIG_DST7_16X16
+// 16-point inverse transform computed as a direct matrix product with the
+// dst7_16x16 table: output[i] is sum_j(input[j] * dst7_16x16[j][i]), rounded
+// and shifted right by DST_16X16_PREC_BITS. cos_bit and stage_range are
+// ignored. input and output must not alias, since every output element reads
+// the whole input vector.
+void av1_iadst16(const int32_t *input, int32_t *output, int8_t cos_bit,
+                 const int8_t *stage_range) {
+  assert(output != input);
+  (void)cos_bit;
+  (void)stage_range;
+  for (int32_t i = 0; i < 16; i++) {
+    int32_t sum = 0;
+    for (int32_t j = 0; j < 16; j++) {
+      sum += input[j] * dst7_16x16[j][i];
+    }
+    output[i] = ROUND_POWER_OF_TWO_SIGNED(sum, DST_16X16_PREC_BITS);
+  }
+}
+#else
 void av1_iadst16(const int32_t *input, int32_t *output, int8_t cos_bit,
                  const int8_t *stage_range) {
   assert(output != input);
@@ -1025,6 +1045,26 @@
   bf1[14] = bf0[9];
   bf1[15] = -bf0[1];
 }
+#endif  // CONFIG_DST7_16X16
+
+#if CONFIG_DST_32X32
+// 32-point version of the direct matrix-product inverse transform, using
+// dst7_32x32 ([input][output] indexing) and DST_32X32_PREC_BITS. cos_bit and
+// stage_range are ignored; input and output must not alias.
+void av1_iadst32(const int32_t *input, int32_t *output, int8_t cos_bit,
+                 const int8_t *stage_range) {
+  assert(output != input);
+  (void)cos_bit;
+  (void)stage_range;
+  for (int32_t i = 0; i < 32; i++) {
+    int32_t sum = 0;  // NOTE(review): 32-bit; confirm inputs cannot overflow
+    for (int32_t j = 0; j < 32; j++) {
+      sum += input[j] * dst7_32x32[j][i];
+    }
+    output[i] = ROUND_POWER_OF_TWO_SIGNED(sum, DST_32X32_PREC_BITS);
+  }
+}
+#endif  // CONFIG_DST_32X32
 
 void av1_iidentity4_c(const int32_t *input, int32_t *output, int8_t cos_bit,
                       const int8_t *stage_range) {
diff --git a/av1/common/av1_inv_txfm1d.h b/av1/common/av1_inv_txfm1d.h index e1d5d98..b90643e 100644 --- a/av1/common/av1_inv_txfm1d.h +++ b/av1/common/av1_inv_txfm1d.h
@@ -45,6 +45,10 @@
                 const int8_t *stage_range);
 void av1_iadst16(const int32_t *input, int32_t *output, int8_t cos_bit,
                  const int8_t *stage_range);
+#if CONFIG_DST_32X32  // 32-point inverse transform, only in this experiment
+void av1_iadst32(const int32_t *input, int32_t *output, int8_t cos_bit,
+                 const int8_t *stage_range);
+#endif  // CONFIG_DST_32X32
 void av1_iidentity4_c(const int32_t *input, int32_t *output, int8_t cos_bit,
                       const int8_t *stage_range);
 void av1_iidentity8_c(const int32_t *input, int32_t *output, int8_t cos_bit,
diff --git a/av1/common/av1_inv_txfm2d.c b/av1/common/av1_inv_txfm2d.c index 559d121..3b5ae91 100644 --- a/av1/common/av1_inv_txfm2d.c +++ b/av1/common/av1_inv_txfm2d.c
@@ -121,6 +121,9 @@ case TXFM_TYPE_ADST4: return av1_iadst4; case TXFM_TYPE_ADST8: return av1_iadst8; case TXFM_TYPE_ADST16: return av1_iadst16; +#if CONFIG_DST_32X32 + case TXFM_TYPE_ADST32: return av1_iadst32; +#endif // CONFIG_DST_32X32 case TXFM_TYPE_IDENTITY4: return av1_iidentity4_c; case TXFM_TYPE_IDENTITY8: return av1_iidentity8_c; case TXFM_TYPE_IDENTITY16: return av1_iidentity16_c;
diff --git a/av1/common/av1_txfm.c b/av1/common/av1_txfm.c index ac43402..498600d 100644 --- a/av1/common/av1_txfm.c +++ b/av1/common/av1_txfm.c
@@ -59,6 +59,146 @@
     14359, 12785, 11204, 9616, 8022, 6424, 4821, 3216, 1608 }
 };
 
+#if CONFIG_DST7_16X16
+// DST7 coefficient table for the 16-point transform. av1_fadst16() indexes it
+// as [output][input] and av1_iadst16() as [input][output].
+const int16_t dst7_16x16[16][16] = {
+  { 12, 24, 36, 47, 57, 69, 78, 87, 94, 103, 109, 115, 118, 123, 125, 126 },
+  { 36, 69, 94, 115, 125, 125, 115, 94, 69, 36, 0, -36, -69, -94, -115, -125 },
+  { 57, 103, 125, 118, 87, 36, -24, -78, -115, -126, -109, -69, -12, 47, 94,
+    123 },
+  { 78, 123, 115, 57, -24, -94, -126, -103, -36, 47, 109, 125, 87, 12, -69,
+    -118 },
+  { 94, 125, 69, -36, -115, -115, -36, 69, 125, 94, 0, -94, -125, -69, 36,
+    115 },
+  { 109, 109, 0, -109, -109, 0, 109, 109, 0, -109, -109, 0, 109, 109, 0, -109 },
+  { 118, 78, -69, -123, -12, 115, 87, -57, -125, -24, 109, 94, -47, -126, -36,
+    103 },
+  { 125, 36, -115, -69, 94, 94, -69, -115, 36, 125, 0, -125, -36, 115, 69,
+    -94 },
+  { 126, -12, -125, 24, 123, -36, -118, 47, 115, -57, -109, 69, 103, -78, -94,
+    87 },
+  { 123, -57, -94, 103, 47, -125, 12, 118, -69, -87, 109, 36, -126, 24, 115,
+    -78 },
+  { 115, -94, -36, 125, -69, -69, 125, -36, -94, 115, 0, -115, 94, 36, -125,
+    69 },
+  { 103, -118, 36, 78, -126, 69, 47, -123, 94, 12, -109, 115, -24, -87, 125,
+    -57 },
+  { 87, -126, 94, -12, -78, 125, -103, 24, 69, -123, 109, -36, -57, 118, -115,
+    47 },
+  { 69, -115, 125, -94, 36, 36, -94, 125, -115, 69, 0, -69, 115, -125, 94,
+    -36 },
+  { 47, -87, 115, -126, 118, -94, 57, -12, -36, 78, -109, 125, -123, 103, -69,
+    24 },
+  { 24, -47, 69, -87, 103, -115, 123, -126, 125, -118, 109, -94, 78, -57, 36,
+    -12 },
+};
+#endif  // CONFIG_DST7_16X16
+
+#if CONFIG_DST_32X32
+// DST7 coefficient table for the 32-point transform. av1_fadst32() indexes it
+// as [output][input] and av1_iadst32() as [input][output].
+const int16_t
+    dst7_32x32[32][32] = {
+      { 6, 12, 18, 24, 30, 36, 42, 48, 54, 59, 64,
+        70, 75, 80, 84, 89, 93, 97, 101, 105, 108, 111,
+        114, 116, 119, 121, 123, 124, 125, 126, 127, 127 },
+      { 18, 36, 54, 70, 84, 97, 108, 116, 123, 126, 127,
+        125, 121, 114, 105, 93, 80, 64, 48, 30, 12, -6,
+        -24, -42, -59, -75, -89, -101, -111, -119, -124, -127 },
+      { 30, 59, 84, 105, 119, 126, 126, 119, 105, 84, 59,
+        30, 0, -30, -59, -84, -105, -119, -126, -126, -119, -105,
+        -84, -59, -30, 0, 30, 59, 84, 105, 119, 126 },
+      { 42, 80, 108, 124, 126, 114, 89, 54, 12, -30, -70,
+        -101, -121, -127, -119, -97, -64, -24, 18, 59, 93, 116,
+        127, 123, 105, 75, 36, -6, -48, -84, -111, -125 },
+      { 54, 97, 123, 125, 105, 64, 12, -42, -89, -119, -127,
+        -111, -75, -24, 30, 80, 114, 127, 116, 84, 36, -18,
+        -70, -108, -126, -121, -93, -48, 6, 59, 101, 124 },
+      { 64, 111, 127, 108, 59, -6, -70, -114, -127, -105, -54,
+        12, 75, 116, 126, 101, 48, -18, -80, -119, -125, -97,
+        -42, 24, 84, 121, 124, 93, 36, -30, -89, -123 },
+      { 75, 121, 121, 75, 0, -75, -121, -121, -75, 0, 75,
+        121, 121, 75, 0, -75, -121, -121, -75, 0, 75, 121,
+        121, 75, 0, -75, -121, -121, -75, 0, 75, 121 },
+      { 84, 126, 105, 30, -59, -119, -119, -59, 30, 105, 126,
+        84, 0, -84, -126, -105, -30, 59, 119, 119, 59, -30,
+        -105, -126, -84, 0, 84, 126, 105, 30, -59, -119 },
+      { 93, 127, 80, -18, -105, -124, -64, 36, 114, 119, 48,
+        -54, -121, -111, -30, 70, 125, 101, 12, -84, -127, -89,
+        6, 97, 126, 75, -24, -108, -123, -59, 42, 116 },
+      { 101, 123, 48, -64, -126, -89, 18, 111, 116, 30, -80,
+        -127, -75, 36, 119, 108, 12, -93, -125, -59, 54, 124,
+        97, -6, -105, -121, -42, 70, 127, 84, -24, -114 },
+      { 108, 114, 12, -101, -119, -24, 93, 123, 36, -84, -125,
+        -48, 75, 127, 59, -64, -127, -70, 54, 126, 80, -42,
+        -124, -89, 30, 121, 97, -18, -116, -105, 6, 111 },
+      { 114, 101, -24, -123, -84, 48, 127, 64, -70, -126, -42,
+        89, 121, 18, -105, -111, 6, 116, 97, -30, -124, -80,
+        54, 127, 59, -75, -125, -36, 93, 119, 12, -108 },
+      { 119, 84, -59, -126, -30, 105, 105, -30, -126, -59, 84,
+        119, 0, -119, -84, 59, 126, 30, -105, -105, 30, 126,
+        59, -84, -119, 0, 119, 84, -59, -126, -30, 105 },
+      { 123, 64, -89, -111, 30, 127, 36, -108, -93, 59, 124,
+        6, -121, -70, 84, 114, -24, -127, -42, 105, 97, -54,
+        -125, -12, 119, 75, -80, -116, 18, 126, 48, -101 },
+      { 125, 42, -111, -80, 84, 108, -48, -124, 6, 126, 36,
+        -114, -75, 89, 105, -54, -123, 12, 127, 30, -116, -70,
+        93, 101, -59, -121, 18, 127, 24, -119, -64, 97 },
+      { 127, 18, -124, -36, 119, 54, -111, -70, 101, 84, -89,
+        -97, 75, 108, -59, -116, 42, 123, -24, -126, 6, 127,
+        12, -125, -30, 121, 48, -114, -64, 105, 80, -93 },
+      { 127, -6, -127, 12, 126, -18, -125, 24, 124, -30, -123,
+        36, 121, -42, -119, 48, 116, -54, -114, 59, 111, -64,
+        -108, 70, 105, -75, -101, 80, 97, -84, -93, 89 },
+      { 126, -30, -119, 59, 105, -84, -84, 105, 59, -119, -30,
+        126, 0, -126, 30, 119, -59, -105, 84, 84, -105, -59,
+        119, 30, -126, 0, 126, -30, -119, 59, 105, -84 },
+      { 124, -54, -101, 97, 59, -123, -6, 125, -48, -105, 93,
+        64, -121, -12, 126, -42, -108, 89, 70, -119, -18, 127,
+        -36, -111, 84, 75, -116, -24, 127, -30, -114, 80 },
+      { 121, -75, -75, 121, 0, -121, 75, 75, -121, 0, 121,
+        -75, -75, 121, 0, -121, 75, 75, -121, 0, 121, -75,
+        -75, 121, 0, -121, 75, 75, -121, 0, 121, -75 },
+      { 116, -93, -42, 127, -59, -80, 123, -18, -108, 105, 24,
+        -124, 75, 64, -126, 36, 97, -114, -6, 119, -89, -48,
+        127, -54, -84, 121, -12, -111, 101, 30, -125, 70 },
+      { 111, -108, -6, 114, -105, -12, 116, -101, -18, 119, -97,
+        -24, 121, -93, -30, 123, -89, -36, 124, -84, -42, 125,
+        -80, -48, 126, -75, -54, 127, -70, -59, 127, -64 },
+      { 105, -119, 30, 84, -126, 59, 59, -126, 84, 30, -119,
+        105, 0, -105, 119, -30, -84, 126, -59, -59, 126, -84,
+        -30, 119, -105, 0, 105, -119, 30, 84, -126, 59 },
+      { 97, -125, 64, 42, -119, 111, -24, -80, 127, -84, -18,
+        108, -121, 48, 59, -124, 101, -6, -93, 126, -70, -36,
+        116, -114, 30, 75, -127, 89, 12, -105, 123, -54 },
+      { 89, -127, 93, -6, -84, 127, -97, 12, 80, -126, 101,
+        -18, -75, 125, -105, 24, 70, -124, 108, -30, -64, 123,
+        -111, 36, 59, -121, 114, -42, -54, 119, -116, 48 },
+      { 80, -124, 114, -54, -30, 101, -127, 97, -24, -59, 116,
+        -123, 75, 6, -84, 125, -111, 48, 36, -105, 127, -93,
+        18, 64, -119, 121, -70, -12, 89, -126, 108, -42 },
+      { 70, -116, 125, -93, 30, 42, -101, 127, -111, 59, 12,
+        -80, 121, -123, 84, -18, -54, 108, -127, 105, -48, -24,
+        89, -124, 119, -75, 6, 64, -114, 126, -97, 36 },
+      { 59, -105, 126, -119, 84, -30, -30, 84, -119, 126, -105,
+        59, 0, -59, 105, -126, 119, -84, 30, 30, -84, 119,
+        -126, 105, -59, 0, 59, -105, 126, -119, 84, -30 },
+      { 48, -89, 116, -127, 119, -93, 54, -6, -42, 84, -114,
+        127, -121, 97, -59, 12, 36, -80, 111, -126, 123, -101,
+        64, -18, -30, 75, -108, 125, -124, 105, -70, 24 },
+      { 36, -70, 97, -116, 126, -125, 114, -93, 64, -30, -6,
+        42, -75, 101, -119, 127, -124, 111, -89, 59, -24, -12,
+        48, -80, 105, -121, 127, -123, 108, -84, 54, -18 },
+      { 24, -48, 70, -89, 105, -116, 124, -127, 125, -119, 108,
+        -93, 75, -54, 30, -6, -18, 42, -64, 84, -101, 114,
+        -123, 127, -126, 121, -111, 97, -80, 59, -36, 12 },
+      { 12, -24, 36, -48, 59, -70, 80, -89, 97, -105, 111,
+        -116, 121, -124, 126, -127, 127, -125, 123, -119, 114, -108,
+        101, -93, 84, -75, 64, -54, 42, -30, 18, -6 }
+    };
+#endif  // CONFIG_DST_32X32
+
 // av1_sinpi_arr_data[i][j] = (int)round((sqrt(2) * sin(j*Pi/9) * 2 / 3) * (1
 // << (cos_bit_min + i))) modified so that elements j=1,2 sum to element j=4.
 const int32_t av1_sinpi_arr_data[7][5] = {
@@ -90,8 +230,12 @@
   { TXFM_TYPE_DCT4, TXFM_TYPE_ADST4, TXFM_TYPE_ADST4, TXFM_TYPE_IDENTITY4 },
   { TXFM_TYPE_DCT8, TXFM_TYPE_ADST8, TXFM_TYPE_ADST8, TXFM_TYPE_IDENTITY8 },
   { TXFM_TYPE_DCT16, TXFM_TYPE_ADST16, TXFM_TYPE_ADST16, TXFM_TYPE_IDENTITY16 },
+#if CONFIG_DST_32X32
+  { TXFM_TYPE_DCT32, TXFM_TYPE_ADST32, TXFM_TYPE_ADST32, TXFM_TYPE_IDENTITY32 },
+#else
   { TXFM_TYPE_DCT32, TXFM_TYPE_INVALID, TXFM_TYPE_INVALID,
     TXFM_TYPE_IDENTITY32 },
+#endif  // CONFIG_DST_32X32
   { TXFM_TYPE_DCT64, TXFM_TYPE_INVALID, TXFM_TYPE_INVALID, TXFM_TYPE_INVALID }
 };
 
@@ -108,6 +252,9 @@
   1,   // TXFM_TYPE_IDENTITY8
   1,   // TXFM_TYPE_IDENTITY16
   1,   // TXFM_TYPE_IDENTITY32
+#if CONFIG_DST_32X32
+  1,  // TXFM_TYPE_ADST32
+#endif  // CONFIG_DST_32X32
 };
 
 void av1_range_check_buf(int32_t stage, const int32_t *input,
diff --git a/av1/common/av1_txfm.h b/av1/common/av1_txfm.h index 20049b6..77bb19e 100644 --- a/av1/common/av1_txfm.h +++ b/av1/common/av1_txfm.h
@@ -34,6 +34,16 @@
 extern const int32_t av1_cospi_arr_data[7][64];
 extern const int32_t av1_sinpi_arr_data[7][5];
 
+#if CONFIG_DST7_16X16
+extern const int16_t dst7_16x16[16][16];
+#define DST_16X16_PREC_BITS 7  // right shift applied to each 16-point sum
+#endif  // CONFIG_DST7_16X16
+
+#if CONFIG_DST_32X32
+extern const int16_t dst7_32x32[32][32];
+#define DST_32X32_PREC_BITS 7  // right shift applied to each 32-point sum
+#endif  // CONFIG_DST_32X32
+
 #define MAX_TXFM_STAGE_NUM 12
 
 static const int cos_bit_min = 10;
@@ -125,6 +135,9 @@
   TXFM_TYPE_IDENTITY8,
   TXFM_TYPE_IDENTITY16,
   TXFM_TYPE_IDENTITY32,
+#if CONFIG_DST_32X32
+  TXFM_TYPE_ADST32,  // appended last; TXFM_TYPE-indexed tables must match
+#endif  // CONFIG_DST_32X32
   TXFM_TYPES,
   TXFM_TYPE_INVALID,
 } UENUM1BYTE(TXFM_TYPE);
diff --git a/av1/common/blockd.h b/av1/common/blockd.h index d5f3ff9..c535ca8 100644 --- a/av1/common/blockd.h +++ b/av1/common/blockd.h
@@ -1026,6 +1026,21 @@
   { EXT_TX_SET_ALL16, EXT_TX_SET_DTT9_IDTX_1DDCT },
 };
 
+#if CONFIG_DST_32X32
+// With CONFIG_DST_32X32, sizes whose txsize_sqr_up_map entry is TX_32X32 get
+// EXT_TX_SET_DTT4_IDTX for intra and inter alike, before use_reduced_set is
+// consulted.
+static INLINE TxSetType av1_get_ext_tx_set_type(TX_SIZE tx_size, int is_inter,
+                                                int use_reduced_set) {
+  const TX_SIZE tx_size_sqr_up = txsize_sqr_up_map[tx_size];
+  if (tx_size_sqr_up == TX_64X64) return EXT_TX_SET_DCTONLY;
+  if (tx_size_sqr_up == TX_32X32) return EXT_TX_SET_DTT4_IDTX;
+  if (use_reduced_set)
+    return is_inter ? EXT_TX_SET_DCT_IDTX : EXT_TX_SET_DTT4_IDTX;
+  const TX_SIZE tx_size_sqr = txsize_sqr_map[tx_size];
+  return av1_ext_tx_set_lookup[is_inter][tx_size_sqr == TX_16X16];
+}
+#else
 static INLINE TxSetType av1_get_ext_tx_set_type(TX_SIZE tx_size, int is_inter,
                                                 int use_reduced_set) {
   const TX_SIZE tx_size_sqr_up = txsize_sqr_up_map[tx_size];
@@ -1037,6 +1052,7 @@
   const TX_SIZE tx_size_sqr = txsize_sqr_map[tx_size];
   return av1_ext_tx_set_lookup[is_inter][tx_size_sqr == TX_16X16];
 }
+#endif  // CONFIG_DST_32X32
 
 // Maps tx set types to the indices.
 static const int ext_tx_set_index[2][EXT_TX_SET_TYPES] = {
diff --git a/av1/common/idct.c b/av1/common/idct.c index bff438f..0e9c727 100644 --- a/av1/common/idct.c +++ b/av1/common/idct.c
@@ -290,9 +290,32 @@ tmp[r * tmp_stride + c] = dst[r * stride + c]; } } - +#if CONFIG_DST7_16X16 && CONFIG_DST_32X32 + if (tx_size_wide[tx_size] == 32 || tx_size_high[tx_size] == 32 || + tx_size_wide[tx_size] == 16 || tx_size_high[tx_size] == 16) + av1_highbd_inv_txfm_add_c(dqcoeff, CONVERT_TO_BYTEPTR(tmp), tmp_stride, + txfm_param); + else + av1_highbd_inv_txfm_add(dqcoeff, CONVERT_TO_BYTEPTR(tmp), tmp_stride, + txfm_param); +#elif CONFIG_DST7_16X16 + if (tx_size_wide[tx_size] == 16 || tx_size_high[tx_size] == 16) + av1_highbd_inv_txfm_add_c(dqcoeff, CONVERT_TO_BYTEPTR(tmp), tmp_stride, + txfm_param); + else + av1_highbd_inv_txfm_add(dqcoeff, CONVERT_TO_BYTEPTR(tmp), tmp_stride, + txfm_param); +#elif CONFIG_DST_32X32 + if (tx_size_wide[tx_size] == 32 || tx_size_high[tx_size] == 32) + av1_highbd_inv_txfm_add_c(dqcoeff, CONVERT_TO_BYTEPTR(tmp), tmp_stride, + txfm_param); + else + av1_highbd_inv_txfm_add(dqcoeff, CONVERT_TO_BYTEPTR(tmp), tmp_stride, + txfm_param); +#else av1_highbd_inv_txfm_add(dqcoeff, CONVERT_TO_BYTEPTR(tmp), tmp_stride, txfm_param); +#endif for (int r = 0; r < h; ++r) { for (int c = 0; c < w; ++c) { @@ -314,9 +337,56 @@ &txfm_param); assert(av1_ext_tx_used[txfm_param.tx_set_type][txfm_param.tx_type]); +#if CONFIG_DST7_16X16 || CONFIG_DST_32X32 + uint16_t allowed_tx_mask = 0xF1FE; + allowed_tx_mask &= (1 << tx_type); +#endif + if (txfm_param.is_hbd) { +#if CONFIG_DST7_16X16 && CONFIG_DST_32X32 + if ((tx_size_wide[tx_size] == 16 || tx_size_high[tx_size] == 16 || + tx_size_wide[tx_size] == 32 || tx_size_high[tx_size] == 32) && + allowed_tx_mask) + av1_highbd_inv_txfm_add_c(dqcoeff, dst, stride, &txfm_param); + else + av1_highbd_inv_txfm_add(dqcoeff, dst, stride, &txfm_param); +#elif CONFIG_DST7_16X16 + if ((tx_size_wide[tx_size] == 16 || tx_size_high[tx_size] == 16) && + allowed_tx_mask) + av1_highbd_inv_txfm_add_c(dqcoeff, dst, stride, &txfm_param); + else + av1_highbd_inv_txfm_add(dqcoeff, dst, stride, &txfm_param); +#elif CONFIG_DST_32X32 + if 
((tx_size_wide[tx_size] == 32 || tx_size_high[tx_size] == 32) && + allowed_tx_mask) + av1_highbd_inv_txfm_add_c(dqcoeff, dst, stride, &txfm_param); + else + av1_highbd_inv_txfm_add(dqcoeff, dst, stride, &txfm_param); +#else av1_highbd_inv_txfm_add(dqcoeff, dst, stride, &txfm_param); +#endif // CONFIG_DST7_16X16 && CONFIG_DST_32X32 } else { +#if CONFIG_DST7_16X16 && CONFIG_DST_32X32 + if ((tx_size_wide[tx_size] == 16 || tx_size_high[tx_size] == 16 || + tx_size_wide[tx_size] == 32 || tx_size_high[tx_size] == 32) && + allowed_tx_mask) + av1_inv_txfm_add_c(dqcoeff, dst, stride, &txfm_param); + else + av1_inv_txfm_add(dqcoeff, dst, stride, &txfm_param); +#elif CONFIG_DST7_16X16 + if ((tx_size_wide[tx_size] == 16 || tx_size_high[tx_size] == 16) && + allowed_tx_mask) + av1_inv_txfm_add_c(dqcoeff, dst, stride, &txfm_param); + else + av1_inv_txfm_add(dqcoeff, dst, stride, &txfm_param); +#elif CONFIG_DST_32X32 + if ((tx_size_wide[tx_size] == 32 || tx_size_high[tx_size] == 32) && + allowed_tx_mask) + av1_inv_txfm_add_c(dqcoeff, dst, stride, &txfm_param); + else + av1_inv_txfm_add(dqcoeff, dst, stride, &txfm_param); +#else av1_inv_txfm_add(dqcoeff, dst, stride, &txfm_param); +#endif // CONFIG_DST7_16X16 && CONFIG_DST_32X32 } }
diff --git a/av1/encoder/av1_fwd_txfm1d.c b/av1/encoder/av1_fwd_txfm1d.c index 6601c19..854acfa 100644 --- a/av1/encoder/av1_fwd_txfm1d.c +++ b/av1/encoder/av1_fwd_txfm1d.c
@@ -846,6 +846,26 @@
   av1_range_check_buf(stage, input, bf1, size, stage_range[stage]);
 }
 
+#if CONFIG_DST7_16X16
+// 16-point forward transform computed as a direct matrix product with the
+// dst7_16x16 table: output[i] is sum_j(input[j] * dst7_16x16[i][j]), rounded
+// and shifted right by DST_16X16_PREC_BITS. cos_bit and stage_range are
+// ignored. input and output must not alias, since every output element reads
+// the whole input vector.
+void av1_fadst16(const int32_t *input, int32_t *output, int8_t cos_bit,
+                 const int8_t *stage_range) {
+  assert(output != input);
+  (void)cos_bit;
+  (void)stage_range;
+  for (int32_t i = 0; i < 16; i++) {
+    int32_t sum = 0;
+    for (int32_t j = 0; j < 16; j++) {
+      sum += input[j] * dst7_16x16[i][j];
+    }
+    output[i] = ROUND_POWER_OF_TWO_SIGNED(sum, DST_16X16_PREC_BITS);
+  }
+}
+#else
 void av1_fadst16(const int32_t *input, int32_t *output, int8_t cos_bit,
                  const int8_t *stage_range) {
   const int32_t size = 16;
@@ -1060,6 +1080,26 @@
   bf1[15] = bf0[0];
   av1_range_check_buf(stage, input, bf1, size, stage_range[stage]);
 }
+#endif  // CONFIG_DST7_16X16
+
+#if CONFIG_DST_32X32
+// 32-point version of the direct matrix-product forward transform, using
+// dst7_32x32 ([output][input] indexing) and DST_32X32_PREC_BITS. cos_bit and
+// stage_range are ignored; input and output must not alias.
+void av1_fadst32(const int32_t *input, int32_t *output, int8_t cos_bit,
+                 const int8_t *stage_range) {
+  assert(output != input);
+  (void)cos_bit;
+  (void)stage_range;
+  for (int32_t i = 0; i < 32; i++) {
+    int32_t sum = 0;  // NOTE(review): 32-bit; confirm inputs cannot overflow
+    for (int32_t j = 0; j < 32; j++) {
+      sum += input[j] * dst7_32x32[i][j];
+    }
+    output[i] = ROUND_POWER_OF_TWO_SIGNED(sum, DST_32X32_PREC_BITS);
+  }
+}
+#endif  // CONFIG_DST_32X32
 
 void av1_fidentity4_c(const int32_t *input, int32_t *output, int8_t cos_bit,
                       const int8_t *stage_range) {
diff --git a/av1/encoder/av1_fwd_txfm1d.h b/av1/encoder/av1_fwd_txfm1d.h index 9ef54fe..877b40b 100644 --- a/av1/encoder/av1_fwd_txfm1d.h +++ b/av1/encoder/av1_fwd_txfm1d.h
@@ -34,6 +34,10 @@
                 const int8_t *stage_range);
 void av1_fadst16(const int32_t *input, int32_t *output, int8_t cos_bit,
                  const int8_t *stage_range);
+#if CONFIG_DST_32X32  // 32-point forward transform, only in this experiment
+void av1_fadst32(const int32_t *input, int32_t *output, int8_t cos_bit,
+                 const int8_t *stage_range);
+#endif  // CONFIG_DST_32X32
 void av1_fidentity4_c(const int32_t *input, int32_t *output, int8_t cos_bit,
                       const int8_t *stage_range);
 void av1_fidentity8_c(const int32_t *input, int32_t *output, int8_t cos_bit,
diff --git a/av1/encoder/av1_fwd_txfm2d.c b/av1/encoder/av1_fwd_txfm2d.c index bcb829d..98d2887 100644 --- a/av1/encoder/av1_fwd_txfm2d.c +++ b/av1/encoder/av1_fwd_txfm2d.c
@@ -30,6 +30,9 @@
     case TXFM_TYPE_ADST4: return av1_fadst4;
     case TXFM_TYPE_ADST8: return av1_fadst8;
     case TXFM_TYPE_ADST16: return av1_fadst16;
+#if CONFIG_DST_32X32
+    case TXFM_TYPE_ADST32: return av1_fadst32;
+#endif  // CONFIG_DST_32X32
     case TXFM_TYPE_IDENTITY4: return av1_fidentity4_c;
     case TXFM_TYPE_IDENTITY8: return av1_fidentity8_c;
     case TXFM_TYPE_IDENTITY16: return av1_fidentity16_c;
@@ -364,6 +367,10 @@
 static const int8_t fadst8_range_mult2[8] = { 0, 0, 1, 3, 3, 5, 5, 5 };
 static const int8_t fadst16_range_mult2[10] = { 0, 0, 1, 3, 3, 5, 5, 7, 7, 7 };
 
+#if CONFIG_DST_32X32
+static const int8_t fadst32_range_mult2[1] = { 9 };  // single-stage transform
+#endif  // CONFIG_DST_32X32
+
 static const int8_t fidtx4_range_mult2[1] = { 1 };
 static const int8_t fidtx8_range_mult2[1] = { 2 };
 static const int8_t fidtx16_range_mult2[1] = { 3 };
@@ -380,10 +387,22 @@
 #endif
 
+// One entry per TXFM_TYPE, in enum order; TXFM_TYPE_ADST32 comes last.
 static const int8_t *fwd_txfm_range_mult2_list[TXFM_TYPES] = {
-  fdct4_range_mult2,  fdct8_range_mult2,   fdct16_range_mult2,
-  fdct32_range_mult2, fdct64_range_mult2,  fadst4_range_mult2,
-  fadst8_range_mult2, fadst16_range_mult2, fidtx4_range_mult2,
-  fidtx8_range_mult2, fidtx16_range_mult2, fidtx32_range_mult2
+  fdct4_range_mult2,
+  fdct8_range_mult2,
+  fdct16_range_mult2,
+  fdct32_range_mult2,
+  fdct64_range_mult2,
+  fadst4_range_mult2,
+  fadst8_range_mult2,
+  fadst16_range_mult2,
+  fidtx4_range_mult2,
+  fidtx8_range_mult2,
+  fidtx16_range_mult2,
+  fidtx32_range_mult2,
+#if CONFIG_DST_32X32
+  fadst32_range_mult2,
+#endif  // CONFIG_DST_32X32
 };
 
 static INLINE void set_fwd_txfm_non_scale_range(TXFM_2D_FLIP_CFG *cfg) {
diff --git a/av1/encoder/hybrid_fwd_txfm.c b/av1/encoder/hybrid_fwd_txfm.c index 0699085..bd09770 100644 --- a/av1/encoder/hybrid_fwd_txfm.c +++ b/av1/encoder/hybrid_fwd_txfm.c
@@ -109,7 +109,16 @@
   int32_t *dst_coeff = (int32_t *)coeff;
   const TX_TYPE tx_type = txfm_param->tx_type;
   const int bd = txfm_param->bd;
+#if CONFIG_DST7_16X16
+  uint16_t allowed_tx_mask = 0xF1FE;  // tx_type values 1-8 and 12-15
+  allowed_tx_mask &= (1 << tx_type);  // nonzero iff tx_type is in the mask
+  if (allowed_tx_mask)
+    av1_fwd_txfm2d_8x16_c(src_diff, dst_coeff, diff_stride, tx_type, bd);
+  else
+    av1_fwd_txfm2d_8x16(src_diff, dst_coeff, diff_stride, tx_type, bd);
+#else
   av1_fwd_txfm2d_8x16(src_diff, dst_coeff, diff_stride, tx_type, bd);
+#endif  // CONFIG_DST7_16X16
 }
 
 static void highbd_fwd_txfm_16x8(const int16_t *src_diff, tran_low_t *coeff,
@@ -117,49 +126,132 @@
   int32_t *dst_coeff = (int32_t *)coeff;
   const TX_TYPE tx_type = txfm_param->tx_type;
   const int bd = txfm_param->bd;
+#if CONFIG_DST7_16X16
+  uint16_t allowed_tx_mask = 0xF1FE;  // tx_type values 1-8 and 12-15
+  allowed_tx_mask &= (1 << tx_type);  // nonzero iff tx_type is in the mask
+  if (allowed_tx_mask)
+    av1_fwd_txfm2d_16x8_c(src_diff, dst_coeff, diff_stride, tx_type, bd);
+  else
+    av1_fwd_txfm2d_16x8(src_diff, dst_coeff, diff_stride, tx_type, bd);
+#else
   av1_fwd_txfm2d_16x8(src_diff, dst_coeff, diff_stride, tx_type, bd);
+#endif  // CONFIG_DST7_16X16
 }
 
 static void highbd_fwd_txfm_16x32(const int16_t *src_diff, tran_low_t *coeff,
                                   int diff_stride, TxfmParam *txfm_param) {
   int32_t *dst_coeff = (int32_t *)coeff;
+#if CONFIG_DST_32X32
+  const TX_TYPE tx_type = txfm_param->tx_type;
+  uint16_t allowed_tx_mask = 0xF1FE;  // tx_type values 1-8 and 12-15
+  allowed_tx_mask &= (1 << tx_type);  // nonzero iff tx_type is in the mask
+  if (allowed_tx_mask)
+    av1_fwd_txfm2d_16x32_c(src_diff, dst_coeff, diff_stride,
+                           txfm_param->tx_type, txfm_param->bd);
+  else
+    av1_fwd_txfm2d_16x32(src_diff, dst_coeff, diff_stride, txfm_param->tx_type,
+                         txfm_param->bd);
+#else
+  assert(txfm_param->tx_type == DCT_DCT || txfm_param->tx_type == IDTX);
   av1_fwd_txfm2d_16x32(src_diff, dst_coeff, diff_stride, txfm_param->tx_type,
                        txfm_param->bd);
+#endif  // CONFIG_DST_32X32
 }
 
 static void highbd_fwd_txfm_32x16(const int16_t *src_diff, tran_low_t *coeff,
                                   int diff_stride, TxfmParam *txfm_param) {
   int32_t *dst_coeff = (int32_t *)coeff;
+#if CONFIG_DST_32X32
+  const TX_TYPE tx_type = txfm_param->tx_type;
+  uint16_t allowed_tx_mask = 0xF1FE;  // tx_type values 1-8 and 12-15
+  allowed_tx_mask &= (1 << tx_type);  // nonzero iff tx_type is in the mask
+  if (allowed_tx_mask)
+    av1_fwd_txfm2d_32x16_c(src_diff, dst_coeff, diff_stride,
+                           txfm_param->tx_type, txfm_param->bd);
+  else
+    av1_fwd_txfm2d_32x16(src_diff, dst_coeff, diff_stride, txfm_param->tx_type,
+                         txfm_param->bd);
+#else
+  assert(txfm_param->tx_type == DCT_DCT || txfm_param->tx_type == IDTX);
   av1_fwd_txfm2d_32x16(src_diff, dst_coeff, diff_stride, txfm_param->tx_type,
                        txfm_param->bd);
+#endif  // CONFIG_DST_32X32
 }
 
 static void highbd_fwd_txfm_16x4(const int16_t *src_diff, tran_low_t *coeff,
                                  int diff_stride, TxfmParam *txfm_param) {
   int32_t *dst_coeff = (int32_t *)coeff;
+#if CONFIG_DST7_16X16
+  const TX_TYPE tx_type = txfm_param->tx_type;
+  uint16_t allowed_tx_mask = 0xF1FE;  // tx_type values 1-8 and 12-15
+  allowed_tx_mask &= (1 << tx_type);  // nonzero iff tx_type is in the mask
+  if (allowed_tx_mask)
+    av1_fwd_txfm2d_16x4_c(src_diff, dst_coeff, diff_stride, txfm_param->tx_type,
+                          txfm_param->bd);
+  else
+    av1_fwd_txfm2d_16x4(src_diff, dst_coeff, diff_stride, txfm_param->tx_type,
+                        txfm_param->bd);
+#else
   av1_fwd_txfm2d_16x4(src_diff, dst_coeff, diff_stride, txfm_param->tx_type,
                       txfm_param->bd);
+#endif  // CONFIG_DST7_16X16
 }
 
 static void highbd_fwd_txfm_4x16(const int16_t *src_diff, tran_low_t *coeff,
                                  int diff_stride, TxfmParam *txfm_param) {
   int32_t *dst_coeff = (int32_t *)coeff;
+#if CONFIG_DST7_16X16
+  const TX_TYPE tx_type = txfm_param->tx_type;
+  uint16_t allowed_tx_mask = 0xF1FE;  // tx_type values 1-8 and 12-15
+  allowed_tx_mask &= (1 << tx_type);  // nonzero iff tx_type is in the mask
+  if (allowed_tx_mask)
+    av1_fwd_txfm2d_4x16_c(src_diff, dst_coeff, diff_stride, txfm_param->tx_type,
+                          txfm_param->bd);
+  else
+    av1_fwd_txfm2d_4x16(src_diff, dst_coeff, diff_stride, txfm_param->tx_type,
+                        txfm_param->bd);
+#else
   av1_fwd_txfm2d_4x16(src_diff, dst_coeff, diff_stride, txfm_param->tx_type,
                       txfm_param->bd);
+#endif  // CONFIG_DST7_16X16
 }
 
 static void highbd_fwd_txfm_32x8(const int16_t *src_diff, tran_low_t *coeff,
                                  int diff_stride, TxfmParam *txfm_param) {
   int32_t *dst_coeff = (int32_t *)coeff;
+#if CONFIG_DST_32X32
+  const TX_TYPE tx_type = txfm_param->tx_type;
+  uint16_t allowed_tx_mask = 0xF1FE;  // tx_type values 1-8 and 12-15
+  allowed_tx_mask &= (1 << tx_type);  // nonzero iff tx_type is in the mask
+  if (allowed_tx_mask)
+    av1_fwd_txfm2d_32x8_c(src_diff, dst_coeff, diff_stride, txfm_param->tx_type,
+                          txfm_param->bd);
+  else
+    av1_fwd_txfm2d_32x8(src_diff, dst_coeff, diff_stride, txfm_param->tx_type,
+                        txfm_param->bd);
+#else
   av1_fwd_txfm2d_32x8(src_diff, dst_coeff, diff_stride, txfm_param->tx_type,
                       txfm_param->bd);
+#endif  // CONFIG_DST_32X32
 }
 
 static void highbd_fwd_txfm_8x32(const int16_t *src_diff, tran_low_t *coeff,
                                  int diff_stride, TxfmParam *txfm_param) {
   int32_t *dst_coeff = (int32_t *)coeff;
+#if CONFIG_DST_32X32
+  const TX_TYPE tx_type = txfm_param->tx_type;
+  uint16_t allowed_tx_mask = 0xF1FE;  // tx_type values 1-8 and 12-15
+  allowed_tx_mask &= (1 << tx_type);  // nonzero iff tx_type is in the mask
+  if (allowed_tx_mask)
+    av1_fwd_txfm2d_8x32_c(src_diff, dst_coeff, diff_stride, txfm_param->tx_type,
+                          txfm_param->bd);
+  else
+    av1_fwd_txfm2d_8x32(src_diff, dst_coeff, diff_stride, txfm_param->tx_type,
+                        txfm_param->bd);
+#else
   av1_fwd_txfm2d_8x32(src_diff, dst_coeff, diff_stride, txfm_param->tx_type,
                       txfm_param->bd);
+#endif  // CONFIG_DST_32X32
 }
 
 static void highbd_fwd_txfm_8x8(const int16_t *src_diff, tran_low_t *coeff,
@@ -175,7 +267,16 @@
   int32_t *dst_coeff = (int32_t *)coeff;
   const TX_TYPE tx_type = txfm_param->tx_type;
   const int bd = txfm_param->bd;
+#if CONFIG_DST7_16X16
+  uint16_t allowed_tx_mask = 0xF1FE;  // tx_type values 1-8 and 12-15
+  allowed_tx_mask &= (1 << tx_type);  // nonzero iff tx_type is in the mask
+  if (allowed_tx_mask)
+    av1_fwd_txfm2d_16x16_c(src_diff, dst_coeff, diff_stride, tx_type, bd);
+  else
+    av1_fwd_txfm2d_16x16(src_diff, dst_coeff, diff_stride, tx_type, bd);
+#else
   av1_fwd_txfm2d_16x16(src_diff, dst_coeff, diff_stride, tx_type, bd);
+#endif  // CONFIG_DST7_16X16
 }
 
 static void highbd_fwd_txfm_32x32(const int16_t *src_diff, tran_low_t *coeff,
@@ -183,7 +284,16 @@
   int32_t *dst_coeff = (int32_t *)coeff;
   const TX_TYPE tx_type = txfm_param->tx_type;
   const int bd = txfm_param->bd;
+#if CONFIG_DST_32X32
+  uint16_t allowed_tx_mask = 0xF1FE;  // tx_type values 1-8 and 12-15
+  allowed_tx_mask &= (1 << tx_type);  // nonzero iff tx_type is in the mask
+  if (allowed_tx_mask)
+    av1_fwd_txfm2d_32x32_c(src_diff, dst_coeff, diff_stride, tx_type, bd);
+  else
+    av1_fwd_txfm2d_32x32(src_diff, dst_coeff, diff_stride, tx_type, bd);
+#else
   av1_fwd_txfm2d_32x32(src_diff, dst_coeff, diff_stride, tx_type, bd);
+#endif  // CONFIG_DST_32X32
 }
 
 static void highbd_fwd_txfm_32x64(const int16_t *src_diff, tran_low_t *coeff,
@@ -230,10 +340,41 @@
 
 void av1_fwd_txfm(const int16_t *src_diff, tran_low_t *coeff, int diff_stride,
                   TxfmParam *txfm_param) {
-  if (txfm_param->bd == 8)
+  if (txfm_param->bd == 8) {
+#if CONFIG_DST7_16X16 || CONFIG_DST_32X32
+    const TX_TYPE tx_type = txfm_param->tx_type;
+    uint16_t allowed_tx_mask = 0xF1FE;  // tx_type values 1-8 and 12-15
+    allowed_tx_mask &= (1 << tx_type);  // nonzero iff tx_type is in the mask
+#endif
+#if CONFIG_DST7_16X16 && CONFIG_DST_32X32
+    if ((tx_size_wide[txfm_param->tx_size] == 16 ||
+         tx_size_high[txfm_param->tx_size] == 16 ||
+         tx_size_wide[txfm_param->tx_size] == 32 ||
+         tx_size_high[txfm_param->tx_size] == 32) &&
+        allowed_tx_mask)
+      av1_lowbd_fwd_txfm_c(src_diff, coeff, diff_stride, txfm_param);
+    else
+      av1_lowbd_fwd_txfm(src_diff, coeff, diff_stride, txfm_param);
+#elif CONFIG_DST7_16X16
+    if ((tx_size_wide[txfm_param->tx_size] == 16 ||
+         tx_size_high[txfm_param->tx_size] == 16) &&
+        allowed_tx_mask)
+      av1_lowbd_fwd_txfm_c(src_diff, coeff, diff_stride, txfm_param);
+    else
+      av1_lowbd_fwd_txfm(src_diff, coeff, diff_stride, txfm_param);
+#elif CONFIG_DST_32X32
+    if ((tx_size_wide[txfm_param->tx_size] == 32 ||
+         tx_size_high[txfm_param->tx_size] == 32) &&
+        allowed_tx_mask)
+      av1_lowbd_fwd_txfm_c(src_diff, coeff, diff_stride, txfm_param);
+    else
+      av1_lowbd_fwd_txfm(src_diff, coeff, diff_stride, txfm_param);
+#else
     av1_lowbd_fwd_txfm(src_diff, coeff, diff_stride, txfm_param);
-  else
+#endif  // CONFIG_DST7_16X16 && CONFIG_DST_32X32
+  } else {
     av1_highbd_fwd_txfm(src_diff, coeff, diff_stride, txfm_param);
+  }
 }
 
 void av1_lowbd_fwd_txfm_c(const int16_t *src_diff, tran_low_t *coeff,
diff --git a/av1/encoder/tx_search.c b/av1/encoder/tx_search.c index 81646ed..f3a69cf 100644 --- a/av1/encoder/tx_search.c +++ b/av1/encoder/tx_search.c
@@ -1995,6 +1995,11 @@ if (cpi->oxcf.txfm_cfg.enable_flip_idtx == 0) ext_tx_used_flag &= DCT_ADST_TX_MASK; +#if CONFIG_DST_32X32 + if (!is_inter && (txsize_sqr_up_map[tx_size] == TX_32X32)) + ext_tx_used_flag &= DCT_ADST_TX_MASK; +#endif + uint16_t allowed_tx_mask = 0; // 1: allow; 0: skip. if (txk_allowed < TX_TYPES) { allowed_tx_mask = 1 << txk_allowed;
diff --git a/build/cmake/aom_config_defaults.cmake b/build/cmake/aom_config_defaults.cmake index fa53112..b4e03d3 100644 --- a/build/cmake/aom_config_defaults.cmake +++ b/build/cmake/aom_config_defaults.cmake
@@ -139,6 +139,9 @@
                    "AV2 experiment flag to remove dual filter.")
 # Partitioning
 set_aom_config_var(CONFIG_SDP 0 NUMBER "AV2 Semi-Decoupled Partitioning.")
+# Primary Transforms
+set_aom_config_var(CONFIG_DST7_16X16 0 NUMBER "AV2 DST7 16x16 experiment flag.")
+set_aom_config_var(CONFIG_DST_32X32 0 NUMBER "AV2 DST7 32x32 experiment flag.")
 
 #
 # Variables in this section control optional features of the build system.