Add Daala TX to 16x32 and 32x16 transforms
Rectangular 16x32 and 32x16 transforms will now use Daala TX when
CONFIG_DAALA_TX16 and CONFIG_DAALA_TX32 are both enabled.
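
The Daala kernels use a different fixed-point scaling than the lifted
AOM transforms, so the shift schedule changes along with the kernels:
the inverse path pre-scales coefficients by 2, scales the transposed
row outputs by 4, and ends with a round shift of 5 instead of 6, while
the forward path scales input residuals by 16 (instead of 4*Sqrt2 with
a rounding shift) and uses an intermediate shift of 2 instead of 4.
The 16x32/32x16 wrappers pin the _c variants because the optimized
av1_fht16x32/av1_iht16x32_512_add dispatch targets still implement
only the old kernels and shift schedule.

A forward/inverse round-trip smoke test along these lines can confirm
the new scaling is self-consistent (illustrative sketch only:
check_16x32_roundtrip is hypothetical, the include may need adjusting
to the tree, and the reconstruction tolerance is a guess rather than
a spec):

  #include <assert.h>
  #include <stdlib.h>
  #include <string.h>
  #include "./av1_rtcd.h"  // av1_fht16x32_c / av1_iht16x32_512_add_c

  static void check_16x32_roundtrip(void) {
    int16_t src_diff[16 * 32];
    tran_low_t coeff[16 * 32];
    uint8_t dest[16 * 32];
    TxfmParam param;
    memset(&param, 0, sizeof(param));
    param.tx_type = DCT_DCT;
    param.tx_size = TX_16X32;
    for (int i = 0; i < 16 * 32; ++i) {
      src_diff[i] = (int16_t)((i % 7) - 3);  // small residual pattern
      dest[i] = 128;                         // flat prediction
    }
    // Forward 16x32; takes the Daala path when CONFIG_DAALA_TX16 and
    // CONFIG_DAALA_TX32 are both enabled.
    av1_fht16x32_c(src_diff, coeff, 16, &param);
    // Inverse transform adds the reconstructed residual onto dest.
    av1_iht16x32_512_add_c(coeff, dest, 16, &param);
    // Without quantization the round trip should only see rounding
    // error; the bound of 2 is a loose guess, not a normative limit.
    for (int i = 0; i < 16 * 32; ++i)
      assert(abs((dest[i] - 128) - src_diff[i]) <= 2);
  }
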
Change-Id: Iab3737605fa10dc09ceab18856a26165c502e6e5
diff --git a/av1/common/idct.c b/av1/common/idct.c
index 0ac9612..358f558 100644
--- a/av1/common/idct.c
+++ b/av1/common/idct.c
@@ -1696,6 +1696,26 @@
assert(tx_type == DCT_DCT);
#endif
static const transform_2d IHT_16x32[] = {
+#if CONFIG_DAALA_TX16 && CONFIG_DAALA_TX32
+ { daala_idct32, daala_idct16 }, // DCT_DCT = 0
+ { daala_idst32, daala_idct16 }, // ADST_DCT = 1
+ { daala_idct32, daala_idst16 }, // DCT_ADST = 2
+ { daala_idst32, daala_idst16 }, // ADST_ADST = 3
+#if CONFIG_EXT_TX
+ { daala_idst32, daala_idct16 }, // FLIPADST_DCT
+ { daala_idct32, daala_idst16 }, // DCT_FLIPADST
+ { daala_idst32, daala_idst16 }, // FLIPADST_FLIPADST
+ { daala_idst32, daala_idst16 }, // ADST_FLIPADST
+ { daala_idst32, daala_idst16 }, // FLIPADST_ADST
+ { daala_idtx32, daala_idtx16 }, // IDTX
+ { daala_idct32, daala_idtx16 }, // V_DCT
+ { daala_idtx32, daala_idct16 }, // H_DCT
+ { daala_idst32, daala_idtx16 }, // V_ADST
+ { daala_idtx32, daala_idst16 }, // H_ADST
+ { daala_idst32, daala_idtx16 }, // V_FLIPADST
+ { daala_idtx32, daala_idst16 }, // H_FLIPADST
+#endif
+#else
{ aom_idct32_c, aom_idct16_c }, // DCT_DCT
{ ihalfright32_c, aom_idct16_c }, // ADST_DCT
{ aom_idct32_c, aom_iadst16_c }, // DCT_ADST
@@ -1714,6 +1734,7 @@
{ ihalfright32_c, iidtx16_c }, // V_FLIPADST
{ iidtx32_c, aom_iadst16_c }, // H_FLIPADST
#endif
+#endif
};
const int n = 16;
@@ -1725,9 +1746,16 @@
// inverse transform row vectors and transpose
for (i = 0; i < n2; ++i) {
+#if CONFIG_DAALA_TX16 && CONFIG_DAALA_TX32
+ tran_low_t temp_in[16];
+ for (j = 0; j < n; j++) temp_in[j] = input[j] * 2;
+ IHT_16x32[tx_type].rows(temp_in, outtmp);
+ for (j = 0; j < n; ++j) tmp[j][i] = outtmp[j] * 4;
+#else
IHT_16x32[tx_type].rows(input, outtmp);
for (j = 0; j < n; ++j)
tmp[j][i] = (tran_low_t)dct_const_round_shift(outtmp[j] * Sqrt2);
+#endif
input += n;
}
@@ -1743,7 +1771,11 @@
for (j = 0; j < n; ++j) {
int d = i * stride + j;
int s = j * outstride + i;
+#if CONFIG_DAALA_TX16 && CONFIG_DAALA_TX32
+ dest[d] = clip_pixel_add(dest[d], ROUND_POWER_OF_TWO(outp[s], 5));
+#else
dest[d] = clip_pixel_add(dest[d], ROUND_POWER_OF_TWO(outp[s], 6));
+#endif
}
}
}
@@ -1758,6 +1790,26 @@
assert(tx_type == DCT_DCT);
#endif
static const transform_2d IHT_32x16[] = {
+#if CONFIG_DAALA_TX16 && CONFIG_DAALA_TX32
+ { daala_idct16, daala_idct32 }, // DCT_DCT = 0
+ { daala_idst16, daala_idct32 }, // ADST_DCT = 1
+ { daala_idct16, daala_idst32 }, // DCT_ADST = 2
+ { daala_idst16, daala_idst32 }, // ADST_ADST = 3
+#if CONFIG_EXT_TX
+ { daala_idst16, daala_idct32 }, // FLIPADST_DCT
+ { daala_idct16, daala_idst32 }, // DCT_FLIPADST
+ { daala_idst16, daala_idst32 }, // FLIPADST_FLIPADST
+ { daala_idst16, daala_idst32 }, // ADST_FLIPADST
+ { daala_idst16, daala_idst32 }, // FLIPADST_ADST
+ { daala_idtx16, daala_idtx32 }, // IDTX
+ { daala_idct16, daala_idtx32 }, // V_DCT
+ { daala_idtx16, daala_idct32 }, // H_DCT
+ { daala_idst16, daala_idtx32 }, // V_ADST
+ { daala_idtx16, daala_idst32 }, // H_ADST
+ { daala_idst16, daala_idtx32 }, // V_FLIPADST
+ { daala_idtx16, daala_idst32 }, // H_FLIPADST
+#endif
+#else
{ aom_idct16_c, aom_idct32_c }, // DCT_DCT
{ aom_iadst16_c, aom_idct32_c }, // ADST_DCT
{ aom_idct16_c, ihalfright32_c }, // DCT_ADST
@@ -1776,6 +1828,7 @@
{ aom_iadst16_c, iidtx32_c }, // V_FLIPADST
{ iidtx16_c, ihalfright32_c }, // H_FLIPADST
#endif
+#endif
};
const int n = 16;
const int n2 = 32;
@@ -1787,9 +1840,16 @@
// inverse transform row vectors and transpose
for (i = 0; i < n; ++i) {
+#if CONFIG_DAALA_TX16 && CONFIG_DAALA_TX32
+ tran_low_t temp_in[32];
+ for (j = 0; j < n2; j++) temp_in[j] = input[j] * 2;
+ IHT_32x16[tx_type].rows(temp_in, outtmp);
+ for (j = 0; j < n2; ++j) tmp[j][i] = outtmp[j] * 4;
+#else
IHT_32x16[tx_type].rows(input, outtmp);
for (j = 0; j < n2; ++j)
tmp[j][i] = (tran_low_t)dct_const_round_shift(outtmp[j] * Sqrt2);
+#endif
input += n2;
}
@@ -1805,7 +1865,11 @@
for (j = 0; j < n2; ++j) {
int d = i * stride + j;
int s = j * outstride + i;
+#if CONFIG_DAALA_TX16 && CONFIG_DAALA_TX32
+ dest[d] = clip_pixel_add(dest[d], ROUND_POWER_OF_TWO(outp[s], 5));
+#else
dest[d] = clip_pixel_add(dest[d], ROUND_POWER_OF_TWO(outp[s], 6));
+#endif
}
}
}
@@ -2623,12 +2687,20 @@
static void inv_txfm_add_16x32(const tran_low_t *input, uint8_t *dest,
int stride, const TxfmParam *txfm_param) {
+#if CONFIG_DAALA_TX16 && CONFIG_DAALA_TX32
+ av1_iht16x32_512_add_c(input, dest, stride, txfm_param);
+#else
av1_iht16x32_512_add(input, dest, stride, txfm_param);
+#endif
}
static void inv_txfm_add_32x16(const tran_low_t *input, uint8_t *dest,
int stride, const TxfmParam *txfm_param) {
+#if CONFIG_DAALA_TX16 && CONFIG_DAALA_TX32
+ av1_iht32x16_512_add_c(input, dest, stride, txfm_param);
+#else
av1_iht32x16_512_add(input, dest, stride, txfm_param);
+#endif
}
#if CONFIG_TX64X64
diff --git a/av1/encoder/dct.c b/av1/encoder/dct.c
index ce4ca4d..c1a255b 100644
--- a/av1/encoder/dct.c
+++ b/av1/encoder/dct.c
@@ -2273,6 +2273,26 @@
assert(tx_type == DCT_DCT);
#endif
static const transform_2d FHT[] = {
+#if CONFIG_DAALA_TX16 && CONFIG_DAALA_TX32
+ { daala_fdct32, daala_fdct16 }, // DCT_DCT
+ { daala_fdst32, daala_fdct16 }, // ADST_DCT
+ { daala_fdct32, daala_fdst16 }, // DCT_ADST
+ { daala_fdst32, daala_fdst16 }, // ADST_ADST
+#if CONFIG_EXT_TX
+ { daala_fdst32, daala_fdct16 }, // FLIPADST_DCT
+ { daala_fdct32, daala_fdst16 }, // DCT_FLIPADST
+ { daala_fdst32, daala_fdst16 }, // FLIPADST_FLIPADST
+ { daala_fdst32, daala_fdst16 }, // ADST_FLIPADST
+ { daala_fdst32, daala_fdst16 }, // FLIPADST_ADST
+ { daala_idtx32, daala_idtx16 }, // IDTX
+ { daala_fdct32, daala_idtx16 }, // V_DCT
+ { daala_idtx32, daala_fdct16 }, // H_DCT
+ { daala_fdst32, daala_idtx16 }, // V_ADST
+ { daala_idtx32, daala_fdst16 }, // H_ADST
+ { daala_fdst32, daala_idtx16 }, // V_FLIPADST
+ { daala_idtx32, daala_fdst16 }, // H_FLIPADST
+#endif
+#else
{ fdct32, fdct16 }, // DCT_DCT
{ fhalfright32, fdct16 }, // ADST_DCT
{ fdct32, fadst16 }, // DCT_ADST
@@ -2291,6 +2311,7 @@
{ fhalfright32, fidtx16 }, // V_FLIPADST
{ fidtx32, fadst16 }, // H_FLIPADST
#endif
+#endif
};
const transform_2d ht = FHT[tx_type];
const int n = 16;
@@ -2305,12 +2326,22 @@
// Rows
for (i = 0; i < n2; ++i) {
- for (j = 0; j < n; ++j)
+ for (j = 0; j < n; ++j) {
+#if CONFIG_DAALA_TX16 && CONFIG_DAALA_TX32
+ temp_in[j] = input[i * stride + j] * 16;
+#else
temp_in[j] =
(tran_low_t)fdct_round_shift(input[i * stride + j] * 4 * Sqrt2);
+#endif
+ }
ht.rows(temp_in, temp_out);
- for (j = 0; j < n; ++j)
+ for (j = 0; j < n; ++j) {
+#if CONFIG_DAALA_TX16 && CONFIG_DAALA_TX32
+ out[j * n2 + i] = ROUND_POWER_OF_TWO_SIGNED(temp_out[j], 2);
+#else
out[j * n2 + i] = ROUND_POWER_OF_TWO_SIGNED(temp_out[j], 4);
+#endif
+ }
}
// Columns
@@ -2332,6 +2363,26 @@
assert(tx_type == DCT_DCT);
#endif
static const transform_2d FHT[] = {
+#if CONFIG_DAALA_TX16 && CONFIG_DAALA_TX32
+ { daala_fdct16, daala_fdct32 }, // DCT_DCT
+ { daala_fdst16, daala_fdct32 }, // ADST_DCT
+ { daala_fdct16, daala_fdst32 }, // DCT_ADST
+ { daala_fdst16, daala_fdst32 }, // ADST_ADST
+#if CONFIG_EXT_TX
+ { daala_fdst16, daala_fdct32 }, // FLIPADST_DCT
+ { daala_fdct16, daala_fdst32 }, // DCT_FLIPADST
+ { daala_fdst16, daala_fdst32 }, // FLIPADST_FLIPADST
+ { daala_fdst16, daala_fdst32 }, // ADST_FLIPADST
+ { daala_fdst16, daala_fdst32 }, // FLIPADST_ADST
+ { daala_idtx16, daala_idtx32 }, // IDTX
+ { daala_fdct16, daala_idtx32 }, // V_DCT
+ { daala_idtx16, daala_fdct32 }, // H_DCT
+ { daala_fdst16, daala_idtx32 }, // V_ADST
+ { daala_idtx16, daala_fdst32 }, // H_ADST
+ { daala_fdst16, daala_idtx32 }, // V_FLIPADST
+ { daala_idtx16, daala_fdst32 }, // H_FLIPADST
+#endif
+#else
{ fdct16, fdct32 }, // DCT_DCT
{ fadst16, fdct32 }, // ADST_DCT
{ fdct16, fhalfright32 }, // DCT_ADST
@@ -2350,6 +2401,7 @@
{ fadst16, fidtx32 }, // V_FLIPADST
{ fidtx16, fhalfright32 }, // H_FLIPADST
#endif
+#endif
};
const transform_2d ht = FHT[tx_type];
const int n = 16;
@@ -2364,12 +2416,22 @@
// Columns
for (i = 0; i < n2; ++i) {
- for (j = 0; j < n; ++j)
+ for (j = 0; j < n; ++j) {
+#if CONFIG_DAALA_TX16 && CONFIG_DAALA_TX32
+ temp_in[j] = input[j * stride + i] * 16;
+#else
temp_in[j] =
(tran_low_t)fdct_round_shift(input[j * stride + i] * 4 * Sqrt2);
+#endif
+ }
ht.cols(temp_in, temp_out);
- for (j = 0; j < n; ++j)
+ for (j = 0; j < n; ++j) {
+#if CONFIG_DAALA_TX16 && CONFIG_DAALA_TX32
+ out[j * n2 + i] = ROUND_POWER_OF_TWO_SIGNED(temp_out[j], 2);
+#else
out[j * n2 + i] = ROUND_POWER_OF_TWO_SIGNED(temp_out[j], 4);
+#endif
+ }
}
// Rows
diff --git a/av1/encoder/hybrid_fwd_txfm.c b/av1/encoder/hybrid_fwd_txfm.c
index 772231e..486c604 100644
--- a/av1/encoder/hybrid_fwd_txfm.c
+++ b/av1/encoder/hybrid_fwd_txfm.c
@@ -70,12 +70,20 @@
static void fwd_txfm_16x32(const int16_t *src_diff, tran_low_t *coeff,
int diff_stride, TxfmParam *txfm_param) {
+#if CONFIG_DAALA_TX16 && CONFIG_DAALA_TX32
+ av1_fht16x32_c(src_diff, coeff, diff_stride, txfm_param);
+#else
av1_fht16x32(src_diff, coeff, diff_stride, txfm_param);
+#endif
}
static void fwd_txfm_32x16(const int16_t *src_diff, tran_low_t *coeff,
int diff_stride, TxfmParam *txfm_param) {
+#if CONFIG_DAALA_TX16 && CONFIG_DAALA_TX32
+ av1_fht32x16_c(src_diff, coeff, diff_stride, txfm_param);
+#else
av1_fht32x16(src_diff, coeff, diff_stride, txfm_param);
+#endif
}
static void fwd_txfm_8x8(const int16_t *src_diff, tran_low_t *coeff,