Add txmg experiment
This experiment aims at merging lbd/hbd txfms
So far this experiment uses the hbd transform on the lbd path.
The performance changes I observed are:
lowres -0.089%
midres 0.065%
(negative means performance drop)
Starting from here, two main things need to be done:
1) Fix overflow due to quantizer noise
2) Generate a 16-bit version from the hbd txfm
Change-Id: I35bb1fc0cbb78decad2570ff5826ed665f739752
diff --git a/av1/common/idct.c b/av1/common/idct.c
index 1677780..58cfc45 100644
--- a/av1/common/idct.c
+++ b/av1/common/idct.c
@@ -2195,11 +2195,13 @@
#endif
}
+#if !CONFIG_TXMG
typedef void (*InvTxfmFunc)(const tran_low_t *dqcoeff, uint8_t *dst, int stride,
TxfmParam *txfm_param);
static InvTxfmFunc inv_txfm_func[2] = { av1_inv_txfm_add,
av1_highbd_inv_txfm_add };
+#endif
// TODO(kslu) Change input arguments to TxfmParam, which contains mode,
// tx_type, tx_size, dst, stride, eob. Thus, the additional argument when LGT
@@ -2240,8 +2242,29 @@
txfm_param.mode = mode;
#endif
+#if CONFIG_TXMG
+ DECLARE_ALIGNED(16, uint16_t, tmp[MAX_TX_SQUARE]);
+ int tmp_stride = MAX_TX_SIZE;
+ int w = tx_size_wide[tx_size];
+ int h = tx_size_high[tx_size];
+ for (int r = 0; r < h; ++r) {
+ for (int c = 0; c < w; ++c) {
+ tmp[r * tmp_stride + c] = dst[r * stride + c];
+ }
+ }
+
+ av1_highbd_inv_txfm_add(dqcoeff, CONVERT_TO_BYTEPTR(tmp), tmp_stride,
+ &txfm_param);
+
+ for (int r = 0; r < h; ++r) {
+ for (int c = 0; c < w; ++c) {
+ dst[r * stride + c] = tmp[r * tmp_stride + c];
+ }
+ }
+#else // CONFIG_TXMG
const int is_hbd = get_bitdepth_data_path_index(xd);
inv_txfm_func[is_hbd](dqcoeff, dst, stride, &txfm_param);
+#endif // CONFIG_TXMG
}
void av1_inverse_transform_block_facade(MACROBLOCKD *xd, int plane, int block,