Fix the overflow of av1_fht32x32() in 2D DCT_DCT
- Use range check function to avoid DCT_DCT overflow.
We need to re-develop the column txfm side scaling/rounding. Now,
we prefer to maintain the current BDRate level.
- Encoder user level time reduction <1% owing to av1_fht32x32_avx2.
- Add MemCheck unit test and fdct32() unit test.
Change-Id: I1e67030f67bc637859798ebe2f6698afffb8531c
diff --git a/test/fht32x32_test.cc b/test/fht32x32_test.cc
index a949ebf..3d07b44 100644
--- a/test/fht32x32_test.cc
+++ b/test/fht32x32_test.cc
@@ -69,6 +69,7 @@
inv_txfm_ = GET_PARAM(1);
tx_type_ = GET_PARAM(2);
pitch_ = 32;
+ height_ = 32;
fwd_txfm_ref = fht32x32_ref;
bit_depth_ = GET_PARAM(3);
mask_ = (1 << bit_depth_) - 1;
@@ -90,6 +91,7 @@
};
TEST_P(AV1Trans32x32HT, CoeffCheck) { RunCoeffCheck(); }
+TEST_P(AV1Trans32x32HT, MemCheck) { RunMemCheck(); }
#if CONFIG_AOM_HIGHBITDEPTH
class AV1HighbdTrans32x32HT
@@ -164,8 +166,7 @@
#if HAVE_AVX2
const Ht32x32Param kArrayHt32x32Param_avx2[] = {
- // TODO(luoyi): DCT_DCT tx_type is not enabled in av1_fht32x32_c(avx2) yet.
- // make_tuple(&av1_fht32x32_avx2, dummy_inv_txfm, 0, AOM_BITS_8, 1024),
+ make_tuple(&av1_fht32x32_avx2, dummy_inv_txfm, 0, AOM_BITS_8, 1024),
make_tuple(&av1_fht32x32_avx2, dummy_inv_txfm, 1, AOM_BITS_8, 1024),
make_tuple(&av1_fht32x32_avx2, dummy_inv_txfm, 2, AOM_BITS_8, 1024),
make_tuple(&av1_fht32x32_avx2, dummy_inv_txfm, 3, AOM_BITS_8, 1024),