Tune the inv_shift

Let the second stage of 10-bit inv txfms fit within 16 bits.

Change-Id: Ia087d65484cd410651190dcd9d3292cce6594d34

commit: 0625027652de1248acef4fa221150b787461e938 [log] [tgz]
author: Angie Chiang <angiebird@google.com> Tue Jan 16 17:03:35 2018 -0800
committer: Angie Chiang <angiebird@google.com> Tue Jan 23 18:29:54 2018 +0000
tree: 602cf1e2f05ab69773ea566ea456efff76caee27
parent: a8b45c37f5dcee072c9ef1de32374aee9ce1ebf5 [diff]
diff --git a/av1/common/av1_inv_txfm2d.c b/av1/common/av1_inv_txfm2d.c
index c929540..bf7d6a1 100644
--- a/av1/common/av1_inv_txfm2d.c
+++ b/av1/common/av1_inv_txfm2d.c

@@ -246,14 +246,14 @@
 };
 
 static const int8_t inv_shift_4x4[2] = { 0, -4 };
-static const int8_t inv_shift_8x8[2] = { 0, -5 };
-static const int8_t inv_shift_16x16[2] = { -1, -5 };
-static const int8_t inv_shift_32x32[2] = { -1, -5 };
+static const int8_t inv_shift_8x8[2] = { -1, -4 };
+static const int8_t inv_shift_16x16[2] = { -2, -4 };
+static const int8_t inv_shift_32x32[2] = { -2, -4 };
 #if CONFIG_TX64X64
-static const int8_t inv_shift_64x64[2] = { -1, -5 };
+static const int8_t inv_shift_64x64[2] = { -2, -4 };
 #endif
-static const int8_t inv_shift_4x8[2] = { 0, -4 };
-static const int8_t inv_shift_8x4[2] = { 0, -4 };
+static const int8_t inv_shift_4x8[2] = { -1, -3 };
+static const int8_t inv_shift_8x4[2] = { -1, -3 };
 static const int8_t inv_shift_8x16[2] = { -1, -4 };
 static const int8_t inv_shift_16x8[2] = { -1, -4 };
 static const int8_t inv_shift_16x32[2] = { -1, -4 };
@@ -264,11 +264,11 @@
 #endif
 static const int8_t inv_shift_4x16[2] = { -1, -4 };
 static const int8_t inv_shift_16x4[2] = { -1, -4 };
-static const int8_t inv_shift_8x32[2] = { -1, -5 };
-static const int8_t inv_shift_32x8[2] = { -1, -5 };
+static const int8_t inv_shift_8x32[2] = { -2, -4 };
+static const int8_t inv_shift_32x8[2] = { -2, -4 };
 #if CONFIG_TX64X64
-static const int8_t inv_shift_16x64[2] = { -1, -5 };
-static const int8_t inv_shift_64x16[2] = { -1, -5 };
+static const int8_t inv_shift_16x64[2] = { -2, -4 };
+static const int8_t inv_shift_64x16[2] = { -2, -4 };
 #endif  // CONFIG_TX64X64
 
 const int8_t *inv_txfm_shift_ls[TX_SIZES_ALL] = {

diff --git a/av1/common/x86/highbd_inv_txfm_sse4.c b/av1/common/x86/highbd_inv_txfm_sse4.c
index d0d54b5..3e004e2 100644
--- a/av1/common/x86/highbd_inv_txfm_sse4.c
+++ b/av1/common/x86/highbd_inv_txfm_sse4.c

@@ -831,6 +831,7 @@
       transpose_8x8(in, out);
       idct8x8_sse4_1(out, in, row_cfg->cos_bit[2]);
       transpose_8x8(in, out);
+      round_shift_8x8(out, -shift[0]);
       idct8x8_sse4_1(out, in, col_cfg->cos_bit[2]);
       write_buffer_8x8(in, output, stride, 0, 0, -shift[1], bd);
       break;
@@ -841,6 +842,7 @@
       transpose_8x8(in, out);
       iadst8x8_sse4_1(out, in, row_cfg->cos_bit[2]);
       transpose_8x8(in, out);
+      round_shift_8x8(out, -shift[0]);
       idct8x8_sse4_1(out, in, col_cfg->cos_bit[2]);
       write_buffer_8x8(in, output, stride, 0, 0, -shift[1], bd);
       break;
@@ -851,6 +853,7 @@
       transpose_8x8(in, out);
       idct8x8_sse4_1(out, in, row_cfg->cos_bit[2]);
       transpose_8x8(in, out);
+      round_shift_8x8(out, -shift[0]);
       iadst8x8_sse4_1(out, in, col_cfg->cos_bit[2]);
       write_buffer_8x8(in, output, stride, 0, 0, -shift[1], bd);
       break;
@@ -861,6 +864,7 @@
       transpose_8x8(in, out);
       iadst8x8_sse4_1(out, in, row_cfg->cos_bit[2]);
       transpose_8x8(in, out);
+      round_shift_8x8(out, -shift[0]);
       iadst8x8_sse4_1(out, in, col_cfg->cos_bit[2]);
       write_buffer_8x8(in, output, stride, 0, 0, -shift[1], bd);
       break;
@@ -871,6 +875,7 @@
       transpose_8x8(in, out);
       idct8x8_sse4_1(out, in, row_cfg->cos_bit[2]);
       transpose_8x8(in, out);
+      round_shift_8x8(out, -shift[0]);
       iadst8x8_sse4_1(out, in, col_cfg->cos_bit[2]);
       write_buffer_8x8(in, output, stride, 0, 1, -shift[1], bd);
       break;
@@ -881,6 +886,7 @@
       transpose_8x8(in, out);
       iadst8x8_sse4_1(out, in, row_cfg->cos_bit[2]);
       transpose_8x8(in, out);
+      round_shift_8x8(out, -shift[0]);
       idct8x8_sse4_1(out, in, col_cfg->cos_bit[2]);
       write_buffer_8x8(in, output, stride, 1, 0, -shift[1], bd);
       break;
@@ -891,6 +897,7 @@
       transpose_8x8(in, out);
       iadst8x8_sse4_1(out, in, row_cfg->cos_bit[2]);
       transpose_8x8(in, out);
+      round_shift_8x8(out, -shift[0]);
       iadst8x8_sse4_1(out, in, col_cfg->cos_bit[2]);
       write_buffer_8x8(in, output, stride, 1, 0, -shift[1], bd);
       break;
@@ -901,6 +908,7 @@
       transpose_8x8(in, out);
       iadst8x8_sse4_1(out, in, row_cfg->cos_bit[2]);
       transpose_8x8(in, out);
+      round_shift_8x8(out, -shift[0]);
       iadst8x8_sse4_1(out, in, col_cfg->cos_bit[2]);
       write_buffer_8x8(in, output, stride, 1, 1, -shift[1], bd);
       break;
@@ -911,6 +919,7 @@
       transpose_8x8(in, out);
       iadst8x8_sse4_1(out, in, row_cfg->cos_bit[2]);
       transpose_8x8(in, out);
+      round_shift_8x8(out, -shift[0]);
       iadst8x8_sse4_1(out, in, col_cfg->cos_bit[2]);
       write_buffer_8x8(in, output, stride, 0, 1, -shift[1], bd);
       break;

diff --git a/test/av1_inv_txfm2d_test.cc b/test/av1_inv_txfm2d_test.cc
index 6a50723..fd7ed86 100644
--- a/test/av1_inv_txfm2d_test.cc
+++ b/test/av1_inv_txfm2d_test.cc

@@ -145,19 +145,19 @@
   for (int t = 0; t < TX_TYPES; ++t) {
     const TX_TYPE tx_type = static_cast<TX_TYPE>(t);
     param_list.push_back(AV1InvTxfm2dParam(tx_type, TX_4X4, 2, 0.002));
-    param_list.push_back(AV1InvTxfm2dParam(tx_type, TX_8X8, 2, 0.025));
+    param_list.push_back(AV1InvTxfm2dParam(tx_type, TX_8X8, 2, 0.05));
     param_list.push_back(AV1InvTxfm2dParam(tx_type, TX_16X16, 2, 0.04));
     param_list.push_back(AV1InvTxfm2dParam(tx_type, TX_32X32, 4, 0.4));
 #if CONFIG_TX64X64
     if (tx_type == DCT_DCT) {  // Other types not supported by these tx sizes.
-      param_list.push_back(AV1InvTxfm2dParam(tx_type, TX_64X64, 3, 0.2));
+      param_list.push_back(AV1InvTxfm2dParam(tx_type, TX_64X64, 3, 0.3));
     }
 #endif  // CONFIG_TX64X64
 
-    param_list.push_back(AV1InvTxfm2dParam(tx_type, TX_4X8, 2, 0.016));
-    param_list.push_back(AV1InvTxfm2dParam(tx_type, TX_8X4, 2, 0.045));
-    param_list.push_back(AV1InvTxfm2dParam(tx_type, TX_8X16, 2, 0.2));
-    param_list.push_back(AV1InvTxfm2dParam(tx_type, TX_16X8, 2, 0.2));
+    param_list.push_back(AV1InvTxfm2dParam(tx_type, TX_4X8, 2, 0.09));
+    param_list.push_back(AV1InvTxfm2dParam(tx_type, TX_8X4, 2, 0.11));
+    param_list.push_back(AV1InvTxfm2dParam(tx_type, TX_8X16, 2, 0.03));
+    param_list.push_back(AV1InvTxfm2dParam(tx_type, TX_16X8, 2, 0.06));
     param_list.push_back(AV1InvTxfm2dParam(tx_type, TX_16X32, 3, 0.4));
     param_list.push_back(AV1InvTxfm2dParam(tx_type, TX_32X16, 3, 0.5));
 

diff --git a/test/fdct8x8_test.cc b/test/fdct8x8_test.cc
index 09e6e01..096a3c6 100644
--- a/test/fdct8x8_test.cc
+++ b/test/fdct8x8_test.cc

@@ -244,9 +244,9 @@
         << "Error: 8x8 FDCT/IDCT or FHT/IHT has an individual"
         << " roundtrip error > 1";
 
-    EXPECT_GE((count_test_block << 2 * (bit_depth_ - 8)) / 5, total_error)
+    EXPECT_GE((count_test_block << 2 * (bit_depth_ - 8)) / 4, total_error)
         << "Error: 8x8 FDCT/IDCT or FHT/IHT has average roundtrip "
-        << "error > 1/5 per block";
+        << "error > 1/4 per block";
   }
 
   void RunExtremalCheck() {
commit	0625027652de1248acef4fa221150b787461e938	[log] [tgz]
author	Angie Chiang <angiebird@google.com>	Tue Jan 16 17:03:35 2018 -0800
committer	Angie Chiang <angiebird@google.com>	Tue Jan 23 18:29:54 2018 +0000
tree	602cf1e2f05ab69773ea566ea456efff76caee27
parent	a8b45c37f5dcee072c9ef1de32374aee9ce1ebf5 [diff]