Use saturating addition for rounding in AVX2 IDCT

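- Found this bug when increasing the number of unit test blocks
  (kCountTestBlock) from 1000 to 10000: the rounding step used
  _mm256_add_epi16, which wraps around on 16-bit overflow, whereas
  _mm256_adds_epi16 saturates.
- The unit test is therefore also updated to run 10000 blocks.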

Change-Id: I938e96f6ebd35ae1bd8affebf8665e1da49a324b
diff --git a/aom_dsp/x86/inv_txfm_common_avx2.h b/aom_dsp/x86/inv_txfm_common_avx2.h
index 7e99be9..4cded19 100644
--- a/aom_dsp/x86/inv_txfm_common_avx2.h
+++ b/aom_dsp/x86/inv_txfm_common_avx2.h
@@ -58,7 +58,7 @@
   int i = 0;
 
   while (i < 16) {
-    in[i] = _mm256_add_epi16(in[i], rounding);
+    in[i] = _mm256_adds_epi16(in[i], rounding);
     in[i] = _mm256_srai_epi16(in[i], IDCT_ROUNDING_POS);
     recon_and_store(&in[i], output + i * stride);
     i += 1;
diff --git a/test/partial_idct_test.cc b/test/partial_idct_test.cc
index 872f33a..0697791 100644
--- a/test/partial_idct_test.cc
+++ b/test/partial_idct_test.cc
@@ -52,7 +52,7 @@
                         TX_SIZE, int, int, int>
     PartialInvTxfmParam;
 const int kMaxNumCoeffs = 1024;
-const int kCountTestBlock = 1000;
+const int kCountTestBlock = 10000;
 
 class PartialIDctTest : public ::testing::TestWithParam<PartialInvTxfmParam> {
  public: