Fixed a computation bug in fdct16_sse2(). fdct16_sse2() was not bit-exact with the C reference, fdct16(). The inconsistency was found by writing a unit test for vp10_fht16x16_sse2(). Since the unit test needs a pending change on the inherited base class, I will commit this unit test after making a header file for this base class. Passed the uncommitted unit test: vp10_fht16x16_test.cc. Change-Id: If2b617883c633a3ea90c19e1d018240c8007102b

commit: 68d6a5073a6ffe717c2a8b63166b0e16f5fa9ab7 [log] [tgz]
author: Yi Luo <luoyi@google.com> Wed Mar 02 13:45:52 2016 -0800
committer: Yi Luo <luoyi@google.com> Wed Mar 02 15:20:12 2016 -0800
tree: 48041d383f11e0ca1e1d11e847e055246c0eea1d
parent: 0353f596e9b0763b621d9d15761365f4816f8761 [diff]
diff --git a/vp10/encoder/x86/dct_sse2.c b/vp10/encoder/x86/dct_sse2.c
index 79d1e88..aaf1e6a 100644
--- a/vp10/encoder/x86/dct_sse2.c
+++ b/vp10/encoder/x86/dct_sse2.c

@@ -1635,7 +1635,7 @@
   const __m128i k__cospi_p16_m16 = pair_set_epi16(cospi_16_64, -cospi_16_64);
   const __m128i k__cospi_m16_p16 = pair_set_epi16(-cospi_16_64, cospi_16_64);
   const __m128i k__cospi_p24_p08 = pair_set_epi16(cospi_24_64, cospi_8_64);
-  const __m128i k__cospi_p08_m24 = pair_set_epi16(cospi_8_64, -cospi_24_64);
+  const __m128i k__cospi_m24_m08 = pair_set_epi16(-cospi_24_64, -cospi_8_64);
   const __m128i k__cospi_m08_p24 = pair_set_epi16(-cospi_8_64, cospi_24_64);
   const __m128i k__cospi_p28_p04 = pair_set_epi16(cospi_28_64, cospi_4_64);
   const __m128i k__cospi_m04_p28 = pair_set_epi16(-cospi_4_64, cospi_28_64);
@@ -1839,10 +1839,10 @@
 
   v[0] = _mm_madd_epi16(u[0], k__cospi_m08_p24);
   v[1] = _mm_madd_epi16(u[1], k__cospi_m08_p24);
-  v[2] = _mm_madd_epi16(u[2], k__cospi_p24_p08);
-  v[3] = _mm_madd_epi16(u[3], k__cospi_p24_p08);
-  v[4] = _mm_madd_epi16(u[2], k__cospi_p08_m24);
-  v[5] = _mm_madd_epi16(u[3], k__cospi_p08_m24);
+  v[2] = _mm_madd_epi16(u[2], k__cospi_m24_m08);
+  v[3] = _mm_madd_epi16(u[3], k__cospi_m24_m08);
+  v[4] = _mm_madd_epi16(u[2], k__cospi_m08_p24);
+  v[5] = _mm_madd_epi16(u[3], k__cospi_m08_p24);
   v[6] = _mm_madd_epi16(u[0], k__cospi_p24_p08);
   v[7] = _mm_madd_epi16(u[1], k__cospi_p24_p08);
 
@@ -1872,10 +1872,10 @@
   // stage 5
   s[0] = _mm_add_epi16(p[0], t[1]);
   s[1] = _mm_sub_epi16(p[0], t[1]);
-  s[2] = _mm_add_epi16(p[3], t[2]);
-  s[3] = _mm_sub_epi16(p[3], t[2]);
-  s[4] = _mm_sub_epi16(p[4], t[5]);
-  s[5] = _mm_add_epi16(p[4], t[5]);
+  s[2] = _mm_sub_epi16(p[3], t[2]);
+  s[3] = _mm_add_epi16(p[3], t[2]);
+  s[4] = _mm_add_epi16(p[4], t[5]);
+  s[5] = _mm_sub_epi16(p[4], t[5]);
   s[6] = _mm_sub_epi16(p[7], t[6]);
   s[7] = _mm_add_epi16(p[7], t[6]);
commit	68d6a5073a6ffe717c2a8b63166b0e16f5fa9ab7	[log] [tgz]
author	Yi Luo <luoyi@google.com>	Wed Mar 02 13:45:52 2016 -0800
committer	Yi Luo <luoyi@google.com>	Wed Mar 02 15:20:12 2016 -0800
tree	48041d383f11e0ca1e1d11e847e055246c0eea1d
parent	0353f596e9b0763b621d9d15761365f4816f8761 [diff]