Improve rectangular transform accuracy
By adjusting the internal scaling and rounding in the transforms,
we can reduce the maximum round-trip errors to:
* 8x16 and 16x8: 0 pixel values (i.e., the transforms are exact)
* 16x32: 1 pixel value
* 32x16: 2 pixel values
Change-Id: I0ba691a8d27042dcf1dd5ae81568d07a92d68781
diff --git a/av1/encoder/dct.c b/av1/encoder/dct.c
index 03a0c92..168ffd2 100644
--- a/av1/encoder/dct.c
+++ b/av1/encoder/dct.c
@@ -1337,7 +1337,9 @@
for (i = 0; i < n2; ++i) {
for (j = 0; j < n; ++j) temp_in[j] = out[j + i * n];
ht.rows(temp_in, temp_out);
- for (j = 0; j < n; ++j) output[j + i * n] = temp_out[j] >> 2;
+ for (j = 0; j < n; ++j)
+ output[j + i * n] =
+ saturate_int16(temp_out[j] + 1 + (temp_out[j] < 0)) >> 2;
}
// Note: overall scale factor of transform is 8 times unitary
}
@@ -1388,7 +1390,9 @@
for (i = 0; i < n; ++i) {
for (j = 0; j < n2; ++j) temp_in[j] = out[j + i * n2];
ht.rows(temp_in, temp_out);
- for (j = 0; j < n2; ++j) output[j + i * n2] = temp_out[j] >> 2;
+ for (j = 0; j < n2; ++j)
+ output[j + i * n2] =
+ saturate_int16(temp_out[j] + 1 + (temp_out[j] < 0)) >> 2;
}
// Note: overall scale factor of transform is 8 times unitary
}
@@ -1429,16 +1433,20 @@
// Columns
for (i = 0; i < n; ++i) {
for (j = 0; j < n2; ++j)
- temp_in[j] = (tran_low_t)fdct_round_shift(input[j * stride + i] * Sqrt2);
+ temp_in[j] =
+ (tran_low_t)fdct_round_shift(input[j * stride + i] * 4 * Sqrt2);
ht.cols(temp_in, temp_out);
- for (j = 0; j < n2; ++j) out[j * n + i] = temp_out[j];
+ for (j = 0; j < n2; ++j)
+ out[j * n + i] = (temp_out[j] + 1 + (temp_out[j] < 0)) >> 2;
}
// Rows
for (i = 0; i < n2; ++i) {
for (j = 0; j < n; ++j) temp_in[j] = out[j + i * n];
ht.rows(temp_in, temp_out);
- for (j = 0; j < n; ++j) output[j + i * n] = temp_out[j] >> 2;
+ for (j = 0; j < n; ++j)
+ output[j + i * n] =
+ saturate_int16(temp_out[j] + 1 + (temp_out[j] < 0)) >> 2;
}
// Note: overall scale factor of transform is 4 times unitary
}
@@ -1479,16 +1487,20 @@
// Columns
for (i = 0; i < n2; ++i) {
for (j = 0; j < n; ++j)
- temp_in[j] = (tran_low_t)fdct_round_shift(input[j * stride + i] * Sqrt2);
+ temp_in[j] =
+ (tran_low_t)fdct_round_shift(input[j * stride + i] * 4 * Sqrt2);
ht.cols(temp_in, temp_out);
- for (j = 0; j < n; ++j) out[j * n2 + i] = temp_out[j];
+ for (j = 0; j < n; ++j)
+ out[j * n2 + i] = (temp_out[j] + 1 + (temp_out[j] < 0)) >> 2;
}
// Rows
for (i = 0; i < n; ++i) {
for (j = 0; j < n2; ++j) temp_in[j] = out[j + i * n2];
ht.rows(temp_in, temp_out);
- for (j = 0; j < n2; ++j) output[j + i * n2] = temp_out[j] >> 2;
+ for (j = 0; j < n2; ++j)
+ output[j + i * n2] =
+ saturate_int16(temp_out[j] + 1 + (temp_out[j] < 0)) >> 2;
}
// Note: overall scale factor of transform is 4 times unitary
}
diff --git a/av1/encoder/x86/dct_intrin_sse2.c b/av1/encoder/x86/dct_intrin_sse2.c
index 1f3b669..727ff19 100644
--- a/av1/encoder/x86/dct_intrin_sse2.c
+++ b/av1/encoder/x86/dct_intrin_sse2.c
@@ -796,14 +796,14 @@
if (bit == 2) {
const __m128i const_rounding = _mm_set1_epi16(1);
- res[0] = _mm_add_epi16(res[0], const_rounding);
- res[1] = _mm_add_epi16(res[1], const_rounding);
- res[2] = _mm_add_epi16(res[2], const_rounding);
- res[3] = _mm_add_epi16(res[3], const_rounding);
- res[4] = _mm_add_epi16(res[4], const_rounding);
- res[5] = _mm_add_epi16(res[5], const_rounding);
- res[6] = _mm_add_epi16(res[6], const_rounding);
- res[7] = _mm_add_epi16(res[7], const_rounding);
+ res[0] = _mm_adds_epi16(res[0], const_rounding);
+ res[1] = _mm_adds_epi16(res[1], const_rounding);
+ res[2] = _mm_adds_epi16(res[2], const_rounding);
+ res[3] = _mm_adds_epi16(res[3], const_rounding);
+ res[4] = _mm_adds_epi16(res[4], const_rounding);
+ res[5] = _mm_adds_epi16(res[5], const_rounding);
+ res[6] = _mm_adds_epi16(res[6], const_rounding);
+ res[7] = _mm_adds_epi16(res[7], const_rounding);
}
res[0] = _mm_sub_epi16(res[0], sign0);
@@ -3140,14 +3140,6 @@
scale_sqrt2_8x8_signed(in + 8);
}
-static INLINE void right_shift(__m128i *in, int size, int bit) {
- int i = 0;
- while (i < size) {
- in[i] = _mm_srai_epi16(in[i], bit);
- i += 1;
- }
-}
-
void av1_fht8x16_sse2(const int16_t *input, tran_low_t *output, int stride,
int tx_type) {
__m128i in[16];
@@ -3288,8 +3280,8 @@
#endif
default: assert(0); break;
}
- right_shift(t, 8, 2);
- right_shift(b, 8, 2);
+ right_shift_8x8(t, 2);
+ right_shift_8x8(b, 2);
write_buffer_8x8(output, t, 8);
write_buffer_8x8(output + 64, b, 8);
}
@@ -3424,8 +3416,8 @@
}
array_transpose_8x8(l, l);
array_transpose_8x8(r, r);
- right_shift(l, 8, 2);
- right_shift(r, 8, 2);
+ right_shift_8x8(l, 2);
+ right_shift_8x8(r, 2);
write_buffer_8x8(output, l, 16);
write_buffer_8x8(output + 8, r, 16);
}
@@ -3496,12 +3488,14 @@
}
for (i = 0; i < 16; ++i) {
- intl[i + 0] = _mm_load_si128((const __m128i *)(input + i * stride + 0));
- intr[i + 0] = _mm_load_si128((const __m128i *)(input + i * stride + 8));
- inbl[i + 0] =
- _mm_load_si128((const __m128i *)(input + (i + 16) * stride + 0));
- inbr[i + 0] =
- _mm_load_si128((const __m128i *)(input + (i + 16) * stride + 8));
+ intl[i] = _mm_slli_epi16(
+ _mm_load_si128((const __m128i *)(input + i * stride + 0)), 2);
+ intr[i] = _mm_slli_epi16(
+ _mm_load_si128((const __m128i *)(input + i * stride + 8)), 2);
+ inbl[i] = _mm_slli_epi16(
+ _mm_load_si128((const __m128i *)(input + (i + 16) * stride + 0)), 2);
+ inbr[i] = _mm_slli_epi16(
+ _mm_load_si128((const __m128i *)(input + (i + 16) * stride + 8)), 2);
}
if (fliplr) {
@@ -3526,10 +3520,8 @@
__m128i *restr, __m128i *resbl,
__m128i *resbr) {
int i;
- right_shift(restl, 16, 2);
- right_shift(restr, 16, 2);
- right_shift(resbl, 16, 2);
- right_shift(resbr, 16, 2);
+ right_shift_16x16(restl, restr);
+ right_shift_16x16(resbl, resbr);
for (i = 0; i < 16; ++i) {
store_output(&restl[i], output + i * 16 + 0);
store_output(&restr[i], output + i * 16 + 8);
@@ -3551,24 +3543,32 @@
case DCT_DCT:
load_buffer_16x32(input, intl, intr, inbl, inbr, stride, 0, 0);
fdct32_16col(intl, intr, inbl, inbr);
+ right_shift_16x16(intl, intr);
+ right_shift_16x16(inbl, inbr);
fdct16_sse2(intl, intr);
fdct16_sse2(inbl, inbr);
break;
case ADST_DCT:
load_buffer_16x32(input, intl, intr, inbl, inbr, stride, 0, 0);
fhalfright32_16col(intl, intr, inbl, inbr);
+ right_shift_16x16(intl, intr);
+ right_shift_16x16(inbl, inbr);
fdct16_sse2(intl, intr);
fdct16_sse2(inbl, inbr);
break;
case DCT_ADST:
load_buffer_16x32(input, intl, intr, inbl, inbr, stride, 0, 0);
fdct32_16col(intl, intr, inbl, inbr);
+ right_shift_16x16(intl, intr);
+ right_shift_16x16(inbl, inbr);
fadst16_sse2(intl, intr);
fadst16_sse2(inbl, inbr);
break;
case ADST_ADST:
load_buffer_16x32(input, intl, intr, inbl, inbr, stride, 0, 0);
fhalfright32_16col(intl, intr, inbl, inbr);
+ right_shift_16x16(intl, intr);
+ right_shift_16x16(inbl, inbr);
fadst16_sse2(intl, intr);
fadst16_sse2(inbl, inbr);
break;
@@ -3576,72 +3576,96 @@
case FLIPADST_DCT:
load_buffer_16x32(input, intl, intr, inbl, inbr, stride, 1, 0);
fhalfright32_16col(intl, intr, inbl, inbr);
+ right_shift_16x16(intl, intr);
+ right_shift_16x16(inbl, inbr);
fdct16_sse2(intl, intr);
fdct16_sse2(inbl, inbr);
break;
case DCT_FLIPADST:
load_buffer_16x32(input, intl, intr, inbl, inbr, stride, 0, 1);
fdct32_16col(intl, intr, inbl, inbr);
+ right_shift_16x16(intl, intr);
+ right_shift_16x16(inbl, inbr);
fadst16_sse2(intl, intr);
fadst16_sse2(inbl, inbr);
break;
case FLIPADST_FLIPADST:
load_buffer_16x32(input, intl, intr, inbl, inbr, stride, 1, 1);
fhalfright32_16col(intl, intr, inbl, inbr);
+ right_shift_16x16(intl, intr);
+ right_shift_16x16(inbl, inbr);
fadst16_sse2(intl, intr);
fadst16_sse2(inbl, inbr);
break;
case ADST_FLIPADST:
load_buffer_16x32(input, intl, intr, inbl, inbr, stride, 0, 1);
fhalfright32_16col(intl, intr, inbl, inbr);
+ right_shift_16x16(intl, intr);
+ right_shift_16x16(inbl, inbr);
fadst16_sse2(intl, intr);
fadst16_sse2(inbl, inbr);
break;
case FLIPADST_ADST:
load_buffer_16x32(input, intl, intr, inbl, inbr, stride, 1, 0);
fhalfright32_16col(intl, intr, inbl, inbr);
+ right_shift_16x16(intl, intr);
+ right_shift_16x16(inbl, inbr);
fadst16_sse2(intl, intr);
fadst16_sse2(inbl, inbr);
break;
case IDTX:
load_buffer_16x32(input, intl, intr, inbl, inbr, stride, 0, 0);
fidtx32_16col(intl, intr, inbl, inbr);
+ right_shift_16x16(intl, intr);
+ right_shift_16x16(inbl, inbr);
fidtx16_sse2(intl, intr);
fidtx16_sse2(inbl, inbr);
break;
case V_DCT:
load_buffer_16x32(input, intl, intr, inbl, inbr, stride, 0, 0);
fdct32_16col(intl, intr, inbl, inbr);
+ right_shift_16x16(intl, intr);
+ right_shift_16x16(inbl, inbr);
fidtx16_sse2(intl, intr);
fidtx16_sse2(inbl, inbr);
break;
case H_DCT:
load_buffer_16x32(input, intl, intr, inbl, inbr, stride, 0, 0);
fidtx32_16col(intl, intr, inbl, inbr);
+ right_shift_16x16(intl, intr);
+ right_shift_16x16(inbl, inbr);
fdct16_sse2(intl, intr);
fdct16_sse2(inbl, inbr);
break;
case V_ADST:
load_buffer_16x32(input, intl, intr, inbl, inbr, stride, 0, 0);
fhalfright32_16col(intl, intr, inbl, inbr);
+ right_shift_16x16(intl, intr);
+ right_shift_16x16(inbl, inbr);
fidtx16_sse2(intl, intr);
fidtx16_sse2(inbl, inbr);
break;
case H_ADST:
load_buffer_16x32(input, intl, intr, inbl, inbr, stride, 0, 0);
fidtx32_16col(intl, intr, inbl, inbr);
+ right_shift_16x16(intl, intr);
+ right_shift_16x16(inbl, inbr);
fadst16_sse2(intl, intr);
fadst16_sse2(inbl, inbr);
break;
case V_FLIPADST:
load_buffer_16x32(input, intl, intr, inbl, inbr, stride, 1, 0);
fhalfright32_16col(intl, intr, inbl, inbr);
+ right_shift_16x16(intl, intr);
+ right_shift_16x16(inbl, inbr);
fidtx16_sse2(intl, intr);
fidtx16_sse2(inbl, inbr);
break;
case H_FLIPADST:
load_buffer_16x32(input, intl, intr, inbl, inbr, stride, 0, 1);
fidtx32_16col(intl, intr, inbl, inbr);
+ right_shift_16x16(intl, intr);
+ right_shift_16x16(inbl, inbr);
fadst16_sse2(intl, intr);
fadst16_sse2(inbl, inbr);
break;
@@ -3661,10 +3685,14 @@
}
for (i = 0; i < 16; ++i) {
- in0[i] = _mm_load_si128((const __m128i *)(input + i * stride + 0));
- in1[i] = _mm_load_si128((const __m128i *)(input + i * stride + 8));
- in2[i] = _mm_load_si128((const __m128i *)(input + i * stride + 16));
- in3[i] = _mm_load_si128((const __m128i *)(input + i * stride + 24));
+ in0[i] = _mm_slli_epi16(
+ _mm_load_si128((const __m128i *)(input + i * stride + 0)), 2);
+ in1[i] = _mm_slli_epi16(
+ _mm_load_si128((const __m128i *)(input + i * stride + 8)), 2);
+ in2[i] = _mm_slli_epi16(
+ _mm_load_si128((const __m128i *)(input + i * stride + 16)), 2);
+ in3[i] = _mm_slli_epi16(
+ _mm_load_si128((const __m128i *)(input + i * stride + 24)), 2);
}
if (fliplr) {
@@ -3688,10 +3716,8 @@
__m128i *res1, __m128i *res2,
__m128i *res3) {
int i;
- right_shift(res0, 16, 2);
- right_shift(res1, 16, 2);
- right_shift(res2, 16, 2);
- right_shift(res3, 16, 2);
+ right_shift_16x16(res0, res1);
+ right_shift_16x16(res2, res3);
for (i = 0; i < 16; ++i) {
store_output(&res0[i], output + i * 32 + 0);
store_output(&res1[i], output + i * 32 + 8);
@@ -3709,21 +3735,29 @@
case DCT_DCT:
fdct16_sse2(in0, in1);
fdct16_sse2(in2, in3);
+ right_shift_16x16(in0, in1);
+ right_shift_16x16(in2, in3);
fdct32_16col(in0, in1, in2, in3);
break;
case ADST_DCT:
fadst16_sse2(in0, in1);
fadst16_sse2(in2, in3);
+ right_shift_16x16(in0, in1);
+ right_shift_16x16(in2, in3);
fdct32_16col(in0, in1, in2, in3);
break;
case DCT_ADST:
fdct16_sse2(in0, in1);
fdct16_sse2(in2, in3);
+ right_shift_16x16(in0, in1);
+ right_shift_16x16(in2, in3);
fhalfright32_16col(in0, in1, in2, in3);
break;
case ADST_ADST:
fadst16_sse2(in0, in1);
fadst16_sse2(in2, in3);
+ right_shift_16x16(in0, in1);
+ right_shift_16x16(in2, in3);
fhalfright32_16col(in0, in1, in2, in3);
break;
#if CONFIG_EXT_TX
@@ -3731,72 +3765,96 @@
load_buffer_32x16(input, in0, in1, in2, in3, stride, 1, 0);
fadst16_sse2(in0, in1);
fadst16_sse2(in2, in3);
+ right_shift_16x16(in0, in1);
+ right_shift_16x16(in2, in3);
fdct32_16col(in0, in1, in2, in3);
break;
case DCT_FLIPADST:
load_buffer_32x16(input, in0, in1, in2, in3, stride, 0, 1);
fdct16_sse2(in0, in1);
fdct16_sse2(in2, in3);
+ right_shift_16x16(in0, in1);
+ right_shift_16x16(in2, in3);
fhalfright32_16col(in0, in1, in2, in3);
break;
case FLIPADST_FLIPADST:
load_buffer_32x16(input, in0, in1, in2, in3, stride, 1, 1);
fadst16_sse2(in0, in1);
fadst16_sse2(in2, in3);
+ right_shift_16x16(in0, in1);
+ right_shift_16x16(in2, in3);
fhalfright32_16col(in0, in1, in2, in3);
break;
case ADST_FLIPADST:
load_buffer_32x16(input, in0, in1, in2, in3, stride, 0, 1);
fadst16_sse2(in0, in1);
fadst16_sse2(in2, in3);
+ right_shift_16x16(in0, in1);
+ right_shift_16x16(in2, in3);
fhalfright32_16col(in0, in1, in2, in3);
break;
case FLIPADST_ADST:
load_buffer_32x16(input, in0, in1, in2, in3, stride, 1, 0);
fadst16_sse2(in0, in1);
fadst16_sse2(in2, in3);
+ right_shift_16x16(in0, in1);
+ right_shift_16x16(in2, in3);
fhalfright32_16col(in0, in1, in2, in3);
break;
case IDTX:
load_buffer_32x16(input, in0, in1, in2, in3, stride, 0, 0);
fidtx16_sse2(in0, in1);
fidtx16_sse2(in2, in3);
+ right_shift_16x16(in0, in1);
+ right_shift_16x16(in2, in3);
fidtx32_16col(in0, in1, in2, in3);
break;
case V_DCT:
load_buffer_32x16(input, in0, in1, in2, in3, stride, 0, 0);
fdct16_sse2(in0, in1);
fdct16_sse2(in2, in3);
+ right_shift_16x16(in0, in1);
+ right_shift_16x16(in2, in3);
fidtx32_16col(in0, in1, in2, in3);
break;
case H_DCT:
load_buffer_32x16(input, in0, in1, in2, in3, stride, 0, 0);
fidtx16_sse2(in0, in1);
fidtx16_sse2(in2, in3);
+ right_shift_16x16(in0, in1);
+ right_shift_16x16(in2, in3);
fdct32_16col(in0, in1, in2, in3);
break;
case V_ADST:
load_buffer_32x16(input, in0, in1, in2, in3, stride, 0, 0);
fadst16_sse2(in0, in1);
fadst16_sse2(in2, in3);
+ right_shift_16x16(in0, in1);
+ right_shift_16x16(in2, in3);
fidtx32_16col(in0, in1, in2, in3);
break;
case H_ADST:
load_buffer_32x16(input, in0, in1, in2, in3, stride, 0, 0);
fidtx16_sse2(in0, in1);
fidtx16_sse2(in2, in3);
+ right_shift_16x16(in0, in1);
+ right_shift_16x16(in2, in3);
fhalfright32_16col(in0, in1, in2, in3);
break;
case V_FLIPADST:
load_buffer_32x16(input, in0, in1, in2, in3, stride, 1, 0);
fadst16_sse2(in0, in1);
fadst16_sse2(in2, in3);
+ right_shift_16x16(in0, in1);
+ right_shift_16x16(in2, in3);
fidtx32_16col(in0, in1, in2, in3);
break;
case H_FLIPADST:
load_buffer_32x16(input, in0, in1, in2, in3, stride, 0, 1);
fidtx16_sse2(in0, in1);
fidtx16_sse2(in2, in3);
+ right_shift_16x16(in0, in1);
+ right_shift_16x16(in2, in3);
fhalfright32_16col(in0, in1, in2, in3);
break;
#endif
diff --git a/test/av1_fht16x32_test.cc b/test/av1_fht16x32_test.cc
index d45fcde..0bba3d6 100644
--- a/test/av1_fht16x32_test.cc
+++ b/test/av1_fht16x32_test.cc
@@ -69,11 +69,11 @@
IhtFunc inv_txfm_;
};
-TEST_P(AV1Trans16x32HT, AccuracyCheck) { RunAccuracyCheck(48); }
+TEST_P(AV1Trans16x32HT, AccuracyCheck) { RunAccuracyCheck(1); }
TEST_P(AV1Trans16x32HT, CoeffCheck) { RunCoeffCheck(); }
TEST_P(AV1Trans16x32HT, MemCheck) { RunMemCheck(); }
TEST_P(AV1Trans16x32HT, InvCoeffCheck) { RunInvCoeffCheck(); }
-TEST_P(AV1Trans16x32HT, InvAccuracyCheck) { RunInvAccuracyCheck(9); }
+TEST_P(AV1Trans16x32HT, InvAccuracyCheck) { RunInvAccuracyCheck(1); }
using std::tr1::make_tuple;
const Ht16x32Param kArrayHt16x32Param_c[] = {
diff --git a/test/av1_fht16x8_test.cc b/test/av1_fht16x8_test.cc
index a70356a..fb0b8e8 100644
--- a/test/av1_fht16x8_test.cc
+++ b/test/av1_fht16x8_test.cc
@@ -69,11 +69,11 @@
IhtFunc inv_txfm_;
};
-TEST_P(AV1Trans16x8HT, AccuracyCheck) { RunAccuracyCheck(1); }
+TEST_P(AV1Trans16x8HT, AccuracyCheck) { RunAccuracyCheck(0); }
TEST_P(AV1Trans16x8HT, CoeffCheck) { RunCoeffCheck(); }
TEST_P(AV1Trans16x8HT, MemCheck) { RunMemCheck(); }
TEST_P(AV1Trans16x8HT, InvCoeffCheck) { RunInvCoeffCheck(); }
-TEST_P(AV1Trans16x8HT, InvAccuracyCheck) { RunInvAccuracyCheck(1); }
+TEST_P(AV1Trans16x8HT, InvAccuracyCheck) { RunInvAccuracyCheck(0); }
using std::tr1::make_tuple;
diff --git a/test/av1_fht32x16_test.cc b/test/av1_fht32x16_test.cc
index 2470b83..f86e305 100644
--- a/test/av1_fht32x16_test.cc
+++ b/test/av1_fht32x16_test.cc
@@ -70,10 +70,10 @@
};
TEST_P(AV1Trans32x16HT, MemCheck) { RunMemCheck(); }
-TEST_P(AV1Trans32x16HT, AccuracyCheck) { RunAccuracyCheck(43); }
+TEST_P(AV1Trans32x16HT, AccuracyCheck) { RunAccuracyCheck(2); }
TEST_P(AV1Trans32x16HT, CoeffCheck) { RunCoeffCheck(); }
TEST_P(AV1Trans32x16HT, InvCoeffCheck) { RunInvCoeffCheck(); }
-TEST_P(AV1Trans32x16HT, InvAccuracyCheck) { RunInvAccuracyCheck(9); }
+TEST_P(AV1Trans32x16HT, InvAccuracyCheck) { RunInvAccuracyCheck(1); }
using std::tr1::make_tuple;
const Ht32x16Param kArrayHt32x16Param_c[] = {
diff --git a/test/av1_fht8x16_test.cc b/test/av1_fht8x16_test.cc
index e3e1819..7936074 100644
--- a/test/av1_fht8x16_test.cc
+++ b/test/av1_fht8x16_test.cc
@@ -70,10 +70,10 @@
};
TEST_P(AV1Trans8x16HT, MemCheck) { RunMemCheck(); }
-TEST_P(AV1Trans8x16HT, AccuracyCheck) { RunAccuracyCheck(1); }
+TEST_P(AV1Trans8x16HT, AccuracyCheck) { RunAccuracyCheck(0); }
TEST_P(AV1Trans8x16HT, CoeffCheck) { RunCoeffCheck(); }
TEST_P(AV1Trans8x16HT, InvCoeffCheck) { RunInvCoeffCheck(); }
-TEST_P(AV1Trans8x16HT, InvAccuracyCheck) { RunInvAccuracyCheck(1); }
+TEST_P(AV1Trans8x16HT, InvAccuracyCheck) { RunInvAccuracyCheck(0); }
using std::tr1::make_tuple;