Increase masked SAD resolution
Return the actual SAD result without downscaling for higher
precision. This slightly improves the coding efficiency:
       avg PSNR   ovr PSNR   SSIM
low    -0.040     -0.048     -0.075
mid    -0.050     -0.062     -0.075
STATS_CHANGED
Change-Id: Ie9302f6271b5c6389a8543c58dcfc8d8153c1874
diff --git a/aom_dsp/sad_av1.c b/aom_dsp/sad_av1.c
index 10c6ac7..1ff3d4f 100644
--- a/aom_dsp/sad_av1.c
+++ b/aom_dsp/sad_av1.c
@@ -35,7 +35,6 @@
b += b_stride;
m += m_stride;
}
- sad = (sad + 31) >> 6;
return sad;
}
@@ -101,7 +100,6 @@
b += b_stride;
m += m_stride;
}
- sad = (sad + 31) >> 6;
return sad;
}
diff --git a/aom_dsp/x86/masked_sad_intrin_avx2.c b/aom_dsp/x86/masked_sad_intrin_avx2.c
index 774fab6..60f0ab3 100644
--- a/aom_dsp/x86/masked_sad_intrin_avx2.c
+++ b/aom_dsp/x86/masked_sad_intrin_avx2.c
@@ -64,7 +64,7 @@
res = _mm256_hadd_epi32(res, res);
res = _mm256_hadd_epi32(res, res);
int32_t sad = _mm256_extract_epi32(res, 0);
- return (sad + 31) >> 6;
+ return sad;
}
static INLINE __m256i xx_loadu2_m128i(const void *hi, const void *lo) {
@@ -117,7 +117,7 @@
res = _mm256_hadd_epi32(res, res);
res = _mm256_hadd_epi32(res, res);
int32_t sad = _mm256_extract_epi32(res, 0);
- return (sad + 31) >> 6;
+ return sad;
}
static INLINE unsigned int aom_masked_sad_avx2(
@@ -253,7 +253,7 @@
res = _mm256_hadd_epi32(res, res);
res = _mm256_hadd_epi32(res, res);
int sad = _mm256_extract_epi32(res, 0) + _mm256_extract_epi32(res, 4);
- return (sad + 31) >> 6;
+ return sad;
}
static INLINE unsigned int highbd_masked_sad16xh_avx2(
@@ -311,7 +311,7 @@
res = _mm256_hadd_epi32(res, res);
res = _mm256_hadd_epi32(res, res);
int sad = _mm256_extract_epi32(res, 0) + _mm256_extract_epi32(res, 4);
- return (sad + 31) >> 6;
+ return sad;
}
static INLINE unsigned int aom_highbd_masked_sad_avx2(
diff --git a/aom_dsp/x86/masked_sad_intrin_ssse3.c b/aom_dsp/x86/masked_sad_intrin_ssse3.c
index a179f2e..7168277 100644
--- a/aom_dsp/x86/masked_sad_intrin_ssse3.c
+++ b/aom_dsp/x86/masked_sad_intrin_ssse3.c
@@ -134,7 +134,7 @@
// At this point, we have two 32-bit partial SADs in lanes 0 and 2 of 'res'.
int32_t sad =
_mm_cvtsi128_si32(res) + _mm_cvtsi128_si32(_mm_srli_si128(res, 8));
- return (sad + 31) >> 6;
+ return sad;
}
unsigned int aom_masked_sad8xh_ssse3(const uint8_t *src_ptr, int src_stride,
@@ -179,7 +179,7 @@
}
int32_t sad =
_mm_cvtsi128_si32(res) + _mm_cvtsi128_si32(_mm_srli_si128(res, 8));
- return (sad + 31) >> 6;
+ return sad;
}
unsigned int aom_masked_sad4xh_ssse3(const uint8_t *src_ptr, int src_stride,
@@ -223,7 +223,7 @@
}
// At this point, the SAD is stored in lane 0 of 'res'
int32_t sad = _mm_cvtsi128_si32(res);
- return (sad + 31) >> 6;
+ return sad;
}
// For width a multiple of 8
@@ -338,7 +338,7 @@
res = _mm_hadd_epi32(res, res);
res = _mm_hadd_epi32(res, res);
int sad = _mm_cvtsi128_si32(res);
- return (sad + 31) >> 6;
+ return sad;
}
unsigned int aom_highbd_masked_sad4xh_ssse3(const uint8_t *src8, int src_stride,
@@ -398,5 +398,5 @@
res = _mm_hadd_epi32(res, res);
res = _mm_hadd_epi32(res, res);
int sad = _mm_cvtsi128_si32(res);
- return (sad + 31) >> 6;
+ return sad;
}