Correct scaling of sad in sad_skip functions for hbd encode
Add bit-depth based scaling to the sad_skip functions by
introducing wrapper functions that down-convert the SAD to the
8-bit domain (right shift by 2 for 10-bit, by 4 for 12-bit).
Results on 10-bit encode show speed improvement for speed
levels 0 to 6.
            Instruction count        BD-Rate Impact(%)
cpu-used      Reduction(%)      avg.psnr  ovr.psnr    ssim
    0             2.47          -0.0107   -0.0022   -0.0154
    1             3.11          -0.0131   -0.0119   -0.0189
    2             2.90          -0.0023   -0.0076   -0.0073
    3             3.02           0.0358    0.0246    0.0658
    4             3.27           0.0425    0.0289    0.0693
    5             4.29           0.0555    0.0501    0.0958
    6             4.57           0.2623    0.283     0.3235
STATS_CHANGED for hbd encoding
Change-Id: Idd3906bf5699c6ed3baa5569960fb81f6e461c66
diff --git a/av1/encoder/encoder.c b/av1/encoder/encoder.c
index b8aea1d..2154fa9 100644
--- a/av1/encoder/encoder.c
+++ b/av1/encoder/encoder.c
@@ -1323,10 +1323,7 @@
SDSFP(BLOCK_8X8, aom_sad_skip_8x8, aom_sad_skip_8x8x4d);
SDSFP(BLOCK_4X16, aom_sad_skip_4x16, aom_sad_skip_4x16x4d);
SDSFP(BLOCK_4X8, aom_sad_skip_4x8, aom_sad_skip_4x8x4d);
- SDSFP(BLOCK_4X16, aom_sad_skip_4x16, aom_sad_skip_4x16x4d);
SDSFP(BLOCK_8X32, aom_sad_skip_8x32, aom_sad_skip_8x32x4d);
- SDSFP(BLOCK_32X8, aom_sad_skip_32x8, aom_sad_skip_32x8x4d);
- SDSFP(BLOCK_64X16, aom_sad_skip_64x16, aom_sad_skip_64x16x4d);
#undef SDSFP
#if CONFIG_AV1_HIGHBITDEPTH
diff --git a/av1/encoder/encoder_utils.h b/av1/encoder/encoder_utils.h
index 40e7c08..37b200f 100644
--- a/av1/encoder/encoder_utils.h
+++ b/av1/encoder/encoder_utils.h
@@ -328,15 +328,6 @@
aom_highbd_masked_sad##WIDTH##x##HEIGHT##_bits##BD, \
aom_highbd_##BD##_masked_sub_pixel_variance##WIDTH##x##HEIGHT)
-#define HIGHBD_SDSFP(BT, SDSF, SDSX4DF) \
- cpi->fn_ptr[BT].sdsf = SDSF; \
- cpi->fn_ptr[BT].sdsx4df = SDSX4DF;
-
-#define HIGHBD_SDSFP_WRAPPER(WIDTH, HEIGHT) \
- HIGHBD_SDSFP(BLOCK_##WIDTH##X##HEIGHT, \
- aom_highbd_sad_skip_##WIDTH##x##HEIGHT, \
- aom_highbd_sad_skip_##WIDTH##x##HEIGHT##x4d)
-
#define MAKE_MBFP_COMPOUND_SAD_WRAPPER(fnname) \
static unsigned int fnname##_bits8( \
const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, \
@@ -387,6 +378,92 @@
MAKE_MBFP_COMPOUND_SAD_WRAPPER(aom_highbd_masked_sad64x16)
#endif
+#define HIGHBD_SDSFP(BT, SDSF, SDSX4DF) /* two statements; uses cpi */ \
+ cpi->fn_ptr[BT].sdsf = SDSF; /* skip-SAD function for block size BT */ \
+ cpi->fn_ptr[BT].sdsx4df = SDSX4DF; /* x4d skip-SAD function for BT */
+/* Registers the _bits<BD> skip-SAD wrappers (plain and x4d) for one size. */
+#define HIGHBD_SDSFP_WRAPPER(WIDTH, HEIGHT, BD) \
+ HIGHBD_SDSFP(BLOCK_##WIDTH##X##HEIGHT, \
+ aom_highbd_sad_skip_##WIDTH##x##HEIGHT##_bits##BD, \
+ aom_highbd_sad_skip_##WIDTH##x##HEIGHT##x4d##_bits##BD)
+/* Defines fnname_bits8/10/12: fnname's result shifted right by 0/2/4 bits. */
+#define MAKE_SDSF_SKIP_SAD_WRAPPER(fnname) \
+ static unsigned int fnname##_bits8(const uint8_t *src, int src_stride, \
+ const uint8_t *ref, int ref_stride) { \
+ return fnname(src, src_stride, ref, ref_stride); /* no scaling */ \
+ } \
+ static unsigned int fnname##_bits10(const uint8_t *src, int src_stride, \
+ const uint8_t *ref, int ref_stride) { \
+ return fnname(src, src_stride, ref, ref_stride) >> 2; /* 10 -> 8 bit */ \
+ } \
+ static unsigned int fnname##_bits12(const uint8_t *src, int src_stride, \
+ const uint8_t *ref, int ref_stride) { \
+ return fnname(src, src_stride, ref, ref_stride) >> 4; /* 12 -> 8 bit */ \
+ }
+/* x4d counterpart: shifts each of the 4 sad_array entries by 0/2/4 bits. */
+#define MAKE_SDSF_SKIP_SAD_4D_WRAPPER(fnname) \
+ static void fnname##_bits8(const uint8_t *src_ptr, int source_stride, \
+ const uint8_t *const ref_ptr[], int ref_stride, \
+ unsigned int *sad_array) { \
+ fnname(src_ptr, source_stride, ref_ptr, ref_stride, sad_array); \
+ } \
+ static void fnname##_bits10(const uint8_t *src_ptr, int source_stride, \
+ const uint8_t *const ref_ptr[], int ref_stride, \
+ unsigned int *sad_array) { \
+ int i; \
+ fnname(src_ptr, source_stride, ref_ptr, ref_stride, sad_array); \
+ for (i = 0; i < 4; i++) sad_array[i] >>= 2; /* 10 -> 8 bit */ \
+ } \
+ static void fnname##_bits12(const uint8_t *src_ptr, int source_stride, \
+ const uint8_t *const ref_ptr[], int ref_stride, \
+ unsigned int *sad_array) { \
+ int i; \
+ fnname(src_ptr, source_stride, ref_ptr, ref_stride, sad_array); \
+ for (i = 0; i < 4; i++) sad_array[i] >>= 4; /* 12 -> 8 bit */ \
+ }
+/* Generate the _bits8/_bits10/_bits12 wrappers for each skip-SAD function. */
+#if CONFIG_AV1_HIGHBITDEPTH
+MAKE_SDSF_SKIP_SAD_WRAPPER(aom_highbd_sad_skip_128x128)
+MAKE_SDSF_SKIP_SAD_WRAPPER(aom_highbd_sad_skip_128x64)
+MAKE_SDSF_SKIP_SAD_WRAPPER(aom_highbd_sad_skip_64x128)
+MAKE_SDSF_SKIP_SAD_WRAPPER(aom_highbd_sad_skip_64x64)
+MAKE_SDSF_SKIP_SAD_WRAPPER(aom_highbd_sad_skip_64x32)
+MAKE_SDSF_SKIP_SAD_WRAPPER(aom_highbd_sad_skip_64x16)
+MAKE_SDSF_SKIP_SAD_WRAPPER(aom_highbd_sad_skip_32x64)
+MAKE_SDSF_SKIP_SAD_WRAPPER(aom_highbd_sad_skip_32x32)
+MAKE_SDSF_SKIP_SAD_WRAPPER(aom_highbd_sad_skip_32x16)
+MAKE_SDSF_SKIP_SAD_WRAPPER(aom_highbd_sad_skip_32x8)
+MAKE_SDSF_SKIP_SAD_WRAPPER(aom_highbd_sad_skip_16x64)
+MAKE_SDSF_SKIP_SAD_WRAPPER(aom_highbd_sad_skip_16x32)
+MAKE_SDSF_SKIP_SAD_WRAPPER(aom_highbd_sad_skip_16x16)
+MAKE_SDSF_SKIP_SAD_WRAPPER(aom_highbd_sad_skip_16x8)
+MAKE_SDSF_SKIP_SAD_WRAPPER(aom_highbd_sad_skip_8x16)
+MAKE_SDSF_SKIP_SAD_WRAPPER(aom_highbd_sad_skip_8x8)
+MAKE_SDSF_SKIP_SAD_WRAPPER(aom_highbd_sad_skip_4x16)
+MAKE_SDSF_SKIP_SAD_WRAPPER(aom_highbd_sad_skip_4x8)
+MAKE_SDSF_SKIP_SAD_WRAPPER(aom_highbd_sad_skip_8x32)
+/* Same set of wrappers for the x4d variants. */
+MAKE_SDSF_SKIP_SAD_4D_WRAPPER(aom_highbd_sad_skip_128x128x4d)
+MAKE_SDSF_SKIP_SAD_4D_WRAPPER(aom_highbd_sad_skip_128x64x4d)
+MAKE_SDSF_SKIP_SAD_4D_WRAPPER(aom_highbd_sad_skip_64x128x4d)
+MAKE_SDSF_SKIP_SAD_4D_WRAPPER(aom_highbd_sad_skip_64x64x4d)
+MAKE_SDSF_SKIP_SAD_4D_WRAPPER(aom_highbd_sad_skip_64x32x4d)
+MAKE_SDSF_SKIP_SAD_4D_WRAPPER(aom_highbd_sad_skip_64x16x4d)
+MAKE_SDSF_SKIP_SAD_4D_WRAPPER(aom_highbd_sad_skip_32x64x4d)
+MAKE_SDSF_SKIP_SAD_4D_WRAPPER(aom_highbd_sad_skip_32x32x4d)
+MAKE_SDSF_SKIP_SAD_4D_WRAPPER(aom_highbd_sad_skip_32x16x4d)
+MAKE_SDSF_SKIP_SAD_4D_WRAPPER(aom_highbd_sad_skip_32x8x4d)
+MAKE_SDSF_SKIP_SAD_4D_WRAPPER(aom_highbd_sad_skip_16x64x4d)
+MAKE_SDSF_SKIP_SAD_4D_WRAPPER(aom_highbd_sad_skip_16x32x4d)
+MAKE_SDSF_SKIP_SAD_4D_WRAPPER(aom_highbd_sad_skip_16x16x4d)
+MAKE_SDSF_SKIP_SAD_4D_WRAPPER(aom_highbd_sad_skip_16x8x4d)
+MAKE_SDSF_SKIP_SAD_4D_WRAPPER(aom_highbd_sad_skip_8x16x4d)
+MAKE_SDSF_SKIP_SAD_4D_WRAPPER(aom_highbd_sad_skip_8x8x4d)
+MAKE_SDSF_SKIP_SAD_4D_WRAPPER(aom_highbd_sad_skip_4x16x4d)
+MAKE_SDSF_SKIP_SAD_4D_WRAPPER(aom_highbd_sad_skip_4x8x4d)
+MAKE_SDSF_SKIP_SAD_4D_WRAPPER(aom_highbd_sad_skip_8x32x4d)
+#endif
+
#define HIGHBD_OBFP(BT, OSDF, OVF, OSVF) \
cpi->fn_ptr[BT].osdf = OSDF; \
cpi->fn_ptr[BT].ovf = OVF; \
@@ -518,6 +595,26 @@
LOWBD_OBFP_WRAPPER(8, 32)
LOWBD_OBFP_WRAPPER(16, 4)
LOWBD_OBFP_WRAPPER(4, 16)
+
+ HIGHBD_SDSFP_WRAPPER(128, 128, 8);
+ HIGHBD_SDSFP_WRAPPER(128, 64, 8);
+ HIGHBD_SDSFP_WRAPPER(64, 128, 8);
+ HIGHBD_SDSFP_WRAPPER(64, 64, 8);
+ HIGHBD_SDSFP_WRAPPER(64, 32, 8);
+ HIGHBD_SDSFP_WRAPPER(64, 16, 8);
+ HIGHBD_SDSFP_WRAPPER(32, 64, 8);
+ HIGHBD_SDSFP_WRAPPER(32, 32, 8);
+ HIGHBD_SDSFP_WRAPPER(32, 16, 8);
+ HIGHBD_SDSFP_WRAPPER(32, 8, 8);
+ HIGHBD_SDSFP_WRAPPER(16, 64, 8);
+ HIGHBD_SDSFP_WRAPPER(16, 32, 8);
+ HIGHBD_SDSFP_WRAPPER(16, 16, 8);
+ HIGHBD_SDSFP_WRAPPER(16, 8, 8);
+ HIGHBD_SDSFP_WRAPPER(8, 16, 8);
+ HIGHBD_SDSFP_WRAPPER(8, 8, 8);
+ HIGHBD_SDSFP_WRAPPER(4, 16, 8);
+ HIGHBD_SDSFP_WRAPPER(4, 8, 8);
+ HIGHBD_SDSFP_WRAPPER(8, 32, 8);
break;
case AOM_BITS_10:
@@ -589,6 +686,26 @@
HIGHBD_OBFP_WRAPPER(8, 32, 10)
HIGHBD_OBFP_WRAPPER(16, 4, 10)
HIGHBD_OBFP_WRAPPER(4, 16, 10)
+
+ HIGHBD_SDSFP_WRAPPER(128, 128, 10);
+ HIGHBD_SDSFP_WRAPPER(128, 64, 10);
+ HIGHBD_SDSFP_WRAPPER(64, 128, 10);
+ HIGHBD_SDSFP_WRAPPER(64, 64, 10);
+ HIGHBD_SDSFP_WRAPPER(64, 32, 10);
+ HIGHBD_SDSFP_WRAPPER(64, 16, 10);
+ HIGHBD_SDSFP_WRAPPER(32, 64, 10);
+ HIGHBD_SDSFP_WRAPPER(32, 32, 10);
+ HIGHBD_SDSFP_WRAPPER(32, 16, 10);
+ HIGHBD_SDSFP_WRAPPER(32, 8, 10);
+ HIGHBD_SDSFP_WRAPPER(16, 64, 10);
+ HIGHBD_SDSFP_WRAPPER(16, 32, 10);
+ HIGHBD_SDSFP_WRAPPER(16, 16, 10);
+ HIGHBD_SDSFP_WRAPPER(16, 8, 10);
+ HIGHBD_SDSFP_WRAPPER(8, 16, 10);
+ HIGHBD_SDSFP_WRAPPER(8, 8, 10);
+ HIGHBD_SDSFP_WRAPPER(4, 16, 10);
+ HIGHBD_SDSFP_WRAPPER(4, 8, 10);
+ HIGHBD_SDSFP_WRAPPER(8, 32, 10);
break;
case AOM_BITS_12:
@@ -660,6 +777,26 @@
HIGHBD_OBFP_WRAPPER(8, 32, 12)
HIGHBD_OBFP_WRAPPER(16, 4, 12)
HIGHBD_OBFP_WRAPPER(4, 16, 12)
+
+ HIGHBD_SDSFP_WRAPPER(128, 128, 12);
+ HIGHBD_SDSFP_WRAPPER(128, 64, 12);
+ HIGHBD_SDSFP_WRAPPER(64, 128, 12);
+ HIGHBD_SDSFP_WRAPPER(64, 64, 12);
+ HIGHBD_SDSFP_WRAPPER(64, 32, 12);
+ HIGHBD_SDSFP_WRAPPER(64, 16, 12);
+ HIGHBD_SDSFP_WRAPPER(32, 64, 12);
+ HIGHBD_SDSFP_WRAPPER(32, 32, 12);
+ HIGHBD_SDSFP_WRAPPER(32, 16, 12);
+ HIGHBD_SDSFP_WRAPPER(32, 8, 12);
+ HIGHBD_SDSFP_WRAPPER(16, 64, 12);
+ HIGHBD_SDSFP_WRAPPER(16, 32, 12);
+ HIGHBD_SDSFP_WRAPPER(16, 16, 12);
+ HIGHBD_SDSFP_WRAPPER(16, 8, 12);
+ HIGHBD_SDSFP_WRAPPER(8, 16, 12);
+ HIGHBD_SDSFP_WRAPPER(8, 8, 12);
+ HIGHBD_SDSFP_WRAPPER(4, 16, 12);
+ HIGHBD_SDSFP_WRAPPER(4, 8, 12);
+ HIGHBD_SDSFP_WRAPPER(8, 32, 12);
break;
default:
@@ -667,29 +804,6 @@
"cm->seq_params.bit_depth should be AOM_BITS_8, "
"AOM_BITS_10 or AOM_BITS_12");
}
-
- HIGHBD_SDSFP_WRAPPER(128, 128);
- HIGHBD_SDSFP_WRAPPER(128, 64);
- HIGHBD_SDSFP_WRAPPER(64, 128);
- HIGHBD_SDSFP_WRAPPER(64, 64);
- HIGHBD_SDSFP_WRAPPER(64, 32);
- HIGHBD_SDSFP_WRAPPER(64, 16);
- HIGHBD_SDSFP_WRAPPER(32, 64);
- HIGHBD_SDSFP_WRAPPER(32, 32);
- HIGHBD_SDSFP_WRAPPER(32, 16);
- HIGHBD_SDSFP_WRAPPER(32, 8);
- HIGHBD_SDSFP_WRAPPER(16, 64);
- HIGHBD_SDSFP_WRAPPER(16, 32);
- HIGHBD_SDSFP_WRAPPER(16, 16);
- HIGHBD_SDSFP_WRAPPER(16, 8);
- HIGHBD_SDSFP_WRAPPER(8, 16);
- HIGHBD_SDSFP_WRAPPER(8, 8);
- HIGHBD_SDSFP_WRAPPER(4, 16);
- HIGHBD_SDSFP_WRAPPER(4, 8);
- HIGHBD_SDSFP_WRAPPER(4, 16);
- HIGHBD_SDSFP_WRAPPER(8, 32);
- HIGHBD_SDSFP_WRAPPER(32, 8);
- HIGHBD_SDSFP_WRAPPER(64, 16);
}
}
#endif // CONFIG_AV1_HIGHBITDEPTH