[wedge/compound-segment, normative] Remove more rounding
This reduces the overall rounding in the masked blend process:
the result is now equivalent to applying a single rounding
operation at the end of the prediction process.
This increases the range of the intermediate values inside
aom_blend_a64_d32_mask() by 2 bits, but has no effect on the
ranges of any values outside that function.
Change-Id: I1010ed94c7d8db75bb3d8157c864c5527005725b
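
The single-rounding claim above rests on the identity cited in the code
comments below: ROUND_POWER_OF_TWO(x >> y, z) == ROUND_POWER_OF_TWO(x, y + z)
for non-negative x. As an illustrative sanity check (not part of the commit),
the following standalone C harness verifies the identity exhaustively over a
16-bit range; the ROUND_POWER_OF_TWO macro is written to mirror the libaom
definition.

    #include <assert.h>
    #include <stdio.h>

    /* Mirrors the libaom macro: add half of 2^n, then shift right by n. */
    #define ROUND_POWER_OF_TWO(value, n) (((value) + ((1 << (n)) >> 1)) >> (n))

    int main(void) {
      /* Deferring the rounding is lossless: a truncating shift by y followed
       * by a rounding shift by z equals one rounding shift by y + z. */
      for (int x = 0; x < (1 << 16); ++x) {
        for (int y = 1; y <= 4; ++y) {
          for (int z = 1; z <= 4; ++z) {
            assert(ROUND_POWER_OF_TWO(x >> y, z) ==
                   ROUND_POWER_OF_TWO(x, y + z));
          }
        }
      }
      printf("identity holds for all x in [0, 65536)\n");
      return 0;
    }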
diff --git a/aom_dsp/blend_a64_mask.c b/aom_dsp/blend_a64_mask.c
index 4c42274..c9901dc 100644
--- a/aom_dsp/blend_a64_mask.c
+++ b/aom_dsp/blend_a64_mask.c
@@ -30,6 +30,9 @@
// This works because of the identity:
// ROUND_POWER_OF_TWO(x >> y, z) == ROUND_POWER_OF_TWO(x, y+z)
//
+// In addition, to avoid rounding when subsampling the mask, we upgrade the mask
+// from 6-bit to 8-bit precision before blending.
+//
// In contrast, the output of the non-d32 functions will not be further rounded,
// so we *should* use ROUND_POWER_OF_TWO there.
@@ -51,48 +54,48 @@
if (subw == 0 && subh == 0) {
for (i = 0; i < h; ++i) {
for (j = 0; j < w; ++j) {
- const int m = mask[i * mask_stride + j];
+ const int m = mask[i * mask_stride + j] << 2;
dst[i * dst_stride + j] =
((m * src0[i * src0_stride + j] +
- (AOM_BLEND_A64_MAX_ALPHA - m) * src1[i * src1_stride + j]) >>
- AOM_BLEND_A64_ROUND_BITS);
+ (AOM_BLEND_A256_MAX_ALPHA - m) * src1[i * src1_stride + j]) >>
+ AOM_BLEND_A256_ROUND_BITS);
}
}
} else if (subw == 1 && subh == 1) {
for (i = 0; i < h; ++i) {
for (j = 0; j < w; ++j) {
- const int m = ROUND_POWER_OF_TWO(
- mask[(2 * i) * mask_stride + (2 * j)] +
- mask[(2 * i + 1) * mask_stride + (2 * j)] +
- mask[(2 * i) * mask_stride + (2 * j + 1)] +
- mask[(2 * i + 1) * mask_stride + (2 * j + 1)],
- 2);
+ const int m = (mask[(2 * i) * mask_stride + (2 * j)] +
+ mask[(2 * i + 1) * mask_stride + (2 * j)] +
+ mask[(2 * i) * mask_stride + (2 * j + 1)] +
+ mask[(2 * i + 1) * mask_stride + (2 * j + 1)]);
dst[i * dst_stride + j] =
((m * src0[i * src0_stride + j] +
- (AOM_BLEND_A64_MAX_ALPHA - m) * src1[i * src1_stride + j]) >>
- AOM_BLEND_A64_ROUND_BITS);
+ (AOM_BLEND_A256_MAX_ALPHA - m) * src1[i * src1_stride + j]) >>
+ AOM_BLEND_A256_ROUND_BITS);
}
}
} else if (subw == 1 && subh == 0) {
for (i = 0; i < h; ++i) {
for (j = 0; j < w; ++j) {
- const int m = AOM_BLEND_AVG(mask[i * mask_stride + (2 * j)],
- mask[i * mask_stride + (2 * j + 1)]);
+ const int m = (mask[i * mask_stride + (2 * j)] +
+ mask[i * mask_stride + (2 * j + 1)])
+ << 1;
dst[i * dst_stride + j] =
((m * src0[i * src0_stride + j] +
- (AOM_BLEND_A64_MAX_ALPHA - m) * src1[i * src1_stride + j]) >>
- AOM_BLEND_A64_ROUND_BITS);
+ (AOM_BLEND_A256_MAX_ALPHA - m) * src1[i * src1_stride + j]) >>
+ AOM_BLEND_A256_ROUND_BITS);
}
}
} else {
for (i = 0; i < h; ++i) {
for (j = 0; j < w; ++j) {
- const int m = AOM_BLEND_AVG(mask[(2 * i) * mask_stride + j],
- mask[(2 * i + 1) * mask_stride + j]);
+ const int m = (mask[(2 * i) * mask_stride + j] +
+ mask[(2 * i + 1) * mask_stride + j])
+ << 1;
dst[i * dst_stride + j] =
((m * src0[i * src0_stride + j] +
- (AOM_BLEND_A64_MAX_ALPHA - m) * src1[i * src1_stride + j]) >>
- AOM_BLEND_A64_ROUND_BITS);
+ (AOM_BLEND_A256_MAX_ALPHA - m) * src1[i * src1_stride + j]) >>
+ AOM_BLEND_A256_ROUND_BITS);
}
}
}
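
To see how the pieces fit together, here is an illustrative, standalone sketch
of the subw == 1 && subh == 1 case (not part of the commit). It assumes
AOM_BLEND_A256_MAX_ALPHA == 256 and AOM_BLEND_A256_ROUND_BITS == 8, as the new
code implies, and uses a hypothetical later rounding amount r to stand in for
the rest of the prediction pipeline. Summing the four 6-bit mask samples keeps
the mask at 8-bit scale (0..256, the 2 extra bits of intermediate range noted
in the commit message), and by the identity checked earlier, truncating inside
the blend and rounding once afterwards equals a single rounding of the exact
blend.

    #include <assert.h>
    #include <stdio.h>

    #define ROUND_POWER_OF_TWO(value, n) (((value) + ((1 << (n)) >> 1)) >> (n))

    /* Values implied by the diff, not taken from the libaom headers. */
    #define A256_MAX_ALPHA 256
    #define A256_ROUND_BITS 8

    int main(void) {
      const int r = 4;            /* hypothetical later prediction-stage round */
      const int m1 = 13, m2 = 51; /* two fixed 6-bit mask samples */
      for (int m0 = 0; m0 <= 64; m0 += 7) {
        for (int m3 = 0; m3 <= 64; m3 += 9) {
          /* Sum of four 6-bit samples: 8-bit scale, no rounding. */
          const int m = m0 + m1 + m2 + m3;
          for (int a = 0; a < 4096; a += 37) {  /* non-negative d32 inputs */
            for (int b = 0; b < 4096; b += 41) {
              const int exact = m * a + (A256_MAX_ALPHA - m) * b;
              /* Truncate inside the blend, round once later... */
              const int two_step =
                  ROUND_POWER_OF_TWO(exact >> A256_ROUND_BITS, r);
              /* ...which equals a single rounding of the exact value. */
              assert(two_step == ROUND_POWER_OF_TWO(exact, A256_ROUND_BITS + r));
            }
          }
        }
      }
      printf("blend + later round == single final round of the exact blend\n");
      return 0;
    }

By contrast, the old code rounded the subsampled mask with ROUND_POWER_OF_TWO
or AOM_BLEND_AVG before blending, so the final result could differ by one from
a single end-of-pipeline rounding; the new code removes that intermediate
rounding entirely.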