[wedge/compound-segment, normative] Remove more rounding

This reduces the overall rounding in the masked blend process: the
result is now equivalent to performing a single rounding operation at
the end of the prediction process.
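
For reference, deferring the rounding is justified by the identity
quoted in the code comment below. A minimal worked example, assuming
the usual aom_dsp definition
ROUND_POWER_OF_TWO(x, n) == ((x) + ((1 << (n)) >> 1)) >> (n):

  ROUND_POWER_OF_TWO(x >> y, z) == ROUND_POWER_OF_TWO(x, y + z)
  e.g. x = 1234, y = 3, z = 2:
    lhs = ROUND_POWER_OF_TWO(1234 >> 3, 2) = (154 + 2) >> 2   = 39
    rhs = ROUND_POWER_OF_TWO(1234, 5)      = (1234 + 16) >> 5 = 39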

This increases the range of the intermediate values inside
aom_blend_a64_d32_mask() by 2 bits, but has no effect on the
ranges of any values outside that function.
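
To see where the 2 extra bits come from (assuming, consistently with
the 6-bit to 8-bit mask upgrade below, that AOM_BLEND_A256_MAX_ALPHA
is 256 and AOM_BLEND_A256_ROUND_BITS is 8):

  old: m in [0, 64],  acc = m * s0 + (64 - m)  * s1, then acc >> 6
  new: m in [0, 256], acc = m * s0 + (256 - m) * s1, then acc >> 8

The accumulator grows by at most a factor of 4 (2 bits), and the wider
final shift restores the original output scale.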

Change-Id: I1010ed94c7d8db75bb3d8157c864c5527005725b
diff --git a/aom_dsp/blend_a64_mask.c b/aom_dsp/blend_a64_mask.c
index 4c42274..c9901dc 100644
--- a/aom_dsp/blend_a64_mask.c
+++ b/aom_dsp/blend_a64_mask.c
@@ -30,6 +30,9 @@
 // This works because of the identity:
 // ROUND_POWER_OF_TWO(x >> y, z) == ROUND_POWER_OF_TWO(x, y+z)
 //
+// In addition, to avoid rounding when subsampling the mask, we upgrade the mask
+// from 6-bit to 8-bit precision before blending.
+//
 // In contrast, the output of the non-d32 functions will not be further rounded,
 // so we *should* use ROUND_POWER_OF_TWO there.
 
@@ -51,48 +54,48 @@
   if (subw == 0 && subh == 0) {
     for (i = 0; i < h; ++i) {
       for (j = 0; j < w; ++j) {
-        const int m = mask[i * mask_stride + j];
+        const int m = mask[i * mask_stride + j] << 2;
         dst[i * dst_stride + j] =
             ((m * src0[i * src0_stride + j] +
-              (AOM_BLEND_A64_MAX_ALPHA - m) * src1[i * src1_stride + j]) >>
-             AOM_BLEND_A64_ROUND_BITS);
+              (AOM_BLEND_A256_MAX_ALPHA - m) * src1[i * src1_stride + j]) >>
+             AOM_BLEND_A256_ROUND_BITS);
       }
     }
   } else if (subw == 1 && subh == 1) {
     for (i = 0; i < h; ++i) {
       for (j = 0; j < w; ++j) {
-        const int m = ROUND_POWER_OF_TWO(
-            mask[(2 * i) * mask_stride + (2 * j)] +
-                mask[(2 * i + 1) * mask_stride + (2 * j)] +
-                mask[(2 * i) * mask_stride + (2 * j + 1)] +
-                mask[(2 * i + 1) * mask_stride + (2 * j + 1)],
-            2);
+        const int m = (mask[(2 * i) * mask_stride + (2 * j)] +
+                       mask[(2 * i + 1) * mask_stride + (2 * j)] +
+                       mask[(2 * i) * mask_stride + (2 * j + 1)] +
+                       mask[(2 * i + 1) * mask_stride + (2 * j + 1)]);
         dst[i * dst_stride + j] =
             ((m * src0[i * src0_stride + j] +
-              (AOM_BLEND_A64_MAX_ALPHA - m) * src1[i * src1_stride + j]) >>
-             AOM_BLEND_A64_ROUND_BITS);
+              (AOM_BLEND_A256_MAX_ALPHA - m) * src1[i * src1_stride + j]) >>
+             AOM_BLEND_A256_ROUND_BITS);
       }
     }
   } else if (subw == 1 && subh == 0) {
     for (i = 0; i < h; ++i) {
       for (j = 0; j < w; ++j) {
-        const int m = AOM_BLEND_AVG(mask[i * mask_stride + (2 * j)],
-                                    mask[i * mask_stride + (2 * j + 1)]);
+        const int m = (mask[i * mask_stride + (2 * j)] +
+                       mask[i * mask_stride + (2 * j + 1)])
+                      << 1;
         dst[i * dst_stride + j] =
             ((m * src0[i * src0_stride + j] +
-              (AOM_BLEND_A64_MAX_ALPHA - m) * src1[i * src1_stride + j]) >>
-             AOM_BLEND_A64_ROUND_BITS);
+              (AOM_BLEND_A256_MAX_ALPHA - m) * src1[i * src1_stride + j]) >>
+             AOM_BLEND_A256_ROUND_BITS);
       }
     }
   } else {
     for (i = 0; i < h; ++i) {
       for (j = 0; j < w; ++j) {
-        const int m = AOM_BLEND_AVG(mask[(2 * i) * mask_stride + j],
-                                    mask[(2 * i + 1) * mask_stride + j]);
+        const int m = (mask[(2 * i) * mask_stride + j] +
+                       mask[(2 * i + 1) * mask_stride + j])
+                      << 1;
         dst[i * dst_stride + j] =
             ((m * src0[i * src0_stride + j] +
-              (AOM_BLEND_A64_MAX_ALPHA - m) * src1[i * src1_stride + j]) >>
-             AOM_BLEND_A64_ROUND_BITS);
+              (AOM_BLEND_A256_MAX_ALPHA - m) * src1[i * src1_stride + j]) >>
+             AOM_BLEND_A256_ROUND_BITS);
       }
     }
   }
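
A standalone sketch (not part of the patch) contrasting the old and new
subw == 1, subh == 0 paths for one pixel. The macro bodies mirror
aom_dsp/aom_dsp_common.h and aom_dsp/blend.h; the sample values are
arbitrary, and the A256 constants are assumed to be 256 and 8 as above:

  #include <stdio.h>

  #define ROUND_POWER_OF_TWO(value, n) (((value) + ((1 << (n)) >> 1)) >> (n))
  #define AOM_BLEND_AVG(v0, v1) ROUND_POWER_OF_TWO((v0) + (v1), 1)

  int main(void) {
    const int m0 = 37, m1 = 38;     /* adjacent 6-bit mask samples */
    const int s0 = 9000, s1 = 1200; /* example 32-bit intermediates */

    /* Old path: average the mask samples (rounds once per pixel),
     * then blend with 6-bit weights. */
    const int m_old = AOM_BLEND_AVG(m0, m1);                  /* 38 */
    const int px_old = (m_old * s0 + (64 - m_old) * s1) >> 6; /* 5831 */

    /* New path: keep the exact sum and promote it to an 8-bit weight,
     * so no rounding happens until the end of prediction. */
    const int m_new = (m0 + m1) << 1;                          /* 150 */
    const int px_new = (m_new * s0 + (256 - m_new) * s1) >> 8; /* 5770 */

    /* The difference of 61 is the old per-pixel mask-rounding error:
     * (4 * m_old - m_new) * (s0 - s1) / 256 = 2 * 7800 / 256 ~= 61. */
    printf("old %d, new %d\n", px_old, px_new);
    return 0;
  }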