Clean up and use do_post_rounding consistently

This patch simplifies how the do_post_rounding flag is set and makes
its use consistent across all convolve/warp operations. The flag is
now always set to 1 for compound modes, but the masked blending
operation can subsequently turn it off.
To accomplish this, the warped case now uses the conv_params->dst
buffer in the same way as the non-warp convolves: the buffer is
used only when is_compound is 1.

Change-Id: If3a37ffa65c3ca75f1df66ca427e6b5df86ac72f
diff --git a/av1/common/reconinter.c b/av1/common/reconinter.c
index ac18606..1ae7e53 100644
--- a/av1/common/reconinter.c
+++ b/av1/common/reconinter.c
@@ -69,6 +69,8 @@
   // Make sure the selected motion mode is valid for this configuration
   assert_motion_mode_valid(mi->mbmi.motion_mode, xd->global_motion, xd, mi);
 
+  assert(IMPLIES(conv_params->is_compound, conv_params->dst != NULL));
+
   WarpedMotionParams final_warp_params;
   const int do_warp =
       (w >= 8 && h >= 8 &&
@@ -86,20 +88,17 @@
                    pre_buf->buf0, pre_buf->width, pre_buf->height,
                    pre_buf->stride, dst, p_col, p_row, w, h, dst_stride,
                    pd->subsampling_x, pd->subsampling_y, conv_params);
-    assert(IMPLIES(conv_params->dst != NULL, conv_params->do_post_rounding));
-    assert(IMPLIES(conv_params->dst == NULL, !conv_params->do_post_rounding));
   } else if (xd->cur_buf->flags & YV12_FLAG_HIGHBITDEPTH) {
     highbd_inter_predictor(src, src_stride, dst, dst_stride, subpel_x, subpel_y,
                            sf, w, h, conv_params, interp_filters, xs, ys,
                            xd->bd);
-    assert(IMPLIES(conv_params->is_compound, conv_params->do_post_rounding));
-    assert(!(conv_params->is_compound && conv_params->dst == NULL));
   } else {
     inter_predictor(src, src_stride, dst, dst_stride, subpel_x, subpel_y, sf, w,
                     h, conv_params, interp_filters, xs, ys);
-    assert(IMPLIES(conv_params->is_compound, conv_params->do_post_rounding));
-    assert(!(conv_params->is_compound && conv_params->dst == NULL));
   }
+  // For compound, do_post_rounding is always 1.
+  // For masked compound, this flag will be turned off after the blend stage.
+  conv_params->do_post_rounding = conv_params->is_compound;
 }
 
 #define NSMOOTHERS 1
@@ -1244,8 +1243,6 @@
             subpel_params[ref].ys, xd);
     }
 
-    // if (!is_masked_compound_type(mi->mbmi.interinter_compound_type))
-    //   assert(conv_params.do_post_rounding);
     // TODO(angiebird): This part needs optimization
     if (conv_params.do_post_rounding) {
       assert(!is_masked_compound_type(mi->mbmi.interinter_compound_type));
diff --git a/av1/common/reconinter.h b/av1/common/reconinter.h
index 839cba8..b7a41b1 100644
--- a/av1/common/reconinter.h
+++ b/av1/common/reconinter.h
@@ -58,10 +58,6 @@
       av1_convolve_2d_facade(src, src_stride, dst, dst_stride, w, h,
                              interp_filters, subpel_x, xs, subpel_y, ys, 1,
                              conv_params, sf);
-      if (conv_params->is_compound)
-        conv_params->do_post_rounding = 1;
-      else
-        conv_params->do_post_rounding = 0;
     } else {
       assert(conv_params->round == CONVOLVE_OPT_ROUND);
       av1_convolve_scale(src, src_stride, dst, dst_stride, w, h, interp_filters,
@@ -81,10 +77,6 @@
                              interp_filters, subpel_x, xs, subpel_y, ys, 0,
                              conv_params, sf);
 
-      if (conv_params->is_compound)
-        conv_params->do_post_rounding = 1;
-      else
-        conv_params->do_post_rounding = 0;
     } else {
       assert(conv_params->round == CONVOLVE_OPT_ROUND);
 
@@ -132,10 +124,6 @@
       av1_highbd_convolve_2d_facade(src, src_stride, dst, dst_stride, w, h,
                                     interp_filters, subpel_x, xs, subpel_y, ys,
                                     1, conv_params, bd);
-      if (conv_params->is_compound)
-        conv_params->do_post_rounding = 1;
-      else
-        conv_params->do_post_rounding = 0;
     } else {
       av1_highbd_convolve_scale(src, src_stride, dst, dst_stride, w, h,
                                 interp_filters, subpel_x, xs, subpel_y, ys, avg,
@@ -154,10 +142,6 @@
       av1_highbd_convolve_2d_facade(src, src_stride, dst, dst_stride, w, h,
                                     interp_filters, subpel_x, xs, subpel_y, ys,
                                     0, conv_params, bd);
-      if (conv_params->is_compound)
-        conv_params->do_post_rounding = 1;
-      else
-        conv_params->do_post_rounding = 0;
     } else {
       InterpFilterParams filter_params_x, filter_params_y;
 #if CONFIG_SHORT_FILTER
diff --git a/av1/common/warped_motion.c b/av1/common/warped_motion.c
index 0d62acb..fc4d8da 100644
--- a/av1/common/warped_motion.c
+++ b/av1/common/warped_motion.c
@@ -422,21 +422,17 @@
                               ConvolveParams *conv_params, int16_t alpha,
                               int16_t beta, int16_t gamma, int16_t delta) {
   int32_t tmp[15 * 8];
-  const int use_conv_params =
-      (conv_params->round == CONVOLVE_OPT_NO_ROUND && conv_params->dst);
-  int reduce_bits_horiz = conv_params->round_0;
-  if (!use_conv_params && bd + FILTER_BITS + 2 - reduce_bits_horiz > 16)
-    reduce_bits_horiz += bd + FILTER_BITS - reduce_bits_horiz - 14;
-  const int reduce_bits_vert = use_conv_params
+  const int reduce_bits_horiz =
+      conv_params->round_0 +
+      AOMMAX(bd + FILTER_BITS - conv_params->round_0 - 14, 0);
+  const int reduce_bits_vert = conv_params->is_compound
                                    ? conv_params->round_1
                                    : 2 * FILTER_BITS - reduce_bits_horiz;
   const int max_bits_horiz = bd + FILTER_BITS + 1 - reduce_bits_horiz;
   const int offset_bits_horiz = bd + FILTER_BITS - 1;
   const int offset_bits_vert = bd + 2 * FILTER_BITS - reduce_bits_horiz;
-  if (use_conv_params) {
-    conv_params->do_post_rounding = 1;
-  }
   (void)max_bits_horiz;
+  assert(IMPLIES(conv_params->is_compound, conv_params->dst != NULL));
 
   for (int i = p_row; i < p_row + p_height; i += 8) {
     for (int j = p_col; j < p_col + p_width; j += 8) {
@@ -500,7 +496,7 @@
             sum += tmp[(k + m + 4) * 8 + (l + 4)] * coeffs[m];
           }
 
-          if (use_conv_params) {
+          if (conv_params->is_compound) {
             CONV_BUF_TYPE *p =
                 &conv_params
                      ->dst[(i - p_row + k + 4) * conv_params->dst_stride +
@@ -716,19 +712,15 @@
                        int16_t gamma, int16_t delta) {
   int32_t tmp[15 * 8];
   const int bd = 8;
-  const int use_conv_params =
-      (conv_params->round == CONVOLVE_OPT_NO_ROUND && conv_params->dst);
   const int reduce_bits_horiz = conv_params->round_0;
-  const int reduce_bits_vert = use_conv_params
+  const int reduce_bits_vert = conv_params->is_compound
                                    ? conv_params->round_1
                                    : 2 * FILTER_BITS - reduce_bits_horiz;
   const int max_bits_horiz = bd + FILTER_BITS + 1 - reduce_bits_horiz;
   const int offset_bits_horiz = bd + FILTER_BITS - 1;
   const int offset_bits_vert = bd + 2 * FILTER_BITS - reduce_bits_horiz;
-  if (use_conv_params) {
-    conv_params->do_post_rounding = 1;
-  }
   (void)max_bits_horiz;
+  assert(IMPLIES(conv_params->is_compound, conv_params->dst != NULL));
 
   for (int i = p_row; i < p_row + p_height; i += 8) {
     for (int j = p_col; j < p_col + p_width; j += 8) {
@@ -798,7 +790,7 @@
             sum += tmp[(k + m + 4) * 8 + (l + 4)] * coeffs[m];
           }
 
-          if (use_conv_params) {
+          if (conv_params->is_compound) {
             CONV_BUF_TYPE *p =
                 &conv_params
                      ->dst[(i - p_row + k + 4) * conv_params->dst_stride +
diff --git a/av1/common/x86/highbd_warp_plane_sse4.c b/av1/common/x86/highbd_warp_plane_sse4.c
index 700d48f..5647eb3 100644
--- a/av1/common/x86/highbd_warp_plane_sse4.c
+++ b/av1/common/x86/highbd_warp_plane_sse4.c
@@ -24,19 +24,14 @@
   int comp_avg = conv_params->do_average;
   __m128i tmp[15];
   int i, j, k;
-  const int use_conv_params =
-      (conv_params->round == CONVOLVE_OPT_NO_ROUND && conv_params->dst);
-  int reduce_bits_horiz = conv_params->round_0;
-  if (!use_conv_params && bd + FILTER_BITS + 2 - reduce_bits_horiz > 16)
-    reduce_bits_horiz += bd + FILTER_BITS - reduce_bits_horiz - 14;
-  const int reduce_bits_vert = use_conv_params
+  const int reduce_bits_horiz =
+      conv_params->round_0 +
+      AOMMAX(bd + FILTER_BITS - conv_params->round_0 - 14, 0);
+  const int reduce_bits_vert = conv_params->is_compound
                                    ? conv_params->round_1
                                    : 2 * FILTER_BITS - reduce_bits_horiz;
   const int offset_bits_horiz = bd + FILTER_BITS - 1;
-  if (use_conv_params) {
-    conv_params->do_post_rounding = 1;
-  }
-  assert(FILTER_BITS == FILTER_BITS);
+  assert(IMPLIES(conv_params->is_compound, conv_params->dst != NULL));
   assert(!(bd == 12 && reduce_bits_horiz < 5));
 
 #if CONFIG_JNT_COMP
@@ -302,7 +297,7 @@
         __m128i res_lo = _mm_unpacklo_epi32(res_even, res_odd);
         __m128i res_hi = _mm_unpackhi_epi32(res_even, res_odd);
 
-        if (use_conv_params) {
+        if (conv_params->is_compound) {
           __m128i *const p =
               (__m128i *)&conv_params
                   ->dst[(i + k + 4) * conv_params->dst_stride + j];
diff --git a/av1/common/x86/warp_plane_sse4.c b/av1/common/x86/warp_plane_sse4.c
index ca20e46..1e8ad47 100644
--- a/av1/common/x86/warp_plane_sse4.c
+++ b/av1/common/x86/warp_plane_sse4.c
@@ -212,16 +212,13 @@
   __m128i tmp[15];
   int i, j, k;
   const int bd = 8;
-  const int use_conv_params =
-      (conv_params->round == CONVOLVE_OPT_NO_ROUND && conv_params->dst);
   const int reduce_bits_horiz = conv_params->round_0;
-  const int reduce_bits_vert = use_conv_params
+  const int reduce_bits_vert = conv_params->is_compound
                                    ? conv_params->round_1
                                    : 2 * FILTER_BITS - reduce_bits_horiz;
   const int offset_bits_horiz = bd + FILTER_BITS - 1;
-  if (use_conv_params) {
-    conv_params->do_post_rounding = 1;
-  }
+  assert(IMPLIES(conv_params->is_compound, conv_params->dst != NULL));
+
 #if CONFIG_JNT_COMP
   const int w0 = conv_params->fwd_offset;
   const int w1 = conv_params->bck_offset;
@@ -475,7 +472,7 @@
         __m128i res_lo = _mm_unpacklo_epi32(res_even, res_odd);
         __m128i res_hi = _mm_unpackhi_epi32(res_even, res_odd);
 
-        if (use_conv_params) {
+        if (conv_params->is_compound) {
           __m128i *const p =
               (__m128i *)&conv_params
                   ->dst[(i + k + 4) * conv_params->dst_stride + j];