Add CONVOLVE_POST_ROUNDING flag

When CONVOLVE_POST_ROUNDING is turned on, the FILTER_BITS rounding in
compound inter prediction mode is moved to after the summation of the
two predictions.

Note that post rounding is only applied to non-sub8x8 blocks.

       PSNR     BDRate
lowres -0.808%  -0.673%

Change-Id: Ib91304e6122c24d832a582ab9f5757d33eac876c
diff --git a/av1/common/reconinter.h b/av1/common/reconinter.h
index 19c8032..a5d5422 100644
--- a/av1/common/reconinter.h
+++ b/av1/common/reconinter.h
@@ -44,13 +44,15 @@
 
 #if CONFIG_DUAL_FILTER
   if (interp_filter_params_x.taps == SUBPEL_TAPS &&
-      interp_filter_params_y.taps == SUBPEL_TAPS && w > 2 && h > 2) {
+      interp_filter_params_y.taps == SUBPEL_TAPS && w > 2 && h > 2 &&
+      conv_params->round == CONVOLVE_OPT_ROUND) {
     const int16_t *kernel_x =
         av1_get_interp_filter_subpel_kernel(interp_filter_params_x, subpel_x);
     const int16_t *kernel_y =
         av1_get_interp_filter_subpel_kernel(interp_filter_params_y, subpel_y);
 #else
-  if (interp_filter_params.taps == SUBPEL_TAPS && w > 2 && h > 2) {
+  if (interp_filter_params.taps == SUBPEL_TAPS && w > 2 && h > 2 &&
+      conv_params->round == CONVOLVE_OPT_ROUND) {
     const int16_t *kernel_x =
         av1_get_interp_filter_subpel_kernel(interp_filter_params, subpel_x);
     const int16_t *kernel_y =