Abstract round operation in apply_temporal_filter()

The rounding operation in the apply_temporal_filter() function
is abstracted into a helper, iroundpf(), for better readability.
The helper handles non-negative values only.

Change-Id: Ic1c0702738771c013e551f16e48b832b419050bd
diff --git a/aom_dsp/mathutils.h b/aom_dsp/mathutils.h
index 035ca39..cbb6cf4 100644
--- a/aom_dsp/mathutils.h
+++ b/aom_dsp/mathutils.h
@@ -87,6 +87,12 @@
   memset(y, 0, n * sizeof(double));
 }
 
+// Round the given non-negative value to the nearest integer.
+static AOM_FORCE_INLINE int iroundpf(float x) {
+  assert(x >= 0.0);  // Zero is accepted; negative inputs are not supported.
+  return (int)(x + 0.5f);  // Add one half, then truncate toward zero.
+}
+
 static INLINE void least_squares_accumulate(double *mat, double *y,
                                             const double *a, double b, int n) {
   for (int i = 0; i < n; i++) {
diff --git a/av1/encoder/arm/neon/temporal_filter_neon.c b/av1/encoder/arm/neon/temporal_filter_neon.c
index f4ec20f..a287ffe 100644
--- a/av1/encoder/arm/neon/temporal_filter_neon.c
+++ b/av1/encoder/arm/neon/temporal_filter_neon.c
@@ -180,8 +180,9 @@
         double scaled_error =
             combined_error * d_factor[subblock_idx] * decay_factor;
         scaled_error = AOMMIN(scaled_error, 7);
-        const int weight =
-            (int)(approx_exp((float)-scaled_error) * TF_WEIGHT_SCALE + 0.5f);
+        const float fweight =
+            approx_exp((float)-scaled_error) * TF_WEIGHT_SCALE;
+        const int weight = iroundpf(fweight);
         accumulator[k] += weight * pixel_value;
         count[k] += weight;
       }
@@ -335,8 +336,9 @@
         double scaled_error =
             combined_error * d_factor[subblock_idx] * decay_factor;
         scaled_error = AOMMIN(scaled_error, 7);
-        const int weight =
-            (int)(approx_exp((float)-scaled_error) * TF_WEIGHT_SCALE + 0.5f);
+        const float fweight =
+            approx_exp((float)-scaled_error) * TF_WEIGHT_SCALE;
+        const int weight = iroundpf(fweight);
         accumulator[k] += weight * pixel_value;
         count[k] += weight;
       }
diff --git a/av1/encoder/temporal_filter.c b/av1/encoder/temporal_filter.c
index a1e4430..7ec4781 100644
--- a/av1/encoder/temporal_filter.c
+++ b/av1/encoder/temporal_filter.c
@@ -699,8 +699,9 @@
         if (tf_wgt_calc_lvl == 0) {
           weight = (int)(exp(-scaled_error) * TF_WEIGHT_SCALE);
         } else {
-          weight =
-              (int)(approx_exp((float)-scaled_error) * TF_WEIGHT_SCALE + 0.5f);
+          const float fweight =
+              approx_exp((float)-scaled_error) * TF_WEIGHT_SCALE;
+          weight = iroundpf(fweight);
         }
 
         const int idx = plane_offset + pred_idx;  // Index with plane shift.
diff --git a/av1/encoder/x86/temporal_filter_avx2.c b/av1/encoder/x86/temporal_filter_avx2.c
index ff1b49f..0631482 100644
--- a/av1/encoder/x86/temporal_filter_avx2.c
+++ b/av1/encoder/x86/temporal_filter_avx2.c
@@ -234,9 +234,9 @@
 
         double scaled_error = combined_error * d_factor_decayed[subblock_idx];
         scaled_error = AOMMIN(scaled_error, 7);
-        const int weight =
-            (int)(approx_exp((float)-scaled_error) * TF_WEIGHT_SCALE + 0.5f);
-
+        const float fweight =
+            approx_exp((float)-scaled_error) * TF_WEIGHT_SCALE;
+        const int weight = iroundpf(fweight);
         count[k] += weight;
         accumulator[k] += weight * pixel_value;
       }
diff --git a/av1/encoder/x86/temporal_filter_sse2.c b/av1/encoder/x86/temporal_filter_sse2.c
index b0eb2f1..842d3b1 100644
--- a/av1/encoder/x86/temporal_filter_sse2.c
+++ b/av1/encoder/x86/temporal_filter_sse2.c
@@ -210,9 +210,9 @@
 
         double scaled_error = combined_error * d_factor_decayed[subblock_idx];
         scaled_error = AOMMIN(scaled_error, 7);
-        const int weight =
-            (int)(approx_exp((float)-scaled_error) * TF_WEIGHT_SCALE + 0.5f);
-
+        const float fweight =
+            approx_exp((float)-scaled_error) * TF_WEIGHT_SCALE;
+        const int weight = iroundpf(fweight);
         count[k] += weight;
         accumulator[k] += weight * pixel_value;
       }