Use shifts instead of division for x_step_q4 and y_step_q4

We replace the division with a rounding right shift of the already
calculated 'x_scale_fp' and 'y_scale_fp'.

That way, hardware can do it on-the-fly instead of using a set of
programmable registers.

BUG=aomedia:343

Change-Id: I99d93dec5327e835ce288212ab75b8a75a8fe520
diff --git a/av1/common/scale.c b/av1/common/scale.c
index d5ccdfe..a581a5b 100644
--- a/av1/common/scale.c
+++ b/av1/common/scale.c
@@ -46,12 +46,9 @@
   return ((other_size << REF_SCALE_SHIFT) + this_size / 2) / this_size;
 }
 
-static int get_coarse_point_scale_factor(int other_size, int this_size) {
-  // Calculate scaling factor once for each reference frame
-  // and use fixed point scaling factors in decoding and encoding routines.
-  // Hardware implementations can calculate scale factor in device driver
-  // and use multiplication and shifting on hardware instead of division.
-  return ((other_size << SCALE_SUBPEL_BITS) + this_size / 2) / this_size;
+// Given the fixed point scale, calculate coarse point scale.
+static int fixed_point_scale_to_coarse_point_scale(int scale_fp) {
+  return ROUND_POWER_OF_TWO(scale_fp, REF_SCALE_SHIFT - SCALE_SUBPEL_BITS);
 }
 
 // Note: x and y are integer precision, mvq4 is q4 precision.
@@ -81,8 +78,8 @@
   sf->x_scale_fp = get_fixed_point_scale_factor(other_w, this_w);
   sf->y_scale_fp = get_fixed_point_scale_factor(other_h, this_h);
 
-  sf->x_step_q4 = get_coarse_point_scale_factor(other_w, this_w);
-  sf->y_step_q4 = get_coarse_point_scale_factor(other_h, this_h);
+  sf->x_step_q4 = fixed_point_scale_to_coarse_point_scale(sf->x_scale_fp);
+  sf->y_step_q4 = fixed_point_scale_to_coarse_point_scale(sf->y_scale_fp);
 
   if (av1_is_scaled(sf)) {
     sf->scale_value_x = scaled_x;