Remove ROUNDZ_* macros in favor of the ROUND_* ones

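The ROUNDZ_* variants existed only to special-case n == 0, which the
plain macros could not handle: the rounding term (1 << ((n) - 1)) in
ROUND_POWER_OF_TWO() shifts by -1 when n == 0, which is undefined
behavior. Rewriting that term as ((1 << (n)) >> 1) makes it evaluate
to 0 when n == 0, so the plain macros are now valid for all n >= 0
(with value >= 0; negative values still go through
ROUND_POWER_OF_TWO_SIGNED(), which negates before shifting) and the
ROUNDZ_* variants become redundant.

A worked expansion of the new definition (illustrative values):

  ROUND_POWER_OF_TWO(5, 1) == (5 + ((1 << 1) >> 1)) >> 1 == 6 >> 1 == 3
  ROUND_POWER_OF_TWO(5, 0) == (5 + ((1 << 0) >> 1)) >> 0 == 5 >> 0 == 5

Under the old definition the n == 0 case would have evaluated
(1 << -1), i.e. undefined behavior.
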
Change-Id: I263088be8d71018deb9cc6a9d2c66307770b824d
diff --git a/test/encode_test_driver.cc b/test/encode_test_driver.cc
index cd0b136..753a7e4 100644
--- a/test/encode_test_driver.cc
+++ b/test/encode_test_driver.cc
@@ -188,8 +188,8 @@
 
   const unsigned int w_y = img1->d_w;
   const unsigned int h_y = img1->d_h;
-  const unsigned int w_uv = ROUNDZ_POWER_OF_TWO(w_y, img1->x_chroma_shift);
-  const unsigned int h_uv = ROUNDZ_POWER_OF_TWO(h_y, img1->y_chroma_shift);
+  const unsigned int w_uv = ROUND_POWER_OF_TWO(w_y, img1->x_chroma_shift);
+  const unsigned int h_uv = ROUND_POWER_OF_TWO(h_y, img1->y_chroma_shift);
 
   if (img1->fmt != img2->fmt
       || img1->cs != img2->cs
diff --git a/vp10/common/warped_motion.c b/vp10/common/warped_motion.c
index 4990bb3..3b924ea 100644
--- a/vp10/common/warped_motion.c
+++ b/vp10/common/warped_motion.c
@@ -85,19 +85,19 @@
   for (i = 0; i < n; ++i) {
     const int x = *(points++), y = *(points++);
     if (subsampling_x)
-      *(proj++) = ROUNDZ_POWER_OF_TWO_SIGNED(
+      *(proj++) = ROUND_POWER_OF_TWO_SIGNED(
           ((x << (WARPEDMODEL_PREC_BITS + 1)) + mat[0]),
           WARPEDPIXEL_PREC_BITS + 1);
     else
-      *(proj++) = ROUNDZ_POWER_OF_TWO_SIGNED(
+      *(proj++) = ROUND_POWER_OF_TWO_SIGNED(
           ((x << WARPEDMODEL_PREC_BITS)) + mat[0],
           WARPEDPIXEL_PREC_BITS);
     if (subsampling_y)
-      *(proj++) = ROUNDZ_POWER_OF_TWO_SIGNED(
+      *(proj++) = ROUND_POWER_OF_TWO_SIGNED(
           ((y << (WARPEDMODEL_PREC_BITS + 1)) + mat[1]),
           WARPEDPIXEL_PREC_BITS + 1);
     else
-      *(proj++) = ROUNDZ_POWER_OF_TWO_SIGNED(
+      *(proj++) = ROUND_POWER_OF_TWO_SIGNED(
           ((y << WARPEDMODEL_PREC_BITS)) + mat[1],
           WARPEDPIXEL_PREC_BITS);
     points += stride_points - 2;
@@ -115,21 +115,21 @@
   for (i = 0; i < n; ++i) {
     const int x = *(points++), y = *(points++);
     if (subsampling_x)
-      *(proj++) = ROUNDZ_POWER_OF_TWO_SIGNED(
+      *(proj++) = ROUND_POWER_OF_TWO_SIGNED(
           mat[0] * 2 * x + mat[1] * 2 * y + mat[2] +
           (mat[0] + mat[1] - (1 << WARPEDMODEL_PREC_BITS)) / 2,
           WARPEDDIFF_PREC_BITS + 1);
     else
-      *(proj++) = ROUNDZ_POWER_OF_TWO_SIGNED(mat[0] * x + mat[1] * y + mat[2],
-                                             WARPEDDIFF_PREC_BITS);
+      *(proj++) = ROUND_POWER_OF_TWO_SIGNED(mat[0] * x + mat[1] * y + mat[2],
+                                            WARPEDDIFF_PREC_BITS);
     if (subsampling_y)
-      *(proj++) = ROUNDZ_POWER_OF_TWO_SIGNED(
+      *(proj++) = ROUND_POWER_OF_TWO_SIGNED(
           -mat[1] * 2 * x + mat[0] * 2 * y + mat[3] +
           (-mat[1] + mat[0] - (1 << WARPEDMODEL_PREC_BITS)) / 2,
           WARPEDDIFF_PREC_BITS + 1);
     else
-      *(proj++) = ROUNDZ_POWER_OF_TWO_SIGNED(-mat[1] * x + mat[0] * y + mat[3],
-                                             WARPEDDIFF_PREC_BITS);
+      *(proj++) = ROUND_POWER_OF_TWO_SIGNED(-mat[1] * x + mat[0] * y + mat[3],
+                                            WARPEDDIFF_PREC_BITS);
     points += stride_points - 2;
     proj += stride_proj - 2;
   }
@@ -145,21 +145,21 @@
   for (i = 0; i < n; ++i) {
     const int x = *(points++), y = *(points++);
     if (subsampling_x)
-      *(proj++) = ROUNDZ_POWER_OF_TWO_SIGNED(
+      *(proj++) = ROUND_POWER_OF_TWO_SIGNED(
           mat[0] * 2 * x + mat[1] * 2 * y + mat[4] +
           (mat[0] + mat[1] - (1 << WARPEDMODEL_PREC_BITS)) / 2,
           WARPEDDIFF_PREC_BITS + 1);
     else
-      *(proj++) = ROUNDZ_POWER_OF_TWO_SIGNED(mat[0] * x + mat[1] * y + mat[4],
-                                             WARPEDDIFF_PREC_BITS);
+      *(proj++) = ROUND_POWER_OF_TWO_SIGNED(mat[0] * x + mat[1] * y + mat[4],
+                                            WARPEDDIFF_PREC_BITS);
     if (subsampling_y)
-      *(proj++) = ROUNDZ_POWER_OF_TWO_SIGNED(
+      *(proj++) = ROUND_POWER_OF_TWO_SIGNED(
           mat[2] * 2 * x + mat[3] * 2 * y + mat[5] +
           (mat[2] + mat[3] - (1 << WARPEDMODEL_PREC_BITS)) / 2,
           WARPEDDIFF_PREC_BITS + 1);
     else
-      *(proj++) = ROUNDZ_POWER_OF_TWO_SIGNED(mat[2] * x + mat[3] * y + mat[5],
-                                             WARPEDDIFF_PREC_BITS);
+      *(proj++) = ROUND_POWER_OF_TWO_SIGNED(mat[2] * x + mat[3] * y + mat[5],
+                                            WARPEDDIFF_PREC_BITS);
     points += stride_points - 2;
     proj += stride_proj - 2;
   }
@@ -357,7 +357,7 @@
     const int64_t v2 = x * x * (2 * p[-1] - 5 * p[0] + 4 * p[1] - p[2]);
     const int64_t v3 = x * (p[1] - p[-1]);
     const int64_t v4 = 2 * p[0];
-    return (int32_t)ROUNDZ_POWER_OF_TWO_SIGNED(
+    return (int32_t)ROUND_POWER_OF_TWO_SIGNED(
         (v4 << (3 * WARPEDPIXEL_PREC_BITS)) +
         (v3 << (2 * WARPEDPIXEL_PREC_BITS)) +
         (v2 << WARPEDPIXEL_PREC_BITS) + v1,
diff --git a/vp10/encoder/rdopt.c b/vp10/encoder/rdopt.c
index 3ed8462..c64d57e 100644
--- a/vp10/encoder/rdopt.c
+++ b/vp10/encoder/rdopt.c
@@ -1217,7 +1217,7 @@
       sse = vpx_sum_squares_2d_i16(diff, diff_stride, bs);
 #if CONFIG_VP9_HIGHBITDEPTH
       if (xd->cur_buf->flags & YV12_FLAG_HIGHBITDEPTH)
-        sse = ROUNDZ_POWER_OF_TWO(sse, (xd->bd - 8) * 2);
+        sse = ROUND_POWER_OF_TWO(sse, (xd->bd - 8) * 2);
 #endif  // CONFIG_VP9_HIGHBITDEPTH
       sse = (int64_t)sse * 16;
 
@@ -3027,7 +3027,7 @@
 
 #if CONFIG_VP9_HIGHBITDEPTH
   if (xd->cur_buf->flags & YV12_FLAG_HIGHBITDEPTH)
-    tmp = ROUNDZ_POWER_OF_TWO(tmp, (xd->bd - 8) * 2);
+    tmp = ROUND_POWER_OF_TWO(tmp, (xd->bd - 8) * 2);
 #endif  // CONFIG_VP9_HIGHBITDEPTH
   *bsse += tmp * 16;
 
@@ -6664,7 +6664,7 @@
 
     mask = vp10_get_contiguous_soft_mask(wedge_index, wedge_sign, bsize);
     sse = vp10_wedge_sse_from_residuals(r1, d10, mask, N);
-    sse = ROUNDZ_POWER_OF_TWO(sse, bd_round);
+    sse = ROUND_POWER_OF_TWO(sse, bd_round);
 
     model_rd_from_sse(cpi, xd, bsize, 0, sse, &rate, &dist);
     rd =  RDCOST(x->rdmult, x->rddiv, rate, dist);
@@ -6726,7 +6726,7 @@
   for (wedge_index = 0; wedge_index < wedge_types; ++wedge_index) {
     mask = vp10_get_contiguous_soft_mask(wedge_index, wedge_sign, bsize);
     sse = vp10_wedge_sse_from_residuals(r1, d10, mask, N);
-    sse = ROUNDZ_POWER_OF_TWO(sse, bd_round);
+    sse = ROUND_POWER_OF_TWO(sse, bd_round);
 
     model_rd_from_sse(cpi, xd, bsize, 0, sse, &rate, &dist);
     rd =  RDCOST(x->rdmult, x->rddiv, rate, dist);
diff --git a/vpx_ports/mem.h b/vpx_ports/mem.h
index 1dca1a8..48549ce 100644
--- a/vpx_ports/mem.h
+++ b/vpx_ports/mem.h
@@ -38,24 +38,15 @@
 #define __builtin_prefetch(x)
 #endif
 
-/* Shift down with rounding for use when n > 0 */
+/* Shift down with rounding for use when n >= 0, value >= 0 */
 #define ROUND_POWER_OF_TWO(value, n) \
-    (((value) + (1 << ((n) - 1))) >> (n))
+    (((value) + ((1 << (n)) >> 1)) >> (n))
 
-/* Shift down with rounding for use when n >= 0 */
-#define ROUNDZ_POWER_OF_TWO(value, n) \
-    ((n) ? (((value) + (1 << ((n) - 1))) >> (n)) : (value))
-
-/* Shift down with rounding for signed integers, for use when n > 0 */
+/* Shift down with rounding for signed integers, for use when n >= 0 */
 #define ROUND_POWER_OF_TWO_SIGNED(value, n) \
     (((value) < 0) ? -ROUND_POWER_OF_TWO(-(value), (n)) \
                    : ROUND_POWER_OF_TWO((value), (n)))
 
-/* Shift down with rounding for signed integers, for use when n >= 0 */
-#define ROUNDZ_POWER_OF_TWO_SIGNED(value, n) \
-    (((value) < 0) ? -ROUNDZ_POWER_OF_TWO(-(value), (n)) \
-     : ROUNDZ_POWER_OF_TWO((value), (n)))
-
 #define ALIGN_POWER_OF_TWO(value, n) \
     (((value) + ((1 << (n)) - 1)) & ~((1 << (n)) - 1))