Clean up obmc_sad function prototypes.

Name 'pre', 'wsrc' and 'mask' explicitly, rather than using 'a', 'b'
and 'm' (and their variants such as 'a8', 'ref' and 'msk').

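For reference, the per-pixel computation (unchanged by this patch) is:

    sad += ROUND_POWER_OF_TWO(abs(wsrc[x] - pre[x] * mask[x]), 12);

where 'wsrc' and 'mask' have been pre-scaled by 4096, hence the final
shift by 12.
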
Change-Id: Iaee6d1ac1211b0b05b47cf98b50570089b12d600
diff --git a/test/obmc_sad_test.cc b/test/obmc_sad_test.cc
index 9d8c2a2..95d56ae 100644
--- a/test/obmc_sad_test.cc
+++ b/test/obmc_sad_test.cc
@@ -29,7 +29,7 @@
 static const int kIterations = 1000;
 static const int kMaskMax = 64;
 
-typedef unsigned int (*ObmcSadF)(const uint8_t *ref, int ref_stride,
+typedef unsigned int (*ObmcSadF)(const uint8_t *pre, int pre_stride,
                                  const int32_t *wsrc, const int32_t *mask);
 
 ////////////////////////////////////////////////////////////////////////////////
@@ -45,42 +45,42 @@
 };
 
 TEST_P(ObmcSadTest, RandomValues) {
-  DECLARE_ALIGNED(32, uint8_t, ref[MAX_SB_SQUARE]);
+  DECLARE_ALIGNED(32, uint8_t, pre[MAX_SB_SQUARE]);
   DECLARE_ALIGNED(32, int32_t, wsrc[MAX_SB_SQUARE]);
   DECLARE_ALIGNED(32, int32_t, mask[MAX_SB_SQUARE]);
 
   for (int iter = 0 ; iter < kIterations && !HasFatalFailure() ; ++iter) {
-    const int ref_stride = rng_(MAX_SB_SIZE + 1);
+    const int pre_stride = rng_(MAX_SB_SIZE + 1);
 
     for (int i = 0 ; i < MAX_SB_SQUARE ; ++i) {
-      ref[i] = rng_.Rand8();
+      pre[i] = rng_.Rand8();
       wsrc[i] = rng_.Rand8() * rng_(kMaskMax * kMaskMax + 1);
       mask[i] = rng_(kMaskMax * kMaskMax + 1);
     }
 
-    const unsigned int ref_res = ref_func_(ref, ref_stride, wsrc, mask);
-    const unsigned int tst_res = tst_func_(ref, ref_stride, wsrc, mask);
+    const unsigned int ref_res = ref_func_(pre, pre_stride, wsrc, mask);
+    const unsigned int tst_res = tst_func_(pre, pre_stride, wsrc, mask);
 
     ASSERT_EQ(ref_res, tst_res);
   }
 }
 
 TEST_P(ObmcSadTest, ExtremeValues) {
-  DECLARE_ALIGNED(32, uint8_t, ref[MAX_SB_SQUARE]);
+  DECLARE_ALIGNED(32, uint8_t, pre[MAX_SB_SQUARE]);
   DECLARE_ALIGNED(32, int32_t, wsrc[MAX_SB_SQUARE]);
   DECLARE_ALIGNED(32, int32_t, mask[MAX_SB_SQUARE]);
 
   for (int iter = 0 ; iter < MAX_SB_SIZE && !HasFatalFailure() ; ++iter) {
-    const int ref_stride = iter;
+    const int pre_stride = iter;
 
     for (int i = 0 ; i < MAX_SB_SQUARE ; ++i) {
-      ref[i] = UINT8_MAX;
+      pre[i] = UINT8_MAX;
       wsrc[i] = UINT8_MAX * kMaskMax * kMaskMax;
       mask[i] = kMaskMax * kMaskMax;
     }
 
-    const unsigned int ref_res = ref_func_(ref, ref_stride, wsrc, mask);
-    const unsigned int tst_res = tst_func_(ref, ref_stride, wsrc, mask);
+    const unsigned int ref_res = ref_func_(pre, pre_stride, wsrc, mask);
+    const unsigned int tst_res = tst_func_(pre, pre_stride, wsrc, mask);
 
     ASSERT_EQ(ref_res, tst_res);
   }
@@ -126,22 +126,22 @@
 };
 
 TEST_P(ObmcSadHBDTest, RandomValues) {
-  DECLARE_ALIGNED(32, uint16_t, ref[MAX_SB_SQUARE]);
+  DECLARE_ALIGNED(32, uint16_t, pre[MAX_SB_SQUARE]);
   DECLARE_ALIGNED(32, int32_t, wsrc[MAX_SB_SQUARE]);
   DECLARE_ALIGNED(32, int32_t, mask[MAX_SB_SQUARE]);
 
   for (int iter = 0 ; iter < kIterations && !HasFatalFailure() ; ++iter) {
-    const int ref_stride = rng_(MAX_SB_SIZE + 1);
+    const int pre_stride = rng_(MAX_SB_SIZE + 1);
 
     for (int i = 0 ; i < MAX_SB_SQUARE ; ++i) {
-      ref[i] = rng_(1<<12);
+      pre[i] = rng_(1<<12);
       wsrc[i] = rng_(1<<12) * rng_(kMaskMax * kMaskMax + 1);
       mask[i] = rng_(kMaskMax * kMaskMax + 1);
     }
 
-    const unsigned int ref_res = ref_func_(CONVERT_TO_BYTEPTR(ref), ref_stride,
+    const unsigned int ref_res = ref_func_(CONVERT_TO_BYTEPTR(pre), pre_stride,
                                            wsrc, mask);
-    const unsigned int tst_res = tst_func_(CONVERT_TO_BYTEPTR(ref), ref_stride,
+    const unsigned int tst_res = tst_func_(CONVERT_TO_BYTEPTR(pre), pre_stride,
                                            wsrc, mask);
 
     ASSERT_EQ(ref_res, tst_res);
@@ -149,22 +149,22 @@
 }
 
 TEST_P(ObmcSadHBDTest, ExtremeValues) {
-  DECLARE_ALIGNED(32, uint16_t, ref[MAX_SB_SQUARE]);
+  DECLARE_ALIGNED(32, uint16_t, pre[MAX_SB_SQUARE]);
   DECLARE_ALIGNED(32, int32_t, wsrc[MAX_SB_SQUARE]);
   DECLARE_ALIGNED(32, int32_t, mask[MAX_SB_SQUARE]);
 
   for (int iter = 0 ; iter < MAX_SB_SIZE && !HasFatalFailure() ; ++iter) {
-    const int ref_stride = iter;
+    const int pre_stride = iter;
 
     for (int i = 0 ; i < MAX_SB_SQUARE ; ++i) {
-      ref[i] = (1 << 12) - 1;
+      pre[i] = (1 << 12) - 1;
       wsrc[i] = ((1 << 12) - 1) * kMaskMax * kMaskMax;
       mask[i] = kMaskMax * kMaskMax;
     }
 
-    const unsigned int ref_res = ref_func_(CONVERT_TO_BYTEPTR(ref), ref_stride,
+    const unsigned int ref_res = ref_func_(CONVERT_TO_BYTEPTR(pre), pre_stride,
                                            wsrc, mask);
-    const unsigned int tst_res = tst_func_(CONVERT_TO_BYTEPTR(ref), ref_stride,
+    const unsigned int tst_res = tst_func_(CONVERT_TO_BYTEPTR(pre), pre_stride,
                                            wsrc, mask);
 
     ASSERT_EQ(ref_res, tst_res);
diff --git a/vpx_dsp/sad.c b/vpx_dsp/sad.c
index bb1daf8..e64dae3 100644
--- a/vpx_dsp/sad.c
+++ b/vpx_dsp/sad.c
@@ -452,23 +452,23 @@
 #endif  // CONFIG_VP10 && CONFIG_EXT_INTER
 
 #if CONFIG_VP10 && CONFIG_OBMC
-// a: pred
-// b: target weighted prediction (has been *4096 to keep precision)
-// m: 2d weights (scaled by 4096)
-static INLINE unsigned int obmc_sad(const uint8_t *a, int a_stride,
-                                    const int32_t *b,
-                                    const int32_t *m,
+// pre: predictor being evaluated
+// wsrc: target weighted prediction (scaled by 4096 to keep precision)
+// mask: 2d weights (scaled by 4096)
+static INLINE unsigned int obmc_sad(const uint8_t *pre, int pre_stride,
+                                    const int32_t *wsrc,
+                                    const int32_t *mask,
                                     int width, int height) {
   int y, x;
   unsigned int sad = 0;
 
   for (y = 0; y < height; y++) {
     for (x = 0; x < width; x++)
-      sad += ROUND_POWER_OF_TWO(abs(b[x] - a[x] * m[x]), 12);
+      sad += ROUND_POWER_OF_TWO(abs(wsrc[x] - pre[x] * mask[x]), 12);
 
-    a += a_stride;
-    b += width;
-    m += width;
+    pre += pre_stride;
+    wsrc += width;
+    mask += width;
   }
 
   return sad;
@@ -477,8 +477,8 @@
 #define OBMCSADMxN(m, n)                                                      \
-unsigned int vpx_obmc_sad##m##x##n##_c(const uint8_t *ref, int ref_stride,    \
+unsigned int vpx_obmc_sad##m##x##n##_c(const uint8_t *pre, int pre_stride,    \
                                        const int32_t *wsrc,                   \
-                                       const int32_t *msk) {                  \
-  return obmc_sad(ref, ref_stride, wsrc, msk, m, n);                          \
+                                       const int32_t *mask) {                 \
+  return obmc_sad(pre, pre_stride, wsrc, mask, m, n);                         \
 }
 
 #if CONFIG_EXT_PARTITION
@@ -501,21 +501,21 @@
 OBMCSADMxN(4, 4)
 
 #if CONFIG_VP9_HIGHBITDEPTH
-static INLINE unsigned int highbd_obmc_sad(const uint8_t *a8, int a_stride,
-                                           const int32_t *b,
-                                           const int32_t *m,
+static INLINE unsigned int highbd_obmc_sad(const uint8_t *pre8, int pre_stride,
+                                           const int32_t *wsrc,
+                                           const int32_t *mask,
                                            int width, int height) {
   int y, x;
   unsigned int sad = 0;
-  const uint16_t *a = CONVERT_TO_SHORTPTR(a8);
+  const uint16_t *pre = CONVERT_TO_SHORTPTR(pre8);
 
   for (y = 0; y < height; y++) {
     for (x = 0; x < width; x++)
-      sad += ROUND_POWER_OF_TWO(abs(b[x] - a[x] * m[x]), 12);
+      sad += ROUND_POWER_OF_TWO(abs(wsrc[x] - pre[x] * mask[x]), 12);
 
-    a += a_stride;
-    b += width;
-    m += width;
+    pre += pre_stride;
+    wsrc += width;
+    mask += width;
   }
 
   return sad;
@@ -525,8 +525,8 @@
-unsigned int vpx_highbd_obmc_sad##m##x##n##_c(const uint8_t *ref,             \
-                                              int ref_stride,                 \
+unsigned int vpx_highbd_obmc_sad##m##x##n##_c(const uint8_t *pre,             \
+                                              int pre_stride,                 \
                                               const int32_t *wsrc,            \
-                                              const int32_t *msk) {           \
-  return highbd_obmc_sad(ref, ref_stride, wsrc, msk, m, n);                   \
+                                              const int32_t *mask) {          \
+  return highbd_obmc_sad(pre, pre_stride, wsrc, mask, m, n);                  \
 }
 
 #if CONFIG_EXT_PARTITION
diff --git a/vpx_dsp/vpx_dsp_rtcd_defs.pl b/vpx_dsp/vpx_dsp_rtcd_defs.pl
index 02c8727..d8055e9 100644
--- a/vpx_dsp/vpx_dsp_rtcd_defs.pl
+++ b/vpx_dsp/vpx_dsp_rtcd_defs.pl
@@ -1124,14 +1124,14 @@
 if (vpx_config("CONFIG_OBMC") eq "yes") {
   foreach (@block_sizes) {
     ($w, $h) = @$_;
-    add_proto qw/unsigned int/, "vpx_obmc_sad${w}x${h}", "const uint8_t *ref_ptr, int ref_stride, const int32_t *wsrc_ptr, const int32_t *mask";
+    add_proto qw/unsigned int/, "vpx_obmc_sad${w}x${h}", "const uint8_t *pre, int pre_stride, const int32_t *wsrc, const int32_t *mask";
     specialize "vpx_obmc_sad${w}x${h}", qw/sse4_1/;
   }
 
   if (vpx_config("CONFIG_VP9_HIGHBITDEPTH") eq "yes") {
     foreach (@block_sizes) {
       ($w, $h) = @$_;
-      add_proto qw/unsigned int/, "vpx_highbd_obmc_sad${w}x${h}", "const uint8_t *ref_ptr, int ref_stride, const int32_t *wsrc_ptr, const int32_t *mask";
+      add_proto qw/unsigned int/, "vpx_highbd_obmc_sad${w}x${h}", "const uint8_t *pre, int pre_stride, const int32_t *wsrc, const int32_t *mask";
       specialize "vpx_highbd_obmc_sad${w}x${h}", qw/sse4_1/;
     }
   }
diff --git a/vpx_dsp/x86/obmc_sad_sse4.c b/vpx_dsp/x86/obmc_sad_sse4.c
index 57e1428..de12e1d 100644
--- a/vpx_dsp/x86/obmc_sad_sse4.c
+++ b/vpx_dsp/x86/obmc_sad_sse4.c
@@ -21,26 +21,28 @@
 // 8 bit
 ////////////////////////////////////////////////////////////////////////////////
 
-static INLINE unsigned int obmc_sad_w4(const uint8_t *a, const int a_stride,
-                                       const int32_t *b, const int32_t *m,
+static INLINE unsigned int obmc_sad_w4(const uint8_t *pre,
+                                       const int pre_stride,
+                                       const int32_t *wsrc,
+                                       const int32_t *mask,
                                        const int height) {
-  const int a_step = a_stride - 4;
+  const int pre_step = pre_stride - 4;
   int n = 0;
   __m128i v_sad_d = _mm_setzero_si128();
 
   do {
-    const __m128i v_a_b = xx_loadl_32(a + n);
-    const __m128i v_m_d = xx_load_128(m + n);
-    const __m128i v_b_d = xx_load_128(b + n);
+    const __m128i v_p_b = xx_loadl_32(pre + n);
+    const __m128i v_m_d = xx_load_128(mask + n);
+    const __m128i v_w_d = xx_load_128(wsrc + n);
 
-    const __m128i v_a_d = _mm_cvtepu8_epi32(v_a_b);
+    const __m128i v_p_d = _mm_cvtepu8_epi32(v_p_b);
 
-    // Values in both a and m fit in 15 bits, and are packed at 32 bit
+    // Values in both pre and mask fit in 15 bits, and are packed at 32 bit
     // boundaries. We use pmaddwd, as it has lower latency on Haswell
     // than pmulld but produces the same result with these inputs.
-    const __m128i v_am_d = _mm_madd_epi16(v_a_d, v_m_d);
+    const __m128i v_pm_d = _mm_madd_epi16(v_p_d, v_m_d);
 
-    const __m128i v_diff_d = _mm_sub_epi32(v_b_d, v_am_d);
+    const __m128i v_diff_d = _mm_sub_epi32(v_w_d, v_pm_d);
     const __m128i v_absdiff_d = _mm_abs_epi32(v_diff_d);
 
     // Rounded absolute difference
@@ -51,39 +53,42 @@
     n += 4;
 
     if (n % 4 == 0)
-      a += a_step;
+      pre += pre_step;
   } while (n < 4 * height);
 
   return xx_hsum_epi32_si32(v_sad_d);
 }
 
-static INLINE unsigned int obmc_sad_w8n(const uint8_t *a, const int a_stride,
-                                        const int32_t *b, const int32_t *m,
-                                        const int width, const int height) {
-  const int a_step = a_stride - width;
+static INLINE unsigned int obmc_sad_w8n(const uint8_t *pre,
+                                        const int pre_stride,
+                                        const int32_t *wsrc,
+                                        const int32_t *mask,
+                                        const int width,
+                                        const int height) {
+  const int pre_step = pre_stride - width;
   int n = 0;
   __m128i v_sad_d = _mm_setzero_si128();
   assert(width >= 8 && (width & (width - 1)) == 0);
 
   do {
-    const __m128i v_a1_b = xx_loadl_32(a + n + 4);
-    const __m128i v_m1_d = xx_load_128(m + n + 4);
-    const __m128i v_b1_d = xx_load_128(b + n + 4);
-    const __m128i v_a0_b = xx_loadl_32(a + n);
-    const __m128i v_m0_d = xx_load_128(m + n);
-    const __m128i v_b0_d = xx_load_128(b + n);
+    const __m128i v_p1_b = xx_loadl_32(pre + n + 4);
+    const __m128i v_m1_d = xx_load_128(mask + n + 4);
+    const __m128i v_w1_d = xx_load_128(wsrc + n + 4);
+    const __m128i v_p0_b = xx_loadl_32(pre + n);
+    const __m128i v_m0_d = xx_load_128(mask + n);
+    const __m128i v_w0_d = xx_load_128(wsrc + n);
 
-    const __m128i v_a0_d = _mm_cvtepu8_epi32(v_a0_b);
-    const __m128i v_a1_d = _mm_cvtepu8_epi32(v_a1_b);
+    const __m128i v_p0_d = _mm_cvtepu8_epi32(v_p0_b);
+    const __m128i v_p1_d = _mm_cvtepu8_epi32(v_p1_b);
 
-    // Values in both a and m fit in 15 bits, and are packed at 32 bit
+    // Values in both pre and mask fit in 15 bits, and are packed at 32 bit
     // boundaries. We use pmaddwd, as it has lower latency on Haswell
     // than pmulld but produces the same result with these inputs.
-    const __m128i v_am0_d = _mm_madd_epi16(v_a0_d, v_m0_d);
-    const __m128i v_am1_d = _mm_madd_epi16(v_a1_d, v_m1_d);
+    const __m128i v_pm0_d = _mm_madd_epi16(v_p0_d, v_m0_d);
+    const __m128i v_pm1_d = _mm_madd_epi16(v_p1_d, v_m1_d);
 
-    const __m128i v_diff0_d = _mm_sub_epi32(v_b0_d, v_am0_d);
-    const __m128i v_diff1_d = _mm_sub_epi32(v_b1_d, v_am1_d);
+    const __m128i v_diff0_d = _mm_sub_epi32(v_w0_d, v_pm0_d);
+    const __m128i v_diff1_d = _mm_sub_epi32(v_w1_d, v_pm1_d);
     const __m128i v_absdiff0_d = _mm_abs_epi32(v_diff0_d);
     const __m128i v_absdiff1_d = _mm_abs_epi32(v_diff1_d);
 
@@ -97,21 +102,21 @@
     n += 8;
 
     if (n % width == 0)
-      a += a_step;
+      pre += pre_step;
   } while (n < width * height);
 
   return xx_hsum_epi32_si32(v_sad_d);
 }
 
 #define OBMCSADWXH(w, h)                                                      \
-unsigned int vpx_obmc_sad##w##x##h##_sse4_1(const uint8_t *ref,               \
-                                            int ref_stride,                   \
+unsigned int vpx_obmc_sad##w##x##h##_sse4_1(const uint8_t *pre,               \
+                                            int pre_stride,                   \
                                             const int32_t *wsrc,              \
-                                            const int32_t *msk) {             \
+                                            const int32_t *mask) {            \
   if (w == 4)                                                                 \
-    return obmc_sad_w4(ref, ref_stride, wsrc, msk, h);                        \
+    return obmc_sad_w4(pre, pre_stride, wsrc, mask, h);                       \
   else                                                                        \
-    return obmc_sad_w8n(ref, ref_stride, wsrc, msk, w, h);                    \
+    return obmc_sad_w8n(pre, pre_stride, wsrc, mask, w, h);                   \
 }
 
 #if CONFIG_EXT_PARTITION
@@ -138,28 +143,29 @@
 ////////////////////////////////////////////////////////////////////////////////
 
 #if CONFIG_VP9_HIGHBITDEPTH
-static INLINE unsigned int hbd_obmc_sad_w4(const uint8_t *a8,
-                                           const int a_stride,
-                                           const int32_t *b, const int32_t *m,
+static INLINE unsigned int hbd_obmc_sad_w4(const uint8_t *pre8,
+                                           const int pre_stride,
+                                           const int32_t *wsrc,
+                                           const int32_t *mask,
                                            const int height) {
-  const uint16_t *a = CONVERT_TO_SHORTPTR(a8);
-  const int a_step = a_stride - 4;
+  const uint16_t *pre = CONVERT_TO_SHORTPTR(pre8);
+  const int pre_step = pre_stride - 4;
   int n = 0;
   __m128i v_sad_d = _mm_setzero_si128();
 
   do {
-    const __m128i v_a_w = xx_loadl_64(a + n);
-    const __m128i v_m_d = xx_load_128(m + n);
-    const __m128i v_b_d = xx_load_128(b + n);
+    const __m128i v_p_w = xx_loadl_64(pre + n);
+    const __m128i v_m_d = xx_load_128(mask + n);
+    const __m128i v_w_d = xx_load_128(wsrc + n);
 
-    const __m128i v_a_d = _mm_cvtepu16_epi32(v_a_w);
+    const __m128i v_p_d = _mm_cvtepu16_epi32(v_p_w);
 
-    // Values in both a and m fit in 15 bits, and are packed at 32 bit
+    // Values in both pre and mask fit in 15 bits, and are packed at 32 bit
     // boundaries. We use pmaddwd, as it has lower latency on Haswell
     // than pmulld but produces the same result with these inputs.
-    const __m128i v_am_d = _mm_madd_epi16(v_a_d, v_m_d);
+    const __m128i v_pm_d = _mm_madd_epi16(v_p_d, v_m_d);
 
-    const __m128i v_diff_d = _mm_sub_epi32(v_b_d, v_am_d);
+    const __m128i v_diff_d = _mm_sub_epi32(v_w_d, v_pm_d);
     const __m128i v_absdiff_d = _mm_abs_epi32(v_diff_d);
 
     // Rounded absolute difference
@@ -170,41 +176,43 @@
     n += 4;
 
     if (n % 4 == 0)
-      a += a_step;
+      pre += pre_step;
   } while (n < 4 * height);
 
   return xx_hsum_epi32_si32(v_sad_d);
 }
 
-static INLINE unsigned int hbd_obmc_sad_w8n(const uint8_t *a8,
-                                            const int a_stride,
-                                            const int32_t *b, const int32_t *m,
-                                            const int width, const int height) {
-  const uint16_t *a = CONVERT_TO_SHORTPTR(a8);
-  const int a_step = a_stride - width;
+static INLINE unsigned int hbd_obmc_sad_w8n(const uint8_t *pre8,
+                                            const int pre_stride,
+                                            const int32_t *wsrc,
+                                            const int32_t *mask,
+                                            const int width,
+                                            const int height) {
+  const uint16_t *pre = CONVERT_TO_SHORTPTR(pre8);
+  const int pre_step = pre_stride - width;
   int n = 0;
   __m128i v_sad_d = _mm_setzero_si128();
   assert(width >= 8 && (width & (width - 1)) == 0);
 
   do {
-    const __m128i v_a1_w = xx_loadl_64(a + n + 4);
-    const __m128i v_m1_d = xx_load_128(m + n + 4);
-    const __m128i v_b1_d = xx_load_128(b + n + 4);
-    const __m128i v_a0_w = xx_loadl_64(a + n);
-    const __m128i v_m0_d = xx_load_128(m + n);
-    const __m128i v_b0_d = xx_load_128(b + n);
+    const __m128i v_p1_w = xx_loadl_64(pre + n + 4);
+    const __m128i v_m1_d = xx_load_128(mask + n + 4);
+    const __m128i v_w1_d = xx_load_128(wsrc + n + 4);
+    const __m128i v_p0_w = xx_loadl_64(pre + n);
+    const __m128i v_m0_d = xx_load_128(mask + n);
+    const __m128i v_w0_d = xx_load_128(wsrc + n);
 
-    const __m128i v_a0_d = _mm_cvtepu16_epi32(v_a0_w);
-    const __m128i v_a1_d = _mm_cvtepu16_epi32(v_a1_w);
+    const __m128i v_p0_d = _mm_cvtepu16_epi32(v_p0_w);
+    const __m128i v_p1_d = _mm_cvtepu16_epi32(v_p1_w);
 
-    // Values in both a and m fit in 15 bits, and are packed at 32 bit
+    // Values in both pre and mask fit in 15 bits, and are packed at 32 bit
     // boundaries. We use pmaddwd, as it has lower latency on Haswell
     // than pmulld but produces the same result with these inputs.
-    const __m128i v_am0_d = _mm_madd_epi16(v_a0_d, v_m0_d);
-    const __m128i v_am1_d = _mm_madd_epi16(v_a1_d, v_m1_d);
+    const __m128i v_pm0_d = _mm_madd_epi16(v_p0_d, v_m0_d);
+    const __m128i v_pm1_d = _mm_madd_epi16(v_p1_d, v_m1_d);
 
-    const __m128i v_diff0_d = _mm_sub_epi32(v_b0_d, v_am0_d);
-    const __m128i v_diff1_d = _mm_sub_epi32(v_b1_d, v_am1_d);
+    const __m128i v_diff0_d = _mm_sub_epi32(v_w0_d, v_pm0_d);
+    const __m128i v_diff1_d = _mm_sub_epi32(v_w1_d, v_pm1_d);
     const __m128i v_absdiff0_d = _mm_abs_epi32(v_diff0_d);
     const __m128i v_absdiff1_d = _mm_abs_epi32(v_diff1_d);
 
@@ -218,21 +226,21 @@
     n += 8;
 
     if (n % width == 0)
-      a += a_step;
+      pre += pre_step;
   } while (n < width * height);
 
   return xx_hsum_epi32_si32(v_sad_d);
 }
 
 #define HBD_OBMCSADWXH(w, h)                                                  \
-unsigned int vpx_highbd_obmc_sad##w##x##h##_sse4_1(const uint8_t *ref,        \
-                                                   int ref_stride,            \
+unsigned int vpx_highbd_obmc_sad##w##x##h##_sse4_1(const uint8_t *pre,        \
+                                                   int pre_stride,            \
                                                    const int32_t *wsrc,       \
-                                                   const int32_t *msk) {      \
+                                                   const int32_t *mask) {     \
   if (w == 4)                                                                 \
-    return hbd_obmc_sad_w4(ref, ref_stride, wsrc, msk, h);                    \
+    return hbd_obmc_sad_w4(pre, pre_stride, wsrc, mask, h);                   \
   else                                                                        \
-    return hbd_obmc_sad_w8n(ref, ref_stride, wsrc, msk, w, h);                \
+    return hbd_obmc_sad_w8n(pre, pre_stride, wsrc, mask, w, h);               \
 }
 
 #if CONFIG_EXT_PARTITION