Clean up obmc_sad function prototypes.
Name 'wsrc', 'mask' and 'pre' explicitly, rather than
using 'b', 'm' and 'a'.
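
For reference, a minimal sketch of what these functions compute, written with
the new parameter names. It mirrors the C reference loop in vpx_dsp/sad.c in
the diff below; the standalone ROUND_POWER_OF_TWO definition here is an
assumed stand-in for the library macro so the sketch is self-contained:

    #include <stdint.h>
    #include <stdlib.h>

    /* Assumed stand-in for the library's ROUND_POWER_OF_TWO macro. */
    #define ROUND_POWER_OF_TWO(value, n) (((value) + (1 << ((n) - 1))) >> (n))

    /* pre:  predictor being evaluated, rows separated by pre_stride
     * wsrc: target weighted prediction (pre-scaled by 4096 to keep precision)
     * mask: 2d weights (also scaled by 4096), stored row-contiguously   */
    static unsigned int obmc_sad_sketch(const uint8_t *pre, int pre_stride,
                                        const int32_t *wsrc, const int32_t *mask,
                                        int width, int height) {
      unsigned int sad = 0;
      int y, x;
      for (y = 0; y < height; y++) {
        for (x = 0; x < width; x++)
          sad += ROUND_POWER_OF_TWO(abs(wsrc[x] - pre[x] * mask[x]), 12);
        pre += pre_stride;
        wsrc += width;
        mask += width;
      }
      return sad;
    }

The SSE4.1 versions in the diff compute the same quantity, using pmaddwd for
the pre[x] * mask[x] product as noted in their comments.
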
Change-Id: Iaee6d1ac1211b0b05b47cf98b50570089b12d600
diff --git a/test/obmc_sad_test.cc b/test/obmc_sad_test.cc
index 9d8c2a2..95d56ae 100644
--- a/test/obmc_sad_test.cc
+++ b/test/obmc_sad_test.cc
@@ -29,7 +29,7 @@
static const int kIterations = 1000;
static const int kMaskMax = 64;
-typedef unsigned int (*ObmcSadF)(const uint8_t *ref, int ref_stride,
+typedef unsigned int (*ObmcSadF)(const uint8_t *pre, int pre_stride,
const int32_t *wsrc, const int32_t *mask);
////////////////////////////////////////////////////////////////////////////////
@@ -45,42 +45,42 @@
};
TEST_P(ObmcSadTest, RandomValues) {
- DECLARE_ALIGNED(32, uint8_t, ref[MAX_SB_SQUARE]);
+ DECLARE_ALIGNED(32, uint8_t, pre[MAX_SB_SQUARE]);
DECLARE_ALIGNED(32, int32_t, wsrc[MAX_SB_SQUARE]);
DECLARE_ALIGNED(32, int32_t, mask[MAX_SB_SQUARE]);
for (int iter = 0 ; iter < kIterations && !HasFatalFailure() ; ++iter) {
- const int ref_stride = rng_(MAX_SB_SIZE + 1);
+ const int pre_stride = rng_(MAX_SB_SIZE + 1);
for (int i = 0 ; i < MAX_SB_SQUARE ; ++i) {
- ref[i] = rng_.Rand8();
+ pre[i] = rng_.Rand8();
wsrc[i] = rng_.Rand8() * rng_(kMaskMax * kMaskMax + 1);
mask[i] = rng_(kMaskMax * kMaskMax + 1);
}
- const unsigned int ref_res = ref_func_(ref, ref_stride, wsrc, mask);
- const unsigned int tst_res = tst_func_(ref, ref_stride, wsrc, mask);
+ const unsigned int ref_res = ref_func_(pre, pre_stride, wsrc, mask);
+ const unsigned int tst_res = tst_func_(pre, pre_stride, wsrc, mask);
ASSERT_EQ(ref_res, tst_res);
}
}
TEST_P(ObmcSadTest, ExtremeValues) {
- DECLARE_ALIGNED(32, uint8_t, ref[MAX_SB_SQUARE]);
+ DECLARE_ALIGNED(32, uint8_t, pre[MAX_SB_SQUARE]);
DECLARE_ALIGNED(32, int32_t, wsrc[MAX_SB_SQUARE]);
DECLARE_ALIGNED(32, int32_t, mask[MAX_SB_SQUARE]);
for (int iter = 0 ; iter < MAX_SB_SIZE && !HasFatalFailure() ; ++iter) {
- const int ref_stride = iter;
+ const int pre_stride = iter;
for (int i = 0 ; i < MAX_SB_SQUARE ; ++i) {
- ref[i] = UINT8_MAX;
+ pre[i] = UINT8_MAX;
wsrc[i] = UINT8_MAX * kMaskMax * kMaskMax;
mask[i] = kMaskMax * kMaskMax;
}
- const unsigned int ref_res = ref_func_(ref, ref_stride, wsrc, mask);
- const unsigned int tst_res = tst_func_(ref, ref_stride, wsrc, mask);
+ const unsigned int ref_res = ref_func_(pre, pre_stride, wsrc, mask);
+ const unsigned int tst_res = tst_func_(pre, pre_stride, wsrc, mask);
ASSERT_EQ(ref_res, tst_res);
}
@@ -126,22 +126,22 @@
};
TEST_P(ObmcSadHBDTest, RandomValues) {
- DECLARE_ALIGNED(32, uint16_t, ref[MAX_SB_SQUARE]);
+ DECLARE_ALIGNED(32, uint16_t, pre[MAX_SB_SQUARE]);
DECLARE_ALIGNED(32, int32_t, wsrc[MAX_SB_SQUARE]);
DECLARE_ALIGNED(32, int32_t, mask[MAX_SB_SQUARE]);
for (int iter = 0 ; iter < kIterations && !HasFatalFailure() ; ++iter) {
- const int ref_stride = rng_(MAX_SB_SIZE + 1);
+ const int pre_stride = rng_(MAX_SB_SIZE + 1);
for (int i = 0 ; i < MAX_SB_SQUARE ; ++i) {
- ref[i] = rng_(1<<12);
+ pre[i] = rng_(1<<12);
wsrc[i] = rng_(1<<12) * rng_(kMaskMax * kMaskMax + 1);
mask[i] = rng_(kMaskMax * kMaskMax + 1);
}
- const unsigned int ref_res = ref_func_(CONVERT_TO_BYTEPTR(ref), ref_stride,
+ const unsigned int ref_res = ref_func_(CONVERT_TO_BYTEPTR(pre), pre_stride,
wsrc, mask);
- const unsigned int tst_res = tst_func_(CONVERT_TO_BYTEPTR(ref), ref_stride,
+ const unsigned int tst_res = tst_func_(CONVERT_TO_BYTEPTR(pre), pre_stride,
wsrc, mask);
ASSERT_EQ(ref_res, tst_res);
@@ -149,22 +149,22 @@
}
TEST_P(ObmcSadHBDTest, ExtremeValues) {
- DECLARE_ALIGNED(32, uint16_t, ref[MAX_SB_SQUARE]);
+ DECLARE_ALIGNED(32, uint16_t, pre[MAX_SB_SQUARE]);
DECLARE_ALIGNED(32, int32_t, wsrc[MAX_SB_SQUARE]);
DECLARE_ALIGNED(32, int32_t, mask[MAX_SB_SQUARE]);
for (int iter = 0 ; iter < MAX_SB_SIZE && !HasFatalFailure() ; ++iter) {
- const int ref_stride = iter;
+ const int pre_stride = iter;
for (int i = 0 ; i < MAX_SB_SQUARE ; ++i) {
- ref[i] = (1 << 12) - 1;
+ pre[i] = (1 << 12) - 1;
wsrc[i] = ((1 << 12) - 1) * kMaskMax * kMaskMax;
mask[i] = kMaskMax * kMaskMax;
}
- const unsigned int ref_res = ref_func_(CONVERT_TO_BYTEPTR(ref), ref_stride,
+ const unsigned int ref_res = ref_func_(CONVERT_TO_BYTEPTR(pre), pre_stride,
wsrc, mask);
- const unsigned int tst_res = tst_func_(CONVERT_TO_BYTEPTR(ref), ref_stride,
+ const unsigned int tst_res = tst_func_(CONVERT_TO_BYTEPTR(pre), pre_stride,
wsrc, mask);
ASSERT_EQ(ref_res, tst_res);
diff --git a/vpx_dsp/sad.c b/vpx_dsp/sad.c
index bb1daf8..e64dae3 100644
--- a/vpx_dsp/sad.c
+++ b/vpx_dsp/sad.c
@@ -452,23 +452,23 @@
#endif // CONFIG_VP10 && CONFIG_EXT_INTER
#if CONFIG_VP10 && CONFIG_OBMC
-// a: pred
-// b: target weighted prediction (has been *4096 to keep precision)
-// m: 2d weights (scaled by 4096)
-static INLINE unsigned int obmc_sad(const uint8_t *a, int a_stride,
- const int32_t *b,
- const int32_t *m,
+// pre: predictor being evaluated
+// wsrc: target weighted prediction (has been *4096 to keep precision)
+// mask: 2d weights (scaled by 4096)
+static INLINE unsigned int obmc_sad(const uint8_t *pre, int pre_stride,
+ const int32_t *wsrc,
+ const int32_t *mask,
int width, int height) {
int y, x;
unsigned int sad = 0;
for (y = 0; y < height; y++) {
for (x = 0; x < width; x++)
- sad += ROUND_POWER_OF_TWO(abs(b[x] - a[x] * m[x]), 12);
+ sad += ROUND_POWER_OF_TWO(abs(wsrc[x] - pre[x] * mask[x]), 12);
- a += a_stride;
- b += width;
- m += width;
+ pre += pre_stride;
+ wsrc += width;
+ mask += width;
}
return sad;
@@ -477,8 +477,8 @@
#define OBMCSADMxN(m, n) \
unsigned int vpx_obmc_sad##m##x##n##_c(const uint8_t *ref, int ref_stride, \
const int32_t *wsrc, \
- const int32_t *msk) { \
- return obmc_sad(ref, ref_stride, wsrc, msk, m, n); \
+ const int32_t *mask) { \
+ return obmc_sad(ref, ref_stride, wsrc, mask, m, n); \
}
#if CONFIG_EXT_PARTITION
@@ -501,21 +501,21 @@
OBMCSADMxN(4, 4)
#if CONFIG_VP9_HIGHBITDEPTH
-static INLINE unsigned int highbd_obmc_sad(const uint8_t *a8, int a_stride,
- const int32_t *b,
- const int32_t *m,
+static INLINE unsigned int highbd_obmc_sad(const uint8_t *pre8, int pre_stride,
+ const int32_t *wsrc,
+ const int32_t *mask,
int width, int height) {
int y, x;
unsigned int sad = 0;
- const uint16_t *a = CONVERT_TO_SHORTPTR(a8);
+ const uint16_t *pre = CONVERT_TO_SHORTPTR(pre8);
for (y = 0; y < height; y++) {
for (x = 0; x < width; x++)
- sad += ROUND_POWER_OF_TWO(abs(b[x] - a[x] * m[x]), 12);
+ sad += ROUND_POWER_OF_TWO(abs(wsrc[x] - pre[x] * mask[x]), 12);
- a += a_stride;
- b += width;
- m += width;
+ pre += pre_stride;
+ wsrc += width;
+ mask += width;
}
return sad;
@@ -525,8 +525,8 @@
unsigned int vpx_highbd_obmc_sad##m##x##n##_c(const uint8_t *ref, \
int ref_stride, \
const int32_t *wsrc, \
- const int32_t *msk) { \
- return highbd_obmc_sad(ref, ref_stride, wsrc, msk, m, n); \
+ const int32_t *mask) { \
+ return highbd_obmc_sad(ref, ref_stride, wsrc, mask, m, n); \
}
#if CONFIG_EXT_PARTITION
diff --git a/vpx_dsp/vpx_dsp_rtcd_defs.pl b/vpx_dsp/vpx_dsp_rtcd_defs.pl
index 02c8727..d8055e9 100644
--- a/vpx_dsp/vpx_dsp_rtcd_defs.pl
+++ b/vpx_dsp/vpx_dsp_rtcd_defs.pl
@@ -1124,14 +1124,14 @@
if (vpx_config("CONFIG_OBMC") eq "yes") {
foreach (@block_sizes) {
($w, $h) = @$_;
- add_proto qw/unsigned int/, "vpx_obmc_sad${w}x${h}", "const uint8_t *ref_ptr, int ref_stride, const int32_t *wsrc_ptr, const int32_t *mask";
+ add_proto qw/unsigned int/, "vpx_obmc_sad${w}x${h}", "const uint8_t *pre, int pre_stride, const int32_t *wsrc, const int32_t *mask";
specialize "vpx_obmc_sad${w}x${h}", qw/sse4_1/;
}
if (vpx_config("CONFIG_VP9_HIGHBITDEPTH") eq "yes") {
foreach (@block_sizes) {
($w, $h) = @$_;
- add_proto qw/unsigned int/, "vpx_highbd_obmc_sad${w}x${h}", "const uint8_t *ref_ptr, int ref_stride, const int32_t *wsrc_ptr, const int32_t *mask";
+ add_proto qw/unsigned int/, "vpx_highbd_obmc_sad${w}x${h}", "const uint8_t *pre, int pre_stride, const int32_t *wsrc, const int32_t *mask";
specialize "vpx_highbd_obmc_sad${w}x${h}", qw/sse4_1/;
}
}
diff --git a/vpx_dsp/x86/obmc_sad_sse4.c b/vpx_dsp/x86/obmc_sad_sse4.c
index 57e1428..de12e1d 100644
--- a/vpx_dsp/x86/obmc_sad_sse4.c
+++ b/vpx_dsp/x86/obmc_sad_sse4.c
@@ -21,26 +21,28 @@
// 8 bit
////////////////////////////////////////////////////////////////////////////////
-static INLINE unsigned int obmc_sad_w4(const uint8_t *a, const int a_stride,
- const int32_t *b, const int32_t *m,
+static INLINE unsigned int obmc_sad_w4(const uint8_t *pre,
+ const int pre_stride,
+ const int32_t *wsrc,
+ const int32_t *mask,
const int height) {
- const int a_step = a_stride - 4;
+ const int pre_step = pre_stride - 4;
int n = 0;
__m128i v_sad_d = _mm_setzero_si128();
do {
- const __m128i v_a_b = xx_loadl_32(a + n);
- const __m128i v_m_d = xx_load_128(m + n);
- const __m128i v_b_d = xx_load_128(b + n);
+ const __m128i v_p_b = xx_loadl_32(pre + n);
+ const __m128i v_m_d = xx_load_128(mask + n);
+ const __m128i v_w_d = xx_load_128(wsrc + n);
- const __m128i v_a_d = _mm_cvtepu8_epi32(v_a_b);
+ const __m128i v_p_d = _mm_cvtepu8_epi32(v_p_b);
- // Values in both a and m fit in 15 bits, and are packed at 32 bit
+ // Values in both pre and mask fit in 15 bits, and are packed at 32 bit
// boundaries. We use pmaddwd, as it has lower latency on Haswell
// than pmulld but produces the same result with these inputs.
- const __m128i v_am_d = _mm_madd_epi16(v_a_d, v_m_d);
+ const __m128i v_pm_d = _mm_madd_epi16(v_p_d, v_m_d);
- const __m128i v_diff_d = _mm_sub_epi32(v_b_d, v_am_d);
+ const __m128i v_diff_d = _mm_sub_epi32(v_w_d, v_pm_d);
const __m128i v_absdiff_d = _mm_abs_epi32(v_diff_d);
// Rounded absolute difference
@@ -51,39 +53,42 @@
n += 4;
if (n % 4 == 0)
- a += a_step;
+ pre += pre_step;
} while (n < 4 * height);
return xx_hsum_epi32_si32(v_sad_d);
}
-static INLINE unsigned int obmc_sad_w8n(const uint8_t *a, const int a_stride,
- const int32_t *b, const int32_t *m,
- const int width, const int height) {
- const int a_step = a_stride - width;
+static INLINE unsigned int obmc_sad_w8n(const uint8_t *pre,
+ const int pre_stride,
+ const int32_t *wsrc,
+ const int32_t *mask,
+ const int width,
+ const int height) {
+ const int pre_step = pre_stride - width;
int n = 0;
__m128i v_sad_d = _mm_setzero_si128();
assert(width >= 8 && (width & (width - 1)) == 0);
do {
- const __m128i v_a1_b = xx_loadl_32(a + n + 4);
- const __m128i v_m1_d = xx_load_128(m + n + 4);
- const __m128i v_b1_d = xx_load_128(b + n + 4);
- const __m128i v_a0_b = xx_loadl_32(a + n);
- const __m128i v_m0_d = xx_load_128(m + n);
- const __m128i v_b0_d = xx_load_128(b + n);
+ const __m128i v_p1_b = xx_loadl_32(pre + n + 4);
+ const __m128i v_m1_d = xx_load_128(mask + n + 4);
+ const __m128i v_w1_d = xx_load_128(wsrc + n + 4);
+ const __m128i v_p0_b = xx_loadl_32(pre + n);
+ const __m128i v_m0_d = xx_load_128(mask + n);
+ const __m128i v_w0_d = xx_load_128(wsrc + n);
- const __m128i v_a0_d = _mm_cvtepu8_epi32(v_a0_b);
- const __m128i v_a1_d = _mm_cvtepu8_epi32(v_a1_b);
+ const __m128i v_p0_d = _mm_cvtepu8_epi32(v_p0_b);
+ const __m128i v_p1_d = _mm_cvtepu8_epi32(v_p1_b);
- // Values in both a and m fit in 15 bits, and are packed at 32 bit
+ // Values in both pre and mask fit in 15 bits, and are packed at 32 bit
// boundaries. We use pmaddwd, as it has lower latency on Haswell
// than pmulld but produces the same result with these inputs.
- const __m128i v_am0_d = _mm_madd_epi16(v_a0_d, v_m0_d);
- const __m128i v_am1_d = _mm_madd_epi16(v_a1_d, v_m1_d);
+ const __m128i v_pm0_d = _mm_madd_epi16(v_p0_d, v_m0_d);
+ const __m128i v_pm1_d = _mm_madd_epi16(v_p1_d, v_m1_d);
- const __m128i v_diff0_d = _mm_sub_epi32(v_b0_d, v_am0_d);
- const __m128i v_diff1_d = _mm_sub_epi32(v_b1_d, v_am1_d);
+ const __m128i v_diff0_d = _mm_sub_epi32(v_w0_d, v_pm0_d);
+ const __m128i v_diff1_d = _mm_sub_epi32(v_w1_d, v_pm1_d);
const __m128i v_absdiff0_d = _mm_abs_epi32(v_diff0_d);
const __m128i v_absdiff1_d = _mm_abs_epi32(v_diff1_d);
@@ -97,21 +102,21 @@
n += 8;
if (n % width == 0)
- a += a_step;
+ pre += pre_step;
} while (n < width * height);
return xx_hsum_epi32_si32(v_sad_d);
}
#define OBMCSADWXH(w, h) \
-unsigned int vpx_obmc_sad##w##x##h##_sse4_1(const uint8_t *ref, \
- int ref_stride, \
+unsigned int vpx_obmc_sad##w##x##h##_sse4_1(const uint8_t *pre, \
+ int pre_stride, \
const int32_t *wsrc, \
const int32_t *msk) { \
if (w == 4) \
- return obmc_sad_w4(ref, ref_stride, wsrc, msk, h); \
+ return obmc_sad_w4(pre, pre_stride, wsrc, msk, h); \
else \
- return obmc_sad_w8n(ref, ref_stride, wsrc, msk, w, h); \
+ return obmc_sad_w8n(pre, pre_stride, wsrc, msk, w, h); \
}
#if CONFIG_EXT_PARTITION
@@ -138,28 +143,29 @@
////////////////////////////////////////////////////////////////////////////////
#if CONFIG_VP9_HIGHBITDEPTH
-static INLINE unsigned int hbd_obmc_sad_w4(const uint8_t *a8,
- const int a_stride,
- const int32_t *b, const int32_t *m,
+static INLINE unsigned int hbd_obmc_sad_w4(const uint8_t *pre8,
+ const int pre_stride,
+ const int32_t *wsrc,
+ const int32_t *mask,
const int height) {
- const uint16_t *a = CONVERT_TO_SHORTPTR(a8);
- const int a_step = a_stride - 4;
+ const uint16_t *pre = CONVERT_TO_SHORTPTR(pre8);
+ const int pre_step = pre_stride - 4;
int n = 0;
__m128i v_sad_d = _mm_setzero_si128();
do {
- const __m128i v_a_w = xx_loadl_64(a + n);
- const __m128i v_m_d = xx_load_128(m + n);
- const __m128i v_b_d = xx_load_128(b + n);
+ const __m128i v_p_w = xx_loadl_64(pre + n);
+ const __m128i v_m_d = xx_load_128(mask + n);
+ const __m128i v_w_d = xx_load_128(wsrc + n);
- const __m128i v_a_d = _mm_cvtepu16_epi32(v_a_w);
+ const __m128i v_p_d = _mm_cvtepu16_epi32(v_p_w);
- // Values in both a and m fit in 15 bits, and are packed at 32 bit
+ // Values in both pre and mask fit in 15 bits, and are packed at 32 bit
// boundaries. We use pmaddwd, as it has lower latency on Haswell
// than pmulld but produces the same result with these inputs.
- const __m128i v_am_d = _mm_madd_epi16(v_a_d, v_m_d);
+ const __m128i v_pm_d = _mm_madd_epi16(v_p_d, v_m_d);
- const __m128i v_diff_d = _mm_sub_epi32(v_b_d, v_am_d);
+ const __m128i v_diff_d = _mm_sub_epi32(v_w_d, v_pm_d);
const __m128i v_absdiff_d = _mm_abs_epi32(v_diff_d);
// Rounded absolute difference
@@ -170,41 +176,43 @@
n += 4;
if (n % 4 == 0)
- a += a_step;
+ pre += pre_step;
} while (n < 4 * height);
return xx_hsum_epi32_si32(v_sad_d);
}
-static INLINE unsigned int hbd_obmc_sad_w8n(const uint8_t *a8,
- const int a_stride,
- const int32_t *b, const int32_t *m,
- const int width, const int height) {
- const uint16_t *a = CONVERT_TO_SHORTPTR(a8);
- const int a_step = a_stride - width;
+static INLINE unsigned int hbd_obmc_sad_w8n(const uint8_t *pre8,
+ const int pre_stride,
+ const int32_t *wsrc,
+ const int32_t *mask,
+ const int width,
+ const int height) {
+ const uint16_t *pre = CONVERT_TO_SHORTPTR(pre8);
+ const int pre_step = pre_stride - width;
int n = 0;
__m128i v_sad_d = _mm_setzero_si128();
assert(width >= 8 && (width & (width - 1)) == 0);
do {
- const __m128i v_a1_w = xx_loadl_64(a + n + 4);
- const __m128i v_m1_d = xx_load_128(m + n + 4);
- const __m128i v_b1_d = xx_load_128(b + n + 4);
- const __m128i v_a0_w = xx_loadl_64(a + n);
- const __m128i v_m0_d = xx_load_128(m + n);
- const __m128i v_b0_d = xx_load_128(b + n);
+ const __m128i v_p1_w = xx_loadl_64(pre + n + 4);
+ const __m128i v_m1_d = xx_load_128(mask + n + 4);
+ const __m128i v_w1_d = xx_load_128(wsrc + n + 4);
+ const __m128i v_p0_w = xx_loadl_64(pre + n);
+ const __m128i v_m0_d = xx_load_128(mask + n);
+ const __m128i v_w0_d = xx_load_128(wsrc + n);
- const __m128i v_a0_d = _mm_cvtepu16_epi32(v_a0_w);
- const __m128i v_a1_d = _mm_cvtepu16_epi32(v_a1_w);
+ const __m128i v_p0_d = _mm_cvtepu16_epi32(v_p0_w);
+ const __m128i v_p1_d = _mm_cvtepu16_epi32(v_p1_w);
- // Values in both a and m fit in 15 bits, and are packed at 32 bit
+ // Values in both pre and mask fit in 15 bits, and are packed at 32 bit
// boundaries. We use pmaddwd, as it has lower latency on Haswell
// than pmulld but produces the same result with these inputs.
- const __m128i v_am0_d = _mm_madd_epi16(v_a0_d, v_m0_d);
- const __m128i v_am1_d = _mm_madd_epi16(v_a1_d, v_m1_d);
+ const __m128i v_pm0_d = _mm_madd_epi16(v_p0_d, v_m0_d);
+ const __m128i v_pm1_d = _mm_madd_epi16(v_p1_d, v_m1_d);
- const __m128i v_diff0_d = _mm_sub_epi32(v_b0_d, v_am0_d);
- const __m128i v_diff1_d = _mm_sub_epi32(v_b1_d, v_am1_d);
+ const __m128i v_diff0_d = _mm_sub_epi32(v_w0_d, v_pm0_d);
+ const __m128i v_diff1_d = _mm_sub_epi32(v_w1_d, v_pm1_d);
const __m128i v_absdiff0_d = _mm_abs_epi32(v_diff0_d);
const __m128i v_absdiff1_d = _mm_abs_epi32(v_diff1_d);
@@ -218,21 +226,21 @@
n += 8;
if (n % width == 0)
- a += a_step;
+ pre += pre_step;
} while (n < width * height);
return xx_hsum_epi32_si32(v_sad_d);
}
#define HBD_OBMCSADWXH(w, h) \
-unsigned int vpx_highbd_obmc_sad##w##x##h##_sse4_1(const uint8_t *ref, \
- int ref_stride, \
+unsigned int vpx_highbd_obmc_sad##w##x##h##_sse4_1(const uint8_t *pre, \
+ int pre_stride, \
const int32_t *wsrc, \
- const int32_t *msk) { \
+ const int32_t *mask) { \
if (w == 4) \
- return hbd_obmc_sad_w4(ref, ref_stride, wsrc, msk, h); \
+ return hbd_obmc_sad_w4(pre, pre_stride, wsrc, mask, h); \
else \
- return hbd_obmc_sad_w8n(ref, ref_stride, wsrc, msk, w, h); \
+ return hbd_obmc_sad_w8n(pre, pre_stride, wsrc, mask, w, h); \
}
#if CONFIG_EXT_PARTITION