ext-inter: Use joint_motion_search for masked compounds
Add functions which take both components of a masked compound and
compute the resulting SAD/SSE. Extend joint_motion_search to understand
masked compounds, and use it to evaluate NEW_NEWMV modes.
Change-Id: I782199a20d119a6c61c6567df157508125ac7ce7
diff --git a/aom_dsp/variance.c b/aom_dsp/variance.c
index 9fc0db7..90d0622 100644
--- a/aom_dsp/variance.c
+++ b/aom_dsp/variance.c
@@ -18,6 +18,7 @@
#include "aom_dsp/variance.h"
#include "aom_dsp/aom_filter.h"
+#include "aom_dsp/blend.h"
uint32_t aom_get4x4sse_cs_c(const uint8_t *a, int a_stride, const uint8_t *b,
int b_stride) {
@@ -672,6 +673,47 @@
#endif // CONFIG_HIGHBITDEPTH
#if CONFIG_AV1 && CONFIG_EXT_INTER
+/* Builds a masked compound prediction block.
+ *
+ * Each output pixel is AOM_BLEND_A64(mask, x, y) where (x, y) is (ref, pred)
+ * when invert_mask is zero and (pred, ref) otherwise.
+ *
+ * comp_pred and pred are walked with a row pitch of `width`; ref and mask
+ * advance by ref_stride and mask_stride respectively. */
+void aom_comp_mask_pred_c(uint8_t *comp_pred, const uint8_t *pred, int width,
+                          int height, const uint8_t *ref, int ref_stride,
+                          const uint8_t *mask, int mask_stride,
+                          int invert_mask) {
+  int row, col;
+
+  for (row = 0; row < height; ++row) {
+    for (col = 0; col < width; ++col) {
+      /* invert_mask only swaps the order of the two blend operands. */
+      const uint8_t first = invert_mask ? pred[col] : ref[col];
+      const uint8_t second = invert_mask ? ref[col] : pred[col];
+      comp_pred[col] = AOM_BLEND_A64(mask[col], first, second);
+    }
+    comp_pred += width;
+    pred += width;
+    ref += ref_stride;
+    mask += mask_stride;
+  }
+}
+
+/* Masked compound prediction reading `ref` from an 8x-spaced sample grid.
+ *
+ * Column j of the output uses ref[j << 3], and each output row advances ref
+ * by ref_stride << 3. comp_pred and pred use a row pitch of `width`; mask
+ * advances by mask_stride. The blend is AOM_BLEND_A64(mask, ref, pred) when
+ * invert_mask is zero and AOM_BLEND_A64(mask, pred, ref) otherwise. */
+void aom_comp_mask_upsampled_pred_c(uint8_t *comp_pred, const uint8_t *pred,
+                                    int width, int height, const uint8_t *ref,
+                                    int ref_stride, const uint8_t *mask,
+                                    int mask_stride, int invert_mask) {
+  const int ref_row_step = ref_stride << 3;
+  int row, col;
+
+  for (row = 0; row < height; row++) {
+    for (col = 0; col < width; col++) {
+      const uint8_t ref_px = ref[col << 3];
+      const uint8_t first = invert_mask ? pred[col] : ref_px;
+      const uint8_t second = invert_mask ? ref_px : pred[col];
+      comp_pred[col] = AOM_BLEND_A64(mask[col], first, second);
+    }
+    comp_pred += width;
+    pred += width;
+    ref += ref_row_step;
+    mask += mask_stride;
+  }
+}
+
void masked_variance(const uint8_t *a, int a_stride, const uint8_t *b,
int b_stride, const uint8_t *m, int m_stride, int w, int h,
unsigned int *sse, int *sum) {
@@ -696,13 +738,54 @@
*sse = (uint32_t)ROUND_POWER_OF_TWO(sse64, 12);
}
-#define MASK_VAR(W, H) \
- unsigned int aom_masked_variance##W##x##H##_c( \
- const uint8_t *a, int a_stride, const uint8_t *b, int b_stride, \
- const uint8_t *m, int m_stride, unsigned int *sse) { \
- int sum; \
- masked_variance(a, a_stride, b, b_stride, m, m_stride, W, H, sse, &sum); \
- return *sse - (unsigned int)(((int64_t)sum * sum) / (W * H)); \
+/* Accumulates the error between `src` and the masked compound prediction
+ * AOM_BLEND_A64(m, a, b) over a w x h block.
+ *
+ * On return *sse holds the sum of squared differences and *sum the absolute
+ * value of the summed differences.
+ *
+ * The blended value is held in a uint8_t, so each difference is already at
+ * pixel scale and the totals are returned as accumulated. Shifting them down
+ * by a further 6 / 12 bits (as this function previously did) discarded
+ * almost all of the error energy and disagreed with the sub-pixel compound
+ * kernel in this file, which passes the blended block straight to the plain
+ * variance function. */
+void masked_compound_variance(const uint8_t *src, int src_stride,
+                              const uint8_t *a, int a_stride, const uint8_t *b,
+                              int b_stride, const uint8_t *m, int m_stride,
+                              int w, int h, unsigned int *sse, int *sum) {
+  int i, j;
+
+  int64_t sum64 = 0;
+  uint64_t sse64 = 0;
+
+  for (i = 0; i < h; i++) {
+    for (j = 0; j < w; j++) {
+      const uint8_t pred = AOM_BLEND_A64(m[j], a[j], b[j]);
+      const int diff = pred - src[j];
+      sum64 += diff;
+      sse64 += (uint64_t)(diff * diff);
+    }
+
+    src += src_stride;
+    a += a_stride;
+    b += b_stride;
+    m += m_stride;
+  }
+  /* Callers in this file only ever square the sum; keep reporting its
+   * magnitude as before. */
+  sum64 = (sum64 >= 0) ? sum64 : -sum64;
+  *sum = (int)sum64;
+  *sse = (uint32_t)sse64;
+}
+
+/* Instantiates the C masked-variance kernels for a WxH block:
+ *   aom_masked_variance<W>x<H>_c
+ *     wraps masked_variance().
+ *   aom_masked_compound_variance<W>x<H>_c
+ *     wraps masked_compound_variance(); second_pred is passed with a pitch
+ *     of W, and invert_mask selects whether ref or second_pred is handed
+ *     over as the first prediction.
+ * Both store the helper's SSE in *sse and return *sse - sum * sum / (W * H). */
+#define MASK_VAR(W, H) \
+  unsigned int aom_masked_variance##W##x##H##_c( \
+      const uint8_t *a, int a_stride, const uint8_t *b, int b_stride, \
+      const uint8_t *m, int m_stride, unsigned int *sse) { \
+    int total; \
+    masked_variance(a, a_stride, b, b_stride, m, m_stride, W, H, sse, \
+                    &total); \
+    return *sse - (unsigned int)(((int64_t)total * total) / (W * H)); \
+  } \
+ \
+  unsigned int aom_masked_compound_variance##W##x##H##_c( \
+      const uint8_t *src, int src_stride, const uint8_t *ref, int ref_stride, \
+      const uint8_t *second_pred, const uint8_t *m, int m_stride, \
+      int invert_mask, unsigned int *sse) { \
+    /* invert_mask swaps which prediction is passed first. */ \
+    const uint8_t *const first = invert_mask ? second_pred : ref; \
+    const int first_stride = invert_mask ? W : ref_stride; \
+    const uint8_t *const second = invert_mask ? ref : second_pred; \
+    const int second_stride = invert_mask ? ref_stride : W; \
+    int total; \
+    masked_compound_variance(src, src_stride, first, first_stride, second, \
+                             second_stride, m, m_stride, W, H, sse, &total); \
+    return *sse - (unsigned int)(((int64_t)total * total) / (W * H)); \
}
#define MASK_SUBPIX_VAR(W, H) \
@@ -720,6 +803,25 @@
\
return aom_masked_variance##W##x##H##_c(temp2, W, dst, dst_stride, msk, \
msk_stride, sse); \
+ } \
+ \
+  /* Bilinearly filters `src` at (xoffset, yoffset), blends the result with */ \
+  /* second_pred under msk, and returns the variance of that blend against */ \
+  /* `ref` (SSE is written to *sse). */ \
+  unsigned int aom_masked_compound_sub_pixel_variance##W##x##H##_c( \
+      const uint8_t *src, int src_stride, int xoffset, int yoffset, \
+      const uint8_t *ref, int ref_stride, const uint8_t *second_pred, \
+      const uint8_t *msk, int msk_stride, int invert_mask, \
+      unsigned int *sse) { \
+    uint16_t fdata3[(H + 1) * W]; \
+    uint8_t temp2[H * W]; \
+    DECLARE_ALIGNED(16, uint8_t, temp3[H * W]); \
+ \
+    var_filter_block2d_bil_first_pass(src, fdata3, src_stride, 1, H + 1, W, \
+                                      bilinear_filters_2t[xoffset]); \
+    var_filter_block2d_bil_second_pass(fdata3, temp2, W, W, H, W, \
+                                       bilinear_filters_2t[yoffset]); \
+ \
+    /* Use the C blend defined in this file, matching the high-bitdepth */ \
+    /* _c kernels, instead of the un-suffixed aom_comp_mask_pred symbol */ \
+    /* (presumably the dispatch name - confirm against the RTCD header). */ \
+    aom_comp_mask_pred_c(temp3, second_pred, W, H, temp2, W, msk, msk_stride, \
+                         invert_mask); \
+    return aom_variance##W##x##H##_c(temp3, W, ref, ref_stride, sse); \
}
MASK_VAR(4, 4)
@@ -773,6 +875,51 @@
#endif // CONFIG_EXT_PARTITION
#if CONFIG_HIGHBITDEPTH
+/* High-bitdepth masked compound prediction.
+ *
+ * pred8 and ref8 are converted with CONVERT_TO_SHORTPTR and read as 16-bit
+ * samples; comp_pred is written as 16-bit samples. Each output is
+ * AOM_BLEND_A64(mask, ref, pred) when invert_mask is zero and
+ * AOM_BLEND_A64(mask, pred, ref) otherwise.
+ *
+ * comp_pred and pred use a row pitch of `width`; ref and mask advance by
+ * ref_stride and mask_stride. */
+void aom_highbd_comp_mask_pred_c(uint16_t *comp_pred, const uint8_t *pred8,
+                                 int width, int height, const uint8_t *ref8,
+                                 int ref_stride, const uint8_t *mask,
+                                 int mask_stride, int invert_mask) {
+  uint16_t *pred = CONVERT_TO_SHORTPTR(pred8);
+  uint16_t *ref = CONVERT_TO_SHORTPTR(ref8);
+  int row, col;
+
+  for (row = 0; row < height; ++row) {
+    for (col = 0; col < width; ++col) {
+      /* invert_mask only swaps the order of the two blend operands. */
+      const uint16_t first = invert_mask ? pred[col] : ref[col];
+      const uint16_t second = invert_mask ? ref[col] : pred[col];
+      comp_pred[col] = AOM_BLEND_A64(mask[col], first, second);
+    }
+    comp_pred += width;
+    pred += width;
+    ref += ref_stride;
+    mask += mask_stride;
+  }
+}
+
+/* High-bitdepth masked compound prediction reading `ref` from an 8x-spaced
+ * sample grid.
+ *
+ * pred8 and ref8 are converted with CONVERT_TO_SHORTPTR. Column j of the
+ * output uses ref[j << 3], and each output row advances ref by
+ * ref_stride << 3. comp_pred and pred use a row pitch of `width`; mask
+ * advances by mask_stride. invert_mask swaps the order of the two blend
+ * operands. */
+void aom_highbd_comp_mask_upsampled_pred_c(uint16_t *comp_pred,
+                                           const uint8_t *pred8, int width,
+                                           int height, const uint8_t *ref8,
+                                           int ref_stride, const uint8_t *mask,
+                                           int mask_stride, int invert_mask) {
+  const int ref_row_step = ref_stride << 3;
+  uint16_t *pred = CONVERT_TO_SHORTPTR(pred8);
+  uint16_t *ref = CONVERT_TO_SHORTPTR(ref8);
+  int row, col;
+
+  for (row = 0; row < height; ++row) {
+    for (col = 0; col < width; ++col) {
+      const uint16_t ref_px = ref[col << 3];
+      const uint16_t first = invert_mask ? pred[col] : ref_px;
+      const uint16_t second = invert_mask ? ref_px : pred[col];
+      comp_pred[col] = AOM_BLEND_A64(mask[col], first, second);
+    }
+    comp_pred += width;
+    pred += width;
+    ref += ref_row_step;
+    mask += mask_stride;
+  }
+}
+
void highbd_masked_variance64(const uint8_t *a8, int a_stride,
const uint8_t *b8, int b_stride, const uint8_t *m,
int m_stride, int w, int h, uint64_t *sse,
@@ -835,85 +982,272 @@
*sse = (unsigned int)ROUND_POWER_OF_TWO(sse64, 8);
}
-#define HIGHBD_MASK_VAR(W, H) \
- unsigned int aom_highbd_masked_variance##W##x##H##_c( \
- const uint8_t *a, int a_stride, const uint8_t *b, int b_stride, \
- const uint8_t *m, int m_stride, unsigned int *sse) { \
- int sum; \
- highbd_masked_variance(a, a_stride, b, b_stride, m, m_stride, W, H, sse, \
- &sum); \
- return *sse - (unsigned int)(((int64_t)sum * sum) / (W * H)); \
- } \
- \
- unsigned int aom_highbd_10_masked_variance##W##x##H##_c( \
- const uint8_t *a, int a_stride, const uint8_t *b, int b_stride, \
- const uint8_t *m, int m_stride, unsigned int *sse) { \
- int sum; \
- int64_t var; \
- highbd_10_masked_variance(a, a_stride, b, b_stride, m, m_stride, W, H, \
- sse, &sum); \
- var = (int64_t)(*sse) - (((int64_t)sum * sum) / (W * H)); \
- return (var >= 0) ? (uint32_t)var : 0; \
- } \
- \
- unsigned int aom_highbd_12_masked_variance##W##x##H##_c( \
- const uint8_t *a, int a_stride, const uint8_t *b, int b_stride, \
- const uint8_t *m, int m_stride, unsigned int *sse) { \
- int sum; \
- int64_t var; \
- highbd_12_masked_variance(a, a_stride, b, b_stride, m, m_stride, W, H, \
- sse, &sum); \
- var = (int64_t)(*sse) - (((int64_t)sum * sum) / (W * H)); \
- return (var >= 0) ? (uint32_t)var : 0; \
+/* 64-bit accumulator core for the high-bitdepth masked compound variance.
+ *
+ * src8, a8 and b8 are converted with CONVERT_TO_SHORTPTR and read as 16-bit
+ * samples. For every pixel of the w x h block the prediction is
+ * AOM_BLEND_A64(m, a, b); *sse receives the sum of squared differences from
+ * `src` and *sum the absolute value of the summed differences.
+ *
+ * The blended value is stored in a uint16_t sample, so the differences are
+ * already at pixel scale and the totals are returned as accumulated. The
+ * bit-depth wrappers apply their own 2/4 and 4/8-bit rounding on top; a
+ * further 6/12-bit shift here (as was done previously) would discard most of
+ * the error energy and disagree with the sub-pixel compound kernels, which
+ * hand the blended block to the plain high-bitdepth variance functions. */
+void highbd_masked_compound_variance64(const uint8_t *src8, int src_stride,
+                                       const uint8_t *a8, int a_stride,
+                                       const uint8_t *b8, int b_stride,
+                                       const uint8_t *m, int m_stride, int w,
+                                       int h, uint64_t *sse, int64_t *sum) {
+  int i, j;
+  uint16_t *src = CONVERT_TO_SHORTPTR(src8);
+  uint16_t *a = CONVERT_TO_SHORTPTR(a8);
+  uint16_t *b = CONVERT_TO_SHORTPTR(b8);
+  int64_t sum_acc = 0;
+  uint64_t sse_acc = 0;
+
+  for (i = 0; i < h; i++) {
+    for (j = 0; j < w; j++) {
+      const uint16_t pred = AOM_BLEND_A64(m[j], a[j], b[j]);
+      const int diff = pred - src[j];
+      sum_acc += diff;
+      sse_acc += (uint64_t)((int64_t)diff * diff);
+    }
+
+    src += src_stride;
+    a += a_stride;
+    b += b_stride;
+    m += m_stride;
+  }
+  /* Callers in this file only ever square the sum; keep reporting its
+   * magnitude as before. */
+  *sum = (sum_acc >= 0) ? sum_acc : -sum_acc;
+  *sse = sse_acc;
+}
+
+/* Narrowing wrapper around highbd_masked_compound_variance64(): forwards all
+ * arguments and truncates the 64-bit totals to int / unsigned int with no
+ * additional rounding. Used by the aom_highbd_masked_compound_variance
+ * kernels. */
+void highbd_masked_compound_variance(const uint8_t *src8, int src_stride,
+                                     const uint8_t *a8, int a_stride,
+                                     const uint8_t *b8, int b_stride,
+                                     const uint8_t *m, int m_stride, int w,
+                                     int h, unsigned int *sse, int *sum) {
+  uint64_t sse_wide;
+  int64_t sum_wide;
+
+  highbd_masked_compound_variance64(src8, src_stride, a8, a_stride, b8,
+                                    b_stride, m, m_stride, w, h, &sse_wide,
+                                    &sum_wide);
+  *sse = (unsigned int)sse_wide;
+  *sum = (int)sum_wide;
+}
+
+/* Wrapper around highbd_masked_compound_variance64() used by the
+ * aom_highbd_10_masked_compound_variance kernels: rounds the 64-bit sum down
+ * by 2 bits and the 64-bit SSE by 4 bits before narrowing. */
+void highbd_10_masked_compound_variance(const uint8_t *src8, int src_stride,
+                                        const uint8_t *a8, int a_stride,
+                                        const uint8_t *b8, int b_stride,
+                                        const uint8_t *m, int m_stride, int w,
+                                        int h, unsigned int *sse, int *sum) {
+  uint64_t sse_wide;
+  int64_t sum_wide;
+
+  highbd_masked_compound_variance64(src8, src_stride, a8, a_stride, b8,
+                                    b_stride, m, m_stride, w, h, &sse_wide,
+                                    &sum_wide);
+  *sse = (unsigned int)ROUND_POWER_OF_TWO(sse_wide, 4);
+  *sum = (int)ROUND_POWER_OF_TWO(sum_wide, 2);
+}
+
+/* Wrapper around highbd_masked_compound_variance64() used by the
+ * aom_highbd_12_masked_compound_variance kernels: rounds the 64-bit sum down
+ * by 4 bits and the 64-bit SSE by 8 bits before narrowing. */
+void highbd_12_masked_compound_variance(const uint8_t *src8, int src_stride,
+                                        const uint8_t *a8, int a_stride,
+                                        const uint8_t *b8, int b_stride,
+                                        const uint8_t *m, int m_stride, int w,
+                                        int h, unsigned int *sse, int *sum) {
+  uint64_t sse_wide;
+  int64_t sum_wide;
+
+  highbd_masked_compound_variance64(src8, src_stride, a8, a_stride, b8,
+                                    b_stride, m, m_stride, w, h, &sse_wide,
+                                    &sum_wide);
+  *sse = (unsigned int)ROUND_POWER_OF_TWO(sse_wide, 8);
+  *sum = (int)ROUND_POWER_OF_TWO(sum_wide, 4);
+}
+
+/* Instantiates the high-bitdepth C masked-variance kernels for a WxH block.
+ *
+ *   aom_highbd[_10|_12]_masked_variance<W>x<H>_c
+ *     wrap highbd[_10|_12]_masked_variance().
+ *   aom_highbd[_10|_12]_masked_compound_variance<W>x<H>_c
+ *     wrap highbd[_10|_12]_masked_compound_variance(); second_pred is passed
+ *     with a pitch of W, and invert_mask selects whether ref or second_pred
+ *     is handed over as the first prediction.
+ *
+ * Every kernel stores the helper's SSE in *sse and returns
+ * *sse - sum * sum / (W * H). The _10_ and _12_ helpers round sum and SSE
+ * independently, so that difference can come out slightly negative; all four
+ * _10_/_12_ kernels therefore compute it in 64 bits and clamp at zero. The
+ * compound ones previously subtracted in unsigned arithmetic and could wrap
+ * to a huge value. */
+#define HIGHBD_MASK_VAR(W, H) \
+  unsigned int aom_highbd_masked_variance##W##x##H##_c( \
+      const uint8_t *a, int a_stride, const uint8_t *b, int b_stride, \
+      const uint8_t *m, int m_stride, unsigned int *sse) { \
+    int sum; \
+    highbd_masked_variance(a, a_stride, b, b_stride, m, m_stride, W, H, sse, \
+                           &sum); \
+    return *sse - (unsigned int)(((int64_t)sum * sum) / (W * H)); \
+  } \
+ \
+  unsigned int aom_highbd_10_masked_variance##W##x##H##_c( \
+      const uint8_t *a, int a_stride, const uint8_t *b, int b_stride, \
+      const uint8_t *m, int m_stride, unsigned int *sse) { \
+    int sum; \
+    int64_t var; \
+    highbd_10_masked_variance(a, a_stride, b, b_stride, m, m_stride, W, H, \
+                              sse, &sum); \
+    var = (int64_t)(*sse) - (((int64_t)sum * sum) / (W * H)); \
+    return (var >= 0) ? (uint32_t)var : 0; \
+  } \
+ \
+  unsigned int aom_highbd_12_masked_variance##W##x##H##_c( \
+      const uint8_t *a, int a_stride, const uint8_t *b, int b_stride, \
+      const uint8_t *m, int m_stride, unsigned int *sse) { \
+    int sum; \
+    int64_t var; \
+    highbd_12_masked_variance(a, a_stride, b, b_stride, m, m_stride, W, H, \
+                              sse, &sum); \
+    var = (int64_t)(*sse) - (((int64_t)sum * sum) / (W * H)); \
+    return (var >= 0) ? (uint32_t)var : 0; \
+  } \
+ \
+  unsigned int aom_highbd_masked_compound_variance##W##x##H##_c( \
+      const uint8_t *src, int src_stride, const uint8_t *ref, int ref_stride, \
+      const uint8_t *second_pred, const uint8_t *m, int m_stride, \
+      int invert_mask, unsigned int *sse) { \
+    int sum; \
+    if (!invert_mask) \
+      highbd_masked_compound_variance(src, src_stride, ref, ref_stride, \
+                                      second_pred, W, m, m_stride, W, H, sse, \
+                                      &sum); \
+    else \
+      highbd_masked_compound_variance(src, src_stride, second_pred, W, ref, \
+                                      ref_stride, m, m_stride, W, H, sse, \
+                                      &sum); \
+    return *sse - (unsigned int)(((int64_t)sum * sum) / (W * H)); \
+  } \
+ \
+  unsigned int aom_highbd_10_masked_compound_variance##W##x##H##_c( \
+      const uint8_t *src, int src_stride, const uint8_t *ref, int ref_stride, \
+      const uint8_t *second_pred, const uint8_t *m, int m_stride, \
+      int invert_mask, unsigned int *sse) { \
+    int sum; \
+    int64_t var; \
+    if (!invert_mask) \
+      highbd_10_masked_compound_variance(src, src_stride, ref, ref_stride, \
+                                         second_pred, W, m, m_stride, W, H, \
+                                         sse, &sum); \
+    else \
+      highbd_10_masked_compound_variance(src, src_stride, second_pred, W, ref, \
+                                         ref_stride, m, m_stride, W, H, sse, \
+                                         &sum); \
+    /* Clamp: independent rounding of sum and sse can push this below 0. */ \
+    var = (int64_t)(*sse) - (((int64_t)sum * sum) / (W * H)); \
+    return (var >= 0) ? (uint32_t)var : 0; \
+  } \
+ \
+  unsigned int aom_highbd_12_masked_compound_variance##W##x##H##_c( \
+      const uint8_t *src, int src_stride, const uint8_t *ref, int ref_stride, \
+      const uint8_t *second_pred, const uint8_t *m, int m_stride, \
+      int invert_mask, unsigned int *sse) { \
+    int sum; \
+    int64_t var; \
+    if (!invert_mask) \
+      highbd_12_masked_compound_variance(src, src_stride, ref, ref_stride, \
+                                         second_pred, W, m, m_stride, W, H, \
+                                         sse, &sum); \
+    else \
+      highbd_12_masked_compound_variance(src, src_stride, second_pred, W, ref, \
+                                         ref_stride, m, m_stride, W, H, sse, \
+                                         &sum); \
+    /* Clamp: independent rounding of sum and sse can push this below 0. */ \
+    var = (int64_t)(*sse) - (((int64_t)sum * sum) / (W * H)); \
+    return (var >= 0) ? (uint32_t)var : 0; \
}
-#define HIGHBD_MASK_SUBPIX_VAR(W, H) \
- unsigned int aom_highbd_masked_sub_pixel_variance##W##x##H##_c( \
- const uint8_t *src, int src_stride, int xoffset, int yoffset, \
- const uint8_t *dst, int dst_stride, const uint8_t *msk, int msk_stride, \
- unsigned int *sse) { \
- uint16_t fdata3[(H + 1) * W]; \
- uint16_t temp2[H * W]; \
- \
- aom_highbd_var_filter_block2d_bil_first_pass( \
- src, fdata3, src_stride, 1, H + 1, W, bilinear_filters_2t[xoffset]); \
- aom_highbd_var_filter_block2d_bil_second_pass( \
- fdata3, temp2, W, W, H, W, bilinear_filters_2t[yoffset]); \
- \
- return aom_highbd_masked_variance##W##x##H##_c( \
- CONVERT_TO_BYTEPTR(temp2), W, dst, dst_stride, msk, msk_stride, sse); \
- } \
- \
- unsigned int aom_highbd_10_masked_sub_pixel_variance##W##x##H##_c( \
- const uint8_t *src, int src_stride, int xoffset, int yoffset, \
- const uint8_t *dst, int dst_stride, const uint8_t *msk, int msk_stride, \
- unsigned int *sse) { \
- uint16_t fdata3[(H + 1) * W]; \
- uint16_t temp2[H * W]; \
- \
- aom_highbd_var_filter_block2d_bil_first_pass( \
- src, fdata3, src_stride, 1, H + 1, W, bilinear_filters_2t[xoffset]); \
- aom_highbd_var_filter_block2d_bil_second_pass( \
- fdata3, temp2, W, W, H, W, bilinear_filters_2t[yoffset]); \
- \
- return aom_highbd_10_masked_variance##W##x##H##_c( \
- CONVERT_TO_BYTEPTR(temp2), W, dst, dst_stride, msk, msk_stride, sse); \
- } \
- \
- unsigned int aom_highbd_12_masked_sub_pixel_variance##W##x##H##_c( \
- const uint8_t *src, int src_stride, int xoffset, int yoffset, \
- const uint8_t *dst, int dst_stride, const uint8_t *msk, int msk_stride, \
- unsigned int *sse) { \
- uint16_t fdata3[(H + 1) * W]; \
- uint16_t temp2[H * W]; \
- \
- aom_highbd_var_filter_block2d_bil_first_pass( \
- src, fdata3, src_stride, 1, H + 1, W, bilinear_filters_2t[xoffset]); \
- aom_highbd_var_filter_block2d_bil_second_pass( \
- fdata3, temp2, W, W, H, W, bilinear_filters_2t[yoffset]); \
- \
- return aom_highbd_12_masked_variance##W##x##H##_c( \
- CONVERT_TO_BYTEPTR(temp2), W, dst, dst_stride, msk, msk_stride, sse); \
+/* Instantiates the high-bitdepth C sub-pixel masked variance kernels for a
+ * WxH block, each in three flavours (un-suffixed, _10_, _12_).
+ *
+ *   aom_highbd[_10|_12]_masked_sub_pixel_variance<W>x<H>_c
+ *     runs the two bilinear filter passes over `src` (horizontal taps chosen
+ *     by xoffset, vertical by yoffset) and returns the matching
+ *     aom_highbd[_10|_12]_masked_variance<W>x<H>_c of the filtered block
+ *     against `dst`.
+ *
+ *   aom_highbd[_10|_12]_masked_compound_sub_pixel_variance<W>x<H>_c
+ *     runs the same two filter passes, blends the filtered block with
+ *     second_pred through aom_highbd_comp_mask_pred_c (msk / invert_mask),
+ *     and returns aom_highbd_{8,10,12}_variance<W>x<H>_c of the blend
+ *     against `ref`.
+ *
+ * Scratch buffers live on the stack: (H + 1) * W samples for the first pass
+ * and H * W samples for each later stage. */
+#define HIGHBD_MASK_SUBPIX_VAR(W, H) \
+  unsigned int aom_highbd_masked_sub_pixel_variance##W##x##H##_c( \
+      const uint8_t *src, int src_stride, int xoffset, int yoffset, \
+      const uint8_t *dst, int dst_stride, const uint8_t *msk, int msk_stride, \
+      unsigned int *sse) { \
+    uint16_t first_pass[(H + 1) * W]; \
+    uint16_t filtered[H * W]; \
+ \
+    aom_highbd_var_filter_block2d_bil_first_pass( \
+        src, first_pass, src_stride, 1, H + 1, W, \
+        bilinear_filters_2t[xoffset]); \
+    aom_highbd_var_filter_block2d_bil_second_pass( \
+        first_pass, filtered, W, W, H, W, bilinear_filters_2t[yoffset]); \
+ \
+    return aom_highbd_masked_variance##W##x##H##_c( \
+        CONVERT_TO_BYTEPTR(filtered), W, dst, dst_stride, msk, msk_stride, \
+        sse); \
+  } \
+ \
+  unsigned int aom_highbd_10_masked_sub_pixel_variance##W##x##H##_c( \
+      const uint8_t *src, int src_stride, int xoffset, int yoffset, \
+      const uint8_t *dst, int dst_stride, const uint8_t *msk, int msk_stride, \
+      unsigned int *sse) { \
+    uint16_t first_pass[(H + 1) * W]; \
+    uint16_t filtered[H * W]; \
+ \
+    aom_highbd_var_filter_block2d_bil_first_pass( \
+        src, first_pass, src_stride, 1, H + 1, W, \
+        bilinear_filters_2t[xoffset]); \
+    aom_highbd_var_filter_block2d_bil_second_pass( \
+        first_pass, filtered, W, W, H, W, bilinear_filters_2t[yoffset]); \
+ \
+    return aom_highbd_10_masked_variance##W##x##H##_c( \
+        CONVERT_TO_BYTEPTR(filtered), W, dst, dst_stride, msk, msk_stride, \
+        sse); \
+  } \
+ \
+  unsigned int aom_highbd_12_masked_sub_pixel_variance##W##x##H##_c( \
+      const uint8_t *src, int src_stride, int xoffset, int yoffset, \
+      const uint8_t *dst, int dst_stride, const uint8_t *msk, int msk_stride, \
+      unsigned int *sse) { \
+    uint16_t first_pass[(H + 1) * W]; \
+    uint16_t filtered[H * W]; \
+ \
+    aom_highbd_var_filter_block2d_bil_first_pass( \
+        src, first_pass, src_stride, 1, H + 1, W, \
+        bilinear_filters_2t[xoffset]); \
+    aom_highbd_var_filter_block2d_bil_second_pass( \
+        first_pass, filtered, W, W, H, W, bilinear_filters_2t[yoffset]); \
+ \
+    return aom_highbd_12_masked_variance##W##x##H##_c( \
+        CONVERT_TO_BYTEPTR(filtered), W, dst, dst_stride, msk, msk_stride, \
+        sse); \
+  } \
+ \
+  unsigned int aom_highbd_masked_compound_sub_pixel_variance##W##x##H##_c( \
+      const uint8_t *src, int src_stride, int xoffset, int yoffset, \
+      const uint8_t *ref, int ref_stride, const uint8_t *second_pred, \
+      const uint8_t *msk, int msk_stride, int invert_mask, \
+      unsigned int *sse) { \
+    uint16_t first_pass[(H + 1) * W]; \
+    uint16_t filtered[H * W]; \
+    DECLARE_ALIGNED(16, uint16_t, blended[H * W]); \
+ \
+    aom_highbd_var_filter_block2d_bil_first_pass( \
+        src, first_pass, src_stride, 1, H + 1, W, \
+        bilinear_filters_2t[xoffset]); \
+    aom_highbd_var_filter_block2d_bil_second_pass( \
+        first_pass, filtered, W, W, H, W, bilinear_filters_2t[yoffset]); \
+ \
+    aom_highbd_comp_mask_pred_c(blended, second_pred, W, H, \
+                                CONVERT_TO_BYTEPTR(filtered), W, msk, \
+                                msk_stride, invert_mask); \
+ \
+    return aom_highbd_8_variance##W##x##H##_c(CONVERT_TO_BYTEPTR(blended), W, \
+                                              ref, ref_stride, sse); \
+  } \
+ \
+  unsigned int aom_highbd_10_masked_compound_sub_pixel_variance##W##x##H##_c( \
+      const uint8_t *src, int src_stride, int xoffset, int yoffset, \
+      const uint8_t *ref, int ref_stride, const uint8_t *second_pred, \
+      const uint8_t *msk, int msk_stride, int invert_mask, \
+      unsigned int *sse) { \
+    uint16_t first_pass[(H + 1) * W]; \
+    uint16_t filtered[H * W]; \
+    DECLARE_ALIGNED(16, uint16_t, blended[H * W]); \
+ \
+    aom_highbd_var_filter_block2d_bil_first_pass( \
+        src, first_pass, src_stride, 1, H + 1, W, \
+        bilinear_filters_2t[xoffset]); \
+    aom_highbd_var_filter_block2d_bil_second_pass( \
+        first_pass, filtered, W, W, H, W, bilinear_filters_2t[yoffset]); \
+ \
+    aom_highbd_comp_mask_pred_c(blended, second_pred, W, H, \
+                                CONVERT_TO_BYTEPTR(filtered), W, msk, \
+                                msk_stride, invert_mask); \
+ \
+    return aom_highbd_10_variance##W##x##H##_c(CONVERT_TO_BYTEPTR(blended), \
+                                               W, ref, ref_stride, sse); \
+  } \
+ \
+  unsigned int aom_highbd_12_masked_compound_sub_pixel_variance##W##x##H##_c( \
+      const uint8_t *src, int src_stride, int xoffset, int yoffset, \
+      const uint8_t *ref, int ref_stride, const uint8_t *second_pred, \
+      const uint8_t *msk, int msk_stride, int invert_mask, \
+      unsigned int *sse) { \
+    uint16_t first_pass[(H + 1) * W]; \
+    uint16_t filtered[H * W]; \
+    DECLARE_ALIGNED(16, uint16_t, blended[H * W]); \
+ \
+    aom_highbd_var_filter_block2d_bil_first_pass( \
+        src, first_pass, src_stride, 1, H + 1, W, \
+        bilinear_filters_2t[xoffset]); \
+    aom_highbd_var_filter_block2d_bil_second_pass( \
+        first_pass, filtered, W, W, H, W, bilinear_filters_2t[yoffset]); \
+ \
+    aom_highbd_comp_mask_pred_c(blended, second_pred, W, H, \
+                                CONVERT_TO_BYTEPTR(filtered), W, msk, \
+                                msk_stride, invert_mask); \
+ \
+    return aom_highbd_12_variance##W##x##H##_c(CONVERT_TO_BYTEPTR(blended), \
+                                               W, ref, ref_stride, sse); \
}
HIGHBD_MASK_VAR(4, 4)