ext-inter: Use joint_motion_search for masked compounds

Add functions which take both components of a masked compound and
compute the resulting SAD/SSE. Extend joint_motion_search to understand
masked compounds, and use it to evaluate NEW_NEWMV modes.

Change-Id: I782199a20d119a6c61c6567df157508125ac7ce7
diff --git a/aom_dsp/variance.c b/aom_dsp/variance.c
index 9fc0db7..90d0622 100644
--- a/aom_dsp/variance.c
+++ b/aom_dsp/variance.c
@@ -18,6 +18,7 @@
 
 #include "aom_dsp/variance.h"
 #include "aom_dsp/aom_filter.h"
+#include "aom_dsp/blend.h"
 
 uint32_t aom_get4x4sse_cs_c(const uint8_t *a, int a_stride, const uint8_t *b,
                             int b_stride) {
@@ -672,6 +673,47 @@
 #endif  // CONFIG_HIGHBITDEPTH
 
 #if CONFIG_AV1 && CONFIG_EXT_INTER
+void aom_comp_mask_pred_c(uint8_t *comp_pred, const uint8_t *pred, int width,
+                          int height, const uint8_t *ref, int ref_stride,
+                          const uint8_t *mask, int mask_stride,
+                          int invert_mask) {
+  int i, j;
+
+  for (i = 0; i < height; ++i) {
+    for (j = 0; j < width; ++j) {
+      if (!invert_mask)
+        comp_pred[j] = AOM_BLEND_A64(mask[j], ref[j], pred[j]);
+      else
+        comp_pred[j] = AOM_BLEND_A64(mask[j], pred[j], ref[j]);
+    }
+    comp_pred += width;
+    pred += width;
+    ref += ref_stride;
+    mask += mask_stride;
+  }
+}
+
+void aom_comp_mask_upsampled_pred_c(uint8_t *comp_pred, const uint8_t *pred,
+                                    int width, int height, const uint8_t *ref,
+                                    int ref_stride, const uint8_t *mask,
+                                    int mask_stride, int invert_mask) {
+  int i, j;
+  int stride = ref_stride << 3;
+
+  for (i = 0; i < height; i++) {
+    for (j = 0; j < width; j++) {
+      if (!invert_mask)
+        comp_pred[j] = AOM_BLEND_A64(mask[j], ref[(j << 3)], pred[j]);
+      else
+        comp_pred[j] = AOM_BLEND_A64(mask[j], pred[j], ref[(j << 3)]);
+    }
+    comp_pred += width;
+    pred += width;
+    ref += stride;
+    mask += mask_stride;
+  }
+}
+
 void masked_variance(const uint8_t *a, int a_stride, const uint8_t *b,
                      int b_stride, const uint8_t *m, int m_stride, int w, int h,
                      unsigned int *sse, int *sum) {
@@ -696,13 +738,54 @@
   *sse = (uint32_t)ROUND_POWER_OF_TWO(sse64, 12);
 }
 
-#define MASK_VAR(W, H)                                                       \
-  unsigned int aom_masked_variance##W##x##H##_c(                             \
-      const uint8_t *a, int a_stride, const uint8_t *b, int b_stride,        \
-      const uint8_t *m, int m_stride, unsigned int *sse) {                   \
-    int sum;                                                                 \
-    masked_variance(a, a_stride, b, b_stride, m, m_stride, W, H, sse, &sum); \
-    return *sse - (unsigned int)(((int64_t)sum * sum) / (W * H));            \
+void masked_compound_variance(const uint8_t *src, int src_stride,
+                              const uint8_t *a, int a_stride, const uint8_t *b,
+                              int b_stride, const uint8_t *m, int m_stride,
+                              int w, int h, unsigned int *sse, int *sum) {
+  int i, j;
+
+  int64_t sum64 = 0;
+  uint64_t sse64 = 0;
+
+  for (i = 0; i < h; i++) {
+    for (j = 0; j < w; j++) {
+      const uint8_t pred = AOM_BLEND_A64(m[j], a[j], b[j]);
+      const int diff = pred - src[j];
+      sum64 += diff;
+      sse64 += diff * diff;
+    }
+
+    src += src_stride;
+    a += a_stride;
+    b += b_stride;
+    m += m_stride;
+  }
+  sum64 = (sum64 >= 0) ? sum64 : -sum64;
+  *sum = (int)ROUND_POWER_OF_TWO(sum64, 6);
+  *sse = (uint32_t)ROUND_POWER_OF_TWO(sse64, 12);
+}
+
+#define MASK_VAR(W, H)                                                        \
+  unsigned int aom_masked_variance##W##x##H##_c(                              \
+      const uint8_t *a, int a_stride, const uint8_t *b, int b_stride,         \
+      const uint8_t *m, int m_stride, unsigned int *sse) {                    \
+    int sum;                                                                  \
+    masked_variance(a, a_stride, b, b_stride, m, m_stride, W, H, sse, &sum);  \
+    return *sse - (unsigned int)(((int64_t)sum * sum) / (W * H));             \
+  }                                                                           \
+                                                                              \
+  unsigned int aom_masked_compound_variance##W##x##H##_c(                     \
+      const uint8_t *src, int src_stride, const uint8_t *ref, int ref_stride, \
+      const uint8_t *second_pred, const uint8_t *m, int m_stride,             \
+      int invert_mask, unsigned int *sse) {                                   \
+    int sum;                                                                  \
+    if (!invert_mask)                                                         \
+      masked_compound_variance(src, src_stride, ref, ref_stride, second_pred, \
+                               W, m, m_stride, W, H, sse, &sum);              \
+    else                                                                      \
+      masked_compound_variance(src, src_stride, second_pred, W, ref,          \
+                               ref_stride, m, m_stride, W, H, sse, &sum);     \
+    return *sse - (unsigned int)(((int64_t)sum * sum) / (W * H));             \
   }
 
 #define MASK_SUBPIX_VAR(W, H)                                                 \
@@ -720,6 +803,25 @@
                                                                               \
     return aom_masked_variance##W##x##H##_c(temp2, W, dst, dst_stride, msk,   \
                                             msk_stride, sse);                 \
+  }                                                                           \
+                                                                              \
+  unsigned int aom_masked_compound_sub_pixel_variance##W##x##H##_c(           \
+      const uint8_t *src, int src_stride, int xoffset, int yoffset,           \
+      const uint8_t *ref, int ref_stride, const uint8_t *second_pred,         \
+      const uint8_t *msk, int msk_stride, int invert_mask,                    \
+      unsigned int *sse) {                                                    \
+    uint16_t fdata3[(H + 1) * W];                                             \
+    uint8_t temp2[H * W];                                                     \
+    DECLARE_ALIGNED(16, uint8_t, temp3[H * W]);                               \
+                                                                              \
+    var_filter_block2d_bil_first_pass(src, fdata3, src_stride, 1, H + 1, W,   \
+                                      bilinear_filters_2t[xoffset]);          \
+    var_filter_block2d_bil_second_pass(fdata3, temp2, W, W, H, W,             \
+                                       bilinear_filters_2t[yoffset]);         \
+                                                                              \
+    aom_comp_mask_pred(temp3, second_pred, W, H, temp2, W, msk, msk_stride,   \
+                       invert_mask);                                          \
+    return aom_variance##W##x##H##_c(temp3, W, ref, ref_stride, sse);         \
   }
 
 MASK_VAR(4, 4)
@@ -773,6 +875,51 @@
 #endif  // CONFIG_EXT_PARTITION
 
 #if CONFIG_HIGHBITDEPTH
+void aom_highbd_comp_mask_pred_c(uint16_t *comp_pred, const uint8_t *pred8,
+                                 int width, int height, const uint8_t *ref8,
+                                 int ref_stride, const uint8_t *mask,
+                                 int mask_stride, int invert_mask) {
+  int i, j;
+  uint16_t *pred = CONVERT_TO_SHORTPTR(pred8);
+  uint16_t *ref = CONVERT_TO_SHORTPTR(ref8);
+  for (i = 0; i < height; ++i) {
+    for (j = 0; j < width; ++j) {
+      if (!invert_mask)
+        comp_pred[j] = AOM_BLEND_A64(mask[j], ref[j], pred[j]);
+      else
+        comp_pred[j] = AOM_BLEND_A64(mask[j], pred[j], ref[j]);
+    }
+    comp_pred += width;
+    pred += width;
+    ref += ref_stride;
+    mask += mask_stride;
+  }
+}
+
+void aom_highbd_comp_mask_upsampled_pred_c(uint16_t *comp_pred,
+                                           const uint8_t *pred8, int width,
+                                           int height, const uint8_t *ref8,
+                                           int ref_stride, const uint8_t *mask,
+                                           int mask_stride, int invert_mask) {
+  int i, j;
+  int stride = ref_stride << 3;
+
+  uint16_t *pred = CONVERT_TO_SHORTPTR(pred8);
+  uint16_t *ref = CONVERT_TO_SHORTPTR(ref8);
+  for (i = 0; i < height; ++i) {
+    for (j = 0; j < width; ++j) {
+      if (!invert_mask)
+        comp_pred[j] = AOM_BLEND_A64(mask[j], ref[j << 3], pred[j]);
+      else
+        comp_pred[j] = AOM_BLEND_A64(mask[j], pred[j], ref[j << 3]);
+    }
+    comp_pred += width;
+    pred += width;
+    ref += stride;
+    mask += mask_stride;
+  }
+}
+
 void highbd_masked_variance64(const uint8_t *a8, int a_stride,
                               const uint8_t *b8, int b_stride, const uint8_t *m,
                               int m_stride, int w, int h, uint64_t *sse,
@@ -835,85 +982,272 @@
   *sse = (unsigned int)ROUND_POWER_OF_TWO(sse64, 8);
 }
 
-#define HIGHBD_MASK_VAR(W, H)                                                \
-  unsigned int aom_highbd_masked_variance##W##x##H##_c(                      \
-      const uint8_t *a, int a_stride, const uint8_t *b, int b_stride,        \
-      const uint8_t *m, int m_stride, unsigned int *sse) {                   \
-    int sum;                                                                 \
-    highbd_masked_variance(a, a_stride, b, b_stride, m, m_stride, W, H, sse, \
-                           &sum);                                            \
-    return *sse - (unsigned int)(((int64_t)sum * sum) / (W * H));            \
-  }                                                                          \
-                                                                             \
-  unsigned int aom_highbd_10_masked_variance##W##x##H##_c(                   \
-      const uint8_t *a, int a_stride, const uint8_t *b, int b_stride,        \
-      const uint8_t *m, int m_stride, unsigned int *sse) {                   \
-    int sum;                                                                 \
-    int64_t var;                                                             \
-    highbd_10_masked_variance(a, a_stride, b, b_stride, m, m_stride, W, H,   \
-                              sse, &sum);                                    \
-    var = (int64_t)(*sse) - (((int64_t)sum * sum) / (W * H));                \
-    return (var >= 0) ? (uint32_t)var : 0;                                   \
-  }                                                                          \
-                                                                             \
-  unsigned int aom_highbd_12_masked_variance##W##x##H##_c(                   \
-      const uint8_t *a, int a_stride, const uint8_t *b, int b_stride,        \
-      const uint8_t *m, int m_stride, unsigned int *sse) {                   \
-    int sum;                                                                 \
-    int64_t var;                                                             \
-    highbd_12_masked_variance(a, a_stride, b, b_stride, m, m_stride, W, H,   \
-                              sse, &sum);                                    \
-    var = (int64_t)(*sse) - (((int64_t)sum * sum) / (W * H));                \
-    return (var >= 0) ? (uint32_t)var : 0;                                   \
+void highbd_masked_compound_variance64(const uint8_t *src8, int src_stride,
+                                       const uint8_t *a8, int a_stride,
+                                       const uint8_t *b8, int b_stride,
+                                       const uint8_t *m, int m_stride, int w,
+                                       int h, uint64_t *sse, int64_t *sum) {
+  int i, j;
+  uint16_t *src = CONVERT_TO_SHORTPTR(src8);
+  uint16_t *a = CONVERT_TO_SHORTPTR(a8);
+  uint16_t *b = CONVERT_TO_SHORTPTR(b8);
+
+  *sum = 0;
+  *sse = 0;
+
+  for (i = 0; i < h; i++) {
+    for (j = 0; j < w; j++) {
+      const uint16_t pred = AOM_BLEND_A64(m[j], a[j], b[j]);
+      const int diff = pred - src[j];
+      *sum += (int64_t)diff;
+      *sse += (int64_t)diff * diff;
+    }
+
+    src += src_stride;
+    a += a_stride;
+    b += b_stride;
+    m += m_stride;
+  }
+  *sum = (*sum >= 0) ? *sum : -*sum;
+  *sum = ROUND_POWER_OF_TWO(*sum, 6);
+  *sse = ROUND_POWER_OF_TWO(*sse, 12);
+}
+
+void highbd_masked_compound_variance(const uint8_t *src8, int src_stride,
+                                     const uint8_t *a8, int a_stride,
+                                     const uint8_t *b8, int b_stride,
+                                     const uint8_t *m, int m_stride, int w,
+                                     int h, unsigned int *sse, int *sum) {
+  int64_t sum64;
+  uint64_t sse64;
+  highbd_masked_compound_variance64(src8, src_stride, a8, a_stride, b8,
+                                    b_stride, m, m_stride, w, h, &sse64,
+                                    &sum64);
+  *sum = (int)sum64;
+  *sse = (unsigned int)sse64;
+}
+
+void highbd_10_masked_compound_variance(const uint8_t *src8, int src_stride,
+                                        const uint8_t *a8, int a_stride,
+                                        const uint8_t *b8, int b_stride,
+                                        const uint8_t *m, int m_stride, int w,
+                                        int h, unsigned int *sse, int *sum) {
+  int64_t sum64;
+  uint64_t sse64;
+  highbd_masked_compound_variance64(src8, src_stride, a8, a_stride, b8,
+                                    b_stride, m, m_stride, w, h, &sse64,
+                                    &sum64);
+  *sum = (int)ROUND_POWER_OF_TWO(sum64, 2);
+  *sse = (unsigned int)ROUND_POWER_OF_TWO(sse64, 4);
+}
+
+void highbd_12_masked_compound_variance(const uint8_t *src8, int src_stride,
+                                        const uint8_t *a8, int a_stride,
+                                        const uint8_t *b8, int b_stride,
+                                        const uint8_t *m, int m_stride, int w,
+                                        int h, unsigned int *sse, int *sum) {
+  int64_t sum64;
+  uint64_t sse64;
+  highbd_masked_compound_variance64(src8, src_stride, a8, a_stride, b8,
+                                    b_stride, m, m_stride, w, h, &sse64,
+                                    &sum64);
+  *sum = (int)ROUND_POWER_OF_TWO(sum64, 4);
+  *sse = (unsigned int)ROUND_POWER_OF_TWO(sse64, 8);
+}
+
+#define HIGHBD_MASK_VAR(W, H)                                                  \
+  unsigned int aom_highbd_masked_variance##W##x##H##_c(                        \
+      const uint8_t *a, int a_stride, const uint8_t *b, int b_stride,          \
+      const uint8_t *m, int m_stride, unsigned int *sse) {                     \
+    int sum;                                                                   \
+    highbd_masked_variance(a, a_stride, b, b_stride, m, m_stride, W, H, sse,   \
+                           &sum);                                              \
+    return *sse - (unsigned int)(((int64_t)sum * sum) / (W * H));              \
+  }                                                                            \
+                                                                               \
+  unsigned int aom_highbd_10_masked_variance##W##x##H##_c(                     \
+      const uint8_t *a, int a_stride, const uint8_t *b, int b_stride,          \
+      const uint8_t *m, int m_stride, unsigned int *sse) {                     \
+    int sum;                                                                   \
+    int64_t var;                                                               \
+    highbd_10_masked_variance(a, a_stride, b, b_stride, m, m_stride, W, H,     \
+                              sse, &sum);                                      \
+    var = (int64_t)(*sse) - (((int64_t)sum * sum) / (W * H));                  \
+    return (var >= 0) ? (uint32_t)var : 0;                                     \
+  }                                                                            \
+                                                                               \
+  unsigned int aom_highbd_12_masked_variance##W##x##H##_c(                     \
+      const uint8_t *a, int a_stride, const uint8_t *b, int b_stride,          \
+      const uint8_t *m, int m_stride, unsigned int *sse) {                     \
+    int sum;                                                                   \
+    int64_t var;                                                               \
+    highbd_12_masked_variance(a, a_stride, b, b_stride, m, m_stride, W, H,     \
+                              sse, &sum);                                      \
+    var = (int64_t)(*sse) - (((int64_t)sum * sum) / (W * H));                  \
+    return (var >= 0) ? (uint32_t)var : 0;                                     \
+  }                                                                            \
+                                                                               \
+  unsigned int aom_highbd_masked_compound_variance##W##x##H##_c(               \
+      const uint8_t *src, int src_stride, const uint8_t *ref, int ref_stride,  \
+      const uint8_t *second_pred, const uint8_t *m, int m_stride,              \
+      int invert_mask, unsigned int *sse) {                                    \
+    int sum;                                                                   \
+    if (!invert_mask)                                                          \
+      highbd_masked_compound_variance(src, src_stride, ref, ref_stride,        \
+                                      second_pred, W, m, m_stride, W, H, sse,  \
+                                      &sum);                                   \
+    else                                                                       \
+      highbd_masked_compound_variance(src, src_stride, second_pred, W, ref,    \
+                                      ref_stride, m, m_stride, W, H, sse,      \
+                                      &sum);                                   \
+    return *sse - (unsigned int)(((int64_t)sum * sum) / (W * H));              \
+  }                                                                            \
+                                                                               \
+  unsigned int aom_highbd_10_masked_compound_variance##W##x##H##_c(            \
+      const uint8_t *src, int src_stride, const uint8_t *ref, int ref_stride,  \
+      const uint8_t *second_pred, const uint8_t *m, int m_stride,              \
+      int invert_mask, unsigned int *sse) {                                    \
+    int sum;                                                                   \
+    if (!invert_mask)                                                          \
+      highbd_10_masked_compound_variance(src, src_stride, ref, ref_stride,     \
+                                         second_pred, W, m, m_stride, W, H,    \
+                                         sse, &sum);                           \
+    else                                                                       \
+      highbd_10_masked_compound_variance(src, src_stride, second_pred, W, ref, \
+                                         ref_stride, m, m_stride, W, H, sse,   \
+                                         &sum);                                \
+    return *sse - (unsigned int)(((int64_t)sum * sum) / (W * H));              \
+  }                                                                            \
+                                                                               \
+  unsigned int aom_highbd_12_masked_compound_variance##W##x##H##_c(            \
+      const uint8_t *src, int src_stride, const uint8_t *ref, int ref_stride,  \
+      const uint8_t *second_pred, const uint8_t *m, int m_stride,              \
+      int invert_mask, unsigned int *sse) {                                    \
+    int sum;                                                                   \
+    if (!invert_mask)                                                          \
+      highbd_12_masked_compound_variance(src, src_stride, ref, ref_stride,     \
+                                         second_pred, W, m, m_stride, W, H,    \
+                                         sse, &sum);                           \
+    else                                                                       \
+      highbd_12_masked_compound_variance(src, src_stride, second_pred, W, ref, \
+                                         ref_stride, m, m_stride, W, H, sse,   \
+                                         &sum);                                \
+    return *sse - (unsigned int)(((int64_t)sum * sum) / (W * H));              \
   }
 
-#define HIGHBD_MASK_SUBPIX_VAR(W, H)                                          \
-  unsigned int aom_highbd_masked_sub_pixel_variance##W##x##H##_c(             \
-      const uint8_t *src, int src_stride, int xoffset, int yoffset,           \
-      const uint8_t *dst, int dst_stride, const uint8_t *msk, int msk_stride, \
-      unsigned int *sse) {                                                    \
-    uint16_t fdata3[(H + 1) * W];                                             \
-    uint16_t temp2[H * W];                                                    \
-                                                                              \
-    aom_highbd_var_filter_block2d_bil_first_pass(                             \
-        src, fdata3, src_stride, 1, H + 1, W, bilinear_filters_2t[xoffset]);  \
-    aom_highbd_var_filter_block2d_bil_second_pass(                            \
-        fdata3, temp2, W, W, H, W, bilinear_filters_2t[yoffset]);             \
-                                                                              \
-    return aom_highbd_masked_variance##W##x##H##_c(                           \
-        CONVERT_TO_BYTEPTR(temp2), W, dst, dst_stride, msk, msk_stride, sse); \
-  }                                                                           \
-                                                                              \
-  unsigned int aom_highbd_10_masked_sub_pixel_variance##W##x##H##_c(          \
-      const uint8_t *src, int src_stride, int xoffset, int yoffset,           \
-      const uint8_t *dst, int dst_stride, const uint8_t *msk, int msk_stride, \
-      unsigned int *sse) {                                                    \
-    uint16_t fdata3[(H + 1) * W];                                             \
-    uint16_t temp2[H * W];                                                    \
-                                                                              \
-    aom_highbd_var_filter_block2d_bil_first_pass(                             \
-        src, fdata3, src_stride, 1, H + 1, W, bilinear_filters_2t[xoffset]);  \
-    aom_highbd_var_filter_block2d_bil_second_pass(                            \
-        fdata3, temp2, W, W, H, W, bilinear_filters_2t[yoffset]);             \
-                                                                              \
-    return aom_highbd_10_masked_variance##W##x##H##_c(                        \
-        CONVERT_TO_BYTEPTR(temp2), W, dst, dst_stride, msk, msk_stride, sse); \
-  }                                                                           \
-                                                                              \
-  unsigned int aom_highbd_12_masked_sub_pixel_variance##W##x##H##_c(          \
-      const uint8_t *src, int src_stride, int xoffset, int yoffset,           \
-      const uint8_t *dst, int dst_stride, const uint8_t *msk, int msk_stride, \
-      unsigned int *sse) {                                                    \
-    uint16_t fdata3[(H + 1) * W];                                             \
-    uint16_t temp2[H * W];                                                    \
-                                                                              \
-    aom_highbd_var_filter_block2d_bil_first_pass(                             \
-        src, fdata3, src_stride, 1, H + 1, W, bilinear_filters_2t[xoffset]);  \
-    aom_highbd_var_filter_block2d_bil_second_pass(                            \
-        fdata3, temp2, W, W, H, W, bilinear_filters_2t[yoffset]);             \
-                                                                              \
-    return aom_highbd_12_masked_variance##W##x##H##_c(                        \
-        CONVERT_TO_BYTEPTR(temp2), W, dst, dst_stride, msk, msk_stride, sse); \
+#define HIGHBD_MASK_SUBPIX_VAR(W, H)                                           \
+  unsigned int aom_highbd_masked_sub_pixel_variance##W##x##H##_c(              \
+      const uint8_t *src, int src_stride, int xoffset, int yoffset,            \
+      const uint8_t *dst, int dst_stride, const uint8_t *msk, int msk_stride,  \
+      unsigned int *sse) {                                                     \
+    uint16_t fdata3[(H + 1) * W];                                              \
+    uint16_t temp2[H * W];                                                     \
+                                                                               \
+    aom_highbd_var_filter_block2d_bil_first_pass(                              \
+        src, fdata3, src_stride, 1, H + 1, W, bilinear_filters_2t[xoffset]);   \
+    aom_highbd_var_filter_block2d_bil_second_pass(                             \
+        fdata3, temp2, W, W, H, W, bilinear_filters_2t[yoffset]);              \
+                                                                               \
+    return aom_highbd_masked_variance##W##x##H##_c(                            \
+        CONVERT_TO_BYTEPTR(temp2), W, dst, dst_stride, msk, msk_stride, sse);  \
+  }                                                                            \
+                                                                               \
+  unsigned int aom_highbd_10_masked_sub_pixel_variance##W##x##H##_c(           \
+      const uint8_t *src, int src_stride, int xoffset, int yoffset,            \
+      const uint8_t *dst, int dst_stride, const uint8_t *msk, int msk_stride,  \
+      unsigned int *sse) {                                                     \
+    uint16_t fdata3[(H + 1) * W];                                              \
+    uint16_t temp2[H * W];                                                     \
+                                                                               \
+    aom_highbd_var_filter_block2d_bil_first_pass(                              \
+        src, fdata3, src_stride, 1, H + 1, W, bilinear_filters_2t[xoffset]);   \
+    aom_highbd_var_filter_block2d_bil_second_pass(                             \
+        fdata3, temp2, W, W, H, W, bilinear_filters_2t[yoffset]);              \
+                                                                               \
+    return aom_highbd_10_masked_variance##W##x##H##_c(                         \
+        CONVERT_TO_BYTEPTR(temp2), W, dst, dst_stride, msk, msk_stride, sse);  \
+  }                                                                            \
+                                                                               \
+  unsigned int aom_highbd_12_masked_sub_pixel_variance##W##x##H##_c(           \
+      const uint8_t *src, int src_stride, int xoffset, int yoffset,            \
+      const uint8_t *dst, int dst_stride, const uint8_t *msk, int msk_stride,  \
+      unsigned int *sse) {                                                     \
+    uint16_t fdata3[(H + 1) * W];                                              \
+    uint16_t temp2[H * W];                                                     \
+                                                                               \
+    aom_highbd_var_filter_block2d_bil_first_pass(                              \
+        src, fdata3, src_stride, 1, H + 1, W, bilinear_filters_2t[xoffset]);   \
+    aom_highbd_var_filter_block2d_bil_second_pass(                             \
+        fdata3, temp2, W, W, H, W, bilinear_filters_2t[yoffset]);              \
+                                                                               \
+    return aom_highbd_12_masked_variance##W##x##H##_c(                         \
+        CONVERT_TO_BYTEPTR(temp2), W, dst, dst_stride, msk, msk_stride, sse);  \
+  }                                                                            \
+                                                                               \
+  unsigned int aom_highbd_masked_compound_sub_pixel_variance##W##x##H##_c(     \
+      const uint8_t *src, int src_stride, int xoffset, int yoffset,            \
+      const uint8_t *ref, int ref_stride, const uint8_t *second_pred,          \
+      const uint8_t *msk, int msk_stride, int invert_mask,                     \
+      unsigned int *sse) {                                                     \
+    uint16_t fdata3[(H + 1) * W];                                              \
+    uint16_t temp2[H * W];                                                     \
+    DECLARE_ALIGNED(16, uint16_t, temp3[H * W]);                               \
+                                                                               \
+    aom_highbd_var_filter_block2d_bil_first_pass(                              \
+        src, fdata3, src_stride, 1, H + 1, W, bilinear_filters_2t[xoffset]);   \
+    aom_highbd_var_filter_block2d_bil_second_pass(                             \
+        fdata3, temp2, W, W, H, W, bilinear_filters_2t[yoffset]);              \
+                                                                               \
+    aom_highbd_comp_mask_pred_c(temp3, second_pred, W, H,                      \
+                                CONVERT_TO_BYTEPTR(temp2), W, msk, msk_stride, \
+                                invert_mask);                                  \
+                                                                               \
+    return aom_highbd_8_variance##W##x##H##_c(CONVERT_TO_BYTEPTR(temp3), W,    \
+                                              ref, ref_stride, sse);           \
+  }                                                                            \
+                                                                               \
+  unsigned int aom_highbd_10_masked_compound_sub_pixel_variance##W##x##H##_c(  \
+      const uint8_t *src, int src_stride, int xoffset, int yoffset,            \
+      const uint8_t *ref, int ref_stride, const uint8_t *second_pred,          \
+      const uint8_t *msk, int msk_stride, int invert_mask,                     \
+      unsigned int *sse) {                                                     \
+    uint16_t fdata3[(H + 1) * W];                                              \
+    uint16_t temp2[H * W];                                                     \
+    DECLARE_ALIGNED(16, uint16_t, temp3[H * W]);                               \
+                                                                               \
+    aom_highbd_var_filter_block2d_bil_first_pass(                              \
+        src, fdata3, src_stride, 1, H + 1, W, bilinear_filters_2t[xoffset]);   \
+    aom_highbd_var_filter_block2d_bil_second_pass(                             \
+        fdata3, temp2, W, W, H, W, bilinear_filters_2t[yoffset]);              \
+                                                                               \
+    aom_highbd_comp_mask_pred_c(temp3, second_pred, W, H,                      \
+                                CONVERT_TO_BYTEPTR(temp2), W, msk, msk_stride, \
+                                invert_mask);                                  \
+                                                                               \
+    return aom_highbd_10_variance##W##x##H##_c(CONVERT_TO_BYTEPTR(temp3), W,   \
+                                               ref, ref_stride, sse);          \
+  }                                                                            \
+                                                                               \
+  unsigned int aom_highbd_12_masked_compound_sub_pixel_variance##W##x##H##_c(  \
+      const uint8_t *src, int src_stride, int xoffset, int yoffset,            \
+      const uint8_t *ref, int ref_stride, const uint8_t *second_pred,          \
+      const uint8_t *msk, int msk_stride, int invert_mask,                     \
+      unsigned int *sse) {                                                     \
+    uint16_t fdata3[(H + 1) * W];                                              \
+    uint16_t temp2[H * W];                                                     \
+    DECLARE_ALIGNED(16, uint16_t, temp3[H * W]);                               \
+                                                                               \
+    aom_highbd_var_filter_block2d_bil_first_pass(                              \
+        src, fdata3, src_stride, 1, H + 1, W, bilinear_filters_2t[xoffset]);   \
+    aom_highbd_var_filter_block2d_bil_second_pass(                             \
+        fdata3, temp2, W, W, H, W, bilinear_filters_2t[yoffset]);              \
+                                                                               \
+    aom_highbd_comp_mask_pred_c(temp3, second_pred, W, H,                      \
+                                CONVERT_TO_BYTEPTR(temp2), W, msk, msk_stride, \
+                                invert_mask);                                  \
+                                                                               \
+    return aom_highbd_12_variance##W##x##H##_c(CONVERT_TO_BYTEPTR(temp3), W,   \
+                                               ref, ref_stride, sse);          \
   }
 
 HIGHBD_MASK_VAR(4, 4)