Merge "Add single motion search for OBMC predictor" into nextgenv2
diff --git a/vp10/common/reconinter.h b/vp10/common/reconinter.h
index 86959b0..1e8679b 100644
--- a/vp10/common/reconinter.h
+++ b/vp10/common/reconinter.h
@@ -487,6 +487,7 @@
#endif // CONFIG_EXT_INTERP
#if CONFIG_OBMC
+void setup_obmc_mask(int length, const uint8_t *mask[2]);
void vp10_build_obmc_inter_prediction(VP10_COMMON *cm,
MACROBLOCKD *xd, int mi_row, int mi_col,
int use_tmp_dst_buf,
diff --git a/vp10/encoder/encoder.c b/vp10/encoder/encoder.c
index fc55133..43b5401 100644
--- a/vp10/encoder/encoder.c
+++ b/vp10/encoder/encoder.c
@@ -1219,6 +1219,49 @@
MAKE_MBFP_SAD_WRAPPER(vpx_highbd_masked_sad4x4)
#endif // CONFIG_EXT_INTER
+#if CONFIG_OBMC
+#define HIGHBD_OBFP(BT, OSDF, OVF, OSVF) \
+ cpi->fn_ptr[BT].osdf = OSDF; \
+ cpi->fn_ptr[BT].ovf = OVF; \
+ cpi->fn_ptr[BT].osvf = OSVF;
+
+#define MAKE_OBFP_SAD_WRAPPER(fnname) \
+static unsigned int fnname##_bits8(const uint8_t *ref, int ref_stride, \
+ const int *wsrc, int wsrc_stride, \
+ const int *msk, int msk_stride) { \
+ return fnname(ref, ref_stride, wsrc, wsrc_stride, msk, msk_stride); \
+} \
+static unsigned int fnname##_bits10(const uint8_t *ref, int ref_stride, \
+ const int *wsrc, int wsrc_stride, \
+ const int *msk, int msk_stride) { \
+ return fnname(ref, ref_stride, wsrc, wsrc_stride, msk, msk_stride) >> 2; \
+} \
+static unsigned int fnname##_bits12(const uint8_t *ref, int ref_stride, \
+ const int *wsrc, int wsrc_stride, \
+ const int *msk, int msk_stride) { \
+ return fnname(ref, ref_stride, wsrc, wsrc_stride, msk, msk_stride) >> 4; \
+}
+
+#if CONFIG_EXT_PARTITION
+MAKE_OBFP_SAD_WRAPPER(vpx_highbd_obmc_sad128x128)
+MAKE_OBFP_SAD_WRAPPER(vpx_highbd_obmc_sad128x64)
+MAKE_OBFP_SAD_WRAPPER(vpx_highbd_obmc_sad64x128)
+#endif // CONFIG_EXT_PARTITION
+MAKE_OBFP_SAD_WRAPPER(vpx_highbd_obmc_sad64x64)
+MAKE_OBFP_SAD_WRAPPER(vpx_highbd_obmc_sad64x32)
+MAKE_OBFP_SAD_WRAPPER(vpx_highbd_obmc_sad32x64)
+MAKE_OBFP_SAD_WRAPPER(vpx_highbd_obmc_sad32x32)
+MAKE_OBFP_SAD_WRAPPER(vpx_highbd_obmc_sad32x16)
+MAKE_OBFP_SAD_WRAPPER(vpx_highbd_obmc_sad16x32)
+MAKE_OBFP_SAD_WRAPPER(vpx_highbd_obmc_sad16x16)
+MAKE_OBFP_SAD_WRAPPER(vpx_highbd_obmc_sad16x8)
+MAKE_OBFP_SAD_WRAPPER(vpx_highbd_obmc_sad8x16)
+MAKE_OBFP_SAD_WRAPPER(vpx_highbd_obmc_sad8x8)
+MAKE_OBFP_SAD_WRAPPER(vpx_highbd_obmc_sad8x4)
+MAKE_OBFP_SAD_WRAPPER(vpx_highbd_obmc_sad4x8)
+MAKE_OBFP_SAD_WRAPPER(vpx_highbd_obmc_sad4x4)
+#endif // CONFIG_OBMC
+
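For reference, here is what the wrapper macro expands to for one block size and bit depth (illustrative only, not part of the patch): the raw high-bit-depth OBMC SAD is shifted back down so its magnitude matches the 8-bit scale the rate-distortion code expects: 10-bit samples span 4x the 8-bit range, hence >> 2, and 12-bit samples 16x, hence >> 4.

    static unsigned int vpx_highbd_obmc_sad16x16_bits10(
        const uint8_t *ref, int ref_stride,
        const int *wsrc, int wsrc_stride,
        const int *msk, int msk_stride) {
      // Normalize the 10-bit SAD back to the 8-bit scale used by the RD code.
      return vpx_highbd_obmc_sad16x16(ref, ref_stride, wsrc, wsrc_stride,
                                      msk, msk_stride) >> 2;
    }
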
static void highbd_set_var_fns(VP10_COMP *const cpi) {
VP10_COMMON *const cm = &cpi->common;
if (cm->use_highbitdepth) {
@@ -1454,6 +1497,74 @@
vpx_highbd_masked_variance4x4,
vpx_highbd_masked_sub_pixel_variance4x4)
#endif // CONFIG_EXT_INTER
+#if CONFIG_OBMC
+#if CONFIG_EXT_PARTITION
+ HIGHBD_OBFP(BLOCK_128X128,
+ vpx_highbd_obmc_sad128x128_bits8,
+ vpx_highbd_obmc_variance128x128,
+ vpx_highbd_obmc_sub_pixel_variance128x128)
+ HIGHBD_OBFP(BLOCK_128X64,
+ vpx_highbd_obmc_sad128x64_bits8,
+ vpx_highbd_obmc_variance128x64,
+ vpx_highbd_obmc_sub_pixel_variance128x64)
+ HIGHBD_OBFP(BLOCK_64X128,
+ vpx_highbd_obmc_sad64x128_bits8,
+ vpx_highbd_obmc_variance64x128,
+ vpx_highbd_obmc_sub_pixel_variance64x128)
+#endif // CONFIG_EXT_PARTITION
+ HIGHBD_OBFP(BLOCK_64X64,
+ vpx_highbd_obmc_sad64x64_bits8,
+ vpx_highbd_obmc_variance64x64,
+ vpx_highbd_obmc_sub_pixel_variance64x64)
+ HIGHBD_OBFP(BLOCK_64X32,
+ vpx_highbd_obmc_sad64x32_bits8,
+ vpx_highbd_obmc_variance64x32,
+ vpx_highbd_obmc_sub_pixel_variance64x32)
+ HIGHBD_OBFP(BLOCK_32X64,
+ vpx_highbd_obmc_sad32x64_bits8,
+ vpx_highbd_obmc_variance32x64,
+ vpx_highbd_obmc_sub_pixel_variance32x64)
+ HIGHBD_OBFP(BLOCK_32X32,
+ vpx_highbd_obmc_sad32x32_bits8,
+ vpx_highbd_obmc_variance32x32,
+ vpx_highbd_obmc_sub_pixel_variance32x32)
+ HIGHBD_OBFP(BLOCK_32X16,
+ vpx_highbd_obmc_sad32x16_bits8,
+ vpx_highbd_obmc_variance32x16,
+ vpx_highbd_obmc_sub_pixel_variance32x16)
+ HIGHBD_OBFP(BLOCK_16X32,
+ vpx_highbd_obmc_sad16x32_bits8,
+ vpx_highbd_obmc_variance16x32,
+ vpx_highbd_obmc_sub_pixel_variance16x32)
+ HIGHBD_OBFP(BLOCK_16X16,
+ vpx_highbd_obmc_sad16x16_bits8,
+ vpx_highbd_obmc_variance16x16,
+ vpx_highbd_obmc_sub_pixel_variance16x16)
+ HIGHBD_OBFP(BLOCK_8X16,
+ vpx_highbd_obmc_sad8x16_bits8,
+ vpx_highbd_obmc_variance8x16,
+ vpx_highbd_obmc_sub_pixel_variance8x16)
+ HIGHBD_OBFP(BLOCK_16X8,
+ vpx_highbd_obmc_sad16x8_bits8,
+ vpx_highbd_obmc_variance16x8,
+ vpx_highbd_obmc_sub_pixel_variance16x8)
+ HIGHBD_OBFP(BLOCK_8X8,
+ vpx_highbd_obmc_sad8x8_bits8,
+ vpx_highbd_obmc_variance8x8,
+ vpx_highbd_obmc_sub_pixel_variance8x8)
+ HIGHBD_OBFP(BLOCK_4X8,
+ vpx_highbd_obmc_sad4x8_bits8,
+ vpx_highbd_obmc_variance4x8,
+ vpx_highbd_obmc_sub_pixel_variance4x8)
+ HIGHBD_OBFP(BLOCK_8X4,
+ vpx_highbd_obmc_sad8x4_bits8,
+ vpx_highbd_obmc_variance8x4,
+ vpx_highbd_obmc_sub_pixel_variance8x4)
+ HIGHBD_OBFP(BLOCK_4X4,
+ vpx_highbd_obmc_sad4x4_bits8,
+ vpx_highbd_obmc_variance4x4,
+ vpx_highbd_obmc_sub_pixel_variance4x4)
+#endif // CONFIG_OBMC
break;
case VPX_BITS_10:
@@ -1687,6 +1798,74 @@
vpx_highbd_10_masked_variance4x4,
vpx_highbd_10_masked_sub_pixel_variance4x4)
#endif // CONFIG_EXT_INTER
+#if CONFIG_OBMC
+#if CONFIG_EXT_PARTITION
+ HIGHBD_OBFP(BLOCK_128X128,
+ vpx_highbd_obmc_sad128x128_bits10,
+ vpx_highbd_10_obmc_variance128x128,
+ vpx_highbd_10_obmc_sub_pixel_variance128x128)
+ HIGHBD_OBFP(BLOCK_128X64,
+ vpx_highbd_obmc_sad128x64_bits10,
+ vpx_highbd_10_obmc_variance128x64,
+ vpx_highbd_10_obmc_sub_pixel_variance128x64)
+ HIGHBD_OBFP(BLOCK_64X128,
+ vpx_highbd_obmc_sad64x128_bits10,
+ vpx_highbd_10_obmc_variance64x128,
+ vpx_highbd_10_obmc_sub_pixel_variance64x128)
+#endif // CONFIG_EXT_PARTITION
+ HIGHBD_OBFP(BLOCK_64X64,
+ vpx_highbd_obmc_sad64x64_bits10,
+ vpx_highbd_10_obmc_variance64x64,
+ vpx_highbd_10_obmc_sub_pixel_variance64x64)
+ HIGHBD_OBFP(BLOCK_64X32,
+ vpx_highbd_obmc_sad64x32_bits10,
+ vpx_highbd_10_obmc_variance64x32,
+ vpx_highbd_10_obmc_sub_pixel_variance64x32)
+ HIGHBD_OBFP(BLOCK_32X64,
+ vpx_highbd_obmc_sad32x64_bits10,
+ vpx_highbd_10_obmc_variance32x64,
+ vpx_highbd_10_obmc_sub_pixel_variance32x64)
+ HIGHBD_OBFP(BLOCK_32X32,
+ vpx_highbd_obmc_sad32x32_bits10,
+ vpx_highbd_10_obmc_variance32x32,
+ vpx_highbd_10_obmc_sub_pixel_variance32x32)
+ HIGHBD_OBFP(BLOCK_32X16,
+ vpx_highbd_obmc_sad32x16_bits10,
+ vpx_highbd_10_obmc_variance32x16,
+ vpx_highbd_10_obmc_sub_pixel_variance32x16)
+ HIGHBD_OBFP(BLOCK_16X32,
+ vpx_highbd_obmc_sad16x32_bits10,
+ vpx_highbd_10_obmc_variance16x32,
+ vpx_highbd_10_obmc_sub_pixel_variance16x32)
+ HIGHBD_OBFP(BLOCK_16X16,
+ vpx_highbd_obmc_sad16x16_bits10,
+ vpx_highbd_10_obmc_variance16x16,
+ vpx_highbd_10_obmc_sub_pixel_variance16x16)
+ HIGHBD_OBFP(BLOCK_8X16,
+ vpx_highbd_obmc_sad8x16_bits10,
+ vpx_highbd_10_obmc_variance8x16,
+ vpx_highbd_10_obmc_sub_pixel_variance8x16)
+ HIGHBD_OBFP(BLOCK_16X8,
+ vpx_highbd_obmc_sad16x8_bits10,
+ vpx_highbd_10_obmc_variance16x8,
+ vpx_highbd_10_obmc_sub_pixel_variance16x8)
+ HIGHBD_OBFP(BLOCK_8X8,
+ vpx_highbd_obmc_sad8x8_bits10,
+ vpx_highbd_10_obmc_variance8x8,
+ vpx_highbd_10_obmc_sub_pixel_variance8x8)
+ HIGHBD_OBFP(BLOCK_4X8,
+ vpx_highbd_obmc_sad4x8_bits10,
+ vpx_highbd_10_obmc_variance4x8,
+ vpx_highbd_10_obmc_sub_pixel_variance4x8)
+ HIGHBD_OBFP(BLOCK_8X4,
+ vpx_highbd_obmc_sad8x4_bits10,
+ vpx_highbd_10_obmc_variance8x4,
+ vpx_highbd_10_obmc_sub_pixel_variance8x4)
+ HIGHBD_OBFP(BLOCK_4X4,
+ vpx_highbd_obmc_sad4x4_bits10,
+ vpx_highbd_10_obmc_variance4x4,
+ vpx_highbd_10_obmc_sub_pixel_variance4x4)
+#endif // CONFIG_OBMC
break;
case VPX_BITS_12:
@@ -1920,6 +2099,75 @@
vpx_highbd_12_masked_variance4x4,
vpx_highbd_12_masked_sub_pixel_variance4x4)
#endif // CONFIG_EXT_INTER
+
+#if CONFIG_OBMC
+#if CONFIG_EXT_PARTITION
+ HIGHBD_OBFP(BLOCK_128X128,
+ vpx_highbd_obmc_sad128x128_bits12,
+ vpx_highbd_12_obmc_variance128x128,
+ vpx_highbd_12_obmc_sub_pixel_variance128x128)
+ HIGHBD_OBFP(BLOCK_128X64,
+ vpx_highbd_obmc_sad128x64_bits12,
+ vpx_highbd_12_obmc_variance128x64,
+ vpx_highbd_12_obmc_sub_pixel_variance128x64)
+ HIGHBD_OBFP(BLOCK_64X128,
+ vpx_highbd_obmc_sad64x128_bits12,
+ vpx_highbd_12_obmc_variance64x128,
+ vpx_highbd_12_obmc_sub_pixel_variance64x128)
+#endif // CONFIG_EXT_PARTITION
+ HIGHBD_OBFP(BLOCK_64X64,
+ vpx_highbd_obmc_sad64x64_bits12,
+ vpx_highbd_12_obmc_variance64x64,
+ vpx_highbd_12_obmc_sub_pixel_variance64x64)
+ HIGHBD_OBFP(BLOCK_64X32,
+ vpx_highbd_obmc_sad64x32_bits12,
+ vpx_highbd_12_obmc_variance64x32,
+ vpx_highbd_12_obmc_sub_pixel_variance64x32)
+ HIGHBD_OBFP(BLOCK_32X64,
+ vpx_highbd_obmc_sad32x64_bits12,
+ vpx_highbd_12_obmc_variance32x64,
+ vpx_highbd_12_obmc_sub_pixel_variance32x64)
+ HIGHBD_OBFP(BLOCK_32X32,
+ vpx_highbd_obmc_sad32x32_bits12,
+ vpx_highbd_12_obmc_variance32x32,
+ vpx_highbd_12_obmc_sub_pixel_variance32x32)
+ HIGHBD_OBFP(BLOCK_32X16,
+ vpx_highbd_obmc_sad32x16_bits12,
+ vpx_highbd_12_obmc_variance32x16,
+ vpx_highbd_12_obmc_sub_pixel_variance32x16)
+ HIGHBD_OBFP(BLOCK_16X32,
+ vpx_highbd_obmc_sad16x32_bits12,
+ vpx_highbd_12_obmc_variance16x32,
+ vpx_highbd_12_obmc_sub_pixel_variance16x32)
+ HIGHBD_OBFP(BLOCK_16X16,
+ vpx_highbd_obmc_sad16x16_bits12,
+ vpx_highbd_12_obmc_variance16x16,
+ vpx_highbd_12_obmc_sub_pixel_variance16x16)
+ HIGHBD_OBFP(BLOCK_8X16,
+ vpx_highbd_obmc_sad8x16_bits12,
+ vpx_highbd_12_obmc_variance8x16,
+ vpx_highbd_12_obmc_sub_pixel_variance8x16)
+ HIGHBD_OBFP(BLOCK_16X8,
+ vpx_highbd_obmc_sad16x8_bits12,
+ vpx_highbd_12_obmc_variance16x8,
+ vpx_highbd_12_obmc_sub_pixel_variance16x8)
+ HIGHBD_OBFP(BLOCK_8X8,
+ vpx_highbd_obmc_sad8x8_bits12,
+ vpx_highbd_12_obmc_variance8x8,
+ vpx_highbd_12_obmc_sub_pixel_variance8x8)
+ HIGHBD_OBFP(BLOCK_4X8,
+ vpx_highbd_obmc_sad4x8_bits12,
+ vpx_highbd_12_obmc_variance4x8,
+ vpx_highbd_12_obmc_sub_pixel_variance4x8)
+ HIGHBD_OBFP(BLOCK_8X4,
+ vpx_highbd_obmc_sad8x4_bits12,
+ vpx_highbd_12_obmc_variance8x4,
+ vpx_highbd_12_obmc_sub_pixel_variance8x4)
+ HIGHBD_OBFP(BLOCK_4X4,
+ vpx_highbd_obmc_sad4x4_bits12,
+ vpx_highbd_12_obmc_variance4x4,
+ vpx_highbd_12_obmc_sub_pixel_variance4x4)
+#endif // CONFIG_OBMC
break;
default:
@@ -2414,6 +2662,48 @@
vpx_sub_pixel_avg_variance4x4,
vpx_sad4x4x3, vpx_sad4x4x8, vpx_sad4x4x4d)
+#if CONFIG_OBMC
+#define OBFP(BT, OSDF, OVF, OSVF) \
+ cpi->fn_ptr[BT].osdf = OSDF; \
+ cpi->fn_ptr[BT].ovf = OVF; \
+ cpi->fn_ptr[BT].osvf = OSVF;
+
+#if CONFIG_EXT_PARTITION
+ OBFP(BLOCK_128X128, vpx_obmc_sad128x128, vpx_obmc_variance128x128,
+ vpx_obmc_sub_pixel_variance128x128)
+ OBFP(BLOCK_128X64, vpx_obmc_sad128x64, vpx_obmc_variance128x64,
+ vpx_obmc_sub_pixel_variance128x64)
+ OBFP(BLOCK_64X128, vpx_obmc_sad64x128, vpx_obmc_variance64x128,
+ vpx_obmc_sub_pixel_variance64x128)
+#endif // CONFIG_EXT_PARTITION
+ OBFP(BLOCK_64X64, vpx_obmc_sad64x64, vpx_obmc_variance64x64,
+ vpx_obmc_sub_pixel_variance64x64)
+ OBFP(BLOCK_64X32, vpx_obmc_sad64x32, vpx_obmc_variance64x32,
+ vpx_obmc_sub_pixel_variance64x32)
+ OBFP(BLOCK_32X64, vpx_obmc_sad32x64, vpx_obmc_variance32x64,
+ vpx_obmc_sub_pixel_variance32x64)
+ OBFP(BLOCK_32X32, vpx_obmc_sad32x32, vpx_obmc_variance32x32,
+ vpx_obmc_sub_pixel_variance32x32)
+ OBFP(BLOCK_32X16, vpx_obmc_sad32x16, vpx_obmc_variance32x16,
+ vpx_obmc_sub_pixel_variance32x16)
+ OBFP(BLOCK_16X32, vpx_obmc_sad16x32, vpx_obmc_variance16x32,
+ vpx_obmc_sub_pixel_variance16x32)
+ OBFP(BLOCK_16X16, vpx_obmc_sad16x16, vpx_obmc_variance16x16,
+ vpx_obmc_sub_pixel_variance16x16)
+ OBFP(BLOCK_16X8, vpx_obmc_sad16x8, vpx_obmc_variance16x8,
+ vpx_obmc_sub_pixel_variance16x8)
+ OBFP(BLOCK_8X16, vpx_obmc_sad8x16, vpx_obmc_variance8x16,
+ vpx_obmc_sub_pixel_variance8x16)
+ OBFP(BLOCK_8X8, vpx_obmc_sad8x8, vpx_obmc_variance8x8,
+ vpx_obmc_sub_pixel_variance8x8)
+ OBFP(BLOCK_4X8, vpx_obmc_sad4x8, vpx_obmc_variance4x8,
+ vpx_obmc_sub_pixel_variance4x8)
+ OBFP(BLOCK_8X4, vpx_obmc_sad8x4, vpx_obmc_variance8x4,
+ vpx_obmc_sub_pixel_variance8x4)
+ OBFP(BLOCK_4X4, vpx_obmc_sad4x4, vpx_obmc_variance4x4,
+ vpx_obmc_sub_pixel_variance4x4)
+#endif // CONFIG_OBMC
+
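A minimal sketch of how the new fn_ptr entries are consumed (illustrative only; the real callers are the OBMC search routines added to mcomp.c below): osdf is the whole-pel OBMC SAD used by the diamond and refining searches, ovf the whole-pel OBMC variance, and osvf its sub-pel counterpart, all evaluated against the weighted source and 2D mask produced by calc_target_weighted_pred().

    const vp10_variance_fn_ptr_t *fn = &cpi->fn_ptr[bsize];
    unsigned int sse;
    unsigned int sad = fn->osdf(pred, pred_stride, wsrc, wsrc_stride,
                                mask, mask_stride);
    unsigned int var = fn->ovf(pred, pred_stride, wsrc, wsrc_stride,
                               mask, mask_stride, &sse);
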
#if CONFIG_EXT_INTER
#define MBFP(BT, MSDF, MVF, MSVF) \
cpi->fn_ptr[BT].msdf = MSDF; \
diff --git a/vp10/encoder/mcomp.c b/vp10/encoder/mcomp.c
index dd0c311..14a11c3 100644
--- a/vp10/encoder/mcomp.c
+++ b/vp10/encoder/mcomp.c
@@ -3253,3 +3253,544 @@
return bestsme;
}
#endif // CONFIG_EXT_INTER
+
+#if CONFIG_OBMC
+/* computes the OBMC sub-pel variance error at position (r, c) */
+#define DIST(r, c) \
+ vfp->osvf(pre(y, y_stride, r, c), y_stride, sp(c), sp(r), z, \
+ src_stride, mask, mask_stride, &sse)
+
+/* estimates the rate cost of mv (r, c) relative to the reference (rr, rc) */
+#define MVC(r, c) \
+ (mvcost ? \
+ ((mvjcost[((r) != rr) * 2 + ((c) != rc)] + \
+ mvcost[0][((r) - rr)] + mvcost[1][((c) - rc)]) * \
+ error_per_bit + 4096) >> 13 : 0)
+
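+/* checks if (r, c) has a better score than the previous best */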
+#define CHECK_BETTER(v, r, c) \
+ if (c >= minc && c <= maxc && r >= minr && r <= maxr) { \
+ thismse = (DIST(r, c)); \
+ if ((v = MVC(r, c) + thismse) < besterr) { \
+ besterr = v; \
+ br = r; \
+ bc = c; \
+ *distortion = thismse; \
+ *sse1 = sse; \
+ } \
+ } else { \
+ v = INT_MAX; \
+ }
+
+#undef CHECK_BETTER0
+#define CHECK_BETTER0(v, r, c) CHECK_BETTER(v, r, c)
+
+#undef CHECK_BETTER1
+#define CHECK_BETTER1(v, r, c) \
+ if (c >= minc && c <= maxc && r >= minr && r <= maxr) { \
+ thismse = upsampled_obmc_pref_error(xd, \
+ mask, mask_stride, \
+ vfp, z, src_stride, \
+ upre(y, y_stride, r, c), \
+ y_stride, \
+ w, h, &sse); \
+ if ((v = MVC(r, c) + thismse) < besterr) { \
+ besterr = v; \
+ br = r; \
+ bc = c; \
+ *distortion = thismse; \
+ *sse1 = sse; \
+ } \
+ } else { \
+ v = INT_MAX; \
+ }
+
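For reference, the score CHECK_BETTER evaluates at a candidate sub-pel position (r, c) is, from the two macros above,

    score(r, c) = DIST(r, c)
                + ((mvjcost[(r != rr) * 2 + (c != rc)] +
                    mvcost[0][r - rr] + mvcost[1][c - rc])
                   * error_per_bit + 4096) >> 13

i.e. the OBMC sub-pel variance error plus the rounded motion-vector rate cost, using the same rounding as mv_err_cost() elsewhere in this file.
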
+static unsigned int setup_obmc_center_error(const int *mask,
+ int mask_stride,
+ const MV *bestmv,
+ const MV *ref_mv,
+ int error_per_bit,
+ const vp10_variance_fn_ptr_t *vfp,
+ const int *const wsrc,
+ const int wsrc_stride,
+ const uint8_t *const y,
+ int y_stride,
+ int offset,
+ int *mvjcost, int *mvcost[2],
+ unsigned int *sse1,
+ int *distortion) {
+ unsigned int besterr;
+ besterr = vfp->ovf(y + offset, y_stride, wsrc, wsrc_stride,
+ mask, mask_stride, sse1);
+ *distortion = besterr;
+ besterr += mv_err_cost(bestmv, ref_mv, mvjcost, mvcost, error_per_bit);
+ return besterr;
+}
+
+static int upsampled_obmc_pref_error(const MACROBLOCKD *xd,
+ const int *mask, int mask_stride,
+ const vp10_variance_fn_ptr_t *vfp,
+ const int *const wsrc,
+ const int wsrc_stride,
+ const uint8_t *const y, int y_stride,
+ int w, int h, unsigned int *sse) {
+ unsigned int besterr;
+#if CONFIG_VP9_HIGHBITDEPTH
+ if (xd->cur_buf->flags & YV12_FLAG_HIGHBITDEPTH) {
+ DECLARE_ALIGNED(16, uint16_t, pred16[MAX_SB_SQUARE]);
+ vpx_highbd_upsampled_pred(pred16, w, h, y, y_stride);
+
+ besterr = vfp->ovf(CONVERT_TO_BYTEPTR(pred16), w, wsrc, wsrc_stride,
+ mask, mask_stride, sse);
+ } else {
+ DECLARE_ALIGNED(16, uint8_t, pred[MAX_SB_SQUARE]);
+#else
+ DECLARE_ALIGNED(16, uint8_t, pred[MAX_SB_SQUARE]);
+ (void) xd;
+#endif // CONFIG_VP9_HIGHBITDEPTH
+ vpx_upsampled_pred(pred, w, h, y, y_stride);
+
+ besterr = vfp->ovf(pred, w, wsrc, wsrc_stride, mask, mask_stride, sse);
+#if CONFIG_VP9_HIGHBITDEPTH
+ }
+#endif
+ return besterr;
+}
+
+static unsigned int upsampled_setup_obmc_center_error(
+ const MACROBLOCKD *xd,
+ const int *mask, int mask_stride,
+ const MV *bestmv, const MV *ref_mv,
+ int error_per_bit, const vp10_variance_fn_ptr_t *vfp,
+ const int *const wsrc, const int wsrc_stride,
+ const uint8_t *const y, int y_stride,
+ int w, int h, int offset, int *mvjcost, int *mvcost[2],
+ unsigned int *sse1, int *distortion) {
+ unsigned int besterr = upsampled_obmc_pref_error(xd, mask, mask_stride, vfp,
+ wsrc, wsrc_stride,
+ y + offset, y_stride,
+ w, h, sse1);
+ *distortion = besterr;
+ besterr += mv_err_cost(bestmv, ref_mv, mvjcost, mvcost, error_per_bit);
+ return besterr;
+}
+
+int vp10_find_best_obmc_sub_pixel_tree_up(VP10_COMP *cpi, MACROBLOCK *x,
+ const int *wsrc, int wsrc_stride,
+ const int *mask, int mask_stride,
+ int mi_row, int mi_col,
+ MV *bestmv, const MV *ref_mv,
+ int allow_hp, int error_per_bit,
+ const vp10_variance_fn_ptr_t *vfp,
+ int forced_stop, int iters_per_step,
+ int *mvjcost, int *mvcost[2],
+ int *distortion, unsigned int *sse1,
+ int is_second,
+ int use_upsampled_ref) {
+ const int *const z = wsrc;
+ const int *const src_address = z;
+ const int src_stride = wsrc_stride;
+ MACROBLOCKD *xd = &x->e_mbd;
+ struct macroblockd_plane *const pd = &xd->plane[0];
+ MB_MODE_INFO *mbmi = &xd->mi[0]->mbmi;
+ unsigned int besterr = INT_MAX;
+ unsigned int sse;
+ unsigned int thismse;
+
+ int rr = ref_mv->row;
+ int rc = ref_mv->col;
+ int br = bestmv->row * 8;
+ int bc = bestmv->col * 8;
+ int hstep = 4;
+ int iter;
+ int round = 3 - forced_stop;
+ const int minc = VPXMAX(x->mv_col_min * 8, ref_mv->col - MV_MAX);
+ const int maxc = VPXMIN(x->mv_col_max * 8, ref_mv->col + MV_MAX);
+ const int minr = VPXMAX(x->mv_row_min * 8, ref_mv->row - MV_MAX);
+ const int maxr = VPXMIN(x->mv_row_max * 8, ref_mv->row + MV_MAX);
+ int tr = br;
+ int tc = bc;
+ const MV *search_step = search_step_table;
+ int idx, best_idx = -1;
+ unsigned int cost_array[5];
+ int kr, kc;
+ const int w = 4 * num_4x4_blocks_wide_lookup[mbmi->sb_type];
+ const int h = 4 * num_4x4_blocks_high_lookup[mbmi->sb_type];
+ int offset;
+ int y_stride;
+ const uint8_t *y;
+
+ const struct buf_2d backup_pred = pd->pre[is_second];
+ if (use_upsampled_ref) {
+ int ref = xd->mi[0]->mbmi.ref_frame[is_second];
+ const YV12_BUFFER_CONFIG *upsampled_ref = get_upsampled_ref(cpi, ref);
+ setup_pred_plane(&pd->pre[is_second], upsampled_ref->y_buffer,
+ upsampled_ref->y_stride, (mi_row << 3), (mi_col << 3),
+ NULL, pd->subsampling_x, pd->subsampling_y);
+ }
+ y = pd->pre[is_second].buf;
+ y_stride = pd->pre[is_second].stride;
+ offset = bestmv->row * y_stride + bestmv->col;
+
+ if (!(allow_hp && vp10_use_mv_hp(ref_mv)))
+ if (round == 3)
+ round = 2;
+
+ bestmv->row *= 8;
+ bestmv->col *= 8;
+  // use_upsampled_ref selects the pre-upsampled reference path when set.
+ if (use_upsampled_ref)
+ besterr = upsampled_setup_obmc_center_error(
+ xd, mask, mask_stride, bestmv, ref_mv, error_per_bit,
+ vfp, z, src_stride, y, y_stride,
+ w, h, (offset << 3),
+ mvjcost, mvcost, sse1, distortion);
+ else
+ besterr = setup_obmc_center_error(
+ mask, mask_stride, bestmv, ref_mv, error_per_bit,
+ vfp, z, src_stride, y, y_stride,
+ offset, mvjcost, mvcost, sse1, distortion);
+
+ for (iter = 0; iter < round; ++iter) {
+ // Check vertical and horizontal sub-pixel positions.
+ for (idx = 0; idx < 4; ++idx) {
+ tr = br + search_step[idx].row;
+ tc = bc + search_step[idx].col;
+ if (tc >= minc && tc <= maxc && tr >= minr && tr <= maxr) {
+ MV this_mv = {tr, tc};
+
+ if (use_upsampled_ref) {
+ const uint8_t *const pre_address = y + tr * y_stride + tc;
+
+ thismse = upsampled_obmc_pref_error(xd, mask, mask_stride,
+ vfp, src_address, src_stride,
+ pre_address, y_stride,
+ w, h, &sse);
+ } else {
+ const uint8_t *const pre_address = y + (tr >> 3) * y_stride +
+ (tc >> 3);
+ thismse = vfp->osvf(pre_address, y_stride, sp(tc), sp(tr),
+ src_address, src_stride,
+ mask, mask_stride, &sse);
+ }
+
+ cost_array[idx] = thismse +
+ mv_err_cost(&this_mv, ref_mv, mvjcost, mvcost, error_per_bit);
+ if (cost_array[idx] < besterr) {
+ best_idx = idx;
+ besterr = cost_array[idx];
+ *distortion = thismse;
+ *sse1 = sse;
+ }
+ } else {
+ cost_array[idx] = INT_MAX;
+ }
+ }
+
+ // Check diagonal sub-pixel position
+ kc = (cost_array[0] <= cost_array[1] ? -hstep : hstep);
+ kr = (cost_array[2] <= cost_array[3] ? -hstep : hstep);
+
+ tc = bc + kc;
+ tr = br + kr;
+ if (tc >= minc && tc <= maxc && tr >= minr && tr <= maxr) {
+ MV this_mv = {tr, tc};
+
+ if (use_upsampled_ref) {
+ const uint8_t *const pre_address = y + tr * y_stride + tc;
+
+ thismse = upsampled_obmc_pref_error(xd, mask, mask_stride,
+ vfp, src_address, src_stride,
+ pre_address, y_stride,
+ w, h, &sse);
+ } else {
+ const uint8_t *const pre_address = y + (tr >> 3) * y_stride + (tc >> 3);
+
+ thismse = vfp->osvf(pre_address, y_stride, sp(tc), sp(tr),
+ src_address, src_stride, mask, mask_stride, &sse);
+ }
+
+ cost_array[4] = thismse +
+ mv_err_cost(&this_mv, ref_mv, mvjcost, mvcost, error_per_bit);
+
+ if (cost_array[4] < besterr) {
+ best_idx = 4;
+ besterr = cost_array[4];
+ *distortion = thismse;
+ *sse1 = sse;
+ }
+ } else {
+      cost_array[4] = INT_MAX;
+ }
+
+ if (best_idx < 4 && best_idx >= 0) {
+ br += search_step[best_idx].row;
+ bc += search_step[best_idx].col;
+ } else if (best_idx == 4) {
+ br = tr;
+ bc = tc;
+ }
+
+ if (iters_per_step > 1 && best_idx != -1) {
+ if (use_upsampled_ref) {
+ SECOND_LEVEL_CHECKS_BEST(1);
+ } else {
+ SECOND_LEVEL_CHECKS_BEST(0);
+ }
+ }
+
+ tr = br;
+ tc = bc;
+
+ search_step += 4;
+ hstep >>= 1;
+ best_idx = -1;
+ }
+
+  // These lines ensure static analysis doesn't warn that
+ // tr and tc aren't used after the above point.
+ (void) tr;
+ (void) tc;
+
+ bestmv->row = br;
+ bestmv->col = bc;
+
+ if (use_upsampled_ref) {
+ pd->pre[is_second] = backup_pred;
+ }
+
+ if ((abs(bestmv->col - ref_mv->col) > (MAX_FULL_PEL_VAL << 3)) ||
+ (abs(bestmv->row - ref_mv->row) > (MAX_FULL_PEL_VAL << 3)))
+ return INT_MAX;
+
+ return besterr;
+}
+
+#undef DIST
+#undef MVC
+#undef CHECK_BETTER
+
+static int get_obmc_mvpred_var(const MACROBLOCK *x,
+ const int *wsrc, int wsrc_stride,
+ const int *mask, int mask_stride,
+ const MV *best_mv, const MV *center_mv,
+ const vp10_variance_fn_ptr_t *vfp,
+ int use_mvcost, int is_second) {
+ const MACROBLOCKD *const xd = &x->e_mbd;
+ const struct buf_2d *const in_what = &xd->plane[0].pre[is_second];
+ const MV mv = {best_mv->row * 8, best_mv->col * 8};
+ unsigned int unused;
+
+ return vfp->ovf(get_buf_from_mv(in_what, best_mv), in_what->stride,
+ wsrc, wsrc_stride, mask, mask_stride, &unused) +
+ (use_mvcost ? mv_err_cost(&mv, center_mv, x->nmvjointcost,
+ x->mvcost, x->errorperbit) : 0);
+}
+
+int obmc_refining_search_sad(const MACROBLOCK *x,
+ const int *wsrc, int wsrc_stride,
+ const int *mask, int mask_stride,
+ MV *ref_mv, int error_per_bit,
+ int search_range,
+ const vp10_variance_fn_ptr_t *fn_ptr,
+ const MV *center_mv, int is_second) {
+ const MV neighbors[4] = {{ -1, 0}, {0, -1}, {0, 1}, {1, 0}};
+ const MACROBLOCKD *const xd = &x->e_mbd;
+ const struct buf_2d *const in_what = &xd->plane[0].pre[is_second];
+ const MV fcenter_mv = {center_mv->row >> 3, center_mv->col >> 3};
+ unsigned int best_sad = fn_ptr->osdf(get_buf_from_mv(in_what, ref_mv),
+ in_what->stride,
+ wsrc, wsrc_stride, mask, mask_stride) +
+ mvsad_err_cost(x, ref_mv, &fcenter_mv, error_per_bit);
+ int i, j;
+
+ for (i = 0; i < search_range; i++) {
+ int best_site = -1;
+
+ for (j = 0; j < 4; j++) {
+ const MV mv = {ref_mv->row + neighbors[j].row,
+ ref_mv->col + neighbors[j].col};
+ if (is_mv_in(x, &mv)) {
+ unsigned int sad = fn_ptr->osdf(get_buf_from_mv(in_what, &mv),
+ in_what->stride, wsrc, wsrc_stride,
+ mask, mask_stride);
+ if (sad < best_sad) {
+ sad += mvsad_err_cost(x, &mv, &fcenter_mv, error_per_bit);
+ if (sad < best_sad) {
+ best_sad = sad;
+ best_site = j;
+ }
+ }
+ }
+ }
+
+ if (best_site == -1) {
+ break;
+ } else {
+ ref_mv->row += neighbors[best_site].row;
+ ref_mv->col += neighbors[best_site].col;
+ }
+ }
+ return best_sad;
+}
+
+int obmc_diamond_search_sad(const MACROBLOCK *x,
+ const search_site_config *cfg,
+ const int *wsrc, int wsrc_stride,
+ const int *mask, int mask_stride,
+ MV *ref_mv, MV *best_mv,
+ int search_param,
+ int sad_per_bit, int *num00,
+ const vp10_variance_fn_ptr_t *fn_ptr,
+ const MV *center_mv, int is_second) {
+ const MACROBLOCKD *const xd = &x->e_mbd;
+ const struct buf_2d *const in_what = &xd->plane[0].pre[is_second];
+  // search_param determines the length of the initial step and hence the
+  // number of iterations:
+  // 0 = initial step (MAX_FIRST_STEP) pel, 1 = (MAX_FIRST_STEP/2) pel,
+  // 2 = (MAX_FIRST_STEP/4) pel, etc.
+ const search_site *const ss = &cfg->ss[search_param * cfg->searches_per_step];
+ const int tot_steps = (cfg->ss_count / cfg->searches_per_step) - search_param;
+ const MV fcenter_mv = {center_mv->row >> 3, center_mv->col >> 3};
+ const uint8_t *best_address, *in_what_ref;
+ int best_sad = INT_MAX;
+ int best_site = 0;
+ int last_site = 0;
+ int i, j, step;
+
+ clamp_mv(ref_mv, x->mv_col_min, x->mv_col_max, x->mv_row_min, x->mv_row_max);
+ in_what_ref = in_what->buf + ref_mv->row * in_what->stride + ref_mv->col;
+ best_address = in_what_ref;
+ *num00 = 0;
+ *best_mv = *ref_mv;
+
+ // Check the starting position
+ best_sad = fn_ptr->osdf(best_address, in_what->stride,
+ wsrc, wsrc_stride, mask, mask_stride) +
+ mvsad_err_cost(x, best_mv, &fcenter_mv, sad_per_bit);
+
+ i = 1;
+
+ for (step = 0; step < tot_steps; step++) {
+ for (j = 0; j < cfg->searches_per_step; j++) {
+ const MV mv = {best_mv->row + ss[i].mv.row,
+ best_mv->col + ss[i].mv.col};
+ if (is_mv_in(x, &mv)) {
+ int sad = fn_ptr->osdf(best_address + ss[i].offset, in_what->stride,
+ wsrc, wsrc_stride, mask, mask_stride);
+ if (sad < best_sad) {
+ sad += mvsad_err_cost(x, &mv, &fcenter_mv, sad_per_bit);
+ if (sad < best_sad) {
+ best_sad = sad;
+ best_site = i;
+ }
+ }
+ }
+
+ i++;
+ }
+
+ if (best_site != last_site) {
+ best_mv->row += ss[best_site].mv.row;
+ best_mv->col += ss[best_site].mv.col;
+ best_address += ss[best_site].offset;
+ last_site = best_site;
+#if defined(NEW_DIAMOND_SEARCH)
+ while (1) {
+ const MV this_mv = {best_mv->row + ss[best_site].mv.row,
+ best_mv->col + ss[best_site].mv.col};
+ if (is_mv_in(x, &this_mv)) {
+ int sad = fn_ptr->osdf(best_address + ss[best_site].offset,
+ in_what->stride, wsrc, wsrc_stride,
+ mask, mask_stride);
+ if (sad < best_sad) {
+ sad += mvsad_err_cost(x, &this_mv, &fcenter_mv, sad_per_bit);
+ if (sad < best_sad) {
+ best_sad = sad;
+ best_mv->row += ss[best_site].mv.row;
+ best_mv->col += ss[best_site].mv.col;
+ best_address += ss[best_site].offset;
+ continue;
+ }
+ }
+ }
+ break;
+ }
+#endif
+ } else if (best_address == in_what_ref) {
+ (*num00)++;
+ }
+ }
+ return best_sad;
+}
+
+int vp10_obmc_full_pixel_diamond(const VP10_COMP *cpi, MACROBLOCK *x,
+ const int *wsrc, int wsrc_stride,
+ const int *mask, int mask_stride,
+ MV *mvp_full, int step_param,
+ int sadpb, int further_steps, int do_refine,
+ const vp10_variance_fn_ptr_t *fn_ptr,
+ const MV *ref_mv, MV *dst_mv,
+ int is_second) {
+ MV temp_mv;
+ int thissme, n, num00 = 0;
+ int bestsme = obmc_diamond_search_sad(x, &cpi->ss_cfg,
+ wsrc, wsrc_stride,
+ mask, mask_stride,
+ mvp_full, &temp_mv,
+ step_param, sadpb, &n,
+ fn_ptr, ref_mv, is_second);
+ if (bestsme < INT_MAX)
+ bestsme = get_obmc_mvpred_var(x, wsrc, wsrc_stride, mask, mask_stride,
+ &temp_mv, ref_mv, fn_ptr, 1, is_second);
+ *dst_mv = temp_mv;
+
+ // If there won't be more n-step search, check to see if refining search is
+ // needed.
+ if (n > further_steps)
+ do_refine = 0;
+
+ while (n < further_steps) {
+ ++n;
+
+ if (num00) {
+ num00--;
+ } else {
+ thissme = obmc_diamond_search_sad(x, &cpi->ss_cfg,
+ wsrc, wsrc_stride,
+ mask, mask_stride,
+ mvp_full, &temp_mv,
+ step_param + n, sadpb, &num00,
+ fn_ptr, ref_mv, is_second);
+ if (thissme < INT_MAX)
+ thissme = get_obmc_mvpred_var(x, wsrc, wsrc_stride, mask, mask_stride,
+ &temp_mv, ref_mv, fn_ptr, 1, is_second);
+
+ // check to see if refining search is needed.
+ if (num00 > further_steps - n)
+ do_refine = 0;
+
+ if (thissme < bestsme) {
+ bestsme = thissme;
+ *dst_mv = temp_mv;
+ }
+ }
+ }
+
+ // final 1-away diamond refining search
+ if (do_refine) {
+ const int search_range = 8;
+ MV best_mv = *dst_mv;
+ thissme = obmc_refining_search_sad(x, wsrc, wsrc_stride, mask, mask_stride,
+ &best_mv, sadpb, search_range,
+ fn_ptr, ref_mv, is_second);
+ if (thissme < INT_MAX)
+ thissme = get_obmc_mvpred_var(x, wsrc, wsrc_stride, mask, mask_stride,
+ &best_mv, ref_mv, fn_ptr, 1, is_second);
+ if (thissme < bestsme) {
+ bestsme = thissme;
+ *dst_mv = best_mv;
+ }
+ }
+ return bestsme;
+}
+#endif // CONFIG_OBMC
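Taken together, the intended call order is the full-pel diamond search (with an optional 1-away refining pass) followed by sub-pel refinement; the real call site is single_motion_search_obmc() in rdopt.c below. A hedged sketch with illustrative variable names:

    int cost = vp10_obmc_full_pixel_diamond(cpi, x, wsrc, wsrc_stride,
                                            mask, mask_stride, &mvp_full,
                                            step_param, sadpb,
                                            MAX_MVSEARCH_STEPS - 1 - step_param,
                                            1 /* do_refine */, fn_ptr, &ref_mv,
                                            &tmp_mv.as_mv, 0 /* is_second */);
    if (cost < INT_MAX) {
      int dis;
      cost = vp10_find_best_obmc_sub_pixel_tree_up(
          cpi, x, wsrc, wsrc_stride, mask, mask_stride, mi_row, mi_col,
          &tmp_mv.as_mv, &ref_mv, allow_hp, x->errorperbit, fn_ptr,
          forced_stop, iters_per_step, x->nmvjointcost, x->mvcost,
          &dis, &x->pred_sse[ref], 0 /* is_second */, use_upsampled_ref);
    }
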
diff --git a/vp10/encoder/mcomp.h b/vp10/encoder/mcomp.h
index c12e7af..1b4e613 100644
--- a/vp10/encoder/mcomp.h
+++ b/vp10/encoder/mcomp.h
@@ -195,6 +195,29 @@
const MV *ref_mv, MV *dst_mv,
int is_second);
#endif // CONFIG_EXT_INTER
+
+#if CONFIG_OBMC
+int vp10_obmc_full_pixel_diamond(const struct VP10_COMP *cpi, MACROBLOCK *x,
+ const int *wsrc, int wsrc_stride,
+ const int *mask, int mask_stride,
+ MV *mvp_full, int step_param,
+ int sadpb, int further_steps, int do_refine,
+ const vp10_variance_fn_ptr_t *fn_ptr,
+ const MV *ref_mv, MV *dst_mv,
+ int is_second);
+int vp10_find_best_obmc_sub_pixel_tree_up(struct VP10_COMP *cpi, MACROBLOCK *x,
+ const int *wsrc, int wsrc_stride,
+ const int *mask, int mask_stride,
+ int mi_row, int mi_col,
+ MV *bestmv, const MV *ref_mv,
+ int allow_hp, int error_per_bit,
+ const vp10_variance_fn_ptr_t *vfp,
+ int forced_stop, int iters_per_step,
+ int *mvjcost, int *mvcost[2],
+ int *distortion, unsigned int *sse1,
+ int is_second,
+ int use_upsampled_ref);
+#endif // CONFIG_OBMC
#ifdef __cplusplus
} // extern "C"
#endif
diff --git a/vp10/encoder/rdopt.c b/vp10/encoder/rdopt.c
index 32c64cb..6d7c1a8 100644
--- a/vp10/encoder/rdopt.c
+++ b/vp10/encoder/rdopt.c
@@ -6036,6 +6036,149 @@
}
}
+#if CONFIG_OBMC
+static void single_motion_search_obmc(VP10_COMP *cpi, MACROBLOCK *x,
+ BLOCK_SIZE bsize, int mi_row, int mi_col,
+ const int* wsrc, int wsrc_stride,
+ const int* mask, int mask_stride,
+#if CONFIG_EXT_INTER
+ int ref_idx,
+ int mv_idx,
+#endif // CONFIG_EXT_INTER
+ int_mv *tmp_mv, int_mv pred_mv,
+ int *rate_mv) {
+ MACROBLOCKD *xd = &x->e_mbd;
+ const VP10_COMMON *cm = &cpi->common;
+ MB_MODE_INFO *mbmi = &xd->mi[0]->mbmi;
+ struct buf_2d backup_yv12[MAX_MB_PLANE] = {{0, 0}};
+ int bestsme = INT_MAX;
+ int step_param;
+ int sadpb = x->sadperbit16;
+ MV mvp_full;
+#if CONFIG_EXT_INTER
+ int ref = mbmi->ref_frame[ref_idx];
+ MV ref_mv = x->mbmi_ext->ref_mvs[ref][mv_idx].as_mv;
+#else
+ int ref = mbmi->ref_frame[0];
+ MV ref_mv = x->mbmi_ext->ref_mvs[ref][0].as_mv;
+ int ref_idx = 0;
+#endif // CONFIG_EXT_INTER
+
+ int tmp_col_min = x->mv_col_min;
+ int tmp_col_max = x->mv_col_max;
+ int tmp_row_min = x->mv_row_min;
+ int tmp_row_max = x->mv_row_max;
+
+ const YV12_BUFFER_CONFIG *scaled_ref_frame = vp10_get_scaled_ref_frame(cpi,
+ ref);
+
+#if CONFIG_REF_MV
+ vp10_set_mvcost(x, ref);
+#endif
+
+ if (scaled_ref_frame) {
+ int i;
+ // Swap out the reference frame for a version that's been scaled to
+ // match the resolution of the current frame, allowing the existing
+ // motion search code to be used without additional modifications.
+ for (i = 0; i < MAX_MB_PLANE; i++)
+ backup_yv12[i] = xd->plane[i].pre[ref_idx];
+
+ vp10_setup_pre_planes(xd, ref_idx, scaled_ref_frame, mi_row, mi_col, NULL);
+ }
+
+ vp10_set_mv_search_range(x, &ref_mv);
+
+ // Work out the size of the first step in the mv step search.
+  // 0 here is the maximum-length first step; 1 halves it, and so on.
+ if (cpi->sf.mv.auto_mv_step_size && cm->show_frame) {
+    // Take a weighted average of the step_param based on the last frame's
+    // max mv magnitude and the one based on the best ref mvs of the current
+    // block for the given reference.
+ step_param = (vp10_init_search_range(x->max_mv_context[ref]) +
+ cpi->mv_step_param) / 2;
+ } else {
+ step_param = cpi->mv_step_param;
+ }
+
+ if (cpi->sf.adaptive_motion_search && bsize < cm->sb_size) {
+ int boffset = 2 * (b_width_log2_lookup[cm->sb_size] -
+ VPXMIN(b_height_log2_lookup[bsize], b_width_log2_lookup[bsize]));
+ step_param = VPXMAX(step_param, boffset);
+ }
+
+ if (cpi->sf.adaptive_motion_search) {
+ int bwl = b_width_log2_lookup[bsize];
+ int bhl = b_height_log2_lookup[bsize];
+ int tlevel = x->pred_mv_sad[ref] >> (bwl + bhl + 4);
+
+ if (tlevel < 5)
+ step_param += 2;
+
+    // pred_mv_sad is not set up for dynamically scaled frames.
+ if (cpi->oxcf.resize_mode != RESIZE_DYNAMIC) {
+ int i;
+ for (i = LAST_FRAME; i <= ALTREF_FRAME && cm->show_frame; ++i) {
+ if ((x->pred_mv_sad[ref] >> 3) > x->pred_mv_sad[i]) {
+ x->pred_mv[ref].row = 0;
+ x->pred_mv[ref].col = 0;
+ tmp_mv->as_int = INVALID_MV;
+
+ if (scaled_ref_frame) {
+ int i;
+ for (i = 0; i < MAX_MB_PLANE; ++i)
+ xd->plane[i].pre[ref_idx] = backup_yv12[i];
+ }
+ return;
+ }
+ }
+ }
+ }
+
+ mvp_full = pred_mv.as_mv;
+ mvp_full.col >>= 3;
+ mvp_full.row >>= 3;
+
+ bestsme = vp10_obmc_full_pixel_diamond(cpi, x, wsrc, wsrc_stride,
+ mask, mask_stride,
+ &mvp_full, step_param, sadpb,
+ MAX_MVSEARCH_STEPS - 1 - step_param,
+ 1, &cpi->fn_ptr[bsize],
+ &ref_mv, &tmp_mv->as_mv, ref_idx);
+
+ x->mv_col_min = tmp_col_min;
+ x->mv_col_max = tmp_col_max;
+ x->mv_row_min = tmp_row_min;
+ x->mv_row_max = tmp_row_max;
+
+ if (bestsme < INT_MAX) {
+ int dis;
+ vp10_find_best_obmc_sub_pixel_tree_up(cpi, x,
+ wsrc, wsrc_stride,
+ mask, mask_stride,
+ mi_row, mi_col,
+ &tmp_mv->as_mv, &ref_mv,
+ cm->allow_high_precision_mv,
+ x->errorperbit,
+ &cpi->fn_ptr[bsize],
+ cpi->sf.mv.subpel_force_stop,
+ cpi->sf.mv.subpel_iters_per_step,
+ x->nmvjointcost, x->mvcost,
+ &dis, &x->pred_sse[ref],
+ ref_idx,
+ cpi->sf.use_upsampled_references);
+ }
+ *rate_mv = vp10_mv_bit_cost(&tmp_mv->as_mv, &ref_mv,
+ x->nmvjointcost, x->mvcost, MV_COST_WEIGHT);
+
+ if (scaled_ref_frame) {
+ int i;
+ for (i = 0; i < MAX_MB_PLANE; i++)
+ xd->plane[i].pre[ref_idx] = backup_yv12[i];
+ }
+}
+#endif // CONFIG_OBMC
+
#if CONFIG_EXT_INTER
static void do_masked_motion_search(VP10_COMP *cpi, MACROBLOCK *x,
const uint8_t *mask, int mask_stride,
@@ -6370,10 +6513,10 @@
int_mv (*mode_mv)[MAX_REF_FRAMES],
int mi_row, int mi_col,
#if CONFIG_OBMC
- uint8_t *dst_buf1[3],
- int dst_stride1[3],
- uint8_t *dst_buf2[3],
- int dst_stride2[3],
+ uint8_t *dst_buf1[3], int dst_stride1[3],
+ uint8_t *dst_buf2[3], int dst_stride2[3],
+ int *wsrc, int wsrc_strides,
+ int *mask2d, int mask2d_strides,
#endif // CONFIG_OBMC
#if CONFIG_EXT_INTER
int_mv single_newmvs[2][MAX_REF_FRAMES],
@@ -6435,6 +6578,7 @@
MB_MODE_INFO best_mbmi;
#if CONFIG_EXT_INTER
int rate2_bmc_nocoeff;
+ int rate_mv_bmc;
MB_MODE_INFO best_bmc_mbmi;
#endif // CONFIG_EXT_INTER
#endif // CONFIG_OBMC
@@ -6873,6 +7017,7 @@
#if CONFIG_EXT_INTER
#if CONFIG_OBMC
best_bmc_mbmi = *mbmi;
+ rate_mv_bmc = rate_mv;
rate2_bmc_nocoeff = *rate2;
if (cm->interp_filter == SWITCHABLE)
rate2_bmc_nocoeff += rs;
@@ -7386,14 +7531,45 @@
for (mbmi->obmc = 0; mbmi->obmc <= allow_obmc; mbmi->obmc++) {
int64_t tmp_rd, tmp_dist;
int tmp_rate;
+#if CONFIG_EXT_INTER
+ int tmp_rate2 = mbmi->obmc ? rate2_bmc_nocoeff : rate2_nocoeff;
+#else
+ int tmp_rate2 = rate2_nocoeff;
+#endif // CONFIG_EXT_INTER
if (mbmi->obmc) {
#if CONFIG_EXT_INTER
*mbmi = best_bmc_mbmi;
- assert(!mbmi->use_wedge_interinter);
- vp10_build_inter_predictors_sb(xd, mi_row, mi_col, bsize);
mbmi->obmc = 1;
#endif // CONFIG_EXT_INTER
+ if (!is_comp_pred && have_newmv_in_inter_mode(this_mode)) {
+ int_mv tmp_mv;
+ int_mv pred_mv;
+ int tmp_rate_mv = 0;
+
+ pred_mv.as_int = mbmi->mv[0].as_int;
+ single_motion_search_obmc(cpi, x, bsize, mi_row, mi_col,
+ wsrc, wsrc_strides,
+ mask2d, mask2d_strides,
+#if CONFIG_EXT_INTER
+ 0, mv_idx,
+#endif // CONFIG_EXT_INTER
+ &tmp_mv, pred_mv, &tmp_rate_mv);
+ mbmi->mv[0].as_int = tmp_mv.as_int;
+ if (discount_newmv_test(cpi, this_mode, tmp_mv, mode_mv, refs[0])) {
+ tmp_rate_mv = VPXMAX((tmp_rate_mv / NEW_MV_DISCOUNT_FACTOR), 1);
+ }
+#if CONFIG_EXT_INTER
+ tmp_rate2 = rate2_bmc_nocoeff - rate_mv_bmc + tmp_rate_mv;
+#else
+ tmp_rate2 = rate2_nocoeff - rate_mv + tmp_rate_mv;
+#endif // CONFIG_EXT_INTER
+ vp10_build_inter_predictors_sb(xd, mi_row, mi_col, bsize);
+#if CONFIG_EXT_INTER
+ } else {
+ vp10_build_inter_predictors_sb(xd, mi_row, mi_col, bsize);
+#endif // CONFIG_EXT_INTER
+ }
vp10_build_obmc_inter_prediction(cm, xd, mi_row, mi_col, 0,
NULL, NULL,
dst_buf1, dst_stride1,
@@ -7415,11 +7591,7 @@
#endif // CONFIG_VP9_HIGHBITDEPTH
x->skip = 0;
-#if CONFIG_EXT_INTER
- *rate2 = mbmi->obmc ? rate2_bmc_nocoeff : rate2_nocoeff;
-#else
- *rate2 = rate2_nocoeff;
-#endif // CONFIG_EXT_INTER
+ *rate2 = tmp_rate2;
if (allow_obmc)
*rate2 += cpi->obmc_cost[bsize][mbmi->obmc];
*distortion = 0;
@@ -7927,9 +8099,13 @@
DECLARE_ALIGNED(16, uint8_t, tmp_buf1[MAX_MB_PLANE * MAX_SB_SQUARE]);
DECLARE_ALIGNED(16, uint8_t, tmp_buf2[MAX_MB_PLANE * MAX_SB_SQUARE]);
#endif // CONFIG_VP9_HIGHBITDEPTH
+ DECLARE_ALIGNED(16, int, weighted_src_buf[MAX_SB_SQUARE]);
+ DECLARE_ALIGNED(16, int, mask2d_buf[MAX_SB_SQUARE]);
uint8_t *dst_buf1[MAX_MB_PLANE], *dst_buf2[MAX_MB_PLANE];
int dst_stride1[MAX_MB_PLANE] = {MAX_SB_SIZE, MAX_SB_SIZE, MAX_SB_SIZE};
int dst_stride2[MAX_MB_PLANE] = {MAX_SB_SIZE, MAX_SB_SIZE, MAX_SB_SIZE};
+ int weighted_src_stride = MAX_SB_SIZE;
+ int mask2d_stride = MAX_SB_SIZE;
#if CONFIG_VP9_HIGHBITDEPTH
if (xd->cur_buf->flags & YV12_FLAG_HIGHBITDEPTH) {
@@ -8031,6 +8207,11 @@
vp10_build_prediction_by_left_preds(cm, xd, mi_row, mi_col, dst_buf2,
dst_stride2);
vp10_setup_dst_planes(xd->plane, get_frame_new_buffer(cm), mi_row, mi_col);
+ calc_target_weighted_pred(cm, x, xd, mi_row, mi_col,
+ dst_buf1[0], dst_stride1[0],
+ dst_buf2[0], dst_stride2[0],
+ mask2d_buf, mask2d_stride,
+ weighted_src_buf, weighted_src_stride);
#endif // CONFIG_OBMC
for (ref_frame = LAST_FRAME; ref_frame <= ALTREF_FRAME; ++ref_frame) {
@@ -8577,6 +8758,8 @@
#if CONFIG_OBMC
dst_buf1, dst_stride1,
dst_buf2, dst_stride2,
+ weighted_src_buf, weighted_src_stride,
+ mask2d_buf, mask2d_stride,
#endif // CONFIG_OBMC
#if CONFIG_EXT_INTER
single_newmvs,
@@ -8688,6 +8871,9 @@
#if CONFIG_OBMC
dst_buf1, dst_stride1,
dst_buf2, dst_stride2,
+ weighted_src_buf,
+ weighted_src_stride,
+ mask2d_buf, mask2d_stride,
#endif // CONFIG_OBMC
#if CONFIG_EXT_INTER
dummy_single_newmvs,
@@ -10245,3 +10431,194 @@
store_coding_context(x, ctx, best_ref_index,
best_pred_diff, 0);
}
+
+#if CONFIG_OBMC
+void calc_target_weighted_pred(VP10_COMMON *cm,
+ MACROBLOCK *x,
+ MACROBLOCKD *xd,
+ int mi_row, int mi_col,
+ uint8_t *above_buf, int above_stride,
+ uint8_t *left_buf, int left_stride,
+ int *mask_buf, int mask_stride,
+ int *weighted_src_buf, int weighted_src_stride) {
+ const TileInfo *const tile = &xd->tile;
+ BLOCK_SIZE bsize = xd->mi[0]->mbmi.sb_type;
+ int row, col, i, mi_step;
+ int bw = 8 * xd->n8_w;
+ int bh = 8 * xd->n8_h;
+ int *dst = weighted_src_buf;
+ int *mask2d = mask_buf;
+ uint8_t *src;
+#if CONFIG_VP9_HIGHBITDEPTH
+ int is_hbd = (xd->cur_buf->flags & YV12_FLAG_HIGHBITDEPTH) ? 1 : 0;
+#endif // CONFIG_VP9_HIGHBITDEPTH
+ for (row = 0; row < bh; ++row) {
+ for (col = 0; col < bw; ++col) {
+ dst[col] = 0;
+ mask2d[col] = 64;
+ }
+ dst += weighted_src_stride;
+ mask2d += mask_stride;
+ }
+
+ // handle above row
+#if CONFIG_EXT_TILE
+ if (mi_row > 0 && (mi_row - 1 >= tile->mi_row_start)) {
+#else
+ if (mi_row > 0) {
+#endif // CONFIG_EXT_TILE
+ for (i = 0; i < VPXMIN(xd->n8_w, cm->mi_cols - mi_col); i += mi_step) {
+ int mi_row_offset = -1;
+ int mi_col_offset = i;
+ MODE_INFO *above_mi = xd->mi[mi_col_offset +
+ mi_row_offset * xd->mi_stride];
+ MB_MODE_INFO *above_mbmi = &above_mi->mbmi;
+ int overlap = num_4x4_blocks_high_lookup[bsize] << 1;
+
+ mi_step = VPXMIN(xd->n8_w,
+ num_8x8_blocks_wide_lookup[above_mbmi->sb_type]);
+
+ if (is_neighbor_overlappable(above_mbmi)) {
+ const struct macroblockd_plane *pd = &xd->plane[0];
+ int bw = (mi_step * MI_SIZE) >> pd->subsampling_x;
+ int bh = overlap >> pd->subsampling_y;
+ int dst_stride = weighted_src_stride;
+ int *dst = weighted_src_buf + (i * MI_SIZE >> pd->subsampling_x);
+ int tmp_stride = above_stride;
+ uint8_t *tmp = above_buf + (i * MI_SIZE >> pd->subsampling_x);
+ int mask2d_stride = mask_stride;
+ int *mask2d = mask_buf + (i * MI_SIZE >> pd->subsampling_x);
+ const uint8_t *mask1d[2];
+
+ setup_obmc_mask(bh, mask1d);
+
+#if CONFIG_VP9_HIGHBITDEPTH
+ if (is_hbd) {
+ uint16_t *tmp16 = CONVERT_TO_SHORTPTR(tmp);
+
+ for (row = 0; row < bh; ++row) {
+ for (col = 0; col < bw; ++col) {
+ dst[col] = mask1d[1][row] * tmp16[col];
+ mask2d[col] = mask1d[0][row];
+ }
+ dst += dst_stride;
+ tmp16 += tmp_stride;
+ mask2d += mask2d_stride;
+ }
+ } else {
+#endif // CONFIG_VP9_HIGHBITDEPTH
+ for (row = 0; row < bh; ++row) {
+ for (col = 0; col < bw; ++col) {
+ dst[col] = mask1d[1][row] * tmp[col];
+ mask2d[col] = mask1d[0][row];
+ }
+ dst += dst_stride;
+ tmp += tmp_stride;
+ mask2d += mask2d_stride;
+ }
+#if CONFIG_VP9_HIGHBITDEPTH
+ }
+#endif // CONFIG_VP9_HIGHBITDEPTH
+ }
+ } // each mi in the above row
+ }
+
+ // handle left column
+ dst = weighted_src_buf;
+ mask2d = mask_buf;
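+  // Lift the above-row contribution (and the default weight of 64 where no
+  // neighbour overlapped) from scale 64 up to 64 * 64 = 4096 before the left
+  // column is folded in below.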
+ for (row = 0; row < bh; ++row) {
+ for (col = 0; col < bw; ++col) {
+ dst[col] = dst[col] << 6;
+ mask2d[col] = mask2d[col] << 6;
+ }
+ dst += weighted_src_stride;
+ mask2d += mask_stride;
+ }
+
+ if (mi_col > 0 && (mi_col - 1 >= tile->mi_col_start)) {
+ for (i = 0; i < VPXMIN(xd->n8_h, cm->mi_rows - mi_row); i += mi_step) {
+ int mi_row_offset = i;
+ int mi_col_offset = -1;
+ int overlap = num_4x4_blocks_wide_lookup[bsize] << 1;
+ MODE_INFO *left_mi = xd->mi[mi_col_offset +
+ mi_row_offset * xd->mi_stride];
+ MB_MODE_INFO *left_mbmi = &left_mi->mbmi;
+
+ mi_step = VPXMIN(xd->n8_h,
+ num_8x8_blocks_high_lookup[left_mbmi->sb_type]);
+
+ if (is_neighbor_overlappable(left_mbmi)) {
+ const struct macroblockd_plane *pd = &xd->plane[0];
+ int bw = overlap >> pd->subsampling_x;
+ int bh = (mi_step * MI_SIZE) >> pd->subsampling_y;
+ int dst_stride = weighted_src_stride;
+ int *dst = weighted_src_buf +
+ (i * MI_SIZE * dst_stride >> pd->subsampling_y);
+ int tmp_stride = left_stride;
+ uint8_t *tmp = left_buf +
+ (i * MI_SIZE * tmp_stride >> pd->subsampling_y);
+ int mask2d_stride = mask_stride;
+ int *mask2d = mask_buf +
+ (i * MI_SIZE * mask2d_stride >> pd->subsampling_y);
+ const uint8_t *mask1d[2];
+
+ setup_obmc_mask(bw, mask1d);
+
+#if CONFIG_VP9_HIGHBITDEPTH
+ if (is_hbd) {
+ uint16_t *tmp16 = CONVERT_TO_SHORTPTR(tmp);
+
+ for (row = 0; row < bh; ++row) {
+ for (col = 0; col < bw; ++col) {
+ dst[col] = (dst[col] >> 6) * mask1d[0][col] +
+ (tmp16[col] << 6) * mask1d[1][col];
+ mask2d[col] = (mask2d[col] >> 6) * mask1d[0][col];
+ }
+ dst += dst_stride;
+ tmp16 += tmp_stride;
+ mask2d += mask2d_stride;
+ }
+ } else {
+#endif // CONFIG_VP9_HIGHBITDEPTH
+ for (row = 0; row < bh; ++row) {
+ for (col = 0; col < bw; ++col) {
+ dst[col] = (dst[col] >> 6) * mask1d[0][col] +
+ (tmp[col] << 6) * mask1d[1][col];
+ mask2d[col] = (mask2d[col] >> 6) * mask1d[0][col];
+ }
+ dst += dst_stride;
+ tmp += tmp_stride;
+ mask2d += mask2d_stride;
+ }
+#if CONFIG_VP9_HIGHBITDEPTH
+ }
+#endif // CONFIG_VP9_HIGHBITDEPTH
+ }
+ } // each mi in the left column
+ }
+
+ dst = weighted_src_buf;
+ src = x->plane[0].src.buf;
+#if CONFIG_VP9_HIGHBITDEPTH
+ if (is_hbd) {
+ uint16_t *src16 = CONVERT_TO_SHORTPTR(src);
+
+ for (row = 0; row < bh; ++row) {
+ for (col = 0; col < bw; ++col)
+ dst[col] = (src16[col] << 12) - dst[col];
+ dst += weighted_src_stride;
+ src16 += x->plane[0].src.stride;
+ }
+ } else {
+#endif // CONFIG_VP9_HIGHBITDEPTH
+ for (row = 0; row < bh; ++row) {
+ for (col = 0; col < bw; ++col)
+ dst[col] = (src[col] << 12) - dst[col];
+ dst += weighted_src_stride;
+ src += x->plane[0].src.stride;
+ }
+#if CONFIG_VP9_HIGHBITDEPTH
+ }
+#endif // CONFIG_VP9_HIGHBITDEPTH
+}
+#endif // CONFIG_OBMC
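For reference, the fixed-point bookkeeping in calc_target_weighted_pred() works out as follows (a summary, not additional behaviour): the 1D OBMC weights operate at a scale of 64, so after the above-row pass each weighted-source and mask sample sits at scale 64; the << 6 loop lifts both to 64 * 64 = 4096; the left-column pass multiplies in a second 1D weight while dividing the previous value back down by 64, keeping the scale at 4096; and the final loop produces

    wsrc[i] = src[i] * 4096 - (weighted neighbour prediction, scale 4096)
    mask[i] = (weight remaining for the current block, scale 4096)

which is the form the OBMC SAD/variance kernels in vpx_dsp expect: they evaluate |wsrc[i] - pred[i] * mask[i]| and shift the result back down by 12 bits.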
diff --git a/vp10/encoder/rdopt.h b/vp10/encoder/rdopt.h
index 53920bc..2ca39a5 100644
--- a/vp10/encoder/rdopt.h
+++ b/vp10/encoder/rdopt.h
@@ -108,6 +108,17 @@
return &cpi->upsampled_ref_bufs[cpi->upsampled_ref_idx[ref_idx]].buf;
}
+#if CONFIG_OBMC
+void calc_target_weighted_pred(VP10_COMMON *cm,
+ MACROBLOCK *x,
+ MACROBLOCKD *xd,
+ int mi_row, int mi_col,
+ uint8_t *above_buf, int above_stride,
+ uint8_t *left_buf, int left_stride,
+ int *mask_buf, int mask_stride,
+ int *weighted_src_buf, int weighted_src_stride);
+#endif // CONFIG_OBMC
+
#ifdef __cplusplus
} // extern "C"
#endif
diff --git a/vpx_dsp/sad.c b/vpx_dsp/sad.c
index c500206..b3ed410 100644
--- a/vpx_dsp/sad.c
+++ b/vpx_dsp/sad.c
@@ -450,3 +450,109 @@
HIGHBD_MASKSADMXN(4, 4)
#endif // CONFIG_VP9_HIGHBITDEPTH
#endif // CONFIG_VP10 && CONFIG_EXT_INTER
+
+#if CONFIG_VP10 && CONFIG_OBMC
+// a: pred
+// b: target weighted prediction (scaled by 4096 to keep precision)
+// m: 2d weights (scaled by 4096)
+static INLINE unsigned int obmc_sad(const uint8_t *a, int a_stride,
+ const int *b, int b_stride,
+ const int *m, int m_stride,
+ int width, int height) {
+ int y, x;
+ unsigned int sad = 0;
+
+ for (y = 0; y < height; y++) {
+ for (x = 0; x < width; x++) {
+ int abs_diff = abs(b[x] - a[x] * m[x]);
+ sad += (abs_diff + 2048) >> 12;
+ }
+
+ a += a_stride;
+ b += b_stride;
+ m += m_stride;
+ }
+
+ return sad;
+}
+
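A quick numeric check of the scaling (illustrative only): for a single pixel with src = 100, pred = 98 and no neighbour contribution, calc_target_weighted_pred() leaves m = 64 << 6 = 4096 and b = 100 << 12 = 409600, so abs_diff = |409600 - 98 * 4096| = 8192 and (8192 + 2048) >> 12 = 2, i.e. the plain |src - pred|, as expected when the current block carries all of the weight.
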
+#define OBMCSADMxN(m, n) \
+unsigned int vpx_obmc_sad##m##x##n##_c(const uint8_t *ref, int ref_stride, \
+ const int *wsrc, int wsrc_stride, \
+ const int *msk, int msk_stride) { \
+ return obmc_sad(ref, ref_stride, wsrc, wsrc_stride, msk, msk_stride, m, n); \
+}
+
+#if CONFIG_EXT_PARTITION
+OBMCSADMxN(128, 128)
+OBMCSADMxN(128, 64)
+OBMCSADMxN(64, 128)
+#endif // CONFIG_EXT_PARTITION
+OBMCSADMxN(64, 64)
+OBMCSADMxN(64, 32)
+OBMCSADMxN(32, 64)
+OBMCSADMxN(32, 32)
+OBMCSADMxN(32, 16)
+OBMCSADMxN(16, 32)
+OBMCSADMxN(16, 16)
+OBMCSADMxN(16, 8)
+OBMCSADMxN(8, 16)
+OBMCSADMxN(8, 8)
+OBMCSADMxN(8, 4)
+OBMCSADMxN(4, 8)
+OBMCSADMxN(4, 4)
+
+#if CONFIG_VP9_HIGHBITDEPTH
+static INLINE unsigned int highbd_obmc_sad(const uint8_t *a8, int a_stride,
+ const int *b, int b_stride,
+ const int *m, int m_stride,
+ int width, int height) {
+ int y, x;
+ unsigned int sad = 0;
+ const uint16_t *a = CONVERT_TO_SHORTPTR(a8);
+
+ for (y = 0; y < height; y++) {
+ for (x = 0; x < width; x++) {
+ int abs_diff = abs(b[x] - a[x] * m[x]);
+ sad += (abs_diff + 2048) >> 12;
+ }
+
+ a += a_stride;
+ b += b_stride;
+ m += m_stride;
+ }
+
+ return sad;
+}
+
+#define HIGHBD_OBMCSADMXN(m, n) \
+unsigned int vpx_highbd_obmc_sad##m##x##n##_c(const uint8_t *ref, \
+ int ref_stride, \
+ const int *wsrc, \
+ int wsrc_stride, \
+ const int *msk, \
+ int msk_stride) { \
+ return highbd_obmc_sad(ref, ref_stride, wsrc, wsrc_stride, \
+ msk, msk_stride, m, n); \
+}
+
+#if CONFIG_EXT_PARTITION
+HIGHBD_OBMCSADMXN(128, 128)
+HIGHBD_OBMCSADMXN(128, 64)
+HIGHBD_OBMCSADMXN(64, 128)
+#endif // CONFIG_EXT_PARTITION
+HIGHBD_OBMCSADMXN(64, 64)
+HIGHBD_OBMCSADMXN(64, 32)
+HIGHBD_OBMCSADMXN(32, 64)
+HIGHBD_OBMCSADMXN(32, 32)
+HIGHBD_OBMCSADMXN(32, 16)
+HIGHBD_OBMCSADMXN(16, 32)
+HIGHBD_OBMCSADMXN(16, 16)
+HIGHBD_OBMCSADMXN(16, 8)
+HIGHBD_OBMCSADMXN(8, 16)
+HIGHBD_OBMCSADMXN(8, 8)
+HIGHBD_OBMCSADMXN(8, 4)
+HIGHBD_OBMCSADMXN(4, 8)
+HIGHBD_OBMCSADMXN(4, 4)
+#endif // CONFIG_VP9_HIGHBITDEPTH
+#endif // CONFIG_VP10 && CONFIG_OBMC
diff --git a/vpx_dsp/variance.c b/vpx_dsp/variance.c
index cc99d25..ab3d8bb 100644
--- a/vpx_dsp/variance.c
+++ b/vpx_dsp/variance.c
@@ -7,6 +7,7 @@
* in the file PATENTS. All contributing project authors may
* be found in the AUTHORS file in the root of the source tree.
*/
+#include <stdlib.h>
#include "./vpx_config.h"
#include "./vpx_dsp_rtcd.h"
@@ -1022,3 +1023,322 @@
#endif // CONFIG_EXT_PARTITION
#endif // CONFIG_VP9_HIGHBITDEPTH
#endif // CONFIG_VP10 && CONFIG_EXT_INTER
+
+#if CONFIG_VP10 && CONFIG_OBMC
+void obmc_variance(const uint8_t *a, int a_stride,
+ const int *b, int b_stride,
+ const int *m, int m_stride,
+ int w, int h, unsigned int *sse, int *sum) {
+ int i, j;
+
+ *sse = 0;
+ *sum = 0;
+
+ for (i = 0; i < h; i++) {
+ for (j = 0; j < w; j++) {
+ int scaled_diff = b[j] - a[j] * m[j];
+ int abs_diff = (abs(scaled_diff) + 2048) >> 12;
+ int diff = (scaled_diff >= 0) ? abs_diff : -abs_diff;
+ *sum += diff;
+ *sse += diff * diff;
+ }
+
+ a += a_stride;
+ b += b_stride;
+ m += m_stride;
+ }
+}
+
+#define OBMC_VAR(W, H) \
+unsigned int vpx_obmc_variance##W##x##H##_c(const uint8_t *a, int a_stride, \
+ const int *b, int b_stride, \
+ const int *m, int m_stride, \
+ unsigned int *sse) { \
+ int sum; \
+ obmc_variance(a, a_stride, b, b_stride, m, m_stride, W, H, sse, &sum); \
+ return *sse - (((int64_t)sum * sum) / (W * H)); \
+}
+
+#define OBMC_SUBPIX_VAR(W, H) \
+unsigned int vpx_obmc_sub_pixel_variance##W##x##H##_c( \
+ const uint8_t *pre, int pre_stride, \
+ int xoffset, int yoffset, \
+ const int *wsrc, int wsrc_stride, \
+ const int *msk, int msk_stride, \
+ unsigned int *sse) { \
+ uint16_t fdata3[(H + 1) * W]; \
+ uint8_t temp2[H * W]; \
+ \
+ var_filter_block2d_bil_first_pass(pre, fdata3, pre_stride, 1, H + 1, W, \
+ bilinear_filters_2t[xoffset]); \
+ var_filter_block2d_bil_second_pass(fdata3, temp2, W, W, H, W, \
+ bilinear_filters_2t[yoffset]); \
+ \
+ return vpx_obmc_variance##W##x##H##_c(temp2, W, wsrc, wsrc_stride, \
+ msk, msk_stride, sse); \
+}
+
+OBMC_VAR(4, 4)
+OBMC_SUBPIX_VAR(4, 4)
+
+OBMC_VAR(4, 8)
+OBMC_SUBPIX_VAR(4, 8)
+
+OBMC_VAR(8, 4)
+OBMC_SUBPIX_VAR(8, 4)
+
+OBMC_VAR(8, 8)
+OBMC_SUBPIX_VAR(8, 8)
+
+OBMC_VAR(8, 16)
+OBMC_SUBPIX_VAR(8, 16)
+
+OBMC_VAR(16, 8)
+OBMC_SUBPIX_VAR(16, 8)
+
+OBMC_VAR(16, 16)
+OBMC_SUBPIX_VAR(16, 16)
+
+OBMC_VAR(16, 32)
+OBMC_SUBPIX_VAR(16, 32)
+
+OBMC_VAR(32, 16)
+OBMC_SUBPIX_VAR(32, 16)
+
+OBMC_VAR(32, 32)
+OBMC_SUBPIX_VAR(32, 32)
+
+OBMC_VAR(32, 64)
+OBMC_SUBPIX_VAR(32, 64)
+
+OBMC_VAR(64, 32)
+OBMC_SUBPIX_VAR(64, 32)
+
+OBMC_VAR(64, 64)
+OBMC_SUBPIX_VAR(64, 64)
+
+#if CONFIG_EXT_PARTITION
+OBMC_VAR(64, 128)
+OBMC_SUBPIX_VAR(64, 128)
+
+OBMC_VAR(128, 64)
+OBMC_SUBPIX_VAR(128, 64)
+
+OBMC_VAR(128, 128)
+OBMC_SUBPIX_VAR(128, 128)
+#endif // CONFIG_EXT_PARTITION
+
+#if CONFIG_VP9_HIGHBITDEPTH
+void highbd_obmc_variance64(const uint8_t *a8, int a_stride,
+ const int *b, int b_stride,
+ const int *m, int m_stride,
+ int w, int h, uint64_t *sse, int64_t *sum) {
+ int i, j;
+ uint16_t *a = CONVERT_TO_SHORTPTR(a8);
+
+ *sse = 0;
+ *sum = 0;
+
+ for (i = 0; i < h; i++) {
+ for (j = 0; j < w; j++) {
+ int scaled_diff = b[j] - a[j] * m[j];
+ int abs_diff = (abs(scaled_diff) + 2048) >> 12;
+ int diff = (scaled_diff >= 0) ? abs_diff : -abs_diff;
+ *sum += diff;
+ *sse += diff * diff;
+ }
+
+ a += a_stride;
+ b += b_stride;
+ m += m_stride;
+ }
+}
+
+void highbd_obmc_variance(const uint8_t *a8, int a_stride,
+ const int *b, int b_stride,
+ const int *m, int m_stride,
+ int w, int h, unsigned int *sse, int *sum) {
+ int64_t sum64;
+ uint64_t sse64;
+ highbd_obmc_variance64(a8, a_stride, b, b_stride, m, m_stride,
+ w, h, &sse64, &sum64);
+ *sum = (int)sum64;
+ *sse = (unsigned int)sse64;
+}
+
+void highbd_10_obmc_variance(const uint8_t *a8, int a_stride,
+ const int *b, int b_stride,
+ const int *m, int m_stride,
+ int w, int h, unsigned int *sse, int *sum) {
+ int64_t sum64;
+ uint64_t sse64;
+ highbd_obmc_variance64(a8, a_stride, b, b_stride, m, m_stride,
+ w, h, &sse64, &sum64);
+ *sum = (int)ROUND_POWER_OF_TWO(sum64, 2);
+ *sse = (unsigned int)ROUND_POWER_OF_TWO(sse64, 4);
+}
+
+void highbd_12_obmc_variance(const uint8_t *a8, int a_stride,
+ const int *b, int b_stride,
+ const int *m, int m_stride,
+ int w, int h, unsigned int *sse, int *sum) {
+ int64_t sum64;
+ uint64_t sse64;
+ highbd_obmc_variance64(a8, a_stride, b, b_stride, m, m_stride,
+ w, h, &sse64, &sum64);
+ *sum = (int)ROUND_POWER_OF_TWO(sum64, 4);
+ *sse = (unsigned int)ROUND_POWER_OF_TWO(sse64, 8);
+}
+
+#define HIGHBD_OBMC_VAR(W, H) \
+unsigned int vpx_highbd_obmc_variance##W##x##H##_c(const uint8_t *a, \
+ int a_stride, \
+ const int *b, \
+ int b_stride, \
+ const int *m, \
+ int m_stride, \
+ unsigned int *sse) { \
+ int sum; \
+ highbd_obmc_variance(a, a_stride, b, b_stride, m, m_stride, \
+ W, H, sse, &sum); \
+ return *sse - (((int64_t)sum * sum) / (W * H)); \
+} \
+ \
+unsigned int vpx_highbd_10_obmc_variance##W##x##H##_c(const uint8_t *a, \
+ int a_stride, \
+ const int *b, \
+ int b_stride, \
+ const int *m, \
+ int m_stride, \
+ unsigned int *sse) { \
+ int sum; \
+ highbd_10_obmc_variance(a, a_stride, b, b_stride, m, m_stride, \
+ W, H, sse, &sum); \
+ return *sse - (((int64_t)sum * sum) / (W * H)); \
+} \
+ \
+unsigned int vpx_highbd_12_obmc_variance##W##x##H##_c(const uint8_t *a, \
+ int a_stride, \
+ const int *b, \
+ int b_stride, \
+ const int *m, \
+ int m_stride, \
+ unsigned int *sse) { \
+ int sum; \
+ highbd_12_obmc_variance(a, a_stride, b, b_stride, m, m_stride, \
+ W, H, sse, &sum); \
+ return *sse - (((int64_t)sum * sum) / (W * H)); \
+}
+
+#define HIGHBD_OBMC_SUBPIX_VAR(W, H) \
+unsigned int vpx_highbd_obmc_sub_pixel_variance##W##x##H##_c( \
+ const uint8_t *pre, int pre_stride, \
+ int xoffset, int yoffset, \
+ const int *wsrc, int wsrc_stride, \
+ const int *msk, int msk_stride, \
+ unsigned int *sse) { \
+ uint16_t fdata3[(H + 1) * W]; \
+ uint16_t temp2[H * W]; \
+ \
+ vpx_highbd_var_filter_block2d_bil_first_pass(pre, fdata3, pre_stride, 1, \
+ H + 1, W, \
+ bilinear_filters_2t[xoffset]); \
+ vpx_highbd_var_filter_block2d_bil_second_pass(fdata3, temp2, W, W, H, W, \
+ bilinear_filters_2t[yoffset]); \
+ \
+ return vpx_highbd_obmc_variance##W##x##H##_c(CONVERT_TO_BYTEPTR(temp2), \
+ W, wsrc, wsrc_stride, \
+ msk, msk_stride, sse); \
+} \
+ \
+unsigned int vpx_highbd_10_obmc_sub_pixel_variance##W##x##H##_c( \
+ const uint8_t *pre, int pre_stride, \
+ int xoffset, int yoffset, \
+ const int *wsrc, int wsrc_stride, \
+ const int *msk, int msk_stride, \
+ unsigned int *sse) { \
+ uint16_t fdata3[(H + 1) * W]; \
+ uint16_t temp2[H * W]; \
+ \
+ vpx_highbd_var_filter_block2d_bil_first_pass(pre, fdata3, pre_stride, 1, \
+ H + 1, W, \
+ bilinear_filters_2t[xoffset]); \
+ vpx_highbd_var_filter_block2d_bil_second_pass(fdata3, temp2, W, W, H, W, \
+ bilinear_filters_2t[yoffset]); \
+ \
+ return vpx_highbd_10_obmc_variance##W##x##H##_c(CONVERT_TO_BYTEPTR(temp2), \
+ W, wsrc, wsrc_stride, \
+ msk, msk_stride, sse); \
+} \
+ \
+unsigned int vpx_highbd_12_obmc_sub_pixel_variance##W##x##H##_c( \
+ const uint8_t *pre, int pre_stride, \
+ int xoffset, int yoffset, \
+ const int *wsrc, int wsrc_stride, \
+ const int *msk, int msk_stride, \
+ unsigned int *sse) { \
+ uint16_t fdata3[(H + 1) * W]; \
+ uint16_t temp2[H * W]; \
+ \
+ vpx_highbd_var_filter_block2d_bil_first_pass(pre, fdata3, pre_stride, 1, \
+ H + 1, W, \
+ bilinear_filters_2t[xoffset]); \
+ vpx_highbd_var_filter_block2d_bil_second_pass(fdata3, temp2, W, W, H, W, \
+ bilinear_filters_2t[yoffset]); \
+ \
+ return vpx_highbd_12_obmc_variance##W##x##H##_c(CONVERT_TO_BYTEPTR(temp2), \
+ W, wsrc, wsrc_stride, \
+ msk, msk_stride, sse); \
+}
+
+HIGHBD_OBMC_VAR(4, 4)
+HIGHBD_OBMC_SUBPIX_VAR(4, 4)
+
+HIGHBD_OBMC_VAR(4, 8)
+HIGHBD_OBMC_SUBPIX_VAR(4, 8)
+
+HIGHBD_OBMC_VAR(8, 4)
+HIGHBD_OBMC_SUBPIX_VAR(8, 4)
+
+HIGHBD_OBMC_VAR(8, 8)
+HIGHBD_OBMC_SUBPIX_VAR(8, 8)
+
+HIGHBD_OBMC_VAR(8, 16)
+HIGHBD_OBMC_SUBPIX_VAR(8, 16)
+
+HIGHBD_OBMC_VAR(16, 8)
+HIGHBD_OBMC_SUBPIX_VAR(16, 8)
+
+HIGHBD_OBMC_VAR(16, 16)
+HIGHBD_OBMC_SUBPIX_VAR(16, 16)
+
+HIGHBD_OBMC_VAR(16, 32)
+HIGHBD_OBMC_SUBPIX_VAR(16, 32)
+
+HIGHBD_OBMC_VAR(32, 16)
+HIGHBD_OBMC_SUBPIX_VAR(32, 16)
+
+HIGHBD_OBMC_VAR(32, 32)
+HIGHBD_OBMC_SUBPIX_VAR(32, 32)
+
+HIGHBD_OBMC_VAR(32, 64)
+HIGHBD_OBMC_SUBPIX_VAR(32, 64)
+
+HIGHBD_OBMC_VAR(64, 32)
+HIGHBD_OBMC_SUBPIX_VAR(64, 32)
+
+HIGHBD_OBMC_VAR(64, 64)
+HIGHBD_OBMC_SUBPIX_VAR(64, 64)
+
+#if CONFIG_EXT_PARTITION
+HIGHBD_OBMC_VAR(64, 128)
+HIGHBD_OBMC_SUBPIX_VAR(64, 128)
+
+HIGHBD_OBMC_VAR(128, 64)
+HIGHBD_OBMC_SUBPIX_VAR(128, 64)
+
+HIGHBD_OBMC_VAR(128, 128)
+HIGHBD_OBMC_SUBPIX_VAR(128, 128)
+#endif // CONFIG_EXT_PARTITION
+#endif // CONFIG_VP9_HIGHBITDEPTH
+#endif // CONFIG_VP10 && CONFIG_OBMC
diff --git a/vpx_dsp/variance.h b/vpx_dsp/variance.h
index dea2af9..88ab5e3 100644
--- a/vpx_dsp/variance.h
+++ b/vpx_dsp/variance.h
@@ -98,6 +98,30 @@
unsigned int *sse);
#endif // CONFIG_VP10 && CONFIG_EXT_INTER
+#if CONFIG_VP10 && CONFIG_OBMC
+typedef unsigned int(*vpx_obmc_sad_fn_t)(const uint8_t *pred,
+ int pred_stride,
+ const int *wsrc,
+ int wsrc_stride,
+ const int *msk,
+ int msk_stride);
+typedef unsigned int (*vpx_obmc_variance_fn_t)(const uint8_t *pred,
+ int pred_stride,
+ const int *wsrc,
+ int wsrc_stride,
+ const int *msk,
+ int msk_stride,
+ unsigned int *sse);
+typedef unsigned int (*vpx_obmc_subpixvariance_fn_t)(const uint8_t *pred,
+ int pred_stride,
+ int xoffset, int yoffset,
+ const int *wsrc,
+ int wsrc_stride,
+ const int *msk,
+ int msk_stride,
+ unsigned int *sse);
+#endif // CONFIG_VP10 && CONFIG_OBMC
+
#if CONFIG_VP9
typedef struct vp9_variance_vtable {
vpx_sad_fn_t sdf;
@@ -126,6 +150,11 @@
vpx_masked_variance_fn_t mvf;
vpx_masked_subpixvariance_fn_t msvf;
#endif // CONFIG_EXT_INTER
+#if CONFIG_OBMC
+ vpx_obmc_sad_fn_t osdf;
+ vpx_obmc_variance_fn_t ovf;
+ vpx_obmc_subpixvariance_fn_t osvf;
+#endif // CONFIG_OBMC
} vp10_variance_fn_ptr_t;
#endif // CONFIG_VP10
diff --git a/vpx_dsp/vpx_dsp_rtcd_defs.pl b/vpx_dsp/vpx_dsp_rtcd_defs.pl
index 6e566c8..ad524a2 100644
--- a/vpx_dsp/vpx_dsp_rtcd_defs.pl
+++ b/vpx_dsp/vpx_dsp_rtcd_defs.pl
@@ -1095,6 +1095,25 @@
}
#
+# OBMC SAD
+#
+if (vpx_config("CONFIG_OBMC") eq "yes") {
+ foreach (@block_sizes) {
+ ($w, $h) = @$_;
+ add_proto qw/unsigned int/, "vpx_obmc_sad${w}x${h}", "const uint8_t *ref_ptr, int ref_stride, const int *wsrc_ptr, int wsrc_stride, const int *mask, int mask_stride";
+ specialize "vpx_obmc_sad${w}x${h}";
+ }
+
+ if (vpx_config("CONFIG_VP9_HIGHBITDEPTH") eq "yes") {
+ foreach (@block_sizes) {
+ ($w, $h) = @$_;
+ add_proto qw/unsigned int/, "vpx_highbd_obmc_sad${w}x${h}", "const uint8_t *ref_ptr, int ref_stride, const int *wsrc_ptr, int wsrc_stride, const int *mask, int mask_stride";
+ specialize "vpx_highbd_obmc_sad${w}x${h}";
+ }
+ }
+}
+
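For each block size this emits a C-only prototype; with no SIMD specializations listed, the generated vpx_dsp_rtcd.h should map each symbol straight to its C implementation, roughly as below (illustrative of the generator's output, not part of the patch):

    unsigned int vpx_obmc_sad16x16_c(const uint8_t *ref_ptr, int ref_stride,
                                     const int *wsrc_ptr, int wsrc_stride,
                                     const int *mask, int mask_stride);
    #define vpx_obmc_sad16x16 vpx_obmc_sad16x16_c
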
+#
# Multi-block SAD, comparing a reference to N blocks 1 pixel apart horizontally
#
# Blocks of 3
@@ -1365,6 +1384,31 @@
}
#
+# OBMC Variance / OBMC Subpixel Variance
+#
+if (vpx_config("CONFIG_OBMC") eq "yes") {
+ foreach (@block_sizes) {
+ ($w, $h) = @$_;
+ add_proto qw/unsigned int/, "vpx_obmc_variance${w}x${h}", "const uint8_t *pre_ptr, int pre_stride, const int *wsrc_ptr, int wsrc_stride, const int *mask, int mask_stride, unsigned int *sse";
+ add_proto qw/unsigned int/, "vpx_obmc_sub_pixel_variance${w}x${h}", "const uint8_t *pre_ptr, int pre_stride, int xoffset, int yoffset, const int *wsrc_ptr, int wsrc_stride, const int *mask, int mask_stride, unsigned int *sse";
+ specialize "vpx_obmc_variance${w}x${h}";
+ specialize "vpx_obmc_sub_pixel_variance${w}x${h}";
+ }
+
+ if (vpx_config("CONFIG_VP9_HIGHBITDEPTH") eq "yes") {
+ foreach $bd ("_", "_10_", "_12_") {
+ foreach (@block_sizes) {
+ ($w, $h) = @$_;
+ add_proto qw/unsigned int/, "vpx_highbd${bd}obmc_variance${w}x${h}", "const uint8_t *pre_ptr, int pre_stride, const int *wsrc_ptr, int wsrc_stride, const int *mask, int mask_stride, unsigned int *sse";
+ add_proto qw/unsigned int/, "vpx_highbd${bd}obmc_sub_pixel_variance${w}x${h}", "const uint8_t *pre_ptr, int pre_stride, int xoffset, int yoffset, const int *wsrc_ptr, int wsrc_stride, const int *mask, int mask_stride, unsigned int *sse";
+ specialize "vpx_highbd${bd}obmc_variance${w}x${h}";
+ specialize "vpx_highbd${bd}obmc_sub_pixel_variance${w}x${h}";
+ }
+ }
+ }
+}
+
+#
# Specialty Subpixel
#
add_proto qw/uint32_t vpx_variance_halfpixvar16x16_h/, "const unsigned char *src_ptr, int source_stride, const unsigned char *ref_ptr, int ref_stride, uint32_t *sse";