Overlapped block motion compensation experiment

In this experiment, an OBMC inter prediction mode is enabled for
inter blocks of size 8x8 and larger. When the OBMC flag is on, the
regular block-based motion compensation is refined using the
predictors of the above and left neighboring blocks.

Also fixed compatibility issues with vp9_highbitdepth, supertx,
ref_mv, and ext_interp.
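
For intuition, the blending can be pictured roughly as in the C
sketch below. This is a minimal conceptual sketch only, assuming a
simple linear ramp and a hypothetical helper name
(obmc_blend_from_above); it is not the weighting or the helper used
by this patch.

  #include <stdint.h>

  /* Conceptual sketch: blend the current block's prediction (dst) with a
   * prediction built from the above neighbor's motion parameters
   * (above_pred) over the first few rows, with the neighbor's weight
   * fading as the distance from the shared edge grows. */
  static void obmc_blend_from_above(uint8_t *dst, int dst_stride,
                                    const uint8_t *above_pred,
                                    int above_stride,
                                    int bw, int overlap_rows) {
    int r, c;
    for (r = 0; r < overlap_rows; ++r) {
      /* Hypothetical linear ramp in 1/64 units: strongest at the edge. */
      const int w_nb = ((overlap_rows - r) * 32) / overlap_rows;
      const int w_cur = 64 - w_nb;
      for (c = 0; c < bw; ++c) {
        dst[r * dst_stride + c] = (uint8_t)
            ((w_cur * dst[r * dst_stride + c] +
              w_nb * above_pred[r * above_stride + c] + 32) >> 6);
      }
    }
  }

  /* The left neighbor is handled analogously over the first few columns. */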

Coding gain (%) on derflr/hevcmr/hevchd:
OBMC:            1.047/1.022/0.708
OBMC + SUPERTX:  1.652/1.616/1.137
SUPERTX:         0.862/0.779/0.630

Change-Id: I5d8d3c4729c6d3ccb03ec7034563107893103b7f
diff --git a/vp10/encoder/rdopt.c b/vp10/encoder/rdopt.c
index 060fc38..0a94733 100644
--- a/vp10/encoder/rdopt.c
+++ b/vp10/encoder/rdopt.c
@@ -4932,7 +4932,6 @@
                xd->mb_to_top_edge - LEFT_TOP_MARGIN,
                xd->mb_to_bottom_edge + RIGHT_BOTTOM_MARGIN);
 }
-
 static INTERP_FILTER predict_interp_filter(const VP10_COMP *cpi,
                                            const MACROBLOCK *x,
                                            const BLOCK_SIZE bsize,
@@ -4942,19 +4941,17 @@
                                            (*single_filter)[MAX_REF_FRAMES]
                                            ) {
   INTERP_FILTER best_filter = SWITCHABLE;
-
   const VP10_COMMON *cm = &cpi->common;
   const MACROBLOCKD *xd = &x->e_mbd;
   int bsl = mi_width_log2_lookup[bsize];
   int pred_filter_search = cpi->sf.cb_pred_filter_search ?
       (((mi_row + mi_col) >> bsl) +
-       get_chessboard_index(cm->current_video_frame)) & 0x1 : 0;
+          get_chessboard_index(cm->current_video_frame)) & 0x1 : 0;
   MB_MODE_INFO *mbmi = &xd->mi[0]->mbmi;
   const int is_comp_pred = has_second_ref(mbmi);
   const int this_mode = mbmi->mode;
   int refs[2] = { mbmi->ref_frame[0],
-    (mbmi->ref_frame[1] < 0 ? 0 : mbmi->ref_frame[1]) };
-
+      (mbmi->ref_frame[1] < 0 ? 0 : mbmi->ref_frame[1]) };
   if (pred_filter_search) {
     INTERP_FILTER af = SWITCHABLE, lf = SWITCHABLE;
     if (xd->up_available)
@@ -5053,6 +5050,12 @@
                                  int *disable_skip,
                                  int_mv (*mode_mv)[MAX_REF_FRAMES],
                                  int mi_row, int mi_col,
+#if CONFIG_OBMC
+                                 uint8_t *dst_buf1[3],
+                                 int dst_stride1[3],
+                                 uint8_t *dst_buf2[3],
+                                 int dst_stride2[3],
+#endif  // CONFIG_OBMC
 #if CONFIG_EXT_INTER
                                  int_mv single_newmvs[2][MAX_REF_FRAMES],
 #else
@@ -5088,6 +5091,24 @@
 #else
   DECLARE_ALIGNED(16, uint8_t, tmp_buf[MAX_MB_PLANE * 64 * 64]);
 #endif  // CONFIG_VP9_HIGHBITDEPTH
+#if CONFIG_OBMC
+#if CONFIG_VP9_HIGHBITDEPTH
+  DECLARE_ALIGNED(16, uint16_t, tmp_buf1_16[MAX_MB_PLANE * 64 * 64]);
+  uint8_t *tmp_buf1;
+  uint8_t *obmc_tmp_buf[3];
+#else
+  DECLARE_ALIGNED(16, uint8_t, tmp_buf1[MAX_MB_PLANE * 64 * 64]);
+  uint8_t *obmc_tmp_buf[3] = {tmp_buf1, tmp_buf1 + 4096, tmp_buf1 + 8192};
+#endif  // CONFIG_VP9_HIGHBITDEPTH
+  int obmc_tmp_stride[3] = {64, 64, 64};
+  int best_obmc_flag = 0;
+  uint8_t tmp_skip_txfm[MAX_MB_PLANE << 2] = {0};
+  int64_t tmp_bsse[MAX_MB_PLANE << 2] = {0};
+  int64_t rdobmc;
+  int skip_txfm_sb_obmc = 0;
+  int64_t skip_sse_sb_obmc = INT64_MAX;
+  int allow_obmc = is_obmc_allowed(mbmi);
+#endif  // CONFIG_OBMC
   int pred_exists = 0;
   int intpel_mv;
   int64_t rd, tmp_rd, best_rd = INT64_MAX;
@@ -5104,6 +5125,9 @@
   int64_t distortion_y = 0, distortion_uv = 0;
   int16_t mode_ctx = mbmi_ext->mode_context[refs[0]];
 
+#if CONFIG_OBMC
+  tmp_rd = 0;
+#endif  // CONFIG_OBMC
 #if CONFIG_REF_MV
 #if CONFIG_EXT_INTER
   if (is_comp_pred)
@@ -5117,9 +5141,20 @@
 #if CONFIG_VP9_HIGHBITDEPTH
   if (xd->cur_buf->flags & YV12_FLAG_HIGHBITDEPTH) {
     tmp_buf = CONVERT_TO_BYTEPTR(tmp_buf16);
+#if CONFIG_OBMC
+    tmp_buf1 = CONVERT_TO_BYTEPTR(tmp_buf1_16);
+#endif  // CONFIG_OBMC
   } else {
     tmp_buf = (uint8_t *)tmp_buf16;
+#if CONFIG_OBMC
+    tmp_buf1 = (uint8_t *)tmp_buf1_16;
+#endif  // CONFIG_OBMC
   }
+#if CONFIG_OBMC
+  obmc_tmp_buf[0] = tmp_buf1;
+  obmc_tmp_buf[1] = tmp_buf1 + 4096;
+  obmc_tmp_buf[2] = tmp_buf1 + 8192;
+#endif  // CONFIG_OBMC
 #endif  // CONFIG_VP9_HIGHBITDEPTH
 
   if (is_comp_pred) {
@@ -5302,9 +5337,8 @@
   if (this_mode == NEARMV && is_comp_pred) {
     uint8_t ref_frame_type = vp10_ref_frame_type(mbmi->ref_frame);
     if (mbmi_ext->ref_mv_count[ref_frame_type] > 1) {
-      int ref_mv_idx = mbmi->ref_mv_idx + 1;
-      cur_mv[0] = mbmi_ext->ref_mv_stack[ref_frame_type][ref_mv_idx].this_mv;
-      cur_mv[1] = mbmi_ext->ref_mv_stack[ref_frame_type][ref_mv_idx].comp_mv;
+      cur_mv[0] = mbmi_ext->ref_mv_stack[ref_frame_type][1].this_mv;
+      cur_mv[1] = mbmi_ext->ref_mv_stack[ref_frame_type][1].comp_mv;
 
       for (i = 0; i < 2; ++i) {
         lower_mv_precision(&cur_mv[i].as_mv, cm->allow_high_precision_mv);
@@ -5383,6 +5417,11 @@
       int64_t rs_rd;
       int tmp_skip_sb = 0;
       int64_t tmp_skip_sse = INT64_MAX;
+#if CONFIG_OBMC
+      int obmc_flag = 0;
+      int tmp_skip_sb_obmc = 0;
+      int64_t tmp_skip_sse_obmc = INT64_MAX;
+#endif  // CONFIG_OBMC
 
       mbmi->interp_filter = i;
       rs = vp10_get_switchable_rate(cpi, xd);
@@ -5395,10 +5434,21 @@
             VPXMIN(filter_cache[SWITCHABLE_FILTERS], rd + rs_rd);
         if (cm->interp_filter == SWITCHABLE)
           rd += rs_rd;
+#if CONFIG_OBMC
+        if (allow_obmc) {
+          obmc_flag = best_obmc_flag;
+          rd += RDCOST(x->rdmult, x->rddiv,
+                       cpi->obmc_cost[bsize][obmc_flag], 0);
+        }
+#endif  // CONFIG_OBMC
         *mask_filter = VPXMAX(*mask_filter, rd);
       } else {
         int rate_sum = 0;
         int64_t dist_sum = 0;
+#if CONFIG_OBMC
+        int rate_sum_obmc = 0;
+        int64_t dist_sum_obmc = 0;
+#endif  // CONFIG_OBMC
         if (i > 0 && cpi->sf.adaptive_interp_filter_search &&
             (cpi->sf.interp_filter_search_mask & (1 << i))) {
           rate_sum = INT_MAX;
@@ -5423,6 +5473,40 @@
                         &tmp_skip_sb, &tmp_skip_sse);
 
         rd = RDCOST(x->rdmult, x->rddiv, rate_sum, dist_sum);
+#if CONFIG_OBMC
+        if (allow_obmc) {
+          rd += RDCOST(x->rdmult, x->rddiv, cpi->obmc_cost[bsize][0], 0);
+          memcpy(tmp_skip_txfm, x->skip_txfm, sizeof(tmp_skip_txfm));
+          memcpy(tmp_bsse, x->bsse, sizeof(tmp_bsse));
+
+          vp10_build_obmc_inter_prediction(cm, xd, mi_row, mi_col, 1,
+                                           obmc_tmp_buf, obmc_tmp_stride,
+                                           dst_buf1, dst_stride1,
+                                           dst_buf2, dst_stride2);
+          for (j = 0; j < MAX_MB_PLANE; ++j) {
+            xd->plane[j].dst.buf = obmc_tmp_buf[j];
+            xd->plane[j].dst.stride = obmc_tmp_stride[j];
+          }
+          model_rd_for_sb(cpi, bsize, x, xd, &rate_sum_obmc, &dist_sum_obmc,
+                          &tmp_skip_sb_obmc, &tmp_skip_sse_obmc);
+          rdobmc = RDCOST(x->rdmult, x->rddiv,
+                          rate_sum_obmc + cpi->obmc_cost[bsize][1],
+                          dist_sum_obmc);
+
+          if ((double)rdobmc <= 0.99 * (double)rd) {
+            obmc_flag = 1;
+            rd = rdobmc;
+            rate_sum = rate_sum_obmc;
+            dist_sum = dist_sum_obmc;
+            tmp_skip_sb = tmp_skip_sb_obmc;
+            tmp_skip_sse = tmp_skip_sse_obmc;
+          } else {
+            obmc_flag = 0;
+            memcpy(x->skip_txfm, tmp_skip_txfm, sizeof(tmp_skip_txfm));
+            memcpy(x->bsse, tmp_bsse, sizeof(tmp_bsse));
+          }
+        }
+#endif  // CONFIG_OBMC
         filter_cache[i] = rd;
         filter_cache[SWITCHABLE_FILTERS] =
             VPXMIN(filter_cache[SWITCHABLE_FILTERS], rd + rs_rd);
@@ -5447,6 +5531,10 @@
       if (newbest) {
         best_rd = rd;
         best_filter = mbmi->interp_filter;
+#if CONFIG_OBMC
+        if (allow_obmc)
+          best_obmc_flag = obmc_flag;
+#endif  // CONFIG_OBMC
         if (cm->interp_filter == SWITCHABLE && i &&
             !(intpel_mv && IsInterpolatingFilter(i)))
           best_needs_copy = !best_needs_copy;
@@ -5471,8 +5559,18 @@
   mbmi->interp_filter = cm->interp_filter != SWITCHABLE ?
       cm->interp_filter : best_filter;
   rs = cm->interp_filter == SWITCHABLE ? vp10_get_switchable_rate(cpi, xd) : 0;
+#if CONFIG_OBMC
+  if (allow_obmc)
+    mbmi->obmc = best_obmc_flag;
+  else
+    mbmi->obmc = 0;
+#endif  // CONFIG_OBMC
 
+#if CONFIG_OBMC
+  if (pred_exists && !mbmi->obmc) {
+#else
   if (pred_exists) {
+#endif  // CONFIG_OBMC
     if (best_needs_copy) {
       // again temporarily set the buffers to local memory to prevent a memcpy
       for (i = 0; i < MAX_MB_PLANE; i++) {
@@ -5481,16 +5579,77 @@
       }
     }
     rd = tmp_rd + RDCOST(x->rdmult, x->rddiv, rs, 0);
+#if CONFIG_OBMC
+    if (allow_obmc)
+      rd += RDCOST(x->rdmult, x->rddiv,
+                   cpi->obmc_cost[bsize][mbmi->obmc], 0);
+#endif  // CONFIG_OBMC
   } else {
     int tmp_rate;
     int64_t tmp_dist;
+#if CONFIG_OBMC
+    int tmp_rate_obmc;
+    int64_t tmp_dist_obmc;
+#endif  // CONFIG_OBMC
     // Handles the special case when a filter that is not in the
     // switchable list (ex. bilinear) is indicated at the frame level, or
     // skip condition holds.
     vp10_build_inter_predictors_sb(xd, mi_row, mi_col, bsize);
+#if CONFIG_OBMC
+    if (mbmi->obmc) {
+      vp10_build_obmc_inter_prediction(cm, xd, mi_row, mi_col, 1,
+                                       obmc_tmp_buf, obmc_tmp_stride,
+                                       dst_buf1, dst_stride1,
+                                       dst_buf2, dst_stride2);
+      for (i = 0; i < MAX_MB_PLANE; ++i) {
+        xd->plane[i].dst.buf = obmc_tmp_buf[i];
+        xd->plane[i].dst.stride = obmc_tmp_stride[i];
+      }
+      model_rd_for_sb(cpi, bsize, x, xd, &tmp_rate, &tmp_dist,
+                      &skip_txfm_sb, &skip_sse_sb);
+      rd = RDCOST(x->rdmult, x->rddiv,
+                  rs + tmp_rate + cpi->obmc_cost[bsize][1],
+                  tmp_dist);
+    } else {
+#endif  // CONFIG_OBMC
     model_rd_for_sb(cpi, bsize, x, xd, &tmp_rate, &tmp_dist,
                     &skip_txfm_sb, &skip_sse_sb);
     rd = RDCOST(x->rdmult, x->rddiv, rs + tmp_rate, tmp_dist);
+#if CONFIG_OBMC
+      if (allow_obmc) {
+        rd += RDCOST(x->rdmult, x->rddiv, cpi->obmc_cost[bsize][0], 0);
+        memcpy(tmp_skip_txfm, x->skip_txfm, sizeof(tmp_skip_txfm));
+        memcpy(tmp_bsse, x->bsse, sizeof(tmp_bsse));
+
+        vp10_build_obmc_inter_prediction(cm, xd, mi_row, mi_col, 1,
+                                         obmc_tmp_buf, obmc_tmp_stride,
+                                         dst_buf1, dst_stride1,
+                                         dst_buf2, dst_stride2);
+        for (i = 0; i < MAX_MB_PLANE; ++i) {
+          xd->plane[i].dst.buf = obmc_tmp_buf[i];
+          xd->plane[i].dst.stride = obmc_tmp_stride[i];
+        }
+        model_rd_for_sb(cpi, bsize, x, xd, &tmp_rate_obmc, &tmp_dist_obmc,
+                        &skip_txfm_sb_obmc, &skip_sse_sb_obmc);
+        rdobmc = RDCOST(x->rdmult, x->rddiv,
+                        rs + tmp_rate_obmc + cpi->obmc_cost[bsize][1],
+                        tmp_dist_obmc);
+        if ((double)rdobmc <= 0.99 * (double)rd) {
+          mbmi->obmc = 1;
+          rd = rdobmc;
+          skip_txfm_sb = skip_txfm_sb_obmc;
+          skip_sse_sb = skip_sse_sb_obmc;
+        } else {
+          mbmi->obmc = 0;
+          memcpy(x->skip_txfm, tmp_skip_txfm, sizeof(tmp_skip_txfm));
+          memcpy(x->bsse, tmp_bsse, sizeof(tmp_bsse));
+          restore_dst_buf(xd, orig_dst, orig_dst_stride);
+        }
+      } else {
+        mbmi->obmc = 0;
+      }
+    }
+#endif  // CONFIG_OBMC
     memcpy(skip_txfm, x->skip_txfm, sizeof(skip_txfm));
     memcpy(bsse, x->bsse, sizeof(bsse));
   }
@@ -5570,6 +5729,10 @@
 
   if (cm->interp_filter == SWITCHABLE)
     *rate2 += rs;
+#if CONFIG_OBMC
+  if (allow_obmc)
+    *rate2 += cpi->obmc_cost[bsize][mbmi->obmc];
+#endif  // CONFIG_OBMC
 
   memcpy(x->skip_txfm, skip_txfm, sizeof(skip_txfm));
   memcpy(x->bsse, bsse, sizeof(bsse));
@@ -5916,6 +6079,39 @@
   int64_t filter_cache[SWITCHABLE_FILTER_CONTEXTS];
   const TX_SIZE max_tx_size = max_txsize_lookup[bsize];
   const vpx_prob *tx_probs = get_tx_probs2(max_tx_size, xd, &cm->fc->tx_probs);
+#if CONFIG_OBMC
+#if CONFIG_VP9_HIGHBITDEPTH
+  DECLARE_ALIGNED(16, uint8_t, tmp_buf1[2 * MAX_MB_PLANE * 64 * 64]);
+  DECLARE_ALIGNED(16, uint8_t, tmp_buf2[2 * MAX_MB_PLANE * 64 * 64]);
+#else
+  DECLARE_ALIGNED(16, uint8_t, tmp_buf1[MAX_MB_PLANE * 64 * 64]);
+  DECLARE_ALIGNED(16, uint8_t, tmp_buf2[MAX_MB_PLANE * 64 * 64]);
+#endif  // CONFIG_VP9_HIGHBITDEPTH
+  uint8_t *dst_buf1[3], *dst_buf2[3];
+  int dst_stride1[3] = {64, 64, 64};
+  int dst_stride2[3] = {64, 64, 64};
+
+#if CONFIG_VP9_HIGHBITDEPTH
+  if (xd->cur_buf->flags & YV12_FLAG_HIGHBITDEPTH) {
+    int len = sizeof(uint16_t);
+    dst_buf1[0] = CONVERT_TO_BYTEPTR(tmp_buf1);
+    dst_buf1[1] = CONVERT_TO_BYTEPTR(tmp_buf1 + 4096 * len);
+    dst_buf1[2] = CONVERT_TO_BYTEPTR(tmp_buf1 + 8192 * len);
+    dst_buf2[0] = CONVERT_TO_BYTEPTR(tmp_buf2);
+    dst_buf2[1] = CONVERT_TO_BYTEPTR(tmp_buf2 + 4096 * len);
+    dst_buf2[2] = CONVERT_TO_BYTEPTR(tmp_buf2 + 8192 * len);
+  } else {
+#endif  // CONFIG_VP9_HIGHBITDEPTH
+  dst_buf1[0] = tmp_buf1;
+  dst_buf1[1] = tmp_buf1 + 4096;
+  dst_buf1[2] = tmp_buf1 + 8192;
+  dst_buf2[0] = tmp_buf2;
+  dst_buf2[1] = tmp_buf2 + 4096;
+  dst_buf2[2] = tmp_buf2 + 8192;
+#if CONFIG_VP9_HIGHBITDEPTH
+  }
+#endif  // CONFIG_VP9_HIGHBITDEPTH
+#endif  // CONFIG_OBMC
 
   vp10_zero(best_mbmode);
 
@@ -5988,6 +6184,14 @@
   }
 #endif
 
+#if CONFIG_OBMC
+  vp10_build_prediction_by_above_preds(cpi, xd, mi_row, mi_col, dst_buf1,
+                                       dst_stride1);
+  vp10_build_prediction_by_left_preds(cpi, xd, mi_row, mi_col, dst_buf2,
+                                      dst_stride2);
+  vp10_setup_dst_planes(xd->plane, get_frame_new_buffer(cm), mi_row, mi_col);
+#endif  // CONFIG_OBMC
+
   for (ref_frame = LAST_FRAME; ref_frame <= ALTREF_FRAME; ++ref_frame) {
     if (!(cpi->ref_frame_flags & flag_list[ref_frame])) {
       // Skip checking missing references in both single and compound reference
@@ -6286,6 +6490,9 @@
     mbmi->interp_filter = cm->interp_filter == SWITCHABLE ? EIGHTTAP
                                                           : cm->interp_filter;
     mbmi->mv[0].as_int = mbmi->mv[1].as_int = 0;
+#if CONFIG_OBMC
+    mbmi->obmc = 0;
+#endif  // CONFIG_OBMC
 
     x->skip = 0;
     set_ref_ptrs(cm, xd, ref_frame, second_ref_frame);
@@ -6450,6 +6657,10 @@
                                   &rate_y, &rate_uv,
                                   &disable_skip, frame_mv,
                                   mi_row, mi_col,
+#if CONFIG_OBMC
+                                  dst_buf1, dst_stride1,
+                                  dst_buf2, dst_stride2,
+#endif  // CONFIG_OBMC
 #if CONFIG_EXT_INTER
                                   single_newmvs,
 #else
@@ -6522,6 +6733,10 @@
                                            &tmp_rate_y, &tmp_rate_uv,
                                            &dummy_disable_skip, frame_mv,
                                            mi_row, mi_col,
+#if CONFIG_OBMC
+                                           dst_buf1, dst_stride1,
+                                           dst_buf2, dst_stride2,
+#endif  // CONFIG_OBMC
 #if CONFIG_EXT_INTER
                                            dummy_single_newmvs,
 #else
@@ -6671,6 +6886,10 @@
         }
         *returnrate_nocoef -= vp10_cost_bit(vp10_get_intra_inter_prob(cm, xd),
                                             mbmi->ref_frame[0] != INTRA_FRAME);
+#if CONFIG_OBMC
+        if (is_inter_block(mbmi) && is_obmc_allowed(mbmi))
+          *returnrate_nocoef -= cpi->obmc_cost[bsize][mbmi->obmc];
+#endif  // CONFIG_OBMC
 #endif  // CONFIG_SUPERTX
         rd_cost->dist = distortion2;
         rd_cost->rdcost = this_rd;
@@ -7178,6 +7397,9 @@
   mbmi->ext_intra_mode_info.use_ext_intra_mode[0] = 0;
   mbmi->ext_intra_mode_info.use_ext_intra_mode[1] = 0;
 #endif  // CONFIG_EXT_INTRA
+#if CONFIG_OBMC
+  mbmi->obmc = 0;
+#endif  // CONFIG_OBMC
 
   for (i = 0; i < SWITCHABLE_FILTER_CONTEXTS; ++i)
     filter_cache[i] = INT64_MAX;
@@ -7896,3 +8118,167 @@
   store_coding_context(x, ctx, best_ref_index,
                        best_pred_diff, best_filter_diff, 0);
 }
+
+#if CONFIG_OBMC
+void vp10_build_prediction_by_above_preds(VP10_COMP *cpi,
+                                          MACROBLOCKD *xd,
+                                          int mi_row, int mi_col,
+                                          uint8_t *tmp_buf[MAX_MB_PLANE],
+                                          int tmp_stride[MAX_MB_PLANE]) {
+  VP10_COMMON *const cm = &cpi->common;
+  BLOCK_SIZE bsize = xd->mi[0]->mbmi.sb_type;
+  int i, j, mi_step, ref;
+
+  if (mi_row == 0)
+    return;
+
+  for (i = 0; i < VPXMIN(xd->n8_w, cm->mi_cols - mi_col); i += mi_step) {
+    int mi_row_offset = -1;
+    int mi_col_offset = i;
+    int mi_x, mi_y, bw, bh;
+    MODE_INFO *above_mi = xd->mi[mi_col_offset +
+                                 mi_row_offset * xd->mi_stride];
+    MB_MODE_INFO *above_mbmi = &above_mi->mbmi;
+
+    mi_step = VPXMIN(xd->n8_w,
+                     num_8x8_blocks_wide_lookup[above_mbmi->sb_type]);
+
+    if (!is_inter_block(above_mbmi))
+      continue;
+
+    for (j = 0; j < MAX_MB_PLANE; ++j) {
+      struct macroblockd_plane *const pd = &xd->plane[j];
+      setup_pred_plane(&pd->dst,
+                       tmp_buf[j], tmp_stride[j],
+                       0, i, NULL,
+                       pd->subsampling_x, pd->subsampling_y);
+    }
+    set_ref_ptrs(cm, xd, above_mbmi->ref_frame[0], above_mbmi->ref_frame[1]);
+    for (ref = 0; ref < 1 + has_second_ref(above_mbmi); ++ref) {
+      YV12_BUFFER_CONFIG *cfg = get_ref_frame_buffer(cpi,
+                                                  above_mbmi->ref_frame[ref]);
+      assert(cfg != NULL);
+      vp10_setup_pre_planes(xd, ref, cfg, mi_row, mi_col + i,
+                            &xd->block_refs[ref]->sf);
+    }
+
+    xd->mb_to_left_edge   = -(((mi_col + i) * MI_SIZE) * 8);
+    mi_x = (mi_col + i) << MI_SIZE_LOG2;
+    mi_y = mi_row << MI_SIZE_LOG2;
+
+    for (j = 0; j < MAX_MB_PLANE; ++j) {
+      const struct macroblockd_plane *pd = &xd->plane[j];
+      bw = (mi_step * 8) >> pd->subsampling_x;
+      bh = VPXMAX((num_4x4_blocks_high_lookup[bsize] * 2) >> pd->subsampling_y,
+                  4);
+
+      if (above_mbmi->sb_type < BLOCK_8X8) {
+        const PARTITION_TYPE bp = BLOCK_8X8 - above_mbmi->sb_type;
+        const int have_vsplit = bp != PARTITION_HORZ;
+        const int have_hsplit = bp != PARTITION_VERT;
+        const int num_4x4_w = 2 >> ((!have_vsplit) | pd->subsampling_x);
+        const int num_4x4_h = 2 >> ((!have_hsplit) | pd->subsampling_y);
+        const int pw = 8 >> (have_vsplit | pd->subsampling_x);
+        int x, y;
+
+        for (y = 0; y < num_4x4_h; ++y)
+          for (x = 0; x < num_4x4_w; ++x) {
+            if ((bp == PARTITION_HORZ || bp == PARTITION_SPLIT)
+                && y == 0 && !pd->subsampling_y)
+              continue;
+
+            build_inter_predictors(xd, j, mi_col_offset, mi_row_offset,
+                                   y * 2 + x, bw, bh,
+                                   4 * x, 0, pw, bh, mi_x, mi_y);
+          }
+      } else {
+        build_inter_predictors(xd, j, mi_col_offset, mi_row_offset, 0,
+                               bw, bh, 0, 0, bw, bh, mi_x, mi_y);
+      }
+    }
+  }
+  xd->mb_to_left_edge   = -((mi_col * MI_SIZE) * 8);
+}
+
+void vp10_build_prediction_by_left_preds(VP10_COMP *cpi,
+                                         MACROBLOCKD *xd,
+                                         int mi_row, int mi_col,
+                                         uint8_t *tmp_buf[MAX_MB_PLANE],
+                                         int tmp_stride[MAX_MB_PLANE]) {
+  VP10_COMMON *const cm = &cpi->common;
+  const TileInfo *const tile = &xd->tile;
+  BLOCK_SIZE bsize = xd->mi[0]->mbmi.sb_type;
+  int i, j, mi_step, ref;
+
+  if (mi_col == 0 || (mi_col - 1 < tile->mi_col_start) ||
+      (mi_col - 1) >= tile->mi_col_end)
+    return;
+
+  for (i = 0; i < VPXMIN(xd->n8_h, cm->mi_rows - mi_row); i += mi_step) {
+    int mi_row_offset = i;
+    int mi_col_offset = -1;
+    int mi_x, mi_y, bw, bh;
+    MODE_INFO *left_mi = xd->mi[mi_col_offset +
+                                mi_row_offset * xd->mi_stride];
+    MB_MODE_INFO *left_mbmi = &left_mi->mbmi;
+
+    mi_step = VPXMIN(xd->n8_h,
+                     num_8x8_blocks_high_lookup[left_mbmi->sb_type]);
+
+    if (!is_inter_block(left_mbmi))
+      continue;
+
+    for (j = 0; j < MAX_MB_PLANE; ++j) {
+      struct macroblockd_plane *const pd = &xd->plane[j];
+      setup_pred_plane(&pd->dst,
+                       tmp_buf[j], tmp_stride[j],
+                       i, 0, NULL,
+                       pd->subsampling_x, pd->subsampling_y);
+    }
+    set_ref_ptrs(cm, xd, left_mbmi->ref_frame[0], left_mbmi->ref_frame[1]);
+    for (ref = 0; ref < 1 + has_second_ref(left_mbmi); ++ref) {
+      YV12_BUFFER_CONFIG *cfg = get_ref_frame_buffer(cpi,
+                                                     left_mbmi->ref_frame[ref]);
+      assert(cfg != NULL);
+      vp10_setup_pre_planes(xd, ref, cfg, mi_row + i, mi_col,
+                            &xd->block_refs[ref]->sf);
+    }
+
+    xd->mb_to_top_edge    = -(((mi_row + i) * MI_SIZE) * 8);
+    mi_x = mi_col << MI_SIZE_LOG2;
+    mi_y = (mi_row + i) << MI_SIZE_LOG2;
+
+    for (j = 0; j < MAX_MB_PLANE; ++j) {
+      const struct macroblockd_plane *pd = &xd->plane[j];
+      bw = VPXMAX((num_4x4_blocks_wide_lookup[bsize] * 2) >> pd->subsampling_x,
+                  4);
+      bh = (mi_step << MI_SIZE_LOG2) >> pd->subsampling_y;
+
+      if (left_mbmi->sb_type < BLOCK_8X8) {
+        const PARTITION_TYPE bp = BLOCK_8X8 - left_mbmi->sb_type;
+        const int have_vsplit = bp != PARTITION_HORZ;
+        const int have_hsplit = bp != PARTITION_VERT;
+        const int num_4x4_w = 2 >> ((!have_vsplit) | pd->subsampling_x);
+        const int num_4x4_h = 2 >> ((!have_hsplit) | pd->subsampling_y);
+        const int ph = 8 >> (have_hsplit | pd->subsampling_y);
+        int x, y;
+
+        for (y = 0; y < num_4x4_h; ++y)
+          for (x = 0; x < num_4x4_w; ++x) {
+            if ((bp == PARTITION_VERT || bp == PARTITION_SPLIT)
+                && x == 0 && !pd->subsampling_x)
+              continue;
+
+            build_inter_predictors(xd, j, mi_col_offset, mi_row_offset,
+                                   y * 2 + x, bw, bh,
+                                   0, 4 * y, bw, ph, mi_x, mi_y);
+          }
+      } else {
+        build_inter_predictors(xd, j, mi_col_offset, mi_row_offset, 0,
+                               bw, bh, 0, 0, bw, bh, mi_x, mi_y);
+      }
+    }
+  }
+  xd->mb_to_top_edge    = -((mi_row * MI_SIZE) * 8);
+}
+#endif  // CONFIG_OBMC