Fix sub8x8 rd costing to include global motion prediction

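The av1_build_inter_predictor and av1_highbd_build_inter_predictor
codepath called the convolve-based inter predictors directly, bypassing
av1_make_inter_predictor, which dispatches between global motion (gm)
prediction and regular inter prediction. Route both the high bit depth
and the regular bit depth variants through av1_make_inter_predictor.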

Change-Id: I741d67a7c89eb6eb0cd35c02219739dc3ddb3841
diff --git a/av1/common/reconinter.c b/av1/common/reconinter.c
index 53545be..3263f9e 100644
--- a/av1/common/reconinter.c
+++ b/av1/common/reconinter.c
@@ -742,45 +742,44 @@
 }
 #endif  // CONFIG_EXT_INTER
 
+// TODO(sarahparker): av1_highbd_build_inter_predictor and
+// av1_build_inter_predictor should be combined with
+// av1_make_inter_predictor.
 #if CONFIG_AOM_HIGHBITDEPTH
-void av1_highbd_build_inter_predictor(
-    const uint8_t *src, int src_stride, uint8_t *dst, int dst_stride,
-    const MV *src_mv, const struct scale_factors *sf, int w, int h, int ref,
+void av1_highbd_build_inter_predictor(const uint8_t *src, int src_stride,
+                                      uint8_t *dst, int dst_stride,
+                                      const MV *src_mv,
+                                      const struct scale_factors *sf, int w,
+                                      int h, int ref,
 #if CONFIG_DUAL_FILTER
-    const InterpFilter *interp_filter,
+                                      const InterpFilter *interp_filter,
 #else
-    const InterpFilter interp_filter,
+                                      const InterpFilter interp_filter,
 #endif
-    enum mv_precision precision, int x, int y, int bd) {
+#if CONFIG_GLOBAL_MOTION
+                                      int is_global, int p_col, int p_row,
+#endif  // CONFIG_GLOBAL_MOTION
+                                      int plane, enum mv_precision precision,
+                                      int x, int y, const MACROBLOCKD *xd) {
   const int is_q4 = precision == MV_PRECISION_Q4;
   const MV mv_q4 = { is_q4 ? src_mv->row : src_mv->row * 2,
                      is_q4 ? src_mv->col : src_mv->col * 2 };
   MV32 mv = av1_scale_mv(&mv_q4, x, y, sf);
   const int subpel_x = mv.col & SUBPEL_MASK;
   const int subpel_y = mv.row & SUBPEL_MASK;
+  ConvolveParams conv_params = get_conv_params(ref, plane);
 
   src += (mv.row >> SUBPEL_BITS) * src_stride + (mv.col >> SUBPEL_BITS);
 
-  highbd_inter_predictor(src, src_stride, dst, dst_stride, subpel_x, subpel_y,
-                         sf, w, h, ref, interp_filter, sf->x_step_q4,
-                         sf->y_step_q4, bd);
+  av1_make_inter_predictor(src, src_stride, dst, dst_stride, subpel_x, subpel_y,
+                           sf, w, h, &conv_params, interp_filter,
+#if CONFIG_GLOBAL_MOTION
+                           is_global, p_col, p_row, plane, ref,
+#endif  // CONFIG_GLOBAL_MOTION
+                           sf->x_step_q4, sf->y_step_q4, xd);
 }
 #endif  // CONFIG_AOM_HIGHBITDEPTH
 
-#if CONFIG_GLOBAL_MOTION
-static INLINE int is_global_mv_block(const MODE_INFO *mi, int block,
-                                     TransformationType type) {
-  PREDICTION_MODE mode = get_y_mode(mi, block);
-#if GLOBAL_SUB8X8_USED
-  const int block_size_allowed = 1;
-#else
-  const BLOCK_SIZE bsize = mi->mbmi.sb_type;
-  const int block_size_allowed = (bsize >= BLOCK_8X8);
-#endif  // GLOBAL_SUB8X8_USED
-  return mode == ZEROMV && type > TRANSLATION && block_size_allowed;
-}
-#endif  // CONFIG_GLOBAL_MOTION
-
 void av1_build_inter_predictor(const uint8_t *src, int src_stride, uint8_t *dst,
                                int dst_stride, const MV *src_mv,
                                const struct scale_factors *sf, int w, int h,
@@ -790,7 +789,12 @@
 #else
                                const InterpFilter interp_filter,
 #endif
-                               enum mv_precision precision, int x, int y) {
+#if CONFIG_GLOBAL_MOTION
+                               int is_global, int p_col, int p_row, int plane,
+                               int ref,
+#endif  // CONFIG_GLOBAL_MOTION
+                               enum mv_precision precision, int x, int y,
+                               const MACROBLOCKD *xd) {
   const int is_q4 = precision == MV_PRECISION_Q4;
   const MV mv_q4 = { is_q4 ? src_mv->row : src_mv->row * 2,
                      is_q4 ? src_mv->col : src_mv->col * 2 };
@@ -800,8 +804,12 @@
 
   src += (mv.row >> SUBPEL_BITS) * src_stride + (mv.col >> SUBPEL_BITS);
 
-  inter_predictor(src, src_stride, dst, dst_stride, subpel_x, subpel_y, sf, w,
-                  h, conv_params, interp_filter, sf->x_step_q4, sf->y_step_q4);
+  av1_make_inter_predictor(src, src_stride, dst, dst_stride, subpel_x, subpel_y,
+                           sf, w, h, conv_params, interp_filter,
+#if CONFIG_GLOBAL_MOTION
+                           is_global, p_col, p_row, plane, ref,
+#endif  // CONFIG_GLOBAL_MOTION
+                           sf->x_step_q4, sf->y_step_q4, xd);
 }
 
 typedef struct SubpelParams {
@@ -1060,6 +1068,15 @@
   uint8_t *const dst = &pd->dst.buf[(ir * pd->dst.stride + ic) << 2];
   int ref;
   const int is_compound = has_second_ref(&mi->mbmi);
+#if CONFIG_GLOBAL_MOTION
+  const int p_col = ((mi_col * MI_SIZE) >> pd->subsampling_x) + 4 * ic;
+  const int p_row = ((mi_row * MI_SIZE) >> pd->subsampling_y) + 4 * ir;
+  int is_global[2];
+  for (ref = 0; ref < 1 + is_compound; ++ref) {
+    WarpedMotionParams *const wm = &xd->global_motion[mi->mbmi.ref_frame[ref]];
+    is_global[ref] = is_global_mv_block(mi, i, wm->wmtype);
+  }
+#endif  // CONFIG_GLOBAL_MOTION
 
   for (ref = 0; ref < 1 + is_compound; ++ref) {
     ConvolveParams conv_params = get_conv_params(ref, plane);
@@ -1070,15 +1087,23 @@
       av1_highbd_build_inter_predictor(
           pre, pd->pre[ref].stride, dst, pd->dst.stride,
           &mi->bmi[i].as_mv[ref].as_mv, &xd->block_refs[ref]->sf, width, height,
-          ref, mi->mbmi.interp_filter, MV_PRECISION_Q3,
-          mi_col * MI_SIZE + 4 * ic, mi_row * MI_SIZE + 4 * ir, xd->bd);
+          ref, mi->mbmi.interp_filter,
+#if CONFIG_GLOBAL_MOTION
+          is_global[ref], p_col, p_row,
+#endif  // CONFIG_GLOBAL_MOTION
+          plane, MV_PRECISION_Q3, mi_col * MI_SIZE + 4 * ic,
+          mi_row * MI_SIZE + 4 * ir, xd);
     else
 #endif  // CONFIG_AOM_HIGHBITDEPTH
-      av1_build_inter_predictor(
-          pre, pd->pre[ref].stride, dst, pd->dst.stride,
-          &mi->bmi[i].as_mv[ref].as_mv, &xd->block_refs[ref]->sf, width, height,
-          &conv_params, mi->mbmi.interp_filter, MV_PRECISION_Q3,
-          mi_col * MI_SIZE + 4 * ic, mi_row * MI_SIZE + 4 * ir);
+      av1_build_inter_predictor(pre, pd->pre[ref].stride, dst, pd->dst.stride,
+                                &mi->bmi[i].as_mv[ref].as_mv,
+                                &xd->block_refs[ref]->sf, width, height,
+                                &conv_params, mi->mbmi.interp_filter,
+#if CONFIG_GLOBAL_MOTION
+                                is_global[ref], p_col, p_row, plane, ref,
+#endif  // CONFIG_GLOBAL_MOTION
+                                MV_PRECISION_Q3, mi_col * MI_SIZE + 4 * ic,
+                                mi_row * MI_SIZE + 4 * ir, xd);
   }
 }
 
diff --git a/av1/common/reconinter.h b/av1/common/reconinter.h
index 1ea75a9..8ac0047 100644
--- a/av1/common/reconinter.h
+++ b/av1/common/reconinter.h
@@ -24,6 +24,20 @@
 extern "C" {
 #endif
 
+#if CONFIG_GLOBAL_MOTION
+static INLINE int is_global_mv_block(const MODE_INFO *mi, int block,
+                                     TransformationType type) {
+  PREDICTION_MODE mode = get_y_mode(mi, block);
+#if GLOBAL_SUB8X8_USED
+  const int block_size_allowed = 1;
+#else
+  const BLOCK_SIZE bsize = mi->mbmi.sb_type;
+  const int block_size_allowed = (bsize >= BLOCK_8X8);
+#endif  // GLOBAL_SUB8X8_USED
+  return mode == ZEROMV && type > TRANSLATION && block_size_allowed;
+}
+#endif  // CONFIG_GLOBAL_MOTION
+
 static INLINE void inter_predictor(const uint8_t *src, int src_stride,
                                    uint8_t *dst, int dst_stride,
                                    const int subpel_x, const int subpel_y,
@@ -417,7 +431,7 @@
 #endif  // CONFIG_SUPERTX
 
 void av1_build_inter_predictor(const uint8_t *src, int src_stride, uint8_t *dst,
-                               int dst_stride, const MV *mv_q3,
+                               int dst_stride, const MV *src_mv,
                                const struct scale_factors *sf, int w, int h,
                                ConvolveParams *conv_params,
 #if CONFIG_DUAL_FILTER
@@ -425,18 +439,29 @@
 #else
                                const InterpFilter interp_filter,
 #endif
-                               enum mv_precision precision, int x, int y);
+#if CONFIG_GLOBAL_MOTION
+                               int is_global, int p_col, int p_row, int plane,
+                               int ref,
+#endif  // CONFIG_GLOBAL_MOTION
+                               enum mv_precision precision, int x, int y,
+                               const MACROBLOCKD *xd);
 
 #if CONFIG_AOM_HIGHBITDEPTH
-void av1_highbd_build_inter_predictor(
-    const uint8_t *src, int src_stride, uint8_t *dst, int dst_stride,
-    const MV *mv_q3, const struct scale_factors *sf, int w, int h, int do_avg,
+void av1_highbd_build_inter_predictor(const uint8_t *src, int src_stride,
+                                      uint8_t *dst, int dst_stride,
+                                      const MV *mv_q3,
+                                      const struct scale_factors *sf, int w,
+                                      int h, int do_avg,
 #if CONFIG_DUAL_FILTER
-    const InterpFilter *interp_filter,
+                                      const InterpFilter *interp_filter,
 #else
-    const InterpFilter interp_filter,
+                                      const InterpFilter interp_filter,
 #endif
-    enum mv_precision precision, int x, int y, int bd);
+#if CONFIG_GLOBAL_MOTION
+                                      int is_global, int p_col, int p_row,
+#endif  // CONFIG_GLOBAL_MOTION
+                                      int plane, enum mv_precision precision,
+                                      int x, int y, const MACROBLOCKD *xd);
 #endif
 
 static INLINE int scaled_buffer_offset(int x_offset, int y_offset, int stride,
diff --git a/av1/encoder/rdopt.c b/av1/encoder/rdopt.c
index 139d1b3..9b501a3 100644
--- a/av1/encoder/rdopt.c
+++ b/av1/encoder/rdopt.c
@@ -5493,6 +5493,8 @@
   const int ph = block_size_high[bsize];
   MACROBLOCKD *xd = &x->e_mbd;
   MB_MODE_INFO *mbmi = &xd->mi[0]->mbmi;
+  // This function should only ever be called for compound modes
+  assert(has_second_ref(mbmi));
   const int refs[2] = { mbmi->ref_frame[0],
                         mbmi->ref_frame[1] < 0 ? 0 : mbmi->ref_frame[1] };
   int_mv ref_mv[2];
@@ -5506,6 +5508,20 @@
   const InterpFilter interp_filter = mbmi->interp_filter;
 #endif  // CONFIG_DUAL_FILTER
   struct scale_factors sf;
+  struct macroblockd_plane *const pd = &xd->plane[0];
+#if CONFIG_GLOBAL_MOTION
+  // ic and ir are the 4x4 coordinates of the sub8x8 at index "block"
+  const int ic = block & 1;
+  const int ir = (block - ic) >> 1;
+  const int p_col = ((mi_col * MI_SIZE) >> pd->subsampling_x) + 4 * ic;
+  const int p_row = ((mi_row * MI_SIZE) >> pd->subsampling_y) + 4 * ir;
+  int is_global[2];
+  for (ref = 0; ref < 2; ++ref) {
+    WarpedMotionParams *const wm =
+        &xd->global_motion[xd->mi[0]->mbmi.ref_frame[ref]];
+    is_global[ref] = is_global_mv_block(xd->mi[0], block, wm->wmtype);
+  }
+#endif  // CONFIG_GLOBAL_MOTION
 
   // Do joint motion search in compound mode to get more accurate mv.
   struct buf_2d backup_yv12[2][MAX_MB_PLANE];
@@ -5597,19 +5613,28 @@
       av1_highbd_build_inter_predictor(
           ref_yv12[!id].buf, ref_yv12[!id].stride, second_pred, pw,
           &frame_mv[refs[!id]].as_mv, &sf, pw, ph, 0, interp_filter,
-          MV_PRECISION_Q3, mi_col * MI_SIZE, mi_row * MI_SIZE, xd->bd);
+#if CONFIG_GLOBAL_MOTION
+          is_global[!id], p_col, p_row,
+#endif  // CONFIG_GLOBAL_MOTION
+          plane, MV_PRECISION_Q3, mi_col * MI_SIZE, mi_row * MI_SIZE, xd);
     } else {
       second_pred = (uint8_t *)second_pred_alloc_16;
       av1_build_inter_predictor(
           ref_yv12[!id].buf, ref_yv12[!id].stride, second_pred, pw,
           &frame_mv[refs[!id]].as_mv, &sf, pw, ph, &conv_params, interp_filter,
-          MV_PRECISION_Q3, mi_col * MI_SIZE, mi_row * MI_SIZE);
+#if CONFIG_GLOBAL_MOTION
+          is_global[!id], p_col, p_row, plane, !id,
+#endif  // CONFIG_GLOBAL_MOTION
+          MV_PRECISION_Q3, mi_col * MI_SIZE, mi_row * MI_SIZE, xd);
     }
 #else
     av1_build_inter_predictor(
         ref_yv12[!id].buf, ref_yv12[!id].stride, second_pred, pw,
         &frame_mv[refs[!id]].as_mv, &sf, pw, ph, &conv_params, interp_filter,
-        MV_PRECISION_Q3, mi_col * MI_SIZE, mi_row * MI_SIZE);
+#if CONFIG_GLOBAL_MOTION
+        is_global[!id], p_col, p_row, plane, !id,
+#endif  // CONFIG_GLOBAL_MOTION
+        MV_PRECISION_Q3, mi_col * MI_SIZE, mi_row * MI_SIZE, xd);
 #endif  // CONFIG_AOM_HIGHBITDEPTH
 
     // Do compound motion search on the current reference frame.
@@ -5644,7 +5669,6 @@
       unsigned int sse;
       if (cpi->sf.use_upsampled_references) {
         // Use up-sampled reference frames.
-        struct macroblockd_plane *const pd = &xd->plane[plane];
         struct buf_2d backup_pred = pd->pre[0];
         const YV12_BUFFER_CONFIG *upsampled_ref =
             get_upsampled_ref(cpi, refs[id]);
diff --git a/av1/encoder/temporal_filter.c b/av1/encoder/temporal_filter.c
index 53bf22b..2dd7d02 100644
--- a/av1/encoder/temporal_filter.c
+++ b/av1/encoder/temporal_filter.c
@@ -68,30 +68,51 @@
   if (xd->cur_buf->flags & YV12_FLAG_HIGHBITDEPTH) {
     av1_highbd_build_inter_predictor(y_mb_ptr, stride, &pred[0], 16, &mv, scale,
                                      16, 16, which_mv, interp_filter,
-                                     MV_PRECISION_Q3, x, y, xd->bd);
+#if CONFIG_GLOBAL_MOTION
+                                     0, x, y,
+#endif  // CONFIG_GLOBAL_MOTION
+                                     0, MV_PRECISION_Q3, x, y, xd);
 
     av1_highbd_build_inter_predictor(u_mb_ptr, uv_stride, &pred[256],
                                      uv_block_width, &mv, scale, uv_block_width,
                                      uv_block_height, which_mv, interp_filter,
-                                     mv_precision_uv, x, y, xd->bd);
+#if CONFIG_GLOBAL_MOTION
+                                     0, x, y,
+#endif  // CONFIG_GLOBAL_MOTION
+                                     1, mv_precision_uv, x, y, xd);
 
     av1_highbd_build_inter_predictor(v_mb_ptr, uv_stride, &pred[512],
                                      uv_block_width, &mv, scale, uv_block_width,
                                      uv_block_height, which_mv, interp_filter,
-                                     mv_precision_uv, x, y, xd->bd);
+#if CONFIG_GLOBAL_MOTION
+                                     0, x, y,
+#endif  // CONFIG_GLOBAL_MOTION
+                                     2, mv_precision_uv, x, y, xd);
     return;
   }
 #endif  // CONFIG_AOM_HIGHBITDEPTH
   av1_build_inter_predictor(y_mb_ptr, stride, &pred[0], 16, &mv, scale, 16, 16,
-                            &conv_params, interp_filter, MV_PRECISION_Q3, x, y);
+                            &conv_params, interp_filter,
+#if CONFIG_GLOBAL_MOTION
+                            0, x, y, 0, 0,
+#endif  // CONFIG_GLOBAL_MOTION
+                            MV_PRECISION_Q3, x, y, xd);
 
   av1_build_inter_predictor(u_mb_ptr, uv_stride, &pred[256], uv_block_width,
                             &mv, scale, uv_block_width, uv_block_height,
-                            &conv_params, interp_filter, mv_precision_uv, x, y);
+                            &conv_params, interp_filter,
+#if CONFIG_GLOBAL_MOTION
+                            0, x, y, 1, 0,
+#endif  // CONFIG_GLOBAL_MOTION
+                            mv_precision_uv, x, y, xd);
 
   av1_build_inter_predictor(v_mb_ptr, uv_stride, &pred[512], uv_block_width,
                             &mv, scale, uv_block_width, uv_block_height,
-                            &conv_params, interp_filter, mv_precision_uv, x, y);
+                            &conv_params, interp_filter,
+#if CONFIG_GLOBAL_MOTION
+                            0, x, y, 2, 0,
+#endif  // CONFIG_GLOBAL_MOTION
+                            mv_precision_uv, x, y, xd);
 }
 
 void av1_temporal_filter_apply_c(uint8_t *frame1, unsigned int stride,