Avoid computing gm params for skippable references

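Similar to the logic in inter_mode_search_order_independent_skip, we
do not compute the global motion parameters for a LAST2_FRAME or
LAST3_FRAME if the GOLDEN_FRAME is closer and it has a non-identity
global motion model. To make the GOLDEN_FRAME model available when
LAST2_FRAME and LAST3_FRAME are considered, the reference frames are
now processed from ALTREF_FRAME down to LAST_FRAME.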

0.02% drop in performance on cam_lowres, with an average 2% speedup and
a 5.5% reduction in the number of calls to
compute_global_motion_feature_based.

STATS_CHANGED

Change-Id: I11f885f00d6be8fabf17a65c85ac5f071e254219
diff --git a/av1/encoder/encodeframe.c b/av1/encoder/encodeframe.c
index 467d3e7..dc8d3ff 100644
--- a/av1/encoder/encodeframe.c
+++ b/av1/encoder/encodeframe.c
@@ -4461,6 +4461,18 @@
   return 1;
 }
 
+// Returns 1 if gm estimation can be skipped for ref_frame: it is LAST2/LAST3,
+// GOLDEN_FRAME's model is non-identity and GOLDEN_FRAME is closer (dist <= 0).
+static INLINE int skip_gm_frame(AV1_COMMON *const cm, int ref_frame) {
+  if ((ref_frame == LAST3_FRAME || ref_frame == LAST2_FRAME) &&
+      cm->global_motion[GOLDEN_FRAME].wmtype != IDENTITY) {
+    return get_relative_dist(
+               cm, cm->cur_frame->ref_frame_offset[ref_frame - LAST_FRAME],
+               cm->cur_frame->ref_frame_offset[GOLDEN_FRAME - LAST_FRAME]) <= 0;
+  }
+  return 0;
+}
+
 static void encode_frame_internal(AV1_COMP *cpi) {
   ThreadData *const td = &cpi->td;
   MACROBLOCK *const x = &td->mb;
@@ -4652,7 +4664,7 @@
     };
     int num_refs_using_gm = 0;
 
-    for (frame = LAST_FRAME; frame <= ALTREF_FRAME; ++frame) {
+    for (frame = ALTREF_FRAME; frame >= LAST_FRAME; --frame) {
       ref_buf[frame] = get_ref_frame_buffer(cpi, frame);
       int pframe;
       cm->global_motion[frame] = default_warp_params;
@@ -4660,16 +4672,17 @@
           cm->prev_frame ? &cm->prev_frame->global_motion[frame]
                          : &default_warp_params;
       // check for duplicate buffer
-      for (pframe = LAST_FRAME; pframe < frame; ++pframe) {
+      for (pframe = ALTREF_FRAME; pframe > frame; --pframe) {
         if (ref_buf[frame] == ref_buf[pframe]) break;
       }
-      if (pframe < frame) {
+      if (pframe > frame) {
         memcpy(&cm->global_motion[frame], &cm->global_motion[pframe],
                sizeof(WarpedMotionParams));
       } else if (ref_buf[frame] &&
                  ref_buf[frame]->y_crop_width == cpi->source->y_crop_width &&
                  ref_buf[frame]->y_crop_height == cpi->source->y_crop_height &&
-                 do_gm_search_logic(&cpi->sf, num_refs_using_gm, frame)) {
+                 do_gm_search_logic(&cpi->sf, num_refs_using_gm, frame) &&
+                 !(cpi->sf.selective_ref_gm && skip_gm_frame(cm, frame))) {
         TransformationType model;
         const int64_t ref_frame_error =
             av1_frame_error(xd->cur_buf->flags & YV12_FLAG_HIGHBITDEPTH, xd->bd,
diff --git a/av1/encoder/speed_features.c b/av1/encoder/speed_features.c
index c8fbc97..4974081 100644
--- a/av1/encoder/speed_features.c
+++ b/av1/encoder/speed_features.c
@@ -422,6 +422,7 @@
   sf->model_based_prune_tx_search_level = 0;
   sf->model_based_post_interp_filter_breakout = 0;
   sf->reduce_inter_modes = 0;
+  sf->selective_ref_gm = 1;
   sf->adaptive_motion_search = 0;
   sf->adaptive_pred_interp_filter = 0;
   sf->adaptive_mode_search = 0;
diff --git a/av1/encoder/speed_features.h b/av1/encoder/speed_features.h
index 99c145f..59cb6be 100644
--- a/av1/encoder/speed_features.h
+++ b/av1/encoder/speed_features.h
@@ -317,6 +317,11 @@
   // Limit the inter mode tested in the RD loop
   int reduce_inter_modes;
 
+  // Do not compute the global motion parameters for a LAST2_FRAME or
+  // LAST3_FRAME if the GOLDEN_FRAME is closer and it has a non-identity
+  // global motion model.
+  int selective_ref_gm;
+
   // If 1 we iterate finding a best reference for 2 ref frames together - via
   // a log search that iterates 4 times (check around mv for last for best
   // error of combined predictor then check around mv for alt). If 0 we