Add recode loop test for global motion usage

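Adds a recode-loop check for global motion: if a reference frame has
non-identity global motion parameters but fewer than
MIN_GLOBAL_MOTION_BLKS (4) blocks in the frame actually use them, the
parameters are reset to the defaults, and the frame is recoded if at
least one block had used them. A new global_motion_search_done flag
skips the global motion search once it has already been run.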

Results on the lowres set improve to -0.512% on average.

Change-Id: I61a36770e1b7103b9a27706909443c3f14ee4e42
diff --git a/av1/encoder/bitstream.c b/av1/encoder/bitstream.c
index 48036d6..57d99a1 100644
--- a/av1/encoder/bitstream.c
+++ b/av1/encoder/bitstream.c
@@ -4014,12 +4014,10 @@
     write_global_motion_params(&cm->global_motion[frame],
                                cm->fc->global_motion_types_prob, w);
     /*
-    printf("Enc Ref %d [%d/%d] (used %d): %d %d %d %d\n",
-           frame, cm->current_video_frame, cm->show_frame,
-           cpi->global_motion_used[frame],
-           cm->global_motion[frame].wmmat[0],
-           cm->global_motion[frame].wmmat[1],
-           cm->global_motion[frame].wmmat[2],
+    printf("Frame %d/%d: Enc Ref %d (used %d): %d %d %d %d\n",
+           cm->current_video_frame, cm->show_frame, frame,
+           cpi->global_motion_used[frame], cm->global_motion[frame].wmmat[0],
+           cm->global_motion[frame].wmmat[1], cm->global_motion[frame].wmmat[2],
            cm->global_motion[frame].wmmat[3]);
            */
   }
diff --git a/av1/encoder/encodeframe.c b/av1/encoder/encodeframe.c
index 04f94fe..bc1c77e 100644
--- a/av1/encoder/encodeframe.c
+++ b/av1/encoder/encodeframe.c
@@ -4785,10 +4785,8 @@
 
 #if CONFIG_GLOBAL_MOTION
   av1_zero(cpi->global_motion_used);
-  for (i = LAST_FRAME; i <= ALTREF_FRAME; ++i) {
-    set_default_gmparams(&cm->global_motion[i]);
-  }
-  if (cpi->common.frame_type == INTER_FRAME && cpi->Source) {
+  if (cpi->common.frame_type == INTER_FRAME && cpi->Source &&
+      !cpi->global_motion_search_done) {
     YV12_BUFFER_CONFIG *ref_buf;
     int frame;
     double erroradvantage = 0;
@@ -4831,6 +4829,7 @@
         aom_clear_system_state();
       }
     }
+    cpi->global_motion_search_done = 1;
   }
 #endif  // CONFIG_GLOBAL_MOTION
 
diff --git a/av1/encoder/encodemb.c b/av1/encoder/encodemb.c
index c3d7252..a1ce822 100644
--- a/av1/encoder/encodemb.c
+++ b/av1/encoder/encodemb.c
@@ -330,10 +330,10 @@
                                  : tokens[i][1].dqc;
         if (sz) tokens[i][1].dqc = -tokens[i][1].dqc;
 #else
-        // The 32x32 transform coefficient uses half quantization step size.
-        // Account for the rounding difference in the dequantized coefficeint
-        // value when the quantization index is dropped from an even number
-        // to an odd number.
+// The 32x32 transform coefficient uses half quantization step size.
+// Account for the rounding difference in the dequantized coefficient
+// value when the quantization index is dropped from an even number
+// to an odd number.
 
 #if CONFIG_AOM_QM
         tran_low_t offset = dqv >> shift;
diff --git a/av1/encoder/encoder.c b/av1/encoder/encoder.c
index 88c4b1f..eba9591 100644
--- a/av1/encoder/encoder.c
+++ b/av1/encoder/encoder.c
@@ -2878,6 +2878,23 @@
   return scale;
 }
 
+#if CONFIG_GLOBAL_MOTION
+#define MIN_GLOBAL_MOTION_BLKS 4
+static int recode_loop_test_global_motion(AV1_COMP *cpi) {
+  int i;
+  int recode = 0;
+  AV1_COMMON *const cm = &cpi->common;
+  for (i = LAST_FRAME; i <= ALTREF_FRAME; ++i) {
+    if (cm->global_motion[i].wmtype != IDENTITY &&
+        cpi->global_motion_used[i] < MIN_GLOBAL_MOTION_BLKS) {
+      set_default_gmparams(&cm->global_motion[i]);
+      recode |= (cpi->global_motion_used[i] > 0);
+    }
+  }
+  return recode;
+}
+#endif  // CONFIG_GLOBAL_MOTION
+
 // Function to test for conditions that indicate we should loop
 // back and recode a frame.
 static int recode_loop_test(AV1_COMP *cpi, int high_limit, int low_limit, int q,
@@ -3699,6 +3716,13 @@
 }
 
 static void set_size_independent_vars(AV1_COMP *cpi) {
+#if CONFIG_GLOBAL_MOTION
+  int i;
+  for (i = LAST_FRAME; i <= ALTREF_FRAME; ++i) {
+    set_default_gmparams(&cpi->common.global_motion[i]);
+  }
+  cpi->global_motion_search_done = 0;
+#endif  // CONFIG_GLOBAL_MOTION
   av1_set_speed_features_framesize_independent(cpi);
   av1_set_rd_speed_thresholds(cpi);
   av1_set_rd_speed_thresholds_sub8x8(cpi);
@@ -4211,6 +4235,12 @@
         rc->projected_frame_size < rc->max_frame_bandwidth)
       loop = 0;
 
+#if CONFIG_GLOBAL_MOTION
+    if (recode_loop_test_global_motion(cpi)) {
+      loop = 1;
+    }
+#endif  // CONFIG_GLOBAL_MOTION
+
     if (loop) {
       ++loop_count;
       ++loop_at_this_size;
diff --git a/av1/encoder/encoder.h b/av1/encoder/encoder.h
index bed3987..28a44c3 100644
--- a/av1/encoder/encoder.h
+++ b/av1/encoder/encoder.h
@@ -625,6 +625,7 @@
 #endif  // CONFIG_EXT_REFS
 #if CONFIG_GLOBAL_MOTION
   int global_motion_used[TOTAL_REFS_PER_FRAME];
+  int global_motion_search_done;
 #endif
 #if CONFIG_REFERENCE_BUFFER
   SequenceHeader seq_params;
diff --git a/av1/encoder/rdopt.c b/av1/encoder/rdopt.c
index efa5c69..a7c88ce 100644
--- a/av1/encoder/rdopt.c
+++ b/av1/encoder/rdopt.c
@@ -4330,6 +4330,9 @@
 }
 
 #if CONFIG_GLOBAL_MOTION
+#define GLOBAL_MOTION_COST_AMORTIZATION_BLKS 8
+
+#if GLOBAL_MOTION_COST_AMORTIZATION_BLKS > 0
 static int get_gmbitcost(const WarpedMotionParams *gm, const aom_prob *probs) {
   int gmtype_cost[TRANS_TYPES];
   int bits;
@@ -4354,9 +4357,6 @@
   return bits ? (bits << AV1_PROB_COST_SHIFT) + gmtype_cost[type] : 0;
 }
 
-#define GLOBAL_MOTION_COST_AMORTIZATION_BLKS 8
-
-#if GLOBAL_MOTION_COST_AMORTIZATION_BLKS > 0
 #define GLOBAL_MOTION_RATE(ref)                                         \
   (cpi->global_motion_used[ref] >= GLOBAL_MOTION_COST_AMORTIZATION_BLKS \
        ? 0                                                              \