Make global motion multithreaded

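Move the global_motion_used counters from AV1_COMP into RD_COUNTS, so
that each ThreadData keeps its own tally instead of every worker
updating a single array in the encoder context, and sum the per-thread
tallies when thread data is accumulated in ethread.c.

Fixes AVxEncoderThreadTest failures.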

Change-Id: I81ea730a4fcd4a95fab198ee98b346a450d24b88
diff --git a/av1/encoder/encodeframe.c b/av1/encoder/encodeframe.c
index 0ddac92..2f345d1 100644
--- a/av1/encoder/encodeframe.c
+++ b/av1/encoder/encodeframe.c
@@ -1039,7 +1039,8 @@
 #endif
 #if CONFIG_GLOBAL_MOTION
 static void update_global_motion_used(PREDICTION_MODE mode, BLOCK_SIZE bsize,
-                                      const MB_MODE_INFO *mbmi, AV1_COMP *cpi) {
+                                      const MB_MODE_INFO *mbmi,
+                                      RD_COUNTS *rdc) {
   if (mode == ZEROMV
 #if CONFIG_EXT_INTER
       || mode == ZERO_ZEROMV
@@ -1049,7 +1050,7 @@
         num_4x4_blocks_wide_lookup[bsize] * num_4x4_blocks_high_lookup[bsize];
     int ref;
     for (ref = 0; ref < 1 + has_second_ref(mbmi); ++ref) {
-      cpi->global_motion_used[mbmi->ref_frame[ref]] += num_4x4s;
+      rdc->global_motion_used[mbmi->ref_frame[ref]] += num_4x4s;
     }
   }
 }
@@ -1265,7 +1266,7 @@
       if (bsize >= BLOCK_8X8) {
         // TODO(sarahparker): global motion stats need to be handled per-tile
         // to be compatible with tile-based threading.
-        update_global_motion_used(mbmi->mode, bsize, mbmi, (AV1_COMP *)cpi);
+        update_global_motion_used(mbmi->mode, bsize, mbmi, rdc);
       } else {
         const int num_4x4_w = num_4x4_blocks_wide_lookup[bsize];
         const int num_4x4_h = num_4x4_blocks_high_lookup[bsize];
@@ -1273,8 +1274,7 @@
         for (idy = 0; idy < 2; idy += num_4x4_h) {
           for (idx = 0; idx < 2; idx += num_4x4_w) {
             const int j = idy * 2 + idx;
-            update_global_motion_used(mi->bmi[j].as_mode, bsize, mbmi,
-                                      (AV1_COMP *)cpi);
+            update_global_motion_used(mi->bmi[j].as_mode, bsize, mbmi, rdc);
           }
         }
       }
@@ -1431,7 +1431,7 @@
       if (bsize >= BLOCK_8X8) {
         // TODO(sarahparker): global motion stats need to be handled per-tile
         // to be compatible with tile-based threading.
-        update_global_motion_used(mbmi->mode, bsize, mbmi, (AV1_COMP *)cpi);
+        update_global_motion_used(mbmi->mode, bsize, mbmi, rdc);
       } else {
         const int num_4x4_w = num_4x4_blocks_wide_lookup[bsize];
         const int num_4x4_h = num_4x4_blocks_high_lookup[bsize];
@@ -1439,8 +1439,7 @@
         for (idy = 0; idy < 2; idy += num_4x4_h) {
           for (idx = 0; idx < 2; idx += num_4x4_w) {
             const int j = idy * 2 + idx;
-            update_global_motion_used(mi->bmi[j].as_mode, bsize, mbmi,
-                                      (AV1_COMP *)cpi);
+            update_global_motion_used(mi->bmi[j].as_mode, bsize, mbmi, rdc);
           }
         }
       }
@@ -5208,7 +5207,7 @@
   av1_zero(rdc->comp_pred_diff);
 
 #if CONFIG_GLOBAL_MOTION
-  av1_zero(cpi->global_motion_used);
+  av1_zero(rdc->global_motion_used);
   if (cpi->common.frame_type == INTER_FRAME && cpi->source &&
       !cpi->global_motion_search_done) {
     YV12_BUFFER_CONFIG *ref_buf;
diff --git a/av1/encoder/encoder.c b/av1/encoder/encoder.c
index b61eda5..0271091 100644
--- a/av1/encoder/encoder.c
+++ b/av1/encoder/encoder.c
@@ -3062,17 +3062,18 @@
 static int recode_loop_test_global_motion(AV1_COMP *cpi) {
   int i;
   int recode = 0;
+  RD_COUNTS *const rdc = &cpi->td.rd_counts;
   AV1_COMMON *const cm = &cpi->common;
   for (i = LAST_FRAME; i <= ALTREF_FRAME; ++i) {
     if (cm->global_motion[i].wmtype != IDENTITY &&
-        cpi->global_motion_used[i] * GM_RECODE_LOOP_NUM4X4_FACTOR <
+        rdc->global_motion_used[i] * GM_RECODE_LOOP_NUM4X4_FACTOR <
             cpi->gmparams_cost[i]) {
       set_default_warp_params(&cm->global_motion[i]);
       cpi->gmparams_cost[i] = 0;
 #if CONFIG_REF_MV
       recode = 1;
 #else
-      recode |= (cpi->global_motion_used[i] > 0);
+      recode |= (rdc->global_motion_used[i] > 0);
 #endif
     }
   }
diff --git a/av1/encoder/encoder.h b/av1/encoder/encoder.h
index fe37672..4e7aef8 100644
--- a/av1/encoder/encoder.h
+++ b/av1/encoder/encoder.h
@@ -309,6 +309,10 @@
 typedef struct RD_COUNTS {
   av1_coeff_count coef_counts[TX_SIZES][PLANE_TYPES];
   int64_t comp_pred_diff[REFERENCE_MODES];
+#if CONFIG_GLOBAL_MOTION
+  // Stores number of 4x4 blocks using global motion per reference frame.
+  int global_motion_used[TOTAL_REFS_PER_FRAME];
+#endif  // CONFIG_GLOBAL_MOTION
 } RD_COUNTS;
 
 typedef struct ThreadData {
@@ -665,8 +669,6 @@
   int arf_map[MAX_EXT_ARFS + 1];
 #endif  // CONFIG_EXT_REFS
 #if CONFIG_GLOBAL_MOTION
-  // Stores number of 4x4 blocks using global motion per reference frame.
-  int global_motion_used[TOTAL_REFS_PER_FRAME];
   int global_motion_search_done;
 #endif
 #if CONFIG_REFERENCE_BUFFER
diff --git a/av1/encoder/ethread.c b/av1/encoder/ethread.c
index 1a60c84..34f0b95 100644
--- a/av1/encoder/ethread.c
+++ b/av1/encoder/ethread.c
@@ -20,6 +20,12 @@
   for (i = 0; i < REFERENCE_MODES; i++)
     td->rd_counts.comp_pred_diff[i] += td_t->rd_counts.comp_pred_diff[i];
 
+#if CONFIG_GLOBAL_MOTION
+  for (i = 0; i < TOTAL_REFS_PER_FRAME; i++)
+    td->rd_counts.global_motion_used[i] +=
+        td_t->rd_counts.global_motion_used[i];
+#endif  // CONFIG_GLOBAL_MOTION
+
   for (i = 0; i < TX_SIZES; i++)
     for (j = 0; j < PLANE_TYPES; j++)
       for (k = 0; k < REF_TYPES; k++)