Improve global motion model pruning

Three interlinked changes:

* When we aren't going to refine a global motion model (speed 3 and 4),
  stop evaluating the error early once it becomes clear that the model
  won't pass the required quality threshold.

* When we are going to refine (speed 0, 1, 2), skip refinement if the
  model fails an initial quality check.

  The threshold for this initial check is set a little higher than
  the main threshold, because refinement may reduce the error, and
  we only want to prune out models which are very unlikely to be used.

* Remove logic where refinement would apply different thresholds at
  different refinement steps. Instead, just always set the threshold
  equal to the best error seen so far.

 Speed | BD-avg-PSNR | BD-ovr-PSNR |   BD-SSIM   |  Enc time
-------+-------------+-------------+-------------+-------------
   1   |    0.000%   |    0.000%   |    0.000%   |   -0.336%
   2   |   +0.002%   |   +0.002%   |    0.000%   |   -0.502%
   3   |    0.000%   |    0.000%   |    0.000%   |   -0.021%
   4   |    0.000%   |    0.000%   |    0.000%   |   -0.017%

STATS_CHANGED for speed 0, 1, 2

Change-Id: I76a3197aaf02e092543fe1a2a1305d5ab214a440
diff --git a/av1/encoder/global_motion.c b/av1/encoder/global_motion.c
index bdbad4a..20bc9db 100644
--- a/av1/encoder/global_motion.c
+++ b/av1/encoder/global_motion.c
@@ -389,24 +389,11 @@
                     best_error, segment_map, segment_map_stride);
 }
 
-// Factors used to calculate the thresholds for av1_warp_error
-static double thresh_factors[GM_MAX_REFINEMENT_STEPS] = { 1.25, 1.20, 1.15,
-                                                          1.10, 1.05 };
-
-static INLINE int64_t calc_approx_erroradv_threshold(
-    double scaling_factor, int64_t erroradv_threshold) {
-  return erroradv_threshold <
-                 (int64_t)(((double)INT64_MAX / scaling_factor) + 0.5)
-             ? (int64_t)(scaling_factor * erroradv_threshold + 0.5)
-             : INT64_MAX;
-}
-
 int64_t av1_refine_integerized_param(
     WarpedMotionParams *wm, TransformationType wmtype, int use_hbd, int bd,
     uint8_t *ref, int r_width, int r_height, int r_stride, uint8_t *dst,
     int d_width, int d_height, int d_stride, int n_refinements,
-    int64_t best_frame_error, uint8_t *segment_map, int segment_map_stride,
-    int64_t erroradv_threshold) {
+    int64_t ref_frame_error, uint8_t *segment_map, int segment_map_stride) {
   static const int max_trans_model_params[TRANS_TYPES] = { 0, 2, 4, 6 };
   const int border = ERRORADV_BORDER;
   int i = 0, p;
@@ -419,22 +406,36 @@
   int32_t best_param;
 
   force_wmtype(wm, wmtype);
+  wm->wmtype = get_wmtype(wm);
+
+  if (n_refinements == 0) {
+    // Compute the maximum error value that will be accepted, so that
+    // av1_warp_error can terminate early if it proves the model will not
+    // be accepted.
+    int64_t selection_threshold = (int64_t)lrint(ref_frame_error * erroradv_tr);
+    return av1_warp_error(wm, use_hbd, bd, ref, r_width, r_height, r_stride,
+                          dst + border * d_stride + border, border, border,
+                          d_width - 2 * border, d_height - 2 * border, d_stride,
+                          0, 0, selection_threshold, segment_map,
+                          segment_map_stride);
+  }
+
+  // When refining, use a slightly higher threshold for the initial error
+  // calculation - see erroradv_early_tr in global_motion.h for why.
+  int64_t selection_threshold =
+      (int64_t)lrint(ref_frame_error * erroradv_early_tr);
   best_error =
       av1_warp_error(wm, use_hbd, bd, ref, r_width, r_height, r_stride,
                      dst + border * d_stride + border, border, border,
                      d_width - 2 * border, d_height - 2 * border, d_stride, 0,
-                     0, best_frame_error, segment_map, segment_map_stride);
+                     0, selection_threshold, segment_map, segment_map_stride);
 
-  if (n_refinements == 0) {
-    wm->wmtype = get_wmtype(wm);
-    return best_error;
+  if (best_error > selection_threshold) {
+    return INT64_MAX;
   }
 
-  best_error = AOMMIN(best_error, best_frame_error);
   step = 1 << (n_refinements - 1);
   for (i = 0; i < n_refinements; i++, step >>= 1) {
-    int64_t error_adv_thresh =
-        calc_approx_erroradv_threshold(thresh_factors[i], erroradv_threshold);
     for (p = 0; p < n_params; ++p) {
       int step_dir = 0;
       param = param_mat + p;
@@ -449,8 +450,7 @@
           av1_warp_error(wm, use_hbd, bd, ref, r_width, r_height, r_stride,
                          dst + border * d_stride + border, border, border,
                          d_width - 2 * border, d_height - 2 * border, d_stride,
-                         0, 0, AOMMIN(best_error, error_adv_thresh),
-                         segment_map, segment_map_stride);
+                         0, 0, best_error, segment_map, segment_map_stride);
       if (step_error < best_error) {
         best_error = step_error;
         best_param = *param;
@@ -464,8 +464,7 @@
           av1_warp_error(wm, use_hbd, bd, ref, r_width, r_height, r_stride,
                          dst + border * d_stride + border, border, border,
                          d_width - 2 * border, d_height - 2 * border, d_stride,
-                         0, 0, AOMMIN(best_error, error_adv_thresh),
-                         segment_map, segment_map_stride);
+                         0, 0, best_error, segment_map, segment_map_stride);
       if (step_error < best_error) {
         best_error = step_error;
         best_param = *param;
@@ -477,12 +476,11 @@
       while (step_dir) {
         *param = add_param_offset(p, best_param, step * step_dir);
         force_wmtype(wm, wmtype);
-        step_error =
-            av1_warp_error(wm, use_hbd, bd, ref, r_width, r_height, r_stride,
-                           dst + border * d_stride + border, border, border,
-                           d_width - 2 * border, d_height - 2 * border,
-                           d_stride, 0, 0, AOMMIN(best_error, error_adv_thresh),
-                           segment_map, segment_map_stride);
+        step_error = av1_warp_error(
+            wm, use_hbd, bd, ref, r_width, r_height, r_stride,
+            dst + border * d_stride + border, border, border,
+            d_width - 2 * border, d_height - 2 * border, d_stride, 0, 0,
+            best_error, segment_map, segment_map_stride);
         if (step_error < best_error) {
           best_error = step_error;
           best_param = *param;
diff --git a/av1/encoder/global_motion.h b/av1/encoder/global_motion.h
index 4d7755c..bf95261 100644
--- a/av1/encoder/global_motion.h
+++ b/av1/encoder/global_motion.h
@@ -72,11 +72,21 @@
 void av1_convert_model_to_params(const double *params,
                                  WarpedMotionParams *model);
 
-// TODO(sarahparker) These need to be retuned for speed 0 and 1 to
-// maximize gains from segmented error metric
+// Criteria for accepting a global motion model
 static const double erroradv_tr = 0.65;
 static const double erroradv_prod_tr = 20000;
 
+// Early exit threshold for global motion refinement
+// This is set slightly higher than erroradv_tr, as a compromise between
+// two factors:
+//
+// 1) By rejecting un-promising models early, we can reduce the encode time
+//    spent trying to refine them
+//
+// 2) When we refine a model, its error may decrease to below the acceptance
+//    threshold even if the model is initially above the threshold
+static const double erroradv_early_tr = 0.70;
+
 int av1_is_enough_erroradvantage(double best_erroradvantage, int params_cost);
 
 void av1_compute_feature_segmentation_map(uint8_t *segment_map, int width,
@@ -128,8 +138,7 @@
     WarpedMotionParams *wm, TransformationType wmtype, int use_hbd, int bd,
     uint8_t *ref, int r_width, int r_height, int r_stride, uint8_t *dst,
     int d_width, int d_height, int d_stride, int n_refinements,
-    int64_t best_frame_error, uint8_t *segment_map, int segment_map_stride,
-    int64_t erroradv_threshold);
+    int64_t ref_frame_error, uint8_t *segment_map, int segment_map_stride);
 
 #ifdef __cplusplus
 }  // extern "C"
diff --git a/av1/encoder/global_motion_facade.c b/av1/encoder/global_motion_facade.c
index 3fc8508..62eb117 100644
--- a/av1/encoder/global_motion_facade.c
+++ b/av1/encoder/global_motion_facade.c
@@ -73,11 +73,6 @@
   return (params_cost << AV1_PROB_COST_SHIFT);
 }
 
-// Calculates the threshold to be used for warp error computation.
-static AOM_INLINE int64_t calc_erroradv_threshold(int64_t ref_frame_error) {
-  return (int64_t)(ref_frame_error * erroradv_tr + 0.5);
-}
-
 // For the given reference frame, computes the global motion parameters for
 // different motion models and finds the best.
 static AOM_INLINE void compute_global_motion_for_ref_frame(
@@ -138,16 +133,12 @@
 
       if (ref_frame_error == 0) continue;
 
-      const int64_t erroradv_threshold =
-          calc_erroradv_threshold(ref_frame_error);
-
       const int64_t warp_error = av1_refine_integerized_param(
           &tmp_wm_params, tmp_wm_params.wmtype, is_cur_buf_hbd(xd), xd->bd,
           ref_buf[frame]->y_buffer, ref_buf[frame]->y_crop_width,
           ref_buf[frame]->y_crop_height, ref_buf[frame]->y_stride,
           cpi->source->y_buffer, src_width, src_height, src_stride,
-          num_refinements, best_warp_error, segment_map, segment_map_w,
-          erroradv_threshold);
+          num_refinements, ref_frame_error, segment_map, segment_map_w);
 
       // av1_refine_integerized_param() can return a simpler model type than
       // its input, so re-check model type here