Speed up GOP length decision for speed=5

For speed 5, the GOP length is decided based on the
GF boost and an approximate tpl model.

cpu-used  Instruction Count     BD-Rate Loss(%)
           Reduction(%)     avg.psnr  ovr.psnr  ssim
   5          8.364         0.1476    0.1294    0.0998

STATS_CHANGED

Change-Id: I5a05be4fc2154e28238b470747d0b5b17d8f966b
diff --git a/av1/encoder/gop_structure.c b/av1/encoder/gop_structure.c
index 757232d..f768a57 100644
--- a/av1/encoder/gop_structure.c
+++ b/av1/encoder/gop_structure.c
@@ -45,7 +45,7 @@
       gf_group->cur_frame_idx[*frame_ind] = *cur_frame_idx;
       gf_group->layer_depth[*frame_ind] = MAX_ARF_LAYERS;
       gf_group->arf_boost[*frame_ind] = av1_calc_arf_boost(
-          twopass, rc, frame_info, start, end - start, 0, NULL, NULL);
+          twopass, rc, frame_info, start, end - start, 0, NULL, NULL, 0);
       gf_group->frame_type[*frame_ind] = INTER_FRAME;
       gf_group->refbuf_state[*frame_ind] = REFBUF_UPDATE;
       gf_group->max_layer_depth =
@@ -67,7 +67,7 @@
 
     // Get the boost factor for intermediate ARF frames.
     gf_group->arf_boost[*frame_ind] = av1_calc_arf_boost(
-        twopass, rc, frame_info, m, end - m, m - start, NULL, NULL);
+        twopass, rc, frame_info, m, end - m, m - start, NULL, NULL, 0);
     ++(*frame_ind);
 
     // Frames displayed before this internal ARF.
diff --git a/av1/encoder/gop_structure.h b/av1/encoder/gop_structure.h
index 6cfca22..56978ab 100644
--- a/av1/encoder/gop_structure.h
+++ b/av1/encoder/gop_structure.h
@@ -69,7 +69,7 @@
 int av1_calc_arf_boost(const TWO_PASS *twopass, const RATE_CONTROL *rc,
                        FRAME_INFO *frame_info, int offset, int f_frames,
                        int b_frames, int *num_fpstats_used,
-                       int *num_fpstats_required);
+                       int *num_fpstats_required, int project_gfu_boost);
 /*!\endcond */
 
 #ifdef __cplusplus
diff --git a/av1/encoder/pass2_strategy.c b/av1/encoder/pass2_strategy.c
index 2beb048..209c744 100644
--- a/av1/encoder/pass2_strategy.c
+++ b/av1/encoder/pass2_strategy.c
@@ -642,11 +642,12 @@
 }
 
 #define GF_MAX_BOOST 90.0
+#define GF_MIN_BOOST 50
 #define MIN_DECAY_FACTOR 0.01
 int av1_calc_arf_boost(const TWO_PASS *twopass, const RATE_CONTROL *rc,
                        FRAME_INFO *frame_info, int offset, int f_frames,
                        int b_frames, int *num_fpstats_used,
-                       int *num_fpstats_required) {
+                       int *num_fpstats_required, int project_gfu_boost) {
   int i;
   GF_GROUP_STATS gf_stats;
   init_gf_stats(&gf_stats);
@@ -719,16 +720,16 @@
   }
   arf_boost += (int)boost_score;
 
-  if (num_fpstats_required) {
+  if (project_gfu_boost) {
+    assert(num_fpstats_required != NULL);
+    assert(num_fpstats_used != NULL);
     *num_fpstats_required = f_frames + b_frames;
-    if (num_fpstats_used) {
-      arf_boost = get_projected_gfu_boost(rc, arf_boost, *num_fpstats_required,
-                                          *num_fpstats_used);
-    }
+    arf_boost = get_projected_gfu_boost(rc, arf_boost, *num_fpstats_required,
+                                        *num_fpstats_used);
   }
 
-  if (arf_boost < ((b_frames + f_frames) * 50))
-    arf_boost = ((b_frames + f_frames) * 50);
+  if (arf_boost < ((b_frames + f_frames) * GF_MIN_BOOST))
+    arf_boost = ((b_frames + f_frames) * GF_MIN_BOOST);
 
   return arf_boost;
 }
@@ -2539,21 +2540,20 @@
     // Calculate the boost for alt ref.
     rc->gfu_boost = av1_calc_arf_boost(
         twopass, rc, frame_info, alt_offset, forward_frames, ext_len,
-        cpi->ppi->lap_enabled ? &rc->num_stats_used_for_gfu_boost : NULL,
-        cpi->ppi->lap_enabled ? &rc->num_stats_required_for_gfu_boost : NULL);
+        &rc->num_stats_used_for_gfu_boost,
+        &rc->num_stats_required_for_gfu_boost, cpi->ppi->lap_enabled);
   } else {
     reset_fpf_position(twopass, start_pos);
     gf_group->max_layer_depth_allowed = 0;
     set_baseline_gf_interval(cpi, i, active_max_gf_interval, use_alt_ref,
                              is_final_pass);
 
-    rc->gfu_boost = AOMMIN(
-        MAX_GF_BOOST,
-        av1_calc_arf_boost(
-            twopass, rc, frame_info, alt_offset, ext_len, 0,
-            cpi->ppi->lap_enabled ? &rc->num_stats_used_for_gfu_boost : NULL,
-            cpi->ppi->lap_enabled ? &rc->num_stats_required_for_gfu_boost
-                                  : NULL));
+    rc->gfu_boost =
+        AOMMIN(MAX_GF_BOOST,
+               av1_calc_arf_boost(twopass, rc, frame_info, alt_offset, ext_len,
+                                  0, &rc->num_stats_used_for_gfu_boost,
+                                  &rc->num_stats_required_for_gfu_boost,
+                                  cpi->ppi->lap_enabled));
   }
 
 #define LAST_ALR_BOOST_FACTOR 0.2f
@@ -3674,7 +3674,7 @@
     }
 
     if (max_gop_length > 16 && oxcf->algo_cfg.enable_tpl_model &&
-        !cpi->sf.tpl_sf.disable_gop_length_decision) {
+        !(cpi->sf.tpl_sf.gop_length_decision_method == 2)) {
       int this_idx = rc->frames_since_key + rc->gf_intervals[rc->cur_gf_index] -
                      rc->regions_offset - 1;
       int this_region =
@@ -3692,20 +3692,37 @@
         // max_gop_length = 32 with look-ahead gf intervals.
         define_gf_group(cpi, &this_frame, frame_params, max_gop_length, 0);
         this_frame = this_frame_copy;
-        int is_temporal_filter_enabled =
-            (rc->frames_since_key > 0 && gf_group->arf_index > -1);
-        if (is_temporal_filter_enabled) {
-          int arf_src_index = gf_group->arf_src_offset[gf_group->arf_index];
-          FRAME_UPDATE_TYPE arf_update_type =
-              gf_group->update_type[gf_group->arf_index];
-          int is_forward_keyframe = 0;
-          av1_temporal_filter(cpi, arf_src_index, arf_update_type,
-                              is_forward_keyframe, NULL);
-          aom_extend_frame_borders(&cpi->alt_ref_buffer,
-                                   av1_num_planes(&cpi->common));
+
+        int is_temporal_filter_enabled = 0;
+        int shorten_gf_interval = 0;
+        if (!cpi->sf.tpl_sf.gop_length_decision_method) {
+          is_temporal_filter_enabled =
+              (rc->frames_since_key > 0 && gf_group->arf_index > -1);
+          if (is_temporal_filter_enabled) {
+            int arf_src_index = gf_group->arf_src_offset[gf_group->arf_index];
+            FRAME_UPDATE_TYPE arf_update_type =
+                gf_group->update_type[gf_group->arf_index];
+            int is_forward_keyframe = 0;
+            av1_temporal_filter(cpi, arf_src_index, arf_update_type,
+                                is_forward_keyframe, NULL);
+            aom_extend_frame_borders(&cpi->alt_ref_buffer,
+                                     av1_num_planes(&cpi->common));
+          }
+          shorten_gf_interval =
+              !av1_tpl_setup_stats(cpi, 1, frame_params, frame_input);
+          // Tpl stats is reused when the ARF is temporally filtered and gf
+          // interval is not shortened.
+          if (is_temporal_filter_enabled && !shorten_gf_interval)
+            cpi->tpl_data.skip_tpl_setup_stats = 1;
+        } else {
+          // GOP length is decided based on GF boost and approximate tpl model
+          shorten_gf_interval =
+              (rc->gfu_boost <
+               rc->num_stats_used_for_gfu_boost * GF_MIN_BOOST * 1.4) &&
+              !av1_tpl_setup_stats(cpi, 2, frame_params, frame_input);
         }
-        if (!av1_tpl_setup_stats(cpi, 1, frame_params, frame_input)) {
-          // Tpl decides that a shorter gf interval is better.
+        if (shorten_gf_interval) {
+          // A shorter gf interval is better.
           // TODO(jingning): Remove redundant computations here.
           max_gop_length = 16;
           calculate_gf_length(cpi, max_gop_length, 1);
@@ -3713,10 +3730,6 @@
               (ori_gf_int - rc->gf_intervals[rc->cur_gf_index] < 4)) {
             rc->gf_intervals[rc->cur_gf_index] = ori_gf_int;
           }
-        } else {
-          // Tpl stats is reused only when the ARF frame is temporally filtered
-          if (is_temporal_filter_enabled)
-            cpi->tpl_data.skip_tpl_setup_stats = 1;
         }
       }
     }
diff --git a/av1/encoder/speed_features.c b/av1/encoder/speed_features.c
index 099b7c3..25d8f53 100644
--- a/av1/encoder/speed_features.c
+++ b/av1/encoder/speed_features.c
@@ -991,6 +991,7 @@
     sf->tpl_sf.prune_starting_mv = 3;
     sf->tpl_sf.use_y_only_rate_distortion = 1;
     sf->tpl_sf.subpel_force_stop = FULL_PEL;
+    sf->tpl_sf.gop_length_decision_method = 1;
 
     sf->winner_mode_sf.dc_blk_pred_level = 1;
   }
@@ -1021,7 +1022,7 @@
     sf->mv_sf.simple_motion_subpel_force_stop = FULL_PEL;
     sf->mv_sf.use_bsize_dependent_search_method = 1;
 
-    sf->tpl_sf.disable_gop_length_decision = 1;
+    sf->tpl_sf.gop_length_decision_method = 2;
     sf->tpl_sf.disable_filtered_key_tpl = 1;
 
     sf->tx_sf.tx_type_search.winner_mode_tx_type_pruning = 2;
@@ -1426,7 +1427,7 @@
 }
 
 static AOM_INLINE void init_tpl_sf(TPL_SPEED_FEATURES *tpl_sf) {
-  tpl_sf->disable_gop_length_decision = 0;
+  tpl_sf->gop_length_decision_method = 0;
   tpl_sf->prune_intra_modes = 0;
   tpl_sf->prune_starting_mv = 0;
   tpl_sf->reduce_first_step_size = 0;
diff --git a/av1/encoder/speed_features.h b/av1/encoder/speed_features.h
index c4f8e81..b85bcb2 100644
--- a/av1/encoder/speed_features.h
+++ b/av1/encoder/speed_features.h
@@ -375,8 +375,12 @@
 
 /*!\cond */
 typedef struct TPL_SPEED_FEATURES {
-  // Enable/disable GOP length adaptive decision.
-  int disable_gop_length_decision;
+  // GOP length adaptive decision.
+  // If set to 0, tpl model decides whether a shorter gf interval is better.
+  // If set to 1, approximate tpl model and GF boost decide whether a
+  // shorter gf interval is better. If set to 2, gop length adaptive decision is
+  // disabled.
+  int gop_length_decision_method;
   // Prune the intra modes search by tpl.
   // If set to 0, we will search all intra modes from DC_PRED to PAETH_PRED.
   // If set to 1, we only search DC_PRED, V_PRED, and H_PRED.
diff --git a/av1/encoder/tpl_model.c b/av1/encoder/tpl_model.c
index 91af86b..f1f9cc0 100644
--- a/av1/encoder/tpl_model.c
+++ b/av1/encoder/tpl_model.c
@@ -1452,6 +1452,7 @@
   int bottom_index, top_index;
   EncodeFrameParams this_frame_params = *frame_params;
   TplParams *const tpl_data = &cpi->tpl_data;
+  int approx_gop_eval = (gop_eval == 2);
 
   if (cpi->superres_mode != AOM_SUPERRES_NONE) {
     assert(cpi->superres_mode != AOM_SUPERRES_AUTO);
@@ -1499,8 +1500,13 @@
   av1_fill_mv_costs(&cm->fc->nmvc, cm->features.cur_frame_force_integer_mv,
                     cm->features.allow_high_precision_mv, cpi->td.mb.mv_costs);
 
+  // When approx_gop_eval = 1 tpl stats calculation is done for base layer
+  // and the next layer ARF.
+  int frame_idx_end =
+      approx_gop_eval ? AOMMIN(tpl_gf_group_frames - 1, gf_group->arf_index + 1)
+                      : tpl_gf_group_frames - 1;
   // Backward propagation from tpl_group_frames to 1.
-  for (int frame_idx = cpi->gf_frame_index; frame_idx < tpl_gf_group_frames;
+  for (int frame_idx = cpi->gf_frame_index; frame_idx <= frame_idx_end;
        ++frame_idx) {
     if (gf_group->update_type[frame_idx] == INTNL_OVERLAY_UPDATE ||
         gf_group->update_type[frame_idx] == OVERLAY_UPDATE)
@@ -1520,8 +1526,8 @@
                              av1_num_planes(cm));
   }
 
-  for (int frame_idx = tpl_gf_group_frames - 1;
-       frame_idx >= cpi->gf_frame_index; --frame_idx) {
+  for (int frame_idx = frame_idx_end; frame_idx >= cpi->gf_frame_index;
+       --frame_idx) {
     if (gf_group->update_type[frame_idx] == INTNL_OVERLAY_UPDATE ||
         gf_group->update_type[frame_idx] == OVERLAY_UPDATE)
       continue;
@@ -1588,6 +1594,7 @@
 #if CONFIG_COLLECT_COMPONENT_TIMING
   end_timing(cpi, av1_tpl_setup_stats_time);
 #endif
+  if (approx_gop_eval) return beta[0] > 1.1;
 
   // Allow larger GOP size if the base layer ARF has higher dependency factor
   // than the intermediate ARF and both ARFs have reasonably high dependency