Re-design GOP operation process

Structure the GOP decision stage ahead of frame processing. Avoid
changing GOP data in the frame coding stage. Re-align the frame data
pointers.

STATS_CHANGED

Change-Id: I6af7598cccf5293948df6ab86c42583957cf12bd
diff --git a/av1/encoder/encoder.h b/av1/encoder/encoder.h
index 2cbbd78..6d1bb18 100644
--- a/av1/encoder/encoder.h
+++ b/av1/encoder/encoder.h
@@ -2247,6 +2247,11 @@
   GF_GROUP gf_group;
 
   /*!
+   * Track prior gf group state.
+   */
+  GF_STATE gf_state;
+
+  /*!
    * To control the reference frame buffer and selection.
    */
   RefBufferStack ref_buffer_stack;
diff --git a/av1/encoder/firstpass.h b/av1/encoder/firstpass.h
index f5ea986..069bdc9 100644
--- a/av1/encoder/firstpass.h
+++ b/av1/encoder/firstpass.h
@@ -196,6 +196,11 @@
 /*!\cond */
 
 typedef struct {
+  // Track if the last frame in a GOP has higher quality.
+  int arf_gf_boost_lst;
+} GF_STATE;
+
+typedef struct {
   FIRSTPASS_STATS *stats_in_start;
   FIRSTPASS_STATS *stats_in_end;
   FIRSTPASS_STATS *stats_in_buf_end;
diff --git a/av1/encoder/gop_structure.c b/av1/encoder/gop_structure.c
index 362261a..e4ae361 100644
--- a/av1/encoder/gop_structure.c
+++ b/av1/encoder/gop_structure.c
@@ -31,16 +31,14 @@
                                    FRAME_INFO *frame_info, int start, int end,
                                    int *cur_frame_idx, int *frame_ind,
                                    int layer_depth) {
-  const int num_frames_to_process = end - start - 1;
-  assert(num_frames_to_process >= 0);
-  if (num_frames_to_process == 0) return;
+  const int num_frames_to_process = end - start;
 
   // Either we are at the last level of the pyramid, or we don't have enough
   // frames between 'l' and 'r' to create one more level.
   if (layer_depth > gf_group->max_layer_depth_allowed ||
       num_frames_to_process < 3) {
     // Leaf nodes.
-    while (++start < end) {
+    while (start < end) {
       gf_group->update_type[*frame_ind] = LF_UPDATE;
       gf_group->arf_src_offset[*frame_ind] = 0;
       gf_group->cur_frame_idx[*frame_ind] = *cur_frame_idx;
@@ -53,13 +51,14 @@
           AOMMAX(gf_group->max_layer_depth, layer_depth);
       ++(*frame_ind);
       ++(*cur_frame_idx);
+      ++start;
     }
   } else {
-    const int m = (start + end) / 2;
+    const int m = (start + end - 1) / 2;
 
     // Internal ARF.
     gf_group->update_type[*frame_ind] = INTNL_ARF_UPDATE;
-    gf_group->arf_src_offset[*frame_ind] = m - start - 1;
+    gf_group->arf_src_offset[*frame_ind] = m - start;
     gf_group->cur_frame_idx[*frame_ind] = *cur_frame_idx;
     gf_group->layer_depth[*frame_ind] = layer_depth;
     gf_group->frame_type[*frame_ind] = INTER_FRAME;
@@ -86,7 +85,7 @@
     ++(*cur_frame_idx);
 
     // Frames displayed after this internal ARF.
-    set_multi_layer_params(twopass, gf_group, rc, frame_info, m, end,
+    set_multi_layer_params(twopass, gf_group, rc, frame_info, m + 1, end,
                            cur_frame_idx, frame_ind, layer_depth + 1);
   }
 }
@@ -99,7 +98,6 @@
   int cur_frame_index = 0;
 
   // Keyframe / Overlay frame / Golden frame.
-  assert(gf_interval >= 1);
   assert(first_frame_update_type == KF_UPDATE ||
          first_frame_update_type == OVERLAY_UPDATE ||
          first_frame_update_type == GF_UPDATE);
@@ -124,7 +122,7 @@
     gf_group->max_layer_depth = 0;
     ++frame_index;
     cur_frame_index++;
-  } else {
+  } else if (first_frame_update_type != OVERLAY_UPDATE) {
     gf_group->update_type[frame_index] = first_frame_update_type;
     gf_group->arf_src_offset[frame_index] = 0;
     gf_group->cur_frame_idx[frame_index] = cur_frame_index;
@@ -144,7 +142,7 @@
   int is_fwd_kf = (gf_interval == cpi->rc.frames_to_key);
   if (use_altref) {
     gf_group->update_type[frame_index] = ARF_UPDATE;
-    gf_group->arf_src_offset[frame_index] = gf_interval - 1;
+    gf_group->arf_src_offset[frame_index] = gf_interval - cur_frame_index;
     gf_group->cur_frame_idx[frame_index] = cur_frame_index;
     gf_group->layer_depth[frame_index] = 1;
     gf_group->arf_boost[frame_index] = cpi->rc.gfu_boost;
@@ -158,8 +156,9 @@
   }
 
   // Rest of the frames.
-  set_multi_layer_params(twopass, gf_group, rc, frame_info, 0, gf_interval,
-                         &cur_frame_index, &frame_index, use_altref + 1);
+  set_multi_layer_params(twopass, gf_group, rc, frame_info, cur_frame_index,
+                         gf_interval, &cur_frame_index, &frame_index,
+                         use_altref + 1);
 
   if (use_altref) {
     gf_group->update_type[frame_index] = OVERLAY_UPDATE;
@@ -170,21 +169,33 @@
     gf_group->frame_type[frame_index] = INTER_FRAME;
     gf_group->refbuf_state[frame_index] =
         is_fwd_kf ? REFBUF_RESET : REFBUF_UPDATE;
+    ++frame_index;
+  } else {
+    for (; cur_frame_index <= gf_interval; ++cur_frame_index) {
+      gf_group->update_type[frame_index] = LF_UPDATE;
+      gf_group->arf_src_offset[frame_index] = 0;
+      gf_group->cur_frame_idx[frame_index] = cur_frame_index;
+      gf_group->layer_depth[frame_index] = MAX_ARF_LAYERS;
+      gf_group->arf_boost[frame_index] = NORMAL_BOOST;
+      gf_group->frame_type[frame_index] = INTER_FRAME;
+      gf_group->refbuf_state[frame_index] = REFBUF_UPDATE;
+      gf_group->max_layer_depth = AOMMAX(gf_group->max_layer_depth, 2);
+      ++frame_index;
+    }
   }
   return frame_index;
 }
 
-void av1_gop_setup_structure(AV1_COMP *cpi,
-                             const EncodeFrameParams *const frame_params) {
+void av1_gop_setup_structure(AV1_COMP *cpi) {
   RATE_CONTROL *const rc = &cpi->rc;
   GF_GROUP *const gf_group = &cpi->gf_group;
   TWO_PASS *const twopass = &cpi->twopass;
   FRAME_INFO *const frame_info = &cpi->frame_info;
-  const int key_frame = (frame_params->frame_type == KEY_FRAME);
+  const int key_frame = rc->frames_since_key == 0;
+  const int use_altref = gf_group->max_layer_depth_allowed > 0;
   const FRAME_UPDATE_TYPE first_frame_update_type =
-      key_frame ? KF_UPDATE
-                : rc->source_alt_ref_active ? OVERLAY_UPDATE : GF_UPDATE;
+      key_frame ? KF_UPDATE : use_altref ? OVERLAY_UPDATE : GF_UPDATE;
   gf_group->size = construct_multi_layer_gf_structure(
-      cpi, twopass, gf_group, rc, frame_info, rc->baseline_gf_interval,
+      cpi, twopass, gf_group, rc, frame_info, rc->baseline_gf_interval - 1,
       first_frame_update_type);
 }
diff --git a/av1/encoder/gop_structure.h b/av1/encoder/gop_structure.h
index a918a52..6cfca22 100644
--- a/av1/encoder/gop_structure.h
+++ b/av1/encoder/gop_structure.h
@@ -37,12 +37,10 @@
  * cpi->twopass.gf_group.update_type[].
  *
  * \param[in]    cpi          Top - level encoder instance structure
- * \param[in]    frame_params Per-frame encoding parameters
  *
  * \return No return value but this function updates group data structures.
  */
-void av1_gop_setup_structure(
-    struct AV1_COMP *cpi, const struct EncodeFrameParams *const frame_params);
+void av1_gop_setup_structure(struct AV1_COMP *cpi);
 
 /*!\brief Distributes bits to frames in a group
  *
diff --git a/av1/encoder/pass2_strategy.c b/av1/encoder/pass2_strategy.c
index 19b3195..de5a735 100644
--- a/av1/encoder/pass2_strategy.c
+++ b/av1/encoder/pass2_strategy.c
@@ -858,26 +858,27 @@
   const int gf_group_size = gf_group->size;
   int layer_frames[MAX_ARF_LAYERS + 1] = { 0 };
 
-  // Subtract the extra bits set aside for ARF frames from the Group Total
-  if (use_arf || !key_frame) total_group_bits -= gf_arf_bits;
-
-  if (rc->baseline_gf_interval)
-    base_frame_bits = (int)(total_group_bits / rc->baseline_gf_interval);
-  else
-    base_frame_bits = (int)1;
-
   // For key frames the frame target rate is already set and it
   // is also the golden frame.
   // === [frame_index == 0] ===
-  int frame_index = 0;
-  if (!key_frame) {
-    if (rc->source_alt_ref_active)
-      gf_group->bit_allocation[frame_index] = 0;
-    else
-      gf_group->bit_allocation[frame_index] =
-          base_frame_bits + (int)(gf_arf_bits * layer_fraction[1]);
+  int frame_index = !!key_frame;
+
+  // Subtract the extra bits set aside for ARF frames from the Group Total
+  if (use_arf) total_group_bits -= gf_arf_bits;
+
+  int num_frames =
+      AOMMAX(1, rc->baseline_gf_interval - (rc->frames_since_key == 0));
+  base_frame_bits = (int)(total_group_bits / num_frames);
+
+  if (use_arf) {
+    for (; frame_index < gf_group->size; ++frame_index) {
+      if (gf_group->update_type[frame_index] == ARF_UPDATE) {
+        gf_group->bit_allocation[frame_index] = gf_arf_bits;
+        ++frame_index;
+        break;
+      }
+    }
   }
-  frame_index++;
 
   // Check the number of frames in each layer in case we have a
   // non standard group length.
@@ -1253,24 +1254,25 @@
     return;
   }
 
+  if (rc->frames_since_key > 0)
+    max_gop_length += !cpi->gf_state.arf_gf_boost_lst;
+
   // TODO(urvang): Try logic to vary min and max interval based on q.
   const int active_min_gf_interval = rc->min_gf_interval;
   const int active_max_gf_interval =
       AOMMIN(rc->max_gf_interval, max_gop_length);
   const int min_shrink_int = AOMMAX(MIN_SHRINK_LEN, active_min_gf_interval);
 
-  i = 0;
+  i = (rc->frames_since_key == 0);
   max_intervals = cpi->lap_enabled ? 1 : max_intervals;
-  int cut_pos[MAX_NUM_GF_INTERVALS + 1] = { 0 };
   int count_cuts = 1;
-  int cur_start = 0, cur_last;
+  int cur_start = -1 + (rc->frames_since_key == 0), cur_last;
+  int cut_pos[MAX_NUM_GF_INTERVALS + 1] = { -1 };
   int cut_here;
   int prev_lows = 0;
   GF_GROUP_STATS gf_stats;
   init_gf_stats(&gf_stats);
   while (count_cuts < max_intervals + 1) {
-    ++i;
-
     // reaches next key frame, break here
     if (i >= rc->frames_to_key) {
       cut_pos[count_cuts] = i - 1;
@@ -1310,6 +1312,7 @@
           cur_last = rc->frames_to_key - min_int - 1;
         }
       }
+
       // only try shrinking if interval smaller than active_max_gf_interval
       if (cur_last - cur_start <= active_max_gf_interval) {
         // determine in the current decided gop the higher and lower errs
@@ -1321,20 +1324,19 @@
         double errs[MAX_GF_INTERVAL + 1 + MAX_PAD_GF_CHECK * 2] = { 0 };
         double si[MAX_GF_INTERVAL + 1 + MAX_PAD_GF_CHECK * 2] = { 0 };
         int before_pad =
-            AOMMIN(MAX_PAD_GF_CHECK, rc->frames_since_key - 1 + cur_start);
+            AOMMIN(MAX_PAD_GF_CHECK, rc->frames_since_key + cur_start - 1);
         int after_pad =
             AOMMIN(MAX_PAD_GF_CHECK, rc->frames_to_key - cur_last - 1);
         for (n = cur_start - before_pad; n <= cur_last + after_pad; n++) {
-          if (start_pos + n - 1 > twopass->stats_buf_ctx->stats_in_end) {
+          if (start_pos + n > twopass->stats_buf_ctx->stats_in_end) {
             after_pad = n - cur_last - 1;
             assert(after_pad >= 0);
             break;
-          } else if (start_pos + n - 1 <
-                     twopass->stats_buf_ctx->stats_in_start) {
+          } else if (start_pos + n < twopass->stats_buf_ctx->stats_in_start) {
             before_pad = cur_start - n - 1;
             continue;
           }
-          errs[n + before_pad - cur_start] = (start_pos + n - 1)->coded_error;
+          errs[n + before_pad - cur_start] = (start_pos + n)->coded_error;
         }
         const int len = before_pad + after_pad + cur_last - cur_start + 1;
         const int reset = determine_high_err_gf(
@@ -1371,12 +1373,13 @@
       // reset accumulators
       init_gf_stats(&gf_stats);
     }
+    ++i;
   }
 
   // save intervals
   rc->intervals_till_gf_calculate_due = count_cuts - 1;
   for (int n = 1; n < count_cuts; n++) {
-    rc->gf_intervals[n - 1] = cut_pos[n] + 1 - cut_pos[n - 1];
+    rc->gf_intervals[n - 1] = cut_pos[n] - cut_pos[n - 1];
   }
   rc->cur_gf_index = 0;
   twopass->stats_in = start_pos;
@@ -1417,12 +1420,10 @@
  * case of one pass encoding where no lookahead stats are avialable.
  *
  * \param[in]    cpi             Top-level encoder structure
- * \param[in]    frame_params    Structure with frame parameters
  *
  * \return Nothing is returned. Instead, cpi->gf_group is changed.
  */
-static void define_gf_group_pass0(AV1_COMP *cpi,
-                                  const EncodeFrameParams *const frame_params) {
+static void define_gf_group_pass0(AV1_COMP *cpi) {
   RATE_CONTROL *const rc = &cpi->rc;
   GF_GROUP *const gf_group = &cpi->gf_group;
   const AV1EncoderConfig *const oxcf = &cpi->oxcf;
@@ -1457,7 +1458,7 @@
     gf_group->max_layer_depth_allowed = 0;
 
   // Set up the structure of this Group-Of-Pictures (same as GF_GROUP)
-  av1_gop_setup_structure(cpi, frame_params);
+  av1_gop_setup_structure(cpi);
 
   // Allocate bits to each of the frames in the GF group.
   // TODO(sarahparker) Extend this to work with pyramid structure.
@@ -1515,7 +1516,7 @@
       rc->baseline_gf_interval = arf_position - rc->source_alt_ref_pending;
     }
   } else {
-    rc->baseline_gf_interval = arf_position - rc->source_alt_ref_pending;
+    rc->baseline_gf_interval = arf_position;
   }
 }
 
@@ -1564,8 +1565,8 @@
  * \return Nothing is returned. Instead, cpi->gf_group is changed.
  */
 static void define_gf_group(AV1_COMP *cpi, FIRSTPASS_STATS *this_frame,
-                            const EncodeFrameParams *const frame_params,
-                            int max_gop_length, int is_final_pass) {
+                            EncodeFrameParams *frame_params, int max_gop_length,
+                            int is_final_pass) {
   AV1_COMMON *const cm = &cpi->common;
   RATE_CONTROL *const rc = &cpi->rc;
   const AV1EncoderConfig *const oxcf = &cpi->oxcf;
@@ -1577,12 +1578,9 @@
   const GFConfig *const gf_cfg = &oxcf->gf_cfg;
   const RateControlCfg *const rc_cfg = &oxcf->rc_cfg;
   int i;
-
   int flash_detected;
   int64_t gf_group_bits;
-  const int is_intra_only =
-      rc->frames_since_key == 0 || frame_params->frame_type == INTRA_ONLY_FRAME;
-  const int arf_active_or_kf = is_intra_only || rc->source_alt_ref_active;
+  const int is_intra_only = rc->frames_since_key == 0;
 
   cpi->internal_altref_allowed = (gf_cfg->gf_max_pyr_height > 1);
 
@@ -1596,7 +1594,7 @@
   av1_zero(next_frame);
 
   if (has_no_stats_stage(cpi)) {
-    define_gf_group_pass0(cpi, frame_params);
+    define_gf_group_pass0(cpi);
     return;
   }
 
@@ -1624,30 +1622,38 @@
 
   // If this is a key frame or the overlay from a previous arf then
   // the error score / cost of this frame has already been accounted for.
-  if (arf_active_or_kf) {
-    gf_stats.gf_group_err -= first_frame_stats.frame_err;
-#if GROUP_ADAPTIVE_MAXQ
-    gf_stats.gf_group_raw_error -= this_frame->coded_error;
-#endif
-    gf_stats.gf_group_skip_pct -= this_frame->intra_skip_pct;
-    gf_stats.gf_group_inactive_zone_rows -= this_frame->inactive_zone_rows;
-  }
 
   // TODO(urvang): Try logic to vary min and max interval based on q.
   const int active_min_gf_interval = rc->min_gf_interval;
   const int active_max_gf_interval =
       AOMMIN(rc->max_gf_interval, max_gop_length);
 
-  i = 0;
+  i = is_intra_only;
   // get the determined gf group length from rc->gf_intervals
   while (i < rc->gf_intervals[rc->cur_gf_index]) {
-    ++i;
+    // read in the next frame
+    if (EOF == input_stats(twopass, &next_frame)) break;
     // Accumulate error score of frames in this gf group.
     mod_frame_err =
-        calculate_modified_err(frame_info, twopass, oxcf, this_frame);
+        calculate_modified_err(frame_info, twopass, oxcf, &next_frame);
     // accumulate stats for this frame
-    accumulate_this_frame_stats(this_frame, mod_frame_err, &gf_stats);
+    accumulate_this_frame_stats(&next_frame, mod_frame_err, &gf_stats);
 
+    if (i == 0) {
+      first_frame_stats.frame_err = mod_frame_err;
+      first_frame_stats.frame_coded_error = next_frame.coded_error;
+      first_frame_stats.frame_sr_coded_error = next_frame.sr_coded_error;
+      first_frame_stats.frame_tr_coded_error = next_frame.tr_coded_error;
+    }
+
+    ++i;
+  }
+
+  reset_fpf_position(twopass, start_pos);
+
+  i = is_intra_only;
+  input_stats(twopass, &next_frame);
+  while (i < rc->gf_intervals[rc->cur_gf_index]) {
     // read in the next frame
     if (EOF == input_stats(twopass, &next_frame)) break;
 
@@ -1659,8 +1665,11 @@
     accumulate_next_frame_stats(&next_frame, frame_info, flash_detected,
                                 rc->frames_since_key, i, &gf_stats);
 
-    *this_frame = next_frame;
+    ++i;
   }
+
+  i = rc->gf_intervals[rc->cur_gf_index];
+
   // save the errs for the last frame
   last_frame_stats.frame_coded_error = next_frame.coded_error;
   last_frame_stats.frame_sr_coded_error = next_frame.sr_coded_error;
@@ -1758,19 +1767,20 @@
   }
 
   // Should we use the alternate reference frame.
+  int ext_len = i - is_intra_only;
   if (use_alt_ref) {
-    rc->source_alt_ref_pending = 1;
+    rc->source_alt_ref_pending = 0;
     gf_group->max_layer_depth_allowed = gf_cfg->gf_max_pyr_height;
     set_baseline_gf_interval(cpi, i, active_max_gf_interval, use_alt_ref,
                              is_final_pass);
 
-    const int forward_frames = (rc->frames_to_key - i >= i - 1)
-                                   ? i - 1
+    const int forward_frames = (rc->frames_to_key - i >= ext_len)
+                                   ? ext_len
                                    : AOMMAX(0, rc->frames_to_key - i);
 
     // Calculate the boost for alt ref.
     rc->gfu_boost = av1_calc_arf_boost(
-        twopass, rc, frame_info, alt_offset, forward_frames, (i - 1),
+        twopass, rc, frame_info, alt_offset, forward_frames, ext_len,
         cpi->lap_enabled ? &rc->num_stats_used_for_gfu_boost : NULL,
         cpi->lap_enabled ? &rc->num_stats_required_for_gfu_boost : NULL);
   } else {
@@ -1783,7 +1793,7 @@
     rc->gfu_boost = AOMMIN(
         MAX_GF_BOOST,
         av1_calc_arf_boost(
-            twopass, rc, frame_info, alt_offset, (i - 1), 0,
+            twopass, rc, frame_info, alt_offset, ext_len, 0,
             cpi->lap_enabled ? &rc->num_stats_used_for_gfu_boost : NULL,
             cpi->lap_enabled ? &rc->num_stats_required_for_gfu_boost : NULL));
   }
@@ -1802,8 +1812,8 @@
   rc->arf_boost_factor = 1.0;
   if (rc->source_alt_ref_pending && !is_lossless_requested(rc_cfg)) {
     // Reduce the boost of altref in the last gf group
-    if (rc->frames_to_key - i == REDUCE_GF_LENGTH_BY ||
-        rc->frames_to_key - i == 0) {
+    if (rc->frames_to_key - ext_len == REDUCE_GF_LENGTH_BY ||
+        rc->frames_to_key - ext_len == 0) {
       rc->arf_boost_factor = LAST_ALR_BOOST_FACTOR;
     }
   }
@@ -1869,13 +1879,13 @@
     twopass->kf_group_error_left -= (int64_t)gf_stats.gf_group_err;
 
   // Set up the structure of this Group-Of-Pictures (same as GF_GROUP)
-  av1_gop_setup_structure(cpi, frame_params);
+  av1_gop_setup_structure(cpi);
 
   // Reset the file position.
   reset_fpf_position(twopass, start_pos);
 
   // Calculate a section intra ratio used in setting max loop filter.
-  if (frame_params->frame_type != KEY_FRAME) {
+  if (rc->frames_since_key != 0) {
     twopass->section_intra_rating = calculate_section_intra_ratio(
         start_pos, twopass->stats_buf_ctx->stats_in_end,
         rc->baseline_gf_interval);
@@ -1885,9 +1895,19 @@
   twopass->rolling_arf_group_target_bits = 1;
   twopass->rolling_arf_group_actual_bits = 1;
 
-  av1_gop_bit_allocation(cpi, rc, gf_group,
-                         frame_params->frame_type == KEY_FRAME, use_alt_ref,
-                         gf_group_bits);
+  av1_gop_bit_allocation(cpi, rc, gf_group, rc->frames_since_key == 0,
+                         use_alt_ref, gf_group_bits);
+
+  frame_params->frame_type =
+      rc->frames_since_key == 0 ? KEY_FRAME : INTER_FRAME;
+  frame_params->show_frame =
+      !(gf_group->update_type[gf_group->index] == ARF_UPDATE ||
+        gf_group->update_type[gf_group->index] == INTNL_ARF_UPDATE);
+
+  // TODO(jingning): Generalize this condition.
+  if (is_final_pass) {
+    cpi->gf_state.arf_gf_boost_lst = use_alt_ref;
+  }
 }
 
 // #define FIXED_ARF_BITS
@@ -2754,7 +2774,7 @@
 
   if (rc->frames_since_key > 0) frame_params->frame_type = INTER_FRAME;
 
-  if (rc->frames_till_gf_update_due > 0 && !(frame_flags & FRAMEFLAGS_KEY)) {
+  if (gf_group->index < gf_group->size && !(frame_flags & FRAMEFLAGS_KEY)) {
     assert(gf_group->index < gf_group->size);
 
     setup_target_rate(cpi);
@@ -2779,21 +2799,23 @@
   av1_zero(this_frame);
   // call above fn
   if (is_stat_consumption_stage(cpi)) {
-    // Do not read if it is overlay for kf arf, since kf already
-    // advanced the first pass stats pointer
-    if (!av1_check_keyframe_overlay(gf_group->index, gf_group,
-                                    rc->frames_since_key)) {
-      process_first_pass_stats(cpi, &this_frame);
+    if (gf_group->index < gf_group->size || rc->frames_to_key == 0) {
+      // Do not read if it is overlay for kf arf, since kf already
+      // advanced the first pass stats pointer
+      if (!av1_check_keyframe_overlay(gf_group->index, gf_group,
+                                      rc->frames_since_key)) {
+        process_first_pass_stats(cpi, &this_frame);
+      }
     }
   } else {
     rc->active_worst_quality = oxcf->rc_cfg.cq_level;
   }
 
   // Keyframe and section processing.
+  FIRSTPASS_STATS this_frame_copy;
+  this_frame_copy = this_frame;
   if (rc->frames_to_key <= 0) {
     assert(rc->frames_to_key >= -1);
-    FIRSTPASS_STATS this_frame_copy;
-    this_frame_copy = this_frame;
     frame_params->frame_type = KEY_FRAME;
     // Define next KF group and assign bits to it.
     find_next_key_frame(cpi, &this_frame);
@@ -2834,7 +2856,7 @@
   }
 
   // Define a new GF/ARF group. (Should always enter here for key frames).
-  if (rc->frames_till_gf_update_due == 0) {
+  if (gf_group->index == gf_group->size) {
     assert(cpi->common.current_frame.frame_number == 0 ||
            gf_group->index == gf_group->size);
     const FIRSTPASS_STATS *const start_position = twopass->stats_in;
@@ -2856,16 +2878,19 @@
             ? AOMMIN(MAX_GF_INTERVAL, oxcf->gf_cfg.lag_in_frames -
                                           oxcf->algo_cfg.arnr_max_frames / 2)
             : MAX_GF_LENGTH_LAP;
-    if (rc->intervals_till_gf_calculate_due == 0) {
+
+    // TODO(jingning): Resolve the redundant calls here.
+    if (rc->intervals_till_gf_calculate_due == 0 || 1) {
       calculate_gf_length(cpi, max_gop_length, MAX_NUM_GF_INTERVALS);
     }
 
     if (max_gop_length > 16 && oxcf->algo_cfg.enable_tpl_model &&
         !cpi->sf.tpl_sf.disable_gop_length_decision) {
-      if (rc->gf_intervals[rc->cur_gf_index] - 1 > 16) {
+      if (rc->gf_intervals[rc->cur_gf_index] > 16) {
         // The calculate_gf_length function is previously used with
         // max_gop_length = 32 with look-ahead gf intervals.
         define_gf_group(cpi, &this_frame, frame_params, max_gop_length, 0);
+        this_frame = this_frame_copy;
         if (!av1_tpl_setup_stats(cpi, 1, frame_params, frame_input)) {
           // Tpl decides that a shorter gf interval is better.
           // TODO(jingning): Remove redundant computations here.
@@ -2879,7 +2904,14 @@
         calculate_gf_length(cpi, max_gop_length, 1);
       }
     }
+    define_gf_group(cpi, &this_frame, frame_params, max_gop_length, 0);
+
+    if (gf_group->update_type[gf_group->index] != ARF_UPDATE &&
+        rc->frames_since_key > 0)
+      process_first_pass_stats(cpi, &this_frame);
+
     define_gf_group(cpi, &this_frame, frame_params, max_gop_length, 1);
+
     rc->frames_till_gf_update_due = rc->baseline_gf_interval;
     assert(gf_group->index == 0);
 
diff --git a/av1/encoder/ratectrl.c b/av1/encoder/ratectrl.c
index 3b65e0d..b24d520 100644
--- a/av1/encoder/ratectrl.c
+++ b/av1/encoder/ratectrl.c
@@ -1752,7 +1752,7 @@
   rc->source_alt_ref_pending = 0;
 
   // Set the alternate reference frame active flag
-  rc->source_alt_ref_active = 1;
+  rc->source_alt_ref_active = 0;
 }
 
 static void update_golden_frame_stats(AV1_COMP *cpi) {
diff --git a/av1/encoder/tpl_model.c b/av1/encoder/tpl_model.c
index 92fbd12..67f5dd1 100644
--- a/av1/encoder/tpl_model.c
+++ b/av1/encoder/tpl_model.c
@@ -470,11 +470,7 @@
   }
 
   best_intra_cost = AOMMAX(best_intra_cost, 1);
-  if (tpl_data->frame_idx == 0) {
-    best_inter_cost = 0;
-  } else {
-    best_inter_cost = AOMMIN(best_intra_cost, best_inter_cost);
-  }
+  best_inter_cost = AOMMIN(best_intra_cost, best_inter_cost);
   tpl_stats->inter_cost = best_inter_cost << TPL_DEP_COST_SCALE_LOG2;
   tpl_stats->intra_cost = best_intra_cost << TPL_DEP_COST_SCALE_LOG2;
 
@@ -752,8 +748,7 @@
 }
 
 static AOM_INLINE int get_gop_length(const GF_GROUP *gf_group) {
-  int use_arf = gf_group->arf_index >= 0;
-  int gop_length = AOMMIN(gf_group->size - 1 + use_arf, MAX_TPL_FRAME_IDX - 1);
+  int gop_length = AOMMIN(gf_group->size, MAX_TPL_FRAME_IDX - 1);
   return gop_length;
 }
 
@@ -819,7 +814,7 @@
   // The reference frame pruning is not enabled for frames beyond the gop
   // length, as there are fewer reference frames and the reference frames
   // differ from the frames considered during RD search.
-  if (ref_pruning_enabled && (frame_idx <= gop_length)) {
+  if (ref_pruning_enabled && (frame_idx < gop_length)) {
     for (idx = 0; idx < INTER_REFS_PER_FRAME; ++idx) {
       const MV_REFERENCE_FRAME refs[2] = { idx + 1, NONE_FRAME };
       if (prune_ref_by_selective_ref_frame(cpi, NULL, refs,
@@ -916,10 +911,6 @@
 
 static void mc_flow_synthesizer(AV1_COMP *cpi, int frame_idx) {
   AV1_COMMON *cm = &cpi->common;
-
-  const GF_GROUP *gf_group = &cpi->gf_group;
-  if (frame_idx == gf_group->size) return;
-
   TplParams *const tpl_data = &cpi->tpl_data;
 
   const BLOCK_SIZE bsize = convert_length_to_bsize(tpl_data->tpl_bsize_1d);
@@ -950,7 +941,7 @@
   int ref_picture_map[REF_FRAMES];
 
   for (int i = 0; i < REF_FRAMES; ++i) {
-    if (frame_params.frame_type == KEY_FRAME || gop_eval) {
+    if (frame_params.frame_type == KEY_FRAME) {
       tpl_data->tpl_frame[-i - 1].gf_picture = NULL;
       tpl_data->tpl_frame[-1 - 1].rec_picture = NULL;
       tpl_data->tpl_frame[-i - 1].frame_display_index = 0;
@@ -967,11 +958,11 @@
   *tpl_group_frames = cur_frame_idx;
 
   int gf_index;
-  int anc_frame_offset = gf_group->cur_frame_idx[cur_frame_idx];
+  int anc_frame_offset = gop_eval ? 0 : gf_group->cur_frame_idx[cur_frame_idx];
   int process_frame_count = 0;
   const int gop_length = get_gop_length(gf_group);
 
-  for (gf_index = cur_frame_idx; gf_index <= gop_length; ++gf_index) {
+  for (gf_index = cur_frame_idx; gf_index < gop_length; ++gf_index) {
     TplDepFrame *tpl_frame = &tpl_data->tpl_frame[gf_index];
     FRAME_UPDATE_TYPE frame_update_type = gf_group->update_type[gf_index];
     int frame_display_index = gf_index == gf_group->size
@@ -979,6 +970,8 @@
                                   : gf_group->cur_frame_idx[gf_index] +
                                         gf_group->arf_src_offset[gf_index];
 
+    int lookahead_index = frame_display_index - anc_frame_offset;
+
     frame_params.show_frame = frame_update_type != ARF_UPDATE &&
                               frame_update_type != INTNL_ARF_UPDATE;
     frame_params.show_existing_frame =
@@ -994,14 +987,13 @@
       *pframe_qindex = gf_group->q_val[gf_index];
 
     if (gf_index == cur_frame_idx) {
-      struct lookahead_entry *buf =
-          av1_lookahead_peek(cpi->lookahead, 1, cpi->compressor_stage);
-      if (buf == NULL) break;
+      struct lookahead_entry *buf = av1_lookahead_peek(
+          cpi->lookahead, lookahead_index, cpi->compressor_stage);
       tpl_frame->gf_picture = gop_eval ? &buf->img : frame_input->source;
     } else {
       struct lookahead_entry *buf = av1_lookahead_peek(
-          cpi->lookahead, frame_display_index - anc_frame_offset,
-          cpi->compressor_stage);
+          cpi->lookahead, lookahead_index, cpi->compressor_stage);
+
       if (buf == NULL) break;
       tpl_frame->gf_picture = &buf->img;
     }
@@ -1041,12 +1033,13 @@
     ++*tpl_group_frames;
   }
 
-  if (cur_frame_idx == 0) return;
+  if (cpi->rc.frames_since_key == 0) return;
 
   int extend_frame_count = 0;
   int extend_frame_length = AOMMIN(
       MAX_TPL_EXTEND, cpi->rc.frames_to_key - cpi->rc.baseline_gf_interval);
-  int frame_display_index = cpi->rc.baseline_gf_interval + 1;
+  int frame_display_index = gf_group->cur_frame_idx[gop_length - 1] +
+                            gf_group->arf_src_offset[gop_length - 1] + 1;
 
   for (;
        gf_index < MAX_TPL_FRAME_IDX && extend_frame_count < extend_frame_length;
@@ -1059,9 +1052,9 @@
         frame_update_type == INTNL_OVERLAY_UPDATE;
     frame_params.frame_type = INTER_FRAME;
 
+    int lookahead_index = frame_display_index - anc_frame_offset;
     struct lookahead_entry *buf = av1_lookahead_peek(
-        cpi->lookahead, frame_display_index - anc_frame_offset,
-        cpi->compressor_stage);
+        cpi->lookahead, lookahead_index, cpi->compressor_stage);
 
     if (buf == NULL) break;
 
@@ -1172,7 +1165,6 @@
         gf_group->update_type[frame_idx] == OVERLAY_UPDATE)
       continue;
 
-    if (gf_group->size == frame_idx) continue;
     init_mc_flow_dispenser(cpi, frame_idx, pframe_qindex);
     if (mt_info->num_workers > 1) {
       tpl_row_mt->sync_read_ptr = av1_tpl_row_mt_sync_read;
@@ -1236,7 +1228,7 @@
   // Allow larger GOP size if the base layer ARF has higher dependency factor
   // than the intermediate ARF and both ARFs have reasonably high dependency
   // factors.
-  return (beta[0] >= beta[1] + 0.7) && beta[0] > 3.0;
+  return (beta[0] >= beta[1] + 0.7) && beta[0] > 8.0;
 }
 
 void av1_tpl_rdmult_setup(AV1_COMP *cpi) {
diff --git a/test/gf_pyr_height_test.cc b/test/gf_pyr_height_test.cc
index 87a0d0d..71ecc1f 100644
--- a/test/gf_pyr_height_test.cc
+++ b/test/gf_pyr_height_test.cc
@@ -25,32 +25,32 @@
   double psnr_thresh;
 } kTestParams[] = {
   // gf_min_pyr_height = 0
-  { 0, 0, 33.40 },
+  { 0, 0, 33.30 },
   { 0, 1, 33.90 },
   { 0, 2, 34.00 },
   { 0, 3, 34.20 },
   { 0, 4, 34.30 },
-  { 0, 5, 34.40 },
+  { 0, 5, 34.35 },
   // gf_min_pyr_height = 1
   { 1, 1, 33.90 },
   { 1, 2, 34.00 },
   { 1, 3, 34.20 },
   { 1, 4, 34.30 },
-  { 1, 5, 34.40 },
+  { 1, 5, 34.35 },
   // gf_min_pyr_height = 2
   { 2, 2, 34.00 },
   { 2, 3, 34.20 },
   { 2, 4, 34.30 },
-  { 2, 5, 34.40 },
+  { 2, 5, 34.35 },
   // gf_min_pyr_height = 3
   { 3, 3, 34.20 },
   { 3, 4, 34.30 },
-  { 3, 5, 34.40 },
+  { 3, 5, 34.35 },
   // gf_min_pyr_height = 4
   { 4, 4, 34.30 },
-  { 4, 5, 34.40 },
+  { 4, 5, 34.35 },
   // gf_min_pyr_height = 5
-  { 5, 5, 34.40 },
+  { 5, 5, 34.35 },
 };
 
 // Compiler may decide to add some padding to the struct above for alignment,