Adjust gop shrinking for 32 length.

This patch makes several adjustments so that gf group decisions work
better with the 32-length gop:
1: change the logic that uses tpl to choose between a longer and a
   shorter gop. If the longer gop is chosen, or lag-in-frames is not
   sufficient, we can still benefit from lookahead and lookback.
2: fix a problem in the coded error lookup when the current gf group
   does not start with a key frame. This way the padding is also
   available to gf groups that are decided individually.
3: change the minimum shrinking length to 6, in order to benefit
   individually determined gf groups.
4: if the user restricts the max length to 16, we should now have
   better performance.

With lag-in-frames=35, we see some minor gains on average:

Test results (BD-rate, %) on speed 1:
     	     avg_PSNR	overall_PSNR	ssim	VMAF
lowres	     -0.007	-0.017		-0.017	-0.124
midres	      0.016	 0.074		 0.194	-0.025
hdres	     -0.059	-0.068		-0.096	-0.065
ugc360p	     -0.134	-0.055		-0.064	-0.132

on speed 4:
     	     avg_PSNR	overall_PSNR	ssim	VMAF
lowres	     -0.078	-0.081		-0.056	-0.092
midres	     -0.021	 0.041		 0.131	-0.065
hdres	     -0.004	-0.014		-0.079	-0.060
ugc360p	     -0.206	-0.209		-0.247	-0.229

Most of the gains are in ugc360p, which is not very stable and where
padding could help most.
Changes on the other test sets are small.

STATS_CHANGED

Change-Id: Ia43ff93a1a1943e53856f3109e1b0e476b9f3016
diff --git a/av1/encoder/pass2_strategy.c b/av1/encoder/pass2_strategy.c
index 2f20f45..6a4e20c 100644
--- a/av1/encoder/pass2_strategy.c
+++ b/av1/encoder/pass2_strategy.c
@@ -962,7 +962,7 @@
 #define RC_FACTOR_MAX 1.25
 #endif  // GROUP_ADAPTIVE_MAXQ
 #define MIN_FWD_KF_INTERVAL 8
-#define MIN_SHRINK_LEN 8      // the minimum length of gf if we are shrinking
+#define MIN_SHRINK_LEN 6      // the minimum length of gf if we are shrinking
 #define SI_HIGH AVG_SI_THRES  // high quality classification
 #define SI_LOW 0.3            // very unsure classification
 // this function finds an low error frame previously to the current last frame
@@ -1049,7 +1049,8 @@
 
 // This function decides the gf group length of future frames in batch
 // rc->gf_intervals is modified to store the group lengths
-static void calculate_gf_length(AV1_COMP *cpi, int max_gop_length) {
+static void calculate_gf_length(AV1_COMP *cpi, int max_gop_length,
+                                int max_intervals) {
   RATE_CONTROL *const rc = &cpi->rc;
   TWO_PASS *const twopass = &cpi->twopass;
   FIRSTPASS_STATS next_frame;
@@ -1072,11 +1073,11 @@
   av1_zero(next_frame);
 
   if (has_no_stats_stage(cpi)) {
-    for (i = 0; i < NUM_GF_INTERVALS; i++) {
+    for (i = 0; i < MAX_NUM_GF_INTERVALS; i++) {
       rc->gf_intervals[i] = MAX_GF_INTERVAL;
     }
     rc->cur_gf_index = 0;
-    rc->intervals_till_gf_calculate_due = NUM_GF_INTERVALS;
+    rc->intervals_till_gf_calculate_due = MAX_NUM_GF_INTERVALS;
     return;
   }
 
@@ -1086,8 +1087,8 @@
       AOMMIN(rc->max_gf_interval, max_gop_length);
 
   i = 0;
-  const int max_intervals = cpi->lap_enabled ? 1 : NUM_GF_INTERVALS;
-  int cut_pos[NUM_GF_INTERVALS + 1] = { 0 };
+  max_intervals = cpi->lap_enabled ? 1 : max_intervals;
+  int cut_pos[MAX_NUM_GF_INTERVALS + 1] = { 0 };
   int count_cuts = 1;
   int cur_start = 0, cur_last;
   int cut_here;
@@ -1150,7 +1151,8 @@
         int is_high[MAX_GF_INTERVAL + 1 + MAX_PAD_GF_CHECK * 2] = { 0 };
         double errs[MAX_GF_INTERVAL + 1 + MAX_PAD_GF_CHECK * 2] = { 0 };
         double si[MAX_GF_INTERVAL + 1 + MAX_PAD_GF_CHECK * 2] = { 0 };
-        int before_pad = AOMMIN(MAX_PAD_GF_CHECK, cur_start - (cur_start == 0));
+        int before_pad =
+            AOMMIN(MAX_PAD_GF_CHECK, rc->frames_since_key - 1 + cur_start);
         int after_pad =
             AOMMIN(MAX_PAD_GF_CHECK, rc->frames_to_key - cur_last - 1);
         for (n = cur_start - before_pad; n <= cur_last + after_pad; n++) {
@@ -1170,7 +1172,7 @@
         // count how many trailing lower error frames we have in this decided
         // gf group
         prev_lows = 0;
-        for (n = cur_last; n > cur_start; n--) {
+        for (n = cur_last - 1; n > cur_start + MIN_SHRINK_LEN; n--) {
           if (is_high[n - cur_start + before_pad] == 0 &&
               (si[n - cur_start + before_pad] > SI_HIGH || reset)) {
             prev_lows++;
@@ -1436,8 +1438,10 @@
     }
     *this_frame = next_frame;
   }
-  rc->intervals_till_gf_calculate_due--;
-  rc->cur_gf_index++;
+  if (is_final_pass) {
+    rc->intervals_till_gf_calculate_due--;
+    rc->cur_gf_index++;
+  }
 
   // Was the group length constrained by the requirement for a new KF?
   rc->constrained_gf_group = (i >= rc->frames_to_key) ? 1 : 0;
@@ -1561,7 +1565,7 @@
       if (i - roll_back >= active_min_gf_interval + 1) {
         alt_offset = -roll_back;
         i -= roll_back;
-        rc->intervals_till_gf_calculate_due = 0;
+        if (is_final_pass) rc->intervals_till_gf_calculate_due = 0;
       }
     }
   }
@@ -1601,12 +1605,12 @@
       // if possible, merge the last two gf groups
       if (rc->frames_to_key <= active_max_gf_interval) {
         rc->baseline_gf_interval = rc->frames_to_key;
-        rc->intervals_till_gf_calculate_due = 0;
+        if (is_final_pass) rc->intervals_till_gf_calculate_due = 0;
         // if merging the last two gf groups creates a group that is too long,
         // split them and force the last gf group to be the MIN_FWD_KF_INTERVAL
       } else {
         rc->baseline_gf_interval = rc->frames_to_key - MIN_FWD_KF_INTERVAL;
-        rc->intervals_till_gf_calculate_due = 0;
+        if (is_final_pass) rc->intervals_till_gf_calculate_due = 0;
       }
     } else {
       rc->baseline_gf_interval = i - rc->source_alt_ref_pending;
@@ -1619,7 +1623,7 @@
   // the next gf group.
   // TODO(bohanli): should incorporate the usage of alt_ref into
   // calculate_gf_length
-  if (rc->source_alt_ref_pending == 0 &&
+  if (is_final_pass && rc->source_alt_ref_pending == 0 &&
       rc->intervals_till_gf_calculate_due > 0) {
     rc->gf_intervals[rc->cur_gf_index]--;
   }
@@ -2323,18 +2327,29 @@
   if (rc->frames_till_gf_update_due == 0) {
     assert(cpi->common.current_frame.frame_number == 0 ||
            gf_group->index == gf_group->size);
-    int max_gop_length = MAX_GF_INTERVAL;
-    // TODO(jingning): Remove redundant computations here.
-    if (cpi->oxcf.lag_in_frames >= 32) {
-      calculate_gf_length(cpi, max_gop_length);
-      define_gf_group(cpi, &this_frame, frame_params, max_gop_length, 0);
-
-      if (!av1_tpl_setup_stats(cpi, 1, frame_params, frame_input))
-        max_gop_length = 16;
-    } else {
-      max_gop_length = 16;
+    int max_gop_length = (cpi->oxcf.lag_in_frames >= 32) ? MAX_GF_INTERVAL : 16;
+    if (rc->intervals_till_gf_calculate_due == 0) {
+      calculate_gf_length(cpi, max_gop_length, MAX_NUM_GF_INTERVALS);
     }
-    calculate_gf_length(cpi, max_gop_length);
+
+    if (max_gop_length > 16) {
+      if (rc->gf_intervals[rc->cur_gf_index] - 1 > 16) {
+        // The calculate_gf_length function is previously used with
+        // max_gop_length = 32 with look-ahead gf intervals.
+        define_gf_group(cpi, &this_frame, frame_params, max_gop_length, 0);
+        if (!av1_tpl_setup_stats(cpi, 1, frame_params, frame_input)) {
+          // Tpl decides that a shorter gf interval is better.
+          // TODO(jingning): Remove redundant computations here.
+          max_gop_length = 16;
+          calculate_gf_length(cpi, max_gop_length, 1);
+        }
+      } else {
+        // Even based on 32 we still decide to use a short gf interval.
+        // Better to re-decide based on 16 then
+        max_gop_length = 16;
+        calculate_gf_length(cpi, max_gop_length, 1);
+      }
+    }
     define_gf_group(cpi, &this_frame, frame_params, max_gop_length, 1);
 
     rc->frames_till_gf_update_due = rc->baseline_gf_interval;
diff --git a/av1/encoder/ratectrl.h b/av1/encoder/ratectrl.h
index 61d58e4..038eac8 100644
--- a/av1/encoder/ratectrl.h
+++ b/av1/encoder/ratectrl.h
@@ -48,7 +48,7 @@
 #define MAX_GF_INTERVAL 32
 #define FIXED_GF_INTERVAL 8  // Used in some testing modes only
 
-#define NUM_GF_INTERVALS 150
+#define MAX_NUM_GF_INTERVALS 15
 
 typedef struct {
   int resize_width;
@@ -101,7 +101,7 @@
   // number of determined gf group length left
   int intervals_till_gf_calculate_due;
   // stores gf group length intervals
-  int gf_intervals[NUM_GF_INTERVALS];
+  int gf_intervals[MAX_NUM_GF_INTERVALS];
   // the current index in gf_intervals
   int cur_gf_index;