Added alternate bit allocation method.

Added an alternate algorithm for distributing bits over ARFs.
This is based on a fixed distribution pattern and ratio of bits
allocated.

Also added stats to record the actual Q used at different
levels in the coding pyramid. In tests with strict rate control
this proved to be a good baseline for the lowest allowed Q
for the next level.

All features disabled by default for now so there should be no
stats change for this patch.

Enabling FIXED_ARF_BITS has a surprisingly modest impact
on metrics.

Enabling STRICT_RC on its own gives quite accurate rate control but
a huge hit to metrics.

Enabling STRICT_RC with FIXED_ARF_BITS and ALT_ARF_ALLOCATION
gives accurate rate control with a greatly reduced metrics penalty.
For example, at 100 Kbits Akiyo is about 1 dB better with all enabled than
with only STRICT_RC.

If strict RC is only applied to best quality, the metrics are better but rate
control is worse for those clips that tend to overshoot. Finding an alternative
way to select an appropriate worst quality at each level is TBD.

Change-Id: I402773c5e5a36a8417f8b38f8ef536d37dc98ac0
diff --git a/av1/encoder/firstpass.h b/av1/encoder/firstpass.h
index 266e5fe..8ca4031 100644
--- a/av1/encoder/firstpass.h
+++ b/av1/encoder/firstpass.h
@@ -30,8 +30,6 @@
 
 #define VLOW_MOTION_THRESHOLD 950
 
-#define MAX_ARF_LAYERS 6
-
 typedef struct {
   // Frame number in display order, if stats are for a single frame.
   // No real meaning for a collection of frames.
diff --git a/av1/encoder/pass2_strategy.c b/av1/encoder/pass2_strategy.c
index 847f4b9..446f8ee 100644
--- a/av1/encoder/pass2_strategy.c
+++ b/av1/encoder/pass2_strategy.c
@@ -716,6 +716,84 @@
   return bits_assigned;
 }
 
+// Compile time switch on alternate algorithm to allocate bits in ARF groups
+// #define ALT_ARF_ALLOCATION
+#ifdef ALT_ARF_ALLOCATION
+// Share of the still unassigned ARF bits given to each pyramid layer, indexed
+// by layer depth. The deepest active layer always takes all that remains.
+static const double layer_fraction[MAX_ARF_LAYERS + 1] = {
+  1.0, 0.70, 0.55, 0.60, 0.60, 1.0, 1.0
+};
+// Fills gf_group->bit_allocation[] for one group. Regular frames get an even
+// share of the non-boost bits; ARF and internal ARF frames also get a slice
+// of gf_arf_bits chosen by layer depth; overlay frames get nothing.
+static void allocate_gf_group_bits(GF_GROUP *gf_group, RATE_CONTROL *const rc,
+                                   int64_t gf_group_bits, int gf_arf_bits,
+                                   int key_frame, int use_arf) {
+  int64_t total_group_bits = gf_group_bits;
+  const int gf_group_size = gf_group->size;
+  int layer_frames[MAX_ARF_LAYERS + 1] = { 0 };
+
+  // Subtract the extra bits set aside for ARF frames from the Group Total
+  if (use_arf || !key_frame) total_group_bits -= gf_arf_bits;
+
+  // Falls back to 1 bit per frame rather than dividing by a zero interval.
+  const int base_frame_bits =
+      rc->baseline_gf_interval
+          ? (int)(total_group_bits / rc->baseline_gf_interval)
+          : 1;
+
+  // Frame 0. For key frames the frame target rate is already set and it is
+  // also the golden frame, so it is left untouched here.
+  int frame_index = 0;
+  if (!key_frame) {
+    if (rc->source_alt_ref_active)
+      gf_group->bit_allocation[frame_index] = 0;
+    else
+      gf_group->bit_allocation[frame_index] =
+          base_frame_bits + (int)(gf_arf_bits * layer_fraction[1]);
+  }
+  frame_index++;
+
+  // Count the ARF frames per layer in case the group length is non standard.
+  const int max_arf_layer = gf_group->max_layer_depth - 1;
+  for (int idx = frame_index; idx < gf_group_size; ++idx) {
+    if ((gf_group->update_type[idx] == ARF_UPDATE) ||
+        (gf_group->update_type[idx] == INTNL_ARF_UPDATE)) {
+      layer_frames[gf_group->layer_depth[idx]]++;
+    }
+  }
+
+  // Allocate extra bits to each ARF layer
+  int layer_extra_bits[MAX_ARF_LAYERS + 1] = { 0 };
+  for (int i = 1; i <= max_arf_layer; ++i) {
+    double fraction = (i == max_arf_layer) ? 1.0 : layer_fraction[i];
+    layer_extra_bits[i] =
+        (int)((gf_arf_bits * fraction) / AOMMAX(1, layer_frames[i]));
+    gf_arf_bits -= (int)(gf_arf_bits * fraction);
+  }
+
+  // Now combine ARF layer and baseline bits to give total bits for each frame.
+  for (int idx = frame_index; idx < gf_group_size; ++idx) {
+    switch (gf_group->update_type[idx]) {
+      case ARF_UPDATE:
+      case INTNL_ARF_UPDATE:
+        gf_group->bit_allocation[idx] =
+            base_frame_bits + layer_extra_bits[gf_group->layer_depth[idx]];
+        break;
+      case INTNL_OVERLAY_UPDATE:
+      case OVERLAY_UPDATE: gf_group->bit_allocation[idx] = 0; break;
+      default: gf_group->bit_allocation[idx] = base_frame_bits; break;
+    }
+  }
+
+  // Give the frame after this GOP a 0 bit allocation (out of the current GOP
+  // budget) to simplify reference frame management. For ARF groups it is the
+  // overlay frame that starts the next GOP; for GF groups the next GOP
+  // overwrites the allocation anyway.
+  gf_group->bit_allocation[gf_group_size] = 0;
+}
+#else
 static void allocate_gf_group_bits(GF_GROUP *gf_group, RATE_CONTROL *const rc,
                                    int64_t gf_group_bits, int gf_arf_bits,
                                    int key_frame, int use_arf) {
@@ -794,6 +872,7 @@
   // simplify logics in reference frame management.
   gf_group->bit_allocation[gf_group_size] = 0;
 }
+#endif
 
 // Returns true if KF group and GF group both are almost completely static.
 static INLINE int is_almost_static(double gf_zero_motion, int kf_zero_motion) {
@@ -1790,12 +1869,21 @@
                          gf_group_bits);
 }
 
+// #define FIXED_ARF_BITS
+#ifdef FIXED_ARF_BITS
+#define ARF_BITS_FRACTION 0.75
+#endif
 void av1_gop_bit_allocation(const AV1_COMP *cpi, RATE_CONTROL *const rc,
                             GF_GROUP *gf_group, int is_key_frame, int use_arf,
                             int64_t gf_group_bits) {
   // Calculate the extra bits to be used for boosted frame(s)
+#ifdef FIXED_ARF_BITS
+  int gf_arf_bits = (int)(ARF_BITS_FRACTION * gf_group_bits);
+#else
   int gf_arf_bits = calculate_boost_bits(rc->baseline_gf_interval,
                                          rc->gfu_boost, gf_group_bits);
+#endif
+
   gf_arf_bits = adjust_boost_bits_for_target_level(cpi, rc, gf_arf_bits,
                                                    gf_group_bits, 1);
 
@@ -2641,6 +2729,20 @@
     rc->rate_error_estimate = 0;
   }
 
+  // Update the active best quality pyramid.
+  if (!rc->is_src_frame_alt_ref) {
+    const int pyramid_level = cpi->gf_group.layer_depth[cpi->gf_group.index];
+    int i;
+    for (i = pyramid_level; i <= MAX_ARF_LAYERS; ++i) {
+      rc->active_best_quality[i] = cpi->common.base_qindex;
+      // if (pyramid_level >= 2) {
+      //   rc->active_best_quality[pyramid_level] =
+      //     AOMMAX(rc->active_best_quality[pyramid_level],
+      //            cpi->common.base_qindex);
+      // }
+    }
+  }
+
 #if 0
   {
     AV1_COMMON *cm = &cpi->common;
diff --git a/av1/encoder/ratectrl.c b/av1/encoder/ratectrl.c
index a5ef72d..0aeaed1 100644
--- a/av1/encoder/ratectrl.c
+++ b/av1/encoder/ratectrl.c
@@ -1253,6 +1253,7 @@
   }
 
   aom_clear_system_state();
+#ifndef STRICT_RC
   // Static forced key frames Q restrictions dealt with elsewhere.
   if (!(frame_is_intra_only(cm)) || !rc->this_key_frame_forced ||
       (cpi->twopass.last_kfgroup_zeromotion_pct < STATIC_MOTION_THRESH)) {
@@ -1260,6 +1261,7 @@
     active_worst_quality =
         AOMMAX(active_worst_quality + qdelta, active_best_quality);
   }
+#endif
 
   // Modify active_best_quality for downscaled normal frames.
   if (av1_frame_scaled(cm) && !frame_is_kf_gf_arf(cpi)) {
@@ -1409,9 +1411,20 @@
         cm->current_frame.frame_type == KEY_FRAME && cm->show_frame == 0;
     get_intra_q_and_bounds_two_pass(cpi, width, height, &active_best_quality,
                                     &active_worst_quality, cq_level, is_fwd_kf);
+#ifdef STRICT_RC
+    active_best_quality = 0;
+#endif
   } else {
+#ifdef STRICT_RC
+    //  Active best quality limited by previous layer.
+    const int pyramid_level = gf_group_pyramid_level(gf_group, gf_index);
+    active_best_quality =
+        rc->active_best_quality[pyramid_level - 1] +
+        AOMMAX((rc->active_best_quality[pyramid_level - 1] / 10), 5);
+#else
     active_best_quality =
         get_active_best_quality(cpi, active_worst_quality, cq_level, gf_index);
+#endif
 
     // For alt_ref and GF frames (including internal arf frames) adjust the
     // worst allowed quality as well. This insures that even on hard
@@ -1436,7 +1449,11 @@
     active_worst_quality = q;
   }
 
+#ifdef STRICT_RC
+  *top_index = rc->worst_quality;
+#else
   *top_index = active_worst_quality;
+#endif
   *bottom_index = active_best_quality;
 
   assert(*top_index <= rc->worst_quality && *top_index >= rc->best_quality);
diff --git a/av1/encoder/ratectrl.h b/av1/encoder/ratectrl.h
index fca49ca..fa1d656 100644
--- a/av1/encoder/ratectrl.h
+++ b/av1/encoder/ratectrl.h
@@ -50,6 +50,9 @@
 
 #define MAX_NUM_GF_INTERVALS 15
 
+#define MAX_ARF_LAYERS 6
+// #define STRICT_RC
+
 typedef struct {
   int resize_width;
   int resize_height;
@@ -171,6 +174,7 @@
   // Q index used for ALT frame
   int arf_q;
   int active_worst_quality;
+  int active_best_quality[MAX_ARF_LAYERS + 1];
   int base_layer_qp;
 
   // Total number of stats used only for kf_boost calculation.