rtc: Reduce number of reference frame buffers

In the rt, non-svc use case, the frame buffer allocated for
the key frame was not released when a new frame replaced the
key frame as the golden reference frame. With this patch, the
buffer holding the key frame is released when a new golden
frame is encoded, thereby reducing the number of required
reference frame buffers by one.

               Peak Memory Reduction (%)
Resolution    single thread (rt speed 10)
 320x180          3.55
 320x240          3.69
 640x360          4.18
 640x480          4.25
 1280x720         4.28

Heap memory reduction was measured using the following command:
$ valgrind --tool=massif ./aomenc ...

Change-Id: I20de21fe486748025b0a665c975a6ec4ea96ba5a
diff --git a/av1/encoder/encode_strategy.c b/av1/encoder/encode_strategy.c
index 5cc2b28..8d67ba3 100644
--- a/av1/encoder/encode_strategy.c
+++ b/av1/encoder/encode_strategy.c
@@ -1374,14 +1374,12 @@
 #endif
 #if CONFIG_REALTIME_ONLY
   av1_get_one_pass_rt_params(cpi, &frame_params, *frame_flags);
-  if (cpi->oxcf.speed >= 5 && cpi->ppi->number_spatial_layers == 1 &&
-      cpi->ppi->number_temporal_layers == 1)
+  if (use_one_pass_rt_reference_structure(cpi))
     av1_set_reference_structure_one_pass_rt(cpi, cpi->gf_frame_index == 0);
 #else
   if (use_one_pass_rt_params) {
     av1_get_one_pass_rt_params(cpi, &frame_params, *frame_flags);
-    if (cpi->oxcf.speed >= 5 && cpi->ppi->number_spatial_layers == 1 &&
-        cpi->ppi->number_temporal_layers == 1)
+    if (use_one_pass_rt_reference_structure(cpi))
       av1_set_reference_structure_one_pass_rt(cpi, cpi->gf_frame_index == 0);
   }
 #endif
diff --git a/av1/encoder/encoder.h b/av1/encoder/encoder.h
index bf95874..bd6c7a2 100644
--- a/av1/encoder/encoder.h
+++ b/av1/encoder/encoder.h
@@ -2844,6 +2844,11 @@
   RefreshFrameInfo refresh_frame;
 
   /*!
+   * Flag to reduce the number of reference frame buffers used in rt.
+   */
+  int rt_reduce_num_ref_buffers;
+
+  /*!
    * Flags signalled by the external interface at frame level.
    */
   ExternalFlags ext_flags;
@@ -3820,6 +3825,11 @@
          cpi->oxcf.gf_cfg.lag_in_frames == 0;
 }
 
+static INLINE int use_one_pass_rt_reference_structure(const AV1_COMP *cpi) {
+  return cpi->oxcf.speed >= 5 && cpi->ppi->number_spatial_layers == 1 &&
+         cpi->ppi->number_temporal_layers == 1;
+}
+
 // Function return size of frame stats buffer
 static INLINE int get_stats_buf_size(int num_lap_buffer, int num_lag_buffer) {
   /* if lookahead is enabled return num_lap_buffers else num_lag_buffers */
diff --git a/av1/encoder/encoder_utils.h b/av1/encoder/encoder_utils.h
index 21fb0e8..44294db 100644
--- a/av1/encoder/encoder_utils.h
+++ b/av1/encoder/encoder_utils.h
@@ -997,12 +997,34 @@
   if (!frame_is_intra_only(&cpi->common)) release_scaled_references(cpi);
 }
 
+static AOM_INLINE int reduce_num_ref_buffers(const AV1_COMP *cpi) {
+  const SequenceHeader *const seq_params = cpi->common.seq_params;
+  return is_one_pass_rt_params(cpi) &&
+         use_one_pass_rt_reference_structure(cpi) &&
+         (seq_params->order_hint_info.enable_order_hint == 0) &&
+         cpi->rt_reduce_num_ref_buffers;
+}
+
 // Refresh reference frame buffers according to refresh_frame_flags.
 static AOM_INLINE void refresh_reference_frames(AV1_COMP *cpi) {
   AV1_COMMON *const cm = &cpi->common;
   // All buffers are refreshed for shown keyframes and S-frames.
+  // In case of RT, golden frame refreshes the 6th slot and other reference
+  // frames refresh slots 0 to 5. Slot 7 is not refreshed by any reference
+  // frame. Thus, only 7 buffers are refreshed for keyframes and S-frames
+  // instead of 8.
+  int num_ref_buffers = REF_FRAMES;
+  if (reduce_num_ref_buffers(cpi)) {
+    const int refresh_all_bufs =
+        (cpi->ppi->gf_group.refbuf_state[cpi->gf_frame_index] == REFBUF_RESET ||
+         frame_is_sframe(cm));
+    assert(IMPLIES(((cm->current_frame.refresh_frame_flags >> 7) & 1) == 1,
+                   refresh_all_bufs));
+    (void)refresh_all_bufs;
+    num_ref_buffers--;
+  }
 
-  for (int ref_frame = 0; ref_frame < REF_FRAMES; ref_frame++) {
+  for (int ref_frame = 0; ref_frame < num_ref_buffers; ref_frame++) {
     if (((cm->current_frame.refresh_frame_flags >> ref_frame) & 1) == 1) {
       assign_frame_buffer_p(&cm->ref_frame_map[ref_frame], cm->cur_frame);
     }
diff --git a/av1/encoder/ratectrl.c b/av1/encoder/ratectrl.c
index 3e6939c..fc15581 100644
--- a/av1/encoder/ratectrl.c
+++ b/av1/encoder/ratectrl.c
@@ -2682,6 +2682,15 @@
     svc->refresh[gld_idx] = 1;
   }
   svc->gld_idx_1layer = gld_idx;
+  // Set the flag to reduce the number of reference frame buffers used.
+  // This assumes that slot 7 is not refreshed by any reference frame.
+  cpi->rt_reduce_num_ref_buffers = 1;
+  cpi->rt_reduce_num_ref_buffers &= (svc->ref_idx[0] < 7);
+  cpi->rt_reduce_num_ref_buffers &= (svc->ref_idx[1] < 7);
+  cpi->rt_reduce_num_ref_buffers &= (svc->ref_idx[3] < 7);
+  cpi->rt_reduce_num_ref_buffers &= (svc->ref_idx[6] < 7);
+  if (cpi->sf.rt_sf.ref_frame_comp_nonrd[1])
+    cpi->rt_reduce_num_ref_buffers &= (svc->ref_idx[2] < 7);
 }
 
 /*!\brief Check for scene detection, for 1 pass real-time mode.