Allintra: Reduce the instances of RefCntBuffer

The frame_bufs array of RefCntBuffer is a member of BufferPool and
holds information about the reference frames. During allintra video
encode, at most two reference frame buffers are required at any time,
whereas the parent commit always allocated 16 (FRAME_BUFFERS)
instances of RefCntBuffer. This CL avoids allocating the instances of
RefCntBuffer that are not required for allintra video encode. This is
a bit-exact change.

For AVIF image encode with speed = 9,

             HEAP Memory reduction(%)
Resolution   threads=1    threads=4
640x360         4.748       3.765
768x512         2.990       2.331
832x480         3.110       2.606
1280x720        1.466       1.331
4032x3024       0.128       0.126

For AVIF still-image encode, an encode time reduction of ~0.76% is
observed on average across different resolutions with speed=9 and
threads=4.

Change-Id: Id7d485b9c15a938f5951ebac6522cd01c47166cb
diff --git a/av1/av1_cx_iface.c b/av1/av1_cx_iface.c
index bf6dd82..1fbf3fe 100644
--- a/av1/av1_cx_iface.c
+++ b/av1/av1_cx_iface.c
@@ -2519,19 +2519,28 @@
                                                   COMPRESSOR_STAGE stage,
                                                   int lap_lag_in_frames) {
   aom_codec_err_t res = AOM_CODEC_OK;
+  BufferPool *buffer_pool = *p_buffer_pool;
 
-  if (*p_buffer_pool == NULL) {
-    *p_buffer_pool = (BufferPool *)aom_calloc(1, sizeof(BufferPool));
-    if (*p_buffer_pool == NULL) return AOM_CODEC_MEM_ERROR;
-
+  if (buffer_pool == NULL) {
+    buffer_pool = (BufferPool *)aom_calloc(1, sizeof(BufferPool));
+    if (buffer_pool == NULL) return AOM_CODEC_MEM_ERROR;
+    *p_buffer_pool = buffer_pool;
+    buffer_pool->num_frame_bufs =
+        (oxcf->mode == ALLINTRA) ? FRAME_BUFFERS_ALLINTRA : FRAME_BUFFERS;
+    buffer_pool->frame_bufs = (RefCntBuffer *)aom_calloc(
+        buffer_pool->num_frame_bufs, sizeof(*buffer_pool->frame_bufs));
+    if (buffer_pool->frame_bufs == NULL) {
+      buffer_pool->num_frame_bufs = 0;
+      return AOM_CODEC_MEM_ERROR;
+    }
 #if CONFIG_MULTITHREAD
-    if (pthread_mutex_init(&((*p_buffer_pool)->pool_mutex), NULL)) {
+    if (pthread_mutex_init(&buffer_pool->pool_mutex, NULL)) {
       return AOM_CODEC_MEM_ERROR;
     }
 #endif
   }
-  *p_cpi = av1_create_compressor(ppi, oxcf, *p_buffer_pool, stage,
-                                 lap_lag_in_frames);
+  *p_cpi =
+      av1_create_compressor(ppi, oxcf, buffer_pool, stage, lap_lag_in_frames);
   if (*p_cpi == NULL) res = AOM_CODEC_MEM_ERROR;
 
   return res;
diff --git a/av1/av1_dx_iface.c b/av1/av1_dx_iface.c
index 809268f..79ce988 100644
--- a/av1/av1_dx_iface.c
+++ b/av1/av1_dx_iface.c
@@ -428,6 +428,13 @@
 
   ctx->buffer_pool = (BufferPool *)aom_calloc(1, sizeof(BufferPool));
   if (ctx->buffer_pool == NULL) return AOM_CODEC_MEM_ERROR;
+  ctx->buffer_pool->num_frame_bufs = FRAME_BUFFERS;
+  ctx->buffer_pool->frame_bufs = (RefCntBuffer *)aom_calloc(
+      ctx->buffer_pool->num_frame_bufs, sizeof(*ctx->buffer_pool->frame_bufs));
+  if (ctx->buffer_pool->frame_bufs == NULL) {
+    ctx->buffer_pool->num_frame_bufs = 0;
+    return AOM_CODEC_MEM_ERROR;
+  }
 
 #if CONFIG_MULTITHREAD
   if (pthread_mutex_init(&ctx->buffer_pool->pool_mutex, NULL)) {
diff --git a/av1/common/alloccommon.c b/av1/common/alloccommon.c
index 677078d..8117caf 100644
--- a/av1/common/alloccommon.c
+++ b/av1/common/alloccommon.c
@@ -36,7 +36,7 @@
 void av1_free_ref_frame_buffers(BufferPool *pool) {
   int i;
 
-  for (i = 0; i < FRAME_BUFFERS; ++i) {
+  for (i = 0; i < pool->num_frame_bufs; ++i) {
     if (pool->frame_bufs[i].ref_count > 0 &&
         pool->frame_bufs[i].raw_frame_buffer.data != NULL) {
       pool->release_fb_cb(pool->cb_priv, &pool->frame_bufs[i].raw_frame_buffer);
@@ -51,6 +51,9 @@
     pool->frame_bufs[i].seg_map = NULL;
     aom_free_frame_buffer(&pool->frame_bufs[i].buf);
   }
+  aom_free(pool->frame_bufs);
+  pool->frame_bufs = NULL;
+  pool->num_frame_bufs = 0;
 }
 
 static INLINE void free_cdef_linebuf_conditional(
diff --git a/av1/common/av1_common_int.h b/av1/common/av1_common_int.h
index b4f5783..338fbe1 100644
--- a/av1/common/av1_common_int.h
+++ b/av1/common/av1_common_int.h
@@ -184,7 +184,8 @@
   aom_get_frame_buffer_cb_fn_t get_fb_cb;
   aom_release_frame_buffer_cb_fn_t release_fb_cb;
 
-  RefCntBuffer frame_bufs[FRAME_BUFFERS];
+  RefCntBuffer *frame_bufs;
+  uint8_t num_frame_bufs;
 
   // Frame buffers allocated internally by the codec.
   InternalFrameBufferList int_frame_buffers;
@@ -1092,10 +1093,11 @@
   int i;
 
   lock_buffer_pool(cm->buffer_pool);
-  for (i = 0; i < FRAME_BUFFERS; ++i)
+  const int num_frame_bufs = cm->buffer_pool->num_frame_bufs;
+  for (i = 0; i < num_frame_bufs; ++i)
     if (frame_bufs[i].ref_count == 0) break;
 
-  if (i != FRAME_BUFFERS) {
+  if (i != num_frame_bufs) {
     if (frame_bufs[i].buf.use_external_reference_buffers) {
       // If this frame buffer's y_buffer, u_buffer, and v_buffer point to the
       // external reference buffers. Restore the buffer pointers to point to the
diff --git a/av1/common/entropymode.c b/av1/common/entropymode.c
index 49fc551..8381c1f 100644
--- a/av1/common/entropymode.c
+++ b/av1/common/entropymode.c
@@ -1066,7 +1066,7 @@
       RefCntBuffer *const buf = get_ref_frame_buf(cm, i);
       if (buf != NULL) buf->frame_context = *cm->fc;
     }
-    for (int i = 0; i < FRAME_BUFFERS; ++i)
+    for (int i = 0; i < cm->buffer_pool->num_frame_bufs; ++i)
       cm->buffer_pool->frame_bufs[i].frame_context = *cm->fc;
   }
 }
diff --git a/av1/common/enums.h b/av1/common/enums.h
index b0644da..fb4d756 100644
--- a/av1/common/enums.h
+++ b/av1/common/enums.h
@@ -558,8 +558,16 @@
 // REF_FRAMES for the cm->ref_frame_map array, 1 scratch frame for the new
 // frame in cm->cur_frame, INTER_REFS_PER_FRAME for scaled references on the
 // encoder in the cpi->scaled_ref_buf array.
+// The encoder uses FRAME_BUFFERS only in GOOD and REALTIME encoding modes.
+// The decoder also uses FRAME_BUFFERS.
 #define FRAME_BUFFERS (REF_FRAMES + 1 + INTER_REFS_PER_FRAME)
 
+// During allintra encoding, one reference frame buffer is free to be used again
+// only after another frame buffer is stored as the reference frame. Hence, it
+// is necessary and sufficient to maintain only two reference frame buffers in
+// this case.
+#define FRAME_BUFFERS_ALLINTRA 2
+
 #define FWD_RF_OFFSET(ref) (ref - LAST_FRAME)
 #define BWD_RF_OFFSET(ref) (ref - BWDREF_FRAME)
 
diff --git a/av1/decoder/decodeframe.c b/av1/decoder/decodeframe.c
index 53275ea..7ce0bac 100644
--- a/av1/decoder/decodeframe.c
+++ b/av1/decoder/decodeframe.c
@@ -4434,7 +4434,7 @@
   lock_buffer_pool(cm->buffer_pool);
   reset_ref_frame_map(cm);
   assert(cm->cur_frame->ref_count == 1);
-  for (i = 0; i < FRAME_BUFFERS; ++i) {
+  for (i = 0; i < cm->buffer_pool->num_frame_bufs; ++i) {
     // Reset all unreferenced frame buffers. We can also reset cm->cur_frame
     // because we are the sole owner of cm->cur_frame.
     if (frame_bufs[i].ref_count > 0 && &frame_bufs[i] != cm->cur_frame) {
diff --git a/av1/encoder/encoder.c b/av1/encoder/encoder.c
index 0e5159a..14ea3c2 100644
--- a/av1/encoder/encoder.c
+++ b/av1/encoder/encoder.c
@@ -2035,7 +2035,7 @@
   }
 #ifndef NDEBUG
   BufferPool *const pool = cm->buffer_pool;
-  for (i = 0; i < FRAME_BUFFERS; ++i) {
+  for (i = 0; i < pool->num_frame_bufs; ++i) {
     assert(pool->frame_bufs[i].ref_count == 0);
   }
 #endif
@@ -4746,7 +4746,7 @@
 
       RefCntBuffer *buf = get_ref_frame_buf(cm, ref_frame);
       cpi->scaled_ref_buf[ref_frame - 1] = buf;
-      for (int i = 0; i < FRAME_BUFFERS; ++i) {
+      for (int i = 0; i < cm->buffer_pool->num_frame_bufs; ++i) {
         if (&cm->buffer_pool->frame_bufs[i] == buf) {
           *ref_buffers_used_map |= (1 << i);
         }
@@ -4761,7 +4761,7 @@
 // corresponding to frames in a parallel encode set.
 void av1_increment_scaled_ref_counts_fpmt(BufferPool *buffer_pool,
                                           int ref_buffers_used_map) {
-  for (int i = 0; i < FRAME_BUFFERS; ++i) {
+  for (int i = 0; i < buffer_pool->num_frame_bufs; ++i) {
     if (ref_buffers_used_map & (1 << i)) {
       ++buffer_pool->frame_bufs[i].ref_count;
     }
@@ -4784,7 +4784,7 @@
 // corresponding to frames in a parallel encode set.
 void av1_decrement_ref_counts_fpmt(BufferPool *buffer_pool,
                                    int ref_buffers_used_map) {
-  for (int i = 0; i < FRAME_BUFFERS; ++i) {
+  for (int i = 0; i < buffer_pool->num_frame_bufs; ++i) {
     if (ref_buffers_used_map & (1 << i)) {
       --buffer_pool->frame_bufs[i].ref_count;
     }