Allintra: Reduce the instances of RefCntBuffer
The frame_bufs array of type RefCntBuffer is a member of BufferPool
and holds information about the reference frames. During allintra
video encode, at most two reference frames are required at any time,
whereas the parent version always had 16 (FRAME_BUFFERS) instances
of RefCntBuffer.
This CL avoids the memory allocation of the instances of RefCntBuffer
that are not required for allintra video encode. This is a bit-exact
change.
For AVIF image encode with speed=9:
Heap memory reduction (%)
Resolution threads=1 threads=4
640x360 4.748 3.765
768x512 2.990 2.331
832x480 3.110 2.606
1280x720 1.466 1.331
4032x3024 0.128 0.126
For AVIF still-image encode, an average encode time reduction of
~0.76% is observed across different resolutions with speed=9 and
threads=4.
Change-Id: Id7d485b9c15a938f5951ebac6522cd01c47166cb
diff --git a/av1/av1_cx_iface.c b/av1/av1_cx_iface.c
index bf6dd82..1fbf3fe 100644
--- a/av1/av1_cx_iface.c
+++ b/av1/av1_cx_iface.c
@@ -2519,19 +2519,28 @@
COMPRESSOR_STAGE stage,
int lap_lag_in_frames) {
aom_codec_err_t res = AOM_CODEC_OK;
+ BufferPool *buffer_pool = *p_buffer_pool;
- if (*p_buffer_pool == NULL) {
- *p_buffer_pool = (BufferPool *)aom_calloc(1, sizeof(BufferPool));
- if (*p_buffer_pool == NULL) return AOM_CODEC_MEM_ERROR;
-
+ if (buffer_pool == NULL) {
+ buffer_pool = (BufferPool *)aom_calloc(1, sizeof(BufferPool));
+ if (buffer_pool == NULL) return AOM_CODEC_MEM_ERROR;
+ *p_buffer_pool = buffer_pool;
+ buffer_pool->num_frame_bufs =
+ (oxcf->mode == ALLINTRA) ? FRAME_BUFFERS_ALLINTRA : FRAME_BUFFERS;
+ buffer_pool->frame_bufs = (RefCntBuffer *)aom_calloc(
+ buffer_pool->num_frame_bufs, sizeof(*buffer_pool->frame_bufs));
+ if (buffer_pool->frame_bufs == NULL) {
+ buffer_pool->num_frame_bufs = 0;
+ return AOM_CODEC_MEM_ERROR;
+ }
#if CONFIG_MULTITHREAD
- if (pthread_mutex_init(&((*p_buffer_pool)->pool_mutex), NULL)) {
+ if (pthread_mutex_init(&buffer_pool->pool_mutex, NULL)) {
return AOM_CODEC_MEM_ERROR;
}
#endif
}
- *p_cpi = av1_create_compressor(ppi, oxcf, *p_buffer_pool, stage,
- lap_lag_in_frames);
+ *p_cpi =
+ av1_create_compressor(ppi, oxcf, buffer_pool, stage, lap_lag_in_frames);
if (*p_cpi == NULL) res = AOM_CODEC_MEM_ERROR;
return res;
diff --git a/av1/av1_dx_iface.c b/av1/av1_dx_iface.c
index 809268f..79ce988 100644
--- a/av1/av1_dx_iface.c
+++ b/av1/av1_dx_iface.c
@@ -428,6 +428,13 @@
ctx->buffer_pool = (BufferPool *)aom_calloc(1, sizeof(BufferPool));
if (ctx->buffer_pool == NULL) return AOM_CODEC_MEM_ERROR;
+ ctx->buffer_pool->num_frame_bufs = FRAME_BUFFERS;
+ ctx->buffer_pool->frame_bufs = (RefCntBuffer *)aom_calloc(
+ ctx->buffer_pool->num_frame_bufs, sizeof(*ctx->buffer_pool->frame_bufs));
+ if (ctx->buffer_pool->frame_bufs == NULL) {
+ ctx->buffer_pool->num_frame_bufs = 0;
+ return AOM_CODEC_MEM_ERROR;
+ }
#if CONFIG_MULTITHREAD
if (pthread_mutex_init(&ctx->buffer_pool->pool_mutex, NULL)) {
diff --git a/av1/common/alloccommon.c b/av1/common/alloccommon.c
index 677078d..8117caf 100644
--- a/av1/common/alloccommon.c
+++ b/av1/common/alloccommon.c
@@ -36,7 +36,7 @@
void av1_free_ref_frame_buffers(BufferPool *pool) {
int i;
- for (i = 0; i < FRAME_BUFFERS; ++i) {
+ for (i = 0; i < pool->num_frame_bufs; ++i) {
if (pool->frame_bufs[i].ref_count > 0 &&
pool->frame_bufs[i].raw_frame_buffer.data != NULL) {
pool->release_fb_cb(pool->cb_priv, &pool->frame_bufs[i].raw_frame_buffer);
@@ -51,6 +51,9 @@
pool->frame_bufs[i].seg_map = NULL;
aom_free_frame_buffer(&pool->frame_bufs[i].buf);
}
+ aom_free(pool->frame_bufs);
+ pool->frame_bufs = NULL;
+ pool->num_frame_bufs = 0;
}
static INLINE void free_cdef_linebuf_conditional(
diff --git a/av1/common/av1_common_int.h b/av1/common/av1_common_int.h
index b4f5783..338fbe1 100644
--- a/av1/common/av1_common_int.h
+++ b/av1/common/av1_common_int.h
@@ -184,7 +184,8 @@
aom_get_frame_buffer_cb_fn_t get_fb_cb;
aom_release_frame_buffer_cb_fn_t release_fb_cb;
- RefCntBuffer frame_bufs[FRAME_BUFFERS];
+ RefCntBuffer *frame_bufs;
+ uint8_t num_frame_bufs;
// Frame buffers allocated internally by the codec.
InternalFrameBufferList int_frame_buffers;
@@ -1092,10 +1093,11 @@
int i;
lock_buffer_pool(cm->buffer_pool);
- for (i = 0; i < FRAME_BUFFERS; ++i)
+ const int num_frame_bufs = cm->buffer_pool->num_frame_bufs;
+ for (i = 0; i < num_frame_bufs; ++i)
if (frame_bufs[i].ref_count == 0) break;
- if (i != FRAME_BUFFERS) {
+ if (i != num_frame_bufs) {
if (frame_bufs[i].buf.use_external_reference_buffers) {
// If this frame buffer's y_buffer, u_buffer, and v_buffer point to the
// external reference buffers. Restore the buffer pointers to point to the
diff --git a/av1/common/entropymode.c b/av1/common/entropymode.c
index 49fc551..8381c1f 100644
--- a/av1/common/entropymode.c
+++ b/av1/common/entropymode.c
@@ -1066,7 +1066,7 @@
RefCntBuffer *const buf = get_ref_frame_buf(cm, i);
if (buf != NULL) buf->frame_context = *cm->fc;
}
- for (int i = 0; i < FRAME_BUFFERS; ++i)
+ for (int i = 0; i < cm->buffer_pool->num_frame_bufs; ++i)
cm->buffer_pool->frame_bufs[i].frame_context = *cm->fc;
}
}
diff --git a/av1/common/enums.h b/av1/common/enums.h
index b0644da..fb4d756 100644
--- a/av1/common/enums.h
+++ b/av1/common/enums.h
@@ -558,8 +558,16 @@
// REF_FRAMES for the cm->ref_frame_map array, 1 scratch frame for the new
// frame in cm->cur_frame, INTER_REFS_PER_FRAME for scaled references on the
// encoder in the cpi->scaled_ref_buf array.
+// The encoder uses FRAME_BUFFERS only in GOOD and REALTIME encoding modes.
+// The decoder also uses FRAME_BUFFERS.
#define FRAME_BUFFERS (REF_FRAMES + 1 + INTER_REFS_PER_FRAME)
+// During allintra encoding, one reference frame buffer is free to be used again
+// only after another frame buffer is stored as the reference frame. Hence, it
+// is necessary and sufficient to maintain only two reference frame buffers in
+// this case.
+#define FRAME_BUFFERS_ALLINTRA 2
+
#define FWD_RF_OFFSET(ref) (ref - LAST_FRAME)
#define BWD_RF_OFFSET(ref) (ref - BWDREF_FRAME)
diff --git a/av1/decoder/decodeframe.c b/av1/decoder/decodeframe.c
index 53275ea..7ce0bac 100644
--- a/av1/decoder/decodeframe.c
+++ b/av1/decoder/decodeframe.c
@@ -4434,7 +4434,7 @@
lock_buffer_pool(cm->buffer_pool);
reset_ref_frame_map(cm);
assert(cm->cur_frame->ref_count == 1);
- for (i = 0; i < FRAME_BUFFERS; ++i) {
+ for (i = 0; i < cm->buffer_pool->num_frame_bufs; ++i) {
// Reset all unreferenced frame buffers. We can also reset cm->cur_frame
// because we are the sole owner of cm->cur_frame.
if (frame_bufs[i].ref_count > 0 && &frame_bufs[i] != cm->cur_frame) {
diff --git a/av1/encoder/encoder.c b/av1/encoder/encoder.c
index 0e5159a..14ea3c2 100644
--- a/av1/encoder/encoder.c
+++ b/av1/encoder/encoder.c
@@ -2035,7 +2035,7 @@
}
#ifndef NDEBUG
BufferPool *const pool = cm->buffer_pool;
- for (i = 0; i < FRAME_BUFFERS; ++i) {
+ for (i = 0; i < pool->num_frame_bufs; ++i) {
assert(pool->frame_bufs[i].ref_count == 0);
}
#endif
@@ -4746,7 +4746,7 @@
RefCntBuffer *buf = get_ref_frame_buf(cm, ref_frame);
cpi->scaled_ref_buf[ref_frame - 1] = buf;
- for (int i = 0; i < FRAME_BUFFERS; ++i) {
+ for (int i = 0; i < cm->buffer_pool->num_frame_bufs; ++i) {
if (&cm->buffer_pool->frame_bufs[i] == buf) {
*ref_buffers_used_map |= (1 << i);
}
@@ -4761,7 +4761,7 @@
// corresponding to frames in a parallel encode set.
void av1_increment_scaled_ref_counts_fpmt(BufferPool *buffer_pool,
int ref_buffers_used_map) {
- for (int i = 0; i < FRAME_BUFFERS; ++i) {
+ for (int i = 0; i < buffer_pool->num_frame_bufs; ++i) {
if (ref_buffers_used_map & (1 << i)) {
++buffer_pool->frame_bufs[i].ref_count;
}
@@ -4784,7 +4784,7 @@
// corresponding to frames in a parallel encode set.
void av1_decrement_ref_counts_fpmt(BufferPool *buffer_pool,
int ref_buffers_used_map) {
- for (int i = 0; i < FRAME_BUFFERS; ++i) {
+ for (int i = 0; i < buffer_pool->num_frame_bufs; ++i) {
if (ref_buffers_used_map & (1 << i)) {
--buffer_pool->frame_bufs[i].ref_count;
}