Allocate mbmi_ext at BLOCK_8X8 level for 4k+ videos
Previously, mbmi_ext was dynamically allocated up front with one entry per
BLOCK_4X4. This change makes the allocation granularity resolution-dependent:
for 4K and larger videos, one entry is allocated per BLOCK_8X8 instead.
Memory Reduction:
About 1GB for 4K videos, which is about 25% of heap memory.
BUG=aomedia:2453
Change-Id: Ic38349eb19adccbe2bf3355db60cb02f2116272c
diff --git a/av1/encoder/bitstream.c b/av1/encoder/bitstream.c
index 27c3510..0507f23 100644
--- a/av1/encoder/bitstream.c
+++ b/av1/encoder/bitstream.c
@@ -1275,8 +1275,11 @@
AV1_COMMON *const cm = &cpi->common;
const MB_MODE_INFO *const *mbmi =
*(cm->mi_grid_base + (mi_row * cm->mi_stride + mi_col));
+ const int mi_alloc_size_1d = cpi->mi_alloc_size_1d;
+ const int mi_alloc_row = (mi_row + mi_alloc_size_1d - 1) / mi_alloc_size_1d;
+ const int mi_alloc_col = (mi_col + mi_alloc_size_1d - 1) / mi_alloc_size_1d;
const MB_MODE_INFO_EXT *const *mbmi_ext =
- cpi->mbmi_ext_base + (mi_row * cm->mi_cols + mi_col);
+ cpi->mbmi_ext_base + (mi_alloc_row * cpi->mi_alloc_cols + mi_alloc_col);
if (is_inter_block(mbmi)) {
#define FRAME_TO_CHECK 11
if (cm->current_frame.frame_number == FRAME_TO_CHECK &&
@@ -1462,7 +1465,12 @@
const AV1_COMMON *cm = &cpi->common;
MACROBLOCKD *xd = &cpi->td.mb.e_mbd;
xd->mi = cm->mi_grid_base + (mi_row * cm->mi_stride + mi_col);
- cpi->td.mb.mbmi_ext = cpi->mbmi_ext_base + (mi_row * cm->mi_cols + mi_col);
+
+ const int mi_alloc_size_1d = cpi->mi_alloc_size_1d;
+ const int mi_alloc_row = (mi_row + mi_alloc_size_1d - 1) / mi_alloc_size_1d;
+ const int mi_alloc_col = (mi_col + mi_alloc_size_1d - 1) / mi_alloc_size_1d;
+ cpi->td.mb.mbmi_ext =
+ cpi->mbmi_ext_base + (mi_alloc_row * cpi->mi_alloc_cols + mi_alloc_col);
const MB_MODE_INFO *mbmi = xd->mi[0];
const BLOCK_SIZE bsize = mbmi->sb_type;
diff --git a/av1/encoder/encode_strategy.c b/av1/encoder/encode_strategy.c
index 4c4c712..e139c5b 100644
--- a/av1/encoder/encode_strategy.c
+++ b/av1/encoder/encode_strategy.c
@@ -1054,8 +1054,8 @@
av1_init_context_buffers(cm);
setup_mi(cpi, frame_input->source);
av1_init_macroblockd(cm, xd, NULL);
- memset(cpi->mbmi_ext_base, 0,
- cm->mi_rows * cm->mi_cols * sizeof(*cpi->mbmi_ext_base));
+ const int alloc_mi_size = cpi->mi_alloc_rows * cpi->mi_alloc_cols;
+ memset(cpi->mbmi_ext_base, 0, alloc_mi_size * sizeof(*cpi->mbmi_ext_base));
av1_set_speed_features_framesize_independent(cpi, oxcf->speed);
av1_set_speed_features_framesize_dependent(cpi, oxcf->speed);
diff --git a/av1/encoder/encoder.c b/av1/encoder/encoder.c
index aed8250..20368d8 100644
--- a/av1/encoder/encoder.c
+++ b/av1/encoder/encoder.c
@@ -13,6 +13,7 @@
#include <math.h>
#include <stdio.h>
+#include "av1/common/enums.h"
#include "config/aom_config.h"
#include "config/aom_dsp_rtcd.h"
#include "config/aom_scale_rtcd.h"
@@ -424,11 +425,23 @@
static void alloc_context_buffers_ext(AV1_COMP *cpi) {
AV1_COMMON *cm = &cpi->common;
- int mi_size = cm->mi_cols * cm->mi_rows;
+ const int is_4k_or_larger = AOMMIN(cm->width, cm->height) >= 2160;
+
+ cpi->mi_alloc_bsize = is_4k_or_larger ? BLOCK_8X8 : BLOCK_4X4;
+ cpi->mi_alloc_size_1d = mi_size_wide[cpi->mi_alloc_bsize];
+ cpi->mi_alloc_rows =
+ (cm->mi_rows + cpi->mi_alloc_size_1d - 1) / cpi->mi_alloc_size_1d;
+ cpi->mi_alloc_cols =
+ (cm->mi_cols + cpi->mi_alloc_size_1d - 1) / cpi->mi_alloc_size_1d;
+
+ assert(mi_size_wide[cpi->mi_alloc_bsize] ==
+ mi_size_high[cpi->mi_alloc_bsize]);
+
+ const int alloc_mi_size = cpi->mi_alloc_rows * cpi->mi_alloc_cols;
dealloc_context_buffers_ext(cpi);
CHECK_MEM_ERROR(cm, cpi->mbmi_ext_base,
- aom_calloc(mi_size, sizeof(*cpi->mbmi_ext_base)));
+ aom_calloc(alloc_mi_size, sizeof(*cpi->mbmi_ext_base)));
}
static void reset_film_grain_chroma_params(aom_film_grain_t *pars) {
@@ -932,8 +945,9 @@
av1_set_mb_mi(cm, cm->width, cm->height);
av1_init_context_buffers(cm);
av1_init_macroblockd(cm, xd, NULL);
- memset(cpi->mbmi_ext_base, 0,
- cm->mi_rows * cm->mi_cols * sizeof(*cpi->mbmi_ext_base));
+
+ const int alloc_mi_size = cpi->mi_alloc_rows * cpi->mi_alloc_cols;
+ memset(cpi->mbmi_ext_base, 0, alloc_mi_size * sizeof(*cpi->mbmi_ext_base));
set_tile_info(cpi);
}
diff --git a/av1/encoder/encoder.h b/av1/encoder/encoder.h
index b9db4ed..514d536 100644
--- a/av1/encoder/encoder.h
+++ b/av1/encoder/encoder.h
@@ -768,6 +768,12 @@
struct lookahead_entry *alt_ref_source;
int no_show_kf;
+ // The minimum block size each allocated mi_ext can correspond to. Currently
+ // set to BLOCK_4X4 for resolutions below 4k, and BLOCK_8X8 for 4k and larger.
+ BLOCK_SIZE mi_alloc_bsize;
+ int mi_alloc_size_1d; // Number of 4x4 blocks in an allocated mi_ext
+ int mi_alloc_rows, mi_alloc_cols;
+
int optimize_seg_arr[MAX_SEGMENTS];
YV12_BUFFER_CONFIG *source;
@@ -1372,7 +1378,12 @@
const int idx_str = xd->mi_stride * mi_row + mi_col;
xd->mi = cm->mi_grid_base + idx_str;
xd->mi[0] = cm->mi + idx_str;
- x->mbmi_ext = cpi->mbmi_ext_base + (mi_row * cm->mi_cols + mi_col);
+
+ const int mi_alloc_size_1d = cpi->mi_alloc_size_1d;
+ const int mi_alloc_row = (mi_row + mi_alloc_size_1d - 1) / mi_alloc_size_1d;
+ const int mi_alloc_col = (mi_col + mi_alloc_size_1d - 1) / mi_alloc_size_1d;
+ x->mbmi_ext =
+ cpi->mbmi_ext_base + (mi_alloc_row * cpi->mi_alloc_cols + mi_alloc_col);
}
// Check to see if the given partition size is allowed for a specified number