Allocate scaled source buffers on the fly
Instead of being allocated up front, the scaled source buffers are now
allocated/reallocated on the fly, only when scaling is required, based
on the target dimensions.
For AVIF image encode with speed = 9:
             Heap memory reduction (%)
Resolution   threads=1   threads=4
640x360      4.49        2.71
768x512      4.26        2.89
832x480      4.49        3.12
1280x720     4.51        3.59
For threads=4, an average encode time reduction of ~1.12% is
observed for 360p-720p resolutions.
Heap memory reduction was measured using the following command:
$ valgrind --tool=massif ./avifenc ...
Change-Id: I7faf035207ffa9dfab5d57ff8db4ae70b8534d6c
diff --git a/aom_scale/generic/yv12config.c b/aom_scale/generic/yv12config.c
index 6ddc82f..dedfc02 100644
--- a/aom_scale/generic/yv12config.c
+++ b/aom_scale/generic/yv12config.c
@@ -185,7 +185,7 @@
* between planes, which would break the semantics of things like
* aom_img_set_rect(). */
if (border & 0x1f) return AOM_CODEC_MEM_ERROR;
- *y_stride = ((aligned_width + 2 * border) + 31) & ~31;
+ *y_stride = aom_calc_y_stride(aligned_width, border);
*yplane_size =
(aligned_height + 2 * border) * (uint64_t)(*y_stride) + byte_alignment;
diff --git a/aom_scale/yv12config.h b/aom_scale/yv12config.h
index c0e0361..31af69c 100644
--- a/aom_scale/yv12config.h
+++ b/aom_scale/yv12config.h
@@ -167,6 +167,18 @@
int aom_copy_metadata_to_frame_buffer(YV12_BUFFER_CONFIG *ybf,
const aom_metadata_array_t *arr);
+/*!\brief Calculate the y stride required for the image.
+ *
+ * Adds 2 * border to aligned_width and rounds the sum up to a multiple of 32.
+ *
+ * \param[in] aligned_width Aligned width of the image
+ * \param[in] border Border in pixels
+ * \return The y stride value.
+ */
+static AOM_INLINE int aom_calc_y_stride(int aligned_width, int border) {
+ return ((aligned_width + 2 * border) + 31) & ~31;
+}
+
#ifdef __cplusplus
}
#endif
diff --git a/av1/common/resize.c b/av1/common/resize.c
index 112a08a..a3c3c0e 100644
--- a/av1/common/resize.c
+++ b/av1/common/resize.c
@@ -1338,21 +1338,34 @@
aom_extend_frame_borders(dst, num_planes);
}
-YV12_BUFFER_CONFIG *av1_scale_if_required(
+YV12_BUFFER_CONFIG *av1_realloc_and_scale_if_required(
AV1_COMMON *cm, YV12_BUFFER_CONFIG *unscaled, YV12_BUFFER_CONFIG *scaled,
const InterpFilter filter, const int phase, const bool use_optimized_scaler,
- const bool for_psnr) {
+ const bool for_psnr, const int border_in_pixels,
+ const bool alloc_y_buffer_8bit) {
// If scaling is performed for the sole purpose of calculating PSNR, then our
// target dimensions are superres upscaled width/height. Otherwise our target
// dimensions are coded width/height.
- const bool scaling_required =
- for_psnr ? (cm->superres_upscaled_width != unscaled->y_crop_width ||
- cm->superres_upscaled_height != unscaled->y_crop_height)
- : (cm->width != unscaled->y_crop_width ||
- cm->height != unscaled->y_crop_height);
+ const int scaled_width = for_psnr ? cm->superres_upscaled_width : cm->width;
+ const int scaled_height =
+ for_psnr ? cm->superres_upscaled_height : cm->height;
+ const bool scaling_required = (scaled_width != unscaled->y_crop_width) ||
+ (scaled_height != unscaled->y_crop_height);
if (scaling_required) {
const int num_planes = av1_num_planes(cm);
+ const SequenceHeader *seq_params = cm->seq_params;
+
+ // Reallocate the frame buffer based on the target dimensions when scaling
+ // is required.
+ if (aom_realloc_frame_buffer(
+ scaled, scaled_width, scaled_height, seq_params->subsampling_x,
+ seq_params->subsampling_y, seq_params->use_highbitdepth,
+ border_in_pixels, cm->features.byte_alignment, NULL, NULL, NULL,
+ alloc_y_buffer_8bit))
+ aom_internal_error(cm->error, AOM_CODEC_MEM_ERROR,
+ "Failed to allocate scaled buffer");
+
#if CONFIG_AV1_HIGHBITDEPTH
if (use_optimized_scaler && cm->seq_params->bit_depth == AOM_BITS_8) {
av1_resize_and_extend_frame(unscaled, scaled, filter, phase, num_planes);
diff --git a/av1/common/resize.h b/av1/common/resize.h
index b08de80..75abe62 100644
--- a/av1/common/resize.h
+++ b/av1/common/resize.h
@@ -71,10 +71,11 @@
const YV12_BUFFER_CONFIG *src,
YV12_BUFFER_CONFIG *dst);
-YV12_BUFFER_CONFIG *av1_scale_if_required(
+YV12_BUFFER_CONFIG *av1_realloc_and_scale_if_required(
AV1_COMMON *cm, YV12_BUFFER_CONFIG *unscaled, YV12_BUFFER_CONFIG *scaled,
const InterpFilter filter, const int phase, const bool use_optimized_scaler,
- const bool for_psnr);
+ const bool for_psnr, const int border_in_pixels,
+ const bool alloc_y_buffer_8bit);
void av1_resize_and_extend_frame_nonnormative(const YV12_BUFFER_CONFIG *src,
YV12_BUFFER_CONFIG *dst, int bd,
diff --git a/av1/encoder/encode_strategy.c b/av1/encoder/encode_strategy.c
index 6dda945..547c71e 100644
--- a/av1/encoder/encode_strategy.c
+++ b/av1/encoder/encode_strategy.c
@@ -1067,9 +1067,10 @@
// Set frame_input source to true source for psnr calculation.
if (apply_filtering && is_psnr_calc_enabled(cpi)) {
- cpi->source =
- av1_scale_if_required(cm, source_buffer, &cpi->scaled_source,
- cm->features.interp_filter, 0, false, true);
+ cpi->source = av1_realloc_and_scale_if_required(
+ cm, source_buffer, &cpi->scaled_source, cm->features.interp_filter, 0,
+ false, true, cpi->oxcf.border_in_pixels,
+ cpi->oxcf.tool_cfg.enable_global_motion);
cpi->unscaled_source = source_buffer;
}
#if CONFIG_COLLECT_COMPONENT_TIMING
diff --git a/av1/encoder/encoder.c b/av1/encoder/encoder.c
index d6a60db..7c4d1e5 100644
--- a/av1/encoder/encoder.c
+++ b/av1/encoder/encoder.c
@@ -1878,14 +1878,16 @@
static void init_motion_estimation(AV1_COMP *cpi) {
AV1_COMMON *const cm = &cpi->common;
MotionVectorSearchParams *const mv_search_params = &cpi->mv_search_params;
- const int y_stride = cpi->scaled_source.y_stride;
+ const int aligned_width = (cm->width + 7) & ~7;
+ const int y_stride =
+ aom_calc_y_stride(aligned_width, cpi->oxcf.border_in_pixels);
const int y_stride_src = ((cpi->oxcf.frm_dim_cfg.width != cm->width ||
cpi->oxcf.frm_dim_cfg.height != cm->height) ||
av1_superres_scaled(cm))
? y_stride
: cpi->ppi->lookahead->buf->img.y_stride;
- int fpf_y_stride = cm->cur_frame != NULL ? cm->cur_frame->buf.y_stride
- : cpi->scaled_source.y_stride;
+ int fpf_y_stride =
+ cm->cur_frame != NULL ? cm->cur_frame->buf.y_stride : y_stride;
// Update if search_site_cfg is uninitialized or the current frame has a new
// stride
@@ -2331,8 +2333,10 @@
}
#endif
- cpi->source = av1_scale_if_required(cm, unscaled, &cpi->scaled_source,
- filter_scaler, phase_scaler, true, false);
+ cpi->source = av1_realloc_and_scale_if_required(
+ cm, unscaled, &cpi->scaled_source, filter_scaler, phase_scaler, true,
+ false, cpi->oxcf.border_in_pixels,
+ cpi->oxcf.tool_cfg.enable_global_motion);
if (frame_is_intra_only(cm) || resize_pending != 0) {
memset(cpi->consec_zero_mv, 0,
((cm->mi_params.mi_rows * cm->mi_params.mi_cols) >> 2) *
@@ -2340,9 +2344,10 @@
}
if (cpi->unscaled_last_source != NULL) {
- cpi->last_source = av1_scale_if_required(
+ cpi->last_source = av1_realloc_and_scale_if_required(
cm, cpi->unscaled_last_source, &cpi->scaled_last_source, filter_scaler,
- phase_scaler, true, false);
+ phase_scaler, true, false, cpi->oxcf.border_in_pixels,
+ cpi->oxcf.tool_cfg.enable_global_motion);
}
if (cpi->sf.rt_sf.use_temporal_noise_estimate) {
@@ -2567,9 +2572,10 @@
gm_info->search_done = 0;
}
}
- cpi->source =
- av1_scale_if_required(cm, cpi->unscaled_source, &cpi->scaled_source,
- EIGHTTAP_REGULAR, 0, false, false);
+ cpi->source = av1_realloc_and_scale_if_required(
+ cm, cpi->unscaled_source, &cpi->scaled_source, EIGHTTAP_REGULAR, 0,
+ false, false, cpi->oxcf.border_in_pixels,
+ cpi->oxcf.tool_cfg.enable_global_motion);
#if CONFIG_TUNE_BUTTERAUGLI
if (oxcf->tune_cfg.tuning == AOM_TUNE_BUTTERAUGLI) {
@@ -2586,9 +2592,10 @@
#endif
if (cpi->unscaled_last_source != NULL) {
- cpi->last_source = av1_scale_if_required(
+ cpi->last_source = av1_realloc_and_scale_if_required(
cm, cpi->unscaled_last_source, &cpi->scaled_last_source,
- EIGHTTAP_REGULAR, 0, false, false);
+ EIGHTTAP_REGULAR, 0, false, false, cpi->oxcf.border_in_pixels,
+ cpi->oxcf.tool_cfg.enable_global_motion);
}
#if CONFIG_FRAME_PARALLEL_ENCODE
diff --git a/av1/encoder/encoder_alloc.h b/av1/encoder/encoder_alloc.h
index 0d43a98..8f5c3a8 100644
--- a/av1/encoder/encoder_alloc.h
+++ b/av1/encoder/encoder_alloc.h
@@ -372,31 +372,6 @@
aom_internal_error(cm->error, AOM_CODEC_MEM_ERROR,
"Failed to allocate trial restored frame buffer");
}
-
- if (aom_realloc_frame_buffer(
- &cpi->scaled_source, cm->width, cm->height, seq_params->subsampling_x,
- seq_params->subsampling_y, seq_params->use_highbitdepth,
- cpi->oxcf.border_in_pixels, byte_alignment, NULL, NULL, NULL,
- cpi->oxcf.tool_cfg.enable_global_motion))
- aom_internal_error(cm->error, AOM_CODEC_MEM_ERROR,
- "Failed to allocate scaled source buffer");
-
- // The frame buffer cpi->scaled_last_source is used to hold the previous
- // source frame information. As the previous source frame buffer allocation in
- // the lookahead queue is avoided for all-intra frame encoding,
- // cpi->unscaled_last_source will be NULL in such cases. As
- // cpi->unscaled_last_source is NULL, cpi->scaled_last_source will not be used
- // for all-intra frame encoding. Hence, the buffer is allocated conditionally.
- if (cpi->oxcf.kf_cfg.key_freq_max > 0) {
- if (aom_realloc_frame_buffer(
- &cpi->scaled_last_source, cm->width, cm->height,
- seq_params->subsampling_x, seq_params->subsampling_y,
- seq_params->use_highbitdepth, cpi->oxcf.border_in_pixels,
- byte_alignment, NULL, NULL, NULL,
- cpi->oxcf.tool_cfg.enable_global_motion))
- aom_internal_error(cm->error, AOM_CODEC_MEM_ERROR,
- "Failed to allocate scaled last source buffer");
- }
}
static AOM_INLINE YV12_BUFFER_CONFIG *realloc_and_scale_source(
diff --git a/av1/encoder/encoder_utils.c b/av1/encoder/encoder_utils.c
index 66cd272..3892e86 100644
--- a/av1/encoder/encoder_utils.c
+++ b/av1/encoder/encoder_utils.c
@@ -979,13 +979,15 @@
// Setup necessary params for encoding, including frame source, etc.
- cpi->source =
- av1_scale_if_required(cm, cpi->unscaled_source, &cpi->scaled_source,
- cm->features.interp_filter, 0, false, false);
+ cpi->source = av1_realloc_and_scale_if_required(
+ cm, cpi->unscaled_source, &cpi->scaled_source, cm->features.interp_filter,
+ 0, false, false, cpi->oxcf.border_in_pixels,
+ cpi->oxcf.tool_cfg.enable_global_motion);
if (cpi->unscaled_last_source != NULL) {
- cpi->last_source = av1_scale_if_required(
+ cpi->last_source = av1_realloc_and_scale_if_required(
cm, cpi->unscaled_last_source, &cpi->scaled_last_source,
- cm->features.interp_filter, 0, false, false);
+ cm->features.interp_filter, 0, false, false, cpi->oxcf.border_in_pixels,
+ cpi->oxcf.tool_cfg.enable_global_motion);
}
av1_setup_frame(cpi);