Document buffers in MACROBLOCK
This CL adds a brief description for the buffers in MACROBLOCK. In
addition, the four buffers used to speed up obmc search are refactored to
a new struct called OBMCBuffer.
BUG=aomedia:2618
Change-Id: Ibb9e57c359504c81af9c6aa50ccf880901f7c5c3
diff --git a/av1/encoder/block.h b/av1/encoder/block.h
index 9d4c6b2..87456f5 100644
--- a/av1/encoder/block.h
+++ b/av1/encoder/block.h
@@ -224,6 +224,21 @@
uint8_t *tmp_best_mask_buf; // backup of the best segmentation mask
} CompoundTypeRdBuffers;
+// Struct for buffers used to speed up rdopt for obmc.
+// See the comments for calc_target_weighted_pred for details.
+typedef struct {
+ // A new source weighted with the above and left predictors for efficient
+ // rdopt in obmc mode.
+ int32_t *wsrc;
+ // A new mask constructed from the original horizontal and vertical masks for
+ // fast obmc rdopt.
+ int32_t *mask;
+ // Predictions from the above and left predictors, respectively. These are
+ // used to build the obmc predictor.
+ uint8_t *above_pred;
+ uint8_t *left_pred;
+} OBMCBuffer;
+
typedef struct {
// A multiplier that converts mv cost to l2 error.
int errorperbit;
@@ -308,14 +323,13 @@
int pred_mv_sad[REF_FRAMES];
int best_pred_mv_sad;
- int32_t *wsrc_buf;
- int32_t *mask_buf;
- uint8_t *above_pred_buf;
- uint8_t *left_pred_buf;
-
+ // Buffers used to hold/create predictions during rdopt
+ OBMCBuffer obmc_buffer;
PALETTE_BUFFER *palette_buffer;
CompoundTypeRdBuffers comp_rd_buffer;
+ // A buffer used for convolution during the averaging prediction in compound
+ // mode.
CONV_BUF_TYPE *tmp_conv_dst;
// Points to a buffer that is used to hold temporary prediction results. This
@@ -323,7 +337,7 @@
// 1. This is a temporary buffer used to pingpong the prediction in
// handle_inter_mode.
// 2. xd->tmp_obmc_bufs also points to this buffer, and is used in ombc
- // prediction.
+ // prediction.
uint8_t *tmp_pred_bufs[2];
FRAME_CONTEXT *row_ctx;
diff --git a/av1/encoder/encoder.c b/av1/encoder/encoder.c
index 23a6c80..ebfe6cf 100644
--- a/av1/encoder/encoder.c
+++ b/av1/encoder/encoder.c
@@ -834,14 +834,7 @@
cpi->vmaf_info.rdmult_scaling_factors = NULL;
#endif
- aom_free(cpi->td.mb.above_pred_buf);
- cpi->td.mb.above_pred_buf = NULL;
-
- aom_free(cpi->td.mb.left_pred_buf);
- cpi->td.mb.left_pred_buf = NULL;
-
- aom_free(cpi->td.mb.wsrc_buf);
- cpi->td.mb.wsrc_buf = NULL;
+ av1_release_obmc_buffers(&cpi->td.mb.obmc_buffer);
aom_free(cpi->td.mb.inter_modes_info);
cpi->td.mb.inter_modes_info = NULL;
@@ -851,8 +844,6 @@
aom_free(cpi->td.mb.intrabc_hash_info.hash_value_buffer[i][j]);
cpi->td.mb.intrabc_hash_info.hash_value_buffer[i][j] = NULL;
}
- aom_free(cpi->td.mb.mask_buf);
- cpi->td.mb.mask_buf = NULL;
aom_free(cm->tpl_mvs);
cm->tpl_mvs = NULL;
@@ -2689,6 +2680,35 @@
*block_mis_log2 = is_720p_or_larger ? 2 : 1;
}
+void av1_alloc_obmc_buffers(OBMCBuffer *obmc_buffer, AV1_COMMON *cm) {
+ CHECK_MEM_ERROR(
+ cm, obmc_buffer->wsrc,
+ (int32_t *)aom_memalign(16, MAX_SB_SQUARE * sizeof(*obmc_buffer->wsrc)));
+ CHECK_MEM_ERROR(
+ cm, obmc_buffer->mask,
+ (int32_t *)aom_memalign(16, MAX_SB_SQUARE * sizeof(*obmc_buffer->mask)));
+ CHECK_MEM_ERROR(
+ cm, obmc_buffer->above_pred,
+ (uint8_t *)aom_memalign(
+ 16, MAX_MB_PLANE * MAX_SB_SQUARE * sizeof(*obmc_buffer->above_pred)));
+ CHECK_MEM_ERROR(
+ cm, obmc_buffer->left_pred,
+ (uint8_t *)aom_memalign(
+ 16, MAX_MB_PLANE * MAX_SB_SQUARE * sizeof(*obmc_buffer->left_pred)));
+}
+
+void av1_release_obmc_buffers(OBMCBuffer *obmc_buffer) {
+ aom_free(obmc_buffer->mask);
+ aom_free(obmc_buffer->above_pred);
+ aom_free(obmc_buffer->left_pred);
+ aom_free(obmc_buffer->wsrc);
+
+ obmc_buffer->mask = NULL;
+ obmc_buffer->above_pred = NULL;
+ obmc_buffer->left_pred = NULL;
+ obmc_buffer->wsrc = NULL;
+}
+
void av1_alloc_compound_type_rd_buffers(AV1_COMMON *const cm,
CompoundTypeRdBuffers *const bufs) {
CHECK_MEM_ERROR(
@@ -3164,18 +3184,7 @@
int sb_mi_size = av1_get_sb_mi_size(cm);
- CHECK_MEM_ERROR(
- cm, cpi->td.mb.above_pred_buf,
- (uint8_t *)aom_memalign(16, MAX_MB_PLANE * MAX_SB_SQUARE *
- sizeof(*cpi->td.mb.above_pred_buf)));
- CHECK_MEM_ERROR(
- cm, cpi->td.mb.left_pred_buf,
- (uint8_t *)aom_memalign(16, MAX_MB_PLANE * MAX_SB_SQUARE *
- sizeof(*cpi->td.mb.left_pred_buf)));
-
- CHECK_MEM_ERROR(cm, cpi->td.mb.wsrc_buf,
- (int32_t *)aom_memalign(
- 16, MAX_SB_SQUARE * sizeof(*cpi->td.mb.wsrc_buf)));
+ av1_alloc_obmc_buffers(&cpi->td.mb.obmc_buffer, cm);
CHECK_MEM_ERROR(
cm, cpi->td.mb.inter_modes_info,
@@ -3191,10 +3200,6 @@
cpi->td.mb.intrabc_hash_info.g_crc_initialized = 0;
- CHECK_MEM_ERROR(cm, cpi->td.mb.mask_buf,
- (int32_t *)aom_memalign(
- 16, MAX_SB_SQUARE * sizeof(*cpi->td.mb.mask_buf)));
-
CHECK_MEM_ERROR(cm, cpi->td.mb.mbmi_ext,
aom_calloc(sb_mi_size, sizeof(*cpi->td.mb.mbmi_ext)));
@@ -3608,9 +3613,7 @@
for (int j = 0; j < 2; ++j) {
aom_free(thread_data->td->tmp_pred_bufs[j]);
}
- aom_free(thread_data->td->above_pred_buf);
- aom_free(thread_data->td->left_pred_buf);
- aom_free(thread_data->td->wsrc_buf);
+ av1_release_obmc_buffers(&thread_data->td->obmc_buffer);
aom_free(thread_data->td->vt64x64);
aom_free(thread_data->td->inter_modes_info);
@@ -3620,7 +3623,6 @@
thread_data->td->hash_value_buffer[x][y] = NULL;
}
}
- aom_free(thread_data->td->mask_buf);
aom_free(thread_data->td->counts);
aom_free(thread_data->td->mbmi_ext);
av1_free_shared_coeff_buffer(&thread_data->td->shared_coeff_buf);
diff --git a/av1/encoder/encoder.h b/av1/encoder/encoder.h
index da7e6d2..b93ade3 100644
--- a/av1/encoder/encoder.h
+++ b/av1/encoder/encoder.h
@@ -750,10 +750,7 @@
SIMPLE_MOTION_DATA_TREE *sms_root;
InterModesInfo *inter_modes_info;
uint32_t *hash_value_buffer[2][2];
- int32_t *wsrc_buf;
- int32_t *mask_buf;
- uint8_t *above_pred_buf;
- uint8_t *left_pred_buf;
+ OBMCBuffer obmc_buffer;
PALETTE_BUFFER *palette_buffer;
CompoundTypeRdBuffers comp_rd_buffer;
CONV_BUF_TYPE *tmp_conv_dst;
@@ -1674,6 +1671,8 @@
int av1_convert_sect5obus_to_annexb(uint8_t *buffer, size_t *input_size);
+void av1_alloc_obmc_buffers(OBMCBuffer *obmc_buffer, AV1_COMMON *cm);
+void av1_release_obmc_buffers(OBMCBuffer *obmc_buffer);
void av1_alloc_compound_type_rd_buffers(AV1_COMMON *const cm,
CompoundTypeRdBuffers *const bufs);
void av1_release_compound_type_rd_buffers(CompoundTypeRdBuffers *const bufs);
diff --git a/av1/encoder/ethread.c b/av1/encoder/ethread.c
index 6c36894..09039af 100644
--- a/av1/encoder/ethread.c
+++ b/av1/encoder/ethread.c
@@ -485,19 +485,7 @@
av1_setup_sms_tree(cpi, thread_data->td);
av1_setup_shared_coeff_buffer(cm, &thread_data->td->shared_coeff_buf);
- CHECK_MEM_ERROR(cm, thread_data->td->above_pred_buf,
- (uint8_t *)aom_memalign(
- 16, MAX_MB_PLANE * MAX_SB_SQUARE *
- sizeof(*thread_data->td->above_pred_buf)));
- CHECK_MEM_ERROR(cm, thread_data->td->left_pred_buf,
- (uint8_t *)aom_memalign(
- 16, MAX_MB_PLANE * MAX_SB_SQUARE *
- sizeof(*thread_data->td->left_pred_buf)));
-
- CHECK_MEM_ERROR(
- cm, thread_data->td->wsrc_buf,
- (int32_t *)aom_memalign(
- 16, MAX_SB_SQUARE * sizeof(*thread_data->td->wsrc_buf)));
+ av1_alloc_obmc_buffers(&thread_data->td->obmc_buffer, cm);
CHECK_MEM_ERROR(cm, thread_data->td->inter_modes_info,
(InterModesInfo *)aom_malloc(
@@ -511,10 +499,6 @@
AOM_BUFFER_SIZE_FOR_BLOCK_HASH *
sizeof(*thread_data->td->hash_value_buffer[0][0])));
- CHECK_MEM_ERROR(
- cm, thread_data->td->mask_buf,
- (int32_t *)aom_memalign(
- 16, MAX_SB_SQUARE * sizeof(*thread_data->td->mask_buf)));
// Allocate frame counters in thread data.
CHECK_MEM_ERROR(cm, thread_data->td->counts,
aom_calloc(1, sizeof(*thread_data->td->counts)));
@@ -637,9 +621,7 @@
if (thread_data->td != &cpi->td) {
thread_data->td->mb = cpi->td.mb;
thread_data->td->rd_counts = cpi->td.rd_counts;
- thread_data->td->mb.above_pred_buf = thread_data->td->above_pred_buf;
- thread_data->td->mb.left_pred_buf = thread_data->td->left_pred_buf;
- thread_data->td->mb.wsrc_buf = thread_data->td->wsrc_buf;
+ thread_data->td->mb.obmc_buffer = thread_data->td->obmc_buffer;
thread_data->td->mb.inter_modes_info = thread_data->td->inter_modes_info;
for (int x = 0; x < 2; x++) {
@@ -652,7 +634,6 @@
thread_data->td->hash_value_buffer[x][y];
}
}
- thread_data->td->mb.mask_buf = thread_data->td->mask_buf;
thread_data->td->mb.mbmi_ext = thread_data->td->mbmi_ext;
}
if (thread_data->td->counts != &cpi->counts) {
diff --git a/av1/encoder/mcomp.c b/av1/encoder/mcomp.c
index ffe014c..8827486 100644
--- a/av1/encoder/mcomp.c
+++ b/av1/encoder/mcomp.c
@@ -51,8 +51,8 @@
av1_set_ms_compound_refs(ms_buffers, NULL, NULL, 0, 0);
- ms_buffers->wsrc = x->wsrc_buf;
- ms_buffers->obmc_mask = x->mask_buf;
+ ms_buffers->wsrc = x->obmc_buffer.wsrc;
+ ms_buffers->obmc_mask = x->obmc_buffer.mask;
}
void av1_make_default_fullpel_ms_params(FULLPEL_MOTION_SEARCH_PARAMS *ms_params,
diff --git a/av1/encoder/rdopt.c b/av1/encoder/rdopt.c
index fd30d46..6496493 100644
--- a/av1/encoder/rdopt.c
+++ b/av1/encoder/rdopt.c
@@ -3401,28 +3401,28 @@
~(sf->intra_sf.intra_y_mode_mask[max_txsize_lookup[bsize]]);
}
-static AOM_INLINE void init_pred_buf(const MACROBLOCK *const x,
- HandleInterModeArgs *const args) {
- const MACROBLOCKD *const xd = &x->e_mbd;
- if (is_cur_buf_hbd(xd)) {
+static AOM_INLINE void init_neighbor_pred_buf(
+ const OBMCBuffer *const obmc_buffer, HandleInterModeArgs *const args,
+ int is_hbd) {
+ if (is_hbd) {
const int len = sizeof(uint16_t);
- args->above_pred_buf[0] = CONVERT_TO_BYTEPTR(x->above_pred_buf);
- args->above_pred_buf[1] =
- CONVERT_TO_BYTEPTR(x->above_pred_buf + (MAX_SB_SQUARE >> 1) * len);
+ args->above_pred_buf[0] = CONVERT_TO_BYTEPTR(obmc_buffer->above_pred);
+ args->above_pred_buf[1] = CONVERT_TO_BYTEPTR(obmc_buffer->above_pred +
+ (MAX_SB_SQUARE >> 1) * len);
args->above_pred_buf[2] =
- CONVERT_TO_BYTEPTR(x->above_pred_buf + MAX_SB_SQUARE * len);
- args->left_pred_buf[0] = CONVERT_TO_BYTEPTR(x->left_pred_buf);
+ CONVERT_TO_BYTEPTR(obmc_buffer->above_pred + MAX_SB_SQUARE * len);
+ args->left_pred_buf[0] = CONVERT_TO_BYTEPTR(obmc_buffer->left_pred);
args->left_pred_buf[1] =
- CONVERT_TO_BYTEPTR(x->left_pred_buf + (MAX_SB_SQUARE >> 1) * len);
+ CONVERT_TO_BYTEPTR(obmc_buffer->left_pred + (MAX_SB_SQUARE >> 1) * len);
args->left_pred_buf[2] =
- CONVERT_TO_BYTEPTR(x->left_pred_buf + MAX_SB_SQUARE * len);
+ CONVERT_TO_BYTEPTR(obmc_buffer->left_pred + MAX_SB_SQUARE * len);
} else {
- args->above_pred_buf[0] = x->above_pred_buf;
- args->above_pred_buf[1] = x->above_pred_buf + (MAX_SB_SQUARE >> 1);
- args->above_pred_buf[2] = x->above_pred_buf + MAX_SB_SQUARE;
- args->left_pred_buf[0] = x->left_pred_buf;
- args->left_pred_buf[1] = x->left_pred_buf + (MAX_SB_SQUARE >> 1);
- args->left_pred_buf[2] = x->left_pred_buf + MAX_SB_SQUARE;
+ args->above_pred_buf[0] = obmc_buffer->above_pred;
+ args->above_pred_buf[1] = obmc_buffer->above_pred + (MAX_SB_SQUARE >> 1);
+ args->above_pred_buf[2] = obmc_buffer->above_pred + MAX_SB_SQUARE;
+ args->left_pred_buf[0] = obmc_buffer->left_pred;
+ args->left_pred_buf[1] = obmc_buffer->left_pred + (MAX_SB_SQUARE >> 1);
+ args->left_pred_buf[2] = obmc_buffer->left_pred + MAX_SB_SQUARE;
}
}
@@ -3439,7 +3439,7 @@
MB_MODE_INFO_EXT *const mbmi_ext = x->mbmi_ext;
unsigned char segment_id = mbmi->segment_id;
- init_pred_buf(x, args);
+ init_neighbor_pred_buf(&x->obmc_buffer, args, is_cur_buf_hbd(&x->e_mbd));
av1_collect_neighbors_ref_counts(xd);
estimate_ref_frame_costs(cm, xd, x, segment_id, ref_costs_single,
ref_costs_comp);
@@ -5225,7 +5225,7 @@
}
struct calc_target_weighted_pred_ctxt {
- const MACROBLOCK *x;
+ const OBMCBuffer *obmc_buffer;
const uint8_t *tmp;
int tmp_stride;
int overlap;
@@ -5245,8 +5245,8 @@
const int bw = xd->width << MI_SIZE_LOG2;
const uint8_t *const mask1d = av1_get_obmc_mask(ctxt->overlap);
- int32_t *wsrc = ctxt->x->wsrc_buf + (rel_mi_col * MI_SIZE);
- int32_t *mask = ctxt->x->mask_buf + (rel_mi_col * MI_SIZE);
+ int32_t *wsrc = ctxt->obmc_buffer->wsrc + (rel_mi_col * MI_SIZE);
+ int32_t *mask = ctxt->obmc_buffer->mask + (rel_mi_col * MI_SIZE);
const uint8_t *tmp = ctxt->tmp + rel_mi_col * MI_SIZE;
const int is_hbd = is_cur_buf_hbd(xd);
@@ -5293,8 +5293,8 @@
const int bw = xd->width << MI_SIZE_LOG2;
const uint8_t *const mask1d = av1_get_obmc_mask(ctxt->overlap);
- int32_t *wsrc = ctxt->x->wsrc_buf + (rel_mi_row * MI_SIZE * bw);
- int32_t *mask = ctxt->x->mask_buf + (rel_mi_row * MI_SIZE * bw);
+ int32_t *wsrc = ctxt->obmc_buffer->wsrc + (rel_mi_row * MI_SIZE * bw);
+ int32_t *mask = ctxt->obmc_buffer->mask + (rel_mi_row * MI_SIZE * bw);
const uint8_t *tmp = ctxt->tmp + (rel_mi_row * MI_SIZE * ctxt->tmp_stride);
const int is_hbd = is_cur_buf_hbd(xd);
@@ -5374,8 +5374,9 @@
const BLOCK_SIZE bsize = xd->mi[0]->sb_type;
const int bw = xd->width << MI_SIZE_LOG2;
const int bh = xd->height << MI_SIZE_LOG2;
- int32_t *mask_buf = x->mask_buf;
- int32_t *wsrc_buf = x->wsrc_buf;
+ const OBMCBuffer *obmc_buffer = &x->obmc_buffer;
+ int32_t *mask_buf = obmc_buffer->mask;
+ int32_t *wsrc_buf = obmc_buffer->wsrc;
const int is_hbd = is_cur_buf_hbd(xd);
const int src_scale = AOM_BLEND_A64_MAX_ALPHA * AOM_BLEND_A64_MAX_ALPHA;
@@ -5391,8 +5392,8 @@
if (xd->up_available) {
const int overlap =
AOMMIN(block_size_high[bsize], block_size_high[BLOCK_64X64]) >> 1;
- struct calc_target_weighted_pred_ctxt ctxt = { x, above, above_stride,
- overlap };
+ struct calc_target_weighted_pred_ctxt ctxt = { obmc_buffer, above,
+ above_stride, overlap };
foreach_overlappable_nb_above(cm, (MACROBLOCKD *)xd,
max_neighbor_obmc[mi_size_wide_log2[bsize]],
calc_target_weighted_pred_above, &ctxt);
@@ -5407,8 +5408,8 @@
if (xd->left_available) {
const int overlap =
AOMMIN(block_size_wide[bsize], block_size_wide[BLOCK_64X64]) >> 1;
- struct calc_target_weighted_pred_ctxt ctxt = { x, left, left_stride,
- overlap };
+ struct calc_target_weighted_pred_ctxt ctxt = { obmc_buffer, left,
+ left_stride, overlap };
foreach_overlappable_nb_left(cm, (MACROBLOCKD *)xd,
max_neighbor_obmc[mi_size_high_log2[bsize]],
calc_target_weighted_pred_left, &ctxt);