Refactor av1_cyclic_refresh_postencode() function
This patch reduces post-encode stage overhead by moving the
accumulation of cyclic refresh counters into the encode stage.
Change-Id: I4e06298ce6bffed8b8daad4188007a1b24b22f27
diff --git a/av1/encoder/aq_cyclicrefresh.c b/av1/encoder/aq_cyclicrefresh.c
index 3308ffe..c7abe43 100644
--- a/av1/encoder/aq_cyclicrefresh.c
+++ b/av1/encoder/aq_cyclicrefresh.c
@@ -16,6 +16,7 @@
#include "av1/encoder/aq_cyclicrefresh.h"
#include "av1/encoder/ratectrl.h"
#include "av1/encoder/segmentation.h"
+#include "av1/encoder/tokenize.h"
#include "aom_dsp/aom_dsp_common.h"
#include "aom_ports/system_state.h"
@@ -146,11 +147,13 @@
return bits_per_mb;
}
-void av1_cyclic_refresh_update_segment(const AV1_COMP *cpi,
- MB_MODE_INFO *const mbmi, int mi_row,
- int mi_col, BLOCK_SIZE bsize,
- int64_t rate, int64_t dist, int skip) {
+void av1_cyclic_refresh_update_segment(const AV1_COMP *cpi, MACROBLOCK *const x,
+ int mi_row, int mi_col, BLOCK_SIZE bsize,
+ int64_t rate, int64_t dist, int skip,
+ RUN_TYPE dry_run) {
const AV1_COMMON *const cm = &cpi->common;
+ MACROBLOCKD *const xd = &x->e_mbd;
+ MB_MODE_INFO *const mbmi = xd->mi[0];
CYCLIC_REFRESH *const cr = cpi->cyclic_refresh;
const int bw = mi_size_wide[bsize];
const int bh = mi_size_high[bsize];
@@ -189,12 +192,37 @@
// Update entries in the cyclic refresh map with new_map_value, and
// copy mbmi->segment_id into global segmentation map.
// 8x8 is smallest coding block size for non-key frames.
- for (int y = 0; y < ymis; y += 2)
- for (int x = 0; x < xmis; x += 2) {
- int map_offset = block_index + y * cm->mi_params.mi_cols + x;
+ const int sh = bw << 1;
+ for (int mi_y = 0; mi_y < ymis; mi_y += 2) {
+ for (int mi_x = 0; mi_x < xmis; mi_x += 2) {
+ int map_offset = block_index + mi_y * cm->mi_params.mi_cols + mi_x;
cr->map[map_offset] = new_map_value;
cpi->enc_seg.map[map_offset] = mbmi->segment_id;
}
+ // Accumulate cyclic refresh update counters.
+ if (!dry_run && !frame_is_intra_only(cm)) {
+ if (cyclic_refresh_segment_id(mbmi->segment_id) == CR_SEGMENT_ID_BOOST1)
+ x->actual_num_seg1_blocks += sh;
+ else if (cyclic_refresh_segment_id(mbmi->segment_id) ==
+ CR_SEGMENT_ID_BOOST2)
+ x->actual_num_seg2_blocks += sh;
+ }
+ }
+}
+
+// Initializes counters used for cyclic refresh.
+void av1_init_cyclic_refresh_counters(MACROBLOCK *const x) {
+ x->actual_num_seg1_blocks = 0;
+ x->actual_num_seg2_blocks = 0;
+ x->cnt_zeromv = 0;
+}
+
+// Accumulate cyclic refresh counters.
+void av1_accumulate_cyclic_refresh_counters(
+ CYCLIC_REFRESH *const cyclic_refresh, const MACROBLOCK *const x) {
+ cyclic_refresh->actual_num_seg1_blocks += x->actual_num_seg1_blocks;
+ cyclic_refresh->actual_num_seg2_blocks += x->actual_num_seg2_blocks;
+ cyclic_refresh->cnt_zeromv += x->cnt_zeromv;
}
void av1_cyclic_refresh_postencode(AV1_COMP *const cpi) {
@@ -203,41 +231,15 @@
CYCLIC_REFRESH *const cr = cpi->cyclic_refresh;
RATE_CONTROL *const rc = &cpi->rc;
SVC *const svc = &cpi->svc;
- unsigned char *const seg_map = cpi->enc_seg.map;
- int cnt_zeromv = 0;
- cr->actual_num_seg1_blocks = 0;
- cr->actual_num_seg2_blocks = 0;
- // 8X8 blocks are smallest partition used on delta frames.
- for (int mi_row = 0; mi_row < mi_params->mi_rows; mi_row += 2) {
- MB_MODE_INFO **mi = mi_params->mi_grid_base + mi_row * mi_params->mi_stride;
- int sh = 2;
- for (int mi_col = 0; mi_col < mi_params->mi_cols; mi_col += sh) {
- sh = mi_size_wide[mi[0]->bsize];
- MV mv = mi[0]->mv[0].as_mv;
- if (cm->seg.enabled) {
- int map_index = mi_row * mi_params->mi_cols + mi_col;
- if (cyclic_refresh_segment_id(seg_map[map_index]) ==
- CR_SEGMENT_ID_BOOST1)
- cr->actual_num_seg1_blocks += sh << 1;
- else if (cyclic_refresh_segment_id(seg_map[map_index]) ==
- CR_SEGMENT_ID_BOOST2)
- cr->actual_num_seg2_blocks += sh << 1;
- }
- // Accumulate low_content_frame.
- if (is_inter_block(mi[0]) && mi[0]->ref_frame[0] == LAST_FRAME &&
- abs(mv.row) < 8 && abs(mv.col) < 8)
- cnt_zeromv += sh << 1;
- if (mi_col + sh < mi_params->mi_cols) {
- mi += sh;
- }
- }
- }
- cnt_zeromv = 100 * cnt_zeromv / (mi_params->mi_rows * mi_params->mi_cols);
+ const int avg_cnt_zeromv =
+ 100 * cr->cnt_zeromv / (mi_params->mi_rows * mi_params->mi_cols);
+
if (!cpi->use_svc ||
(cpi->use_svc &&
!cpi->svc.layer_context[cpi->svc.temporal_layer_id].is_key_frame &&
cpi->svc.spatial_layer_id == cpi->svc.number_spatial_layers - 1)) {
- rc->avg_frame_low_motion = (3 * rc->avg_frame_low_motion + cnt_zeromv) / 4;
+ rc->avg_frame_low_motion =
+ (3 * rc->avg_frame_low_motion + avg_cnt_zeromv) / 4;
// For SVC: set avg_frame_low_motion (only computed on top spatial layer)
// to all lower spatial layers.
if (cpi->use_svc &&
diff --git a/av1/encoder/aq_cyclicrefresh.h b/av1/encoder/aq_cyclicrefresh.h
index 23d1f16..97bd6f2 100644
--- a/av1/encoder/aq_cyclicrefresh.h
+++ b/av1/encoder/aq_cyclicrefresh.h
@@ -13,6 +13,8 @@
#define AOM_AV1_ENCODER_AQ_CYCLICREFRESH_H_
#include "av1/common/blockd.h"
+#include "av1/encoder/block.h"
+#include "av1/encoder/tokenize.h"
#ifdef __cplusplus
extern "C" {
@@ -70,6 +72,10 @@
*/
int rdmult;
/*!
+ * Count of (4x4) blocks coded with near-zero motion vectors
+ */
+ int cnt_zeromv;
+ /*!
* Cyclic refresh map.
*/
int8_t *map;
@@ -166,21 +172,59 @@
* \callergraph
*
* \param[in] cpi Top level encoder structure
- * \param[in] mbmi MB_MODE_INFO pointer for mi block
+ * \param[in] x Pointer to MACROBLOCK structure
* \param[in] mi_row Row coordinate of the block in a step size of MI_SIZE
* \param[in] mi_col Col coordinate of the block in a step size of MI_SIZE
* \param[in] bsize Block size
* \param[in] rate Projected block rate from pickmode
* \param[in] dist Projected block dist from pickmode
- * \param[in] skip Skip flag set from picmode
+ * \param[in] skip Skip flag set from pickmode
+ * \param[in] dry_run A code indicating whether it is part of the final
+ * pass for reconstructing the superblock
*
* \return Update the \c mbmi->segment_id, the \c cpi->cyclic_refresh and
* the \c cm->cpi->enc_seg.map.
*/
void av1_cyclic_refresh_update_segment(const struct AV1_COMP *cpi,
- MB_MODE_INFO *const mbmi, int mi_row,
+ MACROBLOCK *const x, int mi_row,
int mi_col, BLOCK_SIZE bsize,
- int64_t rate, int64_t dist, int skip);
+ int64_t rate, int64_t dist, int skip,
+ RUN_TYPE dry_run);
+
+/*!\brief Initialize counters used for cyclic refresh.
+ *
+ * Initializes cyclic refresh counters cnt_zeromv, actual_num_seg1_blocks and
+ * actual_num_seg2_blocks.
+ *
+ * \ingroup cyclic_refresh
+ * \callgraph
+ * \callergraph
+ *
+ * \param[in] x Pointer to MACROBLOCK structure
+ *
+ * \return Update the \c x->cnt_zeromv, the \c x->actual_num_seg1_blocks and
+ * the \c x->actual_num_seg2_blocks.
+ */
+void av1_init_cyclic_refresh_counters(MACROBLOCK *const x);
+
+/*!\brief Accumulate cyclic refresh counters.
+ *
+ * Accumulates cyclic refresh counters cnt_zeromv, actual_num_seg1_blocks and
+ * actual_num_seg2_blocks from MACROBLOCK structure to CYCLIC_REFRESH structure.
+ *
+ * \ingroup cyclic_refresh
+ * \callgraph
+ * \callergraph
+ *
+ * \param[in] cyclic_refresh Pointer to CYCLIC_REFRESH structure
+ * \param[in] x Pointer to MACROBLOCK structure
+ *
+ * \return Update the \c cyclic_refresh->cnt_zeromv, the \c
+ * cyclic_refresh->actual_num_seg1_blocks and the \c
+ * cyclic_refresh->actual_num_seg2_blocks.
+ */
+void av1_accumulate_cyclic_refresh_counters(
+ CYCLIC_REFRESH *const cyclic_refresh, const MACROBLOCK *const x);
/*!\brief Update stats after encoding frame.
*
diff --git a/av1/encoder/block.h b/av1/encoder/block.h
index 9993acb..03dae3e 100644
--- a/av1/encoder/block.h
+++ b/av1/encoder/block.h
@@ -969,6 +969,22 @@
* set 0 and all txfms are skipped.
*/
int seg_skip_block;
+
+ /*! \brief Number of segment 1 blocks
+ * Actual number of (4x4) blocks that were applied delta-q,
+ * for segment 1.
+ */
+ int actual_num_seg1_blocks;
+
+ /*!\brief Number of segment 2 blocks
+ * Actual number of (4x4) blocks that were applied delta-q,
+ * for segment 2.
+ */
+ int actual_num_seg2_blocks;
+
+ /*!\brief Number of (4x4) blocks with near-zero motion vectors
+ */
+ int cnt_zeromv;
/**@}*/
/*****************************************************************************
diff --git a/av1/encoder/encodeframe.c b/av1/encoder/encodeframe.c
index 345ad12..24d3488 100644
--- a/av1/encoder/encodeframe.c
+++ b/av1/encoder/encodeframe.c
@@ -1048,7 +1048,15 @@
cpi->td.deltaq_used = 0;
cpi->td.mb.e_mbd.tile_ctx = &this_tile->tctx;
cpi->td.mb.tile_pb_ctx = &this_tile->tctx;
+ // Reset cyclic refresh counters.
+ av1_init_cyclic_refresh_counters(&cpi->td.mb);
+
av1_encode_tile(cpi, &cpi->td, tile_row, tile_col);
+ // Accumulate cyclic refresh params.
+ if (cpi->oxcf.q_cfg.aq_mode == CYCLIC_REFRESH_AQ &&
+ !frame_is_intra_only(&cpi->common))
+ av1_accumulate_cyclic_refresh_counters(cpi->cyclic_refresh,
+ &cpi->td.mb);
cpi->intrabc_used |= cpi->td.intrabc_used;
cpi->deltaq_used |= cpi->td.deltaq_used;
}
@@ -1388,6 +1396,10 @@
// base_qindex
cm->delta_q_info.delta_q_present_flag &= quant_params->base_qindex > 0;
cm->delta_q_info.delta_lf_present_flag &= quant_params->base_qindex > 0;
+ } else {
+ cpi->cyclic_refresh->actual_num_seg1_blocks = 0;
+ cpi->cyclic_refresh->actual_num_seg2_blocks = 0;
+ cpi->cyclic_refresh->cnt_zeromv = 0;
}
av1_frame_init_quantizer(cpi);
diff --git a/av1/encoder/encodeframe_utils.c b/av1/encoder/encodeframe_utils.c
index 6251233..efecc79 100644
--- a/av1/encoder/encodeframe_utils.c
+++ b/av1/encoder/encodeframe_utils.c
@@ -256,14 +256,26 @@
// Else for cyclic refresh mode update the segment map, set the segment id
// and then update the quantizer.
if (cpi->oxcf.q_cfg.aq_mode == CYCLIC_REFRESH_AQ) {
- av1_cyclic_refresh_update_segment(cpi, mi_addr, mi_row, mi_col, bsize,
+ av1_cyclic_refresh_update_segment(cpi, x, mi_row, mi_col, bsize,
ctx->rd_stats.rate, ctx->rd_stats.dist,
- txfm_info->skip_txfm);
+ txfm_info->skip_txfm, dry_run);
}
if (mi_addr->uv_mode == UV_CFL_PRED && !is_cfl_allowed(xd))
mi_addr->uv_mode = UV_DC_PRED;
}
+ // Count zero motion vector.
+ if (!dry_run && cpi->oxcf.q_cfg.aq_mode == CYCLIC_REFRESH_AQ &&
+ !frame_is_intra_only(cm)) {
+ const MV mv = mi->mv[0].as_mv;
+ if (is_inter_block(mi) && mi->ref_frame[0] == LAST_FRAME &&
+ abs(mv.row) < 8 && abs(mv.col) < 8) {
+ const int ymis = AOMMIN(cm->mi_params.mi_rows - mi_row, bh);
+ // Accumulate low_content_frame.
+ for (int mi_y = 0; mi_y < ymis; mi_y += 2) x->cnt_zeromv += bw << 1;
+ }
+ }
+
for (i = 0; i < num_planes; ++i) {
p[i].coeff = ctx->coeff[i];
p[i].qcoeff = ctx->qcoeff[i];
diff --git a/av1/encoder/ethread.c b/av1/encoder/ethread.c
index 0a9325c..3735ca3 100644
--- a/av1/encoder/ethread.c
+++ b/av1/encoder/ethread.c
@@ -775,6 +775,11 @@
EncWorkerData *const thread_data = (EncWorkerData *)worker->data1;
cpi->intrabc_used |= thread_data->td->intrabc_used;
cpi->deltaq_used |= thread_data->td->deltaq_used;
+ // Accumulate cyclic refresh params.
+ if (cpi->oxcf.q_cfg.aq_mode == CYCLIC_REFRESH_AQ &&
+ !frame_is_intra_only(&cpi->common))
+ av1_accumulate_cyclic_refresh_counters(cpi->cyclic_refresh,
+ &thread_data->td->mb);
if (thread_data->td->mb.txfm_search_info.txb_rd_records) {
aom_free(thread_data->td->mb.txfm_search_info.txb_rd_records);
thread_data->td->mb.txfm_search_info.txb_rd_records = NULL;
@@ -842,6 +847,9 @@
sizeof(MvCosts));
}
}
+ // Reset cyclic refresh counters.
+ av1_init_cyclic_refresh_counters(&thread_data->td->mb);
+
if (!cpi->sf.rt_sf.use_nonrd_pick_mode) {
CHECK_MEM_ERROR(cm, thread_data->td->mb.txfm_search_info.txb_rd_records,
(TxbRdRecords *)aom_malloc(sizeof(TxbRdRecords)));