rtc-screen : Force zeromv-skip at block level
In the parent version, the zeromv-skip decision was based on the
SAD metric calculated w.r.t. the LAST_FRAME recon at superblock
level. In this CL, the decision is extended to block level
by recalculating the SAD w.r.t. the LAST_FRAME recon. This CL
provides a speed-up by forcing zeromv-skip for the blocks in a
superblock that have lower recon SAD. The decision is also
extended to the blocks in partial superblocks at frame
boundaries.
      Instruction Count         BD-Rate Loss(%)
cpu     Reduction(%)     avg.psnr   ovr.psnr    ssim
 9         1.398         -1.5024    -0.1591    -0.8699
STATS_CHANGED for rtc-screen speed 9
Change-Id: I5be439fd52b293c78c7ba172d446fb2882cfccbe
diff --git a/av1/encoder/block.h b/av1/encoder/block.h
index fe46aec..7a35256 100644
--- a/av1/encoder/block.h
+++ b/av1/encoder/block.h
@@ -1017,9 +1017,16 @@
*/
int cnt_zeromv;
- /*!\brief Flag to force zeromv-skip block, for nonrd path.
+ /*!\brief Flag to force zeromv-skip at superblock level, for nonrd path.
+ *
+ * 0/1 imply zeromv-skip is disabled/enabled. 2 implies that the blocks
+ * in the superblock may be marked as zeromv-skip at block level.
*/
- int force_zeromv_skip;
+ int force_zeromv_skip_for_sb;
+
+ /*!\brief Flag to force zeromv-skip at block level, for nonrd path.
+ */
+ int force_zeromv_skip_for_blk;
/*! \brief Previous segment id for which qmatrices were updated.
* This is used to bypass setting of qmatrices if no change in qindex.
diff --git a/av1/encoder/encodeframe.c b/av1/encoder/encodeframe.c
index 857e069..e2381cd 100644
--- a/av1/encoder/encodeframe.c
+++ b/av1/encoder/encodeframe.c
@@ -912,7 +912,7 @@
x->content_state_sb.source_sad_rd = kMedSad;
x->content_state_sb.lighting_change = 0;
x->content_state_sb.low_sumdiff = 0;
- x->force_zeromv_skip = 0;
+ x->force_zeromv_skip_for_sb = 0;
if (cpi->oxcf.mode == ALLINTRA) {
x->intra_sb_rdmult_modifier = 128;
@@ -1387,6 +1387,43 @@
#endif // !CONFIG_REALTIME_ONLY
}
+#define FORCE_ZMV_SKIP_128X128_BLK_DIFF 10000
+#define FORCE_ZMV_SKIP_MAX_PER_PIXEL_DIFF 4
+
+// Populates the per block size SAD thresholds for force zeromv-skip decision
+static void populate_thresh_to_force_zeromv_skip(AV1_COMP *cpi) {
+ if (cpi->sf.rt_sf.part_early_exit_zeromv == 0) return; // Feature is off
+
+ // Threshold for forcing zeromv-skip decision is as below:
+ // For 128x128 blocks, threshold is 10000 and per pixel threshold is 0.6103.
+ // For 64x64 blocks, threshold is 5000 and per pixel threshold is 1.221
+ // allowing slightly higher error for smaller blocks.
+ // Threshold of 64x64 block / Threshold of 128x128 block
+ // = sqrt(Area of 64x64 block / Area of 128x128 block) = sqrt(1/4) = 1/2,
+ // so the per pixel threshold doubles whenever the block area is quartered.
+ // Thus, per pixel thresholds for blocks of size 32x32, 16x16,... can be
+ // chosen as 2.442, 4.884,.... As the per pixel error tends to be higher for
+ // small blocks, the same is clipped to 4.
+ const unsigned int thresh_exit_128x128_part = FORCE_ZMV_SKIP_128X128_BLK_DIFF;
+ const int num_128x128_pix =
+ block_size_wide[BLOCK_128X128] * block_size_high[BLOCK_128X128];
+
+ for (BLOCK_SIZE bsize = BLOCK_4X4; bsize < BLOCK_SIZES_ALL; bsize++) {
+ const int num_block_pix = block_size_wide[bsize] * block_size_high[bsize];
+
+ // Scale the 128x128 threshold by the square root of the area ratio,
+ // rounding to the nearest integer
+ unsigned int thresh_exit_part_blk =
+ (unsigned int)(thresh_exit_128x128_part *
+ sqrt((double)num_block_pix / num_128x128_pix) +
+ 0.5);
+ thresh_exit_part_blk = AOMMIN(
+ thresh_exit_part_blk,
+ (unsigned int)(FORCE_ZMV_SKIP_MAX_PER_PIXEL_DIFF * num_block_pix));
+ cpi->zeromv_skip_thresh_exit_part[bsize] = thresh_exit_part_blk;
+ }
+}
+
/*!\brief Encoder setup(only for the current frame), encoding, and recontruction
* for a single frame
*
@@ -1650,6 +1687,7 @@
// has to be called after 'skip_mode_flag' is initialized.
av1_initialize_rd_consts(cpi);
av1_set_sad_per_bit(cpi, &x->sadperbit, quant_params->base_qindex);
+ populate_thresh_to_force_zeromv_skip(cpi);
enc_row_mt->sync_read_ptr = av1_row_mt_sync_read_dummy;
enc_row_mt->sync_write_ptr = av1_row_mt_sync_write_dummy;
diff --git a/av1/encoder/encoder.h b/av1/encoder/encoder.h
index a5a83f4..f35843f 100644
--- a/av1/encoder/encoder.h
+++ b/av1/encoder/encoder.h
@@ -3411,6 +3411,11 @@
* Struct for the reference structure for RTC.
*/
RTC_REF rtc_ref;
+
+ /*!
+ * Block level thresholds to force zeromv-skip at partition level.
+ */
+ unsigned int zeromv_skip_thresh_exit_part[BLOCK_SIZES_ALL];
} AV1_COMP;
/*!
diff --git a/av1/encoder/nonrd_pickmode.c b/av1/encoder/nonrd_pickmode.c
index 9eaa578..73b8a95 100644
--- a/av1/encoder/nonrd_pickmode.c
+++ b/av1/encoder/nonrd_pickmode.c
@@ -630,7 +630,7 @@
TX_SIZE tx_size;
int k;
- if (x->force_zeromv_skip) {
+ if (x->force_zeromv_skip_for_blk) {
*early_term = 1;
rd_stats->rate = 0;
rd_stats->dist = 0;
@@ -785,7 +785,14 @@
static void model_rd_for_sb_y(const AV1_COMP *const cpi, BLOCK_SIZE bsize,
MACROBLOCK *x, MACROBLOCKD *xd,
RD_STATS *rd_stats, unsigned int *var_out,
- int calculate_rd) {
+ int calculate_rd, int *early_term) {
+ if (x->force_zeromv_skip_for_blk && early_term != NULL) {
+ *early_term = 1;
+ rd_stats->rate = 0;
+ rd_stats->dist = 0;
+ rd_stats->sse = 0;
+ }
+
// Note our transform coeffs are 8 times an orthogonal transform.
// Hence quantizer step is also 8 times. To get effective quantizer
// we need to divide by 8 before sending to modeling function.
@@ -1542,7 +1549,7 @@
xd->plane[0].pre[0] = yv12_mb[LAST_FRAME][0];
av1_enc_build_inter_predictor_y(xd, mi_row, mi_col);
unsigned int var;
- model_rd_for_sb_y(cpi, bsize, x, xd, &this_rdc, &var, 1);
+ model_rd_for_sb_y(cpi, bsize, x, xd, &this_rdc, &var, 1, NULL);
const int16_t mode_ctx =
av1_mode_context_analyzer(mbmi_ext->mode_context, mi->ref_frame);
@@ -1654,7 +1661,7 @@
&pf_rd_stats[i], this_early_term, 1, best_sse,
&curr_var, UINT_MAX);
else
- model_rd_for_sb_y(cpi, bsize, x, xd, &pf_rd_stats[i], &curr_var, 1);
+ model_rd_for_sb_y(cpi, bsize, x, xd, &pf_rd_stats[i], &curr_var, 1, NULL);
pf_rd_stats[i].rate += av1_get_switchable_rate(
x, xd, cm->features.interp_filter, cm->seq_params->enable_dual_filter);
cost = RDCOST(x->rdmult, pf_rd_stats[i].rate, pf_rd_stats[i].dist);
@@ -1801,7 +1808,7 @@
&pf_rd_stats[i], this_early_term, 1, best_sse,
NULL, UINT_MAX);
else
- model_rd_for_sb_y(cpi, bsize, x, xd, &pf_rd_stats[i], NULL, 1);
+ model_rd_for_sb_y(cpi, bsize, x, xd, &pf_rd_stats[i], NULL, 1, NULL);
pf_rd_stats[i].rate +=
av1_get_switchable_rate(x, xd, cm->features.interp_filter,
cm->seq_params->enable_dual_filter);
@@ -1864,7 +1871,7 @@
&pf_rd_stats[i], this_early_term, 1,
best_sse, NULL, UINT_MAX);
else
- model_rd_for_sb_y(cpi, bsize, x, xd, &pf_rd_stats[i], NULL, 1);
+ model_rd_for_sb_y(cpi, bsize, x, xd, &pf_rd_stats[i], NULL, 1, NULL);
pf_rd_stats[i].rate +=
mode_costs->motion_mode_cost[bsize][mi->motion_mode];
@@ -2096,7 +2103,7 @@
}
if (use_last_ref_frame &&
- (x->nonrd_prune_ref_frame_search > 2 || x->force_zeromv_skip ||
+ (x->nonrd_prune_ref_frame_search > 2 || x->force_zeromv_skip_for_blk ||
(x->nonrd_prune_ref_frame_search > 1 && bsize > BLOCK_64X64))) {
use_golden_ref_frame = 0;
use_alt_ref_frame = 0;
@@ -2790,7 +2797,7 @@
int use_zeromv =
cpi->oxcf.tune_cfg.content == AOM_CONTENT_SCREEN ||
((cpi->oxcf.speed >= 9 && cpi->rc.avg_frame_low_motion > 70) ||
- cpi->sf.rt_sf.nonrd_agressive_skip || x->force_zeromv_skip);
+ cpi->sf.rt_sf.nonrd_agressive_skip || x->force_zeromv_skip_for_blk);
int skip_pred_mv = 0;
const int num_inter_modes =
use_zeromv ? NUM_INTER_MODES_REDUCED : NUM_INTER_MODES_RT;
@@ -2897,7 +2904,7 @@
get_ref_frame_use_mask(cpi, x, mi, mi_row, mi_col, bsize, gf_temporal_ref,
use_ref_frame_mask, &force_skip_low_temp_var);
- skip_pred_mv = x->force_zeromv_skip ||
+ skip_pred_mv = x->force_zeromv_skip_for_blk ||
(x->nonrd_prune_ref_frame_search > 2 &&
x->color_sensitivity[0] != 2 && x->color_sensitivity[1] != 2);
@@ -3004,7 +3011,7 @@
if (!use_ref_frame_mask[ref_frame]) continue;
- if (x->force_zeromv_skip &&
+ if (x->force_zeromv_skip_for_blk &&
((!(this_mode == NEARESTMV &&
frame_mv[this_mode][ref_frame].as_int == 0) &&
this_mode != GLOBALMV) ||
@@ -3271,7 +3278,8 @@
&this_early_term, 0, best_pickmode.best_sse,
&var, var_threshold);
} else {
- model_rd_for_sb_y(cpi, bsize, x, xd, &this_rdc, &var, 0);
+ model_rd_for_sb_y(cpi, bsize, x, xd, &this_rdc, &var, 0,
+ &this_early_term);
}
if (!comp_pred) {
vars[INTER_OFFSET(this_mode)][ref_frame] = var;
@@ -3494,7 +3502,7 @@
ms_stat.num_nonskipped_searches[bsize][DC_PRED]++;
#endif
- if (!x->force_zeromv_skip)
+ if (!x->force_zeromv_skip_for_blk)
estimate_intra_mode(cpi, x, bsize, best_early_term,
ref_costs_single[INTRA_FRAME], reuse_inter_pred,
&orig_dst, tmp, &this_mode_pred, &best_rdc,
@@ -3507,7 +3515,7 @@
// Check for IDTX: based only on Y channel, so avoid when color_sen is set.
if (cpi->oxcf.tune_cfg.content == AOM_CONTENT_SCREEN && !skip_idtx_palette &&
- !cpi->oxcf.txfm_cfg.use_inter_dct_only && !x->force_zeromv_skip &&
+ !cpi->oxcf.txfm_cfg.use_inter_dct_only && !x->force_zeromv_skip_for_blk &&
is_inter_mode(best_pickmode.best_mode) &&
(!cpi->sf.rt_sf.prune_idtx_nonrd ||
(cpi->sf.rt_sf.prune_idtx_nonrd && bsize <= BLOCK_32X32 &&
@@ -3545,7 +3553,7 @@
av1_allow_palette(cpi->common.features.allow_screen_content_tools,
mi->bsize);
try_palette = try_palette && is_mode_intra(best_pickmode.best_mode) &&
- x->source_variance > 0 && !x->force_zeromv_skip &&
+ x->source_variance > 0 && !x->force_zeromv_skip_for_blk &&
(cpi->rc.high_source_sad || x->source_variance > 500);
if (try_palette) {
diff --git a/av1/encoder/partition_search.c b/av1/encoder/partition_search.c
index 9a7aee3..d704bb4 100644
--- a/av1/encoder/partition_search.c
+++ b/av1/encoder/partition_search.c
@@ -2223,6 +2223,46 @@
cm->seq_params->sb_size, bsize, mi_row, mi_col);
}
+static int get_force_zeromv_skip_flag_for_blk(const AV1_COMP *cpi,
+ const MACROBLOCK *x,
+ BLOCK_SIZE bsize) {
+ // SB level flag of 0/1 is used as-is; 2 defers to the block level SAD check
+ if (x->force_zeromv_skip_for_sb < 2) return x->force_zeromv_skip_for_sb;
+
+ // For blocks of size equal to superblock size, the decision has already
+ // been made at superblock level. Hence zeromv-skip is not forced here.
+ const AV1_COMMON *const cm = &cpi->common;
+ if (bsize == cm->seq_params->sb_size) return 0;
+
+ const int num_planes = av1_num_planes(cm);
+ const MACROBLOCKD *const xd = &x->e_mbd;
+ const unsigned int thresh_exit_part_y =
+ cpi->zeromv_skip_thresh_exit_part[bsize];
+ const unsigned int thresh_exit_part_uv =
+ CALC_CHROMA_THRESH_FOR_ZEROMV_SKIP(thresh_exit_part_y);
+ const unsigned int thresh_exit_part[MAX_MB_PLANE] = { thresh_exit_part_y,
+ thresh_exit_part_uv,
+ thresh_exit_part_uv };
+ const YV12_BUFFER_CONFIG *const yv12 = get_ref_frame_yv12_buf(cm, LAST_FRAME);
+ const struct scale_factors *const sf =
+ get_ref_scale_factors_const(cm, LAST_FRAME);
+
+ struct buf_2d yv12_mb[MAX_MB_PLANE];
+ av1_setup_pred_block(xd, yv12_mb, yv12, sf, sf, num_planes);
+
+ for (int plane = 0; plane < num_planes; ++plane) {
+ const struct macroblock_plane *const p = &x->plane[plane];
+ const struct macroblockd_plane *const pd = &xd->plane[plane];
+ const BLOCK_SIZE bs =
+ get_plane_block_size(bsize, pd->subsampling_x, pd->subsampling_y);
+ const unsigned int plane_sad = cpi->ppi->fn_ptr[bs].sdf(
+ p->src.buf, p->src.stride, yv12_mb[plane].buf, yv12_mb[plane].stride);
+ assert(plane < MAX_MB_PLANE);
+ if (plane_sad >= thresh_exit_part[plane]) return 0;
+ }
+ return 1; // SAD of every plane is below its threshold
+}
+
/*!\brief Top level function to pick block mode for non-RD optimized case
*
* \ingroup partition_search
@@ -2291,7 +2331,11 @@
p[i].txb_entropy_ctx = ctx->txb_entropy_ctx[i];
}
for (i = 0; i < 2; ++i) pd[i].color_index_map = ctx->color_index_map[i];
- if (!x->force_zeromv_skip) {
+
+ x->force_zeromv_skip_for_blk =
+ get_force_zeromv_skip_flag_for_blk(cpi, x, bsize);
+
+ if (!x->force_zeromv_skip_for_blk) {
x->source_variance = av1_get_perpixel_variance_facade(
cpi, xd, &x->plane[0].src, bsize, AOM_PLANE_Y);
}
diff --git a/av1/encoder/speed_features.c b/av1/encoder/speed_features.c
index ed3e9b4..7e57bdc 100644
--- a/av1/encoder/speed_features.c
+++ b/av1/encoder/speed_features.c
@@ -1406,7 +1406,7 @@
// TODO(marpan): Check settings for speed 7 and 8.
if (speed >= 9) {
sf->rt_sf.prune_idtx_nonrd = 1;
- sf->rt_sf.part_early_exit_zeromv = 1;
+ sf->rt_sf.part_early_exit_zeromv = 2;
sf->rt_sf.skip_lf_screen = 1;
sf->rt_sf.use_nonrd_filter_search = 0;
sf->rt_sf.nonrd_prune_ref_frame_search = 3;
@@ -1421,6 +1421,7 @@
sf->part_sf.disable_8x8_part_based_on_qidx = 1;
sf->rt_sf.set_zeromv_skip_based_on_source_sad = 2;
sf->rt_sf.screen_content_cdef_filter_qindex_thresh = 80;
+ sf->rt_sf.part_early_exit_zeromv = 1;
}
sf->rt_sf.skip_cdef_sb = 1;
sf->rt_sf.use_rtc_tf = 0;
diff --git a/av1/encoder/speed_features.h b/av1/encoder/speed_features.h
index 1888e23..a740cde 100644
--- a/av1/encoder/speed_features.h
+++ b/av1/encoder/speed_features.h
@@ -1550,6 +1550,9 @@
// For nonrd: early exit out of variance partition that sets the
// block size to superblock size, and sets mode to zeromv-last skip.
+ // 0: disabled
+ // 1: zeromv-skip is enabled at SB level only
+ // 2: zeromv-skip is enabled at SB level and coding block level
int part_early_exit_zeromv;
// Early terminate inter mode search based on sse in non-rd path.
diff --git a/av1/encoder/var_based_part.c b/av1/encoder/var_based_part.c
index 53d3d2a..235a1d9 100644
--- a/av1/encoder/var_based_part.c
+++ b/av1/encoder/var_based_part.c
@@ -1373,14 +1373,12 @@
chroma_check(cpi, x, bsize, y_sad_last, y_sad_g, is_key_frame, zero_motion,
uv_sad);
- x->force_zeromv_skip = 0;
+ x->force_zeromv_skip_for_sb = 0;
const bool is_set_force_zeromv_skip =
is_set_force_zeromv_skip_based_on_src_sad(
cpi->sf.rt_sf.set_zeromv_skip_based_on_source_sad,
x->content_state_sb.source_sad_nonrd);
- const unsigned int thresh_exit_part =
- (cm->seq_params->sb_size == BLOCK_64X64) ? 5000 : 10000;
// If the superblock is completely static (zero source sad) and
// the y_sad (relative to LAST ref) is very small, take the sb_size partition
// and exit, and force zeromv_last skip mode for nonrd_pickmode.
@@ -1391,18 +1389,25 @@
cpi->oxcf.q_cfg.aq_mode == CYCLIC_REFRESH_AQ &&
cpi->cyclic_refresh->apply_cyclic_refresh &&
segment_id == CR_SEGMENT_ID_BASE && is_set_force_zeromv_skip &&
- ref_frame_partition == LAST_FRAME && xd->mi[0]->mv[0].as_int == 0 &&
- y_sad < thresh_exit_part && uv_sad[0]<(3 * thresh_exit_part)>> 2 &&
- uv_sad[1]<(3 * thresh_exit_part)>> 2) {
+ ref_frame_partition == LAST_FRAME && xd->mi[0]->mv[0].as_int == 0) {
const int block_width = mi_size_wide[cm->seq_params->sb_size];
const int block_height = mi_size_high[cm->seq_params->sb_size];
+ const unsigned int thresh_exit_part_y =
+ cpi->zeromv_skip_thresh_exit_part[bsize];
+ const unsigned int thresh_exit_part_uv =
+ CALC_CHROMA_THRESH_FOR_ZEROMV_SKIP(thresh_exit_part_y);
if (mi_col + block_width <= tile->mi_col_end &&
- mi_row + block_height <= tile->mi_row_end) {
+ mi_row + block_height <= tile->mi_row_end &&
+ y_sad < thresh_exit_part_y && uv_sad[0] < thresh_exit_part_uv &&
+ uv_sad[1] < thresh_exit_part_uv) {
set_block_size(cpi, mi_row, mi_col, bsize);
- x->force_zeromv_skip = 1;
+ x->force_zeromv_skip_for_sb = 1;
if (vt2) aom_free(vt2);
if (vt) aom_free(vt);
return 0;
+ } else if (x->content_state_sb.source_sad_nonrd == kZeroSad &&
+ cpi->sf.rt_sf.part_early_exit_zeromv >= 2) {
+ x->force_zeromv_skip_for_sb = 2;
}
}
diff --git a/av1/encoder/var_based_part.h b/av1/encoder/var_based_part.h
index 0136268..7febc0e 100644
--- a/av1/encoder/var_based_part.h
+++ b/av1/encoder/var_based_part.h
@@ -28,6 +28,8 @@
100 // Use increased thresholds for midres for speed 9 when qindex is above
// this threshold
+#define CALC_CHROMA_THRESH_FOR_ZEROMV_SKIP(thresh_exit_part) \
+ ((3 * (thresh_exit_part)) >> 2) /* 3/4 of thresh_exit_part */
/*!\brief Set the thresholds for variance based partition.
*
* Set the variance split thresholds for following the block sizes: