rtc: Speed 11 for video mode, for resolutions < 720p.
This is a very aggressive fast speed setting for
video in real-time mode (RTC).
-force single reference
-aggressive cdef skip
-selective cdf_update
-turn off motion interpolation filter search
-use only DC for intra-modes
-increase thresholds for source_sad and transform
skipping test
-faster partitioning - fixed per superblock
bdrate and IC changes relative to speed 10 on rtc_derf:
~29% IC speedup and ~26% bdrate loss.
To be tuned/adjusted further.
Change-Id: Ia14d35d6d6ba81532e904c136a7ca388ed4674a4
diff --git a/av1/encoder/encodeframe.c b/av1/encoder/encodeframe.c
index d73840c..a18d294 100644
--- a/av1/encoder/encodeframe.c
+++ b/av1/encoder/encodeframe.c
@@ -535,11 +535,17 @@
}
#endif
// Set the partition
- if (sf->part_sf.partition_search_type == FIXED_PARTITION || seg_skip) {
+ if (sf->part_sf.partition_search_type == FIXED_PARTITION || seg_skip ||
+ (sf->rt_sf.use_fast_fixed_part &&
+ x->content_state_sb.source_sad_nonrd < kMedSad)) {
// set a fixed-size partition
av1_set_offsets(cpi, tile_info, x, mi_row, mi_col, sb_size);
- const BLOCK_SIZE bsize =
- seg_skip ? sb_size : sf->part_sf.fixed_partition_size;
+ BLOCK_SIZE bsize_select = sf->part_sf.fixed_partition_size;
+ if (sf->rt_sf.use_fast_fixed_part &&
+ x->content_state_sb.source_sad_nonrd < kLowSad) {
+ bsize_select = BLOCK_64X64;
+ }
+ const BLOCK_SIZE bsize = seg_skip ? sb_size : bsize_select;
av1_set_fixed_partitioning(cpi, tile_info, mi, mi_row, mi_col, bsize);
} else if (sf->part_sf.partition_search_type == VAR_BASED_PARTITION) {
// set a variance-based partition
diff --git a/av1/encoder/encodeframe_utils.c b/av1/encoder/encodeframe_utils.c
index 94298c8..7654864 100644
--- a/av1/encoder/encodeframe_utils.c
+++ b/av1/encoder/encodeframe_utils.c
@@ -1398,6 +1398,11 @@
36000 }; // ~3*3*(64*64)
uint64_t avg_source_sse_threshold_high = 1000000; // ~15*15*(64*64)
+ if (cpi->sf.rt_sf.increase_source_sad_thresh) {
+ avg_source_sse_threshold_high = avg_source_sse_threshold_high << 1;
+ avg_source_sse_threshold_low[0] = avg_source_sse_threshold_low[0] << 1;
+ avg_source_sse_threshold_verylow = avg_source_sse_threshold_verylow << 1;
+ }
uint64_t sum_sq_thresh = 10000; // sum = sqrt(thresh / 64*64)) ~1.5
src_y += src_offset;
last_src_y += last_src_offset;
diff --git a/av1/encoder/encoder.c b/av1/encoder/encoder.c
index 117150f..452a3a4 100644
--- a/av1/encoder/encoder.c
+++ b/av1/encoder/encoder.c
@@ -3821,6 +3821,8 @@
if (cpi->sf.rt_sf.disable_cdf_update_non_reference_frame &&
cpi->ppi->rtc_ref.non_reference_frame && cpi->rc.frames_since_key > 2)
features->disable_cdf_update = 1;
+ else if (cpi->sf.rt_sf.selective_cdf_update)
+ features->disable_cdf_update = selective_disable_cdf_rtc(cpi);
else
features->disable_cdf_update = 0;
break;
diff --git a/av1/encoder/nonrd_pickmode.c b/av1/encoder/nonrd_pickmode.c
index 1836869..f939b6d 100644
--- a/av1/encoder/nonrd_pickmode.c
+++ b/av1/encoder/nonrd_pickmode.c
@@ -577,7 +577,7 @@
struct macroblock_plane *const p = &x->plane[AOM_PLANE_Y];
const uint32_t dc_quant = p->dequant_QTX[0];
const uint32_t ac_quant = p->dequant_QTX[1];
- const int64_t dc_thr = dc_quant * dc_quant >> 6;
+ int64_t dc_thr = dc_quant * dc_quant >> 6;
int64_t ac_thr = ac_quant * ac_quant >> 6;
const int bw = b_width_log2_lookup[bsize];
const int bh = b_height_log2_lookup[bsize];
@@ -597,6 +597,11 @@
#endif
+ if (cpi->sf.rt_sf.increase_source_sad_thresh) {
+ dc_thr = dc_thr << 1;
+ ac_thr = ac_thr << 2;
+ }
+
for (int k = 0; k < num_blk; k++) {
// Check if all ac coefficients can be quantized to zero.
if (!(var_tx[k] < ac_thr || var == 0)) {
@@ -626,10 +631,12 @@
const BLOCK_SIZE uv_bsize = get_plane_block_size(
bsize, puvd->subsampling_x, puvd->subsampling_y);
// Adjust these thresholds for UV.
+ const int shift_ac = cpi->sf.rt_sf.increase_source_sad_thresh ? 5 : 3;
+ const int shift_dc = cpi->sf.rt_sf.increase_source_sad_thresh ? 4 : 3;
const int64_t uv_dc_thr =
- (puv->dequant_QTX[0] * puv->dequant_QTX[0]) >> 3;
+ (puv->dequant_QTX[0] * puv->dequant_QTX[0]) >> shift_dc;
const int64_t uv_ac_thr =
- (puv->dequant_QTX[1] * puv->dequant_QTX[1]) >> 3;
+ (puv->dequant_QTX[1] * puv->dequant_QTX[1]) >> shift_ac;
av1_enc_build_inter_predictor(cm, xd, mi_row, mi_col, NULL, bsize,
plane, plane);
var_uv[j] = cpi->ppi->fn_ptr[uv_bsize].vf(puv->src.buf, puv->src.stride,
diff --git a/av1/encoder/ratectrl.c b/av1/encoder/ratectrl.c
index dcc0c5a..2000111 100644
--- a/av1/encoder/ratectrl.c
+++ b/av1/encoder/ratectrl.c
@@ -2897,10 +2897,12 @@
for (int i = 0; i < REF_FRAMES; ++i) rtc_ref->refresh[i] = 0;
// Set the reference frame flags.
ext_flags->ref_frame_flags ^= AOM_LAST_FLAG;
- ext_flags->ref_frame_flags ^= AOM_ALT_FLAG;
- ext_flags->ref_frame_flags ^= AOM_GOLD_FLAG;
- if (cpi->sf.rt_sf.ref_frame_comp_nonrd[1])
- ext_flags->ref_frame_flags ^= AOM_LAST2_FLAG;
+ if (!cpi->sf.rt_sf.force_only_last_ref) {
+ ext_flags->ref_frame_flags ^= AOM_ALT_FLAG;
+ ext_flags->ref_frame_flags ^= AOM_GOLD_FLAG;
+ if (cpi->sf.rt_sf.ref_frame_comp_nonrd[1])
+ ext_flags->ref_frame_flags ^= AOM_LAST2_FLAG;
+ }
const int sh = 6;
// Moving index slot for last: 0 - (sh - 1).
if (frame_number > 1) last_idx = ((frame_number - 1) % sh);
diff --git a/av1/encoder/speed_features.c b/av1/encoder/speed_features.c
index 6b18775..4c0a3bd 100644
--- a/av1/encoder/speed_features.c
+++ b/av1/encoder/speed_features.c
@@ -1453,7 +1453,25 @@
if (speed >= 9) sf->lpf_sf.cdef_pick_method = CDEF_PICK_FROM_Q;
if (speed >= 10) sf->rt_sf.nonrd_aggressive_skip = 1;
}
-
+ // TODO(marpan): Tune settings for speed 11 video mode,
+ // for resolutions below 720p.
+ if (speed >= 11 && !is_720p_or_larger &&
+ cpi->oxcf.tune_cfg.content != AOM_CONTENT_SCREEN) {
+ sf->rt_sf.skip_cdef_sb = 2;
+ sf->rt_sf.force_only_last_ref = 1;
+ sf->rt_sf.selective_cdf_update = 1;
+ sf->rt_sf.use_nonrd_filter_search = 0;
+ if (is_360p_or_larger) {
+ sf->part_sf.fixed_partition_size = BLOCK_32X32;
+ sf->rt_sf.use_fast_fixed_part = 1;
+ }
+ sf->rt_sf.increase_source_sad_thresh = 1;
+ sf->rt_sf.part_early_exit_zeromv = 2;
+ sf->rt_sf.set_zeromv_skip_based_on_source_sad = 2;
+ for (int i = 0; i < BLOCK_SIZES; ++i) {
+ sf->rt_sf.intra_y_mode_bsize_mask_nrd[i] = INTRA_DC;
+ }
+ }
// Setting for SVC, or when the ref_frame_config control is
// used to set the reference structure.
if (cpi->ppi->use_svc || cpi->ppi->rtc_ref.set_ref_frame_config) {
@@ -1769,6 +1787,8 @@
FLAG_EARLY_TERMINATE;
sf->rt_sf.var_part_split_threshold_shift = 5;
if (!frame_is_intra_only(&cpi->common)) sf->rt_sf.var_part_based_on_qidx = 1;
+ sf->rt_sf.use_fast_fixed_part = 0;
+ sf->rt_sf.increase_source_sad_thresh = 0;
if (speed >= 6) {
sf->mv_sf.use_fullpel_costlist = 1;
@@ -2256,6 +2276,8 @@
rt_sf->enable_ref_short_signaling = false;
rt_sf->check_globalmv_on_single_ref = true;
rt_sf->increase_color_thresh_palette = false;
+ rt_sf->selective_cdf_update = 0;
+ rt_sf->force_only_last_ref = 0;
}
static fractional_mv_step_fp
diff --git a/av1/encoder/speed_features.h b/av1/encoder/speed_features.h
index 14cd874..d7e8989 100644
--- a/av1/encoder/speed_features.h
+++ b/av1/encoder/speed_features.h
@@ -1654,10 +1654,24 @@
// rc->high_source_sad = 0 (non slide-changes), and color sensitivity off.
int skip_cdef_sb;
+ // Force selective cdf update.
+ int selective_cdf_update;
+
+ // Force only single reference (LAST) for prediction.
+ int force_only_last_ref;
+
// Forces larger partition blocks in variance based partitioning for intra
// frames
int force_large_partition_blocks_intra;
+ // Use fixed partition for superblocks based on source_sad.
+ // 0: disabled
+ // 1: enabled
+ int use_fast_fixed_part;
+
+ // Increase source_sad thresholds in nonrd pickmode.
+ int increase_source_sad_thresh;
+
// Skip evaluation of no split in tx size selection for merge partition
int skip_tx_no_split_var_based_partition;
diff --git a/av1/encoder/var_based_part.c b/av1/encoder/var_based_part.c
index d545600..53abfa0 100644
--- a/av1/encoder/var_based_part.c
+++ b/av1/encoder/var_based_part.c
@@ -1528,7 +1528,9 @@
int set_zeromv_skip_based_on_source_sad, SOURCE_SAD source_sad_nonrd) {
if (set_zeromv_skip_based_on_source_sad == 0) return false;
- if (set_zeromv_skip_based_on_source_sad >= 2)
+ if (set_zeromv_skip_based_on_source_sad >= 3)
+ return source_sad_nonrd <= kLowSad;
+ else if (set_zeromv_skip_based_on_source_sad >= 2)
return source_sad_nonrd <= kVeryLowSad;
else if (set_zeromv_skip_based_on_source_sad >= 1)
return source_sad_nonrd == kZeroSad;
@@ -1545,12 +1547,13 @@
cpi->sf.rt_sf.set_zeromv_skip_based_on_source_sad,
x->content_state_sb.source_sad_nonrd))
return false;
+ int shift = cpi->sf.rt_sf.increase_source_sad_thresh ? 1 : 0;
const int block_width = mi_size_wide[cm->seq_params->sb_size];
const int block_height = mi_size_high[cm->seq_params->sb_size];
const unsigned int thresh_exit_part_y =
- cpi->zeromv_skip_thresh_exit_part[bsize];
+ cpi->zeromv_skip_thresh_exit_part[bsize] << shift;
unsigned int thresh_exit_part_uv =
- CALC_CHROMA_THRESH_FOR_ZEROMV_SKIP(thresh_exit_part_y);
+ CALC_CHROMA_THRESH_FOR_ZEROMV_SKIP(thresh_exit_part_y) << shift;
// Be more aggressive in UV threshold if source_sad >= VeryLowSad
// to suppress visual artifact caused by the speed feature:
// set_zeromv_skip_based_on_source_sad = 2. For now only for