rtc: SAD-based sb-level var-based-part in rd-path
Improved rtc-derf speed 5 and 6 by tuning the qindex thresholds
using a block-level, SSE-based kLowSad flag. For a low-SAD sb,
the qindex thresholds are tuned accordingly.
                 Instruction Count       BD-Rate Loss(%)
cpu   Test-set   Reduction(%)       avg.psnr   ovr.psnr   ssim
 5    rtc_derf   2.309              0.2206     0.2348     0.2168
 6    rtc_derf   4.439              0.3192     0.3751     0.3016
STATS_CHANGED
Change-Id: I2ab717942de521cfec9df4aa594107919947e598
diff --git a/av1/encoder/block.h b/av1/encoder/block.h
index cca23e3..9692289 100644
--- a/av1/encoder/block.h
+++ b/av1/encoder/block.h
@@ -766,7 +766,10 @@
} SOURCE_SAD;
typedef struct {
- SOURCE_SAD source_sad;
+ //! SAD levels in non-rd path for var-based part and inter-mode search
+ SOURCE_SAD source_sad_nonrd;
+ //! SAD levels in rd-path for var-based part qindex thresholds
+ SOURCE_SAD source_sad_rd;
int lighting_change;
int low_sumdiff;
} CONTENT_STATE_SB;
diff --git a/av1/encoder/encodeframe.c b/av1/encoder/encodeframe.c
index 418c907..d8187f6 100644
--- a/av1/encoder/encodeframe.c
+++ b/av1/encoder/encodeframe.c
@@ -482,7 +482,7 @@
if (!cpi->sf.rt_sf.check_scene_detection || cpi->rc.frame_source_sad > 0)
av1_source_content_sb(cpi, x, mi_row, mi_col);
else
- x->content_state_sb.source_sad = kZeroSad;
+ x->content_state_sb.source_sad_nonrd = kZeroSad;
}
#if CONFIG_RT_ML_PARTITIONING
if (sf->part_sf.partition_search_type == ML_BASED_PARTITION) {
@@ -637,6 +637,16 @@
init_encode_rd_sb(cpi, td, tile_data, sms_root, &dummy_rdc, mi_row, mi_col,
1);
+ // Grade the temporal variation of the sb. The grade will be used to decide
+ // the partition thresholds for coding blocks.
+ if ((sf->rt_sf.var_part_based_on_qidx >= 3) &&
+ (cm->width * cm->height <= 352 * 288)) {
+ if (cpi->rc.frame_source_sad > 0)
+ av1_source_content_sb(cpi, x, mi_row, mi_col);
+ else
+ x->content_state_sb.source_sad_rd = kZeroSad;
+ }
+
// Encode the superblock
if (sf->part_sf.partition_search_type == VAR_BASED_PARTITION) {
// partition search starting from a variance-based partition
@@ -851,7 +861,8 @@
x->color_sensitivity_sb[1] = 0;
x->color_sensitivity[0] = 0;
x->color_sensitivity[1] = 0;
- x->content_state_sb.source_sad = kMedSad;
+ x->content_state_sb.source_sad_nonrd = kMedSad;
+ x->content_state_sb.source_sad_rd = kMedSad;
x->content_state_sb.lighting_change = 0;
x->content_state_sb.low_sumdiff = 0;
x->force_zeromv_skip = 0;
diff --git a/av1/encoder/encodeframe_utils.c b/av1/encoder/encodeframe_utils.c
index 0128156..09e998c 100644
--- a/av1/encoder/encodeframe_utils.c
+++ b/av1/encoder/encodeframe_utils.c
@@ -1322,7 +1322,8 @@
uint8_t *last_src_y = cpi->last_source->y_buffer;
int last_src_ystride = cpi->last_source->y_stride;
const int offset = cpi->source->y_stride * (mi_row << 2) + (mi_col << 2);
- uint64_t avg_source_sse_threshold = 100000; // ~5*5*(64*64)
+ uint64_t avg_source_sse_threshold[2] = { 100000, // ~5*5*(64*64)
+ 36000 }; // ~3*3*(64*64)
uint64_t avg_source_sse_threshold_high = 1000000; // ~15*15*(64*64)
uint64_t sum_sq_thresh = 10000; // sum = sqrt(thresh / 64*64)) ~1.5
#if CONFIG_AV1_HIGHBITDEPTH
@@ -1333,13 +1334,21 @@
last_src_y += offset;
tmp_variance = cpi->ppi->fn_ptr[bsize].vf(src_y, src_ystride, last_src_y,
last_src_ystride, &tmp_sse);
+ // rd thresholds
+ if (cpi->sf.rt_sf.var_part_based_on_qidx >= 3) {
+ if (tmp_sse < avg_source_sse_threshold[1])
+ x->content_state_sb.source_sad_rd = kLowSad;
+ return;
+ }
+ // nonrd thresholds
if (tmp_sse == 0)
- x->content_state_sb.source_sad = kZeroSad;
- else if (tmp_sse < avg_source_sse_threshold)
- x->content_state_sb.source_sad = kLowSad;
+ x->content_state_sb.source_sad_nonrd = kZeroSad;
+ else if (tmp_sse < avg_source_sse_threshold[0])
+ x->content_state_sb.source_sad_nonrd = kLowSad;
else if (tmp_sse > avg_source_sse_threshold_high)
- x->content_state_sb.source_sad = kHighSad;
+ x->content_state_sb.source_sad_nonrd = kHighSad;
+
// Detect large lighting change.
// Note: tmp_sse - tmp_variance = ((sum * sum) >> 12)
if (tmp_sse > 0) {
diff --git a/av1/encoder/nonrd_pickmode.c b/av1/encoder/nonrd_pickmode.c
index 46bd24c..54ddce1 100644
--- a/av1/encoder/nonrd_pickmode.c
+++ b/av1/encoder/nonrd_pickmode.c
@@ -1203,7 +1203,7 @@
int left_mv_valid = 0;
int above_row = INVALID_MV_ROW_COL, above_col = INVALID_MV_ROW_COL;
int left_row = INVALID_MV_ROW_COL, left_col = INVALID_MV_ROW_COL;
- if (bsize >= BLOCK_64X64 && content_state_sb.source_sad != kHighSad &&
+ if (bsize >= BLOCK_64X64 && content_state_sb.source_sad_nonrd != kHighSad &&
spatial_variance < 300 &&
(mv_row > 16 || mv_row < -16 || mv_col > 16 || mv_col < -16)) {
this_rdc->rdcost = this_rdc->rdcost << 2;
@@ -1971,8 +1971,8 @@
// Keep golden (longer-term) reference if sb has high source sad, for
// frames whose average souce_sad is below threshold. This is to try to
// capture case where only part of frame has high motion.
- if (x->content_state_sb.source_sad >= kHighSad && bsize <= BLOCK_32X32 &&
- cpi->rc.frame_source_sad < 50000)
+ if (x->content_state_sb.source_sad_nonrd >= kHighSad &&
+ bsize <= BLOCK_32X32 && cpi->rc.frame_source_sad < 50000)
use_golden_ref_frame = 1;
}
@@ -2094,13 +2094,13 @@
do_early_exit_rdthresh = 0;
}
if (x->source_variance < AOMMAX(50, (spatial_var_thresh >> 1)) &&
- x->content_state_sb.source_sad >= kHighSad)
+ x->content_state_sb.source_sad_nonrd >= kHighSad)
force_intra_check = 1;
// For big blocks worth checking intra (since only DC will be checked),
// even if best_early_term is set.
if (bsize >= BLOCK_32X32) best_early_term = 0;
} else if (cpi->sf.rt_sf.source_metrics_sb_nonrd &&
- x->content_state_sb.source_sad == kLowSad) {
+ x->content_state_sb.source_sad_nonrd == kLowSad) {
perform_intra_pred = 0;
}
@@ -2158,7 +2158,7 @@
// For spatially flat blocks with zero motion only check
// DC mode.
if (cpi->sf.rt_sf.source_metrics_sb_nonrd &&
- x->content_state_sb.source_sad == kZeroSad &&
+ x->content_state_sb.source_sad_nonrd == kZeroSad &&
x->source_variance == 0 && this_mode != DC_PRED)
continue;
}
@@ -2288,7 +2288,7 @@
return 1;
}
- if (content_state_sb.source_sad != kHighSad && bsize >= BLOCK_64X64 &&
+ if (content_state_sb.source_sad_nonrd != kHighSad && bsize >= BLOCK_64X64 &&
force_skip_low_temp_var && mode == NEWMV) {
return 1;
}
@@ -2669,7 +2669,7 @@
use_modeled_non_rd_cost =
(quant_params->base_qindex > 120 && x->source_variance > 100 &&
bsize <= BLOCK_16X16 && !x->content_state_sb.lighting_change &&
- x->content_state_sb.source_sad != kHighSad);
+ x->content_state_sb.source_sad_nonrd != kHighSad);
}
#if COLLECT_PICK_MODE_STAT
@@ -2795,9 +2795,9 @@
// has motion skip the modes with zero motion for flat blocks.
if (cpi->sf.rt_sf.source_metrics_sb_nonrd) {
if ((frame_mv[this_mode][ref_frame].as_int != 0 &&
- x->content_state_sb.source_sad == kZeroSad) ||
+ x->content_state_sb.source_sad_nonrd == kZeroSad) ||
(frame_mv[this_mode][ref_frame].as_int == 0 &&
- x->content_state_sb.source_sad != kZeroSad &&
+ x->content_state_sb.source_sad_nonrd != kZeroSad &&
x->source_variance == 0))
continue;
}
diff --git a/av1/encoder/speed_features.c b/av1/encoder/speed_features.c
index 6167a48..8c0fbec 100644
--- a/av1/encoder/speed_features.c
+++ b/av1/encoder/speed_features.c
@@ -1502,6 +1502,7 @@
FLAG_SKIP_COMP_BESTINTRA | FLAG_SKIP_INTRA_LOWVAR |
FLAG_EARLY_TERMINATE;
sf->rt_sf.var_part_split_threshold_shift = 5;
+ if (!frame_is_intra_only(&cpi->common)) sf->rt_sf.var_part_based_on_qidx = 3;
// For SVC: use better mv search on base temporal layers, and only
// on base spatial layer if highest resolution is above 640x360.
@@ -1528,6 +1529,8 @@
sf->rt_sf.gf_refresh_based_on_qp = 1;
sf->rt_sf.prune_inter_modes_wrt_gf_arf_based_on_sad = 1;
sf->rt_sf.var_part_split_threshold_shift = 7;
+ if (!frame_is_intra_only(&cpi->common))
+ sf->rt_sf.var_part_based_on_qidx = 4;
}
if (speed >= 7) {
diff --git a/av1/encoder/var_based_part.c b/av1/encoder/var_based_part.c
index 0511cf7..c59208a 100644
--- a/av1/encoder/var_based_part.c
+++ b/av1/encoder/var_based_part.c
@@ -412,7 +412,8 @@
static AOM_INLINE void set_vbp_thresholds(AV1_COMP *cpi, int64_t thresholds[],
int q, int content_lowsumdiff,
- int source_sad, int segment_id) {
+ int source_sad_nonrd,
+ int source_sad_rd, int segment_id) {
AV1_COMMON *const cm = &cpi->common;
const int is_key_frame = frame_is_intra_only(cm);
const int threshold_multiplier = is_key_frame ? 120 : 1;
@@ -484,10 +485,15 @@
if (cm->width >= 1280 && cm->height >= 720)
thresholds[3] = thresholds[3] << 1;
if (cm->width * cm->height <= 352 * 288) {
- const int qindex_thr[3][2] = { { 200, 220 }, { 200, 210 }, { 170, 220 } };
- assert(cpi->sf.rt_sf.var_part_based_on_qidx < 3);
- int qindex_low_thr = qindex_thr[cpi->sf.rt_sf.var_part_based_on_qidx][0];
- int qindex_high_thr = qindex_thr[cpi->sf.rt_sf.var_part_based_on_qidx][1];
+ const int qindex_thr[5][2] = {
+ { 200, 220 }, { 200, 210 }, { 170, 220 }, { 140, 170 }, { 120, 150 }
+ };
+ int th_idx = cpi->sf.rt_sf.var_part_based_on_qidx;
+ if (cpi->sf.rt_sf.var_part_based_on_qidx >= 3)
+ th_idx =
+ (source_sad_rd <= kLowSad) ? cpi->sf.rt_sf.var_part_based_on_qidx : 0;
+ const int qindex_low_thr = qindex_thr[th_idx][0];
+ const int qindex_high_thr = qindex_thr[th_idx][1];
if (current_qindex >= qindex_high_thr) {
threshold_base = (5 * threshold_base) >> 1;
thresholds[1] = threshold_base >> 3;
@@ -541,7 +547,7 @@
thresholds[3] = INT32_MAX;
if (segment_id == 0) {
thresholds[1] <<= 2;
- thresholds[2] <<= (source_sad == kLowSad) ? 5 : 4;
+ thresholds[2] <<= (source_sad_nonrd == kLowSad) ? 5 : 4;
} else {
thresholds[1] <<= 1;
thresholds[2] <<= 3;
@@ -552,7 +558,8 @@
// (i.e, cpi->rc.avg_source_sad is very large, in which case all blocks
// have high source sad).
} else if (cm->width * cm->height > 640 * 480 && segment_id == 0 &&
- (source_sad != kHighSad || cpi->rc.avg_source_sad > 50000)) {
+ (source_sad_nonrd != kHighSad ||
+ cpi->rc.avg_source_sad > 50000)) {
thresholds[0] = (3 * thresholds[0]) >> 1;
thresholds[3] = INT32_MAX;
if (current_qindex > QINDEX_LARGE_BLOCK_THR) {
@@ -562,7 +569,8 @@
(int)((1 - weight) * (thresholds[2] << 1) + weight * thresholds[2]);
}
} else if (current_qindex > QINDEX_LARGE_BLOCK_THR && segment_id == 0 &&
- (source_sad != kHighSad || cpi->rc.avg_source_sad > 50000)) {
+ (source_sad_nonrd != kHighSad ||
+ cpi->rc.avg_source_sad > 50000)) {
thresholds[1] =
(int)((1 - weight) * (thresholds[1] << 2) + weight * thresholds[1]);
thresholds[2] =
@@ -857,7 +865,7 @@
return;
} else {
set_vbp_thresholds(cpi, cpi->vbp_info.thresholds, q, content_lowsumdiff, 0,
- 0);
+ 0, 0);
// The threshold below is not changed locally.
cpi->vbp_info.threshold_minmax = 15 + (q >> 3);
}
@@ -1145,11 +1153,13 @@
const int q =
av1_get_qindex(&cm->seg, segment_id, cm->quant_params.base_qindex);
set_vbp_thresholds(cpi, thresholds, q, x->content_state_sb.low_sumdiff,
- x->content_state_sb.source_sad, 1);
+ x->content_state_sb.source_sad_nonrd,
+ x->content_state_sb.source_sad_rd, 1);
} else {
set_vbp_thresholds(cpi, thresholds, cm->quant_params.base_qindex,
x->content_state_sb.low_sumdiff,
- x->content_state_sb.source_sad, 0);
+ x->content_state_sb.source_sad_nonrd,
+ x->content_state_sb.source_sad_rd, 0);
}
// For non keyframes, disable 4x4 average for low resolution when speed = 8
@@ -1211,7 +1221,7 @@
cpi->oxcf.q_cfg.aq_mode == CYCLIC_REFRESH_AQ &&
cpi->cyclic_refresh->apply_cyclic_refresh &&
segment_id == CR_SEGMENT_ID_BASE &&
- x->content_state_sb.source_sad == kZeroSad &&
+ x->content_state_sb.source_sad_nonrd == kZeroSad &&
ref_frame_partition == LAST_FRAME && xd->mi[0]->mv[0].as_int == 0 &&
y_sad < thresh_exit_part) {
const int block_width = mi_size_wide[cm->seq_params->sb_size];
@@ -1288,7 +1298,7 @@
(thresholds[2] >> 1) &&
maxvar_16x16[m][i] > thresholds[2]) ||
(cpi->sf.rt_sf.force_large_partition_blocks &&
- x->content_state_sb.source_sad > kLowSad &&
+ x->content_state_sb.source_sad_nonrd > kLowSad &&
cpi->rc.frame_source_sad < 20000 &&
maxvar_16x16[m][i] > (thresholds[2] >> 4) &&
maxvar_16x16[m][i] > (minvar_16x16[m][i] << 2)))) {