rtc: Reduce blk-level MV pel precision
This CL extends the speed feature force_half_pel_block (renamed to
reduce_mv_pel_precision) to speeds 7 and 8 of the rtc set with a new
algorithm. It reduces MV precision for relatively static (e.g. background),
low-complexity large blocks using source variance, SAD, bsize and qp.
The speed-up / BD Rate trade-off is:
---------------------------------------------------------
|cpu|Resolution|Instr. Count| BD-Rate Drop (%) |
| | |Reduction(%)|avg. psnr|ovr. psnr| ssim |
---------------------------------------------------------
| 7 | HDRES | 2.256 | 0.2120 | 0.2121 | 0.2066 |
| 8 | HDRES | 2.195 | 0.1821 | 0.1830 | 0.1826 |
---------------------------------------------------------
Best- and worst-case BD-rate drops:
----------------------------------------------------------
| | | | BD Rate Drop (%) |
| |cpu| Clip | (-ve: Gain, +ve: Loss) |
| | | |-----------------------------|
| | | |avg.psnr |ovr. psnr| ssim |
|----------------------------------------------------------|
| Best | 7 |testnoise720p | -0.3958 | -0.2445 | -0.7787 |
| | 8 |testnoise720p | -0.7229 | -0.4860 | -0.7079 |
|----------------------------------------------------------|
|Worst | 7 |vidyo1 (avg.psnr)| 0.8261 | 0.7455 | 0.5534 |
| | 7 |mj1vc720p (ssim) | 0.6386 | 0.6482 | 0.7535 |
| | 8 |mj1vc720p | 0.8805 | 0.8451 | 0.9263 |
----------------------------------------------------------
No changes for speeds 9 and 10, or for the rtc-derf set.
STATS_CHANGED
Change-Id: I7f9ba58274e003b8cfc188cd80099b28f187be67
diff --git a/av1/encoder/block.h b/av1/encoder/block.h
index 0ad118d..9fb37e4 100644
--- a/av1/encoder/block.h
+++ b/av1/encoder/block.h
@@ -807,8 +807,9 @@
} SOURCE_SAD;
typedef struct {
- //! SAD levels in non-rd path for var-based part and inter-mode search
- SOURCE_SAD source_sad_nonrd;
+ //! SAD levels in non-rd path
+ //! 0: var-based part and inter-mode search, 1: blk-level mv pel precision
+ SOURCE_SAD source_sad_nonrd[2];
//! SAD levels in rd-path for var-based part qindex thresholds
SOURCE_SAD source_sad_rd;
int lighting_change;
diff --git a/av1/encoder/encodeframe.c b/av1/encoder/encodeframe.c
index 2a395aa..64cca4e 100644
--- a/av1/encoder/encodeframe.c
+++ b/av1/encoder/encodeframe.c
@@ -797,10 +797,12 @@
if (cpi->sf.rt_sf.source_metrics_sb_nonrd &&
cpi->svc.number_spatial_layers <= 1 &&
cm->current_frame.frame_type != KEY_FRAME) {
- if (!cpi->sf.rt_sf.check_scene_detection || cpi->rc.frame_source_sad > 0)
+ if (!cpi->sf.rt_sf.check_scene_detection || cpi->rc.frame_source_sad > 0) {
calc_src_content = true;
- else
- x->content_state_sb.source_sad_nonrd = kZeroSad;
+ } else {
+ x->content_state_sb.source_sad_nonrd[0] = kZeroSad;
+ x->content_state_sb.source_sad_nonrd[1] = kZeroSad;
+ }
} else if ((cpi->sf.rt_sf.var_part_based_on_qidx >= 1) &&
(cm->width * cm->height <= 352 * 288)) {
if (cpi->rc.frame_source_sad > 0)
@@ -891,7 +893,8 @@
x->color_sensitivity_sb_g[1] = 0;
x->color_sensitivity[0] = 0;
x->color_sensitivity[1] = 0;
- x->content_state_sb.source_sad_nonrd = kMedSad;
+ x->content_state_sb.source_sad_nonrd[0] = kMedSad;
+ x->content_state_sb.source_sad_nonrd[1] = kMedSad;
x->content_state_sb.source_sad_rd = kMedSad;
x->content_state_sb.lighting_change = 0;
x->content_state_sb.low_sumdiff = 0;
diff --git a/av1/encoder/encodeframe_utils.c b/av1/encoder/encodeframe_utils.c
index 3846b78..a2eddd4 100644
--- a/av1/encoder/encodeframe_utils.c
+++ b/av1/encoder/encodeframe_utils.c
@@ -1321,8 +1321,10 @@
uint8_t *last_src_y = cpi->last_source->y_buffer;
int last_src_ystride = cpi->last_source->y_stride;
const int offset = cpi->source->y_stride * (mi_row << 2) + (mi_col << 2);
- uint64_t avg_source_sse_threshold[2] = { 100000, // ~5*5*(64*64)
- 36000 }; // ~3*3*(64*64)
+ uint64_t avg_source_sse_threshold_low[3] = { 100000, // ~5*5*(64*64)
+ 36000, // ~3*3*(64*64)
+ 10000 }; // ~1.5*1.5*(64*64)
+
uint64_t avg_source_sse_threshold_high = 1000000; // ~15*15*(64*64)
uint64_t sum_sq_thresh = 10000; // sum = sqrt(thresh / 64*64)) ~1.5
#if CONFIG_AV1_HIGHBITDEPTH
@@ -1334,16 +1336,21 @@
tmp_variance = cpi->ppi->fn_ptr[bsize].vf(src_y, src_ystride, last_src_y,
last_src_ystride, &tmp_sse);
// rd thresholds
- if (tmp_sse < avg_source_sse_threshold[1])
+ if (tmp_sse < avg_source_sse_threshold_low[1])
x->content_state_sb.source_sad_rd = kLowSad;
// nonrd thresholds
if (tmp_sse == 0)
- x->content_state_sb.source_sad_nonrd = kZeroSad;
- else if (tmp_sse < avg_source_sse_threshold[0])
- x->content_state_sb.source_sad_nonrd = kLowSad;
+ x->content_state_sb.source_sad_nonrd[0] = kZeroSad;
+ else if (tmp_sse < avg_source_sse_threshold_low[0])
+ x->content_state_sb.source_sad_nonrd[0] = kLowSad;
else if (tmp_sse > avg_source_sse_threshold_high)
- x->content_state_sb.source_sad_nonrd = kHighSad;
+ x->content_state_sb.source_sad_nonrd[0] = kHighSad;
+
+ if (tmp_sse == 0)
+ x->content_state_sb.source_sad_nonrd[1] = kZeroSad;
+ else if (tmp_sse < avg_source_sse_threshold_low[2])
+ x->content_state_sb.source_sad_nonrd[1] = kLowSad;
// Detect large lighting change.
// Note: tmp_sse - tmp_variance = ((sum * sum) >> 12)
diff --git a/av1/encoder/nonrd_pickmode.c b/av1/encoder/nonrd_pickmode.c
index cf3af34..1553186 100644
--- a/av1/encoder/nonrd_pickmode.c
+++ b/av1/encoder/nonrd_pickmode.c
@@ -205,19 +205,34 @@
memset(&bp->pmi, 0, sizeof(bp->pmi));
}
-static INLINE int subpel_select(AV1_COMP *cpi, BLOCK_SIZE bsize, int_mv *mv) {
- int mv_thresh = 4;
- const int is_low_resoln =
- (cpi->common.width * cpi->common.height <= 320 * 240);
- mv_thresh = (bsize > BLOCK_32X32) ? 2 : (bsize > BLOCK_16X16) ? 4 : 6;
- if (cpi->rc.avg_frame_low_motion > 0 && cpi->rc.avg_frame_low_motion < 40)
- mv_thresh = 12;
- mv_thresh = (is_low_resoln) ? mv_thresh >> 1 : mv_thresh;
- if (abs(mv->as_fullmv.row) >= mv_thresh ||
- abs(mv->as_fullmv.col) >= mv_thresh)
- return HALF_PEL;
- else
- return cpi->sf.mv_sf.subpel_force_stop;
+static INLINE int subpel_select(AV1_COMP *cpi, MACROBLOCK *x, BLOCK_SIZE bsize,
+ int_mv *mv) {
+ assert(cpi->sf.rt_sf.reduce_mv_pel_precision);
+ if (cpi->sf.rt_sf.reduce_mv_pel_precision == 2) {
+ int mv_thresh = 4;
+ const int is_low_resoln =
+ (cpi->common.width * cpi->common.height <= 320 * 240);
+ mv_thresh = (bsize > BLOCK_32X32) ? 2 : (bsize > BLOCK_16X16) ? 4 : 6;
+ if (cpi->rc.avg_frame_low_motion > 0 && cpi->rc.avg_frame_low_motion < 40)
+ mv_thresh = 12;
+ mv_thresh = (is_low_resoln) ? mv_thresh >> 1 : mv_thresh;
+ if (abs(mv->as_fullmv.row) >= mv_thresh ||
+ abs(mv->as_fullmv.col) >= mv_thresh)
+ return HALF_PEL;
+ } else if (cpi->sf.rt_sf.reduce_mv_pel_precision == 1) {
+ // Reduce MV precision for relatively static (e.g. background), low-complex
+ // large areas
+ const int qband = x->qindex >> (QINDEX_BITS - 2);
+ assert(qband < 4);
+ if (x->content_state_sb.source_sad_nonrd[1] <= kLowSad &&
+ bsize > BLOCK_16X16 && qband != 0) {
+ if (x->source_variance < 500)
+ return FULL_PEL;
+ else if (x->source_variance < 5000)
+ return HALF_PEL;
+ }
+ }
+ return cpi->sf.mv_sf.subpel_force_stop;
}
/*!\brief Runs Motion Estimation for a specific block and specific ref frame.
@@ -311,9 +326,9 @@
SUBPEL_MOTION_SEARCH_PARAMS ms_params;
av1_make_default_subpel_ms_params(&ms_params, cpi, x, bsize, &ref_mv,
cost_list);
- if (cpi->sf.rt_sf.force_half_pel_block &&
+ if (cpi->sf.rt_sf.reduce_mv_pel_precision &&
cpi->sf.mv_sf.subpel_force_stop < HALF_PEL)
- ms_params.forced_stop = subpel_select(cpi, bsize, tmp_mv);
+ ms_params.forced_stop = subpel_select(cpi, x, bsize, tmp_mv);
if (cpi->sf.rt_sf.reduce_zeromv_mvres && ref_mv.row == 0 &&
ref_mv.col == 0 && start_mv.row == 0 && start_mv.col == 0) {
// If both the refmv and the fullpel results show zero mv, then there is
@@ -407,9 +422,9 @@
SUBPEL_MOTION_SEARCH_PARAMS ms_params;
av1_make_default_subpel_ms_params(&ms_params, cpi, x, bsize, &ref_mv, NULL);
- if (cpi->sf.rt_sf.force_half_pel_block &&
+ if (cpi->sf.rt_sf.reduce_mv_pel_precision &&
cpi->sf.mv_sf.subpel_force_stop < HALF_PEL)
- ms_params.forced_stop = subpel_select(cpi, bsize, &best_mv);
+ ms_params.forced_stop = subpel_select(cpi, x, bsize, &best_mv);
MV start_mv = get_mv_from_fullmv(&best_mv.as_fullmv);
cpi->mv_search_params.find_fractional_mv_step(
xd, cm, &ms_params, start_mv, &best_mv.as_mv, &dis,
@@ -1297,7 +1312,8 @@
int left_mv_valid = 0;
int above_row = INVALID_MV_ROW_COL, above_col = INVALID_MV_ROW_COL;
int left_row = INVALID_MV_ROW_COL, left_col = INVALID_MV_ROW_COL;
- if (bsize >= BLOCK_64X64 && content_state_sb.source_sad_nonrd != kHighSad &&
+ if (bsize >= BLOCK_64X64 &&
+ content_state_sb.source_sad_nonrd[0] != kHighSad &&
spatial_variance < 300 &&
(mv_row > 16 || mv_row < -16 || mv_col > 16 || mv_col < -16)) {
this_rdc->rdcost = this_rdc->rdcost << 2;
@@ -2080,7 +2096,7 @@
// capture case where only part of frame has high motion.
// Exclude screen content mode.
if (cpi->oxcf.tune_cfg.content != AOM_CONTENT_SCREEN &&
- x->content_state_sb.source_sad_nonrd >= kHighSad &&
+ x->content_state_sb.source_sad_nonrd[0] >= kHighSad &&
bsize <= BLOCK_32X32 && cpi->rc.frame_source_sad < 50000)
use_golden_ref_frame = 1;
}
@@ -2093,7 +2109,7 @@
// Skip golden reference if color is set, on flat blocks with motion.
if (x->source_variance < 500 &&
- x->content_state_sb.source_sad_nonrd > kLowSad &&
+ x->content_state_sb.source_sad_nonrd[0] > kLowSad &&
(x->color_sensitivity_sb_g[0] == 1 || x->color_sensitivity_sb_g[1] == 1))
use_golden_ref_frame = 0;
@@ -2209,18 +2225,18 @@
do_early_exit_rdthresh = 0;
}
if ((x->source_variance < AOMMAX(50, (spatial_var_thresh >> 1)) &&
- x->content_state_sb.source_sad_nonrd >= kHighSad) ||
+ x->content_state_sb.source_sad_nonrd[0] >= kHighSad) ||
(cpi->oxcf.tune_cfg.content == AOM_CONTENT_SCREEN &&
x->source_variance == 0 &&
((bsize >= BLOCK_32X32 &&
- x->content_state_sb.source_sad_nonrd != kZeroSad) ||
+ x->content_state_sb.source_sad_nonrd[0] != kZeroSad) ||
x->color_sensitivity[0] == 1 || x->color_sensitivity[1] == 1)))
force_intra_check = 1;
// For big blocks worth checking intra (since only DC will be checked),
// even if best_early_term is set.
if (bsize >= BLOCK_32X32) best_early_term = 0;
} else if (cpi->sf.rt_sf.source_metrics_sb_nonrd &&
- x->content_state_sb.source_sad_nonrd == kLowSad) {
+ x->content_state_sb.source_sad_nonrd[0] == kLowSad) {
perform_intra_pred = 0;
}
@@ -2278,7 +2294,7 @@
cpi->sf.rt_sf.source_metrics_sb_nonrd) {
// For spatially flat blocks with zero motion only check
// DC mode.
- if (x->content_state_sb.source_sad_nonrd == kZeroSad &&
+ if (x->content_state_sb.source_sad_nonrd[0] == kZeroSad &&
x->source_variance == 0 && this_mode != DC_PRED)
continue;
// Only test Intra for big blocks if spatial_variance is 0.
@@ -2345,7 +2361,7 @@
// Otherwise bias against intra for blocks with zero
// motion and no color, on non-scene/slide changes.
else if (!cpi->rc.high_source_sad && x->source_variance > 0 &&
- x->content_state_sb.source_sad_nonrd == kZeroSad &&
+ x->content_state_sb.source_sad_nonrd[0] == kZeroSad &&
x->color_sensitivity[0] == 0 && x->color_sensitivity[1] == 0)
this_rdc.rdcost = (3 * this_rdc.rdcost) >> 1;
}
@@ -2426,8 +2442,8 @@
return 1;
}
- if (content_state_sb.source_sad_nonrd != kHighSad && bsize >= BLOCK_64X64 &&
- force_skip_low_temp_var && mode == NEWMV) {
+ if (content_state_sb.source_sad_nonrd[0] != kHighSad &&
+ bsize >= BLOCK_64X64 && force_skip_low_temp_var && mode == NEWMV) {
return 1;
}
return 0;
@@ -2965,7 +2981,7 @@
use_modeled_non_rd_cost =
(quant_params->base_qindex > 120 && x->source_variance > 100 &&
bsize <= BLOCK_16X16 && !x->content_state_sb.lighting_change &&
- x->content_state_sb.source_sad_nonrd != kHighSad);
+ x->content_state_sb.source_sad_nonrd[0] != kHighSad);
}
#if COLLECT_PICK_MODE_STAT
@@ -3079,9 +3095,9 @@
// below after search_new_mv.
if (cpi->sf.rt_sf.source_metrics_sb_nonrd) {
if ((frame_mv[this_mode][ref_frame].as_int != 0 &&
- x->content_state_sb.source_sad_nonrd == kZeroSad) ||
+ x->content_state_sb.source_sad_nonrd[0] == kZeroSad) ||
(frame_mv[this_mode][ref_frame].as_int == 0 &&
- x->content_state_sb.source_sad_nonrd != kZeroSad &&
+ x->content_state_sb.source_sad_nonrd[0] != kZeroSad &&
((x->color_sensitivity[0] == 0 && x->color_sensitivity[1] == 0) ||
cpi->rc.high_source_sad) &&
x->source_variance == 0))
@@ -3175,7 +3191,7 @@
cpi->oxcf.tune_cfg.content == AOM_CONTENT_SCREEN &&
cpi->sf.rt_sf.source_metrics_sb_nonrd) {
if (frame_mv[this_mode][ref_frame].as_int == 0 &&
- x->content_state_sb.source_sad_nonrd != kZeroSad &&
+ x->content_state_sb.source_sad_nonrd[0] != kZeroSad &&
((x->color_sensitivity[0] == 0 && x->color_sensitivity[1] == 0) ||
cpi->rc.high_source_sad) &&
x->source_variance == 0)
diff --git a/av1/encoder/speed_features.c b/av1/encoder/speed_features.c
index f2e0bd3..10ead95 100644
--- a/av1/encoder/speed_features.c
+++ b/av1/encoder/speed_features.c
@@ -1339,13 +1339,16 @@
} else {
if (speed >= 6) sf->rt_sf.skip_newmv_mode_based_on_sse = 3;
if (speed == 7) sf->rt_sf.prefer_large_partition_blocks = 0;
+ if (speed >= 7) sf->rt_sf.reduce_mv_pel_precision = 1;
if (speed >= 9) {
sf->rt_sf.sad_based_adp_altref_lag = 1;
sf->rt_sf.sad_based_comp_prune = 1;
+ sf->rt_sf.reduce_mv_pel_precision = 0;
}
if (speed >= 10) {
sf->rt_sf.sad_based_adp_altref_lag = 3;
sf->rt_sf.sad_based_comp_prune = 2;
+ sf->rt_sf.reduce_mv_pel_precision = 2;
}
}
if (cpi->ppi->use_svc) {
@@ -1394,7 +1397,7 @@
sf->rt_sf.nonrd_prune_ref_frame_search = 3;
sf->rt_sf.var_part_split_threshold_shift = 10;
sf->mv_sf.subpel_search_method = SUBPEL_TREE_PRUNED_MORE;
- sf->rt_sf.force_half_pel_block = 1;
+ sf->rt_sf.reduce_mv_pel_precision = 2;
sf->rt_sf.reduce_zeromv_mvres = true;
}
if (speed >= 10 && cm->width * cm->height > 1920 * 1080)
@@ -1701,6 +1704,7 @@
sf->rt_sf.var_part_based_on_qidx = 0;
sf->rt_sf.frame_level_mode_cost_update = true;
sf->rt_sf.check_only_zero_zeromv_on_large_blocks = true;
+ sf->rt_sf.reduce_mv_pel_precision = 0;
}
if (speed >= 10) {
sf->rt_sf.sse_early_term_inter_search = EARLY_TERM_IDX_4;
@@ -1708,7 +1712,7 @@
sf->rt_sf.nonrd_prune_ref_frame_search = 3;
sf->rt_sf.var_part_split_threshold_shift = 10;
sf->mv_sf.subpel_search_method = SUBPEL_TREE_PRUNED_MORE;
- sf->rt_sf.force_half_pel_block = 1;
+ sf->rt_sf.reduce_mv_pel_precision = 2;
sf->rt_sf.reduce_zeromv_mvres = true;
sf->rt_sf.screen_content_cdef_filter_qindex_thresh = 80;
}
@@ -2023,7 +2027,7 @@
rt_sf->prune_inter_modes_with_golden_ref = 0;
rt_sf->prune_inter_modes_wrt_gf_arf_based_on_sad = 0;
rt_sf->prune_inter_modes_using_temp_var = 0;
- rt_sf->force_half_pel_block = 0;
+ rt_sf->reduce_mv_pel_precision = 0;
rt_sf->prune_intra_mode_based_on_mv_range = 0;
rt_sf->var_part_split_threshold_shift = 7;
rt_sf->gf_refresh_based_on_qp = 0;
diff --git a/av1/encoder/speed_features.h b/av1/encoder/speed_features.h
index 96a21e4..3c87384 100644
--- a/av1/encoder/speed_features.h
+++ b/av1/encoder/speed_features.h
@@ -1508,8 +1508,10 @@
// variance wrt LAST reference.
int prune_inter_modes_using_temp_var;
- // Force half_pel at block level.
- int force_half_pel_block;
+ // Reduce MV precision at block level, represents various algos (0: disabled)
+ // 1: switch to halfpel, fullpel based on blk SAD, source var, bsize and qp
+ // 2: switch to halfpel based on integer mv size, bsize, frame-level motion
+ int reduce_mv_pel_precision;
// Prune intra mode evaluation in inter frames based on mv range.
BLOCK_SIZE prune_intra_mode_based_on_mv_range;
diff --git a/av1/encoder/var_based_part.c b/av1/encoder/var_based_part.c
index 8ec7b71..f429190 100644
--- a/av1/encoder/var_based_part.c
+++ b/av1/encoder/var_based_part.c
@@ -1128,7 +1128,7 @@
// For non-SVC GOLDEN is another temporal reference. Check if it should be
// used as reference for partitioning.
if (!cpi->ppi->use_svc && (cpi->ref_frame_flags & AOM_GOLD_FLAG) &&
- x->content_state_sb.source_sad_nonrd != kZeroSad) {
+ x->content_state_sb.source_sad_nonrd[0] != kZeroSad) {
yv12_g = get_ref_frame_yv12_buf(cm, GOLDEN_FRAME);
if (yv12_g && yv12_g != yv12) {
av1_setup_pre_planes(xd, 0, yv12_g, mi_row, mi_col,
@@ -1273,12 +1273,12 @@
const int q =
av1_get_qindex(&cm->seg, segment_id, cm->quant_params.base_qindex);
set_vbp_thresholds(cpi, thresholds, q, x->content_state_sb.low_sumdiff,
- x->content_state_sb.source_sad_nonrd,
+ x->content_state_sb.source_sad_nonrd[0],
x->content_state_sb.source_sad_rd, 1);
} else {
set_vbp_thresholds(cpi, thresholds, cm->quant_params.base_qindex,
x->content_state_sb.low_sumdiff,
- x->content_state_sb.source_sad_nonrd,
+ x->content_state_sb.source_sad_nonrd[0],
x->content_state_sb.source_sad_rd, 0);
}
@@ -1346,7 +1346,7 @@
cpi->oxcf.q_cfg.aq_mode == CYCLIC_REFRESH_AQ &&
cpi->cyclic_refresh->apply_cyclic_refresh &&
segment_id == CR_SEGMENT_ID_BASE &&
- x->content_state_sb.source_sad_nonrd == kZeroSad &&
+ x->content_state_sb.source_sad_nonrd[0] == kZeroSad &&
ref_frame_partition == LAST_FRAME && xd->mi[0]->mv[0].as_int == 0 &&
y_sad < thresh_exit_part && uv_sad[0]<(3 * thresh_exit_part)>> 2 &&
uv_sad[1]<(3 * thresh_exit_part)>> 2) {
@@ -1427,7 +1427,7 @@
(thresholds[2] >> 1) &&
maxvar_16x16[m][i] > thresholds[2]) ||
(cpi->sf.rt_sf.prefer_large_partition_blocks &&
- x->content_state_sb.source_sad_nonrd > kLowSad &&
+ x->content_state_sb.source_sad_nonrd[0] > kLowSad &&
cpi->rc.frame_source_sad < 20000 &&
maxvar_16x16[m][i] > (thresholds[2] >> 4) &&
maxvar_16x16[m][i] > (minvar_16x16[m][i] << 2)))) {