Improve RTC temporal filtering
This change focuses on improving performance for resolutions below 360p.
More work will be done and more CLs will follow.
Borg result on rtc_derf set:
           avg_psnr:  ovr_psnr:  ssim:
speed 9:   -0.405     -0.240     -0.547
speed 10:  -0.344     -0.204     -0.494
Encoder speed drop: <=0.5%
STATS_CHANGED for rtc_derf speed 9 & 10
Change-Id: I28ecc68b70a08d27952eb5a1115e3ed48ab5cb64
diff --git a/av1/encoder/encodeframe.c b/av1/encoder/encodeframe.c
index f8e0d33..857e069 100644
--- a/av1/encoder/encodeframe.c
+++ b/av1/encoder/encodeframe.c
@@ -803,8 +803,9 @@
*/
// TODO(any): consolidate sfs to make interface cleaner
static AOM_INLINE void grade_source_content_sb(AV1_COMP *cpi,
- MACROBLOCK *const x, int mi_row,
- int mi_col) {
+ MACROBLOCK *const x,
+ TileDataEnc *tile_data,
+ int mi_row, int mi_col) {
AV1_COMMON *const cm = &cpi->common;
bool calc_src_content = false;
@@ -823,7 +824,8 @@
else
x->content_state_sb.source_sad_rd = kZeroSad;
}
- if (calc_src_content) av1_source_content_sb(cpi, x, mi_row, mi_col);
+ if (calc_src_content)
+ av1_source_content_sb(cpi, x, tile_data, mi_row, mi_col);
}
/*!\brief Encode a superblock row by breaking it into superblocks
@@ -939,7 +941,7 @@
// Grade the temporal variation of the sb, the grade will be used to decide
// fast mode search strategy for coding blocks
- grade_source_content_sb(cpi, x, mi_row, mi_col);
+ grade_source_content_sb(cpi, x, tile_data, mi_row, mi_col);
// encode the superblock
if (use_nonrd_mode) {
diff --git a/av1/encoder/encodeframe_utils.c b/av1/encoder/encodeframe_utils.c
index e3a606c..e40698c 100644
--- a/av1/encoder/encodeframe_utils.c
+++ b/av1/encoder/encodeframe_utils.c
@@ -1309,10 +1309,78 @@
CFL_ALPHABET_SIZE);
}
+// Returns 1 unless an in-tile above/left block is non-intra with a large mv.
+static int check_neighbor_blocks(MB_MODE_INFO **mi, int mi_stride,
+ const TileInfo *const tile_info, int mi_row,
+ int mi_col) {
+ int is_above_low_motion = 1;
+ int is_left_low_motion = 1;
+ const int thr = 24;
+
+ // Check above block.
+ if (mi_row > tile_info->mi_row_start) {
+ const MB_MODE_INFO *above_mbmi = mi[-mi_stride];
+ const int_mv above_mv = above_mbmi->mv[0];
+ if (above_mbmi->mode >= INTRA_MODE_END &&
+ (abs(above_mv.as_mv.row) > thr || abs(above_mv.as_mv.col) > thr))
+ is_above_low_motion = 0;
+ }
+
+ // Check left block.
+ if (mi_col > tile_info->mi_col_start) {
+ const MB_MODE_INFO *left_mbmi = mi[-1];
+ const int_mv left_mv = left_mbmi->mv[0];
+ if (left_mbmi->mode >= INTRA_MODE_END &&
+ (abs(left_mv.as_mv.row) > thr || abs(left_mv.as_mv.col) > thr))
+ is_left_low_motion = 0;
+ }
+
+ return (is_above_low_motion && is_left_low_motion);
+}
+
+// Returns 1 if 5/8 of the block SAD is below the SADs at all 4 1-away points.
+static int fast_detect_non_zero_motion(AV1_COMP *cpi, const uint8_t *src_y,
+ int src_ystride,
+ const uint8_t *last_src_y,
+ int last_src_ystride, int mi_row,
+ int mi_col) {
+ AV1_COMMON *const cm = &cpi->common;
+ const BLOCK_SIZE bsize = cm->seq_params->sb_size;
+ unsigned int blk_sad = INT_MAX;
+ if (cpi->src_sad_blk_64x64 != NULL) {
+ const int sb_size_by_mb = (bsize == BLOCK_128X128)
+ ? (cm->seq_params->mib_size >> 1)
+ : cm->seq_params->mib_size;
+ const int sb_cols =
+ (cm->mi_params.mi_cols + sb_size_by_mb - 1) / sb_size_by_mb;
+ const int sbi_col = mi_col / sb_size_by_mb;
+ const int sbi_row = mi_row / sb_size_by_mb;
+ blk_sad = (unsigned int)cpi->src_sad_blk_64x64[sbi_col + sbi_row * sb_cols];
+ } else {
+ blk_sad = cpi->ppi->fn_ptr[bsize].sdf(src_y, src_ystride, last_src_y,
+ last_src_ystride);
+ }
+
+ // Search 4 1-away points.
+ const uint8_t *const search_pos[4] = {
+ last_src_y - last_src_ystride,
+ last_src_y - 1,
+ last_src_y + 1,
+ last_src_y + last_src_ystride,
+ };
+ unsigned int sad_arr[4];
+ cpi->ppi->fn_ptr[bsize].sdx4df(src_y, src_ystride, search_pos,
+ last_src_ystride, sad_arr);
+
+ blk_sad = (blk_sad * 5) >> 3;
+ return (blk_sad < sad_arr[0] && blk_sad < sad_arr[1] &&
+ blk_sad < sad_arr[2] && blk_sad < sad_arr[3]);
+}
+
// Grade the temporal variation of the source by comparing the current sb and
// its collocated block in the last frame.
-void av1_source_content_sb(AV1_COMP *cpi, MACROBLOCK *x, int mi_row,
- int mi_col) {
+void av1_source_content_sb(AV1_COMP *cpi, MACROBLOCK *x, TileDataEnc *tile_data,
+ int mi_row, int mi_col) {
unsigned int tmp_sse;
unsigned int tmp_variance;
const BLOCK_SIZE bsize = cpi->common.seq_params->sb_size;
@@ -1363,6 +1431,9 @@
cpi->last_source->y_height != cpi->source->y_height)
return;
if (!cpi->sf.rt_sf.use_rtc_tf || tmp_sse == 0) return;
+ if (cpi->sf.rt_sf.use_rtc_tf == 2 &&
+ (cpi->rc.high_source_sad || cpi->rc.frame_source_sad > 20000))
+ return;
// In-place temporal filter. If psnr calculation is enabled, we store the
// source for that.
@@ -1371,10 +1442,35 @@
const unsigned int nmean2 = tmp_sse - tmp_variance;
const int ac_q_step = av1_ac_quant_QTX(cm->quant_params.base_qindex, 0,
cm->seq_params->bit_depth);
- const unsigned int threshold = 3 * ac_q_step * ac_q_step / 2;
+ // Keep the threshold for >= 360p unchanged. It will be tested and modified
+ // later when needed.
+ const unsigned int threshold = (cpi->sf.rt_sf.use_rtc_tf == 1)
+ ? ((3 * ac_q_step * ac_q_step) >> 1)
+ : 250 * ac_q_step;
// TODO(yunqing): use a weighted sum instead of averaging in filtering.
if (tmp_variance <= threshold && nmean2 <= 15) {
+ if (cpi->sf.rt_sf.use_rtc_tf == 2) {
+ // Check neighbor blocks. If neighbor blocks aren't low-motion blocks,
+ // skip temporal filtering for this block.
+ MB_MODE_INFO **mi = cm->mi_params.mi_grid_base +
+ get_mi_grid_idx(&cm->mi_params, mi_row, mi_col);
+ const TileInfo *const tile_info = &tile_data->tile_info;
+ const int is_neighbor_blocks_low_motion = check_neighbor_blocks(
+ mi, cm->mi_params.mi_stride, tile_info, mi_row, mi_col);
+ if (!is_neighbor_blocks_low_motion) return;
+
+ // Only consider 64x64 SB for now. Need to extend to 128x128 for large SB
+ // size.
+ // Test several nearby points. If non-zero mv exists, don't do temporal
+ // filtering.
+ const int is_this_blk_low_motion =
+ fast_detect_non_zero_motion(cpi, src_y, src_ystride, last_src_y,
+ last_src_ystride, mi_row, mi_col);
+
+ if (!is_this_blk_low_motion) return;
+ }
+
const int shift_x[2] = { 0, cpi->source->subsampling_x };
const int shift_y[2] = { 0, cpi->source->subsampling_y };
const uint8_t h = block_size_high[bsize];
diff --git a/av1/encoder/encodeframe_utils.h b/av1/encoder/encodeframe_utils.h
index 462600c..0e9c439 100644
--- a/av1/encoder/encodeframe_utils.h
+++ b/av1/encoder/encodeframe_utils.h
@@ -391,8 +391,8 @@
void av1_avg_cdf_symbols(FRAME_CONTEXT *ctx_left, FRAME_CONTEXT *ctx_tr,
int wt_left, int wt_tr);
-void av1_source_content_sb(AV1_COMP *cpi, MACROBLOCK *x, int mi_row,
- int mi_col);
+void av1_source_content_sb(AV1_COMP *cpi, MACROBLOCK *x, TileDataEnc *tile_data,
+ int mi_row, int mi_col);
void av1_reset_mbmi(CommonModeInfoParams *const mi_params, BLOCK_SIZE sb_size,
int mi_row, int mi_col);
diff --git a/av1/encoder/speed_features.c b/av1/encoder/speed_features.c
index 80457a2..9f11e87 100644
--- a/av1/encoder/speed_features.c
+++ b/av1/encoder/speed_features.c
@@ -1279,6 +1279,8 @@
sf->rt_sf.use_comp_ref_nonrd = 0;
sf->rt_sf.nonrd_agressive_skip = 1;
sf->rt_sf.skip_intra_pred = 1;
+ sf->rt_sf.use_rtc_tf = 2;
+
// TODO(kyslov) Re-enable when AV1 models are trained
#if 0
#if CONFIG_RT_ML_PARTITIONING