rtc-screen: Rework sb search window and palette testing For superblock motion in variance partitioning: allow a larger search window for scroll motion detection. Also adjust some thresholds for palette testing in nonrd_pickmode, needed in particular for cases where scroll motion is not detected. This helps reduce the encode_time spikes observed in 4K WebRTC screen content tests with scrolling. Change-Id: I4a452a586ae9017808153af9540b7ce6682c793d
diff --git a/av1/encoder/mcomp.c b/av1/encoder/mcomp.c index 1739066..85df081 100644 --- a/av1/encoder/mcomp.c +++ b/av1/encoder/mcomp.c
@@ -2055,12 +2055,13 @@ } int av1_vector_match(const int16_t *ref, const int16_t *src, int bwl, - int search_size, int full_search, int *sad) { + int search_size_top, int search_size_bottom, + int full_search, int *sad) { int best_sad = INT_MAX; int this_sad; int d; int center, offset = 0; - int bw = search_size << 1; + int bw = search_size_top + search_size_bottom; if (full_search) { for (d = 0; d <= bw; d++) { @@ -2072,7 +2073,7 @@ } center = offset; *sad = best_sad; - return (center - (bw >> 1)); + return (center - search_size_top); } for (d = 0; d <= bw; d += 16) { @@ -2131,18 +2132,16 @@ } } *sad = best_sad; - return (center - (bw >> 1)); + return (center - search_size_top); } // A special fast version of motion search used in rt mode. // The search window along columns and row is given by: // +/- me_search_size_col/row. -unsigned int av1_int_pro_motion_estimation(const AV1_COMP *cpi, MACROBLOCK *x, - BLOCK_SIZE bsize, int mi_row, - int mi_col, const MV *ref_mv, - unsigned int *y_sad_zero, - int me_search_size_col, - int me_search_size_row) { +unsigned int av1_int_pro_motion_estimation( + const AV1_COMP *cpi, MACROBLOCK *x, BLOCK_SIZE bsize, int mi_row, + int mi_col, const MV *ref_mv, unsigned int *y_sad_zero, + int me_search_size_col, int me_search_size_row, int is_var_part) { const AV1_COMMON *const cm = &cpi->common; MACROBLOCKD *xd = &x->e_mbd; MB_MODE_INFO *mi = xd->mi[0]; @@ -2156,15 +2155,44 @@ is_screen && bsize == cm->seq_params->sb_size; // Keep border a multiple of 16. const int border = (cpi->oxcf.border_in_pixels >> 4) << 4; - int search_size_width = me_search_size_col; - int search_size_height = me_search_size_row; - // Adjust based on boundary. 
- if (((mi_col << 2) - search_size_width < -border) || - ((mi_col << 2) + search_size_width > cm->width + border)) - search_size_width = border; - if (((mi_row << 2) - search_size_height < -border) || - ((mi_row << 2) + search_size_height > cm->height + border)) - search_size_height = border; + int search_size_width_left = me_search_size_col; + int search_size_width_right = me_search_size_col; + int search_size_height_top = me_search_size_row; + int search_size_height_bottom = me_search_size_row; + // Allow for larger search size for column/horizontal screen motion. + if (screen_scroll_superblock && is_var_part) { + if (((mi_col << 2) - search_size_width_left) < -border) + search_size_width_left = (mi_col << 2) + border; + if (((mi_col << 2) + search_size_width_right + bw) > cm->width + border) + search_size_width_right = cm->width + border - (mi_col << 2) - bw; + } else { + if (((mi_col << 2) - search_size_width_left < -border) || + ((mi_col << 2) + search_size_width_right + bw > cm->width + border)) { + search_size_width_left = AOMMIN(border, (mi_col << 2) + border); + search_size_width_right = + AOMMIN(border, cm->width + border - (mi_col << 2) - bw); + } + } + // Allow for larger search size for row/vertical screen motion. + if (screen_scroll_superblock && is_var_part) { + if (((mi_row << 2) - search_size_height_top) < -border) + search_size_height_top = (mi_row << 2) + border; + if (((mi_row << 2) + search_size_height_bottom + bh) > cm->height + border) + search_size_height_bottom = cm->height + border - (mi_row << 2) - bh; + } else { + if (((mi_row << 2) - search_size_height_top < -border) || + ((mi_row << 2) + search_size_height_bottom + bh > + cm->height + border)) { + search_size_height_top = AOMMIN(border, (mi_row << 2) + border); + search_size_height_bottom = + AOMMIN(border, cm->height + border - (mi_row << 2) - bh); + } + } + // Make search_size_width/height_left/right/top/bottom multiple of 16. 
+ search_size_width_left &= ~15; + search_size_width_right &= ~15; + search_size_height_top &= ~15; + search_size_height_bottom &= ~15; const int src_stride = x->plane[0].src.stride; const int ref_stride = xd->plane[0].pre[0].stride; uint8_t const *ref_buf, *src_buf; @@ -2203,8 +2231,10 @@ } return best_sad; } - const int width_ref_buf = (search_size_width << 1) + bw; - const int height_ref_buf = (search_size_height << 1) + bh; + const int width_ref_buf = + search_size_width_left + search_size_width_right + bw; + const int height_ref_buf = + search_size_height_top + search_size_height_bottom + bh; int16_t *hbuf = (int16_t *)aom_malloc(width_ref_buf * sizeof(*hbuf)); int16_t *vbuf = (int16_t *)aom_malloc(height_ref_buf * sizeof(*vbuf)); int16_t *src_hbuf = (int16_t *)aom_malloc(bw * sizeof(*src_hbuf)); @@ -2219,12 +2249,12 @@ } // Set up prediction 1-D reference set for rows. - ref_buf = xd->plane[0].pre[0].buf - search_size_width; + ref_buf = xd->plane[0].pre[0].buf - search_size_width_left; aom_int_pro_row(hbuf, ref_buf, ref_stride, width_ref_buf, bh, row_norm_factor); // Set up prediction 1-D reference set for cols - ref_buf = xd->plane[0].pre[0].buf - search_size_height * ref_stride; + ref_buf = xd->plane[0].pre[0].buf - search_size_height_top * ref_stride; aom_int_pro_col(vbuf, ref_buf, ref_stride, bw, height_ref_buf, col_norm_factor); @@ -2234,12 +2264,12 @@ aom_int_pro_col(src_vbuf, src_buf, src_stride, bw, bh, col_norm_factor); // Find the best match per 1-D search - best_int_mv->as_fullmv.col = - av1_vector_match(hbuf, src_hbuf, mi_size_wide_log2[bsize], - search_size_width, full_search, &best_sad_col); - best_int_mv->as_fullmv.row = - av1_vector_match(vbuf, src_vbuf, mi_size_high_log2[bsize], - search_size_height, full_search, &best_sad_row); + best_int_mv->as_fullmv.col = av1_vector_match( + hbuf, src_hbuf, mi_size_wide_log2[bsize], search_size_width_left, + search_size_width_right, full_search, &best_sad_col); + best_int_mv->as_fullmv.row = 
av1_vector_match( + vbuf, src_vbuf, mi_size_high_log2[bsize], search_size_height_top, + search_size_height_bottom, full_search, &best_sad_row); // For screen: select between horiz or vert motion. if (is_screen) {
diff --git a/av1/encoder/mcomp.h b/av1/encoder/mcomp.h index 05cbf87..d268481 100644 --- a/av1/encoder/mcomp.h +++ b/av1/encoder/mcomp.h
@@ -240,12 +240,13 @@ int av1_init_search_range(int size); int av1_vector_match(const int16_t *ref, const int16_t *src, int bwl, - int search_size, int full_search, int *sad); + int search_size_top, int search_size_bottom, + int full_search, int *sad); unsigned int av1_int_pro_motion_estimation( const struct AV1_COMP *cpi, MACROBLOCK *x, BLOCK_SIZE bsize, int mi_row, int mi_col, const MV *ref_mv, unsigned int *y_sad_zero, - int me_search_size_col, int me_search_size_row); + int me_search_size_col, int me_search_size_row, int is_var_part); int av1_refining_search_8p_c(const FULLPEL_MOTION_SEARCH_PARAMS *ms_params, const FULLPEL_MV start_mv, FULLPEL_MV *best_mv);
diff --git a/av1/encoder/nonrd_pickmode.c b/av1/encoder/nonrd_pickmode.c index 14581d7..49953d0 100644 --- a/av1/encoder/nonrd_pickmode.c +++ b/av1/encoder/nonrd_pickmode.c
@@ -331,7 +331,7 @@ MV ref_mv = av1_get_ref_mv(x, 0).as_mv; tmp_sad = av1_int_pro_motion_estimation( cpi, x, bsize, mi_row, mi_col, &ref_mv, &y_sad_zero, me_search_size_col, - me_search_size_row); + me_search_size_row, 0); if (tmp_sad > x->pred_mv_sad[LAST_FRAME]) return -1; @@ -3591,7 +3591,7 @@ if (cpi->oxcf.tune_cfg.content == AOM_CONTENT_SCREEN && x->content_state_sb.source_sad_nonrd != kZeroSad && bsize <= BLOCK_16X16) { - unsigned int thresh_sse = cpi->rc.high_source_sad ? 15000 : 200000; + unsigned int thresh_sse = cpi->rc.high_source_sad ? 15000 : 100000; unsigned int thresh_source_var = cpi->rc.high_source_sad ? 50 : 200; unsigned int best_sse_inter_motion = (unsigned int)(search_state.best_rdc.sse >> @@ -3622,7 +3622,7 @@ x->content_state_sb.source_sad_nonrd != kZeroSad && !cpi->rc.high_source_sad && (cpi->rc.high_motion_content_screen_rtc || - cpi->rc.frame_source_sad < 10000); + cpi->rc.frame_source_sad < 1000); bool try_palette = enable_palette( cpi, is_mode_intra(best_pickmode->best_mode), bsize, x->source_variance,
diff --git a/av1/encoder/ratectrl.c b/av1/encoder/ratectrl.c index 491e7a5..646219e 100644 --- a/av1/encoder/ratectrl.c +++ b/av1/encoder/ratectrl.c
@@ -3246,12 +3246,12 @@ unsigned int best_sad; int best_sad_col, best_sad_row; // Find the best match per 1-D search - *best_intmv_col = - av1_vector_match(hbuf, src_hbuf, mi_size_wide_log2[bsize], - search_size_width, full_search, &best_sad_col); - *best_intmv_row = - av1_vector_match(vbuf, src_vbuf, mi_size_high_log2[bsize], - search_size_height, full_search, &best_sad_row); + *best_intmv_col = av1_vector_match(hbuf, src_hbuf, mi_size_wide_log2[bsize], + search_size_width, search_size_width, + full_search, &best_sad_col); + *best_intmv_row = av1_vector_match(vbuf, src_vbuf, mi_size_high_log2[bsize], + search_size_height, search_size_height, + full_search, &best_sad_row); if (best_sad_col < best_sad_row) { *best_intmv_row = 0; best_sad = best_sad_col;
diff --git a/av1/encoder/var_based_part.c b/av1/encoder/var_based_part.c index d09b0ca..b0c1a5b 100644 --- a/av1/encoder/var_based_part.c +++ b/av1/encoder/var_based_part.c
@@ -1340,10 +1340,8 @@ MACROBLOCKD *xd = &x->e_mbd; MB_MODE_INFO *mi = xd->mi[0]; const int is_screen = cpi->oxcf.tune_cfg.content == AOM_CONTENT_SCREEN; - const int increase_col_sw = source_sad_nonrd > kMedSad && - !cpi->rc.high_motion_content_screen_rtc && - (cpi->svc.temporal_layer_id == 0 || - cpi->rc.num_col_blscroll_last_tl0 > 2); + const int increase_col_sw = + source_sad_nonrd > kMedSad && !cpi->rc.high_motion_content_screen_rtc; int me_search_size_col = is_screen ? increase_col_sw ? 512 : 96 : block_size_wide[cm->seq_params->sb_size] >> 1; @@ -1352,15 +1350,14 @@ int me_search_size_row = is_screen ? source_sad_nonrd > kMedSad ? 512 : 192 : block_size_high[cm->seq_params->sb_size] >> 1; - if (cm->width * cm->height >= 3840 * 2160 && - cpi->svc.temporal_layer_id == 0 && cpi->svc.number_temporal_layers > 1) { + if (cm->width * cm->height >= 3840 * 2160) { me_search_size_row = me_search_size_row << 1; me_search_size_col = me_search_size_col << 1; } unsigned int y_sad_zero; *y_sad = av1_int_pro_motion_estimation( cpi, x, cm->seq_params->sb_size, mi_row, mi_col, &kZeroMv, &y_sad_zero, - me_search_size_col, me_search_size_row); + me_search_size_col, me_search_size_row, 1); // The logic below selects whether the motion estimated in the // int_pro_motion() will be used in nonrd_pickmode. Only do this // for screen for now.