Speed up handle_inter_intra_mode 1. Add wedge rate to best_interintra_rd. 2. If the mv is unchanged after the motion search, skip av1_build_inter_predictors_sby. 3. Split the final av1_build_inter_predictors_sb, and do av1_build_inter_predictors_sby only when necessary. For the encoder, this is about 0.2% faster, as shown by encoding 20 frames of BasketballDrill_832x480_50.y4m at 800kbps on speed 1 with no performance change. ( 211917 ms -> 211470 ms) Change-Id: I4dfd26061842b99475d3a68233600eae1a890213
diff --git a/av1/encoder/rdopt.c b/av1/encoder/rdopt.c index 9a1d1f9..4d3579d 100644 --- a/av1/encoder/rdopt.c +++ b/av1/encoder/rdopt.c
@@ -8437,7 +8437,8 @@ const int *const interintra_mode_cost = x->interintra_mode_cost[size_group_lookup[bsize]]; const int_mv mv0 = mbmi->mv[0]; - + const int is_wedge_used = is_interintra_wedge_used(bsize); + int rwedge = is_wedge_used ? x->wedge_interintra_cost[bsize][0] : 0; mbmi->ref_frame[1] = NONE_FRAME; xd->plane[0].dst.buf = tmp_buf; xd->plane[0].dst.stride = bw; @@ -8477,22 +8478,15 @@ rd = estimate_yrd_for_sb(cpi, bsize, x, &rate_sum, &dist_sum, &tmp_skip_txfm_sb, &tmp_skip_sse_sb, INT64_MAX); if (rd != INT64_MAX) - rd = RDCOST(x->rdmult, rate_mv + rmode + rate_sum, dist_sum); + rd = RDCOST(x->rdmult, rate_mv + rmode + rate_sum + rwedge, dist_sum); best_interintra_rd = rd; - if (ref_best_rd < INT64_MAX && (best_interintra_rd >> 1) > ref_best_rd) { return -1; } - if (is_interintra_wedge_used(bsize)) { - int64_t best_interintra_rd_nowedge = INT64_MAX; + if (is_wedge_used) { + int64_t best_interintra_rd_nowedge = rd; int64_t best_interintra_rd_wedge = INT64_MAX; int_mv tmp_mv; - InterpFilters backup_interp_filters = mbmi->interp_filters; - int rwedge = x->wedge_interintra_cost[bsize][0]; - if (rd != INT64_MAX) - rd = RDCOST(x->rdmult, rate_mv + rmode + rate_sum + rwedge, dist_sum); - best_interintra_rd_nowedge = rd; - // Disable wedge search if source variance is small if (x->source_variance > cpi->sf.disable_wedge_search_var_thresh) { mbmi->use_wedge_interintra = 1; @@ -8505,6 +8499,7 @@ best_interintra_rd_wedge += RDCOST(x->rdmult, rmode + rate_mv + rwedge, 0); + rd = INT64_MAX; // Refine motion vector. 
if (have_newmv_in_inter_mode(mbmi->mode)) { // get negative of mask @@ -8514,20 +8509,18 @@ compound_single_motion_search(cpi, x, bsize, &tmp_mv.as_mv, mi_row, mi_col, intrapred, mask, bw, &tmp_rate_mv, 0); - mbmi->mv[0].as_int = tmp_mv.as_int; - av1_build_inter_predictors_sby(cm, xd, mi_row, mi_col, orig_dst, bsize); - model_rd_fn[MODELRD_LEGACY](cpi, bsize, x, xd, 0, 0, mi_row, mi_col, - &rate_sum, &dist_sum, &tmp_skip_txfm_sb, - &tmp_skip_sse_sb, NULL, NULL, NULL); - rd = RDCOST(x->rdmult, tmp_rate_mv + rmode + rate_sum + rwedge, - dist_sum); - if (rd >= best_interintra_rd_wedge) { - tmp_mv.as_int = mv0.as_int; - tmp_rate_mv = rate_mv; - mbmi->interp_filters = backup_interp_filters; - av1_combine_interintra(xd, bsize, 0, tmp_buf, bw, intrapred, bw); + if (mbmi->mv[0].as_int != tmp_mv.as_int) { + mbmi->mv[0].as_int = tmp_mv.as_int; + av1_build_inter_predictors_sby(cm, xd, mi_row, mi_col, orig_dst, + bsize); + model_rd_fn[MODELRD_LEGACY](cpi, bsize, x, xd, 0, 0, mi_row, mi_col, + &rate_sum, &dist_sum, &tmp_skip_txfm_sb, + &tmp_skip_sse_sb, NULL, NULL, NULL); + rd = RDCOST(x->rdmult, tmp_rate_mv + rmode + rate_sum + rwedge, + dist_sum); } - } else { + } + if (rd >= best_interintra_rd_wedge) { tmp_mv.as_int = mv0.as_int; tmp_rate_mv = rate_mv; av1_combine_interintra(xd, bsize, 0, tmp_buf, bw, intrapred, bw); @@ -8546,14 +8539,15 @@ } else { mbmi->use_wedge_interintra = 0; mbmi->mv[0].as_int = mv0.as_int; - mbmi->interp_filters = backup_interp_filters; + av1_build_inter_predictors_sby(cm, xd, mi_row, mi_col, orig_dst, bsize); } } else { mbmi->use_wedge_interintra = 0; } } // if (is_interintra_wedge_used(bsize)) - restore_dst_buf(xd, *orig_dst, num_planes); - av1_build_inter_predictors_sb(cm, xd, mi_row, mi_col, orig_dst, bsize); + if (num_planes > 1) { + av1_build_inter_predictors_sbuv(cm, xd, mi_row, mi_col, orig_dst, bsize); + } return 0; }