Speed up av1_get_switchable_rate The switchable_ctx values are always the same for every filter tried during the interpolation filter type search, so there is no need to call av1_get_switchable_rate repeatedly. The encoder is about 0.3% faster, as shown by encoding 20 frames of BasketballDrill_832x480_50.y4m (510848 ms -> 509604 ms). a) gcc (Ubuntu 5.4.0-6ubuntu1~16.04.9) 5.4.0 20160609 b) CPU: Intel(R) Core(TM) i5-4590 CPU @ 3.30GHz c) Config cmd: cmake ../ -DENABLE_CCACHE=1 -DCONFIG_LOWBITDEPTH=1 d) Test cmd: ./aomenc --cpu-used=1 --end-usage=vbr \ --target-bitrate=800 --limit=20 Change-Id: I38d4ae3cadf36c38ecb076bd491a8c0bf0abf40d
diff --git a/av1/encoder/rd.c b/av1/encoder/rd.c index 17f23e5..9f8be95 100644 --- a/av1/encoder/rd.c +++ b/av1/encoder/rd.c
@@ -44,9 +44,6 @@ #define RD_THRESH_POW 1.25 -// Factor to weigh the rate for switchable interp filters. -#define SWITCHABLE_INTERP_RATE_FACTOR 1 - // The baseline rd thresholds for breaking out of the rd loop for // certain modes are assumed to be based on 8x8 blocks. // This table is used to correct for block size.
diff --git a/av1/encoder/rd.h b/av1/encoder/rd.h index 281b676..692367d 100644 --- a/av1/encoder/rd.h +++ b/av1/encoder/rd.h
@@ -43,6 +43,9 @@ #define RD_THRESH_MAX_FACT 64 #define RD_THRESH_INC 1 +// Factor to weigh the rate for switchable interp filters. +#define SWITCHABLE_INTERP_RATE_FACTOR 1 + // This enumerator type needs to be kept aligned with the mode order in // const MODE_DEFINITION av1_mode_order[MAX_MODES] used in the rd code. typedef enum {
diff --git a/av1/encoder/rdopt.c b/av1/encoder/rdopt.c index 9ad1e32..a14f876 100644 --- a/av1/encoder/rdopt.c +++ b/av1/encoder/rdopt.c
@@ -7580,12 +7580,24 @@ restore_dst_buf(xd, *dst_bufs[0], num_planes); } +static INLINE int get_switchable_rate(MACROBLOCK *const x, + const InterpFilters filters, + const int ctx[2]) { + int inter_filter_cost; + const InterpFilter filter0 = av1_extract_interp_filter(filters, 0); + const InterpFilter filter1 = av1_extract_interp_filter(filters, 1); + inter_filter_cost = x->switchable_interp_costs[ctx[0]][filter0]; + inter_filter_cost += x->switchable_interp_costs[ctx[1]][filter1]; + return SWITCHABLE_INTERP_RATE_FACTOR * inter_filter_cost; +} + // calculate the rdcost of given interpolation_filter static INLINE int64_t interpolation_filter_rd( MACROBLOCK *const x, const AV1_COMP *const cpi, BLOCK_SIZE bsize, int mi_row, int mi_col, BUFFER_SET *const orig_dst, int64_t *const rd, int *const switchable_rate, int *const skip_txfm_sb, - int64_t *const skip_sse_sb, const BUFFER_SET *dst_bufs[2], int filter_idx) { + int64_t *const skip_sse_sb, const BUFFER_SET *dst_bufs[2], int filter_idx, + const int switchable_ctx[2]) { const AV1_COMMON *cm = &cpi->common; const int num_planes = av1_num_planes(cm); MACROBLOCKD *const xd = &x->e_mbd; @@ -7595,7 +7607,8 @@ const InterpFilters last_best = mbmi->interp_filters; mbmi->interp_filters = filter_sets[filter_idx]; - const int tmp_rs = av1_get_switchable_rate(cm, x, xd); + const int tmp_rs = + get_switchable_rate(x, mbmi->interp_filters, switchable_ctx); av1_build_inter_predictors_sb(cm, xd, mi_row, mi_col, orig_dst, bsize); model_rd_for_sb(cpi, bsize, x, xd, 0, num_planes - 1, &tmp_rate, &tmp_dist, &tmp_skip_sb, &tmp_skip_sse, NULL, NULL, NULL); @@ -7677,7 +7690,11 @@ if (!need_search || match_found == -1) { set_default_interp_filters(mbmi, assign_filter); } - *switchable_rate = av1_get_switchable_rate(cm, x, xd); + int switchable_ctx[2]; + switchable_ctx[0] = av1_get_pred_context_switchable_interp(xd, 0); + switchable_ctx[1] = av1_get_pred_context_switchable_interp(xd, 1); + *switchable_rate = + get_switchable_rate(x, 
mbmi->interp_filters, switchable_ctx); av1_build_inter_predictors_sb(cm, xd, mi_row, mi_col, orig_dst, bsize); model_rd_for_sb(cpi, bsize, x, xd, 0, num_planes - 1, &tmp_rate, &tmp_dist, skip_txfm_sb, skip_sse_sb, NULL, NULL, NULL); @@ -7704,7 +7721,7 @@ for (i = 1; i < SWITCHABLE_FILTERS; ++i) { if (interpolation_filter_rd(x, cpi, bsize, mi_row, mi_col, orig_dst, rd, switchable_rate, skip_txfm_sb, skip_sse_sb, - dst_bufs, i)) { + dst_bufs, i, switchable_ctx)) { best_dual_mode = i; } } @@ -7713,7 +7730,7 @@ i += SWITCHABLE_FILTERS) { interpolation_filter_rd(x, cpi, bsize, mi_row, mi_col, orig_dst, rd, switchable_rate, skip_txfm_sb, skip_sse_sb, - dst_bufs, i); + dst_bufs, i, switchable_ctx); } } else { // EIGHTTAP_REGULAR mode is calculated beforehand @@ -7725,7 +7742,7 @@ } interpolation_filter_rd(x, cpi, bsize, mi_row, mi_col, orig_dst, rd, switchable_rate, skip_txfm_sb, skip_sse_sb, - dst_bufs, i); + dst_bufs, i, switchable_ctx); } } swap_dst_buf(xd, dst_bufs, num_planes);