Speed up av1_get_switchable_rate
The switchable_ctx values are the same for every filter
tried during the interpolation filter type search, so
there is no need to call av1_get_switchable_rate
repeatedly; compute the contexts once and reuse them.
The encoder is about 0.3% faster when encoding
20 frames of BasketballDrill_832x480_50.y4m
(510848 ms -> 509604 ms). Test environment:
a) gcc (Ubuntu 5.4.0-6ubuntu1~16.04.9) 5.4.0 20160609
b) CPU: Intel(R) Core(TM) i5-4590 CPU @ 3.30GHz
c) Config cmd
cmake ../ -DENABLE_CCACHE=1 -DCONFIG_LOWBITDEPTH=1
d) Test cmd:
./aomenc --cpu-used=1 --end-usage=vbr \
--target-bitrate=800 --limit=20
Change-Id: I38d4ae3cadf36c38ecb076bd491a8c0bf0abf40d
diff --git a/av1/encoder/rd.c b/av1/encoder/rd.c
index 17f23e5..9f8be95 100644
--- a/av1/encoder/rd.c
+++ b/av1/encoder/rd.c
@@ -44,9 +44,6 @@
#define RD_THRESH_POW 1.25
-// Factor to weigh the rate for switchable interp filters.
-#define SWITCHABLE_INTERP_RATE_FACTOR 1
-
// The baseline rd thresholds for breaking out of the rd loop for
// certain modes are assumed to be based on 8x8 blocks.
// This table is used to correct for block size.
diff --git a/av1/encoder/rd.h b/av1/encoder/rd.h
index 281b676..692367d 100644
--- a/av1/encoder/rd.h
+++ b/av1/encoder/rd.h
@@ -43,6 +43,9 @@
#define RD_THRESH_MAX_FACT 64
#define RD_THRESH_INC 1
+// Factor to weigh the rate for switchable interp filters.
+#define SWITCHABLE_INTERP_RATE_FACTOR 1
+
// This enumerator type needs to be kept aligned with the mode order in
// const MODE_DEFINITION av1_mode_order[MAX_MODES] used in the rd code.
typedef enum {
diff --git a/av1/encoder/rdopt.c b/av1/encoder/rdopt.c
index 9ad1e32..a14f876 100644
--- a/av1/encoder/rdopt.c
+++ b/av1/encoder/rdopt.c
@@ -7580,12 +7580,24 @@
restore_dst_buf(xd, *dst_bufs[0], num_planes);
}
+static INLINE int get_switchable_rate(MACROBLOCK *const x,
+                                      const InterpFilters filters,
+                                      const int ctx[2]) {
+  int cost = 0;  // Rate summed over both filter directions (0 and 1).
+  for (int dir = 0; dir < 2; ++dir) {
+    const InterpFilter f = av1_extract_interp_filter(filters, dir);
+    cost += x->switchable_interp_costs[ctx[dir]][f];
+  }
+  return SWITCHABLE_INTERP_RATE_FACTOR * cost;
+}
+
// calculate the rdcost of given interpolation_filter
static INLINE int64_t interpolation_filter_rd(
MACROBLOCK *const x, const AV1_COMP *const cpi, BLOCK_SIZE bsize,
int mi_row, int mi_col, BUFFER_SET *const orig_dst, int64_t *const rd,
int *const switchable_rate, int *const skip_txfm_sb,
- int64_t *const skip_sse_sb, const BUFFER_SET *dst_bufs[2], int filter_idx) {
+ int64_t *const skip_sse_sb, const BUFFER_SET *dst_bufs[2], int filter_idx,
+ const int switchable_ctx[2]) {
const AV1_COMMON *cm = &cpi->common;
const int num_planes = av1_num_planes(cm);
MACROBLOCKD *const xd = &x->e_mbd;
@@ -7595,7 +7607,8 @@
const InterpFilters last_best = mbmi->interp_filters;
mbmi->interp_filters = filter_sets[filter_idx];
- const int tmp_rs = av1_get_switchable_rate(cm, x, xd);
+ const int tmp_rs =
+ get_switchable_rate(x, mbmi->interp_filters, switchable_ctx);
av1_build_inter_predictors_sb(cm, xd, mi_row, mi_col, orig_dst, bsize);
model_rd_for_sb(cpi, bsize, x, xd, 0, num_planes - 1, &tmp_rate, &tmp_dist,
&tmp_skip_sb, &tmp_skip_sse, NULL, NULL, NULL);
@@ -7677,7 +7690,11 @@
if (!need_search || match_found == -1) {
set_default_interp_filters(mbmi, assign_filter);
}
- *switchable_rate = av1_get_switchable_rate(cm, x, xd);
+ int switchable_ctx[2];
+ switchable_ctx[0] = av1_get_pred_context_switchable_interp(xd, 0);
+ switchable_ctx[1] = av1_get_pred_context_switchable_interp(xd, 1);
+ *switchable_rate =
+ get_switchable_rate(x, mbmi->interp_filters, switchable_ctx);
av1_build_inter_predictors_sb(cm, xd, mi_row, mi_col, orig_dst, bsize);
model_rd_for_sb(cpi, bsize, x, xd, 0, num_planes - 1, &tmp_rate, &tmp_dist,
skip_txfm_sb, skip_sse_sb, NULL, NULL, NULL);
@@ -7704,7 +7721,7 @@
for (i = 1; i < SWITCHABLE_FILTERS; ++i) {
if (interpolation_filter_rd(x, cpi, bsize, mi_row, mi_col, orig_dst, rd,
switchable_rate, skip_txfm_sb, skip_sse_sb,
- dst_bufs, i)) {
+ dst_bufs, i, switchable_ctx)) {
best_dual_mode = i;
}
}
@@ -7713,7 +7730,7 @@
i += SWITCHABLE_FILTERS) {
interpolation_filter_rd(x, cpi, bsize, mi_row, mi_col, orig_dst, rd,
switchable_rate, skip_txfm_sb, skip_sse_sb,
- dst_bufs, i);
+ dst_bufs, i, switchable_ctx);
}
} else {
// EIGHTTAP_REGULAR mode is calculated beforehand
@@ -7725,7 +7742,7 @@
}
interpolation_filter_rd(x, cpi, bsize, mi_row, mi_col, orig_dst, rd,
switchable_rate, skip_txfm_sb, skip_sse_sb,
- dst_bufs, i);
+ dst_bufs, i, switchable_ctx);
}
}
swap_dst_buf(xd, dst_bufs, num_planes);