Add speed feature use_fast_interpolation_filter_search
Applies to speed >= 1. Instead of searching the entire dual filter space
{R,Sm,Sh}x{R,Sm,Sh}, first check only {R}x{R,Sm,Sh}, where R is
EIGHTTAP_REGULAR, and then check {R,Sm,Sh}x{best of the previous R,Sm,Sh}.
Saves ~6% of cycles by reducing the work done in av1_convolve_2d_sse2,
at the cost of a 0.023 overall PSNR drop.
Change-Id: I82d7a6321b335293124a007ff4c87f0e260052e1
diff --git a/av1/encoder/rdopt.c b/av1/encoder/rdopt.c
index 844ee23..55010c2 100644
--- a/av1/encoder/rdopt.c
+++ b/av1/encoder/rdopt.c
@@ -7405,38 +7405,118 @@
int best_in_temp = 0;
InterpFilters best_filters = mbmi->interp_filters;
restore_dst_buf(xd, *tmp_dst);
- // EIGHTTAP_REGULAR mode is calculated beforehand
- for (i = 1; i < filter_set_size; ++i) {
+
+#if CONFIG_DUAL_FILTER // Speed feature use_fast_interpolation_filter_search
+ if (cpi->sf.use_fast_interpolation_filter_search) {
int tmp_skip_sb = 0;
int64_t tmp_skip_sse = INT64_MAX;
int tmp_rs;
int64_t tmp_rd;
+
+ // default to (R,R): EIGHTTAP_REGULARxEIGHTTAP_REGULAR
+ int best_dual_mode = 0;
+ // Find best of {R}x{R,Sm,Sh}
+ // EIGHTTAP_REGULAR mode is calculated beforehand
+ for (i = 1; i < SWITCHABLE_FILTERS; ++i) {
+ tmp_skip_sb = 0;
+ tmp_skip_sse = INT64_MAX;
+
+ mbmi->interp_filters =
+ av1_make_interp_filters(filter_sets[i][0], filter_sets[i][1]);
+
+ tmp_rs = av1_get_switchable_rate(cm, x, xd);
+ av1_build_inter_predictors_sb(cm, xd, mi_row, mi_col, orig_dst,
+ bsize);
+ model_rd_for_sb(cpi, bsize, x, xd, 0, MAX_MB_PLANE - 1, &tmp_rate,
+ &tmp_dist, &tmp_skip_sb, &tmp_skip_sse);
+ tmp_rd = RDCOST(x->rdmult, tmp_rs + tmp_rate, tmp_dist);
+
+ if (tmp_rd < *rd) {
+ best_dual_mode = i;
+
+ *rd = tmp_rd;
+ *switchable_rate = av1_get_switchable_rate(cm, x, xd);
+ best_filters = mbmi->interp_filters;
+ *skip_txfm_sb = tmp_skip_sb;
+ *skip_sse_sb = tmp_skip_sse;
+ best_in_temp = !best_in_temp;
+ if (best_in_temp) {
+ restore_dst_buf(xd, *orig_dst);
+ } else {
+ restore_dst_buf(xd, *tmp_dst);
+ }
+ }
+ }
+
+ // From best of horizontal EIGHTTAP_REGULAR modes, check vertical modes
+ for (i = best_dual_mode + SWITCHABLE_FILTERS; i < filter_set_size;
+ i += SWITCHABLE_FILTERS) {
+ tmp_skip_sb = 0;
+ tmp_skip_sse = INT64_MAX;
+
+ mbmi->interp_filters =
+ av1_make_interp_filters(filter_sets[i][0], filter_sets[i][1]);
+
+ tmp_rs = av1_get_switchable_rate(cm, x, xd);
+ av1_build_inter_predictors_sb(cm, xd, mi_row, mi_col, orig_dst,
+ bsize);
+ model_rd_for_sb(cpi, bsize, x, xd, 0, MAX_MB_PLANE - 1, &tmp_rate,
+ &tmp_dist, &tmp_skip_sb, &tmp_skip_sse);
+ tmp_rd = RDCOST(x->rdmult, tmp_rs + tmp_rate, tmp_dist);
+
+ if (tmp_rd < *rd) {
+ *rd = tmp_rd;
+ *switchable_rate = av1_get_switchable_rate(cm, x, xd);
+ best_filters = mbmi->interp_filters;
+ *skip_txfm_sb = tmp_skip_sb;
+ *skip_sse_sb = tmp_skip_sse;
+ best_in_temp = !best_in_temp;
+ if (best_in_temp) {
+ restore_dst_buf(xd, *orig_dst);
+ } else {
+ restore_dst_buf(xd, *tmp_dst);
+ }
+ }
+ }
+ } else {
+#endif // CONFIG_DUAL_FILTER Speed feature use_fast_interpolation_filter_search
+ // EIGHTTAP_REGULAR mode is calculated beforehand
+ for (i = 1; i < filter_set_size; ++i) {
+ int tmp_skip_sb = 0;
+ int64_t tmp_skip_sse = INT64_MAX;
+ int tmp_rs;
+ int64_t tmp_rd;
#if CONFIG_DUAL_FILTER
- mbmi->interp_filters =
- av1_make_interp_filters(filter_sets[i][0], filter_sets[i][1]);
+ mbmi->interp_filters =
+ av1_make_interp_filters(filter_sets[i][0], filter_sets[i][1]);
#else
mbmi->interp_filters = av1_broadcast_interp_filter((InterpFilter)i);
#endif // CONFIG_DUAL_FILTER
- tmp_rs = av1_get_switchable_rate(cm, x, xd);
- av1_build_inter_predictors_sb(cm, xd, mi_row, mi_col, orig_dst, bsize);
- model_rd_for_sb(cpi, bsize, x, xd, 0, MAX_MB_PLANE - 1, &tmp_rate,
- &tmp_dist, &tmp_skip_sb, &tmp_skip_sse);
- tmp_rd = RDCOST(x->rdmult, tmp_rs + tmp_rate, tmp_dist);
+ tmp_rs = av1_get_switchable_rate(cm, x, xd);
+ av1_build_inter_predictors_sb(cm, xd, mi_row, mi_col, orig_dst,
+ bsize);
+ model_rd_for_sb(cpi, bsize, x, xd, 0, MAX_MB_PLANE - 1, &tmp_rate,
+ &tmp_dist, &tmp_skip_sb, &tmp_skip_sse);
+ tmp_rd = RDCOST(x->rdmult, tmp_rs + tmp_rate, tmp_dist);
- if (tmp_rd < *rd) {
- *rd = tmp_rd;
- *switchable_rate = av1_get_switchable_rate(cm, x, xd);
- best_filters = mbmi->interp_filters;
- *skip_txfm_sb = tmp_skip_sb;
- *skip_sse_sb = tmp_skip_sse;
- best_in_temp = !best_in_temp;
- if (best_in_temp) {
- restore_dst_buf(xd, *orig_dst);
- } else {
- restore_dst_buf(xd, *tmp_dst);
+ if (tmp_rd < *rd) {
+ *rd = tmp_rd;
+ *switchable_rate = av1_get_switchable_rate(cm, x, xd);
+ best_filters = mbmi->interp_filters;
+ *skip_txfm_sb = tmp_skip_sb;
+ *skip_sse_sb = tmp_skip_sse;
+ best_in_temp = !best_in_temp;
+ if (best_in_temp) {
+ restore_dst_buf(xd, *orig_dst);
+ } else {
+ restore_dst_buf(xd, *tmp_dst);
+ }
}
}
+#if CONFIG_DUAL_FILTER // Speed feature use_fast_interpolation_filter_search
}
+#endif // CONFIG_DUAL_FILTER Speed feature use_fast_interpolation_filter_search
+
if (best_in_temp) {
restore_dst_buf(xd, *tmp_dst);
} else {
diff --git a/av1/encoder/speed_features.c b/av1/encoder/speed_features.c
index 4f75469..8833a7b 100644
--- a/av1/encoder/speed_features.c
+++ b/av1/encoder/speed_features.c
@@ -141,6 +141,9 @@
#if CONFIG_EXT_PARTITION_TYPES
sf->prune_ext_partition_types_search = 1;
#endif // CONFIG_EXT_PARTITION_TYPES
+#if CONFIG_DUAL_FILTER
+ sf->use_fast_interpolation_filter_search = 1;
+#endif // CONFIG_DUAL_FILTER
}
if (speed >= 2) {
@@ -453,6 +456,7 @@
// Set this at the appropriate speed levels
sf->use_transform_domain_distortion = 0;
sf->gm_search_type = GM_FULL_SEARCH;
+ sf->use_fast_interpolation_filter_search = 0;
if (oxcf->mode == GOOD
#if CONFIG_XIPHRC
diff --git a/av1/encoder/speed_features.h b/av1/encoder/speed_features.h
index b2e5d6e..fedefaa 100644
--- a/av1/encoder/speed_features.h
+++ b/av1/encoder/speed_features.h
@@ -498,6 +498,10 @@
int use_transform_domain_distortion;
GM_SEARCH_TYPE gm_search_type;
+
+ // Do limited interpolation filter search for dual filters, since best choice
+ // usually includes EIGHTTAP_REGULAR.
+ int use_fast_interpolation_filter_search;
} SPEED_FEATURES;
struct AV1_COMP;