Add a speed feature for intra tx type search
Add a speed feature to separate the prediction mode search from the
tx type search for intra modes: search for the best intra prediction
mode with the fixed default tx type first, then choose the best tx
type for the selected mode.
Coding performance drop:
baseline
lowres 0.10% midres 0.08% hdres 0.14%
with ext-tx
lowres 0.14% midres 0.25% hdres 0.20%
Speed improvement is 20% for baseline and 17% for ext-tx.
It is turned on for speed >= 1.
Change-Id: Ia5e8d39e8a4e2e42c521bfde938f8b6a98ab24f9
diff --git a/vp10/encoder/block.h b/vp10/encoder/block.h
index 1ddc3d8..9412526 100644
--- a/vp10/encoder/block.h
+++ b/vp10/encoder/block.h
@@ -180,6 +180,9 @@
// Strong color activity detection. Used in RTC coding mode to enhance
// the visual quality at the boundary of moving color objects.
uint8_t color_sensitivity[2];
+
+ // use default transform and skip transform type search for intra modes
+ int use_default_intra_tx_type;
};
#ifdef __cplusplus
diff --git a/vp10/encoder/rdopt.c b/vp10/encoder/rdopt.c
index da3efe6..58a7042 100644
--- a/vp10/encoder/rdopt.c
+++ b/vp10/encoder/rdopt.c
@@ -780,7 +780,7 @@
const int tx_set_1D[TX_TYPES_1D] = {0};
#endif
- switch (cpi->sf.tx_type_search) {
+ switch (cpi->sf.tx_type_search.prune_mode) {
case NO_PRUNE:
return 0;
break;
@@ -1576,6 +1576,9 @@
for (n = start_tx; n >= end_tx; --n) {
if (FIXED_TX_TYPE && tx_type != get_default_tx_type(0, xd, 0, n))
continue;
+ if (!is_inter && x->use_default_intra_tx_type &&
+ tx_type != get_default_tx_type(0, xd, 0, n))
+ continue;
if (max_tx_size == TX_32X32 && n == TX_4X4)
continue;
#if CONFIG_EXT_TX
@@ -1583,7 +1586,7 @@
if (is_inter) {
if (!ext_tx_used_inter[ext_tx_set][tx_type])
continue;
- if (cpi->sf.tx_type_search > 0) {
+ if (cpi->sf.tx_type_search.prune_mode > NO_PRUNE) {
if (!do_tx_type_search(tx_type, prune))
continue;
}
@@ -1598,7 +1601,7 @@
#else // CONFIG_EXT_TX
if (n >= TX_32X32 && tx_type != DCT_DCT)
continue;
- if (is_inter && cpi->sf.tx_type_search > 0 &&
+ if (is_inter && cpi->sf.tx_type_search.prune_mode > NO_PRUNE &&
!do_tx_type_search(tx_type, prune))
continue;
#endif // CONFIG_EXT_TX
@@ -1674,7 +1677,7 @@
ext_tx_set = get_ext_tx_set(mbmi->tx_size, bs, is_inter);
#endif // CONFIG_EXT_TX
- if (is_inter && cpi->sf.tx_type_search > 0)
+ if (is_inter && cpi->sf.tx_type_search.prune_mode > NO_PRUNE)
#if CONFIG_EXT_TX
prune = prune_tx_types(cpi, bs, x, xd, ext_tx_set);
#else
@@ -1687,11 +1690,14 @@
if (is_inter) {
if (!ext_tx_used_inter[ext_tx_set][tx_type])
continue;
- if (cpi->sf.tx_type_search > 0) {
+ if (cpi->sf.tx_type_search.prune_mode > NO_PRUNE) {
if (!do_tx_type_search(tx_type, prune))
continue;
}
} else {
+ if (x->use_default_intra_tx_type &&
+ tx_type != get_default_tx_type(0, xd, 0, mbmi->tx_size))
+ continue;
if (!ALLOW_INTRA_EXT_TX && bs >= BLOCK_8X8) {
if (tx_type != intra_mode_to_tx_type_context[mbmi->mode])
continue;
@@ -1740,6 +1746,9 @@
if (mbmi->tx_size < TX_32X32 &&
!xd->lossless[mbmi->segment_id]) {
for (tx_type = 0; tx_type < TX_TYPES; ++tx_type) {
+ if (!is_inter && x->use_default_intra_tx_type &&
+ tx_type != get_default_tx_type(0, xd, 0, mbmi->tx_size))
+ continue;
mbmi->tx_type = tx_type;
txfm_rd_in_plane(x,
cpi,
@@ -1750,7 +1759,8 @@
continue;
if (is_inter) {
r += cpi->inter_tx_type_costs[mbmi->tx_size][mbmi->tx_type];
- if (cpi->sf.tx_type_search > 0 && !do_tx_type_search(tx_type, prune))
+ if (cpi->sf.tx_type_search.prune_mode > NO_PRUNE &&
+ !do_tx_type_search(tx_type, prune))
continue;
} else {
r += cpi->intra_tx_type_costs[mbmi->tx_size]
@@ -1817,7 +1827,7 @@
TX_TYPE tx_type, best_tx_type = DCT_DCT;
int prune = 0;
- if (is_inter && cpi->sf.tx_type_search > 0)
+ if (is_inter && cpi->sf.tx_type_search.prune_mode > NO_PRUNE)
// passing -1 in for tx_type indicates that all 1D
// transforms should be considered for pruning
prune = prune_tx_types(cpi, bs, x, xd, -1);
@@ -2835,7 +2845,7 @@
int64_t *distortion, int *skippable,
BLOCK_SIZE bsize,
int64_t best_rd) {
- PREDICTION_MODE mode;
+ uint8_t mode_idx;
PREDICTION_MODE mode_selected = DC_PRED;
MACROBLOCKD *const xd = &x->e_mbd;
MODE_INFO *const mic = xd->mi[0];
@@ -2864,6 +2874,7 @@
const MODE_INFO *left_mi = xd->left_mi;
const PREDICTION_MODE A = vp10_above_block_mode(mic, above_mi, 0);
const PREDICTION_MODE L = vp10_left_block_mode(mic, left_mi, 0);
+ const PREDICTION_MODE FINAL_MODE_SEARCH = TM_PRED + 1;
const TX_SIZE max_tx_size = max_txsize_lookup[bsize];
bmode_costs = cpi->y_mode_costs[A][L];
@@ -2889,15 +2900,28 @@
if (left_mi)
palette_ctx += (left_mi->mbmi.palette_mode_info.palette_size[0] > 0);
+ if (cpi->sf.tx_type_search.fast_intra_tx_type_search)
+ x->use_default_intra_tx_type = 1;
+ else
+ x->use_default_intra_tx_type = 0;
+
/* Y Search for intra prediction mode */
- for (mode = DC_PRED; mode <= TM_PRED; ++mode) {
- mic->mbmi.mode = mode;
+ for (mode_idx = DC_PRED; mode_idx <= FINAL_MODE_SEARCH; ++mode_idx) {
+ if (mode_idx == FINAL_MODE_SEARCH) {
+ if (x->use_default_intra_tx_type == 0)
+ break;
+ mic->mbmi.mode = mode_selected;
+ x->use_default_intra_tx_type = 0;
+ } else {
+ mic->mbmi.mode = mode_idx;
+ }
#if CONFIG_EXT_INTRA
- is_directional_mode = (mode != DC_PRED && mode != TM_PRED);
- if (is_directional_mode && directional_mode_skip_mask[mode])
+ is_directional_mode =
+ (mic->mbmi.mode != DC_PRED && mic->mbmi.mode != TM_PRED);
+ if (is_directional_mode && directional_mode_skip_mask[mic->mbmi.mode])
continue;
if (is_directional_mode) {
- rate_overhead = bmode_costs[mode] +
+ rate_overhead = bmode_costs[mic->mbmi.mode] +
write_uniform_cost(2 * MAX_ANGLE_DELTAS + 1, 0);
this_rate_tokenonly = INT_MAX;
this_rd =
@@ -2917,7 +2941,7 @@
if (this_rate_tokenonly == INT_MAX)
continue;
- this_rate = this_rate_tokenonly + bmode_costs[mode];
+ this_rate = this_rate_tokenonly + bmode_costs[mic->mbmi.mode];
if (!xd->lossless[xd->mi[0]->mbmi.segment_id]) {
// super_block_yrd above includes the cost of the tx_size in the
@@ -2928,12 +2952,12 @@
cpi->tx_size_cost[max_tx_size - TX_8X8][get_tx_size_context(xd)]
[mic->mbmi.tx_size];
}
- if (cpi->common.allow_screen_content_tools && mode == DC_PRED)
+ if (cpi->common.allow_screen_content_tools && mic->mbmi.mode == DC_PRED)
this_rate +=
vp10_cost_bit(vp10_default_palette_y_mode_prob[bsize - BLOCK_8X8]
[palette_ctx], 0);
#if CONFIG_EXT_INTRA
- if (mode == DC_PRED && ALLOW_FILTER_INTRA_MODES)
+ if (mic->mbmi.mode == DC_PRED && ALLOW_FILTER_INTRA_MODES)
this_rate += vp10_cost_bit(cpi->common.fc->ext_intra_probs[0], 0);
if (is_directional_mode) {
int p_angle;
@@ -2950,7 +2974,7 @@
this_rd = RDCOST(x->rdmult, x->rddiv, this_rate, this_distortion);
if (this_rd < best_rd) {
- mode_selected = mode;
+ mode_selected = mic->mbmi.mode;
best_rd = this_rd;
best_tx = mic->mbmi.tx_size;
#if CONFIG_EXT_INTRA
@@ -3444,7 +3468,7 @@
int ext_tx_set = get_ext_tx_set(max_tx_size, bsize, is_inter);
#endif // CONFIG_EXT_TX
- if (is_inter && cpi->sf.tx_type_search > 0)
+ if (is_inter && cpi->sf.tx_type_search.prune_mode > NO_PRUNE)
#if CONFIG_EXT_TX
prune = prune_tx_types(cpi, bsize, x, xd, ext_tx_set);
#else
@@ -3465,7 +3489,7 @@
if (is_inter) {
if (!ext_tx_used_inter[ext_tx_set][tx_type])
continue;
- if (cpi->sf.tx_type_search > 0) {
+ if (cpi->sf.tx_type_search.prune_mode > NO_PRUNE) {
if (!do_tx_type_search(tx_type, prune))
continue;
}
@@ -3480,7 +3504,7 @@
#else // CONFIG_EXT_TX
if (max_tx_size >= TX_32X32 && tx_type != DCT_DCT)
continue;
- if (is_inter && cpi->sf.tx_type_search > 0 &&
+ if (is_inter && cpi->sf.tx_type_search.prune_mode > NO_PRUNE &&
!do_tx_type_search(tx_type, prune))
continue;
#endif // CONFIG_EXT_TX
@@ -8308,6 +8332,7 @@
MB_MODE_INFO best_mbmode;
int best_mode_skippable = 0;
int midx, best_mode_index = -1;
+ const int FINAL_MODE_SEARCH = MAX_MODES;
unsigned int ref_costs_single[MAX_REF_FRAMES], ref_costs_comp[MAX_REF_FRAMES];
vpx_prob comp_mode_p;
int64_t best_intra_rd = INT64_MAX;
@@ -8597,8 +8622,14 @@
midx = end_pos;
}
- for (midx = 0; midx < MAX_MODES; ++midx) {
- int mode_index = mode_map[midx];
+
+ if (cpi->sf.tx_type_search.fast_intra_tx_type_search)
+ x->use_default_intra_tx_type = 1;
+ else
+ x->use_default_intra_tx_type = 0;
+
+ for (midx = 0; midx <= FINAL_MODE_SEARCH; ++midx) {
+ int mode_index;
int mode_excluded = 0;
int64_t this_rd = INT64_MAX;
int disable_skip = 0;
@@ -8617,6 +8648,17 @@
uint8_t ref_frame_type;
#endif
+ if (midx == FINAL_MODE_SEARCH) {
+ if (!is_inter_mode(best_mbmode.mode) && best_mode_index >= 0 &&
+ x->use_default_intra_tx_type == 1) {
+ mode_index = best_mode_index;
+ x->use_default_intra_tx_type = 0;
+ } else {
+ break;
+ }
+ } else {
+ mode_index = mode_map[midx];
+ }
this_mode = vp10_mode_order[mode_index].mode;
ref_frame = vp10_mode_order[mode_index].ref_frame[0];
second_ref_frame = vp10_mode_order[mode_index].ref_frame[1];
diff --git a/vp10/encoder/speed_features.c b/vp10/encoder/speed_features.c
index b766cae..f395fac 100644
--- a/vp10/encoder/speed_features.c
+++ b/vp10/encoder/speed_features.c
@@ -159,7 +159,8 @@
sf->tx_size_search_breakout = 1;
sf->partition_search_breakout_rate_thr = 80;
- sf->tx_type_search = PRUNE_ONE;
+ sf->tx_type_search.prune_mode = PRUNE_ONE;
+ sf->tx_type_search.fast_intra_tx_type_search = 1;
// Use transform domain distortion.
// Note var-tx expt always uses pixel domain distortion.
sf->use_transform_domain_distortion = 1;
@@ -183,7 +184,7 @@
sf->allow_partition_search_skip = 1;
sf->use_upsampled_references = 0;
#if CONFIG_EXT_TX
- sf->tx_type_search = PRUNE_TWO;
+ sf->tx_type_search.prune_mode = PRUNE_TWO;
#endif
}
@@ -496,7 +497,8 @@
sf->alt_ref_search_fp = 0;
sf->use_quant_fp = 0;
sf->partition_search_type = SEARCH_PARTITION;
- sf->tx_type_search = NO_PRUNE;
+ sf->tx_type_search.prune_mode = NO_PRUNE;
+ sf->tx_type_search.fast_intra_tx_type_search = 0;
sf->less_rectangular_check = 0;
sf->use_square_partition_only = 0;
sf->auto_min_max_partition_size = NOT_IN_USE;
diff --git a/vp10/encoder/speed_features.h b/vp10/encoder/speed_features.h
index ca6adbe..db57434 100644
--- a/vp10/encoder/speed_features.h
+++ b/vp10/encoder/speed_features.h
@@ -190,6 +190,11 @@
// eliminates two tx types in each direction
PRUNE_TWO = 2,
#endif
+} TX_TYPE_PRUNE_MODE;
+
+typedef struct {
+ TX_TYPE_PRUNE_MODE prune_mode;
+ int fast_intra_tx_type_search;
} TX_TYPE_SEARCH;
typedef enum {
@@ -310,6 +315,7 @@
PARTITION_SEARCH_TYPE partition_search_type;
TX_TYPE_SEARCH tx_type_search;
+
// Used if partition_search_type = FIXED_SIZE_PARTITION
BLOCK_SIZE always_this_block_size;