Adding speed feature interface for ext tx search
This sets up the interface for 3 speed features that progressively
eliminate a greater number of transforms in ext tx using
pre-trained support vector machines.
Each speed feature is currently a stub that returns a fixed pruning mask;
the SVM-based pruning decision itself still needs to be implemented.
Change-Id: Ia508aeadc0cffdc080fb227f357a5d1dfbca08e2
diff --git a/vp10/encoder/rdopt.c b/vp10/encoder/rdopt.c
index 96edc0f..b54e402 100644
--- a/vp10/encoder/rdopt.c
+++ b/vp10/encoder/rdopt.c
@@ -354,6 +354,126 @@
}
}
+#if CONFIG_EXT_TX
+typedef enum {  // 1-D transform kinds; each value is a bit index in a prune mask
+ DCT_1D = 0,
+ ADST_1D = 1,
+ FLIPADST_1D = 2,
+ DST_1D = 3,
+ TX_TYPES_1D = 4,  // number of kinds; do_tx_type_search() adds it to htx_tab[] values
+} TX_TYPE_1D;
+/* PRUNE_TWO stub: ignores every argument and returns a fixed prune mask. */
+static int prune_two_for_sby(const VP10_COMP *cpi,
+ BLOCK_SIZE bsize,
+ MACROBLOCK *x,
+ MACROBLOCKD *xd) {
+ (void) cpi;  // these casts only mark the parameters as intentionally unused
+ (void) bsize;
+ (void) x;
+ (void) xd;
+ return 3;  // bits 0-1 (DCT_1D, ADST_1D) of the vtx_tab group; no htx_tab bits set
+}
+/* PRUNE_THREE stub: ignores every argument and returns a fixed prune mask. */
+static int prune_three_for_sby(const VP10_COMP *cpi,
+ BLOCK_SIZE bsize,
+ MACROBLOCK *x,
+ MACROBLOCKD *xd) {
+ (void) cpi;  // these casts only mark the parameters as intentionally unused
+ (void) bsize;
+ (void) x;
+ (void) xd;
+ return 7;  // bits 0-2 (DCT_1D, ADST_1D, FLIPADST_1D) of the vtx_tab group only
+}
+
+#endif // CONFIG_EXT_TX
+/* PRUNE_ONE stub (built with or without CONFIG_EXT_TX): returns a fixed mask. */
+static int prune_one_for_sby(const VP10_COMP *cpi,
+ BLOCK_SIZE bsize,
+ MACROBLOCK *x,
+ MACROBLOCKD *xd) {
+ (void) cpi;  // these casts only mark the parameters as intentionally unused
+ (void) bsize;
+ (void) x;
+ (void) xd;
+ return 1;  // bit 0 (DCT_1D) of the vtx_tab group in do_tx_type_search()
+}
+/* Maps cpi->sf.tx_type_search to the prune mask read by do_tx_type_search(). */
+static int prune_tx_types(const VP10_COMP *cpi,
+ BLOCK_SIZE bsize,
+ MACROBLOCK *x,
+ MACROBLOCKD *xd) {
+ switch (cpi->sf.tx_type_search) {
+ case NO_PRUNE:
+ return 0;  // empty mask: no bit set, so nothing is pruned
+ break;  // unreachable after the return above (same for each break below)
+ case PRUNE_ONE :
+ return prune_one_for_sby(cpi, bsize, x, xd);
+ break;
+ #if CONFIG_EXT_TX
+ case PRUNE_TWO :
+ return prune_two_for_sby(cpi, bsize, x, xd);
+ break;
+ case PRUNE_THREE :
+ return prune_three_for_sby(cpi, bsize, x, xd);
+ break;
+ #endif
+ }
+ assert(0);  // reached only when tx_type_search matches none of the cases above
+ return 0;  // fallback when assert() is compiled out: empty mask
+}
+/* Returns 1 if tx_type is kept by the prune bit mask, 0 if it is pruned. */
+static int do_tx_type_search(TX_TYPE tx_type,
+                             int prune) {
+// TODO(sarahparker) implement for non ext tx
+#if CONFIG_EXT_TX
+  static const TX_TYPE_1D vtx_tab[TX_TYPES] = {  // selects prune bits 0..3
+    DCT_1D,
+    ADST_1D,
+    DCT_1D,
+    ADST_1D,
+    FLIPADST_1D,
+    DCT_1D,
+    FLIPADST_1D,
+    ADST_1D,
+    FLIPADST_1D,
+    DST_1D,
+    DCT_1D,
+    DST_1D,
+    ADST_1D,
+    DST_1D,
+    FLIPADST_1D,
+    DST_1D,
+  };
+  static const TX_TYPE_1D htx_tab[TX_TYPES] = {  // selects prune bits 4..7
+    DCT_1D,
+    DCT_1D,
+    ADST_1D,
+    ADST_1D,
+    DCT_1D,
+    FLIPADST_1D,
+    FLIPADST_1D,
+    FLIPADST_1D,
+    ADST_1D,
+    DCT_1D,
+    DST_1D,
+    ADST_1D,
+    DST_1D,
+    FLIPADST_1D,
+    DST_1D,
+    DST_1D,
+  };
+  if (tx_type == IDTX)  // IDTX is always searched, whatever the mask says
+    return 1;
+  return !(((prune >> vtx_tab[tx_type]) & 1) |  // kept only if both bits clear
+           ((prune >> (htx_tab[tx_type] + TX_TYPES_1D)) & 1));
+#else
+  // temporary to avoid compiler warnings
+  (void) tx_type;
+  (void) prune;
+  return 1;  // without CONFIG_EXT_TX no transform type is pruned yet
+#endif
+}
+
static void model_rd_for_sb(VP10_COMP *cpi, BLOCK_SIZE bsize,
MACROBLOCK *x, MACROBLOCKD *xd,
int *out_rate_sum, int64_t *out_dist_sum,
@@ -394,8 +514,10 @@
// low enough so that we can skip the mode search.
const int64_t low_dc_thr = VPXMIN(50, dc_thr >> 2);
const int64_t low_ac_thr = VPXMIN(80, ac_thr >> 2);
- int bw = 1 << (b_width_log2_lookup[bs] - b_width_log2_lookup[unit_size]);
- int bh = 1 << (b_height_log2_lookup[bs] - b_width_log2_lookup[unit_size]);
+ int bw_shift = (b_width_log2_lookup[bs] - b_width_log2_lookup[unit_size]);
+ int bh_shift = (b_height_log2_lookup[bs] - b_width_log2_lookup[unit_size]);
+ int bw = 1 << bw_shift;
+ int bh = 1 << bh_shift;
int idx, idy;
int lw = b_width_log2_lookup[unit_size] + 2;
int lh = b_height_log2_lookup[unit_size] + 2;
@@ -406,7 +528,7 @@
for (idx = 0; idx < bw; ++idx) {
uint8_t *src = p->src.buf + (idy * p->src.stride << lh) + (idx << lw);
uint8_t *dst = pd->dst.buf + (idy * pd->dst.stride << lh) + (idx << lh);
- int block_idx = (idy << 1) + idx;
+ int block_idx = (idy << bw_shift) + idx;
int low_err_skip = 0;
var = cpi->fn_ptr[unit_size].vf(src, p->src.stride,
@@ -988,11 +1110,14 @@
vpx_prob skip_prob = vp10_get_skip_prob(cm, xd);
int s0 = vp10_cost_bit(skip_prob, 0);
int s1 = vp10_cost_bit(skip_prob, 1);
+ const int is_inter = is_inter_block(mbmi);
+ int prune = 0;
#if CONFIG_EXT_TX
int ext_tx_set;
#endif // CONFIG_EXT_TX
- const int is_inter = is_inter_block(mbmi);
+ if (is_inter && cpi->sf.tx_type_search > 0)
+ prune = prune_tx_types(cpi, bs, x, xd);
mbmi->tx_size = VPXMIN(max_tx_size, largest_tx_size);
#if CONFIG_EXT_TX
@@ -1004,6 +1129,15 @@
if (is_inter) {
if (!ext_tx_used_inter[ext_tx_set][tx_type])
continue;
+ if (cpi->sf.tx_type_search > 0) {
+ if (!do_tx_type_search(tx_type, prune))
+ continue;
+ } else if (ext_tx_set == 1 &&
+ tx_type >= DST_ADST && tx_type < IDTX &&
+ best_tx_type == DCT_DCT) {
+ tx_type = IDTX - 1;
+ continue;
+ }
} else {
if (!ALLOW_INTRA_EXT_TX && bs >= BLOCK_8X8) {
if (tx_type != intra_mode_to_tx_type_context[mbmi->mode])
@@ -1011,15 +1145,15 @@
}
if (!ext_tx_used_intra[ext_tx_set][tx_type])
continue;
+ if (ext_tx_set == 1 &&
+ tx_type >= DST_ADST && tx_type < IDTX &&
+ best_tx_type == DCT_DCT) {
+ tx_type = IDTX - 1;
+ continue;
+ }
}
mbmi->tx_type = tx_type;
- if (ext_tx_set == 1 &&
- mbmi->tx_type >= DST_ADST && mbmi->tx_type < IDTX &&
- best_tx_type == DCT_DCT) {
- tx_type = IDTX - 1;
- continue;
- }
txfm_rd_in_plane(x,
cpi,
@@ -1067,12 +1201,15 @@
cpi->sf.use_fast_coef_costing);
if (r == INT_MAX)
continue;
- if (is_inter)
+ if (is_inter) {
r += cpi->inter_tx_type_costs[mbmi->tx_size][mbmi->tx_type];
- else
+ if (cpi->sf.tx_type_search > 0 && !do_tx_type_search(tx_type, prune))
+ continue;
+ } else {
r += cpi->intra_tx_type_costs[mbmi->tx_size]
[intra_mode_to_tx_type_context[mbmi->mode]]
[mbmi->tx_type];
+ }
if (s)
this_rd = RDCOST(x->rdmult, x->rddiv, s1, psse);
else
@@ -1150,13 +1287,17 @@
TX_SIZE best_tx = max_tx_size;
int start_tx, end_tx;
const int tx_select = cm->tx_mode == TX_MODE_SELECT;
- TX_TYPE tx_type, best_tx_type = DCT_DCT;
const int is_inter = is_inter_block(mbmi);
+ const vpx_prob *tx_probs = get_tx_probs2(max_tx_size, xd, &cm->fc->tx_probs);
+ TX_TYPE tx_type, best_tx_type = DCT_DCT;
+ int prune = 0;
#if CONFIG_EXT_TX
int ext_tx_set;
#endif // CONFIG_EXT_TX
- const vpx_prob *tx_probs = get_tx_probs2(max_tx_size, xd, &cm->fc->tx_probs);
+ if (is_inter && cpi->sf.tx_type_search > 0)
+ prune = prune_tx_types(cpi, bs, x, xd);
+
assert(skip_prob > 0);
s0 = vp10_cost_bit(skip_prob, 0);
s1 = vp10_cost_bit(skip_prob, 1);
@@ -1187,6 +1328,15 @@
if (is_inter) {
if (!ext_tx_used_inter[ext_tx_set][tx_type])
continue;
+ if (cpi->sf.tx_type_search > 0) {
+ if (!do_tx_type_search(tx_type, prune))
+ continue;
+ } else if (ext_tx_set == 1 &&
+ tx_type >= DST_ADST && tx_type < IDTX &&
+ best_tx_type == DCT_DCT) {
+ tx_type = IDTX - 1;
+ continue;
+ }
} else {
if (!ALLOW_INTRA_EXT_TX && bs >= BLOCK_8X8) {
if (tx_type != intra_mode_to_tx_type_context[mbmi->mode])
@@ -1194,14 +1344,14 @@
}
if (!ext_tx_used_intra[ext_tx_set][tx_type])
continue;
+ if (ext_tx_set == 1 &&
+ tx_type >= DST_ADST && tx_type < IDTX &&
+ best_tx_type == DCT_DCT) {
+ tx_type = IDTX - 1;
+ break;
+ }
}
mbmi->tx_type = tx_type;
- if (ext_tx_set == 1 &&
- mbmi->tx_type >= DST_ADST && mbmi->tx_type < IDTX &&
- best_tx_type == DCT_DCT) {
- tx_type = IDTX - 1;
- break;
- }
txfm_rd_in_plane(x,
cpi,
&r, &d, &s,
@@ -1233,12 +1383,15 @@
if (n < TX_32X32 &&
!xd->lossless[xd->mi[0]->mbmi.segment_id] &&
r != INT_MAX && !FIXED_TX_TYPE) {
- if (is_inter)
+ if (is_inter) {
r += cpi->inter_tx_type_costs[mbmi->tx_size][mbmi->tx_type];
- else
+ if (cpi->sf.tx_type_search > 0 && !do_tx_type_search(tx_type, prune))
+ continue;
+ } else {
r += cpi->intra_tx_type_costs[mbmi->tx_size]
[intra_mode_to_tx_type_context[mbmi->mode]]
[mbmi->tx_type];
+ }
}
#endif // CONFIG_EXT_TX
@@ -2803,9 +2956,13 @@
uint8_t best_blk_skip[256];
const int n4 = 1 << (num_pels_log2_lookup[bsize] - 4);
int idx, idy;
+ int prune = 0;
#if CONFIG_EXT_TX
int ext_tx_set = get_ext_tx_set(max_tx_size, bsize, is_inter);
-#endif
+#endif // CONFIG_EXT_TX
+
+ if (is_inter && cpi->sf.tx_type_search > 0)
+ prune = prune_tx_types(cpi, bsize, x, xd);
*distortion = INT64_MAX;
*rate = INT_MAX;
@@ -2821,6 +2978,15 @@
if (is_inter) {
if (!ext_tx_used_inter[ext_tx_set][tx_type])
continue;
+ if (cpi->sf.tx_type_search > 0) {
+ if (!do_tx_type_search(tx_type, prune))
+ continue;
+ } else if (ext_tx_set == 1 &&
+ tx_type >= DST_ADST && tx_type < IDTX &&
+ best_tx_type == DCT_DCT) {
+ tx_type = IDTX - 1;
+ continue;
+ }
} else {
if (!ALLOW_INTRA_EXT_TX && bsize >= BLOCK_8X8) {
if (tx_type != intra_mode_to_tx_type_context[mbmi->mode])
@@ -2828,17 +2994,16 @@
}
if (!ext_tx_used_intra[ext_tx_set][tx_type])
continue;
+ if (ext_tx_set == 1 &&
+ tx_type >= DST_ADST && tx_type < IDTX &&
+ best_tx_type == DCT_DCT) {
+ tx_type = IDTX - 1;
+ break;
+ }
}
mbmi->tx_type = tx_type;
- if (ext_tx_set == 1 &&
- mbmi->tx_type >= DST_ADST && mbmi->tx_type < IDTX &&
- best_tx_type == DCT_DCT) {
- tx_type = IDTX - 1;
- break;
- }
-
inter_block_yrd(cpi, x, &this_rate, &this_dist, &this_skip, &this_sse,
bsize, ref_best_rd);
@@ -2867,12 +3032,15 @@
if (max_tx_size < TX_32X32 &&
!xd->lossless[xd->mi[0]->mbmi.segment_id] &&
this_rate != INT_MAX) {
- if (is_inter)
+ if (is_inter) {
this_rate += cpi->inter_tx_type_costs[max_tx_size][mbmi->tx_type];
- else
+ if (cpi->sf.tx_type_search > 0 && !do_tx_type_search(tx_type, prune))
+ continue;
+ } else {
this_rate += cpi->intra_tx_type_costs[max_tx_size]
[intra_mode_to_tx_type_context[mbmi->mode]]
[mbmi->tx_type];
+ }
}
#endif // CONFIG_EXT_TX
diff --git a/vp10/encoder/speed_features.c b/vp10/encoder/speed_features.c
index 1f70dcb..ec8acda 100644
--- a/vp10/encoder/speed_features.c
+++ b/vp10/encoder/speed_features.c
@@ -159,7 +159,7 @@
sf->tx_size_search_breakout = 1;
sf->partition_search_breakout_rate_thr = 80;
-
+ sf->tx_type_search = PRUNE_ONE;
// Use transform domain distortion.
// Note var-tx expt always uses pixel domain distortion.
sf->use_transform_domain_distortion = 1;
@@ -177,6 +177,9 @@
sf->comp_inter_joint_search_thresh = BLOCK_SIZES;
sf->auto_min_max_partition_size = RELAXED_NEIGHBORING_MIN_MAX;
sf->allow_partition_search_skip = 1;
+#if CONFIG_EXT_TX
+ sf->tx_type_search = PRUNE_TWO;
+#endif
}
if (speed >= 3) {
@@ -195,6 +198,9 @@
sf->intra_y_mode_mask[TX_32X32] = INTRA_DC;
sf->intra_uv_mode_mask[TX_32X32] = INTRA_DC;
sf->adaptive_interp_filter_search = 1;
+#if CONFIG_EXT_TX
+ sf->tx_type_search = PRUNE_THREE;
+#endif
}
if (speed >= 4) {
@@ -473,6 +479,7 @@
sf->alt_ref_search_fp = 0;
sf->use_quant_fp = 0;
sf->partition_search_type = SEARCH_PARTITION;
+ sf->tx_type_search = NO_PRUNE;
sf->less_rectangular_check = 0;
sf->use_square_partition_only = 0;
sf->auto_min_max_partition_size = NOT_IN_USE;
diff --git a/vp10/encoder/speed_features.h b/vp10/encoder/speed_features.h
index 4f931d8..fbb6988 100644
--- a/vp10/encoder/speed_features.h
+++ b/vp10/encoder/speed_features.h
@@ -182,6 +182,18 @@
} INTERP_FILTER_MASK;
typedef enum {
+ NO_PRUNE = 0,  // prune_tx_types() returns an empty mask for this level
+ // Eliminates one tx type in each of the vertical and horizontal directions.
+ PRUNE_ONE = 1,  // dispatched to prune_one_for_sby()
+#if CONFIG_EXT_TX
+ // Eliminates two tx types in each direction.
+ PRUNE_TWO = 2,  // dispatched to prune_two_for_sby()
+ // Eliminates three tx types in each direction.
+ PRUNE_THREE = 3,  // dispatched to prune_three_for_sby()
+#endif
+} TX_TYPE_SEARCH;  // type of the tx_type_search speed-feature field
+
+typedef enum {
// Search partitions using RD criterion
SEARCH_PARTITION,
@@ -298,6 +310,7 @@
PARTITION_SEARCH_TYPE partition_search_type;
+ TX_TYPE_SEARCH tx_type_search;
// Used if partition_search_type = FIXED_SIZE_PARTITION
BLOCK_SIZE always_this_block_size;