Enable early termination in the recursive transform block search
The early termination makes the encoder 5% faster for CIF clips and 12%
faster for 1080p clips.
Change-Id: I073408dbb4d50675a79db8794fe73975ac957b91
diff --git a/vp10/encoder/rdopt.c b/vp10/encoder/rdopt.c
index ffd84e8..672c73e 100644
--- a/vp10/encoder/rdopt.c
+++ b/vp10/encoder/rdopt.c
@@ -1288,7 +1288,8 @@
TX_SIZE tx_size, BLOCK_SIZE plane_bsize,
ENTROPY_CONTEXT *ta, ENTROPY_CONTEXT *tl,
int *rate, int64_t *dist,
- int64_t *bsse, int *skip) {
+ int64_t *bsse, int *skip,
+ int64_t ref_best_rd, int *is_cost_valid) {
MACROBLOCKD *const xd = &x->e_mbd;
MB_MODE_INFO *const mbmi = &xd->mi[0]->mbmi;
struct macroblock_plane *const p = &x->plane[plane];
@@ -1306,7 +1307,12 @@
int64_t sum_rd = INT64_MAX;
int sum_rate = vp10_cost_bit(128, 1);
int all_skip = 1;
- TX_SIZE swap_tx_size = TX_SIZES;
+ int tmp_eob = 0;
+
+ if (ref_best_rd < 0) {
+ *is_cost_valid = 0;
+ return;
+ }
switch (tx_size) {
case TX_4X4:
@@ -1344,6 +1350,16 @@
if (blk_row >= max_blocks_high || blk_col >= max_blocks_wide)
return;
+ if (cpi->common.tx_mode == TX_MODE_SELECT || tx_size == TX_4X4) {
+ mbmi->inter_tx_size[tx_idx] = tx_size;
+ tx_block_rd_b(x, tx_size, blk_row, blk_col, plane, block,
+ plane_bsize, coeff_ctx, rate, dist, bsse, skip);
+ if (tx_size > TX_4X4)
+ *rate += vp10_cost_bit(128, 0);
+ this_rd = RDCOST(x->rdmult, x->rddiv, *rate, *dist);
+ tmp_eob = p->eobs[block];
+ }
+
if (tx_size > TX_4X4) {
BLOCK_SIZE bsize = txsize_to_bsize[tx_size];
int bsl = b_height_log2_lookup[bsize];
@@ -1353,6 +1369,8 @@
int64_t this_dist;
int64_t this_bsse;
int this_skip;
+ int this_cost_valid = 1;
+ int64_t tmp_rd = 0;
--bsl;
for (i = 0; i < 4; ++i) {
@@ -1361,36 +1379,30 @@
select_tx_block(cpi, x, blk_row + offsetr, blk_col + offsetc,
plane, block + i * sub_step, tx_size - 1,
plane_bsize, ta, tl, &this_rate, &this_dist,
- &this_bsse, &this_skip);
+ &this_bsse, &this_skip,
+ ref_best_rd - tmp_rd, &this_cost_valid);
sum_rate += this_rate;
sum_dist += this_dist;
sum_bsse += this_bsse;
all_skip &= this_skip;
+ tmp_rd += RDCOST(x->rdmult, x->rddiv, this_rate, this_dist);
+ if (this_rd < tmp_rd)
+ break;
}
- sum_rd = RDCOST(x->rdmult, x->rddiv, sum_rate, sum_dist);
- }
-
- if (cpi->common.tx_mode == TX_MODE_SELECT || tx_size == TX_4X4) {
- swap_tx_size = mbmi->inter_tx_size[tx_idx];
- mbmi->inter_tx_size[tx_idx] = tx_size;
-
- tx_block_rd_b(x, tx_size, blk_row, blk_col, plane, block,
- plane_bsize, coeff_ctx, rate, dist, bsse, skip);
- if (tx_size > TX_4X4)
- *rate += vp10_cost_bit(128, 0);
- this_rd = RDCOST(x->rdmult, x->rddiv, *rate, *dist);
+ if (this_cost_valid)
+ sum_rd = tmp_rd;
}
if (this_rd < sum_rd) {
for (i = 0; i < (1 << tx_size); ++i)
- pta[i] = ptl[i] = !(p->eobs[block] == 0);
+ pta[i] = ptl[i] = !(tmp_eob == 0);
+ mbmi->inter_tx_size[tx_idx] = tx_size;
mbmi->tx_size = tx_size;
} else {
*rate = sum_rate;
*dist = sum_dist;
*bsse = sum_bsse;
*skip = all_skip;
- mbmi->inter_tx_size[tx_idx] = swap_tx_size;
}
}
@@ -1400,7 +1412,7 @@
int64_t ref_best_rd) {
MACROBLOCKD *const xd = &x->e_mbd;
int is_cost_valid = 1;
- int64_t this_rd;
+ int64_t this_rd = 0;
if (ref_best_rd < 0)
is_cost_valid = 0;
@@ -1431,12 +1443,14 @@
for (idx = 0; idx < mi_width; idx += bh) {
select_tx_block(cpi, x, idy, idx, 0, block,
max_txsize_lookup[plane_bsize], plane_bsize,
- ctxa, ctxl, &pnrate, &pndist, &pnsse, &pnskip);
+ ctxa, ctxl, &pnrate, &pndist, &pnsse, &pnskip,
+ ref_best_rd - this_rd, &is_cost_valid);
*rate += pnrate;
*distortion += pndist;
*sse += pnsse;
*skippable &= pnskip;
-
+ this_rd += VPXMIN(RDCOST(x->rdmult, x->rddiv, pnrate, pndist),
+ RDCOST(x->rdmult, x->rddiv, 0, pnsse));
block += step;
}
}