Refactor nonrd_use_partition
To avoid an extra round of encode_b execution (which is needed for full
RDO), refactor nonrd_use_partition. This speeds up the encoder by another
~5% at speed 8. There is no significant quality impact from the absence
of VERT and HORZ partitions.
Change-Id: I1ad3f12bfd0b339236042da2717d9cb4878a043a
diff --git a/av1/encoder/encodeframe.c b/av1/encoder/encodeframe.c
index 347579c..a03d8c4 100644
--- a/av1/encoder/encodeframe.c
+++ b/av1/encoder/encodeframe.c
@@ -1951,128 +1951,54 @@
*dist = chosen_rdc.dist;
}
-// TODO(kyslov): now this is very similar to rd_use_partition (except that
-// doesn't do extra search arounf suggested partitioning)
-// consider passing a flag to select non-rd path (similar to
-// encode_sb_row)
static void nonrd_use_partition(AV1_COMP *cpi, ThreadData *td,
TileDataEnc *tile_data, MB_MODE_INFO **mib,
TOKENEXTRA **tp, int mi_row, int mi_col,
- BLOCK_SIZE bsize, int *rate, int64_t *dist,
- int do_recon, PC_TREE *pc_tree) {
+ BLOCK_SIZE bsize, PC_TREE *pc_tree) {
AV1_COMMON *const cm = &cpi->common;
- const int num_planes = av1_num_planes(cm);
TileInfo *const tile_info = &tile_data->tile_info;
MACROBLOCK *const x = &td->mb;
MACROBLOCKD *const xd = &x->e_mbd;
const int bs = mi_size_wide[bsize];
const int hbs = bs / 2;
- int i;
- const int pl = (bsize >= BLOCK_8X8)
- ? partition_plane_context(xd, mi_row, mi_col, bsize)
- : 0;
const PARTITION_TYPE partition =
(bsize >= BLOCK_8X8) ? get_partition(cm, mi_row, mi_col, bsize)
: PARTITION_NONE;
const BLOCK_SIZE subsize = get_partition_subsize(bsize, partition);
- RD_SEARCH_MACROBLOCK_CONTEXT x_ctx;
- RD_STATS last_part_rdc;
- PICK_MODE_CONTEXT *ctx_none = &pc_tree->none;
+ RD_STATS dummy_cost;
if (mi_row >= cm->mi_rows || mi_col >= cm->mi_cols) return;
assert(mi_size_wide[bsize] == mi_size_high[bsize]);
- av1_invalid_rd_stats(&last_part_rdc);
-
pc_tree->partitioning = partition;
xd->above_txfm_context = cm->above_txfm_context[tile_info->tile_row] + mi_col;
xd->left_txfm_context =
xd->left_txfm_context_buffer + (mi_row & MAX_MIB_MASK);
- save_context(x, &x_ctx, mi_row, mi_col, bsize, num_planes);
-
- if (bsize == BLOCK_16X16 && cpi->vaq_refresh) {
- set_offsets(cpi, tile_info, x, mi_row, mi_col, bsize);
- x->mb_energy = av1_log_block_var(cpi, x, bsize);
- }
switch (partition) {
case PARTITION_NONE:
- pick_sb_modes(cpi, tile_data, x, mi_row, mi_col, &last_part_rdc,
- PARTITION_NONE, bsize, ctx_none, INT64_MAX, 1);
+ pick_sb_modes(cpi, tile_data, x, mi_row, mi_col, &dummy_cost,
+ PARTITION_NONE, bsize, &pc_tree->none, INT64_MAX, 1);
+ encode_b(cpi, tile_data, td, tp, mi_row, mi_col, 0, bsize, partition,
+ &pc_tree->none, NULL);
break;
+ // TODO(kyslov@) Add HORZ and VERT partitions
case PARTITION_HORZ:
- pick_sb_modes(cpi, tile_data, x, mi_row, mi_col, &last_part_rdc,
- PARTITION_HORZ, subsize, &pc_tree->horizontal[0], INT64_MAX,
- 1);
- if (last_part_rdc.rate != INT_MAX && bsize >= BLOCK_8X8 &&
- mi_row + hbs < cm->mi_rows) {
- RD_STATS tmp_rdc;
- const PICK_MODE_CONTEXT *const ctx_h = &pc_tree->horizontal[0];
- av1_init_rd_stats(&tmp_rdc);
- update_state(cpi, tile_data, td, ctx_h, mi_row, mi_col, subsize, 1);
- encode_superblock(cpi, tile_data, td, tp, DRY_RUN_NORMAL, mi_row,
- mi_col, subsize, NULL);
- pick_sb_modes(cpi, tile_data, x, mi_row + hbs, mi_col, &tmp_rdc,
- PARTITION_HORZ, subsize, &pc_tree->horizontal[1],
- INT64_MAX, 1);
- if (tmp_rdc.rate == INT_MAX || tmp_rdc.dist == INT64_MAX) {
- av1_invalid_rd_stats(&last_part_rdc);
- break;
- }
- last_part_rdc.rate += tmp_rdc.rate;
- last_part_rdc.dist += tmp_rdc.dist;
- last_part_rdc.rdcost += tmp_rdc.rdcost;
- }
- break;
case PARTITION_VERT:
- pick_sb_modes(cpi, tile_data, x, mi_row, mi_col, &last_part_rdc,
- PARTITION_VERT, subsize, &pc_tree->vertical[0], INT64_MAX,
- 1);
- if (last_part_rdc.rate != INT_MAX && bsize >= BLOCK_8X8 &&
- mi_col + hbs < cm->mi_cols) {
- RD_STATS tmp_rdc;
- const PICK_MODE_CONTEXT *const ctx_v = &pc_tree->vertical[0];
- av1_init_rd_stats(&tmp_rdc);
- update_state(cpi, tile_data, td, ctx_v, mi_row, mi_col, subsize, 1);
- encode_superblock(cpi, tile_data, td, tp, DRY_RUN_NORMAL, mi_row,
- mi_col, subsize, NULL);
- pick_sb_modes(cpi, tile_data, x, mi_row, mi_col + hbs, &tmp_rdc,
- PARTITION_VERT, subsize,
- &pc_tree->vertical[bsize > BLOCK_8X8], INT64_MAX, 1);
- if (tmp_rdc.rate == INT_MAX || tmp_rdc.dist == INT64_MAX) {
- av1_invalid_rd_stats(&last_part_rdc);
- break;
- }
- last_part_rdc.rate += tmp_rdc.rate;
- last_part_rdc.dist += tmp_rdc.dist;
- last_part_rdc.rdcost += tmp_rdc.rdcost;
- }
+ assert(0 && "Cannot yet handle non-square partition types");
break;
case PARTITION_SPLIT:
- last_part_rdc.rate = 0;
- last_part_rdc.dist = 0;
- last_part_rdc.rdcost = 0;
- for (i = 0; i < 4; i++) {
+ for (int i = 0; i < 4; i++) {
int x_idx = (i & 1) * hbs;
int y_idx = (i >> 1) * hbs;
int jj = i >> 1, ii = i & 0x01;
- RD_STATS tmp_rdc;
if ((mi_row + y_idx >= cm->mi_rows) || (mi_col + x_idx >= cm->mi_cols))
continue;
-
- av1_init_rd_stats(&tmp_rdc);
nonrd_use_partition(
cpi, td, tile_data, mib + jj * hbs * cm->mi_stride + ii * hbs, tp,
- mi_row + y_idx, mi_col + x_idx, subsize, &tmp_rdc.rate,
- &tmp_rdc.dist, i != 3, pc_tree->split[i]);
- if (tmp_rdc.rate == INT_MAX || tmp_rdc.dist == INT64_MAX) {
- av1_invalid_rd_stats(&last_part_rdc);
- break;
- }
- last_part_rdc.rate += tmp_rdc.rate;
- last_part_rdc.dist += tmp_rdc.dist;
+ mi_row + y_idx, mi_col + x_idx, subsize, pc_tree->split[i]);
}
break;
case PARTITION_VERT_A:
@@ -2084,37 +2010,8 @@
assert(0 && "Cannot handle extended partition types");
default: assert(0); break;
}
-
- if (last_part_rdc.rate < INT_MAX) {
- last_part_rdc.rate += x->partition_cost[pl][partition];
- last_part_rdc.rdcost =
- RDCOST(x->rdmult, last_part_rdc.rate, last_part_rdc.dist);
- }
-
- restore_context(x, &x_ctx, mi_row, mi_col, bsize, num_planes);
-
- // We must have chosen a partitioning and encoding or we'll fail later on.
- // No other opportunities for success.
- if (bsize == cm->seq_params.sb_size)
- assert(last_part_rdc.rate < INT_MAX && last_part_rdc.dist < INT64_MAX);
-
- if (do_recon) {
- if (bsize == cm->seq_params.sb_size) {
- // NOTE: To get estimate for rate due to the tokens, use:
- // int rate_coeffs = 0;
- // encode_sb(cpi, td, tile_data, tp, mi_row, mi_col, DRY_RUN_COSTCOEFFS,
- // bsize, pc_tree, &rate_coeffs);
- x->cb_offset = 0;
- encode_sb(cpi, td, tile_data, tp, mi_row, mi_col, OUTPUT_ENABLED, bsize,
- pc_tree, NULL);
- } else {
- encode_sb(cpi, td, tile_data, tp, mi_row, mi_col, DRY_RUN_NORMAL, bsize,
- pc_tree, NULL);
- }
- }
-
- *rate = last_part_rdc.rate;
- *dist = last_part_rdc.dist;
+ if (partition != PARTITION_SPLIT || bsize == BLOCK_8X8)
+ update_partition_context(xd, mi_row, mi_col, subsize, bsize);
}
// Checks to see if a super block is on a horizontal image edge.
@@ -4797,8 +4694,9 @@
use_nonrd_mode) {
set_offsets(cpi, tile_info, x, mi_row, mi_col, sb_size);
av1_choose_var_based_partitioning(cpi, tile_info, x, mi_row, mi_col);
+ td->mb.cb_offset = 0;
nonrd_use_partition(cpi, td, tile_data, mi, tp, mi_row, mi_col, sb_size,
- &dummy_rate, &dummy_dist, 1, pc_root);
+ pc_root);
} else {
const int orig_rdmult = cpi->rd.RDMULT;
diff --git a/av1/encoder/var_based_part.c b/av1/encoder/var_based_part.c
index 3cead91..1000fc9 100644
--- a/av1/encoder/var_based_part.c
+++ b/av1/encoder/var_based_part.c
@@ -227,36 +227,6 @@
set_block_size(cpi, x, xd, mi_row, mi_col, bsize);
return 1;
}
-
- // Check vertical split.
- if (mi_row + block_height / 2 < cm->mi_rows) {
- BLOCK_SIZE subsize = get_partition_subsize(bsize, PARTITION_VERT);
- get_variance(&vt.part_variances->vert[0]);
- get_variance(&vt.part_variances->vert[1]);
- if (vt.part_variances->vert[0].variance < threshold &&
- vt.part_variances->vert[1].variance < threshold &&
- get_plane_block_size(subsize, xd->plane[1].subsampling_x,
- xd->plane[1].subsampling_y) < BLOCK_INVALID) {
- set_block_size(cpi, x, xd, mi_row, mi_col, subsize);
- set_block_size(cpi, x, xd, mi_row, mi_col + block_width / 2, subsize);
- return 1;
- }
- }
- // Check horizontal split.
- if (mi_col + block_width / 2 < cm->mi_cols) {
- BLOCK_SIZE subsize = get_partition_subsize(bsize, PARTITION_HORZ);
- get_variance(&vt.part_variances->horz[0]);
- get_variance(&vt.part_variances->horz[1]);
- if (vt.part_variances->horz[0].variance < threshold &&
- vt.part_variances->horz[1].variance < threshold &&
- get_plane_block_size(subsize, xd->plane[1].subsampling_x,
- xd->plane[1].subsampling_y) < BLOCK_INVALID) {
- set_block_size(cpi, x, xd, mi_row, mi_col, subsize);
- set_block_size(cpi, x, xd, mi_row + block_height / 2, mi_col, subsize);
- return 1;
- }
- }
-
return 0;
}
return 0;