Enable 2-pass coding block partition search
Obtain the most likely partition range from a first-pass partition
search that is restricted to square blocks. Use the resulting
constrained partition search region for the full rate-distortion
optimization search in the second pass.
Tested on pedestrian 1080p at 2000 kbps, this makes encoding 40%
faster at speed 0 and 30% faster at speed 1. The average coding
performance loss is around 0.15%.
Change-Id: Ifc83d48e6413d1b887e68cd1962084e018a2258f
diff --git a/av1/encoder/block.h b/av1/encoder/block.h
index 28fa1a7..3751c19 100644
--- a/av1/encoder/block.h
+++ b/av1/encoder/block.h
@@ -169,8 +169,14 @@
// to select transform kernel.
int rd_model;
+ // Indicates whether the encoder is running the first-pass partition search.
+ // When set, certain speed features are applied to reduce the overhead
+ // cost of the first-pass search.
int cb_partition_scan;
+ // Activate constrained coding block partition search range.
+ int use_cb_search_range;
+
// Also save RD info on the TX size search level for square TX sizes.
TX_SIZE_RD_RECORD
tx_size_rd_record_8X8[(MAX_MIB_SIZE >> 1) * (MAX_MIB_SIZE >> 1)];
diff --git a/av1/encoder/context_tree.h b/av1/encoder/context_tree.h
index a14e1f2..c99a9c3 100644
--- a/av1/encoder/context_tree.h
+++ b/av1/encoder/context_tree.h
@@ -23,6 +23,17 @@
struct AV1Common;
struct ThreadData;
+typedef enum {
+ // Default after reset: the second pass applies no search restriction here.
+ SEARCH_FULL_PLANE = 0,
+ // Second pass disables square split, horz and vert; none stays allowed.
+ NONE_PARTITION_PLANE = 1,
+ // Second pass disables only square split; other types are still searched.
+ SEARCH_SAME_PLANE = 2,
+ // Second pass disables none, horz and vert, so it goes directly to split.
+ SPLIT_PLANE = 3,
+} CB_TREE_SEARCH;
+
// Structure to hold snapshot of coding context during the mode picking process
typedef struct {
MODE_INFO mic;
@@ -80,6 +91,7 @@
PICK_MODE_CONTEXT horizontal4[4];
PICK_MODE_CONTEXT vertical4[4];
#endif
+ CB_TREE_SEARCH cb_search_range;
struct PC_TREE *split[4];
} PC_TREE;
diff --git a/av1/encoder/encodeframe.c b/av1/encoder/encodeframe.c
index 0a9c189..015bf0c 100644
--- a/av1/encoder/encodeframe.c
+++ b/av1/encoder/encodeframe.c
@@ -2418,6 +2418,17 @@
}
#endif // CONFIG_DIST_8X8
+static void reset_partition(PC_TREE *pc_tree, BLOCK_SIZE bsize) {  // Resets the partition state of a whole PC_TREE subtree.
+ pc_tree->partitioning = PARTITION_NONE;
+ pc_tree->cb_search_range = SEARCH_FULL_PLANE;
+ // Node reset above; recurse into the four split children when bsize >= 8x8.
+ if (bsize >= BLOCK_8X8) {
+ BLOCK_SIZE subsize = get_subsize(bsize, PARTITION_SPLIT);
+ for (int idx = 0; idx < 4; ++idx)
+ reset_partition(pc_tree->split[idx], subsize);
+ }
+}
+
static void rd_pick_sqr_partition(const AV1_COMP *const cpi, ThreadData *td,
TileDataEnc *tile_data, TOKENEXTRA **tp,
int mi_row, int mi_col, BLOCK_SIZE bsize,
@@ -2433,7 +2444,7 @@
PICK_MODE_CONTEXT *ctx_none = &pc_tree->none;
int tmp_partition_cost[PARTITION_TYPES];
BLOCK_SIZE subsize;
- RD_STATS this_rdc, sum_rdc, best_rdc;
+ RD_STATS this_rdc, sum_rdc, best_rdc, pn_rdc;
const int bsize_at_least_8x8 = (bsize >= BLOCK_8X8);
int do_square_split = bsize_at_least_8x8;
const int pl = bsize_at_least_8x8
@@ -2508,6 +2519,8 @@
// PARTITION_NONE
if (partition_none_allowed) {
+ if (bsize_at_least_8x8) pc_tree->partitioning = PARTITION_NONE;
+
rd_pick_sb_modes(cpi, tile_data, x, mi_row, mi_col, &this_rdc,
#if CONFIG_EXT_PARTITION_TYPES
PARTITION_NONE,
@@ -2536,6 +2549,8 @@
best_rdc = this_rdc;
if (bsize_at_least_8x8) pc_tree->partitioning = PARTITION_NONE;
+ pc_tree->cb_search_range = SEARCH_FULL_PLANE;
+
// If all y, u, v transform blocks in this partition are skippable, and
// the dist & rate are within the thresholds, the partition search is
// terminated for current branch of the partition search tree.
@@ -2556,6 +2571,7 @@
if (cpi->sf.adaptive_motion_search) store_pred_mv(x, ctx_none);
int64_t temp_best_rdcost = best_rdc.rdcost;
+ pn_rdc = best_rdc;
#if CONFIG_DIST_8X8
uint8_t *src_plane_8x8[MAX_MB_PLANE], *dst_plane_8x8[MAX_MB_PLANE];
@@ -2623,6 +2639,26 @@
}
}
+ int has_split = 0;
+ if (pc_tree->partitioning == PARTITION_SPLIT) {
+ for (int cb_idx = 0; cb_idx <= AOMMIN(idx, 3); ++cb_idx) {
+ if (pc_tree->split[cb_idx]->partitioning == PARTITION_SPLIT)
+ ++has_split;
+ }
+
+ if (has_split >= 3 || sum_rdc.rdcost < (pn_rdc.rdcost >> 1)) {
+ pc_tree->cb_search_range = SPLIT_PLANE;
+ }
+ }
+
+ if (pc_tree->partitioning == PARTITION_NONE) {
+ pc_tree->cb_search_range = SEARCH_SAME_PLANE;
+ if (pn_rdc.dist <= sum_rdc.dist)
+ pc_tree->cb_search_range = NONE_PARTITION_PLANE;
+ }
+
+ if (pn_rdc.rate == INT_MAX) pc_tree->cb_search_range = NONE_PARTITION_PLANE;
+
restore_context(x, &x_ctx, mi_row, mi_col, bsize, num_planes);
} // if (do_split)
@@ -2796,6 +2832,24 @@
partition_vert_allowed &= !has_cols;
}
+ if (x->use_cb_search_range && cpi->sf.auto_min_max_partition_size == 0) {
+ if (pc_tree->cb_search_range == SPLIT_PLANE) {
+ partition_none_allowed = 0;
+ partition_horz_allowed = 0;
+ partition_vert_allowed = 0;
+ }
+
+ if (pc_tree->cb_search_range == SEARCH_SAME_PLANE) {
+ do_square_split = 0;
+ }
+
+ if (pc_tree->cb_search_range == NONE_PARTITION_PLANE) {
+ do_square_split = 0;
+ partition_horz_allowed = 0;
+ partition_vert_allowed = 0;
+ }
+ }
+
xd->above_txfm_context =
cm->above_txfm_context + (mi_col << TX_UNIT_WIDE_LOG2);
xd->left_txfm_context = xd->left_txfm_context_buffer +
@@ -3657,9 +3711,12 @@
&x->min_partition_size, &x->max_partition_size);
}
+ reset_partition(pc_root, cm->sb_size);
+ x->use_cb_search_range = 0;
if (cpi->sf.two_pass_partition_search &&
mi_row + mi_size_high[cm->sb_size] < cm->mi_rows &&
- mi_col + mi_size_wide[cm->sb_size] < cm->mi_cols) {
+ mi_col + mi_size_wide[cm->sb_size] < cm->mi_cols &&
+ cm->frame_type != KEY_FRAME) {
x->cb_partition_scan = 1;
rd_pick_sqr_partition(cpi, td, tile_data, tp, mi_row, mi_col,
cm->sb_size, &dummy_rdc, INT64_MAX, pc_root,
@@ -3693,6 +3750,7 @@
}
}
+ x->use_cb_search_range = 1;
rd_pick_partition(cpi, td, tile_data, tp, mi_row, mi_col, cm->sb_size,
&dummy_rdc, INT64_MAX, pc_root, NULL);
} else {
diff --git a/av1/encoder/rdopt.c b/av1/encoder/rdopt.c
index 0764608..ef04729 100644
--- a/av1/encoder/rdopt.c
+++ b/av1/encoder/rdopt.c
@@ -9708,7 +9708,7 @@
#if CONFIG_FRAME_MARKER
if (sf->selective_ref_frame) {
- if (sf->selective_ref_frame == 2) {
+ if (sf->selective_ref_frame == 2 || x->cb_partition_scan) {
if (mbmi->ref_frame[0] == ALTREF2_FRAME ||
mbmi->ref_frame[1] == ALTREF2_FRAME)
if (cm->cur_frame->alt2_frame_offset < cm->frame_offset) continue;