AV1 RT: Implement nonRD partition merge
Change-Id: Id1be0aa99d967c598d97bcd5bfec1633f860e352
diff --git a/av1/encoder/encodeframe.c b/av1/encoder/encodeframe.c
index 05e6055..d4961d4 100644
--- a/av1/encoder/encodeframe.c
+++ b/av1/encoder/encodeframe.c
@@ -2120,6 +2120,24 @@
x->rdmult = orig_rdmult;
}
+static int is_leaf_split_partition(AV1_COMMON *cm, int mi_row, int mi_col,
+ BLOCK_SIZE bsize) {
+ const int bs = mi_size_wide[bsize];
+ const int hbs = bs / 2;
+ assert(bsize >= BLOCK_8X8);
+ const BLOCK_SIZE subsize = get_partition_subsize(bsize, PARTITION_SPLIT);
+ for (int i = 0; i < 4; i++) {
+ int x_idx = (i & 1) * hbs;
+ int y_idx = (i >> 1) * hbs;
+ if ((mi_row + y_idx >= cm->mi_rows) || (mi_col + x_idx >= cm->mi_cols))
+ return 0;
+ if (get_partition(cm, mi_row + y_idx, mi_col + x_idx, subsize) !=
+ PARTITION_NONE)
+ return 0;
+ }
+ return 1;
+}
+
static AOM_INLINE void nonrd_use_partition(AV1_COMP *cpi, ThreadData *td,
TileDataEnc *tile_data,
MB_MODE_INFO **mib, TOKENEXTRA **tp,
@@ -2139,6 +2157,10 @@
: PARTITION_NONE;
const BLOCK_SIZE subsize = get_partition_subsize(bsize, partition);
assert(subsize <= BLOCK_LARGEST);
+ const int pl = (bsize >= BLOCK_8X8)
+ ? partition_plane_context(xd, mi_row, mi_col, bsize)
+ : 0;
+
RD_STATS dummy_cost;
av1_invalid_rd_stats(&dummy_cost);
RD_STATS invalid_rd;
@@ -2200,15 +2222,92 @@
}
break;
case PARTITION_SPLIT:
- for (int i = 0; i < 4; i++) {
- int x_idx = (i & 1) * hbs;
- int y_idx = (i >> 1) * hbs;
- int jj = i >> 1, ii = i & 0x01;
- if ((mi_row + y_idx >= cm->mi_rows) || (mi_col + x_idx >= cm->mi_cols))
- continue;
- nonrd_use_partition(
- cpi, td, tile_data, mib + jj * hbs * cm->mi_stride + ii * hbs, tp,
- mi_row + y_idx, mi_col + x_idx, subsize, pc_tree->split[i]);
+ if (cpi->sf.nonrd_merge_partition &&
+ is_leaf_split_partition(cm, mi_row, mi_col, bsize) &&
+ !frame_is_intra_only(cm)) {
+ RD_SEARCH_MACROBLOCK_CONTEXT x_ctx;
+ RD_STATS split_rdc, none_rdc;
+ av1_init_rd_stats(&split_rdc);
+ av1_invalid_rd_stats(&none_rdc);
+ save_context(x, &x_ctx, mi_row, mi_col, bsize, 3);
+ xd->above_txfm_context =
+ cm->above_txfm_context[tile_info->tile_row] + mi_col;
+ xd->left_txfm_context =
+ xd->left_txfm_context_buffer + (mi_row & MAX_MIB_MASK);
+ pc_tree->partitioning = PARTITION_NONE;
+ pick_sb_modes(cpi, tile_data, x, mi_row, mi_col, &none_rdc,
+ PARTITION_NONE, bsize, &pc_tree->none, invalid_rd,
+ sf->use_fast_nonrd_pick_mode ? PICK_MODE_FAST_NONRD
+ : PICK_MODE_NONRD);
+ none_rdc.rate += x->partition_cost[pl][PARTITION_NONE];
+ none_rdc.rdcost = RDCOST(x->rdmult, none_rdc.rate, none_rdc.dist);
+ restore_context(x, &x_ctx, mi_row, mi_col, bsize, 3);
+
+ for (int i = 0; i < 4; i++) {
+ RD_STATS block_rdc;
+ av1_invalid_rd_stats(&block_rdc);
+ int x_idx = (i & 1) * hbs;
+ int y_idx = (i >> 1) * hbs;
+ if ((mi_row + y_idx >= cm->mi_rows) ||
+ (mi_col + x_idx >= cm->mi_cols))
+ continue;
+ xd->above_txfm_context =
+ cm->above_txfm_context[tile_info->tile_row] + mi_col + x_idx;
+ xd->left_txfm_context =
+ xd->left_txfm_context_buffer + ((mi_row + y_idx) & MAX_MIB_MASK);
+ pc_tree->split[i]->partitioning = PARTITION_NONE;
+ pick_sb_modes(cpi, tile_data, x, mi_row + y_idx, mi_col + x_idx,
+ &block_rdc, PARTITION_NONE, subsize,
+ &pc_tree->split[i]->none, invalid_rd,
+ sf->use_fast_nonrd_pick_mode ? PICK_MODE_FAST_NONRD
+ : PICK_MODE_NONRD);
+ split_rdc.rate += block_rdc.rate;
+ split_rdc.dist += block_rdc.dist;
+
+ encode_b(cpi, tile_data, td, tp, mi_row + y_idx, mi_col + x_idx, 1,
+ subsize, PARTITION_NONE, &pc_tree->split[i]->none, NULL);
+ }
+ restore_context(x, &x_ctx, mi_row, mi_col, bsize, 3);
+ split_rdc.rate += x->partition_cost[pl][PARTITION_SPLIT];
+ split_rdc.rdcost = RDCOST(x->rdmult, split_rdc.rate, split_rdc.dist);
+ if (none_rdc.rdcost < split_rdc.rdcost) {
+ set_offsets_without_segment_id(cpi, &tile_data->tile_info, x, mi_row,
+ mi_col, bsize);
+ mib[0]->sb_type = bsize;
+ pc_tree->partitioning = PARTITION_NONE;
+ encode_b(cpi, tile_data, td, tp, mi_row, mi_col, 0, bsize, partition,
+ &pc_tree->none, NULL);
+ } else {
+ set_offsets_without_segment_id(cpi, &tile_data->tile_info, x, mi_row,
+ mi_col, bsize);
+ mib[0]->sb_type = subsize;
+ pc_tree->partitioning = PARTITION_SPLIT;
+ for (int i = 0; i < 4; i++) {
+ int x_idx = (i & 1) * hbs;
+ int y_idx = (i >> 1) * hbs;
+ if ((mi_row + y_idx >= cm->mi_rows) ||
+ (mi_col + x_idx >= cm->mi_cols))
+ continue;
+
+ set_offsets_without_segment_id(cpi, &tile_data->tile_info, x,
+ mi_row + y_idx, mi_col + x_idx,
+ subsize);
+ encode_b(cpi, tile_data, td, tp, mi_row + y_idx, mi_col + x_idx, 0,
+ subsize, PARTITION_NONE, &pc_tree->split[i]->none, NULL);
+ }
+ }
+ } else {
+ for (int i = 0; i < 4; i++) {
+ int x_idx = (i & 1) * hbs;
+ int y_idx = (i >> 1) * hbs;
+ int jj = i >> 1, ii = i & 0x01;
+ if ((mi_row + y_idx >= cm->mi_rows) ||
+ (mi_col + x_idx >= cm->mi_cols))
+ continue;
+ nonrd_use_partition(
+ cpi, td, tile_data, mib + jj * hbs * cm->mi_stride + ii * hbs, tp,
+ mi_row + y_idx, mi_col + x_idx, subsize, pc_tree->split[i]);
+ }
}
break;
case PARTITION_VERT_A:
@@ -2220,8 +2319,6 @@
assert(0 && "Cannot handle extended partition types");
default: assert(0); break;
}
- if (partition != PARTITION_SPLIT || bsize == BLOCK_8X8)
- update_partition_context(xd, mi_row, mi_col, subsize, bsize);
}
#if !CONFIG_REALTIME_ONLY
diff --git a/av1/encoder/nonrd_pickmode.c b/av1/encoder/nonrd_pickmode.c
index 244a455..3c58d50 100644
--- a/av1/encoder/nonrd_pickmode.c
+++ b/av1/encoder/nonrd_pickmode.c
@@ -930,6 +930,7 @@
ctx->best_mode_index = mode_index;
#endif // CONFIG_INTERNAL_STATS
ctx->mic = *xd->mi[0];
+ ctx->skippable = x->skip;
ctx->mbmi_ext = *x->mbmi_ext;
ctx->comp_pred_diff = 0;
ctx->hybrid_pred_diff = 0;
diff --git a/av1/encoder/speed_features.c b/av1/encoder/speed_features.c
index c690a8f..5eb8993 100644
--- a/av1/encoder/speed_features.c
+++ b/av1/encoder/speed_features.c
@@ -683,6 +683,10 @@
}
if (speed >= 8) {
sf->use_fast_nonrd_pick_mode = 1;
+ // Consider moving nonrd_merge_partition to speed 7
+#if 0
+ sf->nonrd_merge_partition = 1;
+#endif
sf->mv.subpel_search_method = SUBPEL_TREE_PRUNED_MORE;
sf->tx_size_search_level = 1;
sf->estimate_motion_for_var_based_partition = 0;
@@ -911,6 +915,7 @@
sf->disable_smooth_intra = 0;
sf->perform_best_rd_based_gating_for_chroma = 0;
sf->prune_obmc_prob_thresh = 0;
+ sf->nonrd_merge_partition = 0;
if (oxcf->mode == GOOD)
set_good_speed_features_framesize_independent(cpi, sf, speed);
diff --git a/av1/encoder/speed_features.h b/av1/encoder/speed_features.h
index 96eaf5f..1345ebf 100644
--- a/av1/encoder/speed_features.h
+++ b/av1/encoder/speed_features.h
@@ -808,6 +808,8 @@
// Number of best inter modes to search transform. INT_MAX - search all.
int num_inter_modes_for_tx_search;
+
+ int nonrd_merge_partition;
} SPEED_FEATURES;
struct AV1_COMP;