AV1 RT: Implement VERT and HORZ partitions in variance-based partitioning
Gives a 0.02 BDRate improvement and a ~1-2% speedup.
Change-Id: Id876f904fdc086f72dc537845ce772d5e9c533fa
diff --git a/av1/encoder/encodeframe.c b/av1/encoder/encodeframe.c
index 4ac4b4d..f2e961b 100644
--- a/av1/encoder/encodeframe.c
+++ b/av1/encoder/encodeframe.c
@@ -2008,10 +2008,39 @@
encode_b(cpi, tile_data, td, tp, mi_row, mi_col, 0, bsize, partition,
&pc_tree->none, NULL);
break;
- // TODO(kyslov@) Add HORZ and VERT partitions
- case PARTITION_HORZ:
case PARTITION_VERT:
- assert(0 && "Cannot yet handle non-square partition types");
+ pick_sb_modes(cpi, tile_data, x, mi_row, mi_col, &dummy_cost,
+ PARTITION_VERT, subsize, &pc_tree->vertical[0], INT64_MAX,
+ sf->use_fast_nonrd_pick_mode ? PICK_MODE_FAST_NONRD
+ : PICK_MODE_NONRD);
+ encode_b(cpi, tile_data, td, tp, mi_row, mi_col, 0, subsize,
+ PARTITION_VERT, &pc_tree->vertical[0], NULL);
+ if (mi_col + hbs < cm->mi_cols && bsize > BLOCK_8X8) {
+ pick_sb_modes(cpi, tile_data, x, mi_row, mi_col + hbs, &dummy_cost,
+ PARTITION_VERT, subsize, &pc_tree->vertical[1], INT64_MAX,
+ sf->use_fast_nonrd_pick_mode ? PICK_MODE_FAST_NONRD
+ : PICK_MODE_NONRD);
+ encode_b(cpi, tile_data, td, tp, mi_row, mi_col + hbs, 0, subsize,
+ PARTITION_VERT, &pc_tree->vertical[1], NULL);
+ }
+ break;
+ case PARTITION_HORZ:
+ pick_sb_modes(cpi, tile_data, x, mi_row, mi_col, &dummy_cost,
+ PARTITION_HORZ, subsize, &pc_tree->horizontal[0], INT64_MAX,
+ sf->use_fast_nonrd_pick_mode ? PICK_MODE_FAST_NONRD
+ : PICK_MODE_NONRD);
+ encode_b(cpi, tile_data, td, tp, mi_row, mi_col, 0, subsize,
+ PARTITION_HORZ, &pc_tree->horizontal[0], NULL);
+
+ if (mi_row + hbs < cm->mi_rows && bsize > BLOCK_8X8) {
+ pick_sb_modes(cpi, tile_data, x, mi_row + hbs, mi_col, &dummy_cost,
+ PARTITION_HORZ, subsize, &pc_tree->horizontal[1],
+ INT64_MAX,
+ sf->use_fast_nonrd_pick_mode ? PICK_MODE_FAST_NONRD
+ : PICK_MODE_NONRD);
+ encode_b(cpi, tile_data, td, tp, mi_row + hbs, mi_col, 0, subsize,
+ PARTITION_HORZ, &pc_tree->horizontal[1], NULL);
+ }
break;
case PARTITION_SPLIT:
for (int i = 0; i < 4; i++) {
diff --git a/av1/encoder/var_based_part.c b/av1/encoder/var_based_part.c
index 437e9fd..a5772dc 100644
--- a/av1/encoder/var_based_part.c
+++ b/av1/encoder/var_based_part.c
@@ -194,18 +194,14 @@
if (force_split == 1) return 0;
- if (mi_col + block_width > tile->mi_col_end ||
- mi_row + block_height > tile->mi_row_end)
- return 0;
-
// For bsize=bsize_min (16x16/8x8 for 8x8/4x4 downsampling), select if
// variance is below threshold, otherwise split will be selected.
// No check for vert/horiz split as too few samples for variance.
if (bsize == bsize_min) {
// Variance already computed to set the force_split.
if (frame_is_intra_only(cm)) get_variance(&vt.part_variances->none);
- if (mi_col + block_width / 2 < cm->mi_cols &&
- mi_row + block_height / 2 < cm->mi_rows &&
+ if (mi_col + block_width <= tile->mi_col_end &&
+ mi_row + block_height <= tile->mi_row_end &&
vt.part_variances->none.variance < threshold) {
set_block_size(cpi, x, xd, mi_row, mi_col, bsize);
return 1;
@@ -221,12 +217,42 @@
return 0;
}
// If variance is low, take the bsize (no split).
- if (mi_col + block_width / 2 < cm->mi_cols &&
- mi_row + block_height / 2 < cm->mi_rows &&
+ if (mi_col + block_width <= tile->mi_col_end &&
+ mi_row + block_height <= tile->mi_row_end &&
vt.part_variances->none.variance < threshold) {
set_block_size(cpi, x, xd, mi_row, mi_col, bsize);
return 1;
}
+ // Check vertical split.
+ if (mi_row + block_height <= tile->mi_row_end &&
+ mi_col + block_width / 2 <= tile->mi_col_end) {
+ BLOCK_SIZE subsize = get_partition_subsize(bsize, PARTITION_VERT);
+ get_variance(&vt.part_variances->vert[0]);
+ get_variance(&vt.part_variances->vert[1]);
+ if (vt.part_variances->vert[0].variance < threshold &&
+ vt.part_variances->vert[1].variance < threshold &&
+ get_plane_block_size(subsize, xd->plane[1].subsampling_x,
+ xd->plane[1].subsampling_y) < BLOCK_INVALID) {
+ set_block_size(cpi, x, xd, mi_row, mi_col, subsize);
+ set_block_size(cpi, x, xd, mi_row, mi_col + block_width / 2, subsize);
+ return 1;
+ }
+ }
+ // Check horizontal split.
+ if (mi_col + block_width <= tile->mi_col_end &&
+ mi_row + block_height / 2 <= tile->mi_row_end) {
+ BLOCK_SIZE subsize = get_partition_subsize(bsize, PARTITION_HORZ);
+ get_variance(&vt.part_variances->horz[0]);
+ get_variance(&vt.part_variances->horz[1]);
+ if (vt.part_variances->horz[0].variance < threshold &&
+ vt.part_variances->horz[1].variance < threshold &&
+ get_plane_block_size(subsize, xd->plane[1].subsampling_x,
+ xd->plane[1].subsampling_y) < BLOCK_INVALID) {
+ set_block_size(cpi, x, xd, mi_row, mi_col, subsize);
+ set_block_size(cpi, x, xd, mi_row + block_height / 2, mi_col, subsize);
+ return 1;
+ }
+ }
return 0;
}
return 0;
@@ -687,7 +713,8 @@
force_split[0] = 1;
}
- if (!set_vt_partitioning(cpi, x, xd, tile, vt, BLOCK_128X128, mi_row, mi_col,
+ if (mi_col + 32 > tile->mi_col_end || mi_row + 32 > tile->mi_row_end ||
+ !set_vt_partitioning(cpi, x, xd, tile, vt, BLOCK_128X128, mi_row, mi_col,
thresholds[0], BLOCK_16X16, force_split[0])) {
for (m = 0; m < num_64x64_blocks; ++m) {
const int x64_idx = ((m & 1) << 4);