Allintra: Prune SPLIT in var based partitioning
The speed feature vbp_prune_16x16_split_using_min_max_sub_blk_var is
introduced in variance based partitioning to choose between evaluating
only PARTITION_SPLIT or only PARTITION_NONE, based on the minimum and
maximum sub-block variances. This speed feature is currently enabled
only for bsize 16x16.
For AVIF still-image encode,

            Encode Time      BD-Rate Loss(%)
cpu-used    Reduction(%)     psnr       ssim
   9          8.436         -0.7805    -0.4677
STATS_CHANGED
Change-Id: I8dc8726d25a15fd4579273851499ffaa4edf2d59
diff --git a/av1/encoder/speed_features.c b/av1/encoder/speed_features.c
index de276b1..a671ea7 100644
--- a/av1/encoder/speed_features.c
+++ b/av1/encoder/speed_features.c
@@ -536,6 +536,7 @@
sf->rt_sf.nonrd_check_partition_merge_mode = 0;
sf->rt_sf.hybrid_intra_pickmode = 0;
sf->rt_sf.var_part_split_threshold_shift = 9;
+ sf->rt_sf.vbp_prune_16x16_split_using_min_max_sub_blk_var = true;
}
}
@@ -1973,6 +1974,7 @@
rt_sf->sad_based_comp_prune = 0;
rt_sf->tx_size_level_based_on_qstep = 0;
rt_sf->reduce_zeromv_mvres = false;
+ rt_sf->vbp_prune_16x16_split_using_min_max_sub_blk_var = false;
}
void av1_set_speed_features_framesize_dependent(AV1_COMP *cpi, int speed) {
diff --git a/av1/encoder/speed_features.h b/av1/encoder/speed_features.h
index ba84f30..97a5cf6 100644
--- a/av1/encoder/speed_features.h
+++ b/av1/encoder/speed_features.h
@@ -1508,6 +1508,16 @@
// Reduce the mv resolution for zero mv if the variance is low.
bool reduce_zeromv_mvres;
+
+ // Avoid the partitioning of a 16x16 block in variance based partitioning
+ // (VBP) by making use of minimum and maximum sub-block variances.
+ // For allintra encode, this speed feature reduces instruction count by 5.39%
+ // for speed 9 on a typical video dataset with coding performance gain
+ // of 1.44%.
+ // For AVIF image encode, this speed feature reduces encode time
+ // by 8.44% for speed 9 on a typical image dataset with coding performance
+ // gain of 0.78%.
+ bool vbp_prune_16x16_split_using_min_max_sub_blk_var;
} REAL_TIME_SPEED_FEATURES;
/*!\endcond */
diff --git a/av1/encoder/var_based_part.c b/av1/encoder/var_based_part.c
index c0eabad..b63ee03 100644
--- a/av1/encoder/var_based_part.c
+++ b/av1/encoder/var_based_part.c
@@ -39,6 +39,8 @@
PART_EVAL_ALL = 0,
// Force PARTITION_SPLIT
PART_EVAL_ONLY_SPLIT = 1,
+ // Force PARTITION_NONE
+ PART_EVAL_ONLY_NONE = 2
} UENUM1BYTE(PART_EVAL_STATUS);
typedef struct {
@@ -174,6 +176,12 @@
assert(block_height == block_width);
tree_to_node(data, bsize, &vt);
+ if (mi_col + bs_width_check <= tile->mi_col_end &&
+ mi_row + bs_height_check <= tile->mi_row_end &&
+ force_split == PART_EVAL_ONLY_NONE) {
+ set_block_size(cpi, mi_row, mi_col, bsize);
+ return 1;
+ }
if (force_split == PART_EVAL_ONLY_SPLIT) return 0;
// For bsize=bsize_min (16x16/8x8 for 8x8/4x4 downsampling), select if
@@ -1100,6 +1108,26 @@
}
}
+// Returns PART_EVAL_ONLY_SPLIT or PART_EVAL_ONLY_NONE for a 16x16 block in
+// variance based partitioning, from the spread of its 8x8 sub-block variances.
+static AOM_INLINE PART_EVAL_STATUS get_part_eval_based_on_sub_blk_var(
+ VP16x16 *var_16x16_info, int64_t threshold16) {
+ int max_8x8_var = 0, min_8x8_var = INT_MAX;
+ for (int k = 0; k < 4; k++) {
+ get_variance(&var_16x16_info->split[k].part_variances.none);
+ int this_8x8_var = var_16x16_info->split[k].part_variances.none.variance;
+ max_8x8_var = AOMMAX(this_8x8_var, max_8x8_var);
+ min_8x8_var = AOMMIN(this_8x8_var, min_8x8_var);
+ }
+ // Evaluate only PARTITION_SPLIT when (max - min) over the four sub-block
+ // variances is strictly greater than threshold16 << 2; otherwise (including
+ // equality) evaluate only PARTITION_NONE. The shift factor applied to
+ // threshold16 has been derived empirically.
+ return ((max_8x8_var - min_8x8_var) > (threshold16 << 2))
+ ? PART_EVAL_ONLY_SPLIT
+ : PART_EVAL_ONLY_NONE;
+}
+
int av1_choose_var_based_partitioning(AV1_COMP *cpi, const TileInfo *const tile,
ThreadData *td, MACROBLOCK *x, int mi_row,
int mi_col) {
@@ -1278,7 +1306,10 @@
// to split. This also forces a split on the upper levels.
get_variance(&vtemp->part_variances.none);
if (vtemp->part_variances.none.variance > thresholds[3]) {
- force_split[split_index] = PART_EVAL_ONLY_SPLIT;
+ force_split[split_index] =
+ cpi->sf.rt_sf.vbp_prune_16x16_split_using_min_max_sub_blk_var
+ ? get_part_eval_based_on_sub_blk_var(vtemp, thresholds[3])
+ : PART_EVAL_ONLY_SPLIT;
force_split[5 + m2 + i] = PART_EVAL_ONLY_SPLIT;
force_split[m + 1] = PART_EVAL_ONLY_SPLIT;
force_split[0] = PART_EVAL_ONLY_SPLIT;