Fix variance-based partitioning
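Fix the calculation of min/max and average variance over 16x16 and 32x32
sub-blocks: these statistics are now tracked separately for each enclosing
block instead of being shared across blocks. This gives more reasonable
partitioning and speeds up the current RT (real-time) implementation.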
Change-Id: Ie029c71b633e5b0d573d654b0ab687e75e5d99a3
diff --git a/av1/encoder/var_based_part.c b/av1/encoder/var_based_part.c
index 1000fc9..84c3ac4 100644
--- a/av1/encoder/var_based_part.c
+++ b/av1/encoder/var_based_part.c
@@ -299,22 +299,15 @@
}
}
+// TODO(kyslov) Bring back threshold adjustment based on content state
static int64_t scale_part_thresh_sumdiff(int64_t threshold_base, int speed,
int width, int height,
int content_state) {
+ (void)width;
+ (void)height;
+ (void)content_state;
if (speed >= 8) {
- if (width <= 640 && height <= 480)
- return (5 * threshold_base) >> 2;
- else if ((content_state == kLowSadLowSumdiff) ||
- (content_state == kHighSadLowSumdiff) ||
- (content_state == kLowVarHighSumdiff))
- return (5 * threshold_base) >> 2;
- } else if (speed == 7) {
- if ((content_state == kLowSadLowSumdiff) ||
- (content_state == kHighSadLowSumdiff) ||
- (content_state == kLowVarHighSumdiff)) {
- return (5 * threshold_base) >> 2;
- }
+ return (5 * threshold_base) >> 2;
}
return threshold_base;
}
@@ -342,7 +335,8 @@
threshold_base = scale_part_thresh_sumdiff(
threshold_base, cpi->oxcf.speed, cm->width, cm->height, content_state);
- thresholds[1] = threshold_base;
+ thresholds[0] = threshold_base;
+ thresholds[1] = threshold_base << 1;
thresholds[3] = threshold_base << cpi->oxcf.speed;
if (cm->width >= 1280 && cm->height >= 720)
thresholds[3] = thresholds[3] << 1;
@@ -411,22 +405,23 @@
v16x16 *vt2 = NULL;
unsigned char force_split[85];
int avg_32x32;
- int max_var_32x32 = 0;
- int min_var_32x32 = INT_MAX;
+ int max_var_32x32[4];
+ int min_var_32x32[4];
int var_32x32;
int var_64x64;
int min_var_64x64 = INT_MAX;
int max_var_64x64 = 0;
- int avg_16x16[4];
- int maxvar_16x16[4];
- int minvar_16x16[4];
+ int avg_16x16[4][4];
+ int maxvar_16x16[4][4];
+ int minvar_16x16[4][4];
int64_t threshold_4x4avg;
int content_state = 0;
uint8_t *s;
const uint8_t *d;
int sp;
int dp;
- int compute_minmax_variance = 1;
+ // TODO(kyslov) Bring back compute_minmax_variance with content type detection
+ int compute_minmax_variance = 0;
int is_key_frame = frame_is_intra_only(cm);
int pixels_wide = 128, pixels_high = 128;
assert(cm->seq_params.sb_size == BLOCK_64X64 ||
@@ -531,14 +526,16 @@
const int y64_idx = ((m >> 1) << 6);
const int m2 = m << 2;
force_split[m + 1] = 0;
+ max_var_32x32[m] = 0;
+ min_var_32x32[m] = INT_MAX;
for (i = 0; i < 4; i++) {
const int x32_idx = x64_idx + ((i & 1) << 5);
const int y32_idx = y64_idx + ((i >> 1) << 5);
const int i2 = (m2 + i) << 2;
force_split[5 + m2 + i] = 0;
- avg_16x16[i] = 0;
- maxvar_16x16[i] = 0;
- minvar_16x16[i] = INT_MAX;
+ avg_16x16[m][i] = 0;
+ maxvar_16x16[m][i] = 0;
+ minvar_16x16[m][i] = INT_MAX;
for (j = 0; j < 4; j++) {
const int x16_idx = x32_idx + ((j & 1) << 4);
const int y16_idx = y32_idx + ((j >> 1) << 4);
@@ -551,15 +548,15 @@
pixels_high, is_key_frame);
fill_variance_tree(&vt->split[m].split[i].split[j], BLOCK_16X16);
get_variance(&vt->split[m].split[i].split[j].part_variances.none);
- avg_16x16[i] +=
+ avg_16x16[m][i] +=
vt->split[m].split[i].split[j].part_variances.none.variance;
if (vt->split[m].split[i].split[j].part_variances.none.variance <
- minvar_16x16[i])
- minvar_16x16[i] =
+ minvar_16x16[m][i])
+ minvar_16x16[m][i] =
vt->split[m].split[i].split[j].part_variances.none.variance;
if (vt->split[m].split[i].split[j].part_variances.none.variance >
- maxvar_16x16[i])
- maxvar_16x16[i] =
+ maxvar_16x16[m][i])
+ maxvar_16x16[m][i] =
vt->split[m].split[i].split[j].part_variances.none.variance;
if (vt->split[m].split[i].split[j].part_variances.none.variance >
thresholds[3]) {
@@ -639,21 +636,22 @@
if (!force_split[5 + m2 + i]) {
get_variance(&vt->split[m].split[i].part_variances.none);
var_32x32 = vt->split[m].split[i].part_variances.none.variance;
- max_var_32x32 = AOMMAX(var_32x32, max_var_32x32);
- min_var_32x32 = AOMMIN(var_32x32, min_var_32x32);
+ max_var_32x32[m] = AOMMAX(var_32x32, max_var_32x32[m]);
+ min_var_32x32[m] = AOMMIN(var_32x32, min_var_32x32[m]);
if (vt->split[m].split[i].part_variances.none.variance >
thresholds[2] ||
(!is_key_frame &&
vt->split[m].split[i].part_variances.none.variance >
(thresholds[2] >> 1) &&
vt->split[m].split[i].part_variances.none.variance >
- (avg_16x16[i] >> 1))) {
+ (avg_16x16[m][i] >> 1))) {
force_split[5 + m2 + i] = 1;
force_split[m + 1] = 1;
force_split[0] = 1;
} else if (!is_key_frame && cm->height <= 360 &&
- (maxvar_16x16[i] - minvar_16x16[i]) > (thresholds[2] >> 1) &&
- maxvar_16x16[i] > thresholds[2]) {
+ (maxvar_16x16[m][i] - minvar_16x16[m][i]) >
+ (thresholds[2] >> 1) &&
+ maxvar_16x16[m][i] > thresholds[2]) {
force_split[5 + m2 + i] = 1;
force_split[m + 1] = 1;
force_split[0] = 1;
@@ -672,8 +670,8 @@
// split. Only checking this for noise level >= medium for now.
if (!is_key_frame &&
- (max_var_32x32 - min_var_32x32) > 3 * (thresholds[1] >> 3) &&
- max_var_32x32 > thresholds[1] >> 1)
+ (max_var_32x32[m] - min_var_32x32[m]) > 3 * (thresholds[1] >> 3) &&
+ max_var_32x32[m] > thresholds[1] >> 1)
force_split[1 + m] = 1;
}
if (is_small_sb) force_split[0] = 1;