Turn off bgsprite depending on firstpass metrics.
Changes:
- Add block size define.
- Set default to mean filtering.
- Add flag in AV1_COMP for bgsprite.
- Decide whether to use bgsprite based on firstpass metrics in
define_gf_group.
- Add outlier removal for bgsprite mean blending (off by default).
Results:
lowres: -0.009 avg_psnr, -0.029 ovr_psnr, -0.102 ssim
Clips with maximum gain:
- bridge_far_cif: -2.071 avg_psnr, -2.409 ovr_psnr, -1.977 ssim
- deadline_cif: -0.148 avg_psnr, -0.137 ovr_psnr, -0.113 ssim
- bridge_close_cif: +1.741 avg_psnr, -0.258 ovr_psnr, -2.534 ssim
Change-Id: I809406020f7786e49cc80329511e22d25379d7a2
diff --git a/av1/encoder/bgsprite.c b/av1/encoder/bgsprite.c
index bdb58c9..e66736e 100644
--- a/av1/encoder/bgsprite.c
+++ b/av1/encoder/bgsprite.c
@@ -32,7 +32,12 @@
* 0 = Median
* 1 = Mean
*/
-#define BGSPRITE_BLENDING_MODE 0
+#define BGSPRITE_BLENDING_MODE 1
+
+// Enable removal of outliers from mean blending mode.
+#if BGSPRITE_BLENDING_MODE == 1
+#define BGSPRITE_MEAN_REMOVE_OUTLIERS 0
+#endif // BGSPRITE_BLENDING_MODE == 1
/* Interpolation for panorama alignment sampling:
* 0 = Nearest neighbor
@@ -40,10 +45,17 @@
*/
#define BGSPRITE_INTERPOLATION 0
+// Enable turning off bgsprite from firstpass metrics in define_gf_group.
+#define BGSPRITE_ENABLE_METRICS 1
+
+// Enable foreground/background segmentation and combine with temporal filter.
#define BGSPRITE_ENABLE_SEGMENTATION 1
+
+// Enable alignment using global motion.
#define BGSPRITE_ENABLE_GME 0
-#define TRANSFORM_MAT_DIM 3
+// Block size for foreground mask.
+#define BGSPRITE_MASK_BLOCK_SIZE 4
typedef struct {
#if CONFIG_HIGHBITDEPTH
@@ -99,6 +111,8 @@
}
}
+#define TRANSFORM_MAT_DIM 3
+
// Do matrix multiplication on params.
static void multiply_params(double *const m1, double *const m2,
double *target) {
@@ -228,7 +242,7 @@
const int *const y_min, const int *const y_max,
int pano_x_min, int pano_y_min,
YuvPixel ***img_stack) {
- // Re-sample images onto panorama (pre-median filtering).
+ // Re-sample images onto panorama (pre-filtering).
const int x_offset = -pano_x_min;
const int y_offset = -pano_y_min;
const int frame_width = frames[0]->y_width;
@@ -481,6 +495,37 @@
}
}
+#if BGSPRITE_MEAN_REMOVE_OUTLIERS
+ if (count > 1) {
+ double stdev = 0;
+ double y_mean = (double)y_sum / count;
+ for (int i = 0; i < num_frames; ++i) {
+ if (image_stack[y][x][i].exists) {
+ stdev += pow(y_mean - image_stack[y][x][i].y, 2);
+ }
+ }
+ stdev = sqrt(stdev / count);
+
+ uint32_t inlier_y_sum = 0;
+ uint32_t inlier_u_sum = 0;
+ uint32_t inlier_v_sum = 0;
+ uint32_t inlier_count = 0;
+ for (int i = 0; i < num_frames; ++i) {
+ if (image_stack[y][x][i].exists &&
+ fabs(image_stack[y][x][i].y - y_mean) <= 1.5 * stdev) {
+ inlier_y_sum += image_stack[y][x][i].y;
+ inlier_u_sum += image_stack[y][x][i].u;
+ inlier_v_sum += image_stack[y][x][i].v;
+ ++inlier_count;
+ }
+ }
+ count = inlier_count;
+ y_sum = inlier_y_sum;
+ u_sum = inlier_u_sum;
+ v_sum = inlier_v_sum;
+ }
+#endif // BGSPRITE_MEAN_REMOVE_OUTLIERS
+
if (count != 0) {
blended_img[y][x].exists = 1;
#if CONFIG_HIGHBITDEPTH
@@ -833,18 +878,19 @@
blended_img[i] = aom_malloc(width * sizeof(**blended_img));
}
- const int block_2_size = 4;
- const int block_2_height = (height / block_2_size + 1);
- const int block_2_width = (width / block_2_size + 1);
+ const int block_2_height = (height / BGSPRITE_MASK_BLOCK_SIZE) +
+ (height % BGSPRITE_MASK_BLOCK_SIZE != 0 ? 1 : 0);
+ const int block_2_width = (width / BGSPRITE_MASK_BLOCK_SIZE) +
+ (width % BGSPRITE_MASK_BLOCK_SIZE != 0 ? 1 : 0);
for (int block_y = 0; block_y < block_2_height; ++block_y) {
for (int block_x = 0; block_x < block_2_width; ++block_x) {
int count = 0;
int total = 0;
- for (int sub_y = 0; sub_y < block_2_size; ++sub_y) {
- for (int sub_x = 0; sub_x < block_2_size; ++sub_x) {
- const int img_y = block_y * block_2_size + sub_y;
- const int img_x = block_x * block_2_size + sub_x;
+ for (int sub_y = 0; sub_y < BGSPRITE_MASK_BLOCK_SIZE; ++sub_y) {
+ for (int sub_x = 0; sub_x < BGSPRITE_MASK_BLOCK_SIZE; ++sub_x) {
+ const int img_y = block_y * BGSPRITE_MASK_BLOCK_SIZE + sub_y;
+ const int img_x = block_x * BGSPRITE_MASK_BLOCK_SIZE + sub_x;
const int mask_y = (y_offset + img_y) / block_size;
const int mask_x = (x_offset + img_x) / block_size;
@@ -859,10 +905,10 @@
const double threshold = 0.30;
const int amount = (int)(threshold * total);
- for (int sub_y = 0; sub_y < block_2_size; ++sub_y) {
- for (int sub_x = 0; sub_x < block_2_size; ++sub_x) {
- const int y = block_y * block_2_size + sub_y;
- const int x = block_x * block_2_size + sub_x;
+ for (int sub_y = 0; sub_y < BGSPRITE_MASK_BLOCK_SIZE; ++sub_y) {
+ for (int sub_x = 0; sub_x < BGSPRITE_MASK_BLOCK_SIZE; ++sub_x) {
+ const int y = block_y * BGSPRITE_MASK_BLOCK_SIZE + sub_y;
+ const int x = block_x * BGSPRITE_MASK_BLOCK_SIZE + sub_x;
if (y < height && x < width) {
blended_img[y][x].exists = 1;
const int ychannel_idx = y * temporal_arf->y_stride + x;
@@ -964,7 +1010,7 @@
#if BGSPRITE_BLENDING_MODE == 1
blend_mean(width, height, num_frames, (const YuvPixel ***)pano_stack,
blended_img, panorama->flags & YV12_FLAG_HIGHBITDEPTH);
-#else
+#else // BGSPRITE_BLENDING_MODE != 1
blend_median(width, height, num_frames, (const YuvPixel ***)pano_stack,
blended_img);
#endif // BGSPRITE_BLENDING_MODE == 1
@@ -1004,8 +1050,8 @@
// Block size constants for gaussian model.
const int N_1 = 2;
- const int y_block_height = (height / N_1) + 1;
- const int x_block_width = (width / N_1) + 1;
+ const int y_block_height = (height / N_1) + (height % N_1 != 0 ? 1 : 0);
+ const int x_block_width = (width / N_1) + (width % N_1 != 0 ? 1 : 0);
YuvPixelGaussian **gauss = aom_malloc(y_block_height * sizeof(*gauss));
for (int i = 0; i < y_block_height; ++i) {
gauss[i] = aom_calloc(x_block_width, sizeof(**gauss));
@@ -1051,7 +1097,7 @@
}
aom_free(gauss);
aom_free(mask);
-#else
+#else // !BGSPRITE_ENABLE_SEGMENTATION
av1_temporal_filter(cpi, &bgsprite, panorama, distance);
#endif // BGSPRITE_ENABLE_SEGMENTATION
@@ -1068,6 +1114,13 @@
}
int av1_background_sprite(AV1_COMP *cpi, int distance) {
+#if BGSPRITE_ENABLE_METRICS
+ // Do temporal filter if firstpass stats disable bgsprite.
+ if (!cpi->bgsprite_allowed) {
+ return 1;
+ }
+#endif // BGSPRITE_ENABLE_METRICS
+
YV12_BUFFER_CONFIG *frames[MAX_LAG_BUFFERS] = { NULL };
static const double identity_params[MAX_PARAMDIM - 1] = {
0.0, 0.0, 1.0, 0.0, 0.0, 1.0, 0.0, 0.0
@@ -1193,3 +1246,12 @@
return 0;
}
+
+#undef _POSIX_C_SOURCE
+#undef BGSPRITE_BLENDING_MODE
+#undef BGSPRITE_INTERPOLATION
+#undef BGSPRITE_ENABLE_METRICS
+#undef BGSPRITE_ENABLE_SEGMENTATION
+#undef BGSPRITE_ENABLE_GME
+#undef BGSPRITE_MASK_BLOCK_SIZE
+#undef TRANSFORM_MAT_DIM
diff --git a/av1/encoder/encoder.h b/av1/encoder/encoder.h
index e081012..8239f30 100644
--- a/av1/encoder/encoder.h
+++ b/av1/encoder/encoder.h
@@ -599,6 +599,10 @@
int extra_arf_allowed;
int bwd_ref_allowed;
#endif // CONFIG_EXT_REFS
+
+#if CONFIG_BGSPRITE
+ int bgsprite_allowed;
+#endif // CONFIG_BGSPRITE
} AV1_COMP;
void av1_initialize_enc(void);
diff --git a/av1/encoder/firstpass.c b/av1/encoder/firstpass.c
index 082c906..ae55833 100644
--- a/av1/encoder/firstpass.c
+++ b/av1/encoder/firstpass.c
@@ -30,8 +30,8 @@
#if CONFIG_LV_MAP
#include "av1/common/txb_common.h"
#endif
-#include "av1/encoder/av1_quantize.h"
#include "av1/encoder/aq_variance.h"
+#include "av1/encoder/av1_quantize.h"
#include "av1/encoder/block.h"
#include "av1/encoder/encodeframe.h"
#include "av1/encoder/encodemb.h"
@@ -2157,11 +2157,15 @@
}
}
-#if CONFIG_EXT_REFS
+#if CONFIG_EXT_REFS || CONFIG_BGSPRITE
double avg_sr_coded_error = 0;
double avg_raw_err_stdev = 0;
int non_zero_stdev_count = 0;
-#endif // CONFIG_EXT_REFS
+#endif // CONFIG_EXT_REFS || CONFIG_BGSPRITE
+#if CONFIG_BGSPRITE
+ double avg_pcnt_second_ref = 0;
+ int non_zero_pcnt_second_ref_count = 0;
+#endif
i = 0;
while (i < rc->static_scene_max_gf_interval && i < rc->frames_to_key) {
@@ -2186,14 +2190,20 @@
accumulate_frame_motion_stats(
&next_frame, &this_frame_mv_in_out, &mv_in_out_accumulator,
&abs_mv_in_out_accumulator, &mv_ratio_accumulator);
-#if CONFIG_EXT_REFS
+#if CONFIG_EXT_REFS || CONFIG_BGSPRITE
// sum up the metric values of current gf group
avg_sr_coded_error += next_frame.sr_coded_error;
if (fabs(next_frame.raw_error_stdev) > 0.000001) {
non_zero_stdev_count++;
avg_raw_err_stdev += next_frame.raw_error_stdev;
}
-#endif // CONFIG_EXT_REFS
+#endif // CONFIG_EXT_REFS || CONFIG_BGSPRITE
+#if CONFIG_BGSPRITE
+ if (this_frame->pcnt_second_ref) {
+ avg_pcnt_second_ref += this_frame->pcnt_second_ref;
+ }
+ non_zero_pcnt_second_ref_count++;
+#endif // CONFIG_BGSPRITE
// Accumulate the effect of prediction quality decay.
if (!flash_detected) {
@@ -2255,6 +2265,13 @@
// Was the group length constrained by the requirement for a new KF?
rc->constrained_gf_group = (i >= rc->frames_to_key) ? 1 : 0;
+#if CONFIG_EXT_REFS || CONFIG_BGSPRITE
+ const int num_mbs = (cpi->oxcf.resize_mode != RESIZE_NONE) ? cpi->initial_mbs
+ : cpi->common.MBs;
+ assert(num_mbs > 0);
+ if (i) avg_sr_coded_error /= i;
+#endif // CONFIG_EXT_REFS || CONFIG_BGSPRITE
+
// Should we use the alternate reference frame.
if (allow_alt_ref && (i < cpi->oxcf.lag_in_frames) &&
(i >= rc->min_gf_interval)) {
@@ -2269,6 +2286,17 @@
(zero_motion_accumulator < 0.995))
? 1
: 0;
+#if CONFIG_BGSPRITE
+ if (non_zero_pcnt_second_ref_count) {
+ avg_pcnt_second_ref /= non_zero_pcnt_second_ref_count;
+ }
+
+ cpi->bgsprite_allowed = 1;
+ if (abs_mv_in_out_accumulator > 0.30 || decay_accumulator < 0.90 ||
+ avg_sr_coded_error / num_mbs < 20 || avg_pcnt_second_ref < 0.30) {
+ cpi->bgsprite_allowed = 0;
+ }
+#endif // CONFIG_BGSPRITE
} else {
rc->gfu_boost = AOMMAX((int)boost_score, MIN_ARF_GF_BOOST);
rc->source_alt_ref_pending = 0;
@@ -2277,9 +2305,6 @@
// Set the interval until the next gf.
rc->baseline_gf_interval = i - (is_key_frame || rc->source_alt_ref_pending);
#if CONFIG_EXT_REFS
- const int num_mbs = (cpi->oxcf.resize_mode != RESIZE_NONE) ? cpi->initial_mbs
- : cpi->common.MBs;
- if (i) avg_sr_coded_error /= i;
if (non_zero_stdev_count) avg_raw_err_stdev /= non_zero_stdev_count;
// Disable extra altrefs and backward refs for "still" gf group:
diff --git a/av1/encoder/firstpass.h b/av1/encoder/firstpass.h
index 1c077f4..b371228 100644
--- a/av1/encoder/firstpass.h
+++ b/av1/encoder/firstpass.h
@@ -82,8 +82,8 @@
double new_mv_count;
double duration;
double count;
-#if CONFIG_EXT_REFS
- // Standard deviation for (0,0) motion prediction error
+#if CONFIG_EXT_REFS || CONFIG_BGSPRITE
+ // standard deviation for (0, 0) motion prediction error
double raw_error_stdev;
#endif // CONFIG_EXT_REFS
} FIRSTPASS_STATS;