Turn off bgsprite depending on firstpass metrics.

Changes:
- Add block size define.
- Set default to mean filtering.
- Add flag in AV1_COMP for bgsprite.
- Decide whether to use bgsprite based on firstpass metrics in
define_gf_group.
- Add outlier removal for bgsprite mean blending (off by default).

Results:
lowres: -0.009 avg_psnr, -0.029 ovr_psnr, -0.102 ssim

Clips with maximum gain:
- bridge_far_cif: -2.071 avg_psnr, -2.409 ovr_psnr, -1.977 ssim
- deadline_cif: -0.148 avg_psnr, -0.137 ovr_psnr, -0.113 ssim
- bridge_close_cif: +1.741 avg_psnr, -0.258 ovr_psnr, -2.534 ssim

Change-Id: I809406020f7786e49cc80329511e22d25379d7a2
diff --git a/av1/encoder/bgsprite.c b/av1/encoder/bgsprite.c
index bdb58c9..e66736e 100644
--- a/av1/encoder/bgsprite.c
+++ b/av1/encoder/bgsprite.c
@@ -32,7 +32,12 @@
  * 0 = Median
  * 1 = Mean
  */
-#define BGSPRITE_BLENDING_MODE 0
+#define BGSPRITE_BLENDING_MODE 1
+
+// Enable removal of outliers from mean blending mode.
+#if BGSPRITE_BLENDING_MODE == 1
+#define BGSPRITE_MEAN_REMOVE_OUTLIERS 0
+#endif  // BGSPRITE_BLENDING_MODE == 1
 
 /* Interpolation for panorama alignment sampling:
  * 0 = Nearest neighbor
@@ -40,10 +45,17 @@
  */
 #define BGSPRITE_INTERPOLATION 0
 
+// Enable turning off bgsprite from firstpass metrics in define_gf_group.
+#define BGSPRITE_ENABLE_METRICS 1
+
+// Enable foreground/background segmentation and combine with temporal filter.
 #define BGSPRITE_ENABLE_SEGMENTATION 1
+
+// Enable alignment using global motion.
 #define BGSPRITE_ENABLE_GME 0
 
-#define TRANSFORM_MAT_DIM 3
+// Block size for foreground mask.
+#define BGSPRITE_MASK_BLOCK_SIZE 4
 
 typedef struct {
 #if CONFIG_HIGHBITDEPTH
@@ -99,6 +111,8 @@
   }
 }
 
+#define TRANSFORM_MAT_DIM 3
+
 // Do matrix multiplication on params.
 static void multiply_params(double *const m1, double *const m2,
                             double *target) {
@@ -228,7 +242,7 @@
                               const int *const y_min, const int *const y_max,
                               int pano_x_min, int pano_y_min,
                               YuvPixel ***img_stack) {
-  // Re-sample images onto panorama (pre-median filtering).
+  // Re-sample images onto panorama (pre-filtering).
   const int x_offset = -pano_x_min;
   const int y_offset = -pano_y_min;
   const int frame_width = frames[0]->y_width;
@@ -481,6 +495,37 @@
         }
       }
 
+#if BGSPRITE_MEAN_REMOVE_OUTLIERS
+      if (count > 1) {
+        double stdev = 0;
+        double y_mean = (double)y_sum / count;
+        for (int i = 0; i < num_frames; ++i) {
+          if (image_stack[y][x][i].exists) {
+            stdev += pow(y_mean - image_stack[y][x][i].y, 2);
+          }
+        }
+        stdev = sqrt(stdev / count);
+
+        uint32_t inlier_y_sum = 0;
+        uint32_t inlier_u_sum = 0;
+        uint32_t inlier_v_sum = 0;
+        uint32_t inlier_count = 0;
+        for (int i = 0; i < num_frames; ++i) {
+          if (image_stack[y][x][i].exists &&
+              fabs(image_stack[y][x][i].y - y_mean) <= 1.5 * stdev) {
+            inlier_y_sum += image_stack[y][x][i].y;
+            inlier_u_sum += image_stack[y][x][i].u;
+            inlier_v_sum += image_stack[y][x][i].v;
+            ++inlier_count;
+          }
+        }
+        count = inlier_count;
+        y_sum = inlier_y_sum;
+        u_sum = inlier_u_sum;
+        v_sum = inlier_v_sum;
+      }
+#endif  // BGSPRITE_MEAN_REMOVE_OUTLIERS
+
       if (count != 0) {
         blended_img[y][x].exists = 1;
 #if CONFIG_HIGHBITDEPTH
@@ -833,18 +878,19 @@
     blended_img[i] = aom_malloc(width * sizeof(**blended_img));
   }
 
-  const int block_2_size = 4;
-  const int block_2_height = (height / block_2_size + 1);
-  const int block_2_width = (width / block_2_size + 1);
+  const int block_2_height = (height / BGSPRITE_MASK_BLOCK_SIZE) +
+                             (height % BGSPRITE_MASK_BLOCK_SIZE != 0 ? 1 : 0);
+  const int block_2_width = (width / BGSPRITE_MASK_BLOCK_SIZE) +
+                            (width % BGSPRITE_MASK_BLOCK_SIZE != 0 ? 1 : 0);
 
   for (int block_y = 0; block_y < block_2_height; ++block_y) {
     for (int block_x = 0; block_x < block_2_width; ++block_x) {
       int count = 0;
       int total = 0;
-      for (int sub_y = 0; sub_y < block_2_size; ++sub_y) {
-        for (int sub_x = 0; sub_x < block_2_size; ++sub_x) {
-          const int img_y = block_y * block_2_size + sub_y;
-          const int img_x = block_x * block_2_size + sub_x;
+      for (int sub_y = 0; sub_y < BGSPRITE_MASK_BLOCK_SIZE; ++sub_y) {
+        for (int sub_x = 0; sub_x < BGSPRITE_MASK_BLOCK_SIZE; ++sub_x) {
+          const int img_y = block_y * BGSPRITE_MASK_BLOCK_SIZE + sub_y;
+          const int img_x = block_x * BGSPRITE_MASK_BLOCK_SIZE + sub_x;
           const int mask_y = (y_offset + img_y) / block_size;
           const int mask_x = (x_offset + img_x) / block_size;
 
@@ -859,10 +905,10 @@
 
       const double threshold = 0.30;
       const int amount = (int)(threshold * total);
-      for (int sub_y = 0; sub_y < block_2_size; ++sub_y) {
-        for (int sub_x = 0; sub_x < block_2_size; ++sub_x) {
-          const int y = block_y * block_2_size + sub_y;
-          const int x = block_x * block_2_size + sub_x;
+      for (int sub_y = 0; sub_y < BGSPRITE_MASK_BLOCK_SIZE; ++sub_y) {
+        for (int sub_x = 0; sub_x < BGSPRITE_MASK_BLOCK_SIZE; ++sub_x) {
+          const int y = block_y * BGSPRITE_MASK_BLOCK_SIZE + sub_y;
+          const int x = block_x * BGSPRITE_MASK_BLOCK_SIZE + sub_x;
           if (y < height && x < width) {
             blended_img[y][x].exists = 1;
             const int ychannel_idx = y * temporal_arf->y_stride + x;
@@ -964,7 +1010,7 @@
 #if BGSPRITE_BLENDING_MODE == 1
   blend_mean(width, height, num_frames, (const YuvPixel ***)pano_stack,
              blended_img, panorama->flags & YV12_FLAG_HIGHBITDEPTH);
-#else
+#else   // BGSPRITE_BLENDING_MODE != 1
   blend_median(width, height, num_frames, (const YuvPixel ***)pano_stack,
                blended_img);
 #endif  // BGSPRITE_BLENDING_MODE == 1
@@ -1004,8 +1050,8 @@
 
   // Block size constants for gaussian model.
   const int N_1 = 2;
-  const int y_block_height = (height / N_1) + 1;
-  const int x_block_width = (width / N_1) + 1;
+  const int y_block_height = (height / N_1) + (height % N_1 != 0 ? 1 : 0);
+  const int x_block_width = (width / N_1) + (width % N_1 != 0 ? 1 : 0);
   YuvPixelGaussian **gauss = aom_malloc(y_block_height * sizeof(*gauss));
   for (int i = 0; i < y_block_height; ++i) {
     gauss[i] = aom_calloc(x_block_width, sizeof(**gauss));
@@ -1051,7 +1097,7 @@
   }
   aom_free(gauss);
   aom_free(mask);
-#else
+#else   // !BGSPRITE_ENABLE_SEGMENTATION
   av1_temporal_filter(cpi, &bgsprite, panorama, distance);
 #endif  // BGSPRITE_ENABLE_SEGMENTATION
 
@@ -1068,6 +1114,13 @@
 }
 
 int av1_background_sprite(AV1_COMP *cpi, int distance) {
+#if BGSPRITE_ENABLE_METRICS
+  // Do temporal filter if firstpass stats disable bgsprite.
+  if (!cpi->bgsprite_allowed) {
+    return 1;
+  }
+#endif  // BGSPRITE_ENABLE_METRICS
+
   YV12_BUFFER_CONFIG *frames[MAX_LAG_BUFFERS] = { NULL };
   static const double identity_params[MAX_PARAMDIM - 1] = {
     0.0, 0.0, 1.0, 0.0, 0.0, 1.0, 0.0, 0.0
@@ -1193,3 +1246,12 @@
 
   return 0;
 }
+
+#undef _POSIX_C_SOURCE
+#undef BGSPRITE_BLENDING_MODE
+#undef BGSPRITE_INTERPOLATION
+#undef BGSPRITE_ENABLE_METRICS
+#undef BGSPRITE_ENABLE_SEGMENTATION
+#undef BGSPRITE_ENABLE_GME
+#undef BGSPRITE_MASK_BLOCK_SIZE
+#undef TRANSFORM_MAT_DIM
diff --git a/av1/encoder/encoder.h b/av1/encoder/encoder.h
index e081012..8239f30 100644
--- a/av1/encoder/encoder.h
+++ b/av1/encoder/encoder.h
@@ -599,6 +599,10 @@
   int extra_arf_allowed;
   int bwd_ref_allowed;
 #endif  // CONFIG_EXT_REFS
+
+#if CONFIG_BGSPRITE
+  int bgsprite_allowed;
+#endif  // CONFIG_BGSPRITE
 } AV1_COMP;
 
 void av1_initialize_enc(void);
diff --git a/av1/encoder/firstpass.c b/av1/encoder/firstpass.c
index 082c906..ae55833 100644
--- a/av1/encoder/firstpass.c
+++ b/av1/encoder/firstpass.c
@@ -30,8 +30,8 @@
 #if CONFIG_LV_MAP
 #include "av1/common/txb_common.h"
 #endif
-#include "av1/encoder/av1_quantize.h"
 #include "av1/encoder/aq_variance.h"
+#include "av1/encoder/av1_quantize.h"
 #include "av1/encoder/block.h"
 #include "av1/encoder/encodeframe.h"
 #include "av1/encoder/encodemb.h"
@@ -2157,11 +2157,15 @@
     }
   }
 
-#if CONFIG_EXT_REFS
+#if CONFIG_EXT_REFS || CONFIG_BGSPRITE
   double avg_sr_coded_error = 0;
   double avg_raw_err_stdev = 0;
   int non_zero_stdev_count = 0;
-#endif  // CONFIG_EXT_REFS
+#endif  // CONFIG_EXT_REFS || CONFIG_BGSPRITE
+#if CONFIG_BGSPRITE
+  double avg_pcnt_second_ref = 0;
+  int non_zero_pcnt_second_ref_count = 0;
+#endif
 
   i = 0;
   while (i < rc->static_scene_max_gf_interval && i < rc->frames_to_key) {
@@ -2186,14 +2190,20 @@
     accumulate_frame_motion_stats(
         &next_frame, &this_frame_mv_in_out, &mv_in_out_accumulator,
         &abs_mv_in_out_accumulator, &mv_ratio_accumulator);
-#if CONFIG_EXT_REFS
+#if CONFIG_EXT_REFS || CONFIG_BGSPRITE
     // sum up the metric values of current gf group
     avg_sr_coded_error += next_frame.sr_coded_error;
     if (fabs(next_frame.raw_error_stdev) > 0.000001) {
       non_zero_stdev_count++;
       avg_raw_err_stdev += next_frame.raw_error_stdev;
     }
-#endif  // CONFIG_EXT_REFS
+#endif  // CONFIG_EXT_REFS || CONFIG_BGSPRITE
+#if CONFIG_BGSPRITE
+    if (this_frame->pcnt_second_ref) {
+      avg_pcnt_second_ref += this_frame->pcnt_second_ref;
+    }
+    non_zero_pcnt_second_ref_count++;
+#endif  // CONFIG_BGSPRITE
 
     // Accumulate the effect of prediction quality decay.
     if (!flash_detected) {
@@ -2255,6 +2265,13 @@
   // Was the group length constrained by the requirement for a new KF?
   rc->constrained_gf_group = (i >= rc->frames_to_key) ? 1 : 0;
 
+#if CONFIG_EXT_REFS || CONFIG_BGSPRITE
+  const int num_mbs = (cpi->oxcf.resize_mode != RESIZE_NONE) ? cpi->initial_mbs
+                                                             : cpi->common.MBs;
+  assert(num_mbs > 0);
+  if (i) avg_sr_coded_error /= i;
+#endif  // CONFIG_EXT_REFS || CONFIG_BGSPRITE
+
   // Should we use the alternate reference frame.
   if (allow_alt_ref && (i < cpi->oxcf.lag_in_frames) &&
       (i >= rc->min_gf_interval)) {
@@ -2269,6 +2286,17 @@
          (zero_motion_accumulator < 0.995))
             ? 1
             : 0;
+#if CONFIG_BGSPRITE
+    if (non_zero_pcnt_second_ref_count) {
+      avg_pcnt_second_ref /= non_zero_pcnt_second_ref_count;
+    }
+
+    cpi->bgsprite_allowed = 1;
+    if (abs_mv_in_out_accumulator > 0.30 || decay_accumulator < 0.90 ||
+        avg_sr_coded_error / num_mbs < 20 || avg_pcnt_second_ref < 0.30) {
+      cpi->bgsprite_allowed = 0;
+    }
+#endif  // CONFIG_BGSPRITE
   } else {
     rc->gfu_boost = AOMMAX((int)boost_score, MIN_ARF_GF_BOOST);
     rc->source_alt_ref_pending = 0;
@@ -2277,9 +2305,6 @@
   // Set the interval until the next gf.
   rc->baseline_gf_interval = i - (is_key_frame || rc->source_alt_ref_pending);
 #if CONFIG_EXT_REFS
-  const int num_mbs = (cpi->oxcf.resize_mode != RESIZE_NONE) ? cpi->initial_mbs
-                                                             : cpi->common.MBs;
-  if (i) avg_sr_coded_error /= i;
   if (non_zero_stdev_count) avg_raw_err_stdev /= non_zero_stdev_count;
 
   // Disable extra altrefs and backward refs for "still" gf group:
diff --git a/av1/encoder/firstpass.h b/av1/encoder/firstpass.h
index 1c077f4..b371228 100644
--- a/av1/encoder/firstpass.h
+++ b/av1/encoder/firstpass.h
@@ -82,8 +82,8 @@
   double new_mv_count;
   double duration;
   double count;
-#if CONFIG_EXT_REFS
-  // Standard deviation for (0,0) motion prediction error
+#if CONFIG_EXT_REFS || CONFIG_BGSPRITE
+  // standard deviation for (0, 0) motion prediction error
   double raw_error_stdev;
 #endif  // CONFIG_EXT_REFS
 } FIRSTPASS_STATS;