Use segmented error metric for global motion

This computes the error metric over blocks that contain inliers
from the motion model. If an insufficient number of blocks contain
inliers, the old error metric is used.

This currently gives 0.15% BDRATE gain and 1% speedup on
cam_lowres, speed 2. Parameters still need to be tuned for
speed 1 and 0.

STATS_CHANGED

Change-Id: I44e60d9d17cc0eb44f560e699167848afb298f7b
diff --git a/av1/common/warped_motion.c b/av1/common/warped_motion.c
index aea9c32..0439710 100644
--- a/av1/common/warped_motion.c
+++ b/av1/common/warped_motion.c
@@ -20,8 +20,6 @@
 #include "av1/common/warped_motion.h"
 #include "av1/common/scale.h"
 
-#define WARP_ERROR_BLOCK 32
-
 // For warping, we really use a 6-tap filter, but we do blocks of 8 pixels
 // at a time. The zoom/rotation/shear in the model are applied to the
 // "fractional" position of each pixel, which therefore varies within
@@ -484,11 +482,38 @@
   return sum_error;
 }
 
+static int64_t highbd_segmented_frame_error(
+    const uint16_t *const ref, int stride, const uint16_t *const dst,
+    int p_width, int p_height, int p_stride, int bd, uint8_t *segment_map,
+    int segment_map_stride) {
+  int patch_w, patch_h;
+  const int error_bsize_w = AOMMIN(p_width, WARP_ERROR_BLOCK);
+  const int error_bsize_h = AOMMIN(p_height, WARP_ERROR_BLOCK);
+  int64_t sum_error = 0;
+  for (int i = 0; i < p_height; i += WARP_ERROR_BLOCK) {
+    for (int j = 0; j < p_width; j += WARP_ERROR_BLOCK) {
+      int seg_x = j >> WARP_ERROR_BLOCK_LOG;
+      int seg_y = i >> WARP_ERROR_BLOCK_LOG;
+      // Only compute the error if this block contains inliers from the motion
+      // model
+      if (!segment_map[seg_y * segment_map_stride + seg_x]) continue;
+
+      // avoid computing error into the frame padding
+      patch_w = AOMMIN(error_bsize_w, p_width - j);
+      patch_h = AOMMIN(error_bsize_h, p_height - i);
+      sum_error += av1_calc_highbd_frame_error(ref + j + i * stride, stride,
+                                               dst + j + i * p_stride, patch_w,
+                                               patch_h, p_stride, bd);
+    }
+  }
+  return sum_error;
+}
+
 static int64_t highbd_warp_error(
     WarpedMotionParams *wm, const uint8_t *const ref8, int width, int height,
     int stride, const uint8_t *const dst8, int p_col, int p_row, int p_width,
     int p_height, int p_stride, int subsampling_x, int subsampling_y, int bd,
-    int64_t best_error) {
+    int64_t best_error, uint8_t *segment_map, int segment_map_stride) {
   int64_t gm_sumerr = 0;
   const int error_bsize_w = AOMMIN(p_width, WARP_ERROR_BLOCK);
   const int error_bsize_h = AOMMIN(p_height, WARP_ERROR_BLOCK);
@@ -498,6 +523,11 @@
   conv_params.use_dist_wtd_comp_avg = 0;
   for (int i = p_row; i < p_row + p_height; i += WARP_ERROR_BLOCK) {
     for (int j = p_col; j < p_col + p_width; j += WARP_ERROR_BLOCK) {
+      int seg_x = j >> WARP_ERROR_BLOCK_LOG;
+      int seg_y = i >> WARP_ERROR_BLOCK_LOG;
+      // Only compute the error if this block contains inliers from the motion
+      // model
+      if (!segment_map[seg_y * segment_map_stride + seg_x]) continue;
       // avoid warping extra 8x8 blocks in the padded region of the frame
       // when p_width and p_height are not multiples of WARP_ERROR_BLOCK
       const int warp_w = AOMMIN(error_bsize_w, p_col + p_width - j);
@@ -761,22 +791,56 @@
   return sum_error;
 }
 
+static int64_t segmented_frame_error(const uint8_t *const ref, int stride,
+                                     const uint8_t *const dst, int p_width,
+                                     int p_height, int p_stride,
+                                     uint8_t *segment_map,
+                                     int segment_map_stride) {
+  int patch_w, patch_h;
+  const int error_bsize_w = AOMMIN(p_width, WARP_ERROR_BLOCK);
+  const int error_bsize_h = AOMMIN(p_height, WARP_ERROR_BLOCK);
+  int64_t sum_error = 0;
+  for (int i = 0; i < p_height; i += WARP_ERROR_BLOCK) {
+    for (int j = 0; j < p_width; j += WARP_ERROR_BLOCK) {
+      int seg_x = j >> WARP_ERROR_BLOCK_LOG;
+      int seg_y = i >> WARP_ERROR_BLOCK_LOG;
+      // Only compute the error if this block contains inliers from the motion
+      // model
+      if (!segment_map[seg_y * segment_map_stride + seg_x]) continue;
+
+      // avoid computing error into the frame padding
+      patch_w = AOMMIN(error_bsize_w, p_width - j);
+      patch_h = AOMMIN(error_bsize_h, p_height - i);
+      sum_error += av1_calc_frame_error(ref + j + i * stride, stride,
+                                        dst + j + i * p_stride, patch_w,
+                                        patch_h, p_stride);
+    }
+  }
+  return sum_error;
+}
+
 static int64_t warp_error(WarpedMotionParams *wm, const uint8_t *const ref,
                           int width, int height, int stride,
                           const uint8_t *const dst, int p_col, int p_row,
                           int p_width, int p_height, int p_stride,
                           int subsampling_x, int subsampling_y,
-                          int64_t best_error) {
+                          int64_t best_error, uint8_t *segment_map,
+                          int segment_map_stride) {
   int64_t gm_sumerr = 0;
   int warp_w, warp_h;
-  int error_bsize_w = AOMMIN(p_width, WARP_ERROR_BLOCK);
-  int error_bsize_h = AOMMIN(p_height, WARP_ERROR_BLOCK);
+  const int error_bsize_w = AOMMIN(p_width, WARP_ERROR_BLOCK);
+  const int error_bsize_h = AOMMIN(p_height, WARP_ERROR_BLOCK);
   uint8_t tmp[WARP_ERROR_BLOCK * WARP_ERROR_BLOCK];
   ConvolveParams conv_params = get_conv_params(0, 0, 8);
   conv_params.use_dist_wtd_comp_avg = 0;
 
   for (int i = p_row; i < p_row + p_height; i += WARP_ERROR_BLOCK) {
     for (int j = p_col; j < p_col + p_width; j += WARP_ERROR_BLOCK) {
+      int seg_x = j >> WARP_ERROR_BLOCK_LOG;
+      int seg_y = i >> WARP_ERROR_BLOCK_LOG;
+      // Only compute the error if this block contains inliers from the motion
+      // model
+      if (!segment_map[seg_y * segment_map_stride + seg_x]) continue;
       // avoid warping extra 8x8 blocks in the padded region of the frame
       // when p_width and p_height are not multiples of WARP_ERROR_BLOCK
       warp_w = AOMMIN(error_bsize_w, p_col + p_width - j);
@@ -803,20 +867,36 @@
   return av1_calc_frame_error(ref, stride, dst, p_width, p_height, p_stride);
 }
 
+int64_t av1_segmented_frame_error(int use_hbd, int bd, const uint8_t *ref,
+                                  int stride, uint8_t *dst, int p_width,
+                                  int p_height, int p_stride,
+                                  uint8_t *segment_map,
+                                  int segment_map_stride) {
+  if (use_hbd) {
+    return highbd_segmented_frame_error(
+        CONVERT_TO_SHORTPTR(ref), stride, CONVERT_TO_SHORTPTR(dst), p_width,
+        p_height, p_stride, bd, segment_map, segment_map_stride);
+  }
+  return segmented_frame_error(ref, stride, dst, p_width, p_height, p_stride,
+                               segment_map, segment_map_stride);
+}
+
 int64_t av1_warp_error(WarpedMotionParams *wm, int use_hbd, int bd,
                        const uint8_t *ref, int width, int height, int stride,
                        uint8_t *dst, int p_col, int p_row, int p_width,
                        int p_height, int p_stride, int subsampling_x,
-                       int subsampling_y, int64_t best_error) {
+                       int subsampling_y, int64_t best_error,
+                       uint8_t *segment_map, int segment_map_stride) {
   if (wm->wmtype <= AFFINE)
     if (!av1_get_shear_params(wm)) return 1;
   if (use_hbd)
     return highbd_warp_error(wm, ref, width, height, stride, dst, p_col, p_row,
                              p_width, p_height, p_stride, subsampling_x,
-                             subsampling_y, bd, best_error);
+                             subsampling_y, bd, best_error, segment_map,
+                             segment_map_stride);
   return warp_error(wm, ref, width, height, stride, dst, p_col, p_row, p_width,
                     p_height, p_stride, subsampling_x, subsampling_y,
-                    best_error);
+                    best_error, segment_map, segment_map_stride);
 }
 
 void av1_warp_plane(WarpedMotionParams *wm, int use_hbd, int bd,
diff --git a/av1/common/warped_motion.h b/av1/common/warped_motion.h
index d05d96d..3b9c4a6 100644
--- a/av1/common/warped_motion.h
+++ b/av1/common/warped_motion.h
@@ -31,6 +31,8 @@
 #define SAMPLES_ARRAY_SIZE (LEAST_SQUARES_SAMPLES_MAX * 2)
 #define WARPED_MOTION_DEBUG 0
 #define DEFAULT_WMTYPE AFFINE
+#define WARP_ERROR_BLOCK_LOG 5
+#define WARP_ERROR_BLOCK (1 << WARP_ERROR_BLOCK_LOG)
 
 extern const int16_t av1_warped_filter[WARPEDPIXEL_PREC_SHIFTS * 3 + 1][8];
 
@@ -151,13 +153,19 @@
                        const uint8_t *ref, int width, int height, int stride,
                        uint8_t *dst, int p_col, int p_row, int p_width,
                        int p_height, int p_stride, int subsampling_x,
-                       int subsampling_y, int64_t best_error);
+                       int subsampling_y, int64_t best_error,
+                       uint8_t *segment_map, int segment_map_stride);
 
 // Returns the error between the frame described by 'ref' and the frame
 // described by 'dst'.
 int64_t av1_frame_error(int use_hbd, int bd, const uint8_t *ref, int stride,
                         uint8_t *dst, int p_width, int p_height, int p_stride);
 
+int64_t av1_segmented_frame_error(int use_hbd, int bd, const uint8_t *ref,
+                                  int stride, uint8_t *dst, int p_width,
+                                  int p_height, int p_stride,
+                                  uint8_t *segment_map, int segment_map_stride);
+
 void av1_warp_plane(WarpedMotionParams *wm, int use_hbd, int bd,
                     const uint8_t *ref, int width, int height, int stride,
                     uint8_t *pred, int p_col, int p_row, int p_width,
diff --git a/av1/encoder/encodeframe.c b/av1/encoder/encodeframe.c
index b9a430e..b988725 100644
--- a/av1/encoder/encodeframe.c
+++ b/av1/encoder/encodeframe.c
@@ -5065,6 +5065,7 @@
     int frame;
     MotionModel params_by_motion[RANSAC_NUM_MOTIONS];
     for (int m = 0; m < RANSAC_NUM_MOTIONS; m++) {
+      memset(&params_by_motion[m], 0, sizeof(params_by_motion[m]));
       params_by_motion[m].inliers =
           aom_malloc(sizeof(*(params_by_motion[m].inliers)) * 2 * MAX_CORNERS);
     }
@@ -5087,6 +5088,18 @@
       frm_buffer =
           av1_downconvert_frame(cpi->source, cpi->common.seq_params.bit_depth);
     }
+    const int segment_map_w =
+        (cpi->source->y_width + (WARP_ERROR_BLOCK >> 1)) >>
+        WARP_ERROR_BLOCK_LOG;
+    const int segment_map_h =
+        (cpi->source->y_height + (WARP_ERROR_BLOCK >> 1)) >>
+        WARP_ERROR_BLOCK_LOG;
+
+    uint8_t *segment_map =
+        aom_malloc(sizeof(*segment_map) * segment_map_w * segment_map_h);
+    memset(segment_map, 0,
+           sizeof(*segment_map) * segment_map_w * segment_map_h);
+
     for (frame = ALTREF_FRAME; frame >= LAST_FRAME; --frame) {
       ref_buf[frame] = NULL;
       RefCntBuffer *buf = get_ref_frame_buf(cm, frame);
@@ -5115,12 +5128,6 @@
               cpi->source->y_stride, frm_corners, MAX_CORNERS);
         }
         TransformationType model;
-        const int64_t ref_frame_error = av1_frame_error(
-            is_cur_buf_hbd(xd), xd->bd, ref_buf[frame]->y_buffer,
-            ref_buf[frame]->y_stride, cpi->source->y_buffer,
-            cpi->source->y_width, cpi->source->y_height, cpi->source->y_stride);
-
-        if (ref_frame_error == 0) continue;
 
         aom_clear_system_state();
 
@@ -5157,13 +5164,17 @@
             av1_convert_model_to_params(params_this_motion, &tmp_wm_params);
 
             if (tmp_wm_params.wmtype != IDENTITY) {
+              av1_compute_feature_segmentation_map(
+                  segment_map, segment_map_w, segment_map_h,
+                  params_by_motion[i].inliers, params_by_motion[i].num_inliers);
+
               const int64_t warp_error = av1_refine_integerized_param(
                   &tmp_wm_params, tmp_wm_params.wmtype, is_cur_buf_hbd(xd),
                   xd->bd, ref_buf[frame]->y_buffer, ref_buf[frame]->y_width,
                   ref_buf[frame]->y_height, ref_buf[frame]->y_stride,
                   cpi->source->y_buffer, cpi->source->y_width,
                   cpi->source->y_height, cpi->source->y_stride, 5,
-                  best_warp_error);
+                  best_warp_error, segment_map, segment_map_w);
               if (warp_error < best_warp_error) {
                 best_warp_error = warp_error;
                 // Save the wm_params modified by
@@ -5189,6 +5200,16 @@
                 GM_TRANS_ONLY_DECODE_FACTOR;
           }
 
+          if (cm->global_motion[frame].wmtype == IDENTITY) continue;
+
+          const int64_t ref_frame_error = av1_segmented_frame_error(
+              is_cur_buf_hbd(xd), xd->bd, ref_buf[frame]->y_buffer,
+              ref_buf[frame]->y_stride, cpi->source->y_buffer,
+              cpi->source->y_width, cpi->source->y_height,
+              cpi->source->y_stride, segment_map, segment_map_w);
+
+          if (ref_frame_error == 0) continue;
+
           // If the best error advantage found doesn't meet the threshold for
           // this motion type, revert to IDENTITY.
           if (!av1_is_enough_erroradvantage(
@@ -5209,6 +5230,7 @@
           cpi->gmtype_cost[cm->global_motion[frame].wmtype] -
           cpi->gmtype_cost[IDENTITY];
     }
+    aom_free(segment_map);
     // clear disabled ref_frames
     for (frame = LAST_FRAME; frame <= ALTREF_FRAME; ++frame) {
       const int ref_disabled =
diff --git a/av1/encoder/global_motion.c b/av1/encoder/global_motion.c
index d7591ba..0120490 100644
--- a/av1/encoder/global_motion.c
+++ b/av1/encoder/global_motion.c
@@ -64,7 +64,9 @@
   double *level_dy_buffer;
 } ImagePyramid;
 
-static const double erroradv_tr[] = { 0.65, 0.60, 0.55 };
+// TODO(sarahparker) These need to be retuned for speed 0 and 1 to
+// maximize gains from segmented error metric
+static const double erroradv_tr[] = { 0.65, 0.60, 0.65 };
 static const double erroradv_prod_tr[] = { 20000, 18000, 16000 };
 
 int av1_is_enough_erroradvantage(double best_erroradvantage, int params_cost,
@@ -164,13 +166,11 @@
   wm->wmtype = wmtype;
 }
 
-int64_t av1_refine_integerized_param(WarpedMotionParams *wm,
-                                     TransformationType wmtype, int use_hbd,
-                                     int bd, uint8_t *ref, int r_width,
-                                     int r_height, int r_stride, uint8_t *dst,
-                                     int d_width, int d_height, int d_stride,
-                                     int n_refinements,
-                                     int64_t best_frame_error) {
+int64_t av1_refine_integerized_param(
+    WarpedMotionParams *wm, TransformationType wmtype, int use_hbd, int bd,
+    uint8_t *ref, int r_width, int r_height, int r_stride, uint8_t *dst,
+    int d_width, int d_height, int d_stride, int n_refinements,
+    int64_t best_frame_error, uint8_t *segment_map, int segment_map_stride) {
   static const int max_trans_model_params[TRANS_TYPES] = { 0, 2, 4, 6 };
   const int border = ERRORADV_BORDER;
   int i = 0, p;
@@ -183,10 +183,11 @@
   int32_t best_param;
 
   force_wmtype(wm, wmtype);
-  best_error = av1_warp_error(wm, use_hbd, bd, ref, r_width, r_height, r_stride,
-                              dst + border * d_stride + border, border, border,
-                              d_width - 2 * border, d_height - 2 * border,
-                              d_stride, 0, 0, best_frame_error);
+  best_error =
+      av1_warp_error(wm, use_hbd, bd, ref, r_width, r_height, r_stride,
+                     dst + border * d_stride + border, border, border,
+                     d_width - 2 * border, d_height - 2 * border, d_stride, 0,
+                     0, best_frame_error, segment_map, segment_map_stride);
   best_error = AOMMIN(best_error, best_frame_error);
   step = 1 << (n_refinements - 1);
   for (i = 0; i < n_refinements; i++, step >>= 1) {
@@ -202,7 +203,7 @@
           av1_warp_error(wm, use_hbd, bd, ref, r_width, r_height, r_stride,
                          dst + border * d_stride + border, border, border,
                          d_width - 2 * border, d_height - 2 * border, d_stride,
-                         0, 0, best_error);
+                         0, 0, best_error, segment_map, segment_map_stride);
       if (step_error < best_error) {
         best_error = step_error;
         best_param = *param;
@@ -215,7 +216,7 @@
           av1_warp_error(wm, use_hbd, bd, ref, r_width, r_height, r_stride,
                          dst + border * d_stride + border, border, border,
                          d_width - 2 * border, d_height - 2 * border, d_stride,
-                         0, 0, best_error);
+                         0, 0, best_error, segment_map, segment_map_stride);
       if (step_error < best_error) {
         best_error = step_error;
         best_param = *param;
@@ -227,11 +228,11 @@
       // for the biggest step size
       while (step_dir) {
         *param = add_param_offset(p, best_param, step * step_dir);
-        step_error =
-            av1_warp_error(wm, use_hbd, bd, ref, r_width, r_height, r_stride,
-                           dst + border * d_stride + border, border, border,
-                           d_width - 2 * border, d_height - 2 * border,
-                           d_stride, 0, 0, best_error);
+        step_error = av1_warp_error(
+            wm, use_hbd, bd, ref, r_width, r_height, r_stride,
+            dst + border * d_stride + border, border, border,
+            d_width - 2 * border, d_height - 2 * border, d_stride, 0, 0,
+            best_error, segment_map, segment_map_stride);
         if (step_error < best_error) {
           best_error = step_error;
           best_param = *param;
@@ -264,6 +265,50 @@
   return buf_8bit;
 }
 
+static void get_inliers_from_indices(MotionModel *params,
+                                     int *correspondences) {
+  int *inliers_tmp = (int *)aom_malloc(2 * MAX_CORNERS * sizeof(*inliers_tmp));
+  memset(inliers_tmp, 0, 2 * MAX_CORNERS * sizeof(*inliers_tmp));
+
+  for (int i = 0; i < params->num_inliers; i++) {
+    int index = params->inliers[i];
+    inliers_tmp[2 * i] = correspondences[4 * index];
+    inliers_tmp[2 * i + 1] = correspondences[4 * index + 1];
+  }
+  memcpy(params->inliers, inliers_tmp, sizeof(*inliers_tmp) * 2 * MAX_CORNERS);
+  aom_free(inliers_tmp);
+}
+
+#define FEAT_COUNT_TR 3
+#define SEG_COUNT_TR 0.40
+void av1_compute_feature_segmentation_map(uint8_t *segment_map, int width,
+                                          int height, int *inliers,
+                                          int num_inliers) {
+  int seg_count = 0;
+  memset(segment_map, 0, sizeof(*segment_map) * width * height);
+
+  for (int i = 0; i < num_inliers; i++) {
+    int x = inliers[i * 2];
+    int y = inliers[i * 2 + 1];
+    int seg_x = x >> WARP_ERROR_BLOCK_LOG;
+    int seg_y = y >> WARP_ERROR_BLOCK_LOG;
+    segment_map[seg_y * width + seg_x] += 1;
+  }
+
+  for (int i = 0; i < height; i++) {
+    for (int j = 0; j < width; j++) {
+      uint8_t feat_count = segment_map[i * width + j];
+      segment_map[i * width + j] = (feat_count >= FEAT_COUNT_TR);
+      seg_count += (segment_map[i * width + j]);
+    }
+  }
+
+  // If this motion does not make up a large enough portion of the frame,
+  // use the unsegmented version of the error metric
+  if (seg_count < (width * height * SEG_COUNT_TR))
+    memset(segment_map, 1, width * height * sizeof(*segment_map));
+}
+
 static int compute_global_motion_feature_based(
     TransformationType type, unsigned char *frm_buffer, int frm_width,
     int frm_height, int frm_stride, int *frm_corners, int num_frm_corners,
@@ -296,15 +341,18 @@
   ransac(correspondences, num_correspondences, num_inliers_by_motion,
          params_by_motion, num_motions);
 
-  free(correspondences);
-
   // Set num_inliers = 0 for motions with too few inliers so they are ignored.
   for (i = 0; i < num_motions; ++i) {
-    if (num_inliers_by_motion[i] < MIN_INLIER_PROB * num_correspondences) {
+    if (num_inliers_by_motion[i] < MIN_INLIER_PROB * num_correspondences ||
+        num_correspondences == 0) {
       num_inliers_by_motion[i] = 0;
+    } else {
+      get_inliers_from_indices(&params_by_motion[i], correspondences);
     }
   }
 
+  free(correspondences);
+
   // Return true if any one of the motions has inliers.
   for (i = 0; i < num_motions; ++i) {
     if (num_inliers_by_motion[i] > 0) return 1;
diff --git a/av1/encoder/global_motion.h b/av1/encoder/global_motion.h
index b5c2201..ee09676 100644
--- a/av1/encoder/global_motion.h
+++ b/av1/encoder/global_motion.h
@@ -43,16 +43,18 @@
 int av1_is_enough_erroradvantage(double best_erroradvantage, int params_cost,
                                  int erroradv_type);
 
+void av1_compute_feature_segmentation_map(uint8_t *segment_map, int width,
+                                          int height, int *inliers,
+                                          int num_inliers);
+
 // Returns the av1_warp_error between "dst" and the result of applying the
 // motion params that result from fine-tuning "wm" to "ref". Note that "wm" is
 // modified in place.
-int64_t av1_refine_integerized_param(WarpedMotionParams *wm,
-                                     TransformationType wmtype, int use_hbd,
-                                     int bd, uint8_t *ref, int r_width,
-                                     int r_height, int r_stride, uint8_t *dst,
-                                     int d_width, int d_height, int d_stride,
-                                     int n_refinements,
-                                     int64_t best_frame_error);
+int64_t av1_refine_integerized_param(
+    WarpedMotionParams *wm, TransformationType wmtype, int use_hbd, int bd,
+    uint8_t *ref, int r_width, int r_height, int r_stride, uint8_t *dst,
+    int d_width, int d_height, int d_stride, int n_refinements,
+    int64_t best_frame_error, uint8_t *segment_map, int segment_map_stride);
 
 /*
   Computes "num_motions" candidate global motion parameters between two frames.