Compute image pyramid for optical flow. Computing an image pyramid is the first step in the implementation of DISFlow, which is a fast optical flow technique being implemented to speed up global motion computation. Change-Id: Icca1e16dfff7776f22e446da79526842d4b1d510

commit: c9c7379d237966dc0f13a31c11e04a760a95ae0f [log] [tgz]
author: Sarah Parker <sarahparker@google.com> Wed Oct 17 15:44:52 2018 -0700
committer: Sarah Parker <sarahparker@google.com> Mon Oct 22 20:37:32 2018 +0000
tree: 1c237dfb69eef003d26b431e432333f42d4cda9b
parent: b5ed1e629ff9a267b81c1137f5a0c23de05114f6 [diff]
diff --git a/av1/encoder/global_motion.c b/av1/encoder/global_motion.c
index e9f8b0b..e70d889 100644
--- a/av1/encoder/global_motion.c
+++ b/av1/encoder/global_motion.c

@@ -17,6 +17,7 @@
 
 #include "av1/encoder/global_motion.h"
 
+#include "av1/common/resize.h"
 #include "av1/common/warped_motion.h"
 
 #include "av1/encoder/segmentation.h"
@@ -32,6 +33,20 @@
 // Border over which to compute the global motion
 #define ERRORADV_BORDER 0
 
+// Maximum number of pyramid levels in disflow computation
+#define N_LEVELS 5
+// Size of square patches in the disflow dense grid
+#define PATCH_SIZE 5
+
+// Image pyramid whose levels are all stored in one buffer (level_buffer)
+typedef struct {
+  int n_levels;  // number of levels, as set by compute_flow_pyramids()
+  int widths[N_LEVELS];  // width of each level; index 0 is the input frame
+  int heights[N_LEVELS];  // height of each level; index 0 is the input frame
+  int level_loc[N_LEVELS];  // offset of each level's start in level_buffer
+  unsigned char *level_buffer;  // storage allocated by alloc_pyramid()
+} ImagePyramid;
+
 static const double erroradv_tr[] = { 0.65, 0.60, 0.55 };
 static const double erroradv_prod_tr[] = { 20000, 18000, 16000 };
 
@@ -296,3 +311,110 @@
   }
   return 0;
 }
+
+// Allocates an ImagePyramid together with the single buffer that backs all of
+// its levels. Only level_buffer is set up here; the level count, dimensions
+// and offsets are left for the caller to fill in.
+// Returns NULL if either allocation fails. A non-NULL result must be released
+// with free_pyramid().
+static ImagePyramid *alloc_pyramid(int width, int height) {
+  ImagePyramid *pyr = aom_malloc(sizeof(*pyr));
+  if (pyr == NULL) return NULL;
+  // 2 * width * height is the upper bound for a buffer that fits
+  // all pyramid levels
+  pyr->level_buffer =
+      aom_malloc(sizeof(*pyr->level_buffer) * 2 * width * height);
+  if (pyr->level_buffer == NULL) {
+    // Do not hand back a half-built pyramid.
+    aom_free(pyr);
+    return NULL;
+  }
+  return pyr;
+}
+
+// Releases a pyramid and its level buffer. Passing NULL is a no-op, so this
+// can be called unconditionally on cleanup paths.
+static void free_pyramid(ImagePyramid *pyr) {
+  if (pyr == NULL) return;
+  aom_free(pyr->level_buffer);
+  aom_free(pyr);
+}
+
+// Fills in the width, height and buffer offset of pyramid level `level` from
+// the entries of level `level - 1`: both dimensions are halved, and the new
+// level starts right after the previous level's width * height entries.
+static INLINE void update_level_dims(ImagePyramid *frm_pyr, int level) {
+  const int prev = level - 1;
+  const int prev_width = frm_pyr->widths[prev];
+  const int prev_height = frm_pyr->heights[prev];
+
+  frm_pyr->widths[level] = prev_width >> 1;
+  frm_pyr->heights[level] = prev_height >> 1;
+  frm_pyr->level_loc[level] =
+      frm_pyr->level_loc[prev] + prev_width * prev_height;
+}
+
+// Compute coarse to fine pyramids for a frame.
+//   frm         source plane to build the pyramid from
+//   frm_width, frm_height, frm_stride
+//               dimensions and row stride of frm
+//   n_levels    number of levels to build; must not exceed N_LEVELS, the
+//               size of the per-level arrays in ImagePyramid
+//   frm_pyr     pyramid to fill in; its level_buffer must already be
+//               allocated
+static void compute_flow_pyramids(unsigned char *frm, const int frm_width,
+                                  const int frm_height, const int frm_stride,
+                                  int n_levels, ImagePyramid *frm_pyr) {
+  // The per-level arrays hold N_LEVELS entries, so a larger count would
+  // write past their end.
+  assert(n_levels <= N_LEVELS);
+  assert((frm_width >> n_levels) > 0);
+  assert((frm_height >> n_levels) > 0);
+
+  // Initialize first level
+  frm_pyr->n_levels = n_levels;
+  frm_pyr->widths[0] = frm_width;
+  frm_pyr->heights[0] = frm_height;
+  frm_pyr->level_loc[0] = 0;
+  // This essentially copies the original buffer into the pyramid buffer
+  // without the original padding
+  av1_resize_plane(frm, frm_height, frm_width, frm_stride,
+                   frm_pyr->level_buffer, frm_pyr->heights[0],
+                   frm_pyr->widths[0], frm_pyr->widths[0]);
+
+  // Start at the finest level and resize down to the coarsest level
+  for (int level = 1; level < n_levels; ++level) {
+    update_level_dims(frm_pyr, level);
+    const int cur_width = frm_pyr->widths[level];
+    const int cur_height = frm_pyr->heights[level];
+    const int cur_loc = frm_pyr->level_loc[level];
+
+    // Each level is produced from the level above it; the row stride of a
+    // level inside the pyramid buffer equals its width.
+    av1_resize_plane(frm_pyr->level_buffer + frm_pyr->level_loc[level - 1],
+                     frm_pyr->heights[level - 1], frm_pyr->widths[level - 1],
+                     frm_pyr->widths[level - 1],
+                     frm_pyr->level_buffer + cur_loc, cur_height, cur_width,
+                     cur_width);
+
+    // TODO(sarahparker) Add computation of gradient pyramids here
+  }
+}
+
+// Entry point for DISFlow-based global motion estimation. At this stage only
+// the image pyramids of the luma planes of frm and ref are built and then
+// discarded; type, num_inliers_by_motion, params_by_motion and num_motions
+// are not used yet. If a pyramid cannot be allocated the pyramid computation
+// is skipped. Always returns 0.
+int compute_global_motion_disflow_based(TransformationType type,
+                                        YV12_BUFFER_CONFIG *frm,
+                                        YV12_BUFFER_CONFIG *ref, int bit_depth,
+                                        int *num_inliers_by_motion,
+                                        double *params_by_motion,
+                                        int num_motions) {
+  unsigned char *frm_buffer = frm->y_buffer;
+  unsigned char *ref_buffer = ref->y_buffer;
+  const int frm_width = frm->y_width;
+  const int frm_height = frm->y_height;
+  const int ref_width = ref->y_width;
+  const int ref_height = ref->y_height;
+  assert(frm_width == ref_width);
+  assert(frm_height == ref_height);
+
+  // Ensure the number of pyramid levels will work with the frame resolution
+  const int msb =
+      frm_width < frm_height ? get_msb(frm_width) : get_msb(frm_height);
+  const int n_levels = AOMMIN(msb, N_LEVELS);
+
+  if (frm->flags & YV12_FLAG_HIGHBITDEPTH) {
+    // The frame buffer is 16-bit, so we need to convert to 8 bits for the
+    // following code. We cache the result until the frame is released.
+    frm_buffer = downconvert_frame(frm, bit_depth);
+  }
+  if (ref->flags & YV12_FLAG_HIGHBITDEPTH) {
+    ref_buffer = downconvert_frame(ref, bit_depth);
+  }
+
+  // Allocate frm and ref image pyramids
+  ImagePyramid *frm_pyr = alloc_pyramid(frm_width, frm_height);
+  ImagePyramid *ref_pyr = alloc_pyramid(ref_width, ref_height);
+
+  // Only touch the pyramids when both allocations succeeded.
+  if (frm_pyr != NULL && ref_pyr != NULL) {
+    compute_flow_pyramids(frm_buffer, frm_width, frm_height, frm->y_stride,
+                          n_levels, frm_pyr);
+    compute_flow_pyramids(ref_buffer, ref_width, ref_height, ref->y_stride,
+                          n_levels, ref_pyr);
+  }
+
+  // TODO(sarahparker) Implement the rest of DISFlow, currently only the image
+  // pyramid is implemented.
+  (void)num_inliers_by_motion;
+  (void)params_by_motion;
+  (void)num_motions;
+  (void)type;
+  if (frm_pyr != NULL) free_pyramid(frm_pyr);
+  if (ref_pyr != NULL) free_pyramid(ref_pyr);
+  return 0;
+}

diff --git a/av1/encoder/global_motion.h b/av1/encoder/global_motion.h
index c7c016c..87f80a7 100644
--- a/av1/encoder/global_motion.h
+++ b/av1/encoder/global_motion.h

@@ -58,6 +58,13 @@
                                         int *num_inliers_by_motion,
                                         double *params_by_motion,
                                         int num_motions);
+
+int compute_global_motion_disflow_based(TransformationType type,
+                                        YV12_BUFFER_CONFIG *frm,
+                                        YV12_BUFFER_CONFIG *ref, int bit_depth,
+                                        int *num_inliers_by_motion,
+                                        double *params_by_motion,
+                                        int num_motions);
 #ifdef __cplusplus
 }  // extern "C"
 #endif
commit	c9c7379d237966dc0f13a31c11e04a760a95ae0f	[log] [tgz]
author	Sarah Parker <sarahparker@google.com>	Wed Oct 17 15:44:52 2018 -0700
committer	Sarah Parker <sarahparker@google.com>	Mon Oct 22 20:37:32 2018 +0000
tree	1c237dfb69eef003d26b431e432333f42d4cda9b
parent	b5ed1e629ff9a267b81c1137f5a0c23de05114f6 [diff]