aom_dsp/flow_estimation/disflow.h - aom - Git at Google

 /*
  * Copyright (c) 2016, Alliance for Open Media. All rights reserved
  *
  * This source code is subject to the terms of the BSD 2 Clause License and
  * the Alliance for Open Media Patent License 1.0. If the BSD 2 Clause License
  * was not distributed with this source code in the LICENSE file, you can
  * obtain it at www.aomedia.org/license/software. If the Alliance for Open
  * Media Patent License 1.0 was not distributed with this source code in the
  * PATENTS file, you can obtain it at www.aomedia.org/license/patent.
  */

 #ifndef AOM_AOM_DSP_FLOW_ESTIMATION_DISFLOW_H_
 #define AOM_AOM_DSP_FLOW_ESTIMATION_DISFLOW_H_

 #include <stdbool.h>

 #include "aom_dsp/flow_estimation/flow_estimation.h"
 #include "aom_scale/yv12config.h"

 #ifdef __cplusplus
 extern "C" {
 #endif

 // Number of pyramid levels in disflow computation
 #define DISFLOW_PYRAMID_LEVELS 12

 // Size of square patches in the disflow dense grid
 // Must be a power of 2
 #define DISFLOW_PATCH_SIZE_LOG2 3
 #define DISFLOW_PATCH_SIZE (1 << DISFLOW_PATCH_SIZE_LOG2)
 // Center point of square patch
 #define DISFLOW_PATCH_CENTER ((DISFLOW_PATCH_SIZE / 2) - 1)

 // Overall scale of the `dx`, `dy` and `dt` arrays in the disflow code
 // In other words, the various derivatives are calculated with an internal
 // precision of (8 + DISFLOW_DERIV_SCALE_LOG2) bits, from an 8-bit input.
 //
 // This must be carefully synchronized with the code in sobel_filter()
 // (which fills the dx and dy arrays) and compute_flow_error() (which
 // fills dt); see the comments in those functions for more details
 #define DISFLOW_DERIV_SCALE_LOG2 3
 #define DISFLOW_DERIV_SCALE (1 << DISFLOW_DERIV_SCALE_LOG2)

 // Scale factor applied to each step in the main refinement loop
 //
 // This should be <= 1.0 to avoid overshoot. Values below 1.0
 // may help in some cases, but slow convergence overall, so
 // will require careful tuning.
 // TODO(rachelbarker): Tune this value
 #define DISFLOW_STEP_SIZE 1.0

 // Step size at which we should terminate iteration
 // The idea here is that, if we take a step which is much smaller than 1px in
 // size, then the values won't change much from iteration to iteration, so
 // many future steps will also be small, and that won't have much effect
 // on the ultimate result. So we can terminate early.
 //
 // To look at it another way, when we take a small step, that means that
 // either we're near to convergence (so can stop), or we're stuck in a
 // shallow valley and will take many iterations to get unstuck.
 //
 // Solving the latter properly requires fancier methods, such as "gradient
 // descent with momentum". For now, we terminate to avoid wasting a ton of
 // time on points which are either nearly-converged or stuck.
 //
 // Terminating at 1/8 px seems to give good results for global motion estimation
 #define DISFLOW_STEP_SIZE_THRESOLD (1. / 8.)

 // Max number of iterations if warp convergence is not found
 #define DISFLOW_MAX_ITR 4

 // Internal precision of cubic interpolation filters
 // The limiting factor here is that:
 // * Before integerizing, the maximum value of any kernel tap is 1.0
 // * After integerizing, each tap must fit into an int16_t.
 // Thus the largest multiplier we can get away with is 2^14 = 16384,
 // as 2^15 = 32768 is too large to fit in an int16_t.
 #define DISFLOW_INTERP_BITS 14

 typedef struct {
   // Start of allocation for u and v buffers
   double *buf0;

   // x and y directions of flow, per patch
   double *u;
   double *v;

   // Sizes of the above arrays
   int width;
   int height;
   int stride;
 } FlowField;

 bool av1_compute_global_motion_disflow(
     TransformationType type, YV12_BUFFER_CONFIG *src, YV12_BUFFER_CONFIG *ref,
     int bit_depth, int downsample_level, MotionModel *motion_models,
     int num_motion_models, bool *mem_alloc_failed);

 #ifdef __cplusplus
 }
 #endif

 #endif  // AOM_AOM_DSP_FLOW_ESTIMATION_DISFLOW_H_
	/*
	* Copyright (c) 2016, Alliance for Open Media. All rights reserved
	*
	* This source code is subject to the terms of the BSD 2 Clause License and
	* the Alliance for Open Media Patent License 1.0. If the BSD 2 Clause License
	* was not distributed with this source code in the LICENSE file, you can
	* obtain it at www.aomedia.org/license/software. If the Alliance for Open
	* Media Patent License 1.0 was not distributed with this source code in the
	* PATENTS file, you can obtain it at www.aomedia.org/license/patent.
	*/

	#ifndef AOM_AOM_DSP_FLOW_ESTIMATION_DISFLOW_H_
	#define AOM_AOM_DSP_FLOW_ESTIMATION_DISFLOW_H_

	#include <stdbool.h>

	#include "aom_dsp/flow_estimation/flow_estimation.h"
	#include "aom_scale/yv12config.h"

	#ifdef __cplusplus
	extern "C" {
	#endif

	// Number of pyramid levels in disflow computation
	#define DISFLOW_PYRAMID_LEVELS 12

	// Size of square patches in the disflow dense grid
	// Must be a power of 2
	#define DISFLOW_PATCH_SIZE_LOG2 3
	#define DISFLOW_PATCH_SIZE (1 << DISFLOW_PATCH_SIZE_LOG2)
	// Center point of square patch
	#define DISFLOW_PATCH_CENTER ((DISFLOW_PATCH_SIZE / 2) - 1)

	// Overall scale of the `dx`, `dy` and `dt` arrays in the disflow code
	// In other words, the various derivatives are calculated with an internal
	// precision of (8 + DISFLOW_DERIV_SCALE_LOG2) bits, from an 8-bit input.
	//
	// This must be carefully synchronized with the code in sobel_filter()
	// (which fills the dx and dy arrays) and compute_flow_error() (which
	// fills dt); see the comments in those functions for more details
	#define DISFLOW_DERIV_SCALE_LOG2 3
	#define DISFLOW_DERIV_SCALE (1 << DISFLOW_DERIV_SCALE_LOG2)

	// Scale factor applied to each step in the main refinement loop
	//
	// This should be <= 1.0 to avoid overshoot. Values below 1.0
	// may help in some cases, but slow convergence overall, so
	// will require careful tuning.
	// TODO(rachelbarker): Tune this value
	#define DISFLOW_STEP_SIZE 1.0

	// Step size at which we should terminate iteration
	// The idea here is that, if we take a step which is much smaller than 1px in
	// size, then the values won't change much from iteration to iteration, so
	// many future steps will also be small, and that won't have much effect
	// on the ultimate result. So we can terminate early.
	//
	// To look at it another way, when we take a small step, that means that
	// either we're near to convergence (so can stop), or we're stuck in a
	// shallow valley and will take many iterations to get unstuck.
	//
	// Solving the latter properly requires fancier methods, such as "gradient
	// descent with momentum". For now, we terminate to avoid wasting a ton of
	// time on points which are either nearly-converged or stuck.
	//
	// Terminating at 1/8 px seems to give good results for global motion estimation
	#define DISFLOW_STEP_SIZE_THRESOLD (1. / 8.)

	// Max number of iterations if warp convergence is not found
	#define DISFLOW_MAX_ITR 4

	// Internal precision of cubic interpolation filters
	// The limiting factor here is that:
	// * Before integerizing, the maximum value of any kernel tap is 1.0
	// * After integerizing, each tap must fit into an int16_t.
	// Thus the largest multiplier we can get away with is 2^14 = 16384,
	// as 2^15 = 32768 is too large to fit in an int16_t.
	#define DISFLOW_INTERP_BITS 14

	typedef struct {
	// Start of allocation for u and v buffers
	double *buf0;

	// x and y directions of flow, per patch
	double *u;
	double *v;

	// Sizes of the above arrays
	int width;
	int height;
	int stride;
	} FlowField;

	bool av1_compute_global_motion_disflow(
	TransformationType type, YV12_BUFFER_CONFIG src, YV12_BUFFER_CONFIG ref,
	int bit_depth, int downsample_level, MotionModel *motion_models,
	int num_motion_models, bool *mem_alloc_failed);

	#ifdef __cplusplus
	}
	#endif

	#endif // AOM_AOM_DSP_FLOW_ESTIMATION_DISFLOW_H_