/*
* Copyright (c) 2016, Alliance for Open Media. All rights reserved
*
* This source code is subject to the terms of the BSD 2 Clause License and
* the Alliance for Open Media Patent License 1.0. If the BSD 2 Clause License
* was not distributed with this source code in the LICENSE file, you can
* obtain it at www.aomedia.org/license/software. If the Alliance for Open
* Media Patent License 1.0 was not distributed with this source code in the
* PATENTS file, you can obtain it at www.aomedia.org/license/patent.
*/
#ifndef AOM_AOM_DSP_FLOW_ESTIMATION_H_
#define AOM_AOM_DSP_FLOW_ESTIMATION_H_
#include <math.h>
#include <assert.h>
#include "aom_dsp/aom_dsp_common.h"
#include "aom_dsp/pyramid.h"
#include "aom_dsp/rect.h"
#include "aom_dsp/flow_estimation/corner_detect.h"
#include "aom_ports/mem.h"
#include "aom_scale/yv12config.h"
#ifdef __cplusplus
extern "C" {
#endif
#define MAX_PARAMDIM 9
#define MAX_CORNERS 4096
#define MIN_INLIER_PROB 0.1
/* clang-format off */
enum {
IDENTITY = 0, // identity transformation, 0-parameter
TRANSLATION = 1, // translational motion, 2-parameter
ROTATION = 2, // rotation about some point, 3-parameter
ZOOM = 3, // zoom in/out on some point, 3-parameter
VERTSHEAR = 4, // translation + vertical shear, 3-parameter
HORZSHEAR = 5, // translation + horizontal shear, 3-parameter
UZOOM = 6, // unequal zoom, 4-parameter
ROTZOOM = 7, // equal zoom, then rotate, 4-parameter
ROTUZOOM = 8, // unequal zoom, then rotate, 5-parameter
AFFINE = 9, // general affine, 6-parameter
VERTRAPEZOID = 10, // vertical-only perspective, 6-parameter
HORTRAPEZOID = 11, // horizontal-only perspective, 6-parameter
HOMOGRAPHY = 12, // general perspective transformation, 8-parameter
TRANS_TYPES,
} UENUM1BYTE(TransformationType);
/* clang-format on */
// number of parameters used by each transformation in TransformationType
static const int trans_model_params[TRANS_TYPES] = { 0, 2, 3, 3, 3, 3, 4,
4, 5, 6, 6, 6, 8 };
// Which method to use for global motion estimation
typedef enum {
GLOBAL_MOTION_METHOD_FEATURE_MATCH,
GLOBAL_MOTION_METHOD_DISFLOW,
#if CONFIG_TENSORFLOW_LITE
GLOBAL_MOTION_METHOD_DEEPFLOW,
#endif // CONFIG_TENSORFLOW_LITE
GLOBAL_MOTION_METHODS,
GLOBAL_MOTION_METHOD_LAST = GLOBAL_MOTION_METHODS - 1,
} GlobalMotionMethod;
// Holds one fitted motion model: up to MAX_PARAMDIM - 1 parameters, plus
// the set of input points that RANSAC classified as inliers
typedef struct {
double params[MAX_PARAMDIM - 1];
int *inliers;
int num_inliers;
} MotionModel;
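//
// Illustrative sketch of applying a fitted AFFINE model to a source point.
// The layout used here (translation in params[0..1], 2x2 matrix in
// params[2..5]) is an assumption for illustration only; check the model
// fitting code for the authoritative ordering:
//
//   static void apply_affine_model(const MotionModel *m, double x, double y,
//                                  double *rx, double *ry) {
//     *rx = m->params[2] * x + m->params[3] * y + m->params[0];
//     *ry = m->params[4] * x + m->params[5] * y + m->params[1];
//   }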
// Data structure to store a single correspondence point during global
// motion search.
//
// A correspondence (x, y) -> (rx, ry) means that point (x, y) in the
// source frame corresponds to point (rx, ry) in the ref frame.
typedef struct {
double x, y;
double rx, ry;
} Correspondence;
typedef struct {
int num_correspondences;
Correspondence *correspondences;
} CorrespondenceList;
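//
// Minimal sketch of consuming a CorrespondenceList, here computing the mean
// displacement between the two frames (illustrative only; assumes the list
// is non-empty):
//
//   static void mean_displacement(const CorrespondenceList *list,
//                                 double *dx, double *dy) {
//     double sx = 0.0, sy = 0.0;
//     for (int i = 0; i < list->num_correspondences; ++i) {
//       const Correspondence *c = &list->correspondences[i];
//       sx += c->rx - c->x;  // x displacement of this point
//       sy += c->ry - c->y;  // y displacement of this point
//     }
//     *dx = sx / list->num_correspondences;
//     *dy = sy / list->num_correspondences;
//   }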
typedef struct {
// Horizontal (u) and vertical (v) components of the flow, one value
// per patch
double *u;
double *v;
// Dimensions and stride of the above arrays
int width;
int height;
int stride;
} FlowField;
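//
// The arrays are indexed in raster order via `stride` (assumed to be in
// units of elements, not bytes), so the flow vector for the patch in row i,
// column j is read as in this sketch:
//
//   static void get_patch_flow(const FlowField *flow, int i, int j,
//                              double *u, double *v) {
//     assert(i >= 0 && i < flow->height && j >= 0 && j < flow->width);
//     *u = flow->u[i * flow->stride + j];
//     *v = flow->v[i * flow->stride + j];
//   }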
// We want to present external code with a generic type, which holds whatever
// data is needed for the desired motion estimation method.
// As different methods use different data, we store this in a tagged union,
// with the selected motion estimation method as the tag.
typedef struct {
GlobalMotionMethod method;
union {
CorrespondenceList *corrs;
FlowField *flow;
};
} FlowData;
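//
// Code consuming a FlowData must check the tag before reading the union,
// as in this sketch (methods not shown here are elided for brevity):
//
//   static int count_flow_entries(const FlowData *data) {
//     switch (data->method) {
//       case GLOBAL_MOTION_METHOD_FEATURE_MATCH:
//         return data->corrs->num_correspondences;
//       case GLOBAL_MOTION_METHOD_DISFLOW:
//         return data->flow->width * data->flow->height;
//       default: return 0;
//     }
//   }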
// For each global motion method, how many pyramid levels should we allocate?
// Note that this is a maximum, and fewer levels will be allocated if the frame
// is not large enough to need all of the specified levels.
extern const int global_motion_pyr_levels[GLOBAL_MOTION_METHODS];
// Amount to downsample the flow field by.
// E.g., DOWNSAMPLE_SHIFT = 2 (DOWNSAMPLE_FACTOR == 4) means we calculate
// one flow point for each 4x4 pixel region of the frame.
// DOWNSAMPLE_FACTOR must be a power of 2, which holds automatically because
// it is derived from DOWNSAMPLE_SHIFT by a left shift.
#define DOWNSAMPLE_SHIFT 3
#define DOWNSAMPLE_FACTOR (1 << DOWNSAMPLE_SHIFT)
// When downsampling the flow field, each flow field entry covers a square
// region of pixels in the image pyramid. This value is equal to the position
// of the center of that region, as an offset from the top/left edge.
//
// Note: Using ((DOWNSAMPLE_FACTOR - 1) / 2) is equivalent to the more
// natural expression ((DOWNSAMPLE_FACTOR / 2) - 1),
// unless DOWNSAMPLE_FACTOR == 1 (ie, no downsampling), in which case
// this gives the correct offset of 0 instead of -1.
#define UPSAMPLE_CENTER_OFFSET ((DOWNSAMPLE_FACTOR - 1) / 2)
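//
// Worked example with the current DOWNSAMPLE_SHIFT of 3: each flow entry
// covers an 8x8 pixel region and UPSAMPLE_CENTER_OFFSET == (8 - 1) / 2 == 3.
// The sketch below maps flow-field coordinates (i, j) to the pixel that
// entry is centered on:
//
//   static void flow_entry_center(int i, int j, int *px, int *py) {
//     *px = (j << DOWNSAMPLE_SHIFT) + UPSAMPLE_CENTER_OFFSET;  // column
//     *py = (i << DOWNSAMPLE_SHIFT) + UPSAMPLE_CENTER_OFFSET;  // row
//   }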
// Internal precision of cubic interpolation filters
// The limiting factor here is that:
// * Before integerizing, the maximum value of any kernel tap is 1.0
// * After integerizing, each tap must fit into an int16_t.
// Thus the largest multiplier we can get away with is 2^14 = 16384,
// as 2^15 = 32768 is too large to fit in an int16_t.
#define FLOW_INTERP_BITS 14
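//
// Sketch of removing this scale factor after a single filtering pass: each
// product carries a factor of 2^FLOW_INTERP_BITS, which is dropped with a
// rounding right-shift (illustrative only; assumes a non-negative sum):
//
//   static int descale_interp_value(int sum) {
//     return (sum + (1 << (FLOW_INTERP_BITS - 1))) >> FLOW_INTERP_BITS;
//   }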
// Compute flow data between `src` and `ref`: either a set of sparse
// correspondences or a dense flow field, depending on `gm_method`
FlowData *aom_compute_flow_data(YV12_BUFFER_CONFIG *src,
YV12_BUFFER_CONFIG *ref, int bit_depth,
GlobalMotionMethod gm_method);
// Fit one or several models of a given type to the specified flow data.
// This function fits models to the entire frame, using the RANSAC method
// to fit models in a noise-resilient way.
//
// As is standard for video codecs, the resulting model maps from (x, y)
// coordinates in `src` to the corresponding points in `ref`, regardless
// of the temporal order of the two frames.
bool aom_fit_global_motion_model(FlowData *flow_data, TransformationType type,
YV12_BUFFER_CONFIG *src,
MotionModel *motion_models,
int num_motion_models);
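//
// Typical end-to-end usage (illustrative sketch; error handling is elided,
// and the required `inliers` capacity is an assumption):
//
//   int inliers[2 * MAX_CORNERS];  // assumed: one (x, y) pair per inlier
//   MotionModel model = { 0 };
//   model.inliers = inliers;
//   FlowData *data = aom_compute_flow_data(src, ref, bit_depth,
//                                          GLOBAL_MOTION_METHOD_DISFLOW);
//   if (data && aom_fit_global_motion_model(data, ROTZOOM, src, &model, 1)) {
//     // model.params now maps `src` coordinates to `ref` coordinates
//   }
//   aom_free_flow_data(data);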
// Fit a model of a given type to part of a frame.
// Unlike aom_fit_global_motion_model(), this function does not use RANSAC
// and returns only a single model, which makes it more suitable for fitting
// per-block or per-superblock models.
bool aom_fit_local_motion_model(FlowData *flow_data, PixelRect *rect,
TransformationType type, double *mat);
// Variant of aom_fit_global_motion_model() that operates directly on a
// dense flow field
bool aom_fit_global_model_to_flow_field(FlowField *flow,
TransformationType type,
YV12_BUFFER_CONFIG *frm,
MotionModel *motion_models,
int num_motion_models);
// Variant of aom_fit_local_motion_model() that operates directly on a
// dense flow field
bool aom_fit_local_model_to_flow_field(const FlowField *flow,
const PixelRect *rect,
TransformationType type, double *mat);
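//
// Sketch of fitting a per-superblock model (the PixelRect field names below
// are assumed from aom_dsp/rect.h):
//
//   PixelRect rect = { .left = 0, .right = 64, .top = 0, .bottom = 64 };
//   double mat[MAX_PARAMDIM - 1];
//   if (aom_fit_local_motion_model(flow_data, &rect, AFFINE, mat)) {
//     // mat now holds the affine parameters for this 64x64 block
//   }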
FlowField *aom_alloc_flow_field(int frame_width, int frame_height);
void aom_free_flow_field(FlowField *flow);
void aom_free_flow_data(FlowData *flow_data);
// Compute the four taps of the Catmull-Rom cubic interpolation kernel for
// a fractional offset x in [0, 1)
static INLINE void get_cubic_kernel_dbl(double x, double *kernel) {
assert(0 <= x && x < 1);
double x2 = x * x;
double x3 = x2 * x;
kernel[0] = -0.5 * x + x2 - 0.5 * x3;
kernel[1] = 1.0 - 2.5 * x2 + 1.5 * x3;
kernel[2] = 0.5 * x + 2.0 * x2 - 1.5 * x3;
kernel[3] = -0.5 * x2 + 0.5 * x3;
}
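//
// Sanity check, useful when modifying the kernel: at x == 0 the taps are
// {0, 1, 0, 0} (the sample itself), and as x -> 1 they tend to {0, 0, 1, 0}.
// For any x the four taps sum to exactly 1, so flat inputs are preserved:
//
//   double k[4];
//   get_cubic_kernel_dbl(0.5, k);
//   assert(fabs(k[0] + k[1] + k[2] + k[3] - 1.0) < 1e-12);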
// Integerized version of get_cubic_kernel_dbl(), with each tap scaled by
// 2^FLOW_INTERP_BITS and rounded to the nearest integer
static INLINE void get_cubic_kernel_int(double x, int *kernel) {
double kernel_dbl[4];
get_cubic_kernel_dbl(x, kernel_dbl);
kernel[0] = (int)rint(kernel_dbl[0] * (1 << FLOW_INTERP_BITS));
kernel[1] = (int)rint(kernel_dbl[1] * (1 << FLOW_INTERP_BITS));
kernel[2] = (int)rint(kernel_dbl[2] * (1 << FLOW_INTERP_BITS));
kernel[3] = (int)rint(kernel_dbl[3] * (1 << FLOW_INTERP_BITS));
}
static INLINE double get_cubic_value_dbl(const double *p,
const double *kernel) {
return kernel[0] * p[0] + kernel[1] * p[1] + kernel[2] * p[2] +
kernel[3] * p[3];
}
static INLINE int get_cubic_value_int(const int *p, const int *kernel) {
return kernel[0] * p[0] + kernel[1] * p[1] + kernel[2] * p[2] +
kernel[3] * p[3];
}
// Interpolate a single value from a 4x4 neighborhood of `arr`, using the
// given horizontal and vertical cubic kernels. `arr` must point at the
// sample at the top-left corner of the unit square containing the
// interpolation point, with at least one valid row and column before it
// and two after it.
static INLINE double bicubic_interp_one(const double *arr, int stride,
                                        const double *h_kernel,
                                        const double *v_kernel) {
double tmp[1 * 4];
// Horizontal convolution
for (int i = -1; i < 3; ++i) {
tmp[i + 1] = get_cubic_value_dbl(&arr[i * stride - 1], h_kernel);
}
// Vertical convolution
return get_cubic_value_dbl(tmp, v_kernel);
}
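//
// Usage sketch: interpolate at fractional position (x0 + u, y0 + v), where
// 0 <= u, v < 1 and `img` has at least one valid row/column above and to
// the left of (x0, y0) and two below and to the right:
//
//   double h_kernel[4], v_kernel[4];
//   get_cubic_kernel_dbl(u, h_kernel);
//   get_cubic_kernel_dbl(v, v_kernel);
//   double val = bicubic_interp_one(&img[y0 * stride + x0], stride,
//                                   h_kernel, v_kernel);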
#ifdef __cplusplus
}
#endif
#endif // AOM_AOM_DSP_FLOW_ESTIMATION_H_