Add encoder arg to select global motion method
The default method is unchanged (feature-match), but the new
--global-motion-method argument allows the user to select disflow
as an alternative method if desired.
Change-Id: Ifc28b57059e69a7048564c289156085e735ba12b
diff --git a/aom_dsp/flow_estimation/corner_match.c b/aom_dsp/flow_estimation/corner_match.c
index 8331a06..423e569 100644
--- a/aom_dsp/flow_estimation/corner_match.c
+++ b/aom_dsp/flow_estimation/corner_match.c
@@ -199,7 +199,7 @@
return num_correspondences;
}
-int av1_compute_global_motion_feature_based(
+int av1_compute_global_motion_feature_match(
TransformationType type, YV12_BUFFER_CONFIG *src, YV12_BUFFER_CONFIG *ref,
int bit_depth, MotionModel *motion_models, int num_motion_models) {
int i;
diff --git a/aom_dsp/flow_estimation/corner_match.h b/aom_dsp/flow_estimation/corner_match.h
index 77cc501..25f1787 100644
--- a/aom_dsp/flow_estimation/corner_match.h
+++ b/aom_dsp/flow_estimation/corner_match.h
@@ -36,7 +36,7 @@
int ref_stride,
Correspondence *correspondences);
-int av1_compute_global_motion_feature_based(
+int av1_compute_global_motion_feature_match(
TransformationType type, YV12_BUFFER_CONFIG *src, YV12_BUFFER_CONFIG *ref,
int bit_depth, MotionModel *motion_models, int num_motion_models);
diff --git a/aom_dsp/flow_estimation/disflow.c b/aom_dsp/flow_estimation/disflow.c
index fe038d2..18a0d2b 100644
--- a/aom_dsp/flow_estimation/disflow.c
+++ b/aom_dsp/flow_estimation/disflow.c
@@ -593,9 +593,11 @@
// Following the convention in flow_estimation.h, the flow vectors are computed
// at fixed points in `src` and point to the corresponding locations in `ref`,
// regardless of the temporal ordering of the frames.
-int av1_compute_global_motion_disflow_based(
- TransformationType type, YV12_BUFFER_CONFIG *src, YV12_BUFFER_CONFIG *ref,
- int bit_depth, MotionModel *motion_models, int num_motion_models) {
+int av1_compute_global_motion_disflow(TransformationType type,
+ YV12_BUFFER_CONFIG *src,
+ YV12_BUFFER_CONFIG *ref, int bit_depth,
+ MotionModel *motion_models,
+ int num_motion_models) {
// Precompute information we will need about each frame
ImagePyramid *src_pyramid = src->y_pyramid;
CornerList *src_corners = src->corners;
diff --git a/aom_dsp/flow_estimation/disflow.h b/aom_dsp/flow_estimation/disflow.h
index 7286717..87dfdb4 100644
--- a/aom_dsp/flow_estimation/disflow.h
+++ b/aom_dsp/flow_estimation/disflow.h
@@ -89,9 +89,11 @@
int stride;
} FlowField;
-int av1_compute_global_motion_disflow_based(
- TransformationType type, YV12_BUFFER_CONFIG *src, YV12_BUFFER_CONFIG *ref,
- int bit_depth, MotionModel *motion_models, int num_motion_models);
+int av1_compute_global_motion_disflow(TransformationType type,
+ YV12_BUFFER_CONFIG *src,
+ YV12_BUFFER_CONFIG *ref, int bit_depth,
+ MotionModel *motion_models,
+ int num_motion_models);
#ifdef __cplusplus
}
diff --git a/aom_dsp/flow_estimation/flow_estimation.c b/aom_dsp/flow_estimation/flow_estimation.c
index 966d670..c7134b7 100644
--- a/aom_dsp/flow_estimation/flow_estimation.c
+++ b/aom_dsp/flow_estimation/flow_estimation.c
@@ -18,6 +18,14 @@
#include "aom_ports/mem.h"
#include "aom_scale/yv12config.h"
+// For each global motion method, how many pyramid levels should we allocate?
+// Note that this is a maximum; fewer levels will be allocated if the frame
+// is not large enough to need all of the specified levels.
+const int global_motion_pyr_levels[GLOBAL_MOTION_METHODS] = {
+ 1, // GLOBAL_MOTION_METHOD_FEATURE_MATCH
+ 16, // GLOBAL_MOTION_METHOD_DISFLOW
+};
+
// Compute a global motion model between the given source and ref frames.
//
// As is standard for video codecs, the resulting model maps from (x, y)
@@ -25,15 +33,15 @@
// of the temporal order of the two frames.
int aom_compute_global_motion(TransformationType type, YV12_BUFFER_CONFIG *src,
YV12_BUFFER_CONFIG *ref, int bit_depth,
- GlobalMotionEstimationType gm_estimation_type,
+ GlobalMotionMethod gm_method,
MotionModel *motion_models,
int num_motion_models) {
- switch (gm_estimation_type) {
- case GLOBAL_MOTION_FEATURE_BASED:
- return av1_compute_global_motion_feature_based(
+ switch (gm_method) {
+ case GLOBAL_MOTION_METHOD_FEATURE_MATCH:
+ return av1_compute_global_motion_feature_match(
type, src, ref, bit_depth, motion_models, num_motion_models);
- case GLOBAL_MOTION_DISFLOW_BASED:
- return av1_compute_global_motion_disflow_based(
+ case GLOBAL_MOTION_METHOD_DISFLOW:
+ return av1_compute_global_motion_disflow(
type, src, ref, bit_depth, motion_models, num_motion_models);
default: assert(0 && "Unknown global motion estimation type");
}
diff --git a/aom_dsp/flow_estimation/flow_estimation.h b/aom_dsp/flow_estimation/flow_estimation.h
index 0eabfed..7cf947c 100644
--- a/aom_dsp/flow_estimation/flow_estimation.h
+++ b/aom_dsp/flow_estimation/flow_estimation.h
@@ -39,9 +39,11 @@
static const int trans_model_params[TRANS_TYPES] = { 0, 2, 4, 6 };
typedef enum {
- GLOBAL_MOTION_FEATURE_BASED,
- GLOBAL_MOTION_DISFLOW_BASED,
-} GlobalMotionEstimationType;
+ GLOBAL_MOTION_METHOD_FEATURE_MATCH,
+ GLOBAL_MOTION_METHOD_DISFLOW,
+ GLOBAL_MOTION_METHOD_LAST = GLOBAL_MOTION_METHOD_DISFLOW,
+ GLOBAL_MOTION_METHODS
+} GlobalMotionMethod;
typedef struct {
double params[MAX_PARAMDIM - 1];
@@ -59,6 +61,11 @@
double rx, ry;
} Correspondence;
+// For each global motion method, how many pyramid levels should we allocate?
+// Note that this is a maximum; fewer levels will be allocated if the frame
+// is not large enough to need all of the specified levels.
+extern const int global_motion_pyr_levels[GLOBAL_MOTION_METHODS];
+
// Compute a global motion model between the given source and ref frames.
//
// As is standard for video codecs, the resulting model maps from (x, y)
@@ -66,7 +73,7 @@
// of the temporal order of the two frames.
int aom_compute_global_motion(TransformationType type, YV12_BUFFER_CONFIG *src,
YV12_BUFFER_CONFIG *ref, int bit_depth,
- GlobalMotionEstimationType gm_estimation_type,
+ GlobalMotionMethod gm_method,
MotionModel *motion_models,
int num_motion_models);
diff --git a/apps/aomenc.c b/apps/aomenc.c
index ef208fd..1cc45cd 100644
--- a/apps/aomenc.c
+++ b/apps/aomenc.c
@@ -453,6 +453,7 @@
&g_av1_codec_arg_defs.sb_qp_sweep,
&g_av1_codec_arg_defs.dist_metric,
&g_av1_codec_arg_defs.kf_max_pyr_height,
+ &g_av1_codec_arg_defs.global_motion_method,
NULL,
};
diff --git a/av1/arg_defs.c b/av1/arg_defs.c
index abfd4b3..a37958e 100644
--- a/av1/arg_defs.c
+++ b/av1/arg_defs.c
@@ -139,6 +139,12 @@
{ "ebu3213", AOM_CICP_CP_EBU_3213 },
{ NULL, 0 }
};
+
+static const struct arg_enum_list global_motion_method_enum[] = {
+ { "feature-match", GLOBAL_MOTION_METHOD_FEATURE_MATCH },
+ { "disflow", GLOBAL_MOTION_METHOD_DISFLOW },
+ { NULL, 0 }
+};
#endif // CONFIG_AV1_ENCODER
const av1_codec_arg_definitions_t g_av1_codec_arg_defs = {
@@ -683,5 +689,9 @@
ARG_DEF(NULL, "sb-qp-sweep", 1,
"When set to 1, enable the superblock level qp sweep for a "
"given lambda to minimize the rdcost."),
+ .global_motion_method = ARG_DEF_ENUM(NULL, "global-motion-method", 1,
+ "Global motion search method "
+ "(default: feature-match)",
+ global_motion_method_enum),
#endif // CONFIG_AV1_ENCODER
};
diff --git a/av1/arg_defs.h b/av1/arg_defs.h
index e15a84c..730b422 100644
--- a/av1/arg_defs.h
+++ b/av1/arg_defs.h
@@ -21,6 +21,7 @@
#include "common/webmenc.h"
#endif
#include "aom/aomcx.h"
+#include "aom_dsp/flow_estimation/flow_estimation.h"
enum TestDecodeFatality {
TEST_DECODE_OFF,
@@ -233,6 +234,7 @@
arg_def_t strict_level_conformance;
arg_def_t kf_max_pyr_height;
arg_def_t sb_qp_sweep;
+ arg_def_t global_motion_method;
#endif // CONFIG_AV1_ENCODER
} av1_codec_arg_definitions_t;
diff --git a/av1/av1_cx_iface.c b/av1/av1_cx_iface.c
index a1f66ca..6f331db 100644
--- a/av1/av1_cx_iface.c
+++ b/av1/av1_cx_iface.c
@@ -20,6 +20,8 @@
#include "aom/aom_encoder.h"
#include "aom/internal/aom_codec_internal.h"
+#include "aom_dsp/flow_estimation/flow_estimation.h"
+
#include "av1/av1_iface_common.h"
#include "av1/encoder/bitstream.h"
#include "av1/encoder/encoder.h"
@@ -194,6 +196,7 @@
int strict_level_conformance;
int kf_max_pyr_height;
int sb_qp_sweep;
+ GlobalMotionMethod global_motion_method;
};
#if CONFIG_REALTIME_ONLY
@@ -338,25 +341,26 @@
SEQ_LEVEL_MAX, SEQ_LEVEL_MAX, SEQ_LEVEL_MAX, SEQ_LEVEL_MAX, SEQ_LEVEL_MAX,
SEQ_LEVEL_MAX, SEQ_LEVEL_MAX, SEQ_LEVEL_MAX, SEQ_LEVEL_MAX, SEQ_LEVEL_MAX,
SEQ_LEVEL_MAX, SEQ_LEVEL_MAX,
- }, // target_seq_level_idx
- 0, // tier_mask
- 0, // min_cr
- COST_UPD_OFF, // coeff_cost_upd_freq
- COST_UPD_OFF, // mode_cost_upd_freq
- COST_UPD_OFF, // mv_cost_upd_freq
- COST_UPD_OFF, // dv_cost_upd_freq
- 0, // ext_tile_debug
- 0, // sb_multipass_unit_test
- -1, // passes
- -1, // fwd_kf_dist
- LOOPFILTER_ALL, // loopfilter_control
- 0, // skip_postproc_filtering
- NULL, // two_pass_output
- NULL, // second_pass_log
- 0, // auto_intra_tools_off
- 0, // strict_level_conformance
- -1, // kf_max_pyr_height
- 0, // sb_qp_sweep
+ }, // target_seq_level_idx
+ 0, // tier_mask
+ 0, // min_cr
+ COST_UPD_OFF, // coeff_cost_upd_freq
+ COST_UPD_OFF, // mode_cost_upd_freq
+ COST_UPD_OFF, // mv_cost_upd_freq
+ COST_UPD_OFF, // dv_cost_upd_freq
+ 0, // ext_tile_debug
+ 0, // sb_multipass_unit_test
+ -1, // passes
+ -1, // fwd_kf_dist
+ LOOPFILTER_ALL, // loopfilter_control
+ 0, // skip_postproc_filtering
+ NULL, // two_pass_output
+ NULL, // second_pass_log
+ 0, // auto_intra_tools_off
+ 0, // strict_level_conformance
+ -1, // kf_max_pyr_height
+ 0, // sb_qp_sweep
+ GLOBAL_MOTION_METHOD_FEATURE_MATCH, // global_motion_method
};
#else
static const struct av1_extracfg default_extra_cfg = {
@@ -487,25 +491,26 @@
SEQ_LEVEL_MAX, SEQ_LEVEL_MAX, SEQ_LEVEL_MAX, SEQ_LEVEL_MAX, SEQ_LEVEL_MAX,
SEQ_LEVEL_MAX, SEQ_LEVEL_MAX, SEQ_LEVEL_MAX, SEQ_LEVEL_MAX, SEQ_LEVEL_MAX,
SEQ_LEVEL_MAX, SEQ_LEVEL_MAX,
- }, // target_seq_level_idx
- 0, // tier_mask
- 0, // min_cr
- COST_UPD_SB, // coeff_cost_upd_freq
- COST_UPD_SB, // mode_cost_upd_freq
- COST_UPD_SB, // mv_cost_upd_freq
- COST_UPD_SB, // dv_cost_upd_freq
- 0, // ext_tile_debug
- 0, // sb_multipass_unit_test
- -1, // passes
- -1, // fwd_kf_dist
- LOOPFILTER_ALL, // loopfilter_control
- 0, // skip_postproc_filtering
- NULL, // two_pass_output
- NULL, // second_pass_log
- 0, // auto_intra_tools_off
- 0, // strict_level_conformance
- -1, // kf_max_pyr_height
- 0, // sb_qp_sweep
+ }, // target_seq_level_idx
+ 0, // tier_mask
+ 0, // min_cr
+ COST_UPD_SB, // coeff_cost_upd_freq
+ COST_UPD_SB, // mode_cost_upd_freq
+ COST_UPD_SB, // mv_cost_upd_freq
+ COST_UPD_SB, // dv_cost_upd_freq
+ 0, // ext_tile_debug
+ 0, // sb_multipass_unit_test
+ -1, // passes
+ -1, // fwd_kf_dist
+ LOOPFILTER_ALL, // loopfilter_control
+ 0, // skip_postproc_filtering
+ NULL, // two_pass_output
+ NULL, // second_pass_log
+ 0, // auto_intra_tools_off
+ 0, // strict_level_conformance
+ -1, // kf_max_pyr_height
+ 0, // sb_qp_sweep
+ GLOBAL_MOTION_METHOD_FEATURE_MATCH, // global_motion_method
};
#endif
@@ -862,6 +867,8 @@
RANGE_CHECK_BOOL(extra_cfg, auto_intra_tools_off);
RANGE_CHECK_BOOL(extra_cfg, strict_level_conformance);
RANGE_CHECK_BOOL(extra_cfg, sb_qp_sweep);
+ RANGE_CHECK(extra_cfg, global_motion_method,
+ GLOBAL_MOTION_METHOD_FEATURE_MATCH, GLOBAL_MOTION_METHOD_LAST);
RANGE_CHECK(extra_cfg, kf_max_pyr_height, -1, 5);
if (extra_cfg->kf_max_pyr_height != -1 &&
@@ -1460,6 +1467,8 @@
oxcf->sb_qp_sweep = extra_cfg->sb_qp_sweep;
+ oxcf->global_motion_method = extra_cfg->global_motion_method;
+
return AOM_CODEC_OK;
}
@@ -4023,6 +4032,9 @@
err_string)) {
ctx->cfg.tile_height_count = arg_parse_list_helper(
&arg, ctx->cfg.tile_heights, MAX_TILE_HEIGHTS, err_string);
+ } else if (arg_match_helper(&arg, &g_av1_codec_arg_defs.global_motion_method,
+ argv, err_string)) {
+ extra_cfg.global_motion_method = arg_parse_enum_helper(&arg, err_string);
} else {
match = 0;
snprintf(err_string, ARG_ERR_MSG_MAX_LEN, "Cannot find aom option %s",
diff --git a/av1/encoder/encoder.c b/av1/encoder/encoder.c
index c21c920..e9a744f 100644
--- a/av1/encoder/encoder.c
+++ b/av1/encoder/encoder.c
@@ -651,8 +651,6 @@
init_buffer_indices(&cpi->force_intpel_info, cm->remapped_ref_idx);
av1_noise_estimate_init(&cpi->noise_estimate, cm->width, cm->height);
-
- cpi->image_pyramid_levels = oxcf->tool_cfg.enable_global_motion ? 1 : 0;
}
void av1_change_config_seq(struct AV1_PRIMARY *ppi,
@@ -906,6 +904,18 @@
if (lap_lag_in_frames != -1) {
cpi->oxcf.gf_cfg.lag_in_frames = lap_lag_in_frames;
}
+
+#if CONFIG_REALTIME_ONLY
+ assert(!oxcf->tool_cfg.enable_global_motion);
+ cpi->image_pyramid_levels = 0;
+#else
+ if (oxcf->tool_cfg.enable_global_motion) {
+ cpi->image_pyramid_levels =
+ global_motion_pyr_levels[oxcf->global_motion_method];
+ } else {
+ cpi->image_pyramid_levels = 0;
+ }
+#endif // CONFIG_REALTIME_ONLY
}
static INLINE void init_frame_info(FRAME_INFO *frame_info,
diff --git a/av1/encoder/encoder.h b/av1/encoder/encoder.h
index eb484c8..6852539 100644
--- a/av1/encoder/encoder.h
+++ b/av1/encoder/encoder.h
@@ -1079,6 +1079,9 @@
// A flag to control if we enable the superblock qp sweep for a given lambda
int sb_qp_sweep;
+
+ // Selected global motion search method
+ GlobalMotionMethod global_motion_method;
/*!\endcond */
} AV1EncoderConfig;
diff --git a/av1/encoder/global_motion_facade.c b/av1/encoder/global_motion_facade.c
index ee3b525..0de9546 100644
--- a/av1/encoder/global_motion_facade.c
+++ b/av1/encoder/global_motion_facade.c
@@ -102,18 +102,8 @@
assert(ref_buf[frame] != NULL);
TransformationType model;
int bit_depth = cpi->common.seq_params->bit_depth;
+ GlobalMotionMethod global_motion_method = cpi->oxcf.global_motion_method;
- // TODO(sarahparker, debargha): Explore do_adaptive_gm_estimation = 1
- const int do_adaptive_gm_estimation = 0;
-
- const int ref_frame_dist = get_relative_dist(
- &cm->seq_params->order_hint_info, cm->current_frame.order_hint,
- cm->cur_frame->ref_order_hints[frame - LAST_FRAME]);
- const GlobalMotionEstimationType gm_estimation_type =
- cm->seq_params->order_hint_info.enable_order_hint &&
- abs(ref_frame_dist) <= 2 && do_adaptive_gm_estimation
- ? GLOBAL_MOTION_DISFLOW_BASED
- : GLOBAL_MOTION_FEATURE_BASED;
for (model = ROTZOOM; model < GLOBAL_TRANS_TYPES_ENC; ++model) {
int64_t best_warp_error = INT64_MAX;
// Initially set all params to identity.
@@ -124,7 +114,7 @@
}
aom_compute_global_motion(model, cpi->source, ref_buf[frame], bit_depth,
- gm_estimation_type, motion_models,
+ global_motion_method, motion_models,
RANSAC_NUM_MOTIONS);
int64_t ref_frame_error = 0;
for (i = 0; i < RANSAC_NUM_MOTIONS; ++i) {