Add encoder arg to select global motion method

The default method is the same as before (feature-match), but this allows the user to select disflow as an alternative method if desired.

Change-Id: Ifc28b57059e69a7048564c289156085e735ba12b

commit: 6ebcbc15a1bfbae00af78e5050855be45a2c9452 [log] [tgz]
author: Rachel Barker <rachelbarker@google.com> Tue Dec 20 19:13:26 2022 +0000
committer: Rachel Barker <rachelbarker@google.com> Thu Jan 19 19:15:20 2023 +0000
tree: f39b2d806793e8a3941457b833d7265f048c7726
parent: 40da75ba593168fc7d19a88a56bac9917dc65e97 [diff]
diff --git a/aom_dsp/flow_estimation/corner_match.c b/aom_dsp/flow_estimation/corner_match.c
index 8331a06..423e569 100644
--- a/aom_dsp/flow_estimation/corner_match.c
+++ b/aom_dsp/flow_estimation/corner_match.c

@@ -199,7 +199,7 @@
   return num_correspondences;
 }
 
-int av1_compute_global_motion_feature_based(
+int av1_compute_global_motion_feature_match(
     TransformationType type, YV12_BUFFER_CONFIG *src, YV12_BUFFER_CONFIG *ref,
     int bit_depth, MotionModel *motion_models, int num_motion_models) {
   int i;

diff --git a/aom_dsp/flow_estimation/corner_match.h b/aom_dsp/flow_estimation/corner_match.h
index 77cc501..25f1787 100644
--- a/aom_dsp/flow_estimation/corner_match.h
+++ b/aom_dsp/flow_estimation/corner_match.h

@@ -36,7 +36,7 @@
                                  int ref_stride,
                                  Correspondence *correspondences);
 
-int av1_compute_global_motion_feature_based(
+int av1_compute_global_motion_feature_match(
     TransformationType type, YV12_BUFFER_CONFIG *src, YV12_BUFFER_CONFIG *ref,
     int bit_depth, MotionModel *motion_models, int num_motion_models);
 

diff --git a/aom_dsp/flow_estimation/disflow.c b/aom_dsp/flow_estimation/disflow.c
index fe038d2..18a0d2b 100644
--- a/aom_dsp/flow_estimation/disflow.c
+++ b/aom_dsp/flow_estimation/disflow.c

@@ -593,9 +593,11 @@
 // Following the convention in flow_estimation.h, the flow vectors are computed
 // at fixed points in `src` and point to the corresponding locations in `ref`,
 // regardless of the temporal ordering of the frames.
-int av1_compute_global_motion_disflow_based(
-    TransformationType type, YV12_BUFFER_CONFIG *src, YV12_BUFFER_CONFIG *ref,
-    int bit_depth, MotionModel *motion_models, int num_motion_models) {
+int av1_compute_global_motion_disflow(TransformationType type,
+                                      YV12_BUFFER_CONFIG *src,
+                                      YV12_BUFFER_CONFIG *ref, int bit_depth,
+                                      MotionModel *motion_models,
+                                      int num_motion_models) {
   // Precompute information we will need about each frame
   ImagePyramid *src_pyramid = src->y_pyramid;
   CornerList *src_corners = src->corners;

diff --git a/aom_dsp/flow_estimation/disflow.h b/aom_dsp/flow_estimation/disflow.h
index 7286717..87dfdb4 100644
--- a/aom_dsp/flow_estimation/disflow.h
+++ b/aom_dsp/flow_estimation/disflow.h

@@ -89,9 +89,11 @@
   int stride;
 } FlowField;
 
-int av1_compute_global_motion_disflow_based(
-    TransformationType type, YV12_BUFFER_CONFIG *src, YV12_BUFFER_CONFIG *ref,
-    int bit_depth, MotionModel *motion_models, int num_motion_models);
+int av1_compute_global_motion_disflow(TransformationType type,
+                                      YV12_BUFFER_CONFIG *src,
+                                      YV12_BUFFER_CONFIG *ref, int bit_depth,
+                                      MotionModel *motion_models,
+                                      int num_motion_models);
 
 #ifdef __cplusplus
 }

diff --git a/aom_dsp/flow_estimation/flow_estimation.c b/aom_dsp/flow_estimation/flow_estimation.c
index 966d670..c7134b7 100644
--- a/aom_dsp/flow_estimation/flow_estimation.c
+++ b/aom_dsp/flow_estimation/flow_estimation.c

@@ -18,6 +18,14 @@
 #include "aom_ports/mem.h"
 #include "aom_scale/yv12config.h"
 
+// For each global motion method, how many pyramid levels should we allocate?
+// Note that this is a maximum, and fewer levels will be allocated if the frame
+// is not large enough to need all of the specified levels
+const int global_motion_pyr_levels[GLOBAL_MOTION_METHODS] = {
+  1,   // GLOBAL_MOTION_METHOD_FEATURE_MATCH
+  16,  // GLOBAL_MOTION_METHOD_DISFLOW
+};
+
 // Compute a global motion model between the given source and ref frames.
 //
 // As is standard for video codecs, the resulting model maps from (x, y)
@@ -25,15 +33,15 @@
 // of the temporal order of the two frames.
 int aom_compute_global_motion(TransformationType type, YV12_BUFFER_CONFIG *src,
                               YV12_BUFFER_CONFIG *ref, int bit_depth,
-                              GlobalMotionEstimationType gm_estimation_type,
+                              GlobalMotionMethod gm_method,
                               MotionModel *motion_models,
                               int num_motion_models) {
-  switch (gm_estimation_type) {
-    case GLOBAL_MOTION_FEATURE_BASED:
-      return av1_compute_global_motion_feature_based(
+  switch (gm_method) {
+    case GLOBAL_MOTION_METHOD_FEATURE_MATCH:
+      return av1_compute_global_motion_feature_match(
           type, src, ref, bit_depth, motion_models, num_motion_models);
-    case GLOBAL_MOTION_DISFLOW_BASED:
-      return av1_compute_global_motion_disflow_based(
+    case GLOBAL_MOTION_METHOD_DISFLOW:
+      return av1_compute_global_motion_disflow(
           type, src, ref, bit_depth, motion_models, num_motion_models);
     default: assert(0 && "Unknown global motion estimation type");
   }

diff --git a/aom_dsp/flow_estimation/flow_estimation.h b/aom_dsp/flow_estimation/flow_estimation.h
index 0eabfed..7cf947c 100644
--- a/aom_dsp/flow_estimation/flow_estimation.h
+++ b/aom_dsp/flow_estimation/flow_estimation.h

@@ -39,9 +39,11 @@
 static const int trans_model_params[TRANS_TYPES] = { 0, 2, 4, 6 };
 
 typedef enum {
-  GLOBAL_MOTION_FEATURE_BASED,
-  GLOBAL_MOTION_DISFLOW_BASED,
-} GlobalMotionEstimationType;
+  GLOBAL_MOTION_METHOD_FEATURE_MATCH,
+  GLOBAL_MOTION_METHOD_DISFLOW,
+  GLOBAL_MOTION_METHOD_LAST = GLOBAL_MOTION_METHOD_DISFLOW,
+  GLOBAL_MOTION_METHODS
+} GlobalMotionMethod;
 
 typedef struct {
   double params[MAX_PARAMDIM - 1];
@@ -59,6 +61,11 @@
   double rx, ry;
 } Correspondence;
 
+// For each global motion method, how many pyramid levels should we allocate?
+// Note that this is a maximum, and fewer levels will be allocated if the frame
+// is not large enough to need all of the specified levels
+extern const int global_motion_pyr_levels[GLOBAL_MOTION_METHODS];
+
 // Compute a global motion model between the given source and ref frames.
 //
 // As is standard for video codecs, the resulting model maps from (x, y)
@@ -66,7 +73,7 @@
 // of the temporal order of the two frames.
 int aom_compute_global_motion(TransformationType type, YV12_BUFFER_CONFIG *src,
                               YV12_BUFFER_CONFIG *ref, int bit_depth,
-                              GlobalMotionEstimationType gm_estimation_type,
+                              GlobalMotionMethod gm_method,
                               MotionModel *motion_models,
                               int num_motion_models);
 

diff --git a/apps/aomenc.c b/apps/aomenc.c
index ef208fd..1cc45cd 100644
--- a/apps/aomenc.c
+++ b/apps/aomenc.c

@@ -453,6 +453,7 @@
   &g_av1_codec_arg_defs.sb_qp_sweep,
   &g_av1_codec_arg_defs.dist_metric,
   &g_av1_codec_arg_defs.kf_max_pyr_height,
+  &g_av1_codec_arg_defs.global_motion_method,
   NULL,
 };
 

diff --git a/av1/arg_defs.c b/av1/arg_defs.c
index abfd4b3..a37958e 100644
--- a/av1/arg_defs.c
+++ b/av1/arg_defs.c

@@ -139,6 +139,12 @@
   { "ebu3213", AOM_CICP_CP_EBU_3213 },
   { NULL, 0 }
 };
+
+static const struct arg_enum_list global_motion_method_enum[] = {
+  { "feature-match", GLOBAL_MOTION_METHOD_FEATURE_MATCH },
+  { "disflow", GLOBAL_MOTION_METHOD_DISFLOW },
+  { NULL, 0 }
+};
 #endif  // CONFIG_AV1_ENCODER
 
 const av1_codec_arg_definitions_t g_av1_codec_arg_defs = {
@@ -683,5 +689,9 @@
       ARG_DEF(NULL, "sb-qp-sweep", 1,
               "When set to 1, enable the superblock level qp sweep for a "
               "given lambda to minimize the rdcost."),
+  .global_motion_method = ARG_DEF_ENUM(NULL, "global-motion-method", 1,
+                                       "Global motion search method "
+                                       "(default: feature-match)",
+                                       global_motion_method_enum),
 #endif  // CONFIG_AV1_ENCODER
 };

diff --git a/av1/arg_defs.h b/av1/arg_defs.h
index e15a84c..730b422 100644
--- a/av1/arg_defs.h
+++ b/av1/arg_defs.h

@@ -21,6 +21,7 @@
 #include "common/webmenc.h"
 #endif
 #include "aom/aomcx.h"
+#include "aom_dsp/flow_estimation/flow_estimation.h"
 
 enum TestDecodeFatality {
   TEST_DECODE_OFF,
@@ -233,6 +234,7 @@
   arg_def_t strict_level_conformance;
   arg_def_t kf_max_pyr_height;
   arg_def_t sb_qp_sweep;
+  arg_def_t global_motion_method;
 #endif  // CONFIG_AV1_ENCODER
 } av1_codec_arg_definitions_t;
 

diff --git a/av1/av1_cx_iface.c b/av1/av1_cx_iface.c
index a1f66ca..6f331db 100644
--- a/av1/av1_cx_iface.c
+++ b/av1/av1_cx_iface.c

@@ -20,6 +20,8 @@
 #include "aom/aom_encoder.h"
 #include "aom/internal/aom_codec_internal.h"
 
+#include "aom_dsp/flow_estimation/flow_estimation.h"
+
 #include "av1/av1_iface_common.h"
 #include "av1/encoder/bitstream.h"
 #include "av1/encoder/encoder.h"
@@ -194,6 +196,7 @@
   int strict_level_conformance;
   int kf_max_pyr_height;
   int sb_qp_sweep;
+  GlobalMotionMethod global_motion_method;
 };
 
 #if CONFIG_REALTIME_ONLY
@@ -338,25 +341,26 @@
       SEQ_LEVEL_MAX, SEQ_LEVEL_MAX, SEQ_LEVEL_MAX, SEQ_LEVEL_MAX, SEQ_LEVEL_MAX,
       SEQ_LEVEL_MAX, SEQ_LEVEL_MAX, SEQ_LEVEL_MAX, SEQ_LEVEL_MAX, SEQ_LEVEL_MAX,
       SEQ_LEVEL_MAX, SEQ_LEVEL_MAX,
-  },               // target_seq_level_idx
-  0,               // tier_mask
-  0,               // min_cr
-  COST_UPD_OFF,    // coeff_cost_upd_freq
-  COST_UPD_OFF,    // mode_cost_upd_freq
-  COST_UPD_OFF,    // mv_cost_upd_freq
-  COST_UPD_OFF,    // dv_cost_upd_freq
-  0,               // ext_tile_debug
-  0,               // sb_multipass_unit_test
-  -1,              // passes
-  -1,              // fwd_kf_dist
-  LOOPFILTER_ALL,  // loopfilter_control
-  0,               // skip_postproc_filtering
-  NULL,            // two_pass_output
-  NULL,            // second_pass_log
-  0,               // auto_intra_tools_off
-  0,               // strict_level_conformance
-  -1,              // kf_max_pyr_height
-  0,               // sb_qp_sweep
+  },                                   // target_seq_level_idx
+  0,                                   // tier_mask
+  0,                                   // min_cr
+  COST_UPD_OFF,                        // coeff_cost_upd_freq
+  COST_UPD_OFF,                        // mode_cost_upd_freq
+  COST_UPD_OFF,                        // mv_cost_upd_freq
+  COST_UPD_OFF,                        // dv_cost_upd_freq
+  0,                                   // ext_tile_debug
+  0,                                   // sb_multipass_unit_test
+  -1,                                  // passes
+  -1,                                  // fwd_kf_dist
+  LOOPFILTER_ALL,                      // loopfilter_control
+  0,                                   // skip_postproc_filtering
+  NULL,                                // two_pass_output
+  NULL,                                // second_pass_log
+  0,                                   // auto_intra_tools_off
+  0,                                   // strict_level_conformance
+  -1,                                  // kf_max_pyr_height
+  0,                                   // sb_qp_sweep
+  GLOBAL_MOTION_METHOD_FEATURE_MATCH,  // global_motion_method
 };
 #else
 static const struct av1_extracfg default_extra_cfg = {
@@ -487,25 +491,26 @@
       SEQ_LEVEL_MAX, SEQ_LEVEL_MAX, SEQ_LEVEL_MAX, SEQ_LEVEL_MAX, SEQ_LEVEL_MAX,
       SEQ_LEVEL_MAX, SEQ_LEVEL_MAX, SEQ_LEVEL_MAX, SEQ_LEVEL_MAX, SEQ_LEVEL_MAX,
       SEQ_LEVEL_MAX, SEQ_LEVEL_MAX,
-  },               // target_seq_level_idx
-  0,               // tier_mask
-  0,               // min_cr
-  COST_UPD_SB,     // coeff_cost_upd_freq
-  COST_UPD_SB,     // mode_cost_upd_freq
-  COST_UPD_SB,     // mv_cost_upd_freq
-  COST_UPD_SB,     // dv_cost_upd_freq
-  0,               // ext_tile_debug
-  0,               // sb_multipass_unit_test
-  -1,              // passes
-  -1,              // fwd_kf_dist
-  LOOPFILTER_ALL,  // loopfilter_control
-  0,               // skip_postproc_filtering
-  NULL,            // two_pass_output
-  NULL,            // second_pass_log
-  0,               // auto_intra_tools_off
-  0,               // strict_level_conformance
-  -1,              // kf_max_pyr_height
-  0,               // sb_qp_sweep
+  },                                   // target_seq_level_idx
+  0,                                   // tier_mask
+  0,                                   // min_cr
+  COST_UPD_SB,                         // coeff_cost_upd_freq
+  COST_UPD_SB,                         // mode_cost_upd_freq
+  COST_UPD_SB,                         // mv_cost_upd_freq
+  COST_UPD_SB,                         // dv_cost_upd_freq
+  0,                                   // ext_tile_debug
+  0,                                   // sb_multipass_unit_test
+  -1,                                  // passes
+  -1,                                  // fwd_kf_dist
+  LOOPFILTER_ALL,                      // loopfilter_control
+  0,                                   // skip_postproc_filtering
+  NULL,                                // two_pass_output
+  NULL,                                // second_pass_log
+  0,                                   // auto_intra_tools_off
+  0,                                   // strict_level_conformance
+  -1,                                  // kf_max_pyr_height
+  0,                                   // sb_qp_sweep
+  GLOBAL_MOTION_METHOD_FEATURE_MATCH,  // global_motion_method
 };
 #endif
 
@@ -862,6 +867,8 @@
   RANGE_CHECK_BOOL(extra_cfg, auto_intra_tools_off);
   RANGE_CHECK_BOOL(extra_cfg, strict_level_conformance);
   RANGE_CHECK_BOOL(extra_cfg, sb_qp_sweep);
+  RANGE_CHECK(extra_cfg, global_motion_method,
+              GLOBAL_MOTION_METHOD_FEATURE_MATCH, GLOBAL_MOTION_METHOD_LAST);
 
   RANGE_CHECK(extra_cfg, kf_max_pyr_height, -1, 5);
   if (extra_cfg->kf_max_pyr_height != -1 &&
@@ -1460,6 +1467,8 @@
 
   oxcf->sb_qp_sweep = extra_cfg->sb_qp_sweep;
 
+  oxcf->global_motion_method = extra_cfg->global_motion_method;
+
   return AOM_CODEC_OK;
 }
 
@@ -4023,6 +4032,9 @@
                               err_string)) {
     ctx->cfg.tile_height_count = arg_parse_list_helper(
         &arg, ctx->cfg.tile_heights, MAX_TILE_HEIGHTS, err_string);
+  } else if (arg_match_helper(&arg, &g_av1_codec_arg_defs.global_motion_method,
+                              argv, err_string)) {
+    extra_cfg.global_motion_method = arg_parse_enum_helper(&arg, err_string);
   } else {
     match = 0;
     snprintf(err_string, ARG_ERR_MSG_MAX_LEN, "Cannot find aom option %s",

diff --git a/av1/encoder/encoder.c b/av1/encoder/encoder.c
index c21c920..e9a744f 100644
--- a/av1/encoder/encoder.c
+++ b/av1/encoder/encoder.c

@@ -651,8 +651,6 @@
   init_buffer_indices(&cpi->force_intpel_info, cm->remapped_ref_idx);
 
   av1_noise_estimate_init(&cpi->noise_estimate, cm->width, cm->height);
-
-  cpi->image_pyramid_levels = oxcf->tool_cfg.enable_global_motion ? 1 : 0;
 }
 
 void av1_change_config_seq(struct AV1_PRIMARY *ppi,
@@ -906,6 +904,18 @@
   if (lap_lag_in_frames != -1) {
     cpi->oxcf.gf_cfg.lag_in_frames = lap_lag_in_frames;
   }
+
+#if CONFIG_REALTIME_ONLY
+  assert(!oxcf->tool_cfg.enable_global_motion);
+  cpi->image_pyramid_levels = 0;
+#else
+  if (oxcf->tool_cfg.enable_global_motion) {
+    cpi->image_pyramid_levels =
+        global_motion_pyr_levels[oxcf->global_motion_method];
+  } else {
+    cpi->image_pyramid_levels = 0;
+  }
+#endif  // CONFIG_REALTIME_ONLY
 }
 
 static INLINE void init_frame_info(FRAME_INFO *frame_info,

diff --git a/av1/encoder/encoder.h b/av1/encoder/encoder.h
index eb484c8..6852539 100644
--- a/av1/encoder/encoder.h
+++ b/av1/encoder/encoder.h

@@ -1079,6 +1079,9 @@
 
   // A flag to control if we enable the superblock qp sweep for a given lambda
   int sb_qp_sweep;
+
+  // Selected global motion search method
+  GlobalMotionMethod global_motion_method;
   /*!\endcond */
 } AV1EncoderConfig;
 

diff --git a/av1/encoder/global_motion_facade.c b/av1/encoder/global_motion_facade.c
index ee3b525..0de9546 100644
--- a/av1/encoder/global_motion_facade.c
+++ b/av1/encoder/global_motion_facade.c

@@ -102,18 +102,8 @@
   assert(ref_buf[frame] != NULL);
   TransformationType model;
   int bit_depth = cpi->common.seq_params->bit_depth;
+  GlobalMotionMethod global_motion_method = cpi->oxcf.global_motion_method;
 
-  // TODO(sarahparker, debargha): Explore do_adaptive_gm_estimation = 1
-  const int do_adaptive_gm_estimation = 0;
-
-  const int ref_frame_dist = get_relative_dist(
-      &cm->seq_params->order_hint_info, cm->current_frame.order_hint,
-      cm->cur_frame->ref_order_hints[frame - LAST_FRAME]);
-  const GlobalMotionEstimationType gm_estimation_type =
-      cm->seq_params->order_hint_info.enable_order_hint &&
-              abs(ref_frame_dist) <= 2 && do_adaptive_gm_estimation
-          ? GLOBAL_MOTION_DISFLOW_BASED
-          : GLOBAL_MOTION_FEATURE_BASED;
   for (model = ROTZOOM; model < GLOBAL_TRANS_TYPES_ENC; ++model) {
     int64_t best_warp_error = INT64_MAX;
     // Initially set all params to identity.
@@ -124,7 +114,7 @@
     }
 
     aom_compute_global_motion(model, cpi->source, ref_buf[frame], bit_depth,
-                              gm_estimation_type, motion_models,
+                              global_motion_method, motion_models,
                               RANSAC_NUM_MOTIONS);
     int64_t ref_frame_error = 0;
     for (i = 0; i < RANSAC_NUM_MOTIONS; ++i) {
commit	6ebcbc15a1bfbae00af78e5050855be45a2c9452	[log] [tgz]
author	Rachel Barker <rachelbarker@google.com>	Tue Dec 20 19:13:26 2022 +0000
committer	Rachel Barker <rachelbarker@google.com>	Thu Jan 19 19:15:20 2023 +0000
tree	f39b2d806793e8a3941457b833d7265f048c7726
parent	40da75ba593168fc7d19a88a56bac9917dc65e97 [diff]