Add two-pass input params and scaling for three-pass encoding

The idea of three-pass encoding is to use the bitstream produced by
the second pass to help encode the third pass. To keep the extra
pass cheap, the second pass can be run on a downscaled version of
the video.

Two possible paths are designed for running three-pass encoding:

1. Running three-pass encoding using separate passes with libaom.

This CL adds the parameter "two-pass-output" to libaom, which
specifies the second-pass bitstream to be used by the third
pass. For example, one could run three-pass encoding by:

aomenc orig_input.y4m --passes=3 --pass=1 --fpf=fpf.txt \
       -o pass2.ivf
aomenc downscaled_input.y4m --passes=3 --pass=2 --fpf=fpf.txt \
       -o pass2.ivf
aomenc orig_input.y4m --passes=3 --pass=3 --fpf=fpf.txt \
       --two-pass-output=pass2.ivf -o output.ivf

This parameter is also exposed through the key & value API, so
other programs (such as FFmpeg) can perform three-pass encoding
as well.

2. Running all three passes in a single aomenc invocation.

In addition to "two-pass-output", this CL also adds the
parameters "two-pass-input", "two-pass-width" and
"two-pass-height" to aomenc. One could run three-pass encoding
by:

aomenc orig_input.y4m --passes=3 --two-pass-output=pass2.ivf \
       -o output.ivf

If two-pass-input is provided, the second pass will use it as the
input. If it is not provided, aomenc will rescale the frames in
the original input for the second pass.

The dimensions used for the second pass are chosen in the
following order of priority:
- the values given by two-pass-width and two-pass-height
- the dimensions parsed from the file provided by two-pass-input
- the original dimensions halved (rounded up) in both width and
  height

If two-pass-output is not provided, aomenc will default to the
temporary file name "tmp_2pass_output_%d.ivf", where %d is the
stream index.

Note that this is currently only a framework: the third pass does
not yet do anything different from the second pass.

Bug=aomedia:3101

Change-Id: Ie0b6b8c57b9e0eefa1441805fbefdc41a7298b43
diff --git a/apps/aomenc.c b/apps/aomenc.c
index 14f6ace..d2062a8 100644
--- a/apps/aomenc.c
+++ b/apps/aomenc.c
@@ -435,6 +435,7 @@
 
 const arg_def_t *av1_key_val_args[] = {
   &g_av1_codec_arg_defs.passes,
+  &g_av1_codec_arg_defs.two_pass_output,
   &g_av1_codec_arg_defs.fwd_kf_dist,
   NULL,
 };
@@ -517,6 +518,10 @@
 #endif
   const char *partition_info_path;
   aom_color_range_t color_range;
+  const char *two_pass_input;
+  const char *two_pass_output;
+  int two_pass_width;
+  int two_pass_height;
 };
 
 struct stream_state {
@@ -541,6 +546,10 @@
   int mismatch_seen;
   unsigned int chroma_subsampling_x;
   unsigned int chroma_subsampling_y;
+  const char *orig_out_fn;
+  unsigned int orig_width;
+  unsigned int orig_height;
+  char tmp_out_fn[40];
 };
 
 static void validate_positive_rational(const char *msg,
@@ -807,6 +816,10 @@
 
   /* Output files must be specified for each stream */
   stream->config.out_fn = NULL;
+  stream->config.two_pass_input = NULL;
+  stream->config.two_pass_output = NULL;
+  stream->config.two_pass_width = 0;
+  stream->config.two_pass_height = 0;
 
   stream->next = NULL;
   return stream;
@@ -1110,6 +1123,14 @@
       if (arg_parse_uint(&arg) == 1) {
         warn("non-zero %s option ignored in realtime mode.\n", arg.name);
       }
+    } else if (arg_match(&arg, &g_av1_codec_arg_defs.two_pass_input, argi)) {
+      config->two_pass_input = arg.val;
+    } else if (arg_match(&arg, &g_av1_codec_arg_defs.two_pass_output, argi)) {
+      config->two_pass_output = arg.val;
+    } else if (arg_match(&arg, &g_av1_codec_arg_defs.two_pass_width, argi)) {
+      config->two_pass_width = arg_parse_int(&arg);
+    } else if (arg_match(&arg, &g_av1_codec_arg_defs.two_pass_height, argi)) {
+      config->two_pass_height = arg_parse_int(&arg);
     } else {
       int i, match = 0;
       // check if the control ID API supports this arg
@@ -1169,6 +1190,18 @@
   }
   config->arg_key_val_cnt++;
 
+  // set the two_pass_output field
+  if (!config->two_pass_output && global->passes == 3) {
+    snprintf(stream->tmp_out_fn, sizeof(stream->tmp_out_fn),
+             "tmp_2pass_output_%d.ivf", stream->index);
+    stream->config.two_pass_output = stream->tmp_out_fn;
+  }
+  if (config->two_pass_output) {
+    config->arg_key_vals[config->arg_key_val_cnt][0] = "two-pass-output";
+    config->arg_key_vals[config->arg_key_val_cnt][1] = config->two_pass_output;
+    config->arg_key_val_cnt++;
+  }
+
   return eos_mark_found;
 }
 
@@ -1879,6 +1912,30 @@
   }
 }
 
+static void clear_stream_count_state(struct stream_state *stream) {
+  // PSNR counters
+  for (int k = 0; k < 2; k++) {
+    stream->psnr_sse_total[k] = 0;
+    stream->psnr_samples_total[k] = 0;
+    for (int i = 0; i < 4; i++) {
+      stream->psnr_totals[k][i] = 0;
+    }
+    stream->psnr_count[k] = 0;
+  }
+  // q hist
+  memset(stream->counts, 0, sizeof(stream->counts));
+}
+
+// aomenc will downscale the second pass if:
+// 1. the specific pass is not given by commandline (aomenc will perform all
+//    passes)
+// 2. there are more than 2 passes in total
+// 3. current pass is the second pass (the parameter pass starts with 0 so
+//    pass == 1)
+static int pass_need_downscale(int global_pass, int global_passes, int pass) {
+  return !global_pass && global_passes > 2 && pass == 1;
+}
+
 int main(int argc, const char **argv_) {
   int pass;
   aom_image_t raw;
@@ -1953,6 +2010,12 @@
 
   /* Handle non-option arguments */
   input.filename = argv[0];
+  const char *orig_input_filename = input.filename;
+  FOREACH_STREAM(stream, streams) {
+    stream->orig_out_fn = stream->config.out_fn;
+    stream->orig_width = stream->config.cfg.g_w;
+    stream->orig_height = stream->config.cfg.g_h;
+  }
 
   if (!input.filename) {
     fprintf(stderr, "No input file specified!\n");
@@ -1964,10 +2027,43 @@
     input.only_i420 = 0;
 
   for (pass = global.pass ? global.pass - 1 : 0; pass < global.passes; pass++) {
+    if (pass > 1) {
+      FOREACH_STREAM(stream, streams) { clear_stream_count_state(stream); }
+    }
+
     int frames_in = 0, seen_frames = 0;
     int64_t estimated_time_left = -1;
     int64_t average_rate = -1;
     int64_t lagged_count = 0;
+    const int need_downscale =
+        pass_need_downscale(global.pass, global.passes, pass);
+
+    // For the downscaled second pass, write to the two-pass output file;
+    // otherwise use the original output. Always reset width and height.
+    FOREACH_STREAM(stream, streams) {
+      if (need_downscale) {
+        stream->config.out_fn = stream->config.two_pass_output;
+      } else {
+        stream->config.out_fn = stream->orig_out_fn;
+      }
+      stream->config.cfg.g_w = stream->orig_width;
+      stream->config.cfg.g_h = stream->orig_height;
+    }
+
+    // For second pass in three-pass encoding, set the input to
+    // the given two-pass-input file if available. If the scaled input is not
+    // given, we will attempt to re-scale the original input.
+    input.filename = orig_input_filename;
+    const char *two_pass_input = NULL;
+    if (need_downscale) {
+      FOREACH_STREAM(stream, streams) {
+        if (stream->config.two_pass_input) {
+          two_pass_input = stream->config.two_pass_input;
+          input.filename = two_pass_input;
+          break;
+        }
+      }
+    }
 
     open_input_file(&input, global.csp);
 
@@ -1975,20 +2071,55 @@
      * the data from the first stream's configuration.
      */
     if (!input.width || !input.height) {
-      FOREACH_STREAM(stream, streams) {
-        if (stream->config.cfg.g_w && stream->config.cfg.g_h) {
-          input.width = stream->config.cfg.g_w;
-          input.height = stream->config.cfg.g_h;
-          break;
+      if (two_pass_input) {
+        FOREACH_STREAM(stream, streams) {
+          if (stream->config.two_pass_width && stream->config.two_pass_height) {
+            input.width = stream->config.two_pass_width;
+            input.height = stream->config.two_pass_height;
+            break;
+          }
         }
-      };
+      } else {
+        FOREACH_STREAM(stream, streams) {
+          if (stream->config.cfg.g_w && stream->config.cfg.g_h) {
+            input.width = stream->config.cfg.g_w;
+            input.height = stream->config.cfg.g_h;
+            break;
+          }
+        }
+      }
     }
 
     /* Update stream configurations from the input file's parameters */
-    if (!input.width || !input.height)
-      fatal(
-          "Specify stream dimensions with --width (-w) "
-          " and --height (-h)");
+    if (!input.width || !input.height) {
+      if (two_pass_input) {
+        fatal(
+            "Specify downscaled stream dimensions with --two-pass-width "
+            " and --two-pass-height");
+      } else {
+        fatal(
+            "Specify stream dimensions with --width (-w) "
+            " and --height (-h)");
+      }
+    }
+
+    if (need_downscale) {
+      FOREACH_STREAM(stream, streams) {
+        if (stream->config.two_pass_width && stream->config.two_pass_height) {
+          stream->config.cfg.g_w = stream->config.two_pass_width;
+          stream->config.cfg.g_h = stream->config.two_pass_height;
+        } else if (two_pass_input) {
+          stream->config.cfg.g_w = input.width;
+          stream->config.cfg.g_h = input.height;
+        } else if (stream->orig_width && stream->orig_height) {
+          stream->config.cfg.g_w = (stream->orig_width + 1) / 2;
+          stream->config.cfg.g_h = (stream->orig_height + 1) / 2;
+        } else {
+          stream->config.cfg.g_w = (input.width + 1) / 2;
+          stream->config.cfg.g_h = (input.height + 1) / 2;
+        }
+      }
+    }
 
     /* If input file does not specify bit-depth but input-bit-depth parameter
      * exists, assume that to be the input bit-depth. However, if the
@@ -2136,17 +2267,14 @@
     FOREACH_STREAM(stream, streams) { validate_stream_config(stream, &global); }
 
     /* Ensure that --passes and --pass are consistent. If --pass is set and
-     * --passes=2, ensure --fpf was set.
+     * --passes >= 2, ensure --fpf was set.
      */
-    // TODO(bohanli): with passes == 3 and pass == 3, we could use either
-    // fpf or second pass bitstream. This should be updated when that option
-    // is added.
-    if (global.pass && global.passes >= 2) {
+    if (global.pass > 0 && global.pass <= 3 && global.passes >= 2) {
       FOREACH_STREAM(stream, streams) {
         if (!stream->config.stats_fn)
           die("Stream %d: Must specify --fpf when --pass=%d"
-              " and --passes=2\n",
-              stream->index, global.pass);
+              " and --passes=%d\n",
+              stream->index, global.pass, global.passes);
       }
     }
 
diff --git a/av1/arg_defs.c b/av1/arg_defs.c
index 998085e..663c889 100644
--- a/av1/arg_defs.c
+++ b/av1/arg_defs.c
@@ -630,6 +630,17 @@
       NULL, "enable-tx-size-search", 1,
       "Enable transform size search to find the best size for each block. "
       "If false, transforms always have the largest possible size "
-      "(0: false, 1: true (default))")
+      "(0: false, 1: true (default))"),
+
+  .two_pass_input =
+      ARG_DEF(NULL, "two-pass-input", 1,
+              "The input file for the second pass for three-pass encoding."),
+  .two_pass_output = ARG_DEF(
+      NULL, "two-pass-output", 1,
+      "The output file for the first two passes for three-pass encoding."),
+  .two_pass_width =
+      ARG_DEF(NULL, "two-pass-width", 1, "The width of two-pass-input."),
+  .two_pass_height =
+      ARG_DEF(NULL, "two-pass-height", 1, "The height of two-pass-input."),
 #endif  // CONFIG_AV1_ENCODER
 };
diff --git a/av1/arg_defs.h b/av1/arg_defs.h
index 637c6a7..c89dbba 100644
--- a/av1/arg_defs.h
+++ b/av1/arg_defs.h
@@ -219,6 +219,10 @@
   arg_def_t vbr_corpus_complexity_lap;
   arg_def_t fwd_kf_dist;
   arg_def_t enable_tx_size_search;
+  arg_def_t two_pass_input;
+  arg_def_t two_pass_output;
+  arg_def_t two_pass_width;
+  arg_def_t two_pass_height;
 #endif  // CONFIG_AV1_ENCODER
 } av1_codec_arg_definitions_t;
 
diff --git a/av1/av1_cx_iface.c b/av1/av1_cx_iface.c
index 658667c..490f3aa 100644
--- a/av1/av1_cx_iface.c
+++ b/av1/av1_cx_iface.c
@@ -164,6 +164,8 @@
   // 2 (passes = 1 if pass == AOM_RC_ONE_PASS and passes = 2 otherwise).
   int passes;
   int fwd_kf_dist;
+  // the name of the second pass output file when passes > 2
+  const char *two_pass_output;
 };
 
 #if CONFIG_REALTIME_ONLY
@@ -312,6 +314,7 @@
   0,             // sb_multipass_unit_test
   -1,            // passes
   -1,            // fwd_kf_dist
+  NULL,          // two_pass_output
 };
 #else
 static struct av1_extracfg default_extra_cfg = {
@@ -447,6 +450,7 @@
   0,            // sb_multipass_unit_test
   -1,           // passes
   -1,           // fwd_kf_dist
+  NULL,         // two_pass_output
 };
 #endif
 
@@ -1117,6 +1121,9 @@
   // Set two-pass stats configuration.
   oxcf->twopass_stats_in = cfg->rc_twopass_stats_in;
 
+  if (extra_cfg->two_pass_output)
+    oxcf->two_pass_output = extra_cfg->two_pass_output;
+
   // Set Key frame configuration.
   kf_cfg->fwd_kf_enabled = cfg->fwd_kf_enabled;
   kf_cfg->auto_key =
@@ -3608,6 +3615,9 @@
   } else if (arg_match_helper(&arg, &g_av1_codec_arg_defs.fwd_kf_dist, argv,
                               err_string)) {
     extra_cfg.fwd_kf_dist = arg_parse_int_helper(&arg, err_string);
+  } else if (arg_match_helper(&arg, &g_av1_codec_arg_defs.two_pass_output, argv,
+                              err_string)) {
+    extra_cfg.two_pass_output = value;
   } else {
     match = 0;
     snprintf(err_string, ARG_ERR_MSG_MAX_LEN, "Cannot find aom option %s",
diff --git a/av1/encoder/encoder.h b/av1/encoder/encoder.h
index 53374c0..7736ae0 100644
--- a/av1/encoder/encoder.h
+++ b/av1/encoder/encoder.h
@@ -976,6 +976,9 @@
   // Total number of encoding passes.
   int passes;
 
+  // the name of the second pass output file when passes > 2
+  const char *two_pass_output;
+
   // Indicates if the encoding is GOOD or REALTIME.
   MODE mode;
 
@@ -994,7 +997,6 @@
   // The path for partition stats reading and writing, used in the experiment
   // CONFIG_PARTITION_SEARCH_ORDER.
   const char *partition_info_path;
-
   /*!\endcond */
 } AV1EncoderConfig;