Convert input frames to monochrome before encoding

When encoding a color input to a monochrome output, convert input frames
to monochrome frames in the AOM_IMG_FMT_I420 format before passing them
to aom_codec_encode(). The libaom encoder apparently has some problems
with encoding input frames in the AOM_IMG_FMT_I422 format to a
monochrome output. Although I could not get to the bottom of those
problems, this change allows us to bypass those problems.

Do not upgrade profile 0 to profile 2 when encoding to a monochrome
output, even if the input is YUV 4:2:2.

BUG=aomedia:2913

Change-Id: I86cc6a5a533092e241ba26b95d65a9cbe5fdd67f
diff --git a/apps/aomenc.c b/apps/aomenc.c
index f6969d2..d56594a 100644
--- a/apps/aomenc.c
+++ b/apps/aomenc.c
@@ -1475,6 +1475,33 @@
 #endif
 }
 
+// Convert the input image 'img' to a monochrome image. The Y plane of the
+// output image is a shallow copy of the Y plane of the input image, therefore
+// the input image must remain valid for the lifetime of the output image. The U
+// and V planes of the output image are set to null pointers. The output image
+// format is AOM_IMG_FMT_I420 because libaom does not have AOM_IMG_FMT_I400.
+static void convert_image_to_monochrome(const struct aom_image *img,
+                                        struct aom_image *monochrome_img) {
+  *monochrome_img = *img;
+  monochrome_img->fmt = AOM_IMG_FMT_I420;
+  if (img->fmt & AOM_IMG_FMT_HIGHBITDEPTH) {
+    monochrome_img->fmt |= AOM_IMG_FMT_HIGHBITDEPTH;
+  }
+  monochrome_img->monochrome = 1;
+  monochrome_img->csp = AOM_CSP_UNKNOWN;
+  monochrome_img->x_chroma_shift = 1;
+  monochrome_img->y_chroma_shift = 1;
+  monochrome_img->planes[AOM_PLANE_U] = NULL;
+  monochrome_img->planes[AOM_PLANE_V] = NULL;
+  monochrome_img->stride[AOM_PLANE_U] = 0;
+  monochrome_img->stride[AOM_PLANE_V] = 0;
+  monochrome_img->sz = 0;
+  monochrome_img->bps = (img->fmt & AOM_IMG_FMT_HIGHBITDEPTH) ? 16 : 8;
+  monochrome_img->img_data = NULL;
+  monochrome_img->img_data_owner = 0;
+  monochrome_img->self_allocd = 0;
+}
+
 static void encode_frame(struct stream_state *stream,
                          struct AvxEncoderConfig *global, struct aom_image *img,
                          unsigned int frames_in) {
@@ -1554,6 +1581,12 @@
 #endif
   }
 
+  struct aom_image monochrome_img;
+  if (img && cfg->monochrome) {
+    convert_image_to_monochrome(img, &monochrome_img);
+    img = &monochrome_img;
+  }
+
   aom_usec_timer_start(&timer);
   aom_codec_encode(&stream->encoder, img, frame_start,
                    (uint32_t)(next_frame_start - frame_start), 0);
@@ -1943,8 +1976,10 @@
                 stream->config.cfg.g_profile = 1;
                 profile_updated = 1;
               }
-            } else if (input.bit_depth == 12 || input.fmt == AOM_IMG_FMT_I422 ||
-                       input.fmt == AOM_IMG_FMT_I42216) {
+            } else if (input.bit_depth == 12 ||
+                       ((input.fmt == AOM_IMG_FMT_I422 ||
+                         input.fmt == AOM_IMG_FMT_I42216) &&
+                        !stream->config.cfg.monochrome)) {
               stream->config.cfg.g_profile = 2;
               profile_updated = 1;
             }