Add fuzzing for avifReadImage. (#1627)

Also add a utility function to guess the file format of a buffer (used in the fuzzer).
diff --git a/apps/shared/avifutil.c b/apps/shared/avifutil.c
index c5bbd06..e19f202 100644
--- a/apps/shared/avifutil.c
+++ b/apps/shared/avifutil.c
@@ -201,39 +201,8 @@
         fclose(f);
 
         if (bytesRead > 0) {
-            avifROData header;
-            header.data = headerBuffer;
-            header.size = bytesRead;
-
-            if (avifPeekCompatibleFileType(&header)) {
-                return AVIF_APP_FILE_FORMAT_AVIF;
-            }
-
-            static const uint8_t signatureJPEG[2] = { 0xFF, 0xD8 };
-            static const uint8_t signaturePNG[8] = { 0x89, 0x50, 0x4E, 0x47, 0x0D, 0x0A, 0x1A, 0x0A };
-            static const uint8_t signatureY4M[9] = { 0x59, 0x55, 0x56, 0x34, 0x4D, 0x50, 0x45, 0x47, 0x32 }; // "YUV4MPEG2"
-            struct avifHeaderSignature
-            {
-                avifAppFileFormat format;
-                const uint8_t * magic;
-                size_t magicSize;
-            } signatures[] = { { AVIF_APP_FILE_FORMAT_JPEG, signatureJPEG, sizeof(signatureJPEG) },
-                               { AVIF_APP_FILE_FORMAT_PNG, signaturePNG, sizeof(signaturePNG) },
-                               { AVIF_APP_FILE_FORMAT_Y4M, signatureY4M, sizeof(signatureY4M) } };
-            const size_t signaturesCount = sizeof(signatures) / sizeof(signatures[0]);
-
-            for (size_t signatureIndex = 0; signatureIndex < signaturesCount; ++signatureIndex) {
-                struct avifHeaderSignature * signature = &signatures[signatureIndex];
-                if (header.size < signature->magicSize) {
-                    continue;
-                }
-                if (!memcmp(header.data, signature->magic, signature->magicSize)) {
-                    return signature->format;
-                }
-            }
-
-            // If none of these signatures match, bail out here. Guessing by extension won't help.
-            return AVIF_APP_FILE_FORMAT_UNKNOWN;
+            // If the file could be read, use the first bytes to guess the file format.
+            return avifGuessBufferFileFormat(headerBuffer, bytesRead);
         }
     }
 
@@ -268,6 +237,46 @@
     return AVIF_APP_FILE_FORMAT_UNKNOWN;
 }
 
+avifAppFileFormat avifGuessBufferFileFormat(const uint8_t * data, size_t size)
+{
+    // Leading magic bytes of the non-AVIF formats that can be recognized.
+    static const uint8_t magicJPEG[2] = { 0xFF, 0xD8 };
+    static const uint8_t magicPNG[8] = { 0x89, 0x50, 0x4E, 0x47, 0x0D, 0x0A, 0x1A, 0x0A };
+    static const uint8_t magicY4M[9] = { 0x59, 0x55, 0x56, 0x34, 0x4D, 0x50, 0x45, 0x47, 0x32 }; // "YUV4MPEG2"
+    const struct
+    {
+        avifAppFileFormat format;
+        const uint8_t * magic;
+        size_t magicSize;
+    } candidates[] = { { AVIF_APP_FILE_FORMAT_JPEG, magicJPEG, sizeof(magicJPEG) },
+                       { AVIF_APP_FILE_FORMAT_PNG, magicPNG, sizeof(magicPNG) },
+                       { AVIF_APP_FILE_FORMAT_Y4M, magicY4M, sizeof(magicY4M) } };
+    const size_t candidateCount = sizeof(candidates) / sizeof(candidates[0]);
+
+    // Nothing can be guessed from an empty buffer.
+    if (size == 0) {
+        return AVIF_APP_FILE_FORMAT_UNKNOWN;
+    }
+
+    // AVIF is tested first, through avifPeekCompatibleFileType().
+    avifROData header;
+    header.data = data;
+    header.size = size;
+    if (avifPeekCompatibleFileType(&header)) {
+        return AVIF_APP_FILE_FORMAT_AVIF;
+    }
+
+    // Otherwise the first candidate whose magic bytes start the buffer wins.
+    for (size_t i = 0; i < candidateCount; ++i) {
+        if ((size >= candidates[i].magicSize) && !memcmp(data, candidates[i].magic, candidates[i].magicSize)) {
+            return candidates[i].format;
+        }
+    }
+
+    // No known signature was found.
+    return AVIF_APP_FILE_FORMAT_UNKNOWN;
+}
+
 avifAppFileFormat avifReadImage(const char * filename,
                                 avifPixelFormat requestedFormat,
                                 int requestedDepth,
diff --git a/apps/shared/avifutil.h b/apps/shared/avifutil.h
index e35d70b..f3fac95 100644
--- a/apps/shared/avifutil.h
+++ b/apps/shared/avifutil.h
@@ -44,7 +44,11 @@
     AVIF_APP_FILE_FORMAT_Y4M
 } avifAppFileFormat;
 
+// Guesses the format of a file by looking at the first bytes, or at the extension if the file
+// can't be read or is empty.
 avifAppFileFormat avifGuessFileFormat(const char * filename);
+// Guesses the format of a buffer by looking at the first bytes (AVIF_APP_FILE_FORMAT_UNKNOWN if empty or not recognized).
+avifAppFileFormat avifGuessBufferFileFormat(const uint8_t * data, size_t size);
 
 // This structure holds any timing data coming from source (typically non-AVIF) inputs being fed
 // into avifenc. If either or both values are 0, the timing is "invalid" / sentinel and the values
diff --git a/tests/CMakeLists.txt b/tests/CMakeLists.txt
index 228e566..e85eb68 100644
--- a/tests/CMakeLists.txt
+++ b/tests/CMakeLists.txt
@@ -176,6 +176,7 @@
         link_fuzztest(${TEST_NAME})
         gtest_discover_tests(${TEST_NAME})
         add_test(NAME ${TEST_NAME} COMMAND ${TEST_NAME})
+        set_property(TEST ${TEST_NAME} PROPERTY ENVIRONMENT "TEST_DATA_DIR=${CMAKE_CURRENT_SOURCE_DIR}/data/")
     endmacro()
 
     if(AVIF_LOCAL_FUZZTEST)
@@ -197,6 +198,7 @@
 
     add_avif_fuzztest(avif_fuzztest_dec_incr avifincrtest_helpers)
     add_avif_fuzztest(avif_fuzztest_enc_dec)
+    add_avif_fuzztest(avif_fuzztest_read_image)
 
     if(AVIF_ENABLE_EXPERIMENTAL_GAIN_MAP)
         add_avif_fuzztest(avif_fuzztest_enc_dec_experimental)
diff --git a/tests/gtest/avif_fuzztest_helpers.cc b/tests/gtest/avif_fuzztest_helpers.cc
index 69119fd..09ed9d5 100644
--- a/tests/gtest/avif_fuzztest_helpers.cc
+++ b/tests/gtest/avif_fuzztest_helpers.cc
@@ -172,5 +172,42 @@
 
 //------------------------------------------------------------------------------
 
+std::vector<std::string> GetTestImagesContents(
+    size_t max_file_size, const std::vector<avifAppFileFormat>& image_formats) {
+  // The directory comes from the environment rather than from the command
+  // line: fuzztest seeds are created before main() is called, so the test
+  // has no chance to parse command line arguments.
+  const char* const data_dir = std::getenv("TEST_DATA_DIR");
+  if (!data_dir) {
+    // Not an error: this can happen in normal circumstances when
+    // gtest_discover_tests() is called in cmake.
+    std::cout << "TEST_DATA_DIR not set, returning an empty seed set\n";
+    return {};
+  }
+
+  // True if 'bytes' passes the format filter (no filter: everything passes).
+  const auto has_wanted_format = [&image_formats](const std::string& bytes) {
+    if (image_formats.empty()) return true;
+    const avifAppFileFormat guessed = avifGuessBufferFileFormat(
+        reinterpret_cast<const uint8_t*>(bytes.data()), bytes.size());
+    return std::find(image_formats.begin(), image_formats.end(), guessed) !=
+           image_formats.end();
+  };
+
+  std::cout << "Reading seeds from " << data_dir << "\n";
+  auto files = fuzztest::ReadFilesFromDirectory(data_dir);
+  std::vector<std::string> seeds;
+  seeds.reserve(files.size());
+  for (auto& [contents] : files) {
+    if (contents.size() <= max_file_size && has_wanted_format(contents)) {
+      seeds.push_back(std::move(contents));
+    }
+  }
+  std::cout << "Returning " << seeds.size() << " seed images\n";
+  return seeds;
+}
+
+//------------------------------------------------------------------------------
+
 }  // namespace testutil
 }  // namespace libavif
diff --git a/tests/gtest/avif_fuzztest_helpers.h b/tests/gtest/avif_fuzztest_helpers.h
index 79d83e5..b7aab04 100644
--- a/tests/gtest/avif_fuzztest_helpers.h
+++ b/tests/gtest/avif_fuzztest_helpers.h
@@ -10,6 +10,7 @@
 
 #include "avif/avif.h"
 #include "aviftest_helpers.h"
+#include "avifutil.h"
 #include "fuzztest/fuzztest.h"
 #include "gtest/gtest.h"
 
@@ -170,6 +171,16 @@
 
 //------------------------------------------------------------------------------
 
+// Returns the contents (not paths) of the test images found in the directory
+// set in the 'TEST_DATA_DIR' environment variable (none if it is unset), that
+// are at most 'max_file_size' bytes and have one of the formats in
+// 'image_formats' (or any format if 'image_formats' is empty).
+// Typically used to create image file seeds for fuzzing.
+std::vector<std::string> GetTestImagesContents(
+    size_t max_file_size, const std::vector<avifAppFileFormat>& image_formats);
+
+//------------------------------------------------------------------------------
+
 }  // namespace testutil
 }  // namespace libavif
 
diff --git a/tests/gtest/avif_fuzztest_read_image.cc b/tests/gtest/avif_fuzztest_read_image.cc
new file mode 100644
index 0000000..e1c2290
--- /dev/null
+++ b/tests/gtest/avif_fuzztest_read_image.cc
@@ -0,0 +1,131 @@
+// Copyright 2023 Google LLC
+// SPDX-License-Identifier: BSD-2-Clause
+//
+// Tests the jpeg/png/y4m reading code from avifenc.
+
+#include <fstream>
+#include <iostream>
+#include <string>
+#include <vector>
+
+#include "avif/avif.h"
+#include "avif_fuzztest_helpers.h"
+#include "aviftest_helpers.h"
+#include "avifutil.h"
+#include "fuzztest/fuzztest.h"
+#include "gtest/gtest.h"
+
+using ::fuzztest::Arbitrary;
+using ::fuzztest::ElementOf;
+
+namespace libavif {
+namespace testutil {
+namespace {
+// Global test environment; NOTE(review): presumably caps the stack size.
+::testing::Environment* const stack_limit_env =
+    ::testing::AddGlobalTestEnvironment(
+        new FuzztestStackLimitEnvironment("524288"));  // 512 * 1024
+
+//------------------------------------------------------------------------------
+
+// Returns a short printable name for 'file_format', used when logging a
+// successful read. Only PNG, JPEG and Y4M are expected: any other value trips
+// the assert, or yields "unknown" when asserts are compiled out.
+std::string FileFormatToString(avifAppFileFormat file_format) {
+  if (file_format == AVIF_APP_FILE_FORMAT_PNG) return "PNG";
+  if (file_format == AVIF_APP_FILE_FORMAT_JPEG) return "JPEG";
+  if (file_format == AVIF_APP_FILE_FORMAT_Y4M) return "Y4M";
+
+  // Not reachable for the formats above; the return is a fallback for
+  // builds where assert() does nothing.
+  assert(false);
+  return "unknown";
+}
+
+// Fuzz target: writes 'arbitrary_bytes' to a temporary file, reads it back
+// with avifReadImage() using the fuzzed settings, and checks that any
+// successfully read image is consistent with those settings.
+void ReadImageFile(const std::string& arbitrary_bytes,
+                   avifPixelFormat requested_format, int requested_depth,
+                   avifChromaDownsampling chroma_downsampling,
+                   bool ignore_color_profile, bool ignore_exif, bool ignore_xmp,
+                   bool allow_changing_cicp, bool ignore_gain_map,
+                   avifMatrixCoefficients matrix_coefficients) {
+  // Write the byte stream to a temp file since avifReadImage() takes a file
+  // path as input. The stream is opened in binary mode: text mode may
+  // translate newline bytes on some platforms and alter the fuzzed input.
+  const std::string file_path = testing::TempDir() + "inputimage";
+  std::ofstream out(file_path, std::ios::binary);
+  out << arbitrary_bytes;
+  out.close();
+
+  uint32_t out_depth;
+  avifAppSourceTiming timing;
+  testutil::AvifImagePtr avif_image(avifImageCreateEmpty(), avifImageDestroy);
+  avif_image->matrixCoefficients = matrix_coefficients;
+
+  const avifAppFileFormat file_format = avifReadImage(
+      file_path.c_str(), requested_format, requested_depth, chroma_downsampling,
+      ignore_color_profile, ignore_exif, ignore_xmp, allow_changing_cicp,
+      ignore_gain_map, avif_image.get(), &out_depth, &timing,
+      /*frameIter=*/nullptr);
+
+  // Nothing to verify unless a known format was reported for the input.
+  if (file_format == AVIF_APP_FILE_FORMAT_UNKNOWN) return;
+
+  EXPECT_GT(avif_image->width, 0);
+  EXPECT_GT(avif_image->height, 0);
+
+  // The requested depth and pixel format are not checked for Y4M input.
+  if (file_format != AVIF_APP_FILE_FORMAT_Y4M) {
+    if (requested_depth != 0) {
+      EXPECT_EQ(avif_image->depth, requested_depth);
+    }
+    EXPECT_EQ(avif_image->yuvFormat, requested_format);
+  }
+  if (ignore_color_profile) EXPECT_EQ(avif_image->icc.size, 0);
+  if (ignore_exif) EXPECT_EQ(avif_image->exif.size, 0);
+  if (ignore_xmp) EXPECT_EQ(avif_image->xmp.size, 0);
+  std::cout << "Decode successful (" << FileFormatToString(file_format)
+            << ")\n";
+}
+
+constexpr uint32_t kMaxFileSize = 1024 * 1024;  // Max input size: 1 MiB.
+// Seeded with the JPEG, PNG and Y4M test images of at most kMaxFileSize bytes.
+FUZZ_TEST(DecodeAvifTest, ReadImageFile)
+    .WithDomains(
+        Arbitrary<std::string>()
+            .WithMaxSize(kMaxFileSize)
+            .WithSeeds(GetTestImagesContents(kMaxFileSize,
+                                             {AVIF_APP_FILE_FORMAT_JPEG,
+                                              AVIF_APP_FILE_FORMAT_PNG,
+                                              AVIF_APP_FILE_FORMAT_Y4M})),
+        ArbitraryPixelFormat(),
+        /*requested_depth=*/ElementOf({0, 8, 10, 12}),
+        ElementOf({AVIF_CHROMA_DOWNSAMPLING_AUTOMATIC,
+                   AVIF_CHROMA_DOWNSAMPLING_FASTEST,
+                   AVIF_CHROMA_DOWNSAMPLING_BEST_QUALITY,
+                   AVIF_CHROMA_DOWNSAMPLING_AVERAGE,
+                   AVIF_CHROMA_DOWNSAMPLING_SHARP_YUV}),
+        /*ignore_color_profile=*/Arbitrary<bool>(),
+        /*ignore_exif=*/Arbitrary<bool>(),
+        /*ignore_xmp=*/Arbitrary<bool>(),
+        /*allow_changing_cicp=*/Arbitrary<bool>(),
+        /*ignore_gain_map=*/Arbitrary<bool>(),
+        ElementOf(
+            {AVIF_MATRIX_COEFFICIENTS_IDENTITY, AVIF_MATRIX_COEFFICIENTS_BT709,
+             AVIF_MATRIX_COEFFICIENTS_UNSPECIFIED, AVIF_MATRIX_COEFFICIENTS_FCC,
+             AVIF_MATRIX_COEFFICIENTS_BT470BG, AVIF_MATRIX_COEFFICIENTS_BT601,
+             AVIF_MATRIX_COEFFICIENTS_SMPTE240, AVIF_MATRIX_COEFFICIENTS_YCGCO,
+             AVIF_MATRIX_COEFFICIENTS_BT2020_NCL,
+             AVIF_MATRIX_COEFFICIENTS_BT2020_CL,
+             AVIF_MATRIX_COEFFICIENTS_SMPTE2085,
+             AVIF_MATRIX_COEFFICIENTS_CHROMA_DERIVED_NCL,
+             AVIF_MATRIX_COEFFICIENTS_CHROMA_DERIVED_CL,
+             AVIF_MATRIX_COEFFICIENTS_ICTCP}));
+
+//------------------------------------------------------------------------------
+
+}  // namespace
+}  // namespace testutil
+}  // namespace libavif