Add avifinfo_tool

A binary to compare the feature extraction results between libavif
and libavifinfo. Also allows consistency-checking and min-header-size
stats gathering.

Change-Id: I70721fb52cc60add032da3f676db8a1bf356a5af
diff --git a/CMakeLists.txt b/CMakeLists.txt
index ce831e6..e44cba0 100644
--- a/CMakeLists.txt
+++ b/CMakeLists.txt
@@ -13,10 +13,12 @@
 
 option(AVIFINFO_BUILD_TESTS
        "Build and enable tests (GoogleTest must be installed)" OFF)
+option(AVIFINFO_BUILD_TOOLS "Build tools" OFF)
 
 # C library
 
 add_library(avifinfo avifinfo.c)
+target_include_directories(avifinfo PUBLIC ${CMAKE_CURRENT_SOURCE_DIR})
 
 # C++ tests
 
@@ -26,11 +28,42 @@
 
   enable_testing()
   add_executable(avifinfo_test tests/avifinfo_test.cc)
-  target_include_directories(avifinfo_test PRIVATE ${GTEST_INCLUDE_DIRS}
-                                                   ${CMAKE_CURRENT_SOURCE_DIR})
+  target_include_directories(avifinfo_test PRIVATE ${GTEST_INCLUDE_DIRS})
   target_link_libraries(avifinfo_test PRIVATE ${GTEST_BOTH_LIBRARIES} avifinfo)
   add_test(
     NAME avifinfo_test
     COMMAND ${CMAKE_CURRENT_BINARY_DIR}/avifinfo_test
     WORKING_DIRECTORY ${CMAKE_CURRENT_SOURCE_DIR}/tests)
 endif()
+
+# C++ tools
+
+if(AVIFINFO_BUILD_TOOLS)
+  find_package(Threads REQUIRED) # aom is linked below and needs threads.
+
+  # Clone and build libavif and its dependency aom.
+  include(ExternalProject)
+  set(EXTERNAL_INSTALL_LOCATION ${CMAKE_BINARY_DIR}/external)
+  ExternalProject_Add(
+    libavif
+    GIT_REPOSITORY https://github.com/AOMediaCodec/libavif
+    PATCH_COMMAND cd ${CMAKE_BINARY_DIR}/libavif-prefix/src/libavif/ext &&
+                  ./aom.cmd
+    UPDATE_DISCONNECTED 1 # Avoid building aom every time.
+    # aom.cmd builds aom as static so libavif should be built as static too.
+    CMAKE_ARGS -DCMAKE_INSTALL_PREFIX=${EXTERNAL_INSTALL_LOCATION}
+               -DAVIF_CODEC_AOM=ON -DAVIF_LOCAL_AOM=ON -DBUILD_SHARED_LIBS=OFF)
+
+  add_executable(avifinfo_tool tools/avifinfo_tool.cc tests/avifinfo_fuzz.cc)
+  set_property(TARGET avifinfo_tool PROPERTY CXX_STANDARD 17) # for filesystem
+  target_include_directories(avifinfo_tool SYSTEM
+                             PRIVATE ${EXTERNAL_INSTALL_LOCATION}/include)
+  # libavif's CMakeLists.txt uses a PRIVATE target_link_libraries() so
+  # avifinfo_tool must be linked to aom too (and thus also to pthread).
+  target_link_directories(
+    avifinfo_tool PRIVATE ${EXTERNAL_INSTALL_LOCATION}/lib
+    ${CMAKE_BINARY_DIR}/libavif-prefix/src/libavif/ext/aom/build.libavif)
+  target_link_libraries(avifinfo_tool PRIVATE avifinfo avif aom
+                                              Threads::Threads)
+  add_dependencies(avifinfo_tool libavif)
+endif()
diff --git a/tools/avifinfo_tool.cc b/tools/avifinfo_tool.cc
new file mode 100644
index 0000000..b16281c
--- /dev/null
+++ b/tools/avifinfo_tool.cc
@@ -0,0 +1,271 @@
+// Copyright (c) 2021, Alliance for Open Media. All rights reserved
+//
+// This source code is subject to the terms of the BSD 2 Clause License and
+// the Alliance for Open Media Patent License 1.0. If the BSD 2 Clause License
+// was not distributed with this source code in the LICENSE file, you can
+// obtain it at www.aomedia.org/license/software. If the Alliance for Open
+// Media Patent License 1.0 was not distributed with this source code in the
+// PATENTS file, you can obtain it at www.aomedia.org/license/patent.
+
+#include <algorithm>
+#include <cstring>
+#include <filesystem>
+#include <fstream>
+#include <iostream>
+#include <iterator>
+#include <string>
+#include <unordered_map>
+#include <vector>
+
+#include "avifinfo.h"
+#include "avif/avif.h"
+
+namespace {
+
+//------------------------------------------------------------------------------
+
+// Returns the usage text. The binary is named after the 'avifinfo_tool' CMake
+// target and each argument that is not an option is a file or a directory.
+std::string GetHelpStr() {
+  return "Command line tool to compare libavif and libavifinfo results.\n"
+         "Usage:   avifinfo_tool [options] <file or directory>...\n"
+         "Options:\n"
+         "  -h, --help ...... Print this help\n"
+         "  --fast .......... Skip libavif decoding, only use libavifinfo\n"
+         "  --min-size ...... Find minimum size to extract features per file\n"
+         "  --validate ...... Check libavifinfo consistency on each file\n";
+}
+
+//------------------------------------------------------------------------------
+
+// Outcome of one feature extraction, by either libavif or libavifinfo.
+struct Result {
+  bool success;  // True if the 'features' were correctly extracted.
+  AvifInfoFeatures features;  // Width, height, bit depth, channel count.
+};
+
+// Decodes 'data_size' bytes of AVIF 'data' with libavif; reports its features.
+Result DecodeAvif(const uint8_t data[], size_t data_size) {
+  Result result;
+  avifImage* const image = avifImageCreateEmpty();
+  avifDecoder* const decoder = avifDecoderCreate();
+  decoder->strictFlags = AVIF_STRICT_DISABLED;  // No strict checks.
+  const avifResult status =
+      avifDecoderReadMemory(decoder, image, data, data_size);
+  avifDecoderDestroy(decoder);  // 'image' is read below.
+  if (status == AVIF_RESULT_OK) {
+    const uint32_t num_channels =  // 0, 1 or 3 color planes + alpha.
+        ((image->yuvFormat == AVIF_PIXEL_FORMAT_NONE)     ? 0
+         : (image->yuvFormat == AVIF_PIXEL_FORMAT_YUV400) ? 1
+                                                          : 3) +
+        ((image->alphaPlane != nullptr) ? 1 : 0);
+    result = {true, {image->width, image->height, image->depth, num_channels}};
+  } else {
+    result = {false};  // Remaining members are value-initialized.
+  }
+  avifImageDestroy(image);
+  return result;
+}
+
+// Extracts the features of the 'data_size'-byte AVIF 'data' using libavifinfo.
+Result ParseAvif(const uint8_t data[], size_t data_size) {
+  Result result;
+  const AvifInfoStatus status = AvifInfoGetWithSize(
+      data, data_size, &result.features, /*file_size=*/data_size);
+  result.success = (status == kAvifInfoOk);  // Any other status is a failure.
+  return result;
+}
+
+// Same as ParseAvif() but also outputs in 'min_data_size' the smallest number
+// of leading bytes of 'data' that parses ('data_size' if parsing fails).
+Result ParseAvifForSize(const uint8_t data[], size_t data_size,
+                        size_t& min_data_size) {
+  const Result result = ParseAvif(data, data_size);
+  if (!result.success) {
+    min_data_size = data_size;
+    return result;
+  }
+  min_data_size = 1;  // Bisect the prefix length in [1, data_size].
+  size_t max_data_size = data_size;
+  while (min_data_size < max_data_size) {  // Assumes longer prefixes parse too.
+    const size_t middle = (min_data_size + max_data_size) / 2;
+    if (AvifInfoGetWithSize(data, middle, nullptr, /*file_size=*/data_size) ==
+        kAvifInfoOk) {
+      max_data_size = middle;  // 'middle' bytes are enough.
+    } else {
+      min_data_size = middle + 1;  // 'middle' bytes are not enough.
+    }
+  }
+  return result;
+}
+
+// Fuzz target reused to validate the library (see ValidateFile() below).
+extern "C" int LLVMFuzzerTestOneInput(const uint8_t* data, size_t data_size);
+
+// Aggregated stats about the decoded/parsed AVIF files.
+struct Stats {
+  uint32_t num_files_invalid_at_decode = 0;  // Rejected by libavif.
+  uint32_t num_files_invalid_at_parse = 0;  // Rejected by libavifinfo.
+  uint32_t num_files_invalid_at_both = 0;  // Rejected by both.
+  std::unordered_map<size_t, uint32_t> min_size_to_count;  // Files per size.
+};
+
+//------------------------------------------------------------------------------
+
+// Adds 'path' to 'file_paths' or, if it is a directory, all files below it.
+// Uses path::string() since path's implicit conversion is platform-specific.
+void FindFiles(const std::string& path, std::vector<std::string>& file_paths) {
+  if (!std::filesystem::is_directory(path)) {
+    file_paths.emplace_back(path);
+    return;
+  }
+  for (const auto& entry : std::filesystem::directory_iterator(path)) {
+    FindFiles(entry.path().string(), file_paths);
+  }
+}
+
+// Returns the longest common prefix of 'paths', truncated to a full directory.
+std::string FindCommonLongestPrefix(const std::vector<std::string>& paths) {
+  std::string prefix = paths.empty() ? "" : paths.front();  // Shrunk below.
+  for (const std::string& path : paths) {
+    const auto mismatch =
+        std::mismatch(prefix.begin(), prefix.end(), path.begin(), path.end());
+    prefix = prefix.substr(0, std::distance(prefix.begin(), mismatch.first));
+  }
+  return std::filesystem::path(prefix).remove_filename().string();
+}
+
+//------------------------------------------------------------------------------
+
+// Uses libavifinfo to extract the features of the 'data_size'-byte AVIF file
+// 'data' read from 'path'. Failures are counted in 'stats' and printed.
+void ParseFile(const std::string& path, const uint8_t* data, size_t data_size,
+               Stats& stats) {
+  const Result parse = ParseAvif(data, data_size);
+  if (!parse.success) {
+    ++stats.num_files_invalid_at_parse;
+    std::cout << "parsing failure for " << path << std::endl;
+  }
+}
+
+// Extracts features with libavif and libavifinfo, printing any discrepancy.
+void DecodeAndParseFile(const std::string& path, const uint8_t* data,
+                        size_t data_size, Stats& stats) {
+  const Result decode = DecodeAvif(data, data_size);
+  const Result parse = ParseAvif(data, data_size);
+  if (!decode.success) ++stats.num_files_invalid_at_decode;
+  if (!parse.success) ++stats.num_files_invalid_at_parse;
+  if (!decode.success && !parse.success) ++stats.num_files_invalid_at_both;
+
+  if (!parse.success ||  // Report parsing failures and mismatches.
+      (decode.success &&  // Decode-only failures are not reported.
+       (decode.features.width != parse.features.width ||
+        decode.features.height != parse.features.height ||
+        decode.features.bit_depth != parse.features.bit_depth ||
+        decode.features.num_channels != parse.features.num_channels))) {
+    if (decode.success && parse.success) {
+      std::cout << "decoded " << decode.features.width << "x"
+                << decode.features.height << "," << decode.features.bit_depth
+                << "b*" << decode.features.num_channels << " / "
+                << "parsed " << parse.features.width << "x"
+                << parse.features.height << "," << parse.features.bit_depth
+                << "b*" << parse.features.num_channels;
+    } else {
+      std::cout << "decoding " << (decode.success ? "success" : "failure")
+                << " / parsing " << (parse.success ? "success" : "failure");
+    }
+    std::cout << " for " << path << std::endl;
+  }
+}
+
+// Records in 'stats' the minimum number of bytes of AVIF 'data' needed for
+// features to be extracted, or a parsing failure. 'path' is unused.
+void FindMinSizeOfFile(const std::string& path, const uint8_t* data,
+                       size_t data_size, Stats& stats) {
+  size_t min_size;
+  const Result parse = ParseAvifForSize(data, data_size, min_size);
+  if (parse.success) {
+    ++stats.min_size_to_count[min_size];
+  } else {
+    ++stats.num_files_invalid_at_parse;
+  }
+}
+
+// Runs the fuzz target on the AVIF 'data' to check libavifinfo's consistency.
+void ValidateFile(const std::string& path, const uint8_t* data,
+                  size_t data_size) {
+  if (LLVMFuzzerTestOneInput(data, data_size) != 0) {  // Nonzero: failure.
+    std::cout << "validation failed for " << path << std::endl;
+  }
+}
+
+}  // namespace
+
+//------------------------------------------------------------------------------
+
+// Runs libavifinfo, and libavif unless --fast or --min-size is set, on every
+// file found at the given paths, then prints the aggregated failure stats.
+int main(int argc, char** argv) {
+  std::vector<std::string> file_paths;
+  bool only_parse = false;
+  bool find_min_size = false;
+  bool validate = false;
+
+  for (int arg = 1; arg < argc; ++arg) {
+    if (!std::strcmp(argv[arg], "-h") || !std::strcmp(argv[arg], "--help")) {
+      std::cout << GetHelpStr();
+      return 0;
+    } else if (!std::strcmp(argv[arg], "--fast")) {
+      only_parse = true;
+    } else if (!std::strcmp(argv[arg], "--min-size")) {
+      find_min_size = true;
+      only_parse = true;  // Min size only concerns libavifinfo.
+    } else if (!std::strcmp(argv[arg], "--validate")) {
+      validate = true;
+    } else {
+      FindFiles(argv[arg], file_paths);  // Anything else is an input path.
+    }
+  }
+  if (file_paths.empty()) {
+    std::cerr << "No input specified" << std::endl;
+    return 1;
+  }
+  std::cout << "Found " << file_paths.size() << " files" << std::endl;
+  const std::string prefix = FindCommonLongestPrefix(file_paths);
+  for (std::string& file_path : file_paths) file_path.erase(0, prefix.size());
+
+  Stats stats;
+  for (const std::string& file_path : file_paths) {
+    std::vector<uint8_t> bytes(std::filesystem::file_size(prefix + file_path));
+    std::ifstream file(prefix + file_path, std::ios::binary);  // Full path.
+    file.read(reinterpret_cast<char*>(bytes.data()), bytes.size());
+    if (find_min_size) {
+      FindMinSizeOfFile(file_path, bytes.data(), bytes.size(), stats);
+    } else if (only_parse) {
+      ParseFile(file_path, bytes.data(), bytes.size(), stats);
+    } else {
+      DecodeAndParseFile(file_path, bytes.data(), bytes.size(), stats);
+    }
+    if (validate) {
+      ValidateFile(file_path, bytes.data(), bytes.size());
+    }
+  }
+
+  std::cout << stats.num_files_invalid_at_parse << " files failed to parse"
+            << std::endl;
+  if (!only_parse) {
+    std::cout << stats.num_files_invalid_at_decode << " files failed to decode"
+              << std::endl;
+    std::cout << stats.num_files_invalid_at_both
+              << " files failed to parse and decode" << std::endl;
+  }
+
+  if (find_min_size) {
+    std::cout << std::endl;
+    for (const auto& it : stats.min_size_to_count) {
+      std::cout << it.second << " files need " << it.first
+                << " bytes to extract features" << std::endl;
+    }
+  }
+  return 0;
+}