svc_encoder_rtc: add multilayer metadata Read metadata from simplified yaml file and write as frame metadata. Bug: 377851082 Change-Id: I9200d2736376bf79a1c3d41748e225a8d46179f7

commit: bfe96c2b1baf47f8b6aaf5959319cf27777a9b25 [log] [tgz]
author: Maryla <maryla@google.com> Wed Nov 06 10:30:02 2024 +0100
committer: Maryla Ustarroz-Calonge <maryla@google.com> Fri Nov 08 13:09:24 2024 +0000
tree: f3ee81fb357d884e82c5f98c279dedf9303b4df7
parent: 77846f5e9aafef94ca5e2c51f4d56830c9950e22 [diff]
diff --git a/CMakeLists.txt b/CMakeLists.txt
index 55b2f97..a228fad 100644
--- a/CMakeLists.txt
+++ b/CMakeLists.txt

@@ -200,7 +200,9 @@
             "${AOM_ROOT}/common/y4minput.c"
             "${AOM_ROOT}/common/y4minput.h"
             "${AOM_ROOT}/examples/encoder_util.h"
-            "${AOM_ROOT}/examples/encoder_util.c")
+            "${AOM_ROOT}/examples/encoder_util.c"
+            "${AOM_ROOT}/examples/multilayer_metadata.h"
+            "${AOM_ROOT}/examples/multilayer_metadata.cc")
 
 list(APPEND AOM_ENCODER_STATS_SOURCES "${AOM_ROOT}/stats/aomstats.c"
             "${AOM_ROOT}/stats/aomstats.h" "${AOM_ROOT}/stats/rate_hist.c"
@@ -402,6 +404,7 @@
   if(CONFIG_AV1_ENCODER)
     add_library(aom_encoder_app_util OBJECT ${AOM_ENCODER_APP_UTIL_SOURCES})
     set_property(TARGET ${example} PROPERTY FOLDER examples)
+    # multilayer_metadata.cc uses std::optional, which needs C++17. Without
+    # CXX_STANDARD_REQUIRED CMake may silently fall back to an older standard
+    # when the compiler lacks C++17 support, which only shows up later as
+    # confusing compile errors.
+    set_property(TARGET aom_encoder_app_util PROPERTY CXX_STANDARD 17)
+    set_property(TARGET aom_encoder_app_util PROPERTY CXX_STANDARD_REQUIRED ON)
   endif()
 endif()
 
@@ -521,6 +524,7 @@
                                    $<TARGET_OBJECTS:aom_common_app_util>
                                    $<TARGET_OBJECTS:aom_encoder_app_util>)
     target_link_libraries(svc_encoder_rtc ${AOM_LIB_LINK_TYPE} aom_av1_rc)
+    set_property(TARGET svc_encoder_rtc PROPERTY CXX_STANDARD 17)
 
     # Maintain a list of encoder example targets.
     list(APPEND AOM_ENCODER_EXAMPLE_TARGETS aomenc lossless_encoder set_maps

diff --git a/examples/multilayer_metadata.cc b/examples/multilayer_metadata.cc
new file mode 100644
index 0000000..aabad46
--- /dev/null
+++ b/examples/multilayer_metadata.cc

@@ -0,0 +1,471 @@
+#include "examples/multilayer_metadata.h"
+
+#include <assert.h>
+#include <limits.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+
+#include <cstdio>
+#include <fstream>
+#include <iostream>
+#include <limits>
+#include <optional>
+#include <string>
+#include <vector>
+
+#include "aom/aom_integer.h"
+#include "examples/multilayer_metadata.h"
+
+extern void usage_exit(void);
+
+namespace libaom_examples {
+
+namespace {
+
+constexpr int kMaxNumSpatialLayers = 4;
+
+// Removes comments and trailing whitespace from the line.
+// Everything from the first '#' onwards is dropped, then trailing spaces, tabs
+// and carriage returns are stripped, so that files with Windows (CRLF) line
+// endings or tab padding parse the same way as plain LF files.
+void cleanup_line(std::string &line) {
+  // Remove everything after the first '#'.
+  const std::size_t comment_pos = line.find('#');
+  if (comment_pos != std::string::npos) {
+    line.resize(comment_pos);
+  }
+  // Remove whitespace at the end of the line. If the line only contains
+  // whitespace, find_last_not_of() returns npos and the line becomes empty.
+  const std::size_t last_kept = line.find_last_not_of(" \t\r");
+  line.resize(last_kept == std::string::npos ? 0 : last_kept + 1);
+}
+
+// Computes the indentation of 'line', i.e. the number of leading characters
+// that are spaces, tabs or list markers ('-'). 'has_list_prefix' is set to true
+// when at least one '-' was found among those leading characters, meaning that
+// the line starts a new list item, and to false otherwise.
+void get_indent(const std::string &line, int *indent, bool *has_list_prefix) {
+  const int length = (int)line.length();
+  bool found_dash = false;
+  int pos = 0;
+  for (; pos < length; ++pos) {
+    const char c = line[pos];
+    if (c == '-') {
+      found_dash = true;
+    } else if (c != ' ' && c != '\t') {
+      break;  // First character that is part of the actual content.
+    }
+  }
+  *indent = pos;
+  *has_list_prefix = found_dash;
+}
+
+/*
+ * Parses the next line from the file, skipping empty lines.
+ * Returns false if the end of the file was reached, or if the line was indented
+ * less than 'min_indent', meaning that parsing should go back to the previous
+ * function in the stack. In the latter case the read position is rewound so
+ * that the next call reads the same line again.
+ * Terminates the process if the line is malformed (bad indentation, unexpected
+ * list item, or a value that is not an integer fitting in an 'int').
+ *
+ * 'min_indent' is the minimum indentation expected for the next line.
+ * 'is_list' must be true if the line is allowed to contain list items ('-').
+ * 'indent' MUST be initialized to -1 before the first call, and is then set to
+ * the indentation of the line.
+ * 'has_list_prefix' is set to true if the line starts a new list item with '-'.
+ * 'line_idx' is set to the index of the last line read.
+ * 'field_name' is set to the field name if the line contains a colon, or to an
+ * empty string otherwise.
+ * 'value' is set to the integer value of the line, or to 0 if the line doesn't
+ * contain a number.
+ */
+bool parse_line(std::fstream &file, int min_indent, bool is_list, int *indent,
+                bool *has_list_prefix, int *line_idx, std::string *field_name,
+                int *value) {
+  *field_name = "";
+  *value = 0;
+  std::string line;
+  std::fstream::pos_type prev_file_position;
+  const int prev_indent = *indent;
+  while (prev_file_position = file.tellg(), std::getline(file, line)) {
+    cleanup_line(line);
+    get_indent(line, indent, has_list_prefix);
+    line = line.substr(*indent);  // skip indentation
+    // If the line is indented less than 'min_indent', it belongs to the outer
+    // object, and parsing should go back to the previous function in the stack.
+    if (!line.empty() && *indent < min_indent) {
+      // Undo reading the last line. seekg() clears eofbit before seeking, so
+      // the line can be read again even if it is the last line of the file and
+      // has no trailing newline.
+      if (!file.seekg(prev_file_position)) {
+        fprintf(stderr, "Failed to seek to previous file position\n");
+        exit(EXIT_FAILURE);
+      }
+      return false;
+    }
+
+    ++(*line_idx);
+    if (line.empty()) continue;
+
+    // All lines of a given object must share the same indentation.
+    if (prev_indent >= 0 && prev_indent != *indent) {
+      fprintf(stderr, "Error: Bad indentation at line %d\n", *line_idx);
+      exit(EXIT_FAILURE);
+    }
+    if (*has_list_prefix && !is_list) {
+      fprintf(stderr, "Error: Unexpected list item at line %d\n", *line_idx);
+      exit(EXIT_FAILURE);
+    }
+
+    // Split "name: value" lines; lines without a colon are a bare value.
+    std::string value_str = line;
+    const size_t colon_pos = line.find(':');
+    if (colon_pos != std::string::npos) {
+      *field_name = line.substr(0, colon_pos);
+      value_str = line.substr(colon_pos + 1);
+    }
+    // An empty value (e.g. "layers:") parses as 0. strtoll() is used so that
+    // out-of-range values can be detected even where 'long' is 32 bits.
+    char *endptr;
+    const long long parsed = strtoll(value_str.c_str(), &endptr, 10);
+    if (*endptr != '\0') {
+      fprintf(stderr, "Error: Failed to parse number from '%s'\n",
+              value_str.c_str());
+      exit(EXIT_FAILURE);
+    }
+    if (parsed < INT_MIN || parsed > INT_MAX) {
+      fprintf(stderr, "Error: Value '%s' is out of range at line %d\n",
+              value_str.c_str(), *line_idx);
+      exit(EXIT_FAILURE);
+    }
+    *value = (int)parsed;
+    return true;
+  }
+  return false;  // Reached the end of the file.
+}
+
+// Parses a list of integers, one "- <number>" item per line, until the end of
+// the file or until a line indented less than 'min_indent' is found.
+// 'T' is the element type of the returned vector. Terminates the process if a
+// line has a field name, lacks the '-' list prefix, or holds a value that does
+// not fit in 'T'.
+template <typename T>
+std::vector<T> parse_integer_list(std::fstream &file, int min_indent,
+                                  int *line_idx) {
+  // Compare in a wider type: casting numeric_limits<uint32_t>::max() to int
+  // would yield -1 and reject every valid value.
+  const long long min_value = (long long)std::numeric_limits<T>::min();
+  const long long max_value = (long long)std::numeric_limits<T>::max();
+  bool has_list_prefix;
+  int indent = -1;
+  std::string field_name;
+  int value;
+  std::vector<T> result;
+  while (parse_line(file, min_indent, /*is_list=*/true, &indent,
+                    &has_list_prefix, line_idx, &field_name, &value)) {
+    if (!field_name.empty()) {
+      fprintf(
+          stderr,
+          "Error: Unexpected field name '%s' at line %d, expected a number\n",
+          field_name.c_str(), *line_idx);
+      exit(EXIT_FAILURE);
+    } else if (!has_list_prefix) {
+      fprintf(stderr, "Error: Missing list prefix '-' at line %d\n", *line_idx);
+      exit(EXIT_FAILURE);
+    } else if ((long long)value > max_value || (long long)value < min_value) {
+      fprintf(stderr, "Error: Value %d is out of range at line %d\n", value,
+              *line_idx);
+      exit(EXIT_FAILURE);
+    } else {
+      result.push_back(static_cast<T>(value));
+    }
+  }
+  return result;
+}
+
+// Parses the fields of a ColorProperties object (color_range, color_primaries,
+// transfer_characteristics, matrix_coefficients) until the end of the file or
+// until a line indented less than 'min_indent' is found. Fields that are not
+// present are left at zero. Terminates the process on an unknown field, like
+// the other object parsers in this file.
+ColorProperties parse_color_properties(std::fstream &file, int min_indent,
+                                       int *line_idx) {
+  bool has_list_prefix;
+  int indent = -1;
+  std::string field_name;
+  int value;
+  ColorProperties color = {};
+  while (parse_line(file, min_indent, /*is_list=*/false, &indent,
+                    &has_list_prefix, line_idx, &field_name, &value)) {
+    if (field_name == "color_range") {
+      color.color_range = value;
+    } else if (field_name == "color_primaries") {
+      color.color_primaries = value;
+    } else if (field_name == "transfer_characteristics") {
+      color.transfer_characteristics = value;
+    } else if (field_name == "matrix_coefficients") {
+      color.matrix_coefficients = value;
+    } else {
+      fprintf(stderr, "Error: Unknown field '%s' at line %d\n",
+              field_name.c_str(), *line_idx);
+      exit(EXIT_FAILURE);
+    }
+  }
+  return color;
+}
+
+// Parses the fields of an 'alpha' object until the end of the file or until a
+// line indented less than 'min_indent' is found. Fields that are not present
+// are left at their zero/empty value. Terminates the process on an unknown
+// field.
+AlphaInformation parse_multilayer_layer_alpha(std::fstream &file,
+                                              int min_indent, int *line_idx) {
+  bool has_list_prefix;
+  int indent = -1;
+  std::string field_name;
+  int value;
+  AlphaInformation alpha_info = {};
+  while (parse_line(file, min_indent, /*is_list=*/false, &indent,
+                    &has_list_prefix, line_idx, &field_name, &value)) {
+    if (field_name == "alpha_use_idc") {
+      alpha_info.alpha_use_idc = (AlphaUse)value;
+    } else if (field_name == "alpha_bit_depth") {
+      alpha_info.alpha_bit_depth = value;
+    } else if (field_name == "alpha_clip_idc") {
+      alpha_info.alpha_clip_idc = value;
+    } else if (field_name == "alpha_incr_flag") {
+      alpha_info.alpha_incr_flag = value;
+    } else if (field_name == "alpha_transparent_value") {
+      alpha_info.alpha_transparent_value = value;
+    } else if (field_name == "alpha_opaque_value") {
+      alpha_info.alpha_opaque_value = value;
+    } else if (field_name == "alpha_color_description") {
+      // Nested fields must be indented more than this line; otherwise the
+      // nested parser would also consume the following sibling fields and
+      // report them as badly indented.
+      alpha_info.alpha_color_description = parse_color_properties(
+          file, /*min_indent=*/indent + 1, line_idx);
+    } else if (field_name == "label_type_id") {
+      alpha_info.label_type_id = parse_integer_list<uint16_t>(
+          file, /*min_indent=*/indent + 1, line_idx);
+    } else {
+      fprintf(stderr, "Error: Unknown field '%s' at line %d\n",
+              field_name.c_str(), *line_idx);
+      exit(EXIT_FAILURE);
+    }
+  }
+  return alpha_info;
+}
+
+// Parses the fields of a depth representation element (sign_flag, exponent,
+// mantissa) until the end of the file or until a line indented less than
+// 'min_indent' is found. Fields that are not present are left at zero.
+// Terminates the process on an unknown field.
+DepthRepresentationElement parse_depth_representation_element(
+    std::fstream &file, int min_indent, int *line_idx) {
+  bool has_list_prefix;
+  int indent = -1;
+  std::string field_name;
+  int value;
+  // Value-initialize so that fields missing from the file are zero instead of
+  // indeterminate.
+  DepthRepresentationElement element = {};
+  while (parse_line(file, min_indent, /*is_list=*/false, &indent,
+                    &has_list_prefix, line_idx, &field_name, &value)) {
+    if (field_name == "sign_flag") {
+      element.sign_flag = value;
+    } else if (field_name == "exponent") {
+      element.exponent = value;
+    } else if (field_name == "mantissa") {
+      element.mantissa = value;
+    } else {
+      fprintf(stderr, "Error: Unknown field '%s' at line %d\n",
+              field_name.c_str(), *line_idx);
+      exit(EXIT_FAILURE);
+    }
+  }
+  return element;
+}
+
+// Parses the fields of a 'depth' object until the end of the file or until a
+// line indented less than 'min_indent' is found. Optional elements (z_near,
+// z_far, d_min, d_max) stay absent unless they appear in the file. Terminates
+// the process on an unknown field.
+DepthInformation parse_multilayer_layer_depth(std::fstream &file,
+                                              int min_indent, int *line_idx) {
+  bool has_list_prefix;
+  int indent = -1;
+  std::string field_name;
+  int value;
+  DepthInformation depth_info = {};
+  while (parse_line(file, min_indent, /*is_list=*/false, &indent,
+                    &has_list_prefix, line_idx, &field_name, &value)) {
+    // Nested objects and lists must be indented more than the current line,
+    // hence 'indent + 1'; otherwise the nested parser would also consume the
+    // following sibling fields and report them as badly indented.
+    if (field_name == "z_near") {
+      depth_info.z_near = parse_depth_representation_element(
+          file, /*min_indent=*/indent + 1, line_idx);
+    } else if (field_name == "z_far") {
+      depth_info.z_far = parse_depth_representation_element(
+          file, /*min_indent=*/indent + 1, line_idx);
+    } else if (field_name == "d_min") {
+      depth_info.d_min = parse_depth_representation_element(
+          file, /*min_indent=*/indent + 1, line_idx);
+    } else if (field_name == "d_max") {
+      depth_info.d_max = parse_depth_representation_element(
+          file, /*min_indent=*/indent + 1, line_idx);
+    } else if (field_name == "depth_representation_type") {
+      depth_info.depth_representation_type = value;
+    } else if (field_name == "disparity_ref_view_id") {
+      depth_info.disparity_ref_view_id = value;
+    } else if (field_name == "depth_nonlinear_precision") {
+      depth_info.depth_nonlinear_precision = value;
+    } else if (field_name == "depth_nonlinear_representation_model") {
+      depth_info.depth_nonlinear_representation_model =
+          parse_integer_list<uint32_t>(file, /*min_indent=*/indent + 1,
+                                       line_idx);
+    } else {
+      fprintf(stderr, "Error: Unknown field '%s' at line %d\n",
+              field_name.c_str(), *line_idx);
+      exit(EXIT_FAILURE);
+    }
+  }
+  return depth_info;
+}
+
+// Parses the 'layers' list: each list item ('-') starts a new layer, and the
+// following fields at the same indentation belong to that layer. Stops at the
+// end of the file or at a line indented less than 'min_indent'. Terminates the
+// process if there are more than kMaxNumSpatialLayers layers, if a field
+// appears before the first list item, or on an unknown field.
+std::vector<LayerMetadata> parse_multilayer_layer_metadata(std::fstream &file,
+                                                           int min_indent,
+                                                           int *line_idx) {
+  bool has_list_prefix;
+  int indent = -1;
+  std::string field_name;
+  int value;
+  std::vector<LayerMetadata> layers;
+  while (parse_line(file, min_indent, /*is_list=*/true, &indent,
+                    &has_list_prefix, line_idx, &field_name, &value)) {
+    if (has_list_prefix) {
+      if ((int)layers.size() >= kMaxNumSpatialLayers) {
+        fprintf(stderr,
+                "Error: Too many layers at line %d, the maximum is %d\n",
+                *line_idx, kMaxNumSpatialLayers);
+        exit(EXIT_FAILURE);
+      }
+      layers.emplace_back();
+    }
+    if (layers.empty()) {
+      fprintf(stderr, "Error: Missing list prefix '-' at line %d\n", *line_idx);
+      exit(EXIT_FAILURE);
+    }
+    LayerMetadata *layer = &layers.back();
+    // Dispatch on the field name. Nested objects must be indented more than
+    // the current line, hence 'indent + 1'.
+    if (field_name == "layer_type") {
+      layer->layer_type = (LayerType)value;
+    } else if (field_name == "luma_plane_only_flag") {
+      layer->luma_plane_only_flag = value;
+    } else if (field_name == "layer_view_type") {
+      layer->layer_view_type = (MultilayerViewType)value;
+    } else if (field_name == "group_id") {
+      layer->group_id = value;
+    } else if (field_name == "layer_dependency_idc") {
+      layer->layer_dependency_idc = value;
+    } else if (field_name == "layer_metadata_scope") {
+      layer->layer_metadata_scope = (MultilayerMetadataScope)value;
+    } else if (field_name == "layer_color_description") {
+      // With 'indent' instead of 'indent + 1', the nested parser would also
+      // consume the next sibling field (or the next list item) and report it
+      // as badly indented.
+      layer->layer_color_description = parse_color_properties(
+          file, /*min_indent=*/indent + 1, line_idx);
+    } else if (field_name == "alpha") {
+      layer->global_alpha_info =
+          parse_multilayer_layer_alpha(file,
+                                       /*min_indent=*/indent + 1, line_idx);
+    } else if (field_name == "depth") {
+      layer->global_depth_info =
+          parse_multilayer_layer_depth(file,
+                                       /*min_indent=*/indent + 1, line_idx);
+    } else {
+      fprintf(stderr, "Error: Unknown field %s at line %d\n",
+              field_name.c_str(), *line_idx);
+      exit(EXIT_FAILURE);
+    }
+  }
+  return layers;
+}
+
+// Parses the top-level object of the metadata file, made of a 'use_case' value
+// and a 'layers' list. Terminates the process on any other field.
+MultilayerMetadata parse_multilayer_metadata(std::fstream &file) {
+  MultilayerMetadata result = {};
+  int current_line = 0;
+  int current_indent = -1;
+  bool is_list_item;
+  std::string key;
+  int number;
+  for (;;) {
+    const bool got_line =
+        parse_line(file, /*min_indent=*/0, /*is_list=*/false, &current_indent,
+                   &is_list_item, &current_line, &key, &number);
+    if (!got_line) break;
+
+    if (key == "use_case") {
+      result.use_case = (MultilayerUseCase)number;
+      continue;
+    }
+    if (key == "layers") {
+      // The list items must be indented more than the 'layers' line itself.
+      result.layers = parse_multilayer_layer_metadata(
+          file, /*min_indent=*/current_indent + 1, &current_line);
+      continue;
+    }
+    fprintf(stderr, "Error: Unknown field %s at line %d\n", key.c_str(),
+            current_line);
+    exit(EXIT_FAILURE);
+  }
+  return result;
+}
+
+// Returns a printable description of 'element': "absent" when it holds no
+// value, otherwise its sign_flag, exponent and mantissa.
+std::string format_depth_representation_element(
+    const std::optional<DepthRepresentationElement> &element) {
+  if (!element.has_value()) return "absent";
+
+  const DepthRepresentationElement &e = *element;
+  std::string text = "sign_flag ";
+  text += std::to_string(e.sign_flag);
+  text += " exponent ";
+  text += std::to_string(e.exponent);
+  text += " mantissa ";
+  text += std::to_string(e.mantissa);
+  return text;
+}
+
+// Returns a printable description of 'color_properties': "absent" when it
+// holds no value, otherwise "<primaries>/<transfer>/<matrix>" followed by 'F'
+// when color_range is set and 'L' when it is not.
+std::string format_color_properties(
+    const std::optional<ColorProperties> &color_properties) {
+  if (!color_properties.has_value()) return "absent";
+
+  const ColorProperties &color = *color_properties;
+  std::string text = std::to_string(color.color_primaries);
+  text += "/";
+  text += std::to_string(color.transfer_characteristics);
+  text += "/";
+  text += std::to_string(color.matrix_coefficients);
+  text += color.color_range ? "F" : "L";
+  return text;
+}
+
+}  // namespace
+
+// Opens and parses the multilayer metadata file at 'metadata_path'.
+// Terminates the process if the file cannot be opened, if it is malformed, or
+// if it does not describe at least one layer.
+MultilayerMetadata parse_multilayer_file(const char *metadata_path) {
+  // Open for reading only: std::fstream defaults to in|out, which fails on
+  // files the user is not allowed to write to.
+  std::fstream file(metadata_path, std::ios::in);
+  if (!file.is_open()) {
+    fprintf(stderr, "Error: Failed to open %s\n", metadata_path);
+    exit(EXIT_FAILURE);
+  }
+
+  const MultilayerMetadata multilayer = parse_multilayer_metadata(file);
+  if (multilayer.layers.empty()) {
+    fprintf(stderr, "Error: No layers found, there must be at least one\n");
+    exit(EXIT_FAILURE);
+  }
+  return multilayer;
+}
+
+// Prints 'multilayer' to stdout in a human readable form: the use case, then
+// for each layer its common fields, followed by the alpha or depth
+// information when the layer type is alpha or depth respectively.
+void print_multilayer_metadata(const MultilayerMetadata &multilayer) {
+  printf("=== Multilayer metadata ===\n");
+  printf("use_case: %d\n", multilayer.use_case);
+  for (size_t i = 0; i < multilayer.layers.size(); ++i) {
+    const LayerMetadata &layer = multilayer.layers[i];
+    printf("layer %d\n", (int)i);
+    printf("  layer_type: %d\n", layer.layer_type);
+    printf("  luma_plane_only_flag: %d\n", layer.luma_plane_only_flag);
+    printf("  layer_view_type: %d\n", layer.layer_view_type);
+    printf("  group_id: %d\n", layer.group_id);
+    printf("  layer_dependency_idc: %d\n", layer.layer_dependency_idc);
+    printf("  layer_metadata_scope: %d\n", layer.layer_metadata_scope);
+    printf("  layer_color_description: %s\n",
+           format_color_properties(layer.layer_color_description).c_str());
+    if (layer.layer_type == MULTIALYER_LAYER_TYPE_ALPHA) {
+      printf("  alpha:\n");
+      printf("    alpha_use_idc: %d\n", layer.global_alpha_info.alpha_use_idc);
+      printf("    alpha_bit_depth: %d\n",
+             layer.global_alpha_info.alpha_bit_depth);
+      printf("    alpha_clip_idc: %d\n",
+             layer.global_alpha_info.alpha_clip_idc);
+      printf("    alpha_incr_flag: %d\n",
+             layer.global_alpha_info.alpha_incr_flag);
+      printf("    alpha_transparent_value: %hu\n",
+             layer.global_alpha_info.alpha_transparent_value);
+      printf("    alpha_opaque_value: %hu\n",
+             layer.global_alpha_info.alpha_opaque_value);
+      printf("    alpha_color_description: %s\n",
+             format_color_properties(
+                 layer.global_alpha_info.alpha_color_description)
+                 .c_str());
+      printf("    label_type_id:");
+      for (uint16_t label_type_id : layer.global_alpha_info.label_type_id) {
+        printf(" %d", label_type_id);
+      }
+      printf("\n");
+    } else if (layer.layer_type == MULTIALYER_LAYER_TYPE_DEPTH) {
+      printf("  depth:\n");
+      printf("    z_near_flag %s\n",
+             format_depth_representation_element(layer.global_depth_info.z_near)
+                 .c_str());
+      printf("    z_far_flag %s\n",
+             format_depth_representation_element(layer.global_depth_info.z_far)
+                 .c_str());
+      printf("    d_min_flag %s\n",
+             format_depth_representation_element(layer.global_depth_info.d_min)
+                 .c_str());
+      printf("    d_max_flag %s\n",
+             format_depth_representation_element(layer.global_depth_info.d_max)
+                 .c_str());
+      printf("    depth_representation_type: %d\n",
+             layer.global_depth_info.depth_representation_type);
+      printf("    disparity_ref_view_id: %d\n",
+             layer.global_depth_info.disparity_ref_view_id);
+      printf("    depth_nonlinear_precision: %d\n",
+             layer.global_depth_info.depth_nonlinear_precision);
+      printf("    depth_nonlinear_representation_model:");
+      for (uint32_t depth_nonlinear_representation_model :
+           layer.global_depth_info.depth_nonlinear_representation_model) {
+        // The values are unsigned 32 bit integers: "%d" would print large
+        // values as negative numbers.
+        printf(" %u", (unsigned int)depth_nonlinear_representation_model);
+      }
+      printf("\n");
+    }
+  }
+  printf("\n");
+}
+
+}  // namespace libaom_examples

diff --git a/examples/multilayer_metadata.h b/examples/multilayer_metadata.h
new file mode 100644
index 0000000..9001542
--- /dev/null
+++ b/examples/multilayer_metadata.h

@@ -0,0 +1,132 @@
+#ifndef AOM_EXAMPLES_MULTILAYER_METADATA_H_
+#define AOM_EXAMPLES_MULTILAYER_METADATA_H_
+
+#include <cstdint>
+#include <optional>
+#include <vector>
+
+namespace libaom_examples {
+
+// Color description of a layer or of an alpha layer.
+// When written to the metadata payload, color_range takes 1 bit and each of
+// the three other fields takes 8 bits.
+struct ColorProperties {
+  bool color_range;  // true for full range values
+  uint8_t color_primaries;
+  uint8_t transfer_characteristics;
+  uint8_t matrix_coefficients;
+};
+
+// Values of AlphaInformation::alpha_use_idc.
+// ALPHA_STRAIGHT and ALPHA_SEGMENTATION each have extra fields in
+// AlphaInformation that are only relevant for that value.
+enum AlphaUse {
+  ALPHA_STRAIGHT = 0,
+  ALPHA_PREMULTIPLIED = 1,
+  ALPHA_SEGMENTATION = 2,
+  ALPHA_UNSPECIFIED = 3,
+};
+
+// Description of an alpha layer (see LayerMetadata::global_alpha_info).
+// The ranges in the comments below are the values that fit in the bit fields
+// used when the metadata is written.
+struct AlphaInformation {
+  AlphaUse alpha_use_idc;   // [0, 7]
+  uint8_t alpha_bit_depth;  // [8, 15]
+  uint8_t alpha_clip_idc;   // [0, 3]
+  bool alpha_incr_flag;
+  // Both values are written using alpha_bit_depth bits.
+  uint16_t alpha_transparent_value;  // [0, (1<<alpha_bit_depth)-1]
+  uint16_t alpha_opaque_value;       // [0, (1<<alpha_bit_depth)-1]
+  // Relevant for ALPHA_STRAIGHT only.
+  std::optional<ColorProperties> alpha_color_description;
+  // Relevant for ALPHA_SEGMENTATION only.
+  // Must be either empty or have the same size as the number of values between
+  // alpha_transparent_value and alpha_opaque_value, inclusively.
+  std::vector<uint16_t> label_type_id;
+};
+
+// One value of DepthInformation (z_near, z_far, d_min or d_max), stored as
+// separate sign, exponent and mantissa fields.
+// TODO: maryla - parse floats directly and convert to this wire
+// representation at write time.
+struct DepthRepresentationElement {
+  bool sign_flag;
+  uint8_t exponent;  // [0, 126]
+  uint32_t mantissa;
+};
+
+// Description of a depth layer (see LayerMetadata::global_depth_info).
+// The four optional elements are only written when present; one presence bit
+// is written for each of them.
+struct DepthInformation {
+  std::optional<DepthRepresentationElement> z_near;
+  std::optional<DepthRepresentationElement> z_far;
+  std::optional<DepthRepresentationElement> d_min;
+  std::optional<DepthRepresentationElement> d_max;
+  uint8_t depth_representation_type;  // [0, 15]
+  // Only written when d_min or d_max is present.
+  uint8_t disparity_ref_view_id;      // [0, 3]
+  // Only written when depth_representation_type is 3.
+  uint8_t depth_nonlinear_precision;  // [8, 23]
+  // [0, 1<<depth_nonlinear_precision]
+  // Only written when depth_representation_type is 3, in which case it must
+  // hold between 1 and 64 values.
+  std::vector<uint32_t> depth_nonlinear_representation_model;
+};
+
+// Values of MultilayerMetadata::use_case, which is written as a 6 bit field.
+enum MultilayerUseCase {
+  MULTILAYER_USE_CASE_UNSPECIFIED = 0,
+  MULTILAYER_USE_CASE_ALPHA = 1,
+  MULTILAYER_USE_CASE_DEPTH = 2,
+  MULTILAYER_USE_CASE_STEREO = 3,
+  MULTILAYER_USE_CASE_STEREO_ALPHA_GLOBAL = 4,
+  MULTILAYER_USE_CASE_STEREO_DEPTH_GLOBAL = 5,
+  MULTILAYER_USE_CASE_STEREO_ALPHA = 6,
+  MULTILAYER_USE_CASE_STEREO_DEPTH = 7,
+  MULTILAYER_USE_CASE_444 = 8,
+  MULTILAYER_USE_CASE_420_444 = 9,
+  MULTILAYER_USE_CASE_444_ALPHA = 10,
+  MULTILAYER_USE_CASE_444_DEPTH = 11,
+};
+
+// Values of LayerMetadata::layer_type, which is written as a 5 bit field.
+// NOTE(review): the "MULTIALYER" prefix looks like a typo for "MULTILAYER";
+// renaming requires updating every user of these constants at the same time.
+enum LayerType {
+  MULTIALYER_LAYER_TYPE_UNSPECIFIED = 0,
+  MULTIALYER_LAYER_TYPE_TEXTURE = 1,
+  MULTIALYER_LAYER_TYPE_TEXTURE_1 = 2,
+  MULTIALYER_LAYER_TYPE_TEXTURE_2 = 3,
+  MULTIALYER_LAYER_TYPE_TEXTURE_3 = 4,
+  MULTIALYER_LAYER_TYPE_ALPHA = 5,
+  MULTIALYER_LAYER_TYPE_DEPTH = 6,
+};
+
+// Values of LayerMetadata::layer_metadata_scope.
+// The global alpha/depth information of a layer is only written when the scope
+// is SCOPE_GLOBAL or SCOPE_MIXED.
+enum MultilayerMetadataScope {
+  SCOPE_UNSPECIFIED = 0,
+  SCOPE_LOCAL = 1,
+  SCOPE_GLOBAL = 2,
+  SCOPE_MIXED = 3,
+};
+
+// Values of LayerMetadata::layer_view_type, which is written as a 3 bit field.
+enum MultilayerViewType {
+  VIEW_UNSPECIFIED = 0,
+  VIEW_CENTER = 1,
+  VIEW_LEFT = 2,
+  VIEW_RIGHT = 3,
+};
+
+// Metadata of one layer. The position of the layer in
+// MultilayerMetadata::layers is used as its spatial id when the metadata is
+// written.
+struct LayerMetadata {
+  LayerType layer_type;  // [0, 31]
+  bool luma_plane_only_flag;
+  MultilayerViewType layer_view_type;            // [0, 7]
+  uint8_t group_id;                              // [0, 3]
+  uint8_t layer_dependency_idc;                  // [0, 7]
+  MultilayerMetadataScope layer_metadata_scope;  // [0, 3]
+
+  // Not written for the first layer (index 0).
+  std::optional<ColorProperties> layer_color_description;
+
+  // Relevant for MULTIALYER_LAYER_TYPE_ALPHA with SCOPE_GLOBAL or SCOPE_MIXED.
+  AlphaInformation global_alpha_info;
+  // Relevant for MULTIALYER_LAYER_TYPE_DEPTH with SCOPE_GLOBAL or SCOPE_MIXED.
+  DepthInformation global_depth_info;
+};
+
+// Top-level multilayer metadata: a use case and the list of layers.
+// parse_multilayer_file() only returns metadata with 1 to 4 layers.
+struct MultilayerMetadata {
+  MultilayerUseCase use_case;  // [0, 63]
+  std::vector<LayerMetadata> layers;
+};
+
+// Parses a multilayer metadata file.
+// Terminates the process in case of error.
+// The metadata is expected to be in a subset of the YAML format supporting
+// simple lists and maps with integer values, and comments.
+// Does very little validation on the metadata, e.g. does not check that the
+// values are in the correct range.
+MultilayerMetadata parse_multilayer_file(const char *metadata_path);
+
+// Prints the multilayer metadata to stdout for debugging.
+void print_multilayer_metadata(const MultilayerMetadata &multilayer);
+
+}  // namespace libaom_examples
+
+#endif  // AOM_EXAMPLES_MULTILAYER_METADATA_H_

diff --git a/examples/svc_encoder_rtc.cc b/examples/svc_encoder_rtc.cc
index ad40132..b115a0a 100644
--- a/examples/svc_encoder_rtc.cc
+++ b/examples/svc_encoder_rtc.cc

@@ -20,6 +20,7 @@
 #include <string.h>
 
 #include <memory>
+#include <optional>
 
 #include "config/aom_config.h"
 
@@ -27,13 +28,17 @@
 #include "aom/aom_decoder.h"
 #endif
 #include "aom/aom_encoder.h"
+#include "aom/aom_image.h"
+#include "aom/aom_integer.h"
 #include "aom/aomcx.h"
+#include "aom_dsp/bitwriter_buffer.h"
+#include "aom_ports/aom_timer.h"
+#include "av1/ratectrl_rtc.h"
 #include "common/args.h"
 #include "common/tools_common.h"
 #include "common/video_writer.h"
 #include "examples/encoder_util.h"
-#include "aom_ports/aom_timer.h"
-#include "av1/ratectrl_rtc.h"
+#include "examples/multilayer_metadata.h"
 
 #define OPTION_BUFFER_SIZE 1024
 #define MAX_NUM_SPATIAL_LAYERS 4
@@ -51,6 +56,7 @@
   int show_psnr;
   bool use_external_rc;
   bool scale_factors_explicitly_set;
+  const char *multilayer_metadata_file;
 } AppInput;
 
 typedef enum {
@@ -116,6 +122,9 @@
 };
 static const arg_def_t tune_content_arg = ARG_DEF_ENUM(
     NULL, "tune-content", 1, "Tune content type", tune_content_enum);
+static const arg_def_t multilayer_metadata_file_arg =
+    ARG_DEF("ml", "multilayer_metadata_file", 1,
+            "Experimental: path to multilayer metadata file");
 
 #if CONFIG_AV1_HIGHBITDEPTH
 static const struct arg_enum_list bitdepth_enum[] = { { "8", AOM_BITS_8 },
@@ -394,6 +403,8 @@
       app_input->show_psnr = 1;
     } else if (arg_match(&arg, &ext_rc_arg, argi)) {
       app_input->use_external_rc = true;
+    } else if (arg_match(&arg, &multilayer_metadata_file_arg, argi)) {
+      app_input->multilayer_metadata_file = arg.val;
     } else {
       ++argj;
     }
@@ -1353,6 +1364,201 @@
   }
 }
 
+// Writes 'data - offset' to 'wb' using 'bits' bits.
+// Dies if 'data - offset' is negative or does not fit in 'bits' bits.
+static void write_literal(struct aom_write_bit_buffer *wb, int data, int bits,
+                          int offset = 0) {
+  // Use 64 bit arithmetic for the range check: callers may pass 31 bits or
+  // more, for which '1 << bits' overflows an int.
+  const long long to_write = (long long)data - offset;
+  const long long limit = 1LL << bits;
+  if (to_write < 0 || to_write >= limit) {
+    die("Invalid data, value %d out of range [%d, %lld]\n", data, offset,
+        (long long)offset + limit - 1);
+  }
+  aom_wb_write_literal(wb, (int)to_write, bits);
+}
+
+// Writes a depth representation element to 'buffer', or nothing at all when
+// 'element' is absent. The element is written as a 1 bit sign, a 7 bit
+// exponent, the mantissa length minus one on 5 bits, then the mantissa itself
+// on that many bits.
+static void write_depth_representation_element(
+    struct aom_write_bit_buffer *buffer,
+    const std::optional<libaom_examples::DepthRepresentationElement> &element) {
+  if (element.has_value()) {
+    const libaom_examples::DepthRepresentationElement &e = *element;
+    write_literal(buffer, e.sign_flag, 1);
+    write_literal(buffer, e.exponent, 7);
+    // Smallest number of bits (at least 1, at most 32) that can hold the
+    // mantissa.
+    int num_mantissa_bits = 1;
+    for (; num_mantissa_bits < 32; ++num_mantissa_bits) {
+      if ((e.mantissa >> num_mantissa_bits) == 0) break;
+    }
+    write_literal(buffer, num_mantissa_bits - 1, 5);
+    write_literal(buffer, e.mantissa, num_mantissa_bits);
+  }
+}
+
+// Writes a presence bit for 'color_properties' to 'buffer', followed by either
+// the color properties (1 bit range and three 8 bit fields) when present, or a
+// single reserved bit set to zero when absent.
+static void write_color_properties(
+    struct aom_write_bit_buffer *buffer,
+    const std::optional<libaom_examples::ColorProperties> &color_properties) {
+  const bool is_present = color_properties.has_value();
+  write_literal(buffer, is_present, 1);
+  if (!is_present) {
+    write_literal(buffer, 0, 1);  // reserved_1bit
+    return;
+  }
+  const libaom_examples::ColorProperties &color = *color_properties;
+  write_literal(buffer, color.color_range, 1);
+  write_literal(buffer, color.color_primaries, 8);
+  write_literal(buffer, color.transfer_characteristics, 8);
+  write_literal(buffer, color.matrix_coefficients, 8);
+}
+
+// Serializes 'multilayer' and attaches it to 'frame' as metadata of type 33
+// (multilayer metadata) with the AOM_MIF_KEY_FRAME insert flag.
+// Does nothing if 'multilayer' is absent. Dies if the metadata is invalid
+// (no layers, too many layers, values that do not fit in their bit field,
+// inconsistent list sizes) or if the metadata cannot be added to the frame.
+static void add_multilayer_metadata(
+    aom_image_t *frame,
+    const std::optional<libaom_examples::MultilayerMetadata> &multilayer) {
+  if (!multilayer.has_value()) {
+    return;
+  }
+  // Validate the number of layers before allocating the buffer: with zero
+  // layers the buffer would be empty and must not be written to.
+  if (multilayer->layers.empty()) {
+    die("Invalid multilayer metadata, no layers found\n");
+  } else if (multilayer->layers.size() > MAX_NUM_SPATIAL_LAYERS) {
+    die("Invalid multilayer metadata, too many layers (max is %d)\n",
+        MAX_NUM_SPATIAL_LAYERS);
+  }
+  // Pretty large buffer to accommodate the largest multilayer metadata
+  // possible, with 4 alpha segmentation layers (each can be up to about 66kB).
+  std::vector<uint8_t> data(66000 * multilayer->layers.size());
+  struct aom_write_bit_buffer buffer = { data.data(), 0 };
+
+  write_literal(&buffer, multilayer->use_case, 6);
+  write_literal(&buffer, (int)multilayer->layers.size() - 1, 2);
+  assert(buffer.bit_offset % 8 == 0);
+  for (size_t i = 0; i < multilayer->layers.size(); ++i) {
+    const libaom_examples::LayerMetadata &layer = multilayer->layers[i];
+    // Alpha info with segmentation with labels can be up to about 66k bytes,
+    // which requires 3 bytes to encode in leb128.
+    const int bytes_reserved_for_size = 3;
+    // Placeholder for layer_metadata_size which will be written later.
+    write_literal(&buffer, 0, bytes_reserved_for_size * 8);
+    const uint32_t metadata_start = buffer.bit_offset;
+    write_literal(&buffer, (int)i, 2);  // ml_spatial_id
+    write_literal(&buffer, layer.layer_type, 5);
+    write_literal(&buffer, layer.luma_plane_only_flag, 1);
+    write_literal(&buffer, layer.layer_view_type, 3);
+    write_literal(&buffer, layer.group_id, 2);
+    write_literal(&buffer, layer.layer_dependency_idc, 3);
+    write_literal(&buffer, layer.layer_metadata_scope, 2);
+    write_literal(&buffer, 0, 4);  // ml_reserved_4bits
+
+    if (i > 0) {
+      write_color_properties(&buffer, layer.layer_color_description);
+    } else {
+      write_literal(&buffer, 0, 2);  // ml_reserved_2bits
+    }
+    assert(buffer.bit_offset % 8 == 0);
+
+    if (multilayer->use_case < 12) {
+      if (layer.layer_type == libaom_examples::MULTIALYER_LAYER_TYPE_ALPHA &&
+          layer.layer_metadata_scope >= libaom_examples::SCOPE_GLOBAL) {
+        const libaom_examples::AlphaInformation &alpha_info =
+            layer.global_alpha_info;
+        write_literal(&buffer, alpha_info.alpha_use_idc, 3);
+        write_literal(&buffer, alpha_info.alpha_bit_depth, 3, /*offset=*/8);
+        write_literal(&buffer, alpha_info.alpha_clip_idc, 2);
+        write_literal(&buffer, alpha_info.alpha_incr_flag, 1);
+        write_literal(&buffer, alpha_info.alpha_transparent_value,
+                      alpha_info.alpha_bit_depth);
+        write_literal(&buffer, alpha_info.alpha_opaque_value,
+                      alpha_info.alpha_bit_depth);
+        if (buffer.bit_offset % 8 != 0) {
+          // ai_byte_alignment_bits
+          write_literal(&buffer, 0, 8 - (buffer.bit_offset % 8));
+        }
+        assert(buffer.bit_offset % 8 == 0);
+
+        if (alpha_info.alpha_use_idc == libaom_examples::ALPHA_STRAIGHT) {
+          write_literal(&buffer, 0, 6);  // ai_reserved_6bits
+          write_color_properties(&buffer, alpha_info.alpha_color_description);
+        } else if (alpha_info.alpha_use_idc ==
+                   libaom_examples::ALPHA_SEGMENTATION) {
+          write_literal(&buffer, 0, 7);  // ai_reserved_7bits
+          write_literal(&buffer, !alpha_info.label_type_id.empty(), 1);
+          if (!alpha_info.label_type_id.empty()) {
+            // One label per alpha value between the transparent and opaque
+            // values, inclusively.
+            const size_t num_values =
+                std::abs(alpha_info.alpha_transparent_value -
+                         alpha_info.alpha_opaque_value) +
+                1;
+            if (alpha_info.label_type_id.size() != num_values) {
+              die("Invalid multilayer metadata, label_type_id size must be "
+                  "equal to the range of alpha values between "
+                  "alpha_transparent_value and alpha_opaque_value (expected "
+                  "%d values, found %d values)\n",
+                  (int)num_values, (int)alpha_info.label_type_id.size());
+            }
+            for (size_t j = 0; j < num_values; ++j) {
+              write_literal(&buffer, alpha_info.label_type_id[j], 16);
+            }
+          }
+        }
+        assert(buffer.bit_offset % 8 == 0);
+      } else if (layer.layer_type ==
+                     libaom_examples::MULTIALYER_LAYER_TYPE_DEPTH &&
+                 layer.layer_metadata_scope >= libaom_examples::SCOPE_GLOBAL) {
+        const libaom_examples::DepthInformation &depth_info =
+            layer.global_depth_info;
+        write_literal(&buffer, depth_info.z_near.has_value(), 1);
+        write_literal(&buffer, depth_info.z_far.has_value(), 1);
+        write_literal(&buffer, depth_info.d_min.has_value(), 1);
+        write_literal(&buffer, depth_info.d_max.has_value(), 1);
+        write_literal(&buffer, depth_info.depth_representation_type, 4);
+        if (depth_info.d_min.has_value() || depth_info.d_max.has_value()) {
+          write_literal(&buffer, depth_info.disparity_ref_view_id, 2);
+        }
+        write_depth_representation_element(&buffer, depth_info.z_near);
+        write_depth_representation_element(&buffer, depth_info.z_far);
+        write_depth_representation_element(&buffer, depth_info.d_min);
+        write_depth_representation_element(&buffer, depth_info.d_max);
+        if (depth_info.depth_representation_type == 3) {
+          write_literal(&buffer, depth_info.depth_nonlinear_precision, 4,
+                        /*offset=*/8);
+          if (depth_info.depth_nonlinear_representation_model.empty() ||
+              depth_info.depth_nonlinear_representation_model.size() >
+                  (1 << 6)) {
+            die("Invalid multilayer metadata, if depth_representation_type "
+                "== 3, depth_nonlinear_representation_model must have 1 to "
+                "%d elements, found %d elements\n",
+                1 << 6,
+                (int)depth_info.depth_nonlinear_representation_model.size());
+          }
+          write_literal(
+              &buffer,
+              (int)depth_info.depth_nonlinear_representation_model.size() - 1,
+              6);
+          // NOTE(review): the number of bits used for each model value is
+          // still an open question (the original author wondered whether it
+          // should be + 9); confirm against the specification.
+          const int bit_depth =
+              depth_info.depth_nonlinear_precision + 8;  // XXX + 9 ???
+          for (const uint32_t v :
+               depth_info.depth_nonlinear_representation_model) {
+            write_literal(&buffer, v, bit_depth);
+          }
+        }
+        if (buffer.bit_offset % 8 != 0) {
+          write_literal(&buffer, 0, 8 - (buffer.bit_offset % 8));
+        }
+        assert(buffer.bit_offset % 8 == 0);
+      }
+    }
+    assert(buffer.bit_offset % 8 == 0);
+
+    // Go back and fill in the size placeholder of this layer. The position
+    // must be a size_t: from the second layer on it can exceed 255 bytes.
+    const int metadata_size_bytes = (buffer.bit_offset - metadata_start) / 8;
+    const size_t size_pos = metadata_start / 8 - bytes_reserved_for_size;
+    size_t coded_size;
+    if (aom_uleb_encode_fixed_size(metadata_size_bytes, bytes_reserved_for_size,
+                                   bytes_reserved_for_size,
+                                   &buffer.bit_buffer[size_pos], &coded_size)) {
+      // Need to increase bytes_reserved_for_size in the code above.
+      die("Error: Failed to write metadata size\n");
+    }
+  }
+  assert(buffer.bit_offset % 8 == 0);
+  if (aom_img_add_metadata(frame, 33 /*METADATA_TYPE_MULTILAYER*/,
+                           buffer.bit_buffer, buffer.bit_offset / 8,
+                           AOM_MIF_KEY_FRAME)) {
+    die("Error: Failed to add metadata\n");
+  }
+}
+
 #if CONFIG_AV1_DECODER
 // Returns whether there is a mismatch between the encoder's new frame and the
 // decoder's new frame.
@@ -1660,6 +1866,13 @@
     svc_params.framerate_factor[2] = 1;
   }
 
+  std::optional<libaom_examples::MultilayerMetadata> multilayer_metadata;
+  if (app_input.multilayer_metadata_file != NULL) {
+    multilayer_metadata = libaom_examples::parse_multilayer_file(
+        app_input.multilayer_metadata_file);
+    libaom_examples::print_multilayer_metadata(multilayer_metadata.value());
+  }
+
   framerate = cfg.g_timebase.den / cfg.g_timebase.num;
   set_rate_control_metrics(&rc, framerate, ss_number_layers, ts_number_layers);
 
@@ -1836,6 +2049,7 @@
           aom_codec_control(&codec, AV1E_SET_SVC_REF_FRAME_COMP_PRED,
                             &ref_frame_comp_pred);
         }
+        add_multilayer_metadata(&raw, multilayer_metadata);
         // Set the speed per layer.
         if (test_speed_per_layer) {
           int speed_per_layer = 10;

diff --git a/test/svc_encoder_rtc.sh b/test/svc_encoder_rtc.sh
index c8b2891..8df2f6b 100644
--- a/test/svc_encoder_rtc.sh
+++ b/test/svc_encoder_rtc.sh

@@ -83,6 +83,36 @@
 svc_encoder_s2_t1() {
   local encoder="${LIBAOM_BIN_PATH}/svc_encoder_rtc${AOM_TEST_EXE_SUFFIX}"
   local output_file="${AOM_TEST_OUTPUT_DIR}/svc_encoder_rtc"
+  local metadata_file="${AOM_TEST_OUTPUT_DIR}/multilayer_metadata.yaml"
+  cat > "${metadata_file}" <<EOF
+
+  # test comment
+
+use_case: 1 # alpha
+layers:
+# first layer...
+use_case: 1 # alpha
+layers:
+  - layer_type: 5 # alpha
+    luma_plane_only_flag: 1
+    layer_metadata_scope: 2 # global
+    alpha:
+      alpha_use_idc: 2 # segmentation
+      alpha_bit_depth: 8
+      alpha_transparent_value: 0
+      alpha_opaque_value: 4
+      label_type_id:
+          - 5
+          - 3
+          - 9
+          - 128
+          - 42
+
+# second layer...
+  - layer_type: 1 # texture
+    luma_plane_only_flag: 0
+
+EOF
 
   if [ ! -x "${encoder}" ]; then
     elog "${encoder} does not exist or is not executable."
@@ -99,6 +129,7 @@
       "--spatial-layers=2" \
       "--temporal-layers=1" \
       "--timebase=1/30" \
+      "--multilayer_metadata_file=${metadata_file}" \
       "${YUV_RAW_INPUT}" \
       "${YUV_RAW_INPUT}" \
       "-o ${output_file}" \
commit	bfe96c2b1baf47f8b6aaf5959319cf27777a9b25	[log] [tgz]
author	Maryla <maryla@google.com>	Wed Nov 06 10:30:02 2024 +0100
committer	Maryla Ustarroz-Calonge <maryla@google.com>	Fri Nov 08 13:09:24 2024 +0000
tree	f3ee81fb357d884e82c5f98c279dedf9303b4df7
parent	77846f5e9aafef94ca5e2c51f4d56830c9950e22 [diff]