svc_encoder_rtc: add multilayer metadata
Read metadata from a simplified YAML file and write it as frame metadata.
Bug: 377851082
Change-Id: I9200d2736376bf79a1c3d41748e225a8d46179f7
diff --git a/CMakeLists.txt b/CMakeLists.txt
index 55b2f97..a228fad 100644
--- a/CMakeLists.txt
+++ b/CMakeLists.txt
@@ -200,7 +200,9 @@
"${AOM_ROOT}/common/y4minput.c"
"${AOM_ROOT}/common/y4minput.h"
"${AOM_ROOT}/examples/encoder_util.h"
- "${AOM_ROOT}/examples/encoder_util.c")
+ "${AOM_ROOT}/examples/encoder_util.c"
+ "${AOM_ROOT}/examples/multilayer_metadata.h"
+ "${AOM_ROOT}/examples/multilayer_metadata.cc")
list(APPEND AOM_ENCODER_STATS_SOURCES "${AOM_ROOT}/stats/aomstats.c"
"${AOM_ROOT}/stats/aomstats.h" "${AOM_ROOT}/stats/rate_hist.c"
@@ -402,6 +404,7 @@
if(CONFIG_AV1_ENCODER)
add_library(aom_encoder_app_util OBJECT ${AOM_ENCODER_APP_UTIL_SOURCES})
set_property(TARGET ${example} PROPERTY FOLDER examples)
+ set_property(TARGET aom_encoder_app_util PROPERTY CXX_STANDARD 17)
endif()
endif()
@@ -521,6 +524,7 @@
$<TARGET_OBJECTS:aom_common_app_util>
$<TARGET_OBJECTS:aom_encoder_app_util>)
target_link_libraries(svc_encoder_rtc ${AOM_LIB_LINK_TYPE} aom_av1_rc)
+ set_property(TARGET svc_encoder_rtc PROPERTY CXX_STANDARD 17)
# Maintain a list of encoder example targets.
list(APPEND AOM_ENCODER_EXAMPLE_TARGETS aomenc lossless_encoder set_maps
diff --git a/examples/multilayer_metadata.cc b/examples/multilayer_metadata.cc
new file mode 100644
index 0000000..aabad46
--- /dev/null
+++ b/examples/multilayer_metadata.cc
@@ -0,0 +1,471 @@
+#include "examples/multilayer_metadata.h"
+
+#include <assert.h>
+#include <limits.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+
+#include <cstdio>
+#include <fstream>
+#include <iostream>
+#include <limits>
+#include <optional>
+#include <string>
+#include <vector>
+
+#include "aom/aom_integer.h"
+#include "examples/multilayer_metadata.h"
+
+extern void usage_exit(void);
+
+namespace libaom_examples {
+
+namespace {
+
+constexpr int kMaxNumSpatialLayers = 4;
+
+// Removes comments and trailing whitespace from the line.
+// Everything from the first '#' onwards is dropped, then trailing spaces, tabs
+// and carriage returns are stripped, so that files with CRLF line endings or
+// stray tabs parse the same way as plain ones.
+void cleanup_line(std::string &line) {
+  // Remove everything after the first '#'.
+  const std::size_t comment_pos = line.find('#');
+  if (comment_pos != std::string::npos) {
+    line.resize(comment_pos);
+  }
+  // Remove whitespace at the end of the line. find_last_not_of() returns npos
+  // when the line is empty or all whitespace; npos + 1 wraps around to 0,
+  // which empties the line.
+  line.resize(line.find_last_not_of(" \t\r") + 1);
+}
+
+// Finds the indentation level of the line, and sets 'has_list_prefix' to true
+// if the line has a '-' indicating a new item in a list.
+// The indentation is the number of leading spaces, tabs and (at most one) list
+// marker '-', i.e. the index of the first character of the field or value.
+void get_indent(const std::string &line, int *indent, bool *has_list_prefix) {
+  *indent = 0;
+  *has_list_prefix = false;
+  const int length = (int)line.length();
+  while (*indent < length) {
+    const char c = line[*indent];
+    if (c == '-') {
+      // Only the first '-' is a list marker. A second one belongs to the value
+      // (e.g. the sign in "- -5") and must not be swallowed as indentation.
+      if (*has_list_prefix) break;
+      *has_list_prefix = true;
+    } else if (c != ' ' && c != '\t') {
+      break;
+    }
+    ++(*indent);
+  }
+}
+
+/*
+ * Parses the next line from the file, skipping empty lines.
+ * Returns false if the end of the file was reached, or if the line was indented
+ * less than 'min_indent', meaning that parsing should go back to the previous
+ * function in the stack. In the latter case the line is "un-read" so that the
+ * caller's next parse_line() call sees it again.
+ *
+ * 'min_indent' is the minimum indentation expected for the next line.
+ * 'is_list' must be true if the line is allowed to contain list items ('-').
+ * 'indent' MUST be initialized to -1 before the first call, and is then set to
+ * the indentation of the line. All lines read through the same 'indent'
+ * variable must share the same indentation.
+ * 'has_list_prefix' is set to true if the line starts a new list item with '-'.
+ * 'line_idx' is set to the index of the last line read.
+ * 'field_name' is set to the field name if the line contains a colon, or to an
+ * empty string otherwise.
+ * 'value' is set to the integer value of the line, or to 0 if there is no text
+ * after the colon.
+ *
+ * Terminates the process on bad indentation, on an unexpected list item, or if
+ * the value is not a decimal integer that fits in an int.
+ */
+bool parse_line(std::fstream &file, int min_indent, bool is_list, int *indent,
+                bool *has_list_prefix, int *line_idx, std::string *field_name,
+                int *value) {
+  *field_name = "";
+  *value = 0;
+  std::string line;
+  std::fstream::pos_type prev_file_position;
+  const int prev_indent = *indent;
+  while (prev_file_position = file.tellg(), std::getline(file, line)) {
+    cleanup_line(line);
+    get_indent(line, indent, has_list_prefix);
+    line = line.substr(*indent);  // skip indentation
+    // If the line is indented less than 'min_indent', it belongs to the outer
+    // object, and parsing should go back to the previous function in the stack.
+    if (!line.empty() && *indent < min_indent) {
+      // Undo reading the last line. seekg() is the counterpart of tellg(), and
+      // it clears eofbit, which is set when the line just read was the last
+      // one in the file and had no trailing newline.
+      if (!file.seekg(prev_file_position)) {
+        fprintf(stderr, "Failed to seek to previous file position\n");
+        exit(EXIT_FAILURE);
+      }
+      return false;
+    }
+
+    ++(*line_idx);
+    if (line.empty()) continue;
+
+    if (prev_indent >= 0 && prev_indent != *indent) {
+      fprintf(stderr, "Error: Bad indentation at line %d\n", *line_idx);
+      exit(EXIT_FAILURE);
+    }
+    if (*has_list_prefix && !is_list) {
+      fprintf(stderr, "Error: Unexpected list item at line %d\n", *line_idx);
+      exit(EXIT_FAILURE);
+    }
+
+    // Without a colon the whole line is the value (e.g. a list item).
+    std::string value_str = line;
+    const size_t colon_pos = line.find(':');
+    if (colon_pos != std::string::npos) {
+      *field_name = line.substr(0, colon_pos);
+      value_str = line.substr(colon_pos + 1);
+    }
+    char *endptr;
+    const long long parsed = strtoll(value_str.c_str(), &endptr, 10);
+    if (*endptr != '\0') {
+      fprintf(stderr, "Error: Failed to parse number from '%s'\n",
+              value_str.c_str());
+      exit(EXIT_FAILURE);
+    }
+    // Reject values that would be silently truncated by the conversion to int.
+    if (parsed < INT_MIN || parsed > INT_MAX) {
+      fprintf(stderr, "Error: Value '%s' is out of range at line %d\n",
+              value_str.c_str(), *line_idx);
+      exit(EXIT_FAILURE);
+    }
+    *value = (int)parsed;
+    return true;
+  }
+  return false;  // Reached the end of the file.
+}
+
+// Parses a list of integers, one "- <number>" item per line, and returns them
+// in file order. 'min_indent' is the minimum indentation of the list items and
+// 'line_idx' is the running line counter shared with parse_line().
+// Terminates the process if an item has a field name, lacks the '-' prefix, or
+// does not fit in T.
+template <typename T>
+std::vector<T> parse_integer_list(std::fstream &file, int min_indent,
+                                  int *line_idx) {
+  static_assert(std::numeric_limits<T>::is_integer &&
+                    sizeof(T) < sizeof(long long),
+                "T must be an integer type narrower than long long");
+  // Compare in a wider type: casting e.g. the maximum of uint32_t to int
+  // yields -1, which would make every non-negative value look out of range.
+  const long long min_value = std::numeric_limits<T>::min();
+  const long long max_value = std::numeric_limits<T>::max();
+
+  bool has_list_prefix;
+  int indent = -1;
+  std::string field_name;
+  int value;
+  std::vector<T> result;
+  while (parse_line(file, min_indent, /*is_list=*/true, &indent,
+                    &has_list_prefix, line_idx, &field_name, &value)) {
+    if (!field_name.empty()) {
+      fprintf(
+          stderr,
+          "Error: Unexpected field name '%s' at line %d, expected a number\n",
+          field_name.c_str(), *line_idx);
+      exit(EXIT_FAILURE);
+    } else if (!has_list_prefix) {
+      fprintf(stderr, "Error: Missing list prefix '-' at line %d\n", *line_idx);
+      exit(EXIT_FAILURE);
+    } else if (value > max_value || value < min_value) {
+      fprintf(stderr, "Error: Value %d is out of range at line %d\n", value,
+              *line_idx);
+      exit(EXIT_FAILURE);
+    } else {
+      result.push_back(static_cast<T>(value));
+    }
+  }
+  return result;
+}
+
+// Parses the fields of a color description block (color_range,
+// color_primaries, transfer_characteristics, matrix_coefficients).
+// Fields that are not present in the file are left at zero.
+// Terminates the process on an unknown field, like the other parsers in this
+// file.
+ColorProperties parse_color_properties(std::fstream &file, int min_indent,
+                                       int *line_idx) {
+  bool has_list_prefix;
+  int indent = -1;
+  std::string field_name;
+  int value;
+  ColorProperties color = {};
+  while (parse_line(file, min_indent, /*is_list=*/false, &indent,
+                    &has_list_prefix, line_idx, &field_name, &value)) {
+    if (field_name == "color_range") {
+      color.color_range = value;
+    } else if (field_name == "color_primaries") {
+      color.color_primaries = value;
+    } else if (field_name == "transfer_characteristics") {
+      color.transfer_characteristics = value;
+    } else if (field_name == "matrix_coefficients") {
+      color.matrix_coefficients = value;
+    } else {
+      fprintf(stderr, "Error: Unknown field '%s' at line %d\n",
+              field_name.c_str(), *line_idx);
+      // Previously the error was printed but parsing carried on, silently
+      // ignoring the field.
+      exit(EXIT_FAILURE);
+    }
+  }
+  return color;
+}
+
+// Parses the fields nested under an 'alpha:' entry.
+// Fields that are not present in the file are left at zero/empty/absent.
+// Terminates the process on an unknown field.
+AlphaInformation parse_multilayer_layer_alpha(std::fstream &file,
+                                              int min_indent, int *line_idx) {
+  bool has_list_prefix;
+  int indent = -1;
+  std::string field_name;
+  int value;
+  AlphaInformation alpha_info = {};
+  while (parse_line(file, min_indent, /*is_list=*/false, &indent,
+                    &has_list_prefix, line_idx, &field_name, &value)) {
+    if (field_name == "alpha_use_idc") {
+      alpha_info.alpha_use_idc = (AlphaUse)value;
+    } else if (field_name == "alpha_bit_depth") {
+      alpha_info.alpha_bit_depth = value;
+    } else if (field_name == "alpha_clip_idc") {
+      alpha_info.alpha_clip_idc = value;
+    } else if (field_name == "alpha_incr_flag") {
+      alpha_info.alpha_incr_flag = value;
+    } else if (field_name == "alpha_transparent_value") {
+      alpha_info.alpha_transparent_value = value;
+    } else if (field_name == "alpha_opaque_value") {
+      alpha_info.alpha_opaque_value = value;
+    } else if (field_name == "alpha_color_description") {
+      // The nested block must be indented strictly more than this field.
+      // With 'indent' instead of 'indent + 1', the nested parser would also
+      // consume the next sibling field and fail with "Bad indentation".
+      alpha_info.alpha_color_description = parse_color_properties(
+          file, /*min_indent=*/indent + 1, line_idx);
+    } else if (field_name == "label_type_id") {
+      alpha_info.label_type_id = parse_integer_list<uint16_t>(
+          file, /*min_indent=*/indent + 1, line_idx);
+    } else {
+      fprintf(stderr, "Error: Unknown field '%s' at line %d\n",
+              field_name.c_str(), *line_idx);
+      exit(EXIT_FAILURE);
+    }
+  }
+  return alpha_info;
+}
+
+// Parses the sign_flag/exponent/mantissa fields of a depth representation
+// element (z_near, z_far, d_min or d_max).
+// Fields that are not present in the file are left at zero.
+// Terminates the process on an unknown field.
+DepthRepresentationElement parse_depth_representation_element(
+    std::fstream &file, int min_indent, int *line_idx) {
+  bool has_list_prefix;
+  int indent = -1;
+  std::string field_name;
+  int value;
+  // Value-initialize so that fields missing from the file are zero rather
+  // than indeterminate.
+  DepthRepresentationElement element = {};
+  while (parse_line(file, min_indent, /*is_list=*/false, &indent,
+                    &has_list_prefix, line_idx, &field_name, &value)) {
+    if (field_name == "sign_flag") {
+      element.sign_flag = value;
+    } else if (field_name == "exponent") {
+      element.exponent = value;
+    } else if (field_name == "mantissa") {
+      element.mantissa = value;
+    } else {
+      fprintf(stderr, "Error: Unknown field '%s' at line %d\n",
+              field_name.c_str(), *line_idx);
+      exit(EXIT_FAILURE);
+    }
+  }
+  return element;
+}
+
+// Parses the fields nested under a 'depth:' entry.
+// Fields that are not present in the file are left at zero/empty/absent.
+// Terminates the process on an unknown field.
+DepthInformation parse_multilayer_layer_depth(std::fstream &file,
+                                              int min_indent, int *line_idx) {
+  bool has_list_prefix;
+  int indent = -1;
+  std::string field_name;
+  int value;
+  DepthInformation depth_info = {};
+  while (parse_line(file, min_indent, /*is_list=*/false, &indent,
+                    &has_list_prefix, line_idx, &field_name, &value)) {
+    // Nested blocks must be indented strictly more than their parent field,
+    // hence 'indent + 1'. With 'indent', the nested parser would also consume
+    // the next sibling field and fail with "Bad indentation".
+    if (field_name == "z_near") {
+      depth_info.z_near = parse_depth_representation_element(
+          file, /*min_indent=*/indent + 1, line_idx);
+    } else if (field_name == "z_far") {
+      depth_info.z_far = parse_depth_representation_element(
+          file, /*min_indent=*/indent + 1, line_idx);
+    } else if (field_name == "d_min") {
+      depth_info.d_min = parse_depth_representation_element(
+          file, /*min_indent=*/indent + 1, line_idx);
+    } else if (field_name == "d_max") {
+      depth_info.d_max = parse_depth_representation_element(
+          file, /*min_indent=*/indent + 1, line_idx);
+    } else if (field_name == "depth_representation_type") {
+      depth_info.depth_representation_type = value;
+    } else if (field_name == "disparity_ref_view_id") {
+      depth_info.disparity_ref_view_id = value;
+    } else if (field_name == "depth_nonlinear_precision") {
+      depth_info.depth_nonlinear_precision = value;
+    } else if (field_name == "depth_nonlinear_representation_model") {
+      depth_info.depth_nonlinear_representation_model =
+          parse_integer_list<uint32_t>(file, /*min_indent=*/indent + 1,
+                                       line_idx);
+    } else {
+      fprintf(stderr, "Error: Unknown field '%s' at line %d\n",
+              field_name.c_str(), *line_idx);
+      exit(EXIT_FAILURE);
+    }
+  }
+  return depth_info;
+}
+
+// Parses the list nested under 'layers:'. Each '-' starts a new layer; the
+// fields that follow (on the same line and on subsequent lines with the same
+// indentation) are stored in that layer.
+// Terminates the process if there are more than kMaxNumSpatialLayers layers,
+// if a field appears before the first '-', or on an unknown field.
+std::vector<LayerMetadata> parse_multilayer_layer_metadata(std::fstream &file,
+                                                           int min_indent,
+                                                           int *line_idx) {
+  bool has_list_prefix;
+  int indent = -1;
+  std::string field_name;
+  int value;
+  std::vector<LayerMetadata> layers;
+  while (parse_line(file, min_indent, /*is_list=*/true, &indent,
+                    &has_list_prefix, line_idx, &field_name, &value)) {
+    if (has_list_prefix) {
+      if ((int)layers.size() >= kMaxNumSpatialLayers) {
+        fprintf(stderr,
+                "Error: Too many layers at line %d, the maximum is %d\n",
+                *line_idx, kMaxNumSpatialLayers);
+        exit(EXIT_FAILURE);
+      }
+      layers.emplace_back();
+    }
+    if (layers.empty()) {
+      fprintf(stderr, "Error: Missing list prefix '-' at line %d\n", *line_idx);
+      exit(EXIT_FAILURE);
+    }
+    LayerMetadata *layer = &layers.back();
+    if (field_name == "layer_type") {
+      layer->layer_type = (LayerType)value;
+    } else if (field_name == "luma_plane_only_flag") {
+      layer->luma_plane_only_flag = value;
+    } else if (field_name == "layer_view_type") {
+      layer->layer_view_type = (MultilayerViewType)value;
+    } else if (field_name == "group_id") {
+      layer->group_id = value;
+    } else if (field_name == "layer_dependency_idc") {
+      layer->layer_dependency_idc = value;
+    } else if (field_name == "layer_metadata_scope") {
+      layer->layer_metadata_scope = (MultilayerMetadataScope)value;
+    } else if (field_name == "layer_color_description") {
+      // Nested blocks must be indented strictly more than their parent field,
+      // hence 'indent + 1'. With 'indent', the nested parser would also
+      // consume the next sibling field and fail with "Bad indentation".
+      layer->layer_color_description = parse_color_properties(
+          file, /*min_indent=*/indent + 1, line_idx);
+    } else if (field_name == "alpha") {
+      layer->global_alpha_info =
+          parse_multilayer_layer_alpha(file,
+                                       /*min_indent=*/indent + 1, line_idx);
+    } else if (field_name == "depth") {
+      layer->global_depth_info =
+          parse_multilayer_layer_depth(file,
+                                       /*min_indent=*/indent + 1, line_idx);
+    } else {
+      fprintf(stderr, "Error: Unknown field %s at line %d\n",
+              field_name.c_str(), *line_idx);
+      exit(EXIT_FAILURE);
+    }
+  }
+  return layers;
+}
+
+// Parses the top-level fields of the metadata file: 'use_case' and 'layers'.
+// Terminates the process on any other field.
+MultilayerMetadata parse_multilayer_metadata(std::fstream &file) {
+  MultilayerMetadata result = {};
+  int line_number = 0;
+  int cur_indent = -1;
+  bool list_prefix;
+  std::string key;
+  int number;
+  for (;;) {
+    const bool got_line =
+        parse_line(file, /*min_indent=*/0, /*is_list=*/false, &cur_indent,
+                   &list_prefix, &line_number, &key, &number);
+    if (!got_line) break;
+
+    if (key == "use_case") {
+      result.use_case = (MultilayerUseCase)number;
+      continue;
+    }
+    if (key == "layers") {
+      // The layer list is nested, so it must be indented at least one more
+      // column than the 'layers' field itself.
+      result.layers = parse_multilayer_layer_metadata(
+          file, /*min_indent=*/cur_indent + 1, &line_number);
+      continue;
+    }
+    fprintf(stderr, "Error: Unknown field %s at line %d\n", key.c_str(),
+            line_number);
+    exit(EXIT_FAILURE);
+  }
+  return result;
+}
+
+// Returns a one-line description of 'element' for debug printing, or "absent"
+// if it has no value.
+std::string format_depth_representation_element(
+    const std::optional<DepthRepresentationElement> &element) {
+  if (element.has_value()) {
+    const DepthRepresentationElement &e = *element;
+    std::string text = "sign_flag ";
+    text += std::to_string(e.sign_flag);
+    text += " exponent ";
+    text += std::to_string(e.exponent);
+    text += " mantissa ";
+    text += std::to_string(e.mantissa);
+    return text;
+  }
+  return "absent";
+}
+
+// Returns "<primaries>/<transfer>/<matrix>" followed by 'F' (full range) or
+// 'L' (limited range) for debug printing, or "absent" if 'color_properties'
+// has no value.
+std::string format_color_properties(
+    const std::optional<ColorProperties> &color_properties) {
+  if (color_properties.has_value()) {
+    const ColorProperties &props = *color_properties;
+    std::string text = std::to_string(props.color_primaries);
+    text += "/";
+    text += std::to_string(props.transfer_characteristics);
+    text += "/";
+    text += std::to_string(props.matrix_coefficients);
+    text += props.color_range ? "F" : "L";
+    return text;
+  }
+  return "absent";
+}
+
+} // namespace
+
+// Opens 'metadata_path' and parses it as multilayer metadata.
+// Terminates the process if the file cannot be opened, cannot be parsed, or
+// contains no layers.
+MultilayerMetadata parse_multilayer_file(const char *metadata_path) {
+  // Open for reading only. The default std::fstream mode is in|out, which
+  // fails on files the process is not allowed to write to.
+  std::fstream file(metadata_path, std::ios::in);
+  if (!file.is_open()) {
+    fprintf(stderr, "Error: Failed to open %s\n", metadata_path);
+    exit(EXIT_FAILURE);
+  }
+
+  const MultilayerMetadata multilayer = parse_multilayer_metadata(file);
+  if (multilayer.layers.empty()) {
+    fprintf(stderr, "Error: No layers found, there must be at least one\n");
+    exit(EXIT_FAILURE);
+  }
+  return multilayer;
+}
+
+// Prints 'multilayer' to stdout in a human-readable form for debugging.
+// The alpha block is printed only for alpha layers and the depth block only
+// for depth layers; other layer types print the common fields only.
+void print_multilayer_metadata(const MultilayerMetadata &multilayer) {
+  printf("=== Multilayer metadata ===\n");
+  printf("use_case: %d\n", multilayer.use_case);
+  for (size_t i = 0; i < multilayer.layers.size(); ++i) {
+    const LayerMetadata &layer = multilayer.layers[i];
+    printf("layer %d\n", (int)i);
+    printf(" layer_type: %d\n", layer.layer_type);
+    printf(" luma_plane_only_flag: %d\n", layer.luma_plane_only_flag);
+    printf(" layer_view_type: %d\n", layer.layer_view_type);
+    printf(" group_id: %d\n", layer.group_id);
+    printf(" layer_dependency_idc: %d\n", layer.layer_dependency_idc);
+    printf(" layer_metadata_scope: %d\n", layer.layer_metadata_scope);
+    printf(" layer_color_description: %s\n",
+           format_color_properties(layer.layer_color_description).c_str());
+    if (layer.layer_type == MULTIALYER_LAYER_TYPE_ALPHA) {
+      const AlphaInformation &alpha = layer.global_alpha_info;
+      printf(" alpha:\n");
+      printf(" alpha_use_idc: %d\n", alpha.alpha_use_idc);
+      printf(" alpha_bit_depth: %d\n", alpha.alpha_bit_depth);
+      printf(" alpha_clip_idc: %d\n", alpha.alpha_clip_idc);
+      printf(" alpha_incr_flag: %d\n", alpha.alpha_incr_flag);
+      printf(" alpha_transparent_value: %hu\n", alpha.alpha_transparent_value);
+      printf(" alpha_opaque_value: %hu\n", alpha.alpha_opaque_value);
+      printf(" alpha_color_description: %s\n",
+             format_color_properties(alpha.alpha_color_description).c_str());
+      printf(" label_type_id:");
+      for (uint16_t label_type_id : alpha.label_type_id) {
+        printf(" %d", label_type_id);
+      }
+      printf("\n");
+    } else if (layer.layer_type == MULTIALYER_LAYER_TYPE_DEPTH) {
+      const DepthInformation &depth = layer.global_depth_info;
+      printf(" depth:\n");
+      printf(" z_near_flag %s\n",
+             format_depth_representation_element(depth.z_near).c_str());
+      printf(" z_far_flag %s\n",
+             format_depth_representation_element(depth.z_far).c_str());
+      printf(" d_min_flag %s\n",
+             format_depth_representation_element(depth.d_min).c_str());
+      printf(" d_max_flag %s\n",
+             format_depth_representation_element(depth.d_max).c_str());
+      printf(" depth_representation_type: %d\n",
+             depth.depth_representation_type);
+      printf(" disparity_ref_view_id: %d\n", depth.disparity_ref_view_id);
+      printf(" depth_nonlinear_precision: %d\n",
+             depth.depth_nonlinear_precision);
+      printf(" depth_nonlinear_representation_model:");
+      for (uint32_t depth_nonlinear_representation_model :
+           depth.depth_nonlinear_representation_model) {
+        // The values are unsigned 32-bit, so print them as unsigned.
+        printf(" %u", depth_nonlinear_representation_model);
+      }
+      printf("\n");
+    }
+  }
+  printf("\n");
+}
+
+} // namespace libaom_examples
diff --git a/examples/multilayer_metadata.h b/examples/multilayer_metadata.h
new file mode 100644
index 0000000..9001542
--- /dev/null
+++ b/examples/multilayer_metadata.h
@@ -0,0 +1,132 @@
+#ifndef AOM_EXAMPLES_MULTILAYER_METADATA_H_
+#define AOM_EXAMPLES_MULTILAYER_METADATA_H_
+
+#include <cstdint>
+#include <optional>
+#include <vector>
+
+namespace libaom_examples {
+
+// Color description, used for LayerMetadata::layer_color_description and
+// AlphaInformation::alpha_color_description.
+struct ColorProperties {
+ bool color_range; // true for full range values
+ uint8_t color_primaries;
+ uint8_t transfer_characteristics;
+ uint8_t matrix_coefficients;
+};
+
+// Values for AlphaInformation::alpha_use_idc.
+enum AlphaUse {
+ ALPHA_STRAIGHT = 0,
+ ALPHA_PREMULTIPLIED = 1,
+ ALPHA_SEGMENTATION = 2,
+ ALPHA_UNSPECIFIED = 3,
+};
+
+// Alpha-specific metadata of a layer, see LayerMetadata::global_alpha_info.
+// The bracketed ranges are the values each field is expected to take; they
+// are not enforced by this struct.
+struct AlphaInformation {
+ AlphaUse alpha_use_idc; // [0, 7]
+ uint8_t alpha_bit_depth; // [8, 15]
+ uint8_t alpha_clip_idc; // [0, 3]
+ bool alpha_incr_flag;
+ uint16_t alpha_transparent_value; // [0, 1<<alpha_bit_depth]
+ uint16_t alpha_opaque_value; // [0, 1<<alpha_bit_depth]
+ // Relevant for ALPHA_STRAIGHT only.
+ std::optional<ColorProperties> alpha_color_description;
+ // Relevant for ALPHA_SEGMENTATION only.
+ // Must be either empty or have the same size as the number of values between
+ // alpha_transparent_value and alpha_opaque_value, inclusively.
+ std::vector<uint16_t> label_type_id;
+};
+
+// Sign/exponent/mantissa triple used for the optional z_near, z_far, d_min and
+// d_max fields of DepthInformation.
+// TODO: maryla - parse floats directly and convert to this wire
+// representation at write time.
+struct DepthRepresentationElement {
+ bool sign_flag;
+ uint8_t exponent; // [0, 126]
+ uint32_t mantissa;
+};
+
+// Depth-specific metadata of a layer, see LayerMetadata::global_depth_info.
+// An element without a value is treated as absent.
+struct DepthInformation {
+ std::optional<DepthRepresentationElement> z_near;
+ std::optional<DepthRepresentationElement> z_far;
+ std::optional<DepthRepresentationElement> d_min;
+ std::optional<DepthRepresentationElement> d_max;
+ uint8_t depth_representation_type; // [0, 15]
+ uint8_t disparity_ref_view_id; // [0, 3]
+ uint8_t depth_nonlinear_precision; // [8, 23]
+ // [0, 1<<depth_nonlinear_precision]
+ std::vector<uint32_t> depth_nonlinear_representation_model;
+};
+
+// Values for MultilayerMetadata::use_case.
+enum MultilayerUseCase {
+ MULTILAYER_USE_CASE_UNSPECIFIED = 0,
+ MULTILAYER_USE_CASE_ALPHA = 1,
+ MULTILAYER_USE_CASE_DEPTH = 2,
+ MULTILAYER_USE_CASE_STEREO = 3,
+ MULTILAYER_USE_CASE_STEREO_ALPHA_GLOBAL = 4,
+ MULTILAYER_USE_CASE_STEREO_DEPTH_GLOBAL = 5,
+ MULTILAYER_USE_CASE_STEREO_ALPHA = 6,
+ MULTILAYER_USE_CASE_STEREO_DEPTH = 7,
+ MULTILAYER_USE_CASE_444 = 8,
+ MULTILAYER_USE_CASE_420_444 = 9,
+ MULTILAYER_USE_CASE_444_ALPHA = 10,
+ MULTILAYER_USE_CASE_444_DEPTH = 11,
+};
+
+// Values for LayerMetadata::layer_type.
+// NOTE(review): the "MULTIALYER" prefix looks like a misspelling of
+// "MULTILAYER"; renaming it requires updating every user of these enumerators.
+enum LayerType {
+ MULTIALYER_LAYER_TYPE_UNSPECIFIED = 0,
+ MULTIALYER_LAYER_TYPE_TEXTURE = 1,
+ MULTIALYER_LAYER_TYPE_TEXTURE_1 = 2,
+ MULTIALYER_LAYER_TYPE_TEXTURE_2 = 3,
+ MULTIALYER_LAYER_TYPE_TEXTURE_3 = 4,
+ MULTIALYER_LAYER_TYPE_ALPHA = 5,
+ MULTIALYER_LAYER_TYPE_DEPTH = 6,
+};
+
+// Values for LayerMetadata::layer_metadata_scope.
+enum MultilayerMetadataScope {
+ SCOPE_UNSPECIFIED = 0,
+ SCOPE_LOCAL = 1,
+ SCOPE_GLOBAL = 2,
+ SCOPE_MIXED = 3,
+};
+
+// Values for LayerMetadata::layer_view_type.
+enum MultilayerViewType {
+ VIEW_UNSPECIFIED = 0,
+ VIEW_CENTER = 1,
+ VIEW_LEFT = 2,
+ VIEW_RIGHT = 3,
+};
+
+// Metadata for a single layer. The bracketed ranges are the values each field
+// is expected to take; they are not enforced by this struct.
+struct LayerMetadata {
+ LayerType layer_type; // [0, 31]
+ bool luma_plane_only_flag;
+ MultilayerViewType layer_view_type; // [0, 7]
+ uint8_t group_id; // [0, 3]
+ uint8_t layer_dependency_idc; // [0, 7]
+ MultilayerMetadataScope layer_metadata_scope; // [0, 3]
+
+ std::optional<ColorProperties> layer_color_description;
+
+ // Relevant for MULTIALYER_LAYER_TYPE_ALPHA with SCOPE_GLOBAL or SCOPE_MIXED.
+ AlphaInformation global_alpha_info;
+ // Relevant for MULTIALYER_LAYER_TYPE_DEPTH with SCOPE_GLOBAL or SCOPE_MIXED.
+ DepthInformation global_depth_info;
+};
+
+// Top-level multilayer metadata: a use case plus one entry per layer.
+struct MultilayerMetadata {
+ MultilayerUseCase use_case; // [0, 63]
+ std::vector<LayerMetadata> layers;
+};
+
+// Parses a multilayer metadata file.
+// Terminates the process in case of error.
+// The metadata is expected to be in a subset of the YAML format supporting
+// simple lists and maps with integer values, and comments.
+// Does very little validation on the metadata, e.g. does not check that the
+// values are in the correct range.
+MultilayerMetadata parse_multilayer_file(const char *metadata_path);
+
+// Prints the multilayer metadata to stdout for debugging.
+void print_multilayer_metadata(const MultilayerMetadata &multilayer);
+
+} // namespace libaom_examples
+
+#endif // AOM_EXAMPLES_MULTILAYER_METADATA_H_
diff --git a/examples/svc_encoder_rtc.cc b/examples/svc_encoder_rtc.cc
index ad40132..b115a0a 100644
--- a/examples/svc_encoder_rtc.cc
+++ b/examples/svc_encoder_rtc.cc
@@ -20,6 +20,7 @@
#include <string.h>
#include <memory>
+#include <optional>
#include "config/aom_config.h"
@@ -27,13 +28,17 @@
#include "aom/aom_decoder.h"
#endif
#include "aom/aom_encoder.h"
+#include "aom/aom_image.h"
+#include "aom/aom_integer.h"
#include "aom/aomcx.h"
+#include "aom_dsp/bitwriter_buffer.h"
+#include "aom_ports/aom_timer.h"
+#include "av1/ratectrl_rtc.h"
#include "common/args.h"
#include "common/tools_common.h"
#include "common/video_writer.h"
#include "examples/encoder_util.h"
-#include "aom_ports/aom_timer.h"
-#include "av1/ratectrl_rtc.h"
+#include "examples/multilayer_metadata.h"
#define OPTION_BUFFER_SIZE 1024
#define MAX_NUM_SPATIAL_LAYERS 4
@@ -51,6 +56,7 @@
int show_psnr;
bool use_external_rc;
bool scale_factors_explicitly_set;
+ const char *multilayer_metadata_file;
} AppInput;
typedef enum {
@@ -116,6 +122,9 @@
};
static const arg_def_t tune_content_arg = ARG_DEF_ENUM(
NULL, "tune-content", 1, "Tune content type", tune_content_enum);
+static const arg_def_t multilayer_metadata_file_arg =
+ ARG_DEF("ml", "multilayer_metadata_file", 1,
+ "Experimental: path to multilayer metadata file");
#if CONFIG_AV1_HIGHBITDEPTH
static const struct arg_enum_list bitdepth_enum[] = { { "8", AOM_BITS_8 },
@@ -394,6 +403,8 @@
app_input->show_psnr = 1;
} else if (arg_match(&arg, &ext_rc_arg, argi)) {
app_input->use_external_rc = true;
+ } else if (arg_match(&arg, &multilayer_metadata_file_arg, argi)) {
+ app_input->multilayer_metadata_file = arg.val;
} else {
++argj;
}
@@ -1353,6 +1364,201 @@
}
}
+// Writes 'data - offset' to 'wb' as an unsigned literal of 'bits' bits.
+// Dies if 'data' is outside [offset, offset + 2^bits - 1].
+static void write_literal(struct aom_write_bit_buffer *wb, int data, int bits,
+                          int offset = 0) {
+  // Do the range check in 64 bits: callers pass up to 32 bits, for which
+  // (1 << bits) overflows int.
+  const int64_t to_write = (int64_t)data - offset;
+  const int64_t max_value = ((int64_t)1 << bits) - 1;
+  if (to_write < 0 || to_write > max_value) {
+    die("Invalid data, value %d out of range [%d, %lld]\n", data, offset,
+        (long long)(offset + max_value));
+  }
+  aom_wb_write_literal(wb, (int)to_write, bits);
+}
+
+// Writes 'element' as: sign_flag (1 bit), exponent (7 bits), mantissa length
+// minus one (5 bits), then the mantissa itself on that many bits.
+// Writes nothing if 'element' has no value.
+static void write_depth_representation_element(
+    struct aom_write_bit_buffer *buffer,
+    const std::optional<libaom_examples::DepthRepresentationElement> &element) {
+  if (!element.has_value()) {
+    return;
+  }
+  write_literal(buffer, element->sign_flag, 1);
+  write_literal(buffer, element->exponent, 7);
+  // Number of bits needed to represent the mantissa, at least 1.
+  int mantissa_len = 1;
+  while (mantissa_len < 32 && (element->mantissa >> mantissa_len != 0)) {
+    ++mantissa_len;
+  }
+  write_literal(buffer, mantissa_len - 1, 5);
+  // write_literal() takes an int, so a mantissa wider than 31 bits cannot be
+  // passed in one call. Write the high bits first and then the low 16 bits.
+  // The bit writer emits the most significant bit first, so the bits produced
+  // are the same as for a single write.
+  if (mantissa_len > 16) {
+    write_literal(buffer, (int)(element->mantissa >> 16), mantissa_len - 16);
+    write_literal(buffer, (int)(element->mantissa & 0xffff), 16);
+  } else {
+    write_literal(buffer, (int)element->mantissa, mantissa_len);
+  }
+}
+
+// Writes a presence flag (1 bit) followed by either the color description
+// (range on 1 bit, then primaries, transfer characteristics and matrix
+// coefficients on 8 bits each) or a single reserved bit when absent.
+static void write_color_properties(
+    struct aom_write_bit_buffer *buffer,
+    const std::optional<libaom_examples::ColorProperties> &color_properties) {
+  const bool present = color_properties.has_value();
+  write_literal(buffer, present, 1);
+  if (!present) {
+    write_literal(buffer, 0, 1);  // reserved_1bit
+    return;
+  }
+  const libaom_examples::ColorProperties &props = *color_properties;
+  write_literal(buffer, props.color_range, 1);
+  write_literal(buffer, props.color_primaries, 8);
+  write_literal(buffer, props.transfer_characteristics, 8);
+  write_literal(buffer, props.matrix_coefficients, 8);
+}
+
+// Serializes 'multilayer' and attaches it to 'frame' as metadata of type 33
+// (METADATA_TYPE_MULTILAYER) with the AOM_MIF_KEY_FRAME insert flag.
+// Does nothing if 'multilayer' has no value.
+// Layout: use_case (6 bits), layer count minus one (2 bits), then for each
+// layer a 3-byte leb128 size followed by the layer payload.
+// Dies if the metadata is invalid or cannot be written.
+static void add_multilayer_metadata(
+    aom_image_t *frame,
+    const std::optional<libaom_examples::MultilayerMetadata> &multilayer) {
+  if (!multilayer.has_value()) {
+    return;
+  }
+  // Validate the layer count first: the buffer below is sized from it, so
+  // writing anything before this check could write into an empty buffer.
+  if (multilayer->layers.empty()) {
+    die("Invalid multilayer metadata, no layers found\n");
+  } else if (multilayer->layers.size() > MAX_NUM_SPATIAL_LAYERS) {
+    die("Invalid multilayer metadata, too many layers (max is %d)\n",
+        MAX_NUM_SPATIAL_LAYERS);
+  }
+  // Pretty large buffer to accommodate the largest multilayer metadata
+  // possible, with 4 alpha segmentation layers (each can be up to about 66kB).
+  std::vector<uint8_t> data(66000 * multilayer->layers.size());
+  struct aom_write_bit_buffer buffer = { data.data(), 0 };
+
+  write_literal(&buffer, multilayer->use_case, 6);
+  write_literal(&buffer, (int)multilayer->layers.size() - 1, 2);
+  assert(buffer.bit_offset % 8 == 0);
+  for (size_t i = 0; i < multilayer->layers.size(); ++i) {
+    const libaom_examples::LayerMetadata &layer = multilayer->layers[i];
+    // Alpha info with segmentation with labels can be up to about 66k bytes,
+    // which requires 3 bytes to encode in leb128.
+    const int bytes_reserved_for_size = 3;
+    // Placeholder for layer_metadata_size which will be written later.
+    write_literal(&buffer, 0, bytes_reserved_for_size * 8);
+    const uint32_t metadata_start = buffer.bit_offset;
+    write_literal(&buffer, (int)i, 2);  // ml_spatial_id
+    write_literal(&buffer, layer.layer_type, 5);
+    write_literal(&buffer, layer.luma_plane_only_flag, 1);
+    write_literal(&buffer, layer.layer_view_type, 3);
+    write_literal(&buffer, layer.group_id, 2);
+    write_literal(&buffer, layer.layer_dependency_idc, 3);
+    write_literal(&buffer, layer.layer_metadata_scope, 2);
+    write_literal(&buffer, 0, 4);  // ml_reserved_4bits
+
+    if (i > 0) {
+      write_color_properties(&buffer, layer.layer_color_description);
+    } else {
+      write_literal(&buffer, 0, 2);  // ml_reserved_2bits
+    }
+    assert(buffer.bit_offset % 8 == 0);
+
+    if (multilayer->use_case < 12) {
+      if (layer.layer_type == libaom_examples::MULTIALYER_LAYER_TYPE_ALPHA &&
+          layer.layer_metadata_scope >= libaom_examples::SCOPE_GLOBAL) {
+        const libaom_examples::AlphaInformation &alpha_info =
+            layer.global_alpha_info;
+        write_literal(&buffer, alpha_info.alpha_use_idc, 3);
+        write_literal(&buffer, alpha_info.alpha_bit_depth, 3, /*offset=*/8);
+        write_literal(&buffer, alpha_info.alpha_clip_idc, 2);
+        write_literal(&buffer, alpha_info.alpha_incr_flag, 1);
+        write_literal(&buffer, alpha_info.alpha_transparent_value,
+                      alpha_info.alpha_bit_depth);
+        write_literal(&buffer, alpha_info.alpha_opaque_value,
+                      alpha_info.alpha_bit_depth);
+        if (buffer.bit_offset % 8 != 0) {
+          // ai_byte_alignment_bits
+          write_literal(&buffer, 0, 8 - (buffer.bit_offset % 8));
+        }
+        assert(buffer.bit_offset % 8 == 0);
+
+        if (alpha_info.alpha_use_idc == libaom_examples::ALPHA_STRAIGHT) {
+          write_literal(&buffer, 0, 6);  // ai_reserved_6bits
+          write_color_properties(&buffer, alpha_info.alpha_color_description);
+        } else if (alpha_info.alpha_use_idc ==
+                   libaom_examples::ALPHA_SEGMENTATION) {
+          write_literal(&buffer, 0, 7);  // ai_reserved_7bits
+          write_literal(&buffer, !alpha_info.label_type_id.empty(), 1);
+          if (!alpha_info.label_type_id.empty()) {
+            // One label per alpha value between the transparent and opaque
+            // values, inclusively.
+            const size_t num_values =
+                std::abs(alpha_info.alpha_transparent_value -
+                         alpha_info.alpha_opaque_value) +
+                1;
+            if (alpha_info.label_type_id.size() != num_values) {
+              die("Invalid multilayer metadata, label_type_id size must be "
+                  "equal to the range of alpha values between "
+                  "alpha_transparent_value and alpha_opaque_value (expected "
+                  "%d values, found %d values)\n",
+                  (int)num_values, (int)alpha_info.label_type_id.size());
+            }
+            for (size_t j = 0; j < num_values; ++j) {
+              write_literal(&buffer, alpha_info.label_type_id[j], 16);
+            }
+          }
+        }
+        assert(buffer.bit_offset % 8 == 0);
+      } else if (layer.layer_type ==
+                     libaom_examples::MULTIALYER_LAYER_TYPE_DEPTH &&
+                 layer.layer_metadata_scope >= libaom_examples::SCOPE_GLOBAL) {
+        const libaom_examples::DepthInformation &depth_info =
+            layer.global_depth_info;
+        write_literal(&buffer, depth_info.z_near.has_value(), 1);
+        write_literal(&buffer, depth_info.z_far.has_value(), 1);
+        write_literal(&buffer, depth_info.d_min.has_value(), 1);
+        write_literal(&buffer, depth_info.d_max.has_value(), 1);
+        write_literal(&buffer, depth_info.depth_representation_type, 4);
+        if (depth_info.d_min.has_value() || depth_info.d_max.has_value()) {
+          write_literal(&buffer, depth_info.disparity_ref_view_id, 2);
+        }
+        write_depth_representation_element(&buffer, depth_info.z_near);
+        write_depth_representation_element(&buffer, depth_info.z_far);
+        write_depth_representation_element(&buffer, depth_info.d_min);
+        write_depth_representation_element(&buffer, depth_info.d_max);
+        if (depth_info.depth_representation_type == 3) {
+          write_literal(&buffer, depth_info.depth_nonlinear_precision, 4,
+                        /*offset=*/8);
+          if (depth_info.depth_nonlinear_representation_model.empty() ||
+              depth_info.depth_nonlinear_representation_model.size() >
+                  (1 << 6)) {
+            die("Invalid multilayer metadata, if depth_representation_type "
+                "== 3, depth_nonlinear_representation_model must have 1 to "
+                "%d elements, found %d elements\n",
+                1 << 6,
+                (int)depth_info.depth_nonlinear_representation_model.size());
+          }
+          write_literal(
+              &buffer,
+              (int)depth_info.depth_nonlinear_representation_model.size() - 1,
+              6);
+          // NOTE(review): the original author was unsure whether this should
+          // be "+ 8" or "+ 9" (it was marked XXX); confirm against the
+          // metadata specification.
+          const int bit_depth = depth_info.depth_nonlinear_precision + 8;
+          for (const uint32_t v :
+               depth_info.depth_nonlinear_representation_model) {
+            write_literal(&buffer, v, bit_depth);
+          }
+        }
+        if (buffer.bit_offset % 8 != 0) {
+          write_literal(&buffer, 0, 8 - (buffer.bit_offset % 8));
+        }
+        assert(buffer.bit_offset % 8 == 0);
+      }
+    }
+    assert(buffer.bit_offset % 8 == 0);
+
+    const int metadata_size_bytes = (buffer.bit_offset - metadata_start) / 8;
+    // Byte position of the size placeholder. This must not be a uint8_t: the
+    // offset exceeds 255 as soon as an earlier layer is larger than that.
+    const size_t size_pos = metadata_start / 8 - bytes_reserved_for_size;
+    size_t coded_size;
+    if (aom_uleb_encode_fixed_size(metadata_size_bytes, bytes_reserved_for_size,
+                                   bytes_reserved_for_size,
+                                   &buffer.bit_buffer[size_pos], &coded_size)) {
+      // Need to increase bytes_reserved_for_size in the code above.
+      die("Error: Failed to write metadata size\n");
+    }
+  }
+  assert(buffer.bit_offset % 8 == 0);
+  if (aom_img_add_metadata(frame, 33 /*METADATA_TYPE_MULTILAYER*/,
+                           buffer.bit_buffer, buffer.bit_offset / 8,
+                           AOM_MIF_KEY_FRAME)) {
+    die("Error: Failed to add metadata\n");
+  }
+}
+
#if CONFIG_AV1_DECODER
// Returns whether there is a mismatch between the encoder's new frame and the
// decoder's new frame.
@@ -1660,6 +1866,13 @@
svc_params.framerate_factor[2] = 1;
}
+ std::optional<libaom_examples::MultilayerMetadata> multilayer_metadata;
+ if (app_input.multilayer_metadata_file != NULL) {
+ multilayer_metadata = libaom_examples::parse_multilayer_file(
+ app_input.multilayer_metadata_file);
+ libaom_examples::print_multilayer_metadata(multilayer_metadata.value());
+ }
+
framerate = cfg.g_timebase.den / cfg.g_timebase.num;
set_rate_control_metrics(&rc, framerate, ss_number_layers, ts_number_layers);
@@ -1836,6 +2049,7 @@
aom_codec_control(&codec, AV1E_SET_SVC_REF_FRAME_COMP_PRED,
&ref_frame_comp_pred);
}
+ add_multilayer_metadata(&raw, multilayer_metadata);
// Set the speed per layer.
if (test_speed_per_layer) {
int speed_per_layer = 10;
diff --git a/test/svc_encoder_rtc.sh b/test/svc_encoder_rtc.sh
index c8b2891..8df2f6b 100644
--- a/test/svc_encoder_rtc.sh
+++ b/test/svc_encoder_rtc.sh
@@ -83,6 +83,36 @@
svc_encoder_s2_t1() {
local encoder="${LIBAOM_BIN_PATH}/svc_encoder_rtc${AOM_TEST_EXE_SUFFIX}"
local output_file="${AOM_TEST_OUTPUT_DIR}/svc_encoder_rtc"
+ local metadata_file="${AOM_TEST_OUTPUT_DIR}/multilayer_metadata.yaml"
+ cat > "${metadata_file}" <<EOF
+
+ # test comment
+
+use_case: 1 # alpha
+layers:
+# first layer...
+use_case: 1 # alpha
+layers:
+ - layer_type: 5 # alpha
+ luma_plane_only_flag: 1
+ layer_metadata_scope: 2 # global
+ alpha:
+ alpha_use_idc: 2 # segmentation
+ alpha_bit_depth: 8
+ alpha_transparent_value: 0
+ alpha_opaque_value: 4
+ label_type_id:
+ - 5
+ - 3
+ - 9
+ - 128
+ - 42
+
+# second layer...
+ - layer_type: 1 # texture
+ luma_plane_only_flag: 0
+
+EOF
if [ ! -x "${encoder}" ]; then
elog "${encoder} does not exist or is not executable."
@@ -99,6 +129,7 @@
"--spatial-layers=2" \
"--temporal-layers=1" \
"--timebase=1/30" \
+ "--multilayer_metadata_file=${metadata_file}" \
"${YUV_RAW_INPUT}" \
"${YUV_RAW_INPUT}" \
"-o ${output_file}" \