svc_encoder_rtc: add multilayer metadata

Read metadata from a simplified YAML file and write it as frame metadata.

Bug: 377851082
Change-Id: I9200d2736376bf79a1c3d41748e225a8d46179f7
diff --git a/CMakeLists.txt b/CMakeLists.txt index 55b2f97..a228fad 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt
@@ -200,7 +200,9 @@ "${AOM_ROOT}/common/y4minput.c" "${AOM_ROOT}/common/y4minput.h" "${AOM_ROOT}/examples/encoder_util.h" - "${AOM_ROOT}/examples/encoder_util.c") + "${AOM_ROOT}/examples/encoder_util.c" + "${AOM_ROOT}/examples/multilayer_metadata.h" + "${AOM_ROOT}/examples/multilayer_metadata.cc") list(APPEND AOM_ENCODER_STATS_SOURCES "${AOM_ROOT}/stats/aomstats.c" "${AOM_ROOT}/stats/aomstats.h" "${AOM_ROOT}/stats/rate_hist.c" @@ -402,6 +404,7 @@ if(CONFIG_AV1_ENCODER) add_library(aom_encoder_app_util OBJECT ${AOM_ENCODER_APP_UTIL_SOURCES}) set_property(TARGET ${example} PROPERTY FOLDER examples) + set_property(TARGET aom_encoder_app_util PROPERTY CXX_STANDARD 17) endif() endif() @@ -521,6 +524,7 @@ $<TARGET_OBJECTS:aom_common_app_util> $<TARGET_OBJECTS:aom_encoder_app_util>) target_link_libraries(svc_encoder_rtc ${AOM_LIB_LINK_TYPE} aom_av1_rc) + set_property(TARGET svc_encoder_rtc PROPERTY CXX_STANDARD 17) # Maintain a list of encoder example targets. list(APPEND AOM_ENCODER_EXAMPLE_TARGETS aomenc lossless_encoder set_maps
diff --git a/examples/multilayer_metadata.cc b/examples/multilayer_metadata.cc new file mode 100644 index 0000000..aabad46 --- /dev/null +++ b/examples/multilayer_metadata.cc
@@ -0,0 +1,503 @@
+#include "examples/multilayer_metadata.h"
+
+#include <assert.h>
+#include <errno.h>
+#include <limits.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+
+#include <cstdio>
+#include <fstream>
+#include <iostream>
+#include <limits>
+#include <optional>
+#include <string>
+#include <vector>
+
+#include "aom/aom_integer.h"
+#include "examples/multilayer_metadata.h"
+
+extern void usage_exit(void);
+
+namespace libaom_examples {
+
+namespace {
+
+constexpr int kMaxNumSpatialLayers = 4;
+
+// Removes comments and trailing whitespace from the line.
+void cleanup_line(std::string &line) {
+  // Remove everything after the first '#'.
+  std::size_t comment_pos = line.find('#');
+  if (comment_pos != std::string::npos) {
+    line.resize(comment_pos);
+  }
+  // Remove whitespace at the end of the line, including the '\r' left behind
+  // by std::getline() when the file uses CRLF line endings.
+  const std::size_t last_pos = line.find_last_not_of(" \t\r");
+  line.resize(last_pos == std::string::npos ? 0 : last_pos + 1);
+}
+
+// Finds the indentation level of the line, and sets 'has_list_prefix' to true
+// if the line has a '-' indicating a new item in a list. The list prefix and
+// the spaces following it count as indentation. Only the first '-' is treated
+// as a list prefix: a second one is the minus sign of the value and is left in
+// place so that negative numbers are not silently read as positive.
+void get_indent(const std::string &line, int *indent, bool *has_list_prefix) {
+  *indent = 0;
+  *has_list_prefix = false;
+  while (
+      *indent < (int)line.length() &&
+      (line[*indent] == ' ' || line[*indent] == '\t' || line[*indent] == '-')) {
+    if (line[*indent] == '-') {
+      if (*has_list_prefix) break;
+      *has_list_prefix = true;
+    }
+    ++(*indent);
+  }
+}
+
+/*
+ * Parses the next line from the file, skipping empty lines.
+ * Returns false if the end of the file was reached, or if the line was indented
+ * less than 'min_indent', meaning that parsing should go back to the previous
+ * function in the stack.
+ *
+ * 'min_indent' is the minimum indentation expected for the next line.
+ * 'is_list' must be true if the line is allowed to contain list items ('-').
+ * 'indent' MUST be initialized to -1 before the first call, and is then set to
+ * the indentation of the line.
+ * 'has_list_prefix' is set to true if the line starts a new list item with '-'.
+ * 'line_idx' is set to the index of the last line read.
+ * 'field_name' is set to the field name if the line contains a colon, or to an
+ * empty string otherwise.
+ * 'value' is set to the integer value of the line, or to 0 if the line doesn't
+ * contain a number.
+ */
+bool parse_line(std::fstream &file, int min_indent, bool is_list, int *indent,
+                bool *has_list_prefix, int *line_idx, std::string *field_name,
+                int *value) {
+  *field_name = "";
+  *value = 0;
+  std::string line;
+  std::fstream::pos_type prev_file_position;
+  const int prev_indent = *indent;
+  while (prev_file_position = file.tellg(), std::getline(file, line)) {
+    cleanup_line(line);
+    get_indent(line, indent, has_list_prefix);
+    line = line.substr(*indent);  // skip indentation
+    // If the line is indented less than 'min_indent', it belongs to the outer
+    // object, and parsing should go back to the previous function in the stack.
+    if (!line.empty() && *indent < min_indent) {
+      // Undo reading the last line. seekg() is used rather than seekp()
+      // because it clears eofbit, which is set when the line that was just
+      // read is the last line of a file with no trailing newline.
+      if (!file.seekg(prev_file_position)) {
+        fprintf(stderr, "Failed to seek to previous file position\n");
+        exit(EXIT_FAILURE);
+      }
+      return false;
+    }
+
+    ++(*line_idx);
+    if (line.empty()) continue;
+
+    if (prev_indent >= 0 && prev_indent != *indent) {
+      fprintf(stderr, "Error: Bad indentation at line %d\n", *line_idx);
+      exit(EXIT_FAILURE);
+    }
+    if (*has_list_prefix && !is_list) {
+      fprintf(stderr, "Error: Unexpected list item at line %d\n", *line_idx);
+      exit(EXIT_FAILURE);
+    }
+
+    std::string value_str = line;
+    size_t colon_pos = line.find(':');
+    if (colon_pos != std::string::npos) {
+      *field_name = line.substr(0, colon_pos);
+      value_str = line.substr(colon_pos + 1);
+    }
+    char *endptr;
+    errno = 0;
+    const long parsed = strtol(value_str.c_str(), &endptr, 10);
+    if (*endptr != '\0' || errno == ERANGE || parsed < INT_MIN ||
+        parsed > INT_MAX) {
+      fprintf(stderr, "Error: Failed to parse number from '%s'\n",
+              value_str.c_str());
+      exit(EXIT_FAILURE);
+    }
+    *value = (int)parsed;
+    return true;
+  }
+  return false;  // Reached the end of the file.
+}
+
+// Parses a list of integers, one '- <value>' item per line, until a line
+// indented less than 'min_indent' or the end of the file is reached.
+// Terminates the process if a value does not fit in T.
+template <typename T>
+std::vector<T> parse_integer_list(std::fstream &file, int min_indent,
+                                  int *line_idx) {
+  bool has_list_prefix;
+  int indent = -1;
+  std::string field_name;
+  int value;
+  std::vector<T> result;
+  while (parse_line(file, min_indent, /*is_list=*/true, &indent,
+                    &has_list_prefix, line_idx, &field_name, &value)) {
+    if (!field_name.empty()) {
+      fprintf(
+          stderr,
+          "Error: Unexpected field name '%s' at line %d, expected a number\n",
+          field_name.c_str(), *line_idx);
+      exit(EXIT_FAILURE);
+    } else if (!has_list_prefix) {
+      fprintf(stderr, "Error: Missing list prefix '-' at line %d\n", *line_idx);
+      exit(EXIT_FAILURE);
+    } else if ((int64_t)value > (int64_t)std::numeric_limits<T>::max() ||
+               (int64_t)value < (int64_t)std::numeric_limits<T>::min()) {
+      // The comparison is done in 64 bits: casting the limits of uint32_t to
+      // int would turn its maximum into -1 and reject every value.
+      fprintf(stderr, "Error: Value %d is out of range at line %d\n", value,
+              *line_idx);
+      exit(EXIT_FAILURE);
+    } else {
+      result.push_back((T)value);
+    }
+  }
+  return result;
+}
+
+// Parses the fields of a color description map until a line indented less
+// than 'min_indent' or the end of the file is reached.
+ColorProperties parse_color_properties(std::fstream &file, int min_indent,
+                                       int *line_idx) {
+  bool has_list_prefix;
+  int indent = -1;
+  std::string field_name;
+  int value;
+  ColorProperties color = {};
+  while (parse_line(file, min_indent, /*is_list=*/false, &indent,
+                    &has_list_prefix, line_idx, &field_name, &value)) {
+    if (field_name == "color_range") {
+      color.color_range = value;
+    } else if (field_name == "color_primaries") {
+      color.color_primaries = value;
+    } else if (field_name == "transfer_characteristics") {
+      color.transfer_characteristics = value;
+    } else if (field_name == "matrix_coefficients") {
+      color.matrix_coefficients = value;
+    } else {
+      fprintf(stderr, "Error: Unknown field '%s' at line %d\n",
+              field_name.c_str(), *line_idx);
+      exit(EXIT_FAILURE);
+    }
+  }
+  return color;
+}
+
+// Parses the fields of an 'alpha' map until a line indented less than
+// 'min_indent' or the end of the file is reached.
+AlphaInformation parse_multilayer_layer_alpha(std::fstream &file,
+                                              int min_indent, int *line_idx) {
+  bool has_list_prefix;
+  int indent = -1;
+  std::string field_name;
+  int value;
+  AlphaInformation alpha_info = {};
+  while (parse_line(file, min_indent, /*is_list=*/false, &indent,
+                    &has_list_prefix, line_idx, &field_name, &value)) {
+    if (field_name == "alpha_use_idc") {
+      alpha_info.alpha_use_idc = (AlphaUse)value;
+    } else if (field_name == "alpha_bit_depth") {
+      alpha_info.alpha_bit_depth = value;
+    } else if (field_name == "alpha_clip_idc") {
+      alpha_info.alpha_clip_idc = value;
+    } else if (field_name == "alpha_incr_flag") {
+      alpha_info.alpha_incr_flag = value;
+    } else if (field_name == "alpha_transparent_value") {
+      alpha_info.alpha_transparent_value = value;
+    } else if (field_name == "alpha_opaque_value") {
+      alpha_info.alpha_opaque_value = value;
+    } else if (field_name == "alpha_color_description") {
+      alpha_info.alpha_color_description =
+          parse_color_properties(file, /*min_indent=*/indent + 1, line_idx);
+    } else if (field_name == "label_type_id") {
+      alpha_info.label_type_id = parse_integer_list<uint16_t>(
+          file, /*min_indent=*/indent + 1, line_idx);
+    } else {
+      fprintf(stderr, "Error: Unknown field '%s' at line %d\n",
+              field_name.c_str(), *line_idx);
+      exit(EXIT_FAILURE);
+    }
+  }
+  return alpha_info;
+}
+
+// Parses the sign_flag/exponent/mantissa fields of a depth element until a
+// line indented less than 'min_indent' or the end of the file is reached.
+DepthRepresentationElement parse_depth_representation_element(
+    std::fstream &file, int min_indent, int *line_idx) {
+  bool has_list_prefix;
+  int indent = -1;
+  std::string field_name;
+  int value;
+  DepthRepresentationElement element = {};
+  while (parse_line(file, min_indent, /*is_list=*/false, &indent,
+                    &has_list_prefix, line_idx, &field_name, &value)) {
+    if (field_name == "sign_flag") {
+      element.sign_flag = value;
+    } else if (field_name == "exponent") {
+      element.exponent = value;
+    } else if (field_name == "mantissa") {
+      element.mantissa = value;
+    } else {
+      fprintf(stderr, "Error: Unknown field '%s' at line %d\n",
+              field_name.c_str(), *line_idx);
+      exit(EXIT_FAILURE);
+    }
+  }
+  return element;
+}
+
+// Parses the fields of a 'depth' map until a line indented less than
+// 'min_indent' or the end of the file is reached.
+DepthInformation parse_multilayer_layer_depth(std::fstream &file,
+                                              int min_indent, int *line_idx) {
+  bool has_list_prefix;
+  int indent = -1;
+  std::string field_name;
+  int value;
+  DepthInformation depth_info = {};
+  while (parse_line(file, min_indent, /*is_list=*/false, &indent,
+                    &has_list_prefix, line_idx, &field_name, &value)) {
+    if (field_name == "z_near") {
+      depth_info.z_near = parse_depth_representation_element(
+          file, /*min_indent=*/indent + 1, line_idx);
+    } else if (field_name == "z_far") {
+      depth_info.z_far = parse_depth_representation_element(
+          file, /*min_indent=*/indent + 1, line_idx);
+    } else if (field_name == "d_min") {
+      depth_info.d_min = parse_depth_representation_element(
+          file, /*min_indent=*/indent + 1, line_idx);
+    } else if (field_name == "d_max") {
+      depth_info.d_max = parse_depth_representation_element(
+          file, /*min_indent=*/indent + 1, line_idx);
+    } else if (field_name == "depth_representation_type") {
+      depth_info.depth_representation_type = value;
+    } else if (field_name == "disparity_ref_view_id") {
+      depth_info.disparity_ref_view_id = value;
+    } else if (field_name == "depth_nonlinear_precision") {
+      depth_info.depth_nonlinear_precision = value;
+    } else if (field_name == "depth_nonlinear_representation_model") {
+      depth_info.depth_nonlinear_representation_model =
+          parse_integer_list<uint32_t>(file, /*min_indent=*/indent + 1,
+                                       line_idx);
+    } else {
+      fprintf(stderr, "Error: Unknown field '%s' at line %d\n",
+              field_name.c_str(), *line_idx);
+      exit(EXIT_FAILURE);
+    }
+  }
+  return depth_info;
+}
+
+// Parses the 'layers' list: each '-' starts a new layer, and the fields that
+// follow at the same indentation belong to that layer.
+std::vector<LayerMetadata> parse_multilayer_layer_metadata(std::fstream &file,
+                                                           int min_indent,
+                                                           int *line_idx) {
+  bool has_list_prefix;
+  int indent = -1;
+  std::string field_name;
+  int value;
+  std::vector<LayerMetadata> layers;
+  while (parse_line(file, min_indent, /*is_list=*/true, &indent,
+                    &has_list_prefix, line_idx, &field_name, &value)) {
+    if (has_list_prefix) {
+      if (layers.size() >= kMaxNumSpatialLayers) {
+        fprintf(stderr,
+                "Error: Too many layers at line %d, the maximum is %d\n",
+                *line_idx, kMaxNumSpatialLayers);
+        exit(EXIT_FAILURE);
+      }
+      layers.emplace_back();
+    }
+    if (layers.empty()) {
+      fprintf(stderr, "Error: Missing list prefix '-' at line %d\n", *line_idx);
+      exit(EXIT_FAILURE);
+    }
+    LayerMetadata *layer = &layers.back();
+    // Check if string starts with field name.
+    if ((field_name == "layer_type")) {
+      layer->layer_type = (LayerType)value;
+    } else if ((field_name == "luma_plane_only_flag")) {
+      layer->luma_plane_only_flag = value;
+    } else if ((field_name == "layer_view_type")) {
+      layer->layer_view_type = (MultilayerViewType)value;
+    } else if ((field_name == "group_id")) {
+      layer->group_id = value;
+    } else if ((field_name == "layer_dependency_idc")) {
+      layer->layer_dependency_idc = value;
+    } else if ((field_name == "layer_metadata_scope")) {
+      layer->layer_metadata_scope = (MultilayerMetadataScope)value;
+    } else if ((field_name == "layer_color_description")) {
+      layer->layer_color_description =
+          parse_color_properties(file, /*min_indent=*/indent + 1, line_idx);
+    } else if ((field_name == "alpha")) {
+      layer->global_alpha_info =
+          parse_multilayer_layer_alpha(file,
+                                       /*min_indent=*/indent + 1, line_idx);
+    } else if (field_name == "depth") {
+      layer->global_depth_info =
+          parse_multilayer_layer_depth(file,
+                                       /*min_indent=*/indent + 1, line_idx);
+    } else {
+      fprintf(stderr, "Error: Unknown field %s at line %d\n",
+              field_name.c_str(), *line_idx);
+      exit(EXIT_FAILURE);
+    }
+  }
+  return layers;
+}
+
+// Parses the top level map of the file, which holds 'use_case' and 'layers'.
+// Terminates the process on any other field.
+MultilayerMetadata parse_multilayer_metadata(std::fstream &file) {
+  int line_idx = 0;
+  bool has_list_prefix;
+  int indent = -1;
+  std::string field_name;
+  int value;
+  MultilayerMetadata multilayer = {};
+  while (parse_line(file, /*min_indent=*/0, /*is_list=*/false, &indent,
+                    &has_list_prefix, &line_idx, &field_name, &value)) {
+    // Check if string starts with field name.
+    if ((field_name == "use_case")) {
+      multilayer.use_case = (MultilayerUseCase)value;
+    } else if ((field_name == "layers")) {
+      multilayer.layers =
+          parse_multilayer_layer_metadata(file,
+                                          /*min_indent=*/indent + 1, &line_idx);
+    } else {
+      fprintf(stderr, "Error: Unknown field %s at line %d\n",
+              field_name.c_str(), line_idx);
+      exit(EXIT_FAILURE);
+    }
+  }
+  return multilayer;
+}
+
+// Formats a depth element for printing, or returns "absent" if it has no value.
+std::string format_depth_representation_element(
+    const std::optional<DepthRepresentationElement> &element) {
+  if (!element.has_value()) {
+    return "absent";
+  } else {
+    return "sign_flag " + std::to_string(element->sign_flag) + " exponent " +
+           std::to_string(element->exponent) + " mantissa " +
+           std::to_string(element->mantissa);
+  }
+}
+
+// Formats a color description as "primaries/transfer/matrix" + "F" or "L".
+std::string format_color_properties(
+    const std::optional<ColorProperties> &color_properties) {
+  if (!color_properties.has_value()) {
+    return "absent";
+  } else {
+    return std::to_string(color_properties->color_primaries) + "/" +
+           std::to_string(color_properties->transfer_characteristics) + "/" +
+           std::to_string(color_properties->matrix_coefficients) +
+           (color_properties->color_range ? "F" : "L");
+  }
+}
+
+}  // namespace
+
+MultilayerMetadata parse_multilayer_file(const char *metadata_path) {
+  // Open for reading only: the default fstream mode also requests write access
+  // and fails on read-only files.
+  std::fstream file(metadata_path, std::ios::in);
+  if (!file.is_open()) {
+    fprintf(stderr, "Error: Failed to open %s\n", metadata_path);
+    exit(EXIT_FAILURE);
+  }
+
+  const MultilayerMetadata multilayer = parse_multilayer_metadata(file);
+  if (multilayer.layers.empty()) {
+    fprintf(stderr, "Error: No layers found, there must be at least one\n");
+    exit(EXIT_FAILURE);
+  }
+  return multilayer;
+}
+
+void print_multilayer_metadata(const MultilayerMetadata &multilayer) {
+  printf("=== Multilayer metadata ===\n");
+  printf("use_case: %d\n", multilayer.use_case);
+  for (size_t i = 0; i < multilayer.layers.size(); ++i) {
+    const LayerMetadata &layer = multilayer.layers[i];
+    printf("layer %d\n", (int)i);
+    printf(" layer_type: %d\n", layer.layer_type);
+    printf(" luma_plane_only_flag: %d\n", layer.luma_plane_only_flag);
+    printf(" layer_view_type: %d\n", layer.layer_view_type);
+    printf(" group_id: %d\n", layer.group_id);
+    printf(" layer_dependency_idc: %d\n", layer.layer_dependency_idc);
+    printf(" layer_metadata_scope: %d\n", layer.layer_metadata_scope);
+    printf(" layer_color_description: %s\n",
+           format_color_properties(layer.layer_color_description).c_str());
+    if (layer.layer_type == MULTIALYER_LAYER_TYPE_ALPHA) {
+      printf(" alpha:\n");
+      printf(" alpha_use_idc: %d\n", layer.global_alpha_info.alpha_use_idc);
+      printf(" alpha_bit_depth: %d\n",
+             layer.global_alpha_info.alpha_bit_depth);
+      printf(" alpha_clip_idc: %d\n",
+             layer.global_alpha_info.alpha_clip_idc);
+      printf(" alpha_incr_flag: %d\n",
+             layer.global_alpha_info.alpha_incr_flag);
+      printf(" alpha_transparent_value: %hu\n",
+             layer.global_alpha_info.alpha_transparent_value);
+      printf(" alpha_opaque_value: %hu\n",
+             layer.global_alpha_info.alpha_opaque_value);
+      printf(" alpha_color_description: %s\n",
+             format_color_properties(
+                 layer.global_alpha_info.alpha_color_description)
+                 .c_str());
+      printf(" label_type_id:");
+      for (uint16_t label_type_id : layer.global_alpha_info.label_type_id) {
+        printf(" %d", label_type_id);
+      }
+      printf("\n");
+    } else if (layer.layer_type == MULTIALYER_LAYER_TYPE_DEPTH) {
+      printf(" depth:\n");
+      printf(" z_near_flag %s\n",
+             format_depth_representation_element(layer.global_depth_info.z_near)
+                 .c_str());
+      printf(" z_far_flag %s\n",
+             format_depth_representation_element(layer.global_depth_info.z_far)
+                 .c_str());
+      printf(" d_min_flag %s\n",
+             format_depth_representation_element(layer.global_depth_info.d_min)
+                 .c_str());
+      printf(" d_max_flag %s\n",
+             format_depth_representation_element(layer.global_depth_info.d_max)
+                 .c_str());
+      printf(" depth_representation_type: %d\n",
+             layer.global_depth_info.depth_representation_type);
+      printf(" disparity_ref_view_id: %d\n",
+             layer.global_depth_info.disparity_ref_view_id);
+      printf(" depth_nonlinear_precision: %d\n",
+             layer.global_depth_info.depth_nonlinear_precision);
+      printf(" depth_nonlinear_representation_model:");
+      for (uint32_t depth_nonlinear_representation_model :
+           layer.global_depth_info.depth_nonlinear_representation_model) {
+        printf(" %u", depth_nonlinear_representation_model);
+      }
+      printf("\n");
+    }
+  }
+  printf("\n");
+}
+
+}  // namespace libaom_examples
diff --git a/examples/multilayer_metadata.h b/examples/multilayer_metadata.h new file mode 100644 index 0000000..9001542 --- /dev/null +++ b/examples/multilayer_metadata.h
@@ -0,0 +1,132 @@
+#ifndef AOM_EXAMPLES_MULTILAYER_METADATA_H_
+#define AOM_EXAMPLES_MULTILAYER_METADATA_H_
+
+#include <cstdint>
+#include <optional>
+#include <vector>
+
+namespace libaom_examples {
+
+struct ColorProperties {
+  bool color_range;  // true for full range values, false for limited range
+  uint8_t color_primaries;
+  uint8_t transfer_characteristics;
+  uint8_t matrix_coefficients;
+};
+
+enum AlphaUse {
+  ALPHA_STRAIGHT = 0,
+  ALPHA_PREMULTIPLIED = 1,
+  ALPHA_SEGMENTATION = 2,
+  ALPHA_UNSPECIFIED = 3,
+};
+
+struct AlphaInformation {
+  AlphaUse alpha_use_idc;   // [0, 7]
+  uint8_t alpha_bit_depth;  // [8, 15]
+  uint8_t alpha_clip_idc;   // [0, 3]
+  bool alpha_incr_flag;
+  uint16_t alpha_transparent_value;  // [0, 1<<alpha_bit_depth]
+  uint16_t alpha_opaque_value;       // [0, 1<<alpha_bit_depth]
+  // Relevant for ALPHA_STRAIGHT only.
+  std::optional<ColorProperties> alpha_color_description;
+  // Relevant for ALPHA_SEGMENTATION only.
+  // Must be either empty or have one entry per alpha value in the range
+  // between alpha_transparent_value and alpha_opaque_value, both inclusive.
+  std::vector<uint16_t> label_type_id;
+};
+
+// TODO: maryla - parse floats directly and convert to this wire
+// representation at write time.
+struct DepthRepresentationElement {
+  bool sign_flag;
+  uint8_t exponent;  // [0, 126]
+  uint32_t mantissa;
+};
+
+struct DepthInformation {
+  std::optional<DepthRepresentationElement> z_near;
+  std::optional<DepthRepresentationElement> z_far;
+  std::optional<DepthRepresentationElement> d_min;
+  std::optional<DepthRepresentationElement> d_max;
+  uint8_t depth_representation_type;  // [0, 15]
+  uint8_t disparity_ref_view_id;      // [0, 3]
+  uint8_t depth_nonlinear_precision;  // [8, 23]
+  // Each value is in [0, 1<<depth_nonlinear_precision].
+  std::vector<uint32_t> depth_nonlinear_representation_model;
+};
+
+enum MultilayerUseCase {
+  MULTILAYER_USE_CASE_UNSPECIFIED = 0,
+  MULTILAYER_USE_CASE_ALPHA = 1,
+  MULTILAYER_USE_CASE_DEPTH = 2,
+  MULTILAYER_USE_CASE_STEREO = 3,
+  MULTILAYER_USE_CASE_STEREO_ALPHA_GLOBAL = 4,
+  MULTILAYER_USE_CASE_STEREO_DEPTH_GLOBAL = 5,
+  MULTILAYER_USE_CASE_STEREO_ALPHA = 6,
+  MULTILAYER_USE_CASE_STEREO_DEPTH = 7,
+  MULTILAYER_USE_CASE_444 = 8,
+  MULTILAYER_USE_CASE_420_444 = 9,
+  MULTILAYER_USE_CASE_444_ALPHA = 10,
+  MULTILAYER_USE_CASE_444_DEPTH = 11,
+};
+
+enum LayerType {  // NOTE: "MULTIALYER" below is a misspelling of "MULTILAYER".
+  MULTIALYER_LAYER_TYPE_UNSPECIFIED = 0,
+  MULTIALYER_LAYER_TYPE_TEXTURE = 1,
+  MULTIALYER_LAYER_TYPE_TEXTURE_1 = 2,
+  MULTIALYER_LAYER_TYPE_TEXTURE_2 = 3,
+  MULTIALYER_LAYER_TYPE_TEXTURE_3 = 4,
+  MULTIALYER_LAYER_TYPE_ALPHA = 5,
+  MULTIALYER_LAYER_TYPE_DEPTH = 6,
+};
+
+enum MultilayerMetadataScope {
+  SCOPE_UNSPECIFIED = 0,
+  SCOPE_LOCAL = 1,
+  SCOPE_GLOBAL = 2,
+  SCOPE_MIXED = 3,
+};
+
+enum MultilayerViewType {
+  VIEW_UNSPECIFIED = 0,
+  VIEW_CENTER = 1,
+  VIEW_LEFT = 2,
+  VIEW_RIGHT = 3,
+};
+
+struct LayerMetadata {
+  LayerType layer_type;  // [0, 31]
+  bool luma_plane_only_flag;
+  MultilayerViewType layer_view_type;            // [0, 7]
+  uint8_t group_id;                              // [0, 3]
+  uint8_t layer_dependency_idc;                  // [0, 7]
+  MultilayerMetadataScope layer_metadata_scope;  // [0, 3]
+
+  std::optional<ColorProperties> layer_color_description;
+
+  // Relevant for MULTIALYER_LAYER_TYPE_ALPHA with SCOPE_GLOBAL or SCOPE_MIXED.
+  AlphaInformation global_alpha_info;
+  // Relevant for MULTIALYER_LAYER_TYPE_DEPTH with SCOPE_GLOBAL or SCOPE_MIXED.
+  DepthInformation global_depth_info;
+};
+
+struct MultilayerMetadata {
+  MultilayerUseCase use_case;  // [0, 63]
+  std::vector<LayerMetadata> layers;
+};
+
+// Parses a multilayer metadata file.
+// Terminates the process in case of error, or if the file has no layers.
+// The metadata is expected to be in a subset of the YAML format supporting
+// simple lists and maps with integer values, and comments starting with '#'.
+// Does very little validation on the metadata, e.g. does not check that the
+// values are in the correct range.
+MultilayerMetadata parse_multilayer_file(const char *metadata_path);
+
+// Prints the multilayer metadata to stdout for debugging.
+void print_multilayer_metadata(const MultilayerMetadata &multilayer);
+
+}  // namespace libaom_examples
+
+#endif  // AOM_EXAMPLES_MULTILAYER_METADATA_H_
diff --git a/examples/svc_encoder_rtc.cc b/examples/svc_encoder_rtc.cc index ad40132..b115a0a 100644 --- a/examples/svc_encoder_rtc.cc +++ b/examples/svc_encoder_rtc.cc
@@ -20,6 +20,7 @@
 #include <string.h>
 
 #include <memory>
+#include <optional>
 
 #include "config/aom_config.h"
 
@@ -27,13 +28,17 @@
 #include "aom/aom_decoder.h"
 #endif
 #include "aom/aom_encoder.h"
+#include "aom/aom_image.h"
+#include "aom/aom_integer.h"
 #include "aom/aomcx.h"
+#include "aom_dsp/bitwriter_buffer.h"
+#include "aom_ports/aom_timer.h"
+#include "av1/ratectrl_rtc.h"
 #include "common/args.h"
 #include "common/tools_common.h"
 #include "common/video_writer.h"
 #include "examples/encoder_util.h"
-#include "aom_ports/aom_timer.h"
-#include "av1/ratectrl_rtc.h"
+#include "examples/multilayer_metadata.h"
 
 #define OPTION_BUFFER_SIZE 1024
 #define MAX_NUM_SPATIAL_LAYERS 4
@@ -51,6 +56,7 @@
   int show_psnr;
   bool use_external_rc;
   bool scale_factors_explicitly_set;
+  const char *multilayer_metadata_file;
 } AppInput;
 
 typedef enum {
@@ -116,6 +122,9 @@
 };
 static const arg_def_t tune_content_arg = ARG_DEF_ENUM(
     NULL, "tune-content", 1, "Tune content type", tune_content_enum);
+static const arg_def_t multilayer_metadata_file_arg =
+    ARG_DEF("ml", "multilayer_metadata_file", 1,
+            "Experimental: path to multilayer metadata file");
 #if CONFIG_AV1_HIGHBITDEPTH
 static const struct arg_enum_list bitdepth_enum[] = {
   { "8", AOM_BITS_8 },
@@ -394,6 +403,8 @@
       app_input->show_psnr = 1;
     } else if (arg_match(&arg, &ext_rc_arg, argi)) {
       app_input->use_external_rc = true;
+    } else if (arg_match(&arg, &multilayer_metadata_file_arg, argi)) {
+      app_input->multilayer_metadata_file = arg.val;
     } else {
       ++argj;
     }
@@ -1353,6 +1364,229 @@
   }
 }
 
+// Writes 'data - offset' to the bit buffer using 'bits' bits. Terminates the
+// process if the value does not fit in 'bits' bits, or if 'bits' is not in
+// [0, 31], the widest range for which the check below is well defined for int.
+static void write_literal(struct aom_write_bit_buffer *wb, int data, int bits,
+                          int offset = 0) {
+  if (bits < 0 || bits > 31) {
+    die("Invalid bit count %d, expected a value in [0, 31]\n", bits);
+  }
+  // 64 bit arithmetic: 'data - offset' and '1 << 31' can overflow an int.
+  const int64_t to_write = (int64_t)data - offset;
+  const int64_t limit = (int64_t)1 << bits;
+  if (to_write < 0 || to_write >= limit) {
+    die("Invalid data, value %d out of range [%d, %lld]\n", data, offset,
+        (long long)(offset + limit - 1));
+  }
+  aom_wb_write_literal(wb, (int)to_write, bits);
+}
+
+// Writes a depth representation element, if present: sign (1 bit), exponent
+// (7 bits), mantissa length minus one (5 bits), then the mantissa itself.
+static void write_depth_representation_element(
+    struct aom_write_bit_buffer *buffer,
+    const std::optional<libaom_examples::DepthRepresentationElement> &element) {
+  if (!element.has_value()) {
+    return;
+  }
+  write_literal(buffer, element->sign_flag, 1);
+  write_literal(buffer, element->exponent, 7);
+  int mantissa_len = 1;
+  while (mantissa_len < 32 && (element->mantissa >> mantissa_len != 0)) {
+    ++mantissa_len;
+  }
+  write_literal(buffer, mantissa_len - 1, 5);
+  // write_literal() takes an int and at most 31 bits, so a mantissa longer
+  // than 16 bits is written in two parts, high bits first. This assumes that
+  // aom_wb_write_literal() emits the most significant bit first.
+  if (mantissa_len > 16) {
+    write_literal(buffer, (int)(element->mantissa >> 16), mantissa_len - 16);
+    write_literal(buffer, (int)(element->mantissa & 0xffff), 16);
+  } else {
+    write_literal(buffer, (int)element->mantissa, mantissa_len);
+  }
+}
+
+// Writes a presence flag followed by the color description if there is one,
+// or by one reserved bit otherwise.
+static void write_color_properties(
+    struct aom_write_bit_buffer *buffer,
+    const std::optional<libaom_examples::ColorProperties> &color_properties) {
+  write_literal(buffer, color_properties.has_value(), 1);
+  if (color_properties.has_value()) {
+    write_literal(buffer, color_properties->color_range, 1);
+    write_literal(buffer, color_properties->color_primaries, 8);
+    write_literal(buffer, color_properties->transfer_characteristics, 8);
+    write_literal(buffer, color_properties->matrix_coefficients, 8);
+  } else {
+    write_literal(buffer, 0, 1);  // reserved_1bit
+  }
+}
+
+// Serializes 'multilayer' and attaches it to 'frame' as metadata of type 33
+// (METADATA_TYPE_MULTILAYER). Does nothing if 'multilayer' has no value.
+// Terminates the process if the metadata is invalid or cannot be written.
+// NOTE(review): every call adds one more metadata entry to 'frame'; confirm
+// that callers do not attach it repeatedly to the same image.
+static void add_multilayer_metadata(
+    aom_image_t *frame,
+    const std::optional<libaom_examples::MultilayerMetadata> &multilayer) {
+  if (!multilayer.has_value()) {
+    return;
+  }
+  // Validate the layer count before sizing the buffer: with no layers the
+  // buffer would be empty and the first write would be out of bounds.
+  if (multilayer->layers.empty()) {
+    die("Invalid multilayer metadata, no layers found\n");
+  } else if (multilayer->layers.size() > MAX_NUM_SPATIAL_LAYERS) {
+    die("Invalid multilayer metadata, too many layers (max is %d)\n",
+        MAX_NUM_SPATIAL_LAYERS);
+  }
+  // Pretty large buffer to accommodate the largest multilayer metadata
+  // possible, with 4 alpha segmentation layers (each can be up to about 66kB).
+  std::vector<uint8_t> data(66000 * multilayer->layers.size());
+  struct aom_write_bit_buffer buffer = { data.data(), 0 };
+
+  write_literal(&buffer, multilayer->use_case, 6);
+  write_literal(&buffer, (int)multilayer->layers.size() - 1, 2);
+  assert(buffer.bit_offset % 8 == 0);
+  for (size_t i = 0; i < multilayer->layers.size(); ++i) {
+    const libaom_examples::LayerMetadata &layer = multilayer->layers[i];
+    // Alpha info with segmentation with labels can be up to about 66k bytes,
+    // which requires 3 bytes to encode in leb128.
+    const int bytes_reserved_for_size = 3;
+    // Placeholder for layer_metadata_size which will be written later.
+    write_literal(&buffer, 0, bytes_reserved_for_size * 8);
+    const uint32_t metadata_start = buffer.bit_offset;
+    write_literal(&buffer, (int)i, 2);  // ml_spatial_id
+    write_literal(&buffer, layer.layer_type, 5);
+    write_literal(&buffer, layer.luma_plane_only_flag, 1);
+    write_literal(&buffer, layer.layer_view_type, 3);
+    write_literal(&buffer, layer.group_id, 2);
+    write_literal(&buffer, layer.layer_dependency_idc, 3);
+    write_literal(&buffer, layer.layer_metadata_scope, 2);
+    write_literal(&buffer, 0, 4);  // ml_reserved_4bits
+
+    if (i > 0) {
+      write_color_properties(&buffer, layer.layer_color_description);
+    } else {
+      write_literal(&buffer, 0, 2);  // ml_reserved_2bits
+    }
+    assert(buffer.bit_offset % 8 == 0);
+
+    if (multilayer->use_case < 12) {
+      if (layer.layer_type == libaom_examples::MULTIALYER_LAYER_TYPE_ALPHA &&
+          layer.layer_metadata_scope >= libaom_examples::SCOPE_GLOBAL) {
+        const libaom_examples::AlphaInformation &alpha_info =
+            layer.global_alpha_info;
+        write_literal(&buffer, alpha_info.alpha_use_idc, 3);
+        write_literal(&buffer, alpha_info.alpha_bit_depth, 3, /*offset=*/8);
+        write_literal(&buffer, alpha_info.alpha_clip_idc, 2);
+        write_literal(&buffer, alpha_info.alpha_incr_flag, 1);
+        write_literal(&buffer, alpha_info.alpha_transparent_value,
+                      alpha_info.alpha_bit_depth);
+        write_literal(&buffer, alpha_info.alpha_opaque_value,
+                      alpha_info.alpha_bit_depth);
+        if (buffer.bit_offset % 8 != 0) {
+          // ai_byte_alignment_bits
+          write_literal(&buffer, 0, 8 - (buffer.bit_offset % 8));
+        }
+        assert(buffer.bit_offset % 8 == 0);
+
+        if (alpha_info.alpha_use_idc == libaom_examples::ALPHA_STRAIGHT) {
+          write_literal(&buffer, 0, 6);  // ai_reserved_6bits
+          write_color_properties(&buffer, alpha_info.alpha_color_description);
+        } else if (alpha_info.alpha_use_idc ==
+                   libaom_examples::ALPHA_SEGMENTATION) {
+          write_literal(&buffer, 0, 7);  // ai_reserved_7bits
+          write_literal(&buffer, !alpha_info.label_type_id.empty(), 1);
+          if (!alpha_info.label_type_id.empty()) {
+            const size_t num_values =
+                std::abs(alpha_info.alpha_transparent_value -
+                         alpha_info.alpha_opaque_value) +
+                1;
+            if (alpha_info.label_type_id.size() != num_values) {
+              die("Invalid multilayer metadata, label_type_id size must be "
+                  "equal to the range of alpha values between "
+                  "alpha_transparent_value and alpha_opaque_value (expected "
+                  "%d values, found %d values)\n",
+                  (int)num_values, (int)alpha_info.label_type_id.size());
+            }
+            for (size_t j = 0; j < num_values; ++j) {
+              write_literal(&buffer, alpha_info.label_type_id[j], 16);
+            }
+          }
+        }
+        assert(buffer.bit_offset % 8 == 0);
+      } else if (layer.layer_type ==
+                     libaom_examples::MULTIALYER_LAYER_TYPE_DEPTH &&
+                 layer.layer_metadata_scope >= libaom_examples::SCOPE_GLOBAL) {
+        const libaom_examples::DepthInformation &depth_info =
+            layer.global_depth_info;
+        write_literal(&buffer, depth_info.z_near.has_value(), 1);
+        write_literal(&buffer, depth_info.z_far.has_value(), 1);
+        write_literal(&buffer, depth_info.d_min.has_value(), 1);
+        write_literal(&buffer, depth_info.d_max.has_value(), 1);
+        write_literal(&buffer, depth_info.depth_representation_type, 4);
+        if (depth_info.d_min.has_value() || depth_info.d_max.has_value()) {
+          write_literal(&buffer, depth_info.disparity_ref_view_id, 2);
+        }
+        write_depth_representation_element(&buffer, depth_info.z_near);
+        write_depth_representation_element(&buffer, depth_info.z_far);
+        write_depth_representation_element(&buffer, depth_info.d_min);
+        write_depth_representation_element(&buffer, depth_info.d_max);
+        if (depth_info.depth_representation_type == 3) {
+          write_literal(&buffer, depth_info.depth_nonlinear_precision, 4,
+                        /*offset=*/8);
+          if (depth_info.depth_nonlinear_representation_model.empty() ||
+              depth_info.depth_nonlinear_representation_model.size() >
+                  (1 << 6)) {
+            die("Invalid multilayer metadata, if depth_representation_type "
+                "== 3, depth_nonlinear_representation_model must have 1 to "
+                "%d elements, found %d elements\n",
+                1 << 6,
+                (int)depth_info.depth_nonlinear_representation_model.size());
+          }
+          write_literal(
+              &buffer,
+              (int)depth_info.depth_nonlinear_representation_model.size() - 1,
+              6);
+          const int bit_depth =
+              depth_info.depth_nonlinear_precision + 8;  // XXX + 9 ???
+          for (const uint32_t v :
+               depth_info.depth_nonlinear_representation_model) {
+            write_literal(&buffer, v, bit_depth);
+          }
+        }
+        if (buffer.bit_offset % 8 != 0) {
+          write_literal(&buffer, 0, 8 - (buffer.bit_offset % 8));
+        }
+        assert(buffer.bit_offset % 8 == 0);
+      }
+    }
+    assert(buffer.bit_offset % 8 == 0);
+
+    const int metadata_size_bytes = (buffer.bit_offset - metadata_start) / 8;
+    // size_t rather than uint8_t: the byte offset exceeds 255 as soon as the
+    // preceding layers take more than 255 bytes.
+    const size_t size_pos = metadata_start / 8 - bytes_reserved_for_size;
+    size_t coded_size;
+    if (aom_uleb_encode_fixed_size(metadata_size_bytes, bytes_reserved_for_size,
+                                   bytes_reserved_for_size,
+                                   &buffer.bit_buffer[size_pos], &coded_size)) {
+      // Need to increase bytes_reserved_for_size in the code above.
+      die("Error: Failed to write metadata size\n");
+    }
+  }
+  assert(buffer.bit_offset % 8 == 0);
+  if (aom_img_add_metadata(frame, 33 /*METADATA_TYPE_MULTILAYER*/,
+                           buffer.bit_buffer, buffer.bit_offset / 8,
+                           AOM_MIF_KEY_FRAME)) {
+    die("Error: Failed to add metadata\n");
+  }
+}
+
 #if CONFIG_AV1_DECODER
 // Returns whether there is a mismatch between the encoder's new frame and the
 // decoder's new frame.
@@ -1660,6 +1894,13 @@
     svc_params.framerate_factor[2] = 1;
   }
 
+  std::optional<libaom_examples::MultilayerMetadata> multilayer_metadata;
+  if (app_input.multilayer_metadata_file != NULL) {
+    multilayer_metadata = libaom_examples::parse_multilayer_file(
+        app_input.multilayer_metadata_file);
+    libaom_examples::print_multilayer_metadata(multilayer_metadata.value());
+  }
+
   framerate = cfg.g_timebase.den / cfg.g_timebase.num;
   set_rate_control_metrics(&rc, framerate, ss_number_layers, ts_number_layers);
 
@@ -1836,6 +2077,7 @@
         aom_codec_control(&codec, AV1E_SET_SVC_REF_FRAME_COMP_PRED,
                           &ref_frame_comp_pred);
       }
+      add_multilayer_metadata(&raw, multilayer_metadata);
       // Set the speed per layer.
       if (test_speed_per_layer) {
         int speed_per_layer = 10;
diff --git a/test/svc_encoder_rtc.sh b/test/svc_encoder_rtc.sh index c8b2891..8df2f6b 100644 --- a/test/svc_encoder_rtc.sh +++ b/test/svc_encoder_rtc.sh
@@ -83,6 +83,36 @@ svc_encoder_s2_t1() { local encoder="${LIBAOM_BIN_PATH}/svc_encoder_rtc${AOM_TEST_EXE_SUFFIX}" local output_file="${AOM_TEST_OUTPUT_DIR}/svc_encoder_rtc" + local metadata_file="${AOM_TEST_OUTPUT_DIR}/multilayer_metadata.yaml" + cat > "${metadata_file}" <<EOF + + # test comment + +use_case: 1 # alpha +layers: +# first layer... +use_case: 1 # alpha +layers: + - layer_type: 5 # alpha + luma_plane_only_flag: 1 + layer_metadata_scope: 2 # global + alpha: + alpha_use_idc: 2 # segmentation + alpha_bit_depth: 8 + alpha_transparent_value: 0 + alpha_opaque_value: 4 + label_type_id: + - 5 + - 3 + - 9 + - 128 + - 42 + +# second layer... + - layer_type: 1 # texture + luma_plane_only_flag: 0 + +EOF if [ ! -x "${encoder}" ]; then elog "${encoder} does not exist or is not executable." @@ -99,6 +129,7 @@ "--spatial-layers=2" \ "--temporal-layers=1" \ "--timebase=1/30" \ + "--multilayer_metadata_file=${metadata_file}" \ "${YUV_RAW_INPUT}" \ "${YUV_RAW_INPUT}" \ "-o ${output_file}" \