/*
 * Copyright (c) 2023, Alliance for Open Media. All rights reserved
 *
 * This source code is subject to the terms of the BSD 3-Clause Clear License
 * and the Alliance for Open Media Patent License 1.0. If the BSD 3-Clause Clear
 * License was not distributed with this source code in the LICENSE file, you
 * can obtain it at aomedia.org/license/software-license/bsd-3-c-c/.  If the
 * Alliance for Open Media Patent License 1.0 was not distributed with this
 * source code in the PATENTS file, you can obtain it at
 * aomedia.org/license/patent-license/.
 */

// Protobuf messages for frame data extraction from AVM streams.

 | syntax = "proto3"; | 
 |  | 
 | package avm.tools; | 
 |  | 
 | // Metadata for a symbol, including source information (C file/line/function). | 
 | message SymbolInfo { | 
 |   // Unique identifier for this symbol type. Note: these ids aren't stable | 
 |   // across frames. | 
 |   int32 id = 1; | 
 |   string source_file = 2; | 
 |   int32 source_line = 3; | 
 |   string source_function = 4; | 
 |   // Additional arbitrary tags that can be added per symbol. For example, if a | 
 |   // symbol type is decoded for both luma and chroma, there might be a "luma" or | 
 |   // "chroma" tag on the symbol. | 
 |   repeated string tags = 5; | 
 | } | 
 |  | 
 | message Symbol { | 
 |   // Unique ID for each "type" of symbol. Can be looked up in symbol_info (see | 
 |   // definition of Frame message) to get metadata for each symbol. | 
 |   int32 info_id = 1; | 
 |   // Raw value of this symbol | 
 |   int32 value = 2; | 
 |   // Entropy coding mode for this symbol (e.g. literal bit, CDF, etc...) | 
 |   int32 coding_mode = 3; | 
 |   float bits = 4; | 
 | } | 
 |  | 
 | // Range of symbols that make up some object. Symbols are stored at the | 
 | // superblock level, but lower-level constructs (e.g. transform blocks) can | 
 | // refer to a range in its superblock's symbol list for the exact subsequence of | 
 | // symbols that created it. Note that end is exclusive, e.g. (start = | 
 | // 5, end = 9) would refer to symbols 5, 6, 7, 8 in the superblock. | 
 | message SymbolRange { | 
 |   uint32 start = 1; | 
 |   uint32 end = 2; | 
 | } | 
 |  | 
 | // Size of a coding block, in pixel units | 
 | message BlockSize { | 
 |   int32 width = 1; | 
 |   int32 height = 2; | 
 |   // av1/common/enums.h enum value that corresponds to this block size | 
 |   int32 enum_value = 3; | 
 | } | 
 |  | 
 | // This contains the same fields as BlockSize, but is a distinct type since the | 
 | // meaning of its enum values is different than BlockSize. | 
 | message TransformSize { | 
 |   int32 width = 1; | 
 |   int32 height = 2; | 
 |   // av1/common/enums.h enum value that corresponds to this transform block size | 
 |   int32 enum_value = 3; | 
 | } | 
 |  | 
 | // Absolute position within a frame, in pixel units | 
 | message Position { | 
 |   int32 x = 1; | 
 |   int32 y = 2; | 
 | } | 
 |  | 
 | message TransformUnit { | 
 |   Position position = 1; | 
 |   int32 tx_type = 2; | 
 |   TransformSize size = 3; | 
 |   int32 skip = 4; | 
 |   repeated int32 quantized_coeffs = 5; | 
 |   repeated int32 dequantized_coeffs = 6; | 
 |   repeated int32 dequantizer_values = 7; | 
 |   SymbolRange symbol_range = 8; | 
 | } | 
 |  | 
 | message TransformPlane { | 
 |   int32 plane = 1; | 
 |   repeated TransformUnit transform_units = 2; | 
 | } | 
 |  | 
 | message MotionVector { | 
 |   int32 ref_frame = 1; | 
 |   sint32 dx = 2; | 
 |   sint32 dy = 3; | 
 |   SymbolRange symbol_range = 4; | 
 |   int32 ref_frame_order_hint = 5; | 
 |   bool ref_frame_is_tip = 6; | 
 |   bool ref_frame_is_inter = 7; | 
 | } | 
 |  | 
 | message PredictionParams { | 
 |   int32 mode = 1; | 
 |   int32 uv_mode = 2; | 
 |   int32 angle_delta = 3; | 
 |   repeated MotionVector motion_vectors = 4; | 
 |   bool use_intrabc = 5; | 
 |   int32 palette_count = 6; | 
 |   int32 uv_palette_count = 7; | 
 |   int32 compound_type = 8; | 
 |   int32 motion_mode = 9; | 
 |   int32 interpolation_filter = 10; | 
 |   int32 cfl_alpha_idx = 11; | 
 |   int32 cfl_alpha_sign = 12; | 
 |   int32 uv_angle_delta = 13; | 
 |   int32 motion_vector_precision = 14; | 
 | } | 
 |  | 
 | message PixelBuffer { | 
 |   int32 width = 1; | 
 |   int32 height = 2; | 
 |   int32 bit_depth = 3; | 
 |   repeated uint32 pixels = 4; | 
 | } | 
 |  | 
 | message PixelData { | 
 |   int32 plane = 1; | 
 |   // Original source pixels before encoding. Not available in the bitstream, so | 
 |   // the source YUV needs to be passed in separately to the extract_proto tool. | 
 |   PixelBuffer original = 2; | 
 |   PixelBuffer reconstruction = 3; | 
 |   PixelBuffer prediction = 4; | 
 |   // Reconstructed pixels BEFORE any filters are applied. | 
 |   PixelBuffer pre_filtered = 5; | 
 | } | 
 |  | 
 | // Leaf of the partition tree | 
 | message CodingUnit { | 
 |   Position position = 1; | 
 |   BlockSize size = 2; | 
 |   bool skip = 3; | 
 |   PredictionParams prediction_mode = 4; | 
 |   // TODO(comc): Support transform tree partition (max depth = 2?) | 
 |   // With SDP enabled, for the luma partition tree, exactly one plane will be present. | 
 |   // With SDP enabled, for the chroma partition tree, exactly two planes (U, V) will be present. | 
 |   // With SDP disabled, only a single shared partition tree exists, and all three planes will be present. | 
 |   repeated TransformPlane transform_planes = 5; | 
 |   SymbolRange symbol_range = 6; | 
 |   int32 qindex = 7; | 
 |   int32 segment_id = 8; | 
 |   int32 cdef_level = 9; | 
 |   int32 cdef_strength = 10; | 
 | } | 
 |  | 
 | // Range of coding units that make up a block at some level in the partition | 
 | // tree. Note that end is exclusive, e.g. (start = 5, end = 9) would refer to | 
 | // coding units 5, 6, 7, 8 in the superblock. | 
 | message CodingUnitRange { | 
 |   uint32 start = 1; | 
 |   uint32 end = 2; | 
 | } | 
 |  | 
 | message Partition { | 
 |   Position position = 1; | 
 |   BlockSize size = 2; | 
 |   int32 partition_type = 3; | 
 |   repeated Partition children = 4; | 
 |   // If this partition has children, coding_units will be a range representing | 
 |   // the coding units comprising all its children. If this is a leaf node, | 
 |   // coding_units will refer to exactly one CodingUnit, i.e. the range start is | 
 |   // equal to the range end. | 
 |   CodingUnitRange coding_unit_range = 5; | 
 |   SymbolRange symbol_range = 6; | 
 |   // True if this partition has more children, or false if it contains exactly | 
 |   // one coding unit. | 
 |   bool is_leaf_node = 7; | 
 | } | 
 |  | 
 | message Superblock { | 
 |   Position position = 1; | 
 |   BlockSize size = 2; | 
 |   Partition luma_partition_tree = 3; | 
 |   Partition chroma_partition_tree = 4; | 
 |   // Is SDP (semi-decoupled partitioning) enabled? | 
 |   bool has_separate_chroma_partition_tree = 5; | 
 |   // If this frame does not use SDP, all coding units will be stored in | 
 |   // coding_units_shared and coding_units_chroma will be empty. | 
 |   // If this frame uses SDP, the luma coding units will be stored in | 
 |   // coding_units_shared, and the chroma coding units will be stored in | 
 |   // coding_units_chroma. | 
 |   repeated CodingUnit coding_units_shared = 6; | 
 |   repeated CodingUnit coding_units_chroma = 7; | 
 |   repeated Symbol symbols = 8; | 
 |   repeated PixelData pixel_data = 9; | 
 | } | 
 |  | 
 | // Map C enum values to names. This is done rather than just using proto enums | 
 | // for a few reasons: | 
 | // - Proto3 enums REQUIRE a zero value, and strongly recommend it's used as an | 
 | //   unknown / unspecified value. This doesn't map cleanly to the AVM enums. | 
 | // - AVM's enums can evolve over time, or even within the same anchor if | 
 | //   different experiments / defines are used. Defining this enum mapping is | 
 | //   more maintainable than having a separate source of truth in this proto | 
 | //   schema. | 
 | message EnumMappings { | 
 |   map<int32, string> transform_type_mapping = 1; | 
 |   map<int32, string> entropy_coding_mode_mapping = 2; | 
 |   map<int32, string> interpolation_filter_mapping = 3; | 
 |   map<int32, string> prediction_mode_mapping = 4; | 
 |   map<int32, string> uv_prediction_mode_mapping = 5; | 
 |   map<int32, string> motion_mode_mapping = 6; | 
 |   map<int32, string> transform_size_mapping = 7; | 
 |   map<int32, string> block_size_mapping = 8; | 
 |   map<int32, string> partition_type_mapping = 9; | 
 |   map<int32, string> frame_type_mapping = 10; | 
 |   map<int32, string> tip_mode_mapping = 11; | 
 |   map<int32, string> motion_vector_precision_mapping = 12; | 
 | } | 
 |  | 
 | // TODO(comc): Add tile info and refactor FrameParams if necessary | 
 | message FrameParams { | 
 |   int32 frame_type = 1; | 
 |   int32 width = 2; | 
 |   int32 height = 3; | 
 |   int32 decode_index = 4; | 
 |   // Global display index, unique within the whole stream. | 
 |   int32 display_index = 5; | 
 |   BlockSize superblock_size = 6; | 
 |   bool show_frame = 7; | 
 |   int32 base_qindex = 8; | 
 |   int32 bit_depth = 9; | 
 |   // Raw display index, may not be unique within the whole stream (e.g. if the stream contains more than one sequence). | 
 |   int32 raw_display_index = 10; | 
 |   int32 subsampling_x = 11;  // Chroma subsampling for x | 
 |   int32 subsampling_y = 12;  // Chroma subsampling for y | 
 | } | 
 |  | 
 | message StreamParams { | 
 |   string stream_hash = 1; | 
 |   string stream_name = 2; | 
 |   float frame_rate = 3; | 
 |   // Note: these are present both here and in FrameParams. For most streams we | 
 |   // care about, the frame dimensions will be the same across every frame. For | 
 |   // streams with variable-sized frames, these fields can be omitted. | 
 |   int32 width = 4; | 
 |   int32 height = 5; | 
 |   string avm_version = 6; | 
 |   map<string, string> encoder_args = 7; | 
 |   string stream_path = 8; | 
 | } | 
 |  | 
 | message TipFrameParams { | 
 |   int32 tip_mode = 1; | 
 |   repeated PixelData pixel_data = 2; | 
 | } | 
 |  | 
 | message Frame { | 
 |   // Note: StreamParams encapsulates all parameters that are common to the | 
 |   // entire stream, e.g. the encoder version and args used to produce it. Since | 
 |   // the storage granularity of these protos is individual frames, not entire | 
 |   // streams, it is stored as a field of the Frame message. Identical | 
 |   // StreamParams will be present on each individual Frame message that make up | 
 |   // one stream. | 
 |   StreamParams stream_params = 1; | 
 |   FrameParams frame_params = 2; | 
 |   repeated Superblock superblocks = 3; | 
 |   map<int32, SymbolInfo> symbol_info = 4; | 
 |   EnumMappings enum_mappings = 5; | 
 |   TipFrameParams tip_frame_params = 6; | 
 | } |