| /* |
| * Copyright (c) 2022, Alliance for Open Media. All rights reserved |
| * |
| * This source code is subject to the terms of the BSD 2 Clause License and |
| * the Alliance for Open Media Patent License 1.0. If the BSD 2 Clause License |
| * was not distributed with this source code in the LICENSE file, you can |
| * obtain it at www.aomedia.org/license/software. If the Alliance for Open |
| * Media Patent License 1.0 was not distributed with this source code in the |
| * PATENTS file, you can obtain it at www.aomedia.org/license/patent. |
| */ |
| |
| #ifndef AOM_AV1_QMODE_RC_RATECTRL_QMODE_INTERFACE_H_ |
| #define AOM_AV1_QMODE_RC_RATECTRL_QMODE_INTERFACE_H_ |
| |
#include <array>
#include <cassert>
#include <cstdint>
#include <string>
#include <utility>
#include <vector>

#include "aom/aom_codec.h"
#include "av1/encoder/firstpass.h"
| |
| namespace aom { |
| |
// Number of per-block reference entries; sizes the mv and ref_frame_index
// arrays in TplBlockStats.
constexpr int kBlockRefCount = 2;
| |
// A motion vector stored with sub-pixel precision.
struct MotionVector {
  // Row component, in subpel units.
  int row;
  // Column component, in subpel units.
  int col;
  // Number of fractional bits used by row/col.
  // TODO(b/241589513): Move this to TplFrameStats; it's wasteful to code it
  // separately for each block.
  int subpel_bits;
};
| |
// Total number of TPL passes. Each enumerator's numeric value equals the
// number of passes it names.
enum class TplPassCount {
  kOneTplPass = 1,
  kTwoTplPasses = 2,
};
| |
| struct RateControlParam { |
| // Range of allowed GOP sizes (number of displayed frames). |
| int max_gop_show_frame_count; |
| int min_gop_show_frame_count; |
| // Number of reference frame buffers, i.e., size of the DPB. |
| int ref_frame_table_size; |
| // Maximum number of references a single frame may use. |
| int max_ref_frames; |
| |
| int base_q_index; |
| |
| // If greater than 1, enables per-superblock q_index, and limits the number of |
| // unique q_index values which may be used in a frame (each of which will have |
| // its own unique rdmult value). |
| int max_distinct_q_indices_per_frame; |
| |
| // If per-superblock q_index is enabled and this is greater than 1, enables |
| // additional per-superblock scaling of lambda, and limits the number of |
| // unique lambda scale values which may be used in a frame. |
| int max_distinct_lambda_scales_per_frame; |
| |
| int frame_width; |
| int frame_height; |
| |
| // Total number of TPL passes. |
| TplPassCount tpl_pass_count = TplPassCount::kOneTplPass; |
| // Current TPL pass number, 0 or 1 (for GetTplPassGopEncodeInfo). |
| int tpl_pass_index = 0; |
| }; |
| |
| struct TplBlockStats { |
| int16_t height; // Pixel height. |
| int16_t width; // Pixel width. |
| int16_t row; // Pixel row of the top left corner. |
| int16_t col; // Pixel col of the top left corner. |
| int64_t intra_cost; // Rd cost of the best intra mode. |
| int64_t inter_cost; // Rd cost of the best inter mode. |
| |
| // Valid only if TplFrameStats::rate_dist_present is true: |
| int64_t recrf_rate; // Bits when using recon as reference. |
| int64_t recrf_dist; // Distortion when using recon as reference. |
| int64_t intra_pred_err; // Prediction residual of the intra mode. |
| int64_t inter_pred_err; // Prediction residual of the inter mode. |
| |
| std::array<MotionVector, kBlockRefCount> mv; |
| std::array<int, kBlockRefCount> ref_frame_index; |
| }; |
| |
// GOP frame type, used to facilitate setting up a GopFrame.
// TODO(angiebird): Define names for forward key frame and
// key frame with overlay
enum class GopFrameType {
  // High quality key frame without overlay.
  kRegularKey,
  // Regular leaf frame.
  kRegularLeaf,
  // Regular golden frame.
  kRegularGolden,
  // High quality arf with strong filtering, followed by an overlay later.
  kRegularArf,
  // Overlay frame.
  kOverlay,
  // Intermediate overlay frame.
  kIntermediateOverlay,
  // Good quality arf with weak or no filtering, followed by a show_existing
  // later.
  kIntermediateArf,
};
| |
// Reference mode of a frame; stored in GopFrame::encode_ref_mode.
enum class EncodeRefMode {
  kRegular,
  kOverlay,
  kShowExisting,
};
| |
// Name attached to a reference frame (see ReferenceFrame::name). Numeric
// values are assigned explicitly.
enum class ReferenceName {
  kNoneFrame = -1,
  kIntraFrame = 0,
  kLastFrame = 1,
  kLast2Frame = 2,
  kLast3Frame = 3,
  kGoldenFrame = 4,
  kBwdrefFrame = 5,
  kAltref2Frame = 6,
  kAltrefFrame = 7,
};
| |
| struct Status { |
| aom_codec_err_t code; |
| std::string message; // Empty if code == AOM_CODEC_OK. |
| bool ok() const { return code == AOM_CODEC_OK; } |
| }; |
| |
| // A very simple imitation of absl::StatusOr, this is conceptually a union of a |
| // Status struct and an object of type T. It models an object that is either a |
| // usable object, or an error explaining why such an object is not present. A |
| // StatusOr<T> may never hold a status with a code of AOM_CODEC_OK. |
| template <typename T> |
| class StatusOr { |
| public: |
| StatusOr(const T &value) : value_(value) {} |
| StatusOr(T &&value) : value_(std::move(value)) {} |
| StatusOr(Status status) : status_(std::move(status)) { |
| assert(status_.code != AOM_CODEC_OK); |
| } |
| |
| const Status &status() const { return status_; } |
| bool ok() const { return status().ok(); } |
| |
| // operator* returns the value; it should only be called after checking that |
| // ok() returns true. |
| const T &operator*() const & { return value_; } |
| T &operator*() & { return value_; } |
| const T &&operator*() const && { return value_; } |
| T &&operator*() && { return std::move(value_); } |
| |
| // sor->field is equivalent to (*sor).field. |
| const T *operator->() const & { return &value_; } |
| T *operator->() & { return &value_; } |
| |
| // value() is equivalent to operator*, but asserts that ok() is true. |
| const T &value() const & { |
| assert(ok()); |
| return value_; |
| } |
| T &value() & { |
| assert(ok()); |
| return value_; |
| } |
| const T &&value() const && { |
| assert(ok()); |
| return value_; |
| } |
| T &&value() && { |
| assert(ok()); |
| return std::move(value_); |
| } |
| |
| private: |
| T value_; // This could be std::optional<T> if it were available. |
| Status status_ = { AOM_CODEC_OK, "" }; |
| }; |
| |
| struct ReferenceFrame { |
| int index; // Index of reference slot containing the reference frame |
| ReferenceName name; |
| }; |
| |
| struct GopFrame { |
| // basic info |
| bool is_valid; |
| int order_idx; // Index in display order in a GOP |
| int coding_idx; // Index in coding order in a GOP |
| int display_idx; // The number of displayed frames preceding this frame in |
| // a GOP |
| |
| int global_order_idx; // Index in display order in the whole video chunk |
| int global_coding_idx; // Index in coding order in the whole video chunk |
| |
| bool is_key_frame; // If this is key frame, reset reference buffers are |
| // required |
| bool is_arf_frame; // Is this a forward frame, a frame with order_idx |
| // higher than the current display order |
| bool is_show_frame; // Is this frame a show frame after coding |
| bool is_golden_frame; // Is this a high quality frame |
| |
| GopFrameType update_type; // This is a redundant field. It is only used for |
| // easy conversion in SW integration. |
| |
| // reference frame info |
| EncodeRefMode encode_ref_mode; |
| int colocated_ref_idx; // colocated_ref_idx == -1 when encode_ref_mode == |
| // EncodeRefMode::kRegular |
| int update_ref_idx; // The reference index that this frame should be |
| // updated to. update_ref_idx == -1 when this frame |
| // will not serve as a reference frame |
| std::vector<ReferenceFrame> |
| ref_frame_list; // A list of available reference frames in priority order |
| // for the current to-be-coded frame. The list size |
| // should be less or equal to ref_frame_table_size. The |
| // reference frames with smaller indices are more likely |
| // to be a good reference frame. Therefore, they should |
| // be prioritized when the reference frame count is |
| // limited. For example, if we plan to use 3 reference |
| // frames, we should choose ref_frame_list[0], |
| // ref_frame_list[1] and ref_frame_list[2]. |
| int layer_depth; // Layer depth in the GOP structure |
| ReferenceFrame primary_ref_frame; // We will use the primary reference frame |
| // to update current frame's initial |
| // probability model |
| }; |
| |
| struct GopStruct { |
| int show_frame_count; |
| int global_coding_idx_offset; |
| int global_order_idx_offset; |
| // TODO(jingning): This can be removed once the framework is up running. |
| int display_tracker; // Track the number of frames displayed proceeding a |
| // current coding frame. |
| std::vector<GopFrame> gop_frame_list; |
| }; |
| |
| using GopStructList = std::vector<GopStruct>; |
| |
// Encode parameters for one superblock: a q_index and the rdmult paired with
// it.
struct SuperblockEncodeParameters {
  int q_index;
  int rdmult;
};
| |
| struct FrameEncodeParameters { |
| // Base q_index for the frame. |
| int q_index; |
| |
| // Frame level Lagrangian multiplier. |
| int rdmult; |
| |
| // If max_distinct_q_indices_per_frame <= 1, this will be empty. |
| // Otherwise: |
| // - There must be one entry per 64x64 superblock, in row-major order |
| // - There may be no more than max_distinct_q_indices_per_frame unique q_index |
| // values |
| // - All entries with the same q_index must have the same rdmult |
| // (If it's desired to use different rdmult values with the same q_index, this |
| // must be done with superblock_lambda_scales.) |
| std::vector<SuperblockEncodeParameters> superblock_encode_params; |
| |
| // If max_distinct_q_indices_per_frame <= 1 or |
| // max_distinct_lambda_scales_per_frame <= 1, this will be empty. Otherwise, |
| // it will have one entry per 64x64 superblock, in row-major order, with no |
| // more than max_distinct_lambda_scales_per_frame unique values. Each entry |
| // should be multiplied by the rdmult in the corresponding superblock's entry |
| // in superblock_encode_params. |
| std::vector<float> superblock_lambda_scales; |
| }; |
| |
| struct FirstpassInfo { |
| int num_mbs_16x16; // Count of 16x16 unit blocks in each frame. |
| // FIRSTPASS_STATS's unit block size is 16x16 |
| std::vector<FIRSTPASS_STATS> stats_list; |
| }; |
| |
| // In general, the number of elements in RefFrameTable must always equal |
| // ref_frame_table_size (as specified in RateControlParam), but see |
| // GetGopEncodeInfo for the one exception. |
| using RefFrameTable = std::vector<GopFrame>; |
| |
| struct GopEncodeInfo { |
| std::vector<FrameEncodeParameters> param_list; |
| RefFrameTable final_snapshot; // RefFrameTable snapshot after coding this GOP |
| }; |
| |
| struct TplFrameStats { |
| int min_block_size; |
| int frame_width; |
| int frame_height; |
| bool rate_dist_present; // True if recrf_rate and recrf_dist are populated. |
| std::vector<TplBlockStats> block_stats_list; |
| // Optional stats computed with different settings, should be empty unless |
| // tpl_pass_count == kTwoTplPasses. |
| std::vector<TplBlockStats> alternate_block_stats_list; |
| }; |
| |
| struct TplGopStats { |
| std::vector<TplFrameStats> frame_stats_list; |
| }; |
| |
| // Structure and TPL stats for a single GOP, to be used for lookahead. |
| struct LookaheadStats { |
| const GopStruct *gop_struct; // Not owned, may not be nullptr. |
| const TplGopStats *tpl_gop_stats; // Not owned, may not be nullptr. |
| }; |
| |
| class AV1RateControlQModeInterface { |
| public: |
| AV1RateControlQModeInterface(); |
| virtual ~AV1RateControlQModeInterface(); |
| |
| virtual Status SetRcParam(const RateControlParam &rc_param) = 0; |
| virtual StatusOr<GopStructList> DetermineGopInfo( |
| const FirstpassInfo &firstpass_info) = 0; |
| |
| // Accepts GOP structure and TPL info from the encoder and returns q index and |
| // rdmult for each frame. This should be called with consecutive GOPs as |
| // returned by DetermineGopInfo. |
| // |
| // GOP structure and TPL info from zero or more subsequent GOPs may optionally |
| // be passed in lookahead_stats. |
| // |
| // For the first GOP, a default-constructed RefFrameTable may be passed in as |
| // ref_frame_table_snapshot_init; for subsequent GOPs, it should be the |
| // final_snapshot returned on the previous call. |
| // |
| // TODO(b/260859962): Remove these once all callers and overrides are gone. |
| virtual StatusOr<GopEncodeInfo> GetGopEncodeInfo( |
| const GopStruct &gop_struct AOM_UNUSED, |
| const TplGopStats &tpl_gop_stats AOM_UNUSED, |
| const std::vector<LookaheadStats> &lookahead_stats AOM_UNUSED, |
| const RefFrameTable &ref_frame_table_snapshot AOM_UNUSED) { |
| return Status{ AOM_CODEC_UNSUP_FEATURE, "Deprecated" }; |
| } |
| virtual StatusOr<GopEncodeInfo> GetTplPassGopEncodeInfo( |
| const GopStruct &gop_struct AOM_UNUSED) { |
| return Status{ AOM_CODEC_UNSUP_FEATURE, "Deprecated" }; |
| } |
| |
| // Extensions to the API to pass in the first pass info. There should be stats |
| // for all frames starting from the first frame of the GOP and continuing to |
| // the end of the sequence. |
| // TODO(b/260859962): Make pure virtual once all derived classes implement it. |
| virtual StatusOr<GopEncodeInfo> GetGopEncodeInfo( |
| const GopStruct &gop_struct AOM_UNUSED, |
| const TplGopStats &tpl_gop_stats AOM_UNUSED, |
| const std::vector<LookaheadStats> &lookahead_stats AOM_UNUSED, |
| const FirstpassInfo &firstpass_info AOM_UNUSED, |
| const RefFrameTable &ref_frame_table_snapshot AOM_UNUSED) { |
| return Status{ AOM_CODEC_UNSUP_FEATURE, "Not yet implemented" }; |
| } |
| virtual StatusOr<GopEncodeInfo> GetTplPassGopEncodeInfo( |
| const GopStruct &gop_struct AOM_UNUSED, |
| const FirstpassInfo &firstpass_info AOM_UNUSED) { |
| return Status{ AOM_CODEC_UNSUP_FEATURE, "Not yet implemented" }; |
| } |
| }; |
| } // namespace aom |
| |
| #endif // AOM_AV1_QMODE_RC_RATECTRL_QMODE_INTERFACE_H_ |