Additions to Encoder overview document.
Added text for the main rate control loop.
Added some additional references.
Removed some explicit references to .c files.
Change-Id: I4f68dbb782ca34aeb0973cc2b39bae0ab450ec7e
diff --git a/av1/encoder/speed_features.h b/av1/encoder/speed_features.h
index 7814ea8b..e362d29 100644
--- a/av1/encoder/speed_features.h
+++ b/av1/encoder/speed_features.h
@@ -117,14 +117,25 @@
RESERVE_3_SF = 128,
} UENUM1BYTE(DEV_SPEED_FEATURES);
+/* This enumeration defines when the rate control recode loop will be
+ * enabled.
+ */
enum {
- // No recode.
+ /*
+ * No recodes allowed
+ */
DISALLOW_RECODE = 0,
- // Allow recode for KF and exceeding maximum frame bandwidth.
+ /*
+ * Recode KFs exceeding maximum frame bandwidth
+ */
ALLOW_RECODE_KFMAXBW = 1,
- // Allow recode only for KF/ARF/GF frames.
+ /*
+ * Allow recode only for KF/ARF/GF frames
+ */
ALLOW_RECODE_KFARFGF = 2,
- // Allow recode for all frames based on bitrate constraints.
+ /*
+ * Allow recode for all frame types based on bitrate constraints.
+ */
ALLOW_RECODE = 3,
} UENUM1BYTE(RECODE_LOOP_TYPE);
@@ -276,15 +287,27 @@
QTR_ONLY,
} UENUM1BYTE(MV_PREC_LOGIC);
+/*!\endcond */
+/*!
+ * \brief Sequence/frame level speed vs quality features
+ */
typedef struct HIGH_LEVEL_SPEED_FEATURES {
+ /*!\cond */
// Frame level coding parameter update
int frame_parameter_update;
+ /*!\endcond */
+ /*!
+ * Cases and frame types for which the recode loop is enabled.
+ */
RECODE_LOOP_TYPE recode_loop;
- // This feature controls the tolerence vs target used in deciding whether to
- // recode a frame. It has no meaning if recode is disabled.
+ /*!
+ * Controls the tolerance vs target rate used in deciding whether to
+ * recode a frame. It has no meaning if recode is disabled.
+ */
int recode_tolerance;
+ /*!\cond */
// Determine how motion vector precision is chosen. The possibilities are:
// LAST_MV_DATA: use the mv data from the last coded frame
@@ -309,7 +332,9 @@
// Enable/disable second_alt_ref temporal filtering.
int second_alt_ref_filtering;
+ /*!\endcond */
} HIGH_LEVEL_SPEED_FEATURES;
+/*!\cond */
typedef struct TPL_SPEED_FEATURES {
// Enable/disable GOP length adaptive decision.
@@ -1019,72 +1044,70 @@
* \brief Top level speed vs quality trade off data struture.
*/
typedef struct SPEED_FEATURES {
- /*!\cond */
- /*
+ /*!
* Sequence/frame level speed features:
*/
HIGH_LEVEL_SPEED_FEATURES hl_sf;
- /*
+ /*!
* Speed features related to how tpl's searches are done.
*/
TPL_SPEED_FEATURES tpl_sf;
- /*
+ /*!
* Global motion speed features:
*/
GLOBAL_MOTION_SPEED_FEATURES gm_sf;
- /*
+ /*!
* Partition search speed features:
*/
PARTITION_SPEED_FEATURES part_sf;
- /*
+ /*!
* Motion search speed features:
*/
MV_SPEED_FEATURES mv_sf;
- /*
+ /*!
* Inter mode search speed features:
*/
INTER_MODE_SPEED_FEATURES inter_sf;
- /*
+ /*!
* Interpolation filter search speed features:
*/
INTERP_FILTER_SPEED_FEATURES interp_sf;
- /*
+ /*!
* Intra mode search speed features:
*/
INTRA_MODE_SPEED_FEATURES intra_sf;
- /*
+ /*!
* Transform size/type search speed features:
*/
TX_SPEED_FEATURES tx_sf;
- /*
+ /*!
* RD calculation speed features:
*/
RD_CALC_SPEED_FEATURES rd_sf;
- /*
+ /*!
* Two-pass mode evaluation features:
*/
WINNER_MODE_SPEED_FEATURES winner_mode_sf;
- /*
+ /*!
* In-loop filter speed features:
*/
LOOP_FILTER_SPEED_FEATURES lpf_sf;
- /*
+ /*!
* Real-time mode speed features:
*/
REAL_TIME_SPEED_FEATURES rt_sf;
- /*!\endcond */
} SPEED_FEATURES;
/*!\cond */
diff --git a/doc/dev_guide/av1_encoder.dox b/doc/dev_guide/av1_encoder.dox
index fadf0ee..0d80744 100644
--- a/doc/dev_guide/av1_encoder.dox
+++ b/doc/dev_guide/av1_encoder.dox
@@ -288,8 +288,8 @@
\subsubsection architecture_enc_1pass_vbr 1 Pass VBR Encoding
The command line for libaom does allow 1 Pass VBR, but this has not been
-properly optimised and behaves much like 1 pass CBR in most regards with bits
-allocated to frames by the following functions defined in ratectrl.c
+properly optimised and behaves much like 1 pass CBR in most regards, with bits
+allocated to frames by the following functions:
- \ref av1_calc_iframe_target_size_one_pass_vbr()
- \ref av1_calc_pframe_target_size_one_pass_vbr()
@@ -333,19 +333,19 @@
- \ref FIRSTPASS_STATS *frame_stats_buf (used to store per frame first
pass stats)
-For two pass encoding, the function \ref av1_encode() in encoder.c will first
-be called for each frame in the video with the value \ref AV1EncoderConfig.pass
-= 1. This will result in calls to \ref av1_first_pass() in firspass.c
+For two pass encoding, the function \ref av1_encode() will first be called
+for each frame in the video with the value \ref AV1EncoderConfig.pass = 1.
+This will result in calls to \ref av1_first_pass().
Statistics for each frame are stored in \ref FIRSTPASS_STATS frame_stats_buf.
After completion of the first pass, \ref av1_encode() will be called again for
each frame with \ref AV1EncoderConfig.pass = 2. The frames are then encoded in
accordance with the statistics gathered during the first pass by calls to
-\ref encode_frame_to_data_rate() in encoder.c.
+\ref encode_frame_to_data_rate().
\ref encode_frame_to_data_rate() in turn calls (TODO REF)
-av1_get_second_pass_params() which is defined in pass2_strategy.c
+av1_get_second_pass_params().
In summary the second pass code :-
@@ -361,8 +361,7 @@
- Tracks adherence to the overall rate control objectives and adjusts
heuristics.
-The main two pass 2 functions in regard to the above (also in pass2_strategy.c)
-include:-
+The main second pass functions in regard to the above include:-
- (TODO REF) find_next_key_frame()
- (TODO REF) define_gf_group()
@@ -381,8 +380,8 @@
maintains a record of the actual Q value used to encode previous frames
at each level in the current pyramid hierarchy
(\ref RATE_CONTROL.active_best_quality). The function
-\ref rc_pick_q_and_bounds(), defined in rate_ctrl.c, uses these values
-to set a permitted Q range for each frame.
+\ref rc_pick_q_and_bounds() uses these values to set a permitted Q range
+for each frame.
\subsubsection architecture_enc_1pass_lagged 1 Pass Lagged VBR Encoding
@@ -402,7 +401,72 @@
\subsection architecture_enc_rc_loop The Main Rate Control Loop
- Add details here.
+Having established a target rate for a given frame and an allowed range of Q
+values, the encoder then tries to encode the frame at a rate that is as close
+as possible to the target value, given the Q range constraints.
+
+There are two main mechanisms by which this is achieved.
+
+The first selects a frame level Q, using an adaptive estimate of the number of
+bits that will be generated when the frame is encoded at any given Q.
+Fundamentally this mechanism is common to VBR, CBR and to use cases such as
+RTC with small adjustments.
+
+As the Q value mainly adjusts the precision of the residual signal, it is not
+actually a reliable basis for accurately predicting the number of bits that
+will be generated across all clips. A well predicted clip, for example, may
+have a much smaller error residual after prediction. The algorithm copes with
+this by adapting its predictions on the fly using a feedback loop based on how
+well it did the previous time around.
+
+The main functions responsible for the prediction of Q and the adaptation over
+time, for the two pass encoding pipeline, are:
+
+- \ref rc_pick_q_and_bounds()
+ - (TODO REF) get_q()
+ - (TODO REF) av1_rc_regulate_q()
+ - (TODO REF) get_rate_correction_factor()
+ - (TODO REF) find_closest_qindex_by_rate()
+
+- (TODO REF) av1_twopass_postencode_update()
+ - (TODO REF) av1_rc_update_rate_correction_factors()
+
+The second mechanism for control comes into play if there is a large rate miss
+for the current frame (much too big or too small). This is a recode mechanism
+which allows the current frame to be re-encoded one or more times with a
+revised Q value. This obviously has significant implications for encode speed
+and, in the case of RTC, latency (hence it is not used for the RTC pathway).
+
+Whether or not a recode is allowed for a given frame depends on the selected
+encode speed vs quality trade off. This is set on the command line using the
+--cpu-used parameter which maps onto the \ref AV1_COMP.speed field in the main
+compressor instance data structure.
+
+The value of \ref AV1_COMP.speed, combined with the use case, is used to
+populate the speed features data structure \ref AV1_COMP.sf. In particular
+\ref HIGH_LEVEL_SPEED_FEATURES.recode_loop determines the types of frames that
+may be recoded and \ref HIGH_LEVEL_SPEED_FEATURES.recode_tolerance is a rate
+error trigger threshold.
+
+For more information the reader is directed to the following data structures:
+
+- \ref AV1_COMP cpi (the main compressor instance data structure)
+ - \ref AV1_COMP.speed
+ - \ref AV1_COMP.sf (\ref SPEED_FEATURES)
+
+- \ref SPEED_FEATURES (Encode speed vs quality tradeoff parameters)
+ - \ref SPEED_FEATURES.hl_sf (\ref HIGH_LEVEL_SPEED_FEATURES)
+
+- \ref HIGH_LEVEL_SPEED_FEATURES
+ - \ref HIGH_LEVEL_SPEED_FEATURES.recode_loop
+ - \ref HIGH_LEVEL_SPEED_FEATURES.recode_tolerance
+
+and functions:
+
+- (TODO REF) encode_with_recode_loop()
+- (TODO REF) recode_loop_update_q()
+- (TODO REF) av1_set_speed_features_framesize_independent()
+- (TODO REF) av1_set_speed_features_framesize_dependent()
\subsection architecture_enc_fixed_q Fixed Q Mode