doc/dev_guide/av1_encoder.dox - aom - Git at Google

 /*!\page encoder_guide AV1 ENCODING TECHNIQUES

   The AV1 encoding algorithm consists of the following modules:
     - \ref high_level_algo
       - \ref frame_coding_pipeline
       - \ref two_pass_algo
       - \ref look_ahead_buffer
     - \ref partition_search
     - \ref intra_mode_search
     - \ref inter_mode_search
     - \ref transform_search
     - \ref in_loop_filter
     - \ref in_loop_cdef
     - \ref in_loop_restoration
     - \ref rate_control
     */

 /*!\defgroup encoder_algo Encoder Algorithm
  *
  * The encoder algorithm describes how a sequence is encoded, including
  * high-level decisions as well as the algorithms used at every encoding stage.
  */

 /*!\defgroup high_level_algo High-level Algorithm
  * \ingroup encoder_algo
  * This module describes sequence level/frame level algorithm in AV1.
  * More details will be added.
  * @{
  */

  /*!\defgroup frame_coding_pipeline Frame Coding Pipeline
     \ingroup high_level_algo

  To encode a frame, first call \ref av1_receive_raw_frame() to obtain the raw
  frame data. Then call \ref av1_get_compressed_data() to encode raw frame data
  into compressed frame data. The main body of \ref av1_get_compressed_data()
  is \ref av1_encode_strategy(), which determines high-level encode strategy
  (frame type, frame placement, etc.) and then encodes the frame by calling
  \ref av1_encode(). In \ref av1_encode(), \ref av1_first_pass() will execute
  the first pass of two-pass encoding, while \ref encode_frame_to_data_rate()
  will perform the final pass for either one-pass or two-pass encoding.

  The main body of \ref encode_frame_to_data_rate() is
  \ref encode_with_recode_loop_and_filter(), which handles encoding before
  in-loop filters (with recode loops encode_with_recode_loop(), or without
  any recode loop \ref encode_without_recode()), followed by in-loop filters
  (deblocking filters \ref loopfilter_frame(), CDEF filters and restoration
  filters \ref cdef_restoration_frame()).

  Except for rate/quality control, both encode_with_recode_loop() and
  \ref encode_without_recode() call \ref av1_encode_frame() to manage reference
  frame buffers and to perform the rest of encoding that does not require
  operating external frames by \ref encode_frame_internal(), which is the
  starting point of \ref partition_search.
  */

  /*!\defgroup two_pass_algo Two Pass Mode
     \ingroup high_level_algo

  In two pass mode, the input file is passed into the encoder for a quick
  first pass, where statistics are gathered. These statistics and the input
  file are then passed back into the encoder for a second pass. The statistics
  help the encoder reach the desired bitrate without as much overshooting or
  undershooting.

  During the first pass, the codec will return "stats" packets that contain
  information useful for the second pass. The caller should concatenate these
  packets as they are received. In the second pass, the concatenated packets
  are passed in, along with the frames to encode. During the second pass,
  "frame" packets are returned that represent the compressed video.

  A complete example can be found in `examples/twopass_encoder.c`. Pseudocode
  is provided below to illustrate the core parts.

  During the first pass, the uncompressed frames are passed in and stats
  information is appended to a byte array.

 ~~~~~~~~~~~~~~~{.c}
 // For simplicity, assume that there is enough memory in the stats buffer.
 // Actual code will want to use a resizable array. stats_len represents
 // the length of data already present in the buffer.
 void get_stats_data(aom_codec_ctx_t *encoder, char *stats,
                     size_t *stats_len, bool *got_data) {
   const aom_codec_cx_pkt_t *pkt;
   aom_codec_iter_t iter = NULL;
   while ((pkt = aom_codec_get_cx_data(encoder, &iter))) {
     *got_data = true;
     if (pkt->kind != AOM_CODEC_STATS_PKT) continue;
     memcpy(stats + *stats_len, pkt->data.twopass_stats.buf,
            pkt->data.twopass_stats.sz);
     *stats_len += pkt->data.twopass_stats.sz;
   }
 }

 void first_pass(char *stats, size_t *stats_len) {
   struct aom_codec_enc_cfg first_pass_cfg;
   ... // Initialize the config as needed.
   first_pass_cfg.g_pass = AOM_RC_FIRST_PASS;
   aom_codec_ctx_t first_pass_encoder;
   ... // Initialize the encoder.

   bool got_data;
   while (frame_available) {
     // Read in the uncompressed frame, update frame_available
     aom_image_t *frame_to_encode = ...;
     aom_codec_encode(&first_pass_encoder, frame_to_encode, pts, duration,
                      flags);
     get_stats_data(&first_pass_encoder, stats, stats_len, &got_data);
   }
   // After all frames have been processed, call aom_codec_encode with
   // a NULL ptr repeatedly, until no more data is returned. The NULL
   // ptr tells the encoder that no more frames are available.
   do {
     got_data = false;
     aom_codec_encode(&first_pass_encoder, NULL, pts, duration, flags);
     get_stats_data(&first_pass_encoder, stats, stats_len, &got_data);
   } while (got_data);

   aom_codec_destroy(&first_pass_encoder);
 }
 ~~~~~~~~~~~~~~~

  During the second pass, the uncompressed frames and the stats are
  passed into the encoder.

 ~~~~~~~~~~~~~~~{.c}
 // Write out each encoded frame to the file.
 void get_cx_data(aom_codec_ctx_t *encoder, FILE *file,
                  bool *got_data) {
   const aom_codec_cx_pkt_t *pkt;
   aom_codec_iter_t iter = NULL;
   while ((pkt = aom_codec_get_cx_data(encoder, &iter))) {
    *got_data = true;
    if (pkt->kind != AOM_CODEC_CX_FRAME_PKT) continue;
    fwrite(pkt->data.frame.buf, 1, pkt->data.frame.sz, file);
   }
 }

 void second_pass(char *stats, size_t stats_len) {
   struct aom_codec_enc_cfg second_pass_cfg;
   ... // Initialize the config file as needed.
   second_pass_cfg.g_pass = AOM_RC_LAST_PASS;
   second_pass_cfg.rc_twopass_stats_in.buf = stats;
   second_pass_cfg.rc_twopass_stats_in.sz = stats_len;
   aom_codec_ctx_t second_pass_encoder;
   ... // Initialize the encoder from the config.

   FILE *output = fopen("output.obu", "wb");
   bool got_data;
   while (frame_available) {
     // Read in the uncompressed frame, update frame_available
     aom_image_t *frame_to_encode = ...;
     aom_codec_encode(&second_pass_encoder, frame_to_encode, pts, duration,
                      flags);
     get_cx_data(&second_pass_encoder, output, &got_data);
   }
   // Pass in NULL to flush the encoder.
   do {
     got_data = false;
     aom_codec_encode(&second_pass_encoder, NULL, pts, duration, flags);
     get_cx_data(&second_pass_encoder, output, &got_data);
   } while (got_data);

   aom_codec_destroy(&second_pass_encoder);
 }
 ~~~~~~~~~~~~~~~
  */

  /*!\defgroup look_ahead_buffer The Look-Ahead Buffer
     \ingroup high_level_algo

  A program should call \ref aom_codec_encode() for each frame that needs
  processing. These frames are internally copied and stored in a fixed-size
  circular buffer, known as the look-ahead buffer. Other parts of the code
  will use future frame information to inform current frame decisions;
  examples include the first-pass algorithm, TPL model, and temporal filter.
  Note that this buffer also keeps a reference to the last source frame.

  The look-ahead buffer is defined in \ref av1/encoder/lookahead.h. It acts as an
  opaque structure, with an interface to create and free memory associated with
  it. It supports pushing and popping frames onto the structure in a FIFO
  fashion. It also allows look-ahead when using the \ref av1_lookahead_peek()
  function with a non-negative number, and look-behind when -1 is passed in (for
  the last source frame; e.g., firstpass will use this for motion estimation).
  The \ref av1_lookahead_depth() function returns the current number of frames
  stored in it. Note that \ref av1_lookahead_pop() is a bit of a misnomer - it
  only pops if either the "flush" variable is set, or the buffer is at maximum
  capacity.

  The buffer is stored in the \ref AV1_COMP::lookahead field.
  It is initialized in the first call to \ref aom_codec_encode(), in the
  \ref av1_receive_raw_frame() sub-routine. The buffer size is defined by
  the \ref aom_codec_enc_cfg_t::g_lag_in_frames field of the encoder
  configuration.
  This can be modified manually but should only be set once. On the command
  line, the flag "--lag-in-frames" controls it. The default size is 19 for
  non-realtime usage and 1 for realtime. Note that a maximum value of 35 is
  enforced.

  A frame will stay in the buffer as long as possible. As mentioned above,
  the \ref av1_lookahead_pop() only removes a frame when either flush is set,
  or the buffer is full. Note that each call to \ref aom_codec_encode() inserts
  another frame into the buffer, and pop is called by the sub-function
  \ref av1_encode_strategy(). The buffer is told to flush when
  \ref aom_codec_encode() is passed a NULL image pointer. Note that the caller
  must repeatedly call \ref aom_codec_encode() with a NULL image pointer, until
  no more packets are available, in order to fully flush the buffer.

  */

 /*! @} - end defgroup high_level_algo */

 /*!\defgroup partition_search Partition Search
  * \ingroup encoder_algo
  A frame is first split into tiles in \ref encode_tiles(), with each tile
  compressed by av1_encode_tile(). Then a tile is processed in superblock rows
  via \ref av1_encode_sb_row() and then \ref encode_sb_row().

  Partition search starts with superblocks that are sequentially processed in
  \ref encode_sb_row(). For a superblock, two search modes are supported
  corresponding to the encoding configurations, \ref encode_nonrd_sb() is for
  1-pass and real-time modes, while \ref encode_rd_sb() performs more
  exhaustive searches.

  Partition search over the recursive quad-tree space is implemented by
  recursively calling \ref nonrd_use_partition(), \ref rd_use_partition(), or
  rd_pick_partition() and returning best options for sub-trees to their
  parent partitions.

  In libaom, partition search lays on top of mode search (predictor, transform,
  etc.) instead of being a separate module, the interface of mode search is
  \ref pick_sb_modes(), which connects \ref partition_search with
  \ref inter_mode_search and \ref intra_mode_search. To make good decisions,
  reconstruction is also required in order to build references and contexts, it
  is implemented by \ref encode_sb() at sub-tree level and \ref encode_b() at
  coding block level.
  * @{
  */
 /*! @} - end defgroup partition_search */

 /*!\defgroup intra_mode_search Intra Mode Search
  * \ingroup encoder_algo
  * This module describes intra mode search algorithm in AV1.
  * More details will be added.
  * @{
  */
 /*! @} - end defgroup intra_mode_search */

 /*!\defgroup inter_mode_search Inter Mode Search
  * \ingroup encoder_algo
  * This module describes inter mode search algorithm in AV1.
  * More details will be added.
  * @{
  */
 /*! @} - end defgroup inter_mode_search */

 /*!\defgroup palette_mode_search Palette Mode Search
  * \ingroup intra_mode_search
  * This module describes palette mode search algorithm in AV1.
  * More details will be added.
  * @{
  */
 /*! @} - end defgroup palette_mode_search */

 /*!\defgroup transform_search Transform Search
  * \ingroup encoder_algo
  * This module describes transform search algorithm in AV1.
  * More details will be added.
  * @{
  */
 /*! @} - end defgroup transform_search */

 /*!\defgroup in_loop_filter In-loop Filter
  * \ingroup encoder_algo
  * This module describes in-loop filter algorithm in AV1.
  * More details will be added.
  * @{
  */
 /*! @} - end defgroup in_loop_filter */

 /*!\defgroup in_loop_cdef CDEF
  * \ingroup encoder_algo
  * This module describes the CDEF parameter search algorithm
  * in AV1. More details will be added.
  * @{
  */
 /*! @} - end defgroup in_loop_cdef */

 /*!\defgroup in_loop_restoration Loop Restoration
  * \ingroup encoder_algo
  * This module describes the loop restoration search
  * and estimation algorithm in AV1.
  * More details will be added.
  * @{
  */
 /*! @} - end defgroup in_loop_restoration */

 /*!\defgroup rate_control Rate Control
  * \ingroup encoder_algo
  * This module describes rate control algorithm in AV1.
  * More details will be added.
  * @{
  */
 /*! @} - end defgroup rate_control */
	/*!\page encoder_guide AV1 ENCODING TECHNIQUES

	The AV1 encoding algorithm consists of the following modules:
	- \ref high_level_algo
	- \ref frame_coding_pipeline
	- \ref two_pass_algo
	- \ref look_ahead_buffer
	- \ref partition_search
	- \ref intra_mode_search
	- \ref inter_mode_search
	- \ref transform_search
	- \ref in_loop_filter
	- \ref in_loop_cdef
	- \ref in_loop_restoration
	- \ref rate_control
	*/

	/*!\defgroup encoder_algo Encoder Algorithm
	*
	* The encoder algorithm describes how a sequence is encoded, including high
	* level decision as well as algorithm used at every encoding stage.
	*/

	/*!\defgroup high_level_algo High-level Algorithm
	* \ingroup encoder_algo
	* This module describes sequence level/frame level algorithm in AV1.
	* More details will be added.
	* @{
	*/

	/*!\defgroup frame_coding_pipeline Frame Coding Pipeline
	\ingroup high_level_algo

	To encode a frame, first call \ref av1_receive_raw_frame() to obtain the raw
	frame data. Then call \ref av1_get_compressed_data() to encode raw frame data
	into compressed frame data. The main body of \ref av1_get_compressed_data()
	is \ref av1_encode_strategy(), which determines high-level encode strategy
	(frame type, frame placement, etc.) and then encodes the frame by calling
	\ref av1_encode(). In \ref av1_encode(), \ref av1_first_pass() will execute
	the first_pass of two-pass encoding, while \ref encode_frame_to_data_rate()
	will perform the final pass for either one-pass or two-pass encoding.

	The main body of \ref encode_frame_to_data_rate() is
	\ref encode_with_recode_loop_and_filter(), which handles encoding before
	in-loop filters (with recode loops encode_with_recode_loop(), or without
	any recode loop \ref encode_without_recode()), followed by in-loop filters
	(deblocking filters \ref loopfilter_frame(), CDEF filters and restoration
	filters \ref cdef_restoration_frame()).

	Except for rate/quality control, both encode_with_recode_loop() and
	\ref encode_without_recode() call \ref av1_encode_frame() to manage reference
	frame buffers and to perform the rest of encoding that does not require
	operating external frames by \ref encode_frame_internal(), which is the
	starting point of \ref partition_search.
	*/

	/*!\defgroup two_pass_algo Two Pass Mode
	\ingroup high_level_algo

	In two pass mode, the input file is passed into the encoder for a quick
	first pass, where statistics are gathered. These statistics and the input
	file are then passed back into the encoder for a second pass. The statistics
	help the encoder reach the desired bitrate without as much overshooting or
	undershooting.

	During the first pass, the codec will return "stats" packets that contain
	information useful for the second pass. The caller should concatenate these
	packets as they are received. In the second pass, the concatenated packets
	are passed in, along with the frames to encode. During the second pass,
	"frame" packets are returned that represent the compressed video.

	A complete example can be found in `examples/twopass_encoder.c`. Pseudocode
	is provided below to illustrate the core parts.

	During the first pass, the uncompressed frames are passed in and stats
	information is appended to a byte array.

	~~~~~~~~~~~~~~~{.c}
	// For simplicity, assume that there is enough memory in the stats buffer.
	// Actual code will want to use a resizable array. stats_len represents
	// the length of data already present in the buffer.
	void get_stats_data(aom_codec_ctx_t *encoder, char *stats,
	size_t *stats_len, bool *got_data) {
	const aom_codec_cx_pkt_t *pkt;
	aom_codec_iter_t iter = NULL;
	while ((pkt = aom_codec_get_cx_data(encoder, &iter))) {
	*got_data = true;
	if (pkt->kind != AOM_CODEC_STATS_PKT) continue;
	memcpy(stats + *stats_len, pkt->data.twopass_stats.buf,
	pkt->data.twopass_stats.sz);
	*stats_len += pkt->data.twopass_stats.sz;
	}
	}

	void first_pass(char *stats, size_t *stats_len) {
	struct aom_codec_enc_cfg first_pass_cfg;
	... // Initialize the config as needed.
	first_pass_cfg.g_pass = AOM_RC_FIRST_PASS;
	aom_codec_ctx_t first_pass_encoder;
	... // Initialize the encoder.

	bool got_data;
	while (frame_available) {
	// Read in the uncompressed frame, update frame_available
	aom_image_t *frame_to_encode = ...;
	aom_codec_encode(&first_pass_encoder, frame_to_encode, pts, duration,
	flags);
	get_stats_data(&first_pass_encoder, stats, stats_len, &got_data);
	}
	// After all frames have been processed, call aom_codec_encode with
	// a NULL ptr repeatedly, until no more data is returned. The NULL
	// ptr tells the encoder that no more frames are available.
	do {
	got_data = false;
	aom_codec_encode(&first_pass_encoder, NULL, pts, duration, flags);
	get_stats_data(&first_pass_encoder, stats, stats_len, &got_data);
	} while (got_data);

	aom_codec_destroy(&first_pass_encoder);
	}
	~~~~~~~~~~~~~~~

	During the second pass, the uncompressed frames and the stats are
	passed into the encoder.

	~~~~~~~~~~~~~~~{.c}
	// Write out each encoded frame to the file.
	void get_cx_data(aom_codec_ctx_t *encoder, FILE *file,
	bool *got_data) {
	const aom_codec_cx_pkt_t *pkt;
	aom_codec_iter_t iter = NULL;
	while ((pkt = aom_codec_get_cx_data(encoder, &iter))) {
	*got_data = true;
	if (pkt->kind != AOM_CODEC_CX_FRAME_PKT) continue;
	fwrite(pkt->data.frame.buf, 1, pkt->data.frame.sz, file);
	}
	}

	void second_pass(char *stats, size_t stats_len) {
	struct aom_codec_enc_cfg second_pass_cfg;
	... // Initialize the config file as needed.
	second_pass_cfg.g_pass = AOM_RC_LAST_PASS;
	second_pass_cfg.rc_twopass_stats_in.buf = stats;
	second_pass_cfg.rc_twopass_stats_in.sz = stats_len;
	aom_codec_ctx_t second_pass_encoder;
	... // Initialize the encoder from the config.

	FILE *output = fopen("output.obu", "wb");
	bool got_data;
	while (frame_available) {
	// Read in the uncompressed frame, update frame_available
	aom_image_t *frame_to_encode = ...;
	aom_codec_encode(&second_pass_encoder, frame_to_encode, pts, duration,
	flags);
	get_cx_data(&second_pass_encoder, output, &got_data);
	}
	// Pass in NULL to flush the encoder.
	do {
	got_data = false;
	aom_codec_encode(&second_pass_encoder, NULL, pts, duration, flags);
	get_cx_data(&second_pass_encoder, output, &got_data);
	} while (got_data);

	aom_codec_destroy(&second_pass_encoder);
	}
	~~~~~~~~~~~~~~~
	*/

	/*!\defgroup look_ahead_buffer The Look-Ahead Buffer
	\ingroup high_level_algo

	A program should call \ref aom_codec_encode() for each frame that needs
	processing. These frames are internally copied and stored in a fixed-size
	circular buffer, known as the look-ahead buffer. Other parts of the code
	will use future frame information to inform current frame decisions;
	examples include the first-pass algorithm, TPL model, and temporal filter.
	Note that this buffer also keeps a reference to the last source frame.

	The look-ahead buffer is defined in \ref av1/encoder/lookahead.h. It acts as an
	opaque structure, with an interface to create and free memory associated with
	it. It supports pushing and popping frames onto the structure in a FIFO
	fashion. It also allows look-ahead when using the \ref av1_lookahead_peek()
	function with a non-negative number, and look-behind when -1 is passed in (for
	the last source frame; e.g., firstpass will use this for motion estimation).
	The \ref av1_lookahead_depth() function returns the current number of frames
	stored in it. Note that \ref av1_lookahead_pop() is a bit of a misnomer - it
	only pops if either the "flush" variable is set, or the buffer is at maximum
	capacity.

	The buffer is stored in the \ref AV1_COMP::lookahead field.
	It is initialized in the first call to \ref aom_codec_encode(), in the
	\ref av1_receive_raw_frame() sub-routine. The buffer size is defined by
	the g_lag_in_frames parameter set in the
	\ref aom_codec_enc_cfg_t::g_lag_in_frames struct.
	This can be modified manually but should only be set once. On the command
	line, the flag "--lag-in-frames" controls it. The default size is 19 for
	non-realtime usage and 1 for realtime. Note that a maximum value of 35 is
	enforced.

	A frame will stay in the buffer as long as possible. As mentioned above,
	the \ref av1_lookahead_pop() only removes a frame when either flush is set,
	or the buffer is full. Note that each call to \ref aom_codec_encode() inserts
	another frame into the buffer, and pop is called by the sub-function
	\ref av1_encode_strategy(). The buffer is told to flush when
	\ref aom_codec_encode() is passed a NULL image pointer. Note that the caller
	must repeatedly call \ref aom_codec_encode() with a NULL image pointer, until
	no more packets are available, in order to fully flush the buffer.

	*/

	/*! @} - end defgroup high_level_algo */

	/*!\defgroup partition_search Partition Search
	* \ingroup encoder_algo
	A frame is first split into tiles in \ref encode_tiles(), with each tile
	compressed by av1_encode_tile(). Then a tile is processed in superblock rows
	via \ref av1_encode_sb_row() and then \ref encode_sb_row().

	Partition search starts by superblocks that are sequentially processed in
	\ref encode_sb_row(). For a superblock, two search modes are supported
	corresponding to the encoding configurations, \ref encode_nonrd_sb() is for
	1-pass and real-time modes, while \ref encode_rd_sb() performs more
	exhaustive searches.

	Partition search over the recursive quad-tree space is implemented by
	recursively calling \ref nonrd_use_partition(), \ref rd_use_partition(), or
	rd_pick_partition() and returning best options for sub-trees to their
	parent partitions.

	In libaom, partition search lays on top of mode search (predictor, transform,
	etc.) instead of being a separate module, the interface of mode search is
	\ref pick_sb_modes(), which connects \ref partition_search with
	\ref inter_mode_search and \ref intra_mode_search. To make good decisions,
	reconstruction is also required in order to build references and contexts, it
	is implemented by \ref encode_sb() at sub-tree level and \ref encode_b() at
	coding block level.
	* @{
	*/
	/*! @} - end defgroup partition_search */

	/*!\defgroup intra_mode_search Intra Mode Search
	* \ingroup encoder_algo
	* This module describes intra mode search algorithm in AV1.
	* More details will be added.
	* @{
	*/
	/*! @} - end defgroup intra_mode_search */

	/*!\defgroup inter_mode_search Inter Mode Search
	* \ingroup encoder_algo
	* This module describes inter mode search algorithm in AV1.
	* More details will be added.
	* @{
	*/
	/*! @} - end defgroup inter_mode_search */

	/*!\defgroup palette_mode_search Palette Mode Search
	* \ingroup intra_mode_search
	* This module describes palette mode search algorithm in AV1.
	* More details will be added.
	* @{
	*/
	/*! @} - end defgroup palette_mode_search */

	/*!\defgroup transform_search Transform Search
	* \ingroup encoder_algo
	* This module describes transform search algorithm in AV1.
	* More details will be added.
	* @{
	*/
	/*! @} - end defgroup transform_search */

	/*!\defgroup in_loop_filter In-loop Filter
	* \ingroup encoder_algo
	* This module describes in-loop filter algorithm in AV1.
	* More details will be added.
	* @{
	*/
	/*! @} - end defgroup in_loop_filter */

	/*!\defgroup in_loop_cdef CDEF
	* \ingroup encoder_algo
	* This module describes the CDEF parameter search algorithm
	* in AV1. More details will be added.
	* @{
	*/
	/*! @} - end defgroup in_loop_cdef */

	/*!\defgroup in_loop_restoration Loop Restoration
	* \ingroup encoder_algo
	* This module describes the loop restoration search
	* and estimation algorithm in AV1.
	* More details will be added.
	* @{
	*/
	/*! @} - end defgroup in_loop_restoration */

	/*!\defgroup rate_control Rate Control
	* \ingroup encoder_algo
	* This module describes rate control algorithm in AV1.
	* More details will be added.
	* @{
	*/
	/*! @} - end defgroup rate_control */