Refactor av1_cyclic_refresh_postencode() function

This patch reduces post-encode stage overhead by moving
cyclic refresh counters accumulation within encode stage.

Change-Id: I4e06298ce6bffed8b8daad4188007a1b24b22f27
diff --git a/av1/encoder/aq_cyclicrefresh.c b/av1/encoder/aq_cyclicrefresh.c
index 3308ffe..c7abe43 100644
--- a/av1/encoder/aq_cyclicrefresh.c
+++ b/av1/encoder/aq_cyclicrefresh.c
@@ -16,6 +16,7 @@
 #include "av1/encoder/aq_cyclicrefresh.h"
 #include "av1/encoder/ratectrl.h"
 #include "av1/encoder/segmentation.h"
+#include "av1/encoder/tokenize.h"
 #include "aom_dsp/aom_dsp_common.h"
 #include "aom_ports/system_state.h"
 
@@ -146,11 +147,13 @@
   return bits_per_mb;
 }
 
-void av1_cyclic_refresh_update_segment(const AV1_COMP *cpi,
-                                       MB_MODE_INFO *const mbmi, int mi_row,
-                                       int mi_col, BLOCK_SIZE bsize,
-                                       int64_t rate, int64_t dist, int skip) {
+void av1_cyclic_refresh_update_segment(const AV1_COMP *cpi, MACROBLOCK *const x,
+                                       int mi_row, int mi_col, BLOCK_SIZE bsize,
+                                       int64_t rate, int64_t dist, int skip,
+                                       RUN_TYPE dry_run) {
   const AV1_COMMON *const cm = &cpi->common;
+  MACROBLOCKD *const xd = &x->e_mbd;
+  MB_MODE_INFO *const mbmi = xd->mi[0];
   CYCLIC_REFRESH *const cr = cpi->cyclic_refresh;
   const int bw = mi_size_wide[bsize];
   const int bh = mi_size_high[bsize];
@@ -189,12 +192,37 @@
   // Update entries in the cyclic refresh map with new_map_value, and
   // copy mbmi->segment_id into global segmentation map.
   // 8x8 is smallest coding block size for non-key frames.
-  for (int y = 0; y < ymis; y += 2)
-    for (int x = 0; x < xmis; x += 2) {
-      int map_offset = block_index + y * cm->mi_params.mi_cols + x;
+  const int sh = bw << 1;
+  for (int mi_y = 0; mi_y < ymis; mi_y += 2) {
+    for (int mi_x = 0; mi_x < xmis; mi_x += 2) {
+      int map_offset = block_index + mi_y * cm->mi_params.mi_cols + mi_x;
       cr->map[map_offset] = new_map_value;
       cpi->enc_seg.map[map_offset] = mbmi->segment_id;
     }
+    // Accumulate cyclic refresh update counters.
+    if (!dry_run && !frame_is_intra_only(cm)) {
+      if (cyclic_refresh_segment_id(mbmi->segment_id) == CR_SEGMENT_ID_BOOST1)
+        x->actual_num_seg1_blocks += sh;
+      else if (cyclic_refresh_segment_id(mbmi->segment_id) ==
+               CR_SEGMENT_ID_BOOST2)
+        x->actual_num_seg2_blocks += sh;
+    }
+  }
+}
+
+// Initializes counters used for cyclic refresh.
+void av1_init_cyclic_refresh_counters(MACROBLOCK *const x) {
+  x->actual_num_seg1_blocks = 0;
+  x->actual_num_seg2_blocks = 0;
+  x->cnt_zeromv = 0;
+}
+
+// Accumulate cyclic refresh counters.
+void av1_accumulate_cyclic_refresh_counters(
+    CYCLIC_REFRESH *const cyclic_refresh, const MACROBLOCK *const x) {
+  cyclic_refresh->actual_num_seg1_blocks += x->actual_num_seg1_blocks;
+  cyclic_refresh->actual_num_seg2_blocks += x->actual_num_seg2_blocks;
+  cyclic_refresh->cnt_zeromv += x->cnt_zeromv;
 }
 
 void av1_cyclic_refresh_postencode(AV1_COMP *const cpi) {
@@ -203,41 +231,15 @@
   CYCLIC_REFRESH *const cr = cpi->cyclic_refresh;
   RATE_CONTROL *const rc = &cpi->rc;
   SVC *const svc = &cpi->svc;
-  unsigned char *const seg_map = cpi->enc_seg.map;
-  int cnt_zeromv = 0;
-  cr->actual_num_seg1_blocks = 0;
-  cr->actual_num_seg2_blocks = 0;
-  // 8X8 blocks are smallest partition used on delta frames.
-  for (int mi_row = 0; mi_row < mi_params->mi_rows; mi_row += 2) {
-    MB_MODE_INFO **mi = mi_params->mi_grid_base + mi_row * mi_params->mi_stride;
-    int sh = 2;
-    for (int mi_col = 0; mi_col < mi_params->mi_cols; mi_col += sh) {
-      sh = mi_size_wide[mi[0]->bsize];
-      MV mv = mi[0]->mv[0].as_mv;
-      if (cm->seg.enabled) {
-        int map_index = mi_row * mi_params->mi_cols + mi_col;
-        if (cyclic_refresh_segment_id(seg_map[map_index]) ==
-            CR_SEGMENT_ID_BOOST1)
-          cr->actual_num_seg1_blocks += sh << 1;
-        else if (cyclic_refresh_segment_id(seg_map[map_index]) ==
-                 CR_SEGMENT_ID_BOOST2)
-          cr->actual_num_seg2_blocks += sh << 1;
-      }
-      // Accumulate low_content_frame.
-      if (is_inter_block(mi[0]) && mi[0]->ref_frame[0] == LAST_FRAME &&
-          abs(mv.row) < 8 && abs(mv.col) < 8)
-        cnt_zeromv += sh << 1;
-      if (mi_col + sh < mi_params->mi_cols) {
-        mi += sh;
-      }
-    }
-  }
-  cnt_zeromv = 100 * cnt_zeromv / (mi_params->mi_rows * mi_params->mi_cols);
+  const int avg_cnt_zeromv =
+      100 * cr->cnt_zeromv / (mi_params->mi_rows * mi_params->mi_cols);
+
   if (!cpi->use_svc ||
       (cpi->use_svc &&
        !cpi->svc.layer_context[cpi->svc.temporal_layer_id].is_key_frame &&
        cpi->svc.spatial_layer_id == cpi->svc.number_spatial_layers - 1)) {
-    rc->avg_frame_low_motion = (3 * rc->avg_frame_low_motion + cnt_zeromv) / 4;
+    rc->avg_frame_low_motion =
+        (3 * rc->avg_frame_low_motion + avg_cnt_zeromv) / 4;
     // For SVC: set avg_frame_low_motion (only computed on top spatial layer)
     // to all lower spatial layers.
     if (cpi->use_svc &&
diff --git a/av1/encoder/aq_cyclicrefresh.h b/av1/encoder/aq_cyclicrefresh.h
index 23d1f16..97bd6f2 100644
--- a/av1/encoder/aq_cyclicrefresh.h
+++ b/av1/encoder/aq_cyclicrefresh.h
@@ -13,6 +13,8 @@
 #define AOM_AV1_ENCODER_AQ_CYCLICREFRESH_H_
 
 #include "av1/common/blockd.h"
+#include "av1/encoder/block.h"
+#include "av1/encoder/tokenize.h"
 
 #ifdef __cplusplus
 extern "C" {
@@ -70,6 +72,10 @@
    */
   int rdmult;
   /*!
+   * Count of zero motion vectors
+   */
+  int cnt_zeromv;
+  /*!
    * Cyclic refresh map.
    */
   int8_t *map;
@@ -166,21 +172,59 @@
  * \callergraph
  *
  * \param[in]   cpi       Top level encoder structure
- * \param[in]   mbmi      MB_MODE_INFO pointer for mi block
+ * \param[in]   x         Pointer to MACROBLOCK structure
  * \param[in]   mi_row    Row coordinate of the block in a step size of MI_SIZE
  * \param[in]   mi_col    Col coordinate of the block in a step size of MI_SIZE
  * \param[in]   bsize     Block size
  * \param[in]   rate      Projected block rate from pickmode
  * \param[in]   dist      Projected block dist from pickmode
- * \param[in]  skip       Skip flag set from picmode
+ * \param[in]   skip      Skip flag set from picmode
+ * \param[in]   dry_run   A code indicating whether it is part of the final
+ *                         pass for reconstructing the superblock
  *
  * \return Update the \c mbmi->segment_id, the \c cpi->cyclic_refresh and
  * the \c cm->cpi->enc_seg.map.
  */
 void av1_cyclic_refresh_update_segment(const struct AV1_COMP *cpi,
-                                       MB_MODE_INFO *const mbmi, int mi_row,
+                                       MACROBLOCK *const x, int mi_row,
                                        int mi_col, BLOCK_SIZE bsize,
-                                       int64_t rate, int64_t dist, int skip);
+                                       int64_t rate, int64_t dist, int skip,
+                                       RUN_TYPE dry_run);
+
+/*!\brief Initialize counters used for cyclic refresh.
+ *
+ * Initializes cyclic refresh counters cnt_zeromv, actual_num_seg1_blocks and
+ * actual_num_seg2_blocks.
+ *
+ * \ingroup cyclic_refresh
+ * \callgraph
+ * \callergraph
+ *
+ * \param[in]   x         Pointer to MACROBLOCK structure
+ *
+ * \return Update the \c x->cnt_zeromv, the \c x->actual_num_seg1_blocks and
+ * the \c x->actual_num_seg1_blocks.
+ */
+void av1_init_cyclic_refresh_counters(MACROBLOCK *const x);
+
+/*!\brief Accumulate cyclic refresh counters.
+ *
+ * Accumulates cyclic refresh counters cnt_zeromv, actual_num_seg1_blocks and
+ * actual_num_seg2_blocks from MACROBLOCK strcture to CYCLIC_REFRESH strcture.
+ *
+ * \ingroup cyclic_refresh
+ * \callgraph
+ * \callergraph
+ *
+ * \param[in]   cyclic_refresh Pointer to CYCLIC_REFRESH structure
+ * \param[in]   x              Pointer to MACROBLOCK structure
+ *
+ * \return Update the \c cyclic_refresh->cnt_zeromv, the \c
+ * cyclic_refresh->actual_num_seg1_blocks and the \c
+ * cyclic_refresh->actual_num_seg1_blocks.
+ */
+void av1_accumulate_cyclic_refresh_counters(
+    CYCLIC_REFRESH *const cyclic_refresh, const MACROBLOCK *const x);
 
 /*!\brief Update stats after encoding frame.
  *
diff --git a/av1/encoder/block.h b/av1/encoder/block.h
index 9993acb..03dae3e 100644
--- a/av1/encoder/block.h
+++ b/av1/encoder/block.h
@@ -969,6 +969,22 @@
    * set 0 and all txfms are skipped.
    */
   int seg_skip_block;
+
+  /*! \brief Number of segment 1 blocks
+   * Actual number of (4x4) blocks that were applied delta-q,
+   * for segment 1.
+   */
+  int actual_num_seg1_blocks;
+
+  /*!\brief Number of segment 2 blocks
+   * Actual number of (4x4) blocks that were applied delta-q,
+   * for segment 2.
+   */
+  int actual_num_seg2_blocks;
+
+  /*!\brief Number of zero motion vectors
+   */
+  int cnt_zeromv;
   /**@}*/
 
   /*****************************************************************************
diff --git a/av1/encoder/encodeframe.c b/av1/encoder/encodeframe.c
index 345ad12..24d3488 100644
--- a/av1/encoder/encodeframe.c
+++ b/av1/encoder/encodeframe.c
@@ -1048,7 +1048,15 @@
       cpi->td.deltaq_used = 0;
       cpi->td.mb.e_mbd.tile_ctx = &this_tile->tctx;
       cpi->td.mb.tile_pb_ctx = &this_tile->tctx;
+      // Reset cyclic refresh counters.
+      av1_init_cyclic_refresh_counters(&cpi->td.mb);
+
       av1_encode_tile(cpi, &cpi->td, tile_row, tile_col);
+      // Accumulate cyclic refresh params.
+      if (cpi->oxcf.q_cfg.aq_mode == CYCLIC_REFRESH_AQ &&
+          !frame_is_intra_only(&cpi->common))
+        av1_accumulate_cyclic_refresh_counters(cpi->cyclic_refresh,
+                                               &cpi->td.mb);
       cpi->intrabc_used |= cpi->td.intrabc_used;
       cpi->deltaq_used |= cpi->td.deltaq_used;
     }
@@ -1388,6 +1396,10 @@
     // base_qindex
     cm->delta_q_info.delta_q_present_flag &= quant_params->base_qindex > 0;
     cm->delta_q_info.delta_lf_present_flag &= quant_params->base_qindex > 0;
+  } else {
+    cpi->cyclic_refresh->actual_num_seg1_blocks = 0;
+    cpi->cyclic_refresh->actual_num_seg2_blocks = 0;
+    cpi->cyclic_refresh->cnt_zeromv = 0;
   }
 
   av1_frame_init_quantizer(cpi);
diff --git a/av1/encoder/encodeframe_utils.c b/av1/encoder/encodeframe_utils.c
index 6251233..efecc79 100644
--- a/av1/encoder/encodeframe_utils.c
+++ b/av1/encoder/encodeframe_utils.c
@@ -256,14 +256,26 @@
     // Else for cyclic refresh mode update the segment map, set the segment id
     // and then update the quantizer.
     if (cpi->oxcf.q_cfg.aq_mode == CYCLIC_REFRESH_AQ) {
-      av1_cyclic_refresh_update_segment(cpi, mi_addr, mi_row, mi_col, bsize,
+      av1_cyclic_refresh_update_segment(cpi, x, mi_row, mi_col, bsize,
                                         ctx->rd_stats.rate, ctx->rd_stats.dist,
-                                        txfm_info->skip_txfm);
+                                        txfm_info->skip_txfm, dry_run);
     }
     if (mi_addr->uv_mode == UV_CFL_PRED && !is_cfl_allowed(xd))
       mi_addr->uv_mode = UV_DC_PRED;
   }
 
+  // Count zero motion vector.
+  if (!dry_run && cpi->oxcf.q_cfg.aq_mode == CYCLIC_REFRESH_AQ &&
+      !frame_is_intra_only(cm)) {
+    const MV mv = mi->mv[0].as_mv;
+    if (is_inter_block(mi) && mi->ref_frame[0] == LAST_FRAME &&
+        abs(mv.row) < 8 && abs(mv.col) < 8) {
+      const int ymis = AOMMIN(cm->mi_params.mi_rows - mi_row, bh);
+      // Accumulate low_content_frame.
+      for (int mi_y = 0; mi_y < ymis; mi_y += 2) x->cnt_zeromv += bw << 1;
+    }
+  }
+
   for (i = 0; i < num_planes; ++i) {
     p[i].coeff = ctx->coeff[i];
     p[i].qcoeff = ctx->qcoeff[i];
diff --git a/av1/encoder/ethread.c b/av1/encoder/ethread.c
index 0a9325c..3735ca3 100644
--- a/av1/encoder/ethread.c
+++ b/av1/encoder/ethread.c
@@ -775,6 +775,11 @@
     EncWorkerData *const thread_data = (EncWorkerData *)worker->data1;
     cpi->intrabc_used |= thread_data->td->intrabc_used;
     cpi->deltaq_used |= thread_data->td->deltaq_used;
+    // Accumulate cyclic refresh params.
+    if (cpi->oxcf.q_cfg.aq_mode == CYCLIC_REFRESH_AQ &&
+        !frame_is_intra_only(&cpi->common))
+      av1_accumulate_cyclic_refresh_counters(cpi->cyclic_refresh,
+                                             &thread_data->td->mb);
     if (thread_data->td->mb.txfm_search_info.txb_rd_records) {
       aom_free(thread_data->td->mb.txfm_search_info.txb_rd_records);
       thread_data->td->mb.txfm_search_info.txb_rd_records = NULL;
@@ -842,6 +847,9 @@
                sizeof(MvCosts));
       }
     }
+    // Reset cyclic refresh counters.
+    av1_init_cyclic_refresh_counters(&thread_data->td->mb);
+
     if (!cpi->sf.rt_sf.use_nonrd_pick_mode) {
       CHECK_MEM_ERROR(cm, thread_data->td->mb.txfm_search_info.txb_rd_records,
                       (TxbRdRecords *)aom_malloc(sizeof(TxbRdRecords)));