Documents main functions for intra-mode coding

This CL starts the work on documenting intra-mode coding in libaom. This
contains the following two main changes:
  - The code flows for luma/chroma coding and inter/intra frame coding
    are made slightly more similar. For example, an analog of
    rd_pick_filter_intra_sby (used for intraframe coding) is added to
    the interframe coding flow. However, the analogy is limited right
    now because the luma/chroma code paths are too intermingled in
    av1_handle_intra_mode.
  - Some basic documentation is added to the main intra-mode, rdopt,
    non-skip functions. Doxygen support will be added in the next CL.

BUG=aomedia:2712

Change-Id: Icaa02735c5df7980c0d34314f41e66e6afe4fba3
diff --git a/av1/encoder/encodeframe.c b/av1/encoder/encodeframe.c
index f0980d7..a20d179 100644
--- a/av1/encoder/encodeframe.c
+++ b/av1/encoder/encodeframe.c
@@ -718,7 +718,7 @@
       bsize < BLOCK_16X16)
     av1_rd_pick_intra_mode_sb(cpi, x, rd_cost, bsize, ctx, INT64_MAX);
   else
-    av1_pick_intra_mode(cpi, x, rd_cost, bsize, ctx);
+    av1_nonrd_pick_intra_mode(cpi, x, rd_cost, bsize, ctx);
 }
 
 static AOM_INLINE void pick_sb_modes(AV1_COMP *const cpi,
diff --git a/av1/encoder/encoder.c b/av1/encoder/encoder.c
index 151fd98..a5f3890 100644
--- a/av1/encoder/encoder.c
+++ b/av1/encoder/encoder.c
@@ -61,6 +61,7 @@
 #include "av1/encoder/firstpass.h"
 #include "av1/encoder/grain_test_vectors.h"
 #include "av1/encoder/hash_motion.h"
+#include "av1/encoder/intra_mode_search.h"
 #include "av1/encoder/mv_prec.h"
 #include "av1/encoder/pass2_strategy.h"
 #include "av1/encoder/picklpf.h"
diff --git a/av1/encoder/encodetxb.h b/av1/encoder/encodetxb.h
index 7122895..bed1393 100644
--- a/av1/encoder/encodetxb.h
+++ b/av1/encoder/encodetxb.h
@@ -88,6 +88,17 @@
 CB_COEFF_BUFFER *av1_get_cb_coeff_buffer(const struct AV1_COMP *cpi, int mi_row,
                                          int mi_col);
 
+// Returns the rate cost associated with skipping the current transform block.
+static INLINE int av1_cost_skip_txb(const CoeffCosts *coeff_costs,
+                                    const TXB_CTX *const txb_ctx, int plane,
+                                    TX_SIZE tx_size) {
+  const TX_SIZE txs_ctx = get_txsize_entropy_ctx(tx_size);
+  const PLANE_TYPE plane_type = get_plane_type(plane);
+  const LV_MAP_COEFF_COST *const coeff_costs_ =
+      &coeff_costs->coeff_costs[txs_ctx][plane_type];
+  return coeff_costs_->txb_skip_cost[txb_ctx->txb_skip_ctx][1];
+}
+
 // These numbers are empirically obtained.
 static const int plane_rd_mult[REF_TYPES][PLANE_TYPES] = {
   { 17, 13 },
diff --git a/av1/encoder/intra_mode_search.c b/av1/encoder/intra_mode_search.c
index 52d3c34..fd53137 100644
--- a/av1/encoder/intra_mode_search.c
+++ b/av1/encoder/intra_mode_search.c
@@ -191,7 +191,8 @@
 
 #undef BINS
 
-// Model based RD estimation for luma intra blocks.
+// Makes a quick luma prediction and estimates the rdcost with a model without
+// going through the whole txfm/quantize/itxfm process.
 static int64_t intra_model_yrd(const AV1_COMP *const cpi, MACROBLOCK *const x,
                                BLOCK_SIZE bsize, int mode_cost) {
   const AV1_COMMON *cm = &cpi->common;
@@ -820,6 +821,7 @@
                                                                       3, 3, 3,
                                                                       3, 3, 3 };
 
+// Searches for the best palette in the luma plane.
 static void rd_pick_palette_intra_sby(
     const AV1_COMP *const cpi, MACROBLOCK *x, BLOCK_SIZE bsize,
     int dc_mode_cost, MB_MODE_INFO *best_mbmi, uint8_t *best_palette_color_map,
@@ -1018,6 +1020,7 @@
   *mbmi = *best_mbmi;
 }
 
+// Searches for the best palette in the chroma plane.
 static AOM_INLINE void rd_pick_palette_intra_sbuv(
     const AV1_COMP *const cpi, MACROBLOCK *x, int dc_mode_cost,
     uint8_t *best_palette_color_map, MB_MODE_INFO *const best_mbmi,
@@ -1209,40 +1212,6 @@
                            plane_block_height);
 }
 
-static AOM_INLINE void choose_intra_uv_mode(
-    const AV1_COMP *const cpi, MACROBLOCK *const x, BLOCK_SIZE bsize,
-    TX_SIZE max_tx_size, int *rate_uv, int *rate_uv_tokenonly, int64_t *dist_uv,
-    int *skip_uv, UV_PREDICTION_MODE *mode_uv) {
-  const AV1_COMMON *const cm = &cpi->common;
-  MACROBLOCKD *xd = &x->e_mbd;
-  MB_MODE_INFO *mbmi = xd->mi[0];
-  // Use an estimated rd for uv_intra based on DC_PRED if the
-  // appropriate speed flag is set.
-  init_sbuv_mode(mbmi);
-  if (!xd->is_chroma_ref) {
-    *rate_uv = 0;
-    *rate_uv_tokenonly = 0;
-    *dist_uv = 0;
-    *skip_uv = 1;
-    *mode_uv = UV_DC_PRED;
-    return;
-  }
-
-  // Only store reconstructed luma when there's chroma RDO. When there's no
-  // chroma RDO, the reconstructed luma will be stored in encode_superblock().
-  xd->cfl.store_y = store_cfl_required_rdo(cm, x);
-  if (xd->cfl.store_y) {
-    // Restore reconstructed luma values.
-    av1_encode_intra_block_plane(cpi, x, mbmi->sb_type, AOM_PLANE_Y,
-                                 DRY_RUN_NORMAL,
-                                 cpi->optimize_seg_arr[mbmi->segment_id]);
-    xd->cfl.store_y = 0;
-  }
-  av1_rd_pick_intra_sbuv_mode(cpi, x, rate_uv, rate_uv_tokenonly, dist_uv,
-                              skip_uv, bsize, max_tx_size);
-  *mode_uv = mbmi->uv_mode;
-}
-
 // Run RD calculation with given chroma intra prediction angle., and return
 // the RD cost. Update the best mode info. if the RD cost is the best so far.
 static int64_t pick_intra_angle_routine_sbuv(
@@ -1457,6 +1426,7 @@
                                     int *rate, int *rate_tokenonly,
                                     int64_t *distortion, int *skippable,
                                     BLOCK_SIZE bsize, TX_SIZE max_tx_size) {
+  const AV1_COMMON *const cm = &cpi->common;
   MACROBLOCKD *xd = &x->e_mbd;
   MB_MODE_INFO *mbmi = xd->mi[0];
   assert(!is_inter_block(mbmi));
@@ -1465,6 +1435,33 @@
   const ModeCosts *mode_costs = &x->mode_costs;
   const IntraModeCfg *const intra_mode_cfg = &cpi->oxcf.intra_mode_cfg;
 
+  init_sbuv_mode(mbmi);
+
+  // Return if the current block does not correspond to a chroma block.
+  if (!xd->is_chroma_ref) {
+    *rate = 0;
+    *rate_tokenonly = 0;
+    *distortion = 0;
+    *skippable = 1;
+    return INT64_MAX;
+  }
+
+  // Only store reconstructed luma when there's chroma RDO. When there's no
+  // chroma RDO, the reconstructed luma will be stored in encode_superblock().
+  xd->cfl.store_y = store_cfl_required_rdo(cm, x);
+  if (xd->cfl.store_y) {
+    // Restore reconstructed luma values.
+    // TODO(chiyotsai@google.com): right now we are re-computing the txfm in
+    // this function every time we search through uv modes. There is some
+    // potential speed up here if we cache the result to avoid redundant
+    // computation.
+    av1_encode_intra_block_plane(cpi, x, mbmi->sb_type, AOM_PLANE_Y,
+                                 DRY_RUN_NORMAL,
+                                 cpi->optimize_seg_arr[mbmi->segment_id]);
+    xd->cfl.store_y = 0;
+  }
+
+  // Search through all non-palette modes.
   for (int mode_idx = 0; mode_idx < UV_INTRA_MODES; ++mode_idx) {
     int this_rate;
     RD_STATS tokenonly_rd_stats;
@@ -1480,6 +1477,8 @@
     if (!intra_mode_cfg->enable_paeth_intra && mode == UV_PAETH_PRED) continue;
 
     mbmi->uv_mode = mode;
+
+    // Init variables for cfl and angle delta
     int cfl_alpha_rate = 0;
     if (mode == UV_CFL_PRED) {
       if (!is_cfl_allowed(xd) || !intra_mode_cfg->enable_cfl_intra) continue;
@@ -1489,14 +1488,17 @@
       if (cfl_alpha_rate == INT_MAX) continue;
     }
     mbmi->angle_delta[PLANE_TYPE_UV] = 0;
+
     if (is_directional_mode && av1_use_angle_delta(mbmi->sb_type) &&
         intra_mode_cfg->enable_angle_delta) {
+      // Search through angle delta
       const int rate_overhead =
           mode_costs->intra_uv_mode_cost[is_cfl_allowed(xd)][mbmi->mode][mode];
       if (!rd_pick_intra_angle_sbuv(cpi, x, bsize, rate_overhead, best_rd,
                                     &this_rate, &tokenonly_rd_stats))
         continue;
     } else {
+      // Predict directly if we don't need to search for angle delta.
       if (!av1_txfm_uvrd(cpi, x, &tokenonly_rd_stats, bsize, best_rd)) {
         continue;
       }
@@ -1525,6 +1527,7 @@
     }
   }
 
+  // Search palette mode
   const int try_palette =
       cpi->oxcf.enable_palette &&
       av1_allow_palette(cpi->common.features.allow_screen_content_tools,
@@ -1545,6 +1548,7 @@
   return best_rd;
 }
 
+// Searches palette mode for luma channel in inter frame.
 int av1_search_palette_mode(const AV1_COMP *cpi, MACROBLOCK *x,
                             RD_STATS *this_rd_cost, PICK_MODE_CONTEXT *ctx,
                             BLOCK_SIZE bsize, MB_MODE_INFO *const mbmi,
@@ -1559,7 +1563,6 @@
   int64_t distortion2 = 0, best_rd_palette = best_rd, this_rd,
           best_model_rd_palette = INT64_MAX;
   int skippable = 0;
-  TX_SIZE uv_tx = TX_4X4;
   uint8_t *const best_palette_color_map =
       x->palette_buffer->best_palette_color_map;
   uint8_t *const color_map = xd->plane[0].color_index_map;
@@ -1598,15 +1601,19 @@
   distortion2 = rd_stats_y.dist;
   rate2 = rd_stats_y.rate + ref_costs_single[INTRA_FRAME];
   if (num_planes > 1) {
-    uv_tx = av1_get_tx_size(AOM_PLANE_U, xd);
     if (intra_search_state->rate_uv_intra == INT_MAX) {
-      choose_intra_uv_mode(
-          cpi, x, bsize, uv_tx, &intra_search_state->rate_uv_intra,
-          &intra_search_state->rate_uv_tokenonly, &intra_search_state->dist_uvs,
-          &intra_search_state->skip_uvs, &intra_search_state->mode_uv);
+      // We have not found any good uv mode yet, so we need to search for it.
+      TX_SIZE uv_tx = av1_get_tx_size(AOM_PLANE_U, xd);
+      av1_rd_pick_intra_sbuv_mode(cpi, x, &intra_search_state->rate_uv_intra,
+                                  &intra_search_state->rate_uv_tokenonly,
+                                  &intra_search_state->dist_uvs,
+                                  &intra_search_state->skip_uvs, bsize, uv_tx);
+      intra_search_state->mode_uv = mbmi->uv_mode;
       intra_search_state->pmi_uv = *pmi;
       intra_search_state->uv_angle_delta = mbmi->angle_delta[PLANE_TYPE_UV];
     }
+
+    // We have found at least one good uv mode before, so copy it over.
     mbmi->uv_mode = intra_search_state->mode_uv;
     pmi->palette_size[1] = intra_search_state->pmi_uv.palette_size[1];
     if (pmi->palette_size[1] > 0) {
@@ -1635,6 +1642,8 @@
 }
 
 // Given selected prediction mode, search for the best tx type and size.
+// Currently this is only used in the intra frame code path for winner-mode
+// processing.
 static AOM_INLINE int intra_block_yrd(const AV1_COMP *const cpi, MACROBLOCK *x,
                                       BLOCK_SIZE bsize, const int *bmode_costs,
                                       int64_t *best_rd, int *rate,
@@ -1743,6 +1752,67 @@
   return best_rd;
 }
 
+// Searches through filter_intra mode in inter frame.
+static INLINE void handle_filter_intra_mode(const AV1_COMP *cpi, MACROBLOCK *x,
+                                            BLOCK_SIZE bsize,
+                                            const PICK_MODE_CONTEXT *ctx,
+                                            RD_STATS *rd_stats_y, int mode_cost,
+                                            int64_t best_rd,
+                                            int64_t best_rd_so_far) {
+  MACROBLOCKD *const xd = &x->e_mbd;
+  MB_MODE_INFO *const mbmi = xd->mi[0];
+  assert(mbmi->mode == DC_PRED &&
+         av1_filter_intra_allowed_bsize(&cpi->common, bsize));
+
+  RD_STATS rd_stats_y_fi;
+  int filter_intra_selected_flag = 0;
+  TX_SIZE best_tx_size = mbmi->tx_size;
+  FILTER_INTRA_MODE best_fi_mode = FILTER_DC_PRED;
+  uint8_t best_blk_skip[MAX_MIB_SIZE * MAX_MIB_SIZE];
+  memcpy(best_blk_skip, x->txfm_search_info.blk_skip,
+         sizeof(best_blk_skip[0]) * ctx->num_4x4_blk);
+  uint8_t best_tx_type_map[MAX_MIB_SIZE * MAX_MIB_SIZE];
+  av1_copy_array(best_tx_type_map, xd->tx_type_map, ctx->num_4x4_blk);
+  mbmi->filter_intra_mode_info.use_filter_intra = 1;
+  for (FILTER_INTRA_MODE fi_mode = FILTER_DC_PRED; fi_mode < FILTER_INTRA_MODES;
+       ++fi_mode) {
+    mbmi->filter_intra_mode_info.filter_intra_mode = fi_mode;
+    av1_pick_uniform_tx_size_type_yrd(cpi, x, &rd_stats_y_fi, bsize, best_rd);
+    if (rd_stats_y_fi.rate == INT_MAX) continue;
+    const int this_rate_tmp =
+        rd_stats_y_fi.rate +
+        intra_mode_info_cost_y(cpi, x, mbmi, bsize, mode_cost);
+    const int64_t this_rd_tmp =
+        RDCOST(x->rdmult, this_rate_tmp, rd_stats_y_fi.dist);
+
+    if (this_rd_tmp != INT64_MAX && this_rd_tmp / 2 > best_rd) {
+      break;
+    }
+    if (this_rd_tmp < best_rd_so_far) {
+      best_tx_size = mbmi->tx_size;
+      av1_copy_array(best_tx_type_map, xd->tx_type_map, ctx->num_4x4_blk);
+      memcpy(best_blk_skip, x->txfm_search_info.blk_skip,
+             sizeof(best_blk_skip[0]) * ctx->num_4x4_blk);
+      best_fi_mode = fi_mode;
+      *rd_stats_y = rd_stats_y_fi;
+      filter_intra_selected_flag = 1;
+      best_rd_so_far = this_rd_tmp;
+    }
+  }
+
+  mbmi->tx_size = best_tx_size;
+  av1_copy_array(xd->tx_type_map, best_tx_type_map, ctx->num_4x4_blk);
+  memcpy(x->txfm_search_info.blk_skip, best_blk_skip,
+         sizeof(x->txfm_search_info.blk_skip[0]) * ctx->num_4x4_blk);
+
+  if (filter_intra_selected_flag) {
+    mbmi->filter_intra_mode_info.use_filter_intra = 1;
+    mbmi->filter_intra_mode_info.filter_intra_mode = best_fi_mode;
+  } else {
+    mbmi->filter_intra_mode_info.use_filter_intra = 0;
+  }
+}
+
 int64_t av1_handle_intra_mode(IntraModeSearchState *intra_search_state,
                               const AV1_COMP *cpi, MACROBLOCK *x,
                               BLOCK_SIZE bsize, int ref_frame_cost,
@@ -1779,11 +1849,11 @@
   if (is_directional_mode && av1_use_angle_delta(bsize) &&
       cpi->oxcf.intra_mode_cfg.enable_angle_delta) {
     if (sf->intra_sf.intra_pruning_with_hog &&
-        !intra_search_state->angle_stats_ready) {
+        !intra_search_state->dir_mode_skip_mask_ready) {
       prune_intra_mode_with_hog(x, bsize,
                                 cpi->sf.intra_sf.intra_pruning_with_hog_thresh,
                                 intra_search_state->directional_mode_skip_mask);
-      intra_search_state->angle_stats_ready = 1;
+      intra_search_state->dir_mode_skip_mask_ready = 1;
     }
     if (intra_search_state->directional_mode_skip_mask[mode]) return INT64_MAX;
     av1_init_rd_stats(rd_stats_y);
@@ -1813,54 +1883,8 @@
     }
 
     if (try_filter_intra) {
-      RD_STATS rd_stats_y_fi;
-      int filter_intra_selected_flag = 0;
-      TX_SIZE best_tx_size = mbmi->tx_size;
-      FILTER_INTRA_MODE best_fi_mode = FILTER_DC_PRED;
-      uint8_t best_blk_skip[MAX_MIB_SIZE * MAX_MIB_SIZE];
-      memcpy(best_blk_skip, x->txfm_search_info.blk_skip,
-             sizeof(best_blk_skip[0]) * ctx->num_4x4_blk);
-      uint8_t best_tx_type_map[MAX_MIB_SIZE * MAX_MIB_SIZE];
-      av1_copy_array(best_tx_type_map, xd->tx_type_map, ctx->num_4x4_blk);
-      mbmi->filter_intra_mode_info.use_filter_intra = 1;
-      for (FILTER_INTRA_MODE fi_mode = FILTER_DC_PRED;
-           fi_mode < FILTER_INTRA_MODES; ++fi_mode) {
-        mbmi->filter_intra_mode_info.filter_intra_mode = fi_mode;
-        av1_pick_uniform_tx_size_type_yrd(cpi, x, &rd_stats_y_fi, bsize,
-                                          best_rd);
-        if (rd_stats_y_fi.rate == INT_MAX) continue;
-        const int this_rate_tmp =
-            rd_stats_y_fi.rate +
-            intra_mode_info_cost_y(cpi, x, mbmi, bsize, mode_cost);
-        const int64_t this_rd_tmp =
-            RDCOST(x->rdmult, this_rate_tmp, rd_stats_y_fi.dist);
-
-        if (this_rd_tmp != INT64_MAX && this_rd_tmp / 2 > best_rd) {
-          break;
-        }
-        if (this_rd_tmp < best_rd_so_far) {
-          best_tx_size = mbmi->tx_size;
-          av1_copy_array(best_tx_type_map, xd->tx_type_map, ctx->num_4x4_blk);
-          memcpy(best_blk_skip, x->txfm_search_info.blk_skip,
-                 sizeof(best_blk_skip[0]) * ctx->num_4x4_blk);
-          best_fi_mode = fi_mode;
-          *rd_stats_y = rd_stats_y_fi;
-          filter_intra_selected_flag = 1;
-          best_rd_so_far = this_rd_tmp;
-        }
-      }
-
-      mbmi->tx_size = best_tx_size;
-      av1_copy_array(xd->tx_type_map, best_tx_type_map, ctx->num_4x4_blk);
-      memcpy(x->txfm_search_info.blk_skip, best_blk_skip,
-             sizeof(x->txfm_search_info.blk_skip[0]) * ctx->num_4x4_blk);
-
-      if (filter_intra_selected_flag) {
-        mbmi->filter_intra_mode_info.use_filter_intra = 1;
-        mbmi->filter_intra_mode_info.filter_intra_mode = best_fi_mode;
-      } else {
-        mbmi->filter_intra_mode_info.use_filter_intra = 0;
-      }
+      handle_filter_intra_mode(cpi, x, bsize, ctx, rd_stats_y, mode_cost,
+                               best_rd, best_rd_so_far);
     }
   }
 
@@ -1877,8 +1901,8 @@
         cpi->oxcf.enable_palette &&
         av1_allow_palette(cm->features.allow_screen_content_tools,
                           mbmi->sb_type);
-    const TX_SIZE uv_tx = av1_get_tx_size(AOM_PLANE_U, xd);
     if (intra_search_state->rate_uv_intra == INT_MAX) {
+      // If no good uv-predictor had been found, search for it.
       const int rate_y = rd_stats_y->skip_txfm
                              ? mode_costs->skip_txfm_cost[skip_ctx][1]
                              : rd_stats_y->rate;
@@ -1888,10 +1912,12 @@
         intra_search_state->skip_intra_modes = 1;
         return INT64_MAX;
       }
-      choose_intra_uv_mode(
-          cpi, x, bsize, uv_tx, &intra_search_state->rate_uv_intra,
-          &intra_search_state->rate_uv_tokenonly, &intra_search_state->dist_uvs,
-          &intra_search_state->skip_uvs, &intra_search_state->mode_uv);
+      const TX_SIZE uv_tx = av1_get_tx_size(AOM_PLANE_U, xd);
+      av1_rd_pick_intra_sbuv_mode(cpi, x, &intra_search_state->rate_uv_intra,
+                                  &intra_search_state->rate_uv_tokenonly,
+                                  &intra_search_state->dist_uvs,
+                                  &intra_search_state->skip_uvs, bsize, uv_tx);
+      intra_search_state->mode_uv = mbmi->uv_mode;
       if (try_palette) intra_search_state->pmi_uv = *pmi;
       intra_search_state->uv_angle_delta = mbmi->angle_delta[PLANE_TYPE_UV];
 
@@ -1899,11 +1925,17 @@
       const int64_t uv_dist = intra_search_state->dist_uvs;
       const int64_t uv_rd = RDCOST(x->rdmult, uv_rate, uv_dist);
       if (uv_rd > best_rd) {
+        // If there is no good intra uv-mode available, we can skip all intra
+        // modes.
         intra_search_state->skip_intra_modes = 1;
         return INT64_MAX;
       }
     }
 
+    // If we are here, then the encoder has found at least one good intra uv
+    // predictor, so we can directly copy its statistics over.
+    // TODO(any): the stats here are probably not right if the current best mode
+    // is cfl.
     rd_stats_uv->rate = intra_search_state->rate_uv_tokenonly;
     rd_stats_uv->dist = intra_search_state->dist_uvs;
     rd_stats_uv->skip_txfm = intra_search_state->skip_uvs;
@@ -1964,7 +1996,7 @@
   return this_rd;
 }
 
-// This function is used only for intra_only frames
+// Finds the best non-intrabc mode on an intra frame.
 int64_t av1_rd_pick_intra_sby_mode(const AV1_COMP *const cpi, MACROBLOCK *x,
                                    int *rate, int *rate_tokenonly,
                                    int64_t *distortion, int *skippable,
@@ -2011,7 +2043,7 @@
   av1_zero(x->winner_mode_stats);
   x->winner_mode_count = 0;
 
-  /* Y Search for intra prediction mode */
+  // Searches the intra-modes except for intrabc, palette, and filter_intra.
   for (int mode_idx = INTRA_MODE_START; mode_idx < INTRA_MODE_END; ++mode_idx) {
     RD_STATS this_rd_stats;
     int this_rate, this_rate_tokenonly, s;
@@ -2036,11 +2068,16 @@
     if (is_directional_mode && directional_mode_skip_mask[mbmi->mode]) continue;
     if (is_directional_mode && av1_use_angle_delta(bsize) &&
         cpi->oxcf.intra_mode_cfg.enable_angle_delta) {
+      // Searches through the best angle_delta if this option is available.
       this_rd_stats.rate = INT_MAX;
       rd_pick_intra_angle_sby(cpi, x, &this_rate, &this_rd_stats, bsize,
                               bmode_costs[mbmi->mode], best_rd, &best_model_rd,
                               1);
     } else {
+      // Builds the actual prediction. The prediction from
+      // model_intra_yrd_and_prune was just an estimation that did not take into
+      // account the effect of txfm pipeline, so we need to redo it for real
+      // here.
       av1_pick_uniform_tx_size_type_yrd(cpi, x, &this_rd_stats, bsize, best_rd);
     }
     this_rate_tokenonly = this_rd_stats.rate;
@@ -2083,6 +2120,7 @@
     }
   }
 
+  // Searches palette
   if (try_palette) {
     rd_pick_palette_intra_sby(
         cpi, x, bsize, bmode_costs[DC_PRED], &best_mbmi, best_palette_color_map,
@@ -2090,6 +2128,7 @@
         &beat_best_rd, ctx, ctx->blk_skip, ctx->tx_type_map);
   }
 
+  // Searches filter_intra
   if (beat_best_rd && av1_filter_intra_allowed_bsize(&cpi->common, bsize)) {
     if (rd_pick_filter_intra_sby(cpi, x, rate, rate_tokenonly, distortion,
                                  skippable, bsize, bmode_costs[DC_PRED],
@@ -2097,6 +2136,7 @@
       best_mbmi = *mbmi;
     }
   }
+
   // No mode is identified with less rd value than best_rd passed to this
   // function. In such cases winner mode processing is not necessary and return
   // best_rd as INT64_MAX to indicate best mode is not identified
diff --git a/av1/encoder/intra_mode_search.h b/av1/encoder/intra_mode_search.h
index 4b5d31c..e3c6a4b 100644
--- a/av1/encoder/intra_mode_search.h
+++ b/av1/encoder/intra_mode_search.h
@@ -18,11 +18,19 @@
 extern "C" {
 #endif
 
+// Keeps track of the intra-mode search process during inter frame coding.
 typedef struct IntraModeSearchState {
-  int skip_intra_modes;
+  // The best luma intra-mode found so far
   PREDICTION_MODE best_intra_mode;
-  int angle_stats_ready;
+
+  // Terminate intra-mode search
+  int skip_intra_modes;
+  // Set to 1 once directional_mode_skip_mask below has been computed
+  int dir_mode_skip_mask_ready;
   uint8_t directional_mode_skip_mask[INTRA_MODES];
+
+  // Saves a copy of the current best chroma prediction and its statistics so
+  // we don't have to recompute it every time handle_inter_mode is called.
   int rate_uv_intra;
   int rate_uv_tokenonly;
   int64_t dist_uvs;
@@ -30,23 +38,15 @@
   UV_PREDICTION_MODE mode_uv;
   PALETTE_MODE_INFO pmi_uv;
   int8_t uv_angle_delta;
+
+  // Keep track of the best intra rd so it can be used in compound mode.
   int64_t best_pred_rd[REFERENCE_MODES];
 } IntraModeSearchState;
 
-void av1_restore_uv_color_map(const AV1_COMP *const cpi, MACROBLOCK *x);
-int av1_search_palette_mode(const AV1_COMP *cpi, MACROBLOCK *x,
-                            RD_STATS *this_rd_cost, PICK_MODE_CONTEXT *ctx,
-                            BLOCK_SIZE bsize, MB_MODE_INFO *const mbmi,
-                            PALETTE_MODE_INFO *const pmi,
-                            unsigned int *ref_costs_single,
-                            IntraModeSearchState *intra_search_state,
-                            int64_t best_rd);
-
-int64_t av1_rd_pick_intra_sbuv_mode(const AV1_COMP *const cpi, MACROBLOCK *x,
-                                    int *rate, int *rate_tokenonly,
-                                    int64_t *distortion, int *skippable,
-                                    BLOCK_SIZE bsize, TX_SIZE max_tx_size);
-
+// Handles an intra-mode prediction when the current frame is an inter frame.
+// Its inter-mode counterpart is handle_inter_mode. This function does not
+// support palette mode prediction, which is handled by av1_search_palette_mode.
+// The current mode being searched is defined by x->e_mbd.mi[0]->mode.
 int64_t av1_handle_intra_mode(IntraModeSearchState *intra_search_state,
                               const AV1_COMP *cpi, MACROBLOCK *x,
                               BLOCK_SIZE bsize, int ref_frame_cost,
@@ -55,9 +55,46 @@
                               RD_STATS *rd_stats_uv, int64_t best_rd,
                               int64_t *best_intra_rd, int8_t best_mbmode_skip);
 
+// Handles palette-mode search when the current frame is an inter frame. In
+// addition to searching palette-mode in the luma channel, this searches over
+// all chroma modes by calling av1_rd_pick_intra_sbuv_mode when no chroma
+// search result is cached in intra_search_state yet.
+int av1_search_palette_mode(const AV1_COMP *cpi, MACROBLOCK *x,
+                            RD_STATS *this_rd_cost, PICK_MODE_CONTEXT *ctx,
+                            BLOCK_SIZE bsize, MB_MODE_INFO *const mbmi,
+                            PALETTE_MODE_INFO *const pmi,
+                            unsigned int *ref_costs_single,
+                            IntraModeSearchState *intra_search_state,
+                            int64_t best_rd);
+
+// Performs intra-mode search on the luma channel when the current frame is
+// intra-only. This function does not search intra-bc mode, but it does search
+// palette and filter_intra.
 int64_t av1_rd_pick_intra_sby_mode(const AV1_COMP *const cpi, MACROBLOCK *x,
                                    int *rate, int *rate_tokenonly,
                                    int64_t *distortion, int *skippable,
                                    BLOCK_SIZE bsize, int64_t best_rd,
                                    PICK_MODE_CONTEXT *ctx);
+
+// Performs intra-mode search on the chroma channels. Just like its luma
+// counterpart, this function searches over palette mode as well (filter_intra
+// is not available for chroma channels). Unlike its luma counterpart, this
+// function is used by both inter and intra frames.
+int64_t av1_rd_pick_intra_sbuv_mode(const AV1_COMP *const cpi, MACROBLOCK *x,
+                                    int *rate, int *rate_tokenonly,
+                                    int64_t *distortion, int *skippable,
+                                    BLOCK_SIZE bsize, TX_SIZE max_tx_size);
+
+// Returns the number of colors in 'src'. This is primarily used by palette
+// mode for screen content encoding.
+int av1_count_colors(const uint8_t *src, int stride, int rows, int cols,
+                     int *val_count);
+
+// Same as av1_count_colors(), but for high-bitdepth mode.
+int av1_count_colors_highbd(const uint8_t *src8, int stride, int rows, int cols,
+                            int bit_depth, int *val_count);
+
+// Resets palette color map for chroma channels.
+void av1_restore_uv_color_map(const AV1_COMP *const cpi, MACROBLOCK *x);
+
 #endif  // AOM_AV1_ENCODER_INTRA_MODE_SEARCH_H_
diff --git a/av1/encoder/nonrd_pickmode.c b/av1/encoder/nonrd_pickmode.c
index f67a77a..0493d13 100644
--- a/av1/encoder/nonrd_pickmode.c
+++ b/av1/encoder/nonrd_pickmode.c
@@ -1410,8 +1410,8 @@
   pd->dst.buf = dst_buf_base;
 }
 
-void av1_pick_intra_mode(AV1_COMP *cpi, MACROBLOCK *x, RD_STATS *rd_cost,
-                         BLOCK_SIZE bsize, PICK_MODE_CONTEXT *ctx) {
+void av1_nonrd_pick_intra_mode(AV1_COMP *cpi, MACROBLOCK *x, RD_STATS *rd_cost,
+                               BLOCK_SIZE bsize, PICK_MODE_CONTEXT *ctx) {
   AV1_COMMON *const cm = &cpi->common;
   MACROBLOCKD *const xd = &x->e_mbd;
   MB_MODE_INFO *const mi = xd->mi[0];
diff --git a/av1/encoder/palette.h b/av1/encoder/palette.h
index 8b88c47..8f21b1d 100644
--- a/av1/encoder/palette.h
+++ b/av1/encoder/palette.h
@@ -88,7 +88,6 @@
 int av1_palette_color_cost_uv(const PALETTE_MODE_INFO *const pmi,
                               uint16_t *color_cache, int n_cache,
                               int bit_depth);
-
 #ifdef __cplusplus
 }  // extern "C"
 #endif
diff --git a/av1/encoder/rdopt.c b/av1/encoder/rdopt.c
index ca098df..9d85c18 100644
--- a/av1/encoder/rdopt.c
+++ b/av1/encoder/rdopt.c
@@ -2784,6 +2784,7 @@
   return rd_stats->rdcost;
 }
 
+// Searches intrabc mode in intraframe.
 static int64_t rd_pick_intrabc_mode_sb(const AV1_COMP *cpi, MACROBLOCK *x,
                                        PICK_MODE_CONTEXT *ctx,
                                        RD_STATS *rd_stats, BLOCK_SIZE bsize,
@@ -2998,27 +2999,19 @@
   set_mode_eval_params(cpi, x, DEFAULT_EVAL);
 
   if (intra_yrd < best_rd) {
-    // Only store reconstructed luma when there's chroma RDO. When there's no
-    // chroma RDO, the reconstructed luma will be stored in encode_superblock().
-    xd->cfl.store_y = store_cfl_required_rdo(cm, x);
-    if (xd->cfl.store_y) {
-      // Restore reconstructed luma values.
-      memcpy(txfm_info->blk_skip, ctx->blk_skip,
-             sizeof(txfm_info->blk_skip[0]) * ctx->num_4x4_blk);
-      av1_copy_array(xd->tx_type_map, ctx->tx_type_map, ctx->num_4x4_blk);
-      av1_encode_intra_block_plane(cpi, x, bsize, AOM_PLANE_Y, DRY_RUN_NORMAL,
-                                   cpi->optimize_seg_arr[mbmi->segment_id]);
-      av1_copy_array(ctx->tx_type_map, xd->tx_type_map, ctx->num_4x4_blk);
-      xd->cfl.store_y = 0;
-    }
+    // Search intra modes for uv planes if needed
     if (num_planes > 1) {
-      init_sbuv_mode(mbmi);
-      if (xd->is_chroma_ref) {
-        const TX_SIZE max_uv_tx_size = av1_get_tx_size(AOM_PLANE_U, xd);
-        av1_rd_pick_intra_sbuv_mode(cpi, x, &rate_uv, &rate_uv_tokenonly,
-                                    &dist_uv, &uv_skip_txfm, bsize,
-                                    max_uv_tx_size);
+      // Set up the tx variables for reproducing the y predictions in case we
+      // need it for chroma-from-luma.
+      if (xd->is_chroma_ref && store_cfl_required_rdo(cm, x)) {
+        memcpy(txfm_info->blk_skip, ctx->blk_skip,
+               sizeof(txfm_info->blk_skip[0]) * ctx->num_4x4_blk);
+        av1_copy_array(xd->tx_type_map, ctx->tx_type_map, ctx->num_4x4_blk);
       }
+      const TX_SIZE max_uv_tx_size = av1_get_tx_size(AOM_PLANE_U, xd);
+      av1_rd_pick_intra_sbuv_mode(cpi, x, &rate_uv, &rate_uv_tokenonly,
+                                  &dist_uv, &uv_skip_txfm, bsize,
+                                  max_uv_tx_size);
     }
 
     // Intra block is always coded as non-skip
@@ -3719,7 +3712,7 @@
     IntraModeSearchState *intra_search_state) {
   intra_search_state->skip_intra_modes = 0;
   intra_search_state->best_intra_mode = DC_PRED;
-  intra_search_state->angle_stats_ready = 0;
+  intra_search_state->dir_mode_skip_mask_ready = 0;
   av1_zero(intra_search_state->directional_mode_skip_mask);
   intra_search_state->rate_uv_intra = INT_MAX;
   av1_zero(intra_search_state->pmi_uv);
diff --git a/av1/encoder/rdopt.h b/av1/encoder/rdopt.h
index 3d3aaeb..d962042 100644
--- a/av1/encoder/rdopt.h
+++ b/av1/encoder/rdopt.h
@@ -35,34 +35,11 @@
 struct macroblock;
 struct RD_STATS;
 
-// Returns the number of colors in 'src'.
-int av1_count_colors(const uint8_t *src, int stride, int rows, int cols,
-                     int *val_count);
-// Same as av1_count_colors(), but for high-bitdepth mode.
-int av1_count_colors_highbd(const uint8_t *src8, int stride, int rows, int cols,
-                            int bit_depth, int *val_count);
-
-static INLINE int av1_cost_skip_txb(const CoeffCosts *coeff_costs,
-                                    const TXB_CTX *const txb_ctx, int plane,
-                                    TX_SIZE tx_size) {
-  const TX_SIZE txs_ctx = get_txsize_entropy_ctx(tx_size);
-  const PLANE_TYPE plane_type = get_plane_type(plane);
-  const LV_MAP_COEFF_COST *const coeff_costs_ =
-      &coeff_costs->coeff_costs[txs_ctx][plane_type];
-  return coeff_costs_->txb_skip_cost[txb_ctx->txb_skip_ctx][1];
-}
-
+// Top level function for intra mode selection during intra-only frame encoding.
 void av1_rd_pick_intra_mode_sb(const struct AV1_COMP *cpi, struct macroblock *x,
                                struct RD_STATS *rd_cost, BLOCK_SIZE bsize,
                                PICK_MODE_CONTEXT *ctx, int64_t best_rd);
 
-unsigned int av1_get_sby_perpixel_variance(const struct AV1_COMP *cpi,
-                                           const struct buf_2d *ref,
-                                           BLOCK_SIZE bs);
-unsigned int av1_high_get_sby_perpixel_variance(const struct AV1_COMP *cpi,
-                                                const struct buf_2d *ref,
-                                                BLOCK_SIZE bs, int bd);
-
 /*!\brief AV1 inter mode selection.
  *
  * \ingroup inter_mode_search
@@ -95,9 +72,11 @@
                                BLOCK_SIZE bsize, PICK_MODE_CONTEXT *ctx,
                                int64_t best_rd_so_far);
 
-void av1_pick_intra_mode(AV1_COMP *cpi, MACROBLOCK *x, RD_STATS *rd_cost,
-                         BLOCK_SIZE bsize, PICK_MODE_CONTEXT *ctx);
+// Non-rd version of intra mode selection used in av1 real time mode.
+void av1_nonrd_pick_intra_mode(AV1_COMP *cpi, MACROBLOCK *x, RD_STATS *rd_cost,
+                               BLOCK_SIZE bsize, PICK_MODE_CONTEXT *ctx);
 
+// Non-rd version of inter mode selection used in av1 real time mode.
 void av1_nonrd_pick_inter_mode_sb(struct AV1_COMP *cpi,
                                   struct TileDataEnc *tile_data,
                                   struct macroblock *x,
@@ -109,6 +88,8 @@
     struct macroblock *x, int mi_row, int mi_col, struct RD_STATS *rd_cost,
     BLOCK_SIZE bsize, PICK_MODE_CONTEXT *ctx, int64_t best_rd_so_far);
 
+// TODO(any): The defs below could potentially be moved to rdopt_utils.h instead
+// because they are not the main rdopt functions.
 /*!\cond */
 // The best edge strength seen in the block, as well as the best x and y
 // components of edge strength seen.
diff --git a/av1/encoder/rdopt_utils.h b/av1/encoder/rdopt_utils.h
index d2d668a..73f1496 100644
--- a/av1/encoder/rdopt_utils.h
+++ b/av1/encoder/rdopt_utils.h
@@ -664,6 +664,14 @@
       AOMMIN(x->winner_mode_count + 1, max_winner_mode_count);
 }
 
+unsigned int av1_get_sby_perpixel_variance(const struct AV1_COMP *cpi,
+                                           const struct buf_2d *ref,
+                                           BLOCK_SIZE bs);
+
+unsigned int av1_high_get_sby_perpixel_variance(const struct AV1_COMP *cpi,
+                                                const struct buf_2d *ref,
+                                                BLOCK_SIZE bs, int bd);
+
 #ifdef __cplusplus
 }  // extern "C"
 #endif
diff --git a/av1/encoder/tx_search.c b/av1/encoder/tx_search.c
index 98e06d3..54f7a8c 100644
--- a/av1/encoder/tx_search.c
+++ b/av1/encoder/tx_search.c
@@ -3449,6 +3449,7 @@
 }
 
 // Calculate the transform coefficient RD cost for the given chroma coding block
+// If the current mode is intra, then this function will compute the predictor.
 // Return value 0: early termination triggered, no valid rd cost available;
 //              1: rd cost values are valid.
 int av1_txfm_uvrd(const AV1_COMP *const cpi, MACROBLOCK *x, RD_STATS *rd_stats,