~0.1% - 0.3% speedup: inline static functions

Speedup is linearly correlated with --cpu-used and bitrate; higher
values of --cpu-used result in higher speedups, as does encoding at
higher bitrates. Speedup measured via instruction counting.

Change-Id: I97d8e0d0780456845ddf0afd2e5ee6674f877aec
diff --git a/av1/common/entropy.c b/av1/common/entropy.c
index f63ac98..9eed108 100644
--- a/av1/common/entropy.c
+++ b/av1/common/entropy.c
@@ -50,8 +50,9 @@
   av1_copy(cm->fc->eob_flag_cdf1024, av1_default_eob_multi1024_cdfs[index]);
 }
 
-static void reset_cdf_symbol_counter(aom_cdf_prob *cdf_ptr, int num_cdfs,
-                                     int cdf_stride, int nsymbs) {
+static AOM_INLINE void reset_cdf_symbol_counter(aom_cdf_prob *cdf_ptr,
+                                                int num_cdfs, int cdf_stride,
+                                                int nsymbs) {
   for (int i = 0; i < num_cdfs; i++) {
     cdf_ptr[i * cdf_stride + nsymbs] = 0;
   }
@@ -68,7 +69,7 @@
     reset_cdf_symbol_counter(cdf_ptr, num_cdfs, cdf_stride, nsymbs); \
   } while (0)
 
-static void reset_nmv_counter(nmv_context *nmv) {
+static AOM_INLINE void reset_nmv_counter(nmv_context *nmv) {
   RESET_CDF_COUNTER(nmv->joints_cdf, 4);
   for (int i = 0; i < 2; i++) {
     RESET_CDF_COUNTER(nmv->comps[i].classes_cdf, MV_CLASSES);
diff --git a/av1/common/mvref_common.c b/av1/common/mvref_common.c
index d91c5af..8055d10 100644
--- a/av1/common/mvref_common.c
+++ b/av1/common/mvref_common.c
@@ -23,7 +23,7 @@
 
 // TODO(jingning): Consider the use of lookup table for (num / den)
 // altogether.
-static void get_mv_projection(MV *output, MV ref, int num, int den) {
+static AOM_INLINE void get_mv_projection(MV *output, MV ref, int num, int den) {
   den = AOMMIN(den, MAX_FRAME_DISTANCE);
   num = num > 0 ? AOMMIN(num, MAX_FRAME_DISTANCE)
                 : AOMMAX(num, -MAX_FRAME_DISTANCE);
@@ -71,7 +71,7 @@
   }
 }
 
-static void add_ref_mv_candidate(
+static AOM_INLINE void add_ref_mv_candidate(
     const MB_MODE_INFO *const candidate, const MV_REFERENCE_FRAME rf[2],
     uint8_t *refmv_count, uint8_t *ref_match_count, uint8_t *newmv_count,
     CANDIDATE_MV *ref_mv_stack, uint16_t *ref_mv_weight,
@@ -140,13 +140,12 @@
   }
 }
 
-static void scan_row_mbmi(const AV1_COMMON *cm, const MACROBLOCKD *xd,
-                          int mi_row, int mi_col,
-                          const MV_REFERENCE_FRAME rf[2], int row_offset,
-                          CANDIDATE_MV *ref_mv_stack, uint16_t *ref_mv_weight,
-                          uint8_t *refmv_count, uint8_t *ref_match_count,
-                          uint8_t *newmv_count, int_mv *gm_mv_candidates,
-                          int max_row_offset, int *processed_rows) {
+static AOM_INLINE void scan_row_mbmi(
+    const AV1_COMMON *cm, const MACROBLOCKD *xd, int mi_row, int mi_col,
+    const MV_REFERENCE_FRAME rf[2], int row_offset, CANDIDATE_MV *ref_mv_stack,
+    uint16_t *ref_mv_weight, uint8_t *refmv_count, uint8_t *ref_match_count,
+    uint8_t *newmv_count, int_mv *gm_mv_candidates, int max_row_offset,
+    int *processed_rows) {
   int end_mi = AOMMIN(xd->n4_w, cm->mi_cols - mi_col);
   end_mi = AOMMIN(end_mi, mi_size_wide[BLOCK_64X64]);
   const int n8_w_8 = mi_size_wide[BLOCK_8X8];
@@ -191,13 +190,12 @@
   }
 }
 
-static void scan_col_mbmi(const AV1_COMMON *cm, const MACROBLOCKD *xd,
-                          int mi_row, int mi_col,
-                          const MV_REFERENCE_FRAME rf[2], int col_offset,
-                          CANDIDATE_MV *ref_mv_stack, uint16_t *ref_mv_weight,
-                          uint8_t *refmv_count, uint8_t *ref_match_count,
-                          uint8_t *newmv_count, int_mv *gm_mv_candidates,
-                          int max_col_offset, int *processed_cols) {
+static AOM_INLINE void scan_col_mbmi(
+    const AV1_COMMON *cm, const MACROBLOCKD *xd, int mi_row, int mi_col,
+    const MV_REFERENCE_FRAME rf[2], int col_offset, CANDIDATE_MV *ref_mv_stack,
+    uint16_t *ref_mv_weight, uint8_t *refmv_count, uint8_t *ref_match_count,
+    uint8_t *newmv_count, int_mv *gm_mv_candidates, int max_col_offset,
+    int *processed_cols) {
   int end_mi = AOMMIN(xd->n4_h, cm->mi_rows - mi_row);
   end_mi = AOMMIN(end_mi, mi_size_high[BLOCK_64X64]);
   const int n8_h_8 = mi_size_high[BLOCK_8X8];
@@ -241,13 +239,12 @@
   }
 }
 
-static void scan_blk_mbmi(const AV1_COMMON *cm, const MACROBLOCKD *xd,
-                          const int mi_row, const int mi_col,
-                          const MV_REFERENCE_FRAME rf[2], int row_offset,
-                          int col_offset, CANDIDATE_MV *ref_mv_stack,
-                          uint16_t *ref_mv_weight, uint8_t *ref_match_count,
-                          uint8_t *newmv_count, int_mv *gm_mv_candidates,
-                          uint8_t *refmv_count) {
+static AOM_INLINE void scan_blk_mbmi(
+    const AV1_COMMON *cm, const MACROBLOCKD *xd, const int mi_row,
+    const int mi_col, const MV_REFERENCE_FRAME rf[2], int row_offset,
+    int col_offset, CANDIDATE_MV *ref_mv_stack, uint16_t *ref_mv_weight,
+    uint8_t *ref_match_count, uint8_t *newmv_count, int_mv *gm_mv_candidates,
+    uint8_t *refmv_count) {
   const TileInfo *const tile = &xd->tile;
   POSITION mi_pos;
 
@@ -420,7 +417,7 @@
   return 1;
 }
 
-static void process_compound_ref_mv_candidate(
+static AOM_INLINE void process_compound_ref_mv_candidate(
     const MB_MODE_INFO *const candidate, const AV1_COMMON *const cm,
     const MV_REFERENCE_FRAME *const rf, int_mv ref_id[2][2],
     int ref_id_count[2], int_mv ref_diff[2][2], int ref_diff_count[2]) {
@@ -445,7 +442,7 @@
   }
 }
 
-static void process_single_ref_mv_candidate(
+static AOM_INLINE void process_single_ref_mv_candidate(
     const MB_MODE_INFO *const candidate, const AV1_COMMON *const cm,
     MV_REFERENCE_FRAME ref_frame, uint8_t *const refmv_count,
     CANDIDATE_MV ref_mv_stack[MAX_REF_MV_STACK_SIZE],
@@ -476,14 +473,13 @@
   }
 }
 
-static void setup_ref_mv_list(const AV1_COMMON *cm, const MACROBLOCKD *xd,
-                              MV_REFERENCE_FRAME ref_frame,
-                              uint8_t *const refmv_count,
-                              CANDIDATE_MV ref_mv_stack[MAX_REF_MV_STACK_SIZE],
-                              uint16_t ref_mv_weight[MAX_REF_MV_STACK_SIZE],
-                              int_mv mv_ref_list[MAX_MV_REF_CANDIDATES],
-                              int_mv *gm_mv_candidates, int mi_row, int mi_col,
-                              int16_t *mode_context) {
+static AOM_INLINE void setup_ref_mv_list(
+    const AV1_COMMON *cm, const MACROBLOCKD *xd, MV_REFERENCE_FRAME ref_frame,
+    uint8_t *const refmv_count,
+    CANDIDATE_MV ref_mv_stack[MAX_REF_MV_STACK_SIZE],
+    uint16_t ref_mv_weight[MAX_REF_MV_STACK_SIZE],
+    int_mv mv_ref_list[MAX_MV_REF_CANDIDATES], int_mv *gm_mv_candidates,
+    int mi_row, int mi_col, int16_t *mode_context) {
   const int bs = AOMMAX(xd->n4_w, xd->n4_h);
   const int has_tr = has_top_right(cm, xd, mi_row, mi_col, bs);
   MV_REFERENCE_FRAME rf[2];
@@ -1339,8 +1335,8 @@
   return info_a->map_idx - info_b->map_idx;
 }
 
-static void set_ref_frame_info(int *remapped_ref_idx, int frame_idx,
-                               REF_FRAME_INFO *ref_info) {
+static AOM_INLINE void set_ref_frame_info(int *remapped_ref_idx, int frame_idx,
+                                          REF_FRAME_INFO *ref_info) {
   assert(frame_idx >= 0 && frame_idx < INTER_REFS_PER_FRAME);
 
   remapped_ref_idx[frame_idx] = ref_info->map_idx;
diff --git a/av1/common/reconinter.c b/av1/common/reconinter.c
index bd040cf..a5d1f34 100644
--- a/av1/common/reconinter.c
+++ b/av1/common/reconinter.c
@@ -118,7 +118,8 @@
   64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64,
 };
 
-static void shift_copy(const uint8_t *src, uint8_t *dst, int shift, int width) {
+static AOM_INLINE void shift_copy(const uint8_t *src, uint8_t *dst, int shift,
+                                  int width) {
   if (shift >= 0) {
     memcpy(dst + shift, src, width - shift);
     memset(dst, src[0], shift);
@@ -276,10 +277,10 @@
   }
 }
 
-static void diffwtd_mask_d16(uint8_t *mask, int which_inverse, int mask_base,
-                             const CONV_BUF_TYPE *src0, int src0_stride,
-                             const CONV_BUF_TYPE *src1, int src1_stride, int h,
-                             int w, ConvolveParams *conv_params, int bd) {
+static AOM_INLINE void diffwtd_mask_d16(
+    uint8_t *mask, int which_inverse, int mask_base, const CONV_BUF_TYPE *src0,
+    int src0_stride, const CONV_BUF_TYPE *src1, int src1_stride, int h, int w,
+    ConvolveParams *conv_params, int bd) {
   int round =
       2 * FILTER_BITS - conv_params->round_0 - conv_params->round_1 + (bd - 8);
   int i, j, m, diff;
@@ -310,9 +311,10 @@
   }
 }
 
-static void diffwtd_mask(uint8_t *mask, int which_inverse, int mask_base,
-                         const uint8_t *src0, int src0_stride,
-                         const uint8_t *src1, int src1_stride, int h, int w) {
+static AOM_INLINE void diffwtd_mask(uint8_t *mask, int which_inverse,
+                                    int mask_base, const uint8_t *src0,
+                                    int src0_stride, const uint8_t *src1,
+                                    int src1_stride, int h, int w) {
   int i, j, m, diff;
   for (i = 0; i < h; ++i) {
     for (j = 0; j < w; ++j) {
@@ -420,7 +422,7 @@
   }
 }
 
-static void init_wedge_master_masks() {
+static AOM_INLINE void init_wedge_master_masks() {
   int i, j;
   const int w = MASK_MASTER_SIZE;
   const int h = MASK_MASTER_SIZE;
@@ -485,7 +487,7 @@
 // If the signs for the wedges for various blocksizes are
 // inconsistent flip the sign flag. Do it only once for every
 // wedge codebook.
-static void init_wedge_signs() {
+static AOM_INLINE void init_wedge_signs() {
   BLOCK_SIZE sb_type;
   memset(wedge_signflip_lookup, 0, sizeof(wedge_signflip_lookup));
   for (sb_type = BLOCK_4X4; sb_type < BLOCK_SIZES_ALL; ++sb_type) {
@@ -519,7 +521,7 @@
 }
 #endif  // !USE_PRECOMPUTED_WEDGE_SIGN
 
-static void init_wedge_masks() {
+static AOM_INLINE void init_wedge_masks() {
   uint8_t *dst = wedge_mask_buf;
   BLOCK_SIZE bsize;
   memset(wedge_masks, 0, sizeof(wedge_masks));
@@ -558,7 +560,7 @@
   init_wedge_masks();
 }
 
-static void build_masked_compound_no_round(
+static AOM_INLINE void build_masked_compound_no_round(
     uint8_t *dst, int dst_stride, const CONV_BUF_TYPE *src0, int src0_stride,
     const CONV_BUF_TYPE *src1, int src1_stride,
     const INTERINTER_COMPOUND_DATA *const comp_data, BLOCK_SIZE sb_type, int h,
@@ -995,9 +997,9 @@
 };
 /* clang-format on */
 
-static void build_smooth_interintra_mask(uint8_t *mask, int stride,
-                                         BLOCK_SIZE plane_bsize,
-                                         INTERINTRA_MODE mode) {
+static AOM_INLINE void build_smooth_interintra_mask(uint8_t *mask, int stride,
+                                                    BLOCK_SIZE plane_bsize,
+                                                    INTERINTRA_MODE mode) {
   int i, j;
   const int bw = block_size_wide[plane_bsize];
   const int bh = block_size_high[plane_bsize];
@@ -1036,13 +1038,11 @@
   }
 }
 
-static void combine_interintra(INTERINTRA_MODE mode,
-                               int8_t use_wedge_interintra, int8_t wedge_index,
-                               int8_t wedge_sign, BLOCK_SIZE bsize,
-                               BLOCK_SIZE plane_bsize, uint8_t *comppred,
-                               int compstride, const uint8_t *interpred,
-                               int interstride, const uint8_t *intrapred,
-                               int intrastride) {
+static AOM_INLINE void combine_interintra(
+    INTERINTRA_MODE mode, int8_t use_wedge_interintra, int8_t wedge_index,
+    int8_t wedge_sign, BLOCK_SIZE bsize, BLOCK_SIZE plane_bsize,
+    uint8_t *comppred, int compstride, const uint8_t *interpred,
+    int interstride, const uint8_t *intrapred, int intrastride) {
   const int bw = block_size_wide[plane_bsize];
   const int bh = block_size_high[plane_bsize];
 
@@ -1065,7 +1065,7 @@
                      interstride, mask, bw, bw, bh, 0, 0);
 }
 
-static void combine_interintra_highbd(
+static AOM_INLINE void combine_interintra_highbd(
     INTERINTRA_MODE mode, int8_t use_wedge_interintra, int8_t wedge_index,
     int8_t wedge_sign, BLOCK_SIZE bsize, BLOCK_SIZE plane_bsize,
     uint8_t *comppred8, int compstride, const uint8_t *interpred8,
diff --git a/av1/decoder/decodeframe.c b/av1/decoder/decodeframe.c
index f355ca9..017740d 100644
--- a/av1/decoder/decodeframe.c
+++ b/av1/decoder/decodeframe.c
@@ -88,9 +88,9 @@
 }
 
 // Use only_chroma = 1 to only set the chroma planes
-static void set_planes_to_neutral_grey(const SequenceHeader *const seq_params,
-                                       const YV12_BUFFER_CONFIG *const buf,
-                                       int only_chroma) {
+static AOM_INLINE void set_planes_to_neutral_grey(
+    const SequenceHeader *const seq_params, const YV12_BUFFER_CONFIG *const buf,
+    int only_chroma) {
   if (seq_params->use_highbitdepth) {
     const int val = 1 << (seq_params->bit_depth - 1);
     for (int plane = only_chroma; plane < MAX_MB_PLANE; plane++) {
@@ -117,12 +117,11 @@
   }
 }
 
-static void loop_restoration_read_sb_coeffs(const AV1_COMMON *const cm,
-                                            MACROBLOCKD *xd,
-                                            aom_reader *const r, int plane,
-                                            int runit_idx);
+static AOM_INLINE void loop_restoration_read_sb_coeffs(
+    const AV1_COMMON *const cm, MACROBLOCKD *xd, aom_reader *const r, int plane,
+    int runit_idx);
 
-static void setup_compound_reference_mode(AV1_COMMON *cm) {
+static AOM_INLINE void setup_compound_reference_mode(AV1_COMMON *cm) {
   cm->comp_fwd_ref[0] = LAST_FRAME;
   cm->comp_fwd_ref[1] = LAST2_FRAME;
   cm->comp_fwd_ref[2] = LAST3_FRAME;
@@ -151,10 +150,11 @@
   }
 }
 
-static void inverse_transform_block(MACROBLOCKD *xd, int plane,
-                                    const TX_TYPE tx_type,
-                                    const TX_SIZE tx_size, uint8_t *dst,
-                                    int stride, int reduced_tx_set) {
+static AOM_INLINE void inverse_transform_block(MACROBLOCKD *xd, int plane,
+                                               const TX_TYPE tx_type,
+                                               const TX_SIZE tx_size,
+                                               uint8_t *dst, int stride,
+                                               int reduced_tx_set) {
   struct macroblockd_plane *const pd = &xd->plane[plane];
   tran_low_t *const dqcoeff = pd->dqcoeff_block + xd->cb_offset[plane];
   eob_info *eob_data = pd->eob_data + xd->txb_offset[plane];
@@ -165,11 +165,9 @@
   memset(dqcoeff, 0, (scan_line + 1) * sizeof(dqcoeff[0]));
 }
 
-static void read_coeffs_tx_intra_block(const AV1_COMMON *const cm,
-                                       MACROBLOCKD *const xd,
-                                       aom_reader *const r, const int plane,
-                                       const int row, const int col,
-                                       const TX_SIZE tx_size) {
+static AOM_INLINE void read_coeffs_tx_intra_block(
+    const AV1_COMMON *const cm, MACROBLOCKD *const xd, aom_reader *const r,
+    const int plane, const int row, const int col, const TX_SIZE tx_size) {
   MB_MODE_INFO *mbmi = xd->mi[0];
   if (!mbmi->skip) {
 #if TXCOEFF_TIMER
@@ -186,10 +184,11 @@
   }
 }
 
-static void decode_block_void(const AV1_COMMON *const cm, MACROBLOCKD *const xd,
-                              aom_reader *const r, const int plane,
-                              const int row, const int col,
-                              const TX_SIZE tx_size) {
+static AOM_INLINE void decode_block_void(const AV1_COMMON *const cm,
+                                         MACROBLOCKD *const xd,
+                                         aom_reader *const r, const int plane,
+                                         const int row, const int col,
+                                         const TX_SIZE tx_size) {
   (void)cm;
   (void)xd;
   (void)r;
@@ -199,9 +198,10 @@
   (void)tx_size;
 }
 
-static void predict_inter_block_void(AV1_COMMON *const cm,
-                                     MACROBLOCKD *const xd, int mi_row,
-                                     int mi_col, BLOCK_SIZE bsize) {
+static AOM_INLINE void predict_inter_block_void(AV1_COMMON *const cm,
+                                                MACROBLOCKD *const xd,
+                                                int mi_row, int mi_col,
+                                                BLOCK_SIZE bsize) {
   (void)cm;
   (void)xd;
   (void)mi_row;
@@ -209,13 +209,13 @@
   (void)bsize;
 }
 
-static void cfl_store_inter_block_void(AV1_COMMON *const cm,
-                                       MACROBLOCKD *const xd) {
+static AOM_INLINE void cfl_store_inter_block_void(AV1_COMMON *const cm,
+                                                  MACROBLOCKD *const xd) {
   (void)cm;
   (void)xd;
 }
 
-static void predict_and_reconstruct_intra_block(
+static AOM_INLINE void predict_and_reconstruct_intra_block(
     const AV1_COMMON *const cm, MACROBLOCKD *const xd, aom_reader *const r,
     const int plane, const int row, const int col, const TX_SIZE tx_size) {
   (void)r;
@@ -243,11 +243,10 @@
   }
 }
 
-static void inverse_transform_inter_block(const AV1_COMMON *const cm,
-                                          MACROBLOCKD *const xd,
-                                          aom_reader *const r, const int plane,
-                                          const int blk_row, const int blk_col,
-                                          const TX_SIZE tx_size) {
+static AOM_INLINE void inverse_transform_inter_block(
+    const AV1_COMMON *const cm, MACROBLOCKD *const xd, aom_reader *const r,
+    const int plane, const int blk_row, const int blk_col,
+    const TX_SIZE tx_size) {
   (void)r;
   PLANE_TYPE plane_type = get_plane_type(plane);
   const struct macroblockd_plane *const pd = &xd->plane[plane];
@@ -276,18 +275,17 @@
 #endif
 }
 
-static void set_cb_buffer_offsets(MACROBLOCKD *const xd, TX_SIZE tx_size,
-                                  int plane) {
+static AOM_INLINE void set_cb_buffer_offsets(MACROBLOCKD *const xd,
+                                             TX_SIZE tx_size, int plane) {
   xd->cb_offset[plane] += tx_size_wide[tx_size] * tx_size_high[tx_size];
   xd->txb_offset[plane] =
       xd->cb_offset[plane] / (TX_SIZE_W_MIN * TX_SIZE_H_MIN);
 }
 
-static void decode_reconstruct_tx(AV1_COMMON *cm, ThreadData *const td,
-                                  aom_reader *r, MB_MODE_INFO *const mbmi,
-                                  int plane, BLOCK_SIZE plane_bsize,
-                                  int blk_row, int blk_col, int block,
-                                  TX_SIZE tx_size, int *eob_total) {
+static AOM_INLINE void decode_reconstruct_tx(
+    AV1_COMMON *cm, ThreadData *const td, aom_reader *r,
+    MB_MODE_INFO *const mbmi, int plane, BLOCK_SIZE plane_bsize, int blk_row,
+    int blk_col, int block, TX_SIZE tx_size, int *eob_total) {
   MACROBLOCKD *const xd = &td->xd;
   const struct macroblockd_plane *const pd = &xd->plane[plane];
   const TX_SIZE plane_tx_size =
@@ -335,9 +333,9 @@
   }
 }
 
-static void set_offsets(AV1_COMMON *const cm, MACROBLOCKD *const xd,
-                        BLOCK_SIZE bsize, int mi_row, int mi_col, int bw,
-                        int bh, int x_mis, int y_mis) {
+static AOM_INLINE void set_offsets(AV1_COMMON *const cm, MACROBLOCKD *const xd,
+                                   BLOCK_SIZE bsize, int mi_row, int mi_col,
+                                   int bw, int bh, int x_mis, int y_mis) {
   const int num_planes = av1_num_planes(cm);
 
   const TileInfo *const tile = &xd->tile;
@@ -373,9 +371,11 @@
                        num_planes);
 }
 
-static void decode_mbmi_block(AV1Decoder *const pbi, MACROBLOCKD *const xd,
-                              int mi_row, int mi_col, aom_reader *r,
-                              PARTITION_TYPE partition, BLOCK_SIZE bsize) {
+static AOM_INLINE void decode_mbmi_block(AV1Decoder *const pbi,
+                                         MACROBLOCKD *const xd, int mi_row,
+                                         int mi_col, aom_reader *r,
+                                         PARTITION_TYPE partition,
+                                         BLOCK_SIZE bsize) {
   AV1_COMMON *const cm = &pbi->common;
   const SequenceHeader *const seq_params = &cm->seq_params;
   const int bw = mi_size_wide[bsize];
@@ -407,9 +407,10 @@
   int y1;
 } PadBlock;
 
-static void highbd_build_mc_border(const uint8_t *src8, int src_stride,
-                                   uint8_t *dst8, int dst_stride, int x, int y,
-                                   int b_w, int b_h, int w, int h) {
+static AOM_INLINE void highbd_build_mc_border(const uint8_t *src8,
+                                              int src_stride, uint8_t *dst8,
+                                              int dst_stride, int x, int y,
+                                              int b_w, int b_h, int w, int h) {
   // Get a pointer to the start of the real data for this row.
   const uint16_t *src = CONVERT_TO_SHORTPTR(src8);
   uint16_t *dst = CONVERT_TO_SHORTPTR(dst8);
@@ -445,9 +446,9 @@
   } while (--b_h);
 }
 
-static void build_mc_border(const uint8_t *src, int src_stride, uint8_t *dst,
-                            int dst_stride, int x, int y, int b_w, int b_h,
-                            int w, int h) {
+static AOM_INLINE void build_mc_border(const uint8_t *src, int src_stride,
+                                       uint8_t *dst, int dst_stride, int x,
+                                       int y, int b_w, int b_h, int w, int h) {
   // Get a pointer to the start of the real data for this row.
   const uint8_t *ref_row = src - x - y * src_stride;
 
@@ -834,11 +835,9 @@
   }
 }
 
-static void dec_build_inter_predictors_for_planes(const AV1_COMMON *cm,
-                                                  MACROBLOCKD *xd,
-                                                  BLOCK_SIZE bsize, int mi_row,
-                                                  int mi_col, int plane_from,
-                                                  int plane_to) {
+static AOM_INLINE void dec_build_inter_predictors_for_planes(
+    const AV1_COMMON *cm, MACROBLOCKD *xd, BLOCK_SIZE bsize, int mi_row,
+    int mi_col, int plane_from, int plane_to) {
   int plane;
   const int mi_x = mi_col * MI_SIZE;
   const int mi_y = mi_row * MI_SIZE;
@@ -855,10 +854,11 @@
   }
 }
 
-static void dec_build_inter_predictors_sby(const AV1_COMMON *cm,
-                                           MACROBLOCKD *xd, int mi_row,
-                                           int mi_col, const BUFFER_SET *ctx,
-                                           BLOCK_SIZE bsize) {
+static AOM_INLINE void dec_build_inter_predictors_sby(const AV1_COMMON *cm,
+                                                      MACROBLOCKD *xd,
+                                                      int mi_row, int mi_col,
+                                                      const BUFFER_SET *ctx,
+                                                      BLOCK_SIZE bsize) {
   dec_build_inter_predictors_for_planes(cm, xd, bsize, mi_row, mi_col, 0, 0);
 
   if (is_interintra_pred(xd->mi[0])) {
@@ -870,10 +870,11 @@
   }
 }
 
-static void dec_build_inter_predictors_sbuv(const AV1_COMMON *cm,
-                                            MACROBLOCKD *xd, int mi_row,
-                                            int mi_col, const BUFFER_SET *ctx,
-                                            BLOCK_SIZE bsize) {
+static AOM_INLINE void dec_build_inter_predictors_sbuv(const AV1_COMMON *cm,
+                                                       MACROBLOCKD *xd,
+                                                       int mi_row, int mi_col,
+                                                       const BUFFER_SET *ctx,
+                                                       BLOCK_SIZE bsize) {
   dec_build_inter_predictors_for_planes(cm, xd, bsize, mi_row, mi_col, 1,
                                         MAX_MB_PLANE - 1);
 
@@ -889,9 +890,11 @@
   }
 }
 
-static void dec_build_inter_predictors_sb(const AV1_COMMON *cm, MACROBLOCKD *xd,
-                                          int mi_row, int mi_col,
-                                          BUFFER_SET *ctx, BLOCK_SIZE bsize) {
+static AOM_INLINE void dec_build_inter_predictors_sb(const AV1_COMMON *cm,
+                                                     MACROBLOCKD *xd,
+                                                     int mi_row, int mi_col,
+                                                     BUFFER_SET *ctx,
+                                                     BLOCK_SIZE bsize) {
   const int num_planes = av1_num_planes(cm);
   dec_build_inter_predictors_sby(cm, xd, mi_row, mi_col, ctx, bsize);
   if (num_planes > 1)
@@ -925,7 +928,7 @@
   }
 }
 
-static void dec_build_prediction_by_above_preds(
+static AOM_INLINE void dec_build_prediction_by_above_preds(
     const AV1_COMMON *cm, MACROBLOCKD *xd, int mi_row, int mi_col,
     uint8_t *tmp_buf[MAX_MB_PLANE], int tmp_width[MAX_MB_PLANE],
     int tmp_height[MAX_MB_PLANE], int tmp_stride[MAX_MB_PLANE]) {
@@ -978,7 +981,7 @@
   }
 }
 
-static void dec_build_prediction_by_left_preds(
+static AOM_INLINE void dec_build_prediction_by_left_preds(
     const AV1_COMMON *cm, MACROBLOCKD *xd, int mi_row, int mi_col,
     uint8_t *tmp_buf[MAX_MB_PLANE], int tmp_width[MAX_MB_PLANE],
     int tmp_height[MAX_MB_PLANE], int tmp_stride[MAX_MB_PLANE]) {
@@ -1005,9 +1008,10 @@
   xd->mb_to_bottom_edge = ctxt.mb_to_far_edge;
 }
 
-static void dec_build_obmc_inter_predictors_sb(const AV1_COMMON *cm,
-                                               MACROBLOCKD *xd, int mi_row,
-                                               int mi_col) {
+static AOM_INLINE void dec_build_obmc_inter_predictors_sb(const AV1_COMMON *cm,
+                                                          MACROBLOCKD *xd,
+                                                          int mi_row,
+                                                          int mi_col) {
   const int num_planes = av1_num_planes(cm);
   uint8_t *dst_buf1[MAX_MB_PLANE], *dst_buf2[MAX_MB_PLANE];
   int dst_stride1[MAX_MB_PLANE] = { MAX_SB_SIZE, MAX_SB_SIZE, MAX_SB_SIZE };
@@ -1047,15 +1051,17 @@
                                   dst_buf2, dst_stride2);
 }
 
-static void cfl_store_inter_block(AV1_COMMON *const cm, MACROBLOCKD *const xd) {
+static AOM_INLINE void cfl_store_inter_block(AV1_COMMON *const cm,
+                                             MACROBLOCKD *const xd) {
   MB_MODE_INFO *mbmi = xd->mi[0];
   if (store_cfl_required(cm, xd)) {
     cfl_store_block(xd, mbmi->sb_type, mbmi->tx_size);
   }
 }
 
-static void predict_inter_block(AV1_COMMON *const cm, MACROBLOCKD *const xd,
-                                int mi_row, int mi_col, BLOCK_SIZE bsize) {
+static AOM_INLINE void predict_inter_block(AV1_COMMON *const cm,
+                                           MACROBLOCKD *const xd, int mi_row,
+                                           int mi_col, BLOCK_SIZE bsize) {
   MB_MODE_INFO *mbmi = xd->mi[0];
   const int num_planes = av1_num_planes(cm);
   for (int ref = 0; ref < 1 + has_second_ref(mbmi); ++ref) {
@@ -1096,8 +1102,8 @@
 #endif
 }
 
-static void set_color_index_map_offset(MACROBLOCKD *const xd, int plane,
-                                       aom_reader *r) {
+static AOM_INLINE void set_color_index_map_offset(MACROBLOCKD *const xd,
+                                                  int plane, aom_reader *r) {
   (void)r;
   Av1ColorMapParam params;
   const MB_MODE_INFO *const mbmi = xd->mi[0];
@@ -1106,10 +1112,11 @@
   xd->color_index_map_offset[plane] += params.plane_width * params.plane_height;
 }
 
-static void decode_token_recon_block(AV1Decoder *const pbi,
-                                     ThreadData *const td, int mi_row,
-                                     int mi_col, aom_reader *r,
-                                     BLOCK_SIZE bsize) {
+static AOM_INLINE void decode_token_recon_block(AV1Decoder *const pbi,
+                                                ThreadData *const td,
+                                                int mi_row, int mi_col,
+                                                aom_reader *r,
+                                                BLOCK_SIZE bsize) {
   AV1_COMMON *const cm = &pbi->common;
   MACROBLOCKD *const xd = &td->xd;
   const int num_planes = av1_num_planes(cm);
@@ -1234,10 +1241,10 @@
                     set_color_index_map_offset);
 }
 
-static void set_inter_tx_size(MB_MODE_INFO *mbmi, int stride_log2,
-                              int tx_w_log2, int tx_h_log2, int min_txs,
-                              int split_size, int txs, int blk_row,
-                              int blk_col) {
+static AOM_INLINE void set_inter_tx_size(MB_MODE_INFO *mbmi, int stride_log2,
+                                         int tx_w_log2, int tx_h_log2,
+                                         int min_txs, int split_size, int txs,
+                                         int blk_row, int blk_col) {
   for (int idy = 0; idy < tx_size_high_unit[split_size];
        idy += tx_size_high_unit[min_txs]) {
     for (int idx = 0; idx < tx_size_wide_unit[split_size];
@@ -1249,13 +1256,14 @@
   }
 }
 
-static void read_tx_size_vartx(MACROBLOCKD *xd, MB_MODE_INFO *mbmi,
-                               TX_SIZE tx_size, int depth,
+static AOM_INLINE void read_tx_size_vartx(MACROBLOCKD *xd, MB_MODE_INFO *mbmi,
+                                          TX_SIZE tx_size, int depth,
 #if CONFIG_LPF_MASK
-                               AV1_COMMON *cm, int mi_row, int mi_col,
-                               int store_bitmask,
+                                          AV1_COMMON *cm, int mi_row,
+                                          int mi_col, int store_bitmask,
 #endif
-                               int blk_row, int blk_col, aom_reader *r) {
+                                          int blk_row, int blk_col,
+                                          aom_reader *r) {
   FRAME_CONTEXT *ec_ctx = xd->tile_ctx;
   int is_split = 0;
   const BLOCK_SIZE bsize = mbmi->sb_type;
@@ -1373,9 +1381,11 @@
   }
 }
 
-static void parse_decode_block(AV1Decoder *const pbi, ThreadData *const td,
-                               int mi_row, int mi_col, aom_reader *r,
-                               PARTITION_TYPE partition, BLOCK_SIZE bsize) {
+static AOM_INLINE void parse_decode_block(AV1Decoder *const pbi,
+                                          ThreadData *const td, int mi_row,
+                                          int mi_col, aom_reader *r,
+                                          PARTITION_TYPE partition,
+                                          BLOCK_SIZE bsize) {
   MACROBLOCKD *const xd = &td->xd;
   decode_mbmi_block(pbi, xd, mi_row, mi_col, r, partition, bsize);
 
@@ -1459,9 +1469,10 @@
   decode_token_recon_block(pbi, td, mi_row, mi_col, r, bsize);
 }
 
-static void set_offsets_for_pred_and_recon(AV1Decoder *const pbi,
-                                           ThreadData *const td, int mi_row,
-                                           int mi_col, BLOCK_SIZE bsize) {
+static AOM_INLINE void set_offsets_for_pred_and_recon(AV1Decoder *const pbi,
+                                                      ThreadData *const td,
+                                                      int mi_row, int mi_col,
+                                                      BLOCK_SIZE bsize) {
   AV1_COMMON *const cm = &pbi->common;
   MACROBLOCKD *const xd = &td->xd;
   const int bw = mi_size_wide[bsize];
@@ -1485,9 +1496,10 @@
                        num_planes);
 }
 
-static void decode_block(AV1Decoder *const pbi, ThreadData *const td,
-                         int mi_row, int mi_col, aom_reader *r,
-                         PARTITION_TYPE partition, BLOCK_SIZE bsize) {
+static AOM_INLINE void decode_block(AV1Decoder *const pbi, ThreadData *const td,
+                                    int mi_row, int mi_col, aom_reader *r,
+                                    PARTITION_TYPE partition,
+                                    BLOCK_SIZE bsize) {
   (void)partition;
   set_offsets_for_pred_and_recon(pbi, td, mi_row, mi_col, bsize);
   decode_token_recon_block(pbi, td, mi_row, mi_col, r, bsize);
@@ -1523,9 +1535,11 @@
 }
 
 // TODO(slavarnway): eliminate bsize and subsize in future commits
-static void decode_partition(AV1Decoder *const pbi, ThreadData *const td,
-                             int mi_row, int mi_col, aom_reader *reader,
-                             BLOCK_SIZE bsize, int parse_decode_flag) {
+static AOM_INLINE void decode_partition(AV1Decoder *const pbi,
+                                        ThreadData *const td, int mi_row,
+                                        int mi_col, aom_reader *reader,
+                                        BLOCK_SIZE bsize,
+                                        int parse_decode_flag) {
   assert(bsize < BLOCK_SIZES_ALL);
   AV1_COMMON *const cm = &pbi->common;
   MACROBLOCKD *const xd = &td->xd;
@@ -1657,10 +1671,10 @@
     update_ext_partition_context(xd, mi_row, mi_col, subsize, bsize, partition);
 }
 
-static void setup_bool_decoder(const uint8_t *data, const uint8_t *data_end,
-                               const size_t read_size,
-                               struct aom_internal_error_info *error_info,
-                               aom_reader *r, uint8_t allow_update_cdf) {
+static AOM_INLINE void setup_bool_decoder(
+    const uint8_t *data, const uint8_t *data_end, const size_t read_size,
+    struct aom_internal_error_info *error_info, aom_reader *r,
+    uint8_t allow_update_cdf) {
   // Validate the calculated partition length. If the buffer
   // described by the partition can't be fully read, then restrict
   // it to the portion that can be (for EC mode) or throw an error.
@@ -1675,8 +1689,8 @@
   r->allow_update_cdf = allow_update_cdf;
 }
 
-static void setup_segmentation(AV1_COMMON *const cm,
-                               struct aom_read_bit_buffer *rb) {
+static AOM_INLINE void setup_segmentation(AV1_COMMON *const cm,
+                                          struct aom_read_bit_buffer *rb) {
   struct segmentation *const seg = &cm->seg;
 
   seg->update_map = 0;
@@ -1748,8 +1762,8 @@
   segfeatures_copy(&cm->cur_frame->seg, seg);
 }
 
-static void decode_restoration_mode(AV1_COMMON *cm,
-                                    struct aom_read_bit_buffer *rb) {
+static AOM_INLINE void decode_restoration_mode(AV1_COMMON *cm,
+                                               struct aom_read_bit_buffer *rb) {
   assert(!cm->all_lossless);
   const int num_planes = av1_num_planes(cm);
   if (cm->allow_intrabc) return;
@@ -1804,8 +1818,10 @@
   }
 }
 
-static void read_wiener_filter(int wiener_win, WienerInfo *wiener_info,
-                               WienerInfo *ref_wiener_info, aom_reader *rb) {
+static AOM_INLINE void read_wiener_filter(int wiener_win,
+                                          WienerInfo *wiener_info,
+                                          WienerInfo *ref_wiener_info,
+                                          aom_reader *rb) {
   memset(wiener_info->vfilter, 0, sizeof(wiener_info->vfilter));
   memset(wiener_info->hfilter, 0, sizeof(wiener_info->hfilter));
 
@@ -1863,8 +1879,9 @@
   memcpy(ref_wiener_info, wiener_info, sizeof(*wiener_info));
 }
 
-static void read_sgrproj_filter(SgrprojInfo *sgrproj_info,
-                                SgrprojInfo *ref_sgrproj_info, aom_reader *rb) {
+static AOM_INLINE void read_sgrproj_filter(SgrprojInfo *sgrproj_info,
+                                           SgrprojInfo *ref_sgrproj_info,
+                                           aom_reader *rb) {
   sgrproj_info->ep = aom_read_literal(rb, SGRPROJ_PARAMS_BITS, ACCT_STR);
   const sgr_params_type *params = &av1_sgr_params[sgrproj_info->ep];
 
@@ -1899,10 +1916,9 @@
   memcpy(ref_sgrproj_info, sgrproj_info, sizeof(*sgrproj_info));
 }
 
-static void loop_restoration_read_sb_coeffs(const AV1_COMMON *const cm,
-                                            MACROBLOCKD *xd,
-                                            aom_reader *const r, int plane,
-                                            int runit_idx) {
+static AOM_INLINE void loop_restoration_read_sb_coeffs(
+    const AV1_COMMON *const cm, MACROBLOCKD *xd, aom_reader *const r, int plane,
+    int runit_idx) {
   const RestorationInfo *rsi = &cm->rst_info[plane];
   RestorationUnitInfo *rui = &rsi->unit_info[runit_idx];
   if (rsi->frame_restoration_type == RESTORE_NONE) return;
@@ -1943,7 +1959,8 @@
   }
 }
 
-static void setup_loopfilter(AV1_COMMON *cm, struct aom_read_bit_buffer *rb) {
+static AOM_INLINE void setup_loopfilter(AV1_COMMON *cm,
+                                        struct aom_read_bit_buffer *rb) {
   const int num_planes = av1_num_planes(cm);
   struct loopfilter *lf = &cm->lf;
   if (cm->allow_intrabc || cm->coded_lossless) {
@@ -1994,7 +2011,8 @@
   memcpy(cm->cur_frame->mode_deltas, lf->mode_deltas, MAX_MODE_LF_DELTAS);
 }
 
-static void setup_cdef(AV1_COMMON *cm, struct aom_read_bit_buffer *rb) {
+static AOM_INLINE void setup_cdef(AV1_COMMON *cm,
+                                  struct aom_read_bit_buffer *rb) {
   const int num_planes = av1_num_planes(cm);
   CdefInfo *const cdef_info = &cm->cdef_info;
 
@@ -2013,8 +2031,8 @@
   return aom_rb_read_bit(rb) ? aom_rb_read_inv_signed_literal(rb, 6) : 0;
 }
 
-static void setup_quantization(AV1_COMMON *const cm,
-                               struct aom_read_bit_buffer *rb) {
+static AOM_INLINE void setup_quantization(AV1_COMMON *const cm,
+                                          struct aom_read_bit_buffer *rb) {
   const SequenceHeader *const seq_params = &cm->seq_params;
   const int num_planes = av1_num_planes(cm);
   cm->base_qindex = aom_rb_read_literal(rb, QINDEX_BITS);
@@ -2053,8 +2071,8 @@
 }
 
 // Build y/uv dequant values based on segmentation.
-static void setup_segmentation_dequant(AV1_COMMON *const cm,
-                                       MACROBLOCKD *const xd) {
+static AOM_INLINE void setup_segmentation_dequant(AV1_COMMON *const cm,
+                                                  MACROBLOCKD *const xd) {
   const int bit_depth = cm->seq_params.bit_depth;
   const int using_qm = cm->using_qmatrix;
   // When segmentation is disabled, only the first value is used.  The
@@ -2096,7 +2114,8 @@
                              : aom_rb_read_literal(rb, LOG_SWITCHABLE_FILTERS);
 }
 
-static void setup_render_size(AV1_COMMON *cm, struct aom_read_bit_buffer *rb) {
+static AOM_INLINE void setup_render_size(AV1_COMMON *cm,
+                                         struct aom_read_bit_buffer *rb) {
   cm->render_width = cm->superres_upscaled_width;
   cm->render_height = cm->superres_upscaled_height;
   if (aom_rb_read_bit(rb))
@@ -2104,8 +2123,9 @@
 }
 
 // TODO(afergs): make "struct aom_read_bit_buffer *const rb"?
-static void setup_superres(AV1_COMMON *const cm, struct aom_read_bit_buffer *rb,
-                           int *width, int *height) {
+static AOM_INLINE void setup_superres(AV1_COMMON *const cm,
+                                      struct aom_read_bit_buffer *rb,
+                                      int *width, int *height) {
   cm->superres_upscaled_width = *width;
   cm->superres_upscaled_height = *height;
 
@@ -2126,7 +2146,8 @@
   }
 }
 
-static void resize_context_buffers(AV1_COMMON *cm, int width, int height) {
+static AOM_INLINE void resize_context_buffers(AV1_COMMON *cm, int width,
+                                              int height) {
 #if CONFIG_SIZE_LIMIT
   if (width > DECODE_WIDTH_LIMIT || height > DECODE_HEIGHT_LIMIT)
     aom_internal_error(&cm->error, AOM_CODEC_CORRUPT_FRAME,
@@ -2164,7 +2185,7 @@
   cm->cur_frame->height = cm->height;
 }
 
-static void setup_buffer_pool(AV1_COMMON *cm) {
+static AOM_INLINE void setup_buffer_pool(AV1_COMMON *cm) {
   BufferPool *const pool = cm->buffer_pool;
   const SequenceHeader *const seq_params = &cm->seq_params;
 
@@ -2193,8 +2214,9 @@
   cm->cur_frame->buf.render_height = cm->render_height;
 }
 
-static void setup_frame_size(AV1_COMMON *cm, int frame_size_override_flag,
-                             struct aom_read_bit_buffer *rb) {
+static AOM_INLINE void setup_frame_size(AV1_COMMON *cm,
+                                        int frame_size_override_flag,
+                                        struct aom_read_bit_buffer *rb) {
   const SequenceHeader *const seq_params = &cm->seq_params;
   int width, height;
 
@@ -2218,8 +2240,8 @@
   setup_buffer_pool(cm);
 }
 
-static void setup_sb_size(SequenceHeader *seq_params,
-                          struct aom_read_bit_buffer *rb) {
+static AOM_INLINE void setup_sb_size(SequenceHeader *seq_params,
+                                     struct aom_read_bit_buffer *rb) {
   set_sb_size(seq_params, aom_rb_read_bit(rb) ? BLOCK_128X128 : BLOCK_64X64);
 }
 
@@ -2231,8 +2253,8 @@
          ref_yss == this_yss;
 }
 
-static void setup_frame_size_with_refs(AV1_COMMON *cm,
-                                       struct aom_read_bit_buffer *rb) {
+static AOM_INLINE void setup_frame_size_with_refs(
+    AV1_COMMON *cm, struct aom_read_bit_buffer *rb) {
   int width, height;
   int found = 0;
   int has_valid_ref_frame = 0;
@@ -2311,8 +2333,8 @@
     return (v << 1) - m + aom_rb_read_bit(rb);
 }
 
-static void read_tile_info_max_tile(AV1_COMMON *const cm,
-                                    struct aom_read_bit_buffer *const rb) {
+static AOM_INLINE void read_tile_info_max_tile(
+    AV1_COMMON *const cm, struct aom_read_bit_buffer *const rb) {
   int width_mi = ALIGN_POWER_OF_TWO(cm->mi_cols, cm->seq_params.mib_size_log2);
   int height_mi = ALIGN_POWER_OF_TWO(cm->mi_rows, cm->seq_params.mib_size_log2);
   int width_sb = width_mi >> cm->seq_params.mib_size_log2;
@@ -2392,8 +2414,8 @@
   }
 }
 
-static void read_tile_info(AV1Decoder *const pbi,
-                           struct aom_read_bit_buffer *const rb) {
+static AOM_INLINE void read_tile_info(AV1Decoder *const pbi,
+                                      struct aom_read_bit_buffer *const rb) {
   AV1_COMMON *const cm = &pbi->common;
 
   read_tile_info_max_tile(cm, rb);
@@ -2413,8 +2435,8 @@
 }
 
 #if EXT_TILE_DEBUG
-static void read_ext_tile_info(AV1Decoder *const pbi,
-                               struct aom_read_bit_buffer *const rb) {
+static AOM_INLINE void read_ext_tile_info(
+    AV1Decoder *const pbi, struct aom_read_bit_buffer *const rb) {
   AV1_COMMON *const cm = &pbi->common;
 
   // This information is stored as a separate byte.
@@ -2444,7 +2466,7 @@
 // Reads the next tile returning its size and adjusting '*data' accordingly
 // based on 'is_last'. On return, '*data' is updated to point to the end of the
 // raw tile buffer in the bit stream.
-static void get_ls_tile_buffer(
+static AOM_INLINE void get_ls_tile_buffer(
     const uint8_t *const data_end, struct aom_internal_error_info *error_info,
     const uint8_t **data, TileBufferDec (*const tile_buffers)[MAX_TILE_COLS],
     int tile_size_bytes, int col, int row, int tile_copy_mode) {
@@ -2591,10 +2613,10 @@
 
 // Reads the next tile returning its size and adjusting '*data' accordingly
 // based on 'is_last'.
-static void get_tile_buffer(const uint8_t *const data_end,
-                            const int tile_size_bytes, int is_last,
-                            struct aom_internal_error_info *error_info,
-                            const uint8_t **data, TileBufferDec *const buf) {
+static AOM_INLINE void get_tile_buffer(
+    const uint8_t *const data_end, const int tile_size_bytes, int is_last,
+    struct aom_internal_error_info *error_info, const uint8_t **data,
+    TileBufferDec *const buf) {
   size_t size;
 
   if (!is_last) {
@@ -2618,10 +2640,10 @@
   *data += size;
 }
 
-static void get_tile_buffers(AV1Decoder *pbi, const uint8_t *data,
-                             const uint8_t *data_end,
-                             TileBufferDec (*const tile_buffers)[MAX_TILE_COLS],
-                             int start_tile, int end_tile) {
+static AOM_INLINE void get_tile_buffers(
+    AV1Decoder *pbi, const uint8_t *data, const uint8_t *data_end,
+    TileBufferDec (*const tile_buffers)[MAX_TILE_COLS], int start_tile,
+    int end_tile) {
   AV1_COMMON *const cm = &pbi->common;
   const int tile_cols = cm->tile_cols;
   const int tile_rows = cm->tile_rows;
@@ -2646,9 +2668,10 @@
   }
 }
 
-static void set_cb_buffer(AV1Decoder *pbi, MACROBLOCKD *const xd,
-                          CB_BUFFER *cb_buffer_base, const int num_planes,
-                          int mi_row, int mi_col) {
+static AOM_INLINE void set_cb_buffer(AV1Decoder *pbi, MACROBLOCKD *const xd,
+                                     CB_BUFFER *cb_buffer_base,
+                                     const int num_planes, int mi_row,
+                                     int mi_col) {
   AV1_COMMON *const cm = &pbi->common;
   int mib_size_log2 = cm->seq_params.mib_size_log2;
   int stride = (cm->mi_cols >> mib_size_log2) + 1;
@@ -2667,7 +2690,8 @@
   xd->color_index_map_offset[1] = 0;
 }
 
-static void decoder_alloc_tile_data(AV1Decoder *pbi, const int n_tiles) {
+static AOM_INLINE void decoder_alloc_tile_data(AV1Decoder *pbi,
+                                               const int n_tiles) {
   AV1_COMMON *const cm = &pbi->common;
   aom_free(pbi->tile_data);
   CHECK_MEM_ERROR(cm, pbi->tile_data,
@@ -2699,8 +2723,8 @@
 }
 
 // Allocate memory for decoder row synchronization
-static void dec_row_mt_alloc(AV1DecRowMTSync *dec_row_mt_sync, AV1_COMMON *cm,
-                             int rows) {
+static AOM_INLINE void dec_row_mt_alloc(AV1DecRowMTSync *dec_row_mt_sync,
+                                        AV1_COMMON *cm, int rows) {
   dec_row_mt_sync->allocated_sb_rows = rows;
 #if CONFIG_MULTITHREAD
   {
@@ -2808,8 +2832,9 @@
 #endif  // CONFIG_MULTITHREAD
 }
 
-static void decode_tile_sb_row(AV1Decoder *pbi, ThreadData *const td,
-                               TileInfo tile_info, const int mi_row) {
+static AOM_INLINE void decode_tile_sb_row(AV1Decoder *pbi, ThreadData *const td,
+                                          TileInfo tile_info,
+                                          const int mi_row) {
   AV1_COMMON *const cm = &pbi->common;
   const int num_planes = av1_num_planes(cm);
   TileDataDec *const tile_data =
@@ -2858,7 +2883,8 @@
   return 0;
 }
 
-static void set_decode_func_pointers(ThreadData *td, int parse_decode_flag) {
+static AOM_INLINE void set_decode_func_pointers(ThreadData *td,
+                                                int parse_decode_flag) {
   td->read_coeffs_tx_intra_block_visit = decode_block_void;
   td->predict_and_recon_intra_block_visit = decode_block_void;
   td->read_coeffs_tx_inter_block_visit = decode_block_void;
@@ -2879,8 +2905,8 @@
   }
 }
 
-static void decode_tile(AV1Decoder *pbi, ThreadData *const td, int tile_row,
-                        int tile_col) {
+static AOM_INLINE void decode_tile(AV1Decoder *pbi, ThreadData *const td,
+                                   int tile_row, int tile_col) {
   TileInfo tile_info;
 
   AV1_COMMON *const cm = &pbi->common;
@@ -3074,11 +3100,10 @@
   return cur_job_info;
 }
 
-static void tile_worker_hook_init(AV1Decoder *const pbi,
-                                  DecWorkerData *const thread_data,
-                                  const TileBufferDec *const tile_buffer,
-                                  TileDataDec *const tile_data,
-                                  uint8_t allow_update_cdf) {
+static AOM_INLINE void tile_worker_hook_init(
+    AV1Decoder *const pbi, DecWorkerData *const thread_data,
+    const TileBufferDec *const tile_buffer, TileDataDec *const tile_data,
+    uint8_t allow_update_cdf) {
   AV1_COMMON *cm = &pbi->common;
   ThreadData *const td = thread_data->td;
   int tile_row = tile_data->tile_info.tile_row;
@@ -3311,8 +3336,8 @@
 
 // This function is very similar to decode_tile(). It would be good to figure
 // out how to share code.
-static void parse_tile_row_mt(AV1Decoder *pbi, ThreadData *const td,
-                              TileDataDec *const tile_data) {
+static AOM_INLINE void parse_tile_row_mt(AV1Decoder *pbi, ThreadData *const td,
+                                         TileDataDec *const tile_data) {
   AV1_COMMON *const cm = &pbi->common;
   const int sb_mi_size = mi_size_wide[cm->seq_params.sb_size];
   const int num_planes = av1_num_planes(cm);
@@ -3479,10 +3504,10 @@
   return (((int)buf2->tile_buffer->size) - ((int)buf1->tile_buffer->size));
 }
 
-static void enqueue_tile_jobs(AV1Decoder *pbi, AV1_COMMON *cm,
-                              int tile_rows_start, int tile_rows_end,
-                              int tile_cols_start, int tile_cols_end,
-                              int start_tile, int end_tile) {
+static AOM_INLINE void enqueue_tile_jobs(AV1Decoder *pbi, AV1_COMMON *cm,
+                                         int tile_rows_start, int tile_rows_end,
+                                         int tile_cols_start, int tile_cols_end,
+                                         int start_tile, int end_tile) {
   AV1DecTileMT *tile_mt_info = &pbi->tile_mt_info;
   TileJobsDec *tile_job_queue = tile_mt_info->job_queue;
   tile_mt_info->jobs_enqueued = 0;
@@ -3501,8 +3526,9 @@
   }
 }
 
-static void alloc_dec_jobs(AV1DecTileMT *tile_mt_info, AV1_COMMON *cm,
-                           int tile_rows, int tile_cols) {
+static AOM_INLINE void alloc_dec_jobs(AV1DecTileMT *tile_mt_info,
+                                      AV1_COMMON *cm, int tile_rows,
+                                      int tile_cols) {
   tile_mt_info->alloc_tile_rows = tile_rows;
   tile_mt_info->alloc_tile_cols = tile_cols;
   int num_tiles = tile_rows * tile_cols;
@@ -3540,8 +3566,9 @@
   }
 }
 
-static void allocate_mc_tmp_buf(AV1_COMMON *const cm, ThreadData *thread_data,
-                                int buf_size, int use_highbd) {
+static AOM_INLINE void allocate_mc_tmp_buf(AV1_COMMON *const cm,
+                                           ThreadData *thread_data,
+                                           int buf_size, int use_highbd) {
   for (int ref = 0; ref < 2; ref++) {
     if (use_highbd) {
       uint16_t *hbd_mc_buf;
@@ -3566,8 +3593,9 @@
   }
 }
 
-static void reset_dec_workers(AV1Decoder *pbi, AVxWorkerHook worker_hook,
-                              int num_workers) {
+static AOM_INLINE void reset_dec_workers(AV1Decoder *pbi,
+                                         AVxWorkerHook worker_hook,
+                                         int num_workers) {
   const AVxWorkerInterface *const winterface = aom_get_worker_interface();
 
   // Reset tile decoding hook
@@ -3595,8 +3623,9 @@
 #endif
 }
 
-static void launch_dec_workers(AV1Decoder *pbi, const uint8_t *data_end,
-                               int num_workers) {
+static AOM_INLINE void launch_dec_workers(AV1Decoder *pbi,
+                                          const uint8_t *data_end,
+                                          int num_workers) {
   const AVxWorkerInterface *const winterface = aom_get_worker_interface();
 
   for (int worker_idx = 0; worker_idx < num_workers; ++worker_idx) {
@@ -3614,7 +3643,7 @@
   }
 }
 
-static void sync_dec_workers(AV1Decoder *pbi, int num_workers) {
+static AOM_INLINE void sync_dec_workers(AV1Decoder *pbi, int num_workers) {
   const AVxWorkerInterface *const winterface = aom_get_worker_interface();
   int corrupted = 0;
 
@@ -3626,7 +3655,7 @@
   pbi->mb.corrupted = corrupted;
 }
 
-static void decode_mt_init(AV1Decoder *pbi) {
+static AOM_INLINE void decode_mt_init(AV1Decoder *pbi) {
   AV1_COMMON *const cm = &pbi->common;
   const AVxWorkerInterface *const winterface = aom_get_worker_interface();
   int worker_idx;
@@ -3675,10 +3704,11 @@
   }
 }
 
-static void tile_mt_queue(AV1Decoder *pbi, int tile_cols, int tile_rows,
-                          int tile_rows_start, int tile_rows_end,
-                          int tile_cols_start, int tile_cols_end,
-                          int start_tile, int end_tile) {
+static AOM_INLINE void tile_mt_queue(AV1Decoder *pbi, int tile_cols,
+                                     int tile_rows, int tile_rows_start,
+                                     int tile_rows_end, int tile_cols_start,
+                                     int tile_cols_end, int start_tile,
+                                     int end_tile) {
   AV1_COMMON *const cm = &pbi->common;
   if (pbi->tile_mt_info.alloc_tile_cols != tile_cols ||
       pbi->tile_mt_info.alloc_tile_rows != tile_rows) {
@@ -3786,7 +3816,7 @@
   return aom_reader_find_end(&tile_data->bit_reader);
 }
 
-static void dec_alloc_cb_buf(AV1Decoder *pbi) {
+static AOM_INLINE void dec_alloc_cb_buf(AV1Decoder *pbi) {
   AV1_COMMON *const cm = &pbi->common;
   int size = ((cm->mi_rows >> cm->seq_params.mib_size_log2) + 1) *
              ((cm->mi_cols >> cm->seq_params.mib_size_log2) + 1);
@@ -3800,10 +3830,10 @@
   }
 }
 
-static void row_mt_frame_init(AV1Decoder *pbi, int tile_rows_start,
-                              int tile_rows_end, int tile_cols_start,
-                              int tile_cols_end, int start_tile, int end_tile,
-                              int max_sb_rows) {
+static AOM_INLINE void row_mt_frame_init(AV1Decoder *pbi, int tile_rows_start,
+                                         int tile_rows_end, int tile_cols_start,
+                                         int tile_cols_end, int start_tile,
+                                         int end_tile, int max_sb_rows) {
   AV1_COMMON *const cm = &pbi->common;
   AV1DecRowMTInfo *frame_row_mt_info = &pbi->frame_row_mt_info;
 
@@ -3988,7 +4018,7 @@
   return aom_reader_find_end(&tile_data->bit_reader);
 }
 
-static void error_handler(void *data) {
+static AOM_INLINE void error_handler(void *data) {
   AV1_COMMON *const cm = (AV1_COMMON *)data;
   aom_internal_error(&cm->error, AOM_CODEC_CORRUPT_FRAME, "Truncated packet");
 }
@@ -3997,9 +4027,9 @@
 // seq_params->bit_depth based on the values of those fields and
 // seq_params->profile. Reports errors by calling rb->error_handler() or
 // aom_internal_error().
-static void read_bitdepth(struct aom_read_bit_buffer *rb,
-                          SequenceHeader *seq_params,
-                          struct aom_internal_error_info *error_info) {
+static AOM_INLINE void read_bitdepth(
+    struct aom_read_bit_buffer *rb, SequenceHeader *seq_params,
+    struct aom_internal_error_info *error_info) {
   const int high_bitdepth = aom_rb_read_bit(rb);
   if (seq_params->profile == PROFILE_2 && high_bitdepth) {
     const int twelve_bit = aom_rb_read_bit(rb);
@@ -4183,7 +4213,8 @@
   pars->clip_to_restricted_range = aom_rb_read_bit(rb);
 }
 
-static void read_film_grain(AV1_COMMON *cm, struct aom_read_bit_buffer *rb) {
+static AOM_INLINE void read_film_grain(AV1_COMMON *cm,
+                                       struct aom_read_bit_buffer *rb) {
   if (cm->seq_params.film_grain_params_present &&
       (cm->show_frame || cm->showable_frame)) {
     av1_read_film_grain_params(cm, rb);
@@ -4326,8 +4357,8 @@
   cm->op_params[op_num].low_delay_mode_flag = aom_rb_read_bit(rb);
 }
 
-static void read_temporal_point_info(AV1_COMMON *const cm,
-                                     struct aom_read_bit_buffer *rb) {
+static AOM_INLINE void read_temporal_point_info(
+    AV1_COMMON *const cm, struct aom_read_bit_buffer *rb) {
   cm->frame_presentation_time = aom_rb_read_unsigned_literal(
       rb, cm->buffer_model.frame_presentation_time_length);
 }
@@ -4488,7 +4519,8 @@
   return 1;
 }
 
-static void read_global_motion(AV1_COMMON *cm, struct aom_read_bit_buffer *rb) {
+static AOM_INLINE void read_global_motion(AV1_COMMON *cm,
+                                          struct aom_read_bit_buffer *rb) {
   for (int frame = LAST_FRAME; frame <= ALTREF_FRAME; ++frame) {
     const WarpedMotionParams *ref_params =
         cm->prev_frame ? &cm->prev_frame->global_motion[frame]
@@ -4531,7 +4563,7 @@
 
 // Release the references to the frame buffers in cm->ref_frame_map and reset
 // all elements of cm->ref_frame_map to NULL.
-static void reset_ref_frame_map(AV1_COMMON *const cm) {
+static AOM_INLINE void reset_ref_frame_map(AV1_COMMON *const cm) {
   BufferPool *const pool = cm->buffer_pool;
 
   for (int i = 0; i < REF_FRAMES; i++) {
@@ -4542,7 +4574,7 @@
 
 // If the refresh_frame_flags bitmask is set, update reference frame id values
 // and mark frames as valid for reference.
-static void update_ref_frame_id(AV1_COMMON *const cm, int frame_id) {
+static AOM_INLINE void update_ref_frame_id(AV1_COMMON *const cm, int frame_id) {
   int refresh_frame_flags = cm->current_frame.refresh_frame_flags;
   for (int i = 0; i < REF_FRAMES; i++) {
     if ((refresh_frame_flags >> i) & 1) {
@@ -4552,8 +4584,8 @@
   }
 }
 
-static void show_existing_frame_reset(AV1Decoder *const pbi,
-                                      int existing_frame_idx) {
+static AOM_INLINE void show_existing_frame_reset(AV1Decoder *const pbi,
+                                                 int existing_frame_idx) {
   AV1_COMMON *const cm = &pbi->common;
 
   assert(cm->show_existing_frame);
@@ -5242,7 +5274,7 @@
   return (BITSTREAM_PROFILE)profile;
 }
 
-static void superres_post_decode(AV1Decoder *pbi) {
+static AOM_INLINE void superres_post_decode(AV1Decoder *pbi) {
   AV1_COMMON *const cm = &pbi->common;
   BufferPool *const pool = cm->buffer_pool;
 
@@ -5331,7 +5363,7 @@
 }
 
 // Once-per-frame initialization
-static void setup_frame_info(AV1Decoder *pbi) {
+static AOM_INLINE void setup_frame_info(AV1Decoder *pbi) {
   AV1_COMMON *const cm = &pbi->common;
 
   if (cm->rst_info[0].frame_restoration_type != RESTORE_NONE ||
diff --git a/av1/encoder/bitstream.c b/av1/encoder/bitstream.c
index 3f24901..f3ba4b8 100644
--- a/av1/encoder/bitstream.c
+++ b/av1/encoder/bitstream.c
@@ -60,25 +60,25 @@
   }
 }
 
-static void loop_restoration_write_sb_coeffs(const AV1_COMMON *const cm,
-                                             MACROBLOCKD *xd,
-                                             const RestorationUnitInfo *rui,
-                                             aom_writer *const w, int plane,
-                                             FRAME_COUNTS *counts);
+static AOM_INLINE void loop_restoration_write_sb_coeffs(
+    const AV1_COMMON *const cm, MACROBLOCKD *xd, const RestorationUnitInfo *rui,
+    aom_writer *const w, int plane, FRAME_COUNTS *counts);
 
-static void write_intra_y_mode_kf(FRAME_CONTEXT *frame_ctx,
-                                  const MB_MODE_INFO *mi,
-                                  const MB_MODE_INFO *above_mi,
-                                  const MB_MODE_INFO *left_mi,
-                                  PREDICTION_MODE mode, aom_writer *w) {
+static AOM_INLINE void write_intra_y_mode_kf(FRAME_CONTEXT *frame_ctx,
+                                             const MB_MODE_INFO *mi,
+                                             const MB_MODE_INFO *above_mi,
+                                             const MB_MODE_INFO *left_mi,
+                                             PREDICTION_MODE mode,
+                                             aom_writer *w) {
   assert(!is_intrabc_block(mi));
   (void)mi;
   aom_write_symbol(w, mode, get_y_mode_cdf(frame_ctx, above_mi, left_mi),
                    INTRA_MODES);
 }
 
-static void write_inter_mode(aom_writer *w, PREDICTION_MODE mode,
-                             FRAME_CONTEXT *ec_ctx, const int16_t mode_ctx) {
+static AOM_INLINE void write_inter_mode(aom_writer *w, PREDICTION_MODE mode,
+                                        FRAME_CONTEXT *ec_ctx,
+                                        const int16_t mode_ctx) {
   const int16_t newmv_ctx = mode_ctx & NEWMV_CTX_MASK;
 
   aom_write_symbol(w, mode != NEWMV, ec_ctx->newmv_cdf[newmv_ctx], 2);
@@ -95,9 +95,9 @@
   }
 }
 
-static void write_drl_idx(FRAME_CONTEXT *ec_ctx, const MB_MODE_INFO *mbmi,
-                          const MB_MODE_INFO_EXT_FRAME *mbmi_ext_frame,
-                          aom_writer *w) {
+static AOM_INLINE void write_drl_idx(
+    FRAME_CONTEXT *ec_ctx, const MB_MODE_INFO *mbmi,
+    const MB_MODE_INFO_EXT_FRAME *mbmi_ext_frame, aom_writer *w) {
   assert(mbmi->ref_mv_idx < 3);
 
   const int new_mv = mbmi->mode == NEWMV || mbmi->mode == NEW_NEWMV;
@@ -130,18 +130,20 @@
   }
 }
 
-static void write_inter_compound_mode(MACROBLOCKD *xd, aom_writer *w,
-                                      PREDICTION_MODE mode,
-                                      const int16_t mode_ctx) {
+static AOM_INLINE void write_inter_compound_mode(MACROBLOCKD *xd, aom_writer *w,
+                                                 PREDICTION_MODE mode,
+                                                 const int16_t mode_ctx) {
   assert(is_inter_compound_mode(mode));
   aom_write_symbol(w, INTER_COMPOUND_OFFSET(mode),
                    xd->tile_ctx->inter_compound_mode_cdf[mode_ctx],
                    INTER_COMPOUND_MODES);
 }
 
-static void write_tx_size_vartx(MACROBLOCKD *xd, const MB_MODE_INFO *mbmi,
-                                TX_SIZE tx_size, int depth, int blk_row,
-                                int blk_col, aom_writer *w) {
+static AOM_INLINE void write_tx_size_vartx(MACROBLOCKD *xd,
+                                           const MB_MODE_INFO *mbmi,
+                                           TX_SIZE tx_size, int depth,
+                                           int blk_row, int blk_col,
+                                           aom_writer *w) {
   FRAME_CONTEXT *const ec_ctx = xd->tile_ctx;
   const int max_blocks_high = max_block_high(xd, mbmi->sb_type, 0);
   const int max_blocks_wide = max_block_wide(xd, mbmi->sb_type, 0);
@@ -190,7 +192,8 @@
   }
 }
 
-static void write_selected_tx_size(const MACROBLOCKD *xd, aom_writer *w) {
+static AOM_INLINE void write_selected_tx_size(const MACROBLOCKD *xd,
+                                              aom_writer *w) {
   const MB_MODE_INFO *const mbmi = xd->mi[0];
   const BLOCK_SIZE bsize = mbmi->sb_type;
   FRAME_CONTEXT *ec_ctx = xd->tile_ctx;
@@ -248,8 +251,9 @@
   return skip_mode;
 }
 
-static void write_is_inter(const AV1_COMMON *cm, const MACROBLOCKD *xd,
-                           int segment_id, aom_writer *w, const int is_inter) {
+static AOM_INLINE void write_is_inter(const AV1_COMMON *cm,
+                                      const MACROBLOCKD *xd, int segment_id,
+                                      aom_writer *w, const int is_inter) {
   if (!segfeature_active(&cm->seg, segment_id, SEG_LVL_REF_FRAME)) {
     if (segfeature_active(&cm->seg, segment_id, SEG_LVL_GLOBALMV)) {
       assert(is_inter);
@@ -261,8 +265,9 @@
   }
 }
 
-static void write_motion_mode(const AV1_COMMON *cm, MACROBLOCKD *xd,
-                              const MB_MODE_INFO *mbmi, aom_writer *w) {
+static AOM_INLINE void write_motion_mode(const AV1_COMMON *cm, MACROBLOCKD *xd,
+                                         const MB_MODE_INFO *mbmi,
+                                         aom_writer *w) {
   MOTION_MODE last_motion_mode_allowed =
       cm->switchable_motion_mode
           ? motion_mode_allowed(cm->global_motion, xd, mbmi,
@@ -282,8 +287,8 @@
   }
 }
 
-static void write_delta_qindex(const MACROBLOCKD *xd, int delta_qindex,
-                               aom_writer *w) {
+static AOM_INLINE void write_delta_qindex(const MACROBLOCKD *xd,
+                                          int delta_qindex, aom_writer *w) {
   int sign = delta_qindex < 0;
   int abs = sign ? -delta_qindex : delta_qindex;
   int rem_bits, thr;
@@ -304,8 +309,9 @@
   }
 }
 
-static void write_delta_lflevel(const AV1_COMMON *cm, const MACROBLOCKD *xd,
-                                int lf_id, int delta_lflevel, aom_writer *w) {
+static AOM_INLINE void write_delta_lflevel(const AV1_COMMON *cm,
+                                           const MACROBLOCKD *xd, int lf_id,
+                                           int delta_lflevel, aom_writer *w) {
   int sign = delta_lflevel < 0;
   int abs = sign ? -delta_lflevel : delta_lflevel;
   int rem_bits, thr;
@@ -333,8 +339,8 @@
   }
 }
 
-static void pack_map_tokens(aom_writer *w, const TOKENEXTRA **tp, int n,
-                            int num) {
+static AOM_INLINE void pack_map_tokens(aom_writer *w, const TOKENEXTRA **tp,
+                                       int n, int num) {
   const TOKENEXTRA *p = *tp;
   write_uniform(w, n, p->token);  // The first color index.
   ++p;
@@ -346,13 +352,11 @@
   *tp = p;
 }
 
-static void pack_txb_tokens(aom_writer *w, AV1_COMMON *cm, MACROBLOCK *const x,
-                            const TOKENEXTRA **tp,
-                            const TOKENEXTRA *const tok_end, MACROBLOCKD *xd,
-                            MB_MODE_INFO *mbmi, int plane,
-                            BLOCK_SIZE plane_bsize, aom_bit_depth_t bit_depth,
-                            int block, int blk_row, int blk_col,
-                            TX_SIZE tx_size, TOKEN_STATS *token_stats) {
+static AOM_INLINE void pack_txb_tokens(
+    aom_writer *w, AV1_COMMON *cm, MACROBLOCK *const x, const TOKENEXTRA **tp,
+    const TOKENEXTRA *const tok_end, MACROBLOCKD *xd, MB_MODE_INFO *mbmi,
+    int plane, BLOCK_SIZE plane_bsize, aom_bit_depth_t bit_depth, int block,
+    int blk_row, int blk_col, TX_SIZE tx_size, TOKEN_STATS *token_stats) {
   const int max_blocks_high = max_block_high(xd, plane_bsize, plane);
   const int max_blocks_wide = max_block_wide(xd, plane_bsize, plane);
 
@@ -448,10 +452,12 @@
   }
 }
 
-static void write_segment_id(AV1_COMP *cpi, const MB_MODE_INFO *const mbmi,
-                             aom_writer *w, const struct segmentation *seg,
-                             struct segmentation_probs *segp, int mi_row,
-                             int mi_col, int skip) {
+static AOM_INLINE void write_segment_id(AV1_COMP *cpi,
+                                        const MB_MODE_INFO *const mbmi,
+                                        aom_writer *w,
+                                        const struct segmentation *seg,
+                                        struct segmentation_probs *segp,
+                                        int mi_row, int mi_col, int skip) {
   if (!seg->enabled || !seg->update_map) return;
 
   AV1_COMMON *const cm = &cpi->common;
@@ -486,8 +492,8 @@
   aom_write_symbol(w, bname, av1_get_pred_cdf_##pname(xd), 2)
 
 // This function encodes the reference frame
-static void write_ref_frames(const AV1_COMMON *cm, const MACROBLOCKD *xd,
-                             aom_writer *w) {
+static AOM_INLINE void write_ref_frames(const AV1_COMMON *cm,
+                                        const MACROBLOCKD *xd, aom_writer *w) {
   const MB_MODE_INFO *const mbmi = xd->mi[0];
   const int is_compound = has_second_ref(mbmi);
   const int segment_id = mbmi->segment_id;
@@ -590,10 +596,9 @@
   }
 }
 
-static void write_filter_intra_mode_info(const AV1_COMMON *cm,
-                                         const MACROBLOCKD *xd,
-                                         const MB_MODE_INFO *const mbmi,
-                                         aom_writer *w) {
+static AOM_INLINE void write_filter_intra_mode_info(
+    const AV1_COMMON *cm, const MACROBLOCKD *xd, const MB_MODE_INFO *const mbmi,
+    aom_writer *w) {
   if (av1_filter_intra_allowed(cm, mbmi)) {
     aom_write_symbol(w, mbmi->filter_intra_mode_info.use_filter_intra,
                      xd->tile_ctx->filter_intra_cdfs[mbmi->sb_type], 2);
@@ -606,14 +611,15 @@
   }
 }
 
-static void write_angle_delta(aom_writer *w, int angle_delta,
-                              aom_cdf_prob *cdf) {
+static AOM_INLINE void write_angle_delta(aom_writer *w, int angle_delta,
+                                         aom_cdf_prob *cdf) {
   aom_write_symbol(w, angle_delta + MAX_ANGLE_DELTA, cdf,
                    2 * MAX_ANGLE_DELTA + 1);
 }
 
-static void write_mb_interp_filter(AV1_COMP *cpi, const MACROBLOCKD *xd,
-                                   aom_writer *w) {
+static AOM_INLINE void write_mb_interp_filter(AV1_COMP *cpi,
+                                              const MACROBLOCKD *xd,
+                                              aom_writer *w) {
   AV1_COMMON *const cm = &cpi->common;
   const MB_MODE_INFO *const mbmi = xd->mi[0];
   FRAME_CONTEXT *ec_ctx = xd->tile_ctx;
@@ -642,9 +648,9 @@
 // Transmit color values with delta encoding. Write the first value as
 // literal, and the deltas between each value and the previous one. "min_val" is
 // the smallest possible value of the deltas.
-static void delta_encode_palette_colors(const int *colors, int num,
-                                        int bit_depth, int min_val,
-                                        aom_writer *w) {
+static AOM_INLINE void delta_encode_palette_colors(const int *colors, int num,
+                                                   int bit_depth, int min_val,
+                                                   aom_writer *w) {
   if (num <= 0) return;
   assert(colors[0] < (1 << bit_depth));
   aom_write_literal(w, colors[0], bit_depth);
@@ -674,9 +680,9 @@
 // Transmit luma palette color values. First signal if each color in the color
 // cache is used. Those colors that are not in the cache are transmitted with
 // delta encoding.
-static void write_palette_colors_y(const MACROBLOCKD *const xd,
-                                   const PALETTE_MODE_INFO *const pmi,
-                                   int bit_depth, aom_writer *w) {
+static AOM_INLINE void write_palette_colors_y(
+    const MACROBLOCKD *const xd, const PALETTE_MODE_INFO *const pmi,
+    int bit_depth, aom_writer *w) {
   const int n = pmi->palette_size[0];
   uint16_t color_cache[2 * PALETTE_MAX_SIZE];
   const int n_cache = av1_get_palette_cache(xd, 0, color_cache);
@@ -698,9 +704,9 @@
 // Write chroma palette color values. U channel is handled similarly to the luma
 // channel. For v channel, either use delta encoding or transmit raw values
 // directly, whichever costs less.
-static void write_palette_colors_uv(const MACROBLOCKD *const xd,
-                                    const PALETTE_MODE_INFO *const pmi,
-                                    int bit_depth, aom_writer *w) {
+static AOM_INLINE void write_palette_colors_uv(
+    const MACROBLOCKD *const xd, const PALETTE_MODE_INFO *const pmi,
+    int bit_depth, aom_writer *w) {
   const int n = pmi->palette_size[1];
   const uint16_t *colors_u = pmi->palette_colors + PALETTE_MAX_SIZE;
   const uint16_t *colors_v = pmi->palette_colors + 2 * PALETTE_MAX_SIZE;
@@ -757,9 +763,11 @@
   }
 }
 
-static void write_palette_mode_info(const AV1_COMMON *cm, const MACROBLOCKD *xd,
-                                    const MB_MODE_INFO *const mbmi, int mi_row,
-                                    int mi_col, aom_writer *w) {
+static AOM_INLINE void write_palette_mode_info(const AV1_COMMON *cm,
+                                               const MACROBLOCKD *xd,
+                                               const MB_MODE_INFO *const mbmi,
+                                               int mi_row, int mi_col,
+                                               aom_writer *w) {
   const int num_planes = av1_num_planes(cm);
   const BLOCK_SIZE bsize = mbmi->sb_type;
   assert(av1_allow_palette(cm->allow_screen_content_tools, bsize));
@@ -843,22 +851,26 @@
   }
 }
 
-static void write_intra_y_mode_nonkf(FRAME_CONTEXT *frame_ctx, BLOCK_SIZE bsize,
-                                     PREDICTION_MODE mode, aom_writer *w) {
+static AOM_INLINE void write_intra_y_mode_nonkf(FRAME_CONTEXT *frame_ctx,
+                                                BLOCK_SIZE bsize,
+                                                PREDICTION_MODE mode,
+                                                aom_writer *w) {
   aom_write_symbol(w, mode, frame_ctx->y_mode_cdf[size_group_lookup[bsize]],
                    INTRA_MODES);
 }
 
-static void write_intra_uv_mode(FRAME_CONTEXT *frame_ctx,
-                                UV_PREDICTION_MODE uv_mode,
-                                PREDICTION_MODE y_mode,
-                                CFL_ALLOWED_TYPE cfl_allowed, aom_writer *w) {
+static AOM_INLINE void write_intra_uv_mode(FRAME_CONTEXT *frame_ctx,
+                                           UV_PREDICTION_MODE uv_mode,
+                                           PREDICTION_MODE y_mode,
+                                           CFL_ALLOWED_TYPE cfl_allowed,
+                                           aom_writer *w) {
   aom_write_symbol(w, uv_mode, frame_ctx->uv_mode_cdf[cfl_allowed][y_mode],
                    UV_INTRA_MODES - !cfl_allowed);
 }
 
-static void write_cfl_alphas(FRAME_CONTEXT *const ec_ctx, uint8_t idx,
-                             int8_t joint_sign, aom_writer *w) {
+static AOM_INLINE void write_cfl_alphas(FRAME_CONTEXT *const ec_ctx,
+                                        uint8_t idx, int8_t joint_sign,
+                                        aom_writer *w) {
   aom_write_symbol(w, joint_sign, ec_ctx->cfl_sign_cdf, CFL_JOINT_SIGNS);
   // Magnitudes are only signaled for nonzero codes.
   if (CFL_SIGN_U(joint_sign) != CFL_SIGN_ZERO) {
@@ -871,8 +883,9 @@
   }
 }
 
-static void write_cdef(AV1_COMMON *cm, MACROBLOCKD *const xd, aom_writer *w,
-                       int skip, int mi_col, int mi_row) {
+static AOM_INLINE void write_cdef(AV1_COMMON *cm, MACROBLOCKD *const xd,
+                                  aom_writer *w, int skip, int mi_col,
+                                  int mi_row) {
   if (cm->coded_lossless || cm->allow_intrabc) return;
 
   const int m = ~((1 << (6 - MI_SIZE_LOG2)) - 1);
@@ -896,11 +909,10 @@
   }
 }
 
-static void write_inter_segment_id(AV1_COMP *cpi, aom_writer *w,
-                                   const struct segmentation *const seg,
-                                   struct segmentation_probs *const segp,
-                                   int mi_row, int mi_col, int skip,
-                                   int preskip) {
+static AOM_INLINE void write_inter_segment_id(
+    AV1_COMP *cpi, aom_writer *w, const struct segmentation *const seg,
+    struct segmentation_probs *const segp, int mi_row, int mi_col, int skip,
+    int preskip) {
   MACROBLOCKD *const xd = &cpi->td.mb.e_mbd;
   MB_MODE_INFO *const mbmi = xd->mi[0];
   AV1_COMMON *const cm = &cpi->common;
@@ -935,8 +947,9 @@
 
 // If delta q is present, writes delta_q index.
 // Also writes delta_q loop filter levels, if present.
-static void write_delta_q_params(AV1_COMP *cpi, const int mi_row,
-                                 const int mi_col, int skip, aom_writer *w) {
+static AOM_INLINE void write_delta_q_params(AV1_COMP *cpi, const int mi_row,
+                                            const int mi_col, int skip,
+                                            aom_writer *w) {
   AV1_COMMON *const cm = &cpi->common;
   const DeltaQInfo *const delta_q_info = &cm->delta_q_info;
 
@@ -980,9 +993,11 @@
   }
 }
 
-static void write_intra_prediction_modes(AV1_COMP *cpi, const int mi_row,
-                                         const int mi_col, int is_keyframe,
-                                         aom_writer *w) {
+static AOM_INLINE void write_intra_prediction_modes(AV1_COMP *cpi,
+                                                    const int mi_row,
+                                                    const int mi_col,
+                                                    int is_keyframe,
+                                                    aom_writer *w) {
   const AV1_COMMON *const cm = &cpi->common;
   MACROBLOCK *const x = &cpi->td.mb;
   MACROBLOCKD *const xd = &x->e_mbd;
@@ -1072,8 +1087,8 @@
                                x->mbmi_ext_frame);
 }
 
-static void pack_inter_mode_mvs(AV1_COMP *cpi, const int mi_row,
-                                const int mi_col, aom_writer *w) {
+static AOM_INLINE void pack_inter_mode_mvs(AV1_COMP *cpi, const int mi_row,
+                                           const int mi_col, aom_writer *w) {
   AV1_COMMON *const cm = &cpi->common;
   MACROBLOCK *const x = &cpi->td.mb;
   MACROBLOCKD *const xd = &x->e_mbd;
@@ -1229,9 +1244,9 @@
   }
 }
 
-static void write_intrabc_info(MACROBLOCKD *xd,
-                               const MB_MODE_INFO_EXT_FRAME *mbmi_ext_frame,
-                               aom_writer *w) {
+static AOM_INLINE void write_intrabc_info(
+    MACROBLOCKD *xd, const MB_MODE_INFO_EXT_FRAME *mbmi_ext_frame,
+    aom_writer *w) {
   const MB_MODE_INFO *const mbmi = xd->mi[0];
   int use_intrabc = is_intrabc_block(mbmi);
   FRAME_CONTEXT *ec_ctx = xd->tile_ctx;
@@ -1245,10 +1260,10 @@
   }
 }
 
-static void write_mb_modes_kf(AV1_COMP *cpi, MACROBLOCKD *xd,
-                              const MB_MODE_INFO_EXT_FRAME *mbmi_ext_frame,
-                              const int mi_row, const int mi_col,
-                              aom_writer *w) {
+static AOM_INLINE void write_mb_modes_kf(
+    AV1_COMP *cpi, MACROBLOCKD *xd,
+    const MB_MODE_INFO_EXT_FRAME *mbmi_ext_frame, const int mi_row,
+    const int mi_col, aom_writer *w) {
   AV1_COMMON *const cm = &cpi->common;
   FRAME_CONTEXT *ec_ctx = xd->tile_ctx;
   const struct segmentation *const seg = &cm->seg;
@@ -1276,7 +1291,7 @@
 }
 
 #if CONFIG_RD_DEBUG
-static void dump_mode_info(MB_MODE_INFO *mi) {
+static AOM_INLINE void dump_mode_info(MB_MODE_INFO *mi) {
   printf("\nmi->mi_row == %d\n", mi->mi_row);
   printf("&& mi->mi_col == %d\n", mi->mi_col);
   printf("&& mi->sb_type == %d\n", mi->sb_type);
@@ -1312,7 +1327,7 @@
 #endif
 
 #if ENC_MISMATCH_DEBUG
-static void enc_dump_logs(AV1_COMP *cpi, int mi_row, int mi_col) {
+static AOM_INLINE void enc_dump_logs(AV1_COMP *cpi, int mi_row, int mi_col) {
   AV1_COMMON *const cm = &cpi->common;
   const MB_MODE_INFO *const mbmi =
       *(cm->mi_grid_base + (mi_row * cm->mi_stride + mi_col));
@@ -1365,7 +1380,8 @@
 }
 #endif  // ENC_MISMATCH_DEBUG
 
-static void write_mbmi_b(AV1_COMP *cpi, aom_writer *w, int mi_row, int mi_col) {
+static AOM_INLINE void write_mbmi_b(AV1_COMP *cpi, aom_writer *w, int mi_row,
+                                    int mi_col) {
   AV1_COMMON *const cm = &cpi->common;
   MACROBLOCKD *const xd = &cpi->td.mb.e_mbd;
   MB_MODE_INFO *m = xd->mi[0];
@@ -1386,12 +1402,11 @@
   }
 }
 
-static void write_inter_txb_coeff(AV1_COMMON *const cm, MACROBLOCK *const x,
-                                  MB_MODE_INFO *const mbmi, aom_writer *w,
-                                  const TOKENEXTRA **tok,
-                                  const TOKENEXTRA *const tok_end,
-                                  TOKEN_STATS *token_stats, const int row,
-                                  const int col, int *block, const int plane) {
+static AOM_INLINE void write_inter_txb_coeff(
+    AV1_COMMON *const cm, MACROBLOCK *const x, MB_MODE_INFO *const mbmi,
+    aom_writer *w, const TOKENEXTRA **tok, const TOKENEXTRA *const tok_end,
+    TOKEN_STATS *token_stats, const int row, const int col, int *block,
+    const int plane) {
   MACROBLOCKD *const xd = &x->e_mbd;
   const struct macroblockd_plane *const pd = &xd->plane[plane];
   const BLOCK_SIZE bsize = mbmi->sb_type;
@@ -1435,9 +1450,10 @@
   }
 }
 
-static void write_tokens_b(AV1_COMP *cpi, aom_writer *w, const TOKENEXTRA **tok,
-                           const TOKENEXTRA *const tok_end, int mi_row,
-                           int mi_col) {
+static AOM_INLINE void write_tokens_b(AV1_COMP *cpi, aom_writer *w,
+                                      const TOKENEXTRA **tok,
+                                      const TOKENEXTRA *const tok_end,
+                                      int mi_row, int mi_col) {
   AV1_COMMON *const cm = &cpi->common;
   MACROBLOCK *const x = &cpi->td.mb;
   MACROBLOCKD *const xd = &x->e_mbd;
@@ -1496,10 +1512,10 @@
   }
 }
 
-static void write_modes_b(AV1_COMP *cpi, const TileInfo *const tile,
-                          aom_writer *w, const TOKENEXTRA **tok,
-                          const TOKENEXTRA *const tok_end, int mi_row,
-                          int mi_col) {
+static AOM_INLINE void write_modes_b(AV1_COMP *cpi, const TileInfo *const tile,
+                                     aom_writer *w, const TOKENEXTRA **tok,
+                                     const TOKENEXTRA *const tok_end,
+                                     int mi_row, int mi_col) {
   const AV1_COMMON *cm = &cpi->common;
   MACROBLOCKD *xd = &cpi->td.mb.e_mbd;
   xd->mi = cm->mi_grid_base + (mi_row * cm->mi_stride + mi_col);
@@ -1565,10 +1581,10 @@
   }
 }
 
-static void write_partition(const AV1_COMMON *const cm,
-                            const MACROBLOCKD *const xd, int hbs, int mi_row,
-                            int mi_col, PARTITION_TYPE p, BLOCK_SIZE bsize,
-                            aom_writer *w) {
+static AOM_INLINE void write_partition(const AV1_COMMON *const cm,
+                                       const MACROBLOCKD *const xd, int hbs,
+                                       int mi_row, int mi_col, PARTITION_TYPE p,
+                                       BLOCK_SIZE bsize, aom_writer *w) {
   const int is_partition_point = bsize >= BLOCK_8X8;
 
   if (!is_partition_point) return;
@@ -1602,10 +1618,10 @@
   }
 }
 
-static void write_modes_sb(AV1_COMP *const cpi, const TileInfo *const tile,
-                           aom_writer *const w, const TOKENEXTRA **tok,
-                           const TOKENEXTRA *const tok_end, int mi_row,
-                           int mi_col, BLOCK_SIZE bsize) {
+static AOM_INLINE void write_modes_sb(
+    AV1_COMP *const cpi, const TileInfo *const tile, aom_writer *const w,
+    const TOKENEXTRA **tok, const TOKENEXTRA *const tok_end, int mi_row,
+    int mi_col, BLOCK_SIZE bsize) {
   const AV1_COMMON *const cm = &cpi->common;
   MACROBLOCKD *const xd = &cpi->td.mb.e_mbd;
   assert(bsize < BLOCK_SIZES_ALL);
@@ -1700,8 +1716,10 @@
   update_ext_partition_context(xd, mi_row, mi_col, subsize, bsize, partition);
 }
 
-static void write_modes(AV1_COMP *const cpi, const TileInfo *const tile,
-                        aom_writer *const w, int tile_row, int tile_col) {
+static AOM_INLINE void write_modes(AV1_COMP *const cpi,
+                                   const TileInfo *const tile,
+                                   aom_writer *const w, int tile_row,
+                                   int tile_col) {
   AV1_COMMON *const cm = &cpi->common;
   MACROBLOCKD *const xd = &cpi->td.mb.e_mbd;
   const int mi_row_start = tile->mi_row_start;
@@ -1741,8 +1759,8 @@
   }
 }
 
-static void encode_restoration_mode(AV1_COMMON *cm,
-                                    struct aom_write_bit_buffer *wb) {
+static AOM_INLINE void encode_restoration_mode(
+    AV1_COMMON *cm, struct aom_write_bit_buffer *wb) {
   assert(!cm->all_lossless);
   if (!cm->seq_params.enable_restoration) return;
   if (cm->allow_intrabc) return;
@@ -1812,8 +1830,10 @@
   }
 }
 
-static void write_wiener_filter(int wiener_win, const WienerInfo *wiener_info,
-                                WienerInfo *ref_wiener_info, aom_writer *wb) {
+static AOM_INLINE void write_wiener_filter(int wiener_win,
+                                           const WienerInfo *wiener_info,
+                                           WienerInfo *ref_wiener_info,
+                                           aom_writer *wb) {
   if (wiener_win == WIENER_WIN)
     aom_write_primitive_refsubexpfin(
         wb, WIENER_FILT_TAP0_MAXV - WIENER_FILT_TAP0_MINV + 1,
@@ -1855,9 +1875,9 @@
   memcpy(ref_wiener_info, wiener_info, sizeof(*wiener_info));
 }
 
-static void write_sgrproj_filter(const SgrprojInfo *sgrproj_info,
-                                 SgrprojInfo *ref_sgrproj_info,
-                                 aom_writer *wb) {
+static AOM_INLINE void write_sgrproj_filter(const SgrprojInfo *sgrproj_info,
+                                            SgrprojInfo *ref_sgrproj_info,
+                                            aom_writer *wb) {
   aom_write_literal(wb, sgrproj_info->ep, SGRPROJ_PARAMS_BITS);
   const sgr_params_type *params = &av1_sgr_params[sgrproj_info->ep];
 
@@ -1886,11 +1906,9 @@
   memcpy(ref_sgrproj_info, sgrproj_info, sizeof(*sgrproj_info));
 }
 
-static void loop_restoration_write_sb_coeffs(const AV1_COMMON *const cm,
-                                             MACROBLOCKD *xd,
-                                             const RestorationUnitInfo *rui,
-                                             aom_writer *const w, int plane,
-                                             FRAME_COUNTS *counts) {
+static AOM_INLINE void loop_restoration_write_sb_coeffs(
+    const AV1_COMMON *const cm, MACROBLOCKD *xd, const RestorationUnitInfo *rui,
+    aom_writer *const w, int plane, FRAME_COUNTS *counts) {
   const RestorationInfo *rsi = cm->rst_info + plane;
   RestorationType frame_rtype = rsi->frame_restoration_type;
   if (frame_rtype == RESTORE_NONE) return;
@@ -1939,7 +1957,8 @@
   }
 }
 
-static void encode_loopfilter(AV1_COMMON *cm, struct aom_write_bit_buffer *wb) {
+static AOM_INLINE void encode_loopfilter(AV1_COMMON *cm,
+                                         struct aom_write_bit_buffer *wb) {
   assert(!cm->coded_lossless);
   if (cm->allow_intrabc) return;
   const int num_planes = av1_num_planes(cm);
@@ -1995,7 +2014,8 @@
   }
 }
 
-static void encode_cdef(const AV1_COMMON *cm, struct aom_write_bit_buffer *wb) {
+static AOM_INLINE void encode_cdef(const AV1_COMMON *cm,
+                                   struct aom_write_bit_buffer *wb) {
   assert(!cm->coded_lossless);
   if (!cm->seq_params.enable_cdef) return;
   if (cm->allow_intrabc) return;
@@ -2012,7 +2032,8 @@
   }
 }
 
-static void write_delta_q(struct aom_write_bit_buffer *wb, int delta_q) {
+static AOM_INLINE void write_delta_q(struct aom_write_bit_buffer *wb,
+                                     int delta_q) {
   if (delta_q != 0) {
     aom_wb_write_bit(wb, 1);
     aom_wb_write_inv_signed_literal(wb, delta_q, 6);
@@ -2021,8 +2042,8 @@
   }
 }
 
-static void encode_quantization(const AV1_COMMON *const cm,
-                                struct aom_write_bit_buffer *wb) {
+static AOM_INLINE void encode_quantization(const AV1_COMMON *const cm,
+                                           struct aom_write_bit_buffer *wb) {
   const int num_planes = av1_num_planes(cm);
 
   aom_wb_write_literal(wb, cm->base_qindex, QINDEX_BITS);
@@ -2049,8 +2070,8 @@
   }
 }
 
-static void encode_segmentation(AV1_COMMON *cm, MACROBLOCKD *xd,
-                                struct aom_write_bit_buffer *wb) {
+static AOM_INLINE void encode_segmentation(AV1_COMMON *cm, MACROBLOCKD *xd,
+                                           struct aom_write_bit_buffer *wb) {
   int i, j;
   struct segmentation *seg = &cm->seg;
 
@@ -2095,15 +2116,16 @@
   }
 }
 
-static void write_frame_interp_filter(InterpFilter filter,
-                                      struct aom_write_bit_buffer *wb) {
+static AOM_INLINE void write_frame_interp_filter(
+    InterpFilter filter, struct aom_write_bit_buffer *wb) {
   aom_wb_write_bit(wb, filter == SWITCHABLE);
   if (filter != SWITCHABLE)
     aom_wb_write_literal(wb, filter, LOG_SWITCHABLE_FILTERS);
 }
 
 // Same function as write_uniform but writing to uncompresses header wb
-static void wb_write_uniform(struct aom_write_bit_buffer *wb, int n, int v) {
+static AOM_INLINE void wb_write_uniform(struct aom_write_bit_buffer *wb, int n,
+                                        int v) {
   const int l = get_unsigned_bits(n);
   const int m = (1 << l) - n;
   if (l == 0) return;
@@ -2115,8 +2137,8 @@
   }
 }
 
-static void write_tile_info_max_tile(const AV1_COMMON *const cm,
-                                     struct aom_write_bit_buffer *wb) {
+static AOM_INLINE void write_tile_info_max_tile(
+    const AV1_COMMON *const cm, struct aom_write_bit_buffer *wb) {
   int width_mi = ALIGN_POWER_OF_TWO(cm->mi_cols, cm->seq_params.mib_size_log2);
   int height_mi = ALIGN_POWER_OF_TWO(cm->mi_rows, cm->seq_params.mib_size_log2);
   int width_sb = width_mi >> cm->seq_params.mib_size_log2;
@@ -2166,9 +2188,9 @@
   }
 }
 
-static void write_tile_info(const AV1_COMMON *const cm,
-                            struct aom_write_bit_buffer *saved_wb,
-                            struct aom_write_bit_buffer *wb) {
+static AOM_INLINE void write_tile_info(const AV1_COMMON *const cm,
+                                       struct aom_write_bit_buffer *saved_wb,
+                                       struct aom_write_bit_buffer *wb) {
   write_tile_info_max_tile(cm, wb);
 
   *saved_wb = *wb;
@@ -2180,9 +2202,9 @@
   }
 }
 
-static void write_ext_tile_info(const AV1_COMMON *const cm,
-                                struct aom_write_bit_buffer *saved_wb,
-                                struct aom_write_bit_buffer *wb) {
+static AOM_INLINE void write_ext_tile_info(
+    const AV1_COMMON *const cm, struct aom_write_bit_buffer *saved_wb,
+    struct aom_write_bit_buffer *wb) {
   // This information is stored as a separate byte.
   int mod = wb->bit_offset % CHAR_BIT;
   if (mod > 0) aom_wb_write_literal(wb, 0, CHAR_BIT - mod);
@@ -2257,8 +2279,8 @@
   return 0;
 }
 
-static void write_render_size(const AV1_COMMON *cm,
-                              struct aom_write_bit_buffer *wb) {
+static AOM_INLINE void write_render_size(const AV1_COMMON *cm,
+                                         struct aom_write_bit_buffer *wb) {
   const int scaling_active = av1_resize_scaled(cm);
   aom_wb_write_bit(wb, scaling_active);
   if (scaling_active) {
@@ -2267,8 +2289,8 @@
   }
 }
 
-static void write_superres_scale(const AV1_COMMON *const cm,
-                                 struct aom_write_bit_buffer *wb) {
+static AOM_INLINE void write_superres_scale(const AV1_COMMON *const cm,
+                                            struct aom_write_bit_buffer *wb) {
   const SequenceHeader *const seq_params = &cm->seq_params;
   if (!seq_params->enable_superres) {
     assert(cm->superres_scale_denominator == SCALE_NUMERATOR);
@@ -2289,8 +2311,9 @@
   }
 }
 
-static void write_frame_size(const AV1_COMMON *cm, int frame_size_override,
-                             struct aom_write_bit_buffer *wb) {
+static AOM_INLINE void write_frame_size(const AV1_COMMON *cm,
+                                        int frame_size_override,
+                                        struct aom_write_bit_buffer *wb) {
   const int coded_width = cm->superres_upscaled_width - 1;
   const int coded_height = cm->superres_upscaled_height - 1;
 
@@ -2306,8 +2329,8 @@
   write_render_size(cm, wb);
 }
 
-static void write_frame_size_with_refs(const AV1_COMMON *const cm,
-                                       struct aom_write_bit_buffer *wb) {
+static AOM_INLINE void write_frame_size_with_refs(
+    const AV1_COMMON *const cm, struct aom_write_bit_buffer *wb) {
   int found = 0;
 
   MV_REFERENCE_FRAME ref_frame;
@@ -2333,14 +2356,14 @@
   }
 }
 
-static void write_profile(BITSTREAM_PROFILE profile,
-                          struct aom_write_bit_buffer *wb) {
+static AOM_INLINE void write_profile(BITSTREAM_PROFILE profile,
+                                     struct aom_write_bit_buffer *wb) {
   assert(profile >= PROFILE_0 && profile < MAX_PROFILES);
   aom_wb_write_literal(wb, profile, PROFILE_BITS);
 }
 
-static void write_bitdepth(const SequenceHeader *const seq_params,
-                           struct aom_write_bit_buffer *wb) {
+static AOM_INLINE void write_bitdepth(const SequenceHeader *const seq_params,
+                                      struct aom_write_bit_buffer *wb) {
   // Profile 0/1: [0] for 8 bit, [1]  10-bit
   // Profile   2: [0] for 8 bit, [10] 10-bit, [11] - 12-bit
   aom_wb_write_bit(wb, seq_params->bit_depth == AOM_BITS_8 ? 0 : 1);
@@ -2349,8 +2372,8 @@
   }
 }
 
-static void write_color_config(const SequenceHeader *const seq_params,
-                               struct aom_write_bit_buffer *wb) {
+static AOM_INLINE void write_color_config(
+    const SequenceHeader *const seq_params, struct aom_write_bit_buffer *wb) {
   write_bitdepth(seq_params, wb);
   const int is_monochrome = seq_params->monochrome;
   // monochrome bit
@@ -2415,8 +2438,8 @@
   aom_wb_write_bit(wb, seq_params->separate_uv_delta_q);
 }
 
-static void write_timing_info_header(AV1_COMMON *const cm,
-                                     struct aom_write_bit_buffer *wb) {
+static AOM_INLINE void write_timing_info_header(
+    AV1_COMMON *const cm, struct aom_write_bit_buffer *wb) {
   aom_wb_write_unsigned_literal(wb, cm->timing_info.num_units_in_display_tick,
                                 32);  // Number of units in tick
   aom_wb_write_unsigned_literal(wb, cm->timing_info.time_scale,
@@ -2431,8 +2454,8 @@
   }
 }
 
-static void write_decoder_model_info(AV1_COMMON *const cm,
-                                     struct aom_write_bit_buffer *wb) {
+static AOM_INLINE void write_decoder_model_info(
+    AV1_COMMON *const cm, struct aom_write_bit_buffer *wb) {
   aom_wb_write_literal(
       wb, cm->buffer_model.encoder_decoder_buffer_delay_length - 1, 5);
   aom_wb_write_unsigned_literal(wb, cm->buffer_model.num_units_in_decoding_tick,
@@ -2442,9 +2465,8 @@
                        5);
 }
 
-static void write_dec_model_op_parameters(AV1_COMMON *const cm,
-                                          struct aom_write_bit_buffer *wb,
-                                          int op_num) {
+static AOM_INLINE void write_dec_model_op_parameters(
+    AV1_COMMON *const cm, struct aom_write_bit_buffer *wb, int op_num) {
   if (op_num > MAX_NUM_OPERATING_POINTS)
     aom_internal_error(
         &cm->error, AOM_CODEC_UNSUP_BITSTREAM,
@@ -2467,15 +2489,15 @@
       0;  // reset the decoded frame counter
 }
 
-static void write_tu_pts_info(AV1_COMMON *const cm,
-                              struct aom_write_bit_buffer *wb) {
+static AOM_INLINE void write_tu_pts_info(AV1_COMMON *const cm,
+                                         struct aom_write_bit_buffer *wb) {
   aom_wb_write_unsigned_literal(
       wb, cm->frame_presentation_time,
       cm->buffer_model.frame_presentation_time_length);
 }
 
-static void write_film_grain_params(const AV1_COMP *const cpi,
-                                    struct aom_write_bit_buffer *wb) {
+static AOM_INLINE void write_film_grain_params(
+    const AV1_COMP *const cpi, struct aom_write_bit_buffer *wb) {
   const AV1_COMMON *const cm = &cpi->common;
   const aom_film_grain_t *const pars = &cm->cur_frame->film_grain_params;
 
@@ -2579,8 +2601,8 @@
   aom_wb_write_bit(wb, pars->clip_to_restricted_range);
 }
 
-static void write_sb_size(const SequenceHeader *const seq_params,
-                          struct aom_write_bit_buffer *wb) {
+static AOM_INLINE void write_sb_size(const SequenceHeader *const seq_params,
+                                     struct aom_write_bit_buffer *wb) {
   (void)seq_params;
   (void)wb;
   assert(seq_params->mib_size == mi_size_wide[seq_params->sb_size]);
@@ -2590,8 +2612,8 @@
   aom_wb_write_bit(wb, seq_params->sb_size == BLOCK_128X128 ? 1 : 0);
 }
 
-static void write_sequence_header(const SequenceHeader *const seq_params,
-                                  struct aom_write_bit_buffer *wb) {
+static AOM_INLINE void write_sequence_header(
+    const SequenceHeader *const seq_params, struct aom_write_bit_buffer *wb) {
   aom_wb_write_literal(wb, seq_params->num_bits_width - 1, 4);
   aom_wb_write_literal(wb, seq_params->num_bits_height - 1, 4);
   aom_wb_write_literal(wb, seq_params->max_frame_width - 1,
@@ -2656,10 +2678,9 @@
   aom_wb_write_bit(wb, seq_params->enable_restoration);
 }
 
-static void write_global_motion_params(const WarpedMotionParams *params,
-                                       const WarpedMotionParams *ref_params,
-                                       struct aom_write_bit_buffer *wb,
-                                       int allow_hp) {
+static AOM_INLINE void write_global_motion_params(
+    const WarpedMotionParams *params, const WarpedMotionParams *ref_params,
+    struct aom_write_bit_buffer *wb, int allow_hp) {
   const TransformationType type = params->wmtype;
 
   aom_wb_write_bit(wb, type != IDENTITY);
@@ -2710,8 +2731,8 @@
   }
 }
 
-static void write_global_motion(AV1_COMP *cpi,
-                                struct aom_write_bit_buffer *wb) {
+static AOM_INLINE void write_global_motion(AV1_COMP *cpi,
+                                           struct aom_write_bit_buffer *wb) {
   AV1_COMMON *const cm = &cpi->common;
   int frame;
   for (frame = LAST_FRAME; frame <= ALTREF_FRAME; ++frame) {
@@ -2814,9 +2835,9 @@
 }
 
 // New function based on HLS R18
-static void write_uncompressed_header_obu(AV1_COMP *cpi,
-                                          struct aom_write_bit_buffer *saved_wb,
-                                          struct aom_write_bit_buffer *wb) {
+static AOM_INLINE void write_uncompressed_header_obu(
+    AV1_COMP *cpi, struct aom_write_bit_buffer *saved_wb,
+    struct aom_write_bit_buffer *wb) {
   AV1_COMMON *const cm = &cpi->common;
   const SequenceHeader *const seq_params = &cm->seq_params;
   MACROBLOCKD *const xd = &cpi->td.mb.e_mbd;
@@ -3142,7 +3163,8 @@
     return 1;
 }
 
-static void mem_put_varsize(uint8_t *const dst, const int sz, const int val) {
+static AOM_INLINE void mem_put_varsize(uint8_t *const dst, const int sz,
+                                       const int val) {
   switch (sz) {
     case 1: dst[0] = (uint8_t)(val & 0xff); break;
     case 2: mem_put_le16(dst, val); break;
@@ -3304,7 +3326,7 @@
   return length_field_size;
 }
 
-static void add_trailing_bits(struct aom_write_bit_buffer *wb) {
+static AOM_INLINE void add_trailing_bits(struct aom_write_bit_buffer *wb) {
   if (aom_wb_is_byte_aligned(wb)) {
     aom_wb_write_literal(wb, 0x80, 8);
   } else {
@@ -3313,8 +3335,8 @@
   }
 }
 
-static void write_bitstream_level(AV1_LEVEL seq_level_idx,
-                                  struct aom_write_bit_buffer *wb) {
+static AOM_INLINE void write_bitstream_level(AV1_LEVEL seq_level_idx,
+                                             struct aom_write_bit_buffer *wb) {
   assert(is_valid_seq_level_idx(seq_level_idx));
   aom_wb_write_literal(wb, seq_level_idx, LEVEL_BITS);
 }
diff --git a/av1/encoder/context_tree.c b/av1/encoder/context_tree.c
index 55a4e6f..763b070 100644
--- a/av1/encoder/context_tree.c
+++ b/av1/encoder/context_tree.c
@@ -22,9 +22,9 @@
   tran_low_t *dqcoeff_buf[MAX_MB_PLANE];
 } PC_TREE_SHARED_BUFFERS;
 
-static void alloc_mode_context(AV1_COMMON *cm, int num_pix,
-                               PICK_MODE_CONTEXT *ctx,
-                               PC_TREE_SHARED_BUFFERS *shared_bufs) {
+static AOM_INLINE void alloc_mode_context(AV1_COMMON *cm, int num_pix,
+                                          PICK_MODE_CONTEXT *ctx,
+                                          PC_TREE_SHARED_BUFFERS *shared_bufs) {
   const int num_planes = av1_num_planes(cm);
   int i;
   const int num_blk = num_pix / 16;
@@ -51,7 +51,8 @@
   }
 }
 
-static void free_mode_context(PICK_MODE_CONTEXT *ctx, const int num_planes) {
+static AOM_INLINE void free_mode_context(PICK_MODE_CONTEXT *ctx,
+                                         const int num_planes) {
   int i;
   aom_free(ctx->blk_skip);
   ctx->blk_skip = 0;
@@ -71,9 +72,9 @@
   }
 }
 
-static void alloc_tree_contexts(AV1_COMMON *cm, PC_TREE *tree, int num_pix,
-                                int is_leaf,
-                                PC_TREE_SHARED_BUFFERS *shared_bufs) {
+static AOM_INLINE void alloc_tree_contexts(
+    AV1_COMMON *cm, PC_TREE *tree, int num_pix, int is_leaf,
+    PC_TREE_SHARED_BUFFERS *shared_bufs) {
   alloc_mode_context(cm, num_pix, &tree->none, shared_bufs);
 
   if (is_leaf) return;
@@ -106,7 +107,7 @@
   }
 }
 
-static void free_tree_contexts(PC_TREE *tree, const int num_planes) {
+static AOM_INLINE void free_tree_contexts(PC_TREE *tree, const int num_planes) {
   int i;
   for (i = 0; i < 3; i++) {
     free_mode_context(&tree->horizontala[i], num_planes);
diff --git a/av1/encoder/encodeframe.c b/av1/encoder/encodeframe.c
index 8eb50c1..4e63e34 100644
--- a/av1/encoder/encodeframe.c
+++ b/av1/encoder/encodeframe.c
@@ -68,10 +68,11 @@
 #include "av1/encoder/tpl_model.h"
 #include "av1/encoder/var_based_part.h"
 
-static void encode_superblock(const AV1_COMP *const cpi, TileDataEnc *tile_data,
-                              ThreadData *td, TOKENEXTRA **t, RUN_TYPE dry_run,
-                              int mi_row, int mi_col, BLOCK_SIZE bsize,
-                              int *rate);
+static AOM_INLINE void encode_superblock(const AV1_COMP *const cpi,
+                                         TileDataEnc *tile_data, ThreadData *td,
+                                         TOKENEXTRA **t, RUN_TYPE dry_run,
+                                         int mi_row, int mi_col,
+                                         BLOCK_SIZE bsize, int *rate);
 
 // This is used as a reference when computing the source variance for the
 //  purposes of activity masking.
@@ -217,9 +218,10 @@
       cpi, cm->base_qindex + xd->delta_qindex + cm->y_dc_delta_q);
 }
 
-static void set_ssim_rdmult(const AV1_COMP *const cpi, MACROBLOCK *const x,
-                            const BLOCK_SIZE bsize, const int mi_row,
-                            const int mi_col, int *const rdmult) {
+static AOM_INLINE void set_ssim_rdmult(const AV1_COMP *const cpi,
+                                       MACROBLOCK *const x,
+                                       const BLOCK_SIZE bsize, const int mi_row,
+                                       const int mi_col, int *const rdmult) {
   const AV1_COMMON *const cm = &cpi->common;
 
   const int bsize_base = BLOCK_16X16;
@@ -315,9 +317,10 @@
   return av1_compute_rd_mult(cpi, segment_qindex + cm->y_dc_delta_q);
 }
 
-static void setup_block_rdmult(const AV1_COMP *const cpi, MACROBLOCK *const x,
-                               int mi_row, int mi_col, BLOCK_SIZE bsize,
-                               AQ_MODE aq_mode, MB_MODE_INFO *mbmi) {
+static AOM_INLINE void setup_block_rdmult(const AV1_COMP *const cpi,
+                                          MACROBLOCK *const x, int mi_row,
+                                          int mi_col, BLOCK_SIZE bsize,
+                                          AQ_MODE aq_mode, MB_MODE_INFO *mbmi) {
   x->rdmult = cpi->rd.RDMULT;
 
   if (aq_mode != NO_AQ) {
@@ -349,10 +352,9 @@
   }
 }
 
-static void set_offsets_without_segment_id(const AV1_COMP *const cpi,
-                                           const TileInfo *const tile,
-                                           MACROBLOCK *const x, int mi_row,
-                                           int mi_col, BLOCK_SIZE bsize) {
+static AOM_INLINE void set_offsets_without_segment_id(
+    const AV1_COMP *const cpi, const TileInfo *const tile, MACROBLOCK *const x,
+    int mi_row, int mi_col, BLOCK_SIZE bsize) {
   const AV1_COMMON *const cm = &cpi->common;
   const int num_planes = av1_num_planes(cm);
   MACROBLOCKD *const xd = &x->e_mbd;
@@ -396,9 +398,10 @@
   xd->cfl.mi_col = mi_col;
 }
 
-static void set_offsets(const AV1_COMP *const cpi, const TileInfo *const tile,
-                        MACROBLOCK *const x, int mi_row, int mi_col,
-                        BLOCK_SIZE bsize) {
+static AOM_INLINE void set_offsets(const AV1_COMP *const cpi,
+                                   const TileInfo *const tile,
+                                   MACROBLOCK *const x, int mi_row, int mi_col,
+                                   BLOCK_SIZE bsize) {
   const AV1_COMMON *const cm = &cpi->common;
   const struct segmentation *const seg = &cm->seg;
   MACROBLOCKD *const xd = &x->e_mbd;
@@ -420,10 +423,10 @@
   }
 }
 
-static void update_filter_type_count(uint8_t allow_update_cdf,
-                                     FRAME_COUNTS *counts,
-                                     const MACROBLOCKD *xd,
-                                     const MB_MODE_INFO *mbmi) {
+static AOM_INLINE void update_filter_type_count(uint8_t allow_update_cdf,
+                                                FRAME_COUNTS *counts,
+                                                const MACROBLOCKD *xd,
+                                                const MB_MODE_INFO *mbmi) {
   int dir;
   for (dir = 0; dir < 2; ++dir) {
     const int ctx = av1_get_pred_context_switchable_interp(xd, dir);
@@ -436,9 +439,10 @@
   }
 }
 
-static void update_global_motion_used(PREDICTION_MODE mode, BLOCK_SIZE bsize,
-                                      const MB_MODE_INFO *mbmi,
-                                      RD_COUNTS *rdc) {
+static AOM_INLINE void update_global_motion_used(PREDICTION_MODE mode,
+                                                 BLOCK_SIZE bsize,
+                                                 const MB_MODE_INFO *mbmi,
+                                                 RD_COUNTS *rdc) {
   if (mode == GLOBALMV || mode == GLOBAL_GLOBALMV) {
     const int num_4x4s = mi_size_wide[bsize] * mi_size_high[bsize];
     int ref;
@@ -448,8 +452,8 @@
   }
 }
 
-static void reset_tx_size(MACROBLOCK *x, MB_MODE_INFO *mbmi,
-                          const TX_MODE tx_mode) {
+static AOM_INLINE void reset_tx_size(MACROBLOCK *x, MB_MODE_INFO *mbmi,
+                                     const TX_MODE tx_mode) {
   MACROBLOCKD *const xd = &x->e_mbd;
   if (xd->lossless[mbmi->segment_id]) {
     mbmi->tx_size = TX_4X4;
@@ -468,10 +472,12 @@
   x->skip = 0;
 }
 
-static void update_state(const AV1_COMP *const cpi,
-                         const TileDataEnc *const tile_data, ThreadData *td,
-                         const PICK_MODE_CONTEXT *const ctx, int mi_row,
-                         int mi_col, BLOCK_SIZE bsize, RUN_TYPE dry_run) {
+static AOM_INLINE void update_state(const AV1_COMP *const cpi,
+                                    const TileDataEnc *const tile_data,
+                                    ThreadData *td,
+                                    const PICK_MODE_CONTEXT *const ctx,
+                                    int mi_row, int mi_col, BLOCK_SIZE bsize,
+                                    RUN_TYPE dry_run) {
   int i, x_idx, y;
   const AV1_COMMON *const cm = &cpi->common;
   const int num_planes = av1_num_planes(cm);
@@ -649,11 +655,13 @@
          sizeof(x->mbmi_ext->global_mvs));
 }
 
-static void pick_sb_modes(AV1_COMP *const cpi, TileDataEnc *tile_data,
-                          MACROBLOCK *const x, int mi_row, int mi_col,
-                          RD_STATS *rd_cost, PARTITION_TYPE partition,
-                          BLOCK_SIZE bsize, PICK_MODE_CONTEXT *ctx,
-                          RD_STATS best_rd, int pick_mode_type) {
+static AOM_INLINE void pick_sb_modes(AV1_COMP *const cpi,
+                                     TileDataEnc *tile_data,
+                                     MACROBLOCK *const x, int mi_row,
+                                     int mi_col, RD_STATS *rd_cost,
+                                     PARTITION_TYPE partition, BLOCK_SIZE bsize,
+                                     PICK_MODE_CONTEXT *ctx, RD_STATS best_rd,
+                                     int pick_mode_type) {
   AV1_COMMON *const cm = &cpi->common;
   const int num_planes = av1_num_planes(cm);
   TileInfo *const tile_info = &tile_data->tile_info;
@@ -834,9 +842,10 @@
 #endif
 }
 
-static void update_inter_mode_stats(FRAME_CONTEXT *fc, FRAME_COUNTS *counts,
-                                    PREDICTION_MODE mode,
-                                    int16_t mode_context) {
+static AOM_INLINE void update_inter_mode_stats(FRAME_CONTEXT *fc,
+                                               FRAME_COUNTS *counts,
+                                               PREDICTION_MODE mode,
+                                               int16_t mode_context) {
   (void)counts;
 
   int16_t mode_ctx = mode_context & NEWMV_CTX_MASK;
@@ -874,8 +883,9 @@
   update_cdf(fc->refmv_cdf[mode_ctx], mode != NEARESTMV, 2);
 }
 
-static void update_palette_cdf(MACROBLOCKD *xd, const MB_MODE_INFO *const mbmi,
-                               FRAME_COUNTS *counts) {
+static AOM_INLINE void update_palette_cdf(MACROBLOCKD *xd,
+                                          const MB_MODE_INFO *const mbmi,
+                                          FRAME_COUNTS *counts) {
   FRAME_CONTEXT *fc = xd->tile_ctx;
   const BLOCK_SIZE bsize = mbmi->sb_type;
   const PALETTE_MODE_INFO *const pmi = &mbmi->palette_mode_info;
@@ -920,11 +930,13 @@
   }
 }
 
-static void sum_intra_stats(const AV1_COMMON *const cm, FRAME_COUNTS *counts,
-                            MACROBLOCKD *xd, const MB_MODE_INFO *const mbmi,
-                            const MB_MODE_INFO *above_mi,
-                            const MB_MODE_INFO *left_mi, const int intraonly,
-                            const int mi_row, const int mi_col) {
+static AOM_INLINE void sum_intra_stats(const AV1_COMMON *const cm,
+                                       FRAME_COUNTS *counts, MACROBLOCKD *xd,
+                                       const MB_MODE_INFO *const mbmi,
+                                       const MB_MODE_INFO *above_mi,
+                                       const MB_MODE_INFO *left_mi,
+                                       const int intraonly, const int mi_row,
+                                       const int mi_col) {
   FRAME_CONTEXT *fc = xd->tile_ctx;
   const PREDICTION_MODE y_mode = mbmi->mode;
   (void)counts;
@@ -1025,8 +1037,8 @@
   }
 }
 
-static void update_stats(const AV1_COMMON *const cm, ThreadData *td, int mi_row,
-                         int mi_col) {
+static AOM_INLINE void update_stats(const AV1_COMMON *const cm, ThreadData *td,
+                                    int mi_row, int mi_col) {
   MACROBLOCK *x = &td->mb;
   MACROBLOCKD *const xd = &x->e_mbd;
   const MB_MODE_INFO *const mbmi = xd->mi[0];
@@ -1426,10 +1438,10 @@
   TXFM_CONTEXT tl[MAX_MIB_SIZE];
 } RD_SEARCH_MACROBLOCK_CONTEXT;
 
-static void restore_context(MACROBLOCK *x,
-                            const RD_SEARCH_MACROBLOCK_CONTEXT *ctx, int mi_row,
-                            int mi_col, BLOCK_SIZE bsize,
-                            const int num_planes) {
+static AOM_INLINE void restore_context(MACROBLOCK *x,
+                                       const RD_SEARCH_MACROBLOCK_CONTEXT *ctx,
+                                       int mi_row, int mi_col, BLOCK_SIZE bsize,
+                                       const int num_planes) {
   MACROBLOCKD *xd = &x->e_mbd;
   int p;
   const int num_4x4_blocks_wide =
@@ -1462,9 +1474,10 @@
          sizeof(*xd->left_txfm_context) * mi_height);
 }
 
-static void save_context(const MACROBLOCK *x, RD_SEARCH_MACROBLOCK_CONTEXT *ctx,
-                         int mi_row, int mi_col, BLOCK_SIZE bsize,
-                         const int num_planes) {
+static AOM_INLINE void save_context(const MACROBLOCK *x,
+                                    RD_SEARCH_MACROBLOCK_CONTEXT *ctx,
+                                    int mi_row, int mi_col, BLOCK_SIZE bsize,
+                                    const int num_planes) {
   const MACROBLOCKD *xd = &x->e_mbd;
   int p;
   const int num_4x4_blocks_wide =
@@ -1499,11 +1512,12 @@
   ctx->p_tl = xd->left_txfm_context;
 }
 
-static void encode_b(const AV1_COMP *const cpi, TileDataEnc *tile_data,
-                     ThreadData *td, TOKENEXTRA **tp, int mi_row, int mi_col,
-                     RUN_TYPE dry_run, BLOCK_SIZE bsize,
-                     PARTITION_TYPE partition,
-                     const PICK_MODE_CONTEXT *const ctx, int *rate) {
+static AOM_INLINE void encode_b(const AV1_COMP *const cpi,
+                                TileDataEnc *tile_data, ThreadData *td,
+                                TOKENEXTRA **tp, int mi_row, int mi_col,
+                                RUN_TYPE dry_run, BLOCK_SIZE bsize,
+                                PARTITION_TYPE partition,
+                                const PICK_MODE_CONTEXT *const ctx, int *rate) {
   TileInfo *const tile = &tile_data->tile_info;
   MACROBLOCK *const x = &td->mb;
   MACROBLOCKD *xd = &x->e_mbd;
@@ -1604,10 +1618,11 @@
   x->rdmult = origin_mult;
 }
 
-static void encode_sb(const AV1_COMP *const cpi, ThreadData *td,
-                      TileDataEnc *tile_data, TOKENEXTRA **tp, int mi_row,
-                      int mi_col, RUN_TYPE dry_run, BLOCK_SIZE bsize,
-                      PC_TREE *pc_tree, int *rate) {
+static AOM_INLINE void encode_sb(const AV1_COMP *const cpi, ThreadData *td,
+                                 TileDataEnc *tile_data, TOKENEXTRA **tp,
+                                 int mi_row, int mi_col, RUN_TYPE dry_run,
+                                 BLOCK_SIZE bsize, PC_TREE *pc_tree,
+                                 int *rate) {
   assert(bsize < BLOCK_SIZES_ALL);
   const AV1_COMMON *const cm = &cpi->common;
   MACROBLOCK *const x = &td->mb;
@@ -1733,11 +1748,10 @@
 }
 
 #if !CONFIG_REALTIME_ONLY
-static void set_partial_sb_partition(const AV1_COMMON *const cm,
-                                     MB_MODE_INFO *mi, int bh_in, int bw_in,
-                                     int mi_rows_remaining,
-                                     int mi_cols_remaining, BLOCK_SIZE bsize,
-                                     MB_MODE_INFO **mib) {
+static AOM_INLINE void set_partial_sb_partition(
+    const AV1_COMMON *const cm, MB_MODE_INFO *mi, int bh_in, int bw_in,
+    int mi_rows_remaining, int mi_cols_remaining, BLOCK_SIZE bsize,
+    MB_MODE_INFO **mib) {
   int bh = bh_in;
   int r, c;
   for (r = 0; r < cm->seq_params.mib_size; r += bh) {
@@ -1756,9 +1770,10 @@
 // However, at the bottom and right borders of the image the requested size
 // may not be allowed in which case this code attempts to choose the largest
 // allowable partition.
-static void set_fixed_partitioning(AV1_COMP *cpi, const TileInfo *const tile,
-                                   MB_MODE_INFO **mib, int mi_row, int mi_col,
-                                   BLOCK_SIZE bsize) {
+static AOM_INLINE void set_fixed_partitioning(AV1_COMP *cpi,
+                                              const TileInfo *const tile,
+                                              MB_MODE_INFO **mib, int mi_row,
+                                              int mi_col, BLOCK_SIZE bsize) {
   AV1_COMMON *const cm = &cpi->common;
   const int mi_rows_remaining = tile->mi_row_end - mi_row;
   const int mi_cols_remaining = tile->mi_col_end - mi_col;
@@ -1792,11 +1807,10 @@
 }
 #endif  // !CONFIG_REALTIME_ONLY
 
-static void rd_use_partition(AV1_COMP *cpi, ThreadData *td,
-                             TileDataEnc *tile_data, MB_MODE_INFO **mib,
-                             TOKENEXTRA **tp, int mi_row, int mi_col,
-                             BLOCK_SIZE bsize, int *rate, int64_t *dist,
-                             int do_recon, PC_TREE *pc_tree) {
+static AOM_INLINE void rd_use_partition(
+    AV1_COMP *cpi, ThreadData *td, TileDataEnc *tile_data, MB_MODE_INFO **mib,
+    TOKENEXTRA **tp, int mi_row, int mi_col, BLOCK_SIZE bsize, int *rate,
+    int64_t *dist, int do_recon, PC_TREE *pc_tree) {
   AV1_COMMON *const cm = &cpi->common;
   const int num_planes = av1_num_planes(cm);
   TileInfo *const tile_info = &tile_data->tile_info;
@@ -2062,10 +2076,11 @@
   x->rdmult = orig_rdmult;
 }
 
-static void nonrd_use_partition(AV1_COMP *cpi, ThreadData *td,
-                                TileDataEnc *tile_data, MB_MODE_INFO **mib,
-                                TOKENEXTRA **tp, int mi_row, int mi_col,
-                                BLOCK_SIZE bsize, PC_TREE *pc_tree) {
+static AOM_INLINE void nonrd_use_partition(AV1_COMP *cpi, ThreadData *td,
+                                           TileDataEnc *tile_data,
+                                           MB_MODE_INFO **mib, TOKENEXTRA **tp,
+                                           int mi_row, int mi_col,
+                                           BLOCK_SIZE bsize, PC_TREE *pc_tree) {
   AV1_COMMON *const cm = &cpi->common;
   TileInfo *const tile_info = &tile_data->tile_info;
   const SPEED_FEATURES *const sf = &cpi->sf;
@@ -2328,7 +2343,7 @@
   return true;
 }
 
-static void reset_partition(PC_TREE *pc_tree, BLOCK_SIZE bsize) {
+static AOM_INLINE void reset_partition(PC_TREE *pc_tree, BLOCK_SIZE bsize) {
   pc_tree->partitioning = PARTITION_NONE;
   pc_tree->none.rd_stats.skip = 0;
 
@@ -2340,9 +2355,11 @@
 }
 
 // Record the ref frames that have been selected by square partition blocks.
-static void update_picked_ref_frames_mask(MACROBLOCK *const x, int ref_type,
-                                          BLOCK_SIZE bsize, int mib_size,
-                                          int mi_row, int mi_col) {
+static AOM_INLINE void update_picked_ref_frames_mask(MACROBLOCK *const x,
+                                                     int ref_type,
+                                                     BLOCK_SIZE bsize,
+                                                     int mib_size, int mi_row,
+                                                     int mi_col) {
   assert(mi_size_wide[bsize] == mi_size_high[bsize]);
   const int sb_size_mask = mib_size - 1;
   const int mi_row_in_sb = mi_row & sb_size_mask;
@@ -3696,9 +3713,10 @@
   return qindex;
 }
 
-static void setup_delta_q(AV1_COMP *const cpi, ThreadData *td,
-                          MACROBLOCK *const x, const TileInfo *const tile_info,
-                          int mi_row, int mi_col, int num_planes) {
+static AOM_INLINE void setup_delta_q(AV1_COMP *const cpi, ThreadData *td,
+                                     MACROBLOCK *const x,
+                                     const TileInfo *const tile_info,
+                                     int mi_row, int mi_col, int num_planes) {
   AV1_COMMON *const cm = &cpi->common;
   const DeltaQInfo *const delta_q_info = &cm->delta_q_info;
   assert(delta_q_info->delta_q_present_flag);
@@ -3779,9 +3797,10 @@
 #define AVG_CDF_WEIGHT_LEFT 3
 #define AVG_CDF_WEIGHT_TOP_RIGHT 1
 
-static void avg_cdf_symbol(aom_cdf_prob *cdf_ptr_left, aom_cdf_prob *cdf_ptr_tr,
-                           int num_cdfs, int cdf_stride, int nsymbs,
-                           int wt_left, int wt_tr) {
+static AOM_INLINE void avg_cdf_symbol(aom_cdf_prob *cdf_ptr_left,
+                                      aom_cdf_prob *cdf_ptr_tr, int num_cdfs,
+                                      int cdf_stride, int nsymbs, int wt_left,
+                                      int wt_tr) {
   for (int i = 0; i < num_cdfs; i++) {
     for (int j = 0; j <= nsymbs; j++) {
       cdf_ptr_left[i * cdf_stride + j] =
@@ -3808,8 +3827,8 @@
                    wt_left, wt_tr);                                        \
   } while (0)
 
-static void avg_nmv(nmv_context *nmv_left, nmv_context *nmv_tr, int wt_left,
-                    int wt_tr) {
+static AOM_INLINE void avg_nmv(nmv_context *nmv_left, nmv_context *nmv_tr,
+                               int wt_left, int wt_tr) {
   AVERAGE_CDF(nmv_left->joints_cdf, nmv_tr->joints_cdf, 4);
   for (int i = 0; i < 2; i++) {
     AVERAGE_CDF(nmv_left->comps[i].classes_cdf, nmv_tr->comps[i].classes_cdf,
@@ -3832,8 +3851,9 @@
 // the left SB's CDFs and use the same for current SB's encoding to
 // improve the performance. This function facilitates the averaging
 // of CDF and used only when row-mt is enabled in encoder.
-static void avg_cdf_symbols(FRAME_CONTEXT *ctx_left, FRAME_CONTEXT *ctx_tr,
-                            int wt_left, int wt_tr) {
+static AOM_INLINE void avg_cdf_symbols(FRAME_CONTEXT *ctx_left,
+                                       FRAME_CONTEXT *ctx_tr, int wt_left,
+                                       int wt_tr) {
   AVERAGE_CDF(ctx_left->txb_skip_cdf, ctx_tr->txb_skip_cdf, 2);
   AVERAGE_CDF(ctx_left->eob_extra_cdf, ctx_tr->eob_extra_cdf, 2);
   AVERAGE_CDF(ctx_left->dc_sign_cdf, ctx_tr->dc_sign_cdf, 2);
@@ -3953,8 +3973,8 @@
 }
 
 #if !CONFIG_REALTIME_ONLY
-static void adjust_rdmult_tpl_model(AV1_COMP *cpi, MACROBLOCK *x, int mi_row,
-                                    int mi_col) {
+static AOM_INLINE void adjust_rdmult_tpl_model(AV1_COMP *cpi, MACROBLOCK *x,
+                                               int mi_row, int mi_col) {
   const BLOCK_SIZE sb_size = cpi->common.seq_params.sb_size;
   const int orig_rdmult = cpi->rd.RDMULT;
 
@@ -3986,8 +4006,9 @@
   }
 }
 
-static void encode_sb_row(AV1_COMP *cpi, ThreadData *td, TileDataEnc *tile_data,
-                          int mi_row, TOKENEXTRA **tp, int use_nonrd_mode) {
+static AOM_INLINE void encode_sb_row(AV1_COMP *cpi, ThreadData *td,
+                                     TileDataEnc *tile_data, int mi_row,
+                                     TOKENEXTRA **tp, int use_nonrd_mode) {
   AV1_COMMON *const cm = &cpi->common;
   const int num_planes = av1_num_planes(cm);
   const TileInfo *const tile_info = &tile_data->tile_info;
@@ -4227,7 +4248,7 @@
 #endif
 }
 
-static void init_encode_frame_mb_context(AV1_COMP *cpi) {
+static AOM_INLINE void init_encode_frame_mb_context(AV1_COMP *cpi) {
   AV1_COMMON *const cm = &cpi->common;
   const int num_planes = av1_num_planes(cm);
   MACROBLOCK *const x = &cpi->td.mb;
@@ -4353,7 +4374,7 @@
   }
 }
 
-static void encode_tiles(AV1_COMP *cpi) {
+static AOM_INLINE void encode_tiles(AV1_COMP *cpi) {
   AV1_COMMON *const cm = &cpi->common;
   const int tile_cols = cm->tile_cols;
   const int tile_rows = cm->tile_rows;
@@ -4455,7 +4476,7 @@
 }
 
 // Set the relative distance of a reference frame w.r.t. current frame
-static void set_rel_frame_dist(AV1_COMP *cpi) {
+static AOM_INLINE void set_rel_frame_dist(AV1_COMP *cpi) {
   const AV1_COMMON *const cm = &cpi->common;
   const OrderHintInfo *const order_hint_info = &cm->seq_params.order_hint_info;
   MV_REFERENCE_FRAME ref_frame;
@@ -4486,7 +4507,7 @@
 
 // Enforce the number of references for each arbitrary frame based on user
 // options and speed.
-static void enforce_max_ref_frames(AV1_COMP *cpi) {
+static AOM_INLINE void enforce_max_ref_frames(AV1_COMP *cpi) {
   MV_REFERENCE_FRAME ref_frame;
   int total_valid_refs = 0;
   for (ref_frame = LAST_FRAME; ref_frame <= ALTREF_FRAME; ++ref_frame) {
@@ -4613,7 +4634,7 @@
   return 0;
 }
 
-static void set_default_interp_skip_flags(AV1_COMP *cpi) {
+static AOM_INLINE void set_default_interp_skip_flags(AV1_COMP *cpi) {
   const int num_planes = av1_num_planes(&cpi->common);
   cpi->default_interp_skip_flags = (num_planes == 1)
                                        ? INTERP_SKIP_LUMA_EVAL_CHROMA
@@ -4630,7 +4651,7 @@
     return INT64_MAX;
 }
 
-static void encode_frame_internal(AV1_COMP *cpi) {
+static AOM_INLINE void encode_frame_internal(AV1_COMP *cpi) {
   ThreadData *const td = &cpi->td;
   MACROBLOCK *const x = &td->mb;
   AV1_COMMON *const cm = &cpi->common;
@@ -5178,10 +5199,10 @@
   }
 }
 
-static void update_txfm_count(MACROBLOCK *x, MACROBLOCKD *xd,
-                              FRAME_COUNTS *counts, TX_SIZE tx_size, int depth,
-                              int blk_row, int blk_col,
-                              uint8_t allow_update_cdf) {
+static AOM_INLINE void update_txfm_count(MACROBLOCK *x, MACROBLOCKD *xd,
+                                         FRAME_COUNTS *counts, TX_SIZE tx_size,
+                                         int depth, int blk_row, int blk_col,
+                                         uint8_t allow_update_cdf) {
   MB_MODE_INFO *mbmi = xd->mi[0];
   const BLOCK_SIZE bsize = mbmi->sb_type;
   const int max_blocks_high = max_block_high(xd, bsize, 0);
@@ -5244,10 +5265,9 @@
   }
 }
 
-static void tx_partition_count_update(const AV1_COMMON *const cm, MACROBLOCK *x,
-                                      BLOCK_SIZE plane_bsize, int mi_row,
-                                      int mi_col, FRAME_COUNTS *td_counts,
-                                      uint8_t allow_update_cdf) {
+static AOM_INLINE void tx_partition_count_update(
+    const AV1_COMMON *const cm, MACROBLOCK *x, BLOCK_SIZE plane_bsize,
+    int mi_row, int mi_col, FRAME_COUNTS *td_counts, uint8_t allow_update_cdf) {
   MACROBLOCKD *xd = &x->e_mbd;
   const int mi_width = block_size_wide[plane_bsize] >> tx_size_wide_log2[0];
   const int mi_height = block_size_high[plane_bsize] >> tx_size_high_log2[0];
@@ -5266,8 +5286,8 @@
                         allow_update_cdf);
 }
 
-static void set_txfm_context(MACROBLOCKD *xd, TX_SIZE tx_size, int blk_row,
-                             int blk_col) {
+static AOM_INLINE void set_txfm_context(MACROBLOCKD *xd, TX_SIZE tx_size,
+                                        int blk_row, int blk_col) {
   MB_MODE_INFO *mbmi = xd->mi[0];
   const BLOCK_SIZE bsize = mbmi->sb_type;
   const int max_blocks_high = max_block_high(xd, bsize, 0);
@@ -5304,9 +5324,10 @@
   }
 }
 
-static void tx_partition_set_contexts(const AV1_COMMON *const cm,
-                                      MACROBLOCKD *xd, BLOCK_SIZE plane_bsize,
-                                      int mi_row, int mi_col) {
+static AOM_INLINE void tx_partition_set_contexts(const AV1_COMMON *const cm,
+                                                 MACROBLOCKD *xd,
+                                                 BLOCK_SIZE plane_bsize,
+                                                 int mi_row, int mi_col) {
   const int mi_width = block_size_wide[plane_bsize] >> tx_size_wide_log2[0];
   const int mi_height = block_size_high[plane_bsize] >> tx_size_high_log2[0];
   const TX_SIZE max_tx_size = get_vartx_max_txsize(xd, plane_bsize, 0);
@@ -5323,10 +5344,11 @@
       set_txfm_context(xd, max_tx_size, idy, idx);
 }
 
-static void encode_superblock(const AV1_COMP *const cpi, TileDataEnc *tile_data,
-                              ThreadData *td, TOKENEXTRA **t, RUN_TYPE dry_run,
-                              int mi_row, int mi_col, BLOCK_SIZE bsize,
-                              int *rate) {
+static AOM_INLINE void encode_superblock(const AV1_COMP *const cpi,
+                                         TileDataEnc *tile_data, ThreadData *td,
+                                         TOKENEXTRA **t, RUN_TYPE dry_run,
+                                         int mi_row, int mi_col,
+                                         BLOCK_SIZE bsize, int *rate) {
   const AV1_COMMON *const cm = &cpi->common;
   const int num_planes = av1_num_planes(cm);
   MACROBLOCK *const x = &td->mb;
diff --git a/av1/encoder/ethread.c b/av1/encoder/ethread.c
index 1856d69..6424d63 100644
--- a/av1/encoder/ethread.c
+++ b/av1/encoder/ethread.c
@@ -16,7 +16,7 @@
 #include "av1/encoder/rdopt.h"
 #include "aom_dsp/aom_dsp_common.h"
 
-static void accumulate_rd_opt(ThreadData *td, ThreadData *td_t) {
+static AOM_INLINE void accumulate_rd_opt(ThreadData *td, ThreadData *td_t) {
   for (int i = 0; i < REFERENCE_MODES; i++)
     td->rd_counts.comp_pred_diff[i] += td_t->rd_counts.comp_pred_diff[i];
 
@@ -37,7 +37,7 @@
   }
 }
 
-static void update_delta_lf_for_row_mt(AV1_COMP *cpi) {
+static AOM_INLINE void update_delta_lf_for_row_mt(AV1_COMP *cpi) {
   AV1_COMMON *cm = &cpi->common;
   MACROBLOCKD *xd = &cpi->td.mb.e_mbd;
   const int mib_size = cm->seq_params.mib_size;
@@ -202,8 +202,8 @@
   }
 }
 
-static void assign_tile_to_thread(MultiThreadHandle *multi_thread_ctxt,
-                                  int num_tiles, int num_workers) {
+static AOM_INLINE void assign_tile_to_thread(
+    MultiThreadHandle *multi_thread_ctxt, int num_tiles, int num_workers) {
   int tile_id = 0;
   int i;
 
@@ -228,9 +228,10 @@
   return 0;
 }
 
-static void switch_tile_and_get_next_job(AV1_COMP *const cpi, int *cur_tile_id,
-                                         int *current_mi_row,
-                                         int *end_of_frame) {
+static AOM_INLINE void switch_tile_and_get_next_job(AV1_COMP *const cpi,
+                                                    int *cur_tile_id,
+                                                    int *current_mi_row,
+                                                    int *end_of_frame) {
   AV1_COMMON *const cm = &cpi->common;
   const int tile_cols = cm->tile_cols;
   const int tile_rows = cm->tile_rows;
@@ -382,7 +383,7 @@
   return 1;
 }
 
-static void create_enc_workers(AV1_COMP *cpi, int num_workers) {
+static AOM_INLINE void create_enc_workers(AV1_COMP *cpi, int num_workers) {
   AV1_COMMON *const cm = &cpi->common;
   const AVxWorkerInterface *const winterface = aom_get_worker_interface();
   int sb_mi_size = av1_get_sb_mi_size(cm);
@@ -496,7 +497,7 @@
   }
 }
 
-static void launch_enc_workers(AV1_COMP *cpi, int num_workers) {
+static AOM_INLINE void launch_enc_workers(AV1_COMP *cpi, int num_workers) {
   const AVxWorkerInterface *const winterface = aom_get_worker_interface();
   // Encode a frame
   for (int i = num_workers - 1; i >= 0; i--) {
@@ -513,7 +514,7 @@
   }
 }
 
-static void sync_enc_workers(AV1_COMP *cpi, int num_workers) {
+static AOM_INLINE void sync_enc_workers(AV1_COMP *cpi, int num_workers) {
   const AVxWorkerInterface *const winterface = aom_get_worker_interface();
   int had_error = 0;
 
@@ -528,7 +529,8 @@
                        "Failed to encode tile data");
 }
 
-static void accumulate_counters_enc_workers(AV1_COMP *cpi, int num_workers) {
+static AOM_INLINE void accumulate_counters_enc_workers(AV1_COMP *cpi,
+                                                       int num_workers) {
   for (int i = num_workers - 1; i >= 0; i--) {
     AVxWorker *const worker = &cpi->workers[i];
     EncWorkerData *const thread_data = (EncWorkerData *)worker->data1;
@@ -547,8 +549,8 @@
   }
 }
 
-static void prepare_enc_workers(AV1_COMP *cpi, AVxWorkerHook hook,
-                                int num_workers) {
+static AOM_INLINE void prepare_enc_workers(AV1_COMP *cpi, AVxWorkerHook hook,
+                                           int num_workers) {
   for (int i = num_workers - 1; i >= 0; i--) {
     AVxWorker *const worker = &cpi->workers[i];
     EncWorkerData *const thread_data = &cpi->tile_thr_data[i];
diff --git a/av1/encoder/firstpass.c b/av1/encoder/firstpass.c
index cafd443..fb8dff0 100644
--- a/av1/encoder/firstpass.c
+++ b/av1/encoder/firstpass.c
@@ -53,8 +53,8 @@
 #define NCOUNT_INTRA_THRESH 8192
 #define NCOUNT_INTRA_FACTOR 3
 
-static void output_stats(FIRSTPASS_STATS *stats,
-                         struct aom_codec_pkt_list *pktlist) {
+static AOM_INLINE void output_stats(FIRSTPASS_STATS *stats,
+                                    struct aom_codec_pkt_list *pktlist) {
   struct aom_codec_cx_pkt pkt;
   pkt.kind = AOM_CODEC_STATS_PKT;
   pkt.data.twopass_stats.buf = stats;
@@ -109,8 +109,8 @@
   section->duration = 1.0;
 }
 
-static void accumulate_stats(FIRSTPASS_STATS *section,
-                             const FIRSTPASS_STATS *frame) {
+static AOM_INLINE void accumulate_stats(FIRSTPASS_STATS *section,
+                                        const FIRSTPASS_STATS *frame) {
   section->frame += frame->frame;
   section->weight += frame->weight;
   section->intra_error += frame->intra_error;
@@ -214,9 +214,9 @@
   return sr;
 }
 
-static void first_pass_motion_search(AV1_COMP *cpi, MACROBLOCK *x,
-                                     const MV *ref_mv, MV *best_mv,
-                                     int *best_motion_err) {
+static AOM_INLINE void first_pass_motion_search(AV1_COMP *cpi, MACROBLOCK *x,
+                                                const MV *ref_mv, MV *best_mv,
+                                                int *best_motion_err) {
   MACROBLOCKD *const xd = &x->e_mbd;
   MV tmp_mv = kZeroMv;
   MV ref_mv_full = { ref_mv->row >> 3, ref_mv->col >> 3 };
diff --git a/av1/encoder/mcomp.c b/av1/encoder/mcomp.c
index 24b27ff..ebe9e22 100644
--- a/av1/encoder/mcomp.c
+++ b/av1/encoder/mcomp.c
@@ -383,7 +383,8 @@
 // x0 = 1/2 (S1 - S3)/(S1 + S3 - 2*S0),
 // y0 = 1/2 (S4 - S2)/(S4 + S2 - 2*S0).
 // The code below is an integerized version of that.
-static void get_cost_surf_min(int *cost_list, int *ir, int *ic, int bits) {
+static AOM_INLINE void get_cost_surf_min(int *cost_list, int *ir, int *ic,
+                                         int bits) {
   *ic = divide_and_round((cost_list[1] - cost_list[3]) * (1 << (bits - 1)),
                          (cost_list[1] - 2 * cost_list[0] + cost_list[3]));
   *ir = divide_and_round((cost_list[4] - cost_list[2]) * (1 << (bits - 1)),
diff --git a/av1/encoder/partition_strategy.c b/av1/encoder/partition_strategy.c
index 4dcfe0c..7170fdd 100644
--- a/av1/encoder/partition_strategy.c
+++ b/av1/encoder/partition_strategy.c
@@ -29,7 +29,7 @@
 #include "av1/encoder/rdopt.h"
 
 #if !CONFIG_REALTIME_ONLY
-static void simple_motion_search_prune_part_features(
+static AOM_INLINE void simple_motion_search_prune_part_features(
     AV1_COMP *const cpi, MACROBLOCK *x, PC_TREE *pc_tree, int mi_row,
     int mi_col, BLOCK_SIZE bsize, float *features, int features_to_get);
 #endif
@@ -377,7 +377,7 @@
 //  - whether a left marcoblock exists
 //  - width of left macroblock
 //  - height of left macroblock
-static void simple_motion_search_prune_part_features(
+static AOM_INLINE void simple_motion_search_prune_part_features(
     AV1_COMP *const cpi, MACROBLOCK *x, PC_TREE *pc_tree, int mi_row,
     int mi_col, BLOCK_SIZE bsize, float *features, int features_to_get) {
   const int w_mi = mi_size_wide[bsize];
@@ -769,7 +769,8 @@
 
 // Get the minimum partition block width and height(in log scale) under a
 // PC_TREE.
-static void get_min_bsize(const PC_TREE *pc_tree, int *min_bw, int *min_bh) {
+static AOM_INLINE void get_min_bsize(const PC_TREE *pc_tree, int *min_bw,
+                                     int *min_bh) {
   if (!pc_tree) return;
 
   const BLOCK_SIZE bsize = pc_tree->block_size;
diff --git a/av1/encoder/pickrst.c b/av1/encoder/pickrst.c
index a2b3ed8..6908a72 100644
--- a/av1/encoder/pickrst.c
+++ b/av1/encoder/pickrst.c
@@ -131,22 +131,23 @@
   AV1PixelRect tile_rect;
 } RestSearchCtxt;
 
-static void rsc_on_tile(void *priv) {
+static AOM_INLINE void rsc_on_tile(void *priv) {
   RestSearchCtxt *rsc = (RestSearchCtxt *)priv;
   set_default_sgrproj(&rsc->sgrproj);
   set_default_wiener(&rsc->wiener);
   rsc->tile_stripe0 = 0;
 }
 
-static void reset_rsc(RestSearchCtxt *rsc) {
+static AOM_INLINE void reset_rsc(RestSearchCtxt *rsc) {
   rsc->sse = 0;
   rsc->bits = 0;
 }
 
-static void init_rsc(const YV12_BUFFER_CONFIG *src, const AV1_COMMON *cm,
-                     const MACROBLOCK *x, const SPEED_FEATURES *sf, int plane,
-                     RestUnitSearchInfo *rusi, YV12_BUFFER_CONFIG *dst,
-                     RestSearchCtxt *rsc) {
+static AOM_INLINE void init_rsc(const YV12_BUFFER_CONFIG *src,
+                                const AV1_COMMON *cm, const MACROBLOCK *x,
+                                const SPEED_FEATURES *sf, int plane,
+                                RestUnitSearchInfo *rusi,
+                                YV12_BUFFER_CONFIG *dst, RestSearchCtxt *rsc) {
   rsc->src = src;
   rsc->dst = dst;
   rsc->cm = cm;
@@ -428,12 +429,13 @@
     return (dividend + divisor / 2) / divisor;
 }
 
-static void get_proj_subspace(const uint8_t *src8, int width, int height,
-                              int src_stride, const uint8_t *dat8,
-                              int dat_stride, int use_highbitdepth,
-                              int32_t *flt0, int flt0_stride, int32_t *flt1,
-                              int flt1_stride, int *xq,
-                              const sgr_params_type *params) {
+static AOM_INLINE void get_proj_subspace(const uint8_t *src8, int width,
+                                         int height, int src_stride,
+                                         const uint8_t *dat8, int dat_stride,
+                                         int use_highbitdepth, int32_t *flt0,
+                                         int flt0_stride, int32_t *flt1,
+                                         int flt1_stride, int *xq,
+                                         const sgr_params_type *params) {
   int i, j;
   int64_t H[2][2] = { { 0, 0 }, { 0, 0 } };
   int64_t C[2] = { 0, 0 };
@@ -525,7 +527,8 @@
   }
 }
 
-static void encode_xq(int *xq, int *xqd, const sgr_params_type *params) {
+static AOM_INLINE void encode_xq(int *xq, int *xqd,
+                                 const sgr_params_type *params) {
   if (params->r[0] == 0) {
     xqd[0] = 0;
     xqd[1] = clamp((1 << SGRPROJ_PRJ_BITS) - xq[1], SGRPROJ_PRJ_MIN1,
@@ -542,10 +545,11 @@
 }
 
 // Apply the self-guided filter across an entire restoration unit.
-static void apply_sgr(int sgr_params_idx, const uint8_t *dat8, int width,
-                      int height, int dat_stride, int use_highbd, int bit_depth,
-                      int pu_width, int pu_height, int32_t *flt0, int32_t *flt1,
-                      int flt_stride) {
+static AOM_INLINE void apply_sgr(int sgr_params_idx, const uint8_t *dat8,
+                                 int width, int height, int dat_stride,
+                                 int use_highbd, int bit_depth, int pu_width,
+                                 int pu_height, int32_t *flt0, int32_t *flt1,
+                                 int flt_stride) {
   for (int i = 0; i < height; i += pu_height) {
     const int h = AOMMIN(pu_height, height - i);
     int32_t *flt0_row = flt0 + i * flt_stride;
@@ -564,13 +568,12 @@
   }
 }
 
-static void compute_sgrproj_err(const uint8_t *dat8, const int width,
-                                const int height, const int dat_stride,
-                                const uint8_t *src8, const int src_stride,
-                                const int use_highbitdepth, const int bit_depth,
-                                const int pu_width, const int pu_height,
-                                const int ep, int32_t *flt0, int32_t *flt1,
-                                const int flt_stride, int *exqd, int64_t *err) {
+static AOM_INLINE void compute_sgrproj_err(
+    const uint8_t *dat8, const int width, const int height,
+    const int dat_stride, const uint8_t *src8, const int src_stride,
+    const int use_highbitdepth, const int bit_depth, const int pu_width,
+    const int pu_height, const int ep, int32_t *flt0, int32_t *flt1,
+    const int flt_stride, int *exqd, int64_t *err) {
   int exq[2];
   apply_sgr(ep, dat8, width, height, dat_stride, use_highbitdepth, bit_depth,
             pu_width, pu_height, flt0, flt1, flt_stride);
@@ -586,8 +589,9 @@
       flt_stride, flt1, flt_stride, 2, exqd, params);
 }
 
-static void get_best_error(int64_t *besterr, const int64_t err, const int *exqd,
-                           int *bestxqd, int *bestep, const int ep) {
+static AOM_INLINE void get_best_error(int64_t *besterr, const int64_t err,
+                                      const int *exqd, int *bestxqd,
+                                      int *bestep, const int ep) {
   if (*besterr == -1 || err < *besterr) {
     *bestep = ep;
     *besterr = err;
@@ -674,10 +678,11 @@
   return bits;
 }
 
-static void search_sgrproj(const RestorationTileLimits *limits,
-                           const AV1PixelRect *tile, int rest_unit_idx,
-                           void *priv, int32_t *tmpbuf,
-                           RestorationLineBuffers *rlbs) {
+static AOM_INLINE void search_sgrproj(const RestorationTileLimits *limits,
+                                      const AV1PixelRect *tile,
+                                      int rest_unit_idx, void *priv,
+                                      int32_t *tmpbuf,
+                                      RestorationLineBuffers *rlbs) {
   (void)rlbs;
   RestSearchCtxt *rsc = (RestSearchCtxt *)priv;
   RestUnitSearchInfo *rusi = &rsc->rusi[rest_unit_idx];
@@ -876,8 +881,8 @@
 }
 
 // Fix vector b, update vector a
-static void update_a_sep_sym(int wiener_win, int64_t **Mc, int64_t **Hc,
-                             int32_t *a, int32_t *b) {
+static AOM_INLINE void update_a_sep_sym(int wiener_win, int64_t **Mc,
+                                        int64_t **Hc, int32_t *a, int32_t *b) {
   int i, j;
   int32_t S[WIENER_WIN];
   int64_t A[WIENER_HALFWIN1], B[WIENER_HALFWIN1 * WIENER_HALFWIN1];
@@ -932,8 +937,8 @@
 }
 
 // Fix vector a, update vector b
-static void update_b_sep_sym(int wiener_win, int64_t **Mc, int64_t **Hc,
-                             int32_t *a, int32_t *b) {
+static AOM_INLINE void update_b_sep_sym(int wiener_win, int64_t **Mc,
+                                        int64_t **Hc, int32_t *a, int32_t *b) {
   int i, j;
   int32_t S[WIENER_WIN];
   int64_t A[WIENER_HALFWIN1], B[WIENER_HALFWIN1 * WIENER_HALFWIN1];
@@ -1065,7 +1070,8 @@
   return Score - iScore;
 }
 
-static void finalize_sym_filter(int wiener_win, int32_t *f, InterpKernel fi) {
+static AOM_INLINE void finalize_sym_filter(int wiener_win, int32_t *f,
+                                           InterpKernel fi) {
   int i;
   const int wiener_halfwin = (wiener_win >> 1);
 
@@ -1243,10 +1249,11 @@
   return err;
 }
 
-static void search_wiener(const RestorationTileLimits *limits,
-                          const AV1PixelRect *tile_rect, int rest_unit_idx,
-                          void *priv, int32_t *tmpbuf,
-                          RestorationLineBuffers *rlbs) {
+static AOM_INLINE void search_wiener(const RestorationTileLimits *limits,
+                                     const AV1PixelRect *tile_rect,
+                                     int rest_unit_idx, void *priv,
+                                     int32_t *tmpbuf,
+                                     RestorationLineBuffers *rlbs) {
   (void)tmpbuf;
   (void)rlbs;
   RestSearchCtxt *rsc = (RestSearchCtxt *)priv;
@@ -1338,10 +1345,11 @@
   if (cost_wiener < cost_none) rsc->wiener = rusi->wiener;
 }
 
-static void search_norestore(const RestorationTileLimits *limits,
-                             const AV1PixelRect *tile_rect, int rest_unit_idx,
-                             void *priv, int32_t *tmpbuf,
-                             RestorationLineBuffers *rlbs) {
+static AOM_INLINE void search_norestore(const RestorationTileLimits *limits,
+                                        const AV1PixelRect *tile_rect,
+                                        int rest_unit_idx, void *priv,
+                                        int32_t *tmpbuf,
+                                        RestorationLineBuffers *rlbs) {
   (void)tile_rect;
   (void)tmpbuf;
   (void)rlbs;
@@ -1356,10 +1364,11 @@
   rsc->sse += rusi->sse[RESTORE_NONE];
 }
 
-static void search_switchable(const RestorationTileLimits *limits,
-                              const AV1PixelRect *tile_rect, int rest_unit_idx,
-                              void *priv, int32_t *tmpbuf,
-                              RestorationLineBuffers *rlbs) {
+static AOM_INLINE void search_switchable(const RestorationTileLimits *limits,
+                                         const AV1PixelRect *tile_rect,
+                                         int rest_unit_idx, void *priv,
+                                         int32_t *tmpbuf,
+                                         RestorationLineBuffers *rlbs) {
   (void)limits;
   (void)tile_rect;
   (void)tmpbuf;
@@ -1418,9 +1427,9 @@
   if (best_rtype == RESTORE_SGRPROJ) rsc->sgrproj = rusi->sgrproj;
 }
 
-static void copy_unit_info(RestorationType frame_rtype,
-                           const RestUnitSearchInfo *rusi,
-                           RestorationUnitInfo *rui) {
+static AOM_INLINE void copy_unit_info(RestorationType frame_rtype,
+                                      const RestUnitSearchInfo *rusi,
+                                      RestorationUnitInfo *rui) {
   assert(frame_rtype > 0);
   rui->restoration_type = rusi->best_rtype[frame_rtype - 1];
   if (rui->restoration_type == RESTORE_WIENER)
diff --git a/av1/encoder/rdopt.c b/av1/encoder/rdopt.c
index b857d0b..4b423d9 100644
--- a/av1/encoder/rdopt.c
+++ b/av1/encoder/rdopt.c
@@ -79,51 +79,51 @@
                                        int64_t sse, int num_samples, int *rate,
                                        int64_t *dist);
 
-static void model_rd_for_sb(const AV1_COMP *const cpi, BLOCK_SIZE bsize,
-                            MACROBLOCK *x, MACROBLOCKD *xd, int plane_from,
-                            int plane_to, int mi_row, int mi_col,
-                            int *out_rate_sum, int64_t *out_dist_sum,
-                            int *skip_txfm_sb, int64_t *skip_sse_sb,
-                            int *plane_rate, int64_t *plane_sse,
-                            int64_t *plane_dist);
-static void model_rd_for_sb_with_curvfit(
+static AOM_INLINE void model_rd_for_sb(
     const AV1_COMP *const cpi, BLOCK_SIZE bsize, MACROBLOCK *x, MACROBLOCKD *xd,
     int plane_from, int plane_to, int mi_row, int mi_col, int *out_rate_sum,
     int64_t *out_dist_sum, int *skip_txfm_sb, int64_t *skip_sse_sb,
     int *plane_rate, int64_t *plane_sse, int64_t *plane_dist);
-static void model_rd_for_sb_with_surffit(
+static AOM_INLINE void model_rd_for_sb_with_curvfit(
     const AV1_COMP *const cpi, BLOCK_SIZE bsize, MACROBLOCK *x, MACROBLOCKD *xd,
     int plane_from, int plane_to, int mi_row, int mi_col, int *out_rate_sum,
     int64_t *out_dist_sum, int *skip_txfm_sb, int64_t *skip_sse_sb,
     int *plane_rate, int64_t *plane_sse, int64_t *plane_dist);
-static void model_rd_for_sb_with_dnn(
+static AOM_INLINE void model_rd_for_sb_with_surffit(
     const AV1_COMP *const cpi, BLOCK_SIZE bsize, MACROBLOCK *x, MACROBLOCKD *xd,
     int plane_from, int plane_to, int mi_row, int mi_col, int *out_rate_sum,
     int64_t *out_dist_sum, int *skip_txfm_sb, int64_t *skip_sse_sb,
     int *plane_rate, int64_t *plane_sse, int64_t *plane_dist);
-static void model_rd_for_sb_with_fullrdy(
+static AOM_INLINE void model_rd_for_sb_with_dnn(
     const AV1_COMP *const cpi, BLOCK_SIZE bsize, MACROBLOCK *x, MACROBLOCKD *xd,
     int plane_from, int plane_to, int mi_row, int mi_col, int *out_rate_sum,
     int64_t *out_dist_sum, int *skip_txfm_sb, int64_t *skip_sse_sb,
     int *plane_rate, int64_t *plane_sse, int64_t *plane_dist);
-static void model_rd_from_sse(const AV1_COMP *const cpi,
-                              const MACROBLOCK *const x, BLOCK_SIZE plane_bsize,
-                              int plane, int64_t sse, int num_samples,
-                              int *rate, int64_t *dist);
-static void model_rd_with_dnn(const AV1_COMP *const cpi,
-                              const MACROBLOCK *const x, BLOCK_SIZE plane_bsize,
-                              int plane, int64_t sse, int num_samples,
-                              int *rate, int64_t *dist);
-static void model_rd_with_curvfit(const AV1_COMP *const cpi,
-                                  const MACROBLOCK *const x,
-                                  BLOCK_SIZE plane_bsize, int plane,
-                                  int64_t sse, int num_samples, int *rate,
-                                  int64_t *dist);
-static void model_rd_with_surffit(const AV1_COMP *const cpi,
-                                  const MACROBLOCK *const x,
-                                  BLOCK_SIZE plane_bsize, int plane,
-                                  int64_t sse, int num_samples, int *rate,
-                                  int64_t *dist);
+static AOM_INLINE void model_rd_for_sb_with_fullrdy(
+    const AV1_COMP *const cpi, BLOCK_SIZE bsize, MACROBLOCK *x, MACROBLOCKD *xd,
+    int plane_from, int plane_to, int mi_row, int mi_col, int *out_rate_sum,
+    int64_t *out_dist_sum, int *skip_txfm_sb, int64_t *skip_sse_sb,
+    int *plane_rate, int64_t *plane_sse, int64_t *plane_dist);
+static AOM_INLINE void model_rd_from_sse(const AV1_COMP *const cpi,
+                                         const MACROBLOCK *const x,
+                                         BLOCK_SIZE plane_bsize, int plane,
+                                         int64_t sse, int num_samples,
+                                         int *rate, int64_t *dist);
+static AOM_INLINE void model_rd_with_dnn(const AV1_COMP *const cpi,
+                                         const MACROBLOCK *const x,
+                                         BLOCK_SIZE plane_bsize, int plane,
+                                         int64_t sse, int num_samples,
+                                         int *rate, int64_t *dist);
+static AOM_INLINE void model_rd_with_curvfit(const AV1_COMP *const cpi,
+                                             const MACROBLOCK *const x,
+                                             BLOCK_SIZE plane_bsize, int plane,
+                                             int64_t sse, int num_samples,
+                                             int *rate, int64_t *dist);
+static AOM_INLINE void model_rd_with_surffit(const AV1_COMP *const cpi,
+                                             const MACROBLOCK *const x,
+                                             BLOCK_SIZE plane_bsize, int plane,
+                                             int64_t sse, int num_samples,
+                                             int *rate, int64_t *dist);
 
 enum {
   MODELRD_LEGACY,
@@ -851,8 +851,9 @@
   }
 }
 
-static void inter_mode_data_push(TileDataEnc *tile_data, BLOCK_SIZE bsize,
-                                 int64_t sse, int64_t dist, int residue_cost) {
+static AOM_INLINE void inter_mode_data_push(TileDataEnc *tile_data,
+                                            BLOCK_SIZE bsize, int64_t sse,
+                                            int64_t dist, int residue_cost) {
   if (residue_cost == 0 || sse == dist) return;
   const int block_idx = inter_mode_data_block_idx(bsize);
   if (block_idx == -1) return;
@@ -869,12 +870,10 @@
   }
 }
 
-static void inter_modes_info_push(InterModesInfo *inter_modes_info,
-                                  int mode_rate, int64_t sse, int64_t rd,
-                                  bool true_rd, uint8_t *blk_skip,
-                                  RD_STATS *rd_cost, RD_STATS *rd_cost_y,
-                                  RD_STATS *rd_cost_uv,
-                                  const MB_MODE_INFO *mbmi) {
+static AOM_INLINE void inter_modes_info_push(
+    InterModesInfo *inter_modes_info, int mode_rate, int64_t sse, int64_t rd,
+    bool true_rd, uint8_t *blk_skip, RD_STATS *rd_cost, RD_STATS *rd_cost_y,
+    RD_STATS *rd_cost_uv, const MB_MODE_INFO *mbmi) {
   const int num = inter_modes_info->num;
   assert(num < MAX_INTER_MODES);
   inter_modes_info->mbmi_arr[num] = *mbmi;
@@ -902,8 +901,8 @@
   }
 }
 
-static void inter_modes_info_sort(const InterModesInfo *inter_modes_info,
-                                  RdIdxPair *rd_idx_pair_arr) {
+static AOM_INLINE void inter_modes_info_sort(
+    const InterModesInfo *inter_modes_info, RdIdxPair *rd_idx_pair_arr) {
   if (inter_modes_info->num == 0) {
     return;
   }
@@ -1375,9 +1374,10 @@
 }
 #endif  // CONFIG_DIST_8X8
 
-static void get_energy_distribution_finer(const int16_t *diff, int stride,
-                                          int bw, int bh, float *hordist,
-                                          float *verdist) {
+static AOM_INLINE void get_energy_distribution_finer(const int16_t *diff,
+                                                     int stride, int bw, int bh,
+                                                     float *hordist,
+                                                     float *verdist) {
   // First compute downscaled block energy values (esq); downscale factors
   // are defined by w_shift and h_shift.
   unsigned int esq[256];
@@ -1723,10 +1723,11 @@
   return prune_bitmask;
 }
 
-static void model_rd_from_sse(const AV1_COMP *const cpi,
-                              const MACROBLOCK *const x, BLOCK_SIZE plane_bsize,
-                              int plane, int64_t sse, int num_samples,
-                              int *rate, int64_t *dist) {
+static AOM_INLINE void model_rd_from_sse(const AV1_COMP *const cpi,
+                                         const MACROBLOCK *const x,
+                                         BLOCK_SIZE plane_bsize, int plane,
+                                         int64_t sse, int num_samples,
+                                         int *rate, int64_t *dist) {
   (void)num_samples;
   const MACROBLOCKD *const xd = &x->e_mbd;
   const struct macroblock_plane *const p = &x->plane[plane];
@@ -1796,13 +1797,11 @@
   return sse;
 }
 
-static void model_rd_for_sb(const AV1_COMP *const cpi, BLOCK_SIZE bsize,
-                            MACROBLOCK *x, MACROBLOCKD *xd, int plane_from,
-                            int plane_to, int mi_row, int mi_col,
-                            int *out_rate_sum, int64_t *out_dist_sum,
-                            int *skip_txfm_sb, int64_t *skip_sse_sb,
-                            int *plane_rate, int64_t *plane_sse,
-                            int64_t *plane_dist) {
+static AOM_INLINE void model_rd_for_sb(
+    const AV1_COMP *const cpi, BLOCK_SIZE bsize, MACROBLOCK *x, MACROBLOCKD *xd,
+    int plane_from, int plane_to, int mi_row, int mi_col, int *out_rate_sum,
+    int64_t *out_dist_sum, int *skip_txfm_sb, int64_t *skip_sse_sb,
+    int *plane_rate, int64_t *plane_sse, int64_t *plane_dist) {
   // Note our transform coeffs are 8 times an orthogonal transform.
   // Hence quantizer step is also 8 times. To get effective quantizer
   // we need to divide by 8 before sending to modeling function.
@@ -1890,10 +1889,12 @@
 }
 
 // Get transform block visible dimensions cropped to the MI units.
-static void get_txb_dimensions(const MACROBLOCKD *xd, int plane,
-                               BLOCK_SIZE plane_bsize, int blk_row, int blk_col,
-                               BLOCK_SIZE tx_bsize, int *width, int *height,
-                               int *visible_width, int *visible_height) {
+static AOM_INLINE void get_txb_dimensions(const MACROBLOCKD *xd, int plane,
+                                          BLOCK_SIZE plane_bsize, int blk_row,
+                                          int blk_col, BLOCK_SIZE tx_bsize,
+                                          int *width, int *height,
+                                          int *visible_width,
+                                          int *visible_height) {
   assert(tx_bsize <= plane_bsize);
   const int txb_height = block_size_high[tx_bsize];
   const int txb_width = block_size_wide[tx_bsize];
@@ -2036,9 +2037,11 @@
   return n;
 }
 
-static void inverse_transform_block_facade(MACROBLOCKD *xd, int plane,
-                                           int block, int blk_row, int blk_col,
-                                           int eob, int reduced_tx_set) {
+static AOM_INLINE void inverse_transform_block_facade(MACROBLOCKD *xd,
+                                                      int plane, int block,
+                                                      int blk_row, int blk_col,
+                                                      int eob,
+                                                      int reduced_tx_set) {
   struct macroblockd_plane *const pd = &xd->plane[plane];
   tran_low_t *dqcoeff = BLOCK_OFFSET(pd->dqcoeff, block);
   const PLANE_TYPE plane_type = get_plane_type(plane);
@@ -2210,7 +2213,7 @@
   return sum / (w * h);
 }
 
-static void get_2x2_normalized_sses_and_sads(
+static AOM_INLINE void get_2x2_normalized_sses_and_sads(
     const AV1_COMP *const cpi, BLOCK_SIZE tx_bsize, const uint8_t *const src,
     int src_stride, const uint8_t *const dst, int dst_stride,
     const int16_t *const src_diff, int diff_stride, double *const sse_norm_arr,
@@ -2268,11 +2271,10 @@
 // 2: Collect RD stats for partition units
 #if CONFIG_COLLECT_RD_STATS
 
-static void get_energy_distribution_fine(const AV1_COMP *cpi, BLOCK_SIZE bsize,
-                                         const uint8_t *src, int src_stride,
-                                         const uint8_t *dst, int dst_stride,
-                                         int need_4th, double *hordist,
-                                         double *verdist) {
+static AOM_INLINE void get_energy_distribution_fine(
+    const AV1_COMP *cpi, BLOCK_SIZE bsize, const uint8_t *src, int src_stride,
+    const uint8_t *dst, int dst_stride, int need_4th, double *hordist,
+    double *verdist) {
   const int bw = block_size_wide[bsize];
   const int bh = block_size_high[bsize];
   unsigned int esq[16] = { 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 };
@@ -2388,11 +2390,10 @@
   return sum / (w * h);
 }
 
-static void PrintTransformUnitStats(const AV1_COMP *const cpi, MACROBLOCK *x,
-                                    const RD_STATS *const rd_stats, int blk_row,
-                                    int blk_col, BLOCK_SIZE plane_bsize,
-                                    TX_SIZE tx_size, TX_TYPE tx_type,
-                                    int64_t rd) {
+static AOM_INLINE void PrintTransformUnitStats(
+    const AV1_COMP *const cpi, MACROBLOCK *x, const RD_STATS *const rd_stats,
+    int blk_row, int blk_col, BLOCK_SIZE plane_bsize, TX_SIZE tx_size,
+    TX_TYPE tx_type, int64_t rd) {
   if (rd_stats->rate == INT_MAX || rd_stats->dist == INT64_MAX) return;
 
   // Generate small sample to restrict output size.
@@ -2484,11 +2485,11 @@
 #endif  // CONFIG_COLLECT_RD_STATS == 1
 
 #if CONFIG_COLLECT_RD_STATS >= 2
-static void PrintPredictionUnitStats(const AV1_COMP *const cpi,
-                                     const TileDataEnc *tile_data,
-                                     MACROBLOCK *x,
-                                     const RD_STATS *const rd_stats,
-                                     BLOCK_SIZE plane_bsize) {
+static AOM_INLINE void PrintPredictionUnitStats(const AV1_COMP *const cpi,
+                                                const TileDataEnc *tile_data,
+                                                MACROBLOCK *x,
+                                                const RD_STATS *const rd_stats,
+                                                BLOCK_SIZE plane_bsize) {
   if (rd_stats->invalid_rate) return;
   if (rd_stats->rate == INT_MAX || rd_stats->dist == INT64_MAX) return;
 
@@ -2612,10 +2613,11 @@
 #endif  // CONFIG_COLLECT_RD_STATS >= 2
 #endif  // CONFIG_COLLECT_RD_STATS
 
-static void model_rd_with_dnn(const AV1_COMP *const cpi,
-                              const MACROBLOCK *const x, BLOCK_SIZE plane_bsize,
-                              int plane, int64_t sse, int num_samples,
-                              int *rate, int64_t *dist) {
+static AOM_INLINE void model_rd_with_dnn(const AV1_COMP *const cpi,
+                                         const MACROBLOCK *const x,
+                                         BLOCK_SIZE plane_bsize, int plane,
+                                         int64_t sse, int num_samples,
+                                         int *rate, int64_t *dist) {
   const MACROBLOCKD *const xd = &x->e_mbd;
   const struct macroblockd_plane *const pd = &xd->plane[plane];
   const struct macroblock_plane *const p = &x->plane[plane];
@@ -2711,7 +2713,7 @@
   return;
 }
 
-static void model_rd_for_sb_with_dnn(
+static AOM_INLINE void model_rd_for_sb_with_dnn(
     const AV1_COMP *const cpi, BLOCK_SIZE bsize, MACROBLOCK *x, MACROBLOCKD *xd,
     int plane_from, int plane_to, int mi_row, int mi_col, int *out_rate_sum,
     int64_t *out_dist_sum, int *skip_txfm_sb, int64_t *skip_sse_sb,
@@ -2763,11 +2765,11 @@
 
 // Fits a surface for rate and distortion using as features:
 // log2(sse_norm + 1) and log2(sse_norm/qstep^2)
-static void model_rd_with_surffit(const AV1_COMP *const cpi,
-                                  const MACROBLOCK *const x,
-                                  BLOCK_SIZE plane_bsize, int plane,
-                                  int64_t sse, int num_samples, int *rate,
-                                  int64_t *dist) {
+static AOM_INLINE void model_rd_with_surffit(const AV1_COMP *const cpi,
+                                             const MACROBLOCK *const x,
+                                             BLOCK_SIZE plane_bsize, int plane,
+                                             int64_t sse, int num_samples,
+                                             int *rate, int64_t *dist) {
   (void)cpi;
   (void)plane_bsize;
   const MACROBLOCKD *const xd = &x->e_mbd;
@@ -2807,7 +2809,7 @@
   if (dist) *dist = dist_i;
 }
 
-static void model_rd_for_sb_with_surffit(
+static AOM_INLINE void model_rd_for_sb_with_surffit(
     const AV1_COMP *const cpi, BLOCK_SIZE bsize, MACROBLOCK *x, MACROBLOCKD *xd,
     int plane_from, int plane_to, int mi_row, int mi_col, int *out_rate_sum,
     int64_t *out_dist_sum, int *skip_txfm_sb, int64_t *skip_sse_sb,
@@ -2860,11 +2862,11 @@
 
 // Fits a curve for rate and distortion using as feature:
 // log2(sse_norm/qstep^2)
-static void model_rd_with_curvfit(const AV1_COMP *const cpi,
-                                  const MACROBLOCK *const x,
-                                  BLOCK_SIZE plane_bsize, int plane,
-                                  int64_t sse, int num_samples, int *rate,
-                                  int64_t *dist) {
+static AOM_INLINE void model_rd_with_curvfit(const AV1_COMP *const cpi,
+                                             const MACROBLOCK *const x,
+                                             BLOCK_SIZE plane_bsize, int plane,
+                                             int64_t sse, int num_samples,
+                                             int *rate, int64_t *dist) {
   (void)cpi;
   (void)plane_bsize;
   const MACROBLOCKD *const xd = &x->e_mbd;
@@ -2904,7 +2906,7 @@
   if (dist) *dist = dist_i;
 }
 
-static void model_rd_for_sb_with_curvfit(
+static AOM_INLINE void model_rd_for_sb_with_curvfit(
     const AV1_COMP *const cpi, BLOCK_SIZE bsize, MACROBLOCK *x, MACROBLOCKD *xd,
     int plane_from, int plane_to, int mi_row, int mi_col, int *out_rate_sum,
     int64_t *out_dist_sum, int *skip_txfm_sb, int64_t *skip_sse_sb,
@@ -3390,8 +3392,9 @@
   return best_rd;
 }
 
-static void block_rd_txfm(int plane, int block, int blk_row, int blk_col,
-                          BLOCK_SIZE plane_bsize, TX_SIZE tx_size, void *arg) {
+static AOM_INLINE void block_rd_txfm(int plane, int block, int blk_row,
+                                     int blk_col, BLOCK_SIZE plane_bsize,
+                                     TX_SIZE tx_size, void *arg) {
   struct rdcost_block_args *args = arg;
   MACROBLOCK *const x = args->x;
   MACROBLOCKD *const xd = &x->e_mbd;
@@ -3455,11 +3458,13 @@
   if (args->this_rd > args->best_rd) args->exit_early = 1;
 }
 
-static void txfm_rd_in_plane(MACROBLOCK *x, const AV1_COMP *cpi,
-                             RD_STATS *rd_stats, int64_t ref_best_rd,
-                             int64_t this_rd, int plane, BLOCK_SIZE bsize,
-                             TX_SIZE tx_size, int use_fast_coef_casting,
-                             FAST_TX_SEARCH_MODE ftxs_mode, int skip_trellis) {
+static AOM_INLINE void txfm_rd_in_plane(MACROBLOCK *x, const AV1_COMP *cpi,
+                                        RD_STATS *rd_stats, int64_t ref_best_rd,
+                                        int64_t this_rd, int plane,
+                                        BLOCK_SIZE bsize, TX_SIZE tx_size,
+                                        int use_fast_coef_casting,
+                                        FAST_TX_SEARCH_MODE ftxs_mode,
+                                        int skip_trellis) {
   MACROBLOCKD *const xd = &x->e_mbd;
   const struct macroblockd_plane *const pd = &xd->plane[plane];
   struct rdcost_block_args args;
@@ -3606,9 +3611,10 @@
   return rd;
 }
 
-static void choose_largest_tx_size(const AV1_COMP *const cpi, MACROBLOCK *x,
-                                   RD_STATS *rd_stats, int64_t ref_best_rd,
-                                   BLOCK_SIZE bs) {
+static AOM_INLINE void choose_largest_tx_size(const AV1_COMP *const cpi,
+                                              MACROBLOCK *x, RD_STATS *rd_stats,
+                                              int64_t ref_best_rd,
+                                              BLOCK_SIZE bs) {
   MACROBLOCKD *const xd = &x->e_mbd;
   MB_MODE_INFO *const mbmi = xd->mi[0];
   mbmi->tx_size = tx_size_from_tx_mode(bs, x->tx_mode);
@@ -3626,9 +3632,11 @@
                    cpi->sf.use_fast_coef_costing, FTXS_NONE, 0);
 }
 
-static void choose_smallest_tx_size(const AV1_COMP *const cpi, MACROBLOCK *x,
-                                    RD_STATS *rd_stats, int64_t ref_best_rd,
-                                    BLOCK_SIZE bs) {
+static AOM_INLINE void choose_smallest_tx_size(const AV1_COMP *const cpi,
+                                               MACROBLOCK *x,
+                                               RD_STATS *rd_stats,
+                                               int64_t ref_best_rd,
+                                               BLOCK_SIZE bs) {
   MACROBLOCKD *const xd = &x->e_mbd;
   MB_MODE_INFO *const mbmi = xd->mi[0];
 
@@ -3663,9 +3671,11 @@
   }
 }
 
-static void choose_tx_size_type_from_rd(const AV1_COMP *const cpi,
-                                        MACROBLOCK *x, RD_STATS *rd_stats,
-                                        int64_t ref_best_rd, BLOCK_SIZE bs) {
+static AOM_INLINE void choose_tx_size_type_from_rd(const AV1_COMP *const cpi,
+                                                   MACROBLOCK *x,
+                                                   RD_STATS *rd_stats,
+                                                   int64_t ref_best_rd,
+                                                   BLOCK_SIZE bs) {
   av1_invalid_rd_stats(rd_stats);
 
   MACROBLOCKD *const xd = &x->e_mbd;
@@ -3815,8 +3825,8 @@
 }
 
 // Used to set proper context for early termination with skip = 1.
-static void set_skip_flag(MACROBLOCK *x, RD_STATS *rd_stats, int bsize,
-                          int64_t dist) {
+static AOM_INLINE void set_skip_flag(MACROBLOCK *x, RD_STATS *rd_stats,
+                                     int bsize, int64_t dist) {
   MACROBLOCKD *const xd = &x->e_mbd;
   MB_MODE_INFO *const mbmi = xd->mi[0];
   const int n4 = bsize_to_num_blk(bsize);
@@ -3863,9 +3873,10 @@
   return (hash << 5) + bsize;
 }
 
-static void save_tx_rd_info(int n4, uint32_t hash, const MACROBLOCK *const x,
-                            const RD_STATS *const rd_stats,
-                            MB_RD_RECORD *tx_rd_record) {
+static AOM_INLINE void save_tx_rd_info(int n4, uint32_t hash,
+                                       const MACROBLOCK *const x,
+                                       const RD_STATS *const rd_stats,
+                                       MB_RD_RECORD *tx_rd_record) {
   int index;
   if (tx_rd_record->num < RD_RECORD_BUFFER_LEN) {
     index =
@@ -3888,8 +3899,10 @@
   tx_rd_info->rd_stats = *rd_stats;
 }
 
-static void fetch_tx_rd_info(int n4, const MB_RD_INFO *const tx_rd_info,
-                             RD_STATS *const rd_stats, MACROBLOCK *const x) {
+static AOM_INLINE void fetch_tx_rd_info(int n4,
+                                        const MB_RD_INFO *const tx_rd_info,
+                                        RD_STATS *const rd_stats,
+                                        MACROBLOCK *const x) {
   MACROBLOCKD *const xd = &x->e_mbd;
   MB_MODE_INFO *const mbmi = xd->mi[0];
   mbmi->tx_size = tx_rd_info->tx_size;
@@ -3918,9 +3931,9 @@
   return match_index;
 }
 
-static void super_block_yrd(const AV1_COMP *const cpi, MACROBLOCK *x,
-                            RD_STATS *rd_stats, BLOCK_SIZE bs,
-                            int64_t ref_best_rd) {
+static AOM_INLINE void super_block_yrd(const AV1_COMP *const cpi, MACROBLOCK *x,
+                                       RD_STATS *rd_stats, BLOCK_SIZE bs,
+                                       int64_t ref_best_rd) {
   MACROBLOCKD *xd = &x->e_mbd;
   av1_init_rd_stats(rd_stats);
   int is_inter = is_inter_block(xd->mi[0]);
@@ -4146,9 +4159,9 @@
 // Extends 'color_map' array from 'orig_width x orig_height' to 'new_width x
 // new_height'. Extra rows and columns are filled in by copying last valid
 // row/column.
-static void extend_palette_color_map(uint8_t *const color_map, int orig_width,
-                                     int orig_height, int new_width,
-                                     int new_height) {
+static AOM_INLINE void extend_palette_color_map(uint8_t *const color_map,
+                                                int orig_width, int orig_height,
+                                                int new_width, int new_height) {
   int j;
   assert(new_width >= orig_width);
   assert(new_height >= orig_height);
@@ -4169,8 +4182,9 @@
 
 // Bias toward using colors in the cache.
 // TODO(huisu): Try other schemes to improve compression.
-static void optimize_palette_colors(uint16_t *color_cache, int n_cache,
-                                    int n_colors, int stride, int *centroids) {
+static AOM_INLINE void optimize_palette_colors(uint16_t *color_cache,
+                                               int n_cache, int n_colors,
+                                               int stride, int *centroids) {
   if (n_cache <= 0) return;
   for (int i = 0; i < n_colors * stride; i += stride) {
     int min_diff = abs(centroids[i] - (int)color_cache[0]);
@@ -4188,16 +4202,14 @@
 
 // Given the base colors as specified in centroids[], calculate the RD cost
 // of palette mode.
-static void palette_rd_y(const AV1_COMP *const cpi, MACROBLOCK *x,
-                         MB_MODE_INFO *mbmi, BLOCK_SIZE bsize, int mi_row,
-                         int mi_col, int dc_mode_cost, const int *data,
-                         int *centroids, int n, uint16_t *color_cache,
-                         int n_cache, MB_MODE_INFO *best_mbmi,
-                         uint8_t *best_palette_color_map, int64_t *best_rd,
-                         int64_t *best_model_rd, int *rate, int *rate_tokenonly,
-                         int *rate_overhead, int64_t *distortion,
-                         int *skippable, PICK_MODE_CONTEXT *ctx,
-                         uint8_t *blk_skip) {
+static AOM_INLINE void palette_rd_y(
+    const AV1_COMP *const cpi, MACROBLOCK *x, MB_MODE_INFO *mbmi,
+    BLOCK_SIZE bsize, int mi_row, int mi_col, int dc_mode_cost, const int *data,
+    int *centroids, int n, uint16_t *color_cache, int n_cache,
+    MB_MODE_INFO *best_mbmi, uint8_t *best_palette_color_map, int64_t *best_rd,
+    int64_t *best_model_rd, int *rate, int *rate_tokenonly, int *rate_overhead,
+    int64_t *distortion, int *skippable, PICK_MODE_CONTEXT *ctx,
+    uint8_t *blk_skip) {
   optimize_palette_colors(color_cache, n_cache, n, 1, centroids);
   int k = av1_remove_duplicates(centroids, n);
   if (k < PALETTE_MIN_SIZE) {
@@ -4585,8 +4597,8 @@
 };
 /* clang-format on */
 
-static void get_gradient_hist(const uint8_t *src, int src_stride, int rows,
-                              int cols, uint64_t *hist) {
+static AOM_INLINE void get_gradient_hist(const uint8_t *src, int src_stride,
+                                         int rows, int cols, uint64_t *hist) {
   src += src_stride;
   for (int r = 1; r < rows; ++r) {
     for (int c = 1; c < cols; ++c) {
@@ -4610,8 +4622,9 @@
   }
 }
 
-static void get_highbd_gradient_hist(const uint8_t *src8, int src_stride,
-                                     int rows, int cols, uint64_t *hist) {
+static AOM_INLINE void get_highbd_gradient_hist(const uint8_t *src8,
+                                                int src_stride, int rows,
+                                                int cols, uint64_t *hist) {
   uint16_t *src = CONVERT_TO_SHORTPTR(src8);
   src += src_stride;
   for (int r = 1; r < rows; ++r) {
@@ -4636,9 +4649,10 @@
   }
 }
 
-static void angle_estimation(const uint8_t *src, int src_stride, int rows,
-                             int cols, BLOCK_SIZE bsize, int is_hbd,
-                             uint8_t *directional_mode_skip_mask) {
+static AOM_INLINE void angle_estimation(const uint8_t *src, int src_stride,
+                                        int rows, int cols, BLOCK_SIZE bsize,
+                                        int is_hbd,
+                                        uint8_t *directional_mode_skip_mask) {
   // Check if angle_delta is used
   if (!av1_use_angle_delta(bsize)) return;
 
@@ -4671,11 +4685,12 @@
 }
 
 // Given selected prediction mode, search for the best tx type and size.
-static void intra_block_yrd(const AV1_COMP *const cpi, MACROBLOCK *x,
-                            BLOCK_SIZE bsize, const int *bmode_costs,
-                            int64_t *best_rd, int *rate, int *rate_tokenonly,
-                            int64_t *distortion, int *skippable,
-                            MB_MODE_INFO *best_mbmi, PICK_MODE_CONTEXT *ctx) {
+static AOM_INLINE void intra_block_yrd(const AV1_COMP *const cpi, MACROBLOCK *x,
+                                       BLOCK_SIZE bsize, const int *bmode_costs,
+                                       int64_t *best_rd, int *rate,
+                                       int *rate_tokenonly, int64_t *distortion,
+                                       int *skippable, MB_MODE_INFO *best_mbmi,
+                                       PICK_MODE_CONTEXT *ctx) {
   MACROBLOCKD *const xd = &x->e_mbd;
   MB_MODE_INFO *const mbmi = xd->mi[0];
   RD_STATS rd_stats;
@@ -4903,11 +4918,13 @@
 }
 
 // Pick transform type for a transform block of tx_size.
-static void tx_type_rd(const AV1_COMP *cpi, MACROBLOCK *x, TX_SIZE tx_size,
-                       int blk_row, int blk_col, int plane, int block,
-                       int plane_bsize, TXB_CTX *txb_ctx, RD_STATS *rd_stats,
-                       FAST_TX_SEARCH_MODE ftxs_mode, int64_t ref_rdcost,
-                       TXB_RD_INFO *rd_info_array) {
+static AOM_INLINE void tx_type_rd(const AV1_COMP *cpi, MACROBLOCK *x,
+                                  TX_SIZE tx_size, int blk_row, int blk_col,
+                                  int plane, int block, int plane_bsize,
+                                  TXB_CTX *txb_ctx, RD_STATS *rd_stats,
+                                  FAST_TX_SEARCH_MODE ftxs_mode,
+                                  int64_t ref_rdcost,
+                                  TXB_RD_INFO *rd_info_array) {
   const struct macroblock_plane *const p = &x->plane[plane];
   const uint16_t cur_joint_ctx =
       (txb_ctx->dc_sign_ctx << 8) + txb_ctx->txb_skip_ctx;
@@ -4963,8 +4980,8 @@
 
 // Feature used by the model to predict tx split: the mean and standard
 // deviation values of the block and sub-blocks.
-static void get_mean_dev_features(const int16_t *data, int stride, int bw,
-                                  int bh, float *feature) {
+static AOM_INLINE void get_mean_dev_features(const int16_t *data, int stride,
+                                             int bw, int bh, float *feature) {
   const int16_t *const data_ptr = &data[0];
   const int subh = (bh >= bw) ? (bh >> 1) : bh;
   const int subw = (bw >= bh) ? (bw >> 1) : bw;
@@ -5038,7 +5055,7 @@
   TX_TYPE tx_type;
 } TxCandidateInfo;
 
-static void try_tx_block_no_split(
+static AOM_INLINE void try_tx_block_no_split(
     const AV1_COMP *cpi, MACROBLOCK *x, int blk_row, int blk_col, int block,
     TX_SIZE tx_size, int depth, BLOCK_SIZE plane_bsize,
     const ENTROPY_CONTEXT *ta, const ENTROPY_CONTEXT *tl,
@@ -5100,16 +5117,15 @@
   no_split->tx_type = mbmi->txk_type[txk_type_idx];
 }
 
-static void select_tx_block(const AV1_COMP *cpi, MACROBLOCK *x, int blk_row,
-                            int blk_col, int block, TX_SIZE tx_size, int depth,
-                            BLOCK_SIZE plane_bsize, ENTROPY_CONTEXT *ta,
-                            ENTROPY_CONTEXT *tl, TXFM_CONTEXT *tx_above,
-                            TXFM_CONTEXT *tx_left, RD_STATS *rd_stats,
-                            int64_t prev_level_rd, int64_t ref_best_rd,
-                            int *is_cost_valid, FAST_TX_SEARCH_MODE ftxs_mode,
-                            TXB_RD_INFO_NODE *rd_info_node);
+static AOM_INLINE void select_tx_block(
+    const AV1_COMP *cpi, MACROBLOCK *x, int blk_row, int blk_col, int block,
+    TX_SIZE tx_size, int depth, BLOCK_SIZE plane_bsize, ENTROPY_CONTEXT *ta,
+    ENTROPY_CONTEXT *tl, TXFM_CONTEXT *tx_above, TXFM_CONTEXT *tx_left,
+    RD_STATS *rd_stats, int64_t prev_level_rd, int64_t ref_best_rd,
+    int *is_cost_valid, FAST_TX_SEARCH_MODE ftxs_mode,
+    TXB_RD_INFO_NODE *rd_info_node);
 
-static void try_tx_block_split(
+static AOM_INLINE void try_tx_block_split(
     const AV1_COMP *cpi, MACROBLOCK *x, int blk_row, int blk_col, int block,
     TX_SIZE tx_size, int depth, BLOCK_SIZE plane_bsize, ENTROPY_CONTEXT *ta,
     ENTROPY_CONTEXT *tl, TXFM_CONTEXT *tx_above, TXFM_CONTEXT *tx_left,
@@ -5158,14 +5174,13 @@
 }
 
 // Search for the best tx partition/type for a given luma block.
-static void select_tx_block(const AV1_COMP *cpi, MACROBLOCK *x, int blk_row,
-                            int blk_col, int block, TX_SIZE tx_size, int depth,
-                            BLOCK_SIZE plane_bsize, ENTROPY_CONTEXT *ta,
-                            ENTROPY_CONTEXT *tl, TXFM_CONTEXT *tx_above,
-                            TXFM_CONTEXT *tx_left, RD_STATS *rd_stats,
-                            int64_t prev_level_rd, int64_t ref_best_rd,
-                            int *is_cost_valid, FAST_TX_SEARCH_MODE ftxs_mode,
-                            TXB_RD_INFO_NODE *rd_info_node) {
+static AOM_INLINE void select_tx_block(
+    const AV1_COMP *cpi, MACROBLOCK *x, int blk_row, int blk_col, int block,
+    TX_SIZE tx_size, int depth, BLOCK_SIZE plane_bsize, ENTROPY_CONTEXT *ta,
+    ENTROPY_CONTEXT *tl, TXFM_CONTEXT *tx_above, TXFM_CONTEXT *tx_left,
+    RD_STATS *rd_stats, int64_t prev_level_rd, int64_t ref_best_rd,
+    int *is_cost_valid, FAST_TX_SEARCH_MODE ftxs_mode,
+    TXB_RD_INFO_NODE *rd_info_node) {
   assert(tx_size < TX_SIZES_ALL);
   av1_init_rd_stats(rd_stats);
   if (ref_best_rd < 0) {
@@ -5368,13 +5383,12 @@
 }
 
 // Finds rd cost for a y block, given the transform size partitions
-static void tx_block_yrd(const AV1_COMP *cpi, MACROBLOCK *x, int blk_row,
-                         int blk_col, int block, TX_SIZE tx_size,
-                         BLOCK_SIZE plane_bsize, int depth,
-                         ENTROPY_CONTEXT *above_ctx, ENTROPY_CONTEXT *left_ctx,
-                         TXFM_CONTEXT *tx_above, TXFM_CONTEXT *tx_left,
-                         int64_t ref_best_rd, RD_STATS *rd_stats,
-                         FAST_TX_SEARCH_MODE ftxs_mode) {
+static AOM_INLINE void tx_block_yrd(
+    const AV1_COMP *cpi, MACROBLOCK *x, int blk_row, int blk_col, int block,
+    TX_SIZE tx_size, BLOCK_SIZE plane_bsize, int depth,
+    ENTROPY_CONTEXT *above_ctx, ENTROPY_CONTEXT *left_ctx,
+    TXFM_CONTEXT *tx_above, TXFM_CONTEXT *tx_left, int64_t ref_best_rd,
+    RD_STATS *rd_stats, FAST_TX_SEARCH_MODE ftxs_mode) {
   MACROBLOCKD *const xd = &x->e_mbd;
   MB_MODE_INFO *const mbmi = xd->mi[0];
   const int max_blocks_high = max_block_high(xd, plane_bsize, 0);
@@ -5786,9 +5800,10 @@
 }
 
 // Search for best transform size and type for luma inter blocks.
-static void pick_tx_size_type_yrd(const AV1_COMP *cpi, MACROBLOCK *x,
-                                  RD_STATS *rd_stats, BLOCK_SIZE bsize,
-                                  int mi_row, int mi_col, int64_t ref_best_rd) {
+static AOM_INLINE void pick_tx_size_type_yrd(const AV1_COMP *cpi, MACROBLOCK *x,
+                                             RD_STATS *rd_stats,
+                                             BLOCK_SIZE bsize, int mi_row,
+                                             int mi_col, int64_t ref_best_rd) {
   const AV1_COMMON *cm = &cpi->common;
   MACROBLOCKD *const xd = &x->e_mbd;
   assert(is_inter_block(xd->mi[0]));
@@ -5886,7 +5901,7 @@
   }
 }
 
-static void model_rd_for_sb_with_fullrdy(
+static AOM_INLINE void model_rd_for_sb_with_fullrdy(
     const AV1_COMP *const cpi, BLOCK_SIZE bsize, MACROBLOCK *x, MACROBLOCKD *xd,
     int plane_from, int plane_to, int mi_row, int mi_col, int *out_rate_sum,
     int64_t *out_dist_sum, int *skip_txfm_sb, int64_t *skip_sse_sb,
@@ -5947,13 +5962,11 @@
   *out_dist_sum = dist_sum;
 }
 
-static void rd_pick_palette_intra_sbuv(const AV1_COMP *const cpi, MACROBLOCK *x,
-                                       int dc_mode_cost,
-                                       uint8_t *best_palette_color_map,
-                                       MB_MODE_INFO *const best_mbmi,
-                                       int64_t *best_rd, int *rate,
-                                       int *rate_tokenonly, int64_t *distortion,
-                                       int *skippable) {
+static AOM_INLINE void rd_pick_palette_intra_sbuv(
+    const AV1_COMP *const cpi, MACROBLOCK *x, int dc_mode_cost,
+    uint8_t *best_palette_color_map, MB_MODE_INFO *const best_mbmi,
+    int64_t *best_rd, int *rate, int *rate_tokenonly, int64_t *distortion,
+    int *skippable) {
   MACROBLOCKD *const xd = &x->e_mbd;
   MB_MODE_INFO *const mbmi = xd->mi[0];
   assert(!is_inter_block(mbmi));
@@ -6309,7 +6322,7 @@
   return best_rate_overhead;
 }
 
-static void init_sbuv_mode(MB_MODE_INFO *const mbmi) {
+static AOM_INLINE void init_sbuv_mode(MB_MODE_INFO *const mbmi) {
   mbmi->uv_mode = UV_DC_PRED;
   mbmi->palette_mode_info.palette_size[1] = 0;
 }
@@ -6402,11 +6415,10 @@
   return best_rd;
 }
 
-static void choose_intra_uv_mode(const AV1_COMP *const cpi, MACROBLOCK *const x,
-                                 BLOCK_SIZE bsize, TX_SIZE max_tx_size,
-                                 int *rate_uv, int *rate_uv_tokenonly,
-                                 int64_t *dist_uv, int *skip_uv,
-                                 UV_PREDICTION_MODE *mode_uv) {
+static AOM_INLINE void choose_intra_uv_mode(
+    const AV1_COMP *const cpi, MACROBLOCK *const x, BLOCK_SIZE bsize,
+    TX_SIZE max_tx_size, int *rate_uv, int *rate_uv_tokenonly, int64_t *dist_uv,
+    int *skip_uv, UV_PREDICTION_MODE *mode_uv) {
   const AV1_COMMON *const cm = &cpi->common;
   MACROBLOCKD *xd = &x->e_mbd;
   MB_MODE_INFO *mbmi = xd->mi[0];
@@ -6509,11 +6521,12 @@
   return single_mode;
 }
 
-static void joint_motion_search(const AV1_COMP *cpi, MACROBLOCK *x,
-                                BLOCK_SIZE bsize, int_mv *cur_mv, int mi_row,
-                                int mi_col, int_mv *ref_mv_sub8x8[2],
-                                const uint8_t *mask, int mask_stride,
-                                int *rate_mv, const int block) {
+static AOM_INLINE void joint_motion_search(const AV1_COMP *cpi, MACROBLOCK *x,
+                                           BLOCK_SIZE bsize, int_mv *cur_mv,
+                                           int mi_row, int mi_col,
+                                           int_mv *ref_mv_sub8x8[2],
+                                           const uint8_t *mask, int mask_stride,
+                                           int *rate_mv, const int block) {
   const AV1_COMMON *const cm = &cpi->common;
   const int num_planes = av1_num_planes(cm);
   const int pw = block_size_wide[bsize];
@@ -6705,7 +6718,7 @@
   }
 }
 
-static void estimate_ref_frame_costs(
+static AOM_INLINE void estimate_ref_frame_costs(
     const AV1_COMMON *cm, const MACROBLOCKD *xd, const MACROBLOCK *x,
     int segment_id, unsigned int *ref_costs_single,
     unsigned int (*ref_costs_comp)[REF_FRAMES]) {
@@ -6855,10 +6868,9 @@
   }
 }
 
-static void store_coding_context(MACROBLOCK *x, PICK_MODE_CONTEXT *ctx,
-                                 int mode_index,
-                                 int64_t comp_pred_diff[REFERENCE_MODES],
-                                 int skippable) {
+static AOM_INLINE void store_coding_context(
+    MACROBLOCK *x, PICK_MODE_CONTEXT *ctx, int mode_index,
+    int64_t comp_pred_diff[REFERENCE_MODES], int skippable) {
   MACROBLOCKD *const xd = &x->e_mbd;
 
   // Take a snapshot of the coding context so it can be
@@ -6873,7 +6885,7 @@
   ctx->hybrid_pred_diff = (int)comp_pred_diff[REFERENCE_MODE_SELECT];
 }
 
-static void setup_buffer_ref_mvs_inter(
+static AOM_INLINE void setup_buffer_ref_mvs_inter(
     const AV1_COMP *const cpi, MACROBLOCK *x, MV_REFERENCE_FRAME ref_frame,
     BLOCK_SIZE block_size, int mi_row, int mi_col,
     struct buf_2d yv12_mb[REF_FRAMES][MAX_MB_PLANE]) {
@@ -6921,9 +6933,10 @@
   }
 }
 
-static void single_motion_search(const AV1_COMP *const cpi, MACROBLOCK *x,
-                                 BLOCK_SIZE bsize, int mi_row, int mi_col,
-                                 int ref_idx, int *rate_mv) {
+static AOM_INLINE void single_motion_search(const AV1_COMP *const cpi,
+                                            MACROBLOCK *x, BLOCK_SIZE bsize,
+                                            int mi_row, int mi_col, int ref_idx,
+                                            int *rate_mv) {
   MACROBLOCKD *xd = &x->e_mbd;
   const AV1_COMMON *cm = &cpi->common;
   const int num_planes = av1_num_planes(cm);
@@ -7130,10 +7143,12 @@
   }
 }
 
-static void build_second_inter_pred(const AV1_COMP *cpi, MACROBLOCK *x,
-                                    BLOCK_SIZE bsize, const MV *other_mv,
-                                    int mi_row, int mi_col, const int block,
-                                    int ref_idx, uint8_t *second_pred) {
+static AOM_INLINE void build_second_inter_pred(const AV1_COMP *cpi,
+                                               MACROBLOCK *x, BLOCK_SIZE bsize,
+                                               const MV *other_mv, int mi_row,
+                                               int mi_col, const int block,
+                                               int ref_idx,
+                                               uint8_t *second_pred) {
   const AV1_COMMON *const cm = &cpi->common;
   const int pw = block_size_wide[bsize];
   const int ph = block_size_high[bsize];
@@ -7178,12 +7193,10 @@
 
 // Search for the best mv for one component of a compound,
 // given that the other component is fixed.
-static void compound_single_motion_search(const AV1_COMP *cpi, MACROBLOCK *x,
-                                          BLOCK_SIZE bsize, MV *this_mv,
-                                          int mi_row, int mi_col,
-                                          const uint8_t *second_pred,
-                                          const uint8_t *mask, int mask_stride,
-                                          int *rate_mv, int ref_idx) {
+static AOM_INLINE void compound_single_motion_search(
+    const AV1_COMP *cpi, MACROBLOCK *x, BLOCK_SIZE bsize, MV *this_mv,
+    int mi_row, int mi_col, const uint8_t *second_pred, const uint8_t *mask,
+    int mask_stride, int *rate_mv, int ref_idx) {
   const AV1_COMMON *const cm = &cpi->common;
   const int num_planes = av1_num_planes(cm);
   const int pw = block_size_wide[bsize];
@@ -7288,7 +7301,7 @@
 
 // Wrapper for compound_single_motion_search, for the common case
 // where the second prediction is also an inter mode.
-static void compound_single_motion_search_interinter(
+static AOM_INLINE void compound_single_motion_search_interinter(
     const AV1_COMP *cpi, MACROBLOCK *x, BLOCK_SIZE bsize, int_mv *cur_mv,
     int mi_row, int mi_col, const uint8_t *mask, int mask_stride, int *rate_mv,
     const int block, int ref_idx) {
@@ -7315,7 +7328,7 @@
                                 ref_idx);
 }
 
-static void do_masked_motion_search_indexed(
+static AOM_INLINE void do_masked_motion_search_indexed(
     const AV1_COMP *const cpi, MACROBLOCK *x, const int_mv *const cur_mv,
     const INTERINTER_COMPOUND_DATA *const comp_data, BLOCK_SIZE bsize,
     int mi_row, int mi_col, int_mv *tmp_mv, int *rate_mv, int which) {
@@ -7679,7 +7692,7 @@
   return tmp_rate_mv;
 }
 
-static void get_inter_predictors_masked_compound(
+static AOM_INLINE void get_inter_predictors_masked_compound(
     const AV1_COMP *const cpi, MACROBLOCK *x, const BLOCK_SIZE bsize,
     int mi_row, int mi_col, uint8_t **preds0, uint8_t **preds1,
     int16_t *residual1, int16_t *diff10, int *strides) {
@@ -10883,7 +10896,8 @@
   ctx->mbmi_ext = *x->mbmi_ext;
 }
 
-static void restore_uv_color_map(const AV1_COMP *const cpi, MACROBLOCK *x) {
+static AOM_INLINE void restore_uv_color_map(const AV1_COMP *const cpi,
+                                            MACROBLOCK *x) {
   MACROBLOCKD *const xd = &x->e_mbd;
   MB_MODE_INFO *const mbmi = xd->mi[0];
   PALETTE_MODE_INFO *const pmi = &mbmi->palette_mode_info;
@@ -10925,17 +10939,15 @@
                            plane_block_height);
 }
 
-static void calc_target_weighted_pred(const AV1_COMMON *cm, const MACROBLOCK *x,
-                                      const MACROBLOCKD *xd, int mi_row,
-                                      int mi_col, const uint8_t *above,
-                                      int above_stride, const uint8_t *left,
-                                      int left_stride);
+static AOM_INLINE void calc_target_weighted_pred(
+    const AV1_COMMON *cm, const MACROBLOCK *x, const MACROBLOCKD *xd,
+    int mi_row, int mi_col, const uint8_t *above, int above_stride,
+    const uint8_t *left, int left_stride);
 
-static void rd_pick_skip_mode(RD_STATS *rd_cost,
-                              InterModeSearchState *search_state,
-                              const AV1_COMP *const cpi, MACROBLOCK *const x,
-                              BLOCK_SIZE bsize, int mi_row, int mi_col,
-                              struct buf_2d yv12_mb[REF_FRAMES][MAX_MB_PLANE]) {
+static AOM_INLINE void rd_pick_skip_mode(
+    RD_STATS *rd_cost, InterModeSearchState *search_state,
+    const AV1_COMP *const cpi, MACROBLOCK *const x, BLOCK_SIZE bsize,
+    int mi_row, int mi_col, struct buf_2d yv12_mb[REF_FRAMES][MAX_MB_PLANE]) {
   const AV1_COMMON *const cm = &cpi->common;
   const SkipModeInfo *const skip_mode_info = &cm->current_frame.skip_mode_info;
   const int num_planes = av1_num_planes(cm);
@@ -11089,7 +11101,7 @@
 // When this speed feature is on, in rd mode search, only DCT is used.
 // After the mode is determined, this function is called, to select
 // transform types and get accurate rdcost.
-static void refine_winner_mode_tx(
+static AOM_INLINE void refine_winner_mode_tx(
     const AV1_COMP *cpi, MACROBLOCK *x, int mi_row, int mi_col,
     RD_STATS *rd_cost, BLOCK_SIZE bsize, PICK_MODE_CONTEXT *ctx,
     int best_mode_index, MB_MODE_INFO *best_mbmode,
@@ -11191,15 +11203,15 @@
 } mode_skip_mask_t;
 
 // Update 'ref_combo' mask to disable given 'ref' in single and compound modes.
-static void disable_reference(MV_REFERENCE_FRAME ref,
-                              bool ref_combo[REF_FRAMES][REF_FRAMES + 1]) {
+static AOM_INLINE void disable_reference(
+    MV_REFERENCE_FRAME ref, bool ref_combo[REF_FRAMES][REF_FRAMES + 1]) {
   for (MV_REFERENCE_FRAME ref2 = NONE_FRAME; ref2 < REF_FRAMES; ++ref2) {
     ref_combo[ref][ref2 + 1] = true;
   }
 }
 
 // Update 'ref_combo' mask to disable all inter references except ALTREF.
-static void disable_inter_references_except_altref(
+static AOM_INLINE void disable_inter_references_except_altref(
     bool ref_combo[REF_FRAMES][REF_FRAMES + 1]) {
   disable_reference(LAST_FRAME, ref_combo);
   disable_reference(LAST2_FRAME, ref_combo);
@@ -11229,7 +11241,8 @@
 
 typedef enum { REF_SET_FULL, REF_SET_REDUCED, REF_SET_REALTIME } REF_SET;
 
-static void default_skip_mask(mode_skip_mask_t *mask, REF_SET ref_set) {
+static AOM_INLINE void default_skip_mask(mode_skip_mask_t *mask,
+                                         REF_SET ref_set) {
   if (ref_set == REF_SET_FULL) {
     // Everything available by default.
     memset(mask, 0, sizeof(*mask));
@@ -11267,8 +11280,9 @@
   }
 }
 
-static void init_mode_skip_mask(mode_skip_mask_t *mask, const AV1_COMP *cpi,
-                                MACROBLOCK *x, BLOCK_SIZE bsize) {
+static AOM_INLINE void init_mode_skip_mask(mode_skip_mask_t *mask,
+                                           const AV1_COMP *cpi, MACROBLOCK *x,
+                                           BLOCK_SIZE bsize) {
   const AV1_COMMON *const cm = &cpi->common;
   const struct segmentation *const seg = &cm->seg;
   MACROBLOCKD *const xd = &x->e_mbd;
@@ -11370,7 +11384,7 @@
 
 // Please add/modify parameter setting in this function, making it consistent
 // and easy to read and maintain.
-static void set_params_rd_pick_inter_mode(
+static AOM_INLINE void set_params_rd_pick_inter_mode(
     const AV1_COMP *cpi, MACROBLOCK *x, HandleInterModeArgs *args,
     BLOCK_SIZE bsize, int mi_row, int mi_col, mode_skip_mask_t *mode_skip_mask,
     int skip_ref_frame_mask, unsigned int ref_costs_single[REF_FRAMES],
@@ -11507,13 +11521,11 @@
   x->comp_rd_stats_idx = 0;
 }
 
-static void search_palette_mode(const AV1_COMP *cpi, MACROBLOCK *x, int mi_row,
-                                int mi_col, RD_STATS *rd_cost,
-                                PICK_MODE_CONTEXT *ctx, BLOCK_SIZE bsize,
-                                MB_MODE_INFO *const mbmi,
-                                PALETTE_MODE_INFO *const pmi,
-                                unsigned int *ref_costs_single,
-                                InterModeSearchState *search_state) {
+static AOM_INLINE void search_palette_mode(
+    const AV1_COMP *cpi, MACROBLOCK *x, int mi_row, int mi_col,
+    RD_STATS *rd_cost, PICK_MODE_CONTEXT *ctx, BLOCK_SIZE bsize,
+    MB_MODE_INFO *const mbmi, PALETTE_MODE_INFO *const pmi,
+    unsigned int *ref_costs_single, InterModeSearchState *search_state) {
   const AV1_COMMON *const cm = &cpi->common;
   const int num_planes = av1_num_planes(cm);
   MACROBLOCKD *const xd = &x->e_mbd;
@@ -11600,10 +11612,9 @@
   }
 }
 
-static void init_inter_mode_search_state(InterModeSearchState *search_state,
-                                         const AV1_COMP *cpi,
-                                         const MACROBLOCK *x, BLOCK_SIZE bsize,
-                                         int64_t best_rd_so_far) {
+static AOM_INLINE void init_inter_mode_search_state(
+    InterModeSearchState *search_state, const AV1_COMP *cpi,
+    const MACROBLOCK *x, BLOCK_SIZE bsize, int64_t best_rd_so_far) {
   search_state->best_rd = best_rd_so_far;
 
   av1_zero(search_state->best_mbmode);
@@ -12077,9 +12088,9 @@
   return this_rd;
 }
 
-static void collect_single_states(MACROBLOCK *x,
-                                  InterModeSearchState *search_state,
-                                  const MB_MODE_INFO *const mbmi) {
+static AOM_INLINE void collect_single_states(MACROBLOCK *x,
+                                             InterModeSearchState *search_state,
+                                             const MB_MODE_INFO *const mbmi) {
   int i, j;
   const MV_REFERENCE_FRAME ref_frame = mbmi->ref_frame[0];
   const PREDICTION_MODE this_mode = mbmi->mode;
@@ -12123,8 +12134,8 @@
   search_state->single_state_modelled_cnt[dir][mode_offset]++;
 }
 
-static void analyze_single_states(const AV1_COMP *cpi,
-                                  InterModeSearchState *search_state) {
+static AOM_INLINE void analyze_single_states(
+    const AV1_COMP *cpi, InterModeSearchState *search_state) {
   const int prune_level = cpi->sf.prune_comp_search_by_single_result;
   assert(prune_level >= 1);
   int i, j, dir, mode;
@@ -12415,7 +12426,7 @@
 
 // Find the best RD for a reference frame (among single reference modes)
 // and store +10% of it in the 0-th element in ref_frame_rd.
-static void find_top_ref(int64_t ref_frame_rd[REF_FRAMES]) {
+static AOM_INLINE void find_top_ref(int64_t ref_frame_rd[REF_FRAMES]) {
   assert(ref_frame_rd[0] == INT64_MAX);
   int64_t ref_copy[REF_FRAMES - 1];
   memcpy(ref_copy, ref_frame_rd + 1,
@@ -12975,7 +12986,7 @@
 // (except that doesn't set ALTREF parameters)
 //               consider passing a flag to select non-rd path (similar to
 //               encode_sb_row)
-static void set_params_nonrd_pick_inter_mode(
+static AOM_INLINE void set_params_nonrd_pick_inter_mode(
     const AV1_COMP *cpi, MACROBLOCK *x, HandleInterModeArgs *args,
     BLOCK_SIZE bsize, int mi_row, int mi_col, mode_skip_mask_t *mode_skip_mask,
     int skip_ref_frame_mask, unsigned int ref_costs_single[REF_FRAMES],
@@ -13800,11 +13811,10 @@
 //  error(x, y) =
 //    wsrc(x, y) - mask(x, y) * P(x, y) / (AOM_BLEND_A64_MAX_ALPHA ** 2)
 //
-static void calc_target_weighted_pred(const AV1_COMMON *cm, const MACROBLOCK *x,
-                                      const MACROBLOCKD *xd, int mi_row,
-                                      int mi_col, const uint8_t *above,
-                                      int above_stride, const uint8_t *left,
-                                      int left_stride) {
+static AOM_INLINE void calc_target_weighted_pred(
+    const AV1_COMMON *cm, const MACROBLOCK *x, const MACROBLOCKD *xd,
+    int mi_row, int mi_col, const uint8_t *above, int above_stride,
+    const uint8_t *left, int left_stride) {
   const BLOCK_SIZE bsize = xd->mi[0]->sb_type;
   const int bw = xd->n4_w << MI_SIZE_LOG2;
   const int bh = xd->n4_h << MI_SIZE_LOG2;
diff --git a/av1/encoder/tpl_model.c b/av1/encoder/tpl_model.c
index cd3677c..7b6d369 100644
--- a/av1/encoder/tpl_model.c
+++ b/av1/encoder/tpl_model.c
@@ -42,10 +42,10 @@
   }
 }
 
-static void get_quantize_error(MACROBLOCK *x, int plane, tran_low_t *coeff,
-                               tran_low_t *qcoeff, tran_low_t *dqcoeff,
-                               TX_SIZE tx_size, int64_t *recon_error,
-                               int64_t *sse) {
+static AOM_INLINE void get_quantize_error(MACROBLOCK *x, int plane,
+                                          tran_low_t *coeff, tran_low_t *qcoeff,
+                                          tran_low_t *dqcoeff, TX_SIZE tx_size,
+                                          int64_t *recon_error, int64_t *sse) {
   const struct macroblock_plane *const p = &x->plane[plane];
   const SCAN_ORDER *const scan_order = &av1_default_scan_orders[tx_size];
   uint16_t eob;
@@ -63,8 +63,9 @@
   *sse = AOMMAX(*sse, 1);
 }
 
-static void wht_fwd_txfm(int16_t *src_diff, int bw, tran_low_t *coeff,
-                         TX_SIZE tx_size, int is_hbd) {
+static AOM_INLINE void wht_fwd_txfm(int16_t *src_diff, int bw,
+                                    tran_low_t *coeff, TX_SIZE tx_size,
+                                    int is_hbd) {
   if (is_hbd) {
     switch (tx_size) {
       case TX_8X8: aom_highbd_hadamard_8x8(src_diff, bw, coeff); break;
@@ -137,7 +138,7 @@
   return bestsme;
 }
 
-static void mode_estimation(
+static AOM_INLINE void mode_estimation(
     AV1_COMP *cpi, MACROBLOCK *x, MACROBLOCKD *xd, struct scale_factors *sf,
     int frame_idx, int16_t *src_diff, tran_low_t *coeff, tran_low_t *qcoeff,
     tran_low_t *dqcoeff, int use_satd, int mi_row, int mi_col, BLOCK_SIZE bsize,
@@ -383,11 +384,12 @@
   return (mi_row >> right_shift) * stride + (mi_col >> right_shift);
 }
 
-static void tpl_model_update_b(AV1_COMP *cpi, TplDepFrame *tpl_frame,
-                               TplDepStats *tpl_stats_ptr, int mi_row,
-                               int mi_col, double quant_ratio,
-                               const BLOCK_SIZE bsize, int ref_frame_index,
-                               int_mv mv) {
+static AOM_INLINE void tpl_model_update_b(AV1_COMP *cpi, TplDepFrame *tpl_frame,
+                                          TplDepStats *tpl_stats_ptr,
+                                          int mi_row, int mi_col,
+                                          double quant_ratio,
+                                          const BLOCK_SIZE bsize,
+                                          int ref_frame_index, int_mv mv) {
   TplDepFrame *ref_tpl_frame = &tpl_frame[ref_frame_index];
   TplDepStats *ref_stats_ptr = ref_tpl_frame->tpl_stats_ptr;
 
@@ -440,10 +442,11 @@
   }
 }
 
-static void tpl_model_update(AV1_COMP *cpi, TplDepFrame *tpl_frame,
-                             TplDepStats *tpl_stats_ptr, int mi_row, int mi_col,
-                             double quant_ratio, const BLOCK_SIZE bsize,
-                             int ref_frame_index, int_mv mv) {
+static AOM_INLINE void tpl_model_update(AV1_COMP *cpi, TplDepFrame *tpl_frame,
+                                        TplDepStats *tpl_stats_ptr, int mi_row,
+                                        int mi_col, double quant_ratio,
+                                        const BLOCK_SIZE bsize,
+                                        int ref_frame_index, int_mv mv) {
   const int mi_height = mi_size_high[bsize];
   const int mi_width = mi_size_wide[bsize];
   const int step = 1 << cpi->tpl_stats_block_mis_log2;
@@ -460,9 +463,10 @@
   }
 }
 
-static void tpl_model_store(AV1_COMP *cpi, TplDepStats *tpl_stats_ptr,
-                            int mi_row, int mi_col, BLOCK_SIZE bsize,
-                            int stride, const TplDepStats *src_stats) {
+static AOM_INLINE void tpl_model_store(AV1_COMP *cpi,
+                                       TplDepStats *tpl_stats_ptr, int mi_row,
+                                       int mi_col, BLOCK_SIZE bsize, int stride,
+                                       const TplDepStats *src_stats) {
   const int mi_height = mi_size_high[bsize];
   const int mi_width = mi_size_wide[bsize];
   const int step = 1 << cpi->tpl_stats_block_mis_log2;
@@ -503,7 +507,7 @@
   }
 }
 
-static void mc_flow_dispenser(AV1_COMP *cpi, int frame_idx) {
+static AOM_INLINE void mc_flow_dispenser(AV1_COMP *cpi, int frame_idx) {
   const GF_GROUP *gf_group = &cpi->gf_group;
   if (frame_idx == gf_group->size) return;
   TplDepFrame *tpl_frame = &cpi->tpl_frame[frame_idx];
@@ -609,7 +613,7 @@
   xd->mi = backup_mi_grid;
 }
 
-static void init_gop_frames_for_tpl(
+static AOM_INLINE void init_gop_frames_for_tpl(
     AV1_COMP *cpi, const EncodeFrameParams *const init_frame_params,
     GF_GROUP *gf_group, int *tpl_group_frames,
     const EncodeFrameInput *const frame_input) {
@@ -743,7 +747,7 @@
   av1_get_ref_frames(cpi, &cpi->ref_buffer_stack);
 }
 
-static void init_tpl_stats(AV1_COMP *cpi) {
+static AOM_INLINE void init_tpl_stats(AV1_COMP *cpi) {
   for (int frame_idx = 0; frame_idx < MAX_LENGTH_TPL_FRAME_STATS; ++frame_idx) {
     TplDepFrame *tpl_frame = &cpi->tpl_stats_buffer[frame_idx];
     memset(tpl_frame->tpl_stats_ptr, 0,
@@ -798,11 +802,12 @@
   cm->current_frame.frame_type = frame_params->frame_type;
 }
 
-static void get_tpl_forward_stats(AV1_COMP *cpi, MACROBLOCK *x, MACROBLOCKD *xd,
-                                  BLOCK_SIZE bsize, int use_satd,
-                                  YV12_BUFFER_CONFIG *ref,
-                                  YV12_BUFFER_CONFIG *src,
-                                  TplDepFrame *ref_tpl_frame) {
+static AOM_INLINE void get_tpl_forward_stats(AV1_COMP *cpi, MACROBLOCK *x,
+                                             MACROBLOCKD *xd, BLOCK_SIZE bsize,
+                                             int use_satd,
+                                             YV12_BUFFER_CONFIG *ref,
+                                             YV12_BUFFER_CONFIG *src,
+                                             TplDepFrame *ref_tpl_frame) {
 // TODO(yuec) Consider deleting forward tpl model completely
 #if !USE_TPL_CLASSIC_MODEL
   AV1_COMMON *cm = &cpi->common;
diff --git a/av1/encoder/var_based_part.c b/av1/encoder/var_based_part.c
index 6ee7c16..1cd508b 100644
--- a/av1/encoder/var_based_part.c
+++ b/av1/encoder/var_based_part.c
@@ -87,7 +87,8 @@
   var *split[4];
 } variance_node;
 
-static void tree_to_node(void *data, BLOCK_SIZE bsize, variance_node *node) {
+static AOM_INLINE void tree_to_node(void *data, BLOCK_SIZE bsize,
+                                    variance_node *node) {
   int i;
   node->part_variances = NULL;
   switch (bsize) {
@@ -137,13 +138,13 @@
 }
 
 // Set variance values given sum square error, sum error, count.
-static void fill_variance(uint32_t s2, int32_t s, int c, var *v) {
+static AOM_INLINE void fill_variance(uint32_t s2, int32_t s, int c, var *v) {
   v->sum_square_error = s2;
   v->sum_error = s;
   v->log2_count = c;
 }
 
-static void get_variance(var *v) {
+static AOM_INLINE void get_variance(var *v) {
   v->variance =
       (int)(256 * (v->sum_square_error -
                    (uint32_t)(((int64_t)v->sum_error * v->sum_error) >>
@@ -151,13 +152,13 @@
             v->log2_count);
 }
 
-static void sum_2_variances(const var *a, const var *b, var *r) {
+static AOM_INLINE void sum_2_variances(const var *a, const var *b, var *r) {
   assert(a->log2_count == b->log2_count);
   fill_variance(a->sum_square_error + b->sum_square_error,
                 a->sum_error + b->sum_error, a->log2_count + 1, r);
 }
 
-static void fill_variance_tree(void *data, BLOCK_SIZE bsize) {
+static AOM_INLINE void fill_variance_tree(void *data, BLOCK_SIZE bsize) {
   variance_node node;
   memset(&node, 0, sizeof(node));
   tree_to_node(data, bsize, &node);
@@ -169,9 +170,9 @@
                   &node.part_variances->none);
 }
 
-static void set_block_size(AV1_COMP *const cpi, MACROBLOCK *const x,
-                           MACROBLOCKD *const xd, int mi_row, int mi_col,
-                           BLOCK_SIZE bsize) {
+static AOM_INLINE void set_block_size(AV1_COMP *const cpi, MACROBLOCK *const x,
+                                      MACROBLOCKD *const xd, int mi_row,
+                                      int mi_col, BLOCK_SIZE bsize) {
   if (cpi->common.mi_cols > mi_col && cpi->common.mi_rows > mi_row) {
     set_mode_info_offsets(cpi, x, xd, mi_row, mi_col);
     xd->mi[0]->sb_type = bsize;
@@ -258,10 +259,11 @@
   return 0;
 }
 
-static void fill_variance_8x8avg(const uint8_t *s, int sp, const uint8_t *d,
-                                 int dp, int x16_idx, int y16_idx, v16x16 *vst,
-                                 int pixels_wide, int pixels_high,
-                                 int is_key_frame) {
+static AOM_INLINE void fill_variance_8x8avg(const uint8_t *s, int sp,
+                                            const uint8_t *d, int dp,
+                                            int x16_idx, int y16_idx,
+                                            v16x16 *vst, int pixels_wide,
+                                            int pixels_high, int is_key_frame) {
   int k;
   for (k = 0; k < 4; k++) {
     int x8_idx = x16_idx + ((k & 1) << 3);
@@ -303,10 +305,11 @@
   return (minmax_max - minmax_min);
 }
 
-static void fill_variance_4x4avg(const uint8_t *s, int sp, const uint8_t *d,
-                                 int dp, int x8_idx, int y8_idx, v8x8 *vst,
-                                 int pixels_wide, int pixels_high,
-                                 int is_key_frame) {
+static AOM_INLINE void fill_variance_4x4avg(const uint8_t *s, int sp,
+                                            const uint8_t *d, int dp,
+                                            int x8_idx, int y8_idx, v8x8 *vst,
+                                            int pixels_wide, int pixels_high,
+                                            int is_key_frame) {
   int k;
   for (k = 0; k < 4; k++) {
     int x4_idx = x8_idx + ((k & 1) << 2);
@@ -342,8 +345,8 @@
 // 0 - threshold_128x128, 1 - threshold_64x64, 2 - threshold_32x32,
 // 3 - vbp_threshold_16x16. 4 - vbp_threshold_8x8 (to split to 4x4 partition) is
 // currently only used on key frame.
-static void set_vbp_thresholds(AV1_COMP *cpi, int64_t thresholds[], int q,
-                               int content_state) {
+static AOM_INLINE void set_vbp_thresholds(AV1_COMP *cpi, int64_t thresholds[],
+                                          int q, int content_state) {
   AV1_COMMON *const cm = &cpi->common;
   const int is_key_frame = frame_is_intra_only(cm);
   const int threshold_multiplier = is_key_frame ? 40 : 1;
@@ -380,10 +383,10 @@
   }
 }
 
-static void set_low_temp_var_flag(AV1_COMP *cpi, MACROBLOCK *x, MACROBLOCKD *xd,
-                                  v128x128 *vt, int64_t thresholds[],
-                                  MV_REFERENCE_FRAME ref_frame_partition,
-                                  int mi_col, int mi_row) {
+static AOM_INLINE void set_low_temp_var_flag(
+    AV1_COMP *cpi, MACROBLOCK *x, MACROBLOCKD *xd, v128x128 *vt,
+    int64_t thresholds[], MV_REFERENCE_FRAME ref_frame_partition, int mi_col,
+    int mi_row) {
   int i, j, k;
   AV1_COMMON *const cm = &cpi->common;
   const int mv_thr = cm->width > 640 ? 8 : 4;
@@ -496,8 +499,9 @@
   }
 }
 
-static void chroma_check(AV1_COMP *cpi, MACROBLOCK *x, BLOCK_SIZE bsize,
-                         unsigned int y_sad, int is_key_frame) {
+static AOM_INLINE void chroma_check(AV1_COMP *cpi, MACROBLOCK *x,
+                                    BLOCK_SIZE bsize, unsigned int y_sad,
+                                    int is_key_frame) {
   int i;
   MACROBLOCKD *xd = &x->e_mbd;