Make parameter names consistent

Rename parameters so that definitions match their declarations and the
prototypes in the rtcd_defs.pl files (e.g. usage -> reserved,
subx/suby -> subw/subh, p -> pitch, coeff -> input), and fix a
limt1 -> limit1 typo in a loop-filter prototype.

BUG=aomedia:2228

Change-Id: If701f16aec272e1df43174fb39d34c5a0f69babb
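---

Callers are unaffected by the aom_codec_enc_config_default() rename,
which only brings the definition in line with its declaration. A
minimal usage sketch, assuming an AV1 encoder build with the public
aom/aomcx.h header available:

  #include "aom/aom_encoder.h"
  #include "aom/aomcx.h"

  aom_codec_enc_cfg_t cfg;
  // 0 selects the default configuration; values above INT_MAX are
  // still rejected with AOM_CODEC_INVALID_PARAM, as before the rename.
  if (aom_codec_enc_config_default(aom_codec_av1_cx(), &cfg, 0) !=
      AOM_CODEC_OK) {
    // handle the error
  }
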
diff --git a/aom/src/aom_encoder.c b/aom/src/aom_encoder.c
index 01917c9..7270797 100644
--- a/aom/src/aom_encoder.c
+++ b/aom/src/aom_encoder.c
@@ -144,12 +144,12 @@
 
 aom_codec_err_t aom_codec_enc_config_default(aom_codec_iface_t *iface,
                                              aom_codec_enc_cfg_t *cfg,
-                                             unsigned int usage) {
+                                             unsigned int reserved) {
   aom_codec_err_t res;
   aom_codec_enc_cfg_map_t *map;
   int i;
 
-  if (!iface || !cfg || usage > INT_MAX)
+  if (!iface || !cfg || reserved > INT_MAX)
     res = AOM_CODEC_INVALID_PARAM;
   else if (!(iface->caps & AOM_CODEC_CAP_ENCODER))
     res = AOM_CODEC_INCAPABLE;
@@ -158,9 +158,9 @@
 
     for (i = 0; i < iface->enc.cfg_map_count; ++i) {
       map = iface->enc.cfg_maps + i;
-      if (map->usage == (int)usage) {
+      if (map->usage == (int)reserved) {
         *cfg = map->cfg;
-        cfg->g_usage = usage;
+        cfg->g_usage = reserved;
         res = AOM_CODEC_OK;
         break;
       }
diff --git a/aom_dsp/aom_dsp_rtcd_defs.pl b/aom_dsp/aom_dsp_rtcd_defs.pl
index aa90ab7..b654a9a 100755
--- a/aom_dsp/aom_dsp_rtcd_defs.pl
+++ b/aom_dsp/aom_dsp_rtcd_defs.pl
@@ -350,7 +350,7 @@
 #
 # Sub Pixel Filters
 #
-add_proto qw/void aom_convolve_copy/,             "const uint8_t *src, ptrdiff_t src_stride, uint8_t *dst, ptrdiff_t dst_stride, const int16_t *filter_x, int x_step_q4, const int16_t *filter_y, int y_step_q4, int w, int h";
+add_proto qw/void aom_convolve_copy/,             "const uint8_t *src, ptrdiff_t src_stride, uint8_t *dst, ptrdiff_t dst_stride, const int16_t *filter_x, int filter_x_stride, const int16_t *filter_y, int filter_y_stride, int w, int h";
 add_proto qw/void aom_convolve8_horiz/,           "const uint8_t *src, ptrdiff_t src_stride, uint8_t *dst, ptrdiff_t dst_stride, const int16_t *filter_x, int x_step_q4, const int16_t *filter_y, int y_step_q4, int w, int h";
 add_proto qw/void aom_convolve8_vert/,            "const uint8_t *src, ptrdiff_t src_stride, uint8_t *dst, ptrdiff_t dst_stride, const int16_t *filter_x, int x_step_q4, const int16_t *filter_y, int y_step_q4, int w, int h";
 
@@ -358,7 +358,7 @@
 specialize qw/aom_convolve8_horiz     sse2 ssse3/, "$avx2_ssse3";
 specialize qw/aom_convolve8_vert      sse2 ssse3/, "$avx2_ssse3";
 
-add_proto qw/void aom_highbd_convolve_copy/, "const uint8_t *src, ptrdiff_t src_stride, uint8_t *dst, ptrdiff_t dst_stride, const int16_t *filter_x, int x_step_q4, const int16_t *filter_y, int y_step_q4, int w, int h, int bd";
+add_proto qw/void aom_highbd_convolve_copy/, "const uint8_t *src, ptrdiff_t src_stride, uint8_t *dst, ptrdiff_t dst_stride, const int16_t *filter_x, int filter_x_stride, const int16_t *filter_y, int filter_y_stride, int w, int h, int bd";
 specialize qw/aom_highbd_convolve_copy sse2 avx2/;
 
 add_proto qw/void aom_highbd_convolve8_horiz/, "const uint8_t *src, ptrdiff_t src_stride, uint8_t *dst, ptrdiff_t dst_stride, const int16_t *filter_x, int x_step_q4, const int16_t *filter_y, int y_step_q4, int w, int h, int bd";
@@ -445,7 +445,7 @@
 add_proto qw/void aom_highbd_lpf_horizontal_14/, "uint16_t *s, int pitch, const uint8_t *blimit, const uint8_t *limit, const uint8_t *thresh, int bd";
 specialize qw/aom_highbd_lpf_horizontal_14 sse2/;
 
-add_proto qw/void aom_highbd_lpf_horizontal_14_dual/, "uint16_t *s, int pitch, const uint8_t *blimit0, const uint8_t *limit0, const uint8_t *thresh0, const uint8_t *blimit1, const uint8_t *limt1, const uint8_t *thresh1,int bd";
+add_proto qw/void aom_highbd_lpf_horizontal_14_dual/, "uint16_t *s, int pitch, const uint8_t *blimit0, const uint8_t *limit0, const uint8_t *thresh0, const uint8_t *blimit1, const uint8_t *limit1, const uint8_t *thresh1, int bd";
 specialize qw/aom_highbd_lpf_horizontal_14_dual sse2 avx2/;
 
 add_proto qw/void aom_highbd_lpf_horizontal_6/, "uint16_t *s, int pitch, const uint8_t *blimit, const uint8_t *limit, const uint8_t *thresh, int bd";
@@ -548,19 +548,19 @@
 #
 # Alpha blending with mask
 #
-add_proto qw/void aom_lowbd_blend_a64_d16_mask/, "uint8_t *dst, uint32_t dst_stride, const CONV_BUF_TYPE *src0, uint32_t src0_stride, const CONV_BUF_TYPE *src1, uint32_t src1_stride, const uint8_t *mask, uint32_t mask_stride, int w, int h, int subx, int suby, ConvolveParams *conv_params";
+add_proto qw/void aom_lowbd_blend_a64_d16_mask/, "uint8_t *dst, uint32_t dst_stride, const CONV_BUF_TYPE *src0, uint32_t src0_stride, const CONV_BUF_TYPE *src1, uint32_t src1_stride, const uint8_t *mask, uint32_t mask_stride, int w, int h, int subw, int subh, ConvolveParams *conv_params";
 specialize qw/aom_lowbd_blend_a64_d16_mask sse4_1 avx2 neon/;
-add_proto qw/void aom_blend_a64_mask/, "uint8_t *dst, uint32_t dst_stride, const uint8_t *src0, uint32_t src0_stride, const uint8_t *src1, uint32_t src1_stride, const uint8_t *mask, uint32_t mask_stride, int w, int h, int subx, int suby";
+add_proto qw/void aom_blend_a64_mask/, "uint8_t *dst, uint32_t dst_stride, const uint8_t *src0, uint32_t src0_stride, const uint8_t *src1, uint32_t src1_stride, const uint8_t *mask, uint32_t mask_stride, int w, int h, int subw, int subh";
 add_proto qw/void aom_blend_a64_hmask/, "uint8_t *dst, uint32_t dst_stride, const uint8_t *src0, uint32_t src0_stride, const uint8_t *src1, uint32_t src1_stride, const uint8_t *mask, int w, int h";
 add_proto qw/void aom_blend_a64_vmask/, "uint8_t *dst, uint32_t dst_stride, const uint8_t *src0, uint32_t src0_stride, const uint8_t *src1, uint32_t src1_stride, const uint8_t *mask, int w, int h";
 specialize "aom_blend_a64_mask", qw/sse4_1 avx2/;
 specialize "aom_blend_a64_hmask", qw/sse4_1 neon/;
 specialize "aom_blend_a64_vmask", qw/sse4_1 neon/;
 
-add_proto qw/void aom_highbd_blend_a64_mask/, "uint8_t *dst, uint32_t dst_stride, const uint8_t *src0, uint32_t src0_stride, const uint8_t *src1, uint32_t src1_stride, const uint8_t *mask, uint32_t mask_stride, int w, int h, int subx, int suby, int bd";
+add_proto qw/void aom_highbd_blend_a64_mask/, "uint8_t *dst, uint32_t dst_stride, const uint8_t *src0, uint32_t src0_stride, const uint8_t *src1, uint32_t src1_stride, const uint8_t *mask, uint32_t mask_stride, int w, int h, int subw, int subh, int bd";
 add_proto qw/void aom_highbd_blend_a64_hmask/, "uint8_t *dst, uint32_t dst_stride, const uint8_t *src0, uint32_t src0_stride, const uint8_t *src1, uint32_t src1_stride, const uint8_t *mask, int w, int h, int bd";
 add_proto qw/void aom_highbd_blend_a64_vmask/, "uint8_t *dst, uint32_t dst_stride, const uint8_t *src0, uint32_t src0_stride, const uint8_t *src1, uint32_t src1_stride, const uint8_t *mask, int w, int h, int bd";
-add_proto qw/void aom_highbd_blend_a64_d16_mask/, "uint8_t *dst, uint32_t dst_stride, const CONV_BUF_TYPE *src0, uint32_t src0_stride, const CONV_BUF_TYPE *src1, uint32_t src1_stride, const uint8_t *mask, uint32_t mask_stride, int w, int h, int subx, int suby, ConvolveParams *conv_params, const int bd";
+add_proto qw/void aom_highbd_blend_a64_d16_mask/, "uint8_t *dst, uint32_t dst_stride, const CONV_BUF_TYPE *src0, uint32_t src0_stride, const CONV_BUF_TYPE *src1, uint32_t src1_stride, const uint8_t *mask, uint32_t mask_stride, int w, int h, int subw, int subh, ConvolveParams *conv_params, const int bd";
 specialize "aom_highbd_blend_a64_mask", qw/sse4_1/;
 specialize "aom_highbd_blend_a64_hmask", qw/sse4_1/;
 specialize "aom_highbd_blend_a64_vmask", qw/sse4_1/;
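In the blend prototypes above, subw/subh indicate whether the mask is
stored at twice the block resolution horizontally/vertically. A hedged
call sketch (buffer names are assumed, not from this patch):

  // Blend two 8x8 predictors under a 16x16 mask (4:2:0 chroma case),
  // i.e. subw = subh = 1; mask values lie in [0, AOM_BLEND_A64_MAX_ALPHA].
  aom_blend_a64_mask(dst, dst_stride, src0, src0_stride,
                     src1, src1_stride, mask, mask_stride,
                     /*w=*/8, /*h=*/8, /*subw=*/1, /*subh=*/1);
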
diff --git a/aom_dsp/loopfilter.c b/aom_dsp/loopfilter.c
index a3f2618..40a02cc 100644
--- a/aom_dsp/loopfilter.c
+++ b/aom_dsp/loopfilter.c
@@ -865,10 +865,10 @@
   }
 }
 
-void aom_highbd_lpf_horizontal_14_c(uint16_t *s, int p, const uint8_t *blimit,
-                                    const uint8_t *limit, const uint8_t *thresh,
-                                    int bd) {
-  highbd_mb_lpf_horizontal_edge_w(s, p, blimit, limit, thresh, 1, bd);
+void aom_highbd_lpf_horizontal_14_c(uint16_t *s, int pitch,
+                                    const uint8_t *blimit, const uint8_t *limit,
+                                    const uint8_t *thresh, int bd) {
+  highbd_mb_lpf_horizontal_edge_w(s, pitch, blimit, limit, thresh, 1, bd);
 }
 
 void aom_highbd_lpf_horizontal_14_dual_c(
diff --git a/aom_dsp/x86/blend_a64_mask_avx2.c b/aom_dsp/x86/blend_a64_mask_avx2.c
index 057f615..f38c43f 100644
--- a/aom_dsp/x86/blend_a64_mask_avx2.c
+++ b/aom_dsp/x86/blend_a64_mask_avx2.c
@@ -870,7 +870,7 @@
                              const uint8_t *src0, uint32_t src0_stride,
                              const uint8_t *src1, uint32_t src1_stride,
                              const uint8_t *mask, uint32_t mask_stride, int w,
-                             int h, int subx, int suby) {
+                             int h, int subw, int subh) {
   assert(IMPLIES(src0 == dst, src0_stride == dst_stride));
   assert(IMPLIES(src1 == dst, src1_stride == dst_stride));
 
@@ -881,15 +881,15 @@
 
   if (UNLIKELY((h | w) & 3)) {  // if (w <= 2 || h <= 2)
     aom_blend_a64_mask_c(dst, dst_stride, src0, src0_stride, src1, src1_stride,
-                         mask, mask_stride, w, h, subx, suby);
+                         mask, mask_stride, w, h, subw, subh);
   } else {
-    if (subx & suby) {
+    if (subw & subh) {
       blend_a64_mask_sx_sy_avx2(dst, dst_stride, src0, src0_stride, src1,
                                 src1_stride, mask, mask_stride, w, h);
-    } else if (subx) {
+    } else if (subw) {
       blend_a64_mask_sx_avx2(dst, dst_stride, src0, src0_stride, src1,
                              src1_stride, mask, mask_stride, w, h);
-    } else if (suby) {
+    } else if (subh) {
       blend_a64_mask_sy_avx2(dst, dst_stride, src0, src0_stride, src1,
                              src1_stride, mask, mask_stride, w, h);
     } else {
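The (h | w) & 3 guard in aom_blend_a64_mask_avx2() is a branch-free
test for "w or h is not a multiple of 4"; with the power-of-two block
sizes used here that is exactly w <= 2 || h <= 2, hence the comment
beside it. An equivalent spelled-out form, as a sketch:

  // Same predicate as UNLIKELY((h | w) & 3) for non-negative w, h.
  static int needs_c_fallback(int w, int h) {
    return (w % 4 != 0) || (h % 4 != 0);
  }
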
diff --git a/aom_dsp/x86/blend_a64_mask_sse4.c b/aom_dsp/x86/blend_a64_mask_sse4.c
index b7a2468..22c304e 100644
--- a/aom_dsp/x86/blend_a64_mask_sse4.c
+++ b/aom_dsp/x86/blend_a64_mask_sse4.c
@@ -386,7 +386,7 @@
                                const uint8_t *src0, uint32_t src0_stride,
                                const uint8_t *src1, uint32_t src1_stride,
                                const uint8_t *mask, uint32_t mask_stride, int w,
-                               int h, int subx, int suby) {
+                               int h, int subw, int subh) {
   typedef void (*blend_fn)(
       uint8_t * dst, uint32_t dst_stride, const uint8_t *src0,
       uint32_t src0_stride, const uint8_t *src1, uint32_t src1_stride,
@@ -415,9 +415,9 @@
 
   if (UNLIKELY((h | w) & 3)) {  // if (w <= 2 || h <= 2)
     aom_blend_a64_mask_c(dst, dst_stride, src0, src0_stride, src1, src1_stride,
-                         mask, mask_stride, w, h, subx, suby);
+                         mask, mask_stride, w, h, subw, subh);
   } else {
-    blend[(w >> 2) & 3][subx != 0][suby != 0](dst, dst_stride, src0,
+    blend[(w >> 2) & 3][subw != 0][subh != 0](dst, dst_stride, src0,
                                               src0_stride, src1, src1_stride,
                                               mask, mask_stride, w, h);
   }
@@ -819,13 +819,13 @@
                                       const uint8_t *src1_8,
                                       uint32_t src1_stride, const uint8_t *mask,
                                       uint32_t mask_stride, int w, int h,
-                                      int subx, int suby, int bd) {
+                                      int subw, int subh, int bd) {
   typedef void (*blend_fn)(
       uint16_t * dst, uint32_t dst_stride, const uint16_t *src0,
       uint32_t src0_stride, const uint16_t *src1, uint32_t src1_stride,
       const uint8_t *mask, uint32_t mask_stride, int w, int h);
 
-  // Dimensions are: bd_index X width_index X subx X suby
+  // Dimensions are: bd_index X width_index X subw X subh
   static const blend_fn blend[2][2][2][2] = {
     {   // bd == 8 or 10
       { // w % 8 == 0
@@ -858,14 +858,14 @@
   assert(bd == 8 || bd == 10 || bd == 12);
   if (UNLIKELY((h | w) & 3)) {  // if (w <= 2 || h <= 2)
     aom_highbd_blend_a64_mask_c(dst_8, dst_stride, src0_8, src0_stride, src1_8,
-                                src1_stride, mask, mask_stride, w, h, subx,
-                                suby, bd);
+                                src1_stride, mask, mask_stride, w, h, subw,
+                                subh, bd);
   } else {
     uint16_t *const dst = CONVERT_TO_SHORTPTR(dst_8);
     const uint16_t *const src0 = CONVERT_TO_SHORTPTR(src0_8);
     const uint16_t *const src1 = CONVERT_TO_SHORTPTR(src1_8);
 
-    blend[bd == 12][(w >> 2) & 1][subx != 0][suby != 0](
+    blend[bd == 12][(w >> 2) & 1][subw != 0][subh != 0](
         dst, dst_stride, src0, src0_stride, src1, src1_stride, mask,
         mask_stride, w, h);
   }
diff --git a/aom_dsp/x86/highbd_loopfilter_sse2.c b/aom_dsp/x86/highbd_loopfilter_sse2.c
index 70b91c6..b906db7 100644
--- a/aom_dsp/x86/highbd_loopfilter_sse2.c
+++ b/aom_dsp/x86/highbd_loopfilter_sse2.c
@@ -497,8 +497,9 @@
 }
 
 void aom_highbd_lpf_horizontal_14_sse2(uint16_t *s, int pitch,
-                                       const uint8_t *blt, const uint8_t *lt,
-                                       const uint8_t *thr, int bd) {
+                                       const uint8_t *blimit,
+                                       const uint8_t *limit,
+                                       const uint8_t *thresh, int bd) {
   __m128i p[7], q[7], pq[7];
   int i;
 
@@ -507,7 +508,7 @@
     q[i] = _mm_loadl_epi64((__m128i *)(s + i * pitch));
   }
 
-  highbd_lpf_internal_14_sse2(p, q, pq, blt, lt, thr, bd);
+  highbd_lpf_internal_14_sse2(p, q, pq, blimit, limit, thresh, bd);
 
   for (i = 0; i < 6; i++) {
     _mm_storel_epi64((__m128i *)(s - (i + 1) * pitch), pq[i]);
diff --git a/av1/common/av1_rtcd_defs.pl b/av1/common/av1_rtcd_defs.pl
index e69d275..d1a38fa 100644
--- a/av1/common/av1_rtcd_defs.pl
+++ b/av1/common/av1_rtcd_defs.pl
@@ -72,7 +72,7 @@
 
 add_proto qw/void av1_wiener_convolve_add_src/,       "const uint8_t *src, ptrdiff_t src_stride, uint8_t *dst, ptrdiff_t dst_stride, const int16_t *filter_x, int x_step_q4, const int16_t *filter_y, int y_step_q4, int w, int h, const ConvolveParams *conv_params";
 
-add_proto qw/void av1_highbd_wiener_convolve_add_src/, "const uint8_t *src, ptrdiff_t src_stride, uint8_t *dst, ptrdiff_t dst_stride, const int16_t *filter_x, int x_step_q4, const int16_t *filter_y, int y_step_q4, int w, int h, const ConvolveParams *conv_params, int bps";
+add_proto qw/void av1_highbd_wiener_convolve_add_src/, "const uint8_t *src, ptrdiff_t src_stride, uint8_t *dst, ptrdiff_t dst_stride, const int16_t *filter_x, int x_step_q4, const int16_t *filter_y, int y_step_q4, int w, int h, const ConvolveParams *conv_params, int bd";
 
 specialize qw/av1_wiener_convolve_add_src sse2 avx2 neon/;
 specialize qw/av1_highbd_wiener_convolve_add_src ssse3/;
diff --git a/av1/common/quant_common.h b/av1/common/quant_common.h
index d1f52a6..193237d 100644
--- a/av1/common/quant_common.h
+++ b/av1/common/quant_common.h
@@ -51,9 +51,9 @@
   return first + (qindex * (last + 1 - first)) / QINDEX_RANGE;
 }
 void av1_qm_init(struct AV1Common *cm);
-const qm_val_t *av1_iqmatrix(struct AV1Common *cm, int qindex, int comp,
+const qm_val_t *av1_iqmatrix(struct AV1Common *cm, int qmlevel, int plane,
                              TX_SIZE tx_size);
-const qm_val_t *av1_qmatrix(struct AV1Common *cm, int qindex, int comp,
+const qm_val_t *av1_qmatrix(struct AV1Common *cm, int qmlevel, int plane,
                             TX_SIZE tx_size);
 
 #ifdef __cplusplus
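The renamed arguments reflect what av1_qmatrix()/av1_iqmatrix()
actually index on: a quantization-matrix level and a plane
(0 = Y, 1 = U, 2 = V), not a qindex. A hedged call sketch (cm and the
level value are assumptions):

  // Fetch the inverse quant matrix for luma, qm level 5, 8x8 transforms.
  const qm_val_t *iqm = av1_iqmatrix(cm, /*qmlevel=*/5, /*plane=*/0, TX_8X8);
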
diff --git a/av1/common/x86/cfl_simd.h b/av1/common/x86/cfl_simd.h
index c1ad182..f2d4ac7 100644
--- a/av1/common/x86/cfl_simd.h
+++ b/av1/common/x86/cfl_simd.h
@@ -15,169 +15,169 @@
 #include "av1/common/blockd.h"
 
 // SSSE3 version is optimal for width == 4; we reuse them in AVX2
-void cfl_subsample_lbd_420_4x4_ssse3(const uint8_t *input, int input_stride,
+void cfl_subsample_lbd_420_4x4_ssse3(const uint8_t *cfl_type, int input_stride,
                                      uint16_t *output_q3);
-void cfl_subsample_lbd_420_4x8_ssse3(const uint8_t *input, int input_stride,
+void cfl_subsample_lbd_420_4x8_ssse3(const uint8_t *cfl_type, int input_stride,
                                      uint16_t *output_q3);
-void cfl_subsample_lbd_420_4x16_ssse3(const uint8_t *input, int input_stride,
+void cfl_subsample_lbd_420_4x16_ssse3(const uint8_t *cfl_type, int input_stride,
                                       uint16_t *output_q3);
 
 // SSSE3 version is optimal for width == 8; we reuse it in AVX2
-void cfl_subsample_lbd_420_8x4_ssse3(const uint8_t *input, int input_stride,
+void cfl_subsample_lbd_420_8x4_ssse3(const uint8_t *cfl_type, int input_stride,
                                      uint16_t *output_q3);
-void cfl_subsample_lbd_420_8x8_ssse3(const uint8_t *input, int input_stride,
+void cfl_subsample_lbd_420_8x8_ssse3(const uint8_t *cfl_type, int input_stride,
                                      uint16_t *output_q3);
-void cfl_subsample_lbd_420_8x16_ssse3(const uint8_t *input, int input_stride,
+void cfl_subsample_lbd_420_8x16_ssse3(const uint8_t *cfl_type, int input_stride,
                                       uint16_t *output_q3);
-void cfl_subsample_lbd_420_8x32_ssse3(const uint8_t *input, int input_stride,
+void cfl_subsample_lbd_420_8x32_ssse3(const uint8_t *cfl_type, int input_stride,
                                       uint16_t *output_q3);
 
 // SSSE3 version is optimal for width == 16; we reuse it in AVX2
-void cfl_subsample_lbd_420_16x4_ssse3(const uint8_t *input, int input_stride,
+void cfl_subsample_lbd_420_16x4_ssse3(const uint8_t *cfl_type, int input_stride,
                                       uint16_t *output_q3);
-void cfl_subsample_lbd_420_16x8_ssse3(const uint8_t *input, int input_stride,
+void cfl_subsample_lbd_420_16x8_ssse3(const uint8_t *cfl_type, int input_stride,
                                       uint16_t *output_q3);
-void cfl_subsample_lbd_420_16x16_ssse3(const uint8_t *input, int input_stride,
-                                       uint16_t *output_q3);
-void cfl_subsample_lbd_420_16x32_ssse3(const uint8_t *input, int input_stride,
-                                       uint16_t *output_q3);
+void cfl_subsample_lbd_420_16x16_ssse3(const uint8_t *cfl_type,
+                                       int input_stride, uint16_t *output_q3);
+void cfl_subsample_lbd_420_16x32_ssse3(const uint8_t *cfl_type,
+                                       int input_stride, uint16_t *output_q3);
 
 // SSSE3 version is optimal for width == 4; we reuse them in AVX2
-void cfl_subsample_lbd_422_4x4_ssse3(const uint8_t *input, int input_stride,
+void cfl_subsample_lbd_422_4x4_ssse3(const uint8_t *cfl_type, int input_stride,
                                      uint16_t *output_q3);
-void cfl_subsample_lbd_422_4x8_ssse3(const uint8_t *input, int input_stride,
+void cfl_subsample_lbd_422_4x8_ssse3(const uint8_t *cfl_type, int input_stride,
                                      uint16_t *output_q3);
-void cfl_subsample_lbd_422_4x16_ssse3(const uint8_t *input, int input_stride,
+void cfl_subsample_lbd_422_4x16_ssse3(const uint8_t *cfl_type, int input_stride,
                                       uint16_t *output_q3);
 
 // SSSE3 version is optimal for width == 8; we reuse it in AVX2
-void cfl_subsample_lbd_422_8x4_ssse3(const uint8_t *input, int input_stride,
+void cfl_subsample_lbd_422_8x4_ssse3(const uint8_t *cfl_type, int input_stride,
                                      uint16_t *output_q3);
-void cfl_subsample_lbd_422_8x8_ssse3(const uint8_t *input, int input_stride,
+void cfl_subsample_lbd_422_8x8_ssse3(const uint8_t *cfl_type, int input_stride,
                                      uint16_t *output_q3);
-void cfl_subsample_lbd_422_8x16_ssse3(const uint8_t *input, int input_stride,
+void cfl_subsample_lbd_422_8x16_ssse3(const uint8_t *cfl_type, int input_stride,
                                       uint16_t *output_q3);
-void cfl_subsample_lbd_422_8x32_ssse3(const uint8_t *input, int input_stride,
+void cfl_subsample_lbd_422_8x32_ssse3(const uint8_t *cfl_type, int input_stride,
                                       uint16_t *output_q3);
 
 // SSSE3 version is optimal for width == 16; we reuse it in AVX2
-void cfl_subsample_lbd_422_16x4_ssse3(const uint8_t *input, int input_stride,
+void cfl_subsample_lbd_422_16x4_ssse3(const uint8_t *cfl_type, int input_stride,
                                       uint16_t *output_q3);
-void cfl_subsample_lbd_422_16x8_ssse3(const uint8_t *input, int input_stride,
+void cfl_subsample_lbd_422_16x8_ssse3(const uint8_t *cfl_type, int input_stride,
                                       uint16_t *output_q3);
-void cfl_subsample_lbd_422_16x16_ssse3(const uint8_t *input, int input_stride,
-                                       uint16_t *output_q3);
-void cfl_subsample_lbd_422_16x32_ssse3(const uint8_t *input, int input_stride,
-                                       uint16_t *output_q3);
+void cfl_subsample_lbd_422_16x16_ssse3(const uint8_t *cfl_type,
+                                       int input_stride, uint16_t *output_q3);
+void cfl_subsample_lbd_422_16x32_ssse3(const uint8_t *cfl_type,
+                                       int input_stride, uint16_t *output_q3);
 
 // SSSE3 version is optimal for width == 4; we reuse them in AVX2
-void cfl_subsample_lbd_444_4x4_ssse3(const uint8_t *input, int input_stride,
+void cfl_subsample_lbd_444_4x4_ssse3(const uint8_t *cfl_type, int input_stride,
                                      uint16_t *output_q3);
-void cfl_subsample_lbd_444_4x8_ssse3(const uint8_t *input, int input_stride,
+void cfl_subsample_lbd_444_4x8_ssse3(const uint8_t *cfl_type, int input_stride,
                                      uint16_t *output_q3);
-void cfl_subsample_lbd_444_4x16_ssse3(const uint8_t *input, int input_stride,
+void cfl_subsample_lbd_444_4x16_ssse3(const uint8_t *cfl_type, int input_stride,
                                       uint16_t *output_q3);
 
 // SSSE3 version is optimal for width == 8; we reuse it in AVX2
-void cfl_subsample_lbd_444_8x4_ssse3(const uint8_t *input, int input_stride,
+void cfl_subsample_lbd_444_8x4_ssse3(const uint8_t *cfl_type, int input_stride,
                                      uint16_t *output_q3);
-void cfl_subsample_lbd_444_8x8_ssse3(const uint8_t *input, int input_stride,
+void cfl_subsample_lbd_444_8x8_ssse3(const uint8_t *cfl_type, int input_stride,
                                      uint16_t *output_q3);
-void cfl_subsample_lbd_444_8x16_ssse3(const uint8_t *input, int input_stride,
+void cfl_subsample_lbd_444_8x16_ssse3(const uint8_t *cfl_type, int input_stride,
                                       uint16_t *output_q3);
-void cfl_subsample_lbd_444_8x32_ssse3(const uint8_t *input, int input_stride,
+void cfl_subsample_lbd_444_8x32_ssse3(const uint8_t *cfl_type, int input_stride,
                                       uint16_t *output_q3);
 
 // SSSE3 version is optimal for width == 16; we reuse it in AVX2
-void cfl_subsample_lbd_444_16x4_ssse3(const uint8_t *input, int input_stride,
+void cfl_subsample_lbd_444_16x4_ssse3(const uint8_t *cfl_type, int input_stride,
                                       uint16_t *output_q3);
-void cfl_subsample_lbd_444_16x8_ssse3(const uint8_t *input, int input_stride,
+void cfl_subsample_lbd_444_16x8_ssse3(const uint8_t *cfl_type, int input_stride,
                                       uint16_t *output_q3);
-void cfl_subsample_lbd_444_16x16_ssse3(const uint8_t *input, int input_stride,
-                                       uint16_t *output_q3);
-void cfl_subsample_lbd_444_16x32_ssse3(const uint8_t *input, int input_stride,
-                                       uint16_t *output_q3);
+void cfl_subsample_lbd_444_16x16_ssse3(const uint8_t *cfl_type,
+                                       int input_stride, uint16_t *output_q3);
+void cfl_subsample_lbd_444_16x32_ssse3(const uint8_t *cfl_type,
+                                       int input_stride, uint16_t *output_q3);
 
-void cfl_subsample_hbd_420_4x4_ssse3(const uint16_t *input, int input_stride,
+void cfl_subsample_hbd_420_4x4_ssse3(const uint16_t *cfl_type, int input_stride,
                                      uint16_t *output_q3);
-void cfl_subsample_hbd_420_4x8_ssse3(const uint16_t *input, int input_stride,
+void cfl_subsample_hbd_420_4x8_ssse3(const uint16_t *cfl_type, int input_stride,
                                      uint16_t *output_q3);
-void cfl_subsample_hbd_420_4x16_ssse3(const uint16_t *input, int input_stride,
-                                      uint16_t *output_q3);
+void cfl_subsample_hbd_420_4x16_ssse3(const uint16_t *cfl_type,
+                                      int input_stride, uint16_t *output_q3);
 
 // SSSE3 version is optimal for width == 8; we reuse it in AVX2
-void cfl_subsample_hbd_420_8x4_ssse3(const uint16_t *input, int input_stride,
+void cfl_subsample_hbd_420_8x4_ssse3(const uint16_t *cfl_type, int input_stride,
                                      uint16_t *output_q3);
-void cfl_subsample_hbd_420_8x8_ssse3(const uint16_t *input, int input_stride,
+void cfl_subsample_hbd_420_8x8_ssse3(const uint16_t *cfl_type, int input_stride,
                                      uint16_t *output_q3);
-void cfl_subsample_hbd_420_8x16_ssse3(const uint16_t *input, int input_stride,
-                                      uint16_t *output_q3);
-void cfl_subsample_hbd_420_8x32_ssse3(const uint16_t *input, int input_stride,
-                                      uint16_t *output_q3);
+void cfl_subsample_hbd_420_8x16_ssse3(const uint16_t *cfl_type,
+                                      int input_stride, uint16_t *output_q3);
+void cfl_subsample_hbd_420_8x32_ssse3(const uint16_t *cfl_type,
+                                      int input_stride, uint16_t *output_q3);
 
 // SSSE3 version is faster for width == 16; we reuse it in AVX2
-void cfl_subsample_hbd_420_16x4_ssse3(const uint16_t *input, int input_stride,
-                                      uint16_t *output_q3);
-void cfl_subsample_hbd_420_16x8_ssse3(const uint16_t *input, int input_stride,
-                                      uint16_t *output_q3);
-void cfl_subsample_hbd_420_16x16_ssse3(const uint16_t *input, int input_stride,
-                                       uint16_t *output_q3);
-void cfl_subsample_hbd_420_16x32_ssse3(const uint16_t *input, int input_stride,
-                                       uint16_t *output_q3);
+void cfl_subsample_hbd_420_16x4_ssse3(const uint16_t *cfl_type,
+                                      int input_stride, uint16_t *output_q3);
+void cfl_subsample_hbd_420_16x8_ssse3(const uint16_t *cfl_type,
+                                      int input_stride, uint16_t *output_q3);
+void cfl_subsample_hbd_420_16x16_ssse3(const uint16_t *cfl_type,
+                                       int input_stride, uint16_t *output_q3);
+void cfl_subsample_hbd_420_16x32_ssse3(const uint16_t *cfl_type,
+                                       int input_stride, uint16_t *output_q3);
 
-void cfl_subsample_hbd_422_4x4_ssse3(const uint16_t *input, int input_stride,
+void cfl_subsample_hbd_422_4x4_ssse3(const uint16_t *cfl_type, int input_stride,
                                      uint16_t *output_q3);
-void cfl_subsample_hbd_422_4x8_ssse3(const uint16_t *input, int input_stride,
+void cfl_subsample_hbd_422_4x8_ssse3(const uint16_t *cfl_type, int input_stride,
                                      uint16_t *output_q3);
-void cfl_subsample_hbd_422_4x16_ssse3(const uint16_t *input, int input_stride,
-                                      uint16_t *output_q3);
+void cfl_subsample_hbd_422_4x16_ssse3(const uint16_t *cfl_type,
+                                      int input_stride, uint16_t *output_q3);
 
 // SSSE3 version is optimal for width == 8; we reuse it in AVX2
-void cfl_subsample_hbd_422_8x4_ssse3(const uint16_t *input, int input_stride,
+void cfl_subsample_hbd_422_8x4_ssse3(const uint16_t *cfl_type, int input_stride,
                                      uint16_t *output_q3);
-void cfl_subsample_hbd_422_8x8_ssse3(const uint16_t *input, int input_stride,
+void cfl_subsample_hbd_422_8x8_ssse3(const uint16_t *cfl_type, int input_stride,
                                      uint16_t *output_q3);
-void cfl_subsample_hbd_422_8x16_ssse3(const uint16_t *input, int input_stride,
-                                      uint16_t *output_q3);
-void cfl_subsample_hbd_422_8x32_ssse3(const uint16_t *input, int input_stride,
-                                      uint16_t *output_q3);
+void cfl_subsample_hbd_422_8x16_ssse3(const uint16_t *cfl_type,
+                                      int input_stride, uint16_t *output_q3);
+void cfl_subsample_hbd_422_8x32_ssse3(const uint16_t *cfl_type,
+                                      int input_stride, uint16_t *output_q3);
 
 // SSSE3 version is faster for width == 16; we reuse it in AVX2
-void cfl_subsample_hbd_422_16x4_ssse3(const uint16_t *input, int input_stride,
-                                      uint16_t *output_q3);
-void cfl_subsample_hbd_422_16x8_ssse3(const uint16_t *input, int input_stride,
-                                      uint16_t *output_q3);
-void cfl_subsample_hbd_422_16x16_ssse3(const uint16_t *input, int input_stride,
-                                       uint16_t *output_q3);
-void cfl_subsample_hbd_422_16x32_ssse3(const uint16_t *input, int input_stride,
-                                       uint16_t *output_q3);
+void cfl_subsample_hbd_422_16x4_ssse3(const uint16_t *cfl_type,
+                                      int input_stride, uint16_t *output_q3);
+void cfl_subsample_hbd_422_16x8_ssse3(const uint16_t *cfl_type,
+                                      int input_stride, uint16_t *output_q3);
+void cfl_subsample_hbd_422_16x16_ssse3(const uint16_t *cfl_type,
+                                       int input_stride, uint16_t *output_q3);
+void cfl_subsample_hbd_422_16x32_ssse3(const uint16_t *cfl_type,
+                                       int input_stride, uint16_t *output_q3);
 
-void cfl_subsample_hbd_444_4x4_ssse3(const uint16_t *input, int input_stride,
+void cfl_subsample_hbd_444_4x4_ssse3(const uint16_t *cfl_type, int input_stride,
                                      uint16_t *output_q3);
-void cfl_subsample_hbd_444_4x8_ssse3(const uint16_t *input, int input_stride,
+void cfl_subsample_hbd_444_4x8_ssse3(const uint16_t *cfl_type, int input_stride,
                                      uint16_t *output_q3);
-void cfl_subsample_hbd_444_4x16_ssse3(const uint16_t *input, int input_stride,
-                                      uint16_t *output_q3);
+void cfl_subsample_hbd_444_4x16_ssse3(const uint16_t *cfl_type,
+                                      int input_stride, uint16_t *output_q3);
 
 // SSSE3 version is optimal for width == 8; we reuse it in AVX2
-void cfl_subsample_hbd_444_8x4_ssse3(const uint16_t *input, int input_stride,
+void cfl_subsample_hbd_444_8x4_ssse3(const uint16_t *cfl_type, int input_stride,
                                      uint16_t *output_q3);
-void cfl_subsample_hbd_444_8x8_ssse3(const uint16_t *input, int input_stride,
+void cfl_subsample_hbd_444_8x8_ssse3(const uint16_t *cfl_type, int input_stride,
                                      uint16_t *output_q3);
-void cfl_subsample_hbd_444_8x16_ssse3(const uint16_t *input, int input_stride,
-                                      uint16_t *output_q3);
-void cfl_subsample_hbd_444_8x32_ssse3(const uint16_t *input, int input_stride,
-                                      uint16_t *output_q3);
+void cfl_subsample_hbd_444_8x16_ssse3(const uint16_t *cfl_type,
+                                      int input_stride, uint16_t *output_q3);
+void cfl_subsample_hbd_444_8x32_ssse3(const uint16_t *cfl_type,
+                                      int input_stride, uint16_t *output_q3);
 
 // SSSE3 version is faster for width == 16; we reuse it in AVX2
-void cfl_subsample_hbd_444_16x4_ssse3(const uint16_t *input, int input_stride,
-                                      uint16_t *output_q3);
-void cfl_subsample_hbd_444_16x8_ssse3(const uint16_t *input, int input_stride,
-                                      uint16_t *output_q3);
-void cfl_subsample_hbd_444_16x16_ssse3(const uint16_t *input, int input_stride,
-                                       uint16_t *output_q3);
-void cfl_subsample_hbd_444_16x32_ssse3(const uint16_t *input, int input_stride,
-                                       uint16_t *output_q3);
+void cfl_subsample_hbd_444_16x4_ssse3(const uint16_t *cfl_type,
+                                      int input_stride, uint16_t *output_q3);
+void cfl_subsample_hbd_444_16x8_ssse3(const uint16_t *cfl_type,
+                                      int input_stride, uint16_t *output_q3);
+void cfl_subsample_hbd_444_16x16_ssse3(const uint16_t *cfl_type,
+                                       int input_stride, uint16_t *output_q3);
+void cfl_subsample_hbd_444_16x32_ssse3(const uint16_t *cfl_type,
+                                       int input_stride, uint16_t *output_q3);
 
 // SSE2 version is optimal for width == 4; we reuse them in AVX2
 void cfl_subtract_average_4x4_sse2(const uint16_t *src, int16_t *dst);
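All of the subsample kernels above share one shape: read a luma block,
downsample it per the chroma subsampling, and write Q3 fixed-point
values. A hedged call sketch (pixel buffers are assumed):

  // 4:2:0 subsample of an 8x8 low-bit-depth luma block into the CfL
  // prediction buffer; outputs are left-shifted by 3, hence "_q3".
  cfl_subsample_lbd_420_8x8_ssse3(input, input_stride, output_q3);
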
diff --git a/av1/common/x86/highbd_inv_txfm_sse4.c b/av1/common/x86/highbd_inv_txfm_sse4.c
index c6bf917..462d7b8 100644
--- a/av1/common/x86/highbd_inv_txfm_sse4.c
+++ b/av1/common/x86/highbd_inv_txfm_sse4.c
@@ -701,7 +701,7 @@
   out[2] = _mm_unpacklo_epi64(v[1], v[3]);
   out[3] = _mm_unpackhi_epi64(v[1], v[3]);
 }
-void av1_inv_txfm2d_add_4x4_sse4_1(const int32_t *coeff, uint16_t *output,
+void av1_inv_txfm2d_add_4x4_sse4_1(const int32_t *input, uint16_t *output,
                                    int stride, TX_TYPE tx_type, int bd) {
   __m128i in[4];
   const int8_t *shift = av1_inv_txfm_shift_ls[TX_4X4];
@@ -710,61 +710,61 @@
 
   switch (tx_type) {
     case DCT_DCT:
-      load_buffer_4x4(coeff, in);
+      load_buffer_4x4(input, in);
       idct4x4_sse4_1(in, in, av1_inv_cos_bit_row[txw_idx][txh_idx], 0, bd, 0);
       idct4x4_sse4_1(in, in, av1_inv_cos_bit_col[txw_idx][txh_idx], 1, bd, 0);
       write_buffer_4x4(in, output, stride, 0, 0, -shift[1], bd);
       break;
     case ADST_DCT:
-      load_buffer_4x4(coeff, in);
+      load_buffer_4x4(input, in);
       idct4x4_sse4_1(in, in, av1_inv_cos_bit_row[txw_idx][txh_idx], 0, bd, 0);
       iadst4x4_sse4_1(in, in, av1_inv_cos_bit_col[txw_idx][txh_idx], 1, bd, 0);
       write_buffer_4x4(in, output, stride, 0, 0, -shift[1], bd);
       break;
     case DCT_ADST:
-      load_buffer_4x4(coeff, in);
+      load_buffer_4x4(input, in);
       iadst4x4_sse4_1(in, in, av1_inv_cos_bit_row[txw_idx][txh_idx], 0, bd, 0);
       idct4x4_sse4_1(in, in, av1_inv_cos_bit_col[txw_idx][txh_idx], 1, bd, 0);
       write_buffer_4x4(in, output, stride, 0, 0, -shift[1], bd);
       break;
     case ADST_ADST:
-      load_buffer_4x4(coeff, in);
+      load_buffer_4x4(input, in);
       iadst4x4_sse4_1(in, in, av1_inv_cos_bit_row[txw_idx][txh_idx], 0, bd, 0);
       iadst4x4_sse4_1(in, in, av1_inv_cos_bit_col[txw_idx][txh_idx], 1, bd, 0);
       write_buffer_4x4(in, output, stride, 0, 0, -shift[1], bd);
       break;
     case FLIPADST_DCT:
-      load_buffer_4x4(coeff, in);
+      load_buffer_4x4(input, in);
       idct4x4_sse4_1(in, in, av1_inv_cos_bit_row[txw_idx][txh_idx], 0, bd, 0);
       iadst4x4_sse4_1(in, in, av1_inv_cos_bit_col[txw_idx][txh_idx], 1, bd, 0);
       write_buffer_4x4(in, output, stride, 0, 1, -shift[1], bd);
       break;
     case DCT_FLIPADST:
-      load_buffer_4x4(coeff, in);
+      load_buffer_4x4(input, in);
       iadst4x4_sse4_1(in, in, av1_inv_cos_bit_row[txw_idx][txh_idx], 0, bd, 0);
       idct4x4_sse4_1(in, in, av1_inv_cos_bit_col[txw_idx][txh_idx], 1, bd, 0);
       write_buffer_4x4(in, output, stride, 1, 0, -shift[1], bd);
       break;
     case FLIPADST_FLIPADST:
-      load_buffer_4x4(coeff, in);
+      load_buffer_4x4(input, in);
       iadst4x4_sse4_1(in, in, av1_inv_cos_bit_row[txw_idx][txh_idx], 0, bd, 0);
       iadst4x4_sse4_1(in, in, av1_inv_cos_bit_col[txw_idx][txh_idx], 1, bd, 0);
       write_buffer_4x4(in, output, stride, 1, 1, -shift[1], bd);
       break;
     case ADST_FLIPADST:
-      load_buffer_4x4(coeff, in);
+      load_buffer_4x4(input, in);
       iadst4x4_sse4_1(in, in, av1_inv_cos_bit_row[txw_idx][txh_idx], 0, bd, 0);
       iadst4x4_sse4_1(in, in, av1_inv_cos_bit_col[txw_idx][txh_idx], 1, bd, 0);
       write_buffer_4x4(in, output, stride, 1, 0, -shift[1], bd);
       break;
     case FLIPADST_ADST:
-      load_buffer_4x4(coeff, in);
+      load_buffer_4x4(input, in);
       iadst4x4_sse4_1(in, in, av1_inv_cos_bit_row[txw_idx][txh_idx], 0, bd, 0);
       iadst4x4_sse4_1(in, in, av1_inv_cos_bit_col[txw_idx][txh_idx], 1, bd, 0);
       write_buffer_4x4(in, output, stride, 0, 1, -shift[1], bd);
       break;
     case IDTX:
-      load_buffer_4x4(coeff, in);
+      load_buffer_4x4(input, in);
       iidentity4_sse4_1(in, in, av1_inv_cos_bit_row[txw_idx][txh_idx], 0, bd,
                         0);
       iidentity4_sse4_1(in, in, av1_inv_cos_bit_col[txw_idx][txh_idx], 1, bd,
@@ -772,42 +772,42 @@
       write_buffer_4x4(in, output, stride, 0, 0, -shift[1], bd);
       break;
     case V_DCT:
-      load_buffer_4x4(coeff, in);
+      load_buffer_4x4(input, in);
       iidentity4_sse4_1(in, in, av1_inv_cos_bit_row[txw_idx][txh_idx], 0, bd,
                         0);
       idct4x4_sse4_1(in, in, av1_inv_cos_bit_col[txw_idx][txh_idx], 1, bd, 0);
       write_buffer_4x4(in, output, stride, 0, 0, -shift[1], bd);
       break;
     case H_DCT:
-      load_buffer_4x4(coeff, in);
+      load_buffer_4x4(input, in);
       idct4x4_sse4_1(in, in, av1_inv_cos_bit_row[txw_idx][txh_idx], 0, bd, 0);
       iidentity4_sse4_1(in, in, av1_inv_cos_bit_col[txw_idx][txh_idx], 1, bd,
                         0);
       write_buffer_4x4(in, output, stride, 0, 0, -shift[1], bd);
       break;
     case V_ADST:
-      load_buffer_4x4(coeff, in);
+      load_buffer_4x4(input, in);
       iidentity4_sse4_1(in, in, av1_inv_cos_bit_row[txw_idx][txh_idx], 0, bd,
                         0);
       iadst4x4_sse4_1(in, in, av1_inv_cos_bit_col[txw_idx][txh_idx], 1, bd, 0);
       write_buffer_4x4(in, output, stride, 0, 0, -shift[1], bd);
       break;
     case H_ADST:
-      load_buffer_4x4(coeff, in);
+      load_buffer_4x4(input, in);
       iadst4x4_sse4_1(in, in, av1_inv_cos_bit_row[txw_idx][txh_idx], 0, bd, 0);
       iidentity4_sse4_1(in, in, av1_inv_cos_bit_col[txw_idx][txh_idx], 1, bd,
                         0);
       write_buffer_4x4(in, output, stride, 0, 0, -shift[1], bd);
       break;
     case V_FLIPADST:
-      load_buffer_4x4(coeff, in);
+      load_buffer_4x4(input, in);
       iidentity4_sse4_1(in, in, av1_inv_cos_bit_row[txw_idx][txh_idx], 0, bd,
                         0);
       iadst4x4_sse4_1(in, in, av1_inv_cos_bit_col[txw_idx][txh_idx], 1, bd, 0);
       write_buffer_4x4(in, output, stride, 0, 1, -shift[1], bd);
       break;
     case H_FLIPADST:
-      load_buffer_4x4(coeff, in);
+      load_buffer_4x4(input, in);
       iadst4x4_sse4_1(in, in, av1_inv_cos_bit_row[txw_idx][txh_idx], 0, bd, 0);
       iidentity4_sse4_1(in, in, av1_inv_cos_bit_col[txw_idx][txh_idx], 1, bd,
                         0);
@@ -1415,7 +1415,7 @@
   _mm_store_si128((__m128i *)(output + 7 * stride), u7);
 }
 
-void av1_inv_txfm2d_add_8x8_sse4_1(const int32_t *coeff, uint16_t *output,
+void av1_inv_txfm2d_add_8x8_sse4_1(const int32_t *input, uint16_t *output,
                                    int stride, TX_TYPE tx_type, int bd) {
   __m128i in[16], out[16];
   const int8_t *shift = av1_inv_txfm_shift_ls[TX_8X8];
@@ -1424,7 +1424,7 @@
 
   switch (tx_type) {
     case DCT_DCT:
-      load_buffer_8x8(coeff, in);
+      load_buffer_8x8(input, in);
       transpose_8x8(in, out);
       idct8x8_sse4_1(out, in, av1_inv_cos_bit_row[txw_idx][txh_idx], 0, bd,
                      -shift[0]);
@@ -1433,7 +1433,7 @@
       write_buffer_8x8(in, output, stride, 0, 0, -shift[1], bd);
       break;
     case DCT_ADST:
-      load_buffer_8x8(coeff, in);
+      load_buffer_8x8(input, in);
       transpose_8x8(in, out);
       iadst8x8_sse4_1(out, in, av1_inv_cos_bit_row[txw_idx][txh_idx], 0, bd,
                       -shift[0]);
@@ -1442,7 +1442,7 @@
       write_buffer_8x8(in, output, stride, 0, 0, -shift[1], bd);
       break;
     case ADST_DCT:
-      load_buffer_8x8(coeff, in);
+      load_buffer_8x8(input, in);
       transpose_8x8(in, out);
       idct8x8_sse4_1(out, in, av1_inv_cos_bit_row[txw_idx][txh_idx], 0, bd,
                      -shift[0]);
@@ -1451,7 +1451,7 @@
       write_buffer_8x8(in, output, stride, 0, 0, -shift[1], bd);
       break;
     case ADST_ADST:
-      load_buffer_8x8(coeff, in);
+      load_buffer_8x8(input, in);
       transpose_8x8(in, out);
       iadst8x8_sse4_1(out, in, av1_inv_cos_bit_row[txw_idx][txh_idx], 0, bd,
                       -shift[0]);
@@ -1460,7 +1460,7 @@
       write_buffer_8x8(in, output, stride, 0, 0, -shift[1], bd);
       break;
     case FLIPADST_DCT:
-      load_buffer_8x8(coeff, in);
+      load_buffer_8x8(input, in);
       transpose_8x8(in, out);
       idct8x8_sse4_1(out, in, av1_inv_cos_bit_row[txw_idx][txh_idx], 0, bd,
                      -shift[0]);
@@ -1469,7 +1469,7 @@
       write_buffer_8x8(in, output, stride, 0, 1, -shift[1], bd);
       break;
     case DCT_FLIPADST:
-      load_buffer_8x8(coeff, in);
+      load_buffer_8x8(input, in);
       transpose_8x8(in, out);
       iadst8x8_sse4_1(out, in, av1_inv_cos_bit_row[txw_idx][txh_idx], 0, bd,
                       -shift[0]);
@@ -1478,7 +1478,7 @@
       write_buffer_8x8(in, output, stride, 1, 0, -shift[1], bd);
       break;
     case ADST_FLIPADST:
-      load_buffer_8x8(coeff, in);
+      load_buffer_8x8(input, in);
       transpose_8x8(in, out);
       iadst8x8_sse4_1(out, in, av1_inv_cos_bit_row[txw_idx][txh_idx], 0, bd,
                       -shift[0]);
@@ -1487,7 +1487,7 @@
       write_buffer_8x8(in, output, stride, 1, 0, -shift[1], bd);
       break;
     case FLIPADST_FLIPADST:
-      load_buffer_8x8(coeff, in);
+      load_buffer_8x8(input, in);
       transpose_8x8(in, out);
       iadst8x8_sse4_1(out, in, av1_inv_cos_bit_row[txw_idx][txh_idx], 0, bd,
                       -shift[0]);
@@ -1496,7 +1496,7 @@
       write_buffer_8x8(in, output, stride, 1, 1, -shift[1], bd);
       break;
     case FLIPADST_ADST:
-      load_buffer_8x8(coeff, in);
+      load_buffer_8x8(input, in);
       transpose_8x8(in, out);
       iadst8x8_sse4_1(out, in, av1_inv_cos_bit_row[txw_idx][txh_idx], 0, bd,
                       -shift[0]);
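Every case above follows the same load -> row kernel -> column kernel
-> write pattern; only the kernel pair and the flip flags differ. A
hedged sketch of the public entry point (buffers assumed):

  // Apply a 4x4 inverse DCT_DCT and add the result into a 10-bit
  // destination; input holds the 16 int32 dequantized coefficients.
  av1_inv_txfm2d_add_4x4_sse4_1(input, output, stride, DCT_DCT, /*bd=*/10);
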
diff --git a/av1/common/x86/reconinter_avx2.c b/av1/common/x86/reconinter_avx2.c
index f645e04..a38bd83 100644
--- a/av1/common/x86/reconinter_avx2.c
+++ b/av1/common/x86/reconinter_avx2.c
@@ -28,8 +28,8 @@
 }
 void av1_build_compound_diffwtd_mask_avx2(uint8_t *mask,
                                           DIFFWTD_MASK_TYPE mask_type,
-                                          const uint8_t *src0, int stride0,
-                                          const uint8_t *src1, int stride1,
+                                          const uint8_t *src0, int src0_stride,
+                                          const uint8_t *src1, int src1_stride,
                                           int h, int w) {
   const int mb = (mask_type == DIFFWTD_38_INV) ? AOM_BLEND_A64_MAX_ALPHA : 0;
   const __m256i y_mask_base = _mm256_set1_epi16(38 - mb);
@@ -37,18 +37,18 @@
   if (4 == w) {
     do {
       const __m128i s0A = xx_loadl_32(src0);
-      const __m128i s0B = xx_loadl_32(src0 + stride0);
-      const __m128i s0C = xx_loadl_32(src0 + stride0 * 2);
-      const __m128i s0D = xx_loadl_32(src0 + stride0 * 3);
+      const __m128i s0B = xx_loadl_32(src0 + src0_stride);
+      const __m128i s0C = xx_loadl_32(src0 + src0_stride * 2);
+      const __m128i s0D = xx_loadl_32(src0 + src0_stride * 3);
       const __m128i s0AB = _mm_unpacklo_epi32(s0A, s0B);
       const __m128i s0CD = _mm_unpacklo_epi32(s0C, s0D);
       const __m128i s0ABCD = _mm_unpacklo_epi64(s0AB, s0CD);
       const __m256i s0ABCD_w = _mm256_cvtepu8_epi16(s0ABCD);
 
       const __m128i s1A = xx_loadl_32(src1);
-      const __m128i s1B = xx_loadl_32(src1 + stride1);
-      const __m128i s1C = xx_loadl_32(src1 + stride1 * 2);
-      const __m128i s1D = xx_loadl_32(src1 + stride1 * 3);
+      const __m128i s1B = xx_loadl_32(src1 + src1_stride);
+      const __m128i s1C = xx_loadl_32(src1 + src1_stride * 2);
+      const __m128i s1D = xx_loadl_32(src1 + src1_stride * 3);
       const __m128i s1AB = _mm_unpacklo_epi32(s1A, s1B);
       const __m128i s1CD = _mm_unpacklo_epi32(s1C, s1D);
       const __m128i s1ABCD = _mm_unpacklo_epi64(s1AB, s1CD);
@@ -58,40 +58,40 @@
       const __m128i x_m8 =
           _mm256_castsi256_si128(_mm256_permute4x64_epi64(m8, 0xd8));
       xx_storeu_128(mask, x_m8);
-      src0 += (stride0 << 2);
-      src1 += (stride1 << 2);
+      src0 += (src0_stride << 2);
+      src1 += (src1_stride << 2);
       mask += 16;
       i += 4;
     } while (i < h);
   } else if (8 == w) {
     do {
       const __m128i s0A = xx_loadl_64(src0);
-      const __m128i s0B = xx_loadl_64(src0 + stride0);
-      const __m128i s0C = xx_loadl_64(src0 + stride0 * 2);
-      const __m128i s0D = xx_loadl_64(src0 + stride0 * 3);
+      const __m128i s0B = xx_loadl_64(src0 + src0_stride);
+      const __m128i s0C = xx_loadl_64(src0 + src0_stride * 2);
+      const __m128i s0D = xx_loadl_64(src0 + src0_stride * 3);
       const __m256i s0AC_w = _mm256_cvtepu8_epi16(_mm_unpacklo_epi64(s0A, s0C));
       const __m256i s0BD_w = _mm256_cvtepu8_epi16(_mm_unpacklo_epi64(s0B, s0D));
       const __m128i s1A = xx_loadl_64(src1);
-      const __m128i s1B = xx_loadl_64(src1 + stride1);
-      const __m128i s1C = xx_loadl_64(src1 + stride1 * 2);
-      const __m128i s1D = xx_loadl_64(src1 + stride1 * 3);
+      const __m128i s1B = xx_loadl_64(src1 + src1_stride);
+      const __m128i s1C = xx_loadl_64(src1 + src1_stride * 2);
+      const __m128i s1D = xx_loadl_64(src1 + src1_stride * 3);
       const __m256i s1AB_w = _mm256_cvtepu8_epi16(_mm_unpacklo_epi64(s1A, s1C));
       const __m256i s1CD_w = _mm256_cvtepu8_epi16(_mm_unpacklo_epi64(s1B, s1D));
       const __m256i m16AC = calc_mask_avx2(y_mask_base, s0AC_w, s1AB_w);
       const __m256i m16BD = calc_mask_avx2(y_mask_base, s0BD_w, s1CD_w);
       const __m256i m8 = _mm256_packus_epi16(m16AC, m16BD);
       yy_storeu_256(mask, m8);
-      src0 += stride0 << 2;
-      src1 += stride1 << 2;
+      src0 += src0_stride << 2;
+      src1 += src1_stride << 2;
       mask += 32;
       i += 4;
     } while (i < h);
   } else if (16 == w) {
     do {
       const __m128i s0A = xx_load_128(src0);
-      const __m128i s0B = xx_load_128(src0 + stride0);
+      const __m128i s0B = xx_load_128(src0 + src0_stride);
       const __m128i s1A = xx_load_128(src1);
-      const __m128i s1B = xx_load_128(src1 + stride1);
+      const __m128i s1B = xx_load_128(src1 + src1_stride);
       const __m256i s0AL = _mm256_cvtepu8_epi16(s0A);
       const __m256i s0BL = _mm256_cvtepu8_epi16(s0B);
       const __m256i s1AL = _mm256_cvtepu8_epi16(s1A);
@@ -103,8 +103,8 @@
       const __m256i m8 =
           _mm256_permute4x64_epi64(_mm256_packus_epi16(m16AL, m16BL), 0xd8);
       yy_storeu_256(mask, m8);
-      src0 += stride0 << 1;
-      src1 += stride1 << 1;
+      src0 += src0_stride << 1;
+      src1 += src1_stride << 1;
       mask += 32;
       i += 2;
     } while (i < h);
@@ -127,8 +127,8 @@
         yy_storeu_256(mask + j, m8);
         j += 32;
       } while (j < w);
-      src0 += stride0;
-      src1 += stride1;
+      src0 += src0_stride;
+      src1 += src1_stride;
       mask += w;
       i += 1;
     } while (i < h);
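The stride0/stride1 -> src0_stride/src1_stride rename matches the
naming used by the blend helpers earlier in this patch. A hedged call
sketch (buffers assumed):

  // Build a 16x16 DIFFWTD_38 mask from two 8-bit predictors.
  av1_build_compound_diffwtd_mask_avx2(mask, DIFFWTD_38,
                                       src0, src0_stride,
                                       src1, src1_stride,
                                       /*h=*/16, /*w=*/16);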