Merge "Add unit test for delta-q (aq-mode=4)" into nextgenv2
diff --git a/aom_dsp/arm/fwd_txfm_neon.c b/aom_dsp/arm/fwd_txfm_neon.c
index 17ce29e..1cf8a3a 100644
--- a/aom_dsp/arm/fwd_txfm_neon.c
+++ b/aom_dsp/arm/fwd_txfm_neon.c
@@ -53,10 +53,10 @@
     v_t2_hi = vmlal_n_s16(v_t2_hi, vget_high_s16(v_x3), (int16_t)cospi_8_64);
     v_t3_lo = vmlsl_n_s16(v_t3_lo, vget_low_s16(v_x2), (int16_t)cospi_8_64);
     v_t3_hi = vmlsl_n_s16(v_t3_hi, vget_high_s16(v_x2), (int16_t)cospi_8_64);
-    v_t0_lo = vmulq_n_s32(v_t0_lo, cospi_16_64);
-    v_t0_hi = vmulq_n_s32(v_t0_hi, cospi_16_64);
-    v_t1_lo = vmulq_n_s32(v_t1_lo, cospi_16_64);
-    v_t1_hi = vmulq_n_s32(v_t1_hi, cospi_16_64);
+    v_t0_lo = vmulq_n_s32(v_t0_lo, (int32_t)cospi_16_64);
+    v_t0_hi = vmulq_n_s32(v_t0_hi, (int32_t)cospi_16_64);
+    v_t1_lo = vmulq_n_s32(v_t1_lo, (int32_t)cospi_16_64);
+    v_t1_hi = vmulq_n_s32(v_t1_hi, (int32_t)cospi_16_64);
     {
       const int16x4_t a = vrshrn_n_s32(v_t0_lo, DCT_CONST_BITS);
       const int16x4_t b = vrshrn_n_s32(v_t0_hi, DCT_CONST_BITS);
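Why the casts: the cospi_* constants come from aom_dsp/txfm_common.h, where they are declared as tran_high_t, a type wider than the int32_t scalar parameter of vmulq_n_s32(), so the unconverted argument trips implicit-narrowing warnings. A minimal sketch of the pattern, under the assumption that tran_high_t is 64-bit in this configuration (illustrative, not project code):

    #include <arm_neon.h>
    #include <stdint.h>

    typedef int64_t tran_high_t;                  /* assumption: width is build-dependent */
    static const tran_high_t kCospi16_64 = 11585; /* same value as cospi_16_64 */

    int32x4_t scale_by_cospi16(int32x4_t v) {
      /* Without the cast, the wide constant is silently narrowed to the
         int32_t parameter of vmulq_n_s32; the explicit cast documents it. */
      return vmulq_n_s32(v, (int32_t)kCospi16_64);
    }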
diff --git a/aom_dsp/arm/idct16x16_add_neon.c b/aom_dsp/arm/idct16x16_add_neon.c
index 3d545f8..b4cb7a0 100644
--- a/aom_dsp/arm/idct16x16_add_neon.c
+++ b/aom_dsp/arm/idct16x16_add_neon.c
@@ -137,8 +137,8 @@
   d31s16 = vget_high_s16(q15s16);
 
   // stage 3
-  d0s16 = vdup_n_s16(cospi_28_64);
-  d1s16 = vdup_n_s16(cospi_4_64);
+  d0s16 = vdup_n_s16((int16_t)cospi_28_64);
+  d1s16 = vdup_n_s16((int16_t)cospi_4_64);
 
   q2s32 = vmull_s16(d18s16, d0s16);
   q3s32 = vmull_s16(d19s16, d0s16);
@@ -150,8 +150,8 @@
   q5s32 = vmlal_s16(q5s32, d30s16, d0s16);
   q6s32 = vmlal_s16(q6s32, d31s16, d0s16);
 
-  d2s16 = vdup_n_s16(cospi_12_64);
-  d3s16 = vdup_n_s16(cospi_20_64);
+  d2s16 = vdup_n_s16((int16_t)cospi_12_64);
+  d3s16 = vdup_n_s16((int16_t)cospi_20_64);
 
   d8s16 = vqrshrn_n_s32(q2s32, 14);
   d9s16 = vqrshrn_n_s32(q3s32, 14);
@@ -178,15 +178,15 @@
   q6s16 = vcombine_s16(d12s16, d13s16);
 
   // stage 4
-  d30s16 = vdup_n_s16(cospi_16_64);
+  d30s16 = vdup_n_s16((int16_t)cospi_16_64);
 
   q2s32 = vmull_s16(d16s16, d30s16);
   q11s32 = vmull_s16(d17s16, d30s16);
   q0s32 = vmull_s16(d24s16, d30s16);
   q1s32 = vmull_s16(d25s16, d30s16);
 
-  d30s16 = vdup_n_s16(cospi_24_64);
-  d31s16 = vdup_n_s16(cospi_8_64);
+  d30s16 = vdup_n_s16((int16_t)cospi_24_64);
+  d31s16 = vdup_n_s16((int16_t)cospi_8_64);
 
   q3s32 = vaddq_s32(q2s32, q0s32);
   q12s32 = vaddq_s32(q11s32, q1s32);
@@ -232,7 +232,7 @@
   q2s16 = vsubq_s16(q9s16, q10s16);
   q3s16 = vsubq_s16(q8s16, q11s16);
 
-  d16s16 = vdup_n_s16(cospi_16_64);
+  d16s16 = vdup_n_s16((int16_t)cospi_16_64);
 
   q11s32 = vmull_s16(d26s16, d16s16);
   q12s32 = vmull_s16(d27s16, d16s16);
@@ -378,8 +378,8 @@
   d31s16 = vget_high_s16(q15s16);
 
   // stage 3
-  d12s16 = vdup_n_s16(cospi_30_64);
-  d13s16 = vdup_n_s16(cospi_2_64);
+  d12s16 = vdup_n_s16((int16_t)cospi_30_64);
+  d13s16 = vdup_n_s16((int16_t)cospi_2_64);
 
   q2s32 = vmull_s16(d16s16, d12s16);
   q3s32 = vmull_s16(d17s16, d12s16);
@@ -398,8 +398,8 @@
   q0s16 = vcombine_s16(d0s16, d1s16);
   q7s16 = vcombine_s16(d14s16, d15s16);
 
-  d30s16 = vdup_n_s16(cospi_14_64);
-  d31s16 = vdup_n_s16(cospi_18_64);
+  d30s16 = vdup_n_s16((int16_t)cospi_14_64);
+  d31s16 = vdup_n_s16((int16_t)cospi_18_64);
 
   q2s32 = vmull_s16(d24s16, d30s16);
   q3s32 = vmull_s16(d25s16, d30s16);
@@ -418,8 +418,8 @@
   q1s16 = vcombine_s16(d2s16, d3s16);
   q6s16 = vcombine_s16(d12s16, d13s16);
 
-  d30s16 = vdup_n_s16(cospi_22_64);
-  d31s16 = vdup_n_s16(cospi_10_64);
+  d30s16 = vdup_n_s16((int16_t)cospi_22_64);
+  d31s16 = vdup_n_s16((int16_t)cospi_10_64);
 
   q11s32 = vmull_s16(d20s16, d30s16);
   q12s32 = vmull_s16(d21s16, d30s16);
@@ -438,8 +438,8 @@
   q2s16 = vcombine_s16(d4s16, d5s16);
   q5s16 = vcombine_s16(d10s16, d11s16);
 
-  d30s16 = vdup_n_s16(cospi_6_64);
-  d31s16 = vdup_n_s16(cospi_26_64);
+  d30s16 = vdup_n_s16((int16_t)cospi_6_64);
+  d31s16 = vdup_n_s16((int16_t)cospi_26_64);
 
   q10s32 = vmull_s16(d28s16, d30s16);
   q11s32 = vmull_s16(d29s16, d30s16);
@@ -478,8 +478,8 @@
   d28s16 = vget_low_s16(q14s16);
   d29s16 = vget_high_s16(q14s16);
 
-  d30s16 = vdup_n_s16(cospi_8_64);
-  d31s16 = vdup_n_s16(cospi_24_64);
+  d30s16 = vdup_n_s16((int16_t)cospi_8_64);
+  d31s16 = vdup_n_s16((int16_t)cospi_24_64);
 
   q2s32 = vmull_s16(d18s16, d31s16);
   q3s32 = vmull_s16(d19s16, d31s16);
@@ -539,7 +539,7 @@
   d26s16 = vget_low_s16(q13s16);
   d27s16 = vget_high_s16(q13s16);
 
-  d14s16 = vdup_n_s16(cospi_16_64);
+  d14s16 = vdup_n_s16((int16_t)cospi_16_64);
 
   q3s32 = vmull_s16(d26s16, d14s16);
   q4s32 = vmull_s16(d27s16, d14s16);
@@ -903,15 +903,15 @@
                &q15s16);
 
   // stage 3
-  q0s16 = vdupq_n_s16(cospi_28_64 * 2);
-  q1s16 = vdupq_n_s16(cospi_4_64 * 2);
+  q0s16 = vdupq_n_s16((int16_t)(cospi_28_64 * 2));
+  q1s16 = vdupq_n_s16((int16_t)(cospi_4_64 * 2));
 
   q4s16 = vqrdmulhq_s16(q9s16, q0s16);
   q7s16 = vqrdmulhq_s16(q9s16, q1s16);
 
   // stage 4
-  q1s16 = vdupq_n_s16(cospi_16_64 * 2);
-  d4s16 = vdup_n_s16(cospi_16_64);
+  q1s16 = vdupq_n_s16((int16_t)(cospi_16_64 * 2));
+  d4s16 = vdup_n_s16((int16_t)cospi_16_64);
 
   q8s16 = vqrdmulhq_s16(q8s16, q1s16);
 
@@ -1046,13 +1046,13 @@
                &q15s16);
 
   // stage 3
-  q6s16 = vdupq_n_s16(cospi_30_64 * 2);
+  q6s16 = vdupq_n_s16((int16_t)(cospi_30_64 * 2));
   q0s16 = vqrdmulhq_s16(q8s16, q6s16);
-  q6s16 = vdupq_n_s16(cospi_2_64 * 2);
+  q6s16 = vdupq_n_s16((int16_t)(cospi_2_64 * 2));
   q7s16 = vqrdmulhq_s16(q8s16, q6s16);
 
   q15s16 = vdupq_n_s16(-cospi_26_64 * 2);
-  q14s16 = vdupq_n_s16(cospi_6_64 * 2);
+  q14s16 = vdupq_n_s16((int16_t)(cospi_6_64 * 2));
   q3s16 = vqrdmulhq_s16(q9s16, q15s16);
   q4s16 = vqrdmulhq_s16(q9s16, q14s16);
 
@@ -1066,8 +1066,8 @@
   d14s16 = vget_low_s16(q7s16);
   d15s16 = vget_high_s16(q7s16);
 
-  d30s16 = vdup_n_s16(cospi_8_64);
-  d31s16 = vdup_n_s16(cospi_24_64);
+  d30s16 = vdup_n_s16((int16_t)cospi_8_64);
+  d31s16 = vdup_n_s16((int16_t)cospi_24_64);
 
   q12s32 = vmull_s16(d14s16, d31s16);
   q5s32 = vmull_s16(d15s16, d31s16);
@@ -1124,7 +1124,7 @@
   d26s16 = vget_low_s16(q13s16);
   d27s16 = vget_high_s16(q13s16);
 
-  d14s16 = vdup_n_s16(cospi_16_64);
+  d14s16 = vdup_n_s16((int16_t)cospi_16_64);
   q3s32 = vmull_s16(d26s16, d14s16);
   q4s32 = vmull_s16(d27s16, d14s16);
   q0s32 = vmull_s16(d20s16, d14s16);
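A note on the doubled constants such as (int16_t)(cospi_28_64 * 2): they feed vqrdmulhq_s16(), which computes a saturating (2*a*b + (1 << 15)) >> 16 per lane. With the cospi values scaled by 2^14, pre-doubling the constant makes the intrinsic reproduce the scalar code's (a * cospi + (1 << 13)) >> 14, i.e. rounding by DCT_CONST_BITS == 14; the cast wraps the whole product because the multiply happens in the constant's wider type. A scalar model of one lane (illustrative):

    #include <stdint.h>

    /* Model of vqrdmulh on one lane; NEON additionally saturates the
       -32768 * -32768 corner case. */
    static int16_t qrdmulh_lane(int16_t a, int16_t b) {
      const int64_t p = (int64_t)a * b * 2 + (1 << 15);
      return (int16_t)(p >> 16);
    }

    /* With b == cospi * 2: ((a * 2c) * 2 + (1 << 15)) >> 16
                          == (a * c + (1 << 13)) >> 14. */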
diff --git a/aom_dsp/arm/idct4x4_add_neon.c b/aom_dsp/arm/idct4x4_add_neon.c
index 397c617..763be1a 100644
--- a/aom_dsp/arm/idct4x4_add_neon.c
+++ b/aom_dsp/arm/idct4x4_add_neon.c
@@ -11,6 +11,8 @@
 
 #include <arm_neon.h>
 
+#include "aom_dsp/txfm_common.h"
+
 void aom_idct4x4_16_add_neon(int16_t *input, uint8_t *dest, int dest_stride) {
   uint8x8_t d26u8, d27u8;
   uint32x2_t d26u32, d27u32;
@@ -22,9 +24,6 @@
   int16x4x2_t d0x2s16, d1x2s16;
   int32x4x2_t q0x2s32;
   uint8_t *d;
-  int16_t cospi_8_64 = 15137;
-  int16_t cospi_16_64 = 11585;
-  int16_t cospi_24_64 = 6270;
 
   d26u32 = d27u32 = vdup_n_u32(0);
 
@@ -41,8 +40,8 @@
   q8s16 = vcombine_s16(d0x2s16.val[0], d0x2s16.val[1]);
   q9s16 = vcombine_s16(d1x2s16.val[0], d1x2s16.val[1]);
 
-  d20s16 = vdup_n_s16(cospi_8_64);
-  d21s16 = vdup_n_s16(cospi_16_64);
+  d20s16 = vdup_n_s16((int16_t)cospi_8_64);
+  d21s16 = vdup_n_s16((int16_t)cospi_16_64);
 
   q0x2s32 =
       vtrnq_s32(vreinterpretq_s32_s16(q8s16), vreinterpretq_s32_s16(q9s16));
@@ -51,7 +50,7 @@
   d18s16 = vget_low_s16(vreinterpretq_s16_s32(q0x2s32.val[1]));
   d19s16 = vget_high_s16(vreinterpretq_s16_s32(q0x2s32.val[1]));
 
-  d22s16 = vdup_n_s16(cospi_24_64);
+  d22s16 = vdup_n_s16((int16_t)cospi_24_64);
 
   // stage 1
   d23s16 = vadd_s16(d16s16, d18s16);
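The three locals deleted here duplicated values that aom_dsp/txfm_common.h (now included above) already provides; the remaining casts are the same narrowing story as in the other NEON files. The header constants are round(2^14 * cos(k*pi/64)), which a standalone check confirms for the three used here (illustrative):

    #include <math.h>
    #include <stdio.h>

    int main(void) {
      const double pi = acos(-1.0);
      printf("%.0f\n", round(16384 * cos(8 * pi / 64)));  /* 15137 == cospi_8_64 */
      printf("%.0f\n", round(16384 * cos(16 * pi / 64))); /* 11585 == cospi_16_64 */
      printf("%.0f\n", round(16384 * cos(24 * pi / 64))); /* 6270  == cospi_24_64 */
      return 0;
    }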
diff --git a/aom_dsp/arm/idct8x8_1_add_neon.c b/aom_dsp/arm/idct8x8_1_add_neon.c
index fcc2a2f..c7926f9 100644
--- a/aom_dsp/arm/idct8x8_1_add_neon.c
+++ b/aom_dsp/arm/idct8x8_1_add_neon.c
@@ -20,7 +20,7 @@
   uint16x8_t q0u16, q9u16, q10u16, q11u16, q12u16;
   int16x8_t q0s16;
   uint8_t *d1, *d2;
-  int16_t i, a1, cospi_16_64 = 11585;
+  int16_t i, a1;
   int16_t out = dct_const_round_shift(input[0] * cospi_16_64);
   out = dct_const_round_shift(out * cospi_16_64);
   a1 = ROUND_POWER_OF_TWO(out, 5);
diff --git a/aom_dsp/arm/idct8x8_add_neon.c b/aom_dsp/arm/idct8x8_add_neon.c
index 8e75210..8ad7086 100644
--- a/aom_dsp/arm/idct8x8_add_neon.c
+++ b/aom_dsp/arm/idct8x8_add_neon.c
@@ -90,10 +90,10 @@
   int32x4_t q2s32, q3s32, q5s32, q6s32, q8s32, q9s32;
   int32x4_t q10s32, q11s32, q12s32, q13s32, q15s32;
 
-  d0s16 = vdup_n_s16(cospi_28_64);
-  d1s16 = vdup_n_s16(cospi_4_64);
-  d2s16 = vdup_n_s16(cospi_12_64);
-  d3s16 = vdup_n_s16(cospi_20_64);
+  d0s16 = vdup_n_s16((int16_t)cospi_28_64);
+  d1s16 = vdup_n_s16((int16_t)cospi_4_64);
+  d2s16 = vdup_n_s16((int16_t)cospi_12_64);
+  d3s16 = vdup_n_s16((int16_t)cospi_20_64);
 
   d16s16 = vget_low_s16(*q8s16);
   d17s16 = vget_high_s16(*q8s16);
@@ -146,7 +146,7 @@
   q6s16 = vcombine_s16(d12s16, d13s16);
   q7s16 = vcombine_s16(d14s16, d15s16);
 
-  d0s16 = vdup_n_s16(cospi_16_64);
+  d0s16 = vdup_n_s16((int16_t)cospi_16_64);
 
   q2s32 = vmull_s16(d16s16, d0s16);
   q3s32 = vmull_s16(d17s16, d0s16);
@@ -158,8 +158,8 @@
   q13s32 = vmlsl_s16(q13s32, d24s16, d0s16);
   q15s32 = vmlsl_s16(q15s32, d25s16, d0s16);
 
-  d0s16 = vdup_n_s16(cospi_24_64);
-  d1s16 = vdup_n_s16(cospi_8_64);
+  d0s16 = vdup_n_s16((int16_t)cospi_24_64);
+  d1s16 = vdup_n_s16((int16_t)cospi_8_64);
 
   d18s16 = vqrshrn_n_s32(q2s32, 14);
   d19s16 = vqrshrn_n_s32(q3s32, 14);
@@ -199,7 +199,7 @@
   d28s16 = vget_low_s16(*q14s16);
   d29s16 = vget_high_s16(*q14s16);
 
-  d16s16 = vdup_n_s16(cospi_16_64);
+  d16s16 = vdup_n_s16((int16_t)cospi_16_64);
 
   q9s32 = vmull_s16(d28s16, d16s16);
   q10s32 = vmull_s16(d29s16, d16s16);
@@ -356,29 +356,29 @@
 
   // First transform rows
   // stage 1
-  q0s16 = vdupq_n_s16(cospi_28_64 * 2);
-  q1s16 = vdupq_n_s16(cospi_4_64 * 2);
+  q0s16 = vdupq_n_s16((int16_t)cospi_28_64 * 2);
+  q1s16 = vdupq_n_s16((int16_t)cospi_4_64 * 2);
 
   q4s16 = vqrdmulhq_s16(q9s16, q0s16);
 
-  q0s16 = vdupq_n_s16(-cospi_20_64 * 2);
+  q0s16 = vdupq_n_s16(-(int16_t)cospi_20_64 * 2);
 
   q7s16 = vqrdmulhq_s16(q9s16, q1s16);
 
-  q1s16 = vdupq_n_s16(cospi_12_64 * 2);
+  q1s16 = vdupq_n_s16((int16_t)cospi_12_64 * 2);
 
   q5s16 = vqrdmulhq_s16(q11s16, q0s16);
 
-  q0s16 = vdupq_n_s16(cospi_16_64 * 2);
+  q0s16 = vdupq_n_s16((int16_t)cospi_16_64 * 2);
 
   q6s16 = vqrdmulhq_s16(q11s16, q1s16);
 
   // stage 2 & stage 3 - even half
-  q1s16 = vdupq_n_s16(cospi_24_64 * 2);
+  q1s16 = vdupq_n_s16((int16_t)cospi_24_64 * 2);
 
   q9s16 = vqrdmulhq_s16(q8s16, q0s16);
 
-  q0s16 = vdupq_n_s16(cospi_8_64 * 2);
+  q0s16 = vdupq_n_s16((int16_t)cospi_8_64 * 2);
 
   q13s16 = vqrdmulhq_s16(q10s16, q1s16);
 
@@ -400,7 +400,7 @@
   d28s16 = vget_low_s16(q14s16);
   d29s16 = vget_high_s16(q14s16);
 
-  d16s16 = vdup_n_s16(cospi_16_64);
+  d16s16 = vdup_n_s16((int16_t)cospi_16_64);
   q9s32 = vmull_s16(d28s16, d16s16);
   q10s32 = vmull_s16(d29s16, d16s16);
   q11s32 = vmull_s16(d28s16, d16s16);
diff --git a/aom_dsp/bitreader.h b/aom_dsp/bitreader.h
index 68e1339..d0282f5 100644
--- a/aom_dsp/bitreader.h
+++ b/aom_dsp/bitreader.h
@@ -103,7 +103,7 @@
 }
 
 // Returns the position in the bit reader in bits.
-static INLINE ptrdiff_t aom_reader_tell(const aom_reader *r) {
+static INLINE uint32_t aom_reader_tell(const aom_reader *r) {
 #if CONFIG_ANS
   (void)r;
   assert(0 && "aom_reader_tell() is unimplemented for ANS");
@@ -116,7 +116,7 @@
 }
 
 // Returns the position in the bit reader in 1/8th bits.
-static INLINE ptrdiff_t aom_reader_tell_frac(const aom_reader *r) {
+static INLINE uint32_t aom_reader_tell_frac(const aom_reader *r) {
 #if CONFIG_ANS
   (void)r;
   assert(0 && "aom_reader_tell_frac() is unimplemented for ANS");
diff --git a/aom_dsp/daalaboolreader.c b/aom_dsp/daalaboolreader.c
index f0da8eb..0fc7b14 100644
--- a/aom_dsp/daalaboolreader.c
+++ b/aom_dsp/daalaboolreader.c
@@ -28,10 +28,10 @@
   return r->buffer_end;
 }
 
-ptrdiff_t aom_daala_reader_tell(const daala_reader *r) {
+uint32_t aom_daala_reader_tell(const daala_reader *r) {
   return od_ec_dec_tell(&r->ec);
 }
 
-ptrdiff_t aom_daala_reader_tell_frac(const daala_reader *r) {
+uint32_t aom_daala_reader_tell_frac(const daala_reader *r) {
   return od_ec_dec_tell_frac(&r->ec);
 }
diff --git a/aom_dsp/daalaboolreader.h b/aom_dsp/daalaboolreader.h
index 10dc391..9d6cebd 100644
--- a/aom_dsp/daalaboolreader.h
+++ b/aom_dsp/daalaboolreader.h
@@ -36,8 +36,8 @@
 
 int aom_daala_reader_init(daala_reader *r, const uint8_t *buffer, int size);
 const uint8_t *aom_daala_reader_find_end(daala_reader *r);
-ptrdiff_t aom_daala_reader_tell(const daala_reader *r);
-ptrdiff_t aom_daala_reader_tell_frac(const daala_reader *r);
+uint32_t aom_daala_reader_tell(const daala_reader *r);
+uint32_t aom_daala_reader_tell_frac(const daala_reader *r);
 
 static INLINE int aom_daala_read(daala_reader *r, int prob) {
   if (prob == 128) {
diff --git a/aom_dsp/dkboolreader.h b/aom_dsp/dkboolreader.h
index bc4b02f..add480a 100644
--- a/aom_dsp/dkboolreader.h
+++ b/aom_dsp/dkboolreader.h
@@ -12,6 +12,7 @@
 #ifndef AOM_DSP_DKBOOLREADER_H_
 #define AOM_DSP_DKBOOLREADER_H_
 
+#include <assert.h>
 #include <stddef.h>
 #include <limits.h>
 
@@ -67,10 +68,11 @@
 
 const uint8_t *aom_dk_reader_find_end(struct aom_dk_reader *r);
 
-static INLINE ptrdiff_t aom_dk_reader_tell(const struct aom_dk_reader *r) {
-  const size_t bits_read = (r->buffer - r->buffer_start) * CHAR_BIT;
+static INLINE uint32_t aom_dk_reader_tell(const struct aom_dk_reader *r) {
+  const uint32_t bits_read = (r->buffer - r->buffer_start) * CHAR_BIT;
   const int count =
       (r->count < LOTS_OF_BITS) ? r->count : r->count - LOTS_OF_BITS;
+  assert(r->buffer >= r->buffer_start);
   return bits_read - (count + CHAR_BIT);
 }
 
@@ -78,7 +80,7 @@
    3 => 1/8th bits.*/
 #define DK_BITRES (3)
 
-static INLINE ptrdiff_t aom_dk_reader_tell_frac(const struct aom_dk_reader *r) {
+static INLINE uint32_t aom_dk_reader_tell_frac(const struct aom_dk_reader *r) {
   uint32_t num_bits;
   uint32_t range;
   int l;
diff --git a/aom_dsp/fwd_txfm.c b/aom_dsp/fwd_txfm.c
index fadae2b..547919f 100644
--- a/aom_dsp/fwd_txfm.c
+++ b/aom_dsp/fwd_txfm.c
@@ -9,8 +9,9 @@
  * PATENTS file, you can obtain it at www.aomedia.org/license/patent.
  */
 
-#include "./aom_dsp_rtcd.h"
 #include "aom_dsp/fwd_txfm.h"
+#include <assert.h>
+#include "./aom_dsp_rtcd.h"
 
 void aom_fdct4x4_c(const int16_t *input, tran_low_t *output, int stride) {
   // The 2D transform is done with two passes which are actually pretty
@@ -22,36 +23,37 @@
   int pass;
   // We need an intermediate buffer between passes.
   tran_low_t intermediate[4 * 4];
-  const int16_t *in_pass0 = input;
-  const tran_low_t *in = NULL;
+  const tran_low_t *in_low = NULL;
   tran_low_t *out = intermediate;
   // Do the two transform/transpose passes
   for (pass = 0; pass < 2; ++pass) {
-    tran_high_t input[4];      // canbe16
+    tran_high_t in_high[4];    // canbe16
     tran_high_t step[4];       // canbe16
     tran_high_t temp1, temp2;  // needs32
     int i;
     for (i = 0; i < 4; ++i) {
       // Load inputs.
-      if (0 == pass) {
-        input[0] = in_pass0[0 * stride] * 16;
-        input[1] = in_pass0[1 * stride] * 16;
-        input[2] = in_pass0[2 * stride] * 16;
-        input[3] = in_pass0[3 * stride] * 16;
-        if (i == 0 && input[0]) {
-          input[0] += 1;
+      if (pass == 0) {
+        in_high[0] = input[0 * stride] * 16;
+        in_high[1] = input[1 * stride] * 16;
+        in_high[2] = input[2 * stride] * 16;
+        in_high[3] = input[3 * stride] * 16;
+        if (i == 0 && in_high[0]) {
+          ++in_high[0];
         }
       } else {
-        input[0] = in[0 * 4];
-        input[1] = in[1 * 4];
-        input[2] = in[2 * 4];
-        input[3] = in[3 * 4];
+        assert(in_low != NULL);
+        in_high[0] = in_low[0 * 4];
+        in_high[1] = in_low[1 * 4];
+        in_high[2] = in_low[2 * 4];
+        in_high[3] = in_low[3 * 4];
+        ++in_low;
       }
       // Transform.
-      step[0] = input[0] + input[3];
-      step[1] = input[1] + input[2];
-      step[2] = input[1] - input[2];
-      step[3] = input[0] - input[3];
+      step[0] = in_high[0] + in_high[3];
+      step[1] = in_high[1] + in_high[2];
+      step[2] = in_high[1] - in_high[2];
+      step[3] = in_high[0] - in_high[3];
       temp1 = (step[0] + step[1]) * cospi_16_64;
       temp2 = (step[0] - step[1]) * cospi_16_64;
       out[0] = (tran_low_t)fdct_round_shift(temp1);
@@ -61,12 +63,11 @@
       out[1] = (tran_low_t)fdct_round_shift(temp1);
       out[3] = (tran_low_t)fdct_round_shift(temp2);
       // Do next column (which is a transposed row in second/horizontal pass)
-      in_pass0++;
-      in++;
+      ++input;
       out += 4;
     }
     // Setup in/out for next pass.
-    in = intermediate;
+    in_low = intermediate;
     out = output;
   }
 
@@ -100,7 +101,6 @@
     tran_high_t t0, t1, t2, t3;                  // needs32
     tran_high_t x0, x1, x2, x3;                  // canbe16
 
-    int i;
     for (i = 0; i < 8; i++) {
       // stage 1
       if (pass == 0) {
@@ -191,56 +191,57 @@
   int pass;
   // We need an intermediate buffer between passes.
   tran_low_t intermediate[256];
-  const int16_t *in_pass0 = input;
-  const tran_low_t *in = NULL;
+  const tran_low_t *in_low = NULL;
   tran_low_t *out = intermediate;
   // Do the two transform/transpose passes
   for (pass = 0; pass < 2; ++pass) {
     tran_high_t step1[8];      // canbe16
     tran_high_t step2[8];      // canbe16
     tran_high_t step3[8];      // canbe16
-    tran_high_t input[8];      // canbe16
+    tran_high_t in_high[8];    // canbe16
     tran_high_t temp1, temp2;  // needs32
     int i;
     for (i = 0; i < 16; i++) {
       if (0 == pass) {
         // Calculate input for the first 8 results.
-        input[0] = (in_pass0[0 * stride] + in_pass0[15 * stride]) * 4;
-        input[1] = (in_pass0[1 * stride] + in_pass0[14 * stride]) * 4;
-        input[2] = (in_pass0[2 * stride] + in_pass0[13 * stride]) * 4;
-        input[3] = (in_pass0[3 * stride] + in_pass0[12 * stride]) * 4;
-        input[4] = (in_pass0[4 * stride] + in_pass0[11 * stride]) * 4;
-        input[5] = (in_pass0[5 * stride] + in_pass0[10 * stride]) * 4;
-        input[6] = (in_pass0[6 * stride] + in_pass0[9 * stride]) * 4;
-        input[7] = (in_pass0[7 * stride] + in_pass0[8 * stride]) * 4;
+        in_high[0] = (input[0 * stride] + input[15 * stride]) * 4;
+        in_high[1] = (input[1 * stride] + input[14 * stride]) * 4;
+        in_high[2] = (input[2 * stride] + input[13 * stride]) * 4;
+        in_high[3] = (input[3 * stride] + input[12 * stride]) * 4;
+        in_high[4] = (input[4 * stride] + input[11 * stride]) * 4;
+        in_high[5] = (input[5 * stride] + input[10 * stride]) * 4;
+        in_high[6] = (input[6 * stride] + input[9 * stride]) * 4;
+        in_high[7] = (input[7 * stride] + input[8 * stride]) * 4;
         // Calculate input for the next 8 results.
-        step1[0] = (in_pass0[7 * stride] - in_pass0[8 * stride]) * 4;
-        step1[1] = (in_pass0[6 * stride] - in_pass0[9 * stride]) * 4;
-        step1[2] = (in_pass0[5 * stride] - in_pass0[10 * stride]) * 4;
-        step1[3] = (in_pass0[4 * stride] - in_pass0[11 * stride]) * 4;
-        step1[4] = (in_pass0[3 * stride] - in_pass0[12 * stride]) * 4;
-        step1[5] = (in_pass0[2 * stride] - in_pass0[13 * stride]) * 4;
-        step1[6] = (in_pass0[1 * stride] - in_pass0[14 * stride]) * 4;
-        step1[7] = (in_pass0[0 * stride] - in_pass0[15 * stride]) * 4;
+        step1[0] = (input[7 * stride] - input[8 * stride]) * 4;
+        step1[1] = (input[6 * stride] - input[9 * stride]) * 4;
+        step1[2] = (input[5 * stride] - input[10 * stride]) * 4;
+        step1[3] = (input[4 * stride] - input[11 * stride]) * 4;
+        step1[4] = (input[3 * stride] - input[12 * stride]) * 4;
+        step1[5] = (input[2 * stride] - input[13 * stride]) * 4;
+        step1[6] = (input[1 * stride] - input[14 * stride]) * 4;
+        step1[7] = (input[0 * stride] - input[15 * stride]) * 4;
       } else {
         // Calculate input for the first 8 results.
-        input[0] = ((in[0 * 16] + 1) >> 2) + ((in[15 * 16] + 1) >> 2);
-        input[1] = ((in[1 * 16] + 1) >> 2) + ((in[14 * 16] + 1) >> 2);
-        input[2] = ((in[2 * 16] + 1) >> 2) + ((in[13 * 16] + 1) >> 2);
-        input[3] = ((in[3 * 16] + 1) >> 2) + ((in[12 * 16] + 1) >> 2);
-        input[4] = ((in[4 * 16] + 1) >> 2) + ((in[11 * 16] + 1) >> 2);
-        input[5] = ((in[5 * 16] + 1) >> 2) + ((in[10 * 16] + 1) >> 2);
-        input[6] = ((in[6 * 16] + 1) >> 2) + ((in[9 * 16] + 1) >> 2);
-        input[7] = ((in[7 * 16] + 1) >> 2) + ((in[8 * 16] + 1) >> 2);
+        assert(in_low != NULL);
+        in_high[0] = ((in_low[0 * 16] + 1) >> 2) + ((in_low[15 * 16] + 1) >> 2);
+        in_high[1] = ((in_low[1 * 16] + 1) >> 2) + ((in_low[14 * 16] + 1) >> 2);
+        in_high[2] = ((in_low[2 * 16] + 1) >> 2) + ((in_low[13 * 16] + 1) >> 2);
+        in_high[3] = ((in_low[3 * 16] + 1) >> 2) + ((in_low[12 * 16] + 1) >> 2);
+        in_high[4] = ((in_low[4 * 16] + 1) >> 2) + ((in_low[11 * 16] + 1) >> 2);
+        in_high[5] = ((in_low[5 * 16] + 1) >> 2) + ((in_low[10 * 16] + 1) >> 2);
+        in_high[6] = ((in_low[6 * 16] + 1) >> 2) + ((in_low[9 * 16] + 1) >> 2);
+        in_high[7] = ((in_low[7 * 16] + 1) >> 2) + ((in_low[8 * 16] + 1) >> 2);
         // Calculate input for the next 8 results.
-        step1[0] = ((in[7 * 16] + 1) >> 2) - ((in[8 * 16] + 1) >> 2);
-        step1[1] = ((in[6 * 16] + 1) >> 2) - ((in[9 * 16] + 1) >> 2);
-        step1[2] = ((in[5 * 16] + 1) >> 2) - ((in[10 * 16] + 1) >> 2);
-        step1[3] = ((in[4 * 16] + 1) >> 2) - ((in[11 * 16] + 1) >> 2);
-        step1[4] = ((in[3 * 16] + 1) >> 2) - ((in[12 * 16] + 1) >> 2);
-        step1[5] = ((in[2 * 16] + 1) >> 2) - ((in[13 * 16] + 1) >> 2);
-        step1[6] = ((in[1 * 16] + 1) >> 2) - ((in[14 * 16] + 1) >> 2);
-        step1[7] = ((in[0 * 16] + 1) >> 2) - ((in[15 * 16] + 1) >> 2);
+        step1[0] = ((in_low[7 * 16] + 1) >> 2) - ((in_low[8 * 16] + 1) >> 2);
+        step1[1] = ((in_low[6 * 16] + 1) >> 2) - ((in_low[9 * 16] + 1) >> 2);
+        step1[2] = ((in_low[5 * 16] + 1) >> 2) - ((in_low[10 * 16] + 1) >> 2);
+        step1[3] = ((in_low[4 * 16] + 1) >> 2) - ((in_low[11 * 16] + 1) >> 2);
+        step1[4] = ((in_low[3 * 16] + 1) >> 2) - ((in_low[12 * 16] + 1) >> 2);
+        step1[5] = ((in_low[2 * 16] + 1) >> 2) - ((in_low[13 * 16] + 1) >> 2);
+        step1[6] = ((in_low[1 * 16] + 1) >> 2) - ((in_low[14 * 16] + 1) >> 2);
+        step1[7] = ((in_low[0 * 16] + 1) >> 2) - ((in_low[15 * 16] + 1) >> 2);
+        in_low++;
       }
       // Work on the first eight values; fdct8(input, even_results);
       {
@@ -249,14 +250,14 @@
         tran_high_t x0, x1, x2, x3;                  // canbe16
 
         // stage 1
-        s0 = input[0] + input[7];
-        s1 = input[1] + input[6];
-        s2 = input[2] + input[5];
-        s3 = input[3] + input[4];
-        s4 = input[3] - input[4];
-        s5 = input[2] - input[5];
-        s6 = input[1] - input[6];
-        s7 = input[0] - input[7];
+        s0 = in_high[0] + in_high[7];
+        s1 = in_high[1] + in_high[6];
+        s2 = in_high[2] + in_high[5];
+        s3 = in_high[3] + in_high[4];
+        s4 = in_high[3] - in_high[4];
+        s5 = in_high[2] - in_high[5];
+        s6 = in_high[1] - in_high[6];
+        s7 = in_high[0] - in_high[7];
 
         // fdct4(step, step);
         x0 = s0 + s3;
@@ -351,12 +352,11 @@
         out[15] = (tran_low_t)fdct_round_shift(temp2);
       }
       // Do next column (which is a transposed row in second/horizontal pass)
-      in++;
-      in_pass0++;
+      input++;
       out += 16;
     }
     // Setup in/out for next pass.
-    in = intermediate;
+    in_low = intermediate;
     out = output;
   }
 }
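The renames in this file clear -Wshadow: the old loop body declared tran_high_t input[4], hiding the int16_t *input parameter, and the second-pass pointer in collided similarly, hence in_high/in_low. The new assert(in_low != NULL) records that pass 1 always reads the intermediate buffer assigned at the end of pass 0. A reduced illustration of the fixed shape (hypothetical, not project code):

    #include <stdint.h>

    typedef int64_t tran_high_t; /* assumption: width is build-dependent */

    /* The local array used to be named 'input', which gcc -Wshadow flags as
       shadowing the parameter; renaming it resolves the warning. */
    static tran_high_t first_sample(const int16_t *input, int stride) {
      tran_high_t in_high[4];
      in_high[0] = input[0 * stride] * 16;
      return in_high[0];
    }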
diff --git a/aom_dsp/mips/aom_convolve_msa.h b/aom_dsp/mips/aom_convolve_msa.h
index 4efbcbc..1a0ae4d 100644
--- a/aom_dsp/mips/aom_convolve_msa.h
+++ b/aom_dsp/mips/aom_convolve_msa.h
@@ -17,18 +17,18 @@
 
 extern const uint8_t mc_filt_mask_arr[16 * 3];
 
-#define FILT_8TAP_DPADD_S_H(vec0, vec1, vec2, vec3, filt0, filt1, filt2, \
-                            filt3)                                       \
-  ({                                                                     \
-    v8i16 tmp0, tmp1;                                                    \
-                                                                         \
-    tmp0 = __msa_dotp_s_h((v16i8)vec0, (v16i8)filt0);                    \
-    tmp0 = __msa_dpadd_s_h(tmp0, (v16i8)vec1, (v16i8)filt1);             \
-    tmp1 = __msa_dotp_s_h((v16i8)vec2, (v16i8)filt2);                    \
-    tmp1 = __msa_dpadd_s_h(tmp1, (v16i8)vec3, (v16i8)filt3);             \
-    tmp0 = __msa_adds_s_h(tmp0, tmp1);                                   \
-                                                                         \
-    tmp0;                                                                \
+#define FILT_8TAP_DPADD_S_H(vec0, vec1, vec2, vec3, filt0, filt1, filt2,   \
+                            filt3)                                         \
+  ({                                                                       \
+    v8i16 tmp_dpadd_0, tmp_dpadd_1;                                        \
+                                                                           \
+    tmp_dpadd_0 = __msa_dotp_s_h((v16i8)vec0, (v16i8)filt0);               \
+    tmp_dpadd_0 = __msa_dpadd_s_h(tmp_dpadd_0, (v16i8)vec1, (v16i8)filt1); \
+    tmp_dpadd_1 = __msa_dotp_s_h((v16i8)vec2, (v16i8)filt2);               \
+    tmp_dpadd_1 = __msa_dpadd_s_h(tmp_dpadd_1, (v16i8)vec3, (v16i8)filt3); \
+    tmp_dpadd_0 = __msa_adds_s_h(tmp_dpadd_0, tmp_dpadd_1);                \
+                                                                           \
+    tmp_dpadd_0;                                                           \
   })
 
 #define HORIZ_8TAP_FILT(src0, src1, mask0, mask1, mask2, mask3, filt_h0,       \
@@ -115,11 +115,10 @@
                            stride)                                           \
   {                                                                          \
     v16u8 tmp0_m, tmp1_m, tmp2_m, tmp3_m;                                    \
-    uint8_t *pdst_m = (uint8_t *)(pdst);                                     \
                                                                              \
     PCKEV_B2_UB(in2, in1, in4, in3, tmp0_m, tmp1_m);                         \
     PCKEV_D2_UB(dst1, dst0, dst3, dst2, tmp2_m, tmp3_m);                     \
     AVER_UB2_UB(tmp0_m, tmp2_m, tmp1_m, tmp3_m, tmp0_m, tmp1_m);             \
-    ST8x4_UB(tmp0_m, tmp1_m, pdst_m, stride);                                \
+    ST8x4_UB(tmp0_m, tmp1_m, pdst, stride);                                  \
   }
 #endif /* AOM_DSP_MIPS_AOM_CONVOLVE_MSA_H_ */
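tmp0/tmp1 become tmp_dpadd_0/tmp_dpadd_1 because these MSA helpers are GNU statement-expression macros that expand inside one another, and identically named locals in nested expansions trigger -Wshadow; dropping the pdst_m copy removes another such collision-prone name. A minimal illustration with hypothetical macros:

    /* Both macros declare 'tmp0'; with gcc -Wshadow, every OUTER() expansion
       warns that INNER's tmp0 shadows OUTER's. Unique per-macro names fix it. */
    #define INNER(x) ({ int tmp0 = (x) * 2; tmp0; })
    #define OUTER(x) ({ int tmp0 = (x) + 1; tmp0 + INNER(tmp0); })

    int use_macros(int v) { return OUTER(v); }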
diff --git a/aom_dsp/mips/inv_txfm_msa.h b/aom_dsp/mips/inv_txfm_msa.h
index ce2065b..122667a 100644
--- a/aom_dsp/mips/inv_txfm_msa.h
+++ b/aom_dsp/mips/inv_txfm_msa.h
@@ -197,18 +197,18 @@
                  out2, out3)                                                  \
   {                                                                           \
     v8i16 madd_s0_m, madd_s1_m, madd_s2_m, madd_s3_m;                         \
-    v4i32 tmp0_m, tmp1_m, tmp2_m, tmp3_m;                                     \
+    v4i32 tmp0_madd, tmp1_madd, tmp2_madd, tmp3_madd;                         \
                                                                               \
     ILVRL_H2_SH(inp1, inp0, madd_s1_m, madd_s0_m);                            \
     ILVRL_H2_SH(inp3, inp2, madd_s3_m, madd_s2_m);                            \
     DOTP_SH4_SW(madd_s1_m, madd_s0_m, madd_s1_m, madd_s0_m, cst0, cst0, cst1, \
-                cst1, tmp0_m, tmp1_m, tmp2_m, tmp3_m);                        \
-    SRARI_W4_SW(tmp0_m, tmp1_m, tmp2_m, tmp3_m, DCT_CONST_BITS);              \
-    PCKEV_H2_SH(tmp1_m, tmp0_m, tmp3_m, tmp2_m, out0, out1);                  \
+                cst1, tmp0_madd, tmp1_madd, tmp2_madd, tmp3_madd);            \
+    SRARI_W4_SW(tmp0_madd, tmp1_madd, tmp2_madd, tmp3_madd, DCT_CONST_BITS);  \
+    PCKEV_H2_SH(tmp1_madd, tmp0_madd, tmp3_madd, tmp2_madd, out0, out1);      \
     DOTP_SH4_SW(madd_s3_m, madd_s2_m, madd_s3_m, madd_s2_m, cst2, cst2, cst3, \
-                cst3, tmp0_m, tmp1_m, tmp2_m, tmp3_m);                        \
-    SRARI_W4_SW(tmp0_m, tmp1_m, tmp2_m, tmp3_m, DCT_CONST_BITS);              \
-    PCKEV_H2_SH(tmp1_m, tmp0_m, tmp3_m, tmp2_m, out2, out3);                  \
+                cst3, tmp0_madd, tmp1_madd, tmp2_madd, tmp3_madd);            \
+    SRARI_W4_SW(tmp0_madd, tmp1_madd, tmp2_madd, tmp3_madd, DCT_CONST_BITS);  \
+    PCKEV_H2_SH(tmp1_madd, tmp0_madd, tmp3_madd, tmp2_madd, out2, out3);      \
   }
 
 /* idct 8x8 macro */
diff --git a/aom_dsp/mips/loopfilter_msa.h b/aom_dsp/mips/loopfilter_msa.h
index c1cabc2..4505942 100644
--- a/aom_dsp/mips/loopfilter_msa.h
+++ b/aom_dsp/mips/loopfilter_msa.h
@@ -123,35 +123,35 @@
     p1_out = __msa_xori_b((v16u8)p1_m, 0x80);                           \
   }
 
-#define AOM_FLAT4(p3_in, p2_in, p0_in, q0_in, q2_in, q3_in, flat_out) \
-  {                                                                   \
-    v16u8 tmp, p2_a_sub_p0, q2_a_sub_q0, p3_a_sub_p0, q3_a_sub_q0;    \
-    v16u8 zero_in = { 0 };                                            \
-                                                                      \
-    tmp = __msa_ori_b(zero_in, 1);                                    \
-    p2_a_sub_p0 = __msa_asub_u_b(p2_in, p0_in);                       \
-    q2_a_sub_q0 = __msa_asub_u_b(q2_in, q0_in);                       \
-    p3_a_sub_p0 = __msa_asub_u_b(p3_in, p0_in);                       \
-    q3_a_sub_q0 = __msa_asub_u_b(q3_in, q0_in);                       \
-                                                                      \
-    p2_a_sub_p0 = __msa_max_u_b(p2_a_sub_p0, q2_a_sub_q0);            \
-    flat_out = __msa_max_u_b(p2_a_sub_p0, flat_out);                  \
-    p3_a_sub_p0 = __msa_max_u_b(p3_a_sub_p0, q3_a_sub_q0);            \
-    flat_out = __msa_max_u_b(p3_a_sub_p0, flat_out);                  \
-                                                                      \
-    flat_out = (tmp < (v16u8)flat_out);                               \
-    flat_out = __msa_xori_b(flat_out, 0xff);                          \
-    flat_out = flat_out & (mask);                                     \
+#define AOM_FLAT4(p3_in, p2_in, p0_in, q0_in, q2_in, q3_in, flat_out)    \
+  {                                                                      \
+    v16u8 tmp_flat4, p2_a_sub_p0, q2_a_sub_q0, p3_a_sub_p0, q3_a_sub_q0; \
+    v16u8 zero_in = { 0 };                                               \
+                                                                         \
+    tmp_flat4 = __msa_ori_b(zero_in, 1);                                 \
+    p2_a_sub_p0 = __msa_asub_u_b(p2_in, p0_in);                          \
+    q2_a_sub_q0 = __msa_asub_u_b(q2_in, q0_in);                          \
+    p3_a_sub_p0 = __msa_asub_u_b(p3_in, p0_in);                          \
+    q3_a_sub_q0 = __msa_asub_u_b(q3_in, q0_in);                          \
+                                                                         \
+    p2_a_sub_p0 = __msa_max_u_b(p2_a_sub_p0, q2_a_sub_q0);               \
+    flat_out = __msa_max_u_b(p2_a_sub_p0, flat_out);                     \
+    p3_a_sub_p0 = __msa_max_u_b(p3_a_sub_p0, q3_a_sub_q0);               \
+    flat_out = __msa_max_u_b(p3_a_sub_p0, flat_out);                     \
+                                                                         \
+    flat_out = (tmp_flat4 < (v16u8)flat_out);                            \
+    flat_out = __msa_xori_b(flat_out, 0xff);                             \
+    flat_out = flat_out & (mask);                                        \
   }
 
 #define AOM_FLAT5(p7_in, p6_in, p5_in, p4_in, p0_in, q0_in, q4_in, q5_in, \
                   q6_in, q7_in, flat_in, flat2_out)                       \
   {                                                                       \
-    v16u8 tmp, zero_in = { 0 };                                           \
+    v16u8 tmp_flat5, zero_in = { 0 };                                     \
     v16u8 p4_a_sub_p0, q4_a_sub_q0, p5_a_sub_p0, q5_a_sub_q0;             \
     v16u8 p6_a_sub_p0, q6_a_sub_q0, p7_a_sub_p0, q7_a_sub_q0;             \
                                                                           \
-    tmp = __msa_ori_b(zero_in, 1);                                        \
+    tmp_flat5 = __msa_ori_b(zero_in, 1);                                  \
     p4_a_sub_p0 = __msa_asub_u_b(p4_in, p0_in);                           \
     q4_a_sub_q0 = __msa_asub_u_b(q4_in, q0_in);                           \
     p5_a_sub_p0 = __msa_asub_u_b(p5_in, p0_in);                           \
@@ -169,7 +169,7 @@
     p7_a_sub_p0 = __msa_max_u_b(p7_a_sub_p0, q7_a_sub_q0);                \
     flat2_out = __msa_max_u_b(p7_a_sub_p0, flat2_out);                    \
                                                                           \
-    flat2_out = (tmp < (v16u8)flat2_out);                                 \
+    flat2_out = (tmp_flat5 < (v16u8)flat2_out);                           \
     flat2_out = __msa_xori_b(flat2_out, 0xff);                            \
     flat2_out = flat2_out & flat_in;                                      \
   }
@@ -178,38 +178,38 @@
                     p2_filt8_out, p1_filt8_out, p0_filt8_out, q0_filt8_out, \
                     q1_filt8_out, q2_filt8_out)                             \
   {                                                                         \
-    v8u16 tmp0, tmp1, tmp2;                                                 \
+    v8u16 tmp_filt8_0, tmp_filt8_1, tmp_filt8_2;                            \
                                                                             \
-    tmp2 = p2_in + p1_in + p0_in;                                           \
-    tmp0 = p3_in << 1;                                                      \
+    tmp_filt8_2 = p2_in + p1_in + p0_in;                                    \
+    tmp_filt8_0 = p3_in << 1;                                               \
                                                                             \
-    tmp0 = tmp0 + tmp2 + q0_in;                                             \
-    tmp1 = tmp0 + p3_in + p2_in;                                            \
-    p2_filt8_out = (v8i16)__msa_srari_h((v8i16)tmp1, 3);                    \
+    tmp_filt8_0 = tmp_filt8_0 + tmp_filt8_2 + q0_in;                        \
+    tmp_filt8_1 = tmp_filt8_0 + p3_in + p2_in;                              \
+    p2_filt8_out = (v8i16)__msa_srari_h((v8i16)tmp_filt8_1, 3);             \
                                                                             \
-    tmp1 = tmp0 + p1_in + q1_in;                                            \
-    p1_filt8_out = (v8i16)__msa_srari_h((v8i16)tmp1, 3);                    \
+    tmp_filt8_1 = tmp_filt8_0 + p1_in + q1_in;                              \
+    p1_filt8_out = (v8i16)__msa_srari_h((v8i16)tmp_filt8_1, 3);             \
                                                                             \
-    tmp1 = q2_in + q1_in + q0_in;                                           \
-    tmp2 = tmp2 + tmp1;                                                     \
-    tmp0 = tmp2 + (p0_in);                                                  \
-    tmp0 = tmp0 + (p3_in);                                                  \
-    p0_filt8_out = (v8i16)__msa_srari_h((v8i16)tmp0, 3);                    \
+    tmp_filt8_1 = q2_in + q1_in + q0_in;                                    \
+    tmp_filt8_2 = tmp_filt8_2 + tmp_filt8_1;                                \
+    tmp_filt8_0 = tmp_filt8_2 + (p0_in);                                    \
+    tmp_filt8_0 = tmp_filt8_0 + (p3_in);                                    \
+    p0_filt8_out = (v8i16)__msa_srari_h((v8i16)tmp_filt8_0, 3);             \
                                                                             \
-    tmp0 = q2_in + q3_in;                                                   \
-    tmp0 = p0_in + tmp1 + tmp0;                                             \
-    tmp1 = q3_in + q3_in;                                                   \
-    tmp1 = tmp1 + tmp0;                                                     \
-    q2_filt8_out = (v8i16)__msa_srari_h((v8i16)tmp1, 3);                    \
+    tmp_filt8_0 = q2_in + q3_in;                                            \
+    tmp_filt8_0 = p0_in + tmp_filt8_1 + tmp_filt8_0;                        \
+    tmp_filt8_1 = q3_in + q3_in;                                            \
+    tmp_filt8_1 = tmp_filt8_1 + tmp_filt8_0;                                \
+    q2_filt8_out = (v8i16)__msa_srari_h((v8i16)tmp_filt8_1, 3);             \
                                                                             \
-    tmp0 = tmp2 + q3_in;                                                    \
-    tmp1 = tmp0 + q0_in;                                                    \
-    q0_filt8_out = (v8i16)__msa_srari_h((v8i16)tmp1, 3);                    \
+    tmp_filt8_0 = tmp_filt8_2 + q3_in;                                      \
+    tmp_filt8_1 = tmp_filt8_0 + q0_in;                                      \
+    q0_filt8_out = (v8i16)__msa_srari_h((v8i16)tmp_filt8_1, 3);             \
                                                                             \
-    tmp1 = tmp0 - p2_in;                                                    \
-    tmp0 = q1_in + q3_in;                                                   \
-    tmp1 = tmp0 + tmp1;                                                     \
-    q1_filt8_out = (v8i16)__msa_srari_h((v8i16)tmp1, 3);                    \
+    tmp_filt8_1 = tmp_filt8_0 - p2_in;                                      \
+    tmp_filt8_0 = q1_in + q3_in;                                            \
+    tmp_filt8_1 = tmp_filt8_0 + tmp_filt8_1;                                \
+    q1_filt8_out = (v8i16)__msa_srari_h((v8i16)tmp_filt8_1, 3);             \
   }
 
 #define LPF_MASK_HEV(p3_in, p2_in, p1_in, p0_in, q0_in, q1_in, q2_in, q3_in, \
diff --git a/aom_dsp/mips/macros_msa.h b/aom_dsp/mips/macros_msa.h
index 7d0ba4b..48fbcfd 100644
--- a/aom_dsp/mips/macros_msa.h
+++ b/aom_dsp/mips/macros_msa.h
@@ -169,20 +169,20 @@
     val_m;                                                 \
   })
 #else  // !(__mips == 64)
-#define LD(psrc)                                            \
-  ({                                                        \
-    const uint8_t *psrc_m1 = (const uint8_t *)(psrc);       \
-    uint32_t val0_m, val1_m;                                \
-    uint64_t val_m = 0;                                     \
-                                                            \
-    val0_m = LW(psrc_m1);                                   \
-    val1_m = LW(psrc_m1 + 4);                               \
-                                                            \
-    val_m = (uint64_t)(val1_m);                             \
-    val_m = (uint64_t)((val_m << 32) & 0xFFFFFFFF00000000); \
-    val_m = (uint64_t)(val_m | (uint64_t)val0_m);           \
-                                                            \
-    val_m;                                                  \
+#define LD(psrc)                                                              \
+  ({                                                                          \
+    const uint8_t *psrc_m1 = (const uint8_t *)(psrc);                         \
+    uint32_t val0_m, val1_m;                                                  \
+    uint64_t val_m_combined = 0;                                              \
+                                                                              \
+    val0_m = LW(psrc_m1);                                                     \
+    val1_m = LW(psrc_m1 + 4);                                                 \
+                                                                              \
+    val_m_combined = (uint64_t)(val1_m);                                      \
+    val_m_combined = (uint64_t)((val_m_combined << 32) & 0xFFFFFFFF00000000); \
+    val_m_combined = (uint64_t)(val_m_combined | (uint64_t)val0_m);           \
+                                                                              \
+    val_m_combined;                                                           \
   })
 #endif  // (__mips == 64)
 
@@ -2020,13 +2020,12 @@
                                 pdst, stride)                               \
   {                                                                         \
     v16u8 tmp0_m, tmp1_m, tmp2_m, tmp3_m;                                   \
-    uint8_t *pdst_m = (uint8_t *)(pdst);                                    \
                                                                             \
     tmp0_m = PCKEV_XORI128_UB(in0, in1);                                    \
     tmp1_m = PCKEV_XORI128_UB(in2, in3);                                    \
     ILVR_D2_UB(dst1, dst0, dst3, dst2, tmp2_m, tmp3_m);                     \
     AVER_UB2_UB(tmp0_m, tmp2_m, tmp1_m, tmp3_m, tmp0_m, tmp1_m);            \
-    ST8x4_UB(tmp0_m, tmp1_m, pdst_m, stride);                               \
+    ST8x4_UB(tmp0_m, tmp1_m, pdst, stride);                                 \
   }
 
 /* Description : Pack even byte elements and store byte vector in destination
diff --git a/aom_dsp/x86/inv_txfm_sse2.c b/aom_dsp/x86/inv_txfm_sse2.c
index 61f548a..4735d97 100644
--- a/aom_dsp/x86/inv_txfm_sse2.c
+++ b/aom_dsp/x86/inv_txfm_sse2.c
@@ -2372,7 +2372,6 @@
 #define IDCT32_34                                                              \
   /* Stage1 */                                                                 \
   {                                                                            \
-    const __m128i zero = _mm_setzero_si128();                                  \
     const __m128i lo_1_31 = _mm_unpacklo_epi16(in[1], zero);                   \
     const __m128i hi_1_31 = _mm_unpackhi_epi16(in[1], zero);                   \
                                                                                \
@@ -2397,7 +2396,6 @@
                                                                                \
   /* Stage2 */                                                                 \
   {                                                                            \
-    const __m128i zero = _mm_setzero_si128();                                  \
     const __m128i lo_2_30 = _mm_unpacklo_epi16(in[2], zero);                   \
     const __m128i hi_2_30 = _mm_unpackhi_epi16(in[2], zero);                   \
                                                                                \
@@ -2424,7 +2422,6 @@
                                                                                \
   /* Stage3 */                                                                 \
   {                                                                            \
-    const __m128i zero = _mm_setzero_si128();                                  \
     const __m128i lo_4_28 = _mm_unpacklo_epi16(in[4], zero);                   \
     const __m128i hi_4_28 = _mm_unpackhi_epi16(in[4], zero);                   \
                                                                                \
@@ -2465,7 +2462,6 @@
                                                                                \
   /* Stage4 */                                                                 \
   {                                                                            \
-    const __m128i zero = _mm_setzero_si128();                                  \
     const __m128i lo_0_16 = _mm_unpacklo_epi16(in[0], zero);                   \
     const __m128i hi_0_16 = _mm_unpackhi_epi16(in[0], zero);                   \
                                                                                \
@@ -3002,6 +2998,7 @@
 // Only upper-left 8x8 has non-zero coeff
 void aom_idct32x32_34_add_sse2(const tran_low_t *input, uint8_t *dest,
                                int stride) {
+  const __m128i zero = _mm_setzero_si128();
   const __m128i rounding = _mm_set1_epi32(DCT_CONST_ROUNDING);
   const __m128i final_rounding = _mm_set1_epi16(1 << 5);
 
@@ -3107,7 +3104,6 @@
   col[31] = _mm_sub_epi16(stp1_0, stp1_31);
   for (i = 0; i < 4; i++) {
     int j;
-    const __m128i zero = _mm_setzero_si128();
     // Transpose 32x8 block to 8x32 block
     array_transpose_8x8(col + i * 8, in);
     IDCT32_34
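Rather than declaring zero inside each stage block of IDCT32_34 (and again in the loop at the call site), the function now defines it once and the macro picks it up from the enclosing scope, so expansions no longer shadow one another. The shape, sketched with a hypothetical macro:

    #include <emmintrin.h>

    /* The macro uses a 'zero' vector that the enclosing function must define,
       mirroring how IDCT32_34 is used after this change. */
    #define UNPACK_LO_HI(in, lo, hi)           \
      {                                        \
        (lo) = _mm_unpacklo_epi16((in), zero); \
        (hi) = _mm_unpackhi_epi16((in), zero); \
      }

    void expand16(__m128i in, __m128i *lo, __m128i *hi) {
      const __m128i zero = _mm_setzero_si128(); /* defined once */
      UNPACK_LO_HI(in, *lo, *hi);
    }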
diff --git a/aomdec.c b/aomdec.c
index d9f229d..e88c81f 100644
--- a/aomdec.c
+++ b/aomdec.c
@@ -893,7 +893,7 @@
 
       if (single_file) {
         if (use_y4m) {
-          char buf[Y4M_BUFFER_SIZE] = { 0 };
+          char y4m_buf[Y4M_BUFFER_SIZE] = { 0 };
           size_t len = 0;
           if (img->fmt == AOM_IMG_FMT_I440 || img->fmt == AOM_IMG_FMT_I44016) {
             fprintf(stderr, "Cannot produce y4m output for 440 sampling.\n");
@@ -902,21 +902,22 @@
           if (frame_out == 1) {
             // Y4M file header
             len = y4m_write_file_header(
-                buf, sizeof(buf), aom_input_ctx.width, aom_input_ctx.height,
-                &aom_input_ctx.framerate, img->fmt, img->bit_depth);
+                y4m_buf, sizeof(y4m_buf), aom_input_ctx.width,
+                aom_input_ctx.height, &aom_input_ctx.framerate, img->fmt,
+                img->bit_depth);
             if (do_md5) {
-              MD5Update(&md5_ctx, (md5byte *)buf, (unsigned int)len);
+              MD5Update(&md5_ctx, (md5byte *)y4m_buf, (unsigned int)len);
             } else {
-              fputs(buf, outfile);
+              fputs(y4m_buf, outfile);
             }
           }
 
           // Y4M frame header
-          len = y4m_write_frame_header(buf, sizeof(buf));
+          len = y4m_write_frame_header(y4m_buf, sizeof(y4m_buf));
           if (do_md5) {
-            MD5Update(&md5_ctx, (md5byte *)buf, (unsigned int)len);
+            MD5Update(&md5_ctx, (md5byte *)y4m_buf, (unsigned int)len);
           } else {
-            fputs(buf, outfile);
+            fputs(y4m_buf, outfile);
           }
         } else {
           if (frame_out == 1) {
diff --git a/aomenc.c b/aomenc.c
index 63ef753..3f9a87d 100644
--- a/aomenc.c
+++ b/aomenc.c
@@ -1753,13 +1753,11 @@
   /* Get the internal reference frame */
   if (strcmp(codec->name, "vp8") == 0) {
     struct aom_ref_frame ref_enc, ref_dec;
-    int width, height;
-
-    width = (stream->config.cfg.g_w + 15) & ~15;
-    height = (stream->config.cfg.g_h + 15) & ~15;
-    aom_img_alloc(&ref_enc.img, AOM_IMG_FMT_I420, width, height, 1);
+    const unsigned int frame_width = (stream->config.cfg.g_w + 15) & ~15;
+    const unsigned int frame_height = (stream->config.cfg.g_h + 15) & ~15;
+    aom_img_alloc(&ref_enc.img, AOM_IMG_FMT_I420, frame_width, frame_height, 1);
     enc_img = ref_enc.img;
-    aom_img_alloc(&ref_dec.img, AOM_IMG_FMT_I420, width, height, 1);
+    aom_img_alloc(&ref_dec.img, AOM_IMG_FMT_I420, frame_width, frame_height, 1);
     dec_img = ref_dec.img;
 
     ref_enc.frame_type = AOM_LAST_FRAME;
@@ -2131,10 +2129,10 @@
           } else {
             const int64_t input_pos = ftello(input.file);
             const int64_t input_pos_lagged = input_pos - lagged_count;
-            const int64_t limit = input.length;
+            const int64_t input_limit = input.length;
 
             rate = cx_time ? input_pos_lagged * (int64_t)1000000 / cx_time : 0;
-            remaining = limit - input_pos + lagged_count;
+            remaining = input_limit - input_pos + lagged_count;
           }
 
           average_rate =
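The rounded-up dimensions become const unsigned int (with names that no longer shadow outer variables) to match the unsigned width/height parameters of aom_img_alloc(); (v + 15) & ~15 rounds up to the next multiple of 16. The rounding in isolation (illustrative):

    static unsigned int align16(unsigned int v) { return (v + 15) & ~15u; }
    /* align16(1920) == 1920, align16(1921) == 1936, align16(1) == 16 */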
diff --git a/av1/av1_cx_iface.c b/av1/av1_cx_iface.c
index d815ca7..fae7d04 100644
--- a/av1/av1_cx_iface.c
+++ b/av1/av1_cx_iface.c
@@ -871,7 +871,6 @@
   index_sz = 2 + (mag + 1) * (ctx->pending_frame_count - 1);
   if (ctx->pending_cx_data_sz + index_sz < ctx->cx_data_sz) {
     uint8_t *x = ctx->pending_cx_data + ctx->pending_cx_data_sz;
-    int i, j;
 #ifdef TEST_SUPPLEMENTAL_SUPERFRAME_DATA
     uint8_t marker_test = 0xc0;
     int mag_test = 2;     // 1 - 4
@@ -890,6 +889,7 @@
     *x++ = marker;
     for (i = 0; i < ctx->pending_frame_count - 1; i++) {
       unsigned int this_sz;
+      int j;
 
       assert(ctx->pending_frame_sizes[i] > 0);
       this_sz = (unsigned int)ctx->pending_frame_sizes[i] - 1;
diff --git a/av1/av1_dx_iface.c b/av1/av1_dx_iface.c
index 7b9d21c..7da80f0 100644
--- a/av1/av1_dx_iface.c
+++ b/av1/av1_dx_iface.c
@@ -580,7 +580,7 @@
 
   // Initialize the decoder workers on the first frame.
   if (ctx->frame_workers == NULL) {
-    const aom_codec_err_t res = init_decoder(ctx);
+    res = init_decoder(ctx);
     if (res != AOM_CODEC_OK) return res;
   }
 
@@ -646,7 +646,6 @@
       for (i = 0; i < frame_count; ++i) {
         const uint8_t *data_start_copy = data_start;
         const uint32_t frame_size = frame_sizes[i];
-        aom_codec_err_t res;
         if (data_start < data ||
             frame_size > (uint32_t)(data_end - data_start)) {
           set_error_detail(ctx, "Invalid frame size in index");
@@ -662,8 +661,7 @@
     } else {
       while (data_start < data_end) {
         const uint32_t frame_size = (uint32_t)(data_end - data_start);
-        const aom_codec_err_t res =
-            decode_one(ctx, &data_start, frame_size, user_priv, deadline);
+        res = decode_one(ctx, &data_start, frame_size, user_priv, deadline);
         if (res != AOM_CODEC_OK) return res;
 
         // Account for suboptimal termination by the encoder.
diff --git a/av1/common/arm/neon/iht4x4_add_neon.c b/av1/common/arm/neon/iht4x4_add_neon.c
index baa786f..02572d4 100644
--- a/av1/common/arm/neon/iht4x4_add_neon.c
+++ b/av1/common/arm/neon/iht4x4_add_neon.c
@@ -12,18 +12,11 @@
 #include <arm_neon.h>
 #include <assert.h>
 
-#include "./av1_rtcd.h"
 #include "./aom_config.h"
+#include "./av1_rtcd.h"
+#include "aom_dsp/txfm_common.h"
 #include "av1/common/common.h"
 
-static int16_t sinpi_1_9 = 0x14a3;
-static int16_t sinpi_2_9 = 0x26c9;
-static int16_t sinpi_3_9 = 0x3441;
-static int16_t sinpi_4_9 = 0x3b6c;
-static int16_t cospi_8_64 = 0x3b21;
-static int16_t cospi_16_64 = 0x2d41;
-static int16_t cospi_24_64 = 0x187e;
-
 static INLINE void TRANSPOSE4X4(int16x8_t *q8s16, int16x8_t *q9s16) {
   int32x4_t q8s32, q9s32;
   int16x4x2_t d0x2s16, d1x2s16;
@@ -43,18 +36,18 @@
 
 static INLINE void GENERATE_COSINE_CONSTANTS(int16x4_t *d0s16, int16x4_t *d1s16,
                                              int16x4_t *d2s16) {
-  *d0s16 = vdup_n_s16(cospi_8_64);
-  *d1s16 = vdup_n_s16(cospi_16_64);
-  *d2s16 = vdup_n_s16(cospi_24_64);
+  *d0s16 = vdup_n_s16((int16_t)cospi_8_64);
+  *d1s16 = vdup_n_s16((int16_t)cospi_16_64);
+  *d2s16 = vdup_n_s16((int16_t)cospi_24_64);
   return;
 }
 
 static INLINE void GENERATE_SINE_CONSTANTS(int16x4_t *d3s16, int16x4_t *d4s16,
                                            int16x4_t *d5s16, int16x8_t *q3s16) {
-  *d3s16 = vdup_n_s16(sinpi_1_9);
-  *d4s16 = vdup_n_s16(sinpi_2_9);
-  *q3s16 = vdupq_n_s16(sinpi_3_9);
-  *d5s16 = vdup_n_s16(sinpi_4_9);
+  *d3s16 = vdup_n_s16((int16_t)sinpi_1_9);
+  *d4s16 = vdup_n_s16((int16_t)sinpi_2_9);
+  *q3s16 = vdupq_n_s16((int16_t)sinpi_3_9);
+  *d5s16 = vdup_n_s16((int16_t)sinpi_4_9);
   return;
 }
 
@@ -121,7 +114,7 @@
   q10s32 = vaddq_s32(q10s32, q13s32);
   q10s32 = vaddq_s32(q10s32, q8s32);
   q11s32 = vsubq_s32(q11s32, q14s32);
-  q8s32 = vdupq_n_s32(sinpi_3_9);
+  q8s32 = vdupq_n_s32((int32_t)sinpi_3_9);
   q11s32 = vsubq_s32(q11s32, q9s32);
   q15s32 = vmulq_s32(q15s32, q8s32);
 
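The deleted hex locals are the same values aom_dsp/txfm_common.h carries in decimal, so the file now has a single source of truth for them. A quick equivalence check (illustrative; decimal values as in the shared header):

    #include <assert.h>

    int main(void) {
      assert(0x14a3 == 5283);  /* sinpi_1_9 */
      assert(0x26c9 == 9929);  /* sinpi_2_9 */
      assert(0x3441 == 13377); /* sinpi_3_9 */
      assert(0x3b6c == 15212); /* sinpi_4_9 */
      assert(0x3b21 == 15137); /* cospi_8_64 */
      assert(0x2d41 == 11585); /* cospi_16_64 */
      assert(0x187e == 6270);  /* cospi_24_64 */
      return 0;
    }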
diff --git a/av1/common/arm/neon/iht8x8_add_neon.c b/av1/common/arm/neon/iht8x8_add_neon.c
index 15deabe..86798cc 100644
--- a/av1/common/arm/neon/iht8x8_add_neon.c
+++ b/av1/common/arm/neon/iht8x8_add_neon.c
@@ -12,26 +12,11 @@
 #include <arm_neon.h>
 #include <assert.h>
 
-#include "./av1_rtcd.h"
 #include "./aom_config.h"
+#include "./av1_rtcd.h"
+#include "aom_dsp/txfm_common.h"
 #include "av1/common/common.h"
 
-static int16_t cospi_2_64 = 16305;
-static int16_t cospi_4_64 = 16069;
-static int16_t cospi_6_64 = 15679;
-static int16_t cospi_8_64 = 15137;
-static int16_t cospi_10_64 = 14449;
-static int16_t cospi_12_64 = 13623;
-static int16_t cospi_14_64 = 12665;
-static int16_t cospi_16_64 = 11585;
-static int16_t cospi_18_64 = 10394;
-static int16_t cospi_20_64 = 9102;
-static int16_t cospi_22_64 = 7723;
-static int16_t cospi_24_64 = 6270;
-static int16_t cospi_26_64 = 4756;
-static int16_t cospi_28_64 = 3196;
-static int16_t cospi_30_64 = 1606;
-
 static INLINE void TRANSPOSE8X8(int16x8_t *q8s16, int16x8_t *q9s16,
                                 int16x8_t *q10s16, int16x8_t *q11s16,
                                 int16x8_t *q12s16, int16x8_t *q13s16,
@@ -108,10 +93,10 @@
   int32x4_t q2s32, q3s32, q5s32, q6s32, q8s32, q9s32;
   int32x4_t q10s32, q11s32, q12s32, q13s32, q15s32;
 
-  d0s16 = vdup_n_s16(cospi_28_64);
-  d1s16 = vdup_n_s16(cospi_4_64);
-  d2s16 = vdup_n_s16(cospi_12_64);
-  d3s16 = vdup_n_s16(cospi_20_64);
+  d0s16 = vdup_n_s16((int16_t)cospi_28_64);
+  d1s16 = vdup_n_s16((int16_t)cospi_4_64);
+  d2s16 = vdup_n_s16((int16_t)cospi_12_64);
+  d3s16 = vdup_n_s16((int16_t)cospi_20_64);
 
   d16s16 = vget_low_s16(*q8s16);
   d17s16 = vget_high_s16(*q8s16);
@@ -164,7 +149,7 @@
   q6s16 = vcombine_s16(d12s16, d13s16);
   q7s16 = vcombine_s16(d14s16, d15s16);
 
-  d0s16 = vdup_n_s16(cospi_16_64);
+  d0s16 = vdup_n_s16((int16_t)cospi_16_64);
 
   q2s32 = vmull_s16(d16s16, d0s16);
   q3s32 = vmull_s16(d17s16, d0s16);
@@ -176,8 +161,8 @@
   q13s32 = vmlsl_s16(q13s32, d24s16, d0s16);
   q15s32 = vmlsl_s16(q15s32, d25s16, d0s16);
 
-  d0s16 = vdup_n_s16(cospi_24_64);
-  d1s16 = vdup_n_s16(cospi_8_64);
+  d0s16 = vdup_n_s16((int16_t)cospi_24_64);
+  d1s16 = vdup_n_s16((int16_t)cospi_8_64);
 
   d18s16 = vqrshrn_n_s32(q2s32, 14);
   d19s16 = vqrshrn_n_s32(q3s32, 14);
@@ -217,7 +202,7 @@
   d28s16 = vget_low_s16(*q14s16);
   d29s16 = vget_high_s16(*q14s16);
 
-  d16s16 = vdup_n_s16(cospi_16_64);
+  d16s16 = vdup_n_s16((int16_t)cospi_16_64);
 
   q9s32 = vmull_s16(d28s16, d16s16);
   q10s32 = vmull_s16(d29s16, d16s16);
@@ -276,16 +261,16 @@
   d30s16 = vget_low_s16(*q15s16);
   d31s16 = vget_high_s16(*q15s16);
 
-  d14s16 = vdup_n_s16(cospi_2_64);
-  d15s16 = vdup_n_s16(cospi_30_64);
+  d14s16 = vdup_n_s16((int16_t)cospi_2_64);
+  d15s16 = vdup_n_s16((int16_t)cospi_30_64);
 
   q1s32 = vmull_s16(d30s16, d14s16);
   q2s32 = vmull_s16(d31s16, d14s16);
   q3s32 = vmull_s16(d30s16, d15s16);
   q4s32 = vmull_s16(d31s16, d15s16);
 
-  d30s16 = vdup_n_s16(cospi_18_64);
-  d31s16 = vdup_n_s16(cospi_14_64);
+  d30s16 = vdup_n_s16((int16_t)cospi_18_64);
+  d31s16 = vdup_n_s16((int16_t)cospi_14_64);
 
   q1s32 = vmlal_s16(q1s32, d16s16, d15s16);
   q2s32 = vmlal_s16(q2s32, d17s16, d15s16);
@@ -324,15 +309,15 @@
   d7s16 = vqrshrn_n_s32(q4s32, 14);
   *q12s16 = vcombine_s16(d24s16, d25s16);
 
-  d0s16 = vdup_n_s16(cospi_10_64);
-  d1s16 = vdup_n_s16(cospi_22_64);
+  d0s16 = vdup_n_s16((int16_t)cospi_10_64);
+  d1s16 = vdup_n_s16((int16_t)cospi_22_64);
   q4s32 = vmull_s16(d26s16, d0s16);
   q5s32 = vmull_s16(d27s16, d0s16);
   q2s32 = vmull_s16(d26s16, d1s16);
   q6s32 = vmull_s16(d27s16, d1s16);
 
-  d30s16 = vdup_n_s16(cospi_26_64);
-  d31s16 = vdup_n_s16(cospi_6_64);
+  d30s16 = vdup_n_s16((int16_t)cospi_26_64);
+  d31s16 = vdup_n_s16((int16_t)cospi_6_64);
 
   q4s32 = vmlal_s16(q4s32, d20s16, d1s16);
   q5s32 = vmlal_s16(q5s32, d21s16, d1s16);
@@ -367,8 +352,8 @@
   q4s32 = vsubq_s32(q4s32, q0s32);
   q5s32 = vsubq_s32(q5s32, q13s32);
 
-  d30s16 = vdup_n_s16(cospi_8_64);
-  d31s16 = vdup_n_s16(cospi_24_64);
+  d30s16 = vdup_n_s16((int16_t)cospi_8_64);
+  d31s16 = vdup_n_s16((int16_t)cospi_24_64);
 
   d18s16 = vqrshrn_n_s32(q9s32, 14);
   d19s16 = vqrshrn_n_s32(q10s32, 14);
@@ -423,7 +408,7 @@
   d15s16 = vqrshrn_n_s32(q0s32, 14);
   *q14s16 = vcombine_s16(d28s16, d29s16);
 
-  d30s16 = vdup_n_s16(cospi_16_64);
+  d30s16 = vdup_n_s16((int16_t)cospi_16_64);
 
   d22s16 = vget_low_s16(*q11s16);
   d23s16 = vget_high_s16(*q11s16);
diff --git a/av1/common/av1_fwd_txfm.c b/av1/common/av1_fwd_txfm.c
index 94bd043..84a3876 100644
--- a/av1/common/av1_fwd_txfm.c
+++ b/av1/common/av1_fwd_txfm.c
@@ -9,8 +9,9 @@
  * PATENTS file, you can obtain it at www.aomedia.org/license/patent.
  */
 
-#include "./av1_rtcd.h"
 #include "av1/common/av1_fwd_txfm.h"
+#include <assert.h>
+#include "./av1_rtcd.h"
 
 void av1_fdct4x4_c(const int16_t *input, tran_low_t *output, int stride) {
   // The 2D transform is done with two passes which are actually pretty
@@ -22,36 +23,37 @@
   int pass;
   // We need an intermediate buffer between passes.
   tran_low_t intermediate[4 * 4];
-  const int16_t *in_pass0 = input;
-  const tran_low_t *in = NULL;
+  const tran_low_t *in_low = NULL;
   tran_low_t *out = intermediate;
   // Do the two transform/transpose passes
   for (pass = 0; pass < 2; ++pass) {
-    tran_high_t input[4];      // canbe16
+    tran_high_t in_high[4];    // canbe16
     tran_high_t step[4];       // canbe16
     tran_high_t temp1, temp2;  // needs32
     int i;
     for (i = 0; i < 4; ++i) {
       // Load inputs.
       if (0 == pass) {
-        input[0] = in_pass0[0 * stride] * 16;
-        input[1] = in_pass0[1 * stride] * 16;
-        input[2] = in_pass0[2 * stride] * 16;
-        input[3] = in_pass0[3 * stride] * 16;
-        if (i == 0 && input[0]) {
-          input[0] += 1;
+        in_high[0] = input[0 * stride] * 16;
+        in_high[1] = input[1 * stride] * 16;
+        in_high[2] = input[2 * stride] * 16;
+        in_high[3] = input[3 * stride] * 16;
+        if (i == 0 && in_high[0]) {
+          in_high[0] += 1;
         }
       } else {
-        input[0] = in[0 * 4];
-        input[1] = in[1 * 4];
-        input[2] = in[2 * 4];
-        input[3] = in[3 * 4];
+        assert(in_low != NULL);
+        in_high[0] = in_low[0 * 4];
+        in_high[1] = in_low[1 * 4];
+        in_high[2] = in_low[2 * 4];
+        in_high[3] = in_low[3 * 4];
+        in_low++;
       }
       // Transform.
-      step[0] = input[0] + input[3];
-      step[1] = input[1] + input[2];
-      step[2] = input[1] - input[2];
-      step[3] = input[0] - input[3];
+      step[0] = in_high[0] + in_high[3];
+      step[1] = in_high[1] + in_high[2];
+      step[2] = in_high[1] - in_high[2];
+      step[3] = in_high[0] - in_high[3];
       temp1 = (step[0] + step[1]) * cospi_16_64;
       temp2 = (step[0] - step[1]) * cospi_16_64;
       out[0] = (tran_low_t)fdct_round_shift(temp1);
@@ -61,12 +63,11 @@
       out[1] = (tran_low_t)fdct_round_shift(temp1);
       out[3] = (tran_low_t)fdct_round_shift(temp2);
       // Do next column (which is a transposed row in second/horizontal pass)
-      in_pass0++;
-      in++;
+      input++;
       out += 4;
     }
-    // Setup in/out for next pass.
-    in = intermediate;
+    // Setup in_low/out for next pass.
+    in_low = intermediate;
     out = output;
   }
 
@@ -101,7 +102,6 @@
     tran_high_t t0, t1, t2, t3;                  // needs32
     tran_high_t x0, x1, x2, x3;                  // canbe16
 
-    int i;
     for (i = 0; i < 8; i++) {
       // stage 1
       if (pass == 0) {
@@ -193,56 +193,57 @@
   int pass;
   // We need an intermediate buffer between passes.
   tran_low_t intermediate[256];
-  const int16_t *in_pass0 = input;
-  const tran_low_t *in = NULL;
+  const tran_low_t *in_low = NULL;
   tran_low_t *out = intermediate;
   // Do the two transform/transpose passes
   for (pass = 0; pass < 2; ++pass) {
     tran_high_t step1[8];      // canbe16
     tran_high_t step2[8];      // canbe16
     tran_high_t step3[8];      // canbe16
-    tran_high_t input[8];      // canbe16
+    tran_high_t in_high[8];    // canbe16
     tran_high_t temp1, temp2;  // needs32
     int i;
     for (i = 0; i < 16; i++) {
       if (0 == pass) {
         // Calculate input for the first 8 results.
-        input[0] = (in_pass0[0 * stride] + in_pass0[15 * stride]) * 4;
-        input[1] = (in_pass0[1 * stride] + in_pass0[14 * stride]) * 4;
-        input[2] = (in_pass0[2 * stride] + in_pass0[13 * stride]) * 4;
-        input[3] = (in_pass0[3 * stride] + in_pass0[12 * stride]) * 4;
-        input[4] = (in_pass0[4 * stride] + in_pass0[11 * stride]) * 4;
-        input[5] = (in_pass0[5 * stride] + in_pass0[10 * stride]) * 4;
-        input[6] = (in_pass0[6 * stride] + in_pass0[9 * stride]) * 4;
-        input[7] = (in_pass0[7 * stride] + in_pass0[8 * stride]) * 4;
+        in_high[0] = (input[0 * stride] + input[15 * stride]) * 4;
+        in_high[1] = (input[1 * stride] + input[14 * stride]) * 4;
+        in_high[2] = (input[2 * stride] + input[13 * stride]) * 4;
+        in_high[3] = (input[3 * stride] + input[12 * stride]) * 4;
+        in_high[4] = (input[4 * stride] + input[11 * stride]) * 4;
+        in_high[5] = (input[5 * stride] + input[10 * stride]) * 4;
+        in_high[6] = (input[6 * stride] + input[9 * stride]) * 4;
+        in_high[7] = (input[7 * stride] + input[8 * stride]) * 4;
         // Calculate input for the next 8 results.
-        step1[0] = (in_pass0[7 * stride] - in_pass0[8 * stride]) * 4;
-        step1[1] = (in_pass0[6 * stride] - in_pass0[9 * stride]) * 4;
-        step1[2] = (in_pass0[5 * stride] - in_pass0[10 * stride]) * 4;
-        step1[3] = (in_pass0[4 * stride] - in_pass0[11 * stride]) * 4;
-        step1[4] = (in_pass0[3 * stride] - in_pass0[12 * stride]) * 4;
-        step1[5] = (in_pass0[2 * stride] - in_pass0[13 * stride]) * 4;
-        step1[6] = (in_pass0[1 * stride] - in_pass0[14 * stride]) * 4;
-        step1[7] = (in_pass0[0 * stride] - in_pass0[15 * stride]) * 4;
+        step1[0] = (input[7 * stride] - input[8 * stride]) * 4;
+        step1[1] = (input[6 * stride] - input[9 * stride]) * 4;
+        step1[2] = (input[5 * stride] - input[10 * stride]) * 4;
+        step1[3] = (input[4 * stride] - input[11 * stride]) * 4;
+        step1[4] = (input[3 * stride] - input[12 * stride]) * 4;
+        step1[5] = (input[2 * stride] - input[13 * stride]) * 4;
+        step1[6] = (input[1 * stride] - input[14 * stride]) * 4;
+        step1[7] = (input[0 * stride] - input[15 * stride]) * 4;
       } else {
         // Calculate input for the first 8 results.
-        input[0] = ((in[0 * 16] + 1) >> 2) + ((in[15 * 16] + 1) >> 2);
-        input[1] = ((in[1 * 16] + 1) >> 2) + ((in[14 * 16] + 1) >> 2);
-        input[2] = ((in[2 * 16] + 1) >> 2) + ((in[13 * 16] + 1) >> 2);
-        input[3] = ((in[3 * 16] + 1) >> 2) + ((in[12 * 16] + 1) >> 2);
-        input[4] = ((in[4 * 16] + 1) >> 2) + ((in[11 * 16] + 1) >> 2);
-        input[5] = ((in[5 * 16] + 1) >> 2) + ((in[10 * 16] + 1) >> 2);
-        input[6] = ((in[6 * 16] + 1) >> 2) + ((in[9 * 16] + 1) >> 2);
-        input[7] = ((in[7 * 16] + 1) >> 2) + ((in[8 * 16] + 1) >> 2);
+        assert(in_low != NULL);
+        in_high[0] = ((in_low[0 * 16] + 1) >> 2) + ((in_low[15 * 16] + 1) >> 2);
+        in_high[1] = ((in_low[1 * 16] + 1) >> 2) + ((in_low[14 * 16] + 1) >> 2);
+        in_high[2] = ((in_low[2 * 16] + 1) >> 2) + ((in_low[13 * 16] + 1) >> 2);
+        in_high[3] = ((in_low[3 * 16] + 1) >> 2) + ((in_low[12 * 16] + 1) >> 2);
+        in_high[4] = ((in_low[4 * 16] + 1) >> 2) + ((in_low[11 * 16] + 1) >> 2);
+        in_high[5] = ((in_low[5 * 16] + 1) >> 2) + ((in_low[10 * 16] + 1) >> 2);
+        in_high[6] = ((in_low[6 * 16] + 1) >> 2) + ((in_low[9 * 16] + 1) >> 2);
+        in_high[7] = ((in_low[7 * 16] + 1) >> 2) + ((in_low[8 * 16] + 1) >> 2);
         // Calculate input for the next 8 results.
-        step1[0] = ((in[7 * 16] + 1) >> 2) - ((in[8 * 16] + 1) >> 2);
-        step1[1] = ((in[6 * 16] + 1) >> 2) - ((in[9 * 16] + 1) >> 2);
-        step1[2] = ((in[5 * 16] + 1) >> 2) - ((in[10 * 16] + 1) >> 2);
-        step1[3] = ((in[4 * 16] + 1) >> 2) - ((in[11 * 16] + 1) >> 2);
-        step1[4] = ((in[3 * 16] + 1) >> 2) - ((in[12 * 16] + 1) >> 2);
-        step1[5] = ((in[2 * 16] + 1) >> 2) - ((in[13 * 16] + 1) >> 2);
-        step1[6] = ((in[1 * 16] + 1) >> 2) - ((in[14 * 16] + 1) >> 2);
-        step1[7] = ((in[0 * 16] + 1) >> 2) - ((in[15 * 16] + 1) >> 2);
+        step1[0] = ((in_low[7 * 16] + 1) >> 2) - ((in_low[8 * 16] + 1) >> 2);
+        step1[1] = ((in_low[6 * 16] + 1) >> 2) - ((in_low[9 * 16] + 1) >> 2);
+        step1[2] = ((in_low[5 * 16] + 1) >> 2) - ((in_low[10 * 16] + 1) >> 2);
+        step1[3] = ((in_low[4 * 16] + 1) >> 2) - ((in_low[11 * 16] + 1) >> 2);
+        step1[4] = ((in_low[3 * 16] + 1) >> 2) - ((in_low[12 * 16] + 1) >> 2);
+        step1[5] = ((in_low[2 * 16] + 1) >> 2) - ((in_low[13 * 16] + 1) >> 2);
+        step1[6] = ((in_low[1 * 16] + 1) >> 2) - ((in_low[14 * 16] + 1) >> 2);
+        step1[7] = ((in_low[0 * 16] + 1) >> 2) - ((in_low[15 * 16] + 1) >> 2);
+        in_low++;
       }
-      // Work on the first eight values; fdct8(input, even_results);
+      // Work on the first eight values; fdct8(in_high, even_results);
       {
@@ -251,14 +252,14 @@
         tran_high_t x0, x1, x2, x3;                  // canbe16
 
         // stage 1
-        s0 = input[0] + input[7];
-        s1 = input[1] + input[6];
-        s2 = input[2] + input[5];
-        s3 = input[3] + input[4];
-        s4 = input[3] - input[4];
-        s5 = input[2] - input[5];
-        s6 = input[1] - input[6];
-        s7 = input[0] - input[7];
+        s0 = in_high[0] + in_high[7];
+        s1 = in_high[1] + in_high[6];
+        s2 = in_high[2] + in_high[5];
+        s3 = in_high[3] + in_high[4];
+        s4 = in_high[3] - in_high[4];
+        s5 = in_high[2] - in_high[5];
+        s6 = in_high[1] - in_high[6];
+        s7 = in_high[0] - in_high[7];
 
         // fdct4(step, step);
         x0 = s0 + s3;
@@ -353,12 +354,11 @@
         out[15] = (tran_low_t)fdct_round_shift(temp2);
       }
       // Do next column (which is a transposed row in second/horizontal pass)
-      in++;
-      in_pass0++;
+      input++;
       out += 16;
     }
-    // Setup in/out for next pass.
-    in = intermediate;
+    // Setup in_low/out for next pass.
+    in_low = intermediate;
     out = output;
   }
 }
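
The renames above remove a local array named input that shadowed the
function's input parameter (the kind of declaration -Wshadow flags), plus a
NULL-initialized read pointer whose first-pass validity was only implicit.
With the scratch array spelled in_high and the cross-pass pointer spelled
in_low, the parameter itself can advance across columns, and the new
assert() records that in_low is only dereferenced once pass 0 has filled the
intermediate buffer. A toy sketch of the same two-pass skeleton, using a
2x2 block and a stand-in butterfly in place of the library's DCT kernels:

    #include <assert.h>
    #include <stddef.h>
    #include <stdint.h>
    #include <stdio.h>

    typedef int32_t tran_low_t; /* assumed; mirrors the high-bitdepth config */

    /* Toy 2x2 separable transform: pass 0 walks columns of the source,
     * pass 1 walks transposed rows of the intermediate buffer. */
    static void fwd_txfm_2x2(const int16_t *input, tran_low_t *output,
                             int stride) {
      tran_low_t intermediate[2 * 2];
      const tran_low_t *in_low = NULL; /* valid only on the second pass */
      tran_low_t *out = intermediate;
      for (int pass = 0; pass < 2; ++pass) {
        for (int i = 0; i < 2; ++i) {
          tran_low_t in_high[2]; /* no longer shadows the parameter */
          if (pass == 0) {
            in_high[0] = input[0 * stride];
            in_high[1] = input[1 * stride];
            input++; /* the parameter itself advances across columns */
          } else {
            assert(in_low != NULL);
            in_high[0] = in_low[0 * 2];
            in_high[1] = in_low[1 * 2];
            in_low++;
          }
          out[0] = in_high[0] + in_high[1]; /* stand-in butterfly */
          out[1] = in_high[0] - in_high[1];
          out += 2;
        }
        in_low = intermediate; /* pass 1 reads what pass 0 wrote */
        out = output;
      }
    }

    int main(void) {
      const int16_t block[4] = { 1, 2, 3, 4 }; /* row-major, stride 2 */
      tran_low_t coeff[4];
      fwd_txfm_2x2(block, coeff, 2);
      printf("%d %d %d %d\n", (int)coeff[0], (int)coeff[1], (int)coeff[2],
             (int)coeff[3]);
      return 0;
    }

Reading in_low[0 * 2] and in_low[1 * 2] on the second pass walks a column of
the intermediate buffer, which is how the transpose between passes happens
without an explicit transpose step.
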
diff --git a/av1/common/av1_fwd_txfm1d.c b/av1/common/av1_fwd_txfm1d.c
index 4c695ae..3e9d5ec 100644
--- a/av1/common/av1_fwd_txfm1d.c
+++ b/av1/common/av1_fwd_txfm1d.c
@@ -40,6 +40,7 @@
   }
 #endif
 
+// TODO(angiebird): Make 1-d txfm functions static
 void av1_fdct4_new(const int32_t *input, int32_t *output, const int8_t *cos_bit,
                    const int8_t *stage_range) {
   const int32_t size = 4;
@@ -1528,3 +1529,805 @@
   bf1[31] = -bf0[1];
   range_check(stage, input, bf1, size, stage_range[stage]);
 }
+
+#if CONFIG_TX64X64
+void av1_fdct64_new(const int32_t *input, int32_t *output,
+                    const int8_t *cos_bit, const int8_t *stage_range) {
+  const int32_t size = 64;
+  const int32_t *cospi;
+
+  int32_t stage = 0;
+  int32_t *bf0, *bf1;
+  int32_t step[64];
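+  // Per-stage results alternate between the caller's 'output' buffer and
+  // the local 'step' buffer; with an odd number of stages (11), the final
+  // coefficients land in 'output'.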
+
+  // stage 0
+  range_check(stage, input, input, size, stage_range[stage]);
+
+  // stage 1
+  stage++;
+  cospi = cospi_arr[cos_bit[stage] - cos_bit_min];
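+  // cospi_arr rows are indexed by (cos_bit - cos_bit_min); entry i holds
+  // round(cos(i * PI / 128) * 2^cos_bit).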
+  bf1 = output;
+  bf1[0] = input[0] + input[63];
+  bf1[1] = input[1] + input[62];
+  bf1[2] = input[2] + input[61];
+  bf1[3] = input[3] + input[60];
+  bf1[4] = input[4] + input[59];
+  bf1[5] = input[5] + input[58];
+  bf1[6] = input[6] + input[57];
+  bf1[7] = input[7] + input[56];
+  bf1[8] = input[8] + input[55];
+  bf1[9] = input[9] + input[54];
+  bf1[10] = input[10] + input[53];
+  bf1[11] = input[11] + input[52];
+  bf1[12] = input[12] + input[51];
+  bf1[13] = input[13] + input[50];
+  bf1[14] = input[14] + input[49];
+  bf1[15] = input[15] + input[48];
+  bf1[16] = input[16] + input[47];
+  bf1[17] = input[17] + input[46];
+  bf1[18] = input[18] + input[45];
+  bf1[19] = input[19] + input[44];
+  bf1[20] = input[20] + input[43];
+  bf1[21] = input[21] + input[42];
+  bf1[22] = input[22] + input[41];
+  bf1[23] = input[23] + input[40];
+  bf1[24] = input[24] + input[39];
+  bf1[25] = input[25] + input[38];
+  bf1[26] = input[26] + input[37];
+  bf1[27] = input[27] + input[36];
+  bf1[28] = input[28] + input[35];
+  bf1[29] = input[29] + input[34];
+  bf1[30] = input[30] + input[33];
+  bf1[31] = input[31] + input[32];
+  bf1[32] = -input[32] + input[31];
+  bf1[33] = -input[33] + input[30];
+  bf1[34] = -input[34] + input[29];
+  bf1[35] = -input[35] + input[28];
+  bf1[36] = -input[36] + input[27];
+  bf1[37] = -input[37] + input[26];
+  bf1[38] = -input[38] + input[25];
+  bf1[39] = -input[39] + input[24];
+  bf1[40] = -input[40] + input[23];
+  bf1[41] = -input[41] + input[22];
+  bf1[42] = -input[42] + input[21];
+  bf1[43] = -input[43] + input[20];
+  bf1[44] = -input[44] + input[19];
+  bf1[45] = -input[45] + input[18];
+  bf1[46] = -input[46] + input[17];
+  bf1[47] = -input[47] + input[16];
+  bf1[48] = -input[48] + input[15];
+  bf1[49] = -input[49] + input[14];
+  bf1[50] = -input[50] + input[13];
+  bf1[51] = -input[51] + input[12];
+  bf1[52] = -input[52] + input[11];
+  bf1[53] = -input[53] + input[10];
+  bf1[54] = -input[54] + input[9];
+  bf1[55] = -input[55] + input[8];
+  bf1[56] = -input[56] + input[7];
+  bf1[57] = -input[57] + input[6];
+  bf1[58] = -input[58] + input[5];
+  bf1[59] = -input[59] + input[4];
+  bf1[60] = -input[60] + input[3];
+  bf1[61] = -input[61] + input[2];
+  bf1[62] = -input[62] + input[1];
+  bf1[63] = -input[63] + input[0];
+  range_check(stage, input, bf1, size, stage_range[stage]);
+
+  // stage 2
+  stage++;
+  cospi = cospi_arr[cos_bit[stage] - cos_bit_min];
+  bf0 = output;
+  bf1 = step;
+  bf1[0] = bf0[0] + bf0[31];
+  bf1[1] = bf0[1] + bf0[30];
+  bf1[2] = bf0[2] + bf0[29];
+  bf1[3] = bf0[3] + bf0[28];
+  bf1[4] = bf0[4] + bf0[27];
+  bf1[5] = bf0[5] + bf0[26];
+  bf1[6] = bf0[6] + bf0[25];
+  bf1[7] = bf0[7] + bf0[24];
+  bf1[8] = bf0[8] + bf0[23];
+  bf1[9] = bf0[9] + bf0[22];
+  bf1[10] = bf0[10] + bf0[21];
+  bf1[11] = bf0[11] + bf0[20];
+  bf1[12] = bf0[12] + bf0[19];
+  bf1[13] = bf0[13] + bf0[18];
+  bf1[14] = bf0[14] + bf0[17];
+  bf1[15] = bf0[15] + bf0[16];
+  bf1[16] = -bf0[16] + bf0[15];
+  bf1[17] = -bf0[17] + bf0[14];
+  bf1[18] = -bf0[18] + bf0[13];
+  bf1[19] = -bf0[19] + bf0[12];
+  bf1[20] = -bf0[20] + bf0[11];
+  bf1[21] = -bf0[21] + bf0[10];
+  bf1[22] = -bf0[22] + bf0[9];
+  bf1[23] = -bf0[23] + bf0[8];
+  bf1[24] = -bf0[24] + bf0[7];
+  bf1[25] = -bf0[25] + bf0[6];
+  bf1[26] = -bf0[26] + bf0[5];
+  bf1[27] = -bf0[27] + bf0[4];
+  bf1[28] = -bf0[28] + bf0[3];
+  bf1[29] = -bf0[29] + bf0[2];
+  bf1[30] = -bf0[30] + bf0[1];
+  bf1[31] = -bf0[31] + bf0[0];
+  bf1[32] = bf0[32];
+  bf1[33] = bf0[33];
+  bf1[34] = bf0[34];
+  bf1[35] = bf0[35];
+  bf1[36] = bf0[36];
+  bf1[37] = bf0[37];
+  bf1[38] = bf0[38];
+  bf1[39] = bf0[39];
+  bf1[40] = half_btf(-cospi[32], bf0[40], cospi[32], bf0[55], cos_bit[stage]);
+  bf1[41] = half_btf(-cospi[32], bf0[41], cospi[32], bf0[54], cos_bit[stage]);
+  bf1[42] = half_btf(-cospi[32], bf0[42], cospi[32], bf0[53], cos_bit[stage]);
+  bf1[43] = half_btf(-cospi[32], bf0[43], cospi[32], bf0[52], cos_bit[stage]);
+  bf1[44] = half_btf(-cospi[32], bf0[44], cospi[32], bf0[51], cos_bit[stage]);
+  bf1[45] = half_btf(-cospi[32], bf0[45], cospi[32], bf0[50], cos_bit[stage]);
+  bf1[46] = half_btf(-cospi[32], bf0[46], cospi[32], bf0[49], cos_bit[stage]);
+  bf1[47] = half_btf(-cospi[32], bf0[47], cospi[32], bf0[48], cos_bit[stage]);
+  bf1[48] = half_btf(cospi[32], bf0[48], cospi[32], bf0[47], cos_bit[stage]);
+  bf1[49] = half_btf(cospi[32], bf0[49], cospi[32], bf0[46], cos_bit[stage]);
+  bf1[50] = half_btf(cospi[32], bf0[50], cospi[32], bf0[45], cos_bit[stage]);
+  bf1[51] = half_btf(cospi[32], bf0[51], cospi[32], bf0[44], cos_bit[stage]);
+  bf1[52] = half_btf(cospi[32], bf0[52], cospi[32], bf0[43], cos_bit[stage]);
+  bf1[53] = half_btf(cospi[32], bf0[53], cospi[32], bf0[42], cos_bit[stage]);
+  bf1[54] = half_btf(cospi[32], bf0[54], cospi[32], bf0[41], cos_bit[stage]);
+  bf1[55] = half_btf(cospi[32], bf0[55], cospi[32], bf0[40], cos_bit[stage]);
+  bf1[56] = bf0[56];
+  bf1[57] = bf0[57];
+  bf1[58] = bf0[58];
+  bf1[59] = bf0[59];
+  bf1[60] = bf0[60];
+  bf1[61] = bf0[61];
+  bf1[62] = bf0[62];
+  bf1[63] = bf0[63];
+  range_check(stage, input, bf1, size, stage_range[stage]);
+
+  // stage 3
+  stage++;
+  cospi = cospi_arr[cos_bit[stage] - cos_bit_min];
+  bf0 = step;
+  bf1 = output;
+  bf1[0] = bf0[0] + bf0[15];
+  bf1[1] = bf0[1] + bf0[14];
+  bf1[2] = bf0[2] + bf0[13];
+  bf1[3] = bf0[3] + bf0[12];
+  bf1[4] = bf0[4] + bf0[11];
+  bf1[5] = bf0[5] + bf0[10];
+  bf1[6] = bf0[6] + bf0[9];
+  bf1[7] = bf0[7] + bf0[8];
+  bf1[8] = -bf0[8] + bf0[7];
+  bf1[9] = -bf0[9] + bf0[6];
+  bf1[10] = -bf0[10] + bf0[5];
+  bf1[11] = -bf0[11] + bf0[4];
+  bf1[12] = -bf0[12] + bf0[3];
+  bf1[13] = -bf0[13] + bf0[2];
+  bf1[14] = -bf0[14] + bf0[1];
+  bf1[15] = -bf0[15] + bf0[0];
+  bf1[16] = bf0[16];
+  bf1[17] = bf0[17];
+  bf1[18] = bf0[18];
+  bf1[19] = bf0[19];
+  bf1[20] = half_btf(-cospi[32], bf0[20], cospi[32], bf0[27], cos_bit[stage]);
+  bf1[21] = half_btf(-cospi[32], bf0[21], cospi[32], bf0[26], cos_bit[stage]);
+  bf1[22] = half_btf(-cospi[32], bf0[22], cospi[32], bf0[25], cos_bit[stage]);
+  bf1[23] = half_btf(-cospi[32], bf0[23], cospi[32], bf0[24], cos_bit[stage]);
+  bf1[24] = half_btf(cospi[32], bf0[24], cospi[32], bf0[23], cos_bit[stage]);
+  bf1[25] = half_btf(cospi[32], bf0[25], cospi[32], bf0[22], cos_bit[stage]);
+  bf1[26] = half_btf(cospi[32], bf0[26], cospi[32], bf0[21], cos_bit[stage]);
+  bf1[27] = half_btf(cospi[32], bf0[27], cospi[32], bf0[20], cos_bit[stage]);
+  bf1[28] = bf0[28];
+  bf1[29] = bf0[29];
+  bf1[30] = bf0[30];
+  bf1[31] = bf0[31];
+  bf1[32] = bf0[32] + bf0[47];
+  bf1[33] = bf0[33] + bf0[46];
+  bf1[34] = bf0[34] + bf0[45];
+  bf1[35] = bf0[35] + bf0[44];
+  bf1[36] = bf0[36] + bf0[43];
+  bf1[37] = bf0[37] + bf0[42];
+  bf1[38] = bf0[38] + bf0[41];
+  bf1[39] = bf0[39] + bf0[40];
+  bf1[40] = -bf0[40] + bf0[39];
+  bf1[41] = -bf0[41] + bf0[38];
+  bf1[42] = -bf0[42] + bf0[37];
+  bf1[43] = -bf0[43] + bf0[36];
+  bf1[44] = -bf0[44] + bf0[35];
+  bf1[45] = -bf0[45] + bf0[34];
+  bf1[46] = -bf0[46] + bf0[33];
+  bf1[47] = -bf0[47] + bf0[32];
+  bf1[48] = -bf0[48] + bf0[63];
+  bf1[49] = -bf0[49] + bf0[62];
+  bf1[50] = -bf0[50] + bf0[61];
+  bf1[51] = -bf0[51] + bf0[60];
+  bf1[52] = -bf0[52] + bf0[59];
+  bf1[53] = -bf0[53] + bf0[58];
+  bf1[54] = -bf0[54] + bf0[57];
+  bf1[55] = -bf0[55] + bf0[56];
+  bf1[56] = bf0[56] + bf0[55];
+  bf1[57] = bf0[57] + bf0[54];
+  bf1[58] = bf0[58] + bf0[53];
+  bf1[59] = bf0[59] + bf0[52];
+  bf1[60] = bf0[60] + bf0[51];
+  bf1[61] = bf0[61] + bf0[50];
+  bf1[62] = bf0[62] + bf0[49];
+  bf1[63] = bf0[63] + bf0[48];
+  range_check(stage, input, bf1, size, stage_range[stage]);
+
+  // stage 4
+  stage++;
+  cospi = cospi_arr[cos_bit[stage] - cos_bit_min];
+  bf0 = output;
+  bf1 = step;
+  bf1[0] = bf0[0] + bf0[7];
+  bf1[1] = bf0[1] + bf0[6];
+  bf1[2] = bf0[2] + bf0[5];
+  bf1[3] = bf0[3] + bf0[4];
+  bf1[4] = -bf0[4] + bf0[3];
+  bf1[5] = -bf0[5] + bf0[2];
+  bf1[6] = -bf0[6] + bf0[1];
+  bf1[7] = -bf0[7] + bf0[0];
+  bf1[8] = bf0[8];
+  bf1[9] = bf0[9];
+  bf1[10] = half_btf(-cospi[32], bf0[10], cospi[32], bf0[13], cos_bit[stage]);
+  bf1[11] = half_btf(-cospi[32], bf0[11], cospi[32], bf0[12], cos_bit[stage]);
+  bf1[12] = half_btf(cospi[32], bf0[12], cospi[32], bf0[11], cos_bit[stage]);
+  bf1[13] = half_btf(cospi[32], bf0[13], cospi[32], bf0[10], cos_bit[stage]);
+  bf1[14] = bf0[14];
+  bf1[15] = bf0[15];
+  bf1[16] = bf0[16] + bf0[23];
+  bf1[17] = bf0[17] + bf0[22];
+  bf1[18] = bf0[18] + bf0[21];
+  bf1[19] = bf0[19] + bf0[20];
+  bf1[20] = -bf0[20] + bf0[19];
+  bf1[21] = -bf0[21] + bf0[18];
+  bf1[22] = -bf0[22] + bf0[17];
+  bf1[23] = -bf0[23] + bf0[16];
+  bf1[24] = -bf0[24] + bf0[31];
+  bf1[25] = -bf0[25] + bf0[30];
+  bf1[26] = -bf0[26] + bf0[29];
+  bf1[27] = -bf0[27] + bf0[28];
+  bf1[28] = bf0[28] + bf0[27];
+  bf1[29] = bf0[29] + bf0[26];
+  bf1[30] = bf0[30] + bf0[25];
+  bf1[31] = bf0[31] + bf0[24];
+  bf1[32] = bf0[32];
+  bf1[33] = bf0[33];
+  bf1[34] = bf0[34];
+  bf1[35] = bf0[35];
+  bf1[36] = half_btf(-cospi[16], bf0[36], cospi[48], bf0[59], cos_bit[stage]);
+  bf1[37] = half_btf(-cospi[16], bf0[37], cospi[48], bf0[58], cos_bit[stage]);
+  bf1[38] = half_btf(-cospi[16], bf0[38], cospi[48], bf0[57], cos_bit[stage]);
+  bf1[39] = half_btf(-cospi[16], bf0[39], cospi[48], bf0[56], cos_bit[stage]);
+  bf1[40] = half_btf(-cospi[48], bf0[40], -cospi[16], bf0[55], cos_bit[stage]);
+  bf1[41] = half_btf(-cospi[48], bf0[41], -cospi[16], bf0[54], cos_bit[stage]);
+  bf1[42] = half_btf(-cospi[48], bf0[42], -cospi[16], bf0[53], cos_bit[stage]);
+  bf1[43] = half_btf(-cospi[48], bf0[43], -cospi[16], bf0[52], cos_bit[stage]);
+  bf1[44] = bf0[44];
+  bf1[45] = bf0[45];
+  bf1[46] = bf0[46];
+  bf1[47] = bf0[47];
+  bf1[48] = bf0[48];
+  bf1[49] = bf0[49];
+  bf1[50] = bf0[50];
+  bf1[51] = bf0[51];
+  bf1[52] = half_btf(cospi[48], bf0[52], -cospi[16], bf0[43], cos_bit[stage]);
+  bf1[53] = half_btf(cospi[48], bf0[53], -cospi[16], bf0[42], cos_bit[stage]);
+  bf1[54] = half_btf(cospi[48], bf0[54], -cospi[16], bf0[41], cos_bit[stage]);
+  bf1[55] = half_btf(cospi[48], bf0[55], -cospi[16], bf0[40], cos_bit[stage]);
+  bf1[56] = half_btf(cospi[16], bf0[56], cospi[48], bf0[39], cos_bit[stage]);
+  bf1[57] = half_btf(cospi[16], bf0[57], cospi[48], bf0[38], cos_bit[stage]);
+  bf1[58] = half_btf(cospi[16], bf0[58], cospi[48], bf0[37], cos_bit[stage]);
+  bf1[59] = half_btf(cospi[16], bf0[59], cospi[48], bf0[36], cos_bit[stage]);
+  bf1[60] = bf0[60];
+  bf1[61] = bf0[61];
+  bf1[62] = bf0[62];
+  bf1[63] = bf0[63];
+  range_check(stage, input, bf1, size, stage_range[stage]);
+
+  // stage 5
+  stage++;
+  cospi = cospi_arr[cos_bit[stage] - cos_bit_min];
+  bf0 = step;
+  bf1 = output;
+  bf1[0] = bf0[0] + bf0[3];
+  bf1[1] = bf0[1] + bf0[2];
+  bf1[2] = -bf0[2] + bf0[1];
+  bf1[3] = -bf0[3] + bf0[0];
+  bf1[4] = bf0[4];
+  bf1[5] = half_btf(-cospi[32], bf0[5], cospi[32], bf0[6], cos_bit[stage]);
+  bf1[6] = half_btf(cospi[32], bf0[6], cospi[32], bf0[5], cos_bit[stage]);
+  bf1[7] = bf0[7];
+  bf1[8] = bf0[8] + bf0[11];
+  bf1[9] = bf0[9] + bf0[10];
+  bf1[10] = -bf0[10] + bf0[9];
+  bf1[11] = -bf0[11] + bf0[8];
+  bf1[12] = -bf0[12] + bf0[15];
+  bf1[13] = -bf0[13] + bf0[14];
+  bf1[14] = bf0[14] + bf0[13];
+  bf1[15] = bf0[15] + bf0[12];
+  bf1[16] = bf0[16];
+  bf1[17] = bf0[17];
+  bf1[18] = half_btf(-cospi[16], bf0[18], cospi[48], bf0[29], cos_bit[stage]);
+  bf1[19] = half_btf(-cospi[16], bf0[19], cospi[48], bf0[28], cos_bit[stage]);
+  bf1[20] = half_btf(-cospi[48], bf0[20], -cospi[16], bf0[27], cos_bit[stage]);
+  bf1[21] = half_btf(-cospi[48], bf0[21], -cospi[16], bf0[26], cos_bit[stage]);
+  bf1[22] = bf0[22];
+  bf1[23] = bf0[23];
+  bf1[24] = bf0[24];
+  bf1[25] = bf0[25];
+  bf1[26] = half_btf(cospi[48], bf0[26], -cospi[16], bf0[21], cos_bit[stage]);
+  bf1[27] = half_btf(cospi[48], bf0[27], -cospi[16], bf0[20], cos_bit[stage]);
+  bf1[28] = half_btf(cospi[16], bf0[28], cospi[48], bf0[19], cos_bit[stage]);
+  bf1[29] = half_btf(cospi[16], bf0[29], cospi[48], bf0[18], cos_bit[stage]);
+  bf1[30] = bf0[30];
+  bf1[31] = bf0[31];
+  bf1[32] = bf0[32] + bf0[39];
+  bf1[33] = bf0[33] + bf0[38];
+  bf1[34] = bf0[34] + bf0[37];
+  bf1[35] = bf0[35] + bf0[36];
+  bf1[36] = -bf0[36] + bf0[35];
+  bf1[37] = -bf0[37] + bf0[34];
+  bf1[38] = -bf0[38] + bf0[33];
+  bf1[39] = -bf0[39] + bf0[32];
+  bf1[40] = -bf0[40] + bf0[47];
+  bf1[41] = -bf0[41] + bf0[46];
+  bf1[42] = -bf0[42] + bf0[45];
+  bf1[43] = -bf0[43] + bf0[44];
+  bf1[44] = bf0[44] + bf0[43];
+  bf1[45] = bf0[45] + bf0[42];
+  bf1[46] = bf0[46] + bf0[41];
+  bf1[47] = bf0[47] + bf0[40];
+  bf1[48] = bf0[48] + bf0[55];
+  bf1[49] = bf0[49] + bf0[54];
+  bf1[50] = bf0[50] + bf0[53];
+  bf1[51] = bf0[51] + bf0[52];
+  bf1[52] = -bf0[52] + bf0[51];
+  bf1[53] = -bf0[53] + bf0[50];
+  bf1[54] = -bf0[54] + bf0[49];
+  bf1[55] = -bf0[55] + bf0[48];
+  bf1[56] = -bf0[56] + bf0[63];
+  bf1[57] = -bf0[57] + bf0[62];
+  bf1[58] = -bf0[58] + bf0[61];
+  bf1[59] = -bf0[59] + bf0[60];
+  bf1[60] = bf0[60] + bf0[59];
+  bf1[61] = bf0[61] + bf0[58];
+  bf1[62] = bf0[62] + bf0[57];
+  bf1[63] = bf0[63] + bf0[56];
+  range_check(stage, input, bf1, size, stage_range[stage]);
+
+  // stage 6
+  stage++;
+  cospi = cospi_arr[cos_bit[stage] - cos_bit_min];
+  bf0 = output;
+  bf1 = step;
+  bf1[0] = half_btf(cospi[32], bf0[0], cospi[32], bf0[1], cos_bit[stage]);
+  bf1[1] = half_btf(-cospi[32], bf0[1], cospi[32], bf0[0], cos_bit[stage]);
+  bf1[2] = half_btf(cospi[48], bf0[2], cospi[16], bf0[3], cos_bit[stage]);
+  bf1[3] = half_btf(cospi[48], bf0[3], -cospi[16], bf0[2], cos_bit[stage]);
+  bf1[4] = bf0[4] + bf0[5];
+  bf1[5] = -bf0[5] + bf0[4];
+  bf1[6] = -bf0[6] + bf0[7];
+  bf1[7] = bf0[7] + bf0[6];
+  bf1[8] = bf0[8];
+  bf1[9] = half_btf(-cospi[16], bf0[9], cospi[48], bf0[14], cos_bit[stage]);
+  bf1[10] = half_btf(-cospi[48], bf0[10], -cospi[16], bf0[13], cos_bit[stage]);
+  bf1[11] = bf0[11];
+  bf1[12] = bf0[12];
+  bf1[13] = half_btf(cospi[48], bf0[13], -cospi[16], bf0[10], cos_bit[stage]);
+  bf1[14] = half_btf(cospi[16], bf0[14], cospi[48], bf0[9], cos_bit[stage]);
+  bf1[15] = bf0[15];
+  bf1[16] = bf0[16] + bf0[19];
+  bf1[17] = bf0[17] + bf0[18];
+  bf1[18] = -bf0[18] + bf0[17];
+  bf1[19] = -bf0[19] + bf0[16];
+  bf1[20] = -bf0[20] + bf0[23];
+  bf1[21] = -bf0[21] + bf0[22];
+  bf1[22] = bf0[22] + bf0[21];
+  bf1[23] = bf0[23] + bf0[20];
+  bf1[24] = bf0[24] + bf0[27];
+  bf1[25] = bf0[25] + bf0[26];
+  bf1[26] = -bf0[26] + bf0[25];
+  bf1[27] = -bf0[27] + bf0[24];
+  bf1[28] = -bf0[28] + bf0[31];
+  bf1[29] = -bf0[29] + bf0[30];
+  bf1[30] = bf0[30] + bf0[29];
+  bf1[31] = bf0[31] + bf0[28];
+  bf1[32] = bf0[32];
+  bf1[33] = bf0[33];
+  bf1[34] = half_btf(-cospi[8], bf0[34], cospi[56], bf0[61], cos_bit[stage]);
+  bf1[35] = half_btf(-cospi[8], bf0[35], cospi[56], bf0[60], cos_bit[stage]);
+  bf1[36] = half_btf(-cospi[56], bf0[36], -cospi[8], bf0[59], cos_bit[stage]);
+  bf1[37] = half_btf(-cospi[56], bf0[37], -cospi[8], bf0[58], cos_bit[stage]);
+  bf1[38] = bf0[38];
+  bf1[39] = bf0[39];
+  bf1[40] = bf0[40];
+  bf1[41] = bf0[41];
+  bf1[42] = half_btf(-cospi[40], bf0[42], cospi[24], bf0[53], cos_bit[stage]);
+  bf1[43] = half_btf(-cospi[40], bf0[43], cospi[24], bf0[52], cos_bit[stage]);
+  bf1[44] = half_btf(-cospi[24], bf0[44], -cospi[40], bf0[51], cos_bit[stage]);
+  bf1[45] = half_btf(-cospi[24], bf0[45], -cospi[40], bf0[50], cos_bit[stage]);
+  bf1[46] = bf0[46];
+  bf1[47] = bf0[47];
+  bf1[48] = bf0[48];
+  bf1[49] = bf0[49];
+  bf1[50] = half_btf(cospi[24], bf0[50], -cospi[40], bf0[45], cos_bit[stage]);
+  bf1[51] = half_btf(cospi[24], bf0[51], -cospi[40], bf0[44], cos_bit[stage]);
+  bf1[52] = half_btf(cospi[40], bf0[52], cospi[24], bf0[43], cos_bit[stage]);
+  bf1[53] = half_btf(cospi[40], bf0[53], cospi[24], bf0[42], cos_bit[stage]);
+  bf1[54] = bf0[54];
+  bf1[55] = bf0[55];
+  bf1[56] = bf0[56];
+  bf1[57] = bf0[57];
+  bf1[58] = half_btf(cospi[56], bf0[58], -cospi[8], bf0[37], cos_bit[stage]);
+  bf1[59] = half_btf(cospi[56], bf0[59], -cospi[8], bf0[36], cos_bit[stage]);
+  bf1[60] = half_btf(cospi[8], bf0[60], cospi[56], bf0[35], cos_bit[stage]);
+  bf1[61] = half_btf(cospi[8], bf0[61], cospi[56], bf0[34], cos_bit[stage]);
+  bf1[62] = bf0[62];
+  bf1[63] = bf0[63];
+  range_check(stage, input, bf1, size, stage_range[stage]);
+
+  // stage 7
+  stage++;
+  cospi = cospi_arr[cos_bit[stage] - cos_bit_min];
+  bf0 = step;
+  bf1 = output;
+  bf1[0] = bf0[0];
+  bf1[1] = bf0[1];
+  bf1[2] = bf0[2];
+  bf1[3] = bf0[3];
+  bf1[4] = half_btf(cospi[56], bf0[4], cospi[8], bf0[7], cos_bit[stage]);
+  bf1[5] = half_btf(cospi[24], bf0[5], cospi[40], bf0[6], cos_bit[stage]);
+  bf1[6] = half_btf(cospi[24], bf0[6], -cospi[40], bf0[5], cos_bit[stage]);
+  bf1[7] = half_btf(cospi[56], bf0[7], -cospi[8], bf0[4], cos_bit[stage]);
+  bf1[8] = bf0[8] + bf0[9];
+  bf1[9] = -bf0[9] + bf0[8];
+  bf1[10] = -bf0[10] + bf0[11];
+  bf1[11] = bf0[11] + bf0[10];
+  bf1[12] = bf0[12] + bf0[13];
+  bf1[13] = -bf0[13] + bf0[12];
+  bf1[14] = -bf0[14] + bf0[15];
+  bf1[15] = bf0[15] + bf0[14];
+  bf1[16] = bf0[16];
+  bf1[17] = half_btf(-cospi[8], bf0[17], cospi[56], bf0[30], cos_bit[stage]);
+  bf1[18] = half_btf(-cospi[56], bf0[18], -cospi[8], bf0[29], cos_bit[stage]);
+  bf1[19] = bf0[19];
+  bf1[20] = bf0[20];
+  bf1[21] = half_btf(-cospi[40], bf0[21], cospi[24], bf0[26], cos_bit[stage]);
+  bf1[22] = half_btf(-cospi[24], bf0[22], -cospi[40], bf0[25], cos_bit[stage]);
+  bf1[23] = bf0[23];
+  bf1[24] = bf0[24];
+  bf1[25] = half_btf(cospi[24], bf0[25], -cospi[40], bf0[22], cos_bit[stage]);
+  bf1[26] = half_btf(cospi[40], bf0[26], cospi[24], bf0[21], cos_bit[stage]);
+  bf1[27] = bf0[27];
+  bf1[28] = bf0[28];
+  bf1[29] = half_btf(cospi[56], bf0[29], -cospi[8], bf0[18], cos_bit[stage]);
+  bf1[30] = half_btf(cospi[8], bf0[30], cospi[56], bf0[17], cos_bit[stage]);
+  bf1[31] = bf0[31];
+  bf1[32] = bf0[32] + bf0[35];
+  bf1[33] = bf0[33] + bf0[34];
+  bf1[34] = -bf0[34] + bf0[33];
+  bf1[35] = -bf0[35] + bf0[32];
+  bf1[36] = -bf0[36] + bf0[39];
+  bf1[37] = -bf0[37] + bf0[38];
+  bf1[38] = bf0[38] + bf0[37];
+  bf1[39] = bf0[39] + bf0[36];
+  bf1[40] = bf0[40] + bf0[43];
+  bf1[41] = bf0[41] + bf0[42];
+  bf1[42] = -bf0[42] + bf0[41];
+  bf1[43] = -bf0[43] + bf0[40];
+  bf1[44] = -bf0[44] + bf0[47];
+  bf1[45] = -bf0[45] + bf0[46];
+  bf1[46] = bf0[46] + bf0[45];
+  bf1[47] = bf0[47] + bf0[44];
+  bf1[48] = bf0[48] + bf0[51];
+  bf1[49] = bf0[49] + bf0[50];
+  bf1[50] = -bf0[50] + bf0[49];
+  bf1[51] = -bf0[51] + bf0[48];
+  bf1[52] = -bf0[52] + bf0[55];
+  bf1[53] = -bf0[53] + bf0[54];
+  bf1[54] = bf0[54] + bf0[53];
+  bf1[55] = bf0[55] + bf0[52];
+  bf1[56] = bf0[56] + bf0[59];
+  bf1[57] = bf0[57] + bf0[58];
+  bf1[58] = -bf0[58] + bf0[57];
+  bf1[59] = -bf0[59] + bf0[56];
+  bf1[60] = -bf0[60] + bf0[63];
+  bf1[61] = -bf0[61] + bf0[62];
+  bf1[62] = bf0[62] + bf0[61];
+  bf1[63] = bf0[63] + bf0[60];
+  range_check(stage, input, bf1, size, stage_range[stage]);
+
+  // stage 8
+  stage++;
+  cospi = cospi_arr[cos_bit[stage] - cos_bit_min];
+  bf0 = output;
+  bf1 = step;
+  bf1[0] = bf0[0];
+  bf1[1] = bf0[1];
+  bf1[2] = bf0[2];
+  bf1[3] = bf0[3];
+  bf1[4] = bf0[4];
+  bf1[5] = bf0[5];
+  bf1[6] = bf0[6];
+  bf1[7] = bf0[7];
+  bf1[8] = half_btf(cospi[60], bf0[8], cospi[4], bf0[15], cos_bit[stage]);
+  bf1[9] = half_btf(cospi[28], bf0[9], cospi[36], bf0[14], cos_bit[stage]);
+  bf1[10] = half_btf(cospi[44], bf0[10], cospi[20], bf0[13], cos_bit[stage]);
+  bf1[11] = half_btf(cospi[12], bf0[11], cospi[52], bf0[12], cos_bit[stage]);
+  bf1[12] = half_btf(cospi[12], bf0[12], -cospi[52], bf0[11], cos_bit[stage]);
+  bf1[13] = half_btf(cospi[44], bf0[13], -cospi[20], bf0[10], cos_bit[stage]);
+  bf1[14] = half_btf(cospi[28], bf0[14], -cospi[36], bf0[9], cos_bit[stage]);
+  bf1[15] = half_btf(cospi[60], bf0[15], -cospi[4], bf0[8], cos_bit[stage]);
+  bf1[16] = bf0[16] + bf0[17];
+  bf1[17] = -bf0[17] + bf0[16];
+  bf1[18] = -bf0[18] + bf0[19];
+  bf1[19] = bf0[19] + bf0[18];
+  bf1[20] = bf0[20] + bf0[21];
+  bf1[21] = -bf0[21] + bf0[20];
+  bf1[22] = -bf0[22] + bf0[23];
+  bf1[23] = bf0[23] + bf0[22];
+  bf1[24] = bf0[24] + bf0[25];
+  bf1[25] = -bf0[25] + bf0[24];
+  bf1[26] = -bf0[26] + bf0[27];
+  bf1[27] = bf0[27] + bf0[26];
+  bf1[28] = bf0[28] + bf0[29];
+  bf1[29] = -bf0[29] + bf0[28];
+  bf1[30] = -bf0[30] + bf0[31];
+  bf1[31] = bf0[31] + bf0[30];
+  bf1[32] = bf0[32];
+  bf1[33] = half_btf(-cospi[4], bf0[33], cospi[60], bf0[62], cos_bit[stage]);
+  bf1[34] = half_btf(-cospi[60], bf0[34], -cospi[4], bf0[61], cos_bit[stage]);
+  bf1[35] = bf0[35];
+  bf1[36] = bf0[36];
+  bf1[37] = half_btf(-cospi[36], bf0[37], cospi[28], bf0[58], cos_bit[stage]);
+  bf1[38] = half_btf(-cospi[28], bf0[38], -cospi[36], bf0[57], cos_bit[stage]);
+  bf1[39] = bf0[39];
+  bf1[40] = bf0[40];
+  bf1[41] = half_btf(-cospi[20], bf0[41], cospi[44], bf0[54], cos_bit[stage]);
+  bf1[42] = half_btf(-cospi[44], bf0[42], -cospi[20], bf0[53], cos_bit[stage]);
+  bf1[43] = bf0[43];
+  bf1[44] = bf0[44];
+  bf1[45] = half_btf(-cospi[52], bf0[45], cospi[12], bf0[50], cos_bit[stage]);
+  bf1[46] = half_btf(-cospi[12], bf0[46], -cospi[52], bf0[49], cos_bit[stage]);
+  bf1[47] = bf0[47];
+  bf1[48] = bf0[48];
+  bf1[49] = half_btf(cospi[12], bf0[49], -cospi[52], bf0[46], cos_bit[stage]);
+  bf1[50] = half_btf(cospi[52], bf0[50], cospi[12], bf0[45], cos_bit[stage]);
+  bf1[51] = bf0[51];
+  bf1[52] = bf0[52];
+  bf1[53] = half_btf(cospi[44], bf0[53], -cospi[20], bf0[42], cos_bit[stage]);
+  bf1[54] = half_btf(cospi[20], bf0[54], cospi[44], bf0[41], cos_bit[stage]);
+  bf1[55] = bf0[55];
+  bf1[56] = bf0[56];
+  bf1[57] = half_btf(cospi[28], bf0[57], -cospi[36], bf0[38], cos_bit[stage]);
+  bf1[58] = half_btf(cospi[36], bf0[58], cospi[28], bf0[37], cos_bit[stage]);
+  bf1[59] = bf0[59];
+  bf1[60] = bf0[60];
+  bf1[61] = half_btf(cospi[60], bf0[61], -cospi[4], bf0[34], cos_bit[stage]);
+  bf1[62] = half_btf(cospi[4], bf0[62], cospi[60], bf0[33], cos_bit[stage]);
+  bf1[63] = bf0[63];
+  range_check(stage, input, bf1, size, stage_range[stage]);
+
+  // stage 9
+  stage++;
+  cospi = cospi_arr[cos_bit[stage] - cos_bit_min];
+  bf0 = step;
+  bf1 = output;
+  bf1[0] = bf0[0];
+  bf1[1] = bf0[1];
+  bf1[2] = bf0[2];
+  bf1[3] = bf0[3];
+  bf1[4] = bf0[4];
+  bf1[5] = bf0[5];
+  bf1[6] = bf0[6];
+  bf1[7] = bf0[7];
+  bf1[8] = bf0[8];
+  bf1[9] = bf0[9];
+  bf1[10] = bf0[10];
+  bf1[11] = bf0[11];
+  bf1[12] = bf0[12];
+  bf1[13] = bf0[13];
+  bf1[14] = bf0[14];
+  bf1[15] = bf0[15];
+  bf1[16] = half_btf(cospi[62], bf0[16], cospi[2], bf0[31], cos_bit[stage]);
+  bf1[17] = half_btf(cospi[30], bf0[17], cospi[34], bf0[30], cos_bit[stage]);
+  bf1[18] = half_btf(cospi[46], bf0[18], cospi[18], bf0[29], cos_bit[stage]);
+  bf1[19] = half_btf(cospi[14], bf0[19], cospi[50], bf0[28], cos_bit[stage]);
+  bf1[20] = half_btf(cospi[54], bf0[20], cospi[10], bf0[27], cos_bit[stage]);
+  bf1[21] = half_btf(cospi[22], bf0[21], cospi[42], bf0[26], cos_bit[stage]);
+  bf1[22] = half_btf(cospi[38], bf0[22], cospi[26], bf0[25], cos_bit[stage]);
+  bf1[23] = half_btf(cospi[6], bf0[23], cospi[58], bf0[24], cos_bit[stage]);
+  bf1[24] = half_btf(cospi[6], bf0[24], -cospi[58], bf0[23], cos_bit[stage]);
+  bf1[25] = half_btf(cospi[38], bf0[25], -cospi[26], bf0[22], cos_bit[stage]);
+  bf1[26] = half_btf(cospi[22], bf0[26], -cospi[42], bf0[21], cos_bit[stage]);
+  bf1[27] = half_btf(cospi[54], bf0[27], -cospi[10], bf0[20], cos_bit[stage]);
+  bf1[28] = half_btf(cospi[14], bf0[28], -cospi[50], bf0[19], cos_bit[stage]);
+  bf1[29] = half_btf(cospi[46], bf0[29], -cospi[18], bf0[18], cos_bit[stage]);
+  bf1[30] = half_btf(cospi[30], bf0[30], -cospi[34], bf0[17], cos_bit[stage]);
+  bf1[31] = half_btf(cospi[62], bf0[31], -cospi[2], bf0[16], cos_bit[stage]);
+  bf1[32] = bf0[32] + bf0[33];
+  bf1[33] = -bf0[33] + bf0[32];
+  bf1[34] = -bf0[34] + bf0[35];
+  bf1[35] = bf0[35] + bf0[34];
+  bf1[36] = bf0[36] + bf0[37];
+  bf1[37] = -bf0[37] + bf0[36];
+  bf1[38] = -bf0[38] + bf0[39];
+  bf1[39] = bf0[39] + bf0[38];
+  bf1[40] = bf0[40] + bf0[41];
+  bf1[41] = -bf0[41] + bf0[40];
+  bf1[42] = -bf0[42] + bf0[43];
+  bf1[43] = bf0[43] + bf0[42];
+  bf1[44] = bf0[44] + bf0[45];
+  bf1[45] = -bf0[45] + bf0[44];
+  bf1[46] = -bf0[46] + bf0[47];
+  bf1[47] = bf0[47] + bf0[46];
+  bf1[48] = bf0[48] + bf0[49];
+  bf1[49] = -bf0[49] + bf0[48];
+  bf1[50] = -bf0[50] + bf0[51];
+  bf1[51] = bf0[51] + bf0[50];
+  bf1[52] = bf0[52] + bf0[53];
+  bf1[53] = -bf0[53] + bf0[52];
+  bf1[54] = -bf0[54] + bf0[55];
+  bf1[55] = bf0[55] + bf0[54];
+  bf1[56] = bf0[56] + bf0[57];
+  bf1[57] = -bf0[57] + bf0[56];
+  bf1[58] = -bf0[58] + bf0[59];
+  bf1[59] = bf0[59] + bf0[58];
+  bf1[60] = bf0[60] + bf0[61];
+  bf1[61] = -bf0[61] + bf0[60];
+  bf1[62] = -bf0[62] + bf0[63];
+  bf1[63] = bf0[63] + bf0[62];
+  range_check(stage, input, bf1, size, stage_range[stage]);
+
+  // stage 10
+  stage++;
+  cospi = cospi_arr[cos_bit[stage] - cos_bit_min];
+  bf0 = output;
+  bf1 = step;
+  bf1[0] = bf0[0];
+  bf1[1] = bf0[1];
+  bf1[2] = bf0[2];
+  bf1[3] = bf0[3];
+  bf1[4] = bf0[4];
+  bf1[5] = bf0[5];
+  bf1[6] = bf0[6];
+  bf1[7] = bf0[7];
+  bf1[8] = bf0[8];
+  bf1[9] = bf0[9];
+  bf1[10] = bf0[10];
+  bf1[11] = bf0[11];
+  bf1[12] = bf0[12];
+  bf1[13] = bf0[13];
+  bf1[14] = bf0[14];
+  bf1[15] = bf0[15];
+  bf1[16] = bf0[16];
+  bf1[17] = bf0[17];
+  bf1[18] = bf0[18];
+  bf1[19] = bf0[19];
+  bf1[20] = bf0[20];
+  bf1[21] = bf0[21];
+  bf1[22] = bf0[22];
+  bf1[23] = bf0[23];
+  bf1[24] = bf0[24];
+  bf1[25] = bf0[25];
+  bf1[26] = bf0[26];
+  bf1[27] = bf0[27];
+  bf1[28] = bf0[28];
+  bf1[29] = bf0[29];
+  bf1[30] = bf0[30];
+  bf1[31] = bf0[31];
+  bf1[32] = half_btf(cospi[63], bf0[32], cospi[1], bf0[63], cos_bit[stage]);
+  bf1[33] = half_btf(cospi[31], bf0[33], cospi[33], bf0[62], cos_bit[stage]);
+  bf1[34] = half_btf(cospi[47], bf0[34], cospi[17], bf0[61], cos_bit[stage]);
+  bf1[35] = half_btf(cospi[15], bf0[35], cospi[49], bf0[60], cos_bit[stage]);
+  bf1[36] = half_btf(cospi[55], bf0[36], cospi[9], bf0[59], cos_bit[stage]);
+  bf1[37] = half_btf(cospi[23], bf0[37], cospi[41], bf0[58], cos_bit[stage]);
+  bf1[38] = half_btf(cospi[39], bf0[38], cospi[25], bf0[57], cos_bit[stage]);
+  bf1[39] = half_btf(cospi[7], bf0[39], cospi[57], bf0[56], cos_bit[stage]);
+  bf1[40] = half_btf(cospi[59], bf0[40], cospi[5], bf0[55], cos_bit[stage]);
+  bf1[41] = half_btf(cospi[27], bf0[41], cospi[37], bf0[54], cos_bit[stage]);
+  bf1[42] = half_btf(cospi[43], bf0[42], cospi[21], bf0[53], cos_bit[stage]);
+  bf1[43] = half_btf(cospi[11], bf0[43], cospi[53], bf0[52], cos_bit[stage]);
+  bf1[44] = half_btf(cospi[51], bf0[44], cospi[13], bf0[51], cos_bit[stage]);
+  bf1[45] = half_btf(cospi[19], bf0[45], cospi[45], bf0[50], cos_bit[stage]);
+  bf1[46] = half_btf(cospi[35], bf0[46], cospi[29], bf0[49], cos_bit[stage]);
+  bf1[47] = half_btf(cospi[3], bf0[47], cospi[61], bf0[48], cos_bit[stage]);
+  bf1[48] = half_btf(cospi[3], bf0[48], -cospi[61], bf0[47], cos_bit[stage]);
+  bf1[49] = half_btf(cospi[35], bf0[49], -cospi[29], bf0[46], cos_bit[stage]);
+  bf1[50] = half_btf(cospi[19], bf0[50], -cospi[45], bf0[45], cos_bit[stage]);
+  bf1[51] = half_btf(cospi[51], bf0[51], -cospi[13], bf0[44], cos_bit[stage]);
+  bf1[52] = half_btf(cospi[11], bf0[52], -cospi[53], bf0[43], cos_bit[stage]);
+  bf1[53] = half_btf(cospi[43], bf0[53], -cospi[21], bf0[42], cos_bit[stage]);
+  bf1[54] = half_btf(cospi[27], bf0[54], -cospi[37], bf0[41], cos_bit[stage]);
+  bf1[55] = half_btf(cospi[59], bf0[55], -cospi[5], bf0[40], cos_bit[stage]);
+  bf1[56] = half_btf(cospi[7], bf0[56], -cospi[57], bf0[39], cos_bit[stage]);
+  bf1[57] = half_btf(cospi[39], bf0[57], -cospi[25], bf0[38], cos_bit[stage]);
+  bf1[58] = half_btf(cospi[23], bf0[58], -cospi[41], bf0[37], cos_bit[stage]);
+  bf1[59] = half_btf(cospi[55], bf0[59], -cospi[9], bf0[36], cos_bit[stage]);
+  bf1[60] = half_btf(cospi[15], bf0[60], -cospi[49], bf0[35], cos_bit[stage]);
+  bf1[61] = half_btf(cospi[47], bf0[61], -cospi[17], bf0[34], cos_bit[stage]);
+  bf1[62] = half_btf(cospi[31], bf0[62], -cospi[33], bf0[33], cos_bit[stage]);
+  bf1[63] = half_btf(cospi[63], bf0[63], -cospi[1], bf0[32], cos_bit[stage]);
+  range_check(stage, input, bf1, size, stage_range[stage]);
+
+  // stage 11
+  stage++;
+  cospi = cospi_arr[cos_bit[stage] - cos_bit_min];
+  bf0 = step;
+  bf1 = output;
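+  // The final stage is a pure permutation: bf1[k] = bf0[bitrev6(k)], e.g.
+  // bf1[1] = bf0[32], bf1[2] = bf0[16], bf1[3] = bf0[48].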
+  bf1[0] = bf0[0];
+  bf1[1] = bf0[32];
+  bf1[2] = bf0[16];
+  bf1[3] = bf0[48];
+  bf1[4] = bf0[8];
+  bf1[5] = bf0[40];
+  bf1[6] = bf0[24];
+  bf1[7] = bf0[56];
+  bf1[8] = bf0[4];
+  bf1[9] = bf0[36];
+  bf1[10] = bf0[20];
+  bf1[11] = bf0[52];
+  bf1[12] = bf0[12];
+  bf1[13] = bf0[44];
+  bf1[14] = bf0[28];
+  bf1[15] = bf0[60];
+  bf1[16] = bf0[2];
+  bf1[17] = bf0[34];
+  bf1[18] = bf0[18];
+  bf1[19] = bf0[50];
+  bf1[20] = bf0[10];
+  bf1[21] = bf0[42];
+  bf1[22] = bf0[26];
+  bf1[23] = bf0[58];
+  bf1[24] = bf0[6];
+  bf1[25] = bf0[38];
+  bf1[26] = bf0[22];
+  bf1[27] = bf0[54];
+  bf1[28] = bf0[14];
+  bf1[29] = bf0[46];
+  bf1[30] = bf0[30];
+  bf1[31] = bf0[62];
+  bf1[32] = bf0[1];
+  bf1[33] = bf0[33];
+  bf1[34] = bf0[17];
+  bf1[35] = bf0[49];
+  bf1[36] = bf0[9];
+  bf1[37] = bf0[41];
+  bf1[38] = bf0[25];
+  bf1[39] = bf0[57];
+  bf1[40] = bf0[5];
+  bf1[41] = bf0[37];
+  bf1[42] = bf0[21];
+  bf1[43] = bf0[53];
+  bf1[44] = bf0[13];
+  bf1[45] = bf0[45];
+  bf1[46] = bf0[29];
+  bf1[47] = bf0[61];
+  bf1[48] = bf0[3];
+  bf1[49] = bf0[35];
+  bf1[50] = bf0[19];
+  bf1[51] = bf0[51];
+  bf1[52] = bf0[11];
+  bf1[53] = bf0[43];
+  bf1[54] = bf0[27];
+  bf1[55] = bf0[59];
+  bf1[56] = bf0[7];
+  bf1[57] = bf0[39];
+  bf1[58] = bf0[23];
+  bf1[59] = bf0[55];
+  bf1[60] = bf0[15];
+  bf1[61] = bf0[47];
+  bf1[62] = bf0[31];
+  bf1[63] = bf0[63];
+  range_check(stage, input, bf1, size, stage_range[stage]);
+}
+#endif  // CONFIG_TX64X64
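
Every rotation line in av1_fdct64_new() is one half of a 2-point butterfly:
half_btf(w0, in0, w1, in1, bit) forms w0 * in0 + w1 * in1 and rounds the sum
back down by bit, with the weights drawn from
cospi[i] = round(cos(i * PI / 128) * 2^cos_bit). A hedged standalone sketch
of that helper follows; the library's own definition lives in av1_txfm.h and
may differ in intermediate width:

    #include <stdint.h>
    #include <stdio.h>

    /* Round-to-nearest right shift applied after every butterfly multiply. */
    static int32_t round_shift(int64_t value, int bit) {
      return (int32_t)((value + ((int64_t)1 << (bit - 1))) >> bit);
    }

    /* One output of a 2-point rotation of (in0, in1); the caller writes the
     * matching half_btf() line for the other output. w0 and w1 are cospi
     * values already scaled by 2^bit. */
    static int32_t half_btf(int32_t w0, int32_t in0, int32_t w1, int32_t in1,
                            int bit) {
      const int64_t result = (int64_t)w0 * in0 + (int64_t)w1 * in1;
      return round_shift(result, bit);
    }

    int main(void) {
      /* cos_bit = 13: cospi[32] = round(cos(PI / 4) * 2^13) = 5793. */
      const int32_t cospi32 = 5793;
      const int32_t a = 100, b = 40;
      printf("%d %d\n", half_btf(-cospi32, a, cospi32, b, 13),
             half_btf(cospi32, b, cospi32, a, 13));
      return 0;
    }

The paired lines share weights of equal magnitude; for instance, the stage-2
pair bf1[40]/bf1[55] applies (1/sqrt(2)) * [-1 1; 1 1] to
(bf0[40], bf0[55]), since cospi[32] is sqrt(2)/2 in fixed point, making the
pair an orthogonal rotation up to rounding.
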
diff --git a/av1/common/av1_inv_txfm1d.c b/av1/common/av1_inv_txfm1d.c
index 2e9bbcb..40d8403 100644
--- a/av1/common/av1_inv_txfm1d.c
+++ b/av1/common/av1_inv_txfm1d.c
@@ -40,6 +40,7 @@
   }
 #endif
 
+// TODO(angiebird): Make 1-d txfm functions static
 void av1_idct4_new(const int32_t *input, int32_t *output, const int8_t *cos_bit,
                    const int8_t *stage_range) {
   const int32_t size = 4;
@@ -1535,3 +1536,801 @@
   bf1[31] = bf0[0];
   range_check(stage, input, bf1, size, stage_range[stage]);
 }
+
+#if CONFIG_TX64X64
+void av1_idct64_new(const int32_t *input, int32_t *output,
+                    const int8_t *cos_bit, const int8_t *stage_range) {
+  const int32_t size = 64;
+  const int32_t *cospi;
+
+  int32_t stage = 0;
+  int32_t *bf0, *bf1;
+  int32_t step[64];
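+  // Mirrors the forward transform: per-stage results alternate between
+  // 'output' and 'step', and stage 1 undoes the encoder's bit-reversed
+  // coefficient order by gathering input[] at bit-reversed indices.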
+
+  // stage 0
+  range_check(stage, input, input, size, stage_range[stage]);
+
+  // stage 1
+  stage++;
+  cospi = cospi_arr[cos_bit[stage] - cos_bit_min];
+  bf1 = output;
+  bf1[0] = input[0];
+  bf1[1] = input[32];
+  bf1[2] = input[16];
+  bf1[3] = input[48];
+  bf1[4] = input[8];
+  bf1[5] = input[40];
+  bf1[6] = input[24];
+  bf1[7] = input[56];
+  bf1[8] = input[4];
+  bf1[9] = input[36];
+  bf1[10] = input[20];
+  bf1[11] = input[52];
+  bf1[12] = input[12];
+  bf1[13] = input[44];
+  bf1[14] = input[28];
+  bf1[15] = input[60];
+  bf1[16] = input[2];
+  bf1[17] = input[34];
+  bf1[18] = input[18];
+  bf1[19] = input[50];
+  bf1[20] = input[10];
+  bf1[21] = input[42];
+  bf1[22] = input[26];
+  bf1[23] = input[58];
+  bf1[24] = input[6];
+  bf1[25] = input[38];
+  bf1[26] = input[22];
+  bf1[27] = input[54];
+  bf1[28] = input[14];
+  bf1[29] = input[46];
+  bf1[30] = input[30];
+  bf1[31] = input[62];
+  bf1[32] = input[1];
+  bf1[33] = input[33];
+  bf1[34] = input[17];
+  bf1[35] = input[49];
+  bf1[36] = input[9];
+  bf1[37] = input[41];
+  bf1[38] = input[25];
+  bf1[39] = input[57];
+  bf1[40] = input[5];
+  bf1[41] = input[37];
+  bf1[42] = input[21];
+  bf1[43] = input[53];
+  bf1[44] = input[13];
+  bf1[45] = input[45];
+  bf1[46] = input[29];
+  bf1[47] = input[61];
+  bf1[48] = input[3];
+  bf1[49] = input[35];
+  bf1[50] = input[19];
+  bf1[51] = input[51];
+  bf1[52] = input[11];
+  bf1[53] = input[43];
+  bf1[54] = input[27];
+  bf1[55] = input[59];
+  bf1[56] = input[7];
+  bf1[57] = input[39];
+  bf1[58] = input[23];
+  bf1[59] = input[55];
+  bf1[60] = input[15];
+  bf1[61] = input[47];
+  bf1[62] = input[31];
+  bf1[63] = input[63];
+  range_check(stage, input, bf1, size, stage_range[stage]);
+
+  // stage 2
+  stage++;
+  cospi = cospi_arr[cos_bit[stage] - cos_bit_min];
+  bf0 = output;
+  bf1 = step;
+  bf1[0] = bf0[0];
+  bf1[1] = bf0[1];
+  bf1[2] = bf0[2];
+  bf1[3] = bf0[3];
+  bf1[4] = bf0[4];
+  bf1[5] = bf0[5];
+  bf1[6] = bf0[6];
+  bf1[7] = bf0[7];
+  bf1[8] = bf0[8];
+  bf1[9] = bf0[9];
+  bf1[10] = bf0[10];
+  bf1[11] = bf0[11];
+  bf1[12] = bf0[12];
+  bf1[13] = bf0[13];
+  bf1[14] = bf0[14];
+  bf1[15] = bf0[15];
+  bf1[16] = bf0[16];
+  bf1[17] = bf0[17];
+  bf1[18] = bf0[18];
+  bf1[19] = bf0[19];
+  bf1[20] = bf0[20];
+  bf1[21] = bf0[21];
+  bf1[22] = bf0[22];
+  bf1[23] = bf0[23];
+  bf1[24] = bf0[24];
+  bf1[25] = bf0[25];
+  bf1[26] = bf0[26];
+  bf1[27] = bf0[27];
+  bf1[28] = bf0[28];
+  bf1[29] = bf0[29];
+  bf1[30] = bf0[30];
+  bf1[31] = bf0[31];
+  bf1[32] = half_btf(cospi[63], bf0[32], -cospi[1], bf0[63], cos_bit[stage]);
+  bf1[33] = half_btf(cospi[31], bf0[33], -cospi[33], bf0[62], cos_bit[stage]);
+  bf1[34] = half_btf(cospi[47], bf0[34], -cospi[17], bf0[61], cos_bit[stage]);
+  bf1[35] = half_btf(cospi[15], bf0[35], -cospi[49], bf0[60], cos_bit[stage]);
+  bf1[36] = half_btf(cospi[55], bf0[36], -cospi[9], bf0[59], cos_bit[stage]);
+  bf1[37] = half_btf(cospi[23], bf0[37], -cospi[41], bf0[58], cos_bit[stage]);
+  bf1[38] = half_btf(cospi[39], bf0[38], -cospi[25], bf0[57], cos_bit[stage]);
+  bf1[39] = half_btf(cospi[7], bf0[39], -cospi[57], bf0[56], cos_bit[stage]);
+  bf1[40] = half_btf(cospi[59], bf0[40], -cospi[5], bf0[55], cos_bit[stage]);
+  bf1[41] = half_btf(cospi[27], bf0[41], -cospi[37], bf0[54], cos_bit[stage]);
+  bf1[42] = half_btf(cospi[43], bf0[42], -cospi[21], bf0[53], cos_bit[stage]);
+  bf1[43] = half_btf(cospi[11], bf0[43], -cospi[53], bf0[52], cos_bit[stage]);
+  bf1[44] = half_btf(cospi[51], bf0[44], -cospi[13], bf0[51], cos_bit[stage]);
+  bf1[45] = half_btf(cospi[19], bf0[45], -cospi[45], bf0[50], cos_bit[stage]);
+  bf1[46] = half_btf(cospi[35], bf0[46], -cospi[29], bf0[49], cos_bit[stage]);
+  bf1[47] = half_btf(cospi[3], bf0[47], -cospi[61], bf0[48], cos_bit[stage]);
+  bf1[48] = half_btf(cospi[61], bf0[47], cospi[3], bf0[48], cos_bit[stage]);
+  bf1[49] = half_btf(cospi[29], bf0[46], cospi[35], bf0[49], cos_bit[stage]);
+  bf1[50] = half_btf(cospi[45], bf0[45], cospi[19], bf0[50], cos_bit[stage]);
+  bf1[51] = half_btf(cospi[13], bf0[44], cospi[51], bf0[51], cos_bit[stage]);
+  bf1[52] = half_btf(cospi[53], bf0[43], cospi[11], bf0[52], cos_bit[stage]);
+  bf1[53] = half_btf(cospi[21], bf0[42], cospi[43], bf0[53], cos_bit[stage]);
+  bf1[54] = half_btf(cospi[37], bf0[41], cospi[27], bf0[54], cos_bit[stage]);
+  bf1[55] = half_btf(cospi[5], bf0[40], cospi[59], bf0[55], cos_bit[stage]);
+  bf1[56] = half_btf(cospi[57], bf0[39], cospi[7], bf0[56], cos_bit[stage]);
+  bf1[57] = half_btf(cospi[25], bf0[38], cospi[39], bf0[57], cos_bit[stage]);
+  bf1[58] = half_btf(cospi[41], bf0[37], cospi[23], bf0[58], cos_bit[stage]);
+  bf1[59] = half_btf(cospi[9], bf0[36], cospi[55], bf0[59], cos_bit[stage]);
+  bf1[60] = half_btf(cospi[49], bf0[35], cospi[15], bf0[60], cos_bit[stage]);
+  bf1[61] = half_btf(cospi[17], bf0[34], cospi[47], bf0[61], cos_bit[stage]);
+  bf1[62] = half_btf(cospi[33], bf0[33], cospi[31], bf0[62], cos_bit[stage]);
+  bf1[63] = half_btf(cospi[1], bf0[32], cospi[63], bf0[63], cos_bit[stage]);
+  range_check(stage, input, bf1, size, stage_range[stage]);
+
+  // stage 3
+  stage++;
+  cospi = cospi_arr[cos_bit[stage] - cos_bit_min];
+  bf0 = step;
+  bf1 = output;
+  bf1[0] = bf0[0];
+  bf1[1] = bf0[1];
+  bf1[2] = bf0[2];
+  bf1[3] = bf0[3];
+  bf1[4] = bf0[4];
+  bf1[5] = bf0[5];
+  bf1[6] = bf0[6];
+  bf1[7] = bf0[7];
+  bf1[8] = bf0[8];
+  bf1[9] = bf0[9];
+  bf1[10] = bf0[10];
+  bf1[11] = bf0[11];
+  bf1[12] = bf0[12];
+  bf1[13] = bf0[13];
+  bf1[14] = bf0[14];
+  bf1[15] = bf0[15];
+  bf1[16] = half_btf(cospi[62], bf0[16], -cospi[2], bf0[31], cos_bit[stage]);
+  bf1[17] = half_btf(cospi[30], bf0[17], -cospi[34], bf0[30], cos_bit[stage]);
+  bf1[18] = half_btf(cospi[46], bf0[18], -cospi[18], bf0[29], cos_bit[stage]);
+  bf1[19] = half_btf(cospi[14], bf0[19], -cospi[50], bf0[28], cos_bit[stage]);
+  bf1[20] = half_btf(cospi[54], bf0[20], -cospi[10], bf0[27], cos_bit[stage]);
+  bf1[21] = half_btf(cospi[22], bf0[21], -cospi[42], bf0[26], cos_bit[stage]);
+  bf1[22] = half_btf(cospi[38], bf0[22], -cospi[26], bf0[25], cos_bit[stage]);
+  bf1[23] = half_btf(cospi[6], bf0[23], -cospi[58], bf0[24], cos_bit[stage]);
+  bf1[24] = half_btf(cospi[58], bf0[23], cospi[6], bf0[24], cos_bit[stage]);
+  bf1[25] = half_btf(cospi[26], bf0[22], cospi[38], bf0[25], cos_bit[stage]);
+  bf1[26] = half_btf(cospi[42], bf0[21], cospi[22], bf0[26], cos_bit[stage]);
+  bf1[27] = half_btf(cospi[10], bf0[20], cospi[54], bf0[27], cos_bit[stage]);
+  bf1[28] = half_btf(cospi[50], bf0[19], cospi[14], bf0[28], cos_bit[stage]);
+  bf1[29] = half_btf(cospi[18], bf0[18], cospi[46], bf0[29], cos_bit[stage]);
+  bf1[30] = half_btf(cospi[34], bf0[17], cospi[30], bf0[30], cos_bit[stage]);
+  bf1[31] = half_btf(cospi[2], bf0[16], cospi[62], bf0[31], cos_bit[stage]);
+  bf1[32] = bf0[32] + bf0[33];
+  bf1[33] = bf0[32] - bf0[33];
+  bf1[34] = -bf0[34] + bf0[35];
+  bf1[35] = bf0[34] + bf0[35];
+  bf1[36] = bf0[36] + bf0[37];
+  bf1[37] = bf0[36] - bf0[37];
+  bf1[38] = -bf0[38] + bf0[39];
+  bf1[39] = bf0[38] + bf0[39];
+  bf1[40] = bf0[40] + bf0[41];
+  bf1[41] = bf0[40] - bf0[41];
+  bf1[42] = -bf0[42] + bf0[43];
+  bf1[43] = bf0[42] + bf0[43];
+  bf1[44] = bf0[44] + bf0[45];
+  bf1[45] = bf0[44] - bf0[45];
+  bf1[46] = -bf0[46] + bf0[47];
+  bf1[47] = bf0[46] + bf0[47];
+  bf1[48] = bf0[48] + bf0[49];
+  bf1[49] = bf0[48] - bf0[49];
+  bf1[50] = -bf0[50] + bf0[51];
+  bf1[51] = bf0[50] + bf0[51];
+  bf1[52] = bf0[52] + bf0[53];
+  bf1[53] = bf0[52] - bf0[53];
+  bf1[54] = -bf0[54] + bf0[55];
+  bf1[55] = bf0[54] + bf0[55];
+  bf1[56] = bf0[56] + bf0[57];
+  bf1[57] = bf0[56] - bf0[57];
+  bf1[58] = -bf0[58] + bf0[59];
+  bf1[59] = bf0[58] + bf0[59];
+  bf1[60] = bf0[60] + bf0[61];
+  bf1[61] = bf0[60] - bf0[61];
+  bf1[62] = -bf0[62] + bf0[63];
+  bf1[63] = bf0[62] + bf0[63];
+  range_check(stage, input, bf1, size, stage_range[stage]);
+
+  // stage 4
+  stage++;
+  cospi = cospi_arr[cos_bit[stage] - cos_bit_min];
+  bf0 = output;
+  bf1 = step;
+  bf1[0] = bf0[0];
+  bf1[1] = bf0[1];
+  bf1[2] = bf0[2];
+  bf1[3] = bf0[3];
+  bf1[4] = bf0[4];
+  bf1[5] = bf0[5];
+  bf1[6] = bf0[6];
+  bf1[7] = bf0[7];
+  bf1[8] = half_btf(cospi[60], bf0[8], -cospi[4], bf0[15], cos_bit[stage]);
+  bf1[9] = half_btf(cospi[28], bf0[9], -cospi[36], bf0[14], cos_bit[stage]);
+  bf1[10] = half_btf(cospi[44], bf0[10], -cospi[20], bf0[13], cos_bit[stage]);
+  bf1[11] = half_btf(cospi[12], bf0[11], -cospi[52], bf0[12], cos_bit[stage]);
+  bf1[12] = half_btf(cospi[52], bf0[11], cospi[12], bf0[12], cos_bit[stage]);
+  bf1[13] = half_btf(cospi[20], bf0[10], cospi[44], bf0[13], cos_bit[stage]);
+  bf1[14] = half_btf(cospi[36], bf0[9], cospi[28], bf0[14], cos_bit[stage]);
+  bf1[15] = half_btf(cospi[4], bf0[8], cospi[60], bf0[15], cos_bit[stage]);
+  bf1[16] = bf0[16] + bf0[17];
+  bf1[17] = bf0[16] - bf0[17];
+  bf1[18] = -bf0[18] + bf0[19];
+  bf1[19] = bf0[18] + bf0[19];
+  bf1[20] = bf0[20] + bf0[21];
+  bf1[21] = bf0[20] - bf0[21];
+  bf1[22] = -bf0[22] + bf0[23];
+  bf1[23] = bf0[22] + bf0[23];
+  bf1[24] = bf0[24] + bf0[25];
+  bf1[25] = bf0[24] - bf0[25];
+  bf1[26] = -bf0[26] + bf0[27];
+  bf1[27] = bf0[26] + bf0[27];
+  bf1[28] = bf0[28] + bf0[29];
+  bf1[29] = bf0[28] - bf0[29];
+  bf1[30] = -bf0[30] + bf0[31];
+  bf1[31] = bf0[30] + bf0[31];
+  bf1[32] = bf0[32];
+  bf1[33] = half_btf(-cospi[4], bf0[33], cospi[60], bf0[62], cos_bit[stage]);
+  bf1[34] = half_btf(-cospi[60], bf0[34], -cospi[4], bf0[61], cos_bit[stage]);
+  bf1[35] = bf0[35];
+  bf1[36] = bf0[36];
+  bf1[37] = half_btf(-cospi[36], bf0[37], cospi[28], bf0[58], cos_bit[stage]);
+  bf1[38] = half_btf(-cospi[28], bf0[38], -cospi[36], bf0[57], cos_bit[stage]);
+  bf1[39] = bf0[39];
+  bf1[40] = bf0[40];
+  bf1[41] = half_btf(-cospi[20], bf0[41], cospi[44], bf0[54], cos_bit[stage]);
+  bf1[42] = half_btf(-cospi[44], bf0[42], -cospi[20], bf0[53], cos_bit[stage]);
+  bf1[43] = bf0[43];
+  bf1[44] = bf0[44];
+  bf1[45] = half_btf(-cospi[52], bf0[45], cospi[12], bf0[50], cos_bit[stage]);
+  bf1[46] = half_btf(-cospi[12], bf0[46], -cospi[52], bf0[49], cos_bit[stage]);
+  bf1[47] = bf0[47];
+  bf1[48] = bf0[48];
+  bf1[49] = half_btf(-cospi[52], bf0[46], cospi[12], bf0[49], cos_bit[stage]);
+  bf1[50] = half_btf(cospi[12], bf0[45], cospi[52], bf0[50], cos_bit[stage]);
+  bf1[51] = bf0[51];
+  bf1[52] = bf0[52];
+  bf1[53] = half_btf(-cospi[20], bf0[42], cospi[44], bf0[53], cos_bit[stage]);
+  bf1[54] = half_btf(cospi[44], bf0[41], cospi[20], bf0[54], cos_bit[stage]);
+  bf1[55] = bf0[55];
+  bf1[56] = bf0[56];
+  bf1[57] = half_btf(-cospi[36], bf0[38], cospi[28], bf0[57], cos_bit[stage]);
+  bf1[58] = half_btf(cospi[28], bf0[37], cospi[36], bf0[58], cos_bit[stage]);
+  bf1[59] = bf0[59];
+  bf1[60] = bf0[60];
+  bf1[61] = half_btf(-cospi[4], bf0[34], cospi[60], bf0[61], cos_bit[stage]);
+  bf1[62] = half_btf(cospi[60], bf0[33], cospi[4], bf0[62], cos_bit[stage]);
+  bf1[63] = bf0[63];
+  range_check(stage, input, bf1, size, stage_range[stage]);
+
+  // stage 5
+  stage++;
+  cospi = cospi_arr[cos_bit[stage] - cos_bit_min];
+  bf0 = step;
+  bf1 = output;
+  bf1[0] = bf0[0];
+  bf1[1] = bf0[1];
+  bf1[2] = bf0[2];
+  bf1[3] = bf0[3];
+  bf1[4] = half_btf(cospi[56], bf0[4], -cospi[8], bf0[7], cos_bit[stage]);
+  bf1[5] = half_btf(cospi[24], bf0[5], -cospi[40], bf0[6], cos_bit[stage]);
+  bf1[6] = half_btf(cospi[40], bf0[5], cospi[24], bf0[6], cos_bit[stage]);
+  bf1[7] = half_btf(cospi[8], bf0[4], cospi[56], bf0[7], cos_bit[stage]);
+  bf1[8] = bf0[8] + bf0[9];
+  bf1[9] = bf0[8] - bf0[9];
+  bf1[10] = -bf0[10] + bf0[11];
+  bf1[11] = bf0[10] + bf0[11];
+  bf1[12] = bf0[12] + bf0[13];
+  bf1[13] = bf0[12] - bf0[13];
+  bf1[14] = -bf0[14] + bf0[15];
+  bf1[15] = bf0[14] + bf0[15];
+  bf1[16] = bf0[16];
+  bf1[17] = half_btf(-cospi[8], bf0[17], cospi[56], bf0[30], cos_bit[stage]);
+  bf1[18] = half_btf(-cospi[56], bf0[18], -cospi[8], bf0[29], cos_bit[stage]);
+  bf1[19] = bf0[19];
+  bf1[20] = bf0[20];
+  bf1[21] = half_btf(-cospi[40], bf0[21], cospi[24], bf0[26], cos_bit[stage]);
+  bf1[22] = half_btf(-cospi[24], bf0[22], -cospi[40], bf0[25], cos_bit[stage]);
+  bf1[23] = bf0[23];
+  bf1[24] = bf0[24];
+  bf1[25] = half_btf(-cospi[40], bf0[22], cospi[24], bf0[25], cos_bit[stage]);
+  bf1[26] = half_btf(cospi[24], bf0[21], cospi[40], bf0[26], cos_bit[stage]);
+  bf1[27] = bf0[27];
+  bf1[28] = bf0[28];
+  bf1[29] = half_btf(-cospi[8], bf0[18], cospi[56], bf0[29], cos_bit[stage]);
+  bf1[30] = half_btf(cospi[56], bf0[17], cospi[8], bf0[30], cos_bit[stage]);
+  bf1[31] = bf0[31];
+  bf1[32] = bf0[32] + bf0[35];
+  bf1[33] = bf0[33] + bf0[34];
+  bf1[34] = bf0[33] - bf0[34];
+  bf1[35] = bf0[32] - bf0[35];
+  bf1[36] = -bf0[36] + bf0[39];
+  bf1[37] = -bf0[37] + bf0[38];
+  bf1[38] = bf0[37] + bf0[38];
+  bf1[39] = bf0[36] + bf0[39];
+  bf1[40] = bf0[40] + bf0[43];
+  bf1[41] = bf0[41] + bf0[42];
+  bf1[42] = bf0[41] - bf0[42];
+  bf1[43] = bf0[40] - bf0[43];
+  bf1[44] = -bf0[44] + bf0[47];
+  bf1[45] = -bf0[45] + bf0[46];
+  bf1[46] = bf0[45] + bf0[46];
+  bf1[47] = bf0[44] + bf0[47];
+  bf1[48] = bf0[48] + bf0[51];
+  bf1[49] = bf0[49] + bf0[50];
+  bf1[50] = bf0[49] - bf0[50];
+  bf1[51] = bf0[48] - bf0[51];
+  bf1[52] = -bf0[52] + bf0[55];
+  bf1[53] = -bf0[53] + bf0[54];
+  bf1[54] = bf0[53] + bf0[54];
+  bf1[55] = bf0[52] + bf0[55];
+  bf1[56] = bf0[56] + bf0[59];
+  bf1[57] = bf0[57] + bf0[58];
+  bf1[58] = bf0[57] - bf0[58];
+  bf1[59] = bf0[56] - bf0[59];
+  bf1[60] = -bf0[60] + bf0[63];
+  bf1[61] = -bf0[61] + bf0[62];
+  bf1[62] = bf0[61] + bf0[62];
+  bf1[63] = bf0[60] + bf0[63];
+  range_check(stage, input, bf1, size, stage_range[stage]);
+
+  // stage 6
+  stage++;
+  cospi = cospi_arr[cos_bit[stage] - cos_bit_min];
+  bf0 = output;
+  bf1 = step;
+  bf1[0] = half_btf(cospi[32], bf0[0], cospi[32], bf0[1], cos_bit[stage]);
+  bf1[1] = half_btf(cospi[32], bf0[0], -cospi[32], bf0[1], cos_bit[stage]);
+  bf1[2] = half_btf(cospi[48], bf0[2], -cospi[16], bf0[3], cos_bit[stage]);
+  bf1[3] = half_btf(cospi[16], bf0[2], cospi[48], bf0[3], cos_bit[stage]);
+  bf1[4] = bf0[4] + bf0[5];
+  bf1[5] = bf0[4] - bf0[5];
+  bf1[6] = -bf0[6] + bf0[7];
+  bf1[7] = bf0[6] + bf0[7];
+  bf1[8] = bf0[8];
+  bf1[9] = half_btf(-cospi[16], bf0[9], cospi[48], bf0[14], cos_bit[stage]);
+  bf1[10] = half_btf(-cospi[48], bf0[10], -cospi[16], bf0[13], cos_bit[stage]);
+  bf1[11] = bf0[11];
+  bf1[12] = bf0[12];
+  bf1[13] = half_btf(-cospi[16], bf0[10], cospi[48], bf0[13], cos_bit[stage]);
+  bf1[14] = half_btf(cospi[48], bf0[9], cospi[16], bf0[14], cos_bit[stage]);
+  bf1[15] = bf0[15];
+  bf1[16] = bf0[16] + bf0[19];
+  bf1[17] = bf0[17] + bf0[18];
+  bf1[18] = bf0[17] - bf0[18];
+  bf1[19] = bf0[16] - bf0[19];
+  bf1[20] = -bf0[20] + bf0[23];
+  bf1[21] = -bf0[21] + bf0[22];
+  bf1[22] = bf0[21] + bf0[22];
+  bf1[23] = bf0[20] + bf0[23];
+  bf1[24] = bf0[24] + bf0[27];
+  bf1[25] = bf0[25] + bf0[26];
+  bf1[26] = bf0[25] - bf0[26];
+  bf1[27] = bf0[24] - bf0[27];
+  bf1[28] = -bf0[28] + bf0[31];
+  bf1[29] = -bf0[29] + bf0[30];
+  bf1[30] = bf0[29] + bf0[30];
+  bf1[31] = bf0[28] + bf0[31];
+  bf1[32] = bf0[32];
+  bf1[33] = bf0[33];
+  bf1[34] = half_btf(-cospi[8], bf0[34], cospi[56], bf0[61], cos_bit[stage]);
+  bf1[35] = half_btf(-cospi[8], bf0[35], cospi[56], bf0[60], cos_bit[stage]);
+  bf1[36] = half_btf(-cospi[56], bf0[36], -cospi[8], bf0[59], cos_bit[stage]);
+  bf1[37] = half_btf(-cospi[56], bf0[37], -cospi[8], bf0[58], cos_bit[stage]);
+  bf1[38] = bf0[38];
+  bf1[39] = bf0[39];
+  bf1[40] = bf0[40];
+  bf1[41] = bf0[41];
+  bf1[42] = half_btf(-cospi[40], bf0[42], cospi[24], bf0[53], cos_bit[stage]);
+  bf1[43] = half_btf(-cospi[40], bf0[43], cospi[24], bf0[52], cos_bit[stage]);
+  bf1[44] = half_btf(-cospi[24], bf0[44], -cospi[40], bf0[51], cos_bit[stage]);
+  bf1[45] = half_btf(-cospi[24], bf0[45], -cospi[40], bf0[50], cos_bit[stage]);
+  bf1[46] = bf0[46];
+  bf1[47] = bf0[47];
+  bf1[48] = bf0[48];
+  bf1[49] = bf0[49];
+  bf1[50] = half_btf(-cospi[40], bf0[45], cospi[24], bf0[50], cos_bit[stage]);
+  bf1[51] = half_btf(-cospi[40], bf0[44], cospi[24], bf0[51], cos_bit[stage]);
+  bf1[52] = half_btf(cospi[24], bf0[43], cospi[40], bf0[52], cos_bit[stage]);
+  bf1[53] = half_btf(cospi[24], bf0[42], cospi[40], bf0[53], cos_bit[stage]);
+  bf1[54] = bf0[54];
+  bf1[55] = bf0[55];
+  bf1[56] = bf0[56];
+  bf1[57] = bf0[57];
+  bf1[58] = half_btf(-cospi[8], bf0[37], cospi[56], bf0[58], cos_bit[stage]);
+  bf1[59] = half_btf(-cospi[8], bf0[36], cospi[56], bf0[59], cos_bit[stage]);
+  bf1[60] = half_btf(cospi[56], bf0[35], cospi[8], bf0[60], cos_bit[stage]);
+  bf1[61] = half_btf(cospi[56], bf0[34], cospi[8], bf0[61], cos_bit[stage]);
+  bf1[62] = bf0[62];
+  bf1[63] = bf0[63];
+  range_check(stage, input, bf1, size, stage_range[stage]);
+
+  // stage 7
+  stage++;
+  cospi = cospi_arr[cos_bit[stage] - cos_bit_min];
+  bf0 = step;
+  bf1 = output;
+  bf1[0] = bf0[0] + bf0[3];
+  bf1[1] = bf0[1] + bf0[2];
+  bf1[2] = bf0[1] - bf0[2];
+  bf1[3] = bf0[0] - bf0[3];
+  bf1[4] = bf0[4];
+  bf1[5] = half_btf(-cospi[32], bf0[5], cospi[32], bf0[6], cos_bit[stage]);
+  bf1[6] = half_btf(cospi[32], bf0[5], cospi[32], bf0[6], cos_bit[stage]);
+  bf1[7] = bf0[7];
+  bf1[8] = bf0[8] + bf0[11];
+  bf1[9] = bf0[9] + bf0[10];
+  bf1[10] = bf0[9] - bf0[10];
+  bf1[11] = bf0[8] - bf0[11];
+  bf1[12] = -bf0[12] + bf0[15];
+  bf1[13] = -bf0[13] + bf0[14];
+  bf1[14] = bf0[13] + bf0[14];
+  bf1[15] = bf0[12] + bf0[15];
+  bf1[16] = bf0[16];
+  bf1[17] = bf0[17];
+  bf1[18] = half_btf(-cospi[16], bf0[18], cospi[48], bf0[29], cos_bit[stage]);
+  bf1[19] = half_btf(-cospi[16], bf0[19], cospi[48], bf0[28], cos_bit[stage]);
+  bf1[20] = half_btf(-cospi[48], bf0[20], -cospi[16], bf0[27], cos_bit[stage]);
+  bf1[21] = half_btf(-cospi[48], bf0[21], -cospi[16], bf0[26], cos_bit[stage]);
+  bf1[22] = bf0[22];
+  bf1[23] = bf0[23];
+  bf1[24] = bf0[24];
+  bf1[25] = bf0[25];
+  bf1[26] = half_btf(-cospi[16], bf0[21], cospi[48], bf0[26], cos_bit[stage]);
+  bf1[27] = half_btf(-cospi[16], bf0[20], cospi[48], bf0[27], cos_bit[stage]);
+  bf1[28] = half_btf(cospi[48], bf0[19], cospi[16], bf0[28], cos_bit[stage]);
+  bf1[29] = half_btf(cospi[48], bf0[18], cospi[16], bf0[29], cos_bit[stage]);
+  bf1[30] = bf0[30];
+  bf1[31] = bf0[31];
+  bf1[32] = bf0[32] + bf0[39];
+  bf1[33] = bf0[33] + bf0[38];
+  bf1[34] = bf0[34] + bf0[37];
+  bf1[35] = bf0[35] + bf0[36];
+  bf1[36] = bf0[35] - bf0[36];
+  bf1[37] = bf0[34] - bf0[37];
+  bf1[38] = bf0[33] - bf0[38];
+  bf1[39] = bf0[32] - bf0[39];
+  bf1[40] = -bf0[40] + bf0[47];
+  bf1[41] = -bf0[41] + bf0[46];
+  bf1[42] = -bf0[42] + bf0[45];
+  bf1[43] = -bf0[43] + bf0[44];
+  bf1[44] = bf0[43] + bf0[44];
+  bf1[45] = bf0[42] + bf0[45];
+  bf1[46] = bf0[41] + bf0[46];
+  bf1[47] = bf0[40] + bf0[47];
+  bf1[48] = bf0[48] + bf0[55];
+  bf1[49] = bf0[49] + bf0[54];
+  bf1[50] = bf0[50] + bf0[53];
+  bf1[51] = bf0[51] + bf0[52];
+  bf1[52] = bf0[51] - bf0[52];
+  bf1[53] = bf0[50] - bf0[53];
+  bf1[54] = bf0[49] - bf0[54];
+  bf1[55] = bf0[48] - bf0[55];
+  bf1[56] = -bf0[56] + bf0[63];
+  bf1[57] = -bf0[57] + bf0[62];
+  bf1[58] = -bf0[58] + bf0[61];
+  bf1[59] = -bf0[59] + bf0[60];
+  bf1[60] = bf0[59] + bf0[60];
+  bf1[61] = bf0[58] + bf0[61];
+  bf1[62] = bf0[57] + bf0[62];
+  bf1[63] = bf0[56] + bf0[63];
+  range_check(stage, input, bf1, size, stage_range[stage]);
+
+  // stage 8
+  stage++;
+  cospi = cospi_arr[cos_bit[stage] - cos_bit_min];
+  bf0 = output;
+  bf1 = step;
+  bf1[0] = bf0[0] + bf0[7];
+  bf1[1] = bf0[1] + bf0[6];
+  bf1[2] = bf0[2] + bf0[5];
+  bf1[3] = bf0[3] + bf0[4];
+  bf1[4] = bf0[3] - bf0[4];
+  bf1[5] = bf0[2] - bf0[5];
+  bf1[6] = bf0[1] - bf0[6];
+  bf1[7] = bf0[0] - bf0[7];
+  bf1[8] = bf0[8];
+  bf1[9] = bf0[9];
+  bf1[10] = half_btf(-cospi[32], bf0[10], cospi[32], bf0[13], cos_bit[stage]);
+  bf1[11] = half_btf(-cospi[32], bf0[11], cospi[32], bf0[12], cos_bit[stage]);
+  bf1[12] = half_btf(cospi[32], bf0[11], cospi[32], bf0[12], cos_bit[stage]);
+  bf1[13] = half_btf(cospi[32], bf0[10], cospi[32], bf0[13], cos_bit[stage]);
+  bf1[14] = bf0[14];
+  bf1[15] = bf0[15];
+  bf1[16] = bf0[16] + bf0[23];
+  bf1[17] = bf0[17] + bf0[22];
+  bf1[18] = bf0[18] + bf0[21];
+  bf1[19] = bf0[19] + bf0[20];
+  bf1[20] = bf0[19] - bf0[20];
+  bf1[21] = bf0[18] - bf0[21];
+  bf1[22] = bf0[17] - bf0[22];
+  bf1[23] = bf0[16] - bf0[23];
+  bf1[24] = -bf0[24] + bf0[31];
+  bf1[25] = -bf0[25] + bf0[30];
+  bf1[26] = -bf0[26] + bf0[29];
+  bf1[27] = -bf0[27] + bf0[28];
+  bf1[28] = bf0[27] + bf0[28];
+  bf1[29] = bf0[26] + bf0[29];
+  bf1[30] = bf0[25] + bf0[30];
+  bf1[31] = bf0[24] + bf0[31];
+  bf1[32] = bf0[32];
+  bf1[33] = bf0[33];
+  bf1[34] = bf0[34];
+  bf1[35] = bf0[35];
+  bf1[36] = half_btf(-cospi[16], bf0[36], cospi[48], bf0[59], cos_bit[stage]);
+  bf1[37] = half_btf(-cospi[16], bf0[37], cospi[48], bf0[58], cos_bit[stage]);
+  bf1[38] = half_btf(-cospi[16], bf0[38], cospi[48], bf0[57], cos_bit[stage]);
+  bf1[39] = half_btf(-cospi[16], bf0[39], cospi[48], bf0[56], cos_bit[stage]);
+  bf1[40] = half_btf(-cospi[48], bf0[40], -cospi[16], bf0[55], cos_bit[stage]);
+  bf1[41] = half_btf(-cospi[48], bf0[41], -cospi[16], bf0[54], cos_bit[stage]);
+  bf1[42] = half_btf(-cospi[48], bf0[42], -cospi[16], bf0[53], cos_bit[stage]);
+  bf1[43] = half_btf(-cospi[48], bf0[43], -cospi[16], bf0[52], cos_bit[stage]);
+  bf1[44] = bf0[44];
+  bf1[45] = bf0[45];
+  bf1[46] = bf0[46];
+  bf1[47] = bf0[47];
+  bf1[48] = bf0[48];
+  bf1[49] = bf0[49];
+  bf1[50] = bf0[50];
+  bf1[51] = bf0[51];
+  bf1[52] = half_btf(-cospi[16], bf0[43], cospi[48], bf0[52], cos_bit[stage]);
+  bf1[53] = half_btf(-cospi[16], bf0[42], cospi[48], bf0[53], cos_bit[stage]);
+  bf1[54] = half_btf(-cospi[16], bf0[41], cospi[48], bf0[54], cos_bit[stage]);
+  bf1[55] = half_btf(-cospi[16], bf0[40], cospi[48], bf0[55], cos_bit[stage]);
+  bf1[56] = half_btf(cospi[48], bf0[39], cospi[16], bf0[56], cos_bit[stage]);
+  bf1[57] = half_btf(cospi[48], bf0[38], cospi[16], bf0[57], cos_bit[stage]);
+  bf1[58] = half_btf(cospi[48], bf0[37], cospi[16], bf0[58], cos_bit[stage]);
+  bf1[59] = half_btf(cospi[48], bf0[36], cospi[16], bf0[59], cos_bit[stage]);
+  bf1[60] = bf0[60];
+  bf1[61] = bf0[61];
+  bf1[62] = bf0[62];
+  bf1[63] = bf0[63];
+  range_check(stage, input, bf1, size, stage_range[stage]);
+
+  // stage 9
+  stage++;
+  cospi = cospi_arr[cos_bit[stage] - cos_bit_min];
+  bf0 = step;
+  bf1 = output;
+  bf1[0] = bf0[0] + bf0[15];
+  bf1[1] = bf0[1] + bf0[14];
+  bf1[2] = bf0[2] + bf0[13];
+  bf1[3] = bf0[3] + bf0[12];
+  bf1[4] = bf0[4] + bf0[11];
+  bf1[5] = bf0[5] + bf0[10];
+  bf1[6] = bf0[6] + bf0[9];
+  bf1[7] = bf0[7] + bf0[8];
+  bf1[8] = bf0[7] - bf0[8];
+  bf1[9] = bf0[6] - bf0[9];
+  bf1[10] = bf0[5] - bf0[10];
+  bf1[11] = bf0[4] - bf0[11];
+  bf1[12] = bf0[3] - bf0[12];
+  bf1[13] = bf0[2] - bf0[13];
+  bf1[14] = bf0[1] - bf0[14];
+  bf1[15] = bf0[0] - bf0[15];
+  bf1[16] = bf0[16];
+  bf1[17] = bf0[17];
+  bf1[18] = bf0[18];
+  bf1[19] = bf0[19];
+  bf1[20] = half_btf(-cospi[32], bf0[20], cospi[32], bf0[27], cos_bit[stage]);
+  bf1[21] = half_btf(-cospi[32], bf0[21], cospi[32], bf0[26], cos_bit[stage]);
+  bf1[22] = half_btf(-cospi[32], bf0[22], cospi[32], bf0[25], cos_bit[stage]);
+  bf1[23] = half_btf(-cospi[32], bf0[23], cospi[32], bf0[24], cos_bit[stage]);
+  bf1[24] = half_btf(cospi[32], bf0[23], cospi[32], bf0[24], cos_bit[stage]);
+  bf1[25] = half_btf(cospi[32], bf0[22], cospi[32], bf0[25], cos_bit[stage]);
+  bf1[26] = half_btf(cospi[32], bf0[21], cospi[32], bf0[26], cos_bit[stage]);
+  bf1[27] = half_btf(cospi[32], bf0[20], cospi[32], bf0[27], cos_bit[stage]);
+  bf1[28] = bf0[28];
+  bf1[29] = bf0[29];
+  bf1[30] = bf0[30];
+  bf1[31] = bf0[31];
+  bf1[32] = bf0[32] + bf0[47];
+  bf1[33] = bf0[33] + bf0[46];
+  bf1[34] = bf0[34] + bf0[45];
+  bf1[35] = bf0[35] + bf0[44];
+  bf1[36] = bf0[36] + bf0[43];
+  bf1[37] = bf0[37] + bf0[42];
+  bf1[38] = bf0[38] + bf0[41];
+  bf1[39] = bf0[39] + bf0[40];
+  bf1[40] = bf0[39] - bf0[40];
+  bf1[41] = bf0[38] - bf0[41];
+  bf1[42] = bf0[37] - bf0[42];
+  bf1[43] = bf0[36] - bf0[43];
+  bf1[44] = bf0[35] - bf0[44];
+  bf1[45] = bf0[34] - bf0[45];
+  bf1[46] = bf0[33] - bf0[46];
+  bf1[47] = bf0[32] - bf0[47];
+  bf1[48] = -bf0[48] + bf0[63];
+  bf1[49] = -bf0[49] + bf0[62];
+  bf1[50] = -bf0[50] + bf0[61];
+  bf1[51] = -bf0[51] + bf0[60];
+  bf1[52] = -bf0[52] + bf0[59];
+  bf1[53] = -bf0[53] + bf0[58];
+  bf1[54] = -bf0[54] + bf0[57];
+  bf1[55] = -bf0[55] + bf0[56];
+  bf1[56] = bf0[55] + bf0[56];
+  bf1[57] = bf0[54] + bf0[57];
+  bf1[58] = bf0[53] + bf0[58];
+  bf1[59] = bf0[52] + bf0[59];
+  bf1[60] = bf0[51] + bf0[60];
+  bf1[61] = bf0[50] + bf0[61];
+  bf1[62] = bf0[49] + bf0[62];
+  bf1[63] = bf0[48] + bf0[63];
+  range_check(stage, input, bf1, size, stage_range[stage]);
+
+  // stage 10
+  stage++;
+  cospi = cospi_arr[cos_bit[stage] - cos_bit_min];
+  bf0 = output;
+  bf1 = step;
+  bf1[0] = bf0[0] + bf0[31];
+  bf1[1] = bf0[1] + bf0[30];
+  bf1[2] = bf0[2] + bf0[29];
+  bf1[3] = bf0[3] + bf0[28];
+  bf1[4] = bf0[4] + bf0[27];
+  bf1[5] = bf0[5] + bf0[26];
+  bf1[6] = bf0[6] + bf0[25];
+  bf1[7] = bf0[7] + bf0[24];
+  bf1[8] = bf0[8] + bf0[23];
+  bf1[9] = bf0[9] + bf0[22];
+  bf1[10] = bf0[10] + bf0[21];
+  bf1[11] = bf0[11] + bf0[20];
+  bf1[12] = bf0[12] + bf0[19];
+  bf1[13] = bf0[13] + bf0[18];
+  bf1[14] = bf0[14] + bf0[17];
+  bf1[15] = bf0[15] + bf0[16];
+  bf1[16] = bf0[15] - bf0[16];
+  bf1[17] = bf0[14] - bf0[17];
+  bf1[18] = bf0[13] - bf0[18];
+  bf1[19] = bf0[12] - bf0[19];
+  bf1[20] = bf0[11] - bf0[20];
+  bf1[21] = bf0[10] - bf0[21];
+  bf1[22] = bf0[9] - bf0[22];
+  bf1[23] = bf0[8] - bf0[23];
+  bf1[24] = bf0[7] - bf0[24];
+  bf1[25] = bf0[6] - bf0[25];
+  bf1[26] = bf0[5] - bf0[26];
+  bf1[27] = bf0[4] - bf0[27];
+  bf1[28] = bf0[3] - bf0[28];
+  bf1[29] = bf0[2] - bf0[29];
+  bf1[30] = bf0[1] - bf0[30];
+  bf1[31] = bf0[0] - bf0[31];
+  bf1[32] = bf0[32];
+  bf1[33] = bf0[33];
+  bf1[34] = bf0[34];
+  bf1[35] = bf0[35];
+  bf1[36] = bf0[36];
+  bf1[37] = bf0[37];
+  bf1[38] = bf0[38];
+  bf1[39] = bf0[39];
+  bf1[40] = half_btf(-cospi[32], bf0[40], cospi[32], bf0[55], cos_bit[stage]);
+  bf1[41] = half_btf(-cospi[32], bf0[41], cospi[32], bf0[54], cos_bit[stage]);
+  bf1[42] = half_btf(-cospi[32], bf0[42], cospi[32], bf0[53], cos_bit[stage]);
+  bf1[43] = half_btf(-cospi[32], bf0[43], cospi[32], bf0[52], cos_bit[stage]);
+  bf1[44] = half_btf(-cospi[32], bf0[44], cospi[32], bf0[51], cos_bit[stage]);
+  bf1[45] = half_btf(-cospi[32], bf0[45], cospi[32], bf0[50], cos_bit[stage]);
+  bf1[46] = half_btf(-cospi[32], bf0[46], cospi[32], bf0[49], cos_bit[stage]);
+  bf1[47] = half_btf(-cospi[32], bf0[47], cospi[32], bf0[48], cos_bit[stage]);
+  bf1[48] = half_btf(cospi[32], bf0[47], cospi[32], bf0[48], cos_bit[stage]);
+  bf1[49] = half_btf(cospi[32], bf0[46], cospi[32], bf0[49], cos_bit[stage]);
+  bf1[50] = half_btf(cospi[32], bf0[45], cospi[32], bf0[50], cos_bit[stage]);
+  bf1[51] = half_btf(cospi[32], bf0[44], cospi[32], bf0[51], cos_bit[stage]);
+  bf1[52] = half_btf(cospi[32], bf0[43], cospi[32], bf0[52], cos_bit[stage]);
+  bf1[53] = half_btf(cospi[32], bf0[42], cospi[32], bf0[53], cos_bit[stage]);
+  bf1[54] = half_btf(cospi[32], bf0[41], cospi[32], bf0[54], cos_bit[stage]);
+  bf1[55] = half_btf(cospi[32], bf0[40], cospi[32], bf0[55], cos_bit[stage]);
+  bf1[56] = bf0[56];
+  bf1[57] = bf0[57];
+  bf1[58] = bf0[58];
+  bf1[59] = bf0[59];
+  bf1[60] = bf0[60];
+  bf1[61] = bf0[61];
+  bf1[62] = bf0[62];
+  bf1[63] = bf0[63];
+  range_check(stage, input, bf1, size, stage_range[stage]);
+
+  // stage 11
+  stage++;
+  cospi = cospi_arr[cos_bit[stage] - cos_bit_min];
+  bf0 = step;
+  bf1 = output;
+  bf1[0] = bf0[0] + bf0[63];
+  bf1[1] = bf0[1] + bf0[62];
+  bf1[2] = bf0[2] + bf0[61];
+  bf1[3] = bf0[3] + bf0[60];
+  bf1[4] = bf0[4] + bf0[59];
+  bf1[5] = bf0[5] + bf0[58];
+  bf1[6] = bf0[6] + bf0[57];
+  bf1[7] = bf0[7] + bf0[56];
+  bf1[8] = bf0[8] + bf0[55];
+  bf1[9] = bf0[9] + bf0[54];
+  bf1[10] = bf0[10] + bf0[53];
+  bf1[11] = bf0[11] + bf0[52];
+  bf1[12] = bf0[12] + bf0[51];
+  bf1[13] = bf0[13] + bf0[50];
+  bf1[14] = bf0[14] + bf0[49];
+  bf1[15] = bf0[15] + bf0[48];
+  bf1[16] = bf0[16] + bf0[47];
+  bf1[17] = bf0[17] + bf0[46];
+  bf1[18] = bf0[18] + bf0[45];
+  bf1[19] = bf0[19] + bf0[44];
+  bf1[20] = bf0[20] + bf0[43];
+  bf1[21] = bf0[21] + bf0[42];
+  bf1[22] = bf0[22] + bf0[41];
+  bf1[23] = bf0[23] + bf0[40];
+  bf1[24] = bf0[24] + bf0[39];
+  bf1[25] = bf0[25] + bf0[38];
+  bf1[26] = bf0[26] + bf0[37];
+  bf1[27] = bf0[27] + bf0[36];
+  bf1[28] = bf0[28] + bf0[35];
+  bf1[29] = bf0[29] + bf0[34];
+  bf1[30] = bf0[30] + bf0[33];
+  bf1[31] = bf0[31] + bf0[32];
+  bf1[32] = bf0[31] - bf0[32];
+  bf1[33] = bf0[30] - bf0[33];
+  bf1[34] = bf0[29] - bf0[34];
+  bf1[35] = bf0[28] - bf0[35];
+  bf1[36] = bf0[27] - bf0[36];
+  bf1[37] = bf0[26] - bf0[37];
+  bf1[38] = bf0[25] - bf0[38];
+  bf1[39] = bf0[24] - bf0[39];
+  bf1[40] = bf0[23] - bf0[40];
+  bf1[41] = bf0[22] - bf0[41];
+  bf1[42] = bf0[21] - bf0[42];
+  bf1[43] = bf0[20] - bf0[43];
+  bf1[44] = bf0[19] - bf0[44];
+  bf1[45] = bf0[18] - bf0[45];
+  bf1[46] = bf0[17] - bf0[46];
+  bf1[47] = bf0[16] - bf0[47];
+  bf1[48] = bf0[15] - bf0[48];
+  bf1[49] = bf0[14] - bf0[49];
+  bf1[50] = bf0[13] - bf0[50];
+  bf1[51] = bf0[12] - bf0[51];
+  bf1[52] = bf0[11] - bf0[52];
+  bf1[53] = bf0[10] - bf0[53];
+  bf1[54] = bf0[9] - bf0[54];
+  bf1[55] = bf0[8] - bf0[55];
+  bf1[56] = bf0[7] - bf0[56];
+  bf1[57] = bf0[6] - bf0[57];
+  bf1[58] = bf0[5] - bf0[58];
+  bf1[59] = bf0[4] - bf0[59];
+  bf1[60] = bf0[3] - bf0[60];
+  bf1[61] = bf0[2] - bf0[61];
+  bf1[62] = bf0[1] - bf0[62];
+  bf1[63] = bf0[0] - bf0[63];
+  range_check(stage, input, bf1, size, stage_range[stage]);
+}
+#endif  // CONFIG_TX64X64
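
Every half_btf call in the stages above computes one butterfly output: a two-tap weighted sum scaled back down by the stage's cos_bit. A minimal sketch of the primitive, modeled on libaom's av1_txfm.h (the accumulator is widened to 64 bits here for clarity, and the _sketch names are illustrative; the library's own version may differ):

    /* Rounding right-shift: add half the divisor, then shift. */
    static int32_t round_shift_sketch(int64_t value, int bit) {
      return (int32_t)((value + (1LL << (bit - 1))) >> bit);
    }

    /* One butterfly output: w0 * in0 + w1 * in1, rescaled by cos_bit.
       Stages 5-11 above are exactly this primitive plus paired
       add/subtract lines. */
    static int32_t half_btf_sketch(int32_t w0, int32_t in0, int32_t w1,
                                   int32_t in1, int bit) {
      const int64_t result = (int64_t)w0 * in0 + (int64_t)w1 * in1;
      return round_shift_sketch(result, bit);
    }
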
diff --git a/av1/common/blockd.h b/av1/common/blockd.h
index b16f512..cc990ea 100644
--- a/av1/common/blockd.h
+++ b/av1/common/blockd.h
@@ -357,7 +357,7 @@
   FRAME_CONTEXT *fc;
 
   /* pointers to reference frames */
-  RefBuffer *block_refs[2];
+  const RefBuffer *block_refs[2];
 
   /* pointer to current frame */
   const YV12_BUFFER_CONFIG *cur_buf;
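
The blockd.h change turns block_refs into an array of pointers-to-const: an entry can still be repointed at a different RefBuffer, but nothing can be written through it. A contrived, self-contained illustration of that distinction (generic struct, not the real RefBuffer):

    struct S { int x; };

    static void repoint_demo(void) {
      struct S a = { 1 }, b = { 2 };
      const struct S *p[2] = { &a, &b };  /* mutable pointers to const S */
      p[0] = &b;       /* OK: the pointer slot itself is writable */
      /* p[0]->x = 3;     error: cannot modify through a pointer-to-const */
      (void)p;
    }
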
diff --git a/av1/common/entropymode.c b/av1/common/entropymode.c
index 3368ea2..23e0409 100644
--- a/av1/common/entropymode.c
+++ b/av1/common/entropymode.c
@@ -1489,7 +1489,6 @@
 
 #if CONFIG_SUPERTX
   for (i = 0; i < PARTITION_SUPERTX_CONTEXTS; ++i) {
-    int j;
     for (j = 1; j < TX_SIZES; ++j) {
       fc->supertx_prob[i][j] = av1_mode_mv_merge_probs(
           pre_fc->supertx_prob[i][j], counts->supertx[i][j]);
@@ -1583,7 +1582,6 @@
     }
     for (s = 1; s < EXT_TX_SETS_INTRA; ++s) {
       if (use_intra_ext_tx_for_txsize[s][i]) {
-        int j;
         for (j = 0; j < INTRA_MODES; ++j)
           aom_tree_merge_probs(
               av1_ext_tx_intra_tree[s], pre_fc->intra_ext_tx_prob[s][i][j],
diff --git a/av1/common/loopfilter.c b/av1/common/loopfilter.c
index f45f3db..c8022f2 100644
--- a/av1/common/loopfilter.c
+++ b/av1/common/loopfilter.c
@@ -889,42 +889,44 @@
       break;
     default:
       for (idx_32 = 0; idx_32 < 4; mip += offset_32[idx_32], ++idx_32) {
-        const int shift_y = shift_32_y[idx_32];
-        const int shift_uv = shift_32_uv[idx_32];
+        const int shift_y_32 = shift_32_y[idx_32];
+        const int shift_uv_32 = shift_32_uv[idx_32];
         const int mi_32_col_offset = ((idx_32 & 1) << 2);
         const int mi_32_row_offset = ((idx_32 >> 1) << 2);
         if (mi_32_col_offset >= max_cols || mi_32_row_offset >= max_rows)
           continue;
         switch (mip[0]->mbmi.sb_type) {
           case BLOCK_32X32:
-            build_masks(lfi_n, mip[0], shift_y, shift_uv, lfm);
+            build_masks(lfi_n, mip[0], shift_y_32, shift_uv_32, lfm);
             break;
-          case BLOCK_32X16: build_masks(lfi_n, mip[0], shift_y, shift_uv, lfm);
+          case BLOCK_32X16:
+            build_masks(lfi_n, mip[0], shift_y_32, shift_uv_32, lfm);
 #if CONFIG_SUPERTX
             if (supertx_enabled(&mip[0]->mbmi)) break;
 #endif
             if (mi_32_row_offset + 2 >= max_rows) continue;
             mip2 = mip + mode_info_stride * 2;
-            build_masks(lfi_n, mip2[0], shift_y + 16, shift_uv + 4, lfm);
+            build_masks(lfi_n, mip2[0], shift_y_32 + 16, shift_uv_32 + 4, lfm);
             break;
-          case BLOCK_16X32: build_masks(lfi_n, mip[0], shift_y, shift_uv, lfm);
+          case BLOCK_16X32:
+            build_masks(lfi_n, mip[0], shift_y_32, shift_uv_32, lfm);
 #if CONFIG_SUPERTX
             if (supertx_enabled(&mip[0]->mbmi)) break;
 #endif
             if (mi_32_col_offset + 2 >= max_cols) continue;
             mip2 = mip + 2;
-            build_masks(lfi_n, mip2[0], shift_y + 2, shift_uv + 1, lfm);
+            build_masks(lfi_n, mip2[0], shift_y_32 + 2, shift_uv_32 + 1, lfm);
             break;
           default:
 #if CONFIG_SUPERTX
             if (mip[0]->mbmi.tx_size == TX_32X32) {
-              build_masks(lfi_n, mip[0], shift_y, shift_uv, lfm);
+              build_masks(lfi_n, mip[0], shift_y_32, shift_uv_32, lfm);
               break;
             }
 #endif
             for (idx_16 = 0; idx_16 < 4; mip += offset_16[idx_16], ++idx_16) {
-              const int shift_y = shift_32_y[idx_32] + shift_16_y[idx_16];
-              const int shift_uv = shift_32_uv[idx_32] + shift_16_uv[idx_16];
+              const int shift_y_32_16 = shift_y_32 + shift_16_y[idx_16];
+              const int shift_uv_32_16 = shift_uv_32 + shift_16_uv[idx_16];
               const int mi_16_col_offset =
                   mi_32_col_offset + ((idx_16 & 1) << 1);
               const int mi_16_row_offset =
@@ -935,16 +937,18 @@
 
               switch (mip[0]->mbmi.sb_type) {
                 case BLOCK_16X16:
-                  build_masks(lfi_n, mip[0], shift_y, shift_uv, lfm);
+                  build_masks(lfi_n, mip[0], shift_y_32_16, shift_uv_32_16,
+                              lfm);
                   break;
                 case BLOCK_16X8:
 #if CONFIG_SUPERTX
                   if (supertx_enabled(&mip[0]->mbmi)) break;
 #endif
-                  build_masks(lfi_n, mip[0], shift_y, shift_uv, lfm);
+                  build_masks(lfi_n, mip[0], shift_y_32_16, shift_uv_32_16,
+                              lfm);
                   if (mi_16_row_offset + 1 >= max_rows) continue;
                   mip2 = mip + mode_info_stride;
-                  build_y_mask(lfi_n, mip2[0], shift_y + 8,
+                  build_y_mask(lfi_n, mip2[0], shift_y_32_16 + 8,
 #if CONFIG_SUPERTX
                                0,
 #endif
@@ -954,29 +958,31 @@
 #if CONFIG_SUPERTX
                   if (supertx_enabled(&mip[0]->mbmi)) break;
 #endif
-                  build_masks(lfi_n, mip[0], shift_y, shift_uv, lfm);
+                  build_masks(lfi_n, mip[0], shift_y_32_16, shift_uv_32_16,
+                              lfm);
                   if (mi_16_col_offset + 1 >= max_cols) continue;
                   mip2 = mip + 1;
-                  build_y_mask(lfi_n, mip2[0], shift_y + 1,
+                  build_y_mask(lfi_n, mip2[0], shift_y_32_16 + 1,
 #if CONFIG_SUPERTX
                                0,
 #endif
                                lfm);
                   break;
                 default: {
-                  const int shift_y =
-                      shift_32_y[idx_32] + shift_16_y[idx_16] + shift_8_y[0];
+                  const int shift_y_32_16_8_zero = shift_y_32_16 + shift_8_y[0];
 #if CONFIG_SUPERTX
                   if (mip[0]->mbmi.tx_size == TX_16X16) {
-                    build_masks(lfi_n, mip[0], shift_y, shift_uv, lfm);
+                    build_masks(lfi_n, mip[0], shift_y_32_16_8_zero,
+                                shift_uv_32_16, lfm);
                     break;
                   }
 #endif
-                  build_masks(lfi_n, mip[0], shift_y, shift_uv, lfm);
+                  build_masks(lfi_n, mip[0], shift_y_32_16_8_zero,
+                              shift_uv_32_16, lfm);
                   mip += offset[0];
                   for (idx_8 = 1; idx_8 < 4; mip += offset[idx_8], ++idx_8) {
-                    const int shift_y = shift_32_y[idx_32] +
-                                        shift_16_y[idx_16] + shift_8_y[idx_8];
+                    const int shift_y_32_16_8 =
+                        shift_y_32_16 + shift_8_y[idx_8];
                     const int mi_8_col_offset =
                         mi_16_col_offset + ((idx_8 & 1));
                     const int mi_8_row_offset =
@@ -985,7 +991,7 @@
                     if (mi_8_col_offset >= max_cols ||
                         mi_8_row_offset >= max_rows)
                       continue;
-                    build_y_mask(lfi_n, mip[0], shift_y,
+                    build_y_mask(lfi_n, mip[0], shift_y_32_16_8,
 #if CONFIG_SUPERTX
                                  supertx_enabled(&mip[0]->mbmi),
 #endif
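
The shift_y/shift_uv renames in loopfilter.c remove nested re-declarations that shadowed the outer copies at the 32x32, 16x16, and 8x8 levels; the old code behaved correctly but was shadow-prone and trips -Wshadow. A minimal sketch of the hazard the renames eliminate (hypothetical sum_shifts helper, not the loopfilter code itself):

    static int sum_shifts(const int step[4], int base) {
      int shift = base;                    /* outer scope */
      int total = shift;
      for (int i = 0; i < 4; ++i) {
        const int shift = base + step[i];  /* shadows the outer 'shift' */
        total += shift;                    /* silently uses the inner copy */
      }
      return total;
    }
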
diff --git a/av1/common/mvref_common.c b/av1/common/mvref_common.c
index 4fdeefe..2344bc1 100644
--- a/av1/common/mvref_common.c
+++ b/av1/common/mvref_common.c
@@ -763,8 +763,8 @@
   *near_mv = mvlist[1];
 }
 
-void av1_append_sub8x8_mvs_for_idx(AV1_COMMON *cm, MACROBLOCKD *xd, int block,
-                                   int ref, int mi_row, int mi_col,
+void av1_append_sub8x8_mvs_for_idx(const AV1_COMMON *cm, MACROBLOCKD *xd,
+                                   int block, int ref, int mi_row, int mi_col,
 #if CONFIG_REF_MV
                                    CANDIDATE_MV *ref_mv_stack,
                                    uint8_t *ref_mv_count,
diff --git a/av1/common/mvref_common.h b/av1/common/mvref_common.h
index b5e7094..25ebbfd 100644
--- a/av1/common/mvref_common.h
+++ b/av1/common/mvref_common.h
@@ -465,8 +465,8 @@
 void av1_find_best_ref_mvs(int allow_hp, int_mv *mvlist, int_mv *nearest_mv,
                            int_mv *near_mv);
 
-void av1_append_sub8x8_mvs_for_idx(AV1_COMMON *cm, MACROBLOCKD *xd, int block,
-                                   int ref, int mi_row, int mi_col,
+void av1_append_sub8x8_mvs_for_idx(const AV1_COMMON *cm, MACROBLOCKD *xd,
+                                   int block, int ref, int mi_row, int mi_col,
 #if CONFIG_REF_MV
                                    CANDIDATE_MV *ref_mv_stack,
                                    uint8_t *ref_mv_count,
diff --git a/av1/common/pred_common.h b/av1/common/pred_common.h
index 8927f26..6b0a3d5 100644
--- a/av1/common/pred_common.h
+++ b/av1/common/pred_common.h
@@ -20,7 +20,7 @@
 extern "C" {
 #endif
 
-static INLINE int get_segment_id(const AV1_COMMON *cm,
+static INLINE int get_segment_id(const AV1_COMMON *const cm,
                                  const uint8_t *segment_ids, BLOCK_SIZE bsize,
                                  int mi_row, int mi_col) {
   const int mi_offset = mi_row * cm->mi_cols + mi_col;
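
get_segment_id's new parameter carries two const qualifiers with distinct meanings: the first makes the pointed-to AV1_COMMON read-only, the second keeps the function body from repointing cm itself. A one-function sketch:

    static void const_demo(const AV1_COMMON *const cm) {
      /* cm->mi_cols = 0;  error: first const makes the pointee read-only  */
      /* cm = NULL;        error: second const makes the pointer read-only */
      (void)cm;
    }
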
diff --git a/av1/common/reconinter.c b/av1/common/reconinter.c
index ae57a43..c0fc494 100644
--- a/av1/common/reconinter.c
+++ b/av1/common/reconinter.c
@@ -1170,7 +1170,7 @@
 // top/left neighboring blocks' inter predictors with the regular inter
 // prediction. We assume the original prediction (bmc) is stored in
 // xd->plane[].dst.buf
-void av1_build_obmc_inter_prediction(AV1_COMMON *cm, MACROBLOCKD *xd,
+void av1_build_obmc_inter_prediction(const AV1_COMMON *cm, MACROBLOCKD *xd,
                                      int mi_row, int mi_col,
                                      uint8_t *above[MAX_MB_PLANE],
                                      int above_stride[MAX_MB_PLANE],
@@ -1281,7 +1281,7 @@
 }
 #endif  // CONFIG_EXT_INTER
 
-void av1_build_prediction_by_above_preds(AV1_COMMON *cm, MACROBLOCKD *xd,
+void av1_build_prediction_by_above_preds(const AV1_COMMON *cm, MACROBLOCKD *xd,
                                          int mi_row, int mi_col,
                                          uint8_t *tmp_buf[MAX_MB_PLANE],
                                          int tmp_width[MAX_MB_PLANE],
@@ -1319,8 +1319,8 @@
                        pd->subsampling_y);
     }
     for (ref = 0; ref < 1 + has_second_ref(above_mbmi); ++ref) {
-      MV_REFERENCE_FRAME frame = above_mbmi->ref_frame[ref];
-      RefBuffer *ref_buf = &cm->frame_refs[frame - LAST_FRAME];
+      const MV_REFERENCE_FRAME frame = above_mbmi->ref_frame[ref];
+      const RefBuffer *const ref_buf = &cm->frame_refs[frame - LAST_FRAME];
 
       xd->block_refs[ref] = ref_buf;
       if ((!av1_is_valid_scale(&ref_buf->sf)))
@@ -1378,7 +1378,7 @@
   xd->mb_to_left_edge = -((mi_col * MI_SIZE) * 8);
 }
 
-void av1_build_prediction_by_left_preds(AV1_COMMON *cm, MACROBLOCKD *xd,
+void av1_build_prediction_by_left_preds(const AV1_COMMON *cm, MACROBLOCKD *xd,
                                         int mi_row, int mi_col,
                                         uint8_t *tmp_buf[MAX_MB_PLANE],
                                         int tmp_width[MAX_MB_PLANE],
@@ -1416,8 +1416,8 @@
                        pd->subsampling_y);
     }
     for (ref = 0; ref < 1 + has_second_ref(left_mbmi); ++ref) {
-      MV_REFERENCE_FRAME frame = left_mbmi->ref_frame[ref];
-      RefBuffer *ref_buf = &cm->frame_refs[frame - LAST_FRAME];
+      const MV_REFERENCE_FRAME frame = left_mbmi->ref_frame[ref];
+      const RefBuffer *const ref_buf = &cm->frame_refs[frame - LAST_FRAME];
 
       xd->block_refs[ref] = ref_buf;
       if ((!av1_is_valid_scale(&ref_buf->sf)))
diff --git a/av1/common/reconinter.h b/av1/common/reconinter.h
index 4f86354..bfa7e95 100644
--- a/av1/common/reconinter.h
+++ b/av1/common/reconinter.h
@@ -517,19 +517,19 @@
 
 #if CONFIG_MOTION_VAR
 const uint8_t *av1_get_obmc_mask(int length);
-void av1_build_obmc_inter_prediction(AV1_COMMON *cm, MACROBLOCKD *xd,
+void av1_build_obmc_inter_prediction(const AV1_COMMON *cm, MACROBLOCKD *xd,
                                      int mi_row, int mi_col,
                                      uint8_t *above[MAX_MB_PLANE],
                                      int above_stride[MAX_MB_PLANE],
                                      uint8_t *left[MAX_MB_PLANE],
                                      int left_stride[MAX_MB_PLANE]);
-void av1_build_prediction_by_above_preds(AV1_COMMON *cm, MACROBLOCKD *xd,
+void av1_build_prediction_by_above_preds(const AV1_COMMON *cm, MACROBLOCKD *xd,
                                          int mi_row, int mi_col,
                                          uint8_t *tmp_buf[MAX_MB_PLANE],
                                          int tmp_width[MAX_MB_PLANE],
                                          int tmp_height[MAX_MB_PLANE],
                                          int tmp_stride[MAX_MB_PLANE]);
-void av1_build_prediction_by_left_preds(AV1_COMMON *cm, MACROBLOCKD *xd,
+void av1_build_prediction_by_left_preds(const AV1_COMMON *cm, MACROBLOCKD *xd,
                                         int mi_row, int mi_col,
                                         uint8_t *tmp_buf[MAX_MB_PLANE],
                                         int tmp_width[MAX_MB_PLANE],
diff --git a/av1/common/reconintra.c b/av1/common/reconintra.c
index d6bd87d..483b0b2 100644
--- a/av1/common/reconintra.c
+++ b/av1/common/reconintra.c
@@ -233,6 +233,14 @@
   const int w = AOMMAX(num_4x4_blocks_wide_lookup[bsize] >> ss_x, 1);
   const int step = 1 << txsz;
 
+  // TODO(bshacklett, huisu): Currently the RD loop traverses 4X8 blocks in
+  // inverted N order while in the bitstream the subblocks are stored in Z
+  // order. This discrepancy makes this function incorrect when considering 4X8
+  // blocks in the RD loop, so we disable the extended right edge for these
+  // blocks. The correct solution is to change the bitstream to store these
+  // blocks in inverted N order, and then update this function appropriately.
+  if (bsize == BLOCK_4X8 && y == 1) return 0;
+
   if (!right_available) {
     return 0;
   } else {
@@ -702,7 +710,7 @@
                                          const uint8_t *above,
                                          const uint8_t *left, int mode) {
   int k, r, c;
-  int pred[33][65];
+  int preds[33][65];
   int mean, ipred;
   const TX_SIZE tx_size =
       (bs == 32) ? TX_32X32
@@ -721,20 +729,20 @@
   }
   mean = (mean + bs) / (2 * bs);
 
-  for (r = 0; r < bs; ++r) pred[r + 1][0] = (int)left[r] - mean;
+  for (r = 0; r < bs; ++r) preds[r + 1][0] = (int)left[r] - mean;
 
-  for (c = 0; c < 2 * bs + 1; ++c) pred[0][c] = (int)above[c - 1] - mean;
+  for (c = 0; c < 2 * bs + 1; ++c) preds[0][c] = (int)above[c - 1] - mean;
 
   for (r = 1; r < bs + 1; ++r)
     for (c = 1; c < 2 * bs + 1 - r; ++c) {
-      ipred = c0 * pred[r - 1][c] + c1 * pred[r][c - 1] +
-              c2 * pred[r - 1][c - 1] + c3 * pred[r - 1][c + 1];
-      pred[r][c] = ROUND_POWER_OF_TWO_SIGNED(ipred, FILTER_INTRA_PREC_BITS);
+      ipred = c0 * preds[r - 1][c] + c1 * preds[r][c - 1] +
+              c2 * preds[r - 1][c - 1] + c3 * preds[r - 1][c + 1];
+      preds[r][c] = ROUND_POWER_OF_TWO_SIGNED(ipred, FILTER_INTRA_PREC_BITS);
     }
 
   for (r = 0; r < bs; ++r) {
     for (c = 0; c < bs; ++c) {
-      ipred = pred[r + 1][c + 1] + mean;
+      ipred = preds[r + 1][c + 1] + mean;
       dst[c] = clip_pixel(ipred);
     }
     dst += stride;
@@ -997,7 +1005,7 @@
                                                 const uint16_t *left, int mode,
                                                 int bd) {
   int k, r, c;
-  int pred[33][65];
+  int preds[33][65];
   int mean, ipred;
   const TX_SIZE tx_size =
       (bs == 32) ? TX_32X32
@@ -1016,20 +1024,20 @@
   }
   mean = (mean + bs) / (2 * bs);
 
-  for (r = 0; r < bs; ++r) pred[r + 1][0] = (int)left[r] - mean;
+  for (r = 0; r < bs; ++r) preds[r + 1][0] = (int)left[r] - mean;
 
-  for (c = 0; c < 2 * bs + 1; ++c) pred[0][c] = (int)above[c - 1] - mean;
+  for (c = 0; c < 2 * bs + 1; ++c) preds[0][c] = (int)above[c - 1] - mean;
 
   for (r = 1; r < bs + 1; ++r)
     for (c = 1; c < 2 * bs + 1 - r; ++c) {
-      ipred = c0 * pred[r - 1][c] + c1 * pred[r][c - 1] +
-              c2 * pred[r - 1][c - 1] + c3 * pred[r - 1][c + 1];
-      pred[r][c] = ROUND_POWER_OF_TWO_SIGNED(ipred, FILTER_INTRA_PREC_BITS);
+      ipred = c0 * preds[r - 1][c] + c1 * preds[r][c - 1] +
+              c2 * preds[r - 1][c - 1] + c3 * preds[r - 1][c + 1];
+      preds[r][c] = ROUND_POWER_OF_TWO_SIGNED(ipred, FILTER_INTRA_PREC_BITS);
     }
 
   for (r = 0; r < bs; ++r) {
     for (c = 0; c < bs; ++c) {
-      ipred = pred[r + 1][c + 1] + mean;
+      ipred = preds[r + 1][c + 1] + mean;
       dst[c] = clip_pixel_highbd(ipred, bd);
     }
     dst += stride;
@@ -1188,8 +1196,6 @@
   }
 
   if (ext_intra_mode_info->use_ext_intra_mode[plane != 0]) {
-    EXT_INTRA_MODE ext_intra_mode =
-        ext_intra_mode_info->ext_intra_mode[plane != 0];
     need_left = ext_intra_extend_modes[ext_intra_mode] & NEED_LEFT;
     need_above = ext_intra_extend_modes[ext_intra_mode] & NEED_ABOVE;
   }
@@ -1202,7 +1208,6 @@
   assert(n_bottomleft_px >= 0);
 
   if ((!need_above && n_left_px == 0) || (!need_left && n_top_px == 0)) {
-    int i;
     const int val = (n_left_px == 0) ? base + 1 : base - 1;
     for (i = 0; i < bs; ++i) {
       aom_memset16(dst, val, bs);
@@ -1351,8 +1356,6 @@
   }
 
   if (ext_intra_mode_info->use_ext_intra_mode[plane != 0]) {
-    EXT_INTRA_MODE ext_intra_mode =
-        ext_intra_mode_info->ext_intra_mode[plane != 0];
     need_left = ext_intra_extend_modes[ext_intra_mode] & NEED_LEFT;
     need_above = ext_intra_extend_modes[ext_intra_mode] & NEED_ABOVE;
   }
@@ -1373,7 +1376,6 @@
   assert(n_bottomleft_px >= 0);
 
   if ((!need_above && n_left_px == 0) || (!need_left && n_top_px == 0)) {
-    int i;
     const int val = (n_left_px == 0) ? 129 : 127;
     for (i = 0; i < bs; ++i) {
       memset(dst, val, bs);
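
The filter-intra recurrence above fills preds[r][c] from four previously computed neighbors and scales the weighted sum back down by FILTER_INTRA_PREC_BITS with signed rounding. For reference, the rounding macros as defined in aom_dsp_common.h (reproduced from memory, so treat the exact form as an assumption):

    #define ROUND_POWER_OF_TWO(value, n) (((value) + (1 << ((n)-1))) >> (n))
    /* Signed variant: round the magnitude, then restore the sign, so that
       negative taps (c0..c3 may be negative) avoid the truncation bias of
       '>>' on negative values. */
    #define ROUND_POWER_OF_TWO_SIGNED(value, n)           \
      (((value) < 0) ? -ROUND_POWER_OF_TWO(-(value), (n)) \
                     : ROUND_POWER_OF_TWO((value), (n)))
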
diff --git a/av1/common/scan.c b/av1/common/scan.c
index 3be4ed2..919fe8c 100644
--- a/av1/common/scan.c
+++ b/av1/common/scan.c
@@ -3799,7 +3799,7 @@
 };
 #endif  // CONFIG_EXT_TX
 
-const scan_order av1_default_scan_orders[TX_SIZES] = {
+const SCAN_ORDER av1_default_scan_orders[TX_SIZES] = {
   { default_scan_4x4, av1_default_iscan_4x4, default_scan_4x4_neighbors },
   { default_scan_8x8, av1_default_iscan_8x8, default_scan_8x8_neighbors },
   { default_scan_16x16, av1_default_iscan_16x16, default_scan_16x16_neighbors },
@@ -3807,7 +3807,7 @@
 };
 
 #if CONFIG_EXT_TX
-const scan_order av1_intra_scan_orders[TX_SIZES][TX_TYPES] = {
+const SCAN_ORDER av1_intra_scan_orders[TX_SIZES][TX_TYPES] = {
   {
       // TX_4X4
       { default_scan_4x4, av1_default_iscan_4x4, default_scan_4x4_neighbors },
@@ -3894,7 +3894,7 @@
   }
 };
 
-const scan_order av1_inter_scan_orders[TX_SIZES_ALL][TX_TYPES] = {
+const SCAN_ORDER av1_inter_scan_orders[TX_SIZES_ALL][TX_TYPES] = {
   {
       // TX_4X4
       { default_scan_4x4, av1_default_iscan_4x4, default_scan_4x4_neighbors },
@@ -4135,7 +4135,7 @@
 
 #else   // CONFIG_EXT_TX
 
-const scan_order av1_intra_scan_orders[TX_SIZES][TX_TYPES] = {
+const SCAN_ORDER av1_intra_scan_orders[TX_SIZES][TX_TYPES] = {
   { // TX_4X4
     { default_scan_4x4, av1_default_iscan_4x4, default_scan_4x4_neighbors },
     { row_scan_4x4, av1_row_iscan_4x4, row_scan_4x4_neighbors },
diff --git a/av1/common/scan.h b/av1/common/scan.h
index d3032aa..c183ba9 100644
--- a/av1/common/scan.h
+++ b/av1/common/scan.h
@@ -28,10 +28,10 @@
   const int16_t *scan;
   const int16_t *iscan;
   const int16_t *neighbors;
-} scan_order;
+} SCAN_ORDER;
 
-extern const scan_order av1_default_scan_orders[TX_SIZES];
-extern const scan_order av1_intra_scan_orders[TX_SIZES][TX_TYPES];
+extern const SCAN_ORDER av1_default_scan_orders[TX_SIZES];
+extern const SCAN_ORDER av1_intra_scan_orders[TX_SIZES][TX_TYPES];
 
 static INLINE int get_coef_context(const int16_t *neighbors,
                                    const uint8_t *token_cache, int c) {
@@ -40,21 +40,21 @@
          1;
 }
 
-static INLINE const scan_order *get_intra_scan(TX_SIZE tx_size,
+static INLINE const SCAN_ORDER *get_intra_scan(TX_SIZE tx_size,
                                                TX_TYPE tx_type) {
   return &av1_intra_scan_orders[tx_size][tx_type];
 }
 
 #if CONFIG_EXT_TX
-extern const scan_order av1_inter_scan_orders[TX_SIZES_ALL][TX_TYPES];
+extern const SCAN_ORDER av1_inter_scan_orders[TX_SIZES_ALL][TX_TYPES];
 
-static INLINE const scan_order *get_inter_scan(TX_SIZE tx_size,
+static INLINE const SCAN_ORDER *get_inter_scan(TX_SIZE tx_size,
                                                TX_TYPE tx_type) {
   return &av1_inter_scan_orders[tx_size][tx_type];
 }
 #endif  // CONFIG_EXT_TX
 
-static INLINE const scan_order *get_scan(TX_SIZE tx_size, TX_TYPE tx_type,
+static INLINE const SCAN_ORDER *get_scan(TX_SIZE tx_size, TX_TYPE tx_type,
                                          int is_inter) {
 #if CONFIG_EXT_TX
   return is_inter ? &av1_inter_scan_orders[tx_size][tx_type]
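
Call sites are unchanged by the scan_order -> SCAN_ORDER rename; a typical lookup, based only on the accessors shown in this header (function and variable names illustrative):

    static void read_scan_tables(TX_SIZE tx_size, TX_TYPE tx_type,
                                 int is_inter_block) {
      const SCAN_ORDER *const so = get_scan(tx_size, tx_type, is_inter_block);
      const int16_t *scan = so->scan;    /* scan position -> coeff index */
      const int16_t *iscan = so->iscan;  /* coeff index -> scan position */
      const int16_t *nb = so->neighbors; /* context neighbors per coeff */
      (void)scan; (void)iscan; (void)nb;
    }
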
diff --git a/av1/common/x86/av1_inv_txfm_sse2.c b/av1/common/x86/av1_inv_txfm_sse2.c
index 4fe709e..365c124 100644
--- a/av1/common/x86/av1_inv_txfm_sse2.c
+++ b/av1/common/x86/av1_inv_txfm_sse2.c
@@ -2388,7 +2388,6 @@
 #define IDCT32_34                                                              \
   /* Stage1 */                                                                 \
   {                                                                            \
-    const __m128i zero = _mm_setzero_si128();                                  \
     const __m128i lo_1_31 = _mm_unpacklo_epi16(in[1], zero);                   \
     const __m128i hi_1_31 = _mm_unpackhi_epi16(in[1], zero);                   \
                                                                                \
@@ -2413,7 +2412,6 @@
                                                                                \
   /* Stage2 */                                                                 \
   {                                                                            \
-    const __m128i zero = _mm_setzero_si128();                                  \
     const __m128i lo_2_30 = _mm_unpacklo_epi16(in[2], zero);                   \
     const __m128i hi_2_30 = _mm_unpackhi_epi16(in[2], zero);                   \
                                                                                \
@@ -2440,7 +2438,6 @@
                                                                                \
   /* Stage3 */                                                                 \
   {                                                                            \
-    const __m128i zero = _mm_setzero_si128();                                  \
     const __m128i lo_4_28 = _mm_unpacklo_epi16(in[4], zero);                   \
     const __m128i hi_4_28 = _mm_unpackhi_epi16(in[4], zero);                   \
                                                                                \
@@ -2481,7 +2478,6 @@
                                                                                \
   /* Stage4 */                                                                 \
   {                                                                            \
-    const __m128i zero = _mm_setzero_si128();                                  \
     const __m128i lo_0_16 = _mm_unpacklo_epi16(in[0], zero);                   \
     const __m128i hi_0_16 = _mm_unpackhi_epi16(in[0], zero);                   \
                                                                                \
@@ -3018,6 +3014,7 @@
 // Only upper-left 8x8 has non-zero coeff
 void av1_idct32x32_34_add_sse2(const int16_t *input, uint8_t *dest,
                                int stride) {
+  const __m128i zero = _mm_setzero_si128();
   const __m128i rounding = _mm_set1_epi32(DCT_CONST_ROUNDING);
   const __m128i final_rounding = _mm_set1_epi16(1 << 5);
 
@@ -3123,7 +3120,6 @@
   col[31] = _mm_sub_epi16(stp1_0, stp1_31);
   for (i = 0; i < 4; i++) {
     int j;
-    const __m128i zero = _mm_setzero_si128();
     // Transpose 32x8 block to 8x32 block
     array_transpose_8x8(col + i * 8, in);
     IDCT32_34
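
With the zero declarations removed from IDCT32_34, the macro now expects an __m128i named zero to be live at every expansion site, which av1_idct32x32_34_add_sse2 provides once at function scope. A minimal sketch of this macro-capture pattern (USE_ZERO/expand_lo are hypothetical names):

    #include <emmintrin.h>

    /* The macro names 'zero' without declaring it; it binds to whatever
       '__m128i zero' is in scope where the macro is expanded. */
    #define USE_ZERO(v) _mm_unpacklo_epi16((v), zero)

    static __m128i expand_lo(__m128i in) {
      const __m128i zero = _mm_setzero_si128(); /* must precede expansion */
      return USE_ZERO(in);                      /* binds to the local above */
    }
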
diff --git a/av1/decoder/decodeframe.c b/av1/decoder/decodeframe.c
index 81a4692..6744572 100644
--- a/av1/decoder/decodeframe.c
+++ b/av1/decoder/decodeframe.c
@@ -296,9 +296,9 @@
 
   if (!mbmi->skip) {
     TX_TYPE tx_type = get_tx_type(plane_type, xd, block_idx, tx_size);
-    const scan_order *sc = get_scan(tx_size, tx_type, 0);
-    const int eob = av1_decode_block_tokens(xd, plane, sc, col, row, tx_size,
-                                            tx_type, r, mbmi->segment_id);
+    const SCAN_ORDER *scan_order = get_scan(tx_size, tx_type, 0);
+    const int eob = av1_decode_block_tokens(
+        xd, plane, scan_order, col, row, tx_size, tx_type, r, mbmi->segment_id);
     inverse_transform_block(xd, plane, tx_type, tx_size, dst, pd->dst.stride,
                             eob);
   }
@@ -330,7 +330,7 @@
   if (tx_size == plane_tx_size) {
     PLANE_TYPE plane_type = (plane == 0) ? PLANE_TYPE_Y : PLANE_TYPE_UV;
     TX_TYPE tx_type = get_tx_type(plane_type, xd, block, plane_tx_size);
-    const scan_order *sc = get_scan(plane_tx_size, tx_type, 1);
+    const SCAN_ORDER *sc = get_scan(plane_tx_size, tx_type, 1);
     const int eob =
         av1_decode_block_tokens(xd, plane, sc, blk_col, blk_row, plane_tx_size,
                                 tx_type, r, mbmi->segment_id);
@@ -373,9 +373,9 @@
   PLANE_TYPE plane_type = (plane == 0) ? PLANE_TYPE_Y : PLANE_TYPE_UV;
   int block_idx = (row << 1) + col;
   TX_TYPE tx_type = get_tx_type(plane_type, xd, block_idx, tx_size);
-  const scan_order *sc = get_scan(tx_size, tx_type, 1);
-  const int eob = av1_decode_block_tokens(xd, plane, sc, col, row, tx_size,
-                                          tx_type, r, segment_id);
+  const SCAN_ORDER *scan_order = get_scan(tx_size, tx_type, 1);
+  const int eob = av1_decode_block_tokens(xd, plane, scan_order, col, row,
+                                          tx_size, tx_type, r, segment_id);
 
   inverse_transform_block(xd, plane, tx_type, tx_size,
                           &pd->dst.buf[4 * row * pd->dst.stride + 4 * col],
@@ -418,12 +418,9 @@
   // passing bsize from decode_partition().
   xd->mi[0]->mbmi.sb_type = bsize;
   for (y = 0; y < y_mis; ++y)
-    for (x = !y; x < x_mis; ++x) {
-      xd->mi[y * cm->mi_stride + x] = xd->mi[0];
-    }
+    for (x = !y; x < x_mis; ++x) xd->mi[y * cm->mi_stride + x] = xd->mi[0];
 
   set_plane_n4(xd, bw, bh, bwl, bhl);
-
   set_skip_context(xd, mi_row, mi_col);
 
 #if CONFIG_VAR_TX
@@ -1311,8 +1308,8 @@
         const BLOCK_SIZE plane_bsize =
             get_plane_block_size(AOMMAX(bsize, BLOCK_8X8), pd);
         const TX_SIZE max_tx_size = max_txsize_lookup[plane_bsize];
-        int bw = num_4x4_blocks_wide_txsize_lookup[max_tx_size];
-        int bh = num_4x4_blocks_high_txsize_lookup[max_tx_size];
+        const int bw_var_tx = num_4x4_blocks_wide_txsize_lookup[max_tx_size];
+        const int bh_var_tx = num_4x4_blocks_high_txsize_lookup[max_tx_size];
         const int step = num_4x4_blocks_txsize_lookup[max_tx_size];
         int block = 0;
 #if CONFIG_EXT_TX && CONFIG_RECT_TX
@@ -1336,8 +1333,8 @@
                                                   plane, row, col, tx_size);
         } else {
 #endif
-          for (row = 0; row < num_4x4_h; row += bh) {
-            for (col = 0; col < num_4x4_w; col += bw) {
+          for (row = 0; row < num_4x4_h; row += bh_var_tx) {
+            for (col = 0; col < num_4x4_w; col += bw_var_tx) {
               decode_reconstruct_tx(xd, r, mbmi, plane, plane_bsize, block, row,
                                     col, max_tx_size, &eobtotal);
               block += step;
@@ -3481,16 +3478,13 @@
 
   setup_segmentation(cm, rb);
 
-  {
-    int i;
-    for (i = 0; i < MAX_SEGMENTS; ++i) {
-      const int qindex = cm->seg.enabled
-                             ? av1_get_qindex(&cm->seg, i, cm->base_qindex)
-                             : cm->base_qindex;
-      xd->lossless[i] = qindex == 0 && cm->y_dc_delta_q == 0 &&
-                        cm->uv_dc_delta_q == 0 && cm->uv_ac_delta_q == 0;
-      xd->qindex[i] = qindex;
-    }
+  for (i = 0; i < MAX_SEGMENTS; ++i) {
+    const int qindex = cm->seg.enabled
+                           ? av1_get_qindex(&cm->seg, i, cm->base_qindex)
+                           : cm->base_qindex;
+    xd->lossless[i] = qindex == 0 && cm->y_dc_delta_q == 0 &&
+                      cm->uv_dc_delta_q == 0 && cm->uv_ac_delta_q == 0;
+    xd->qindex[i] = qindex;
   }
 
   setup_segmentation_dequant(cm);
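
The un-nested loop above is also the complete definition of per-segment lossless coding: a segment is lossless exactly when its effective qindex is 0 and all three delta-qs are 0. Restated as a standalone predicate (a sketch; av1_get_qindex's internals are elided):

    static int is_lossless_segment(const AV1_COMMON *cm, int segment_id) {
      const int qindex = cm->seg.enabled
                             ? av1_get_qindex(&cm->seg, segment_id,
                                              cm->base_qindex)
                             : cm->base_qindex;
      return qindex == 0 && cm->y_dc_delta_q == 0 && cm->uv_dc_delta_q == 0 &&
             cm->uv_ac_delta_q == 0;
    }
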
diff --git a/av1/decoder/decodemv.c b/av1/decoder/decodemv.c
index e19b3e3..66056c0 100644
--- a/av1/decoder/decodemv.c
+++ b/av1/decoder/decodemv.c
@@ -1512,7 +1512,6 @@
     mbmi->mv[0].as_int = mi->bmi[3].as_mv[0].as_int;
     mbmi->mv[1].as_int = mi->bmi[3].as_mv[1].as_int;
   } else {
-    int ref;
     int_mv ref_mv[2];
     ref_mv[0] = nearestmv[0];
     ref_mv[1] = nearestmv[1];
diff --git a/av1/decoder/detokenize.c b/av1/decoder/detokenize.c
index fa235b6..1bd4c0d 100644
--- a/av1/decoder/detokenize.c
+++ b/av1/decoder/detokenize.c
@@ -367,7 +367,7 @@
 #endif  // CONFIG_PALETTE
 
 int av1_decode_block_tokens(MACROBLOCKD *const xd, int plane,
-                            const scan_order *sc, int x, int y, TX_SIZE tx_size,
+                            const SCAN_ORDER *sc, int x, int y, TX_SIZE tx_size,
                             TX_TYPE tx_type,
 #if CONFIG_ANS
                             struct AnsDecoder *const r,
diff --git a/av1/decoder/detokenize.h b/av1/decoder/detokenize.h
index dc96cf5..9c08ff9 100644
--- a/av1/decoder/detokenize.h
+++ b/av1/decoder/detokenize.h
@@ -27,7 +27,7 @@
 #endif  // CONFIG_PALETTE
 
 int av1_decode_block_tokens(MACROBLOCKD *const xd, int plane,
-                            const scan_order *sc, int x, int y, TX_SIZE tx_size,
+                            const SCAN_ORDER *sc, int x, int y, TX_SIZE tx_size,
                             TX_TYPE tx_type,
 #if CONFIG_ANS
                             struct AnsDecoder *const r,
diff --git a/av1/encoder/aq_complexity.c b/av1/encoder/aq_complexity.c
index 3c9c92f..5c4a5e3 100644
--- a/av1/encoder/aq_complexity.c
+++ b/av1/encoder/aq_complexity.c
@@ -111,9 +111,9 @@
 // Select a segment for the current block.
 // The choice of segment for a block depends on the ratio of the projected
 // bits for the block vs a target average and its spatial complexity.
-void av1_caq_select_segment(AV1_COMP *cpi, MACROBLOCK *mb, BLOCK_SIZE bs,
+void av1_caq_select_segment(const AV1_COMP *cpi, MACROBLOCK *mb, BLOCK_SIZE bs,
                             int mi_row, int mi_col, int projected_rate) {
-  AV1_COMMON *const cm = &cpi->common;
+  const AV1_COMMON *const cm = &cpi->common;
 
   const int mi_offset = mi_row * cm->mi_cols + mi_col;
   const int xmis = AOMMIN(cm->mi_cols - mi_col, num_8x8_blocks_wide_lookup[bs]);
diff --git a/av1/encoder/aq_complexity.h b/av1/encoder/aq_complexity.h
index 1d966ac..af525b3 100644
--- a/av1/encoder/aq_complexity.h
+++ b/av1/encoder/aq_complexity.h
@@ -22,7 +22,7 @@
 struct macroblock;
 
 // Select a segment for the current block.
-void av1_caq_select_segment(struct AV1_COMP *cpi, struct macroblock *,
+void av1_caq_select_segment(const struct AV1_COMP *cpi, struct macroblock *,
                             BLOCK_SIZE bs, int mi_row, int mi_col,
                             int projected_rate);
 
diff --git a/av1/encoder/aq_cyclicrefresh.c b/av1/encoder/aq_cyclicrefresh.c
index e0e4b88..bcf11a7 100644
--- a/av1/encoder/aq_cyclicrefresh.c
+++ b/av1/encoder/aq_cyclicrefresh.c
@@ -209,7 +209,7 @@
 // Prior to coding a given prediction block of size bsize at (mi_row, mi_col),
 // check if we should reset the segment_id, and update the cyclic_refresh map
 // and segmentation map.
-void av1_cyclic_refresh_update_segment(AV1_COMP *const cpi,
+void av1_cyclic_refresh_update_segment(const AV1_COMP *cpi,
                                        MB_MODE_INFO *const mbmi, int mi_row,
                                        int mi_col, BLOCK_SIZE bsize,
                                        int64_t rate, int64_t dist, int skip) {
diff --git a/av1/encoder/aq_cyclicrefresh.h b/av1/encoder/aq_cyclicrefresh.h
index cdc9815..459ab80 100644
--- a/av1/encoder/aq_cyclicrefresh.h
+++ b/av1/encoder/aq_cyclicrefresh.h
@@ -49,7 +49,7 @@
 // Prior to coding a given prediction block of size bsize at (mi_row, mi_col),
 // check if we should reset the segment_id, and update the cyclic_refresh map
 // and segmentation map.
-void av1_cyclic_refresh_update_segment(struct AV1_COMP *const cpi,
+void av1_cyclic_refresh_update_segment(const struct AV1_COMP *cpi,
                                        MB_MODE_INFO *const mbmi, int mi_row,
                                        int mi_col, BLOCK_SIZE bsize,
                                        int64_t rate, int64_t dist, int skip);
diff --git a/av1/encoder/aq_variance.c b/av1/encoder/aq_variance.c
index 1f5554e..01528ec 100644
--- a/av1/encoder/aq_variance.c
+++ b/av1/encoder/aq_variance.c
@@ -141,7 +141,7 @@
 }
 #endif  // CONFIG_AOM_HIGHBITDEPTH
 
-static unsigned int block_variance(AV1_COMP *cpi, MACROBLOCK *x,
+static unsigned int block_variance(const AV1_COMP *const cpi, MACROBLOCK *x,
                                    BLOCK_SIZE bs) {
   MACROBLOCKD *xd = &x->e_mbd;
   unsigned int var, sse;
@@ -189,14 +189,14 @@
   }
 }
 
-double av1_log_block_var(AV1_COMP *cpi, MACROBLOCK *x, BLOCK_SIZE bs) {
+double av1_log_block_var(const AV1_COMP *cpi, MACROBLOCK *x, BLOCK_SIZE bs) {
   unsigned int var = block_variance(cpi, x, bs);
   aom_clear_system_state();
   return log(var + 1.0);
 }
 
 #define DEFAULT_E_MIDPOINT 10.0
-int av1_block_energy(AV1_COMP *cpi, MACROBLOCK *x, BLOCK_SIZE bs) {
+int av1_block_energy(const AV1_COMP *cpi, MACROBLOCK *x, BLOCK_SIZE bs) {
   double energy;
   double energy_midpoint;
   aom_clear_system_state();
diff --git a/av1/encoder/aq_variance.h b/av1/encoder/aq_variance.h
index 4900aa7..05725c5 100644
--- a/av1/encoder/aq_variance.h
+++ b/av1/encoder/aq_variance.h
@@ -21,8 +21,8 @@
 unsigned int av1_vaq_segment_id(int energy);
 void av1_vaq_frame_setup(AV1_COMP *cpi);
 
-int av1_block_energy(AV1_COMP *cpi, MACROBLOCK *x, BLOCK_SIZE bs);
-double av1_log_block_var(AV1_COMP *cpi, MACROBLOCK *x, BLOCK_SIZE bs);
+int av1_block_energy(const AV1_COMP *cpi, MACROBLOCK *x, BLOCK_SIZE bs);
+double av1_log_block_var(const AV1_COMP *cpi, MACROBLOCK *x, BLOCK_SIZE bs);
 
 #ifdef __cplusplus
 }  // extern "C"
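
For context on the functions gaining const above: variance AQ scores a block by the log of its per-pixel variance, compared against DEFAULT_E_MIDPOINT (10.0). A worked example of the metric, assuming only what this hunk shows:

    #include <math.h>

    /* Mirrors av1_log_block_var's return value: log(var + 1.0).
       var = 1023  -> log(1024)  ~= 6.93, well below the 10.0 midpoint;
       var = 22025 -> log(22026) ~= 10.0, i.e. exactly average energy. */
    static double log_block_var_example(unsigned int var) {
      return log(var + 1.0);
    }
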
diff --git a/av1/encoder/bitstream.c b/av1/encoder/bitstream.c
index f1e8828..948c4f5 100644
--- a/av1/encoder/bitstream.c
+++ b/av1/encoder/bitstream.c
@@ -664,91 +664,92 @@
 #endif
 
   while (p < stop && p->token != EOSB_TOKEN) {
-    const int t = p->token;
+    const int token = p->token;
+    aom_tree_index index = 0;
 #if !CONFIG_ANS
-    const struct av1_token *const a = &av1_coef_encodings[t];
-    int v = a->value;
-    int n = a->len;
+    const struct av1_token *const coef_encoding = &av1_coef_encodings[token];
+    int coef_value = coef_encoding->value;
+    int coef_length = coef_encoding->len;
 #endif  // !CONFIG_ANS
 #if CONFIG_AOM_HIGHBITDEPTH
-    const av1_extra_bit *b;
-    if (bit_depth == AOM_BITS_12)
-      b = &av1_extra_bits_high12[t];
-    else if (bit_depth == AOM_BITS_10)
-      b = &av1_extra_bits_high10[t];
-    else
-      b = &av1_extra_bits[t];
+    const av1_extra_bit *const extra_bits_av1 =
+        (bit_depth == AOM_BITS_12)
+            ? &av1_extra_bits_high12[token]
+            : (bit_depth == AOM_BITS_10) ? &av1_extra_bits_high10[token]
+                                         : &av1_extra_bits[token];
 #else
-    const av1_extra_bit *const b = &av1_extra_bits[t];
+    const av1_extra_bit *const extra_bits_av1 = &av1_extra_bits[token];
     (void)bit_depth;
 #endif  // CONFIG_AOM_HIGHBITDEPTH
 
 #if CONFIG_ANS
     /* skip one or two nodes */
-    if (!p->skip_eob_node) aom_write(w, t != EOB_TOKEN, p->context_tree[0]);
+    if (!p->skip_eob_node) aom_write(w, token != EOB_TOKEN, p->context_tree[0]);
 
-    if (t != EOB_TOKEN) {
-      aom_write(w, t != ZERO_TOKEN, p->context_tree[1]);
+    if (token != EOB_TOKEN) {
+      aom_write(w, token != ZERO_TOKEN, p->context_tree[1]);
 
-      if (t != ZERO_TOKEN) {
-        aom_write_symbol(w, t - ONE_TOKEN, *p->token_cdf,
+      if (token != ZERO_TOKEN) {
+        aom_write_symbol(w, token - ONE_TOKEN, *p->token_cdf,
                          CATEGORY6_TOKEN - ONE_TOKEN + 1);
       }
     }
 #else
     /* skip one or two nodes */
     if (p->skip_eob_node)
-      n -= p->skip_eob_node;
+      coef_length -= p->skip_eob_node;
     else
-      aom_write(w, t != EOB_TOKEN, p->context_tree[0]);
+      aom_write(w, token != EOB_TOKEN, p->context_tree[0]);
 
-    if (t != EOB_TOKEN) {
-      aom_write(w, t != ZERO_TOKEN, p->context_tree[1]);
+    if (token != EOB_TOKEN) {
+      aom_write(w, token != ZERO_TOKEN, p->context_tree[1]);
 
-      if (t != ZERO_TOKEN) {
-        aom_write(w, t != ONE_TOKEN, p->context_tree[2]);
+      if (token != ZERO_TOKEN) {
+        aom_write(w, token != ONE_TOKEN, p->context_tree[2]);
 
-        if (t != ONE_TOKEN) {
-          int len = UNCONSTRAINED_NODES - p->skip_eob_node;
+        if (token != ONE_TOKEN) {
+          const int unconstrained_len = UNCONSTRAINED_NODES - p->skip_eob_node;
           aom_write_tree(w, av1_coef_con_tree,
-                         av1_pareto8_full[p->context_tree[PIVOT_NODE] - 1], v,
-                         n - len, 0);
+                         av1_pareto8_full[p->context_tree[PIVOT_NODE] - 1],
+                         coef_value, coef_length - unconstrained_len, 0);
         }
       }
     }
 #endif  // CONFIG_ANS
 
-    if (b->base_val) {
-      const int e = p->extra, l = b->len;
-      int skip_bits = (b->base_val == CAT6_MIN_VAL)
+    if (extra_bits_av1->base_val) {
+      const int extra_bits = p->extra;
+      const int extra_bits_av1_length = extra_bits_av1->len;
+      int skip_bits = (extra_bits_av1->base_val == CAT6_MIN_VAL)
                           ? TX_SIZES - 1 - txsize_sqr_up_map[tx]
                           : 0;
 
-      if (l) {
-        const unsigned char *pb = b->prob;
-        int v = e >> 1;
-        int n = l; /* number of bits in v, assumed nonzero */
-        int i = 0;
+      if (extra_bits_av1_length) {
+        const unsigned char *pb = extra_bits_av1->prob;
+        const int value = extra_bits >> 1;
+        int num_bits = extra_bits_av1_length;  // number of bits in value
+        assert(num_bits > 0);
 
+        index = 0;
         do {
-          const int bb = (v >> --n) & 1;
+          const int bb = (value >> --num_bits) & 1;
           if (skip_bits) {
-            skip_bits--;
+            --skip_bits;
             assert(!bb);
           } else {
-            aom_write(w, bb, pb[i >> 1]);
+            aom_write(w, bb, pb[index >> 1]);
           }
-          i = b->tree[i + bb];
-        } while (n);
+          index = extra_bits_av1->tree[index + bb];
+        } while (num_bits);
       }
 
-      aom_write_bit(w, e & 1);
+      aom_write_bit(w, extra_bits & 1);
     }
     ++p;
 
 #if CONFIG_VAR_TX
     ++count;
-    if (t == EOB_TOKEN || count == seg_eob) break;
+    if (token == EOB_TOKEN || count == seg_eob) break;
 #endif
   }
 
@@ -1701,9 +1702,9 @@
 #endif
         const TX_SIZE max_tx_size = max_txsize_lookup[plane_bsize];
         const BLOCK_SIZE txb_size = txsize_to_bsize[max_tx_size];
-        int bw = num_4x4_blocks_wide_lookup[txb_size];
         int block = 0;
         const int step = num_4x4_blocks_txsize_lookup[max_tx_size];
+        bw = num_4x4_blocks_wide_lookup[txb_size];
         for (row = 0; row < num_4x4_h; row += bw) {
           for (col = 0; col < num_4x4_w; col += bw) {
             pack_txb_tokens(w, tok, tok_end, xd, mbmi, plane, plane_bsize,
@@ -1715,8 +1716,8 @@
         TX_SIZE tx = plane ? get_uv_tx_size(&m->mbmi, &xd->plane[plane])
                            : m->mbmi.tx_size;
         BLOCK_SIZE txb_size = txsize_to_bsize[tx];
-        int bw = num_4x4_blocks_wide_lookup[txb_size];
-        int bh = num_4x4_blocks_high_lookup[txb_size];
+        bw = num_4x4_blocks_wide_lookup[txb_size];
+        bh = num_4x4_blocks_high_lookup[txb_size];
 
         for (row = 0; row < num_4x4_h; row += bh)
           for (col = 0; col < num_4x4_w; col += bw)
@@ -2096,7 +2097,6 @@
               for (t = 0; t < entropy_nodes_update; ++t) {
                 aom_prob newp = new_coef_probs[i][j][k][l][t];
                 aom_prob *oldp = old_coef_probs[i][j][k][l] + t;
-                const aom_prob upd = DIFF_UPDATE_PROB;
                 int s;
                 int u = 0;
                 if (t == PIVOT_NODE)
@@ -2300,7 +2300,6 @@
               for (t = 0; t < entropy_nodes_update; ++t) {
                 aom_prob newp = new_coef_probs[i][j][k][l][t];
                 aom_prob *oldp = old_coef_probs[i][j][k][l] + t;
-                const aom_prob upd = DIFF_UPDATE_PROB;
                 int s;
                 int u = 0;
 
@@ -2423,8 +2422,6 @@
 #if CONFIG_ENTROPY
       if (cm->do_subframe_update &&
           cm->refresh_frame_context == REFRESH_FRAME_CONTEXT_BACKWARD) {
-        unsigned int eob_counts_copy[PLANE_TYPES][REF_TYPES][COEF_BANDS]
-                                    [COEFF_CONTEXTS];
         av1_coeff_count coef_counts_copy[PLANE_TYPES];
         av1_copy(eob_counts_copy, cpi->common.counts.eob_branch[tx_size]);
         av1_copy(coef_counts_copy, cpi->td.rd_counts.coef_counts[tx_size]);
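
The rewritten extra-bits loop emits value's bits MSB-first while walking the token's probability tree; at 10/12-bit depth the top skip_bits bits of a CAT6 value are provably zero, so they are asserted rather than coded, and the sign bit goes last. The control flow, reduced to a sketch (write_extra_bits is a hypothetical wrapper; aom_write, aom_write_bit, and av1_extra_bit are the library's):

    #include <assert.h>

    static void write_extra_bits(aom_writer *w, const av1_extra_bit *eb,
                                 int extra, int skip_bits) {
      const int value = extra >> 1;  /* low bit is the sign, written last */
      int num_bits = eb->len;        /* assumed nonzero, as in the diff */
      aom_tree_index index = 0;
      do {
        const int bit = (value >> --num_bits) & 1;
        if (skip_bits) {
          --skip_bits;
          assert(!bit);  /* high-depth CAT6: these bits are always zero */
        } else {
          aom_write(w, bit, eb->prob[index >> 1]);
        }
        index = eb->tree[index + bit];
      } while (num_bits);
      aom_write_bit(w, extra & 1);  /* finally, the sign bit */
    }
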
diff --git a/av1/encoder/block.h b/av1/encoder/block.h
index a4dcba2..310325e 100644
--- a/av1/encoder/block.h
+++ b/av1/encoder/block.h
@@ -26,7 +26,7 @@
   unsigned int sse;
   int sum;
   unsigned int var;
-} diff;
+} DIFF;
 
 typedef struct macroblock_plane {
   DECLARE_ALIGNED(16, int16_t, src_diff[MAX_SB_SQUARE]);
diff --git a/av1/encoder/dct.c b/av1/encoder/dct.c
index 855b140..63b71a5 100644
--- a/av1/encoder/dct.c
+++ b/av1/encoder/dct.c
@@ -1498,7 +1498,6 @@
     tran_high_t t0, t1, t2, t3;                  // needs32
     tran_high_t x0, x1, x2, x3;                  // canbe16
 
-    int i;
     for (i = 0; i < 8; i++) {
       // stage 1
       s0 = (input[0 * stride] + input[7 * stride]) * 4;
diff --git a/av1/encoder/encodeframe.c b/av1/encoder/encodeframe.c
index 41a71bb..004ad68 100644
--- a/av1/encoder/encodeframe.c
+++ b/av1/encoder/encodeframe.c
@@ -60,17 +60,18 @@
 #define IF_HBD(...)
 #endif  // CONFIG_AOM_HIGHBITDEPTH
 
-static void encode_superblock(AV1_COMP *cpi, ThreadData *td, TOKENEXTRA **t,
-                              RUN_TYPE dry_run, int mi_row, int mi_col,
-                              BLOCK_SIZE bsize, PICK_MODE_CONTEXT *ctx,
-                              int *rate);
+static void encode_superblock(const AV1_COMP *const cpi, ThreadData *td,
+                              TOKENEXTRA **t, RUN_TYPE dry_run, int mi_row,
+                              int mi_col, BLOCK_SIZE bsize,
+                              PICK_MODE_CONTEXT *ctx, int *rate);
 
 #if CONFIG_SUPERTX
 static int check_intra_b(PICK_MODE_CONTEXT *ctx);
 
-static int check_intra_sb(AV1_COMP *cpi, const TileInfo *const tile, int mi_row,
-                          int mi_col, BLOCK_SIZE bsize, PC_TREE *pc_tree);
-static void predict_superblock(AV1_COMP *cpi, ThreadData *td,
+static int check_intra_sb(const AV1_COMP *cpi, const TileInfo *const tile,
+                          int mi_row, int mi_col, BLOCK_SIZE bsize,
+                          PC_TREE *pc_tree);
+static void predict_superblock(const AV1_COMP *const cpi, ThreadData *td,
 #if CONFIG_EXT_INTER
                                int mi_row_ori, int mi_col_ori,
 #endif  // CONFIG_EXT_INTER
@@ -78,17 +79,17 @@
                                BLOCK_SIZE bsize_pred, int b_sub8x8, int block);
 static int check_supertx_sb(BLOCK_SIZE bsize, TX_SIZE supertx_size,
                             PC_TREE *pc_tree);
-static void predict_sb_complex(AV1_COMP *cpi, ThreadData *td,
+static void predict_sb_complex(const AV1_COMP *const cpi, ThreadData *td,
                                const TileInfo *const tile, int mi_row,
                                int mi_col, int mi_row_ori, int mi_col_ori,
                                RUN_TYPE dry_run, BLOCK_SIZE bsize,
                                BLOCK_SIZE top_bsize, uint8_t *dst_buf[3],
                                int dst_stride[3], PC_TREE *pc_tree);
-static void update_state_sb_supertx(AV1_COMP *cpi, ThreadData *td,
+static void update_state_sb_supertx(const AV1_COMP *const cpi, ThreadData *td,
                                     const TileInfo *const tile, int mi_row,
                                     int mi_col, BLOCK_SIZE bsize,
                                     RUN_TYPE dry_run, PC_TREE *pc_tree);
-static void rd_supertx_sb(AV1_COMP *cpi, ThreadData *td,
+static void rd_supertx_sb(const AV1_COMP *const cpi, ThreadData *td,
                           const TileInfo *const tile, int mi_row, int mi_col,
                           BLOCK_SIZE bsize, int *tmp_rate, int64_t *tmp_dist,
                           TX_TYPE *best_tx, PC_TREE *pc_tree);
@@ -176,7 +177,7 @@
 };
 #endif  // CONFIG_AOM_HIGHBITDEPTH
 
-unsigned int av1_get_sby_perpixel_variance(AV1_COMP *cpi,
+unsigned int av1_get_sby_perpixel_variance(const AV1_COMP *cpi,
                                            const struct buf_2d *ref,
                                            BLOCK_SIZE bs) {
   unsigned int sse;
@@ -186,7 +187,7 @@
 }
 
 #if CONFIG_AOM_HIGHBITDEPTH
-unsigned int av1_high_get_sby_perpixel_variance(AV1_COMP *cpi,
+unsigned int av1_high_get_sby_perpixel_variance(const AV1_COMP *cpi,
                                                 const struct buf_2d *ref,
                                                 BLOCK_SIZE bs, int bd) {
   unsigned int var, sse;
@@ -212,7 +213,7 @@
 }
 #endif  // CONFIG_AOM_HIGHBITDEPTH
 
-static unsigned int get_sby_perpixel_diff_variance(AV1_COMP *cpi,
+static unsigned int get_sby_perpixel_diff_variance(const AV1_COMP *const cpi,
                                                    const struct buf_2d *ref,
                                                    int mi_row, int mi_col,
                                                    BLOCK_SIZE bs) {
@@ -243,21 +244,21 @@
 
 // Lighter version of set_offsets that only sets the mode info
 // pointers.
-static void set_mode_info_offsets(AV1_COMP *const cpi, MACROBLOCK *const x,
-                                  MACROBLOCKD *const xd, int mi_row,
-                                  int mi_col) {
-  AV1_COMMON *const cm = &cpi->common;
+static void set_mode_info_offsets(const AV1_COMP *const cpi,
+                                  MACROBLOCK *const x, MACROBLOCKD *const xd,
+                                  int mi_row, int mi_col) {
+  const AV1_COMMON *const cm = &cpi->common;
   const int idx_str = xd->mi_stride * mi_row + mi_col;
   xd->mi = cm->mi_grid_visible + idx_str;
   xd->mi[0] = cm->mi + idx_str;
   x->mbmi_ext = cpi->mbmi_ext_base + (mi_row * cm->mi_cols + mi_col);
 }
 
-static void set_offsets_without_segment_id(AV1_COMP *cpi,
+static void set_offsets_without_segment_id(const AV1_COMP *const cpi,
                                            const TileInfo *const tile,
                                            MACROBLOCK *const x, int mi_row,
                                            int mi_col, BLOCK_SIZE bsize) {
-  AV1_COMMON *const cm = &cpi->common;
+  const AV1_COMMON *const cm = &cpi->common;
   MACROBLOCKD *const xd = &x->e_mbd;
   const int mi_width = num_8x8_blocks_wide_lookup[bsize];
   const int mi_height = num_8x8_blocks_high_lookup[bsize];
@@ -299,10 +300,10 @@
   xd->tile = *tile;
 }
 
-static void set_offsets(AV1_COMP *cpi, const TileInfo *const tile,
+static void set_offsets(const AV1_COMP *const cpi, const TileInfo *const tile,
                         MACROBLOCK *const x, int mi_row, int mi_col,
                         BLOCK_SIZE bsize) {
-  AV1_COMMON *const cm = &cpi->common;
+  const AV1_COMMON *const cm = &cpi->common;
   MACROBLOCKD *const xd = &x->e_mbd;
   MB_MODE_INFO *mbmi;
   const struct segmentation *const seg = &cm->seg;
@@ -332,11 +333,11 @@
 }
 
 #if CONFIG_SUPERTX
-static void set_offsets_supertx(AV1_COMP *cpi, ThreadData *td,
+static void set_offsets_supertx(const AV1_COMP *const cpi, ThreadData *td,
                                 const TileInfo *const tile, int mi_row,
                                 int mi_col, BLOCK_SIZE bsize) {
   MACROBLOCK *const x = &td->mb;
-  AV1_COMMON *const cm = &cpi->common;
+  const AV1_COMMON *const cm = &cpi->common;
   MACROBLOCKD *const xd = &x->e_mbd;
   const int mi_width = num_8x8_blocks_wide_lookup[bsize];
   const int mi_height = num_8x8_blocks_high_lookup[bsize];
@@ -349,7 +350,7 @@
                  cm->mi_cols);
 }
 
-static void set_offsets_extend(AV1_COMP *cpi, ThreadData *td,
+static void set_offsets_extend(const AV1_COMP *const cpi, ThreadData *td,
                                const TileInfo *const tile, int mi_row_pred,
                                int mi_col_pred, int mi_row_ori, int mi_col_ori,
                                BLOCK_SIZE bsize_pred) {
@@ -357,7 +358,7 @@
   // (mi_row_ori, mi_col_ori, bsize_ori): region for mv
   // (mi_row_pred, mi_col_pred, bsize_pred): region to predict
   MACROBLOCK *const x = &td->mb;
-  AV1_COMMON *const cm = &cpi->common;
+  const AV1_COMMON *const cm = &cpi->common;
   MACROBLOCKD *const xd = &x->e_mbd;
   const int mi_width = num_8x8_blocks_wide_lookup[bsize_pred];
   const int mi_height = num_8x8_blocks_high_lookup[bsize_pred];
@@ -973,7 +974,7 @@
 }
 
 #if CONFIG_DUAL_FILTER
-static void reset_intmv_filter_type(AV1_COMMON *cm, MACROBLOCKD *xd,
+static void reset_intmv_filter_type(const AV1_COMMON *const cm, MACROBLOCKD *xd,
                                     MB_MODE_INFO *mbmi) {
   int dir;
   for (dir = 0; dir < 2; ++dir) {
@@ -1011,11 +1012,11 @@
 }
 #endif  // CONFIG_GLOBAL_MOTION
 
-static void update_state(AV1_COMP *cpi, ThreadData *td, PICK_MODE_CONTEXT *ctx,
-                         int mi_row, int mi_col, BLOCK_SIZE bsize,
-                         RUN_TYPE dry_run) {
+static void update_state(const AV1_COMP *const cpi, ThreadData *td,
+                         PICK_MODE_CONTEXT *ctx, int mi_row, int mi_col,
+                         BLOCK_SIZE bsize, RUN_TYPE dry_run) {
   int i, x_idx, y;
-  AV1_COMMON *const cm = &cpi->common;
+  const AV1_COMMON *const cm = &cpi->common;
   RD_COUNTS *const rdc = &td->rd_counts;
   MACROBLOCK *const x = &td->mb;
   MACROBLOCKD *const xd = &x->e_mbd;
@@ -1132,18 +1133,22 @@
   if (dry_run) return;
 
 #if CONFIG_INTERNAL_STATS
-  if (frame_is_intra_only(cm)) {
-    static const int kf_mode_index[] = {
-      THR_DC /*DC_PRED*/,          THR_V_PRED /*V_PRED*/,
-      THR_H_PRED /*H_PRED*/,       THR_D45_PRED /*D45_PRED*/,
-      THR_D135_PRED /*D135_PRED*/, THR_D117_PRED /*D117_PRED*/,
-      THR_D153_PRED /*D153_PRED*/, THR_D207_PRED /*D207_PRED*/,
-      THR_D63_PRED /*D63_PRED*/,   THR_TM /*TM_PRED*/,
-    };
-    ++cpi->mode_chosen_counts[kf_mode_index[mbmi->mode]];
-  } else {
-    // Note how often each mode chosen as best
-    ++cpi->mode_chosen_counts[ctx->best_mode_index];
+  {
+    unsigned int *const mode_chosen_counts =
+        (unsigned int *)cpi->mode_chosen_counts;  // Cast const away.
+    if (frame_is_intra_only(cm)) {
+      static const int kf_mode_index[] = {
+        THR_DC /*DC_PRED*/,          THR_V_PRED /*V_PRED*/,
+        THR_H_PRED /*H_PRED*/,       THR_D45_PRED /*D45_PRED*/,
+        THR_D135_PRED /*D135_PRED*/, THR_D117_PRED /*D117_PRED*/,
+        THR_D153_PRED /*D153_PRED*/, THR_D207_PRED /*D207_PRED*/,
+        THR_D63_PRED /*D63_PRED*/,   THR_TM /*TM_PRED*/,
+      };
+      ++mode_chosen_counts[kf_mode_index[mbmi->mode]];
+    } else {
+      // Note how often each mode is chosen as best
+      ++mode_chosen_counts[ctx->best_mode_index];
+    }
   }
 #endif
   if (!frame_is_intra_only(cm)) {
@@ -1172,8 +1177,8 @@
 #if CONFIG_DUAL_FILTER
         update_filter_type_count(td->counts, xd, mbmi);
 #else
-        const int ctx = av1_get_pred_context_switchable_interp(xd);
-        ++td->counts->switchable_interp[ctx][mbmi->interp_filter];
+        const int switchable_ctx = av1_get_pred_context_switchable_interp(xd);
+        ++td->counts->switchable_interp[switchable_ctx][mbmi->interp_filter];
 #endif
       }
     }
@@ -1196,14 +1201,14 @@
 }
 
 #if CONFIG_SUPERTX
-static void update_state_supertx(AV1_COMP *cpi, ThreadData *td,
+static void update_state_supertx(const AV1_COMP *const cpi, ThreadData *td,
                                  PICK_MODE_CONTEXT *ctx, int mi_row, int mi_col,
                                  BLOCK_SIZE bsize, RUN_TYPE dry_run) {
   int y, x_idx;
 #if CONFIG_VAR_TX || CONFIG_REF_MV
   int i;
 #endif
-  AV1_COMMON *const cm = &cpi->common;
+  const AV1_COMMON *const cm = &cpi->common;
   RD_COUNTS *const rdc = &td->rd_counts;
   MACROBLOCK *const x = &td->mb;
   MACROBLOCKD *const xd = &x->e_mbd;
@@ -1341,11 +1346,11 @@
   }
 }
 
-static void update_state_sb_supertx(AV1_COMP *cpi, ThreadData *td,
+static void update_state_sb_supertx(const AV1_COMP *const cpi, ThreadData *td,
                                     const TileInfo *const tile, int mi_row,
                                     int mi_col, BLOCK_SIZE bsize,
                                     RUN_TYPE dry_run, PC_TREE *pc_tree) {
-  AV1_COMMON *const cm = &cpi->common;
+  const AV1_COMMON *const cm = &cpi->common;
   MACROBLOCK *const x = &td->mb;
   MACROBLOCKD *const xd = &x->e_mbd;
   struct macroblock_plane *const p = x->plane;
@@ -1497,10 +1502,11 @@
   ctx->mic.mbmi.tx_type = best_tx;
 }
 
-static void update_supertx_param_sb(AV1_COMP *cpi, ThreadData *td, int mi_row,
-                                    int mi_col, BLOCK_SIZE bsize, int best_tx,
-                                    TX_SIZE supertx_size, PC_TREE *pc_tree) {
-  AV1_COMMON *const cm = &cpi->common;
+static void update_supertx_param_sb(const AV1_COMP *const cpi, ThreadData *td,
+                                    int mi_row, int mi_col, BLOCK_SIZE bsize,
+                                    int best_tx, TX_SIZE supertx_size,
+                                    PC_TREE *pc_tree) {
+  const AV1_COMMON *const cm = &cpi->common;
   int bsl = b_width_log2_lookup[bsize], hbs = (1 << bsl) / 4;
   PARTITION_TYPE partition = pc_tree->partitioning;
   BLOCK_SIZE subsize = get_subsize(bsize, partition);
@@ -1584,10 +1590,10 @@
                      x->e_mbd.plane[i].subsampling_y);
 }
 
-static int set_segment_rdmult(AV1_COMP *const cpi, MACROBLOCK *const x,
+static int set_segment_rdmult(const AV1_COMP *const cpi, MACROBLOCK *const x,
                               int8_t segment_id) {
   int segment_qindex;
-  AV1_COMMON *const cm = &cpi->common;
+  const AV1_COMMON *const cm = &cpi->common;
   av1_init_plane_quantizers(cpi, x, segment_id);
   aom_clear_system_state();
   segment_qindex = av1_get_qindex(&cm->seg, segment_id, cm->base_qindex);
@@ -1595,7 +1601,7 @@
   return av1_compute_rd_mult(cpi, segment_qindex + cm->y_dc_delta_q);
 }
 
-static void rd_pick_sb_modes(AV1_COMP *cpi, TileDataEnc *tile_data,
+static void rd_pick_sb_modes(const AV1_COMP *const cpi, TileDataEnc *tile_data,
                              MACROBLOCK *const x, int mi_row, int mi_col,
                              RD_COST *rd_cost,
 #if CONFIG_SUPERTX
@@ -1606,7 +1612,7 @@
 #endif
                              BLOCK_SIZE bsize, PICK_MODE_CONTEXT *ctx,
                              int64_t best_rd) {
-  AV1_COMMON *const cm = &cpi->common;
+  const AV1_COMMON *const cm = &cpi->common;
   TileInfo *const tile_info = &tile_data->tile_info;
   MACROBLOCKD *const xd = &x->e_mbd;
   MB_MODE_INFO *mbmi;
@@ -1787,7 +1793,7 @@
 }
 #endif
 
-static void update_stats(AV1_COMMON *cm, ThreadData *td
+static void update_stats(const AV1_COMMON *const cm, ThreadData *td
 #if CONFIG_SUPERTX
                          ,
                          int supertx_enabled
@@ -2099,9 +2105,9 @@
 #endif
 }
 
-static void encode_b(AV1_COMP *cpi, const TileInfo *const tile, ThreadData *td,
-                     TOKENEXTRA **tp, int mi_row, int mi_col, RUN_TYPE dry_run,
-                     BLOCK_SIZE bsize,
+static void encode_b(const AV1_COMP *const cpi, const TileInfo *const tile,
+                     ThreadData *td, TOKENEXTRA **tp, int mi_row, int mi_col,
+                     RUN_TYPE dry_run, BLOCK_SIZE bsize,
 #if CONFIG_EXT_PARTITION_TYPES
                      PARTITION_TYPE partition,
 #endif
@@ -2123,9 +2129,10 @@
   }
 }
 
-static void encode_sb(AV1_COMP *cpi, ThreadData *td, const TileInfo *const tile,
-                      TOKENEXTRA **tp, int mi_row, int mi_col, RUN_TYPE dry_run,
-                      BLOCK_SIZE bsize, PC_TREE *pc_tree, int *rate) {
+static void encode_sb(const AV1_COMP *const cpi, ThreadData *td,
+                      const TileInfo *const tile, TOKENEXTRA **tp, int mi_row,
+                      int mi_col, RUN_TYPE dry_run, BLOCK_SIZE bsize,
+                      PC_TREE *pc_tree, int *rate) {
   const AV1_COMMON *const cm = &cpi->common;
   MACROBLOCK *const x = &td->mb;
   MACROBLOCKD *const xd = &x->e_mbd;
@@ -2429,7 +2436,7 @@
   int splits_below = 0;
   BLOCK_SIZE bs_type = mib[0]->mbmi.sb_type;
   int do_partition_search = 1;
-  PICK_MODE_CONTEXT *ctx = &pc_tree->none;
+  PICK_MODE_CONTEXT *ctx_none = &pc_tree->none;
 #if CONFIG_SUPERTX
   int last_part_rate_nocoef = INT_MAX;
   int none_rate_nocoef = INT_MAX;
@@ -2488,7 +2495,7 @@
 #if CONFIG_EXT_PARTITION_TYPES
                        PARTITION_NONE,
 #endif
-                       bsize, ctx, INT64_MAX);
+                       bsize, ctx_none, INT64_MAX);
 
       if (none_rdc.rate < INT_MAX) {
         none_rdc.rate += cpi->partition_cost[pl][PARTITION_NONE];
@@ -2515,7 +2522,7 @@
 #if CONFIG_EXT_PARTITION_TYPES
                        PARTITION_NONE,
 #endif
-                       bsize, ctx, INT64_MAX);
+                       bsize, ctx_none, INT64_MAX);
       break;
     case PARTITION_HORZ:
       rd_pick_sb_modes(cpi, tile_data, x, mi_row, mi_col, &last_part_rdc,
@@ -2532,11 +2539,11 @@
 #if CONFIG_SUPERTX
         int rt_nocoef = 0;
 #endif
-        PICK_MODE_CONTEXT *ctx = &pc_tree->horizontal[0];
+        PICK_MODE_CONTEXT *ctx_h = &pc_tree->horizontal[0];
         av1_rd_cost_init(&tmp_rdc);
-        update_state(cpi, td, ctx, mi_row, mi_col, subsize, 1);
+        update_state(cpi, td, ctx_h, mi_row, mi_col, subsize, 1);
         encode_superblock(cpi, td, tp, DRY_RUN_NORMAL, mi_row, mi_col, subsize,
-                          ctx, NULL);
+                          ctx_h, NULL);
         rd_pick_sb_modes(cpi, tile_data, x, mi_row + hbs, mi_col, &tmp_rdc,
 #if CONFIG_SUPERTX
                          &rt_nocoef,
@@ -2575,11 +2582,11 @@
 #if CONFIG_SUPERTX
         int rt_nocoef = 0;
 #endif
-        PICK_MODE_CONTEXT *ctx = &pc_tree->vertical[0];
+        PICK_MODE_CONTEXT *ctx_v = &pc_tree->vertical[0];
         av1_rd_cost_init(&tmp_rdc);
-        update_state(cpi, td, ctx, mi_row, mi_col, subsize, 1);
+        update_state(cpi, td, ctx_v, mi_row, mi_col, subsize, 1);
         encode_superblock(cpi, td, tp, DRY_RUN_NORMAL, mi_row, mi_col, subsize,
-                          ctx, NULL);
+                          ctx_v, NULL);
         rd_pick_sb_modes(cpi, tile_data, x, mi_row, mi_col + hbs, &tmp_rdc,
 #if CONFIG_SUPERTX
                          &rt_nocoef,
@@ -2698,8 +2705,6 @@
 #if CONFIG_SUPERTX
       int rt_nocoef = 0;
 #endif
-      RD_SEARCH_MACROBLOCK_CONTEXT x_ctx;
-
       if ((mi_row + y_idx >= cm->mi_rows) || (mi_col + x_idx >= cm->mi_cols))
         continue;
 
@@ -2933,26 +2938,25 @@
 }
 
 // TODO(jingning) refactor functions setting partition search range
-static void set_partition_range(AV1_COMMON *cm, MACROBLOCKD *xd, int mi_row,
+static void set_partition_range(const AV1_COMMON *const cm,
+                                const MACROBLOCKD *const xd, int mi_row,
                                 int mi_col, BLOCK_SIZE bsize,
-                                BLOCK_SIZE *min_bs, BLOCK_SIZE *max_bs) {
-  int mi_width = num_8x8_blocks_wide_lookup[bsize];
-  int mi_height = num_8x8_blocks_high_lookup[bsize];
+                                BLOCK_SIZE *const min_bs,
+                                BLOCK_SIZE *const max_bs) {
+  const int mi_width = num_8x8_blocks_wide_lookup[bsize];
+  const int mi_height = num_8x8_blocks_high_lookup[bsize];
   int idx, idy;
 
-  MODE_INFO *mi;
   const int idx_str = cm->mi_stride * mi_row + mi_col;
-  MODE_INFO **prev_mi = &cm->prev_mi_grid_visible[idx_str];
-  BLOCK_SIZE bs, min_size, max_size;
-
-  min_size = BLOCK_LARGEST;
-  max_size = BLOCK_4X4;
+  MODE_INFO **const prev_mi = &cm->prev_mi_grid_visible[idx_str];
+  BLOCK_SIZE min_size = BLOCK_64X64;  // default values
+  BLOCK_SIZE max_size = BLOCK_4X4;
 
   if (prev_mi) {
     for (idy = 0; idy < mi_height; ++idy) {
       for (idx = 0; idx < mi_width; ++idx) {
-        mi = prev_mi[idy * cm->mi_stride + idx];
-        bs = mi ? mi->mbmi.sb_type : bsize;
+        const MODE_INFO *const mi = prev_mi[idy * cm->mi_stride + idx];
+        const BLOCK_SIZE bs = mi ? mi->mbmi.sb_type : bsize;
         min_size = AOMMIN(min_size, bs);
         max_size = AOMMAX(max_size, bs);
       }
@@ -2961,8 +2965,8 @@
 
   if (xd->left_available) {
     for (idy = 0; idy < mi_height; ++idy) {
-      mi = xd->mi[idy * cm->mi_stride - 1];
-      bs = mi ? mi->mbmi.sb_type : bsize;
+      const MODE_INFO *const mi = xd->mi[idy * cm->mi_stride - 1];
+      const BLOCK_SIZE bs = mi ? mi->mbmi.sb_type : bsize;
       min_size = AOMMIN(min_size, bs);
       max_size = AOMMAX(max_size, bs);
     }
@@ -2970,8 +2974,8 @@
 
   if (xd->up_available) {
     for (idx = 0; idx < mi_width; ++idx) {
-      mi = xd->mi[idx - cm->mi_stride];
-      bs = mi ? mi->mbmi.sb_type : bsize;
+      const MODE_INFO *const mi = xd->mi[idx - cm->mi_stride];
+      const BLOCK_SIZE bs = mi ? mi->mbmi.sb_type : bsize;
       min_size = AOMMIN(min_size, bs);
       max_size = AOMMAX(max_size, bs);
     }
@@ -3094,10 +3098,10 @@
 
 #if CONFIG_EXT_PARTITION_TYPES
 static void rd_test_partition3(
-    AV1_COMP *cpi, ThreadData *td, TileDataEnc *tile_data, TOKENEXTRA **tp,
-    PC_TREE *pc_tree, RD_COST *best_rdc, PICK_MODE_CONTEXT ctxs[3],
-    PICK_MODE_CONTEXT *ctx, int mi_row, int mi_col, BLOCK_SIZE bsize,
-    PARTITION_TYPE partition,
+    const AV1_COMP *const cpi, ThreadData *td, TileDataEnc *tile_data,
+    TOKENEXTRA **tp, PC_TREE *pc_tree, RD_COST *best_rdc,
+    PICK_MODE_CONTEXT ctxs[3], PICK_MODE_CONTEXT *ctx, int mi_row, int mi_col,
+    BLOCK_SIZE bsize, PARTITION_TYPE partition,
 #if CONFIG_SUPERTX
     int64_t best_rd, int *best_rate_nocoef, RD_SEARCH_MACROBLOCK_CONTEXT *x_ctx,
 #endif
@@ -3107,7 +3111,7 @@
   MACROBLOCKD *const xd = &x->e_mbd;
   RD_COST this_rdc, sum_rdc;
 #if CONFIG_SUPERTX
-  AV1_COMMON *const cm = &cpi->common;
+  const AV1_COMMON *const cm = &cpi->common;
   TileInfo *const tile_info = &tile_data->tile_info;
   int this_rate_nocoef, sum_rate_nocoef;
   int abort_flag;
@@ -3134,12 +3138,12 @@
 #else
   if (sum_rdc.rdcost < best_rdc->rdcost) {
 #endif
-    PICK_MODE_CONTEXT *ctx = &ctxs[0];
-    update_state(cpi, td, ctx, mi_row0, mi_col0, subsize0, 1);
+    PICK_MODE_CONTEXT *ctx_0 = &ctxs[0];
+    update_state(cpi, td, ctx_0, mi_row0, mi_col0, subsize0, 1);
     encode_superblock(cpi, td, tp, DRY_RUN_NORMAL, mi_row0, mi_col0, subsize0,
-                      ctx, NULL);
+                      ctx_0, NULL);
 
-    if (cpi->sf.adaptive_motion_search) load_pred_mv(x, ctx);
+    if (cpi->sf.adaptive_motion_search) load_pred_mv(x, ctx_0);
 
 #if CONFIG_SUPERTX
     rd_pick_sb_modes(cpi, tile_data, x, mi_row1, mi_col1, &this_rdc,
@@ -3175,12 +3179,12 @@
 #else
     if (sum_rdc.rdcost < best_rdc->rdcost) {
 #endif
-      PICK_MODE_CONTEXT *ctx = &ctxs[1];
-      update_state(cpi, td, ctx, mi_row1, mi_col1, subsize1, 1);
+      PICK_MODE_CONTEXT *ctx_1 = &ctxs[1];
+      update_state(cpi, td, ctx_1, mi_row1, mi_col1, subsize1, 1);
       encode_superblock(cpi, td, tp, DRY_RUN_NORMAL, mi_row1, mi_col1, subsize1,
-                        ctx, NULL);
+                        ctx_1, NULL);
 
-      if (cpi->sf.adaptive_motion_search) load_pred_mv(x, ctx);
+      if (cpi->sf.adaptive_motion_search) load_pred_mv(x, ctx_1);
 
 #if CONFIG_SUPERTX
       rd_pick_sb_modes(cpi, tile_data, x, mi_row2, mi_col2, &this_rdc,
@@ -3274,7 +3278,7 @@
 // TODO(jingning,jimbankoski,rbultje): properly skip partition types that are
 // unlikely to be selected depending on previous rate-distortion optimization
 // results, for encoding speed-up.
-static void rd_pick_partition(AV1_COMP *cpi, ThreadData *td,
+static void rd_pick_partition(const AV1_COMP *const cpi, ThreadData *td,
                               TileDataEnc *tile_data, TOKENEXTRA **tp,
                               int mi_row, int mi_col, BLOCK_SIZE bsize,
                               RD_COST *rd_cost,
@@ -3282,17 +3286,16 @@
                               int *rate_nocoef,
 #endif
                               int64_t best_rd, PC_TREE *pc_tree) {
-  AV1_COMMON *const cm = &cpi->common;
+  const AV1_COMMON *const cm = &cpi->common;
   TileInfo *const tile_info = &tile_data->tile_info;
   MACROBLOCK *const x = &td->mb;
   MACROBLOCKD *const xd = &x->e_mbd;
   const int mi_step = num_8x8_blocks_wide_lookup[bsize] / 2;
   RD_SEARCH_MACROBLOCK_CONTEXT x_ctx;
-  TOKENEXTRA *tp_orig = *tp;
-  PICK_MODE_CONTEXT *ctx = &pc_tree->none;
-  int i;
+  const TOKENEXTRA *const tp_orig = *tp;
+  PICK_MODE_CONTEXT *ctx_none = &pc_tree->none;
   const int pl = partition_plane_context(xd, mi_row, mi_col, bsize);
-  int *partition_cost = cpi->partition_cost[pl];
+  const int *partition_cost = cpi->partition_cost[pl];
   int tmp_partition_cost[PARTITION_TYPES];
   BLOCK_SIZE subsize;
   RD_COST this_rdc, sum_rdc, best_rdc;
@@ -3303,8 +3306,9 @@
                               bsize <= MAX_SUPERTX_BLOCK_SIZE &&
                               !xd->lossless[0];
 #endif  // CONFIG_SUPERTX
-  int do_split = bsize >= BLOCK_8X8;
-  int do_rect = 1;
+  const int bsize_at_least_8x8 = (bsize >= BLOCK_8X8);
+  int do_square_split = bsize_at_least_8x8;
+  int do_rectangular_split = 1;
 #if CONFIG_EXT_PARTITION_TYPES
   BLOCK_SIZE bsize2 = get_subsize(bsize, PARTITION_SPLIT);
 #endif
@@ -3325,9 +3329,9 @@
 
   int partition_none_allowed = !force_horz_split && !force_vert_split;
   int partition_horz_allowed =
-      !force_vert_split && yss <= xss && bsize >= BLOCK_8X8;
+      !force_vert_split && yss <= xss && bsize_at_least_8x8;
   int partition_vert_allowed =
-      !force_horz_split && xss <= yss && bsize >= BLOCK_8X8;
+      !force_horz_split && xss <= yss && bsize_at_least_8x8;
   (void)*tp_orig;
 
   if (force_horz_split || force_vert_split) {
@@ -3377,7 +3381,7 @@
     x->mb_energy = av1_block_energy(cpi, x, bsize);
 
   if (cpi->sf.cb_partition_search && bsize == BLOCK_16X16) {
-    int cb_partition_search_ctrl =
+    const int cb_partition_search_ctrl =
         ((pc_tree->index == 0 || pc_tree->index == 3) +
          get_chessboard_index(cm->current_video_frame)) &
         0x1;
@@ -3389,12 +3393,13 @@
   // Determine partition types in search according to the speed features.
   // The threshold set here has to be of square block size.
   if (cpi->sf.auto_min_max_partition_size) {
-    partition_none_allowed &= (bsize <= max_size && bsize >= min_size);
-    partition_horz_allowed &=
-        ((bsize <= max_size && bsize > min_size) || force_horz_split);
-    partition_vert_allowed &=
-        ((bsize <= max_size && bsize > min_size) || force_vert_split);
-    do_split &= bsize > min_size;
+    const int no_partition_allowed = (bsize <= max_size && bsize >= min_size);
+    // Note: Further partitioning is NOT allowed once bsize == min_size.
+    const int partition_allowed = (bsize <= max_size && bsize > min_size);
+    partition_none_allowed &= no_partition_allowed;
+    partition_horz_allowed &= partition_allowed || force_horz_split;
+    partition_vert_allowed &= partition_allowed || force_vert_split;
+    do_square_split &= bsize > min_size;
   }
   if (cpi->sf.use_square_partition_only) {
     partition_horz_allowed &= force_horz_split;
@@ -3420,7 +3425,7 @@
 #if CONFIG_FP_MB_STATS
   // Decide whether we shall split directly and skip searching NONE by using
   // the first pass block statistics
-  if (cpi->use_fp_mb_stats && bsize >= BLOCK_32X32 && do_split &&
+  if (cpi->use_fp_mb_stats && bsize >= BLOCK_32X32 && do_square_split &&
       partition_none_allowed && src_diff_var > 4 &&
       cm->base_qindex < qindex_split_threshold_lookup[bsize]) {
     int mb_row = mi_row >> 1;
@@ -3477,9 +3482,9 @@
 #if CONFIG_EXT_PARTITION_TYPES
                      PARTITION_NONE,
 #endif
-                     bsize, ctx, best_rdc.rdcost);
+                     bsize, ctx_none, best_rdc.rdcost);
     if (this_rdc.rate != INT_MAX) {
-      if (bsize >= BLOCK_8X8) {
+      if (bsize_at_least_8x8) {
         this_rdc.rate += partition_cost[PARTITION_NONE];
         this_rdc.rdcost =
             RDCOST(x->rdmult, x->rddiv, this_rdc.rate, this_rdc.dist);
@@ -3489,22 +3494,21 @@
       }
 
       if (this_rdc.rdcost < best_rdc.rdcost) {
-        int64_t dist_breakout_thr = cpi->sf.partition_search_breakout_dist_thr;
-        int rate_breakout_thr = cpi->sf.partition_search_breakout_rate_thr;
+        // Adjust dist breakout threshold according to the partition size.
+        const int64_t dist_breakout_thr =
+            cpi->sf.partition_search_breakout_dist_thr >>
+            ((2 * (MAX_SB_SIZE_LOG2 - 2)) -
+             (b_width_log2_lookup[bsize] + b_height_log2_lookup[bsize]));
+        const int rate_breakout_thr =
+            cpi->sf.partition_search_breakout_rate_thr *
+            num_pels_log2_lookup[bsize];
 
         best_rdc = this_rdc;
 #if CONFIG_SUPERTX
         best_rate_nocoef = this_rate_nocoef;
         assert(best_rate_nocoef >= 0);
 #endif
-        if (bsize >= BLOCK_8X8) pc_tree->partitioning = PARTITION_NONE;
-
-        // Adjust dist breakout threshold according to the partition size.
-        dist_breakout_thr >>=
-            (2 * (MAX_SB_SIZE_LOG2 - 2)) -
-            (b_width_log2_lookup[bsize] + b_height_log2_lookup[bsize]);
-
-        rate_breakout_thr *= num_pels_log2_lookup[bsize];
+        if (bsize_at_least_8x8) pc_tree->partitioning = PARTITION_NONE;
 
         // If all y, u, v transform blocks in this partition are skippable, and
         // the dist & rate are within the thresholds, the partition search is
@@ -3512,10 +3516,10 @@
         // The dist & rate thresholds are set to 0 at speed 0 to disable the
         // early termination at that speed.
         if (!x->e_mbd.lossless[xd->mi[0]->mbmi.segment_id] &&
-            (ctx->skippable && best_rdc.dist < dist_breakout_thr &&
+            (ctx_none->skippable && best_rdc.dist < dist_breakout_thr &&
              best_rdc.rate < rate_breakout_thr)) {
-          do_split = 0;
-          do_rect = 0;
+          do_square_split = 0;
+          do_rectangular_split = 0;
         }
 
 #if CONFIG_FP_MB_STATS
@@ -3524,7 +3528,7 @@
         // If that is the case, check the difference variance between the
         // current frame and the last frame. If the variance is small enough,
         // stop further splitting in RD optimization
-        if (cpi->use_fp_mb_stats && do_split != 0 &&
+        if (cpi->use_fp_mb_stats && do_square_split &&
             cm->base_qindex > qindex_skip_threshold_lookup[bsize]) {
           int mb_row = mi_row >> 1;
           int mb_col = mi_col >> 1;
@@ -3557,8 +3561,8 @@
                   cpi, &x->plane[0].src, mi_row, mi_col, bsize);
             }
             if (src_diff_var < 8) {
-              do_split = 0;
-              do_rect = 0;
+              do_square_split = 0;
+              do_rectangular_split = 0;
             }
           }
         }
@@ -3570,23 +3574,23 @@
   }
 
   // store estimated motion vector
-  if (cpi->sf.adaptive_motion_search) store_pred_mv(x, ctx);
+  if (cpi->sf.adaptive_motion_search) store_pred_mv(x, ctx_none);
 
   // PARTITION_SPLIT
   // TODO(jingning): use the motion vectors given by the above search as
   // the starting point of motion search in the following partition type check.
-  if (do_split) {
+  if (do_square_split) {
+    int reached_last_index = 0;
     subsize = get_subsize(bsize, PARTITION_SPLIT);
     if (bsize == BLOCK_8X8) {
-      i = 4;
 #if CONFIG_DUAL_FILTER
       if (cpi->sf.adaptive_pred_interp_filter && partition_none_allowed)
         pc_tree->leaf_split[0]->pred_interp_filter =
-            ctx->mic.mbmi.interp_filter[0];
+            ctx_none->mic.mbmi.interp_filter[0];
 #else
       if (cpi->sf.adaptive_pred_interp_filter && partition_none_allowed)
         pc_tree->leaf_split[0]->pred_interp_filter =
-            ctx->mic.mbmi.interp_filter;
+            ctx_none->mic.mbmi.interp_filter;
 #endif
 #if CONFIG_SUPERTX
       rd_pick_sb_modes(cpi, tile_data, x, mi_row, mi_col, &sum_rdc,
@@ -3649,29 +3653,31 @@
         pc_tree->partitioning = best_partition;
       }
 #endif  // CONFIG_SUPERTX
+      reached_last_index = 1;
     } else {
+      int idx;
 #if CONFIG_SUPERTX
-      for (i = 0; i < 4 && sum_rdc.rdcost < INT64_MAX; ++i) {
+      for (idx = 0; idx < 4 && sum_rdc.rdcost < INT64_MAX; ++idx) {
 #else
-      for (i = 0; i < 4 && sum_rdc.rdcost < best_rdc.rdcost; ++i) {
+      for (idx = 0; idx < 4 && sum_rdc.rdcost < best_rdc.rdcost; ++idx) {
 #endif  // CONFIG_SUPERTX
-        const int x_idx = (i & 1) * mi_step;
-        const int y_idx = (i >> 1) * mi_step;
+        const int x_idx = (idx & 1) * mi_step;
+        const int y_idx = (idx >> 1) * mi_step;
 
         if (mi_row + y_idx >= cm->mi_rows || mi_col + x_idx >= cm->mi_cols)
           continue;
 
-        if (cpi->sf.adaptive_motion_search) load_pred_mv(x, ctx);
+        if (cpi->sf.adaptive_motion_search) load_pred_mv(x, ctx_none);
 
-        pc_tree->split[i]->index = i;
+        pc_tree->split[idx]->index = idx;
 #if CONFIG_SUPERTX
         rd_pick_partition(cpi, td, tile_data, tp, mi_row + y_idx,
                           mi_col + x_idx, subsize, &this_rdc, &this_rate_nocoef,
-                          INT64_MAX - sum_rdc.rdcost, pc_tree->split[i]);
+                          INT64_MAX - sum_rdc.rdcost, pc_tree->split[idx]);
 #else
-        rd_pick_partition(cpi, td, tile_data, tp, mi_row + y_idx,
-                          mi_col + x_idx, subsize, &this_rdc,
-                          best_rdc.rdcost - sum_rdc.rdcost, pc_tree->split[i]);
+        rd_pick_partition(
+            cpi, td, tile_data, tp, mi_row + y_idx, mi_col + x_idx, subsize,
+            &this_rdc, best_rdc.rdcost - sum_rdc.rdcost, pc_tree->split[idx]);
 #endif  // CONFIG_SUPERTX
 
         if (this_rdc.rate == INT_MAX) {
@@ -3689,8 +3695,9 @@
 #endif  // CONFIG_SUPERTX
         }
       }
+      reached_last_index = (idx == 4);
 #if CONFIG_SUPERTX
-      if (supertx_allowed && sum_rdc.rdcost < INT64_MAX && i == 4) {
+      if (supertx_allowed && sum_rdc.rdcost < INT64_MAX && reached_last_index) {
         TX_SIZE supertx_size = max_txsize_lookup[bsize];
         const PARTITION_TYPE best_partition = pc_tree->partitioning;
 
@@ -3732,7 +3739,7 @@
 #endif  // CONFIG_SUPERTX
     }
 
-    if (sum_rdc.rdcost < best_rdc.rdcost && i == 4) {
+    if (reached_last_index && sum_rdc.rdcost < best_rdc.rdcost) {
       sum_rdc.rate += partition_cost[PARTITION_SPLIT];
       sum_rdc.rdcost = RDCOST(x->rdmult, x->rddiv, sum_rdc.rate, sum_rdc.dist);
 #if CONFIG_SUPERTX
@@ -3747,10 +3754,10 @@
 #endif  // CONFIG_SUPERTX
         pc_tree->partitioning = PARTITION_SPLIT;
       }
-    } else {
+    } else if (cpi->sf.less_rectangular_check) {
       // skip rectangular partition test when larger block size
       // gives better rd cost
-      if (cpi->sf.less_rectangular_check) do_rect &= !partition_none_allowed;
+      do_rectangular_split &= !partition_none_allowed;
     }
 
     restore_context(x, &x_ctx, mi_row, mi_col, bsize);
@@ -3758,18 +3765,19 @@
 
   // PARTITION_HORZ
   if (partition_horz_allowed &&
-      (do_rect || av1_active_h_edge(cpi, mi_row, mi_step))) {
+      (do_rectangular_split || av1_active_h_edge(cpi, mi_row, mi_step))) {
     subsize = get_subsize(bsize, PARTITION_HORZ);
-    if (cpi->sf.adaptive_motion_search) load_pred_mv(x, ctx);
+    if (cpi->sf.adaptive_motion_search) load_pred_mv(x, ctx_none);
 #if CONFIG_DUAL_FILTER
     if (cpi->sf.adaptive_pred_interp_filter && bsize == BLOCK_8X8 &&
         partition_none_allowed)
       pc_tree->horizontal[0].pred_interp_filter =
-          ctx->mic.mbmi.interp_filter[0];
+          ctx_none->mic.mbmi.interp_filter[0];
 #else
     if (cpi->sf.adaptive_pred_interp_filter && bsize == BLOCK_8X8 &&
         partition_none_allowed)
-      pc_tree->horizontal[0].pred_interp_filter = ctx->mic.mbmi.interp_filter;
+      pc_tree->horizontal[0].pred_interp_filter =
+          ctx_none->mic.mbmi.interp_filter;
 #endif
     rd_pick_sb_modes(cpi, tile_data, x, mi_row, mi_col, &sum_rdc,
 #if CONFIG_SUPERTX
@@ -3787,23 +3795,24 @@
 #else
     if (sum_rdc.rdcost < best_rdc.rdcost &&
 #endif  // CONFIG_SUPERTX
-        mi_row + mi_step < cm->mi_rows && bsize > BLOCK_8X8) {
-      PICK_MODE_CONTEXT *ctx = &pc_tree->horizontal[0];
-      update_state(cpi, td, ctx, mi_row, mi_col, subsize, 1);
+        !force_horz_split && bsize > BLOCK_8X8) {
+      PICK_MODE_CONTEXT *ctx_h = &pc_tree->horizontal[0];
+      update_state(cpi, td, ctx_h, mi_row, mi_col, subsize, 1);
       encode_superblock(cpi, td, tp, DRY_RUN_NORMAL, mi_row, mi_col, subsize,
-                        ctx, NULL);
+                        ctx_h, NULL);
 
-      if (cpi->sf.adaptive_motion_search) load_pred_mv(x, ctx);
+      if (cpi->sf.adaptive_motion_search) load_pred_mv(x, ctx_h);
 
 #if CONFIG_DUAL_FILTER
       if (cpi->sf.adaptive_pred_interp_filter && bsize == BLOCK_8X8 &&
           partition_none_allowed)
         pc_tree->horizontal[1].pred_interp_filter =
-            ctx->mic.mbmi.interp_filter[0];
+            ctx_h->mic.mbmi.interp_filter[0];
 #else
       if (cpi->sf.adaptive_pred_interp_filter && bsize == BLOCK_8X8 &&
           partition_none_allowed)
-        pc_tree->horizontal[1].pred_interp_filter = ctx->mic.mbmi.interp_filter;
+        pc_tree->horizontal[1].pred_interp_filter =
+            ctx_none->mic.mbmi.interp_filter;
 #endif
 #if CONFIG_SUPERTX
       rd_pick_sb_modes(cpi, tile_data, x, mi_row + mi_step, mi_col, &this_rdc,
@@ -3896,19 +3905,21 @@
 
   // PARTITION_VERT
   if (partition_vert_allowed &&
-      (do_rect || av1_active_v_edge(cpi, mi_col, mi_step))) {
+      (do_rectangular_split || av1_active_v_edge(cpi, mi_col, mi_step))) {
     subsize = get_subsize(bsize, PARTITION_VERT);
 
-    if (cpi->sf.adaptive_motion_search) load_pred_mv(x, ctx);
+    if (cpi->sf.adaptive_motion_search) load_pred_mv(x, ctx_none);
 
 #if CONFIG_DUAL_FILTER
     if (cpi->sf.adaptive_pred_interp_filter && bsize == BLOCK_8X8 &&
         partition_none_allowed)
-      pc_tree->vertical[0].pred_interp_filter = ctx->mic.mbmi.interp_filter[0];
+      pc_tree->vertical[0].pred_interp_filter =
+          ctx_none->mic.mbmi.interp_filter[0];
 #else
     if (cpi->sf.adaptive_pred_interp_filter && bsize == BLOCK_8X8 &&
         partition_none_allowed)
-      pc_tree->vertical[0].pred_interp_filter = ctx->mic.mbmi.interp_filter;
+      pc_tree->vertical[0].pred_interp_filter =
+          ctx_none->mic.mbmi.interp_filter;
 #endif
     rd_pick_sb_modes(cpi, tile_data, x, mi_row, mi_col, &sum_rdc,
 #if CONFIG_SUPERTX
@@ -3925,22 +3936,23 @@
 #else
     if (sum_rdc.rdcost < best_rdc.rdcost &&
 #endif  // CONFIG_SUPERTX
-        mi_col + mi_step < cm->mi_cols && bsize > BLOCK_8X8) {
+        !force_vert_split && bsize > BLOCK_8X8) {
       update_state(cpi, td, &pc_tree->vertical[0], mi_row, mi_col, subsize, 1);
       encode_superblock(cpi, td, tp, DRY_RUN_NORMAL, mi_row, mi_col, subsize,
                         &pc_tree->vertical[0], NULL);
 
-      if (cpi->sf.adaptive_motion_search) load_pred_mv(x, ctx);
+      if (cpi->sf.adaptive_motion_search) load_pred_mv(x, ctx_none);
 
 #if CONFIG_DUAL_FILTER
       if (cpi->sf.adaptive_pred_interp_filter && bsize == BLOCK_8X8 &&
           partition_none_allowed)
         pc_tree->vertical[1].pred_interp_filter =
-            ctx->mic.mbmi.interp_filter[0];
+            ctx_none->mic.mbmi.interp_filter[0];
 #else
       if (cpi->sf.adaptive_pred_interp_filter && bsize == BLOCK_8X8 &&
           partition_none_allowed)
-        pc_tree->vertical[1].pred_interp_filter = ctx->mic.mbmi.interp_filter;
+        pc_tree->vertical[1].pred_interp_filter =
+            ctx_none->mic.mbmi.interp_filter;
 #endif
 #if CONFIG_SUPERTX
       rd_pick_sb_modes(cpi, tile_data, x, mi_row, mi_col + mi_step, &this_rdc,
@@ -4032,11 +4044,11 @@
 
 #if CONFIG_EXT_PARTITION_TYPES
   // PARTITION_HORZ_A
-  if (partition_horz_allowed && do_rect && bsize > BLOCK_8X8 &&
+  if (partition_horz_allowed && do_rectangular_split && bsize > BLOCK_8X8 &&
       partition_none_allowed) {
     subsize = get_subsize(bsize, PARTITION_HORZ_A);
     rd_test_partition3(cpi, td, tile_data, tp, pc_tree, &best_rdc,
-                       pc_tree->horizontala, ctx, mi_row, mi_col, bsize,
+                       pc_tree->horizontala, ctx_none, mi_row, mi_col, bsize,
                        PARTITION_HORZ_A,
 #if CONFIG_SUPERTX
                        best_rd, &best_rate_nocoef, &x_ctx,
@@ -4046,11 +4058,11 @@
     restore_context(x, &x_ctx, mi_row, mi_col, bsize);
   }
   // PARTITION_HORZ_B
-  if (partition_horz_allowed && do_rect && bsize > BLOCK_8X8 &&
+  if (partition_horz_allowed && do_rectangular_split && bsize > BLOCK_8X8 &&
       partition_none_allowed) {
     subsize = get_subsize(bsize, PARTITION_HORZ_B);
     rd_test_partition3(cpi, td, tile_data, tp, pc_tree, &best_rdc,
-                       pc_tree->horizontalb, ctx, mi_row, mi_col, bsize,
+                       pc_tree->horizontalb, ctx_none, mi_row, mi_col, bsize,
                        PARTITION_HORZ_B,
 #if CONFIG_SUPERTX
                        best_rd, &best_rate_nocoef, &x_ctx,
@@ -4060,11 +4072,11 @@
     restore_context(x, &x_ctx, mi_row, mi_col, bsize);
   }
   // PARTITION_VERT_A
-  if (partition_vert_allowed && do_rect && bsize > BLOCK_8X8 &&
+  if (partition_vert_allowed && do_rectangular_split && bsize > BLOCK_8X8 &&
       partition_none_allowed) {
     subsize = get_subsize(bsize, PARTITION_VERT_A);
     rd_test_partition3(cpi, td, tile_data, tp, pc_tree, &best_rdc,
-                       pc_tree->verticala, ctx, mi_row, mi_col, bsize,
+                       pc_tree->verticala, ctx_none, mi_row, mi_col, bsize,
                        PARTITION_VERT_A,
 #if CONFIG_SUPERTX
                        best_rd, &best_rate_nocoef, &x_ctx,
@@ -4074,11 +4086,11 @@
     restore_context(x, &x_ctx, mi_row, mi_col, bsize);
   }
   // PARTITION_VERT_B
-  if (partition_vert_allowed && do_rect && bsize > BLOCK_8X8 &&
+  if (partition_vert_allowed && do_rectangular_split && bsize > BLOCK_8X8 &&
       partition_none_allowed) {
     subsize = get_subsize(bsize, PARTITION_VERT_B);
     rd_test_partition3(cpi, td, tile_data, tp, pc_tree, &best_rdc,
-                       pc_tree->verticalb, ctx, mi_row, mi_col, bsize,
+                       pc_tree->verticalb, ctx_none, mi_row, mi_col, bsize,
                        PARTITION_VERT_B,
 #if CONFIG_SUPERTX
                        best_rd, &best_rate_nocoef, &x_ctx,
@@ -4924,9 +4936,10 @@
   }
 }
 
-static void tx_partition_count_update(AV1_COMMON *cm, MACROBLOCKD *xd,
-                                      BLOCK_SIZE plane_bsize, int mi_row,
-                                      int mi_col, FRAME_COUNTS *td_counts) {
+static void tx_partition_count_update(const AV1_COMMON *const cm,
+                                      MACROBLOCKD *xd, BLOCK_SIZE plane_bsize,
+                                      int mi_row, int mi_col,
+                                      FRAME_COUNTS *td_counts) {
   const int mi_width = num_4x4_blocks_wide_lookup[plane_bsize];
   const int mi_height = num_4x4_blocks_high_lookup[plane_bsize];
   TX_SIZE max_tx_size = max_txsize_lookup[plane_bsize];
@@ -4985,9 +4998,9 @@
   }
 }
 
-static void tx_partition_set_contexts(AV1_COMMON *cm, MACROBLOCKD *xd,
-                                      BLOCK_SIZE plane_bsize, int mi_row,
-                                      int mi_col) {
+static void tx_partition_set_contexts(const AV1_COMMON *const cm,
+                                      MACROBLOCKD *xd, BLOCK_SIZE plane_bsize,
+                                      int mi_row, int mi_col) {
   const int mi_width = num_4x4_blocks_wide_lookup[plane_bsize];
   const int mi_height = num_4x4_blocks_high_lookup[plane_bsize];
   TX_SIZE max_tx_size = max_txsize_lookup[plane_bsize];
@@ -5005,11 +5018,11 @@
 }
 #endif
 
-static void encode_superblock(AV1_COMP *cpi, ThreadData *td, TOKENEXTRA **t,
-                              RUN_TYPE dry_run, int mi_row, int mi_col,
-                              BLOCK_SIZE bsize, PICK_MODE_CONTEXT *ctx,
-                              int *rate) {
-  AV1_COMMON *const cm = &cpi->common;
+static void encode_superblock(const AV1_COMP *const cpi, ThreadData *td,
+                              TOKENEXTRA **t, RUN_TYPE dry_run, int mi_row,
+                              int mi_col, BLOCK_SIZE bsize,
+                              PICK_MODE_CONTEXT *ctx, int *rate) {
+  const AV1_COMMON *const cm = &cpi->common;
   MACROBLOCK *const x = &td->mb;
   MACROBLOCKD *const xd = &x->e_mbd;
   MODE_INFO **mi_8x8 = xd->mi;
@@ -5179,7 +5192,7 @@
 #endif
       ++td->counts->tx_size[tx_size_cat][tx_size_ctx][coded_tx_size];
     } else {
-      int x, y;
+      int i, j;
       TX_SIZE tx_size;
       // The new intra coding scheme requires no change of transform size
       if (is_inter_block(&mi->mbmi)) {
@@ -5196,10 +5209,10 @@
         tx_size = (bsize >= BLOCK_8X8) ? mbmi->tx_size : TX_4X4;
       }
 
-      for (y = 0; y < mi_height; y++)
-        for (x = 0; x < mi_width; x++)
-          if (mi_col + x < cm->mi_cols && mi_row + y < cm->mi_rows)
-            mi_8x8[mis * y + x]->mbmi.tx_size = tx_size;
+      for (j = 0; j < mi_height; j++)
+        for (i = 0; i < mi_width; i++)
+          if (mi_col + i < cm->mi_cols && mi_row + j < cm->mi_rows)
+            mi_8x8[mis * j + i]->mbmi.tx_size = tx_size;
     }
     ++td->counts->tx_size_totals[txsize_sqr_map[mbmi->tx_size]];
     ++td->counts
@@ -5278,8 +5291,9 @@
   return 0;
 }
 
-static int check_intra_sb(AV1_COMP *cpi, const TileInfo *const tile, int mi_row,
-                          int mi_col, BLOCK_SIZE bsize, PC_TREE *pc_tree) {
+static int check_intra_sb(const AV1_COMP *const cpi, const TileInfo *const tile,
+                          int mi_row, int mi_col, BLOCK_SIZE bsize,
+                          PC_TREE *pc_tree) {
   const AV1_COMMON *const cm = &cpi->common;
 
   const int hbs = num_8x8_blocks_wide_lookup[bsize] / 2;
@@ -5388,7 +5402,7 @@
   }
 }
 
-static void predict_superblock(AV1_COMP *cpi, ThreadData *td,
+static void predict_superblock(const AV1_COMP *const cpi, ThreadData *td,
 #if CONFIG_EXT_INTER
                                int mi_row_ori, int mi_col_ori,
 #endif  // CONFIG_EXT_INTER
@@ -5397,7 +5411,7 @@
   // Used in supertx
   // (mi_row_ori, mi_col_ori): location for mv
   // (mi_row_pred, mi_col_pred, bsize_pred): region to predict
-  AV1_COMMON *const cm = &cpi->common;
+  const AV1_COMMON *const cm = &cpi->common;
   MACROBLOCK *const x = &td->mb;
   MACROBLOCKD *const xd = &x->e_mbd;
   MODE_INFO *mi_8x8 = xd->mi[0];
@@ -5429,7 +5443,7 @@
                                                 bsize_pred, block);
 }
 
-static void predict_b_extend(AV1_COMP *cpi, ThreadData *td,
+static void predict_b_extend(const AV1_COMP *const cpi, ThreadData *td,
                              const TileInfo *const tile, int block,
                              int mi_row_ori, int mi_col_ori, int mi_row_pred,
                              int mi_col_pred, int mi_row_top, int mi_col_top,
@@ -5445,7 +5459,7 @@
   // bextend: 1: region to predict is an extension of ori; 0: not
 
   MACROBLOCK *const x = &td->mb;
-  AV1_COMMON *const cm = &cpi->common;
+  const AV1_COMMON *const cm = &cpi->common;
   MACROBLOCKD *const xd = &x->e_mbd;
   int r = (mi_row_pred - mi_row_top) * MI_SIZE;
   int c = (mi_col_pred - mi_col_top) * MI_SIZE;
@@ -5482,7 +5496,7 @@
   if (!dry_run && !bextend) update_stats(&cpi->common, td, 1);
 }
 
-static void extend_dir(AV1_COMP *cpi, ThreadData *td,
+static void extend_dir(const AV1_COMP *const cpi, ThreadData *td,
                        const TileInfo *const tile, int block, BLOCK_SIZE bsize,
                        BLOCK_SIZE top_bsize, int mi_row, int mi_col,
                        int mi_row_top, int mi_col_top, RUN_TYPE dry_run,
@@ -5554,7 +5568,7 @@
   }
 }
 
-static void extend_all(AV1_COMP *cpi, ThreadData *td,
+static void extend_all(const AV1_COMP *const cpi, ThreadData *td,
                        const TileInfo *const tile, int block, BLOCK_SIZE bsize,
                        BLOCK_SIZE top_bsize, int mi_row, int mi_col,
                        int mi_row_top, int mi_col_top, RUN_TYPE dry_run,
@@ -5586,13 +5600,13 @@
 // then applied to the 2 masked prediction mentioned above in vertical direction
 // If the block is split into more than one level, at every stage, masked
 // prediction is stored in dst_buf[] passed from higher level.
-static void predict_sb_complex(AV1_COMP *cpi, ThreadData *td,
+static void predict_sb_complex(const AV1_COMP *const cpi, ThreadData *td,
                                const TileInfo *const tile, int mi_row,
                                int mi_col, int mi_row_top, int mi_col_top,
                                RUN_TYPE dry_run, BLOCK_SIZE bsize,
                                BLOCK_SIZE top_bsize, uint8_t *dst_buf[3],
                                int dst_stride[3], PC_TREE *pc_tree) {
-  AV1_COMMON *const cm = &cpi->common;
+  const AV1_COMMON *const cm = &cpi->common;
   MACROBLOCK *const x = &td->mb;
   MACROBLOCKD *const xd = &x->e_mbd;
 
@@ -5644,7 +5658,11 @@
   }
 #endif  // CONFIG_AOM_HIGHBITDEPTH
 
-  if (!dry_run && bsize < top_bsize) cm->counts.partition[ctx][partition]++;
+  if (!dry_run && bsize < top_bsize) {
+    // Explicitly cast away const.
+    FRAME_COUNTS *const frame_counts = (FRAME_COUNTS *)&cm->counts;
+    frame_counts->partition[ctx][partition]++;
+  }
 
   for (i = 0; i < MAX_MB_PLANE; i++) {
     xd->plane[i].dst.buf = dst_buf[i];
@@ -6029,11 +6047,11 @@
 #endif  // CONFIG_EXT_PARTITION_TYPES
 }
 
-static void rd_supertx_sb(AV1_COMP *cpi, ThreadData *td,
+static void rd_supertx_sb(const AV1_COMP *const cpi, ThreadData *td,
                           const TileInfo *const tile, int mi_row, int mi_col,
                           BLOCK_SIZE bsize, int *tmp_rate, int64_t *tmp_dist,
                           TX_TYPE *best_tx, PC_TREE *pc_tree) {
-  AV1_COMMON *const cm = &cpi->common;
+  const AV1_COMMON *const cm = &cpi->common;
   MACROBLOCK *const x = &td->mb;
   MACROBLOCKD *const xd = &x->e_mbd;
   int plane, pnskip, skippable, skippable_uv, rate_uv, this_rate,
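
The encodeframe.c sweep threads const through the AV1_COMP call graph; the few places that must still mutate encoder state (the internal-stats counters) confine the write to one explicit, commented const cast, exactly as the mode_chosen_counts and partition-count hunks do. A sketch of that pattern with a hypothetical encoder struct:

typedef struct {
  unsigned int mode_chosen_counts[10];
} ENCODER;

static void note_mode(const ENCODER *enc, int mode) {
  /* The encoder is logically read-only here; only the stats counter
   * mutates, so the cast stays on one visible line. */
  unsigned int *const counts =
      (unsigned int *)enc->mode_chosen_counts;  /* cast const away */
  ++counts[mode];
}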
diff --git a/av1/encoder/encodemb.c b/av1/encoder/encodemb.c
index c5dfadd..192ec47 100644
--- a/av1/encoder/encodemb.c
+++ b/av1/encoder/encodemb.c
@@ -85,10 +85,10 @@
   const int16_t *const dequant_ptr = pd->dequant;
   const uint8_t *const band_translate = get_band_translate(tx_size);
   TX_TYPE tx_type = get_tx_type(plane_type, xd, block, tx_size);
-  const scan_order *const so =
+  const SCAN_ORDER *const scan_order =
       get_scan(tx_size, tx_type, is_inter_block(&xd->mi[0]->mbmi));
-  const int16_t *const scan = so->scan;
-  const int16_t *const nb = so->neighbors;
+  const int16_t *const scan = scan_order->scan;
+  const int16_t *const nb = scan_order->neighbors;
 #if CONFIG_AOM_QM
   int seg_id = xd->mi[0]->mbmi.segment_id;
   const qm_val_t *iqmatrix = pd->seg_iqmatrix[seg_id][!ref][tx_size];
@@ -447,7 +447,7 @@
   PLANE_TYPE plane_type = (plane == 0) ? PLANE_TYPE_Y : PLANE_TYPE_UV;
   TX_TYPE tx_type = get_tx_type(plane_type, xd, block, tx_size);
   const int is_inter = is_inter_block(&xd->mi[0]->mbmi);
-  const scan_order *const scan_order = get_scan(tx_size, tx_type, is_inter);
+  const SCAN_ORDER *const scan_order = get_scan(tx_size, tx_type, is_inter);
   tran_low_t *const coeff = BLOCK_OFFSET(p->coeff, block);
   tran_low_t *const qcoeff = BLOCK_OFFSET(p->qcoeff, block);
   tran_low_t *const dqcoeff = BLOCK_OFFSET(pd->dqcoeff, block);
@@ -520,7 +520,7 @@
   PLANE_TYPE plane_type = (plane == 0) ? PLANE_TYPE_Y : PLANE_TYPE_UV;
   TX_TYPE tx_type = get_tx_type(plane_type, xd, block, tx_size);
   const int is_inter = is_inter_block(&xd->mi[0]->mbmi);
-  const scan_order *const scan_order = get_scan(tx_size, tx_type, is_inter);
+  const SCAN_ORDER *const scan_order = get_scan(tx_size, tx_type, is_inter);
   tran_low_t *const coeff = BLOCK_OFFSET(p->coeff, block);
   tran_low_t *const qcoeff = BLOCK_OFFSET(p->qcoeff, block);
   tran_low_t *const dqcoeff = BLOCK_OFFSET(pd->dqcoeff, block);
@@ -593,7 +593,7 @@
   const int is_inter = is_inter_block(&xd->mi[0]->mbmi);
   PLANE_TYPE plane_type = (plane == 0) ? PLANE_TYPE_Y : PLANE_TYPE_UV;
   TX_TYPE tx_type = get_tx_type(plane_type, xd, block, tx_size);
-  const scan_order *const scan_order = get_scan(tx_size, tx_type, is_inter);
+  const SCAN_ORDER *const scan_order = get_scan(tx_size, tx_type, is_inter);
   int dq = get_dq_profile_from_ctx(xd->qindex[xd->mi[0]->mbmi.segment_id], ctx,
                                    is_inter, plane_type);
   tran_low_t *const coeff = BLOCK_OFFSET(p->coeff, block);
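
Renaming the type to SCAN_ORDER lets the local keep the natural name, so "const SCAN_ORDER *const scan_order" reads as type-then-variable instead of repeating one identifier for both roles. A self-contained sketch:

#include <stdint.h>

typedef struct {
  const int16_t *scan;
  const int16_t *neighbors;
} SCAN_ORDER;

/* Unambiguous now: SCAN_ORDER is the type, scan_order the variable. */
static const int16_t *first_scan_pos(const SCAN_ORDER *scan_order) {
  return scan_order->scan;
}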
diff --git a/av1/encoder/encodemv.c b/av1/encoder/encodemv.c
index 13c8d87..da6f35c 100644
--- a/av1/encoder/encodemv.c
+++ b/av1/encoder/encodemv.c
@@ -111,8 +111,7 @@
     if (c == MV_CLASS_0) {
       cost += class0_cost[d];
     } else {
-      int i, b;
-      b = c + CLASS0_BITS - 1; /* number of bits */
+      const int b = c + CLASS0_BITS - 1; /* number of bits */
       for (i = 0; i < b; ++i) cost += bits_cost[i][((d >> i) & 1)];
     }
     if (c == MV_CLASS_0) {
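
The encodemv.c hunk folds the bit count into a single const initializer. The loop it feeds charges one table entry per offset bit, as in this worked sketch (the table shape and the CLASS0_BITS value are assumptions, not the real definitions):

static int mv_bits_cost(const int bits_cost[16][2], int d, int c) {
  const int class0_bits = 1;          /* assumed CLASS0_BITS */
  const int b = c + class0_bits - 1;  /* number of bits */
  int i, cost = 0;
  for (i = 0; i < b; ++i) cost += bits_cost[i][(d >> i) & 1];
  return cost;
}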
diff --git a/av1/encoder/encoder.c b/av1/encoder/encoder.c
index ad33134..c39b78a 100644
--- a/av1/encoder/encoder.c
+++ b/av1/encoder/encoder.c
@@ -101,6 +101,10 @@
 FILE *keyfile;
 #endif
 
+#if CONFIG_INTERNAL_STATS
+typedef enum { Y, U, V, ALL } STAT_TYPE;
+#endif  // CONFIG_INTERNAL_STATS
+
 static INLINE void Scale2Ratio(AOM_SCALING mode, int *hr, int *hs) {
   switch (mode) {
     case NORMAL:
@@ -2267,7 +2271,8 @@
   av1_set_speed_features_framesize_dependent(cpi);
 
   // Allocate memory to store variances for a frame.
-  CHECK_MEM_ERROR(cm, cpi->source_diff_var, aom_calloc(cm->MBs, sizeof(diff)));
+  CHECK_MEM_ERROR(cm, cpi->source_diff_var,
+                  aom_calloc(cm->MBs, sizeof(*cpi->source_diff_var)));
   cpi->source_var_thresh = 0;
   cpi->frames_till_next_var_check = 0;
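
Sizing the allocation with sizeof(*cpi->source_diff_var) instead of sizeof(diff) keys the byte count to the pointer's own element type, so the line survives the diff -> DIFF rename untouched. A runnable sketch of the idiom:

#include <stdlib.h>

typedef struct { unsigned int sse; int sum; unsigned int var; } DIFF;

int main(void) {
  DIFF *p = calloc(64, sizeof(*p));  /* tracks the element type */
  free(p);
  return 0;
}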
 
diff --git a/av1/encoder/encoder.h b/av1/encoder/encoder.h
index 8819655..e2046ec 100644
--- a/av1/encoder/encoder.h
+++ b/av1/encoder/encoder.h
@@ -306,13 +306,15 @@
   unsigned char *map;
 } ActiveMap;
 
-typedef enum { Y, U, V, ALL } STAT_TYPE;
+#define NUM_STAT_TYPES 4  // types of stats: Y, U, V and ALL
 
 typedef struct IMAGE_STAT {
-  double stat[ALL + 1];
+  double stat[NUM_STAT_TYPES];
   double worst;
 } ImageStat;
 
+#undef NUM_STAT_TYPES
+
 typedef struct {
   int ref_count;
   YV12_BUFFER_CONFIG buf;
@@ -516,7 +518,7 @@
                     // scaled.
 
   // Store frame variance info in SOURCE_VAR_BASED_PARTITION search type.
-  diff *source_diff_var;
+  DIFF *source_diff_var;
   // The threshold used in SOURCE_VAR_BASED_PARTITION search type.
   unsigned int source_var_thresh;
   int frames_till_next_var_check;
@@ -709,7 +711,7 @@
     return cpi->alt_fb_idx;
 }
 
-static INLINE int get_ref_frame_buf_idx(const AV1_COMP *const cpi,
+static INLINE int get_ref_frame_buf_idx(const AV1_COMP *cpi,
                                         MV_REFERENCE_FRAME ref_frame) {
   const AV1_COMMON *const cm = &cpi->common;
   const int map_idx = get_ref_frame_map_idx(cpi, ref_frame);
@@ -717,15 +719,15 @@
 }
 
 static INLINE YV12_BUFFER_CONFIG *get_ref_frame_buffer(
-    AV1_COMP *cpi, MV_REFERENCE_FRAME ref_frame) {
-  AV1_COMMON *const cm = &cpi->common;
+    const AV1_COMP *cpi, MV_REFERENCE_FRAME ref_frame) {
+  const AV1_COMMON *const cm = &cpi->common;
   const int buf_idx = get_ref_frame_buf_idx(cpi, ref_frame);
   return buf_idx != INVALID_IDX ? &cm->buffer_pool->frame_bufs[buf_idx].buf
                                 : NULL;
 }
 
 static INLINE const YV12_BUFFER_CONFIG *get_upsampled_ref(
-    AV1_COMP *cpi, const MV_REFERENCE_FRAME ref_frame) {
+    const AV1_COMP *cpi, const MV_REFERENCE_FRAME ref_frame) {
   // Use up-sampled reference frames.
   const int buf_idx =
       cpi->upsampled_ref_idx[get_ref_frame_map_idx(cpi, ref_frame)];
@@ -796,7 +798,7 @@
 }
 #endif  // CONFIG_EXT_REFS
 
-static INLINE void set_ref_ptrs(AV1_COMMON *cm, MACROBLOCKD *xd,
+static INLINE void set_ref_ptrs(const AV1_COMMON *cm, MACROBLOCKD *xd,
                                 MV_REFERENCE_FRAME ref0,
                                 MV_REFERENCE_FRAME ref1) {
   xd->block_refs[0] =
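
With the STAT_TYPE enum moved into encoder.c, the header only needs the array length, so a macro supplies it and is #undef'd immediately to keep it from leaking into other translation units. Header-side sketch of the pattern:

#define NUM_STAT_TYPES 4  /* Y, U, V and ALL */

typedef struct IMAGE_STAT {
  double stat[NUM_STAT_TYPES];
  double worst;
} ImageStat;

#undef NUM_STAT_TYPES  /* scope the macro to this one use */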
diff --git a/av1/encoder/firstpass.c b/av1/encoder/firstpass.c
index 396f0d7..003ecb3 100644
--- a/av1/encoder/firstpass.c
+++ b/av1/encoder/firstpass.c
@@ -2712,7 +2712,6 @@
   // If this is an arf frame then we don't want to read the stats file or
   // advance the input pointer as we already have what we need.
   if (gf_group->update_type[gf_group->index] == ARF_UPDATE) {
-    int target_rate;
     configure_buffer_updates(cpi);
     target_rate = gf_group->bit_allocation[gf_group->index];
     target_rate = av1_rc_clamp_pframe_target_size(cpi, target_rate);
diff --git a/av1/encoder/mbgraph.c b/av1/encoder/mbgraph.c
index c1ccb95..9bbed2b 100644
--- a/av1/encoder/mbgraph.c
+++ b/av1/encoder/mbgraph.c
@@ -110,7 +110,6 @@
   // If the current best reference mv is not centered on 0,0 then do a 0,0
   // based search as well.
   if (ref_mv->row != 0 || ref_mv->col != 0) {
-    unsigned int tmp_err;
     MV zero_ref_mv = { 0, 0 };
 
     tmp_err = do_16x16_motion_iteration(cpi, &zero_ref_mv, mb_row, mb_col);
diff --git a/av1/encoder/mcomp.c b/av1/encoder/mcomp.c
index bd90739..3fbceab 100644
--- a/av1/encoder/mcomp.c
+++ b/av1/encoder/mcomp.c
@@ -861,36 +861,35 @@
   const MV fcenter_mv = { ref_mv->row >> 3, ref_mv->col >> 3 };
   const int br = best_mv->row;
   const int bc = best_mv->col;
-  MV this_mv;
   int i;
   unsigned int sse;
+  const MV this_mv = { br, bc };
 
-  this_mv.row = br;
-  this_mv.col = bc;
   cost_list[0] =
       fn_ptr->vf(what->buf, what->stride, get_buf_from_mv(in_what, &this_mv),
                  in_what->stride, &sse) +
       mvsad_err_cost(x, &this_mv, &fcenter_mv, sadpb);
   if (check_bounds(x, br, bc, 1)) {
     for (i = 0; i < 4; i++) {
-      const MV this_mv = { br + neighbors[i].row, bc + neighbors[i].col };
+      const MV neighbor_mv = { br + neighbors[i].row, bc + neighbors[i].col };
       cost_list[i + 1] = fn_ptr->vf(what->buf, what->stride,
-                                    get_buf_from_mv(in_what, &this_mv),
+                                    get_buf_from_mv(in_what, &neighbor_mv),
                                     in_what->stride, &sse) +
-                         mv_err_cost(&this_mv, &fcenter_mv, x->nmvjointcost,
+                         mv_err_cost(&neighbor_mv, &fcenter_mv, x->nmvjointcost,
                                      x->mvcost, x->errorperbit);
     }
   } else {
     for (i = 0; i < 4; i++) {
-      const MV this_mv = { br + neighbors[i].row, bc + neighbors[i].col };
-      if (!is_mv_in(x, &this_mv))
+      const MV neighbor_mv = { br + neighbors[i].row, bc + neighbors[i].col };
+      if (!is_mv_in(x, &neighbor_mv))
         cost_list[i + 1] = INT_MAX;
       else
-        cost_list[i + 1] = fn_ptr->vf(what->buf, what->stride,
-                                      get_buf_from_mv(in_what, &this_mv),
-                                      in_what->stride, &sse) +
-                           mv_err_cost(&this_mv, &fcenter_mv, x->nmvjointcost,
-                                       x->mvcost, x->errorperbit);
+        cost_list[i + 1] =
+            fn_ptr->vf(what->buf, what->stride,
+                       get_buf_from_mv(in_what, &neighbor_mv), in_what->stride,
+                       &sse) +
+            mv_err_cost(&neighbor_mv, &fcenter_mv, x->nmvjointcost, x->mvcost,
+                        x->errorperbit);
     }
   }
 }
@@ -1187,12 +1186,13 @@
   // cost_list[3]: cost/sad at delta { 0, 1} (right)  from the best integer pel
   // cost_list[4]: cost/sad at delta {-1, 0} (top)    from the best integer pel
   if (cost_list) {
-    const MV best_mv = { br, bc };
+    const MV best_int_mv = { br, bc };
     if (last_is_4) {
-      calc_int_sad_list(x, center_mv, sad_per_bit, vfp, &best_mv, cost_list,
+      calc_int_sad_list(x, center_mv, sad_per_bit, vfp, &best_int_mv, cost_list,
                         use_mvcost, bestsad);
     } else {
-      calc_int_cost_list(x, center_mv, sad_per_bit, vfp, &best_mv, cost_list);
+      calc_int_cost_list(x, center_mv, sad_per_bit, vfp, &best_int_mv,
+                         cost_list);
     }
   }
   x->best_mv.as_mv.row = br;
@@ -1692,7 +1692,7 @@
   const int ref_stride = xd->plane[0].pre[0].stride;
   uint8_t const *ref_buf, *src_buf;
   MV *tmp_mv = &xd->mi[0]->mbmi.mv[0].as_mv;
-  unsigned int best_sad, tmp_sad, this_sad[4];
+  unsigned int best_sad, tmp_sad, sad_arr[4];
   MV this_mv;
   const int norm_factor = 3 + (bw >> 5);
   const YV12_BUFFER_CONFIG *scaled_ref_frame =
@@ -1762,23 +1762,23 @@
       ref_buf - ref_stride, ref_buf - 1, ref_buf + 1, ref_buf + ref_stride,
     };
 
-    cpi->fn_ptr[bsize].sdx4df(src_buf, src_stride, pos, ref_stride, this_sad);
+    cpi->fn_ptr[bsize].sdx4df(src_buf, src_stride, pos, ref_stride, sad_arr);
   }
 
   for (idx = 0; idx < 4; ++idx) {
-    if (this_sad[idx] < best_sad) {
-      best_sad = this_sad[idx];
+    if (sad_arr[idx] < best_sad) {
+      best_sad = sad_arr[idx];
       tmp_mv->row = search_pos[idx].row + this_mv.row;
       tmp_mv->col = search_pos[idx].col + this_mv.col;
     }
   }
 
-  if (this_sad[0] < this_sad[3])
+  if (sad_arr[0] < sad_arr[3])
     this_mv.row -= 1;
   else
     this_mv.row += 1;
 
-  if (this_sad[1] < this_sad[2])
+  if (sad_arr[1] < sad_arr[2])
     this_mv.col -= 1;
   else
     this_mv.col += 1;
@@ -1805,9 +1805,9 @@
 /* do_refine: If last step (1-away) of n-step search doesn't pick the center
               point as the best match, we will do a final 1-away diamond
               refining search  */
-static int full_pixel_diamond(AV1_COMP *cpi, MACROBLOCK *x, MV *mvp_full,
-                              int step_param, int sadpb, int further_steps,
-                              int do_refine, int *cost_list,
+static int full_pixel_diamond(const AV1_COMP *const cpi, MACROBLOCK *x,
+                              MV *mvp_full, int step_param, int sadpb,
+                              int further_steps, int do_refine, int *cost_list,
                               const aom_variance_fn_ptr_t *fn_ptr,
                               const MV *ref_mv) {
   MV temp_mv;
@@ -1870,7 +1870,7 @@
 #define MIN_INTERVAL 1
 // Runs a limited range exhaustive mesh search using a pattern set
 // according to the encode speed profile.
-static int full_pixel_exhaustive(AV1_COMP *cpi, MACROBLOCK *x,
+static int full_pixel_exhaustive(const AV1_COMP *const cpi, MACROBLOCK *x,
                                  const MV *centre_mv_full, int sadpb,
                                  int *cost_list,
                                  const aom_variance_fn_ptr_t *fn_ptr,
@@ -2243,7 +2243,7 @@
 }
 
 #define MIN_EX_SEARCH_LIMIT 128
-static int is_exhaustive_allowed(AV1_COMP *cpi, MACROBLOCK *x) {
+static int is_exhaustive_allowed(const AV1_COMP *const cpi, MACROBLOCK *x) {
   const SPEED_FEATURES *const sf = &cpi->sf;
   const int max_ex =
       AOMMAX(MIN_EX_SEARCH_LIMIT,
@@ -2254,13 +2254,13 @@
          (*x->ex_search_count_ptr <= max_ex) && !cpi->rc.is_src_frame_alt_ref;
 }
 
-int av1_full_pixel_search(AV1_COMP *cpi, MACROBLOCK *x, BLOCK_SIZE bsize,
+int av1_full_pixel_search(const AV1_COMP *cpi, MACROBLOCK *x, BLOCK_SIZE bsize,
                           MV *mvp_full, int step_param, int error_per_bit,
                           int *cost_list, const MV *ref_mv, int var_max,
                           int rd) {
   const SPEED_FEATURES *const sf = &cpi->sf;
   const SEARCH_METHODS method = sf->mv.search_method;
-  aom_variance_fn_ptr_t *fn_ptr = &cpi->fn_ptr[bsize];
+  const aom_variance_fn_ptr_t *fn_ptr = &cpi->fn_ptr[bsize];
   int var = 0;
 
   if (cost_list) {
@@ -2530,7 +2530,7 @@
 }
 
 int av1_find_best_masked_sub_pixel_tree_up(
-    AV1_COMP *cpi, MACROBLOCK *x, const uint8_t *mask, int mask_stride,
+    const AV1_COMP *cpi, MACROBLOCK *x, const uint8_t *mask, int mask_stride,
     int mi_row, int mi_col, MV *bestmv, const MV *ref_mv, int allow_hp,
     int error_per_bit, const aom_variance_fn_ptr_t *vfp, int forced_stop,
     int iters_per_step, int *mvjcost, int *mvcost[2], int *distortion,
@@ -3031,7 +3031,7 @@
 }
 
 int av1_find_best_obmc_sub_pixel_tree_up(
-    AV1_COMP *cpi, MACROBLOCK *x, int mi_row, int mi_col, MV *bestmv,
+    const AV1_COMP *cpi, MACROBLOCK *x, int mi_row, int mi_col, MV *bestmv,
     const MV *ref_mv, int allow_hp, int error_per_bit,
     const aom_variance_fn_ptr_t *vfp, int forced_stop, int iters_per_step,
     int *mvjcost, int *mvcost[2], int *distortion, unsigned int *sse1,
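
Two moves recur in the mcomp.c hunks above: the inner-scope value gets a distinct name (`neighbor_mv`, `best_int_mv`, `sad_arr`) so it no longer shadows an identifier from the enclosing scope, and `this_mv` becomes a `const` compound initializer instead of two field assignments. A minimal sketch of both, with a hypothetical `neighbor_cost` function and `MV` reduced to its two fields:

    typedef struct { int row, col; } MV;

    int neighbor_cost(int br, int bc) {
      static const MV offsets[4] = { { -1, 0 }, { 0, -1 }, { 0, 1 }, { 1, 0 } };
      const MV this_mv = { br, bc }; /* immutable from initialization on */
      int total = this_mv.row + this_mv.col;
      int i;
      for (i = 0; i < 4; i++) {
        /* Distinct name: no shadowing, and this_mv stays usable in the loop. */
        const MV neighbor_mv = { br + offsets[i].row, bc + offsets[i].col };
        total += neighbor_mv.row + neighbor_mv.col;
      }
      return total;
    }
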
diff --git a/av1/encoder/mcomp.h b/av1/encoder/mcomp.h
index 8c42825..e244a3f 100644
--- a/av1/encoder/mcomp.h
+++ b/av1/encoder/mcomp.h
@@ -114,10 +114,10 @@
 
 struct AV1_COMP;
 
-int av1_full_pixel_search(struct AV1_COMP *cpi, MACROBLOCK *x, BLOCK_SIZE bsize,
-                          MV *mvp_full, int step_param, int error_per_bit,
-                          int *cost_list, const MV *ref_mv, int var_max,
-                          int rd);
+int av1_full_pixel_search(const struct AV1_COMP *cpi, MACROBLOCK *x,
+                          BLOCK_SIZE bsize, MV *mvp_full, int step_param,
+                          int error_per_bit, int *cost_list, const MV *ref_mv,
+                          int var_max, int rd);
 
 #if CONFIG_EXT_INTER
 int av1_find_best_masked_sub_pixel_tree(
@@ -127,11 +127,11 @@
     int *mvjcost, int *mvcost[2], int *distortion, unsigned int *sse1,
     int is_second);
 int av1_find_best_masked_sub_pixel_tree_up(
-    struct AV1_COMP *cpi, MACROBLOCK *x, const uint8_t *mask, int mask_stride,
-    int mi_row, int mi_col, MV *bestmv, const MV *ref_mv, int allow_hp,
-    int error_per_bit, const aom_variance_fn_ptr_t *vfp, int forced_stop,
-    int iters_per_step, int *mvjcost, int *mvcost[2], int *distortion,
-    unsigned int *sse1, int is_second, int use_upsampled_ref);
+    const struct AV1_COMP *cpi, MACROBLOCK *x, const uint8_t *mask,
+    int mask_stride, int mi_row, int mi_col, MV *bestmv, const MV *ref_mv,
+    int allow_hp, int error_per_bit, const aom_variance_fn_ptr_t *vfp,
+    int forced_stop, int iters_per_step, int *mvjcost, int *mvcost[2],
+    int *distortion, unsigned int *sse1, int is_second, int use_upsampled_ref);
 int av1_masked_full_pixel_diamond(const struct AV1_COMP *cpi, MACROBLOCK *x,
                                   const uint8_t *mask, int mask_stride,
                                   MV *mvp_full, int step_param, int sadpb,
@@ -147,8 +147,8 @@
                                 const aom_variance_fn_ptr_t *fn_ptr,
                                 const MV *ref_mv, MV *dst_mv, int is_second);
 int av1_find_best_obmc_sub_pixel_tree_up(
-    struct AV1_COMP *cpi, MACROBLOCK *x, int mi_row, int mi_col, MV *bestmv,
-    const MV *ref_mv, int allow_hp, int error_per_bit,
+    const struct AV1_COMP *cpi, MACROBLOCK *x, int mi_row, int mi_col,
+    MV *bestmv, const MV *ref_mv, int allow_hp, int error_per_bit,
     const aom_variance_fn_ptr_t *vfp, int forced_stop, int iters_per_step,
     int *mvjcost, int *mvcost[2], int *distortion, unsigned int *sse1,
     int is_second, int use_upsampled_ref);
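
The mcomp.h prototypes have to change in lockstep with the mcomp.c definitions: `const struct AV1_COMP *` and `struct AV1_COMP *` are different parameter types, so a header that disagrees with the definition is a conflicting declaration, not a style nit. A minimal sketch (hypothetical `search_stub`; opaque struct as in the real header):

    typedef struct AV1_COMP AV1_COMP; /* opaque forward declaration */

    int search_stub(const AV1_COMP *cpi); /* prototype, as in the header */

    int search_stub(const AV1_COMP *cpi) { /* definition must match */
      (void)cpi;
      return 0;
    }
    /* Defining "int search_stub(AV1_COMP *cpi)" instead would be rejected
       as a conflicting type for search_stub. */
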
diff --git a/av1/encoder/pickrst.c b/av1/encoder/pickrst.c
index 28bdcc3..62303b7 100644
--- a/av1/encoder/pickrst.c
+++ b/av1/encoder/pickrst.c
@@ -351,7 +351,6 @@
   memset(A, 0, sizeof(A));
   memset(B, 0, sizeof(B));
   for (i = 0; i < RESTORATION_WIN; i++) {
-    int j;
     for (j = 0; j < RESTORATION_WIN; ++j) {
       const int jj = wrap_index(j);
       A[jj] += Mc[i][j] * b[i];
@@ -399,7 +398,6 @@
   memset(A, 0, sizeof(A));
   memset(B, 0, sizeof(B));
   for (i = 0; i < RESTORATION_WIN; i++) {
-    int j;
     const int ii = wrap_index(i);
     for (j = 0; j < RESTORATION_WIN; j++) A[ii] += Mc[i][j] * a[j];
   }
diff --git a/av1/encoder/quantize.c b/av1/encoder/quantize.c
index d3b8c1c..827e6d8 100644
--- a/av1/encoder/quantize.c
+++ b/av1/encoder/quantize.c
@@ -344,7 +344,7 @@
                             const MACROBLOCK_PLANE *p, tran_low_t *qcoeff_ptr,
                             const MACROBLOCKD_PLANE *pd,
                             tran_low_t *dqcoeff_ptr, uint16_t *eob_ptr,
-                            const scan_order *sc, const QUANT_PARAM *qparam) {
+                            const SCAN_ORDER *sc, const QUANT_PARAM *qparam) {
   // obsolete skip_block
   const int skip_block = 0;
 
@@ -362,7 +362,7 @@
 void av1_quantize_b_facade(const tran_low_t *coeff_ptr, intptr_t n_coeffs,
                            const MACROBLOCK_PLANE *p, tran_low_t *qcoeff_ptr,
                            const MACROBLOCKD_PLANE *pd, tran_low_t *dqcoeff_ptr,
-                           uint16_t *eob_ptr, const scan_order *sc,
+                           uint16_t *eob_ptr, const SCAN_ORDER *sc,
                            const QUANT_PARAM *qparam) {
   // obsolete skip_block
   const int skip_block = 0;
@@ -382,7 +382,7 @@
                             const MACROBLOCK_PLANE *p, tran_low_t *qcoeff_ptr,
                             const MACROBLOCKD_PLANE *pd,
                             tran_low_t *dqcoeff_ptr, uint16_t *eob_ptr,
-                            const scan_order *sc, const QUANT_PARAM *qparam) {
+                            const SCAN_ORDER *sc, const QUANT_PARAM *qparam) {
   // obsolete skip_block
   const int skip_block = 0;
   (void)sc;
@@ -402,7 +402,7 @@
                                    tran_low_t *qcoeff_ptr,
                                    const MACROBLOCKD_PLANE *pd,
                                    tran_low_t *dqcoeff_ptr, uint16_t *eob_ptr,
-                                   const scan_order *sc,
+                                   const SCAN_ORDER *sc,
                                    const QUANT_PARAM *qparam) {
   // obsolete skip_block
   const int skip_block = 0;
@@ -418,7 +418,7 @@
                                   tran_low_t *qcoeff_ptr,
                                   const MACROBLOCKD_PLANE *pd,
                                   tran_low_t *dqcoeff_ptr, uint16_t *eob_ptr,
-                                  const scan_order *sc,
+                                  const SCAN_ORDER *sc,
                                   const QUANT_PARAM *qparam) {
   // obsolete skip_block
   const int skip_block = 0;
@@ -434,7 +434,7 @@
                                    tran_low_t *qcoeff_ptr,
                                    const MACROBLOCKD_PLANE *pd,
                                    tran_low_t *dqcoeff_ptr, uint16_t *eob_ptr,
-                                   const scan_order *sc,
+                                   const SCAN_ORDER *sc,
                                    const QUANT_PARAM *qparam) {
   // obsolete skip_block
   const int skip_block = 0;
@@ -1115,9 +1115,9 @@
 #if CONFIG_NEW_QUANT
     for (dq = 0; dq < QUANT_PROFILES; dq++) {
       for (i = 0; i < COEF_BANDS; i++) {
-        const int quant = cpi->y_dequant[q][i != 0];
+        const int y_quant = cpi->y_dequant[q][i != 0];
         const int uvquant = cpi->uv_dequant[q][i != 0];
-        av1_get_dequant_val_nuq(quant, i, cpi->y_dequant_val_nuq[dq][q][i],
+        av1_get_dequant_val_nuq(y_quant, i, cpi->y_dequant_val_nuq[dq][q][i],
                                 quants->y_cuml_bins_nuq[dq][q][i], dq);
         av1_get_dequant_val_nuq(uvquant, i, cpi->uv_dequant_val_nuq[dq][q][i],
                                 quants->uv_cuml_bins_nuq[dq][q][i], dq);
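
The `scan_order` → `SCAN_ORDER` type rename running through quantize.c (and rdopt.c further down) frees the lower-case spelling for use as a variable: with the old typedef, code fell back on opaque names like `so`, because `const scan_order *scan_order` reads like a shadowing bug even though it is legal C. A minimal sketch of the convention, with stub tables rather than the real scan data:

    typedef struct {
      const short *scan;
      const short *neighbors;
    } SCAN_ORDER; /* upper case marks it as a type, never an object */

    static const short stub_scan[1] = { 0 };
    static const SCAN_ORDER stub_order = { stub_scan, stub_scan };

    static const SCAN_ORDER *get_scan_stub(void) { return &stub_order; }

    int first_scan_index(void) {
      /* The descriptive variable name no longer collides with the type. */
      const SCAN_ORDER *scan_order = get_scan_stub();
      return scan_order->scan[0];
    }
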
diff --git a/av1/encoder/quantize.h b/av1/encoder/quantize.h
index 7394887..1c32ee1 100644
--- a/av1/encoder/quantize.h
+++ b/av1/encoder/quantize.h
@@ -28,7 +28,7 @@
                                  tran_low_t *qcoeff_ptr,
                                  const MACROBLOCKD_PLANE *pd,
                                  tran_low_t *dqcoeff_ptr, uint16_t *eob_ptr,
-                                 const scan_order *sc,
+                                 const SCAN_ORDER *sc,
                                  const QUANT_PARAM *qparam);
 
 typedef struct {
@@ -80,19 +80,19 @@
                             const MACROBLOCK_PLANE *p, tran_low_t *qcoeff_ptr,
                             const MACROBLOCKD_PLANE *pd,
                             tran_low_t *dqcoeff_ptr, uint16_t *eob_ptr,
-                            const scan_order *sc, const QUANT_PARAM *qparam);
+                            const SCAN_ORDER *sc, const QUANT_PARAM *qparam);
 
 void av1_quantize_b_facade(const tran_low_t *coeff_ptr, intptr_t n_coeffs,
                            const MACROBLOCK_PLANE *p, tran_low_t *qcoeff_ptr,
                            const MACROBLOCKD_PLANE *pd, tran_low_t *dqcoeff_ptr,
-                           uint16_t *eob_ptr, const scan_order *sc,
+                           uint16_t *eob_ptr, const SCAN_ORDER *sc,
                            const QUANT_PARAM *qparam);
 
 void av1_quantize_dc_facade(const tran_low_t *coeff_ptr, intptr_t n_coeffs,
                             const MACROBLOCK_PLANE *p, tran_low_t *qcoeff_ptr,
                             const MACROBLOCKD_PLANE *pd,
                             tran_low_t *dqcoeff_ptr, uint16_t *eob_ptr,
-                            const scan_order *sc, const QUANT_PARAM *qparam);
+                            const SCAN_ORDER *sc, const QUANT_PARAM *qparam);
 
 #if CONFIG_NEW_QUANT
 void quantize_dc_nuq(const tran_low_t *coeff_ptr, intptr_t n_coeffs,
@@ -128,7 +128,7 @@
                                    tran_low_t *qcoeff_ptr,
                                    const MACROBLOCKD_PLANE *pd,
                                    tran_low_t *dqcoeff_ptr, uint16_t *eob_ptr,
-                                   const scan_order *sc,
+                                   const SCAN_ORDER *sc,
                                    const QUANT_PARAM *qparam);
 
 void av1_highbd_quantize_b_facade(const tran_low_t *coeff_ptr,
@@ -136,7 +136,7 @@
                                   tran_low_t *qcoeff_ptr,
                                   const MACROBLOCKD_PLANE *pd,
                                   tran_low_t *dqcoeff_ptr, uint16_t *eob_ptr,
-                                  const scan_order *sc,
+                                  const SCAN_ORDER *sc,
                                   const QUANT_PARAM *qparam);
 
 void av1_highbd_quantize_dc_facade(const tran_low_t *coeff_ptr,
@@ -144,7 +144,7 @@
                                    tran_low_t *qcoeff_ptr,
                                    const MACROBLOCKD_PLANE *pd,
                                    tran_low_t *dqcoeff_ptr, uint16_t *eob_ptr,
-                                   const scan_order *sc,
+                                   const SCAN_ORDER *sc,
                                    const QUANT_PARAM *qparam);
 
 void av1_highbd_quantize_dc(const tran_low_t *coeff_ptr, int n_coeffs,
diff --git a/av1/encoder/ratectrl.c b/av1/encoder/ratectrl.c
index cbc5173..aaca103 100644
--- a/av1/encoder/ratectrl.c
+++ b/av1/encoder/ratectrl.c
@@ -776,20 +776,20 @@
 
   if (frame_is_intra_only(cm)) {
     if (oxcf->rc_mode == AOM_Q) {
-      int qindex = cq_level;
-      double q = av1_convert_qindex_to_q(qindex, cm->bit_depth);
-      int delta_qindex = av1_compute_qdelta(rc, q, q * 0.25, cm->bit_depth);
+      const int qindex = cq_level;
+      const double q_val = av1_convert_qindex_to_q(qindex, cm->bit_depth);
+      const int delta_qindex =
+          av1_compute_qdelta(rc, q_val, q_val * 0.25, cm->bit_depth);
       active_best_quality = AOMMAX(qindex + delta_qindex, rc->best_quality);
     } else if (rc->this_key_frame_forced) {
-      int qindex = rc->last_boosted_qindex;
-      double last_boosted_q = av1_convert_qindex_to_q(qindex, cm->bit_depth);
-      int delta_qindex = av1_compute_qdelta(
+      const int qindex = rc->last_boosted_qindex;
+      const double last_boosted_q =
+          av1_convert_qindex_to_q(qindex, cm->bit_depth);
+      const int delta_qindex = av1_compute_qdelta(
           rc, last_boosted_q, last_boosted_q * 0.75, cm->bit_depth);
       active_best_quality = AOMMAX(qindex + delta_qindex, rc->best_quality);
-    } else {
-      // not first frame of one pass and kf_boost is set
+    } else {  // not first frame of one pass and kf_boost is set
       double q_adj_factor = 1.0;
-      double q_val;
 
       active_best_quality = get_kf_active_quality(
           rc, rc->avg_frame_qindex[KEY_FRAME], cm->bit_depth);
@@ -799,60 +799,56 @@
         q_adj_factor -= 0.25;
       }
 
-      // Convert the adjustment factor to a qindex delta
-      // on active_best_quality.
-      q_val = av1_convert_qindex_to_q(active_best_quality, cm->bit_depth);
-      active_best_quality +=
-          av1_compute_qdelta(rc, q_val, q_val * q_adj_factor, cm->bit_depth);
+      // Convert the adjustment factor to a qindex delta on active_best_quality.
+      {
+        const double q_val =
+            av1_convert_qindex_to_q(active_best_quality, cm->bit_depth);
+        active_best_quality +=
+            av1_compute_qdelta(rc, q_val, q_val * q_adj_factor, cm->bit_depth);
+      }
     }
   } else if (!rc->is_src_frame_alt_ref &&
              (cpi->refresh_golden_frame || cpi->refresh_alt_ref_frame)) {
     // Use the lower of active_worst_quality and recent
     // average Q as basis for GF/ARF best Q limit unless last frame was
     // a key frame.
-    if (rc->frames_since_key > 1 &&
-        rc->avg_frame_qindex[INTER_FRAME] < active_worst_quality) {
-      q = rc->avg_frame_qindex[INTER_FRAME];
-    } else {
-      q = rc->avg_frame_qindex[KEY_FRAME];
-    }
+    q = (rc->frames_since_key > 1 &&
+         rc->avg_frame_qindex[INTER_FRAME] < active_worst_quality)
+            ? rc->avg_frame_qindex[INTER_FRAME]
+            : rc->avg_frame_qindex[KEY_FRAME];
     // For constrained quality don't allow Q less than the cq level
     if (oxcf->rc_mode == AOM_CQ) {
       if (q < cq_level) q = cq_level;
-
       active_best_quality = get_gf_active_quality(rc, q, cm->bit_depth);
-
       // Constrained quality use slightly lower active best.
       active_best_quality = active_best_quality * 15 / 16;
-
     } else if (oxcf->rc_mode == AOM_Q) {
-      int qindex = cq_level;
-      double q = av1_convert_qindex_to_q(qindex, cm->bit_depth);
-      int delta_qindex;
-      if (cpi->refresh_alt_ref_frame)
-        delta_qindex = av1_compute_qdelta(rc, q, q * 0.40, cm->bit_depth);
-      else
-        delta_qindex = av1_compute_qdelta(rc, q, q * 0.50, cm->bit_depth);
+      const int qindex = cq_level;
+      const double q_val = av1_convert_qindex_to_q(qindex, cm->bit_depth);
+      const int delta_qindex =
+          (cpi->refresh_alt_ref_frame)
+              ? av1_compute_qdelta(rc, q_val, q_val * 0.40, cm->bit_depth)
+              : av1_compute_qdelta(rc, q_val, q_val * 0.50, cm->bit_depth);
       active_best_quality = AOMMAX(qindex + delta_qindex, rc->best_quality);
     } else {
       active_best_quality = get_gf_active_quality(rc, q, cm->bit_depth);
     }
   } else {
     if (oxcf->rc_mode == AOM_Q) {
-      int qindex = cq_level;
-      double q = av1_convert_qindex_to_q(qindex, cm->bit_depth);
-      double delta_rate[FIXED_GF_INTERVAL] = { 0.50, 1.0, 0.85, 1.0,
-                                               0.70, 1.0, 0.85, 1.0 };
-      int delta_qindex = av1_compute_qdelta(
-          rc, q, q * delta_rate[cm->current_video_frame % FIXED_GF_INTERVAL],
+      const int qindex = cq_level;
+      const double q_val = av1_convert_qindex_to_q(qindex, cm->bit_depth);
+      const double delta_rate[FIXED_GF_INTERVAL] = { 0.50, 1.0, 0.85, 1.0,
+                                                     0.70, 1.0, 0.85, 1.0 };
+      const int delta_qindex = av1_compute_qdelta(
+          rc, q_val,
+          q_val * delta_rate[cm->current_video_frame % FIXED_GF_INTERVAL],
           cm->bit_depth);
       active_best_quality = AOMMAX(qindex + delta_qindex, rc->best_quality);
     } else {
       // Use the lower of active_worst_quality and recent/average Q.
-      if (cm->current_video_frame > 1)
-        active_best_quality = inter_minq[rc->avg_frame_qindex[INTER_FRAME]];
-      else
-        active_best_quality = inter_minq[rc->avg_frame_qindex[KEY_FRAME]];
+      active_best_quality = (cm->current_video_frame > 1)
+                                ? inter_minq[rc->avg_frame_qindex[INTER_FRAME]]
+                                : inter_minq[rc->avg_frame_qindex[KEY_FRAME]];
       // For the constrained quality mode we don't want
       // q to fall below the cq level.
       if ((oxcf->rc_mode == AOM_CQ) && (active_best_quality < cq_level)) {
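
The ratectrl.c rewrites above are behavior-preserving: each if/else whose only job was to pick a value becomes a conditional expression, which in turn lets the result be bound `const` at the point of declaration. A minimal sketch of the shape, with hypothetical parameters standing in for the rate-control state:

    int pick_active_best(int current_video_frame, int inter_minq_val,
                         int key_minq_val) {
      /* One expression, one const binding: the selected value cannot be
         reassigned by a later branch by accident. */
      const int active_best_quality =
          (current_video_frame > 1) ? inter_minq_val : key_minq_val;
      return active_best_quality;
    }
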
diff --git a/av1/encoder/rd.c b/av1/encoder/rd.c
index 2b9171f..5015837 100644
--- a/av1/encoder/rd.c
+++ b/av1/encoder/rd.c
@@ -619,7 +619,7 @@
   get_entropy_contexts_plane(plane_bsize, tx_size, pd, t_above, t_left);
 }
 
-void av1_mv_pred(AV1_COMP *cpi, MACROBLOCK *x, uint8_t *ref_y_buffer,
+void av1_mv_pred(const AV1_COMP *cpi, MACROBLOCK *x, uint8_t *ref_y_buffer,
                  int ref_y_stride, int ref_frame, BLOCK_SIZE block_size) {
   int i;
   int zero_seen = 0;
diff --git a/av1/encoder/rd.h b/av1/encoder/rd.h
index 3ca4768..c9d21a8 100644
--- a/av1/encoder/rd.h
+++ b/av1/encoder/rd.h
@@ -398,8 +398,7 @@
 void av1_model_rd_from_var_lapndz(int64_t var, unsigned int n,
                                   unsigned int qstep, int *rate, int64_t *dist);
 
-int av1_get_switchable_rate(const struct AV1_COMP *cpi,
-                            const MACROBLOCKD *const xd);
+int av1_get_switchable_rate(const struct AV1_COMP *cpi, const MACROBLOCKD *xd);
 
 int av1_raster_block_offset(BLOCK_SIZE plane_bsize, int raster_block,
                             int stride);
@@ -438,8 +437,9 @@
   return best_rd < ((int64_t)thresh * thresh_fact >> 5) || thresh == INT_MAX;
 }
 
-void av1_mv_pred(struct AV1_COMP *cpi, MACROBLOCK *x, uint8_t *ref_y_buffer,
-                 int ref_y_stride, int ref_frame, BLOCK_SIZE block_size);
+void av1_mv_pred(const struct AV1_COMP *cpi, MACROBLOCK *x,
+                 uint8_t *ref_y_buffer, int ref_y_stride, int ref_frame,
+                 BLOCK_SIZE block_size);
 
 static INLINE void set_error_per_bit(MACROBLOCK *x, int rdmult) {
   x->errorperbit = rdmult >> RD_EPB_SHIFT;
diff --git a/av1/encoder/rdopt.c b/av1/encoder/rdopt.c
index fef5901..463570a 100644
--- a/av1/encoder/rdopt.c
+++ b/av1/encoder/rdopt.c
@@ -135,7 +135,7 @@
   int64_t best_rd;
   int exit_early;
   int use_fast_coef_costing;
-  const scan_order *so;
+  const SCAN_ORDER *scan_order;
   uint8_t skippable;
 };
 
@@ -1070,7 +1070,7 @@
 static int rate_block(int plane, int block, int coeff_ctx, TX_SIZE tx_size,
                       struct rdcost_block_args *args) {
   return av1_cost_coeffs(args->x, plane, block, coeff_ctx, tx_size,
-                         args->so->scan, args->so->neighbors,
+                         args->scan_order->scan, args->scan_order->neighbors,
                          args->use_fast_coef_costing);
 }
 
@@ -1130,11 +1130,11 @@
   if (args->exit_early) return;
 
   if (!is_inter_block(mbmi)) {
-    struct encode_b_args intra_arg = {
+    struct encode_b_args b_args = {
       x, NULL, &mbmi->skip, args->t_above, args->t_left, 1
     };
     av1_encode_block_intra(plane, block, blk_row, blk_col, plane_bsize, tx_size,
-                           &intra_arg);
+                           &b_args);
 
     if (args->cpi->sf.use_transform_domain_distortion) {
       dist_block(args->cpi, x, plane, block, blk_row, blk_col, tx_size, &dist,
@@ -1232,7 +1232,8 @@
   av1_get_entropy_contexts(bsize, tx_size, pd, args.t_above, args.t_left);
 
   tx_type = get_tx_type(pd->plane_type, xd, 0, tx_size);
-  args.so = get_scan(tx_size, tx_type, is_inter_block(&xd->mi[0]->mbmi));
+  args.scan_order =
+      get_scan(tx_size, tx_type, is_inter_block(&xd->mi[0]->mbmi));
 
   av1_foreach_transformed_block_in_plane(xd, bsize, plane, block_rd_txfm,
                                          &args);
@@ -1275,7 +1276,8 @@
   av1_get_entropy_contexts(bsize, tx_size, pd, args.t_above, args.t_left);
 
   tx_type = get_tx_type(pd->plane_type, xd, 0, tx_size);
-  args.so = get_scan(tx_size, tx_type, is_inter_block(&xd->mi[0]->mbmi));
+  args.scan_order =
+      get_scan(tx_size, tx_type, is_inter_block(&xd->mi[0]->mbmi));
 
   block_rd_txfm(plane, 0, 0, 0, get_plane_block_size(bsize, pd), tx_size,
                 &args);
@@ -1294,10 +1296,10 @@
 }
 #endif  // CONFIG_SUPERTX
 
-static int64_t txfm_yrd(AV1_COMP *cpi, MACROBLOCK *x, int *r, int64_t *d,
-                        int *s, int64_t *sse, int64_t ref_best_rd,
+static int64_t txfm_yrd(const AV1_COMP *const cpi, MACROBLOCK *x, int *r,
+                        int64_t *d, int *s, int64_t *sse, int64_t ref_best_rd,
                         BLOCK_SIZE bs, TX_TYPE tx_type, int tx_size) {
-  AV1_COMMON *const cm = &cpi->common;
+  const AV1_COMMON *const cm = &cpi->common;
   MACROBLOCKD *const xd = &x->e_mbd;
   MB_MODE_INFO *const mbmi = &xd->mi[0]->mbmi;
   int64_t rd = INT64_MAX;
@@ -1371,12 +1373,12 @@
   return rd;
 }
 
-static int64_t choose_tx_size_fix_type(AV1_COMP *cpi, BLOCK_SIZE bs,
+static int64_t choose_tx_size_fix_type(const AV1_COMP *const cpi, BLOCK_SIZE bs,
                                        MACROBLOCK *x, int *rate,
                                        int64_t *distortion, int *skip,
                                        int64_t *psse, int64_t ref_best_rd,
                                        TX_TYPE tx_type, int prune) {
-  AV1_COMMON *const cm = &cpi->common;
+  const AV1_COMMON *const cm = &cpi->common;
   MACROBLOCKD *const xd = &x->e_mbd;
   MB_MODE_INFO *const mbmi = &xd->mi[0]->mbmi;
   int r, s;
@@ -1423,7 +1425,7 @@
 #if CONFIG_EXT_TX && CONFIG_RECT_TX
   if (evaluate_rect_tx) {
     const TX_SIZE rect_tx_size = max_txsize_rect_lookup[bs];
-    const int ext_tx_set = get_ext_tx_set(rect_tx_size, bs, 1);
+    ext_tx_set = get_ext_tx_set(rect_tx_size, bs, 1);
     if (ext_tx_used_inter[ext_tx_set][tx_type]) {
       rd = txfm_yrd(cpi, x, &r, &d, &s, &sse, ref_best_rd, bs, tx_type,
                     rect_tx_size);
@@ -1494,18 +1496,19 @@
 }
 
 #if CONFIG_EXT_INTER
-static int64_t estimate_yrd_for_sb(AV1_COMP *cpi, BLOCK_SIZE bs, MACROBLOCK *x,
-                                   int *r, int64_t *d, int *s, int64_t *sse,
-                                   int64_t ref_best_rd) {
+static int64_t estimate_yrd_for_sb(const AV1_COMP *const cpi, BLOCK_SIZE bs,
+                                   MACROBLOCK *x, int *r, int64_t *d, int *s,
+                                   int64_t *sse, int64_t ref_best_rd) {
   return txfm_yrd(cpi, x, r, d, s, sse, ref_best_rd, bs, DCT_DCT,
                   max_txsize_lookup[bs]);
 }
 #endif  // CONFIG_EXT_INTER
 
-static void choose_largest_tx_size(AV1_COMP *cpi, MACROBLOCK *x, int *rate,
-                                   int64_t *distortion, int *skip, int64_t *sse,
-                                   int64_t ref_best_rd, BLOCK_SIZE bs) {
-  AV1_COMMON *const cm = &cpi->common;
+static void choose_largest_tx_size(const AV1_COMP *const cpi, MACROBLOCK *x,
+                                   int *rate, int64_t *distortion, int *skip,
+                                   int64_t *sse, int64_t ref_best_rd,
+                                   BLOCK_SIZE bs) {
+  const AV1_COMMON *const cm = &cpi->common;
   MACROBLOCKD *const xd = &x->e_mbd;
   MB_MODE_INFO *const mbmi = &xd->mi[0]->mbmi;
   TX_TYPE tx_type, best_tx_type = DCT_DCT;
@@ -1629,8 +1632,8 @@
                    mbmi->tx_size, cpi->sf.use_fast_coef_costing);
 }
 
-static void choose_smallest_tx_size(AV1_COMP *cpi, MACROBLOCK *x, int *rate,
-                                    int64_t *distortion, int *skip,
+static void choose_smallest_tx_size(const AV1_COMP *const cpi, MACROBLOCK *x,
+                                    int *rate, int64_t *distortion, int *skip,
                                     int64_t *sse, int64_t ref_best_rd,
                                     BLOCK_SIZE bs) {
   MACROBLOCKD *const xd = &x->e_mbd;
@@ -1643,7 +1646,8 @@
                    mbmi->tx_size, cpi->sf.use_fast_coef_costing);
 }
 
-static void choose_tx_size_type_from_rd(AV1_COMP *cpi, MACROBLOCK *x, int *rate,
+static void choose_tx_size_type_from_rd(const AV1_COMP *const cpi,
+                                        MACROBLOCK *x, int *rate,
                                         int64_t *distortion, int *skip,
                                         int64_t *psse, int64_t ref_best_rd,
                                         BLOCK_SIZE bs) {
@@ -1693,7 +1697,7 @@
 #endif
 }
 
-static void super_block_yrd(AV1_COMP *cpi, MACROBLOCK *x, int *rate,
+static void super_block_yrd(const AV1_COMP *const cpi, MACROBLOCK *x, int *rate,
                             int64_t *distortion, int *skip, int64_t *psse,
                             BLOCK_SIZE bs, int64_t ref_best_rd) {
   MACROBLOCKD *xd = &x->e_mbd;
@@ -1733,7 +1737,7 @@
 
 #if CONFIG_PALETTE
 static int rd_pick_palette_intra_sby(
-    AV1_COMP *cpi, MACROBLOCK *x, BLOCK_SIZE bsize, int palette_ctx,
+    const AV1_COMP *const cpi, MACROBLOCK *x, BLOCK_SIZE bsize, int palette_ctx,
     int dc_mode_cost, PALETTE_MODE_INFO *palette_mode_info,
     uint8_t *best_palette_color_map, TX_SIZE *best_tx, TX_TYPE *best_tx_type,
     PREDICTION_MODE *mode_selected, int64_t *best_rd) {
@@ -1878,13 +1882,11 @@
 }
 #endif  // CONFIG_PALETTE
 
-static int64_t rd_pick_intra4x4block(AV1_COMP *cpi, MACROBLOCK *x, int row,
-                                     int col, PREDICTION_MODE *best_mode,
-                                     const int *bmode_costs, ENTROPY_CONTEXT *a,
-                                     ENTROPY_CONTEXT *l, int *bestrate,
-                                     int *bestratey, int64_t *bestdistortion,
-                                     BLOCK_SIZE bsize, int *y_skip,
-                                     int64_t rd_thresh) {
+static int64_t rd_pick_intra4x4block(
+    const AV1_COMP *const cpi, MACROBLOCK *x, int row, int col,
+    PREDICTION_MODE *best_mode, const int *bmode_costs, ENTROPY_CONTEXT *a,
+    ENTROPY_CONTEXT *l, int *bestrate, int *bestratey, int64_t *bestdistortion,
+    BLOCK_SIZE bsize, int *y_skip, int64_t rd_thresh) {
   PREDICTION_MODE mode;
   MACROBLOCKD *const xd = &x->e_mbd;
   int64_t best_rd = rd_thresh;
@@ -1946,7 +1948,7 @@
                                     dst_stride, xd->bd);
           if (xd->lossless[xd->mi[0]->mbmi.segment_id]) {
             TX_TYPE tx_type = get_tx_type(PLANE_TYPE_Y, xd, block, TX_4X4);
-            const scan_order *so = get_scan(TX_4X4, tx_type, 0);
+            const SCAN_ORDER *scan_order = get_scan(TX_4X4, tx_type, 0);
             const int coeff_ctx =
                 combine_entropy_contexts(*(tempa + idx), *(templ + idy));
 #if CONFIG_NEW_QUANT
@@ -1956,9 +1958,9 @@
             av1_xform_quant(x, 0, block, row + idy, col + idx, BLOCK_8X8,
                             TX_4X4, AV1_XFORM_QUANT_FP);
 #endif  // CONFIG_NEW_QUANT
-            ratey +=
-                av1_cost_coeffs(x, 0, block, coeff_ctx, TX_4X4, so->scan,
-                                so->neighbors, cpi->sf.use_fast_coef_costing);
+            ratey += av1_cost_coeffs(x, 0, block, coeff_ctx, TX_4X4,
+                                     scan_order->scan, scan_order->neighbors,
+                                     cpi->sf.use_fast_coef_costing);
             *(tempa + idx) = !(p->eobs[block] == 0);
             *(templ + idy) = !(p->eobs[block] == 0);
             can_skip &= (p->eobs[block] == 0);
@@ -1971,7 +1973,7 @@
             int64_t dist;
             unsigned int tmp;
             TX_TYPE tx_type = get_tx_type(PLANE_TYPE_Y, xd, block, TX_4X4);
-            const scan_order *so = get_scan(TX_4X4, tx_type, 0);
+            const SCAN_ORDER *scan_order = get_scan(TX_4X4, tx_type, 0);
             const int coeff_ctx =
                 combine_entropy_contexts(*(tempa + idx), *(templ + idy));
 #if CONFIG_NEW_QUANT
@@ -1982,9 +1984,9 @@
                             TX_4X4, AV1_XFORM_QUANT_FP);
 #endif  // CONFIG_NEW_QUANT
             av1_optimize_b(x, 0, block, TX_4X4, coeff_ctx);
-            ratey +=
-                av1_cost_coeffs(x, 0, block, coeff_ctx, TX_4X4, so->scan,
-                                so->neighbors, cpi->sf.use_fast_coef_costing);
+            ratey += av1_cost_coeffs(x, 0, block, coeff_ctx, TX_4X4,
+                                     scan_order->scan, scan_order->neighbors,
+                                     cpi->sf.use_fast_coef_costing);
             *(tempa + idx) = !(p->eobs[block] == 0);
             *(templ + idy) = !(p->eobs[block] == 0);
             can_skip &= (p->eobs[block] == 0);
@@ -2066,7 +2068,7 @@
 
         if (xd->lossless[xd->mi[0]->mbmi.segment_id]) {
           TX_TYPE tx_type = get_tx_type(PLANE_TYPE_Y, xd, block, TX_4X4);
-          const scan_order *so = get_scan(TX_4X4, tx_type, 0);
+          const SCAN_ORDER *scan_order = get_scan(TX_4X4, tx_type, 0);
           const int coeff_ctx =
               combine_entropy_contexts(*(tempa + idx), *(templ + idy));
 #if CONFIG_NEW_QUANT
@@ -2076,9 +2078,9 @@
           av1_xform_quant(x, 0, block, row + idy, col + idx, BLOCK_8X8, TX_4X4,
                           AV1_XFORM_QUANT_B);
 #endif  // CONFIG_NEW_QUANT
-          ratey +=
-              av1_cost_coeffs(x, 0, block, coeff_ctx, TX_4X4, so->scan,
-                              so->neighbors, cpi->sf.use_fast_coef_costing);
+          ratey += av1_cost_coeffs(x, 0, block, coeff_ctx, TX_4X4,
+                                   scan_order->scan, scan_order->neighbors,
+                                   cpi->sf.use_fast_coef_costing);
           *(tempa + idx) = !(p->eobs[block] == 0);
           *(templ + idy) = !(p->eobs[block] == 0);
           can_skip &= (p->eobs[block] == 0);
@@ -2090,7 +2092,7 @@
           int64_t dist;
           unsigned int tmp;
           TX_TYPE tx_type = get_tx_type(PLANE_TYPE_Y, xd, block, TX_4X4);
-          const scan_order *so = get_scan(TX_4X4, tx_type, 0);
+          const SCAN_ORDER *scan_order = get_scan(TX_4X4, tx_type, 0);
           const int coeff_ctx =
               combine_entropy_contexts(*(tempa + idx), *(templ + idy));
 #if CONFIG_NEW_QUANT
@@ -2101,9 +2103,9 @@
                           AV1_XFORM_QUANT_FP);
 #endif  // CONFIG_NEW_QUANT
           av1_optimize_b(x, 0, block, TX_4X4, coeff_ctx);
-          ratey +=
-              av1_cost_coeffs(x, 0, block, coeff_ctx, TX_4X4, so->scan,
-                              so->neighbors, cpi->sf.use_fast_coef_costing);
+          ratey += av1_cost_coeffs(x, 0, block, coeff_ctx, TX_4X4,
+                                   scan_order->scan, scan_order->neighbors,
+                                   cpi->sf.use_fast_coef_costing);
           *(tempa + idx) = !(p->eobs[block] == 0);
           *(templ + idy) = !(p->eobs[block] == 0);
           can_skip &= (p->eobs[block] == 0);
@@ -2151,10 +2153,10 @@
   return best_rd;
 }
 
-static int64_t rd_pick_intra_sub_8x8_y_mode(AV1_COMP *cpi, MACROBLOCK *mb,
-                                            int *rate, int *rate_y,
-                                            int64_t *distortion, int *y_skip,
-                                            int64_t best_rd) {
+static int64_t rd_pick_intra_sub_8x8_y_mode(const AV1_COMP *const cpi,
+                                            MACROBLOCK *mb, int *rate,
+                                            int *rate_y, int64_t *distortion,
+                                            int *y_skip, int64_t best_rd) {
   int i, j;
   const MACROBLOCKD *const xd = &mb->e_mbd;
   MODE_INFO *const mic = xd->mi[0];
@@ -2247,11 +2249,11 @@
 
 #if CONFIG_EXT_INTRA
 // Return 1 if an ext intra mode is selected; return 0 otherwise.
-static int rd_pick_ext_intra_sby(AV1_COMP *cpi, MACROBLOCK *x, int *rate,
-                                 int *rate_tokenonly, int64_t *distortion,
-                                 int *skippable, BLOCK_SIZE bsize,
-                                 int mode_cost, int64_t *best_rd,
-                                 uint16_t skip_mask) {
+static int rd_pick_ext_intra_sby(const AV1_COMP *const cpi, MACROBLOCK *x,
+                                 int *rate, int *rate_tokenonly,
+                                 int64_t *distortion, int *skippable,
+                                 BLOCK_SIZE bsize, int mode_cost,
+                                 int64_t *best_rd, uint16_t skip_mask) {
   MACROBLOCKD *const xd = &x->e_mbd;
   MODE_INFO *const mic = xd->mi[0];
   MB_MODE_INFO *mbmi = &mic->mbmi;
@@ -2310,7 +2312,7 @@
 }
 
 static void pick_intra_angle_routine_sby(
-    AV1_COMP *cpi, MACROBLOCK *x, int *rate, int *rate_tokenonly,
+    const AV1_COMP *const cpi, MACROBLOCK *x, int *rate, int *rate_tokenonly,
     int64_t *distortion, int *skippable, int *best_angle_delta,
     TX_SIZE *best_tx_size, TX_TYPE *best_tx_type, INTRA_FILTER *best_filter,
     BLOCK_SIZE bsize, int rate_overhead, int64_t *best_rd) {
@@ -2337,10 +2339,11 @@
   }
 }
 
-static int64_t rd_pick_intra_angle_sby(AV1_COMP *cpi, MACROBLOCK *x, int *rate,
-                                       int *rate_tokenonly, int64_t *distortion,
-                                       int *skippable, BLOCK_SIZE bsize,
-                                       int rate_overhead, int64_t best_rd) {
+static int64_t rd_pick_intra_angle_sby(const AV1_COMP *const cpi, MACROBLOCK *x,
+                                       int *rate, int *rate_tokenonly,
+                                       int64_t *distortion, int *skippable,
+                                       BLOCK_SIZE bsize, int rate_overhead,
+                                       int64_t best_rd) {
   MACROBLOCKD *const xd = &x->e_mbd;
   MODE_INFO *const mic = xd->mi[0];
   MB_MODE_INFO *mbmi = &mic->mbmi;
@@ -2496,7 +2499,7 @@
 
 static void angle_estimation(const uint8_t *src, int src_stride, int rows,
                              int cols, uint8_t *directional_mode_skip_mask) {
-  int i, r, c, index, dx, dy, temp, sn, remd, quot;
+  int i, r, c, dx, dy, temp, sn, remd, quot;
   uint64_t hist[DIRECTIONAL_MODES];
   uint64_t hist_sum = 0;
 
@@ -2504,6 +2507,7 @@
   src += src_stride;
   for (r = 1; r < rows; ++r) {
     for (c = 1; c < cols; ++c) {
+      uint8_t index;
       dx = src[c] - src[c - 1];
       dy = src[c] - src[c - src_stride];
       temp = dx * dx + dy * dy;
@@ -2526,7 +2530,7 @@
   for (i = 0; i < DIRECTIONAL_MODES; ++i) hist_sum += hist[i];
   for (i = 0; i < INTRA_MODES; ++i) {
     if (i != DC_PRED && i != TM_PRED) {
-      int index = mode_to_angle_bin[i];
+      const uint8_t index = mode_to_angle_bin[i];
       uint64_t score = 2 * hist[index];
       int weight = 2;
       if (index > 0) {
@@ -2547,7 +2551,7 @@
 static void highbd_angle_estimation(const uint8_t *src8, int src_stride,
                                     int rows, int cols,
                                     uint8_t *directional_mode_skip_mask) {
-  int i, r, c, index, dx, dy, temp, sn, remd, quot;
+  int i, r, c, dx, dy, temp, sn, remd, quot;
   uint64_t hist[DIRECTIONAL_MODES];
   uint64_t hist_sum = 0;
   uint16_t *src = CONVERT_TO_SHORTPTR(src8);
@@ -2556,6 +2560,7 @@
   src += src_stride;
   for (r = 1; r < rows; ++r) {
     for (c = 1; c < cols; ++c) {
+      uint8_t index;
       dx = src[c] - src[c - 1];
       dy = src[c] - src[c - src_stride];
       temp = dx * dx + dy * dy;
@@ -2578,7 +2583,7 @@
   for (i = 0; i < DIRECTIONAL_MODES; ++i) hist_sum += hist[i];
   for (i = 0; i < INTRA_MODES; ++i) {
     if (i != DC_PRED && i != TM_PRED) {
-      int index = mode_to_angle_bin[i];
+      const uint8_t index = mode_to_angle_bin[i];
       uint64_t score = 2 * hist[index];
       int weight = 2;
       if (index > 0) {
@@ -2598,10 +2603,10 @@
 #endif  // CONFIG_EXT_INTRA
 
 // This function is used only for intra_only frames
-static int64_t rd_pick_intra_sby_mode(AV1_COMP *cpi, MACROBLOCK *x, int *rate,
-                                      int *rate_tokenonly, int64_t *distortion,
-                                      int *skippable, BLOCK_SIZE bsize,
-                                      int64_t best_rd) {
+static int64_t rd_pick_intra_sby_mode(const AV1_COMP *const cpi, MACROBLOCK *x,
+                                      int *rate, int *rate_tokenonly,
+                                      int64_t *distortion, int *skippable,
+                                      BLOCK_SIZE bsize, int64_t best_rd) {
   uint8_t mode_idx;
   PREDICTION_MODE mode_selected = DC_PRED;
   MACROBLOCKD *const xd = &x->e_mbd;
@@ -2625,7 +2630,7 @@
   int beat_best_rd = 0;
 #endif  // CONFIG_EXT_INTRA
   TX_TYPE best_tx_type = DCT_DCT;
-  int *bmode_costs;
+  const int *bmode_costs;
 #if CONFIG_PALETTE
   PALETTE_MODE_INFO palette_mode_info;
   PALETTE_MODE_INFO *const pmi = &mic->mbmi.palette_mode_info;
@@ -2812,7 +2817,7 @@
 
 // Return value 0: early termination triggered, no valid rd cost available;
 //              1: rd cost values are valid.
-static int super_block_uvrd(const AV1_COMP *cpi, MACROBLOCK *x, int *rate,
+static int super_block_uvrd(const AV1_COMP *const cpi, MACROBLOCK *x, int *rate,
                             int64_t *distortion, int *skippable, int64_t *sse,
                             BLOCK_SIZE bsize, int64_t ref_best_rd) {
   MACROBLOCKD *const xd = &x->e_mbd;
@@ -2826,7 +2831,6 @@
   if (ref_best_rd < 0) is_cost_valid = 0;
 
   if (is_inter_block(mbmi) && is_cost_valid) {
-    int plane;
     for (plane = 1; plane < MAX_MB_PLANE; ++plane)
       av1_subtract_plane(x, bsize, plane);
   }
@@ -2879,7 +2883,7 @@
   tran_low_t *const dqcoeff = BLOCK_OFFSET(pd->dqcoeff, block);
   PLANE_TYPE plane_type = (plane == 0) ? PLANE_TYPE_Y : PLANE_TYPE_UV;
   TX_TYPE tx_type = get_tx_type(plane_type, xd, block, tx_size);
-  const scan_order *const scan_order =
+  const SCAN_ORDER *const scan_order =
       get_scan(tx_size, tx_type, is_inter_block(&xd->mi[0]->mbmi));
 
   BLOCK_SIZE txm_bsize = txsize_to_bsize[tx_size];
@@ -3109,7 +3113,6 @@
     BLOCK_SIZE bsize = txsize_to_bsize[tx_size];
     int bsl = b_height_log2_lookup[bsize];
     int sub_step = num_4x4_blocks_txsize_lookup[tx_size - 1];
-    int i;
     int this_rate;
     int64_t this_dist;
     int64_t this_bsse;
@@ -3248,7 +3251,7 @@
 #if CONFIG_EXT_TX && CONFIG_RECT_TX
   if (is_rect_tx_allowed(xd, mbmi)) {
     int rate_rect_tx, skippable_rect_tx = 0;
-    int64_t dist_rect_tx, sse_rect_tx, rd, rd_rect_tx;
+    int64_t dist_rect_tx, sse_rect_tx, rd_rect_tx;
     int tx_size_cat = inter_tx_size_cat_lookup[bsize];
     TX_SIZE tx_size = max_txsize_rect_lookup[bsize];
     TX_SIZE var_tx_size = mbmi->tx_size;
@@ -3529,7 +3532,6 @@
 #endif  // CONFIG_EXT_TX && CONFIG_RECT_TX
 
   if (is_inter_block(mbmi) && is_cost_valid) {
-    int plane;
     for (plane = 1; plane < MAX_MB_PLANE; ++plane)
       av1_subtract_plane(x, bsize, plane);
   }
@@ -3593,7 +3595,7 @@
 
 #if CONFIG_PALETTE
 static void rd_pick_palette_intra_sbuv(
-    AV1_COMP *cpi, MACROBLOCK *x, int dc_mode_cost,
+    const AV1_COMP *const cpi, MACROBLOCK *x, int dc_mode_cost,
     PALETTE_MODE_INFO *palette_mode_info, uint8_t *best_palette_color_map,
     PREDICTION_MODE *mode_selected, int64_t *best_rd, int *rate,
     int *rate_tokenonly, int64_t *distortion, int *skippable) {
@@ -3760,10 +3762,10 @@
 
 #if CONFIG_EXT_INTRA
 // Return 1 if an ext intra mode is selected; return 0 otherwise.
-static int rd_pick_ext_intra_sbuv(AV1_COMP *cpi, MACROBLOCK *x, int *rate,
-                                  int *rate_tokenonly, int64_t *distortion,
-                                  int *skippable, BLOCK_SIZE bsize,
-                                  int64_t *best_rd) {
+static int rd_pick_ext_intra_sbuv(const AV1_COMP *const cpi, MACROBLOCK *x,
+                                  int *rate, int *rate_tokenonly,
+                                  int64_t *distortion, int *skippable,
+                                  BLOCK_SIZE bsize, int64_t *best_rd) {
   MACROBLOCKD *const xd = &x->e_mbd;
   MB_MODE_INFO *mbmi = &xd->mi[0]->mbmi;
   int ext_intra_selected_flag = 0;
@@ -3813,12 +3815,10 @@
   }
 }
 
-static void pick_intra_angle_routine_sbuv(AV1_COMP *cpi, MACROBLOCK *x,
-                                          int *rate, int *rate_tokenonly,
-                                          int64_t *distortion, int *skippable,
-                                          int *best_angle_delta,
-                                          BLOCK_SIZE bsize, int rate_overhead,
-                                          int64_t *best_rd) {
+static void pick_intra_angle_routine_sbuv(
+    const AV1_COMP *const cpi, MACROBLOCK *x, int *rate, int *rate_tokenonly,
+    int64_t *distortion, int *skippable, int *best_angle_delta,
+    BLOCK_SIZE bsize, int rate_overhead, int64_t *best_rd) {
   MB_MODE_INFO *mbmi = &x->e_mbd.mi[0]->mbmi;
   int this_rate_tokenonly, this_rate, s;
   int64_t this_distortion, this_sse, this_rd;
@@ -3839,10 +3839,11 @@
   }
 }
 
-static int rd_pick_intra_angle_sbuv(AV1_COMP *cpi, MACROBLOCK *x, int *rate,
-                                    int *rate_tokenonly, int64_t *distortion,
-                                    int *skippable, BLOCK_SIZE bsize,
-                                    int rate_overhead, int64_t best_rd) {
+static int rd_pick_intra_angle_sbuv(const AV1_COMP *const cpi, MACROBLOCK *x,
+                                    int *rate, int *rate_tokenonly,
+                                    int64_t *distortion, int *skippable,
+                                    BLOCK_SIZE bsize, int rate_overhead,
+                                    int64_t best_rd) {
   MACROBLOCKD *const xd = &x->e_mbd;
   MB_MODE_INFO *mbmi = &xd->mi[0]->mbmi;
   int this_rate_tokenonly, this_rate, s;
@@ -3909,10 +3910,10 @@
 }
 #endif  // CONFIG_EXT_INTRA
 
-static int64_t rd_pick_intra_sbuv_mode(AV1_COMP *cpi, MACROBLOCK *x, int *rate,
-                                       int *rate_tokenonly, int64_t *distortion,
-                                       int *skippable, BLOCK_SIZE bsize,
-                                       TX_SIZE max_tx_size) {
+static int64_t rd_pick_intra_sbuv_mode(const AV1_COMP *const cpi, MACROBLOCK *x,
+                                       int *rate, int *rate_tokenonly,
+                                       int64_t *distortion, int *skippable,
+                                       BLOCK_SIZE bsize, TX_SIZE max_tx_size) {
   MACROBLOCKD *xd = &x->e_mbd;
   MB_MODE_INFO *mbmi = &xd->mi[0]->mbmi;
   PREDICTION_MODE mode;
@@ -4039,7 +4040,7 @@
   return best_rd;
 }
 
-static void choose_intra_uv_mode(AV1_COMP *cpi, MACROBLOCK *const x,
+static void choose_intra_uv_mode(const AV1_COMP *const cpi, MACROBLOCK *const x,
                                  PICK_MODE_CONTEXT *ctx, BLOCK_SIZE bsize,
                                  TX_SIZE max_tx_size, int *rate_uv,
                                  int *rate_uv_tokenonly, int64_t *dist_uv,
@@ -4052,7 +4053,7 @@
   *mode_uv = x->e_mbd.mi[0]->mbmi.uv_mode;
 }
 
-static int cost_mv_ref(const AV1_COMP *cpi, PREDICTION_MODE mode,
+static int cost_mv_ref(const AV1_COMP *const cpi, PREDICTION_MODE mode,
 #if CONFIG_REF_MV && CONFIG_EXT_INTER
                        int is_compound,
 #endif  // CONFIG_REF_MV && CONFIG_EXT_INTER
@@ -4148,8 +4149,9 @@
              2);
 #endif  // CONFIG_GLOBAL_MOTION
 
-static int set_and_cost_bmi_mvs(AV1_COMP *cpi, MACROBLOCK *x, MACROBLOCKD *xd,
-                                int i, PREDICTION_MODE mode, int_mv this_mv[2],
+static int set_and_cost_bmi_mvs(const AV1_COMP *const cpi, MACROBLOCK *x,
+                                MACROBLOCKD *xd, int i, PREDICTION_MODE mode,
+                                int_mv this_mv[2],
                                 int_mv frame_mv[MB_MODE_COUNT]
                                                [TOTAL_REFS_PER_FRAME],
                                 int_mv seg_mvs[TOTAL_REFS_PER_FRAME],
@@ -4322,7 +4324,7 @@
 #endif  // CONFIG_REF_MV && CONFIG_EXT_INTER
 }
 
-static int64_t encode_inter_mb_segment(AV1_COMP *cpi, MACROBLOCK *x,
+static int64_t encode_inter_mb_segment(const AV1_COMP *const cpi, MACROBLOCK *x,
                                        int64_t best_yrd, int i, int *labelyrate,
                                        int64_t *distortion, int64_t *sse,
                                        ENTROPY_CONTEXT *ta, ENTROPY_CONTEXT *tl,
@@ -4345,7 +4347,7 @@
   TX_SIZE tx_size = mi->mbmi.tx_size;
 
   TX_TYPE tx_type = get_tx_type(PLANE_TYPE_Y, xd, i, tx_size);
-  const scan_order *so = get_scan(tx_size, tx_type, 1);
+  const SCAN_ORDER *scan_order = get_scan(tx_size, tx_type, 1);
   const int num_4x4_w = num_4x4_blocks_wide_txsize_lookup[tx_size];
   const int num_4x4_h = num_4x4_blocks_high_txsize_lookup[tx_size];
 
@@ -4405,8 +4407,9 @@
                  &dist, &ssz);
       thisdistortion += dist;
       thissse += ssz;
-      thisrate += av1_cost_coeffs(x, 0, block, coeff_ctx, tx_size, so->scan,
-                                  so->neighbors, cpi->sf.use_fast_coef_costing);
+      thisrate +=
+          av1_cost_coeffs(x, 0, block, coeff_ctx, tx_size, scan_order->scan,
+                          scan_order->neighbors, cpi->sf.use_fast_coef_costing);
       *(ta + (k & 1)) = !(p->eobs[block] == 0);
       *(tl + (k >> 1)) = !(p->eobs[block] == 0);
 #if CONFIG_EXT_TX
@@ -4499,7 +4502,7 @@
 // Check if NEARESTMV/NEARMV/ZEROMV is the cheapest way to encode zero motion.
 // TODO(aconverse): Find out if this is still productive then clean up or remove
 static int check_best_zero_mv(
-    const AV1_COMP *cpi, const int16_t mode_context[TOTAL_REFS_PER_FRAME],
+    const AV1_COMP *const cpi, const int16_t mode_context[TOTAL_REFS_PER_FRAME],
 #if CONFIG_REF_MV && CONFIG_EXT_INTER
     const int16_t compound_mode_context[TOTAL_REFS_PER_FRAME],
 #endif  // CONFIG_REF_MV && CONFIG_EXT_INTER
@@ -4601,8 +4604,9 @@
   return 1;
 }
 
-static void joint_motion_search(AV1_COMP *cpi, MACROBLOCK *x, BLOCK_SIZE bsize,
-                                int_mv *frame_mv, int mi_row, int mi_col,
+static void joint_motion_search(const AV1_COMP *cpi, MACROBLOCK *x,
+                                BLOCK_SIZE bsize, int_mv *frame_mv, int mi_row,
+                                int mi_col,
 #if CONFIG_EXT_INTER
                                 int_mv *ref_mv_sub8x8[2],
 #endif
@@ -4834,7 +4838,7 @@
 }
 
 static int64_t rd_pick_best_sub8x8_mode(
-    AV1_COMP *cpi, MACROBLOCK *x, int_mv *best_ref_mv,
+    const AV1_COMP *const cpi, MACROBLOCK *x, int_mv *best_ref_mv,
     int_mv *second_best_ref_mv, int64_t best_rd, int *returntotrate,
     int *returnyrate, int64_t *returndistortion, int *skippable, int64_t *psse,
     int mvthresh,
@@ -4856,7 +4860,7 @@
   int k, br = 0, idx, idy;
   int64_t bd = 0, block_sse = 0;
   PREDICTION_MODE this_mode;
-  AV1_COMMON *cm = &cpi->common;
+  const AV1_COMMON *cm = &cpi->common;
   struct macroblock_plane *const p = &x->plane[0];
   struct macroblockd_plane *const pd = &xd->plane[0];
   const int label_count = 4;
@@ -4917,8 +4921,8 @@
       int_mv mode_mv[MB_MODE_COUNT][2];
       int_mv frame_mv[MB_MODE_COUNT][TOTAL_REFS_PER_FRAME];
       PREDICTION_MODE mode_selected = ZEROMV;
-      int64_t best_rd = INT64_MAX;
-      const int i = idy * 2 + idx;
+      int64_t new_best_rd = INT64_MAX;
+      const int index = idy * 2 + idx;
       int ref;
 #if CONFIG_REF_MV
       CANDIDATE_MV ref_mv_stack[2][MAX_REF_MV_STACK_SIZE];
@@ -4933,7 +4937,7 @@
         const MV_REFERENCE_FRAME frame = mbmi->ref_frame[ref];
 #if CONFIG_EXT_INTER
         int_mv mv_ref_list[MAX_MV_REF_CANDIDATES];
-        av1_update_mv_context(xd, mi, frame, mv_ref_list, i, mi_row, mi_col,
+        av1_update_mv_context(xd, mi, frame, mv_ref_list, index, mi_row, mi_col,
                               NULL);
 #endif  // CONFIG_EXT_INTER
 #if CONFIG_GLOBAL_MOTION
@@ -4942,7 +4946,7 @@
 #else   // CONFIG_GLOBAL_MOTION
         frame_mv[ZEROMV][frame].as_int = 0;
 #endif  // CONFIG_GLOBAL_MOTION
-        av1_append_sub8x8_mvs_for_idx(cm, xd, i, ref, mi_row, mi_col,
+        av1_append_sub8x8_mvs_for_idx(cm, xd, index, ref, mi_row, mi_col,
 #if CONFIG_REF_MV
                                       ref_mv_stack[ref], &ref_mv_count[ref],
 #endif
@@ -5017,7 +5021,7 @@
         for (ref = 0; ref < 1 + has_second_rf; ++ref)
           bsi->ref_mv[ref]->as_int = ref_mvs_sub8x8[mv_idx][ref].as_int;
 #endif  // CONFIG_EXT_INTER
-        bsi->rdstat[i][mode_idx].brdcost = INT64_MAX;
+        bsi->rdstat[index][mode_idx].brdcost = INT64_MAX;
         if (!(inter_mode_mask & (1 << this_mode))) continue;
 
 #if CONFIG_REF_MV
@@ -5025,16 +5029,16 @@
 #if !CONFIG_EXT_INTER
         if (filter_idx > 0 && this_mode == NEWMV) {
           BEST_SEG_INFO *ref_bsi = bsi_buf;
-          SEG_RDSTAT *ref_rdstat = &ref_bsi->rdstat[i][mode_idx];
+          SEG_RDSTAT *ref_rdstat = &ref_bsi->rdstat[index][mode_idx];
 
           if (has_second_rf) {
-            if (seg_mvs[i][mbmi->ref_frame[0]].as_int ==
+            if (seg_mvs[index][mbmi->ref_frame[0]].as_int ==
                     ref_rdstat->mvs[0].as_int &&
                 ref_rdstat->mvs[0].as_int != INVALID_MV)
               if (bsi->ref_mv[0]->as_int == ref_rdstat->pred_mv[0].as_int)
                 --run_mv_search;
 
-            if (seg_mvs[i][mbmi->ref_frame[1]].as_int ==
+            if (seg_mvs[index][mbmi->ref_frame[1]].as_int ==
                     ref_rdstat->mvs[1].as_int &&
                 ref_rdstat->mvs[1].as_int != INVALID_MV)
               if (bsi->ref_mv[1]->as_int == ref_rdstat->pred_mv[1].as_int)
@@ -5043,23 +5047,24 @@
             if (bsi->ref_mv[0]->as_int == ref_rdstat->pred_mv[0].as_int &&
                 ref_rdstat->mvs[0].as_int != INVALID_MV) {
               run_mv_search = 0;
-              seg_mvs[i][mbmi->ref_frame[0]].as_int = ref_rdstat->mvs[0].as_int;
+              seg_mvs[index][mbmi->ref_frame[0]].as_int =
+                  ref_rdstat->mvs[0].as_int;
             }
           }
 
           if (run_mv_search != 0 && filter_idx > 1) {
             ref_bsi = bsi_buf + 1;
-            ref_rdstat = &ref_bsi->rdstat[i][mode_idx];
+            ref_rdstat = &ref_bsi->rdstat[index][mode_idx];
             run_mv_search = 2;
 
             if (has_second_rf) {
-              if (seg_mvs[i][mbmi->ref_frame[0]].as_int ==
+              if (seg_mvs[index][mbmi->ref_frame[0]].as_int ==
                       ref_rdstat->mvs[0].as_int &&
                   ref_rdstat->mvs[0].as_int != INVALID_MV)
                 if (bsi->ref_mv[0]->as_int == ref_rdstat->pred_mv[0].as_int)
                   --run_mv_search;
 
-              if (seg_mvs[i][mbmi->ref_frame[1]].as_int ==
+              if (seg_mvs[index][mbmi->ref_frame[1]].as_int ==
                       ref_rdstat->mvs[1].as_int &&
                   ref_rdstat->mvs[1].as_int != INVALID_MV)
                 if (bsi->ref_mv[1]->as_int == ref_rdstat->pred_mv[1].as_int)
@@ -5068,7 +5073,7 @@
               if (bsi->ref_mv[0]->as_int == ref_rdstat->pred_mv[0].as_int &&
                   ref_rdstat->mvs[0].as_int != INVALID_MV) {
                 run_mv_search = 0;
-                seg_mvs[i][mbmi->ref_frame[0]].as_int =
+                seg_mvs[index][mbmi->ref_frame[0]].as_int =
                     ref_rdstat->mvs[0].as_int;
               }
             }
@@ -5088,24 +5093,24 @@
                                   mbmi_ext->compound_mode_context,
 #endif  // CONFIG_REF_MV && CONFIG_EXT_INTER
                                   frame_mv, this_mode, mbmi->ref_frame, bsize,
-                                  i))
+                                  index))
             continue;
 
         memcpy(orig_pre, pd->pre, sizeof(orig_pre));
-        memcpy(bsi->rdstat[i][mode_idx].ta, t_above,
-               sizeof(bsi->rdstat[i][mode_idx].ta));
-        memcpy(bsi->rdstat[i][mode_idx].tl, t_left,
-               sizeof(bsi->rdstat[i][mode_idx].tl));
+        memcpy(bsi->rdstat[index][mode_idx].ta, t_above,
+               sizeof(bsi->rdstat[index][mode_idx].ta));
+        memcpy(bsi->rdstat[index][mode_idx].tl, t_left,
+               sizeof(bsi->rdstat[index][mode_idx].tl));
 
         // motion search for newmv (single predictor case only)
         if (!has_second_rf &&
 #if CONFIG_EXT_INTER
             have_newmv_in_inter_mode(this_mode) &&
-            (seg_mvs[i][mv_idx][mbmi->ref_frame[0]].as_int == INVALID_MV ||
+            (seg_mvs[index][mv_idx][mbmi->ref_frame[0]].as_int == INVALID_MV ||
              av1_use_mv_hp(&bsi->ref_mv[0]->as_mv) == 0)
 #else
             this_mode == NEWMV &&
-            (seg_mvs[i][mbmi->ref_frame[0]].as_int == INVALID_MV ||
+            (seg_mvs[index][mbmi->ref_frame[0]].as_int == INVALID_MV ||
              run_mv_search)
 #endif  // CONFIG_EXT_INTER
                 ) {
@@ -5122,7 +5127,7 @@
 
          /* Is the best so far sufficiently good that we can't justify doing
           * a new motion search? */
-          if (best_rd < label_mv_thresh) break;
+          if (new_best_rd < label_mv_thresh) break;
 
           if (cpi->oxcf.mode != BEST) {
 #if CONFIG_EXT_INTER
@@ -5130,18 +5135,18 @@
 #else
 // Use the previous block's result as the next block's MV predictor.
 #if !CONFIG_REF_MV
-            if (i > 0) {
-              bsi->mvp.as_int = mi->bmi[i - 1].as_mv[0].as_int;
-              if (i == 2) bsi->mvp.as_int = mi->bmi[i - 2].as_mv[0].as_int;
+            if (index > 0) {
+              bsi->mvp.as_int = mi->bmi[index - 1].as_mv[0].as_int;
+              if (index == 2)
+                bsi->mvp.as_int = mi->bmi[index - 2].as_mv[0].as_int;
             }
 #endif
 #endif  // CONFIG_EXT_INTER
           }
-          if (i == 0)
-            max_mv = x->max_mv_context[mbmi->ref_frame[0]];
-          else
-            max_mv =
-                AOMMAX(abs(bsi->mvp.as_mv.row), abs(bsi->mvp.as_mv.col)) >> 3;
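+          // For the first block, size the search range from the frame-level
+          // MV context; later blocks derive it from the predictor magnitude
+          // (>> 3 converts 1/8-pel MV units to full pels).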
+          max_mv = (index == 0) ? (int)x->max_mv_context[mbmi->ref_frame[0]]
+                                : AOMMAX(abs(bsi->mvp.as_mv.row),
+                                         abs(bsi->mvp.as_mv.col)) >>
+                                      3;
 
           if (cpi->sf.mv.auto_mv_step_size && cm->show_frame) {
             // Take weighted average of the step_params based on the last frame's
@@ -5168,7 +5173,7 @@
           }
 
           // adjust src pointer for this block
-          mi_buf_shift(x, i);
+          mi_buf_shift(x, index);
 
           av1_set_mv_search_range(x, &bsi->ref_mv[0]->as_mv);
 
@@ -5197,7 +5202,6 @@
               const int pw = 4 * num_4x4_blocks_wide_lookup[bsize];
               const int ph = 4 * num_4x4_blocks_high_lookup[bsize];
               // Use up-sampled reference frames.
-              struct macroblockd_plane *const pd = &xd->plane[0];
               struct buf_2d backup_pred = pd->pre[0];
               const YV12_BUFFER_CONFIG *upsampled_ref =
                   get_upsampled_ref(cpi, mbmi->ref_frame[0]);
@@ -5211,7 +5215,7 @@
 
               // adjust pred pointer for this block
               pd->pre[0].buf =
-                  &pd->pre[0].buf[(av1_raster_block_offset(BLOCK_8X8, i,
+                  &pd->pre[0].buf[(av1_raster_block_offset(BLOCK_8X8, index,
                                                            pd->pre[0].stride))
                                   << 3];
 
@@ -5265,9 +5269,9 @@
 
 // Save the motion search result for use in compound prediction.
 #if CONFIG_EXT_INTER
-            seg_mvs[i][mv_idx][mbmi->ref_frame[0]].as_mv = x->best_mv.as_mv;
+            seg_mvs[index][mv_idx][mbmi->ref_frame[0]].as_mv = x->best_mv.as_mv;
 #else
-            seg_mvs[i][mbmi->ref_frame[0]].as_mv = x->best_mv.as_mv;
+            seg_mvs[index][mbmi->ref_frame[0]].as_mv = x->best_mv.as_mv;
 #endif  // CONFIG_EXT_INTER
           }
 
@@ -5286,11 +5290,11 @@
 
         if (has_second_rf) {
 #if CONFIG_EXT_INTER
-          if (seg_mvs[i][mv_idx][mbmi->ref_frame[1]].as_int == INVALID_MV ||
-              seg_mvs[i][mv_idx][mbmi->ref_frame[0]].as_int == INVALID_MV)
+          if (seg_mvs[index][mv_idx][mbmi->ref_frame[1]].as_int == INVALID_MV ||
+              seg_mvs[index][mv_idx][mbmi->ref_frame[0]].as_int == INVALID_MV)
 #else
-          if (seg_mvs[i][mbmi->ref_frame[1]].as_int == INVALID_MV ||
-              seg_mvs[i][mbmi->ref_frame[0]].as_int == INVALID_MV)
+          if (seg_mvs[index][mbmi->ref_frame[1]].as_int == INVALID_MV ||
+              seg_mvs[index][mbmi->ref_frame[0]].as_int == INVALID_MV)
 #endif  // CONFIG_EXT_INTER
             continue;
         }
@@ -5312,26 +5316,26 @@
 #endif
         {
           // adjust src pointers
-          mi_buf_shift(x, i);
+          mi_buf_shift(x, index);
           if (cpi->sf.comp_inter_joint_search_thresh <= bsize) {
             int rate_mv;
             joint_motion_search(cpi, x, bsize, frame_mv[this_mode], mi_row,
                                 mi_col,
 #if CONFIG_EXT_INTER
-                                bsi->ref_mv, seg_mvs[i][mv_idx],
+                                bsi->ref_mv, seg_mvs[index][mv_idx],
 #else
-                                seg_mvs[i],
+                                seg_mvs[index],
 #endif  // CONFIG_EXT_INTER
-                                &rate_mv, i);
+                                &rate_mv, index);
 #if CONFIG_EXT_INTER
-            compound_seg_newmvs[i][0].as_int =
+            compound_seg_newmvs[index][0].as_int =
                 frame_mv[this_mode][mbmi->ref_frame[0]].as_int;
-            compound_seg_newmvs[i][1].as_int =
+            compound_seg_newmvs[index][1].as_int =
                 frame_mv[this_mode][mbmi->ref_frame[1]].as_int;
 #else
-            seg_mvs[i][mbmi->ref_frame[0]].as_int =
+            seg_mvs[index][mbmi->ref_frame[0]].as_int =
                 frame_mv[this_mode][mbmi->ref_frame[0]].as_int;
-            seg_mvs[i][mbmi->ref_frame[1]].as_int =
+            seg_mvs[index][mbmi->ref_frame[1]].as_int =
                 frame_mv[this_mode][mbmi->ref_frame[1]].as_int;
 #endif  // CONFIG_EXT_INTER
           }
@@ -5339,42 +5343,42 @@
           mi_buf_restore(x, orig_src, orig_pre);
         }
 
-        bsi->rdstat[i][mode_idx].brate = set_and_cost_bmi_mvs(
-            cpi, x, xd, i, this_mode, mode_mv[this_mode], frame_mv,
+        bsi->rdstat[index][mode_idx].brate = set_and_cost_bmi_mvs(
+            cpi, x, xd, index, this_mode, mode_mv[this_mode], frame_mv,
 #if CONFIG_EXT_INTER
-            seg_mvs[i][mv_idx], compound_seg_newmvs[i],
+            seg_mvs[index][mv_idx], compound_seg_newmvs[index],
 #else
-            seg_mvs[i],
+            seg_mvs[index],
 #endif  // CONFIG_EXT_INTER
             bsi->ref_mv, x->nmvjointcost, x->mvcost);
 
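         // Propagate the selected MVs to the extra 4x4 positions covered by
         // wider (index + 1) and taller (index + 2) sub-8x8 partitions.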
         for (ref = 0; ref < 1 + has_second_rf; ++ref) {
-          bsi->rdstat[i][mode_idx].mvs[ref].as_int =
+          bsi->rdstat[index][mode_idx].mvs[ref].as_int =
               mode_mv[this_mode][ref].as_int;
           if (num_4x4_blocks_wide > 1)
-            bsi->rdstat[i + 1][mode_idx].mvs[ref].as_int =
+            bsi->rdstat[index + 1][mode_idx].mvs[ref].as_int =
                 mode_mv[this_mode][ref].as_int;
           if (num_4x4_blocks_high > 1)
-            bsi->rdstat[i + 2][mode_idx].mvs[ref].as_int =
+            bsi->rdstat[index + 2][mode_idx].mvs[ref].as_int =
                 mode_mv[this_mode][ref].as_int;
 #if CONFIG_REF_MV
-          bsi->rdstat[i][mode_idx].pred_mv[ref].as_int =
-              mi->bmi[i].pred_mv[ref].as_int;
+          bsi->rdstat[index][mode_idx].pred_mv[ref].as_int =
+              mi->bmi[index].pred_mv[ref].as_int;
           if (num_4x4_blocks_wide > 1)
-            bsi->rdstat[i + 1][mode_idx].pred_mv[ref].as_int =
-                mi->bmi[i].pred_mv[ref].as_int;
+            bsi->rdstat[index + 1][mode_idx].pred_mv[ref].as_int =
+                mi->bmi[index].pred_mv[ref].as_int;
           if (num_4x4_blocks_high > 1)
-            bsi->rdstat[i + 2][mode_idx].pred_mv[ref].as_int =
-                mi->bmi[i].pred_mv[ref].as_int;
+            bsi->rdstat[index + 2][mode_idx].pred_mv[ref].as_int =
+                mi->bmi[index].pred_mv[ref].as_int;
 #endif
 #if CONFIG_EXT_INTER
-          bsi->rdstat[i][mode_idx].ref_mv[ref].as_int =
+          bsi->rdstat[index][mode_idx].ref_mv[ref].as_int =
               bsi->ref_mv[ref]->as_int;
           if (num_4x4_blocks_wide > 1)
-            bsi->rdstat[i + 1][mode_idx].ref_mv[ref].as_int =
+            bsi->rdstat[index + 1][mode_idx].ref_mv[ref].as_int =
                 bsi->ref_mv[ref]->as_int;
           if (num_4x4_blocks_high > 1)
-            bsi->rdstat[i + 2][mode_idx].ref_mv[ref].as_int =
+            bsi->rdstat[index + 2][mode_idx].ref_mv[ref].as_int =
                 bsi->ref_mv[ref]->as_int;
 #endif  // CONFIG_EXT_INTER
         }
@@ -5393,17 +5397,18 @@
             subpelmv |= mv_has_subpel(&mode_mv[this_mode][ref].as_mv);
 #if CONFIG_EXT_INTER
             if (have_newmv_in_inter_mode(this_mode))
-              have_ref &= ((mode_mv[this_mode][ref].as_int ==
-                            ref_bsi->rdstat[i][mode_idx].mvs[ref].as_int) &&
-                           (bsi->ref_mv[ref]->as_int ==
-                            ref_bsi->rdstat[i][mode_idx].ref_mv[ref].as_int));
+              have_ref &=
+                  ((mode_mv[this_mode][ref].as_int ==
+                    ref_bsi->rdstat[index][mode_idx].mvs[ref].as_int) &&
+                   (bsi->ref_mv[ref]->as_int ==
+                    ref_bsi->rdstat[index][mode_idx].ref_mv[ref].as_int));
             else
 #endif  // CONFIG_EXT_INTER
               have_ref &= mode_mv[this_mode][ref].as_int ==
-                          ref_bsi->rdstat[i][mode_idx].mvs[ref].as_int;
+                          ref_bsi->rdstat[index][mode_idx].mvs[ref].as_int;
           }
 
-          have_ref &= ref_bsi->rdstat[i][mode_idx].brate > 0;
+          have_ref &= ref_bsi->rdstat[index][mode_idx].brate > 0;
 
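           // Also check the result cached for the second interpolation filter.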
           if (filter_idx > 1 && !subpelmv && !have_ref) {
             ref_bsi = bsi_buf + 1;
@@ -5411,118 +5416,126 @@
             for (ref = 0; ref < 1 + has_second_rf; ++ref)
 #if CONFIG_EXT_INTER
               if (have_newmv_in_inter_mode(this_mode))
-                have_ref &= ((mode_mv[this_mode][ref].as_int ==
-                              ref_bsi->rdstat[i][mode_idx].mvs[ref].as_int) &&
-                             (bsi->ref_mv[ref]->as_int ==
-                              ref_bsi->rdstat[i][mode_idx].ref_mv[ref].as_int));
+                have_ref &=
+                    ((mode_mv[this_mode][ref].as_int ==
+                      ref_bsi->rdstat[index][mode_idx].mvs[ref].as_int) &&
+                     (bsi->ref_mv[ref]->as_int ==
+                      ref_bsi->rdstat[index][mode_idx].ref_mv[ref].as_int));
               else
 #endif  // CONFIG_EXT_INTER
                 have_ref &= mode_mv[this_mode][ref].as_int ==
-                            ref_bsi->rdstat[i][mode_idx].mvs[ref].as_int;
+                            ref_bsi->rdstat[index][mode_idx].mvs[ref].as_int;
 
-            have_ref &= ref_bsi->rdstat[i][mode_idx].brate > 0;
+            have_ref &= ref_bsi->rdstat[index][mode_idx].brate > 0;
           }
 
           if (!subpelmv && have_ref &&
-              ref_bsi->rdstat[i][mode_idx].brdcost < INT64_MAX) {
+              ref_bsi->rdstat[index][mode_idx].brdcost < INT64_MAX) {
 #if CONFIG_REF_MV
-            bsi->rdstat[i][mode_idx].byrate =
-                ref_bsi->rdstat[i][mode_idx].byrate;
-            bsi->rdstat[i][mode_idx].bdist = ref_bsi->rdstat[i][mode_idx].bdist;
-            bsi->rdstat[i][mode_idx].bsse = ref_bsi->rdstat[i][mode_idx].bsse;
-            bsi->rdstat[i][mode_idx].brate +=
-                ref_bsi->rdstat[i][mode_idx].byrate;
-            bsi->rdstat[i][mode_idx].eobs = ref_bsi->rdstat[i][mode_idx].eobs;
+            bsi->rdstat[index][mode_idx].byrate =
+                ref_bsi->rdstat[index][mode_idx].byrate;
+            bsi->rdstat[index][mode_idx].bdist =
+                ref_bsi->rdstat[index][mode_idx].bdist;
+            bsi->rdstat[index][mode_idx].bsse =
+                ref_bsi->rdstat[index][mode_idx].bsse;
+            bsi->rdstat[index][mode_idx].brate +=
+                ref_bsi->rdstat[index][mode_idx].byrate;
+            bsi->rdstat[index][mode_idx].eobs =
+                ref_bsi->rdstat[index][mode_idx].eobs;
 
-            bsi->rdstat[i][mode_idx].brdcost =
-                RDCOST(x->rdmult, x->rddiv, bsi->rdstat[i][mode_idx].brate,
-                       bsi->rdstat[i][mode_idx].bdist);
+            bsi->rdstat[index][mode_idx].brdcost =
+                RDCOST(x->rdmult, x->rddiv, bsi->rdstat[index][mode_idx].brate,
+                       bsi->rdstat[index][mode_idx].bdist);
 
-            memcpy(bsi->rdstat[i][mode_idx].ta, ref_bsi->rdstat[i][mode_idx].ta,
-                   sizeof(bsi->rdstat[i][mode_idx].ta));
-            memcpy(bsi->rdstat[i][mode_idx].tl, ref_bsi->rdstat[i][mode_idx].tl,
-                   sizeof(bsi->rdstat[i][mode_idx].tl));
+            memcpy(bsi->rdstat[index][mode_idx].ta,
+                   ref_bsi->rdstat[index][mode_idx].ta,
+                   sizeof(bsi->rdstat[index][mode_idx].ta));
+            memcpy(bsi->rdstat[index][mode_idx].tl,
+                   ref_bsi->rdstat[index][mode_idx].tl,
+                   sizeof(bsi->rdstat[index][mode_idx].tl));
 #else
-            memcpy(&bsi->rdstat[i][mode_idx], &ref_bsi->rdstat[i][mode_idx],
-                   sizeof(SEG_RDSTAT));
+            memcpy(&bsi->rdstat[index][mode_idx],
+                   &ref_bsi->rdstat[index][mode_idx], sizeof(SEG_RDSTAT));
 #endif
             if (num_4x4_blocks_wide > 1)
-              bsi->rdstat[i + 1][mode_idx].eobs =
-                  ref_bsi->rdstat[i + 1][mode_idx].eobs;
+              bsi->rdstat[index + 1][mode_idx].eobs =
+                  ref_bsi->rdstat[index + 1][mode_idx].eobs;
             if (num_4x4_blocks_high > 1)
-              bsi->rdstat[i + 2][mode_idx].eobs =
-                  ref_bsi->rdstat[i + 2][mode_idx].eobs;
+              bsi->rdstat[index + 2][mode_idx].eobs =
+                  ref_bsi->rdstat[index + 2][mode_idx].eobs;
 
-            if (bsi->rdstat[i][mode_idx].brdcost < best_rd) {
+            if (bsi->rdstat[index][mode_idx].brdcost < new_best_rd) {
 #if CONFIG_REF_MV
               // If the NEWMV mode is using the same motion vector as the
               // NEARESTMV mode, skip the remaining rate-distortion
               // calculations and use the inferred motion vector modes.
               if (this_mode == NEWMV) {
                 if (has_second_rf) {
-                  if (bsi->rdstat[i][mode_idx].mvs[0].as_int ==
+                  if (bsi->rdstat[index][mode_idx].mvs[0].as_int ==
                           bsi->ref_mv[0]->as_int &&
-                      bsi->rdstat[i][mode_idx].mvs[1].as_int ==
+                      bsi->rdstat[index][mode_idx].mvs[1].as_int ==
                           bsi->ref_mv[1]->as_int)
                     continue;
                 } else {
-                  if (bsi->rdstat[i][mode_idx].mvs[0].as_int ==
+                  if (bsi->rdstat[index][mode_idx].mvs[0].as_int ==
                       bsi->ref_mv[0]->as_int)
                     continue;
                 }
               }
 #endif
               mode_selected = this_mode;
-              best_rd = bsi->rdstat[i][mode_idx].brdcost;
+              new_best_rd = bsi->rdstat[index][mode_idx].brdcost;
             }
             continue;
           }
         }
 
-        bsi->rdstat[i][mode_idx].brdcost = encode_inter_mb_segment(
-            cpi, x, bsi->segment_rd - this_segment_rd, i,
-            &bsi->rdstat[i][mode_idx].byrate, &bsi->rdstat[i][mode_idx].bdist,
-            &bsi->rdstat[i][mode_idx].bsse, bsi->rdstat[i][mode_idx].ta,
-            bsi->rdstat[i][mode_idx].tl, idy, idx, mi_row, mi_col);
+        bsi->rdstat[index][mode_idx].brdcost = encode_inter_mb_segment(
+            cpi, x, bsi->segment_rd - this_segment_rd, index,
+            &bsi->rdstat[index][mode_idx].byrate,
+            &bsi->rdstat[index][mode_idx].bdist,
+            &bsi->rdstat[index][mode_idx].bsse, bsi->rdstat[index][mode_idx].ta,
+            bsi->rdstat[index][mode_idx].tl, idy, idx, mi_row, mi_col);
 
-        if (bsi->rdstat[i][mode_idx].brdcost < INT64_MAX) {
-          bsi->rdstat[i][mode_idx].brdcost +=
-              RDCOST(x->rdmult, x->rddiv, bsi->rdstat[i][mode_idx].brate, 0);
-          bsi->rdstat[i][mode_idx].brate += bsi->rdstat[i][mode_idx].byrate;
-          bsi->rdstat[i][mode_idx].eobs = p->eobs[i];
+        if (bsi->rdstat[index][mode_idx].brdcost < INT64_MAX) {
+          bsi->rdstat[index][mode_idx].brdcost += RDCOST(
+              x->rdmult, x->rddiv, bsi->rdstat[index][mode_idx].brate, 0);
+          bsi->rdstat[index][mode_idx].brate +=
+              bsi->rdstat[index][mode_idx].byrate;
+          bsi->rdstat[index][mode_idx].eobs = p->eobs[index];
           if (num_4x4_blocks_wide > 1)
-            bsi->rdstat[i + 1][mode_idx].eobs = p->eobs[i + 1];
+            bsi->rdstat[index + 1][mode_idx].eobs = p->eobs[index + 1];
           if (num_4x4_blocks_high > 1)
-            bsi->rdstat[i + 2][mode_idx].eobs = p->eobs[i + 2];
+            bsi->rdstat[index + 2][mode_idx].eobs = p->eobs[index + 2];
         }
 
-        if (bsi->rdstat[i][mode_idx].brdcost < best_rd) {
+        if (bsi->rdstat[index][mode_idx].brdcost < new_best_rd) {
 #if CONFIG_REF_MV
           // If the NEWMV mode is using the same motion vector as the
           // NEARESTMV mode, skip the remaining rate-distortion calculations
           // and use the inferred motion vector modes.
           if (this_mode == NEWMV) {
             if (has_second_rf) {
-              if (bsi->rdstat[i][mode_idx].mvs[0].as_int ==
+              if (bsi->rdstat[index][mode_idx].mvs[0].as_int ==
                       bsi->ref_mv[0]->as_int &&
-                  bsi->rdstat[i][mode_idx].mvs[1].as_int ==
+                  bsi->rdstat[index][mode_idx].mvs[1].as_int ==
                       bsi->ref_mv[1]->as_int)
                 continue;
             } else {
-              if (bsi->rdstat[i][mode_idx].mvs[0].as_int ==
+              if (bsi->rdstat[index][mode_idx].mvs[0].as_int ==
                   bsi->ref_mv[0]->as_int)
                 continue;
             }
           }
 #endif
           mode_selected = this_mode;
-          best_rd = bsi->rdstat[i][mode_idx].brdcost;
+          new_best_rd = bsi->rdstat[index][mode_idx].brdcost;
         }
       } /* for each 4x4 mode */
 
-      if (best_rd == INT64_MAX) {
+      if (new_best_rd == INT64_MAX) {
         int iy, midx;
-        for (iy = i + 1; iy < 4; ++iy)
+        for (iy = index + 1; iy < 4; ++iy)
 #if CONFIG_EXT_INTER
           for (midx = 0; midx < INTER_MODES + INTER_COMPOUND_MODES; ++midx)
 #else
@@ -5534,33 +5547,33 @@
       }
 
       mode_idx = INTER_OFFSET(mode_selected);
-      memcpy(t_above, bsi->rdstat[i][mode_idx].ta, sizeof(t_above));
-      memcpy(t_left, bsi->rdstat[i][mode_idx].tl, sizeof(t_left));
+      memcpy(t_above, bsi->rdstat[index][mode_idx].ta, sizeof(t_above));
+      memcpy(t_left, bsi->rdstat[index][mode_idx].tl, sizeof(t_left));
 
 #if CONFIG_EXT_INTER
       mv_idx = (mode_selected == NEWFROMNEARMV) ? 1 : 0;
-      bsi->ref_mv[0]->as_int = bsi->rdstat[i][mode_idx].ref_mv[0].as_int;
+      bsi->ref_mv[0]->as_int = bsi->rdstat[index][mode_idx].ref_mv[0].as_int;
       if (has_second_rf)
-        bsi->ref_mv[1]->as_int = bsi->rdstat[i][mode_idx].ref_mv[1].as_int;
+        bsi->ref_mv[1]->as_int = bsi->rdstat[index][mode_idx].ref_mv[1].as_int;
 #endif  // CONFIG_EXT_INTER
-      set_and_cost_bmi_mvs(cpi, x, xd, i, mode_selected, mode_mv[mode_selected],
-                           frame_mv,
+      set_and_cost_bmi_mvs(cpi, x, xd, index, mode_selected,
+                           mode_mv[mode_selected], frame_mv,
 #if CONFIG_EXT_INTER
-                           seg_mvs[i][mv_idx], compound_seg_newmvs[i],
+                           seg_mvs[index][mv_idx], compound_seg_newmvs[index],
 #else
-                           seg_mvs[i],
+                           seg_mvs[index],
 #endif  // CONFIG_EXT_INTER
                            bsi->ref_mv, x->nmvjointcost, x->mvcost);
 
-      br += bsi->rdstat[i][mode_idx].brate;
-      bd += bsi->rdstat[i][mode_idx].bdist;
-      block_sse += bsi->rdstat[i][mode_idx].bsse;
-      segmentyrate += bsi->rdstat[i][mode_idx].byrate;
-      this_segment_rd += bsi->rdstat[i][mode_idx].brdcost;
+      br += bsi->rdstat[index][mode_idx].brate;
+      bd += bsi->rdstat[index][mode_idx].bdist;
+      block_sse += bsi->rdstat[index][mode_idx].bsse;
+      segmentyrate += bsi->rdstat[index][mode_idx].byrate;
+      this_segment_rd += bsi->rdstat[index][mode_idx].brdcost;
 
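       // Bail out if this partitioning already costs more than the best
       // segmentation found so far.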
       if (this_segment_rd > bsi->segment_rd) {
         int iy, midx;
-        for (iy = i + 1; iy < 4; ++iy)
+        for (iy = index + 1; iy < 4; ++iy)
 #if CONFIG_EXT_INTER
           for (midx = 0; midx < INTER_MODES + INTER_COMPOUND_MODES; ++midx)
 #else
@@ -5772,7 +5785,7 @@
   ctx->hybrid_pred_diff = (int)comp_pred_diff[REFERENCE_MODE_SELECT];
 }
 
-static void setup_buffer_inter(AV1_COMP *cpi, MACROBLOCK *x,
+static void setup_buffer_inter(const AV1_COMP *const cpi, MACROBLOCK *x,
                                MV_REFERENCE_FRAME ref_frame,
                                BLOCK_SIZE block_size, int mi_row, int mi_col,
                                int_mv frame_nearest_mv[TOTAL_REFS_PER_FRAME],
@@ -5817,8 +5830,8 @@
                 block_size);
 }
 
-static void single_motion_search(AV1_COMP *cpi, MACROBLOCK *x, BLOCK_SIZE bsize,
-                                 int mi_row, int mi_col,
+static void single_motion_search(const AV1_COMP *const cpi, MACROBLOCK *x,
+                                 BLOCK_SIZE bsize, int mi_row, int mi_col,
 #if CONFIG_EXT_INTER
                                  int ref_idx, int mv_idx,
 #endif  // CONFIG_EXT_INTER
@@ -5908,9 +5921,9 @@
           x->best_mv.as_int = INVALID_MV;
 
           if (scaled_ref_frame) {
-            int i;
-            for (i = 0; i < MAX_MB_PLANE; ++i)
-              xd->plane[i].pre[ref_idx] = backup_yv12[i];
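+            // Restore the un-scaled reference planes before the early return.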
+            int j;
+            for (j = 0; j < MAX_MB_PLANE; ++j)
+              xd->plane[j].pre[ref_idx] = backup_yv12[j];
           }
           return;
         }
@@ -6063,7 +6076,7 @@
 }
 
 #if CONFIG_EXT_INTER
-static void do_masked_motion_search(AV1_COMP *cpi, MACROBLOCK *x,
+static void do_masked_motion_search(const AV1_COMP *const cpi, MACROBLOCK *x,
                                     const uint8_t *mask, int mask_stride,
                                     BLOCK_SIZE bsize, int mi_row, int mi_col,
                                     int_mv *tmp_mv, int *rate_mv, int ref_idx,
@@ -6086,6 +6099,7 @@
 
   const YV12_BUFFER_CONFIG *scaled_ref_frame =
       av1_get_scaled_ref_frame(cpi, ref);
+  int i;
 
   MV pred_mv[3];
   pred_mv[0] = x->mbmi_ext->ref_mvs[ref][0].as_mv;
@@ -6097,7 +6111,6 @@
 #endif
 
   if (scaled_ref_frame) {
-    int i;
     // Swap out the reference frame for a version that's been scaled to
     // match the resolution of the current frame, allowing the existing
     // motion search code to be used without additional modifications.
@@ -6139,7 +6152,6 @@
 
     // prev_mv_sad is not set up for dynamically scaled frames.
     if (cpi->oxcf.resize_mode != RESIZE_DYNAMIC) {
-      int i;
       for (i = LAST_FRAME; i <= ALTREF_FRAME && cm->show_frame; ++i) {
         if ((x->pred_mv_sad[ref] >> 3) > x->pred_mv_sad[i]) {
           x->pred_mv[ref].row = 0;
@@ -6147,9 +6159,9 @@
           tmp_mv->as_int = INVALID_MV;
 
           if (scaled_ref_frame) {
-            int i;
-            for (i = 0; i < MAX_MB_PLANE; ++i)
-              xd->plane[i].pre[ref_idx] = backup_yv12[i];
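+            // Restore the un-scaled reference planes before the early return.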
+            int j;
+            for (j = 0; j < MAX_MB_PLANE; ++j)
+              xd->plane[j].pre[ref_idx] = backup_yv12[j];
           }
           return;
         }
@@ -6188,18 +6200,17 @@
     x->pred_mv[ref] = tmp_mv->as_mv;
 
   if (scaled_ref_frame) {
-    int i;
     for (i = 0; i < MAX_MB_PLANE; i++)
       xd->plane[i].pre[ref_idx] = backup_yv12[i];
   }
 }
 
-static void do_masked_motion_search_indexed(AV1_COMP *cpi, MACROBLOCK *x,
-                                            int wedge_index, int wedge_sign,
-                                            BLOCK_SIZE bsize, int mi_row,
-                                            int mi_col, int_mv *tmp_mv,
-                                            int *rate_mv, int mv_idx[2],
-                                            int which) {
+static void do_masked_motion_search_indexed(const AV1_COMP *const cpi,
+                                            MACROBLOCK *x, int wedge_index,
+                                            int wedge_sign, BLOCK_SIZE bsize,
+                                            int mi_row, int mi_col,
+                                            int_mv *tmp_mv, int *rate_mv,
+                                            int mv_idx[2], int which) {
   // NOTE: 'which' selects the reference to search: 0 - ref 0 only,
   // 1 - ref 1 only, 2 - both.
   MACROBLOCKD *xd = &x->e_mbd;
   MB_MODE_INFO *mbmi = &xd->mi[0]->mbmi;
@@ -6228,7 +6239,7 @@
 // However, once established, that vector may be usable through the nearest
 // and near MV modes to reduce distortion in subsequent blocks and also
 // improve visual quality.
-static int discount_newmv_test(const AV1_COMP *cpi, int this_mode,
+static int discount_newmv_test(const AV1_COMP *const cpi, int this_mode,
                                int_mv this_mv,
                                int_mv (*mode_mv)[TOTAL_REFS_PER_FRAME],
                                int ref_frame) {
@@ -6594,7 +6605,7 @@
 #endif  // CONFIG_EXT_INTER
 
 static int64_t handle_inter_mode(
-    AV1_COMP *cpi, MACROBLOCK *x, BLOCK_SIZE bsize, int *rate2,
+    const AV1_COMP *const cpi, MACROBLOCK *x, BLOCK_SIZE bsize, int *rate2,
     int64_t *distortion, int *skippable, int *rate_y, int *rate_uv,
     int *disable_skip, int_mv (*mode_mv)[TOTAL_REFS_PER_FRAME], int mi_row,
     int mi_col,
@@ -6613,7 +6624,7 @@
     InterpFilter (*single_filter)[TOTAL_REFS_PER_FRAME],
     int (*single_skippable)[TOTAL_REFS_PER_FRAME], int64_t *psse,
     const int64_t ref_best_rd) {
-  AV1_COMMON *cm = &cpi->common;
+  const AV1_COMMON *cm = &cpi->common;
   MACROBLOCKD *xd = &x->e_mbd;
   MB_MODE_INFO *mbmi = &xd->mi[0]->mbmi;
   MB_MODE_INFO_EXT *const mbmi_ext = x->mbmi_ext;
@@ -7108,21 +7119,21 @@
 #endif  // CONFIG_MOTION_VAR
 
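   // Evaluate inter-inter wedge prediction: compare the RD cost with and
   // without the wedge and keep whichever is cheaper.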
   if (is_comp_pred && is_interinter_wedge_used(bsize)) {
-    int rate_sum, rs;
+    int rate_sum, rs2;
     int64_t dist_sum;
     int64_t best_rd_nowedge = INT64_MAX;
     int64_t best_rd_wedge = INT64_MAX;
     int tmp_skip_txfm_sb;
     int64_t tmp_skip_sse_sb;
 
-    rs = av1_cost_bit(cm->fc->wedge_interinter_prob[bsize], 0);
+    rs2 = av1_cost_bit(cm->fc->wedge_interinter_prob[bsize], 0);
     mbmi->use_wedge_interinter = 0;
     av1_build_inter_predictors_sby(xd, mi_row, mi_col, bsize);
     av1_subtract_plane(x, bsize, 0);
     rd = estimate_yrd_for_sb(cpi, bsize, x, &rate_sum, &dist_sum,
                              &tmp_skip_txfm_sb, &tmp_skip_sse_sb, INT64_MAX);
     if (rd != INT64_MAX)
-      rd = RDCOST(x->rdmult, x->rddiv, rs + rate_mv + rate_sum, dist_sum);
+      rd = RDCOST(x->rdmult, x->rddiv, rs2 + rate_mv + rate_sum, dist_sum);
     best_rd_nowedge = rd;
 
     // Disable wedge search if the source variance is small.
@@ -7135,8 +7146,8 @@
       int strides[1] = { bw };
 
       mbmi->use_wedge_interinter = 1;
-      rs = av1_cost_literal(get_interinter_wedge_bits(bsize)) +
-           av1_cost_bit(cm->fc->wedge_interinter_prob[bsize], 1);
+      rs2 = av1_cost_literal(get_interinter_wedge_bits(bsize)) +
+            av1_cost_bit(cm->fc->wedge_interinter_prob[bsize], 1);
 
       av1_build_inter_predictors_for_planes_single_buf(
           xd, bsize, 0, 0, mi_row, mi_col, 0, preds0, strides);
@@ -7145,7 +7156,7 @@
 
       // Choose the best wedge
       best_rd_wedge = pick_interinter_wedge(cpi, x, bsize, pred0, pred1);
-      best_rd_wedge += RDCOST(x->rdmult, x->rddiv, rs + rate_mv, 0);
+      best_rd_wedge += RDCOST(x->rdmult, x->rddiv, rs2 + rate_mv, 0);
 
       if (have_newmv_in_inter_mode(this_mode)) {
         int_mv tmp_mv[2];
@@ -7176,7 +7187,8 @@
         av1_build_inter_predictors_sby(xd, mi_row, mi_col, bsize);
         model_rd_for_sb(cpi, bsize, x, xd, 0, 0, &rate_sum, &dist_sum,
                         &tmp_skip_txfm_sb, &tmp_skip_sse_sb);
-        rd = RDCOST(x->rdmult, x->rddiv, rs + tmp_rate_mv + rate_sum, dist_sum);
+        rd =
+            RDCOST(x->rdmult, x->rddiv, rs2 + tmp_rate_mv + rate_sum, dist_sum);
         if (rd < best_rd_wedge) {
           best_rd_wedge = rd;
         } else {
@@ -7191,7 +7203,7 @@
             estimate_yrd_for_sb(cpi, bsize, x, &rate_sum, &dist_sum,
                                 &tmp_skip_txfm_sb, &tmp_skip_sse_sb, INT64_MAX);
         if (rd != INT64_MAX)
-          rd = RDCOST(x->rdmult, x->rddiv, rs + tmp_rate_mv + rate_sum,
+          rd = RDCOST(x->rdmult, x->rddiv, rs2 + tmp_rate_mv + rate_sum,
                       dist_sum);
         best_rd_wedge = rd;
 
@@ -7216,7 +7228,7 @@
             estimate_yrd_for_sb(cpi, bsize, x, &rate_sum, &dist_sum,
                                 &tmp_skip_txfm_sb, &tmp_skip_sse_sb, INT64_MAX);
         if (rd != INT64_MAX)
-          rd = RDCOST(x->rdmult, x->rddiv, rs + rate_mv + rate_sum, dist_sum);
+          rd = RDCOST(x->rdmult, x->rddiv, rs2 + rate_mv + rate_sum, dist_sum);
         best_rd_wedge = rd;
         if (best_rd_wedge < best_rd_nowedge) {
           mbmi->use_wedge_interinter = 1;
@@ -7464,7 +7476,6 @@
   for (mbmi->motion_mode = SIMPLE_TRANSLATION;
        mbmi->motion_mode < (allow_motvar ? MOTION_MODES : 1);
        mbmi->motion_mode++) {
-    int64_t tmp_rd;
 #if CONFIG_EXT_INTER
     int tmp_rate2 = mbmi->motion_mode != SIMPLE_TRANSLATION ? rate2_bmc_nocoeff
                                                             : rate2_nocoeff;
@@ -7752,10 +7763,10 @@
   return 0;  // The rate-distortion cost will be re-calculated by caller.
 }
 
-void av1_rd_pick_intra_mode_sb(AV1_COMP *cpi, MACROBLOCK *x, RD_COST *rd_cost,
-                               BLOCK_SIZE bsize, PICK_MODE_CONTEXT *ctx,
-                               int64_t best_rd) {
-  AV1_COMMON *const cm = &cpi->common;
+void av1_rd_pick_intra_mode_sb(const AV1_COMP *cpi, MACROBLOCK *x,
+                               RD_COST *rd_cost, BLOCK_SIZE bsize,
+                               PICK_MODE_CONTEXT *ctx, int64_t best_rd) {
+  const AV1_COMMON *const cm = &cpi->common;
   MACROBLOCKD *const xd = &x->e_mbd;
   struct macroblockd_plane *const pd = xd->plane;
   int rate_y = 0, rate_uv = 0, rate_y_tokenonly = 0, rate_uv_tokenonly = 0;
@@ -7800,7 +7811,7 @@
 }
 
 // Do we have an internal image edge (e.g., formatting bars)?
-int av1_internal_image_edge(AV1_COMP *cpi) {
+int av1_internal_image_edge(const AV1_COMP *cpi) {
   return (cpi->oxcf.pass == 2) &&
          ((cpi->twopass.this_frame_stats.inactive_zone_rows > 0) ||
           (cpi->twopass.this_frame_stats.inactive_zone_cols > 0));
@@ -7809,14 +7820,14 @@
 // Checks to see if a super block is on a horizontal image edge.
 // In most cases this is the "real" edge unless there are formatting
 // bars embedded in the stream.
-int av1_active_h_edge(AV1_COMP *cpi, int mi_row, int mi_step) {
+int av1_active_h_edge(const AV1_COMP *cpi, int mi_row, int mi_step) {
   int top_edge = 0;
   int bottom_edge = cpi->common.mi_rows;
   int is_active_h_edge = 0;
 
   // For two-pass encoding, account for any formatting bars detected.
   if (cpi->oxcf.pass == 2) {
-    TWO_PASS *twopass = &cpi->twopass;
+    const TWO_PASS *const twopass = &cpi->twopass;
 
     // The inactive region is specified in MBs not mi units.
     // The image edge is in the following MB row.
@@ -7836,14 +7847,14 @@
 // Checks to see if a super block is on a vertical image edge.
 // In most cases this is the "real" edge unless there are formatting
 // bars embedded in the stream.
-int av1_active_v_edge(AV1_COMP *cpi, int mi_col, int mi_step) {
+int av1_active_v_edge(const AV1_COMP *cpi, int mi_col, int mi_step) {
   int left_edge = 0;
   int right_edge = cpi->common.mi_cols;
   int is_active_v_edge = 0;
 
   // For two-pass encoding, account for any formatting bars detected.
   if (cpi->oxcf.pass == 2) {
-    TWO_PASS *twopass = &cpi->twopass;
+    const TWO_PASS *const twopass = &cpi->twopass;
 
     // The inactive region is specified in MBs not mi units.
     // The image edge is in the following MB column.
@@ -7863,13 +7874,13 @@
 // Checks to see if a super block is at the edge of the active image.
 // In most cases this is the "real" edge unless there are formatting
 // bars embedded in the stream.
-int av1_active_edge_sb(AV1_COMP *cpi, int mi_row, int mi_col) {
+int av1_active_edge_sb(const AV1_COMP *cpi, int mi_row, int mi_col) {
   return av1_active_h_edge(cpi, mi_row, cpi->common.mib_size) ||
          av1_active_v_edge(cpi, mi_col, cpi->common.mib_size);
 }
 
 #if CONFIG_PALETTE
-static void restore_uv_color_map(AV1_COMP *cpi, MACROBLOCK *x) {
+static void restore_uv_color_map(const AV1_COMP *const cpi, MACROBLOCK *x) {
   MACROBLOCKD *const xd = &x->e_mbd;
   MB_MODE_INFO *const mbmi = &xd->mi[0]->mbmi;
   PALETTE_MODE_INFO *const pmi = &mbmi->palette_mode_info;
@@ -7920,10 +7931,10 @@
 
 #if CONFIG_EXT_INTRA
 static void pick_ext_intra_interframe(
-    AV1_COMP *cpi, MACROBLOCK *x, PICK_MODE_CONTEXT *ctx, BLOCK_SIZE bsize,
-    int *rate_uv_intra, int *rate_uv_tokenonly, int64_t *dist_uv, int *skip_uv,
-    PREDICTION_MODE *mode_uv, EXT_INTRA_MODE_INFO *ext_intra_mode_info_uv,
-    int8_t *uv_angle_delta,
+    const AV1_COMP *const cpi, MACROBLOCK *x, PICK_MODE_CONTEXT *ctx,
+    BLOCK_SIZE bsize, int *rate_uv_intra, int *rate_uv_tokenonly,
+    int64_t *dist_uv, int *skip_uv, PREDICTION_MODE *mode_uv,
+    EXT_INTRA_MODE_INFO *ext_intra_mode_info_uv, int8_t *uv_angle_delta,
 #if CONFIG_PALETTE
     PALETTE_MODE_INFO *pmi_uv, int palette_ctx,
 #endif  // CONFIG_PALETTE
@@ -7934,7 +7945,7 @@
     int *returnrate_nocoef,
 #endif  // CONFIG_SUPERTX
     int64_t *best_pred_rd, MB_MODE_INFO *best_mbmode, RD_COST *rd_cost) {
-  AV1_COMMON *const cm = &cpi->common;
+  const AV1_COMMON *const cm = &cpi->common;
   MACROBLOCKD *const xd = &x->e_mbd;
   MB_MODE_INFO *const mbmi = &xd->mi[0]->mbmi;
 #if CONFIG_PALETTE
@@ -8096,7 +8107,7 @@
                                       int left_stride);
 #endif  // CONFIG_MOTION_VAR
 
-void av1_rd_pick_inter_mode_sb(AV1_COMP *cpi, TileDataEnc *tile_data,
+void av1_rd_pick_inter_mode_sb(const AV1_COMP *cpi, TileDataEnc *tile_data,
                                MACROBLOCK *x, int mi_row, int mi_col,
                                RD_COST *rd_cost,
 #if CONFIG_SUPERTX
@@ -8104,9 +8115,9 @@
 #endif  // CONFIG_SUPERTX
                                BLOCK_SIZE bsize, PICK_MODE_CONTEXT *ctx,
                                int64_t best_rd_so_far) {
-  AV1_COMMON *const cm = &cpi->common;
-  RD_OPT *const rd_opt = &cpi->rd;
-  SPEED_FEATURES *const sf = &cpi->sf;
+  const AV1_COMMON *const cm = &cpi->common;
+  const RD_OPT *const rd_opt = &cpi->rd;
+  const SPEED_FEATURES *const sf = &cpi->sf;
   MACROBLOCKD *const xd = &x->e_mbd;
   MB_MODE_INFO *const mbmi = &xd->mi[0]->mbmi;
 #if CONFIG_PALETTE
@@ -8160,8 +8171,8 @@
   unsigned int best_pred_sse = UINT_MAX;
   PREDICTION_MODE best_intra_mode = DC_PRED;
   int rate_uv_intra[TX_SIZES], rate_uv_tokenonly[TX_SIZES];
-  int64_t dist_uv[TX_SIZES];
-  int skip_uv[TX_SIZES];
+  int64_t dist_uvs[TX_SIZES];
+  int skip_uvs[TX_SIZES];
   PREDICTION_MODE mode_uv[TX_SIZES];
 #if CONFIG_PALETTE
   PALETTE_MODE_INFO pmi_uv[TX_SIZES];
@@ -8711,8 +8722,8 @@
                               [pd->subsampling_y];
       if (rate_uv_intra[uv_tx] == INT_MAX) {
         choose_intra_uv_mode(cpi, x, ctx, bsize, uv_tx, &rate_uv_intra[uv_tx],
-                             &rate_uv_tokenonly[uv_tx], &dist_uv[uv_tx],
-                             &skip_uv[uv_tx], &mode_uv[uv_tx]);
+                             &rate_uv_tokenonly[uv_tx], &dist_uvs[uv_tx],
+                             &skip_uvs[uv_tx], &mode_uv[uv_tx]);
 #if CONFIG_PALETTE
         if (cm->allow_screen_content_tools) pmi_uv[uv_tx] = *pmi;
 #endif  // CONFIG_PALETTE
@@ -8724,8 +8735,8 @@
       }
 
       rate_uv = rate_uv_tokenonly[uv_tx];
-      distortion_uv = dist_uv[uv_tx];
-      skippable = skippable && skip_uv[uv_tx];
+      distortion_uv = dist_uvs[uv_tx];
+      skippable = skippable && skip_uvs[uv_tx];
       mbmi->uv_mode = mode_uv[uv_tx];
 #if CONFIG_PALETTE
       if (cm->allow_screen_content_tools) {
@@ -8943,7 +8954,6 @@
                                                   };
             int dummy_single_skippable[MB_MODE_COUNT]
                                       [TOTAL_REFS_PER_FRAME] = { { 0 } };
-            int dummy_disable_skip = 0;
 #if CONFIG_EXT_INTER
             int_mv dummy_single_newmvs[2][TOTAL_REFS_PER_FRAME] = { { { 0 } },
                                                                     { { 0 } } };
@@ -9325,7 +9335,7 @@
     int best_rate_nocoef;
 #endif
     int64_t distortion2 = 0, distortion_y = 0, dummy_rd = best_rd, this_rd;
-    int skippable = 0, rate_overhead = 0;
+    int skippable = 0;
     TX_SIZE best_tx_size, uv_tx;
     TX_TYPE best_tx_type;
     PALETTE_MODE_INFO palette_mode_info;
@@ -9333,6 +9343,7 @@
         x->palette_buffer->best_palette_color_map;
     uint8_t *const color_map = xd->plane[0].color_index_map;
 
+    rate_overhead = 0;
     mbmi->mode = DC_PRED;
     mbmi->uv_mode = DC_PRED;
     mbmi->ref_frame[0] = INTRA_FRAME;
@@ -9358,8 +9369,8 @@
                             [xd->plane[1].subsampling_y];
     if (rate_uv_intra[uv_tx] == INT_MAX) {
       choose_intra_uv_mode(cpi, x, ctx, bsize, uv_tx, &rate_uv_intra[uv_tx],
-                           &rate_uv_tokenonly[uv_tx], &dist_uv[uv_tx],
-                           &skip_uv[uv_tx], &mode_uv[uv_tx]);
+                           &rate_uv_tokenonly[uv_tx], &dist_uvs[uv_tx],
+                           &skip_uvs[uv_tx], &mode_uv[uv_tx]);
       pmi_uv[uv_tx] = *pmi;
 #if CONFIG_EXT_INTRA
       ext_intra_mode_info_uv[uv_tx] = mbmi->ext_intra_mode_info;
@@ -9381,8 +9392,8 @@
           ext_intra_mode_info_uv[uv_tx].ext_intra_mode[1];
     }
 #endif  // CONFIG_EXT_INTRA
-    skippable = skippable && skip_uv[uv_tx];
-    distortion2 = distortion_y + dist_uv[uv_tx];
+    skippable = skippable && skip_uvs[uv_tx];
+    distortion2 = distortion_y + dist_uvs[uv_tx];
     rate2 = rate_y + rate_overhead + rate_uv_intra[uv_tx];
     rate2 += ref_costs_single[INTRA_FRAME];
 
@@ -9427,8 +9438,8 @@
       !dc_skipped && best_mode_index >= 0 &&
       best_intra_rd < (best_rd + (best_rd >> 3))) {
     pick_ext_intra_interframe(
-        cpi, x, ctx, bsize, rate_uv_intra, rate_uv_tokenonly, dist_uv, skip_uv,
-        mode_uv, ext_intra_mode_info_uv, uv_angle_delta,
+        cpi, x, ctx, bsize, rate_uv_intra, rate_uv_tokenonly, dist_uvs,
+        skip_uvs, mode_uv, ext_intra_mode_info_uv, uv_angle_delta,
 #if CONFIG_PALETTE
         pmi_uv, palette_ctx,
 #endif  // CONFIG_PALETTE
@@ -9466,7 +9477,6 @@
 #endif  // CONFIG_GLOBAL_MOTION
 #if CONFIG_REF_MV
     if (!comp_pred_mode) {
-      int i;
       int ref_set = (mbmi_ext->ref_mv_count[rf_type] >= 2)
                         ? AOMMIN(2, mbmi_ext->ref_mv_count[rf_type] - 2)
                         : INT_MAX;
@@ -9496,7 +9506,6 @@
         nearmv[1] = frame_mv[NEARMV][refs[1]];
       }
 #else
-      int i;
       int ref_set = (mbmi_ext->ref_mv_count[rf_type] >= 2)
                         ? AOMMIN(2, mbmi_ext->ref_mv_count[rf_type] - 2)
                         : INT_MAX;
@@ -9677,12 +9686,12 @@
 #endif  // CONFIG_PALETTE
 }
 
-void av1_rd_pick_inter_mode_sb_seg_skip(AV1_COMP *cpi, TileDataEnc *tile_data,
-                                        MACROBLOCK *x, RD_COST *rd_cost,
-                                        BLOCK_SIZE bsize,
+void av1_rd_pick_inter_mode_sb_seg_skip(const AV1_COMP *cpi,
+                                        TileDataEnc *tile_data, MACROBLOCK *x,
+                                        RD_COST *rd_cost, BLOCK_SIZE bsize,
                                         PICK_MODE_CONTEXT *ctx,
                                         int64_t best_rd_so_far) {
-  AV1_COMMON *const cm = &cpi->common;
+  const AV1_COMMON *const cm = &cpi->common;
   MACROBLOCKD *const xd = &x->e_mbd;
   MB_MODE_INFO *const mbmi = &xd->mi[0]->mbmi;
   unsigned char segment_id = mbmi->segment_id;
@@ -9813,17 +9822,18 @@
   store_coding_context(x, ctx, THR_ZEROMV, best_pred_diff, 0);
 }
 
-void av1_rd_pick_inter_mode_sub8x8(struct AV1_COMP *cpi, TileDataEnc *tile_data,
-                                   struct macroblock *x, int mi_row, int mi_col,
+void av1_rd_pick_inter_mode_sub8x8(const struct AV1_COMP *cpi,
+                                   TileDataEnc *tile_data, struct macroblock *x,
+                                   int mi_row, int mi_col,
                                    struct RD_COST *rd_cost,
 #if CONFIG_SUPERTX
                                    int *returnrate_nocoef,
 #endif  // CONFIG_SUPERTX
                                    BLOCK_SIZE bsize, PICK_MODE_CONTEXT *ctx,
                                    int64_t best_rd_so_far) {
-  AV1_COMMON *const cm = &cpi->common;
-  RD_OPT *const rd_opt = &cpi->rd;
-  SPEED_FEATURES *const sf = &cpi->sf;
+  const AV1_COMMON *const cm = &cpi->common;
+  const RD_OPT *const rd_opt = &cpi->rd;
+  const SPEED_FEATURES *const sf = &cpi->sf;
   MACROBLOCKD *const xd = &x->e_mbd;
   MB_MODE_INFO *const mbmi = &xd->mi[0]->mbmi;
   const struct segmentation *const seg = &cm->seg;
@@ -9950,7 +9960,6 @@
     int rate2 = 0, rate_y = 0, rate_uv = 0;
     int64_t distortion2 = 0, distortion_y = 0, distortion_uv = 0;
     int skippable = 0;
-    int i;
     int this_skip2 = 0;
     int64_t total_sse = INT_MAX;
 
diff --git a/av1/encoder/rdopt.h b/av1/encoder/rdopt.h
index 584c439..16afaf3 100644
--- a/av1/encoder/rdopt.h
+++ b/av1/encoder/rdopt.h
@@ -29,20 +29,20 @@
 int av1_cost_coeffs(MACROBLOCK *x, int plane, int block, int coeff_ctx,
                     TX_SIZE tx_size, const int16_t *scan, const int16_t *nb,
                     int use_fast_coef_costing);
-void av1_rd_pick_intra_mode_sb(struct AV1_COMP *cpi, struct macroblock *x,
+void av1_rd_pick_intra_mode_sb(const struct AV1_COMP *cpi, struct macroblock *x,
                                struct RD_COST *rd_cost, BLOCK_SIZE bsize,
                                PICK_MODE_CONTEXT *ctx, int64_t best_rd);
 
-unsigned int av1_get_sby_perpixel_variance(AV1_COMP *cpi,
+unsigned int av1_get_sby_perpixel_variance(const AV1_COMP *cpi,
                                            const struct buf_2d *ref,
                                            BLOCK_SIZE bs);
 #if CONFIG_AOM_HIGHBITDEPTH
-unsigned int av1_high_get_sby_perpixel_variance(AV1_COMP *cpi,
+unsigned int av1_high_get_sby_perpixel_variance(const AV1_COMP *cpi,
                                                 const struct buf_2d *ref,
                                                 BLOCK_SIZE bs, int bd);
 #endif
 
-void av1_rd_pick_inter_mode_sb(struct AV1_COMP *cpi,
+void av1_rd_pick_inter_mode_sb(const struct AV1_COMP *cpi,
                                struct TileDataEnc *tile_data,
                                struct macroblock *x, int mi_row, int mi_col,
                                struct RD_COST *rd_cost,
@@ -53,16 +53,16 @@
                                int64_t best_rd_so_far);
 
 void av1_rd_pick_inter_mode_sb_seg_skip(
-    struct AV1_COMP *cpi, struct TileDataEnc *tile_data, struct macroblock *x,
-    struct RD_COST *rd_cost, BLOCK_SIZE bsize, PICK_MODE_CONTEXT *ctx,
-    int64_t best_rd_so_far);
+    const struct AV1_COMP *cpi, struct TileDataEnc *tile_data,
+    struct macroblock *x, struct RD_COST *rd_cost, BLOCK_SIZE bsize,
+    PICK_MODE_CONTEXT *ctx, int64_t best_rd_so_far);
 
-int av1_internal_image_edge(struct AV1_COMP *cpi);
-int av1_active_h_edge(struct AV1_COMP *cpi, int mi_row, int mi_step);
-int av1_active_v_edge(struct AV1_COMP *cpi, int mi_col, int mi_step);
-int av1_active_edge_sb(struct AV1_COMP *cpi, int mi_row, int mi_col);
+int av1_internal_image_edge(const struct AV1_COMP *cpi);
+int av1_active_h_edge(const struct AV1_COMP *cpi, int mi_row, int mi_step);
+int av1_active_v_edge(const struct AV1_COMP *cpi, int mi_col, int mi_step);
+int av1_active_edge_sb(const struct AV1_COMP *cpi, int mi_row, int mi_col);
 
-void av1_rd_pick_inter_mode_sub8x8(struct AV1_COMP *cpi,
+void av1_rd_pick_inter_mode_sub8x8(const struct AV1_COMP *cpi,
                                    struct TileDataEnc *tile_data,
                                    struct macroblock *x, int mi_row, int mi_col,
                                    struct RD_COST *rd_cost,
diff --git a/av1/encoder/temporal_filter.c b/av1/encoder/temporal_filter.c
index dafb6ff..2285e46 100644
--- a/av1/encoder/temporal_filter.c
+++ b/av1/encoder/temporal_filter.c
@@ -336,7 +336,7 @@
         ((mb_rows - 1 - mb_row) * 16) + (17 - 2 * AOM_INTERP_EXTEND);
 
     for (mb_col = 0; mb_col < mb_cols; mb_col++) {
-      int i, j, k;
+      int j, k;
       int stride;
 
       memset(accumulator, 0, 16 * 16 * 3 * sizeof(accumulator[0]));
diff --git a/av1/encoder/tokenize.c b/av1/encoder/tokenize.c
index 173c934..e95e52b 100644
--- a/av1/encoder/tokenize.c
+++ b/av1/encoder/tokenize.c
@@ -344,7 +344,7 @@
 #endif  // !CONFIG_ANS
 
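 // Arguments threaded through the per-block tokenize callbacks.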
 struct tokenize_b_args {
-  AV1_COMP *cpi;
+  const AV1_COMP *cpi;
   ThreadData *td;
   TOKENEXTRA **tp;
   int this_rate;
@@ -362,11 +362,11 @@
   const PLANE_TYPE type = pd->plane_type;
   const int ref = is_inter_block(mbmi);
   const TX_TYPE tx_type = get_tx_type(type, xd, block, tx_size);
-  const scan_order *const so = get_scan(tx_size, tx_type, ref);
+  const SCAN_ORDER *const scan_order = get_scan(tx_size, tx_type, ref);
   int pt = get_entropy_context(tx_size, pd->above_context + blk_col,
                                pd->left_context + blk_row);
-  int rate =
-      av1_cost_coeffs(x, plane, block, pt, tx_size, so->scan, so->neighbors, 0);
+  int rate = av1_cost_coeffs(x, plane, block, pt, tx_size, scan_order->scan,
+                             scan_order->neighbors, 0);
   args->this_rate += rate;
   av1_set_contexts(xd, pd, plane_bsize, tx_size, p->eobs[block] > 0, blk_col,
                    blk_row);
@@ -409,7 +409,7 @@
 }
 
 #if CONFIG_PALETTE
-void av1_tokenize_palette_sb(AV1_COMP *cpi, struct ThreadData *const td,
+void av1_tokenize_palette_sb(const AV1_COMP *cpi, struct ThreadData *const td,
                              int plane, TOKENEXTRA **t, RUN_TYPE dry_run,
                              BLOCK_SIZE bsize, int *rate) {
   MACROBLOCK *const x = &td->mb;
@@ -454,7 +454,7 @@
 static void tokenize_b(int plane, int block, int blk_row, int blk_col,
                        BLOCK_SIZE plane_bsize, TX_SIZE tx_size, void *arg) {
   struct tokenize_b_args *const args = arg;
-  AV1_COMP *cpi = args->cpi;
+  const AV1_COMP *cpi = args->cpi;
   ThreadData *const td = args->td;
   MACROBLOCK *const x = &td->mb;
   MACROBLOCKD *const xd = &x->e_mbd;
@@ -476,12 +476,13 @@
 #endif  // CONFIG_SUPERTX
   const int16_t *scan, *nb;
   const TX_TYPE tx_type = get_tx_type(type, xd, block, tx_size);
-  const scan_order *const so = get_scan(tx_size, tx_type, is_inter_block(mbmi));
+  const SCAN_ORDER *const scan_order =
+      get_scan(tx_size, tx_type, is_inter_block(mbmi));
   const int ref = is_inter_block(mbmi);
   unsigned int(*const counts)[COEFF_CONTEXTS][ENTROPY_TOKENS] =
       td->rd_counts.coef_counts[txsize_sqr_map[tx_size]][type][ref];
 #if CONFIG_ENTROPY
-  aom_prob(*coef_probs)[COEFF_CONTEXTS][UNCONSTRAINED_NODES] =
+  const aom_prob(*coef_probs)[COEFF_CONTEXTS][UNCONSTRAINED_NODES] =
       cpi->subframe_stats.coef_probs_buf[cpi->common.coef_probs_update_idx]
                                         [txsize_sqr_map[tx_size]][type][ref];
 #else
@@ -501,8 +502,8 @@
   EXTRABIT extra;
   pt = get_entropy_context(tx_size, pd->above_context + blk_col,
                            pd->left_context + blk_row);
-  scan = so->scan;
-  nb = so->neighbors;
+  scan = scan_order->scan;
+  nb = scan_order->neighbors;
   c = 0;
 
   while (c < eob) {
@@ -612,8 +613,7 @@
             : mbmi->inter_tx_size[tx_row][tx_col];
 
   if (tx_size == plane_tx_size) {
-    const struct macroblockd_plane *const pd = &xd->plane[plane];
-    BLOCK_SIZE plane_bsize = get_plane_block_size(mbmi->sb_type, pd);
+    plane_bsize = get_plane_block_size(mbmi->sb_type, pd);
     if (!dry_run)
       tokenize_b(plane, block, blk_row, blk_col, plane_bsize, tx_size, arg);
     else if (dry_run == DRY_RUN_NORMAL)
@@ -641,10 +641,10 @@
   }
 }
 
-void av1_tokenize_sb_vartx(AV1_COMP *cpi, ThreadData *td, TOKENEXTRA **t,
+void av1_tokenize_sb_vartx(const AV1_COMP *cpi, ThreadData *td, TOKENEXTRA **t,
                            RUN_TYPE dry_run, int mi_row, int mi_col,
                            BLOCK_SIZE bsize, int *rate) {
-  AV1_COMMON *const cm = &cpi->common;
+  const AV1_COMMON *const cm = &cpi->common;
   MACROBLOCK *const x = &td->mb;
   MACROBLOCKD *const xd = &x->e_mbd;
   MB_MODE_INFO *const mbmi = &xd->mi[0]->mbmi;
@@ -696,9 +696,9 @@
 }
 #endif  // CONFIG_VAR_TX
 
-void av1_tokenize_sb(AV1_COMP *cpi, ThreadData *td, TOKENEXTRA **t,
+void av1_tokenize_sb(const AV1_COMP *cpi, ThreadData *td, TOKENEXTRA **t,
                      RUN_TYPE dry_run, BLOCK_SIZE bsize, int *rate) {
-  AV1_COMMON *const cm = &cpi->common;
+  const AV1_COMMON *const cm = &cpi->common;
   MACROBLOCK *const x = &td->mb;
   MACROBLOCKD *const xd = &x->e_mbd;
   MB_MODE_INFO *const mbmi = &xd->mi[0]->mbmi;
@@ -732,9 +732,10 @@
 }
 
 #if CONFIG_SUPERTX
-void av1_tokenize_sb_supertx(AV1_COMP *cpi, ThreadData *td, TOKENEXTRA **t,
-                             RUN_TYPE dry_run, BLOCK_SIZE bsize, int *rate) {
-  AV1_COMMON *const cm = &cpi->common;
+void av1_tokenize_sb_supertx(const AV1_COMP *cpi, ThreadData *td,
+                             TOKENEXTRA **t, RUN_TYPE dry_run, BLOCK_SIZE bsize,
+                             int *rate) {
+  const AV1_COMMON *const cm = &cpi->common;
   MACROBLOCKD *const xd = &td->mb.e_mbd;
   MB_MODE_INFO *const mbmi = &xd->mi[0]->mbmi;
   TOKENEXTRA *t_backup = *t;
diff --git a/av1/encoder/tokenize.h b/av1/encoder/tokenize.h
index 677d041..e869a19 100644
--- a/av1/encoder/tokenize.h
+++ b/av1/encoder/tokenize.h
@@ -66,20 +66,21 @@
 // with the coefficient token cost only if dry_run = DRY_RUN_COSTCOEFS,
 // otherwise rate is not incremented.
 #if CONFIG_VAR_TX
-void av1_tokenize_sb_vartx(struct AV1_COMP *cpi, struct ThreadData *td,
+void av1_tokenize_sb_vartx(const struct AV1_COMP *cpi, struct ThreadData *td,
                            TOKENEXTRA **t, RUN_TYPE dry_run, int mi_row,
                            int mi_col, BLOCK_SIZE bsize, int *rate);
 #endif
 #if CONFIG_PALETTE
-void av1_tokenize_palette_sb(struct AV1_COMP *cpi, struct ThreadData *const td,
-                             int plane, TOKENEXTRA **t, RUN_TYPE dry_run,
-                             BLOCK_SIZE bsize, int *rate);
+void av1_tokenize_palette_sb(const struct AV1_COMP *cpi,
+                             struct ThreadData *const td, int plane,
+                             TOKENEXTRA **t, RUN_TYPE dry_run, BLOCK_SIZE bsize,
+                             int *rate);
 #endif  // CONFIG_PALETTE
-void av1_tokenize_sb(struct AV1_COMP *cpi, struct ThreadData *td,
+void av1_tokenize_sb(const struct AV1_COMP *cpi, struct ThreadData *td,
                      TOKENEXTRA **t, RUN_TYPE dry_run, BLOCK_SIZE bsize,
                      int *rate);
 #if CONFIG_SUPERTX
-void av1_tokenize_sb_supertx(struct AV1_COMP *cpi, struct ThreadData *td,
+void av1_tokenize_sb_supertx(const struct AV1_COMP *cpi, struct ThreadData *td,
                              TOKENEXTRA **t, RUN_TYPE dry_run, BLOCK_SIZE bsize,
                              int *rate);
 #endif
diff --git a/av1/encoder/variance_tree.h b/av1/encoder/variance_tree.h
index 728d7f4..6397084 100644
--- a/av1/encoder/variance_tree.h
+++ b/av1/encoder/variance_tree.h
@@ -31,12 +31,12 @@
   int64_t sum_error;
   int log2_count;
   int variance;
-} var;
+} VAR;
 
 typedef struct {
-  var none;
-  var horz[2];
-  var vert[2];
+  VAR none;
+  VAR horz[2];
+  VAR vert[2];
 } partition_variance;
 
 typedef struct VAR_TREE {
@@ -59,7 +59,7 @@
 void av1_free_var_tree(struct ThreadData *td);
 
 // Set variance values given sum square error, sum error, count.
-static INLINE void fill_variance(int64_t s2, int64_t s, int c, var *v) {
+static INLINE void fill_variance(int64_t s2, int64_t s, int c, VAR *v) {
   v->sum_square_error = s2;
   v->sum_error = s;
   v->log2_count = c;
@@ -69,7 +69,7 @@
             v->log2_count);
 }
 
-static INLINE void sum_2_variances(const var *a, const var *b, var *r) {
+static INLINE void sum_2_variances(const VAR *a, const VAR *b, VAR *r) {
   assert(a->log2_count == b->log2_count);
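   // Merging two equal-sized partitions doubles the sample count, hence
   // log2_count + 1.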
   fill_variance(a->sum_square_error + b->sum_square_error,
                 a->sum_error + b->sum_error, a->log2_count + 1, r);
diff --git a/configure b/configure
index f7fb2ca..611756f 100755
--- a/configure
+++ b/configure
@@ -610,12 +610,15 @@
         check_add_cflags -Wfloat-conversion
         check_add_cflags -Wpointer-arith
         check_add_cflags -Wtype-limits
-        check_add_cflags -Wcast-qual
         check_add_cflags -Wvla
         check_add_cflags -Wimplicit-function-declaration
         check_add_cflags -Wuninitialized
         check_add_cflags -Wunused-variable
         check_add_cflags -Wsign-compare
+        # Enabling -Wshadow for C++ generates spurious warnings about function
+        # parameters shadowing class member function names, so only enable it
+        # for C code.
+        check_cflags "-Wshadow" && add_cflags_only "-Wshadow"
         case ${CC} in
           *clang*) ;;
           *) check_add_cflags -Wunused-but-set-variable ;;
diff --git a/test/av1_fwd_txfm1d_test.cc b/test/av1_fwd_txfm1d_test.cc
index 03bed19..f671097 100644
--- a/test/av1_fwd_txfm1d_test.cc
+++ b/test/av1_fwd_txfm1d_test.cc
@@ -23,10 +23,15 @@
 const TYPE_TXFM txfm_type_ls[2] = { TYPE_DCT, TYPE_ADST };
 
 const int txfm_size_num = 5;
-const int txfm_size_ls[5] = { 4, 8, 16, 32 };
+const int txfm_size_ls[5] = { 4, 8, 16, 32, 64 };
 
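 // Forward transform functions indexed by [type][size]; the 64-point DCT
 // exists only with CONFIG_TX64X64, and there is no 64-point ADST.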
 const TxfmFunc fwd_txfm_func_ls[2][5] = {
+#if CONFIG_TX64X64
+  { av1_fdct4_new, av1_fdct8_new, av1_fdct16_new, av1_fdct32_new,
+    av1_fdct64_new },
+#else
   { av1_fdct4_new, av1_fdct8_new, av1_fdct16_new, av1_fdct32_new, NULL },
+#endif
   { av1_fadst4_new, av1_fadst8_new, av1_fadst16_new, av1_fadst32_new, NULL }
 };
 
diff --git a/test/av1_inv_txfm1d_test.cc b/test/av1_inv_txfm1d_test.cc
index 110d4c3..8470fc0 100644
--- a/test/av1_inv_txfm1d_test.cc
+++ b/test/av1_inv_txfm1d_test.cc
@@ -18,15 +18,25 @@
 namespace {
 const int txfm_type_num = 2;
 const int txfm_size_num = 5;
-const int txfm_size_ls[5] = { 4, 8, 16, 32 };
+const int txfm_size_ls[5] = { 4, 8, 16, 32, 64 };
 
 const TxfmFunc fwd_txfm_func_ls[2][5] = {
+#if CONFIG_TX64X64
+  { av1_fdct4_new, av1_fdct8_new, av1_fdct16_new, av1_fdct32_new,
+    av1_fdct64_new },
+#else
   { av1_fdct4_new, av1_fdct8_new, av1_fdct16_new, av1_fdct32_new, NULL },
+#endif
   { av1_fadst4_new, av1_fadst8_new, av1_fadst16_new, av1_fadst32_new, NULL }
 };
 
 const TxfmFunc inv_txfm_func_ls[2][5] = {
+#if CONFIG_TX64X64
+  { av1_idct4_new, av1_idct8_new, av1_idct16_new, av1_idct32_new,
+    av1_idct64_new },
+#else
   { av1_idct4_new, av1_idct8_new, av1_idct16_new, av1_idct32_new, NULL },
+#endif
   { av1_iadst4_new, av1_iadst8_new, av1_iadst16_new, av1_iadst32_new, NULL }
 };
 
diff --git a/test/av1_inv_txfm_test.cc b/test/av1_inv_txfm_test.cc
index 83a7680..f637d51 100644
--- a/test/av1_inv_txfm_test.cc
+++ b/test/av1_inv_txfm_test.cc
@@ -138,7 +138,6 @@
 };
 
 TEST_P(AV1PartialIDctTest, RunQuantCheck) {
-  ACMRandom rnd(ACMRandom::DeterministicSeed());
   int size;
   switch (tx_size_) {
     case TX_4X4: size = 4; break;
@@ -159,7 +158,7 @@
   DECLARE_ALIGNED(16, tran_low_t, output_ref_block[kMaxNumCoeffs]);
 
   int max_error = 0;
-  for (int i = 0; i < count_test_block; ++i) {
+  for (int m = 0; m < count_test_block; ++m) {
     // clear out destination buffer
     memset(dst1, 0, sizeof(*dst1) * block_size);
     memset(dst2, 0, sizeof(*dst2) * block_size);
@@ -168,11 +167,11 @@
 
     ACMRandom rnd(ACMRandom::DeterministicSeed());
 
-    for (int i = 0; i < count_test_block; ++i) {
+    for (int n = 0; n < count_test_block; ++n) {
       // Initialize a test block with input range [-255, 255].
-      if (i == 0) {
+      if (n == 0) {
         for (int j = 0; j < block_size; ++j) input_extreme_block[j] = 255;
-      } else if (i == 1) {
+      } else if (n == 1) {
         for (int j = 0; j < block_size; ++j) input_extreme_block[j] = -255;
       } else {
         for (int j = 0; j < block_size; ++j) {
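
Two independent shadowing fixes here, and test/partial_idct_test.cc below
receives the same treatment: the function-scope `rnd` was shadowed by the one
declared inside the trial loop (so the outer copy is simply deleted), and the
nested trial loops both used `i`. The loop problem in miniature, with a
hypothetical wrapper:

    static void trials(int count) {
      for (int m = 0; m < count; ++m) {    // was 'int i'
        for (int n = 0; n < count; ++n) {  // was also 'int i': -Wshadow flags
        }                                  // the inner declaration
      }
    }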
diff --git a/test/av1_quantize_test.cc b/test/av1_quantize_test.cc
index db1c969..4e1aabd 100644
--- a/test/av1_quantize_test.cc
+++ b/test/av1_quantize_test.cc
@@ -70,7 +70,7 @@
     QuantizeFpFunc quanFunc = params_.qFunc;
     QuantizeFpFunc quanFuncRef = params_.qFuncRef;
 
-    const scan_order scanOrder = av1_default_scan_orders[txSize];
+    const SCAN_ORDER scanOrder = av1_default_scan_orders[txSize];
     for (int i = 0; i < numTests; i++) {
       int err_count = 0;
       ref_eob = eob = -1;
@@ -137,7 +137,7 @@
     int log_scale = (txSize == TX_32X32);
     QuantizeFpFunc quanFunc = params_.qFunc;
     QuantizeFpFunc quanFuncRef = params_.qFuncRef;
-    const scan_order scanOrder = av1_default_scan_orders[txSize];
+    const SCAN_ORDER scanOrder = av1_default_scan_orders[txSize];
 
     for (int i = 0; i < numTests; i++) {
       ref_eob = eob = -1;
diff --git a/test/boolcoder_test.cc b/test/boolcoder_test.cc
index 1000f58..c0e6d48 100644
--- a/test/boolcoder_test.cc
+++ b/test/boolcoder_test.cc
@@ -68,11 +68,13 @@
 
         aom_stop_encode(&bw);
 
+#if !CONFIG_DAALA_EC
         // First bit should be zero
         GTEST_ASSERT_EQ(bw_buffer[0] & 0x80, 0);
+#endif
 
         aom_reader br;
-        aom_reader_init(&br, bw_buffer, kBufferSize, NULL, NULL);
+        aom_reader_init(&br, bw_buffer, bw.pos, NULL, NULL);
         bit_rnd.Reset(random_seed);
         for (int i = 0; i < kBitsToTest; ++i) {
           if (bit_method == 2) {
@@ -110,15 +112,15 @@
     aom_stop_encode(&bw);
     aom_reader br;
     aom_reader_init(&br, bw_buffer, kBufferSize, NULL, NULL);
-    ptrdiff_t last_tell = aom_reader_tell(&br);
-    ptrdiff_t last_tell_frac = aom_reader_tell_frac(&br);
+    uint32_t last_tell = aom_reader_tell(&br);
+    uint32_t last_tell_frac = aom_reader_tell_frac(&br);
     double frac_diff_total = 0;
-    GTEST_ASSERT_GE(aom_reader_tell(&br), 0);
-    GTEST_ASSERT_LE(aom_reader_tell(&br), 1);
+    GTEST_ASSERT_GE(aom_reader_tell(&br), 0u);
+    GTEST_ASSERT_LE(aom_reader_tell(&br), 1u);
     for (int i = 0; i < kSymbols; i++) {
       aom_read(&br, p, NULL);
-      ptrdiff_t tell = aom_reader_tell(&br);
-      ptrdiff_t tell_frac = aom_reader_tell_frac(&br);
+      uint32_t tell = aom_reader_tell(&br);
+      uint32_t tell_frac = aom_reader_tell_frac(&br);
       GTEST_ASSERT_GE(tell, last_tell) << "tell: " << tell
                                        << ", last_tell: " << last_tell;
       GTEST_ASSERT_GE(tell_frac, last_tell_frac)
@@ -131,7 +133,7 @@
           fabs(((tell_frac - last_tell_frac) / 8.0) + log2(probability));
       last_tell_frac = tell_frac;
     }
-    const int expected = (int)(-kSymbols * log2(probability));
+    const uint32_t expected = (uint32_t)(-kSymbols * log2(probability));
     // Last tell should be close to the expected value.
     GTEST_ASSERT_LE(last_tell - expected, 20) << " last_tell: " << last_tell;
     // The average frac_diff error should be pretty small.
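
Two separate fixes meet in this file. The reader is now initialized with the
byte count the writer actually produced (bw.pos) instead of the whole
kBufferSize, and the tell variables become uint32_t, presumably to match the
return type of aom_reader_tell()/aom_reader_tell_frac(), which in turn forces
unsigned literals on the other side of the assertions. The signed/unsigned
half in isolation (the gtest macros are real; the test itself is illustrative):

    #include <cstdint>
    #include "gtest/gtest.h"

    TEST(TellSketch, UnsignedCompares) {
      const uint32_t tell = 0;
      GTEST_ASSERT_GE(tell, 0u);  // fine: both operands unsigned
      GTEST_ASSERT_LE(tell, 1u);
      // GTEST_ASSERT_GE(tell, 0);  // unsigned >= signed 0 is always true:
      //                               -Wsign-compare/-Wtype-limits territory
    }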
diff --git a/test/convolve_test.cc b/test/convolve_test.cc
index de1ae04..837b282 100644
--- a/test/convolve_test.cc
+++ b/test/convolve_test.cc
@@ -239,23 +239,24 @@
 
   // Vertical pass (transposed intermediate -> dst).
   {
-    uint16_t *src_ptr = intermediate_buffer;
+    const uint16_t *interm_ptr = intermediate_buffer;
     const int dst_next_row_stride = dst_stride - output_width;
     unsigned int i, j;
     for (i = 0; i < output_height; ++i) {
       for (j = 0; j < output_width; ++j) {
         // Apply filter...
-        const int temp = (src_ptr[0] * VFilter[0]) + (src_ptr[1] * VFilter[1]) +
-                         (src_ptr[2] * VFilter[2]) + (src_ptr[3] * VFilter[3]) +
-                         (src_ptr[4] * VFilter[4]) + (src_ptr[5] * VFilter[5]) +
-                         (src_ptr[6] * VFilter[6]) + (src_ptr[7] * VFilter[7]) +
-                         (AV1_FILTER_WEIGHT >> 1);  // Rounding
+        const int temp =
+            (interm_ptr[0] * VFilter[0]) + (interm_ptr[1] * VFilter[1]) +
+            (interm_ptr[2] * VFilter[2]) + (interm_ptr[3] * VFilter[3]) +
+            (interm_ptr[4] * VFilter[4]) + (interm_ptr[5] * VFilter[5]) +
+            (interm_ptr[6] * VFilter[6]) + (interm_ptr[7] * VFilter[7]) +
+            (AV1_FILTER_WEIGHT >> 1);  // Rounding
 
         // Normalize back to 0-255...
         *dst_ptr++ = clip_pixel_highbd(temp >> AV1_FILTER_SHIFT, bd);
-        src_ptr += intermediate_height;
+        interm_ptr += intermediate_height;
       }
-      src_ptr += intermediate_next_stride;
+      interm_ptr += intermediate_next_stride;
       dst_ptr += dst_next_row_stride;
     }
   }
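
The rename is worth a gloss: the vertical pass reads the intermediate that the
horizontal pass stored transposed, so the filter taps for one output pixel are
contiguous in memory, advancing to the next output x means jumping one whole
column (intermediate_height elements), and intermediate_next_stride steps back
to column 0 one row further down. Keeping the name src_ptr presumably shadowed
the earlier src_ptr in the same function once -Wshadow is on, and the pointer
can be const since this pass only reads. The geometry, reduced to a 2-tap
sketch with hypothetical names:

    #include <stdint.h>

    // Vertical pass over a transposed intermediate: columns are contiguous.
    static void vert_pass(const uint16_t *interm, int interm_height,
                          uint8_t *dst, int w, int h) {
      for (int i = 0; i < h; ++i) {
        for (int j = 0; j < w; ++j) {
          const int sum = interm[0] + interm[1];  // adjacent taps
          dst[i * w + j] = (uint8_t)(sum / 2);
          interm += interm_height;        // next column == next output x
        }
        interm += 1 - w * interm_height;  // column 0, one row down
      }
    }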
diff --git a/test/lpf_8_test.cc b/test/lpf_8_test.cc
index 5d814f4..318df19 100644
--- a/test/lpf_8_test.cc
+++ b/test/lpf_8_test.cc
@@ -160,7 +160,7 @@
         loopfilter_op_(s + 8 + p * 8, p, blimit, limit, thresh));
 #endif  // CONFIG_AOM_HIGHBITDEPTH
 
-    for (int j = 0; j < kNumCoeffs; ++j) {
+    for (j = 0; j < kNumCoeffs; ++j) {
       err_count += ref_s[j] != s[j];
     }
     if (err_count && !err_count_total) {
@@ -324,7 +324,7 @@
     ASM_REGISTER_STATE_CHECK(loopfilter_op_(s + 8 + p * 8, p, blimit0, limit0,
                                             thresh0, blimit1, limit1, thresh1));
 #endif  // CONFIG_AOM_HIGHBITDEPTH
-    for (int j = 0; j < kNumCoeffs; ++j) {
+    for (j = 0; j < kNumCoeffs; ++j) {
       err_count += ref_s[j] != s[j];
     }
     if (err_count && !err_count_total) {
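
Unlike the renames elsewhere in this patch, these two hunks resolve the
shadowing in the opposite direction: `j` is evidently already declared further
up the function, so the inner declaration is dropped and the existing variable
reused. In miniature (the surrounding context is hypothetical):

    static int count_errors(const int *ref_s, const int *s, int n) {
      int j;  // declared once at the top, as in the test
      int err_count = 0;
      for (j = 0; j < n; ++j) {  // was 'for (int j = ...)', which -Wshadow
        err_count += ref_s[j] != s[j];  // flagged as shadowing the outer j
      }
      return err_count;
    }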
diff --git a/test/partial_idct_test.cc b/test/partial_idct_test.cc
index c18f48b..7eedfaf 100644
--- a/test/partial_idct_test.cc
+++ b/test/partial_idct_test.cc
@@ -55,7 +55,6 @@
 };
 
 TEST_P(PartialIDctTest, RunQuantCheck) {
-  ACMRandom rnd(ACMRandom::DeterministicSeed());
   int size;
   switch (tx_size_) {
     case TX_4X4: size = 4; break;
@@ -76,7 +75,7 @@
   DECLARE_ALIGNED(16, tran_low_t, output_ref_block[kMaxNumCoeffs]);
 
   int max_error = 0;
-  for (int i = 0; i < count_test_block; ++i) {
+  for (int m = 0; m < count_test_block; ++m) {
     // clear out destination buffer
     memset(dst1, 0, sizeof(*dst1) * block_size);
     memset(dst2, 0, sizeof(*dst2) * block_size);
@@ -85,11 +84,11 @@
 
     ACMRandom rnd(ACMRandom::DeterministicSeed());
 
-    for (int i = 0; i < count_test_block; ++i) {
+    for (int n = 0; n < count_test_block; ++n) {
       // Initialize a test block with input range [-255, 255].
-      if (i == 0) {
+      if (n == 0) {
         for (int j = 0; j < block_size; ++j) input_extreme_block[j] = 255;
-      } else if (i == 1) {
+      } else if (n == 1) {
         for (int j = 0; j < block_size; ++j) input_extreme_block[j] = -255;
       } else {
         for (int j = 0; j < block_size; ++j) {