Merge "msvs_common.sh: make fix_file_list generic"
diff --git a/test/convolve_test.cc b/test/convolve_test.cc
index 12022be..22a2e77 100644
--- a/test/convolve_test.cc
+++ b/test/convolve_test.cc
@@ -69,6 +69,21 @@
 
 typedef std::tr1::tuple<int, int, const ConvolveFunctions *> ConvolveParam;
 
+#define ALL_SIZES(convolve_fn) \
+    make_tuple(4, 4, &convolve_fn),     \
+    make_tuple(8, 4, &convolve_fn),     \
+    make_tuple(4, 8, &convolve_fn),     \
+    make_tuple(8, 8, &convolve_fn),     \
+    make_tuple(16, 8, &convolve_fn),    \
+    make_tuple(8, 16, &convolve_fn),    \
+    make_tuple(16, 16, &convolve_fn),   \
+    make_tuple(32, 16, &convolve_fn),   \
+    make_tuple(16, 32, &convolve_fn),   \
+    make_tuple(32, 32, &convolve_fn),   \
+    make_tuple(64, 32, &convolve_fn),   \
+    make_tuple(32, 64, &convolve_fn),   \
+    make_tuple(64, 64, &convolve_fn)
+
 // Reference 8-tap subpixel filter, slightly modified to fit into this test.
 #define VP9_FILTER_WEIGHT 128
 #define VP9_FILTER_SHIFT 7
@@ -1034,20 +1049,6 @@
     wrap_convolve8_horiz_c_8, wrap_convolve8_avg_horiz_c_8,
     wrap_convolve8_vert_c_8, wrap_convolve8_avg_vert_c_8,
     wrap_convolve8_c_8, wrap_convolve8_avg_c_8, 8);
-INSTANTIATE_TEST_CASE_P(C_8, ConvolveTest, ::testing::Values(
-    make_tuple(4, 4, &convolve8_c),
-    make_tuple(8, 4, &convolve8_c),
-    make_tuple(4, 8, &convolve8_c),
-    make_tuple(8, 8, &convolve8_c),
-    make_tuple(16, 8, &convolve8_c),
-    make_tuple(8, 16, &convolve8_c),
-    make_tuple(16, 16, &convolve8_c),
-    make_tuple(32, 16, &convolve8_c),
-    make_tuple(16, 32, &convolve8_c),
-    make_tuple(32, 32, &convolve8_c),
-    make_tuple(64, 32, &convolve8_c),
-    make_tuple(32, 64, &convolve8_c),
-    make_tuple(64, 64, &convolve8_c)));
 const ConvolveFunctions convolve10_c(
     wrap_convolve_copy_c_10, wrap_convolve_avg_c_10,
     wrap_convolve8_horiz_c_10, wrap_convolve8_avg_horiz_c_10,
@@ -1056,20 +1057,6 @@
     wrap_convolve8_horiz_c_10, wrap_convolve8_avg_horiz_c_10,
     wrap_convolve8_vert_c_10, wrap_convolve8_avg_vert_c_10,
     wrap_convolve8_c_10, wrap_convolve8_avg_c_10, 10);
-INSTANTIATE_TEST_CASE_P(C_10, ConvolveTest, ::testing::Values(
-    make_tuple(4, 4, &convolve10_c),
-    make_tuple(8, 4, &convolve10_c),
-    make_tuple(4, 8, &convolve10_c),
-    make_tuple(8, 8, &convolve10_c),
-    make_tuple(16, 8, &convolve10_c),
-    make_tuple(8, 16, &convolve10_c),
-    make_tuple(16, 16, &convolve10_c),
-    make_tuple(32, 16, &convolve10_c),
-    make_tuple(16, 32, &convolve10_c),
-    make_tuple(32, 32, &convolve10_c),
-    make_tuple(64, 32, &convolve10_c),
-    make_tuple(32, 64, &convolve10_c),
-    make_tuple(64, 64, &convolve10_c)));
 const ConvolveFunctions convolve12_c(
     wrap_convolve_copy_c_12, wrap_convolve_avg_c_12,
     wrap_convolve8_horiz_c_12, wrap_convolve8_avg_horiz_c_12,
@@ -1078,23 +1065,13 @@
     wrap_convolve8_horiz_c_12, wrap_convolve8_avg_horiz_c_12,
     wrap_convolve8_vert_c_12, wrap_convolve8_avg_vert_c_12,
     wrap_convolve8_c_12, wrap_convolve8_avg_c_12, 12);
-INSTANTIATE_TEST_CASE_P(C_12, ConvolveTest, ::testing::Values(
-    make_tuple(4, 4, &convolve12_c),
-    make_tuple(8, 4, &convolve12_c),
-    make_tuple(4, 8, &convolve12_c),
-    make_tuple(8, 8, &convolve12_c),
-    make_tuple(16, 8, &convolve12_c),
-    make_tuple(8, 16, &convolve12_c),
-    make_tuple(16, 16, &convolve12_c),
-    make_tuple(32, 16, &convolve12_c),
-    make_tuple(16, 32, &convolve12_c),
-    make_tuple(32, 32, &convolve12_c),
-    make_tuple(64, 32, &convolve12_c),
-    make_tuple(32, 64, &convolve12_c),
-    make_tuple(64, 64, &convolve12_c)));
+const ConvolveParam kArrayConvolve_c[] = {
+    ALL_SIZES(convolve8_c),
+    ALL_SIZES(convolve10_c),
+    ALL_SIZES(convolve12_c)
+};
 
 #else
-
 const ConvolveFunctions convolve8_c(
     vpx_convolve_copy_c, vpx_convolve_avg_c,
     vpx_convolve8_horiz_c, vpx_convolve8_avg_horiz_c,
@@ -1103,22 +1080,10 @@
     vpx_scaled_horiz_c, vpx_scaled_avg_horiz_c,
     vpx_scaled_vert_c, vpx_scaled_avg_vert_c,
     vpx_scaled_2d_c, vpx_scaled_avg_2d_c, 0);
-
-INSTANTIATE_TEST_CASE_P(C, ConvolveTest, ::testing::Values(
-    make_tuple(4, 4, &convolve8_c),
-    make_tuple(8, 4, &convolve8_c),
-    make_tuple(4, 8, &convolve8_c),
-    make_tuple(8, 8, &convolve8_c),
-    make_tuple(16, 8, &convolve8_c),
-    make_tuple(8, 16, &convolve8_c),
-    make_tuple(16, 16, &convolve8_c),
-    make_tuple(32, 16, &convolve8_c),
-    make_tuple(16, 32, &convolve8_c),
-    make_tuple(32, 32, &convolve8_c),
-    make_tuple(64, 32, &convolve8_c),
-    make_tuple(32, 64, &convolve8_c),
-    make_tuple(64, 64, &convolve8_c)));
+const ConvolveParam kArrayConvolve_c[] = { ALL_SIZES(convolve8_c) };
 #endif
+INSTANTIATE_TEST_CASE_P(C, ConvolveTest,
+                        ::testing::ValuesIn(kArrayConvolve_c));
 
 #if HAVE_SSE2 && ARCH_X86_64
 #if CONFIG_VP9_HIGHBITDEPTH
@@ -1158,46 +1123,11 @@
     wrap_convolve8_horiz_sse2_12, wrap_convolve8_avg_horiz_sse2_12,
     wrap_convolve8_vert_sse2_12, wrap_convolve8_avg_vert_sse2_12,
     wrap_convolve8_sse2_12, wrap_convolve8_avg_sse2_12, 12);
-INSTANTIATE_TEST_CASE_P(SSE2, ConvolveTest, ::testing::Values(
-    make_tuple(4, 4, &convolve8_sse2),
-    make_tuple(8, 4, &convolve8_sse2),
-    make_tuple(4, 8, &convolve8_sse2),
-    make_tuple(8, 8, &convolve8_sse2),
-    make_tuple(16, 8, &convolve8_sse2),
-    make_tuple(8, 16, &convolve8_sse2),
-    make_tuple(16, 16, &convolve8_sse2),
-    make_tuple(32, 16, &convolve8_sse2),
-    make_tuple(16, 32, &convolve8_sse2),
-    make_tuple(32, 32, &convolve8_sse2),
-    make_tuple(64, 32, &convolve8_sse2),
-    make_tuple(32, 64, &convolve8_sse2),
-    make_tuple(64, 64, &convolve8_sse2),
-    make_tuple(4, 4, &convolve10_sse2),
-    make_tuple(8, 4, &convolve10_sse2),
-    make_tuple(4, 8, &convolve10_sse2),
-    make_tuple(8, 8, &convolve10_sse2),
-    make_tuple(16, 8, &convolve10_sse2),
-    make_tuple(8, 16, &convolve10_sse2),
-    make_tuple(16, 16, &convolve10_sse2),
-    make_tuple(32, 16, &convolve10_sse2),
-    make_tuple(16, 32, &convolve10_sse2),
-    make_tuple(32, 32, &convolve10_sse2),
-    make_tuple(64, 32, &convolve10_sse2),
-    make_tuple(32, 64, &convolve10_sse2),
-    make_tuple(64, 64, &convolve10_sse2),
-    make_tuple(4, 4, &convolve12_sse2),
-    make_tuple(8, 4, &convolve12_sse2),
-    make_tuple(4, 8, &convolve12_sse2),
-    make_tuple(8, 8, &convolve12_sse2),
-    make_tuple(16, 8, &convolve12_sse2),
-    make_tuple(8, 16, &convolve12_sse2),
-    make_tuple(16, 16, &convolve12_sse2),
-    make_tuple(32, 16, &convolve12_sse2),
-    make_tuple(16, 32, &convolve12_sse2),
-    make_tuple(32, 32, &convolve12_sse2),
-    make_tuple(64, 32, &convolve12_sse2),
-    make_tuple(32, 64, &convolve12_sse2),
-    make_tuple(64, 64, &convolve12_sse2)));
+const ConvolveParam kArrayConvolve_sse2[] = {
+    ALL_SIZES(convolve8_sse2),
+    ALL_SIZES(convolve10_sse2),
+    ALL_SIZES(convolve12_sse2)
+};
 #else
 const ConvolveFunctions convolve8_sse2(
 #if CONFIG_USE_X86INC
@@ -1212,21 +1142,10 @@
     vpx_scaled_vert_c, vpx_scaled_avg_vert_c,
     vpx_scaled_2d_c, vpx_scaled_avg_2d_c, 0);
 
-INSTANTIATE_TEST_CASE_P(SSE2, ConvolveTest, ::testing::Values(
-    make_tuple(4, 4, &convolve8_sse2),
-    make_tuple(8, 4, &convolve8_sse2),
-    make_tuple(4, 8, &convolve8_sse2),
-    make_tuple(8, 8, &convolve8_sse2),
-    make_tuple(16, 8, &convolve8_sse2),
-    make_tuple(8, 16, &convolve8_sse2),
-    make_tuple(16, 16, &convolve8_sse2),
-    make_tuple(32, 16, &convolve8_sse2),
-    make_tuple(16, 32, &convolve8_sse2),
-    make_tuple(32, 32, &convolve8_sse2),
-    make_tuple(64, 32, &convolve8_sse2),
-    make_tuple(32, 64, &convolve8_sse2),
-    make_tuple(64, 64, &convolve8_sse2)));
+const ConvolveParam kArrayConvolve_sse2[] = { ALL_SIZES(convolve8_sse2) };
 #endif  // CONFIG_VP9_HIGHBITDEPTH
+INSTANTIATE_TEST_CASE_P(SSE2, ConvolveTest,
+                        ::testing::ValuesIn(kArrayConvolve_sse2));
 #endif
 
 #if HAVE_SSSE3
@@ -1237,22 +1156,11 @@
     vpx_convolve8_ssse3, vpx_convolve8_avg_ssse3,
     vpx_scaled_horiz_c, vpx_scaled_avg_horiz_c,
     vpx_scaled_vert_c, vpx_scaled_avg_vert_c,
-    vpx_scaled_2d_c, vpx_scaled_avg_2d_c, 0);
+    vpx_scaled_2d_ssse3, vpx_scaled_avg_2d_c, 0);
 
-INSTANTIATE_TEST_CASE_P(SSSE3, ConvolveTest, ::testing::Values(
-    make_tuple(4, 4, &convolve8_ssse3),
-    make_tuple(8, 4, &convolve8_ssse3),
-    make_tuple(4, 8, &convolve8_ssse3),
-    make_tuple(8, 8, &convolve8_ssse3),
-    make_tuple(16, 8, &convolve8_ssse3),
-    make_tuple(8, 16, &convolve8_ssse3),
-    make_tuple(16, 16, &convolve8_ssse3),
-    make_tuple(32, 16, &convolve8_ssse3),
-    make_tuple(16, 32, &convolve8_ssse3),
-    make_tuple(32, 32, &convolve8_ssse3),
-    make_tuple(64, 32, &convolve8_ssse3),
-    make_tuple(32, 64, &convolve8_ssse3),
-    make_tuple(64, 64, &convolve8_ssse3)));
+const ConvolveParam kArrayConvolve8_ssse3[] = { ALL_SIZES(convolve8_ssse3) };
+INSTANTIATE_TEST_CASE_P(SSSE3, ConvolveTest,
+                        ::testing::ValuesIn(kArrayConvolve8_ssse3));
 #endif
 
 #if HAVE_AVX2 && HAVE_SSSE3
@@ -1265,20 +1173,9 @@
     vpx_scaled_vert_c, vpx_scaled_avg_vert_c,
     vpx_scaled_2d_c, vpx_scaled_avg_2d_c, 0);
 
-INSTANTIATE_TEST_CASE_P(AVX2, ConvolveTest, ::testing::Values(
-    make_tuple(4, 4, &convolve8_avx2),
-    make_tuple(8, 4, &convolve8_avx2),
-    make_tuple(4, 8, &convolve8_avx2),
-    make_tuple(8, 8, &convolve8_avx2),
-    make_tuple(8, 16, &convolve8_avx2),
-    make_tuple(16, 8, &convolve8_avx2),
-    make_tuple(16, 16, &convolve8_avx2),
-    make_tuple(32, 16, &convolve8_avx2),
-    make_tuple(16, 32, &convolve8_avx2),
-    make_tuple(32, 32, &convolve8_avx2),
-    make_tuple(64, 32, &convolve8_avx2),
-    make_tuple(32, 64, &convolve8_avx2),
-    make_tuple(64, 64, &convolve8_avx2)));
+const ConvolveParam kArrayConvolve8_avx2[] = { ALL_SIZES(convolve8_avx2) };
+INSTANTIATE_TEST_CASE_P(AVX2, ConvolveTest,
+                        ::testing::ValuesIn(kArrayConvolve8_avx2));
 #endif  // HAVE_AVX2 && HAVE_SSSE3
 
 #if HAVE_NEON
@@ -1302,20 +1199,9 @@
     vpx_scaled_2d_c, vpx_scaled_avg_2d_c, 0);
 #endif  // HAVE_NEON_ASM
 
-INSTANTIATE_TEST_CASE_P(NEON, ConvolveTest, ::testing::Values(
-    make_tuple(4, 4, &convolve8_neon),
-    make_tuple(8, 4, &convolve8_neon),
-    make_tuple(4, 8, &convolve8_neon),
-    make_tuple(8, 8, &convolve8_neon),
-    make_tuple(16, 8, &convolve8_neon),
-    make_tuple(8, 16, &convolve8_neon),
-    make_tuple(16, 16, &convolve8_neon),
-    make_tuple(32, 16, &convolve8_neon),
-    make_tuple(16, 32, &convolve8_neon),
-    make_tuple(32, 32, &convolve8_neon),
-    make_tuple(64, 32, &convolve8_neon),
-    make_tuple(32, 64, &convolve8_neon),
-    make_tuple(64, 64, &convolve8_neon)));
+const ConvolveParam kArrayConvolve8_neon[] = { ALL_SIZES(convolve8_neon) };
+INSTANTIATE_TEST_CASE_P(NEON, ConvolveTest,
+                        ::testing::ValuesIn(kArrayConvolve8_neon));
 #endif  // HAVE_NEON
 
 #if HAVE_DSPR2
@@ -1328,21 +1214,10 @@
     vpx_scaled_vert_c, vpx_scaled_avg_vert_c,
     vpx_scaled_2d_c, vpx_scaled_avg_2d_c, 0);
 
-INSTANTIATE_TEST_CASE_P(DSPR2, ConvolveTest, ::testing::Values(
-    make_tuple(4, 4, &convolve8_dspr2),
-    make_tuple(8, 4, &convolve8_dspr2),
-    make_tuple(4, 8, &convolve8_dspr2),
-    make_tuple(8, 8, &convolve8_dspr2),
-    make_tuple(16, 8, &convolve8_dspr2),
-    make_tuple(8, 16, &convolve8_dspr2),
-    make_tuple(16, 16, &convolve8_dspr2),
-    make_tuple(32, 16, &convolve8_dspr2),
-    make_tuple(16, 32, &convolve8_dspr2),
-    make_tuple(32, 32, &convolve8_dspr2),
-    make_tuple(64, 32, &convolve8_dspr2),
-    make_tuple(32, 64, &convolve8_dspr2),
-    make_tuple(64, 64, &convolve8_dspr2)));
-#endif
+const ConvolveParam kArrayConvolve8_dspr2[] = { ALL_SIZES(convolve8_dspr2) };
+INSTANTIATE_TEST_CASE_P(DSPR2, ConvolveTest,
+                        ::testing::ValuesIn(kArrayConvolve8_dspr2));
+#endif  // HAVE_DSPR2
 
 #if HAVE_MSA
 const ConvolveFunctions convolve8_msa(
@@ -1354,19 +1229,8 @@
     vpx_scaled_vert_c, vpx_scaled_avg_vert_c,
     vpx_scaled_2d_c, vpx_scaled_avg_2d_c, 0);
 
-INSTANTIATE_TEST_CASE_P(MSA, ConvolveTest, ::testing::Values(
-    make_tuple(4, 4, &convolve8_msa),
-    make_tuple(8, 4, &convolve8_msa),
-    make_tuple(4, 8, &convolve8_msa),
-    make_tuple(8, 8, &convolve8_msa),
-    make_tuple(16, 8, &convolve8_msa),
-    make_tuple(8, 16, &convolve8_msa),
-    make_tuple(16, 16, &convolve8_msa),
-    make_tuple(32, 16, &convolve8_msa),
-    make_tuple(16, 32, &convolve8_msa),
-    make_tuple(32, 32, &convolve8_msa),
-    make_tuple(64, 32, &convolve8_msa),
-    make_tuple(32, 64, &convolve8_msa),
-    make_tuple(64, 64, &convolve8_msa)));
+const ConvolveParam kArrayConvolve8_msa[] = { ALL_SIZES(convolve8_msa) };
+INSTANTIATE_TEST_CASE_P(MSA, ConvolveTest,
+                        ::testing::ValuesIn(kArrayConvolve8_msa));
 #endif  // HAVE_MSA
 }  // namespace
diff --git a/vp10/encoder/quantize.c b/vp10/encoder/quantize.c
index 86b324f..136efe3 100644
--- a/vp10/encoder/quantize.c
+++ b/vp10/encoder/quantize.c
@@ -219,12 +219,12 @@
 
 static void invert_quant(int16_t *quant, int16_t *shift, int d) {
   unsigned t;
-  int l;
+  int l, m;
   t = d;
   for (l = 0; t > 1; l++)
     t >>= 1;
-  t = 1 + (1 << (16 + l)) / d;
-  *quant = (int16_t)(t - (1 << 16));
+  m = 1 + (1 << (16 + l)) / d;
+  *quant = (int16_t)(m - (1 << 16));
   *shift = 1 << (16 - l);
 }
 
diff --git a/vp8/encoder/vp8_quantize.c b/vp8/encoder/vp8_quantize.c
index ee922c9..0d101ba 100644
--- a/vp8/encoder/vp8_quantize.c
+++ b/vp8/encoder/vp8_quantize.c
@@ -227,12 +227,12 @@
     if(improved_quant)
     {
         unsigned t;
-        int l;
+        int l, m;
         t = d;
         for(l = 0; t > 1; l++)
             t>>=1;
-        t = 1 + (1<<(16+l))/d;
-        *quant = (short)(t - (1<<16));
+        m = 1 + (1<<(16+l))/d;
+        *quant = (short)(m - (1<<16));
         *shift = l;
         /* use multiplication and constant shift by 16 */
         *shift = 1 << (16 - *shift);
diff --git a/vp9/encoder/vp9_encoder.c b/vp9/encoder/vp9_encoder.c
index 68537e9..4be043d 100644
--- a/vp9/encoder/vp9_encoder.c
+++ b/vp9/encoder/vp9_encoder.c
@@ -1692,9 +1692,6 @@
   cpi->use_skin_detection = 0;
   cpi->common.buffer_pool = pool;
 
-  cpi->rc.high_source_sad = 0;
-  cpi->rc.count_last_scene_change = 0;
-
   init_config(cpi, oxcf);
   vp9_rc_init(&cpi->oxcf, oxcf->pass, &cpi->rc);
 
diff --git a/vp9/encoder/vp9_pickmode.c b/vp9/encoder/vp9_pickmode.c
index 2e27f94..fd51598 100644
--- a/vp9/encoder/vp9_pickmode.c
+++ b/vp9/encoder/vp9_pickmode.c
@@ -1845,8 +1845,7 @@
       cpi->denoiser.denoising_level > kDenLowLow &&
       cpi->denoiser.reset == 0) {
     VP9_DENOISER_DECISION decision = COPY_BLOCK;
-    vp9_denoiser_denoise(cpi, x, mi_row, mi_col, VPXMAX(BLOCK_8X8, bsize),
-                         ctx, &decision);
+    vp9_denoiser_denoise(cpi, x, mi_row, mi_col, bsize, ctx, &decision);
     // If INTRA or GOLDEN reference was selected, re-evaluate ZEROMV on denoised
     // result. Only do this under noise conditions, and if rdcost of ZEROMV on
     // original source is not significantly higher than rdcost of best mode.
diff --git a/vp9/encoder/vp9_quantize.c b/vp9/encoder/vp9_quantize.c
index 91f877e..9766c05 100644
--- a/vp9/encoder/vp9_quantize.c
+++ b/vp9/encoder/vp9_quantize.c
@@ -219,12 +219,12 @@
 
 static void invert_quant(int16_t *quant, int16_t *shift, int d) {
   unsigned t;
-  int l;
+  int l, m;
   t = d;
   for (l = 0; t > 1; l++)
     t >>= 1;
-  t = 1 + (1 << (16 + l)) / d;
-  *quant = (int16_t)(t - (1 << 16));
+  m = 1 + (1 << (16 + l)) / d;
+  *quant = (int16_t)(m - (1 << 16));
   *shift = 1 << (16 - l);
 }
 
diff --git a/vp9/encoder/vp9_ratectrl.c b/vp9/encoder/vp9_ratectrl.c
index d53e60a..0675d4a 100644
--- a/vp9/encoder/vp9_ratectrl.c
+++ b/vp9/encoder/vp9_ratectrl.c
@@ -339,6 +339,9 @@
   rc->total_target_vs_actual = 0;
   rc->avg_intersize_gfint = 0;
   rc->avg_frame_low_motion = 0;
+  rc->high_source_sad = 0;
+  rc->count_last_scene_change = 0;
+  rc->avg_source_sad = 0;
 
   rc->frames_since_key = 8;  // Sensible default for first frame.
   rc->this_key_frame_forced = 0;
diff --git a/vpx_dsp/bitreader.c b/vpx_dsp/bitreader.c
index 6ad806a..8140e78 100644
--- a/vpx_dsp/bitreader.c
+++ b/vpx_dsp/bitreader.c
@@ -69,7 +69,7 @@
       buffer += (bits >> 3);
       value = r->value | (nv << (shift & 0x7));
   } else {
-    const int bits_over = (int)(shift + CHAR_BIT - bits_left);
+    const int bits_over = (int)(shift + CHAR_BIT - (int)bits_left);
     int loop_end = 0;
     if (bits_over >= 0) {
       count += LOTS_OF_BITS;
diff --git a/vpx_dsp/x86/variance_sse2.c b/vpx_dsp/x86/variance_sse2.c
index e6c9365..43f4603 100644
--- a/vpx_dsp/x86/variance_sse2.c
+++ b/vpx_dsp/x86/variance_sse2.c
@@ -171,7 +171,7 @@
                                   unsigned int *sse) {
   int sum;
   get4x4var_sse2(src, src_stride, ref, ref_stride, sse, &sum);
-  return *sse - (((unsigned int)sum * sum) >> 4);
+  return *sse - ((sum * sum) >> 4);
 }
 
 unsigned int vpx_variance8x4_sse2(const uint8_t *src, int src_stride,
@@ -180,7 +180,7 @@
   int sum;
   variance_sse2(src, src_stride, ref, ref_stride, 8, 4,
                 sse, &sum, get4x4var_sse2, 4);
-  return *sse - (((unsigned int)sum * sum) >> 5);
+  return *sse - ((sum * sum) >> 5);
 }
 
 unsigned int vpx_variance4x8_sse2(const uint8_t *src, int src_stride,
@@ -189,7 +189,7 @@
   int sum;
   variance_sse2(src, src_stride, ref, ref_stride, 4, 8,
                 sse, &sum, get4x4var_sse2, 4);
-  return *sse - (((unsigned int)sum * sum) >> 5);
+  return *sse - ((sum * sum) >> 5);
 }
 
 unsigned int vpx_variance8x8_sse2(const unsigned char *src, int src_stride,
@@ -197,7 +197,7 @@
                                   unsigned int *sse) {
   int sum;
   vpx_get8x8var_sse2(src, src_stride, ref, ref_stride, sse, &sum);
-  return *sse - (((unsigned int)sum * sum) >> 6);
+  return *sse - ((sum * sum) >> 6);
 }
 
 unsigned int vpx_variance16x8_sse2(const unsigned char *src, int src_stride,
@@ -206,7 +206,7 @@
   int sum;
   variance_sse2(src, src_stride, ref, ref_stride, 16, 8,
                 sse, &sum, vpx_get8x8var_sse2, 8);
-  return *sse - (((unsigned int)sum * sum) >> 7);
+  return *sse - ((sum * sum) >> 7);
 }
 
 unsigned int vpx_variance8x16_sse2(const unsigned char *src, int src_stride,
@@ -215,7 +215,7 @@
   int sum;
   variance_sse2(src, src_stride, ref, ref_stride, 8, 16,
                 sse, &sum, vpx_get8x8var_sse2, 8);
-  return *sse - (((unsigned int)sum * sum) >> 7);
+  return *sse - ((sum * sum) >> 7);
 }
 
 unsigned int vpx_variance16x16_sse2(const unsigned char *src, int src_stride,
@@ -223,7 +223,7 @@
                                     unsigned int *sse) {
   int sum;
   vpx_get16x16var_sse2(src, src_stride, ref, ref_stride, sse, &sum);
-  return *sse - (((unsigned int)sum * sum) >> 8);
+  return *sse - (((uint32_t)((int64_t)sum * sum)) >> 8);
 }
 
 unsigned int vpx_variance32x32_sse2(const uint8_t *src, int src_stride,
@@ -329,7 +329,7 @@
 #undef DECLS
 #undef DECL
 
-#define FN(w, h, wf, wlog2, hlog2, opt, cast) \
+#define FN(w, h, wf, wlog2, hlog2, opt, cast_prod, cast) \
 unsigned int vpx_sub_pixel_variance##w##x##h##_##opt(const uint8_t *src, \
                                                      int src_stride, \
                                                      int x_offset, \
@@ -365,23 +365,23 @@
     } \
   } \
   *sse_ptr = sse; \
-  return sse - ((cast se * se) >> (wlog2 + hlog2)); \
+  return sse - (cast_prod (cast se * se) >> (wlog2 + hlog2)); \
 }
 
 #define FNS(opt1, opt2) \
-FN(64, 64, 16, 6, 6, opt1, (int64_t)); \
-FN(64, 32, 16, 6, 5, opt1, (int64_t)); \
-FN(32, 64, 16, 5, 6, opt1, (int64_t)); \
-FN(32, 32, 16, 5, 5, opt1, (int64_t)); \
-FN(32, 16, 16, 5, 4, opt1, (int64_t)); \
-FN(16, 32, 16, 4, 5, opt1, (int64_t)); \
-FN(16, 16, 16, 4, 4, opt1, (uint32_t)); \
-FN(16,  8, 16, 4, 3, opt1, (uint32_t)); \
-FN(8,  16,  8, 3, 4, opt1, (uint32_t)); \
-FN(8,   8,  8, 3, 3, opt1, (uint32_t)); \
-FN(8,   4,  8, 3, 2, opt1, (uint32_t)); \
-FN(4,   8,  4, 2, 3, opt2, (uint32_t)); \
-FN(4,   4,  4, 2, 2, opt2, (uint32_t))
+FN(64, 64, 16, 6, 6, opt1, (int64_t), (int64_t)); \
+FN(64, 32, 16, 6, 5, opt1, (int64_t), (int64_t)); \
+FN(32, 64, 16, 5, 6, opt1, (int64_t), (int64_t)); \
+FN(32, 32, 16, 5, 5, opt1, (int64_t), (int64_t)); \
+FN(32, 16, 16, 5, 4, opt1, (int64_t), (int64_t)); \
+FN(16, 32, 16, 4, 5, opt1, (int64_t), (int64_t)); \
+FN(16, 16, 16, 4, 4, opt1, (uint32_t), (int64_t)); \
+FN(16,  8, 16, 4, 3, opt1, (int32_t), (int32_t)); \
+FN(8,  16,  8, 3, 4, opt1, (int32_t), (int32_t)); \
+FN(8,   8,  8, 3, 3, opt1, (int32_t), (int32_t)); \
+FN(8,   4,  8, 3, 2, opt1, (int32_t), (int32_t)); \
+FN(4,   8,  4, 2, 3, opt2, (int32_t), (int32_t)); \
+FN(4,   4,  4, 2, 2, opt2, (int32_t), (int32_t))
 
 FNS(sse2, sse);
 FNS(ssse3, ssse3);
@@ -410,7 +410,7 @@
 #undef DECL
 #undef DECLS
 
-#define FN(w, h, wf, wlog2, hlog2, opt, cast) \
+#define FN(w, h, wf, wlog2, hlog2, opt, cast_prod, cast) \
 unsigned int vpx_sub_pixel_avg_variance##w##x##h##_##opt(const uint8_t *src, \
                                                          int src_stride, \
                                                          int x_offset, \
@@ -451,23 +451,23 @@
     } \
   } \
   *sseptr = sse; \
-  return sse - ((cast se * se) >> (wlog2 + hlog2)); \
+  return sse - (cast_prod (cast se * se) >> (wlog2 + hlog2)); \
 }
 
 #define FNS(opt1, opt2) \
-FN(64, 64, 16, 6, 6, opt1, (int64_t)); \
-FN(64, 32, 16, 6, 5, opt1, (int64_t)); \
-FN(32, 64, 16, 5, 6, opt1, (int64_t)); \
-FN(32, 32, 16, 5, 5, opt1, (int64_t)); \
-FN(32, 16, 16, 5, 4, opt1, (int64_t)); \
-FN(16, 32, 16, 4, 5, opt1, (int64_t)); \
-FN(16, 16, 16, 4, 4, opt1, (uint32_t)); \
-FN(16,  8, 16, 4, 3, opt1, (uint32_t)); \
-FN(8,  16,  8, 3, 4, opt1, (uint32_t)); \
-FN(8,   8,  8, 3, 3, opt1, (uint32_t)); \
-FN(8,   4,  8, 3, 2, opt1, (uint32_t)); \
-FN(4,   8,  4, 2, 3, opt2, (uint32_t)); \
-FN(4,   4,  4, 2, 2, opt2, (uint32_t))
+FN(64, 64, 16, 6, 6, opt1, (int64_t), (int64_t)); \
+FN(64, 32, 16, 6, 5, opt1, (int64_t), (int64_t)); \
+FN(32, 64, 16, 5, 6, opt1, (int64_t), (int64_t)); \
+FN(32, 32, 16, 5, 5, opt1, (int64_t), (int64_t)); \
+FN(32, 16, 16, 5, 4, opt1, (int64_t), (int64_t)); \
+FN(16, 32, 16, 4, 5, opt1, (int64_t), (int64_t)); \
+FN(16, 16, 16, 4, 4, opt1, (uint32_t), (int64_t)); \
+FN(16,  8, 16, 4, 3, opt1, (uint32_t), (int32_t)); \
+FN(8,  16,  8, 3, 4, opt1, (uint32_t), (int32_t)); \
+FN(8,   8,  8, 3, 3, opt1, (uint32_t), (int32_t)); \
+FN(8,   4,  8, 3, 2, opt1, (uint32_t), (int32_t)); \
+FN(4,   8,  4, 2, 3, opt2, (uint32_t), (int32_t)); \
+FN(4,   4,  4, 2, 2, opt2, (uint32_t), (int32_t))
 
 FNS(sse2, sse);
 FNS(ssse3, ssse3);