Correct function signature for Visual Studio

- Passing __m256i by value triggers a Visual Studio build warning about a
  formal parameter with requested alignment (the x64 ABI cannot guarantee
  32-byte alignment for by-value arguments). Pass the vectors by const
  pointer instead.

Change-Id: I6e122c4fec2505ef3458e4bdf218d3cd30bb494f
diff --git a/aom_dsp/x86/inv_txfm_common_avx2.h b/aom_dsp/x86/inv_txfm_common_avx2.h
index 00236b5..4238e65 100644
--- a/aom_dsp/x86/inv_txfm_common_avx2.h
+++ b/aom_dsp/x86/inv_txfm_common_avx2.h
@@ -71,8 +71,8 @@
   __m256i x0, x1;
   x0 = _mm256_unpacklo_epi16(*a0, *a1);
   x1 = _mm256_unpackhi_epi16(*a0, *a1);
-  *b0 = butter_fly(x0, x1, *c0);
-  *b1 = butter_fly(x0, x1, *c1);
+  *b0 = butter_fly(&x0, &x1, c0);
+  *b1 = butter_fly(&x0, &x1, c1);
 }
 
 void av1_idct16_avx2(__m256i *in);
diff --git a/aom_dsp/x86/txfm_common_avx2.h b/aom_dsp/x86/txfm_common_avx2.h
index 9f09989..4f7a60c 100644
--- a/aom_dsp/x86/txfm_common_avx2.h
+++ b/aom_dsp/x86/txfm_common_avx2.h
@@ -163,10 +163,11 @@
   out[15] = _mm256_permute2x128_si256(tr0_7, tr0_f, 0x31);
 }
 
-static INLINE __m256i butter_fly(__m256i a0, __m256i a1, const __m256i cospi) {
+static INLINE __m256i butter_fly(const __m256i *a0, const __m256i *a1,
+                                 const __m256i *cospi) {
   const __m256i dct_rounding = _mm256_set1_epi32(DCT_CONST_ROUNDING);
-  __m256i y0 = _mm256_madd_epi16(a0, cospi);
-  __m256i y1 = _mm256_madd_epi16(a1, cospi);
+  __m256i y0 = _mm256_madd_epi16(*a0, *cospi);
+  __m256i y1 = _mm256_madd_epi16(*a1, *cospi);
 
   y0 = _mm256_add_epi32(y0, dct_rounding);
   y1 = _mm256_add_epi32(y1, dct_rounding);
diff --git a/av1/encoder/x86/hybrid_fwd_txfm_avx2.c b/av1/encoder/x86/hybrid_fwd_txfm_avx2.c
index acb20af..8495ad1 100644
--- a/av1/encoder/x86/hybrid_fwd_txfm_avx2.c
+++ b/av1/encoder/x86/hybrid_fwd_txfm_avx2.c
@@ -269,8 +269,8 @@
   x0 = _mm256_unpacklo_epi16(v0, v1);
   x1 = _mm256_unpackhi_epi16(v0, v1);
 
-  t0 = butter_fly(x0, x1, cospi_p16_p16);
-  t1 = butter_fly(x0, x1, cospi_p16_m16);
+  t0 = butter_fly(&x0, &x1, &cospi_p16_p16);
+  t1 = butter_fly(&x0, &x1, &cospi_p16_m16);
 
   // 4, 12
   v0 = _mm256_sub_epi16(s1, s2);
@@ -279,8 +279,8 @@
   x0 = _mm256_unpacklo_epi16(v0, v1);
   x1 = _mm256_unpackhi_epi16(v0, v1);
 
-  t2 = butter_fly(x0, x1, cospi_p24_p08);
-  t3 = butter_fly(x0, x1, cospi_m08_p24);
+  t2 = butter_fly(&x0, &x1, &cospi_p24_p08);
+  t3 = butter_fly(&x0, &x1, &cospi_m08_p24);
 
   // 2, 6, 10, 14
   s0 = _mm256_sub_epi16(u3, u4);
@@ -294,8 +294,8 @@
   x0 = _mm256_unpacklo_epi16(s2, s1);
   x1 = _mm256_unpackhi_epi16(s2, s1);
 
-  v2 = butter_fly(x0, x1, cospi_p16_p16);  // output[5]
-  v1 = butter_fly(x0, x1, cospi_p16_m16);  // output[6]
+  v2 = butter_fly(&x0, &x1, &cospi_p16_p16);  // output[5]
+  v1 = butter_fly(&x0, &x1, &cospi_p16_m16);  // output[6]
 
   s0 = _mm256_add_epi16(v0, v1);  // step[4]
   s1 = _mm256_sub_epi16(v0, v1);  // step[5]
@@ -306,14 +306,14 @@
   x0 = _mm256_unpacklo_epi16(s0, s3);
   x1 = _mm256_unpackhi_epi16(s0, s3);
 
-  t4 = butter_fly(x0, x1, cospi_p28_p04);
-  t5 = butter_fly(x0, x1, cospi_m04_p28);
+  t4 = butter_fly(&x0, &x1, &cospi_p28_p04);
+  t5 = butter_fly(&x0, &x1, &cospi_m04_p28);
 
   // 10, 6
   x0 = _mm256_unpacklo_epi16(s1, s2);
   x1 = _mm256_unpackhi_epi16(s1, s2);
-  t6 = butter_fly(x0, x1, cospi_p12_p20);
-  t7 = butter_fly(x0, x1, cospi_m20_p12);
+  t6 = butter_fly(&x0, &x1, &cospi_p12_p20);
+  t7 = butter_fly(&x0, &x1, &cospi_m20_p12);
 
   // 1, 3, 5, 7, 9, 11, 13, 15
   s0 = _mm256_sub_epi16(in[7], in[8]);  // step[8]
@@ -337,14 +337,14 @@
   x0 = _mm256_unpacklo_epi16(u5, u2);
   x1 = _mm256_unpackhi_epi16(u5, u2);
 
-  s2 = butter_fly(x0, x1, cospi_p16_p16);  // step[13]
-  s5 = butter_fly(x0, x1, cospi_p16_m16);  // step[10]
+  s2 = butter_fly(&x0, &x1, &cospi_p16_p16);  // step[13]
+  s5 = butter_fly(&x0, &x1, &cospi_p16_m16);  // step[10]
 
   x0 = _mm256_unpacklo_epi16(u4, u3);
   x1 = _mm256_unpackhi_epi16(u4, u3);
 
-  s3 = butter_fly(x0, x1, cospi_p16_p16);  // step[12]
-  s4 = butter_fly(x0, x1, cospi_p16_m16);  // step[11]
+  s3 = butter_fly(&x0, &x1, &cospi_p16_p16);  // step[12]
+  s4 = butter_fly(&x0, &x1, &cospi_p16_m16);  // step[11]
 
   u0 = _mm256_add_epi16(s0, s4);  // output[8]
   u1 = _mm256_add_epi16(s1, s5);
@@ -364,14 +364,14 @@
   x0 = _mm256_unpacklo_epi16(u1, u6);
   x1 = _mm256_unpackhi_epi16(u1, u6);
 
-  s1 = butter_fly(x0, x1, cospi_m08_p24);
-  s6 = butter_fly(x0, x1, cospi_p24_p08);
+  s1 = butter_fly(&x0, &x1, &cospi_m08_p24);
+  s6 = butter_fly(&x0, &x1, &cospi_p24_p08);
 
   x0 = _mm256_unpacklo_epi16(u2, u5);
   x1 = _mm256_unpackhi_epi16(u2, u5);
 
-  s2 = butter_fly(x0, x1, cospi_m24_m08);
-  s5 = butter_fly(x0, x1, cospi_m08_p24);
+  s2 = butter_fly(&x0, &x1, &cospi_m24_m08);
+  s5 = butter_fly(&x0, &x1, &cospi_m08_p24);
 
   // stage 5
   u0 = _mm256_add_epi16(s0, s1);
@@ -386,23 +386,23 @@
   // stage 6
   x0 = _mm256_unpacklo_epi16(u0, u7);
   x1 = _mm256_unpackhi_epi16(u0, u7);
-  in[1] = butter_fly(x0, x1, cospi_p30_p02);
-  in[15] = butter_fly(x0, x1, cospi_m02_p30);
+  in[1] = butter_fly(&x0, &x1, &cospi_p30_p02);
+  in[15] = butter_fly(&x0, &x1, &cospi_m02_p30);
 
   x0 = _mm256_unpacklo_epi16(u1, u6);
   x1 = _mm256_unpackhi_epi16(u1, u6);
-  in[9] = butter_fly(x0, x1, cospi_p14_p18);
-  in[7] = butter_fly(x0, x1, cospi_m18_p14);
+  in[9] = butter_fly(&x0, &x1, &cospi_p14_p18);
+  in[7] = butter_fly(&x0, &x1, &cospi_m18_p14);
 
   x0 = _mm256_unpacklo_epi16(u2, u5);
   x1 = _mm256_unpackhi_epi16(u2, u5);
-  in[5] = butter_fly(x0, x1, cospi_p22_p10);
-  in[11] = butter_fly(x0, x1, cospi_m10_p22);
+  in[5] = butter_fly(&x0, &x1, &cospi_p22_p10);
+  in[11] = butter_fly(&x0, &x1, &cospi_m10_p22);
 
   x0 = _mm256_unpacklo_epi16(u3, u4);
   x1 = _mm256_unpackhi_epi16(u3, u4);
-  in[13] = butter_fly(x0, x1, cospi_p06_p26);
-  in[3] = butter_fly(x0, x1, cospi_m26_p06);
+  in[13] = butter_fly(&x0, &x1, &cospi_p06_p26);
+  in[3] = butter_fly(&x0, &x1, &cospi_m26_p06);
 }
 
 void fadst16_avx2(__m256i *in) {
@@ -1249,23 +1249,23 @@
 
   u0 = _mm256_unpacklo_epi16(in[4], in[11]);
   u1 = _mm256_unpackhi_epi16(in[4], in[11]);
-  y4 = butter_fly(u0, u1, cospi_m16_p16);
-  y11 = butter_fly(u0, u1, cospi_p16_p16);
+  y4 = butter_fly(&u0, &u1, &cospi_m16_p16);
+  y11 = butter_fly(&u0, &u1, &cospi_p16_p16);
 
   u0 = _mm256_unpacklo_epi16(in[5], in[10]);
   u1 = _mm256_unpackhi_epi16(in[5], in[10]);
-  y5 = butter_fly(u0, u1, cospi_m16_p16);
-  y10 = butter_fly(u0, u1, cospi_p16_p16);
+  y5 = butter_fly(&u0, &u1, &cospi_m16_p16);
+  y10 = butter_fly(&u0, &u1, &cospi_p16_p16);
 
   u0 = _mm256_unpacklo_epi16(in[6], in[9]);
   u1 = _mm256_unpackhi_epi16(in[6], in[9]);
-  y6 = butter_fly(u0, u1, cospi_m16_p16);
-  y9 = butter_fly(u0, u1, cospi_p16_p16);
+  y6 = butter_fly(&u0, &u1, &cospi_m16_p16);
+  y9 = butter_fly(&u0, &u1, &cospi_p16_p16);
 
   u0 = _mm256_unpacklo_epi16(in[7], in[8]);
   u1 = _mm256_unpackhi_epi16(in[7], in[8]);
-  y7 = butter_fly(u0, u1, cospi_m16_p16);
-  y8 = butter_fly(u0, u1, cospi_p16_p16);
+  y7 = butter_fly(&u0, &u1, &cospi_m16_p16);
+  y8 = butter_fly(&u0, &u1, &cospi_p16_p16);
 
   y12 = in[12];
   y13 = in[13];
@@ -1302,23 +1302,23 @@
 
   u0 = _mm256_unpacklo_epi16(x2, x13);
   u1 = _mm256_unpackhi_epi16(x2, x13);
-  y2 = butter_fly(u0, u1, cospi_m08_p24);
-  y13 = butter_fly(u0, u1, cospi_p24_p08);
+  y2 = butter_fly(&u0, &u1, &cospi_m08_p24);
+  y13 = butter_fly(&u0, &u1, &cospi_p24_p08);
 
   u0 = _mm256_unpacklo_epi16(x3, x12);
   u1 = _mm256_unpackhi_epi16(x3, x12);
-  y3 = butter_fly(u0, u1, cospi_m08_p24);
-  y12 = butter_fly(u0, u1, cospi_p24_p08);
+  y3 = butter_fly(&u0, &u1, &cospi_m08_p24);
+  y12 = butter_fly(&u0, &u1, &cospi_p24_p08);
 
   u0 = _mm256_unpacklo_epi16(x4, x11);
   u1 = _mm256_unpackhi_epi16(x4, x11);
-  y4 = butter_fly(u0, u1, cospi_m24_m08);
-  y11 = butter_fly(u0, u1, cospi_m08_p24);
+  y4 = butter_fly(&u0, &u1, &cospi_m24_m08);
+  y11 = butter_fly(&u0, &u1, &cospi_m08_p24);
 
   u0 = _mm256_unpacklo_epi16(x5, x10);
   u1 = _mm256_unpackhi_epi16(x5, x10);
-  y5 = butter_fly(u0, u1, cospi_m24_m08);
-  y10 = butter_fly(u0, u1, cospi_m08_p24);
+  y5 = butter_fly(&u0, &u1, &cospi_m24_m08);
+  y10 = butter_fly(&u0, &u1, &cospi_m08_p24);
 
   // stage 5
   x0 = _mm256_add_epi16(y0, y3);
@@ -1351,23 +1351,23 @@
 
   u0 = _mm256_unpacklo_epi16(x1, x14);
   u1 = _mm256_unpackhi_epi16(x1, x14);
-  y1 = butter_fly(u0, u1, cospi_m04_p28);
-  y14 = butter_fly(u0, u1, cospi_p28_p04);
+  y1 = butter_fly(&u0, &u1, &cospi_m04_p28);
+  y14 = butter_fly(&u0, &u1, &cospi_p28_p04);
 
   u0 = _mm256_unpacklo_epi16(x2, x13);
   u1 = _mm256_unpackhi_epi16(x2, x13);
-  y2 = butter_fly(u0, u1, cospi_m28_m04);
-  y13 = butter_fly(u0, u1, cospi_m04_p28);
+  y2 = butter_fly(&u0, &u1, &cospi_m28_m04);
+  y13 = butter_fly(&u0, &u1, &cospi_m04_p28);
 
   u0 = _mm256_unpacklo_epi16(x5, x10);
   u1 = _mm256_unpackhi_epi16(x5, x10);
-  y5 = butter_fly(u0, u1, cospi_m20_p12);
-  y10 = butter_fly(u0, u1, cospi_p12_p20);
+  y5 = butter_fly(&u0, &u1, &cospi_m20_p12);
+  y10 = butter_fly(&u0, &u1, &cospi_p12_p20);
 
   u0 = _mm256_unpacklo_epi16(x6, x9);
   u1 = _mm256_unpackhi_epi16(x6, x9);
-  y6 = butter_fly(u0, u1, cospi_m12_m20);
-  y9 = butter_fly(u0, u1, cospi_m20_p12);
+  y6 = butter_fly(&u0, &u1, &cospi_m12_m20);
+  y9 = butter_fly(&u0, &u1, &cospi_m20_p12);
 
   // stage 7
   x0 = _mm256_add_epi16(y0, y1);
@@ -1391,43 +1391,43 @@
   // stage 8
   u0 = _mm256_unpacklo_epi16(x0, x15);
   u1 = _mm256_unpackhi_epi16(x0, x15);
-  in[0] = butter_fly(u0, u1, cospi_p31_p01);
-  in[15] = butter_fly(u0, u1, cospi_m01_p31);
+  in[0] = butter_fly(&u0, &u1, &cospi_p31_p01);
+  in[15] = butter_fly(&u0, &u1, &cospi_m01_p31);
 
   u0 = _mm256_unpacklo_epi16(x1, x14);
   u1 = _mm256_unpackhi_epi16(x1, x14);
-  in[1] = butter_fly(u0, u1, cospi_p15_p17);
-  in[14] = butter_fly(u0, u1, cospi_m17_p15);
+  in[1] = butter_fly(&u0, &u1, &cospi_p15_p17);
+  in[14] = butter_fly(&u0, &u1, &cospi_m17_p15);
 
   u0 = _mm256_unpacklo_epi16(x2, x13);
   u1 = _mm256_unpackhi_epi16(x2, x13);
-  in[2] = butter_fly(u0, u1, cospi_p23_p09);
-  in[13] = butter_fly(u0, u1, cospi_m09_p23);
+  in[2] = butter_fly(&u0, &u1, &cospi_p23_p09);
+  in[13] = butter_fly(&u0, &u1, &cospi_m09_p23);
 
   u0 = _mm256_unpacklo_epi16(x3, x12);
   u1 = _mm256_unpackhi_epi16(x3, x12);
-  in[3] = butter_fly(u0, u1, cospi_p07_p25);
-  in[12] = butter_fly(u0, u1, cospi_m25_p07);
+  in[3] = butter_fly(&u0, &u1, &cospi_p07_p25);
+  in[12] = butter_fly(&u0, &u1, &cospi_m25_p07);
 
   u0 = _mm256_unpacklo_epi16(x4, x11);
   u1 = _mm256_unpackhi_epi16(x4, x11);
-  in[4] = butter_fly(u0, u1, cospi_p27_p05);
-  in[11] = butter_fly(u0, u1, cospi_m05_p27);
+  in[4] = butter_fly(&u0, &u1, &cospi_p27_p05);
+  in[11] = butter_fly(&u0, &u1, &cospi_m05_p27);
 
   u0 = _mm256_unpacklo_epi16(x5, x10);
   u1 = _mm256_unpackhi_epi16(x5, x10);
-  in[5] = butter_fly(u0, u1, cospi_p11_p21);
-  in[10] = butter_fly(u0, u1, cospi_m21_p11);
+  in[5] = butter_fly(&u0, &u1, &cospi_p11_p21);
+  in[10] = butter_fly(&u0, &u1, &cospi_m21_p11);
 
   u0 = _mm256_unpacklo_epi16(x6, x9);
   u1 = _mm256_unpackhi_epi16(x6, x9);
-  in[6] = butter_fly(u0, u1, cospi_p19_p13);
-  in[9] = butter_fly(u0, u1, cospi_m13_p19);
+  in[6] = butter_fly(&u0, &u1, &cospi_p19_p13);
+  in[9] = butter_fly(&u0, &u1, &cospi_m13_p19);
 
   u0 = _mm256_unpacklo_epi16(x7, x8);
   u1 = _mm256_unpackhi_epi16(x7, x8);
-  in[7] = butter_fly(u0, u1, cospi_p03_p29);
-  in[8] = butter_fly(u0, u1, cospi_m29_p03);
+  in[7] = butter_fly(&u0, &u1, &cospi_p03_p29);
+  in[8] = butter_fly(&u0, &u1, &cospi_m29_p03);
 }
 
 static void fdct32_avx2(__m256i *in0, __m256i *in1) {