nasm: match instruction length (movd/movq) to parameters
nasm requires the instruction length (movd/movq) to match its
operands. I also find it clearer to use 64-bit instructions wherever
the assembly operates on 64-bit registers.
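
A minimal illustration of the pattern applied throughout this patch
(it is exactly the first hunk below, in iwalsh_mmx.asm):

    ; nasm errors out here: a 32-bit move with a 64-bit register
    movd    mm7, rax
    ; accepted by both nasm and yasm
    movq    mm7, rax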
This provides nasm compatibility. The patch causes no binary change
with yasm on {x86_64,i686}-fedora13-linux-gnu. With nasm on the same
targets a few opcodes become longer; those cases have been checked and
are safe.
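
On 32-bit targets x86_abi_support.asm already %defines the 64-bit
register names to their 32-bit counterparts, and movq between an MMX
register and a general purpose register is not encodable there. The
movq macro added to that file in the last hunk therefore falls back to
movd whenever either operand is a 32-bit GPR, so the shared sources
keep assembling on both targets. A sketch of the resulting expansion
(assuming the existing %define rax eax for 32-bit builds):

    ; shared source
    movq    rax, mm7
    ; x86_64 build: assembled as written
    ; 32-bit build: rax expands to eax and the macro rewrites the
    ; instruction to
    movd    eax, mm7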
Change-Id: Id9b1a5cdfb1bc05697e523c317a296df43d42a91
diff --git a/vp8/common/x86/iwalsh_mmx.asm b/vp8/common/x86/iwalsh_mmx.asm
index 3f0671c..10b5274 100644
--- a/vp8/common/x86/iwalsh_mmx.asm
+++ b/vp8/common/x86/iwalsh_mmx.asm
@@ -69,7 +69,7 @@
movq mm2, [rsi + 16] ;ip[8]
movq mm3, [rsi + 24] ;ip[12]
- movd mm7, rax
+ movq mm7, rax
movq mm4, mm0
punpcklwd mm7, mm7 ;0003000300030003h
diff --git a/vp8/decoder/x86/dequantize_mmx.asm b/vp8/decoder/x86/dequantize_mmx.asm
index 150d090..eb9d1f8 100644
--- a/vp8/decoder/x86/dequantize_mmx.asm
+++ b/vp8/decoder/x86/dequantize_mmx.asm
@@ -288,7 +288,7 @@
psrlq mm0, 16
movzx rcx, word ptr arg(6) ;Dc
psllq mm0, 16
- movd mm7, rcx
+ movq mm7, rcx
por mm0, mm7
movsxd rax, dword ptr arg(4) ;pitch
diff --git a/vp8/encoder/x86/encodeopt.asm b/vp8/encoder/x86/encodeopt.asm
index 413d74d..c0f06bb 100644
--- a/vp8/encoder/x86/encodeopt.asm
+++ b/vp8/encoder/x86/encodeopt.asm
@@ -50,7 +50,7 @@
psrldq xmm0, 8
paddd xmm0, xmm3
- movd rax, xmm0
+ movq rax, xmm0
pop rdi
pop rsi
@@ -115,7 +115,7 @@
psrlq mm1, 32
paddd mm0, mm1
- movd rax, mm0
+ movq rax, mm0
pop rdi
pop rsi
@@ -192,7 +192,7 @@
psrlq mm2, 32
paddd mm0, mm2
- movd rax, mm0
+ movq rax, mm0
pop rdi
pop rsi
@@ -260,7 +260,7 @@
psrldq xmm0, 8
paddd xmm0, xmm1
- movd rax, xmm0
+ movq rax, xmm0
pop rdi
pop rsi
@@ -317,7 +317,7 @@
psrlq mm7, 32
paddd mm0, mm7
- movd rax, mm0
+ movq rax, mm0
pop rdi
pop rsi
@@ -374,7 +374,7 @@
psrldq xmm1, 8
paddd xmm1, xmm2
- movd rax, xmm1
+ movq rax, xmm1
pop rdi
pop rsi
diff --git a/vp8/encoder/x86/quantize_mmx.asm b/vp8/encoder/x86/quantize_mmx.asm
index a867409..51cd940 100644
--- a/vp8/encoder/x86/quantize_mmx.asm
+++ b/vp8/encoder/x86/quantize_mmx.asm
@@ -249,7 +249,7 @@
paddd mm0, mm5
; eob adjustment begins here
- movd rcx, mm0
+ movq rcx, mm0
and rcx, 0xffff
xor rdx, rdx
@@ -262,7 +262,7 @@
and rax, rdx
; Substitute the sse assembly for the old mmx mixed assembly/C. The
; following is kept as reference
- ; movd rcx, mm0
+ ; movq rcx, mm0
; bsr rax, rcx
;
; mov eob, rax
@@ -418,7 +418,7 @@
psrldq xmm0, 4
paddd xmm1, xmm0
- movd rcx, xmm1
+ movq rcx, xmm1
and rcx, 0xffff
xor rdx, rdx
diff --git a/vp8/encoder/x86/sad_mmx.asm b/vp8/encoder/x86/sad_mmx.asm
index ad9658b..19041d4 100644
--- a/vp8/encoder/x86/sad_mmx.asm
+++ b/vp8/encoder/x86/sad_mmx.asm
@@ -100,7 +100,7 @@
psrlq mm0, 32
paddw mm7, mm0
- movd rax, mm7
+ movq rax, mm7
pop rdi
pop rsi
@@ -172,7 +172,7 @@
psrlq mm0, 32
paddw mm7, mm0
- movd rax, mm7
+ movq rax, mm7
pop rdi
pop rsi
@@ -242,7 +242,7 @@
psrlq mm0, 32
paddw mm7, mm0
- movd rax, mm7
+ movq rax, mm7
pop rdi
pop rsi
@@ -331,7 +331,7 @@
psrlq mm0, 32
paddw mm0, mm1
- movd rax, mm0
+ movq rax, mm0
pop rdi
pop rsi
@@ -418,7 +418,7 @@
psrlq mm0, 32
paddw mm7, mm0
- movd rax, mm7
+ movq rax, mm7
pop rdi
pop rsi
diff --git a/vp8/encoder/x86/sad_sse2.asm b/vp8/encoder/x86/sad_sse2.asm
index 9f34a7a..0f6c5d9 100644
--- a/vp8/encoder/x86/sad_sse2.asm
+++ b/vp8/encoder/x86/sad_sse2.asm
@@ -75,7 +75,7 @@
psrldq xmm7, 8
paddw xmm0, xmm7
- movd rax, xmm0
+ movq rax, xmm0
; begin epilog
pop rdi
@@ -113,7 +113,7 @@
x8x16sad_wmt_loop:
- movd rax, mm7
+ movq rax, mm7
cmp rax, arg(4)
jg x8x16sad_wmt_early_exit
@@ -135,7 +135,7 @@
cmp rsi, rcx
jne x8x16sad_wmt_loop
- movd rax, mm7
+ movq rax, mm7
x8x16sad_wmt_early_exit:
@@ -174,7 +174,7 @@
x8x8sad_wmt_loop:
- movd rax, mm7
+ movq rax, mm7
cmp rax, arg(4)
jg x8x8sad_wmt_early_exit
@@ -190,7 +190,7 @@
cmp rsi, rcx
jne x8x8sad_wmt_loop
- movd rax, mm7
+ movq rax, mm7
x8x8sad_wmt_early_exit:
; begin epilog
@@ -246,7 +246,7 @@
psadbw mm4, mm5
paddw mm0, mm4
- movd rax, mm0
+ movq rax, mm0
; begin epilog
pop rdi
@@ -283,7 +283,7 @@
x16x8sad_wmt_loop:
- movd rax, mm7
+ movq rax, mm7
cmp rax, arg(4)
jg x16x8sad_wmt_early_exit
@@ -317,7 +317,7 @@
cmp rsi, rcx
jne x16x8sad_wmt_loop
- movd rax, mm7
+ movq rax, mm7
x16x8sad_wmt_early_exit:
diff --git a/vp8/encoder/x86/sad_sse3.asm b/vp8/encoder/x86/sad_sse3.asm
index c2a1ae7..b12c815 100644
--- a/vp8/encoder/x86/sad_sse3.asm
+++ b/vp8/encoder/x86/sad_sse3.asm
@@ -530,7 +530,7 @@
vp8_sad16x16_sse3_loop:
- movd rax, mm7
+ movq rax, mm7
cmp rax, arg(4)
jg vp8_sad16x16_early_exit
@@ -564,7 +564,7 @@
cmp rsi, rcx
jne vp8_sad16x16_sse3_loop
- movd rax, mm7
+ movq rax, mm7
vp8_sad16x16_early_exit:
diff --git a/vp8/encoder/x86/variance_impl_mmx.asm b/vp8/encoder/x86/variance_impl_mmx.asm
index 173238e..d4ec63b 100644
--- a/vp8/encoder/x86/variance_impl_mmx.asm
+++ b/vp8/encoder/x86/variance_impl_mmx.asm
@@ -498,7 +498,7 @@
psrlq mm7, 32
paddd mm0, mm7
- movd rax, mm0
+ movq rax, mm0
; begin epilog
diff --git a/vp8/encoder/x86/variance_impl_sse2.asm b/vp8/encoder/x86/variance_impl_sse2.asm
index f47d9cc..38b3f33 100644
--- a/vp8/encoder/x86/variance_impl_sse2.asm
+++ b/vp8/encoder/x86/variance_impl_sse2.asm
@@ -58,7 +58,7 @@
movdqa xmm3,xmm4
psrldq xmm4,4
paddd xmm4,xmm3
- movd rax,xmm4
+ movq rax,xmm4
; begin epilog
@@ -471,7 +471,7 @@
mov rax, arg(5) ;[Sum]
mov rdi, arg(4) ;[SSE]
- movd rdx, xmm7
+ movq rdx, xmm7
movsx rcx, dx
mov dword ptr [rax], ecx
diff --git a/vpx_ports/x86_abi_support.asm b/vpx_ports/x86_abi_support.asm
index dc9e2d9..470c58a 100644
--- a/vpx_ports/x86_abi_support.asm
+++ b/vpx_ports/x86_abi_support.asm
@@ -36,6 +36,43 @@
%define rsp esp
%define rbp ebp
%define movsxd mov
+%macro movq 2
+ %ifidn %1,eax
+ movd %1,%2
+ %elifidn %2,eax
+ movd %1,%2
+ %elifidn %1,ebx
+ movd %1,%2
+ %elifidn %2,ebx
+ movd %1,%2
+ %elifidn %1,ecx
+ movd %1,%2
+ %elifidn %2,ecx
+ movd %1,%2
+ %elifidn %1,edx
+ movd %1,%2
+ %elifidn %2,edx
+ movd %1,%2
+ %elifidn %1,esi
+ movd %1,%2
+ %elifidn %2,esi
+ movd %1,%2
+ %elifidn %1,edi
+ movd %1,%2
+ %elifidn %2,edi
+ movd %1,%2
+ %elifidn %1,esp
+ movd %1,%2
+ %elifidn %2,esp
+ movd %1,%2
+ %elifidn %1,ebp
+ movd %1,%2
+ %elifidn %2,ebp
+ movd %1,%2
+ %else
+ movq %1,%2
+ %endif
+%endmacro
%endif