; | |
; Copyright (c) 2010 The WebM project authors. All Rights Reserved. | |
; | |
; Use of this source code is governed by a BSD-style license | |
; that can be found in the LICENSE file in the root of the source | |
; tree. An additional intellectual property rights grant can be found | |
; in the file PATENTS. All contributing project authors may | |
; be found in the AUTHORS file in the root of the source tree. | |
; | |
%include "vpx_ports/x86_abi_support.asm" | |
;void copy_mem16x16_sse2( | |
; unsigned char *src, | |
; int src_stride, | |
; unsigned char *dst, | |
; int dst_stride | |
; ) | |
global sym(vp9_copy_mem16x16_sse2) PRIVATE | |
sym(vp9_copy_mem16x16_sse2): | |
push rbp | |
mov rbp, rsp | |
SHADOW_ARGS_TO_STACK 4 | |
push rsi | |
push rdi | |
; end prolog | |
mov rsi, arg(0) ;src; | |
movdqu xmm0, [rsi] | |
movsxd rax, dword ptr arg(1) ;src_stride; | |
mov rdi, arg(2) ;dst; | |
movdqu xmm1, [rsi+rax] | |
movdqu xmm2, [rsi+rax*2] | |
movsxd rcx, dword ptr arg(3) ;dst_stride | |
lea rsi, [rsi+rax*2] | |
movdqa [rdi], xmm0 | |
add rsi, rax | |
movdqa [rdi+rcx], xmm1 | |
movdqa [rdi+rcx*2],xmm2 | |
lea rdi, [rdi+rcx*2] | |
movdqu xmm3, [rsi] | |
add rdi, rcx | |
movdqu xmm4, [rsi+rax] | |
movdqu xmm5, [rsi+rax*2] | |
lea rsi, [rsi+rax*2] | |
movdqa [rdi], xmm3 | |
add rsi, rax | |
movdqa [rdi+rcx], xmm4 | |
movdqa [rdi+rcx*2],xmm5 | |
lea rdi, [rdi+rcx*2] | |
movdqu xmm0, [rsi] | |
add rdi, rcx | |
movdqu xmm1, [rsi+rax] | |
movdqu xmm2, [rsi+rax*2] | |
lea rsi, [rsi+rax*2] | |
movdqa [rdi], xmm0 | |
add rsi, rax | |
movdqa [rdi+rcx], xmm1 | |
movdqa [rdi+rcx*2], xmm2 | |
movdqu xmm3, [rsi] | |
movdqu xmm4, [rsi+rax] | |
lea rdi, [rdi+rcx*2] | |
add rdi, rcx | |
movdqu xmm5, [rsi+rax*2] | |
lea rsi, [rsi+rax*2] | |
movdqa [rdi], xmm3 | |
add rsi, rax | |
movdqa [rdi+rcx], xmm4 | |
movdqa [rdi+rcx*2],xmm5 | |
movdqu xmm0, [rsi] | |
lea rdi, [rdi+rcx*2] | |
movdqu xmm1, [rsi+rax] | |
add rdi, rcx | |
movdqu xmm2, [rsi+rax*2] | |
lea rsi, [rsi+rax*2] | |
movdqa [rdi], xmm0 | |
movdqa [rdi+rcx], xmm1 | |
movdqa [rdi+rcx*2],xmm2 | |
movdqu xmm3, [rsi+rax] | |
lea rdi, [rdi+rcx*2] | |
movdqa [rdi+rcx], xmm3 | |
; begin epilog | |
pop rdi | |
pop rsi | |
UNSHADOW_ARGS | |
pop rbp | |
ret |