| ; | 
 | ;  Copyright (c) 2010 The WebM project authors. All Rights Reserved. | 
 | ; | 
 | ;  Use of this source code is governed by a BSD-style license | 
 | ;  that can be found in the LICENSE file in the root of the source | 
 | ;  tree. An additional intellectual property rights grant can be found | 
 | ;  in the file PATENTS.  All contributing project authors may | 
 | ;  be found in the AUTHORS file in the root of the source tree. | 
 | ; | 
 |  | 
 |  | 
 | %include "vpx_ports/x86_abi_support.asm" | 
 |  | 
 | ;void vp9_subtract_b_sse2_impl(unsigned char *z,  int src_stride, | 
 | ;                            short *diff, unsigned char *Predictor, | 
 | ;                            int pitch); | 
 | ; | 
 | ;  Stores the 16-bit differences z - Predictor of a block 4 pixels wide | 
 | ;  and 4 rows high into diff. | 
 | ;    z          source bytes, consecutive rows src_stride bytes apart | 
 | ;    Predictor  prediction bytes, consecutive rows pitch bytes apart | 
 | ;    diff       output shorts, consecutive rows pitch shorts apart | 
 | ; | 
 | ;  The work is done in xmm0-xmm2 rather than in MMX registers: the x87/MMX | 
 | ;  state is never touched, so no emms is needed after this routine, and | 
 | ;  xmm0-xmm2 do not have to be preserved for the caller. | 
 | global sym(vp9_subtract_b_sse2_impl) PRIVATE | 
 | sym(vp9_subtract_b_sse2_impl): | 
 |     push        rbp | 
 |     mov         rbp, rsp | 
 |     SHADOW_ARGS_TO_STACK 5 | 
 |     GET_GOT     rbx | 
 |     push rsi | 
 |     push rdi | 
 |     ; end prolog | 
 |  | 
 |         mov     rdi,        arg(2) ;diff | 
 |         mov     rax,        arg(3) ;Predictor | 
 |         mov     rsi,        arg(0) ;z | 
 |         movsxd  rdx,        dword ptr arg(1);src_stride; | 
 |         movsxd  rcx,        dword ptr arg(4);pitch | 
 |         pxor    xmm2,       xmm2           ;all zero: widens bytes to words below | 
 |  | 
 |         ;row 0 | 
 |         movd    xmm0,       [rsi]          ;4 source bytes | 
 |         movd    xmm1,       [rax]          ;4 predictor bytes | 
 |         punpcklbw   xmm0,   xmm2           ;zero-extend to 16 bits | 
 |         punpcklbw   xmm1,   xmm2 | 
 |         psubw   xmm0,       xmm1           ;src - pred | 
 |         movq    MMWORD PTR [rdi],      xmm0 ;low 8 bytes = the 4 differences | 
 |  | 
 |         ;row 1 | 
 |         movd    xmm0,       [rsi+rdx] | 
 |         movd    xmm1,       [rax+rcx] | 
 |         punpcklbw   xmm0,   xmm2 | 
 |         punpcklbw   xmm1,   xmm2 | 
 |         psubw   xmm0,       xmm1 | 
 |         movq    MMWORD PTR [rdi+rcx*2], xmm0 ;diff row = pitch shorts = pitch*2 bytes | 
 |  | 
 |         ;row 2 | 
 |         movd    xmm0,       [rsi+rdx*2] | 
 |         movd    xmm1,       [rax+rcx*2] | 
 |         punpcklbw   xmm0,   xmm2 | 
 |         punpcklbw   xmm1,   xmm2 | 
 |         psubw   xmm0,       xmm1 | 
 |         movq    MMWORD PTR [rdi+rcx*4], xmm0 | 
 |  | 
 |         lea     rsi,        [rsi+rdx*2]    ;rsi = z + 2*src_stride | 
 |         lea     rcx,        [rcx+rcx*2]    ;rcx = 3*pitch | 
 |  | 
 |         ;row 3 | 
 |         movd    xmm0,       [rsi+rdx]      ;z + 3*src_stride | 
 |         movd    xmm1,       [rax+rcx]      ;Predictor + 3*pitch | 
 |         punpcklbw   xmm0,   xmm2 | 
 |         punpcklbw   xmm1,   xmm2 | 
 |         psubw   xmm0,       xmm1 | 
 |         movq    MMWORD PTR [rdi+rcx*2], xmm0 ;diff + 3*pitch shorts | 
 |  | 
 |     ; begin epilog | 
 |     pop rdi | 
 |     pop rsi | 
 |     RESTORE_GOT | 
 |     UNSHADOW_ARGS | 
 |     pop         rbp | 
 |     ret | 
 |  | 
 |  | 
 | ; SUBTRACT_MBY_ROW16 src, pred, diff_lo, diff_hi, r0, r1, r2, r3 | 
 | ;  Emits the code for one row of 16 pixels: loads 16 bytes from [src] and | 
 | ;  from [pred], then stores the sixteen 16-bit values src - pred to | 
 | ;  [diff_lo] (pixels 0-7) and [diff_hi] (pixels 8-15). | 
 | ;  r0-r3 name the four xmm registers used as scratch. | 
 | %macro SUBTRACT_MBY_ROW16 8 | 
 |             movdqa      %5,             XMMWORD PTR [%1]    ; src | 
 |             movdqa      %6,             XMMWORD PTR [%2]    ; pred | 
 |  | 
 |             movdqa      %7,             %5                  ; copy of src for the compare | 
 |             psubb       %5,             %6                  ; low byte of src - pred | 
 |  | 
 |             ; flipping bit 7 makes the signed pcmpgtb order the bytes as unsigned | 
 |             pxor        %6,             [GLOBAL(t80)] | 
 |             pxor        %7,             [GLOBAL(t80)] | 
 |             pcmpgtb     %6,             %7                  ; 0xff where pred > src, else 0 | 
 |  | 
 |             movdqa      %7,             %5 | 
 |             movdqa      %8,             %6 | 
 |             punpcklbw   %5,             %6                  ; mask becomes the high byte, pixels 0-7 | 
 |             punpckhbw   %7,             %8                  ; same for pixels 8-15 | 
 |  | 
 |             movdqa      XMMWORD PTR [%3],   %5 | 
 |             movdqa      XMMWORD PTR [%4],   %7 | 
 | %endmacro | 
 |  | 
 | ;void vp9_subtract_mby_sse2(short *diff, unsigned char *src, unsigned char *pred, int stride) | 
 | ; | 
 | ;  Writes src - pred as shorts for 16 rows of 16 pixels. | 
 | ;    src   rows are stride bytes apart | 
 | ;    pred  rows are 16 bytes apart | 
 | ;    diff  rows are 16 shorts (32 bytes) apart | 
 | ;  Every load and store is a movdqa, so each address touched (src rows, | 
 | ;  pred, diff) has to be 16-byte aligned. | 
 | global sym(vp9_subtract_mby_sse2) PRIVATE | 
 | sym(vp9_subtract_mby_sse2): | 
 |     push        rbp | 
 |     mov         rbp, rsp | 
 |     SHADOW_ARGS_TO_STACK 4 | 
 |     SAVE_XMM 7 | 
 |     GET_GOT     rbx | 
 |     push rsi | 
 |     push rdi | 
 |     ; end prolog | 
 |  | 
 |             mov         rsi,            arg(1) ;src | 
 |             mov         rdi,            arg(0) ;diff | 
 |  | 
 |             mov         rax,            arg(2) ;pred | 
 |             movsxd      rdx,            dword ptr arg(3) ;stride | 
 |  | 
 |             mov         rcx,            8      ; 8 passes, two rows per pass | 
 |  | 
 | .row_pair_loop: | 
 |             ; even row of the pair | 
 |             SUBTRACT_MBY_ROW16 rsi, rax, rdi, rdi +16, xmm0, xmm1, xmm2, xmm3 | 
 |  | 
 |             ; odd row of the pair | 
 |             SUBTRACT_MBY_ROW16 rsi + rdx, rax + 16, rdi +32, rdi +48, xmm4, xmm5, xmm6, xmm7 | 
 |  | 
 |             add         rdi,            64              ; 2 rows * 16 shorts | 
 |             add         rax,            32              ; 2 rows * 16 bytes | 
 |             lea         rsi,            [rsi+rdx*2]     ; 2 source rows | 
 |  | 
 |             sub         rcx,            1 | 
 |             jnz         .row_pair_loop | 
 |  | 
 |     ; begin epilog | 
 |     pop rdi | 
 |     pop rsi | 
 |     RESTORE_GOT | 
 |     RESTORE_XMM | 
 |     UNSHADOW_ARGS | 
 |     pop         rbp | 
 |     ret | 
 |  | 
 |  | 
 | ; SUBTRACT_MBUV_ROWPAIR src0, src1, pred, diff0, diff1 | 
 | ;  Emits the code for two rows of 8 pixels: loads 8 bytes from [src0] and | 
 | ;  8 bytes from [src1], 16 predictor bytes from [pred], and stores the | 
 | ;  16-bit values src - pred to [diff0] (first row) and [diff1] (second row). | 
 | ;  Uses xmm0-xmm3 as scratch. | 
 | %macro SUBTRACT_MBUV_ROWPAIR 5 | 
 |             movq       xmm0,    MMWORD PTR [%1]         ; first source row | 
 |             movq       xmm2,    MMWORD PTR [%2]         ; second source row | 
 |             movdqa     xmm1,    XMMWORD PTR [%3]        ; pred, both rows | 
 |             punpcklqdq xmm0,    xmm2                    ; pack both source rows together | 
 |  | 
 |             movdqa     xmm2,    xmm0 | 
 |             psubb      xmm0,    xmm1                    ; low byte of src - pred | 
 |  | 
 |             ; flipping bit 7 makes the signed pcmpgtb order the bytes as unsigned | 
 |             pxor       xmm1,    [GLOBAL(t80)] | 
 |             pxor       xmm2,    [GLOBAL(t80)] | 
 |             pcmpgtb    xmm1,    xmm2                    ; 0xff where pred > src, else 0 | 
 |  | 
 |             movdqa     xmm2,    xmm0 | 
 |             movdqa     xmm3,    xmm1 | 
 |             punpcklbw  xmm0,    xmm1                    ; mask becomes the high byte, first row | 
 |             punpckhbw  xmm2,    xmm3                    ; same for the second row | 
 |  | 
 |             movdqa     XMMWORD PTR [%4],   xmm0 | 
 |             movdqa     XMMWORD PTR [%5],   xmm2 | 
 | %endmacro | 
 |  | 
 | ; SUBTRACT_MBUV_PLANE | 
 | ;  Emits the code for 8 rows of 8 pixels. | 
 | ;  On entry: rsi = source, rdx = stride, rcx = 3*stride, rax = pred, rdi = diff. | 
 | ;  rsi is left pointing at source row 4; rax and rdi are not changed. | 
 | %macro SUBTRACT_MBUV_PLANE 0 | 
 |             ;line 0 1 | 
 |             SUBTRACT_MBUV_ROWPAIR rsi, rsi+rdx, rax, rdi, rdi +16 | 
 |  | 
 |             ;line 2 3 | 
 |             SUBTRACT_MBUV_ROWPAIR rsi+rdx*2, rsi+rcx, rax+16, rdi + 32, rdi + 48 | 
 |  | 
 |             ;line 4 5 | 
 |             lea        rsi,     [rsi + rdx*4] | 
 |  | 
 |             SUBTRACT_MBUV_ROWPAIR rsi, rsi+rdx, rax + 32, rdi + 64, rdi + 80 | 
 |  | 
 |             ;line 6 7 | 
 |             SUBTRACT_MBUV_ROWPAIR rsi+rdx*2, rsi+rcx, rax+ 48, rdi + 96, rdi + 112 | 
 | %endmacro | 
 |  | 
 | ;void vp9_subtract_mbuv_sse2(short *diff, unsigned char *usrc, unsigned char *vsrc, unsigned char *pred, int stride) | 
 | ; | 
 | ;  Writes src - pred as shorts for two planes of 8 rows by 8 pixels. | 
 | ;    usrc, vsrc  source planes, rows stride bytes apart | 
 | ;    pred        u prediction starts at pred + 256, v at pred + 320, | 
 | ;                rows 8 bytes apart | 
 | ;    diff        u output starts at diff + 256 shorts, v at diff + 320 shorts, | 
 | ;                rows 8 shorts apart | 
 | ;  pred and diff are accessed with movdqa, so those addresses have to be | 
 | ;  16-byte aligned; the source rows are read with movq. | 
 | global sym(vp9_subtract_mbuv_sse2) PRIVATE | 
 | sym(vp9_subtract_mbuv_sse2): | 
 |     push        rbp | 
 |     mov         rbp, rsp | 
 |     SHADOW_ARGS_TO_STACK 5 | 
 |     GET_GOT     rbx | 
 |     push rsi | 
 |     push rdi | 
 |     ; end prolog | 
 |  | 
 |             mov     rdi,        arg(0) ;diff | 
 |             mov     rax,        arg(3) ;pred | 
 |             mov     rsi,        arg(1) ;z = usrc | 
 |             add     rdi,        256*2  ;diff = diff + 256 (shorts) | 
 |             add     rax,        256    ;Predictor = pred + 256 | 
 |             movsxd  rdx,        dword ptr arg(4) ;stride; | 
 |             lea     rcx,        [rdx + rdx*2]    ;3*stride, reaches rows 3 and 7 | 
 |  | 
 |             ;u | 
 |             SUBTRACT_MBUV_PLANE | 
 |  | 
 |             ;v | 
 |             mov     rsi,        arg(2) ;z = vsrc | 
 |             add     rdi,        64*2  ;diff = diff + 320 (shorts) | 
 |             add     rax,        64    ;Predictor = pred + 320 | 
 |  | 
 |             SUBTRACT_MBUV_PLANE | 
 |  | 
 |     ; begin epilog | 
 |     pop rdi | 
 |     pop rsi | 
 |     RESTORE_GOT | 
 |     UNSHADOW_ARGS | 
 |     pop         rbp | 
 |     ret | 
 |  | 
 | SECTION_RODATA | 
 | align 16 | 
 | ; 0x80 in each of the 16 byte lanes.  The subtract routines XOR pixel bytes | 
 | ; with this mask before pcmpgtb so that the signed compare orders them as | 
 | ; unsigned values. | 
 | t80: | 
 |     times 16 db 80h | 