| ; | 
 | ;  Copyright (c) 2010 The WebM project authors. All Rights Reserved. | 
 | ; | 
 | ;  Use of this source code is governed by a BSD-style license | 
 | ;  that can be found in the LICENSE file in the root of the source | 
 | ;  tree. An additional intellectual property rights grant can be found | 
 | ;  in the file PATENTS.  All contributing project authors may | 
 | ;  be found in the AUTHORS file in the root of the source tree. | 
 | ; | 
 |  | 
 |  | 
 | %include "vpx_ports/x86_abi_support.asm" | 
 |  | 
 | ;----------------------------------------------------------------------- | 
 | ;void vp9_subtract_b_mmx_impl(unsigned char *z,  int src_stride, | 
 | ;                            short *diff, unsigned char *Predictor, | 
 | ;                            int pitch); | 
 | ; | 
 | ; Residual of one 4x4 block.  For each of four rows, four bytes of z and | 
 | ; four bytes of Predictor are zero-extended to 16-bit words, subtracted | 
 | ; (z - Predictor), and the four resulting words are stored to diff. | 
 | ; | 
 | ;   z rows         : src_stride bytes apart | 
 | ;   Predictor rows : pitch bytes apart | 
 | ;   diff rows      : pitch shorts (2*pitch bytes) apart | 
 | ; | 
 | ; Register roles: rsi = z, rax = Predictor, rdi = diff, | 
 | ;                 rdx = src_stride, rcx = pitch (3*pitch for the last row), | 
 | ;                 mm7 = zero, mm0/mm1 = scratch. | 
 | ; rsi and rdi are saved and restored.  No emms is executed here. | 
 | ;----------------------------------------------------------------------- | 
 |  | 
 | ; SUBTRACT_B_MMX_ROW z_addr, pred_addr, diff_addr | 
 | ; One row of four pixels: | 
 | ;     [diff_addr] (4 words) = [z_addr] (4 bytes) - [pred_addr] (4 bytes) | 
 | ; Expects mm7 == 0; overwrites mm0 and mm1. | 
 | %macro SUBTRACT_B_MMX_ROW 3 | 
 |         movd        mm0,    [%1] | 
 |         movd        mm1,    [%2] | 
 |         punpcklbw   mm0,    mm7 | 
 |         punpcklbw   mm1,    mm7 | 
 |         psubw       mm0,    mm1 | 
 |         movq        [%3],   mm0 | 
 | %endmacro | 
 |  | 
 | global sym(vp9_subtract_b_mmx_impl) PRIVATE | 
 | sym(vp9_subtract_b_mmx_impl): | 
 |     push        rbp | 
 |     mov         rbp, rsp | 
 |     SHADOW_ARGS_TO_STACK 5 | 
 |     push        rsi | 
 |     push        rdi | 
 |     ; end prolog | 
 |  | 
 |         ; Fetch the arguments; the two int arguments are sign-extended. | 
 |         mov     rdi,        arg(2)              ; diff | 
 |         mov     rax,        arg(3)              ; Predictor | 
 |         mov     rsi,        arg(0)              ; z | 
 |         movsxd  rdx,        dword ptr arg(1)    ; src_stride | 
 |         movsxd  rcx,        dword ptr arg(4)    ; pitch | 
 |         pxor    mm7,        mm7                 ; zero partner for unpack | 
 |  | 
 |         ; row 0 | 
 |         SUBTRACT_B_MMX_ROW  rsi, rax, rdi | 
 |  | 
 |         ; row 1 | 
 |         SUBTRACT_B_MMX_ROW  rsi+rdx, rax+rcx, rdi+rcx*2 | 
 |  | 
 |         ; row 2 | 
 |         SUBTRACT_B_MMX_ROW  rsi+rdx*2, rax+rcx*2, rdi+rcx*4 | 
 |  | 
 |         ; A scale of 3 cannot be encoded in an address, so step z forward | 
 |         ; by two rows and turn rcx into 3*pitch before the last row. | 
 |         lea     rsi,        [rsi+rdx*2] | 
 |         lea     rcx,        [rcx+rcx*2] | 
 |  | 
 |         ; row 3 | 
 |         SUBTRACT_B_MMX_ROW  rsi+rdx, rax+rcx, rdi+rcx*2 | 
 |  | 
 |     ; begin epilog | 
 |     pop         rdi | 
 |     pop         rsi | 
 |     UNSHADOW_ARGS | 
 |     pop         rbp | 
 |     ret | 
 |  | 
 | ;----------------------------------------------------------------------- | 
 | ;void vp9_subtract_mby_mmx(short *diff, unsigned char *src, unsigned char *pred, int stride) | 
 | ; | 
 | ; Residual of a 16x16 block: diff = src - pred, one 16-bit word per pixel. | 
 | ; | 
 | ;   src rows  : stride bytes apart | 
 | ;   pred rows : 16 bytes apart (read back to back) | 
 | ;   diff rows : 16 shorts = 32 bytes apart (written back to back) | 
 | ; | 
 | ; Register roles: rsi = src, rax = pred, rdi = diff, rdx = stride, | 
 | ;                 rcx = rows remaining, mm0 = zero, mm1-mm4 = scratch. | 
 | ; rsi and rdi are saved and restored.  No emms is executed here. | 
 | ;----------------------------------------------------------------------- | 
 |  | 
 | ; SUBTRACT_MBY_MMX_8PX src_addr, pred_addr, diff_addr | 
 | ; Eight pixels: | 
 | ;     [diff_addr] (8 words) = [src_addr] (8 bytes) - [pred_addr] (8 bytes) | 
 | ; Expects mm0 == 0; overwrites mm1, mm2, mm3 and mm4. | 
 | %macro SUBTRACT_MBY_MMX_8PX 3 | 
 |             movq        mm1,            [%1] | 
 |             movq        mm3,            [%2] | 
 |  | 
 |             ; keep a copy of each so the high four bytes can be widened too | 
 |             movq        mm2,            mm1 | 
 |             movq        mm4,            mm3 | 
 |  | 
 |             punpcklbw   mm1,            mm0 | 
 |             punpcklbw   mm3,            mm0 | 
 |  | 
 |             punpckhbw   mm2,            mm0 | 
 |             punpckhbw   mm4,            mm0 | 
 |  | 
 |             psubw       mm1,            mm3 | 
 |             psubw       mm2,            mm4 | 
 |  | 
 |             movq        [%3],           mm1 | 
 |             movq        [%3+8],         mm2 | 
 | %endmacro | 
 |  | 
 | global sym(vp9_subtract_mby_mmx) PRIVATE | 
 | sym(vp9_subtract_mby_mmx): | 
 |     push        rbp | 
 |     mov         rbp, rsp | 
 |     SHADOW_ARGS_TO_STACK 4 | 
 |     push        rsi | 
 |     push        rdi | 
 |     ; end prolog | 
 |  | 
 |             mov         rsi,            arg(1)              ; src | 
 |             mov         rdi,            arg(0)              ; diff | 
 |  | 
 |             mov         rax,            arg(2)              ; pred | 
 |             movsxd      rdx,            dword ptr arg(3)    ; stride | 
 |  | 
 |             mov         rcx,            16                  ; row counter | 
 |             pxor        mm0,            mm0                 ; zero partner for unpack | 
 |  | 
 | .submby_loop: | 
 |  | 
 |             ; pixels 0..7 of the current row | 
 |             SUBTRACT_MBY_MMX_8PX    rsi, rax, rdi | 
 |  | 
 |             ; pixels 8..15 of the current row | 
 |             SUBTRACT_MBY_MMX_8PX    rsi+8, rax+8, rdi+16 | 
 |  | 
 |             ; move all three pointers to the next row | 
 |             add         rdi,            32 | 
 |             add         rax,            16 | 
 |  | 
 |             lea         rsi,            [rsi+rdx] | 
 |  | 
 |             sub         rcx,            1 | 
 |             jnz         .submby_loop | 
 |  | 
 |     ; begin epilog | 
 |     pop         rdi | 
 |     pop         rsi | 
 |     UNSHADOW_ARGS | 
 |     pop         rbp | 
 |     ret | 
 |  | 
 |  | 
 | ;----------------------------------------------------------------------- | 
 | ;void vp9_subtract_mbuv_mmx(short *diff, unsigned char *usrc, unsigned char *vsrc, unsigned char *pred, int stride) | 
 | ; | 
 | ; Residual of two 8x8 blocks, one read from usrc and one from vsrc: | 
 | ; | 
 | ;   U: (diff + 256 shorts) = usrc - (pred + 256) | 
 | ;   V: (diff + 320 shorts) = vsrc - (pred + 320) | 
 | ; | 
 | ;   usrc / vsrc rows : stride bytes apart | 
 | ;   pred rows        : 8 bytes apart (read back to back) | 
 | ;   diff rows        : 8 shorts = 16 bytes apart (written back to back) | 
 | ; | 
 | ; Register roles: rsi = source plane, rax = pred, rdi = diff, | 
 | ;                 rdx = stride, mm7 = zero, mm0/mm1/mm3/mm4 = scratch. | 
 | ; rsi and rdi are saved and restored.  No emms is executed here. | 
 | ;----------------------------------------------------------------------- | 
 |  | 
 | ; SUBTRACT_MBUV_MMX_ROW src_addr, pred_addr, diff_addr | 
 | ; One row of eight pixels: | 
 | ;     [diff_addr] (8 words) = [src_addr] (8 bytes) - [pred_addr] (8 bytes) | 
 | ; Expects mm7 == 0; overwrites mm0, mm1, mm3 and mm4. | 
 | %macro SUBTRACT_MBUV_MMX_ROW 3 | 
 |             movq        mm0,        [%1] | 
 |             movq        mm1,        [%2] | 
 |             movq        mm3,        mm0 | 
 |             movq        mm4,        mm1 | 
 |             punpcklbw   mm0,        mm7 | 
 |             punpcklbw   mm1,        mm7 | 
 |             punpckhbw   mm3,        mm7 | 
 |             punpckhbw   mm4,        mm7 | 
 |             psubw       mm0,        mm1 | 
 |             psubw       mm3,        mm4 | 
 |             movq        [%3],       mm0 | 
 |             movq        [%3+8],     mm3 | 
 | %endmacro | 
 |  | 
 | ; SUBTRACT_MBUV_MMX_4ROWS | 
 | ; Four consecutive rows starting at rsi (source), rax (pred), rdi (diff). | 
 | ; On exit rsi has moved forward by two rows; rax and rdi are unchanged. | 
 | %macro SUBTRACT_MBUV_MMX_4ROWS 0 | 
 |             SUBTRACT_MBUV_MMX_ROW   rsi, rax, rdi | 
 |             SUBTRACT_MBUV_MMX_ROW   rsi+rdx, rax+8, rdi+16 | 
 |             SUBTRACT_MBUV_MMX_ROW   rsi+rdx*2, rax+16, rdi+32 | 
 |  | 
 |             ; the fourth row is reached as (rsi + 2*stride) + stride | 
 |             lea         rsi,        [rsi+rdx*2] | 
 |             SUBTRACT_MBUV_MMX_ROW   rsi+rdx, rax+24, rdi+48 | 
 | %endmacro | 
 |  | 
 | global sym(vp9_subtract_mbuv_mmx) PRIVATE | 
 | sym(vp9_subtract_mbuv_mmx): | 
 |     push        rbp | 
 |     mov         rbp, rsp | 
 |     SHADOW_ARGS_TO_STACK 5 | 
 |     push        rsi | 
 |     push        rdi | 
 |     ; end prolog | 
 |  | 
 |     ; ---- U plane ------------------------------------------------------- | 
 |     ;   source    = usrc | 
 |     ;   diff      = diff + 256 | 
 |     ;   Predictor = pred + 256 | 
 |  | 
 |             mov         rdi,        arg(0)              ; diff | 
 |             mov         rax,        arg(3)              ; pred | 
 |             mov         rsi,        arg(1)              ; usrc | 
 |             add         rdi,        256*2               ; 256 shorts into diff | 
 |             add         rax,        256                 ; 256 bytes into pred | 
 |             movsxd      rdx,        dword ptr arg(4)    ; stride | 
 |             pxor        mm7,        mm7                 ; zero partner for unpack | 
 |  | 
 |             ; rows 0..3 | 
 |             SUBTRACT_MBUV_MMX_4ROWS | 
 |  | 
 |             ; step diff and pred past four rows; rsi is already two rows | 
 |             ; ahead, so two more rows bring it to row 4 | 
 |             add         rdi,        64 | 
 |             add         rax,        32 | 
 |             lea         rsi,        [rsi+rdx*2] | 
 |  | 
 |             ; rows 4..7 | 
 |             SUBTRACT_MBUV_MMX_4ROWS | 
 |  | 
 |     ; ---- V plane ------------------------------------------------------- | 
 |     ;   source    = vsrc | 
 |     ;   diff      = diff + 320 | 
 |     ;   Predictor = pred + 320 | 
 |  | 
 |             mov         rdi,        arg(0)              ; diff | 
 |             mov         rax,        arg(3)              ; pred | 
 |             mov         rsi,        arg(2)              ; vsrc | 
 |             add         rdi,        320*2               ; 320 shorts into diff | 
 |             add         rax,        320                 ; 320 bytes into pred | 
 |             movsxd      rdx,        dword ptr arg(4)    ; stride | 
 |             pxor        mm7,        mm7                 ; zero partner for unpack | 
 |  | 
 |             ; rows 0..3 | 
 |             SUBTRACT_MBUV_MMX_4ROWS | 
 |  | 
 |             ; same pointer advance as for the U plane | 
 |             add         rdi,        64 | 
 |             add         rax,        32 | 
 |             lea         rsi,        [rsi+rdx*2] | 
 |  | 
 |             ; rows 4..7 | 
 |             SUBTRACT_MBUV_MMX_4ROWS | 
 |  | 
 |     ; begin epilog | 
 |     pop         rdi | 
 |     pop         rsi | 
 |     UNSHADOW_ARGS | 
 |     pop         rbp | 
 |     ret | 