;
;  Copyright (c) 2010 The WebM project authors. All Rights Reserved.
;
;  Use of this source code is governed by a BSD-style license
;  that can be found in the LICENSE file in the root of the source
;  tree. An additional intellectual property rights grant can be found
;  in the file PATENTS.  All contributing project authors may
;  be found in the AUTHORS file in the root of the source tree.
;
| 10 | |
| 11 | |
| 12 | %include "aom_ports/x86_abi_support.asm" |
| 13 | |
;-----------------------------------------------------------------------------
; void aom_plane_add_noise_sse2(unsigned char *start, unsigned char *noise,
;                               unsigned char blackclamp[16],
;                               unsigned char whiteclamp[16],
;                               unsigned char bothclamp[16],
;                               unsigned int width, unsigned int height,
;                               int pitch)
;
; For each of `height` rows, clamps the pixels at `start` with the three
; 16-byte clamp vectors and then adds noise bytes to them. The noise for a
; row is read from `noise` plus a per-row offset of (LIBAOM_RAND() & 0xff).
;
; Arguments are accessed through the arg(n) macro after
; SHADOW_ARGS_TO_STACK (both come from aom_ports/x86_abi_support.asm).
;
; Register use inside the row loop:
;   rsi        = start of the current row
;   rdi        = noise + random offset for this row
;   rcx        = width (sign-extended from 32 bits, compared as signed)
;   rax        = byte offset within the row, advancing in steps of 16
;   xmm3/4/5   = blackclamp / whiteclamp / bothclamp
;   xmm1, xmm2 = scratch (pixels, noise)
;
; Notes for callers:
;   * Rows are processed in whole 16-byte groups and at least one group is
;     processed per row, so up to 15 bytes beyond `width` (16 bytes when
;     width is 0) are read and written in the row, and the same span is
;     read from the noise buffer starting at an offset of up to 255.
;   * The shadowed copies of `start` (arg(0)) and `height` (arg(6)) are
;     updated in place as the loop advances.
;   * Returns immediately when `height` is 0.
;-----------------------------------------------------------------------------
global sym(aom_plane_add_noise_sse2) PRIVATE
sym(aom_plane_add_noise_sse2):
    push        rbp
    mov         rbp, rsp
    SHADOW_ARGS_TO_STACK 8
    GET_GOT     rbx
    push        rsi
    push        rdi
    ; end prolog

    ; The row loop below tests the height only at its bottom, so without
    ; this check a zero-height plane would still have one row modified.
    cmp         dword arg(6), 0             ; height == 0 ?
    je          .addnoise_done

.addnoise_loop:
    call        sym(LIBAOM_RAND) WRT_PLT    ; result in rax

    mov         rdi, arg(1)                 ; noise
    and         rax, 0xff                   ; random offset in [0, 255]
    add         rdi, rax                    ; rdi = noise + offset

    ; Load the clamps after the call, once per row: xmm3-xmm5 are
    ; caller-saved in both the System V AMD64 and Microsoft x64 calling
    ; conventions, so values loaded before the call are not guaranteed to
    ; survive it.
    mov         rdx, arg(2)                 ; blackclamp
    movdqu      xmm3, [rdx]
    mov         rdx, arg(3)                 ; whiteclamp
    movdqu      xmm4, [rdx]
    mov         rdx, arg(4)                 ; bothclamp
    movdqu      xmm5, [rdx]

    movsxd      rcx, dword arg(5)           ; width
    mov         rsi, arg(0)                 ; current row
    xor         rax, rax                    ; byte offset within the row

.addnoise_nextset:
    movdqu      xmm1, [rsi+rax]             ; get 16 source pixels

    psubusb     xmm1, xmm3                  ; subtract black clamp
    paddusb     xmm1, xmm5                  ; add both clamp
    psubusb     xmm1, xmm4                  ; subtract white clamp

    movdqu      xmm2, [rdi+rax]             ; get the noise for this group
    paddb       xmm1, xmm2                  ; add it in (wrapping add)
    movdqu      [rsi+rax], xmm1             ; store the result

    add         rax, 16                     ; move to the next 16 pixels

    cmp         rax, rcx
    jl          .addnoise_nextset

    movsxd      rax, dword arg(7)           ; pitch
    add         arg(0), rax                 ; start += pitch
    sub         dword arg(6), 1             ; height -= 1
    jg          .addnoise_loop

.addnoise_done:
    ; begin epilog
    pop         rdi
    pop         rsi
    RESTORE_GOT
    UNSHADOW_ARGS
    pop         rbp
    ret
| 77 | |
SECTION_RODATA
align 16
; Read-only constants, 16 bytes each. Neither label is referenced by the
; code visible in this part of the file.

; Eight 16-bit words, each holding 4.
rd42:
    dw 4, 4, 4, 4, 4, 4, 4, 4

; Four 32-bit dwords, each holding 8.
four8s:
    dd 8, 8, 8, 8