| ; |
| ; Copyright (c) 2016, Alliance for Open Media. All rights reserved |
| ; |
| ; This source code is subject to the terms of the BSD 2 Clause License and |
| ; the Alliance for Open Media Patent License 1.0. If the BSD 2 Clause License |
| ; was not distributed with this source code in the LICENSE file, you can |
| ; obtain it at www.aomedia.org/license/software. If the Alliance for Open |
| ; Media Patent License 1.0 was not distributed with this source code in the |
| ; PATENTS file, you can obtain it at www.aomedia.org/license/patent. |
| ; |
| |
| ; |
| |
| |
| %include "aom_config.asm" |
| |
| ; 32/64 bit compatibility macros |
| ; |
| ; The sources are written with 64 bit register names and syntax; on 32 bit |
| ; platforms the preprocessor rewrites them into the 32 bit equivalents. |
| ; |
| ; ABI_IS_32BIT |
| ; 1 when the output object format is one of the 32 bit formats tested below, |
| ; 0 for every other output format. |
| ; |
| %ifidn __OUTPUT_FORMAT__,aout |
|   %define ABI_IS_32BIT 1 |
| %elifidn __OUTPUT_FORMAT__,win32 |
|   %define ABI_IS_32BIT 1 |
| %elifidn __OUTPUT_FORMAT__,macho32 |
|   %define ABI_IS_32BIT 1 |
| %elifidn __OUTPUT_FORMAT__,elf32 |
|   %define ABI_IS_32BIT 1 |
| %else |
|   %define ABI_IS_32BIT 0 |
| %endif |
| |
| %if ABI_IS_32BIT |
|   ; Map every 64 bit general purpose register name used by the sources onto |
|   ; the corresponding 32 bit register name. |
|   %define rax eax |
|   %define rcx ecx |
|   %define rdx edx |
|   %define rbx ebx |
|   %define rsi esi |
|   %define rdi edi |
|   %define rbp ebp |
|   %define rsp esp |
| |
|   ; movsxd is emitted as a plain mov on 32 bit targets. |
|   %define movsxd mov |
| |
|   ; movq dst, src |
|   ; When either operand is one of the eight 32 bit general purpose registers |
|   ; the move is emitted as movd; every other operand pair is emitted as movq |
|   ; unchanged. |
|   %macro movq 2 |
|     ; destination is a general purpose register |
|     %ifidn %1,eax |
|       movd %1,%2 |
|     %elifidn %1,ecx |
|       movd %1,%2 |
|     %elifidn %1,edx |
|       movd %1,%2 |
|     %elifidn %1,ebx |
|       movd %1,%2 |
|     %elifidn %1,esi |
|       movd %1,%2 |
|     %elifidn %1,edi |
|       movd %1,%2 |
|     %elifidn %1,ebp |
|       movd %1,%2 |
|     %elifidn %1,esp |
|       movd %1,%2 |
|     ; source is a general purpose register |
|     %elifidn %2,eax |
|       movd %1,%2 |
|     %elifidn %2,ecx |
|       movd %1,%2 |
|     %elifidn %2,edx |
|       movd %1,%2 |
|     %elifidn %2,ebx |
|       movd %1,%2 |
|     %elifidn %2,esi |
|       movd %1,%2 |
|     %elifidn %2,edi |
|       movd %1,%2 |
|     %elifidn %2,ebp |
|       movd %1,%2 |
|     %elifidn %2,esp |
|       movd %1,%2 |
|     ; neither operand is a general purpose register |
|     %else |
|       movq %1,%2 |
|     %endif |
|   %endmacro |
| %endif |
| |
| |
| ; LIBAOM_YASM_WIN64 |
| ; 1 when the output format is 64 bit Windows, 0 otherwise. Both spellings of |
| ; the format name (x64 and win64) that can be given on the Yasm command line |
| ; are recognized. |
| %ifidn __OUTPUT_FORMAT__,x64 |
|   %define LIBAOM_YASM_WIN64 1 |
| %elifidn __OUTPUT_FORMAT__,win64 |
|   %define LIBAOM_YASM_WIN64 1 |
| %else |
|   %define LIBAOM_YASM_WIN64 0 |
| %endif |
| |
| ; sym() |
| ; Return the proper symbol name for the target ABI. |
| ; |
| ; Certain ABIs, notably MS COFF and Darwin MACH-O, require that symbols |
| ; with C linkage be prefixed with an underscore. The name is used as-is for |
| ; 64 bit Windows and for the ELF formats; every other output format gets the |
| ; leading underscore. |
| ; |
| %if LIBAOM_YASM_WIN64 |
|   %define sym(x) x |
| %elifidn __OUTPUT_FORMAT__,elf64 |
|   %define sym(x) x |
| %elifidn __OUTPUT_FORMAT__,elfx32 |
|   %define sym(x) x |
| %elifidn __OUTPUT_FORMAT__,elf32 |
|   %define sym(x) x |
| %else |
|   %define sym(x) _ %+ x |
| %endif |
| |
| ; PRIVATE |
| ; Attribute appended to a global symbol declaration to hide it for the |
| ; target ABI. It expands to nothing unless CHROMIUM is defined. |
| ; |
| ; Chromium doesn't like exported global symbols due to symbol clashing with |
| ; plugins among other things. |
| ; |
| ; Requires Chromium's patched copy of yasm: |
| ; http://src.chromium.org/viewvc/chrome?view=rev&revision=73761 |
| ; http://www.tortall.net/projects/yasm/ticket/236 |
| ; |
| %ifndef CHROMIUM |
|   %define PRIVATE |
| %else |
|   %if LIBAOM_YASM_WIN64 |
|     %define PRIVATE |
|   %elifidn __OUTPUT_FORMAT__,elf32 |
|     %define PRIVATE :hidden |
|   %elifidn __OUTPUT_FORMAT__,elf64 |
|     %define PRIVATE :hidden |
|   %elifidn __OUTPUT_FORMAT__,elfx32 |
|     %define PRIVATE :hidden |
|   %else |
|     %define PRIVATE :private_extern |
|   %endif |
| %endif |
| |
| ; arg() |
| ; Return the address specification of the given argument. |
| ; |
| ; x is the zero-based argument index. It is parenthesized in the expansion so |
| ; that an expression argument such as arg(n+1) has the whole index scaled by |
| ; the slot size, not just its first term. |
| ; |
| %if ABI_IS_32BIT |
|   ; 4 byte slots starting 8 bytes above the frame pointer. |
|   %define arg(x) [ebp+8+4*(x)] |
| %else |
|   ; 64 bit ABI passes arguments in registers. This is a workaround to get up |
|   ; and running quickly. Relies on SHADOW_ARGS_TO_STACK |
|   %if LIBAOM_YASM_WIN64 |
|     ; 8 byte slots starting 16 bytes above the frame pointer. |
|     %define arg(x) [rbp+16+8*(x)] |
|   %else |
|     ; 8 byte slots growing downwards from just below the frame pointer. |
|     %define arg(x) [rbp-8-8*(x)] |
|   %endif |
| %endif |
| |
| ; REG_SZ_BYTES, REG_SZ_BITS |
| ; Width of a general purpose register for the target ABI, expressed in bytes |
| ; and in bits. |
| %if ABI_IS_32BIT=0 |
|   %define REG_SZ_BITS 64 |
|   %define REG_SZ_BYTES 8 |
| %else |
|   %define REG_SZ_BITS 32 |
|   %define REG_SZ_BYTES 4 |
| %endif |
| |
| |
| ; ALIGN_STACK <alignment> <register> |
| ; This macro aligns the stack to the given alignment (in bytes). The stack |
| ; is left such that the previous value of the stack pointer is the first |
| ; argument on the stack (i.e., the inverse of this macro is 'pop rsp'). |
| ; This macro uses one temporary register, which is not preserved, and thus |
| ; must be specified as an argument. |
| ; |
| ; Sequence: the incoming rsp is copied to the temporary register, rsp is |
| ; rounded down to a multiple of the alignment, rsp is then lowered by |
| ; (alignment - REG_SZ_BYTES) so that the final push of the saved value leaves |
| ; rsp on an alignment boundary again, with the old rsp stored at [rsp]. |
| %macro ALIGN_STACK 2 |
| mov %2, rsp |
| and rsp, -%1 |
| lea rsp, [rsp - (%1 - REG_SZ_BYTES)] |
| push %2 |
| %endmacro |
| |
| |
| ; |
| ; MASM-style operand decorations |
| ; |
| ; The Microsoft assembler tries to impose a certain amount of type safety in |
| ; its register usage. YASM doesn't recognize these directives, so each of them |
| ; is defined (case-insensitively) to expand to nothing, which keeps the |
| ; sources as close as possible to the original inline assembler they were |
| ; ported from. |
| ; |
| %idefine MMWORD |
| %idefine XMMWORD |
| %idefine PTR |
| |
| ; PIC macros |
| ; |
| ; GET_GOT reg    - takes one register argument. In the 32 bit PIC variants |
| ;                  below it saves the register on the stack and loads it |
| ;                  with the base that GLOBAL() adds to symbols; in every |
| ;                  other configuration it expands to nothing. |
| ; GLOBAL(x)      - operand expression used to address the symbol x. |
| ; RESTORE_GOT    - pops the register saved by GET_GOT where GET_GOT pushed |
| ;                  one; empty otherwise. |
| ; WRT_PLT        - 'wrt ..plt' on the ELF configurations that set it below; |
| ;                  empty otherwise. |
| ; HIDDEN_DATA(x) - x, optionally followed by a visibility attribute. |
| ; |
| %if ABI_IS_32BIT |
| ; CONFIG_PIC is not defined in this file; presumably it comes from |
| ; aom_config.asm - TODO confirm. |
| %if CONFIG_PIC=1 |
| %ifidn __OUTPUT_FORMAT__,elf32 |
| %define WRT_PLT wrt ..plt |
| ; elf32: the call pushes the address of the sub_offset label as its return |
| ; address; the helper at get_got reads that address from the top of the |
| ; stack, adds the 'wrt ..gotpc' displacement to it and returns to sub_offset, |
| ; which jumps past the helper. NOTE(review): the sum is presumably the |
| ; address of the global offset table - confirm against the assembler's elf32 |
| ; PIC documentation. |
| ; GLOBAL and RESTORE_GOT are redefined by each expansion so that they refer |
| ; to the register given to that expansion; in this branch GLOBAL is not |
| ; defined until GET_GOT has been expanded once. |
| %macro GET_GOT 1 |
| extern _GLOBAL_OFFSET_TABLE_ |
| push %1 |
| call %%get_got |
| %%sub_offset: |
| jmp %%exitGG |
| %%get_got: |
| mov %1, [esp] |
| add %1, _GLOBAL_OFFSET_TABLE_ + $$ - %%sub_offset wrt ..gotpc |
| ret |
| %%exitGG: |
| %undef GLOBAL |
| %define GLOBAL(x) x + %1 wrt ..gotoff |
| %undef RESTORE_GOT |
| %define RESTORE_GOT pop %1 |
| %endmacro |
| %elifidn __OUTPUT_FORMAT__,macho32 |
| ; macho32: the call targets the label that immediately follows it, so the |
| ; pop leaves the address of get_got in the register; GLOBAL(x) then |
| ; addresses x relative to that label. As above, GLOBAL and RESTORE_GOT are |
| ; redefined by each expansion. |
| %macro GET_GOT 1 |
| push %1 |
| call %%get_got |
| %%get_got: |
| pop %1 |
| %undef GLOBAL |
| %define GLOBAL(x) x + %1 - %%get_got |
| %undef RESTORE_GOT |
| %define RESTORE_GOT pop %1 |
| %endmacro |
| %endif |
| %endif |
| |
| ; 32 bit HIDDEN_DATA: only macho32 under CHROMIUM gets an attribute. |
| %ifdef CHROMIUM |
| %ifidn __OUTPUT_FORMAT__,macho32 |
| %define HIDDEN_DATA(x) x:private_extern |
| %else |
| %define HIDDEN_DATA(x) x |
| %endif |
| %else |
| %define HIDDEN_DATA(x) x |
| %endif |
| %else |
| ; 64 bit: GET_GOT is a no-op and GLOBAL(x) uses the 'rel' form of x. |
| %macro GET_GOT 1 |
| %endmacro |
| %define GLOBAL(x) rel x |
| %ifidn __OUTPUT_FORMAT__,elf64 |
| %define WRT_PLT wrt ..plt |
| %define HIDDEN_DATA(x) x:data hidden |
| %elifidn __OUTPUT_FORMAT__,elfx32 |
| %define WRT_PLT wrt ..plt |
| %define HIDDEN_DATA(x) x:data hidden |
| %elifidn __OUTPUT_FORMAT__,macho64 |
| %ifdef CHROMIUM |
| %define HIDDEN_DATA(x) x:private_extern |
| %else |
| %define HIDDEN_DATA(x) x |
| %endif |
| %else |
| %define HIDDEN_DATA(x) x |
| %endif |
| %endif |
| ; Fallbacks for every configuration that did not define the macros above: |
| ; an empty GET_GOT with GLOBAL(x) expanding to plain x, and empty |
| ; RESTORE_GOT / WRT_PLT. |
| %ifnmacro GET_GOT |
| %macro GET_GOT 1 |
| %endmacro |
| %define GLOBAL(x) x |
| %endif |
| %ifndef RESTORE_GOT |
| %define RESTORE_GOT |
| %endif |
| %ifndef WRT_PLT |
| %define WRT_PLT |
| %endif |
| |
| ; SHADOW_ARGS_TO_STACK argc |
| ; Copies the first argc arguments to the stack slots that arg() addresses. |
| ; UNSHADOW_ARGS |
| ; Counterpart of SHADOW_ARGS_TO_STACK; on 64 bit targets it reloads rsp from |
| ; rbp. |
| %if ABI_IS_32BIT |
|   ; 32 bit: nothing is emitted by either macro. |
|   %define UNSHADOW_ARGS |
|   %macro SHADOW_ARGS_TO_STACK 1 |
|   %endmacro |
| %elif LIBAOM_YASM_WIN64 |
|   %define UNSHADOW_ARGS mov rsp, rbp |
|   ; Win64: store up to four register arguments (rcx, rdx, r8, r9) into the |
|   ; slots named by arg(0) through arg(3). |
|   %macro SHADOW_ARGS_TO_STACK 1 ; argc |
|     %if %1 > 0 |
|       mov arg(0),rcx |
|     %endif |
|     %if %1 > 1 |
|       mov arg(1),rdx |
|     %endif |
|     %if %1 > 2 |
|       mov arg(2),r8 |
|     %endif |
|     %if %1 > 3 |
|       mov arg(3),r9 |
|     %endif |
|   %endmacro |
| %else |
|   %define UNSHADOW_ARGS mov rsp, rbp |
|   ; Other 64 bit targets: push up to six register arguments in the order |
|   ; rdi, rsi, rdx, rcx, r8, r9; any further arguments are read from |
|   ; [rbp+16] upwards, 8 bytes apart, and pushed as well (through rax, which |
|   ; is overwritten). |
|   %macro SHADOW_ARGS_TO_STACK 1 ; argc |
|     %if %1 > 0 |
|       push rdi |
|     %endif |
|     %if %1 > 1 |
|       push rsi |
|     %endif |
|     %if %1 > 2 |
|       push rdx |
|     %endif |
|     %if %1 > 3 |
|       push rcx |
|     %endif |
|     %if %1 > 4 |
|       push r8 |
|     %endif |
|     %if %1 > 5 |
|       push r9 |
|     %endif |
|     %if %1 > 6 |
|       %assign i %1-6 |
|       %assign off 16 |
|       %rep i |
|         mov rax,[rbp+off] |
|         push rax |
|         %assign off off+8 |
|       %endrep |
|     %endif |
|   %endmacro |
| %endif |
| |
| ; Win64 ABI requires that XMM6:XMM15 are callee saved |
| ; SAVE_XMM n, [u] |
| ; store registers 6-n on the stack; n must be in the range 6-15, anything |
| ; else is rejected at assembly time. |
| ; if u is specified, use unaligned movs. |
| ; RESTORE_XMM |
| ; reload the registers stored by the preceding SAVE_XMM and release the |
| ; stack space it reserved. |
| ; Win64 ABI requires 16 byte stack alignment, but then pushes an 8 byte return |
| ; address. Typically we follow this up with 'push rbp' - re-aligning the stack - |
| ; but in some cases this is not done and unaligned movs must be used. |
| ; On every other target both macros expand to nothing. |
| %if LIBAOM_YASM_WIN64 |
|   %macro SAVE_XMM 1-2 a |
|     %if %1 < 6 |
|       %error Only xmm registers 6-15 must be preserved |
|     %elif %1 > 15 |
|       %error Only xmm registers 6-15 must be preserved |
|     %else |
|       ; last_xmm, movxmm and xmm_stack_space are left defined so that |
|       ; RESTORE_XMM can use them. |
|       %assign last_xmm %1 |
|       %define movxmm movdq %+ %2 |
|       %assign xmm_stack_space ((last_xmm - 5) * 16) |
|       sub rsp, xmm_stack_space |
|       %assign i 6 |
|       %rep (last_xmm - 5) |
|         movxmm [rsp + ((i - 6) * 16)], xmm %+ i |
|         %assign i i+1 |
|       %endrep |
|     %endif |
|   %endmacro |
|   %macro RESTORE_XMM 0 |
|     %ifndef last_xmm |
|       %error RESTORE_XMM must be paired with SAVE_XMM n |
|     %else |
|       %assign i last_xmm |
|       %rep (last_xmm - 5) |
|         movxmm xmm %+ i, [rsp +((i - 6) * 16)] |
|         %assign i i-1 |
|       %endrep |
|       add rsp, xmm_stack_space |
|       ; there are a couple functions which return from multiple places. |
|       ; otherwise, we could uncomment these: |
|       ; %undef last_xmm |
|       ; %undef xmm_stack_space |
|       ; %undef movxmm |
|     %endif |
|   %endmacro |
| %else |
|   %macro SAVE_XMM 1-2 |
|   %endmacro |
|   %macro RESTORE_XMM 0 |
|   %endmacro |
| %endif |
| |
| ; SECTION_RODATA |
| ; Switches to the section used for read-only data on the target format. |
| ; |
| ; .rodata seems to be an elf-ism, as it doesn't work on OSX: the two macho |
| ; formats use .text, aout uses .data, and every other format uses .rodata. |
| ; |
| %ifidn __OUTPUT_FORMAT__,aout |
|   %define SECTION_RODATA section .data |
| %elifidn __OUTPUT_FORMAT__,macho32 |
|   %macro SECTION_RODATA 0 |
|     section .text |
|   %endmacro |
| %elifidn __OUTPUT_FORMAT__,macho64 |
|   %define SECTION_RODATA section .text |
| %else |
|   %define SECTION_RODATA section .rodata |
| %endif |
| |
| |
| ; Tell GNU ld that we don't require an executable stack. |
| ; For each of the ELF output formats an empty .note.GNU-stack section is |
| ; emitted and the current section is then switched back to .text. |
| %ifidn __OUTPUT_FORMAT__,elf64 |
|   section .note.GNU-stack noalloc noexec nowrite progbits |
|   section .text |
| %elifidn __OUTPUT_FORMAT__,elfx32 |
|   section .note.GNU-stack noalloc noexec nowrite progbits |
|   section .text |
| %elifidn __OUTPUT_FORMAT__,elf32 |
|   section .note.GNU-stack noalloc noexec nowrite progbits |
|   section .text |
| %endif |