Yaowu Xu | c27fc14 | 2016-08-22 16:08:15 -0700 | [diff] [blame] | 1 | ; |
Krishna Rapaka | 7319db5 | 2021-09-28 20:35:29 -0700 | [diff] [blame] | 2 | ; Copyright (c) 2021, Alliance for Open Media. All rights reserved |
Yaowu Xu | c27fc14 | 2016-08-22 16:08:15 -0700 | [diff] [blame] | 3 | ; |
Krishna Rapaka | 7319db5 | 2021-09-28 20:35:29 -0700 | [diff] [blame] | 4 | ; This source code is subject to the terms of the BSD 3-Clause Clear License and the |
| 5 | ; Alliance for Open Media Patent License 1.0. If the BSD 3-Clause Clear License was |
| 6 | ; not distributed with this source code in the LICENSE file, you can obtain it |
| 7 | ; at aomedia.org/license/software-license/bsd-3-c-c/. If the Alliance for Open Media Patent |
| 8 | ; License 1.0 was not distributed with this source code in the PATENTS file, you |
| 9 | ; can obtain it at aomedia.org/license/patent-license/. |
Yaowu Xu | 9c01aa1 | 2016-09-01 14:32:49 -0700 | [diff] [blame] | 10 | ; |
| 11 | |
Yaowu Xu | c27fc14 | 2016-08-22 16:08:15 -0700 | [diff] [blame] | 12 | ; |
| 13 | |
| 14 | |
Tom Finegan | 60e653d | 2018-05-22 11:34:58 -0700 | [diff] [blame] | 15 | %include "config/aom_config.asm" |
Yaowu Xu | c27fc14 | 2016-08-22 16:08:15 -0700 | [diff] [blame] | 16 | |
; 32/64 bit compatibility macros
;
; The sources are written using 64 bit syntax. On 32 bit platforms the
; preprocessor definitions that follow rewrite that syntax into its 32 bit
; form.
;
; ABI_IS_32BIT evaluates to 1 for the aout, win32, macho32 and elf32 output
; formats and to 0 for every other output format.
;
%ifidn __OUTPUT_FORMAT__,aout
  %define ABI_IS_32BIT 1
%elifidn __OUTPUT_FORMAT__,win32
  %define ABI_IS_32BIT 1
%elifidn __OUTPUT_FORMAT__,macho32
  %define ABI_IS_32BIT 1
%elifidn __OUTPUT_FORMAT__,elf32
  %define ABI_IS_32BIT 1
%else
  %define ABI_IS_32BIT 0
%endif
| 33 | |
%if ABI_IS_32BIT
  ; Let code written with 64 bit register names assemble for 32 bit targets
  ; by aliasing each name to its 32 bit counterpart.
  %define rax eax
  %define rbx ebx
  %define rcx ecx
  %define rdx edx
  %define rsi esi
  %define rdi edi
  %define rsp esp
  %define rbp ebp

  ; With 32 bit registers there is nothing to sign-extend into, so movsxd
  ; becomes a plain mov.
  %define movsxd mov

  ; movq <dst>, <src>
  ; Two-operand overload of movq. If either operand is exactly one of the
  ; eight 32 bit general purpose register names, movd is emitted with the
  ; same operands; any other operand pair is emitted as movq unchanged.
  ; Every matching branch emits the same instruction, so the order of the
  ; tests does not affect the result.
  %macro movq 2
    ; destination operand is a general purpose register
    %ifidn %1,eax
      movd %1,%2
    %elifidn %1,ebx
      movd %1,%2
    %elifidn %1,ecx
      movd %1,%2
    %elifidn %1,edx
      movd %1,%2
    %elifidn %1,esi
      movd %1,%2
    %elifidn %1,edi
      movd %1,%2
    %elifidn %1,esp
      movd %1,%2
    %elifidn %1,ebp
      movd %1,%2
    ; source operand is a general purpose register
    %elifidn %2,eax
      movd %1,%2
    %elifidn %2,ebx
      movd %1,%2
    %elifidn %2,ecx
      movd %1,%2
    %elifidn %2,edx
      movd %1,%2
    %elifidn %2,esi
      movd %1,%2
    %elifidn %2,edi
      movd %1,%2
    %elifidn %2,esp
      movd %1,%2
    %elifidn %2,ebp
      movd %1,%2
    %else
      movq %1,%2
    %endif
  %endmacro
%endif
| 82 | |
| 83 | |
; LIBAOM_YASM_WIN64
; 1 when the output format is 64 bit Windows, 0 otherwise. Both spellings of
; the format name (x64 and win64) are accepted so the code works whichever
; one is given on the Yasm command line.
%ifidn __OUTPUT_FORMAT__,x64
  %define LIBAOM_YASM_WIN64 1
%elifidn __OUTPUT_FORMAT__,win64
  %define LIBAOM_YASM_WIN64 1
%else
  %define LIBAOM_YASM_WIN64 0
%endif
| 94 | |
; Declare groups of platforms
;
; LIBAOM_ELF is 1 for every ELF output format (elf64, elfx32, elf32) and 0
; for everything else.
%ifidn __OUTPUT_FORMAT__,elf64
  %define LIBAOM_ELF 1
%elifidn __OUTPUT_FORMAT__,elfx32
  %define LIBAOM_ELF 1
%elifidn __OUTPUT_FORMAT__,elf32
  %define LIBAOM_ELF 1
%else
  %define LIBAOM_ELF 0
%endif
| 105 | |
; LIBAOM_MACHO is 1 for the Mach-O output formats (macho64, macho32) and 0
; for everything else.
%ifidn __OUTPUT_FORMAT__,macho64
  %define LIBAOM_MACHO 1
%elifidn __OUTPUT_FORMAT__,macho32
  %define LIBAOM_MACHO 1
%else
  %define LIBAOM_MACHO 0
%endif
| 113 | |
; sym()
; Return the proper symbol name for the target ABI.
;
; Some ABIs, notably MS COFF and Darwin Mach-O, require symbols with C
; linkage to carry a leading underscore. ELF targets and 64 bit Windows use
; the name unchanged; every other output format gets the underscore prefix.
;
%if LIBAOM_ELF || LIBAOM_YASM_WIN64
  ; ELF / Win64: undecorated
  %define sym(x) x
%else
  ; Mach-O / COFF: paste an underscore in front of the name
  %define sym(x) _ %+ x
%endif
| 126 | |
; globalsym()
; Return a global declaration with the proper decoration for the target ABI.
;
; When CHROMIUM is defined, include attributes to hide the symbol from the
; global namespace.
;
; Chromium doesn't like exported global symbols due to symbol clashing with
; plugins among other things.
;
; Requires Chromium's patched copy of yasm:
;   http://src.chromium.org/viewvc/chrome?view=rev&revision=73761
;   http://www.tortall.net/projects/yasm/ticket/236
; or nasm >= 2.14.
;
%ifndef CHROMIUM
  ; Default build: a plain global declaration of the decorated name.
  %define globalsym(x) global sym(x)
%else
  %ifdef __NASM_VER__
    ; 0x020e0000 encodes version 2.14.
    ; nasm < 2.14 does not support :private_extern directive
    %if __NASM_VERSION_ID__ < 0x020e0000
      %fatal Must use nasm 2.14 or newer
    %endif
  %endif

  %if LIBAOM_ELF
    ; ELF: mark the symbol as a function with hidden visibility.
    %define globalsym(x) global sym(x) %+ :function hidden
  %elif LIBAOM_MACHO
    ; Mach-O: private_extern keeps the symbol out of the exported set.
    %define globalsym(x) global sym(x) %+ :private_extern
  %else
    ; COFF / PE32+: no visibility attribute is applied.
    %define globalsym(x) global sym(x)
  %endif
%endif
| 160 | |
; arg()
; Return the address specification of the given argument
;
; x is the zero-based argument index. It is parenthesized in the expansion so
; that an expression such as arg(n+1) is scaled as a whole instead of only
; its first term.
;
%if ABI_IS_32BIT
  ; 32 bit: arguments are read at 4-byte slots starting at ebp+8.
  ; NOTE(review): assumes the usual 'push ebp / mov ebp, esp' frame - confirm
  ; at call sites.
  %define arg(x) [ebp+8+4*(x)]
%else
  ; 64 bit ABI passes arguments in registers. This is a workaround to get up
  ; and running quickly. Relies on SHADOW_ARGS_TO_STACK
  %if LIBAOM_YASM_WIN64
    ; Win64: 8-byte slots above the frame pointer, starting at rbp+16;
    ; SHADOW_ARGS_TO_STACK stores the register arguments there.
    %define arg(x) [rbp+16+8*(x)]
  %else
    ; Other 64 bit targets: 8-byte slots below the frame pointer, matching
    ; the order in which SHADOW_ARGS_TO_STACK pushes the arguments.
    %define arg(x) [rbp-8-8*(x)]
  %endif
%endif
| 175 | |
; REG_SZ_BYTES, REG_SZ_BITS
; Width of a general purpose register on the target, expressed in bytes and
; in bits: 4 / 32 when ABI_IS_32BIT is set, 8 / 64 otherwise.
%if ABI_IS_32BIT
  %define REG_SZ_BYTES 4
  %define REG_SZ_BITS  32
%else
  %define REG_SZ_BYTES 8
  %define REG_SZ_BITS  64
%endif
| 185 | |
| 186 | |
; ALIGN_STACK <alignment> <register>
; This macro aligns the stack to the given alignment (in bytes). The stack
; is left such that the previous value of the stack pointer is the first
; argument on the stack (ie, the inverse of this macro is 'pop rsp.')
; This macro uses one temporary register, which is not preserved, and thus
; must be specified as an argument.
;
;   %1  alignment in bytes. NOTE(review): the masking only aligns correctly
;       for power-of-two values - confirm all callers pass one. The value is
;       parenthesized in the expansion so an expression (e.g. 8*2 or 16+16)
;       is negated / subtracted as a whole.
;   %2  scratch register; receives the incoming rsp and is pushed last.
;
; The 'and' modifies the flags register.
%macro ALIGN_STACK 2
    mov         %2, rsp                               ; remember incoming rsp
    and         rsp, -(%1)                            ; round rsp down
    lea         rsp, [rsp - ((%1) - REG_SZ_BYTES)]    ; leave room so that ...
    push        %2                                    ; ... this push lands aligned
%endmacro
| 199 | |
| 200 | |
;
; The Microsoft assembler imposes a degree of type safety on register usage
; through size keywords. YASM does not recognize those keywords, so they are
; defined (case-insensitively, via %idefine) to expand to nothing. This keeps
; the sources as close as possible to the inline assembler they were ported
; from.
;
%idefine MMWORD
%idefine XMMWORD
%idefine PTR
| 210 | |
; PIC macros
;
;   GET_GOT <reg>   Saves <reg> on the stack, loads a position-independent
;                   base address into it, and (re)defines GLOBAL() and
;                   RESTORE_GOT to match. Expands to nothing on targets that
;                   need no base register.
;   GLOBAL(x)       Address expression for data symbol x.
;   RESTORE_GOT     Pops the register saved by GET_GOT.
;   WRT_PLT         Suffix for calls that must go through the PLT.
;   HIDDEN_DATA(x)  Symbol specification for a data symbol that should not be
;                   exported, where the object format supports that.
;
%if ABI_IS_32BIT
  %if CONFIG_PIC=1
    %ifidn __OUTPUT_FORMAT__,elf32
      %define WRT_PLT wrt ..plt
      %macro GET_GOT 1
        extern _GLOBAL_OFFSET_TABLE_
        push %1                         ; saved copy, popped by RESTORE_GOT
        call %%get_got                  ; pushes the address of %%sub_offset
        %%sub_offset:
        jmp %%exitGG                    ; resumes here after the ret below
        %%get_got:
        mov %1, [esp]                   ; %1 = return address = %%sub_offset
        add %1, _GLOBAL_OFFSET_TABLE_ + $$ - %%sub_offset wrt ..gotpc
        ret
        %%exitGG:
        ; From here on GLOBAL() addresses data relative to the GOT base in %1.
        %undef GLOBAL
        %define GLOBAL(x) x + %1 wrt ..gotoff
        %undef RESTORE_GOT
        %define RESTORE_GOT pop %1
      %endmacro
    %elifidn __OUTPUT_FORMAT__,macho32
      %macro GET_GOT 1
        push %1                         ; saved copy, popped by RESTORE_GOT
        call %%get_got                  ; pushes the address of %%get_got
        %%get_got:
        pop %1                          ; %1 = run-time address of %%get_got
        ; GLOBAL() addresses data relative to the %%get_got label.
        %undef GLOBAL
        %define GLOBAL(x) x + %1 - %%get_got
        %undef RESTORE_GOT
        %define RESTORE_GOT pop %1
      %endmacro
    %endif
  %endif

  ; Data symbols are hidden only for CHROMIUM builds on 32 bit targets. The
  ; elf32 case uses the same ':data hidden' specification as the elf64 and
  ; elfx32 cases below, so ELF data is treated alike at both widths.
  %ifdef CHROMIUM
    %ifidn __OUTPUT_FORMAT__,elf32
      %define HIDDEN_DATA(x) x:data hidden
    %elifidn __OUTPUT_FORMAT__,macho32
      %define HIDDEN_DATA(x) x:private_extern
    %else
      %define HIDDEN_DATA(x) x
    %endif
  %else
    %define HIDDEN_DATA(x) x
  %endif
%else
  ; 64 bit: data is addressed with 'rel', so no base register is needed and
  ; GET_GOT expands to nothing.
  %macro GET_GOT 1
  %endmacro
  %define GLOBAL(x) rel x
  %ifidn __OUTPUT_FORMAT__,elf64
    %define WRT_PLT wrt ..plt
    %define HIDDEN_DATA(x) x:data hidden
  %elifidn __OUTPUT_FORMAT__,elfx32
    %define WRT_PLT wrt ..plt
    %define HIDDEN_DATA(x) x:data hidden
  %elifidn __OUTPUT_FORMAT__,macho64
    %ifdef CHROMIUM
      %define HIDDEN_DATA(x) x:private_extern
    %else
      %define HIDDEN_DATA(x) x
    %endif
  %else
    %define HIDDEN_DATA(x) x
  %endif
%endif
; Fallbacks for whatever the PIC section did not define, so that sources can
; use GET_GOT / GLOBAL() / RESTORE_GOT / WRT_PLT unconditionally.

; No GET_GOT macro yet: make it expand to nothing and let GLOBAL() name the
; symbol directly.
%ifnmacro GET_GOT
  %macro GET_GOT 1
  %endmacro
  %define GLOBAL(x) x
%endif

; Nothing was saved, so there is nothing to restore.
%ifndef RESTORE_GOT
  %define RESTORE_GOT
%endif

; No PLT suffix on this target.
%ifndef WRT_PLT
  %define WRT_PLT
%endif
| 287 | |
; SHADOW_ARGS_TO_STACK <argc>
;   Makes the first <argc> arguments addressable through arg().
; UNSHADOW_ARGS
;   Undoes SHADOW_ARGS_TO_STACK.
;
;   32 bit:        both expand to nothing.
;   Win64:         the register arguments rcx, rdx, r8, r9 are stored to
;                  arg(0)..arg(3).
;   other 64 bit:  rdi, rsi, rdx, rcx, r8, r9 are pushed in that order; when
;                  argc > 6 the remaining arguments are copied from
;                  [rbp+16], [rbp+24], ... through rax (rax is clobbered) and
;                  pushed as well. The preprocessor variables i and off are
;                  overwritten.
;   any 64 bit:    UNSHADOW_ARGS is 'mov rsp, rbp'.
%if ABI_IS_32BIT
  %macro SHADOW_ARGS_TO_STACK 1
  %endmacro
  %define UNSHADOW_ARGS
%else
  %if LIBAOM_YASM_WIN64
    %macro SHADOW_ARGS_TO_STACK 1 ; argc
      %if %1 > 0
        mov arg(0),rcx
      %endif
      %if %1 > 1
        mov arg(1),rdx
      %endif
      %if %1 > 2
        mov arg(2),r8
      %endif
      %if %1 > 3
        mov arg(3),r9
      %endif
    %endmacro
  %else
    %macro SHADOW_ARGS_TO_STACK 1 ; argc
      ; Arguments that arrived in registers.
      %if %1 > 0
        push rdi
      %endif
      %if %1 > 1
        push rsi
      %endif
      %if %1 > 2
        push rdx
      %endif
      %if %1 > 3
        push rcx
      %endif
      %if %1 > 4
        push r8
      %endif
      %if %1 > 5
        push r9
      %endif
      ; Arguments seven and up: copy each one from above the frame pointer.
      %if %1 > 6
        %assign i %1-6
        %assign off 16
        %rep i
          mov rax,[rbp+off]
          push rax
          %assign off off+8
        %endrep
      %endif
    %endmacro
  %endif
  %define UNSHADOW_ARGS mov rsp, rbp
%endif
| 341 | |
; Win64 ABI requires that XMM6:XMM15 are callee saved
; SAVE_XMM n, [u]
; store registers 6-n on the stack
; if u is specified, use unaligned movs.
; Win64 ABI requires 16 byte stack alignment, but then pushes an 8 byte return
; value. Typically we follow this up with 'push rbp' - re-aligning the stack -
; but in some cases this is not done and unaligned movs must be used.
;
; n must be in the range 6-15; anything else is rejected at assembly time.
; SAVE_XMM records its state in the preprocessor symbols last_xmm, movxmm and
; xmm_stack_space (and overwrites i); RESTORE_XMM reads them back.
; RESTORE_XMM
; reload the registers stored by the most recent SAVE_XMM and release the
; stack space it reserved.
; On every target other than Win64 both macros expand to nothing.
%if LIBAOM_YASM_WIN64
  %macro SAVE_XMM 1-2 a
    %if %1 < 6
      %error Only xmm registers 6-15 must be preserved
    %elif %1 > 15
      ; xmm16 and up are not part of the callee-saved set handled here.
      %error Only xmm registers 6-15 must be preserved
    %else
      %assign last_xmm %1
      ; movdqa by default, movdqu when the second argument is 'u'
      %define movxmm movdq %+ %2
      ; one 16 byte slot per register in xmm6..xmm<n>
      %assign xmm_stack_space ((last_xmm - 5) * 16)
      sub rsp, xmm_stack_space
      %assign i 6
      %rep (last_xmm - 5)
        movxmm [rsp + ((i - 6) * 16)], xmm %+ i
        %assign i i+1
      %endrep
    %endif
  %endmacro
  %macro RESTORE_XMM 0
    %ifndef last_xmm
      %error RESTORE_XMM must be paired with SAVE_XMM n
    %else
      ; walk back down from xmm<n> to xmm6
      %assign i last_xmm
      %rep (last_xmm - 5)
        movxmm xmm %+ i, [rsp +((i - 6) * 16)]
        %assign i i-1
      %endrep
      add rsp, xmm_stack_space
      ; there are a couple functions which return from multiple places.
      ; otherwise, we could uncomment these:
      ; %undef last_xmm
      ; %undef xmm_stack_space
      ; %undef movxmm
    %endif
  %endmacro
%else
  %macro SAVE_XMM 1-2
  %endmacro
  %macro RESTORE_XMM 0
  %endmacro
%endif
| 388 | |
; Name of the rodata section
;
; .rodata seems to be an elf-ism, as it doesn't work on OSX.
;
; SECTION_RODATA switches to the section used for read-only data:
;   aout               -> .data
;   macho32 / macho64  -> .text
;   everything else    -> .rodata
; (On macho32 it is a zero-argument multi-line macro; elsewhere it is a
; single-line define.)
;
%ifidn __OUTPUT_FORMAT__,aout
  %define SECTION_RODATA section .data
%elifidn __OUTPUT_FORMAT__,macho32
  %macro SECTION_RODATA 0
    section .text
  %endmacro
%elifidn __OUTPUT_FORMAT__,macho64
  %define SECTION_RODATA section .text
%else
  %define SECTION_RODATA section .rodata
%endif
| 404 | |
| 405 | |
; Tell GNU ld that we don't require an executable stack.
;
; Emitted for every ELF output format (LIBAOM_ELF covers elf32, elfx32 and
; elf64); afterwards the current section is switched back to .text.
%if LIBAOM_ELF
  section .note.GNU-stack noalloc noexec nowrite progbits
  section .text
%endif