|  | ; | 
|  | ; Copyright (c) 2016, Alliance for Open Media. All rights reserved | 
|  | ; | 
|  | ; This source code is subject to the terms of the BSD 2 Clause License and | 
|  | ; the Alliance for Open Media Patent License 1.0. If the BSD 2 Clause License | 
|  | ; was not distributed with this source code in the LICENSE file, you can | 
|  | ; obtain it at www.aomedia.org/license/software. If the Alliance for Open | 
|  | ; Media Patent License 1.0 was not distributed with this source code in the | 
|  | ; PATENTS file, you can obtain it at www.aomedia.org/license/patent. | 
|  | ; | 
|  |  | 
|  | ; | 
|  |  | 
|  | %include "third_party/x86inc/x86inc.asm" | 
|  |  | 
|  | SECTION_RODATA | 
|  | ; Rounding terms added to the DC sums ahead of the final right shift. | 
|  | ; pw_4 and pw_8 hold eight 16-bit lanes. pw_16 and pw_32 keep the pw_ prefix | 
|  | ; but hold four 32-bit lanes, matching the paddd that consumes them. | 
|  | pw_4:  dw 4, 4, 4, 4, 4, 4, 4, 4 | 
|  | pw_8:  dw 8, 8, 8, 8, 8, 8, 8, 8 | 
|  | pw_16: dd 16, 16, 16, 16 | 
|  | pw_32: dd 32, 32, 32, 32 | 
|  |  | 
|  | SECTION .text | 
|  | ;------------------------------------------------------------------------------ | 
|  | ; highbd_dc_predictor_4x4(dst, stride, above, left) | 
|  | ; | 
|  | ; Fills four rows of four 16-bit samples with one value: | 
|  | ;     dc = (above[0..3] + left[0..3] + 4) >> 3 | 
|  | ; | 
|  | ;   dst    - first output row; each row is one 8-byte movq store | 
|  | ;   stride - consecutive rows are strideq*2 bytes apart | 
|  | ;   above  - four words, loaded with movq | 
|  | ;   left   - four words, loaded with movq | 
|  | ; | 
|  | ; goffset (r4) is scratch for GET_GOT / GLOBAL() only. Vector use: m0-m2. | 
|  | ;------------------------------------------------------------------------------ | 
|  | INIT_XMM sse2 | 
|  | cglobal highbd_dc_predictor_4x4, 4, 5, 4, dst, stride, above, left, goffset | 
|  |   GET_GOT     goffsetq | 
|  |  | 
|  |   ; m0[i] = above[i] + left[i], i = 0..3 (low four words). | 
|  |   movq        m0, [aboveq] | 
|  |   movq        m2, [leftq] | 
|  |   paddw       m0, m2 | 
|  |  | 
|  |   ; Horizontal add: fold words 2,3 onto words 0,1, then word 1 onto word 0. | 
|  |   pshuflw     m1, m0, 0xe | 
|  |   paddw       m0, m1 | 
|  |   pshuflw     m1, m0, 0x1 | 
|  |   paddw       m0, m1 | 
|  |  | 
|  |   ; Round and divide by 8. The lane holds an unsigned sum, so shift logically: | 
|  |   ; an arithmetic shift would sign-extend once bit 15 of the sum is set. | 
|  |   paddw       m0, [GLOBAL(pw_4)] | 
|  |   psrlw       m0, 3 | 
|  |  | 
|  |   ; Broadcast word 0 across the low four words and store rows 0..3. | 
|  |   pshuflw     m0, m0, 0x0 | 
|  |   movq        [dstq], m0 | 
|  |   movq        [dstq+strideq*2], m0 | 
|  |   lea         dstq, [dstq+strideq*4]        ; step two rows down | 
|  |   movq        [dstq], m0 | 
|  |   movq        [dstq+strideq*2], m0 | 
|  |  | 
|  |   RESTORE_GOT | 
|  |   RET | 
|  |  | 
|  | ;------------------------------------------------------------------------------ | 
|  | ; highbd_dc_predictor_8x8(dst, stride, above, left) | 
|  | ; | 
|  | ; Fills eight rows of eight 16-bit samples with one value: | 
|  | ;     dc = (above[0..7] + left[0..7] + 8) >> 4 | 
|  | ; | 
|  | ;   dst    - first output row; each row is one 16-byte mova store | 
|  | ;   stride - consecutive rows are strideq*2 bytes apart | 
|  | ;   above  - eight words, loaded with mova | 
|  | ;   left   - eight words, loaded with mova | 
|  | ; | 
|  | ; After the loads r2/r3 are recycled as stride3 and a scratch GPR. | 
|  | ;------------------------------------------------------------------------------ | 
|  | INIT_XMM sse2 | 
|  | cglobal highbd_dc_predictor_8x8, 4, 5, 4, dst, stride, above, left, goffset | 
|  |   GET_GOT     goffsetq | 
|  |  | 
|  |   ; m0[i] = above[i] + left[i], i = 0..7. | 
|  |   mova        m0, [aboveq] | 
|  |   mova        m2, [leftq] | 
|  |   paddw       m0, m2 | 
|  |  | 
|  |   ; The pointer arguments are dead from here on; rename their registers. | 
|  |   DEFINE_ARGS dst, stride, stride3, ones | 
|  |   lea         stride3q, [strideq*3] | 
|  |  | 
|  |   ; m3 = eight words of 1, m1 = zero. | 
|  |   mov         onesd, 0x10001 | 
|  |   movd        m3, onesd | 
|  |   pshufd      m3, m3, 0x0 | 
|  |   pxor        m1, m1 | 
|  |  | 
|  |   ; pmaddwd against all-ones adds neighbouring words into dwords; packssdw | 
|  |   ; with zero narrows those dwords back into the low words. Three rounds | 
|  |   ; leave the full sum in the low dword. | 
|  |   ; NOTE(review): packssdw saturates at 32767, so the intermediate partial | 
|  |   ; sums are presumably expected to stay below that - confirm input range. | 
|  |   pmaddwd     m0, m3 | 
|  |   packssdw    m0, m1 | 
|  |   pmaddwd     m0, m3 | 
|  |   packssdw    m0, m1 | 
|  |   pmaddwd     m0, m3 | 
|  |  | 
|  |   ; Round, divide by 16, then replicate word 0 into all eight lanes. | 
|  |   paddw       m0, [GLOBAL(pw_8)] | 
|  |   psrlw       m0, 4 | 
|  |   pshuflw     m0, m0, 0x0 | 
|  |   punpcklqdq  m0, m0 | 
|  |  | 
|  |   ; Rows 0..3. | 
|  |   mova        [dstq], m0 | 
|  |   mova        [dstq+strideq*2], m0 | 
|  |   mova        [dstq+strideq*4], m0 | 
|  |   mova        [dstq+stride3q*2], m0 | 
|  |   ; Rows 4..7. | 
|  |   lea         dstq, [dstq+strideq*8] | 
|  |   mova        [dstq], m0 | 
|  |   mova        [dstq+strideq*2], m0 | 
|  |   mova        [dstq+strideq*4], m0 | 
|  |   mova        [dstq+stride3q*2], m0 | 
|  |  | 
|  |   RESTORE_GOT | 
|  |   RET | 
|  |  | 
|  | ;------------------------------------------------------------------------------ | 
|  | ; highbd_dc_predictor_16x16(dst, stride, above, left) | 
|  | ; | 
|  | ; Fills sixteen rows of sixteen 16-bit samples with one value: | 
|  | ;     dc = (above[0..15] + left[0..15] + 16) >> 5 | 
|  | ; | 
|  | ;   dst    - first output row; each row is two 16-byte mova stores | 
|  | ;   stride - consecutive rows are strideq*2 bytes apart | 
|  | ;   above  - sixteen words, loaded with mova | 
|  | ;   left   - sixteen words, loaded with mova | 
|  | ; | 
|  | ; After the loads r2/r3 are recycled as stride3 and the loop counter. | 
|  | ;------------------------------------------------------------------------------ | 
|  | INIT_XMM sse2 | 
|  | cglobal highbd_dc_predictor_16x16, 4, 5, 5, dst, stride, above, left, goffset | 
|  |   GET_GOT     goffsetq | 
|  |  | 
|  |   ; Fold the 32 input words into eight word lanes of m0. | 
|  |   mova        m0, [aboveq] | 
|  |   mova        m3, [aboveq+16] | 
|  |   paddw       m0, m3 | 
|  |   mova        m2, [leftq] | 
|  |   paddw       m0, m2 | 
|  |   mova        m4, [leftq+16] | 
|  |   paddw       m0, m4 | 
|  |  | 
|  |   ; The pointer arguments are dead from here on; rename their registers. | 
|  |   DEFINE_ARGS dst, stride, stride3, rowgrp | 
|  |   lea         stride3q, [strideq*3] | 
|  |   mov         rowgrpd, 4                    ; 4 passes x 4 rows | 
|  |  | 
|  |   ; Eight words -> four words, then widen with zero (m1) and keep halving | 
|  |   ; in 32-bit lanes until the total sits in the low dword. | 
|  |   pxor        m1, m1 | 
|  |   movhlps     m2, m0 | 
|  |   paddw       m0, m2 | 
|  |   punpcklwd   m0, m1 | 
|  |   movhlps     m2, m0 | 
|  |   paddd       m0, m2 | 
|  |   punpckldq   m0, m1 | 
|  |   movhlps     m2, m0 | 
|  |   paddd       m0, m2 | 
|  |  | 
|  |   ; Round, divide by 32, then replicate word 0 into all eight lanes. | 
|  |   paddd       m0, [GLOBAL(pw_16)] | 
|  |   psrad       m0, 5 | 
|  |   pshuflw     m0, m0, 0x0 | 
|  |   punpcklqdq  m0, m0 | 
|  |  | 
|  | .fill_rows: | 
|  |   ; Each pass writes four 32-byte rows. | 
|  |   mova        [dstq], m0 | 
|  |   mova        [dstq+16], m0 | 
|  |   mova        [dstq+strideq*2], m0 | 
|  |   mova        [dstq+strideq*2+16], m0 | 
|  |   mova        [dstq+strideq*4], m0 | 
|  |   mova        [dstq+strideq*4+16], m0 | 
|  |   mova        [dstq+stride3q*2], m0 | 
|  |   mova        [dstq+stride3q*2+16], m0 | 
|  |   lea         dstq, [dstq+strideq*8] | 
|  |   dec         rowgrpd | 
|  |   jnz         .fill_rows | 
|  |  | 
|  |   RESTORE_GOT | 
|  |   REP_RET | 
|  |  | 
|  | ;------------------------------------------------------------------------------ | 
|  | ; highbd_dc_predictor_32x32(dst, stride, above, left) | 
|  | ; | 
|  | ; Fills thirty-two rows of thirty-two 16-bit samples with one value: | 
|  | ;     dc = (above[0..31] + left[0..31] + 32) >> 6 | 
|  | ; | 
|  | ;   dst    - first output row; each row is four 16-byte mova stores | 
|  | ;   stride - consecutive rows are strideq*2 bytes apart | 
|  | ;   above  - thirty-two words, loaded with mova | 
|  | ;   left   - thirty-two words, loaded with mova | 
|  | ; | 
|  | ; After the loads r2/r3 are recycled as stride3 and the loop counter. | 
|  | ;------------------------------------------------------------------------------ | 
|  | INIT_XMM sse2 | 
|  | cglobal highbd_dc_predictor_32x32, 4, 5, 7, dst, stride, above, left, goffset | 
|  |   GET_GOT     goffsetq | 
|  |  | 
|  |   ; m0 = the four vectors of the above row folded into eight word lanes. | 
|  |   mova        m0, [aboveq] | 
|  |   mova        m2, [aboveq+16] | 
|  |   paddw       m0, m2 | 
|  |   mova        m3, [aboveq+32] | 
|  |   mova        m4, [aboveq+48] | 
|  |   paddw       m3, m4 | 
|  |   paddw       m0, m3 | 
|  |  | 
|  |   ; m2 = the four vectors of the left column folded the same way. | 
|  |   mova        m2, [leftq] | 
|  |   mova        m4, [leftq+16] | 
|  |   paddw       m2, m4 | 
|  |   mova        m5, [leftq+32] | 
|  |   mova        m6, [leftq+48] | 
|  |   paddw       m5, m6 | 
|  |   paddw       m2, m5 | 
|  |  | 
|  |   paddw       m0, m2 | 
|  |  | 
|  |   ; The pointer arguments are dead from here on; rename their registers. | 
|  |   DEFINE_ARGS dst, stride, stride3, rowgrp | 
|  |   lea         stride3q, [strideq*3] | 
|  |   mov         rowgrpd, 8                    ; 8 passes x 4 rows | 
|  |  | 
|  |   ; Eight words -> four words, then widen with zero (m1) and keep halving | 
|  |   ; in 32-bit lanes until the total sits in the low dword. | 
|  |   pxor        m1, m1 | 
|  |   movhlps     m2, m0 | 
|  |   paddw       m0, m2 | 
|  |   punpcklwd   m0, m1 | 
|  |   movhlps     m2, m0 | 
|  |   paddd       m0, m2 | 
|  |   punpckldq   m0, m1 | 
|  |   movhlps     m2, m0 | 
|  |   paddd       m0, m2 | 
|  |  | 
|  |   ; Round, divide by 64, then replicate word 0 into all eight lanes. | 
|  |   paddd       m0, [GLOBAL(pw_32)] | 
|  |   psrad       m0, 6 | 
|  |   pshuflw     m0, m0, 0x0 | 
|  |   punpcklqdq  m0, m0 | 
|  |  | 
|  | .fill_rows: | 
|  |   ; Each pass writes four 64-byte rows. | 
|  |   mova        [dstq], m0 | 
|  |   mova        [dstq+16], m0 | 
|  |   mova        [dstq+32], m0 | 
|  |   mova        [dstq+48], m0 | 
|  |   mova        [dstq+strideq*2], m0 | 
|  |   mova        [dstq+strideq*2+16], m0 | 
|  |   mova        [dstq+strideq*2+32], m0 | 
|  |   mova        [dstq+strideq*2+48], m0 | 
|  |   mova        [dstq+strideq*4], m0 | 
|  |   mova        [dstq+strideq*4+16], m0 | 
|  |   mova        [dstq+strideq*4+32], m0 | 
|  |   mova        [dstq+strideq*4+48], m0 | 
|  |   mova        [dstq+stride3q*2], m0 | 
|  |   mova        [dstq+stride3q*2+16], m0 | 
|  |   mova        [dstq+stride3q*2+32], m0 | 
|  |   mova        [dstq+stride3q*2+48], m0 | 
|  |   lea         dstq, [dstq+strideq*8] | 
|  |   dec         rowgrpd | 
|  |   jnz         .fill_rows | 
|  |  | 
|  |   RESTORE_GOT | 
|  |   REP_RET | 
|  |  | 
|  | ;------------------------------------------------------------------------------ | 
|  | ; highbd_v_predictor_4x4(dst, stride, above) | 
|  | ; | 
|  | ; Copies the four words at [above] into each of four output rows. | 
|  | ; | 
|  | ;   dst    - first output row; each row is one 8-byte movq store | 
|  | ;   stride - consecutive rows are strideq*2 bytes apart | 
|  | ;   above  - four words, loaded with movq | 
|  | ;------------------------------------------------------------------------------ | 
|  | INIT_XMM sse2 | 
|  | cglobal highbd_v_predictor_4x4, 3, 3, 1, dst, stride, above | 
|  |   movq        m0, [aboveq] | 
|  |  | 
|  |   ; above is dead after the load; reuse its register for 3*stride so every | 
|  |   ; row can be addressed straight from dst. | 
|  |   DEFINE_ARGS dst, stride, stride3 | 
|  |   lea         stride3q, [strideq*3] | 
|  |  | 
|  |   movq        [dstq], m0                    ; row 0 | 
|  |   movq        [dstq+strideq*2], m0          ; row 1 | 
|  |   movq        [dstq+strideq*4], m0          ; row 2 | 
|  |   movq        [dstq+stride3q*2], m0         ; row 3 | 
|  |   RET | 
|  |  | 
|  | ;------------------------------------------------------------------------------ | 
|  | ; highbd_v_predictor_8x8(dst, stride, above) | 
|  | ; | 
|  | ; Copies the eight words at [above] into each of eight output rows. | 
|  | ; | 
|  | ;   dst    - first output row; each row is one 16-byte mova store | 
|  | ;   stride - consecutive rows are strideq*2 bytes apart | 
|  | ;   above  - eight words, loaded with mova | 
|  | ;------------------------------------------------------------------------------ | 
|  | INIT_XMM sse2 | 
|  | cglobal highbd_v_predictor_8x8, 3, 3, 1, dst, stride, above | 
|  |   mova        m0, [aboveq] | 
|  |  | 
|  |   ; above is dead after the load; its register now holds 3*stride. | 
|  |   DEFINE_ARGS dst, stride, stride3 | 
|  |   lea         stride3q, [strideq*3] | 
|  |  | 
|  |   ; Rows 0..3. | 
|  |   mova        [dstq], m0 | 
|  |   mova        [dstq+strideq*2], m0 | 
|  |   mova        [dstq+strideq*4], m0 | 
|  |   mova        [dstq+stride3q*2], m0 | 
|  |  | 
|  |   ; Rows 4..7. | 
|  |   lea         dstq, [dstq+strideq*8] | 
|  |   mova        [dstq], m0 | 
|  |   mova        [dstq+strideq*2], m0 | 
|  |   mova        [dstq+strideq*4], m0 | 
|  |   mova        [dstq+stride3q*2], m0 | 
|  |   RET | 
|  |  | 
|  | ;------------------------------------------------------------------------------ | 
|  | ; highbd_v_predictor_16x16(dst, stride, above) | 
|  | ; | 
|  | ; Copies the sixteen words at [above] into each of sixteen output rows. | 
|  | ; | 
|  | ;   dst    - first output row; each row is two 16-byte mova stores | 
|  | ;   stride - consecutive rows are strideq*2 bytes apart | 
|  | ;   above  - sixteen words, loaded with mova into m0/m1 | 
|  | ;------------------------------------------------------------------------------ | 
|  | INIT_XMM sse2 | 
|  | cglobal highbd_v_predictor_16x16, 3, 4, 2, dst, stride, above | 
|  |   mova        m0, [aboveq] | 
|  |   mova        m1, [aboveq+16] | 
|  |  | 
|  |   ; above is dead after the loads; r2 becomes 3*stride, r3 the pass counter. | 
|  |   DEFINE_ARGS dst, stride, stride3, rowgrp | 
|  |   lea         stride3q, [strideq*3] | 
|  |   mov         rowgrpd, 4                    ; 4 passes x 4 rows | 
|  |  | 
|  | .copy_rows: | 
|  |   ; Each pass writes four 32-byte rows. | 
|  |   mova        [dstq], m0 | 
|  |   mova        [dstq+16], m1 | 
|  |   mova        [dstq+strideq*2], m0 | 
|  |   mova        [dstq+strideq*2+16], m1 | 
|  |   mova        [dstq+strideq*4], m0 | 
|  |   mova        [dstq+strideq*4+16], m1 | 
|  |   mova        [dstq+stride3q*2], m0 | 
|  |   mova        [dstq+stride3q*2+16], m1 | 
|  |   lea         dstq, [dstq+strideq*8] | 
|  |   dec         rowgrpd | 
|  |   jnz         .copy_rows | 
|  |   REP_RET | 
|  |  | 
|  | ;------------------------------------------------------------------------------ | 
|  | ; highbd_v_predictor_32x32(dst, stride, above) | 
|  | ; | 
|  | ; Copies the thirty-two words at [above] into each of thirty-two output rows. | 
|  | ; | 
|  | ;   dst    - first output row; each row is four 16-byte mova stores | 
|  | ;   stride - consecutive rows are strideq*2 bytes apart | 
|  | ;   above  - thirty-two words, loaded with mova into m0..m3 | 
|  | ;------------------------------------------------------------------------------ | 
|  | INIT_XMM sse2 | 
|  | cglobal highbd_v_predictor_32x32, 3, 4, 4, dst, stride, above | 
|  |   mova        m0, [aboveq] | 
|  |   mova        m1, [aboveq+16] | 
|  |   mova        m2, [aboveq+32] | 
|  |   mova        m3, [aboveq+48] | 
|  |  | 
|  |   ; above is dead after the loads; r2 becomes 3*stride, r3 the pass counter. | 
|  |   DEFINE_ARGS dst, stride, stride3, rowgrp | 
|  |   lea         stride3q, [strideq*3] | 
|  |   mov         rowgrpd, 8                    ; 8 passes x 4 rows | 
|  |  | 
|  | .copy_rows: | 
|  |   ; Each pass writes four 64-byte rows. | 
|  |   mova        [dstq], m0 | 
|  |   mova        [dstq+16], m1 | 
|  |   mova        [dstq+32], m2 | 
|  |   mova        [dstq+48], m3 | 
|  |   mova        [dstq+strideq*2], m0 | 
|  |   mova        [dstq+strideq*2+16], m1 | 
|  |   mova        [dstq+strideq*2+32], m2 | 
|  |   mova        [dstq+strideq*2+48], m3 | 
|  |   mova        [dstq+strideq*4], m0 | 
|  |   mova        [dstq+strideq*4+16], m1 | 
|  |   mova        [dstq+strideq*4+32], m2 | 
|  |   mova        [dstq+strideq*4+48], m3 | 
|  |   mova        [dstq+stride3q*2], m0 | 
|  |   mova        [dstq+stride3q*2+16], m1 | 
|  |   mova        [dstq+stride3q*2+32], m2 | 
|  |   mova        [dstq+stride3q*2+48], m3 | 
|  |   lea         dstq, [dstq+strideq*8] | 
|  |   dec         rowgrpd | 
|  |   jnz         .copy_rows | 
|  |   REP_RET | 