;
; Copyright (c) 2016, Alliance for Open Media. All rights reserved.
;
; This source code is subject to the terms of the BSD 2 Clause License and
; the Alliance for Open Media Patent License 1.0. If the BSD 2 Clause License
; was not distributed with this source code in the LICENSE file, you can
; obtain it at www.aomedia.org/license/software. If the Alliance for Open
; Media Patent License 1.0 was not distributed with this source code in the
; PATENTS file, you can obtain it at www.aomedia.org/license/patent.
;
 |  | 
 | ; | 
 |  | 
 | %include "third_party/x86inc/x86inc.asm" | 
 |  | 
 SECTION_RODATA
 ; Rounding biases used by the DC predictors in this file. Each value is half
 ; of the divisor implied by the right shift that follows its use.
 ;   pw_4,  pw_8  : eight 16-bit words, consumed with paddw
 ;   pw_16, pw_32 : four 32-bit dwords, consumed with paddd (despite the
 ;                  pw_ prefix)
 pw_4:  dw 4, 4, 4, 4, 4, 4, 4, 4
 pw_8:  dw 8, 8, 8, 8, 8, 8, 8, 8
 pw_16: dd 16, 16, 16, 16
 pw_32: dd 32, 32, 32, 32
 |  | 
 SECTION .text

 ;------------------------------------------------------------------------------
 ; highbd_dc_predictor_4x4
 ;
 ; Fills a 4x4 block of 16-bit samples with (sum(above[0..3]) +
 ; sum(left[0..3]) + 4) >> 3.
 ;
 ;   dst     output block; row r is written at dst + r * stride * 2 bytes
 ;   stride  row pitch, multiplied by 2 when forming byte addresses
 ;   above   4 words, read with one 8-byte load
 ;   left    4 words, read with one 8-byte load
 ;   goffset scratch register handed to GET_GOT so GLOBAL() can reach pw_4
 ;
 ; NOTE(review): the sum is kept in 16-bit lanes and shifted arithmetically,
 ; so sum + 4 is assumed to stay below 0x8000 (true for samples of up to
 ; 12 bits) - confirm against callers.
 ;------------------------------------------------------------------------------
 INIT_XMM sse2
 cglobal highbd_dc_predictor_4x4, 4, 5, 4, dst, stride, above, left, goffset
   GET_GOT     goffsetq

   ; Horizontal reduction of the 8 edge samples into word lane 0.
   movq                  m0, [aboveq]
   movq                  m1, [leftq]
   paddw                 m0, m1                  ; lane i = above[i] + left[i]
   pshuflw               m2, m0, 0xe             ; lanes 2,3 -> lanes 0,1
   paddw                 m0, m2                  ; lanes 0,1 hold partial sums
   pshuflw               m2, m0, 0x1             ; lane 1 -> lane 0
   paddw                 m0, m2                  ; lane 0 = sum of all 8 samples

   ; Round, divide by 8 and replicate the DC value over the low four words.
   paddw                 m0, [GLOBAL(pw_4)]
   psraw                 m0, 3
   pshuflw               m0, m0, 0x0

   ; Rows 0..2 are reachable from the original pointer; advance for row 3.
   movq    [dstq          ], m0
   movq    [dstq+strideq*2], m0
   movq    [dstq+strideq*4], m0
   lea                 dstq, [dstq+strideq*4]
   movq    [dstq+strideq*2], m0

   RESTORE_GOT
   RET
 |  | 
 ;------------------------------------------------------------------------------
 ; highbd_dc_predictor_8x8
 ;
 ; Fills an 8x8 block of 16-bit samples with (sum(above[0..7]) +
 ; sum(left[0..7]) + 8) >> 4.
 ;
 ;   dst     output block; row r is written at dst + r * stride * 2 bytes
 ;   stride  row pitch, multiplied by 2 when forming byte addresses
 ;   above   8 words, read with an aligned 16-byte load
 ;   left    8 words, read with an aligned 16-byte load
 ;   goffset scratch register handed to GET_GOT so GLOBAL() can reach pw_8
 ;
 ; The above/left argument registers are recycled (stride3, ones) once the two
 ; edge vectors have been loaded.
 ;------------------------------------------------------------------------------
 INIT_XMM sse2
 cglobal highbd_dc_predictor_8x8, 4, 5, 4, dst, stride, above, left, goffset
   GET_GOT     goffsetq

   mova                  m0, [aboveq]
   mova                  m1, [leftq]
   DEFINE_ARGS dst, stride, stride3, ones
   mov                onesd, 0x00010001          ; two 16-bit ones
   lea             stride3q, [strideq*3]
   movd                  m2, onesd
   pshufd                m2, m2, 0x0             ; m2 = eight words of 1
   pxor                  m3, m3                  ; m3 = 0, upper half for packs

   ; pmaddwd against all-ones adds adjacent word pairs into dwords; packing
   ; with zero narrows them back to words, halving the live lanes each round.
   paddw                 m0, m1                  ; 8 lanes: above[i] + left[i]
   pmaddwd               m0, m2                  ; 4 dword pair sums
   packssdw              m0, m3
   pmaddwd               m0, m2                  ; 2 dword sums
   packssdw              m0, m3
   pmaddwd               m0, m2                  ; dword 0 = total of 16 samples

   ; Round, divide by 16 and broadcast word lane 0 to all eight lanes.
   paddw                 m0, [GLOBAL(pw_8)]
   psrlw                 m0, 4
   pshuflw               m0, m0, 0x0
   punpcklqdq            m0, m0

   ; Rows 0..4 from the original pointer, then rows 5..7 after advancing.
   mova   [dstq           ], m0
   mova   [dstq+strideq*2 ], m0
   mova   [dstq+strideq*4 ], m0
   mova   [dstq+stride3q*2], m0
   mova   [dstq+strideq*8 ], m0
   lea                 dstq, [dstq+strideq*8]
   mova   [dstq+strideq*2 ], m0
   mova   [dstq+strideq*4 ], m0
   mova   [dstq+stride3q*2], m0

   RESTORE_GOT
   RET
 |  | 
 ;------------------------------------------------------------------------------
 ; highbd_dc_predictor_16x16
 ;
 ; Fills a 16x16 block of 16-bit samples with (sum(above[0..15]) +
 ; sum(left[0..15]) + 16) >> 5.
 ;
 ;   dst     output block; row r is written at dst + r * stride * 2 bytes
 ;   stride  row pitch, multiplied by 2 when forming byte addresses
 ;   above   16 words, read with two aligned 16-byte loads
 ;   left    16 words, read with two aligned 16-byte loads
 ;   goffset scratch register handed to GET_GOT so GLOBAL() can reach pw_16
 ;
 ; The above/left argument registers are recycled (stride3, cnt) once the edge
 ; vectors have been loaded.
 ;------------------------------------------------------------------------------
 INIT_XMM sse2
 cglobal highbd_dc_predictor_16x16, 4, 5, 5, dst, stride, above, left, goffset
   GET_GOT     goffsetq

   mova                  m0, [aboveq]
   mova                  m1, [aboveq+16]
   mova                  m2, [leftq]
   mova                  m3, [leftq+16]
   DEFINE_ARGS dst, stride, stride3, cnt
   pxor                  m4, m4                  ; zero, used to widen lanes
   lea             stride3q, [strideq*3]
   mov                 cntd, 4                   ; 4 iterations x 4 rows

   ; Collapse the four edge vectors into one vector of 8 word sums.
   paddw                 m0, m1
   paddw                 m2, m3
   paddw                 m0, m2

   ; Fold high half onto low half, widening to dwords before lanes can
   ; overflow 16 bits. Only the low lanes are meaningful after each fold.
   movhlps               m1, m0
   paddw                 m0, m1                  ; 4 word sums in the low half
   punpcklwd             m0, m4                  ; -> 4 dwords
   movhlps               m1, m0
   paddd                 m0, m1                  ; 2 dword sums in the low half
   punpckldq             m0, m4                  ; spread them to dwords 0 and 2
   movhlps               m1, m0
   paddd                 m0, m1                  ; dword 0 = total of 32 samples

   ; Round, divide by 32 and broadcast word lane 0 to all eight lanes.
   paddd                 m0, [GLOBAL(pw_16)]
   psrad                 m0, 5
   pshuflw               m0, m0, 0x0
   punpcklqdq            m0, m0

 .fill_rows:
   ; Four rows of 32 bytes per iteration.
   mova   [dstq              ], m0
   mova   [dstq           +16], m0
   mova   [dstq+strideq*2    ], m0
   mova   [dstq+strideq*2 +16], m0
   mova   [dstq+strideq*4    ], m0
   mova   [dstq+strideq*4 +16], m0
   mova   [dstq+stride3q*2   ], m0
   mova   [dstq+stride3q*2+16], m0
   lea                 dstq, [dstq+strideq*8]
   dec                 cntd
   jnz .fill_rows

   RESTORE_GOT
   REP_RET
 |  | 
 ;------------------------------------------------------------------------------
 ; highbd_dc_predictor_32x32
 ;
 ; Fills a 32x32 block of 16-bit samples with (sum(above[0..31]) +
 ; sum(left[0..31]) + 32) >> 6.
 ;
 ;   dst     output block; row r is written at dst + r * stride * 2 bytes
 ;   stride  row pitch, multiplied by 2 when forming byte addresses
 ;   above   32 words, read with four aligned 16-byte loads
 ;   left    32 words, read with four aligned 16-byte loads
 ;   goffset scratch register handed to GET_GOT so GLOBAL() can reach pw_32
 ;
 ; The above/left argument registers are recycled (stride3, cnt) once the edge
 ; vectors have been loaded.
 ;
 ; NOTE(review): 16 samples are accumulated per 16-bit lane before widening,
 ; which stays below 0x10000 for samples of up to 12 bits - confirm against
 ; callers.
 ;------------------------------------------------------------------------------
 INIT_XMM sse2
 cglobal highbd_dc_predictor_32x32, 4, 5, 7, dst, stride, above, left, goffset
   GET_GOT     goffsetq

   ; Pair each above vector with the matching left vector, then merge.
   mova                  m0, [aboveq]
   mova                  m1, [leftq]
   mova                  m2, [aboveq+16]
   mova                  m3, [leftq+16]
   mova                  m4, [aboveq+32]
   mova                  m5, [leftq+32]
   paddw                 m0, m1
   paddw                 m2, m3
   paddw                 m4, m5
   mova                  m1, [aboveq+48]
   mova                  m3, [leftq+48]
   paddw                 m0, m2
   paddw                 m1, m3
   paddw                 m0, m4
   paddw                 m0, m1                  ; 8 word lanes, 8 samples each
   pxor                  m6, m6                  ; zero, used to widen lanes

   DEFINE_ARGS dst, stride, stride3, cnt
   lea             stride3q, [strideq*3]
   mov                 cntd, 8                   ; 8 iterations x 4 rows

   ; Fold high half onto low half, widening to dwords after the first fold.
   ; Only the low lanes are meaningful after each fold.
   movhlps               m1, m0
   paddw                 m0, m1                  ; 4 word sums in the low half
   punpcklwd             m0, m6                  ; -> 4 dwords
   movhlps               m1, m0
   paddd                 m0, m1                  ; 2 dword sums in the low half
   punpckldq             m0, m6                  ; spread them to dwords 0 and 2
   movhlps               m1, m0
   paddd                 m0, m1                  ; dword 0 = total of 64 samples

   ; Round, divide by 64 and broadcast word lane 0 to all eight lanes.
   paddd                 m0, [GLOBAL(pw_32)]
   psrad                 m0, 6
   pshuflw               m0, m0, 0x0
   punpcklqdq            m0, m0

 .fill_rows:
   ; Four rows of 64 bytes per iteration.
   mova [dstq               ], m0
   mova [dstq            +16], m0
   mova [dstq            +32], m0
   mova [dstq            +48], m0
   mova [dstq+strideq*2     ], m0
   mova [dstq+strideq*2  +16], m0
   mova [dstq+strideq*2  +32], m0
   mova [dstq+strideq*2  +48], m0
   mova [dstq+strideq*4     ], m0
   mova [dstq+strideq*4  +16], m0
   mova [dstq+strideq*4  +32], m0
   mova [dstq+strideq*4  +48], m0
   mova [dstq+stride3q*2    ], m0
   mova [dstq+stride3q*2 +16], m0
   mova [dstq+stride3q*2 +32], m0
   mova [dstq+stride3q*2 +48], m0
   lea                 dstq, [dstq+strideq*8]
   dec                 cntd
   jnz .fill_rows

   RESTORE_GOT
   REP_RET
 |  | 
 ;------------------------------------------------------------------------------
 ; highbd_v_predictor_4x4
 ;
 ; Copies the 4 words at [above] into each of the 4 rows of dst.
 ;
 ;   dst     output block; row r is written at dst + r * stride * 2 bytes
 ;   stride  row pitch, multiplied by 2 when forming byte addresses
 ;   above   4 words, read with one 8-byte load
 ;------------------------------------------------------------------------------
 INIT_XMM sse2
 cglobal highbd_v_predictor_4x4, 3, 3, 1, dst, stride, above
   movq                  m0, [aboveq]

   ; Rows 0..2 are reachable from the original pointer; advance for row 3.
   movq    [dstq          ], m0
   movq    [dstq+strideq*2], m0
   movq    [dstq+strideq*4], m0
   lea                 dstq, [dstq+strideq*4]
   movq    [dstq+strideq*2], m0
   RET
 |  | 
 ;------------------------------------------------------------------------------
 ; highbd_v_predictor_8x8
 ;
 ; Copies the 8 words at [above] into each of the 8 rows of dst.
 ;
 ;   dst     output block; row r is written at dst + r * stride * 2 bytes
 ;   stride  row pitch, multiplied by 2 when forming byte addresses
 ;   above   8 words, read with an aligned 16-byte load
 ;
 ; The above argument register is recycled as stride3 after the load.
 ;------------------------------------------------------------------------------
 INIT_XMM sse2
 cglobal highbd_v_predictor_8x8, 3, 3, 1, dst, stride, above
   mova                  m0, [aboveq]
   DEFINE_ARGS dst, stride, stride3
   lea             stride3q, [strideq*3]

   ; Rows 0..4 from the original pointer, then rows 5..7 after advancing.
   mova   [dstq           ], m0
   mova   [dstq+strideq*2 ], m0
   mova   [dstq+strideq*4 ], m0
   mova   [dstq+stride3q*2], m0
   mova   [dstq+strideq*8 ], m0
   lea                 dstq, [dstq+strideq*8]
   mova   [dstq+strideq*2 ], m0
   mova   [dstq+strideq*4 ], m0
   mova   [dstq+stride3q*2], m0
   RET
 |  | 
 ;------------------------------------------------------------------------------
 ; highbd_v_predictor_16x16
 ;
 ; Copies the 16 words at [above] into each of the 16 rows of dst.
 ;
 ;   dst     output block; row r is written at dst + r * stride * 2 bytes
 ;   stride  row pitch, multiplied by 2 when forming byte addresses
 ;   above   16 words, read with two aligned 16-byte loads
 ;
 ; The above argument register is recycled as stride3 after the loads; a
 ; fourth register (cnt) counts groups of four rows.
 ;------------------------------------------------------------------------------
 INIT_XMM sse2
 cglobal highbd_v_predictor_16x16, 3, 4, 2, dst, stride, above
   mova                  m0, [aboveq]            ; columns 0..7
   mova                  m1, [aboveq+16]         ; columns 8..15
   DEFINE_ARGS dst, stride, stride3, cnt
   lea             stride3q, [strideq*3]
   mov                 cntd, 4                   ; 4 iterations x 4 rows

 .copy_rows:
   mova    [dstq              ], m0
   mova    [dstq           +16], m1
   mova    [dstq+strideq*2    ], m0
   mova    [dstq+strideq*2 +16], m1
   mova    [dstq+strideq*4    ], m0
   mova    [dstq+strideq*4 +16], m1
   mova    [dstq+stride3q*2   ], m0
   mova    [dstq+stride3q*2+16], m1
   lea                 dstq, [dstq+strideq*8]
   dec                 cntd
   jnz .copy_rows
   REP_RET
 |  | 
 ;------------------------------------------------------------------------------
 ; highbd_v_predictor_32x32
 ;
 ; Copies the 32 words at [above] into each of the 32 rows of dst.
 ;
 ;   dst     output block; row r is written at dst + r * stride * 2 bytes
 ;   stride  row pitch, multiplied by 2 when forming byte addresses
 ;   above   32 words, read with four aligned 16-byte loads
 ;
 ; The above argument register is recycled as stride3 after the loads; a
 ; fourth register (cnt) counts groups of four rows.
 ;------------------------------------------------------------------------------
 INIT_XMM sse2
 cglobal highbd_v_predictor_32x32, 3, 4, 4, dst, stride, above
   mova                  m0, [aboveq]            ; columns  0..7
   mova                  m1, [aboveq+16]         ; columns  8..15
   mova                  m2, [aboveq+32]         ; columns 16..23
   mova                  m3, [aboveq+48]         ; columns 24..31
   DEFINE_ARGS dst, stride, stride3, cnt
   lea             stride3q, [strideq*3]
   mov                 cntd, 8                   ; 8 iterations x 4 rows

 .copy_rows:
   mova [dstq               ], m0
   mova [dstq            +16], m1
   mova [dstq            +32], m2
   mova [dstq            +48], m3
   mova [dstq+strideq*2     ], m0
   mova [dstq+strideq*2  +16], m1
   mova [dstq+strideq*2  +32], m2
   mova [dstq+strideq*2  +48], m3
   mova [dstq+strideq*4     ], m0
   mova [dstq+strideq*4  +16], m1
   mova [dstq+strideq*4  +32], m2
   mova [dstq+strideq*4  +48], m3
   mova [dstq+stride3q*2    ], m0
   mova [dstq+stride3q*2 +16], m1
   mova [dstq+stride3q*2 +32], m2
   mova [dstq+stride3q*2 +48], m3
   lea                 dstq, [dstq+strideq*8]
   dec                 cntd
   jnz .copy_rows
   REP_RET