|  | ; | 
|  | ; Copyright (c) 2016, Alliance for Open Media. All rights reserved. | 
|  | ; | 
|  | ; This source code is subject to the terms of the BSD 2 Clause License and | 
|  | ; the Alliance for Open Media Patent License 1.0. If the BSD 2 Clause License | 
|  | ; was not distributed with this source code in the LICENSE file, you can | 
|  | ; obtain it at www.aomedia.org/license/software. If the Alliance for Open | 
|  | ; Media Patent License 1.0 was not distributed with this source code in the | 
|  | ; PATENTS file, you can obtain it at www.aomedia.org/license/patent. | 
|  | ; | 
|  |  | 
|  | ; | 
|  |  | 
|  | %include "third_party/x86inc/x86inc.asm" | 
|  |  | 
|  | SECTION_RODATA | 
|  | pb_1: times 16 db 1 | 
|  | pw_4:  times 8 dw 4 | 
|  | pw_8:  times 8 dw 8 | 
|  | pw_16: times 8 dw 16 | 
|  | pw_32: times 8 dw 32 | 
|  | dc_128: times 16 db 128 | 
|  | pw2_4:  times 8 dw 2 | 
|  | pw2_8:  times 8 dw 4 | 
|  | pw2_16:  times 8 dw 8 | 
|  | pw2_32:  times 8 dw 16 | 
|  |  | 
|  | SECTION .text | 
|  |  | 
|  | INIT_XMM sse2 | 
|  | cglobal dc_predictor_4x4, 4, 5, 3, dst, stride, above, left, goffset | 
|  | GET_GOT     goffsetq | 
|  |  | 
|  | movd                  m2, [leftq] | 
|  | movd                  m0, [aboveq] | 
|  | pxor                  m1, m1 | 
|  | punpckldq             m0, m2 | 
|  | psadbw                m0, m1 | 
|  | paddw                 m0, [GLOBAL(pw_4)] | 
|  | psraw                 m0, 3 | 
|  | pshuflw               m0, m0, 0x0 | 
|  | packuswb              m0, m0 | 
|  | movd      [dstq        ], m0 | 
|  | movd      [dstq+strideq], m0 | 
|  | lea                 dstq, [dstq+strideq*2] | 
|  | movd      [dstq        ], m0 | 
|  | movd      [dstq+strideq], m0 | 
|  |  | 
|  | RESTORE_GOT | 
|  | RET | 
|  |  | 
|  | INIT_XMM sse2 | 
|  | cglobal dc_left_predictor_4x4, 2, 5, 2, dst, stride, above, left, goffset | 
|  | movifnidn          leftq, leftmp | 
|  | GET_GOT     goffsetq | 
|  |  | 
|  | pxor                  m1, m1 | 
|  | movd                  m0, [leftq] | 
|  | psadbw                m0, m1 | 
|  | paddw                 m0, [GLOBAL(pw2_4)] | 
|  | psraw                 m0, 2 | 
|  | pshuflw               m0, m0, 0x0 | 
|  | packuswb              m0, m0 | 
|  | movd      [dstq        ], m0 | 
|  | movd      [dstq+strideq], m0 | 
|  | lea                 dstq, [dstq+strideq*2] | 
|  | movd      [dstq        ], m0 | 
|  | movd      [dstq+strideq], m0 | 
|  |  | 
|  | RESTORE_GOT | 
|  | RET | 
|  |  | 
|  | INIT_XMM sse2 | 
|  | cglobal dc_top_predictor_4x4, 3, 5, 2, dst, stride, above, left, goffset | 
|  | GET_GOT     goffsetq | 
|  |  | 
|  | pxor                  m1, m1 | 
|  | movd                  m0, [aboveq] | 
|  | psadbw                m0, m1 | 
|  | paddw                 m0, [GLOBAL(pw2_4)] | 
|  | psraw                 m0, 2 | 
|  | pshuflw               m0, m0, 0x0 | 
|  | packuswb              m0, m0 | 
|  | movd      [dstq        ], m0 | 
|  | movd      [dstq+strideq], m0 | 
|  | lea                 dstq, [dstq+strideq*2] | 
|  | movd      [dstq        ], m0 | 
|  | movd      [dstq+strideq], m0 | 
|  |  | 
|  | RESTORE_GOT | 
|  | RET | 
|  |  | 
|  | INIT_XMM sse2 | 
|  | cglobal dc_predictor_8x8, 4, 5, 3, dst, stride, above, left, goffset | 
|  | GET_GOT     goffsetq | 
|  |  | 
|  | pxor                  m1, m1 | 
|  | movq                  m0, [aboveq] | 
|  | movq                  m2, [leftq] | 
|  | DEFINE_ARGS dst, stride, stride3 | 
|  | lea             stride3q, [strideq*3] | 
|  | psadbw                m0, m1 | 
|  | psadbw                m2, m1 | 
|  | paddw                 m0, m2 | 
|  | paddw                 m0, [GLOBAL(pw_8)] | 
|  | psraw                 m0, 4 | 
|  | punpcklbw             m0, m0 | 
|  | pshuflw               m0, m0, 0x0 | 
|  | movq    [dstq          ], m0 | 
|  | movq    [dstq+strideq  ], m0 | 
|  | movq    [dstq+strideq*2], m0 | 
|  | movq    [dstq+stride3q ], m0 | 
|  | lea                 dstq, [dstq+strideq*4] | 
|  | movq    [dstq          ], m0 | 
|  | movq    [dstq+strideq  ], m0 | 
|  | movq    [dstq+strideq*2], m0 | 
|  | movq    [dstq+stride3q ], m0 | 
|  |  | 
|  | RESTORE_GOT | 
|  | RET | 
|  |  | 
|  | INIT_XMM sse2 | 
|  | cglobal dc_top_predictor_8x8, 3, 5, 2, dst, stride, above, left, goffset | 
|  | GET_GOT     goffsetq | 
|  |  | 
|  | pxor                  m1, m1 | 
|  | movq                  m0, [aboveq] | 
|  | DEFINE_ARGS dst, stride, stride3 | 
|  | lea             stride3q, [strideq*3] | 
|  | psadbw                m0, m1 | 
|  | paddw                 m0, [GLOBAL(pw2_8)] | 
|  | psraw                 m0, 3 | 
|  | punpcklbw             m0, m0 | 
|  | pshuflw               m0, m0, 0x0 | 
|  | movq    [dstq          ], m0 | 
|  | movq    [dstq+strideq  ], m0 | 
|  | movq    [dstq+strideq*2], m0 | 
|  | movq    [dstq+stride3q ], m0 | 
|  | lea                 dstq, [dstq+strideq*4] | 
|  | movq    [dstq          ], m0 | 
|  | movq    [dstq+strideq  ], m0 | 
|  | movq    [dstq+strideq*2], m0 | 
|  | movq    [dstq+stride3q ], m0 | 
|  |  | 
|  | RESTORE_GOT | 
|  | RET | 
|  |  | 
|  | INIT_XMM sse2 | 
|  | cglobal dc_left_predictor_8x8, 2, 5, 2, dst, stride, above, left, goffset | 
|  | movifnidn          leftq, leftmp | 
|  | GET_GOT     goffsetq | 
|  |  | 
|  | pxor                  m1, m1 | 
|  | movq                  m0, [leftq] | 
|  | DEFINE_ARGS dst, stride, stride3 | 
|  | lea             stride3q, [strideq*3] | 
|  | psadbw                m0, m1 | 
|  | paddw                 m0, [GLOBAL(pw2_8)] | 
|  | psraw                 m0, 3 | 
|  | punpcklbw             m0, m0 | 
|  | pshuflw               m0, m0, 0x0 | 
|  | movq    [dstq          ], m0 | 
|  | movq    [dstq+strideq  ], m0 | 
|  | movq    [dstq+strideq*2], m0 | 
|  | movq    [dstq+stride3q ], m0 | 
|  | lea                 dstq, [dstq+strideq*4] | 
|  | movq    [dstq          ], m0 | 
|  | movq    [dstq+strideq  ], m0 | 
|  | movq    [dstq+strideq*2], m0 | 
|  | movq    [dstq+stride3q ], m0 | 
|  |  | 
|  | RESTORE_GOT | 
|  | RET | 
|  |  | 
|  | INIT_XMM sse2 | 
|  | cglobal dc_128_predictor_4x4, 2, 5, 1, dst, stride, above, left, goffset | 
|  | GET_GOT     goffsetq | 
|  |  | 
|  | DEFINE_ARGS dst, stride, stride3 | 
|  | lea             stride3q, [strideq*3] | 
|  | movd     m0,        [GLOBAL(dc_128)] | 
|  | movd    [dstq          ], m0 | 
|  | movd    [dstq+strideq  ], m0 | 
|  | movd    [dstq+strideq*2], m0 | 
|  | movd    [dstq+stride3q ], m0 | 
|  | RESTORE_GOT | 
|  | RET | 
|  |  | 
|  | INIT_XMM sse2 | 
|  | cglobal dc_128_predictor_8x8, 2, 5, 1, dst, stride, above, left, goffset | 
|  | GET_GOT     goffsetq | 
|  |  | 
|  | DEFINE_ARGS dst, stride, stride3 | 
|  | lea             stride3q, [strideq*3] | 
|  | movq    m0,        [GLOBAL(dc_128)] | 
|  | movq    [dstq          ], m0 | 
|  | movq    [dstq+strideq  ], m0 | 
|  | movq    [dstq+strideq*2], m0 | 
|  | movq    [dstq+stride3q ], m0 | 
|  | lea                 dstq, [dstq+strideq*4] | 
|  | movq    [dstq          ], m0 | 
|  | movq    [dstq+strideq  ], m0 | 
|  | movq    [dstq+strideq*2], m0 | 
|  | movq    [dstq+stride3q ], m0 | 
|  | RESTORE_GOT | 
|  | RET | 
|  |  | 
|  | INIT_XMM sse2 | 
|  | cglobal dc_predictor_16x16, 4, 5, 3, dst, stride, above, left, goffset | 
|  | GET_GOT     goffsetq | 
|  |  | 
|  | pxor                  m1, m1 | 
|  | mova                  m0, [aboveq] | 
|  | mova                  m2, [leftq] | 
|  | DEFINE_ARGS dst, stride, stride3, lines4 | 
|  | lea             stride3q, [strideq*3] | 
|  | mov              lines4d, 4 | 
|  | psadbw                m0, m1 | 
|  | psadbw                m2, m1 | 
|  | paddw                 m0, m2 | 
|  | movhlps               m2, m0 | 
|  | paddw                 m0, m2 | 
|  | paddw                 m0, [GLOBAL(pw_16)] | 
|  | psraw                 m0, 5 | 
|  | pshuflw               m0, m0, 0x0 | 
|  | punpcklqdq            m0, m0 | 
|  | packuswb              m0, m0 | 
|  | .loop: | 
|  | mova    [dstq          ], m0 | 
|  | mova    [dstq+strideq  ], m0 | 
|  | mova    [dstq+strideq*2], m0 | 
|  | mova    [dstq+stride3q ], m0 | 
|  | lea                 dstq, [dstq+strideq*4] | 
|  | dec              lines4d | 
|  | jnz .loop | 
|  |  | 
|  | RESTORE_GOT | 
|  | REP_RET | 
|  |  | 
|  |  | 
|  | INIT_XMM sse2 | 
|  | cglobal dc_top_predictor_16x16, 4, 5, 3, dst, stride, above, left, goffset | 
|  | GET_GOT     goffsetq | 
|  |  | 
|  | pxor                  m1, m1 | 
|  | mova                  m0, [aboveq] | 
|  | DEFINE_ARGS dst, stride, stride3, lines4 | 
|  | lea             stride3q, [strideq*3] | 
|  | mov              lines4d, 4 | 
|  | psadbw                m0, m1 | 
|  | movhlps               m2, m0 | 
|  | paddw                 m0, m2 | 
|  | paddw                 m0, [GLOBAL(pw2_16)] | 
|  | psraw                 m0, 4 | 
|  | pshuflw               m0, m0, 0x0 | 
|  | punpcklqdq            m0, m0 | 
|  | packuswb              m0, m0 | 
|  | .loop: | 
|  | mova    [dstq          ], m0 | 
|  | mova    [dstq+strideq  ], m0 | 
|  | mova    [dstq+strideq*2], m0 | 
|  | mova    [dstq+stride3q ], m0 | 
|  | lea                 dstq, [dstq+strideq*4] | 
|  | dec              lines4d | 
|  | jnz .loop | 
|  |  | 
|  | RESTORE_GOT | 
|  | REP_RET | 
|  |  | 
|  | INIT_XMM sse2 | 
|  | cglobal dc_left_predictor_16x16, 4, 5, 3, dst, stride, above, left, goffset | 
|  | GET_GOT     goffsetq | 
|  |  | 
|  | pxor                  m1, m1 | 
|  | mova                  m0, [leftq] | 
|  | DEFINE_ARGS dst, stride, stride3, lines4 | 
|  | lea             stride3q, [strideq*3] | 
|  | mov              lines4d, 4 | 
|  | psadbw                m0, m1 | 
|  | movhlps               m2, m0 | 
|  | paddw                 m0, m2 | 
|  | paddw                 m0, [GLOBAL(pw2_16)] | 
|  | psraw                 m0, 4 | 
|  | pshuflw               m0, m0, 0x0 | 
|  | punpcklqdq            m0, m0 | 
|  | packuswb              m0, m0 | 
|  | .loop: | 
|  | mova    [dstq          ], m0 | 
|  | mova    [dstq+strideq  ], m0 | 
|  | mova    [dstq+strideq*2], m0 | 
|  | mova    [dstq+stride3q ], m0 | 
|  | lea                 dstq, [dstq+strideq*4] | 
|  | dec              lines4d | 
|  | jnz .loop | 
|  |  | 
|  | RESTORE_GOT | 
|  | REP_RET | 
|  |  | 
|  | INIT_XMM sse2 | 
|  | cglobal dc_128_predictor_16x16, 4, 5, 3, dst, stride, above, left, goffset | 
|  | GET_GOT     goffsetq | 
|  |  | 
|  | DEFINE_ARGS dst, stride, stride3, lines4 | 
|  | lea             stride3q, [strideq*3] | 
|  | mov              lines4d, 4 | 
|  | mova    m0,        [GLOBAL(dc_128)] | 
|  | .loop: | 
|  | mova    [dstq          ], m0 | 
|  | mova    [dstq+strideq  ], m0 | 
|  | mova    [dstq+strideq*2], m0 | 
|  | mova    [dstq+stride3q ], m0 | 
|  | lea                 dstq, [dstq+strideq*4] | 
|  | dec              lines4d | 
|  | jnz .loop | 
|  | RESTORE_GOT | 
|  | RET | 
|  |  | 
|  |  | 
|  | INIT_XMM sse2 | 
|  | cglobal dc_predictor_32x32, 4, 5, 5, dst, stride, above, left, goffset | 
|  | GET_GOT     goffsetq | 
|  |  | 
|  | pxor                  m1, m1 | 
|  | mova                  m0, [aboveq] | 
|  | mova                  m2, [aboveq+16] | 
|  | mova                  m3, [leftq] | 
|  | mova                  m4, [leftq+16] | 
|  | DEFINE_ARGS dst, stride, stride3, lines4 | 
|  | lea             stride3q, [strideq*3] | 
|  | mov              lines4d, 8 | 
|  | psadbw                m0, m1 | 
|  | psadbw                m2, m1 | 
|  | psadbw                m3, m1 | 
|  | psadbw                m4, m1 | 
|  | paddw                 m0, m2 | 
|  | paddw                 m0, m3 | 
|  | paddw                 m0, m4 | 
|  | movhlps               m2, m0 | 
|  | paddw                 m0, m2 | 
|  | paddw                 m0, [GLOBAL(pw_32)] | 
|  | psraw                 m0, 6 | 
|  | pshuflw               m0, m0, 0x0 | 
|  | punpcklqdq            m0, m0 | 
|  | packuswb              m0, m0 | 
|  | .loop: | 
|  | mova [dstq             ], m0 | 
|  | mova [dstq          +16], m0 | 
|  | mova [dstq+strideq     ], m0 | 
|  | mova [dstq+strideq  +16], m0 | 
|  | mova [dstq+strideq*2   ], m0 | 
|  | mova [dstq+strideq*2+16], m0 | 
|  | mova [dstq+stride3q    ], m0 | 
|  | mova [dstq+stride3q +16], m0 | 
|  | lea                 dstq, [dstq+strideq*4] | 
|  | dec              lines4d | 
|  | jnz .loop | 
|  |  | 
|  | RESTORE_GOT | 
|  | REP_RET | 
|  |  | 
|  | INIT_XMM sse2 | 
|  | cglobal dc_top_predictor_32x32, 4, 5, 5, dst, stride, above, left, goffset | 
|  | GET_GOT     goffsetq | 
|  |  | 
|  | pxor                  m1, m1 | 
|  | mova                  m0, [aboveq] | 
|  | mova                  m2, [aboveq+16] | 
|  | DEFINE_ARGS dst, stride, stride3, lines4 | 
|  | lea             stride3q, [strideq*3] | 
|  | mov              lines4d, 8 | 
|  | psadbw                m0, m1 | 
|  | psadbw                m2, m1 | 
|  | paddw                 m0, m2 | 
|  | movhlps               m2, m0 | 
|  | paddw                 m0, m2 | 
|  | paddw                 m0, [GLOBAL(pw2_32)] | 
|  | psraw                 m0, 5 | 
|  | pshuflw               m0, m0, 0x0 | 
|  | punpcklqdq            m0, m0 | 
|  | packuswb              m0, m0 | 
|  | .loop: | 
|  | mova [dstq             ], m0 | 
|  | mova [dstq          +16], m0 | 
|  | mova [dstq+strideq     ], m0 | 
|  | mova [dstq+strideq  +16], m0 | 
|  | mova [dstq+strideq*2   ], m0 | 
|  | mova [dstq+strideq*2+16], m0 | 
|  | mova [dstq+stride3q    ], m0 | 
|  | mova [dstq+stride3q +16], m0 | 
|  | lea                 dstq, [dstq+strideq*4] | 
|  | dec              lines4d | 
|  | jnz .loop | 
|  |  | 
|  | RESTORE_GOT | 
|  | REP_RET | 
|  |  | 
|  | INIT_XMM sse2 | 
|  | cglobal dc_left_predictor_32x32, 4, 5, 5, dst, stride, above, left, goffset | 
|  | GET_GOT     goffsetq | 
|  |  | 
|  | pxor                  m1, m1 | 
|  | mova                  m0, [leftq] | 
|  | mova                  m2, [leftq+16] | 
|  | DEFINE_ARGS dst, stride, stride3, lines4 | 
|  | lea             stride3q, [strideq*3] | 
|  | mov              lines4d, 8 | 
|  | psadbw                m0, m1 | 
|  | psadbw                m2, m1 | 
|  | paddw                 m0, m2 | 
|  | movhlps               m2, m0 | 
|  | paddw                 m0, m2 | 
|  | paddw                 m0, [GLOBAL(pw2_32)] | 
|  | psraw                 m0, 5 | 
|  | pshuflw               m0, m0, 0x0 | 
|  | punpcklqdq            m0, m0 | 
|  | packuswb              m0, m0 | 
|  | .loop: | 
|  | mova [dstq             ], m0 | 
|  | mova [dstq          +16], m0 | 
|  | mova [dstq+strideq     ], m0 | 
|  | mova [dstq+strideq  +16], m0 | 
|  | mova [dstq+strideq*2   ], m0 | 
|  | mova [dstq+strideq*2+16], m0 | 
|  | mova [dstq+stride3q    ], m0 | 
|  | mova [dstq+stride3q +16], m0 | 
|  | lea                 dstq, [dstq+strideq*4] | 
|  | dec              lines4d | 
|  | jnz .loop | 
|  |  | 
|  | RESTORE_GOT | 
|  | REP_RET | 
|  |  | 
|  | INIT_XMM sse2 | 
|  | cglobal dc_128_predictor_32x32, 4, 5, 3, dst, stride, above, left, goffset | 
|  | GET_GOT     goffsetq | 
|  |  | 
|  | DEFINE_ARGS dst, stride, stride3, lines4 | 
|  | lea             stride3q, [strideq*3] | 
|  | mov              lines4d, 8 | 
|  | mova    m0,        [GLOBAL(dc_128)] | 
|  | .loop: | 
|  | mova [dstq             ], m0 | 
|  | mova [dstq          +16], m0 | 
|  | mova [dstq+strideq     ], m0 | 
|  | mova [dstq+strideq  +16], m0 | 
|  | mova [dstq+strideq*2   ], m0 | 
|  | mova [dstq+strideq*2+16], m0 | 
|  | mova [dstq+stride3q    ], m0 | 
|  | mova [dstq+stride3q +16], m0 | 
|  | lea                 dstq, [dstq+strideq*4] | 
|  | dec              lines4d | 
|  | jnz .loop | 
|  | RESTORE_GOT | 
|  | RET | 
|  |  | 
|  | INIT_XMM sse2 | 
|  | cglobal v_predictor_4x4, 3, 3, 1, dst, stride, above | 
|  | movd                  m0, [aboveq] | 
|  | movd      [dstq        ], m0 | 
|  | movd      [dstq+strideq], m0 | 
|  | lea                 dstq, [dstq+strideq*2] | 
|  | movd      [dstq        ], m0 | 
|  | movd      [dstq+strideq], m0 | 
|  | RET | 
|  |  | 
|  | INIT_XMM sse2 | 
|  | cglobal v_predictor_8x8, 3, 3, 1, dst, stride, above | 
|  | movq                  m0, [aboveq] | 
|  | DEFINE_ARGS dst, stride, stride3 | 
|  | lea             stride3q, [strideq*3] | 
|  | movq    [dstq          ], m0 | 
|  | movq    [dstq+strideq  ], m0 | 
|  | movq    [dstq+strideq*2], m0 | 
|  | movq    [dstq+stride3q ], m0 | 
|  | lea                 dstq, [dstq+strideq*4] | 
|  | movq    [dstq          ], m0 | 
|  | movq    [dstq+strideq  ], m0 | 
|  | movq    [dstq+strideq*2], m0 | 
|  | movq    [dstq+stride3q ], m0 | 
|  | RET | 
|  |  | 
|  | INIT_XMM sse2 | 
|  | cglobal v_predictor_16x16, 3, 4, 1, dst, stride, above | 
|  | mova                  m0, [aboveq] | 
|  | DEFINE_ARGS dst, stride, stride3, nlines4 | 
|  | lea             stride3q, [strideq*3] | 
|  | mov              nlines4d, 4 | 
|  | .loop: | 
|  | mova    [dstq          ], m0 | 
|  | mova    [dstq+strideq  ], m0 | 
|  | mova    [dstq+strideq*2], m0 | 
|  | mova    [dstq+stride3q ], m0 | 
|  | lea                 dstq, [dstq+strideq*4] | 
|  | dec             nlines4d | 
|  | jnz .loop | 
|  | REP_RET | 
|  |  | 
|  | INIT_XMM sse2 | 
|  | cglobal v_predictor_32x32, 3, 4, 2, dst, stride, above | 
|  | mova                  m0, [aboveq] | 
|  | mova                  m1, [aboveq+16] | 
|  | DEFINE_ARGS dst, stride, stride3, nlines4 | 
|  | lea             stride3q, [strideq*3] | 
|  | mov              nlines4d, 8 | 
|  | .loop: | 
|  | mova [dstq             ], m0 | 
|  | mova [dstq          +16], m1 | 
|  | mova [dstq+strideq     ], m0 | 
|  | mova [dstq+strideq  +16], m1 | 
|  | mova [dstq+strideq*2   ], m0 | 
|  | mova [dstq+strideq*2+16], m1 | 
|  | mova [dstq+stride3q    ], m0 | 
|  | mova [dstq+stride3q +16], m1 | 
|  | lea                 dstq, [dstq+strideq*4] | 
|  | dec             nlines4d | 
|  | jnz .loop | 
|  | REP_RET | 
|  |  | 
|  | INIT_XMM sse2 | 
|  | cglobal h_predictor_4x4, 2, 4, 4, dst, stride, line, left | 
|  | movifnidn          leftq, leftmp | 
|  | movd                  m0, [leftq] | 
|  | punpcklbw             m0, m0 | 
|  | punpcklbw             m0, m0 | 
|  | pshufd                m1, m0, 0x1 | 
|  | movd      [dstq        ], m0 | 
|  | movd      [dstq+strideq], m1 | 
|  | pshufd                m2, m0, 0x2 | 
|  | lea                 dstq, [dstq+strideq*2] | 
|  | pshufd                m3, m0, 0x3 | 
|  | movd      [dstq        ], m2 | 
|  | movd      [dstq+strideq], m3 | 
|  | RET | 
|  |  | 
|  | INIT_XMM sse2 | 
|  | cglobal h_predictor_8x8, 2, 5, 3, dst, stride, line, left | 
|  | movifnidn          leftq, leftmp | 
|  | mov                lineq, -2 | 
|  | DEFINE_ARGS  dst, stride, line, left, stride3 | 
|  | lea             stride3q, [strideq*3] | 
|  | movq                  m0, [leftq    ] | 
|  | punpcklbw             m0, m0              ; l1 l1 l2 l2 ... l8 l8 | 
|  | .loop: | 
|  | pshuflw               m1, m0, 0x0         ; l1 l1 l1 l1 l1 l1 l1 l1 | 
|  | pshuflw               m2, m0, 0x55        ; l2 l2 l2 l2 l2 l2 l2 l2 | 
|  | movq      [dstq        ], m1 | 
|  | movq      [dstq+strideq], m2 | 
|  | pshuflw               m1, m0, 0xaa | 
|  | pshuflw               m2, m0, 0xff | 
|  | movq    [dstq+strideq*2], m1 | 
|  | movq    [dstq+stride3q ], m2 | 
|  | pshufd                m0, m0, 0xe         ; [63:0] l5 l5 l6 l6 l7 l7 l8 l8 | 
|  | inc                lineq | 
|  | lea                 dstq, [dstq+strideq*4] | 
|  | jnz .loop | 
|  | REP_RET | 
|  |  | 
|  | INIT_XMM sse2 | 
|  | cglobal h_predictor_16x16, 2, 5, 3, dst, stride, line, left | 
|  | movifnidn          leftq, leftmp | 
|  | mov                lineq, -4 | 
|  | DEFINE_ARGS dst, stride, line, left, stride3 | 
|  | lea             stride3q, [strideq*3] | 
|  | .loop: | 
|  | movd                  m0, [leftq] | 
|  | punpcklbw             m0, m0 | 
|  | punpcklbw             m0, m0              ; l1 to l4 each repeated 4 times | 
|  | pshufd            m1, m0, 0x0             ; l1 repeated 16 times | 
|  | pshufd            m2, m0, 0x55            ; l2 repeated 16 times | 
|  | mova    [dstq          ], m1 | 
|  | mova    [dstq+strideq  ], m2 | 
|  | pshufd            m1, m0, 0xaa | 
|  | pshufd            m2, m0, 0xff | 
|  | mova    [dstq+strideq*2], m1 | 
|  | mova    [dstq+stride3q ], m2 | 
|  | inc                lineq | 
|  | lea                leftq, [leftq+4       ] | 
|  | lea                 dstq, [dstq+strideq*4] | 
|  | jnz .loop | 
|  | REP_RET | 
|  |  | 
|  | INIT_XMM sse2 | 
|  | cglobal h_predictor_32x32, 2, 5, 3, dst, stride, line, left | 
|  | movifnidn              leftq, leftmp | 
|  | mov                    lineq, -8 | 
|  | DEFINE_ARGS dst, stride, line, left, stride3 | 
|  | lea                 stride3q, [strideq*3] | 
|  | .loop: | 
|  | movd                      m0, [leftq] | 
|  | punpcklbw                 m0, m0 | 
|  | punpcklbw                 m0, m0              ; l1 to l4 each repeated 4 times | 
|  | pshufd                m1, m0, 0x0             ; l1 repeated 16 times | 
|  | pshufd                m2, m0, 0x55            ; l2 repeated 16 times | 
|  | mova     [dstq             ], m1 | 
|  | mova     [dstq+16          ], m1 | 
|  | mova     [dstq+strideq     ], m2 | 
|  | mova     [dstq+strideq+16  ], m2 | 
|  | pshufd                m1, m0, 0xaa | 
|  | pshufd                m2, m0, 0xff | 
|  | mova     [dstq+strideq*2   ], m1 | 
|  | mova     [dstq+strideq*2+16], m1 | 
|  | mova     [dstq+stride3q    ], m2 | 
|  | mova     [dstq+stride3q+16 ], m2 | 
|  | inc                    lineq | 
|  | lea                    leftq, [leftq+4       ] | 
|  | lea                     dstq, [dstq+strideq*4] | 
|  | jnz .loop | 
|  | REP_RET |