| ; Copyright (c) 2016, Alliance for Open Media. All rights reserved |
| ; This source code is subject to the terms of the BSD 2 Clause License and |
| ; the Alliance for Open Media Patent License 1.0. If the BSD 2 Clause License |
| ; was not distributed with this source code in the LICENSE file, you can |
| ; obtain it at www.aomedia.org/license/software. If the Alliance for Open |
| ; Media Patent License 1.0 was not distributed with this source code in the |
| ; PATENTS file, you can obtain it at www.aomedia.org/license/patent. |
| %define private_prefix av1 |
| %include "third_party/x86inc/x86inc.asm" |
| ; int64_t av1_block_error(int16_t *coeff, int16_t *dqcoeff, intptr_t block_size, int64_t *ssz) |
| cglobal block_error, 3, 3, 8, uqc, dqc, size, ssz |
| pxor m4, m4 ; m4 = 0: sse accumulator (cglobal above: 3 args loaded, 3 GPRs, 8 vector regs; args named uqc, dqc, size, ssz) |
| pxor m6, m6 ; m6 = 0: ssz accumulator |
| pxor m5, m5 ; m5 = 0: dedicated zero register; the two mova loads below each read one vector, from uqc and dqc, at byte offset sizeq*2+mmsize |
| mova m3, [uqcq+sizeq*2+mmsize] |
| mova m1, [dqcq+sizeq*2+mmsize] |
| ; individual errors are at most 15 bits plus sign, so their squares are 30 bits, and |
| ; the sum of two such squares therefore fits in a 31-bit integer (plus the unused sign bit) |
| ; accumulate horizontally and store in return value (NOTE(review): the code consuming m1/m3 and the reduction itself are not visible in this excerpt) |