| ; |
| ; Copyright (c) 2010 The VP8 project authors. All Rights Reserved. |
| ; |
| ; Use of this source code is governed by a BSD-style license and patent |
| ; grant that can be found in the LICENSE file in the root of the source |
| ; tree. All contributing project authors may be found in the AUTHORS |
| ; file in the root of the source tree. |
| ; |
| |
| |
| .globl recon4b_ppc |
| .globl recon2b_ppc |
| .globl recon_b_ppc |
| |
;# row_of16: reconstruct one 16-pel row.
;#   Adds 16 predicted pels (\Pred, u8) to 16 residuals (\Diff, s16,
;#   split across two 16-byte vectors) with signed saturation, packs
;#   the sums back to u8 with unsigned clamping, and stores 16 bytes
;#   at \Dst.  Advances \Pred by 16, \Diff by 32, \Dst by \Stride.
;#   Requires: v0 == 0, r8 == 16 (set up by the caller).
;#   Clobbers: v1, v2, v3.
.macro row_of16 Diff Pred Dst Stride
    lvx     v1,  0, \Pred           ;# v1 = pred = p0..p15
    addi    \Pred, \Pred, 16        ;# next pred
    vmrghb  v2, v0, v1              ;# v2 = 16-bit p0..p7 (zero-extend: merge with v0=0)
    lvx     v3,  0, \Diff           ;# v3 = d0..d7
    vaddshs v2, v2, v3              ;# v2 = r0..r7 (saturating signed add)
    vmrglb  v1, v0, v1              ;# v1 = 16-bit p8..p15
    lvx     v3, r8, \Diff           ;# v3 = d8..d15
    addi    \Diff, \Diff, 32        ;# next diff
    vaddshs v3, v3, v1              ;# v3 = r8..r15
    vpkshus v2, v2, v3              ;# v2 = 8-bit r0..r15 (clamp to [0,255])
    stvx    v2,  0, \Dst            ;# to dst
    add     \Dst, \Dst, \Stride     ;# next dst
.endm
| |
    .text
    .align 2
;# void recon4b_ppc(short *diff_ptr, unsigned char *pred_ptr,
;#                  unsigned char *dst_ptr, int stride)
;# Reconstructs four 16-pel rows: dst[i] = clamp_u8(pred[i] + diff[i]).
;# Pred and diff are contiguous (16 pels / 16 shorts per row).
;# r3 = short *diff_ptr,
;# r4 = unsigned char *pred_ptr,
;# r5 = unsigned char *dst_ptr,
;# r6 = int stride
recon4b_ppc:
    mfspr   r0, 256                 ;# get old VRSAVE
    stw     r0, -8(r1)              ;# save old VRSAVE to stack
    oris    r0, r0, 0xf000          ;# mark v0..v3 in use (top 4 VRSAVE bits)
    mtspr   256, r0                 ;# set VRSAVE

    vxor    v0, v0, v0              ;# v0 = 0, zero-extend source for vmrg{h,l}b
    li      r8, 16                  ;# byte offset of the second diff vector

    row_of16 r3, r4, r5, r6         ;# row 0
    row_of16 r3, r4, r5, r6         ;# row 1
    row_of16 r3, r4, r5, r6         ;# row 2
    row_of16 r3, r4, r5, r6         ;# row 3

    lwz     r12, -8(r1)             ;# restore old VRSAVE from stack
    mtspr   256, r12                ;# reset old VRSAVE

    blr
| |
;# two_rows_of8: reconstruct two 8-pel rows.
;#   Adds 16 predicted pels to 16 residuals with saturation (same math
;#   as row_of16), then stores the 16 result bytes through the scratch
;#   buffer at r10 as two 8-byte dst rows, \Stride bytes apart.
;#   \write_first_four_pels: nonzero when \Dst already points at the
;#   first output row (first call); zero to advance \Dst by \Stride
;#   before the first store (subsequent calls).
;#   Requires: v0 == 0, r8 == 16, r10 = 16-byte scratch buffer.
;#   Clobbers: r0, v1, v2, v3; leaves \Dst on the second row written.
.macro two_rows_of8 Diff Pred Dst Stride write_first_four_pels
    lvx     v1,  0, \Pred           ;# v1 = pred = p0..p15
    vmrghb  v2, v0, v1              ;# v2 = 16-bit p0..p7
    lvx     v3,  0, \Diff           ;# v3 = d0..d7
    vaddshs v2, v2, v3              ;# v2 = r0..r7
    vmrglb  v1, v0, v1              ;# v1 = 16-bit p8..p15
    lvx     v3, r8, \Diff           ;# v3 = d8..d15
    vaddshs v3, v3, v1              ;# v3 = r8..r15
    vpkshus v2, v2, v3              ;# v2 = 8-bit r0..r15
    stvx    v2,  0, r10             ;# 2 rows to dst from buf
    lwz     r0, 0(r10)
.if \write_first_four_pels
    stw     r0, 0(\Dst)             ;# dst already on the right row
.else
    stwux   r0, \Dst, \Stride       ;# step dst to the next row first
.endif
    lwz     r0, 4(r10)
    stw     r0, 4(\Dst)
    lwz     r0, 8(r10)
    stwux   r0, \Dst, \Stride       ;# advance dst to next row
    lwz     r0, 12(r10)
    stw     r0, 4(\Dst)
.endm
| |
    .align 2
;# void recon2b_ppc(short *diff_ptr, unsigned char *pred_ptr,
;#                  unsigned char *dst_ptr, int stride)
;# Reconstructs four 8-pel rows (a 4x8 block), two rows per macro call.
;# r3 = short *diff_ptr,
;# r4 = unsigned char *pred_ptr,
;# r5 = unsigned char *dst_ptr,
;# r6 = int stride

recon2b_ppc:
    mfspr   r0, 256                 ;# get old VRSAVE
    stw     r0, -8(r1)              ;# save old VRSAVE to stack
    oris    r0, r0, 0xf000          ;# mark v0..v3 in use (top 4 VRSAVE bits)
    mtspr   256, r0                 ;# set VRSAVE

    vxor    v0, v0, v0              ;# v0 = 0, zero-extend source for vmrg{h,l}b
    li      r8, 16                  ;# byte offset of the second diff vector

    la      r10, -48(r1)            ;# buf: scratch below the stack pointer
                                    ;# NOTE(review): red-zone-style use of
                                    ;# storage below r1 -- assumes the target
                                    ;# ABI permits it; confirm for this port

    two_rows_of8 r3, r4, r5, r6, 1  ;# rows 0-1 (dst already positioned)

    addi r4, r4, 16;                ;# next pred
    addi r3, r3, 32;                ;# next diff

    two_rows_of8 r3, r4, r5, r6, 0  ;# rows 2-3 (macro steps dst first)

    lwz     r12, -8(r1)             ;# restore old VRSAVE from stack
    mtspr   256, r12                ;# reset old VRSAVE

    blr
| |
;# get_two_diff_rows: gather two 4-pel rows of 16-bit diffs into v3.
;#   Contract: on entry r0 already holds the first 4 bytes (2 diffs)
;#   loaded from 0(r3) by the caller.  Copies 8 bytes from the current
;#   diff row and 8 bytes from the next row (32 bytes further on) into
;#   the scratch buffer at r10, then loads the packed 16 bytes as v3.
;#   Side effect: r3 is advanced by 32 (to the second row gathered).
;#   Clobbers: r0.
.macro get_two_diff_rows
    stw     r0, 0(r10)              ;# buf[ 0.. 3] = d0,d1 (preloaded in r0)
    lwz     r0, 4(r3)
    stw     r0, 4(r10)              ;# buf[ 4.. 7] = d2,d3
    lwzu    r0, 32(r3)              ;# step r3 to the next diff row
    stw     r0, 8(r10)              ;# buf[ 8..11] = next row d0,d1
    lwz     r0, 4(r3)
    stw     r0, 12(r10)             ;# buf[12..15] = next row d2,d3
    lvx     v3, 0, r10              ;# v3 = 8 diffs = two 4-pel rows
.endm
| |
    .align 2
;# void recon_b_ppc(short *diff_ptr, unsigned char *pred_ptr,
;#                  unsigned char *dst_ptr, int stride)
;# Reconstructs a single 4x4 block.  Pred rows are 16 bytes apart and
;# diff rows 32 bytes (16 shorts) apart; the four 4-byte rows of each
;# are gathered through the scratch buffer into one 16-byte vector so
;# the whole block is reconstructed with a single pair of vector adds.
;# r3 = short *diff_ptr,
;# r4 = unsigned char *pred_ptr,
;# r5 = unsigned char *dst_ptr,
;# r6 = int stride
recon_b_ppc:
    mfspr   r0, 256                 ;# get old VRSAVE
    stw     r0, -8(r1)              ;# save old VRSAVE to stack
    oris    r0, r0, 0xf000          ;# mark v0..v3 in use (top 4 VRSAVE bits)
    mtspr   256, r0                 ;# set VRSAVE

    vxor    v0, v0, v0              ;# v0 = 0, zero-extend source for vmrg{h,l}b

    la      r10, -48(r1)            ;# buf: scratch below r1 (lvx/stvx ignore
                                    ;# the low 4 address bits; assumes r1 is
                                    ;# 16-byte aligned -- TODO confirm ABI)

    lwz     r0, 0(r4)               ;# gather 4 pred rows (16 bytes apart)...
    stw     r0, 0(r10)
    lwz     r0, 16(r4)
    stw     r0, 4(r10)
    lwz     r0, 32(r4)
    stw     r0, 8(r10)
    lwz     r0, 48(r4)
    stw     r0, 12(r10)

    lvx     v1, 0, r10;             ;# v1 = pred = p0..p15

    lwz     r0, 0(r3)               ;# preload first 2 diffs for the macro

    get_two_diff_rows               ;# v3 = d0..d7 (diff rows 0-1)

    vmrghb  v2, v0, v1;             ;# v2 = 16-bit p0..p7
    vaddshs v2, v2, v3;             ;# v2 = r0..r7

    lwzu    r0, 32(r3)              ;# preload first 2 diffs of row 2

    get_two_diff_rows               ;# v3 = d8..d15 (diff rows 2-3)

    vmrglb  v1, v0, v1;             ;# v1 = 16-bit p8..p15
    vaddshs v3, v3, v1;             ;# v3 = r8..r15

    vpkshus v2, v2, v3;             ;# v2 = 8-bit r0..r15
    stvx    v2, 0, r10;             ;# 16 pels to dst from buf

    lwz     r0, 0(r10)              ;# scatter 4 result rows to dst...
    stw     r0, 0(r5)               ;# row 0
    lwz     r0, 4(r10)
    stwux   r0, r5, r6              ;# row 1 (advance dst by stride)
    lwz     r0, 8(r10)
    stwux   r0, r5, r6              ;# row 2
    lwz     r0, 12(r10)
    stwx    r0, r5, r6              ;# row 3 (indexed store, no update)

    lwz     r12, -8(r1)             ;# restore old VRSAVE from stack
    mtspr   256, r12                ;# reset old VRSAVE

    blr