;
;  Copyright (c) 2011 The WebM project authors. All Rights Reserved.
;
;  Use of this source code is governed by a BSD-style license
;  that can be found in the LICENSE file in the root of the source
;  tree. An additional intellectual property rights grant can be found
;  in the file PATENTS. All contributing project authors may
;  be found in the AUTHORS file in the root of the source tree.
;


; Assembler set-up (armasm syntax): exported entry point, instruction set,
; stack-alignment attributes and the code section.
    EXPORT  |vp8_variance_halfpixvar16x16_v_armv6|

    ARM                                 ; ARM (not Thumb) encoding
    REQUIRE8                            ; code expects an 8-byte aligned stack
    PRESERVE8                           ; code keeps the stack 8-byte aligned

    AREA ||.text||, CODE, READONLY, ALIGN=2

;-----------------------------------------------------------------------
; vp8_variance_halfpixvar16x16_v_armv6
;
; Variance of a 16x16 block between a vertically half-pixel interpolated
; source and a reference block.  Each predicted pixel is the rounded
; average of a source pixel and the pixel directly below it:
;     pred = (src[y][x] + src[y + 1][x] + 1) >> 1
;
; In:
;   r0      unsigned char *src_ptr
;   r1      int            source_stride
;   r2      unsigned char *ref_ptr
;   r3      int            recon_stride
;   stack   unsigned int  *sse       (at [sp, #40] after the prologue)
;
; Out:
;   r0      sse - ((sum * sum) >> 8)
;   *sse    sum of squared (pred - ref) over the 256 pixels
;
; Register roles inside the loop:
;   r4-r7   scratch (pixels, differences, partial sums)
;   r8      sum  : signed sum of (pred - ref)
;   r9      address of the source row below the current one
;   r10     0x80808080
;   r11     sse  : sum of squared differences
;   r12     rows remaining
;   lr      constant zero
;
; r4-r12 and lr are saved on entry and restored on return; r0 and r2 are
; advanced by one row per iteration.  The loop reads 17 source rows
; (row y and row y + 1 for y = 0..15) and 16 reference rows, 16 bytes
; from each.
;
; NOTE(review): pixels are fetched with 32-bit ldr; this presumably
; relies on word-aligned rows or on the target permitting unaligned
; word loads -- confirm against the callers / platform configuration.
;-----------------------------------------------------------------------
|vp8_variance_halfpixvar16x16_v_armv6| PROC

    stmfd   sp!, {r4-r12, lr}       ; 10 registers pushed = 40 bytes

    pld     [r0, r1, lsl #0]        ; preload hint: next source row
    pld     [r2, r3, lsl #0]        ; preload hint: next reference row

    mov     r8, #0                  ; initialize sum = 0
    ldr     r10, c80808080          ; per-byte 0x80 used by the averaging trick
    mov     r11, #0                 ; initialize sse = 0
    mov     r12, #16                ; set loop counter to 16 (=block height)
    mov     lr, #0                  ; constant zero
loop
    add     r9, r0, r1              ; r9 = address of the row below src_ptr

    ; ---- 1st 4 pixels ------------------------------------------------
    ldr     r4, [r0, #0]            ; load 4 src pixels
    ldr     r6, [r9, #0]            ; load 4 src pixels from next row
    ldr     r5, [r2, #0]            ; load 4 ref pixels

    ; bilinear interpolation: per byte (a + b + 1) >> 1.
    ; uhsub8(a, ~b) gives ((a + b + 1) >> 1) - 128 modulo 256; flipping
    ; the top bit of each byte adds the 128 back.
    mvn     r6, r6
    uhsub8  r4, r4, r6
    eor     r4, r4, r10

    usub8   r6, r4, r5              ; pred - ref, sets GE where pred >= ref
    pld     [r0, r1, lsl #1]        ; preload hint: source, two rows ahead
    sel     r7, r6, lr              ; keep bytes with non-negative difference
    usub8   r6, r5, r4              ; ref - pred, sets GE where ref >= pred
    pld     [r2, r3, lsl #1]        ; preload hint: reference, two rows ahead
    sel     r6, r6, lr              ; keep magnitudes of negative differences

    ; calculate partial sums
    usad8   r4, r7, lr              ; sum of positive differences
    usad8   r5, r6, lr              ; sum of negative-difference magnitudes
    orr     r6, r6, r7              ; |pred - ref| for all 4 pixels
    ; calculate total sum (flags are not needed here, so plain add/sub
    ; as in the other three pixel groups)
    add     r8, r8, r4              ; add positive differences to sum
    sub     r8, r8, r5              ; subtract negative differences from sum

    ; calculate sse
    uxtb16  r5, r6                  ; bytes 0 and 2 to halfwords
    uxtb16  r7, r6, ror #8          ; bytes 1 and 3 to halfwords
    smlad   r11, r5, r5, r11        ; sse += d0*d0 + d2*d2          (1)

    ; ---- 2nd 4 pixels ------------------------------------------------
    ldr     r4, [r0, #4]            ; load 4 src pixels
    ldr     r6, [r9, #4]            ; load 4 src pixels from next row
    ldr     r5, [r2, #4]            ; load 4 ref pixels

    ; bilinear interpolation
    mvn     r6, r6
    uhsub8  r4, r4, r6
    eor     r4, r4, r10

    smlad   r11, r7, r7, r11        ; sse += d1*d1 + d3*d3 of previous group (2)

    usub8   r6, r4, r5              ; pred - ref
    sel     r7, r6, lr              ; keep bytes with non-negative difference
    usub8   r6, r5, r4              ; ref - pred
    sel     r6, r6, lr              ; keep magnitudes of negative differences

    ; calculate partial sums
    usad8   r4, r7, lr              ; sum of positive differences
    usad8   r5, r6, lr              ; sum of negative-difference magnitudes
    orr     r6, r6, r7              ; |pred - ref| for all 4 pixels

    ; calculate total sum
    add     r8, r8, r4              ; add positive differences to sum
    sub     r8, r8, r5              ; subtract negative differences from sum

    ; calculate sse
    uxtb16  r5, r6                  ; bytes 0 and 2 to halfwords
    uxtb16  r7, r6, ror #8          ; bytes 1 and 3 to halfwords
    smlad   r11, r5, r5, r11        ; sse += d0*d0 + d2*d2          (1)

    ; ---- 3rd 4 pixels ------------------------------------------------
    ldr     r4, [r0, #8]            ; load 4 src pixels
    ldr     r6, [r9, #8]            ; load 4 src pixels from next row
    ldr     r5, [r2, #8]            ; load 4 ref pixels

    ; bilinear interpolation
    mvn     r6, r6
    uhsub8  r4, r4, r6
    eor     r4, r4, r10

    smlad   r11, r7, r7, r11        ; sse += d1*d1 + d3*d3 of previous group (2)

    usub8   r6, r4, r5              ; pred - ref
    sel     r7, r6, lr              ; keep bytes with non-negative difference
    usub8   r6, r5, r4              ; ref - pred
    sel     r6, r6, lr              ; keep magnitudes of negative differences

    ; calculate partial sums
    usad8   r4, r7, lr              ; sum of positive differences
    usad8   r5, r6, lr              ; sum of negative-difference magnitudes
    orr     r6, r6, r7              ; |pred - ref| for all 4 pixels

    ; calculate total sum
    add     r8, r8, r4              ; add positive differences to sum
    sub     r8, r8, r5              ; subtract negative differences from sum

    ; calculate sse
    uxtb16  r5, r6                  ; bytes 0 and 2 to halfwords
    uxtb16  r7, r6, ror #8          ; bytes 1 and 3 to halfwords
    smlad   r11, r5, r5, r11        ; sse += d0*d0 + d2*d2          (1)

    ; ---- 4th 4 pixels ------------------------------------------------
    ldr     r4, [r0, #12]           ; load 4 src pixels
    ldr     r6, [r9, #12]           ; load 4 src pixels from next row
    ldr     r5, [r2, #12]           ; load 4 ref pixels

    ; bilinear interpolation
    mvn     r6, r6
    uhsub8  r4, r4, r6
    eor     r4, r4, r10

    smlad   r11, r7, r7, r11        ; sse += d1*d1 + d3*d3 of previous group (2)

    usub8   r6, r4, r5              ; pred - ref
    add     r0, r0, r1              ; set src_ptr to next row
    sel     r7, r6, lr              ; keep bytes with non-negative difference
    usub8   r6, r5, r4              ; ref - pred
    add     r2, r2, r3              ; set ref_ptr to next row
    sel     r6, r6, lr              ; keep magnitudes of negative differences

    ; calculate partial sums
    usad8   r4, r7, lr              ; sum of positive differences
    usad8   r5, r6, lr              ; sum of negative-difference magnitudes
    orr     r6, r6, r7              ; |pred - ref| for all 4 pixels

    ; calculate total sum
    add     r8, r8, r4              ; add positive differences to sum
    sub     r8, r8, r5              ; subtract negative differences from sum

    ; calculate sse
    uxtb16  r5, r6                  ; bytes 0 and 2 to halfwords
    uxtb16  r7, r6, ror #8          ; bytes 1 and 3 to halfwords
    smlad   r11, r5, r5, r11        ; sse += d0*d0 + d2*d2          (1)
    smlad   r11, r7, r7, r11        ; sse += d1*d1 + d3*d3          (2)


    subs    r12, r12, #1            ; one row done

    bne     loop

    ; Store *sse and return the variance.
    ldr     r6, [sp, #40]           ; sse pointer: first stack argument, above the 40 saved bytes
    mul     r0, r8, r8              ; sum * sum
    str     r11, [r6]               ; store sse
    ; Logical shift: sum * sum is non-negative and, with |sum| <= 255 * 256,
    ; can exceed 2^31 while still fitting in 32 unsigned bits.
    sub     r0, r11, r0, lsr #8     ; return (sse - ((sum * sum) >> 8))

    ldmfd   sp!, {r4-r12, pc}       ; restore registers and return

    ENDP

; Literal loaded into r10 by vp8_variance_halfpixvar16x16_v_armv6: 0x80 in
; every byte lane, XORed into the uhsub8 result during the pixel averaging.
c80808080
    DCD     0x80808080

    END
