blob: 3f0d2d82286045ac22c6eaaee845d9b223345ca0 [file] [log] [blame]
/*
 * Copyright (c) 2021, Alliance for Open Media. All rights reserved
 *
 * This source code is subject to the terms of the BSD 3-Clause Clear License
 * and the Alliance for Open Media Patent License 1.0. If the BSD 3-Clause Clear
 * License was not distributed with this source code in the LICENSE file, you
 * can obtain it at aomedia.org/license/software-license/bsd-3-c-c/. If the
 * Alliance for Open Media Patent License 1.0 was not distributed with this
 * source code in the PATENTS file, you can obtain it at
 * aomedia.org/license/patent-license/.
 */
12
13#include <assert.h>
14#include <stdio.h>
15
Tom Finegan44702c82018-05-22 13:00:39 -070016#include "config/aom_dsp_rtcd.h"
17
Yaowu Xuc27fc142016-08-22 16:08:15 -070018#include "aom_dsp/mips/convolve_common_dspr2.h"
Yaowu Xuf883b422016-08-30 14:01:10 -070019#include "aom_dsp/aom_dsp_common.h"
20#include "aom_dsp/aom_filter.h"
Yaowu Xuc27fc142016-08-22 16:08:15 -070021#include "aom_ports/mem.h"
22
23#if HAVE_DSPR2
/*
 * Copy a block of w x h bytes from src to dst, one row at a time.
 *
 *   src, src_stride  source pointer and the byte step between source rows
 *   dst, dst_stride  destination pointer and the byte step between dest rows
 *   w, h             block width and height, in bytes / rows
 *
 * Widths 4, 8, 16, 32 and 64 are copied with inline assembly using word
 * loads ("ulw") and word stores ("sw"); every other width falls back to a
 * byte-by-byte loop.
 *
 * NOTE(review): the loads use the unaligned-load form but the stores use
 * plain "sw", so the word paths presumably require dst (and dst_stride) to
 * be 4-byte aligned -- confirm against callers.
 *
 * Every asm statement carries a "memory" clobber: the assembly reads *src and
 * writes *dst through pointer operands only, so without the clobber the
 * compiler is not told that memory is accessed and may reorder or cache
 * surrounding loads/stores across the asm.
 */
void aom_convolve_copy_dspr2(const uint8_t *src, ptrdiff_t src_stride,
                             uint8_t *dst, ptrdiff_t dst_stride, int w, int h) {
  int x, y;

  /* prefetch data to cache memory */
  prefetch_load(src);
  prefetch_load(src + 32);
  prefetch_store(dst);

  switch (w) {
    case 4: {
      uint32_t tp1;

      /* 1 word storage */
      for (y = h; y--;) {
        /* issue prefetches for the following row before copying this one */
        prefetch_load(src + src_stride);
        prefetch_load(src + src_stride + 32);
        prefetch_store(dst + dst_stride);

        __asm__ __volatile__(
            "ulw    %[tp1],  (%[src])    \n\t"
            "sw     %[tp1],  (%[dst])    \n\t" /* store */

            : [tp1] "=&r"(tp1)
            : [src] "r"(src), [dst] "r"(dst)
            : "memory");

        src += src_stride;
        dst += dst_stride;
      }
    } break;
    case 8: {
      uint32_t tp1, tp2;

      /* 2 word storage */
      for (y = h; y--;) {
        prefetch_load(src + src_stride);
        prefetch_load(src + src_stride + 32);
        prefetch_store(dst + dst_stride);

        __asm__ __volatile__(
            "ulw    %[tp1],  0(%[src])   \n\t"
            "ulw    %[tp2],  4(%[src])   \n\t"
            "sw     %[tp1],  0(%[dst])   \n\t" /* store */
            "sw     %[tp2],  4(%[dst])   \n\t" /* store */

            : [tp1] "=&r"(tp1), [tp2] "=&r"(tp2)
            : [src] "r"(src), [dst] "r"(dst)
            : "memory");

        src += src_stride;
        dst += dst_stride;
      }
    } break;
    case 16: {
      uint32_t tp1, tp2, tp3, tp4;

      /* 4 word storage */
      for (y = h; y--;) {
        prefetch_load(src + src_stride);
        prefetch_load(src + src_stride + 32);
        prefetch_store(dst + dst_stride);

        __asm__ __volatile__(
            "ulw    %[tp1],  0(%[src])   \n\t"
            "ulw    %[tp2],  4(%[src])   \n\t"
            "ulw    %[tp3],  8(%[src])   \n\t"
            "ulw    %[tp4],  12(%[src])  \n\t"

            "sw     %[tp1],  0(%[dst])   \n\t" /* store */
            "sw     %[tp2],  4(%[dst])   \n\t" /* store */
            "sw     %[tp3],  8(%[dst])   \n\t" /* store */
            "sw     %[tp4],  12(%[dst])  \n\t" /* store */

            : [tp1] "=&r"(tp1), [tp2] "=&r"(tp2), [tp3] "=&r"(tp3),
              [tp4] "=&r"(tp4)
            : [src] "r"(src), [dst] "r"(dst)
            : "memory");

        src += src_stride;
        dst += dst_stride;
      }
    } break;
    case 32: {
      uint32_t tp1, tp2, tp3, tp4;
      uint32_t tp5, tp6, tp7, tp8;

      /* 8 word storage */
      for (y = h; y--;) {
        prefetch_load(src + src_stride);
        prefetch_load(src + src_stride + 32);
        prefetch_store(dst + dst_stride);

        __asm__ __volatile__(
            "ulw    %[tp1],  0(%[src])   \n\t"
            "ulw    %[tp2],  4(%[src])   \n\t"
            "ulw    %[tp3],  8(%[src])   \n\t"
            "ulw    %[tp4],  12(%[src])  \n\t"
            "ulw    %[tp5],  16(%[src])  \n\t"
            "ulw    %[tp6],  20(%[src])  \n\t"
            "ulw    %[tp7],  24(%[src])  \n\t"
            "ulw    %[tp8],  28(%[src])  \n\t"

            "sw     %[tp1],  0(%[dst])   \n\t" /* store */
            "sw     %[tp2],  4(%[dst])   \n\t" /* store */
            "sw     %[tp3],  8(%[dst])   \n\t" /* store */
            "sw     %[tp4],  12(%[dst])  \n\t" /* store */
            "sw     %[tp5],  16(%[dst])  \n\t" /* store */
            "sw     %[tp6],  20(%[dst])  \n\t" /* store */
            "sw     %[tp7],  24(%[dst])  \n\t" /* store */
            "sw     %[tp8],  28(%[dst])  \n\t" /* store */

            : [tp1] "=&r"(tp1), [tp2] "=&r"(tp2), [tp3] "=&r"(tp3),
              [tp4] "=&r"(tp4), [tp5] "=&r"(tp5), [tp6] "=&r"(tp6),
              [tp7] "=&r"(tp7), [tp8] "=&r"(tp8)
            : [src] "r"(src), [dst] "r"(dst)
            : "memory");

        src += src_stride;
        dst += dst_stride;
      }
    } break;
    case 64: {
      uint32_t tp1, tp2, tp3, tp4;
      uint32_t tp5, tp6, tp7, tp8;

      /* a 64-byte row extends past the offsets prefetched at function entry */
      prefetch_load(src + 64);
      prefetch_store(dst + 32);

      /* 16 word storage */
      for (y = h; y--;) {
        prefetch_load(src + src_stride);
        prefetch_load(src + src_stride + 32);
        prefetch_load(src + src_stride + 64);
        prefetch_store(dst + dst_stride);
        prefetch_store(dst + dst_stride + 32);

        /* The eight temporaries are reused: bytes 0..31 are loaded and
         * stored first, then bytes 32..63. */
        __asm__ __volatile__(
            "ulw    %[tp1],  0(%[src])   \n\t"
            "ulw    %[tp2],  4(%[src])   \n\t"
            "ulw    %[tp3],  8(%[src])   \n\t"
            "ulw    %[tp4],  12(%[src])  \n\t"
            "ulw    %[tp5],  16(%[src])  \n\t"
            "ulw    %[tp6],  20(%[src])  \n\t"
            "ulw    %[tp7],  24(%[src])  \n\t"
            "ulw    %[tp8],  28(%[src])  \n\t"

            "sw     %[tp1],  0(%[dst])   \n\t" /* store */
            "sw     %[tp2],  4(%[dst])   \n\t" /* store */
            "sw     %[tp3],  8(%[dst])   \n\t" /* store */
            "sw     %[tp4],  12(%[dst])  \n\t" /* store */
            "sw     %[tp5],  16(%[dst])  \n\t" /* store */
            "sw     %[tp6],  20(%[dst])  \n\t" /* store */
            "sw     %[tp7],  24(%[dst])  \n\t" /* store */
            "sw     %[tp8],  28(%[dst])  \n\t" /* store */

            "ulw    %[tp1],  32(%[src])  \n\t"
            "ulw    %[tp2],  36(%[src])  \n\t"
            "ulw    %[tp3],  40(%[src])  \n\t"
            "ulw    %[tp4],  44(%[src])  \n\t"
            "ulw    %[tp5],  48(%[src])  \n\t"
            "ulw    %[tp6],  52(%[src])  \n\t"
            "ulw    %[tp7],  56(%[src])  \n\t"
            "ulw    %[tp8],  60(%[src])  \n\t"

            "sw     %[tp1],  32(%[dst])  \n\t" /* store */
            "sw     %[tp2],  36(%[dst])  \n\t" /* store */
            "sw     %[tp3],  40(%[dst])  \n\t" /* store */
            "sw     %[tp4],  44(%[dst])  \n\t" /* store */
            "sw     %[tp5],  48(%[dst])  \n\t" /* store */
            "sw     %[tp6],  52(%[dst])  \n\t" /* store */
            "sw     %[tp7],  56(%[dst])  \n\t" /* store */
            "sw     %[tp8],  60(%[dst])  \n\t" /* store */

            : [tp1] "=&r"(tp1), [tp2] "=&r"(tp2), [tp3] "=&r"(tp3),
              [tp4] "=&r"(tp4), [tp5] "=&r"(tp5), [tp6] "=&r"(tp6),
              [tp7] "=&r"(tp7), [tp8] "=&r"(tp8)
            : [src] "r"(src), [dst] "r"(dst)
            : "memory");

        src += src_stride;
        dst += dst_stride;
      }
    } break;
    default:
      /* any other width: plain byte-by-byte copy of each row */
      for (y = h; y--;) {
        for (x = 0; x < w; ++x) {
          dst[x] = src[x];
        }

        src += src_stride;
        dst += dst_stride;
      }
      break;
  }
}
215#endif