Blame - vp9/common/vp9_convolve.c - avm

2013-01-25 09:47:09 -0800

[diff] [blame]

/*
 *
 *  Use of this source code is governed by a BSD-style license
 *  that can be found in the LICENSE file in the root of the source
 *  tree. An additional intellectual property rights grant can be found
 *  in the file PATENTS.  All contributing project authors may
 *  be found in the AUTHORS file in the root of the source tree.
 */

Christian Duvivier

2013-02-11 15:34:08 -0800

[diff] [blame]

10

#include "vp9/common/vp9_convolve.h"

11

John Koleszar

2013-01-25 09:47:09 -0800

[diff] [blame]

12

#include <assert.h>

13

14

#include "./vpx_config.h"

15

#include "./vp9_rtcd.h"

16

#include "vp9/common/vp9_common.h"

17

#include "vpx/vpx_integer.h"

Christian Duvivier

2013-02-11 15:34:08 -0800

[diff] [blame]

18

#include "vpx_ports/mem.h"

John Koleszar

2013-01-25 09:47:09 -0800

[diff] [blame]

19

20

/* Filter coefficients are fixed-point values with VP9_FILTER_SHIFT fractional
 * bits; VP9_FILTER_WEIGHT is the fixed-point representation of 1.0 and half of
 * it is added before shifting to round to nearest.
 */
#define VP9_FILTER_SHIFT 7
#define VP9_FILTER_WEIGHT (1 << VP9_FILTER_SHIFT)

John Koleszar

2013-01-25 09:47:09 -0800

[diff] [blame]

22

23

/* Assume a bank of 16 filters to choose from. There are three possible
 * implementations for filter wrapping behavior, since we want to be able to
 * pick which filter to start with. We could either:
 *
 * 1) make filter_ a pointer to the base of the filter array, and then add an
 *    additional offset parameter, to choose the starting filter.
 * 2) use a pointer to 2 periods worth of filters, so that even if the original
 *    phase offset is at 15/16, we'll have valid data to read. The filter
 *    tables become [32][8], and the second half is duplicated.
 * 3) fix the alignment of the filter tables, so that we know the 0/16 is
 *    always 256 byte aligned.
 *
 * Implementations 2 and 3 are likely preferable, as they avoid an extra 2
 * parameters, and switching between them is trivial, with the
 * ALIGN_FILTERS_256 macro, below.
 */

John Koleszar

2013-02-20 15:59:20 -0800

[diff] [blame]

39

#define ALIGN_FILTERS_256 1

40

Ronald S. Bultje

2013-07-10 11:17:19 -0700

[diff] [blame]

41

static void convolve_horiz_c(const uint8_t *src, ptrdiff_t src_stride,

42

uint8_t *dst, ptrdiff_t dst_stride,

John Koleszar

2013-01-25 09:47:09 -0800

[diff] [blame]

43

const int16_t *filter_x0, int x_step_q4,

44

const int16_t *filter_y, int y_step_q4,

45

int w, int h, int taps) {

46

int x, y, k, sum;

47

const int16_t *filter_x_base = filter_x0;

48

49

#if ALIGN_FILTERS_256

50

filter_x_base = (const int16_t *)(((intptr_t)filter_x0) & ~(intptr_t)0xff);

51

#endif

52

53

/* Adjust base pointer address for this source line */

54

src -= taps / 2 - 1;

55

56

for (y = 0; y < h; ++y) {

57

/* Pointer to filter to use */

58

const int16_t *filter_x = filter_x0;

59

60

/* Initial phase offset */

John Koleszar

2013-02-20 15:59:20 -0800

[diff] [blame]

61

int x0_q4 = (filter_x - filter_x_base) / taps;

62

int x_q4 = x0_q4;

John Koleszar

2013-01-25 09:47:09 -0800

[diff] [blame]

63

64

for (x = 0; x < w; ++x) {

65

/* Per-pixel src offset */

John Koleszar

2013-02-20 15:59:20 -0800

[diff] [blame]

66

int src_x = (x_q4 - x0_q4) >> 4;

John Koleszar

2013-01-25 09:47:09 -0800

[diff] [blame]

67

68

for (sum = 0, k = 0; k < taps; ++k) {

69

sum += src[src_x + k] * filter_x[k];

70

}

71

sum += (VP9_FILTER_WEIGHT >> 1);

72

dst[x] = clip_pixel(sum >> VP9_FILTER_SHIFT);

73

74

/* Adjust source and filter to use for the next pixel */

75

x_q4 += x_step_q4;

76

filter_x = filter_x_base + (x_q4 & 0xf) * taps;

}

src += src_stride;

dst += dst_stride;

}

}

Ronald S. Bultje

2013-07-10 11:17:19 -0700

[diff] [blame]

83

static void convolve_avg_horiz_c(const uint8_t *src, ptrdiff_t src_stride,

84

uint8_t *dst, ptrdiff_t dst_stride,

John Koleszar

2013-01-25 09:47:09 -0800

[diff] [blame]

85

const int16_t *filter_x0, int x_step_q4,

86

const int16_t *filter_y, int y_step_q4,

87

int w, int h, int taps) {

88

int x, y, k, sum;

89

const int16_t *filter_x_base = filter_x0;

90

91

#if ALIGN_FILTERS_256

92

filter_x_base = (const int16_t *)(((intptr_t)filter_x0) & ~(intptr_t)0xff);

93

#endif

94

95

/* Adjust base pointer address for this source line */

96

src -= taps / 2 - 1;

97

98

for (y = 0; y < h; ++y) {

99

/* Pointer to filter to use */

100

const int16_t *filter_x = filter_x0;

101

102

/* Initial phase offset */

John Koleszar

2013-02-20 15:59:20 -0800

[diff] [blame]

103

int x0_q4 = (filter_x - filter_x_base) / taps;

104

int x_q4 = x0_q4;

John Koleszar

2013-01-25 09:47:09 -0800

[diff] [blame]

105

106

for (x = 0; x < w; ++x) {

107

/* Per-pixel src offset */

John Koleszar

2013-02-20 15:59:20 -0800

[diff] [blame]

108

int src_x = (x_q4 - x0_q4) >> 4;

John Koleszar

2013-01-25 09:47:09 -0800

[diff] [blame]

109

110

for (sum = 0, k = 0; k < taps; ++k) {

111

sum += src[src_x + k] * filter_x[k];

112

}

113

sum += (VP9_FILTER_WEIGHT >> 1);

114

dst[x] = (dst[x] + clip_pixel(sum >> VP9_FILTER_SHIFT) + 1) >> 1;

115

116

/* Adjust source and filter to use for the next pixel */

117

x_q4 += x_step_q4;

118

filter_x = filter_x_base + (x_q4 & 0xf) * taps;

}

src += src_stride;

dst += dst_stride;

}

}

Ronald S. Bultje

2013-07-10 11:17:19 -0700

[diff] [blame]

125

static void convolve_vert_c(const uint8_t *src, ptrdiff_t src_stride,

126

uint8_t *dst, ptrdiff_t dst_stride,

John Koleszar

2013-01-25 09:47:09 -0800

[diff] [blame]

127

const int16_t *filter_x, int x_step_q4,

128

const int16_t *filter_y0, int y_step_q4,

129

int w, int h, int taps) {

130

int x, y, k, sum;

131

132

const int16_t *filter_y_base = filter_y0;

133

134

#if ALIGN_FILTERS_256

135

filter_y_base = (const int16_t *)(((intptr_t)filter_y0) & ~(intptr_t)0xff);

136

#endif

137

138

/* Adjust base pointer address for this source column */

139

src -= src_stride * (taps / 2 - 1);

140

for (x = 0; x < w; ++x) {

141

/* Pointer to filter to use */

142

const int16_t *filter_y = filter_y0;

143

144

/* Initial phase offset */

John Koleszar

2013-02-20 15:59:20 -0800

[diff] [blame]

145

int y0_q4 = (filter_y - filter_y_base) / taps;

146

int y_q4 = y0_q4;

John Koleszar

2013-01-25 09:47:09 -0800

[diff] [blame]

147

148

for (y = 0; y < h; ++y) {

149

/* Per-pixel src offset */

John Koleszar

2013-02-20 15:59:20 -0800

[diff] [blame]

150

int src_y = (y_q4 - y0_q4) >> 4;

John Koleszar

2013-01-25 09:47:09 -0800

[diff] [blame]

151

152

for (sum = 0, k = 0; k < taps; ++k) {

153

sum += src[(src_y + k) * src_stride] * filter_y[k];

154

}

155

sum += (VP9_FILTER_WEIGHT >> 1);

156

dst[y * dst_stride] = clip_pixel(sum >> VP9_FILTER_SHIFT);

157

158

/* Adjust source and filter to use for the next pixel */

159

y_q4 += y_step_q4;

160

filter_y = filter_y_base + (y_q4 & 0xf) * taps;

}

++src;

++dst;

}

}

Ronald S. Bultje

2013-07-10 11:17:19 -0700

[diff] [blame]

167

static void convolve_avg_vert_c(const uint8_t *src, ptrdiff_t src_stride,

168

uint8_t *dst, ptrdiff_t dst_stride,

John Koleszar

2013-01-25 09:47:09 -0800

[diff] [blame]

169

const int16_t *filter_x, int x_step_q4,

170

const int16_t *filter_y0, int y_step_q4,

171

int w, int h, int taps) {

172

int x, y, k, sum;

173

174

const int16_t *filter_y_base = filter_y0;

175

176

#if ALIGN_FILTERS_256

177

filter_y_base = (const int16_t *)(((intptr_t)filter_y0) & ~(intptr_t)0xff);

178

#endif

179

180

/* Adjust base pointer address for this source column */

181

src -= src_stride * (taps / 2 - 1);

182

for (x = 0; x < w; ++x) {

183

/* Pointer to filter to use */

184

const int16_t *filter_y = filter_y0;

185

186

/* Initial phase offset */

John Koleszar

2013-02-20 15:59:20 -0800

[diff] [blame]

187

int y0_q4 = (filter_y - filter_y_base) / taps;

188

int y_q4 = y0_q4;

John Koleszar

2013-01-25 09:47:09 -0800

[diff] [blame]

189

190

for (y = 0; y < h; ++y) {

191

/* Per-pixel src offset */

John Koleszar

2013-02-20 15:59:20 -0800

[diff] [blame]

192

int src_y = (y_q4 - y0_q4) >> 4;

John Koleszar

2013-01-25 09:47:09 -0800

[diff] [blame]

193

194

for (sum = 0, k = 0; k < taps; ++k) {

195

sum += src[(src_y + k) * src_stride] * filter_y[k];

196

}

197

sum += (VP9_FILTER_WEIGHT >> 1);

198

dst[y * dst_stride] =

199

(dst[y * dst_stride] + clip_pixel(sum >> VP9_FILTER_SHIFT) + 1) >> 1;

200

201

/* Adjust source and filter to use for the next pixel */

202

y_q4 += y_step_q4;

203

filter_y = filter_y_base + (y_q4 & 0xf) * taps;

}

++src;

++dst;

}

}

Ronald S. Bultje

2013-07-10 11:17:19 -0700

[diff] [blame]

210

/* Two-pass (horizontal, then vertical) sub-pixel convolution.
 *
 * The horizontal pass filters the source rows needed by the vertical pass
 * into a 64-pixel-wide stack buffer; the vertical pass then filters that
 * buffer into dst.
 *
 *   src, src_stride     source pixels and the distance between source rows
 *   dst, dst_stride     destination pixels and the distance between rows
 *   filter_x, x_step_q4 horizontal kernel and step (1/16 pixel units)
 *   filter_y, y_step_q4 vertical kernel and step (1/16 pixel units)
 *   w, h                output width and height, each at most 64
 *   taps                number of coefficients in each kernel, at most 8
 */
static void convolve_c(const uint8_t *src, ptrdiff_t src_stride,
                       uint8_t *dst, ptrdiff_t dst_stride,
                       const int16_t *filter_x, int x_step_q4,
                       const int16_t *filter_y, int y_step_q4,
                       int w, int h, int taps) {
  /* Fixed size intermediate buffer places limits on parameters.
   * Maximum intermediate_height is 135, for y_step_q4 == 32,
   * h == 64, taps == 8.
   */
  uint8_t temp[64 * 135];
  int intermediate_height = MAX(((h * y_step_q4) >> 4), 1) + taps - 1;

  /* The vertical pass reads intermediate rows 0 through
   * (((h - 1) * y_step_q4) >> 4) + taps - 1.  The estimate above can be one
   * row short of that when upscaling (e.g. h == 4, y_step_q4 == 6), which
   * would make the vertical pass filter an uninitialised row of temp.
   */
  const int rows_read = (((h - 1) * y_step_q4) >> 4) + taps;

  assert(w <= 64);
  assert(h <= 64);
  assert(taps <= 8);
  assert(y_step_q4 <= 32);
  assert(x_step_q4 <= 32);

  if (intermediate_height < h)
    intermediate_height = h;
  if (intermediate_height < rows_read)
    intermediate_height = rows_read;

  /* With the limits asserted above this never exceeds the buffer. */
  assert(intermediate_height <= 135);

  /* Start taps / 2 - 1 rows above the block so the vertical kernel has the
   * rows it needs above the first output row.
   */
  convolve_horiz_c(src - src_stride * (taps / 2 - 1), src_stride,
                   temp, 64,
                   filter_x, x_step_q4, filter_y, y_step_q4,
                   w, intermediate_height, taps);
  convolve_vert_c(temp + 64 * (taps / 2 - 1), 64, dst, dst_stride,
                  filter_x, x_step_q4, filter_y, y_step_q4,
                  w, h, taps);
}

Ronald S. Bultje

2013-07-10 11:17:19 -0700

[diff] [blame]

240

/* C implementation of the 8-tap horizontal convolution: forwards every
 * argument to convolve_horiz_c with the tap count fixed at 8.
 */
void vp9_convolve8_horiz_c(const uint8_t *src, ptrdiff_t src_stride,
                           uint8_t *dst, ptrdiff_t dst_stride,
                           const int16_t *filter_x, int x_step_q4,
                           const int16_t *filter_y, int y_step_q4,
                           int w, int h) {
  convolve_horiz_c(src, src_stride, dst, dst_stride,
                   filter_x, x_step_q4, filter_y, y_step_q4, w, h, 8);
}

248

Ronald S. Bultje

2013-07-10 11:17:19 -0700

[diff] [blame]

249

/* C implementation of the 8-tap horizontal convolution that averages its
 * result with the existing contents of dst: forwards every argument to
 * convolve_avg_horiz_c with the tap count fixed at 8.
 */
void vp9_convolve8_avg_horiz_c(const uint8_t *src, ptrdiff_t src_stride,
                               uint8_t *dst, ptrdiff_t dst_stride,
                               const int16_t *filter_x, int x_step_q4,
                               const int16_t *filter_y, int y_step_q4,
                               int w, int h) {
  convolve_avg_horiz_c(src, src_stride, dst, dst_stride,
                       filter_x, x_step_q4, filter_y, y_step_q4, w, h, 8);
}

257

Ronald S. Bultje

2013-07-10 11:17:19 -0700

[diff] [blame]

258

/* C implementation of the 8-tap vertical convolution: forwards every
 * argument to convolve_vert_c with the tap count fixed at 8.
 */
void vp9_convolve8_vert_c(const uint8_t *src, ptrdiff_t src_stride,
                          uint8_t *dst, ptrdiff_t dst_stride,
                          const int16_t *filter_x, int x_step_q4,
                          const int16_t *filter_y, int y_step_q4,
                          int w, int h) {
  convolve_vert_c(src, src_stride, dst, dst_stride,
                  filter_x, x_step_q4, filter_y, y_step_q4, w, h, 8);
}

266

Ronald S. Bultje

2013-07-10 11:17:19 -0700

[diff] [blame]

267

/* C implementation of the 8-tap vertical convolution that averages its
 * result with the existing contents of dst: forwards every argument to
 * convolve_avg_vert_c with the tap count fixed at 8.
 */
void vp9_convolve8_avg_vert_c(const uint8_t *src, ptrdiff_t src_stride,
                              uint8_t *dst, ptrdiff_t dst_stride,
                              const int16_t *filter_x, int x_step_q4,
                              const int16_t *filter_y, int y_step_q4,
                              int w, int h) {
  convolve_avg_vert_c(src, src_stride, dst, dst_stride,
                      filter_x, x_step_q4, filter_y, y_step_q4, w, h, 8);
}

275

Ronald S. Bultje

2013-07-10 11:17:19 -0700

[diff] [blame]

276

/* C implementation of the 8-tap two-dimensional convolution: forwards every
 * argument to convolve_c with the tap count fixed at 8.
 */
void vp9_convolve8_c(const uint8_t *src, ptrdiff_t src_stride,
                     uint8_t *dst, ptrdiff_t dst_stride,
                     const int16_t *filter_x, int x_step_q4,
                     const int16_t *filter_y, int y_step_q4,
                     int w, int h) {
  convolve_c(src, src_stride, dst, dst_stride,
             filter_x, x_step_q4, filter_y, y_step_q4, w, h, 8);
}

284

Ronald S. Bultje

2013-07-10 11:17:19 -0700

[diff] [blame]

285

void vp9_convolve8_avg_c(const uint8_t *src, ptrdiff_t src_stride,

286

uint8_t *dst, ptrdiff_t dst_stride,

John Koleszar

2013-01-25 09:47:09 -0800

[diff] [blame]

287

const int16_t *filter_x, int x_step_q4,

288

const int16_t *filter_y, int y_step_q4,

289

int w, int h) {

Christian Duvivier

2013-02-11 15:34:08 -0800

[diff] [blame]

290

/* Fixed size intermediate buffer places limits on parameters. */

John Koleszar

2013-04-18 13:05:38 -0700

[diff] [blame]

291

DECLARE_ALIGNED_ARRAY(16, uint8_t, temp, 64 * 64);

292

assert(w <= 64);

293

assert(h <= 64);

Christian Duvivier

2013-02-11 15:34:08 -0800

[diff] [blame]

294

Dmitry Kovalev

2013-08-12 14:28:00 -0700

[diff] [blame^]

295

vp9_convolve8(src, src_stride, temp, 64,

296

filter_x, x_step_q4, filter_y, y_step_q4, w, h);

297

vp9_convolve_avg(temp, 64, dst, dst_stride, NULL, 0, NULL, 0, w, h);

John Koleszar

2013-01-25 09:47:09 -0800

[diff] [blame]

298

}

John Koleszar

2013-01-28 16:59:03 -0800

[diff] [blame]

299

Ronald S. Bultje

2013-07-10 11:17:19 -0700

[diff] [blame]

300

/* Copies a w x h block of pixels from src to dst without filtering.
 *
 *   src, src_stride   source pixels and the distance between source rows
 *   dst, dst_stride   destination pixels and the distance between rows
 *   filter_x, filter_x_stride, filter_y, filter_y_stride
 *                     not used by this function
 *   w, h              block width and height in pixels; nothing is copied
 *                     when either is not positive
 */
void vp9_convolve_copy_c(const uint8_t *src, ptrdiff_t src_stride,
                         uint8_t *dst, ptrdiff_t dst_stride,
                         const int16_t *filter_x, int filter_x_stride,
                         const int16_t *filter_y, int filter_y_stride,
                         int w, int h) {
  int r;

  (void)filter_x;
  (void)filter_x_stride;
  (void)filter_y;
  (void)filter_y_stride;

  /* A negative width would turn into a huge size_t inside memcpy. */
  if (w <= 0)
    return;

  for (r = h; r > 0; --r) {
    memcpy(dst, src, (size_t)w);
    src += src_stride;
    dst += dst_stride;
  }
}

Ronald S. Bultje

2013-07-10 11:17:19 -0700

[diff] [blame]

314

void vp9_convolve_avg_c(const uint8_t *src, ptrdiff_t src_stride,

315

uint8_t *dst, ptrdiff_t dst_stride,

316

const int16_t *filter_x, int filter_x_stride,

317

const int16_t *filter_y, int filter_y_stride,

318

int w, int h) {

John Koleszar

2013-01-28 16:59:03 -0800

[diff] [blame]

319

int x, y;

320

321

for (y = 0; y < h; ++y) {

Dmitry Kovalev

2013-08-12 14:28:00 -0700

[diff] [blame^]

322

for (x = 0; x < w; ++x)

323

dst[x] = ROUND_POWER_OF_TWO(dst[x] + src[x], 1);

324

John Koleszar