/*
 * Copyright (c) 2016, Alliance for Open Media. All rights reserved
 *
 * This source code is subject to the terms of the BSD 2 Clause License and
 * the Alliance for Open Media Patent License 1.0. If the BSD 2 Clause License
 * was not distributed with this source code in the LICENSE file, you can
 * obtain it at www.aomedia.org/license/software. If the Alliance for Open
 * Media Patent License 1.0 was not distributed with this source code in the
 * PATENTS file, you can obtain it at www.aomedia.org/license/patent.
 */

#include <assert.h>
#include <string.h>

#include "./aom_config.h"
#include "./aom_dsp_rtcd.h"
#include "aom/aom_integer.h"
#include "aom_dsp/aom_convolve.h"
#include "aom_dsp/aom_dsp_common.h"
#include "aom_dsp/aom_filter.h"
#include "aom_ports/mem.h"

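// Scalar dot-product helpers: each computes the SUBPEL_TAPS-tap (8-tap) sum
// of source samples against one filter kernel, reading consecutive pixels in
// the horizontal case and stride-separated pixels in the vertical case.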
static INLINE int horz_scalar_product(const uint8_t *a, const int16_t *b) {
  int sum = 0;
  for (int k = 0; k < SUBPEL_TAPS; ++k) sum += a[k] * b[k];
  return sum;
}

static INLINE int vert_scalar_product(const uint8_t *a, ptrdiff_t a_stride,
                                      const int16_t *b) {
  int sum = 0;
  for (int k = 0; k < SUBPEL_TAPS; ++k) sum += a[k * a_stride] * b[k];
  return sum;
}

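// The *_q4 convolutions track the sub-pixel source position in a fixed-point
// counter: the bits above SUBPEL_BITS select the integer source pixel and the
// low bits (masked with SUBPEL_MASK) select one of the SUBPEL_SHIFTS filter
// phases. Stepping the counter by x_step_q4 / y_step_q4 per output pixel
// handles interpolation and scaling with the same loop.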
static void convolve_horiz(const uint8_t *src, ptrdiff_t src_stride,
                           uint8_t *dst, ptrdiff_t dst_stride,
                           const InterpKernel *x_filters, int x0_q4,
                           int x_step_q4, int w, int h) {
  src -= SUBPEL_TAPS / 2 - 1;
  for (int y = 0; y < h; ++y) {
    int x_q4 = x0_q4;
    for (int x = 0; x < w; ++x) {
      const uint8_t *const src_x = &src[x_q4 >> SUBPEL_BITS];
      const int16_t *const x_filter = x_filters[x_q4 & SUBPEL_MASK];
      const int sum = horz_scalar_product(src_x, x_filter);
      dst[x] = clip_pixel(ROUND_POWER_OF_TWO(sum, FILTER_BITS));
      x_q4 += x_step_q4;
    }
    src += src_stride;
    dst += dst_stride;
  }
}

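// The *_scale_c variants follow the same structure but keep the position
// counter in the higher-precision "qn" format: SCALE_SUBPEL_BITS fractional
// bits per pixel, of which only the top bits (after dropping SCALE_EXTRA_BITS)
// select the interpolation filter phase.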
static void convolve_horiz_scale_c(const uint8_t *src, ptrdiff_t src_stride,
                                   uint8_t *dst, ptrdiff_t dst_stride,
                                   const InterpKernel *x_filters, int x0_qn,
                                   int x_step_qn, int w, int h) {
  src -= SUBPEL_TAPS / 2 - 1;
  for (int y = 0; y < h; ++y) {
    int x_qn = x0_qn;
    for (int x = 0; x < w; ++x) {
      const uint8_t *const src_x = &src[x_qn >> SCALE_SUBPEL_BITS];  // q8
      const int x_filter_idx = (x_qn & SCALE_SUBPEL_MASK) >> SCALE_EXTRA_BITS;
      assert(x_filter_idx < SUBPEL_SHIFTS);
      const int16_t *const x_filter = x_filters[x_filter_idx];
      const int sum = horz_scalar_product(src_x, x_filter);
      dst[x] = clip_pixel(ROUND_POWER_OF_TWO(sum, FILTER_BITS));
      x_qn += x_step_qn;
    }
    src += src_stride;
    dst += dst_stride;
  }
}

static void convolve_avg_horiz(const uint8_t *src, ptrdiff_t src_stride,
                               uint8_t *dst, ptrdiff_t dst_stride,
                               const InterpKernel *x_filters, int x0_q4,
                               int x_step_q4, int w, int h) {
  src -= SUBPEL_TAPS / 2 - 1;
  for (int y = 0; y < h; ++y) {
    int x_q4 = x0_q4;
    for (int x = 0; x < w; ++x) {
      const uint8_t *const src_x = &src[x_q4 >> SUBPEL_BITS];
      const int16_t *const x_filter = x_filters[x_q4 & SUBPEL_MASK];
      const int sum = horz_scalar_product(src_x, x_filter);
      dst[x] = ROUND_POWER_OF_TWO(
          dst[x] + clip_pixel(ROUND_POWER_OF_TWO(sum, FILTER_BITS)), 1);
      x_q4 += x_step_q4;
    }
    src += src_stride;
    dst += dst_stride;
  }
}

static void convolve_avg_horiz_scale_c(const uint8_t *src, ptrdiff_t src_stride,
                                       uint8_t *dst, ptrdiff_t dst_stride,
                                       const InterpKernel *x_filters, int x0_qn,
                                       int x_step_qn, int w, int h) {
  src -= SUBPEL_TAPS / 2 - 1;
  for (int y = 0; y < h; ++y) {
    int x_qn = x0_qn;
    for (int x = 0; x < w; ++x) {
      const uint8_t *const src_x = &src[x_qn >> SCALE_SUBPEL_BITS];
      const int x_filter_idx = (x_qn & SCALE_SUBPEL_MASK) >> SCALE_EXTRA_BITS;
      assert(x_filter_idx < SUBPEL_SHIFTS);
      const int16_t *const x_filter = x_filters[x_filter_idx];
      const int sum = horz_scalar_product(src_x, x_filter);
      dst[x] = ROUND_POWER_OF_TWO(
          dst[x] + clip_pixel(ROUND_POWER_OF_TWO(sum, FILTER_BITS)), 1);
      x_qn += x_step_qn;
    }
    src += src_stride;
    dst += dst_stride;
  }
}

static void convolve_vert(const uint8_t *src, ptrdiff_t src_stride,
                          uint8_t *dst, ptrdiff_t dst_stride,
                          const InterpKernel *y_filters, int y0_q4,
                          int y_step_q4, int w, int h) {
  src -= src_stride * (SUBPEL_TAPS / 2 - 1);

  for (int x = 0; x < w; ++x) {
    int y_q4 = y0_q4;
    for (int y = 0; y < h; ++y) {
      const unsigned char *src_y = &src[(y_q4 >> SUBPEL_BITS) * src_stride];
      const int16_t *const y_filter = y_filters[y_q4 & SUBPEL_MASK];
      const int sum = vert_scalar_product(src_y, src_stride, y_filter);
      dst[y * dst_stride] = clip_pixel(ROUND_POWER_OF_TWO(sum, FILTER_BITS));
      y_q4 += y_step_q4;
    }
    ++src;
    ++dst;
  }
}

static void convolve_vert_scale_c(const uint8_t *src, ptrdiff_t src_stride,
                                  uint8_t *dst, ptrdiff_t dst_stride,
                                  const InterpKernel *y_filters, int y0_qn,
                                  int y_step_qn, int w, int h) {
  src -= src_stride * (SUBPEL_TAPS / 2 - 1);

  for (int x = 0; x < w; ++x) {
    int y_qn = y0_qn;
    for (int y = 0; y < h; ++y) {
      const unsigned char *src_y =
          &src[(y_qn >> SCALE_SUBPEL_BITS) * src_stride];
      const int16_t *const y_filter =
          y_filters[(y_qn & SCALE_SUBPEL_MASK) >> SCALE_EXTRA_BITS];
      const int sum = vert_scalar_product(src_y, src_stride, y_filter);
      dst[y * dst_stride] = clip_pixel(ROUND_POWER_OF_TWO(sum, FILTER_BITS));
      y_qn += y_step_qn;
    }
    ++src;
    ++dst;
  }
}

static void convolve_avg_vert(const uint8_t *src, ptrdiff_t src_stride,
                              uint8_t *dst, ptrdiff_t dst_stride,
                              const InterpKernel *y_filters, int y0_q4,
                              int y_step_q4, int w, int h) {
  src -= src_stride * (SUBPEL_TAPS / 2 - 1);

  for (int x = 0; x < w; ++x) {
    int y_q4 = y0_q4;
    for (int y = 0; y < h; ++y) {
      const unsigned char *src_y = &src[(y_q4 >> SUBPEL_BITS) * src_stride];
      const int16_t *const y_filter = y_filters[y_q4 & SUBPEL_MASK];
      const int sum = vert_scalar_product(src_y, src_stride, y_filter);
      dst[y * dst_stride] = ROUND_POWER_OF_TWO(
          dst[y * dst_stride] +
              clip_pixel(ROUND_POWER_OF_TWO(sum, FILTER_BITS)),
          1);
      y_q4 += y_step_q4;
    }
    ++src;
    ++dst;
  }
}

static void convolve_avg_vert_scale_c(const uint8_t *src, ptrdiff_t src_stride,
                                      uint8_t *dst, ptrdiff_t dst_stride,
                                      const InterpKernel *y_filters, int y0_qn,
                                      int y_step_qn, int w, int h) {
  src -= src_stride * (SUBPEL_TAPS / 2 - 1);

  for (int x = 0; x < w; ++x) {
    int y_qn = y0_qn;
    for (int y = 0; y < h; ++y) {
      const unsigned char *src_y =
          &src[(y_qn >> SCALE_SUBPEL_BITS) * src_stride];
      const int16_t *const y_filter =
          y_filters[(y_qn & SCALE_SUBPEL_MASK) >> SCALE_EXTRA_BITS];
      const int sum = vert_scalar_product(src_y, src_stride, y_filter);
      dst[y * dst_stride] = ROUND_POWER_OF_TWO(
          dst[y * dst_stride] +
              clip_pixel(ROUND_POWER_OF_TWO(sum, FILTER_BITS)),
          1);
      y_qn += y_step_qn;
    }
    ++src;
    ++dst;
  }
}

static void convolve(const uint8_t *src, ptrdiff_t src_stride, uint8_t *dst,
                     ptrdiff_t dst_stride, const InterpKernel *const x_filters,
                     int x0_q4, int x_step_q4,
                     const InterpKernel *const y_filters, int y0_q4,
                     int y_step_q4, int w, int h) {
  // Note: Fixed size intermediate buffer, temp, places limits on parameters.
  // 2d filtering proceeds in 2 steps:
  //   (1) Interpolate horizontally into an intermediate buffer, temp.
  //   (2) Interpolate temp vertically to derive the sub-pixel result.
  // Deriving the maximum number of rows in the temp buffer (135):
  // --Smallest scaling factor is x1/2 ==> y_step_q4 = 32 (Normative).
  // --Largest block size is 64x64 pixels.
  // --64 rows in the downscaled frame span a distance of (64 - 1) * 32 in the
  //   original frame (in 1/16th pixel units).
  // --Must round-up because block may be located at sub-pixel position.
  // --Require an additional SUBPEL_TAPS rows for the 8-tap filter tails.
  // --((64 - 1) * 32 + 15) >> 4 + 8 = 135.
  uint8_t temp[MAX_EXT_SIZE * MAX_SB_SIZE];
  const int intermediate_height =
      (((h - 1) * y_step_q4 + y0_q4) >> SUBPEL_BITS) + SUBPEL_TAPS;

  assert(w <= MAX_SB_SIZE);
  assert(h <= MAX_SB_SIZE);

  assert(y_step_q4 <= 32);
  assert(x_step_q4 <= 32);

  convolve_horiz(src - src_stride * (SUBPEL_TAPS / 2 - 1), src_stride, temp,
                 MAX_SB_SIZE, x_filters, x0_q4, x_step_q4, w,
                 intermediate_height);
  convolve_vert(temp + MAX_SB_SIZE * (SUBPEL_TAPS / 2 - 1), MAX_SB_SIZE, dst,
                dst_stride, y_filters, y0_q4, y_step_q4, w, h);
}

static void convolve_scale_c(const uint8_t *src, ptrdiff_t src_stride,
                             uint8_t *dst, ptrdiff_t dst_stride,
                             const InterpKernel *const x_filters, int x0_qn,
                             int x_step_qn, const InterpKernel *const y_filters,
                             int y0_qn, int y_step_qn, int w, int h) {
  // TODO(afergs): Update comment here
  // Note: Fixed size intermediate buffer, temp, places limits on parameters.
  // 2d filtering proceeds in 2 steps:
  //   (1) Interpolate horizontally into an intermediate buffer, temp.
  //   (2) Interpolate temp vertically to derive the sub-pixel result.
  // Deriving the maximum number of rows in the temp buffer (135):
  // --Smallest scaling factor is x1/2 ==> y_step_qn = 32 (Normative).
  // --Largest block size is 64x64 pixels.
  // --64 rows in the downscaled frame span a distance of (64 - 1) * 32 in the
  //   original frame (in 1/16th pixel units).
  // --Must round-up because block may be located at sub-pixel position.
  // --Require an additional SUBPEL_TAPS rows for the 8-tap filter tails.
  // --((64 - 1) * 32 + 15) >> 4 + 8 = 135.
  uint8_t temp[MAX_EXT_SIZE * MAX_SB_SIZE];
  const int intermediate_height =
      (((h - 1) * y_step_qn + y0_qn) >> SCALE_SUBPEL_BITS) + SUBPEL_TAPS;

  assert(w <= MAX_SB_SIZE);
  assert(h <= MAX_SB_SIZE);

  assert(y_step_qn <= SCALE_SUBPEL_BITS * 2);
  assert(x_step_qn <= SCALE_SUBPEL_BITS * 2);

  convolve_horiz_scale_c(src - src_stride * (SUBPEL_TAPS / 2 - 1), src_stride,
                         temp, MAX_SB_SIZE, x_filters, x0_qn, x_step_qn, w,
                         intermediate_height);
  convolve_vert_scale_c(temp + MAX_SB_SIZE * (SUBPEL_TAPS / 2 - 1), MAX_SB_SIZE,
                        dst, dst_stride, y_filters, y0_qn, y_step_qn, w, h);
}

static const InterpKernel *get_filter_base(const int16_t *filter) {
  // NOTE: This assumes that the filter table is 256-byte aligned.
  // TODO(agrange) Modify to make independent of table alignment.
  return (const InterpKernel *)(((intptr_t)filter) & ~((intptr_t)0xFF));
}

static int get_filter_offset(const int16_t *f, const InterpKernel *base) {
  return (int)((const InterpKernel *)(intptr_t)f - base);
}

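// Public aom_convolve8_* entry points. The filter pointer passed in is an
// entry of an aligned filter bank; get_filter_base()/get_filter_offset()
// recover the bank and the starting phase from that pointer.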
void aom_convolve8_horiz_c(const uint8_t *src, ptrdiff_t src_stride,
                           uint8_t *dst, ptrdiff_t dst_stride,
                           const int16_t *filter_x, int x_step_q4,
                           const int16_t *filter_y, int y_step_q4, int w,
                           int h) {
  const InterpKernel *const filters_x = get_filter_base(filter_x);
  const int x0_q4 = get_filter_offset(filter_x, filters_x);

  (void)filter_y;
  (void)y_step_q4;

  convolve_horiz(src, src_stride, dst, dst_stride, filters_x, x0_q4, x_step_q4,
                 w, h);
}

void aom_convolve8_horiz_scale_c(const uint8_t *src, ptrdiff_t src_stride,
                                 uint8_t *dst, ptrdiff_t dst_stride,
                                 const int16_t *filter_x, int subpel_x,
                                 int x_step_qn, const int16_t *filter_y,
                                 int subpel_y, int y_step_qn, int w, int h) {
  const InterpKernel *const filters_x = get_filter_base(filter_x);

  (void)subpel_y;
  (void)filter_y;
  (void)y_step_qn;

  convolve_horiz_scale_c(src, src_stride, dst, dst_stride, filters_x, subpel_x,
                         x_step_qn, w, h);
}

void aom_convolve8_avg_horiz_c(const uint8_t *src, ptrdiff_t src_stride,
                               uint8_t *dst, ptrdiff_t dst_stride,
                               const int16_t *filter_x, int x_step_q4,
                               const int16_t *filter_y, int y_step_q4, int w,
                               int h) {
  const InterpKernel *const filters_x = get_filter_base(filter_x);
  const int x0_q4 = get_filter_offset(filter_x, filters_x);

  (void)filter_y;
  (void)y_step_q4;

  convolve_avg_horiz(src, src_stride, dst, dst_stride, filters_x, x0_q4,
                     x_step_q4, w, h);
}

void aom_convolve8_avg_horiz_scale_c(const uint8_t *src, ptrdiff_t src_stride,
                                     uint8_t *dst, ptrdiff_t dst_stride,
                                     const int16_t *filter_x, int subpel_x,
                                     int x_step_qn, const int16_t *filter_y,
                                     int subpel_y, int y_step_qn, int w,
                                     int h) {
  const InterpKernel *const filters_x = get_filter_base(filter_x);

  (void)subpel_y;
  (void)filter_y;
  (void)y_step_qn;

  convolve_avg_horiz_scale_c(src, src_stride, dst, dst_stride, filters_x,
                             subpel_x, x_step_qn, w, h);
}

void aom_convolve8_vert_c(const uint8_t *src, ptrdiff_t src_stride,
                          uint8_t *dst, ptrdiff_t dst_stride,
                          const int16_t *filter_x, int x_step_q4,
                          const int16_t *filter_y, int y_step_q4, int w,
                          int h) {
  const InterpKernel *const filters_y = get_filter_base(filter_y);
  const int y0_q4 = get_filter_offset(filter_y, filters_y);

  (void)filter_x;
  (void)x_step_q4;

  convolve_vert(src, src_stride, dst, dst_stride, filters_y, y0_q4, y_step_q4,
                w, h);
}

void aom_convolve8_vert_scale_c(const uint8_t *src, ptrdiff_t src_stride,
                                uint8_t *dst, ptrdiff_t dst_stride,
                                const int16_t *filter_x, int subpel_x,
                                int x_step_qn, const int16_t *filter_y,
                                int subpel_y, int y_step_qn, int w, int h) {
  const InterpKernel *const filters_y = get_filter_base(filter_y);

  (void)subpel_x;
  (void)filter_x;
  (void)x_step_qn;

  convolve_vert_scale_c(src, src_stride, dst, dst_stride, filters_y, subpel_y,
                        y_step_qn, w, h);
}

void aom_convolve8_avg_vert_c(const uint8_t *src, ptrdiff_t src_stride,
                              uint8_t *dst, ptrdiff_t dst_stride,
                              const int16_t *filter_x, int x_step_q4,
                              const int16_t *filter_y, int y_step_q4, int w,
                              int h) {
  const InterpKernel *const filters_y = get_filter_base(filter_y);
  const int y0_q4 = get_filter_offset(filter_y, filters_y);

  (void)filter_x;
  (void)x_step_q4;

  convolve_avg_vert(src, src_stride, dst, dst_stride, filters_y, y0_q4,
                    y_step_q4, w, h);
}

void aom_convolve8_avg_vert_scale_c(const uint8_t *src, ptrdiff_t src_stride,
                                    uint8_t *dst, ptrdiff_t dst_stride,
                                    const int16_t *filter_x, int subpel_x,
                                    int x_step_qn, const int16_t *filter_y,
                                    int subpel_y, int y_step_qn, int w, int h) {
  const InterpKernel *const filters_y = get_filter_base(filter_y);

  (void)subpel_x;
  (void)filter_x;
  (void)x_step_qn;

  convolve_avg_vert_scale_c(src, src_stride, dst, dst_stride, filters_y,
                            subpel_y, y_step_qn, w, h);
}

void aom_convolve8_c(const uint8_t *src, ptrdiff_t src_stride, uint8_t *dst,
                     ptrdiff_t dst_stride, const int16_t *filter_x,
                     int x_step_q4, const int16_t *filter_y, int y_step_q4,
                     int w, int h) {
  const InterpKernel *const filters_x = get_filter_base(filter_x);
  const int x0_q4 = get_filter_offset(filter_x, filters_x);

  const InterpKernel *const filters_y = get_filter_base(filter_y);
  const int y0_q4 = get_filter_offset(filter_y, filters_y);

  convolve(src, src_stride, dst, dst_stride, filters_x, x0_q4, x_step_q4,
           filters_y, y0_q4, y_step_q4, w, h);
}

void aom_convolve8_scale_c(const uint8_t *src, ptrdiff_t src_stride,
                           uint8_t *dst, ptrdiff_t dst_stride,
                           const int16_t *filter_x, int subpel_x, int x_step_qn,
                           const int16_t *filter_y, int subpel_y, int y_step_qn,
                           int w, int h) {
  const InterpKernel *const filters_x = get_filter_base(filter_x);

  const InterpKernel *const filters_y = get_filter_base(filter_y);

  convolve_scale_c(src, src_stride, dst, dst_stride, filters_x, subpel_x,
                   x_step_qn, filters_y, subpel_y, y_step_qn, w, h);
}

void aom_convolve8_avg_c(const uint8_t *src, ptrdiff_t src_stride, uint8_t *dst,
                         ptrdiff_t dst_stride, const int16_t *filter_x,
                         int x_step_q4, const int16_t *filter_y, int y_step_q4,
                         int w, int h) {
  /* Fixed size intermediate buffer places limits on parameters. */
  DECLARE_ALIGNED(16, uint8_t, temp[MAX_SB_SIZE * MAX_SB_SIZE]);
  assert(w <= MAX_SB_SIZE);
  assert(h <= MAX_SB_SIZE);

  aom_convolve8_c(src, src_stride, temp, MAX_SB_SIZE, filter_x, x_step_q4,
                  filter_y, y_step_q4, w, h);
  aom_convolve_avg_c(temp, MAX_SB_SIZE, dst, dst_stride, NULL, 0, NULL, 0, w,
                     h);
}

void aom_convolve8_avg_scale_c(const uint8_t *src, ptrdiff_t src_stride,
                               uint8_t *dst, ptrdiff_t dst_stride,
                               const int16_t *filter_x, int subpel_x,
                               int x_step_qn, const int16_t *filter_y,
                               int subpel_y, int y_step_qn, int w, int h) {
  /* Fixed size intermediate buffer places limits on parameters. */
  DECLARE_ALIGNED(16, uint8_t, temp[MAX_SB_SIZE * MAX_SB_SIZE]);
  assert(w <= MAX_SB_SIZE);
  assert(h <= MAX_SB_SIZE);

  aom_convolve8_scale_c(src, src_stride, temp, MAX_SB_SIZE, filter_x, subpel_x,
                        x_step_qn, filter_y, subpel_y, y_step_qn, w, h);
  aom_convolve_avg_c(temp, MAX_SB_SIZE, dst, dst_stride, NULL, 0, NULL, 0, w,
                     h);
}

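// No-filter fallbacks: a plain row copy and a (dst + src + 1) >> 1 average.
// The filter arguments exist only to match the common convolve signature.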
void aom_convolve_copy_c(const uint8_t *src, ptrdiff_t src_stride, uint8_t *dst,
                         ptrdiff_t dst_stride, const int16_t *filter_x,
                         int filter_x_stride, const int16_t *filter_y,
                         int filter_y_stride, int w, int h) {
  int r;

  (void)filter_x;
  (void)filter_x_stride;
  (void)filter_y;
  (void)filter_y_stride;

  for (r = h; r > 0; --r) {
    memcpy(dst, src, w);
    src += src_stride;
    dst += dst_stride;
  }
}

void aom_convolve_avg_c(const uint8_t *src, ptrdiff_t src_stride, uint8_t *dst,
                        ptrdiff_t dst_stride, const int16_t *filter_x,
                        int filter_x_stride, const int16_t *filter_y,
                        int filter_y_stride, int w, int h) {
  (void)filter_x;
  (void)filter_x_stride;
  (void)filter_y;
  (void)filter_y_stride;

  for (int y = 0; y < h; ++y) {
    for (int x = 0; x < w; ++x) dst[x] = ROUND_POWER_OF_TWO(dst[x] + src[x], 1);

    src += src_stride;
    dst += dst_stride;
  }
}

void aom_scaled_horiz_c(const uint8_t *src, ptrdiff_t src_stride, uint8_t *dst,
                        ptrdiff_t dst_stride, const int16_t *filter_x,
                        int x_step_q4, const int16_t *filter_y, int y_step_q4,
                        int w, int h) {
  aom_convolve8_horiz_c(src, src_stride, dst, dst_stride, filter_x, x_step_q4,
                        filter_y, y_step_q4, w, h);
}

void aom_scaled_vert_c(const uint8_t *src, ptrdiff_t src_stride, uint8_t *dst,
                       ptrdiff_t dst_stride, const int16_t *filter_x,
                       int x_step_q4, const int16_t *filter_y, int y_step_q4,
                       int w, int h) {
  aom_convolve8_vert_c(src, src_stride, dst, dst_stride, filter_x, x_step_q4,
                       filter_y, y_step_q4, w, h);
}

void aom_scaled_2d_c(const uint8_t *src, ptrdiff_t src_stride, uint8_t *dst,
                     ptrdiff_t dst_stride, const int16_t *filter_x,
                     int x_step_q4, const int16_t *filter_y, int y_step_q4,
                     int w, int h) {
  aom_convolve8_c(src, src_stride, dst, dst_stride, filter_x, x_step_q4,
                  filter_y, y_step_q4, w, h);
}

void aom_scaled_avg_horiz_c(const uint8_t *src, ptrdiff_t src_stride,
                            uint8_t *dst, ptrdiff_t dst_stride,
                            const int16_t *filter_x, int x_step_q4,
                            const int16_t *filter_y, int y_step_q4, int w,
                            int h) {
  aom_convolve8_avg_horiz_c(src, src_stride, dst, dst_stride, filter_x,
                            x_step_q4, filter_y, y_step_q4, w, h);
}

void aom_scaled_avg_vert_c(const uint8_t *src, ptrdiff_t src_stride,
                           uint8_t *dst, ptrdiff_t dst_stride,
                           const int16_t *filter_x, int x_step_q4,
                           const int16_t *filter_y, int y_step_q4, int w,
                           int h) {
  aom_convolve8_avg_vert_c(src, src_stride, dst, dst_stride, filter_x,
                           x_step_q4, filter_y, y_step_q4, w, h);
}

void aom_scaled_avg_2d_c(const uint8_t *src, ptrdiff_t src_stride, uint8_t *dst,
                         ptrdiff_t dst_stride, const int16_t *filter_x,
                         int x_step_q4, const int16_t *filter_y, int y_step_q4,
                         int w, int h) {
  aom_convolve8_avg_c(src, src_stride, dst, dst_stride, filter_x, x_step_q4,
                      filter_y, y_step_q4, w, h);
}

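// 16-bit sample dot product, shared by the high-bitdepth functions below and
// by the loop-restoration path that keeps a 16-bit intermediate buffer.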
#if CONFIG_HIGHBITDEPTH || CONFIG_LOOP_RESTORATION
static INLINE int highbd_vert_scalar_product(const uint16_t *a,
                                             ptrdiff_t a_stride,
                                             const int16_t *b) {
  int sum = 0;
  for (int k = 0; k < SUBPEL_TAPS; ++k) sum += a[k * a_stride] * b[k];
  return sum;
}
#endif

// TODO(afergs): Make sure this works too
#if CONFIG_LOOP_RESTORATION
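// The add_src variants add the unfiltered source pixel at the filter centre
// back onto the filtered output before clamping. They are only built when
// loop restoration is enabled.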
563static void convolve_add_src_horiz(const uint8_t *src, ptrdiff_t src_stride,
564 uint8_t *dst, ptrdiff_t dst_stride,
565 const InterpKernel *x_filters, int x0_q4,
566 int x_step_q4, int w, int h) {
David Barkerbe6cc072016-12-15 15:39:10 +0000567 src -= SUBPEL_TAPS / 2 - 1;
Sebastien Alaiwanc76ed282017-11-09 16:59:25 +0100568 for (int y = 0; y < h; ++y) {
David Barkerbe6cc072016-12-15 15:39:10 +0000569 int x_q4 = x0_q4;
Sebastien Alaiwanc76ed282017-11-09 16:59:25 +0100570 for (int x = 0; x < w; ++x) {
David Barkerbe6cc072016-12-15 15:39:10 +0000571 const uint8_t *const src_x = &src[x_q4 >> SUBPEL_BITS];
572 const int16_t *const x_filter = x_filters[x_q4 & SUBPEL_MASK];
Sebastien Alaiwanb093b142017-11-09 17:23:58 +0100573
574 const int sum = horz_scalar_product(src_x, x_filter);
David Barkerbe6cc072016-12-15 15:39:10 +0000575 dst[x] = clip_pixel(ROUND_POWER_OF_TWO(sum, FILTER_BITS) +
576 src_x[SUBPEL_TAPS / 2 - 1]);
577 x_q4 += x_step_q4;
578 }
579 src += src_stride;
580 dst += dst_stride;
581 }
582}
583
584static void convolve_add_src_vert(const uint8_t *src, ptrdiff_t src_stride,
585 uint8_t *dst, ptrdiff_t dst_stride,
586 const InterpKernel *y_filters, int y0_q4,
587 int y_step_q4, int w, int h) {
David Barkerbe6cc072016-12-15 15:39:10 +0000588 src -= src_stride * (SUBPEL_TAPS / 2 - 1);
589
Sebastien Alaiwanc76ed282017-11-09 16:59:25 +0100590 for (int x = 0; x < w; ++x) {
David Barkerbe6cc072016-12-15 15:39:10 +0000591 int y_q4 = y0_q4;
Sebastien Alaiwanc76ed282017-11-09 16:59:25 +0100592 for (int y = 0; y < h; ++y) {
David Barkerbe6cc072016-12-15 15:39:10 +0000593 const unsigned char *src_y = &src[(y_q4 >> SUBPEL_BITS) * src_stride];
594 const int16_t *const y_filter = y_filters[y_q4 & SUBPEL_MASK];
Sebastien Alaiwanb093b142017-11-09 17:23:58 +0100595 const int sum = vert_scalar_product(src_y, src_stride, y_filter);
David Barkerbe6cc072016-12-15 15:39:10 +0000596 dst[y * dst_stride] =
597 clip_pixel(ROUND_POWER_OF_TWO(sum, FILTER_BITS) +
598 src_y[(SUBPEL_TAPS / 2 - 1) * src_stride]);
599 y_q4 += y_step_q4;
600 }
601 ++src;
602 ++dst;
603 }
604}
605
606static void convolve_add_src(const uint8_t *src, ptrdiff_t src_stride,
607 uint8_t *dst, ptrdiff_t dst_stride,
608 const InterpKernel *const x_filters, int x0_q4,
609 int x_step_q4, const InterpKernel *const y_filters,
610 int y0_q4, int y_step_q4, int w, int h) {
611 uint8_t temp[MAX_EXT_SIZE * MAX_SB_SIZE];
Sebastien Alaiwan9e9dea02017-11-29 11:53:48 +0100612 const int intermediate_height =
David Barkerbe6cc072016-12-15 15:39:10 +0000613 (((h - 1) * y_step_q4 + y0_q4) >> SUBPEL_BITS) + SUBPEL_TAPS;
614
615 assert(w <= MAX_SB_SIZE);
616 assert(h <= MAX_SB_SIZE);
617
618 assert(y_step_q4 <= 32);
619 assert(x_step_q4 <= 32);
620
621 convolve_add_src_horiz(src - src_stride * (SUBPEL_TAPS / 2 - 1), src_stride,
622 temp, MAX_SB_SIZE, x_filters, x0_q4, x_step_q4, w,
623 intermediate_height);
624 convolve_add_src_vert(temp + MAX_SB_SIZE * (SUBPEL_TAPS / 2 - 1), MAX_SB_SIZE,
625 dst, dst_stride, y_filters, y0_q4, y_step_q4, w, h);
626}
627
void aom_convolve8_add_src_horiz_c(const uint8_t *src, ptrdiff_t src_stride,
                                   uint8_t *dst, ptrdiff_t dst_stride,
                                   const int16_t *filter_x, int x_step_q4,
                                   const int16_t *filter_y, int y_step_q4,
                                   int w, int h) {
  const InterpKernel *const filters_x = get_filter_base(filter_x);
  const int x0_q4 = get_filter_offset(filter_x, filters_x);

  (void)filter_y;
  (void)y_step_q4;

  convolve_add_src_horiz(src, src_stride, dst, dst_stride, filters_x, x0_q4,
                         x_step_q4, w, h);
}

void aom_convolve8_add_src_vert_c(const uint8_t *src, ptrdiff_t src_stride,
                                  uint8_t *dst, ptrdiff_t dst_stride,
                                  const int16_t *filter_x, int x_step_q4,
                                  const int16_t *filter_y, int y_step_q4, int w,
                                  int h) {
  const InterpKernel *const filters_y = get_filter_base(filter_y);
  const int y0_q4 = get_filter_offset(filter_y, filters_y);

  (void)filter_x;
  (void)x_step_q4;

  convolve_add_src_vert(src, src_stride, dst, dst_stride, filters_y, y0_q4,
                        y_step_q4, w, h);
}

void aom_convolve8_add_src_c(const uint8_t *src, ptrdiff_t src_stride,
                             uint8_t *dst, ptrdiff_t dst_stride,
                             const int16_t *filter_x, int x_step_q4,
                             const int16_t *filter_y, int y_step_q4, int w,
                             int h) {
  const InterpKernel *const filters_x = get_filter_base(filter_x);
  const int x0_q4 = get_filter_offset(filter_x, filters_x);

  const InterpKernel *const filters_y = get_filter_base(filter_y);
  const int y0_q4 = get_filter_offset(filter_y, filters_y);

  convolve_add_src(src, src_stride, dst, dst_stride, filters_x, x0_q4,
                   x_step_q4, filters_y, y0_q4, y_step_q4, w, h);
}

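// The _hip variants keep EXTRAPREC_BITS extra bits of precision in the 16-bit
// intermediate buffer between the horizontal and vertical passes, clamping to
// EXTRAPREC_CLAMP_LIMIT(bd) in between; the final vertical pass removes the
// extra precision before writing 8-bit output.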
static void convolve_add_src_horiz_hip(const uint8_t *src, ptrdiff_t src_stride,
                                       uint16_t *dst, ptrdiff_t dst_stride,
                                       const InterpKernel *x_filters, int x0_q4,
                                       int x_step_q4, int w, int h) {
  const int bd = 8;
  src -= SUBPEL_TAPS / 2 - 1;
  for (int y = 0; y < h; ++y) {
    int x_q4 = x0_q4;
    for (int x = 0; x < w; ++x) {
      const uint8_t *const src_x = &src[x_q4 >> SUBPEL_BITS];
      const int16_t *const x_filter = x_filters[x_q4 & SUBPEL_MASK];
      const int rounding = ((int)src_x[SUBPEL_TAPS / 2 - 1] << FILTER_BITS) +
                           (1 << (bd + FILTER_BITS - 1));
      const int sum = horz_scalar_product(src_x, x_filter) + rounding;
      dst[x] =
          (uint16_t)clamp(ROUND_POWER_OF_TWO(sum, FILTER_BITS - EXTRAPREC_BITS),
                          0, EXTRAPREC_CLAMP_LIMIT(bd) - 1);
      x_q4 += x_step_q4;
    }
    src += src_stride;
    dst += dst_stride;
  }
}

static void convolve_add_src_vert_hip(const uint16_t *src, ptrdiff_t src_stride,
                                      uint8_t *dst, ptrdiff_t dst_stride,
                                      const InterpKernel *y_filters, int y0_q4,
                                      int y_step_q4, int w, int h) {
  const int bd = 8;
  src -= src_stride * (SUBPEL_TAPS / 2 - 1);

  for (int x = 0; x < w; ++x) {
    int y_q4 = y0_q4;
    for (int y = 0; y < h; ++y) {
      const uint16_t *src_y = &src[(y_q4 >> SUBPEL_BITS) * src_stride];
      const int16_t *const y_filter = y_filters[y_q4 & SUBPEL_MASK];
      const int rounding =
          ((int)src_y[(SUBPEL_TAPS / 2 - 1) * src_stride] << FILTER_BITS) -
          (1 << (bd + FILTER_BITS + EXTRAPREC_BITS - 1));
      const int sum =
          highbd_vert_scalar_product(src_y, src_stride, y_filter) + rounding;
      dst[y * dst_stride] =
          clip_pixel(ROUND_POWER_OF_TWO(sum, FILTER_BITS + EXTRAPREC_BITS));
      y_q4 += y_step_q4;
    }
    ++src;
    ++dst;
  }
}

static void convolve_add_src_hip(const uint8_t *src, ptrdiff_t src_stride,
                                 uint8_t *dst, ptrdiff_t dst_stride,
                                 const InterpKernel *const x_filters, int x0_q4,
                                 int x_step_q4,
                                 const InterpKernel *const y_filters, int y0_q4,
                                 int y_step_q4, int w, int h) {
  uint16_t temp[MAX_EXT_SIZE * MAX_SB_SIZE];
  const int intermediate_height =
      (((h - 1) * y_step_q4 + y0_q4) >> SUBPEL_BITS) + SUBPEL_TAPS;

  assert(w <= MAX_SB_SIZE);
  assert(h <= MAX_SB_SIZE);

  assert(y_step_q4 <= 32);
  assert(x_step_q4 <= 32);

  convolve_add_src_horiz_hip(src - src_stride * (SUBPEL_TAPS / 2 - 1),
                             src_stride, temp, MAX_SB_SIZE, x_filters, x0_q4,
                             x_step_q4, w, intermediate_height);
  convolve_add_src_vert_hip(temp + MAX_SB_SIZE * (SUBPEL_TAPS / 2 - 1),
                            MAX_SB_SIZE, dst, dst_stride, y_filters, y0_q4,
                            y_step_q4, w, h);
}

void aom_convolve8_add_src_horiz_hip_c(const uint8_t *src, ptrdiff_t src_stride,
                                       uint16_t *dst, ptrdiff_t dst_stride,
                                       const int16_t *filter_x, int x_step_q4,
                                       const int16_t *filter_y, int y_step_q4,
                                       int w, int h) {
  const InterpKernel *const filters_x = get_filter_base(filter_x);
  const int x0_q4 = get_filter_offset(filter_x, filters_x);

  (void)filter_y;
  (void)y_step_q4;

  convolve_add_src_horiz_hip(src, src_stride, dst, dst_stride, filters_x, x0_q4,
                             x_step_q4, w, h);
}

void aom_convolve8_add_src_vert_hip_c(const uint16_t *src, ptrdiff_t src_stride,
                                      uint8_t *dst, ptrdiff_t dst_stride,
                                      const int16_t *filter_x, int x_step_q4,
                                      const int16_t *filter_y, int y_step_q4,
                                      int w, int h) {
  const InterpKernel *const filters_y = get_filter_base(filter_y);
  const int y0_q4 = get_filter_offset(filter_y, filters_y);

  (void)filter_x;
  (void)x_step_q4;

  convolve_add_src_vert_hip(src, src_stride, dst, dst_stride, filters_y, y0_q4,
                            y_step_q4, w, h);
}

void aom_convolve8_add_src_hip_c(const uint8_t *src, ptrdiff_t src_stride,
                                 uint8_t *dst, ptrdiff_t dst_stride,
                                 const int16_t *filter_x, int x_step_q4,
                                 const int16_t *filter_y, int y_step_q4, int w,
                                 int h) {
  const InterpKernel *const filters_x = get_filter_base(filter_x);
  const int x0_q4 = get_filter_offset(filter_x, filters_x);

  const InterpKernel *const filters_y = get_filter_base(filter_y);
  const int y0_q4 = get_filter_offset(filter_y, filters_y);

  convolve_add_src_hip(src, src_stride, dst, dst_stride, filters_x, x0_q4,
                       x_step_q4, filters_y, y0_q4, y_step_q4, w, h);
}
#endif  // CONFIG_LOOP_RESTORATION

// TODO(afergs): Make sure this works too
#if CONFIG_HIGHBITDEPTH

static INLINE int highbd_horz_scalar_product(const uint16_t *a,
                                             const int16_t *b) {
  int sum = 0;
  for (int k = 0; k < SUBPEL_TAPS; ++k) sum += a[k] * b[k];
  return sum;
}

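// High-bitdepth counterparts of the functions above. Buffers are passed as
// uint8_t pointers for interface compatibility and converted back to 16-bit
// sample pointers with CONVERT_TO_SHORTPTR(); clip_pixel_highbd() clamps to
// the range implied by bd.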
static void highbd_convolve_horiz(const uint8_t *src8, ptrdiff_t src_stride,
                                  uint8_t *dst8, ptrdiff_t dst_stride,
                                  const InterpKernel *x_filters, int x0_q4,
                                  int x_step_q4, int w, int h, int bd) {
  uint16_t *src = CONVERT_TO_SHORTPTR(src8);
  uint16_t *dst = CONVERT_TO_SHORTPTR(dst8);
  src -= SUBPEL_TAPS / 2 - 1;
  for (int y = 0; y < h; ++y) {
    int x_q4 = x0_q4;
    for (int x = 0; x < w; ++x) {
      const uint16_t *const src_x = &src[x_q4 >> SUBPEL_BITS];
      const int16_t *const x_filter = x_filters[x_q4 & SUBPEL_MASK];
      const int sum = highbd_horz_scalar_product(src_x, x_filter);
      dst[x] = clip_pixel_highbd(ROUND_POWER_OF_TWO(sum, FILTER_BITS), bd);
      x_q4 += x_step_q4;
    }
    src += src_stride;
    dst += dst_stride;
  }
}

static void highbd_convolve_avg_horiz(const uint8_t *src8, ptrdiff_t src_stride,
                                      uint8_t *dst8, ptrdiff_t dst_stride,
                                      const InterpKernel *x_filters, int x0_q4,
                                      int x_step_q4, int w, int h, int bd) {
  uint16_t *src = CONVERT_TO_SHORTPTR(src8);
  uint16_t *dst = CONVERT_TO_SHORTPTR(dst8);
  src -= SUBPEL_TAPS / 2 - 1;
  for (int y = 0; y < h; ++y) {
    int x_q4 = x0_q4;
    for (int x = 0; x < w; ++x) {
      const uint16_t *const src_x = &src[x_q4 >> SUBPEL_BITS];
      const int16_t *const x_filter = x_filters[x_q4 & SUBPEL_MASK];
      const int sum = highbd_horz_scalar_product(src_x, x_filter);
      dst[x] = ROUND_POWER_OF_TWO(
          dst[x] + clip_pixel_highbd(ROUND_POWER_OF_TWO(sum, FILTER_BITS), bd),
          1);
      x_q4 += x_step_q4;
    }
    src += src_stride;
    dst += dst_stride;
  }
}

static void highbd_convolve_vert(const uint8_t *src8, ptrdiff_t src_stride,
                                 uint8_t *dst8, ptrdiff_t dst_stride,
                                 const InterpKernel *y_filters, int y0_q4,
                                 int y_step_q4, int w, int h, int bd) {
  uint16_t *src = CONVERT_TO_SHORTPTR(src8);
  uint16_t *dst = CONVERT_TO_SHORTPTR(dst8);
  src -= src_stride * (SUBPEL_TAPS / 2 - 1);
  for (int x = 0; x < w; ++x) {
    int y_q4 = y0_q4;
    for (int y = 0; y < h; ++y) {
      const uint16_t *src_y = &src[(y_q4 >> SUBPEL_BITS) * src_stride];
      const int16_t *const y_filter = y_filters[y_q4 & SUBPEL_MASK];
      const int sum = highbd_vert_scalar_product(src_y, src_stride, y_filter);
      dst[y * dst_stride] =
          clip_pixel_highbd(ROUND_POWER_OF_TWO(sum, FILTER_BITS), bd);
      y_q4 += y_step_q4;
    }
    ++src;
    ++dst;
  }
}

static void highbd_convolve_avg_vert(const uint8_t *src8, ptrdiff_t src_stride,
                                     uint8_t *dst8, ptrdiff_t dst_stride,
                                     const InterpKernel *y_filters, int y0_q4,
                                     int y_step_q4, int w, int h, int bd) {
  uint16_t *src = CONVERT_TO_SHORTPTR(src8);
  uint16_t *dst = CONVERT_TO_SHORTPTR(dst8);
  src -= src_stride * (SUBPEL_TAPS / 2 - 1);
  for (int x = 0; x < w; ++x) {
    int y_q4 = y0_q4;
    for (int y = 0; y < h; ++y) {
      const uint16_t *src_y = &src[(y_q4 >> SUBPEL_BITS) * src_stride];
      const int16_t *const y_filter = y_filters[y_q4 & SUBPEL_MASK];
      const int sum = highbd_vert_scalar_product(src_y, src_stride, y_filter);
      dst[y * dst_stride] = ROUND_POWER_OF_TWO(
          dst[y * dst_stride] +
              clip_pixel_highbd(ROUND_POWER_OF_TWO(sum, FILTER_BITS), bd),
          1);
      y_q4 += y_step_q4;
    }
    ++src;
    ++dst;
  }
}

static void highbd_convolve(const uint8_t *src, ptrdiff_t src_stride,
                            uint8_t *dst, ptrdiff_t dst_stride,
                            const InterpKernel *const x_filters, int x0_q4,
                            int x_step_q4, const InterpKernel *const y_filters,
                            int y0_q4, int y_step_q4, int w, int h, int bd) {
  // Note: Fixed size intermediate buffer, temp, places limits on parameters.
  // 2d filtering proceeds in 2 steps:
  //   (1) Interpolate horizontally into an intermediate buffer, temp.
  //   (2) Interpolate temp vertically to derive the sub-pixel result.
  // Deriving the maximum number of rows in the temp buffer (135):
  // --Smallest scaling factor is x1/2 ==> y_step_q4 = 32 (Normative).
  // --Largest block size is 64x64 pixels.
  // --64 rows in the downscaled frame span a distance of (64 - 1) * 32 in the
  //   original frame (in 1/16th pixel units).
  // --Must round-up because block may be located at sub-pixel position.
  // --Require an additional SUBPEL_TAPS rows for the 8-tap filter tails.
  // --((64 - 1) * 32 + 15) >> 4 + 8 = 135.
  uint16_t temp[MAX_EXT_SIZE * MAX_SB_SIZE];
  const int intermediate_height =
      (((h - 1) * y_step_q4 + y0_q4) >> SUBPEL_BITS) + SUBPEL_TAPS;

  assert(w <= MAX_SB_SIZE);
  assert(h <= MAX_SB_SIZE);
  assert(y_step_q4 <= 32);
  assert(x_step_q4 <= 32);

  highbd_convolve_horiz(src - src_stride * (SUBPEL_TAPS / 2 - 1), src_stride,
                        CONVERT_TO_BYTEPTR(temp), MAX_SB_SIZE, x_filters, x0_q4,
                        x_step_q4, w, intermediate_height, bd);
  highbd_convolve_vert(
      CONVERT_TO_BYTEPTR(temp) + MAX_SB_SIZE * (SUBPEL_TAPS / 2 - 1),
      MAX_SB_SIZE, dst, dst_stride, y_filters, y0_q4, y_step_q4, w, h, bd);
}

void aom_highbd_convolve8_horiz_c(const uint8_t *src, ptrdiff_t src_stride,
                                  uint8_t *dst, ptrdiff_t dst_stride,
                                  const int16_t *filter_x, int x_step_q4,
                                  const int16_t *filter_y, int y_step_q4, int w,
                                  int h, int bd) {
  const InterpKernel *const filters_x = get_filter_base(filter_x);
  const int x0_q4 = get_filter_offset(filter_x, filters_x);
  (void)filter_y;
  (void)y_step_q4;

  highbd_convolve_horiz(src, src_stride, dst, dst_stride, filters_x, x0_q4,
                        x_step_q4, w, h, bd);
}

void aom_highbd_convolve8_avg_horiz_c(const uint8_t *src, ptrdiff_t src_stride,
                                      uint8_t *dst, ptrdiff_t dst_stride,
                                      const int16_t *filter_x, int x_step_q4,
                                      const int16_t *filter_y, int y_step_q4,
                                      int w, int h, int bd) {
  const InterpKernel *const filters_x = get_filter_base(filter_x);
  const int x0_q4 = get_filter_offset(filter_x, filters_x);
  (void)filter_y;
  (void)y_step_q4;

  highbd_convolve_avg_horiz(src, src_stride, dst, dst_stride, filters_x, x0_q4,
                            x_step_q4, w, h, bd);
}

void aom_highbd_convolve8_vert_c(const uint8_t *src, ptrdiff_t src_stride,
                                 uint8_t *dst, ptrdiff_t dst_stride,
                                 const int16_t *filter_x, int x_step_q4,
                                 const int16_t *filter_y, int y_step_q4, int w,
                                 int h, int bd) {
  const InterpKernel *const filters_y = get_filter_base(filter_y);
  const int y0_q4 = get_filter_offset(filter_y, filters_y);
  (void)filter_x;
  (void)x_step_q4;

  highbd_convolve_vert(src, src_stride, dst, dst_stride, filters_y, y0_q4,
                       y_step_q4, w, h, bd);
}

void aom_highbd_convolve8_avg_vert_c(const uint8_t *src, ptrdiff_t src_stride,
                                     uint8_t *dst, ptrdiff_t dst_stride,
                                     const int16_t *filter_x, int x_step_q4,
                                     const int16_t *filter_y, int y_step_q4,
                                     int w, int h, int bd) {
  const InterpKernel *const filters_y = get_filter_base(filter_y);
  const int y0_q4 = get_filter_offset(filter_y, filters_y);
  (void)filter_x;
  (void)x_step_q4;

  highbd_convolve_avg_vert(src, src_stride, dst, dst_stride, filters_y, y0_q4,
                           y_step_q4, w, h, bd);
}

void aom_highbd_convolve8_c(const uint8_t *src, ptrdiff_t src_stride,
                            uint8_t *dst, ptrdiff_t dst_stride,
                            const int16_t *filter_x, int x_step_q4,
                            const int16_t *filter_y, int y_step_q4, int w,
                            int h, int bd) {
  const InterpKernel *const filters_x = get_filter_base(filter_x);
  const int x0_q4 = get_filter_offset(filter_x, filters_x);

  const InterpKernel *const filters_y = get_filter_base(filter_y);
  const int y0_q4 = get_filter_offset(filter_y, filters_y);

  highbd_convolve(src, src_stride, dst, dst_stride, filters_x, x0_q4, x_step_q4,
                  filters_y, y0_q4, y_step_q4, w, h, bd);
}

void aom_highbd_convolve8_avg_c(const uint8_t *src, ptrdiff_t src_stride,
                                uint8_t *dst, ptrdiff_t dst_stride,
                                const int16_t *filter_x, int x_step_q4,
                                const int16_t *filter_y, int y_step_q4, int w,
                                int h, int bd) {
  // Fixed size intermediate buffer places limits on parameters.
  DECLARE_ALIGNED(16, uint16_t, temp[MAX_SB_SIZE * MAX_SB_SIZE]);
  assert(w <= MAX_SB_SIZE);
  assert(h <= MAX_SB_SIZE);

  aom_highbd_convolve8_c(src, src_stride, CONVERT_TO_BYTEPTR(temp), MAX_SB_SIZE,
                         filter_x, x_step_q4, filter_y, y_step_q4, w, h, bd);
  aom_highbd_convolve_avg_c(CONVERT_TO_BYTEPTR(temp), MAX_SB_SIZE, dst,
                            dst_stride, NULL, 0, NULL, 0, w, h, bd);
}

void aom_highbd_convolve_copy_c(const uint8_t *src8, ptrdiff_t src_stride,
                                uint8_t *dst8, ptrdiff_t dst_stride,
                                const int16_t *filter_x, int filter_x_stride,
                                const int16_t *filter_y, int filter_y_stride,
                                int w, int h, int bd) {
  int r;
  uint16_t *src = CONVERT_TO_SHORTPTR(src8);
  uint16_t *dst = CONVERT_TO_SHORTPTR(dst8);
  (void)filter_x;
  (void)filter_y;
  (void)filter_x_stride;
  (void)filter_y_stride;
  (void)bd;

  for (r = h; r > 0; --r) {
    memcpy(dst, src, w * sizeof(uint16_t));
    src += src_stride;
    dst += dst_stride;
  }
}

void aom_highbd_convolve_avg_c(const uint8_t *src8, ptrdiff_t src_stride,
                               uint8_t *dst8, ptrdiff_t dst_stride,
                               const int16_t *filter_x, int filter_x_stride,
                               const int16_t *filter_y, int filter_y_stride,
                               int w, int h, int bd) {
  uint16_t *src = CONVERT_TO_SHORTPTR(src8);
  uint16_t *dst = CONVERT_TO_SHORTPTR(dst8);
  (void)filter_x;
  (void)filter_y;
  (void)filter_x_stride;
  (void)filter_y_stride;
  (void)bd;

  for (int y = 0; y < h; ++y) {
    for (int x = 0; x < w; ++x) {
      dst[x] = ROUND_POWER_OF_TWO(dst[x] + src[x], 1);
    }
    src += src_stride;
    dst += dst_stride;
  }
}

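// High-bitdepth versions of the loop-restoration (add_src and _hip) convolves
// defined earlier for 8-bit input.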
#if CONFIG_LOOP_RESTORATION
static void highbd_convolve_add_src_horiz(const uint8_t *src8,
                                           ptrdiff_t src_stride, uint8_t *dst8,
                                           ptrdiff_t dst_stride,
                                           const InterpKernel *x_filters,
                                           int x0_q4, int x_step_q4, int w,
                                           int h, int bd) {
  uint16_t *src = CONVERT_TO_SHORTPTR(src8);
  uint16_t *dst = CONVERT_TO_SHORTPTR(dst8);
  src -= SUBPEL_TAPS / 2 - 1;
  for (int y = 0; y < h; ++y) {
    int x_q4 = x0_q4;
    for (int x = 0; x < w; ++x) {
      const uint16_t *const src_x = &src[x_q4 >> SUBPEL_BITS];
      const int16_t *const x_filter = x_filters[x_q4 & SUBPEL_MASK];
      const int sum = highbd_horz_scalar_product(src_x, x_filter);
      dst[x] = clip_pixel_highbd(
          ROUND_POWER_OF_TWO(sum, FILTER_BITS) + src_x[SUBPEL_TAPS / 2 - 1],
          bd);
      x_q4 += x_step_q4;
    }
    src += src_stride;
    dst += dst_stride;
  }
}

static void highbd_convolve_add_src_vert(const uint8_t *src8,
                                          ptrdiff_t src_stride, uint8_t *dst8,
                                          ptrdiff_t dst_stride,
                                          const InterpKernel *y_filters,
                                          int y0_q4, int y_step_q4, int w, int h,
                                          int bd) {
  uint16_t *src = CONVERT_TO_SHORTPTR(src8);
  uint16_t *dst = CONVERT_TO_SHORTPTR(dst8);
  src -= src_stride * (SUBPEL_TAPS / 2 - 1);
  for (int x = 0; x < w; ++x) {
    int y_q4 = y0_q4;
    for (int y = 0; y < h; ++y) {
      const uint16_t *src_y = &src[(y_q4 >> SUBPEL_BITS) * src_stride];
      const int16_t *const y_filter = y_filters[y_q4 & SUBPEL_MASK];
      const int sum = highbd_vert_scalar_product(src_y, src_stride, y_filter);
      dst[y * dst_stride] =
          clip_pixel_highbd(ROUND_POWER_OF_TWO(sum, FILTER_BITS) +
                                src_y[(SUBPEL_TAPS / 2 - 1) * src_stride],
                            bd);
      y_q4 += y_step_q4;
    }
    ++src;
    ++dst;
  }
}

static void highbd_convolve_add_src(const uint8_t *src, ptrdiff_t src_stride,
                                    uint8_t *dst, ptrdiff_t dst_stride,
                                    const InterpKernel *const x_filters,
                                    int x0_q4, int x_step_q4,
                                    const InterpKernel *const y_filters,
                                    int y0_q4, int y_step_q4, int w, int h,
                                    int bd) {
  // Note: Fixed size intermediate buffer, temp, places limits on parameters.
  // 2d filtering proceeds in 2 steps:
  //   (1) Interpolate horizontally into an intermediate buffer, temp.
  //   (2) Interpolate temp vertically to derive the sub-pixel result.
  // Deriving the maximum number of rows in the temp buffer (135):
  // --Smallest scaling factor is x1/2 ==> y_step_q4 = 32 (Normative).
  // --Largest block size is 64x64 pixels.
  // --64 rows in the downscaled frame span a distance of (64 - 1) * 32 in the
  //   original frame (in 1/16th pixel units).
  // --Must round-up because block may be located at sub-pixel position.
  // --Require an additional SUBPEL_TAPS rows for the 8-tap filter tails.
  // --((64 - 1) * 32 + 15) >> 4 + 8 = 135.
  uint16_t temp[MAX_EXT_SIZE * MAX_SB_SIZE];
  const int intermediate_height =
      (((h - 1) * y_step_q4 + y0_q4) >> SUBPEL_BITS) + SUBPEL_TAPS;

  assert(w <= MAX_SB_SIZE);
  assert(h <= MAX_SB_SIZE);
  assert(y_step_q4 <= 32);
  assert(x_step_q4 <= 32);

  highbd_convolve_add_src_horiz(src - src_stride * (SUBPEL_TAPS / 2 - 1),
                                src_stride, CONVERT_TO_BYTEPTR(temp),
                                MAX_SB_SIZE, x_filters, x0_q4, x_step_q4, w,
                                intermediate_height, bd);
  highbd_convolve_add_src_vert(
      CONVERT_TO_BYTEPTR(temp) + MAX_SB_SIZE * (SUBPEL_TAPS / 2 - 1),
      MAX_SB_SIZE, dst, dst_stride, y_filters, y0_q4, y_step_q4, w, h, bd);
}

void aom_highbd_convolve8_add_src_c(const uint8_t *src, ptrdiff_t src_stride,
                                    uint8_t *dst, ptrdiff_t dst_stride,
                                    const int16_t *filter_x, int x_step_q4,
                                    const int16_t *filter_y, int y_step_q4,
                                    int w, int h, int bd) {
  const InterpKernel *const filters_x = get_filter_base(filter_x);
  const int x0_q4 = get_filter_offset(filter_x, filters_x);

  const InterpKernel *const filters_y = get_filter_base(filter_y);
  const int y0_q4 = get_filter_offset(filter_y, filters_y);

  highbd_convolve_add_src(src, src_stride, dst, dst_stride, filters_x, x0_q4,
                          x_step_q4, filters_y, y0_q4, y_step_q4, w, h, bd);
}

static void highbd_convolve_add_src_horiz_hip(
    const uint8_t *src8, ptrdiff_t src_stride, uint16_t *dst,
    ptrdiff_t dst_stride, const InterpKernel *x_filters, int x0_q4,
    int x_step_q4, int w, int h, int bd) {
  const int extraprec_clamp_limit = EXTRAPREC_CLAMP_LIMIT(bd);
  uint16_t *src = CONVERT_TO_SHORTPTR(src8);
  src -= SUBPEL_TAPS / 2 - 1;
  for (int y = 0; y < h; ++y) {
    int x_q4 = x0_q4;
    for (int x = 0; x < w; ++x) {
      const uint16_t *const src_x = &src[x_q4 >> SUBPEL_BITS];
      const int16_t *const x_filter = x_filters[x_q4 & SUBPEL_MASK];
      const int rounding = ((int)src_x[SUBPEL_TAPS / 2 - 1] << FILTER_BITS) +
                           (1 << (bd + FILTER_BITS - 1));
      const int sum = highbd_horz_scalar_product(src_x, x_filter) + rounding;
      dst[x] =
          (uint16_t)clamp(ROUND_POWER_OF_TWO(sum, FILTER_BITS - EXTRAPREC_BITS),
                          0, extraprec_clamp_limit - 1);
      x_q4 += x_step_q4;
    }
    src += src_stride;
    dst += dst_stride;
  }
}

static void highbd_convolve_add_src_vert_hip(
    const uint16_t *src, ptrdiff_t src_stride, uint8_t *dst8,
    ptrdiff_t dst_stride, const InterpKernel *y_filters, int y0_q4,
    int y_step_q4, int w, int h, int bd) {
  uint16_t *dst = CONVERT_TO_SHORTPTR(dst8);
  src -= src_stride * (SUBPEL_TAPS / 2 - 1);
  for (int x = 0; x < w; ++x) {
    int y_q4 = y0_q4;
    for (int y = 0; y < h; ++y) {
      const uint16_t *src_y = &src[(y_q4 >> SUBPEL_BITS) * src_stride];
      const int16_t *const y_filter = y_filters[y_q4 & SUBPEL_MASK];
      const int rounding =
          ((int)src_y[(SUBPEL_TAPS / 2 - 1) * src_stride] << FILTER_BITS) -
          (1 << (bd + FILTER_BITS + EXTRAPREC_BITS - 1));
      const int sum =
          highbd_vert_scalar_product(src_y, src_stride, y_filter) + rounding;
      dst[y * dst_stride] = clip_pixel_highbd(
          ROUND_POWER_OF_TWO(sum, FILTER_BITS + EXTRAPREC_BITS), bd);
      y_q4 += y_step_q4;
    }
    ++src;
    ++dst;
  }
}

static void highbd_convolve_add_src_hip(
    const uint8_t *src, ptrdiff_t src_stride, uint8_t *dst,
    ptrdiff_t dst_stride, const InterpKernel *const x_filters, int x0_q4,
    int x_step_q4, const InterpKernel *const y_filters, int y0_q4,
    int y_step_q4, int w, int h, int bd) {
  // Note: Fixed size intermediate buffer, temp, places limits on parameters.
  // 2d filtering proceeds in 2 steps:
  //   (1) Interpolate horizontally into an intermediate buffer, temp.
  //   (2) Interpolate temp vertically to derive the sub-pixel result.
  // Deriving the maximum number of rows in the temp buffer (135):
  // --Smallest scaling factor is x1/2 ==> y_step_q4 = 32 (Normative).
  // --Largest block size is 64x64 pixels.
  // --64 rows in the downscaled frame span a distance of (64 - 1) * 32 in the
  //   original frame (in 1/16th pixel units).
  // --Must round-up because block may be located at sub-pixel position.
  // --Require an additional SUBPEL_TAPS rows for the 8-tap filter tails.
  // --((64 - 1) * 32 + 15) >> 4 + 8 = 135.
  uint16_t temp[MAX_EXT_SIZE * MAX_SB_SIZE];
  const int intermediate_height =
      (((h - 1) * y_step_q4 + y0_q4) >> SUBPEL_BITS) + SUBPEL_TAPS;

  assert(w <= MAX_SB_SIZE);
  assert(h <= MAX_SB_SIZE);
  assert(y_step_q4 <= 32);
  assert(x_step_q4 <= 32);

  highbd_convolve_add_src_horiz_hip(
      src - src_stride * (SUBPEL_TAPS / 2 - 1), src_stride, temp, MAX_SB_SIZE,
      x_filters, x0_q4, x_step_q4, w, intermediate_height, bd);
  highbd_convolve_add_src_vert_hip(temp + MAX_SB_SIZE * (SUBPEL_TAPS / 2 - 1),
                                   MAX_SB_SIZE, dst, dst_stride, y_filters,
                                   y0_q4, y_step_q4, w, h, bd);
}

void aom_highbd_convolve8_add_src_hip_c(const uint8_t *src,
                                        ptrdiff_t src_stride, uint8_t *dst,
                                        ptrdiff_t dst_stride,
                                        const int16_t *filter_x, int x_step_q4,
                                        const int16_t *filter_y, int y_step_q4,
                                        int w, int h, int bd) {
  const InterpKernel *const filters_x = get_filter_base(filter_x);
  const int x0_q4 = get_filter_offset(filter_x, filters_x);

  const InterpKernel *const filters_y = get_filter_base(filter_y);
  const int y0_q4 = get_filter_offset(filter_y, filters_y);

  highbd_convolve_add_src_hip(src, src_stride, dst, dst_stride, filters_x,
                              x0_q4, x_step_q4, filters_y, y0_q4, y_step_q4, w,
                              h, bd);
}

#endif  // CONFIG_LOOP_RESTORATION
#endif  // CONFIG_HIGHBITDEPTH