Blame - aom_dsp/aom_convolve.c - avm

2016-08-22 16:08:15 -0700

[diff] [blame]

1

/*

Yaowu Xu

9c01aa1

2016-09-01 14:32:49 -0700

[diff] [blame]

2

Yaowu Xu

2016-08-22 16:08:15 -0700

[diff] [blame]

3

*

Yaowu Xu

9c01aa1

2016-09-01 14:32:49 -0700

[diff] [blame]

4

* This source code is subject to the terms of the BSD 2 Clause License and

5

* the Alliance for Open Media Patent License 1.0. If the BSD 2 Clause License

6

* was not distributed with this source code in the LICENSE file, you can

7

* obtain it at www.aomedia.org/license/software. If the Alliance for Open

8

* Media Patent License 1.0 was not distributed with this source code in the

9

* PATENTS file, you can obtain it at www.aomedia.org/license/patent.

Yaowu Xu

2016-08-22 16:08:15 -0700

[diff] [blame]

*/

#include <assert.h>

#include <string.h>

Yaowu Xu

2016-08-30 14:01:10 -0700

[diff] [blame]

15

#include "./aom_config.h"

16

#include "./aom_dsp_rtcd.h"

17

#include "aom/aom_integer.h"

18

#include "aom_dsp/aom_convolve.h"

19

#include "aom_dsp/aom_dsp_common.h"

20

#include "aom_dsp/aom_filter.h"

Yaowu Xu

2016-08-22 16:08:15 -0700

[diff] [blame]

21

#include "aom_ports/mem.h"

22

Sebastien Alaiwan

2017-11-09 17:23:58 +0100

[diff] [blame]

23

static INLINE int horz_scalar_product(const uint8_t *a, const int16_t *b) {

24

int sum = 0;

25

for (int k = 0; k < SUBPEL_TAPS; ++k) sum += a[k] * b[k];

return sum;

}

static INLINE int vert_scalar_product(const uint8_t *a, ptrdiff_t a_stride,

30

const int16_t *b) {

31

int sum = 0;

32

for (int k = 0; k < SUBPEL_TAPS; ++k) sum += a[k * a_stride] * b[k];

return sum;

}

Yaowu Xu

2016-08-22 16:08:15 -0700

[diff] [blame]

36

static void convolve_horiz(const uint8_t *src, ptrdiff_t src_stride,

37

uint8_t *dst, ptrdiff_t dst_stride,

38

const InterpKernel *x_filters, int x0_q4,

39

int x_step_q4, int w, int h) {

Yaowu Xu

2016-08-22 16:08:15 -0700

[diff] [blame]

40

src -= SUBPEL_TAPS / 2 - 1;

Sebastien Alaiwan

2017-11-09 16:59:25 +0100

[diff] [blame]

41

for (int y = 0; y < h; ++y) {

Yaowu Xu

2016-08-22 16:08:15 -0700

[diff] [blame]

42

int x_q4 = x0_q4;

Sebastien Alaiwan

2017-11-09 16:59:25 +0100

[diff] [blame]

43

for (int x = 0; x < w; ++x) {

Yaowu Xu

2016-08-22 16:08:15 -0700

[diff] [blame]

44

const uint8_t *const src_x = &src[x_q4 >> SUBPEL_BITS];

45

const int16_t *const x_filter = x_filters[x_q4 & SUBPEL_MASK];

Sebastien Alaiwan

2017-11-09 17:23:58 +0100

[diff] [blame]

46

const int sum = horz_scalar_product(src_x, x_filter);

Yaowu Xu

2016-08-22 16:08:15 -0700

[diff] [blame]

47

dst[x] = clip_pixel(ROUND_POWER_OF_TWO(sum, FILTER_BITS));

x_q4 += x_step_q4;

}

src += src_stride;

dst += dst_stride;

}

}

Fergus Simpson

2017-06-27 11:23:34 -0700

[diff] [blame]

55

static void convolve_horiz_scale_c(const uint8_t *src, ptrdiff_t src_stride,

56

uint8_t *dst, ptrdiff_t dst_stride,

57

const InterpKernel *x_filters, int x0_qn,

58

int x_step_qn, int w, int h) {

Fergus Simpson

2017-06-27 11:23:34 -0700

[diff] [blame]

59

src -= SUBPEL_TAPS / 2 - 1;

Sebastien Alaiwan

2017-11-09 16:59:25 +0100

[diff] [blame]

60

for (int y = 0; y < h; ++y) {

Fergus Simpson

2017-06-27 11:23:34 -0700

[diff] [blame]

61

int x_qn = x0_qn;

Sebastien Alaiwan

2017-11-09 16:59:25 +0100

[diff] [blame]

62

for (int x = 0; x < w; ++x) {

Fergus Simpson

2017-06-27 11:23:34 -0700

[diff] [blame]

63

const uint8_t *const src_x = &src[x_qn >> SCALE_SUBPEL_BITS]; // q8

64

const int x_filter_idx = (x_qn & SCALE_SUBPEL_MASK) >> SCALE_EXTRA_BITS;

65

assert(x_filter_idx < SUBPEL_SHIFTS);

66

const int16_t *const x_filter = x_filters[x_filter_idx];

Sebastien Alaiwan

2017-11-09 17:23:58 +0100

[diff] [blame]

67

const int sum = horz_scalar_product(src_x, x_filter);

Fergus Simpson

2017-06-27 11:23:34 -0700

[diff] [blame]

68

dst[x] = clip_pixel(ROUND_POWER_OF_TWO(sum, FILTER_BITS));

x_qn += x_step_qn;

}

src += src_stride;

dst += dst_stride;

}

}

Yaowu Xu

2016-08-22 16:08:15 -0700

[diff] [blame]

76

static void convolve_avg_horiz(const uint8_t *src, ptrdiff_t src_stride,

77

uint8_t *dst, ptrdiff_t dst_stride,

78

const InterpKernel *x_filters, int x0_q4,

79

int x_step_q4, int w, int h) {

Yaowu Xu

2016-08-22 16:08:15 -0700

[diff] [blame]

80

src -= SUBPEL_TAPS / 2 - 1;

Sebastien Alaiwan

2017-11-09 16:59:25 +0100

[diff] [blame]

81

for (int y = 0; y < h; ++y) {

Yaowu Xu

2016-08-22 16:08:15 -0700

[diff] [blame]

82

int x_q4 = x0_q4;

Sebastien Alaiwan

2017-11-09 16:59:25 +0100

[diff] [blame]

83

for (int x = 0; x < w; ++x) {

Yaowu Xu

2016-08-22 16:08:15 -0700

[diff] [blame]

84

const uint8_t *const src_x = &src[x_q4 >> SUBPEL_BITS];

85

const int16_t *const x_filter = x_filters[x_q4 & SUBPEL_MASK];

Sebastien Alaiwan

2017-11-09 17:23:58 +0100

[diff] [blame]

86

const int sum = horz_scalar_product(src_x, x_filter);

Yaowu Xu

2016-08-22 16:08:15 -0700

[diff] [blame]

87

dst[x] = ROUND_POWER_OF_TWO(

88

dst[x] + clip_pixel(ROUND_POWER_OF_TWO(sum, FILTER_BITS)), 1);

x_q4 += x_step_q4;

}

src += src_stride;

dst += dst_stride;

}

}

Fergus Simpson

2017-06-27 11:23:34 -0700

[diff] [blame]

96

static void convolve_avg_horiz_scale_c(const uint8_t *src, ptrdiff_t src_stride,

97

uint8_t *dst, ptrdiff_t dst_stride,

98

const InterpKernel *x_filters, int x0_qn,

99

int x_step_qn, int w, int h) {

Fergus Simpson

2017-06-27 11:23:34 -0700

[diff] [blame]

100

src -= SUBPEL_TAPS / 2 - 1;

Sebastien Alaiwan

2017-11-09 16:59:25 +0100

[diff] [blame]

101

for (int y = 0; y < h; ++y) {

Fergus Simpson

2017-06-27 11:23:34 -0700

[diff] [blame]

102

int x_qn = x0_qn;

Sebastien Alaiwan

2017-11-09 16:59:25 +0100

[diff] [blame]

103

for (int x = 0; x < w; ++x) {

Fergus Simpson

2017-06-27 11:23:34 -0700

[diff] [blame]

104

const uint8_t *const src_x = &src[x_qn >> SCALE_SUBPEL_BITS];

105

const int x_filter_idx = (x_qn & SCALE_SUBPEL_MASK) >> SCALE_EXTRA_BITS;

106

assert(x_filter_idx < SUBPEL_SHIFTS);

107

const int16_t *const x_filter = x_filters[x_filter_idx];

Sebastien Alaiwan

2017-11-09 17:23:58 +0100

[diff] [blame]

108

const int sum = horz_scalar_product(src_x, x_filter);

Fergus Simpson

2017-06-27 11:23:34 -0700

[diff] [blame]

109

dst[x] = ROUND_POWER_OF_TWO(

110

dst[x] + clip_pixel(ROUND_POWER_OF_TWO(sum, FILTER_BITS)), 1);

x_qn += x_step_qn;

}

src += src_stride;

dst += dst_stride;

}

}

Yaowu Xu

2016-08-22 16:08:15 -0700

[diff] [blame]

118

static void convolve_vert(const uint8_t *src, ptrdiff_t src_stride,

119

uint8_t *dst, ptrdiff_t dst_stride,

120

const InterpKernel *y_filters, int y0_q4,

121

int y_step_q4, int w, int h) {

Yaowu Xu

2016-08-22 16:08:15 -0700

[diff] [blame]

122

src -= src_stride * (SUBPEL_TAPS / 2 - 1);

123

Sebastien Alaiwan

2017-11-09 16:59:25 +0100

[diff] [blame]

124

for (int x = 0; x < w; ++x) {

Yaowu Xu

2016-08-22 16:08:15 -0700

[diff] [blame]

125

int y_q4 = y0_q4;

Sebastien Alaiwan

2017-11-09 16:59:25 +0100

[diff] [blame]

126

for (int y = 0; y < h; ++y) {

Yaowu Xu

2016-08-22 16:08:15 -0700

[diff] [blame]

127

const unsigned char *src_y = &src[(y_q4 >> SUBPEL_BITS) * src_stride];

128

const int16_t *const y_filter = y_filters[y_q4 & SUBPEL_MASK];

Sebastien Alaiwan

2017-11-09 17:23:58 +0100

[diff] [blame]

129

const int sum = vert_scalar_product(src_y, src_stride, y_filter);

Yaowu Xu

2016-08-22 16:08:15 -0700

[diff] [blame]

130

dst[y * dst_stride] = clip_pixel(ROUND_POWER_OF_TWO(sum, FILTER_BITS));

y_q4 += y_step_q4;

}

++src;

++dst;

}

}

Fergus Simpson

2017-06-27 11:23:34 -0700

[diff] [blame]

138

static void convolve_vert_scale_c(const uint8_t *src, ptrdiff_t src_stride,

139

uint8_t *dst, ptrdiff_t dst_stride,

140

const InterpKernel *y_filters, int y0_qn,

141

int y_step_qn, int w, int h) {

Fergus Simpson

2017-06-27 11:23:34 -0700

[diff] [blame]

142

src -= src_stride * (SUBPEL_TAPS / 2 - 1);

143

Sebastien Alaiwan

2017-11-09 16:59:25 +0100

[diff] [blame]

144

for (int x = 0; x < w; ++x) {

Fergus Simpson

2017-06-27 11:23:34 -0700

[diff] [blame]

145

int y_qn = y0_qn;

Sebastien Alaiwan

2017-11-09 16:59:25 +0100

[diff] [blame]

146

for (int y = 0; y < h; ++y) {

Fergus Simpson

2017-06-27 11:23:34 -0700

[diff] [blame]

147

const unsigned char *src_y =

148

&src[(y_qn >> SCALE_SUBPEL_BITS) * src_stride];

149

const int16_t *const y_filter =

150

y_filters[(y_qn & SCALE_SUBPEL_MASK) >> SCALE_EXTRA_BITS];

Sebastien Alaiwan

2017-11-09 17:23:58 +0100

[diff] [blame]

151

const int sum = vert_scalar_product(src_y, src_stride, y_filter);

Fergus Simpson

2017-06-27 11:23:34 -0700

[diff] [blame]

152

dst[y * dst_stride] = clip_pixel(ROUND_POWER_OF_TWO(sum, FILTER_BITS));

y_qn += y_step_qn;

}

++src;

++dst;

}

}

Yaowu Xu

2016-08-22 16:08:15 -0700

[diff] [blame]

160

static void convolve_avg_vert(const uint8_t *src, ptrdiff_t src_stride,

161

uint8_t *dst, ptrdiff_t dst_stride,

162

const InterpKernel *y_filters, int y0_q4,

163

int y_step_q4, int w, int h) {

Yaowu Xu

2016-08-22 16:08:15 -0700

[diff] [blame]

164

src -= src_stride * (SUBPEL_TAPS / 2 - 1);

165

Sebastien Alaiwan

2017-11-09 16:59:25 +0100

[diff] [blame]

166

for (int x = 0; x < w; ++x) {

Yaowu Xu

2016-08-22 16:08:15 -0700

[diff] [blame]

167

int y_q4 = y0_q4;

Sebastien Alaiwan

2017-11-09 16:59:25 +0100

[diff] [blame]

168

for (int y = 0; y < h; ++y) {

Yaowu Xu

2016-08-22 16:08:15 -0700

[diff] [blame]

169

const unsigned char *src_y = &src[(y_q4 >> SUBPEL_BITS) * src_stride];

170

const int16_t *const y_filter = y_filters[y_q4 & SUBPEL_MASK];

Sebastien Alaiwan

2017-11-09 17:23:58 +0100

[diff] [blame]

171

const int sum = vert_scalar_product(src_y, src_stride, y_filter);

Yaowu Xu

2016-08-22 16:08:15 -0700

[diff] [blame]

172

dst[y * dst_stride] = ROUND_POWER_OF_TWO(

173

dst[y * dst_stride] +

174

clip_pixel(ROUND_POWER_OF_TWO(sum, FILTER_BITS)),

1);

y_q4 += y_step_q4;

}

++src;

++dst;

}

}

Fergus Simpson

2017-06-27 11:23:34 -0700

[diff] [blame]

183

static void convolve_avg_vert_scale_c(const uint8_t *src, ptrdiff_t src_stride,

184

uint8_t *dst, ptrdiff_t dst_stride,

185

const InterpKernel *y_filters, int y0_qn,

186

int y_step_qn, int w, int h) {

Fergus Simpson

2017-06-27 11:23:34 -0700

[diff] [blame]

187

src -= src_stride * (SUBPEL_TAPS / 2 - 1);

188

Sebastien Alaiwan

2017-11-09 16:59:25 +0100

[diff] [blame]

189

for (int x = 0; x < w; ++x) {

Fergus Simpson

2017-06-27 11:23:34 -0700

[diff] [blame]

190

int y_qn = y0_qn;

Sebastien Alaiwan

2017-11-09 16:59:25 +0100

[diff] [blame]

191

for (int y = 0; y < h; ++y) {

Fergus Simpson

2017-06-27 11:23:34 -0700

[diff] [blame]

192

const unsigned char *src_y =

193

&src[(y_qn >> SCALE_SUBPEL_BITS) * src_stride];

194

const int16_t *const y_filter =

195

y_filters[(y_qn & SCALE_SUBPEL_MASK) >> SCALE_EXTRA_BITS];

Sebastien Alaiwan

2017-11-09 17:23:58 +0100

[diff] [blame]

196

const int sum = vert_scalar_product(src_y, src_stride, y_filter);

Fergus Simpson

2017-06-27 11:23:34 -0700

[diff] [blame]

197

dst[y * dst_stride] = ROUND_POWER_OF_TWO(

198

dst[y * dst_stride] +

199

clip_pixel(ROUND_POWER_OF_TWO(sum, FILTER_BITS)),

1);

y_qn += y_step_qn;

}

++src;

++dst;

}

}

Yaowu Xu

2016-08-22 16:08:15 -0700

[diff] [blame]

208

static void convolve(const uint8_t *src, ptrdiff_t src_stride, uint8_t *dst,

209

ptrdiff_t dst_stride, const InterpKernel *const x_filters,

210

int x0_q4, int x_step_q4,

211

const InterpKernel *const y_filters, int y0_q4,

212

int y_step_q4, int w, int h) {

213

// Note: Fixed size intermediate buffer, temp, places limits on parameters.

214

// 2d filtering proceeds in 2 steps:

215

// (1) Interpolate horizontally into an intermediate buffer, temp.

216

// (2) Interpolate temp vertically to derive the sub-pixel result.

217

// Deriving the maximum number of rows in the temp buffer (135):

218

// --Smallest scaling factor is x1/2 ==> y_step_q4 = 32 (Normative).

219

// --Largest block size is 64x64 pixels.

220

// --64 rows in the downscaled frame span a distance of (64 - 1) * 32 in the

221

// original frame (in 1/16th pixel units).

222

// --Must round-up because block may be located at sub-pixel position.

223

// --Require an additional SUBPEL_TAPS rows for the 8-tap filter tails.

224

// --((64 - 1) * 32 + 15) >> 4 + 8 = 135.

225

uint8_t temp[MAX_EXT_SIZE * MAX_SB_SIZE];

Sebastien Alaiwan

2017-11-29 11:53:48 +0100

[diff] [blame]

226

const int intermediate_height =

Yaowu Xu

2016-08-22 16:08:15 -0700

[diff] [blame]

227

(((h - 1) * y_step_q4 + y0_q4) >> SUBPEL_BITS) + SUBPEL_TAPS;

228

229

assert(w <= MAX_SB_SIZE);

230

assert(h <= MAX_SB_SIZE);

231

232

assert(y_step_q4 <= 32);

233

assert(x_step_q4 <= 32);

234

235

convolve_horiz(src - src_stride * (SUBPEL_TAPS / 2 - 1), src_stride, temp,

236

MAX_SB_SIZE, x_filters, x0_q4, x_step_q4, w,

237

intermediate_height);

238

convolve_vert(temp + MAX_SB_SIZE * (SUBPEL_TAPS / 2 - 1), MAX_SB_SIZE, dst,

239

dst_stride, y_filters, y0_q4, y_step_q4, w, h);

240

}

241

Fergus Simpson

2017-06-27 11:23:34 -0700

[diff] [blame]

242

static void convolve_scale_c(const uint8_t *src, ptrdiff_t src_stride,

243

uint8_t *dst, ptrdiff_t dst_stride,

244

const InterpKernel *const x_filters, int x0_qn,

245

int x_step_qn, const InterpKernel *const y_filters,

246

int y0_qn, int y_step_qn, int w, int h) {

247

// TODO(afergs): Update comment here

248

// Note: Fixed size intermediate buffer, temp, places limits on parameters.

249

// 2d filtering proceeds in 2 steps:

250

// (1) Interpolate horizontally into an intermediate buffer, temp.

251

// (2) Interpolate temp vertically to derive the sub-pixel result.

252

// Deriving the maximum number of rows in the temp buffer (135):

253

// --Smallest scaling factor is x1/2 ==> y_step_qn = 32 (Normative).

254

// --Largest block size is 64x64 pixels.

255

// --64 rows in the downscaled frame span a distance of (64 - 1) * 32 in the

256

// original frame (in 1/16th pixel units).

257

// --Must round-up because block may be located at sub-pixel position.

258

// --Require an additional SUBPEL_TAPS rows for the 8-tap filter tails.

259

// --((64 - 1) * 32 + 15) >> 4 + 8 = 135.

260

uint8_t temp[MAX_EXT_SIZE * MAX_SB_SIZE];

Sebastien Alaiwan

2017-11-29 11:53:48 +0100

[diff] [blame]

261

const int intermediate_height =

Fergus Simpson

2017-06-27 11:23:34 -0700

[diff] [blame]

262

(((h - 1) * y_step_qn + y0_qn) >> SCALE_SUBPEL_BITS) + SUBPEL_TAPS;

263

264

assert(w <= MAX_SB_SIZE);

265

assert(h <= MAX_SB_SIZE);

266

267

assert(y_step_qn <= SCALE_SUBPEL_BITS * 2);

268

assert(x_step_qn <= SCALE_SUBPEL_BITS * 2);

269

270

convolve_horiz_scale_c(src - src_stride * (SUBPEL_TAPS / 2 - 1), src_stride,

271

temp, MAX_SB_SIZE, x_filters, x0_qn, x_step_qn, w,

272

intermediate_height);

273

convolve_vert_scale_c(temp + MAX_SB_SIZE * (SUBPEL_TAPS / 2 - 1), MAX_SB_SIZE,

274

dst, dst_stride, y_filters, y0_qn, y_step_qn, w, h);

275

}

276

Yaowu Xu

2016-08-22 16:08:15 -0700

[diff] [blame]

277

// Recovers the start of the kernel table from a pointer into it by clearing
// the low 8 address bits.
// NOTE: This assumes that the filter table is 256-byte aligned.
// TODO(agrange) Modify to make independent of table alignment.
static const InterpKernel *get_filter_base(const int16_t *filter) {
  const intptr_t address = (intptr_t)filter;
  const intptr_t aligned = address & ~((intptr_t)0xFF);
  return (const InterpKernel *)aligned;
}

282

283

// Returns how many InterpKernel entries `f` lies past `base`.
static int get_filter_offset(const int16_t *f, const InterpKernel *base) {
  const InterpKernel *const kernel = (const InterpKernel *)(intptr_t)f;
  return (int)(kernel - base);
}

286

Yaowu Xu

2016-08-30 14:01:10 -0700

[diff] [blame]

287

void aom_convolve8_horiz_c(const uint8_t *src, ptrdiff_t src_stride,

Yaowu Xu

2016-08-22 16:08:15 -0700

[diff] [blame]

288

uint8_t *dst, ptrdiff_t dst_stride,

289

const int16_t *filter_x, int x_step_q4,

290

const int16_t *filter_y, int y_step_q4, int w,

291

int h) {

292

const InterpKernel *const filters_x = get_filter_base(filter_x);

293

const int x0_q4 = get_filter_offset(filter_x, filters_x);

(void)filter_y;

(void)y_step_q4;

convolve_horiz(src, src_stride, dst, dst_stride, filters_x, x0_q4, x_step_q4,

w, h);

}

Fergus Simpson

2017-06-27 11:23:34 -0700

[diff] [blame]

302

void aom_convolve8_horiz_scale_c(const uint8_t *src, ptrdiff_t src_stride,

303

uint8_t *dst, ptrdiff_t dst_stride,

304

const int16_t *filter_x, int subpel_x,

305

int x_step_qn, const int16_t *filter_y,

306

int subpel_y, int y_step_qn, int w, int h) {

307

const InterpKernel *const filters_x = get_filter_base(filter_x);

(void)subpel_y;

(void)filter_y;

(void)y_step_qn;

convolve_horiz_scale_c(src, src_stride, dst, dst_stride, filters_x, subpel_x,

x_step_qn, w, h);

}

Yaowu Xu

2016-08-30 14:01:10 -0700

[diff] [blame]

317

void aom_convolve8_avg_horiz_c(const uint8_t *src, ptrdiff_t src_stride,

Yaowu Xu

2016-08-22 16:08:15 -0700

[diff] [blame]

318

uint8_t *dst, ptrdiff_t dst_stride,

319

const int16_t *filter_x, int x_step_q4,

320

const int16_t *filter_y, int y_step_q4, int w,

321

int h) {

322

const InterpKernel *const filters_x = get_filter_base(filter_x);

323

const int x0_q4 = get_filter_offset(filter_x, filters_x);

(void)filter_y;

(void)y_step_q4;

convolve_avg_horiz(src, src_stride, dst, dst_stride, filters_x, x0_q4,

x_step_q4, w, h);

}

Fergus Simpson

2017-06-27 11:23:34 -0700

[diff] [blame]

332

void aom_convolve8_avg_horiz_scale_c(const uint8_t *src, ptrdiff_t src_stride,

333

uint8_t *dst, ptrdiff_t dst_stride,

334

const int16_t *filter_x, int subpel_x,

335

int x_step_qn, const int16_t *filter_y,

336

int subpel_y, int y_step_qn, int w,

337

int h) {

338

const InterpKernel *const filters_x = get_filter_base(filter_x);

(void)subpel_y;

(void)filter_y;

(void)y_step_qn;

convolve_avg_horiz_scale_c(src, src_stride, dst, dst_stride, filters_x,

345

subpel_x, x_step_qn, w, h);

346

}

347

Yaowu Xu

2016-08-30 14:01:10 -0700

[diff] [blame]

348

void aom_convolve8_vert_c(const uint8_t *src, ptrdiff_t src_stride,

Yaowu Xu

2016-08-22 16:08:15 -0700

[diff] [blame]

349

uint8_t *dst, ptrdiff_t dst_stride,

350

const int16_t *filter_x, int x_step_q4,

351

const int16_t *filter_y, int y_step_q4, int w,

352

int h) {

353

const InterpKernel *const filters_y = get_filter_base(filter_y);

354

const int y0_q4 = get_filter_offset(filter_y, filters_y);

(void)filter_x;

(void)x_step_q4;

convolve_vert(src, src_stride, dst, dst_stride, filters_y, y0_q4, y_step_q4,

w, h);

}

Fergus Simpson

2017-06-27 11:23:34 -0700

[diff] [blame]

363

void aom_convolve8_vert_scale_c(const uint8_t *src, ptrdiff_t src_stride,

364

uint8_t *dst, ptrdiff_t dst_stride,

365

const int16_t *filter_x, int subpel_x,

366

int x_step_qn, const int16_t *filter_y,

367

int subpel_y, int y_step_qn, int w, int h) {

368

const InterpKernel *const filters_y = get_filter_base(filter_y);

(void)subpel_x;

(void)filter_x;

(void)x_step_qn;

convolve_vert_scale_c(src, src_stride, dst, dst_stride, filters_y, subpel_y,

y_step_qn, w, h);

}

Yaowu Xu

2016-08-30 14:01:10 -0700

[diff] [blame]

378

void aom_convolve8_avg_vert_c(const uint8_t *src, ptrdiff_t src_stride,

Yaowu Xu

2016-08-22 16:08:15 -0700

[diff] [blame]

379

uint8_t *dst, ptrdiff_t dst_stride,

380

const int16_t *filter_x, int x_step_q4,

381

const int16_t *filter_y, int y_step_q4, int w,

382

int h) {

383

const InterpKernel *const filters_y = get_filter_base(filter_y);

384

const int y0_q4 = get_filter_offset(filter_y, filters_y);

(void)filter_x;

(void)x_step_q4;

convolve_avg_vert(src, src_stride, dst, dst_stride, filters_y, y0_q4,

y_step_q4, w, h);

}

Fergus Simpson

2017-06-27 11:23:34 -0700

[diff] [blame]

393

void aom_convolve8_avg_vert_scale_c(const uint8_t *src, ptrdiff_t src_stride,

394

uint8_t *dst, ptrdiff_t dst_stride,

395

const int16_t *filter_x, int subpel_x,

396

int x_step_qn, const int16_t *filter_y,

397

int subpel_y, int y_step_qn, int w, int h) {

398

const InterpKernel *const filters_y = get_filter_base(filter_y);

(void)subpel_x;

(void)filter_x;

(void)x_step_qn;

convolve_avg_vert_scale_c(src, src_stride, dst, dst_stride, filters_y,

405

subpel_y, y_step_qn, w, h);

406

}

407

Yaowu Xu

2016-08-30 14:01:10 -0700

[diff] [blame]

408

void aom_convolve8_c(const uint8_t *src, ptrdiff_t src_stride, uint8_t *dst,

Yaowu Xu

2016-08-22 16:08:15 -0700

[diff] [blame]

409

ptrdiff_t dst_stride, const int16_t *filter_x,

410

int x_step_q4, const int16_t *filter_y, int y_step_q4,

411

int w, int h) {

412

const InterpKernel *const filters_x = get_filter_base(filter_x);

413

const int x0_q4 = get_filter_offset(filter_x, filters_x);

414

415

const InterpKernel *const filters_y = get_filter_base(filter_y);

416

const int y0_q4 = get_filter_offset(filter_y, filters_y);

417

418

convolve(src, src_stride, dst, dst_stride, filters_x, x0_q4, x_step_q4,

419

filters_y, y0_q4, y_step_q4, w, h);

420

}

421

Fergus Simpson

2017-06-27 11:23:34 -0700

[diff] [blame]

422

void aom_convolve8_scale_c(const uint8_t *src, ptrdiff_t src_stride,

423

uint8_t *dst, ptrdiff_t dst_stride,

424

const int16_t *filter_x, int subpel_x, int x_step_qn,

425

const int16_t *filter_y, int subpel_y, int y_step_qn,

426

int w, int h) {

427

const InterpKernel *const filters_x = get_filter_base(filter_x);

428

429

const InterpKernel *const filters_y = get_filter_base(filter_y);

430

431

convolve_scale_c(src, src_stride, dst, dst_stride, filters_x, subpel_x,

432

x_step_qn, filters_y, subpel_y, y_step_qn, w, h);

433

}

434

Yaowu Xu

2016-08-30 14:01:10 -0700

[diff] [blame]

435

void aom_convolve8_avg_c(const uint8_t *src, ptrdiff_t src_stride, uint8_t *dst,

Yaowu Xu

2016-08-22 16:08:15 -0700

[diff] [blame]

436

ptrdiff_t dst_stride, const int16_t *filter_x,

437

int x_step_q4, const int16_t *filter_y, int y_step_q4,

438

int w, int h) {

439

/* Fixed size intermediate buffer places limits on parameters. */

440

DECLARE_ALIGNED(16, uint8_t, temp[MAX_SB_SIZE * MAX_SB_SIZE]);

441

assert(w <= MAX_SB_SIZE);

442

assert(h <= MAX_SB_SIZE);

443

Yaowu Xu

2016-08-30 14:01:10 -0700

[diff] [blame]

444

aom_convolve8_c(src, src_stride, temp, MAX_SB_SIZE, filter_x, x_step_q4,

Yaowu Xu

2016-08-22 16:08:15 -0700

[diff] [blame]

445

filter_y, y_step_q4, w, h);

Yaowu Xu

2016-08-30 14:01:10 -0700

[diff] [blame]

446

aom_convolve_avg_c(temp, MAX_SB_SIZE, dst, dst_stride, NULL, 0, NULL, 0, w,

Yaowu Xu

2016-08-22 16:08:15 -0700

[diff] [blame]

h);

}

Fergus Simpson

2017-06-27 11:23:34 -0700

[diff] [blame]

450

void aom_convolve8_avg_scale_c(const uint8_t *src, ptrdiff_t src_stride,

451

uint8_t *dst, ptrdiff_t dst_stride,

452

const int16_t *filter_x, int subpel_x,

453

int x_step_qn, const int16_t *filter_y,

454

int subpel_y, int y_step_qn, int w, int h) {

455

/* Fixed size intermediate buffer places limits on parameters. */

456

DECLARE_ALIGNED(16, uint8_t, temp[MAX_SB_SIZE * MAX_SB_SIZE]);

457

assert(w <= MAX_SB_SIZE);

458

assert(h <= MAX_SB_SIZE);

459

460

aom_convolve8_scale_c(src, src_stride, temp, MAX_SB_SIZE, filter_x, subpel_x,

461

x_step_qn, filter_y, subpel_y, y_step_qn, w, h);

462

aom_convolve_avg_c(temp, MAX_SB_SIZE, dst, dst_stride, NULL, 0, NULL, 0, w,

h);

}

Yaowu Xu

2016-08-30 14:01:10 -0700

[diff] [blame]

466

// Copies a w x h block of bytes from src to dst, one row at a time. The
// filter arguments exist only to match the common convolve signature and are
// ignored.
void aom_convolve_copy_c(const uint8_t *src, ptrdiff_t src_stride, uint8_t *dst,
                         ptrdiff_t dst_stride, const int16_t *filter_x,
                         int filter_x_stride, const int16_t *filter_y,
                         int filter_y_stride, int w, int h) {
  (void)filter_x;
  (void)filter_x_stride;
  (void)filter_y;
  (void)filter_y_stride;

  for (int row = 0; row < h; ++row) {
    memcpy(dst, src, w);
    src += src_stride;
    dst += dst_stride;
  }
}

Yaowu Xu

2016-08-30 14:01:10 -0700

[diff] [blame]

484

// Merges a w x h block of src into dst: each dst pixel becomes
// ROUND_POWER_OF_TWO(dst + src, 1). The filter arguments exist only to match
// the common convolve signature and are ignored.
void aom_convolve_avg_c(const uint8_t *src, ptrdiff_t src_stride, uint8_t *dst,
                        ptrdiff_t dst_stride, const int16_t *filter_x,
                        int filter_x_stride, const int16_t *filter_y,
                        int filter_y_stride, int w, int h) {
  (void)filter_x;
  (void)filter_x_stride;
  (void)filter_y;
  (void)filter_y_stride;

  for (int row = 0; row < h; ++row, src += src_stride, dst += dst_stride) {
    for (int col = 0; col < w; ++col) {
      dst[col] = ROUND_POWER_OF_TWO(dst[col] + src[col], 1);
    }
  }
}

Yaowu Xu

2016-08-30 14:01:10 -0700

[diff] [blame]

501

// Thin alias: forwards every argument unchanged to aom_convolve8_horiz_c().
void aom_scaled_horiz_c(const uint8_t *src, ptrdiff_t src_stride, uint8_t *dst,
                        ptrdiff_t dst_stride, const int16_t *filter_x,
                        int x_step_q4, const int16_t *filter_y, int y_step_q4,
                        int w, int h) {
  aom_convolve8_horiz_c(src, src_stride,
                        dst, dst_stride,
                        filter_x, x_step_q4,
                        filter_y, y_step_q4,
                        w, h);
}

508

Yaowu Xu

2016-08-30 14:01:10 -0700

[diff] [blame]

509

// Thin alias: forwards every argument unchanged to aom_convolve8_vert_c().
void aom_scaled_vert_c(const uint8_t *src, ptrdiff_t src_stride, uint8_t *dst,
                       ptrdiff_t dst_stride, const int16_t *filter_x,
                       int x_step_q4, const int16_t *filter_y, int y_step_q4,
                       int w, int h) {
  aom_convolve8_vert_c(src, src_stride,
                       dst, dst_stride,
                       filter_x, x_step_q4,
                       filter_y, y_step_q4,
                       w, h);
}

516

Yaowu Xu

2016-08-30 14:01:10 -0700

[diff] [blame]

517

// Thin alias: forwards every argument unchanged to aom_convolve8_c().
void aom_scaled_2d_c(const uint8_t *src, ptrdiff_t src_stride, uint8_t *dst,
                     ptrdiff_t dst_stride, const int16_t *filter_x,
                     int x_step_q4, const int16_t *filter_y, int y_step_q4,
                     int w, int h) {
  aom_convolve8_c(src, src_stride,
                  dst, dst_stride,
                  filter_x, x_step_q4,
                  filter_y, y_step_q4,
                  w, h);
}

524

Yaowu Xu

2016-08-30 14:01:10 -0700

[diff] [blame]

525

// Thin alias: forwards every argument unchanged to
// aom_convolve8_avg_horiz_c().
void aom_scaled_avg_horiz_c(const uint8_t *src, ptrdiff_t src_stride,
                            uint8_t *dst, ptrdiff_t dst_stride,
                            const int16_t *filter_x, int x_step_q4,
                            const int16_t *filter_y, int y_step_q4, int w,
                            int h) {
  aom_convolve8_avg_horiz_c(src, src_stride,
                            dst, dst_stride,
                            filter_x, x_step_q4,
                            filter_y, y_step_q4,
                            w, h);
}

533

Yaowu Xu

2016-08-30 14:01:10 -0700

[diff] [blame]

534

// Thin alias: forwards every argument unchanged to
// aom_convolve8_avg_vert_c().
void aom_scaled_avg_vert_c(const uint8_t *src, ptrdiff_t src_stride,
                           uint8_t *dst, ptrdiff_t dst_stride,
                           const int16_t *filter_x, int x_step_q4,
                           const int16_t *filter_y, int y_step_q4, int w,
                           int h) {
  aom_convolve8_avg_vert_c(src, src_stride,
                           dst, dst_stride,
                           filter_x, x_step_q4,
                           filter_y, y_step_q4,
                           w, h);
}

542

Yaowu Xu

2016-08-30 14:01:10 -0700

[diff] [blame]

543

// Thin alias: forwards every argument unchanged to aom_convolve8_avg_c().
void aom_scaled_avg_2d_c(const uint8_t *src, ptrdiff_t src_stride, uint8_t *dst,
                         ptrdiff_t dst_stride, const int16_t *filter_x,
                         int x_step_q4, const int16_t *filter_y, int y_step_q4,
                         int w, int h) {
  aom_convolve8_avg_c(src, src_stride,
                      dst, dst_stride,
                      filter_x, x_step_q4,
                      filter_y, y_step_q4,
                      w, h);
}

550

Sebastien Alaiwan

2017-11-09 17:23:58 +0100

[diff] [blame]

551

// 16-bit-sample variant of the vertical dot product: multiplies SUBPEL_TAPS
// samples spaced `a_stride` apart (starting at `a`) with the SUBPEL_TAPS
// coefficients starting at `b` and returns the sum.
static INLINE int highbd_vert_scalar_product(const uint16_t *a,
                                             ptrdiff_t a_stride,
                                             const int16_t *b) {
  int acc = 0;
  int tap = 0;
  while (tap < SUBPEL_TAPS) {
    const ptrdiff_t offset = tap * a_stride;
    acc += a[offset] * b[tap];
    ++tap;
  }
  return acc;
}

Sebastien Alaiwan

2017-11-09 17:23:58 +0100

[diff] [blame]

558

Fergus Simpson

2017-06-27 11:23:34 -0700

[diff] [blame]

559

// TODO(afergs): Make sure this works too

David Barker

2016-12-15 15:39:10 +0000

[diff] [blame]

560

#if CONFIG_LOOP_RESTORATION

561

static void convolve_add_src_horiz(const uint8_t *src, ptrdiff_t src_stride,

562

uint8_t *dst, ptrdiff_t dst_stride,

563

const InterpKernel *x_filters, int x0_q4,

564

int x_step_q4, int w, int h) {

David Barker

2016-12-15 15:39:10 +0000

[diff] [blame]

565

src -= SUBPEL_TAPS / 2 - 1;

Sebastien Alaiwan

2017-11-09 16:59:25 +0100

[diff] [blame]

566

for (int y = 0; y < h; ++y) {

David Barker

2016-12-15 15:39:10 +0000

[diff] [blame]

567

int x_q4 = x0_q4;

Sebastien Alaiwan

2017-11-09 16:59:25 +0100

[diff] [blame]

568

for (int x = 0; x < w; ++x) {

David Barker

2016-12-15 15:39:10 +0000

[diff] [blame]

569

const uint8_t *const src_x = &src[x_q4 >> SUBPEL_BITS];

570

const int16_t *const x_filter = x_filters[x_q4 & SUBPEL_MASK];

Sebastien Alaiwan

2017-11-09 17:23:58 +0100

[diff] [blame]

571

572

const int sum = horz_scalar_product(src_x, x_filter);

David Barker

2016-12-15 15:39:10 +0000

[diff] [blame]

573

dst[x] = clip_pixel(ROUND_POWER_OF_TWO(sum, FILTER_BITS) +

574

src_x[SUBPEL_TAPS / 2 - 1]);

x_q4 += x_step_q4;

}

src += src_stride;

dst += dst_stride;

}

}

static void convolve_add_src_vert(const uint8_t *src, ptrdiff_t src_stride,

583

uint8_t *dst, ptrdiff_t dst_stride,

584

const InterpKernel *y_filters, int y0_q4,

585

int y_step_q4, int w, int h) {

David Barker

2016-12-15 15:39:10 +0000

[diff] [blame]

586

src -= src_stride * (SUBPEL_TAPS / 2 - 1);

587

Sebastien Alaiwan

2017-11-09 16:59:25 +0100

[diff] [blame]

588

for (int x = 0; x < w; ++x) {

David Barker

2016-12-15 15:39:10 +0000

[diff] [blame]

589

int y_q4 = y0_q4;

Sebastien Alaiwan

2017-11-09 16:59:25 +0100

[diff] [blame]

590

for (int y = 0; y < h; ++y) {

David Barker

2016-12-15 15:39:10 +0000

[diff] [blame]

591

const unsigned char *src_y = &src[(y_q4 >> SUBPEL_BITS) * src_stride];

592

const int16_t *const y_filter = y_filters[y_q4 & SUBPEL_MASK];

Sebastien Alaiwan

2017-11-09 17:23:58 +0100

[diff] [blame]

593

const int sum = vert_scalar_product(src_y, src_stride, y_filter);

David Barker

2016-12-15 15:39:10 +0000

[diff] [blame]

594

dst[y * dst_stride] =

595

clip_pixel(ROUND_POWER_OF_TWO(sum, FILTER_BITS) +

596

src_y[(SUBPEL_TAPS / 2 - 1) * src_stride]);

y_q4 += y_step_q4;

}

++src;

++dst;

}

}

static void convolve_add_src(const uint8_t *src, ptrdiff_t src_stride,

605

uint8_t *dst, ptrdiff_t dst_stride,

606

const InterpKernel *const x_filters, int x0_q4,

607

int x_step_q4, const InterpKernel *const y_filters,

608

int y0_q4, int y_step_q4, int w, int h) {

609

uint8_t temp[MAX_EXT_SIZE * MAX_SB_SIZE];

Sebastien Alaiwan

2017-11-29 11:53:48 +0100

[diff] [blame]

610

const int intermediate_height =

David Barker

2016-12-15 15:39:10 +0000

[diff] [blame]

611

(((h - 1) * y_step_q4 + y0_q4) >> SUBPEL_BITS) + SUBPEL_TAPS;

612

613

assert(w <= MAX_SB_SIZE);

614

assert(h <= MAX_SB_SIZE);

615

616

assert(y_step_q4 <= 32);

617

assert(x_step_q4 <= 32);

618

619

convolve_add_src_horiz(src - src_stride * (SUBPEL_TAPS / 2 - 1), src_stride,

620

temp, MAX_SB_SIZE, x_filters, x0_q4, x_step_q4, w,

621

intermediate_height);

622

convolve_add_src_vert(temp + MAX_SB_SIZE * (SUBPEL_TAPS / 2 - 1), MAX_SB_SIZE,

623

dst, dst_stride, y_filters, y0_q4, y_step_q4, w, h);

624

}

625

626

void aom_convolve8_add_src_horiz_c(const uint8_t *src, ptrdiff_t src_stride,

627

uint8_t *dst, ptrdiff_t dst_stride,

628

const int16_t *filter_x, int x_step_q4,

629

const int16_t *filter_y, int y_step_q4,

630

int w, int h) {

631

const InterpKernel *const filters_x = get_filter_base(filter_x);

632

const int x0_q4 = get_filter_offset(filter_x, filters_x);

(void)filter_y;

(void)y_step_q4;

convolve_add_src_horiz(src, src_stride, dst, dst_stride, filters_x, x0_q4,

x_step_q4, w, h);

}

void aom_convolve8_add_src_vert_c(const uint8_t *src, ptrdiff_t src_stride,

642

uint8_t *dst, ptrdiff_t dst_stride,

643

const int16_t *filter_x, int x_step_q4,

644

const int16_t *filter_y, int y_step_q4, int w,

645

int h) {

646

const InterpKernel *const filters_y = get_filter_base(filter_y);

647

const int y0_q4 = get_filter_offset(filter_y, filters_y);

(void)filter_x;

(void)x_step_q4;

convolve_add_src_vert(src, src_stride, dst, dst_stride, filters_y, y0_q4,

y_step_q4, w, h);

}

void aom_convolve8_add_src_c(const uint8_t *src, ptrdiff_t src_stride,

657

uint8_t *dst, ptrdiff_t dst_stride,

658

const int16_t *filter_x, int x_step_q4,

659

const int16_t *filter_y, int y_step_q4, int w,

660

int h) {

661

const InterpKernel *const filters_x = get_filter_base(filter_x);

662

const int x0_q4 = get_filter_offset(filter_x, filters_x);

663

664

const InterpKernel *const filters_y = get_filter_base(filter_y);

665

const int y0_q4 = get_filter_offset(filter_y, filters_y);

666

667

convolve_add_src(src, src_stride, dst, dst_stride, filters_x, x0_q4,

668

x_step_q4, filters_y, y0_q4, y_step_q4, w, h);

669

}

Debargha Mukherjee

2017-05-12 10:44:03 -0700

[diff] [blame]

670

Debargha Mukherjee

2017-05-12 10:44:03 -0700

[diff] [blame]

671

static void convolve_add_src_horiz_hip(const uint8_t *src, ptrdiff_t src_stride,

672

uint16_t *dst, ptrdiff_t dst_stride,

673

const InterpKernel *x_filters, int x0_q4,

674

int x_step_q4, int w, int h) {

Debargha Mukherjee

2017-05-25 12:07:47 -0700

[diff] [blame]

675

const int bd = 8;

Debargha Mukherjee

2017-05-12 10:44:03 -0700

[diff] [blame]

676

src -= SUBPEL_TAPS / 2 - 1;

Sebastien Alaiwan

2017-11-09 16:59:25 +0100

[diff] [blame]

677

for (int y = 0; y < h; ++y) {

Debargha Mukherjee

2017-05-12 10:44:03 -0700

[diff] [blame]

678

int x_q4 = x0_q4;

Sebastien Alaiwan

2017-11-09 16:59:25 +0100

[diff] [blame]

679

for (int x = 0; x < w; ++x) {

Debargha Mukherjee

2017-05-12 10:44:03 -0700

[diff] [blame]

680

const uint8_t *const src_x = &src[x_q4 >> SUBPEL_BITS];

681

const int16_t *const x_filter = x_filters[x_q4 & SUBPEL_MASK];

Sebastien Alaiwan

2017-11-09 17:23:58 +0100

[diff] [blame]

682

const int rounding = ((int)src_x[SUBPEL_TAPS / 2 - 1] << FILTER_BITS) +

683

(1 << (bd + FILTER_BITS - 1));

684

const int sum = horz_scalar_product(src_x, x_filter) + rounding;

Debargha Mukherjee

2017-05-12 10:44:03 -0700

[diff] [blame]

685

dst[x] =

686

(uint16_t)clamp(ROUND_POWER_OF_TWO(sum, FILTER_BITS - EXTRAPREC_BITS),

Debargha Mukherjee

2017-05-25 12:07:47 -0700

[diff] [blame]

687

0, EXTRAPREC_CLAMP_LIMIT(bd) - 1);

Debargha Mukherjee

2017-05-12 10:44:03 -0700

[diff] [blame]

x_q4 += x_step_q4;

}

src += src_stride;

dst += dst_stride;

}

}

static void convolve_add_src_vert_hip(const uint16_t *src, ptrdiff_t src_stride,

696

uint8_t *dst, ptrdiff_t dst_stride,

697

const InterpKernel *y_filters, int y0_q4,

698

int y_step_q4, int w, int h) {

Debargha Mukherjee

2017-05-25 12:07:47 -0700

[diff] [blame]

699

const int bd = 8;

Debargha Mukherjee

2017-05-12 10:44:03 -0700

[diff] [blame]

700

src -= src_stride * (SUBPEL_TAPS / 2 - 1);

701

Sebastien Alaiwan

2017-11-09 16:59:25 +0100

[diff] [blame]

702

for (int x = 0; x < w; ++x) {

Debargha Mukherjee

2017-05-12 10:44:03 -0700

[diff] [blame]

703

int y_q4 = y0_q4;

Sebastien Alaiwan

2017-11-09 16:59:25 +0100

[diff] [blame]

704

for (int y = 0; y < h; ++y) {

Debargha Mukherjee

2017-05-12 10:44:03 -0700

[diff] [blame]

705

const uint16_t *src_y = &src[(y_q4 >> SUBPEL_BITS) * src_stride];

706

const int16_t *const y_filter = y_filters[y_q4 & SUBPEL_MASK];

Sebastien Alaiwan

2017-11-09 17:23:58 +0100

[diff] [blame]

707

const int rounding =

Debargha Mukherjee

2017-05-25 12:07:47 -0700

[diff] [blame]

708

((int)src_y[(SUBPEL_TAPS / 2 - 1) * src_stride] << FILTER_BITS) -

709

(1 << (bd + FILTER_BITS + EXTRAPREC_BITS - 1));

Sebastien Alaiwan

2017-11-09 17:23:58 +0100

[diff] [blame]

710

const int sum =

711

highbd_vert_scalar_product(src_y, src_stride, y_filter) + rounding;

Debargha Mukherjee

2017-05-12 10:44:03 -0700

[diff] [blame]

712

dst[y * dst_stride] =

713

clip_pixel(ROUND_POWER_OF_TWO(sum, FILTER_BITS + EXTRAPREC_BITS));

y_q4 += y_step_q4;

}

++src;

++dst;

}

}

static void convolve_add_src_hip(const uint8_t *src, ptrdiff_t src_stride,

722

uint8_t *dst, ptrdiff_t dst_stride,

723

const InterpKernel *const x_filters, int x0_q4,

724

int x_step_q4,

725

const InterpKernel *const y_filters, int y0_q4,

726

int y_step_q4, int w, int h) {

727

uint16_t temp[MAX_EXT_SIZE * MAX_SB_SIZE];

Sebastien Alaiwan

2017-11-29 11:53:48 +0100

[diff] [blame]

728

const int intermediate_height =

Debargha Mukherjee

2017-05-12 10:44:03 -0700

[diff] [blame]

729

(((h - 1) * y_step_q4 + y0_q4) >> SUBPEL_BITS) + SUBPEL_TAPS;

730

731

assert(w <= MAX_SB_SIZE);

732

assert(h <= MAX_SB_SIZE);

733

734

assert(y_step_q4 <= 32);

735

assert(x_step_q4 <= 32);

736

737

convolve_add_src_horiz_hip(src - src_stride * (SUBPEL_TAPS / 2 - 1),

738

src_stride, temp, MAX_SB_SIZE, x_filters, x0_q4,

739

x_step_q4, w, intermediate_height);

740

convolve_add_src_vert_hip(temp + MAX_SB_SIZE * (SUBPEL_TAPS / 2 - 1),

741

MAX_SB_SIZE, dst, dst_stride, y_filters, y0_q4,

y_step_q4, w, h);

}

void aom_convolve8_add_src_horiz_hip_c(const uint8_t *src, ptrdiff_t src_stride,

746

uint16_t *dst, ptrdiff_t dst_stride,

747

const int16_t *filter_x, int x_step_q4,

748

const int16_t *filter_y, int y_step_q4,

749

int w, int h) {

750

const InterpKernel *const filters_x = get_filter_base(filter_x);

751

const int x0_q4 = get_filter_offset(filter_x, filters_x);

(void)filter_y;

(void)y_step_q4;

convolve_add_src_horiz_hip(src, src_stride, dst, dst_stride, filters_x, x0_q4,

x_step_q4, w, h);

}

void aom_convolve8_add_src_vert_hip_c(const uint16_t *src, ptrdiff_t src_stride,

761

uint8_t *dst, ptrdiff_t dst_stride,

762

const int16_t *filter_x, int x_step_q4,

763

const int16_t *filter_y, int y_step_q4,

764

int w, int h) {

765

const InterpKernel *const filters_y = get_filter_base(filter_y);

766

const int y0_q4 = get_filter_offset(filter_y, filters_y);

(void)filter_x;

(void)x_step_q4;

convolve_add_src_vert_hip(src, src_stride, dst, dst_stride, filters_y, y0_q4,

y_step_q4, w, h);

}

void aom_convolve8_add_src_hip_c(const uint8_t *src, ptrdiff_t src_stride,

776

uint8_t *dst, ptrdiff_t dst_stride,

777

const int16_t *filter_x, int x_step_q4,

778

const int16_t *filter_y, int y_step_q4, int w,

779

int h) {

780

const InterpKernel *const filters_x = get_filter_base(filter_x);

781

const int x0_q4 = get_filter_offset(filter_x, filters_x);

782

783

const InterpKernel *const filters_y = get_filter_base(filter_y);

784

const int y0_q4 = get_filter_offset(filter_y, filters_y);

785

786

convolve_add_src_hip(src, src_stride, dst, dst_stride, filters_x, x0_q4,

787

x_step_q4, filters_y, y0_q4, y_step_q4, w, h);

788

}

David Barker

2016-12-15 15:39:10 +0000

[diff] [blame]

789

#endif // CONFIG_LOOP_RESTORATION

790

Sebastien Alaiwan

2017-11-09 17:23:58 +0100

[diff] [blame]

791

static INLINE int highbd_horz_scalar_product(const uint16_t *a,

792

const int16_t *b) {

793

int sum = 0;

794

for (int k = 0; k < SUBPEL_TAPS; ++k) sum += a[k] * b[k];

return sum;

}

Yaowu Xu

2016-08-22 16:08:15 -0700

[diff] [blame]

798

static void highbd_convolve_horiz(const uint8_t *src8, ptrdiff_t src_stride,

799

uint8_t *dst8, ptrdiff_t dst_stride,

800

const InterpKernel *x_filters, int x0_q4,

801

int x_step_q4, int w, int h, int bd) {

Yaowu Xu

2016-08-22 16:08:15 -0700

[diff] [blame]

802

uint16_t *src = CONVERT_TO_SHORTPTR(src8);

803

uint16_t *dst = CONVERT_TO_SHORTPTR(dst8);

804

src -= SUBPEL_TAPS / 2 - 1;

Sebastien Alaiwan

2017-11-09 16:59:25 +0100

[diff] [blame]

805

for (int y = 0; y < h; ++y) {

Yaowu Xu

2016-08-22 16:08:15 -0700

[diff] [blame]

806

int x_q4 = x0_q4;

Sebastien Alaiwan

2017-11-09 16:59:25 +0100

[diff] [blame]

807

for (int x = 0; x < w; ++x) {

Yaowu Xu

2016-08-22 16:08:15 -0700

[diff] [blame]

808

const uint16_t *const src_x = &src[x_q4 >> SUBPEL_BITS];

809

const int16_t *const x_filter = x_filters[x_q4 & SUBPEL_MASK];

Sebastien Alaiwan

2017-11-09 17:23:58 +0100

[diff] [blame]

810

const int sum = highbd_horz_scalar_product(src_x, x_filter);

Yaowu Xu

2016-08-22 16:08:15 -0700

[diff] [blame]

811

dst[x] = clip_pixel_highbd(ROUND_POWER_OF_TWO(sum, FILTER_BITS), bd);

x_q4 += x_step_q4;

}

src += src_stride;

dst += dst_stride;

}

}

static void highbd_convolve_avg_horiz(const uint8_t *src8, ptrdiff_t src_stride,

820

uint8_t *dst8, ptrdiff_t dst_stride,

821

const InterpKernel *x_filters, int x0_q4,

822

int x_step_q4, int w, int h, int bd) {

Yaowu Xu

2016-08-22 16:08:15 -0700

[diff] [blame]

823

uint16_t *src = CONVERT_TO_SHORTPTR(src8);

824

uint16_t *dst = CONVERT_TO_SHORTPTR(dst8);

825

src -= SUBPEL_TAPS / 2 - 1;

Sebastien Alaiwan

2017-11-09 16:59:25 +0100

[diff] [blame]

826

for (int y = 0; y < h; ++y) {

Yaowu Xu

2016-08-22 16:08:15 -0700

[diff] [blame]

827

int x_q4 = x0_q4;

Sebastien Alaiwan

2017-11-09 16:59:25 +0100

[diff] [blame]

828

for (int x = 0; x < w; ++x) {

Yaowu Xu

2016-08-22 16:08:15 -0700

[diff] [blame]

829

const uint16_t *const src_x = &src[x_q4 >> SUBPEL_BITS];

830

const int16_t *const x_filter = x_filters[x_q4 & SUBPEL_MASK];

Sebastien Alaiwan

2017-11-09 17:23:58 +0100

[diff] [blame]

831

const int sum = highbd_horz_scalar_product(src_x, x_filter);

Yaowu Xu

2016-08-22 16:08:15 -0700

[diff] [blame]

832

dst[x] = ROUND_POWER_OF_TWO(

833

dst[x] + clip_pixel_highbd(ROUND_POWER_OF_TWO(sum, FILTER_BITS), bd),

1);

x_q4 += x_step_q4;

}

src += src_stride;

dst += dst_stride;

}

}

static void highbd_convolve_vert(const uint8_t *src8, ptrdiff_t src_stride,

843

uint8_t *dst8, ptrdiff_t dst_stride,

844

const InterpKernel *y_filters, int y0_q4,

845

int y_step_q4, int w, int h, int bd) {

Yaowu Xu

2016-08-22 16:08:15 -0700

[diff] [blame]

846

uint16_t *src = CONVERT_TO_SHORTPTR(src8);

847

uint16_t *dst = CONVERT_TO_SHORTPTR(dst8);

848

src -= src_stride * (SUBPEL_TAPS / 2 - 1);

Sebastien Alaiwan

2017-11-09 16:59:25 +0100

[diff] [blame]

849

for (int x = 0; x < w; ++x) {

Yaowu Xu

2016-08-22 16:08:15 -0700

[diff] [blame]

850

int y_q4 = y0_q4;

Sebastien Alaiwan

2017-11-09 16:59:25 +0100

[diff] [blame]

851

for (int y = 0; y < h; ++y) {

Yaowu Xu

2016-08-22 16:08:15 -0700

[diff] [blame]

852

const uint16_t *src_y = &src[(y_q4 >> SUBPEL_BITS) * src_stride];

853

const int16_t *const y_filter = y_filters[y_q4 & SUBPEL_MASK];

Sebastien Alaiwan

2017-11-09 17:23:58 +0100

[diff] [blame]

854

const int sum = highbd_vert_scalar_product(src_y, src_stride, y_filter);

Yaowu Xu

2016-08-22 16:08:15 -0700

[diff] [blame]

855

dst[y * dst_stride] =

856

clip_pixel_highbd(ROUND_POWER_OF_TWO(sum, FILTER_BITS), bd);

y_q4 += y_step_q4;

}

++src;

++dst;

}

}

static void highbd_convolve_avg_vert(const uint8_t *src8, ptrdiff_t src_stride,

865

uint8_t *dst8, ptrdiff_t dst_stride,

866

const InterpKernel *y_filters, int y0_q4,

867

int y_step_q4, int w, int h, int bd) {

Yaowu Xu

2016-08-22 16:08:15 -0700

[diff] [blame]

868

uint16_t *src = CONVERT_TO_SHORTPTR(src8);

869

uint16_t *dst = CONVERT_TO_SHORTPTR(dst8);

870

src -= src_stride * (SUBPEL_TAPS / 2 - 1);

Sebastien Alaiwan

2017-11-09 16:59:25 +0100

[diff] [blame]

871

for (int x = 0; x < w; ++x) {

Yaowu Xu

2016-08-22 16:08:15 -0700

[diff] [blame]

872

int y_q4 = y0_q4;

Sebastien Alaiwan

2017-11-09 16:59:25 +0100

[diff] [blame]

873

for (int y = 0; y < h; ++y) {

Yaowu Xu

2016-08-22 16:08:15 -0700

[diff] [blame]

874

const uint16_t *src_y = &src[(y_q4 >> SUBPEL_BITS) * src_stride];

875

const int16_t *const y_filter = y_filters[y_q4 & SUBPEL_MASK];

Sebastien Alaiwan

2017-11-09 17:23:58 +0100

[diff] [blame]

876

const int sum = highbd_vert_scalar_product(src_y, src_stride, y_filter);

Yaowu Xu

2016-08-22 16:08:15 -0700

[diff] [blame]

877

dst[y * dst_stride] = ROUND_POWER_OF_TWO(

878

dst[y * dst_stride] +

879

clip_pixel_highbd(ROUND_POWER_OF_TWO(sum, FILTER_BITS), bd),

1);

y_q4 += y_step_q4;

}

++src;

++dst;

}

}

static void highbd_convolve(const uint8_t *src, ptrdiff_t src_stride,

889

uint8_t *dst, ptrdiff_t dst_stride,

890

const InterpKernel *const x_filters, int x0_q4,

891

int x_step_q4, const InterpKernel *const y_filters,

892

int y0_q4, int y_step_q4, int w, int h, int bd) {

893

// Note: Fixed size intermediate buffer, temp, places limits on parameters.

894

// 2d filtering proceeds in 2 steps:

895

// (1) Interpolate horizontally into an intermediate buffer, temp.

896

// (2) Interpolate temp vertically to derive the sub-pixel result.

897

// Deriving the maximum number of rows in the temp buffer (135):

898

// --Smallest scaling factor is x1/2 ==> y_step_q4 = 32 (Normative).

899

// --Largest block size is 64x64 pixels.

900

// --64 rows in the downscaled frame span a distance of (64 - 1) * 32 in the

901

// original frame (in 1/16th pixel units).

902

// --Must round-up because block may be located at sub-pixel position.

903

// --Require an additional SUBPEL_TAPS rows for the 8-tap filter tails.

904

// --((64 - 1) * 32 + 15) >> 4 + 8 = 135.

905

uint16_t temp[MAX_EXT_SIZE * MAX_SB_SIZE];

Sebastien Alaiwan

2017-11-29 11:53:48 +0100

[diff] [blame]

906

const int intermediate_height =

Yaowu Xu

2016-08-22 16:08:15 -0700

[diff] [blame]

907

(((h - 1) * y_step_q4 + y0_q4) >> SUBPEL_BITS) + SUBPEL_TAPS;

908

909

assert(w <= MAX_SB_SIZE);

910

assert(h <= MAX_SB_SIZE);

911

assert(y_step_q4 <= 32);

912

assert(x_step_q4 <= 32);

913

914

highbd_convolve_horiz(src - src_stride * (SUBPEL_TAPS / 2 - 1), src_stride,

915

CONVERT_TO_BYTEPTR(temp), MAX_SB_SIZE, x_filters, x0_q4,

916

x_step_q4, w, intermediate_height, bd);

917

highbd_convolve_vert(

918

CONVERT_TO_BYTEPTR(temp) + MAX_SB_SIZE * (SUBPEL_TAPS / 2 - 1),

919

MAX_SB_SIZE, dst, dst_stride, y_filters, y0_q4, y_step_q4, w, h, bd);

920

}

921

Yaowu Xu

2016-08-30 14:01:10 -0700

[diff] [blame]

922

void aom_highbd_convolve8_horiz_c(const uint8_t *src, ptrdiff_t src_stride,

Yaowu Xu

2016-08-22 16:08:15 -0700

[diff] [blame]

923

uint8_t *dst, ptrdiff_t dst_stride,

924

const int16_t *filter_x, int x_step_q4,

925

const int16_t *filter_y, int y_step_q4, int w,

926

int h, int bd) {

927

const InterpKernel *const filters_x = get_filter_base(filter_x);

928

const int x0_q4 = get_filter_offset(filter_x, filters_x);

(void)filter_y;

(void)y_step_q4;

highbd_convolve_horiz(src, src_stride, dst, dst_stride, filters_x, x0_q4,

933

x_step_q4, w, h, bd);

934

}

935

Yaowu Xu

2016-08-30 14:01:10 -0700

[diff] [blame]

936

void aom_highbd_convolve8_avg_horiz_c(const uint8_t *src, ptrdiff_t src_stride,

Yaowu Xu

2016-08-22 16:08:15 -0700

[diff] [blame]

937

uint8_t *dst, ptrdiff_t dst_stride,

938

const int16_t *filter_x, int x_step_q4,

939

const int16_t *filter_y, int y_step_q4,

940

int w, int h, int bd) {

941

const InterpKernel *const filters_x = get_filter_base(filter_x);

942

const int x0_q4 = get_filter_offset(filter_x, filters_x);

(void)filter_y;

(void)y_step_q4;

highbd_convolve_avg_horiz(src, src_stride, dst, dst_stride, filters_x, x0_q4,

947

x_step_q4, w, h, bd);

948

}

949

Yaowu Xu

2016-08-30 14:01:10 -0700

[diff] [blame]

950

void aom_highbd_convolve8_vert_c(const uint8_t *src, ptrdiff_t src_stride,

Yaowu Xu

2016-08-22 16:08:15 -0700

[diff] [blame]

951

uint8_t *dst, ptrdiff_t dst_stride,

952

const int16_t *filter_x, int x_step_q4,

953

const int16_t *filter_y, int y_step_q4, int w,

954

int h, int bd) {

955

const InterpKernel *const filters_y = get_filter_base(filter_y);

956

const int y0_q4 = get_filter_offset(filter_y, filters_y);

(void)filter_x;

(void)x_step_q4;

highbd_convolve_vert(src, src_stride, dst, dst_stride, filters_y, y0_q4,

961

y_step_q4, w, h, bd);

962

}

963

Yaowu Xu

2016-08-30 14:01:10 -0700

[diff] [blame]

964

void aom_highbd_convolve8_avg_vert_c(const uint8_t *src, ptrdiff_t src_stride,

Yaowu Xu

2016-08-22 16:08:15 -0700

[diff] [blame]

965

uint8_t *dst, ptrdiff_t dst_stride,

966

const int16_t *filter_x, int x_step_q4,

967

const int16_t *filter_y, int y_step_q4,

968

int w, int h, int bd) {

969

const InterpKernel *const filters_y = get_filter_base(filter_y);

970

const int y0_q4 = get_filter_offset(filter_y, filters_y);

(void)filter_x;

(void)x_step_q4;

highbd_convolve_avg_vert(src, src_stride, dst, dst_stride, filters_y, y0_q4,

975

y_step_q4, w, h, bd);

976

}

977

Yaowu Xu

2016-08-30 14:01:10 -0700

[diff] [blame]

978

void aom_highbd_convolve8_c(const uint8_t *src, ptrdiff_t src_stride,

Yaowu Xu

2016-08-22 16:08:15 -0700

[diff] [blame]

979

uint8_t *dst, ptrdiff_t dst_stride,

980

const int16_t *filter_x, int x_step_q4,

981

const int16_t *filter_y, int y_step_q4, int w,

982

int h, int bd) {

983

const InterpKernel *const filters_x = get_filter_base(filter_x);

984

const int x0_q4 = get_filter_offset(filter_x, filters_x);

985

986

const InterpKernel *const filters_y = get_filter_base(filter_y);

987

const int y0_q4 = get_filter_offset(filter_y, filters_y);

988

989

highbd_convolve(src, src_stride, dst, dst_stride, filters_x, x0_q4, x_step_q4,

990

filters_y, y0_q4, y_step_q4, w, h, bd);

991

}

992

Yaowu Xu

2016-08-30 14:01:10 -0700

[diff] [blame]

993

void aom_highbd_convolve8_avg_c(const uint8_t *src, ptrdiff_t src_stride,

Yaowu Xu

2016-08-22 16:08:15 -0700

[diff] [blame]

994

uint8_t *dst, ptrdiff_t dst_stride,

995

const int16_t *filter_x, int x_step_q4,

996

const int16_t *filter_y, int y_step_q4, int w,

997

int h, int bd) {

998

// Fixed size intermediate buffer places limits on parameters.

999

DECLARE_ALIGNED(16, uint16_t, temp[MAX_SB_SIZE * MAX_SB_SIZE]);

1000

assert(w <= MAX_SB_SIZE);

1001

assert(h <= MAX_SB_SIZE);

1002

Yaowu Xu

2016-08-30 14:01:10 -0700

[diff] [blame]

1003

aom_highbd_convolve8_c(src, src_stride, CONVERT_TO_BYTEPTR(temp), MAX_SB_SIZE,

Yaowu Xu

2016-08-22 16:08:15 -0700

[diff] [blame]

1004

filter_x, x_step_q4, filter_y, y_step_q4, w, h, bd);

Yaowu Xu

2016-08-30 14:01:10 -0700

[diff] [blame]

1005

aom_highbd_convolve_avg_c(CONVERT_TO_BYTEPTR(temp), MAX_SB_SIZE, dst,

Yaowu Xu

2016-08-22 16:08:15 -0700

[diff] [blame]

1006

dst_stride, NULL, 0, NULL, 0, w, h, bd);

1007

}

1008

Yaowu Xu

2016-08-30 14:01:10 -0700

[diff] [blame]

1009

// Copies a w x h block of 16-bit samples from src8 to dst8, row by row.
//
// src8 and dst8 are converted with CONVERT_TO_SHORTPTR() and the strides are
// applied to the resulting uint16_t pointers. Each row is copied with
// memcpy(). The filter arguments and bd are accepted for signature
// compatibility and ignored.
void aom_highbd_convolve_copy_c(const uint8_t *src8, ptrdiff_t src_stride,
                                uint8_t *dst8, ptrdiff_t dst_stride,
                                const int16_t *filter_x, int filter_x_stride,
                                const int16_t *filter_y, int filter_y_stride,
                                int w, int h, int bd) {
  (void)filter_x;
  (void)filter_x_stride;
  (void)filter_y;
  (void)filter_y_stride;
  (void)bd;

  const uint16_t *src_row = CONVERT_TO_SHORTPTR(src8);
  uint16_t *dst_row = CONVERT_TO_SHORTPTR(dst8);

  for (int row = 0; row < h; ++row) {
    memcpy(dst_row, src_row, w * sizeof(uint16_t));
    src_row += src_stride;
    dst_row += dst_stride;
  }
}

Yaowu Xu

2016-08-30 14:01:10 -0700

[diff] [blame]

1030

// Averages a w x h block of 16-bit samples from src8 into dst8.
//
// src8 and dst8 are converted with CONVERT_TO_SHORTPTR(); every destination
// sample becomes ROUND_POWER_OF_TWO(dst + src, 1). The filter arguments and
// bd are accepted for signature compatibility and ignored.
void aom_highbd_convolve_avg_c(const uint8_t *src8, ptrdiff_t src_stride,
                               uint8_t *dst8, ptrdiff_t dst_stride,
                               const int16_t *filter_x, int filter_x_stride,
                               const int16_t *filter_y, int filter_y_stride,
                               int w, int h, int bd) {
  (void)filter_x;
  (void)filter_x_stride;
  (void)filter_y;
  (void)filter_y_stride;
  (void)bd;

  const uint16_t *src_row = CONVERT_TO_SHORTPTR(src8);
  uint16_t *dst_row = CONVERT_TO_SHORTPTR(dst8);

  for (int row = 0; row < h; ++row) {
    for (int col = 0; col < w; ++col) {
      const int pair_sum = dst_row[col] + src_row[col];
      dst_row[col] = ROUND_POWER_OF_TWO(pair_sum, 1);
    }
    src_row += src_stride;
    dst_row += dst_stride;
  }
}

David Barker

2016-12-15 15:39:10 +0000

[diff] [blame]

1051

1052

#if CONFIG_LOOP_RESTORATION

1053

static void highbd_convolve_add_src_horiz(const uint8_t *src8,

1054

ptrdiff_t src_stride, uint8_t *dst8,

1055

ptrdiff_t dst_stride,

1056

const InterpKernel *x_filters,

1057

int x0_q4, int x_step_q4, int w,

1058

int h, int bd) {

David Barker

2016-12-15 15:39:10 +0000

[diff] [blame]

1059

uint16_t *src = CONVERT_TO_SHORTPTR(src8);

1060

uint16_t *dst = CONVERT_TO_SHORTPTR(dst8);

1061

src -= SUBPEL_TAPS / 2 - 1;

Sebastien Alaiwan

2017-11-09 16:59:25 +0100

[diff] [blame]

1062

for (int y = 0; y < h; ++y) {

David Barker

2016-12-15 15:39:10 +0000

[diff] [blame]

1063

int x_q4 = x0_q4;

Sebastien Alaiwan

2017-11-09 16:59:25 +0100

[diff] [blame]

1064

for (int x = 0; x < w; ++x) {

David Barker

2016-12-15 15:39:10 +0000

[diff] [blame]

1065

const uint16_t *const src_x = &src[x_q4 >> SUBPEL_BITS];

1066

const int16_t *const x_filter = x_filters[x_q4 & SUBPEL_MASK];

Sebastien Alaiwan

2017-11-09 17:23:58 +0100

[diff] [blame]

1067

const int sum = highbd_horz_scalar_product(src_x, x_filter);

David Barker

2016-12-15 15:39:10 +0000

[diff] [blame]

1068

dst[x] = clip_pixel_highbd(

1069

ROUND_POWER_OF_TWO(sum, FILTER_BITS) + src_x[SUBPEL_TAPS / 2 - 1],

bd);

x_q4 += x_step_q4;

}

src += src_stride;

dst += dst_stride;

}

}

static void highbd_convolve_add_src_vert(const uint8_t *src8,

1079

ptrdiff_t src_stride, uint8_t *dst8,

1080

ptrdiff_t dst_stride,

1081

const InterpKernel *y_filters,

1082

int y0_q4, int y_step_q4, int w, int h,

1083

int bd) {

David Barker

2016-12-15 15:39:10 +0000

[diff] [blame]

1084

uint16_t *src = CONVERT_TO_SHORTPTR(src8);

1085

uint16_t *dst = CONVERT_TO_SHORTPTR(dst8);

1086

src -= src_stride * (SUBPEL_TAPS / 2 - 1);

Sebastien Alaiwan

2017-11-09 16:59:25 +0100

[diff] [blame]

1087

for (int x = 0; x < w; ++x) {

David Barker

2016-12-15 15:39:10 +0000

[diff] [blame]

1088

int y_q4 = y0_q4;

Sebastien Alaiwan

2017-11-09 16:59:25 +0100

[diff] [blame]

1089

for (int y = 0; y < h; ++y) {

David Barker

2016-12-15 15:39:10 +0000

[diff] [blame]

1090

const uint16_t *src_y = &src[(y_q4 >> SUBPEL_BITS) * src_stride];

1091

const int16_t *const y_filter = y_filters[y_q4 & SUBPEL_MASK];

Sebastien Alaiwan

2017-11-09 17:23:58 +0100

[diff] [blame]

1092

const int sum = highbd_vert_scalar_product(src_y, src_stride, y_filter);

David Barker

2016-12-15 15:39:10 +0000

[diff] [blame]

1093

dst[y * dst_stride] =

1094

clip_pixel_highbd(ROUND_POWER_OF_TWO(sum, FILTER_BITS) +

1095

src_y[(SUBPEL_TAPS / 2 - 1) * src_stride],

bd);

y_q4 += y_step_q4;

}

++src;

++dst;

}

}

static void highbd_convolve_add_src(const uint8_t *src, ptrdiff_t src_stride,

1105

uint8_t *dst, ptrdiff_t dst_stride,

1106

const InterpKernel *const x_filters,

1107

int x0_q4, int x_step_q4,

1108

const InterpKernel *const y_filters,

1109

int y0_q4, int y_step_q4, int w, int h,

1110

int bd) {

1111

// Note: Fixed size intermediate buffer, temp, places limits on parameters.

1112

// 2d filtering proceeds in 2 steps:

1113

// (1) Interpolate horizontally into an intermediate buffer, temp.

1114

// (2) Interpolate temp vertically to derive the sub-pixel result.

1115

// Deriving the maximum number of rows in the temp buffer (135):

1116

// --Smallest scaling factor is x1/2 ==> y_step_q4 = 32 (Normative).

1117

// --Largest block size is 64x64 pixels.

1118

// --64 rows in the downscaled frame span a distance of (64 - 1) * 32 in the

1119

// original frame (in 1/16th pixel units).

1120

// --Must round-up because block may be located at sub-pixel position.

1121

// --Require an additional SUBPEL_TAPS rows for the 8-tap filter tails.

1122

// --((64 - 1) * 32 + 15) >> 4 + 8 = 135.

1123

uint16_t temp[MAX_EXT_SIZE * MAX_SB_SIZE];

Sebastien Alaiwan

2017-11-29 11:53:48 +0100

[diff] [blame]

1124

const int intermediate_height =

David Barker

2016-12-15 15:39:10 +0000

[diff] [blame]

1125

(((h - 1) * y_step_q4 + y0_q4) >> SUBPEL_BITS) + SUBPEL_TAPS;

1126

1127

assert(w <= MAX_SB_SIZE);

1128

assert(h <= MAX_SB_SIZE);

1129

assert(y_step_q4 <= 32);

1130

assert(x_step_q4 <= 32);

1131

1132

highbd_convolve_add_src_horiz(src - src_stride * (SUBPEL_TAPS / 2 - 1),

1133

src_stride, CONVERT_TO_BYTEPTR(temp),

1134

MAX_SB_SIZE, x_filters, x0_q4, x_step_q4, w,

1135

intermediate_height, bd);

1136

highbd_convolve_add_src_vert(

1137

CONVERT_TO_BYTEPTR(temp) + MAX_SB_SIZE * (SUBPEL_TAPS / 2 - 1),

1138

MAX_SB_SIZE, dst, dst_stride, y_filters, y0_q4, y_step_q4, w, h, bd);

1139

}

1140

David Barker

2016-12-15 15:39:10 +0000

[diff] [blame]

1141

void aom_highbd_convolve8_add_src_c(const uint8_t *src, ptrdiff_t src_stride,

1142

uint8_t *dst, ptrdiff_t dst_stride,

1143

const int16_t *filter_x, int x_step_q4,

1144

const int16_t *filter_y, int y_step_q4,

1145

int w, int h, int bd) {

1146

const InterpKernel *const filters_x = get_filter_base(filter_x);

1147

const int x0_q4 = get_filter_offset(filter_x, filters_x);

1148

1149

const InterpKernel *const filters_y = get_filter_base(filter_y);

1150

const int y0_q4 = get_filter_offset(filter_y, filters_y);

1151

1152

highbd_convolve_add_src(src, src_stride, dst, dst_stride, filters_x, x0_q4,

1153

x_step_q4, filters_y, y0_q4, y_step_q4, w, h, bd);

1154

}

Debargha Mukherjee

2017-05-12 10:44:03 -0700

[diff] [blame]

1155

1156

static void highbd_convolve_add_src_horiz_hip(

1157

const uint8_t *src8, ptrdiff_t src_stride, uint16_t *dst,

1158

ptrdiff_t dst_stride, const InterpKernel *x_filters, int x0_q4,

1159

int x_step_q4, int w, int h, int bd) {

Debargha Mukherjee

2017-05-25 12:07:47 -0700

[diff] [blame]

1160

const int extraprec_clamp_limit = EXTRAPREC_CLAMP_LIMIT(bd);

Debargha Mukherjee

2017-05-12 10:44:03 -0700

[diff] [blame]

1161

uint16_t *src = CONVERT_TO_SHORTPTR(src8);

1162

src -= SUBPEL_TAPS / 2 - 1;

Sebastien Alaiwan

2017-11-09 16:59:25 +0100

[diff] [blame]

1163

for (int y = 0; y < h; ++y) {

Debargha Mukherjee

2017-05-12 10:44:03 -0700

[diff] [blame]

1164

int x_q4 = x0_q4;

Sebastien Alaiwan

2017-11-09 16:59:25 +0100

[diff] [blame]

1165

for (int x = 0; x < w; ++x) {

Debargha Mukherjee

2017-05-12 10:44:03 -0700

[diff] [blame]

1166

const uint16_t *const src_x = &src[x_q4 >> SUBPEL_BITS];

1167

const int16_t *const x_filter = x_filters[x_q4 & SUBPEL_MASK];

Sebastien Alaiwan

2017-11-09 17:23:58 +0100

[diff] [blame]

1168

const int rounding = ((int)src_x[SUBPEL_TAPS / 2 - 1] << FILTER_BITS) +

1169

(1 << (bd + FILTER_BITS - 1));

1170

const int sum = highbd_horz_scalar_product(src_x, x_filter) + rounding;

Debargha Mukherjee

2017-05-12 10:44:03 -0700

[diff] [blame]

1171

dst[x] =

1172

(uint16_t)clamp(ROUND_POWER_OF_TWO(sum, FILTER_BITS - EXTRAPREC_BITS),

1173

0, extraprec_clamp_limit - 1);

x_q4 += x_step_q4;

}

src += src_stride;

dst += dst_stride;

}

}

static void highbd_convolve_add_src_vert_hip(

1182

const uint16_t *src, ptrdiff_t src_stride, uint8_t *dst8,

1183

ptrdiff_t dst_stride, const InterpKernel *y_filters, int y0_q4,

1184

int y_step_q4, int w, int h, int bd) {

Debargha Mukherjee

2017-05-12 10:44:03 -0700

[diff] [blame]

1185

uint16_t *dst = CONVERT_TO_SHORTPTR(dst8);

1186

src -= src_stride * (SUBPEL_TAPS / 2 - 1);

Sebastien Alaiwan

2017-11-09 16:59:25 +0100

[diff] [blame]

1187

for (int x = 0; x < w; ++x) {

Debargha Mukherjee

2017-05-12 10:44:03 -0700

[diff] [blame]

1188

int y_q4 = y0_q4;

Sebastien Alaiwan

2017-11-09 16:59:25 +0100

[diff] [blame]

1189

for (int y = 0; y < h; ++y) {

Debargha Mukherjee

2017-05-12 10:44:03 -0700

[diff] [blame]

1190

const uint16_t *src_y = &src[(y_q4 >> SUBPEL_BITS) * src_stride];

1191

const int16_t *const y_filter = y_filters[y_q4 & SUBPEL_MASK];

Sebastien Alaiwan

2017-11-09 17:23:58 +0100

[diff] [blame]

1192

const int rounding =

Debargha Mukherjee

2017-05-25 12:07:47 -0700

[diff] [blame]

1193

((int)src_y[(SUBPEL_TAPS / 2 - 1) * src_stride] << FILTER_BITS) -

1194

(1 << (bd + FILTER_BITS + EXTRAPREC_BITS - 1));

Sebastien Alaiwan

2017-11-09 17:23:58 +0100

[diff] [blame]

1195

const int sum =

1196

highbd_vert_scalar_product(src_y, src_stride, y_filter) + rounding;

Debargha Mukherjee

2017-05-12 10:44:03 -0700

[diff] [blame]

1197

dst[y * dst_stride] = clip_pixel_highbd(

1198

ROUND_POWER_OF_TWO(sum, FILTER_BITS + EXTRAPREC_BITS), bd);

y_q4 += y_step_q4;

}

++src;

++dst;

}

}

static void highbd_convolve_add_src_hip(

1207

const uint8_t *src, ptrdiff_t src_stride, uint8_t *dst,

1208

ptrdiff_t dst_stride, const InterpKernel *const x_filters, int x0_q4,

1209

int x_step_q4, const InterpKernel *const y_filters, int y0_q4,

1210

int y_step_q4, int w, int h, int bd) {

1211

// Note: Fixed size intermediate buffer, temp, places limits on parameters.

1212

// 2d filtering proceeds in 2 steps:

1213

// (1) Interpolate horizontally into an intermediate buffer, temp.

1214

// (2) Interpolate temp vertically to derive the sub-pixel result.

1215

// Deriving the maximum number of rows in the temp buffer (135):

1216

// --Smallest scaling factor is x1/2 ==> y_step_q4 = 32 (Normative).

1217

// --Largest block size is 64x64 pixels.

1218

// --64 rows in the downscaled frame span a distance of (64 - 1) * 32 in the

1219

// original frame (in 1/16th pixel units).

1220

// --Must round-up because block may be located at sub-pixel position.

1221

// --Require an additional SUBPEL_TAPS rows for the 8-tap filter tails.

1222

// --((64 - 1) * 32 + 15) >> 4 + 8 = 135.

1223

uint16_t temp[MAX_EXT_SIZE * MAX_SB_SIZE];

Sebastien Alaiwan

2017-11-29 11:53:48 +0100

[diff] [blame]

1224

const int intermediate_height =

Debargha Mukherjee

2017-05-12 10:44:03 -0700

[diff] [blame]

1225

(((h - 1) * y_step_q4 + y0_q4) >> SUBPEL_BITS) + SUBPEL_TAPS;

1226

1227

assert(w <= MAX_SB_SIZE);

1228

assert(h <= MAX_SB_SIZE);

1229

assert(y_step_q4 <= 32);

1230

assert(x_step_q4 <= 32);

1231

1232

highbd_convolve_add_src_horiz_hip(

1233

src - src_stride * (SUBPEL_TAPS / 2 - 1), src_stride, temp, MAX_SB_SIZE,

1234

x_filters, x0_q4, x_step_q4, w, intermediate_height, bd);

1235

highbd_convolve_add_src_vert_hip(temp + MAX_SB_SIZE * (SUBPEL_TAPS / 2 - 1),

1236

MAX_SB_SIZE, dst, dst_stride, y_filters,

1237

y0_q4, y_step_q4, w, h, bd);

1238

}

1239

Debargha Mukherjee

2017-05-12 10:44:03 -0700

[diff] [blame]

1240

void aom_highbd_convolve8_add_src_hip_c(const uint8_t *src,

1241

ptrdiff_t src_stride, uint8_t *dst,

1242

ptrdiff_t dst_stride,

1243

const int16_t *filter_x, int x_step_q4,

1244

const int16_t *filter_y, int y_step_q4,

1245

int w, int h, int bd) {

1246

const InterpKernel *const filters_x = get_filter_base(filter_x);

1247

const int x0_q4 = get_filter_offset(filter_x, filters_x);

1248

1249

const InterpKernel *const filters_y = get_filter_base(filter_y);

1250

const int y0_q4 = get_filter_offset(filter_y, filters_y);

1251

1252

highbd_convolve_add_src_hip(src, src_stride, dst, dst_stride, filters_x,

1253

x0_q4, x_step_q4, filters_y, y0_q4, y_step_q4, w,

1254

h, bd);

1255

}

David Barker