Blame - vpx_dsp/mips/itrans32_dspr2.c - aom

2013-10-24 11:29:04 +0530

[diff] [blame]

/*

*

* Use of this source code is governed by a BSD-style license

5

* that can be found in the LICENSE file in the root of the source

6

* tree. An additional intellectual property rights grant can be found

7

* in the file PATENTS. All contributing project authors may

8

* be found in the AUTHORS file in the root of the source tree.

*/

#include <assert.h>

#include <stdio.h>

#include "./vpx_config.h"

Jingning Han

bfad9d2

2015-08-03 10:50:32 -0700

[diff] [blame]

15

#include "vpx_dsp/mips/inv_txfm_dspr2.h"

Jingning Han

a9a1d4e

2015-07-24 10:27:23 -0700

[diff] [blame]

16

#include "vpx_dsp/txfm_common.h"

Parag Salasakar

2013-10-24 11:29:04 +0530

[diff] [blame]

17

18

#if HAVE_DSPR2

Dmitry Kovalev

6e4a03e

2014-01-31 17:05:03 -0800

[diff] [blame]

19

static void idct32_rows_dspr2(const int16_t *input, int16_t *output,

20

uint32_t no_rows) {

Parag Salasakar

2013-10-24 11:29:04 +0530

[diff] [blame]

21

int16_t step1_0, step1_1, step1_2, step1_3, step1_4, step1_5, step1_6;

22

int16_t step1_7, step1_8, step1_9, step1_10, step1_11, step1_12, step1_13;

23

int16_t step1_14, step1_15, step1_16, step1_17, step1_18, step1_19, step1_20;

24

int16_t step1_21, step1_22, step1_23, step1_24, step1_25, step1_26, step1_27;

25

int16_t step1_28, step1_29, step1_30, step1_31;

26

int16_t step2_0, step2_1, step2_2, step2_3, step2_4, step2_5, step2_6;

27

int16_t step2_7, step2_8, step2_9, step2_10, step2_11, step2_12, step2_13;

28

int16_t step2_14, step2_15, step2_16, step2_17, step2_18, step2_19, step2_20;

29

int16_t step2_21, step2_22, step2_23, step2_24, step2_25, step2_26, step2_27;

30

int16_t step2_28, step2_29, step2_30, step2_31;

31

int16_t step3_8, step3_9, step3_10, step3_11, step3_12, step3_13, step3_14;

32

int16_t step3_15, step3_16, step3_17, step3_18, step3_19, step3_20, step3_21;

33

int16_t step3_22, step3_23, step3_24, step3_25, step3_26, step3_27, step3_28;

34

int16_t step3_29, step3_30, step3_31;

35

int temp0, temp1, temp2, temp3;

36

int load1, load2, load3, load4;

37

int result1, result2;

38

int temp21;

39

int i;

40

const int const_2_power_13 = 8192;

41

const int32_t *input_int;

42

Parag Salasakar

2013-10-31 12:12:34 +0530

[diff] [blame]

43

for (i = no_rows; i--; ) {

Parag Salasakar

2013-10-24 11:29:04 +0530

[diff] [blame]

44

input_int = (const int32_t *)input;

45

46

if (!(input_int[0] | input_int[1] | input_int[2] | input_int[3] |

47

input_int[4] | input_int[5] | input_int[6] | input_int[7] |

48

input_int[8] | input_int[9] | input_int[10] | input_int[11] |

49

input_int[12] | input_int[13] | input_int[14] | input_int[15])) {

50

input += 32;

51

52

__asm__ __volatile__ (

53

"sh $zero, 0(%[output]) \n\t"

54

"sh $zero, 64(%[output]) \n\t"

55

"sh $zero, 128(%[output]) \n\t"

56

"sh $zero, 192(%[output]) \n\t"

57

"sh $zero, 256(%[output]) \n\t"

58

"sh $zero, 320(%[output]) \n\t"

59

"sh $zero, 384(%[output]) \n\t"

60

"sh $zero, 448(%[output]) \n\t"

61

"sh $zero, 512(%[output]) \n\t"

62

"sh $zero, 576(%[output]) \n\t"

63

"sh $zero, 640(%[output]) \n\t"

64

"sh $zero, 704(%[output]) \n\t"

65

"sh $zero, 768(%[output]) \n\t"

66

"sh $zero, 832(%[output]) \n\t"

67

"sh $zero, 896(%[output]) \n\t"

68

"sh $zero, 960(%[output]) \n\t"

69

"sh $zero, 1024(%[output]) \n\t"

70

"sh $zero, 1088(%[output]) \n\t"

71

"sh $zero, 1152(%[output]) \n\t"

72

"sh $zero, 1216(%[output]) \n\t"

73

"sh $zero, 1280(%[output]) \n\t"

74

"sh $zero, 1344(%[output]) \n\t"

75

"sh $zero, 1408(%[output]) \n\t"

76

"sh $zero, 1472(%[output]) \n\t"

77

"sh $zero, 1536(%[output]) \n\t"

78

"sh $zero, 1600(%[output]) \n\t"

79

"sh $zero, 1664(%[output]) \n\t"

80

"sh $zero, 1728(%[output]) \n\t"

81

"sh $zero, 1792(%[output]) \n\t"

82

"sh $zero, 1856(%[output]) \n\t"

83

"sh $zero, 1920(%[output]) \n\t"

84

"sh $zero, 1984(%[output]) \n\t"

85

86

:

87

: [output] "r" (output)

);

output += 1;

continue;

}

/* prefetch row */

Jingning Han

2992739

2015-07-17 12:31:53 -0700

[diff] [blame]

96

prefetch_load((const uint8_t *)(input + 32));

97

prefetch_load((const uint8_t *)(input + 48));

Parag Salasakar

2013-10-24 11:29:04 +0530

[diff] [blame]

98

99

__asm__ __volatile__ (

100

"lh %[load1], 2(%[input]) \n\t"

101

"lh %[load2], 62(%[input]) \n\t"

102

"lh %[load3], 34(%[input]) \n\t"

103

"lh %[load4], 30(%[input]) \n\t"

104

105

"mtlo %[const_2_power_13], $ac1 \n\t"

106

"mthi $zero, $ac1 \n\t"

107

"mtlo %[const_2_power_13], $ac3 \n\t"

108

"mthi $zero, $ac3 \n\t"

109

110

"madd $ac1, %[load1], %[cospi_31_64] \n\t"

111

"msub $ac1, %[load2], %[cospi_1_64] \n\t"

112

"extp %[temp0], $ac1, 31 \n\t"

113

114

"madd $ac3, %[load1], %[cospi_1_64] \n\t"

115

"madd $ac3, %[load2], %[cospi_31_64] \n\t"

116

"extp %[temp3], $ac3, 31 \n\t"

117

118

"mtlo %[const_2_power_13], $ac1 \n\t"

119

"mthi $zero, $ac1 \n\t"

120

"mtlo %[const_2_power_13], $ac2 \n\t"

121

"mthi $zero, $ac2 \n\t"

122

123

"madd $ac2, %[load3], %[cospi_15_64] \n\t"

124

"msub $ac2, %[load4], %[cospi_17_64] \n\t"

125

"extp %[temp1], $ac2, 31 \n\t"

126

127

"madd $ac1, %[load3], %[cospi_17_64] \n\t"

128

"madd $ac1, %[load4], %[cospi_15_64] \n\t"

129

"extp %[temp2], $ac1, 31 \n\t"

130

131

"mtlo %[const_2_power_13], $ac1 \n\t"

132

"mthi $zero, $ac1 \n\t"

133

"mtlo %[const_2_power_13], $ac3 \n\t"

134

"mthi $zero, $ac3 \n\t"

135

136

"sub %[load1], %[temp3], %[temp2] \n\t"

137

"sub %[load2], %[temp0], %[temp1] \n\t"

138

139

"madd $ac1, %[load1], %[cospi_28_64] \n\t"

140

"msub $ac1, %[load2], %[cospi_4_64] \n\t"

141

"madd $ac3, %[load1], %[cospi_4_64] \n\t"

142

"madd $ac3, %[load2], %[cospi_28_64] \n\t"

143

144

"extp %[step1_17], $ac1, 31 \n\t"

145

"extp %[step1_30], $ac3, 31 \n\t"

146

"add %[step1_16], %[temp0], %[temp1] \n\t"

147

"add %[step1_31], %[temp2], %[temp3] \n\t"

148

149

: [load1] "=&r" (load1), [load2] "=&r" (load2),

150

[load3] "=&r" (load3), [load4] "=&r" (load4),

151

[temp0] "=&r" (temp0), [temp1] "=&r" (temp1),

152

[temp2] "=&r" (temp2), [temp3] "=&r" (temp3),

153

[step1_16] "=r" (step1_16), [step1_17] "=r" (step1_17),

154

[step1_30] "=r" (step1_30), [step1_31] "=r" (step1_31)

155

: [const_2_power_13] "r" (const_2_power_13), [input] "r" (input),

156

[cospi_31_64] "r" (cospi_31_64), [cospi_1_64] "r" (cospi_1_64),

157

[cospi_4_64] "r" (cospi_4_64), [cospi_17_64] "r" (cospi_17_64),

158

[cospi_15_64] "r" (cospi_15_64), [cospi_28_64] "r" (cospi_28_64)

159

);

160

161

__asm__ __volatile__ (

162

"lh %[load1], 18(%[input]) \n\t"

163

"lh %[load2], 46(%[input]) \n\t"

164

"lh %[load3], 50(%[input]) \n\t"

165

"lh %[load4], 14(%[input]) \n\t"

166

167

"mtlo %[const_2_power_13], $ac1 \n\t"

168

"mthi $zero, $ac1 \n\t"

169

"mtlo %[const_2_power_13], $ac3 \n\t"

170

"mthi $zero, $ac3 \n\t"

171

172

"madd $ac1, %[load1], %[cospi_23_64] \n\t"

173

"msub $ac1, %[load2], %[cospi_9_64] \n\t"

174

"extp %[temp0], $ac1, 31 \n\t"

175

176

"madd $ac3, %[load1], %[cospi_9_64] \n\t"

177

"madd $ac3, %[load2], %[cospi_23_64] \n\t"

178

"extp %[temp3], $ac3, 31 \n\t"

179

180

"mtlo %[const_2_power_13], $ac1 \n\t"

181

"mthi $zero, $ac1 \n\t"

182

"mtlo %[const_2_power_13], $ac2 \n\t"

183

"mthi $zero, $ac2 \n\t"

184

185

"madd $ac2, %[load3], %[cospi_7_64] \n\t"

186

"msub $ac2, %[load4], %[cospi_25_64] \n\t"

187

"extp %[temp1], $ac2, 31 \n\t"

188

189

"madd $ac1, %[load3], %[cospi_25_64] \n\t"

190

"madd $ac1, %[load4], %[cospi_7_64] \n\t"

191

"extp %[temp2], $ac1, 31 \n\t"

192

193

"mtlo %[const_2_power_13], $ac1 \n\t"

194

"mthi $zero, $ac1 \n\t"

195

"mtlo %[const_2_power_13], $ac3 \n\t"

196

"mthi $zero, $ac3 \n\t"

197

198

"sub %[load1], %[temp1], %[temp0] \n\t"

199

"sub %[load2], %[temp2], %[temp3] \n\t"

200

201

"msub $ac1, %[load1], %[cospi_28_64] \n\t"

202

"msub $ac1, %[load2], %[cospi_4_64] \n\t"

203

"msub $ac3, %[load1], %[cospi_4_64] \n\t"

204

"madd $ac3, %[load2], %[cospi_28_64] \n\t"

205

206

"extp %[step1_18], $ac1, 31 \n\t"

207

"extp %[step1_29], $ac3, 31 \n\t"

208

"add %[step1_19], %[temp0], %[temp1] \n\t"

209

"add %[step1_28], %[temp2], %[temp3] \n\t"

210

211

: [load1] "=&r" (load1), [load2] "=&r" (load2),

212

[load3] "=&r" (load3), [load4] "=&r" (load4),

213

[temp0] "=&r" (temp0), [temp1] "=&r" (temp1),

214

[temp2] "=&r" (temp2), [temp3] "=&r" (temp3),

215

[step1_18] "=r" (step1_18), [step1_19] "=r" (step1_19),

216

[step1_28] "=r" (step1_28), [step1_29] "=r" (step1_29)

217

: [const_2_power_13] "r" (const_2_power_13), [input] "r" (input),

218

[cospi_23_64] "r" (cospi_23_64), [cospi_9_64] "r" (cospi_9_64),

219

[cospi_4_64] "r" (cospi_4_64), [cospi_7_64] "r" (cospi_7_64),

220

[cospi_25_64] "r" (cospi_25_64), [cospi_28_64] "r" (cospi_28_64)

221

);

222

223

__asm__ __volatile__ (

224

"lh %[load1], 10(%[input]) \n\t"

225

"lh %[load2], 54(%[input]) \n\t"

226

"lh %[load3], 42(%[input]) \n\t"

227

"lh %[load4], 22(%[input]) \n\t"

228

229

"mtlo %[const_2_power_13], $ac1 \n\t"

230

"mthi $zero, $ac1 \n\t"

231

"mtlo %[const_2_power_13], $ac3 \n\t"

232

"mthi $zero, $ac3 \n\t"

233

234

"madd $ac1, %[load1], %[cospi_27_64] \n\t"

235

"msub $ac1, %[load2], %[cospi_5_64] \n\t"

236

"extp %[temp0], $ac1, 31 \n\t"

237

238

"madd $ac3, %[load1], %[cospi_5_64] \n\t"

239

"madd $ac3, %[load2], %[cospi_27_64] \n\t"

240

"extp %[temp3], $ac3, 31 \n\t"

241

242

"mtlo %[const_2_power_13], $ac1 \n\t"

243

"mthi $zero, $ac1 \n\t"

244

"mtlo %[const_2_power_13], $ac2 \n\t"

245

"mthi $zero, $ac2 \n\t"

246

247

"madd $ac2, %[load3], %[cospi_11_64] \n\t"

248

"msub $ac2, %[load4], %[cospi_21_64] \n\t"

249

"extp %[temp1], $ac2, 31 \n\t"

250

251

"madd $ac1, %[load3], %[cospi_21_64] \n\t"

252

"madd $ac1, %[load4], %[cospi_11_64] \n\t"

253

"extp %[temp2], $ac1, 31 \n\t"

254

255

"mtlo %[const_2_power_13], $ac1 \n\t"

256

"mthi $zero, $ac1 \n\t"

257

"mtlo %[const_2_power_13], $ac3 \n\t"

258

"mthi $zero, $ac3 \n\t"

259

260

"sub %[load1], %[temp0], %[temp1] \n\t"

261

"sub %[load2], %[temp3], %[temp2] \n\t"

262

263

"madd $ac1, %[load2], %[cospi_12_64] \n\t"

264

"msub $ac1, %[load1], %[cospi_20_64] \n\t"

265

"madd $ac3, %[load1], %[cospi_12_64] \n\t"

266

"madd $ac3, %[load2], %[cospi_20_64] \n\t"

267

268

"extp %[step1_21], $ac1, 31 \n\t"

269

"extp %[step1_26], $ac3, 31 \n\t"

270

"add %[step1_20], %[temp0], %[temp1] \n\t"

271

"add %[step1_27], %[temp2], %[temp3] \n\t"

272

273

: [load1] "=&r" (load1), [load2] "=&r" (load2),

274

[load3] "=&r" (load3), [load4] "=&r" (load4),

275

[temp0] "=&r" (temp0), [temp1] "=&r" (temp1),

276

[temp2] "=&r" (temp2), [temp3] "=&r" (temp3),

277

[step1_20] "=r" (step1_20), [step1_21] "=r" (step1_21),

278

[step1_26] "=r" (step1_26), [step1_27] "=r" (step1_27)

279

: [const_2_power_13] "r" (const_2_power_13), [input] "r" (input),

280

[cospi_27_64] "r" (cospi_27_64), [cospi_5_64] "r" (cospi_5_64),

281

[cospi_11_64] "r" (cospi_11_64), [cospi_21_64] "r" (cospi_21_64),

282

[cospi_12_64] "r" (cospi_12_64), [cospi_20_64] "r" (cospi_20_64)

283

);

284

285

__asm__ __volatile__ (

286

"lh %[load1], 26(%[input]) \n\t"

287

"lh %[load2], 38(%[input]) \n\t"

288

"lh %[load3], 58(%[input]) \n\t"

289

"lh %[load4], 6(%[input]) \n\t"

290

291

"mtlo %[const_2_power_13], $ac1 \n\t"

292

"mthi $zero, $ac1 \n\t"

293

"mtlo %[const_2_power_13], $ac3 \n\t"

294

"mthi $zero, $ac3 \n\t"

295

296

"madd $ac1, %[load1], %[cospi_19_64] \n\t"

297

"msub $ac1, %[load2], %[cospi_13_64] \n\t"

298

"extp %[temp0], $ac1, 31 \n\t"

299

300

"madd $ac3, %[load1], %[cospi_13_64] \n\t"

301

"madd $ac3, %[load2], %[cospi_19_64] \n\t"

302

"extp %[temp3], $ac3, 31 \n\t"

303

304

"mtlo %[const_2_power_13], $ac1 \n\t"

305

"mthi $zero, $ac1 \n\t"

306

"mtlo %[const_2_power_13], $ac2 \n\t"

307

"mthi $zero, $ac2 \n\t"

308

309

"madd $ac2, %[load3], %[cospi_3_64] \n\t"

310

"msub $ac2, %[load4], %[cospi_29_64] \n\t"

311

"extp %[temp1], $ac2, 31 \n\t"

312

313

"madd $ac1, %[load3], %[cospi_29_64] \n\t"

314

"madd $ac1, %[load4], %[cospi_3_64] \n\t"

315

"extp %[temp2], $ac1, 31 \n\t"

316

317

"mtlo %[const_2_power_13], $ac1 \n\t"

318

"mthi $zero, $ac1 \n\t"

319

"mtlo %[const_2_power_13], $ac3 \n\t"

320

"mthi $zero, $ac3 \n\t"

321

322

"sub %[load1], %[temp1], %[temp0] \n\t"

323

"sub %[load2], %[temp2], %[temp3] \n\t"

324

325

"msub $ac1, %[load1], %[cospi_12_64] \n\t"

326

"msub $ac1, %[load2], %[cospi_20_64] \n\t"

327

"msub $ac3, %[load1], %[cospi_20_64] \n\t"

328

"madd $ac3, %[load2], %[cospi_12_64] \n\t"

329

330

"extp %[step1_22], $ac1, 31 \n\t"

331

"extp %[step1_25], $ac3, 31 \n\t"

332

"add %[step1_23], %[temp0], %[temp1] \n\t"

333

"add %[step1_24], %[temp2], %[temp3] \n\t"

334

335

: [load1] "=&r" (load1), [load2] "=&r" (load2),

336

[load3] "=&r" (load3), [load4] "=&r" (load4),

337

[temp0] "=&r" (temp0), [temp1] "=&r" (temp1),

338

[temp2] "=&r" (temp2), [temp3] "=&r" (temp3),

339

[step1_22] "=r" (step1_22), [step1_23] "=r" (step1_23),

340

[step1_24] "=r" (step1_24), [step1_25] "=r" (step1_25)

341

: [const_2_power_13] "r" (const_2_power_13), [input] "r" (input),

342

[cospi_19_64] "r" (cospi_19_64), [cospi_13_64] "r" (cospi_13_64),

343

[cospi_3_64] "r" (cospi_3_64), [cospi_29_64] "r" (cospi_29_64),

344

[cospi_12_64] "r" (cospi_12_64), [cospi_20_64] "r" (cospi_20_64)

345

);

346

347

__asm__ __volatile__ (

348

"lh %[load1], 4(%[input]) \n\t"

349

"lh %[load2], 60(%[input]) \n\t"

350

"lh %[load3], 36(%[input]) \n\t"

351

"lh %[load4], 28(%[input]) \n\t"

352

353

"mtlo %[const_2_power_13], $ac1 \n\t"

354

"mthi $zero, $ac1 \n\t"

355

"mtlo %[const_2_power_13], $ac3 \n\t"

356

"mthi $zero, $ac3 \n\t"

357

358

"madd $ac1, %[load1], %[cospi_30_64] \n\t"

359

"msub $ac1, %[load2], %[cospi_2_64] \n\t"

360

"extp %[temp0], $ac1, 31 \n\t"

361

362

"madd $ac3, %[load1], %[cospi_2_64] \n\t"

363

"madd $ac3, %[load2], %[cospi_30_64] \n\t"

364

"extp %[temp3], $ac3, 31 \n\t"

365

366

"mtlo %[const_2_power_13], $ac1 \n\t"

367

"mthi $zero, $ac1 \n\t"

368

"mtlo %[const_2_power_13], $ac2 \n\t"

369

"mthi $zero, $ac2 \n\t"

370

371

"madd $ac2, %[load3], %[cospi_14_64] \n\t"

372

"msub $ac2, %[load4], %[cospi_18_64] \n\t"

373

"extp %[temp1], $ac2, 31 \n\t"

374

375

"madd $ac1, %[load3], %[cospi_18_64] \n\t"

376

"madd $ac1, %[load4], %[cospi_14_64] \n\t"

377

"extp %[temp2], $ac1, 31 \n\t"

378

379

"mtlo %[const_2_power_13], $ac1 \n\t"

380

"mthi $zero, $ac1 \n\t"

381

"mtlo %[const_2_power_13], $ac3 \n\t"

382

"mthi $zero, $ac3 \n\t"

383

384

"sub %[load1], %[temp0], %[temp1] \n\t"

385

"sub %[load2], %[temp3], %[temp2] \n\t"

386

387

"msub $ac1, %[load1], %[cospi_8_64] \n\t"

388

"madd $ac1, %[load2], %[cospi_24_64] \n\t"

389

"madd $ac3, %[load1], %[cospi_24_64] \n\t"

390

"madd $ac3, %[load2], %[cospi_8_64] \n\t"

391

392

"extp %[step2_9], $ac1, 31 \n\t"

393

"extp %[step2_14], $ac3, 31 \n\t"

394

"add %[step2_8], %[temp0], %[temp1] \n\t"

395

"add %[step2_15], %[temp2], %[temp3] \n\t"

396

397

: [load1] "=&r" (load1), [load2] "=&r" (load2),

398

[load3] "=&r" (load3), [load4] "=&r" (load4),

399

[temp0] "=&r" (temp0), [temp1] "=&r" (temp1),

400

[temp2] "=&r" (temp2), [temp3] "=&r" (temp3),

401

[step2_8] "=r" (step2_8), [step2_9] "=r" (step2_9),

402

[step2_14] "=r" (step2_14), [step2_15] "=r" (step2_15)

403

: [const_2_power_13] "r" (const_2_power_13), [input] "r" (input),

404

[cospi_30_64] "r" (cospi_30_64), [cospi_2_64] "r" (cospi_2_64),

405

[cospi_14_64] "r" (cospi_14_64), [cospi_18_64] "r" (cospi_18_64),

406

[cospi_8_64] "r" (cospi_8_64), [cospi_24_64] "r" (cospi_24_64)

407

);

408

409

__asm__ __volatile__ (

410

"lh %[load1], 20(%[input]) \n\t"

411

"lh %[load2], 44(%[input]) \n\t"

412

"lh %[load3], 52(%[input]) \n\t"

413

"lh %[load4], 12(%[input]) \n\t"

414

415

"mtlo %[const_2_power_13], $ac1 \n\t"

416

"mthi $zero, $ac1 \n\t"

417

"mtlo %[const_2_power_13], $ac3 \n\t"

418

"mthi $zero, $ac3 \n\t"

419

420

"madd $ac1, %[load1], %[cospi_22_64] \n\t"

421

"msub $ac1, %[load2], %[cospi_10_64] \n\t"

422

"extp %[temp0], $ac1, 31 \n\t"

423

424

"madd $ac3, %[load1], %[cospi_10_64] \n\t"

425

"madd $ac3, %[load2], %[cospi_22_64] \n\t"

426

"extp %[temp3], $ac3, 31 \n\t"

427

428

"mtlo %[const_2_power_13], $ac1 \n\t"

429

"mthi $zero, $ac1 \n\t"

430

"mtlo %[const_2_power_13], $ac2 \n\t"

431

"mthi $zero, $ac2 \n\t"

432

433

"madd $ac2, %[load3], %[cospi_6_64] \n\t"

434

"msub $ac2, %[load4], %[cospi_26_64] \n\t"

435

"extp %[temp1], $ac2, 31 \n\t"

436

437

"madd $ac1, %[load3], %[cospi_26_64] \n\t"

438

"madd $ac1, %[load4], %[cospi_6_64] \n\t"

439

"extp %[temp2], $ac1, 31 \n\t"

440

441

"mtlo %[const_2_power_13], $ac1 \n\t"

442

"mthi $zero, $ac1 \n\t"

443

"mtlo %[const_2_power_13], $ac3 \n\t"

444

"mthi $zero, $ac3 \n\t"

445

446

"sub %[load1], %[temp1], %[temp0] \n\t"

447

"sub %[load2], %[temp2], %[temp3] \n\t"

448

449

"msub $ac1, %[load1], %[cospi_24_64] \n\t"

450

"msub $ac1, %[load2], %[cospi_8_64] \n\t"

451

"madd $ac3, %[load2], %[cospi_24_64] \n\t"

452

"msub $ac3, %[load1], %[cospi_8_64] \n\t"

453

454

"extp %[step2_10], $ac1, 31 \n\t"

455

"extp %[step2_13], $ac3, 31 \n\t"

456

"add %[step2_11], %[temp0], %[temp1] \n\t"

457

"add %[step2_12], %[temp2], %[temp3] \n\t"

458

459

: [load1] "=&r" (load1), [load2] "=&r" (load2),

460

[load3] "=&r" (load3), [load4] "=&r" (load4),

461

[temp0] "=&r" (temp0), [temp1] "=&r" (temp1),

462

[temp2] "=&r" (temp2), [temp3] "=&r" (temp3),

463

[step2_10] "=r" (step2_10), [step2_11] "=r" (step2_11),

464

[step2_12] "=r" (step2_12), [step2_13] "=r" (step2_13)

465

: [const_2_power_13] "r" (const_2_power_13), [input] "r" (input),

466

[cospi_22_64] "r" (cospi_22_64), [cospi_10_64] "r" (cospi_10_64),

467

[cospi_6_64] "r" (cospi_6_64), [cospi_26_64] "r" (cospi_26_64),

468

[cospi_8_64] "r" (cospi_8_64), [cospi_24_64] "r" (cospi_24_64)

469

);

470

471

__asm__ __volatile__ (

472

"mtlo %[const_2_power_13], $ac0 \n\t"

473

"mthi $zero, $ac0 \n\t"

474

"sub %[temp0], %[step2_14], %[step2_13] \n\t"

475

"sub %[temp0], %[temp0], %[step2_9] \n\t"

476

"add %[temp0], %[temp0], %[step2_10] \n\t"

477

"madd $ac0, %[temp0], %[cospi_16_64] \n\t"

478

479

"mtlo %[const_2_power_13], $ac1 \n\t"

480

"mthi $zero, $ac1 \n\t"

481

"sub %[temp1], %[step2_14], %[step2_13] \n\t"

482

"add %[temp1], %[temp1], %[step2_9] \n\t"

483

"sub %[temp1], %[temp1], %[step2_10] \n\t"

484

"madd $ac1, %[temp1], %[cospi_16_64] \n\t"

485

486

"mtlo %[const_2_power_13], $ac2 \n\t"

487

"mthi $zero, $ac2 \n\t"

488

"sub %[temp0], %[step2_15], %[step2_12] \n\t"

489

"sub %[temp0], %[temp0], %[step2_8] \n\t"

490

"add %[temp0], %[temp0], %[step2_11] \n\t"

491

"madd $ac2, %[temp0], %[cospi_16_64] \n\t"

492

493

"mtlo %[const_2_power_13], $ac3 \n\t"

494

"mthi $zero, $ac3 \n\t"

495

"sub %[temp1], %[step2_15], %[step2_12] \n\t"

496

"add %[temp1], %[temp1], %[step2_8] \n\t"

497

"sub %[temp1], %[temp1], %[step2_11] \n\t"

498

"madd $ac3, %[temp1], %[cospi_16_64] \n\t"

499

500

"add %[step3_8], %[step2_8], %[step2_11] \n\t"

501

"add %[step3_9], %[step2_9], %[step2_10] \n\t"

502

"add %[step3_14], %[step2_13], %[step2_14] \n\t"

503

"add %[step3_15], %[step2_12], %[step2_15] \n\t"

504

505

"extp %[step3_10], $ac0, 31 \n\t"

506

"extp %[step3_13], $ac1, 31 \n\t"

507

"extp %[step3_11], $ac2, 31 \n\t"

508

"extp %[step3_12], $ac3, 31 \n\t"

509

510

: [temp0] "=&r" (temp0), [temp1] "=&r" (temp1),

511

[step3_8] "=r" (step3_8), [step3_9] "=r" (step3_9),

512

[step3_10] "=r" (step3_10), [step3_11] "=r" (step3_11),

513

[step3_12] "=r" (step3_12), [step3_13] "=r" (step3_13),

514

[step3_14] "=r" (step3_14), [step3_15] "=r" (step3_15)

515

: [const_2_power_13] "r" (const_2_power_13),

516

[step2_8] "r" (step2_8), [step2_9] "r" (step2_9),

517

[step2_10] "r" (step2_10), [step2_11] "r" (step2_11),

518

[step2_12] "r" (step2_12), [step2_13] "r" (step2_13),

519

[step2_14] "r" (step2_14), [step2_15] "r" (step2_15),

520

[cospi_16_64] "r" (cospi_16_64)

521

);

522

523

step2_18 = step1_17 - step1_18;

524

step2_29 = step1_30 - step1_29;

525

526

__asm__ __volatile__ (

527

"mtlo %[const_2_power_13], $ac0 \n\t"

528

"mthi $zero, $ac0 \n\t"

529

"msub $ac0, %[step2_18], %[cospi_8_64] \n\t"

530

"madd $ac0, %[step2_29], %[cospi_24_64] \n\t"

531

"extp %[step3_18], $ac0, 31 \n\t"

532

533

: [step3_18] "=r" (step3_18)

534

: [const_2_power_13] "r" (const_2_power_13),

535

[step2_18] "r" (step2_18), [step2_29] "r" (step2_29),

536

[cospi_24_64] "r" (cospi_24_64), [cospi_8_64] "r" (cospi_8_64)

537

);

538

539

temp21 = step2_18 * cospi_24_64 + step2_29 * cospi_8_64;

540

step3_29 = (temp21 + DCT_CONST_ROUNDING) >> DCT_CONST_BITS;

541

542

step2_19 = step1_16 - step1_19;

543

step2_28 = step1_31 - step1_28;

544

545

__asm__ __volatile__ (

546

"mtlo %[const_2_power_13], $ac0 \n\t"

547

"mthi $zero, $ac0 \n\t"

548

"msub $ac0, %[step2_19], %[cospi_8_64] \n\t"

549

"madd $ac0, %[step2_28], %[cospi_24_64] \n\t"

550

"extp %[step3_19], $ac0, 31 \n\t"

551

552

: [step3_19] "=r" (step3_19)

553

: [const_2_power_13] "r" (const_2_power_13),

554

[step2_19] "r" (step2_19), [step2_28] "r" (step2_28),

555

[cospi_24_64] "r" (cospi_24_64), [cospi_8_64] "r" (cospi_8_64)

556

);

557

558

temp21 = step2_19 * cospi_24_64 + step2_28 * cospi_8_64;

559

step3_28 = (temp21 + DCT_CONST_ROUNDING) >> DCT_CONST_BITS;

560

561

step3_16 = step1_16 + step1_19;

562

step3_17 = step1_17 + step1_18;

563

step3_30 = step1_29 + step1_30;

564

step3_31 = step1_28 + step1_31;

565

566

step2_20 = step1_23 - step1_20;

567

step2_27 = step1_24 - step1_27;

568

569

__asm__ __volatile__ (

570

"mtlo %[const_2_power_13], $ac0 \n\t"

571

"mthi $zero, $ac0 \n\t"

572

"msub $ac0, %[step2_20], %[cospi_24_64] \n\t"

573

"msub $ac0, %[step2_27], %[cospi_8_64] \n\t"

574

"extp %[step3_20], $ac0, 31 \n\t"

575

576

: [step3_20] "=r" (step3_20)

577

: [const_2_power_13] "r" (const_2_power_13),

578

[step2_20] "r" (step2_20), [step2_27] "r" (step2_27),

579

[cospi_24_64] "r" (cospi_24_64), [cospi_8_64] "r" (cospi_8_64)

580

);

581

582

temp21 = -step2_20 * cospi_8_64 + step2_27 * cospi_24_64;

583

step3_27 = (temp21 + DCT_CONST_ROUNDING) >> DCT_CONST_BITS;

584

585

step2_21 = step1_22 - step1_21;

586

step2_26 = step1_25 - step1_26;

587

588

__asm__ __volatile__ (

589

"mtlo %[const_2_power_13], $ac1 \n\t"

590

"mthi $zero, $ac1 \n\t"

591

"msub $ac1, %[step2_21], %[cospi_24_64] \n\t"

592

"msub $ac1, %[step2_26], %[cospi_8_64] \n\t"

593

"extp %[step3_21], $ac1, 31 \n\t"

594

595

: [step3_21] "=r" (step3_21)

596

: [const_2_power_13] "r" (const_2_power_13),

597

[step2_21] "r" (step2_21), [step2_26] "r" (step2_26),

598

[cospi_24_64] "r" (cospi_24_64), [cospi_8_64] "r" (cospi_8_64)

599

);

600

601

temp21 = -step2_21 * cospi_8_64 + step2_26 * cospi_24_64;

602

step3_26 = (temp21 + DCT_CONST_ROUNDING) >> DCT_CONST_BITS;

603

604

step3_22 = step1_21 + step1_22;

605

step3_23 = step1_20 + step1_23;

606

step3_24 = step1_24 + step1_27;

607

step3_25 = step1_25 + step1_26;

608

609

step2_16 = step3_16 + step3_23;

610

step2_17 = step3_17 + step3_22;

611

step2_18 = step3_18 + step3_21;

612

step2_19 = step3_19 + step3_20;

613

step2_20 = step3_19 - step3_20;

614

step2_21 = step3_18 - step3_21;

615

step2_22 = step3_17 - step3_22;

616

step2_23 = step3_16 - step3_23;

617

618

step2_24 = step3_31 - step3_24;

619

step2_25 = step3_30 - step3_25;

620

step2_26 = step3_29 - step3_26;

621

step2_27 = step3_28 - step3_27;

622

step2_28 = step3_28 + step3_27;

623

step2_29 = step3_29 + step3_26;

624

step2_30 = step3_30 + step3_25;

625

step2_31 = step3_31 + step3_24;

626

627

__asm__ __volatile__ (

628

"lh %[load1], 0(%[input]) \n\t"

629

"lh %[load2], 32(%[input]) \n\t"

630

"lh %[load3], 16(%[input]) \n\t"

631

"lh %[load4], 48(%[input]) \n\t"

632

633

"mtlo %[const_2_power_13], $ac1 \n\t"

634

"mthi $zero, $ac1 \n\t"

635

"mtlo %[const_2_power_13], $ac2 \n\t"

636

"mthi $zero, $ac2 \n\t"

637

"add %[result1], %[load1], %[load2] \n\t"

638

"sub %[result2], %[load1], %[load2] \n\t"

639

"madd $ac1, %[result1], %[cospi_16_64] \n\t"

640

"madd $ac2, %[result2], %[cospi_16_64] \n\t"

641

"extp %[temp0], $ac1, 31 \n\t"

642

"extp %[temp1], $ac2, 31 \n\t"

643

644

"mtlo %[const_2_power_13], $ac3 \n\t"

645

"mthi $zero, $ac3 \n\t"

646

"madd $ac3, %[load3], %[cospi_24_64] \n\t"

647

"msub $ac3, %[load4], %[cospi_8_64] \n\t"

648

"extp %[temp2], $ac3, 31 \n\t"

649

650

"mtlo %[const_2_power_13], $ac1 \n\t"

651

"mthi $zero, $ac1 \n\t"

652

"madd $ac1, %[load3], %[cospi_8_64] \n\t"

653

"madd $ac1, %[load4], %[cospi_24_64] \n\t"

654

"extp %[temp3], $ac1, 31 \n\t"

655

656

"add %[step1_0], %[temp0], %[temp3] \n\t"

657

"add %[step1_1], %[temp1], %[temp2] \n\t"

658

"sub %[step1_2], %[temp1], %[temp2] \n\t"

659

"sub %[step1_3], %[temp0], %[temp3] \n\t"

660

661

: [load1] "=&r" (load1), [load2] "=&r" (load2),

662

[load3] "=&r" (load3), [load4] "=&r" (load4),

663

[result1] "=&r" (result1), [result2] "=&r" (result2),

664

[temp0] "=&r" (temp0), [temp1] "=&r" (temp1),

665

[temp2] "=&r" (temp2), [temp3] "=&r" (temp3),

666

[step1_0] "=r" (step1_0), [step1_1] "=r" (step1_1),

667

[step1_2] "=r" (step1_2), [step1_3] "=r" (step1_3)

668

: [const_2_power_13] "r" (const_2_power_13), [input] "r" (input),

669

[cospi_16_64] "r" (cospi_16_64),

670

[cospi_24_64] "r" (cospi_24_64), [cospi_8_64] "r" (cospi_8_64)

);

__asm__ __volatile__ (

675

"lh %[load1], 8(%[input]) \n\t"

676

"lh %[load2], 56(%[input]) \n\t"

677

"lh %[load3], 40(%[input]) \n\t"

678

"lh %[load4], 24(%[input]) \n\t"

679

680

"mtlo %[const_2_power_13], $ac1 \n\t"

681

"mthi $zero, $ac1 \n\t"

682

"mtlo %[const_2_power_13], $ac3 \n\t"

683

"mthi $zero, $ac3 \n\t"

684

685

"madd $ac1, %[load1], %[cospi_28_64] \n\t"

686

"msub $ac1, %[load2], %[cospi_4_64] \n\t"

687

"extp %[temp0], $ac1, 31 \n\t"

688

689

"madd $ac3, %[load1], %[cospi_4_64] \n\t"

690

"madd $ac3, %[load2], %[cospi_28_64] \n\t"

691

"extp %[temp3], $ac3, 31 \n\t"

692

693

"mtlo %[const_2_power_13], $ac1 \n\t"

694

"mthi $zero, $ac1 \n\t"

695

"mtlo %[const_2_power_13], $ac2 \n\t"

696

"mthi $zero, $ac2 \n\t"

697

698

"madd $ac2, %[load3], %[cospi_12_64] \n\t"

699

"msub $ac2, %[load4], %[cospi_20_64] \n\t"

700

"extp %[temp1], $ac2, 31 \n\t"

701

702

"madd $ac1, %[load3], %[cospi_20_64] \n\t"

703

"madd $ac1, %[load4], %[cospi_12_64] \n\t"

704

"extp %[temp2], $ac1, 31 \n\t"

705

706

"mtlo %[const_2_power_13], $ac1 \n\t"

707

"mthi $zero, $ac1 \n\t"

708

"mtlo %[const_2_power_13], $ac3 \n\t"

709

"mthi $zero, $ac3 \n\t"

710

711

"sub %[load1], %[temp3], %[temp2] \n\t"

712

"sub %[load1], %[load1], %[temp0] \n\t"

713

"add %[load1], %[load1], %[temp1] \n\t"

714

715

"sub %[load2], %[temp0], %[temp1] \n\t"

716

"sub %[load2], %[load2], %[temp2] \n\t"

717

"add %[load2], %[load2], %[temp3] \n\t"

718

719

"madd $ac1, %[load1], %[cospi_16_64] \n\t"

720

"madd $ac3, %[load2], %[cospi_16_64] \n\t"

721

722

"extp %[step1_5], $ac1, 31 \n\t"

723

"extp %[step1_6], $ac3, 31 \n\t"

724

"add %[step1_4], %[temp0], %[temp1] \n\t"

725

"add %[step1_7], %[temp3], %[temp2] \n\t"

726

727

: [load1] "=&r" (load1), [load2] "=&r" (load2),

728

[load3] "=&r" (load3), [load4] "=&r" (load4),

729

[temp0] "=&r" (temp0), [temp1] "=&r" (temp1),

730

[temp2] "=&r" (temp2), [temp3] "=&r" (temp3),

731

[step1_4] "=r" (step1_4), [step1_5] "=r" (step1_5),

732

[step1_6] "=r" (step1_6), [step1_7] "=r" (step1_7)

733

: [const_2_power_13] "r" (const_2_power_13), [input] "r" (input),

734

[cospi_20_64] "r" (cospi_20_64), [cospi_12_64] "r" (cospi_12_64),

735

[cospi_4_64] "r" (cospi_4_64), [cospi_28_64] "r" (cospi_28_64),

736

[cospi_16_64] "r" (cospi_16_64)

737

);

738

739

step2_0 = step1_0 + step1_7;

740

step2_1 = step1_1 + step1_6;

741

step2_2 = step1_2 + step1_5;

742

step2_3 = step1_3 + step1_4;

743

step2_4 = step1_3 - step1_4;

744

step2_5 = step1_2 - step1_5;

745

step2_6 = step1_1 - step1_6;

746

step2_7 = step1_0 - step1_7;

747

748

step1_0 = step2_0 + step3_15;

749

step1_1 = step2_1 + step3_14;

750

step1_2 = step2_2 + step3_13;

751

step1_3 = step2_3 + step3_12;

752

step1_4 = step2_4 + step3_11;

753

step1_5 = step2_5 + step3_10;

754

step1_6 = step2_6 + step3_9;

755

step1_7 = step2_7 + step3_8;

756

step1_8 = step2_7 - step3_8;

757

step1_9 = step2_6 - step3_9;

758

step1_10 = step2_5 - step3_10;

759

step1_11 = step2_4 - step3_11;

760

step1_12 = step2_3 - step3_12;

761

step1_13 = step2_2 - step3_13;

762

step1_14 = step2_1 - step3_14;

763

step1_15 = step2_0 - step3_15;

764

765

__asm__ __volatile__ (

766

"sub %[temp0], %[step2_27], %[step2_20] \n\t"

767

"mtlo %[const_2_power_13], $ac0 \n\t"

768

"mthi $zero, $ac0 \n\t"

769

"madd $ac0, %[temp0], %[cospi_16_64] \n\t"

770

"extp %[step1_20], $ac0, 31 \n\t"

771

772

: [temp0] "=&r" (temp0), [step1_20] "=r" (step1_20)

773

: [const_2_power_13] "r" (const_2_power_13),

774

[step2_20] "r" (step2_20), [step2_27] "r" (step2_27),

775

[cospi_16_64] "r" (cospi_16_64)

776

);

777

778

temp21 = (step2_20 + step2_27) * cospi_16_64;

779

step1_27 = (temp21 + DCT_CONST_ROUNDING) >> DCT_CONST_BITS;

780

781

__asm__ __volatile__ (

782

"sub %[temp0], %[step2_26], %[step2_21] \n\t"

783

"mtlo %[const_2_power_13], $ac0 \n\t"

784

"mthi $zero, $ac0 \n\t"

785

"madd $ac0, %[temp0], %[cospi_16_64] \n\t"

786

"extp %[step1_21], $ac0, 31 \n\t"

787

788

: [temp0] "=&r" (temp0), [step1_21] "=r" (step1_21)

789

: [const_2_power_13] "r" (const_2_power_13),

790

[step2_26] "r" (step2_26), [step2_21] "r" (step2_21),

791

[cospi_16_64] "r" (cospi_16_64)

792

);

793

794

temp21 = (step2_21 + step2_26) * cospi_16_64;

795

step1_26 = (temp21 + DCT_CONST_ROUNDING) >> DCT_CONST_BITS;

796

797

__asm__ __volatile__ (

798

"sub %[temp0], %[step2_25], %[step2_22] \n\t"

799

"mtlo %[const_2_power_13], $ac0 \n\t"

800

"mthi $zero, $ac0 \n\t"

801

"madd $ac0, %[temp0], %[cospi_16_64] \n\t"

802

"extp %[step1_22], $ac0, 31 \n\t"

803

804

: [temp0] "=&r" (temp0), [step1_22] "=r" (step1_22)

805

: [const_2_power_13] "r" (const_2_power_13),

806

[step2_25] "r" (step2_25), [step2_22] "r" (step2_22),

807

[cospi_16_64] "r" (cospi_16_64)

808

);

809

810

temp21 = (step2_22 + step2_25) * cospi_16_64;

811

step1_25 = (temp21 + DCT_CONST_ROUNDING) >> DCT_CONST_BITS;

812

813

__asm__ __volatile__ (

814

"sub %[temp0], %[step2_24], %[step2_23] \n\t"

815

"mtlo %[const_2_power_13], $ac0 \n\t"

816

"mthi $zero, $ac0 \n\t"

817

"madd $ac0, %[temp0], %[cospi_16_64] \n\t"

818

"extp %[step1_23], $ac0, 31 \n\t"

819

820

: [temp0] "=&r" (temp0), [step1_23] "=r" (step1_23)

821

: [const_2_power_13] "r" (const_2_power_13),

822

[step2_24] "r" (step2_24), [step2_23] "r" (step2_23),

823

[cospi_16_64] "r" (cospi_16_64)

824

);

825

826

temp21 = (step2_23 + step2_24) * cospi_16_64;

827

step1_24 = (temp21 + DCT_CONST_ROUNDING) >> DCT_CONST_BITS;

828

829

// final stage

830

output[0 * 32] = step1_0 + step2_31;

831

output[1 * 32] = step1_1 + step2_30;

832

output[2 * 32] = step1_2 + step2_29;

833

output[3 * 32] = step1_3 + step2_28;

834

output[4 * 32] = step1_4 + step1_27;

835

output[5 * 32] = step1_5 + step1_26;

836

output[6 * 32] = step1_6 + step1_25;

837

output[7 * 32] = step1_7 + step1_24;

838

output[8 * 32] = step1_8 + step1_23;

839

output[9 * 32] = step1_9 + step1_22;

840

output[10 * 32] = step1_10 + step1_21;

841

output[11 * 32] = step1_11 + step1_20;

842

output[12 * 32] = step1_12 + step2_19;

843

output[13 * 32] = step1_13 + step2_18;

844

output[14 * 32] = step1_14 + step2_17;

845

output[15 * 32] = step1_15 + step2_16;

846

output[16 * 32] = step1_15 - step2_16;

847

output[17 * 32] = step1_14 - step2_17;

848

output[18 * 32] = step1_13 - step2_18;

849

output[19 * 32] = step1_12 - step2_19;

850

output[20 * 32] = step1_11 - step1_20;

851

output[21 * 32] = step1_10 - step1_21;

852

output[22 * 32] = step1_9 - step1_22;

853

output[23 * 32] = step1_8 - step1_23;

854

output[24 * 32] = step1_7 - step1_24;

855

output[25 * 32] = step1_6 - step1_25;

856

output[26 * 32] = step1_5 - step1_26;

857

output[27 * 32] = step1_4 - step1_27;

858

output[28 * 32] = step1_3 - step2_28;

859

output[29 * 32] = step1_2 - step2_29;

860

output[30 * 32] = step1_1 - step2_30;

861

output[31 * 32] = step1_0 - step2_31;

input += 32;

output += 1;

}

}

Jingning Han

2015-08-03 14:51:10 -0700

[diff] [blame]

868

void vpx_idct32x32_1024_add_dspr2(const int16_t *input, uint8_t *dest,

Parag Salasakar

2013-10-24 11:29:04 +0530

[diff] [blame]

869

int dest_stride) {

870

DECLARE_ALIGNED(32, int16_t, out[32 * 32]);

871

int16_t *outptr = out;

872

uint32_t pos = 45;

873

874

/* bit positon for extract from acc */

875

__asm__ __volatile__ (

876

"wrdsp %[pos], 1 \n\t"

:

: [pos] "r" (pos)

);

// Rows

Dmitry Kovalev

6e4a03e

2014-01-31 17:05:03 -0800

[diff] [blame]

882

idct32_rows_dspr2(input, outptr, 32);

Parag Salasakar

2013-10-24 11:29:04 +0530

[diff] [blame]

883

884

// Columns

Jingning Han

2015-08-03 14:51:10 -0700

[diff] [blame]

885

vpx_idct32_cols_add_blk_dspr2(out, dest, dest_stride);

Parag Salasakar

2013-10-24 11:29:04 +0530

[diff] [blame]

886

}

887

Jingning Han

2015-08-03 14:51:10 -0700

[diff] [blame]

888

void vpx_idct32x32_34_add_dspr2(const int16_t *input, uint8_t *dest,

Parag Salasakar

2013-10-31 12:12:34 +0530

[diff] [blame]

889

int stride) {

890

DECLARE_ALIGNED(32, int16_t, out[32 * 32]);

891

int16_t *outptr = out;

uint32_t i;

uint32_t pos = 45;

/* bit positon for extract from acc */

896

__asm__ __volatile__ (

897

"wrdsp %[pos], 1 \n\t"

:

: [pos] "r" (pos)

);

// Rows

Dmitry Kovalev

6e4a03e

2014-01-31 17:05:03 -0800

[diff] [blame]

903

idct32_rows_dspr2(input, outptr, 8);

Parag Salasakar

2013-10-31 12:12:34 +0530

[diff] [blame]

904

905

outptr += 8;

906

__asm__ __volatile__ (

907

"sw $zero, 0(%[outptr]) \n\t"

908

"sw $zero, 4(%[outptr]) \n\t"

909

"sw $zero, 8(%[outptr]) \n\t"

910

"sw $zero, 12(%[outptr]) \n\t"

911

"sw $zero, 16(%[outptr]) \n\t"

912

"sw $zero, 20(%[outptr]) \n\t"

913

"sw $zero, 24(%[outptr]) \n\t"

914

"sw $zero, 28(%[outptr]) \n\t"

915

"sw $zero, 32(%[outptr]) \n\t"

916

"sw $zero, 36(%[outptr]) \n\t"

917

"sw $zero, 40(%[outptr]) \n\t"

918

"sw $zero, 44(%[outptr]) \n\t"

919

920

:

921

: [outptr] "r" (outptr)

922

);

923

924

for (i = 0; i < 31; ++i) {

925

outptr += 32;

926

927

__asm__ __volatile__ (

928

"sw $zero, 0(%[outptr]) \n\t"

929

"sw $zero, 4(%[outptr]) \n\t"

930

"sw $zero, 8(%[outptr]) \n\t"

931

"sw $zero, 12(%[outptr]) \n\t"

932

"sw $zero, 16(%[outptr]) \n\t"

933

"sw $zero, 20(%[outptr]) \n\t"

934

"sw $zero, 24(%[outptr]) \n\t"

935

"sw $zero, 28(%[outptr]) \n\t"

936

"sw $zero, 32(%[outptr]) \n\t"

937

"sw $zero, 36(%[outptr]) \n\t"

938

"sw $zero, 40(%[outptr]) \n\t"

939

"sw $zero, 44(%[outptr]) \n\t"

940

941

:

942

: [outptr] "r" (outptr)

);

}

// Columns

Jingning Han

2015-08-03 14:51:10 -0700

[diff] [blame]

947

vpx_idct32_cols_add_blk_dspr2(out, dest, stride);

Parag Salasakar

2013-10-31 12:12:34 +0530

[diff] [blame]

948

}

949

Jingning Han

2015-08-03 14:51:10 -0700

[diff] [blame]

950

void vpx_idct32x32_1_add_dspr2(const int16_t *input, uint8_t *dest,

Parag Salasakar