##
## Copyright (c) 2017, Alliance for Open Media. All rights reserved
##
## This source code is subject to the terms of the BSD 2 Clause License and
## the Alliance for Open Media Patent License 1.0. If the BSD 2 Clause License
## was not distributed with this source code in the LICENSE file, you can
## obtain it at www.aomedia.org/license/software. If the Alliance for Open
## Media Patent License 1.0 was not distributed with this source code in the
## PATENTS file, you can obtain it at www.aomedia.org/license/patent.
##
# Prints the C preamble shared by every prototype declared in this file: the
# headers that define the argument types, forward declarations for the structs
# that are only passed by pointer, and the CfL function-pointer typedefs.
# The here-doc is single-quoted so the C text is emitted verbatim and can never
# be altered by Perl interpolation.
sub av1_common_forward_decls() {
print <<'EOF';
/*
 * AV1
 */

#include "aom/aom_integer.h"
#include "aom_dsp/txfm_common.h"
#include "av1/common/common.h"
#include "av1/common/enums.h"
#include "av1/common/quant_common.h"
#include "av1/common/filter.h"
#include "av1/common/convolve.h"
#include "av1/common/av1_txfm.h"
#include "av1/common/odintrin.h"
#include "av1/common/restoration.h"

struct macroblockd;

/* Encoder forward decls */
struct macroblock;
struct txfm_param;
struct aom_variance_vtable;
struct search_site_config;
struct yv12_buffer_config;
struct NN_CONFIG;
typedef struct NN_CONFIG NN_CONFIG;

/* Function pointers returned by CfL functions */
typedef void (*cfl_subsample_lbd_fn)(const uint8_t *input, int input_stride,
                                     uint16_t *output_q3);

typedef void (*cfl_subsample_hbd_fn)(const uint16_t *input, int input_stride,
                                     uint16_t *output_q3);

typedef void (*cfl_subtract_average_fn)(const uint16_t *src, int16_t *dst);

typedef void (*cfl_predict_lbd_fn)(const int16_t *src, uint8_t *dst,
                                   int dst_stride, int alpha_q3);

typedef void (*cfl_predict_hbd_fn)(const int16_t *src, uint16_t *dst,
                                   int dst_stride, int alpha_q3, int bd);
EOF
}
# Register the sub above as this file's forward-declaration printer.
forward_decls qw/av1_common_forward_decls/;

# ISA tags for functions that are 64 bit only: every tag carries its extension
# name when the target arch is x86_64 and is the empty string otherwise.
($mmx_x86_64, $sse2_x86_64, $ssse3_x86_64, $avx_x86_64, $avx2_x86_64) =
  $opts{arch} eq "x86_64" ? qw/mmx sse2 ssse3 avx avx2/ : ('') x 5;

# Horizontal "_rs" convolution and Wiener convolve-add-src prototypes, each in
# a low and a high bitdepth flavour. Every specialize call sits directly under
# the prototype it belongs to and names all of that function's ISA variants in
# a single list, matching the convention used throughout this file.
add_proto qw/void av1_convolve_horiz_rs/, "const uint8_t *src, int src_stride, uint8_t *dst, int dst_stride, int w, int h, const int16_t *x_filters, int x0_qn, int x_step_qn";
specialize qw/av1_convolve_horiz_rs sse4_1/;

add_proto qw/void av1_highbd_convolve_horiz_rs/, "const uint16_t *src, int src_stride, uint16_t *dst, int dst_stride, int w, int h, const int16_t *x_filters, int x0_qn, int x_step_qn, int bd";
specialize qw/av1_highbd_convolve_horiz_rs sse4_1/;

add_proto qw/void av1_wiener_convolve_add_src/, "const uint8_t *src, ptrdiff_t src_stride, uint8_t *dst, ptrdiff_t dst_stride, const int16_t *filter_x, int x_step_q4, const int16_t *filter_y, int y_step_q4, int w, int h, const ConvolveParams *conv_params";
specialize qw/av1_wiener_convolve_add_src sse2 avx2 neon/;

add_proto qw/void av1_highbd_wiener_convolve_add_src/, "const uint8_t *src, ptrdiff_t src_stride, uint8_t *dst, ptrdiff_t dst_stride, const int16_t *filter_x, int x_step_q4, const int16_t *filter_y, int y_step_q4, int w, int h, const ConvolveParams *conv_params, int bps";
specialize qw/av1_highbd_wiener_convolve_add_src ssse3 avx2/;

# directional intra predictor functions
# The three zone predictors differ only in which edge-upsampling flags they
# take (z1: above, z2: above and left, z3: left); each has an avx2 version.
add_proto('void', 'av1_dr_prediction_z1', "uint8_t *dst, ptrdiff_t stride, int bw, int bh, const uint8_t *above, const uint8_t *left, int upsample_above, int dx, int dy");
specialize('av1_dr_prediction_z1', 'avx2');
add_proto('void', 'av1_dr_prediction_z2', "uint8_t *dst, ptrdiff_t stride, int bw, int bh, const uint8_t *above, const uint8_t *left, int upsample_above, int upsample_left, int dx, int dy");
specialize('av1_dr_prediction_z2', 'avx2');
add_proto('void', 'av1_dr_prediction_z3', "uint8_t *dst, ptrdiff_t stride, int bw, int bh, const uint8_t *above, const uint8_t *left, int upsample_left, int dx, int dy");
specialize('av1_dr_prediction_z3', 'avx2');

# FILTER_INTRA predictor functions
# Single prototype, specialized for sse4_1 only.
add_proto('void', 'av1_filter_intra_predictor', "uint8_t *dst, ptrdiff_t stride, TX_SIZE tx_size, const uint8_t *above, const uint8_t *left, int mode");
specialize('av1_filter_intra_predictor', 'sse4_1');

# High bitdepth functions

#
# Sub Pixel Filters
#
# All five prototypes below take exactly the same argument list.
my $highbd_convolve_args = "const uint8_t *src, ptrdiff_t src_stride, uint8_t *dst, ptrdiff_t dst_stride, const int16_t *filter_x, int x_step_q4, const int16_t *filter_y, int y_step_q4, int w, int h, int bps";

# copy/avg are declared without any specialization.
foreach my $fn (qw/av1_highbd_convolve_copy av1_highbd_convolve_avg/) {
  add_proto('void', $fn, $highbd_convolve_args);
}

# The convolve8 variants are specialized with the x86_64-only sse2 tag
# ($sse2_x86_64, assigned near the top of this file).
foreach my $fn (qw/av1_highbd_convolve8 av1_highbd_convolve8_horiz av1_highbd_convolve8_vert/) {
  add_proto('void', $fn, $highbd_convolve_args);
  specialize($fn, "$sse2_x86_64");
}

#inv txfm
# Every inverse-transform-and-add entry point in this group shares one
# argument list; only the set of ISA specializations differs.
my $inv_txfm_add_args = "const tran_low_t *dqcoeff, uint8_t *dst, int stride, const TxfmParam *txfm_param";

add_proto('void', 'av1_inv_txfm_add', $inv_txfm_add_args);
specialize('av1_inv_txfm_add', qw/ssse3 avx2 neon/);

add_proto('void', 'av1_highbd_inv_txfm_add', $inv_txfm_add_args);
specialize('av1_highbd_inv_txfm_add', qw/sse4_1 avx2/);

# Per-size high bitdepth variants, each with an sse4_1 version.
foreach my $size (qw/4x4 8x8 4x8 8x4 4x16 16x4/) {
  my $fn = "av1_highbd_inv_txfm_add_$size";
  add_proto('void', $fn, $inv_txfm_add_args);
  specialize($fn, 'sse4_1');
}

# High bitdepth inverse WHT 4x4 prototypes; both take the same arguments and
# neither is specialized.
foreach my $fn (qw/av1_highbd_iwht4x4_1_add av1_highbd_iwht4x4_16_add/) {
  add_proto('void', $fn, "const tran_low_t *input, uint8_t *dest, int dest_stride, int bd");
}

# 2D inverse transform prototypes, one per transform size. All sizes share a
# single argument list; only 4x4 and 8x8 have an sse4_1 specialization.
my $inv_txfm2d_args = "const int32_t *input, uint16_t *output, int stride, TX_TYPE tx_type, int bd";
my %inv_txfm2d_sse4_1 = map { $_ => 1 } qw/4x4 8x8/;

foreach my $size (qw/4x8 8x4 8x16 16x8 16x32 32x16 4x4 8x8 16x16 32x32
                     64x64 32x64 64x32 16x64 64x16
                     4x16 16x4 8x32 32x8/) {
  my $fn = "av1_inv_txfm2d_add_$size";
  add_proto('void', $fn, $inv_txfm2d_args);
  specialize($fn, 'sse4_1') if $inv_txfm2d_sse4_1{$size};
}

# directional intra predictor functions
# High bitdepth counterparts: 16-bit pixel pointers plus a trailing bit-depth
# argument. Each zone has an avx2 version.
add_proto('void', 'av1_highbd_dr_prediction_z1', "uint16_t *dst, ptrdiff_t stride, int bw, int bh, const uint16_t *above, const uint16_t *left, int upsample_above, int dx, int dy, int bd");
specialize('av1_highbd_dr_prediction_z1', 'avx2');
add_proto('void', 'av1_highbd_dr_prediction_z2', "uint16_t *dst, ptrdiff_t stride, int bw, int bh, const uint16_t *above, const uint16_t *left, int upsample_above, int upsample_left, int dx, int dy, int bd");
specialize('av1_highbd_dr_prediction_z2', 'avx2');
add_proto('void', 'av1_highbd_dr_prediction_z3', "uint16_t *dst, ptrdiff_t stride, int bw, int bh, const uint16_t *above, const uint16_t *left, int upsample_left, int dx, int dy, int bd");
specialize('av1_highbd_dr_prediction_z3', 'avx2');

# build compound seg mask functions
# The highbd variant takes the same arguments as the base one plus a trailing
# bit depth; the d16 variant reads CONV_BUF_TYPE sources and conv params.
my $diffwtd_mask_args = "uint8_t *mask, DIFFWTD_MASK_TYPE mask_type, const uint8_t *src0, int src0_stride, const uint8_t *src1, int src1_stride, int h, int w";

add_proto('void', 'av1_build_compound_diffwtd_mask', $diffwtd_mask_args);
specialize('av1_build_compound_diffwtd_mask', qw/sse4_1 avx2/);

add_proto('void', 'av1_build_compound_diffwtd_mask_highbd', "$diffwtd_mask_args, int bd");
specialize('av1_build_compound_diffwtd_mask_highbd', qw/ssse3 avx2/);

add_proto('void', 'av1_build_compound_diffwtd_mask_d16', "uint8_t *mask, DIFFWTD_MASK_TYPE mask_type, const CONV_BUF_TYPE *src0, int src0_stride, const CONV_BUF_TYPE *src1, int src1_stride, int h, int w, ConvolveParams *conv_params, int bd");
specialize('av1_build_compound_diffwtd_mask_d16', qw/sse4_1 avx2 neon/);

# Helper functions.
# specialize is written in the same qw/name isa.../ form as every other
# specialization in this file; the argument list passed is unchanged.
add_proto qw/void av1_round_shift_array/, "int32_t *arr, int size, int bit";
specialize qw/av1_round_shift_array sse4_1 neon/;

#
# Encoder functions below this point.
#
# Everything in this block is declared only when CONFIG_AV1_ENCODER is "yes".
if (aom_config("CONFIG_AV1_ENCODER") eq "yes") {

  # ENCODEMB INVOKE

  # the transform coefficients are held in 32-bit
  # values, so the assembler code for av1_block_error can no longer be used.
  add_proto('int64_t', 'av1_block_error', "const tran_low_t *coeff, const tran_low_t *dqcoeff, intptr_t block_size, int64_t *ssz");
  specialize('av1_block_error', 'avx2');

  # Argument list common to the quantize_fp family; av1_quantize_b and
  # av1_highbd_quantize_fp further down extend it with extra trailing arguments.
  my $quantize_args = "const tran_low_t *coeff_ptr, intptr_t n_coeffs, const int16_t *zbin_ptr, const int16_t *round_ptr, const int16_t *quant_ptr, const int16_t *quant_shift_ptr, tran_low_t *qcoeff_ptr, tran_low_t *dqcoeff_ptr, const int16_t *dequant_ptr, uint16_t *eob_ptr, const int16_t *scan, const int16_t *iscan";

  add_proto('void', 'av1_quantize_fp', $quantize_args);
  specialize('av1_quantize_fp', qw/sse2 avx2/);

  add_proto('void', 'av1_quantize_fp_32x32', $quantize_args);
  specialize('av1_quantize_fp_32x32', 'avx2');

  add_proto('void', 'av1_quantize_fp_64x64', $quantize_args);
  specialize('av1_quantize_fp_64x64', 'avx2');

  # fdct functions

  add_proto('void', 'av1_fwht4x4', "const int16_t *input, tran_low_t *output, int stride");

  #fwd txfm
  add_proto('void', 'av1_lowbd_fwd_txfm', "const int16_t *src_diff, tran_low_t *coeff, int diff_stride, TxfmParam *txfm_param");
  specialize('av1_lowbd_fwd_txfm', qw/sse2 sse4_1 avx2/);

  # One 2D forward transform per size; all share the argument list and all
  # have an sse4_1 version.
  my $fwd_txfm2d_args = "const int16_t *input, int32_t *output, int stride, TX_TYPE tx_type, int bd";
  foreach my $size (qw/4x8 8x4 8x16 16x8 16x32 32x16 4x16 16x4 8x32 32x8
                       4x4 8x8 16x16 32x32
                       64x64 32x64 64x32 16x64 64x16/) {
    my $fn = "av1_fwd_txfm2d_$size";
    add_proto('void', $fn, $fwd_txfm2d_args);
    specialize($fn, 'sse4_1');
  }

  #
  # Motion search
  #
  add_proto('int', 'av1_diamond_search_sad', "struct macroblock *x, const struct search_site_config *cfg, MV *ref_mv, MV *best_mv, int search_param, int sad_per_bit, int *num00, const struct aom_variance_vtable *fn_ptr, const MV *center_mv");

  add_proto('int', 'av1_full_range_search', "const struct macroblock *x, const struct search_site_config *cfg, MV *ref_mv, MV *best_mv, int search_param, int sad_per_bit, int *num00, const struct aom_variance_vtable *fn_ptr, const MV *center_mv");

  # TODO(yunqing): Add back the optimizations.
  # add_proto qw/void av1_temporal_filter_apply/, "uint8_t *frame1, unsigned int stride, uint8_t *frame2, unsigned int block_width, unsigned int block_height, int strength, int filter_weight, unsigned int *accumulator, uint16_t *count";
  # specialize qw/av1_temporal_filter_apply sse2 msa/;

  add_proto('void', 'av1_quantize_b', "$quantize_args, const qm_val_t * qm_ptr, const qm_val_t * iqm_ptr, int log_scale");

  # ENCODEMB INVOKE

  add_proto('int64_t', 'av1_highbd_block_error', "const tran_low_t *coeff, const tran_low_t *dqcoeff, intptr_t block_size, int64_t *ssz, int bd");
  specialize('av1_highbd_block_error', 'sse2');

  # add_proto qw/void av1_highbd_temporal_filter_apply/, "uint8_t *frame1, unsigned int stride, uint8_t *frame2, unsigned int block_width, unsigned int block_height, int strength, int filter_weight, unsigned int *accumulator, uint16_t *count";

  add_proto('void', 'av1_highbd_quantize_fp', "$quantize_args, int log_scale");
  specialize('av1_highbd_quantize_fp', qw/sse4_1 avx2/);

  add_proto('void', 'av1_highbd_fwht4x4', "const int16_t *input, tran_low_t *output, int stride");

  # End av1_high encoder functions

  # txb
  add_proto('void', 'av1_get_nz_map_contexts', "const uint8_t *const levels, const int16_t *const scan, const uint16_t eob, const TX_SIZE tx_size, const TX_CLASS tx_class, int8_t *const coeff_contexts");
  specialize('av1_get_nz_map_contexts', 'sse2');
  add_proto('void', 'av1_txb_init_levels', "const tran_low_t *const coeff, const int width, const int height, uint8_t *const levels");
  specialize('av1_txb_init_levels', qw/sse4_1 avx2/);

  add_proto('uint64_t', 'av1_wedge_sse_from_residuals', "const int16_t *r1, const int16_t *d, const uint8_t *m, int N");
  specialize('av1_wedge_sse_from_residuals', qw/sse2 avx2/);
  add_proto('int', 'av1_wedge_sign_from_residuals', "const int16_t *ds, const uint8_t *m, int N, int64_t limit");
  specialize('av1_wedge_sign_from_residuals', qw/sse2 avx2/);
  add_proto('void', 'av1_wedge_compute_delta_squares', "int16_t *d, const int16_t *a, const int16_t *b, int N");
  specialize('av1_wedge_compute_delta_squares', qw/sse2 avx2/);

  # hash
  add_proto('uint32_t', 'av1_get_crc32c_value', "void *crc_calculator, uint8_t *p, int length");
  specialize('av1_get_crc32c_value', 'sse4_2');

  # The highbd stats prototype is the base argument list plus a bit depth.
  my $compute_stats_args = "int wiener_win, const uint8_t *dgd8, const uint8_t *src8, int h_start, int h_end, int v_start, int v_end, int dgd_stride, int src_stride, int64_t *M, int64_t *H";

  add_proto('void', 'av1_compute_stats', $compute_stats_args);
  specialize('av1_compute_stats', qw/sse4_1 avx2/);

  add_proto('void', 'av1_compute_stats_highbd', "$compute_stats_args, aom_bit_depth_t bit_depth");
  specialize('av1_compute_stats_highbd', qw/sse4_1 avx2/);

  # lowbd and highbd projection-error prototypes take identical arguments.
  # The leading space inside the string is kept exactly as declared.
  my $pixel_proj_error_args = " const uint8_t *src8, int width, int height, int src_stride, const uint8_t *dat8, int dat_stride, int32_t *flt0, int flt0_stride, int32_t *flt1, int flt1_stride, int xq[2], const sgr_params_type *params";

  add_proto('int64_t', 'av1_lowbd_pixel_proj_error', $pixel_proj_error_args);
  specialize('av1_lowbd_pixel_proj_error', qw/sse4_1 avx2/);

  add_proto('int64_t', 'av1_highbd_pixel_proj_error', $pixel_proj_error_args);
  specialize('av1_highbd_pixel_proj_error', qw/sse4_1 avx2/);

  add_proto('void', 'av1_get_horver_correlation_full', " const int16_t *diff, int stride, int w, int h, float *hcorr, float *vcorr");
  specialize('av1_get_horver_correlation_full', qw/sse4_1 avx2/);

  add_proto('void', 'av1_nn_predict', " const float *input_nodes, const NN_CONFIG *const nn_config, float *const output");
  specialize('av1_nn_predict', 'sse3');
}
# end encoder functions

# Deringing Functions

add_proto('int', 'cdef_find_dir', "const uint16_t *img, int stride, int32_t *var, int coeff_shift");
add_proto('void', 'cdef_filter_block', "uint8_t *dst8, uint16_t *dst16, int dstride, const uint16_t *in, int pri_strength, int sec_strength, int dir, int pri_damping, int sec_damping, int bsize, int coeff_shift");

add_proto('void', 'copy_rect8_8bit_to_16bit', "uint16_t *dst, int dstride, const uint8_t *src, int sstride, int v, int h");
add_proto('void', 'copy_rect8_16bit_to_16bit', "uint16_t *dst, int dstride, const uint16_t *src, int sstride, int v, int h");

# VS compiling for 32 bit targets does not support vector types in
# structs as arguments, which makes the v256 type of the intrinsics
# hard to support, so optimizations for this target are disabled.
# All four functions get the same ISA list on every other configuration.
unless ($opts{config} =~ /libs-x86-win32-vs.*/) {
  foreach my $fn (qw/cdef_find_dir cdef_filter_block copy_rect8_8bit_to_16bit copy_rect8_16bit_to_16bit/) {
    specialize($fn, qw/sse2 ssse3 sse4_1 avx2 neon/);
  }
}

David Barkerd5dfa962017-01-10 15:06:08 +0000332# WARPED_MOTION / GLOBAL_MOTION functions
# Two affine-warp prototypes with parallel signatures: the 8-bit form uses
# uint8_t ref/pred buffers, the highbd form uses uint16_t buffers and takes an
# additional `int bd` argument placed before conv_params.
333
# 8-bit warp: sse4_1 and neon variants are listed.
Sebastien Alaiwan48795802017-10-30 12:07:13 +0100334add_proto qw/void av1_warp_affine/, "const int32_t *mat, const uint8_t *ref, int width, int height, int stride, uint8_t *pred, int p_col, int p_row, int p_width, int p_height, int p_stride, int subsampling_x, int subsampling_y, ConvolveParams *conv_params, int16_t alpha, int16_t beta, int16_t gamma, int16_t delta";
Remya2c893e62018-08-08 17:54:06 +0530335specialize qw/av1_warp_affine sse4_1 neon/;
David Barker2bcf2802017-04-05 11:44:31 +0100336
# High-bitdepth warp: only sse4_1 is listed (no neon entry).
Sebastien Alaiwan50097302018-03-15 10:29:38 +0100337add_proto qw/void av1_highbd_warp_affine/, "const int32_t *mat, const uint16_t *ref, int width, int height, int stride, uint16_t *pred, int p_col, int p_row, int p_width, int p_height, int p_stride, int subsampling_x, int subsampling_y, int bd, ConvolveParams *conv_params, int16_t alpha, int16_t beta, int16_t gamma, int16_t delta";
338specialize qw/av1_highbd_warp_affine sse4_1/;
David Barkerd5dfa962017-01-10 15:06:08 +0000339
# compute_cross_correlation is declared and specialized only when the
# CONFIG_AV1_ENCODER setting compares equal to the string "yes".
Sebastien Alaiwan48795802017-10-30 12:07:13 +0100340if (aom_config("CONFIG_AV1_ENCODER") eq "yes") {
David Barkeree674322017-05-10 15:43:02 +0100341 add_proto qw/double compute_cross_correlation/, "unsigned char *im1, int stride1, int x1, int y1, unsigned char *im2, int stride2, int x2, int y2";
Aniket Dhokcc0cc712018-12-18 12:33:30 +0530342 specialize qw/compute_cross_correlation sse4_1 avx2/;
David Barkeree674322017-05-10 15:43:02 +0100343}
344
David Barkerce110cc2017-02-22 10:38:59 +0000345# LOOP_RESTORATION functions
# Two self-guided restoration entry points. apply_selfguided_restoration
# returns void and writes to `dst`; av1_selfguided_restoration returns int and
# fills the flt0/flt1 int32_t buffers. Both list sse4_1, avx2 and neon.
346
Debargha Mukherjee0d639a92018-02-26 14:54:55 -0800347add_proto qw/void apply_selfguided_restoration/, "const uint8_t *dat, int width, int height, int stride, int eps, const int *xqd, uint8_t *dst, int dst_stride, int32_t *tmpbuf, int bit_depth, int highbd";
Venkat03504962018-06-26 08:41:26 +0530348specialize qw/apply_selfguided_restoration sse4_1 avx2 neon/;
David Barker506eb722017-03-08 13:35:49 +0000349
# NOTE: the prototype string below spans three lines, so the line breaks are
# part of the string literal itself. Do not insert comments between them.
Debargha Mukherjeeeeb121c2018-09-10 16:20:28 -0700350add_proto qw/int av1_selfguided_restoration/, "const uint8_t *dgd8, int width, int height,
351 int dgd_stride, int32_t *flt0, int32_t *flt1, int flt_stride,
352 int sgr_params_idx, int bit_depth, int highbd";
Venkat03504962018-06-26 08:41:26 +0530353specialize qw/av1_selfguided_restoration sse4_1 avx2 neon/;
David Barkerce110cc2017-02-22 10:38:59 +0000354
David Barker8295c7c2017-06-02 15:21:43 +0100355# CONVOLVE_ROUND/COMPOUND_ROUND functions
# Sixteen convolve prototypes in two parallel groups of eight:
#   - av1_convolve_*_sr / av1_jnt_convolve_*: uint8_t src and dst.
#   - av1_highbd_*: uint16_t src and dst, plus a trailing `int bd`.
# Within each group every prototype has the same parameter list.
# The matching specialize calls are not in this group; they follow the two
# *_convolve_2d_scale prototypes further down.
Linfeng Zhangf880d182018-03-16 12:24:05 -0700356
Peng Bin3a0c2ed2018-07-19 16:24:00 +0800357add_proto qw/void av1_convolve_2d_sr/, "const uint8_t *src, int src_stride, uint8_t *dst, int dst_stride, int w, int h, const InterpFilterParams *filter_params_x, const InterpFilterParams *filter_params_y, const int subpel_x_q4, const int subpel_y_q4, ConvolveParams *conv_params";
358add_proto qw/void av1_convolve_2d_copy_sr/, "const uint8_t *src, int src_stride, uint8_t *dst, int dst_stride, int w, int h, const InterpFilterParams *filter_params_x, const InterpFilterParams *filter_params_y, const int subpel_x_q4, const int subpel_y_q4, ConvolveParams *conv_params";
359add_proto qw/void av1_convolve_x_sr/, "const uint8_t *src, int src_stride, uint8_t *dst, int dst_stride, int w, int h, const InterpFilterParams *filter_params_x, const InterpFilterParams *filter_params_y, const int subpel_x_q4, const int subpel_y_q4, ConvolveParams *conv_params";
360add_proto qw/void av1_convolve_y_sr/, "const uint8_t *src, int src_stride, uint8_t *dst, int dst_stride, int w, int h, const InterpFilterParams *filter_params_x, const InterpFilterParams *filter_params_y, const int subpel_x_q4, const int subpel_y_q4, ConvolveParams *conv_params";
361add_proto qw/void av1_jnt_convolve_2d/, "const uint8_t *src, int src_stride, uint8_t *dst, int dst_stride, int w, int h, const InterpFilterParams *filter_params_x, const InterpFilterParams *filter_params_y, const int subpel_x_q4, const int subpel_y_q4, ConvolveParams *conv_params";
362add_proto qw/void av1_jnt_convolve_2d_copy/, "const uint8_t *src, int src_stride, uint8_t *dst, int dst_stride, int w, int h, const InterpFilterParams *filter_params_x, const InterpFilterParams *filter_params_y, const int subpel_x_q4, const int subpel_y_q4, ConvolveParams *conv_params";
363add_proto qw/void av1_jnt_convolve_x/, "const uint8_t *src, int src_stride, uint8_t *dst, int dst_stride, int w, int h, const InterpFilterParams *filter_params_x, const InterpFilterParams *filter_params_y, const int subpel_x_q4, const int subpel_y_q4, ConvolveParams *conv_params";
364add_proto qw/void av1_jnt_convolve_y/, "const uint8_t *src, int src_stride, uint8_t *dst, int dst_stride, int w, int h, const InterpFilterParams *filter_params_x, const InterpFilterParams *filter_params_y, const int subpel_x_q4, const int subpel_y_q4, ConvolveParams *conv_params";
# High-bitdepth counterparts (uint16_t buffers, extra `int bd`).
365add_proto qw/void av1_highbd_convolve_2d_copy_sr/, "const uint16_t *src, int src_stride, uint16_t *dst, int dst_stride, int w, int h, const InterpFilterParams *filter_params_x, const InterpFilterParams *filter_params_y, const int subpel_x_q4, const int subpel_y_q4, ConvolveParams *conv_params, int bd";
366add_proto qw/void av1_highbd_convolve_2d_sr/, "const uint16_t *src, int src_stride, uint16_t *dst, int dst_stride, int w, int h, const InterpFilterParams *filter_params_x, const InterpFilterParams *filter_params_y, const int subpel_x_q4, const int subpel_y_q4, ConvolveParams *conv_params, int bd";
367add_proto qw/void av1_highbd_convolve_x_sr/, "const uint16_t *src, int src_stride, uint16_t *dst, int dst_stride, int w, int h, const InterpFilterParams *filter_params_x, const InterpFilterParams *filter_params_y, const int subpel_x_q4, const int subpel_y_q4, ConvolveParams *conv_params, int bd";
368add_proto qw/void av1_highbd_convolve_y_sr/, "const uint16_t *src, int src_stride, uint16_t *dst, int dst_stride, int w, int h, const InterpFilterParams *filter_params_x, const InterpFilterParams *filter_params_y, const int subpel_x_q4, const int subpel_y_q4, ConvolveParams *conv_params, int bd";
369add_proto qw/void av1_highbd_jnt_convolve_2d/, "const uint16_t *src, int src_stride, uint16_t *dst, int dst_stride, int w, int h, const InterpFilterParams *filter_params_x, const InterpFilterParams *filter_params_y, const int subpel_x_q4, const int subpel_y_q4, ConvolveParams *conv_params, int bd";
370add_proto qw/void av1_highbd_jnt_convolve_x/, "const uint16_t *src, int src_stride, uint16_t *dst, int dst_stride, int w, int h, const InterpFilterParams *filter_params_x, const InterpFilterParams *filter_params_y, const int subpel_x_q4, const int subpel_y_q4, ConvolveParams *conv_params, int bd";
371add_proto qw/void av1_highbd_jnt_convolve_y/, "const uint16_t *src, int src_stride, uint16_t *dst, int dst_stride, int w, int h, const InterpFilterParams *filter_params_x, const InterpFilterParams *filter_params_y, const int subpel_x_q4, const int subpel_y_q4, ConvolveParams *conv_params, int bd";
372add_proto qw/void av1_highbd_jnt_convolve_2d_copy/, "const uint16_t *src, int src_stride, uint16_t *dst, int dst_stride, int w, int h, const InterpFilterParams *filter_params_x, const InterpFilterParams *filter_params_y, const int subpel_x_q4, const int subpel_y_q4, ConvolveParams *conv_params, int bd";
Linfeng Zhangf880d182018-03-16 12:24:05 -0700373
# Scaled 2-D convolve prototypes (8-bit, then high-bitdepth with trailing
# `int bd`). Unlike the non-scaled prototypes, each axis takes a sub-pixel
# offset plus a step. The offsets carry the same _qn suffix as the
# x_step_qn / y_step_qn arguments they are paired with, consistently on both
# axes and in both prototypes.
# NOTE(review): only parameter names in the prototype strings are affected;
# types and argument order are unchanged. Presumably the offsets share the
# steps' fixed-point precision -- confirm against the C definitions.
Peng Bin3a0c2ed2018-07-19 16:24:00 +0800374 add_proto qw/void av1_convolve_2d_scale/, "const uint8_t *src, int src_stride, uint8_t *dst, int dst_stride, int w, int h, const InterpFilterParams *filter_params_x, const InterpFilterParams *filter_params_y, const int subpel_x_qn, const int x_step_qn, const int subpel_y_qn, const int y_step_qn, ConvolveParams *conv_params";
375 add_proto qw/void av1_highbd_convolve_2d_scale/, "const uint16_t *src, int src_stride, uint16_t *dst, int dst_stride, int w, int h, const InterpFilterParams *filter_params_x, const InterpFilterParams *filter_params_y, const int subpel_x_qn, const int x_step_qn, const int subpel_y_qn, const int y_step_qn, ConvolveParams *conv_params, int bd";
Sachin Kumar Garg9974ee32018-05-23 14:55:52 +0530376
# Variant lists for the convolve functions. These statements are indented,
# but no enclosing block is opened in the visible code around them.
# 8-bit entries: sse2/avx2/neon throughout, except av1_convolve_2d_scale
# (sse4_1 only) and av1_jnt_convolve_2d (which additionally lists ssse3).
Remya Prakasand580a4d2018-05-24 15:32:35 +0530377 specialize qw/av1_convolve_2d_sr sse2 avx2 neon/;
Sachin Kumar Garg9974ee32018-05-23 14:55:52 +0530378 specialize qw/av1_convolve_2d_copy_sr sse2 avx2 neon/;
Remya Prakasane915e3c2018-05-11 17:16:34 +0530379 specialize qw/av1_convolve_x_sr sse2 avx2 neon/;
Remya Prakasan981f2102018-05-17 20:03:21 +0530380 specialize qw/av1_convolve_y_sr sse2 avx2 neon/;
Sebastien Alaiwan50097302018-03-15 10:29:38 +0100381 specialize qw/av1_convolve_2d_scale sse4_1/;
Yaowu Xu24ec4642018-11-01 16:49:42 -0700382 specialize qw/av1_jnt_convolve_2d sse2 ssse3 avx2 neon/;
Sachin Kumar Garg9974ee32018-05-23 14:55:52 +0530383 specialize qw/av1_jnt_convolve_2d_copy sse2 avx2 neon/;
Venkatdac71a72018-05-31 12:27:32 +0530384 specialize qw/av1_jnt_convolve_x sse2 avx2 neon/;
385 specialize qw/av1_jnt_convolve_y sse2 avx2 neon/;
# High-bitdepth entries: none of them lists neon. The baseline x86 level
# differs per function (sse2, ssse3 or sse4_1), so the lists are not uniform.
Sebastien Alaiwan50097302018-03-15 10:29:38 +0100386 specialize qw/av1_highbd_convolve_2d_copy_sr sse2 avx2/;
Linfeng Zhangf880d182018-03-16 12:24:05 -0700387 specialize qw/av1_highbd_convolve_2d_sr ssse3 avx2/;
388 specialize qw/av1_highbd_convolve_x_sr ssse3 avx2/;
389 specialize qw/av1_highbd_convolve_y_sr ssse3 avx2/;
Sebastien Alaiwan50097302018-03-15 10:29:38 +0100390 specialize qw/av1_highbd_convolve_2d_scale sse4_1/;
Sebastien Alaiwan50097302018-03-15 10:29:38 +0100391 specialize qw/av1_highbd_jnt_convolve_2d sse4_1 avx2/;
Ravi Chaudhary23c1d632018-03-22 11:12:56 +0530392 specialize qw/av1_highbd_jnt_convolve_x sse4_1 avx2/;
393 specialize qw/av1_highbd_jnt_convolve_y sse4_1 avx2/;
394 specialize qw/av1_highbd_jnt_convolve_2d_copy sse4_1 avx2/;
David Barker8295c7c2017-06-02 15:21:43 +0100395
Joe Young89d321f2017-09-14 15:59:43 -0700396# INTRA_EDGE functions
# Filter and upsample prototypes in two flavours: the plain names operate on
# a uint8_t buffer, the *_high names on a uint16_t buffer. Only
# av1_upsample_intra_edge_high takes a `bd` argument. Every function in this
# section lists sse4_1 as its sole variant.
Yaowu Xu299577c2018-02-28 15:57:22 -0800397add_proto qw/void av1_filter_intra_edge/, "uint8_t *p, int sz, int strength";
398specialize qw/av1_filter_intra_edge sse4_1/;
399add_proto qw/void av1_upsample_intra_edge/, "uint8_t *p, int sz";
400specialize qw/av1_upsample_intra_edge sse4_1/;
Sarah Parker7cf7f0e2018-01-03 17:09:12 -0800401
Yaowu Xu299577c2018-02-28 15:57:22 -0800402add_proto qw/void av1_filter_intra_edge_high/, "uint16_t *p, int sz, int strength";
403specialize qw/av1_filter_intra_edge_high sse4_1/;
404add_proto qw/void av1_upsample_intra_edge_high/, "uint16_t *p, int sz, int bd";
405specialize qw/av1_upsample_intra_edge_high sse4_1/;
Joe Young89d321f2017-09-14 15:59:43 -0700406
Luc Trudeaub4faea72017-12-15 16:44:01 -0500407# CFL
# Every prototype in this section takes a single `TX_SIZE tx_size` argument
# and returns one of the cfl_*_fn types (subtract-average, lbd/hbd subsample,
# lbd/hbd predict). The forward-declaration header emitted at the top of this
# file describes those types as function pointers returned by CfL functions.
# get_subtract_average_fn is the only entry listing sse2 and vsx; all other
# entries list ssse3, avx2 and neon.
Luc Trudeau3ec16a32018-03-01 20:58:09 -0500408add_proto qw/cfl_subtract_average_fn get_subtract_average_fn/, "TX_SIZE tx_size";
Luc Trudeau22296e42018-04-04 10:38:01 -0400409specialize qw/get_subtract_average_fn sse2 avx2 neon vsx/;
Luc Trudeaub4faea72017-12-15 16:44:01 -0500410
# Luma subsampling getters, low bitdepth (lbd): 420, 422, 444.
Luc Trudeau3ec16a32018-03-01 20:58:09 -0500411add_proto qw/cfl_subsample_lbd_fn cfl_get_luma_subsampling_420_lbd/, "TX_SIZE tx_size";
Luc Trudeau46929632018-02-16 15:09:26 -0500412specialize qw/cfl_get_luma_subsampling_420_lbd ssse3 avx2 neon/;
Luc Trudeaufe735c42018-01-31 22:36:13 -0500413
Luc Trudeau34061662018-03-27 20:10:49 -0400414add_proto qw/cfl_subsample_lbd_fn cfl_get_luma_subsampling_422_lbd/, "TX_SIZE tx_size";
415specialize qw/cfl_get_luma_subsampling_422_lbd ssse3 avx2 neon/;
416
Luc Trudeau9ba35682018-03-23 21:08:15 -0400417add_proto qw/cfl_subsample_lbd_fn cfl_get_luma_subsampling_444_lbd/, "TX_SIZE tx_size";
418specialize qw/cfl_get_luma_subsampling_444_lbd ssse3 avx2 neon/;
419
# Luma subsampling getters, high bitdepth (hbd): 420, 422, 444.
Luc Trudeau069473b2018-02-28 11:57:14 -0500420add_proto qw/cfl_subsample_hbd_fn cfl_get_luma_subsampling_420_hbd/, "TX_SIZE tx_size";
Luc Trudeau32b8af72018-03-30 18:38:02 -0400421specialize qw/cfl_get_luma_subsampling_420_hbd ssse3 avx2 neon/;
Luc Trudeau069473b2018-02-28 11:57:14 -0500422
Luc Trudeau576846a2018-04-05 10:39:54 -0400423add_proto qw/cfl_subsample_hbd_fn cfl_get_luma_subsampling_422_hbd/, "TX_SIZE tx_size";
Luc Trudeau733dacf2018-04-05 12:34:43 -0400424specialize qw/cfl_get_luma_subsampling_422_hbd ssse3 avx2 neon/;
Luc Trudeau576846a2018-04-05 10:39:54 -0400425
Luc Trudeau87a6a4f2018-04-05 14:04:18 -0400426add_proto qw/cfl_subsample_hbd_fn cfl_get_luma_subsampling_444_hbd/, "TX_SIZE tx_size";
Luc Trudeaubee20882018-04-05 14:50:35 -0400427specialize qw/cfl_get_luma_subsampling_444_hbd ssse3 avx2 neon/;
Luc Trudeau87a6a4f2018-04-05 14:04:18 -0400428
# Prediction getters: one for low bitdepth, one for high bitdepth.
Luc Trudeau3ec16a32018-03-01 20:58:09 -0500429add_proto qw/cfl_predict_lbd_fn get_predict_lbd_fn/, "TX_SIZE tx_size";
Luc Trudeau5905ac52018-03-08 13:22:23 -0500430specialize qw/get_predict_lbd_fn ssse3 avx2 neon/;
David Michael Barrc363ab72018-01-12 16:53:38 +0900431
Luc Trudeau3ec16a32018-03-01 20:58:09 -0500432add_proto qw/cfl_predict_hbd_fn get_predict_hbd_fn/, "TX_SIZE tx_size";
Luc Trudeau5905ac52018-03-08 13:22:23 -0500433specialize qw/get_predict_hbd_fn ssse3 avx2 neon/;
Luc Trudeaub4faea72017-12-15 16:44:01 -0500434
# Final statement evaluates to a true value. Perl requires this of files
# loaded with `require`; presumably this script is loaded that way by the
# RTCD generator -- confirm against the caller.
Sebastien Alaiwanda346922017-10-11 10:14:43 +02004351;