merge aom_asm_stubs.c and highbd_convolve_sse2.c
This allows the functions in highbd_convolve_sse2.c to be made static.
This fixes some -Wmissing-prototypes warnings.
This change is similar to what was done in libvpx:
c67a2e76a subpixel_8t sse2: resolve missing declarations
Bug: aomedia:3416
Change-Id: I473da99c88edfec47ca5b3384a74d3f076b565d9
diff --git a/aom_dsp/aom_dsp.cmake b/aom_dsp/aom_dsp.cmake
index 27099d3..6d8e5a9 100644
--- a/aom_dsp/aom_dsp.cmake
+++ b/aom_dsp/aom_dsp.cmake
@@ -58,7 +58,6 @@
list(APPEND AOM_DSP_COMMON_INTRIN_SSE2
"${AOM_ROOT}/aom_dsp/x86/aom_convolve_copy_sse2.c"
- "${AOM_ROOT}/aom_dsp/x86/aom_asm_stubs.c"
"${AOM_ROOT}/aom_dsp/x86/convolve.h"
"${AOM_ROOT}/aom_dsp/x86/convolve_sse2.h"
"${AOM_ROOT}/aom_dsp/x86/fft_sse2.c"
diff --git a/aom_dsp/x86/aom_asm_stubs.c b/aom_dsp/x86/aom_asm_stubs.c
deleted file mode 100644
index 6c7fdd6..0000000
--- a/aom_dsp/x86/aom_asm_stubs.c
+++ /dev/null
@@ -1,61 +0,0 @@
-/*
- * Copyright (c) 2016, Alliance for Open Media. All rights reserved
- *
- * This source code is subject to the terms of the BSD 2 Clause License and
- * the Alliance for Open Media Patent License 1.0. If the BSD 2 Clause License
- * was not distributed with this source code in the LICENSE file, you can
- * obtain it at www.aomedia.org/license/software. If the Alliance for Open
- * Media Patent License 1.0 was not distributed with this source code in the
- * PATENTS file, you can obtain it at www.aomedia.org/license/patent.
- */
-
-#include "config/aom_config.h"
-#include "config/aom_dsp_rtcd.h"
-
-#include "aom_dsp/x86/convolve.h"
-
-#if HAVE_SSE2
-#if CONFIG_AV1_HIGHBITDEPTH
-highbd_filter8_1dfunction aom_highbd_filter_block1d16_v8_sse2;
-highbd_filter8_1dfunction aom_highbd_filter_block1d16_h8_sse2;
-highbd_filter8_1dfunction aom_highbd_filter_block1d8_v8_sse2;
-highbd_filter8_1dfunction aom_highbd_filter_block1d8_h8_sse2;
-highbd_filter8_1dfunction aom_highbd_filter_block1d4_v8_sse2;
-highbd_filter8_1dfunction aom_highbd_filter_block1d4_h8_sse2;
-
-highbd_filter8_1dfunction aom_highbd_filter_block1d16_v4_sse2;
-highbd_filter8_1dfunction aom_highbd_filter_block1d16_h4_sse2;
-highbd_filter8_1dfunction aom_highbd_filter_block1d8_v4_sse2;
-highbd_filter8_1dfunction aom_highbd_filter_block1d8_h4_sse2;
-highbd_filter8_1dfunction aom_highbd_filter_block1d4_v4_sse2;
-highbd_filter8_1dfunction aom_highbd_filter_block1d4_h4_sse2;
-
-highbd_filter8_1dfunction aom_highbd_filter_block1d16_v2_sse2;
-highbd_filter8_1dfunction aom_highbd_filter_block1d16_h2_sse2;
-highbd_filter8_1dfunction aom_highbd_filter_block1d8_v2_sse2;
-highbd_filter8_1dfunction aom_highbd_filter_block1d8_h2_sse2;
-highbd_filter8_1dfunction aom_highbd_filter_block1d4_v2_sse2;
-highbd_filter8_1dfunction aom_highbd_filter_block1d4_h2_sse2;
-
-// void aom_highbd_convolve8_horiz_sse2(const uint8_t *src,
-// ptrdiff_t src_stride,
-// uint8_t *dst,
-// ptrdiff_t dst_stride,
-// const int16_t *filter_x,
-// int x_step_q4,
-// const int16_t *filter_y,
-// int y_step_q4,
-// int w, int h, int bd);
-// void aom_highbd_convolve8_vert_sse2(const uint8_t *src,
-// ptrdiff_t src_stride,
-// uint8_t *dst,
-// ptrdiff_t dst_stride,
-// const int16_t *filter_x,
-// int x_step_q4,
-// const int16_t *filter_y,
-// int y_step_q4,
-// int w, int h, int bd);
-HIGH_FUN_CONV_1D(horiz, x_step_q4, filter_x, h, src, , sse2)
-HIGH_FUN_CONV_1D(vert, y_step_q4, filter_y, v, src - src_stride * 3, , sse2)
-#endif
-#endif // HAVE_SSE2
diff --git a/aom_dsp/x86/aom_high_subpixel_8t_sse2.asm b/aom_dsp/x86/aom_high_subpixel_8t_sse2.asm
index d392225..f84f8fa 100644
--- a/aom_dsp/x86/aom_high_subpixel_8t_sse2.asm
+++ b/aom_dsp/x86/aom_high_subpixel_8t_sse2.asm
@@ -202,14 +202,15 @@
SECTION .text
-;void aom_filter_block1d4_v8_sse2
+;void aom_highbd_filter_block1d4_v8_sse2
;(
-; unsigned char *src_ptr,
-; unsigned int src_pitch,
-; unsigned char *output_ptr,
-; unsigned int out_pitch,
-; unsigned int output_height,
-; short *filter
+; const uint16_t *src_ptr,
+; const ptrdiff_t src_pitch,
+; uint16_t *output_ptr,
+; ptrdiff_t out_pitch,
+; unsigned int output_height,
+; const int16_t *filter,
+; int bd
;)
globalsym(aom_highbd_filter_block1d4_v8_sse2)
sym(aom_highbd_filter_block1d4_v8_sse2):
@@ -272,14 +273,15 @@
pop rbp
ret
-;void aom_filter_block1d8_v8_sse2
+;void aom_highbd_filter_block1d8_v8_sse2
;(
-; unsigned char *src_ptr,
-; unsigned int src_pitch,
-; unsigned char *output_ptr,
-; unsigned int out_pitch,
-; unsigned int output_height,
-; short *filter
+; const uint16_t *src_ptr,
+; const ptrdiff_t src_pitch,
+; uint16_t *output_ptr,
+; ptrdiff_t out_pitch,
+; unsigned int output_height,
+; const int16_t *filter,
+; int bd
;)
globalsym(aom_highbd_filter_block1d8_v8_sse2)
sym(aom_highbd_filter_block1d8_v8_sse2):
@@ -331,14 +333,15 @@
pop rbp
ret
-;void aom_filter_block1d16_v8_sse2
+;void aom_highbd_filter_block1d16_v8_sse2
;(
-; unsigned char *src_ptr,
-; unsigned int src_pitch,
-; unsigned char *output_ptr,
-; unsigned int out_pitch,
-; unsigned int output_height,
-; short *filter
+; const uint16_t *src_ptr,
+; const ptrdiff_t src_pitch,
+; uint16_t *output_ptr,
+; ptrdiff_t out_pitch,
+; unsigned int output_height,
+; const int16_t *filter,
+; int bd
;)
globalsym(aom_highbd_filter_block1d16_v8_sse2)
sym(aom_highbd_filter_block1d16_v8_sse2):
@@ -394,14 +397,15 @@
pop rbp
ret
-;void aom_filter_block1d4_h8_sse2
+;void aom_highbd_filter_block1d4_h8_sse2
;(
-; unsigned char *src_ptr,
-; unsigned int src_pixels_per_line,
-; unsigned char *output_ptr,
-; unsigned int output_pitch,
-; unsigned int output_height,
-; short *filter
+; const uint16_t *src_ptr,
+; const ptrdiff_t src_pitch,
+; uint16_t *output_ptr,
+; ptrdiff_t out_pitch,
+; unsigned int output_height,
+; const int16_t *filter,
+; int bd
;)
globalsym(aom_highbd_filter_block1d4_h8_sse2)
sym(aom_highbd_filter_block1d4_h8_sse2):
@@ -469,14 +473,15 @@
pop rbp
ret
-;void aom_filter_block1d8_h8_sse2
+;void aom_highbd_filter_block1d8_h8_sse2
;(
-; unsigned char *src_ptr,
-; unsigned int src_pixels_per_line,
-; unsigned char *output_ptr,
-; unsigned int output_pitch,
-; unsigned int output_height,
-; short *filter
+; const uint16_t *src_ptr,
+; const ptrdiff_t src_pitch,
+; uint16_t *output_ptr,
+; ptrdiff_t out_pitch,
+; unsigned int output_height,
+; const int16_t *filter,
+; int bd
;)
globalsym(aom_highbd_filter_block1d8_h8_sse2)
sym(aom_highbd_filter_block1d8_h8_sse2):
@@ -535,14 +540,15 @@
pop rbp
ret
-;void aom_filter_block1d16_h8_sse2
+;void aom_highbd_filter_block1d16_h8_sse2
;(
-; unsigned char *src_ptr,
-; unsigned int src_pixels_per_line,
-; unsigned char *output_ptr,
-; unsigned int output_pitch,
-; unsigned int output_height,
-; short *filter
+; const uint16_t *src_ptr,
+; const ptrdiff_t src_pitch,
+; uint16_t *output_ptr,
+; ptrdiff_t out_pitch,
+; unsigned int output_height,
+; const int16_t *filter,
+; int bd
;)
globalsym(aom_highbd_filter_block1d16_h8_sse2)
sym(aom_highbd_filter_block1d16_h8_sse2):
diff --git a/aom_dsp/x86/highbd_convolve_sse2.c b/aom_dsp/x86/highbd_convolve_sse2.c
index a2bb283..40201aa 100644
--- a/aom_dsp/x86/highbd_convolve_sse2.c
+++ b/aom_dsp/x86/highbd_convolve_sse2.c
@@ -15,10 +15,9 @@
// -----------------------------------------------------------------------------
-void aom_highbd_filter_block1d4_v4_sse2(const uint16_t *src_ptr,
- ptrdiff_t src_pitch, uint16_t *dst_ptr,
- ptrdiff_t dst_pitch, uint32_t height,
- const int16_t *filter, int bd) {
+static void aom_highbd_filter_block1d4_v4_sse2(
+ const uint16_t *src_ptr, ptrdiff_t src_pitch, uint16_t *dst_ptr,
+ ptrdiff_t dst_pitch, uint32_t height, const int16_t *filter, int bd) {
__m128i filtersReg;
__m128i srcReg2, srcReg3, srcReg4, srcReg5, srcReg6;
__m128i srcReg23_lo, srcReg34_lo;
@@ -101,10 +100,9 @@
}
}
-void aom_highbd_filter_block1d4_h4_sse2(const uint16_t *src_ptr,
- ptrdiff_t src_pitch, uint16_t *dst_ptr,
- ptrdiff_t dst_pitch, uint32_t height,
- const int16_t *filter, int bd) {
+static void aom_highbd_filter_block1d4_h4_sse2(
+ const uint16_t *src_ptr, ptrdiff_t src_pitch, uint16_t *dst_ptr,
+ ptrdiff_t dst_pitch, uint32_t height, const int16_t *filter, int bd) {
__m128i filtersReg;
__m128i addFilterReg64;
__m128i secondFilters, thirdFilters;
@@ -153,10 +151,9 @@
}
}
-void aom_highbd_filter_block1d8_v4_sse2(const uint16_t *src_ptr,
- ptrdiff_t src_pitch, uint16_t *dst_ptr,
- ptrdiff_t dst_pitch, uint32_t height,
- const int16_t *filter, int bd) {
+static void aom_highbd_filter_block1d8_v4_sse2(
+ const uint16_t *src_ptr, ptrdiff_t src_pitch, uint16_t *dst_ptr,
+ ptrdiff_t dst_pitch, uint32_t height, const int16_t *filter, int bd) {
__m128i filtersReg;
__m128i srcReg2, srcReg3, srcReg4, srcReg5, srcReg6;
__m128i srcReg23_lo, srcReg23_hi, srcReg34_lo, srcReg34_hi;
@@ -262,10 +259,9 @@
}
}
-void aom_highbd_filter_block1d8_h4_sse2(const uint16_t *src_ptr,
- ptrdiff_t src_pitch, uint16_t *dst_ptr,
- ptrdiff_t dst_pitch, uint32_t height,
- const int16_t *filter, int bd) {
+static void aom_highbd_filter_block1d8_h4_sse2(
+ const uint16_t *src_ptr, ptrdiff_t src_pitch, uint16_t *dst_ptr,
+ ptrdiff_t dst_pitch, uint32_t height, const int16_t *filter, int bd) {
__m128i filtersReg;
__m128i addFilterReg64;
__m128i secondFilters, thirdFilters;
@@ -330,22 +326,57 @@
}
}
-void aom_highbd_filter_block1d16_v4_sse2(const uint16_t *src_ptr,
- ptrdiff_t src_pitch, uint16_t *dst_ptr,
- ptrdiff_t dst_pitch, uint32_t height,
- const int16_t *filter, int bd) {
+static void aom_highbd_filter_block1d16_v4_sse2(
+ const uint16_t *src_ptr, ptrdiff_t src_pitch, uint16_t *dst_ptr,
+ ptrdiff_t dst_pitch, uint32_t height, const int16_t *filter, int bd) {
aom_highbd_filter_block1d8_v4_sse2(src_ptr, src_pitch, dst_ptr, dst_pitch,
height, filter, bd);
aom_highbd_filter_block1d8_v4_sse2((src_ptr + 8), src_pitch, (dst_ptr + 8),
dst_pitch, height, filter, bd);
}
-void aom_highbd_filter_block1d16_h4_sse2(const uint16_t *src_ptr,
- ptrdiff_t src_pitch, uint16_t *dst_ptr,
- ptrdiff_t dst_pitch, uint32_t height,
- const int16_t *filter, int bd) {
+static void aom_highbd_filter_block1d16_h4_sse2(
+ const uint16_t *src_ptr, ptrdiff_t src_pitch, uint16_t *dst_ptr,
+ ptrdiff_t dst_pitch, uint32_t height, const int16_t *filter, int bd) {
aom_highbd_filter_block1d8_h4_sse2(src_ptr, src_pitch, dst_ptr, dst_pitch,
height, filter, bd);
aom_highbd_filter_block1d8_h4_sse2((src_ptr + 8), src_pitch, (dst_ptr + 8),
dst_pitch, height, filter, bd);
}
+
+// From aom_dsp/x86/aom_high_subpixel_8t_sse2.asm
+highbd_filter8_1dfunction aom_highbd_filter_block1d16_v8_sse2;
+highbd_filter8_1dfunction aom_highbd_filter_block1d16_h8_sse2;
+highbd_filter8_1dfunction aom_highbd_filter_block1d8_v8_sse2;
+highbd_filter8_1dfunction aom_highbd_filter_block1d8_h8_sse2;
+highbd_filter8_1dfunction aom_highbd_filter_block1d4_v8_sse2;
+highbd_filter8_1dfunction aom_highbd_filter_block1d4_h8_sse2;
+
+// From aom_dsp/x86/aom_high_subpixel_bilinear_sse2.asm
+highbd_filter8_1dfunction aom_highbd_filter_block1d16_v2_sse2;
+highbd_filter8_1dfunction aom_highbd_filter_block1d16_h2_sse2;
+highbd_filter8_1dfunction aom_highbd_filter_block1d8_v2_sse2;
+highbd_filter8_1dfunction aom_highbd_filter_block1d8_h2_sse2;
+highbd_filter8_1dfunction aom_highbd_filter_block1d4_v2_sse2;
+highbd_filter8_1dfunction aom_highbd_filter_block1d4_h2_sse2;
+
+// void aom_highbd_convolve8_horiz_sse2(const uint8_t *src,
+// ptrdiff_t src_stride,
+// uint8_t *dst,
+// ptrdiff_t dst_stride,
+// const int16_t *filter_x,
+// int x_step_q4,
+// const int16_t *filter_y,
+// int y_step_q4,
+// int w, int h, int bd);
+// void aom_highbd_convolve8_vert_sse2(const uint8_t *src,
+// ptrdiff_t src_stride,
+// uint8_t *dst,
+// ptrdiff_t dst_stride,
+// const int16_t *filter_x,
+// int x_step_q4,
+// const int16_t *filter_y,
+// int y_step_q4,
+// int w, int h, int bd);
+HIGH_FUN_CONV_1D(horiz, x_step_q4, filter_x, h, src, , sse2)
+HIGH_FUN_CONV_1D(vert, y_step_q4, filter_y, v, src - src_stride * 3, , sse2)