Merge "Removed vp9_ prefix from vpx_dsp/bitreader file names"
diff --git a/test/lpf_8_test.cc b/test/lpf_8_test.cc
index 2a251c1..96aaa23 100644
--- a/test/lpf_8_test.cc
+++ b/test/lpf_8_test.cc
@@ -60,49 +60,49 @@
void wrapper_vertical_16_sse2(uint16_t *s, int p, const uint8_t *blimit,
const uint8_t *limit, const uint8_t *thresh,
int count, int bd) {
- vp9_highbd_lpf_vertical_16_sse2(s, p, blimit, limit, thresh, bd);
+ vpx_highbd_lpf_vertical_16_sse2(s, p, blimit, limit, thresh, bd);
}
void wrapper_vertical_16_c(uint16_t *s, int p, const uint8_t *blimit,
const uint8_t *limit, const uint8_t *thresh,
int count, int bd) {
- vp9_highbd_lpf_vertical_16_c(s, p, blimit, limit, thresh, bd);
+ vpx_highbd_lpf_vertical_16_c(s, p, blimit, limit, thresh, bd);
}
void wrapper_vertical_16_dual_sse2(uint16_t *s, int p, const uint8_t *blimit,
const uint8_t *limit, const uint8_t *thresh,
int count, int bd) {
- vp9_highbd_lpf_vertical_16_dual_sse2(s, p, blimit, limit, thresh, bd);
+ vpx_highbd_lpf_vertical_16_dual_sse2(s, p, blimit, limit, thresh, bd);
}
void wrapper_vertical_16_dual_c(uint16_t *s, int p, const uint8_t *blimit,
const uint8_t *limit, const uint8_t *thresh,
int count, int bd) {
- vp9_highbd_lpf_vertical_16_dual_c(s, p, blimit, limit, thresh, bd);
+ vpx_highbd_lpf_vertical_16_dual_c(s, p, blimit, limit, thresh, bd);
}
#else
void wrapper_vertical_16_sse2(uint8_t *s, int p, const uint8_t *blimit,
const uint8_t *limit, const uint8_t *thresh,
int count) {
- vp9_lpf_vertical_16_sse2(s, p, blimit, limit, thresh);
+ vpx_lpf_vertical_16_sse2(s, p, blimit, limit, thresh);
}
void wrapper_vertical_16_c(uint8_t *s, int p, const uint8_t *blimit,
const uint8_t *limit, const uint8_t *thresh,
int count) {
- vp9_lpf_vertical_16_c(s, p, blimit, limit, thresh);
+ vpx_lpf_vertical_16_c(s, p, blimit, limit, thresh);
}
void wrapper_vertical_16_dual_sse2(uint8_t *s, int p, const uint8_t *blimit,
const uint8_t *limit, const uint8_t *thresh,
int count) {
- vp9_lpf_vertical_16_dual_sse2(s, p, blimit, limit, thresh);
+ vpx_lpf_vertical_16_dual_sse2(s, p, blimit, limit, thresh);
}
void wrapper_vertical_16_dual_c(uint8_t *s, int p, const uint8_t *blimit,
const uint8_t *limit, const uint8_t *thresh,
int count) {
- vp9_lpf_vertical_16_dual_c(s, p, blimit, limit, thresh);
+ vpx_lpf_vertical_16_dual_c(s, p, blimit, limit, thresh);
}
#endif // CONFIG_VP9_HIGHBITDEPTH
#endif // HAVE_SSE2
@@ -114,25 +114,25 @@
void wrapper_vertical_16_neon(uint8_t *s, int p, const uint8_t *blimit,
const uint8_t *limit, const uint8_t *thresh,
int count) {
- vp9_lpf_vertical_16_neon(s, p, blimit, limit, thresh);
+ vpx_lpf_vertical_16_neon(s, p, blimit, limit, thresh);
}
void wrapper_vertical_16_c(uint8_t *s, int p, const uint8_t *blimit,
const uint8_t *limit, const uint8_t *thresh,
int count) {
- vp9_lpf_vertical_16_c(s, p, blimit, limit, thresh);
+ vpx_lpf_vertical_16_c(s, p, blimit, limit, thresh);
}
void wrapper_vertical_16_dual_neon(uint8_t *s, int p, const uint8_t *blimit,
const uint8_t *limit, const uint8_t *thresh,
int count) {
- vp9_lpf_vertical_16_dual_neon(s, p, blimit, limit, thresh);
+ vpx_lpf_vertical_16_dual_neon(s, p, blimit, limit, thresh);
}
void wrapper_vertical_16_dual_c(uint8_t *s, int p, const uint8_t *blimit,
const uint8_t *limit, const uint8_t *thresh,
int count) {
- vp9_lpf_vertical_16_dual_c(s, p, blimit, limit, thresh);
+ vpx_lpf_vertical_16_dual_c(s, p, blimit, limit, thresh);
}
#endif // CONFIG_VP9_HIGHBITDEPTH
#endif // HAVE_NEON_ASM
@@ -141,13 +141,13 @@
void wrapper_vertical_16_msa(uint8_t *s, int p, const uint8_t *blimit,
const uint8_t *limit, const uint8_t *thresh,
int count) {
- vp9_lpf_vertical_16_msa(s, p, blimit, limit, thresh);
+ vpx_lpf_vertical_16_msa(s, p, blimit, limit, thresh);
}
void wrapper_vertical_16_c(uint8_t *s, int p, const uint8_t *blimit,
const uint8_t *limit, const uint8_t *thresh,
int count) {
- vp9_lpf_vertical_16_c(s, p, blimit, limit, thresh);
+ vpx_lpf_vertical_16_c(s, p, blimit, limit, thresh);
}
#endif // HAVE_MSA && (!CONFIG_VP9_HIGHBITDEPTH)
@@ -534,46 +534,46 @@
INSTANTIATE_TEST_CASE_P(
SSE2, Loop8Test6Param,
::testing::Values(
- make_tuple(&vp9_highbd_lpf_horizontal_4_sse2,
- &vp9_highbd_lpf_horizontal_4_c, 8, 1),
- make_tuple(&vp9_highbd_lpf_vertical_4_sse2,
- &vp9_highbd_lpf_vertical_4_c, 8, 1),
- make_tuple(&vp9_highbd_lpf_horizontal_8_sse2,
- &vp9_highbd_lpf_horizontal_8_c, 8, 1),
- make_tuple(&vp9_highbd_lpf_horizontal_16_sse2,
- &vp9_highbd_lpf_horizontal_16_c, 8, 1),
- make_tuple(&vp9_highbd_lpf_horizontal_16_sse2,
- &vp9_highbd_lpf_horizontal_16_c, 8, 2),
- make_tuple(&vp9_highbd_lpf_vertical_8_sse2,
- &vp9_highbd_lpf_vertical_8_c, 8, 1),
+ make_tuple(&vpx_highbd_lpf_horizontal_4_sse2,
+ &vpx_highbd_lpf_horizontal_4_c, 8, 1),
+ make_tuple(&vpx_highbd_lpf_vertical_4_sse2,
+ &vpx_highbd_lpf_vertical_4_c, 8, 1),
+ make_tuple(&vpx_highbd_lpf_horizontal_8_sse2,
+ &vpx_highbd_lpf_horizontal_8_c, 8, 1),
+ make_tuple(&vpx_highbd_lpf_horizontal_16_sse2,
+ &vpx_highbd_lpf_horizontal_16_c, 8, 1),
+ make_tuple(&vpx_highbd_lpf_horizontal_16_sse2,
+ &vpx_highbd_lpf_horizontal_16_c, 8, 2),
+ make_tuple(&vpx_highbd_lpf_vertical_8_sse2,
+ &vpx_highbd_lpf_vertical_8_c, 8, 1),
make_tuple(&wrapper_vertical_16_sse2,
&wrapper_vertical_16_c, 8, 1),
- make_tuple(&vp9_highbd_lpf_horizontal_4_sse2,
- &vp9_highbd_lpf_horizontal_4_c, 10, 1),
- make_tuple(&vp9_highbd_lpf_vertical_4_sse2,
- &vp9_highbd_lpf_vertical_4_c, 10, 1),
- make_tuple(&vp9_highbd_lpf_horizontal_8_sse2,
- &vp9_highbd_lpf_horizontal_8_c, 10, 1),
- make_tuple(&vp9_highbd_lpf_horizontal_16_sse2,
- &vp9_highbd_lpf_horizontal_16_c, 10, 1),
- make_tuple(&vp9_highbd_lpf_horizontal_16_sse2,
- &vp9_highbd_lpf_horizontal_16_c, 10, 2),
- make_tuple(&vp9_highbd_lpf_vertical_8_sse2,
- &vp9_highbd_lpf_vertical_8_c, 10, 1),
+ make_tuple(&vpx_highbd_lpf_horizontal_4_sse2,
+ &vpx_highbd_lpf_horizontal_4_c, 10, 1),
+ make_tuple(&vpx_highbd_lpf_vertical_4_sse2,
+ &vpx_highbd_lpf_vertical_4_c, 10, 1),
+ make_tuple(&vpx_highbd_lpf_horizontal_8_sse2,
+ &vpx_highbd_lpf_horizontal_8_c, 10, 1),
+ make_tuple(&vpx_highbd_lpf_horizontal_16_sse2,
+ &vpx_highbd_lpf_horizontal_16_c, 10, 1),
+ make_tuple(&vpx_highbd_lpf_horizontal_16_sse2,
+ &vpx_highbd_lpf_horizontal_16_c, 10, 2),
+ make_tuple(&vpx_highbd_lpf_vertical_8_sse2,
+ &vpx_highbd_lpf_vertical_8_c, 10, 1),
make_tuple(&wrapper_vertical_16_sse2,
&wrapper_vertical_16_c, 10, 1),
- make_tuple(&vp9_highbd_lpf_horizontal_4_sse2,
- &vp9_highbd_lpf_horizontal_4_c, 12, 1),
- make_tuple(&vp9_highbd_lpf_vertical_4_sse2,
- &vp9_highbd_lpf_vertical_4_c, 12, 1),
- make_tuple(&vp9_highbd_lpf_horizontal_8_sse2,
- &vp9_highbd_lpf_horizontal_8_c, 12, 1),
- make_tuple(&vp9_highbd_lpf_horizontal_16_sse2,
- &vp9_highbd_lpf_horizontal_16_c, 12, 1),
- make_tuple(&vp9_highbd_lpf_horizontal_16_sse2,
- &vp9_highbd_lpf_horizontal_16_c, 12, 2),
- make_tuple(&vp9_highbd_lpf_vertical_8_sse2,
- &vp9_highbd_lpf_vertical_8_c, 12, 1),
+ make_tuple(&vpx_highbd_lpf_horizontal_4_sse2,
+ &vpx_highbd_lpf_horizontal_4_c, 12, 1),
+ make_tuple(&vpx_highbd_lpf_vertical_4_sse2,
+ &vpx_highbd_lpf_vertical_4_c, 12, 1),
+ make_tuple(&vpx_highbd_lpf_horizontal_8_sse2,
+ &vpx_highbd_lpf_horizontal_8_c, 12, 1),
+ make_tuple(&vpx_highbd_lpf_horizontal_16_sse2,
+ &vpx_highbd_lpf_horizontal_16_c, 12, 1),
+ make_tuple(&vpx_highbd_lpf_horizontal_16_sse2,
+ &vpx_highbd_lpf_horizontal_16_c, 12, 2),
+ make_tuple(&vpx_highbd_lpf_vertical_8_sse2,
+ &vpx_highbd_lpf_vertical_8_c, 12, 1),
make_tuple(&wrapper_vertical_16_sse2,
&wrapper_vertical_16_c, 12, 1),
make_tuple(&wrapper_vertical_16_dual_sse2,
@@ -586,10 +586,10 @@
INSTANTIATE_TEST_CASE_P(
SSE2, Loop8Test6Param,
::testing::Values(
- make_tuple(&vp9_lpf_horizontal_8_sse2, &vp9_lpf_horizontal_8_c, 8, 1),
- make_tuple(&vp9_lpf_horizontal_16_sse2, &vp9_lpf_horizontal_16_c, 8, 1),
- make_tuple(&vp9_lpf_horizontal_16_sse2, &vp9_lpf_horizontal_16_c, 8, 2),
- make_tuple(&vp9_lpf_vertical_8_sse2, &vp9_lpf_vertical_8_c, 8, 1),
+ make_tuple(&vpx_lpf_horizontal_8_sse2, &vpx_lpf_horizontal_8_c, 8, 1),
+ make_tuple(&vpx_lpf_horizontal_16_sse2, &vpx_lpf_horizontal_16_c, 8, 1),
+ make_tuple(&vpx_lpf_horizontal_16_sse2, &vpx_lpf_horizontal_16_c, 8, 2),
+ make_tuple(&vpx_lpf_vertical_8_sse2, &vpx_lpf_vertical_8_c, 8, 1),
make_tuple(&wrapper_vertical_16_sse2, &wrapper_vertical_16_c, 8, 1)));
#endif // CONFIG_VP9_HIGHBITDEPTH
#endif
@@ -598,8 +598,8 @@
INSTANTIATE_TEST_CASE_P(
AVX2, Loop8Test6Param,
::testing::Values(
- make_tuple(&vp9_lpf_horizontal_16_avx2, &vp9_lpf_horizontal_16_c, 8, 1),
- make_tuple(&vp9_lpf_horizontal_16_avx2, &vp9_lpf_horizontal_16_c, 8,
+ make_tuple(&vpx_lpf_horizontal_16_avx2, &vpx_lpf_horizontal_16_c, 8, 1),
+ make_tuple(&vpx_lpf_horizontal_16_avx2, &vpx_lpf_horizontal_16_c, 8,
2)));
#endif
@@ -608,42 +608,42 @@
INSTANTIATE_TEST_CASE_P(
SSE2, Loop8Test9Param,
::testing::Values(
- make_tuple(&vp9_highbd_lpf_horizontal_4_dual_sse2,
- &vp9_highbd_lpf_horizontal_4_dual_c, 8),
- make_tuple(&vp9_highbd_lpf_horizontal_8_dual_sse2,
- &vp9_highbd_lpf_horizontal_8_dual_c, 8),
- make_tuple(&vp9_highbd_lpf_vertical_4_dual_sse2,
- &vp9_highbd_lpf_vertical_4_dual_c, 8),
- make_tuple(&vp9_highbd_lpf_vertical_8_dual_sse2,
- &vp9_highbd_lpf_vertical_8_dual_c, 8),
- make_tuple(&vp9_highbd_lpf_horizontal_4_dual_sse2,
- &vp9_highbd_lpf_horizontal_4_dual_c, 10),
- make_tuple(&vp9_highbd_lpf_horizontal_8_dual_sse2,
- &vp9_highbd_lpf_horizontal_8_dual_c, 10),
- make_tuple(&vp9_highbd_lpf_vertical_4_dual_sse2,
- &vp9_highbd_lpf_vertical_4_dual_c, 10),
- make_tuple(&vp9_highbd_lpf_vertical_8_dual_sse2,
- &vp9_highbd_lpf_vertical_8_dual_c, 10),
- make_tuple(&vp9_highbd_lpf_horizontal_4_dual_sse2,
- &vp9_highbd_lpf_horizontal_4_dual_c, 12),
- make_tuple(&vp9_highbd_lpf_horizontal_8_dual_sse2,
- &vp9_highbd_lpf_horizontal_8_dual_c, 12),
- make_tuple(&vp9_highbd_lpf_vertical_4_dual_sse2,
- &vp9_highbd_lpf_vertical_4_dual_c, 12),
- make_tuple(&vp9_highbd_lpf_vertical_8_dual_sse2,
- &vp9_highbd_lpf_vertical_8_dual_c, 12)));
+ make_tuple(&vpx_highbd_lpf_horizontal_4_dual_sse2,
+ &vpx_highbd_lpf_horizontal_4_dual_c, 8),
+ make_tuple(&vpx_highbd_lpf_horizontal_8_dual_sse2,
+ &vpx_highbd_lpf_horizontal_8_dual_c, 8),
+ make_tuple(&vpx_highbd_lpf_vertical_4_dual_sse2,
+ &vpx_highbd_lpf_vertical_4_dual_c, 8),
+ make_tuple(&vpx_highbd_lpf_vertical_8_dual_sse2,
+ &vpx_highbd_lpf_vertical_8_dual_c, 8),
+ make_tuple(&vpx_highbd_lpf_horizontal_4_dual_sse2,
+ &vpx_highbd_lpf_horizontal_4_dual_c, 10),
+ make_tuple(&vpx_highbd_lpf_horizontal_8_dual_sse2,
+ &vpx_highbd_lpf_horizontal_8_dual_c, 10),
+ make_tuple(&vpx_highbd_lpf_vertical_4_dual_sse2,
+ &vpx_highbd_lpf_vertical_4_dual_c, 10),
+ make_tuple(&vpx_highbd_lpf_vertical_8_dual_sse2,
+ &vpx_highbd_lpf_vertical_8_dual_c, 10),
+ make_tuple(&vpx_highbd_lpf_horizontal_4_dual_sse2,
+ &vpx_highbd_lpf_horizontal_4_dual_c, 12),
+ make_tuple(&vpx_highbd_lpf_horizontal_8_dual_sse2,
+ &vpx_highbd_lpf_horizontal_8_dual_c, 12),
+ make_tuple(&vpx_highbd_lpf_vertical_4_dual_sse2,
+ &vpx_highbd_lpf_vertical_4_dual_c, 12),
+ make_tuple(&vpx_highbd_lpf_vertical_8_dual_sse2,
+ &vpx_highbd_lpf_vertical_8_dual_c, 12)));
#else
INSTANTIATE_TEST_CASE_P(
SSE2, Loop8Test9Param,
::testing::Values(
- make_tuple(&vp9_lpf_horizontal_4_dual_sse2,
- &vp9_lpf_horizontal_4_dual_c, 8),
- make_tuple(&vp9_lpf_horizontal_8_dual_sse2,
- &vp9_lpf_horizontal_8_dual_c, 8),
- make_tuple(&vp9_lpf_vertical_4_dual_sse2,
- &vp9_lpf_vertical_4_dual_c, 8),
- make_tuple(&vp9_lpf_vertical_8_dual_sse2,
- &vp9_lpf_vertical_8_dual_c, 8)));
+ make_tuple(&vpx_lpf_horizontal_4_dual_sse2,
+ &vpx_lpf_horizontal_4_dual_c, 8),
+ make_tuple(&vpx_lpf_horizontal_8_dual_sse2,
+ &vpx_lpf_horizontal_8_dual_c, 8),
+ make_tuple(&vpx_lpf_vertical_4_dual_sse2,
+ &vpx_lpf_vertical_4_dual_c, 8),
+ make_tuple(&vpx_lpf_vertical_8_dual_sse2,
+ &vpx_lpf_vertical_8_dual_c, 8)));
#endif // CONFIG_VP9_HIGHBITDEPTH
#endif
@@ -657,36 +657,36 @@
#if HAVE_NEON_ASM
// Using #if inside the macro is unsupported on MSVS but the tests are not
// currently built for MSVS with ARM and NEON.
- make_tuple(&vp9_lpf_horizontal_16_neon,
- &vp9_lpf_horizontal_16_c, 8, 1),
- make_tuple(&vp9_lpf_horizontal_16_neon,
- &vp9_lpf_horizontal_16_c, 8, 2),
+ make_tuple(&vpx_lpf_horizontal_16_neon,
+ &vpx_lpf_horizontal_16_c, 8, 1),
+ make_tuple(&vpx_lpf_horizontal_16_neon,
+ &vpx_lpf_horizontal_16_c, 8, 2),
make_tuple(&wrapper_vertical_16_neon,
&wrapper_vertical_16_c, 8, 1),
make_tuple(&wrapper_vertical_16_dual_neon,
&wrapper_vertical_16_dual_c, 8, 1),
#endif // HAVE_NEON_ASM
- make_tuple(&vp9_lpf_horizontal_8_neon,
- &vp9_lpf_horizontal_8_c, 8, 1),
- make_tuple(&vp9_lpf_vertical_8_neon,
- &vp9_lpf_vertical_8_c, 8, 1),
- make_tuple(&vp9_lpf_horizontal_4_neon,
- &vp9_lpf_horizontal_4_c, 8, 1),
- make_tuple(&vp9_lpf_vertical_4_neon,
- &vp9_lpf_vertical_4_c, 8, 1)));
+ make_tuple(&vpx_lpf_horizontal_8_neon,
+ &vpx_lpf_horizontal_8_c, 8, 1),
+ make_tuple(&vpx_lpf_vertical_8_neon,
+ &vpx_lpf_vertical_8_c, 8, 1),
+ make_tuple(&vpx_lpf_horizontal_4_neon,
+ &vpx_lpf_horizontal_4_c, 8, 1),
+ make_tuple(&vpx_lpf_vertical_4_neon,
+ &vpx_lpf_vertical_4_c, 8, 1)));
INSTANTIATE_TEST_CASE_P(
NEON, Loop8Test9Param,
::testing::Values(
#if HAVE_NEON_ASM
- make_tuple(&vp9_lpf_horizontal_8_dual_neon,
- &vp9_lpf_horizontal_8_dual_c, 8),
- make_tuple(&vp9_lpf_vertical_8_dual_neon,
- &vp9_lpf_vertical_8_dual_c, 8),
+ make_tuple(&vpx_lpf_horizontal_8_dual_neon,
+ &vpx_lpf_horizontal_8_dual_c, 8),
+ make_tuple(&vpx_lpf_vertical_8_dual_neon,
+ &vpx_lpf_vertical_8_dual_c, 8),
#endif // HAVE_NEON_ASM
- make_tuple(&vp9_lpf_horizontal_4_dual_neon,
- &vp9_lpf_horizontal_4_dual_c, 8),
- make_tuple(&vp9_lpf_vertical_4_dual_neon,
- &vp9_lpf_vertical_4_dual_c, 8)));
+ make_tuple(&vpx_lpf_horizontal_4_dual_neon,
+ &vpx_lpf_horizontal_4_dual_c, 8),
+ make_tuple(&vpx_lpf_vertical_4_dual_neon,
+ &vpx_lpf_vertical_4_dual_c, 8)));
#endif // CONFIG_VP9_HIGHBITDEPTH
#endif // HAVE_NEON
@@ -694,23 +694,23 @@
INSTANTIATE_TEST_CASE_P(
MSA, Loop8Test6Param,
::testing::Values(
- make_tuple(&vp9_lpf_horizontal_8_msa, &vp9_lpf_horizontal_8_c, 8, 1),
- make_tuple(&vp9_lpf_horizontal_16_msa, &vp9_lpf_horizontal_16_c, 8, 1),
- make_tuple(&vp9_lpf_horizontal_16_msa, &vp9_lpf_horizontal_16_c, 8, 2),
- make_tuple(&vp9_lpf_vertical_8_msa, &vp9_lpf_vertical_8_c, 8, 1),
+ make_tuple(&vpx_lpf_horizontal_8_msa, &vpx_lpf_horizontal_8_c, 8, 1),
+ make_tuple(&vpx_lpf_horizontal_16_msa, &vpx_lpf_horizontal_16_c, 8, 1),
+ make_tuple(&vpx_lpf_horizontal_16_msa, &vpx_lpf_horizontal_16_c, 8, 2),
+ make_tuple(&vpx_lpf_vertical_8_msa, &vpx_lpf_vertical_8_c, 8, 1),
make_tuple(&wrapper_vertical_16_msa, &wrapper_vertical_16_c, 8, 1)));
INSTANTIATE_TEST_CASE_P(
MSA, Loop8Test9Param,
::testing::Values(
- make_tuple(&vp9_lpf_horizontal_4_dual_msa,
- &vp9_lpf_horizontal_4_dual_c, 8),
- make_tuple(&vp9_lpf_horizontal_8_dual_msa,
- &vp9_lpf_horizontal_8_dual_c, 8),
- make_tuple(&vp9_lpf_vertical_4_dual_msa,
- &vp9_lpf_vertical_4_dual_c, 8),
- make_tuple(&vp9_lpf_vertical_8_dual_msa,
- &vp9_lpf_vertical_8_dual_c, 8)));
+ make_tuple(&vpx_lpf_horizontal_4_dual_msa,
+ &vpx_lpf_horizontal_4_dual_c, 8),
+ make_tuple(&vpx_lpf_horizontal_8_dual_msa,
+ &vpx_lpf_horizontal_8_dual_c, 8),
+ make_tuple(&vpx_lpf_vertical_4_dual_msa,
+ &vpx_lpf_vertical_4_dual_c, 8),
+ make_tuple(&vpx_lpf_vertical_8_dual_msa,
+ &vpx_lpf_vertical_8_dual_c, 8)));
#endif // HAVE_MSA && (!CONFIG_VP9_HIGHBITDEPTH)
} // namespace
diff --git a/test/test.mk b/test/test.mk
index a8a365e..8ecc856 100644
--- a/test/test.mk
+++ b/test/test.mk
@@ -91,6 +91,7 @@
## shared library builds don't make these functions accessible.
##
ifeq ($(CONFIG_SHARED),)
+LIBVPX_TEST_SRCS-$(CONFIG_VP9) += lpf_8_test.cc
## VP8
ifneq ($(CONFIG_VP8_ENCODER)$(CONFIG_VP8_DECODER),)
@@ -142,7 +143,6 @@
LIBVPX_TEST_SRCS-$(CONFIG_VP9_ENCODER) += fdct8x8_test.cc
LIBVPX_TEST_SRCS-$(CONFIG_VP9_ENCODER) += variance_test.cc
LIBVPX_TEST_SRCS-$(CONFIG_VP9_ENCODER) += vp9_subtract_test.cc
-LIBVPX_TEST_SRCS-$(CONFIG_VP9_ENCODER) += lpf_8_test.cc
LIBVPX_TEST_SRCS-$(CONFIG_VP9_ENCODER) += vp9_avg_test.cc
LIBVPX_TEST_SRCS-$(CONFIG_VP9_ENCODER) += vp9_error_block_test.cc
LIBVPX_TEST_SRCS-$(CONFIG_VP9_ENCODER) += vp9_quantize_test.cc
diff --git a/test/vp9_quantize_test.cc b/test/vp9_quantize_test.cc
index 943c00b..0e09652 100644
--- a/test/vp9_quantize_test.cc
+++ b/test/vp9_quantize_test.cc
@@ -19,7 +19,7 @@
#include "test/register_state_check.h"
#include "test/util.h"
#include "./vpx_config.h"
-#include "./vp9_rtcd.h"
+#include "./vpx_dsp_rtcd.h"
#include "vp9/common/vp9_entropy.h"
#include "vp9/common/vp9_scan.h"
#include "vpx/vpx_codec.h"
diff --git a/vp9/common/mips/dspr2/vp9_convolve2_avg_dspr2.c b/vp9/common/mips/dspr2/vp9_convolve2_avg_dspr2.c
index 91d62bc..aad7c45 100644
--- a/vp9/common/mips/dspr2/vp9_convolve2_avg_dspr2.c
+++ b/vp9/common/mips/dspr2/vp9_convolve2_avg_dspr2.c
@@ -44,7 +44,7 @@
for (y = h; y--;) {
/* prefetch data to cache memory */
- vp9_prefetch_store(dst + dst_stride);
+ prefetch_store(dst + dst_stride);
for (x = 0; x < w; x += 4) {
src_ptr = src + x;
@@ -148,8 +148,8 @@
for (y = h; y--;) {
/* prefetch data to cache memory */
- vp9_prefetch_store(dst + dst_stride);
- vp9_prefetch_store(dst + dst_stride + 32);
+ prefetch_store(dst + dst_stride);
+ prefetch_store(dst + dst_stride + 32);
for (x = 0; x < 64; x += 4) {
src_ptr = src + x;
@@ -245,7 +245,7 @@
: [pos] "r" (pos)
);
- vp9_prefetch_store(dst);
+ prefetch_store(dst);
switch (w) {
case 4:
@@ -257,7 +257,7 @@
filter_y, w, h);
break;
case 64:
- vp9_prefetch_store(dst + 32);
+ prefetch_store(dst + 32);
convolve_bi_avg_vert_64_dspr2(src, src_stride,
dst, dst_stride,
filter_y, h);
diff --git a/vp9/common/mips/dspr2/vp9_convolve2_avg_horiz_dspr2.c b/vp9/common/mips/dspr2/vp9_convolve2_avg_horiz_dspr2.c
index 148b20f..bc60e93 100644
--- a/vp9/common/mips/dspr2/vp9_convolve2_avg_horiz_dspr2.c
+++ b/vp9/common/mips/dspr2/vp9_convolve2_avg_horiz_dspr2.c
@@ -40,9 +40,9 @@
for (y = h; y--;) {
/* prefetch data to cache memory */
- vp9_prefetch_load(src + src_stride);
- vp9_prefetch_load(src + src_stride + 32);
- vp9_prefetch_store(dst + dst_stride);
+ prefetch_load(src + src_stride);
+ prefetch_load(src + src_stride + 32);
+ prefetch_store(dst + dst_stride);
__asm__ __volatile__ (
"ulw %[tp1], 0(%[src]) \n\t"
@@ -135,9 +135,9 @@
for (y = h; y--;) {
/* prefetch data to cache memory */
- vp9_prefetch_load(src + src_stride);
- vp9_prefetch_load(src + src_stride + 32);
- vp9_prefetch_store(dst + dst_stride);
+ prefetch_load(src + src_stride);
+ prefetch_load(src + src_stride + 32);
+ prefetch_store(dst + dst_stride);
__asm__ __volatile__ (
"ulw %[tp1], 0(%[src]) \n\t"
@@ -290,9 +290,9 @@
dst = dst_ptr;
/* prefetch data to cache memory */
- vp9_prefetch_load(src_ptr + src_stride);
- vp9_prefetch_load(src_ptr + src_stride + 32);
- vp9_prefetch_store(dst_ptr + dst_stride);
+ prefetch_load(src_ptr + src_stride);
+ prefetch_load(src_ptr + src_stride + 32);
+ prefetch_store(dst_ptr + dst_stride);
for (c = 0; c < count; c++) {
__asm__ __volatile__ (
@@ -539,11 +539,11 @@
dst = dst_ptr;
/* prefetch data to cache memory */
- vp9_prefetch_load(src_ptr + src_stride);
- vp9_prefetch_load(src_ptr + src_stride + 32);
- vp9_prefetch_load(src_ptr + src_stride + 64);
- vp9_prefetch_store(dst_ptr + dst_stride);
- vp9_prefetch_store(dst_ptr + dst_stride + 32);
+ prefetch_load(src_ptr + src_stride);
+ prefetch_load(src_ptr + src_stride + 32);
+ prefetch_load(src_ptr + src_stride + 64);
+ prefetch_store(dst_ptr + dst_stride);
+ prefetch_store(dst_ptr + dst_stride + 32);
for (c = 0; c < 4; c++) {
__asm__ __volatile__ (
@@ -781,9 +781,9 @@
);
/* prefetch data to cache memory */
- vp9_prefetch_load(src);
- vp9_prefetch_load(src + 32);
- vp9_prefetch_store(dst);
+ prefetch_load(src);
+ prefetch_load(src + 32);
+ prefetch_store(dst);
switch (w) {
case 4:
@@ -807,8 +807,8 @@
filter_x, h, 2);
break;
case 64:
- vp9_prefetch_load(src + 64);
- vp9_prefetch_store(dst + 32);
+ prefetch_load(src + 64);
+ prefetch_store(dst + 32);
convolve_bi_avg_horiz_64_dspr2(src, src_stride,
dst, dst_stride,
diff --git a/vp9/common/mips/dspr2/vp9_convolve2_dspr2.c b/vp9/common/mips/dspr2/vp9_convolve2_dspr2.c
index 92644f2..b714f9a 100644
--- a/vp9/common/mips/dspr2/vp9_convolve2_dspr2.c
+++ b/vp9/common/mips/dspr2/vp9_convolve2_dspr2.c
@@ -41,8 +41,8 @@
for (y = h; y--;) {
dst_ptr = dst;
/* prefetch data to cache memory */
- vp9_prefetch_load(src + src_stride);
- vp9_prefetch_load(src + src_stride + 32);
+ prefetch_load(src + src_stride);
+ prefetch_load(src + src_stride + 32);
__asm__ __volatile__ (
"ulw %[tp1], 0(%[src]) \n\t"
@@ -132,8 +132,8 @@
for (y = h; y--;) {
/* prefetch data to cache memory */
- vp9_prefetch_load(src + src_stride);
- vp9_prefetch_load(src + src_stride + 32);
+ prefetch_load(src + src_stride);
+ prefetch_load(src + src_stride + 32);
dst_ptr = dst;
odd_dst = (dst_ptr + dst_stride);
@@ -272,8 +272,8 @@
for (y = h; y--;) {
/* prefetch data to cache memory */
- vp9_prefetch_load(src_ptr + src_stride);
- vp9_prefetch_load(src_ptr + src_stride + 32);
+ prefetch_load(src_ptr + src_stride);
+ prefetch_load(src_ptr + src_stride + 32);
src = src_ptr;
dst = dst_ptr;
@@ -504,9 +504,9 @@
for (y = h; y--;) {
/* prefetch data to cache memory */
- vp9_prefetch_load(src_ptr + src_stride);
- vp9_prefetch_load(src_ptr + src_stride + 32);
- vp9_prefetch_load(src_ptr + src_stride + 64);
+ prefetch_load(src_ptr + src_stride);
+ prefetch_load(src_ptr + src_stride + 32);
+ prefetch_load(src_ptr + src_stride + 64);
src = src_ptr;
dst = dst_ptr;
@@ -747,8 +747,8 @@
);
/* prefetch data to cache memory */
- vp9_prefetch_load(src);
- vp9_prefetch_load(src + 32);
+ prefetch_load(src);
+ prefetch_load(src + 32);
switch (w) {
case 4:
@@ -769,7 +769,7 @@
(w/16));
break;
case 64:
- vp9_prefetch_load(src + 32);
+ prefetch_load(src + 32);
convolve_bi_horiz_64_transposed_dspr2(src, src_stride,
dst, dst_stride,
filter, h);
diff --git a/vp9/common/mips/dspr2/vp9_convolve2_horiz_dspr2.c b/vp9/common/mips/dspr2/vp9_convolve2_horiz_dspr2.c
index 1debdb4..27ea100 100644
--- a/vp9/common/mips/dspr2/vp9_convolve2_horiz_dspr2.c
+++ b/vp9/common/mips/dspr2/vp9_convolve2_horiz_dspr2.c
@@ -39,9 +39,9 @@
for (y = h; y--;) {
/* prefetch data to cache memory */
- vp9_prefetch_load(src + src_stride);
- vp9_prefetch_load(src + src_stride + 32);
- vp9_prefetch_store(dst + dst_stride);
+ prefetch_load(src + src_stride);
+ prefetch_load(src + src_stride + 32);
+ prefetch_store(dst + dst_stride);
__asm__ __volatile__ (
"ulw %[tp1], 0(%[src]) \n\t"
@@ -122,9 +122,9 @@
for (y = h; y--;) {
/* prefetch data to cache memory */
- vp9_prefetch_load(src + src_stride);
- vp9_prefetch_load(src + src_stride + 32);
- vp9_prefetch_store(dst + dst_stride);
+ prefetch_load(src + src_stride);
+ prefetch_load(src + src_stride + 32);
+ prefetch_store(dst + dst_stride);
__asm__ __volatile__ (
"ulw %[tp1], 0(%[src]) \n\t"
@@ -252,9 +252,9 @@
dst = dst_ptr;
/* prefetch data to cache memory */
- vp9_prefetch_load(src_ptr + src_stride);
- vp9_prefetch_load(src_ptr + src_stride + 32);
- vp9_prefetch_store(dst_ptr + dst_stride);
+ prefetch_load(src_ptr + src_stride);
+ prefetch_load(src_ptr + src_stride + 32);
+ prefetch_store(dst_ptr + dst_stride);
for (c = 0; c < count; c++) {
__asm__ __volatile__ (
@@ -459,11 +459,11 @@
dst = dst_ptr;
/* prefetch data to cache memory */
- vp9_prefetch_load(src_ptr + src_stride);
- vp9_prefetch_load(src_ptr + src_stride + 32);
- vp9_prefetch_load(src_ptr + src_stride + 64);
- vp9_prefetch_store(dst_ptr + dst_stride);
- vp9_prefetch_store(dst_ptr + dst_stride + 32);
+ prefetch_load(src_ptr + src_stride);
+ prefetch_load(src_ptr + src_stride + 32);
+ prefetch_load(src_ptr + src_stride + 64);
+ prefetch_store(dst_ptr + dst_stride);
+ prefetch_store(dst_ptr + dst_stride + 32);
for (c = 0; c < 4; c++) {
__asm__ __volatile__ (
@@ -651,7 +651,7 @@
if (16 == x_step_q4) {
uint32_t pos = 38;
- vp9_prefetch_load((const uint8_t *)filter_x);
+ prefetch_load((const uint8_t *)filter_x);
/* bit positon for extract from acc */
__asm__ __volatile__ (
@@ -661,9 +661,9 @@
);
/* prefetch data to cache memory */
- vp9_prefetch_load(src);
- vp9_prefetch_load(src + 32);
- vp9_prefetch_store(dst);
+ prefetch_load(src);
+ prefetch_load(src + 32);
+ prefetch_store(dst);
switch (w) {
case 4:
@@ -687,8 +687,8 @@
filter_x, (int32_t)h, 2);
break;
case 64:
- vp9_prefetch_load(src + 64);
- vp9_prefetch_store(dst + 32);
+ prefetch_load(src + 64);
+ prefetch_store(dst + 32);
convolve_bi_horiz_64_dspr2(src, (int32_t)src_stride,
dst, (int32_t)dst_stride,
diff --git a/vp9/common/mips/dspr2/vp9_convolve2_vert_dspr2.c b/vp9/common/mips/dspr2/vp9_convolve2_vert_dspr2.c
index bf01f11..32f5fb6 100644
--- a/vp9/common/mips/dspr2/vp9_convolve2_vert_dspr2.c
+++ b/vp9/common/mips/dspr2/vp9_convolve2_vert_dspr2.c
@@ -44,7 +44,7 @@
for (y = h; y--;) {
/* prefetch data to cache memory */
- vp9_prefetch_store(dst + dst_stride);
+ prefetch_store(dst + dst_stride);
for (x = 0; x < w; x += 4) {
src_ptr = src + x;
@@ -141,7 +141,7 @@
for (y = h; y--;) {
/* prefetch data to cache memory */
- vp9_prefetch_store(dst + dst_stride);
+ prefetch_store(dst + dst_stride);
for (x = 0; x < 64; x += 4) {
src_ptr = src + x;
@@ -230,7 +230,7 @@
: [pos] "r" (pos)
);
- vp9_prefetch_store(dst);
+ prefetch_store(dst);
switch (w) {
case 4 :
@@ -242,7 +242,7 @@
filter_y, w, h);
break;
case 64 :
- vp9_prefetch_store(dst + 32);
+ prefetch_store(dst + 32);
convolve_bi_vert_64_dspr2(src, src_stride,
dst, dst_stride,
filter_y, h);
diff --git a/vp9/common/mips/dspr2/vp9_convolve8_avg_dspr2.c b/vp9/common/mips/dspr2/vp9_convolve8_avg_dspr2.c
index 1742279..d9cbfe6 100644
--- a/vp9/common/mips/dspr2/vp9_convolve8_avg_dspr2.c
+++ b/vp9/common/mips/dspr2/vp9_convolve8_avg_dspr2.c
@@ -49,7 +49,7 @@
for (y = h; y--;) {
/* prefetch data to cache memory */
- vp9_prefetch_store(dst + dst_stride);
+ prefetch_store(dst + dst_stride);
for (x = 0; x < w; x += 4) {
src_ptr = src + x;
@@ -210,8 +210,8 @@
for (y = h; y--;) {
/* prefetch data to cache memory */
- vp9_prefetch_store(dst + dst_stride);
- vp9_prefetch_store(dst + dst_stride + 32);
+ prefetch_store(dst + dst_stride);
+ prefetch_store(dst + dst_stride + 32);
for (x = 0; x < 64; x += 4) {
src_ptr = src + x;
@@ -372,7 +372,7 @@
: [pos] "r" (pos)
);
- vp9_prefetch_store(dst);
+ prefetch_store(dst);
switch (w) {
case 4:
@@ -384,7 +384,7 @@
filter_y, w, h);
break;
case 64:
- vp9_prefetch_store(dst + 32);
+ prefetch_store(dst + 32);
convolve_avg_vert_64_dspr2(src, src_stride,
dst, dst_stride,
filter_y, h);
@@ -452,17 +452,17 @@
uint32_t tp3, tp4, tn2;
/* prefetch data to cache memory */
- vp9_prefetch_load(src);
- vp9_prefetch_load(src + 32);
- vp9_prefetch_store(dst);
+ prefetch_load(src);
+ prefetch_load(src + 32);
+ prefetch_store(dst);
switch (w) {
case 4:
/* 1 word storage */
for (y = h; y--; ) {
- vp9_prefetch_load(src + src_stride);
- vp9_prefetch_load(src + src_stride + 32);
- vp9_prefetch_store(dst + dst_stride);
+ prefetch_load(src + src_stride);
+ prefetch_load(src + src_stride + 32);
+ prefetch_store(dst + dst_stride);
__asm__ __volatile__ (
"ulw %[tp1], 0(%[src]) \n\t"
@@ -482,9 +482,9 @@
case 8:
/* 2 word storage */
for (y = h; y--; ) {
- vp9_prefetch_load(src + src_stride);
- vp9_prefetch_load(src + src_stride + 32);
- vp9_prefetch_store(dst + dst_stride);
+ prefetch_load(src + src_stride);
+ prefetch_load(src + src_stride + 32);
+ prefetch_store(dst + dst_stride);
__asm__ __volatile__ (
"ulw %[tp1], 0(%[src]) \n\t"
@@ -509,9 +509,9 @@
case 16:
/* 4 word storage */
for (y = h; y--; ) {
- vp9_prefetch_load(src + src_stride);
- vp9_prefetch_load(src + src_stride + 32);
- vp9_prefetch_store(dst + dst_stride);
+ prefetch_load(src + src_stride);
+ prefetch_load(src + src_stride + 32);
+ prefetch_store(dst + dst_stride);
__asm__ __volatile__ (
"ulw %[tp1], 0(%[src]) \n\t"
@@ -544,9 +544,9 @@
case 32:
/* 8 word storage */
for (y = h; y--; ) {
- vp9_prefetch_load(src + src_stride);
- vp9_prefetch_load(src + src_stride + 32);
- vp9_prefetch_store(dst + dst_stride);
+ prefetch_load(src + src_stride);
+ prefetch_load(src + src_stride + 32);
+ prefetch_store(dst + dst_stride);
__asm__ __volatile__ (
"ulw %[tp1], 0(%[src]) \n\t"
@@ -593,16 +593,16 @@
}
break;
case 64:
- vp9_prefetch_load(src + 64);
- vp9_prefetch_store(dst + 32);
+ prefetch_load(src + 64);
+ prefetch_store(dst + 32);
/* 16 word storage */
for (y = h; y--; ) {
- vp9_prefetch_load(src + src_stride);
- vp9_prefetch_load(src + src_stride + 32);
- vp9_prefetch_load(src + src_stride + 64);
- vp9_prefetch_store(dst + dst_stride);
- vp9_prefetch_store(dst + dst_stride + 32);
+ prefetch_load(src + src_stride);
+ prefetch_load(src + src_stride + 32);
+ prefetch_load(src + src_stride + 64);
+ prefetch_store(dst + dst_stride);
+ prefetch_store(dst + dst_stride + 32);
__asm__ __volatile__ (
"ulw %[tp1], 0(%[src]) \n\t"
diff --git a/vp9/common/mips/dspr2/vp9_convolve8_avg_horiz_dspr2.c b/vp9/common/mips/dspr2/vp9_convolve8_avg_horiz_dspr2.c
index 69da1cf..cdb8312 100644
--- a/vp9/common/mips/dspr2/vp9_convolve8_avg_horiz_dspr2.c
+++ b/vp9/common/mips/dspr2/vp9_convolve8_avg_horiz_dspr2.c
@@ -43,9 +43,9 @@
for (y = h; y--;) {
/* prefetch data to cache memory */
- vp9_prefetch_load(src + src_stride);
- vp9_prefetch_load(src + src_stride + 32);
- vp9_prefetch_store(dst + dst_stride);
+ prefetch_load(src + src_stride);
+ prefetch_load(src + src_stride + 32);
+ prefetch_store(dst + dst_stride);
__asm__ __volatile__ (
"ulw %[tp1], 0(%[src]) \n\t"
@@ -165,9 +165,9 @@
for (y = h; y--;) {
/* prefetch data to cache memory */
- vp9_prefetch_load(src + src_stride);
- vp9_prefetch_load(src + src_stride + 32);
- vp9_prefetch_store(dst + dst_stride);
+ prefetch_load(src + src_stride);
+ prefetch_load(src + src_stride + 32);
+ prefetch_store(dst + dst_stride);
__asm__ __volatile__ (
"ulw %[tp1], 0(%[src]) \n\t"
@@ -357,9 +357,9 @@
dst = dst_ptr;
/* prefetch data to cache memory */
- vp9_prefetch_load(src_ptr + src_stride);
- vp9_prefetch_load(src_ptr + src_stride + 32);
- vp9_prefetch_store(dst_ptr + dst_stride);
+ prefetch_load(src_ptr + src_stride);
+ prefetch_load(src_ptr + src_stride + 32);
+ prefetch_store(dst_ptr + dst_stride);
for (c = 0; c < count; c++) {
__asm__ __volatile__ (
@@ -668,11 +668,11 @@
dst = dst_ptr;
/* prefetch data to cache memory */
- vp9_prefetch_load(src_ptr + src_stride);
- vp9_prefetch_load(src_ptr + src_stride + 32);
- vp9_prefetch_load(src_ptr + src_stride + 64);
- vp9_prefetch_store(dst_ptr + dst_stride);
- vp9_prefetch_store(dst_ptr + dst_stride + 32);
+ prefetch_load(src_ptr + src_stride);
+ prefetch_load(src_ptr + src_stride + 32);
+ prefetch_load(src_ptr + src_stride + 64);
+ prefetch_store(dst_ptr + dst_stride);
+ prefetch_store(dst_ptr + dst_stride + 32);
for (c = 0; c < 4; c++) {
__asm__ __volatile__ (
@@ -985,9 +985,9 @@
);
/* prefetch data to cache memory */
- vp9_prefetch_load(src);
- vp9_prefetch_load(src + 32);
- vp9_prefetch_store(dst);
+ prefetch_load(src);
+ prefetch_load(src + 32);
+ prefetch_store(dst);
switch (w) {
case 4:
@@ -1011,8 +1011,8 @@
filter_x, h, 2);
break;
case 64:
- vp9_prefetch_load(src + 64);
- vp9_prefetch_store(dst + 32);
+ prefetch_load(src + 64);
+ prefetch_store(dst + 32);
convolve_avg_horiz_64_dspr2(src, src_stride,
dst, dst_stride,
diff --git a/vp9/common/mips/dspr2/vp9_convolve8_dspr2.c b/vp9/common/mips/dspr2/vp9_convolve8_dspr2.c
index 58b50d2..a1309d1 100644
--- a/vp9/common/mips/dspr2/vp9_convolve8_dspr2.c
+++ b/vp9/common/mips/dspr2/vp9_convolve8_dspr2.c
@@ -60,8 +60,8 @@
for (y = h; y--;) {
dst_ptr = dst;
/* prefetch data to cache memory */
- vp9_prefetch_load(src + src_stride);
- vp9_prefetch_load(src + src_stride + 32);
+ prefetch_load(src + src_stride);
+ prefetch_load(src + src_stride + 32);
__asm__ __volatile__ (
"ulw %[tp1], 0(%[src]) \n\t"
@@ -176,8 +176,8 @@
for (y = h; y--;) {
/* prefetch data to cache memory */
- vp9_prefetch_load(src + src_stride);
- vp9_prefetch_load(src + src_stride + 32);
+ prefetch_load(src + src_stride);
+ prefetch_load(src + src_stride + 32);
dst_ptr = dst;
odd_dst = (dst_ptr + dst_stride);
@@ -355,8 +355,8 @@
for (y = h; y--;) {
/* prefetch data to cache memory */
- vp9_prefetch_load(src_ptr + src_stride);
- vp9_prefetch_load(src_ptr + src_stride + 32);
+ prefetch_load(src_ptr + src_stride);
+ prefetch_load(src_ptr + src_stride + 32);
src = src_ptr;
dst = dst_ptr;
@@ -645,9 +645,9 @@
for (y = h; y--;) {
/* prefetch data to cache memory */
- vp9_prefetch_load(src_ptr + src_stride);
- vp9_prefetch_load(src_ptr + src_stride + 32);
- vp9_prefetch_load(src_ptr + src_stride + 64);
+ prefetch_load(src_ptr + src_stride);
+ prefetch_load(src_ptr + src_stride + 32);
+ prefetch_load(src_ptr + src_stride + 64);
src = src_ptr;
dst = dst_ptr;
@@ -993,8 +993,8 @@
src -= (src_stride * 3 + 3);
/* prefetch data to cache memory */
- vp9_prefetch_load(src);
- vp9_prefetch_load(src + 32);
+ prefetch_load(src);
+ prefetch_load(src + 32);
switch (w) {
case 4:
@@ -1015,7 +1015,7 @@
(w/16));
break;
case 64:
- vp9_prefetch_load(src + 32);
+ prefetch_load(src + 32);
convolve_horiz_64_transposed_dspr2(src, src_stride,
temp, intermediate_height,
filter_x, intermediate_height);
@@ -1078,9 +1078,9 @@
int x, y;
/* prefetch data to cache memory */
- vp9_prefetch_load(src);
- vp9_prefetch_load(src + 32);
- vp9_prefetch_store(dst);
+ prefetch_load(src);
+ prefetch_load(src + 32);
+ prefetch_store(dst);
switch (w) {
case 4:
@@ -1089,9 +1089,9 @@
/* 1 word storage */
for (y = h; y--; ) {
- vp9_prefetch_load(src + src_stride);
- vp9_prefetch_load(src + src_stride + 32);
- vp9_prefetch_store(dst + dst_stride);
+ prefetch_load(src + src_stride);
+ prefetch_load(src + src_stride + 32);
+ prefetch_store(dst + dst_stride);
__asm__ __volatile__ (
"ulw %[tp1], (%[src]) \n\t"
@@ -1112,9 +1112,9 @@
/* 2 word storage */
for (y = h; y--; ) {
- vp9_prefetch_load(src + src_stride);
- vp9_prefetch_load(src + src_stride + 32);
- vp9_prefetch_store(dst + dst_stride);
+ prefetch_load(src + src_stride);
+ prefetch_load(src + src_stride + 32);
+ prefetch_store(dst + dst_stride);
__asm__ __volatile__ (
"ulw %[tp1], 0(%[src]) \n\t"
@@ -1137,9 +1137,9 @@
/* 4 word storage */
for (y = h; y--; ) {
- vp9_prefetch_load(src + src_stride);
- vp9_prefetch_load(src + src_stride + 32);
- vp9_prefetch_store(dst + dst_stride);
+ prefetch_load(src + src_stride);
+ prefetch_load(src + src_stride + 32);
+ prefetch_store(dst + dst_stride);
__asm__ __volatile__ (
"ulw %[tp1], 0(%[src]) \n\t"
@@ -1169,9 +1169,9 @@
/* 8 word storage */
for (y = h; y--; ) {
- vp9_prefetch_load(src + src_stride);
- vp9_prefetch_load(src + src_stride + 32);
- vp9_prefetch_store(dst + dst_stride);
+ prefetch_load(src + src_stride);
+ prefetch_load(src + src_stride + 32);
+ prefetch_store(dst + dst_stride);
__asm__ __volatile__ (
"ulw %[tp1], 0(%[src]) \n\t"
@@ -1209,16 +1209,16 @@
uint32_t tp1, tp2, tp3, tp4;
uint32_t tp5, tp6, tp7, tp8;
- vp9_prefetch_load(src + 64);
- vp9_prefetch_store(dst + 32);
+ prefetch_load(src + 64);
+ prefetch_store(dst + 32);
/* 16 word storage */
for (y = h; y--; ) {
- vp9_prefetch_load(src + src_stride);
- vp9_prefetch_load(src + src_stride + 32);
- vp9_prefetch_load(src + src_stride + 64);
- vp9_prefetch_store(dst + dst_stride);
- vp9_prefetch_store(dst + dst_stride + 32);
+ prefetch_load(src + src_stride);
+ prefetch_load(src + src_stride + 32);
+ prefetch_load(src + src_stride + 64);
+ prefetch_store(dst + dst_stride);
+ prefetch_store(dst + dst_stride + 32);
__asm__ __volatile__ (
"ulw %[tp1], 0(%[src]) \n\t"
diff --git a/vp9/common/mips/dspr2/vp9_convolve8_horiz_dspr2.c b/vp9/common/mips/dspr2/vp9_convolve8_horiz_dspr2.c
index 0303896..d0e3095 100644
--- a/vp9/common/mips/dspr2/vp9_convolve8_horiz_dspr2.c
+++ b/vp9/common/mips/dspr2/vp9_convolve8_horiz_dspr2.c
@@ -43,9 +43,9 @@
for (y = h; y--;) {
/* prefetch data to cache memory */
- vp9_prefetch_load(src + src_stride);
- vp9_prefetch_load(src + src_stride + 32);
- vp9_prefetch_store(dst + dst_stride);
+ prefetch_load(src + src_stride);
+ prefetch_load(src + src_stride + 32);
+ prefetch_store(dst + dst_stride);
__asm__ __volatile__ (
"ulw %[tp1], 0(%[src]) \n\t"
@@ -154,9 +154,9 @@
for (y = h; y--;) {
/* prefetch data to cache memory */
- vp9_prefetch_load(src + src_stride);
- vp9_prefetch_load(src + src_stride + 32);
- vp9_prefetch_store(dst + dst_stride);
+ prefetch_load(src + src_stride);
+ prefetch_load(src + src_stride + 32);
+ prefetch_store(dst + dst_stride);
__asm__ __volatile__ (
"ulw %[tp1], 0(%[src]) \n\t"
@@ -323,9 +323,9 @@
dst = dst_ptr;
/* prefetch data to cache memory */
- vp9_prefetch_load(src_ptr + src_stride);
- vp9_prefetch_load(src_ptr + src_stride + 32);
- vp9_prefetch_store(dst_ptr + dst_stride);
+ prefetch_load(src_ptr + src_stride);
+ prefetch_load(src_ptr + src_stride + 32);
+ prefetch_store(dst_ptr + dst_stride);
for (c = 0; c < count; c++) {
__asm__ __volatile__ (
@@ -593,11 +593,11 @@
dst = dst_ptr;
/* prefetch data to cache memory */
- vp9_prefetch_load(src_ptr + src_stride);
- vp9_prefetch_load(src_ptr + src_stride + 32);
- vp9_prefetch_load(src_ptr + src_stride + 64);
- vp9_prefetch_store(dst_ptr + dst_stride);
- vp9_prefetch_store(dst_ptr + dst_stride + 32);
+ prefetch_load(src_ptr + src_stride);
+ prefetch_load(src_ptr + src_stride + 32);
+ prefetch_load(src_ptr + src_stride + 64);
+ prefetch_store(dst_ptr + dst_stride);
+ prefetch_store(dst_ptr + dst_stride + 32);
for (c = 0; c < 4; c++) {
__asm__ __volatile__ (
@@ -859,7 +859,7 @@
if (16 == x_step_q4) {
uint32_t pos = 38;
- vp9_prefetch_load((const uint8_t *)filter_x);
+ prefetch_load((const uint8_t *)filter_x);
src -= 3;
/* bit positon for extract from acc */
@@ -870,9 +870,9 @@
);
/* prefetch data to cache memory */
- vp9_prefetch_load(src);
- vp9_prefetch_load(src + 32);
- vp9_prefetch_store(dst);
+ prefetch_load(src);
+ prefetch_load(src + 32);
+ prefetch_store(dst);
switch (w) {
case 4:
@@ -896,8 +896,8 @@
filter_x, (int32_t)h, 2);
break;
case 64:
- vp9_prefetch_load(src + 64);
- vp9_prefetch_store(dst + 32);
+ prefetch_load(src + 64);
+ prefetch_store(dst + 32);
convolve_horiz_64_dspr2(src, (int32_t)src_stride,
dst, (int32_t)dst_stride,
diff --git a/vp9/common/mips/dspr2/vp9_convolve8_vert_dspr2.c b/vp9/common/mips/dspr2/vp9_convolve8_vert_dspr2.c
index 0930bb3..98acb81 100644
--- a/vp9/common/mips/dspr2/vp9_convolve8_vert_dspr2.c
+++ b/vp9/common/mips/dspr2/vp9_convolve8_vert_dspr2.c
@@ -49,7 +49,7 @@
for (y = h; y--;) {
/* prefetch data to cache memory */
- vp9_prefetch_store(dst + dst_stride);
+ prefetch_store(dst + dst_stride);
for (x = 0; x < w; x += 4) {
src_ptr = src + x;
@@ -203,8 +203,8 @@
for (y = h; y--;) {
/* prefetch data to cache memory */
- vp9_prefetch_store(dst + dst_stride);
- vp9_prefetch_store(dst + dst_stride + 32);
+ prefetch_store(dst + dst_stride);
+ prefetch_store(dst + dst_stride + 32);
for (x = 0; x < 64; x += 4) {
src_ptr = src + x;
@@ -358,7 +358,7 @@
: [pos] "r" (pos)
);
- vp9_prefetch_store(dst);
+ prefetch_store(dst);
switch (w) {
case 4 :
@@ -370,7 +370,7 @@
filter_y, w, h);
break;
case 64 :
- vp9_prefetch_store(dst + 32);
+ prefetch_store(dst + 32);
convolve_vert_64_dspr2(src, src_stride,
dst, dst_stride,
filter_y, h);
diff --git a/vp9/common/mips/dspr2/vp9_itrans16_dspr2.c b/vp9/common/mips/dspr2/vp9_itrans16_dspr2.c
index 202d913..10a24f3 100644
--- a/vp9/common/mips/dspr2/vp9_itrans16_dspr2.c
+++ b/vp9/common/mips/dspr2/vp9_itrans16_dspr2.c
@@ -34,7 +34,7 @@
for (i = no_rows; i--; ) {
/* prefetch row */
- vp9_prefetch_load((const uint8_t *)(input + 16));
+ prefetch_load((const uint8_t *)(input + 16));
__asm__ __volatile__ (
"lh %[load1], 0(%[input]) \n\t"
@@ -421,14 +421,14 @@
uint8_t *cm = vp9_ff_cropTbl;
/* prefetch vp9_ff_cropTbl */
- vp9_prefetch_load(vp9_ff_cropTbl);
- vp9_prefetch_load(vp9_ff_cropTbl + 32);
- vp9_prefetch_load(vp9_ff_cropTbl + 64);
- vp9_prefetch_load(vp9_ff_cropTbl + 96);
- vp9_prefetch_load(vp9_ff_cropTbl + 128);
- vp9_prefetch_load(vp9_ff_cropTbl + 160);
- vp9_prefetch_load(vp9_ff_cropTbl + 192);
- vp9_prefetch_load(vp9_ff_cropTbl + 224);
+ prefetch_load(vp9_ff_cropTbl);
+ prefetch_load(vp9_ff_cropTbl + 32);
+ prefetch_load(vp9_ff_cropTbl + 64);
+ prefetch_load(vp9_ff_cropTbl + 96);
+ prefetch_load(vp9_ff_cropTbl + 128);
+ prefetch_load(vp9_ff_cropTbl + 160);
+ prefetch_load(vp9_ff_cropTbl + 192);
+ prefetch_load(vp9_ff_cropTbl + 224);
for (i = 0; i < 16; ++i) {
dest_pix = (dest + i);
@@ -1124,7 +1124,7 @@
for (i = 0; i < 16; ++i) {
/* prefetch row */
- vp9_prefetch_load((const uint8_t *)(input + 16));
+ prefetch_load((const uint8_t *)(input + 16));
iadst16(input, outptr);
input += 16;
@@ -1144,7 +1144,7 @@
for (i = 0; i < 16; ++i) {
/* prefetch row */
- vp9_prefetch_load((const uint8_t *)(input + 16));
+ prefetch_load((const uint8_t *)(input + 16));
iadst16(input, outptr);
input += 16;
diff --git a/vp9/common/mips/dspr2/vp9_itrans32_cols_dspr2.c b/vp9/common/mips/dspr2/vp9_itrans32_cols_dspr2.c
index 7ceebb6..a256145 100644
--- a/vp9/common/mips/dspr2/vp9_itrans32_cols_dspr2.c
+++ b/vp9/common/mips/dspr2/vp9_itrans32_cols_dspr2.c
@@ -44,14 +44,14 @@
uint8_t *cm = vp9_ff_cropTbl;
/* prefetch vp9_ff_cropTbl */
- vp9_prefetch_load(vp9_ff_cropTbl);
- vp9_prefetch_load(vp9_ff_cropTbl + 32);
- vp9_prefetch_load(vp9_ff_cropTbl + 64);
- vp9_prefetch_load(vp9_ff_cropTbl + 96);
- vp9_prefetch_load(vp9_ff_cropTbl + 128);
- vp9_prefetch_load(vp9_ff_cropTbl + 160);
- vp9_prefetch_load(vp9_ff_cropTbl + 192);
- vp9_prefetch_load(vp9_ff_cropTbl + 224);
+ prefetch_load(vp9_ff_cropTbl);
+ prefetch_load(vp9_ff_cropTbl + 32);
+ prefetch_load(vp9_ff_cropTbl + 64);
+ prefetch_load(vp9_ff_cropTbl + 96);
+ prefetch_load(vp9_ff_cropTbl + 128);
+ prefetch_load(vp9_ff_cropTbl + 160);
+ prefetch_load(vp9_ff_cropTbl + 192);
+ prefetch_load(vp9_ff_cropTbl + 224);
for (i = 0; i < 32; ++i) {
dest_pix = dest + i;
diff --git a/vp9/common/mips/dspr2/vp9_itrans32_dspr2.c b/vp9/common/mips/dspr2/vp9_itrans32_dspr2.c
index 74a90b0..dd18831 100644
--- a/vp9/common/mips/dspr2/vp9_itrans32_dspr2.c
+++ b/vp9/common/mips/dspr2/vp9_itrans32_dspr2.c
@@ -96,8 +96,8 @@
}
/* prefetch row */
- vp9_prefetch_load((const uint8_t *)(input + 32));
- vp9_prefetch_load((const uint8_t *)(input + 48));
+ prefetch_load((const uint8_t *)(input + 32));
+ prefetch_load((const uint8_t *)(input + 48));
__asm__ __volatile__ (
"lh %[load1], 2(%[input]) \n\t"
diff --git a/vp9/common/mips/dspr2/vp9_itrans4_dspr2.c b/vp9/common/mips/dspr2/vp9_itrans4_dspr2.c
index 280190a..4e31f9f 100644
--- a/vp9/common/mips/dspr2/vp9_itrans4_dspr2.c
+++ b/vp9/common/mips/dspr2/vp9_itrans4_dspr2.c
@@ -115,14 +115,14 @@
uint8_t *cm = vp9_ff_cropTbl;
/* prefetch vp9_ff_cropTbl */
- vp9_prefetch_load(vp9_ff_cropTbl);
- vp9_prefetch_load(vp9_ff_cropTbl + 32);
- vp9_prefetch_load(vp9_ff_cropTbl + 64);
- vp9_prefetch_load(vp9_ff_cropTbl + 96);
- vp9_prefetch_load(vp9_ff_cropTbl + 128);
- vp9_prefetch_load(vp9_ff_cropTbl + 160);
- vp9_prefetch_load(vp9_ff_cropTbl + 192);
- vp9_prefetch_load(vp9_ff_cropTbl + 224);
+ prefetch_load(vp9_ff_cropTbl);
+ prefetch_load(vp9_ff_cropTbl + 32);
+ prefetch_load(vp9_ff_cropTbl + 64);
+ prefetch_load(vp9_ff_cropTbl + 96);
+ prefetch_load(vp9_ff_cropTbl + 128);
+ prefetch_load(vp9_ff_cropTbl + 160);
+ prefetch_load(vp9_ff_cropTbl + 192);
+ prefetch_load(vp9_ff_cropTbl + 224);
for (i = 0; i < 4; ++i) {
dest_pix = (dest + i);
diff --git a/vp9/common/mips/dspr2/vp9_itrans8_dspr2.c b/vp9/common/mips/dspr2/vp9_itrans8_dspr2.c
index 04d2266..6898d56 100644
--- a/vp9/common/mips/dspr2/vp9_itrans8_dspr2.c
+++ b/vp9/common/mips/dspr2/vp9_itrans8_dspr2.c
@@ -211,14 +211,14 @@
uint8_t *cm = vp9_ff_cropTbl;
/* prefetch vp9_ff_cropTbl */
- vp9_prefetch_load(vp9_ff_cropTbl);
- vp9_prefetch_load(vp9_ff_cropTbl + 32);
- vp9_prefetch_load(vp9_ff_cropTbl + 64);
- vp9_prefetch_load(vp9_ff_cropTbl + 96);
- vp9_prefetch_load(vp9_ff_cropTbl + 128);
- vp9_prefetch_load(vp9_ff_cropTbl + 160);
- vp9_prefetch_load(vp9_ff_cropTbl + 192);
- vp9_prefetch_load(vp9_ff_cropTbl + 224);
+ prefetch_load(vp9_ff_cropTbl);
+ prefetch_load(vp9_ff_cropTbl + 32);
+ prefetch_load(vp9_ff_cropTbl + 64);
+ prefetch_load(vp9_ff_cropTbl + 96);
+ prefetch_load(vp9_ff_cropTbl + 128);
+ prefetch_load(vp9_ff_cropTbl + 160);
+ prefetch_load(vp9_ff_cropTbl + 192);
+ prefetch_load(vp9_ff_cropTbl + 224);
for (i = 0; i < 8; ++i) {
dest_pix = (dest + i);
diff --git a/vp9/common/vp9_common.h b/vp9/common/vp9_common.h
index 44dfb4d..42c3a09 100644
--- a/vp9/common/vp9_common.h
+++ b/vp9/common/vp9_common.h
@@ -56,20 +56,6 @@
return (uint16_t)clamp(val, 0, 4095);
}
}
-
-// Note:
-// tran_low_t is the datatype used for final transform coefficients.
-// tran_high_t is the datatype used for intermediate transform stages.
-typedef int64_t tran_high_t;
-typedef int32_t tran_low_t;
-
-#else
-
-// Note:
-// tran_low_t is the datatype used for final transform coefficients.
-// tran_high_t is the datatype used for intermediate transform stages.
-typedef int32_t tran_high_t;
-typedef int16_t tran_low_t;
#endif // CONFIG_VP9_HIGHBITDEPTH
#if CONFIG_DEBUG
diff --git a/vp9/common/vp9_loopfilter.c b/vp9/common/vp9_loopfilter.c
index 5e35fc5..0915918 100644
--- a/vp9/common/vp9_loopfilter.c
+++ b/vp9/common/vp9_loopfilter.c
@@ -327,55 +327,55 @@
if (mask & 1) {
if ((mask_16x16_0 | mask_16x16_1) & 1) {
if ((mask_16x16_0 & mask_16x16_1) & 1) {
- vp9_lpf_vertical_16_dual(s, pitch, lfi0->mblim, lfi0->lim,
+ vpx_lpf_vertical_16_dual(s, pitch, lfi0->mblim, lfi0->lim,
lfi0->hev_thr);
} else if (mask_16x16_0 & 1) {
- vp9_lpf_vertical_16(s, pitch, lfi0->mblim, lfi0->lim,
+ vpx_lpf_vertical_16(s, pitch, lfi0->mblim, lfi0->lim,
lfi0->hev_thr);
} else {
- vp9_lpf_vertical_16(s + 8 *pitch, pitch, lfi1->mblim,
+ vpx_lpf_vertical_16(s + 8 *pitch, pitch, lfi1->mblim,
lfi1->lim, lfi1->hev_thr);
}
}
if ((mask_8x8_0 | mask_8x8_1) & 1) {
if ((mask_8x8_0 & mask_8x8_1) & 1) {
- vp9_lpf_vertical_8_dual(s, pitch, lfi0->mblim, lfi0->lim,
+ vpx_lpf_vertical_8_dual(s, pitch, lfi0->mblim, lfi0->lim,
lfi0->hev_thr, lfi1->mblim, lfi1->lim,
lfi1->hev_thr);
} else if (mask_8x8_0 & 1) {
- vp9_lpf_vertical_8(s, pitch, lfi0->mblim, lfi0->lim, lfi0->hev_thr,
+ vpx_lpf_vertical_8(s, pitch, lfi0->mblim, lfi0->lim, lfi0->hev_thr,
1);
} else {
- vp9_lpf_vertical_8(s + 8 * pitch, pitch, lfi1->mblim, lfi1->lim,
+ vpx_lpf_vertical_8(s + 8 * pitch, pitch, lfi1->mblim, lfi1->lim,
lfi1->hev_thr, 1);
}
}
if ((mask_4x4_0 | mask_4x4_1) & 1) {
if ((mask_4x4_0 & mask_4x4_1) & 1) {
- vp9_lpf_vertical_4_dual(s, pitch, lfi0->mblim, lfi0->lim,
+ vpx_lpf_vertical_4_dual(s, pitch, lfi0->mblim, lfi0->lim,
lfi0->hev_thr, lfi1->mblim, lfi1->lim,
lfi1->hev_thr);
} else if (mask_4x4_0 & 1) {
- vp9_lpf_vertical_4(s, pitch, lfi0->mblim, lfi0->lim, lfi0->hev_thr,
+ vpx_lpf_vertical_4(s, pitch, lfi0->mblim, lfi0->lim, lfi0->hev_thr,
1);
} else {
- vp9_lpf_vertical_4(s + 8 * pitch, pitch, lfi1->mblim, lfi1->lim,
+ vpx_lpf_vertical_4(s + 8 * pitch, pitch, lfi1->mblim, lfi1->lim,
lfi1->hev_thr, 1);
}
}
if ((mask_4x4_int_0 | mask_4x4_int_1) & 1) {
if ((mask_4x4_int_0 & mask_4x4_int_1) & 1) {
- vp9_lpf_vertical_4_dual(s + 4, pitch, lfi0->mblim, lfi0->lim,
+ vpx_lpf_vertical_4_dual(s + 4, pitch, lfi0->mblim, lfi0->lim,
lfi0->hev_thr, lfi1->mblim, lfi1->lim,
lfi1->hev_thr);
} else if (mask_4x4_int_0 & 1) {
- vp9_lpf_vertical_4(s + 4, pitch, lfi0->mblim, lfi0->lim,
+ vpx_lpf_vertical_4(s + 4, pitch, lfi0->mblim, lfi0->lim,
lfi0->hev_thr, 1);
} else {
- vp9_lpf_vertical_4(s + 8 * pitch + 4, pitch, lfi1->mblim, lfi1->lim,
+ vpx_lpf_vertical_4(s + 8 * pitch + 4, pitch, lfi1->mblim, lfi1->lim,
lfi1->hev_thr, 1);
}
}
@@ -427,55 +427,55 @@
if (mask & 1) {
if ((mask_16x16_0 | mask_16x16_1) & 1) {
if ((mask_16x16_0 & mask_16x16_1) & 1) {
- vp9_highbd_lpf_vertical_16_dual(s, pitch, lfi0->mblim, lfi0->lim,
+ vpx_highbd_lpf_vertical_16_dual(s, pitch, lfi0->mblim, lfi0->lim,
lfi0->hev_thr, bd);
} else if (mask_16x16_0 & 1) {
- vp9_highbd_lpf_vertical_16(s, pitch, lfi0->mblim, lfi0->lim,
+ vpx_highbd_lpf_vertical_16(s, pitch, lfi0->mblim, lfi0->lim,
lfi0->hev_thr, bd);
} else {
- vp9_highbd_lpf_vertical_16(s + 8 *pitch, pitch, lfi1->mblim,
+ vpx_highbd_lpf_vertical_16(s + 8 *pitch, pitch, lfi1->mblim,
lfi1->lim, lfi1->hev_thr, bd);
}
}
if ((mask_8x8_0 | mask_8x8_1) & 1) {
if ((mask_8x8_0 & mask_8x8_1) & 1) {
- vp9_highbd_lpf_vertical_8_dual(s, pitch, lfi0->mblim, lfi0->lim,
+ vpx_highbd_lpf_vertical_8_dual(s, pitch, lfi0->mblim, lfi0->lim,
lfi0->hev_thr, lfi1->mblim, lfi1->lim,
lfi1->hev_thr, bd);
} else if (mask_8x8_0 & 1) {
- vp9_highbd_lpf_vertical_8(s, pitch, lfi0->mblim, lfi0->lim,
+ vpx_highbd_lpf_vertical_8(s, pitch, lfi0->mblim, lfi0->lim,
lfi0->hev_thr, 1, bd);
} else {
- vp9_highbd_lpf_vertical_8(s + 8 * pitch, pitch, lfi1->mblim,
+ vpx_highbd_lpf_vertical_8(s + 8 * pitch, pitch, lfi1->mblim,
lfi1->lim, lfi1->hev_thr, 1, bd);
}
}
if ((mask_4x4_0 | mask_4x4_1) & 1) {
if ((mask_4x4_0 & mask_4x4_1) & 1) {
- vp9_highbd_lpf_vertical_4_dual(s, pitch, lfi0->mblim, lfi0->lim,
+ vpx_highbd_lpf_vertical_4_dual(s, pitch, lfi0->mblim, lfi0->lim,
lfi0->hev_thr, lfi1->mblim, lfi1->lim,
lfi1->hev_thr, bd);
} else if (mask_4x4_0 & 1) {
- vp9_highbd_lpf_vertical_4(s, pitch, lfi0->mblim, lfi0->lim,
+ vpx_highbd_lpf_vertical_4(s, pitch, lfi0->mblim, lfi0->lim,
lfi0->hev_thr, 1, bd);
} else {
- vp9_highbd_lpf_vertical_4(s + 8 * pitch, pitch, lfi1->mblim,
+ vpx_highbd_lpf_vertical_4(s + 8 * pitch, pitch, lfi1->mblim,
lfi1->lim, lfi1->hev_thr, 1, bd);
}
}
if ((mask_4x4_int_0 | mask_4x4_int_1) & 1) {
if ((mask_4x4_int_0 & mask_4x4_int_1) & 1) {
- vp9_highbd_lpf_vertical_4_dual(s + 4, pitch, lfi0->mblim, lfi0->lim,
+ vpx_highbd_lpf_vertical_4_dual(s + 4, pitch, lfi0->mblim, lfi0->lim,
lfi0->hev_thr, lfi1->mblim, lfi1->lim,
lfi1->hev_thr, bd);
} else if (mask_4x4_int_0 & 1) {
- vp9_highbd_lpf_vertical_4(s + 4, pitch, lfi0->mblim, lfi0->lim,
+ vpx_highbd_lpf_vertical_4(s + 4, pitch, lfi0->mblim, lfi0->lim,
lfi0->hev_thr, 1, bd);
} else {
- vp9_highbd_lpf_vertical_4(s + 8 * pitch + 4, pitch, lfi1->mblim,
+ vpx_highbd_lpf_vertical_4(s + 8 * pitch + 4, pitch, lfi1->mblim,
lfi1->lim, lfi1->hev_thr, 1, bd);
}
}
@@ -513,11 +513,11 @@
if (mask & 1) {
if (mask_16x16 & 1) {
if ((mask_16x16 & 3) == 3) {
- vp9_lpf_horizontal_16(s, pitch, lfi->mblim, lfi->lim,
+ vpx_lpf_horizontal_16(s, pitch, lfi->mblim, lfi->lim,
lfi->hev_thr, 2);
count = 2;
} else {
- vp9_lpf_horizontal_16(s, pitch, lfi->mblim, lfi->lim,
+ vpx_lpf_horizontal_16(s, pitch, lfi->mblim, lfi->lim,
lfi->hev_thr, 1);
}
} else if (mask_8x8 & 1) {
@@ -525,28 +525,28 @@
// Next block's thresholds.
const loop_filter_thresh *lfin = lfi_n->lfthr + *(lfl + 1);
- vp9_lpf_horizontal_8_dual(s, pitch, lfi->mblim, lfi->lim,
+ vpx_lpf_horizontal_8_dual(s, pitch, lfi->mblim, lfi->lim,
lfi->hev_thr, lfin->mblim, lfin->lim,
lfin->hev_thr);
if ((mask_4x4_int & 3) == 3) {
- vp9_lpf_horizontal_4_dual(s + 4 * pitch, pitch, lfi->mblim,
+ vpx_lpf_horizontal_4_dual(s + 4 * pitch, pitch, lfi->mblim,
lfi->lim, lfi->hev_thr, lfin->mblim,
lfin->lim, lfin->hev_thr);
} else {
if (mask_4x4_int & 1)
- vp9_lpf_horizontal_4(s + 4 * pitch, pitch, lfi->mblim, lfi->lim,
+ vpx_lpf_horizontal_4(s + 4 * pitch, pitch, lfi->mblim, lfi->lim,
lfi->hev_thr, 1);
else if (mask_4x4_int & 2)
- vp9_lpf_horizontal_4(s + 8 + 4 * pitch, pitch, lfin->mblim,
+ vpx_lpf_horizontal_4(s + 8 + 4 * pitch, pitch, lfin->mblim,
lfin->lim, lfin->hev_thr, 1);
}
count = 2;
} else {
- vp9_lpf_horizontal_8(s, pitch, lfi->mblim, lfi->lim, lfi->hev_thr, 1);
+ vpx_lpf_horizontal_8(s, pitch, lfi->mblim, lfi->lim, lfi->hev_thr, 1);
if (mask_4x4_int & 1)
- vp9_lpf_horizontal_4(s + 4 * pitch, pitch, lfi->mblim, lfi->lim,
+ vpx_lpf_horizontal_4(s + 4 * pitch, pitch, lfi->mblim, lfi->lim,
lfi->hev_thr, 1);
}
} else if (mask_4x4 & 1) {
@@ -554,31 +554,31 @@
// Next block's thresholds.
const loop_filter_thresh *lfin = lfi_n->lfthr + *(lfl + 1);
- vp9_lpf_horizontal_4_dual(s, pitch, lfi->mblim, lfi->lim,
+ vpx_lpf_horizontal_4_dual(s, pitch, lfi->mblim, lfi->lim,
lfi->hev_thr, lfin->mblim, lfin->lim,
lfin->hev_thr);
if ((mask_4x4_int & 3) == 3) {
- vp9_lpf_horizontal_4_dual(s + 4 * pitch, pitch, lfi->mblim,
+ vpx_lpf_horizontal_4_dual(s + 4 * pitch, pitch, lfi->mblim,
lfi->lim, lfi->hev_thr, lfin->mblim,
lfin->lim, lfin->hev_thr);
} else {
if (mask_4x4_int & 1)
- vp9_lpf_horizontal_4(s + 4 * pitch, pitch, lfi->mblim, lfi->lim,
+ vpx_lpf_horizontal_4(s + 4 * pitch, pitch, lfi->mblim, lfi->lim,
lfi->hev_thr, 1);
else if (mask_4x4_int & 2)
- vp9_lpf_horizontal_4(s + 8 + 4 * pitch, pitch, lfin->mblim,
+ vpx_lpf_horizontal_4(s + 8 + 4 * pitch, pitch, lfin->mblim,
lfin->lim, lfin->hev_thr, 1);
}
count = 2;
} else {
- vp9_lpf_horizontal_4(s, pitch, lfi->mblim, lfi->lim, lfi->hev_thr, 1);
+ vpx_lpf_horizontal_4(s, pitch, lfi->mblim, lfi->lim, lfi->hev_thr, 1);
if (mask_4x4_int & 1)
- vp9_lpf_horizontal_4(s + 4 * pitch, pitch, lfi->mblim, lfi->lim,
+ vpx_lpf_horizontal_4(s + 4 * pitch, pitch, lfi->mblim, lfi->lim,
lfi->hev_thr, 1);
}
} else if (mask_4x4_int & 1) {
- vp9_lpf_horizontal_4(s + 4 * pitch, pitch, lfi->mblim, lfi->lim,
+ vpx_lpf_horizontal_4(s + 4 * pitch, pitch, lfi->mblim, lfi->lim,
lfi->hev_thr, 1);
}
}
@@ -610,11 +610,11 @@
if (mask & 1) {
if (mask_16x16 & 1) {
if ((mask_16x16 & 3) == 3) {
- vp9_highbd_lpf_horizontal_16(s, pitch, lfi->mblim, lfi->lim,
+ vpx_highbd_lpf_horizontal_16(s, pitch, lfi->mblim, lfi->lim,
lfi->hev_thr, 2, bd);
count = 2;
} else {
- vp9_highbd_lpf_horizontal_16(s, pitch, lfi->mblim, lfi->lim,
+ vpx_highbd_lpf_horizontal_16(s, pitch, lfi->mblim, lfi->lim,
lfi->hev_thr, 1, bd);
}
} else if (mask_8x8 & 1) {
@@ -622,31 +622,31 @@
// Next block's thresholds.
const loop_filter_thresh *lfin = lfi_n->lfthr + *(lfl + 1);
- vp9_highbd_lpf_horizontal_8_dual(s, pitch, lfi->mblim, lfi->lim,
+ vpx_highbd_lpf_horizontal_8_dual(s, pitch, lfi->mblim, lfi->lim,
lfi->hev_thr, lfin->mblim, lfin->lim,
lfin->hev_thr, bd);
if ((mask_4x4_int & 3) == 3) {
- vp9_highbd_lpf_horizontal_4_dual(s + 4 * pitch, pitch, lfi->mblim,
+ vpx_highbd_lpf_horizontal_4_dual(s + 4 * pitch, pitch, lfi->mblim,
lfi->lim, lfi->hev_thr,
lfin->mblim, lfin->lim,
lfin->hev_thr, bd);
} else {
if (mask_4x4_int & 1) {
- vp9_highbd_lpf_horizontal_4(s + 4 * pitch, pitch, lfi->mblim,
+ vpx_highbd_lpf_horizontal_4(s + 4 * pitch, pitch, lfi->mblim,
lfi->lim, lfi->hev_thr, 1, bd);
} else if (mask_4x4_int & 2) {
- vp9_highbd_lpf_horizontal_4(s + 8 + 4 * pitch, pitch, lfin->mblim,
+ vpx_highbd_lpf_horizontal_4(s + 8 + 4 * pitch, pitch, lfin->mblim,
lfin->lim, lfin->hev_thr, 1, bd);
}
}
count = 2;
} else {
- vp9_highbd_lpf_horizontal_8(s, pitch, lfi->mblim, lfi->lim,
+ vpx_highbd_lpf_horizontal_8(s, pitch, lfi->mblim, lfi->lim,
lfi->hev_thr, 1, bd);
if (mask_4x4_int & 1) {
- vp9_highbd_lpf_horizontal_4(s + 4 * pitch, pitch, lfi->mblim,
+ vpx_highbd_lpf_horizontal_4(s + 4 * pitch, pitch, lfi->mblim,
lfi->lim, lfi->hev_thr, 1, bd);
}
}
@@ -655,35 +655,35 @@
// Next block's thresholds.
const loop_filter_thresh *lfin = lfi_n->lfthr + *(lfl + 1);
- vp9_highbd_lpf_horizontal_4_dual(s, pitch, lfi->mblim, lfi->lim,
+ vpx_highbd_lpf_horizontal_4_dual(s, pitch, lfi->mblim, lfi->lim,
lfi->hev_thr, lfin->mblim, lfin->lim,
lfin->hev_thr, bd);
if ((mask_4x4_int & 3) == 3) {
- vp9_highbd_lpf_horizontal_4_dual(s + 4 * pitch, pitch, lfi->mblim,
+ vpx_highbd_lpf_horizontal_4_dual(s + 4 * pitch, pitch, lfi->mblim,
lfi->lim, lfi->hev_thr,
lfin->mblim, lfin->lim,
lfin->hev_thr, bd);
} else {
if (mask_4x4_int & 1) {
- vp9_highbd_lpf_horizontal_4(s + 4 * pitch, pitch, lfi->mblim,
+ vpx_highbd_lpf_horizontal_4(s + 4 * pitch, pitch, lfi->mblim,
lfi->lim, lfi->hev_thr, 1, bd);
} else if (mask_4x4_int & 2) {
- vp9_highbd_lpf_horizontal_4(s + 8 + 4 * pitch, pitch, lfin->mblim,
+ vpx_highbd_lpf_horizontal_4(s + 8 + 4 * pitch, pitch, lfin->mblim,
lfin->lim, lfin->hev_thr, 1, bd);
}
}
count = 2;
} else {
- vp9_highbd_lpf_horizontal_4(s, pitch, lfi->mblim, lfi->lim,
+ vpx_highbd_lpf_horizontal_4(s, pitch, lfi->mblim, lfi->lim,
lfi->hev_thr, 1, bd);
if (mask_4x4_int & 1) {
- vp9_highbd_lpf_horizontal_4(s + 4 * pitch, pitch, lfi->mblim,
+ vpx_highbd_lpf_horizontal_4(s + 4 * pitch, pitch, lfi->mblim,
lfi->lim, lfi->hev_thr, 1, bd);
}
}
} else if (mask_4x4_int & 1) {
- vp9_highbd_lpf_horizontal_4(s + 4 * pitch, pitch, lfi->mblim, lfi->lim,
+ vpx_highbd_lpf_horizontal_4(s + 4 * pitch, pitch, lfi->mblim, lfi->lim,
lfi->hev_thr, 1, bd);
}
}
@@ -1094,15 +1094,15 @@
if (mask & 1) {
if (mask_16x16 & 1) {
- vp9_lpf_vertical_16(s, pitch, lfi->mblim, lfi->lim, lfi->hev_thr);
+ vpx_lpf_vertical_16(s, pitch, lfi->mblim, lfi->lim, lfi->hev_thr);
} else if (mask_8x8 & 1) {
- vp9_lpf_vertical_8(s, pitch, lfi->mblim, lfi->lim, lfi->hev_thr, 1);
+ vpx_lpf_vertical_8(s, pitch, lfi->mblim, lfi->lim, lfi->hev_thr, 1);
} else if (mask_4x4 & 1) {
- vp9_lpf_vertical_4(s, pitch, lfi->mblim, lfi->lim, lfi->hev_thr, 1);
+ vpx_lpf_vertical_4(s, pitch, lfi->mblim, lfi->lim, lfi->hev_thr, 1);
}
}
if (mask_4x4_int & 1)
- vp9_lpf_vertical_4(s + 4, pitch, lfi->mblim, lfi->lim, lfi->hev_thr, 1);
+ vpx_lpf_vertical_4(s + 4, pitch, lfi->mblim, lfi->lim, lfi->hev_thr, 1);
s += 8;
lfl += 1;
mask_16x16 >>= 1;
@@ -1128,18 +1128,18 @@
if (mask & 1) {
if (mask_16x16 & 1) {
- vp9_highbd_lpf_vertical_16(s, pitch, lfi->mblim, lfi->lim,
+ vpx_highbd_lpf_vertical_16(s, pitch, lfi->mblim, lfi->lim,
lfi->hev_thr, bd);
} else if (mask_8x8 & 1) {
- vp9_highbd_lpf_vertical_8(s, pitch, lfi->mblim, lfi->lim,
+ vpx_highbd_lpf_vertical_8(s, pitch, lfi->mblim, lfi->lim,
lfi->hev_thr, 1, bd);
} else if (mask_4x4 & 1) {
- vp9_highbd_lpf_vertical_4(s, pitch, lfi->mblim, lfi->lim,
+ vpx_highbd_lpf_vertical_4(s, pitch, lfi->mblim, lfi->lim,
lfi->hev_thr, 1, bd);
}
}
if (mask_4x4_int & 1)
- vp9_highbd_lpf_vertical_4(s + 4, pitch, lfi->mblim, lfi->lim,
+ vpx_highbd_lpf_vertical_4(s + 4, pitch, lfi->mblim, lfi->lim,
lfi->hev_thr, 1, bd);
s += 8;
lfl += 1;
diff --git a/vp9/common/vp9_rtcd_defs.pl b/vp9/common/vp9_rtcd_defs.pl
index aad1990..415b2e9 100644
--- a/vp9/common/vp9_rtcd_defs.pl
+++ b/vp9/common/vp9_rtcd_defs.pl
@@ -781,12 +781,6 @@
add_proto qw/void vp9_quantize_fp_32x32/, "const tran_low_t *coeff_ptr, intptr_t n_coeffs, int skip_block, const int16_t *zbin_ptr, const int16_t *round_ptr, const int16_t *quant_ptr, const int16_t *quant_shift_ptr, tran_low_t *qcoeff_ptr, tran_low_t *dqcoeff_ptr, const int16_t *dequant_ptr, uint16_t *eob_ptr, const int16_t *scan, const int16_t *iscan";
specialize qw/vp9_quantize_fp_32x32/;
- add_proto qw/void vp9_quantize_b/, "const tran_low_t *coeff_ptr, intptr_t n_coeffs, int skip_block, const int16_t *zbin_ptr, const int16_t *round_ptr, const int16_t *quant_ptr, const int16_t *quant_shift_ptr, tran_low_t *qcoeff_ptr, tran_low_t *dqcoeff_ptr, const int16_t *dequant_ptr, uint16_t *eob_ptr, const int16_t *scan, const int16_t *iscan";
- specialize qw/vp9_quantize_b/;
-
- add_proto qw/void vp9_quantize_b_32x32/, "const tran_low_t *coeff_ptr, intptr_t n_coeffs, int skip_block, const int16_t *zbin_ptr, const int16_t *round_ptr, const int16_t *quant_ptr, const int16_t *quant_shift_ptr, tran_low_t *qcoeff_ptr, tran_low_t *dqcoeff_ptr, const int16_t *dequant_ptr, uint16_t *eob_ptr, const int16_t *scan, const int16_t *iscan";
- specialize qw/vp9_quantize_b_32x32/;
-
add_proto qw/void vp9_fdct8x8_quant/, "const int16_t *input, int stride, tran_low_t *coeff_ptr, intptr_t n_coeffs, int skip_block, const int16_t *zbin_ptr, const int16_t *round_ptr, const int16_t *quant_ptr, const int16_t *quant_shift_ptr, tran_low_t *qcoeff_ptr, tran_low_t *dqcoeff_ptr, const int16_t *dequant_ptr, uint16_t *eob_ptr, const int16_t *scan, const int16_t *iscan";
specialize qw/vp9_fdct8x8_quant/;
} else {
@@ -802,12 +796,6 @@
add_proto qw/void vp9_quantize_fp_32x32/, "const tran_low_t *coeff_ptr, intptr_t n_coeffs, int skip_block, const int16_t *zbin_ptr, const int16_t *round_ptr, const int16_t *quant_ptr, const int16_t *quant_shift_ptr, tran_low_t *qcoeff_ptr, tran_low_t *dqcoeff_ptr, const int16_t *dequant_ptr, uint16_t *eob_ptr, const int16_t *scan, const int16_t *iscan";
specialize qw/vp9_quantize_fp_32x32/, "$ssse3_x86_64_x86inc";
- add_proto qw/void vp9_quantize_b/, "const tran_low_t *coeff_ptr, intptr_t n_coeffs, int skip_block, const int16_t *zbin_ptr, const int16_t *round_ptr, const int16_t *quant_ptr, const int16_t *quant_shift_ptr, tran_low_t *qcoeff_ptr, tran_low_t *dqcoeff_ptr, const int16_t *dequant_ptr, uint16_t *eob_ptr, const int16_t *scan, const int16_t *iscan";
- specialize qw/vp9_quantize_b sse2/, "$ssse3_x86_64_x86inc";
-
- add_proto qw/void vp9_quantize_b_32x32/, "const tran_low_t *coeff_ptr, intptr_t n_coeffs, int skip_block, const int16_t *zbin_ptr, const int16_t *round_ptr, const int16_t *quant_ptr, const int16_t *quant_shift_ptr, tran_low_t *qcoeff_ptr, tran_low_t *dqcoeff_ptr, const int16_t *dequant_ptr, uint16_t *eob_ptr, const int16_t *scan, const int16_t *iscan";
- specialize qw/vp9_quantize_b_32x32/, "$ssse3_x86_64_x86inc";
-
add_proto qw/void vp9_fdct8x8_quant/, "const int16_t *input, int stride, tran_low_t *coeff_ptr, intptr_t n_coeffs, int skip_block, const int16_t *zbin_ptr, const int16_t *round_ptr, const int16_t *quant_ptr, const int16_t *quant_shift_ptr, tran_low_t *qcoeff_ptr, tran_low_t *dqcoeff_ptr, const int16_t *dequant_ptr, uint16_t *eob_ptr, const int16_t *scan, const int16_t *iscan";
specialize qw/vp9_fdct8x8_quant sse2 ssse3 neon/;
}
@@ -935,12 +923,6 @@
add_proto qw/void vp9_highbd_quantize_fp_32x32/, "const tran_low_t *coeff_ptr, intptr_t n_coeffs, int skip_block, const int16_t *zbin_ptr, const int16_t *round_ptr, const int16_t *quant_ptr, const int16_t *quant_shift_ptr, tran_low_t *qcoeff_ptr, tran_low_t *dqcoeff_ptr, const int16_t *dequant_ptr, uint16_t *eob_ptr, const int16_t *scan, const int16_t *iscan";
specialize qw/vp9_highbd_quantize_fp_32x32/;
- add_proto qw/void vp9_highbd_quantize_b/, "const tran_low_t *coeff_ptr, intptr_t n_coeffs, int skip_block, const int16_t *zbin_ptr, const int16_t *round_ptr, const int16_t *quant_ptr, const int16_t *quant_shift_ptr, tran_low_t *qcoeff_ptr, tran_low_t *dqcoeff_ptr, const int16_t *dequant_ptr, uint16_t *eob_ptr, const int16_t *scan, const int16_t *iscan";
- specialize qw/vp9_highbd_quantize_b sse2/;
-
- add_proto qw/void vp9_highbd_quantize_b_32x32/, "const tran_low_t *coeff_ptr, intptr_t n_coeffs, int skip_block, const int16_t *zbin_ptr, const int16_t *round_ptr, const int16_t *quant_ptr, const int16_t *quant_shift_ptr, tran_low_t *qcoeff_ptr, tran_low_t *dqcoeff_ptr, const int16_t *dequant_ptr, uint16_t *eob_ptr, const int16_t *scan, const int16_t *iscan";
- specialize qw/vp9_highbd_quantize_b_32x32 sse2/;
-
#
# Structured Similarity (SSIM)
#
diff --git a/vp9/encoder/vp9_encodemb.c b/vp9/encoder/vp9_encodemb.c
index f74cdd8..7769340 100644
--- a/vp9/encoder/vp9_encodemb.c
+++ b/vp9/encoder/vp9_encodemb.c
@@ -13,6 +13,7 @@
#include "./vpx_config.h"
#include "./vpx_dsp_rtcd.h"
+#include "vpx_dsp/quantize.h"
#include "vpx_mem/vpx_mem.h"
#include "vpx_ports/mem.h"
@@ -23,7 +24,6 @@
#include "vp9/common/vp9_systemdependent.h"
#include "vp9/encoder/vp9_encodemb.h"
-#include "vp9/encoder/vp9_quantize.h"
#include "vp9/encoder/vp9_rd.h"
#include "vp9/encoder/vp9_tokenize.h"
diff --git a/vp9/encoder/vp9_quantize.c b/vp9/encoder/vp9_quantize.c
index 32c1f76..d53d95d 100644
--- a/vp9/encoder/vp9_quantize.c
+++ b/vp9/encoder/vp9_quantize.c
@@ -9,7 +9,7 @@
*/
#include <math.h>
-
+#include "./vpx_dsp_rtcd.h"
#include "vpx_mem/vpx_mem.h"
#include "vpx_ports/mem.h"
@@ -20,113 +20,6 @@
#include "vp9/encoder/vp9_quantize.h"
#include "vp9/encoder/vp9_rd.h"
-void vp9_quantize_dc(const tran_low_t *coeff_ptr,
- int n_coeffs, int skip_block,
- const int16_t *round_ptr, const int16_t quant,
- tran_low_t *qcoeff_ptr, tran_low_t *dqcoeff_ptr,
- const int16_t dequant_ptr, uint16_t *eob_ptr) {
- const int rc = 0;
- const int coeff = coeff_ptr[rc];
- const int coeff_sign = (coeff >> 31);
- const int abs_coeff = (coeff ^ coeff_sign) - coeff_sign;
- int tmp, eob = -1;
-
- memset(qcoeff_ptr, 0, n_coeffs * sizeof(*qcoeff_ptr));
- memset(dqcoeff_ptr, 0, n_coeffs * sizeof(*dqcoeff_ptr));
-
- if (!skip_block) {
- tmp = clamp(abs_coeff + round_ptr[rc != 0], INT16_MIN, INT16_MAX);
- tmp = (tmp * quant) >> 16;
- qcoeff_ptr[rc] = (tmp ^ coeff_sign) - coeff_sign;
- dqcoeff_ptr[rc] = qcoeff_ptr[rc] * dequant_ptr;
- if (tmp)
- eob = 0;
- }
- *eob_ptr = eob + 1;
-}
-
-#if CONFIG_VP9_HIGHBITDEPTH
-void vp9_highbd_quantize_dc(const tran_low_t *coeff_ptr,
- int n_coeffs, int skip_block,
- const int16_t *round_ptr, const int16_t quant,
- tran_low_t *qcoeff_ptr, tran_low_t *dqcoeff_ptr,
- const int16_t dequant_ptr, uint16_t *eob_ptr) {
- int eob = -1;
-
- memset(qcoeff_ptr, 0, n_coeffs * sizeof(*qcoeff_ptr));
- memset(dqcoeff_ptr, 0, n_coeffs * sizeof(*dqcoeff_ptr));
-
- if (!skip_block) {
- const int coeff = coeff_ptr[0];
- const int coeff_sign = (coeff >> 31);
- const int abs_coeff = (coeff ^ coeff_sign) - coeff_sign;
- const int64_t tmp = abs_coeff + round_ptr[0];
- const uint32_t abs_qcoeff = (uint32_t)((tmp * quant) >> 16);
- qcoeff_ptr[0] = (tran_low_t)((abs_qcoeff ^ coeff_sign) - coeff_sign);
- dqcoeff_ptr[0] = qcoeff_ptr[0] * dequant_ptr;
- if (abs_qcoeff)
- eob = 0;
- }
- *eob_ptr = eob + 1;
-}
-#endif
-
-void vp9_quantize_dc_32x32(const tran_low_t *coeff_ptr, int skip_block,
- const int16_t *round_ptr, const int16_t quant,
- tran_low_t *qcoeff_ptr, tran_low_t *dqcoeff_ptr,
- const int16_t dequant_ptr, uint16_t *eob_ptr) {
- const int n_coeffs = 1024;
- const int rc = 0;
- const int coeff = coeff_ptr[rc];
- const int coeff_sign = (coeff >> 31);
- const int abs_coeff = (coeff ^ coeff_sign) - coeff_sign;
- int tmp, eob = -1;
-
- memset(qcoeff_ptr, 0, n_coeffs * sizeof(*qcoeff_ptr));
- memset(dqcoeff_ptr, 0, n_coeffs * sizeof(*dqcoeff_ptr));
-
- if (!skip_block) {
- tmp = clamp(abs_coeff + ROUND_POWER_OF_TWO(round_ptr[rc != 0], 1),
- INT16_MIN, INT16_MAX);
- tmp = (tmp * quant) >> 15;
- qcoeff_ptr[rc] = (tmp ^ coeff_sign) - coeff_sign;
- dqcoeff_ptr[rc] = qcoeff_ptr[rc] * dequant_ptr / 2;
- if (tmp)
- eob = 0;
- }
- *eob_ptr = eob + 1;
-}
-
-#if CONFIG_VP9_HIGHBITDEPTH
-void vp9_highbd_quantize_dc_32x32(const tran_low_t *coeff_ptr,
- int skip_block,
- const int16_t *round_ptr,
- const int16_t quant,
- tran_low_t *qcoeff_ptr,
- tran_low_t *dqcoeff_ptr,
- const int16_t dequant_ptr,
- uint16_t *eob_ptr) {
- const int n_coeffs = 1024;
- int eob = -1;
-
- memset(qcoeff_ptr, 0, n_coeffs * sizeof(*qcoeff_ptr));
- memset(dqcoeff_ptr, 0, n_coeffs * sizeof(*dqcoeff_ptr));
-
- if (!skip_block) {
- const int coeff = coeff_ptr[0];
- const int coeff_sign = (coeff >> 31);
- const int abs_coeff = (coeff ^ coeff_sign) - coeff_sign;
- const int64_t tmp = abs_coeff + ROUND_POWER_OF_TWO(round_ptr[0], 1);
- const uint32_t abs_qcoeff = (uint32_t)((tmp * quant) >> 15);
- qcoeff_ptr[0] = (tran_low_t)((abs_qcoeff ^ coeff_sign) - coeff_sign);
- dqcoeff_ptr[0] = qcoeff_ptr[0] * dequant_ptr / 2;
- if (abs_qcoeff)
- eob = 0;
- }
- *eob_ptr = eob + 1;
-}
-#endif
-
void vp9_quantize_fp_c(const tran_low_t *coeff_ptr, intptr_t n_coeffs,
int skip_block,
const int16_t *zbin_ptr, const int16_t *round_ptr,
@@ -298,224 +191,6 @@
}
#endif
-void vp9_quantize_b_c(const tran_low_t *coeff_ptr, intptr_t n_coeffs,
- int skip_block,
- const int16_t *zbin_ptr, const int16_t *round_ptr,
- const int16_t *quant_ptr, const int16_t *quant_shift_ptr,
- tran_low_t *qcoeff_ptr, tran_low_t *dqcoeff_ptr,
- const int16_t *dequant_ptr,
- uint16_t *eob_ptr,
- const int16_t *scan, const int16_t *iscan) {
- int i, non_zero_count = (int)n_coeffs, eob = -1;
- const int zbins[2] = {zbin_ptr[0], zbin_ptr[1]};
- const int nzbins[2] = {zbins[0] * -1, zbins[1] * -1};
- (void)iscan;
-
- memset(qcoeff_ptr, 0, n_coeffs * sizeof(*qcoeff_ptr));
- memset(dqcoeff_ptr, 0, n_coeffs * sizeof(*dqcoeff_ptr));
-
- if (!skip_block) {
- // Pre-scan pass
- for (i = (int)n_coeffs - 1; i >= 0; i--) {
- const int rc = scan[i];
- const int coeff = coeff_ptr[rc];
-
- if (coeff < zbins[rc != 0] && coeff > nzbins[rc != 0])
- non_zero_count--;
- else
- break;
- }
-
- // Quantization pass: All coefficients with index >= zero_flag are
- // skippable. Note: zero_flag can be zero.
- for (i = 0; i < non_zero_count; i++) {
- const int rc = scan[i];
- const int coeff = coeff_ptr[rc];
- const int coeff_sign = (coeff >> 31);
- const int abs_coeff = (coeff ^ coeff_sign) - coeff_sign;
-
- if (abs_coeff >= zbins[rc != 0]) {
- int tmp = clamp(abs_coeff + round_ptr[rc != 0], INT16_MIN, INT16_MAX);
- tmp = ((((tmp * quant_ptr[rc != 0]) >> 16) + tmp) *
- quant_shift_ptr[rc != 0]) >> 16; // quantization
- qcoeff_ptr[rc] = (tmp ^ coeff_sign) - coeff_sign;
- dqcoeff_ptr[rc] = qcoeff_ptr[rc] * dequant_ptr[rc != 0];
-
- if (tmp)
- eob = i;
- }
- }
- }
- *eob_ptr = eob + 1;
-}
-
-#if CONFIG_VP9_HIGHBITDEPTH
-void vp9_highbd_quantize_b_c(const tran_low_t *coeff_ptr, intptr_t n_coeffs,
- int skip_block, const int16_t *zbin_ptr,
- const int16_t *round_ptr, const int16_t *quant_ptr,
- const int16_t *quant_shift_ptr,
- tran_low_t *qcoeff_ptr, tran_low_t *dqcoeff_ptr,
- const int16_t *dequant_ptr,
- uint16_t *eob_ptr, const int16_t *scan,
- const int16_t *iscan) {
- int i, non_zero_count = (int)n_coeffs, eob = -1;
- const int zbins[2] = {zbin_ptr[0], zbin_ptr[1]};
- const int nzbins[2] = {zbins[0] * -1, zbins[1] * -1};
- (void)iscan;
-
- memset(qcoeff_ptr, 0, n_coeffs * sizeof(*qcoeff_ptr));
- memset(dqcoeff_ptr, 0, n_coeffs * sizeof(*dqcoeff_ptr));
-
- if (!skip_block) {
- // Pre-scan pass
- for (i = (int)n_coeffs - 1; i >= 0; i--) {
- const int rc = scan[i];
- const int coeff = coeff_ptr[rc];
-
- if (coeff < zbins[rc != 0] && coeff > nzbins[rc != 0])
- non_zero_count--;
- else
- break;
- }
-
- // Quantization pass: All coefficients with index >= zero_flag are
- // skippable. Note: zero_flag can be zero.
- for (i = 0; i < non_zero_count; i++) {
- const int rc = scan[i];
- const int coeff = coeff_ptr[rc];
- const int coeff_sign = (coeff >> 31);
- const int abs_coeff = (coeff ^ coeff_sign) - coeff_sign;
-
- if (abs_coeff >= zbins[rc != 0]) {
- const int64_t tmp1 = abs_coeff + round_ptr[rc != 0];
- const int64_t tmp2 = ((tmp1 * quant_ptr[rc != 0]) >> 16) + tmp1;
- const uint32_t abs_qcoeff =
- (uint32_t)((tmp2 * quant_shift_ptr[rc != 0]) >> 16);
- qcoeff_ptr[rc] = (tran_low_t)((abs_qcoeff ^ coeff_sign) - coeff_sign);
- dqcoeff_ptr[rc] = qcoeff_ptr[rc] * dequant_ptr[rc != 0];
- if (abs_qcoeff)
- eob = i;
- }
- }
- }
- *eob_ptr = eob + 1;
-}
-#endif
-
-void vp9_quantize_b_32x32_c(const tran_low_t *coeff_ptr, intptr_t n_coeffs,
- int skip_block,
- const int16_t *zbin_ptr, const int16_t *round_ptr,
- const int16_t *quant_ptr,
- const int16_t *quant_shift_ptr,
- tran_low_t *qcoeff_ptr, tran_low_t *dqcoeff_ptr,
- const int16_t *dequant_ptr,
- uint16_t *eob_ptr,
- const int16_t *scan, const int16_t *iscan) {
- const int zbins[2] = {ROUND_POWER_OF_TWO(zbin_ptr[0], 1),
- ROUND_POWER_OF_TWO(zbin_ptr[1], 1)};
- const int nzbins[2] = {zbins[0] * -1, zbins[1] * -1};
-
- int idx = 0;
- int idx_arr[1024];
- int i, eob = -1;
- (void)iscan;
-
- memset(qcoeff_ptr, 0, n_coeffs * sizeof(*qcoeff_ptr));
- memset(dqcoeff_ptr, 0, n_coeffs * sizeof(*dqcoeff_ptr));
-
- if (!skip_block) {
- // Pre-scan pass
- for (i = 0; i < n_coeffs; i++) {
- const int rc = scan[i];
- const int coeff = coeff_ptr[rc];
-
- // If the coefficient is out of the base ZBIN range, keep it for
- // quantization.
- if (coeff >= zbins[rc != 0] || coeff <= nzbins[rc != 0])
- idx_arr[idx++] = i;
- }
-
- // Quantization pass: only process the coefficients selected in
- // pre-scan pass. Note: idx can be zero.
- for (i = 0; i < idx; i++) {
- const int rc = scan[idx_arr[i]];
- const int coeff = coeff_ptr[rc];
- const int coeff_sign = (coeff >> 31);
- int tmp;
- int abs_coeff = (coeff ^ coeff_sign) - coeff_sign;
- abs_coeff += ROUND_POWER_OF_TWO(round_ptr[rc != 0], 1);
- abs_coeff = clamp(abs_coeff, INT16_MIN, INT16_MAX);
- tmp = ((((abs_coeff * quant_ptr[rc != 0]) >> 16) + abs_coeff) *
- quant_shift_ptr[rc != 0]) >> 15;
-
- qcoeff_ptr[rc] = (tmp ^ coeff_sign) - coeff_sign;
- dqcoeff_ptr[rc] = qcoeff_ptr[rc] * dequant_ptr[rc != 0] / 2;
-
- if (tmp)
- eob = idx_arr[i];
- }
- }
- *eob_ptr = eob + 1;
-}
-
-#if CONFIG_VP9_HIGHBITDEPTH
-void vp9_highbd_quantize_b_32x32_c(const tran_low_t *coeff_ptr,
- intptr_t n_coeffs, int skip_block,
- const int16_t *zbin_ptr,
- const int16_t *round_ptr,
- const int16_t *quant_ptr,
- const int16_t *quant_shift_ptr,
- tran_low_t *qcoeff_ptr,
- tran_low_t *dqcoeff_ptr,
- const int16_t *dequant_ptr,
- uint16_t *eob_ptr,
- const int16_t *scan, const int16_t *iscan) {
- const int zbins[2] = {ROUND_POWER_OF_TWO(zbin_ptr[0], 1),
- ROUND_POWER_OF_TWO(zbin_ptr[1], 1)};
- const int nzbins[2] = {zbins[0] * -1, zbins[1] * -1};
-
- int idx = 0;
- int idx_arr[1024];
- int i, eob = -1;
- (void)iscan;
-
- memset(qcoeff_ptr, 0, n_coeffs * sizeof(*qcoeff_ptr));
- memset(dqcoeff_ptr, 0, n_coeffs * sizeof(*dqcoeff_ptr));
-
- if (!skip_block) {
- // Pre-scan pass
- for (i = 0; i < n_coeffs; i++) {
- const int rc = scan[i];
- const int coeff = coeff_ptr[rc];
-
- // If the coefficient is out of the base ZBIN range, keep it for
- // quantization.
- if (coeff >= zbins[rc != 0] || coeff <= nzbins[rc != 0])
- idx_arr[idx++] = i;
- }
-
- // Quantization pass: only process the coefficients selected in
- // pre-scan pass. Note: idx can be zero.
- for (i = 0; i < idx; i++) {
- const int rc = scan[idx_arr[i]];
- const int coeff = coeff_ptr[rc];
- const int coeff_sign = (coeff >> 31);
- const int abs_coeff = (coeff ^ coeff_sign) - coeff_sign;
- const int64_t tmp1 = abs_coeff
- + ROUND_POWER_OF_TWO(round_ptr[rc != 0], 1);
- const int64_t tmp2 = ((tmp1 * quant_ptr[rc != 0]) >> 16) + tmp1;
- const uint32_t abs_qcoeff =
- (uint32_t)((tmp2 * quant_shift_ptr[rc != 0]) >> 15);
- qcoeff_ptr[rc] = (tran_low_t)((abs_qcoeff ^ coeff_sign) - coeff_sign);
- dqcoeff_ptr[rc] = qcoeff_ptr[rc] * dequant_ptr[rc != 0] / 2;
- if (abs_qcoeff)
- eob = idx_arr[i];
- }
- }
- *eob_ptr = eob + 1;
-}
-#endif
-
void vp9_regular_quantize_b_4x4(MACROBLOCK *x, int plane, int block,
const int16_t *scan, const int16_t *iscan) {
MACROBLOCKD *const xd = &x->e_mbd;
diff --git a/vp9/encoder/vp9_quantize.h b/vp9/encoder/vp9_quantize.h
index 55e5469..6132036 100644
--- a/vp9/encoder/vp9_quantize.h
+++ b/vp9/encoder/vp9_quantize.h
@@ -37,34 +37,9 @@
DECLARE_ALIGNED(16, int16_t, uv_round[QINDEX_RANGE][8]);
} QUANTS;
-void vp9_quantize_dc(const tran_low_t *coeff_ptr,
- int n_coeffs, int skip_block,
- const int16_t *round_ptr, const int16_t quant_ptr,
- tran_low_t *qcoeff_ptr, tran_low_t *dqcoeff_ptr,
- const int16_t dequant_ptr, uint16_t *eob_ptr);
-void vp9_quantize_dc_32x32(const tran_low_t *coeff_ptr, int skip_block,
- const int16_t *round_ptr, const int16_t quant_ptr,
- tran_low_t *qcoeff_ptr, tran_low_t *dqcoeff_ptr,
- const int16_t dequant_ptr, uint16_t *eob_ptr);
void vp9_regular_quantize_b_4x4(MACROBLOCK *x, int plane, int block,
const int16_t *scan, const int16_t *iscan);
-#if CONFIG_VP9_HIGHBITDEPTH
-void vp9_highbd_quantize_dc(const tran_low_t *coeff_ptr,
- int n_coeffs, int skip_block,
- const int16_t *round_ptr, const int16_t quant_ptr,
- tran_low_t *qcoeff_ptr, tran_low_t *dqcoeff_ptr,
- const int16_t dequant_ptr, uint16_t *eob_ptr);
-void vp9_highbd_quantize_dc_32x32(const tran_low_t *coeff_ptr,
- int skip_block,
- const int16_t *round_ptr,
- const int16_t quant_ptr,
- tran_low_t *qcoeff_ptr,
- tran_low_t *dqcoeff_ptr,
- const int16_t dequant_ptr,
- uint16_t *eob_ptr);
-#endif
-
struct VP9_COMP;
struct VP9Common;
diff --git a/vp9/encoder/vp9_ratectrl.c b/vp9/encoder/vp9_ratectrl.c
index 8c6c633..2be2a64 100644
--- a/vp9/encoder/vp9_ratectrl.c
+++ b/vp9/encoder/vp9_ratectrl.c
@@ -1810,13 +1810,12 @@
cpi->resize_count = 0;
return 0;
}
- // Resize based on average QP over some window.
+ // Resize based on average buffer underflow and QP over some window.
// Ignore samples close to key frame, since QP is usually high after key.
if (cpi->rc.frames_since_key > 2 * cpi->framerate) {
const int window = (int)(5 * cpi->framerate);
cpi->resize_avg_qp += cm->base_qindex;
- if (cpi->rc.buffer_level < (int)(cpi->oxcf.drop_frames_water_mark *
- rc->optimal_buffer_level / 100))
+ if (cpi->rc.buffer_level < (int)(30 * rc->optimal_buffer_level / 100))
++cpi->resize_buffer_underflow;
++cpi->resize_count;
// Check for resize action every "window" frames.
diff --git a/vp9/encoder/x86/vp9_quantize_sse2.c b/vp9/encoder/x86/vp9_quantize_sse2.c
index 71fdfd7..2071dfe 100644
--- a/vp9/encoder/x86/vp9_quantize_sse2.c
+++ b/vp9/encoder/x86/vp9_quantize_sse2.c
@@ -14,214 +14,6 @@
#include "./vp9_rtcd.h"
#include "vpx/vpx_integer.h"
-void vp9_quantize_b_sse2(const int16_t* coeff_ptr, intptr_t n_coeffs,
- int skip_block, const int16_t* zbin_ptr,
- const int16_t* round_ptr, const int16_t* quant_ptr,
- const int16_t* quant_shift_ptr, int16_t* qcoeff_ptr,
- int16_t* dqcoeff_ptr, const int16_t* dequant_ptr,
- uint16_t* eob_ptr,
- const int16_t* scan_ptr,
- const int16_t* iscan_ptr) {
- __m128i zero;
- (void)scan_ptr;
-
- coeff_ptr += n_coeffs;
- iscan_ptr += n_coeffs;
- qcoeff_ptr += n_coeffs;
- dqcoeff_ptr += n_coeffs;
- n_coeffs = -n_coeffs;
- zero = _mm_setzero_si128();
- if (!skip_block) {
- __m128i eob;
- __m128i zbin;
- __m128i round, quant, dequant, shift;
- {
- __m128i coeff0, coeff1;
-
- // Setup global values
- {
- __m128i pw_1;
- zbin = _mm_load_si128((const __m128i*)zbin_ptr);
- round = _mm_load_si128((const __m128i*)round_ptr);
- quant = _mm_load_si128((const __m128i*)quant_ptr);
- pw_1 = _mm_set1_epi16(1);
- zbin = _mm_sub_epi16(zbin, pw_1);
- dequant = _mm_load_si128((const __m128i*)dequant_ptr);
- shift = _mm_load_si128((const __m128i*)quant_shift_ptr);
- }
-
- {
- __m128i coeff0_sign, coeff1_sign;
- __m128i qcoeff0, qcoeff1;
- __m128i qtmp0, qtmp1;
- __m128i cmp_mask0, cmp_mask1;
- // Do DC and first 15 AC
- coeff0 = _mm_load_si128((const __m128i*)(coeff_ptr + n_coeffs));
- coeff1 = _mm_load_si128((const __m128i*)(coeff_ptr + n_coeffs) + 1);
-
- // Poor man's sign extract
- coeff0_sign = _mm_srai_epi16(coeff0, 15);
- coeff1_sign = _mm_srai_epi16(coeff1, 15);
- qcoeff0 = _mm_xor_si128(coeff0, coeff0_sign);
- qcoeff1 = _mm_xor_si128(coeff1, coeff1_sign);
- qcoeff0 = _mm_sub_epi16(qcoeff0, coeff0_sign);
- qcoeff1 = _mm_sub_epi16(qcoeff1, coeff1_sign);
-
- cmp_mask0 = _mm_cmpgt_epi16(qcoeff0, zbin);
- zbin = _mm_unpackhi_epi64(zbin, zbin); // Switch DC to AC
- cmp_mask1 = _mm_cmpgt_epi16(qcoeff1, zbin);
- qcoeff0 = _mm_adds_epi16(qcoeff0, round);
- round = _mm_unpackhi_epi64(round, round);
- qcoeff1 = _mm_adds_epi16(qcoeff1, round);
- qtmp0 = _mm_mulhi_epi16(qcoeff0, quant);
- quant = _mm_unpackhi_epi64(quant, quant);
- qtmp1 = _mm_mulhi_epi16(qcoeff1, quant);
- qtmp0 = _mm_add_epi16(qtmp0, qcoeff0);
- qtmp1 = _mm_add_epi16(qtmp1, qcoeff1);
- qcoeff0 = _mm_mulhi_epi16(qtmp0, shift);
- shift = _mm_unpackhi_epi64(shift, shift);
- qcoeff1 = _mm_mulhi_epi16(qtmp1, shift);
-
- // Reinsert signs
- qcoeff0 = _mm_xor_si128(qcoeff0, coeff0_sign);
- qcoeff1 = _mm_xor_si128(qcoeff1, coeff1_sign);
- qcoeff0 = _mm_sub_epi16(qcoeff0, coeff0_sign);
- qcoeff1 = _mm_sub_epi16(qcoeff1, coeff1_sign);
-
- // Mask out zbin threshold coeffs
- qcoeff0 = _mm_and_si128(qcoeff0, cmp_mask0);
- qcoeff1 = _mm_and_si128(qcoeff1, cmp_mask1);
-
- _mm_store_si128((__m128i*)(qcoeff_ptr + n_coeffs), qcoeff0);
- _mm_store_si128((__m128i*)(qcoeff_ptr + n_coeffs) + 1, qcoeff1);
-
- coeff0 = _mm_mullo_epi16(qcoeff0, dequant);
- dequant = _mm_unpackhi_epi64(dequant, dequant);
- coeff1 = _mm_mullo_epi16(qcoeff1, dequant);
-
- _mm_store_si128((__m128i*)(dqcoeff_ptr + n_coeffs), coeff0);
- _mm_store_si128((__m128i*)(dqcoeff_ptr + n_coeffs) + 1, coeff1);
- }
-
- {
- // Scan for eob
- __m128i zero_coeff0, zero_coeff1;
- __m128i nzero_coeff0, nzero_coeff1;
- __m128i iscan0, iscan1;
- __m128i eob1;
- zero_coeff0 = _mm_cmpeq_epi16(coeff0, zero);
- zero_coeff1 = _mm_cmpeq_epi16(coeff1, zero);
- nzero_coeff0 = _mm_cmpeq_epi16(zero_coeff0, zero);
- nzero_coeff1 = _mm_cmpeq_epi16(zero_coeff1, zero);
- iscan0 = _mm_load_si128((const __m128i*)(iscan_ptr + n_coeffs));
- iscan1 = _mm_load_si128((const __m128i*)(iscan_ptr + n_coeffs) + 1);
- // Add one to convert from indices to counts
- iscan0 = _mm_sub_epi16(iscan0, nzero_coeff0);
- iscan1 = _mm_sub_epi16(iscan1, nzero_coeff1);
- eob = _mm_and_si128(iscan0, nzero_coeff0);
- eob1 = _mm_and_si128(iscan1, nzero_coeff1);
- eob = _mm_max_epi16(eob, eob1);
- }
- n_coeffs += 8 * 2;
- }
-
- // AC only loop
- while (n_coeffs < 0) {
- __m128i coeff0, coeff1;
- {
- __m128i coeff0_sign, coeff1_sign;
- __m128i qcoeff0, qcoeff1;
- __m128i qtmp0, qtmp1;
- __m128i cmp_mask0, cmp_mask1;
-
- coeff0 = _mm_load_si128((const __m128i*)(coeff_ptr + n_coeffs));
- coeff1 = _mm_load_si128((const __m128i*)(coeff_ptr + n_coeffs) + 1);
-
- // Poor man's sign extract
- coeff0_sign = _mm_srai_epi16(coeff0, 15);
- coeff1_sign = _mm_srai_epi16(coeff1, 15);
- qcoeff0 = _mm_xor_si128(coeff0, coeff0_sign);
- qcoeff1 = _mm_xor_si128(coeff1, coeff1_sign);
- qcoeff0 = _mm_sub_epi16(qcoeff0, coeff0_sign);
- qcoeff1 = _mm_sub_epi16(qcoeff1, coeff1_sign);
-
- cmp_mask0 = _mm_cmpgt_epi16(qcoeff0, zbin);
- cmp_mask1 = _mm_cmpgt_epi16(qcoeff1, zbin);
- qcoeff0 = _mm_adds_epi16(qcoeff0, round);
- qcoeff1 = _mm_adds_epi16(qcoeff1, round);
- qtmp0 = _mm_mulhi_epi16(qcoeff0, quant);
- qtmp1 = _mm_mulhi_epi16(qcoeff1, quant);
- qtmp0 = _mm_add_epi16(qtmp0, qcoeff0);
- qtmp1 = _mm_add_epi16(qtmp1, qcoeff1);
- qcoeff0 = _mm_mulhi_epi16(qtmp0, shift);
- qcoeff1 = _mm_mulhi_epi16(qtmp1, shift);
-
- // Reinsert signs
- qcoeff0 = _mm_xor_si128(qcoeff0, coeff0_sign);
- qcoeff1 = _mm_xor_si128(qcoeff1, coeff1_sign);
- qcoeff0 = _mm_sub_epi16(qcoeff0, coeff0_sign);
- qcoeff1 = _mm_sub_epi16(qcoeff1, coeff1_sign);
-
- // Mask out zbin threshold coeffs
- qcoeff0 = _mm_and_si128(qcoeff0, cmp_mask0);
- qcoeff1 = _mm_and_si128(qcoeff1, cmp_mask1);
-
- _mm_store_si128((__m128i*)(qcoeff_ptr + n_coeffs), qcoeff0);
- _mm_store_si128((__m128i*)(qcoeff_ptr + n_coeffs) + 1, qcoeff1);
-
- coeff0 = _mm_mullo_epi16(qcoeff0, dequant);
- coeff1 = _mm_mullo_epi16(qcoeff1, dequant);
-
- _mm_store_si128((__m128i*)(dqcoeff_ptr + n_coeffs), coeff0);
- _mm_store_si128((__m128i*)(dqcoeff_ptr + n_coeffs) + 1, coeff1);
- }
-
- {
- // Scan for eob
- __m128i zero_coeff0, zero_coeff1;
- __m128i nzero_coeff0, nzero_coeff1;
- __m128i iscan0, iscan1;
- __m128i eob0, eob1;
- zero_coeff0 = _mm_cmpeq_epi16(coeff0, zero);
- zero_coeff1 = _mm_cmpeq_epi16(coeff1, zero);
- nzero_coeff0 = _mm_cmpeq_epi16(zero_coeff0, zero);
- nzero_coeff1 = _mm_cmpeq_epi16(zero_coeff1, zero);
- iscan0 = _mm_load_si128((const __m128i*)(iscan_ptr + n_coeffs));
- iscan1 = _mm_load_si128((const __m128i*)(iscan_ptr + n_coeffs) + 1);
- // Add one to convert from indices to counts
- iscan0 = _mm_sub_epi16(iscan0, nzero_coeff0);
- iscan1 = _mm_sub_epi16(iscan1, nzero_coeff1);
- eob0 = _mm_and_si128(iscan0, nzero_coeff0);
- eob1 = _mm_and_si128(iscan1, nzero_coeff1);
- eob0 = _mm_max_epi16(eob0, eob1);
- eob = _mm_max_epi16(eob, eob0);
- }
- n_coeffs += 8 * 2;
- }
-
- // Accumulate EOB
- {
- __m128i eob_shuffled;
- eob_shuffled = _mm_shuffle_epi32(eob, 0xe);
- eob = _mm_max_epi16(eob, eob_shuffled);
- eob_shuffled = _mm_shufflelo_epi16(eob, 0xe);
- eob = _mm_max_epi16(eob, eob_shuffled);
- eob_shuffled = _mm_shufflelo_epi16(eob, 0x1);
- eob = _mm_max_epi16(eob, eob_shuffled);
- *eob_ptr = _mm_extract_epi16(eob, 1);
- }
- } else {
- do {
- _mm_store_si128((__m128i*)(dqcoeff_ptr + n_coeffs), zero);
- _mm_store_si128((__m128i*)(dqcoeff_ptr + n_coeffs) + 1, zero);
- _mm_store_si128((__m128i*)(qcoeff_ptr + n_coeffs), zero);
- _mm_store_si128((__m128i*)(qcoeff_ptr + n_coeffs) + 1, zero);
- n_coeffs += 8 * 2;
- } while (n_coeffs < 0);
- *eob_ptr = 0;
- }
-}
-
void vp9_quantize_fp_sse2(const int16_t* coeff_ptr, intptr_t n_coeffs,
int skip_block, const int16_t* zbin_ptr,
const int16_t* round_ptr, const int16_t* quant_ptr,
diff --git a/vp9/encoder/x86/vp9_quantize_ssse3_x86_64.asm b/vp9/encoder/x86/vp9_quantize_ssse3_x86_64.asm
index 449d52b..ec2e87c 100644
--- a/vp9/encoder/x86/vp9_quantize_ssse3_x86_64.asm
+++ b/vp9/encoder/x86/vp9_quantize_ssse3_x86_64.asm
@@ -15,206 +15,6 @@
SECTION .text
-; TODO(yunqingwang)fix quantize_b code for skip=1 case.
-%macro QUANTIZE_FN 2
-cglobal quantize_%1, 0, %2, 15, coeff, ncoeff, skip, zbin, round, quant, \
- shift, qcoeff, dqcoeff, dequant, \
- eob, scan, iscan
- cmp dword skipm, 0
- jne .blank
-
- ; actual quantize loop - setup pointers, rounders, etc.
- movifnidn coeffq, coeffmp
- movifnidn ncoeffq, ncoeffmp
- mov r2, dequantmp
- movifnidn zbinq, zbinmp
- movifnidn roundq, roundmp
- movifnidn quantq, quantmp
- mova m0, [zbinq] ; m0 = zbin
- mova m1, [roundq] ; m1 = round
- mova m2, [quantq] ; m2 = quant
-%ifidn %1, b_32x32
- pcmpeqw m5, m5
- psrlw m5, 15
- paddw m0, m5
- paddw m1, m5
- psrlw m0, 1 ; m0 = (m0 + 1) / 2
- psrlw m1, 1 ; m1 = (m1 + 1) / 2
-%endif
- mova m3, [r2q] ; m3 = dequant
- psubw m0, [pw_1]
- mov r2, shiftmp
- mov r3, qcoeffmp
- mova m4, [r2] ; m4 = shift
- mov r4, dqcoeffmp
- mov r5, iscanmp
-%ifidn %1, b_32x32
- psllw m4, 1
-%endif
- pxor m5, m5 ; m5 = dedicated zero
- DEFINE_ARGS coeff, ncoeff, d1, qcoeff, dqcoeff, iscan, d2, d3, d4, d5, eob
- lea coeffq, [ coeffq+ncoeffq*2]
- lea iscanq, [ iscanq+ncoeffq*2]
- lea qcoeffq, [ qcoeffq+ncoeffq*2]
- lea dqcoeffq, [dqcoeffq+ncoeffq*2]
- neg ncoeffq
-
- ; get DC and first 15 AC coeffs
- mova m9, [ coeffq+ncoeffq*2+ 0] ; m9 = c[i]
- mova m10, [ coeffq+ncoeffq*2+16] ; m10 = c[i]
- pabsw m6, m9 ; m6 = abs(m9)
- pabsw m11, m10 ; m11 = abs(m10)
- pcmpgtw m7, m6, m0 ; m7 = c[i] >= zbin
- punpckhqdq m0, m0
- pcmpgtw m12, m11, m0 ; m12 = c[i] >= zbin
- paddsw m6, m1 ; m6 += round
- punpckhqdq m1, m1
- paddsw m11, m1 ; m11 += round
- pmulhw m8, m6, m2 ; m8 = m6*q>>16
- punpckhqdq m2, m2
- pmulhw m13, m11, m2 ; m13 = m11*q>>16
- paddw m8, m6 ; m8 += m6
- paddw m13, m11 ; m13 += m11
- pmulhw m8, m4 ; m8 = m8*qsh>>16
- punpckhqdq m4, m4
- pmulhw m13, m4 ; m13 = m13*qsh>>16
- psignw m8, m9 ; m8 = reinsert sign
- psignw m13, m10 ; m13 = reinsert sign
- pand m8, m7
- pand m13, m12
- mova [qcoeffq+ncoeffq*2+ 0], m8
- mova [qcoeffq+ncoeffq*2+16], m13
-%ifidn %1, b_32x32
- pabsw m8, m8
- pabsw m13, m13
-%endif
- pmullw m8, m3 ; dqc[i] = qc[i] * q
- punpckhqdq m3, m3
- pmullw m13, m3 ; dqc[i] = qc[i] * q
-%ifidn %1, b_32x32
- psrlw m8, 1
- psrlw m13, 1
- psignw m8, m9
- psignw m13, m10
-%endif
- mova [dqcoeffq+ncoeffq*2+ 0], m8
- mova [dqcoeffq+ncoeffq*2+16], m13
- pcmpeqw m8, m5 ; m8 = c[i] == 0
- pcmpeqw m13, m5 ; m13 = c[i] == 0
- mova m6, [ iscanq+ncoeffq*2+ 0] ; m6 = scan[i]
- mova m11, [ iscanq+ncoeffq*2+16] ; m11 = scan[i]
- psubw m6, m7 ; m6 = scan[i] + 1
- psubw m11, m12 ; m11 = scan[i] + 1
- pandn m8, m6 ; m8 = max(eob)
- pandn m13, m11 ; m13 = max(eob)
- pmaxsw m8, m13
- add ncoeffq, mmsize
- jz .accumulate_eob
-
-.ac_only_loop:
- mova m9, [ coeffq+ncoeffq*2+ 0] ; m9 = c[i]
- mova m10, [ coeffq+ncoeffq*2+16] ; m10 = c[i]
- pabsw m6, m9 ; m6 = abs(m9)
- pabsw m11, m10 ; m11 = abs(m10)
- pcmpgtw m7, m6, m0 ; m7 = c[i] >= zbin
- pcmpgtw m12, m11, m0 ; m12 = c[i] >= zbin
-%ifidn %1, b_32x32
- pmovmskb r6d, m7
- pmovmskb r2d, m12
- or r6, r2
- jz .skip_iter
-%endif
- paddsw m6, m1 ; m6 += round
- paddsw m11, m1 ; m11 += round
- pmulhw m14, m6, m2 ; m14 = m6*q>>16
- pmulhw m13, m11, m2 ; m13 = m11*q>>16
- paddw m14, m6 ; m14 += m6
- paddw m13, m11 ; m13 += m11
- pmulhw m14, m4 ; m14 = m14*qsh>>16
- pmulhw m13, m4 ; m13 = m13*qsh>>16
- psignw m14, m9 ; m14 = reinsert sign
- psignw m13, m10 ; m13 = reinsert sign
- pand m14, m7
- pand m13, m12
- mova [qcoeffq+ncoeffq*2+ 0], m14
- mova [qcoeffq+ncoeffq*2+16], m13
-%ifidn %1, b_32x32
- pabsw m14, m14
- pabsw m13, m13
-%endif
- pmullw m14, m3 ; dqc[i] = qc[i] * q
- pmullw m13, m3 ; dqc[i] = qc[i] * q
-%ifidn %1, b_32x32
- psrlw m14, 1
- psrlw m13, 1
- psignw m14, m9
- psignw m13, m10
-%endif
- mova [dqcoeffq+ncoeffq*2+ 0], m14
- mova [dqcoeffq+ncoeffq*2+16], m13
- pcmpeqw m14, m5 ; m14 = c[i] == 0
- pcmpeqw m13, m5 ; m13 = c[i] == 0
- mova m6, [ iscanq+ncoeffq*2+ 0] ; m6 = scan[i]
- mova m11, [ iscanq+ncoeffq*2+16] ; m11 = scan[i]
- psubw m6, m7 ; m6 = scan[i] + 1
- psubw m11, m12 ; m11 = scan[i] + 1
- pandn m14, m6 ; m14 = max(eob)
- pandn m13, m11 ; m13 = max(eob)
- pmaxsw m8, m14
- pmaxsw m8, m13
- add ncoeffq, mmsize
- jl .ac_only_loop
-
-%ifidn %1, b_32x32
- jmp .accumulate_eob
-.skip_iter:
- mova [qcoeffq+ncoeffq*2+ 0], m5
- mova [qcoeffq+ncoeffq*2+16], m5
- mova [dqcoeffq+ncoeffq*2+ 0], m5
- mova [dqcoeffq+ncoeffq*2+16], m5
- add ncoeffq, mmsize
- jl .ac_only_loop
-%endif
-
-.accumulate_eob:
- ; horizontally accumulate/max eobs and write into [eob] memory pointer
- mov r2, eobmp
- pshufd m7, m8, 0xe
- pmaxsw m8, m7
- pshuflw m7, m8, 0xe
- pmaxsw m8, m7
- pshuflw m7, m8, 0x1
- pmaxsw m8, m7
- pextrw r6, m8, 0
- mov [r2], r6
- RET
-
- ; skip-block, i.e. just write all zeroes
-.blank:
- mov r0, dqcoeffmp
- movifnidn ncoeffq, ncoeffmp
- mov r2, qcoeffmp
- mov r3, eobmp
- DEFINE_ARGS dqcoeff, ncoeff, qcoeff, eob
- lea dqcoeffq, [dqcoeffq+ncoeffq*2]
- lea qcoeffq, [ qcoeffq+ncoeffq*2]
- neg ncoeffq
- pxor m7, m7
-.blank_loop:
- mova [dqcoeffq+ncoeffq*2+ 0], m7
- mova [dqcoeffq+ncoeffq*2+16], m7
- mova [qcoeffq+ncoeffq*2+ 0], m7
- mova [qcoeffq+ncoeffq*2+16], m7
- add ncoeffq, mmsize
- jl .blank_loop
- mov word [eobq], 0
- RET
-%endmacro
-
-INIT_XMM ssse3
-QUANTIZE_FN b, 7
-QUANTIZE_FN b_32x32, 7
-
%macro QUANTIZE_FP 2
cglobal quantize_%1, 0, %2, 15, coeff, ncoeff, skip, zbin, round, quant, \
shift, qcoeff, dqcoeff, dequant, \
diff --git a/vp9/vp9_common.mk b/vp9/vp9_common.mk
index e1463fc..c8cf973 100644
--- a/vp9/vp9_common.mk
+++ b/vp9/vp9_common.mk
@@ -116,13 +116,6 @@
VP9_COMMON_SRCS-$(HAVE_DSPR2) += common/mips/dspr2/vp9_itrans16_dspr2.c
VP9_COMMON_SRCS-$(HAVE_DSPR2) += common/mips/dspr2/vp9_itrans32_cols_dspr2.c
VP9_COMMON_SRCS-$(HAVE_DSPR2) += common/mips/dspr2/vp9_itrans32_dspr2.c
-VP9_COMMON_SRCS-$(HAVE_DSPR2) += common/mips/dspr2/vp9_loopfilter_filters_dspr2.c
-VP9_COMMON_SRCS-$(HAVE_DSPR2) += common/mips/dspr2/vp9_loopfilter_filters_dspr2.h
-VP9_COMMON_SRCS-$(HAVE_DSPR2) += common/mips/dspr2/vp9_loopfilter_macros_dspr2.h
-VP9_COMMON_SRCS-$(HAVE_DSPR2) += common/mips/dspr2/vp9_loopfilter_masks_dspr2.h
-VP9_COMMON_SRCS-$(HAVE_DSPR2) += common/mips/dspr2/vp9_mbloop_loopfilter_dspr2.c
-VP9_COMMON_SRCS-$(HAVE_DSPR2) += common/mips/dspr2/vp9_mblpf_horiz_loopfilter_dspr2.c
-VP9_COMMON_SRCS-$(HAVE_DSPR2) += common/mips/dspr2/vp9_mblpf_vert_loopfilter_dspr2.c
# common (msa)
VP9_COMMON_SRCS-$(HAVE_MSA) += common/mips/msa/vp9_convolve8_avg_horiz_msa.c
diff --git a/vp9/vp9cx.mk b/vp9/vp9cx.mk
index 94cc7ba..01e5303 100644
--- a/vp9/vp9cx.mk
+++ b/vp9/vp9cx.mk
@@ -104,7 +104,6 @@
VP9_CX_SRCS-$(HAVE_SSE2) += encoder/x86/vp9_temporal_filter_apply_sse2.asm
VP9_CX_SRCS-$(HAVE_SSE2) += encoder/x86/vp9_quantize_sse2.c
ifeq ($(CONFIG_VP9_HIGHBITDEPTH),yes)
-VP9_CX_SRCS-$(HAVE_SSE2) += encoder/x86/vp9_highbd_quantize_intrin_sse2.c
VP9_CX_SRCS-$(HAVE_SSE2) += encoder/x86/vp9_highbd_block_error_intrin_sse2.c
endif
diff --git a/vpx_dsp/arm/loopfilter_16_neon.asm b/vpx_dsp/arm/loopfilter_16_neon.asm
index 5b8ec20..5a8fdd6 100644
--- a/vpx_dsp/arm/loopfilter_16_neon.asm
+++ b/vpx_dsp/arm/loopfilter_16_neon.asm
@@ -8,12 +8,12 @@
; be found in the AUTHORS file in the root of the source tree.
;
- EXPORT |vp9_lpf_horizontal_4_dual_neon|
+ EXPORT |vpx_lpf_horizontal_4_dual_neon|
ARM
AREA ||.text||, CODE, READONLY, ALIGN=2
-;void vp9_lpf_horizontal_4_dual_neon(uint8_t *s, int p,
+;void vpx_lpf_horizontal_4_dual_neon(uint8_t *s, int p,
; const uint8_t *blimit0,
; const uint8_t *limit0,
; const uint8_t *thresh0,
@@ -29,7 +29,7 @@
; sp+8 const uint8_t *limit1,
; sp+12 const uint8_t *thresh1,
-|vp9_lpf_horizontal_4_dual_neon| PROC
+|vpx_lpf_horizontal_4_dual_neon| PROC
push {lr}
ldr r12, [sp, #4] ; load thresh0
@@ -66,7 +66,7 @@
sub r2, r2, r1, lsl #1
sub r3, r3, r1, lsl #1
- bl vp9_loop_filter_neon_16
+ bl vpx_loop_filter_neon_16
vst1.u8 {q5}, [r2@64], r1 ; store op1
vst1.u8 {q6}, [r3@64], r1 ; store op0
@@ -76,9 +76,9 @@
vpop {d8-d15} ; restore neon registers
pop {pc}
- ENDP ; |vp9_lpf_horizontal_4_dual_neon|
+ ENDP ; |vpx_lpf_horizontal_4_dual_neon|
-; void vp9_loop_filter_neon_16();
+; void vpx_loop_filter_neon_16();
; This is a helper function for the loopfilters. The invidual functions do the
; necessary load, transpose (if necessary) and store. This function uses
; registers d8-d15, so the calling function must save those registers.
@@ -101,7 +101,7 @@
; q6 op0
; q7 oq0
; q8 oq1
-|vp9_loop_filter_neon_16| PROC
+|vpx_loop_filter_neon_16| PROC
; filter_mask
vabd.u8 q11, q3, q4 ; m1 = abs(p3 - p2)
@@ -194,6 +194,6 @@
veor q8, q12, q10 ; *oq1 = u^0x80
bx lr
- ENDP ; |vp9_loop_filter_neon_16|
+ ENDP ; |vpx_loop_filter_neon_16|
END
diff --git a/vpx_dsp/arm/loopfilter_16_neon.c b/vpx_dsp/arm/loopfilter_16_neon.c
index 27c2827..d24e6ad 100644
--- a/vpx_dsp/arm/loopfilter_16_neon.c
+++ b/vpx_dsp/arm/loopfilter_16_neon.c
@@ -14,7 +14,7 @@
#include "./vpx_config.h"
#include "vpx/vpx_integer.h"
-static INLINE void vp9_loop_filter_neon_16(
+static INLINE void loop_filter_neon_16(
uint8x16_t qblimit, // blimit
uint8x16_t qlimit, // limit
uint8x16_t qthresh, // thresh
@@ -124,7 +124,7 @@
return;
}
-void vp9_lpf_horizontal_4_dual_neon(uint8_t *s, int p /* pitch */,
+void vpx_lpf_horizontal_4_dual_neon(uint8_t *s, int p /* pitch */,
const uint8_t *blimit0,
const uint8_t *limit0,
const uint8_t *thresh0,
@@ -163,9 +163,9 @@
s += p;
q10u8 = vld1q_u8(s);
- vp9_loop_filter_neon_16(qblimit, qlimit, qthresh,
- q3u8, q4u8, q5u8, q6u8, q7u8, q8u8, q9u8, q10u8,
- &q5u8, &q6u8, &q7u8, &q8u8);
+ loop_filter_neon_16(qblimit, qlimit, qthresh,
+ q3u8, q4u8, q5u8, q6u8, q7u8, q8u8, q9u8, q10u8,
+ &q5u8, &q6u8, &q7u8, &q8u8);
s -= (p * 5);
vst1q_u8(s, q5u8);
diff --git a/vpx_dsp/arm/loopfilter_4_neon.asm b/vpx_dsp/arm/loopfilter_4_neon.asm
index 7738e0d..e45e34c 100644
--- a/vpx_dsp/arm/loopfilter_4_neon.asm
+++ b/vpx_dsp/arm/loopfilter_4_neon.asm
@@ -8,18 +8,18 @@
; be found in the AUTHORS file in the root of the source tree.
;
- EXPORT |vp9_lpf_horizontal_4_neon|
- EXPORT |vp9_lpf_vertical_4_neon|
+ EXPORT |vpx_lpf_horizontal_4_neon|
+ EXPORT |vpx_lpf_vertical_4_neon|
ARM
AREA ||.text||, CODE, READONLY, ALIGN=2
-; Currently vp9 only works on iterations 8 at a time. The vp8 loop filter
+; Currently vpx only works on iterations 8 at a time. The vp8 loop filter
; works on 16 iterations at a time.
; TODO(fgalligan): See about removing the count code as this function is only
; called with a count of 1.
;
-; void vp9_lpf_horizontal_4_neon(uint8_t *s,
+; void vpx_lpf_horizontal_4_neon(uint8_t *s,
; int p /* pitch */,
; const uint8_t *blimit,
; const uint8_t *limit,
@@ -32,7 +32,7 @@
; r3 const uint8_t *limit,
; sp const uint8_t *thresh,
; sp+4 int count
-|vp9_lpf_horizontal_4_neon| PROC
+|vpx_lpf_horizontal_4_neon| PROC
push {lr}
vld1.8 {d0[]}, [r2] ; duplicate *blimit
@@ -41,7 +41,7 @@
add r1, r1, r1 ; double pitch
cmp r12, #0
- beq end_vp9_lf_h_edge
+ beq end_vpx_lf_h_edge
vld1.8 {d1[]}, [r3] ; duplicate *limit
vld1.8 {d2[]}, [r2] ; duplicate *thresh
@@ -62,7 +62,7 @@
sub r2, r2, r1, lsl #1
sub r3, r3, r1, lsl #1
- bl vp9_loop_filter_neon
+ bl vpx_loop_filter_neon
vst1.u8 {d4}, [r2@64], r1 ; store op1
vst1.u8 {d5}, [r3@64], r1 ; store op0
@@ -73,16 +73,16 @@
subs r12, r12, #1
bne count_lf_h_loop
-end_vp9_lf_h_edge
+end_vpx_lf_h_edge
pop {pc}
- ENDP ; |vp9_lpf_horizontal_4_neon|
+ ENDP ; |vpx_lpf_horizontal_4_neon|
-; Currently vp9 only works on iterations 8 at a time. The vp8 loop filter
+; Currently vpx only works on iterations 8 at a time. The vp8 loop filter
; works on 16 iterations at a time.
; TODO(fgalligan): See about removing the count code as this function is only
; called with a count of 1.
;
-; void vp9_lpf_vertical_4_neon(uint8_t *s,
+; void vpx_lpf_vertical_4_neon(uint8_t *s,
; int p /* pitch */,
; const uint8_t *blimit,
; const uint8_t *limit,
@@ -95,7 +95,7 @@
; r3 const uint8_t *limit,
; sp const uint8_t *thresh,
; sp+4 int count
-|vp9_lpf_vertical_4_neon| PROC
+|vpx_lpf_vertical_4_neon| PROC
push {lr}
vld1.8 {d0[]}, [r2] ; duplicate *blimit
@@ -105,7 +105,7 @@
ldr r3, [sp, #4] ; load thresh
sub r2, r0, #4 ; move s pointer down by 4 columns
cmp r12, #0
- beq end_vp9_lf_v_edge
+ beq end_vpx_lf_v_edge
vld1.8 {d2[]}, [r3] ; duplicate *thresh
@@ -135,7 +135,7 @@
vtrn.8 d7, d16
vtrn.8 d17, d18
- bl vp9_loop_filter_neon
+ bl vpx_loop_filter_neon
sub r0, r0, #2
@@ -154,11 +154,11 @@
subne r2, r0, #4 ; move s pointer down by 4 columns
bne count_lf_v_loop
-end_vp9_lf_v_edge
+end_vpx_lf_v_edge
pop {pc}
- ENDP ; |vp9_lpf_vertical_4_neon|
+ ENDP ; |vpx_lpf_vertical_4_neon|
-; void vp9_loop_filter_neon();
+; void vpx_loop_filter_neon();
; This is a helper function for the loopfilters. The invidual functions do the
; necessary load, transpose (if necessary) and store. The function does not use
; registers d8-d15.
@@ -182,7 +182,7 @@
; d5 op0
; d6 oq0
; d7 oq1
-|vp9_loop_filter_neon| PROC
+|vpx_loop_filter_neon| PROC
; filter_mask
vabd.u8 d19, d3, d4 ; m1 = abs(p3 - p2)
vabd.u8 d20, d4, d5 ; m2 = abs(p2 - p1)
@@ -272,6 +272,6 @@
veor d7, d20, d18 ; *oq1 = u^0x80
bx lr
- ENDP ; |vp9_loop_filter_neon|
+ ENDP ; |vpx_loop_filter_neon|
END
diff --git a/vpx_dsp/arm/loopfilter_4_neon.c b/vpx_dsp/arm/loopfilter_4_neon.c
index 476ed34..7ad411a 100644
--- a/vpx_dsp/arm/loopfilter_4_neon.c
+++ b/vpx_dsp/arm/loopfilter_4_neon.c
@@ -12,7 +12,7 @@
#include "./vpx_dsp_rtcd.h"
-static INLINE void vp9_loop_filter_neon(
+static INLINE void loop_filter_neon(
uint8x8_t dblimit, // flimit
uint8x8_t dlimit, // limit
uint8x8_t dthresh, // thresh
@@ -110,7 +110,7 @@
return;
}
-void vp9_lpf_horizontal_4_neon(
+void vpx_lpf_horizontal_4_neon(
uint8_t *src,
int pitch,
const uint8_t *blimit,
@@ -122,7 +122,7 @@
uint8x8_t dblimit, dlimit, dthresh;
uint8x8_t d3u8, d4u8, d5u8, d6u8, d7u8, d16u8, d17u8, d18u8;
- if (count == 0) // end_vp9_lf_h_edge
+ if (count == 0) // end_vpx_lf_h_edge
return;
dblimit = vld1_u8(blimit);
@@ -149,9 +149,9 @@
s += pitch;
d18u8 = vld1_u8(s);
- vp9_loop_filter_neon(dblimit, dlimit, dthresh,
- d3u8, d4u8, d5u8, d6u8, d7u8, d16u8, d17u8, d18u8,
- &d4u8, &d5u8, &d6u8, &d7u8);
+ loop_filter_neon(dblimit, dlimit, dthresh,
+ d3u8, d4u8, d5u8, d6u8, d7u8, d16u8, d17u8, d18u8,
+ &d4u8, &d5u8, &d6u8, &d7u8);
s -= (pitch * 5);
vst1_u8(s, d4u8);
@@ -165,7 +165,7 @@
return;
}
-void vp9_lpf_vertical_4_neon(
+void vpx_lpf_vertical_4_neon(
uint8_t *src,
int pitch,
const uint8_t *blimit,
@@ -181,7 +181,7 @@
uint8x8x2_t d2tmp8, d2tmp9, d2tmp10, d2tmp11;
uint8x8x4_t d4Result;
- if (count == 0) // end_vp9_lf_h_edge
+ if (count == 0) // end_vpx_lf_h_edge
return;
dblimit = vld1_u8(blimit);
@@ -244,9 +244,9 @@
d17u8 = d2tmp11.val[0];
d18u8 = d2tmp11.val[1];
- vp9_loop_filter_neon(dblimit, dlimit, dthresh,
- d3u8, d4u8, d5u8, d6u8, d7u8, d16u8, d17u8, d18u8,
- &d4u8, &d5u8, &d6u8, &d7u8);
+ loop_filter_neon(dblimit, dlimit, dthresh,
+ d3u8, d4u8, d5u8, d6u8, d7u8, d16u8, d17u8, d18u8,
+ &d4u8, &d5u8, &d6u8, &d7u8);
d4Result.val[0] = d4u8;
d4Result.val[1] = d5u8;
diff --git a/vpx_dsp/arm/loopfilter_8_neon.asm b/vpx_dsp/arm/loopfilter_8_neon.asm
index 91aaec0..e81734c 100644
--- a/vpx_dsp/arm/loopfilter_8_neon.asm
+++ b/vpx_dsp/arm/loopfilter_8_neon.asm
@@ -8,18 +8,18 @@
; be found in the AUTHORS file in the root of the source tree.
;
- EXPORT |vp9_lpf_horizontal_8_neon|
- EXPORT |vp9_lpf_vertical_8_neon|
+ EXPORT |vpx_lpf_horizontal_8_neon|
+ EXPORT |vpx_lpf_vertical_8_neon|
ARM
AREA ||.text||, CODE, READONLY, ALIGN=2
-; Currently vp9 only works on iterations 8 at a time. The vp8 loop filter
+; Currently vpx only works on iterations 8 at a time. The vp8 loop filter
; works on 16 iterations at a time.
; TODO(fgalligan): See about removing the count code as this function is only
; called with a count of 1.
;
-; void vp9_lpf_horizontal_8_neon(uint8_t *s, int p,
+; void vpx_lpf_horizontal_8_neon(uint8_t *s, int p,
; const uint8_t *blimit,
; const uint8_t *limit,
; const uint8_t *thresh,
@@ -30,7 +30,7 @@
; r3 const uint8_t *limit,
; sp const uint8_t *thresh,
; sp+4 int count
-|vp9_lpf_horizontal_8_neon| PROC
+|vpx_lpf_horizontal_8_neon| PROC
push {r4-r5, lr}
vld1.8 {d0[]}, [r2] ; duplicate *blimit
@@ -39,7 +39,7 @@
add r1, r1, r1 ; double pitch
cmp r12, #0
- beq end_vp9_mblf_h_edge
+ beq end_vpx_mblf_h_edge
vld1.8 {d1[]}, [r3] ; duplicate *limit
vld1.8 {d2[]}, [r2] ; duplicate *thresh
@@ -60,7 +60,7 @@
sub r3, r3, r1, lsl #1
sub r2, r2, r1, lsl #2
- bl vp9_mbloop_filter_neon
+ bl vpx_mbloop_filter_neon
vst1.u8 {d0}, [r2@64], r1 ; store op2
vst1.u8 {d1}, [r3@64], r1 ; store op1
@@ -73,12 +73,12 @@
subs r12, r12, #1
bne count_mblf_h_loop
-end_vp9_mblf_h_edge
+end_vpx_mblf_h_edge
pop {r4-r5, pc}
- ENDP ; |vp9_lpf_horizontal_8_neon|
+ ENDP ; |vpx_lpf_horizontal_8_neon|
-; void vp9_lpf_vertical_8_neon(uint8_t *s,
+; void vpx_lpf_vertical_8_neon(uint8_t *s,
; int pitch,
; const uint8_t *blimit,
; const uint8_t *limit,
@@ -91,7 +91,7 @@
; r3 const uint8_t *limit,
; sp const uint8_t *thresh,
; sp+4 int count
-|vp9_lpf_vertical_8_neon| PROC
+|vpx_lpf_vertical_8_neon| PROC
push {r4-r5, lr}
vld1.8 {d0[]}, [r2] ; duplicate *blimit
@@ -101,7 +101,7 @@
ldr r3, [sp, #12] ; load thresh
sub r2, r0, #4 ; move s pointer down by 4 columns
cmp r12, #0
- beq end_vp9_mblf_v_edge
+ beq end_vpx_mblf_v_edge
vld1.8 {d2[]}, [r3] ; duplicate *thresh
@@ -134,7 +134,7 @@
sub r2, r0, #3
add r3, r0, #1
- bl vp9_mbloop_filter_neon
+ bl vpx_mbloop_filter_neon
;store op2, op1, op0, oq0
vst4.8 {d0[0], d1[0], d2[0], d3[0]}, [r2], r1
@@ -161,11 +161,11 @@
subne r2, r0, #4 ; move s pointer down by 4 columns
bne count_mblf_v_loop
-end_vp9_mblf_v_edge
+end_vpx_mblf_v_edge
pop {r4-r5, pc}
- ENDP ; |vp9_lpf_vertical_8_neon|
+ ENDP ; |vpx_lpf_vertical_8_neon|
-; void vp9_mbloop_filter_neon();
+; void vpx_mbloop_filter_neon();
; This is a helper function for the loopfilters. The invidual functions do the
; necessary load, transpose (if necessary) and store. The function does not use
; registers d8-d15.
@@ -191,7 +191,7 @@
; d3 oq0
; d4 oq1
; d5 oq2
-|vp9_mbloop_filter_neon| PROC
+|vpx_mbloop_filter_neon| PROC
; filter_mask
vabd.u8 d19, d3, d4 ; m1 = abs(p3 - p2)
vabd.u8 d20, d4, d5 ; m2 = abs(p2 - p1)
@@ -446,6 +446,6 @@
bx lr
- ENDP ; |vp9_mbloop_filter_neon|
+ ENDP ; |vpx_mbloop_filter_neon|
END
diff --git a/vpx_dsp/arm/loopfilter_8_neon.c b/vpx_dsp/arm/loopfilter_8_neon.c
index 2179e5f..a887e2e 100644
--- a/vpx_dsp/arm/loopfilter_8_neon.c
+++ b/vpx_dsp/arm/loopfilter_8_neon.c
@@ -12,7 +12,7 @@
#include "./vpx_dsp_rtcd.h"
-static INLINE void vp9_mbloop_filter_neon(
+static INLINE void mbloop_filter_neon(
uint8x8_t dblimit, // mblimit
uint8x8_t dlimit, // limit
uint8x8_t dthresh, // thresh
@@ -263,7 +263,7 @@
return;
}
-void vp9_lpf_horizontal_8_neon(
+void vpx_lpf_horizontal_8_neon(
uint8_t *src,
int pitch,
const uint8_t *blimit,
@@ -276,7 +276,7 @@
uint8x8_t d0u8, d1u8, d2u8, d3u8, d4u8, d5u8, d6u8, d7u8;
uint8x8_t d16u8, d17u8, d18u8;
- if (count == 0) // end_vp9_mblf_h_edge
+ if (count == 0) // end_vpx_mblf_h_edge
return;
dblimit = vld1_u8(blimit);
@@ -303,9 +303,9 @@
s += pitch;
d18u8 = vld1_u8(s);
- vp9_mbloop_filter_neon(dblimit, dlimit, dthresh,
- d3u8, d4u8, d5u8, d6u8, d7u8, d16u8, d17u8, d18u8,
- &d0u8, &d1u8, &d2u8, &d3u8, &d4u8, &d5u8);
+ mbloop_filter_neon(dblimit, dlimit, dthresh,
+ d3u8, d4u8, d5u8, d6u8, d7u8, d16u8, d17u8, d18u8,
+ &d0u8, &d1u8, &d2u8, &d3u8, &d4u8, &d5u8);
s -= (pitch * 6);
vst1_u8(s, d0u8);
@@ -323,7 +323,7 @@
return;
}
-void vp9_lpf_vertical_8_neon(
+void vpx_lpf_vertical_8_neon(
uint8_t *src,
int pitch,
const uint8_t *blimit,
@@ -403,9 +403,9 @@
d17u8 = d2tmp11.val[0];
d18u8 = d2tmp11.val[1];
- vp9_mbloop_filter_neon(dblimit, dlimit, dthresh,
- d3u8, d4u8, d5u8, d6u8, d7u8, d16u8, d17u8, d18u8,
- &d0u8, &d1u8, &d2u8, &d3u8, &d4u8, &d5u8);
+ mbloop_filter_neon(dblimit, dlimit, dthresh,
+ d3u8, d4u8, d5u8, d6u8, d7u8, d16u8, d17u8, d18u8,
+ &d0u8, &d1u8, &d2u8, &d3u8, &d4u8, &d5u8);
d4Result.val[0] = d0u8;
d4Result.val[1] = d1u8;
diff --git a/vpx_dsp/arm/loopfilter_mb_neon.asm b/vpx_dsp/arm/loopfilter_mb_neon.asm
index 5fe2bba..20d9cfb 100644
--- a/vpx_dsp/arm/loopfilter_mb_neon.asm
+++ b/vpx_dsp/arm/loopfilter_mb_neon.asm
@@ -8,13 +8,13 @@
; be found in the AUTHORS file in the root of the source tree.
;
- EXPORT |vp9_lpf_horizontal_16_neon|
- EXPORT |vp9_lpf_vertical_16_neon|
+ EXPORT |vpx_lpf_horizontal_16_neon|
+ EXPORT |vpx_lpf_vertical_16_neon|
ARM
AREA ||.text||, CODE, READONLY, ALIGN=2
-; void vp9_lpf_horizontal_16_neon(uint8_t *s, int p,
+; void vpx_lpf_horizontal_16_neon(uint8_t *s, int p,
; const uint8_t *blimit,
; const uint8_t *limit,
; const uint8_t *thresh
@@ -24,7 +24,7 @@
; r2 const uint8_t *blimit,
; r3 const uint8_t *limit,
; sp const uint8_t *thresh,
-|vp9_lpf_horizontal_16_neon| PROC
+|vpx_lpf_horizontal_16_neon| PROC
push {r4-r8, lr}
vpush {d8-d15}
ldr r4, [sp, #88] ; load thresh
@@ -54,7 +54,7 @@
vld1.u8 {d14}, [r8@64], r1 ; q6
vld1.u8 {d15}, [r8@64], r1 ; q7
- bl vp9_wide_mbfilter_neon
+ bl vpx_wide_mbfilter_neon
tst r7, #1
beq h_mbfilter
@@ -115,9 +115,9 @@
vpop {d8-d15}
pop {r4-r8, pc}
- ENDP ; |vp9_lpf_horizontal_16_neon|
+ ENDP ; |vpx_lpf_horizontal_16_neon|
-; void vp9_lpf_vertical_16_neon(uint8_t *s, int p,
+; void vpx_lpf_vertical_16_neon(uint8_t *s, int p,
; const uint8_t *blimit,
; const uint8_t *limit,
; const uint8_t *thresh)
@@ -126,7 +126,7 @@
; r2 const uint8_t *blimit,
; r3 const uint8_t *limit,
; sp const uint8_t *thresh,
-|vp9_lpf_vertical_16_neon| PROC
+|vpx_lpf_vertical_16_neon| PROC
push {r4-r8, lr}
vpush {d8-d15}
ldr r4, [sp, #88] ; load thresh
@@ -176,7 +176,7 @@
vtrn.8 d12, d13
vtrn.8 d14, d15
- bl vp9_wide_mbfilter_neon
+ bl vpx_wide_mbfilter_neon
tst r7, #1
beq v_mbfilter
@@ -279,9 +279,9 @@
vpop {d8-d15}
pop {r4-r8, pc}
- ENDP ; |vp9_lpf_vertical_16_neon|
+ ENDP ; |vpx_lpf_vertical_16_neon|
-; void vp9_wide_mbfilter_neon();
+; void vpx_wide_mbfilter_neon();
; This is a helper function for the loopfilters. The invidual functions do the
; necessary load, transpose (if necessary) and store.
;
@@ -305,7 +305,7 @@
; d13 q5
; d14 q6
; d15 q7
-|vp9_wide_mbfilter_neon| PROC
+|vpx_wide_mbfilter_neon| PROC
mov r7, #0
; filter_mask
@@ -601,6 +601,6 @@
vbif d3, d14, d17 ; oq6 |= q6 & ~(f2 & f & m)
bx lr
- ENDP ; |vp9_wide_mbfilter_neon|
+ ENDP ; |vpx_wide_mbfilter_neon|
END
diff --git a/vpx_dsp/arm/loopfilter_neon.c b/vpx_dsp/arm/loopfilter_neon.c
index 9ef3229..eff87d2 100644
--- a/vpx_dsp/arm/loopfilter_neon.c
+++ b/vpx_dsp/arm/loopfilter_neon.c
@@ -14,45 +14,45 @@
#include "./vpx_config.h"
#include "vpx/vpx_integer.h"
-void vp9_lpf_vertical_4_dual_neon(uint8_t *s, int p,
+void vpx_lpf_vertical_4_dual_neon(uint8_t *s, int p,
const uint8_t *blimit0,
const uint8_t *limit0,
const uint8_t *thresh0,
const uint8_t *blimit1,
const uint8_t *limit1,
const uint8_t *thresh1) {
- vp9_lpf_vertical_4_neon(s, p, blimit0, limit0, thresh0, 1);
- vp9_lpf_vertical_4_neon(s + 8 * p, p, blimit1, limit1, thresh1, 1);
+ vpx_lpf_vertical_4_neon(s, p, blimit0, limit0, thresh0, 1);
+ vpx_lpf_vertical_4_neon(s + 8 * p, p, blimit1, limit1, thresh1, 1);
}
#if HAVE_NEON_ASM
-void vp9_lpf_horizontal_8_dual_neon(uint8_t *s, int p /* pitch */,
+void vpx_lpf_horizontal_8_dual_neon(uint8_t *s, int p /* pitch */,
const uint8_t *blimit0,
const uint8_t *limit0,
const uint8_t *thresh0,
const uint8_t *blimit1,
const uint8_t *limit1,
const uint8_t *thresh1) {
- vp9_lpf_horizontal_8_neon(s, p, blimit0, limit0, thresh0, 1);
- vp9_lpf_horizontal_8_neon(s + 8, p, blimit1, limit1, thresh1, 1);
+ vpx_lpf_horizontal_8_neon(s, p, blimit0, limit0, thresh0, 1);
+ vpx_lpf_horizontal_8_neon(s + 8, p, blimit1, limit1, thresh1, 1);
}
-void vp9_lpf_vertical_8_dual_neon(uint8_t *s, int p,
+void vpx_lpf_vertical_8_dual_neon(uint8_t *s, int p,
const uint8_t *blimit0,
const uint8_t *limit0,
const uint8_t *thresh0,
const uint8_t *blimit1,
const uint8_t *limit1,
const uint8_t *thresh1) {
- vp9_lpf_vertical_8_neon(s, p, blimit0, limit0, thresh0, 1);
- vp9_lpf_vertical_8_neon(s + 8 * p, p, blimit1, limit1, thresh1, 1);
+ vpx_lpf_vertical_8_neon(s, p, blimit0, limit0, thresh0, 1);
+ vpx_lpf_vertical_8_neon(s + 8 * p, p, blimit1, limit1, thresh1, 1);
}
-void vp9_lpf_vertical_16_dual_neon(uint8_t *s, int p,
+void vpx_lpf_vertical_16_dual_neon(uint8_t *s, int p,
const uint8_t *blimit,
const uint8_t *limit,
const uint8_t *thresh) {
- vp9_lpf_vertical_16_neon(s, p, blimit, limit, thresh);
- vp9_lpf_vertical_16_neon(s + 8 * p, p, blimit, limit, thresh);
+ vpx_lpf_vertical_16_neon(s, p, blimit, limit, thresh);
+ vpx_lpf_vertical_16_neon(s + 8 * p, p, blimit, limit, thresh);
}
#endif // HAVE_NEON_ASM
diff --git a/vpx_dsp/loopfilter.c b/vpx_dsp/loopfilter.c
index 73b448e..dc8aca5 100644
--- a/vpx_dsp/loopfilter.c
+++ b/vpx_dsp/loopfilter.c
@@ -115,7 +115,7 @@
*op1 = signed_char_clamp(ps1 + filter) ^ 0x80;
}
-void vp9_lpf_horizontal_4_c(uint8_t *s, int p /* pitch */,
+void vpx_lpf_horizontal_4_c(uint8_t *s, int p /* pitch */,
const uint8_t *blimit, const uint8_t *limit,
const uint8_t *thresh, int count) {
int i;
@@ -132,15 +132,15 @@
}
}
-void vp9_lpf_horizontal_4_dual_c(uint8_t *s, int p, const uint8_t *blimit0,
+void vpx_lpf_horizontal_4_dual_c(uint8_t *s, int p, const uint8_t *blimit0,
const uint8_t *limit0, const uint8_t *thresh0,
const uint8_t *blimit1, const uint8_t *limit1,
const uint8_t *thresh1) {
- vp9_lpf_horizontal_4_c(s, p, blimit0, limit0, thresh0, 1);
- vp9_lpf_horizontal_4_c(s + 8, p, blimit1, limit1, thresh1, 1);
+ vpx_lpf_horizontal_4_c(s, p, blimit0, limit0, thresh0, 1);
+ vpx_lpf_horizontal_4_c(s + 8, p, blimit1, limit1, thresh1, 1);
}
-void vp9_lpf_vertical_4_c(uint8_t *s, int pitch, const uint8_t *blimit,
+void vpx_lpf_vertical_4_c(uint8_t *s, int pitch, const uint8_t *blimit,
const uint8_t *limit, const uint8_t *thresh,
int count) {
int i;
@@ -157,12 +157,12 @@
}
}
-void vp9_lpf_vertical_4_dual_c(uint8_t *s, int pitch, const uint8_t *blimit0,
+void vpx_lpf_vertical_4_dual_c(uint8_t *s, int pitch, const uint8_t *blimit0,
const uint8_t *limit0, const uint8_t *thresh0,
const uint8_t *blimit1, const uint8_t *limit1,
const uint8_t *thresh1) {
- vp9_lpf_vertical_4_c(s, pitch, blimit0, limit0, thresh0, 1);
- vp9_lpf_vertical_4_c(s + 8 * pitch, pitch, blimit1, limit1,
+ vpx_lpf_vertical_4_c(s, pitch, blimit0, limit0, thresh0, 1);
+ vpx_lpf_vertical_4_c(s + 8 * pitch, pitch, blimit1, limit1,
thresh1, 1);
}
@@ -187,7 +187,7 @@
}
}
-void vp9_lpf_horizontal_8_c(uint8_t *s, int p, const uint8_t *blimit,
+void vpx_lpf_horizontal_8_c(uint8_t *s, int p, const uint8_t *blimit,
const uint8_t *limit, const uint8_t *thresh,
int count) {
int i;
@@ -207,15 +207,15 @@
}
}
-void vp9_lpf_horizontal_8_dual_c(uint8_t *s, int p, const uint8_t *blimit0,
+void vpx_lpf_horizontal_8_dual_c(uint8_t *s, int p, const uint8_t *blimit0,
const uint8_t *limit0, const uint8_t *thresh0,
const uint8_t *blimit1, const uint8_t *limit1,
const uint8_t *thresh1) {
- vp9_lpf_horizontal_8_c(s, p, blimit0, limit0, thresh0, 1);
- vp9_lpf_horizontal_8_c(s + 8, p, blimit1, limit1, thresh1, 1);
+ vpx_lpf_horizontal_8_c(s, p, blimit0, limit0, thresh0, 1);
+ vpx_lpf_horizontal_8_c(s + 8, p, blimit1, limit1, thresh1, 1);
}
-void vp9_lpf_vertical_8_c(uint8_t *s, int pitch, const uint8_t *blimit,
+void vpx_lpf_vertical_8_c(uint8_t *s, int pitch, const uint8_t *blimit,
const uint8_t *limit, const uint8_t *thresh,
int count) {
int i;
@@ -232,12 +232,12 @@
}
}
-void vp9_lpf_vertical_8_dual_c(uint8_t *s, int pitch, const uint8_t *blimit0,
+void vpx_lpf_vertical_8_dual_c(uint8_t *s, int pitch, const uint8_t *blimit0,
const uint8_t *limit0, const uint8_t *thresh0,
const uint8_t *blimit1, const uint8_t *limit1,
const uint8_t *thresh1) {
- vp9_lpf_vertical_8_c(s, pitch, blimit0, limit0, thresh0, 1);
- vp9_lpf_vertical_8_c(s + 8 * pitch, pitch, blimit1, limit1,
+ vpx_lpf_vertical_8_c(s, pitch, blimit0, limit0, thresh0, 1);
+ vpx_lpf_vertical_8_c(s + 8 * pitch, pitch, blimit1, limit1,
thresh1, 1);
}
@@ -292,7 +292,7 @@
}
}
-void vp9_lpf_horizontal_16_c(uint8_t *s, int p, const uint8_t *blimit,
+void vpx_lpf_horizontal_16_c(uint8_t *s, int p, const uint8_t *blimit,
const uint8_t *limit, const uint8_t *thresh,
int count) {
int i;
@@ -341,12 +341,12 @@
}
}
-void vp9_lpf_vertical_16_c(uint8_t *s, int p, const uint8_t *blimit,
+void vpx_lpf_vertical_16_c(uint8_t *s, int p, const uint8_t *blimit,
const uint8_t *limit, const uint8_t *thresh) {
mb_lpf_vertical_edge_w(s, p, blimit, limit, thresh, 8);
}
-void vp9_lpf_vertical_16_dual_c(uint8_t *s, int p, const uint8_t *blimit,
+void vpx_lpf_vertical_16_dual_c(uint8_t *s, int p, const uint8_t *blimit,
const uint8_t *limit, const uint8_t *thresh) {
mb_lpf_vertical_edge_w(s, p, blimit, limit, thresh, 16);
}
@@ -446,7 +446,7 @@
*op1 = signed_char_clamp_high(ps1 + filter, bd) + (0x80 << shift);
}
-void vp9_highbd_lpf_horizontal_4_c(uint16_t *s, int p /* pitch */,
+void vpx_highbd_lpf_horizontal_4_c(uint16_t *s, int p /* pitch */,
const uint8_t *blimit, const uint8_t *limit,
const uint8_t *thresh, int count, int bd) {
int i;
@@ -469,7 +469,7 @@
}
}
-void vp9_highbd_lpf_horizontal_4_dual_c(uint16_t *s, int p,
+void vpx_highbd_lpf_horizontal_4_dual_c(uint16_t *s, int p,
const uint8_t *blimit0,
const uint8_t *limit0,
const uint8_t *thresh0,
@@ -477,11 +477,11 @@
const uint8_t *limit1,
const uint8_t *thresh1,
int bd) {
- vp9_highbd_lpf_horizontal_4_c(s, p, blimit0, limit0, thresh0, 1, bd);
- vp9_highbd_lpf_horizontal_4_c(s + 8, p, blimit1, limit1, thresh1, 1, bd);
+ vpx_highbd_lpf_horizontal_4_c(s, p, blimit0, limit0, thresh0, 1, bd);
+ vpx_highbd_lpf_horizontal_4_c(s + 8, p, blimit1, limit1, thresh1, 1, bd);
}
-void vp9_highbd_lpf_vertical_4_c(uint16_t *s, int pitch, const uint8_t *blimit,
+void vpx_highbd_lpf_vertical_4_c(uint16_t *s, int pitch, const uint8_t *blimit,
const uint8_t *limit, const uint8_t *thresh,
int count, int bd) {
int i;
@@ -498,7 +498,7 @@
}
}
-void vp9_highbd_lpf_vertical_4_dual_c(uint16_t *s, int pitch,
+void vpx_highbd_lpf_vertical_4_dual_c(uint16_t *s, int pitch,
const uint8_t *blimit0,
const uint8_t *limit0,
const uint8_t *thresh0,
@@ -506,8 +506,8 @@
const uint8_t *limit1,
const uint8_t *thresh1,
int bd) {
- vp9_highbd_lpf_vertical_4_c(s, pitch, blimit0, limit0, thresh0, 1, bd);
- vp9_highbd_lpf_vertical_4_c(s + 8 * pitch, pitch, blimit1, limit1,
+ vpx_highbd_lpf_vertical_4_c(s, pitch, blimit0, limit0, thresh0, 1, bd);
+ vpx_highbd_lpf_vertical_4_c(s + 8 * pitch, pitch, blimit1, limit1,
thresh1, 1, bd);
}
@@ -532,7 +532,7 @@
}
}
-void vp9_highbd_lpf_horizontal_8_c(uint16_t *s, int p, const uint8_t *blimit,
+void vpx_highbd_lpf_horizontal_8_c(uint16_t *s, int p, const uint8_t *blimit,
const uint8_t *limit, const uint8_t *thresh,
int count, int bd) {
int i;
@@ -554,7 +554,7 @@
}
}
-void vp9_highbd_lpf_horizontal_8_dual_c(uint16_t *s, int p,
+void vpx_highbd_lpf_horizontal_8_dual_c(uint16_t *s, int p,
const uint8_t *blimit0,
const uint8_t *limit0,
const uint8_t *thresh0,
@@ -562,11 +562,11 @@
const uint8_t *limit1,
const uint8_t *thresh1,
int bd) {
- vp9_highbd_lpf_horizontal_8_c(s, p, blimit0, limit0, thresh0, 1, bd);
- vp9_highbd_lpf_horizontal_8_c(s + 8, p, blimit1, limit1, thresh1, 1, bd);
+ vpx_highbd_lpf_horizontal_8_c(s, p, blimit0, limit0, thresh0, 1, bd);
+ vpx_highbd_lpf_horizontal_8_c(s + 8, p, blimit1, limit1, thresh1, 1, bd);
}
-void vp9_highbd_lpf_vertical_8_c(uint16_t *s, int pitch, const uint8_t *blimit,
+void vpx_highbd_lpf_vertical_8_c(uint16_t *s, int pitch, const uint8_t *blimit,
const uint8_t *limit, const uint8_t *thresh,
int count, int bd) {
int i;
@@ -586,7 +586,7 @@
}
}
-void vp9_highbd_lpf_vertical_8_dual_c(uint16_t *s, int pitch,
+void vpx_highbd_lpf_vertical_8_dual_c(uint16_t *s, int pitch,
const uint8_t *blimit0,
const uint8_t *limit0,
const uint8_t *thresh0,
@@ -594,8 +594,8 @@
const uint8_t *limit1,
const uint8_t *thresh1,
int bd) {
- vp9_highbd_lpf_vertical_8_c(s, pitch, blimit0, limit0, thresh0, 1, bd);
- vp9_highbd_lpf_vertical_8_c(s + 8 * pitch, pitch, blimit1, limit1,
+ vpx_highbd_lpf_vertical_8_c(s, pitch, blimit0, limit0, thresh0, 1, bd);
+ vpx_highbd_lpf_vertical_8_c(s + 8 * pitch, pitch, blimit1, limit1,
thresh1, 1, bd);
}
@@ -662,7 +662,7 @@
}
}
-void vp9_highbd_lpf_horizontal_16_c(uint16_t *s, int p, const uint8_t *blimit,
+void vpx_highbd_lpf_horizontal_16_c(uint16_t *s, int p, const uint8_t *blimit,
const uint8_t *limit, const uint8_t *thresh,
int count, int bd) {
int i;
@@ -727,13 +727,13 @@
}
}
-void vp9_highbd_lpf_vertical_16_c(uint16_t *s, int p, const uint8_t *blimit,
+void vpx_highbd_lpf_vertical_16_c(uint16_t *s, int p, const uint8_t *blimit,
const uint8_t *limit, const uint8_t *thresh,
int bd) {
highbd_mb_lpf_vertical_edge_w(s, p, blimit, limit, thresh, 8, bd);
}
-void vp9_highbd_lpf_vertical_16_dual_c(uint16_t *s, int p,
+void vpx_highbd_lpf_vertical_16_dual_c(uint16_t *s, int p,
const uint8_t *blimit,
const uint8_t *limit,
const uint8_t *thresh,
diff --git a/vpx_dsp/mips/common_dspr2.h b/vpx_dsp/mips/common_dspr2.h
index 879e2d0..8278101 100644
--- a/vpx_dsp/mips/common_dspr2.h
+++ b/vpx_dsp/mips/common_dspr2.h
@@ -21,7 +21,7 @@
#if HAVE_DSPR2
#define CROP_WIDTH 512
-static INLINE void vp9_prefetch_load(const unsigned char *src) {
+static INLINE void prefetch_load(const unsigned char *src) {
__asm__ __volatile__ (
"pref 0, 0(%[src]) \n\t"
:
@@ -30,7 +30,7 @@
}
/* prefetch data for store */
-static INLINE void vp9_prefetch_store(unsigned char *dst) {
+static INLINE void prefetch_store(unsigned char *dst) {
__asm__ __volatile__ (
"pref 1, 0(%[dst]) \n\t"
:
@@ -38,7 +38,7 @@
);
}
-static INLINE void vp9_prefetch_load_streamed(const unsigned char *src) {
+static INLINE void prefetch_load_streamed(const unsigned char *src) {
__asm__ __volatile__ (
"pref 4, 0(%[src]) \n\t"
:
@@ -47,7 +47,7 @@
}
/* prefetch data for store */
-static INLINE void vp9_prefetch_store_streamed(unsigned char *dst) {
+static INLINE void prefetch_store_streamed(unsigned char *dst) {
__asm__ __volatile__ (
"pref 5, 0(%[dst]) \n\t"
:
diff --git a/vpx_dsp/mips/loopfilter_16_msa.c b/vpx_dsp/mips/loopfilter_16_msa.c
index b0b99b4..b7c9f7b 100644
--- a/vpx_dsp/mips/loopfilter_16_msa.c
+++ b/vpx_dsp/mips/loopfilter_16_msa.c
@@ -11,7 +11,7 @@
#include "vpx_ports/mem.h"
#include "vpx_dsp/mips/loopfilter_msa.h"
-int32_t vp9_hz_lpf_t4_and_t8_16w(uint8_t *src, int32_t pitch,
+int32_t vpx_hz_lpf_t4_and_t8_16w(uint8_t *src, int32_t pitch,
uint8_t *filter48,
const uint8_t *b_limit_ptr,
const uint8_t *limit_ptr,
@@ -79,7 +79,7 @@
}
}
-void vp9_hz_lpf_t16_16w(uint8_t *src, int32_t pitch, uint8_t *filter48) {
+void vpx_hz_lpf_t16_16w(uint8_t *src, int32_t pitch, uint8_t *filter48) {
v16u8 flat, flat2, filter8;
v16i8 zero = { 0 };
v16u8 p7, p6, p5, p4, p3, p2, p1, p0, q0, q1, q2, q3, q4, q5, q6, q7;
@@ -405,7 +405,7 @@
}
}
-void vp9_lpf_horizontal_16_dual_msa(uint8_t *src, int32_t pitch,
+void vpx_lpf_horizontal_16_dual_msa(uint8_t *src, int32_t pitch,
const uint8_t *b_limit_ptr,
const uint8_t *limit_ptr,
const uint8_t *thresh_ptr,
@@ -415,15 +415,15 @@
(void)count;
- early_exit = vp9_hz_lpf_t4_and_t8_16w(src, pitch, &filter48[0], b_limit_ptr,
+ early_exit = vpx_hz_lpf_t4_and_t8_16w(src, pitch, &filter48[0], b_limit_ptr,
limit_ptr, thresh_ptr);
if (0 == early_exit) {
- vp9_hz_lpf_t16_16w(src, pitch, filter48);
+ vpx_hz_lpf_t16_16w(src, pitch, filter48);
}
}
-void vp9_lpf_horizontal_16_msa(uint8_t *src, int32_t pitch,
+void vpx_lpf_horizontal_16_msa(uint8_t *src, int32_t pitch,
const uint8_t *b_limit_ptr,
const uint8_t *limit_ptr,
const uint8_t *thresh_ptr,
@@ -643,7 +643,7 @@
}
}
} else {
- vp9_lpf_horizontal_16_dual_msa(src, pitch, b_limit_ptr, limit_ptr,
+ vpx_lpf_horizontal_16_dual_msa(src, pitch, b_limit_ptr, limit_ptr,
thresh_ptr, count);
}
}
@@ -744,7 +744,7 @@
ST_UB8(q0, q1, q2, q3, q4, q5, q6, q7, output, out_pitch);
}
-int32_t vp9_vt_lpf_t4_and_t8_8w(uint8_t *src, uint8_t *filter48,
+int32_t vpx_vt_lpf_t4_and_t8_8w(uint8_t *src, uint8_t *filter48,
uint8_t *src_org, int32_t pitch_org,
const uint8_t *b_limit_ptr,
const uint8_t *limit_ptr,
@@ -812,7 +812,7 @@
}
}
-int32_t vp9_vt_lpf_t16_8w(uint8_t *src, uint8_t *src_org, int32_t pitch,
+int32_t vpx_vt_lpf_t16_8w(uint8_t *src, uint8_t *src_org, int32_t pitch,
uint8_t *filter48) {
v16i8 zero = { 0 };
v16u8 filter8, flat, flat2;
@@ -1032,7 +1032,7 @@
}
}
-void vp9_lpf_vertical_16_msa(uint8_t *src, int32_t pitch,
+void vpx_lpf_vertical_16_msa(uint8_t *src, int32_t pitch,
const uint8_t *b_limit_ptr,
const uint8_t *limit_ptr,
const uint8_t *thresh_ptr) {
@@ -1042,12 +1042,12 @@
transpose_16x8_to_8x16(src - 8, pitch, transposed_input, 16);
- early_exit = vp9_vt_lpf_t4_and_t8_8w((transposed_input + 16 * 8),
+ early_exit = vpx_vt_lpf_t4_and_t8_8w((transposed_input + 16 * 8),
&filter48[0], src, pitch, b_limit_ptr,
limit_ptr, thresh_ptr);
if (0 == early_exit) {
- early_exit = vp9_vt_lpf_t16_8w((transposed_input + 16 * 8), src, pitch,
+ early_exit = vpx_vt_lpf_t16_8w((transposed_input + 16 * 8), src, pitch,
&filter48[0]);
if (0 == early_exit) {
@@ -1056,7 +1056,7 @@
}
}
-int32_t vp9_vt_lpf_t4_and_t8_16w(uint8_t *src, uint8_t *filter48,
+int32_t vpx_vt_lpf_t4_and_t8_16w(uint8_t *src, uint8_t *filter48,
uint8_t *src_org, int32_t pitch,
const uint8_t *b_limit_ptr,
const uint8_t *limit_ptr,
@@ -1134,7 +1134,7 @@
}
}
-int32_t vp9_vt_lpf_t16_16w(uint8_t *src, uint8_t *src_org, int32_t pitch,
+int32_t vpx_vt_lpf_t16_16w(uint8_t *src, uint8_t *src_org, int32_t pitch,
uint8_t *filter48) {
v16u8 flat, flat2, filter8;
v16i8 zero = { 0 };
@@ -1455,7 +1455,7 @@
}
}
-void vp9_lpf_vertical_16_dual_msa(uint8_t *src, int32_t pitch,
+void vpx_lpf_vertical_16_dual_msa(uint8_t *src, int32_t pitch,
const uint8_t *b_limit_ptr,
const uint8_t *limit_ptr,
const uint8_t *thresh_ptr) {
@@ -1465,12 +1465,12 @@
transpose_16x16((src - 8), pitch, &transposed_input[0], 16);
- early_exit = vp9_vt_lpf_t4_and_t8_16w((transposed_input + 16 * 8),
+ early_exit = vpx_vt_lpf_t4_and_t8_16w((transposed_input + 16 * 8),
&filter48[0], src, pitch, b_limit_ptr,
limit_ptr, thresh_ptr);
if (0 == early_exit) {
- early_exit = vp9_vt_lpf_t16_16w((transposed_input + 16 * 8), src, pitch,
+ early_exit = vpx_vt_lpf_t16_16w((transposed_input + 16 * 8), src, pitch,
&filter48[0]);
if (0 == early_exit) {
diff --git a/vpx_dsp/mips/loopfilter_4_msa.c b/vpx_dsp/mips/loopfilter_4_msa.c
index 24db787..daf5f38 100644
--- a/vpx_dsp/mips/loopfilter_4_msa.c
+++ b/vpx_dsp/mips/loopfilter_4_msa.c
@@ -10,7 +10,7 @@
#include "vpx_dsp/mips/loopfilter_msa.h"
-void vp9_lpf_horizontal_4_msa(uint8_t *src, int32_t pitch,
+void vpx_lpf_horizontal_4_msa(uint8_t *src, int32_t pitch,
const uint8_t *b_limit_ptr,
const uint8_t *limit_ptr,
const uint8_t *thresh_ptr,
@@ -39,7 +39,7 @@
SD4(p1_d, p0_d, q0_d, q1_d, (src - 2 * pitch), pitch);
}
-void vp9_lpf_horizontal_4_dual_msa(uint8_t *src, int32_t pitch,
+void vpx_lpf_horizontal_4_dual_msa(uint8_t *src, int32_t pitch,
const uint8_t *b_limit0_ptr,
const uint8_t *limit0_ptr,
const uint8_t *thresh0_ptr,
@@ -71,7 +71,7 @@
ST_UB4(p1, p0, q0, q1, (src - 2 * pitch), pitch);
}
-void vp9_lpf_vertical_4_msa(uint8_t *src, int32_t pitch,
+void vpx_lpf_vertical_4_msa(uint8_t *src, int32_t pitch,
const uint8_t *b_limit_ptr,
const uint8_t *limit_ptr,
const uint8_t *thresh_ptr,
@@ -102,7 +102,7 @@
ST4x4_UB(vec3, vec3, 0, 1, 2, 3, src, pitch);
}
-void vp9_lpf_vertical_4_dual_msa(uint8_t *src, int32_t pitch,
+void vpx_lpf_vertical_4_dual_msa(uint8_t *src, int32_t pitch,
const uint8_t *b_limit0_ptr,
const uint8_t *limit0_ptr,
const uint8_t *thresh0_ptr,
diff --git a/vpx_dsp/mips/loopfilter_8_msa.c b/vpx_dsp/mips/loopfilter_8_msa.c
index 85c34eb..00b6db5 100644
--- a/vpx_dsp/mips/loopfilter_8_msa.c
+++ b/vpx_dsp/mips/loopfilter_8_msa.c
@@ -10,7 +10,7 @@
#include "vpx_dsp/mips/loopfilter_msa.h"
-void vp9_lpf_horizontal_8_msa(uint8_t *src, int32_t pitch,
+void vpx_lpf_horizontal_8_msa(uint8_t *src, int32_t pitch,
const uint8_t *b_limit_ptr,
const uint8_t *limit_ptr,
const uint8_t *thresh_ptr,
@@ -83,7 +83,7 @@
}
}
-void vp9_lpf_horizontal_8_dual_msa(uint8_t *src, int32_t pitch,
+void vpx_lpf_horizontal_8_dual_msa(uint8_t *src, int32_t pitch,
const uint8_t *b_limit0,
const uint8_t *limit0,
const uint8_t *thresh0,
@@ -158,7 +158,7 @@
}
}
-void vp9_lpf_vertical_8_msa(uint8_t *src, int32_t pitch,
+void vpx_lpf_vertical_8_msa(uint8_t *src, int32_t pitch,
const uint8_t *b_limit_ptr,
const uint8_t *limit_ptr,
const uint8_t *thresh_ptr,
@@ -237,7 +237,7 @@
}
}
-void vp9_lpf_vertical_8_dual_msa(uint8_t *src, int32_t pitch,
+void vpx_lpf_vertical_8_dual_msa(uint8_t *src, int32_t pitch,
const uint8_t *b_limit0,
const uint8_t *limit0,
const uint8_t *thresh0,
diff --git a/vp9/common/mips/dspr2/vp9_loopfilter_filters_dspr2.c b/vpx_dsp/mips/loopfilter_filters_dspr2.c
similarity index 85%
rename from vp9/common/mips/dspr2/vp9_loopfilter_filters_dspr2.c
rename to vpx_dsp/mips/loopfilter_filters_dspr2.c
index 41be900..99a96d8 100644
--- a/vp9/common/mips/dspr2/vp9_loopfilter_filters_dspr2.c
+++ b/vpx_dsp/mips/loopfilter_filters_dspr2.c
@@ -10,17 +10,16 @@
#include <stdlib.h>
-#include "./vp9_rtcd.h"
-#include "vp9/common/vp9_common.h"
-#include "vp9/common/vp9_loopfilter.h"
-#include "vp9/common/vp9_onyxc_int.h"
-#include "vp9/common/mips/dspr2/vp9_loopfilter_macros_dspr2.h"
-#include "vp9/common/mips/dspr2/vp9_loopfilter_masks_dspr2.h"
-#include "vp9/common/mips/dspr2/vp9_loopfilter_filters_dspr2.h"
+#include "./vpx_dsp_rtcd.h"
+#include "vpx/vpx_integer.h"
#include "vpx_dsp/mips/common_dspr2.h"
+#include "vpx_dsp/mips/loopfilter_filters_dspr2.h"
+#include "vpx_dsp/mips/loopfilter_macros_dspr2.h"
+#include "vpx_dsp/mips/loopfilter_masks_dspr2.h"
+#include "vpx_mem/vpx_mem.h"
#if HAVE_DSPR2
-void vp9_lpf_horizontal_4_dspr2(unsigned char *s,
+void vpx_lpf_horizontal_4_dspr2(unsigned char *s,
int pitch,
const uint8_t *blimit,
const uint8_t *limit,
@@ -50,7 +49,7 @@
);
/* prefetch data for store */
- vp9_prefetch_store(s);
+ prefetch_store(s);
/* loop filter designed to work using chars so that we can make maximum use
of 8 bit simd instructions. */
@@ -88,14 +87,14 @@
: [sm1] "r" (sm1), [s0] "r" (s0), [s5] "r" (s5), [s6] "r" (s6)
);
- vp9_filter_hev_mask_dspr2(limit_vec, flimit_vec, p1, p2,
- pm1, p0, p3, p4, p5, p6,
- thresh_vec, &hev, &mask);
+ filter_hev_mask_dspr2(limit_vec, flimit_vec, p1, p2,
+ pm1, p0, p3, p4, p5, p6,
+ thresh_vec, &hev, &mask);
/* if mask == 0 do filtering is not needed */
if (mask) {
/* filtering */
- vp9_filter_dspr2(mask, hev, &p1, &p2, &p3, &p4);
+ filter_dspr2(mask, hev, &p1, &p2, &p3, &p4);
__asm__ __volatile__ (
"sw %[p1], (%[s1]) \n\t"
@@ -114,7 +113,7 @@
}
}
-void vp9_lpf_vertical_4_dspr2(unsigned char *s,
+void vpx_lpf_vertical_4_dspr2(unsigned char *s,
int pitch,
const uint8_t *blimit,
const uint8_t *limit,
@@ -144,7 +143,7 @@
);
/* prefetch data for store */
- vp9_prefetch_store(s + pitch);
+ prefetch_store(s + pitch);
for (i = 0; i < 2; i++) {
s1 = s;
@@ -217,14 +216,14 @@
* mask will be zero and filtering is not needed
*/
if (!(((p1 - p4) == 0) && ((p2 - p3) == 0))) {
- vp9_filter_hev_mask_dspr2(limit_vec, flimit_vec, p1, p2, pm1,
- p0, p3, p4, p5, p6, thresh_vec,
- &hev, &mask);
+ filter_hev_mask_dspr2(limit_vec, flimit_vec, p1, p2, pm1,
+ p0, p3, p4, p5, p6, thresh_vec,
+ &hev, &mask);
/* if mask == 0 do filtering is not needed */
if (mask) {
/* filtering */
- vp9_filter_dspr2(mask, hev, &p1, &p2, &p3, &p4);
+ filter_dspr2(mask, hev, &p1, &p2, &p3, &p4);
/* unpack processed 4x4 neighborhood
* don't use transpose on output data
@@ -307,56 +306,56 @@
}
}
-void vp9_lpf_horizontal_4_dual_dspr2(uint8_t *s, int p /* pitch */,
+void vpx_lpf_horizontal_4_dual_dspr2(uint8_t *s, int p /* pitch */,
const uint8_t *blimit0,
const uint8_t *limit0,
const uint8_t *thresh0,
const uint8_t *blimit1,
const uint8_t *limit1,
const uint8_t *thresh1) {
- vp9_lpf_horizontal_4_dspr2(s, p, blimit0, limit0, thresh0, 1);
- vp9_lpf_horizontal_4_dspr2(s + 8, p, blimit1, limit1, thresh1, 1);
+ vpx_lpf_horizontal_4_dspr2(s, p, blimit0, limit0, thresh0, 1);
+ vpx_lpf_horizontal_4_dspr2(s + 8, p, blimit1, limit1, thresh1, 1);
}
-void vp9_lpf_horizontal_8_dual_dspr2(uint8_t *s, int p /* pitch */,
+void vpx_lpf_horizontal_8_dual_dspr2(uint8_t *s, int p /* pitch */,
const uint8_t *blimit0,
const uint8_t *limit0,
const uint8_t *thresh0,
const uint8_t *blimit1,
const uint8_t *limit1,
const uint8_t *thresh1) {
- vp9_lpf_horizontal_8_dspr2(s, p, blimit0, limit0, thresh0, 1);
- vp9_lpf_horizontal_8_dspr2(s + 8, p, blimit1, limit1, thresh1, 1);
+ vpx_lpf_horizontal_8_dspr2(s, p, blimit0, limit0, thresh0, 1);
+ vpx_lpf_horizontal_8_dspr2(s + 8, p, blimit1, limit1, thresh1, 1);
}
-void vp9_lpf_vertical_4_dual_dspr2(uint8_t *s, int p,
+void vpx_lpf_vertical_4_dual_dspr2(uint8_t *s, int p,
const uint8_t *blimit0,
const uint8_t *limit0,
const uint8_t *thresh0,
const uint8_t *blimit1,
const uint8_t *limit1,
const uint8_t *thresh1) {
- vp9_lpf_vertical_4_dspr2(s, p, blimit0, limit0, thresh0, 1);
- vp9_lpf_vertical_4_dspr2(s + 8 * p, p, blimit1, limit1, thresh1, 1);
+ vpx_lpf_vertical_4_dspr2(s, p, blimit0, limit0, thresh0, 1);
+ vpx_lpf_vertical_4_dspr2(s + 8 * p, p, blimit1, limit1, thresh1, 1);
}
-void vp9_lpf_vertical_8_dual_dspr2(uint8_t *s, int p,
+void vpx_lpf_vertical_8_dual_dspr2(uint8_t *s, int p,
const uint8_t *blimit0,
const uint8_t *limit0,
const uint8_t *thresh0,
const uint8_t *blimit1,
const uint8_t *limit1,
const uint8_t *thresh1) {
- vp9_lpf_vertical_8_dspr2(s, p, blimit0, limit0, thresh0, 1);
- vp9_lpf_vertical_8_dspr2(s + 8 * p, p, blimit1, limit1, thresh1,
+ vpx_lpf_vertical_8_dspr2(s, p, blimit0, limit0, thresh0, 1);
+ vpx_lpf_vertical_8_dspr2(s + 8 * p, p, blimit1, limit1, thresh1,
1);
}
-void vp9_lpf_vertical_16_dual_dspr2(uint8_t *s, int p,
+void vpx_lpf_vertical_16_dual_dspr2(uint8_t *s, int p,
const uint8_t *blimit,
const uint8_t *limit,
const uint8_t *thresh) {
- vp9_lpf_vertical_16_dspr2(s, p, blimit, limit, thresh);
- vp9_lpf_vertical_16_dspr2(s + 8 * p, p, blimit, limit, thresh);
+ vpx_lpf_vertical_16_dspr2(s, p, blimit, limit, thresh);
+ vpx_lpf_vertical_16_dspr2(s + 8 * p, p, blimit, limit, thresh);
}
#endif // #if HAVE_DSPR2
diff --git a/vp9/common/mips/dspr2/vp9_loopfilter_filters_dspr2.h b/vpx_dsp/mips/loopfilter_filters_dspr2.h
similarity index 85%
rename from vp9/common/mips/dspr2/vp9_loopfilter_filters_dspr2.h
rename to vpx_dsp/mips/loopfilter_filters_dspr2.h
index 675db65..4a1506b 100644
--- a/vp9/common/mips/dspr2/vp9_loopfilter_filters_dspr2.h
+++ b/vpx_dsp/mips/loopfilter_filters_dspr2.h
@@ -13,10 +13,10 @@
#include <stdlib.h>
-#include "./vp9_rtcd.h"
+#include "./vpx_dsp_rtcd.h"
+#include "vpx/vpx_integer.h"
+#include "vpx_mem/vpx_mem.h"
#include "vpx_ports/mem.h"
-#include "vp9/common/vp9_common.h"
-#include "vp9/common/vp9_onyxc_int.h"
#ifdef __cplusplus
extern "C" {
@@ -24,10 +24,10 @@
#if HAVE_DSPR2
/* inputs & outputs are quad-byte vectors */
-static INLINE void vp9_filter_dspr2(uint32_t mask, uint32_t hev,
- uint32_t *ps1, uint32_t *ps0,
- uint32_t *qs0, uint32_t *qs1) {
- int32_t vp9_filter_l, vp9_filter_r;
+static INLINE void filter_dspr2(uint32_t mask, uint32_t hev,
+ uint32_t *ps1, uint32_t *ps0,
+ uint32_t *qs0, uint32_t *qs1) {
+ int32_t vpx_filter_l, vpx_filter_r;
int32_t Filter1_l, Filter1_r, Filter2_l, Filter2_r;
int32_t subr_r, subr_l;
uint32_t t1, t2, HWM, t3;
@@ -73,34 +73,34 @@
hev_r = hev_r & HWM;
__asm__ __volatile__ (
- /* vp9_filter = vp8_signed_char_clamp(ps1 - qs1); */
- "subq_s.ph %[vp9_filter_l], %[vps1_l], %[vqs1_l] \n\t"
- "subq_s.ph %[vp9_filter_r], %[vps1_r], %[vqs1_r] \n\t"
+ /* vpx_filter = vp8_signed_char_clamp(ps1 - qs1); */
+ "subq_s.ph %[vpx_filter_l], %[vps1_l], %[vqs1_l] \n\t"
+ "subq_s.ph %[vpx_filter_r], %[vps1_r], %[vqs1_r] \n\t"
/* qs0 - ps0 */
"subq_s.ph %[subr_l], %[vqs0_l], %[vps0_l] \n\t"
"subq_s.ph %[subr_r], %[vqs0_r], %[vps0_r] \n\t"
- /* vp9_filter &= hev; */
- "and %[vp9_filter_l], %[vp9_filter_l], %[hev_l] \n\t"
- "and %[vp9_filter_r], %[vp9_filter_r], %[hev_r] \n\t"
+ /* vpx_filter &= hev; */
+ "and %[vpx_filter_l], %[vpx_filter_l], %[hev_l] \n\t"
+ "and %[vpx_filter_r], %[vpx_filter_r], %[hev_r] \n\t"
- /* vp9_filter = vp8_signed_char_clamp(vp9_filter + 3 * (qs0 - ps0)); */
- "addq_s.ph %[vp9_filter_l], %[vp9_filter_l], %[subr_l] \n\t"
- "addq_s.ph %[vp9_filter_r], %[vp9_filter_r], %[subr_r] \n\t"
+ /* vpx_filter = vp8_signed_char_clamp(vpx_filter + 3 * (qs0 - ps0)); */
+ "addq_s.ph %[vpx_filter_l], %[vpx_filter_l], %[subr_l] \n\t"
+ "addq_s.ph %[vpx_filter_r], %[vpx_filter_r], %[subr_r] \n\t"
"xor %[invhev_l], %[hev_l], %[HWM] \n\t"
- "addq_s.ph %[vp9_filter_l], %[vp9_filter_l], %[subr_l] \n\t"
- "addq_s.ph %[vp9_filter_r], %[vp9_filter_r], %[subr_r] \n\t"
+ "addq_s.ph %[vpx_filter_l], %[vpx_filter_l], %[subr_l] \n\t"
+ "addq_s.ph %[vpx_filter_r], %[vpx_filter_r], %[subr_r] \n\t"
"xor %[invhev_r], %[hev_r], %[HWM] \n\t"
- "addq_s.ph %[vp9_filter_l], %[vp9_filter_l], %[subr_l] \n\t"
- "addq_s.ph %[vp9_filter_r], %[vp9_filter_r], %[subr_r] \n\t"
+ "addq_s.ph %[vpx_filter_l], %[vpx_filter_l], %[subr_l] \n\t"
+ "addq_s.ph %[vpx_filter_r], %[vpx_filter_r], %[subr_r] \n\t"
- /* vp9_filter &= mask; */
- "and %[vp9_filter_l], %[vp9_filter_l], %[mask_l] \n\t"
- "and %[vp9_filter_r], %[vp9_filter_r], %[mask_r] \n\t"
+ /* vpx_filter &= mask; */
+ "and %[vpx_filter_l], %[vpx_filter_l], %[mask_l] \n\t"
+ "and %[vpx_filter_r], %[vpx_filter_r], %[mask_r] \n\t"
- : [vp9_filter_l] "=&r" (vp9_filter_l),
- [vp9_filter_r] "=&r" (vp9_filter_r),
+ : [vpx_filter_l] "=&r" (vpx_filter_l),
+ [vpx_filter_r] "=&r" (vpx_filter_r),
[subr_l] "=&r" (subr_l), [subr_r] "=&r" (subr_r),
[invhev_l] "=&r" (invhev_l), [invhev_r] "=&r" (invhev_r)
: [vps0_l] "r" (vps0_l), [vps0_r] "r" (vps0_r), [vps1_l] "r" (vps1_l),
@@ -113,13 +113,13 @@
/* save bottom 3 bits so that we round one side +4 and the other +3 */
__asm__ __volatile__ (
- /* Filter2 = vp8_signed_char_clamp(vp9_filter + 3) >>= 3; */
- "addq_s.ph %[Filter1_l], %[vp9_filter_l], %[t2] \n\t"
- "addq_s.ph %[Filter1_r], %[vp9_filter_r], %[t2] \n\t"
+ /* Filter2 = vp8_signed_char_clamp(vpx_filter + 3) >>= 3; */
+ "addq_s.ph %[Filter1_l], %[vpx_filter_l], %[t2] \n\t"
+ "addq_s.ph %[Filter1_r], %[vpx_filter_r], %[t2] \n\t"
- /* Filter1 = vp8_signed_char_clamp(vp9_filter + 4) >>= 3; */
- "addq_s.ph %[Filter2_l], %[vp9_filter_l], %[t1] \n\t"
- "addq_s.ph %[Filter2_r], %[vp9_filter_r], %[t1] \n\t"
+ /* Filter1 = vp8_signed_char_clamp(vpx_filter + 4) >>= 3; */
+ "addq_s.ph %[Filter2_l], %[vpx_filter_l], %[t1] \n\t"
+ "addq_s.ph %[Filter2_r], %[vpx_filter_r], %[t1] \n\t"
"shra.ph %[Filter1_r], %[Filter1_r], 3 \n\t"
"shra.ph %[Filter1_l], %[Filter1_l], 3 \n\t"
@@ -142,23 +142,23 @@
[vps0_l] "+r" (vps0_l), [vps0_r] "+r" (vps0_r),
[vqs0_l] "+r" (vqs0_l), [vqs0_r] "+r" (vqs0_r)
: [t1] "r" (t1), [t2] "r" (t2), [HWM] "r" (HWM),
- [vp9_filter_l] "r" (vp9_filter_l), [vp9_filter_r] "r" (vp9_filter_r)
+ [vpx_filter_l] "r" (vpx_filter_l), [vpx_filter_r] "r" (vpx_filter_r)
);
__asm__ __volatile__ (
- /* (vp9_filter += 1) >>= 1 */
+ /* (vpx_filter += 1) >>= 1 */
"addqh.ph %[Filter1_l], %[Filter1_l], %[t3] \n\t"
"addqh.ph %[Filter1_r], %[Filter1_r], %[t3] \n\t"
- /* vp9_filter &= ~hev; */
+ /* vpx_filter &= ~hev; */
"and %[Filter1_l], %[Filter1_l], %[invhev_l] \n\t"
"and %[Filter1_r], %[Filter1_r], %[invhev_r] \n\t"
- /* vps1 = vp8_signed_char_clamp(ps1 + vp9_filter); */
+ /* vps1 = vp8_signed_char_clamp(ps1 + vpx_filter); */
"addq_s.ph %[vps1_l], %[vps1_l], %[Filter1_l] \n\t"
"addq_s.ph %[vps1_r], %[vps1_r], %[Filter1_r] \n\t"
- /* vqs1 = vp8_signed_char_clamp(qs1 - vp9_filter); */
+ /* vqs1 = vp8_signed_char_clamp(qs1 - vpx_filter); */
"subq_s.ph %[vqs1_l], %[vqs1_l], %[Filter1_l] \n\t"
"subq_s.ph %[vqs1_r], %[vqs1_r], %[Filter1_r] \n\t"
@@ -196,12 +196,12 @@
*qs1 = vqs1 ^ N128;
}
-static INLINE void vp9_filter1_dspr2(uint32_t mask, uint32_t hev,
- uint32_t ps1, uint32_t ps0,
- uint32_t qs0, uint32_t qs1,
- uint32_t *p1_f0, uint32_t *p0_f0,
- uint32_t *q0_f0, uint32_t *q1_f0) {
- int32_t vp9_filter_l, vp9_filter_r;
+static INLINE void filter1_dspr2(uint32_t mask, uint32_t hev,
+ uint32_t ps1, uint32_t ps0,
+ uint32_t qs0, uint32_t qs1,
+ uint32_t *p1_f0, uint32_t *p0_f0,
+ uint32_t *q0_f0, uint32_t *q1_f0) {
+ int32_t vpx_filter_l, vpx_filter_r;
int32_t Filter1_l, Filter1_r, Filter2_l, Filter2_r;
int32_t subr_r, subr_l;
uint32_t t1, t2, HWM, t3;
@@ -247,34 +247,34 @@
hev_r = hev_r & HWM;
__asm__ __volatile__ (
- /* vp9_filter = vp8_signed_char_clamp(ps1 - qs1); */
- "subq_s.ph %[vp9_filter_l], %[vps1_l], %[vqs1_l] \n\t"
- "subq_s.ph %[vp9_filter_r], %[vps1_r], %[vqs1_r] \n\t"
+ /* vpx_filter = vp8_signed_char_clamp(ps1 - qs1); */
+ "subq_s.ph %[vpx_filter_l], %[vps1_l], %[vqs1_l] \n\t"
+ "subq_s.ph %[vpx_filter_r], %[vps1_r], %[vqs1_r] \n\t"
/* qs0 - ps0 */
"subq_s.ph %[subr_l], %[vqs0_l], %[vps0_l] \n\t"
"subq_s.ph %[subr_r], %[vqs0_r], %[vps0_r] \n\t"
- /* vp9_filter &= hev; */
- "and %[vp9_filter_l], %[vp9_filter_l], %[hev_l] \n\t"
- "and %[vp9_filter_r], %[vp9_filter_r], %[hev_r] \n\t"
+ /* vpx_filter &= hev; */
+ "and %[vpx_filter_l], %[vpx_filter_l], %[hev_l] \n\t"
+ "and %[vpx_filter_r], %[vpx_filter_r], %[hev_r] \n\t"
- /* vp9_filter = vp8_signed_char_clamp(vp9_filter + 3 * (qs0 - ps0)); */
- "addq_s.ph %[vp9_filter_l], %[vp9_filter_l], %[subr_l] \n\t"
- "addq_s.ph %[vp9_filter_r], %[vp9_filter_r], %[subr_r] \n\t"
+ /* vpx_filter = vp8_signed_char_clamp(vpx_filter + 3 * (qs0 - ps0)); */
+ "addq_s.ph %[vpx_filter_l], %[vpx_filter_l], %[subr_l] \n\t"
+ "addq_s.ph %[vpx_filter_r], %[vpx_filter_r], %[subr_r] \n\t"
"xor %[invhev_l], %[hev_l], %[HWM] \n\t"
- "addq_s.ph %[vp9_filter_l], %[vp9_filter_l], %[subr_l] \n\t"
- "addq_s.ph %[vp9_filter_r], %[vp9_filter_r], %[subr_r] \n\t"
+ "addq_s.ph %[vpx_filter_l], %[vpx_filter_l], %[subr_l] \n\t"
+ "addq_s.ph %[vpx_filter_r], %[vpx_filter_r], %[subr_r] \n\t"
"xor %[invhev_r], %[hev_r], %[HWM] \n\t"
- "addq_s.ph %[vp9_filter_l], %[vp9_filter_l], %[subr_l] \n\t"
- "addq_s.ph %[vp9_filter_r], %[vp9_filter_r], %[subr_r] \n\t"
+ "addq_s.ph %[vpx_filter_l], %[vpx_filter_l], %[subr_l] \n\t"
+ "addq_s.ph %[vpx_filter_r], %[vpx_filter_r], %[subr_r] \n\t"
- /* vp9_filter &= mask; */
- "and %[vp9_filter_l], %[vp9_filter_l], %[mask_l] \n\t"
- "and %[vp9_filter_r], %[vp9_filter_r], %[mask_r] \n\t"
+ /* vpx_filter &= mask; */
+ "and %[vpx_filter_l], %[vpx_filter_l], %[mask_l] \n\t"
+ "and %[vpx_filter_r], %[vpx_filter_r], %[mask_r] \n\t"
- : [vp9_filter_l] "=&r" (vp9_filter_l),
- [vp9_filter_r] "=&r" (vp9_filter_r),
+ : [vpx_filter_l] "=&r" (vpx_filter_l),
+ [vpx_filter_r] "=&r" (vpx_filter_r),
[subr_l] "=&r" (subr_l), [subr_r] "=&r" (subr_r),
[invhev_l] "=&r" (invhev_l), [invhev_r] "=&r" (invhev_r)
: [vps0_l] "r" (vps0_l), [vps0_r] "r" (vps0_r), [vps1_l] "r" (vps1_l),
@@ -286,13 +286,13 @@
/* save bottom 3 bits so that we round one side +4 and the other +3 */
__asm__ __volatile__ (
- /* Filter2 = vp8_signed_char_clamp(vp9_filter + 3) >>= 3; */
- "addq_s.ph %[Filter1_l], %[vp9_filter_l], %[t2] \n\t"
- "addq_s.ph %[Filter1_r], %[vp9_filter_r], %[t2] \n\t"
+ /* Filter2 = vp8_signed_char_clamp(vpx_filter + 3) >>= 3; */
+ "addq_s.ph %[Filter1_l], %[vpx_filter_l], %[t2] \n\t"
+ "addq_s.ph %[Filter1_r], %[vpx_filter_r], %[t2] \n\t"
- /* Filter1 = vp8_signed_char_clamp(vp9_filter + 4) >>= 3; */
- "addq_s.ph %[Filter2_l], %[vp9_filter_l], %[t1] \n\t"
- "addq_s.ph %[Filter2_r], %[vp9_filter_r], %[t1] \n\t"
+ /* Filter1 = vp8_signed_char_clamp(vpx_filter + 4) >>= 3; */
+ "addq_s.ph %[Filter2_l], %[vpx_filter_l], %[t1] \n\t"
+ "addq_s.ph %[Filter2_r], %[vpx_filter_r], %[t1] \n\t"
"shra.ph %[Filter1_r], %[Filter1_r], 3 \n\t"
"shra.ph %[Filter1_l], %[Filter1_l], 3 \n\t"
@@ -315,23 +315,23 @@
[vps0_l] "+r" (vps0_l), [vps0_r] "+r" (vps0_r),
[vqs0_l] "+r" (vqs0_l), [vqs0_r] "+r" (vqs0_r)
: [t1] "r" (t1), [t2] "r" (t2), [HWM] "r" (HWM),
- [vp9_filter_l] "r" (vp9_filter_l), [vp9_filter_r] "r" (vp9_filter_r)
+ [vpx_filter_l] "r" (vpx_filter_l), [vpx_filter_r] "r" (vpx_filter_r)
);
__asm__ __volatile__ (
- /* (vp9_filter += 1) >>= 1 */
+ /* (vpx_filter += 1) >>= 1 */
"addqh.ph %[Filter1_l], %[Filter1_l], %[t3] \n\t"
"addqh.ph %[Filter1_r], %[Filter1_r], %[t3] \n\t"
- /* vp9_filter &= ~hev; */
+ /* vpx_filter &= ~hev; */
"and %[Filter1_l], %[Filter1_l], %[invhev_l] \n\t"
"and %[Filter1_r], %[Filter1_r], %[invhev_r] \n\t"
- /* vps1 = vp8_signed_char_clamp(ps1 + vp9_filter); */
+ /* vps1 = vp8_signed_char_clamp(ps1 + vpx_filter); */
"addq_s.ph %[vps1_l], %[vps1_l], %[Filter1_l] \n\t"
"addq_s.ph %[vps1_r], %[vps1_r], %[Filter1_r] \n\t"
- /* vqs1 = vp8_signed_char_clamp(qs1 - vp9_filter); */
+ /* vqs1 = vp8_signed_char_clamp(qs1 - vpx_filter); */
"subq_s.ph %[vqs1_l], %[vqs1_l], %[Filter1_l] \n\t"
"subq_s.ph %[vqs1_r], %[vqs1_r], %[Filter1_r] \n\t"
@@ -369,10 +369,10 @@
*q1_f0 = vqs1 ^ N128;
}
-static INLINE void vp9_mbfilter_dspr2(uint32_t *op3, uint32_t *op2,
- uint32_t *op1, uint32_t *op0,
- uint32_t *oq0, uint32_t *oq1,
- uint32_t *oq2, uint32_t *oq3) {
+static INLINE void mbfilter_dspr2(uint32_t *op3, uint32_t *op2,
+ uint32_t *op1, uint32_t *op0,
+ uint32_t *oq0, uint32_t *oq1,
+ uint32_t *oq2, uint32_t *oq3) {
/* use a 7 tap filter [1, 1, 1, 2, 1, 1, 1] for flat line */
const uint32_t p3 = *op3, p2 = *op2, p1 = *op1, p0 = *op0;
const uint32_t q0 = *oq0, q1 = *oq1, q2 = *oq2, q3 = *oq3;
@@ -446,14 +446,14 @@
*oq2 = res_oq2;
}
-static INLINE void vp9_mbfilter1_dspr2(uint32_t p3, uint32_t p2,
- uint32_t p1, uint32_t p0,
- uint32_t q0, uint32_t q1,
- uint32_t q2, uint32_t q3,
- uint32_t *op2_f1,
- uint32_t *op1_f1, uint32_t *op0_f1,
- uint32_t *oq0_f1, uint32_t *oq1_f1,
- uint32_t *oq2_f1) {
+static INLINE void mbfilter1_dspr2(uint32_t p3, uint32_t p2,
+ uint32_t p1, uint32_t p0,
+ uint32_t q0, uint32_t q1,
+ uint32_t q2, uint32_t q3,
+ uint32_t *op2_f1,
+ uint32_t *op1_f1, uint32_t *op0_f1,
+ uint32_t *oq0_f1, uint32_t *oq1_f1,
+ uint32_t *oq2_f1) {
/* use a 7 tap filter [1, 1, 1, 2, 1, 1, 1] for flat line */
uint32_t res_op2, res_op1, res_op0;
uint32_t res_oq0, res_oq1, res_oq2;
@@ -524,14 +524,14 @@
*oq2_f1 = res_oq2;
}
-static INLINE void vp9_wide_mbfilter_dspr2(uint32_t *op7, uint32_t *op6,
- uint32_t *op5, uint32_t *op4,
- uint32_t *op3, uint32_t *op2,
- uint32_t *op1, uint32_t *op0,
- uint32_t *oq0, uint32_t *oq1,
- uint32_t *oq2, uint32_t *oq3,
- uint32_t *oq4, uint32_t *oq5,
- uint32_t *oq6, uint32_t *oq7) {
+static INLINE void wide_mbfilter_dspr2(uint32_t *op7, uint32_t *op6,
+ uint32_t *op5, uint32_t *op4,
+ uint32_t *op3, uint32_t *op2,
+ uint32_t *op1, uint32_t *op0,
+ uint32_t *oq0, uint32_t *oq1,
+ uint32_t *oq2, uint32_t *oq3,
+ uint32_t *oq4, uint32_t *oq5,
+ uint32_t *oq6, uint32_t *oq7) {
const uint32_t p7 = *op7, p6 = *op6, p5 = *op5, p4 = *op4;
const uint32_t p3 = *op3, p2 = *op2, p1 = *op1, p0 = *op0;
const uint32_t q0 = *oq0, q1 = *oq1, q2 = *oq2, q3 = *oq3;
diff --git a/vp9/common/mips/dspr2/vp9_loopfilter_macros_dspr2.h b/vpx_dsp/mips/loopfilter_macros_dspr2.h
similarity index 99%
rename from vp9/common/mips/dspr2/vp9_loopfilter_macros_dspr2.h
rename to vpx_dsp/mips/loopfilter_macros_dspr2.h
index ca01a6a..994ff18 100644
--- a/vp9/common/mips/dspr2/vp9_loopfilter_macros_dspr2.h
+++ b/vpx_dsp/mips/loopfilter_macros_dspr2.h
@@ -13,9 +13,9 @@
#include <stdlib.h>
-#include "./vp9_rtcd.h"
-#include "vp9/common/vp9_common.h"
-#include "vp9/common/vp9_onyxc_int.h"
+#include "./vpx_dsp_rtcd.h"
+#include "vpx/vpx_integer.h"
+#include "vpx_mem/vpx_mem.h"
#ifdef __cplusplus
extern "C" {
diff --git a/vp9/common/mips/dspr2/vp9_loopfilter_masks_dspr2.h b/vpx_dsp/mips/loopfilter_masks_dspr2.h
similarity index 90%
rename from vp9/common/mips/dspr2/vp9_loopfilter_masks_dspr2.h
rename to vpx_dsp/mips/loopfilter_masks_dspr2.h
index 5b0d9cc..e82dfb7 100644
--- a/vp9/common/mips/dspr2/vp9_loopfilter_masks_dspr2.h
+++ b/vpx_dsp/mips/loopfilter_masks_dspr2.h
@@ -13,9 +13,9 @@
#include <stdlib.h>
-#include "./vp9_rtcd.h"
-#include "vp9/common/vp9_common.h"
-#include "vp9/common/vp9_onyxc_int.h"
+#include "./vpx_dsp_rtcd.h"
+#include "vpx/vpx_integer.h"
+#include "vpx_mem/vpx_mem.h"
#ifdef __cplusplus
extern "C" {
@@ -24,13 +24,13 @@
#if HAVE_DSPR2
/* processing 4 pixels at the same time
* compute hev and mask in the same function */
-static INLINE void vp9_filter_hev_mask_dspr2(uint32_t limit, uint32_t flimit,
- uint32_t p1, uint32_t p0,
- uint32_t p3, uint32_t p2,
- uint32_t q0, uint32_t q1,
- uint32_t q2, uint32_t q3,
- uint32_t thresh, uint32_t *hev,
- uint32_t *mask) {
+static INLINE void filter_hev_mask_dspr2(uint32_t limit, uint32_t flimit,
+ uint32_t p1, uint32_t p0,
+ uint32_t p3, uint32_t p2,
+ uint32_t q0, uint32_t q1,
+ uint32_t q2, uint32_t q3,
+ uint32_t thresh, uint32_t *hev,
+ uint32_t *mask) {
uint32_t c, r, r3, r_k;
uint32_t s1, s2, s3;
uint32_t ones = 0xFFFFFFFF;
@@ -129,16 +129,16 @@
*mask = s2;
}
-static INLINE void vp9_filter_hev_mask_flatmask4_dspr2(uint32_t limit,
- uint32_t flimit,
- uint32_t thresh,
- uint32_t p1, uint32_t p0,
- uint32_t p3, uint32_t p2,
- uint32_t q0, uint32_t q1,
- uint32_t q2, uint32_t q3,
- uint32_t *hev,
- uint32_t *mask,
- uint32_t *flat) {
+static INLINE void filter_hev_mask_flatmask4_dspr2(uint32_t limit,
+ uint32_t flimit,
+ uint32_t thresh,
+ uint32_t p1, uint32_t p0,
+ uint32_t p3, uint32_t p2,
+ uint32_t q0, uint32_t q1,
+ uint32_t q2, uint32_t q3,
+ uint32_t *hev,
+ uint32_t *mask,
+ uint32_t *flat) {
uint32_t c, r, r3, r_k, r_flat;
uint32_t s1, s2, s3;
uint32_t ones = 0xFFFFFFFF;
@@ -279,12 +279,12 @@
*flat = flat1;
}
-static INLINE void vp9_flatmask5(uint32_t p4, uint32_t p3,
- uint32_t p2, uint32_t p1,
- uint32_t p0, uint32_t q0,
- uint32_t q1, uint32_t q2,
- uint32_t q3, uint32_t q4,
- uint32_t *flat2) {
+static INLINE void flatmask5(uint32_t p4, uint32_t p3,
+ uint32_t p2, uint32_t p1,
+ uint32_t p0, uint32_t q0,
+ uint32_t q1, uint32_t q2,
+ uint32_t q3, uint32_t q4,
+ uint32_t *flat2) {
uint32_t c, r, r_k, r_flat;
uint32_t ones = 0xFFFFFFFF;
uint32_t flat_thresh = 0x01010101;
diff --git a/vp9/common/mips/dspr2/vp9_mbloop_loopfilter_dspr2.c b/vpx_dsp/mips/loopfilter_mb_dspr2.c
similarity index 90%
rename from vp9/common/mips/dspr2/vp9_mbloop_loopfilter_dspr2.c
rename to vpx_dsp/mips/loopfilter_mb_dspr2.c
index 93fbc27..4138f56 100644
--- a/vp9/common/mips/dspr2/vp9_mbloop_loopfilter_dspr2.c
+++ b/vpx_dsp/mips/loopfilter_mb_dspr2.c
@@ -10,17 +10,16 @@
#include <stdlib.h>
-#include "./vp9_rtcd.h"
-#include "vp9/common/vp9_common.h"
-#include "vp9/common/vp9_loopfilter.h"
-#include "vp9/common/vp9_onyxc_int.h"
-#include "vp9/common/mips/dspr2/vp9_loopfilter_macros_dspr2.h"
-#include "vp9/common/mips/dspr2/vp9_loopfilter_masks_dspr2.h"
-#include "vp9/common/mips/dspr2/vp9_loopfilter_filters_dspr2.h"
+#include "./vpx_dsp_rtcd.h"
+#include "vpx/vpx_integer.h"
#include "vpx_dsp/mips/common_dspr2.h"
+#include "vpx_dsp/mips/loopfilter_filters_dspr2.h"
+#include "vpx_dsp/mips/loopfilter_macros_dspr2.h"
+#include "vpx_dsp/mips/loopfilter_masks_dspr2.h"
+#include "vpx_mem/vpx_mem.h"
#if HAVE_DSPR2
-void vp9_lpf_horizontal_8_dspr2(unsigned char *s,
+void vpx_lpf_horizontal_8_dspr2(unsigned char *s,
int pitch,
const uint8_t *blimit,
const uint8_t *limit,
@@ -53,7 +52,7 @@
);
/* prefetch data for store */
- vp9_prefetch_store(s);
+ prefetch_store(s);
for (i = 0; i < 2; i++) {
sp3 = s - (pitch << 2);
@@ -81,13 +80,13 @@
[sq3] "r" (sq3), [sq2] "r" (sq2), [sq1] "r" (sq1), [sq0] "r" (sq0)
);
- vp9_filter_hev_mask_flatmask4_dspr2(limit_vec, flimit_vec, thresh_vec,
- p1, p0, p3, p2, q0, q1, q2, q3,
- &hev, &mask, &flat);
+ filter_hev_mask_flatmask4_dspr2(limit_vec, flimit_vec, thresh_vec,
+ p1, p0, p3, p2, q0, q1, q2, q3,
+ &hev, &mask, &flat);
if ((flat == 0) && (mask != 0)) {
- vp9_filter1_dspr2(mask, hev, p1, p0, q0, q1,
- &p1_f0, &p0_f0, &q0_f0, &q1_f0);
+ filter1_dspr2(mask, hev, p1, p0, q0, q1,
+ &p1_f0, &p0_f0, &q0_f0, &q1_f0);
__asm__ __volatile__ (
"sw %[p1_f0], (%[sp1]) \n\t"
@@ -104,13 +103,13 @@
} else if ((mask & flat) == 0xFFFFFFFF) {
/* left 2 element operation */
PACK_LEFT_0TO3()
- vp9_mbfilter_dspr2(&p3_l, &p2_l, &p1_l, &p0_l,
- &q0_l, &q1_l, &q2_l, &q3_l);
+ mbfilter_dspr2(&p3_l, &p2_l, &p1_l, &p0_l,
+ &q0_l, &q1_l, &q2_l, &q3_l);
/* right 2 element operation */
PACK_RIGHT_0TO3()
- vp9_mbfilter_dspr2(&p3_r, &p2_r, &p1_r, &p0_r,
- &q0_r, &q1_r, &q2_r, &q3_r);
+ mbfilter_dspr2(&p3_r, &p2_r, &p1_r, &p0_r,
+ &q0_r, &q1_r, &q2_r, &q3_r);
COMBINE_LEFT_RIGHT_0TO2()
@@ -130,18 +129,18 @@
);
} else if ((flat != 0) && (mask != 0)) {
/* filtering */
- vp9_filter1_dspr2(mask, hev, p1, p0, q0, q1,
- &p1_f0, &p0_f0, &q0_f0, &q1_f0);
+ filter1_dspr2(mask, hev, p1, p0, q0, q1,
+ &p1_f0, &p0_f0, &q0_f0, &q1_f0);
/* left 2 element operation */
PACK_LEFT_0TO3()
- vp9_mbfilter_dspr2(&p3_l, &p2_l, &p1_l, &p0_l,
- &q0_l, &q1_l, &q2_l, &q3_l);
+ mbfilter_dspr2(&p3_l, &p2_l, &p1_l, &p0_l,
+ &q0_l, &q1_l, &q2_l, &q3_l);
/* right 2 element operation */
PACK_RIGHT_0TO3()
- vp9_mbfilter_dspr2(&p3_r, &p2_r, &p1_r, &p0_r,
- &q0_r, &q1_r, &q2_r, &q3_r);
+ mbfilter_dspr2(&p3_r, &p2_r, &p1_r, &p0_r,
+ &q0_r, &q1_r, &q2_r, &q3_r);
if (mask & flat & 0x000000FF) {
__asm__ __volatile__ (
@@ -319,7 +318,7 @@
}
}
-void vp9_lpf_vertical_8_dspr2(unsigned char *s,
+void vpx_lpf_vertical_8_dspr2(unsigned char *s,
int pitch,
const uint8_t *blimit,
const uint8_t *limit,
@@ -351,7 +350,7 @@
: [uthresh] "r" (uthresh), [uflimit] "r" (uflimit), [ulimit] "r" (ulimit)
);
- vp9_prefetch_store(s + pitch);
+ prefetch_store(s + pitch);
for (i = 0; i < 2; i++) {
s1 = s;
@@ -451,39 +450,39 @@
:
);
- vp9_filter_hev_mask_flatmask4_dspr2(limit_vec, flimit_vec, thresh_vec,
- p1, p0, p3, p2, q0, q1, q2, q3,
- &hev, &mask, &flat);
+ filter_hev_mask_flatmask4_dspr2(limit_vec, flimit_vec, thresh_vec,
+ p1, p0, p3, p2, q0, q1, q2, q3,
+ &hev, &mask, &flat);
if ((flat == 0) && (mask != 0)) {
- vp9_filter1_dspr2(mask, hev, p1, p0, q0, q1,
- &p1_f0, &p0_f0, &q0_f0, &q1_f0);
+ filter1_dspr2(mask, hev, p1, p0, q0, q1,
+ &p1_f0, &p0_f0, &q0_f0, &q1_f0);
STORE_F0()
} else if ((mask & flat) == 0xFFFFFFFF) {
/* left 2 element operation */
PACK_LEFT_0TO3()
- vp9_mbfilter_dspr2(&p3_l, &p2_l, &p1_l, &p0_l,
- &q0_l, &q1_l, &q2_l, &q3_l);
+ mbfilter_dspr2(&p3_l, &p2_l, &p1_l, &p0_l,
+ &q0_l, &q1_l, &q2_l, &q3_l);
/* right 2 element operation */
PACK_RIGHT_0TO3()
- vp9_mbfilter_dspr2(&p3_r, &p2_r, &p1_r, &p0_r,
- &q0_r, &q1_r, &q2_r, &q3_r);
+ mbfilter_dspr2(&p3_r, &p2_r, &p1_r, &p0_r,
+ &q0_r, &q1_r, &q2_r, &q3_r);
STORE_F1()
} else if ((flat != 0) && (mask != 0)) {
- vp9_filter1_dspr2(mask, hev, p1, p0, q0, q1,
- &p1_f0, &p0_f0, &q0_f0, &q1_f0);
+ filter1_dspr2(mask, hev, p1, p0, q0, q1,
+ &p1_f0, &p0_f0, &q0_f0, &q1_f0);
/* left 2 element operation */
PACK_LEFT_0TO3()
- vp9_mbfilter_dspr2(&p3_l, &p2_l, &p1_l, &p0_l,
- &q0_l, &q1_l, &q2_l, &q3_l);
+ mbfilter_dspr2(&p3_l, &p2_l, &p1_l, &p0_l,
+ &q0_l, &q1_l, &q2_l, &q3_l);
/* right 2 element operation */
PACK_RIGHT_0TO3()
- vp9_mbfilter_dspr2(&p3_r, &p2_r, &p1_r, &p0_r,
- &q0_r, &q1_r, &q2_r, &q3_r);
+ mbfilter_dspr2(&p3_r, &p2_r, &p1_r, &p0_r,
+ &q0_r, &q1_r, &q2_r, &q3_r);
if (mask & flat & 0x000000FF) {
__asm__ __volatile__ (
diff --git a/vp9/common/mips/dspr2/vp9_mblpf_horiz_loopfilter_dspr2.c b/vpx_dsp/mips/loopfilter_mb_horiz_dspr2.c
similarity index 90%
rename from vp9/common/mips/dspr2/vp9_mblpf_horiz_loopfilter_dspr2.c
rename to vpx_dsp/mips/loopfilter_mb_horiz_dspr2.c
index 5dc86fe..8a48650 100644
--- a/vp9/common/mips/dspr2/vp9_mblpf_horiz_loopfilter_dspr2.c
+++ b/vpx_dsp/mips/loopfilter_mb_horiz_dspr2.c
@@ -10,17 +10,16 @@
#include <stdlib.h>
-#include "./vp9_rtcd.h"
-#include "vp9/common/vp9_common.h"
-#include "vp9/common/vp9_loopfilter.h"
-#include "vp9/common/vp9_onyxc_int.h"
-#include "vp9/common/mips/dspr2/vp9_loopfilter_macros_dspr2.h"
-#include "vp9/common/mips/dspr2/vp9_loopfilter_masks_dspr2.h"
-#include "vp9/common/mips/dspr2/vp9_loopfilter_filters_dspr2.h"
+#include "./vpx_dsp_rtcd.h"
+#include "vpx/vpx_integer.h"
#include "vpx_dsp/mips/common_dspr2.h"
+#include "vpx_dsp/mips/loopfilter_filters_dspr2.h"
+#include "vpx_dsp/mips/loopfilter_macros_dspr2.h"
+#include "vpx_dsp/mips/loopfilter_masks_dspr2.h"
+#include "vpx_mem/vpx_mem.h"
#if HAVE_DSPR2
-void vp9_lpf_horizontal_16_dspr2(unsigned char *s,
+void vpx_lpf_horizontal_16_dspr2(unsigned char *s,
int pitch,
const uint8_t *blimit,
const uint8_t *limit,
@@ -58,7 +57,7 @@
);
/* prefetch data for store */
- vp9_prefetch_store(s);
+ prefetch_store(s);
for (i = 0; i < (2 * count); i++) {
sp7 = s - (pitch << 3);
@@ -110,17 +109,17 @@
[sq4] "r" (sq4), [sq5] "r" (sq5), [sq6] "r" (sq6), [sq7] "r" (sq7)
);
- vp9_filter_hev_mask_flatmask4_dspr2(limit_vec, flimit_vec, thresh_vec,
- p1, p0, p3, p2, q0, q1, q2, q3,
- &hev, &mask, &flat);
+ filter_hev_mask_flatmask4_dspr2(limit_vec, flimit_vec, thresh_vec,
+ p1, p0, p3, p2, q0, q1, q2, q3,
+ &hev, &mask, &flat);
- vp9_flatmask5(p7, p6, p5, p4, p0, q0, q4, q5, q6, q7, &flat2);
+ flatmask5(p7, p6, p5, p4, p0, q0, q4, q5, q6, q7, &flat2);
/* f0 */
if (((flat2 == 0) && (flat == 0) && (mask != 0)) ||
((flat2 != 0) && (flat == 0) && (mask != 0))) {
- vp9_filter1_dspr2(mask, hev, p1, p0, q0, q1,
- &p1_f0, &p0_f0, &q0_f0, &q1_f0);
+ filter1_dspr2(mask, hev, p1, p0, q0, q1,
+ &p1_f0, &p0_f0, &q0_f0, &q1_f0);
__asm__ __volatile__ (
"sw %[p1_f0], (%[sp1]) \n\t"
@@ -139,17 +138,17 @@
/* f2 */
PACK_LEFT_0TO3()
PACK_LEFT_4TO7()
- vp9_wide_mbfilter_dspr2(&p7_l, &p6_l, &p5_l, &p4_l,
- &p3_l, &p2_l, &p1_l, &p0_l,
- &q0_l, &q1_l, &q2_l, &q3_l,
- &q4_l, &q5_l, &q6_l, &q7_l);
+ wide_mbfilter_dspr2(&p7_l, &p6_l, &p5_l, &p4_l,
+ &p3_l, &p2_l, &p1_l, &p0_l,
+ &q0_l, &q1_l, &q2_l, &q3_l,
+ &q4_l, &q5_l, &q6_l, &q7_l);
PACK_RIGHT_0TO3()
PACK_RIGHT_4TO7()
- vp9_wide_mbfilter_dspr2(&p7_r, &p6_r, &p5_r, &p4_r,
- &p3_r, &p2_r, &p1_r, &p0_r,
- &q0_r, &q1_r, &q2_r, &q3_r,
- &q4_r, &q5_r, &q6_r, &q7_r);
+ wide_mbfilter_dspr2(&p7_r, &p6_r, &p5_r, &p4_r,
+ &p3_r, &p2_r, &p1_r, &p0_r,
+ &q0_r, &q1_r, &q2_r, &q3_r,
+ &q4_r, &q5_r, &q6_r, &q7_r);
COMBINE_LEFT_RIGHT_0TO2()
COMBINE_LEFT_RIGHT_3TO6()
@@ -189,13 +188,13 @@
/* f1 */
/* left 2 element operation */
PACK_LEFT_0TO3()
- vp9_mbfilter_dspr2(&p3_l, &p2_l, &p1_l, &p0_l,
- &q0_l, &q1_l, &q2_l, &q3_l);
+ mbfilter_dspr2(&p3_l, &p2_l, &p1_l, &p0_l,
+ &q0_l, &q1_l, &q2_l, &q3_l);
/* right 2 element operation */
PACK_RIGHT_0TO3()
- vp9_mbfilter_dspr2(&p3_r, &p2_r, &p1_r, &p0_r,
- &q0_r, &q1_r, &q2_r, &q3_r);
+ mbfilter_dspr2(&p3_r, &p2_r, &p1_r, &p0_r,
+ &q0_r, &q1_r, &q2_r, &q3_r);
COMBINE_LEFT_RIGHT_0TO2()
@@ -215,18 +214,18 @@
);
} else if ((flat2 == 0) && (flat != 0) && (mask != 0)) {
/* f0+f1 */
- vp9_filter1_dspr2(mask, hev, p1, p0, q0, q1,
- &p1_f0, &p0_f0, &q0_f0, &q1_f0);
+ filter1_dspr2(mask, hev, p1, p0, q0, q1,
+ &p1_f0, &p0_f0, &q0_f0, &q1_f0);
/* left 2 element operation */
PACK_LEFT_0TO3()
- vp9_mbfilter_dspr2(&p3_l, &p2_l, &p1_l, &p0_l,
- &q0_l, &q1_l, &q2_l, &q3_l);
+ mbfilter_dspr2(&p3_l, &p2_l, &p1_l, &p0_l,
+ &q0_l, &q1_l, &q2_l, &q3_l);
/* right 2 element operation */
PACK_RIGHT_0TO3()
- vp9_mbfilter_dspr2(&p3_r, &p2_r, &p1_r, &p0_r,
- &q0_r, &q1_r, &q2_r, &q3_r);
+ mbfilter_dspr2(&p3_r, &p2_r, &p1_r, &p0_r,
+ &q0_r, &q1_r, &q2_r, &q3_r);
if (mask & flat & 0x000000FF) {
__asm__ __volatile__ (
@@ -399,36 +398,36 @@
} else if ((flat2 != 0) && (flat != 0) && (mask != 0)) {
/* f0 + f1 + f2 */
/* f0 function */
- vp9_filter1_dspr2(mask, hev, p1, p0, q0, q1,
- &p1_f0, &p0_f0, &q0_f0, &q1_f0);
+ filter1_dspr2(mask, hev, p1, p0, q0, q1,
+ &p1_f0, &p0_f0, &q0_f0, &q1_f0);
/* f1 function */
/* left 2 element operation */
PACK_LEFT_0TO3()
- vp9_mbfilter1_dspr2(p3_l, p2_l, p1_l, p0_l,
- q0_l, q1_l, q2_l, q3_l,
- &p2_l_f1, &p1_l_f1, &p0_l_f1,
- &q0_l_f1, &q1_l_f1, &q2_l_f1);
+ mbfilter1_dspr2(p3_l, p2_l, p1_l, p0_l,
+ q0_l, q1_l, q2_l, q3_l,
+ &p2_l_f1, &p1_l_f1, &p0_l_f1,
+ &q0_l_f1, &q1_l_f1, &q2_l_f1);
/* right 2 element operation */
PACK_RIGHT_0TO3()
- vp9_mbfilter1_dspr2(p3_r, p2_r, p1_r, p0_r,
- q0_r, q1_r, q2_r, q3_r,
- &p2_r_f1, &p1_r_f1, &p0_r_f1,
- &q0_r_f1, &q1_r_f1, &q2_r_f1);
+ mbfilter1_dspr2(p3_r, p2_r, p1_r, p0_r,
+ q0_r, q1_r, q2_r, q3_r,
+ &p2_r_f1, &p1_r_f1, &p0_r_f1,
+ &q0_r_f1, &q1_r_f1, &q2_r_f1);
/* f2 function */
PACK_LEFT_4TO7()
- vp9_wide_mbfilter_dspr2(&p7_l, &p6_l, &p5_l, &p4_l,
- &p3_l, &p2_l, &p1_l, &p0_l,
- &q0_l, &q1_l, &q2_l, &q3_l,
- &q4_l, &q5_l, &q6_l, &q7_l);
+ wide_mbfilter_dspr2(&p7_l, &p6_l, &p5_l, &p4_l,
+ &p3_l, &p2_l, &p1_l, &p0_l,
+ &q0_l, &q1_l, &q2_l, &q3_l,
+ &q4_l, &q5_l, &q6_l, &q7_l);
PACK_RIGHT_4TO7()
- vp9_wide_mbfilter_dspr2(&p7_r, &p6_r, &p5_r, &p4_r,
- &p3_r, &p2_r, &p1_r, &p0_r,
- &q0_r, &q1_r, &q2_r, &q3_r,
- &q4_r, &q5_r, &q6_r, &q7_r);
+ wide_mbfilter_dspr2(&p7_r, &p6_r, &p5_r, &p4_r,
+ &p3_r, &p2_r, &p1_r, &p0_r,
+ &q0_r, &q1_r, &q2_r, &q3_r,
+ &q4_r, &q5_r, &q6_r, &q7_r);
if (mask & flat & flat2 & 0x000000FF) {
__asm__ __volatile__ (
diff --git a/vp9/common/mips/dspr2/vp9_mblpf_vert_loopfilter_dspr2.c b/vpx_dsp/mips/loopfilter_mb_vert_dspr2.c
similarity index 91%
rename from vp9/common/mips/dspr2/vp9_mblpf_vert_loopfilter_dspr2.c
rename to vpx_dsp/mips/loopfilter_mb_vert_dspr2.c
index 65ea49a..e580f01 100644
--- a/vp9/common/mips/dspr2/vp9_mblpf_vert_loopfilter_dspr2.c
+++ b/vpx_dsp/mips/loopfilter_mb_vert_dspr2.c
@@ -10,17 +10,16 @@
#include <stdlib.h>
-#include "./vp9_rtcd.h"
-#include "vp9/common/vp9_common.h"
-#include "vp9/common/vp9_loopfilter.h"
-#include "vp9/common/vp9_onyxc_int.h"
-#include "vp9/common/mips/dspr2/vp9_loopfilter_macros_dspr2.h"
-#include "vp9/common/mips/dspr2/vp9_loopfilter_masks_dspr2.h"
-#include "vp9/common/mips/dspr2/vp9_loopfilter_filters_dspr2.h"
+#include "./vpx_dsp_rtcd.h"
+#include "vpx/vpx_integer.h"
#include "vpx_dsp/mips/common_dspr2.h"
+#include "vpx_dsp/mips/loopfilter_filters_dspr2.h"
+#include "vpx_dsp/mips/loopfilter_macros_dspr2.h"
+#include "vpx_dsp/mips/loopfilter_masks_dspr2.h"
+#include "vpx_mem/vpx_mem.h"
#if HAVE_DSPR2
-void vp9_lpf_vertical_16_dspr2(uint8_t *s,
+void vpx_lpf_vertical_16_dspr2(uint8_t *s,
int pitch,
const uint8_t *blimit,
const uint8_t *limit,
@@ -55,7 +54,7 @@
: [uthresh] "r" (uthresh), [uflimit] "r" (uflimit), [ulimit] "r" (ulimit)
);
- vp9_prefetch_store(s + pitch);
+ prefetch_store(s + pitch);
for (i = 0; i < 2; i++) {
s1 = s;
@@ -248,61 +247,61 @@
:
);
- vp9_filter_hev_mask_flatmask4_dspr2(limit_vec, flimit_vec, thresh_vec,
- p1, p0, p3, p2, q0, q1, q2, q3,
- &hev, &mask, &flat);
+ filter_hev_mask_flatmask4_dspr2(limit_vec, flimit_vec, thresh_vec,
+ p1, p0, p3, p2, q0, q1, q2, q3,
+ &hev, &mask, &flat);
- vp9_flatmask5(p7, p6, p5, p4, p0, q0, q4, q5, q6, q7, &flat2);
+ flatmask5(p7, p6, p5, p4, p0, q0, q4, q5, q6, q7, &flat2);
/* f0 */
if (((flat2 == 0) && (flat == 0) && (mask != 0)) ||
((flat2 != 0) && (flat == 0) && (mask != 0))) {
- vp9_filter1_dspr2(mask, hev, p1, p0, q0, q1,
- &p1_f0, &p0_f0, &q0_f0, &q1_f0);
+ filter1_dspr2(mask, hev, p1, p0, q0, q1,
+ &p1_f0, &p0_f0, &q0_f0, &q1_f0);
STORE_F0()
} else if ((flat2 == 0XFFFFFFFF) && (flat == 0xFFFFFFFF) &&
(mask == 0xFFFFFFFF)) {
/* f2 */
PACK_LEFT_0TO3()
PACK_LEFT_4TO7()
- vp9_wide_mbfilter_dspr2(&p7_l, &p6_l, &p5_l, &p4_l,
- &p3_l, &p2_l, &p1_l, &p0_l,
- &q0_l, &q1_l, &q2_l, &q3_l,
- &q4_l, &q5_l, &q6_l, &q7_l);
+ wide_mbfilter_dspr2(&p7_l, &p6_l, &p5_l, &p4_l,
+ &p3_l, &p2_l, &p1_l, &p0_l,
+ &q0_l, &q1_l, &q2_l, &q3_l,
+ &q4_l, &q5_l, &q6_l, &q7_l);
PACK_RIGHT_0TO3()
PACK_RIGHT_4TO7()
- vp9_wide_mbfilter_dspr2(&p7_r, &p6_r, &p5_r, &p4_r,
- &p3_r, &p2_r, &p1_r, &p0_r,
- &q0_r, &q1_r, &q2_r, &q3_r,
- &q4_r, &q5_r, &q6_r, &q7_r);
+ wide_mbfilter_dspr2(&p7_r, &p6_r, &p5_r, &p4_r,
+ &p3_r, &p2_r, &p1_r, &p0_r,
+ &q0_r, &q1_r, &q2_r, &q3_r,
+ &q4_r, &q5_r, &q6_r, &q7_r);
STORE_F2()
} else if ((flat2 == 0) && (flat == 0xFFFFFFFF) && (mask == 0xFFFFFFFF)) {
/* f1 */
PACK_LEFT_0TO3()
- vp9_mbfilter_dspr2(&p3_l, &p2_l, &p1_l, &p0_l,
- &q0_l, &q1_l, &q2_l, &q3_l);
+ mbfilter_dspr2(&p3_l, &p2_l, &p1_l, &p0_l,
+ &q0_l, &q1_l, &q2_l, &q3_l);
PACK_RIGHT_0TO3()
- vp9_mbfilter_dspr2(&p3_r, &p2_r, &p1_r, &p0_r,
- &q0_r, &q1_r, &q2_r, &q3_r);
+ mbfilter_dspr2(&p3_r, &p2_r, &p1_r, &p0_r,
+ &q0_r, &q1_r, &q2_r, &q3_r);
STORE_F1()
} else if ((flat2 == 0) && (flat != 0) && (mask != 0)) {
/* f0 + f1 */
- vp9_filter1_dspr2(mask, hev, p1, p0, q0, q1,
- &p1_f0, &p0_f0, &q0_f0, &q1_f0);
+ filter1_dspr2(mask, hev, p1, p0, q0, q1,
+ &p1_f0, &p0_f0, &q0_f0, &q1_f0);
/* left 2 element operation */
PACK_LEFT_0TO3()
- vp9_mbfilter_dspr2(&p3_l, &p2_l, &p1_l, &p0_l,
- &q0_l, &q1_l, &q2_l, &q3_l);
+ mbfilter_dspr2(&p3_l, &p2_l, &p1_l, &p0_l,
+ &q0_l, &q1_l, &q2_l, &q3_l);
/* right 2 element operation */
PACK_RIGHT_0TO3()
- vp9_mbfilter_dspr2(&p3_r, &p2_r, &p1_r, &p0_r,
- &q0_r, &q1_r, &q2_r, &q3_r);
+ mbfilter_dspr2(&p3_r, &p2_r, &p1_r, &p0_r,
+ &q0_r, &q1_r, &q2_r, &q3_r);
if (mask & flat & 0x000000FF) {
__asm__ __volatile__ (
@@ -466,32 +465,32 @@
}
} else if ((flat2 != 0) && (flat != 0) && (mask != 0)) {
/* f0+f1+f2 */
- vp9_filter1_dspr2(mask, hev, p1, p0, q0, q1,
- &p1_f0, &p0_f0, &q0_f0, &q1_f0);
+ filter1_dspr2(mask, hev, p1, p0, q0, q1,
+ &p1_f0, &p0_f0, &q0_f0, &q1_f0);
PACK_LEFT_0TO3()
- vp9_mbfilter1_dspr2(p3_l, p2_l, p1_l, p0_l,
- q0_l, q1_l, q2_l, q3_l,
- &p2_l_f1, &p1_l_f1, &p0_l_f1,
- &q0_l_f1, &q1_l_f1, &q2_l_f1);
+ mbfilter1_dspr2(p3_l, p2_l, p1_l, p0_l,
+ q0_l, q1_l, q2_l, q3_l,
+ &p2_l_f1, &p1_l_f1, &p0_l_f1,
+ &q0_l_f1, &q1_l_f1, &q2_l_f1);
PACK_RIGHT_0TO3()
- vp9_mbfilter1_dspr2(p3_r, p2_r, p1_r, p0_r,
- q0_r, q1_r, q2_r, q3_r,
- &p2_r_f1, &p1_r_f1, &p0_r_f1,
- &q0_r_f1, &q1_r_f1, &q2_r_f1);
+ mbfilter1_dspr2(p3_r, p2_r, p1_r, p0_r,
+ q0_r, q1_r, q2_r, q3_r,
+ &p2_r_f1, &p1_r_f1, &p0_r_f1,
+ &q0_r_f1, &q1_r_f1, &q2_r_f1);
PACK_LEFT_4TO7()
- vp9_wide_mbfilter_dspr2(&p7_l, &p6_l, &p5_l, &p4_l,
- &p3_l, &p2_l, &p1_l, &p0_l,
- &q0_l, &q1_l, &q2_l, &q3_l,
- &q4_l, &q5_l, &q6_l, &q7_l);
+ wide_mbfilter_dspr2(&p7_l, &p6_l, &p5_l, &p4_l,
+ &p3_l, &p2_l, &p1_l, &p0_l,
+ &q0_l, &q1_l, &q2_l, &q3_l,
+ &q4_l, &q5_l, &q6_l, &q7_l);
PACK_RIGHT_4TO7()
- vp9_wide_mbfilter_dspr2(&p7_r, &p6_r, &p5_r, &p4_r,
- &p3_r, &p2_r, &p1_r, &p0_r,
- &q0_r, &q1_r, &q2_r, &q3_r,
- &q4_r, &q5_r, &q6_r, &q7_r);
+ wide_mbfilter_dspr2(&p7_r, &p6_r, &p5_r, &p4_r,
+ &p3_r, &p2_r, &p1_r, &p0_r,
+ &q0_r, &q1_r, &q2_r, &q3_r,
+ &q4_r, &q5_r, &q6_r, &q7_r);
if (mask & flat & flat2 & 0x000000FF) {
__asm__ __volatile__ (
diff --git a/vpx_dsp/quantize.c b/vpx_dsp/quantize.c
new file mode 100644
index 0000000..137f5bc
--- /dev/null
+++ b/vpx_dsp/quantize.c
@@ -0,0 +1,337 @@
+/*
+ * Copyright (c) 2015 The WebM project authors. All Rights Reserved.
+ *
+ * Use of this source code is governed by a BSD-style license
+ * that can be found in the LICENSE file in the root of the source
+ * tree. An additional intellectual property rights grant can be found
+ * in the file PATENTS. All contributing project authors may
+ * be found in the AUTHORS file in the root of the source tree.
+ */
+
+#include "vpx_dsp/quantize.h"
+#include "vpx_mem/vpx_mem.h"
+
+void vp9_quantize_dc(const tran_low_t *coeff_ptr,
+ int n_coeffs, int skip_block,
+ const int16_t *round_ptr, const int16_t quant,
+ tran_low_t *qcoeff_ptr, tran_low_t *dqcoeff_ptr,
+ const int16_t dequant_ptr, uint16_t *eob_ptr) {
+ const int rc = 0;
+ const int coeff = coeff_ptr[rc];
+ const int coeff_sign = (coeff >> 31);
+ const int abs_coeff = (coeff ^ coeff_sign) - coeff_sign;
+ int tmp, eob = -1;
+
+ memset(qcoeff_ptr, 0, n_coeffs * sizeof(*qcoeff_ptr));
+ memset(dqcoeff_ptr, 0, n_coeffs * sizeof(*dqcoeff_ptr));
+
+ if (!skip_block) {
+ tmp = clamp(abs_coeff + round_ptr[rc != 0], INT16_MIN, INT16_MAX);
+ tmp = (tmp * quant) >> 16;
+ qcoeff_ptr[rc] = (tmp ^ coeff_sign) - coeff_sign;
+ dqcoeff_ptr[rc] = qcoeff_ptr[rc] * dequant_ptr;
+ if (tmp)
+ eob = 0;
+ }
+ *eob_ptr = eob + 1;
+}
+
+#if CONFIG_VP9_HIGHBITDEPTH
+void vp9_highbd_quantize_dc(const tran_low_t *coeff_ptr,
+ int n_coeffs, int skip_block,
+ const int16_t *round_ptr, const int16_t quant,
+ tran_low_t *qcoeff_ptr, tran_low_t *dqcoeff_ptr,
+ const int16_t dequant_ptr, uint16_t *eob_ptr) {
+ int eob = -1;
+
+ memset(qcoeff_ptr, 0, n_coeffs * sizeof(*qcoeff_ptr));
+ memset(dqcoeff_ptr, 0, n_coeffs * sizeof(*dqcoeff_ptr));
+
+ if (!skip_block) {
+ const int coeff = coeff_ptr[0];
+ const int coeff_sign = (coeff >> 31);
+ const int abs_coeff = (coeff ^ coeff_sign) - coeff_sign;
+ const int64_t tmp = abs_coeff + round_ptr[0];
+ const uint32_t abs_qcoeff = (uint32_t)((tmp * quant) >> 16);
+ qcoeff_ptr[0] = (tran_low_t)((abs_qcoeff ^ coeff_sign) - coeff_sign);
+ dqcoeff_ptr[0] = qcoeff_ptr[0] * dequant_ptr;
+ if (abs_qcoeff)
+ eob = 0;
+ }
+ *eob_ptr = eob + 1;
+}
+#endif
+
+void vp9_quantize_dc_32x32(const tran_low_t *coeff_ptr, int skip_block,
+ const int16_t *round_ptr, const int16_t quant,
+ tran_low_t *qcoeff_ptr, tran_low_t *dqcoeff_ptr,
+ const int16_t dequant_ptr, uint16_t *eob_ptr) {
+ const int n_coeffs = 1024;
+ const int rc = 0;
+ const int coeff = coeff_ptr[rc];
+ const int coeff_sign = (coeff >> 31);
+ const int abs_coeff = (coeff ^ coeff_sign) - coeff_sign;
+ int tmp, eob = -1;
+
+ memset(qcoeff_ptr, 0, n_coeffs * sizeof(*qcoeff_ptr));
+ memset(dqcoeff_ptr, 0, n_coeffs * sizeof(*dqcoeff_ptr));
+
+ if (!skip_block) {
+ tmp = clamp(abs_coeff + ROUND_POWER_OF_TWO(round_ptr[rc != 0], 1),
+ INT16_MIN, INT16_MAX);
+ tmp = (tmp * quant) >> 15;
+ qcoeff_ptr[rc] = (tmp ^ coeff_sign) - coeff_sign;
+ dqcoeff_ptr[rc] = qcoeff_ptr[rc] * dequant_ptr / 2;
+ if (tmp)
+ eob = 0;
+ }
+ *eob_ptr = eob + 1;
+}
+
+#if CONFIG_VP9_HIGHBITDEPTH
+void vp9_highbd_quantize_dc_32x32(const tran_low_t *coeff_ptr,
+ int skip_block,
+ const int16_t *round_ptr,
+ const int16_t quant,
+ tran_low_t *qcoeff_ptr,
+ tran_low_t *dqcoeff_ptr,
+ const int16_t dequant_ptr,
+ uint16_t *eob_ptr) {
+ const int n_coeffs = 1024;
+ int eob = -1;
+
+ memset(qcoeff_ptr, 0, n_coeffs * sizeof(*qcoeff_ptr));
+ memset(dqcoeff_ptr, 0, n_coeffs * sizeof(*dqcoeff_ptr));
+
+ if (!skip_block) {
+ const int coeff = coeff_ptr[0];
+ const int coeff_sign = (coeff >> 31);
+ const int abs_coeff = (coeff ^ coeff_sign) - coeff_sign;
+ const int64_t tmp = abs_coeff + ROUND_POWER_OF_TWO(round_ptr[0], 1);
+ const uint32_t abs_qcoeff = (uint32_t)((tmp * quant) >> 15);
+ qcoeff_ptr[0] = (tran_low_t)((abs_qcoeff ^ coeff_sign) - coeff_sign);
+ dqcoeff_ptr[0] = qcoeff_ptr[0] * dequant_ptr / 2;
+ if (abs_qcoeff)
+ eob = 0;
+ }
+ *eob_ptr = eob + 1;
+}
+#endif
+
+void vp9_quantize_b_c(const tran_low_t *coeff_ptr, intptr_t n_coeffs,
+ int skip_block,
+ const int16_t *zbin_ptr, const int16_t *round_ptr,
+ const int16_t *quant_ptr, const int16_t *quant_shift_ptr,
+ tran_low_t *qcoeff_ptr, tran_low_t *dqcoeff_ptr,
+ const int16_t *dequant_ptr,
+ uint16_t *eob_ptr,
+ const int16_t *scan, const int16_t *iscan) {
+ int i, non_zero_count = (int)n_coeffs, eob = -1;
+ const int zbins[2] = {zbin_ptr[0], zbin_ptr[1]};
+ const int nzbins[2] = {zbins[0] * -1, zbins[1] * -1};
+ (void)iscan;
+
+ memset(qcoeff_ptr, 0, n_coeffs * sizeof(*qcoeff_ptr));
+ memset(dqcoeff_ptr, 0, n_coeffs * sizeof(*dqcoeff_ptr));
+
+ if (!skip_block) {
+ // Pre-scan pass
+ for (i = (int)n_coeffs - 1; i >= 0; i--) {
+ const int rc = scan[i];
+ const int coeff = coeff_ptr[rc];
+
+ if (coeff < zbins[rc != 0] && coeff > nzbins[rc != 0])
+ non_zero_count--;
+ else
+ break;
+ }
+
+ // Quantization pass: All coefficients with index >= zero_flag are
+ // skippable. Note: zero_flag can be zero.
+ for (i = 0; i < non_zero_count; i++) {
+ const int rc = scan[i];
+ const int coeff = coeff_ptr[rc];
+ const int coeff_sign = (coeff >> 31);
+ const int abs_coeff = (coeff ^ coeff_sign) - coeff_sign;
+
+ if (abs_coeff >= zbins[rc != 0]) {
+ int tmp = clamp(abs_coeff + round_ptr[rc != 0], INT16_MIN, INT16_MAX);
+ tmp = ((((tmp * quant_ptr[rc != 0]) >> 16) + tmp) *
+ quant_shift_ptr[rc != 0]) >> 16; // quantization
+ qcoeff_ptr[rc] = (tmp ^ coeff_sign) - coeff_sign;
+ dqcoeff_ptr[rc] = qcoeff_ptr[rc] * dequant_ptr[rc != 0];
+
+ if (tmp)
+ eob = i;
+ }
+ }
+ }
+ *eob_ptr = eob + 1;
+}
+
+#if CONFIG_VP9_HIGHBITDEPTH
+void vp9_highbd_quantize_b_c(const tran_low_t *coeff_ptr, intptr_t n_coeffs,
+ int skip_block, const int16_t *zbin_ptr,
+ const int16_t *round_ptr, const int16_t *quant_ptr,
+ const int16_t *quant_shift_ptr,
+ tran_low_t *qcoeff_ptr, tran_low_t *dqcoeff_ptr,
+ const int16_t *dequant_ptr,
+ uint16_t *eob_ptr, const int16_t *scan,
+ const int16_t *iscan) {
+ int i, non_zero_count = (int)n_coeffs, eob = -1;
+ const int zbins[2] = {zbin_ptr[0], zbin_ptr[1]};
+ const int nzbins[2] = {zbins[0] * -1, zbins[1] * -1};
+ (void)iscan;
+
+ memset(qcoeff_ptr, 0, n_coeffs * sizeof(*qcoeff_ptr));
+ memset(dqcoeff_ptr, 0, n_coeffs * sizeof(*dqcoeff_ptr));
+
+ if (!skip_block) {
+ // Pre-scan pass
+ for (i = (int)n_coeffs - 1; i >= 0; i--) {
+ const int rc = scan[i];
+ const int coeff = coeff_ptr[rc];
+
+ if (coeff < zbins[rc != 0] && coeff > nzbins[rc != 0])
+ non_zero_count--;
+ else
+ break;
+ }
+
+ // Quantization pass: All coefficients with index >= zero_flag are
+ // skippable. Note: zero_flag can be zero.
+ for (i = 0; i < non_zero_count; i++) {
+ const int rc = scan[i];
+ const int coeff = coeff_ptr[rc];
+ const int coeff_sign = (coeff >> 31);
+ const int abs_coeff = (coeff ^ coeff_sign) - coeff_sign;
+
+ if (abs_coeff >= zbins[rc != 0]) {
+ const int64_t tmp1 = abs_coeff + round_ptr[rc != 0];
+ const int64_t tmp2 = ((tmp1 * quant_ptr[rc != 0]) >> 16) + tmp1;
+ const uint32_t abs_qcoeff =
+ (uint32_t)((tmp2 * quant_shift_ptr[rc != 0]) >> 16);
+ qcoeff_ptr[rc] = (tran_low_t)((abs_qcoeff ^ coeff_sign) - coeff_sign);
+ dqcoeff_ptr[rc] = qcoeff_ptr[rc] * dequant_ptr[rc != 0];
+ if (abs_qcoeff)
+ eob = i;
+ }
+ }
+ }
+ *eob_ptr = eob + 1;
+}
+#endif
+
+void vp9_quantize_b_32x32_c(const tran_low_t *coeff_ptr, intptr_t n_coeffs,
+ int skip_block,
+ const int16_t *zbin_ptr, const int16_t *round_ptr,
+ const int16_t *quant_ptr,
+ const int16_t *quant_shift_ptr,
+ tran_low_t *qcoeff_ptr, tran_low_t *dqcoeff_ptr,
+ const int16_t *dequant_ptr,
+ uint16_t *eob_ptr,
+ const int16_t *scan, const int16_t *iscan) {
+ const int zbins[2] = {ROUND_POWER_OF_TWO(zbin_ptr[0], 1),
+ ROUND_POWER_OF_TWO(zbin_ptr[1], 1)};
+ const int nzbins[2] = {zbins[0] * -1, zbins[1] * -1};
+
+ int idx = 0;
+ int idx_arr[1024];
+ int i, eob = -1;
+ (void)iscan;
+
+ memset(qcoeff_ptr, 0, n_coeffs * sizeof(*qcoeff_ptr));
+ memset(dqcoeff_ptr, 0, n_coeffs * sizeof(*dqcoeff_ptr));
+
+ if (!skip_block) {
+ // Pre-scan pass
+ for (i = 0; i < n_coeffs; i++) {
+ const int rc = scan[i];
+ const int coeff = coeff_ptr[rc];
+
+ // If the coefficient is out of the base ZBIN range, keep it for
+ // quantization.
+ if (coeff >= zbins[rc != 0] || coeff <= nzbins[rc != 0])
+ idx_arr[idx++] = i;
+ }
+
+ // Quantization pass: only process the coefficients selected in
+ // pre-scan pass. Note: idx can be zero.
+ for (i = 0; i < idx; i++) {
+ const int rc = scan[idx_arr[i]];
+ const int coeff = coeff_ptr[rc];
+ const int coeff_sign = (coeff >> 31);
+ int tmp;
+ int abs_coeff = (coeff ^ coeff_sign) - coeff_sign;
+ abs_coeff += ROUND_POWER_OF_TWO(round_ptr[rc != 0], 1);
+ abs_coeff = clamp(abs_coeff, INT16_MIN, INT16_MAX);
+ tmp = ((((abs_coeff * quant_ptr[rc != 0]) >> 16) + abs_coeff) *
+ quant_shift_ptr[rc != 0]) >> 15;
+
+ qcoeff_ptr[rc] = (tmp ^ coeff_sign) - coeff_sign;
+ dqcoeff_ptr[rc] = qcoeff_ptr[rc] * dequant_ptr[rc != 0] / 2;
+
+ if (tmp)
+ eob = idx_arr[i];
+ }
+ }
+ *eob_ptr = eob + 1;
+}
+
+#if CONFIG_VP9_HIGHBITDEPTH
+void vp9_highbd_quantize_b_32x32_c(const tran_low_t *coeff_ptr,
+ intptr_t n_coeffs, int skip_block,
+ const int16_t *zbin_ptr,
+ const int16_t *round_ptr,
+ const int16_t *quant_ptr,
+ const int16_t *quant_shift_ptr,
+ tran_low_t *qcoeff_ptr,
+ tran_low_t *dqcoeff_ptr,
+ const int16_t *dequant_ptr,
+ uint16_t *eob_ptr,
+ const int16_t *scan, const int16_t *iscan) {
+ const int zbins[2] = {ROUND_POWER_OF_TWO(zbin_ptr[0], 1),
+ ROUND_POWER_OF_TWO(zbin_ptr[1], 1)};
+ const int nzbins[2] = {zbins[0] * -1, zbins[1] * -1};
+
+ int idx = 0;
+ int idx_arr[1024];
+ int i, eob = -1;
+ (void)iscan;
+
+ memset(qcoeff_ptr, 0, n_coeffs * sizeof(*qcoeff_ptr));
+ memset(dqcoeff_ptr, 0, n_coeffs * sizeof(*dqcoeff_ptr));
+
+ if (!skip_block) {
+ // Pre-scan pass
+ for (i = 0; i < n_coeffs; i++) {
+ const int rc = scan[i];
+ const int coeff = coeff_ptr[rc];
+
+ // If the coefficient is out of the base ZBIN range, keep it for
+ // quantization.
+ if (coeff >= zbins[rc != 0] || coeff <= nzbins[rc != 0])
+ idx_arr[idx++] = i;
+ }
+
+ // Quantization pass: only process the coefficients selected in
+ // pre-scan pass. Note: idx can be zero.
+ for (i = 0; i < idx; i++) {
+ const int rc = scan[idx_arr[i]];
+ const int coeff = coeff_ptr[rc];
+ const int coeff_sign = (coeff >> 31);
+ const int abs_coeff = (coeff ^ coeff_sign) - coeff_sign;
+ const int64_t tmp1 = abs_coeff
+ + ROUND_POWER_OF_TWO(round_ptr[rc != 0], 1);
+ const int64_t tmp2 = ((tmp1 * quant_ptr[rc != 0]) >> 16) + tmp1;
+ const uint32_t abs_qcoeff =
+ (uint32_t)((tmp2 * quant_shift_ptr[rc != 0]) >> 15);
+ qcoeff_ptr[rc] = (tran_low_t)((abs_qcoeff ^ coeff_sign) - coeff_sign);
+ dqcoeff_ptr[rc] = qcoeff_ptr[rc] * dequant_ptr[rc != 0] / 2;
+ if (abs_qcoeff)
+ eob = idx_arr[i];
+ }
+ }
+ *eob_ptr = eob + 1;
+}
+#endif
diff --git a/vpx_dsp/quantize.h b/vpx_dsp/quantize.h
new file mode 100644
index 0000000..0ad1744
--- /dev/null
+++ b/vpx_dsp/quantize.h
@@ -0,0 +1,51 @@
+/*
+ * Copyright (c) 2015 The WebM project authors. All Rights Reserved.
+ *
+ * Use of this source code is governed by a BSD-style license
+ * that can be found in the LICENSE file in the root of the source
+ * tree. An additional intellectual property rights grant can be found
+ * in the file PATENTS. All contributing project authors may
+ * be found in the AUTHORS file in the root of the source tree.
+ */
+
+#ifndef VPX_DSP_QUANTIZE_H_
+#define VPX_DSP_QUANTIZE_H_
+
+#include "./vpx_config.h"
+#include "vpx_dsp/vpx_dsp_common.h"
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+void vp9_quantize_dc(const tran_low_t *coeff_ptr,
+ int n_coeffs, int skip_block,
+ const int16_t *round_ptr, const int16_t quant_ptr,
+ tran_low_t *qcoeff_ptr, tran_low_t *dqcoeff_ptr,
+ const int16_t dequant_ptr, uint16_t *eob_ptr);
+void vp9_quantize_dc_32x32(const tran_low_t *coeff_ptr, int skip_block,
+ const int16_t *round_ptr, const int16_t quant_ptr,
+ tran_low_t *qcoeff_ptr, tran_low_t *dqcoeff_ptr,
+ const int16_t dequant_ptr, uint16_t *eob_ptr);
+
+#if CONFIG_VP9_HIGHBITDEPTH
+void vp9_highbd_quantize_dc(const tran_low_t *coeff_ptr,
+ int n_coeffs, int skip_block,
+ const int16_t *round_ptr, const int16_t quant_ptr,
+ tran_low_t *qcoeff_ptr, tran_low_t *dqcoeff_ptr,
+ const int16_t dequant_ptr, uint16_t *eob_ptr);
+void vp9_highbd_quantize_dc_32x32(const tran_low_t *coeff_ptr,
+ int skip_block,
+ const int16_t *round_ptr,
+ const int16_t quant_ptr,
+ tran_low_t *qcoeff_ptr,
+ tran_low_t *dqcoeff_ptr,
+ const int16_t dequant_ptr,
+ uint16_t *eob_ptr);
+#endif
+
+#ifdef __cplusplus
+} // extern "C"
+#endif
+
+#endif // VPX_DSP_QUANTIZE_H_
diff --git a/vpx_dsp/vpx_dsp.mk b/vpx_dsp/vpx_dsp.mk
index 278cc17..415a436 100644
--- a/vpx_dsp/vpx_dsp.mk
+++ b/vpx_dsp/vpx_dsp.mk
@@ -54,6 +54,21 @@
DSP_SRCS-$(HAVE_SSE2) += x86/highbd_loopfilter_sse2.c
endif # CONFIG_VP9_HIGHBITDEPTH
+ifeq ($(CONFIG_VP9_ENCODER),yes)
+DSP_SRCS-yes += quantize.c
+DSP_SRCS-yes += quantize.h
+
+DSP_SRCS-$(HAVE_SSE2) += x86/quantize_sse2.c
+ifeq ($(CONFIG_VP9_HIGHBITDEPTH),yes)
+DSP_SRCS-$(HAVE_SSE2) += x86/highbd_quantize_intrin_sse2.c
+endif
+ifeq ($(ARCH_X86_64),yes)
+ifeq ($(CONFIG_USE_X86INC),yes)
+DSP_SRCS-$(HAVE_SSSE3) += x86/quantize_ssse3_x86_64.asm
+endif
+endif
+endif # CONFIG_VP9_ENCODER
+
ifeq ($(CONFIG_ENCODERS),yes)
DSP_SRCS-yes += sad.c
DSP_SRCS-yes += subtract.c
diff --git a/vpx_dsp/vpx_dsp_common.h b/vpx_dsp/vpx_dsp_common.h
index c7bac95..6793036 100644
--- a/vpx_dsp/vpx_dsp_common.h
+++ b/vpx_dsp/vpx_dsp_common.h
@@ -24,6 +24,20 @@
#define MIN(x, y) (((x) < (y)) ? (x) : (y))
#define MAX(x, y) (((x) > (y)) ? (x) : (y))
+#if CONFIG_VP9_HIGHBITDEPTH
+// Note:
+// tran_low_t is the datatype used for final transform coefficients.
+// tran_high_t is the datatype used for intermediate transform stages.
+typedef int64_t tran_high_t;
+typedef int32_t tran_low_t;
+#else
+// Note:
+// tran_low_t is the datatype used for final transform coefficients.
+// tran_high_t is the datatype used for intermediate transform stages.
+typedef int32_t tran_high_t;
+typedef int16_t tran_low_t;
+#endif // CONFIG_VP9_HIGHBITDEPTH
+
static INLINE uint8_t clip_pixel(int val) {
return (val > 255) ? 255 : (val < 0) ? 0 : val;
}
diff --git a/vpx_dsp/vpx_dsp_rtcd_defs.pl b/vpx_dsp/vpx_dsp_rtcd_defs.pl
index 90666e0..393539c 100644
--- a/vpx_dsp/vpx_dsp_rtcd_defs.pl
+++ b/vpx_dsp/vpx_dsp_rtcd_defs.pl
@@ -5,12 +5,18 @@
*/
#include "vpx/vpx_integer.h"
+#include "vpx_dsp/vpx_dsp_common.h"
EOF
}
forward_decls qw/vpx_dsp_forward_decls/;
-# Functions which use x86inc.asm instead of x86_abi_support.asm
+# x86inc.asm had specific constraints. break it out so it's easy to disable.
+# zero all the variables to avoid tricky else conditions.
+$mmx_x86inc = $sse_x86inc = $sse2_x86inc = $ssse3_x86inc = $avx_x86inc =
+ $avx2_x86inc = '';
+$mmx_x86_64_x86inc = $sse_x86_64_x86inc = $sse2_x86_64_x86inc =
+ $ssse3_x86_64_x86inc = $avx_x86_64_x86inc = $avx2_x86_64_x86inc = '';
if (vpx_config("CONFIG_USE_X86INC") eq "yes") {
$mmx_x86inc = 'mmx';
$sse_x86inc = 'sse';
@@ -18,97 +24,125 @@
$ssse3_x86inc = 'ssse3';
$avx_x86inc = 'avx';
$avx2_x86inc = 'avx2';
-} else {
- $mmx_x86inc = $sse_x86inc = $sse2_x86inc = $ssse3_x86inc =
- $avx_x86inc = $avx2_x86inc = '';
+ if ($opts{arch} eq "x86_64") {
+ $mmx_x86_64_x86inc = 'mmx';
+ $sse_x86_64_x86inc = 'sse';
+ $sse2_x86_64_x86inc = 'sse2';
+ $ssse3_x86_64_x86inc = 'ssse3';
+ $avx_x86_64_x86inc = 'avx';
+ $avx2_x86_64_x86inc = 'avx2';
+ }
}
-# Functions which are 64 bit only.
+# functions that are 64 bit only.
+$mmx_x86_64 = $sse2_x86_64 = $ssse3_x86_64 = $avx_x86_64 = $avx2_x86_64 = '';
if ($opts{arch} eq "x86_64") {
$mmx_x86_64 = 'mmx';
$sse2_x86_64 = 'sse2';
$ssse3_x86_64 = 'ssse3';
$avx_x86_64 = 'avx';
$avx2_x86_64 = 'avx2';
-} else {
- $mmx_x86_64 = $sse2_x86_64 = $ssse3_x86_64 =
- $avx_x86_64 = $avx2_x86_64 = '';
}
#
# Loopfilter
#
-add_proto qw/void vp9_lpf_vertical_16/, "uint8_t *s, int pitch, const uint8_t *blimit, const uint8_t *limit, const uint8_t *thresh";
-specialize qw/vp9_lpf_vertical_16 sse2 neon_asm msa/;
-$vp9_lpf_vertical_16_neon_asm=vp9_lpf_vertical_16_neon;
+add_proto qw/void vpx_lpf_vertical_16/, "uint8_t *s, int pitch, const uint8_t *blimit, const uint8_t *limit, const uint8_t *thresh";
+specialize qw/vpx_lpf_vertical_16 sse2 neon_asm msa/;
+$vpx_lpf_vertical_16_neon_asm=vpx_lpf_vertical_16_neon;
-add_proto qw/void vp9_lpf_vertical_16_dual/, "uint8_t *s, int pitch, const uint8_t *blimit, const uint8_t *limit, const uint8_t *thresh";
-specialize qw/vp9_lpf_vertical_16_dual sse2 neon_asm msa/;
-$vp9_lpf_vertical_16_dual_neon_asm=vp9_lpf_vertical_16_dual_neon;
+add_proto qw/void vpx_lpf_vertical_16_dual/, "uint8_t *s, int pitch, const uint8_t *blimit, const uint8_t *limit, const uint8_t *thresh";
+specialize qw/vpx_lpf_vertical_16_dual sse2 neon_asm msa/;
+$vpx_lpf_vertical_16_dual_neon_asm=vpx_lpf_vertical_16_dual_neon;
-add_proto qw/void vp9_lpf_vertical_8/, "uint8_t *s, int pitch, const uint8_t *blimit, const uint8_t *limit, const uint8_t *thresh, int count";
-specialize qw/vp9_lpf_vertical_8 sse2 neon msa/;
+add_proto qw/void vpx_lpf_vertical_8/, "uint8_t *s, int pitch, const uint8_t *blimit, const uint8_t *limit, const uint8_t *thresh, int count";
+specialize qw/vpx_lpf_vertical_8 sse2 neon msa/;
-add_proto qw/void vp9_lpf_vertical_8_dual/, "uint8_t *s, int pitch, const uint8_t *blimit0, const uint8_t *limit0, const uint8_t *thresh0, const uint8_t *blimit1, const uint8_t *limit1, const uint8_t *thresh1";
-specialize qw/vp9_lpf_vertical_8_dual sse2 neon_asm msa/;
-$vp9_lpf_vertical_8_dual_neon_asm=vp9_lpf_vertical_8_dual_neon;
+add_proto qw/void vpx_lpf_vertical_8_dual/, "uint8_t *s, int pitch, const uint8_t *blimit0, const uint8_t *limit0, const uint8_t *thresh0, const uint8_t *blimit1, const uint8_t *limit1, const uint8_t *thresh1";
+specialize qw/vpx_lpf_vertical_8_dual sse2 neon_asm msa/;
+$vpx_lpf_vertical_8_dual_neon_asm=vpx_lpf_vertical_8_dual_neon;
-add_proto qw/void vp9_lpf_vertical_4/, "uint8_t *s, int pitch, const uint8_t *blimit, const uint8_t *limit, const uint8_t *thresh, int count";
-specialize qw/vp9_lpf_vertical_4 mmx neon msa/;
+add_proto qw/void vpx_lpf_vertical_4/, "uint8_t *s, int pitch, const uint8_t *blimit, const uint8_t *limit, const uint8_t *thresh, int count";
+specialize qw/vpx_lpf_vertical_4 mmx neon msa/;
-add_proto qw/void vp9_lpf_vertical_4_dual/, "uint8_t *s, int pitch, const uint8_t *blimit0, const uint8_t *limit0, const uint8_t *thresh0, const uint8_t *blimit1, const uint8_t *limit1, const uint8_t *thresh1";
-specialize qw/vp9_lpf_vertical_4_dual sse2 neon msa/;
+add_proto qw/void vpx_lpf_vertical_4_dual/, "uint8_t *s, int pitch, const uint8_t *blimit0, const uint8_t *limit0, const uint8_t *thresh0, const uint8_t *blimit1, const uint8_t *limit1, const uint8_t *thresh1";
+specialize qw/vpx_lpf_vertical_4_dual sse2 neon msa/;
-add_proto qw/void vp9_lpf_horizontal_16/, "uint8_t *s, int pitch, const uint8_t *blimit, const uint8_t *limit, const uint8_t *thresh, int count";
-specialize qw/vp9_lpf_horizontal_16 sse2 avx2 neon_asm msa/;
-$vp9_lpf_horizontal_16_neon_asm=vp9_lpf_horizontal_16_neon;
+add_proto qw/void vpx_lpf_horizontal_16/, "uint8_t *s, int pitch, const uint8_t *blimit, const uint8_t *limit, const uint8_t *thresh, int count";
+specialize qw/vpx_lpf_horizontal_16 sse2 avx2 neon_asm msa/;
+$vpx_lpf_horizontal_16_neon_asm=vpx_lpf_horizontal_16_neon;
-add_proto qw/void vp9_lpf_horizontal_8/, "uint8_t *s, int pitch, const uint8_t *blimit, const uint8_t *limit, const uint8_t *thresh, int count";
-specialize qw/vp9_lpf_horizontal_8 sse2 neon msa/;
+add_proto qw/void vpx_lpf_horizontal_8/, "uint8_t *s, int pitch, const uint8_t *blimit, const uint8_t *limit, const uint8_t *thresh, int count";
+specialize qw/vpx_lpf_horizontal_8 sse2 neon msa/;
-add_proto qw/void vp9_lpf_horizontal_8_dual/, "uint8_t *s, int pitch, const uint8_t *blimit0, const uint8_t *limit0, const uint8_t *thresh0, const uint8_t *blimit1, const uint8_t *limit1, const uint8_t *thresh1";
-specialize qw/vp9_lpf_horizontal_8_dual sse2 neon_asm msa/;
-$vp9_lpf_horizontal_8_dual_neon_asm=vp9_lpf_horizontal_8_dual_neon;
+add_proto qw/void vpx_lpf_horizontal_8_dual/, "uint8_t *s, int pitch, const uint8_t *blimit0, const uint8_t *limit0, const uint8_t *thresh0, const uint8_t *blimit1, const uint8_t *limit1, const uint8_t *thresh1";
+specialize qw/vpx_lpf_horizontal_8_dual sse2 neon_asm msa/;
+$vpx_lpf_horizontal_8_dual_neon_asm=vpx_lpf_horizontal_8_dual_neon;
-add_proto qw/void vp9_lpf_horizontal_4/, "uint8_t *s, int pitch, const uint8_t *blimit, const uint8_t *limit, const uint8_t *thresh, int count";
-specialize qw/vp9_lpf_horizontal_4 mmx neon msa/;
+add_proto qw/void vpx_lpf_horizontal_4/, "uint8_t *s, int pitch, const uint8_t *blimit, const uint8_t *limit, const uint8_t *thresh, int count";
+specialize qw/vpx_lpf_horizontal_4 mmx neon msa/;
-add_proto qw/void vp9_lpf_horizontal_4_dual/, "uint8_t *s, int pitch, const uint8_t *blimit0, const uint8_t *limit0, const uint8_t *thresh0, const uint8_t *blimit1, const uint8_t *limit1, const uint8_t *thresh1";
-specialize qw/vp9_lpf_horizontal_4_dual sse2 neon msa/;
+add_proto qw/void vpx_lpf_horizontal_4_dual/, "uint8_t *s, int pitch, const uint8_t *blimit0, const uint8_t *limit0, const uint8_t *thresh0, const uint8_t *blimit1, const uint8_t *limit1, const uint8_t *thresh1";
+specialize qw/vpx_lpf_horizontal_4_dual sse2 neon msa/;
if (vpx_config("CONFIG_VP9_HIGHBITDEPTH") eq "yes") {
- add_proto qw/void vp9_highbd_lpf_vertical_16/, "uint16_t *s, int pitch, const uint8_t *blimit, const uint8_t *limit, const uint8_t *thresh, int bd";
- specialize qw/vp9_highbd_lpf_vertical_16 sse2/;
+ add_proto qw/void vpx_highbd_lpf_vertical_16/, "uint16_t *s, int pitch, const uint8_t *blimit, const uint8_t *limit, const uint8_t *thresh, int bd";
+ specialize qw/vpx_highbd_lpf_vertical_16 sse2/;
- add_proto qw/void vp9_highbd_lpf_vertical_16_dual/, "uint16_t *s, int pitch, const uint8_t *blimit, const uint8_t *limit, const uint8_t *thresh, int bd";
- specialize qw/vp9_highbd_lpf_vertical_16_dual sse2/;
+ add_proto qw/void vpx_highbd_lpf_vertical_16_dual/, "uint16_t *s, int pitch, const uint8_t *blimit, const uint8_t *limit, const uint8_t *thresh, int bd";
+ specialize qw/vpx_highbd_lpf_vertical_16_dual sse2/;
- add_proto qw/void vp9_highbd_lpf_vertical_8/, "uint16_t *s, int pitch, const uint8_t *blimit, const uint8_t *limit, const uint8_t *thresh, int count, int bd";
- specialize qw/vp9_highbd_lpf_vertical_8 sse2/;
+ add_proto qw/void vpx_highbd_lpf_vertical_8/, "uint16_t *s, int pitch, const uint8_t *blimit, const uint8_t *limit, const uint8_t *thresh, int count, int bd";
+ specialize qw/vpx_highbd_lpf_vertical_8 sse2/;
- add_proto qw/void vp9_highbd_lpf_vertical_8_dual/, "uint16_t *s, int pitch, const uint8_t *blimit0, const uint8_t *limit0, const uint8_t *thresh0, const uint8_t *blimit1, const uint8_t *limit1, const uint8_t *thresh1, int bd";
- specialize qw/vp9_highbd_lpf_vertical_8_dual sse2/;
+ add_proto qw/void vpx_highbd_lpf_vertical_8_dual/, "uint16_t *s, int pitch, const uint8_t *blimit0, const uint8_t *limit0, const uint8_t *thresh0, const uint8_t *blimit1, const uint8_t *limit1, const uint8_t *thresh1, int bd";
+ specialize qw/vpx_highbd_lpf_vertical_8_dual sse2/;
- add_proto qw/void vp9_highbd_lpf_vertical_4/, "uint16_t *s, int pitch, const uint8_t *blimit, const uint8_t *limit, const uint8_t *thresh, int count, int bd";
- specialize qw/vp9_highbd_lpf_vertical_4 sse2/;
+ add_proto qw/void vpx_highbd_lpf_vertical_4/, "uint16_t *s, int pitch, const uint8_t *blimit, const uint8_t *limit, const uint8_t *thresh, int count, int bd";
+ specialize qw/vpx_highbd_lpf_vertical_4 sse2/;
- add_proto qw/void vp9_highbd_lpf_vertical_4_dual/, "uint16_t *s, int pitch, const uint8_t *blimit0, const uint8_t *limit0, const uint8_t *thresh0, const uint8_t *blimit1, const uint8_t *limit1, const uint8_t *thresh1, int bd";
- specialize qw/vp9_highbd_lpf_vertical_4_dual sse2/;
+ add_proto qw/void vpx_highbd_lpf_vertical_4_dual/, "uint16_t *s, int pitch, const uint8_t *blimit0, const uint8_t *limit0, const uint8_t *thresh0, const uint8_t *blimit1, const uint8_t *limit1, const uint8_t *thresh1, int bd";
+ specialize qw/vpx_highbd_lpf_vertical_4_dual sse2/;
- add_proto qw/void vp9_highbd_lpf_horizontal_16/, "uint16_t *s, int pitch, const uint8_t *blimit, const uint8_t *limit, const uint8_t *thresh, int count, int bd";
- specialize qw/vp9_highbd_lpf_horizontal_16 sse2/;
+ add_proto qw/void vpx_highbd_lpf_horizontal_16/, "uint16_t *s, int pitch, const uint8_t *blimit, const uint8_t *limit, const uint8_t *thresh, int count, int bd";
+ specialize qw/vpx_highbd_lpf_horizontal_16 sse2/;
- add_proto qw/void vp9_highbd_lpf_horizontal_8/, "uint16_t *s, int pitch, const uint8_t *blimit, const uint8_t *limit, const uint8_t *thresh, int count, int bd";
- specialize qw/vp9_highbd_lpf_horizontal_8 sse2/;
+ add_proto qw/void vpx_highbd_lpf_horizontal_8/, "uint16_t *s, int pitch, const uint8_t *blimit, const uint8_t *limit, const uint8_t *thresh, int count, int bd";
+ specialize qw/vpx_highbd_lpf_horizontal_8 sse2/;
- add_proto qw/void vp9_highbd_lpf_horizontal_8_dual/, "uint16_t *s, int pitch, const uint8_t *blimit0, const uint8_t *limit0, const uint8_t *thresh0, const uint8_t *blimit1, const uint8_t *limit1, const uint8_t *thresh1, int bd";
- specialize qw/vp9_highbd_lpf_horizontal_8_dual sse2/;
+ add_proto qw/void vpx_highbd_lpf_horizontal_8_dual/, "uint16_t *s, int pitch, const uint8_t *blimit0, const uint8_t *limit0, const uint8_t *thresh0, const uint8_t *blimit1, const uint8_t *limit1, const uint8_t *thresh1, int bd";
+ specialize qw/vpx_highbd_lpf_horizontal_8_dual sse2/;
- add_proto qw/void vp9_highbd_lpf_horizontal_4/, "uint16_t *s, int pitch, const uint8_t *blimit, const uint8_t *limit, const uint8_t *thresh, int count, int bd";
- specialize qw/vp9_highbd_lpf_horizontal_4 sse2/;
+ add_proto qw/void vpx_highbd_lpf_horizontal_4/, "uint16_t *s, int pitch, const uint8_t *blimit, const uint8_t *limit, const uint8_t *thresh, int count, int bd";
+ specialize qw/vpx_highbd_lpf_horizontal_4 sse2/;
- add_proto qw/void vp9_highbd_lpf_horizontal_4_dual/, "uint16_t *s, int pitch, const uint8_t *blimit0, const uint8_t *limit0, const uint8_t *thresh0, const uint8_t *blimit1, const uint8_t *limit1, const uint8_t *thresh1, int bd";
- specialize qw/vp9_highbd_lpf_horizontal_4_dual sse2/;
+ add_proto qw/void vpx_highbd_lpf_horizontal_4_dual/, "uint16_t *s, int pitch, const uint8_t *blimit0, const uint8_t *limit0, const uint8_t *thresh0, const uint8_t *blimit1, const uint8_t *limit1, const uint8_t *thresh1, int bd";
+ specialize qw/vpx_highbd_lpf_horizontal_4_dual sse2/;
+} # CONFIG_VP9_HIGHBITDEPTH
+
+#
+# Encoder functions.
+#
+if (vpx_config("CONFIG_VP9_ENCODER") eq "yes") {
+if (vpx_config("CONFIG_VP9_HIGHBITDEPTH") eq "yes") {
+ add_proto qw/void vp9_quantize_b/, "const tran_low_t *coeff_ptr, intptr_t n_coeffs, int skip_block, const int16_t *zbin_ptr, const int16_t *round_ptr, const int16_t *quant_ptr, const int16_t *quant_shift_ptr, tran_low_t *qcoeff_ptr, tran_low_t *dqcoeff_ptr, const int16_t *dequant_ptr, uint16_t *eob_ptr, const int16_t *scan, const int16_t *iscan";
+ specialize qw/vp9_quantize_b/;
+
+ add_proto qw/void vp9_quantize_b_32x32/, "const tran_low_t *coeff_ptr, intptr_t n_coeffs, int skip_block, const int16_t *zbin_ptr, const int16_t *round_ptr, const int16_t *quant_ptr, const int16_t *quant_shift_ptr, tran_low_t *qcoeff_ptr, tran_low_t *dqcoeff_ptr, const int16_t *dequant_ptr, uint16_t *eob_ptr, const int16_t *scan, const int16_t *iscan";
+ specialize qw/vp9_quantize_b_32x32/;
+
+ add_proto qw/void vp9_highbd_quantize_b/, "const tran_low_t *coeff_ptr, intptr_t n_coeffs, int skip_block, const int16_t *zbin_ptr, const int16_t *round_ptr, const int16_t *quant_ptr, const int16_t *quant_shift_ptr, tran_low_t *qcoeff_ptr, tran_low_t *dqcoeff_ptr, const int16_t *dequant_ptr, uint16_t *eob_ptr, const int16_t *scan, const int16_t *iscan";
+ specialize qw/vp9_highbd_quantize_b sse2/;
+
+ add_proto qw/void vp9_highbd_quantize_b_32x32/, "const tran_low_t *coeff_ptr, intptr_t n_coeffs, int skip_block, const int16_t *zbin_ptr, const int16_t *round_ptr, const int16_t *quant_ptr, const int16_t *quant_shift_ptr, tran_low_t *qcoeff_ptr, tran_low_t *dqcoeff_ptr, const int16_t *dequant_ptr, uint16_t *eob_ptr, const int16_t *scan, const int16_t *iscan";
+ specialize qw/vp9_highbd_quantize_b_32x32 sse2/;
+} else {
+ add_proto qw/void vp9_quantize_b/, "const tran_low_t *coeff_ptr, intptr_t n_coeffs, int skip_block, const int16_t *zbin_ptr, const int16_t *round_ptr, const int16_t *quant_ptr, const int16_t *quant_shift_ptr, tran_low_t *qcoeff_ptr, tran_low_t *dqcoeff_ptr, const int16_t *dequant_ptr, uint16_t *eob_ptr, const int16_t *scan, const int16_t *iscan";
+ specialize qw/vp9_quantize_b sse2/, "$ssse3_x86_64_x86inc";
+
+ add_proto qw/void vp9_quantize_b_32x32/, "const tran_low_t *coeff_ptr, intptr_t n_coeffs, int skip_block, const int16_t *zbin_ptr, const int16_t *round_ptr, const int16_t *quant_ptr, const int16_t *quant_shift_ptr, tran_low_t *qcoeff_ptr, tran_low_t *dqcoeff_ptr, const int16_t *dequant_ptr, uint16_t *eob_ptr, const int16_t *scan, const int16_t *iscan";
+ specialize qw/vp9_quantize_b_32x32/, "$ssse3_x86_64_x86inc";
+} # CONFIG_VP9_ENCODER
} # CONFIG_VP9_HIGHBITDEPTH
if (vpx_config("CONFIG_ENCODERS") eq "yes") {
diff --git a/vpx_dsp/x86/highbd_loopfilter_sse2.c b/vpx_dsp/x86/highbd_loopfilter_sse2.c
index 358008f..c4fd5e1 100644
--- a/vpx_dsp/x86/highbd_loopfilter_sse2.c
+++ b/vpx_dsp/x86/highbd_loopfilter_sse2.c
@@ -508,7 +508,7 @@
}
// TODO(yunqingwang): remove count and call these 2 functions(8 or 16) directly.
-void vp9_highbd_lpf_horizontal_16_sse2(uint16_t *s, int p,
+void vpx_highbd_lpf_horizontal_16_sse2(uint16_t *s, int p,
const uint8_t *_blimit,
const uint8_t *_limit,
const uint8_t *_thresh,
@@ -519,7 +519,7 @@
highbd_mb_lpf_horizontal_edge_w_sse2_16(s, p, _blimit, _limit, _thresh, bd);
}
-void vp9_highbd_lpf_horizontal_8_sse2(uint16_t *s, int p,
+void vpx_highbd_lpf_horizontal_8_sse2(uint16_t *s, int p,
const uint8_t *_blimit,
const uint8_t *_limit,
const uint8_t *_thresh,
@@ -687,7 +687,7 @@
filt = _mm_adds_epi16(filt, work_a);
filt = _mm_adds_epi16(filt, work_a);
filt = _mm_adds_epi16(filt, work_a);
- // (vp9_filter + 3 * (qs0 - ps0)) & mask
+ // (vpx_filter + 3 * (qs0 - ps0)) & mask
filt = signed_char_clamp_bd_sse2(filt, bd);
filt = _mm_and_si128(filt, mask);
@@ -756,7 +756,7 @@
_mm_store_si128((__m128i *)(s + 2 * p), q2);
}
-void vp9_highbd_lpf_horizontal_8_dual_sse2(uint16_t *s, int p,
+void vpx_highbd_lpf_horizontal_8_dual_sse2(uint16_t *s, int p,
const uint8_t *_blimit0,
const uint8_t *_limit0,
const uint8_t *_thresh0,
@@ -764,12 +764,12 @@
const uint8_t *_limit1,
const uint8_t *_thresh1,
int bd) {
- vp9_highbd_lpf_horizontal_8_sse2(s, p, _blimit0, _limit0, _thresh0, 1, bd);
- vp9_highbd_lpf_horizontal_8_sse2(s + 8, p, _blimit1, _limit1, _thresh1,
+ vpx_highbd_lpf_horizontal_8_sse2(s, p, _blimit0, _limit0, _thresh0, 1, bd);
+ vpx_highbd_lpf_horizontal_8_sse2(s + 8, p, _blimit1, _limit1, _thresh1,
1, bd);
}
-void vp9_highbd_lpf_horizontal_4_sse2(uint16_t *s, int p,
+void vpx_highbd_lpf_horizontal_4_sse2(uint16_t *s, int p,
const uint8_t *_blimit,
const uint8_t *_limit,
const uint8_t *_thresh,
@@ -891,7 +891,7 @@
filt = _mm_adds_epi16(filt, work_a);
filt = signed_char_clamp_bd_sse2(_mm_adds_epi16(filt, work_a), bd);
- // (vp9_filter + 3 * (qs0 - ps0)) & mask
+ // (vpx_filter + 3 * (qs0 - ps0)) & mask
filt = _mm_and_si128(filt, mask);
filter1 = signed_char_clamp_bd_sse2(_mm_adds_epi16(filt, t4), bd);
@@ -936,7 +936,7 @@
_mm_storeu_si128((__m128i *)(s + 1 * p), q1);
}
-void vp9_highbd_lpf_horizontal_4_dual_sse2(uint16_t *s, int p,
+void vpx_highbd_lpf_horizontal_4_dual_sse2(uint16_t *s, int p,
const uint8_t *_blimit0,
const uint8_t *_limit0,
const uint8_t *_thresh0,
@@ -944,8 +944,8 @@
const uint8_t *_limit1,
const uint8_t *_thresh1,
int bd) {
- vp9_highbd_lpf_horizontal_4_sse2(s, p, _blimit0, _limit0, _thresh0, 1, bd);
- vp9_highbd_lpf_horizontal_4_sse2(s + 8, p, _blimit1, _limit1, _thresh1, 1,
+ vpx_highbd_lpf_horizontal_4_sse2(s, p, _blimit0, _limit0, _thresh0, 1, bd);
+ vpx_highbd_lpf_horizontal_4_sse2(s + 8, p, _blimit1, _limit1, _thresh1, 1,
bd);
}
@@ -1054,7 +1054,7 @@
highbd_transpose(src1, in_p, dest1, out_p, 1);
}
-void vp9_highbd_lpf_vertical_4_sse2(uint16_t *s, int p,
+void vpx_highbd_lpf_vertical_4_sse2(uint16_t *s, int p,
const uint8_t *blimit,
const uint8_t *limit,
const uint8_t *thresh,
@@ -1071,7 +1071,7 @@
highbd_transpose(src, p, dst, 8, 1);
// Loop filtering
- vp9_highbd_lpf_horizontal_4_sse2(t_dst + 4 * 8, 8, blimit, limit, thresh, 1,
+ vpx_highbd_lpf_horizontal_4_sse2(t_dst + 4 * 8, 8, blimit, limit, thresh, 1,
bd);
src[0] = t_dst;
@@ -1081,7 +1081,7 @@
highbd_transpose(src, 8, dst, p, 1);
}
-void vp9_highbd_lpf_vertical_4_dual_sse2(uint16_t *s, int p,
+void vpx_highbd_lpf_vertical_4_dual_sse2(uint16_t *s, int p,
const uint8_t *blimit0,
const uint8_t *limit0,
const uint8_t *thresh0,
@@ -1097,7 +1097,7 @@
highbd_transpose8x16(s - 4, s - 4 + p * 8, p, t_dst, 16);
// Loop filtering
- vp9_highbd_lpf_horizontal_4_dual_sse2(t_dst + 4 * 16, 16, blimit0, limit0,
+ vpx_highbd_lpf_horizontal_4_dual_sse2(t_dst + 4 * 16, 16, blimit0, limit0,
thresh0, blimit1, limit1, thresh1, bd);
src[0] = t_dst;
src[1] = t_dst + 8;
@@ -1108,7 +1108,7 @@
highbd_transpose(src, 16, dst, p, 2);
}
-void vp9_highbd_lpf_vertical_8_sse2(uint16_t *s, int p,
+void vpx_highbd_lpf_vertical_8_sse2(uint16_t *s, int p,
const uint8_t *blimit,
const uint8_t *limit,
const uint8_t *thresh,
@@ -1125,7 +1125,7 @@
highbd_transpose(src, p, dst, 8, 1);
// Loop filtering
- vp9_highbd_lpf_horizontal_8_sse2(t_dst + 4 * 8, 8, blimit, limit, thresh, 1,
+ vpx_highbd_lpf_horizontal_8_sse2(t_dst + 4 * 8, 8, blimit, limit, thresh, 1,
bd);
src[0] = t_dst;
@@ -1135,7 +1135,7 @@
highbd_transpose(src, 8, dst, p, 1);
}
-void vp9_highbd_lpf_vertical_8_dual_sse2(uint16_t *s, int p,
+void vpx_highbd_lpf_vertical_8_dual_sse2(uint16_t *s, int p,
const uint8_t *blimit0,
const uint8_t *limit0,
const uint8_t *thresh0,
@@ -1151,7 +1151,7 @@
highbd_transpose8x16(s - 4, s - 4 + p * 8, p, t_dst, 16);
// Loop filtering
- vp9_highbd_lpf_horizontal_8_dual_sse2(t_dst + 4 * 16, 16, blimit0, limit0,
+ vpx_highbd_lpf_horizontal_8_dual_sse2(t_dst + 4 * 16, 16, blimit0, limit0,
thresh0, blimit1, limit1, thresh1, bd);
src[0] = t_dst;
src[1] = t_dst + 8;
@@ -1163,7 +1163,7 @@
highbd_transpose(src, 16, dst, p, 2);
}
-void vp9_highbd_lpf_vertical_16_sse2(uint16_t *s, int p,
+void vpx_highbd_lpf_vertical_16_sse2(uint16_t *s, int p,
const uint8_t *blimit,
const uint8_t *limit,
const uint8_t *thresh,
@@ -1192,7 +1192,7 @@
highbd_transpose(src, 8, dst, p, 2);
}
-void vp9_highbd_lpf_vertical_16_dual_sse2(uint16_t *s,
+void vpx_highbd_lpf_vertical_16_dual_sse2(uint16_t *s,
int p,
const uint8_t *blimit,
const uint8_t *limit,
diff --git a/vp9/encoder/x86/vp9_highbd_quantize_intrin_sse2.c b/vpx_dsp/x86/highbd_quantize_intrin_sse2.c
similarity index 97%
rename from vp9/encoder/x86/vp9_highbd_quantize_intrin_sse2.c
rename to vpx_dsp/x86/highbd_quantize_intrin_sse2.c
index 0174cfe..06c748d 100644
--- a/vp9/encoder/x86/vp9_highbd_quantize_intrin_sse2.c
+++ b/vpx_dsp/x86/highbd_quantize_intrin_sse2.c
@@ -1,5 +1,5 @@
/*
- * Copyright (c) 2014 The WebM project authors. All Rights Reserved.
+ * Copyright (c) 2015 The WebM project authors. All Rights Reserved.
*
* Use of this source code is governed by a BSD-style license
* that can be found in the LICENSE file in the root of the source
@@ -10,8 +10,9 @@
#include <emmintrin.h>
+#include "vpx_dsp/vpx_dsp_common.h"
+#include "vpx_mem/vpx_mem.h"
#include "vpx_ports/mem.h"
-#include "vp9/common/vp9_common.h"
#if CONFIG_VP9_HIGHBITDEPTH
// from vp9_idct.h: typedef int32_t tran_low_t;
diff --git a/vpx_dsp/x86/loopfilter_avx2.c b/vpx_dsp/x86/loopfilter_avx2.c
index 36f59c9..23a97dd 100644
--- a/vpx_dsp/x86/loopfilter_avx2.c
+++ b/vpx_dsp/x86/loopfilter_avx2.c
@@ -103,7 +103,7 @@
filt = _mm_adds_epi8(filt, work_a);
filt = _mm_adds_epi8(filt, work_a);
filt = _mm_adds_epi8(filt, work_a);
- /* (vp9_filter + 3 * (qs0 - ps0)) & mask */
+ /* (vpx_filter + 3 * (qs0 - ps0)) & mask */
filt = _mm_and_si128(filt, mask);
filter1 = _mm_adds_epi8(filt, t4);
@@ -515,7 +515,7 @@
filt = _mm_adds_epi8(filt, work_a);
filt = _mm_adds_epi8(filt, work_a);
filt = _mm_adds_epi8(filt, work_a);
- /* (vp9_filter + 3 * (qs0 - ps0)) & mask */
+ /* (vpx_filter + 3 * (qs0 - ps0)) & mask */
filt = _mm_and_si128(filt, mask);
filter1 = _mm_adds_epi8(filt, t4);
@@ -976,7 +976,7 @@
}
}
-void vp9_lpf_horizontal_16_avx2(unsigned char *s, int p,
+void vpx_lpf_horizontal_16_avx2(unsigned char *s, int p,
const unsigned char *_blimit, const unsigned char *_limit,
const unsigned char *_thresh, int count) {
if (count == 1)
diff --git a/vpx_dsp/x86/loopfilter_mmx.asm b/vpx_dsp/x86/loopfilter_mmx.asm
index f5f7d5a..b9c18b6 100644
--- a/vpx_dsp/x86/loopfilter_mmx.asm
+++ b/vpx_dsp/x86/loopfilter_mmx.asm
@@ -12,7 +12,7 @@
%include "vpx_ports/x86_abi_support.asm"
-;void vp9_lpf_horizontal_4_mmx
+;void vpx_lpf_horizontal_4_mmx
;(
; unsigned char *src_ptr,
; int src_pixel_step,
@@ -21,8 +21,8 @@
; const char *thresh,
; int count
;)
-global sym(vp9_lpf_horizontal_4_mmx) PRIVATE
-sym(vp9_lpf_horizontal_4_mmx):
+global sym(vpx_lpf_horizontal_4_mmx) PRIVATE
+sym(vpx_lpf_horizontal_4_mmx):
push rbp
mov rbp, rsp
SHADOW_ARGS_TO_STACK 6
@@ -224,7 +224,7 @@
ret
-;void vp9_lpf_vertical_4_mmx
+;void vpx_lpf_vertical_4_mmx
;(
; unsigned char *src_ptr,
; int src_pixel_step,
@@ -233,8 +233,8 @@
; const char *thresh,
; int count
;)
-global sym(vp9_lpf_vertical_4_mmx) PRIVATE
-sym(vp9_lpf_vertical_4_mmx):
+global sym(vpx_lpf_vertical_4_mmx) PRIVATE
+sym(vpx_lpf_vertical_4_mmx):
push rbp
mov rbp, rsp
SHADOW_ARGS_TO_STACK 6
diff --git a/vpx_dsp/x86/loopfilter_sse2.c b/vpx_dsp/x86/loopfilter_sse2.c
index 977f46b..ed10127 100644
--- a/vpx_dsp/x86/loopfilter_sse2.c
+++ b/vpx_dsp/x86/loopfilter_sse2.c
@@ -100,7 +100,7 @@
filt = _mm_adds_epi8(filt, work_a);
filt = _mm_adds_epi8(filt, work_a);
filt = _mm_adds_epi8(filt, work_a);
- // (vp9_filter + 3 * (qs0 - ps0)) & mask
+ // (vpx_filter + 3 * (qs0 - ps0)) & mask
filt = _mm_and_si128(filt, mask);
filter1 = _mm_adds_epi8(filt, t4);
@@ -495,7 +495,7 @@
filt = _mm_adds_epi8(filt, work_a);
filt = _mm_adds_epi8(filt, work_a);
filt = _mm_adds_epi8(filt, work_a);
- // (vp9_filter + 3 * (qs0 - ps0)) & mask
+ // (vpx_filter + 3 * (qs0 - ps0)) & mask
filt = _mm_and_si128(filt, mask);
filter1 = _mm_adds_epi8(filt, t4);
filter2 = _mm_adds_epi8(filt, t3);
@@ -717,7 +717,7 @@
}
// TODO(yunqingwang): remove count and call these 2 functions(8 or 16) directly.
-void vp9_lpf_horizontal_16_sse2(unsigned char *s, int p,
+void vpx_lpf_horizontal_16_sse2(unsigned char *s, int p,
const unsigned char *_blimit,
const unsigned char *_limit,
const unsigned char *_thresh, int count) {
@@ -727,7 +727,7 @@
mb_lpf_horizontal_edge_w_sse2_16(s, p, _blimit, _limit, _thresh);
}
-void vp9_lpf_horizontal_8_sse2(unsigned char *s, int p,
+void vpx_lpf_horizontal_8_sse2(unsigned char *s, int p,
const unsigned char *_blimit,
const unsigned char *_limit,
const unsigned char *_thresh, int count) {
@@ -874,7 +874,7 @@
filt = _mm_adds_epi8(filt, work_a);
filt = _mm_adds_epi8(filt, work_a);
filt = _mm_adds_epi8(filt, work_a);
- // (vp9_filter + 3 * (qs0 - ps0)) & mask
+ // (vpx_filter + 3 * (qs0 - ps0)) & mask
filt = _mm_and_si128(filt, mask);
filter1 = _mm_adds_epi8(filt, t4);
@@ -943,7 +943,7 @@
}
}
-void vp9_lpf_horizontal_8_dual_sse2(uint8_t *s, int p,
+void vpx_lpf_horizontal_8_dual_sse2(uint8_t *s, int p,
const uint8_t *_blimit0,
const uint8_t *_limit0,
const uint8_t *_thresh0,
@@ -1115,7 +1115,7 @@
filt = _mm_adds_epi8(filt, work_a);
filt = _mm_adds_epi8(filt, work_a);
filt = _mm_adds_epi8(filt, work_a);
- // (vp9_filter + 3 * (qs0 - ps0)) & mask
+ // (vpx_filter + 3 * (qs0 - ps0)) & mask
filt = _mm_and_si128(filt, mask);
filter1 = _mm_adds_epi8(filt, t4);
@@ -1190,7 +1190,7 @@
}
}
-void vp9_lpf_horizontal_4_dual_sse2(unsigned char *s, int p,
+void vpx_lpf_horizontal_4_dual_sse2(unsigned char *s, int p,
const unsigned char *_blimit0,
const unsigned char *_limit0,
const unsigned char *_thresh0,
@@ -1286,7 +1286,7 @@
filt = _mm_adds_epi8(filt, work_a);
filt = _mm_adds_epi8(filt, work_a);
filt = _mm_adds_epi8(filt, work_a);
- // (vp9_filter + 3 * (qs0 - ps0)) & mask
+ // (vpx_filter + 3 * (qs0 - ps0)) & mask
filt = _mm_and_si128(filt, mask);
filter1 = _mm_adds_epi8(filt, t4);
@@ -1464,7 +1464,7 @@
} while (++idx8x8 < num_8x8_to_transpose);
}
-void vp9_lpf_vertical_4_dual_sse2(uint8_t *s, int p, const uint8_t *blimit0,
+void vpx_lpf_vertical_4_dual_sse2(uint8_t *s, int p, const uint8_t *blimit0,
const uint8_t *limit0,
const uint8_t *thresh0,
const uint8_t *blimit1,
@@ -1478,7 +1478,7 @@
transpose8x16(s - 4, s - 4 + p * 8, p, t_dst, 16);
// Loop filtering
- vp9_lpf_horizontal_4_dual_sse2(t_dst + 4 * 16, 16, blimit0, limit0, thresh0,
+ vpx_lpf_horizontal_4_dual_sse2(t_dst + 4 * 16, 16, blimit0, limit0, thresh0,
blimit1, limit1, thresh1);
src[0] = t_dst;
src[1] = t_dst + 8;
@@ -1489,7 +1489,7 @@
transpose(src, 16, dst, p, 2);
}
-void vp9_lpf_vertical_8_sse2(unsigned char *s, int p,
+void vpx_lpf_vertical_8_sse2(unsigned char *s, int p,
const unsigned char *blimit,
const unsigned char *limit,
const unsigned char *thresh, int count) {
@@ -1505,7 +1505,7 @@
transpose(src, p, dst, 8, 1);
// Loop filtering
- vp9_lpf_horizontal_8_sse2(t_dst + 4 * 8, 8, blimit, limit, thresh, 1);
+ vpx_lpf_horizontal_8_sse2(t_dst + 4 * 8, 8, blimit, limit, thresh, 1);
src[0] = t_dst;
dst[0] = s - 4;
@@ -1514,7 +1514,7 @@
transpose(src, 8, dst, p, 1);
}
-void vp9_lpf_vertical_8_dual_sse2(uint8_t *s, int p, const uint8_t *blimit0,
+void vpx_lpf_vertical_8_dual_sse2(uint8_t *s, int p, const uint8_t *blimit0,
const uint8_t *limit0,
const uint8_t *thresh0,
const uint8_t *blimit1,
@@ -1528,7 +1528,7 @@
transpose8x16(s - 4, s - 4 + p * 8, p, t_dst, 16);
// Loop filtering
- vp9_lpf_horizontal_8_dual_sse2(t_dst + 4 * 16, 16, blimit0, limit0, thresh0,
+ vpx_lpf_horizontal_8_dual_sse2(t_dst + 4 * 16, 16, blimit0, limit0, thresh0,
blimit1, limit1, thresh1);
src[0] = t_dst;
src[1] = t_dst + 8;
@@ -1540,7 +1540,7 @@
transpose(src, 16, dst, p, 2);
}
-void vp9_lpf_vertical_16_sse2(unsigned char *s, int p,
+void vpx_lpf_vertical_16_sse2(unsigned char *s, int p,
const unsigned char *blimit,
const unsigned char *limit,
const unsigned char *thresh) {
@@ -1568,7 +1568,7 @@
transpose(src, 8, dst, p, 2);
}
-void vp9_lpf_vertical_16_dual_sse2(unsigned char *s, int p,
+void vpx_lpf_vertical_16_dual_sse2(unsigned char *s, int p,
const uint8_t *blimit, const uint8_t *limit,
const uint8_t *thresh) {
DECLARE_ALIGNED(16, unsigned char, t_dst[256]);
diff --git a/vpx_dsp/x86/quantize_sse2.c b/vpx_dsp/x86/quantize_sse2.c
new file mode 100644
index 0000000..8d51aeb
--- /dev/null
+++ b/vpx_dsp/x86/quantize_sse2.c
@@ -0,0 +1,223 @@
+/*
+ * Copyright (c) 2015 The WebM project authors. All Rights Reserved.
+ *
+ * Use of this source code is governed by a BSD-style license
+ * that can be found in the LICENSE file in the root of the source
+ * tree. An additional intellectual property rights grant can be found
+ * in the file PATENTS. All contributing project authors may
+ * be found in the AUTHORS file in the root of the source tree.
+ */
+
+#include <emmintrin.h>
+#include <xmmintrin.h>
+
+#include "./vpx_dsp_rtcd.h"
+#include "vpx/vpx_integer.h"
+
+void vp9_quantize_b_sse2(const int16_t* coeff_ptr, intptr_t n_coeffs,
+ int skip_block, const int16_t* zbin_ptr,
+ const int16_t* round_ptr, const int16_t* quant_ptr,
+ const int16_t* quant_shift_ptr, int16_t* qcoeff_ptr,
+ int16_t* dqcoeff_ptr, const int16_t* dequant_ptr,
+ uint16_t* eob_ptr,
+ const int16_t* scan_ptr,
+ const int16_t* iscan_ptr) {
+ __m128i zero;
+ (void)scan_ptr;
+
+ coeff_ptr += n_coeffs;
+ iscan_ptr += n_coeffs;
+ qcoeff_ptr += n_coeffs;
+ dqcoeff_ptr += n_coeffs;
+ n_coeffs = -n_coeffs;
+ zero = _mm_setzero_si128();
+ if (!skip_block) {
+ __m128i eob;
+ __m128i zbin;
+ __m128i round, quant, dequant, shift;
+ {
+ __m128i coeff0, coeff1;
+
+ // Setup global values
+ {
+ __m128i pw_1;
+ zbin = _mm_load_si128((const __m128i*)zbin_ptr);
+ round = _mm_load_si128((const __m128i*)round_ptr);
+ quant = _mm_load_si128((const __m128i*)quant_ptr);
+ pw_1 = _mm_set1_epi16(1);
+ zbin = _mm_sub_epi16(zbin, pw_1);
+ dequant = _mm_load_si128((const __m128i*)dequant_ptr);
+ shift = _mm_load_si128((const __m128i*)quant_shift_ptr);
+ }
+
+ {
+ __m128i coeff0_sign, coeff1_sign;
+ __m128i qcoeff0, qcoeff1;
+ __m128i qtmp0, qtmp1;
+ __m128i cmp_mask0, cmp_mask1;
+ // Do DC and first 15 AC
+ coeff0 = _mm_load_si128((const __m128i*)(coeff_ptr + n_coeffs));
+ coeff1 = _mm_load_si128((const __m128i*)(coeff_ptr + n_coeffs) + 1);
+
+ // Poor man's sign extract
+ coeff0_sign = _mm_srai_epi16(coeff0, 15);
+ coeff1_sign = _mm_srai_epi16(coeff1, 15);
+ qcoeff0 = _mm_xor_si128(coeff0, coeff0_sign);
+ qcoeff1 = _mm_xor_si128(coeff1, coeff1_sign);
+ qcoeff0 = _mm_sub_epi16(qcoeff0, coeff0_sign);
+ qcoeff1 = _mm_sub_epi16(qcoeff1, coeff1_sign);
+
+ cmp_mask0 = _mm_cmpgt_epi16(qcoeff0, zbin);
+ zbin = _mm_unpackhi_epi64(zbin, zbin); // Switch DC to AC
+ cmp_mask1 = _mm_cmpgt_epi16(qcoeff1, zbin);
+ qcoeff0 = _mm_adds_epi16(qcoeff0, round);
+ round = _mm_unpackhi_epi64(round, round);
+ qcoeff1 = _mm_adds_epi16(qcoeff1, round);
+ qtmp0 = _mm_mulhi_epi16(qcoeff0, quant);
+ quant = _mm_unpackhi_epi64(quant, quant);
+ qtmp1 = _mm_mulhi_epi16(qcoeff1, quant);
+ qtmp0 = _mm_add_epi16(qtmp0, qcoeff0);
+ qtmp1 = _mm_add_epi16(qtmp1, qcoeff1);
+ qcoeff0 = _mm_mulhi_epi16(qtmp0, shift);
+ shift = _mm_unpackhi_epi64(shift, shift);
+ qcoeff1 = _mm_mulhi_epi16(qtmp1, shift);
+
+ // Reinsert signs
+ qcoeff0 = _mm_xor_si128(qcoeff0, coeff0_sign);
+ qcoeff1 = _mm_xor_si128(qcoeff1, coeff1_sign);
+ qcoeff0 = _mm_sub_epi16(qcoeff0, coeff0_sign);
+ qcoeff1 = _mm_sub_epi16(qcoeff1, coeff1_sign);
+
+ // Mask out zbin threshold coeffs
+ qcoeff0 = _mm_and_si128(qcoeff0, cmp_mask0);
+ qcoeff1 = _mm_and_si128(qcoeff1, cmp_mask1);
+
+ _mm_store_si128((__m128i*)(qcoeff_ptr + n_coeffs), qcoeff0);
+ _mm_store_si128((__m128i*)(qcoeff_ptr + n_coeffs) + 1, qcoeff1);
+
+ coeff0 = _mm_mullo_epi16(qcoeff0, dequant);
+ dequant = _mm_unpackhi_epi64(dequant, dequant);
+ coeff1 = _mm_mullo_epi16(qcoeff1, dequant);
+
+ _mm_store_si128((__m128i*)(dqcoeff_ptr + n_coeffs), coeff0);
+ _mm_store_si128((__m128i*)(dqcoeff_ptr + n_coeffs) + 1, coeff1);
+ }
+
+ {
+ // Scan for eob
+ __m128i zero_coeff0, zero_coeff1;
+ __m128i nzero_coeff0, nzero_coeff1;
+ __m128i iscan0, iscan1;
+ __m128i eob1;
+ zero_coeff0 = _mm_cmpeq_epi16(coeff0, zero);
+ zero_coeff1 = _mm_cmpeq_epi16(coeff1, zero);
+ nzero_coeff0 = _mm_cmpeq_epi16(zero_coeff0, zero);
+ nzero_coeff1 = _mm_cmpeq_epi16(zero_coeff1, zero);
+ iscan0 = _mm_load_si128((const __m128i*)(iscan_ptr + n_coeffs));
+ iscan1 = _mm_load_si128((const __m128i*)(iscan_ptr + n_coeffs) + 1);
+ // Add one to convert from indices to counts
+ iscan0 = _mm_sub_epi16(iscan0, nzero_coeff0);
+ iscan1 = _mm_sub_epi16(iscan1, nzero_coeff1);
+ eob = _mm_and_si128(iscan0, nzero_coeff0);
+ eob1 = _mm_and_si128(iscan1, nzero_coeff1);
+ eob = _mm_max_epi16(eob, eob1);
+ }
+ n_coeffs += 8 * 2;
+ }
+
+ // AC only loop
+ while (n_coeffs < 0) {
+ __m128i coeff0, coeff1;
+ {
+ __m128i coeff0_sign, coeff1_sign;
+ __m128i qcoeff0, qcoeff1;
+ __m128i qtmp0, qtmp1;
+ __m128i cmp_mask0, cmp_mask1;
+
+ coeff0 = _mm_load_si128((const __m128i*)(coeff_ptr + n_coeffs));
+ coeff1 = _mm_load_si128((const __m128i*)(coeff_ptr + n_coeffs) + 1);
+
+ // Poor man's sign extract
+ coeff0_sign = _mm_srai_epi16(coeff0, 15);
+ coeff1_sign = _mm_srai_epi16(coeff1, 15);
+ qcoeff0 = _mm_xor_si128(coeff0, coeff0_sign);
+ qcoeff1 = _mm_xor_si128(coeff1, coeff1_sign);
+ qcoeff0 = _mm_sub_epi16(qcoeff0, coeff0_sign);
+ qcoeff1 = _mm_sub_epi16(qcoeff1, coeff1_sign);
+
+ cmp_mask0 = _mm_cmpgt_epi16(qcoeff0, zbin);
+ cmp_mask1 = _mm_cmpgt_epi16(qcoeff1, zbin);
+ qcoeff0 = _mm_adds_epi16(qcoeff0, round);
+ qcoeff1 = _mm_adds_epi16(qcoeff1, round);
+ qtmp0 = _mm_mulhi_epi16(qcoeff0, quant);
+ qtmp1 = _mm_mulhi_epi16(qcoeff1, quant);
+ qtmp0 = _mm_add_epi16(qtmp0, qcoeff0);
+ qtmp1 = _mm_add_epi16(qtmp1, qcoeff1);
+ qcoeff0 = _mm_mulhi_epi16(qtmp0, shift);
+ qcoeff1 = _mm_mulhi_epi16(qtmp1, shift);
+
+ // Reinsert signs
+ qcoeff0 = _mm_xor_si128(qcoeff0, coeff0_sign);
+ qcoeff1 = _mm_xor_si128(qcoeff1, coeff1_sign);
+ qcoeff0 = _mm_sub_epi16(qcoeff0, coeff0_sign);
+ qcoeff1 = _mm_sub_epi16(qcoeff1, coeff1_sign);
+
+ // Mask out zbin threshold coeffs
+ qcoeff0 = _mm_and_si128(qcoeff0, cmp_mask0);
+ qcoeff1 = _mm_and_si128(qcoeff1, cmp_mask1);
+
+ _mm_store_si128((__m128i*)(qcoeff_ptr + n_coeffs), qcoeff0);
+ _mm_store_si128((__m128i*)(qcoeff_ptr + n_coeffs) + 1, qcoeff1);
+
+ coeff0 = _mm_mullo_epi16(qcoeff0, dequant);
+ coeff1 = _mm_mullo_epi16(qcoeff1, dequant);
+
+ _mm_store_si128((__m128i*)(dqcoeff_ptr + n_coeffs), coeff0);
+ _mm_store_si128((__m128i*)(dqcoeff_ptr + n_coeffs) + 1, coeff1);
+ }
+
+ {
+ // Scan for eob
+ __m128i zero_coeff0, zero_coeff1;
+ __m128i nzero_coeff0, nzero_coeff1;
+ __m128i iscan0, iscan1;
+ __m128i eob0, eob1;
+ zero_coeff0 = _mm_cmpeq_epi16(coeff0, zero);
+ zero_coeff1 = _mm_cmpeq_epi16(coeff1, zero);
+ nzero_coeff0 = _mm_cmpeq_epi16(zero_coeff0, zero);
+ nzero_coeff1 = _mm_cmpeq_epi16(zero_coeff1, zero);
+ iscan0 = _mm_load_si128((const __m128i*)(iscan_ptr + n_coeffs));
+ iscan1 = _mm_load_si128((const __m128i*)(iscan_ptr + n_coeffs) + 1);
+ // Add one to convert from indices to counts
+ iscan0 = _mm_sub_epi16(iscan0, nzero_coeff0);
+ iscan1 = _mm_sub_epi16(iscan1, nzero_coeff1);
+ eob0 = _mm_and_si128(iscan0, nzero_coeff0);
+ eob1 = _mm_and_si128(iscan1, nzero_coeff1);
+ eob0 = _mm_max_epi16(eob0, eob1);
+ eob = _mm_max_epi16(eob, eob0);
+ }
+ n_coeffs += 8 * 2;
+ }
+
+ // Accumulate EOB
+ {
+ __m128i eob_shuffled;
+ eob_shuffled = _mm_shuffle_epi32(eob, 0xe);
+ eob = _mm_max_epi16(eob, eob_shuffled);
+ eob_shuffled = _mm_shufflelo_epi16(eob, 0xe);
+ eob = _mm_max_epi16(eob, eob_shuffled);
+ eob_shuffled = _mm_shufflelo_epi16(eob, 0x1);
+ eob = _mm_max_epi16(eob, eob_shuffled);
+ *eob_ptr = _mm_extract_epi16(eob, 1);
+ }
+ } else {
+ do {
+ _mm_store_si128((__m128i*)(dqcoeff_ptr + n_coeffs), zero);
+ _mm_store_si128((__m128i*)(dqcoeff_ptr + n_coeffs) + 1, zero);
+ _mm_store_si128((__m128i*)(qcoeff_ptr + n_coeffs), zero);
+ _mm_store_si128((__m128i*)(qcoeff_ptr + n_coeffs) + 1, zero);
+ n_coeffs += 8 * 2;
+ } while (n_coeffs < 0);
+ *eob_ptr = 0;
+ }
+}
diff --git a/vpx_dsp/x86/quantize_ssse3_x86_64.asm b/vpx_dsp/x86/quantize_ssse3_x86_64.asm
new file mode 100644
index 0000000..3784d9d
--- /dev/null
+++ b/vpx_dsp/x86/quantize_ssse3_x86_64.asm
@@ -0,0 +1,216 @@
+;
+; Copyright (c) 2015 The WebM project authors. All Rights Reserved.
+;
+; Use of this source code is governed by a BSD-style license
+; that can be found in the LICENSE file in the root of the source
+; tree. An additional intellectual property rights grant can be found
+; in the file PATENTS. All contributing project authors may
+; be found in the AUTHORS file in the root of the source tree.
+;
+
+%include "third_party/x86inc/x86inc.asm"
+
+SECTION_RODATA
+pw_1: times 8 dw 1
+
+SECTION .text
+
+; TODO(yunqingwang)fix quantize_b code for skip=1 case.
+%macro QUANTIZE_FN 2
+cglobal quantize_%1, 0, %2, 15, coeff, ncoeff, skip, zbin, round, quant, \
+ shift, qcoeff, dqcoeff, dequant, \
+ eob, scan, iscan
+ cmp dword skipm, 0
+ jne .blank
+
+ ; actual quantize loop - setup pointers, rounders, etc.
+ movifnidn coeffq, coeffmp
+ movifnidn ncoeffq, ncoeffmp
+ mov r2, dequantmp
+ movifnidn zbinq, zbinmp
+ movifnidn roundq, roundmp
+ movifnidn quantq, quantmp
+ mova m0, [zbinq] ; m0 = zbin
+ mova m1, [roundq] ; m1 = round
+ mova m2, [quantq] ; m2 = quant
+%ifidn %1, b_32x32
+ pcmpeqw m5, m5
+ psrlw m5, 15
+ paddw m0, m5
+ paddw m1, m5
+ psrlw m0, 1 ; m0 = (m0 + 1) / 2
+ psrlw m1, 1 ; m1 = (m1 + 1) / 2
+%endif
+ mova m3, [r2q] ; m3 = dequant
+ psubw m0, [pw_1]
+ mov r2, shiftmp
+ mov r3, qcoeffmp
+ mova m4, [r2] ; m4 = shift
+ mov r4, dqcoeffmp
+ mov r5, iscanmp
+%ifidn %1, b_32x32
+ psllw m4, 1
+%endif
+ pxor m5, m5 ; m5 = dedicated zero
+ DEFINE_ARGS coeff, ncoeff, d1, qcoeff, dqcoeff, iscan, d2, d3, d4, d5, eob
+ lea coeffq, [ coeffq+ncoeffq*2]
+ lea iscanq, [ iscanq+ncoeffq*2]
+ lea qcoeffq, [ qcoeffq+ncoeffq*2]
+ lea dqcoeffq, [dqcoeffq+ncoeffq*2]
+ neg ncoeffq
+
+ ; get DC and first 15 AC coeffs
+ mova m9, [ coeffq+ncoeffq*2+ 0] ; m9 = c[i]
+ mova m10, [ coeffq+ncoeffq*2+16] ; m10 = c[i]
+ pabsw m6, m9 ; m6 = abs(m9)
+ pabsw m11, m10 ; m11 = abs(m10)
+ pcmpgtw m7, m6, m0 ; m7 = c[i] >= zbin
+ punpckhqdq m0, m0
+ pcmpgtw m12, m11, m0 ; m12 = c[i] >= zbin
+ paddsw m6, m1 ; m6 += round
+ punpckhqdq m1, m1
+ paddsw m11, m1 ; m11 += round
+ pmulhw m8, m6, m2 ; m8 = m6*q>>16
+ punpckhqdq m2, m2
+ pmulhw m13, m11, m2 ; m13 = m11*q>>16
+ paddw m8, m6 ; m8 += m6
+ paddw m13, m11 ; m13 += m11
+ pmulhw m8, m4 ; m8 = m8*qsh>>16
+ punpckhqdq m4, m4
+ pmulhw m13, m4 ; m13 = m13*qsh>>16
+ psignw m8, m9 ; m8 = reinsert sign
+ psignw m13, m10 ; m13 = reinsert sign
+ pand m8, m7
+ pand m13, m12
+ mova [qcoeffq+ncoeffq*2+ 0], m8
+ mova [qcoeffq+ncoeffq*2+16], m13
+%ifidn %1, b_32x32
+ pabsw m8, m8
+ pabsw m13, m13
+%endif
+ pmullw m8, m3 ; dqc[i] = qc[i] * q
+ punpckhqdq m3, m3
+ pmullw m13, m3 ; dqc[i] = qc[i] * q
+%ifidn %1, b_32x32
+ psrlw m8, 1
+ psrlw m13, 1
+ psignw m8, m9
+ psignw m13, m10
+%endif
+ mova [dqcoeffq+ncoeffq*2+ 0], m8
+ mova [dqcoeffq+ncoeffq*2+16], m13
+ pcmpeqw m8, m5 ; m8 = c[i] == 0
+ pcmpeqw m13, m5 ; m13 = c[i] == 0
+ mova m6, [ iscanq+ncoeffq*2+ 0] ; m6 = scan[i]
+ mova m11, [ iscanq+ncoeffq*2+16] ; m11 = scan[i]
+ psubw m6, m7 ; m6 = scan[i] + 1
+ psubw m11, m12 ; m11 = scan[i] + 1
+ pandn m8, m6 ; m8 = max(eob)
+ pandn m13, m11 ; m13 = max(eob)
+ pmaxsw m8, m13
+ add ncoeffq, mmsize
+ jz .accumulate_eob
+
+.ac_only_loop:
+ mova m9, [ coeffq+ncoeffq*2+ 0] ; m9 = c[i]
+ mova m10, [ coeffq+ncoeffq*2+16] ; m10 = c[i]
+ pabsw m6, m9 ; m6 = abs(m9)
+ pabsw m11, m10 ; m11 = abs(m10)
+ pcmpgtw m7, m6, m0 ; m7 = c[i] >= zbin
+ pcmpgtw m12, m11, m0 ; m12 = c[i] >= zbin
+%ifidn %1, b_32x32
+ pmovmskb r6d, m7
+ pmovmskb r2d, m12
+ or r6, r2
+ jz .skip_iter
+%endif
+ paddsw m6, m1 ; m6 += round
+ paddsw m11, m1 ; m11 += round
+ pmulhw m14, m6, m2 ; m14 = m6*q>>16
+ pmulhw m13, m11, m2 ; m13 = m11*q>>16
+ paddw m14, m6 ; m14 += m6
+ paddw m13, m11 ; m13 += m11
+ pmulhw m14, m4 ; m14 = m14*qsh>>16
+ pmulhw m13, m4 ; m13 = m13*qsh>>16
+ psignw m14, m9 ; m14 = reinsert sign
+ psignw m13, m10 ; m13 = reinsert sign
+ pand m14, m7
+ pand m13, m12
+ mova [qcoeffq+ncoeffq*2+ 0], m14
+ mova [qcoeffq+ncoeffq*2+16], m13
+%ifidn %1, b_32x32
+ pabsw m14, m14
+ pabsw m13, m13
+%endif
+ pmullw m14, m3 ; dqc[i] = qc[i] * q
+ pmullw m13, m3 ; dqc[i] = qc[i] * q
+%ifidn %1, b_32x32
+ psrlw m14, 1
+ psrlw m13, 1
+ psignw m14, m9
+ psignw m13, m10
+%endif
+ mova [dqcoeffq+ncoeffq*2+ 0], m14
+ mova [dqcoeffq+ncoeffq*2+16], m13
+ pcmpeqw m14, m5 ; m14 = c[i] == 0
+ pcmpeqw m13, m5 ; m13 = c[i] == 0
+ mova m6, [ iscanq+ncoeffq*2+ 0] ; m6 = scan[i]
+ mova m11, [ iscanq+ncoeffq*2+16] ; m11 = scan[i]
+ psubw m6, m7 ; m6 = scan[i] + 1
+ psubw m11, m12 ; m11 = scan[i] + 1
+ pandn m14, m6 ; m14 = max(eob)
+ pandn m13, m11 ; m13 = max(eob)
+ pmaxsw m8, m14
+ pmaxsw m8, m13
+ add ncoeffq, mmsize
+ jl .ac_only_loop
+
+%ifidn %1, b_32x32
+ jmp .accumulate_eob
+.skip_iter:
+ mova [qcoeffq+ncoeffq*2+ 0], m5
+ mova [qcoeffq+ncoeffq*2+16], m5
+ mova [dqcoeffq+ncoeffq*2+ 0], m5
+ mova [dqcoeffq+ncoeffq*2+16], m5
+ add ncoeffq, mmsize
+ jl .ac_only_loop
+%endif
+
+.accumulate_eob:
+ ; horizontally accumulate/max eobs and write into [eob] memory pointer
+ mov r2, eobmp
+ pshufd m7, m8, 0xe
+ pmaxsw m8, m7
+ pshuflw m7, m8, 0xe
+ pmaxsw m8, m7
+ pshuflw m7, m8, 0x1
+ pmaxsw m8, m7
+ pextrw r6, m8, 0
+ mov [r2], r6
+ RET
+
+ ; skip-block, i.e. just write all zeroes
+.blank:
+ mov r0, dqcoeffmp
+ movifnidn ncoeffq, ncoeffmp
+ mov r2, qcoeffmp
+ mov r3, eobmp
+ DEFINE_ARGS dqcoeff, ncoeff, qcoeff, eob
+ lea dqcoeffq, [dqcoeffq+ncoeffq*2]
+ lea qcoeffq, [ qcoeffq+ncoeffq*2]
+ neg ncoeffq
+ pxor m7, m7
+.blank_loop:
+ mova [dqcoeffq+ncoeffq*2+ 0], m7
+ mova [dqcoeffq+ncoeffq*2+16], m7
+ mova [qcoeffq+ncoeffq*2+ 0], m7
+ mova [qcoeffq+ncoeffq*2+16], m7
+ add ncoeffq, mmsize
+ jl .blank_loop
+ mov word [eobq], 0
+ RET
+%endmacro
+
+INIT_XMM ssse3
+QUANTIZE_FN b, 7
+QUANTIZE_FN b_32x32, 7