Merge "mips msa vp8 sixtap filter optimization"
diff --git a/test/convolve_test.cc b/test/convolve_test.cc
index db7e961..c1c5d77 100644
--- a/test/convolve_test.cc
+++ b/test/convolve_test.cc
@@ -902,116 +902,6 @@
}
}
-DECLARE_ALIGNED(256, const int16_t, kChangeFilters[16][8]) = {
- { 0, 0, 0, 0, 0, 0, 0, 128},
- { 0, 0, 0, 0, 0, 0, 128},
- { 0, 0, 0, 0, 0, 128},
- { 0, 0, 0, 0, 128},
- { 0, 0, 0, 128},
- { 0, 0, 128},
- { 0, 128},
- { 128},
- { 0, 0, 0, 0, 0, 0, 0, 128},
- { 0, 0, 0, 0, 0, 0, 128},
- { 0, 0, 0, 0, 0, 128},
- { 0, 0, 0, 0, 128},
- { 0, 0, 0, 128},
- { 0, 0, 128},
- { 0, 128},
- { 128}
-};
-
-/* This test exercises the horizontal and vertical filter functions. */
-TEST_P(ConvolveTest, ChangeFilterWorks) {
- uint8_t* const in = input();
- uint8_t* const out = output();
-
- /* Assume that the first input sample is at the 8/16th position. */
- const int kInitialSubPelOffset = 8;
-
- /* Filters are 8-tap, so the first filter tap will be applied to the pixel
- * at position -3 with respect to the current filtering position. Since
- * kInitialSubPelOffset is set to 8, we first select sub-pixel filter 8,
- * which is non-zero only in the last tap. So, applying the filter at the
- * current input position will result in an output equal to the pixel at
- * offset +4 (-3 + 7) with respect to the current filtering position.
- */
- const int kPixelSelected = 4;
-
- /* Assume that each output pixel requires us to step on by 17/16th pixels in
- * the input.
- */
- const int kInputPixelStep = 17;
-
- /* The filters are setup in such a way that the expected output produces
- * sets of 8 identical output samples. As the filter position moves to the
- * next 1/16th pixel position the only active (=128) filter tap moves one
- * position to the left, resulting in the same input pixel being replicated
- * in to the output for 8 consecutive samples. After each set of 8 positions
- * the filters select a different input pixel. kFilterPeriodAdjust below
- * computes which input pixel is written to the output for a specified
- * x or y position.
- */
-
- /* Test the horizontal filter. */
- ASM_REGISTER_STATE_CHECK(
- UUT_->h8_(in, kInputStride, out, kOutputStride,
- kChangeFilters[kInitialSubPelOffset],
- kInputPixelStep, NULL, 0, Width(), Height()));
-
- for (int x = 0; x < Width(); ++x) {
- const int kFilterPeriodAdjust = (x >> 3) << 3;
- const int ref_x =
- kPixelSelected + ((kInitialSubPelOffset
- + kFilterPeriodAdjust * kInputPixelStep)
- >> SUBPEL_BITS);
- ASSERT_EQ(lookup(in, ref_x), lookup(out, x))
- << "x == " << x << "width = " << Width();
- }
-
- /* Test the vertical filter. */
- ASM_REGISTER_STATE_CHECK(
- UUT_->v8_(in, kInputStride, out, kOutputStride,
- NULL, 0, kChangeFilters[kInitialSubPelOffset],
- kInputPixelStep, Width(), Height()));
-
- for (int y = 0; y < Height(); ++y) {
- const int kFilterPeriodAdjust = (y >> 3) << 3;
- const int ref_y =
- kPixelSelected + ((kInitialSubPelOffset
- + kFilterPeriodAdjust * kInputPixelStep)
- >> SUBPEL_BITS);
- ASSERT_EQ(lookup(in, ref_y * kInputStride), lookup(out, y * kInputStride))
- << "y == " << y;
- }
-
- /* Test the horizontal and vertical filters in combination. */
- ASM_REGISTER_STATE_CHECK(
- UUT_->hv8_(in, kInputStride, out, kOutputStride,
- kChangeFilters[kInitialSubPelOffset], kInputPixelStep,
- kChangeFilters[kInitialSubPelOffset], kInputPixelStep,
- Width(), Height()));
-
- for (int y = 0; y < Height(); ++y) {
- const int kFilterPeriodAdjustY = (y >> 3) << 3;
- const int ref_y =
- kPixelSelected + ((kInitialSubPelOffset
- + kFilterPeriodAdjustY * kInputPixelStep)
- >> SUBPEL_BITS);
- for (int x = 0; x < Width(); ++x) {
- const int kFilterPeriodAdjustX = (x >> 3) << 3;
- const int ref_x =
- kPixelSelected + ((kInitialSubPelOffset
- + kFilterPeriodAdjustX * kInputPixelStep)
- >> SUBPEL_BITS);
-
- ASSERT_EQ(lookup(in, ref_y * kInputStride + ref_x),
- lookup(out, y * kOutputStride + x))
- << "x == " << x << ", y == " << y;
- }
- }
-}
-
/* This test checks that enough rows and columns are filtered for every
   possible initial fractional position and scaling step. */
TEST_P(ConvolveTest, CheckScalingFiltering) {
diff --git a/test/dct16x16_test.cc b/test/dct16x16_test.cc
index 66ca4bb..0b2ecb5 100644
--- a/test/dct16x16_test.cc
+++ b/test/dct16x16_test.cc
@@ -19,6 +19,7 @@
#include "test/util.h"
#include "./vp9_rtcd.h"
+#include "./vpx_dsp_rtcd.h"
#include "vp9/common/vp9_entropy.h"
#include "vp9/common/vp9_scan.h"
#include "vpx/vpx_codec.h"
@@ -901,14 +902,6 @@
INSTANTIATE_TEST_CASE_P(
SSE2, Trans16x16HT,
::testing::Values(
- make_tuple(&vp9_highbd_fht16x16_sse2, &iht16x16_10, 0, VPX_BITS_10),
- make_tuple(&vp9_highbd_fht16x16_sse2, &iht16x16_10, 1, VPX_BITS_10),
- make_tuple(&vp9_highbd_fht16x16_sse2, &iht16x16_10, 2, VPX_BITS_10),
- make_tuple(&vp9_highbd_fht16x16_sse2, &iht16x16_10, 3, VPX_BITS_10),
- make_tuple(&vp9_highbd_fht16x16_sse2, &iht16x16_12, 0, VPX_BITS_12),
- make_tuple(&vp9_highbd_fht16x16_sse2, &iht16x16_12, 1, VPX_BITS_12),
- make_tuple(&vp9_highbd_fht16x16_sse2, &iht16x16_12, 2, VPX_BITS_12),
- make_tuple(&vp9_highbd_fht16x16_sse2, &iht16x16_12, 3, VPX_BITS_12),
make_tuple(&vp9_fht16x16_sse2, &vp9_iht16x16_256_add_c, 0, VPX_BITS_8),
make_tuple(&vp9_fht16x16_sse2, &vp9_iht16x16_256_add_c, 1, VPX_BITS_8),
make_tuple(&vp9_fht16x16_sse2, &vp9_iht16x16_256_add_c, 2, VPX_BITS_8),
@@ -929,7 +922,8 @@
&idct16x16_256_add_12_sse2, 3167, VPX_BITS_12)));
#endif // HAVE_SSE2 && CONFIG_VP9_HIGHBITDEPTH && !CONFIG_EMULATE_HARDWARE
-#if HAVE_MSA && !CONFIG_VP9_HIGHBITDEPTH && !CONFIG_EMULATE_HARDWARE
+// TODO(jingning) Re-enable the mips/msa unit test.
+#if HAVE_MSA && !CONFIG_VP9_HIGHBITDEPTH && !CONFIG_EMULATE_HARDWARE && 0
INSTANTIATE_TEST_CASE_P(
MSA, Trans16x16DCT,
::testing::Values(
diff --git a/test/decode_api_test.cc b/test/decode_api_test.cc
index 42ac13f..32be1f4 100644
--- a/test/decode_api_test.cc
+++ b/test/decode_api_test.cc
@@ -129,8 +129,13 @@
vpx_codec_ctx_t dec;
EXPECT_EQ(VPX_CODEC_OK, vpx_codec_dec_init(&dec, codec, NULL, 0));
const uint32_t frame_size = static_cast<uint32_t>(video.frame_size());
+#if CONFIG_VP9_HIGHBITDEPTH
EXPECT_EQ(VPX_CODEC_MEM_ERROR,
vpx_codec_decode(&dec, video.cxdata(), frame_size, NULL, 0));
+#else
+ EXPECT_EQ(VPX_CODEC_UNSUP_BITSTREAM,
+ vpx_codec_decode(&dec, video.cxdata(), frame_size, NULL, 0));
+#endif
vpx_codec_iter_t iter = NULL;
EXPECT_EQ(NULL, vpx_codec_get_frame(&dec, &iter));
diff --git a/test/fdct4x4_test.cc b/test/fdct4x4_test.cc
index 4ee4ad4..b5733c1 100644
--- a/test/fdct4x4_test.cc
+++ b/test/fdct4x4_test.cc
@@ -19,6 +19,7 @@
#include "test/util.h"
#include "./vp9_rtcd.h"
+#include "./vpx_dsp_rtcd.h"
#include "vp9/common/vp9_entropy.h"
#include "vpx/vpx_codec.h"
#include "vpx/vpx_integer.h"
@@ -531,21 +532,14 @@
INSTANTIATE_TEST_CASE_P(
SSE2, Trans4x4HT,
::testing::Values(
- make_tuple(&vp9_highbd_fht4x4_sse2, &iht4x4_10, 0, VPX_BITS_10),
- make_tuple(&vp9_highbd_fht4x4_sse2, &iht4x4_10, 1, VPX_BITS_10),
- make_tuple(&vp9_highbd_fht4x4_sse2, &iht4x4_10, 2, VPX_BITS_10),
- make_tuple(&vp9_highbd_fht4x4_sse2, &iht4x4_10, 3, VPX_BITS_10),
- make_tuple(&vp9_highbd_fht4x4_sse2, &iht4x4_12, 0, VPX_BITS_12),
- make_tuple(&vp9_highbd_fht4x4_sse2, &iht4x4_12, 1, VPX_BITS_12),
- make_tuple(&vp9_highbd_fht4x4_sse2, &iht4x4_12, 2, VPX_BITS_12),
- make_tuple(&vp9_highbd_fht4x4_sse2, &iht4x4_12, 3, VPX_BITS_12),
make_tuple(&vp9_fht4x4_sse2, &vp9_iht4x4_16_add_c, 0, VPX_BITS_8),
make_tuple(&vp9_fht4x4_sse2, &vp9_iht4x4_16_add_c, 1, VPX_BITS_8),
make_tuple(&vp9_fht4x4_sse2, &vp9_iht4x4_16_add_c, 2, VPX_BITS_8),
make_tuple(&vp9_fht4x4_sse2, &vp9_iht4x4_16_add_c, 3, VPX_BITS_8)));
#endif // HAVE_SSE2 && CONFIG_VP9_HIGHBITDEPTH && !CONFIG_EMULATE_HARDWARE
-#if HAVE_MSA && !CONFIG_VP9_HIGHBITDEPTH && !CONFIG_EMULATE_HARDWARE
+// TODO(jingning) Re-enable the mips/msa unit test.
+#if HAVE_MSA && !CONFIG_VP9_HIGHBITDEPTH && !CONFIG_EMULATE_HARDWARE && 0
INSTANTIATE_TEST_CASE_P(
MSA, Trans4x4DCT,
::testing::Values(
diff --git a/test/fdct8x8_test.cc b/test/fdct8x8_test.cc
index 61bfe50..6348057 100644
--- a/test/fdct8x8_test.cc
+++ b/test/fdct8x8_test.cc
@@ -19,6 +19,7 @@
#include "test/util.h"
#include "./vp9_rtcd.h"
+#include "./vpx_dsp_rtcd.h"
#include "vp9/common/vp9_entropy.h"
#include "vp9/common/vp9_scan.h"
#include "vpx/vpx_codec.h"
@@ -772,7 +773,8 @@
VPX_BITS_8)));
#endif
-#if HAVE_MSA && !CONFIG_VP9_HIGHBITDEPTH && !CONFIG_EMULATE_HARDWARE
+// TODO(jingning) Re-enable the mips/msa unit test.
+#if HAVE_MSA && !CONFIG_VP9_HIGHBITDEPTH && !CONFIG_EMULATE_HARDWARE && 0
INSTANTIATE_TEST_CASE_P(
MSA, FwdTrans8x8DCT,
::testing::Values(
diff --git a/test/invalid_file_test.cc b/test/invalid_file_test.cc
index 2cbbc6b..1b5ef5c 100644
--- a/test/invalid_file_test.cc
+++ b/test/invalid_file_test.cc
@@ -112,7 +112,9 @@
const DecodeParam kVP9InvalidFileTests[] = {
{1, "invalid-vp90-02-v2.webm"},
+#if CONFIG_VP9_HIGHBITDEPTH
{1, "invalid-vp90-2-00-quantizer-00.webm.ivf.s5861_r01-05_b6-.v2.ivf"},
+#endif
{1, "invalid-vp90-03-v3.webm"},
{1, "invalid-vp90-2-00-quantizer-11.webm.ivf.s52984_r01-05_b6-.ivf"},
{1, "invalid-vp90-2-00-quantizer-11.webm.ivf.s52984_r01-05_b6-z.ivf"},
diff --git a/test/lpf_8_test.cc b/test/lpf_8_test.cc
index ba51309..96aaa23 100644
--- a/test/lpf_8_test.cc
+++ b/test/lpf_8_test.cc
@@ -19,7 +19,7 @@
#include "test/util.h"
#include "./vpx_config.h"
-#include "./vp9_rtcd.h"
+#include "./vpx_dsp_rtcd.h"
#include "vp9/common/vp9_entropy.h"
#include "vp9/common/vp9_loopfilter.h"
#include "vpx/vpx_integer.h"
@@ -60,49 +60,49 @@
void wrapper_vertical_16_sse2(uint16_t *s, int p, const uint8_t *blimit,
const uint8_t *limit, const uint8_t *thresh,
int count, int bd) {
- vp9_highbd_lpf_vertical_16_sse2(s, p, blimit, limit, thresh, bd);
+ vpx_highbd_lpf_vertical_16_sse2(s, p, blimit, limit, thresh, bd);
}
void wrapper_vertical_16_c(uint16_t *s, int p, const uint8_t *blimit,
const uint8_t *limit, const uint8_t *thresh,
int count, int bd) {
- vp9_highbd_lpf_vertical_16_c(s, p, blimit, limit, thresh, bd);
+ vpx_highbd_lpf_vertical_16_c(s, p, blimit, limit, thresh, bd);
}
void wrapper_vertical_16_dual_sse2(uint16_t *s, int p, const uint8_t *blimit,
const uint8_t *limit, const uint8_t *thresh,
int count, int bd) {
- vp9_highbd_lpf_vertical_16_dual_sse2(s, p, blimit, limit, thresh, bd);
+ vpx_highbd_lpf_vertical_16_dual_sse2(s, p, blimit, limit, thresh, bd);
}
void wrapper_vertical_16_dual_c(uint16_t *s, int p, const uint8_t *blimit,
const uint8_t *limit, const uint8_t *thresh,
int count, int bd) {
- vp9_highbd_lpf_vertical_16_dual_c(s, p, blimit, limit, thresh, bd);
+ vpx_highbd_lpf_vertical_16_dual_c(s, p, blimit, limit, thresh, bd);
}
#else
void wrapper_vertical_16_sse2(uint8_t *s, int p, const uint8_t *blimit,
const uint8_t *limit, const uint8_t *thresh,
int count) {
- vp9_lpf_vertical_16_sse2(s, p, blimit, limit, thresh);
+ vpx_lpf_vertical_16_sse2(s, p, blimit, limit, thresh);
}
void wrapper_vertical_16_c(uint8_t *s, int p, const uint8_t *blimit,
const uint8_t *limit, const uint8_t *thresh,
int count) {
- vp9_lpf_vertical_16_c(s, p, blimit, limit, thresh);
+ vpx_lpf_vertical_16_c(s, p, blimit, limit, thresh);
}
void wrapper_vertical_16_dual_sse2(uint8_t *s, int p, const uint8_t *blimit,
const uint8_t *limit, const uint8_t *thresh,
int count) {
- vp9_lpf_vertical_16_dual_sse2(s, p, blimit, limit, thresh);
+ vpx_lpf_vertical_16_dual_sse2(s, p, blimit, limit, thresh);
}
void wrapper_vertical_16_dual_c(uint8_t *s, int p, const uint8_t *blimit,
const uint8_t *limit, const uint8_t *thresh,
int count) {
- vp9_lpf_vertical_16_dual_c(s, p, blimit, limit, thresh);
+ vpx_lpf_vertical_16_dual_c(s, p, blimit, limit, thresh);
}
#endif // CONFIG_VP9_HIGHBITDEPTH
#endif // HAVE_SSE2
@@ -114,25 +114,25 @@
void wrapper_vertical_16_neon(uint8_t *s, int p, const uint8_t *blimit,
const uint8_t *limit, const uint8_t *thresh,
int count) {
- vp9_lpf_vertical_16_neon(s, p, blimit, limit, thresh);
+ vpx_lpf_vertical_16_neon(s, p, blimit, limit, thresh);
}
void wrapper_vertical_16_c(uint8_t *s, int p, const uint8_t *blimit,
const uint8_t *limit, const uint8_t *thresh,
int count) {
- vp9_lpf_vertical_16_c(s, p, blimit, limit, thresh);
+ vpx_lpf_vertical_16_c(s, p, blimit, limit, thresh);
}
void wrapper_vertical_16_dual_neon(uint8_t *s, int p, const uint8_t *blimit,
const uint8_t *limit, const uint8_t *thresh,
int count) {
- vp9_lpf_vertical_16_dual_neon(s, p, blimit, limit, thresh);
+ vpx_lpf_vertical_16_dual_neon(s, p, blimit, limit, thresh);
}
void wrapper_vertical_16_dual_c(uint8_t *s, int p, const uint8_t *blimit,
const uint8_t *limit, const uint8_t *thresh,
int count) {
- vp9_lpf_vertical_16_dual_c(s, p, blimit, limit, thresh);
+ vpx_lpf_vertical_16_dual_c(s, p, blimit, limit, thresh);
}
#endif // CONFIG_VP9_HIGHBITDEPTH
#endif // HAVE_NEON_ASM
@@ -141,13 +141,13 @@
void wrapper_vertical_16_msa(uint8_t *s, int p, const uint8_t *blimit,
const uint8_t *limit, const uint8_t *thresh,
int count) {
- vp9_lpf_vertical_16_msa(s, p, blimit, limit, thresh);
+ vpx_lpf_vertical_16_msa(s, p, blimit, limit, thresh);
}
void wrapper_vertical_16_c(uint8_t *s, int p, const uint8_t *blimit,
const uint8_t *limit, const uint8_t *thresh,
int count) {
- vp9_lpf_vertical_16_c(s, p, blimit, limit, thresh);
+ vpx_lpf_vertical_16_c(s, p, blimit, limit, thresh);
}
#endif // HAVE_MSA && (!CONFIG_VP9_HIGHBITDEPTH)
@@ -534,46 +534,46 @@
INSTANTIATE_TEST_CASE_P(
SSE2, Loop8Test6Param,
::testing::Values(
- make_tuple(&vp9_highbd_lpf_horizontal_4_sse2,
- &vp9_highbd_lpf_horizontal_4_c, 8, 1),
- make_tuple(&vp9_highbd_lpf_vertical_4_sse2,
- &vp9_highbd_lpf_vertical_4_c, 8, 1),
- make_tuple(&vp9_highbd_lpf_horizontal_8_sse2,
- &vp9_highbd_lpf_horizontal_8_c, 8, 1),
- make_tuple(&vp9_highbd_lpf_horizontal_16_sse2,
- &vp9_highbd_lpf_horizontal_16_c, 8, 1),
- make_tuple(&vp9_highbd_lpf_horizontal_16_sse2,
- &vp9_highbd_lpf_horizontal_16_c, 8, 2),
- make_tuple(&vp9_highbd_lpf_vertical_8_sse2,
- &vp9_highbd_lpf_vertical_8_c, 8, 1),
+ make_tuple(&vpx_highbd_lpf_horizontal_4_sse2,
+ &vpx_highbd_lpf_horizontal_4_c, 8, 1),
+ make_tuple(&vpx_highbd_lpf_vertical_4_sse2,
+ &vpx_highbd_lpf_vertical_4_c, 8, 1),
+ make_tuple(&vpx_highbd_lpf_horizontal_8_sse2,
+ &vpx_highbd_lpf_horizontal_8_c, 8, 1),
+ make_tuple(&vpx_highbd_lpf_horizontal_16_sse2,
+ &vpx_highbd_lpf_horizontal_16_c, 8, 1),
+ make_tuple(&vpx_highbd_lpf_horizontal_16_sse2,
+ &vpx_highbd_lpf_horizontal_16_c, 8, 2),
+ make_tuple(&vpx_highbd_lpf_vertical_8_sse2,
+ &vpx_highbd_lpf_vertical_8_c, 8, 1),
make_tuple(&wrapper_vertical_16_sse2,
&wrapper_vertical_16_c, 8, 1),
- make_tuple(&vp9_highbd_lpf_horizontal_4_sse2,
- &vp9_highbd_lpf_horizontal_4_c, 10, 1),
- make_tuple(&vp9_highbd_lpf_vertical_4_sse2,
- &vp9_highbd_lpf_vertical_4_c, 10, 1),
- make_tuple(&vp9_highbd_lpf_horizontal_8_sse2,
- &vp9_highbd_lpf_horizontal_8_c, 10, 1),
- make_tuple(&vp9_highbd_lpf_horizontal_16_sse2,
- &vp9_highbd_lpf_horizontal_16_c, 10, 1),
- make_tuple(&vp9_highbd_lpf_horizontal_16_sse2,
- &vp9_highbd_lpf_horizontal_16_c, 10, 2),
- make_tuple(&vp9_highbd_lpf_vertical_8_sse2,
- &vp9_highbd_lpf_vertical_8_c, 10, 1),
+ make_tuple(&vpx_highbd_lpf_horizontal_4_sse2,
+ &vpx_highbd_lpf_horizontal_4_c, 10, 1),
+ make_tuple(&vpx_highbd_lpf_vertical_4_sse2,
+ &vpx_highbd_lpf_vertical_4_c, 10, 1),
+ make_tuple(&vpx_highbd_lpf_horizontal_8_sse2,
+ &vpx_highbd_lpf_horizontal_8_c, 10, 1),
+ make_tuple(&vpx_highbd_lpf_horizontal_16_sse2,
+ &vpx_highbd_lpf_horizontal_16_c, 10, 1),
+ make_tuple(&vpx_highbd_lpf_horizontal_16_sse2,
+ &vpx_highbd_lpf_horizontal_16_c, 10, 2),
+ make_tuple(&vpx_highbd_lpf_vertical_8_sse2,
+ &vpx_highbd_lpf_vertical_8_c, 10, 1),
make_tuple(&wrapper_vertical_16_sse2,
&wrapper_vertical_16_c, 10, 1),
- make_tuple(&vp9_highbd_lpf_horizontal_4_sse2,
- &vp9_highbd_lpf_horizontal_4_c, 12, 1),
- make_tuple(&vp9_highbd_lpf_vertical_4_sse2,
- &vp9_highbd_lpf_vertical_4_c, 12, 1),
- make_tuple(&vp9_highbd_lpf_horizontal_8_sse2,
- &vp9_highbd_lpf_horizontal_8_c, 12, 1),
- make_tuple(&vp9_highbd_lpf_horizontal_16_sse2,
- &vp9_highbd_lpf_horizontal_16_c, 12, 1),
- make_tuple(&vp9_highbd_lpf_horizontal_16_sse2,
- &vp9_highbd_lpf_horizontal_16_c, 12, 2),
- make_tuple(&vp9_highbd_lpf_vertical_8_sse2,
- &vp9_highbd_lpf_vertical_8_c, 12, 1),
+ make_tuple(&vpx_highbd_lpf_horizontal_4_sse2,
+ &vpx_highbd_lpf_horizontal_4_c, 12, 1),
+ make_tuple(&vpx_highbd_lpf_vertical_4_sse2,
+ &vpx_highbd_lpf_vertical_4_c, 12, 1),
+ make_tuple(&vpx_highbd_lpf_horizontal_8_sse2,
+ &vpx_highbd_lpf_horizontal_8_c, 12, 1),
+ make_tuple(&vpx_highbd_lpf_horizontal_16_sse2,
+ &vpx_highbd_lpf_horizontal_16_c, 12, 1),
+ make_tuple(&vpx_highbd_lpf_horizontal_16_sse2,
+ &vpx_highbd_lpf_horizontal_16_c, 12, 2),
+ make_tuple(&vpx_highbd_lpf_vertical_8_sse2,
+ &vpx_highbd_lpf_vertical_8_c, 12, 1),
make_tuple(&wrapper_vertical_16_sse2,
&wrapper_vertical_16_c, 12, 1),
make_tuple(&wrapper_vertical_16_dual_sse2,
@@ -586,10 +586,10 @@
INSTANTIATE_TEST_CASE_P(
SSE2, Loop8Test6Param,
::testing::Values(
- make_tuple(&vp9_lpf_horizontal_8_sse2, &vp9_lpf_horizontal_8_c, 8, 1),
- make_tuple(&vp9_lpf_horizontal_16_sse2, &vp9_lpf_horizontal_16_c, 8, 1),
- make_tuple(&vp9_lpf_horizontal_16_sse2, &vp9_lpf_horizontal_16_c, 8, 2),
- make_tuple(&vp9_lpf_vertical_8_sse2, &vp9_lpf_vertical_8_c, 8, 1),
+ make_tuple(&vpx_lpf_horizontal_8_sse2, &vpx_lpf_horizontal_8_c, 8, 1),
+ make_tuple(&vpx_lpf_horizontal_16_sse2, &vpx_lpf_horizontal_16_c, 8, 1),
+ make_tuple(&vpx_lpf_horizontal_16_sse2, &vpx_lpf_horizontal_16_c, 8, 2),
+ make_tuple(&vpx_lpf_vertical_8_sse2, &vpx_lpf_vertical_8_c, 8, 1),
make_tuple(&wrapper_vertical_16_sse2, &wrapper_vertical_16_c, 8, 1)));
#endif // CONFIG_VP9_HIGHBITDEPTH
#endif
@@ -598,8 +598,8 @@
INSTANTIATE_TEST_CASE_P(
AVX2, Loop8Test6Param,
::testing::Values(
- make_tuple(&vp9_lpf_horizontal_16_avx2, &vp9_lpf_horizontal_16_c, 8, 1),
- make_tuple(&vp9_lpf_horizontal_16_avx2, &vp9_lpf_horizontal_16_c, 8,
+ make_tuple(&vpx_lpf_horizontal_16_avx2, &vpx_lpf_horizontal_16_c, 8, 1),
+ make_tuple(&vpx_lpf_horizontal_16_avx2, &vpx_lpf_horizontal_16_c, 8,
2)));
#endif
@@ -608,42 +608,42 @@
INSTANTIATE_TEST_CASE_P(
SSE2, Loop8Test9Param,
::testing::Values(
- make_tuple(&vp9_highbd_lpf_horizontal_4_dual_sse2,
- &vp9_highbd_lpf_horizontal_4_dual_c, 8),
- make_tuple(&vp9_highbd_lpf_horizontal_8_dual_sse2,
- &vp9_highbd_lpf_horizontal_8_dual_c, 8),
- make_tuple(&vp9_highbd_lpf_vertical_4_dual_sse2,
- &vp9_highbd_lpf_vertical_4_dual_c, 8),
- make_tuple(&vp9_highbd_lpf_vertical_8_dual_sse2,
- &vp9_highbd_lpf_vertical_8_dual_c, 8),
- make_tuple(&vp9_highbd_lpf_horizontal_4_dual_sse2,
- &vp9_highbd_lpf_horizontal_4_dual_c, 10),
- make_tuple(&vp9_highbd_lpf_horizontal_8_dual_sse2,
- &vp9_highbd_lpf_horizontal_8_dual_c, 10),
- make_tuple(&vp9_highbd_lpf_vertical_4_dual_sse2,
- &vp9_highbd_lpf_vertical_4_dual_c, 10),
- make_tuple(&vp9_highbd_lpf_vertical_8_dual_sse2,
- &vp9_highbd_lpf_vertical_8_dual_c, 10),
- make_tuple(&vp9_highbd_lpf_horizontal_4_dual_sse2,
- &vp9_highbd_lpf_horizontal_4_dual_c, 12),
- make_tuple(&vp9_highbd_lpf_horizontal_8_dual_sse2,
- &vp9_highbd_lpf_horizontal_8_dual_c, 12),
- make_tuple(&vp9_highbd_lpf_vertical_4_dual_sse2,
- &vp9_highbd_lpf_vertical_4_dual_c, 12),
- make_tuple(&vp9_highbd_lpf_vertical_8_dual_sse2,
- &vp9_highbd_lpf_vertical_8_dual_c, 12)));
+ make_tuple(&vpx_highbd_lpf_horizontal_4_dual_sse2,
+ &vpx_highbd_lpf_horizontal_4_dual_c, 8),
+ make_tuple(&vpx_highbd_lpf_horizontal_8_dual_sse2,
+ &vpx_highbd_lpf_horizontal_8_dual_c, 8),
+ make_tuple(&vpx_highbd_lpf_vertical_4_dual_sse2,
+ &vpx_highbd_lpf_vertical_4_dual_c, 8),
+ make_tuple(&vpx_highbd_lpf_vertical_8_dual_sse2,
+ &vpx_highbd_lpf_vertical_8_dual_c, 8),
+ make_tuple(&vpx_highbd_lpf_horizontal_4_dual_sse2,
+ &vpx_highbd_lpf_horizontal_4_dual_c, 10),
+ make_tuple(&vpx_highbd_lpf_horizontal_8_dual_sse2,
+ &vpx_highbd_lpf_horizontal_8_dual_c, 10),
+ make_tuple(&vpx_highbd_lpf_vertical_4_dual_sse2,
+ &vpx_highbd_lpf_vertical_4_dual_c, 10),
+ make_tuple(&vpx_highbd_lpf_vertical_8_dual_sse2,
+ &vpx_highbd_lpf_vertical_8_dual_c, 10),
+ make_tuple(&vpx_highbd_lpf_horizontal_4_dual_sse2,
+ &vpx_highbd_lpf_horizontal_4_dual_c, 12),
+ make_tuple(&vpx_highbd_lpf_horizontal_8_dual_sse2,
+ &vpx_highbd_lpf_horizontal_8_dual_c, 12),
+ make_tuple(&vpx_highbd_lpf_vertical_4_dual_sse2,
+ &vpx_highbd_lpf_vertical_4_dual_c, 12),
+ make_tuple(&vpx_highbd_lpf_vertical_8_dual_sse2,
+ &vpx_highbd_lpf_vertical_8_dual_c, 12)));
#else
INSTANTIATE_TEST_CASE_P(
SSE2, Loop8Test9Param,
::testing::Values(
- make_tuple(&vp9_lpf_horizontal_4_dual_sse2,
- &vp9_lpf_horizontal_4_dual_c, 8),
- make_tuple(&vp9_lpf_horizontal_8_dual_sse2,
- &vp9_lpf_horizontal_8_dual_c, 8),
- make_tuple(&vp9_lpf_vertical_4_dual_sse2,
- &vp9_lpf_vertical_4_dual_c, 8),
- make_tuple(&vp9_lpf_vertical_8_dual_sse2,
- &vp9_lpf_vertical_8_dual_c, 8)));
+ make_tuple(&vpx_lpf_horizontal_4_dual_sse2,
+ &vpx_lpf_horizontal_4_dual_c, 8),
+ make_tuple(&vpx_lpf_horizontal_8_dual_sse2,
+ &vpx_lpf_horizontal_8_dual_c, 8),
+ make_tuple(&vpx_lpf_vertical_4_dual_sse2,
+ &vpx_lpf_vertical_4_dual_c, 8),
+ make_tuple(&vpx_lpf_vertical_8_dual_sse2,
+ &vpx_lpf_vertical_8_dual_c, 8)));
#endif // CONFIG_VP9_HIGHBITDEPTH
#endif
@@ -657,36 +657,36 @@
#if HAVE_NEON_ASM
// Using #if inside the macro is unsupported on MSVS but the tests are not
// currently built for MSVS with ARM and NEON.
- make_tuple(&vp9_lpf_horizontal_16_neon,
- &vp9_lpf_horizontal_16_c, 8, 1),
- make_tuple(&vp9_lpf_horizontal_16_neon,
- &vp9_lpf_horizontal_16_c, 8, 2),
+ make_tuple(&vpx_lpf_horizontal_16_neon,
+ &vpx_lpf_horizontal_16_c, 8, 1),
+ make_tuple(&vpx_lpf_horizontal_16_neon,
+ &vpx_lpf_horizontal_16_c, 8, 2),
make_tuple(&wrapper_vertical_16_neon,
&wrapper_vertical_16_c, 8, 1),
make_tuple(&wrapper_vertical_16_dual_neon,
&wrapper_vertical_16_dual_c, 8, 1),
- make_tuple(&vp9_lpf_horizontal_8_neon,
- &vp9_lpf_horizontal_8_c, 8, 1),
- make_tuple(&vp9_lpf_vertical_8_neon,
- &vp9_lpf_vertical_8_c, 8, 1),
#endif // HAVE_NEON_ASM
- make_tuple(&vp9_lpf_horizontal_4_neon,
- &vp9_lpf_horizontal_4_c, 8, 1),
- make_tuple(&vp9_lpf_vertical_4_neon,
- &vp9_lpf_vertical_4_c, 8, 1)));
+ make_tuple(&vpx_lpf_horizontal_8_neon,
+ &vpx_lpf_horizontal_8_c, 8, 1),
+ make_tuple(&vpx_lpf_vertical_8_neon,
+ &vpx_lpf_vertical_8_c, 8, 1),
+ make_tuple(&vpx_lpf_horizontal_4_neon,
+ &vpx_lpf_horizontal_4_c, 8, 1),
+ make_tuple(&vpx_lpf_vertical_4_neon,
+ &vpx_lpf_vertical_4_c, 8, 1)));
INSTANTIATE_TEST_CASE_P(
NEON, Loop8Test9Param,
::testing::Values(
#if HAVE_NEON_ASM
- make_tuple(&vp9_lpf_horizontal_8_dual_neon,
- &vp9_lpf_horizontal_8_dual_c, 8),
- make_tuple(&vp9_lpf_vertical_8_dual_neon,
- &vp9_lpf_vertical_8_dual_c, 8),
+ make_tuple(&vpx_lpf_horizontal_8_dual_neon,
+ &vpx_lpf_horizontal_8_dual_c, 8),
+ make_tuple(&vpx_lpf_vertical_8_dual_neon,
+ &vpx_lpf_vertical_8_dual_c, 8),
#endif // HAVE_NEON_ASM
- make_tuple(&vp9_lpf_horizontal_4_dual_neon,
- &vp9_lpf_horizontal_4_dual_c, 8),
- make_tuple(&vp9_lpf_vertical_4_dual_neon,
- &vp9_lpf_vertical_4_dual_c, 8)));
+ make_tuple(&vpx_lpf_horizontal_4_dual_neon,
+ &vpx_lpf_horizontal_4_dual_c, 8),
+ make_tuple(&vpx_lpf_vertical_4_dual_neon,
+ &vpx_lpf_vertical_4_dual_c, 8)));
#endif // CONFIG_VP9_HIGHBITDEPTH
#endif // HAVE_NEON
@@ -694,23 +694,23 @@
INSTANTIATE_TEST_CASE_P(
MSA, Loop8Test6Param,
::testing::Values(
- make_tuple(&vp9_lpf_horizontal_8_msa, &vp9_lpf_horizontal_8_c, 8, 1),
- make_tuple(&vp9_lpf_horizontal_16_msa, &vp9_lpf_horizontal_16_c, 8, 1),
- make_tuple(&vp9_lpf_horizontal_16_msa, &vp9_lpf_horizontal_16_c, 8, 2),
- make_tuple(&vp9_lpf_vertical_8_msa, &vp9_lpf_vertical_8_c, 8, 1),
+ make_tuple(&vpx_lpf_horizontal_8_msa, &vpx_lpf_horizontal_8_c, 8, 1),
+ make_tuple(&vpx_lpf_horizontal_16_msa, &vpx_lpf_horizontal_16_c, 8, 1),
+ make_tuple(&vpx_lpf_horizontal_16_msa, &vpx_lpf_horizontal_16_c, 8, 2),
+ make_tuple(&vpx_lpf_vertical_8_msa, &vpx_lpf_vertical_8_c, 8, 1),
make_tuple(&wrapper_vertical_16_msa, &wrapper_vertical_16_c, 8, 1)));
INSTANTIATE_TEST_CASE_P(
MSA, Loop8Test9Param,
::testing::Values(
- make_tuple(&vp9_lpf_horizontal_4_dual_msa,
- &vp9_lpf_horizontal_4_dual_c, 8),
- make_tuple(&vp9_lpf_horizontal_8_dual_msa,
- &vp9_lpf_horizontal_8_dual_c, 8),
- make_tuple(&vp9_lpf_vertical_4_dual_msa,
- &vp9_lpf_vertical_4_dual_c, 8),
- make_tuple(&vp9_lpf_vertical_8_dual_msa,
- &vp9_lpf_vertical_8_dual_c, 8)));
+ make_tuple(&vpx_lpf_horizontal_4_dual_msa,
+ &vpx_lpf_horizontal_4_dual_c, 8),
+ make_tuple(&vpx_lpf_horizontal_8_dual_msa,
+ &vpx_lpf_horizontal_8_dual_c, 8),
+ make_tuple(&vpx_lpf_vertical_4_dual_msa,
+ &vpx_lpf_vertical_4_dual_c, 8),
+ make_tuple(&vpx_lpf_vertical_8_dual_msa,
+ &vpx_lpf_vertical_8_dual_c, 8)));
#endif // HAVE_MSA && (!CONFIG_VP9_HIGHBITDEPTH)
} // namespace
diff --git a/test/partial_idct_test.cc b/test/partial_idct_test.cc
index 3e93ae8..8d19aff 100644
--- a/test/partial_idct_test.cc
+++ b/test/partial_idct_test.cc
@@ -19,6 +19,7 @@
#include "test/util.h"
#include "./vp9_rtcd.h"
+#include "./vpx_dsp_rtcd.h"
#include "vp9/common/vp9_blockd.h"
#include "vp9/common/vp9_scan.h"
#include "vpx/vpx_integer.h"
diff --git a/test/test.mk b/test/test.mk
index a8a365e..8ecc856 100644
--- a/test/test.mk
+++ b/test/test.mk
@@ -91,6 +91,7 @@
## shared library builds don't make these functions accessible.
##
ifeq ($(CONFIG_SHARED),)
+LIBVPX_TEST_SRCS-$(CONFIG_VP9) += lpf_8_test.cc
## VP8
ifneq ($(CONFIG_VP8_ENCODER)$(CONFIG_VP8_DECODER),)
@@ -142,7 +143,6 @@
LIBVPX_TEST_SRCS-$(CONFIG_VP9_ENCODER) += fdct8x8_test.cc
LIBVPX_TEST_SRCS-$(CONFIG_VP9_ENCODER) += variance_test.cc
LIBVPX_TEST_SRCS-$(CONFIG_VP9_ENCODER) += vp9_subtract_test.cc
-LIBVPX_TEST_SRCS-$(CONFIG_VP9_ENCODER) += lpf_8_test.cc
LIBVPX_TEST_SRCS-$(CONFIG_VP9_ENCODER) += vp9_avg_test.cc
LIBVPX_TEST_SRCS-$(CONFIG_VP9_ENCODER) += vp9_error_block_test.cc
LIBVPX_TEST_SRCS-$(CONFIG_VP9_ENCODER) += vp9_quantize_test.cc
diff --git a/test/variance_test.cc b/test/variance_test.cc
index 64095bc..74775bb 100644
--- a/test/variance_test.cc
+++ b/test/variance_test.cc
@@ -1994,5 +1994,47 @@
make_tuple(5, 6, subpel_variance32x64_msa, 0),
make_tuple(6, 5, subpel_variance64x32_msa, 0),
make_tuple(6, 6, subpel_variance64x64_msa, 0)));
+
+const SubpixAvgVarMxNFunc subpel_avg_variance64x64_msa =
+ vpx_sub_pixel_avg_variance64x64_msa;
+const SubpixAvgVarMxNFunc subpel_avg_variance64x32_msa =
+ vpx_sub_pixel_avg_variance64x32_msa;
+const SubpixAvgVarMxNFunc subpel_avg_variance32x64_msa =
+ vpx_sub_pixel_avg_variance32x64_msa;
+const SubpixAvgVarMxNFunc subpel_avg_variance32x32_msa =
+ vpx_sub_pixel_avg_variance32x32_msa;
+const SubpixAvgVarMxNFunc subpel_avg_variance32x16_msa =
+ vpx_sub_pixel_avg_variance32x16_msa;
+const SubpixAvgVarMxNFunc subpel_avg_variance16x32_msa =
+ vpx_sub_pixel_avg_variance16x32_msa;
+const SubpixAvgVarMxNFunc subpel_avg_variance16x16_msa =
+ vpx_sub_pixel_avg_variance16x16_msa;
+const SubpixAvgVarMxNFunc subpel_avg_variance16x8_msa =
+ vpx_sub_pixel_avg_variance16x8_msa;
+const SubpixAvgVarMxNFunc subpel_avg_variance8x16_msa =
+ vpx_sub_pixel_avg_variance8x16_msa;
+const SubpixAvgVarMxNFunc subpel_avg_variance8x8_msa =
+ vpx_sub_pixel_avg_variance8x8_msa;
+const SubpixAvgVarMxNFunc subpel_avg_variance8x4_msa =
+ vpx_sub_pixel_avg_variance8x4_msa;
+const SubpixAvgVarMxNFunc subpel_avg_variance4x8_msa =
+ vpx_sub_pixel_avg_variance4x8_msa;
+const SubpixAvgVarMxNFunc subpel_avg_variance4x4_msa =
+ vpx_sub_pixel_avg_variance4x4_msa;
+INSTANTIATE_TEST_CASE_P(
+ MSA, VpxSubpelAvgVarianceTest,
+ ::testing::Values(make_tuple(6, 6, subpel_avg_variance64x64_msa, 0),
+ make_tuple(6, 5, subpel_avg_variance64x32_msa, 0),
+ make_tuple(5, 6, subpel_avg_variance32x64_msa, 0),
+ make_tuple(5, 5, subpel_avg_variance32x32_msa, 0),
+ make_tuple(5, 4, subpel_avg_variance32x16_msa, 0),
+ make_tuple(4, 5, subpel_avg_variance16x32_msa, 0),
+ make_tuple(4, 4, subpel_avg_variance16x16_msa, 0),
+ make_tuple(4, 3, subpel_avg_variance16x8_msa, 0),
+ make_tuple(3, 4, subpel_avg_variance8x16_msa, 0),
+ make_tuple(3, 3, subpel_avg_variance8x8_msa, 0),
+ make_tuple(3, 2, subpel_avg_variance8x4_msa, 0),
+ make_tuple(2, 3, subpel_avg_variance4x8_msa, 0),
+ make_tuple(2, 2, subpel_avg_variance4x4_msa, 0)));
#endif // HAVE_MSA
} // namespace
diff --git a/test/vp9_arf_freq_test.cc b/test/vp9_arf_freq_test.cc
index 92c236f..07968bc 100644
--- a/test/vp9_arf_freq_test.cc
+++ b/test/vp9_arf_freq_test.cc
@@ -21,8 +21,8 @@
const unsigned int kFrames = 100;
const int kBitrate = 500;
-#define ARF_NOT_SEEN 1000001
-#define ARF_SEEN_ONCE 1000000
+#define ARF_NOT_SEEN 1000001
+#define ARF_SEEN_ONCE 1000000
typedef struct {
const char *filename;
@@ -108,7 +108,7 @@
}
virtual void BeginPassHook(unsigned int) {
- min_arf_ = ARF_NOT_SEEN;
+ min_run_ = ARF_NOT_SEEN;
run_of_visible_frames_ = 0;
}
@@ -137,15 +137,15 @@
if (frames == 1) {
run_of_visible_frames_++;
} else if (frames == 2) {
- if (min_arf_ == ARF_NOT_SEEN) {
- min_arf_ = ARF_SEEN_ONCE;
- } else if (min_arf_ == ARF_SEEN_ONCE ||
- run_of_visible_frames_ < min_arf_) {
- min_arf_ = run_of_visible_frames_;
+ if (min_run_ == ARF_NOT_SEEN) {
+ min_run_ = ARF_SEEN_ONCE;
+ } else if (min_run_ == ARF_SEEN_ONCE ||
+ run_of_visible_frames_ < min_run_) {
+ min_run_ = run_of_visible_frames_;
}
run_of_visible_frames_ = 1;
} else {
- min_arf_ = 0;
+ min_run_ = 0;
run_of_visible_frames_ = 1;
}
}
@@ -166,8 +166,8 @@
}
}
- int GetMinArfDistance() const {
- return min_arf_;
+ int GetMinVisibleRun() const {
+ return min_run_;
}
int GetMinArfDistanceRequested() const {
@@ -185,7 +185,7 @@
private:
int min_arf_requested_;
- int min_arf_;
+ int min_run_;
int run_of_visible_frames_;
};
@@ -214,9 +214,10 @@
}
ASSERT_NO_FATAL_FAILURE(RunLoop(video));
- const int min_arf_dist = GetMinArfDistance();
+ const int min_run = GetMinVisibleRun();
const int min_arf_dist_requested = GetMinArfDistanceRequested();
- if (min_arf_dist != ARF_NOT_SEEN && min_arf_dist != ARF_SEEN_ONCE) {
+ if (min_run != ARF_NOT_SEEN && min_run != ARF_SEEN_ONCE) {
+ const int min_arf_dist = min_run + 1;
EXPECT_GE(min_arf_dist, min_arf_dist_requested);
}
delete(video);
diff --git a/test/vp9_avg_test.cc b/test/vp9_avg_test.cc
index 856b4c1..09c2069 100644
--- a/test/vp9_avg_test.cc
+++ b/test/vp9_avg_test.cc
@@ -291,6 +291,12 @@
make_tuple(16, &vp9_int_pro_row_neon, &vp9_int_pro_row_c),
make_tuple(32, &vp9_int_pro_row_neon, &vp9_int_pro_row_c),
make_tuple(64, &vp9_int_pro_row_neon, &vp9_int_pro_row_c)));
+
+INSTANTIATE_TEST_CASE_P(
+ NEON, IntProColTest, ::testing::Values(
+ make_tuple(16, &vp9_int_pro_col_neon, &vp9_int_pro_col_c),
+ make_tuple(32, &vp9_int_pro_col_neon, &vp9_int_pro_col_c),
+ make_tuple(64, &vp9_int_pro_col_neon, &vp9_int_pro_col_c)));
#endif
#if HAVE_MSA
diff --git a/test/vp9_boolcoder_test.cc b/test/vp9_boolcoder_test.cc
index c7f0cd8..b917429 100644
--- a/test/vp9_boolcoder_test.cc
+++ b/test/vp9_boolcoder_test.cc
@@ -14,11 +14,11 @@
#include "third_party/googletest/src/include/gtest/gtest.h"
-#include "vp9/decoder/vp9_reader.h"
-#include "vp9/encoder/vp9_writer.h"
+#include "vpx/vpx_integer.h"
+#include "vpx_dsp/bitreader.h"
+#include "vpx_dsp/bitwriter.h"
#include "test/acm_random.h"
-#include "vpx/vpx_integer.h"
using libvpx_test::ACMRandom;
@@ -50,9 +50,9 @@
const int random_seed = 6432;
const int kBufferSize = 10000;
ACMRandom bit_rnd(random_seed);
- vp9_writer bw;
+ vpx_writer bw;
uint8_t bw_buffer[kBufferSize];
- vp9_start_encode(&bw, bw_buffer);
+ vpx_start_encode(&bw, bw_buffer);
int bit = (bit_method == 0) ? 0 : (bit_method == 1) ? 1 : 0;
for (int i = 0; i < kBitsToTest; ++i) {
@@ -61,16 +61,16 @@
} else if (bit_method == 3) {
bit = bit_rnd(2);
}
- vp9_write(&bw, bit, static_cast<int>(probas[i]));
+ vpx_write(&bw, bit, static_cast<int>(probas[i]));
}
- vp9_stop_encode(&bw);
+ vpx_stop_encode(&bw);
// First bit should be zero
GTEST_ASSERT_EQ(bw_buffer[0] & 0x80, 0);
- vp9_reader br;
- vp9_reader_init(&br, bw_buffer, kBufferSize, NULL, NULL);
+ vpx_reader br;
+ vpx_reader_init(&br, bw_buffer, kBufferSize, NULL, NULL);
bit_rnd.Reset(random_seed);
for (int i = 0; i < kBitsToTest; ++i) {
if (bit_method == 2) {
@@ -78,7 +78,7 @@
} else if (bit_method == 3) {
bit = bit_rnd(2);
}
- GTEST_ASSERT_EQ(vp9_read(&br, probas[i]), bit)
+ GTEST_ASSERT_EQ(vpx_read(&br, probas[i]), bit)
<< "pos: " << i << " / " << kBitsToTest
<< " bit_method: " << bit_method
<< " method: " << method;
diff --git a/test/vp9_quantize_test.cc b/test/vp9_quantize_test.cc
index 943c00b..0e09652 100644
--- a/test/vp9_quantize_test.cc
+++ b/test/vp9_quantize_test.cc
@@ -19,7 +19,7 @@
#include "test/register_state_check.h"
#include "test/util.h"
#include "./vpx_config.h"
-#include "./vp9_rtcd.h"
+#include "./vpx_dsp_rtcd.h"
#include "vp9/common/vp9_entropy.h"
#include "vp9/common/vp9_scan.h"
#include "vpx/vpx_codec.h"
diff --git a/vp9/common/mips/dspr2/vp9_common_dspr2.h b/vp9/common/mips/dspr2/vp9_common_dspr2.h
index 6ebea9f..cd07a56 100644
--- a/vp9/common/mips/dspr2/vp9_common_dspr2.h
+++ b/vp9/common/mips/dspr2/vp9_common_dspr2.h
@@ -15,14 +15,13 @@
#include "./vpx_config.h"
#include "vpx/vpx_integer.h"
-#include "vp9/common/vp9_common.h"
+#include "vpx_dsp/mips/common_dspr2.h"
#ifdef __cplusplus
extern "C" {
#endif
#if HAVE_DSPR2
-#define CROP_WIDTH 512
extern uint8_t *vp9_ff_cropTbl;
#define DCT_CONST_ROUND_SHIFT_TWICE_COSPI_16_64(input) ({ \
@@ -51,40 +50,6 @@
); \
out; })
-static INLINE void vp9_prefetch_load(const unsigned char *src) {
- __asm__ __volatile__ (
- "pref 0, 0(%[src]) \n\t"
- :
- : [src] "r" (src)
- );
-}
-
-/* prefetch data for store */
-static INLINE void vp9_prefetch_store(unsigned char *dst) {
- __asm__ __volatile__ (
- "pref 1, 0(%[dst]) \n\t"
- :
- : [dst] "r" (dst)
- );
-}
-
-static INLINE void vp9_prefetch_load_streamed(const unsigned char *src) {
- __asm__ __volatile__ (
- "pref 4, 0(%[src]) \n\t"
- :
- : [src] "r" (src)
- );
-}
-
-/* prefetch data for store */
-static INLINE void vp9_prefetch_store_streamed(unsigned char *dst) {
- __asm__ __volatile__ (
- "pref 5, 0(%[dst]) \n\t"
- :
- : [dst] "r" (dst)
- );
-}
-
void vp9_idct32_cols_add_blk_dspr2(int16_t *input, uint8_t *dest,
int dest_stride);
diff --git a/vp9/common/mips/dspr2/vp9_convolve2_avg_dspr2.c b/vp9/common/mips/dspr2/vp9_convolve2_avg_dspr2.c
index 91d62bc..aad7c45 100644
--- a/vp9/common/mips/dspr2/vp9_convolve2_avg_dspr2.c
+++ b/vp9/common/mips/dspr2/vp9_convolve2_avg_dspr2.c
@@ -44,7 +44,7 @@
for (y = h; y--;) {
/* prefetch data to cache memory */
- vp9_prefetch_store(dst + dst_stride);
+ prefetch_store(dst + dst_stride);
for (x = 0; x < w; x += 4) {
src_ptr = src + x;
@@ -148,8 +148,8 @@
for (y = h; y--;) {
/* prefetch data to cache memory */
- vp9_prefetch_store(dst + dst_stride);
- vp9_prefetch_store(dst + dst_stride + 32);
+ prefetch_store(dst + dst_stride);
+ prefetch_store(dst + dst_stride + 32);
for (x = 0; x < 64; x += 4) {
src_ptr = src + x;
@@ -245,7 +245,7 @@
: [pos] "r" (pos)
);
- vp9_prefetch_store(dst);
+ prefetch_store(dst);
switch (w) {
case 4:
@@ -257,7 +257,7 @@
filter_y, w, h);
break;
case 64:
- vp9_prefetch_store(dst + 32);
+ prefetch_store(dst + 32);
convolve_bi_avg_vert_64_dspr2(src, src_stride,
dst, dst_stride,
filter_y, h);
diff --git a/vp9/common/mips/dspr2/vp9_convolve2_avg_horiz_dspr2.c b/vp9/common/mips/dspr2/vp9_convolve2_avg_horiz_dspr2.c
index 148b20f..bc60e93 100644
--- a/vp9/common/mips/dspr2/vp9_convolve2_avg_horiz_dspr2.c
+++ b/vp9/common/mips/dspr2/vp9_convolve2_avg_horiz_dspr2.c
@@ -40,9 +40,9 @@
for (y = h; y--;) {
/* prefetch data to cache memory */
- vp9_prefetch_load(src + src_stride);
- vp9_prefetch_load(src + src_stride + 32);
- vp9_prefetch_store(dst + dst_stride);
+ prefetch_load(src + src_stride);
+ prefetch_load(src + src_stride + 32);
+ prefetch_store(dst + dst_stride);
__asm__ __volatile__ (
"ulw %[tp1], 0(%[src]) \n\t"
@@ -135,9 +135,9 @@
for (y = h; y--;) {
/* prefetch data to cache memory */
- vp9_prefetch_load(src + src_stride);
- vp9_prefetch_load(src + src_stride + 32);
- vp9_prefetch_store(dst + dst_stride);
+ prefetch_load(src + src_stride);
+ prefetch_load(src + src_stride + 32);
+ prefetch_store(dst + dst_stride);
__asm__ __volatile__ (
"ulw %[tp1], 0(%[src]) \n\t"
@@ -290,9 +290,9 @@
dst = dst_ptr;
/* prefetch data to cache memory */
- vp9_prefetch_load(src_ptr + src_stride);
- vp9_prefetch_load(src_ptr + src_stride + 32);
- vp9_prefetch_store(dst_ptr + dst_stride);
+ prefetch_load(src_ptr + src_stride);
+ prefetch_load(src_ptr + src_stride + 32);
+ prefetch_store(dst_ptr + dst_stride);
for (c = 0; c < count; c++) {
__asm__ __volatile__ (
@@ -539,11 +539,11 @@
dst = dst_ptr;
/* prefetch data to cache memory */
- vp9_prefetch_load(src_ptr + src_stride);
- vp9_prefetch_load(src_ptr + src_stride + 32);
- vp9_prefetch_load(src_ptr + src_stride + 64);
- vp9_prefetch_store(dst_ptr + dst_stride);
- vp9_prefetch_store(dst_ptr + dst_stride + 32);
+ prefetch_load(src_ptr + src_stride);
+ prefetch_load(src_ptr + src_stride + 32);
+ prefetch_load(src_ptr + src_stride + 64);
+ prefetch_store(dst_ptr + dst_stride);
+ prefetch_store(dst_ptr + dst_stride + 32);
for (c = 0; c < 4; c++) {
__asm__ __volatile__ (
@@ -781,9 +781,9 @@
);
/* prefetch data to cache memory */
- vp9_prefetch_load(src);
- vp9_prefetch_load(src + 32);
- vp9_prefetch_store(dst);
+ prefetch_load(src);
+ prefetch_load(src + 32);
+ prefetch_store(dst);
switch (w) {
case 4:
@@ -807,8 +807,8 @@
filter_x, h, 2);
break;
case 64:
- vp9_prefetch_load(src + 64);
- vp9_prefetch_store(dst + 32);
+ prefetch_load(src + 64);
+ prefetch_store(dst + 32);
convolve_bi_avg_horiz_64_dspr2(src, src_stride,
dst, dst_stride,
diff --git a/vp9/common/mips/dspr2/vp9_convolve2_dspr2.c b/vp9/common/mips/dspr2/vp9_convolve2_dspr2.c
index 92644f2..b714f9a 100644
--- a/vp9/common/mips/dspr2/vp9_convolve2_dspr2.c
+++ b/vp9/common/mips/dspr2/vp9_convolve2_dspr2.c
@@ -41,8 +41,8 @@
for (y = h; y--;) {
dst_ptr = dst;
/* prefetch data to cache memory */
- vp9_prefetch_load(src + src_stride);
- vp9_prefetch_load(src + src_stride + 32);
+ prefetch_load(src + src_stride);
+ prefetch_load(src + src_stride + 32);
__asm__ __volatile__ (
"ulw %[tp1], 0(%[src]) \n\t"
@@ -132,8 +132,8 @@
for (y = h; y--;) {
/* prefetch data to cache memory */
- vp9_prefetch_load(src + src_stride);
- vp9_prefetch_load(src + src_stride + 32);
+ prefetch_load(src + src_stride);
+ prefetch_load(src + src_stride + 32);
dst_ptr = dst;
odd_dst = (dst_ptr + dst_stride);
@@ -272,8 +272,8 @@
for (y = h; y--;) {
/* prefetch data to cache memory */
- vp9_prefetch_load(src_ptr + src_stride);
- vp9_prefetch_load(src_ptr + src_stride + 32);
+ prefetch_load(src_ptr + src_stride);
+ prefetch_load(src_ptr + src_stride + 32);
src = src_ptr;
dst = dst_ptr;
@@ -504,9 +504,9 @@
for (y = h; y--;) {
/* prefetch data to cache memory */
- vp9_prefetch_load(src_ptr + src_stride);
- vp9_prefetch_load(src_ptr + src_stride + 32);
- vp9_prefetch_load(src_ptr + src_stride + 64);
+ prefetch_load(src_ptr + src_stride);
+ prefetch_load(src_ptr + src_stride + 32);
+ prefetch_load(src_ptr + src_stride + 64);
src = src_ptr;
dst = dst_ptr;
@@ -747,8 +747,8 @@
);
/* prefetch data to cache memory */
- vp9_prefetch_load(src);
- vp9_prefetch_load(src + 32);
+ prefetch_load(src);
+ prefetch_load(src + 32);
switch (w) {
case 4:
@@ -769,7 +769,7 @@
(w/16));
break;
case 64:
- vp9_prefetch_load(src + 32);
+ prefetch_load(src + 32);
convolve_bi_horiz_64_transposed_dspr2(src, src_stride,
dst, dst_stride,
filter, h);
diff --git a/vp9/common/mips/dspr2/vp9_convolve2_horiz_dspr2.c b/vp9/common/mips/dspr2/vp9_convolve2_horiz_dspr2.c
index 1debdb4..27ea100 100644
--- a/vp9/common/mips/dspr2/vp9_convolve2_horiz_dspr2.c
+++ b/vp9/common/mips/dspr2/vp9_convolve2_horiz_dspr2.c
@@ -39,9 +39,9 @@
for (y = h; y--;) {
/* prefetch data to cache memory */
- vp9_prefetch_load(src + src_stride);
- vp9_prefetch_load(src + src_stride + 32);
- vp9_prefetch_store(dst + dst_stride);
+ prefetch_load(src + src_stride);
+ prefetch_load(src + src_stride + 32);
+ prefetch_store(dst + dst_stride);
__asm__ __volatile__ (
"ulw %[tp1], 0(%[src]) \n\t"
@@ -122,9 +122,9 @@
for (y = h; y--;) {
/* prefetch data to cache memory */
- vp9_prefetch_load(src + src_stride);
- vp9_prefetch_load(src + src_stride + 32);
- vp9_prefetch_store(dst + dst_stride);
+ prefetch_load(src + src_stride);
+ prefetch_load(src + src_stride + 32);
+ prefetch_store(dst + dst_stride);
__asm__ __volatile__ (
"ulw %[tp1], 0(%[src]) \n\t"
@@ -252,9 +252,9 @@
dst = dst_ptr;
/* prefetch data to cache memory */
- vp9_prefetch_load(src_ptr + src_stride);
- vp9_prefetch_load(src_ptr + src_stride + 32);
- vp9_prefetch_store(dst_ptr + dst_stride);
+ prefetch_load(src_ptr + src_stride);
+ prefetch_load(src_ptr + src_stride + 32);
+ prefetch_store(dst_ptr + dst_stride);
for (c = 0; c < count; c++) {
__asm__ __volatile__ (
@@ -459,11 +459,11 @@
dst = dst_ptr;
/* prefetch data to cache memory */
- vp9_prefetch_load(src_ptr + src_stride);
- vp9_prefetch_load(src_ptr + src_stride + 32);
- vp9_prefetch_load(src_ptr + src_stride + 64);
- vp9_prefetch_store(dst_ptr + dst_stride);
- vp9_prefetch_store(dst_ptr + dst_stride + 32);
+ prefetch_load(src_ptr + src_stride);
+ prefetch_load(src_ptr + src_stride + 32);
+ prefetch_load(src_ptr + src_stride + 64);
+ prefetch_store(dst_ptr + dst_stride);
+ prefetch_store(dst_ptr + dst_stride + 32);
for (c = 0; c < 4; c++) {
__asm__ __volatile__ (
@@ -651,7 +651,7 @@
if (16 == x_step_q4) {
uint32_t pos = 38;
- vp9_prefetch_load((const uint8_t *)filter_x);
+ prefetch_load((const uint8_t *)filter_x);
/* bit position for extract from acc */
__asm__ __volatile__ (
@@ -661,9 +661,9 @@
);
/* prefetch data to cache memory */
- vp9_prefetch_load(src);
- vp9_prefetch_load(src + 32);
- vp9_prefetch_store(dst);
+ prefetch_load(src);
+ prefetch_load(src + 32);
+ prefetch_store(dst);
switch (w) {
case 4:
@@ -687,8 +687,8 @@
filter_x, (int32_t)h, 2);
break;
case 64:
- vp9_prefetch_load(src + 64);
- vp9_prefetch_store(dst + 32);
+ prefetch_load(src + 64);
+ prefetch_store(dst + 32);
convolve_bi_horiz_64_dspr2(src, (int32_t)src_stride,
dst, (int32_t)dst_stride,
diff --git a/vp9/common/mips/dspr2/vp9_convolve2_vert_dspr2.c b/vp9/common/mips/dspr2/vp9_convolve2_vert_dspr2.c
index bf01f11..32f5fb6 100644
--- a/vp9/common/mips/dspr2/vp9_convolve2_vert_dspr2.c
+++ b/vp9/common/mips/dspr2/vp9_convolve2_vert_dspr2.c
@@ -44,7 +44,7 @@
for (y = h; y--;) {
/* prefetch data to cache memory */
- vp9_prefetch_store(dst + dst_stride);
+ prefetch_store(dst + dst_stride);
for (x = 0; x < w; x += 4) {
src_ptr = src + x;
@@ -141,7 +141,7 @@
for (y = h; y--;) {
/* prefetch data to cache memory */
- vp9_prefetch_store(dst + dst_stride);
+ prefetch_store(dst + dst_stride);
for (x = 0; x < 64; x += 4) {
src_ptr = src + x;
@@ -230,7 +230,7 @@
: [pos] "r" (pos)
);
- vp9_prefetch_store(dst);
+ prefetch_store(dst);
switch (w) {
case 4 :
@@ -242,7 +242,7 @@
filter_y, w, h);
break;
case 64 :
- vp9_prefetch_store(dst + 32);
+ prefetch_store(dst + 32);
convolve_bi_vert_64_dspr2(src, src_stride,
dst, dst_stride,
filter_y, h);
diff --git a/vp9/common/mips/dspr2/vp9_convolve8_avg_dspr2.c b/vp9/common/mips/dspr2/vp9_convolve8_avg_dspr2.c
index 1742279..d9cbfe6 100644
--- a/vp9/common/mips/dspr2/vp9_convolve8_avg_dspr2.c
+++ b/vp9/common/mips/dspr2/vp9_convolve8_avg_dspr2.c
@@ -49,7 +49,7 @@
for (y = h; y--;) {
/* prefetch data to cache memory */
- vp9_prefetch_store(dst + dst_stride);
+ prefetch_store(dst + dst_stride);
for (x = 0; x < w; x += 4) {
src_ptr = src + x;
@@ -210,8 +210,8 @@
for (y = h; y--;) {
/* prefetch data to cache memory */
- vp9_prefetch_store(dst + dst_stride);
- vp9_prefetch_store(dst + dst_stride + 32);
+ prefetch_store(dst + dst_stride);
+ prefetch_store(dst + dst_stride + 32);
for (x = 0; x < 64; x += 4) {
src_ptr = src + x;
@@ -372,7 +372,7 @@
: [pos] "r" (pos)
);
- vp9_prefetch_store(dst);
+ prefetch_store(dst);
switch (w) {
case 4:
@@ -384,7 +384,7 @@
filter_y, w, h);
break;
case 64:
- vp9_prefetch_store(dst + 32);
+ prefetch_store(dst + 32);
convolve_avg_vert_64_dspr2(src, src_stride,
dst, dst_stride,
filter_y, h);
@@ -452,17 +452,17 @@
uint32_t tp3, tp4, tn2;
/* prefetch data to cache memory */
- vp9_prefetch_load(src);
- vp9_prefetch_load(src + 32);
- vp9_prefetch_store(dst);
+ prefetch_load(src);
+ prefetch_load(src + 32);
+ prefetch_store(dst);
switch (w) {
case 4:
/* 1 word storage */
for (y = h; y--; ) {
- vp9_prefetch_load(src + src_stride);
- vp9_prefetch_load(src + src_stride + 32);
- vp9_prefetch_store(dst + dst_stride);
+ prefetch_load(src + src_stride);
+ prefetch_load(src + src_stride + 32);
+ prefetch_store(dst + dst_stride);
__asm__ __volatile__ (
"ulw %[tp1], 0(%[src]) \n\t"
@@ -482,9 +482,9 @@
case 8:
/* 2 word storage */
for (y = h; y--; ) {
- vp9_prefetch_load(src + src_stride);
- vp9_prefetch_load(src + src_stride + 32);
- vp9_prefetch_store(dst + dst_stride);
+ prefetch_load(src + src_stride);
+ prefetch_load(src + src_stride + 32);
+ prefetch_store(dst + dst_stride);
__asm__ __volatile__ (
"ulw %[tp1], 0(%[src]) \n\t"
@@ -509,9 +509,9 @@
case 16:
/* 4 word storage */
for (y = h; y--; ) {
- vp9_prefetch_load(src + src_stride);
- vp9_prefetch_load(src + src_stride + 32);
- vp9_prefetch_store(dst + dst_stride);
+ prefetch_load(src + src_stride);
+ prefetch_load(src + src_stride + 32);
+ prefetch_store(dst + dst_stride);
__asm__ __volatile__ (
"ulw %[tp1], 0(%[src]) \n\t"
@@ -544,9 +544,9 @@
case 32:
/* 8 word storage */
for (y = h; y--; ) {
- vp9_prefetch_load(src + src_stride);
- vp9_prefetch_load(src + src_stride + 32);
- vp9_prefetch_store(dst + dst_stride);
+ prefetch_load(src + src_stride);
+ prefetch_load(src + src_stride + 32);
+ prefetch_store(dst + dst_stride);
__asm__ __volatile__ (
"ulw %[tp1], 0(%[src]) \n\t"
@@ -593,16 +593,16 @@
}
break;
case 64:
- vp9_prefetch_load(src + 64);
- vp9_prefetch_store(dst + 32);
+ prefetch_load(src + 64);
+ prefetch_store(dst + 32);
/* 16 word storage */
for (y = h; y--; ) {
- vp9_prefetch_load(src + src_stride);
- vp9_prefetch_load(src + src_stride + 32);
- vp9_prefetch_load(src + src_stride + 64);
- vp9_prefetch_store(dst + dst_stride);
- vp9_prefetch_store(dst + dst_stride + 32);
+ prefetch_load(src + src_stride);
+ prefetch_load(src + src_stride + 32);
+ prefetch_load(src + src_stride + 64);
+ prefetch_store(dst + dst_stride);
+ prefetch_store(dst + dst_stride + 32);
__asm__ __volatile__ (
"ulw %[tp1], 0(%[src]) \n\t"
diff --git a/vp9/common/mips/dspr2/vp9_convolve8_avg_horiz_dspr2.c b/vp9/common/mips/dspr2/vp9_convolve8_avg_horiz_dspr2.c
index 69da1cf..cdb8312 100644
--- a/vp9/common/mips/dspr2/vp9_convolve8_avg_horiz_dspr2.c
+++ b/vp9/common/mips/dspr2/vp9_convolve8_avg_horiz_dspr2.c
@@ -43,9 +43,9 @@
for (y = h; y--;) {
/* prefetch data to cache memory */
- vp9_prefetch_load(src + src_stride);
- vp9_prefetch_load(src + src_stride + 32);
- vp9_prefetch_store(dst + dst_stride);
+ prefetch_load(src + src_stride);
+ prefetch_load(src + src_stride + 32);
+ prefetch_store(dst + dst_stride);
__asm__ __volatile__ (
"ulw %[tp1], 0(%[src]) \n\t"
@@ -165,9 +165,9 @@
for (y = h; y--;) {
/* prefetch data to cache memory */
- vp9_prefetch_load(src + src_stride);
- vp9_prefetch_load(src + src_stride + 32);
- vp9_prefetch_store(dst + dst_stride);
+ prefetch_load(src + src_stride);
+ prefetch_load(src + src_stride + 32);
+ prefetch_store(dst + dst_stride);
__asm__ __volatile__ (
"ulw %[tp1], 0(%[src]) \n\t"
@@ -357,9 +357,9 @@
dst = dst_ptr;
/* prefetch data to cache memory */
- vp9_prefetch_load(src_ptr + src_stride);
- vp9_prefetch_load(src_ptr + src_stride + 32);
- vp9_prefetch_store(dst_ptr + dst_stride);
+ prefetch_load(src_ptr + src_stride);
+ prefetch_load(src_ptr + src_stride + 32);
+ prefetch_store(dst_ptr + dst_stride);
for (c = 0; c < count; c++) {
__asm__ __volatile__ (
@@ -668,11 +668,11 @@
dst = dst_ptr;
/* prefetch data to cache memory */
- vp9_prefetch_load(src_ptr + src_stride);
- vp9_prefetch_load(src_ptr + src_stride + 32);
- vp9_prefetch_load(src_ptr + src_stride + 64);
- vp9_prefetch_store(dst_ptr + dst_stride);
- vp9_prefetch_store(dst_ptr + dst_stride + 32);
+ prefetch_load(src_ptr + src_stride);
+ prefetch_load(src_ptr + src_stride + 32);
+ prefetch_load(src_ptr + src_stride + 64);
+ prefetch_store(dst_ptr + dst_stride);
+ prefetch_store(dst_ptr + dst_stride + 32);
for (c = 0; c < 4; c++) {
__asm__ __volatile__ (
@@ -985,9 +985,9 @@
);
/* prefetch data to cache memory */
- vp9_prefetch_load(src);
- vp9_prefetch_load(src + 32);
- vp9_prefetch_store(dst);
+ prefetch_load(src);
+ prefetch_load(src + 32);
+ prefetch_store(dst);
switch (w) {
case 4:
@@ -1011,8 +1011,8 @@
filter_x, h, 2);
break;
case 64:
- vp9_prefetch_load(src + 64);
- vp9_prefetch_store(dst + 32);
+ prefetch_load(src + 64);
+ prefetch_store(dst + 32);
convolve_avg_horiz_64_dspr2(src, src_stride,
dst, dst_stride,
diff --git a/vp9/common/mips/dspr2/vp9_convolve8_dspr2.c b/vp9/common/mips/dspr2/vp9_convolve8_dspr2.c
index 58b50d2..a1309d1 100644
--- a/vp9/common/mips/dspr2/vp9_convolve8_dspr2.c
+++ b/vp9/common/mips/dspr2/vp9_convolve8_dspr2.c
@@ -60,8 +60,8 @@
for (y = h; y--;) {
dst_ptr = dst;
/* prefetch data to cache memory */
- vp9_prefetch_load(src + src_stride);
- vp9_prefetch_load(src + src_stride + 32);
+ prefetch_load(src + src_stride);
+ prefetch_load(src + src_stride + 32);
__asm__ __volatile__ (
"ulw %[tp1], 0(%[src]) \n\t"
@@ -176,8 +176,8 @@
for (y = h; y--;) {
/* prefetch data to cache memory */
- vp9_prefetch_load(src + src_stride);
- vp9_prefetch_load(src + src_stride + 32);
+ prefetch_load(src + src_stride);
+ prefetch_load(src + src_stride + 32);
dst_ptr = dst;
odd_dst = (dst_ptr + dst_stride);
@@ -355,8 +355,8 @@
for (y = h; y--;) {
/* prefetch data to cache memory */
- vp9_prefetch_load(src_ptr + src_stride);
- vp9_prefetch_load(src_ptr + src_stride + 32);
+ prefetch_load(src_ptr + src_stride);
+ prefetch_load(src_ptr + src_stride + 32);
src = src_ptr;
dst = dst_ptr;
@@ -645,9 +645,9 @@
for (y = h; y--;) {
/* prefetch data to cache memory */
- vp9_prefetch_load(src_ptr + src_stride);
- vp9_prefetch_load(src_ptr + src_stride + 32);
- vp9_prefetch_load(src_ptr + src_stride + 64);
+ prefetch_load(src_ptr + src_stride);
+ prefetch_load(src_ptr + src_stride + 32);
+ prefetch_load(src_ptr + src_stride + 64);
src = src_ptr;
dst = dst_ptr;
@@ -993,8 +993,8 @@
src -= (src_stride * 3 + 3);
/* prefetch data to cache memory */
- vp9_prefetch_load(src);
- vp9_prefetch_load(src + 32);
+ prefetch_load(src);
+ prefetch_load(src + 32);
switch (w) {
case 4:
@@ -1015,7 +1015,7 @@
(w/16));
break;
case 64:
- vp9_prefetch_load(src + 32);
+ prefetch_load(src + 32);
convolve_horiz_64_transposed_dspr2(src, src_stride,
temp, intermediate_height,
filter_x, intermediate_height);
@@ -1078,9 +1078,9 @@
int x, y;
/* prefetch data to cache memory */
- vp9_prefetch_load(src);
- vp9_prefetch_load(src + 32);
- vp9_prefetch_store(dst);
+ prefetch_load(src);
+ prefetch_load(src + 32);
+ prefetch_store(dst);
switch (w) {
case 4:
@@ -1089,9 +1089,9 @@
/* 1 word storage */
for (y = h; y--; ) {
- vp9_prefetch_load(src + src_stride);
- vp9_prefetch_load(src + src_stride + 32);
- vp9_prefetch_store(dst + dst_stride);
+ prefetch_load(src + src_stride);
+ prefetch_load(src + src_stride + 32);
+ prefetch_store(dst + dst_stride);
__asm__ __volatile__ (
"ulw %[tp1], (%[src]) \n\t"
@@ -1112,9 +1112,9 @@
/* 2 word storage */
for (y = h; y--; ) {
- vp9_prefetch_load(src + src_stride);
- vp9_prefetch_load(src + src_stride + 32);
- vp9_prefetch_store(dst + dst_stride);
+ prefetch_load(src + src_stride);
+ prefetch_load(src + src_stride + 32);
+ prefetch_store(dst + dst_stride);
__asm__ __volatile__ (
"ulw %[tp1], 0(%[src]) \n\t"
@@ -1137,9 +1137,9 @@
/* 4 word storage */
for (y = h; y--; ) {
- vp9_prefetch_load(src + src_stride);
- vp9_prefetch_load(src + src_stride + 32);
- vp9_prefetch_store(dst + dst_stride);
+ prefetch_load(src + src_stride);
+ prefetch_load(src + src_stride + 32);
+ prefetch_store(dst + dst_stride);
__asm__ __volatile__ (
"ulw %[tp1], 0(%[src]) \n\t"
@@ -1169,9 +1169,9 @@
/* 8 word storage */
for (y = h; y--; ) {
- vp9_prefetch_load(src + src_stride);
- vp9_prefetch_load(src + src_stride + 32);
- vp9_prefetch_store(dst + dst_stride);
+ prefetch_load(src + src_stride);
+ prefetch_load(src + src_stride + 32);
+ prefetch_store(dst + dst_stride);
__asm__ __volatile__ (
"ulw %[tp1], 0(%[src]) \n\t"
@@ -1209,16 +1209,16 @@
uint32_t tp1, tp2, tp3, tp4;
uint32_t tp5, tp6, tp7, tp8;
- vp9_prefetch_load(src + 64);
- vp9_prefetch_store(dst + 32);
+ prefetch_load(src + 64);
+ prefetch_store(dst + 32);
/* 16 word storage */
for (y = h; y--; ) {
- vp9_prefetch_load(src + src_stride);
- vp9_prefetch_load(src + src_stride + 32);
- vp9_prefetch_load(src + src_stride + 64);
- vp9_prefetch_store(dst + dst_stride);
- vp9_prefetch_store(dst + dst_stride + 32);
+ prefetch_load(src + src_stride);
+ prefetch_load(src + src_stride + 32);
+ prefetch_load(src + src_stride + 64);
+ prefetch_store(dst + dst_stride);
+ prefetch_store(dst + dst_stride + 32);
__asm__ __volatile__ (
"ulw %[tp1], 0(%[src]) \n\t"
diff --git a/vp9/common/mips/dspr2/vp9_convolve8_horiz_dspr2.c b/vp9/common/mips/dspr2/vp9_convolve8_horiz_dspr2.c
index 0303896..d0e3095 100644
--- a/vp9/common/mips/dspr2/vp9_convolve8_horiz_dspr2.c
+++ b/vp9/common/mips/dspr2/vp9_convolve8_horiz_dspr2.c
@@ -43,9 +43,9 @@
for (y = h; y--;) {
/* prefetch data to cache memory */
- vp9_prefetch_load(src + src_stride);
- vp9_prefetch_load(src + src_stride + 32);
- vp9_prefetch_store(dst + dst_stride);
+ prefetch_load(src + src_stride);
+ prefetch_load(src + src_stride + 32);
+ prefetch_store(dst + dst_stride);
__asm__ __volatile__ (
"ulw %[tp1], 0(%[src]) \n\t"
@@ -154,9 +154,9 @@
for (y = h; y--;) {
/* prefetch data to cache memory */
- vp9_prefetch_load(src + src_stride);
- vp9_prefetch_load(src + src_stride + 32);
- vp9_prefetch_store(dst + dst_stride);
+ prefetch_load(src + src_stride);
+ prefetch_load(src + src_stride + 32);
+ prefetch_store(dst + dst_stride);
__asm__ __volatile__ (
"ulw %[tp1], 0(%[src]) \n\t"
@@ -323,9 +323,9 @@
dst = dst_ptr;
/* prefetch data to cache memory */
- vp9_prefetch_load(src_ptr + src_stride);
- vp9_prefetch_load(src_ptr + src_stride + 32);
- vp9_prefetch_store(dst_ptr + dst_stride);
+ prefetch_load(src_ptr + src_stride);
+ prefetch_load(src_ptr + src_stride + 32);
+ prefetch_store(dst_ptr + dst_stride);
for (c = 0; c < count; c++) {
__asm__ __volatile__ (
@@ -593,11 +593,11 @@
dst = dst_ptr;
/* prefetch data to cache memory */
- vp9_prefetch_load(src_ptr + src_stride);
- vp9_prefetch_load(src_ptr + src_stride + 32);
- vp9_prefetch_load(src_ptr + src_stride + 64);
- vp9_prefetch_store(dst_ptr + dst_stride);
- vp9_prefetch_store(dst_ptr + dst_stride + 32);
+ prefetch_load(src_ptr + src_stride);
+ prefetch_load(src_ptr + src_stride + 32);
+ prefetch_load(src_ptr + src_stride + 64);
+ prefetch_store(dst_ptr + dst_stride);
+ prefetch_store(dst_ptr + dst_stride + 32);
for (c = 0; c < 4; c++) {
__asm__ __volatile__ (
@@ -859,7 +859,7 @@
if (16 == x_step_q4) {
uint32_t pos = 38;
- vp9_prefetch_load((const uint8_t *)filter_x);
+ prefetch_load((const uint8_t *)filter_x);
src -= 3;
/* bit position for extract from acc */
@@ -870,9 +870,9 @@
);
/* prefetch data to cache memory */
- vp9_prefetch_load(src);
- vp9_prefetch_load(src + 32);
- vp9_prefetch_store(dst);
+ prefetch_load(src);
+ prefetch_load(src + 32);
+ prefetch_store(dst);
switch (w) {
case 4:
@@ -896,8 +896,8 @@
filter_x, (int32_t)h, 2);
break;
case 64:
- vp9_prefetch_load(src + 64);
- vp9_prefetch_store(dst + 32);
+ prefetch_load(src + 64);
+ prefetch_store(dst + 32);
convolve_horiz_64_dspr2(src, (int32_t)src_stride,
dst, (int32_t)dst_stride,
diff --git a/vp9/common/mips/dspr2/vp9_convolve8_vert_dspr2.c b/vp9/common/mips/dspr2/vp9_convolve8_vert_dspr2.c
index 0930bb3..98acb81 100644
--- a/vp9/common/mips/dspr2/vp9_convolve8_vert_dspr2.c
+++ b/vp9/common/mips/dspr2/vp9_convolve8_vert_dspr2.c
@@ -49,7 +49,7 @@
for (y = h; y--;) {
/* prefetch data to cache memory */
- vp9_prefetch_store(dst + dst_stride);
+ prefetch_store(dst + dst_stride);
for (x = 0; x < w; x += 4) {
src_ptr = src + x;
@@ -203,8 +203,8 @@
for (y = h; y--;) {
/* prefetch data to cache memory */
- vp9_prefetch_store(dst + dst_stride);
- vp9_prefetch_store(dst + dst_stride + 32);
+ prefetch_store(dst + dst_stride);
+ prefetch_store(dst + dst_stride + 32);
for (x = 0; x < 64; x += 4) {
src_ptr = src + x;
@@ -358,7 +358,7 @@
: [pos] "r" (pos)
);
- vp9_prefetch_store(dst);
+ prefetch_store(dst);
switch (w) {
case 4 :
@@ -370,7 +370,7 @@
filter_y, w, h);
break;
case 64 :
- vp9_prefetch_store(dst + 32);
+ prefetch_store(dst + 32);
convolve_vert_64_dspr2(src, src_stride,
dst, dst_stride,
filter_y, h);
diff --git a/vp9/common/mips/dspr2/vp9_itrans16_dspr2.c b/vp9/common/mips/dspr2/vp9_itrans16_dspr2.c
index 202d913..10a24f3 100644
--- a/vp9/common/mips/dspr2/vp9_itrans16_dspr2.c
+++ b/vp9/common/mips/dspr2/vp9_itrans16_dspr2.c
@@ -34,7 +34,7 @@
for (i = no_rows; i--; ) {
/* prefetch row */
- vp9_prefetch_load((const uint8_t *)(input + 16));
+ prefetch_load((const uint8_t *)(input + 16));
__asm__ __volatile__ (
"lh %[load1], 0(%[input]) \n\t"
@@ -421,14 +421,14 @@
uint8_t *cm = vp9_ff_cropTbl;
/* prefetch vp9_ff_cropTbl */
- vp9_prefetch_load(vp9_ff_cropTbl);
- vp9_prefetch_load(vp9_ff_cropTbl + 32);
- vp9_prefetch_load(vp9_ff_cropTbl + 64);
- vp9_prefetch_load(vp9_ff_cropTbl + 96);
- vp9_prefetch_load(vp9_ff_cropTbl + 128);
- vp9_prefetch_load(vp9_ff_cropTbl + 160);
- vp9_prefetch_load(vp9_ff_cropTbl + 192);
- vp9_prefetch_load(vp9_ff_cropTbl + 224);
+ prefetch_load(vp9_ff_cropTbl);
+ prefetch_load(vp9_ff_cropTbl + 32);
+ prefetch_load(vp9_ff_cropTbl + 64);
+ prefetch_load(vp9_ff_cropTbl + 96);
+ prefetch_load(vp9_ff_cropTbl + 128);
+ prefetch_load(vp9_ff_cropTbl + 160);
+ prefetch_load(vp9_ff_cropTbl + 192);
+ prefetch_load(vp9_ff_cropTbl + 224);
for (i = 0; i < 16; ++i) {
dest_pix = (dest + i);
@@ -1124,7 +1124,7 @@
for (i = 0; i < 16; ++i) {
/* prefetch row */
- vp9_prefetch_load((const uint8_t *)(input + 16));
+ prefetch_load((const uint8_t *)(input + 16));
iadst16(input, outptr);
input += 16;
@@ -1144,7 +1144,7 @@
for (i = 0; i < 16; ++i) {
/* prefetch row */
- vp9_prefetch_load((const uint8_t *)(input + 16));
+ prefetch_load((const uint8_t *)(input + 16));
iadst16(input, outptr);
input += 16;
diff --git a/vp9/common/mips/dspr2/vp9_itrans32_cols_dspr2.c b/vp9/common/mips/dspr2/vp9_itrans32_cols_dspr2.c
index 7ceebb6..a256145 100644
--- a/vp9/common/mips/dspr2/vp9_itrans32_cols_dspr2.c
+++ b/vp9/common/mips/dspr2/vp9_itrans32_cols_dspr2.c
@@ -44,14 +44,14 @@
uint8_t *cm = vp9_ff_cropTbl;
/* prefetch vp9_ff_cropTbl */
- vp9_prefetch_load(vp9_ff_cropTbl);
- vp9_prefetch_load(vp9_ff_cropTbl + 32);
- vp9_prefetch_load(vp9_ff_cropTbl + 64);
- vp9_prefetch_load(vp9_ff_cropTbl + 96);
- vp9_prefetch_load(vp9_ff_cropTbl + 128);
- vp9_prefetch_load(vp9_ff_cropTbl + 160);
- vp9_prefetch_load(vp9_ff_cropTbl + 192);
- vp9_prefetch_load(vp9_ff_cropTbl + 224);
+ prefetch_load(vp9_ff_cropTbl);
+ prefetch_load(vp9_ff_cropTbl + 32);
+ prefetch_load(vp9_ff_cropTbl + 64);
+ prefetch_load(vp9_ff_cropTbl + 96);
+ prefetch_load(vp9_ff_cropTbl + 128);
+ prefetch_load(vp9_ff_cropTbl + 160);
+ prefetch_load(vp9_ff_cropTbl + 192);
+ prefetch_load(vp9_ff_cropTbl + 224);
for (i = 0; i < 32; ++i) {
dest_pix = dest + i;
diff --git a/vp9/common/mips/dspr2/vp9_itrans32_dspr2.c b/vp9/common/mips/dspr2/vp9_itrans32_dspr2.c
index 74a90b0..dd18831 100644
--- a/vp9/common/mips/dspr2/vp9_itrans32_dspr2.c
+++ b/vp9/common/mips/dspr2/vp9_itrans32_dspr2.c
@@ -96,8 +96,8 @@
}
/* prefetch row */
- vp9_prefetch_load((const uint8_t *)(input + 32));
- vp9_prefetch_load((const uint8_t *)(input + 48));
+ prefetch_load((const uint8_t *)(input + 32));
+ prefetch_load((const uint8_t *)(input + 48));
__asm__ __volatile__ (
"lh %[load1], 2(%[input]) \n\t"
diff --git a/vp9/common/mips/dspr2/vp9_itrans4_dspr2.c b/vp9/common/mips/dspr2/vp9_itrans4_dspr2.c
index 280190a..4e31f9f 100644
--- a/vp9/common/mips/dspr2/vp9_itrans4_dspr2.c
+++ b/vp9/common/mips/dspr2/vp9_itrans4_dspr2.c
@@ -115,14 +115,14 @@
uint8_t *cm = vp9_ff_cropTbl;
/* prefetch vp9_ff_cropTbl */
- vp9_prefetch_load(vp9_ff_cropTbl);
- vp9_prefetch_load(vp9_ff_cropTbl + 32);
- vp9_prefetch_load(vp9_ff_cropTbl + 64);
- vp9_prefetch_load(vp9_ff_cropTbl + 96);
- vp9_prefetch_load(vp9_ff_cropTbl + 128);
- vp9_prefetch_load(vp9_ff_cropTbl + 160);
- vp9_prefetch_load(vp9_ff_cropTbl + 192);
- vp9_prefetch_load(vp9_ff_cropTbl + 224);
+ prefetch_load(vp9_ff_cropTbl);
+ prefetch_load(vp9_ff_cropTbl + 32);
+ prefetch_load(vp9_ff_cropTbl + 64);
+ prefetch_load(vp9_ff_cropTbl + 96);
+ prefetch_load(vp9_ff_cropTbl + 128);
+ prefetch_load(vp9_ff_cropTbl + 160);
+ prefetch_load(vp9_ff_cropTbl + 192);
+ prefetch_load(vp9_ff_cropTbl + 224);
for (i = 0; i < 4; ++i) {
dest_pix = (dest + i);
diff --git a/vp9/common/mips/dspr2/vp9_itrans8_dspr2.c b/vp9/common/mips/dspr2/vp9_itrans8_dspr2.c
index 04d2266..6898d56 100644
--- a/vp9/common/mips/dspr2/vp9_itrans8_dspr2.c
+++ b/vp9/common/mips/dspr2/vp9_itrans8_dspr2.c
@@ -211,14 +211,14 @@
uint8_t *cm = vp9_ff_cropTbl;
/* prefetch vp9_ff_cropTbl */
- vp9_prefetch_load(vp9_ff_cropTbl);
- vp9_prefetch_load(vp9_ff_cropTbl + 32);
- vp9_prefetch_load(vp9_ff_cropTbl + 64);
- vp9_prefetch_load(vp9_ff_cropTbl + 96);
- vp9_prefetch_load(vp9_ff_cropTbl + 128);
- vp9_prefetch_load(vp9_ff_cropTbl + 160);
- vp9_prefetch_load(vp9_ff_cropTbl + 192);
- vp9_prefetch_load(vp9_ff_cropTbl + 224);
+ prefetch_load(vp9_ff_cropTbl);
+ prefetch_load(vp9_ff_cropTbl + 32);
+ prefetch_load(vp9_ff_cropTbl + 64);
+ prefetch_load(vp9_ff_cropTbl + 96);
+ prefetch_load(vp9_ff_cropTbl + 128);
+ prefetch_load(vp9_ff_cropTbl + 160);
+ prefetch_load(vp9_ff_cropTbl + 192);
+ prefetch_load(vp9_ff_cropTbl + 224);
for (i = 0; i < 8; ++i) {
dest_pix = (dest + i);
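
Aside on the cropTbl hunks above: the eight back-to-back prefetch_load calls step through vp9_ff_cropTbl in 32-byte increments, pulling the first 256 bytes of the clamp table into cache one line at a time. A sketch of the equivalent loop form (unrolled in the source, presumably so each call is a single `pref`):

/* Sketch only: loop form of the eight unrolled prefetch_load calls. */
for (int off = 0; off < 256; off += 32)
  prefetch_load(vp9_ff_cropTbl + off);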
diff --git a/vp9/common/vp9_blockd.h b/vp9/common/vp9_blockd.h
index 8054cdf..d776b44 100644
--- a/vp9/common/vp9_blockd.h
+++ b/vp9/common/vp9_blockd.h
@@ -166,7 +166,7 @@
int up_available;
int left_available;
- const vp9_prob (*partition_probs)[PARTITION_TYPES - 1];
+ const vpx_prob (*partition_probs)[PARTITION_TYPES - 1];
/* Distance of MB away from frame edges */
int mb_to_left_edge;
@@ -262,7 +262,7 @@
}
}
-static INLINE const vp9_prob *get_y_mode_probs(const MODE_INFO *mi,
+static INLINE const vpx_prob *get_y_mode_probs(const MODE_INFO *mi,
const MODE_INFO *above_mi,
const MODE_INFO *left_mi,
int block) {
diff --git a/vp9/common/vp9_common.h b/vp9/common/vp9_common.h
index 9c2d779..42c3a09 100644
--- a/vp9/common/vp9_common.h
+++ b/vp9/common/vp9_common.h
@@ -16,6 +16,7 @@
#include <assert.h>
#include "./vpx_config.h"
+#include "vpx_dsp/vpx_dsp_common.h"
#include "vpx_mem/vpx_mem.h"
#include "vpx/vpx_integer.h"
#include "vp9/common/vp9_systemdependent.h"
@@ -24,9 +25,6 @@
extern "C" {
#endif
-#define MIN(x, y) (((x) < (y)) ? (x) : (y))
-#define MAX(x, y) (((x) > (y)) ? (x) : (y))
-
// Only need this for fixed-size arrays, for structs just assign.
#define vp9_copy(dest, src) { \
assert(sizeof(dest) == sizeof(src)); \
@@ -42,18 +40,6 @@
#define vp9_zero(dest) memset(&(dest), 0, sizeof(dest))
#define vp9_zero_array(dest, n) memset(dest, 0, n * sizeof(*dest))
-static INLINE uint8_t clip_pixel(int val) {
- return (val > 255) ? 255 : (val < 0) ? 0 : val;
-}
-
-static INLINE int clamp(int value, int low, int high) {
- return value < low ? low : (value > high ? high : value);
-}
-
-static INLINE double fclamp(double value, double low, double high) {
- return value < low ? low : (value > high ? high : value);
-}
-
static INLINE int get_unsigned_bits(unsigned int num_values) {
return num_values > 0 ? get_msb(num_values) + 1 : 0;
}
@@ -70,20 +56,6 @@
return (uint16_t)clamp(val, 0, 4095);
}
}
-
-// Note:
-// tran_low_t is the datatype used for final transform coefficients.
-// tran_high_t is the datatype used for intermediate transform stages.
-typedef int64_t tran_high_t;
-typedef int32_t tran_low_t;
-
-#else
-
-// Note:
-// tran_low_t is the datatype used for final transform coefficients.
-// tran_high_t is the datatype used for intermediate transform stages.
-typedef int32_t tran_high_t;
-typedef int16_t tran_low_t;
#endif // CONFIG_VP9_HIGHBITDEPTH
#if CONFIG_DEBUG
diff --git a/vp9/common/vp9_entropy.c b/vp9/common/vp9_entropy.c
index ad6c04b..579857b 100644
--- a/vp9/common/vp9_entropy.c
+++ b/vp9/common/vp9_entropy.c
@@ -16,7 +16,7 @@
#include "vpx/vpx_integer.h"
// Unconstrained Node Tree
-const vp9_tree_index vp9_coef_con_tree[TREE_SIZE(ENTROPY_TOKENS)] = {
+const vpx_tree_index vp9_coef_con_tree[TREE_SIZE(ENTROPY_TOKENS)] = {
2, 6, // 0 = LOW_VAL
-TWO_TOKEN, 4, // 1 = TWO
-THREE_TOKEN, -FOUR_TOKEN, // 2 = THREE
@@ -27,30 +27,30 @@
-CATEGORY5_TOKEN, -CATEGORY6_TOKEN // 7 = CAT_FIVE
};
-const vp9_prob vp9_cat1_prob[] = { 159 };
-const vp9_prob vp9_cat2_prob[] = { 165, 145 };
-const vp9_prob vp9_cat3_prob[] = { 173, 148, 140 };
-const vp9_prob vp9_cat4_prob[] = { 176, 155, 140, 135 };
-const vp9_prob vp9_cat5_prob[] = { 180, 157, 141, 134, 130 };
-const vp9_prob vp9_cat6_prob[] = {
+const vpx_prob vp9_cat1_prob[] = { 159 };
+const vpx_prob vp9_cat2_prob[] = { 165, 145 };
+const vpx_prob vp9_cat3_prob[] = { 173, 148, 140 };
+const vpx_prob vp9_cat4_prob[] = { 176, 155, 140, 135 };
+const vpx_prob vp9_cat5_prob[] = { 180, 157, 141, 134, 130 };
+const vpx_prob vp9_cat6_prob[] = {
254, 254, 254, 252, 249, 243, 230, 196, 177, 153, 140, 133, 130, 129
};
#if CONFIG_VP9_HIGHBITDEPTH
-const vp9_prob vp9_cat1_prob_high10[] = { 159 };
-const vp9_prob vp9_cat2_prob_high10[] = { 165, 145 };
-const vp9_prob vp9_cat3_prob_high10[] = { 173, 148, 140 };
-const vp9_prob vp9_cat4_prob_high10[] = { 176, 155, 140, 135 };
-const vp9_prob vp9_cat5_prob_high10[] = { 180, 157, 141, 134, 130 };
-const vp9_prob vp9_cat6_prob_high10[] = {
+const vpx_prob vp9_cat1_prob_high10[] = { 159 };
+const vpx_prob vp9_cat2_prob_high10[] = { 165, 145 };
+const vpx_prob vp9_cat3_prob_high10[] = { 173, 148, 140 };
+const vpx_prob vp9_cat4_prob_high10[] = { 176, 155, 140, 135 };
+const vpx_prob vp9_cat5_prob_high10[] = { 180, 157, 141, 134, 130 };
+const vpx_prob vp9_cat6_prob_high10[] = {
255, 255, 254, 254, 254, 252, 249, 243,
230, 196, 177, 153, 140, 133, 130, 129
};
-const vp9_prob vp9_cat1_prob_high12[] = { 159 };
-const vp9_prob vp9_cat2_prob_high12[] = { 165, 145 };
-const vp9_prob vp9_cat3_prob_high12[] = { 173, 148, 140 };
-const vp9_prob vp9_cat4_prob_high12[] = { 176, 155, 140, 135 };
-const vp9_prob vp9_cat5_prob_high12[] = { 180, 157, 141, 134, 130 };
-const vp9_prob vp9_cat6_prob_high12[] = {
+const vpx_prob vp9_cat1_prob_high12[] = { 159 };
+const vpx_prob vp9_cat2_prob_high12[] = { 165, 145 };
+const vpx_prob vp9_cat3_prob_high12[] = { 173, 148, 140 };
+const vpx_prob vp9_cat4_prob_high12[] = { 176, 155, 140, 135 };
+const vpx_prob vp9_cat5_prob_high12[] = { 180, 157, 141, 134, 130 };
+const vpx_prob vp9_cat6_prob_high12[] = {
255, 255, 255, 255, 254, 254, 254, 252, 249,
243, 230, 196, 177, 153, 140, 133, 130, 129
};
@@ -147,7 +147,7 @@
// by averaging :
// vp9_pareto8_full[l][node] = (vp9_pareto8_full[l-1][node] +
// vp9_pareto8_full[l+1][node] ) >> 1;
-const vp9_prob vp9_pareto8_full[COEFF_PROB_MODELS][MODEL_NODES] = {
+const vpx_prob vp9_pareto8_full[COEFF_PROB_MODELS][MODEL_NODES] = {
{ 3, 86, 128, 6, 86, 23, 88, 29},
{ 6, 86, 128, 11, 87, 42, 91, 52},
{ 9, 86, 129, 17, 88, 61, 94, 76},
@@ -742,14 +742,14 @@
}
};
-static void extend_to_full_distribution(vp9_prob *probs, vp9_prob p) {
+static void extend_to_full_distribution(vpx_prob *probs, vpx_prob p) {
memcpy(probs, vp9_pareto8_full[p == 0 ? 0 : p - 1],
- MODEL_NODES * sizeof(vp9_prob));
+ MODEL_NODES * sizeof(vpx_prob));
}
-void vp9_model_to_full_probs(const vp9_prob *model, vp9_prob *full) {
+void vp9_model_to_full_probs(const vpx_prob *model, vpx_prob *full) {
if (full != model)
- memcpy(full, model, sizeof(vp9_prob) * UNCONSTRAINED_NODES);
+ memcpy(full, model, sizeof(vpx_prob) * UNCONSTRAINED_NODES);
extend_to_full_distribution(&full[UNCONSTRAINED_NODES], model[PIVOT_NODE]);
}
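
Usage sketch for the function above (names from this file; the probability values below are arbitrary illustration, not from the patch):

vpx_prob model[UNCONSTRAINED_NODES] = { 120, 80, 64 };  /* 3-node model */
vpx_prob full[ENTROPY_NODES];
vp9_model_to_full_probs(model, full);
/* full[0..UNCONSTRAINED_NODES-1] is copied from model; the remaining
   MODEL_NODES entries are filled from vp9_pareto8_full, selected by the
   pivot probability model[PIVOT_NODE]. */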
diff --git a/vp9/common/vp9_entropy.h b/vp9/common/vp9_entropy.h
index 2fc97c3..a1746bc 100644
--- a/vp9/common/vp9_entropy.h
+++ b/vp9/common/vp9_entropy.h
@@ -12,10 +12,10 @@
#define VP9_COMMON_VP9_ENTROPY_H_
#include "vpx/vpx_integer.h"
+#include "vpx_dsp/prob.h"
#include "vp9/common/vp9_common.h"
#include "vp9/common/vp9_enums.h"
-#include "vp9/common/vp9_prob.h"
#ifdef __cplusplus
extern "C" {
@@ -76,8 +76,8 @@
#define EOB_MODEL_TOKEN 3
typedef struct {
- const vp9_tree_index *tree;
- const vp9_prob *prob;
+ const vpx_tree_index *tree;
+ const vpx_prob *prob;
int len;
int base_val;
const int16_t *cost;
@@ -160,17 +160,17 @@
#define PIVOT_NODE 2 // which node is pivot
#define MODEL_NODES (ENTROPY_NODES - UNCONSTRAINED_NODES)
-extern const vp9_tree_index vp9_coef_con_tree[TREE_SIZE(ENTROPY_TOKENS)];
-extern const vp9_prob vp9_pareto8_full[COEFF_PROB_MODELS][MODEL_NODES];
+extern const vpx_tree_index vp9_coef_con_tree[TREE_SIZE(ENTROPY_TOKENS)];
+extern const vpx_prob vp9_pareto8_full[COEFF_PROB_MODELS][MODEL_NODES];
-typedef vp9_prob vp9_coeff_probs_model[REF_TYPES][COEF_BANDS]
+typedef vpx_prob vp9_coeff_probs_model[REF_TYPES][COEF_BANDS]
[COEFF_CONTEXTS][UNCONSTRAINED_NODES];
typedef unsigned int vp9_coeff_count_model[REF_TYPES][COEF_BANDS]
[COEFF_CONTEXTS]
[UNCONSTRAINED_NODES + 1];
-void vp9_model_to_full_probs(const vp9_prob *model, vp9_prob *full);
+void vp9_model_to_full_probs(const vpx_prob *model, vpx_prob *full);
typedef char ENTROPY_CONTEXT;
diff --git a/vp9/common/vp9_entropymode.c b/vp9/common/vp9_entropymode.c
index 22d431b..670348b 100644
--- a/vp9/common/vp9_entropymode.c
+++ b/vp9/common/vp9_entropymode.c
@@ -13,7 +13,7 @@
#include "vp9/common/vp9_onyxc_int.h"
#include "vp9/common/vp9_seg_common.h"
-const vp9_prob vp9_kf_y_mode_prob[INTRA_MODES][INTRA_MODES][INTRA_MODES - 1] = {
+const vpx_prob vp9_kf_y_mode_prob[INTRA_MODES][INTRA_MODES][INTRA_MODES - 1] = {
{ // above = dc
{ 137, 30, 42, 148, 151, 207, 70, 52, 91 }, // left = dc
{ 92, 45, 102, 136, 116, 180, 74, 90, 100 }, // left = v
@@ -127,7 +127,7 @@
}
};
-const vp9_prob vp9_kf_uv_mode_prob[INTRA_MODES][INTRA_MODES - 1] = {
+const vpx_prob vp9_kf_uv_mode_prob[INTRA_MODES][INTRA_MODES - 1] = {
{ 144, 11, 54, 157, 195, 130, 46, 58, 108 }, // y = dc
{ 118, 15, 123, 148, 131, 101, 44, 93, 131 }, // y = v
{ 113, 12, 23, 188, 226, 142, 26, 32, 125 }, // y = h
@@ -140,14 +140,14 @@
{ 102, 19, 66, 162, 182, 122, 35, 59, 128 } // y = tm
};
-static const vp9_prob default_if_y_probs[BLOCK_SIZE_GROUPS][INTRA_MODES - 1] = {
+static const vpx_prob default_if_y_probs[BLOCK_SIZE_GROUPS][INTRA_MODES - 1] = {
{ 65, 32, 18, 144, 162, 194, 41, 51, 98 }, // block_size < 8x8
{ 132, 68, 18, 165, 217, 196, 45, 40, 78 }, // block_size < 16x16
{ 173, 80, 19, 176, 240, 193, 64, 35, 46 }, // block_size < 32x32
{ 221, 135, 38, 194, 248, 121, 96, 85, 29 } // block_size >= 32x32
};
-static const vp9_prob default_if_uv_probs[INTRA_MODES][INTRA_MODES - 1] = {
+static const vpx_prob default_if_uv_probs[INTRA_MODES][INTRA_MODES - 1] = {
{ 120, 7, 76, 176, 208, 126, 28, 54, 103 }, // y = dc
{ 48, 12, 154, 155, 139, 90, 34, 117, 119 }, // y = v
{ 67, 6, 25, 204, 243, 158, 13, 21, 96 }, // y = h
@@ -160,7 +160,7 @@
{ 101, 21, 107, 181, 192, 103, 19, 67, 125 } // y = tm
};
-const vp9_prob vp9_kf_partition_probs[PARTITION_CONTEXTS]
+const vpx_prob vp9_kf_partition_probs[PARTITION_CONTEXTS]
[PARTITION_TYPES - 1] = {
// 8x8 -> 4x4
{ 158, 97, 94 }, // a/l both not split
@@ -184,7 +184,7 @@
{ 12, 3, 3 }, // a/l both split
};
-static const vp9_prob default_partition_probs[PARTITION_CONTEXTS]
+static const vpx_prob default_partition_probs[PARTITION_CONTEXTS]
[PARTITION_TYPES - 1] = {
// 8x8 -> 4x4
{ 199, 122, 141 }, // a/l both not split
@@ -208,7 +208,7 @@
{ 10, 7, 6 }, // a/l both split
};
-static const vp9_prob default_inter_mode_probs[INTER_MODE_CONTEXTS]
+static const vpx_prob default_inter_mode_probs[INTER_MODE_CONTEXTS]
[INTER_MODES - 1] = {
{2, 173, 34}, // 0 = both zero mv
{7, 145, 85}, // 1 = one zero mv + one a predicted mv
@@ -220,7 +220,7 @@
};
/* Array indices are identical to previously-existing INTRAMODECONTEXTNODES. */
-const vp9_tree_index vp9_intra_mode_tree[TREE_SIZE(INTRA_MODES)] = {
+const vpx_tree_index vp9_intra_mode_tree[TREE_SIZE(INTRA_MODES)] = {
-DC_PRED, 2, /* 0 = DC_NODE */
-TM_PRED, 4, /* 1 = TM_NODE */
-V_PRED, 6, /* 2 = V_NODE */
@@ -232,31 +232,31 @@
-D153_PRED, -D207_PRED /* 8 = D153_NODE */
};
-const vp9_tree_index vp9_inter_mode_tree[TREE_SIZE(INTER_MODES)] = {
+const vpx_tree_index vp9_inter_mode_tree[TREE_SIZE(INTER_MODES)] = {
-INTER_OFFSET(ZEROMV), 2,
-INTER_OFFSET(NEARESTMV), 4,
-INTER_OFFSET(NEARMV), -INTER_OFFSET(NEWMV)
};
-const vp9_tree_index vp9_partition_tree[TREE_SIZE(PARTITION_TYPES)] = {
+const vpx_tree_index vp9_partition_tree[TREE_SIZE(PARTITION_TYPES)] = {
-PARTITION_NONE, 2,
-PARTITION_HORZ, 4,
-PARTITION_VERT, -PARTITION_SPLIT
};
-static const vp9_prob default_intra_inter_p[INTRA_INTER_CONTEXTS] = {
+static const vpx_prob default_intra_inter_p[INTRA_INTER_CONTEXTS] = {
9, 102, 187, 225
};
-static const vp9_prob default_comp_inter_p[COMP_INTER_CONTEXTS] = {
+static const vpx_prob default_comp_inter_p[COMP_INTER_CONTEXTS] = {
239, 183, 119, 96, 41
};
-static const vp9_prob default_comp_ref_p[REF_CONTEXTS] = {
+static const vpx_prob default_comp_ref_p[REF_CONTEXTS] = {
50, 126, 123, 221, 226
};
-static const vp9_prob default_single_ref_p[REF_CONTEXTS][2] = {
+static const vpx_prob default_single_ref_p[REF_CONTEXTS][2] = {
{ 33, 16 },
{ 77, 74 },
{ 142, 142 },
@@ -302,11 +302,11 @@
ct_8x8p[0][1] = tx_count_8x8p[TX_8X8];
}
-static const vp9_prob default_skip_probs[SKIP_CONTEXTS] = {
+static const vpx_prob default_skip_probs[SKIP_CONTEXTS] = {
192, 128, 64
};
-static const vp9_prob default_switchable_interp_prob[SWITCHABLE_FILTER_CONTEXTS]
+static const vpx_prob default_switchable_interp_prob[SWITCHABLE_FILTER_CONTEXTS]
[SWITCHABLE_FILTERS - 1] = {
{ 235, 162, },
{ 36, 255, },
@@ -328,7 +328,7 @@
vp9_copy(fc->inter_mode_probs, default_inter_mode_probs);
}
-const vp9_tree_index vp9_switchable_interp_tree
+const vpx_tree_index vp9_switchable_interp_tree
[TREE_SIZE(SWITCHABLE_FILTERS)] = {
-EIGHTTAP, 2,
-EIGHTTAP_SMOOTH, -EIGHTTAP_SHARP
@@ -355,24 +355,24 @@
pre_fc->single_ref_prob[i][j], counts->single_ref[i][j]);
for (i = 0; i < INTER_MODE_CONTEXTS; i++)
- vp9_tree_merge_probs(vp9_inter_mode_tree, pre_fc->inter_mode_probs[i],
+ vpx_tree_merge_probs(vp9_inter_mode_tree, pre_fc->inter_mode_probs[i],
counts->inter_mode[i], fc->inter_mode_probs[i]);
for (i = 0; i < BLOCK_SIZE_GROUPS; i++)
- vp9_tree_merge_probs(vp9_intra_mode_tree, pre_fc->y_mode_prob[i],
+ vpx_tree_merge_probs(vp9_intra_mode_tree, pre_fc->y_mode_prob[i],
counts->y_mode[i], fc->y_mode_prob[i]);
for (i = 0; i < INTRA_MODES; ++i)
- vp9_tree_merge_probs(vp9_intra_mode_tree, pre_fc->uv_mode_prob[i],
+ vpx_tree_merge_probs(vp9_intra_mode_tree, pre_fc->uv_mode_prob[i],
counts->uv_mode[i], fc->uv_mode_prob[i]);
for (i = 0; i < PARTITION_CONTEXTS; i++)
- vp9_tree_merge_probs(vp9_partition_tree, pre_fc->partition_prob[i],
+ vpx_tree_merge_probs(vp9_partition_tree, pre_fc->partition_prob[i],
counts->partition[i], fc->partition_prob[i]);
if (cm->interp_filter == SWITCHABLE) {
for (i = 0; i < SWITCHABLE_FILTER_CONTEXTS; i++)
- vp9_tree_merge_probs(vp9_switchable_interp_tree,
+ vpx_tree_merge_probs(vp9_switchable_interp_tree,
pre_fc->switchable_interp_prob[i],
counts->switchable_interp[i],
fc->switchable_interp_prob[i]);
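
Note on the vp9_tree_merge_probs -> vpx_tree_merge_probs rename: judging from the call sites above, the shared helper in vpx_dsp/prob.h is presumably declared along these lines (exact types are an assumption):

void vpx_tree_merge_probs(const vpx_tree_index *tree,
                          const vpx_prob *pre_probs,
                          const unsigned int *counts,
                          vpx_prob *probs);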
diff --git a/vp9/common/vp9_entropymode.h b/vp9/common/vp9_entropymode.h
index 8c9e6a7..371738a 100644
--- a/vp9/common/vp9_entropymode.h
+++ b/vp9/common/vp9_entropymode.h
@@ -28,9 +28,9 @@
struct VP9Common;
struct tx_probs {
- vp9_prob p32x32[TX_SIZE_CONTEXTS][TX_SIZES - 1];
- vp9_prob p16x16[TX_SIZE_CONTEXTS][TX_SIZES - 2];
- vp9_prob p8x8[TX_SIZE_CONTEXTS][TX_SIZES - 3];
+ vpx_prob p32x32[TX_SIZE_CONTEXTS][TX_SIZES - 1];
+ vpx_prob p16x16[TX_SIZE_CONTEXTS][TX_SIZES - 2];
+ vpx_prob p8x8[TX_SIZE_CONTEXTS][TX_SIZES - 3];
};
struct tx_counts {
@@ -41,19 +41,19 @@
};
typedef struct frame_contexts {
- vp9_prob y_mode_prob[BLOCK_SIZE_GROUPS][INTRA_MODES - 1];
- vp9_prob uv_mode_prob[INTRA_MODES][INTRA_MODES - 1];
- vp9_prob partition_prob[PARTITION_CONTEXTS][PARTITION_TYPES - 1];
+ vpx_prob y_mode_prob[BLOCK_SIZE_GROUPS][INTRA_MODES - 1];
+ vpx_prob uv_mode_prob[INTRA_MODES][INTRA_MODES - 1];
+ vpx_prob partition_prob[PARTITION_CONTEXTS][PARTITION_TYPES - 1];
vp9_coeff_probs_model coef_probs[TX_SIZES][PLANE_TYPES];
- vp9_prob switchable_interp_prob[SWITCHABLE_FILTER_CONTEXTS]
+ vpx_prob switchable_interp_prob[SWITCHABLE_FILTER_CONTEXTS]
[SWITCHABLE_FILTERS - 1];
- vp9_prob inter_mode_probs[INTER_MODE_CONTEXTS][INTER_MODES - 1];
- vp9_prob intra_inter_prob[INTRA_INTER_CONTEXTS];
- vp9_prob comp_inter_prob[COMP_INTER_CONTEXTS];
- vp9_prob single_ref_prob[REF_CONTEXTS][2];
- vp9_prob comp_ref_prob[REF_CONTEXTS];
+ vpx_prob inter_mode_probs[INTER_MODE_CONTEXTS][INTER_MODES - 1];
+ vpx_prob intra_inter_prob[INTRA_INTER_CONTEXTS];
+ vpx_prob comp_inter_prob[COMP_INTER_CONTEXTS];
+ vpx_prob single_ref_prob[REF_CONTEXTS][2];
+ vpx_prob comp_ref_prob[REF_CONTEXTS];
struct tx_probs tx_probs;
- vp9_prob skip_probs[SKIP_CONTEXTS];
+ vpx_prob skip_probs[SKIP_CONTEXTS];
nmv_context nmvc;
int initialized;
} FRAME_CONTEXT;
@@ -77,15 +77,15 @@
nmv_context_counts mv;
} FRAME_COUNTS;
-extern const vp9_prob vp9_kf_uv_mode_prob[INTRA_MODES][INTRA_MODES - 1];
-extern const vp9_prob vp9_kf_y_mode_prob[INTRA_MODES][INTRA_MODES]
+extern const vpx_prob vp9_kf_uv_mode_prob[INTRA_MODES][INTRA_MODES - 1];
+extern const vpx_prob vp9_kf_y_mode_prob[INTRA_MODES][INTRA_MODES]
[INTRA_MODES - 1];
-extern const vp9_prob vp9_kf_partition_probs[PARTITION_CONTEXTS]
+extern const vpx_prob vp9_kf_partition_probs[PARTITION_CONTEXTS]
[PARTITION_TYPES - 1];
-extern const vp9_tree_index vp9_intra_mode_tree[TREE_SIZE(INTRA_MODES)];
-extern const vp9_tree_index vp9_inter_mode_tree[TREE_SIZE(INTER_MODES)];
-extern const vp9_tree_index vp9_partition_tree[TREE_SIZE(PARTITION_TYPES)];
-extern const vp9_tree_index vp9_switchable_interp_tree
+extern const vpx_tree_index vp9_intra_mode_tree[TREE_SIZE(INTRA_MODES)];
+extern const vpx_tree_index vp9_inter_mode_tree[TREE_SIZE(INTER_MODES)];
+extern const vpx_tree_index vp9_partition_tree[TREE_SIZE(PARTITION_TYPES)];
+extern const vpx_tree_index vp9_switchable_interp_tree
[TREE_SIZE(SWITCHABLE_FILTERS)];
void vp9_setup_past_independence(struct VP9Common *cm);
diff --git a/vp9/common/vp9_entropymv.c b/vp9/common/vp9_entropymv.c
index 76cdb05..3acfe14 100644
--- a/vp9/common/vp9_entropymv.c
+++ b/vp9/common/vp9_entropymv.c
@@ -14,13 +14,13 @@
// Integer pel reference mv threshold for use of high-precision 1/8 mv
#define COMPANDED_MVREF_THRESH 8
-const vp9_tree_index vp9_mv_joint_tree[TREE_SIZE(MV_JOINTS)] = {
+const vpx_tree_index vp9_mv_joint_tree[TREE_SIZE(MV_JOINTS)] = {
-MV_JOINT_ZERO, 2,
-MV_JOINT_HNZVZ, 4,
-MV_JOINT_HZVNZ, -MV_JOINT_HNZVNZ
};
-const vp9_tree_index vp9_mv_class_tree[TREE_SIZE(MV_CLASSES)] = {
+const vpx_tree_index vp9_mv_class_tree[TREE_SIZE(MV_CLASSES)] = {
-MV_CLASS_0, 2,
-MV_CLASS_1, 4,
6, 8,
@@ -33,11 +33,11 @@
-MV_CLASS_9, -MV_CLASS_10,
};
-const vp9_tree_index vp9_mv_class0_tree[TREE_SIZE(CLASS0_SIZE)] = {
+const vpx_tree_index vp9_mv_class0_tree[TREE_SIZE(CLASS0_SIZE)] = {
-0, -1,
};
-const vp9_tree_index vp9_mv_fp_tree[TREE_SIZE(MV_FP_SIZE)] = {
+const vpx_tree_index vp9_mv_fp_tree[TREE_SIZE(MV_FP_SIZE)] = {
-0, 2,
-1, 4,
-2, -3
@@ -183,7 +183,7 @@
const nmv_context *pre_fc = &cm->frame_contexts[cm->frame_context_idx].nmvc;
const nmv_context_counts *counts = &cm->counts.mv;
- vp9_tree_merge_probs(vp9_mv_joint_tree, pre_fc->joints, counts->joints,
+ vpx_tree_merge_probs(vp9_mv_joint_tree, pre_fc->joints, counts->joints,
fc->joints);
for (i = 0; i < 2; ++i) {
@@ -192,19 +192,19 @@
const nmv_component_counts *c = &counts->comps[i];
comp->sign = mode_mv_merge_probs(pre_comp->sign, c->sign);
- vp9_tree_merge_probs(vp9_mv_class_tree, pre_comp->classes, c->classes,
+ vpx_tree_merge_probs(vp9_mv_class_tree, pre_comp->classes, c->classes,
comp->classes);
- vp9_tree_merge_probs(vp9_mv_class0_tree, pre_comp->class0, c->class0,
+ vpx_tree_merge_probs(vp9_mv_class0_tree, pre_comp->class0, c->class0,
comp->class0);
for (j = 0; j < MV_OFFSET_BITS; ++j)
comp->bits[j] = mode_mv_merge_probs(pre_comp->bits[j], c->bits[j]);
for (j = 0; j < CLASS0_SIZE; ++j)
- vp9_tree_merge_probs(vp9_mv_fp_tree, pre_comp->class0_fp[j],
+ vpx_tree_merge_probs(vp9_mv_fp_tree, pre_comp->class0_fp[j],
c->class0_fp[j], comp->class0_fp[j]);
- vp9_tree_merge_probs(vp9_mv_fp_tree, pre_comp->fp, c->fp, comp->fp);
+ vpx_tree_merge_probs(vp9_mv_fp_tree, pre_comp->fp, c->fp, comp->fp);
if (allow_hp) {
comp->class0_hp = mode_mv_merge_probs(pre_comp->class0_hp, c->class0_hp);
diff --git a/vp9/common/vp9_entropymv.h b/vp9/common/vp9_entropymv.h
index 637f451..8c817bf 100644
--- a/vp9/common/vp9_entropymv.h
+++ b/vp9/common/vp9_entropymv.h
@@ -14,8 +14,9 @@
#include "./vpx_config.h"
+#include "vpx_dsp/prob.h"
+
#include "vp9/common/vp9_mv.h"
-#include "vp9/common/vp9_prob.h"
#ifdef __cplusplus
extern "C" {
@@ -76,24 +77,24 @@
#define MV_UPP ((1 << MV_IN_USE_BITS) - 1)
#define MV_LOW (-(1 << MV_IN_USE_BITS))
-extern const vp9_tree_index vp9_mv_joint_tree[];
-extern const vp9_tree_index vp9_mv_class_tree[];
-extern const vp9_tree_index vp9_mv_class0_tree[];
-extern const vp9_tree_index vp9_mv_fp_tree[];
+extern const vpx_tree_index vp9_mv_joint_tree[];
+extern const vpx_tree_index vp9_mv_class_tree[];
+extern const vpx_tree_index vp9_mv_class0_tree[];
+extern const vpx_tree_index vp9_mv_fp_tree[];
typedef struct {
- vp9_prob sign;
- vp9_prob classes[MV_CLASSES - 1];
- vp9_prob class0[CLASS0_SIZE - 1];
- vp9_prob bits[MV_OFFSET_BITS];
- vp9_prob class0_fp[CLASS0_SIZE][MV_FP_SIZE - 1];
- vp9_prob fp[MV_FP_SIZE - 1];
- vp9_prob class0_hp;
- vp9_prob hp;
+ vpx_prob sign;
+ vpx_prob classes[MV_CLASSES - 1];
+ vpx_prob class0[CLASS0_SIZE - 1];
+ vpx_prob bits[MV_OFFSET_BITS];
+ vpx_prob class0_fp[CLASS0_SIZE][MV_FP_SIZE - 1];
+ vpx_prob fp[MV_FP_SIZE - 1];
+ vpx_prob class0_hp;
+ vpx_prob hp;
} nmv_component;
typedef struct {
- vp9_prob joints[MV_JOINTS - 1];
+ vpx_prob joints[MV_JOINTS - 1];
nmv_component comps[2];
} nmv_context;
diff --git a/vp9/common/vp9_loopfilter.c b/vp9/common/vp9_loopfilter.c
index 9816728..0915918 100644
--- a/vp9/common/vp9_loopfilter.c
+++ b/vp9/common/vp9_loopfilter.c
@@ -9,6 +9,7 @@
*/
#include "./vpx_config.h"
+#include "./vpx_dsp_rtcd.h"
#include "vp9/common/vp9_loopfilter.h"
#include "vp9/common/vp9_onyxc_int.h"
#include "vp9/common/vp9_reconinter.h"
@@ -326,55 +327,55 @@
if (mask & 1) {
if ((mask_16x16_0 | mask_16x16_1) & 1) {
if ((mask_16x16_0 & mask_16x16_1) & 1) {
- vp9_lpf_vertical_16_dual(s, pitch, lfi0->mblim, lfi0->lim,
+ vpx_lpf_vertical_16_dual(s, pitch, lfi0->mblim, lfi0->lim,
lfi0->hev_thr);
} else if (mask_16x16_0 & 1) {
- vp9_lpf_vertical_16(s, pitch, lfi0->mblim, lfi0->lim,
+ vpx_lpf_vertical_16(s, pitch, lfi0->mblim, lfi0->lim,
lfi0->hev_thr);
} else {
- vp9_lpf_vertical_16(s + 8 *pitch, pitch, lfi1->mblim,
+ vpx_lpf_vertical_16(s + 8 *pitch, pitch, lfi1->mblim,
lfi1->lim, lfi1->hev_thr);
}
}
if ((mask_8x8_0 | mask_8x8_1) & 1) {
if ((mask_8x8_0 & mask_8x8_1) & 1) {
- vp9_lpf_vertical_8_dual(s, pitch, lfi0->mblim, lfi0->lim,
+ vpx_lpf_vertical_8_dual(s, pitch, lfi0->mblim, lfi0->lim,
lfi0->hev_thr, lfi1->mblim, lfi1->lim,
lfi1->hev_thr);
} else if (mask_8x8_0 & 1) {
- vp9_lpf_vertical_8(s, pitch, lfi0->mblim, lfi0->lim, lfi0->hev_thr,
+ vpx_lpf_vertical_8(s, pitch, lfi0->mblim, lfi0->lim, lfi0->hev_thr,
1);
} else {
- vp9_lpf_vertical_8(s + 8 * pitch, pitch, lfi1->mblim, lfi1->lim,
+ vpx_lpf_vertical_8(s + 8 * pitch, pitch, lfi1->mblim, lfi1->lim,
lfi1->hev_thr, 1);
}
}
if ((mask_4x4_0 | mask_4x4_1) & 1) {
if ((mask_4x4_0 & mask_4x4_1) & 1) {
- vp9_lpf_vertical_4_dual(s, pitch, lfi0->mblim, lfi0->lim,
+ vpx_lpf_vertical_4_dual(s, pitch, lfi0->mblim, lfi0->lim,
lfi0->hev_thr, lfi1->mblim, lfi1->lim,
lfi1->hev_thr);
} else if (mask_4x4_0 & 1) {
- vp9_lpf_vertical_4(s, pitch, lfi0->mblim, lfi0->lim, lfi0->hev_thr,
+ vpx_lpf_vertical_4(s, pitch, lfi0->mblim, lfi0->lim, lfi0->hev_thr,
1);
} else {
- vp9_lpf_vertical_4(s + 8 * pitch, pitch, lfi1->mblim, lfi1->lim,
+ vpx_lpf_vertical_4(s + 8 * pitch, pitch, lfi1->mblim, lfi1->lim,
lfi1->hev_thr, 1);
}
}
if ((mask_4x4_int_0 | mask_4x4_int_1) & 1) {
if ((mask_4x4_int_0 & mask_4x4_int_1) & 1) {
- vp9_lpf_vertical_4_dual(s + 4, pitch, lfi0->mblim, lfi0->lim,
+ vpx_lpf_vertical_4_dual(s + 4, pitch, lfi0->mblim, lfi0->lim,
lfi0->hev_thr, lfi1->mblim, lfi1->lim,
lfi1->hev_thr);
} else if (mask_4x4_int_0 & 1) {
- vp9_lpf_vertical_4(s + 4, pitch, lfi0->mblim, lfi0->lim,
+ vpx_lpf_vertical_4(s + 4, pitch, lfi0->mblim, lfi0->lim,
lfi0->hev_thr, 1);
} else {
- vp9_lpf_vertical_4(s + 8 * pitch + 4, pitch, lfi1->mblim, lfi1->lim,
+ vpx_lpf_vertical_4(s + 8 * pitch + 4, pitch, lfi1->mblim, lfi1->lim,
lfi1->hev_thr, 1);
}
}
@@ -426,55 +427,55 @@
if (mask & 1) {
if ((mask_16x16_0 | mask_16x16_1) & 1) {
if ((mask_16x16_0 & mask_16x16_1) & 1) {
- vp9_highbd_lpf_vertical_16_dual(s, pitch, lfi0->mblim, lfi0->lim,
+ vpx_highbd_lpf_vertical_16_dual(s, pitch, lfi0->mblim, lfi0->lim,
lfi0->hev_thr, bd);
} else if (mask_16x16_0 & 1) {
- vp9_highbd_lpf_vertical_16(s, pitch, lfi0->mblim, lfi0->lim,
+ vpx_highbd_lpf_vertical_16(s, pitch, lfi0->mblim, lfi0->lim,
lfi0->hev_thr, bd);
} else {
- vp9_highbd_lpf_vertical_16(s + 8 *pitch, pitch, lfi1->mblim,
+ vpx_highbd_lpf_vertical_16(s + 8 *pitch, pitch, lfi1->mblim,
lfi1->lim, lfi1->hev_thr, bd);
}
}
if ((mask_8x8_0 | mask_8x8_1) & 1) {
if ((mask_8x8_0 & mask_8x8_1) & 1) {
- vp9_highbd_lpf_vertical_8_dual(s, pitch, lfi0->mblim, lfi0->lim,
+ vpx_highbd_lpf_vertical_8_dual(s, pitch, lfi0->mblim, lfi0->lim,
lfi0->hev_thr, lfi1->mblim, lfi1->lim,
lfi1->hev_thr, bd);
} else if (mask_8x8_0 & 1) {
- vp9_highbd_lpf_vertical_8(s, pitch, lfi0->mblim, lfi0->lim,
+ vpx_highbd_lpf_vertical_8(s, pitch, lfi0->mblim, lfi0->lim,
lfi0->hev_thr, 1, bd);
} else {
- vp9_highbd_lpf_vertical_8(s + 8 * pitch, pitch, lfi1->mblim,
+ vpx_highbd_lpf_vertical_8(s + 8 * pitch, pitch, lfi1->mblim,
lfi1->lim, lfi1->hev_thr, 1, bd);
}
}
if ((mask_4x4_0 | mask_4x4_1) & 1) {
if ((mask_4x4_0 & mask_4x4_1) & 1) {
- vp9_highbd_lpf_vertical_4_dual(s, pitch, lfi0->mblim, lfi0->lim,
+ vpx_highbd_lpf_vertical_4_dual(s, pitch, lfi0->mblim, lfi0->lim,
lfi0->hev_thr, lfi1->mblim, lfi1->lim,
lfi1->hev_thr, bd);
} else if (mask_4x4_0 & 1) {
- vp9_highbd_lpf_vertical_4(s, pitch, lfi0->mblim, lfi0->lim,
+ vpx_highbd_lpf_vertical_4(s, pitch, lfi0->mblim, lfi0->lim,
lfi0->hev_thr, 1, bd);
} else {
- vp9_highbd_lpf_vertical_4(s + 8 * pitch, pitch, lfi1->mblim,
+ vpx_highbd_lpf_vertical_4(s + 8 * pitch, pitch, lfi1->mblim,
lfi1->lim, lfi1->hev_thr, 1, bd);
}
}
if ((mask_4x4_int_0 | mask_4x4_int_1) & 1) {
if ((mask_4x4_int_0 & mask_4x4_int_1) & 1) {
- vp9_highbd_lpf_vertical_4_dual(s + 4, pitch, lfi0->mblim, lfi0->lim,
+ vpx_highbd_lpf_vertical_4_dual(s + 4, pitch, lfi0->mblim, lfi0->lim,
lfi0->hev_thr, lfi1->mblim, lfi1->lim,
lfi1->hev_thr, bd);
} else if (mask_4x4_int_0 & 1) {
- vp9_highbd_lpf_vertical_4(s + 4, pitch, lfi0->mblim, lfi0->lim,
+ vpx_highbd_lpf_vertical_4(s + 4, pitch, lfi0->mblim, lfi0->lim,
lfi0->hev_thr, 1, bd);
} else {
- vp9_highbd_lpf_vertical_4(s + 8 * pitch + 4, pitch, lfi1->mblim,
+ vpx_highbd_lpf_vertical_4(s + 8 * pitch + 4, pitch, lfi1->mblim,
lfi1->lim, lfi1->hev_thr, 1, bd);
}
}
@@ -512,11 +513,11 @@
if (mask & 1) {
if (mask_16x16 & 1) {
if ((mask_16x16 & 3) == 3) {
- vp9_lpf_horizontal_16(s, pitch, lfi->mblim, lfi->lim,
+ vpx_lpf_horizontal_16(s, pitch, lfi->mblim, lfi->lim,
lfi->hev_thr, 2);
count = 2;
} else {
- vp9_lpf_horizontal_16(s, pitch, lfi->mblim, lfi->lim,
+ vpx_lpf_horizontal_16(s, pitch, lfi->mblim, lfi->lim,
lfi->hev_thr, 1);
}
} else if (mask_8x8 & 1) {
@@ -524,28 +525,28 @@
// Next block's thresholds.
const loop_filter_thresh *lfin = lfi_n->lfthr + *(lfl + 1);
- vp9_lpf_horizontal_8_dual(s, pitch, lfi->mblim, lfi->lim,
+ vpx_lpf_horizontal_8_dual(s, pitch, lfi->mblim, lfi->lim,
lfi->hev_thr, lfin->mblim, lfin->lim,
lfin->hev_thr);
if ((mask_4x4_int & 3) == 3) {
- vp9_lpf_horizontal_4_dual(s + 4 * pitch, pitch, lfi->mblim,
+ vpx_lpf_horizontal_4_dual(s + 4 * pitch, pitch, lfi->mblim,
lfi->lim, lfi->hev_thr, lfin->mblim,
lfin->lim, lfin->hev_thr);
} else {
if (mask_4x4_int & 1)
- vp9_lpf_horizontal_4(s + 4 * pitch, pitch, lfi->mblim, lfi->lim,
+ vpx_lpf_horizontal_4(s + 4 * pitch, pitch, lfi->mblim, lfi->lim,
lfi->hev_thr, 1);
else if (mask_4x4_int & 2)
- vp9_lpf_horizontal_4(s + 8 + 4 * pitch, pitch, lfin->mblim,
+ vpx_lpf_horizontal_4(s + 8 + 4 * pitch, pitch, lfin->mblim,
lfin->lim, lfin->hev_thr, 1);
}
count = 2;
} else {
- vp9_lpf_horizontal_8(s, pitch, lfi->mblim, lfi->lim, lfi->hev_thr, 1);
+ vpx_lpf_horizontal_8(s, pitch, lfi->mblim, lfi->lim, lfi->hev_thr, 1);
if (mask_4x4_int & 1)
- vp9_lpf_horizontal_4(s + 4 * pitch, pitch, lfi->mblim, lfi->lim,
+ vpx_lpf_horizontal_4(s + 4 * pitch, pitch, lfi->mblim, lfi->lim,
lfi->hev_thr, 1);
}
} else if (mask_4x4 & 1) {
@@ -553,31 +554,31 @@
// Next block's thresholds.
const loop_filter_thresh *lfin = lfi_n->lfthr + *(lfl + 1);
- vp9_lpf_horizontal_4_dual(s, pitch, lfi->mblim, lfi->lim,
+ vpx_lpf_horizontal_4_dual(s, pitch, lfi->mblim, lfi->lim,
lfi->hev_thr, lfin->mblim, lfin->lim,
lfin->hev_thr);
if ((mask_4x4_int & 3) == 3) {
- vp9_lpf_horizontal_4_dual(s + 4 * pitch, pitch, lfi->mblim,
+ vpx_lpf_horizontal_4_dual(s + 4 * pitch, pitch, lfi->mblim,
lfi->lim, lfi->hev_thr, lfin->mblim,
lfin->lim, lfin->hev_thr);
} else {
if (mask_4x4_int & 1)
- vp9_lpf_horizontal_4(s + 4 * pitch, pitch, lfi->mblim, lfi->lim,
+ vpx_lpf_horizontal_4(s + 4 * pitch, pitch, lfi->mblim, lfi->lim,
lfi->hev_thr, 1);
else if (mask_4x4_int & 2)
- vp9_lpf_horizontal_4(s + 8 + 4 * pitch, pitch, lfin->mblim,
+ vpx_lpf_horizontal_4(s + 8 + 4 * pitch, pitch, lfin->mblim,
lfin->lim, lfin->hev_thr, 1);
}
count = 2;
} else {
- vp9_lpf_horizontal_4(s, pitch, lfi->mblim, lfi->lim, lfi->hev_thr, 1);
+ vpx_lpf_horizontal_4(s, pitch, lfi->mblim, lfi->lim, lfi->hev_thr, 1);
if (mask_4x4_int & 1)
- vp9_lpf_horizontal_4(s + 4 * pitch, pitch, lfi->mblim, lfi->lim,
+ vpx_lpf_horizontal_4(s + 4 * pitch, pitch, lfi->mblim, lfi->lim,
lfi->hev_thr, 1);
}
} else if (mask_4x4_int & 1) {
- vp9_lpf_horizontal_4(s + 4 * pitch, pitch, lfi->mblim, lfi->lim,
+ vpx_lpf_horizontal_4(s + 4 * pitch, pitch, lfi->mblim, lfi->lim,
lfi->hev_thr, 1);
}
}
@@ -609,11 +610,11 @@
if (mask & 1) {
if (mask_16x16 & 1) {
if ((mask_16x16 & 3) == 3) {
- vp9_highbd_lpf_horizontal_16(s, pitch, lfi->mblim, lfi->lim,
+ vpx_highbd_lpf_horizontal_16(s, pitch, lfi->mblim, lfi->lim,
lfi->hev_thr, 2, bd);
count = 2;
} else {
- vp9_highbd_lpf_horizontal_16(s, pitch, lfi->mblim, lfi->lim,
+ vpx_highbd_lpf_horizontal_16(s, pitch, lfi->mblim, lfi->lim,
lfi->hev_thr, 1, bd);
}
} else if (mask_8x8 & 1) {
@@ -621,31 +622,31 @@
// Next block's thresholds.
const loop_filter_thresh *lfin = lfi_n->lfthr + *(lfl + 1);
- vp9_highbd_lpf_horizontal_8_dual(s, pitch, lfi->mblim, lfi->lim,
+ vpx_highbd_lpf_horizontal_8_dual(s, pitch, lfi->mblim, lfi->lim,
lfi->hev_thr, lfin->mblim, lfin->lim,
lfin->hev_thr, bd);
if ((mask_4x4_int & 3) == 3) {
- vp9_highbd_lpf_horizontal_4_dual(s + 4 * pitch, pitch, lfi->mblim,
+ vpx_highbd_lpf_horizontal_4_dual(s + 4 * pitch, pitch, lfi->mblim,
lfi->lim, lfi->hev_thr,
lfin->mblim, lfin->lim,
lfin->hev_thr, bd);
} else {
if (mask_4x4_int & 1) {
- vp9_highbd_lpf_horizontal_4(s + 4 * pitch, pitch, lfi->mblim,
+ vpx_highbd_lpf_horizontal_4(s + 4 * pitch, pitch, lfi->mblim,
lfi->lim, lfi->hev_thr, 1, bd);
} else if (mask_4x4_int & 2) {
- vp9_highbd_lpf_horizontal_4(s + 8 + 4 * pitch, pitch, lfin->mblim,
+ vpx_highbd_lpf_horizontal_4(s + 8 + 4 * pitch, pitch, lfin->mblim,
lfin->lim, lfin->hev_thr, 1, bd);
}
}
count = 2;
} else {
- vp9_highbd_lpf_horizontal_8(s, pitch, lfi->mblim, lfi->lim,
+ vpx_highbd_lpf_horizontal_8(s, pitch, lfi->mblim, lfi->lim,
lfi->hev_thr, 1, bd);
if (mask_4x4_int & 1) {
- vp9_highbd_lpf_horizontal_4(s + 4 * pitch, pitch, lfi->mblim,
+ vpx_highbd_lpf_horizontal_4(s + 4 * pitch, pitch, lfi->mblim,
lfi->lim, lfi->hev_thr, 1, bd);
}
}
@@ -654,35 +655,35 @@
// Next block's thresholds.
const loop_filter_thresh *lfin = lfi_n->lfthr + *(lfl + 1);
- vp9_highbd_lpf_horizontal_4_dual(s, pitch, lfi->mblim, lfi->lim,
+ vpx_highbd_lpf_horizontal_4_dual(s, pitch, lfi->mblim, lfi->lim,
lfi->hev_thr, lfin->mblim, lfin->lim,
lfin->hev_thr, bd);
if ((mask_4x4_int & 3) == 3) {
- vp9_highbd_lpf_horizontal_4_dual(s + 4 * pitch, pitch, lfi->mblim,
+ vpx_highbd_lpf_horizontal_4_dual(s + 4 * pitch, pitch, lfi->mblim,
lfi->lim, lfi->hev_thr,
lfin->mblim, lfin->lim,
lfin->hev_thr, bd);
} else {
if (mask_4x4_int & 1) {
- vp9_highbd_lpf_horizontal_4(s + 4 * pitch, pitch, lfi->mblim,
+ vpx_highbd_lpf_horizontal_4(s + 4 * pitch, pitch, lfi->mblim,
lfi->lim, lfi->hev_thr, 1, bd);
} else if (mask_4x4_int & 2) {
- vp9_highbd_lpf_horizontal_4(s + 8 + 4 * pitch, pitch, lfin->mblim,
+ vpx_highbd_lpf_horizontal_4(s + 8 + 4 * pitch, pitch, lfin->mblim,
lfin->lim, lfin->hev_thr, 1, bd);
}
}
count = 2;
} else {
- vp9_highbd_lpf_horizontal_4(s, pitch, lfi->mblim, lfi->lim,
+ vpx_highbd_lpf_horizontal_4(s, pitch, lfi->mblim, lfi->lim,
lfi->hev_thr, 1, bd);
if (mask_4x4_int & 1) {
- vp9_highbd_lpf_horizontal_4(s + 4 * pitch, pitch, lfi->mblim,
+ vpx_highbd_lpf_horizontal_4(s + 4 * pitch, pitch, lfi->mblim,
lfi->lim, lfi->hev_thr, 1, bd);
}
}
} else if (mask_4x4_int & 1) {
- vp9_highbd_lpf_horizontal_4(s + 4 * pitch, pitch, lfi->mblim, lfi->lim,
+ vpx_highbd_lpf_horizontal_4(s + 4 * pitch, pitch, lfi->mblim, lfi->lim,
lfi->hev_thr, 1, bd);
}
}
@@ -1093,15 +1094,15 @@
if (mask & 1) {
if (mask_16x16 & 1) {
- vp9_lpf_vertical_16(s, pitch, lfi->mblim, lfi->lim, lfi->hev_thr);
+ vpx_lpf_vertical_16(s, pitch, lfi->mblim, lfi->lim, lfi->hev_thr);
} else if (mask_8x8 & 1) {
- vp9_lpf_vertical_8(s, pitch, lfi->mblim, lfi->lim, lfi->hev_thr, 1);
+ vpx_lpf_vertical_8(s, pitch, lfi->mblim, lfi->lim, lfi->hev_thr, 1);
} else if (mask_4x4 & 1) {
- vp9_lpf_vertical_4(s, pitch, lfi->mblim, lfi->lim, lfi->hev_thr, 1);
+ vpx_lpf_vertical_4(s, pitch, lfi->mblim, lfi->lim, lfi->hev_thr, 1);
}
}
if (mask_4x4_int & 1)
- vp9_lpf_vertical_4(s + 4, pitch, lfi->mblim, lfi->lim, lfi->hev_thr, 1);
+ vpx_lpf_vertical_4(s + 4, pitch, lfi->mblim, lfi->lim, lfi->hev_thr, 1);
s += 8;
lfl += 1;
mask_16x16 >>= 1;
@@ -1127,18 +1128,18 @@
if (mask & 1) {
if (mask_16x16 & 1) {
- vp9_highbd_lpf_vertical_16(s, pitch, lfi->mblim, lfi->lim,
+ vpx_highbd_lpf_vertical_16(s, pitch, lfi->mblim, lfi->lim,
lfi->hev_thr, bd);
} else if (mask_8x8 & 1) {
- vp9_highbd_lpf_vertical_8(s, pitch, lfi->mblim, lfi->lim,
+ vpx_highbd_lpf_vertical_8(s, pitch, lfi->mblim, lfi->lim,
lfi->hev_thr, 1, bd);
} else if (mask_4x4 & 1) {
- vp9_highbd_lpf_vertical_4(s, pitch, lfi->mblim, lfi->lim,
+ vpx_highbd_lpf_vertical_4(s, pitch, lfi->mblim, lfi->lim,
lfi->hev_thr, 1, bd);
}
}
if (mask_4x4_int & 1)
- vp9_highbd_lpf_vertical_4(s + 4, pitch, lfi->mblim, lfi->lim,
+ vpx_highbd_lpf_vertical_4(s + 4, pitch, lfi->mblim, lfi->lim,
lfi->hev_thr, 1, bd);
s += 8;
lfl += 1;
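
Review note on the loopfilter hunks: every call site swaps vp9_lpf_* / vp9_highbd_lpf_* for the vpx_dsp equivalents with identical arguments, dispatched through the newly included ./vpx_dsp_rtcd.h. Mirroring the vp9_ prototypes removed from vp9_rtcd_defs.pl later in this patch, the replacements presumably keep the same shape, e.g.:

/* Sketch, assuming the vpx_dsp prototype matches the removed vp9_ one. */
void vpx_lpf_horizontal_4(uint8_t *s, int pitch, const uint8_t *blimit,
                          const uint8_t *limit, const uint8_t *thresh,
                          int count);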
diff --git a/vp9/common/vp9_onyxc_int.h b/vp9/common/vp9_onyxc_int.h
index ca0dfc8..c373c02 100644
--- a/vp9/common/vp9_onyxc_int.h
+++ b/vp9/common/vp9_onyxc_int.h
@@ -345,7 +345,7 @@
xd->partition_probs =
frame_is_intra_only(cm) ?
&vp9_kf_partition_probs[0] :
- (const vp9_prob (*)[PARTITION_TYPES - 1])cm->fc->partition_prob;
+ (const vpx_prob (*)[PARTITION_TYPES - 1])cm->fc->partition_prob;
}
static INLINE void vp9_init_macroblockd(VP9_COMMON *cm, MACROBLOCKD *xd,
@@ -373,7 +373,7 @@
set_partition_probs(cm, xd);
}
-static INLINE const vp9_prob* get_partition_probs(const MACROBLOCKD *xd,
+static INLINE const vpx_prob* get_partition_probs(const MACROBLOCKD *xd,
int ctx) {
return xd->partition_probs[ctx];
}
diff --git a/vp9/common/vp9_pred_common.h b/vp9/common/vp9_pred_common.h
index 7616144..67b95db 100644
--- a/vp9/common/vp9_pred_common.h
+++ b/vp9/common/vp9_pred_common.h
@@ -47,7 +47,7 @@
return above_sip + left_sip;
}
-static INLINE vp9_prob vp9_get_pred_prob_seg_id(const struct segmentation *seg,
+static INLINE vpx_prob vp9_get_pred_prob_seg_id(const struct segmentation *seg,
const MACROBLOCKD *xd) {
return seg->pred_probs[vp9_get_pred_context_seg_id(xd)];
}
@@ -60,7 +60,7 @@
return above_skip + left_skip;
}
-static INLINE vp9_prob vp9_get_skip_prob(const VP9_COMMON *cm,
+static INLINE vpx_prob vp9_get_skip_prob(const VP9_COMMON *cm,
const MACROBLOCKD *xd) {
return cm->fc->skip_probs[vp9_get_skip_context(xd)];
}
@@ -69,14 +69,14 @@
int vp9_get_intra_inter_context(const MACROBLOCKD *xd);
-static INLINE vp9_prob vp9_get_intra_inter_prob(const VP9_COMMON *cm,
+static INLINE vpx_prob vp9_get_intra_inter_prob(const VP9_COMMON *cm,
const MACROBLOCKD *xd) {
return cm->fc->intra_inter_prob[vp9_get_intra_inter_context(xd)];
}
int vp9_get_reference_mode_context(const VP9_COMMON *cm, const MACROBLOCKD *xd);
-static INLINE vp9_prob vp9_get_reference_mode_prob(const VP9_COMMON *cm,
+static INLINE vpx_prob vp9_get_reference_mode_prob(const VP9_COMMON *cm,
const MACROBLOCKD *xd) {
return cm->fc->comp_inter_prob[vp9_get_reference_mode_context(cm, xd)];
}
@@ -84,7 +84,7 @@
int vp9_get_pred_context_comp_ref_p(const VP9_COMMON *cm,
const MACROBLOCKD *xd);
-static INLINE vp9_prob vp9_get_pred_prob_comp_ref_p(const VP9_COMMON *cm,
+static INLINE vpx_prob vp9_get_pred_prob_comp_ref_p(const VP9_COMMON *cm,
const MACROBLOCKD *xd) {
const int pred_context = vp9_get_pred_context_comp_ref_p(cm, xd);
return cm->fc->comp_ref_prob[pred_context];
@@ -92,14 +92,14 @@
int vp9_get_pred_context_single_ref_p1(const MACROBLOCKD *xd);
-static INLINE vp9_prob vp9_get_pred_prob_single_ref_p1(const VP9_COMMON *cm,
+static INLINE vpx_prob vp9_get_pred_prob_single_ref_p1(const VP9_COMMON *cm,
const MACROBLOCKD *xd) {
return cm->fc->single_ref_prob[vp9_get_pred_context_single_ref_p1(xd)][0];
}
int vp9_get_pred_context_single_ref_p2(const MACROBLOCKD *xd);
-static INLINE vp9_prob vp9_get_pred_prob_single_ref_p2(const VP9_COMMON *cm,
+static INLINE vpx_prob vp9_get_pred_prob_single_ref_p2(const VP9_COMMON *cm,
const MACROBLOCKD *xd) {
return cm->fc->single_ref_prob[vp9_get_pred_context_single_ref_p2(xd)][1];
}
@@ -127,7 +127,7 @@
return (above_ctx + left_ctx) > max_tx_size;
}
-static INLINE const vp9_prob *get_tx_probs(TX_SIZE max_tx_size, int ctx,
+static INLINE const vpx_prob *get_tx_probs(TX_SIZE max_tx_size, int ctx,
const struct tx_probs *tx_probs) {
switch (max_tx_size) {
case TX_8X8:
@@ -142,7 +142,7 @@
}
}
-static INLINE const vp9_prob *get_tx_probs2(TX_SIZE max_tx_size,
+static INLINE const vpx_prob *get_tx_probs2(TX_SIZE max_tx_size,
const MACROBLOCKD *xd,
const struct tx_probs *tx_probs) {
return get_tx_probs(max_tx_size, get_tx_size_context(xd), tx_probs);
diff --git a/vp9/common/vp9_reconintra.c b/vp9/common/vp9_reconintra.c
index 1e9acb8..f969ff1 100644
--- a/vp9/common/vp9_reconintra.c
+++ b/vp9/common/vp9_reconintra.c
@@ -1059,20 +1059,19 @@
}
}
-void vp9_predict_intra_block(const MACROBLOCKD *xd, int block_idx, int bwl_in,
+void vp9_predict_intra_block(const MACROBLOCKD *xd, int bwl_in,
TX_SIZE tx_size, PREDICTION_MODE mode,
const uint8_t *ref, int ref_stride,
uint8_t *dst, int dst_stride,
int aoff, int loff, int plane) {
- const int bwl = bwl_in - tx_size;
- const int wmask = (1 << bwl) - 1;
- const int have_top = (block_idx >> bwl) || xd->up_available;
- const int have_left = (block_idx & wmask) || xd->left_available;
- const int have_right = ((block_idx & wmask) != wmask);
+ const int bw = (1 << bwl_in);
+ const int txw = (1 << tx_size);
+ const int have_top = loff || xd->up_available;
+ const int have_left = aoff || xd->left_available;
+ const int have_right = (aoff + txw) < bw;
const int x = aoff * 4;
const int y = loff * 4;
- assert(bwl >= 0);
#if CONFIG_VP9_HIGHBITDEPTH
if (xd->cur_buf->flags & YV12_FLAG_HIGHBITDEPTH) {
build_intra_predictors_high(xd, ref, ref_stride, dst, dst_stride, mode,
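
Review note on the vp9_predict_intra_block hunk: neighbor availability is now computed directly from the transform block's (aoff, loff) position in 4x4 units rather than from block_idx. A worked example under the reading that bwl_in is log2 of the block width in 4x4 units: for a 16x16 block (bwl_in == 2, so bw == 4) tiled with 8x8 transforms (txw == 2), the right-hand transform block at aoff == 2 gets:

/* Illustration only; values chosen for the example above. */
const int bw  = 1 << 2;                    /* 4: block width, 4x4 units */
const int txw = 1 << 1;                    /* 2: tx width, 4x4 units    */
const int have_right = (2 + txw) < bw;     /* 0: flush with right edge  */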
diff --git a/vp9/common/vp9_reconintra.h b/vp9/common/vp9_reconintra.h
index da5e435..de45380 100644
--- a/vp9/common/vp9_reconintra.h
+++ b/vp9/common/vp9_reconintra.h
@@ -20,7 +20,7 @@
void vp9_init_intra_predictors(void);
-void vp9_predict_intra_block(const MACROBLOCKD *xd, int block_idx, int bwl_in,
+void vp9_predict_intra_block(const MACROBLOCKD *xd, int bwl_in,
TX_SIZE tx_size, PREDICTION_MODE mode,
const uint8_t *ref, int ref_stride,
uint8_t *dst, int dst_stride,
diff --git a/vp9/common/vp9_rtcd_defs.pl b/vp9/common/vp9_rtcd_defs.pl
index 538f1ed..f80d31e 100644
--- a/vp9/common/vp9_rtcd_defs.pl
+++ b/vp9/common/vp9_rtcd_defs.pl
@@ -220,49 +220,6 @@
specialize qw/vp9_dc_128_predictor_32x32 msa neon/, "$sse2_x86inc";
#
-# Loopfilter
-#
-add_proto qw/void vp9_lpf_vertical_16/, "uint8_t *s, int pitch, const uint8_t *blimit, const uint8_t *limit, const uint8_t *thresh";
-specialize qw/vp9_lpf_vertical_16 sse2 neon_asm dspr2 msa/;
-$vp9_lpf_vertical_16_neon_asm=vp9_lpf_vertical_16_neon;
-
-add_proto qw/void vp9_lpf_vertical_16_dual/, "uint8_t *s, int pitch, const uint8_t *blimit, const uint8_t *limit, const uint8_t *thresh";
-specialize qw/vp9_lpf_vertical_16_dual sse2 neon_asm dspr2 msa/;
-$vp9_lpf_vertical_16_dual_neon_asm=vp9_lpf_vertical_16_dual_neon;
-
-add_proto qw/void vp9_lpf_vertical_8/, "uint8_t *s, int pitch, const uint8_t *blimit, const uint8_t *limit, const uint8_t *thresh, int count";
-specialize qw/vp9_lpf_vertical_8 sse2 neon_asm dspr2 msa/;
-$vp9_lpf_vertical_8_neon_asm=vp9_lpf_vertical_8_neon;
-
-add_proto qw/void vp9_lpf_vertical_8_dual/, "uint8_t *s, int pitch, const uint8_t *blimit0, const uint8_t *limit0, const uint8_t *thresh0, const uint8_t *blimit1, const uint8_t *limit1, const uint8_t *thresh1";
-specialize qw/vp9_lpf_vertical_8_dual sse2 neon_asm dspr2 msa/;
-$vp9_lpf_vertical_8_dual_neon_asm=vp9_lpf_vertical_8_dual_neon;
-
-add_proto qw/void vp9_lpf_vertical_4/, "uint8_t *s, int pitch, const uint8_t *blimit, const uint8_t *limit, const uint8_t *thresh, int count";
-specialize qw/vp9_lpf_vertical_4 mmx neon dspr2 msa/;
-
-add_proto qw/void vp9_lpf_vertical_4_dual/, "uint8_t *s, int pitch, const uint8_t *blimit0, const uint8_t *limit0, const uint8_t *thresh0, const uint8_t *blimit1, const uint8_t *limit1, const uint8_t *thresh1";
-specialize qw/vp9_lpf_vertical_4_dual sse2 neon dspr2 msa/;
-
-add_proto qw/void vp9_lpf_horizontal_16/, "uint8_t *s, int pitch, const uint8_t *blimit, const uint8_t *limit, const uint8_t *thresh, int count";
-specialize qw/vp9_lpf_horizontal_16 sse2 avx2 neon_asm dspr2 msa/;
-$vp9_lpf_horizontal_16_neon_asm=vp9_lpf_horizontal_16_neon;
-
-add_proto qw/void vp9_lpf_horizontal_8/, "uint8_t *s, int pitch, const uint8_t *blimit, const uint8_t *limit, const uint8_t *thresh, int count";
-specialize qw/vp9_lpf_horizontal_8 sse2 neon_asm dspr2 msa/;
-$vp9_lpf_horizontal_8_neon_asm=vp9_lpf_horizontal_8_neon;
-
-add_proto qw/void vp9_lpf_horizontal_8_dual/, "uint8_t *s, int pitch, const uint8_t *blimit0, const uint8_t *limit0, const uint8_t *thresh0, const uint8_t *blimit1, const uint8_t *limit1, const uint8_t *thresh1";
-specialize qw/vp9_lpf_horizontal_8_dual sse2 neon_asm dspr2 msa/;
-$vp9_lpf_horizontal_8_dual_neon_asm=vp9_lpf_horizontal_8_dual_neon;
-
-add_proto qw/void vp9_lpf_horizontal_4/, "uint8_t *s, int pitch, const uint8_t *blimit, const uint8_t *limit, const uint8_t *thresh, int count";
-specialize qw/vp9_lpf_horizontal_4 mmx neon dspr2 msa/;
-
-add_proto qw/void vp9_lpf_horizontal_4_dual/, "uint8_t *s, int pitch, const uint8_t *blimit0, const uint8_t *limit0, const uint8_t *thresh0, const uint8_t *blimit1, const uint8_t *limit1, const uint8_t *thresh1";
-specialize qw/vp9_lpf_horizontal_4_dual sse2 neon dspr2 msa/;
-
-#
# post proc
#
if (vpx_config("CONFIG_VP9_POSTPROC") eq "yes") {
@@ -668,42 +625,6 @@
specialize qw/vp9_highbd_convolve8_avg_vert/, "$sse2_x86_64";
#
- # Loopfilter
- #
- add_proto qw/void vp9_highbd_lpf_vertical_16/, "uint16_t *s, int pitch, const uint8_t *blimit, const uint8_t *limit, const uint8_t *thresh, int bd";
- specialize qw/vp9_highbd_lpf_vertical_16 sse2/;
-
- add_proto qw/void vp9_highbd_lpf_vertical_16_dual/, "uint16_t *s, int pitch, const uint8_t *blimit, const uint8_t *limit, const uint8_t *thresh, int bd";
- specialize qw/vp9_highbd_lpf_vertical_16_dual sse2/;
-
- add_proto qw/void vp9_highbd_lpf_vertical_8/, "uint16_t *s, int pitch, const uint8_t *blimit, const uint8_t *limit, const uint8_t *thresh, int count, int bd";
- specialize qw/vp9_highbd_lpf_vertical_8 sse2/;
-
- add_proto qw/void vp9_highbd_lpf_vertical_8_dual/, "uint16_t *s, int pitch, const uint8_t *blimit0, const uint8_t *limit0, const uint8_t *thresh0, const uint8_t *blimit1, const uint8_t *limit1, const uint8_t *thresh1, int bd";
- specialize qw/vp9_highbd_lpf_vertical_8_dual sse2/;
-
- add_proto qw/void vp9_highbd_lpf_vertical_4/, "uint16_t *s, int pitch, const uint8_t *blimit, const uint8_t *limit, const uint8_t *thresh, int count, int bd";
- specialize qw/vp9_highbd_lpf_vertical_4 sse2/;
-
- add_proto qw/void vp9_highbd_lpf_vertical_4_dual/, "uint16_t *s, int pitch, const uint8_t *blimit0, const uint8_t *limit0, const uint8_t *thresh0, const uint8_t *blimit1, const uint8_t *limit1, const uint8_t *thresh1, int bd";
- specialize qw/vp9_highbd_lpf_vertical_4_dual sse2/;
-
- add_proto qw/void vp9_highbd_lpf_horizontal_16/, "uint16_t *s, int pitch, const uint8_t *blimit, const uint8_t *limit, const uint8_t *thresh, int count, int bd";
- specialize qw/vp9_highbd_lpf_horizontal_16 sse2/;
-
- add_proto qw/void vp9_highbd_lpf_horizontal_8/, "uint16_t *s, int pitch, const uint8_t *blimit, const uint8_t *limit, const uint8_t *thresh, int count, int bd";
- specialize qw/vp9_highbd_lpf_horizontal_8 sse2/;
-
- add_proto qw/void vp9_highbd_lpf_horizontal_8_dual/, "uint16_t *s, int pitch, const uint8_t *blimit0, const uint8_t *limit0, const uint8_t *thresh0, const uint8_t *blimit1, const uint8_t *limit1, const uint8_t *thresh1, int bd";
- specialize qw/vp9_highbd_lpf_horizontal_8_dual sse2/;
-
- add_proto qw/void vp9_highbd_lpf_horizontal_4/, "uint16_t *s, int pitch, const uint8_t *blimit, const uint8_t *limit, const uint8_t *thresh, int count, int bd";
- specialize qw/vp9_highbd_lpf_horizontal_4 sse2/;
-
- add_proto qw/void vp9_highbd_lpf_horizontal_4_dual/, "uint16_t *s, int pitch, const uint8_t *blimit0, const uint8_t *limit0, const uint8_t *thresh0, const uint8_t *blimit1, const uint8_t *limit1, const uint8_t *thresh1, int bd";
- specialize qw/vp9_highbd_lpf_horizontal_4_dual sse2/;
-
- #
# post proc
#
if (vpx_config("CONFIG_VP9_POSTPROC") eq "yes") {
@@ -824,7 +745,7 @@
specialize qw/vp9_int_pro_row sse2 neon/;
add_proto qw/int16_t vp9_int_pro_col/, "uint8_t const *ref, const int width";
-specialize qw/vp9_int_pro_col sse2/;
+specialize qw/vp9_int_pro_col sse2 neon/;
add_proto qw/int vp9_vector_var/, "int16_t const *ref, int16_t const *src, const int bwl";
specialize qw/vp9_vector_var sse2/;
@@ -860,12 +781,6 @@
add_proto qw/void vp9_quantize_fp_32x32/, "const tran_low_t *coeff_ptr, intptr_t n_coeffs, int skip_block, const int16_t *zbin_ptr, const int16_t *round_ptr, const int16_t *quant_ptr, const int16_t *quant_shift_ptr, tran_low_t *qcoeff_ptr, tran_low_t *dqcoeff_ptr, const int16_t *dequant_ptr, uint16_t *eob_ptr, const int16_t *scan, const int16_t *iscan";
specialize qw/vp9_quantize_fp_32x32/;
- add_proto qw/void vp9_quantize_b/, "const tran_low_t *coeff_ptr, intptr_t n_coeffs, int skip_block, const int16_t *zbin_ptr, const int16_t *round_ptr, const int16_t *quant_ptr, const int16_t *quant_shift_ptr, tran_low_t *qcoeff_ptr, tran_low_t *dqcoeff_ptr, const int16_t *dequant_ptr, uint16_t *eob_ptr, const int16_t *scan, const int16_t *iscan";
- specialize qw/vp9_quantize_b/;
-
- add_proto qw/void vp9_quantize_b_32x32/, "const tran_low_t *coeff_ptr, intptr_t n_coeffs, int skip_block, const int16_t *zbin_ptr, const int16_t *round_ptr, const int16_t *quant_ptr, const int16_t *quant_shift_ptr, tran_low_t *qcoeff_ptr, tran_low_t *dqcoeff_ptr, const int16_t *dequant_ptr, uint16_t *eob_ptr, const int16_t *scan, const int16_t *iscan";
- specialize qw/vp9_quantize_b_32x32/;
-
add_proto qw/void vp9_fdct8x8_quant/, "const int16_t *input, int stride, tran_low_t *coeff_ptr, intptr_t n_coeffs, int skip_block, const int16_t *zbin_ptr, const int16_t *round_ptr, const int16_t *quant_ptr, const int16_t *quant_shift_ptr, tran_low_t *qcoeff_ptr, tran_low_t *dqcoeff_ptr, const int16_t *dequant_ptr, uint16_t *eob_ptr, const int16_t *scan, const int16_t *iscan";
specialize qw/vp9_fdct8x8_quant/;
} else {
@@ -881,12 +796,6 @@
add_proto qw/void vp9_quantize_fp_32x32/, "const tran_low_t *coeff_ptr, intptr_t n_coeffs, int skip_block, const int16_t *zbin_ptr, const int16_t *round_ptr, const int16_t *quant_ptr, const int16_t *quant_shift_ptr, tran_low_t *qcoeff_ptr, tran_low_t *dqcoeff_ptr, const int16_t *dequant_ptr, uint16_t *eob_ptr, const int16_t *scan, const int16_t *iscan";
specialize qw/vp9_quantize_fp_32x32/, "$ssse3_x86_64_x86inc";
- add_proto qw/void vp9_quantize_b/, "const tran_low_t *coeff_ptr, intptr_t n_coeffs, int skip_block, const int16_t *zbin_ptr, const int16_t *round_ptr, const int16_t *quant_ptr, const int16_t *quant_shift_ptr, tran_low_t *qcoeff_ptr, tran_low_t *dqcoeff_ptr, const int16_t *dequant_ptr, uint16_t *eob_ptr, const int16_t *scan, const int16_t *iscan";
- specialize qw/vp9_quantize_b sse2/, "$ssse3_x86_64_x86inc";
-
- add_proto qw/void vp9_quantize_b_32x32/, "const tran_low_t *coeff_ptr, intptr_t n_coeffs, int skip_block, const int16_t *zbin_ptr, const int16_t *round_ptr, const int16_t *quant_ptr, const int16_t *quant_shift_ptr, tran_low_t *qcoeff_ptr, tran_low_t *dqcoeff_ptr, const int16_t *dequant_ptr, uint16_t *eob_ptr, const int16_t *scan, const int16_t *iscan";
- specialize qw/vp9_quantize_b_32x32/, "$ssse3_x86_64_x86inc";
-
add_proto qw/void vp9_fdct8x8_quant/, "const int16_t *input, int stride, tran_low_t *coeff_ptr, intptr_t n_coeffs, int skip_block, const int16_t *zbin_ptr, const int16_t *round_ptr, const int16_t *quant_ptr, const int16_t *quant_shift_ptr, tran_low_t *qcoeff_ptr, tran_low_t *dqcoeff_ptr, const int16_t *dequant_ptr, uint16_t *eob_ptr, const int16_t *scan, const int16_t *iscan";
specialize qw/vp9_fdct8x8_quant sse2 ssse3 neon/;
}
@@ -920,21 +829,12 @@
add_proto qw/void vp9_fdct4x4_1/, "const int16_t *input, tran_low_t *output, int stride";
specialize qw/vp9_fdct4x4_1 sse2/;
- add_proto qw/void vp9_fdct4x4/, "const int16_t *input, tran_low_t *output, int stride";
- specialize qw/vp9_fdct4x4 sse2/;
-
add_proto qw/void vp9_fdct8x8_1/, "const int16_t *input, tran_low_t *output, int stride";
specialize qw/vp9_fdct8x8_1 sse2/;
- add_proto qw/void vp9_fdct8x8/, "const int16_t *input, tran_low_t *output, int stride";
- specialize qw/vp9_fdct8x8 sse2/;
-
add_proto qw/void vp9_fdct16x16_1/, "const int16_t *input, tran_low_t *output, int stride";
specialize qw/vp9_fdct16x16_1 sse2/;
- add_proto qw/void vp9_fdct16x16/, "const int16_t *input, tran_low_t *output, int stride";
- specialize qw/vp9_fdct16x16 sse2/;
-
add_proto qw/void vp9_fdct32x32_1/, "const int16_t *input, tran_low_t *output, int stride";
specialize qw/vp9_fdct32x32_1 sse2/;
@@ -959,21 +859,12 @@
add_proto qw/void vp9_fdct4x4_1/, "const int16_t *input, tran_low_t *output, int stride";
specialize qw/vp9_fdct4x4_1 sse2/;
- add_proto qw/void vp9_fdct4x4/, "const int16_t *input, tran_low_t *output, int stride";
- specialize qw/vp9_fdct4x4 sse2 msa/;
-
add_proto qw/void vp9_fdct8x8_1/, "const int16_t *input, tran_low_t *output, int stride";
specialize qw/vp9_fdct8x8_1 sse2 neon msa/;
- add_proto qw/void vp9_fdct8x8/, "const int16_t *input, tran_low_t *output, int stride";
- specialize qw/vp9_fdct8x8 sse2 neon msa/, "$ssse3_x86_64_x86inc";
-
add_proto qw/void vp9_fdct16x16_1/, "const int16_t *input, tran_low_t *output, int stride";
specialize qw/vp9_fdct16x16_1 sse2 msa/;
- add_proto qw/void vp9_fdct16x16/, "const int16_t *input, tran_low_t *output, int stride";
- specialize qw/vp9_fdct16x16 sse2 msa/;
-
add_proto qw/void vp9_fdct32x32_1/, "const int16_t *input, tran_low_t *output, int stride";
specialize qw/vp9_fdct32x32_1 sse2 msa/;
@@ -1014,12 +905,6 @@
add_proto qw/void vp9_highbd_quantize_fp_32x32/, "const tran_low_t *coeff_ptr, intptr_t n_coeffs, int skip_block, const int16_t *zbin_ptr, const int16_t *round_ptr, const int16_t *quant_ptr, const int16_t *quant_shift_ptr, tran_low_t *qcoeff_ptr, tran_low_t *dqcoeff_ptr, const int16_t *dequant_ptr, uint16_t *eob_ptr, const int16_t *scan, const int16_t *iscan";
specialize qw/vp9_highbd_quantize_fp_32x32/;
- add_proto qw/void vp9_highbd_quantize_b/, "const tran_low_t *coeff_ptr, intptr_t n_coeffs, int skip_block, const int16_t *zbin_ptr, const int16_t *round_ptr, const int16_t *quant_ptr, const int16_t *quant_shift_ptr, tran_low_t *qcoeff_ptr, tran_low_t *dqcoeff_ptr, const int16_t *dequant_ptr, uint16_t *eob_ptr, const int16_t *scan, const int16_t *iscan";
- specialize qw/vp9_highbd_quantize_b sse2/;
-
- add_proto qw/void vp9_highbd_quantize_b_32x32/, "const tran_low_t *coeff_ptr, intptr_t n_coeffs, int skip_block, const int16_t *zbin_ptr, const int16_t *round_ptr, const int16_t *quant_ptr, const int16_t *quant_shift_ptr, tran_low_t *qcoeff_ptr, tran_low_t *dqcoeff_ptr, const int16_t *dequant_ptr, uint16_t *eob_ptr, const int16_t *scan, const int16_t *iscan";
- specialize qw/vp9_highbd_quantize_b_32x32 sse2/;
-
#
# Structured Similarity (SSIM)
#
@@ -1030,32 +915,23 @@
# fdct functions
add_proto qw/void vp9_highbd_fht4x4/, "const int16_t *input, tran_low_t *output, int stride, int tx_type";
- specialize qw/vp9_highbd_fht4x4 sse2/;
+ specialize qw/vp9_highbd_fht4x4/;
add_proto qw/void vp9_highbd_fht8x8/, "const int16_t *input, tran_low_t *output, int stride, int tx_type";
- specialize qw/vp9_highbd_fht8x8 sse2/;
+ specialize qw/vp9_highbd_fht8x8/;
add_proto qw/void vp9_highbd_fht16x16/, "const int16_t *input, tran_low_t *output, int stride, int tx_type";
- specialize qw/vp9_highbd_fht16x16 sse2/;
+ specialize qw/vp9_highbd_fht16x16/;
add_proto qw/void vp9_highbd_fwht4x4/, "const int16_t *input, tran_low_t *output, int stride";
specialize qw/vp9_highbd_fwht4x4/;
- add_proto qw/void vp9_highbd_fdct4x4/, "const int16_t *input, tran_low_t *output, int stride";
- specialize qw/vp9_highbd_fdct4x4 sse2/;
-
add_proto qw/void vp9_highbd_fdct8x8_1/, "const int16_t *input, tran_low_t *output, int stride";
specialize qw/vp9_highbd_fdct8x8_1/;
- add_proto qw/void vp9_highbd_fdct8x8/, "const int16_t *input, tran_low_t *output, int stride";
- specialize qw/vp9_highbd_fdct8x8 sse2/;
-
add_proto qw/void vp9_highbd_fdct16x16_1/, "const int16_t *input, tran_low_t *output, int stride";
specialize qw/vp9_highbd_fdct16x16_1/;
- add_proto qw/void vp9_highbd_fdct16x16/, "const int16_t *input, tran_low_t *output, int stride";
- specialize qw/vp9_highbd_fdct16x16 sse2/;
-
add_proto qw/void vp9_highbd_fdct32x32_1/, "const int16_t *input, tran_low_t *output, int stride";
specialize qw/vp9_highbd_fdct32x32_1/;
diff --git a/vp9/common/vp9_seg_common.c b/vp9/common/vp9_seg_common.c
index 471e238..c8ef618 100644
--- a/vp9/common/vp9_seg_common.c
+++ b/vp9/common/vp9_seg_common.c
@@ -54,7 +54,7 @@
seg->feature_data[segment_id][feature_id] = seg_data;
}
-const vp9_tree_index vp9_segment_tree[TREE_SIZE(MAX_SEGMENTS)] = {
+const vpx_tree_index vp9_segment_tree[TREE_SIZE(MAX_SEGMENTS)] = {
2, 4, 6, 8, 10, 12,
0, -1, -2, -3, -4, -5, -6, -7
};
diff --git a/vp9/common/vp9_seg_common.h b/vp9/common/vp9_seg_common.h
index 95c9918..5b75d8d 100644
--- a/vp9/common/vp9_seg_common.h
+++ b/vp9/common/vp9_seg_common.h
@@ -11,7 +11,7 @@
#ifndef VP9_COMMON_VP9_SEG_COMMON_H_
#define VP9_COMMON_VP9_SEG_COMMON_H_
-#include "vp9/common/vp9_prob.h"
+#include "vpx_dsp/prob.h"
#ifdef __cplusplus
extern "C" {
@@ -42,8 +42,8 @@
uint8_t abs_delta;
uint8_t temporal_update;
- vp9_prob tree_probs[SEG_TREE_PROBS];
- vp9_prob pred_probs[PREDICTION_PROBS];
+ vpx_prob tree_probs[SEG_TREE_PROBS];
+ vpx_prob pred_probs[PREDICTION_PROBS];
int16_t feature_data[MAX_SEGMENTS][SEG_LVL_MAX];
unsigned int feature_mask[MAX_SEGMENTS];
@@ -76,7 +76,7 @@
return seg->feature_data[segment_id][feature_id];
}
-extern const vp9_tree_index vp9_segment_tree[TREE_SIZE(MAX_SEGMENTS)];
+extern const vpx_tree_index vp9_segment_tree[TREE_SIZE(MAX_SEGMENTS)];
#ifdef __cplusplus
} // extern "C"
diff --git a/vp9/common/x86/vp9_idct_sse2.asm b/vp9/common/x86/vp9_idct_sse2.asm
index 9619e37..69b68e6 100644
--- a/vp9/common/x86/vp9_idct_sse2.asm
+++ b/vp9/common/x86/vp9_idct_sse2.asm
@@ -11,38 +11,50 @@
SECTION .text
+%macro REORDER_INPUTS 0
+ ; a c d b to a b c d
+ SWAP 1, 3, 2
+%endmacro
+
%macro TRANSFORM_COLS 0
+ ; input:
+ ; m0 a
+ ; m1 b
+ ; m2 c
+ ; m3 d
paddw m0, m2
psubw m3, m1
- psubw m4, m0, m3
- psraw m4, 1
- psubw m5, m4, m1 ;b1
- psubw m4, m2 ;c1
+
+ ; wide subtract
+ punpcklwd m4, m0
+ punpcklwd m5, m3
+ psrad m4, 16
+ psrad m5, 16
+ psubd m4, m5
+ psrad m4, 1
+ packssdw m4, m4 ; e
+
+ psubw m5, m4, m1 ; b
+ psubw m4, m2 ; c
psubw m0, m5
paddw m3, m4
- ; m0 a0
- SWAP 1, 4 ; m1 c1
- SWAP 2, 3 ; m2 d1
- SWAP 3, 5 ; m3 b1
+ ; m0 a
+ SWAP 1, 5 ; m1 b
+ SWAP 2, 4 ; m2 c
+ ; m3 d
%endmacro
%macro TRANSPOSE_4X4 0
- SWAP 4, 0
- SWAP 5, 2
- punpcklwd m4, m1
- pshufd m0, m4, 0x0e
- punpcklwd m5, m3
- pshufd m2, m5, 0x0e
- SWAP 1, 4
- SWAP 3, 0
- punpckldq m1, m5
- pshufd m4, m1, 0x0e
- punpckldq m3, m2
- pshufd m0, m3, 0x0e
- SWAP 2, 3, 0, 1, 4
+ punpcklwd m0, m2
+ punpcklwd m1, m3
+ mova m2, m0
+ punpcklwd m0, m1
+ punpckhwd m2, m1
+ pshufd m1, m0, 0x0e
+ pshufd m3, m2, 0x0e
%endmacro
-; transposes a 4x4 int16 matrix in xmm0 and xmm1 to the bottom half of xmm0-xmm3
+; transpose a 4x4 int16 matrix in xmm0 and xmm1 to the bottom half of xmm0-xmm3
%macro TRANSPOSE_4X4_WIDE 0
mova m3, m0
punpcklwd m0, m1
@@ -68,16 +80,18 @@
%endmacro
INIT_XMM sse2
-cglobal iwht4x4_16_add, 3, 4, 7, input, output, stride
- mova m0, [inputq + 0] ;a1
- mova m1, [inputq + 16] ;c1
+cglobal iwht4x4_16_add, 3, 3, 7, input, output, stride
+ mova m0, [inputq + 0]
+ mova m1, [inputq + 16]
psraw m0, 2
psraw m1, 2
TRANSPOSE_4X4_WIDE
+ REORDER_INPUTS
TRANSFORM_COLS
TRANSPOSE_4X4
+ REORDER_INPUTS
TRANSFORM_COLS
pxor m4, m4
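
The macro sequence above mirrors the scalar inverse WHT column butterfly; a minimal C sketch of one column pass in the a b c d order the asm expects after REORDER_INPUTS, with the (a - d) >> 1 term widened exactly as the "wide subtract" block does. The asm narrows e with packssdw (saturating); the sketch assumes valid inputs keep e in int16 range:

#include <stdint.h>

/* Scalar sketch of one iwht4x4 column butterfly (illustration only). */
static void iwht_col(int16_t *a, int16_t *b, int16_t *c, int16_t *d) {
  int e;
  *a = (int16_t)(*a + *c);        /* paddw m0, m2          */
  *d = (int16_t)(*d - *b);        /* psubw m3, m1          */
  e = ((int)*a - (int)*d) >> 1;   /* the "wide subtract"   */
  *b = (int16_t)(e - *b);         /* psubw m5, m4, m1 ; b  */
  *c = (int16_t)(e - *c);         /* psubw m4, m2     ; c  */
  *a = (int16_t)(*a - *b);        /* psubw m0, m5          */
  *d = (int16_t)(*d + *c);        /* paddw m3, m4          */
}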
diff --git a/vp9/decoder/vp9_decodeframe.c b/vp9/decoder/vp9_decodeframe.c
index b3cf3fd..c6d3bf1 100644
--- a/vp9/decoder/vp9_decodeframe.c
+++ b/vp9/decoder/vp9_decodeframe.c
@@ -14,6 +14,8 @@
#include "./vp9_rtcd.h"
#include "./vpx_scale_rtcd.h"
+#include "vpx_dsp/bitreader_buffer.h"
+#include "vpx_dsp/bitreader.h"
#include "vpx_mem/vpx_mem.h"
#include "vpx_ports/mem.h"
#include "vpx_ports/mem_ops.h"
@@ -38,8 +40,6 @@
#include "vp9/decoder/vp9_decodemv.h"
#include "vp9/decoder/vp9_decoder.h"
#include "vp9/decoder/vp9_dsubexp.h"
-#include "vp9/decoder/vp9_read_bit_buffer.h"
-#include "vp9/decoder/vp9_reader.h"
#define MAX_VP9_HEADER_SIZE 80
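
The rest of this file is a mechanical rename from the vp9_ reader API to its vpx_dsp replacement. A minimal usage sketch of the renamed bool-decoder entry points, using only the signatures that appear in this diff (the probability and buffer values are arbitrary):

#include <stddef.h>
#include <stdint.h>
#include "vpx_dsp/bitreader.h"

static int reader_demo(const uint8_t *data, size_t size) {
  vpx_reader r;
  if (vpx_reader_init(&r, data, size, NULL, NULL))  /* no decrypt callback */
    return -1;
  {
    const int flag = vpx_read_bit(&r);         /* bool at p = 1/2        */
    const int lit  = vpx_read_literal(&r, 2);  /* two raw bits           */
    const int b    = vpx_read(&r, 30);         /* bool at explicit prob  */
    (void)flag; (void)lit; (void)b;
  }
  return vpx_reader_has_error(&r);
}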
@@ -74,19 +74,19 @@
return len != 0 && len <= (size_t)(end - start);
}
-static int decode_unsigned_max(struct vp9_read_bit_buffer *rb, int max) {
- const int data = vp9_rb_read_literal(rb, get_unsigned_bits(max));
+static int decode_unsigned_max(struct vpx_read_bit_buffer *rb, int max) {
+ const int data = vpx_rb_read_literal(rb, get_unsigned_bits(max));
return data > max ? max : data;
}
-static TX_MODE read_tx_mode(vp9_reader *r) {
- TX_MODE tx_mode = vp9_read_literal(r, 2);
+static TX_MODE read_tx_mode(vpx_reader *r) {
+ TX_MODE tx_mode = vpx_read_literal(r, 2);
if (tx_mode == ALLOW_32X32)
- tx_mode += vp9_read_bit(r);
+ tx_mode += vpx_read_bit(r);
return tx_mode;
}
-static void read_tx_mode_probs(struct tx_probs *tx_probs, vp9_reader *r) {
+static void read_tx_mode_probs(struct tx_probs *tx_probs, vpx_reader *r) {
int i, j;
for (i = 0; i < TX_SIZE_CONTEXTS; ++i)
@@ -102,14 +102,14 @@
vp9_diff_update_prob(r, &tx_probs->p32x32[i][j]);
}
-static void read_switchable_interp_probs(FRAME_CONTEXT *fc, vp9_reader *r) {
+static void read_switchable_interp_probs(FRAME_CONTEXT *fc, vpx_reader *r) {
int i, j;
for (j = 0; j < SWITCHABLE_FILTER_CONTEXTS; ++j)
for (i = 0; i < SWITCHABLE_FILTERS - 1; ++i)
vp9_diff_update_prob(r, &fc->switchable_interp_prob[j][i]);
}
-static void read_inter_mode_probs(FRAME_CONTEXT *fc, vp9_reader *r) {
+static void read_inter_mode_probs(FRAME_CONTEXT *fc, vpx_reader *r) {
int i, j;
for (i = 0; i < INTER_MODE_CONTEXTS; ++i)
for (j = 0; j < INTER_MODES - 1; ++j)
@@ -117,9 +117,9 @@
}
static REFERENCE_MODE read_frame_reference_mode(const VP9_COMMON *cm,
- vp9_reader *r) {
+ vpx_reader *r) {
if (is_compound_reference_allowed(cm)) {
- return vp9_read_bit(r) ? (vp9_read_bit(r) ? REFERENCE_MODE_SELECT
+ return vpx_read_bit(r) ? (vpx_read_bit(r) ? REFERENCE_MODE_SELECT
: COMPOUND_REFERENCE)
: SINGLE_REFERENCE;
} else {
@@ -127,7 +127,7 @@
}
}
-static void read_frame_reference_mode_probs(VP9_COMMON *cm, vp9_reader *r) {
+static void read_frame_reference_mode_probs(VP9_COMMON *cm, vpx_reader *r) {
FRAME_CONTEXT *const fc = cm->fc;
int i;
@@ -146,14 +146,14 @@
vp9_diff_update_prob(r, &fc->comp_ref_prob[i]);
}
-static void update_mv_probs(vp9_prob *p, int n, vp9_reader *r) {
+static void update_mv_probs(vpx_prob *p, int n, vpx_reader *r) {
int i;
for (i = 0; i < n; ++i)
- if (vp9_read(r, MV_UPDATE_PROB))
- p[i] = (vp9_read_literal(r, 7) << 1) | 1;
+ if (vpx_read(r, MV_UPDATE_PROB))
+ p[i] = (vpx_read_literal(r, 7) << 1) | 1;
}
-static void read_mv_probs(nmv_context *ctx, int allow_hp, vp9_reader *r) {
+static void read_mv_probs(nmv_context *ctx, int allow_hp, vpx_reader *r) {
int i, j;
update_mv_probs(ctx->joints, MV_JOINTS - 1, r);
@@ -361,54 +361,38 @@
}
}
-static INLINE void dec_txfrm_block_to_raster_xy(int bwl,
- TX_SIZE tx_size, int block,
- int *x, int *y) {
- const int tx_cols_log2 = bwl - tx_size;
- const int tx_cols = 1 << tx_cols_log2;
- const int raster_mb = block >> (tx_size << 1);
- *x = (raster_mb & (tx_cols - 1)) << tx_size;
- *y = (raster_mb >> tx_cols_log2) << tx_size;
-}
-
-struct intra_args {
- MACROBLOCKD *xd;
- vp9_reader *r;
- int seg_id;
-};
-
-static void predict_and_reconstruct_intra_block(int plane, int block,
- TX_SIZE tx_size, void *arg) {
- struct intra_args *const args = (struct intra_args *)arg;
- MACROBLOCKD *const xd = args->xd;
+static void predict_and_reconstruct_intra_block(MACROBLOCKD *const xd,
+ vpx_reader *r,
+ MB_MODE_INFO *const mbmi,
+ int plane,
+ int row, int col,
+ TX_SIZE tx_size) {
struct macroblockd_plane *const pd = &xd->plane[plane];
- MODE_INFO *const mi = xd->mi[0];
- const PREDICTION_MODE mode = (plane == 0) ? get_y_mode(mi, block)
- : mi->mbmi.uv_mode;
- int x, y;
+ PREDICTION_MODE mode = (plane == 0) ? mbmi->mode : mbmi->uv_mode;
uint8_t *dst;
- const int bwl = pd->n4_wl;
- dec_txfrm_block_to_raster_xy(bwl, tx_size, block, &x, &y);
- dst = &pd->dst.buf[4 * y * pd->dst.stride + 4 * x];
+ dst = &pd->dst.buf[4 * row * pd->dst.stride + 4 * col];
- vp9_predict_intra_block(xd, block >> (tx_size << 1),
- bwl, tx_size, mode,
+ if (mbmi->sb_type < BLOCK_8X8)
+ if (plane == 0)
+ mode = xd->mi[0]->bmi[(row << 1) + col].as_mode;
+
+ vp9_predict_intra_block(xd, pd->n4_wl, tx_size, mode,
dst, pd->dst.stride, dst, pd->dst.stride,
- x, y, plane);
+ col, row, plane);
- if (!mi->mbmi.skip) {
+ if (!mbmi->skip) {
const TX_TYPE tx_type = (plane || xd->lossless) ?
DCT_DCT : intra_mode_to_tx_type_lookup[mode];
const scan_order *sc = (plane || xd->lossless) ?
&vp9_default_scan_orders[tx_size] : &vp9_scan_orders[tx_size][tx_type];
- const int eob = vp9_decode_block_tokens(xd, plane, sc, x, y, tx_size,
- args->r, args->seg_id);
+ const int eob = vp9_decode_block_tokens(xd, plane, sc, col, row, tx_size,
+ r, mbmi->segment_id);
inverse_transform_block_intra(xd, plane, tx_type, tx_size,
dst, pd->dst.stride, eob);
}
}
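
The refactor above replaces block-index bookkeeping with direct (row, col) iteration. For reference, a worked instance of the mapping the deleted dec_txfrm_block_to_raster_xy performed, with assumed values bwl = 3 and tx_size = 1 (TX_8X8):

/* Worked example of the removed block -> (x, y) mapping (illustration only). */
static void raster_xy_demo(void) {
  const int bwl = 3;                /* plane is 1 << 3 = 8 4x4 units wide */
  const int tx_size = 1;            /* TX_8X8: each tx covers 2x2 units   */
  const int block = 12;             /* steps by 1 << (tx_size << 1) = 4   */
  const int tx_cols_log2 = bwl - tx_size;         /* 2: 4 tx per row      */
  const int raster_mb = block >> (tx_size << 1);  /* 12 >> 2 = 3          */
  const int x = (raster_mb & ((1 << tx_cols_log2) - 1)) << tx_size;  /* 6 */
  const int y = (raster_mb >> tx_cols_log2) << tx_size;              /* 0 */
  (void)x; (void)y;  /* the new code reaches (row = 0, col = 6) directly */
}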
-static int reconstruct_inter_block(MACROBLOCKD *const xd, vp9_reader *r,
+static int reconstruct_inter_block(MACROBLOCKD *const xd, vpx_reader *r,
MB_MODE_INFO *const mbmi, int plane,
int row, int col, TX_SIZE tx_size) {
struct macroblockd_plane *const pd = &xd->plane[plane];
@@ -776,58 +760,6 @@
return MIN(mbmi->tx_size, x);
}
-// TODO(slavarnway): Eliminate the foreach_ functions in future commits.
-// NOTE: Jingning removed the foreach_ for recon inter in a previous commit.
-
-typedef void (*dec_foreach_transformed_block_visitor)(int plane, int block,
- TX_SIZE tx_size,
- void *arg);
-
-static void dec_foreach_transformed_block_in_plane(
- const MACROBLOCKD *const xd,
- int plane,
- dec_foreach_transformed_block_visitor visit, void *arg) {
- const struct macroblockd_plane *const pd = &xd->plane[plane];
- const MB_MODE_INFO* mbmi = &xd->mi[0]->mbmi;
- // block and transform sizes, in number of 4x4 blocks log 2 ("*_b")
- // 4x4=0, 8x8=2, 16x16=4, 32x32=6, 64x64=8
- // transform size varies per plane, look it up in a common way.
- const TX_SIZE tx_size =
- plane ? dec_get_uv_tx_size(mbmi, pd->n4_wl, pd->n4_hl)
- : mbmi->tx_size;
- const int num_4x4_w = pd->n4_w;
- const int num_4x4_h = pd->n4_h;
- const int step = 1 << (tx_size << 1);
- int i = 0, r, c;
-
- // If mb_to_right_edge is < 0 we are in a situation in which
- // the current block size extends into the UMV and we won't
- // visit the sub blocks that are wholly within the UMV.
- const int max_blocks_wide = num_4x4_w + (xd->mb_to_right_edge >= 0 ? 0 :
- xd->mb_to_right_edge >> (5 + pd->subsampling_x));
- const int max_blocks_high = num_4x4_h + (xd->mb_to_bottom_edge >= 0 ? 0 :
- xd->mb_to_bottom_edge >> (5 + pd->subsampling_y));
-
- // Keep track of the row and column of the blocks we use so that we know
- // if we are in the unrestricted motion border.
- for (r = 0; r < max_blocks_high; r += (1 << tx_size)) {
- for (c = 0; c < num_4x4_w; c += (1 << tx_size)) {
- // Skip visiting the sub blocks that are wholly within the UMV.
- if (c < max_blocks_wide)
- visit(plane, i, tx_size, arg);
- i += step;
- }
- }
-}
-
-static void dec_foreach_transformed_block(const MACROBLOCKD* const xd,
- dec_foreach_transformed_block_visitor visit, void *arg) {
- int plane;
-
- for (plane = 0; plane < MAX_MB_PLANE; ++plane)
- dec_foreach_transformed_block_in_plane(xd, plane, visit, arg);
-}
-
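
The foreach helpers fold away, but the edge-clipping arithmetic they carried survives in decode_block below. A worked instance, assuming the usual libvpx convention that mb_to_right_edge is kept in 1/8-pel units (so >> 5 converts to 4x4 columns on the luma plane):

/* Illustration: a 64px-wide block overhanging the frame's right edge by 32px. */
static int clipped_width_demo(void) {
  const int num_4x4_w = 16;              /* 64 / 4                          */
  const int mb_to_right_edge = -32 * 8;  /* 1/8-pel units (assumed)         */
  const int subsampling_x = 0;           /* luma                            */
  /* Returns 8: only the visible left half of the block is visited. */
  return num_4x4_w + (mb_to_right_edge >= 0
                          ? 0
                          : mb_to_right_edge >> (5 + subsampling_x));
}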
static INLINE void dec_reset_skip_context(MACROBLOCKD *xd) {
int i;
for (i = 0; i < MAX_MB_PLANE; i++) {
@@ -880,7 +812,7 @@
static void decode_block(VP9Decoder *const pbi, MACROBLOCKD *const xd,
int mi_row, int mi_col,
- vp9_reader *r, BLOCK_SIZE bsize,
+ vpx_reader *r, BLOCK_SIZE bsize,
int bwl, int bhl) {
VP9_COMMON *const cm = &pbi->common;
const int less8x8 = bsize < BLOCK_8X8;
@@ -900,16 +832,33 @@
VPX_CODEC_CORRUPT_FRAME, "Invalid block size.");
}
- vp9_read_mode_info(pbi, xd, mi_row, mi_col, r, x_mis, y_mis);
+ vpx_read_mode_info(pbi, xd, mi_row, mi_col, r, x_mis, y_mis);
if (mbmi->skip) {
dec_reset_skip_context(xd);
}
if (!is_inter_block(mbmi)) {
- struct intra_args arg = {xd, r, mbmi->segment_id};
- dec_foreach_transformed_block(xd,
- predict_and_reconstruct_intra_block, &arg);
+ int plane;
+ for (plane = 0; plane < MAX_MB_PLANE; ++plane) {
+ const struct macroblockd_plane *const pd = &xd->plane[plane];
+ const TX_SIZE tx_size =
+ plane ? dec_get_uv_tx_size(mbmi, pd->n4_wl, pd->n4_hl)
+ : mbmi->tx_size;
+ const int num_4x4_w = pd->n4_w;
+ const int num_4x4_h = pd->n4_h;
+ const int step = (1 << tx_size);
+ int row, col;
+ const int max_blocks_wide = num_4x4_w + (xd->mb_to_right_edge >= 0 ?
+ 0 : xd->mb_to_right_edge >> (5 + pd->subsampling_x));
+ const int max_blocks_high = num_4x4_h + (xd->mb_to_bottom_edge >= 0 ?
+ 0 : xd->mb_to_bottom_edge >> (5 + pd->subsampling_y));
+
+ for (row = 0; row < max_blocks_high; row += step)
+ for (col = 0; col < max_blocks_wide; col += step)
+ predict_and_reconstruct_intra_block(xd, r, mbmi, plane,
+ row, col, tx_size);
+ }
} else {
// Prediction
dec_build_inter_predictors_sb(pbi, xd, mi_row, mi_col);
@@ -944,7 +893,7 @@
}
}
- xd->corrupted |= vp9_reader_has_error(r);
+ xd->corrupted |= vpx_reader_has_error(r);
}
static INLINE int dec_partition_plane_context(const MACROBLOCKD *xd,
@@ -974,19 +923,19 @@
}
static PARTITION_TYPE read_partition(MACROBLOCKD *xd, int mi_row, int mi_col,
- vp9_reader *r,
+ vpx_reader *r,
int has_rows, int has_cols, int bsl) {
const int ctx = dec_partition_plane_context(xd, mi_row, mi_col, bsl);
- const vp9_prob *const probs = get_partition_probs(xd, ctx);
+ const vpx_prob *const probs = get_partition_probs(xd, ctx);
FRAME_COUNTS *counts = xd->counts;
PARTITION_TYPE p;
if (has_rows && has_cols)
- p = (PARTITION_TYPE)vp9_read_tree(r, vp9_partition_tree, probs);
+ p = (PARTITION_TYPE)vpx_read_tree(r, vp9_partition_tree, probs);
else if (!has_rows && has_cols)
- p = vp9_read(r, probs[1]) ? PARTITION_SPLIT : PARTITION_HORZ;
+ p = vpx_read(r, probs[1]) ? PARTITION_SPLIT : PARTITION_HORZ;
else if (has_rows && !has_cols)
- p = vp9_read(r, probs[2]) ? PARTITION_SPLIT : PARTITION_VERT;
+ p = vpx_read(r, probs[2]) ? PARTITION_SPLIT : PARTITION_VERT;
else
p = PARTITION_SPLIT;
@@ -999,7 +948,7 @@
// TODO(slavarnway): eliminate bsize and subsize in future commits
static void decode_partition(VP9Decoder *const pbi, MACROBLOCKD *const xd,
int mi_row, int mi_col,
- vp9_reader* r, BLOCK_SIZE bsize, int n4x4_l2) {
+ vpx_reader* r, BLOCK_SIZE bsize, int n4x4_l2) {
VP9_COMMON *const cm = &pbi->common;
const int n8x8_l2 = n4x4_l2 - 1;
const int num_8x8_wh = 1 << n8x8_l2;
@@ -1059,7 +1008,7 @@
const uint8_t *data_end,
size_t read_size,
struct vpx_internal_error_info *error_info,
- vp9_reader *r,
+ vpx_reader *r,
vpx_decrypt_cb decrypt_cb,
void *decrypt_state) {
// Validate the calculated partition length. If the buffer
@@ -1069,16 +1018,16 @@
vpx_internal_error(error_info, VPX_CODEC_CORRUPT_FRAME,
"Truncated packet or corrupt tile length");
- if (vp9_reader_init(r, data, read_size, decrypt_cb, decrypt_state))
+ if (vpx_reader_init(r, data, read_size, decrypt_cb, decrypt_state))
vpx_internal_error(error_info, VPX_CODEC_MEM_ERROR,
"Failed to allocate bool decoder %d", 1);
}
static void read_coef_probs_common(vp9_coeff_probs_model *coef_probs,
- vp9_reader *r) {
+ vpx_reader *r) {
int i, j, k, l, m;
- if (vp9_read_bit(r))
+ if (vpx_read_bit(r))
for (i = 0; i < PLANE_TYPES; ++i)
for (j = 0; j < REF_TYPES; ++j)
for (k = 0; k < COEF_BANDS; ++k)
@@ -1088,7 +1037,7 @@
}
static void read_coef_probs(FRAME_CONTEXT *fc, TX_MODE tx_mode,
- vp9_reader *r) {
+ vpx_reader *r) {
const TX_SIZE max_tx_size = tx_mode_to_biggest_tx_size[tx_mode];
TX_SIZE tx_size;
for (tx_size = TX_4X4; tx_size <= max_tx_size; ++tx_size)
@@ -1096,27 +1045,27 @@
}
static void setup_segmentation(struct segmentation *seg,
- struct vp9_read_bit_buffer *rb) {
+ struct vpx_read_bit_buffer *rb) {
int i, j;
seg->update_map = 0;
seg->update_data = 0;
- seg->enabled = vp9_rb_read_bit(rb);
+ seg->enabled = vpx_rb_read_bit(rb);
if (!seg->enabled)
return;
// Segmentation map update
- seg->update_map = vp9_rb_read_bit(rb);
+ seg->update_map = vpx_rb_read_bit(rb);
if (seg->update_map) {
for (i = 0; i < SEG_TREE_PROBS; i++)
- seg->tree_probs[i] = vp9_rb_read_bit(rb) ? vp9_rb_read_literal(rb, 8)
+ seg->tree_probs[i] = vpx_rb_read_bit(rb) ? vpx_rb_read_literal(rb, 8)
: MAX_PROB;
- seg->temporal_update = vp9_rb_read_bit(rb);
+ seg->temporal_update = vpx_rb_read_bit(rb);
if (seg->temporal_update) {
for (i = 0; i < PREDICTION_PROBS; i++)
- seg->pred_probs[i] = vp9_rb_read_bit(rb) ? vp9_rb_read_literal(rb, 8)
+ seg->pred_probs[i] = vpx_rb_read_bit(rb) ? vpx_rb_read_literal(rb, 8)
: MAX_PROB;
} else {
for (i = 0; i < PREDICTION_PROBS; i++)
@@ -1125,21 +1074,21 @@
}
// Segmentation data update
- seg->update_data = vp9_rb_read_bit(rb);
+ seg->update_data = vpx_rb_read_bit(rb);
if (seg->update_data) {
- seg->abs_delta = vp9_rb_read_bit(rb);
+ seg->abs_delta = vpx_rb_read_bit(rb);
vp9_clearall_segfeatures(seg);
for (i = 0; i < MAX_SEGMENTS; i++) {
for (j = 0; j < SEG_LVL_MAX; j++) {
int data = 0;
- const int feature_enabled = vp9_rb_read_bit(rb);
+ const int feature_enabled = vpx_rb_read_bit(rb);
if (feature_enabled) {
vp9_enable_segfeature(seg, i, j);
data = decode_unsigned_max(rb, vp9_seg_feature_data_max(j));
if (vp9_is_segfeature_signed(j))
- data = vp9_rb_read_bit(rb) ? -data : data;
+ data = vpx_rb_read_bit(rb) ? -data : data;
}
vp9_set_segdata(seg, i, j, data);
}
@@ -1148,38 +1097,38 @@
}
static void setup_loopfilter(struct loopfilter *lf,
- struct vp9_read_bit_buffer *rb) {
- lf->filter_level = vp9_rb_read_literal(rb, 6);
- lf->sharpness_level = vp9_rb_read_literal(rb, 3);
+ struct vpx_read_bit_buffer *rb) {
+ lf->filter_level = vpx_rb_read_literal(rb, 6);
+ lf->sharpness_level = vpx_rb_read_literal(rb, 3);
// Read in loop filter deltas applied at the MB level based on mode or ref
// frame.
lf->mode_ref_delta_update = 0;
- lf->mode_ref_delta_enabled = vp9_rb_read_bit(rb);
+ lf->mode_ref_delta_enabled = vpx_rb_read_bit(rb);
if (lf->mode_ref_delta_enabled) {
- lf->mode_ref_delta_update = vp9_rb_read_bit(rb);
+ lf->mode_ref_delta_update = vpx_rb_read_bit(rb);
if (lf->mode_ref_delta_update) {
int i;
for (i = 0; i < MAX_REF_LF_DELTAS; i++)
- if (vp9_rb_read_bit(rb))
- lf->ref_deltas[i] = vp9_rb_read_signed_literal(rb, 6);
+ if (vpx_rb_read_bit(rb))
+ lf->ref_deltas[i] = vpx_rb_read_signed_literal(rb, 6);
for (i = 0; i < MAX_MODE_LF_DELTAS; i++)
- if (vp9_rb_read_bit(rb))
- lf->mode_deltas[i] = vp9_rb_read_signed_literal(rb, 6);
+ if (vpx_rb_read_bit(rb))
+ lf->mode_deltas[i] = vpx_rb_read_signed_literal(rb, 6);
}
}
}
-static INLINE int read_delta_q(struct vp9_read_bit_buffer *rb) {
- return vp9_rb_read_bit(rb) ? vp9_rb_read_signed_literal(rb, 4) : 0;
+static INLINE int read_delta_q(struct vpx_read_bit_buffer *rb) {
+ return vpx_rb_read_bit(rb) ? vpx_rb_read_signed_literal(rb, 4) : 0;
}
static void setup_quantization(VP9_COMMON *const cm, MACROBLOCKD *const xd,
- struct vp9_read_bit_buffer *rb) {
- cm->base_qindex = vp9_rb_read_literal(rb, QINDEX_BITS);
+ struct vpx_read_bit_buffer *rb) {
+ cm->base_qindex = vpx_rb_read_literal(rb, QINDEX_BITS);
cm->y_dc_delta_q = read_delta_q(rb);
cm->uv_dc_delta_q = read_delta_q(rb);
cm->uv_ac_delta_q = read_delta_q(rb);
@@ -1221,19 +1170,19 @@
}
}
-static INTERP_FILTER read_interp_filter(struct vp9_read_bit_buffer *rb) {
+static INTERP_FILTER read_interp_filter(struct vpx_read_bit_buffer *rb) {
const INTERP_FILTER literal_to_filter[] = { EIGHTTAP_SMOOTH,
EIGHTTAP,
EIGHTTAP_SHARP,
BILINEAR };
- return vp9_rb_read_bit(rb) ? SWITCHABLE
- : literal_to_filter[vp9_rb_read_literal(rb, 2)];
+ return vpx_rb_read_bit(rb) ? SWITCHABLE
+ : literal_to_filter[vpx_rb_read_literal(rb, 2)];
}
-static void setup_display_size(VP9_COMMON *cm, struct vp9_read_bit_buffer *rb) {
+static void setup_display_size(VP9_COMMON *cm, struct vpx_read_bit_buffer *rb) {
cm->display_width = cm->width;
cm->display_height = cm->height;
- if (vp9_rb_read_bit(rb))
+ if (vpx_rb_read_bit(rb))
vp9_read_frame_size(rb, &cm->display_width, &cm->display_height);
}
@@ -1277,7 +1226,7 @@
}
}
-static void setup_frame_size(VP9_COMMON *cm, struct vp9_read_bit_buffer *rb) {
+static void setup_frame_size(VP9_COMMON *cm, struct vpx_read_bit_buffer *rb) {
int width, height;
BufferPool *const pool = cm->buffer_pool;
vp9_read_frame_size(rb, &width, &height);
@@ -1316,13 +1265,13 @@
}
static void setup_frame_size_with_refs(VP9_COMMON *cm,
- struct vp9_read_bit_buffer *rb) {
+ struct vpx_read_bit_buffer *rb) {
int width, height;
int found = 0, i;
int has_valid_ref_frame = 0;
BufferPool *const pool = cm->buffer_pool;
for (i = 0; i < REFS_PER_FRAME; ++i) {
- if (vp9_rb_read_bit(rb)) {
+ if (vpx_rb_read_bit(rb)) {
YV12_BUFFER_CONFIG *const buf = cm->frame_refs[i].buf;
width = buf->y_crop_width;
height = buf->y_crop_height;
@@ -1388,14 +1337,14 @@
pool->frame_bufs[cm->new_fb_idx].buf.color_space = cm->color_space;
}
-static void setup_tile_info(VP9_COMMON *cm, struct vp9_read_bit_buffer *rb) {
+static void setup_tile_info(VP9_COMMON *cm, struct vpx_read_bit_buffer *rb) {
int min_log2_tile_cols, max_log2_tile_cols, max_ones;
vp9_get_tile_n_bits(cm->mi_cols, &min_log2_tile_cols, &max_log2_tile_cols);
// columns
max_ones = max_log2_tile_cols - min_log2_tile_cols;
cm->log2_tile_cols = min_log2_tile_cols;
- while (max_ones-- && vp9_rb_read_bit(rb))
+ while (max_ones-- && vpx_rb_read_bit(rb))
cm->log2_tile_cols++;
if (cm->log2_tile_cols > 6)
@@ -1403,9 +1352,9 @@
"Invalid number of tile columns");
// rows
- cm->log2_tile_rows = vp9_rb_read_bit(rb);
+ cm->log2_tile_rows = vpx_rb_read_bit(rb);
if (cm->log2_tile_rows)
- cm->log2_tile_rows += vp9_rb_read_bit(rb);
+ cm->log2_tile_rows += vpx_rb_read_bit(rb);
}
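
setup_tile_info reads the column count as a bounded unary code on top of the minimum; a short trace under assumed bounds min_log2_tile_cols = 2, max_log2_tile_cols = 6:

/* max_ones = 4; bits read 1, 1, 0 -> log2_tile_cols = 2 + 2 = 4 (stop at the
 * first 0, or after 4 ones). Rows: one bit, plus a second only if the first
 * is set, so log2_tile_rows is 0, 1, or 2. */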
typedef struct TileBuffer {
@@ -1607,7 +1556,7 @@
if (pbi->frame_parallel_decode)
vp9_frameworker_broadcast(pbi->cur_buf, INT_MAX);
- return vp9_reader_find_end(&tile_data->bit_reader);
+ return vpx_reader_find_end(&tile_data->bit_reader);
}
static int tile_worker_hook(TileWorkerData *const tile_data,
@@ -1786,7 +1735,7 @@
if (final_worker > -1) {
TileWorkerData *const tile_data =
(TileWorkerData*)pbi->tile_workers[final_worker].data1;
- bit_reader_end = vp9_reader_find_end(&tile_data->bit_reader);
+ bit_reader_end = vpx_reader_find_end(&tile_data->bit_reader);
final_worker = -1;
}
@@ -1809,9 +1758,9 @@
}
static void read_bitdepth_colorspace_sampling(
- VP9_COMMON *cm, struct vp9_read_bit_buffer *rb) {
+ VP9_COMMON *cm, struct vpx_read_bit_buffer *rb) {
if (cm->profile >= PROFILE_2) {
- cm->bit_depth = vp9_rb_read_bit(rb) ? VPX_BITS_12 : VPX_BITS_10;
+ cm->bit_depth = vpx_rb_read_bit(rb) ? VPX_BITS_12 : VPX_BITS_10;
#if CONFIG_VP9_HIGHBITDEPTH
cm->use_highbitdepth = 1;
#endif
@@ -1821,16 +1770,16 @@
cm->use_highbitdepth = 0;
#endif
}
- cm->color_space = vp9_rb_read_literal(rb, 3);
+ cm->color_space = vpx_rb_read_literal(rb, 3);
if (cm->color_space != VPX_CS_SRGB) {
- vp9_rb_read_bit(rb); // [16,235] (including xvycc) vs [0,255] range
+ vpx_rb_read_bit(rb); // [16,235] (including xvycc) vs [0,255] range
if (cm->profile == PROFILE_1 || cm->profile == PROFILE_3) {
- cm->subsampling_x = vp9_rb_read_bit(rb);
- cm->subsampling_y = vp9_rb_read_bit(rb);
+ cm->subsampling_x = vpx_rb_read_bit(rb);
+ cm->subsampling_y = vpx_rb_read_bit(rb);
if (cm->subsampling_x == 1 && cm->subsampling_y == 1)
vpx_internal_error(&cm->error, VPX_CODEC_UNSUP_BITSTREAM,
"4:2:0 color not supported in profile 1 or 3");
- if (vp9_rb_read_bit(rb))
+ if (vpx_rb_read_bit(rb))
vpx_internal_error(&cm->error, VPX_CODEC_UNSUP_BITSTREAM,
"Reserved bit set");
} else {
@@ -1841,7 +1790,7 @@
// Note if colorspace is SRGB then 4:4:4 chroma sampling is assumed.
// 4:2:2 or 4:4:0 chroma sampling is not allowed.
cm->subsampling_y = cm->subsampling_x = 0;
- if (vp9_rb_read_bit(rb))
+ if (vpx_rb_read_bit(rb))
vpx_internal_error(&cm->error, VPX_CODEC_UNSUP_BITSTREAM,
"Reserved bit set");
} else {
@@ -1852,7 +1801,7 @@
}
static size_t read_uncompressed_header(VP9Decoder *pbi,
- struct vp9_read_bit_buffer *rb) {
+ struct vpx_read_bit_buffer *rb) {
VP9_COMMON *const cm = &pbi->common;
BufferPool *const pool = cm->buffer_pool;
RefCntBuffer *const frame_bufs = pool->frame_bufs;
@@ -1862,20 +1811,25 @@
cm->last_frame_type = cm->frame_type;
cm->last_intra_only = cm->intra_only;
- if (vp9_rb_read_literal(rb, 2) != VP9_FRAME_MARKER)
+ if (vpx_rb_read_literal(rb, 2) != VP9_FRAME_MARKER)
vpx_internal_error(&cm->error, VPX_CODEC_UNSUP_BITSTREAM,
"Invalid frame marker");
cm->profile = vp9_read_profile(rb);
-
+#if CONFIG_VP9_HIGHBITDEPTH
if (cm->profile >= MAX_PROFILES)
vpx_internal_error(&cm->error, VPX_CODEC_UNSUP_BITSTREAM,
"Unsupported bitstream profile");
+#else
+ if (cm->profile >= PROFILE_2)
+ vpx_internal_error(&cm->error, VPX_CODEC_UNSUP_BITSTREAM,
+ "Unsupported bitstream profile");
+#endif
- cm->show_existing_frame = vp9_rb_read_bit(rb);
+ cm->show_existing_frame = vpx_rb_read_bit(rb);
if (cm->show_existing_frame) {
// Show an existing frame directly.
- const int frame_to_show = cm->ref_frame_map[vp9_rb_read_literal(rb, 3)];
+ const int frame_to_show = cm->ref_frame_map[vpx_rb_read_literal(rb, 3)];
lock_buffer_pool(pool);
if (frame_to_show < 0 || frame_bufs[frame_to_show].ref_count < 1) {
unlock_buffer_pool(pool);
@@ -1897,9 +1851,9 @@
return 0;
}
- cm->frame_type = (FRAME_TYPE) vp9_rb_read_bit(rb);
- cm->show_frame = vp9_rb_read_bit(rb);
- cm->error_resilient_mode = vp9_rb_read_bit(rb);
+ cm->frame_type = (FRAME_TYPE) vpx_rb_read_bit(rb);
+ cm->show_frame = vpx_rb_read_bit(rb);
+ cm->error_resilient_mode = vpx_rb_read_bit(rb);
if (cm->frame_type == KEY_FRAME) {
if (!vp9_read_sync_code(rb))
@@ -1920,10 +1874,10 @@
pbi->need_resync = 0;
}
} else {
- cm->intra_only = cm->show_frame ? 0 : vp9_rb_read_bit(rb);
+ cm->intra_only = cm->show_frame ? 0 : vpx_rb_read_bit(rb);
cm->reset_frame_context = cm->error_resilient_mode ?
- 0 : vp9_rb_read_literal(rb, 2);
+ 0 : vpx_rb_read_literal(rb, 2);
if (cm->intra_only) {
if (!vp9_read_sync_code(rb))
@@ -1944,26 +1898,26 @@
#endif
}
- pbi->refresh_frame_flags = vp9_rb_read_literal(rb, REF_FRAMES);
+ pbi->refresh_frame_flags = vpx_rb_read_literal(rb, REF_FRAMES);
setup_frame_size(cm, rb);
if (pbi->need_resync) {
memset(&cm->ref_frame_map, -1, sizeof(cm->ref_frame_map));
pbi->need_resync = 0;
}
} else if (pbi->need_resync != 1) { /* Skip if need resync */
- pbi->refresh_frame_flags = vp9_rb_read_literal(rb, REF_FRAMES);
+ pbi->refresh_frame_flags = vpx_rb_read_literal(rb, REF_FRAMES);
for (i = 0; i < REFS_PER_FRAME; ++i) {
- const int ref = vp9_rb_read_literal(rb, REF_FRAMES_LOG2);
+ const int ref = vpx_rb_read_literal(rb, REF_FRAMES_LOG2);
const int idx = cm->ref_frame_map[ref];
RefBuffer *const ref_frame = &cm->frame_refs[i];
ref_frame->idx = idx;
ref_frame->buf = &frame_bufs[idx].buf;
- cm->ref_frame_sign_bias[LAST_FRAME + i] = vp9_rb_read_bit(rb);
+ cm->ref_frame_sign_bias[LAST_FRAME + i] = vpx_rb_read_bit(rb);
}
setup_frame_size_with_refs(cm, rb);
- cm->allow_high_precision_mv = vp9_rb_read_bit(rb);
+ cm->allow_high_precision_mv = vpx_rb_read_bit(rb);
cm->interp_filter = read_interp_filter(rb);
for (i = 0; i < REFS_PER_FRAME; ++i) {
@@ -1995,8 +1949,8 @@
}
if (!cm->error_resilient_mode) {
- cm->refresh_frame_context = vp9_rb_read_bit(rb);
- cm->frame_parallel_decoding_mode = vp9_rb_read_bit(rb);
+ cm->refresh_frame_context = vpx_rb_read_bit(rb);
+ cm->frame_parallel_decoding_mode = vpx_rb_read_bit(rb);
} else {
cm->refresh_frame_context = 0;
cm->frame_parallel_decoding_mode = 1;
@@ -2004,7 +1958,7 @@
// This flag will be overridden by the call to vp9_setup_past_independence
// below, forcing the use of context 0 for those frame types.
- cm->frame_context_idx = vp9_rb_read_literal(rb, FRAME_CONTEXTS_LOG2);
+ cm->frame_context_idx = vpx_rb_read_literal(rb, FRAME_CONTEXTS_LOG2);
// Generate next_ref_frame_map.
lock_buffer_pool(pool);
@@ -2039,7 +1993,7 @@
setup_segmentation_dequant(cm);
setup_tile_info(cm, rb);
- sz = vp9_rb_read_literal(rb, 16);
+ sz = vpx_rb_read_literal(rb, 16);
if (sz == 0)
vpx_internal_error(&cm->error, VPX_CODEC_CORRUPT_FRAME,
@@ -2053,10 +2007,10 @@
VP9_COMMON *const cm = &pbi->common;
MACROBLOCKD *const xd = &pbi->mb;
FRAME_CONTEXT *const fc = cm->fc;
- vp9_reader r;
+ vpx_reader r;
int k;
- if (vp9_reader_init(&r, data, partition_size, pbi->decrypt_cb,
+ if (vpx_reader_init(&r, data, partition_size, pbi->decrypt_cb,
pbi->decrypt_state))
vpx_internal_error(&cm->error, VPX_CODEC_MEM_ERROR,
"Failed to allocate bool decoder 0");
@@ -2097,7 +2051,7 @@
read_mv_probs(nmvc, cm->allow_high_precision_mv, &r);
}
- return vp9_reader_has_error(&r);
+ return vpx_reader_has_error(&r);
}
#ifdef NDEBUG
@@ -2137,9 +2091,9 @@
}
#endif // NDEBUG
-static struct vp9_read_bit_buffer *init_read_bit_buffer(
+static struct vpx_read_bit_buffer *init_read_bit_buffer(
VP9Decoder *pbi,
- struct vp9_read_bit_buffer *rb,
+ struct vpx_read_bit_buffer *rb,
const uint8_t *data,
const uint8_t *data_end,
uint8_t clear_data[MAX_VP9_HEADER_SIZE]) {
@@ -2160,23 +2114,23 @@
//------------------------------------------------------------------------------
-int vp9_read_sync_code(struct vp9_read_bit_buffer *const rb) {
- return vp9_rb_read_literal(rb, 8) == VP9_SYNC_CODE_0 &&
- vp9_rb_read_literal(rb, 8) == VP9_SYNC_CODE_1 &&
- vp9_rb_read_literal(rb, 8) == VP9_SYNC_CODE_2;
+int vp9_read_sync_code(struct vpx_read_bit_buffer *const rb) {
+ return vpx_rb_read_literal(rb, 8) == VP9_SYNC_CODE_0 &&
+ vpx_rb_read_literal(rb, 8) == VP9_SYNC_CODE_1 &&
+ vpx_rb_read_literal(rb, 8) == VP9_SYNC_CODE_2;
}
-void vp9_read_frame_size(struct vp9_read_bit_buffer *rb,
+void vp9_read_frame_size(struct vpx_read_bit_buffer *rb,
int *width, int *height) {
- *width = vp9_rb_read_literal(rb, 16) + 1;
- *height = vp9_rb_read_literal(rb, 16) + 1;
+ *width = vpx_rb_read_literal(rb, 16) + 1;
+ *height = vpx_rb_read_literal(rb, 16) + 1;
}
-BITSTREAM_PROFILE vp9_read_profile(struct vp9_read_bit_buffer *rb) {
- int profile = vp9_rb_read_bit(rb);
- profile |= vp9_rb_read_bit(rb) << 1;
+BITSTREAM_PROFILE vp9_read_profile(struct vpx_read_bit_buffer *rb) {
+ int profile = vpx_rb_read_bit(rb);
+ profile |= vpx_rb_read_bit(rb) << 1;
if (profile > 2)
- profile += vp9_rb_read_bit(rb);
+ profile += vpx_rb_read_bit(rb);
return (BITSTREAM_PROFILE) profile;
}
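
For reference, vp9_read_profile packs the profile least-significant bit first, with a third bit reserved for values above 2; a trace of the mapping the code above produces:

/* Profile bits (in read order) -> profile:
 *   0 0    -> PROFILE_0
 *   1 0    -> PROFILE_1
 *   0 1    -> PROFILE_2
 *   1 1 0  -> PROFILE_3
 *   1 1 1  -> 4, rejected by the profile checks in read_uncompressed_header
 */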
@@ -2185,7 +2139,7 @@
const uint8_t **p_data_end) {
VP9_COMMON *const cm = &pbi->common;
MACROBLOCKD *const xd = &pbi->mb;
- struct vp9_read_bit_buffer rb;
+ struct vpx_read_bit_buffer rb;
int context_updated = 0;
uint8_t clear_data[MAX_VP9_HEADER_SIZE];
const size_t first_partition_size = read_uncompressed_header(pbi,
@@ -2201,7 +2155,7 @@
return;
}
- data += vp9_rb_bytes_read(&rb);
+ data += vpx_rb_bytes_read(&rb);
if (!read_is_valid(data, first_partition_size, data_end))
vpx_internal_error(&cm->error, VPX_CODEC_CORRUPT_FRAME,
"Truncated packet or corrupt header length");
diff --git a/vp9/decoder/vp9_decodeframe.h b/vp9/decoder/vp9_decodeframe.h
index a876e7c..05af706 100644
--- a/vp9/decoder/vp9_decodeframe.h
+++ b/vp9/decoder/vp9_decodeframe.h
@@ -17,12 +17,12 @@
#endif
struct VP9Decoder;
-struct vp9_read_bit_buffer;
+struct vpx_read_bit_buffer;
-int vp9_read_sync_code(struct vp9_read_bit_buffer *const rb);
-void vp9_read_frame_size(struct vp9_read_bit_buffer *rb,
+int vp9_read_sync_code(struct vpx_read_bit_buffer *const rb);
+void vp9_read_frame_size(struct vpx_read_bit_buffer *rb,
int *width, int *height);
-BITSTREAM_PROFILE vp9_read_profile(struct vp9_read_bit_buffer *rb);
+BITSTREAM_PROFILE vp9_read_profile(struct vpx_read_bit_buffer *rb);
void vp9_decode_frame(struct VP9Decoder *pbi,
const uint8_t *data, const uint8_t *data_end,
diff --git a/vp9/decoder/vp9_decodemv.c b/vp9/decoder/vp9_decodemv.c
index d42a654..341e6d7 100644
--- a/vp9/decoder/vp9_decodemv.c
+++ b/vp9/decoder/vp9_decodemv.c
@@ -21,14 +21,13 @@
#include "vp9/decoder/vp9_decodemv.h"
#include "vp9/decoder/vp9_decodeframe.h"
-#include "vp9/decoder/vp9_reader.h"
-static PREDICTION_MODE read_intra_mode(vp9_reader *r, const vp9_prob *p) {
- return (PREDICTION_MODE)vp9_read_tree(r, vp9_intra_mode_tree, p);
+static PREDICTION_MODE read_intra_mode(vpx_reader *r, const vpx_prob *p) {
+ return (PREDICTION_MODE)vpx_read_tree(r, vp9_intra_mode_tree, p);
}
static PREDICTION_MODE read_intra_mode_y(VP9_COMMON *cm, MACROBLOCKD *xd,
- vp9_reader *r, int size_group) {
+ vpx_reader *r, int size_group) {
const PREDICTION_MODE y_mode =
read_intra_mode(r, cm->fc->y_mode_prob[size_group]);
FRAME_COUNTS *counts = xd->counts;
@@ -38,7 +37,7 @@
}
static PREDICTION_MODE read_intra_mode_uv(VP9_COMMON *cm, MACROBLOCKD *xd,
- vp9_reader *r,
+ vpx_reader *r,
PREDICTION_MODE y_mode) {
const PREDICTION_MODE uv_mode = read_intra_mode(r,
cm->fc->uv_mode_prob[y_mode]);
@@ -49,8 +48,8 @@
}
static PREDICTION_MODE read_inter_mode(VP9_COMMON *cm, MACROBLOCKD *xd,
- vp9_reader *r, int ctx) {
- const int mode = vp9_read_tree(r, vp9_inter_mode_tree,
+ vpx_reader *r, int ctx) {
+ const int mode = vpx_read_tree(r, vp9_inter_mode_tree,
cm->fc->inter_mode_probs[ctx]);
FRAME_COUNTS *counts = xd->counts;
if (counts)
@@ -59,20 +58,20 @@
return NEARESTMV + mode;
}
-static int read_segment_id(vp9_reader *r, const struct segmentation *seg) {
- return vp9_read_tree(r, vp9_segment_tree, seg->tree_probs);
+static int read_segment_id(vpx_reader *r, const struct segmentation *seg) {
+ return vpx_read_tree(r, vp9_segment_tree, seg->tree_probs);
}
static TX_SIZE read_selected_tx_size(VP9_COMMON *cm, MACROBLOCKD *xd,
- TX_SIZE max_tx_size, vp9_reader *r) {
+ TX_SIZE max_tx_size, vpx_reader *r) {
FRAME_COUNTS *counts = xd->counts;
const int ctx = get_tx_size_context(xd);
- const vp9_prob *tx_probs = get_tx_probs(max_tx_size, ctx, &cm->fc->tx_probs);
- int tx_size = vp9_read(r, tx_probs[0]);
+ const vpx_prob *tx_probs = get_tx_probs(max_tx_size, ctx, &cm->fc->tx_probs);
+ int tx_size = vpx_read(r, tx_probs[0]);
if (tx_size != TX_4X4 && max_tx_size >= TX_16X16) {
- tx_size += vp9_read(r, tx_probs[1]);
+ tx_size += vpx_read(r, tx_probs[1]);
if (tx_size != TX_8X8 && max_tx_size >= TX_32X32)
- tx_size += vp9_read(r, tx_probs[2]);
+ tx_size += vpx_read(r, tx_probs[2]);
}
if (counts)
@@ -81,7 +80,7 @@
}
static TX_SIZE read_tx_size(VP9_COMMON *cm, MACROBLOCKD *xd,
- int allow_select, vp9_reader *r) {
+ int allow_select, vpx_reader *r) {
TX_MODE tx_mode = cm->tx_mode;
BLOCK_SIZE bsize = xd->mi[0]->mbmi.sb_type;
const TX_SIZE max_tx_size = max_txsize_lookup[bsize];
@@ -129,7 +128,7 @@
static int read_intra_segment_id(VP9_COMMON *const cm, int mi_offset,
int x_mis, int y_mis,
- vp9_reader *r) {
+ vpx_reader *r) {
struct segmentation *const seg = &cm->seg;
int segment_id;
@@ -148,7 +147,7 @@
}
static int read_inter_segment_id(VP9_COMMON *const cm, MACROBLOCKD *const xd,
- int mi_row, int mi_col, vp9_reader *r) {
+ int mi_row, int mi_col, vpx_reader *r) {
struct segmentation *const seg = &cm->seg;
MB_MODE_INFO *const mbmi = &xd->mi[0]->mbmi;
int predicted_segment_id, segment_id;
@@ -174,8 +173,8 @@
}
if (seg->temporal_update) {
- const vp9_prob pred_prob = vp9_get_pred_prob_seg_id(seg, xd);
- mbmi->seg_id_predicted = vp9_read(r, pred_prob);
+ const vpx_prob pred_prob = vp9_get_pred_prob_seg_id(seg, xd);
+ mbmi->seg_id_predicted = vpx_read(r, pred_prob);
segment_id = mbmi->seg_id_predicted ? predicted_segment_id
: read_segment_id(r, seg);
} else {
@@ -186,12 +185,12 @@
}
static int read_skip(VP9_COMMON *cm, const MACROBLOCKD *xd,
- int segment_id, vp9_reader *r) {
+ int segment_id, vpx_reader *r) {
if (segfeature_active(&cm->seg, segment_id, SEG_LVL_SKIP)) {
return 1;
} else {
const int ctx = vp9_get_skip_context(xd);
- const int skip = vp9_read(r, cm->fc->skip_probs[ctx]);
+ const int skip = vpx_read(r, cm->fc->skip_probs[ctx]);
FRAME_COUNTS *counts = xd->counts;
if (counts)
++counts->skip[ctx][skip];
@@ -201,7 +200,7 @@
static void read_intra_frame_mode_info(VP9_COMMON *const cm,
MACROBLOCKD *const xd,
- int mi_row, int mi_col, vp9_reader *r) {
+ int mi_row, int mi_col, vpx_reader *r) {
MODE_INFO *const mi = xd->mi[0];
MB_MODE_INFO *const mbmi = &mi->mbmi;
const MODE_INFO *above_mi = xd->above_mi;
@@ -249,16 +248,16 @@
mbmi->uv_mode = read_intra_mode(r, vp9_kf_uv_mode_prob[mbmi->mode]);
}
-static int read_mv_component(vp9_reader *r,
+static int read_mv_component(vpx_reader *r,
const nmv_component *mvcomp, int usehp) {
int mag, d, fr, hp;
- const int sign = vp9_read(r, mvcomp->sign);
- const int mv_class = vp9_read_tree(r, vp9_mv_class_tree, mvcomp->classes);
+ const int sign = vpx_read(r, mvcomp->sign);
+ const int mv_class = vpx_read_tree(r, vp9_mv_class_tree, mvcomp->classes);
const int class0 = mv_class == MV_CLASS_0;
// Integer part
if (class0) {
- d = vp9_read_tree(r, vp9_mv_class0_tree, mvcomp->class0);
+ d = vpx_read_tree(r, vp9_mv_class0_tree, mvcomp->class0);
mag = 0;
} else {
int i;
@@ -266,16 +265,16 @@
d = 0;
for (i = 0; i < n; ++i)
- d |= vp9_read(r, mvcomp->bits[i]) << i;
+ d |= vpx_read(r, mvcomp->bits[i]) << i;
mag = CLASS0_SIZE << (mv_class + 2);
}
// Fractional part
- fr = vp9_read_tree(r, vp9_mv_fp_tree, class0 ? mvcomp->class0_fp[d]
+ fr = vpx_read_tree(r, vp9_mv_fp_tree, class0 ? mvcomp->class0_fp[d]
: mvcomp->fp);
  // High precision part (if hp is not used, the default value of hp is 1)
- hp = usehp ? vp9_read(r, class0 ? mvcomp->class0_hp : mvcomp->hp)
+ hp = usehp ? vpx_read(r, class0 ? mvcomp->class0_hp : mvcomp->hp)
: 1;
// Result
@@ -283,11 +282,11 @@
return sign ? -mag : mag;
}
-static INLINE void read_mv(vp9_reader *r, MV *mv, const MV *ref,
+static INLINE void read_mv(vpx_reader *r, MV *mv, const MV *ref,
const nmv_context *ctx,
nmv_context_counts *counts, int allow_hp) {
const MV_JOINT_TYPE joint_type =
- (MV_JOINT_TYPE)vp9_read_tree(r, vp9_mv_joint_tree, ctx->joints);
+ (MV_JOINT_TYPE)vpx_read_tree(r, vp9_mv_joint_tree, ctx->joints);
const int use_hp = allow_hp && vp9_use_mv_hp(ref);
MV diff = {0, 0};
@@ -305,11 +304,11 @@
static REFERENCE_MODE read_block_reference_mode(VP9_COMMON *cm,
const MACROBLOCKD *xd,
- vp9_reader *r) {
+ vpx_reader *r) {
if (cm->reference_mode == REFERENCE_MODE_SELECT) {
const int ctx = vp9_get_reference_mode_context(cm, xd);
const REFERENCE_MODE mode =
- (REFERENCE_MODE)vp9_read(r, cm->fc->comp_inter_prob[ctx]);
+ (REFERENCE_MODE)vpx_read(r, cm->fc->comp_inter_prob[ctx]);
FRAME_COUNTS *counts = xd->counts;
if (counts)
++counts->comp_inter[ctx][mode];
@@ -321,7 +320,7 @@
// Read the reference frame
static void read_ref_frames(VP9_COMMON *const cm, MACROBLOCKD *const xd,
- vp9_reader *r,
+ vpx_reader *r,
int segment_id, MV_REFERENCE_FRAME ref_frame[2]) {
FRAME_CONTEXT *const fc = cm->fc;
FRAME_COUNTS *counts = xd->counts;
@@ -336,19 +335,19 @@
if (mode == COMPOUND_REFERENCE) {
const int idx = cm->ref_frame_sign_bias[cm->comp_fixed_ref];
const int ctx = vp9_get_pred_context_comp_ref_p(cm, xd);
- const int bit = vp9_read(r, fc->comp_ref_prob[ctx]);
+ const int bit = vpx_read(r, fc->comp_ref_prob[ctx]);
if (counts)
++counts->comp_ref[ctx][bit];
ref_frame[idx] = cm->comp_fixed_ref;
ref_frame[!idx] = cm->comp_var_ref[bit];
} else if (mode == SINGLE_REFERENCE) {
const int ctx0 = vp9_get_pred_context_single_ref_p1(xd);
- const int bit0 = vp9_read(r, fc->single_ref_prob[ctx0][0]);
+ const int bit0 = vpx_read(r, fc->single_ref_prob[ctx0][0]);
if (counts)
++counts->single_ref[ctx0][0][bit0];
if (bit0) {
const int ctx1 = vp9_get_pred_context_single_ref_p2(xd);
- const int bit1 = vp9_read(r, fc->single_ref_prob[ctx1][1]);
+ const int bit1 = vpx_read(r, fc->single_ref_prob[ctx1][1]);
if (counts)
++counts->single_ref[ctx1][1][bit1];
ref_frame[0] = bit1 ? ALTREF_FRAME : GOLDEN_FRAME;
@@ -366,10 +365,10 @@
static INLINE INTERP_FILTER read_switchable_interp_filter(
VP9_COMMON *const cm, MACROBLOCKD *const xd,
- vp9_reader *r) {
+ vpx_reader *r) {
const int ctx = vp9_get_pred_context_switchable_interp(xd);
const INTERP_FILTER type =
- (INTERP_FILTER)vp9_read_tree(r, vp9_switchable_interp_tree,
+ (INTERP_FILTER)vpx_read_tree(r, vp9_switchable_interp_tree,
cm->fc->switchable_interp_prob[ctx]);
FRAME_COUNTS *counts = xd->counts;
if (counts)
@@ -379,7 +378,7 @@
static void read_intra_block_mode_info(VP9_COMMON *const cm,
MACROBLOCKD *const xd, MODE_INFO *mi,
- vp9_reader *r) {
+ vpx_reader *r) {
MB_MODE_INFO *const mbmi = &mi->mbmi;
const BLOCK_SIZE bsize = mi->mbmi.sb_type;
int i;
@@ -421,7 +420,7 @@
PREDICTION_MODE mode,
int_mv mv[2], int_mv ref_mv[2],
int_mv nearest_mv[2], int_mv near_mv[2],
- int is_compound, int allow_hp, vp9_reader *r) {
+ int is_compound, int allow_hp, vpx_reader *r) {
int i;
int ret = 1;
@@ -462,12 +461,12 @@
}
static int read_is_inter_block(VP9_COMMON *const cm, MACROBLOCKD *const xd,
- int segment_id, vp9_reader *r) {
+ int segment_id, vpx_reader *r) {
if (segfeature_active(&cm->seg, segment_id, SEG_LVL_REF_FRAME)) {
return get_segdata(&cm->seg, segment_id, SEG_LVL_REF_FRAME) != INTRA_FRAME;
} else {
const int ctx = vp9_get_intra_inter_context(xd);
- const int is_inter = vp9_read(r, cm->fc->intra_inter_prob[ctx]);
+ const int is_inter = vpx_read(r, cm->fc->intra_inter_prob[ctx]);
FRAME_COUNTS *counts = xd->counts;
if (counts)
++counts->intra_inter[ctx][is_inter];
@@ -484,7 +483,7 @@
static void read_inter_block_mode_info(VP9Decoder *const pbi,
MACROBLOCKD *const xd,
MODE_INFO *const mi,
- int mi_row, int mi_col, vp9_reader *r) {
+ int mi_row, int mi_col, vpx_reader *r) {
VP9_COMMON *const cm = &pbi->common;
MB_MODE_INFO *const mbmi = &mi->mbmi;
const BLOCK_SIZE bsize = mbmi->sb_type;
@@ -586,7 +585,7 @@
static void read_inter_frame_mode_info(VP9Decoder *const pbi,
MACROBLOCKD *const xd,
- int mi_row, int mi_col, vp9_reader *r) {
+ int mi_row, int mi_col, vpx_reader *r) {
VP9_COMMON *const cm = &pbi->common;
MODE_INFO *const mi = xd->mi[0];
MB_MODE_INFO *const mbmi = &mi->mbmi;
@@ -605,8 +604,8 @@
read_intra_block_mode_info(cm, xd, mi, r);
}
-void vp9_read_mode_info(VP9Decoder *const pbi, MACROBLOCKD *xd,
- int mi_row, int mi_col, vp9_reader *r,
+void vpx_read_mode_info(VP9Decoder *const pbi, MACROBLOCKD *xd,
+ int mi_row, int mi_col, vpx_reader *r,
int x_mis, int y_mis) {
VP9_COMMON *const cm = &pbi->common;
MODE_INFO *const mi = xd->mi[0];
diff --git a/vp9/decoder/vp9_decodemv.h b/vp9/decoder/vp9_decodemv.h
index 53bac8c..75f568c 100644
--- a/vp9/decoder/vp9_decodemv.h
+++ b/vp9/decoder/vp9_decodemv.h
@@ -11,15 +11,16 @@
#ifndef VP9_DECODER_VP9_DECODEMV_H_
#define VP9_DECODER_VP9_DECODEMV_H_
+#include "vpx_dsp/bitreader.h"
+
#include "vp9/decoder/vp9_decoder.h"
-#include "vp9/decoder/vp9_reader.h"
#ifdef __cplusplus
extern "C" {
#endif
-void vp9_read_mode_info(VP9Decoder *const pbi, MACROBLOCKD *xd,
- int mi_row, int mi_col, vp9_reader *r,
+void vpx_read_mode_info(VP9Decoder *const pbi, MACROBLOCKD *xd,
+ int mi_row, int mi_col, vpx_reader *r,
int x_mis, int y_mis);
#ifdef __cplusplus
diff --git a/vp9/decoder/vp9_decoder.h b/vp9/decoder/vp9_decoder.h
index af47f85..915f9dc 100644
--- a/vp9/decoder/vp9_decoder.h
+++ b/vp9/decoder/vp9_decoder.h
@@ -14,6 +14,7 @@
#include "./vpx_config.h"
#include "vpx/vpx_codec.h"
+#include "vpx_dsp/bitreader.h"
#include "vpx_scale/yv12config.h"
#include "vpx_util/vpx_thread.h"
@@ -21,7 +22,6 @@
#include "vp9/common/vp9_onyxc_int.h"
#include "vp9/common/vp9_ppflags.h"
#include "vp9/decoder/vp9_dthread.h"
-#include "vp9/decoder/vp9_reader.h"
#ifdef __cplusplus
extern "C" {
@@ -30,7 +30,7 @@
// TODO(hkuang): combine this with TileWorkerData.
typedef struct TileData {
VP9_COMMON *cm;
- vp9_reader bit_reader;
+ vpx_reader bit_reader;
DECLARE_ALIGNED(16, MACROBLOCKD, xd);
/* dqcoeff are shared by all the planes. So planes must be decoded serially */
DECLARE_ALIGNED(16, tran_low_t, dqcoeff[32 * 32]);
@@ -38,7 +38,7 @@
typedef struct TileWorkerData {
struct VP9Decoder *pbi;
- vp9_reader bit_reader;
+ vpx_reader bit_reader;
FRAME_COUNTS counts;
DECLARE_ALIGNED(16, MACROBLOCKD, xd);
/* dqcoeff are shared by all the planes. So planes must be decoded serially */
diff --git a/vp9/decoder/vp9_detokenize.c b/vp9/decoder/vp9_detokenize.c
index 5596c9e..e4412dc 100644
--- a/vp9/decoder/vp9_detokenize.c
+++ b/vp9/decoder/vp9_detokenize.c
@@ -38,10 +38,10 @@
++coef_counts[band][ctx][token]; \
} while (0)
-static INLINE int read_coeff(const vp9_prob *probs, int n, vp9_reader *r) {
+static INLINE int read_coeff(const vpx_prob *probs, int n, vpx_reader *r) {
int i, val = 0;
for (i = 0; i < n; ++i)
- val = (val << 1) | vp9_read(r, probs[i]);
+ val = (val << 1) | vpx_read(r, probs[i]);
return val;
}
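
read_coeff assembles the value most-significant bit first; a one-line trace with hypothetical decoded bits:

/* n = 2, decoded bits 1 then 0: val = (((0 << 1) | 1) << 1) | 0 = 2. */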
@@ -49,15 +49,15 @@
PLANE_TYPE type,
tran_low_t *dqcoeff, TX_SIZE tx_size, const int16_t *dq,
int ctx, const int16_t *scan, const int16_t *nb,
- vp9_reader *r) {
+ vpx_reader *r) {
FRAME_COUNTS *counts = xd->counts;
const int max_eob = 16 << (tx_size << 1);
const FRAME_CONTEXT *const fc = xd->fc;
const int ref = is_inter_block(&xd->mi[0]->mbmi);
int band, c = 0;
- const vp9_prob (*coef_probs)[COEFF_CONTEXTS][UNCONSTRAINED_NODES] =
+ const vpx_prob (*coef_probs)[COEFF_CONTEXTS][UNCONSTRAINED_NODES] =
fc->coef_probs[tx_size][type][ref];
- const vp9_prob *prob;
+ const vpx_prob *prob;
unsigned int (*coef_counts)[COEFF_CONTEXTS][UNCONSTRAINED_NODES + 1];
unsigned int (*eob_branch_count)[COEFF_CONTEXTS];
uint8_t token_cache[32 * 32];
@@ -117,12 +117,12 @@
prob = coef_probs[band][ctx];
if (counts)
++eob_branch_count[band][ctx];
- if (!vp9_read(r, prob[EOB_CONTEXT_NODE])) {
+ if (!vpx_read(r, prob[EOB_CONTEXT_NODE])) {
INCREMENT_COUNT(EOB_MODEL_TOKEN);
break;
}
- while (!vp9_read(r, prob[ZERO_CONTEXT_NODE])) {
+ while (!vpx_read(r, prob[ZERO_CONTEXT_NODE])) {
INCREMENT_COUNT(ZERO_TOKEN);
dqv = dq[1];
token_cache[scan[c]] = 0;
@@ -134,13 +134,13 @@
prob = coef_probs[band][ctx];
}
- if (!vp9_read(r, prob[ONE_CONTEXT_NODE])) {
+ if (!vpx_read(r, prob[ONE_CONTEXT_NODE])) {
INCREMENT_COUNT(ONE_TOKEN);
token = ONE_TOKEN;
val = 1;
} else {
INCREMENT_COUNT(TWO_TOKEN);
- token = vp9_read_tree(r, vp9_coef_con_tree,
+ token = vpx_read_tree(r, vp9_coef_con_tree,
vp9_pareto8_full[prob[PIVOT_NODE] - 1]);
switch (token) {
case TWO_TOKEN:
@@ -188,13 +188,13 @@
v = (val * dqv) >> dq_shift;
#if CONFIG_COEFFICIENT_RANGE_CHECKING
#if CONFIG_VP9_HIGHBITDEPTH
- dqcoeff[scan[c]] = highbd_check_range((vp9_read_bit(r) ? -v : v),
+ dqcoeff[scan[c]] = highbd_check_range((vpx_read_bit(r) ? -v : v),
xd->bd);
#else
- dqcoeff[scan[c]] = check_range(vp9_read_bit(r) ? -v : v);
+ dqcoeff[scan[c]] = check_range(vpx_read_bit(r) ? -v : v);
#endif // CONFIG_VP9_HIGHBITDEPTH
#else
- dqcoeff[scan[c]] = vp9_read_bit(r) ? -v : v;
+ dqcoeff[scan[c]] = vpx_read_bit(r) ? -v : v;
#endif // CONFIG_COEFFICIENT_RANGE_CHECKING
token_cache[scan[c]] = vp9_pt_energy_class[token];
++c;
@@ -253,7 +253,7 @@
int vp9_decode_block_tokens(MACROBLOCKD *xd,
int plane, const scan_order *sc,
int x, int y,
- TX_SIZE tx_size, vp9_reader *r,
+ TX_SIZE tx_size, vpx_reader *r,
int seg_id) {
struct macroblockd_plane *const pd = &xd->plane[plane];
const int16_t *const dequant = pd->seg_dequant[seg_id];
diff --git a/vp9/decoder/vp9_detokenize.h b/vp9/decoder/vp9_detokenize.h
index cf0e48a..d242d44 100644
--- a/vp9/decoder/vp9_detokenize.h
+++ b/vp9/decoder/vp9_detokenize.h
@@ -12,8 +12,8 @@
#ifndef VP9_DECODER_VP9_DETOKENIZE_H_
#define VP9_DECODER_VP9_DETOKENIZE_H_
+#include "vpx_dsp/bitreader.h"
#include "vp9/decoder/vp9_decoder.h"
-#include "vp9/decoder/vp9_reader.h"
#include "vp9/common/vp9_scan.h"
#ifdef __cplusplus
@@ -23,7 +23,7 @@
int vp9_decode_block_tokens(MACROBLOCKD *xd,
int plane, const scan_order *sc,
int x, int y,
- TX_SIZE tx_size, vp9_reader *r,
+ TX_SIZE tx_size, vpx_reader *r,
int seg_id);
#ifdef __cplusplus
diff --git a/vp9/decoder/vp9_dsubexp.c b/vp9/decoder/vp9_dsubexp.c
index b33c3b7..4fbc6db 100644
--- a/vp9/decoder/vp9_dsubexp.c
+++ b/vp9/decoder/vp9_dsubexp.c
@@ -21,11 +21,11 @@
return (v & 1) ? m - ((v + 1) >> 1) : m + (v >> 1);
}
-static int decode_uniform(vp9_reader *r) {
+static int decode_uniform(vpx_reader *r) {
const int l = 8;
const int m = (1 << l) - 191;
- const int v = vp9_read_literal(r, l - 1);
- return v < m ? v : (v << 1) - m + vp9_read_bit(r);
+ const int v = vpx_read_literal(r, l - 1);
+ return v < m ? v : (v << 1) - m + vpx_read_bit(r);
}
static int inv_remap_prob(int v, int m) {
@@ -58,19 +58,19 @@
}
}
-static int decode_term_subexp(vp9_reader *r) {
- if (!vp9_read_bit(r))
- return vp9_read_literal(r, 4);
- if (!vp9_read_bit(r))
- return vp9_read_literal(r, 4) + 16;
- if (!vp9_read_bit(r))
- return vp9_read_literal(r, 5) + 32;
+static int decode_term_subexp(vpx_reader *r) {
+ if (!vpx_read_bit(r))
+ return vpx_read_literal(r, 4);
+ if (!vpx_read_bit(r))
+ return vpx_read_literal(r, 4) + 16;
+ if (!vpx_read_bit(r))
+ return vpx_read_literal(r, 5) + 32;
return decode_uniform(r) + 64;
}
-void vp9_diff_update_prob(vp9_reader *r, vp9_prob* p) {
- if (vp9_read(r, DIFF_UPDATE_PROB)) {
+void vp9_diff_update_prob(vpx_reader *r, vpx_prob* p) {
+ if (vpx_read(r, DIFF_UPDATE_PROB)) {
const int delp = decode_term_subexp(r);
- *p = (vp9_prob)inv_remap_prob(delp, *p);
+ *p = (vpx_prob)inv_remap_prob(delp, *p);
}
}
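
decode_term_subexp keys short prefixes to small probability deltas; the branch-to-range mapping follows directly from the literals above and from decode_uniform's l = 8, m = 65:

/* Prefix (read order) -> decoded delta, tracing the code above:
 *   0   + 4-bit literal  -> 0..15
 *   10  + 4-bit literal  -> 16..31
 *   110 + 5-bit literal  -> 32..63
 *   111 + decode_uniform -> 64 and up; decode_uniform reads 7 bits and
 *         spends an 8th only on values >= m = 65 within its window.
 */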
diff --git a/vp9/decoder/vp9_dsubexp.h b/vp9/decoder/vp9_dsubexp.h
index 436f434..a8bcc70 100644
--- a/vp9/decoder/vp9_dsubexp.h
+++ b/vp9/decoder/vp9_dsubexp.h
@@ -12,13 +12,13 @@
#ifndef VP9_DECODER_VP9_DSUBEXP_H_
#define VP9_DECODER_VP9_DSUBEXP_H_
-#include "vp9/decoder/vp9_reader.h"
+#include "vpx_dsp/bitreader.h"
#ifdef __cplusplus
extern "C" {
#endif
-void vp9_diff_update_prob(vp9_reader *r, vp9_prob* p);
+void vp9_diff_update_prob(vpx_reader *r, vpx_prob* p);
#ifdef __cplusplus
} // extern "C"
diff --git a/vp9/decoder/vp9_read_bit_buffer.h b/vp9/decoder/vp9_read_bit_buffer.h
deleted file mode 100644
index fc88bd7..0000000
--- a/vp9/decoder/vp9_read_bit_buffer.h
+++ /dev/null
@@ -1,45 +0,0 @@
-/*
- * Copyright (c) 2013 The WebM project authors. All Rights Reserved.
- *
- * Use of this source code is governed by a BSD-style license
- * that can be found in the LICENSE file in the root of the source
- * tree. An additional intellectual property rights grant can be found
- * in the file PATENTS. All contributing project authors may
- * be found in the AUTHORS file in the root of the source tree.
- */
-
-#ifndef VP9_DECODER_VP9_READ_BIT_BUFFER_H_
-#define VP9_DECODER_VP9_READ_BIT_BUFFER_H_
-
-#include <limits.h>
-
-#include "vpx/vpx_integer.h"
-
-#ifdef __cplusplus
-extern "C" {
-#endif
-
-typedef void (*vp9_rb_error_handler)(void *data);
-
-struct vp9_read_bit_buffer {
- const uint8_t *bit_buffer;
- const uint8_t *bit_buffer_end;
- size_t bit_offset;
-
- void *error_handler_data;
- vp9_rb_error_handler error_handler;
-};
-
-size_t vp9_rb_bytes_read(struct vp9_read_bit_buffer *rb);
-
-int vp9_rb_read_bit(struct vp9_read_bit_buffer *rb);
-
-int vp9_rb_read_literal(struct vp9_read_bit_buffer *rb, int bits);
-
-int vp9_rb_read_signed_literal(struct vp9_read_bit_buffer *rb, int bits);
-
-#ifdef __cplusplus
-} // extern "C"
-#endif
-
-#endif // VP9_DECODER_VP9_READ_BIT_BUFFER_H_
diff --git a/vp9/encoder/arm/neon/vp9_avg_neon.c b/vp9/encoder/arm/neon/vp9_avg_neon.c
index fecab57..40d7e87 100644
--- a/vp9/encoder/arm/neon/vp9_avg_neon.c
+++ b/vp9/encoder/arm/neon/vp9_avg_neon.c
@@ -100,3 +100,17 @@
hbuf += 8;
vst1q_s16(hbuf, vreinterpretq_s16_u16(vec_sum_hi));
}
+
+int16_t vp9_int_pro_col_neon(uint8_t const *ref, const int width) {
+ int i;
+ uint16x8_t vec_sum = vdupq_n_u16(0);
+
+ for (i = 0; i < width; i += 16) {
+ const uint8x16_t vec_row = vld1q_u8(ref);
+ vec_sum = vaddw_u8(vec_sum, vget_low_u8(vec_row));
+ vec_sum = vaddw_u8(vec_sum, vget_high_u8(vec_row));
+ ref += 16;
+ }
+
+ return horizontal_add_u16x8(vec_sum);
+}
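
The new NEON column projection reduces one row of pixels to a single sum, 16 lanes at a time. A scalar sketch of the same reduction (an assumed reference, not the committed C fallback; the NEON path requires width to be a multiple of 16):

#include <stdint.h>

/* Scalar sketch matching vp9_int_pro_col_neon's reduction (illustration). */
static int16_t int_pro_col_ref(const uint8_t *ref, int width) {
  int i;
  int sum = 0;  /* the NEON path accumulates in uint16 lanes */
  for (i = 0; i < width; ++i)
    sum += ref[i];
  return (int16_t)sum;
}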
diff --git a/vp9/encoder/arm/neon/vp9_dct_neon.c b/vp9/encoder/arm/neon/vp9_dct_neon.c
index a6d4797..97106ff 100644
--- a/vp9/encoder/arm/neon/vp9_dct_neon.c
+++ b/vp9/encoder/arm/neon/vp9_dct_neon.c
@@ -10,6 +10,7 @@
#include <arm_neon.h>
#include "./vp9_rtcd.h"
+#include "./vpx_dsp_rtcd.h"
#include "./vpx_config.h"
#include "vp9/common/vp9_blockd.h"
@@ -49,193 +50,3 @@
quant_ptr, quant_shift_ptr, qcoeff_ptr, dqcoeff_ptr,
dequant_ptr, eob_ptr, scan_ptr, iscan_ptr);
}
-
-void vp9_fdct8x8_neon(const int16_t *input, int16_t *final_output, int stride) {
- int i;
- // stage 1
- int16x8_t input_0 = vshlq_n_s16(vld1q_s16(&input[0 * stride]), 2);
- int16x8_t input_1 = vshlq_n_s16(vld1q_s16(&input[1 * stride]), 2);
- int16x8_t input_2 = vshlq_n_s16(vld1q_s16(&input[2 * stride]), 2);
- int16x8_t input_3 = vshlq_n_s16(vld1q_s16(&input[3 * stride]), 2);
- int16x8_t input_4 = vshlq_n_s16(vld1q_s16(&input[4 * stride]), 2);
- int16x8_t input_5 = vshlq_n_s16(vld1q_s16(&input[5 * stride]), 2);
- int16x8_t input_6 = vshlq_n_s16(vld1q_s16(&input[6 * stride]), 2);
- int16x8_t input_7 = vshlq_n_s16(vld1q_s16(&input[7 * stride]), 2);
- for (i = 0; i < 2; ++i) {
- int16x8_t out_0, out_1, out_2, out_3, out_4, out_5, out_6, out_7;
- const int16x8_t v_s0 = vaddq_s16(input_0, input_7);
- const int16x8_t v_s1 = vaddq_s16(input_1, input_6);
- const int16x8_t v_s2 = vaddq_s16(input_2, input_5);
- const int16x8_t v_s3 = vaddq_s16(input_3, input_4);
- const int16x8_t v_s4 = vsubq_s16(input_3, input_4);
- const int16x8_t v_s5 = vsubq_s16(input_2, input_5);
- const int16x8_t v_s6 = vsubq_s16(input_1, input_6);
- const int16x8_t v_s7 = vsubq_s16(input_0, input_7);
- // fdct4(step, step);
- int16x8_t v_x0 = vaddq_s16(v_s0, v_s3);
- int16x8_t v_x1 = vaddq_s16(v_s1, v_s2);
- int16x8_t v_x2 = vsubq_s16(v_s1, v_s2);
- int16x8_t v_x3 = vsubq_s16(v_s0, v_s3);
- // fdct4(step, step);
- int32x4_t v_t0_lo = vaddl_s16(vget_low_s16(v_x0), vget_low_s16(v_x1));
- int32x4_t v_t0_hi = vaddl_s16(vget_high_s16(v_x0), vget_high_s16(v_x1));
- int32x4_t v_t1_lo = vsubl_s16(vget_low_s16(v_x0), vget_low_s16(v_x1));
- int32x4_t v_t1_hi = vsubl_s16(vget_high_s16(v_x0), vget_high_s16(v_x1));
- int32x4_t v_t2_lo = vmull_n_s16(vget_low_s16(v_x2), (int16_t)cospi_24_64);
- int32x4_t v_t2_hi = vmull_n_s16(vget_high_s16(v_x2), (int16_t)cospi_24_64);
- int32x4_t v_t3_lo = vmull_n_s16(vget_low_s16(v_x3), (int16_t)cospi_24_64);
- int32x4_t v_t3_hi = vmull_n_s16(vget_high_s16(v_x3), (int16_t)cospi_24_64);
- v_t2_lo = vmlal_n_s16(v_t2_lo, vget_low_s16(v_x3), (int16_t)cospi_8_64);
- v_t2_hi = vmlal_n_s16(v_t2_hi, vget_high_s16(v_x3), (int16_t)cospi_8_64);
- v_t3_lo = vmlsl_n_s16(v_t3_lo, vget_low_s16(v_x2), (int16_t)cospi_8_64);
- v_t3_hi = vmlsl_n_s16(v_t3_hi, vget_high_s16(v_x2), (int16_t)cospi_8_64);
- v_t0_lo = vmulq_n_s32(v_t0_lo, cospi_16_64);
- v_t0_hi = vmulq_n_s32(v_t0_hi, cospi_16_64);
- v_t1_lo = vmulq_n_s32(v_t1_lo, cospi_16_64);
- v_t1_hi = vmulq_n_s32(v_t1_hi, cospi_16_64);
- {
- const int16x4_t a = vrshrn_n_s32(v_t0_lo, DCT_CONST_BITS);
- const int16x4_t b = vrshrn_n_s32(v_t0_hi, DCT_CONST_BITS);
- const int16x4_t c = vrshrn_n_s32(v_t1_lo, DCT_CONST_BITS);
- const int16x4_t d = vrshrn_n_s32(v_t1_hi, DCT_CONST_BITS);
- const int16x4_t e = vrshrn_n_s32(v_t2_lo, DCT_CONST_BITS);
- const int16x4_t f = vrshrn_n_s32(v_t2_hi, DCT_CONST_BITS);
- const int16x4_t g = vrshrn_n_s32(v_t3_lo, DCT_CONST_BITS);
- const int16x4_t h = vrshrn_n_s32(v_t3_hi, DCT_CONST_BITS);
- out_0 = vcombine_s16(a, c); // 00 01 02 03 40 41 42 43
- out_2 = vcombine_s16(e, g); // 20 21 22 23 60 61 62 63
- out_4 = vcombine_s16(b, d); // 04 05 06 07 44 45 46 47
- out_6 = vcombine_s16(f, h); // 24 25 26 27 64 65 66 67
- }
- // Stage 2
- v_x0 = vsubq_s16(v_s6, v_s5);
- v_x1 = vaddq_s16(v_s6, v_s5);
- v_t0_lo = vmull_n_s16(vget_low_s16(v_x0), (int16_t)cospi_16_64);
- v_t0_hi = vmull_n_s16(vget_high_s16(v_x0), (int16_t)cospi_16_64);
- v_t1_lo = vmull_n_s16(vget_low_s16(v_x1), (int16_t)cospi_16_64);
- v_t1_hi = vmull_n_s16(vget_high_s16(v_x1), (int16_t)cospi_16_64);
- {
- const int16x4_t a = vrshrn_n_s32(v_t0_lo, DCT_CONST_BITS);
- const int16x4_t b = vrshrn_n_s32(v_t0_hi, DCT_CONST_BITS);
- const int16x4_t c = vrshrn_n_s32(v_t1_lo, DCT_CONST_BITS);
- const int16x4_t d = vrshrn_n_s32(v_t1_hi, DCT_CONST_BITS);
- const int16x8_t ab = vcombine_s16(a, b);
- const int16x8_t cd = vcombine_s16(c, d);
- // Stage 3
- v_x0 = vaddq_s16(v_s4, ab);
- v_x1 = vsubq_s16(v_s4, ab);
- v_x2 = vsubq_s16(v_s7, cd);
- v_x3 = vaddq_s16(v_s7, cd);
- }
- // Stage 4
- v_t0_lo = vmull_n_s16(vget_low_s16(v_x3), (int16_t)cospi_4_64);
- v_t0_hi = vmull_n_s16(vget_high_s16(v_x3), (int16_t)cospi_4_64);
- v_t0_lo = vmlal_n_s16(v_t0_lo, vget_low_s16(v_x0), (int16_t)cospi_28_64);
- v_t0_hi = vmlal_n_s16(v_t0_hi, vget_high_s16(v_x0), (int16_t)cospi_28_64);
- v_t1_lo = vmull_n_s16(vget_low_s16(v_x1), (int16_t)cospi_12_64);
- v_t1_hi = vmull_n_s16(vget_high_s16(v_x1), (int16_t)cospi_12_64);
- v_t1_lo = vmlal_n_s16(v_t1_lo, vget_low_s16(v_x2), (int16_t)cospi_20_64);
- v_t1_hi = vmlal_n_s16(v_t1_hi, vget_high_s16(v_x2), (int16_t)cospi_20_64);
- v_t2_lo = vmull_n_s16(vget_low_s16(v_x2), (int16_t)cospi_12_64);
- v_t2_hi = vmull_n_s16(vget_high_s16(v_x2), (int16_t)cospi_12_64);
- v_t2_lo = vmlsl_n_s16(v_t2_lo, vget_low_s16(v_x1), (int16_t)cospi_20_64);
- v_t2_hi = vmlsl_n_s16(v_t2_hi, vget_high_s16(v_x1), (int16_t)cospi_20_64);
- v_t3_lo = vmull_n_s16(vget_low_s16(v_x3), (int16_t)cospi_28_64);
- v_t3_hi = vmull_n_s16(vget_high_s16(v_x3), (int16_t)cospi_28_64);
- v_t3_lo = vmlsl_n_s16(v_t3_lo, vget_low_s16(v_x0), (int16_t)cospi_4_64);
- v_t3_hi = vmlsl_n_s16(v_t3_hi, vget_high_s16(v_x0), (int16_t)cospi_4_64);
- {
- const int16x4_t a = vrshrn_n_s32(v_t0_lo, DCT_CONST_BITS);
- const int16x4_t b = vrshrn_n_s32(v_t0_hi, DCT_CONST_BITS);
- const int16x4_t c = vrshrn_n_s32(v_t1_lo, DCT_CONST_BITS);
- const int16x4_t d = vrshrn_n_s32(v_t1_hi, DCT_CONST_BITS);
- const int16x4_t e = vrshrn_n_s32(v_t2_lo, DCT_CONST_BITS);
- const int16x4_t f = vrshrn_n_s32(v_t2_hi, DCT_CONST_BITS);
- const int16x4_t g = vrshrn_n_s32(v_t3_lo, DCT_CONST_BITS);
- const int16x4_t h = vrshrn_n_s32(v_t3_hi, DCT_CONST_BITS);
- out_1 = vcombine_s16(a, c); // 10 11 12 13 50 51 52 53
- out_3 = vcombine_s16(e, g); // 30 31 32 33 70 71 72 73
- out_5 = vcombine_s16(b, d); // 14 15 16 17 54 55 56 57
- out_7 = vcombine_s16(f, h); // 34 35 36 37 74 75 76 77
- }
- // transpose 8x8
- {
- // 00 01 02 03 40 41 42 43
- // 10 11 12 13 50 51 52 53
- // 20 21 22 23 60 61 62 63
- // 30 31 32 33 70 71 72 73
- // 04 05 06 07 44 45 46 47
- // 14 15 16 17 54 55 56 57
- // 24 25 26 27 64 65 66 67
- // 34 35 36 37 74 75 76 77
- const int32x4x2_t r02_s32 = vtrnq_s32(vreinterpretq_s32_s16(out_0),
- vreinterpretq_s32_s16(out_2));
- const int32x4x2_t r13_s32 = vtrnq_s32(vreinterpretq_s32_s16(out_1),
- vreinterpretq_s32_s16(out_3));
- const int32x4x2_t r46_s32 = vtrnq_s32(vreinterpretq_s32_s16(out_4),
- vreinterpretq_s32_s16(out_6));
- const int32x4x2_t r57_s32 = vtrnq_s32(vreinterpretq_s32_s16(out_5),
- vreinterpretq_s32_s16(out_7));
- const int16x8x2_t r01_s16 =
- vtrnq_s16(vreinterpretq_s16_s32(r02_s32.val[0]),
- vreinterpretq_s16_s32(r13_s32.val[0]));
- const int16x8x2_t r23_s16 =
- vtrnq_s16(vreinterpretq_s16_s32(r02_s32.val[1]),
- vreinterpretq_s16_s32(r13_s32.val[1]));
- const int16x8x2_t r45_s16 =
- vtrnq_s16(vreinterpretq_s16_s32(r46_s32.val[0]),
- vreinterpretq_s16_s32(r57_s32.val[0]));
- const int16x8x2_t r67_s16 =
- vtrnq_s16(vreinterpretq_s16_s32(r46_s32.val[1]),
- vreinterpretq_s16_s32(r57_s32.val[1]));
- input_0 = r01_s16.val[0];
- input_1 = r01_s16.val[1];
- input_2 = r23_s16.val[0];
- input_3 = r23_s16.val[1];
- input_4 = r45_s16.val[0];
- input_5 = r45_s16.val[1];
- input_6 = r67_s16.val[0];
- input_7 = r67_s16.val[1];
- // 00 10 20 30 40 50 60 70
- // 01 11 21 31 41 51 61 71
- // 02 12 22 32 42 52 62 72
- // 03 13 23 33 43 53 63 73
- // 04 14 24 34 44 54 64 74
- // 05 15 25 35 45 55 65 75
- // 06 16 26 36 46 56 66 76
- // 07 17 27 37 47 57 67 77
- }
- } // for
- {
- // from vp9_dct_sse2.c
- // Post-condition (division by two)
-  // division of two 16-bit signed numbers using shifts
- // n / 2 = (n - (n >> 15)) >> 1
- const int16x8_t sign_in0 = vshrq_n_s16(input_0, 15);
- const int16x8_t sign_in1 = vshrq_n_s16(input_1, 15);
- const int16x8_t sign_in2 = vshrq_n_s16(input_2, 15);
- const int16x8_t sign_in3 = vshrq_n_s16(input_3, 15);
- const int16x8_t sign_in4 = vshrq_n_s16(input_4, 15);
- const int16x8_t sign_in5 = vshrq_n_s16(input_5, 15);
- const int16x8_t sign_in6 = vshrq_n_s16(input_6, 15);
- const int16x8_t sign_in7 = vshrq_n_s16(input_7, 15);
- input_0 = vhsubq_s16(input_0, sign_in0);
- input_1 = vhsubq_s16(input_1, sign_in1);
- input_2 = vhsubq_s16(input_2, sign_in2);
- input_3 = vhsubq_s16(input_3, sign_in3);
- input_4 = vhsubq_s16(input_4, sign_in4);
- input_5 = vhsubq_s16(input_5, sign_in5);
- input_6 = vhsubq_s16(input_6, sign_in6);
- input_7 = vhsubq_s16(input_7, sign_in7);
- // store results
- vst1q_s16(&final_output[0 * 8], input_0);
- vst1q_s16(&final_output[1 * 8], input_1);
- vst1q_s16(&final_output[2 * 8], input_2);
- vst1q_s16(&final_output[3 * 8], input_3);
- vst1q_s16(&final_output[4 * 8], input_4);
- vst1q_s16(&final_output[5 * 8], input_5);
- vst1q_s16(&final_output[6 * 8], input_6);
- vst1q_s16(&final_output[7 * 8], input_7);
- }
-}
-
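The post-condition above halves each coefficient while rounding toward zero, which is why the NEON code can use vhsubq_s16 (a halving subtract) in a single step instead of a divide. A minimal standalone check of the identity, assuming the usual arithmetic right shift on signed values:

#include <assert.h>
#include <stdint.h>

/* n / 2 == (n - (n >> 15)) >> 1 for all int16_t n: n >> 15 is -1 for
 * negative n and 0 otherwise, so the subtraction biases negative values
 * up by one before the truncating shift. */
static int16_t halve_toward_zero(int16_t n) {
  return (int16_t)((n - (n >> 15)) >> 1);
}

int main(void) {
  int n;
  for (n = INT16_MIN; n <= INT16_MAX; ++n)
    assert(halve_toward_zero((int16_t)n) == (int16_t)(n / 2));
  return 0;
}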
diff --git a/vp9/encoder/vp9_aq_cyclicrefresh.c b/vp9/encoder/vp9_aq_cyclicrefresh.c
index 6270bf4..b619063 100644
--- a/vp9/encoder/vp9_aq_cyclicrefresh.c
+++ b/vp9/encoder/vp9_aq_cyclicrefresh.c
@@ -347,7 +347,10 @@
// For video conference clips, if the background has high motion in current
// frame because of the camera movement, set this frame as the golden frame.
// Use 70% and 5% as the thresholds for golden frame refreshing.
- if (cnt1 * 10 > (70 * rows * cols) && cnt2 * 20 < cnt1) {
+ // Also, force this frame as a golden update frame if it will change
+ // the resolution (resize_pending != 0).
+ if (cpi->resize_pending != 0 ||
+ (cnt1 * 10 > (70 * rows * cols) && cnt2 * 20 < cnt1)) {
vp9_cyclic_refresh_set_golden_update(cpi);
rc->frames_till_gf_update_due = rc->baseline_gf_interval;
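For reference, the unchanged threshold test encodes the stated percentages without floating point: cnt1 * 10 > 70 * rows * cols means cnt1 exceeds 70% of the block count, and cnt2 * 20 < cnt1 means cnt2 is under 5% of cnt1. A sketch of the full predicate after this patch (variable names taken from the hunk above):

static int force_golden_update(int resize_pending, int cnt1, int cnt2,
                               int rows, int cols) {
  return resize_pending != 0 ||
         (cnt1 * 10 > 70 * rows * cols && cnt2 * 20 < cnt1);
}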
@@ -562,4 +565,5 @@
CYCLIC_REFRESH *const cr = cpi->cyclic_refresh;
memset(cr->map, 0, cm->mi_rows * cm->mi_cols);
cr->sb_index = 0;
+ cpi->refresh_golden_frame = 1;
}
diff --git a/vp9/encoder/vp9_bitstream.c b/vp9/encoder/vp9_bitstream.c
index 4ca4083..f06bd56 100644
--- a/vp9/encoder/vp9_bitstream.c
+++ b/vp9/encoder/vp9_bitstream.c
@@ -13,6 +13,7 @@
#include <limits.h>
#include "vpx/vpx_encoder.h"
+#include "vpx_dsp/bitwriter_buffer.h"
#include "vpx_mem/vpx_mem.h"
#include "vpx_ports/mem_ops.h"
@@ -32,7 +33,6 @@
#include "vp9/encoder/vp9_segmentation.h"
#include "vp9/encoder/vp9_subexp.h"
#include "vp9/encoder/vp9_tokenize.h"
-#include "vp9/encoder/vp9_write_bit_buffer.h"
static const struct vp9_token intra_mode_encodings[INTRA_MODES] = {
{0, 1}, {6, 3}, {28, 5}, {30, 5}, {58, 6}, {59, 6}, {126, 7}, {127, 7},
@@ -44,27 +44,27 @@
static const struct vp9_token inter_mode_encodings[INTER_MODES] =
{{2, 2}, {6, 3}, {0, 1}, {7, 3}};
-static void write_intra_mode(vp9_writer *w, PREDICTION_MODE mode,
- const vp9_prob *probs) {
+static void write_intra_mode(vpx_writer *w, PREDICTION_MODE mode,
+ const vpx_prob *probs) {
vp9_write_token(w, vp9_intra_mode_tree, probs, &intra_mode_encodings[mode]);
}
-static void write_inter_mode(vp9_writer *w, PREDICTION_MODE mode,
- const vp9_prob *probs) {
+static void write_inter_mode(vpx_writer *w, PREDICTION_MODE mode,
+ const vpx_prob *probs) {
assert(is_inter_mode(mode));
vp9_write_token(w, vp9_inter_mode_tree, probs,
&inter_mode_encodings[INTER_OFFSET(mode)]);
}
-static void encode_unsigned_max(struct vp9_write_bit_buffer *wb,
+static void encode_unsigned_max(struct vpx_write_bit_buffer *wb,
int data, int max) {
- vp9_wb_write_literal(wb, data, get_unsigned_bits(max));
+ vpx_wb_write_literal(wb, data, get_unsigned_bits(max));
}
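encode_unsigned_max() sizes the literal to the largest value the field can take. A sketch of what get_unsigned_bits() is assumed to return (the bit width of max, e.g. max == 63 gives 6 bits; this mirrors its macro definition rather than being libvpx API):

static int unsigned_bits(unsigned int max) {
  int n = 0;
  while (max) {
    ++n;
    max >>= 1;
  }
  return n;  /* 0 when max == 0: nothing needs to be written */
}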
-static void prob_diff_update(const vp9_tree_index *tree,
- vp9_prob probs[/*n - 1*/],
+static void prob_diff_update(const vpx_tree_index *tree,
+ vpx_prob probs[/*n - 1*/],
const unsigned int counts[/*n - 1*/],
- int n, vp9_writer *w) {
+ int n, vpx_writer *w) {
int i;
unsigned int branch_ct[32][2];
@@ -77,32 +77,32 @@
}
static void write_selected_tx_size(const VP9_COMMON *cm,
- const MACROBLOCKD *xd, vp9_writer *w) {
+ const MACROBLOCKD *xd, vpx_writer *w) {
TX_SIZE tx_size = xd->mi[0]->mbmi.tx_size;
BLOCK_SIZE bsize = xd->mi[0]->mbmi.sb_type;
const TX_SIZE max_tx_size = max_txsize_lookup[bsize];
- const vp9_prob *const tx_probs = get_tx_probs2(max_tx_size, xd,
+ const vpx_prob *const tx_probs = get_tx_probs2(max_tx_size, xd,
&cm->fc->tx_probs);
- vp9_write(w, tx_size != TX_4X4, tx_probs[0]);
+ vpx_write(w, tx_size != TX_4X4, tx_probs[0]);
if (tx_size != TX_4X4 && max_tx_size >= TX_16X16) {
- vp9_write(w, tx_size != TX_8X8, tx_probs[1]);
+ vpx_write(w, tx_size != TX_8X8, tx_probs[1]);
if (tx_size != TX_8X8 && max_tx_size >= TX_32X32)
- vp9_write(w, tx_size != TX_16X16, tx_probs[2]);
+ vpx_write(w, tx_size != TX_16X16, tx_probs[2]);
}
}
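write_selected_tx_size() signals the transform size as up to three "larger than" flags, each coded against a context probability and gated by the largest size the block allows. An illustrative decoder-side counterpart (a sketch only; read_bit is a hypothetical stand-in for the arithmetic decoder, and TX_4X4..TX_32X32 are assumed to be 0..3):

static int read_tx_size(int max_tx_size, int (*read_bit)(void)) {
  int tx_size = 0;                    /* TX_4X4 */
  if (read_bit()) {
    tx_size = 1;                      /* TX_8X8 */
    if (max_tx_size >= 2 && read_bit()) {
      tx_size = 2;                    /* TX_16X16 */
      if (max_tx_size >= 3 && read_bit())
        tx_size = 3;                  /* TX_32X32 */
    }
  }
  return tx_size;
}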
static int write_skip(const VP9_COMMON *cm, const MACROBLOCKD *xd,
- int segment_id, const MODE_INFO *mi, vp9_writer *w) {
+ int segment_id, const MODE_INFO *mi, vpx_writer *w) {
if (segfeature_active(&cm->seg, segment_id, SEG_LVL_SKIP)) {
return 1;
} else {
const int skip = mi->mbmi.skip;
- vp9_write(w, skip, vp9_get_skip_prob(cm, xd));
+ vpx_write(w, skip, vp9_get_skip_prob(cm, xd));
return skip;
}
}
-static void update_skip_probs(VP9_COMMON *cm, vp9_writer *w,
+static void update_skip_probs(VP9_COMMON *cm, vpx_writer *w,
FRAME_COUNTS *counts) {
int k;
@@ -110,7 +110,7 @@
vp9_cond_prob_diff_update(w, &cm->fc->skip_probs[k], counts->skip[k]);
}
-static void update_switchable_interp_probs(VP9_COMMON *cm, vp9_writer *w,
+static void update_switchable_interp_probs(VP9_COMMON *cm, vpx_writer *w,
FRAME_COUNTS *counts) {
int j;
for (j = 0; j < SWITCHABLE_FILTER_CONTEXTS; ++j)
@@ -119,7 +119,7 @@
counts->switchable_interp[j], SWITCHABLE_FILTERS, w);
}
-static void pack_mb_tokens(vp9_writer *w,
+static void pack_mb_tokens(vpx_writer *w,
TOKENEXTRA **tp, const TOKENEXTRA *const stop,
vpx_bit_depth_t bit_depth) {
TOKENEXTRA *p = *tp;
@@ -179,12 +179,12 @@
do {
const int bb = (v >> --n) & 1;
- vp9_write(w, bb, pb[i >> 1]);
+ vpx_write(w, bb, pb[i >> 1]);
i = b->tree[i + bb];
} while (n);
}
- vp9_write_bit(w, e & 1);
+ vpx_write_bit(w, e & 1);
}
++p;
}
@@ -192,7 +192,7 @@
*tp = p + (p->token == EOSB_TOKEN);
}
-static void write_segment_id(vp9_writer *w, const struct segmentation *seg,
+static void write_segment_id(vpx_writer *w, const struct segmentation *seg,
int segment_id) {
if (seg->enabled && seg->update_map)
vp9_write_tree(w, vp9_segment_tree, seg->tree_probs, segment_id, 3, 0);
@@ -200,7 +200,7 @@
// This function encodes the reference frame
static void write_ref_frames(const VP9_COMMON *cm, const MACROBLOCKD *xd,
- vp9_writer *w) {
+ vpx_writer *w) {
const MB_MODE_INFO *const mbmi = &xd->mi[0]->mbmi;
const int is_compound = has_second_ref(mbmi);
const int segment_id = mbmi->segment_id;
@@ -215,27 +215,27 @@
// does the feature use compound prediction or not
// (if not specified at the frame/segment level)
if (cm->reference_mode == REFERENCE_MODE_SELECT) {
- vp9_write(w, is_compound, vp9_get_reference_mode_prob(cm, xd));
+ vpx_write(w, is_compound, vp9_get_reference_mode_prob(cm, xd));
} else {
assert(!is_compound == (cm->reference_mode == SINGLE_REFERENCE));
}
if (is_compound) {
- vp9_write(w, mbmi->ref_frame[0] == GOLDEN_FRAME,
+ vpx_write(w, mbmi->ref_frame[0] == GOLDEN_FRAME,
vp9_get_pred_prob_comp_ref_p(cm, xd));
} else {
const int bit0 = mbmi->ref_frame[0] != LAST_FRAME;
- vp9_write(w, bit0, vp9_get_pred_prob_single_ref_p1(cm, xd));
+ vpx_write(w, bit0, vp9_get_pred_prob_single_ref_p1(cm, xd));
if (bit0) {
const int bit1 = mbmi->ref_frame[0] != GOLDEN_FRAME;
- vp9_write(w, bit1, vp9_get_pred_prob_single_ref_p2(cm, xd));
+ vpx_write(w, bit1, vp9_get_pred_prob_single_ref_p2(cm, xd));
}
}
}
}
static void pack_inter_mode_mvs(VP9_COMP *cpi, const MODE_INFO *mi,
- vp9_writer *w) {
+ vpx_writer *w) {
VP9_COMMON *const cm = &cpi->common;
const nmv_context *nmvc = &cm->fc->nmvc;
const MACROBLOCK *const x = &cpi->td.mb;
@@ -254,8 +254,8 @@
if (seg->update_map) {
if (seg->temporal_update) {
const int pred_flag = mbmi->seg_id_predicted;
- vp9_prob pred_prob = vp9_get_pred_prob_seg_id(seg, xd);
- vp9_write(w, pred_flag, pred_prob);
+ vpx_prob pred_prob = vp9_get_pred_prob_seg_id(seg, xd);
+ vpx_write(w, pred_flag, pred_prob);
if (!pred_flag)
write_segment_id(w, seg, segment_id);
} else {
@@ -266,7 +266,7 @@
skip = write_skip(cm, xd, segment_id, mi, w);
if (!segfeature_active(seg, segment_id, SEG_LVL_REF_FRAME))
- vp9_write(w, is_inter, vp9_get_intra_inter_prob(cm, xd));
+ vpx_write(w, is_inter, vp9_get_intra_inter_prob(cm, xd));
if (bsize >= BLOCK_8X8 && cm->tx_mode == TX_MODE_SELECT &&
!(is_inter && skip)) {
@@ -290,7 +290,7 @@
write_intra_mode(w, mbmi->uv_mode, cm->fc->uv_mode_prob[mode]);
} else {
const int mode_ctx = mbmi_ext->mode_context[mbmi->ref_frame[0]];
- const vp9_prob *const inter_probs = cm->fc->inter_mode_probs[mode_ctx];
+ const vpx_prob *const inter_probs = cm->fc->inter_mode_probs[mode_ctx];
write_ref_frames(cm, xd, w);
// If segment skip is not enabled code the mode.
@@ -339,7 +339,7 @@
}
static void write_mb_modes_kf(const VP9_COMMON *cm, const MACROBLOCKD *xd,
- MODE_INFO **mi_8x8, vp9_writer *w) {
+ MODE_INFO **mi_8x8, vpx_writer *w) {
const struct segmentation *const seg = &cm->seg;
const MODE_INFO *const mi = mi_8x8[0];
const MODE_INFO *const above_mi = xd->above_mi;
@@ -375,7 +375,7 @@
}
static void write_modes_b(VP9_COMP *cpi, const TileInfo *const tile,
- vp9_writer *w, TOKENEXTRA **tok,
+ vpx_writer *w, TOKENEXTRA **tok,
const TOKENEXTRA *const tok_end,
int mi_row, int mi_col) {
const VP9_COMMON *const cm = &cpi->common;
@@ -405,9 +405,9 @@
static void write_partition(const VP9_COMMON *const cm,
const MACROBLOCKD *const xd,
int hbs, int mi_row, int mi_col,
- PARTITION_TYPE p, BLOCK_SIZE bsize, vp9_writer *w) {
+ PARTITION_TYPE p, BLOCK_SIZE bsize, vpx_writer *w) {
const int ctx = partition_plane_context(xd, mi_row, mi_col, bsize);
- const vp9_prob *const probs = xd->partition_probs[ctx];
+ const vpx_prob *const probs = xd->partition_probs[ctx];
const int has_rows = (mi_row + hbs) < cm->mi_rows;
const int has_cols = (mi_col + hbs) < cm->mi_cols;
@@ -415,17 +415,17 @@
vp9_write_token(w, vp9_partition_tree, probs, &partition_encodings[p]);
} else if (!has_rows && has_cols) {
assert(p == PARTITION_SPLIT || p == PARTITION_HORZ);
- vp9_write(w, p == PARTITION_SPLIT, probs[1]);
+ vpx_write(w, p == PARTITION_SPLIT, probs[1]);
} else if (has_rows && !has_cols) {
assert(p == PARTITION_SPLIT || p == PARTITION_VERT);
- vp9_write(w, p == PARTITION_SPLIT, probs[2]);
+ vpx_write(w, p == PARTITION_SPLIT, probs[2]);
} else {
assert(p == PARTITION_SPLIT);
}
}
static void write_modes_sb(VP9_COMP *cpi,
- const TileInfo *const tile, vp9_writer *w,
+ const TileInfo *const tile, vpx_writer *w,
TOKENEXTRA **tok, const TOKENEXTRA *const tok_end,
int mi_row, int mi_col, BLOCK_SIZE bsize) {
const VP9_COMMON *const cm = &cpi->common;
@@ -483,7 +483,7 @@
}
static void write_modes(VP9_COMP *cpi,
- const TileInfo *const tile, vp9_writer *w,
+ const TileInfo *const tile, vpx_writer *w,
TOKENEXTRA **tok, const TOKENEXTRA *const tok_end) {
const VP9_COMMON *const cm = &cpi->common;
MACROBLOCKD *const xd = &cpi->td.mb.e_mbd;
@@ -528,12 +528,12 @@
}
}
-static void update_coef_probs_common(vp9_writer* const bc, VP9_COMP *cpi,
+static void update_coef_probs_common(vpx_writer* const bc, VP9_COMP *cpi,
TX_SIZE tx_size,
vp9_coeff_stats *frame_branch_ct,
vp9_coeff_probs_model *new_coef_probs) {
vp9_coeff_probs_model *old_coef_probs = cpi->common.fc->coef_probs[tx_size];
- const vp9_prob upd = DIFF_UPDATE_PROB;
+ const vpx_prob upd = DIFF_UPDATE_PROB;
const int entropy_nodes_update = UNCONSTRAINED_NODES;
int i, j, k, l, t;
int stepsize = cpi->sf.coeff_prob_appx_step;
@@ -548,8 +548,8 @@
for (k = 0; k < COEF_BANDS; ++k) {
for (l = 0; l < BAND_COEFF_CONTEXTS(k); ++l) {
for (t = 0; t < entropy_nodes_update; ++t) {
- vp9_prob newp = new_coef_probs[i][j][k][l][t];
- const vp9_prob oldp = old_coef_probs[i][j][k][l][t];
+ vpx_prob newp = new_coef_probs[i][j][k][l][t];
+ const vpx_prob oldp = old_coef_probs[i][j][k][l][t];
int s;
int u = 0;
if (t == PIVOT_NODE)
@@ -575,19 +575,19 @@
// printf("Update %d %d, savings %d\n", update[0], update[1], savings);
/* Is coef updated at all */
if (update[1] == 0 || savings < 0) {
- vp9_write_bit(bc, 0);
+ vpx_write_bit(bc, 0);
return;
}
- vp9_write_bit(bc, 1);
+ vpx_write_bit(bc, 1);
for (i = 0; i < PLANE_TYPES; ++i) {
for (j = 0; j < REF_TYPES; ++j) {
for (k = 0; k < COEF_BANDS; ++k) {
for (l = 0; l < BAND_COEFF_CONTEXTS(k); ++l) {
// calc probs and branch cts for this frame only
for (t = 0; t < entropy_nodes_update; ++t) {
- vp9_prob newp = new_coef_probs[i][j][k][l][t];
- vp9_prob *oldp = old_coef_probs[i][j][k][l] + t;
- const vp9_prob upd = DIFF_UPDATE_PROB;
+ vpx_prob newp = new_coef_probs[i][j][k][l][t];
+ vpx_prob *oldp = old_coef_probs[i][j][k][l] + t;
+ const vpx_prob upd = DIFF_UPDATE_PROB;
int s;
int u = 0;
if (t == PIVOT_NODE)
@@ -600,7 +600,7 @@
*oldp, &newp, upd);
if (s > 0 && newp != *oldp)
u = 1;
- vp9_write(bc, u, upd);
+ vpx_write(bc, u, upd);
if (u) {
/* send/use new probability */
vp9_write_prob_diff_update(bc, newp, *oldp);
@@ -623,8 +623,8 @@
for (l = 0; l < BAND_COEFF_CONTEXTS(k); ++l) {
// calc probs and branch cts for this frame only
for (t = 0; t < entropy_nodes_update; ++t) {
- vp9_prob newp = new_coef_probs[i][j][k][l][t];
- vp9_prob *oldp = old_coef_probs[i][j][k][l] + t;
+ vpx_prob newp = new_coef_probs[i][j][k][l][t];
+ vpx_prob *oldp = old_coef_probs[i][j][k][l] + t;
int s;
int u = 0;
@@ -648,11 +648,11 @@
if (u == 1 && updates == 1) {
int v;
// first update
- vp9_write_bit(bc, 1);
+ vpx_write_bit(bc, 1);
for (v = 0; v < noupdates_before_first; ++v)
- vp9_write(bc, 0, upd);
+ vpx_write(bc, 0, upd);
}
- vp9_write(bc, u, upd);
+ vpx_write(bc, u, upd);
if (u) {
/* send/use new probability */
vp9_write_prob_diff_update(bc, newp, *oldp);
@@ -664,7 +664,7 @@
}
}
if (updates == 0) {
- vp9_write_bit(bc, 0); // no updates
+ vpx_write_bit(bc, 0); // no updates
}
return;
}
@@ -673,7 +673,7 @@
}
}
-static void update_coef_probs(VP9_COMP *cpi, vp9_writer* w) {
+static void update_coef_probs(VP9_COMP *cpi, vpx_writer* w) {
const TX_MODE tx_mode = cpi->common.tx_mode;
const TX_SIZE max_tx_size = tx_mode_to_biggest_tx_size[tx_mode];
TX_SIZE tx_size;
@@ -682,7 +682,7 @@
vp9_coeff_probs_model frame_coef_probs[PLANE_TYPES];
if (cpi->td.counts->tx.tx_totals[tx_size] <= 20 ||
(tx_size >= TX_16X16 && cpi->sf.tx_size_search_method == USE_TX_8X8)) {
- vp9_write_bit(w, 0);
+ vpx_write_bit(w, 0);
} else {
build_tree_distribution(cpi, tx_size, frame_branch_ct,
frame_coef_probs);
@@ -693,75 +693,75 @@
}
static void encode_loopfilter(struct loopfilter *lf,
- struct vp9_write_bit_buffer *wb) {
+ struct vpx_write_bit_buffer *wb) {
int i;
// Encode the loop filter level and type
- vp9_wb_write_literal(wb, lf->filter_level, 6);
- vp9_wb_write_literal(wb, lf->sharpness_level, 3);
+ vpx_wb_write_literal(wb, lf->filter_level, 6);
+ vpx_wb_write_literal(wb, lf->sharpness_level, 3);
// Write out loop filter deltas applied at the MB level based on mode or
// ref frame (if they are enabled).
- vp9_wb_write_bit(wb, lf->mode_ref_delta_enabled);
+ vpx_wb_write_bit(wb, lf->mode_ref_delta_enabled);
if (lf->mode_ref_delta_enabled) {
- vp9_wb_write_bit(wb, lf->mode_ref_delta_update);
+ vpx_wb_write_bit(wb, lf->mode_ref_delta_update);
if (lf->mode_ref_delta_update) {
for (i = 0; i < MAX_REF_LF_DELTAS; i++) {
const int delta = lf->ref_deltas[i];
const int changed = delta != lf->last_ref_deltas[i];
- vp9_wb_write_bit(wb, changed);
+ vpx_wb_write_bit(wb, changed);
if (changed) {
lf->last_ref_deltas[i] = delta;
- vp9_wb_write_literal(wb, abs(delta) & 0x3F, 6);
- vp9_wb_write_bit(wb, delta < 0);
+ vpx_wb_write_literal(wb, abs(delta) & 0x3F, 6);
+ vpx_wb_write_bit(wb, delta < 0);
}
}
for (i = 0; i < MAX_MODE_LF_DELTAS; i++) {
const int delta = lf->mode_deltas[i];
const int changed = delta != lf->last_mode_deltas[i];
- vp9_wb_write_bit(wb, changed);
+ vpx_wb_write_bit(wb, changed);
if (changed) {
lf->last_mode_deltas[i] = delta;
- vp9_wb_write_literal(wb, abs(delta) & 0x3F, 6);
- vp9_wb_write_bit(wb, delta < 0);
+ vpx_wb_write_literal(wb, abs(delta) & 0x3F, 6);
+ vpx_wb_write_bit(wb, delta < 0);
}
}
}
}
}
-static void write_delta_q(struct vp9_write_bit_buffer *wb, int delta_q) {
+static void write_delta_q(struct vpx_write_bit_buffer *wb, int delta_q) {
if (delta_q != 0) {
- vp9_wb_write_bit(wb, 1);
- vp9_wb_write_literal(wb, abs(delta_q), 4);
- vp9_wb_write_bit(wb, delta_q < 0);
+ vpx_wb_write_bit(wb, 1);
+ vpx_wb_write_literal(wb, abs(delta_q), 4);
+ vpx_wb_write_bit(wb, delta_q < 0);
} else {
- vp9_wb_write_bit(wb, 0);
+ vpx_wb_write_bit(wb, 0);
}
}
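So a delta-Q costs one bit when zero and six bits otherwise (flag, 4-bit magnitude, sign), which caps |delta_q| at 15. A sketch of the bit budget implied by the layout above:

static int delta_q_bits(int delta_q) {
  return delta_q != 0 ? 1 + 4 + 1 : 1;  /* flag + |delta| + sign, or flag */
}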
static void encode_quantization(const VP9_COMMON *const cm,
- struct vp9_write_bit_buffer *wb) {
- vp9_wb_write_literal(wb, cm->base_qindex, QINDEX_BITS);
+ struct vpx_write_bit_buffer *wb) {
+ vpx_wb_write_literal(wb, cm->base_qindex, QINDEX_BITS);
write_delta_q(wb, cm->y_dc_delta_q);
write_delta_q(wb, cm->uv_dc_delta_q);
write_delta_q(wb, cm->uv_ac_delta_q);
}
static void encode_segmentation(VP9_COMMON *cm, MACROBLOCKD *xd,
- struct vp9_write_bit_buffer *wb) {
+ struct vpx_write_bit_buffer *wb) {
int i, j;
const struct segmentation *seg = &cm->seg;
- vp9_wb_write_bit(wb, seg->enabled);
+ vpx_wb_write_bit(wb, seg->enabled);
if (!seg->enabled)
return;
// Segmentation map
- vp9_wb_write_bit(wb, seg->update_map);
+ vpx_wb_write_bit(wb, seg->update_map);
if (seg->update_map) {
// Select the coding strategy (temporal or spatial)
vp9_choose_segmap_coding_method(cm, xd);
@@ -769,40 +769,40 @@
for (i = 0; i < SEG_TREE_PROBS; i++) {
const int prob = seg->tree_probs[i];
const int update = prob != MAX_PROB;
- vp9_wb_write_bit(wb, update);
+ vpx_wb_write_bit(wb, update);
if (update)
- vp9_wb_write_literal(wb, prob, 8);
+ vpx_wb_write_literal(wb, prob, 8);
}
// Write out the chosen coding method.
- vp9_wb_write_bit(wb, seg->temporal_update);
+ vpx_wb_write_bit(wb, seg->temporal_update);
if (seg->temporal_update) {
for (i = 0; i < PREDICTION_PROBS; i++) {
const int prob = seg->pred_probs[i];
const int update = prob != MAX_PROB;
- vp9_wb_write_bit(wb, update);
+ vpx_wb_write_bit(wb, update);
if (update)
- vp9_wb_write_literal(wb, prob, 8);
+ vpx_wb_write_literal(wb, prob, 8);
}
}
}
// Segmentation data
- vp9_wb_write_bit(wb, seg->update_data);
+ vpx_wb_write_bit(wb, seg->update_data);
if (seg->update_data) {
- vp9_wb_write_bit(wb, seg->abs_delta);
+ vpx_wb_write_bit(wb, seg->abs_delta);
for (i = 0; i < MAX_SEGMENTS; i++) {
for (j = 0; j < SEG_LVL_MAX; j++) {
const int active = segfeature_active(seg, i, j);
- vp9_wb_write_bit(wb, active);
+ vpx_wb_write_bit(wb, active);
if (active) {
const int data = get_segdata(seg, i, j);
const int data_max = vp9_seg_feature_data_max(j);
if (vp9_is_segfeature_signed(j)) {
encode_unsigned_max(wb, abs(data), data_max);
- vp9_wb_write_bit(wb, data < 0);
+ vpx_wb_write_bit(wb, data < 0);
} else {
encode_unsigned_max(wb, data, data_max);
}
@@ -812,12 +812,12 @@
}
}
-static void encode_txfm_probs(VP9_COMMON *cm, vp9_writer *w,
+static void encode_txfm_probs(VP9_COMMON *cm, vpx_writer *w,
FRAME_COUNTS *counts) {
// Mode
- vp9_write_literal(w, MIN(cm->tx_mode, ALLOW_32X32), 2);
+ vpx_write_literal(w, MIN(cm->tx_mode, ALLOW_32X32), 2);
if (cm->tx_mode >= ALLOW_32X32)
- vp9_write_bit(w, cm->tx_mode == TX_MODE_SELECT);
+ vpx_write_bit(w, cm->tx_mode == TX_MODE_SELECT);
// Probabilities
if (cm->tx_mode == TX_MODE_SELECT) {
@@ -850,12 +850,12 @@
}
static void write_interp_filter(INTERP_FILTER filter,
- struct vp9_write_bit_buffer *wb) {
+ struct vpx_write_bit_buffer *wb) {
const int filter_to_literal[] = { 1, 0, 2, 3 };
- vp9_wb_write_bit(wb, filter == SWITCHABLE);
+ vpx_wb_write_bit(wb, filter == SWITCHABLE);
if (filter != SWITCHABLE)
- vp9_wb_write_literal(wb, filter_to_literal[filter], 2);
+ vpx_wb_write_literal(wb, filter_to_literal[filter], 2);
}
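The filter_to_literal[] permutation above determines the decoder-side inverse table. A hedged sketch of that inverse, assuming the enum order EIGHTTAP, EIGHTTAP_SMOOTH, EIGHTTAP_SHARP, BILINEAR from vp9_filter.h:

static const int literal_to_filter[4] = {
  1,  /* literal 0 -> EIGHTTAP_SMOOTH */
  0,  /* literal 1 -> EIGHTTAP */
  2,  /* literal 2 -> EIGHTTAP_SHARP */
  3   /* literal 3 -> BILINEAR */
};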
static void fix_interp_filter(VP9_COMMON *cm, FRAME_COUNTS *counts) {
@@ -882,22 +882,22 @@
}
static void write_tile_info(const VP9_COMMON *const cm,
- struct vp9_write_bit_buffer *wb) {
+ struct vpx_write_bit_buffer *wb) {
int min_log2_tile_cols, max_log2_tile_cols, ones;
vp9_get_tile_n_bits(cm->mi_cols, &min_log2_tile_cols, &max_log2_tile_cols);
// columns
ones = cm->log2_tile_cols - min_log2_tile_cols;
while (ones--)
- vp9_wb_write_bit(wb, 1);
+ vpx_wb_write_bit(wb, 1);
if (cm->log2_tile_cols < max_log2_tile_cols)
- vp9_wb_write_bit(wb, 0);
+ vpx_wb_write_bit(wb, 0);
// rows
- vp9_wb_write_bit(wb, cm->log2_tile_rows != 0);
+ vpx_wb_write_bit(wb, cm->log2_tile_rows != 0);
if (cm->log2_tile_rows != 0)
- vp9_wb_write_bit(wb, cm->log2_tile_rows != 1);
+ vpx_wb_write_bit(wb, cm->log2_tile_rows != 1);
}
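Tile columns use a unary code for the offset above the minimum: that many 1 bits, then a 0 terminator unless the maximum was reached; rows use at most two bits for the values 0..2. A sketch of the column coding in isolation (emit_bit is a hypothetical stand-in for vpx_wb_write_bit):

static void write_tile_cols(int log2_tile_cols, int min_log2, int max_log2,
                            void (*emit_bit)(int bit)) {
  int ones = log2_tile_cols - min_log2;
  while (ones--)
    emit_bit(1);
  if (log2_tile_cols < max_log2)
    emit_bit(0);  /* terminator, omitted at the maximum */
}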
static int get_refresh_mask(VP9_COMP *cpi) {
@@ -928,7 +928,7 @@
static size_t encode_tiles(VP9_COMP *cpi, uint8_t *data_ptr) {
VP9_COMMON *const cm = &cpi->common;
- vp9_writer residual_bc;
+ vpx_writer residual_bc;
int tile_row, tile_col;
TOKENEXTRA *tok_end;
size_t total_size = 0;
@@ -947,14 +947,14 @@
cpi->tok_count[tile_row][tile_col];
if (tile_col < tile_cols - 1 || tile_row < tile_rows - 1)
- vp9_start_encode(&residual_bc, data_ptr + total_size + 4);
+ vpx_start_encode(&residual_bc, data_ptr + total_size + 4);
else
- vp9_start_encode(&residual_bc, data_ptr + total_size);
+ vpx_start_encode(&residual_bc, data_ptr + total_size);
write_modes(cpi, &cpi->tile_data[tile_idx].tile_info,
&residual_bc, &tok, tok_end);
assert(tok == tok_end);
- vp9_stop_encode(&residual_bc);
+ vpx_stop_encode(&residual_bc);
if (tile_col < tile_cols - 1 || tile_row < tile_rows - 1) {
// size of this tile
mem_put_be32(data_ptr + total_size, residual_bc.pos);
@@ -969,26 +969,26 @@
}
static void write_display_size(const VP9_COMMON *cm,
- struct vp9_write_bit_buffer *wb) {
+ struct vpx_write_bit_buffer *wb) {
const int scaling_active = cm->width != cm->display_width ||
cm->height != cm->display_height;
- vp9_wb_write_bit(wb, scaling_active);
+ vpx_wb_write_bit(wb, scaling_active);
if (scaling_active) {
- vp9_wb_write_literal(wb, cm->display_width - 1, 16);
- vp9_wb_write_literal(wb, cm->display_height - 1, 16);
+ vpx_wb_write_literal(wb, cm->display_width - 1, 16);
+ vpx_wb_write_literal(wb, cm->display_height - 1, 16);
}
}
static void write_frame_size(const VP9_COMMON *cm,
- struct vp9_write_bit_buffer *wb) {
- vp9_wb_write_literal(wb, cm->width - 1, 16);
- vp9_wb_write_literal(wb, cm->height - 1, 16);
+ struct vpx_write_bit_buffer *wb) {
+ vpx_wb_write_literal(wb, cm->width - 1, 16);
+ vpx_wb_write_literal(wb, cm->height - 1, 16);
write_display_size(cm, wb);
}
static void write_frame_size_with_refs(VP9_COMP *cpi,
- struct vp9_write_bit_buffer *wb) {
+ struct vpx_write_bit_buffer *wb) {
VP9_COMMON *const cm = &cpi->common;
int found = 0;
@@ -1011,40 +1011,40 @@
found = cm->width == cfg->y_crop_width &&
cm->height == cfg->y_crop_height;
}
- vp9_wb_write_bit(wb, found);
+ vpx_wb_write_bit(wb, found);
if (found) {
break;
}
}
if (!found) {
- vp9_wb_write_literal(wb, cm->width - 1, 16);
- vp9_wb_write_literal(wb, cm->height - 1, 16);
+ vpx_wb_write_literal(wb, cm->width - 1, 16);
+ vpx_wb_write_literal(wb, cm->height - 1, 16);
}
write_display_size(cm, wb);
}
-static void write_sync_code(struct vp9_write_bit_buffer *wb) {
- vp9_wb_write_literal(wb, VP9_SYNC_CODE_0, 8);
- vp9_wb_write_literal(wb, VP9_SYNC_CODE_1, 8);
- vp9_wb_write_literal(wb, VP9_SYNC_CODE_2, 8);
+static void write_sync_code(struct vpx_write_bit_buffer *wb) {
+ vpx_wb_write_literal(wb, VP9_SYNC_CODE_0, 8);
+ vpx_wb_write_literal(wb, VP9_SYNC_CODE_1, 8);
+ vpx_wb_write_literal(wb, VP9_SYNC_CODE_2, 8);
}
static void write_profile(BITSTREAM_PROFILE profile,
- struct vp9_write_bit_buffer *wb) {
+ struct vpx_write_bit_buffer *wb) {
switch (profile) {
case PROFILE_0:
- vp9_wb_write_literal(wb, 0, 2);
+ vpx_wb_write_literal(wb, 0, 2);
break;
case PROFILE_1:
- vp9_wb_write_literal(wb, 2, 2);
+ vpx_wb_write_literal(wb, 2, 2);
break;
case PROFILE_2:
- vp9_wb_write_literal(wb, 1, 2);
+ vpx_wb_write_literal(wb, 1, 2);
break;
case PROFILE_3:
- vp9_wb_write_literal(wb, 6, 3);
+ vpx_wb_write_literal(wb, 6, 3);
break;
default:
assert(0);
@@ -1052,41 +1052,41 @@
}
static void write_bitdepth_colorspace_sampling(
- VP9_COMMON *const cm, struct vp9_write_bit_buffer *wb) {
+ VP9_COMMON *const cm, struct vpx_write_bit_buffer *wb) {
if (cm->profile >= PROFILE_2) {
assert(cm->bit_depth > VPX_BITS_8);
- vp9_wb_write_bit(wb, cm->bit_depth == VPX_BITS_10 ? 0 : 1);
+ vpx_wb_write_bit(wb, cm->bit_depth == VPX_BITS_10 ? 0 : 1);
}
- vp9_wb_write_literal(wb, cm->color_space, 3);
+ vpx_wb_write_literal(wb, cm->color_space, 3);
if (cm->color_space != VPX_CS_SRGB) {
- vp9_wb_write_bit(wb, 0); // 0: [16, 235] (i.e. xvYCC), 1: [0, 255]
+ vpx_wb_write_bit(wb, 0); // 0: [16, 235] (i.e. xvYCC), 1: [0, 255]
if (cm->profile == PROFILE_1 || cm->profile == PROFILE_3) {
assert(cm->subsampling_x != 1 || cm->subsampling_y != 1);
- vp9_wb_write_bit(wb, cm->subsampling_x);
- vp9_wb_write_bit(wb, cm->subsampling_y);
- vp9_wb_write_bit(wb, 0); // unused
+ vpx_wb_write_bit(wb, cm->subsampling_x);
+ vpx_wb_write_bit(wb, cm->subsampling_y);
+ vpx_wb_write_bit(wb, 0); // unused
} else {
assert(cm->subsampling_x == 1 && cm->subsampling_y == 1);
}
} else {
assert(cm->profile == PROFILE_1 || cm->profile == PROFILE_3);
- vp9_wb_write_bit(wb, 0); // unused
+ vpx_wb_write_bit(wb, 0); // unused
}
}
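For profiles 2 and 3 a single bit distinguishes 10-bit from 12-bit; profiles 0 and 1 are always 8-bit and send nothing. A sketch of the mapping implied by the writer above (not a libvpx function):

static int decode_bit_depth(int profile, int bit) {
  if (profile < 2)
    return 8;            /* no bit-depth bit is coded */
  return bit ? 12 : 10;  /* writer sends 0 for 10-bit, 1 for 12-bit */
}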
static void write_uncompressed_header(VP9_COMP *cpi,
- struct vp9_write_bit_buffer *wb) {
+ struct vpx_write_bit_buffer *wb) {
VP9_COMMON *const cm = &cpi->common;
MACROBLOCKD *const xd = &cpi->td.mb.e_mbd;
- vp9_wb_write_literal(wb, VP9_FRAME_MARKER, 2);
+ vpx_wb_write_literal(wb, VP9_FRAME_MARKER, 2);
write_profile(cm->profile, wb);
- vp9_wb_write_bit(wb, 0); // show_existing_frame
- vp9_wb_write_bit(wb, cm->frame_type);
- vp9_wb_write_bit(wb, cm->show_frame);
- vp9_wb_write_bit(wb, cm->error_resilient_mode);
+ vpx_wb_write_bit(wb, 0); // show_existing_frame
+ vpx_wb_write_bit(wb, cm->frame_type);
+ vpx_wb_write_bit(wb, cm->show_frame);
+ vpx_wb_write_bit(wb, cm->error_resilient_mode);
if (cm->frame_type == KEY_FRAME) {
write_sync_code(wb);
@@ -1102,10 +1102,10 @@
// show_existing_frame flag which tells the decoder which frame we want to
// show.
if (!cm->show_frame)
- vp9_wb_write_bit(wb, cm->intra_only);
+ vpx_wb_write_bit(wb, cm->intra_only);
if (!cm->error_resilient_mode)
- vp9_wb_write_literal(wb, cm->reset_frame_context, 2);
+ vpx_wb_write_literal(wb, cm->reset_frame_context, 2);
if (cm->intra_only) {
write_sync_code(wb);
@@ -1115,21 +1115,21 @@
write_bitdepth_colorspace_sampling(cm, wb);
}
- vp9_wb_write_literal(wb, get_refresh_mask(cpi), REF_FRAMES);
+ vpx_wb_write_literal(wb, get_refresh_mask(cpi), REF_FRAMES);
write_frame_size(cm, wb);
} else {
MV_REFERENCE_FRAME ref_frame;
- vp9_wb_write_literal(wb, get_refresh_mask(cpi), REF_FRAMES);
+ vpx_wb_write_literal(wb, get_refresh_mask(cpi), REF_FRAMES);
for (ref_frame = LAST_FRAME; ref_frame <= ALTREF_FRAME; ++ref_frame) {
assert(get_ref_frame_map_idx(cpi, ref_frame) != INVALID_IDX);
- vp9_wb_write_literal(wb, get_ref_frame_map_idx(cpi, ref_frame),
+ vpx_wb_write_literal(wb, get_ref_frame_map_idx(cpi, ref_frame),
REF_FRAMES_LOG2);
- vp9_wb_write_bit(wb, cm->ref_frame_sign_bias[ref_frame]);
+ vpx_wb_write_bit(wb, cm->ref_frame_sign_bias[ref_frame]);
}
write_frame_size_with_refs(cpi, wb);
- vp9_wb_write_bit(wb, cm->allow_high_precision_mv);
+ vpx_wb_write_bit(wb, cm->allow_high_precision_mv);
fix_interp_filter(cm, cpi->td.counts);
write_interp_filter(cm->interp_filter, wb);
@@ -1137,11 +1137,11 @@
}
if (!cm->error_resilient_mode) {
- vp9_wb_write_bit(wb, cm->refresh_frame_context);
- vp9_wb_write_bit(wb, cm->frame_parallel_decoding_mode);
+ vpx_wb_write_bit(wb, cm->refresh_frame_context);
+ vpx_wb_write_bit(wb, cm->frame_parallel_decoding_mode);
}
- vp9_wb_write_literal(wb, cm->frame_context_idx, FRAME_CONTEXTS_LOG2);
+ vpx_wb_write_literal(wb, cm->frame_context_idx, FRAME_CONTEXTS_LOG2);
encode_loopfilter(&cm->lf, wb);
encode_quantization(cm, wb);
@@ -1155,9 +1155,9 @@
MACROBLOCKD *const xd = &cpi->td.mb.e_mbd;
FRAME_CONTEXT *const fc = cm->fc;
FRAME_COUNTS *counts = cpi->td.counts;
- vp9_writer header_bc;
+ vpx_writer header_bc;
- vp9_start_encode(&header_bc, data);
+ vpx_start_encode(&header_bc, data);
if (xd->lossless)
cm->tx_mode = ONLY_4X4;
@@ -1185,9 +1185,9 @@
const int use_compound_pred = cm->reference_mode != SINGLE_REFERENCE;
const int use_hybrid_pred = cm->reference_mode == REFERENCE_MODE_SELECT;
- vp9_write_bit(&header_bc, use_compound_pred);
+ vpx_write_bit(&header_bc, use_compound_pred);
if (use_compound_pred) {
- vp9_write_bit(&header_bc, use_hybrid_pred);
+ vpx_write_bit(&header_bc, use_hybrid_pred);
if (use_hybrid_pred)
for (i = 0; i < COMP_INTER_CONTEXTS; i++)
vp9_cond_prob_diff_update(&header_bc, &fc->comp_inter_prob[i],
@@ -1221,7 +1221,7 @@
&counts->mv);
}
- vp9_stop_encode(&header_bc);
+ vpx_stop_encode(&header_bc);
assert(header_bc.pos <= 0xffff);
return header_bc.pos;
@@ -1230,14 +1230,14 @@
void vp9_pack_bitstream(VP9_COMP *cpi, uint8_t *dest, size_t *size) {
uint8_t *data = dest;
size_t first_part_size, uncompressed_hdr_size;
- struct vp9_write_bit_buffer wb = {data, 0};
- struct vp9_write_bit_buffer saved_wb;
+ struct vpx_write_bit_buffer wb = {data, 0};
+ struct vpx_write_bit_buffer saved_wb;
write_uncompressed_header(cpi, &wb);
saved_wb = wb;
- vp9_wb_write_literal(&wb, 0, 16); // don't know in advance first part. size
+ vpx_wb_write_literal(&wb, 0, 16); // first part size is not known in advance
- uncompressed_hdr_size = vp9_wb_bytes_written(&wb);
+ uncompressed_hdr_size = vpx_wb_bytes_written(&wb);
data += uncompressed_hdr_size;
vp9_clear_system_state();
@@ -1245,7 +1245,7 @@
first_part_size = write_compressed_header(cpi, data);
data += first_part_size;
// TODO(jbb): Figure out what to do if first_part_size > 16 bits.
- vp9_wb_write_literal(&saved_wb, (int)first_part_size, 16);
+ vpx_wb_write_literal(&saved_wb, (int)first_part_size, 16);
data += encode_tiles(cpi, data);
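After these calls the frame buffer is laid out as below; the 16-bit size field is back-patched through saved_wb once the compressed header length is known:

  dest: | uncompressed header | first_part_size (16 bits) | compressed header | tile data |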
diff --git a/vp9/encoder/vp9_cost.c b/vp9/encoder/vp9_cost.c
index 1c3c3d2..e2fbb34 100644
--- a/vp9/encoder/vp9_cost.c
+++ b/vp9/encoder/vp9_cost.c
@@ -7,6 +7,7 @@
* in the file PATENTS. All contributing project authors may
* be found in the AUTHORS file in the root of the source tree.
*/
+#include <assert.h>
#include "vp9/encoder/vp9_cost.h"
@@ -34,14 +35,14 @@
22, 21, 19, 18, 16, 15, 13, 12, 10, 9, 7, 6,
4, 3, 1, 1};
-static void cost(int *costs, vp9_tree tree, const vp9_prob *probs,
+static void cost(int *costs, vpx_tree tree, const vpx_prob *probs,
int i, int c) {
- const vp9_prob prob = probs[i / 2];
+ const vpx_prob prob = probs[i / 2];
int b;
for (b = 0; b <= 1; ++b) {
const int cc = c + vp9_cost_bit(prob, b);
- const vp9_tree_index ii = tree[i + b];
+ const vpx_tree_index ii = tree[i + b];
if (ii <= 0)
costs[-ii] = cc;
@@ -50,11 +51,11 @@
}
}
-void vp9_cost_tokens(int *costs, const vp9_prob *probs, vp9_tree tree) {
+void vp9_cost_tokens(int *costs, const vpx_prob *probs, vpx_tree tree) {
cost(costs, tree, probs, 0, 0);
}
-void vp9_cost_tokens_skip(int *costs, const vp9_prob *probs, vp9_tree tree) {
+void vp9_cost_tokens_skip(int *costs, const vpx_prob *probs, vpx_tree tree) {
assert(tree[0] <= 0 && tree[1] > 0);
costs[-tree[0]] = vp9_cost_bit(probs[0], 0);
diff --git a/vp9/encoder/vp9_cost.h b/vp9/encoder/vp9_cost.h
index 6d2b940..eac74c4 100644
--- a/vp9/encoder/vp9_cost.h
+++ b/vp9/encoder/vp9_cost.h
@@ -11,7 +11,7 @@
#ifndef VP9_ENCODER_VP9_COST_H_
#define VP9_ENCODER_VP9_COST_H_
-#include "vp9/common/vp9_prob.h"
+#include "vpx_dsp/prob.h"
#ifdef __cplusplus
extern "C" {
@@ -21,20 +21,20 @@
#define vp9_cost_zero(prob) (vp9_prob_cost[prob])
-#define vp9_cost_one(prob) vp9_cost_zero(vp9_complement(prob))
+#define vp9_cost_one(prob) vp9_cost_zero(vpx_complement(prob))
-#define vp9_cost_bit(prob, bit) vp9_cost_zero((bit) ? vp9_complement(prob) \
+#define vp9_cost_bit(prob, bit) vp9_cost_zero((bit) ? vpx_complement(prob) \
: (prob))
static INLINE unsigned int cost_branch256(const unsigned int ct[2],
- vp9_prob p) {
+ vpx_prob p) {
return ct[0] * vp9_cost_zero(p) + ct[1] * vp9_cost_one(p);
}
-static INLINE int treed_cost(vp9_tree tree, const vp9_prob *probs,
+static INLINE int treed_cost(vpx_tree tree, const vpx_prob *probs,
int bits, int len) {
int cost = 0;
- vp9_tree_index i = 0;
+ vpx_tree_index i = 0;
do {
const int bit = (bits >> --len) & 1;
@@ -45,8 +45,8 @@
return cost;
}
-void vp9_cost_tokens(int *costs, const vp9_prob *probs, vp9_tree tree);
-void vp9_cost_tokens_skip(int *costs, const vp9_prob *probs, vp9_tree tree);
+void vp9_cost_tokens(int *costs, const vpx_prob *probs, vpx_tree tree);
+void vp9_cost_tokens_skip(int *costs, const vpx_prob *probs, vpx_tree tree);
#ifdef __cplusplus
} // extern "C"
diff --git a/vp9/encoder/vp9_dct.c b/vp9/encoder/vp9_dct.c
index 414d2bb..5dcbd1f 100644
--- a/vp9/encoder/vp9_dct.c
+++ b/vp9/encoder/vp9_dct.c
@@ -13,22 +13,16 @@
#include "./vpx_config.h"
#include "./vp9_rtcd.h"
+#include "./vpx_dsp_rtcd.h"
-#include "vpx_ports/mem.h"
#include "vp9/common/vp9_blockd.h"
#include "vp9/common/vp9_idct.h"
#include "vp9/common/vp9_systemdependent.h"
#include "vp9/encoder/vp9_dct.h"
+#include "vpx_ports/mem.h"
+#include "vpx_dsp/fwd_txfm.h"
-static INLINE tran_high_t fdct_round_shift(tran_high_t input) {
- tran_high_t rv = ROUND_POWER_OF_TWO(input, DCT_CONST_BITS);
- // TODO(debargha, peter.derivaz): Find new bounds for this assert
- // and make the bounds consts.
- // assert(INT16_MIN <= rv && rv <= INT16_MAX);
- return rv;
-}
-
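Throughout these transforms the trig constants are Q14 fixed point and fdct_round_shift() removes the 14 fractional bits (DCT_CONST_BITS). A sketch of the assumed convention, cospi_N_64 == round(cos(N*pi/64) * (1 << 14)); for example it prints 11585 (about 16384/sqrt(2)) for n == 16. Compile with -lm:

#include <math.h>
#include <stdio.h>

#ifndef M_PI
#define M_PI 3.14159265358979323846
#endif

int main(void) {
  int n;
  for (n = 1; n < 32; ++n)
    printf("cospi_%d_64 = %d\n", n, (int)round(cos(n * M_PI / 64.0) * 16384));
  return 0;
}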
-void vp9_fdct4(const tran_low_t *input, tran_low_t *output) {
+static void fdct4(const tran_low_t *input, tran_low_t *output) {
tran_high_t step[4];
tran_high_t temp1, temp2;
@@ -47,157 +41,7 @@
output[3] = (tran_low_t)fdct_round_shift(temp2);
}
-void vp9_fdct4x4_1_c(const int16_t *input, tran_low_t *output, int stride) {
- int r, c;
- tran_low_t sum = 0;
- for (r = 0; r < 4; ++r)
- for (c = 0; c < 4; ++c)
- sum += input[r * stride + c];
-
- output[0] = sum << 1;
- output[1] = 0;
-}
-
-void vp9_fdct4x4_c(const int16_t *input, tran_low_t *output, int stride) {
- // The 2D transform is done with two passes which are actually pretty
- // similar. In the first one, we transform the columns and transpose
- // the results. In the second one, we transform the rows. To achieve that,
- // as the first pass results are transposed, we transpose the columns (that
- // is the transposed rows) and transpose the results (so that it goes back
- // in normal/row positions).
- int pass;
- // We need an intermediate buffer between passes.
- tran_low_t intermediate[4 * 4];
- const int16_t *in_pass0 = input;
- const tran_low_t *in = NULL;
- tran_low_t *out = intermediate;
- // Do the two transform/transpose passes
- for (pass = 0; pass < 2; ++pass) {
- tran_high_t input[4]; // canbe16
- tran_high_t step[4]; // canbe16
- tran_high_t temp1, temp2; // needs32
- int i;
- for (i = 0; i < 4; ++i) {
- // Load inputs.
- if (0 == pass) {
- input[0] = in_pass0[0 * stride] * 16;
- input[1] = in_pass0[1 * stride] * 16;
- input[2] = in_pass0[2 * stride] * 16;
- input[3] = in_pass0[3 * stride] * 16;
- if (i == 0 && input[0]) {
- input[0] += 1;
- }
- } else {
- input[0] = in[0 * 4];
- input[1] = in[1 * 4];
- input[2] = in[2 * 4];
- input[3] = in[3 * 4];
- }
- // Transform.
- step[0] = input[0] + input[3];
- step[1] = input[1] + input[2];
- step[2] = input[1] - input[2];
- step[3] = input[0] - input[3];
- temp1 = (step[0] + step[1]) * cospi_16_64;
- temp2 = (step[0] - step[1]) * cospi_16_64;
- out[0] = (tran_low_t)fdct_round_shift(temp1);
- out[2] = (tran_low_t)fdct_round_shift(temp2);
- temp1 = step[2] * cospi_24_64 + step[3] * cospi_8_64;
- temp2 = -step[2] * cospi_8_64 + step[3] * cospi_24_64;
- out[1] = (tran_low_t)fdct_round_shift(temp1);
- out[3] = (tran_low_t)fdct_round_shift(temp2);
- // Do next column (which is a transposed row in second/horizontal pass)
- in_pass0++;
- in++;
- out += 4;
- }
- // Setup in/out for next pass.
- in = intermediate;
- out = output;
- }
-
- {
- int i, j;
- for (i = 0; i < 4; ++i) {
- for (j = 0; j < 4; ++j)
- output[j + i * 4] = (output[j + i * 4] + 1) >> 2;
- }
- }
-}
-
-void vp9_fadst4(const tran_low_t *input, tran_low_t *output) {
- tran_high_t x0, x1, x2, x3;
- tran_high_t s0, s1, s2, s3, s4, s5, s6, s7;
-
- x0 = input[0];
- x1 = input[1];
- x2 = input[2];
- x3 = input[3];
-
- if (!(x0 | x1 | x2 | x3)) {
- output[0] = output[1] = output[2] = output[3] = 0;
- return;
- }
-
- s0 = sinpi_1_9 * x0;
- s1 = sinpi_4_9 * x0;
- s2 = sinpi_2_9 * x1;
- s3 = sinpi_1_9 * x1;
- s4 = sinpi_3_9 * x2;
- s5 = sinpi_4_9 * x3;
- s6 = sinpi_2_9 * x3;
- s7 = x0 + x1 - x3;
-
- x0 = s0 + s2 + s5;
- x1 = sinpi_3_9 * s7;
- x2 = s1 - s3 + s6;
- x3 = s4;
-
- s0 = x0 + x3;
- s1 = x1;
- s2 = x2 - x3;
- s3 = x2 - x0 + x3;
-
- // 1-D transform scaling factor is sqrt(2).
- output[0] = (tran_low_t)fdct_round_shift(s0);
- output[1] = (tran_low_t)fdct_round_shift(s1);
- output[2] = (tran_low_t)fdct_round_shift(s2);
- output[3] = (tran_low_t)fdct_round_shift(s3);
-}
-
-void vp9_fht4x4_c(const int16_t *input, tran_low_t *output,
- int stride, int tx_type) {
- if (tx_type == DCT_DCT) {
- vp9_fdct4x4_c(input, output, stride);
- } else {
- tran_low_t out[4 * 4];
- int i, j;
- tran_low_t temp_in[4], temp_out[4];
- const transform_2d ht = FHT_4[tx_type];
-
- // Columns
- for (i = 0; i < 4; ++i) {
- for (j = 0; j < 4; ++j)
- temp_in[j] = input[j * stride + i] * 16;
- if (i == 0 && temp_in[0])
- temp_in[0] += 1;
- ht.cols(temp_in, temp_out);
- for (j = 0; j < 4; ++j)
- out[j * 4 + i] = temp_out[j];
- }
-
- // Rows
- for (i = 0; i < 4; ++i) {
- for (j = 0; j < 4; ++j)
- temp_in[j] = out[j + i * 4];
- ht.rows(temp_in, temp_out);
- for (j = 0; j < 4; ++j)
- output[j + i * 4] = (temp_out[j] + 1) >> 2;
- }
- }
-}
-
-void vp9_fdct8(const tran_low_t *input, tran_low_t *output) {
+static void fdct8(const tran_low_t *input, tran_low_t *output) {
tran_high_t s0, s1, s2, s3, s4, s5, s6, s7; // canbe16
tran_high_t t0, t1, t2, t3; // needs32
tran_high_t x0, x1, x2, x3; // canbe16
@@ -249,550 +93,7 @@
output[7] = (tran_low_t)fdct_round_shift(t3);
}
-void vp9_fdct8x8_1_c(const int16_t *input, tran_low_t *output, int stride) {
- int r, c;
- tran_low_t sum = 0;
- for (r = 0; r < 8; ++r)
- for (c = 0; c < 8; ++c)
- sum += input[r * stride + c];
-
- output[0] = sum;
- output[1] = 0;
-}
-
-void vp9_fdct8x8_c(const int16_t *input, tran_low_t *final_output, int stride) {
- int i, j;
- tran_low_t intermediate[64];
-
- // Transform columns
- {
- tran_low_t *output = intermediate;
- tran_high_t s0, s1, s2, s3, s4, s5, s6, s7; // canbe16
- tran_high_t t0, t1, t2, t3; // needs32
- tran_high_t x0, x1, x2, x3; // canbe16
-
- int i;
- for (i = 0; i < 8; i++) {
- // stage 1
- s0 = (input[0 * stride] + input[7 * stride]) * 4;
- s1 = (input[1 * stride] + input[6 * stride]) * 4;
- s2 = (input[2 * stride] + input[5 * stride]) * 4;
- s3 = (input[3 * stride] + input[4 * stride]) * 4;
- s4 = (input[3 * stride] - input[4 * stride]) * 4;
- s5 = (input[2 * stride] - input[5 * stride]) * 4;
- s6 = (input[1 * stride] - input[6 * stride]) * 4;
- s7 = (input[0 * stride] - input[7 * stride]) * 4;
-
- // fdct4(step, step);
- x0 = s0 + s3;
- x1 = s1 + s2;
- x2 = s1 - s2;
- x3 = s0 - s3;
- t0 = (x0 + x1) * cospi_16_64;
- t1 = (x0 - x1) * cospi_16_64;
- t2 = x2 * cospi_24_64 + x3 * cospi_8_64;
- t3 = -x2 * cospi_8_64 + x3 * cospi_24_64;
- output[0 * 8] = (tran_low_t)fdct_round_shift(t0);
- output[2 * 8] = (tran_low_t)fdct_round_shift(t2);
- output[4 * 8] = (tran_low_t)fdct_round_shift(t1);
- output[6 * 8] = (tran_low_t)fdct_round_shift(t3);
-
- // Stage 2
- t0 = (s6 - s5) * cospi_16_64;
- t1 = (s6 + s5) * cospi_16_64;
- t2 = fdct_round_shift(t0);
- t3 = fdct_round_shift(t1);
-
- // Stage 3
- x0 = s4 + t2;
- x1 = s4 - t2;
- x2 = s7 - t3;
- x3 = s7 + t3;
-
- // Stage 4
- t0 = x0 * cospi_28_64 + x3 * cospi_4_64;
- t1 = x1 * cospi_12_64 + x2 * cospi_20_64;
- t2 = x2 * cospi_12_64 + x1 * -cospi_20_64;
- t3 = x3 * cospi_28_64 + x0 * -cospi_4_64;
- output[1 * 8] = (tran_low_t)fdct_round_shift(t0);
- output[3 * 8] = (tran_low_t)fdct_round_shift(t2);
- output[5 * 8] = (tran_low_t)fdct_round_shift(t1);
- output[7 * 8] = (tran_low_t)fdct_round_shift(t3);
- input++;
- output++;
- }
- }
-
- // Rows
- for (i = 0; i < 8; ++i) {
- vp9_fdct8(&intermediate[i * 8], &final_output[i * 8]);
- for (j = 0; j < 8; ++j)
- final_output[j + i * 8] /= 2;
- }
-}
-
-void vp9_fdct8x8_quant_c(const int16_t *input, int stride,
- tran_low_t *coeff_ptr, intptr_t n_coeffs,
- int skip_block,
- const int16_t *zbin_ptr, const int16_t *round_ptr,
- const int16_t *quant_ptr,
- const int16_t *quant_shift_ptr,
- tran_low_t *qcoeff_ptr, tran_low_t *dqcoeff_ptr,
- const int16_t *dequant_ptr,
- uint16_t *eob_ptr,
- const int16_t *scan, const int16_t *iscan) {
- int eob = -1;
-
- int i, j;
- tran_low_t intermediate[64];
-
- // Transform columns
- {
- tran_low_t *output = intermediate;
- tran_high_t s0, s1, s2, s3, s4, s5, s6, s7; // canbe16
- tran_high_t t0, t1, t2, t3; // needs32
- tran_high_t x0, x1, x2, x3; // canbe16
-
- int i;
- for (i = 0; i < 8; i++) {
- // stage 1
- s0 = (input[0 * stride] + input[7 * stride]) * 4;
- s1 = (input[1 * stride] + input[6 * stride]) * 4;
- s2 = (input[2 * stride] + input[5 * stride]) * 4;
- s3 = (input[3 * stride] + input[4 * stride]) * 4;
- s4 = (input[3 * stride] - input[4 * stride]) * 4;
- s5 = (input[2 * stride] - input[5 * stride]) * 4;
- s6 = (input[1 * stride] - input[6 * stride]) * 4;
- s7 = (input[0 * stride] - input[7 * stride]) * 4;
-
- // fdct4(step, step);
- x0 = s0 + s3;
- x1 = s1 + s2;
- x2 = s1 - s2;
- x3 = s0 - s3;
- t0 = (x0 + x1) * cospi_16_64;
- t1 = (x0 - x1) * cospi_16_64;
- t2 = x2 * cospi_24_64 + x3 * cospi_8_64;
- t3 = -x2 * cospi_8_64 + x3 * cospi_24_64;
- output[0 * 8] = (tran_low_t)fdct_round_shift(t0);
- output[2 * 8] = (tran_low_t)fdct_round_shift(t2);
- output[4 * 8] = (tran_low_t)fdct_round_shift(t1);
- output[6 * 8] = (tran_low_t)fdct_round_shift(t3);
-
- // Stage 2
- t0 = (s6 - s5) * cospi_16_64;
- t1 = (s6 + s5) * cospi_16_64;
- t2 = fdct_round_shift(t0);
- t3 = fdct_round_shift(t1);
-
- // Stage 3
- x0 = s4 + t2;
- x1 = s4 - t2;
- x2 = s7 - t3;
- x3 = s7 + t3;
-
- // Stage 4
- t0 = x0 * cospi_28_64 + x3 * cospi_4_64;
- t1 = x1 * cospi_12_64 + x2 * cospi_20_64;
- t2 = x2 * cospi_12_64 + x1 * -cospi_20_64;
- t3 = x3 * cospi_28_64 + x0 * -cospi_4_64;
- output[1 * 8] = (tran_low_t)fdct_round_shift(t0);
- output[3 * 8] = (tran_low_t)fdct_round_shift(t2);
- output[5 * 8] = (tran_low_t)fdct_round_shift(t1);
- output[7 * 8] = (tran_low_t)fdct_round_shift(t3);
- input++;
- output++;
- }
- }
-
- // Rows
- for (i = 0; i < 8; ++i) {
- vp9_fdct8(&intermediate[i * 8], &coeff_ptr[i * 8]);
- for (j = 0; j < 8; ++j)
- coeff_ptr[j + i * 8] /= 2;
- }
-
- // TODO(jingning) Decide the need of these arguments after the
- // quantization process is completed.
- (void)zbin_ptr;
- (void)quant_shift_ptr;
- (void)iscan;
-
- memset(qcoeff_ptr, 0, n_coeffs * sizeof(*qcoeff_ptr));
- memset(dqcoeff_ptr, 0, n_coeffs * sizeof(*dqcoeff_ptr));
-
- if (!skip_block) {
- // Quantization pass: All coefficients with index >= zero_flag are
- // skippable. Note: zero_flag can be zero.
- for (i = 0; i < n_coeffs; i++) {
- const int rc = scan[i];
- const int coeff = coeff_ptr[rc];
- const int coeff_sign = (coeff >> 31);
- const int abs_coeff = (coeff ^ coeff_sign) - coeff_sign;
-
- int tmp = clamp(abs_coeff + round_ptr[rc != 0], INT16_MIN, INT16_MAX);
- tmp = (tmp * quant_ptr[rc != 0]) >> 16;
-
- qcoeff_ptr[rc] = (tmp ^ coeff_sign) - coeff_sign;
- dqcoeff_ptr[rc] = qcoeff_ptr[rc] * dequant_ptr[rc != 0];
-
- if (tmp)
- eob = i;
- }
- }
- *eob_ptr = eob + 1;
-}
-
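The removed quantization pass follows the usual scalar VP9 quantizer shape: saturate |coeff| + round, scale by a fixed-point multiplier, then restore the sign. A compact sketch of one coefficient (a restatement of the loop above, not a drop-in replacement):

#include <stdint.h>

static int16_t quantize_one(int32_t coeff, int32_t round, int32_t quant) {
  const int32_t sign = coeff >> 31;             /* 0 or -1 */
  const int32_t abs_coeff = (coeff ^ sign) - sign;
  int32_t tmp = abs_coeff + round;
  if (tmp > INT16_MAX) tmp = INT16_MAX;         /* clamp to int16 range */
  if (tmp < INT16_MIN) tmp = INT16_MIN;
  tmp = (tmp * quant) >> 16;                    /* fixed-point scale */
  return (int16_t)((tmp ^ sign) - sign);        /* reapply the sign */
}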
-void vp9_fdct16x16_1_c(const int16_t *input, tran_low_t *output, int stride) {
- int r, c;
- tran_low_t sum = 0;
- for (r = 0; r < 16; ++r)
- for (c = 0; c < 16; ++c)
- sum += input[r * stride + c];
-
- output[0] = sum >> 1;
- output[1] = 0;
-}
-
-void vp9_fdct16x16_c(const int16_t *input, tran_low_t *output, int stride) {
- // The 2D transform is done with two passes which are actually pretty
- // similar. In the first one, we transform the columns and transpose
- // the results. In the second one, we transform the rows. To achieve that,
- // as the first pass results are transposed, we transpose the columns (that
- // is the transposed rows) and transpose the results (so that it goes back
- // in normal/row positions).
- int pass;
- // We need an intermediate buffer between passes.
- tran_low_t intermediate[256];
- const int16_t *in_pass0 = input;
- const tran_low_t *in = NULL;
- tran_low_t *out = intermediate;
- // Do the two transform/transpose passes
- for (pass = 0; pass < 2; ++pass) {
- tran_high_t step1[8]; // canbe16
- tran_high_t step2[8]; // canbe16
- tran_high_t step3[8]; // canbe16
- tran_high_t input[8]; // canbe16
- tran_high_t temp1, temp2; // needs32
- int i;
- for (i = 0; i < 16; i++) {
- if (0 == pass) {
- // Calculate input for the first 8 results.
- input[0] = (in_pass0[0 * stride] + in_pass0[15 * stride]) * 4;
- input[1] = (in_pass0[1 * stride] + in_pass0[14 * stride]) * 4;
- input[2] = (in_pass0[2 * stride] + in_pass0[13 * stride]) * 4;
- input[3] = (in_pass0[3 * stride] + in_pass0[12 * stride]) * 4;
- input[4] = (in_pass0[4 * stride] + in_pass0[11 * stride]) * 4;
- input[5] = (in_pass0[5 * stride] + in_pass0[10 * stride]) * 4;
- input[6] = (in_pass0[6 * stride] + in_pass0[ 9 * stride]) * 4;
- input[7] = (in_pass0[7 * stride] + in_pass0[ 8 * stride]) * 4;
- // Calculate input for the next 8 results.
- step1[0] = (in_pass0[7 * stride] - in_pass0[ 8 * stride]) * 4;
- step1[1] = (in_pass0[6 * stride] - in_pass0[ 9 * stride]) * 4;
- step1[2] = (in_pass0[5 * stride] - in_pass0[10 * stride]) * 4;
- step1[3] = (in_pass0[4 * stride] - in_pass0[11 * stride]) * 4;
- step1[4] = (in_pass0[3 * stride] - in_pass0[12 * stride]) * 4;
- step1[5] = (in_pass0[2 * stride] - in_pass0[13 * stride]) * 4;
- step1[6] = (in_pass0[1 * stride] - in_pass0[14 * stride]) * 4;
- step1[7] = (in_pass0[0 * stride] - in_pass0[15 * stride]) * 4;
- } else {
- // Calculate input for the first 8 results.
- input[0] = ((in[0 * 16] + 1) >> 2) + ((in[15 * 16] + 1) >> 2);
- input[1] = ((in[1 * 16] + 1) >> 2) + ((in[14 * 16] + 1) >> 2);
- input[2] = ((in[2 * 16] + 1) >> 2) + ((in[13 * 16] + 1) >> 2);
- input[3] = ((in[3 * 16] + 1) >> 2) + ((in[12 * 16] + 1) >> 2);
- input[4] = ((in[4 * 16] + 1) >> 2) + ((in[11 * 16] + 1) >> 2);
- input[5] = ((in[5 * 16] + 1) >> 2) + ((in[10 * 16] + 1) >> 2);
- input[6] = ((in[6 * 16] + 1) >> 2) + ((in[ 9 * 16] + 1) >> 2);
- input[7] = ((in[7 * 16] + 1) >> 2) + ((in[ 8 * 16] + 1) >> 2);
- // Calculate input for the next 8 results.
- step1[0] = ((in[7 * 16] + 1) >> 2) - ((in[ 8 * 16] + 1) >> 2);
- step1[1] = ((in[6 * 16] + 1) >> 2) - ((in[ 9 * 16] + 1) >> 2);
- step1[2] = ((in[5 * 16] + 1) >> 2) - ((in[10 * 16] + 1) >> 2);
- step1[3] = ((in[4 * 16] + 1) >> 2) - ((in[11 * 16] + 1) >> 2);
- step1[4] = ((in[3 * 16] + 1) >> 2) - ((in[12 * 16] + 1) >> 2);
- step1[5] = ((in[2 * 16] + 1) >> 2) - ((in[13 * 16] + 1) >> 2);
- step1[6] = ((in[1 * 16] + 1) >> 2) - ((in[14 * 16] + 1) >> 2);
- step1[7] = ((in[0 * 16] + 1) >> 2) - ((in[15 * 16] + 1) >> 2);
- }
- // Work on the first eight values; fdct8(input, even_results);
- {
- tran_high_t s0, s1, s2, s3, s4, s5, s6, s7; // canbe16
- tran_high_t t0, t1, t2, t3; // needs32
- tran_high_t x0, x1, x2, x3; // canbe16
-
- // stage 1
- s0 = input[0] + input[7];
- s1 = input[1] + input[6];
- s2 = input[2] + input[5];
- s3 = input[3] + input[4];
- s4 = input[3] - input[4];
- s5 = input[2] - input[5];
- s6 = input[1] - input[6];
- s7 = input[0] - input[7];
-
- // fdct4(step, step);
- x0 = s0 + s3;
- x1 = s1 + s2;
- x2 = s1 - s2;
- x3 = s0 - s3;
- t0 = (x0 + x1) * cospi_16_64;
- t1 = (x0 - x1) * cospi_16_64;
- t2 = x3 * cospi_8_64 + x2 * cospi_24_64;
- t3 = x3 * cospi_24_64 - x2 * cospi_8_64;
- out[0] = (tran_low_t)fdct_round_shift(t0);
- out[4] = (tran_low_t)fdct_round_shift(t2);
- out[8] = (tran_low_t)fdct_round_shift(t1);
- out[12] = (tran_low_t)fdct_round_shift(t3);
-
- // Stage 2
- t0 = (s6 - s5) * cospi_16_64;
- t1 = (s6 + s5) * cospi_16_64;
- t2 = fdct_round_shift(t0);
- t3 = fdct_round_shift(t1);
-
- // Stage 3
- x0 = s4 + t2;
- x1 = s4 - t2;
- x2 = s7 - t3;
- x3 = s7 + t3;
-
- // Stage 4
- t0 = x0 * cospi_28_64 + x3 * cospi_4_64;
- t1 = x1 * cospi_12_64 + x2 * cospi_20_64;
- t2 = x2 * cospi_12_64 + x1 * -cospi_20_64;
- t3 = x3 * cospi_28_64 + x0 * -cospi_4_64;
- out[2] = (tran_low_t)fdct_round_shift(t0);
- out[6] = (tran_low_t)fdct_round_shift(t2);
- out[10] = (tran_low_t)fdct_round_shift(t1);
- out[14] = (tran_low_t)fdct_round_shift(t3);
- }
- // Work on the next eight values; step1 -> odd_results
- {
- // step 2
- temp1 = (step1[5] - step1[2]) * cospi_16_64;
- temp2 = (step1[4] - step1[3]) * cospi_16_64;
- step2[2] = fdct_round_shift(temp1);
- step2[3] = fdct_round_shift(temp2);
- temp1 = (step1[4] + step1[3]) * cospi_16_64;
- temp2 = (step1[5] + step1[2]) * cospi_16_64;
- step2[4] = fdct_round_shift(temp1);
- step2[5] = fdct_round_shift(temp2);
- // step 3
- step3[0] = step1[0] + step2[3];
- step3[1] = step1[1] + step2[2];
- step3[2] = step1[1] - step2[2];
- step3[3] = step1[0] - step2[3];
- step3[4] = step1[7] - step2[4];
- step3[5] = step1[6] - step2[5];
- step3[6] = step1[6] + step2[5];
- step3[7] = step1[7] + step2[4];
- // step 4
- temp1 = step3[1] * -cospi_8_64 + step3[6] * cospi_24_64;
- temp2 = step3[2] * cospi_24_64 + step3[5] * cospi_8_64;
- step2[1] = fdct_round_shift(temp1);
- step2[2] = fdct_round_shift(temp2);
- temp1 = step3[2] * cospi_8_64 - step3[5] * cospi_24_64;
- temp2 = step3[1] * cospi_24_64 + step3[6] * cospi_8_64;
- step2[5] = fdct_round_shift(temp1);
- step2[6] = fdct_round_shift(temp2);
- // step 5
- step1[0] = step3[0] + step2[1];
- step1[1] = step3[0] - step2[1];
- step1[2] = step3[3] + step2[2];
- step1[3] = step3[3] - step2[2];
- step1[4] = step3[4] - step2[5];
- step1[5] = step3[4] + step2[5];
- step1[6] = step3[7] - step2[6];
- step1[7] = step3[7] + step2[6];
- // step 6
- temp1 = step1[0] * cospi_30_64 + step1[7] * cospi_2_64;
- temp2 = step1[1] * cospi_14_64 + step1[6] * cospi_18_64;
- out[1] = (tran_low_t)fdct_round_shift(temp1);
- out[9] = (tran_low_t)fdct_round_shift(temp2);
- temp1 = step1[2] * cospi_22_64 + step1[5] * cospi_10_64;
- temp2 = step1[3] * cospi_6_64 + step1[4] * cospi_26_64;
- out[5] = (tran_low_t)fdct_round_shift(temp1);
- out[13] = (tran_low_t)fdct_round_shift(temp2);
- temp1 = step1[3] * -cospi_26_64 + step1[4] * cospi_6_64;
- temp2 = step1[2] * -cospi_10_64 + step1[5] * cospi_22_64;
- out[3] = (tran_low_t)fdct_round_shift(temp1);
- out[11] = (tran_low_t)fdct_round_shift(temp2);
- temp1 = step1[1] * -cospi_18_64 + step1[6] * cospi_14_64;
- temp2 = step1[0] * -cospi_2_64 + step1[7] * cospi_30_64;
- out[7] = (tran_low_t)fdct_round_shift(temp1);
- out[15] = (tran_low_t)fdct_round_shift(temp2);
- }
- // Do next column (which is a transposed row in second/horizontal pass)
- in++;
- in_pass0++;
- out += 16;
- }
- // Setup in/out for next pass.
- in = intermediate;
- out = output;
- }
-}
-
-void vp9_fadst8(const tran_low_t *input, tran_low_t *output) {
- tran_high_t s0, s1, s2, s3, s4, s5, s6, s7;
-
- tran_high_t x0 = input[7];
- tran_high_t x1 = input[0];
- tran_high_t x2 = input[5];
- tran_high_t x3 = input[2];
- tran_high_t x4 = input[3];
- tran_high_t x5 = input[4];
- tran_high_t x6 = input[1];
- tran_high_t x7 = input[6];
-
- // stage 1
- s0 = cospi_2_64 * x0 + cospi_30_64 * x1;
- s1 = cospi_30_64 * x0 - cospi_2_64 * x1;
- s2 = cospi_10_64 * x2 + cospi_22_64 * x3;
- s3 = cospi_22_64 * x2 - cospi_10_64 * x3;
- s4 = cospi_18_64 * x4 + cospi_14_64 * x5;
- s5 = cospi_14_64 * x4 - cospi_18_64 * x5;
- s6 = cospi_26_64 * x6 + cospi_6_64 * x7;
- s7 = cospi_6_64 * x6 - cospi_26_64 * x7;
-
- x0 = fdct_round_shift(s0 + s4);
- x1 = fdct_round_shift(s1 + s5);
- x2 = fdct_round_shift(s2 + s6);
- x3 = fdct_round_shift(s3 + s7);
- x4 = fdct_round_shift(s0 - s4);
- x5 = fdct_round_shift(s1 - s5);
- x6 = fdct_round_shift(s2 - s6);
- x7 = fdct_round_shift(s3 - s7);
-
- // stage 2
- s0 = x0;
- s1 = x1;
- s2 = x2;
- s3 = x3;
- s4 = cospi_8_64 * x4 + cospi_24_64 * x5;
- s5 = cospi_24_64 * x4 - cospi_8_64 * x5;
- s6 = - cospi_24_64 * x6 + cospi_8_64 * x7;
- s7 = cospi_8_64 * x6 + cospi_24_64 * x7;
-
- x0 = s0 + s2;
- x1 = s1 + s3;
- x2 = s0 - s2;
- x3 = s1 - s3;
- x4 = fdct_round_shift(s4 + s6);
- x5 = fdct_round_shift(s5 + s7);
- x6 = fdct_round_shift(s4 - s6);
- x7 = fdct_round_shift(s5 - s7);
-
- // stage 3
- s2 = cospi_16_64 * (x2 + x3);
- s3 = cospi_16_64 * (x2 - x3);
- s6 = cospi_16_64 * (x6 + x7);
- s7 = cospi_16_64 * (x6 - x7);
-
- x2 = fdct_round_shift(s2);
- x3 = fdct_round_shift(s3);
- x6 = fdct_round_shift(s6);
- x7 = fdct_round_shift(s7);
-
- output[0] = (tran_low_t)x0;
- output[1] = (tran_low_t)-x4;
- output[2] = (tran_low_t)x6;
- output[3] = (tran_low_t)-x2;
- output[4] = (tran_low_t)x3;
- output[5] = (tran_low_t)-x7;
- output[6] = (tran_low_t)x5;
- output[7] = (tran_low_t)-x1;
-}
-
-void vp9_fht8x8_c(const int16_t *input, tran_low_t *output,
- int stride, int tx_type) {
- if (tx_type == DCT_DCT) {
- vp9_fdct8x8_c(input, output, stride);
- } else {
- tran_low_t out[64];
- int i, j;
- tran_low_t temp_in[8], temp_out[8];
- const transform_2d ht = FHT_8[tx_type];
-
- // Columns
- for (i = 0; i < 8; ++i) {
- for (j = 0; j < 8; ++j)
- temp_in[j] = input[j * stride + i] * 4;
- ht.cols(temp_in, temp_out);
- for (j = 0; j < 8; ++j)
- out[j * 8 + i] = temp_out[j];
- }
-
- // Rows
- for (i = 0; i < 8; ++i) {
- for (j = 0; j < 8; ++j)
- temp_in[j] = out[j + i * 8];
- ht.rows(temp_in, temp_out);
- for (j = 0; j < 8; ++j)
- output[j + i * 8] = (temp_out[j] + (temp_out[j] < 0)) >> 1;
- }
- }
-}
-
-/* 4-point reversible, orthonormal Walsh-Hadamard in 3.5 adds, 0.5 shifts per
- pixel. */
-void vp9_fwht4x4_c(const int16_t *input, tran_low_t *output, int stride) {
- int i;
- tran_high_t a1, b1, c1, d1, e1;
- const int16_t *ip_pass0 = input;
- const tran_low_t *ip = NULL;
- tran_low_t *op = output;
-
- for (i = 0; i < 4; i++) {
- a1 = ip_pass0[0 * stride];
- b1 = ip_pass0[1 * stride];
- c1 = ip_pass0[2 * stride];
- d1 = ip_pass0[3 * stride];
-
- a1 += b1;
- d1 = d1 - c1;
- e1 = (a1 - d1) >> 1;
- b1 = e1 - b1;
- c1 = e1 - c1;
- a1 -= c1;
- d1 += b1;
- op[0] = (tran_low_t)a1;
- op[4] = (tran_low_t)c1;
- op[8] = (tran_low_t)d1;
- op[12] = (tran_low_t)b1;
-
- ip_pass0++;
- op++;
- }
- ip = output;
- op = output;
-
- for (i = 0; i < 4; i++) {
- a1 = ip[0];
- b1 = ip[1];
- c1 = ip[2];
- d1 = ip[3];
-
- a1 += b1;
- d1 -= c1;
- e1 = (a1 - d1) >> 1;
- b1 = e1 - b1;
- c1 = e1 - c1;
- a1 -= c1;
- d1 += b1;
- op[0] = (tran_low_t)(a1 * UNIT_QUANT_FACTOR);
- op[1] = (tran_low_t)(c1 * UNIT_QUANT_FACTOR);
- op[2] = (tran_low_t)(d1 * UNIT_QUANT_FACTOR);
- op[3] = (tran_low_t)(b1 * UNIT_QUANT_FACTOR);
-
- ip += 4;
- op += 4;
- }
-}
-
-// Rewrote to use same algorithm as others.
-void vp9_fdct16(const tran_low_t in[16], tran_low_t out[16]) {
+static void fdct16(const tran_low_t in[16], tran_low_t out[16]) {
tran_high_t step1[8]; // canbe16
tran_high_t step2[8]; // canbe16
tran_high_t step3[8]; // canbe16
@@ -933,7 +234,118 @@
out[15] = (tran_low_t)fdct_round_shift(temp2);
}
-void vp9_fadst16(const tran_low_t *input, tran_low_t *output) {
+static void fadst4(const tran_low_t *input, tran_low_t *output) {
+ tran_high_t x0, x1, x2, x3;
+ tran_high_t s0, s1, s2, s3, s4, s5, s6, s7;
+
+ x0 = input[0];
+ x1 = input[1];
+ x2 = input[2];
+ x3 = input[3];
+
+ if (!(x0 | x1 | x2 | x3)) {
+ output[0] = output[1] = output[2] = output[3] = 0;
+ return;
+ }
+
+ s0 = sinpi_1_9 * x0;
+ s1 = sinpi_4_9 * x0;
+ s2 = sinpi_2_9 * x1;
+ s3 = sinpi_1_9 * x1;
+ s4 = sinpi_3_9 * x2;
+ s5 = sinpi_4_9 * x3;
+ s6 = sinpi_2_9 * x3;
+ s7 = x0 + x1 - x3;
+
+ x0 = s0 + s2 + s5;
+ x1 = sinpi_3_9 * s7;
+ x2 = s1 - s3 + s6;
+ x3 = s4;
+
+ s0 = x0 + x3;
+ s1 = x1;
+ s2 = x2 - x3;
+ s3 = x2 - x0 + x3;
+
+ // 1-D transform scaling factor is sqrt(2).
+ output[0] = (tran_low_t)fdct_round_shift(s0);
+ output[1] = (tran_low_t)fdct_round_shift(s1);
+ output[2] = (tran_low_t)fdct_round_shift(s2);
+ output[3] = (tran_low_t)fdct_round_shift(s3);
+}
+
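Every multiply in these 1-D transforms is against a 14-bit fixed-point trig
constant, so each stage ends in fdct_round_shift(). A minimal sketch of that
helper, assuming the libvpx convention DCT_CONST_BITS == 14 and a 64-bit
accumulator (the real definitions live in the shared transform headers):

    #include <stdint.h>

    #define DCT_CONST_BITS 14
    #define ROUND_POWER_OF_TWO(value, n) (((value) + (1 << ((n) - 1))) >> (n))

    typedef int64_t tran_high_t;  /* assumption: high-bitdepth accumulator */

    static tran_high_t fdct_round_shift(tran_high_t input) {
      /* Adding half of 2^DCT_CONST_BITS before the shift rounds to nearest
       * instead of truncating toward negative infinity. */
      return ROUND_POWER_OF_TWO(input, DCT_CONST_BITS);
    }
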
+static void fadst8(const tran_low_t *input, tran_low_t *output) {
+ tran_high_t s0, s1, s2, s3, s4, s5, s6, s7;
+
+ tran_high_t x0 = input[7];
+ tran_high_t x1 = input[0];
+ tran_high_t x2 = input[5];
+ tran_high_t x3 = input[2];
+ tran_high_t x4 = input[3];
+ tran_high_t x5 = input[4];
+ tran_high_t x6 = input[1];
+ tran_high_t x7 = input[6];
+
+ // stage 1
+ s0 = cospi_2_64 * x0 + cospi_30_64 * x1;
+ s1 = cospi_30_64 * x0 - cospi_2_64 * x1;
+ s2 = cospi_10_64 * x2 + cospi_22_64 * x3;
+ s3 = cospi_22_64 * x2 - cospi_10_64 * x3;
+ s4 = cospi_18_64 * x4 + cospi_14_64 * x5;
+ s5 = cospi_14_64 * x4 - cospi_18_64 * x5;
+ s6 = cospi_26_64 * x6 + cospi_6_64 * x7;
+ s7 = cospi_6_64 * x6 - cospi_26_64 * x7;
+
+ x0 = fdct_round_shift(s0 + s4);
+ x1 = fdct_round_shift(s1 + s5);
+ x2 = fdct_round_shift(s2 + s6);
+ x3 = fdct_round_shift(s3 + s7);
+ x4 = fdct_round_shift(s0 - s4);
+ x5 = fdct_round_shift(s1 - s5);
+ x6 = fdct_round_shift(s2 - s6);
+ x7 = fdct_round_shift(s3 - s7);
+
+ // stage 2
+ s0 = x0;
+ s1 = x1;
+ s2 = x2;
+ s3 = x3;
+ s4 = cospi_8_64 * x4 + cospi_24_64 * x5;
+ s5 = cospi_24_64 * x4 - cospi_8_64 * x5;
+  s6 = -cospi_24_64 * x6 + cospi_8_64 * x7;
+ s7 = cospi_8_64 * x6 + cospi_24_64 * x7;
+
+ x0 = s0 + s2;
+ x1 = s1 + s3;
+ x2 = s0 - s2;
+ x3 = s1 - s3;
+ x4 = fdct_round_shift(s4 + s6);
+ x5 = fdct_round_shift(s5 + s7);
+ x6 = fdct_round_shift(s4 - s6);
+ x7 = fdct_round_shift(s5 - s7);
+
+ // stage 3
+ s2 = cospi_16_64 * (x2 + x3);
+ s3 = cospi_16_64 * (x2 - x3);
+ s6 = cospi_16_64 * (x6 + x7);
+ s7 = cospi_16_64 * (x6 - x7);
+
+ x2 = fdct_round_shift(s2);
+ x3 = fdct_round_shift(s3);
+ x6 = fdct_round_shift(s6);
+ x7 = fdct_round_shift(s7);
+
+ output[0] = (tran_low_t)x0;
+ output[1] = (tran_low_t)-x4;
+ output[2] = (tran_low_t)x6;
+ output[3] = (tran_low_t)-x2;
+ output[4] = (tran_low_t)x3;
+ output[5] = (tran_low_t)-x7;
+ output[6] = (tran_low_t)x5;
+ output[7] = (tran_low_t)-x1;
+}
+
+static void fadst16(const tran_low_t *input, tran_low_t *output) {
tran_high_t s0, s1, s2, s3, s4, s5, s6, s7, s8;
tran_high_t s9, s10, s11, s12, s13, s14, s15;
@@ -1096,6 +508,290 @@
output[15] = (tran_low_t)-x1;
}
+static const transform_2d FHT_4[] = {
+ { fdct4, fdct4 }, // DCT_DCT = 0
+ { fadst4, fdct4 }, // ADST_DCT = 1
+ { fdct4, fadst4 }, // DCT_ADST = 2
+ { fadst4, fadst4 } // ADST_ADST = 3
+};
+
+static const transform_2d FHT_8[] = {
+ { fdct8, fdct8 }, // DCT_DCT = 0
+ { fadst8, fdct8 }, // ADST_DCT = 1
+ { fdct8, fadst8 }, // DCT_ADST = 2
+ { fadst8, fadst8 } // ADST_ADST = 3
+};
+
+static const transform_2d FHT_16[] = {
+ { fdct16, fdct16 }, // DCT_DCT = 0
+ { fadst16, fdct16 }, // ADST_DCT = 1
+ { fdct16, fadst16 }, // DCT_ADST = 2
+ { fadst16, fadst16 } // ADST_ADST = 3
+};
+
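For reference, the dispatch type behind these tables pairs a column transform
with a row transform. Restated here for illustration; the canonical typedef
sits near the top of this file:

    typedef void (*transform_1d)(const tran_low_t *, tran_low_t *);

    typedef struct {
      transform_1d cols, rows;  /* vertical pass, then horizontal pass */
    } transform_2d;
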
+void vp9_fdct4x4_1_c(const int16_t *input, tran_low_t *output, int stride) {
+ int r, c;
+ tran_low_t sum = 0;
+ for (r = 0; r < 4; ++r)
+ for (c = 0; c < 4; ++c)
+ sum += input[r * stride + c];
+
+ output[0] = sum << 1;
+ output[1] = 0;
+}
+
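The _1 variants compute only the DC term, pre-scaled to match what the full
transform pipeline would produce for that coefficient. A usage sketch,
assuming a low-bitdepth build where tran_low_t is int16_t; for a flat block
of value v the sum is 16 * v, so the reported DC is 32 * v:

    #include <stdint.h>

    typedef int16_t tran_low_t;  /* assumption: low-bitdepth configuration */
    void vp9_fdct4x4_1_c(const int16_t *input, tran_low_t *output, int stride);

    static void check_dc_only_path(void) {
      int16_t block[4 * 4];
      tran_low_t out[2];
      int k;
      for (k = 0; k < 16; ++k)
        block[k] = 7;                  /* flat block, v = 7 */
      vp9_fdct4x4_1_c(block, out, 4);  /* out[0] == (16 * 7) << 1 == 224 */
    }
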
+void vp9_fht4x4_c(const int16_t *input, tran_low_t *output,
+ int stride, int tx_type) {
+ if (tx_type == DCT_DCT) {
+ vp9_fdct4x4_c(input, output, stride);
+ } else {
+ tran_low_t out[4 * 4];
+ int i, j;
+ tran_low_t temp_in[4], temp_out[4];
+ const transform_2d ht = FHT_4[tx_type];
+
+ // Columns
+ for (i = 0; i < 4; ++i) {
+ for (j = 0; j < 4; ++j)
+ temp_in[j] = input[j * stride + i] * 16;
+ if (i == 0 && temp_in[0])
+ temp_in[0] += 1;
+ ht.cols(temp_in, temp_out);
+ for (j = 0; j < 4; ++j)
+ out[j * 4 + i] = temp_out[j];
+ }
+
+ // Rows
+ for (i = 0; i < 4; ++i) {
+ for (j = 0; j < 4; ++j)
+ temp_in[j] = out[j + i * 4];
+ ht.rows(temp_in, temp_out);
+ for (j = 0; j < 4; ++j)
+ output[j + i * 4] = (temp_out[j] + 1) >> 2;
+ }
+ }
+}
+
+void vp9_fdct8x8_1_c(const int16_t *input, tran_low_t *output, int stride) {
+ int r, c;
+ tran_low_t sum = 0;
+ for (r = 0; r < 8; ++r)
+ for (c = 0; c < 8; ++c)
+ sum += input[r * stride + c];
+
+ output[0] = sum;
+ output[1] = 0;
+}
+
+void vp9_fdct8x8_quant_c(const int16_t *input, int stride,
+ tran_low_t *coeff_ptr, intptr_t n_coeffs,
+ int skip_block,
+ const int16_t *zbin_ptr, const int16_t *round_ptr,
+ const int16_t *quant_ptr,
+ const int16_t *quant_shift_ptr,
+ tran_low_t *qcoeff_ptr, tran_low_t *dqcoeff_ptr,
+ const int16_t *dequant_ptr,
+ uint16_t *eob_ptr,
+ const int16_t *scan, const int16_t *iscan) {
+ int eob = -1;
+
+ int i, j;
+ tran_low_t intermediate[64];
+
+ // Transform columns
+ {
+ tran_low_t *output = intermediate;
+ tran_high_t s0, s1, s2, s3, s4, s5, s6, s7; // canbe16
+ tran_high_t t0, t1, t2, t3; // needs32
+ tran_high_t x0, x1, x2, x3; // canbe16
+
+ int i;
+ for (i = 0; i < 8; i++) {
+ // stage 1
+ s0 = (input[0 * stride] + input[7 * stride]) * 4;
+ s1 = (input[1 * stride] + input[6 * stride]) * 4;
+ s2 = (input[2 * stride] + input[5 * stride]) * 4;
+ s3 = (input[3 * stride] + input[4 * stride]) * 4;
+ s4 = (input[3 * stride] - input[4 * stride]) * 4;
+ s5 = (input[2 * stride] - input[5 * stride]) * 4;
+ s6 = (input[1 * stride] - input[6 * stride]) * 4;
+ s7 = (input[0 * stride] - input[7 * stride]) * 4;
+
+ // fdct4(step, step);
+ x0 = s0 + s3;
+ x1 = s1 + s2;
+ x2 = s1 - s2;
+ x3 = s0 - s3;
+ t0 = (x0 + x1) * cospi_16_64;
+ t1 = (x0 - x1) * cospi_16_64;
+ t2 = x2 * cospi_24_64 + x3 * cospi_8_64;
+ t3 = -x2 * cospi_8_64 + x3 * cospi_24_64;
+ output[0 * 8] = (tran_low_t)fdct_round_shift(t0);
+ output[2 * 8] = (tran_low_t)fdct_round_shift(t2);
+ output[4 * 8] = (tran_low_t)fdct_round_shift(t1);
+ output[6 * 8] = (tran_low_t)fdct_round_shift(t3);
+
+ // Stage 2
+ t0 = (s6 - s5) * cospi_16_64;
+ t1 = (s6 + s5) * cospi_16_64;
+ t2 = fdct_round_shift(t0);
+ t3 = fdct_round_shift(t1);
+
+ // Stage 3
+ x0 = s4 + t2;
+ x1 = s4 - t2;
+ x2 = s7 - t3;
+ x3 = s7 + t3;
+
+ // Stage 4
+ t0 = x0 * cospi_28_64 + x3 * cospi_4_64;
+ t1 = x1 * cospi_12_64 + x2 * cospi_20_64;
+ t2 = x2 * cospi_12_64 + x1 * -cospi_20_64;
+ t3 = x3 * cospi_28_64 + x0 * -cospi_4_64;
+ output[1 * 8] = (tran_low_t)fdct_round_shift(t0);
+ output[3 * 8] = (tran_low_t)fdct_round_shift(t2);
+ output[5 * 8] = (tran_low_t)fdct_round_shift(t1);
+ output[7 * 8] = (tran_low_t)fdct_round_shift(t3);
+ input++;
+ output++;
+ }
+ }
+
+ // Rows
+ for (i = 0; i < 8; ++i) {
+ fdct8(&intermediate[i * 8], &coeff_ptr[i * 8]);
+ for (j = 0; j < 8; ++j)
+ coeff_ptr[j + i * 8] /= 2;
+ }
+
+  // TODO(jingning): Decide whether these arguments are still needed once the
+  // quantization process is finalized.
+ (void)zbin_ptr;
+ (void)quant_shift_ptr;
+ (void)iscan;
+
+ memset(qcoeff_ptr, 0, n_coeffs * sizeof(*qcoeff_ptr));
+ memset(dqcoeff_ptr, 0, n_coeffs * sizeof(*dqcoeff_ptr));
+
+ if (!skip_block) {
+    // Quantization pass: unlike vp9_quantize_b_c there is no zero-bin
+    // pre-scan here, so all n_coeffs coefficients are processed.
+ for (i = 0; i < n_coeffs; i++) {
+ const int rc = scan[i];
+ const int coeff = coeff_ptr[rc];
+ const int coeff_sign = (coeff >> 31);
+ const int abs_coeff = (coeff ^ coeff_sign) - coeff_sign;
+
+ int tmp = clamp(abs_coeff + round_ptr[rc != 0], INT16_MIN, INT16_MAX);
+ tmp = (tmp * quant_ptr[rc != 0]) >> 16;
+
+ qcoeff_ptr[rc] = (tmp ^ coeff_sign) - coeff_sign;
+ dqcoeff_ptr[rc] = qcoeff_ptr[rc] * dequant_ptr[rc != 0];
+
+ if (tmp)
+ eob = i;
+ }
+ }
+ *eob_ptr = eob + 1;
+}
+
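The quantization pass above leans on a branchless sign/magnitude idiom that
recurs throughout this file. Restated as a standalone sketch, assuming 32-bit
two's-complement int with arithmetic right shift (as the code itself does):

    static int abs_via_sign_mask(int x) {
      const int sign_mask = x >> 31;       /* 0 for x >= 0, -1 for x < 0 */
      return (x ^ sign_mask) - sign_mask;  /* |x| */
    }

    static int restore_sign(int magnitude, int sign_mask) {
      return (magnitude ^ sign_mask) - sign_mask;  /* re-applies saved sign */
    }
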
+void vp9_fdct16x16_1_c(const int16_t *input, tran_low_t *output, int stride) {
+ int r, c;
+ tran_low_t sum = 0;
+ for (r = 0; r < 16; ++r)
+ for (c = 0; c < 16; ++c)
+ sum += input[r * stride + c];
+
+ output[0] = sum >> 1;
+ output[1] = 0;
+}
+
+void vp9_fht8x8_c(const int16_t *input, tran_low_t *output,
+ int stride, int tx_type) {
+ if (tx_type == DCT_DCT) {
+ vp9_fdct8x8_c(input, output, stride);
+ } else {
+ tran_low_t out[64];
+ int i, j;
+ tran_low_t temp_in[8], temp_out[8];
+ const transform_2d ht = FHT_8[tx_type];
+
+ // Columns
+ for (i = 0; i < 8; ++i) {
+ for (j = 0; j < 8; ++j)
+ temp_in[j] = input[j * stride + i] * 4;
+ ht.cols(temp_in, temp_out);
+ for (j = 0; j < 8; ++j)
+ out[j * 8 + i] = temp_out[j];
+ }
+
+ // Rows
+ for (i = 0; i < 8; ++i) {
+ for (j = 0; j < 8; ++j)
+ temp_in[j] = out[j + i * 8];
+ ht.rows(temp_in, temp_out);
+ for (j = 0; j < 8; ++j)
+ output[j + i * 8] = (temp_out[j] + (temp_out[j] < 0)) >> 1;
+ }
+ }
+}
+
+/* 4-point reversible, orthonormal Walsh-Hadamard in 3.5 adds, 0.5 shifts per
+ pixel. */
+void vp9_fwht4x4_c(const int16_t *input, tran_low_t *output, int stride) {
+ int i;
+ tran_high_t a1, b1, c1, d1, e1;
+ const int16_t *ip_pass0 = input;
+ const tran_low_t *ip = NULL;
+ tran_low_t *op = output;
+
+ for (i = 0; i < 4; i++) {
+ a1 = ip_pass0[0 * stride];
+ b1 = ip_pass0[1 * stride];
+ c1 = ip_pass0[2 * stride];
+ d1 = ip_pass0[3 * stride];
+
+ a1 += b1;
+ d1 = d1 - c1;
+ e1 = (a1 - d1) >> 1;
+ b1 = e1 - b1;
+ c1 = e1 - c1;
+ a1 -= c1;
+ d1 += b1;
+ op[0] = (tran_low_t)a1;
+ op[4] = (tran_low_t)c1;
+ op[8] = (tran_low_t)d1;
+ op[12] = (tran_low_t)b1;
+
+ ip_pass0++;
+ op++;
+ }
+ ip = output;
+ op = output;
+
+ for (i = 0; i < 4; i++) {
+ a1 = ip[0];
+ b1 = ip[1];
+ c1 = ip[2];
+ d1 = ip[3];
+
+ a1 += b1;
+ d1 -= c1;
+ e1 = (a1 - d1) >> 1;
+ b1 = e1 - b1;
+ c1 = e1 - c1;
+ a1 -= c1;
+ d1 += b1;
+ op[0] = (tran_low_t)(a1 * UNIT_QUANT_FACTOR);
+ op[1] = (tran_low_t)(c1 * UNIT_QUANT_FACTOR);
+ op[2] = (tran_low_t)(d1 * UNIT_QUANT_FACTOR);
+ op[3] = (tran_low_t)(b1 * UNIT_QUANT_FACTOR);
+
+ ip += 4;
+ op += 4;
+ }
+}
+
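As a quick check of the cost claimed in the comment above: each pass spends
7 additions/subtractions and 1 shift per 4-sample column or row (a1 += b1;
d1 -= c1; e1 = (a1 - d1) >> 1; b1 = e1 - b1; c1 = e1 - c1; a1 -= c1;
d1 += b1), and the two passes cover 16 pixels, giving (7 * 4 * 2) / 16 = 3.5
adds and (1 * 4 * 2) / 16 = 0.5 shifts per pixel.
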
void vp9_fht16x16_c(const int16_t *input, tran_low_t *output,
int stride, int tx_type) {
if (tx_type == DCT_DCT) {
@@ -1531,11 +1227,6 @@
}
#if CONFIG_VP9_HIGHBITDEPTH
-void vp9_highbd_fdct4x4_c(const int16_t *input, tran_low_t *output,
- int stride) {
- vp9_fdct4x4_c(input, output, stride);
-}
-
void vp9_highbd_fht4x4_c(const int16_t *input, tran_low_t *output,
int stride, int tx_type) {
vp9_fht4x4_c(input, output, stride, tx_type);
@@ -1546,21 +1237,11 @@
vp9_fdct8x8_1_c(input, final_output, stride);
}
-void vp9_highbd_fdct8x8_c(const int16_t *input, tran_low_t *final_output,
- int stride) {
- vp9_fdct8x8_c(input, final_output, stride);
-}
-
void vp9_highbd_fdct16x16_1_c(const int16_t *input, tran_low_t *output,
int stride) {
vp9_fdct16x16_1_c(input, output, stride);
}
-void vp9_highbd_fdct16x16_c(const int16_t *input, tran_low_t *output,
- int stride) {
- vp9_fdct16x16_c(input, output, stride);
-}
-
void vp9_highbd_fht8x8_c(const int16_t *input, tran_low_t *output,
int stride, int tx_type) {
vp9_fht8x8_c(input, output, stride, tx_type);
diff --git a/vp9/encoder/vp9_dct.h b/vp9/encoder/vp9_dct.h
index 49afcbb..6ce7e96 100644
--- a/vp9/encoder/vp9_dct.h
+++ b/vp9/encoder/vp9_dct.h
@@ -11,49 +11,12 @@
#ifndef VP9_ENCODER_VP9_DCT_H_
#define VP9_ENCODER_VP9_DCT_H_
-#include "vp9/common/vp9_idct.h"
-
#ifdef __cplusplus
extern "C" {
#endif
-void vp9_highbd_fdct4x4_c(const int16_t *input, tran_low_t *output, int stride);
-void vp9_highbd_fdct8x8_c(const int16_t *input, tran_low_t *output, int stride);
-void vp9_highbd_fdct16x16_c(const int16_t *input, tran_low_t *output,
- int stride);
-void vp9_highbd_fdct32x32_c(const int16_t *input, tran_low_t *out, int stride);
-void vp9_highbd_fdct32x32_rd_c(const int16_t *input, tran_low_t *out,
- int stride);
-
-void vp9_fdct4(const tran_low_t *input, tran_low_t *output);
-void vp9_fadst4(const tran_low_t *input, tran_low_t *output);
-void vp9_fdct8(const tran_low_t *input, tran_low_t *output);
-void vp9_fadst8(const tran_low_t *input, tran_low_t *output);
-void vp9_fdct16(const tran_low_t in[16], tran_low_t out[16]);
-void vp9_fadst16(const tran_low_t *input, tran_low_t *output);
void vp9_fdct32(const tran_high_t *input, tran_high_t *output, int round);
-static const transform_2d FHT_4[] = {
- { vp9_fdct4, vp9_fdct4 }, // DCT_DCT = 0
- { vp9_fadst4, vp9_fdct4 }, // ADST_DCT = 1
- { vp9_fdct4, vp9_fadst4 }, // DCT_ADST = 2
- { vp9_fadst4, vp9_fadst4 } // ADST_ADST = 3
-};
-
-static const transform_2d FHT_8[] = {
- { vp9_fdct8, vp9_fdct8 }, // DCT_DCT = 0
- { vp9_fadst8, vp9_fdct8 }, // ADST_DCT = 1
- { vp9_fdct8, vp9_fadst8 }, // DCT_ADST = 2
- { vp9_fadst8, vp9_fadst8 } // ADST_ADST = 3
-};
-
-static const transform_2d FHT_16[] = {
- { vp9_fdct16, vp9_fdct16 }, // DCT_DCT = 0
- { vp9_fadst16, vp9_fdct16 }, // ADST_DCT = 1
- { vp9_fdct16, vp9_fadst16 }, // DCT_ADST = 2
- { vp9_fadst16, vp9_fadst16 } // ADST_ADST = 3
-};
-
#ifdef __cplusplus
} // extern "C"
#endif
diff --git a/vp9/encoder/vp9_encodemb.c b/vp9/encoder/vp9_encodemb.c
index 659ce72..9c3c510 100644
--- a/vp9/encoder/vp9_encodemb.c
+++ b/vp9/encoder/vp9_encodemb.c
@@ -13,6 +13,7 @@
#include "./vpx_config.h"
#include "./vpx_dsp_rtcd.h"
+#include "vpx_dsp/quantize.h"
#include "vpx_mem/vpx_mem.h"
#include "vpx_ports/mem.h"
@@ -23,7 +24,6 @@
#include "vp9/common/vp9_systemdependent.h"
#include "vp9/encoder/vp9_encodemb.h"
-#include "vp9/encoder/vp9_quantize.h"
#include "vp9/encoder/vp9_rd.h"
#include "vp9/encoder/vp9_tokenize.h"
@@ -795,7 +795,7 @@
case TX_32X32:
scan_order = &vp9_default_scan_orders[TX_32X32];
mode = plane == 0 ? mbmi->mode : mbmi->uv_mode;
- vp9_predict_intra_block(xd, block >> 6, bwl, TX_32X32, mode,
+ vp9_predict_intra_block(xd, bwl, TX_32X32, mode,
x->skip_encode ? src : dst,
x->skip_encode ? src_stride : dst_stride,
dst, dst_stride, i, j, plane);
@@ -816,14 +816,17 @@
tx_type = get_tx_type(pd->plane_type, xd);
scan_order = &vp9_scan_orders[TX_16X16][tx_type];
mode = plane == 0 ? mbmi->mode : mbmi->uv_mode;
- vp9_predict_intra_block(xd, block >> 4, bwl, TX_16X16, mode,
+ vp9_predict_intra_block(xd, bwl, TX_16X16, mode,
x->skip_encode ? src : dst,
x->skip_encode ? src_stride : dst_stride,
dst, dst_stride, i, j, plane);
if (!x->skip_recode) {
vpx_highbd_subtract_block(16, 16, src_diff, diff_stride,
src, src_stride, dst, dst_stride, xd->bd);
- vp9_highbd_fht16x16(src_diff, coeff, diff_stride, tx_type);
+ if (tx_type == DCT_DCT)
+ vp9_highbd_fdct16x16(src_diff, coeff, diff_stride);
+ else
+ vp9_highbd_fht16x16(src_diff, coeff, diff_stride, tx_type);
vp9_highbd_quantize_b(coeff, 256, x->skip_block, p->zbin, p->round,
p->quant, p->quant_shift, qcoeff, dqcoeff,
pd->dequant, eob,
@@ -838,14 +841,17 @@
tx_type = get_tx_type(pd->plane_type, xd);
scan_order = &vp9_scan_orders[TX_8X8][tx_type];
mode = plane == 0 ? mbmi->mode : mbmi->uv_mode;
- vp9_predict_intra_block(xd, block >> 2, bwl, TX_8X8, mode,
+ vp9_predict_intra_block(xd, bwl, TX_8X8, mode,
x->skip_encode ? src : dst,
x->skip_encode ? src_stride : dst_stride,
dst, dst_stride, i, j, plane);
if (!x->skip_recode) {
vpx_highbd_subtract_block(8, 8, src_diff, diff_stride,
src, src_stride, dst, dst_stride, xd->bd);
- vp9_highbd_fht8x8(src_diff, coeff, diff_stride, tx_type);
+ if (tx_type == DCT_DCT)
+ vp9_highbd_fdct8x8(src_diff, coeff, diff_stride);
+ else
+ vp9_highbd_fht8x8(src_diff, coeff, diff_stride, tx_type);
vp9_highbd_quantize_b(coeff, 64, x->skip_block, p->zbin, p->round,
p->quant, p->quant_shift, qcoeff, dqcoeff,
pd->dequant, eob,
@@ -860,7 +866,7 @@
tx_type = get_tx_type_4x4(pd->plane_type, xd, block);
scan_order = &vp9_scan_orders[TX_4X4][tx_type];
mode = plane == 0 ? get_y_mode(xd->mi[0], block) : mbmi->uv_mode;
- vp9_predict_intra_block(xd, block, bwl, TX_4X4, mode,
+ vp9_predict_intra_block(xd, bwl, TX_4X4, mode,
x->skip_encode ? src : dst,
x->skip_encode ? src_stride : dst_stride,
dst, dst_stride, i, j, plane);
@@ -903,7 +909,7 @@
case TX_32X32:
scan_order = &vp9_default_scan_orders[TX_32X32];
mode = plane == 0 ? mbmi->mode : mbmi->uv_mode;
- vp9_predict_intra_block(xd, block >> 6, bwl, TX_32X32, mode,
+ vp9_predict_intra_block(xd, bwl, TX_32X32, mode,
x->skip_encode ? src : dst,
x->skip_encode ? src_stride : dst_stride,
dst, dst_stride, i, j, plane);
@@ -923,7 +929,7 @@
tx_type = get_tx_type(pd->plane_type, xd);
scan_order = &vp9_scan_orders[TX_16X16][tx_type];
mode = plane == 0 ? mbmi->mode : mbmi->uv_mode;
- vp9_predict_intra_block(xd, block >> 4, bwl, TX_16X16, mode,
+ vp9_predict_intra_block(xd, bwl, TX_16X16, mode,
x->skip_encode ? src : dst,
x->skip_encode ? src_stride : dst_stride,
dst, dst_stride, i, j, plane);
@@ -943,7 +949,7 @@
tx_type = get_tx_type(pd->plane_type, xd);
scan_order = &vp9_scan_orders[TX_8X8][tx_type];
mode = plane == 0 ? mbmi->mode : mbmi->uv_mode;
- vp9_predict_intra_block(xd, block >> 2, bwl, TX_8X8, mode,
+ vp9_predict_intra_block(xd, bwl, TX_8X8, mode,
x->skip_encode ? src : dst,
x->skip_encode ? src_stride : dst_stride,
dst, dst_stride, i, j, plane);
@@ -963,7 +969,7 @@
tx_type = get_tx_type_4x4(pd->plane_type, xd, block);
scan_order = &vp9_scan_orders[TX_4X4][tx_type];
mode = plane == 0 ? get_y_mode(xd->mi[0], block) : mbmi->uv_mode;
- vp9_predict_intra_block(xd, block, bwl, TX_4X4, mode,
+ vp9_predict_intra_block(xd, bwl, TX_4X4, mode,
x->skip_encode ? src : dst,
x->skip_encode ? src_stride : dst_stride,
dst, dst_stride, i, j, plane);
diff --git a/vp9/encoder/vp9_encodemv.c b/vp9/encoder/vp9_encodemv.c
index 10180f2..a1d77db 100644
--- a/vp9/encoder/vp9_encodemv.c
+++ b/vp9/encoder/vp9_encodemv.c
@@ -29,7 +29,7 @@
vp9_tokens_from_tree(mv_fp_encodings, vp9_mv_fp_tree);
}
-static void encode_mv_component(vp9_writer* w, int comp,
+static void encode_mv_component(vpx_writer* w, int comp,
const nmv_component* mvcomp, int usehp) {
int offset;
const int sign = comp < 0;
@@ -42,7 +42,7 @@
assert(comp != 0);
// Sign
- vp9_write(w, sign, mvcomp->sign);
+ vpx_write(w, sign, mvcomp->sign);
// Class
vp9_write_token(w, vp9_mv_class_tree, mvcomp->classes,
@@ -56,7 +56,7 @@
int i;
const int n = mv_class + CLASS0_BITS - 1; // number of bits
for (i = 0; i < n; ++i)
- vp9_write(w, (d >> i) & 1, mvcomp->bits[i]);
+ vpx_write(w, (d >> i) & 1, mvcomp->bits[i]);
}
// Fractional bits
@@ -66,7 +66,7 @@
// High precision bit
if (usehp)
- vp9_write(w, hp,
+ vpx_write(w, hp,
mv_class == MV_CLASS_0 ? mvcomp->class0_hp : mvcomp->hp);
}
@@ -133,23 +133,23 @@
}
}
-static int update_mv(vp9_writer *w, const unsigned int ct[2], vp9_prob *cur_p,
- vp9_prob upd_p) {
- const vp9_prob new_p = get_binary_prob(ct[0], ct[1]) | 1;
+static int update_mv(vpx_writer *w, const unsigned int ct[2], vpx_prob *cur_p,
+ vpx_prob upd_p) {
+ const vpx_prob new_p = get_binary_prob(ct[0], ct[1]) | 1;
const int update = cost_branch256(ct, *cur_p) + vp9_cost_zero(upd_p) >
cost_branch256(ct, new_p) + vp9_cost_one(upd_p) + 7 * 256;
- vp9_write(w, update, upd_p);
+ vpx_write(w, update, upd_p);
if (update) {
*cur_p = new_p;
- vp9_write_literal(w, new_p >> 1, 7);
+ vpx_write_literal(w, new_p >> 1, 7);
}
return update;
}
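
The "| 1" when forming new_p above is what keeps the 7-bit signalling
lossless: only new_p >> 1 reaches the bitstream, and the read side
re-attaches the low bit. A sketch of that inverse, assuming the vpx_dsp
convention that vpx_prob is uint8_t:

    #include <stdint.h>

    typedef uint8_t vpx_prob;

    static vpx_prob decode_updated_mv_prob(int seven_bit_literal) {
      /* new_p was forced odd before writing new_p >> 1, so shifting the
       * literal back up and setting the low bit recovers it exactly. */
      return (vpx_prob)((seven_bit_literal << 1) | 1);
    }
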
-static void write_mv_update(const vp9_tree_index *tree,
- vp9_prob probs[/*n - 1*/],
+static void write_mv_update(const vpx_tree_index *tree,
+ vpx_prob probs[/*n - 1*/],
const unsigned int counts[/*n - 1*/],
- int n, vp9_writer *w) {
+ int n, vpx_writer *w) {
int i;
unsigned int branch_ct[32][2];
@@ -161,7 +161,7 @@
update_mv(w, branch_ct[i], &probs[i], MV_UPDATE_PROB);
}
-void vp9_write_nmv_probs(VP9_COMMON *cm, int usehp, vp9_writer *w,
+void vp9_write_nmv_probs(VP9_COMMON *cm, int usehp, vpx_writer *w,
nmv_context_counts *const counts) {
int i, j;
nmv_context *const mvc = &cm->fc->nmvc;
@@ -199,7 +199,7 @@
}
}
-void vp9_encode_mv(VP9_COMP* cpi, vp9_writer* w,
+void vp9_encode_mv(VP9_COMP* cpi, vpx_writer* w,
const MV* mv, const MV* ref,
const nmv_context* mvctx, int usehp) {
const MV diff = {mv->row - ref->row,
diff --git a/vp9/encoder/vp9_encodemv.h b/vp9/encoder/vp9_encodemv.h
index e8ee5ab..5fb114c 100644
--- a/vp9/encoder/vp9_encodemv.h
+++ b/vp9/encoder/vp9_encodemv.h
@@ -20,10 +20,10 @@
void vp9_entropy_mv_init(void);
-void vp9_write_nmv_probs(VP9_COMMON *cm, int usehp, vp9_writer *w,
+void vp9_write_nmv_probs(VP9_COMMON *cm, int usehp, vpx_writer *w,
nmv_context_counts *const counts);
-void vp9_encode_mv(VP9_COMP *cpi, vp9_writer* w, const MV* mv, const MV* ref,
+void vp9_encode_mv(VP9_COMP *cpi, vpx_writer* w, const MV* mv, const MV* ref,
const nmv_context* mvctx, int usehp);
void vp9_build_nmv_cost_table(int *mvjoint, int *mvcost[2],
diff --git a/vp9/encoder/vp9_encoder.c b/vp9/encoder/vp9_encoder.c
index 6c595b7..781204d 100644
--- a/vp9/encoder/vp9_encoder.c
+++ b/vp9/encoder/vp9_encoder.c
@@ -19,6 +19,7 @@
#include "vpx/internal/vpx_psnr.h"
#include "vpx_ports/mem.h"
#include "vpx_ports/vpx_timer.h"
+#include "vpx_scale/vpx_scale.h"
#include "vp9/common/vp9_alloccommon.h"
#include "vp9/common/vp9_filter.h"
@@ -2627,9 +2628,10 @@
const int frame_is_kfgfarf = frame_is_kf_gf_arf(cpi);
int force_recode = 0;
- if ((cpi->sf.recode_loop == ALLOW_RECODE) ||
+ if ((rc->projected_frame_size >= rc->max_frame_bandwidth) ||
+ (cpi->sf.recode_loop == ALLOW_RECODE) ||
(frame_is_kfgfarf &&
- (cpi->sf.recode_loop == ALLOW_RECODE_KFARFGF))) {
+ (cpi->sf.recode_loop == ALLOW_RECODE_KFARFGF))) {
if (frame_is_kfgfarf &&
(oxcf->resize_mode == RESIZE_DYNAMIC) &&
scale_down(cpi, q)) {
@@ -3059,17 +3061,17 @@
oxcf->rc_mode == VPX_CBR &&
!cpi->use_svc &&
oxcf->resize_mode == RESIZE_DYNAMIC) {
- if (cpi->resize_state == 1) {
+ if (cpi->resize_pending == 1) {
oxcf->scaled_frame_width =
(cm->width * cpi->resize_scale_num) / cpi->resize_scale_den;
oxcf->scaled_frame_height =
(cm->height * cpi->resize_scale_num) / cpi->resize_scale_den;
- } else if (cpi->resize_state == -1) {
+ } else if (cpi->resize_pending == -1) {
// Go back up to original size.
oxcf->scaled_frame_width = oxcf->width;
oxcf->scaled_frame_height = oxcf->height;
}
- if (cpi->resize_state != 0) {
+ if (cpi->resize_pending != 0) {
// There has been a change in frame size.
vp9_set_size_literal(cpi,
oxcf->scaled_frame_width,
@@ -3140,12 +3142,27 @@
set_frame_size(cpi);
- cpi->Source = vp9_scale_if_required(cm, cpi->un_scaled_source,
- &cpi->scaled_source);
-
- if (cpi->unscaled_last_source != NULL)
- cpi->Last_Source = vp9_scale_if_required(cm, cpi->unscaled_last_source,
- &cpi->scaled_last_source);
+  // For 1 pass CBR under dynamic resize mode: use faster scaling for source.
+  // Only for down-scaling by a factor of 2 in each dimension for now.
+ if (cpi->oxcf.pass == 0 &&
+ cpi->oxcf.rc_mode == VPX_CBR &&
+ cpi->oxcf.resize_mode == RESIZE_DYNAMIC &&
+ cpi->un_scaled_source->y_width == (cm->width << 1) &&
+ cpi->un_scaled_source->y_height == (cm->height << 1)) {
+ cpi->Source = vp9_scale_if_required_fast(cm,
+ cpi->un_scaled_source,
+ &cpi->scaled_source);
+ if (cpi->unscaled_last_source != NULL)
+ cpi->Last_Source = vp9_scale_if_required_fast(cm,
+ cpi->unscaled_last_source,
+ &cpi->scaled_last_source);
+ } else {
+ cpi->Source = vp9_scale_if_required(cm, cpi->un_scaled_source,
+ &cpi->scaled_source);
+ if (cpi->unscaled_last_source != NULL)
+ cpi->Last_Source = vp9_scale_if_required(cm, cpi->unscaled_last_source,
+ &cpi->scaled_last_source);
+ }
if (frame_is_intra_only(cm) == 0) {
vp9_scale_references(cpi);
@@ -3491,6 +3508,21 @@
}
}
+YV12_BUFFER_CONFIG *vp9_scale_if_required_fast(VP9_COMMON *cm,
+ YV12_BUFFER_CONFIG *unscaled,
+ YV12_BUFFER_CONFIG *scaled) {
+ if (cm->mi_cols * MI_SIZE != unscaled->y_width ||
+ cm->mi_rows * MI_SIZE != unscaled->y_height) {
+    // Down-scaling by a factor of 2 in each dimension.
+ vpx_scale_frame(unscaled, scaled, unscaled->y_buffer, 9, 2, 1,
+ 2, 1, 0);
+ vp9_extend_frame_borders(scaled);
+ return scaled;
+ } else {
+ return unscaled;
+ }
+}
+
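The size test above compares the raw source against the coded frame size
expressed in mode-info units. Factored out as a sketch, assuming MI_SIZE == 8
(the VP9 mode-info unit) and that the surrounding vp9 types are in scope:

    static int source_needs_scaling(const VP9_COMMON *cm,
                                    const YV12_BUFFER_CONFIG *buf) {
      /* Coded luma dimensions are mi_cols * 8 by mi_rows * 8; scaling is
       * needed only when the raw source disagrees with them. */
      return cm->mi_cols * MI_SIZE != buf->y_width ||
             cm->mi_rows * MI_SIZE != buf->y_height;
    }
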
YV12_BUFFER_CONFIG *vp9_scale_if_required(VP9_COMMON *cm,
YV12_BUFFER_CONFIG *unscaled,
YV12_BUFFER_CONFIG *scaled) {
@@ -4275,7 +4307,7 @@
#if CONFIG_INTERNAL_STATS
if (oxcf->pass != 1) {
- double samples;
+ double samples = 0.0;
cpi->bytes += (int)(*size);
if (cm->show_frame) {
diff --git a/vp9/encoder/vp9_encoder.h b/vp9/encoder/vp9_encoder.h
index f095cad..b76b6b7 100644
--- a/vp9/encoder/vp9_encoder.h
+++ b/vp9/encoder/vp9_encoder.h
@@ -55,7 +55,7 @@
int nmvcosts[2][MV_VALS];
int nmvcosts_hp[2][MV_VALS];
- vp9_prob segment_pred_probs[PREDICTION_PROBS];
+ vpx_prob segment_pred_probs[PREDICTION_PROBS];
unsigned char *last_frame_seg_map_copy;
@@ -614,6 +614,10 @@
void vp9_set_high_precision_mv(VP9_COMP *cpi, int allow_high_precision_mv);
+YV12_BUFFER_CONFIG *vp9_scale_if_required_fast(VP9_COMMON *cm,
+ YV12_BUFFER_CONFIG *unscaled,
+ YV12_BUFFER_CONFIG *scaled);
+
YV12_BUFFER_CONFIG *vp9_scale_if_required(VP9_COMMON *cm,
YV12_BUFFER_CONFIG *unscaled,
YV12_BUFFER_CONFIG *scaled);
diff --git a/vp9/encoder/vp9_firstpass.c b/vp9/encoder/vp9_firstpass.c
index 61279f8..5caf2cb 100644
--- a/vp9/encoder/vp9_firstpass.c
+++ b/vp9/encoder/vp9_firstpass.c
@@ -1883,7 +1883,7 @@
double gf_group_error_left;
int gf_arf_bits;
const int is_key_frame = frame_is_intra_only(cm);
- const int kf_or_arf_active = is_key_frame || rc->source_alt_ref_active;
+ const int arf_active_or_kf = is_key_frame || rc->source_alt_ref_active;
// Reset the GF group data structures unless this is a key
// frame in which case it will already have been done.
@@ -1903,7 +1903,7 @@
// If this is a key frame or the overlay from a previous arf then
// the error score / cost of this frame has already been accounted for.
- if (is_key_frame || rc->source_alt_ref_active) {
+ if (arf_active_or_kf) {
gf_group_err -= gf_first_frame_err;
#if GROUP_ADAPTIVE_MAXQ
gf_group_raw_error -= this_frame->coded_error;
@@ -1936,7 +1936,7 @@
// bits to spare and are better with a smaller interval and smaller boost.
// At high Q when there are few bits to spare we are better with a longer
// interval to spread the cost of the GF.
- active_max_gf_interval = rc->max_gf_interval - 4 + MIN(4, (int_lbq / 6));
+ active_max_gf_interval = 12 + MIN(4, (int_lbq / 6));
if (active_max_gf_interval < active_min_gf_interval)
active_max_gf_interval = active_min_gf_interval;
@@ -2001,11 +2001,11 @@
// Break out conditions.
if (
// Break at active_max_gf_interval unless almost totally static.
- ((i >= active_max_gf_interval + kf_or_arf_active) &&
- (zero_motion_accumulator < 0.995)) ||
+ (i >= (active_max_gf_interval + arf_active_or_kf) &&
+ zero_motion_accumulator < 0.995) ||
(
// Don't break out with a very short interval.
- (i >= active_min_gf_interval + kf_or_arf_active) &&
+ (i >= active_min_gf_interval + arf_active_or_kf) &&
(!flash_detected) &&
((mv_ratio_accumulator > mv_ratio_accumulator_thresh) ||
(abs_mv_in_out_accumulator > 3.0) ||
@@ -2043,10 +2043,7 @@
}
// Set the interval until the next gf.
- if (is_key_frame || rc->source_alt_ref_pending)
- rc->baseline_gf_interval = i - 1;
- else
- rc->baseline_gf_interval = i;
+ rc->baseline_gf_interval = i - (is_key_frame || rc->source_alt_ref_pending);
// Only encode alt reference frame in temporal base layer. So
// baseline_gf_interval should be multiple of a temporal layer group
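
The folded assignment above leans on C guaranteeing that a logical-OR
expression evaluates to exactly 0 or 1, so it is behavior-preserving
shorthand for the branchy form it replaces:

    rc->baseline_gf_interval =
        (is_key_frame || rc->source_alt_ref_pending) ? (i - 1) : i;
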
diff --git a/vp9/encoder/vp9_mbgraph.c b/vp9/encoder/vp9_mbgraph.c
index d5eeb9c..acbd7dd 100644
--- a/vp9/encoder/vp9_mbgraph.c
+++ b/vp9/encoder/vp9_mbgraph.c
@@ -145,7 +145,7 @@
unsigned int err;
xd->mi[0]->mbmi.mode = mode;
- vp9_predict_intra_block(xd, 0, 2, TX_16X16, mode,
+ vp9_predict_intra_block(xd, 2, TX_16X16, mode,
x->plane[0].src.buf, x->plane[0].src.stride,
xd->plane[0].dst.buf, xd->plane[0].dst.stride,
0, 0, 0);
diff --git a/vp9/encoder/vp9_pickmode.c b/vp9/encoder/vp9_pickmode.c
index e99cbc7..6d09dbe 100644
--- a/vp9/encoder/vp9_pickmode.c
+++ b/vp9/encoder/vp9_pickmode.c
@@ -902,8 +902,7 @@
p->src.buf = &src_buf_base[4 * (j * src_stride + i)];
pd->dst.buf = &dst_buf_base[4 * (j * dst_stride + i)];
// Use source buffer as an approximation for the fully reconstructed buffer.
- vp9_predict_intra_block(xd, block >> (2 * tx_size),
- b_width_log2_lookup[plane_bsize],
+ vp9_predict_intra_block(xd, b_width_log2_lookup[plane_bsize],
tx_size, args->mode,
x->skip_encode ? p->src.buf : pd->dst.buf,
x->skip_encode ? src_stride : dst_stride,
@@ -1020,9 +1019,9 @@
static void init_ref_frame_cost(VP9_COMMON *const cm,
MACROBLOCKD *const xd,
int ref_frame_cost[MAX_REF_FRAMES]) {
- vp9_prob intra_inter_p = vp9_get_intra_inter_prob(cm, xd);
- vp9_prob ref_single_p1 = vp9_get_pred_prob_single_ref_p1(cm, xd);
- vp9_prob ref_single_p2 = vp9_get_pred_prob_single_ref_p2(cm, xd);
+ vpx_prob intra_inter_p = vp9_get_intra_inter_prob(cm, xd);
+ vpx_prob ref_single_p1 = vp9_get_pred_prob_single_ref_p1(cm, xd);
+ vpx_prob ref_single_p2 = vp9_get_pred_prob_single_ref_p2(cm, xd);
ref_frame_cost[INTRA_FRAME] = vp9_cost_bit(intra_inter_p, 0);
ref_frame_cost[LAST_FRAME] = ref_frame_cost[GOLDEN_FRAME] =
diff --git a/vp9/encoder/vp9_quantize.c b/vp9/encoder/vp9_quantize.c
index 32c1f76..d53d95d 100644
--- a/vp9/encoder/vp9_quantize.c
+++ b/vp9/encoder/vp9_quantize.c
@@ -9,7 +9,7 @@
*/
#include <math.h>
-
+#include "./vpx_dsp_rtcd.h"
#include "vpx_mem/vpx_mem.h"
#include "vpx_ports/mem.h"
@@ -20,113 +20,6 @@
#include "vp9/encoder/vp9_quantize.h"
#include "vp9/encoder/vp9_rd.h"
-void vp9_quantize_dc(const tran_low_t *coeff_ptr,
- int n_coeffs, int skip_block,
- const int16_t *round_ptr, const int16_t quant,
- tran_low_t *qcoeff_ptr, tran_low_t *dqcoeff_ptr,
- const int16_t dequant_ptr, uint16_t *eob_ptr) {
- const int rc = 0;
- const int coeff = coeff_ptr[rc];
- const int coeff_sign = (coeff >> 31);
- const int abs_coeff = (coeff ^ coeff_sign) - coeff_sign;
- int tmp, eob = -1;
-
- memset(qcoeff_ptr, 0, n_coeffs * sizeof(*qcoeff_ptr));
- memset(dqcoeff_ptr, 0, n_coeffs * sizeof(*dqcoeff_ptr));
-
- if (!skip_block) {
- tmp = clamp(abs_coeff + round_ptr[rc != 0], INT16_MIN, INT16_MAX);
- tmp = (tmp * quant) >> 16;
- qcoeff_ptr[rc] = (tmp ^ coeff_sign) - coeff_sign;
- dqcoeff_ptr[rc] = qcoeff_ptr[rc] * dequant_ptr;
- if (tmp)
- eob = 0;
- }
- *eob_ptr = eob + 1;
-}
-
-#if CONFIG_VP9_HIGHBITDEPTH
-void vp9_highbd_quantize_dc(const tran_low_t *coeff_ptr,
- int n_coeffs, int skip_block,
- const int16_t *round_ptr, const int16_t quant,
- tran_low_t *qcoeff_ptr, tran_low_t *dqcoeff_ptr,
- const int16_t dequant_ptr, uint16_t *eob_ptr) {
- int eob = -1;
-
- memset(qcoeff_ptr, 0, n_coeffs * sizeof(*qcoeff_ptr));
- memset(dqcoeff_ptr, 0, n_coeffs * sizeof(*dqcoeff_ptr));
-
- if (!skip_block) {
- const int coeff = coeff_ptr[0];
- const int coeff_sign = (coeff >> 31);
- const int abs_coeff = (coeff ^ coeff_sign) - coeff_sign;
- const int64_t tmp = abs_coeff + round_ptr[0];
- const uint32_t abs_qcoeff = (uint32_t)((tmp * quant) >> 16);
- qcoeff_ptr[0] = (tran_low_t)((abs_qcoeff ^ coeff_sign) - coeff_sign);
- dqcoeff_ptr[0] = qcoeff_ptr[0] * dequant_ptr;
- if (abs_qcoeff)
- eob = 0;
- }
- *eob_ptr = eob + 1;
-}
-#endif
-
-void vp9_quantize_dc_32x32(const tran_low_t *coeff_ptr, int skip_block,
- const int16_t *round_ptr, const int16_t quant,
- tran_low_t *qcoeff_ptr, tran_low_t *dqcoeff_ptr,
- const int16_t dequant_ptr, uint16_t *eob_ptr) {
- const int n_coeffs = 1024;
- const int rc = 0;
- const int coeff = coeff_ptr[rc];
- const int coeff_sign = (coeff >> 31);
- const int abs_coeff = (coeff ^ coeff_sign) - coeff_sign;
- int tmp, eob = -1;
-
- memset(qcoeff_ptr, 0, n_coeffs * sizeof(*qcoeff_ptr));
- memset(dqcoeff_ptr, 0, n_coeffs * sizeof(*dqcoeff_ptr));
-
- if (!skip_block) {
- tmp = clamp(abs_coeff + ROUND_POWER_OF_TWO(round_ptr[rc != 0], 1),
- INT16_MIN, INT16_MAX);
- tmp = (tmp * quant) >> 15;
- qcoeff_ptr[rc] = (tmp ^ coeff_sign) - coeff_sign;
- dqcoeff_ptr[rc] = qcoeff_ptr[rc] * dequant_ptr / 2;
- if (tmp)
- eob = 0;
- }
- *eob_ptr = eob + 1;
-}
-
-#if CONFIG_VP9_HIGHBITDEPTH
-void vp9_highbd_quantize_dc_32x32(const tran_low_t *coeff_ptr,
- int skip_block,
- const int16_t *round_ptr,
- const int16_t quant,
- tran_low_t *qcoeff_ptr,
- tran_low_t *dqcoeff_ptr,
- const int16_t dequant_ptr,
- uint16_t *eob_ptr) {
- const int n_coeffs = 1024;
- int eob = -1;
-
- memset(qcoeff_ptr, 0, n_coeffs * sizeof(*qcoeff_ptr));
- memset(dqcoeff_ptr, 0, n_coeffs * sizeof(*dqcoeff_ptr));
-
- if (!skip_block) {
- const int coeff = coeff_ptr[0];
- const int coeff_sign = (coeff >> 31);
- const int abs_coeff = (coeff ^ coeff_sign) - coeff_sign;
- const int64_t tmp = abs_coeff + ROUND_POWER_OF_TWO(round_ptr[0], 1);
- const uint32_t abs_qcoeff = (uint32_t)((tmp * quant) >> 15);
- qcoeff_ptr[0] = (tran_low_t)((abs_qcoeff ^ coeff_sign) - coeff_sign);
- dqcoeff_ptr[0] = qcoeff_ptr[0] * dequant_ptr / 2;
- if (abs_qcoeff)
- eob = 0;
- }
- *eob_ptr = eob + 1;
-}
-#endif
-
void vp9_quantize_fp_c(const tran_low_t *coeff_ptr, intptr_t n_coeffs,
int skip_block,
const int16_t *zbin_ptr, const int16_t *round_ptr,
@@ -298,224 +191,6 @@
}
#endif
-void vp9_quantize_b_c(const tran_low_t *coeff_ptr, intptr_t n_coeffs,
- int skip_block,
- const int16_t *zbin_ptr, const int16_t *round_ptr,
- const int16_t *quant_ptr, const int16_t *quant_shift_ptr,
- tran_low_t *qcoeff_ptr, tran_low_t *dqcoeff_ptr,
- const int16_t *dequant_ptr,
- uint16_t *eob_ptr,
- const int16_t *scan, const int16_t *iscan) {
- int i, non_zero_count = (int)n_coeffs, eob = -1;
- const int zbins[2] = {zbin_ptr[0], zbin_ptr[1]};
- const int nzbins[2] = {zbins[0] * -1, zbins[1] * -1};
- (void)iscan;
-
- memset(qcoeff_ptr, 0, n_coeffs * sizeof(*qcoeff_ptr));
- memset(dqcoeff_ptr, 0, n_coeffs * sizeof(*dqcoeff_ptr));
-
- if (!skip_block) {
- // Pre-scan pass
- for (i = (int)n_coeffs - 1; i >= 0; i--) {
- const int rc = scan[i];
- const int coeff = coeff_ptr[rc];
-
- if (coeff < zbins[rc != 0] && coeff > nzbins[rc != 0])
- non_zero_count--;
- else
- break;
- }
-
- // Quantization pass: All coefficients with index >= zero_flag are
- // skippable. Note: zero_flag can be zero.
- for (i = 0; i < non_zero_count; i++) {
- const int rc = scan[i];
- const int coeff = coeff_ptr[rc];
- const int coeff_sign = (coeff >> 31);
- const int abs_coeff = (coeff ^ coeff_sign) - coeff_sign;
-
- if (abs_coeff >= zbins[rc != 0]) {
- int tmp = clamp(abs_coeff + round_ptr[rc != 0], INT16_MIN, INT16_MAX);
- tmp = ((((tmp * quant_ptr[rc != 0]) >> 16) + tmp) *
- quant_shift_ptr[rc != 0]) >> 16; // quantization
- qcoeff_ptr[rc] = (tmp ^ coeff_sign) - coeff_sign;
- dqcoeff_ptr[rc] = qcoeff_ptr[rc] * dequant_ptr[rc != 0];
-
- if (tmp)
- eob = i;
- }
- }
- }
- *eob_ptr = eob + 1;
-}
-
-#if CONFIG_VP9_HIGHBITDEPTH
-void vp9_highbd_quantize_b_c(const tran_low_t *coeff_ptr, intptr_t n_coeffs,
- int skip_block, const int16_t *zbin_ptr,
- const int16_t *round_ptr, const int16_t *quant_ptr,
- const int16_t *quant_shift_ptr,
- tran_low_t *qcoeff_ptr, tran_low_t *dqcoeff_ptr,
- const int16_t *dequant_ptr,
- uint16_t *eob_ptr, const int16_t *scan,
- const int16_t *iscan) {
- int i, non_zero_count = (int)n_coeffs, eob = -1;
- const int zbins[2] = {zbin_ptr[0], zbin_ptr[1]};
- const int nzbins[2] = {zbins[0] * -1, zbins[1] * -1};
- (void)iscan;
-
- memset(qcoeff_ptr, 0, n_coeffs * sizeof(*qcoeff_ptr));
- memset(dqcoeff_ptr, 0, n_coeffs * sizeof(*dqcoeff_ptr));
-
- if (!skip_block) {
- // Pre-scan pass
- for (i = (int)n_coeffs - 1; i >= 0; i--) {
- const int rc = scan[i];
- const int coeff = coeff_ptr[rc];
-
- if (coeff < zbins[rc != 0] && coeff > nzbins[rc != 0])
- non_zero_count--;
- else
- break;
- }
-
- // Quantization pass: All coefficients with index >= zero_flag are
- // skippable. Note: zero_flag can be zero.
- for (i = 0; i < non_zero_count; i++) {
- const int rc = scan[i];
- const int coeff = coeff_ptr[rc];
- const int coeff_sign = (coeff >> 31);
- const int abs_coeff = (coeff ^ coeff_sign) - coeff_sign;
-
- if (abs_coeff >= zbins[rc != 0]) {
- const int64_t tmp1 = abs_coeff + round_ptr[rc != 0];
- const int64_t tmp2 = ((tmp1 * quant_ptr[rc != 0]) >> 16) + tmp1;
- const uint32_t abs_qcoeff =
- (uint32_t)((tmp2 * quant_shift_ptr[rc != 0]) >> 16);
- qcoeff_ptr[rc] = (tran_low_t)((abs_qcoeff ^ coeff_sign) - coeff_sign);
- dqcoeff_ptr[rc] = qcoeff_ptr[rc] * dequant_ptr[rc != 0];
- if (abs_qcoeff)
- eob = i;
- }
- }
- }
- *eob_ptr = eob + 1;
-}
-#endif
-
-void vp9_quantize_b_32x32_c(const tran_low_t *coeff_ptr, intptr_t n_coeffs,
- int skip_block,
- const int16_t *zbin_ptr, const int16_t *round_ptr,
- const int16_t *quant_ptr,
- const int16_t *quant_shift_ptr,
- tran_low_t *qcoeff_ptr, tran_low_t *dqcoeff_ptr,
- const int16_t *dequant_ptr,
- uint16_t *eob_ptr,
- const int16_t *scan, const int16_t *iscan) {
- const int zbins[2] = {ROUND_POWER_OF_TWO(zbin_ptr[0], 1),
- ROUND_POWER_OF_TWO(zbin_ptr[1], 1)};
- const int nzbins[2] = {zbins[0] * -1, zbins[1] * -1};
-
- int idx = 0;
- int idx_arr[1024];
- int i, eob = -1;
- (void)iscan;
-
- memset(qcoeff_ptr, 0, n_coeffs * sizeof(*qcoeff_ptr));
- memset(dqcoeff_ptr, 0, n_coeffs * sizeof(*dqcoeff_ptr));
-
- if (!skip_block) {
- // Pre-scan pass
- for (i = 0; i < n_coeffs; i++) {
- const int rc = scan[i];
- const int coeff = coeff_ptr[rc];
-
- // If the coefficient is out of the base ZBIN range, keep it for
- // quantization.
- if (coeff >= zbins[rc != 0] || coeff <= nzbins[rc != 0])
- idx_arr[idx++] = i;
- }
-
- // Quantization pass: only process the coefficients selected in
- // pre-scan pass. Note: idx can be zero.
- for (i = 0; i < idx; i++) {
- const int rc = scan[idx_arr[i]];
- const int coeff = coeff_ptr[rc];
- const int coeff_sign = (coeff >> 31);
- int tmp;
- int abs_coeff = (coeff ^ coeff_sign) - coeff_sign;
- abs_coeff += ROUND_POWER_OF_TWO(round_ptr[rc != 0], 1);
- abs_coeff = clamp(abs_coeff, INT16_MIN, INT16_MAX);
- tmp = ((((abs_coeff * quant_ptr[rc != 0]) >> 16) + abs_coeff) *
- quant_shift_ptr[rc != 0]) >> 15;
-
- qcoeff_ptr[rc] = (tmp ^ coeff_sign) - coeff_sign;
- dqcoeff_ptr[rc] = qcoeff_ptr[rc] * dequant_ptr[rc != 0] / 2;
-
- if (tmp)
- eob = idx_arr[i];
- }
- }
- *eob_ptr = eob + 1;
-}
-
-#if CONFIG_VP9_HIGHBITDEPTH
-void vp9_highbd_quantize_b_32x32_c(const tran_low_t *coeff_ptr,
- intptr_t n_coeffs, int skip_block,
- const int16_t *zbin_ptr,
- const int16_t *round_ptr,
- const int16_t *quant_ptr,
- const int16_t *quant_shift_ptr,
- tran_low_t *qcoeff_ptr,
- tran_low_t *dqcoeff_ptr,
- const int16_t *dequant_ptr,
- uint16_t *eob_ptr,
- const int16_t *scan, const int16_t *iscan) {
- const int zbins[2] = {ROUND_POWER_OF_TWO(zbin_ptr[0], 1),
- ROUND_POWER_OF_TWO(zbin_ptr[1], 1)};
- const int nzbins[2] = {zbins[0] * -1, zbins[1] * -1};
-
- int idx = 0;
- int idx_arr[1024];
- int i, eob = -1;
- (void)iscan;
-
- memset(qcoeff_ptr, 0, n_coeffs * sizeof(*qcoeff_ptr));
- memset(dqcoeff_ptr, 0, n_coeffs * sizeof(*dqcoeff_ptr));
-
- if (!skip_block) {
- // Pre-scan pass
- for (i = 0; i < n_coeffs; i++) {
- const int rc = scan[i];
- const int coeff = coeff_ptr[rc];
-
- // If the coefficient is out of the base ZBIN range, keep it for
- // quantization.
- if (coeff >= zbins[rc != 0] || coeff <= nzbins[rc != 0])
- idx_arr[idx++] = i;
- }
-
- // Quantization pass: only process the coefficients selected in
- // pre-scan pass. Note: idx can be zero.
- for (i = 0; i < idx; i++) {
- const int rc = scan[idx_arr[i]];
- const int coeff = coeff_ptr[rc];
- const int coeff_sign = (coeff >> 31);
- const int abs_coeff = (coeff ^ coeff_sign) - coeff_sign;
- const int64_t tmp1 = abs_coeff
- + ROUND_POWER_OF_TWO(round_ptr[rc != 0], 1);
- const int64_t tmp2 = ((tmp1 * quant_ptr[rc != 0]) >> 16) + tmp1;
- const uint32_t abs_qcoeff =
- (uint32_t)((tmp2 * quant_shift_ptr[rc != 0]) >> 15);
- qcoeff_ptr[rc] = (tran_low_t)((abs_qcoeff ^ coeff_sign) - coeff_sign);
- dqcoeff_ptr[rc] = qcoeff_ptr[rc] * dequant_ptr[rc != 0] / 2;
- if (abs_qcoeff)
- eob = idx_arr[i];
- }
- }
- *eob_ptr = eob + 1;
-}
-#endif
-
void vp9_regular_quantize_b_4x4(MACROBLOCK *x, int plane, int block,
const int16_t *scan, const int16_t *iscan) {
MACROBLOCKD *const xd = &x->e_mbd;
diff --git a/vp9/encoder/vp9_quantize.h b/vp9/encoder/vp9_quantize.h
index 55e5469..6132036 100644
--- a/vp9/encoder/vp9_quantize.h
+++ b/vp9/encoder/vp9_quantize.h
@@ -37,34 +37,9 @@
DECLARE_ALIGNED(16, int16_t, uv_round[QINDEX_RANGE][8]);
} QUANTS;
-void vp9_quantize_dc(const tran_low_t *coeff_ptr,
- int n_coeffs, int skip_block,
- const int16_t *round_ptr, const int16_t quant_ptr,
- tran_low_t *qcoeff_ptr, tran_low_t *dqcoeff_ptr,
- const int16_t dequant_ptr, uint16_t *eob_ptr);
-void vp9_quantize_dc_32x32(const tran_low_t *coeff_ptr, int skip_block,
- const int16_t *round_ptr, const int16_t quant_ptr,
- tran_low_t *qcoeff_ptr, tran_low_t *dqcoeff_ptr,
- const int16_t dequant_ptr, uint16_t *eob_ptr);
void vp9_regular_quantize_b_4x4(MACROBLOCK *x, int plane, int block,
const int16_t *scan, const int16_t *iscan);
-#if CONFIG_VP9_HIGHBITDEPTH
-void vp9_highbd_quantize_dc(const tran_low_t *coeff_ptr,
- int n_coeffs, int skip_block,
- const int16_t *round_ptr, const int16_t quant_ptr,
- tran_low_t *qcoeff_ptr, tran_low_t *dqcoeff_ptr,
- const int16_t dequant_ptr, uint16_t *eob_ptr);
-void vp9_highbd_quantize_dc_32x32(const tran_low_t *coeff_ptr,
- int skip_block,
- const int16_t *round_ptr,
- const int16_t quant_ptr,
- tran_low_t *qcoeff_ptr,
- tran_low_t *dqcoeff_ptr,
- const int16_t dequant_ptr,
- uint16_t *eob_ptr);
-#endif
-
struct VP9_COMP;
struct VP9Common;
diff --git a/vp9/encoder/vp9_ratectrl.c b/vp9/encoder/vp9_ratectrl.c
index be09bca..2be2a64 100644
--- a/vp9/encoder/vp9_ratectrl.c
+++ b/vp9/encoder/vp9_ratectrl.c
@@ -281,11 +281,14 @@
// Assume we do not need any constraint lower than 4K 20 fps
static const double factor_safe = 3840 * 2160 * 20.0;
const double factor = width * height * framerate;
+ const double default_interval =
+ MIN(MAX_GF_INTERVAL, MAX(MIN_GF_INTERVAL, (int)(framerate * 0.125)));
if (factor <= factor_safe)
- return MIN_GF_INTERVAL;
+ return (int)default_interval;
else
- return (int)(MIN_GF_INTERVAL * factor / factor_safe + 0.5);
+ return (int)MAX(default_interval,
+ (int)(MIN_GF_INTERVAL * factor / factor_safe + 0.5));
// Note this logic makes:
// 4K24: 5
// 4K30: 6
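
Checking the quoted figures against the code, assuming MIN_GF_INTERVAL == 4
and MAX_GF_INTERVAL == 16 from vp9_ratectrl.h: at 4K24 the ratio
factor / factor_safe is 24 / 20 = 1.2, so the else branch gives
(int)(4 * 1.2 + 0.5) = 5; at 4K30 the ratio is 1.5, giving
(int)(4 * 1.5 + 0.5) = 6. In both cases default_interval evaluates to 4
((int)(framerate * 0.125) is 3, clamped up to MIN_GF_INTERVAL), so the MAX()
returns the scaled value, matching the 5 and 6 in the note.
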
@@ -294,6 +297,7 @@
int vp9_rc_get_default_max_gf_interval(double framerate, int min_gf_interval) {
int interval = MIN(MAX_GF_INTERVAL, (int)(framerate * 0.75));
+  interval += (interval & 0x01);  // Round up to an even value.
return MAX(interval, min_gf_interval);
}
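
A quick check of the new even rounding, under the same MAX_GF_INTERVAL == 16
assumption: at 15 fps, MIN(16, (int)(15 * 0.75)) = 11 and 11 + (11 & 1) = 12;
at 30 fps the clamp already yields the even 16 and the adjustment is a no-op.
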
@@ -1351,9 +1355,11 @@
}
// Trigger the resizing of the next frame if it is scaled.
- cpi->resize_pending =
- rc->next_frame_size_selector != rc->frame_size_selector;
- rc->frame_size_selector = rc->next_frame_size_selector;
+ if (oxcf->pass != 0) {
+ cpi->resize_pending =
+ rc->next_frame_size_selector != rc->frame_size_selector;
+ rc->frame_size_selector = rc->next_frame_size_selector;
+ }
}
void vp9_rc_postencode_update_drop_frame(VP9_COMP *cpi) {
@@ -1628,9 +1634,9 @@
vp9_rc_set_frame_target(cpi, target);
if (cpi->oxcf.resize_mode == RESIZE_DYNAMIC)
- cpi->resize_state = vp9_resize_one_pass_cbr(cpi);
+ cpi->resize_pending = vp9_resize_one_pass_cbr(cpi);
else
- cpi->resize_state = 0;
+ cpi->resize_pending = 0;
}
int vp9_compute_qdelta(const RATE_CONTROL *rc, double qstart, double qtarget,
@@ -1693,7 +1699,6 @@
if (rc->max_gf_interval == 0)
rc->max_gf_interval = vp9_rc_get_default_max_gf_interval(
cpi->framerate, rc->min_gf_interval);
- rc->max_gf_interval += (rc->max_gf_interval & 0x01);
// Extended interval for genuinely static scenes
rc->static_scene_max_gf_interval = MAX_LAG_BUFFERS * 2;
@@ -1805,12 +1810,12 @@
cpi->resize_count = 0;
return 0;
}
- // Resize based on average QP over some window.
+ // Resize based on average buffer underflow and QP over some window.
// Ignore samples close to key frame, since QP is usually high after key.
if (cpi->rc.frames_since_key > 2 * cpi->framerate) {
const int window = (int)(5 * cpi->framerate);
cpi->resize_avg_qp += cm->base_qindex;
- if (cpi->rc.buffer_level < 0)
+ if (cpi->rc.buffer_level < (int)(30 * rc->optimal_buffer_level / 100))
++cpi->resize_buffer_underflow;
++cpi->resize_count;
// Check for resize action every "window" frames.
@@ -1821,11 +1826,13 @@
// Resize down if the buffer has underflowed for a sufficient share of the
// window; resize back up if average QP is low while in the resized-down
// state.
if (cpi->resize_state == 0 &&
- cpi->resize_buffer_underflow > (cpi->resize_count >> 3)) {
+ cpi->resize_buffer_underflow > (cpi->resize_count >> 2)) {
resize_now = 1;
+ cpi->resize_state = 1;
} else if (cpi->resize_state == 1 &&
avg_qp < 40 * cpi->rc.worst_quality / 100) {
resize_now = -1;
+ cpi->resize_state = 0;
}
// Reset for next window measurement.
cpi->resize_avg_qp = 0;
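
For scale, taking 30 fps as an illustration: the measurement window above is
(int)(5 * 30) = 150 frames, so a down-resize now requires the buffer to sit
below 30% of its optimal level in more than 150 >> 2 = 37 of those frames,
where it previously required outright underflow (buffer_level < 0) in more
than 150 >> 3 = 18 frames.
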
diff --git a/vp9/encoder/vp9_rd.c b/vp9/encoder/vp9_rd.c
index bc7cb34..3f6de42 100644
--- a/vp9/encoder/vp9_rd.c
+++ b/vp9/encoder/vp9_rd.c
@@ -93,7 +93,7 @@
for (j = 0; j < REF_TYPES; ++j)
for (k = 0; k < COEF_BANDS; ++k)
for (l = 0; l < BAND_COEFF_CONTEXTS(k); ++l) {
- vp9_prob probs[ENTROPY_NODES];
+ vpx_prob probs[ENTROPY_NODES];
vp9_model_to_full_probs(p[t][i][j][k][l], probs);
vp9_cost_tokens((int *)c[t][i][j][k][0][l], probs,
vp9_coef_tree);
diff --git a/vp9/encoder/vp9_rdopt.c b/vp9/encoder/vp9_rdopt.c
index 9985f89..8ae6783 100644
--- a/vp9/encoder/vp9_rdopt.c
+++ b/vp9/encoder/vp9_rdopt.c
@@ -641,7 +641,7 @@
VP9_COMMON *const cm = &cpi->common;
MACROBLOCKD *const xd = &x->e_mbd;
MB_MODE_INFO *const mbmi = &xd->mi[0]->mbmi;
- vp9_prob skip_prob = vp9_get_skip_prob(cm, xd);
+ vpx_prob skip_prob = vp9_get_skip_prob(cm, xd);
int r[TX_SIZES][2], s[TX_SIZES];
int64_t d[TX_SIZES], sse[TX_SIZES];
int64_t rd[TX_SIZES][2] = {{INT64_MAX, INT64_MAX},
@@ -654,7 +654,7 @@
int64_t best_rd = INT64_MAX;
TX_SIZE best_tx = max_tx_size;
- const vp9_prob *tx_probs = get_tx_probs2(max_tx_size, xd, &cm->fc->tx_probs);
+ const vpx_prob *tx_probs = get_tx_probs2(max_tx_size, xd, &cm->fc->tx_probs);
assert(skip_prob > 0);
s0 = vp9_cost_bit(skip_prob, 0);
s1 = vp9_cost_bit(skip_prob, 1);
@@ -760,7 +760,8 @@
return 0;
}
-static int64_t rd_pick_intra4x4block(VP9_COMP *cpi, MACROBLOCK *x, int ib,
+static int64_t rd_pick_intra4x4block(VP9_COMP *cpi, MACROBLOCK *x,
+ int row, int col,
PREDICTION_MODE *best_mode,
const int *bmode_costs,
ENTROPY_CONTEXT *a, ENTROPY_CONTEXT *l,
@@ -770,18 +771,14 @@
PREDICTION_MODE mode;
MACROBLOCKD *const xd = &x->e_mbd;
int64_t best_rd = rd_thresh;
-
struct macroblock_plane *p = &x->plane[0];
struct macroblockd_plane *pd = &xd->plane[0];
const int src_stride = p->src.stride;
const int dst_stride = pd->dst.stride;
- const uint8_t *src_init = &p->src.buf[vp9_raster_block_offset(BLOCK_8X8, ib,
- src_stride)];
- uint8_t *dst_init = &pd->dst.buf[vp9_raster_block_offset(BLOCK_8X8, ib,
- dst_stride)];
+ const uint8_t *src_init = &p->src.buf[row * 4 * src_stride + col * 4];
+  uint8_t *dst_init = &pd->dst.buf[row * 4 * dst_stride + col * 4];
ENTROPY_CONTEXT ta[2], tempa[2];
ENTROPY_CONTEXT tl[2], templ[2];
-
const int num_4x4_blocks_wide = num_4x4_blocks_wide_lookup[bsize];
const int num_4x4_blocks_high = num_4x4_blocks_high_lookup[bsize];
int idx, idy;
@@ -790,8 +787,6 @@
uint16_t best_dst16[8 * 8];
#endif
- assert(ib < 4);
-
memcpy(ta, a, sizeof(ta));
memcpy(tl, l, sizeof(tl));
xd->mi[0]->mbmi.tx_size = TX_4X4;
@@ -819,7 +814,7 @@
for (idy = 0; idy < num_4x4_blocks_high; ++idy) {
for (idx = 0; idx < num_4x4_blocks_wide; ++idx) {
- const int block = ib + idy * 2 + idx;
+ const int block = (row + idy) * 2 + (col + idx);
const uint8_t *const src = &src_init[idx * 4 + idy * 4 * src_stride];
uint8_t *const dst = &dst_init[idx * 4 + idy * 4 * dst_stride];
int16_t *const src_diff = vp9_raster_block_offset_int16(BLOCK_8X8,
@@ -827,11 +822,11 @@
p->src_diff);
tran_low_t *const coeff = BLOCK_OFFSET(x->plane[0].coeff, block);
xd->mi[0]->bmi[block].as_mode = mode;
- vp9_predict_intra_block(xd, block, 1,
- TX_4X4, mode,
+ vp9_predict_intra_block(xd, 1, TX_4X4, mode,
x->skip_encode ? src : dst,
x->skip_encode ? src_stride : dst_stride,
- dst, dst_stride, idx, idy, 0);
+ dst, dst_stride,
+ col + idx, row + idy, 0);
vpx_highbd_subtract_block(4, 4, src_diff, 8, src, src_stride,
dst, dst_stride, xd->bd);
if (xd->lossless) {
@@ -850,7 +845,10 @@
int64_t unused;
const TX_TYPE tx_type = get_tx_type_4x4(PLANE_TYPE_Y, xd, block);
const scan_order *so = &vp9_scan_orders[TX_4X4][tx_type];
- vp9_highbd_fht4x4(src_diff, coeff, 8, tx_type);
+ if (tx_type == DCT_DCT)
+ vp9_highbd_fdct4x4(src_diff, coeff, 8);
+ else
+ vp9_highbd_fht4x4(src_diff, coeff, 8, tx_type);
vp9_regular_quantize_b_4x4(x, 0, block, so->scan, so->iscan);
ratey += cost_coeffs(x, 0, block, tempa + idx, templ + idy, TX_4X4,
so->scan, so->neighbors,
@@ -920,18 +918,17 @@
for (idy = 0; idy < num_4x4_blocks_high; ++idy) {
for (idx = 0; idx < num_4x4_blocks_wide; ++idx) {
- const int block = ib + idy * 2 + idx;
+ const int block = (row + idy) * 2 + (col + idx);
const uint8_t *const src = &src_init[idx * 4 + idy * 4 * src_stride];
uint8_t *const dst = &dst_init[idx * 4 + idy * 4 * dst_stride];
int16_t *const src_diff =
vp9_raster_block_offset_int16(BLOCK_8X8, block, p->src_diff);
tran_low_t *const coeff = BLOCK_OFFSET(x->plane[0].coeff, block);
xd->mi[0]->bmi[block].as_mode = mode;
- vp9_predict_intra_block(xd, block, 1,
- TX_4X4, mode,
+ vp9_predict_intra_block(xd, 1, TX_4X4, mode,
x->skip_encode ? src : dst,
x->skip_encode ? src_stride : dst_stride,
- dst, dst_stride, idx, idy, 0);
+ dst, dst_stride, col + idx, row + idy, 0);
vpx_subtract_block(4, 4, src_diff, 8, src, src_stride, dst, dst_stride);
if (xd->lossless) {
@@ -1030,9 +1027,9 @@
bmode_costs = cpi->y_mode_costs[A][L];
}
- this_rd = rd_pick_intra4x4block(cpi, mb, i, &best_mode, bmode_costs,
- t_above + idx, t_left + idy, &r, &ry, &d,
- bsize, best_rd - total_rd);
+ this_rd = rd_pick_intra4x4block(cpi, mb, idy, idx, &best_mode,
+ bmode_costs, t_above + idx, t_left + idy,
+ &r, &ry, &d, bsize, best_rd - total_rd);
if (this_rd >= best_rd - total_rd)
return INT64_MAX;
@@ -2121,7 +2118,7 @@
int segment_id,
unsigned int *ref_costs_single,
unsigned int *ref_costs_comp,
- vp9_prob *comp_mode_p) {
+ vpx_prob *comp_mode_p) {
int seg_ref_active = segfeature_active(&cm->seg, segment_id,
SEG_LVL_REF_FRAME);
if (seg_ref_active) {
@@ -2129,8 +2126,8 @@
memset(ref_costs_comp, 0, MAX_REF_FRAMES * sizeof(*ref_costs_comp));
*comp_mode_p = 128;
} else {
- vp9_prob intra_inter_p = vp9_get_intra_inter_prob(cm, xd);
- vp9_prob comp_inter_p = 128;
+ vpx_prob intra_inter_p = vp9_get_intra_inter_prob(cm, xd);
+ vpx_prob comp_inter_p = 128;
if (cm->reference_mode == REFERENCE_MODE_SELECT) {
comp_inter_p = vp9_get_reference_mode_prob(cm, xd);
@@ -2142,8 +2139,8 @@
ref_costs_single[INTRA_FRAME] = vp9_cost_bit(intra_inter_p, 0);
if (cm->reference_mode != COMPOUND_REFERENCE) {
- vp9_prob ref_single_p1 = vp9_get_pred_prob_single_ref_p1(cm, xd);
- vp9_prob ref_single_p2 = vp9_get_pred_prob_single_ref_p2(cm, xd);
+ vpx_prob ref_single_p1 = vp9_get_pred_prob_single_ref_p1(cm, xd);
+ vpx_prob ref_single_p2 = vp9_get_pred_prob_single_ref_p2(cm, xd);
unsigned int base_cost = vp9_cost_bit(intra_inter_p, 1);
if (cm->reference_mode == REFERENCE_MODE_SELECT)
@@ -2162,7 +2159,7 @@
ref_costs_single[ALTREF_FRAME] = 512;
}
if (cm->reference_mode != SINGLE_REFERENCE) {
- vp9_prob ref_comp_p = vp9_get_pred_prob_comp_ref_p(cm, xd);
+ vpx_prob ref_comp_p = vp9_get_pred_prob_comp_ref_p(cm, xd);
unsigned int base_cost = vp9_cost_bit(intra_inter_p, 1);
if (cm->reference_mode == REFERENCE_MODE_SELECT)
@@ -3006,7 +3003,7 @@
int best_mode_skippable = 0;
int midx, best_mode_index = -1;
unsigned int ref_costs_single[MAX_REF_FRAMES], ref_costs_comp[MAX_REF_FRAMES];
- vp9_prob comp_mode_p;
+ vpx_prob comp_mode_p;
int64_t best_intra_rd = INT64_MAX;
unsigned int best_pred_sse = UINT_MAX;
PREDICTION_MODE best_intra_mode = DC_PRED;
@@ -3699,7 +3696,7 @@
int64_t best_pred_diff[REFERENCE_MODES];
int64_t best_filter_diff[SWITCHABLE_FILTER_CONTEXTS];
unsigned int ref_costs_single[MAX_REF_FRAMES], ref_costs_comp[MAX_REF_FRAMES];
- vp9_prob comp_mode_p;
+ vpx_prob comp_mode_p;
INTERP_FILTER best_filter = SWITCHABLE;
int64_t this_rd = INT64_MAX;
int rate2 = 0;
@@ -3815,7 +3812,7 @@
MB_MODE_INFO best_mbmode;
int ref_index, best_ref_index = 0;
unsigned int ref_costs_single[MAX_REF_FRAMES], ref_costs_comp[MAX_REF_FRAMES];
- vp9_prob comp_mode_p;
+ vpx_prob comp_mode_p;
INTERP_FILTER tmp_best_filter = SWITCHABLE;
int rate_uv_intra, rate_uv_tokenonly;
int64_t dist_uv;
diff --git a/vp9/encoder/vp9_segmentation.c b/vp9/encoder/vp9_segmentation.c
index 1f0d4df..c5c50a2 100644
--- a/vp9/encoder/vp9_segmentation.c
+++ b/vp9/encoder/vp9_segmentation.c
@@ -49,7 +49,7 @@
}
// Based on set of segment counts calculate a probability tree
-static void calc_segtree_probs(int *segcounts, vp9_prob *segment_tree_probs) {
+static void calc_segtree_probs(int *segcounts, vpx_prob *segment_tree_probs) {
// Work out probabilities of each segment
const int c01 = segcounts[0] + segcounts[1];
const int c23 = segcounts[2] + segcounts[3];
@@ -66,7 +66,7 @@
}
// Based on set of segment counts and probabilities calculate a cost estimate
-static int cost_segmap(int *segcounts, vp9_prob *probs) {
+static int cost_segmap(int *segcounts, vpx_prob *probs) {
const int c01 = segcounts[0] + segcounts[1];
const int c23 = segcounts[2] + segcounts[3];
const int c45 = segcounts[4] + segcounts[5];
@@ -207,9 +207,9 @@
int no_pred_segcounts[MAX_SEGMENTS] = { 0 };
int t_unpred_seg_counts[MAX_SEGMENTS] = { 0 };
- vp9_prob no_pred_tree[SEG_TREE_PROBS];
- vp9_prob t_pred_tree[SEG_TREE_PROBS];
- vp9_prob t_nopred_prob[PREDICTION_PROBS];
+ vpx_prob no_pred_tree[SEG_TREE_PROBS];
+ vpx_prob t_pred_tree[SEG_TREE_PROBS];
+ vpx_prob t_nopred_prob[PREDICTION_PROBS];
// Set default state for the segment tree probabilities and the
// temporal coding probabilities
diff --git a/vp9/encoder/vp9_speed_features.c b/vp9/encoder/vp9_speed_features.c
index e544f9b..bf06fd0 100644
--- a/vp9/encoder/vp9_speed_features.c
+++ b/vp9/encoder/vp9_speed_features.c
@@ -118,10 +118,11 @@
if (speed >= 1) {
if ((cpi->twopass.fr_content_type == FC_GRAPHICS_ANIMATION) ||
vp9_internal_image_edge(cpi)) {
- sf->use_square_partition_only = frame_is_boosted(cpi);
+ sf->use_square_partition_only = !frame_is_boosted(cpi);
} else {
sf->use_square_partition_only = !frame_is_intra_only(cm);
}
+
sf->less_rectangular_check = 1;
sf->use_rd_breakout = 1;
diff --git a/vp9/encoder/vp9_subexp.c b/vp9/encoder/vp9_subexp.c
index b345b16..799f179 100644
--- a/vp9/encoder/vp9_subexp.c
+++ b/vp9/encoder/vp9_subexp.c
@@ -7,13 +7,12 @@
* in the file PATENTS. All contributing project authors may
* be found in the AUTHORS file in the root of the source tree.
*/
+#include "vpx_dsp/bitwriter.h"
#include "vp9/common/vp9_common.h"
#include "vp9/common/vp9_entropy.h"
-
#include "vp9/encoder/vp9_cost.h"
#include "vp9/encoder/vp9_subexp.h"
-#include "vp9/encoder/vp9_writer.h"
#define vp9_cost_upd256 ((int)(vp9_cost_one(upd) - vp9_cost_zero(upd)))
@@ -79,50 +78,50 @@
return i;
}
-static int prob_diff_update_cost(vp9_prob newp, vp9_prob oldp) {
+static int prob_diff_update_cost(vpx_prob newp, vpx_prob oldp) {
int delp = remap_prob(newp, oldp);
return update_bits[delp] * 256;
}
-static void encode_uniform(vp9_writer *w, int v) {
+static void encode_uniform(vpx_writer *w, int v) {
const int l = 8;
const int m = (1 << l) - 191;
if (v < m) {
- vp9_write_literal(w, v, l - 1);
+ vpx_write_literal(w, v, l - 1);
} else {
- vp9_write_literal(w, m + ((v - m) >> 1), l - 1);
- vp9_write_literal(w, (v - m) & 1, 1);
+ vpx_write_literal(w, m + ((v - m) >> 1), l - 1);
+ vpx_write_literal(w, (v - m) & 1, 1);
}
}
-static INLINE int write_bit_gte(vp9_writer *w, int word, int test) {
- vp9_write_literal(w, word >= test, 1);
+static INLINE int write_bit_gte(vpx_writer *w, int word, int test) {
+ vpx_write_literal(w, word >= test, 1);
return word >= test;
}
-static void encode_term_subexp(vp9_writer *w, int word) {
+static void encode_term_subexp(vpx_writer *w, int word) {
if (!write_bit_gte(w, word, 16)) {
- vp9_write_literal(w, word, 4);
+ vpx_write_literal(w, word, 4);
} else if (!write_bit_gte(w, word, 32)) {
- vp9_write_literal(w, word - 16, 4);
+ vpx_write_literal(w, word - 16, 4);
} else if (!write_bit_gte(w, word, 64)) {
- vp9_write_literal(w, word - 32, 5);
+ vpx_write_literal(w, word - 32, 5);
} else {
encode_uniform(w, word - 64);
}
}
-void vp9_write_prob_diff_update(vp9_writer *w, vp9_prob newp, vp9_prob oldp) {
+void vp9_write_prob_diff_update(vpx_writer *w, vpx_prob newp, vpx_prob oldp) {
const int delp = remap_prob(newp, oldp);
encode_term_subexp(w, delp);
}
int vp9_prob_diff_update_savings_search(const unsigned int *ct,
- vp9_prob oldp, vp9_prob *bestp,
- vp9_prob upd) {
+ vpx_prob oldp, vpx_prob *bestp,
+ vpx_prob upd) {
const int old_b = cost_branch256(ct, oldp);
int bestsavings = 0;
- vp9_prob newp, bestnewp = oldp;
+ vpx_prob newp, bestnewp = oldp;
const int step = *bestp > oldp ? -1 : 1;
for (newp = *bestp; newp != oldp; newp += step) {
@@ -139,15 +138,15 @@
}
int vp9_prob_diff_update_savings_search_model(const unsigned int *ct,
- const vp9_prob *oldp,
- vp9_prob *bestp,
- vp9_prob upd,
+ const vpx_prob *oldp,
+ vpx_prob *bestp,
+ vpx_prob upd,
int stepsize) {
int i, old_b, new_b, update_b, savings, bestsavings, step;
int newp;
- vp9_prob bestnewp, newplist[ENTROPY_NODES], oldplist[ENTROPY_NODES];
+ vpx_prob bestnewp, newplist[ENTROPY_NODES], oldplist[ENTROPY_NODES];
vp9_model_to_full_probs(oldp, oldplist);
- memcpy(newplist, oldp, sizeof(vp9_prob) * UNCONSTRAINED_NODES);
+ memcpy(newplist, oldp, sizeof(vpx_prob) * UNCONSTRAINED_NODES);
for (i = UNCONSTRAINED_NODES, old_b = 0; i < ENTROPY_NODES; ++i)
old_b += cost_branch256(ct + 2 * i, oldplist[i]);
old_b += cost_branch256(ct + 2 * PIVOT_NODE, oldplist[PIVOT_NODE]);
@@ -197,18 +196,18 @@
return bestsavings;
}
-void vp9_cond_prob_diff_update(vp9_writer *w, vp9_prob *oldp,
+void vp9_cond_prob_diff_update(vpx_writer *w, vpx_prob *oldp,
const unsigned int ct[2]) {
- const vp9_prob upd = DIFF_UPDATE_PROB;
- vp9_prob newp = get_binary_prob(ct[0], ct[1]);
+ const vpx_prob upd = DIFF_UPDATE_PROB;
+ vpx_prob newp = get_binary_prob(ct[0], ct[1]);
const int savings = vp9_prob_diff_update_savings_search(ct, *oldp, &newp,
upd);
assert(newp >= 1);
if (savings > 0) {
- vp9_write(w, 1, upd);
+ vpx_write(w, 1, upd);
vp9_write_prob_diff_update(w, newp, *oldp);
*oldp = newp;
} else {
- vp9_write(w, 0, upd);
+ vpx_write(w, 0, upd);
}
}
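
The ranges in encode_term_subexp() above determine how many bits a remapped probability delta costs; a stand-alone counter for the same layout (sketch; the real code emits the bits through a vpx_writer):

#include <stdio.h>

static int subexp_update_bits(int word) {
  if (word < 16) return 1 + 4;  /* flag 0, then a 4-bit literal      */
  if (word < 32) return 2 + 4;  /* flags 1,0, then a 4-bit literal   */
  if (word < 64) return 3 + 5;  /* flags 1,1,0, then a 5-bit literal */
  /* encode_uniform(word - 64) with l = 8, m = (1 << 8) - 191 = 65 */
  return 3 + (word - 64 < 65 ? 7 : 8);
}

int main(void) {
  int delta;
  for (delta = 0; delta < 256; delta += 37)
    printf("remapped delta %3d -> %2d bits\n", delta, subexp_update_bits(delta));
  return 0;
}
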
diff --git a/vp9/encoder/vp9_subexp.h b/vp9/encoder/vp9_subexp.h
index 6fbb747..b968232 100644
--- a/vp9/encoder/vp9_subexp.h
+++ b/vp9/encoder/vp9_subexp.h
@@ -16,25 +16,25 @@
extern "C" {
#endif
-#include "vp9/common/vp9_prob.h"
+#include "vpx_dsp/prob.h"
-struct vp9_writer;
+struct vpx_writer;
-void vp9_write_prob_diff_update(struct vp9_writer *w,
- vp9_prob newp, vp9_prob oldp);
+void vp9_write_prob_diff_update(struct vpx_writer *w,
+ vpx_prob newp, vpx_prob oldp);
-void vp9_cond_prob_diff_update(struct vp9_writer *w, vp9_prob *oldp,
+void vp9_cond_prob_diff_update(struct vpx_writer *w, vpx_prob *oldp,
const unsigned int ct[2]);
int vp9_prob_diff_update_savings_search(const unsigned int *ct,
- vp9_prob oldp, vp9_prob *bestp,
- vp9_prob upd);
+ vpx_prob oldp, vpx_prob *bestp,
+ vpx_prob upd);
int vp9_prob_diff_update_savings_search_model(const unsigned int *ct,
- const vp9_prob *oldp,
- vp9_prob *bestp,
- vp9_prob upd,
+ const vpx_prob *oldp,
+ vpx_prob *bestp,
+ vpx_prob upd,
int stepsize);
#ifdef __cplusplus
diff --git a/vp9/encoder/vp9_tokenize.c b/vp9/encoder/vp9_tokenize.c
index 181a99c..a1c076a 100644
--- a/vp9/encoder/vp9_tokenize.c
+++ b/vp9/encoder/vp9_tokenize.c
@@ -52,7 +52,7 @@
/ 2;
// Array indices are identical to previously-existing CONTEXT_NODE indices
-const vp9_tree_index vp9_coef_tree[TREE_SIZE(ENTROPY_TOKENS)] = {
+const vpx_tree_index vp9_coef_tree[TREE_SIZE(ENTROPY_TOKENS)] = {
-EOB_TOKEN, 2, // 0 = EOB
-ZERO_TOKEN, 4, // 1 = ZERO
-ONE_TOKEN, 6, // 2 = ONE
@@ -66,12 +66,12 @@
-CATEGORY5_TOKEN, -CATEGORY6_TOKEN // 10 = CAT_FIVE
};
-static const vp9_tree_index cat1[2] = {0, 0};
-static const vp9_tree_index cat2[4] = {2, 2, 0, 0};
-static const vp9_tree_index cat3[6] = {2, 2, 4, 4, 0, 0};
-static const vp9_tree_index cat4[8] = {2, 2, 4, 4, 6, 6, 0, 0};
-static const vp9_tree_index cat5[10] = {2, 2, 4, 4, 6, 6, 8, 8, 0, 0};
-static const vp9_tree_index cat6[28] = {2, 2, 4, 4, 6, 6, 8, 8, 10, 10, 12, 12,
+static const vpx_tree_index cat1[2] = {0, 0};
+static const vpx_tree_index cat2[4] = {2, 2, 0, 0};
+static const vpx_tree_index cat3[6] = {2, 2, 4, 4, 0, 0};
+static const vpx_tree_index cat4[8] = {2, 2, 4, 4, 6, 6, 0, 0};
+static const vpx_tree_index cat5[10] = {2, 2, 4, 4, 6, 6, 8, 8, 0, 0};
+static const vpx_tree_index cat6[28] = {2, 2, 4, 4, 6, 6, 8, 8, 10, 10, 12, 12,
14, 14, 16, 16, 18, 18, 20, 20, 22, 22, 24, 24, 26, 26, 0, 0};
static const int16_t zero_cost[] = {0};
@@ -367,20 +367,20 @@
#endif
#if CONFIG_VP9_HIGHBITDEPTH
-static const vp9_tree_index cat1_high10[2] = {0, 0};
-static const vp9_tree_index cat2_high10[4] = {2, 2, 0, 0};
-static const vp9_tree_index cat3_high10[6] = {2, 2, 4, 4, 0, 0};
-static const vp9_tree_index cat4_high10[8] = {2, 2, 4, 4, 6, 6, 0, 0};
-static const vp9_tree_index cat5_high10[10] = {2, 2, 4, 4, 6, 6, 8, 8, 0, 0};
-static const vp9_tree_index cat6_high10[32] = {2, 2, 4, 4, 6, 6, 8, 8, 10, 10,
+static const vpx_tree_index cat1_high10[2] = {0, 0};
+static const vpx_tree_index cat2_high10[4] = {2, 2, 0, 0};
+static const vpx_tree_index cat3_high10[6] = {2, 2, 4, 4, 0, 0};
+static const vpx_tree_index cat4_high10[8] = {2, 2, 4, 4, 6, 6, 0, 0};
+static const vpx_tree_index cat5_high10[10] = {2, 2, 4, 4, 6, 6, 8, 8, 0, 0};
+static const vpx_tree_index cat6_high10[32] = {2, 2, 4, 4, 6, 6, 8, 8, 10, 10,
12, 12, 14, 14, 16, 16, 18, 18, 20, 20, 22, 22, 24, 24, 26, 26, 28, 28,
30, 30, 0, 0};
-static const vp9_tree_index cat1_high12[2] = {0, 0};
-static const vp9_tree_index cat2_high12[4] = {2, 2, 0, 0};
-static const vp9_tree_index cat3_high12[6] = {2, 2, 4, 4, 0, 0};
-static const vp9_tree_index cat4_high12[8] = {2, 2, 4, 4, 6, 6, 0, 0};
-static const vp9_tree_index cat5_high12[10] = {2, 2, 4, 4, 6, 6, 8, 8, 0, 0};
-static const vp9_tree_index cat6_high12[36] = {2, 2, 4, 4, 6, 6, 8, 8, 10, 10,
+static const vpx_tree_index cat1_high12[2] = {0, 0};
+static const vpx_tree_index cat2_high12[4] = {2, 2, 0, 0};
+static const vpx_tree_index cat3_high12[6] = {2, 2, 4, 4, 0, 0};
+static const vpx_tree_index cat4_high12[8] = {2, 2, 4, 4, 6, 6, 0, 0};
+static const vpx_tree_index cat5_high12[10] = {2, 2, 4, 4, 6, 6, 8, 8, 0, 0};
+static const vpx_tree_index cat6_high12[36] = {2, 2, 4, 4, 6, 6, 8, 8, 10, 10,
12, 12, 14, 14, 16, 16, 18, 18, 20, 20, 22, 22, 24, 24, 26, 26, 28, 28,
30, 30, 32, 32, 34, 34, 0, 0};
#endif
@@ -457,7 +457,7 @@
aoff, loff);
}
-static INLINE void add_token(TOKENEXTRA **t, const vp9_prob *context_tree,
+static INLINE void add_token(TOKENEXTRA **t, const vpx_prob *context_tree,
int32_t extra, uint8_t token,
uint8_t skip_eob_node,
unsigned int *counts) {
@@ -470,7 +470,7 @@
}
static INLINE void add_token_no_extra(TOKENEXTRA **t,
- const vp9_prob *context_tree,
+ const vpx_prob *context_tree,
uint8_t token,
uint8_t skip_eob_node,
unsigned int *counts) {
@@ -511,7 +511,7 @@
const int ref = is_inter_block(mbmi);
unsigned int (*const counts)[COEFF_CONTEXTS][ENTROPY_TOKENS] =
td->rd_counts.coef_counts[tx_size][type][ref];
- vp9_prob (*const coef_probs)[COEFF_CONTEXTS][UNCONSTRAINED_NODES] =
+ vpx_prob (*const coef_probs)[COEFF_CONTEXTS][UNCONSTRAINED_NODES] =
cpi->common.fc->coef_probs[tx_size][type][ref];
unsigned int (*const eob_branch)[COEFF_CONTEXTS] =
td->counts->eob_branch[tx_size][type][ref];
diff --git a/vp9/encoder/vp9_tokenize.h b/vp9/encoder/vp9_tokenize.h
index 81cc2e1..11b78ba 100644
--- a/vp9/encoder/vp9_tokenize.h
+++ b/vp9/encoder/vp9_tokenize.h
@@ -35,14 +35,14 @@
} TOKENVALUE;
typedef struct {
- const vp9_prob *context_tree;
+ const vpx_prob *context_tree;
EXTRABIT extra;
uint8_t token;
uint8_t skip_eob_node;
} TOKENEXTRA;
-extern const vp9_tree_index vp9_coef_tree[];
-extern const vp9_tree_index vp9_coef_con_tree[];
+extern const vpx_tree_index vp9_coef_tree[];
+extern const vpx_tree_index vp9_coef_con_tree[];
extern const struct vp9_token vp9_coef_encodings[];
int vp9_is_skippable_in_plane(MACROBLOCK *x, BLOCK_SIZE bsize, int plane);
diff --git a/vp9/encoder/vp9_treewriter.c b/vp9/encoder/vp9_treewriter.c
index bb04b40..0fc078e 100644
--- a/vp9/encoder/vp9_treewriter.c
+++ b/vp9/encoder/vp9_treewriter.c
@@ -10,13 +10,13 @@
#include "vp9/encoder/vp9_treewriter.h"
-static void tree2tok(struct vp9_token *tokens, const vp9_tree_index *tree,
+static void tree2tok(struct vp9_token *tokens, const vpx_tree_index *tree,
int i, int v, int l) {
v += v;
++l;
do {
- const vp9_tree_index j = tree[i++];
+ const vpx_tree_index j = tree[i++];
if (j <= 0) {
tokens[-j].value = v;
tokens[-j].len = l;
@@ -27,11 +27,11 @@
}
void vp9_tokens_from_tree(struct vp9_token *tokens,
- const vp9_tree_index *tree) {
+ const vpx_tree_index *tree) {
tree2tok(tokens, tree, 0, 0, 0);
}
-static unsigned int convert_distribution(unsigned int i, vp9_tree tree,
+static unsigned int convert_distribution(unsigned int i, vpx_tree tree,
unsigned int branch_ct[][2],
const unsigned int num_events[]) {
unsigned int left, right;
@@ -51,7 +51,7 @@
return left + right;
}
-void vp9_tree_probs_from_distribution(vp9_tree tree,
+void vp9_tree_probs_from_distribution(vpx_tree tree,
unsigned int branch_ct[/* n-1 */][2],
const unsigned int num_events[/* n */]) {
convert_distribution(0, tree, branch_ct, num_events);
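
tree2tok() above depends on the vpx_tree_index convention: a non-positive entry -t is the leaf for token t, and a positive entry is the offset of the next node pair. A self-contained sketch of the same traversal over a hypothetical 3-symbol tree:

#include <stdio.h>

typedef int vpx_tree_index;  /* stand-in; the real typedef lives in vpx_dsp/prob.h */
struct tok { int value; int len; };

/* Mirror of tree2tok(): visit the 0- and 1-branch of each node, growing
 * the bit prefix v by one bit per level until a leaf is reached. */
static void walk(struct tok *tokens, const vpx_tree_index *tree,
                 int i, int v, int l) {
  v += v;
  ++l;
  do {
    const vpx_tree_index j = tree[i++];
    if (j <= 0) {
      tokens[-j].value = v;
      tokens[-j].len = l;
    } else {
      walk(tokens, tree, j, v, l);
    }
  } while (++v & 1);
}

int main(void) {
  /* Hypothetical tree: token 0 = "0", token 1 = "10", token 2 = "11". */
  static const vpx_tree_index tree[4] = { 0, 2, -1, -2 };
  struct tok tokens[3];
  int t;
  walk(tokens, tree, 0, 0, 0);
  for (t = 0; t < 3; ++t)
    printf("token %d: value=%d len=%d\n", t, tokens[t].value, tokens[t].len);
  return 0;
}
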
diff --git a/vp9/encoder/vp9_treewriter.h b/vp9/encoder/vp9_treewriter.h
index 4a76d87..0f89350 100644
--- a/vp9/encoder/vp9_treewriter.h
+++ b/vp9/encoder/vp9_treewriter.h
@@ -11,13 +11,13 @@
#ifndef VP9_ENCODER_VP9_TREEWRITER_H_
#define VP9_ENCODER_VP9_TREEWRITER_H_
-#include "vp9/encoder/vp9_writer.h"
+#include "vpx_dsp/bitwriter.h"
#ifdef __cplusplus
extern "C" {
#endif
-void vp9_tree_probs_from_distribution(vp9_tree tree,
+void vp9_tree_probs_from_distribution(vpx_tree tree,
unsigned int branch_ct[ /* n - 1 */ ][2],
const unsigned int num_events[ /* n */ ]);
@@ -26,20 +26,20 @@
int len;
};
-void vp9_tokens_from_tree(struct vp9_token*, const vp9_tree_index *);
+void vp9_tokens_from_tree(struct vp9_token*, const vpx_tree_index *);
-static INLINE void vp9_write_tree(vp9_writer *w, const vp9_tree_index *tree,
- const vp9_prob *probs, int bits, int len,
- vp9_tree_index i) {
+static INLINE void vp9_write_tree(vpx_writer *w, const vpx_tree_index *tree,
+ const vpx_prob *probs, int bits, int len,
+ vpx_tree_index i) {
do {
const int bit = (bits >> --len) & 1;
- vp9_write(w, bit, probs[i >> 1]);
+ vpx_write(w, bit, probs[i >> 1]);
i = tree[i + bit];
} while (len);
}
-static INLINE void vp9_write_token(vp9_writer *w, const vp9_tree_index *tree,
- const vp9_prob *probs,
+static INLINE void vp9_write_token(vpx_writer *w, const vpx_tree_index *tree,
+ const vpx_prob *probs,
const struct vp9_token *token) {
vp9_write_tree(w, tree, probs, token->value, token->len, 0);
}
diff --git a/vp9/encoder/x86/vp9_dct32x32_sse2_impl.h b/vp9/encoder/x86/vp9_dct32x32_sse2_impl.h
index 5074d31..02773d8 100644
--- a/vp9/encoder/x86/vp9_dct32x32_sse2_impl.h
+++ b/vp9/encoder/x86/vp9_dct32x32_sse2_impl.h
@@ -10,9 +10,6 @@
#include <emmintrin.h> // SSE2
-#include "./vp9_rtcd.h"
-#include "vp9/common/vp9_idct.h" // for cospi constants
-#include "vp9/encoder/x86/vp9_dct_sse2.h"
#include "vp9/encoder/vp9_dct.h"
#include "vpx_ports/mem.h"
diff --git a/vp9/encoder/x86/vp9_dct_sse2.c b/vp9/encoder/x86/vp9_dct_sse2.c
index cff4fcb..c82f154 100644
--- a/vp9/encoder/x86/vp9_dct_sse2.c
+++ b/vp9/encoder/x86/vp9_dct_sse2.c
@@ -2266,108 +2266,6 @@
store_output(&in1, output);
}
-#if CONFIG_VP9_HIGHBITDEPTH
-/* These SSE2 versions of the FHT functions only actually use SSE2 in the
- * DCT_DCT case; in all other cases, they revert to C code identical to
- * that used by the C versions.
- */
-
-void vp9_highbd_fht4x4_sse2(const int16_t *input, tran_low_t *output,
- int stride, int tx_type) {
- if (tx_type == DCT_DCT) {
- vp9_highbd_fdct4x4_sse2(input, output, stride);
- } else {
- tran_low_t out[4 * 4];
- tran_low_t *outptr = &out[0];
- int i, j;
- tran_low_t temp_in[4], temp_out[4];
- const transform_2d ht = FHT_4[tx_type];
-
- // Columns
- for (i = 0; i < 4; ++i) {
- for (j = 0; j < 4; ++j)
- temp_in[j] = input[j * stride + i] * 16;
- if (i == 0 && temp_in[0])
- temp_in[0] += 1;
- ht.cols(temp_in, temp_out);
- for (j = 0; j < 4; ++j)
- outptr[j * 4 + i] = temp_out[j];
- }
-
- // Rows
- for (i = 0; i < 4; ++i) {
- for (j = 0; j < 4; ++j)
- temp_in[j] = out[j + i * 4];
- ht.rows(temp_in, temp_out);
- for (j = 0; j < 4; ++j)
- output[j + i * 4] = (temp_out[j] + 1) >> 2;
- }
- }
-}
-
-void vp9_highbd_fht8x8_sse2(const int16_t *input, tran_low_t *output,
- int stride, int tx_type) {
- if (tx_type == DCT_DCT) {
- vp9_highbd_fdct8x8_sse2(input, output, stride);
- } else {
- tran_low_t out[64];
- tran_low_t *outptr = &out[0];
- int i, j;
- tran_low_t temp_in[8], temp_out[8];
- const transform_2d ht = FHT_8[tx_type];
-
- // Columns
- for (i = 0; i < 8; ++i) {
- for (j = 0; j < 8; ++j)
- temp_in[j] = input[j * stride + i] * 4;
- ht.cols(temp_in, temp_out);
- for (j = 0; j < 8; ++j)
- outptr[j * 8 + i] = temp_out[j];
- }
-
- // Rows
- for (i = 0; i < 8; ++i) {
- for (j = 0; j < 8; ++j)
- temp_in[j] = out[j + i * 8];
- ht.rows(temp_in, temp_out);
- for (j = 0; j < 8; ++j)
- output[j + i * 8] = (temp_out[j] + (temp_out[j] < 0)) >> 1;
- }
- }
-}
-
-void vp9_highbd_fht16x16_sse2(const int16_t *input, tran_low_t *output,
- int stride, int tx_type) {
- if (tx_type == DCT_DCT) {
- vp9_highbd_fdct16x16_sse2(input, output, stride);
- } else {
- tran_low_t out[256];
- tran_low_t *outptr = &out[0];
- int i, j;
- tran_low_t temp_in[16], temp_out[16];
- const transform_2d ht = FHT_16[tx_type];
-
- // Columns
- for (i = 0; i < 16; ++i) {
- for (j = 0; j < 16; ++j)
- temp_in[j] = input[j * stride + i] * 4;
- ht.cols(temp_in, temp_out);
- for (j = 0; j < 16; ++j)
- outptr[j * 16 + i] = (temp_out[j] + 1 + (temp_out[j] < 0)) >> 2;
- }
-
- // Rows
- for (i = 0; i < 16; ++i) {
- for (j = 0; j < 16; ++j)
- temp_in[j] = out[j + i * 16];
- ht.rows(temp_in, temp_out);
- for (j = 0; j < 16; ++j)
- output[j + i * 16] = temp_out[j];
- }
- }
-}
-#endif // CONFIG_VP9_HIGHBITDEPTH
-
/*
* The DCTnxn functions are defined using the macros below. The main code for
* them is in separate files (vp9/encoder/x86/vp9_dct_sse2_impl.h &
@@ -2377,14 +2275,6 @@
#define DCT_HIGH_BIT_DEPTH 0
-#define FDCT4x4_2D vp9_fdct4x4_sse2
-#define FDCT8x8_2D vp9_fdct8x8_sse2
-#define FDCT16x16_2D vp9_fdct16x16_sse2
-#include "vp9/encoder/x86/vp9_dct_sse2_impl.h"
-#undef FDCT4x4_2D
-#undef FDCT8x8_2D
-#undef FDCT16x16_2D
-
#define FDCT32x32_2D vp9_fdct32x32_rd_sse2
#define FDCT32x32_HIGH_PRECISION 0
#include "vp9/encoder/x86/vp9_dct32x32_sse2_impl.h"
@@ -2404,14 +2294,6 @@
#define DCT_HIGH_BIT_DEPTH 1
-#define FDCT4x4_2D vp9_highbd_fdct4x4_sse2
-#define FDCT8x8_2D vp9_highbd_fdct8x8_sse2
-#define FDCT16x16_2D vp9_highbd_fdct16x16_sse2
-#include "vp9/encoder/x86/vp9_dct_sse2_impl.h" // NOLINT
-#undef FDCT4x4_2D
-#undef FDCT8x8_2D
-#undef FDCT16x16_2D
-
#define FDCT32x32_2D vp9_highbd_fdct32x32_rd_sse2
#define FDCT32x32_HIGH_PRECISION 0
#include "vp9/encoder/x86/vp9_dct32x32_sse2_impl.h" // NOLINT
diff --git a/vp9/encoder/x86/vp9_dct_ssse3_x86_64.asm b/vp9/encoder/x86/vp9_dct_ssse3_x86_64.asm
index 3a29aba..29074e7 100644
--- a/vp9/encoder/x86/vp9_dct_ssse3_x86_64.asm
+++ b/vp9/encoder/x86/vp9_dct_ssse3_x86_64.asm
@@ -13,49 +13,9 @@
; of the macro definitions are originally derived from the ffmpeg project.
; The current version applies to x86 64-bit only.
-SECTION_RODATA
-
-pw_11585x2: times 8 dw 23170
-pd_8192: times 4 dd 8192
-
-%macro TRANSFORM_COEFFS 2
-pw_%1_%2: dw %1, %2, %1, %2, %1, %2, %1, %2
-pw_%2_m%1: dw %2, -%1, %2, -%1, %2, -%1, %2, -%1
-%endmacro
-
-TRANSFORM_COEFFS 11585, 11585
-TRANSFORM_COEFFS 15137, 6270
-TRANSFORM_COEFFS 16069, 3196
-TRANSFORM_COEFFS 9102, 13623
-
SECTION .text
%if ARCH_X86_64
-%macro SUM_SUB 3
- psubw m%3, m%1, m%2
- paddw m%1, m%2
- SWAP %2, %3
-%endmacro
-
-; butterfly operation
-%macro MUL_ADD_2X 6 ; dst1, dst2, src, round, coefs1, coefs2
- pmaddwd m%1, m%3, %5
- pmaddwd m%2, m%3, %6
- paddd m%1, %4
- paddd m%2, %4
- psrad m%1, 14
- psrad m%2, 14
-%endmacro
-
-%macro BUTTERFLY_4X 7 ; dst1, dst2, coef1, coef2, round, tmp1, tmp2
- punpckhwd m%6, m%2, m%1
- MUL_ADD_2X %7, %6, %6, %5, [pw_%4_%3], [pw_%3_m%4]
- punpcklwd m%2, m%1
- MUL_ADD_2X %1, %2, %2, %5, [pw_%4_%3], [pw_%3_m%4]
- packssdw m%1, m%7
- packssdw m%2, m%6
-%endmacro
-
; matrix transpose
%macro INTERLEAVE_2X 4
punpckh%1 m%4, m%2, m%3
@@ -83,103 +43,6 @@
SWAP %4, %7
%endmacro
-; 1D forward 8x8 DCT transform
-%macro FDCT8_1D 1
- SUM_SUB 0, 7, 9
- SUM_SUB 1, 6, 9
- SUM_SUB 2, 5, 9
- SUM_SUB 3, 4, 9
-
- SUM_SUB 0, 3, 9
- SUM_SUB 1, 2, 9
- SUM_SUB 6, 5, 9
-%if %1 == 0
- SUM_SUB 0, 1, 9
-%endif
-
- BUTTERFLY_4X 2, 3, 6270, 15137, m8, 9, 10
-
- pmulhrsw m6, m12
- pmulhrsw m5, m12
-%if %1 == 0
- pmulhrsw m0, m12
- pmulhrsw m1, m12
-%else
- BUTTERFLY_4X 1, 0, 11585, 11585, m8, 9, 10
- SWAP 0, 1
-%endif
-
- SUM_SUB 4, 5, 9
- SUM_SUB 7, 6, 9
- BUTTERFLY_4X 4, 7, 3196, 16069, m8, 9, 10
- BUTTERFLY_4X 5, 6, 13623, 9102, m8, 9, 10
- SWAP 1, 4
- SWAP 3, 6
-%endmacro
-
-%macro DIVIDE_ROUND_2X 4 ; dst1, dst2, tmp1, tmp2
- psraw m%3, m%1, 15
- psraw m%4, m%2, 15
- psubw m%1, m%3
- psubw m%2, m%4
- psraw m%1, 1
- psraw m%2, 1
-%endmacro
-
-INIT_XMM ssse3
-cglobal fdct8x8, 3, 5, 13, input, output, stride
-
- mova m8, [pd_8192]
- mova m12, [pw_11585x2]
- pxor m11, m11
-
- lea r3, [2 * strideq]
- lea r4, [4 * strideq]
- mova m0, [inputq]
- mova m1, [inputq + r3]
- lea inputq, [inputq + r4]
- mova m2, [inputq]
- mova m3, [inputq + r3]
- lea inputq, [inputq + r4]
- mova m4, [inputq]
- mova m5, [inputq + r3]
- lea inputq, [inputq + r4]
- mova m6, [inputq]
- mova m7, [inputq + r3]
-
- ; left shift by 2 to increase forward transformation precision
- psllw m0, 2
- psllw m1, 2
- psllw m2, 2
- psllw m3, 2
- psllw m4, 2
- psllw m5, 2
- psllw m6, 2
- psllw m7, 2
-
- ; column transform
- FDCT8_1D 0
- TRANSPOSE8X8 0, 1, 2, 3, 4, 5, 6, 7, 9
-
- FDCT8_1D 1
- TRANSPOSE8X8 0, 1, 2, 3, 4, 5, 6, 7, 9
-
- DIVIDE_ROUND_2X 0, 1, 9, 10
- DIVIDE_ROUND_2X 2, 3, 9, 10
- DIVIDE_ROUND_2X 4, 5, 9, 10
- DIVIDE_ROUND_2X 6, 7, 9, 10
-
- mova [outputq + 0], m0
- mova [outputq + 16], m1
- mova [outputq + 32], m2
- mova [outputq + 48], m3
- mova [outputq + 64], m4
- mova [outputq + 80], m5
- mova [outputq + 96], m6
- mova [outputq + 112], m7
-
- RET
-
%macro HMD8_1D 0
psubw m8, m0, m1
psubw m9, m2, m3
diff --git a/vp9/encoder/x86/vp9_quantize_sse2.c b/vp9/encoder/x86/vp9_quantize_sse2.c
index 71fdfd7..2071dfe 100644
--- a/vp9/encoder/x86/vp9_quantize_sse2.c
+++ b/vp9/encoder/x86/vp9_quantize_sse2.c
@@ -14,214 +14,6 @@
#include "./vp9_rtcd.h"
#include "vpx/vpx_integer.h"
-void vp9_quantize_b_sse2(const int16_t* coeff_ptr, intptr_t n_coeffs,
- int skip_block, const int16_t* zbin_ptr,
- const int16_t* round_ptr, const int16_t* quant_ptr,
- const int16_t* quant_shift_ptr, int16_t* qcoeff_ptr,
- int16_t* dqcoeff_ptr, const int16_t* dequant_ptr,
- uint16_t* eob_ptr,
- const int16_t* scan_ptr,
- const int16_t* iscan_ptr) {
- __m128i zero;
- (void)scan_ptr;
-
- coeff_ptr += n_coeffs;
- iscan_ptr += n_coeffs;
- qcoeff_ptr += n_coeffs;
- dqcoeff_ptr += n_coeffs;
- n_coeffs = -n_coeffs;
- zero = _mm_setzero_si128();
- if (!skip_block) {
- __m128i eob;
- __m128i zbin;
- __m128i round, quant, dequant, shift;
- {
- __m128i coeff0, coeff1;
-
- // Setup global values
- {
- __m128i pw_1;
- zbin = _mm_load_si128((const __m128i*)zbin_ptr);
- round = _mm_load_si128((const __m128i*)round_ptr);
- quant = _mm_load_si128((const __m128i*)quant_ptr);
- pw_1 = _mm_set1_epi16(1);
- zbin = _mm_sub_epi16(zbin, pw_1);
- dequant = _mm_load_si128((const __m128i*)dequant_ptr);
- shift = _mm_load_si128((const __m128i*)quant_shift_ptr);
- }
-
- {
- __m128i coeff0_sign, coeff1_sign;
- __m128i qcoeff0, qcoeff1;
- __m128i qtmp0, qtmp1;
- __m128i cmp_mask0, cmp_mask1;
- // Do DC and first 15 AC
- coeff0 = _mm_load_si128((const __m128i*)(coeff_ptr + n_coeffs));
- coeff1 = _mm_load_si128((const __m128i*)(coeff_ptr + n_coeffs) + 1);
-
- // Poor man's sign extract
- coeff0_sign = _mm_srai_epi16(coeff0, 15);
- coeff1_sign = _mm_srai_epi16(coeff1, 15);
- qcoeff0 = _mm_xor_si128(coeff0, coeff0_sign);
- qcoeff1 = _mm_xor_si128(coeff1, coeff1_sign);
- qcoeff0 = _mm_sub_epi16(qcoeff0, coeff0_sign);
- qcoeff1 = _mm_sub_epi16(qcoeff1, coeff1_sign);
-
- cmp_mask0 = _mm_cmpgt_epi16(qcoeff0, zbin);
- zbin = _mm_unpackhi_epi64(zbin, zbin); // Switch DC to AC
- cmp_mask1 = _mm_cmpgt_epi16(qcoeff1, zbin);
- qcoeff0 = _mm_adds_epi16(qcoeff0, round);
- round = _mm_unpackhi_epi64(round, round);
- qcoeff1 = _mm_adds_epi16(qcoeff1, round);
- qtmp0 = _mm_mulhi_epi16(qcoeff0, quant);
- quant = _mm_unpackhi_epi64(quant, quant);
- qtmp1 = _mm_mulhi_epi16(qcoeff1, quant);
- qtmp0 = _mm_add_epi16(qtmp0, qcoeff0);
- qtmp1 = _mm_add_epi16(qtmp1, qcoeff1);
- qcoeff0 = _mm_mulhi_epi16(qtmp0, shift);
- shift = _mm_unpackhi_epi64(shift, shift);
- qcoeff1 = _mm_mulhi_epi16(qtmp1, shift);
-
- // Reinsert signs
- qcoeff0 = _mm_xor_si128(qcoeff0, coeff0_sign);
- qcoeff1 = _mm_xor_si128(qcoeff1, coeff1_sign);
- qcoeff0 = _mm_sub_epi16(qcoeff0, coeff0_sign);
- qcoeff1 = _mm_sub_epi16(qcoeff1, coeff1_sign);
-
- // Mask out zbin threshold coeffs
- qcoeff0 = _mm_and_si128(qcoeff0, cmp_mask0);
- qcoeff1 = _mm_and_si128(qcoeff1, cmp_mask1);
-
- _mm_store_si128((__m128i*)(qcoeff_ptr + n_coeffs), qcoeff0);
- _mm_store_si128((__m128i*)(qcoeff_ptr + n_coeffs) + 1, qcoeff1);
-
- coeff0 = _mm_mullo_epi16(qcoeff0, dequant);
- dequant = _mm_unpackhi_epi64(dequant, dequant);
- coeff1 = _mm_mullo_epi16(qcoeff1, dequant);
-
- _mm_store_si128((__m128i*)(dqcoeff_ptr + n_coeffs), coeff0);
- _mm_store_si128((__m128i*)(dqcoeff_ptr + n_coeffs) + 1, coeff1);
- }
-
- {
- // Scan for eob
- __m128i zero_coeff0, zero_coeff1;
- __m128i nzero_coeff0, nzero_coeff1;
- __m128i iscan0, iscan1;
- __m128i eob1;
- zero_coeff0 = _mm_cmpeq_epi16(coeff0, zero);
- zero_coeff1 = _mm_cmpeq_epi16(coeff1, zero);
- nzero_coeff0 = _mm_cmpeq_epi16(zero_coeff0, zero);
- nzero_coeff1 = _mm_cmpeq_epi16(zero_coeff1, zero);
- iscan0 = _mm_load_si128((const __m128i*)(iscan_ptr + n_coeffs));
- iscan1 = _mm_load_si128((const __m128i*)(iscan_ptr + n_coeffs) + 1);
- // Add one to convert from indices to counts
- iscan0 = _mm_sub_epi16(iscan0, nzero_coeff0);
- iscan1 = _mm_sub_epi16(iscan1, nzero_coeff1);
- eob = _mm_and_si128(iscan0, nzero_coeff0);
- eob1 = _mm_and_si128(iscan1, nzero_coeff1);
- eob = _mm_max_epi16(eob, eob1);
- }
- n_coeffs += 8 * 2;
- }
-
- // AC only loop
- while (n_coeffs < 0) {
- __m128i coeff0, coeff1;
- {
- __m128i coeff0_sign, coeff1_sign;
- __m128i qcoeff0, qcoeff1;
- __m128i qtmp0, qtmp1;
- __m128i cmp_mask0, cmp_mask1;
-
- coeff0 = _mm_load_si128((const __m128i*)(coeff_ptr + n_coeffs));
- coeff1 = _mm_load_si128((const __m128i*)(coeff_ptr + n_coeffs) + 1);
-
- // Poor man's sign extract
- coeff0_sign = _mm_srai_epi16(coeff0, 15);
- coeff1_sign = _mm_srai_epi16(coeff1, 15);
- qcoeff0 = _mm_xor_si128(coeff0, coeff0_sign);
- qcoeff1 = _mm_xor_si128(coeff1, coeff1_sign);
- qcoeff0 = _mm_sub_epi16(qcoeff0, coeff0_sign);
- qcoeff1 = _mm_sub_epi16(qcoeff1, coeff1_sign);
-
- cmp_mask0 = _mm_cmpgt_epi16(qcoeff0, zbin);
- cmp_mask1 = _mm_cmpgt_epi16(qcoeff1, zbin);
- qcoeff0 = _mm_adds_epi16(qcoeff0, round);
- qcoeff1 = _mm_adds_epi16(qcoeff1, round);
- qtmp0 = _mm_mulhi_epi16(qcoeff0, quant);
- qtmp1 = _mm_mulhi_epi16(qcoeff1, quant);
- qtmp0 = _mm_add_epi16(qtmp0, qcoeff0);
- qtmp1 = _mm_add_epi16(qtmp1, qcoeff1);
- qcoeff0 = _mm_mulhi_epi16(qtmp0, shift);
- qcoeff1 = _mm_mulhi_epi16(qtmp1, shift);
-
- // Reinsert signs
- qcoeff0 = _mm_xor_si128(qcoeff0, coeff0_sign);
- qcoeff1 = _mm_xor_si128(qcoeff1, coeff1_sign);
- qcoeff0 = _mm_sub_epi16(qcoeff0, coeff0_sign);
- qcoeff1 = _mm_sub_epi16(qcoeff1, coeff1_sign);
-
- // Mask out zbin threshold coeffs
- qcoeff0 = _mm_and_si128(qcoeff0, cmp_mask0);
- qcoeff1 = _mm_and_si128(qcoeff1, cmp_mask1);
-
- _mm_store_si128((__m128i*)(qcoeff_ptr + n_coeffs), qcoeff0);
- _mm_store_si128((__m128i*)(qcoeff_ptr + n_coeffs) + 1, qcoeff1);
-
- coeff0 = _mm_mullo_epi16(qcoeff0, dequant);
- coeff1 = _mm_mullo_epi16(qcoeff1, dequant);
-
- _mm_store_si128((__m128i*)(dqcoeff_ptr + n_coeffs), coeff0);
- _mm_store_si128((__m128i*)(dqcoeff_ptr + n_coeffs) + 1, coeff1);
- }
-
- {
- // Scan for eob
- __m128i zero_coeff0, zero_coeff1;
- __m128i nzero_coeff0, nzero_coeff1;
- __m128i iscan0, iscan1;
- __m128i eob0, eob1;
- zero_coeff0 = _mm_cmpeq_epi16(coeff0, zero);
- zero_coeff1 = _mm_cmpeq_epi16(coeff1, zero);
- nzero_coeff0 = _mm_cmpeq_epi16(zero_coeff0, zero);
- nzero_coeff1 = _mm_cmpeq_epi16(zero_coeff1, zero);
- iscan0 = _mm_load_si128((const __m128i*)(iscan_ptr + n_coeffs));
- iscan1 = _mm_load_si128((const __m128i*)(iscan_ptr + n_coeffs) + 1);
- // Add one to convert from indices to counts
- iscan0 = _mm_sub_epi16(iscan0, nzero_coeff0);
- iscan1 = _mm_sub_epi16(iscan1, nzero_coeff1);
- eob0 = _mm_and_si128(iscan0, nzero_coeff0);
- eob1 = _mm_and_si128(iscan1, nzero_coeff1);
- eob0 = _mm_max_epi16(eob0, eob1);
- eob = _mm_max_epi16(eob, eob0);
- }
- n_coeffs += 8 * 2;
- }
-
- // Accumulate EOB
- {
- __m128i eob_shuffled;
- eob_shuffled = _mm_shuffle_epi32(eob, 0xe);
- eob = _mm_max_epi16(eob, eob_shuffled);
- eob_shuffled = _mm_shufflelo_epi16(eob, 0xe);
- eob = _mm_max_epi16(eob, eob_shuffled);
- eob_shuffled = _mm_shufflelo_epi16(eob, 0x1);
- eob = _mm_max_epi16(eob, eob_shuffled);
- *eob_ptr = _mm_extract_epi16(eob, 1);
- }
- } else {
- do {
- _mm_store_si128((__m128i*)(dqcoeff_ptr + n_coeffs), zero);
- _mm_store_si128((__m128i*)(dqcoeff_ptr + n_coeffs) + 1, zero);
- _mm_store_si128((__m128i*)(qcoeff_ptr + n_coeffs), zero);
- _mm_store_si128((__m128i*)(qcoeff_ptr + n_coeffs) + 1, zero);
- n_coeffs += 8 * 2;
- } while (n_coeffs < 0);
- *eob_ptr = 0;
- }
-}
-
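
The "poor man's sign extract" in the deleted function is the standard branch-free absolute value: an arithmetic shift yields an all-ones or all-zero mask, and xor-then-subtract applies or removes the sign. A scalar sketch of the identity (hypothetical helper names; assumes arithmetic right shift of negative values, which _mm_srai_epi16 guarantees in the vector version):

#include <assert.h>
#include <stdint.h>

static int16_t abs16(int16_t x) {
  const int16_t sign = x >> 15;  /* arithmetic shift: 0 or -1 */
  return (int16_t)((x ^ sign) - sign);
}

/* Applying the same transform with the source's sign mask restores it,
 * which is how the kernel reinserts signs after quantizing magnitudes. */
static int16_t copysign16(int16_t mag, int16_t src) {
  const int16_t sign = src >> 15;
  return (int16_t)((mag ^ sign) - sign);
}

int main(void) {
  int v;
  for (v = -300; v <= 300; ++v) {
    assert(abs16((int16_t)v) == (v < 0 ? -v : v));
    assert(copysign16(abs16((int16_t)v), (int16_t)v) == v);
  }
  return 0;
}
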
void vp9_quantize_fp_sse2(const int16_t* coeff_ptr, intptr_t n_coeffs,
int skip_block, const int16_t* zbin_ptr,
const int16_t* round_ptr, const int16_t* quant_ptr,
diff --git a/vp9/encoder/x86/vp9_quantize_ssse3_x86_64.asm b/vp9/encoder/x86/vp9_quantize_ssse3_x86_64.asm
index 449d52b..ec2e87c 100644
--- a/vp9/encoder/x86/vp9_quantize_ssse3_x86_64.asm
+++ b/vp9/encoder/x86/vp9_quantize_ssse3_x86_64.asm
@@ -15,206 +15,6 @@
SECTION .text
-; TODO(yunqingwang): fix quantize_b code for the skip=1 case.
-%macro QUANTIZE_FN 2
-cglobal quantize_%1, 0, %2, 15, coeff, ncoeff, skip, zbin, round, quant, \
- shift, qcoeff, dqcoeff, dequant, \
- eob, scan, iscan
- cmp dword skipm, 0
- jne .blank
-
- ; actual quantize loop - setup pointers, rounders, etc.
- movifnidn coeffq, coeffmp
- movifnidn ncoeffq, ncoeffmp
- mov r2, dequantmp
- movifnidn zbinq, zbinmp
- movifnidn roundq, roundmp
- movifnidn quantq, quantmp
- mova m0, [zbinq] ; m0 = zbin
- mova m1, [roundq] ; m1 = round
- mova m2, [quantq] ; m2 = quant
-%ifidn %1, b_32x32
- pcmpeqw m5, m5
- psrlw m5, 15
- paddw m0, m5
- paddw m1, m5
- psrlw m0, 1 ; m0 = (m0 + 1) / 2
- psrlw m1, 1 ; m1 = (m1 + 1) / 2
-%endif
- mova m3, [r2q] ; m3 = dequant
- psubw m0, [pw_1]
- mov r2, shiftmp
- mov r3, qcoeffmp
- mova m4, [r2] ; m4 = shift
- mov r4, dqcoeffmp
- mov r5, iscanmp
-%ifidn %1, b_32x32
- psllw m4, 1
-%endif
- pxor m5, m5 ; m5 = dedicated zero
- DEFINE_ARGS coeff, ncoeff, d1, qcoeff, dqcoeff, iscan, d2, d3, d4, d5, eob
- lea coeffq, [ coeffq+ncoeffq*2]
- lea iscanq, [ iscanq+ncoeffq*2]
- lea qcoeffq, [ qcoeffq+ncoeffq*2]
- lea dqcoeffq, [dqcoeffq+ncoeffq*2]
- neg ncoeffq
-
- ; get DC and first 15 AC coeffs
- mova m9, [ coeffq+ncoeffq*2+ 0] ; m9 = c[i]
- mova m10, [ coeffq+ncoeffq*2+16] ; m10 = c[i]
- pabsw m6, m9 ; m6 = abs(m9)
- pabsw m11, m10 ; m11 = abs(m10)
- pcmpgtw m7, m6, m0 ; m7 = c[i] >= zbin
- punpckhqdq m0, m0
- pcmpgtw m12, m11, m0 ; m12 = c[i] >= zbin
- paddsw m6, m1 ; m6 += round
- punpckhqdq m1, m1
- paddsw m11, m1 ; m11 += round
- pmulhw m8, m6, m2 ; m8 = m6*q>>16
- punpckhqdq m2, m2
- pmulhw m13, m11, m2 ; m13 = m11*q>>16
- paddw m8, m6 ; m8 += m6
- paddw m13, m11 ; m13 += m11
- pmulhw m8, m4 ; m8 = m8*qsh>>16
- punpckhqdq m4, m4
- pmulhw m13, m4 ; m13 = m13*qsh>>16
- psignw m8, m9 ; m8 = reinsert sign
- psignw m13, m10 ; m13 = reinsert sign
- pand m8, m7
- pand m13, m12
- mova [qcoeffq+ncoeffq*2+ 0], m8
- mova [qcoeffq+ncoeffq*2+16], m13
-%ifidn %1, b_32x32
- pabsw m8, m8
- pabsw m13, m13
-%endif
- pmullw m8, m3 ; dqc[i] = qc[i] * q
- punpckhqdq m3, m3
- pmullw m13, m3 ; dqc[i] = qc[i] * q
-%ifidn %1, b_32x32
- psrlw m8, 1
- psrlw m13, 1
- psignw m8, m9
- psignw m13, m10
-%endif
- mova [dqcoeffq+ncoeffq*2+ 0], m8
- mova [dqcoeffq+ncoeffq*2+16], m13
- pcmpeqw m8, m5 ; m8 = c[i] == 0
- pcmpeqw m13, m5 ; m13 = c[i] == 0
- mova m6, [ iscanq+ncoeffq*2+ 0] ; m6 = scan[i]
- mova m11, [ iscanq+ncoeffq*2+16] ; m11 = scan[i]
- psubw m6, m7 ; m6 = scan[i] + 1
- psubw m11, m12 ; m11 = scan[i] + 1
- pandn m8, m6 ; m8 = max(eob)
- pandn m13, m11 ; m13 = max(eob)
- pmaxsw m8, m13
- add ncoeffq, mmsize
- jz .accumulate_eob
-
-.ac_only_loop:
- mova m9, [ coeffq+ncoeffq*2+ 0] ; m9 = c[i]
- mova m10, [ coeffq+ncoeffq*2+16] ; m10 = c[i]
- pabsw m6, m9 ; m6 = abs(m9)
- pabsw m11, m10 ; m11 = abs(m10)
- pcmpgtw m7, m6, m0 ; m7 = c[i] >= zbin
- pcmpgtw m12, m11, m0 ; m12 = c[i] >= zbin
-%ifidn %1, b_32x32
- pmovmskb r6d, m7
- pmovmskb r2d, m12
- or r6, r2
- jz .skip_iter
-%endif
- paddsw m6, m1 ; m6 += round
- paddsw m11, m1 ; m11 += round
- pmulhw m14, m6, m2 ; m14 = m6*q>>16
- pmulhw m13, m11, m2 ; m13 = m11*q>>16
- paddw m14, m6 ; m14 += m6
- paddw m13, m11 ; m13 += m11
- pmulhw m14, m4 ; m14 = m14*qsh>>16
- pmulhw m13, m4 ; m13 = m13*qsh>>16
- psignw m14, m9 ; m14 = reinsert sign
- psignw m13, m10 ; m13 = reinsert sign
- pand m14, m7
- pand m13, m12
- mova [qcoeffq+ncoeffq*2+ 0], m14
- mova [qcoeffq+ncoeffq*2+16], m13
-%ifidn %1, b_32x32
- pabsw m14, m14
- pabsw m13, m13
-%endif
- pmullw m14, m3 ; dqc[i] = qc[i] * q
- pmullw m13, m3 ; dqc[i] = qc[i] * q
-%ifidn %1, b_32x32
- psrlw m14, 1
- psrlw m13, 1
- psignw m14, m9
- psignw m13, m10
-%endif
- mova [dqcoeffq+ncoeffq*2+ 0], m14
- mova [dqcoeffq+ncoeffq*2+16], m13
- pcmpeqw m14, m5 ; m14 = c[i] == 0
- pcmpeqw m13, m5 ; m13 = c[i] == 0
- mova m6, [ iscanq+ncoeffq*2+ 0] ; m6 = scan[i]
- mova m11, [ iscanq+ncoeffq*2+16] ; m11 = scan[i]
- psubw m6, m7 ; m6 = scan[i] + 1
- psubw m11, m12 ; m11 = scan[i] + 1
- pandn m14, m6 ; m14 = max(eob)
- pandn m13, m11 ; m13 = max(eob)
- pmaxsw m8, m14
- pmaxsw m8, m13
- add ncoeffq, mmsize
- jl .ac_only_loop
-
-%ifidn %1, b_32x32
- jmp .accumulate_eob
-.skip_iter:
- mova [qcoeffq+ncoeffq*2+ 0], m5
- mova [qcoeffq+ncoeffq*2+16], m5
- mova [dqcoeffq+ncoeffq*2+ 0], m5
- mova [dqcoeffq+ncoeffq*2+16], m5
- add ncoeffq, mmsize
- jl .ac_only_loop
-%endif
-
-.accumulate_eob:
- ; horizontally accumulate/max eobs and write into [eob] memory pointer
- mov r2, eobmp
- pshufd m7, m8, 0xe
- pmaxsw m8, m7
- pshuflw m7, m8, 0xe
- pmaxsw m8, m7
- pshuflw m7, m8, 0x1
- pmaxsw m8, m7
- pextrw r6, m8, 0
- mov [r2], r6
- RET
-
- ; skip-block, i.e. just write all zeroes
-.blank:
- mov r0, dqcoeffmp
- movifnidn ncoeffq, ncoeffmp
- mov r2, qcoeffmp
- mov r3, eobmp
- DEFINE_ARGS dqcoeff, ncoeff, qcoeff, eob
- lea dqcoeffq, [dqcoeffq+ncoeffq*2]
- lea qcoeffq, [ qcoeffq+ncoeffq*2]
- neg ncoeffq
- pxor m7, m7
-.blank_loop:
- mova [dqcoeffq+ncoeffq*2+ 0], m7
- mova [dqcoeffq+ncoeffq*2+16], m7
- mova [qcoeffq+ncoeffq*2+ 0], m7
- mova [qcoeffq+ncoeffq*2+16], m7
- add ncoeffq, mmsize
- jl .blank_loop
- mov word [eobq], 0
- RET
-%endmacro
-
-INIT_XMM ssse3
-QUANTIZE_FN b, 7
-QUANTIZE_FN b_32x32, 7
-
%macro QUANTIZE_FP 2
cglobal quantize_%1, 0, %2, 15, coeff, ncoeff, skip, zbin, round, quant, \
shift, qcoeff, dqcoeff, dequant, \
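
The QUANTIZE_FN macro removed above implements the dead-zone quantizer: coefficients below zbin are zeroed, the rest are scaled by quant and quant_shift in Q16 fixed point and then multiplied back up by dequant. A scalar model of one lane (illustrative; the SIMD code uses saturating 16-bit ops, and the library's tables encode quant/shift in a widened form):

#include <stdio.h>
#include <stdlib.h>

static void quantize_b_lane(int c, int zbin, int round, int quant,
                            int quant_shift, int dequant,
                            int *qc, int *dqc) {
  const int abs_c = abs(c);
  *qc = 0;
  *dqc = 0;
  if (abs_c > zbin - 1) {                          /* dead zone: below zbin -> 0 */
    const int tmp = abs_c + round;                 /* paddsw round               */
    const int tmp2 = tmp + ((tmp * quant) >> 16);  /* pmulhw quant, paddw        */
    int q = (tmp2 * quant_shift) >> 16;            /* pmulhw quant_shift         */
    if (c < 0) q = -q;                             /* psignw: reinsert the sign  */
    *qc = q;
    *dqc = q * dequant;                            /* pmullw: dqc[i] = qc[i] * q */
  }
}

int main(void) {
  int qc, dqc;
  /* Hypothetical table values roughly matching a dequant step of 40. */
  quantize_b_lane(-500, 30, 20, 13107, 1365, 40, &qc, &dqc);
  printf("qc=%d dqc=%d\n", qc, dqc);  /* qc=-12, dqc=-480 */
  return 0;
}
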
diff --git a/vp9/vp9_common.mk b/vp9/vp9_common.mk
index bcab558..c8cf973 100644
--- a/vp9/vp9_common.mk
+++ b/vp9/vp9_common.mk
@@ -38,8 +38,6 @@
VP9_COMMON_SRCS-yes += common/vp9_onyxc_int.h
VP9_COMMON_SRCS-yes += common/vp9_pred_common.h
VP9_COMMON_SRCS-yes += common/vp9_pred_common.c
-VP9_COMMON_SRCS-yes += common/vp9_prob.h
-VP9_COMMON_SRCS-yes += common/vp9_prob.c
VP9_COMMON_SRCS-yes += common/vp9_quant_common.h
VP9_COMMON_SRCS-yes += common/vp9_reconinter.h
VP9_COMMON_SRCS-yes += common/vp9_reconintra.h
@@ -54,7 +52,6 @@
VP9_COMMON_SRCS-yes += common/vp9_tile_common.h
VP9_COMMON_SRCS-yes += common/vp9_tile_common.c
VP9_COMMON_SRCS-yes += common/vp9_loopfilter.c
-VP9_COMMON_SRCS-yes += common/vp9_loopfilter_filters.c
VP9_COMMON_SRCS-yes += common/vp9_thread_common.c
VP9_COMMON_SRCS-yes += common/vp9_mvref_common.c
VP9_COMMON_SRCS-yes += common/vp9_mvref_common.h
@@ -69,14 +66,11 @@
VP9_COMMON_SRCS-$(ARCH_X86)$(ARCH_X86_64) += common/x86/convolve.h
VP9_COMMON_SRCS-$(ARCH_X86)$(ARCH_X86_64) += common/x86/vp9_asm_stubs.c
-VP9_COMMON_SRCS-$(ARCH_X86)$(ARCH_X86_64) += common/x86/vp9_loopfilter_intrin_sse2.c
-VP9_COMMON_SRCS-$(HAVE_AVX2) += common/x86/vp9_loopfilter_intrin_avx2.c
VP9_COMMON_SRCS-$(CONFIG_VP9_POSTPROC) += common/vp9_postproc.h
VP9_COMMON_SRCS-$(CONFIG_VP9_POSTPROC) += common/vp9_postproc.c
VP9_COMMON_SRCS-$(CONFIG_VP9_POSTPROC) += common/vp9_mfqe.h
VP9_COMMON_SRCS-$(CONFIG_VP9_POSTPROC) += common/vp9_mfqe.c
VP9_COMMON_SRCS-$(HAVE_SSE2) += common/x86/vp9_idct_sse2.asm
-VP9_COMMON_SRCS-$(HAVE_MMX) += common/x86/vp9_loopfilter_mmx.asm
VP9_COMMON_SRCS-$(HAVE_SSE2) += common/x86/vp9_subpixel_8t_sse2.asm
VP9_COMMON_SRCS-$(HAVE_SSE2) += common/x86/vp9_subpixel_bilinear_sse2.asm
VP9_COMMON_SRCS-$(HAVE_SSSE3) += common/x86/vp9_subpixel_8t_ssse3.asm
@@ -95,7 +89,6 @@
endif
ifeq ($(CONFIG_VP9_HIGHBITDEPTH),yes)
-VP9_COMMON_SRCS-$(HAVE_SSE2) += common/x86/vp9_high_loopfilter_intrin_sse2.c
VP9_COMMON_SRCS-$(HAVE_SSE2) += common/x86/vp9_high_subpixel_8t_sse2.asm
VP9_COMMON_SRCS-$(HAVE_SSE2) += common/x86/vp9_high_subpixel_bilinear_sse2.asm
ifeq ($(CONFIG_USE_X86INC),yes)
@@ -123,13 +116,6 @@
VP9_COMMON_SRCS-$(HAVE_DSPR2) += common/mips/dspr2/vp9_itrans16_dspr2.c
VP9_COMMON_SRCS-$(HAVE_DSPR2) += common/mips/dspr2/vp9_itrans32_cols_dspr2.c
VP9_COMMON_SRCS-$(HAVE_DSPR2) += common/mips/dspr2/vp9_itrans32_dspr2.c
-VP9_COMMON_SRCS-$(HAVE_DSPR2) += common/mips/dspr2/vp9_loopfilter_filters_dspr2.c
-VP9_COMMON_SRCS-$(HAVE_DSPR2) += common/mips/dspr2/vp9_loopfilter_filters_dspr2.h
-VP9_COMMON_SRCS-$(HAVE_DSPR2) += common/mips/dspr2/vp9_loopfilter_macros_dspr2.h
-VP9_COMMON_SRCS-$(HAVE_DSPR2) += common/mips/dspr2/vp9_loopfilter_masks_dspr2.h
-VP9_COMMON_SRCS-$(HAVE_DSPR2) += common/mips/dspr2/vp9_mbloop_loopfilter_dspr2.c
-VP9_COMMON_SRCS-$(HAVE_DSPR2) += common/mips/dspr2/vp9_mblpf_horiz_loopfilter_dspr2.c
-VP9_COMMON_SRCS-$(HAVE_DSPR2) += common/mips/dspr2/vp9_mblpf_vert_loopfilter_dspr2.c
# common (msa)
VP9_COMMON_SRCS-$(HAVE_MSA) += common/mips/msa/vp9_convolve8_avg_horiz_msa.c
@@ -147,10 +133,6 @@
VP9_COMMON_SRCS-$(HAVE_MSA) += common/mips/msa/vp9_idct32x32_msa.c
VP9_COMMON_SRCS-$(HAVE_MSA) += common/mips/msa/vp9_idct_msa.h
VP9_COMMON_SRCS-$(HAVE_MSA) += common/mips/msa/vp9_intra_predict_msa.c
-VP9_COMMON_SRCS-$(HAVE_MSA) += common/mips/msa/vp9_loopfilter_4_msa.c
-VP9_COMMON_SRCS-$(HAVE_MSA) += common/mips/msa/vp9_loopfilter_8_msa.c
-VP9_COMMON_SRCS-$(HAVE_MSA) += common/mips/msa/vp9_loopfilter_16_msa.c
-VP9_COMMON_SRCS-$(HAVE_MSA) += common/mips/msa/vp9_loopfilter_msa.h
ifeq ($(CONFIG_VP9_POSTPROC),yes)
VP9_COMMON_SRCS-$(HAVE_MSA) += common/mips/msa/vp9_mfqe_msa.c
@@ -165,16 +147,12 @@
endif
endif
-VP9_COMMON_SRCS-$(HAVE_NEON_ASM) += common/arm/neon/vp9_loopfilter_16_neon_asm$(ASM)
-VP9_COMMON_SRCS-$(HAVE_NEON_ASM) += common/arm/neon/vp9_loopfilter_8_neon_asm$(ASM)
-VP9_COMMON_SRCS-$(HAVE_NEON_ASM) += common/arm/neon/vp9_mb_lpf_neon$(ASM)
VP9_COMMON_SRCS-$(HAVE_NEON_ASM) += common/arm/neon/vp9_save_reg_neon$(ASM)
ifneq ($(CONFIG_VP9_HIGHBITDEPTH),yes)
VP9_COMMON_SRCS-$(HAVE_NEON) += common/arm/neon/vp9_iht4x4_add_neon.c
VP9_COMMON_SRCS-$(HAVE_NEON) += common/arm/neon/vp9_iht8x8_add_neon.c
endif
-VP9_COMMON_SRCS-$(HAVE_NEON) += common/arm/neon/vp9_loopfilter_neon.c
# neon with assembly and intrinsics implementations. If both are available
# prefer assembly.
@@ -193,7 +171,6 @@
VP9_COMMON_SRCS-yes += common/arm/neon/vp9_idct4x4_add_neon_asm$(ASM)
VP9_COMMON_SRCS-yes += common/arm/neon/vp9_idct8x8_1_add_neon_asm$(ASM)
VP9_COMMON_SRCS-yes += common/arm/neon/vp9_idct8x8_add_neon_asm$(ASM)
-VP9_COMMON_SRCS-yes += common/arm/neon/vp9_loopfilter_4_neon_asm$(ASM)
VP9_COMMON_SRCS-yes += common/arm/neon/vp9_reconintra_neon_asm$(ASM)
else
ifeq ($(HAVE_NEON), yes)
@@ -211,11 +188,6 @@
VP9_COMMON_SRCS-yes += common/arm/neon/vp9_idct4x4_add_neon.c
VP9_COMMON_SRCS-yes += common/arm/neon/vp9_idct8x8_1_add_neon.c
VP9_COMMON_SRCS-yes += common/arm/neon/vp9_idct8x8_add_neon.c
-VP9_COMMON_SRCS-yes += common/arm/neon/vp9_loopfilter_16_neon.c
-VP9_COMMON_SRCS-yes += common/arm/neon/vp9_loopfilter_4_neon.c
-# TODO(johannkoenig): re-enable when chromium build is fixed
-# # https://code.google.com/p/chromium/issues/detail?id=443839
-#VP9_COMMON_SRCS-yes += common/arm/neon/vp9_loopfilter_8_neon.c
endif # HAVE_NEON
endif # HAVE_NEON_ASM
diff --git a/vp9/vp9_cx_iface.c b/vp9/vp9_cx_iface.c
index d2d9288..f155b9a 100644
--- a/vp9/vp9_cx_iface.c
+++ b/vp9/vp9_cx_iface.c
@@ -173,9 +173,12 @@
RANGE_CHECK(cfg, g_pass, VPX_RC_ONE_PASS, VPX_RC_LAST_PASS);
RANGE_CHECK(extra_cfg, min_gf_interval, 0, (MAX_LAG_BUFFERS - 1));
RANGE_CHECK(extra_cfg, max_gf_interval, 0, (MAX_LAG_BUFFERS - 1));
+ if (extra_cfg->max_gf_interval > 0) {
+ RANGE_CHECK(extra_cfg, max_gf_interval, 2, (MAX_LAG_BUFFERS - 1));
+ }
if (extra_cfg->min_gf_interval > 0 && extra_cfg->max_gf_interval > 0) {
RANGE_CHECK(extra_cfg, max_gf_interval, extra_cfg->min_gf_interval,
- (MAX_LAG_BUFFERS - 1));
+ (MAX_LAG_BUFFERS - 1));
}
if (cfg->rc_resize_allowed == 1) {
diff --git a/vp9/vp9_dx_iface.c b/vp9/vp9_dx_iface.c
index 5b62c3ec..96ede3c 100644
--- a/vp9/vp9_dx_iface.c
+++ b/vp9/vp9_dx_iface.c
@@ -17,6 +17,7 @@
#include "vpx/internal/vpx_codec_internal.h"
#include "vpx/vp8dx.h"
#include "vpx/vpx_decoder.h"
+#include "vpx_dsp/bitreader_buffer.h"
#include "vpx_util/vpx_thread.h"
#include "vp9/common/vp9_alloccommon.h"
@@ -24,7 +25,6 @@
#include "vp9/decoder/vp9_decoder.h"
#include "vp9/decoder/vp9_decodeframe.h"
-#include "vp9/decoder/vp9_read_bit_buffer.h"
#include "vp9/vp9_iface_common.h"
@@ -145,11 +145,11 @@
}
static int parse_bitdepth_colorspace_sampling(
- BITSTREAM_PROFILE profile, struct vp9_read_bit_buffer *rb) {
+ BITSTREAM_PROFILE profile, struct vpx_read_bit_buffer *rb) {
vpx_color_space_t color_space;
if (profile >= PROFILE_2)
rb->bit_offset += 1; // Bit-depth 10 or 12.
- color_space = (vpx_color_space_t)vp9_rb_read_literal(rb, 3);
+ color_space = (vpx_color_space_t)vpx_rb_read_literal(rb, 3);
if (color_space != VPX_CS_SRGB) {
rb->bit_offset += 1; // [16,235] (including xvycc) vs [0,255] range.
if (profile == PROFILE_1 || profile == PROFILE_3) {
@@ -191,8 +191,8 @@
{
int show_frame;
int error_resilient;
- struct vp9_read_bit_buffer rb = { data, data + data_sz, 0, NULL, NULL };
- const int frame_marker = vp9_rb_read_literal(&rb, 2);
+ struct vpx_read_bit_buffer rb = { data, data + data_sz, 0, NULL, NULL };
+ const int frame_marker = vpx_rb_read_literal(&rb, 2);
const BITSTREAM_PROFILE profile = vp9_read_profile(&rb);
if (frame_marker != VP9_FRAME_MARKER)
@@ -204,17 +204,17 @@
if ((profile >= 2 && data_sz <= 1) || data_sz < 1)
return VPX_CODEC_UNSUP_BITSTREAM;
- if (vp9_rb_read_bit(&rb)) { // show an existing frame
- vp9_rb_read_literal(&rb, 3); // Frame buffer to show.
+ if (vpx_rb_read_bit(&rb)) { // show an existing frame
+ vpx_rb_read_literal(&rb, 3); // Frame buffer to show.
return VPX_CODEC_OK;
}
if (data_sz <= 8)
return VPX_CODEC_UNSUP_BITSTREAM;
- si->is_kf = !vp9_rb_read_bit(&rb);
- show_frame = vp9_rb_read_bit(&rb);
- error_resilient = vp9_rb_read_bit(&rb);
+ si->is_kf = !vpx_rb_read_bit(&rb);
+ show_frame = vpx_rb_read_bit(&rb);
+ error_resilient = vpx_rb_read_bit(&rb);
if (si->is_kf) {
if (!vp9_read_sync_code(&rb))
@@ -224,7 +224,7 @@
return VPX_CODEC_UNSUP_BITSTREAM;
vp9_read_frame_size(&rb, (int *)&si->w, (int *)&si->h);
} else {
- intra_only_flag = show_frame ? 0 : vp9_rb_read_bit(&rb);
+ intra_only_flag = show_frame ? 0 : vpx_rb_read_bit(&rb);
rb.bit_offset += error_resilient ? 0 : 2; // reset_frame_context
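
The peek code above reads MSB-first bits straight out of the compressed buffer through a vpx_read_bit_buffer. A minimal stand-in for the two read helpers (sketch mirroring the usage here, not the vpx_dsp implementation verbatim):

#include <stdio.h>
#include <stdint.h>
#include <stddef.h>

struct rb { const uint8_t *data; size_t bit_offset; };

/* MSB-first single-bit read, as the frame-header fields above expect. */
static int rb_read_bit(struct rb *rb) {
  const size_t off = rb->bit_offset++;
  return (rb->data[off >> 3] >> (7 - (off & 7))) & 1;
}

static int rb_read_literal(struct rb *rb, int bits) {
  int value = 0, bit;
  for (bit = bits - 1; bit >= 0; --bit)
    value |= rb_read_bit(rb) << bit;
  return value;
}

int main(void) {
  /* 0x86 = 1000 0110: the leading "10" is VP9's frame marker (2). */
  const uint8_t header[2] = { 0x86, 0x00 };
  struct rb rb = { header, 0 };
  printf("frame_marker = %d\n", rb_read_literal(&rb, 2)); /* 2 */
  printf("next bit = %d\n", rb_read_bit(&rb));            /* 0 */
  return 0;
}
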
diff --git a/vp9/vp9cx.mk b/vp9/vp9cx.mk
index 94cc7ba..50d52ed 100644
--- a/vp9/vp9cx.mk
+++ b/vp9/vp9cx.mk
@@ -37,10 +37,6 @@
VP9_CX_SRCS-$(CONFIG_INTERNAL_STATS) += encoder/vp9_fastssim.c
VP9_CX_SRCS-yes += encoder/vp9_firstpass.c
VP9_CX_SRCS-yes += encoder/vp9_block.h
-VP9_CX_SRCS-yes += encoder/vp9_writer.h
-VP9_CX_SRCS-yes += encoder/vp9_writer.c
-VP9_CX_SRCS-yes += encoder/vp9_write_bit_buffer.c
-VP9_CX_SRCS-yes += encoder/vp9_write_bit_buffer.h
VP9_CX_SRCS-yes += encoder/vp9_bitstream.h
VP9_CX_SRCS-yes += encoder/vp9_encodemb.h
VP9_CX_SRCS-yes += encoder/vp9_encodemv.h
@@ -104,7 +100,6 @@
VP9_CX_SRCS-$(HAVE_SSE2) += encoder/x86/vp9_temporal_filter_apply_sse2.asm
VP9_CX_SRCS-$(HAVE_SSE2) += encoder/x86/vp9_quantize_sse2.c
ifeq ($(CONFIG_VP9_HIGHBITDEPTH),yes)
-VP9_CX_SRCS-$(HAVE_SSE2) += encoder/x86/vp9_highbd_quantize_intrin_sse2.c
VP9_CX_SRCS-$(HAVE_SSE2) += encoder/x86/vp9_highbd_block_error_intrin_sse2.c
endif
@@ -125,7 +120,6 @@
VP9_CX_SRCS-$(HAVE_SSSE3) += encoder/x86/vp9_dct_ssse3.c
VP9_CX_SRCS-$(HAVE_SSE2) += encoder/x86/vp9_dct_sse2.h
VP9_CX_SRCS-$(HAVE_SSE2) += encoder/x86/vp9_dct32x32_sse2_impl.h
-VP9_CX_SRCS-$(HAVE_SSE2) += encoder/x86/vp9_dct_sse2_impl.h
ifeq ($(CONFIG_VP9_TEMPORAL_DENOISING),yes)
VP9_CX_SRCS-$(HAVE_SSE2) += encoder/x86/vp9_denoiser_sse2.c
diff --git a/vp9/vp9dx.mk b/vp9/vp9dx.mk
index c105adb..0e9cf16 100644
--- a/vp9/vp9dx.mk
+++ b/vp9/vp9dx.mk
@@ -21,10 +21,6 @@
VP9_DX_SRCS-yes += decoder/vp9_decodeframe.c
VP9_DX_SRCS-yes += decoder/vp9_decodeframe.h
VP9_DX_SRCS-yes += decoder/vp9_detokenize.c
-VP9_DX_SRCS-yes += decoder/vp9_reader.h
-VP9_DX_SRCS-yes += decoder/vp9_reader.c
-VP9_DX_SRCS-yes += decoder/vp9_read_bit_buffer.c
-VP9_DX_SRCS-yes += decoder/vp9_read_bit_buffer.h
VP9_DX_SRCS-yes += decoder/vp9_decodemv.h
VP9_DX_SRCS-yes += decoder/vp9_detokenize.h
VP9_DX_SRCS-yes += decoder/vp9_dthread.c
diff --git a/vpx/vp8cx.h b/vpx/vp8cx.h
index 013c67a..0c6846b 100644
--- a/vpx/vp8cx.h
+++ b/vpx/vp8cx.h
@@ -733,9 +733,19 @@
VPX_CTRL_USE_TYPE(VP9E_SET_COLOR_SPACE, int)
VPX_CTRL_USE_TYPE(VP9E_SET_MIN_GF_INTERVAL, unsigned int)
+
+/*!\brief
+ *
+ * TODO(debargha): add support for this control in ffmpeg
+ */
#define VPX_CTRL_VP9E_SET_MIN_GF_INTERVAL
+
VPX_CTRL_USE_TYPE(VP9E_SET_MAX_GF_INTERVAL, unsigned int)
+/*!\brief
+ *
+ * TODO(debargha): add support for this control in ffmpeg
+ */
#define VPX_CTRL_VP9E_SET_MAX_GF_INTERVAL
VPX_CTRL_USE_TYPE(VP9E_GET_ACTIVEMAP, vpx_active_map_t *)
diff --git a/vpx_dsp/arm/fwd_txfm_neon.c b/vpx_dsp/arm/fwd_txfm_neon.c
new file mode 100644
index 0000000..66faea5
--- /dev/null
+++ b/vpx_dsp/arm/fwd_txfm_neon.c
@@ -0,0 +1,202 @@
+/*
+ * Copyright (c) 2015 The WebM project authors. All Rights Reserved.
+ *
+ * Use of this source code is governed by a BSD-style license
+ * that can be found in the LICENSE file in the root of the source
+ * tree. An additional intellectual property rights grant can be found
+ * in the file PATENTS. All contributing project authors may
+ * be found in the AUTHORS file in the root of the source tree.
+ */
+
+#include <arm_neon.h>
+#include "./vpx_config.h"
+#include "vp9/common/vp9_idct.h"
+
+void vp9_fdct8x8_neon(const int16_t *input, int16_t *final_output, int stride) {
+ int i;
+ // stage 1
+ int16x8_t input_0 = vshlq_n_s16(vld1q_s16(&input[0 * stride]), 2);
+ int16x8_t input_1 = vshlq_n_s16(vld1q_s16(&input[1 * stride]), 2);
+ int16x8_t input_2 = vshlq_n_s16(vld1q_s16(&input[2 * stride]), 2);
+ int16x8_t input_3 = vshlq_n_s16(vld1q_s16(&input[3 * stride]), 2);
+ int16x8_t input_4 = vshlq_n_s16(vld1q_s16(&input[4 * stride]), 2);
+ int16x8_t input_5 = vshlq_n_s16(vld1q_s16(&input[5 * stride]), 2);
+ int16x8_t input_6 = vshlq_n_s16(vld1q_s16(&input[6 * stride]), 2);
+ int16x8_t input_7 = vshlq_n_s16(vld1q_s16(&input[7 * stride]), 2);
+ for (i = 0; i < 2; ++i) {
+ int16x8_t out_0, out_1, out_2, out_3, out_4, out_5, out_6, out_7;
+ const int16x8_t v_s0 = vaddq_s16(input_0, input_7);
+ const int16x8_t v_s1 = vaddq_s16(input_1, input_6);
+ const int16x8_t v_s2 = vaddq_s16(input_2, input_5);
+ const int16x8_t v_s3 = vaddq_s16(input_3, input_4);
+ const int16x8_t v_s4 = vsubq_s16(input_3, input_4);
+ const int16x8_t v_s5 = vsubq_s16(input_2, input_5);
+ const int16x8_t v_s6 = vsubq_s16(input_1, input_6);
+ const int16x8_t v_s7 = vsubq_s16(input_0, input_7);
+ // fdct4(step, step);
+ int16x8_t v_x0 = vaddq_s16(v_s0, v_s3);
+ int16x8_t v_x1 = vaddq_s16(v_s1, v_s2);
+ int16x8_t v_x2 = vsubq_s16(v_s1, v_s2);
+ int16x8_t v_x3 = vsubq_s16(v_s0, v_s3);
+ // fdct4(step, step);
+ int32x4_t v_t0_lo = vaddl_s16(vget_low_s16(v_x0), vget_low_s16(v_x1));
+ int32x4_t v_t0_hi = vaddl_s16(vget_high_s16(v_x0), vget_high_s16(v_x1));
+ int32x4_t v_t1_lo = vsubl_s16(vget_low_s16(v_x0), vget_low_s16(v_x1));
+ int32x4_t v_t1_hi = vsubl_s16(vget_high_s16(v_x0), vget_high_s16(v_x1));
+ int32x4_t v_t2_lo = vmull_n_s16(vget_low_s16(v_x2), (int16_t)cospi_24_64);
+ int32x4_t v_t2_hi = vmull_n_s16(vget_high_s16(v_x2), (int16_t)cospi_24_64);
+ int32x4_t v_t3_lo = vmull_n_s16(vget_low_s16(v_x3), (int16_t)cospi_24_64);
+ int32x4_t v_t3_hi = vmull_n_s16(vget_high_s16(v_x3), (int16_t)cospi_24_64);
+ v_t2_lo = vmlal_n_s16(v_t2_lo, vget_low_s16(v_x3), (int16_t)cospi_8_64);
+ v_t2_hi = vmlal_n_s16(v_t2_hi, vget_high_s16(v_x3), (int16_t)cospi_8_64);
+ v_t3_lo = vmlsl_n_s16(v_t3_lo, vget_low_s16(v_x2), (int16_t)cospi_8_64);
+ v_t3_hi = vmlsl_n_s16(v_t3_hi, vget_high_s16(v_x2), (int16_t)cospi_8_64);
+ v_t0_lo = vmulq_n_s32(v_t0_lo, cospi_16_64);
+ v_t0_hi = vmulq_n_s32(v_t0_hi, cospi_16_64);
+ v_t1_lo = vmulq_n_s32(v_t1_lo, cospi_16_64);
+ v_t1_hi = vmulq_n_s32(v_t1_hi, cospi_16_64);
+ {
+ const int16x4_t a = vrshrn_n_s32(v_t0_lo, DCT_CONST_BITS);
+ const int16x4_t b = vrshrn_n_s32(v_t0_hi, DCT_CONST_BITS);
+ const int16x4_t c = vrshrn_n_s32(v_t1_lo, DCT_CONST_BITS);
+ const int16x4_t d = vrshrn_n_s32(v_t1_hi, DCT_CONST_BITS);
+ const int16x4_t e = vrshrn_n_s32(v_t2_lo, DCT_CONST_BITS);
+ const int16x4_t f = vrshrn_n_s32(v_t2_hi, DCT_CONST_BITS);
+ const int16x4_t g = vrshrn_n_s32(v_t3_lo, DCT_CONST_BITS);
+ const int16x4_t h = vrshrn_n_s32(v_t3_hi, DCT_CONST_BITS);
+ out_0 = vcombine_s16(a, c); // 00 01 02 03 40 41 42 43
+ out_2 = vcombine_s16(e, g); // 20 21 22 23 60 61 62 63
+ out_4 = vcombine_s16(b, d); // 04 05 06 07 44 45 46 47
+ out_6 = vcombine_s16(f, h); // 24 25 26 27 64 65 66 67
+ }
+ // Stage 2
+ v_x0 = vsubq_s16(v_s6, v_s5);
+ v_x1 = vaddq_s16(v_s6, v_s5);
+ v_t0_lo = vmull_n_s16(vget_low_s16(v_x0), (int16_t)cospi_16_64);
+ v_t0_hi = vmull_n_s16(vget_high_s16(v_x0), (int16_t)cospi_16_64);
+ v_t1_lo = vmull_n_s16(vget_low_s16(v_x1), (int16_t)cospi_16_64);
+ v_t1_hi = vmull_n_s16(vget_high_s16(v_x1), (int16_t)cospi_16_64);
+ {
+ const int16x4_t a = vrshrn_n_s32(v_t0_lo, DCT_CONST_BITS);
+ const int16x4_t b = vrshrn_n_s32(v_t0_hi, DCT_CONST_BITS);
+ const int16x4_t c = vrshrn_n_s32(v_t1_lo, DCT_CONST_BITS);
+ const int16x4_t d = vrshrn_n_s32(v_t1_hi, DCT_CONST_BITS);
+ const int16x8_t ab = vcombine_s16(a, b);
+ const int16x8_t cd = vcombine_s16(c, d);
+ // Stage 3
+ v_x0 = vaddq_s16(v_s4, ab);
+ v_x1 = vsubq_s16(v_s4, ab);
+ v_x2 = vsubq_s16(v_s7, cd);
+ v_x3 = vaddq_s16(v_s7, cd);
+ }
+ // Stage 4
+ v_t0_lo = vmull_n_s16(vget_low_s16(v_x3), (int16_t)cospi_4_64);
+ v_t0_hi = vmull_n_s16(vget_high_s16(v_x3), (int16_t)cospi_4_64);
+ v_t0_lo = vmlal_n_s16(v_t0_lo, vget_low_s16(v_x0), (int16_t)cospi_28_64);
+ v_t0_hi = vmlal_n_s16(v_t0_hi, vget_high_s16(v_x0), (int16_t)cospi_28_64);
+ v_t1_lo = vmull_n_s16(vget_low_s16(v_x1), (int16_t)cospi_12_64);
+ v_t1_hi = vmull_n_s16(vget_high_s16(v_x1), (int16_t)cospi_12_64);
+ v_t1_lo = vmlal_n_s16(v_t1_lo, vget_low_s16(v_x2), (int16_t)cospi_20_64);
+ v_t1_hi = vmlal_n_s16(v_t1_hi, vget_high_s16(v_x2), (int16_t)cospi_20_64);
+ v_t2_lo = vmull_n_s16(vget_low_s16(v_x2), (int16_t)cospi_12_64);
+ v_t2_hi = vmull_n_s16(vget_high_s16(v_x2), (int16_t)cospi_12_64);
+ v_t2_lo = vmlsl_n_s16(v_t2_lo, vget_low_s16(v_x1), (int16_t)cospi_20_64);
+ v_t2_hi = vmlsl_n_s16(v_t2_hi, vget_high_s16(v_x1), (int16_t)cospi_20_64);
+ v_t3_lo = vmull_n_s16(vget_low_s16(v_x3), (int16_t)cospi_28_64);
+ v_t3_hi = vmull_n_s16(vget_high_s16(v_x3), (int16_t)cospi_28_64);
+ v_t3_lo = vmlsl_n_s16(v_t3_lo, vget_low_s16(v_x0), (int16_t)cospi_4_64);
+ v_t3_hi = vmlsl_n_s16(v_t3_hi, vget_high_s16(v_x0), (int16_t)cospi_4_64);
+ {
+ const int16x4_t a = vrshrn_n_s32(v_t0_lo, DCT_CONST_BITS);
+ const int16x4_t b = vrshrn_n_s32(v_t0_hi, DCT_CONST_BITS);
+ const int16x4_t c = vrshrn_n_s32(v_t1_lo, DCT_CONST_BITS);
+ const int16x4_t d = vrshrn_n_s32(v_t1_hi, DCT_CONST_BITS);
+ const int16x4_t e = vrshrn_n_s32(v_t2_lo, DCT_CONST_BITS);
+ const int16x4_t f = vrshrn_n_s32(v_t2_hi, DCT_CONST_BITS);
+ const int16x4_t g = vrshrn_n_s32(v_t3_lo, DCT_CONST_BITS);
+ const int16x4_t h = vrshrn_n_s32(v_t3_hi, DCT_CONST_BITS);
+ out_1 = vcombine_s16(a, c); // 10 11 12 13 50 51 52 53
+ out_3 = vcombine_s16(e, g); // 30 31 32 33 70 71 72 73
+ out_5 = vcombine_s16(b, d); // 14 15 16 17 54 55 56 57
+ out_7 = vcombine_s16(f, h); // 34 35 36 37 74 75 76 77
+ }
+ // transpose 8x8
+ {
+ // 00 01 02 03 40 41 42 43
+ // 10 11 12 13 50 51 52 53
+ // 20 21 22 23 60 61 62 63
+ // 30 31 32 33 70 71 72 73
+ // 04 05 06 07 44 45 46 47
+ // 14 15 16 17 54 55 56 57
+ // 24 25 26 27 64 65 66 67
+ // 34 35 36 37 74 75 76 77
+ const int32x4x2_t r02_s32 = vtrnq_s32(vreinterpretq_s32_s16(out_0),
+ vreinterpretq_s32_s16(out_2));
+ const int32x4x2_t r13_s32 = vtrnq_s32(vreinterpretq_s32_s16(out_1),
+ vreinterpretq_s32_s16(out_3));
+ const int32x4x2_t r46_s32 = vtrnq_s32(vreinterpretq_s32_s16(out_4),
+ vreinterpretq_s32_s16(out_6));
+ const int32x4x2_t r57_s32 = vtrnq_s32(vreinterpretq_s32_s16(out_5),
+ vreinterpretq_s32_s16(out_7));
+ const int16x8x2_t r01_s16 =
+ vtrnq_s16(vreinterpretq_s16_s32(r02_s32.val[0]),
+ vreinterpretq_s16_s32(r13_s32.val[0]));
+ const int16x8x2_t r23_s16 =
+ vtrnq_s16(vreinterpretq_s16_s32(r02_s32.val[1]),
+ vreinterpretq_s16_s32(r13_s32.val[1]));
+ const int16x8x2_t r45_s16 =
+ vtrnq_s16(vreinterpretq_s16_s32(r46_s32.val[0]),
+ vreinterpretq_s16_s32(r57_s32.val[0]));
+ const int16x8x2_t r67_s16 =
+ vtrnq_s16(vreinterpretq_s16_s32(r46_s32.val[1]),
+ vreinterpretq_s16_s32(r57_s32.val[1]));
+ input_0 = r01_s16.val[0];
+ input_1 = r01_s16.val[1];
+ input_2 = r23_s16.val[0];
+ input_3 = r23_s16.val[1];
+ input_4 = r45_s16.val[0];
+ input_5 = r45_s16.val[1];
+ input_6 = r67_s16.val[0];
+ input_7 = r67_s16.val[1];
+ // 00 10 20 30 40 50 60 70
+ // 01 11 21 31 41 51 61 71
+ // 02 12 22 32 42 52 62 72
+ // 03 13 23 33 43 53 63 73
+ // 04 14 24 34 44 54 64 74
+ // 05 15 25 35 45 55 65 75
+ // 06 16 26 36 46 56 66 76
+ // 07 17 27 37 47 57 67 77
+ }
+ } // for
+ {
+ // from vp9_dct_sse2.c
+ // Post-condition (division by two)
+    // division of a 16-bit signed number by two using shifts
+ // n / 2 = (n - (n >> 15)) >> 1
+ const int16x8_t sign_in0 = vshrq_n_s16(input_0, 15);
+ const int16x8_t sign_in1 = vshrq_n_s16(input_1, 15);
+ const int16x8_t sign_in2 = vshrq_n_s16(input_2, 15);
+ const int16x8_t sign_in3 = vshrq_n_s16(input_3, 15);
+ const int16x8_t sign_in4 = vshrq_n_s16(input_4, 15);
+ const int16x8_t sign_in5 = vshrq_n_s16(input_5, 15);
+ const int16x8_t sign_in6 = vshrq_n_s16(input_6, 15);
+ const int16x8_t sign_in7 = vshrq_n_s16(input_7, 15);
+ input_0 = vhsubq_s16(input_0, sign_in0);
+ input_1 = vhsubq_s16(input_1, sign_in1);
+ input_2 = vhsubq_s16(input_2, sign_in2);
+ input_3 = vhsubq_s16(input_3, sign_in3);
+ input_4 = vhsubq_s16(input_4, sign_in4);
+ input_5 = vhsubq_s16(input_5, sign_in5);
+ input_6 = vhsubq_s16(input_6, sign_in6);
+ input_7 = vhsubq_s16(input_7, sign_in7);
+ // store results
+ vst1q_s16(&final_output[0 * 8], input_0);
+ vst1q_s16(&final_output[1 * 8], input_1);
+ vst1q_s16(&final_output[2 * 8], input_2);
+ vst1q_s16(&final_output[3 * 8], input_3);
+ vst1q_s16(&final_output[4 * 8], input_4);
+ vst1q_s16(&final_output[5 * 8], input_5);
+ vst1q_s16(&final_output[6 * 8], input_6);
+ vst1q_s16(&final_output[7 * 8], input_7);
+ }
+}
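
The post-condition block closing the new NEON transform relies on the shift-only signed halving called out in its comments; a quick scalar check of the identity over the full 16-bit range (sketch, assuming arithmetic right shift of negative values, which vshrq_n_s16 guarantees in the vector code):

#include <assert.h>
#include <stdint.h>

int main(void) {
  int32_t n;
  for (n = -32768; n <= 32767; ++n) {
    /* n >> 15 is -1 for negative n and 0 otherwise; subtracting it biases
     * negative values up by one so the final shift truncates toward zero,
     * matching C's n / 2. vhsubq_s16 fuses the subtract and the halving. */
    const int32_t halved = (n - (n >> 15)) >> 1;
    assert(halved == n / 2);
  }
  return 0;
}
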
diff --git a/vp9/common/arm/neon/vp9_loopfilter_16_neon_asm.asm b/vpx_dsp/arm/loopfilter_16_neon.asm
similarity index 94%
rename from vp9/common/arm/neon/vp9_loopfilter_16_neon_asm.asm
rename to vpx_dsp/arm/loopfilter_16_neon.asm
index 5b8ec20..5a8fdd6 100644
--- a/vp9/common/arm/neon/vp9_loopfilter_16_neon_asm.asm
+++ b/vpx_dsp/arm/loopfilter_16_neon.asm
@@ -8,12 +8,12 @@
; be found in the AUTHORS file in the root of the source tree.
;
- EXPORT |vp9_lpf_horizontal_4_dual_neon|
+ EXPORT |vpx_lpf_horizontal_4_dual_neon|
ARM
AREA ||.text||, CODE, READONLY, ALIGN=2
-;void vp9_lpf_horizontal_4_dual_neon(uint8_t *s, int p,
+;void vpx_lpf_horizontal_4_dual_neon(uint8_t *s, int p,
; const uint8_t *blimit0,
; const uint8_t *limit0,
; const uint8_t *thresh0,
@@ -29,7 +29,7 @@
; sp+8 const uint8_t *limit1,
; sp+12 const uint8_t *thresh1,
-|vp9_lpf_horizontal_4_dual_neon| PROC
+|vpx_lpf_horizontal_4_dual_neon| PROC
push {lr}
ldr r12, [sp, #4] ; load thresh0
@@ -66,7 +66,7 @@
sub r2, r2, r1, lsl #1
sub r3, r3, r1, lsl #1
- bl vp9_loop_filter_neon_16
+ bl vpx_loop_filter_neon_16
vst1.u8 {q5}, [r2@64], r1 ; store op1
vst1.u8 {q6}, [r3@64], r1 ; store op0
@@ -76,9 +76,9 @@
vpop {d8-d15} ; restore neon registers
pop {pc}
- ENDP ; |vp9_lpf_horizontal_4_dual_neon|
+ ENDP ; |vpx_lpf_horizontal_4_dual_neon|
-; void vp9_loop_filter_neon_16();
+; void vpx_loop_filter_neon_16();
; This is a helper function for the loopfilters. The individual functions do the
; necessary load, transpose (if necessary) and store. This function uses
; registers d8-d15, so the calling function must save those registers.
@@ -101,7 +101,7 @@
; q6 op0
; q7 oq0
; q8 oq1
-|vp9_loop_filter_neon_16| PROC
+|vpx_loop_filter_neon_16| PROC
; filter_mask
vabd.u8 q11, q3, q4 ; m1 = abs(p3 - p2)
@@ -194,6 +194,6 @@
veor q8, q12, q10 ; *oq1 = u^0x80
bx lr
- ENDP ; |vp9_loop_filter_neon_16|
+ ENDP ; |vpx_loop_filter_neon_16|
END
diff --git a/vp9/common/arm/neon/vp9_loopfilter_16_neon.c b/vpx_dsp/arm/loopfilter_16_neon.c
similarity index 93%
rename from vp9/common/arm/neon/vp9_loopfilter_16_neon.c
rename to vpx_dsp/arm/loopfilter_16_neon.c
index c69ee10..d24e6ad 100644
--- a/vp9/common/arm/neon/vp9_loopfilter_16_neon.c
+++ b/vpx_dsp/arm/loopfilter_16_neon.c
@@ -10,11 +10,11 @@
#include <arm_neon.h>
-#include "./vp9_rtcd.h"
+#include "./vpx_dsp_rtcd.h"
#include "./vpx_config.h"
#include "vpx/vpx_integer.h"
-static INLINE void vp9_loop_filter_neon_16(
+static INLINE void loop_filter_neon_16(
uint8x16_t qblimit, // blimit
uint8x16_t qlimit, // limit
uint8x16_t qthresh, // thresh
@@ -124,7 +124,7 @@
return;
}
-void vp9_lpf_horizontal_4_dual_neon(uint8_t *s, int p /* pitch */,
+void vpx_lpf_horizontal_4_dual_neon(uint8_t *s, int p /* pitch */,
const uint8_t *blimit0,
const uint8_t *limit0,
const uint8_t *thresh0,
@@ -163,9 +163,9 @@
s += p;
q10u8 = vld1q_u8(s);
- vp9_loop_filter_neon_16(qblimit, qlimit, qthresh,
- q3u8, q4u8, q5u8, q6u8, q7u8, q8u8, q9u8, q10u8,
- &q5u8, &q6u8, &q7u8, &q8u8);
+ loop_filter_neon_16(qblimit, qlimit, qthresh,
+ q3u8, q4u8, q5u8, q6u8, q7u8, q8u8, q9u8, q10u8,
+ &q5u8, &q6u8, &q7u8, &q8u8);
s -= (p * 5);
vst1q_u8(s, q5u8);
diff --git a/vp9/common/arm/neon/vp9_loopfilter_4_neon_asm.asm b/vpx_dsp/arm/loopfilter_4_neon.asm
similarity index 91%
rename from vp9/common/arm/neon/vp9_loopfilter_4_neon_asm.asm
rename to vpx_dsp/arm/loopfilter_4_neon.asm
index 7738e0d..e45e34c 100644
--- a/vp9/common/arm/neon/vp9_loopfilter_4_neon_asm.asm
+++ b/vpx_dsp/arm/loopfilter_4_neon.asm
@@ -8,18 +8,18 @@
; be found in the AUTHORS file in the root of the source tree.
;
- EXPORT |vp9_lpf_horizontal_4_neon|
- EXPORT |vp9_lpf_vertical_4_neon|
+ EXPORT |vpx_lpf_horizontal_4_neon|
+ EXPORT |vpx_lpf_vertical_4_neon|
ARM
AREA ||.text||, CODE, READONLY, ALIGN=2
-; Currently vp9 only works on iterations 8 at a time. The vp8 loop filter
+; Currently vpx only works on 8 iterations at a time. The vp8 loop filter
; works on 16 iterations at a time.
; TODO(fgalligan): See about removing the count code as this function is only
; called with a count of 1.
;
-; void vp9_lpf_horizontal_4_neon(uint8_t *s,
+; void vpx_lpf_horizontal_4_neon(uint8_t *s,
; int p /* pitch */,
; const uint8_t *blimit,
; const uint8_t *limit,
@@ -32,7 +32,7 @@
; r3 const uint8_t *limit,
; sp const uint8_t *thresh,
; sp+4 int count
-|vp9_lpf_horizontal_4_neon| PROC
+|vpx_lpf_horizontal_4_neon| PROC
push {lr}
vld1.8 {d0[]}, [r2] ; duplicate *blimit
@@ -41,7 +41,7 @@
add r1, r1, r1 ; double pitch
cmp r12, #0
- beq end_vp9_lf_h_edge
+ beq end_vpx_lf_h_edge
vld1.8 {d1[]}, [r3] ; duplicate *limit
vld1.8 {d2[]}, [r2] ; duplicate *thresh
@@ -62,7 +62,7 @@
sub r2, r2, r1, lsl #1
sub r3, r3, r1, lsl #1
- bl vp9_loop_filter_neon
+ bl vpx_loop_filter_neon
vst1.u8 {d4}, [r2@64], r1 ; store op1
vst1.u8 {d5}, [r3@64], r1 ; store op0
@@ -73,16 +73,16 @@
subs r12, r12, #1
bne count_lf_h_loop
-end_vp9_lf_h_edge
+end_vpx_lf_h_edge
pop {pc}
- ENDP ; |vp9_lpf_horizontal_4_neon|
+ ENDP ; |vpx_lpf_horizontal_4_neon|
-; Currently vp9 only works on iterations 8 at a time. The vp8 loop filter
+; Currently vpx only works on 8 iterations at a time. The vp8 loop filter
; works on 16 iterations at a time.
; TODO(fgalligan): See about removing the count code as this function is only
; called with a count of 1.
;
-; void vp9_lpf_vertical_4_neon(uint8_t *s,
+; void vpx_lpf_vertical_4_neon(uint8_t *s,
; int p /* pitch */,
; const uint8_t *blimit,
; const uint8_t *limit,
@@ -95,7 +95,7 @@
; r3 const uint8_t *limit,
; sp const uint8_t *thresh,
; sp+4 int count
-|vp9_lpf_vertical_4_neon| PROC
+|vpx_lpf_vertical_4_neon| PROC
push {lr}
vld1.8 {d0[]}, [r2] ; duplicate *blimit
@@ -105,7 +105,7 @@
ldr r3, [sp, #4] ; load thresh
sub r2, r0, #4 ; move s pointer down by 4 columns
cmp r12, #0
- beq end_vp9_lf_v_edge
+ beq end_vpx_lf_v_edge
vld1.8 {d2[]}, [r3] ; duplicate *thresh
@@ -135,7 +135,7 @@
vtrn.8 d7, d16
vtrn.8 d17, d18
- bl vp9_loop_filter_neon
+ bl vpx_loop_filter_neon
sub r0, r0, #2
@@ -154,11 +154,11 @@
subne r2, r0, #4 ; move s pointer down by 4 columns
bne count_lf_v_loop
-end_vp9_lf_v_edge
+end_vpx_lf_v_edge
pop {pc}
- ENDP ; |vp9_lpf_vertical_4_neon|
+ ENDP ; |vpx_lpf_vertical_4_neon|
-; void vp9_loop_filter_neon();
+; void vpx_loop_filter_neon();
; This is a helper function for the loopfilters. The individual functions do the
; necessary load, transpose (if necessary) and store. The function does not use
; registers d8-d15.
@@ -182,7 +182,7 @@
; d5 op0
; d6 oq0
; d7 oq1
-|vp9_loop_filter_neon| PROC
+|vpx_loop_filter_neon| PROC
; filter_mask
vabd.u8 d19, d3, d4 ; m1 = abs(p3 - p2)
vabd.u8 d20, d4, d5 ; m2 = abs(p2 - p1)
@@ -272,6 +272,6 @@
veor d7, d20, d18 ; *oq1 = u^0x80
bx lr
- ENDP ; |vp9_loop_filter_neon|
+ ENDP ; |vpx_loop_filter_neon|
END
diff --git a/vp9/common/arm/neon/vp9_loopfilter_4_neon.c b/vpx_dsp/arm/loopfilter_4_neon.c
similarity index 89%
rename from vp9/common/arm/neon/vp9_loopfilter_4_neon.c
rename to vpx_dsp/arm/loopfilter_4_neon.c
index fd9db61..7ad411a 100644
--- a/vp9/common/arm/neon/vp9_loopfilter_4_neon.c
+++ b/vpx_dsp/arm/loopfilter_4_neon.c
@@ -10,9 +10,9 @@
#include <arm_neon.h>
-#include "./vpx_config.h"
+#include "./vpx_dsp_rtcd.h"
-static INLINE void vp9_loop_filter_neon(
+static INLINE void loop_filter_neon(
uint8x8_t dblimit, // flimit
uint8x8_t dlimit, // limit
uint8x8_t dthresh, // thresh
@@ -110,19 +110,19 @@
return;
}
-void vp9_lpf_horizontal_4_neon(
- unsigned char *src,
+void vpx_lpf_horizontal_4_neon(
+ uint8_t *src,
int pitch,
- unsigned char *blimit,
- unsigned char *limit,
- unsigned char *thresh,
+ const uint8_t *blimit,
+ const uint8_t *limit,
+ const uint8_t *thresh,
int count) {
int i;
uint8_t *s, *psrc;
uint8x8_t dblimit, dlimit, dthresh;
uint8x8_t d3u8, d4u8, d5u8, d6u8, d7u8, d16u8, d17u8, d18u8;
- if (count == 0) // end_vp9_lf_h_edge
+ if (count == 0) // end_vpx_lf_h_edge
return;
dblimit = vld1_u8(blimit);
@@ -149,9 +149,9 @@
s += pitch;
d18u8 = vld1_u8(s);
- vp9_loop_filter_neon(dblimit, dlimit, dthresh,
- d3u8, d4u8, d5u8, d6u8, d7u8, d16u8, d17u8, d18u8,
- &d4u8, &d5u8, &d6u8, &d7u8);
+ loop_filter_neon(dblimit, dlimit, dthresh,
+ d3u8, d4u8, d5u8, d6u8, d7u8, d16u8, d17u8, d18u8,
+ &d4u8, &d5u8, &d6u8, &d7u8);
s -= (pitch * 5);
vst1_u8(s, d4u8);
@@ -165,12 +165,12 @@
return;
}
-void vp9_lpf_vertical_4_neon(
- unsigned char *src,
+void vpx_lpf_vertical_4_neon(
+ uint8_t *src,
int pitch,
- unsigned char *blimit,
- unsigned char *limit,
- unsigned char *thresh,
+ const uint8_t *blimit,
+ const uint8_t *limit,
+ const uint8_t *thresh,
int count) {
int i, pitch8;
uint8_t *s;
@@ -181,7 +181,7 @@
uint8x8x2_t d2tmp8, d2tmp9, d2tmp10, d2tmp11;
uint8x8x4_t d4Result;
- if (count == 0) // end_vp9_lf_h_edge
+ if (count == 0) // end_vpx_lf_h_edge
return;
dblimit = vld1_u8(blimit);
@@ -244,9 +244,9 @@
d17u8 = d2tmp11.val[0];
d18u8 = d2tmp11.val[1];
- vp9_loop_filter_neon(dblimit, dlimit, dthresh,
- d3u8, d4u8, d5u8, d6u8, d7u8, d16u8, d17u8, d18u8,
- &d4u8, &d5u8, &d6u8, &d7u8);
+ loop_filter_neon(dblimit, dlimit, dthresh,
+ d3u8, d4u8, d5u8, d6u8, d7u8, d16u8, d17u8, d18u8,
+ &d4u8, &d5u8, &d6u8, &d7u8);
d4Result.val[0] = d4u8;
d4Result.val[1] = d5u8;
diff --git a/vp9/common/arm/neon/vp9_loopfilter_8_neon_asm.asm b/vpx_dsp/arm/loopfilter_8_neon.asm
similarity index 94%
rename from vp9/common/arm/neon/vp9_loopfilter_8_neon_asm.asm
rename to vpx_dsp/arm/loopfilter_8_neon.asm
index 91aaec0..e81734c 100644
--- a/vp9/common/arm/neon/vp9_loopfilter_8_neon_asm.asm
+++ b/vpx_dsp/arm/loopfilter_8_neon.asm
@@ -8,18 +8,18 @@
; be found in the AUTHORS file in the root of the source tree.
;
- EXPORT |vp9_lpf_horizontal_8_neon|
- EXPORT |vp9_lpf_vertical_8_neon|
+ EXPORT |vpx_lpf_horizontal_8_neon|
+ EXPORT |vpx_lpf_vertical_8_neon|
ARM
AREA ||.text||, CODE, READONLY, ALIGN=2
-; Currently vp9 only works on iterations 8 at a time. The vp8 loop filter
+; Currently vpx only works on 8 iterations at a time. The vp8 loop filter
; works on 16 iterations at a time.
; TODO(fgalligan): See about removing the count code as this function is only
; called with a count of 1.
;
-; void vp9_lpf_horizontal_8_neon(uint8_t *s, int p,
+; void vpx_lpf_horizontal_8_neon(uint8_t *s, int p,
; const uint8_t *blimit,
; const uint8_t *limit,
; const uint8_t *thresh,
@@ -30,7 +30,7 @@
; r3 const uint8_t *limit,
; sp const uint8_t *thresh,
; sp+4 int count
-|vp9_lpf_horizontal_8_neon| PROC
+|vpx_lpf_horizontal_8_neon| PROC
push {r4-r5, lr}
vld1.8 {d0[]}, [r2] ; duplicate *blimit
@@ -39,7 +39,7 @@
add r1, r1, r1 ; double pitch
cmp r12, #0
- beq end_vp9_mblf_h_edge
+ beq end_vpx_mblf_h_edge
vld1.8 {d1[]}, [r3] ; duplicate *limit
vld1.8 {d2[]}, [r2] ; duplicate *thresh
@@ -60,7 +60,7 @@
sub r3, r3, r1, lsl #1
sub r2, r2, r1, lsl #2
- bl vp9_mbloop_filter_neon
+ bl vpx_mbloop_filter_neon
vst1.u8 {d0}, [r2@64], r1 ; store op2
vst1.u8 {d1}, [r3@64], r1 ; store op1
@@ -73,12 +73,12 @@
subs r12, r12, #1
bne count_mblf_h_loop
-end_vp9_mblf_h_edge
+end_vpx_mblf_h_edge
pop {r4-r5, pc}
- ENDP ; |vp9_lpf_horizontal_8_neon|
+ ENDP ; |vpx_lpf_horizontal_8_neon|
-; void vp9_lpf_vertical_8_neon(uint8_t *s,
+; void vpx_lpf_vertical_8_neon(uint8_t *s,
; int pitch,
; const uint8_t *blimit,
; const uint8_t *limit,
@@ -91,7 +91,7 @@
; r3 const uint8_t *limit,
; sp const uint8_t *thresh,
; sp+4 int count
-|vp9_lpf_vertical_8_neon| PROC
+|vpx_lpf_vertical_8_neon| PROC
push {r4-r5, lr}
vld1.8 {d0[]}, [r2] ; duplicate *blimit
@@ -101,7 +101,7 @@
ldr r3, [sp, #12] ; load thresh
sub r2, r0, #4 ; move s pointer down by 4 columns
cmp r12, #0
- beq end_vp9_mblf_v_edge
+ beq end_vpx_mblf_v_edge
vld1.8 {d2[]}, [r3] ; duplicate *thresh
@@ -134,7 +134,7 @@
sub r2, r0, #3
add r3, r0, #1
- bl vp9_mbloop_filter_neon
+ bl vpx_mbloop_filter_neon
;store op2, op1, op0, oq0
vst4.8 {d0[0], d1[0], d2[0], d3[0]}, [r2], r1
@@ -161,11 +161,11 @@
subne r2, r0, #4 ; move s pointer down by 4 columns
bne count_mblf_v_loop
-end_vp9_mblf_v_edge
+end_vpx_mblf_v_edge
pop {r4-r5, pc}
- ENDP ; |vp9_lpf_vertical_8_neon|
+ ENDP ; |vpx_lpf_vertical_8_neon|
-; void vp9_mbloop_filter_neon();
+; void vpx_mbloop_filter_neon();
; This is a helper function for the loopfilters. The individual functions do the
; necessary load, transpose (if necessary) and store. The function does not use
; registers d8-d15.
@@ -191,7 +191,7 @@
; d3 oq0
; d4 oq1
; d5 oq2
-|vp9_mbloop_filter_neon| PROC
+|vpx_mbloop_filter_neon| PROC
; filter_mask
vabd.u8 d19, d3, d4 ; m1 = abs(p3 - p2)
vabd.u8 d20, d4, d5 ; m2 = abs(p2 - p1)
@@ -446,6 +446,6 @@
bx lr
- ENDP ; |vp9_mbloop_filter_neon|
+ ENDP ; |vpx_mbloop_filter_neon|
END
diff --git a/vp9/common/arm/neon/vp9_loopfilter_8_neon.c b/vpx_dsp/arm/loopfilter_8_neon.c
similarity index 93%
rename from vp9/common/arm/neon/vp9_loopfilter_8_neon.c
rename to vpx_dsp/arm/loopfilter_8_neon.c
index 33068a8..a887e2e 100644
--- a/vp9/common/arm/neon/vp9_loopfilter_8_neon.c
+++ b/vpx_dsp/arm/loopfilter_8_neon.c
@@ -10,9 +10,9 @@
#include <arm_neon.h>
-#include "./vpx_config.h"
+#include "./vpx_dsp_rtcd.h"
-static INLINE void vp9_mbloop_filter_neon(
+static INLINE void mbloop_filter_neon(
uint8x8_t dblimit, // mblimit
uint8x8_t dlimit, // limit
uint8x8_t dthresh, // thresh
@@ -263,12 +263,12 @@
return;
}
-void vp9_lpf_horizontal_8_neon(
- unsigned char *src,
+void vpx_lpf_horizontal_8_neon(
+ uint8_t *src,
int pitch,
- unsigned char *blimit,
- unsigned char *limit,
- unsigned char *thresh,
+ const uint8_t *blimit,
+ const uint8_t *limit,
+ const uint8_t *thresh,
int count) {
int i;
uint8_t *s, *psrc;
@@ -276,7 +276,7 @@
uint8x8_t d0u8, d1u8, d2u8, d3u8, d4u8, d5u8, d6u8, d7u8;
uint8x8_t d16u8, d17u8, d18u8;
- if (count == 0) // end_vp9_mblf_h_edge
+ if (count == 0) // end_vpx_mblf_h_edge
return;
dblimit = vld1_u8(blimit);
@@ -303,9 +303,9 @@
s += pitch;
d18u8 = vld1_u8(s);
- vp9_mbloop_filter_neon(dblimit, dlimit, dthresh,
- d3u8, d4u8, d5u8, d6u8, d7u8, d16u8, d17u8, d18u8,
- &d0u8, &d1u8, &d2u8, &d3u8, &d4u8, &d5u8);
+ mbloop_filter_neon(dblimit, dlimit, dthresh,
+ d3u8, d4u8, d5u8, d6u8, d7u8, d16u8, d17u8, d18u8,
+ &d0u8, &d1u8, &d2u8, &d3u8, &d4u8, &d5u8);
s -= (pitch * 6);
vst1_u8(s, d0u8);
@@ -323,12 +323,12 @@
return;
}
-void vp9_lpf_vertical_8_neon(
- unsigned char *src,
+void vpx_lpf_vertical_8_neon(
+ uint8_t *src,
int pitch,
- unsigned char *blimit,
- unsigned char *limit,
- unsigned char *thresh,
+ const uint8_t *blimit,
+ const uint8_t *limit,
+ const uint8_t *thresh,
int count) {
int i;
uint8_t *s;
@@ -403,9 +403,9 @@
d17u8 = d2tmp11.val[0];
d18u8 = d2tmp11.val[1];
- vp9_mbloop_filter_neon(dblimit, dlimit, dthresh,
- d3u8, d4u8, d5u8, d6u8, d7u8, d16u8, d17u8, d18u8,
- &d0u8, &d1u8, &d2u8, &d3u8, &d4u8, &d5u8);
+ mbloop_filter_neon(dblimit, dlimit, dthresh,
+ d3u8, d4u8, d5u8, d6u8, d7u8, d16u8, d17u8, d18u8,
+ &d0u8, &d1u8, &d2u8, &d3u8, &d4u8, &d5u8);
d4Result.val[0] = d0u8;
d4Result.val[1] = d1u8;
diff --git a/vp9/common/arm/neon/vp9_mb_lpf_neon.asm b/vpx_dsp/arm/loopfilter_mb_neon.asm
similarity index 96%
rename from vp9/common/arm/neon/vp9_mb_lpf_neon.asm
rename to vpx_dsp/arm/loopfilter_mb_neon.asm
index 5fe2bba..20d9cfb 100644
--- a/vp9/common/arm/neon/vp9_mb_lpf_neon.asm
+++ b/vpx_dsp/arm/loopfilter_mb_neon.asm
@@ -8,13 +8,13 @@
; be found in the AUTHORS file in the root of the source tree.
;
- EXPORT |vp9_lpf_horizontal_16_neon|
- EXPORT |vp9_lpf_vertical_16_neon|
+ EXPORT |vpx_lpf_horizontal_16_neon|
+ EXPORT |vpx_lpf_vertical_16_neon|
ARM
AREA ||.text||, CODE, READONLY, ALIGN=2
-; void vp9_lpf_horizontal_16_neon(uint8_t *s, int p,
+; void vpx_lpf_horizontal_16_neon(uint8_t *s, int p,
; const uint8_t *blimit,
; const uint8_t *limit,
; const uint8_t *thresh
@@ -24,7 +24,7 @@
; r2 const uint8_t *blimit,
; r3 const uint8_t *limit,
; sp const uint8_t *thresh,
-|vp9_lpf_horizontal_16_neon| PROC
+|vpx_lpf_horizontal_16_neon| PROC
push {r4-r8, lr}
vpush {d8-d15}
ldr r4, [sp, #88] ; load thresh
@@ -54,7 +54,7 @@
vld1.u8 {d14}, [r8@64], r1 ; q6
vld1.u8 {d15}, [r8@64], r1 ; q7
- bl vp9_wide_mbfilter_neon
+ bl vpx_wide_mbfilter_neon
tst r7, #1
beq h_mbfilter
@@ -115,9 +115,9 @@
vpop {d8-d15}
pop {r4-r8, pc}
- ENDP ; |vp9_lpf_horizontal_16_neon|
+ ENDP ; |vpx_lpf_horizontal_16_neon|
-; void vp9_lpf_vertical_16_neon(uint8_t *s, int p,
+; void vpx_lpf_vertical_16_neon(uint8_t *s, int p,
; const uint8_t *blimit,
; const uint8_t *limit,
; const uint8_t *thresh)
@@ -126,7 +126,7 @@
; r2 const uint8_t *blimit,
; r3 const uint8_t *limit,
; sp const uint8_t *thresh,
-|vp9_lpf_vertical_16_neon| PROC
+|vpx_lpf_vertical_16_neon| PROC
push {r4-r8, lr}
vpush {d8-d15}
ldr r4, [sp, #88] ; load thresh
@@ -176,7 +176,7 @@
vtrn.8 d12, d13
vtrn.8 d14, d15
- bl vp9_wide_mbfilter_neon
+ bl vpx_wide_mbfilter_neon
tst r7, #1
beq v_mbfilter
@@ -279,9 +279,9 @@
vpop {d8-d15}
pop {r4-r8, pc}
- ENDP ; |vp9_lpf_vertical_16_neon|
+ ENDP ; |vpx_lpf_vertical_16_neon|
-; void vp9_wide_mbfilter_neon();
+; void vpx_wide_mbfilter_neon();
; This is a helper function for the loopfilters. The individual functions do the
; necessary load, transpose (if necessary) and store.
;
@@ -305,7 +305,7 @@
; d13 q5
; d14 q6
; d15 q7
-|vp9_wide_mbfilter_neon| PROC
+|vpx_wide_mbfilter_neon| PROC
mov r7, #0
; filter_mask
@@ -601,6 +601,6 @@
vbif d3, d14, d17 ; oq6 |= q6 & ~(f2 & f & m)
bx lr
- ENDP ; |vp9_wide_mbfilter_neon|
+ ENDP ; |vpx_wide_mbfilter_neon|
END
diff --git a/vp9/common/arm/neon/vp9_loopfilter_neon.c b/vpx_dsp/arm/loopfilter_neon.c
similarity index 70%
rename from vp9/common/arm/neon/vp9_loopfilter_neon.c
rename to vpx_dsp/arm/loopfilter_neon.c
index 31fcc63..eff87d2 100644
--- a/vp9/common/arm/neon/vp9_loopfilter_neon.c
+++ b/vpx_dsp/arm/loopfilter_neon.c
@@ -10,49 +10,49 @@
#include <arm_neon.h>
-#include "./vp9_rtcd.h"
+#include "./vpx_dsp_rtcd.h"
#include "./vpx_config.h"
#include "vpx/vpx_integer.h"
-void vp9_lpf_vertical_4_dual_neon(uint8_t *s, int p,
+void vpx_lpf_vertical_4_dual_neon(uint8_t *s, int p,
const uint8_t *blimit0,
const uint8_t *limit0,
const uint8_t *thresh0,
const uint8_t *blimit1,
const uint8_t *limit1,
const uint8_t *thresh1) {
- vp9_lpf_vertical_4_neon(s, p, blimit0, limit0, thresh0, 1);
- vp9_lpf_vertical_4_neon(s + 8 * p, p, blimit1, limit1, thresh1, 1);
+ vpx_lpf_vertical_4_neon(s, p, blimit0, limit0, thresh0, 1);
+ vpx_lpf_vertical_4_neon(s + 8 * p, p, blimit1, limit1, thresh1, 1);
}
#if HAVE_NEON_ASM
-void vp9_lpf_horizontal_8_dual_neon(uint8_t *s, int p /* pitch */,
+void vpx_lpf_horizontal_8_dual_neon(uint8_t *s, int p /* pitch */,
const uint8_t *blimit0,
const uint8_t *limit0,
const uint8_t *thresh0,
const uint8_t *blimit1,
const uint8_t *limit1,
const uint8_t *thresh1) {
- vp9_lpf_horizontal_8_neon(s, p, blimit0, limit0, thresh0, 1);
- vp9_lpf_horizontal_8_neon(s + 8, p, blimit1, limit1, thresh1, 1);
+ vpx_lpf_horizontal_8_neon(s, p, blimit0, limit0, thresh0, 1);
+ vpx_lpf_horizontal_8_neon(s + 8, p, blimit1, limit1, thresh1, 1);
}
-void vp9_lpf_vertical_8_dual_neon(uint8_t *s, int p,
+void vpx_lpf_vertical_8_dual_neon(uint8_t *s, int p,
const uint8_t *blimit0,
const uint8_t *limit0,
const uint8_t *thresh0,
const uint8_t *blimit1,
const uint8_t *limit1,
const uint8_t *thresh1) {
- vp9_lpf_vertical_8_neon(s, p, blimit0, limit0, thresh0, 1);
- vp9_lpf_vertical_8_neon(s + 8 * p, p, blimit1, limit1, thresh1, 1);
+ vpx_lpf_vertical_8_neon(s, p, blimit0, limit0, thresh0, 1);
+ vpx_lpf_vertical_8_neon(s + 8 * p, p, blimit1, limit1, thresh1, 1);
}
-void vp9_lpf_vertical_16_dual_neon(uint8_t *s, int p,
+void vpx_lpf_vertical_16_dual_neon(uint8_t *s, int p,
const uint8_t *blimit,
const uint8_t *limit,
const uint8_t *thresh) {
- vp9_lpf_vertical_16_neon(s, p, blimit, limit, thresh);
- vp9_lpf_vertical_16_neon(s + 8 * p, p, blimit, limit, thresh);
+ vpx_lpf_vertical_16_neon(s, p, blimit, limit, thresh);
+ vpx_lpf_vertical_16_neon(s + 8 * p, p, blimit, limit, thresh);
}
#endif // HAVE_NEON_ASM
diff --git a/vp9/decoder/vp9_reader.c b/vpx_dsp/bitreader.c
similarity index 79%
rename from vp9/decoder/vp9_reader.c
rename to vpx_dsp/bitreader.c
index 9a46cd7..4420fad 100644
--- a/vp9/decoder/vp9_reader.c
+++ b/vpx_dsp/bitreader.c
@@ -9,23 +9,15 @@
*/
#include <stdlib.h>
+#include "./vpx_config.h"
+
+#include "vpx_dsp/bitreader.h"
+#include "vpx_dsp/prob.h"
#include "vpx_ports/mem.h"
#include "vpx_mem/vpx_mem.h"
-
-#include "./vpx_config.h"
-#include "vp9/decoder/vp9_reader.h"
-
#include "vpx_util/endian_inl.h"
-#if CONFIG_BIG_ENDIAN
-#define BIGENDIFY64(X) (X)
-#define BIGENDIFY32(X) (X)
-#else
-#define BIGENDIFY64(X) BSwap64(X)
-#define BIGENDIFY32(X) BSwap32(X)
-#endif
-
-int vp9_reader_init(vp9_reader *r,
+int vpx_reader_init(vpx_reader *r,
const uint8_t *buffer,
size_t size,
vpx_decrypt_cb decrypt_cb,
@@ -40,12 +32,12 @@
r->range = 255;
r->decrypt_cb = decrypt_cb;
r->decrypt_state = decrypt_state;
- vp9_reader_fill(r);
- return vp9_read_bit(r) != 0; // marker bit
+ vpx_reader_fill(r);
+ return vpx_read_bit(r) != 0; // marker bit
}
}
-void vp9_reader_fill(vp9_reader *r) {
+void vpx_reader_fill(vpx_reader *r) {
const uint8_t *const buffer_end = r->buffer_end;
const uint8_t *buffer = r->buffer;
const uint8_t *buffer_start = buffer;
@@ -62,13 +54,16 @@
buffer_start = r->clear_buffer;
}
if (bits_left > BD_VALUE_SIZE) {
-#if UINTPTR_MAX == 0xffffffffffffffff
- BD_VALUE big_endian_values = BIGENDIFY64(*((const BD_VALUE *) buffer));
-#else
- BD_VALUE big_endian_values = BIGENDIFY32(*((const BD_VALUE *) buffer));
-#endif
const int bits = (shift & 0xfffffff8) + CHAR_BIT;
- const BD_VALUE nv = big_endian_values >> (BD_VALUE_SIZE - bits);
+ BD_VALUE nv;
+ BD_VALUE big_endian_values;
+ memcpy(&big_endian_values, buffer, sizeof(BD_VALUE));
+#if SIZE_MAX == 0xffffffffffffffffULL
+ big_endian_values = HToBE64(big_endian_values);
+#else
+ big_endian_values = HToBE32(big_endian_values);
+#endif
+ nv = big_endian_values >> (BD_VALUE_SIZE - bits);
count += bits;
buffer += (bits >> 3);
value = r->value | (nv << (shift & 0x7));
@@ -97,7 +92,7 @@
r->count = count;
}
-const uint8_t *vp9_reader_find_end(vp9_reader *r) {
+const uint8_t *vpx_reader_find_end(vpx_reader *r) {
// Find the end of the coded buffer
while (r->count > CHAR_BIT && r->count < BD_VALUE_SIZE) {
r->count -= CHAR_BIT;
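
The rewritten fill path above is worth a note: instead of type-punning the input pointer (`*(const BD_VALUE *)buffer`), it copies the bytes with `memcpy` and then normalizes them with `HToBE64`/`HToBE32` from vpx_util/endian_inl.h, which sidesteps strict-aliasing and unaligned-access undefined behavior. A minimal sketch of the same pattern, assuming a 64-bit `BD_VALUE` and a GCC/Clang toolchain (`load_be64` is an illustrative name, not part of the patch):

```c
#include <stdint.h>
#include <string.h>

/* Portable unaligned load of 8 bytes, interpreted big-endian so the
 * earliest byte in the stream ends up in the most-significant bits. */
static uint64_t load_be64(const uint8_t *buffer) {
  uint64_t v;
  memcpy(&v, buffer, sizeof(v));  /* legal for any alignment/aliasing */
#if defined(__BYTE_ORDER__) && __BYTE_ORDER__ == __ORDER_LITTLE_ENDIAN__
  v = __builtin_bswap64(v);       /* host -> big endian, as HToBE64 does */
#endif
  return v;
}
```

`vpx_reader_fill` then shifts this value right by `BD_VALUE_SIZE - bits` so that only the bytes actually available past the current position enter the decoder window.
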
diff --git a/vp9/decoder/vp9_reader.h b/vpx_dsp/bitreader.h
similarity index 77%
rename from vp9/decoder/vp9_reader.h
rename to vpx_dsp/bitreader.h
index 4959985..e817c8b 100644
--- a/vp9/decoder/vp9_reader.h
+++ b/vpx_dsp/bitreader.h
@@ -8,8 +8,8 @@
* be found in the AUTHORS file in the root of the source tree.
*/
-#ifndef VP9_DECODER_VP9_READER_H_
-#define VP9_DECODER_VP9_READER_H_
+#ifndef VPX_DSP_BITREADER_H_
+#define VPX_DSP_BITREADER_H_
#include <stddef.h>
#include <limits.h>
@@ -18,8 +18,7 @@
#include "vpx_ports/mem.h"
#include "vpx/vp8dx.h"
#include "vpx/vpx_integer.h"
-
-#include "vp9/common/vp9_prob.h"
+#include "vpx_dsp/prob.h"
#ifdef __cplusplus
extern "C" {
@@ -44,19 +43,19 @@
vpx_decrypt_cb decrypt_cb;
void *decrypt_state;
uint8_t clear_buffer[sizeof(BD_VALUE) + 1];
-} vp9_reader;
+} vpx_reader;
-int vp9_reader_init(vp9_reader *r,
+int vpx_reader_init(vpx_reader *r,
const uint8_t *buffer,
size_t size,
vpx_decrypt_cb decrypt_cb,
void *decrypt_state);
-void vp9_reader_fill(vp9_reader *r);
+void vpx_reader_fill(vpx_reader *r);
-const uint8_t *vp9_reader_find_end(vp9_reader *r);
+const uint8_t *vpx_reader_find_end(vpx_reader *r);
-static INLINE int vp9_reader_has_error(vp9_reader *r) {
+static INLINE int vpx_reader_has_error(vpx_reader *r) {
// Check if we have reached the end of the buffer.
//
// Variable 'count' stores the number of bits in the 'value' buffer, minus
@@ -74,7 +73,7 @@
return r->count > BD_VALUE_SIZE && r->count < LOTS_OF_BITS;
}
-static INLINE int vp9_read(vp9_reader *r, int prob) {
+static INLINE int vpx_read(vpx_reader *r, int prob) {
unsigned int bit = 0;
BD_VALUE value;
BD_VALUE bigsplit;
@@ -83,7 +82,7 @@
unsigned int split = (r->range * prob + (256 - prob)) >> CHAR_BIT;
if (r->count < 0)
- vp9_reader_fill(r);
+ vpx_reader_fill(r);
value = r->value;
count = r->count;
@@ -99,7 +98,7 @@
}
{
- register unsigned int shift = vp9_norm[range];
+ register unsigned int shift = vpx_norm[range];
range <<= shift;
value <<= shift;
count -= shift;
@@ -111,24 +110,24 @@
return bit;
}
-static INLINE int vp9_read_bit(vp9_reader *r) {
- return vp9_read(r, 128); // vp9_prob_half
+static INLINE int vpx_read_bit(vpx_reader *r) {
+ return vpx_read(r, 128); // vpx_prob_half
}
-static INLINE int vp9_read_literal(vp9_reader *r, int bits) {
+static INLINE int vpx_read_literal(vpx_reader *r, int bits) {
int literal = 0, bit;
for (bit = bits - 1; bit >= 0; bit--)
- literal |= vp9_read_bit(r) << bit;
+ literal |= vpx_read_bit(r) << bit;
return literal;
}
-static INLINE int vp9_read_tree(vp9_reader *r, const vp9_tree_index *tree,
- const vp9_prob *probs) {
- vp9_tree_index i = 0;
+static INLINE int vpx_read_tree(vpx_reader *r, const vpx_tree_index *tree,
+ const vpx_prob *probs) {
+ vpx_tree_index i = 0;
- while ((i = tree[i + vp9_read(r, probs[i >> 1])]) > 0)
+ while ((i = tree[i + vpx_read(r, probs[i >> 1])]) > 0)
continue;
return -i;
@@ -138,4 +137,4 @@
} // extern "C"
#endif
-#endif // VP9_DECODER_VP9_READER_H_
+#endif // VPX_DSP_BITREADER_H_
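
A note on `vpx_read_tree`, since the loop is terse: the tree is a flat array where entry `tree[i + b]` is the child reached by decoded bit `b`; positive entries index further internal nodes, zero or negative entries are leaves holding the negated symbol, and `probs[i >> 1]` supplies the zero-branch probability for node `i`. A sketch with a hypothetical 3-symbol alphabet (the tree and probabilities below are illustrative, not taken from the patch):

```c
#include "vpx_dsp/bitreader.h"

/* Symbols {0, 1, 2}: the first bit selects symbol 0 or descends to
 * node 2, where a second bit selects symbol 1 or symbol 2. */
static const vpx_tree_index kTree[4] = { 0, 2, -1, -2 };
/* One probability (of the 0-branch, out of 256) per internal node. */
static const vpx_prob kProbs[2] = { 192, 128 };

static int read_symbol(vpx_reader *r) {
  return vpx_read_tree(r, kTree, kProbs);  /* yields 0, 1 or 2 */
}
```
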
diff --git a/vp9/decoder/vp9_read_bit_buffer.c b/vpx_dsp/bitreader_buffer.c
similarity index 67%
rename from vp9/decoder/vp9_read_bit_buffer.c
rename to vpx_dsp/bitreader_buffer.c
index c3b38a9..fb04ee6 100644
--- a/vp9/decoder/vp9_read_bit_buffer.c
+++ b/vpx_dsp/bitreader_buffer.c
@@ -7,13 +7,13 @@
* in the file PATENTS. All contributing project authors may
* be found in the AUTHORS file in the root of the source tree.
*/
-#include "vp9/decoder/vp9_read_bit_buffer.h"
+#include "./bitreader_buffer.h"
-size_t vp9_rb_bytes_read(struct vp9_read_bit_buffer *rb) {
+size_t vpx_rb_bytes_read(struct vpx_read_bit_buffer *rb) {
return (rb->bit_offset + 7) >> 3;
}
-int vp9_rb_read_bit(struct vp9_read_bit_buffer *rb) {
+int vpx_rb_read_bit(struct vpx_read_bit_buffer *rb) {
const size_t off = rb->bit_offset;
const size_t p = off >> 3;
const int q = 7 - (int)(off & 0x7);
@@ -27,15 +27,15 @@
}
}
-int vp9_rb_read_literal(struct vp9_read_bit_buffer *rb, int bits) {
+int vpx_rb_read_literal(struct vpx_read_bit_buffer *rb, int bits) {
int value = 0, bit;
for (bit = bits - 1; bit >= 0; bit--)
- value |= vp9_rb_read_bit(rb) << bit;
+ value |= vpx_rb_read_bit(rb) << bit;
return value;
}
-int vp9_rb_read_signed_literal(struct vp9_read_bit_buffer *rb,
+int vpx_rb_read_signed_literal(struct vpx_read_bit_buffer *rb,
int bits) {
- const int value = vp9_rb_read_literal(rb, bits);
- return vp9_rb_read_bit(rb) ? -value : value;
+ const int value = vpx_rb_read_literal(rb, bits);
+ return vpx_rb_read_bit(rb) ? -value : value;
}
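
Unlike the arithmetic `vpx_reader`, this buffer reads plain MSB-first bits: `off >> 3` selects the byte and `7 - (off & 7)` the bit within it, so literals come back exactly as they were packed. A small usage sketch (the byte value and the overrun handler are illustrative):

```c
#include <stdio.h>
#include "vpx_dsp/bitreader_buffer.h"

static void on_overrun(void *data) {
  (void)data;
  fprintf(stderr, "bit buffer overrun\n");
}

int main(void) {
  const uint8_t data[1] = { 0xa5 };  /* 1010 0101 */
  struct vpx_read_bit_buffer rb = { data, data + 1, 0, NULL, on_overrun };
  const int hi = vpx_rb_read_literal(&rb, 4);  /* 0xa */
  const int lo = vpx_rb_read_literal(&rb, 4);  /* 0x5 */
  printf("%x %x, %zu byte(s) read\n", hi, lo, vpx_rb_bytes_read(&rb));
  return 0;
}
```
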
diff --git a/vpx_dsp/bitreader_buffer.h b/vpx_dsp/bitreader_buffer.h
new file mode 100644
index 0000000..03b156b
--- /dev/null
+++ b/vpx_dsp/bitreader_buffer.h
@@ -0,0 +1,45 @@
+/*
+ * Copyright (c) 2013 The WebM project authors. All Rights Reserved.
+ *
+ * Use of this source code is governed by a BSD-style license
+ * that can be found in the LICENSE file in the root of the source
+ * tree. An additional intellectual property rights grant can be found
+ * in the file PATENTS. All contributing project authors may
+ * be found in the AUTHORS file in the root of the source tree.
+ */
+
+#ifndef VPX_DSP_BITREADER_BUFFER_H_
+#define VPX_DSP_BITREADER_BUFFER_H_
+
+#include <limits.h>
+
+#include "vpx/vpx_integer.h"
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+typedef void (*vpx_rb_error_handler)(void *data);
+
+struct vpx_read_bit_buffer {
+ const uint8_t *bit_buffer;
+ const uint8_t *bit_buffer_end;
+ size_t bit_offset;
+
+ void *error_handler_data;
+ vpx_rb_error_handler error_handler;
+};
+
+size_t vpx_rb_bytes_read(struct vpx_read_bit_buffer *rb);
+
+int vpx_rb_read_bit(struct vpx_read_bit_buffer *rb);
+
+int vpx_rb_read_literal(struct vpx_read_bit_buffer *rb, int bits);
+
+int vpx_rb_read_signed_literal(struct vpx_read_bit_buffer *rb, int bits);
+
+#ifdef __cplusplus
+} // extern "C"
+#endif
+
+#endif // VPX_DSP_BITREADER_BUFFER_H_
diff --git a/vp9/encoder/vp9_writer.c b/vpx_dsp/bitwriter.c
similarity index 76%
rename from vp9/encoder/vp9_writer.c
rename to vpx_dsp/bitwriter.c
index ff461f2..5b232e3 100644
--- a/vp9/encoder/vp9_writer.c
+++ b/vpx_dsp/bitwriter.c
@@ -9,23 +9,23 @@
*/
#include <assert.h>
-#include "vp9/encoder/vp9_writer.h"
-#include "vp9/common/vp9_entropy.h"
-void vp9_start_encode(vp9_writer *br, uint8_t *source) {
+#include "./bitwriter.h"
+
+void vpx_start_encode(vpx_writer *br, uint8_t *source) {
br->lowvalue = 0;
br->range = 255;
br->count = -24;
br->buffer = source;
br->pos = 0;
- vp9_write_bit(br, 0);
+ vpx_write_bit(br, 0);
}
-void vp9_stop_encode(vp9_writer *br) {
+void vpx_stop_encode(vpx_writer *br) {
int i;
for (i = 0; i < 32; i++)
- vp9_write_bit(br, 0);
+ vpx_write_bit(br, 0);
// Ensure there's no ambiguous collision with any index marker bytes
if ((br->buffer[br->pos - 1] & 0xe0) == 0xc0)
diff --git a/vp9/encoder/vp9_writer.h b/vpx_dsp/bitwriter.h
similarity index 70%
rename from vp9/encoder/vp9_writer.h
rename to vpx_dsp/bitwriter.h
index e347ea4..f6ca9b9 100644
--- a/vp9/encoder/vp9_writer.h
+++ b/vpx_dsp/bitwriter.h
@@ -8,29 +8,29 @@
* be found in the AUTHORS file in the root of the source tree.
*/
-#ifndef VP9_ENCODER_VP9_WRITER_H_
-#define VP9_ENCODER_VP9_WRITER_H_
+#ifndef VPX_DSP_BITWRITER_H_
+#define VPX_DSP_BITWRITER_H_
#include "vpx_ports/mem.h"
-#include "vp9/common/vp9_prob.h"
+#include "vpx_dsp/prob.h"
#ifdef __cplusplus
extern "C" {
#endif
-typedef struct vp9_writer {
+typedef struct vpx_writer {
unsigned int lowvalue;
unsigned int range;
int count;
unsigned int pos;
uint8_t *buffer;
-} vp9_writer;
+} vpx_writer;
-void vp9_start_encode(vp9_writer *bc, uint8_t *buffer);
-void vp9_stop_encode(vp9_writer *bc);
+void vpx_start_encode(vpx_writer *bc, uint8_t *buffer);
+void vpx_stop_encode(vpx_writer *bc);
-static INLINE void vp9_write(vp9_writer *br, int bit, int probability) {
+static INLINE void vpx_write(vpx_writer *br, int bit, int probability) {
unsigned int split;
int count = br->count;
unsigned int range = br->range;
@@ -46,7 +46,7 @@
range = br->range - split;
}
- shift = vp9_norm[range];
+ shift = vpx_norm[range];
range <<= shift;
count += shift;
@@ -78,21 +78,21 @@
br->range = range;
}
-static INLINE void vp9_write_bit(vp9_writer *w, int bit) {
- vp9_write(w, bit, 128); // vp9_prob_half
+static INLINE void vpx_write_bit(vpx_writer *w, int bit) {
+ vpx_write(w, bit, 128); // vpx_prob_half
}
-static INLINE void vp9_write_literal(vp9_writer *w, int data, int bits) {
+static INLINE void vpx_write_literal(vpx_writer *w, int data, int bits) {
int bit;
for (bit = bits - 1; bit >= 0; bit--)
- vp9_write_bit(w, 1 & (data >> bit));
+ vpx_write_bit(w, 1 & (data >> bit));
}
-#define vp9_write_prob(w, v) vp9_write_literal((w), (v), 8)
+#define vpx_write_prob(w, v) vpx_write_literal((w), (v), 8)
#ifdef __cplusplus
} // extern "C"
#endif
-#endif // VP9_ENCODER_VP9_WRITER_H_
+#endif // VPX_DSP_BITWRITER_H_
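
Paired with the reader above this forms a complete boolean coder, so a round trip is the easiest way to see the conventions: the probability argument is P(bit == 0) out of 256, `vpx_start_encode` emits the marker bit that `vpx_reader_init` consumes, and `vpx_stop_encode` flushes 32 zero bits. A plausible sketch (the buffer size and values are illustrative):

```c
#include <assert.h>
#include "vpx_dsp/bitreader.h"
#include "vpx_dsp/bitwriter.h"

int main(void) {
  uint8_t buf[64] = { 0 };
  vpx_writer w;
  vpx_reader r;

  vpx_start_encode(&w, buf);       /* writes the marker bit */
  vpx_write_literal(&w, 0x2a, 8);  /* 8 bits, MSB first */
  vpx_write(&w, 1, 50);            /* one bit, P(zero) = 50/256 */
  vpx_stop_encode(&w);             /* pads with 32 zero bits */

  vpx_reader_init(&r, buf, w.pos, NULL, NULL);  /* no decryption */
  assert(vpx_read_literal(&r, 8) == 0x2a);
  assert(vpx_read(&r, 50) == 1);
  return 0;
}
```
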
diff --git a/vp9/encoder/vp9_write_bit_buffer.c b/vpx_dsp/bitwriter_buffer.c
similarity index 75%
rename from vp9/encoder/vp9_write_bit_buffer.c
rename to vpx_dsp/bitwriter_buffer.c
index 6d55e84..0dfb859 100644
--- a/vp9/encoder/vp9_write_bit_buffer.c
+++ b/vpx_dsp/bitwriter_buffer.c
@@ -9,13 +9,14 @@
*/
#include <limits.h>
-#include "vp9/encoder/vp9_write_bit_buffer.h"
-size_t vp9_wb_bytes_written(const struct vp9_write_bit_buffer *wb) {
+#include "./bitwriter_buffer.h"
+
+size_t vpx_wb_bytes_written(const struct vpx_write_bit_buffer *wb) {
return wb->bit_offset / CHAR_BIT + (wb->bit_offset % CHAR_BIT > 0);
}
-void vp9_wb_write_bit(struct vp9_write_bit_buffer *wb, int bit) {
+void vpx_wb_write_bit(struct vpx_write_bit_buffer *wb, int bit) {
const int off = (int)wb->bit_offset;
const int p = off / CHAR_BIT;
const int q = CHAR_BIT - 1 - off % CHAR_BIT;
@@ -28,8 +29,8 @@
wb->bit_offset = off + 1;
}
-void vp9_wb_write_literal(struct vp9_write_bit_buffer *wb, int data, int bits) {
+void vpx_wb_write_literal(struct vpx_write_bit_buffer *wb, int data, int bits) {
int bit;
for (bit = bits - 1; bit >= 0; bit--)
- vp9_wb_write_bit(wb, (data >> bit) & 1);
+ vpx_wb_write_bit(wb, (data >> bit) & 1);
}
diff --git a/vp9/encoder/vp9_write_bit_buffer.h b/vpx_dsp/bitwriter_buffer.h
similarity index 62%
rename from vp9/encoder/vp9_write_bit_buffer.h
rename to vpx_dsp/bitwriter_buffer.h
index 59f9bbe..9397668 100644
--- a/vp9/encoder/vp9_write_bit_buffer.h
+++ b/vpx_dsp/bitwriter_buffer.h
@@ -8,8 +8,8 @@
* be found in the AUTHORS file in the root of the source tree.
*/
-#ifndef VP9_ENCODER_VP9_WRITE_BIT_BUFFER_H_
-#define VP9_ENCODER_VP9_WRITE_BIT_BUFFER_H_
+#ifndef VPX_DSP_BITWRITER_BUFFER_H_
+#define VPX_DSP_BITWRITER_BUFFER_H_
#include "vpx/vpx_integer.h"
@@ -17,20 +17,20 @@
extern "C" {
#endif
-struct vp9_write_bit_buffer {
+struct vpx_write_bit_buffer {
uint8_t *bit_buffer;
size_t bit_offset;
};
-size_t vp9_wb_bytes_written(const struct vp9_write_bit_buffer *wb);
+size_t vpx_wb_bytes_written(const struct vpx_write_bit_buffer *wb);
-void vp9_wb_write_bit(struct vp9_write_bit_buffer *wb, int bit);
+void vpx_wb_write_bit(struct vpx_write_bit_buffer *wb, int bit);
-void vp9_wb_write_literal(struct vp9_write_bit_buffer *wb, int data, int bits);
+void vpx_wb_write_literal(struct vpx_write_bit_buffer *wb, int data, int bits);
#ifdef __cplusplus
} // extern "C"
#endif
-#endif // VP9_ENCODER_VP9_WRITE_BIT_BUFFER_H_
+#endif // VPX_DSP_BITWRITER_BUFFER_H_
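
The write-side bit buffer is the mirror of the reader: bits land MSB first, so header fields can be packed and then measured with `vpx_wb_bytes_written`. A minimal sketch (the field meanings are made up for illustration):

```c
#include <stdio.h>
#include "vpx_dsp/bitwriter_buffer.h"

int main(void) {
  uint8_t buf[8] = { 0 };
  struct vpx_write_bit_buffer wb = { buf, 0 };

  vpx_wb_write_literal(&wb, 2, 2);  /* bits 1,0 */
  vpx_wb_write_bit(&wb, 1);         /* a flag */
  vpx_wb_write_literal(&wb, 5, 5);  /* bits 0,0,1,0,1 */

  /* 2 + 1 + 5 = 8 bits -> one byte, packed as 1010 0101 = 0xa5. */
  printf("%zu byte(s): 0x%02x\n", vpx_wb_bytes_written(&wb), buf[0]);
  return 0;
}
```
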
diff --git a/vpx_dsp/fwd_txfm.c b/vpx_dsp/fwd_txfm.c
new file mode 100644
index 0000000..bdb55ee
--- /dev/null
+++ b/vpx_dsp/fwd_txfm.c
@@ -0,0 +1,361 @@
+/*
+ * Copyright (c) 2015 The WebM project authors. All Rights Reserved.
+ *
+ * Use of this source code is governed by a BSD-style license
+ * that can be found in the LICENSE file in the root of the source
+ * tree. An additional intellectual property rights grant can be found
+ * in the file PATENTS. All contributing project authors may
+ * be found in the AUTHORS file in the root of the source tree.
+ */
+
+#include "vpx_dsp/fwd_txfm.h"
+
+void vp9_fdct4x4_c(const int16_t *input, tran_low_t *output, int stride) {
+ // The 2D transform is done with two passes which are actually pretty
+ // similar. In the first one, we transform the columns and transpose
+ // the results. In the second one, we transform the rows. To achieve that,
+ // as the first pass results are transposed, we transpose the columns (that
+ // is the transposed rows) and transpose the results (so that it goes back
+ // in normal/row positions).
+ int pass;
+ // We need an intermediate buffer between passes.
+ tran_low_t intermediate[4 * 4];
+ const int16_t *in_pass0 = input;
+ const tran_low_t *in = NULL;
+ tran_low_t *out = intermediate;
+ // Do the two transform/transpose passes
+ for (pass = 0; pass < 2; ++pass) {
+ tran_high_t input[4]; // canbe16
+ tran_high_t step[4]; // canbe16
+ tran_high_t temp1, temp2; // needs32
+ int i;
+ for (i = 0; i < 4; ++i) {
+ // Load inputs.
+ if (0 == pass) {
+ input[0] = in_pass0[0 * stride] * 16;
+ input[1] = in_pass0[1 * stride] * 16;
+ input[2] = in_pass0[2 * stride] * 16;
+ input[3] = in_pass0[3 * stride] * 16;
+ if (i == 0 && input[0]) {
+ input[0] += 1;
+ }
+ } else {
+ input[0] = in[0 * 4];
+ input[1] = in[1 * 4];
+ input[2] = in[2 * 4];
+ input[3] = in[3 * 4];
+ }
+ // Transform.
+ step[0] = input[0] + input[3];
+ step[1] = input[1] + input[2];
+ step[2] = input[1] - input[2];
+ step[3] = input[0] - input[3];
+ temp1 = (step[0] + step[1]) * cospi_16_64;
+ temp2 = (step[0] - step[1]) * cospi_16_64;
+ out[0] = (tran_low_t)fdct_round_shift(temp1);
+ out[2] = (tran_low_t)fdct_round_shift(temp2);
+ temp1 = step[2] * cospi_24_64 + step[3] * cospi_8_64;
+ temp2 = -step[2] * cospi_8_64 + step[3] * cospi_24_64;
+ out[1] = (tran_low_t)fdct_round_shift(temp1);
+ out[3] = (tran_low_t)fdct_round_shift(temp2);
+ // Do next column (which is a transposed row in second/horizontal pass)
+ in_pass0++;
+ in++;
+ out += 4;
+ }
+ // Setup in/out for next pass.
+ in = intermediate;
+ out = output;
+ }
+
+ {
+ int i, j;
+ for (i = 0; i < 4; ++i) {
+ for (j = 0; j < 4; ++j)
+ output[j + i * 4] = (output[j + i * 4] + 1) >> 2;
+ }
+ }
+}
+
+void vp9_fdct8x8_c(const int16_t *input, tran_low_t *final_output, int stride) {
+ int i, j;
+ tran_low_t intermediate[64];
+ int pass;
+ tran_low_t *output = intermediate;
+ const tran_low_t *in = NULL;
+
+ // Transform columns
+ for (pass = 0; pass < 2; ++pass) {
+ tran_high_t s0, s1, s2, s3, s4, s5, s6, s7; // canbe16
+ tran_high_t t0, t1, t2, t3; // needs32
+ tran_high_t x0, x1, x2, x3; // canbe16
+
+ int i;
+ for (i = 0; i < 8; i++) {
+ // stage 1
+ if (pass == 0) {
+ s0 = (input[0 * stride] + input[7 * stride]) * 4;
+ s1 = (input[1 * stride] + input[6 * stride]) * 4;
+ s2 = (input[2 * stride] + input[5 * stride]) * 4;
+ s3 = (input[3 * stride] + input[4 * stride]) * 4;
+ s4 = (input[3 * stride] - input[4 * stride]) * 4;
+ s5 = (input[2 * stride] - input[5 * stride]) * 4;
+ s6 = (input[1 * stride] - input[6 * stride]) * 4;
+ s7 = (input[0 * stride] - input[7 * stride]) * 4;
+ ++input;
+ } else {
+ s0 = in[0 * 8] + in[7 * 8];
+ s1 = in[1 * 8] + in[6 * 8];
+ s2 = in[2 * 8] + in[5 * 8];
+ s3 = in[3 * 8] + in[4 * 8];
+ s4 = in[3 * 8] - in[4 * 8];
+ s5 = in[2 * 8] - in[5 * 8];
+ s6 = in[1 * 8] - in[6 * 8];
+ s7 = in[0 * 8] - in[7 * 8];
+ ++in;
+ }
+
+ // fdct4(step, step);
+ x0 = s0 + s3;
+ x1 = s1 + s2;
+ x2 = s1 - s2;
+ x3 = s0 - s3;
+ t0 = (x0 + x1) * cospi_16_64;
+ t1 = (x0 - x1) * cospi_16_64;
+ t2 = x2 * cospi_24_64 + x3 * cospi_8_64;
+ t3 = -x2 * cospi_8_64 + x3 * cospi_24_64;
+ output[0] = (tran_low_t)fdct_round_shift(t0);
+ output[2] = (tran_low_t)fdct_round_shift(t2);
+ output[4] = (tran_low_t)fdct_round_shift(t1);
+ output[6] = (tran_low_t)fdct_round_shift(t3);
+
+ // Stage 2
+ t0 = (s6 - s5) * cospi_16_64;
+ t1 = (s6 + s5) * cospi_16_64;
+ t2 = fdct_round_shift(t0);
+ t3 = fdct_round_shift(t1);
+
+ // Stage 3
+ x0 = s4 + t2;
+ x1 = s4 - t2;
+ x2 = s7 - t3;
+ x3 = s7 + t3;
+
+ // Stage 4
+ t0 = x0 * cospi_28_64 + x3 * cospi_4_64;
+ t1 = x1 * cospi_12_64 + x2 * cospi_20_64;
+ t2 = x2 * cospi_12_64 + x1 * -cospi_20_64;
+ t3 = x3 * cospi_28_64 + x0 * -cospi_4_64;
+ output[1] = (tran_low_t)fdct_round_shift(t0);
+ output[3] = (tran_low_t)fdct_round_shift(t2);
+ output[5] = (tran_low_t)fdct_round_shift(t1);
+ output[7] = (tran_low_t)fdct_round_shift(t3);
+ output += 8;
+ }
+ in = intermediate;
+ output = final_output;
+ }
+
+ // Rows
+ for (i = 0; i < 8; ++i) {
+ for (j = 0; j < 8; ++j)
+ final_output[j + i * 8] /= 2;
+ }
+}
+
+void vp9_fdct16x16_c(const int16_t *input, tran_low_t *output, int stride) {
+ // The 2D transform is done with two passes which are actually pretty
+ // similar. In the first one, we transform the columns and transpose
+ // the results. In the second one, we transform the rows. To achieve that,
+ // as the first pass results are transposed, we transpose the columns (that
+ // is the transposed rows) and transpose the results (so that it goes back
+ // in normal/row positions).
+ int pass;
+ // We need an intermediate buffer between passes.
+ tran_low_t intermediate[256];
+ const int16_t *in_pass0 = input;
+ const tran_low_t *in = NULL;
+ tran_low_t *out = intermediate;
+ // Do the two transform/transpose passes
+ for (pass = 0; pass < 2; ++pass) {
+ tran_high_t step1[8]; // canbe16
+ tran_high_t step2[8]; // canbe16
+ tran_high_t step3[8]; // canbe16
+ tran_high_t input[8]; // canbe16
+ tran_high_t temp1, temp2; // needs32
+ int i;
+ for (i = 0; i < 16; i++) {
+ if (0 == pass) {
+ // Calculate input for the first 8 results.
+ input[0] = (in_pass0[0 * stride] + in_pass0[15 * stride]) * 4;
+ input[1] = (in_pass0[1 * stride] + in_pass0[14 * stride]) * 4;
+ input[2] = (in_pass0[2 * stride] + in_pass0[13 * stride]) * 4;
+ input[3] = (in_pass0[3 * stride] + in_pass0[12 * stride]) * 4;
+ input[4] = (in_pass0[4 * stride] + in_pass0[11 * stride]) * 4;
+ input[5] = (in_pass0[5 * stride] + in_pass0[10 * stride]) * 4;
+ input[6] = (in_pass0[6 * stride] + in_pass0[ 9 * stride]) * 4;
+ input[7] = (in_pass0[7 * stride] + in_pass0[ 8 * stride]) * 4;
+ // Calculate input for the next 8 results.
+ step1[0] = (in_pass0[7 * stride] - in_pass0[ 8 * stride]) * 4;
+ step1[1] = (in_pass0[6 * stride] - in_pass0[ 9 * stride]) * 4;
+ step1[2] = (in_pass0[5 * stride] - in_pass0[10 * stride]) * 4;
+ step1[3] = (in_pass0[4 * stride] - in_pass0[11 * stride]) * 4;
+ step1[4] = (in_pass0[3 * stride] - in_pass0[12 * stride]) * 4;
+ step1[5] = (in_pass0[2 * stride] - in_pass0[13 * stride]) * 4;
+ step1[6] = (in_pass0[1 * stride] - in_pass0[14 * stride]) * 4;
+ step1[7] = (in_pass0[0 * stride] - in_pass0[15 * stride]) * 4;
+ } else {
+ // Calculate input for the first 8 results.
+ input[0] = ((in[0 * 16] + 1) >> 2) + ((in[15 * 16] + 1) >> 2);
+ input[1] = ((in[1 * 16] + 1) >> 2) + ((in[14 * 16] + 1) >> 2);
+ input[2] = ((in[2 * 16] + 1) >> 2) + ((in[13 * 16] + 1) >> 2);
+ input[3] = ((in[3 * 16] + 1) >> 2) + ((in[12 * 16] + 1) >> 2);
+ input[4] = ((in[4 * 16] + 1) >> 2) + ((in[11 * 16] + 1) >> 2);
+ input[5] = ((in[5 * 16] + 1) >> 2) + ((in[10 * 16] + 1) >> 2);
+ input[6] = ((in[6 * 16] + 1) >> 2) + ((in[ 9 * 16] + 1) >> 2);
+ input[7] = ((in[7 * 16] + 1) >> 2) + ((in[ 8 * 16] + 1) >> 2);
+ // Calculate input for the next 8 results.
+ step1[0] = ((in[7 * 16] + 1) >> 2) - ((in[ 8 * 16] + 1) >> 2);
+ step1[1] = ((in[6 * 16] + 1) >> 2) - ((in[ 9 * 16] + 1) >> 2);
+ step1[2] = ((in[5 * 16] + 1) >> 2) - ((in[10 * 16] + 1) >> 2);
+ step1[3] = ((in[4 * 16] + 1) >> 2) - ((in[11 * 16] + 1) >> 2);
+ step1[4] = ((in[3 * 16] + 1) >> 2) - ((in[12 * 16] + 1) >> 2);
+ step1[5] = ((in[2 * 16] + 1) >> 2) - ((in[13 * 16] + 1) >> 2);
+ step1[6] = ((in[1 * 16] + 1) >> 2) - ((in[14 * 16] + 1) >> 2);
+ step1[7] = ((in[0 * 16] + 1) >> 2) - ((in[15 * 16] + 1) >> 2);
+ }
+ // Work on the first eight values; fdct8(input, even_results);
+ {
+ tran_high_t s0, s1, s2, s3, s4, s5, s6, s7; // canbe16
+ tran_high_t t0, t1, t2, t3; // needs32
+ tran_high_t x0, x1, x2, x3; // canbe16
+
+ // stage 1
+ s0 = input[0] + input[7];
+ s1 = input[1] + input[6];
+ s2 = input[2] + input[5];
+ s3 = input[3] + input[4];
+ s4 = input[3] - input[4];
+ s5 = input[2] - input[5];
+ s6 = input[1] - input[6];
+ s7 = input[0] - input[7];
+
+ // fdct4(step, step);
+ x0 = s0 + s3;
+ x1 = s1 + s2;
+ x2 = s1 - s2;
+ x3 = s0 - s3;
+ t0 = (x0 + x1) * cospi_16_64;
+ t1 = (x0 - x1) * cospi_16_64;
+ t2 = x3 * cospi_8_64 + x2 * cospi_24_64;
+ t3 = x3 * cospi_24_64 - x2 * cospi_8_64;
+ out[0] = (tran_low_t)fdct_round_shift(t0);
+ out[4] = (tran_low_t)fdct_round_shift(t2);
+ out[8] = (tran_low_t)fdct_round_shift(t1);
+ out[12] = (tran_low_t)fdct_round_shift(t3);
+
+ // Stage 2
+ t0 = (s6 - s5) * cospi_16_64;
+ t1 = (s6 + s5) * cospi_16_64;
+ t2 = fdct_round_shift(t0);
+ t3 = fdct_round_shift(t1);
+
+ // Stage 3
+ x0 = s4 + t2;
+ x1 = s4 - t2;
+ x2 = s7 - t3;
+ x3 = s7 + t3;
+
+ // Stage 4
+ t0 = x0 * cospi_28_64 + x3 * cospi_4_64;
+ t1 = x1 * cospi_12_64 + x2 * cospi_20_64;
+ t2 = x2 * cospi_12_64 + x1 * -cospi_20_64;
+ t3 = x3 * cospi_28_64 + x0 * -cospi_4_64;
+ out[2] = (tran_low_t)fdct_round_shift(t0);
+ out[6] = (tran_low_t)fdct_round_shift(t2);
+ out[10] = (tran_low_t)fdct_round_shift(t1);
+ out[14] = (tran_low_t)fdct_round_shift(t3);
+ }
+ // Work on the next eight values; step1 -> odd_results
+ {
+ // step 2
+ temp1 = (step1[5] - step1[2]) * cospi_16_64;
+ temp2 = (step1[4] - step1[3]) * cospi_16_64;
+ step2[2] = fdct_round_shift(temp1);
+ step2[3] = fdct_round_shift(temp2);
+ temp1 = (step1[4] + step1[3]) * cospi_16_64;
+ temp2 = (step1[5] + step1[2]) * cospi_16_64;
+ step2[4] = fdct_round_shift(temp1);
+ step2[5] = fdct_round_shift(temp2);
+ // step 3
+ step3[0] = step1[0] + step2[3];
+ step3[1] = step1[1] + step2[2];
+ step3[2] = step1[1] - step2[2];
+ step3[3] = step1[0] - step2[3];
+ step3[4] = step1[7] - step2[4];
+ step3[5] = step1[6] - step2[5];
+ step3[6] = step1[6] + step2[5];
+ step3[7] = step1[7] + step2[4];
+ // step 4
+ temp1 = step3[1] * -cospi_8_64 + step3[6] * cospi_24_64;
+ temp2 = step3[2] * cospi_24_64 + step3[5] * cospi_8_64;
+ step2[1] = fdct_round_shift(temp1);
+ step2[2] = fdct_round_shift(temp2);
+ temp1 = step3[2] * cospi_8_64 - step3[5] * cospi_24_64;
+ temp2 = step3[1] * cospi_24_64 + step3[6] * cospi_8_64;
+ step2[5] = fdct_round_shift(temp1);
+ step2[6] = fdct_round_shift(temp2);
+ // step 5
+ step1[0] = step3[0] + step2[1];
+ step1[1] = step3[0] - step2[1];
+ step1[2] = step3[3] + step2[2];
+ step1[3] = step3[3] - step2[2];
+ step1[4] = step3[4] - step2[5];
+ step1[5] = step3[4] + step2[5];
+ step1[6] = step3[7] - step2[6];
+ step1[7] = step3[7] + step2[6];
+ // step 6
+ temp1 = step1[0] * cospi_30_64 + step1[7] * cospi_2_64;
+ temp2 = step1[1] * cospi_14_64 + step1[6] * cospi_18_64;
+ out[1] = (tran_low_t)fdct_round_shift(temp1);
+ out[9] = (tran_low_t)fdct_round_shift(temp2);
+ temp1 = step1[2] * cospi_22_64 + step1[5] * cospi_10_64;
+ temp2 = step1[3] * cospi_6_64 + step1[4] * cospi_26_64;
+ out[5] = (tran_low_t)fdct_round_shift(temp1);
+ out[13] = (tran_low_t)fdct_round_shift(temp2);
+ temp1 = step1[3] * -cospi_26_64 + step1[4] * cospi_6_64;
+ temp2 = step1[2] * -cospi_10_64 + step1[5] * cospi_22_64;
+ out[3] = (tran_low_t)fdct_round_shift(temp1);
+ out[11] = (tran_low_t)fdct_round_shift(temp2);
+ temp1 = step1[1] * -cospi_18_64 + step1[6] * cospi_14_64;
+ temp2 = step1[0] * -cospi_2_64 + step1[7] * cospi_30_64;
+ out[7] = (tran_low_t)fdct_round_shift(temp1);
+ out[15] = (tran_low_t)fdct_round_shift(temp2);
+ }
+ // Do next column (which is a transposed row in second/horizontal pass)
+ in++;
+ in_pass0++;
+ out += 16;
+ }
+ // Setup in/out for next pass.
+ in = intermediate;
+ out = output;
+ }
+}
+
+#if CONFIG_VP9_HIGHBITDEPTH
+void vp9_highbd_fdct4x4_c(const int16_t *input, tran_low_t *output,
+ int stride) {
+ vp9_fdct4x4_c(input, output, stride);
+}
+
+void vp9_highbd_fdct8x8_c(const int16_t *input, tran_low_t *final_output,
+ int stride) {
+ vp9_fdct8x8_c(input, final_output, stride);
+}
+
+void vp9_highbd_fdct16x16_c(const int16_t *input, tran_low_t *output,
+ int stride) {
+ vp9_fdct16x16_c(input, output, stride);
+}
+#endif // CONFIG_VP9_HIGHBITDEPTH
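
For orientation, these are the plain-C forward DCTs that the SIMD versions are checked against: each one scales the residual up on the way in (x16 for the 4x4, x4 per pass for the larger sizes), runs two transform-and-transpose passes through an intermediate buffer, and rounds back down at the end, so the coefficients carry a fixed power-of-two scale. A minimal call sketch (the values are illustrative; `tran_low_t` is 16 bits in non-high-bitdepth builds):

```c
#include <stdio.h>
#include "vpx_dsp/fwd_txfm.h"

int main(void) {
  /* A flat 4x4 residual: the energy should land in the DC coefficient. */
  int16_t input[4 * 4];
  tran_low_t output[4 * 4];
  int i;
  for (i = 0; i < 4 * 4; ++i) input[i] = 100;

  vp9_fdct4x4_c(input, output, 4 /* stride, in samples */);

  /* output[0] is the (scaled) DC term; the ACs should be near zero. */
  printf("DC = %d, first AC = %d\n", (int)output[0], (int)output[1]);
  return 0;
}
```
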
diff --git a/vpx_dsp/fwd_txfm.h b/vpx_dsp/fwd_txfm.h
new file mode 100644
index 0000000..fa54403
--- /dev/null
+++ b/vpx_dsp/fwd_txfm.h
@@ -0,0 +1,19 @@
+/*
+ * Copyright (c) 2015 The WebM project authors. All Rights Reserved.
+ *
+ * Use of this source code is governed by a BSD-style license
+ * that can be found in the LICENSE file in the root of the source
+ * tree. An additional intellectual property rights grant can be found
+ * in the file PATENTS. All contributing project authors may
+ * be found in the AUTHORS file in the root of the source tree.
+ */
+
+#include "vp9/common/vp9_idct.h"
+
+static INLINE tran_high_t fdct_round_shift(tran_high_t input) {
+ tran_high_t rv = ROUND_POWER_OF_TWO(input, DCT_CONST_BITS);
+ // TODO(debargha, peter.derivaz): Find new bounds for this assert
+ // and make the bounds consts.
+ // assert(INT16_MIN <= rv && rv <= INT16_MAX);
+ return rv;
+}
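
`fdct_round_shift` is a round-to-nearest right shift by `DCT_CONST_BITS` (14, per vp9_idct.h); since the `cospi_*_64` constants are cosines scaled by 2^14, one multiply followed by this shift approximates multiplying by the real-valued cosine. A worked instance (the numbers are just an illustration):

```c
#include <assert.h>
#include "vpx_dsp/fwd_txfm.h"

int main(void) {
  /* cospi_16_64 == 11585 ~= 2^14 * cos(pi/4). */
  const tran_high_t t = (1 + 1) * 11585;  /* 23170, i.e. ~1.41 * 2^14 */
  assert(fdct_round_shift(t) == 1);       /* (23170 + 8192) >> 14 */
  return 0;
}
```
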
diff --git a/vp9/common/vp9_loopfilter_filters.c b/vpx_dsp/loopfilter.c
similarity index 91%
rename from vp9/common/vp9_loopfilter_filters.c
rename to vpx_dsp/loopfilter.c
index 3cf4c32..dc8aca5 100644
--- a/vp9/common/vp9_loopfilter_filters.c
+++ b/vpx_dsp/loopfilter.c
@@ -1,5 +1,5 @@
/*
- * Copyright (c) 2010 The WebM project authors. All Rights Reserved.
+ * Copyright (c) 2015 The WebM project authors. All Rights Reserved.
*
* Use of this source code is governed by a BSD-style license
* that can be found in the LICENSE file in the root of the source
@@ -9,10 +9,8 @@
*/
#include "./vpx_config.h"
+#include "vpx_dsp/vpx_dsp_common.h"
#include "vpx_ports/mem.h"
-#include "vp9/common/vp9_common.h"
-#include "vp9/common/vp9_loopfilter.h"
-#include "vp9/common/vp9_onyxc_int.h"
static INLINE int8_t signed_char_clamp(int t) {
return (int8_t)clamp(t, -128, 127);
@@ -117,7 +115,7 @@
*op1 = signed_char_clamp(ps1 + filter) ^ 0x80;
}
-void vp9_lpf_horizontal_4_c(uint8_t *s, int p /* pitch */,
+void vpx_lpf_horizontal_4_c(uint8_t *s, int p /* pitch */,
const uint8_t *blimit, const uint8_t *limit,
const uint8_t *thresh, int count) {
int i;
@@ -134,15 +132,15 @@
}
}
-void vp9_lpf_horizontal_4_dual_c(uint8_t *s, int p, const uint8_t *blimit0,
+void vpx_lpf_horizontal_4_dual_c(uint8_t *s, int p, const uint8_t *blimit0,
const uint8_t *limit0, const uint8_t *thresh0,
const uint8_t *blimit1, const uint8_t *limit1,
const uint8_t *thresh1) {
- vp9_lpf_horizontal_4_c(s, p, blimit0, limit0, thresh0, 1);
- vp9_lpf_horizontal_4_c(s + 8, p, blimit1, limit1, thresh1, 1);
+ vpx_lpf_horizontal_4_c(s, p, blimit0, limit0, thresh0, 1);
+ vpx_lpf_horizontal_4_c(s + 8, p, blimit1, limit1, thresh1, 1);
}
-void vp9_lpf_vertical_4_c(uint8_t *s, int pitch, const uint8_t *blimit,
+void vpx_lpf_vertical_4_c(uint8_t *s, int pitch, const uint8_t *blimit,
const uint8_t *limit, const uint8_t *thresh,
int count) {
int i;
@@ -159,12 +157,12 @@
}
}
-void vp9_lpf_vertical_4_dual_c(uint8_t *s, int pitch, const uint8_t *blimit0,
+void vpx_lpf_vertical_4_dual_c(uint8_t *s, int pitch, const uint8_t *blimit0,
const uint8_t *limit0, const uint8_t *thresh0,
const uint8_t *blimit1, const uint8_t *limit1,
const uint8_t *thresh1) {
- vp9_lpf_vertical_4_c(s, pitch, blimit0, limit0, thresh0, 1);
- vp9_lpf_vertical_4_c(s + 8 * pitch, pitch, blimit1, limit1,
+ vpx_lpf_vertical_4_c(s, pitch, blimit0, limit0, thresh0, 1);
+ vpx_lpf_vertical_4_c(s + 8 * pitch, pitch, blimit1, limit1,
thresh1, 1);
}
@@ -189,7 +187,7 @@
}
}
-void vp9_lpf_horizontal_8_c(uint8_t *s, int p, const uint8_t *blimit,
+void vpx_lpf_horizontal_8_c(uint8_t *s, int p, const uint8_t *blimit,
const uint8_t *limit, const uint8_t *thresh,
int count) {
int i;
@@ -209,15 +207,15 @@
}
}
-void vp9_lpf_horizontal_8_dual_c(uint8_t *s, int p, const uint8_t *blimit0,
+void vpx_lpf_horizontal_8_dual_c(uint8_t *s, int p, const uint8_t *blimit0,
const uint8_t *limit0, const uint8_t *thresh0,
const uint8_t *blimit1, const uint8_t *limit1,
const uint8_t *thresh1) {
- vp9_lpf_horizontal_8_c(s, p, blimit0, limit0, thresh0, 1);
- vp9_lpf_horizontal_8_c(s + 8, p, blimit1, limit1, thresh1, 1);
+ vpx_lpf_horizontal_8_c(s, p, blimit0, limit0, thresh0, 1);
+ vpx_lpf_horizontal_8_c(s + 8, p, blimit1, limit1, thresh1, 1);
}
-void vp9_lpf_vertical_8_c(uint8_t *s, int pitch, const uint8_t *blimit,
+void vpx_lpf_vertical_8_c(uint8_t *s, int pitch, const uint8_t *blimit,
const uint8_t *limit, const uint8_t *thresh,
int count) {
int i;
@@ -234,12 +232,12 @@
}
}
-void vp9_lpf_vertical_8_dual_c(uint8_t *s, int pitch, const uint8_t *blimit0,
+void vpx_lpf_vertical_8_dual_c(uint8_t *s, int pitch, const uint8_t *blimit0,
const uint8_t *limit0, const uint8_t *thresh0,
const uint8_t *blimit1, const uint8_t *limit1,
const uint8_t *thresh1) {
- vp9_lpf_vertical_8_c(s, pitch, blimit0, limit0, thresh0, 1);
- vp9_lpf_vertical_8_c(s + 8 * pitch, pitch, blimit1, limit1,
+ vpx_lpf_vertical_8_c(s, pitch, blimit0, limit0, thresh0, 1);
+ vpx_lpf_vertical_8_c(s + 8 * pitch, pitch, blimit1, limit1,
thresh1, 1);
}
@@ -294,7 +292,7 @@
}
}
-void vp9_lpf_horizontal_16_c(uint8_t *s, int p, const uint8_t *blimit,
+void vpx_lpf_horizontal_16_c(uint8_t *s, int p, const uint8_t *blimit,
const uint8_t *limit, const uint8_t *thresh,
int count) {
int i;
@@ -343,12 +341,12 @@
}
}
-void vp9_lpf_vertical_16_c(uint8_t *s, int p, const uint8_t *blimit,
+void vpx_lpf_vertical_16_c(uint8_t *s, int p, const uint8_t *blimit,
const uint8_t *limit, const uint8_t *thresh) {
mb_lpf_vertical_edge_w(s, p, blimit, limit, thresh, 8);
}
-void vp9_lpf_vertical_16_dual_c(uint8_t *s, int p, const uint8_t *blimit,
+void vpx_lpf_vertical_16_dual_c(uint8_t *s, int p, const uint8_t *blimit,
const uint8_t *limit, const uint8_t *thresh) {
mb_lpf_vertical_edge_w(s, p, blimit, limit, thresh, 16);
}
@@ -448,7 +446,7 @@
*op1 = signed_char_clamp_high(ps1 + filter, bd) + (0x80 << shift);
}
-void vp9_highbd_lpf_horizontal_4_c(uint16_t *s, int p /* pitch */,
+void vpx_highbd_lpf_horizontal_4_c(uint16_t *s, int p /* pitch */,
const uint8_t *blimit, const uint8_t *limit,
const uint8_t *thresh, int count, int bd) {
int i;
@@ -471,7 +469,7 @@
}
}
-void vp9_highbd_lpf_horizontal_4_dual_c(uint16_t *s, int p,
+void vpx_highbd_lpf_horizontal_4_dual_c(uint16_t *s, int p,
const uint8_t *blimit0,
const uint8_t *limit0,
const uint8_t *thresh0,
@@ -479,11 +477,11 @@
const uint8_t *limit1,
const uint8_t *thresh1,
int bd) {
- vp9_highbd_lpf_horizontal_4_c(s, p, blimit0, limit0, thresh0, 1, bd);
- vp9_highbd_lpf_horizontal_4_c(s + 8, p, blimit1, limit1, thresh1, 1, bd);
+ vpx_highbd_lpf_horizontal_4_c(s, p, blimit0, limit0, thresh0, 1, bd);
+ vpx_highbd_lpf_horizontal_4_c(s + 8, p, blimit1, limit1, thresh1, 1, bd);
}
-void vp9_highbd_lpf_vertical_4_c(uint16_t *s, int pitch, const uint8_t *blimit,
+void vpx_highbd_lpf_vertical_4_c(uint16_t *s, int pitch, const uint8_t *blimit,
const uint8_t *limit, const uint8_t *thresh,
int count, int bd) {
int i;
@@ -500,7 +498,7 @@
}
}
-void vp9_highbd_lpf_vertical_4_dual_c(uint16_t *s, int pitch,
+void vpx_highbd_lpf_vertical_4_dual_c(uint16_t *s, int pitch,
const uint8_t *blimit0,
const uint8_t *limit0,
const uint8_t *thresh0,
@@ -508,8 +506,8 @@
const uint8_t *limit1,
const uint8_t *thresh1,
int bd) {
- vp9_highbd_lpf_vertical_4_c(s, pitch, blimit0, limit0, thresh0, 1, bd);
- vp9_highbd_lpf_vertical_4_c(s + 8 * pitch, pitch, blimit1, limit1,
+ vpx_highbd_lpf_vertical_4_c(s, pitch, blimit0, limit0, thresh0, 1, bd);
+ vpx_highbd_lpf_vertical_4_c(s + 8 * pitch, pitch, blimit1, limit1,
thresh1, 1, bd);
}
@@ -534,7 +532,7 @@
}
}
-void vp9_highbd_lpf_horizontal_8_c(uint16_t *s, int p, const uint8_t *blimit,
+void vpx_highbd_lpf_horizontal_8_c(uint16_t *s, int p, const uint8_t *blimit,
const uint8_t *limit, const uint8_t *thresh,
int count, int bd) {
int i;
@@ -556,7 +554,7 @@
}
}
-void vp9_highbd_lpf_horizontal_8_dual_c(uint16_t *s, int p,
+void vpx_highbd_lpf_horizontal_8_dual_c(uint16_t *s, int p,
const uint8_t *blimit0,
const uint8_t *limit0,
const uint8_t *thresh0,
@@ -564,11 +562,11 @@
const uint8_t *limit1,
const uint8_t *thresh1,
int bd) {
- vp9_highbd_lpf_horizontal_8_c(s, p, blimit0, limit0, thresh0, 1, bd);
- vp9_highbd_lpf_horizontal_8_c(s + 8, p, blimit1, limit1, thresh1, 1, bd);
+ vpx_highbd_lpf_horizontal_8_c(s, p, blimit0, limit0, thresh0, 1, bd);
+ vpx_highbd_lpf_horizontal_8_c(s + 8, p, blimit1, limit1, thresh1, 1, bd);
}
-void vp9_highbd_lpf_vertical_8_c(uint16_t *s, int pitch, const uint8_t *blimit,
+void vpx_highbd_lpf_vertical_8_c(uint16_t *s, int pitch, const uint8_t *blimit,
const uint8_t *limit, const uint8_t *thresh,
int count, int bd) {
int i;
@@ -588,7 +586,7 @@
}
}
-void vp9_highbd_lpf_vertical_8_dual_c(uint16_t *s, int pitch,
+void vpx_highbd_lpf_vertical_8_dual_c(uint16_t *s, int pitch,
const uint8_t *blimit0,
const uint8_t *limit0,
const uint8_t *thresh0,
@@ -596,8 +594,8 @@
const uint8_t *limit1,
const uint8_t *thresh1,
int bd) {
- vp9_highbd_lpf_vertical_8_c(s, pitch, blimit0, limit0, thresh0, 1, bd);
- vp9_highbd_lpf_vertical_8_c(s + 8 * pitch, pitch, blimit1, limit1,
+ vpx_highbd_lpf_vertical_8_c(s, pitch, blimit0, limit0, thresh0, 1, bd);
+ vpx_highbd_lpf_vertical_8_c(s + 8 * pitch, pitch, blimit1, limit1,
thresh1, 1, bd);
}
@@ -664,7 +662,7 @@
}
}
-void vp9_highbd_lpf_horizontal_16_c(uint16_t *s, int p, const uint8_t *blimit,
+void vpx_highbd_lpf_horizontal_16_c(uint16_t *s, int p, const uint8_t *blimit,
const uint8_t *limit, const uint8_t *thresh,
int count, int bd) {
int i;
@@ -729,13 +727,13 @@
}
}
-void vp9_highbd_lpf_vertical_16_c(uint16_t *s, int p, const uint8_t *blimit,
+void vpx_highbd_lpf_vertical_16_c(uint16_t *s, int p, const uint8_t *blimit,
const uint8_t *limit, const uint8_t *thresh,
int bd) {
highbd_mb_lpf_vertical_edge_w(s, p, blimit, limit, thresh, 8, bd);
}
-void vp9_highbd_lpf_vertical_16_dual_c(uint16_t *s, int p,
+void vpx_highbd_lpf_vertical_16_dual_c(uint16_t *s, int p,
const uint8_t *blimit,
const uint8_t *limit,
const uint8_t *thresh,
diff --git a/vpx_dsp/mips/common_dspr2.h b/vpx_dsp/mips/common_dspr2.h
new file mode 100644
index 0000000..8278101
--- /dev/null
+++ b/vpx_dsp/mips/common_dspr2.h
@@ -0,0 +1,62 @@
+/*
+ * Copyright (c) 2013 The WebM project authors. All Rights Reserved.
+ *
+ * Use of this source code is governed by a BSD-style license
+ * that can be found in the LICENSE file in the root of the source
+ * tree. An additional intellectual property rights grant can be found
+ * in the file PATENTS. All contributing project authors may
+ * be found in the AUTHORS file in the root of the source tree.
+ */
+
+#ifndef VPX_COMMON_MIPS_DSPR2_H_
+#define VPX_COMMON_MIPS_DSPR2_H_
+
+#include <assert.h>
+#include "./vpx_config.h"
+#include "vpx/vpx_integer.h"
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+#if HAVE_DSPR2
+#define CROP_WIDTH 512
+
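+/* prefetch data for load */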
+static INLINE void prefetch_load(const unsigned char *src) {
+ __asm__ __volatile__ (
+ "pref 0, 0(%[src]) \n\t"
+ :
+ : [src] "r" (src)
+ );
+}
+
+/* prefetch data for store */
+static INLINE void prefetch_store(unsigned char *dst) {
+ __asm__ __volatile__ (
+ "pref 1, 0(%[dst]) \n\t"
+ :
+ : [dst] "r" (dst)
+ );
+}
+
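+/* prefetch data for load, streamed */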
+static INLINE void prefetch_load_streamed(const unsigned char *src) {
+ __asm__ __volatile__ (
+ "pref 4, 0(%[src]) \n\t"
+ :
+ : [src] "r" (src)
+ );
+}
+
+/* prefetch data for store, streamed */
+static INLINE void prefetch_store_streamed(unsigned char *dst) {
+ __asm__ __volatile__ (
+ "pref 5, 0(%[dst]) \n\t"
+ :
+ : [dst] "r" (dst)
+ );
+}
+#endif // #if HAVE_DSPR2
+#ifdef __cplusplus
+} // extern "C"
+#endif
+
+#endif // VPX_COMMON_MIPS_DSPR2_H_
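
The four wrappers map to the MIPS32 `pref` instruction with hints 0/1 (temporal load/store) and 4/5 (streamed load/store, for data touched once that should not displace the cache). A plausible use under HAVE_DSPR2, prefetching ahead of a copy loop (the 32-byte distance and the helper are illustrative and would be tuned per core):

```c
#include "vpx_dsp/mips/common_dspr2.h"

/* Copy one row, hinting the next chunk into cache while the current
 * one is copied. Assumes width is a multiple of 4. */
static void copy_row(unsigned char *dst, const unsigned char *src,
                     int width) {
  int i;
  for (i = 0; i < width; i += 4) {
    prefetch_load(src + i + 32);
    prefetch_store(dst + i + 32);
    dst[i + 0] = src[i + 0];
    dst[i + 1] = src[i + 1];
    dst[i + 2] = src[i + 2];
    dst[i + 3] = src[i + 3];
  }
}
```
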
diff --git a/vp9/common/mips/msa/vp9_loopfilter_16_msa.c b/vpx_dsp/mips/loopfilter_16_msa.c
similarity index 95%
rename from vp9/common/mips/msa/vp9_loopfilter_16_msa.c
rename to vpx_dsp/mips/loopfilter_16_msa.c
index aeaa48e..b7c9f7b 100644
--- a/vp9/common/mips/msa/vp9_loopfilter_16_msa.c
+++ b/vpx_dsp/mips/loopfilter_16_msa.c
@@ -9,9 +9,9 @@
*/
#include "vpx_ports/mem.h"
-#include "vp9/common/mips/msa/vp9_loopfilter_msa.h"
+#include "vpx_dsp/mips/loopfilter_msa.h"
-int32_t vp9_hz_lpf_t4_and_t8_16w(uint8_t *src, int32_t pitch,
+int32_t vpx_hz_lpf_t4_and_t8_16w(uint8_t *src, int32_t pitch,
uint8_t *filter48,
const uint8_t *b_limit_ptr,
const uint8_t *limit_ptr,
@@ -79,7 +79,7 @@
}
}
-void vp9_hz_lpf_t16_16w(uint8_t *src, int32_t pitch, uint8_t *filter48) {
+void vpx_hz_lpf_t16_16w(uint8_t *src, int32_t pitch, uint8_t *filter48) {
v16u8 flat, flat2, filter8;
v16i8 zero = { 0 };
v16u8 p7, p6, p5, p4, p3, p2, p1, p0, q0, q1, q2, q3, q4, q5, q6, q7;
@@ -405,7 +405,7 @@
}
}
-void vp9_lpf_horizontal_16_dual_msa(uint8_t *src, int32_t pitch,
+void vpx_lpf_horizontal_16_dual_msa(uint8_t *src, int32_t pitch,
const uint8_t *b_limit_ptr,
const uint8_t *limit_ptr,
const uint8_t *thresh_ptr,
@@ -415,15 +415,15 @@
(void)count;
- early_exit = vp9_hz_lpf_t4_and_t8_16w(src, pitch, &filter48[0], b_limit_ptr,
+ early_exit = vpx_hz_lpf_t4_and_t8_16w(src, pitch, &filter48[0], b_limit_ptr,
limit_ptr, thresh_ptr);
if (0 == early_exit) {
- vp9_hz_lpf_t16_16w(src, pitch, filter48);
+ vpx_hz_lpf_t16_16w(src, pitch, filter48);
}
}
-void vp9_lpf_horizontal_16_msa(uint8_t *src, int32_t pitch,
+void vpx_lpf_horizontal_16_msa(uint8_t *src, int32_t pitch,
const uint8_t *b_limit_ptr,
const uint8_t *limit_ptr,
const uint8_t *thresh_ptr,
@@ -643,13 +643,13 @@
}
}
} else {
- vp9_lpf_horizontal_16_dual_msa(src, pitch, b_limit_ptr, limit_ptr,
+ vpx_lpf_horizontal_16_dual_msa(src, pitch, b_limit_ptr, limit_ptr,
thresh_ptr, count);
}
}
-static void vp9_transpose_16x8_to_8x16(uint8_t *input, int32_t in_pitch,
- uint8_t *output, int32_t out_pitch) {
+static void transpose_16x8_to_8x16(uint8_t *input, int32_t in_pitch,
+ uint8_t *output, int32_t out_pitch) {
v16u8 p7_org, p6_org, p5_org, p4_org, p3_org, p2_org, p1_org, p0_org;
v16i8 tmp0, tmp1, tmp2, tmp3, tmp4, tmp5, tmp6, tmp7;
v16u8 p7, p6, p5, p4, p3, p2, p1, p0, q0, q1, q2, q3, q4, q5, q6, q7;
@@ -673,8 +673,8 @@
ST_UB8(q0, q1, q2, q3, q4, q5, q6, q7, output, out_pitch);
}
-static void vp9_transpose_8x16_to_16x8(uint8_t *input, int32_t in_pitch,
- uint8_t *output, int32_t out_pitch) {
+static void transpose_8x16_to_16x8(uint8_t *input, int32_t in_pitch,
+ uint8_t *output, int32_t out_pitch) {
v16u8 p7_o, p6_o, p5_o, p4_o, p3_o, p2_o, p1_o, p0_o;
v16u8 p7, p6, p5, p4, p3, p2, p1, p0, q0, q1, q2, q3, q4, q5, q6, q7;
@@ -685,8 +685,8 @@
ST_UB8(p7_o, p6_o, p5_o, p4_o, p3_o, p2_o, p1_o, p0_o, output, out_pitch);
}
-static void vp9_transpose_16x16(uint8_t *input, int32_t in_pitch,
- uint8_t *output, int32_t out_pitch) {
+static void transpose_16x16(uint8_t *input, int32_t in_pitch,
+ uint8_t *output, int32_t out_pitch) {
v16u8 row0, row1, row2, row3, row4, row5, row6, row7;
v16u8 row8, row9, row10, row11, row12, row13, row14, row15;
v16u8 p7, p6, p5, p4, p3, p2, p1, p0, q0, q1, q2, q3, q4, q5, q6, q7;
@@ -744,7 +744,7 @@
ST_UB8(q0, q1, q2, q3, q4, q5, q6, q7, output, out_pitch);
}
-int32_t vp9_vt_lpf_t4_and_t8_8w(uint8_t *src, uint8_t *filter48,
+int32_t vpx_vt_lpf_t4_and_t8_8w(uint8_t *src, uint8_t *filter48,
uint8_t *src_org, int32_t pitch_org,
const uint8_t *b_limit_ptr,
const uint8_t *limit_ptr,
@@ -812,7 +812,7 @@
}
}
-int32_t vp9_vt_lpf_t16_8w(uint8_t *src, uint8_t *src_org, int32_t pitch,
+int32_t vpx_vt_lpf_t16_8w(uint8_t *src, uint8_t *src_org, int32_t pitch,
uint8_t *filter48) {
v16i8 zero = { 0 };
v16u8 filter8, flat, flat2;
@@ -1032,7 +1032,7 @@
}
}
-void vp9_lpf_vertical_16_msa(uint8_t *src, int32_t pitch,
+void vpx_lpf_vertical_16_msa(uint8_t *src, int32_t pitch,
const uint8_t *b_limit_ptr,
const uint8_t *limit_ptr,
const uint8_t *thresh_ptr) {
@@ -1040,23 +1040,23 @@
DECLARE_ALIGNED(32, uint8_t, transposed_input[16 * 24]);
uint8_t *filter48 = &transposed_input[16 * 16];
- vp9_transpose_16x8_to_8x16(src - 8, pitch, transposed_input, 16);
+ transpose_16x8_to_8x16(src - 8, pitch, transposed_input, 16);
- early_exit = vp9_vt_lpf_t4_and_t8_8w((transposed_input + 16 * 8),
+ early_exit = vpx_vt_lpf_t4_and_t8_8w((transposed_input + 16 * 8),
&filter48[0], src, pitch, b_limit_ptr,
limit_ptr, thresh_ptr);
if (0 == early_exit) {
- early_exit = vp9_vt_lpf_t16_8w((transposed_input + 16 * 8), src, pitch,
+ early_exit = vpx_vt_lpf_t16_8w((transposed_input + 16 * 8), src, pitch,
&filter48[0]);
if (0 == early_exit) {
- vp9_transpose_8x16_to_16x8(transposed_input, 16, src - 8, pitch);
+ transpose_8x16_to_16x8(transposed_input, 16, src - 8, pitch);
}
}
}
-int32_t vp9_vt_lpf_t4_and_t8_16w(uint8_t *src, uint8_t *filter48,
+int32_t vpx_vt_lpf_t4_and_t8_16w(uint8_t *src, uint8_t *filter48,
uint8_t *src_org, int32_t pitch,
const uint8_t *b_limit_ptr,
const uint8_t *limit_ptr,
@@ -1134,7 +1134,7 @@
}
}
-int32_t vp9_vt_lpf_t16_16w(uint8_t *src, uint8_t *src_org, int32_t pitch,
+int32_t vpx_vt_lpf_t16_16w(uint8_t *src, uint8_t *src_org, int32_t pitch,
uint8_t *filter48) {
v16u8 flat, flat2, filter8;
v16i8 zero = { 0 };
@@ -1455,7 +1455,7 @@
}
}
-void vp9_lpf_vertical_16_dual_msa(uint8_t *src, int32_t pitch,
+void vpx_lpf_vertical_16_dual_msa(uint8_t *src, int32_t pitch,
const uint8_t *b_limit_ptr,
const uint8_t *limit_ptr,
const uint8_t *thresh_ptr) {
@@ -1463,18 +1463,18 @@
DECLARE_ALIGNED(32, uint8_t, transposed_input[16 * 24]);
uint8_t *filter48 = &transposed_input[16 * 16];
- vp9_transpose_16x16((src - 8), pitch, &transposed_input[0], 16);
+ transpose_16x16((src - 8), pitch, &transposed_input[0], 16);
- early_exit = vp9_vt_lpf_t4_and_t8_16w((transposed_input + 16 * 8),
+ early_exit = vpx_vt_lpf_t4_and_t8_16w((transposed_input + 16 * 8),
&filter48[0], src, pitch, b_limit_ptr,
limit_ptr, thresh_ptr);
if (0 == early_exit) {
- early_exit = vp9_vt_lpf_t16_16w((transposed_input + 16 * 8), src, pitch,
+ early_exit = vpx_vt_lpf_t16_16w((transposed_input + 16 * 8), src, pitch,
&filter48[0]);
if (0 == early_exit) {
- vp9_transpose_16x16(transposed_input, 16, (src - 8), pitch);
+ transpose_16x16(transposed_input, 16, (src - 8), pitch);
}
}
}
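The vertical 16-wide filters above avoid costly column accesses by transposing the neighborhood into a 16-byte-pitch scratch tile, running the horizontal t4/t8/t16 kernels on it, and transposing back only when the early-exit flags report that pixels were actually changed. A scalar sketch of the transpose idea (illustrative only; the MSA versions do this with interleave/pack shuffles):

static void transpose_tile(const uint8_t *in, int32_t in_pitch,
                           uint8_t *out, int32_t out_pitch, int32_t n) {
  int32_t r, c;
  for (r = 0; r < n; ++r)
    for (c = 0; c < n; ++c)
      out[c * out_pitch + r] = in[r * in_pitch + c];  /* rows become columns */
}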
diff --git a/vp9/common/mips/msa/vp9_loopfilter_4_msa.c b/vpx_dsp/mips/loopfilter_4_msa.c
similarity index 94%
rename from vp9/common/mips/msa/vp9_loopfilter_4_msa.c
rename to vpx_dsp/mips/loopfilter_4_msa.c
index 7f69135..daf5f38 100644
--- a/vp9/common/mips/msa/vp9_loopfilter_4_msa.c
+++ b/vpx_dsp/mips/loopfilter_4_msa.c
@@ -8,9 +8,9 @@
* be found in the AUTHORS file in the root of the source tree.
*/
-#include "vp9/common/mips/msa/vp9_loopfilter_msa.h"
+#include "vpx_dsp/mips/loopfilter_msa.h"
-void vp9_lpf_horizontal_4_msa(uint8_t *src, int32_t pitch,
+void vpx_lpf_horizontal_4_msa(uint8_t *src, int32_t pitch,
const uint8_t *b_limit_ptr,
const uint8_t *limit_ptr,
const uint8_t *thresh_ptr,
@@ -39,7 +39,7 @@
SD4(p1_d, p0_d, q0_d, q1_d, (src - 2 * pitch), pitch);
}
-void vp9_lpf_horizontal_4_dual_msa(uint8_t *src, int32_t pitch,
+void vpx_lpf_horizontal_4_dual_msa(uint8_t *src, int32_t pitch,
const uint8_t *b_limit0_ptr,
const uint8_t *limit0_ptr,
const uint8_t *thresh0_ptr,
@@ -71,7 +71,7 @@
ST_UB4(p1, p0, q0, q1, (src - 2 * pitch), pitch);
}
-void vp9_lpf_vertical_4_msa(uint8_t *src, int32_t pitch,
+void vpx_lpf_vertical_4_msa(uint8_t *src, int32_t pitch,
const uint8_t *b_limit_ptr,
const uint8_t *limit_ptr,
const uint8_t *thresh_ptr,
@@ -102,7 +102,7 @@
ST4x4_UB(vec3, vec3, 0, 1, 2, 3, src, pitch);
}
-void vp9_lpf_vertical_4_dual_msa(uint8_t *src, int32_t pitch,
+void vpx_lpf_vertical_4_dual_msa(uint8_t *src, int32_t pitch,
const uint8_t *b_limit0_ptr,
const uint8_t *limit0_ptr,
const uint8_t *thresh0_ptr,
diff --git a/vp9/common/mips/msa/vp9_loopfilter_8_msa.c b/vpx_dsp/mips/loopfilter_8_msa.c
similarity index 97%
rename from vp9/common/mips/msa/vp9_loopfilter_8_msa.c
rename to vpx_dsp/mips/loopfilter_8_msa.c
index 26a858d..00b6db5 100644
--- a/vp9/common/mips/msa/vp9_loopfilter_8_msa.c
+++ b/vpx_dsp/mips/loopfilter_8_msa.c
@@ -8,9 +8,9 @@
* be found in the AUTHORS file in the root of the source tree.
*/
-#include "vp9/common/mips/msa/vp9_loopfilter_msa.h"
+#include "vpx_dsp/mips/loopfilter_msa.h"
-void vp9_lpf_horizontal_8_msa(uint8_t *src, int32_t pitch,
+void vpx_lpf_horizontal_8_msa(uint8_t *src, int32_t pitch,
const uint8_t *b_limit_ptr,
const uint8_t *limit_ptr,
const uint8_t *thresh_ptr,
@@ -83,7 +83,7 @@
}
}
-void vp9_lpf_horizontal_8_dual_msa(uint8_t *src, int32_t pitch,
+void vpx_lpf_horizontal_8_dual_msa(uint8_t *src, int32_t pitch,
const uint8_t *b_limit0,
const uint8_t *limit0,
const uint8_t *thresh0,
@@ -158,7 +158,7 @@
}
}
-void vp9_lpf_vertical_8_msa(uint8_t *src, int32_t pitch,
+void vpx_lpf_vertical_8_msa(uint8_t *src, int32_t pitch,
const uint8_t *b_limit_ptr,
const uint8_t *limit_ptr,
const uint8_t *thresh_ptr,
@@ -237,7 +237,7 @@
}
}
-void vp9_lpf_vertical_8_dual_msa(uint8_t *src, int32_t pitch,
+void vpx_lpf_vertical_8_dual_msa(uint8_t *src, int32_t pitch,
const uint8_t *b_limit0,
const uint8_t *limit0,
const uint8_t *thresh0,
diff --git a/vp9/common/mips/dspr2/vp9_loopfilter_filters_dspr2.c b/vpx_dsp/mips/loopfilter_filters_dspr2.c
similarity index 85%
rename from vp9/common/mips/dspr2/vp9_loopfilter_filters_dspr2.c
rename to vpx_dsp/mips/loopfilter_filters_dspr2.c
index 3df7f4c..99a96d8 100644
--- a/vp9/common/mips/dspr2/vp9_loopfilter_filters_dspr2.c
+++ b/vpx_dsp/mips/loopfilter_filters_dspr2.c
@@ -10,17 +10,16 @@
#include <stdlib.h>
-#include "./vp9_rtcd.h"
-#include "vp9/common/vp9_common.h"
-#include "vp9/common/vp9_loopfilter.h"
-#include "vp9/common/vp9_onyxc_int.h"
-#include "vp9/common/mips/dspr2/vp9_common_dspr2.h"
-#include "vp9/common/mips/dspr2/vp9_loopfilter_macros_dspr2.h"
-#include "vp9/common/mips/dspr2/vp9_loopfilter_masks_dspr2.h"
-#include "vp9/common/mips/dspr2/vp9_loopfilter_filters_dspr2.h"
+#include "./vpx_dsp_rtcd.h"
+#include "vpx/vpx_integer.h"
+#include "vpx_dsp/mips/common_dspr2.h"
+#include "vpx_dsp/mips/loopfilter_filters_dspr2.h"
+#include "vpx_dsp/mips/loopfilter_macros_dspr2.h"
+#include "vpx_dsp/mips/loopfilter_masks_dspr2.h"
+#include "vpx_mem/vpx_mem.h"
#if HAVE_DSPR2
-void vp9_lpf_horizontal_4_dspr2(unsigned char *s,
+void vpx_lpf_horizontal_4_dspr2(unsigned char *s,
int pitch,
const uint8_t *blimit,
const uint8_t *limit,
@@ -50,7 +49,7 @@
);
/* prefetch data for store */
- vp9_prefetch_store(s);
+ prefetch_store(s);
    /* loop filter designed to work using chars so that we can make maximum use
       of 8-bit SIMD instructions. */
@@ -88,14 +87,14 @@
: [sm1] "r" (sm1), [s0] "r" (s0), [s5] "r" (s5), [s6] "r" (s6)
);
- vp9_filter_hev_mask_dspr2(limit_vec, flimit_vec, p1, p2,
- pm1, p0, p3, p4, p5, p6,
- thresh_vec, &hev, &mask);
+ filter_hev_mask_dspr2(limit_vec, flimit_vec, p1, p2,
+ pm1, p0, p3, p4, p5, p6,
+ thresh_vec, &hev, &mask);
    /* if mask == 0, filtering is not needed */
if (mask) {
/* filtering */
- vp9_filter_dspr2(mask, hev, &p1, &p2, &p3, &p4);
+ filter_dspr2(mask, hev, &p1, &p2, &p3, &p4);
__asm__ __volatile__ (
"sw %[p1], (%[s1]) \n\t"
@@ -114,7 +113,7 @@
}
}
-void vp9_lpf_vertical_4_dspr2(unsigned char *s,
+void vpx_lpf_vertical_4_dspr2(unsigned char *s,
int pitch,
const uint8_t *blimit,
const uint8_t *limit,
@@ -144,7 +143,7 @@
);
/* prefetch data for store */
- vp9_prefetch_store(s + pitch);
+ prefetch_store(s + pitch);
for (i = 0; i < 2; i++) {
s1 = s;
@@ -217,14 +216,14 @@
* mask will be zero and filtering is not needed
*/
if (!(((p1 - p4) == 0) && ((p2 - p3) == 0))) {
- vp9_filter_hev_mask_dspr2(limit_vec, flimit_vec, p1, p2, pm1,
- p0, p3, p4, p5, p6, thresh_vec,
- &hev, &mask);
+ filter_hev_mask_dspr2(limit_vec, flimit_vec, p1, p2, pm1,
+ p0, p3, p4, p5, p6, thresh_vec,
+ &hev, &mask);
      /* if mask == 0, filtering is not needed */
if (mask) {
/* filtering */
- vp9_filter_dspr2(mask, hev, &p1, &p2, &p3, &p4);
+ filter_dspr2(mask, hev, &p1, &p2, &p3, &p4);
/* unpack processed 4x4 neighborhood
* don't use transpose on output data
@@ -307,56 +306,56 @@
}
}
-void vp9_lpf_horizontal_4_dual_dspr2(uint8_t *s, int p /* pitch */,
+void vpx_lpf_horizontal_4_dual_dspr2(uint8_t *s, int p /* pitch */,
const uint8_t *blimit0,
const uint8_t *limit0,
const uint8_t *thresh0,
const uint8_t *blimit1,
const uint8_t *limit1,
const uint8_t *thresh1) {
- vp9_lpf_horizontal_4_dspr2(s, p, blimit0, limit0, thresh0, 1);
- vp9_lpf_horizontal_4_dspr2(s + 8, p, blimit1, limit1, thresh1, 1);
+ vpx_lpf_horizontal_4_dspr2(s, p, blimit0, limit0, thresh0, 1);
+ vpx_lpf_horizontal_4_dspr2(s + 8, p, blimit1, limit1, thresh1, 1);
}
-void vp9_lpf_horizontal_8_dual_dspr2(uint8_t *s, int p /* pitch */,
+void vpx_lpf_horizontal_8_dual_dspr2(uint8_t *s, int p /* pitch */,
const uint8_t *blimit0,
const uint8_t *limit0,
const uint8_t *thresh0,
const uint8_t *blimit1,
const uint8_t *limit1,
const uint8_t *thresh1) {
- vp9_lpf_horizontal_8_dspr2(s, p, blimit0, limit0, thresh0, 1);
- vp9_lpf_horizontal_8_dspr2(s + 8, p, blimit1, limit1, thresh1, 1);
+ vpx_lpf_horizontal_8_dspr2(s, p, blimit0, limit0, thresh0, 1);
+ vpx_lpf_horizontal_8_dspr2(s + 8, p, blimit1, limit1, thresh1, 1);
}
-void vp9_lpf_vertical_4_dual_dspr2(uint8_t *s, int p,
+void vpx_lpf_vertical_4_dual_dspr2(uint8_t *s, int p,
const uint8_t *blimit0,
const uint8_t *limit0,
const uint8_t *thresh0,
const uint8_t *blimit1,
const uint8_t *limit1,
const uint8_t *thresh1) {
- vp9_lpf_vertical_4_dspr2(s, p, blimit0, limit0, thresh0, 1);
- vp9_lpf_vertical_4_dspr2(s + 8 * p, p, blimit1, limit1, thresh1, 1);
+ vpx_lpf_vertical_4_dspr2(s, p, blimit0, limit0, thresh0, 1);
+ vpx_lpf_vertical_4_dspr2(s + 8 * p, p, blimit1, limit1, thresh1, 1);
}
-void vp9_lpf_vertical_8_dual_dspr2(uint8_t *s, int p,
+void vpx_lpf_vertical_8_dual_dspr2(uint8_t *s, int p,
const uint8_t *blimit0,
const uint8_t *limit0,
const uint8_t *thresh0,
const uint8_t *blimit1,
const uint8_t *limit1,
const uint8_t *thresh1) {
- vp9_lpf_vertical_8_dspr2(s, p, blimit0, limit0, thresh0, 1);
- vp9_lpf_vertical_8_dspr2(s + 8 * p, p, blimit1, limit1, thresh1,
+ vpx_lpf_vertical_8_dspr2(s, p, blimit0, limit0, thresh0, 1);
+ vpx_lpf_vertical_8_dspr2(s + 8 * p, p, blimit1, limit1, thresh1,
1);
}
-void vp9_lpf_vertical_16_dual_dspr2(uint8_t *s, int p,
+void vpx_lpf_vertical_16_dual_dspr2(uint8_t *s, int p,
const uint8_t *blimit,
const uint8_t *limit,
const uint8_t *thresh) {
- vp9_lpf_vertical_16_dspr2(s, p, blimit, limit, thresh);
- vp9_lpf_vertical_16_dspr2(s + 8 * p, p, blimit, limit, thresh);
+ vpx_lpf_vertical_16_dspr2(s, p, blimit, limit, thresh);
+ vpx_lpf_vertical_16_dspr2(s + 8 * p, p, blimit, limit, thresh);
}
#endif // #if HAVE_DSPR2
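Every dual wrapper above is the same two-call pattern: run the single-edge kernel on the first 8-pixel edge, then again offset by 8 pixels (horizontal) or 8 rows, i.e. 8 * p (vertical). A generic sketch with a hypothetical kernel pointer type:

typedef void (*lpf_single_fn)(uint8_t *s, int p, const uint8_t *blimit,
                              const uint8_t *limit, const uint8_t *thresh,
                              int count);

static void lpf_dual(lpf_single_fn fn, uint8_t *s, int p, int vertical,
                     const uint8_t *bl0, const uint8_t *l0, const uint8_t *t0,
                     const uint8_t *bl1, const uint8_t *l1, const uint8_t *t1) {
  fn(s, p, bl0, l0, t0, 1);                           /* first 8-wide edge */
  fn(s + (vertical ? 8 * p : 8), p, bl1, l1, t1, 1);  /* second edge */
}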
diff --git a/vp9/common/mips/dspr2/vp9_loopfilter_filters_dspr2.h b/vpx_dsp/mips/loopfilter_filters_dspr2.h
similarity index 85%
rename from vp9/common/mips/dspr2/vp9_loopfilter_filters_dspr2.h
rename to vpx_dsp/mips/loopfilter_filters_dspr2.h
index 675db65..4a1506b 100644
--- a/vp9/common/mips/dspr2/vp9_loopfilter_filters_dspr2.h
+++ b/vpx_dsp/mips/loopfilter_filters_dspr2.h
@@ -13,10 +13,10 @@
#include <stdlib.h>
-#include "./vp9_rtcd.h"
+#include "./vpx_dsp_rtcd.h"
+#include "vpx/vpx_integer.h"
+#include "vpx_mem/vpx_mem.h"
#include "vpx_ports/mem.h"
-#include "vp9/common/vp9_common.h"
-#include "vp9/common/vp9_onyxc_int.h"
#ifdef __cplusplus
extern "C" {
@@ -24,10 +24,10 @@
#if HAVE_DSPR2
/* inputs & outputs are quad-byte vectors */
-static INLINE void vp9_filter_dspr2(uint32_t mask, uint32_t hev,
- uint32_t *ps1, uint32_t *ps0,
- uint32_t *qs0, uint32_t *qs1) {
- int32_t vp9_filter_l, vp9_filter_r;
+static INLINE void filter_dspr2(uint32_t mask, uint32_t hev,
+ uint32_t *ps1, uint32_t *ps0,
+ uint32_t *qs0, uint32_t *qs1) {
+ int32_t vpx_filter_l, vpx_filter_r;
int32_t Filter1_l, Filter1_r, Filter2_l, Filter2_r;
int32_t subr_r, subr_l;
uint32_t t1, t2, HWM, t3;
@@ -73,34 +73,34 @@
hev_r = hev_r & HWM;
__asm__ __volatile__ (
- /* vp9_filter = vp8_signed_char_clamp(ps1 - qs1); */
- "subq_s.ph %[vp9_filter_l], %[vps1_l], %[vqs1_l] \n\t"
- "subq_s.ph %[vp9_filter_r], %[vps1_r], %[vqs1_r] \n\t"
+ /* vpx_filter = vp8_signed_char_clamp(ps1 - qs1); */
+ "subq_s.ph %[vpx_filter_l], %[vps1_l], %[vqs1_l] \n\t"
+ "subq_s.ph %[vpx_filter_r], %[vps1_r], %[vqs1_r] \n\t"
/* qs0 - ps0 */
"subq_s.ph %[subr_l], %[vqs0_l], %[vps0_l] \n\t"
"subq_s.ph %[subr_r], %[vqs0_r], %[vps0_r] \n\t"
- /* vp9_filter &= hev; */
- "and %[vp9_filter_l], %[vp9_filter_l], %[hev_l] \n\t"
- "and %[vp9_filter_r], %[vp9_filter_r], %[hev_r] \n\t"
+ /* vpx_filter &= hev; */
+ "and %[vpx_filter_l], %[vpx_filter_l], %[hev_l] \n\t"
+ "and %[vpx_filter_r], %[vpx_filter_r], %[hev_r] \n\t"
- /* vp9_filter = vp8_signed_char_clamp(vp9_filter + 3 * (qs0 - ps0)); */
- "addq_s.ph %[vp9_filter_l], %[vp9_filter_l], %[subr_l] \n\t"
- "addq_s.ph %[vp9_filter_r], %[vp9_filter_r], %[subr_r] \n\t"
+ /* vpx_filter = vp8_signed_char_clamp(vpx_filter + 3 * (qs0 - ps0)); */
+ "addq_s.ph %[vpx_filter_l], %[vpx_filter_l], %[subr_l] \n\t"
+ "addq_s.ph %[vpx_filter_r], %[vpx_filter_r], %[subr_r] \n\t"
"xor %[invhev_l], %[hev_l], %[HWM] \n\t"
- "addq_s.ph %[vp9_filter_l], %[vp9_filter_l], %[subr_l] \n\t"
- "addq_s.ph %[vp9_filter_r], %[vp9_filter_r], %[subr_r] \n\t"
+ "addq_s.ph %[vpx_filter_l], %[vpx_filter_l], %[subr_l] \n\t"
+ "addq_s.ph %[vpx_filter_r], %[vpx_filter_r], %[subr_r] \n\t"
"xor %[invhev_r], %[hev_r], %[HWM] \n\t"
- "addq_s.ph %[vp9_filter_l], %[vp9_filter_l], %[subr_l] \n\t"
- "addq_s.ph %[vp9_filter_r], %[vp9_filter_r], %[subr_r] \n\t"
+ "addq_s.ph %[vpx_filter_l], %[vpx_filter_l], %[subr_l] \n\t"
+ "addq_s.ph %[vpx_filter_r], %[vpx_filter_r], %[subr_r] \n\t"
- /* vp9_filter &= mask; */
- "and %[vp9_filter_l], %[vp9_filter_l], %[mask_l] \n\t"
- "and %[vp9_filter_r], %[vp9_filter_r], %[mask_r] \n\t"
+ /* vpx_filter &= mask; */
+ "and %[vpx_filter_l], %[vpx_filter_l], %[mask_l] \n\t"
+ "and %[vpx_filter_r], %[vpx_filter_r], %[mask_r] \n\t"
- : [vp9_filter_l] "=&r" (vp9_filter_l),
- [vp9_filter_r] "=&r" (vp9_filter_r),
+ : [vpx_filter_l] "=&r" (vpx_filter_l),
+ [vpx_filter_r] "=&r" (vpx_filter_r),
[subr_l] "=&r" (subr_l), [subr_r] "=&r" (subr_r),
[invhev_l] "=&r" (invhev_l), [invhev_r] "=&r" (invhev_r)
: [vps0_l] "r" (vps0_l), [vps0_r] "r" (vps0_r), [vps1_l] "r" (vps1_l),
@@ -113,13 +113,13 @@
/* save bottom 3 bits so that we round one side +4 and the other +3 */
__asm__ __volatile__ (
- /* Filter2 = vp8_signed_char_clamp(vp9_filter + 3) >>= 3; */
- "addq_s.ph %[Filter1_l], %[vp9_filter_l], %[t2] \n\t"
- "addq_s.ph %[Filter1_r], %[vp9_filter_r], %[t2] \n\t"
+ /* Filter2 = vp8_signed_char_clamp(vpx_filter + 3) >>= 3; */
+ "addq_s.ph %[Filter1_l], %[vpx_filter_l], %[t2] \n\t"
+ "addq_s.ph %[Filter1_r], %[vpx_filter_r], %[t2] \n\t"
- /* Filter1 = vp8_signed_char_clamp(vp9_filter + 4) >>= 3; */
- "addq_s.ph %[Filter2_l], %[vp9_filter_l], %[t1] \n\t"
- "addq_s.ph %[Filter2_r], %[vp9_filter_r], %[t1] \n\t"
+ /* Filter1 = vp8_signed_char_clamp(vpx_filter + 4) >>= 3; */
+ "addq_s.ph %[Filter2_l], %[vpx_filter_l], %[t1] \n\t"
+ "addq_s.ph %[Filter2_r], %[vpx_filter_r], %[t1] \n\t"
"shra.ph %[Filter1_r], %[Filter1_r], 3 \n\t"
"shra.ph %[Filter1_l], %[Filter1_l], 3 \n\t"
@@ -142,23 +142,23 @@
[vps0_l] "+r" (vps0_l), [vps0_r] "+r" (vps0_r),
[vqs0_l] "+r" (vqs0_l), [vqs0_r] "+r" (vqs0_r)
: [t1] "r" (t1), [t2] "r" (t2), [HWM] "r" (HWM),
- [vp9_filter_l] "r" (vp9_filter_l), [vp9_filter_r] "r" (vp9_filter_r)
+ [vpx_filter_l] "r" (vpx_filter_l), [vpx_filter_r] "r" (vpx_filter_r)
);
__asm__ __volatile__ (
- /* (vp9_filter += 1) >>= 1 */
+ /* (vpx_filter += 1) >>= 1 */
"addqh.ph %[Filter1_l], %[Filter1_l], %[t3] \n\t"
"addqh.ph %[Filter1_r], %[Filter1_r], %[t3] \n\t"
- /* vp9_filter &= ~hev; */
+ /* vpx_filter &= ~hev; */
"and %[Filter1_l], %[Filter1_l], %[invhev_l] \n\t"
"and %[Filter1_r], %[Filter1_r], %[invhev_r] \n\t"
- /* vps1 = vp8_signed_char_clamp(ps1 + vp9_filter); */
+ /* vps1 = vp8_signed_char_clamp(ps1 + vpx_filter); */
"addq_s.ph %[vps1_l], %[vps1_l], %[Filter1_l] \n\t"
"addq_s.ph %[vps1_r], %[vps1_r], %[Filter1_r] \n\t"
- /* vqs1 = vp8_signed_char_clamp(qs1 - vp9_filter); */
+ /* vqs1 = vp8_signed_char_clamp(qs1 - vpx_filter); */
"subq_s.ph %[vqs1_l], %[vqs1_l], %[Filter1_l] \n\t"
"subq_s.ph %[vqs1_r], %[vqs1_r], %[Filter1_r] \n\t"
@@ -196,12 +196,12 @@
*qs1 = vqs1 ^ N128;
}
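filter_dspr2 above runs the standard four-tap loop-filter update on four pixel columns at once, split into left/right halfword pairs; the asm comments carry the scalar recipe. A one-column scalar sketch of the same math, assuming pixels are already in signed (128-biased) form and mask/hev are 0 or all-ones:

static signed char clamp_s8(int v) {
  return (signed char)(v < -128 ? -128 : v > 127 ? 127 : v);
}

static void filter4_scalar(int mask, int hev, signed char *ps1,
                           signed char *ps0, signed char *qs0,
                           signed char *qs1) {
  signed char filter, filter1, filter2;

  filter = (signed char)(clamp_s8(*ps1 - *qs1) & hev);
  filter = (signed char)(clamp_s8(filter + 3 * (*qs0 - *ps0)) & mask);
  filter1 = (signed char)(clamp_s8(filter + 4) >> 3);  /* round one side +4 */
  filter2 = (signed char)(clamp_s8(filter + 3) >> 3);  /* the other side +3 */
  *qs0 = clamp_s8(*qs0 - filter1);
  *ps0 = clamp_s8(*ps0 + filter2);
  filter = (signed char)(((filter1 + 1) >> 1) & ~hev); /* outer taps skip hev */
  *qs1 = clamp_s8(*qs1 - filter);
  *ps1 = clamp_s8(*ps1 + filter);
}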
-static INLINE void vp9_filter1_dspr2(uint32_t mask, uint32_t hev,
- uint32_t ps1, uint32_t ps0,
- uint32_t qs0, uint32_t qs1,
- uint32_t *p1_f0, uint32_t *p0_f0,
- uint32_t *q0_f0, uint32_t *q1_f0) {
- int32_t vp9_filter_l, vp9_filter_r;
+static INLINE void filter1_dspr2(uint32_t mask, uint32_t hev,
+ uint32_t ps1, uint32_t ps0,
+ uint32_t qs0, uint32_t qs1,
+ uint32_t *p1_f0, uint32_t *p0_f0,
+ uint32_t *q0_f0, uint32_t *q1_f0) {
+ int32_t vpx_filter_l, vpx_filter_r;
int32_t Filter1_l, Filter1_r, Filter2_l, Filter2_r;
int32_t subr_r, subr_l;
uint32_t t1, t2, HWM, t3;
@@ -247,34 +247,34 @@
hev_r = hev_r & HWM;
__asm__ __volatile__ (
- /* vp9_filter = vp8_signed_char_clamp(ps1 - qs1); */
- "subq_s.ph %[vp9_filter_l], %[vps1_l], %[vqs1_l] \n\t"
- "subq_s.ph %[vp9_filter_r], %[vps1_r], %[vqs1_r] \n\t"
+ /* vpx_filter = vp8_signed_char_clamp(ps1 - qs1); */
+ "subq_s.ph %[vpx_filter_l], %[vps1_l], %[vqs1_l] \n\t"
+ "subq_s.ph %[vpx_filter_r], %[vps1_r], %[vqs1_r] \n\t"
/* qs0 - ps0 */
"subq_s.ph %[subr_l], %[vqs0_l], %[vps0_l] \n\t"
"subq_s.ph %[subr_r], %[vqs0_r], %[vps0_r] \n\t"
- /* vp9_filter &= hev; */
- "and %[vp9_filter_l], %[vp9_filter_l], %[hev_l] \n\t"
- "and %[vp9_filter_r], %[vp9_filter_r], %[hev_r] \n\t"
+ /* vpx_filter &= hev; */
+ "and %[vpx_filter_l], %[vpx_filter_l], %[hev_l] \n\t"
+ "and %[vpx_filter_r], %[vpx_filter_r], %[hev_r] \n\t"
- /* vp9_filter = vp8_signed_char_clamp(vp9_filter + 3 * (qs0 - ps0)); */
- "addq_s.ph %[vp9_filter_l], %[vp9_filter_l], %[subr_l] \n\t"
- "addq_s.ph %[vp9_filter_r], %[vp9_filter_r], %[subr_r] \n\t"
+ /* vpx_filter = vp8_signed_char_clamp(vpx_filter + 3 * (qs0 - ps0)); */
+ "addq_s.ph %[vpx_filter_l], %[vpx_filter_l], %[subr_l] \n\t"
+ "addq_s.ph %[vpx_filter_r], %[vpx_filter_r], %[subr_r] \n\t"
"xor %[invhev_l], %[hev_l], %[HWM] \n\t"
- "addq_s.ph %[vp9_filter_l], %[vp9_filter_l], %[subr_l] \n\t"
- "addq_s.ph %[vp9_filter_r], %[vp9_filter_r], %[subr_r] \n\t"
+ "addq_s.ph %[vpx_filter_l], %[vpx_filter_l], %[subr_l] \n\t"
+ "addq_s.ph %[vpx_filter_r], %[vpx_filter_r], %[subr_r] \n\t"
"xor %[invhev_r], %[hev_r], %[HWM] \n\t"
- "addq_s.ph %[vp9_filter_l], %[vp9_filter_l], %[subr_l] \n\t"
- "addq_s.ph %[vp9_filter_r], %[vp9_filter_r], %[subr_r] \n\t"
+ "addq_s.ph %[vpx_filter_l], %[vpx_filter_l], %[subr_l] \n\t"
+ "addq_s.ph %[vpx_filter_r], %[vpx_filter_r], %[subr_r] \n\t"
- /* vp9_filter &= mask; */
- "and %[vp9_filter_l], %[vp9_filter_l], %[mask_l] \n\t"
- "and %[vp9_filter_r], %[vp9_filter_r], %[mask_r] \n\t"
+ /* vpx_filter &= mask; */
+ "and %[vpx_filter_l], %[vpx_filter_l], %[mask_l] \n\t"
+ "and %[vpx_filter_r], %[vpx_filter_r], %[mask_r] \n\t"
- : [vp9_filter_l] "=&r" (vp9_filter_l),
- [vp9_filter_r] "=&r" (vp9_filter_r),
+ : [vpx_filter_l] "=&r" (vpx_filter_l),
+ [vpx_filter_r] "=&r" (vpx_filter_r),
[subr_l] "=&r" (subr_l), [subr_r] "=&r" (subr_r),
[invhev_l] "=&r" (invhev_l), [invhev_r] "=&r" (invhev_r)
: [vps0_l] "r" (vps0_l), [vps0_r] "r" (vps0_r), [vps1_l] "r" (vps1_l),
@@ -286,13 +286,13 @@
/* save bottom 3 bits so that we round one side +4 and the other +3 */
__asm__ __volatile__ (
- /* Filter2 = vp8_signed_char_clamp(vp9_filter + 3) >>= 3; */
- "addq_s.ph %[Filter1_l], %[vp9_filter_l], %[t2] \n\t"
- "addq_s.ph %[Filter1_r], %[vp9_filter_r], %[t2] \n\t"
+ /* Filter2 = vp8_signed_char_clamp(vpx_filter + 3) >>= 3; */
+ "addq_s.ph %[Filter1_l], %[vpx_filter_l], %[t2] \n\t"
+ "addq_s.ph %[Filter1_r], %[vpx_filter_r], %[t2] \n\t"
- /* Filter1 = vp8_signed_char_clamp(vp9_filter + 4) >>= 3; */
- "addq_s.ph %[Filter2_l], %[vp9_filter_l], %[t1] \n\t"
- "addq_s.ph %[Filter2_r], %[vp9_filter_r], %[t1] \n\t"
+ /* Filter1 = vp8_signed_char_clamp(vpx_filter + 4) >>= 3; */
+ "addq_s.ph %[Filter2_l], %[vpx_filter_l], %[t1] \n\t"
+ "addq_s.ph %[Filter2_r], %[vpx_filter_r], %[t1] \n\t"
"shra.ph %[Filter1_r], %[Filter1_r], 3 \n\t"
"shra.ph %[Filter1_l], %[Filter1_l], 3 \n\t"
@@ -315,23 +315,23 @@
[vps0_l] "+r" (vps0_l), [vps0_r] "+r" (vps0_r),
[vqs0_l] "+r" (vqs0_l), [vqs0_r] "+r" (vqs0_r)
: [t1] "r" (t1), [t2] "r" (t2), [HWM] "r" (HWM),
- [vp9_filter_l] "r" (vp9_filter_l), [vp9_filter_r] "r" (vp9_filter_r)
+ [vpx_filter_l] "r" (vpx_filter_l), [vpx_filter_r] "r" (vpx_filter_r)
);
__asm__ __volatile__ (
- /* (vp9_filter += 1) >>= 1 */
+ /* (vpx_filter += 1) >>= 1 */
"addqh.ph %[Filter1_l], %[Filter1_l], %[t3] \n\t"
"addqh.ph %[Filter1_r], %[Filter1_r], %[t3] \n\t"
- /* vp9_filter &= ~hev; */
+ /* vpx_filter &= ~hev; */
"and %[Filter1_l], %[Filter1_l], %[invhev_l] \n\t"
"and %[Filter1_r], %[Filter1_r], %[invhev_r] \n\t"
- /* vps1 = vp8_signed_char_clamp(ps1 + vp9_filter); */
+ /* vps1 = vp8_signed_char_clamp(ps1 + vpx_filter); */
"addq_s.ph %[vps1_l], %[vps1_l], %[Filter1_l] \n\t"
"addq_s.ph %[vps1_r], %[vps1_r], %[Filter1_r] \n\t"
- /* vqs1 = vp8_signed_char_clamp(qs1 - vp9_filter); */
+ /* vqs1 = vp8_signed_char_clamp(qs1 - vpx_filter); */
"subq_s.ph %[vqs1_l], %[vqs1_l], %[Filter1_l] \n\t"
"subq_s.ph %[vqs1_r], %[vqs1_r], %[Filter1_r] \n\t"
@@ -369,10 +369,10 @@
*q1_f0 = vqs1 ^ N128;
}
-static INLINE void vp9_mbfilter_dspr2(uint32_t *op3, uint32_t *op2,
- uint32_t *op1, uint32_t *op0,
- uint32_t *oq0, uint32_t *oq1,
- uint32_t *oq2, uint32_t *oq3) {
+static INLINE void mbfilter_dspr2(uint32_t *op3, uint32_t *op2,
+ uint32_t *op1, uint32_t *op0,
+ uint32_t *oq0, uint32_t *oq1,
+ uint32_t *oq2, uint32_t *oq3) {
/* use a 7 tap filter [1, 1, 1, 2, 1, 1, 1] for flat line */
const uint32_t p3 = *op3, p2 = *op2, p1 = *op1, p0 = *op0;
const uint32_t q0 = *oq0, q1 = *oq1, q2 = *oq2, q3 = *oq3;
@@ -446,14 +446,14 @@
*oq2 = res_oq2;
}
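mbfilter_dspr2 applies the 7-tap [1, 1, 1, 2, 1, 1, 1] smoothing named in the comment above: each of p2..q2 becomes a rounded average over a window centered on it, with the outermost samples replicated as padding. One-column scalar sketch (the DSPR2 code packs four such columns per 32-bit word):

static void mbfilter_scalar(uint8_t p3, uint8_t p2, uint8_t p1, uint8_t p0,
                            uint8_t q0, uint8_t q1, uint8_t q2, uint8_t q3,
                            uint8_t *op2, uint8_t *op1, uint8_t *op0,
                            uint8_t *oq0, uint8_t *oq1, uint8_t *oq2) {
  /* p3/q3 pad the 7-tap window; the +4 rounds the >> 3. */
  *op2 = (uint8_t)((p3 + p3 + p3 + 2 * p2 + p1 + p0 + q0 + 4) >> 3);
  *op1 = (uint8_t)((p3 + p3 + p2 + 2 * p1 + p0 + q0 + q1 + 4) >> 3);
  *op0 = (uint8_t)((p3 + p2 + p1 + 2 * p0 + q0 + q1 + q2 + 4) >> 3);
  *oq0 = (uint8_t)((p2 + p1 + p0 + 2 * q0 + q1 + q2 + q3 + 4) >> 3);
  *oq1 = (uint8_t)((p1 + p0 + q0 + 2 * q1 + q2 + q3 + q3 + 4) >> 3);
  *oq2 = (uint8_t)((p0 + q0 + q1 + 2 * q2 + q3 + q3 + q3 + 4) >> 3);
}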
-static INLINE void vp9_mbfilter1_dspr2(uint32_t p3, uint32_t p2,
- uint32_t p1, uint32_t p0,
- uint32_t q0, uint32_t q1,
- uint32_t q2, uint32_t q3,
- uint32_t *op2_f1,
- uint32_t *op1_f1, uint32_t *op0_f1,
- uint32_t *oq0_f1, uint32_t *oq1_f1,
- uint32_t *oq2_f1) {
+static INLINE void mbfilter1_dspr2(uint32_t p3, uint32_t p2,
+ uint32_t p1, uint32_t p0,
+ uint32_t q0, uint32_t q1,
+ uint32_t q2, uint32_t q3,
+ uint32_t *op2_f1,
+ uint32_t *op1_f1, uint32_t *op0_f1,
+ uint32_t *oq0_f1, uint32_t *oq1_f1,
+ uint32_t *oq2_f1) {
/* use a 7 tap filter [1, 1, 1, 2, 1, 1, 1] for flat line */
uint32_t res_op2, res_op1, res_op0;
uint32_t res_oq0, res_oq1, res_oq2;
@@ -524,14 +524,14 @@
*oq2_f1 = res_oq2;
}
-static INLINE void vp9_wide_mbfilter_dspr2(uint32_t *op7, uint32_t *op6,
- uint32_t *op5, uint32_t *op4,
- uint32_t *op3, uint32_t *op2,
- uint32_t *op1, uint32_t *op0,
- uint32_t *oq0, uint32_t *oq1,
- uint32_t *oq2, uint32_t *oq3,
- uint32_t *oq4, uint32_t *oq5,
- uint32_t *oq6, uint32_t *oq7) {
+static INLINE void wide_mbfilter_dspr2(uint32_t *op7, uint32_t *op6,
+ uint32_t *op5, uint32_t *op4,
+ uint32_t *op3, uint32_t *op2,
+ uint32_t *op1, uint32_t *op0,
+ uint32_t *oq0, uint32_t *oq1,
+ uint32_t *oq2, uint32_t *oq3,
+ uint32_t *oq4, uint32_t *oq5,
+ uint32_t *oq6, uint32_t *oq7) {
const uint32_t p7 = *op7, p6 = *op6, p5 = *op5, p4 = *op4;
const uint32_t p3 = *op3, p2 = *op2, p1 = *op1, p0 = *op0;
const uint32_t q0 = *oq0, q1 = *oq1, q2 = *oq2, q3 = *oq3;
diff --git a/vp9/common/mips/dspr2/vp9_loopfilter_macros_dspr2.h b/vpx_dsp/mips/loopfilter_macros_dspr2.h
similarity index 99%
rename from vp9/common/mips/dspr2/vp9_loopfilter_macros_dspr2.h
rename to vpx_dsp/mips/loopfilter_macros_dspr2.h
index ca01a6a..994ff18 100644
--- a/vp9/common/mips/dspr2/vp9_loopfilter_macros_dspr2.h
+++ b/vpx_dsp/mips/loopfilter_macros_dspr2.h
@@ -13,9 +13,9 @@
#include <stdlib.h>
-#include "./vp9_rtcd.h"
-#include "vp9/common/vp9_common.h"
-#include "vp9/common/vp9_onyxc_int.h"
+#include "./vpx_dsp_rtcd.h"
+#include "vpx/vpx_integer.h"
+#include "vpx_mem/vpx_mem.h"
#ifdef __cplusplus
extern "C" {
diff --git a/vp9/common/mips/dspr2/vp9_loopfilter_masks_dspr2.h b/vpx_dsp/mips/loopfilter_masks_dspr2.h
similarity index 90%
rename from vp9/common/mips/dspr2/vp9_loopfilter_masks_dspr2.h
rename to vpx_dsp/mips/loopfilter_masks_dspr2.h
index 5b0d9cc..e82dfb7 100644
--- a/vp9/common/mips/dspr2/vp9_loopfilter_masks_dspr2.h
+++ b/vpx_dsp/mips/loopfilter_masks_dspr2.h
@@ -13,9 +13,9 @@
#include <stdlib.h>
-#include "./vp9_rtcd.h"
-#include "vp9/common/vp9_common.h"
-#include "vp9/common/vp9_onyxc_int.h"
+#include "./vpx_dsp_rtcd.h"
+#include "vpx/vpx_integer.h"
+#include "vpx_mem/vpx_mem.h"
#ifdef __cplusplus
extern "C" {
@@ -24,13 +24,13 @@
#if HAVE_DSPR2
/* processing 4 pixels at the same time
* compute hev and mask in the same function */
-static INLINE void vp9_filter_hev_mask_dspr2(uint32_t limit, uint32_t flimit,
- uint32_t p1, uint32_t p0,
- uint32_t p3, uint32_t p2,
- uint32_t q0, uint32_t q1,
- uint32_t q2, uint32_t q3,
- uint32_t thresh, uint32_t *hev,
- uint32_t *mask) {
+static INLINE void filter_hev_mask_dspr2(uint32_t limit, uint32_t flimit,
+ uint32_t p1, uint32_t p0,
+ uint32_t p3, uint32_t p2,
+ uint32_t q0, uint32_t q1,
+ uint32_t q2, uint32_t q3,
+ uint32_t thresh, uint32_t *hev,
+ uint32_t *mask) {
uint32_t c, r, r3, r_k;
uint32_t s1, s2, s3;
uint32_t ones = 0xFFFFFFFF;
@@ -129,16 +129,16 @@
*mask = s2;
}
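filter_hev_mask_dspr2 reduces two decisions to 0x00/0xFF lanes: `mask` is set when all six neighbor steps stay within `limit` and the combined edge step stays within `flimit`, and `hev` (high edge variance) when either inner step exceeds `thresh`. A one-column scalar sketch of the intended predicate, written with the arguments in natural p3..q3 order (the vector version's parameter order differs):

static void hev_mask_scalar(uint8_t limit, uint8_t flimit, uint8_t thresh,
                            uint8_t p3, uint8_t p2, uint8_t p1, uint8_t p0,
                            uint8_t q0, uint8_t q1, uint8_t q2, uint8_t q3,
                            int *hev, int *mask) {
  const int m = abs(p3 - p2) <= limit && abs(p2 - p1) <= limit &&
                abs(p1 - p0) <= limit && abs(q1 - q0) <= limit &&
                abs(q2 - q1) <= limit && abs(q3 - q2) <= limit &&
                abs(p0 - q0) * 2 + abs(p1 - q1) / 2 <= flimit;
  *mask = m ? -1 : 0;  /* all-ones when the edge should be filtered */
  *hev = (abs(p1 - p0) > thresh || abs(q1 - q0) > thresh) ? -1 : 0;
}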
-static INLINE void vp9_filter_hev_mask_flatmask4_dspr2(uint32_t limit,
- uint32_t flimit,
- uint32_t thresh,
- uint32_t p1, uint32_t p0,
- uint32_t p3, uint32_t p2,
- uint32_t q0, uint32_t q1,
- uint32_t q2, uint32_t q3,
- uint32_t *hev,
- uint32_t *mask,
- uint32_t *flat) {
+static INLINE void filter_hev_mask_flatmask4_dspr2(uint32_t limit,
+ uint32_t flimit,
+ uint32_t thresh,
+ uint32_t p1, uint32_t p0,
+ uint32_t p3, uint32_t p2,
+ uint32_t q0, uint32_t q1,
+ uint32_t q2, uint32_t q3,
+ uint32_t *hev,
+ uint32_t *mask,
+ uint32_t *flat) {
uint32_t c, r, r3, r_k, r_flat;
uint32_t s1, s2, s3;
uint32_t ones = 0xFFFFFFFF;
@@ -279,12 +279,12 @@
*flat = flat1;
}
-static INLINE void vp9_flatmask5(uint32_t p4, uint32_t p3,
- uint32_t p2, uint32_t p1,
- uint32_t p0, uint32_t q0,
- uint32_t q1, uint32_t q2,
- uint32_t q3, uint32_t q4,
- uint32_t *flat2) {
+static INLINE void flatmask5(uint32_t p4, uint32_t p3,
+ uint32_t p2, uint32_t p1,
+ uint32_t p0, uint32_t q0,
+ uint32_t q1, uint32_t q2,
+ uint32_t q3, uint32_t q4,
+ uint32_t *flat2) {
uint32_t c, r, r_k, r_flat;
uint32_t ones = 0xFFFFFFFF;
uint32_t flat_thresh = 0x01010101;
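flatmask5 gates the widest (16-sample) filter: the hunk is truncated here, but the 0x01010101 per-lane constant encodes a flatness threshold of 1, and the call sites above pass the outer samples p7..p4/q4..q7 in the p4..p1/q1..q4 slots. A one-column scalar sketch of the test, following the usual libvpx flat_mask5 definition:

static void flatmask5_scalar(uint8_t p4, uint8_t p3, uint8_t p2, uint8_t p1,
                             uint8_t p0, uint8_t q0, uint8_t q1, uint8_t q2,
                             uint8_t q3, uint8_t q4, int *flat2) {
  *flat2 = (abs(p4 - p0) <= 1 && abs(p3 - p0) <= 1 && abs(p2 - p0) <= 1 &&
            abs(p1 - p0) <= 1 && abs(q1 - q0) <= 1 && abs(q2 - q0) <= 1 &&
            abs(q3 - q0) <= 1 && abs(q4 - q0) <= 1) ? -1 : 0;
}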
diff --git a/vp9/common/mips/dspr2/vp9_mbloop_loopfilter_dspr2.c b/vpx_dsp/mips/loopfilter_mb_dspr2.c
similarity index 90%
rename from vp9/common/mips/dspr2/vp9_mbloop_loopfilter_dspr2.c
rename to vpx_dsp/mips/loopfilter_mb_dspr2.c
index 7cd0b63..4138f56 100644
--- a/vp9/common/mips/dspr2/vp9_mbloop_loopfilter_dspr2.c
+++ b/vpx_dsp/mips/loopfilter_mb_dspr2.c
@@ -10,17 +10,16 @@
#include <stdlib.h>
-#include "./vp9_rtcd.h"
-#include "vp9/common/vp9_common.h"
-#include "vp9/common/vp9_loopfilter.h"
-#include "vp9/common/vp9_onyxc_int.h"
-#include "vp9/common/mips/dspr2/vp9_common_dspr2.h"
-#include "vp9/common/mips/dspr2/vp9_loopfilter_macros_dspr2.h"
-#include "vp9/common/mips/dspr2/vp9_loopfilter_masks_dspr2.h"
-#include "vp9/common/mips/dspr2/vp9_loopfilter_filters_dspr2.h"
+#include "./vpx_dsp_rtcd.h"
+#include "vpx/vpx_integer.h"
+#include "vpx_dsp/mips/common_dspr2.h"
+#include "vpx_dsp/mips/loopfilter_filters_dspr2.h"
+#include "vpx_dsp/mips/loopfilter_macros_dspr2.h"
+#include "vpx_dsp/mips/loopfilter_masks_dspr2.h"
+#include "vpx_mem/vpx_mem.h"
#if HAVE_DSPR2
-void vp9_lpf_horizontal_8_dspr2(unsigned char *s,
+void vpx_lpf_horizontal_8_dspr2(unsigned char *s,
int pitch,
const uint8_t *blimit,
const uint8_t *limit,
@@ -53,7 +52,7 @@
);
/* prefetch data for store */
- vp9_prefetch_store(s);
+ prefetch_store(s);
for (i = 0; i < 2; i++) {
sp3 = s - (pitch << 2);
@@ -81,13 +80,13 @@
[sq3] "r" (sq3), [sq2] "r" (sq2), [sq1] "r" (sq1), [sq0] "r" (sq0)
);
- vp9_filter_hev_mask_flatmask4_dspr2(limit_vec, flimit_vec, thresh_vec,
- p1, p0, p3, p2, q0, q1, q2, q3,
- &hev, &mask, &flat);
+ filter_hev_mask_flatmask4_dspr2(limit_vec, flimit_vec, thresh_vec,
+ p1, p0, p3, p2, q0, q1, q2, q3,
+ &hev, &mask, &flat);
if ((flat == 0) && (mask != 0)) {
- vp9_filter1_dspr2(mask, hev, p1, p0, q0, q1,
- &p1_f0, &p0_f0, &q0_f0, &q1_f0);
+ filter1_dspr2(mask, hev, p1, p0, q0, q1,
+ &p1_f0, &p0_f0, &q0_f0, &q1_f0);
__asm__ __volatile__ (
"sw %[p1_f0], (%[sp1]) \n\t"
@@ -104,13 +103,13 @@
} else if ((mask & flat) == 0xFFFFFFFF) {
/* left 2 element operation */
PACK_LEFT_0TO3()
- vp9_mbfilter_dspr2(&p3_l, &p2_l, &p1_l, &p0_l,
- &q0_l, &q1_l, &q2_l, &q3_l);
+ mbfilter_dspr2(&p3_l, &p2_l, &p1_l, &p0_l,
+ &q0_l, &q1_l, &q2_l, &q3_l);
/* right 2 element operation */
PACK_RIGHT_0TO3()
- vp9_mbfilter_dspr2(&p3_r, &p2_r, &p1_r, &p0_r,
- &q0_r, &q1_r, &q2_r, &q3_r);
+ mbfilter_dspr2(&p3_r, &p2_r, &p1_r, &p0_r,
+ &q0_r, &q1_r, &q2_r, &q3_r);
COMBINE_LEFT_RIGHT_0TO2()
@@ -130,18 +129,18 @@
);
} else if ((flat != 0) && (mask != 0)) {
/* filtering */
- vp9_filter1_dspr2(mask, hev, p1, p0, q0, q1,
- &p1_f0, &p0_f0, &q0_f0, &q1_f0);
+ filter1_dspr2(mask, hev, p1, p0, q0, q1,
+ &p1_f0, &p0_f0, &q0_f0, &q1_f0);
/* left 2 element operation */
PACK_LEFT_0TO3()
- vp9_mbfilter_dspr2(&p3_l, &p2_l, &p1_l, &p0_l,
- &q0_l, &q1_l, &q2_l, &q3_l);
+ mbfilter_dspr2(&p3_l, &p2_l, &p1_l, &p0_l,
+ &q0_l, &q1_l, &q2_l, &q3_l);
/* right 2 element operation */
PACK_RIGHT_0TO3()
- vp9_mbfilter_dspr2(&p3_r, &p2_r, &p1_r, &p0_r,
- &q0_r, &q1_r, &q2_r, &q3_r);
+ mbfilter_dspr2(&p3_r, &p2_r, &p1_r, &p0_r,
+ &q0_r, &q1_r, &q2_r, &q3_r);
if (mask & flat & 0x000000FF) {
__asm__ __volatile__ (
@@ -319,7 +318,7 @@
}
}
-void vp9_lpf_vertical_8_dspr2(unsigned char *s,
+void vpx_lpf_vertical_8_dspr2(unsigned char *s,
int pitch,
const uint8_t *blimit,
const uint8_t *limit,
@@ -351,7 +350,7 @@
: [uthresh] "r" (uthresh), [uflimit] "r" (uflimit), [ulimit] "r" (ulimit)
);
- vp9_prefetch_store(s + pitch);
+ prefetch_store(s + pitch);
for (i = 0; i < 2; i++) {
s1 = s;
@@ -451,39 +450,39 @@
:
);
- vp9_filter_hev_mask_flatmask4_dspr2(limit_vec, flimit_vec, thresh_vec,
- p1, p0, p3, p2, q0, q1, q2, q3,
- &hev, &mask, &flat);
+ filter_hev_mask_flatmask4_dspr2(limit_vec, flimit_vec, thresh_vec,
+ p1, p0, p3, p2, q0, q1, q2, q3,
+ &hev, &mask, &flat);
if ((flat == 0) && (mask != 0)) {
- vp9_filter1_dspr2(mask, hev, p1, p0, q0, q1,
- &p1_f0, &p0_f0, &q0_f0, &q1_f0);
+ filter1_dspr2(mask, hev, p1, p0, q0, q1,
+ &p1_f0, &p0_f0, &q0_f0, &q1_f0);
STORE_F0()
} else if ((mask & flat) == 0xFFFFFFFF) {
/* left 2 element operation */
PACK_LEFT_0TO3()
- vp9_mbfilter_dspr2(&p3_l, &p2_l, &p1_l, &p0_l,
- &q0_l, &q1_l, &q2_l, &q3_l);
+ mbfilter_dspr2(&p3_l, &p2_l, &p1_l, &p0_l,
+ &q0_l, &q1_l, &q2_l, &q3_l);
/* right 2 element operation */
PACK_RIGHT_0TO3()
- vp9_mbfilter_dspr2(&p3_r, &p2_r, &p1_r, &p0_r,
- &q0_r, &q1_r, &q2_r, &q3_r);
+ mbfilter_dspr2(&p3_r, &p2_r, &p1_r, &p0_r,
+ &q0_r, &q1_r, &q2_r, &q3_r);
STORE_F1()
} else if ((flat != 0) && (mask != 0)) {
- vp9_filter1_dspr2(mask, hev, p1, p0, q0, q1,
- &p1_f0, &p0_f0, &q0_f0, &q1_f0);
+ filter1_dspr2(mask, hev, p1, p0, q0, q1,
+ &p1_f0, &p0_f0, &q0_f0, &q1_f0);
/* left 2 element operation */
PACK_LEFT_0TO3()
- vp9_mbfilter_dspr2(&p3_l, &p2_l, &p1_l, &p0_l,
- &q0_l, &q1_l, &q2_l, &q3_l);
+ mbfilter_dspr2(&p3_l, &p2_l, &p1_l, &p0_l,
+ &q0_l, &q1_l, &q2_l, &q3_l);
/* right 2 element operation */
PACK_RIGHT_0TO3()
- vp9_mbfilter_dspr2(&p3_r, &p2_r, &p1_r, &p0_r,
- &q0_r, &q1_r, &q2_r, &q3_r);
+ mbfilter_dspr2(&p3_r, &p2_r, &p1_r, &p0_r,
+ &q0_r, &q1_r, &q2_r, &q3_r);
if (mask & flat & 0x000000FF) {
__asm__ __volatile__ (
diff --git a/vp9/common/mips/dspr2/vp9_mblpf_horiz_loopfilter_dspr2.c b/vpx_dsp/mips/loopfilter_mb_horiz_dspr2.c
similarity index 90%
rename from vp9/common/mips/dspr2/vp9_mblpf_horiz_loopfilter_dspr2.c
rename to vpx_dsp/mips/loopfilter_mb_horiz_dspr2.c
index 6c94674..8a48650 100644
--- a/vp9/common/mips/dspr2/vp9_mblpf_horiz_loopfilter_dspr2.c
+++ b/vpx_dsp/mips/loopfilter_mb_horiz_dspr2.c
@@ -10,17 +10,16 @@
#include <stdlib.h>
-#include "./vp9_rtcd.h"
-#include "vp9/common/vp9_common.h"
-#include "vp9/common/vp9_loopfilter.h"
-#include "vp9/common/vp9_onyxc_int.h"
-#include "vp9/common/mips/dspr2/vp9_common_dspr2.h"
-#include "vp9/common/mips/dspr2/vp9_loopfilter_macros_dspr2.h"
-#include "vp9/common/mips/dspr2/vp9_loopfilter_masks_dspr2.h"
-#include "vp9/common/mips/dspr2/vp9_loopfilter_filters_dspr2.h"
+#include "./vpx_dsp_rtcd.h"
+#include "vpx/vpx_integer.h"
+#include "vpx_dsp/mips/common_dspr2.h"
+#include "vpx_dsp/mips/loopfilter_filters_dspr2.h"
+#include "vpx_dsp/mips/loopfilter_macros_dspr2.h"
+#include "vpx_dsp/mips/loopfilter_masks_dspr2.h"
+#include "vpx_mem/vpx_mem.h"
#if HAVE_DSPR2
-void vp9_lpf_horizontal_16_dspr2(unsigned char *s,
+void vpx_lpf_horizontal_16_dspr2(unsigned char *s,
int pitch,
const uint8_t *blimit,
const uint8_t *limit,
@@ -58,7 +57,7 @@
);
/* prefetch data for store */
- vp9_prefetch_store(s);
+ prefetch_store(s);
for (i = 0; i < (2 * count); i++) {
sp7 = s - (pitch << 3);
@@ -110,17 +109,17 @@
[sq4] "r" (sq4), [sq5] "r" (sq5), [sq6] "r" (sq6), [sq7] "r" (sq7)
);
- vp9_filter_hev_mask_flatmask4_dspr2(limit_vec, flimit_vec, thresh_vec,
- p1, p0, p3, p2, q0, q1, q2, q3,
- &hev, &mask, &flat);
+ filter_hev_mask_flatmask4_dspr2(limit_vec, flimit_vec, thresh_vec,
+ p1, p0, p3, p2, q0, q1, q2, q3,
+ &hev, &mask, &flat);
- vp9_flatmask5(p7, p6, p5, p4, p0, q0, q4, q5, q6, q7, &flat2);
+ flatmask5(p7, p6, p5, p4, p0, q0, q4, q5, q6, q7, &flat2);
/* f0 */
if (((flat2 == 0) && (flat == 0) && (mask != 0)) ||
((flat2 != 0) && (flat == 0) && (mask != 0))) {
- vp9_filter1_dspr2(mask, hev, p1, p0, q0, q1,
- &p1_f0, &p0_f0, &q0_f0, &q1_f0);
+ filter1_dspr2(mask, hev, p1, p0, q0, q1,
+ &p1_f0, &p0_f0, &q0_f0, &q1_f0);
__asm__ __volatile__ (
"sw %[p1_f0], (%[sp1]) \n\t"
@@ -139,17 +138,17 @@
/* f2 */
PACK_LEFT_0TO3()
PACK_LEFT_4TO7()
- vp9_wide_mbfilter_dspr2(&p7_l, &p6_l, &p5_l, &p4_l,
- &p3_l, &p2_l, &p1_l, &p0_l,
- &q0_l, &q1_l, &q2_l, &q3_l,
- &q4_l, &q5_l, &q6_l, &q7_l);
+ wide_mbfilter_dspr2(&p7_l, &p6_l, &p5_l, &p4_l,
+ &p3_l, &p2_l, &p1_l, &p0_l,
+ &q0_l, &q1_l, &q2_l, &q3_l,
+ &q4_l, &q5_l, &q6_l, &q7_l);
PACK_RIGHT_0TO3()
PACK_RIGHT_4TO7()
- vp9_wide_mbfilter_dspr2(&p7_r, &p6_r, &p5_r, &p4_r,
- &p3_r, &p2_r, &p1_r, &p0_r,
- &q0_r, &q1_r, &q2_r, &q3_r,
- &q4_r, &q5_r, &q6_r, &q7_r);
+ wide_mbfilter_dspr2(&p7_r, &p6_r, &p5_r, &p4_r,
+ &p3_r, &p2_r, &p1_r, &p0_r,
+ &q0_r, &q1_r, &q2_r, &q3_r,
+ &q4_r, &q5_r, &q6_r, &q7_r);
COMBINE_LEFT_RIGHT_0TO2()
COMBINE_LEFT_RIGHT_3TO6()
@@ -189,13 +188,13 @@
/* f1 */
/* left 2 element operation */
PACK_LEFT_0TO3()
- vp9_mbfilter_dspr2(&p3_l, &p2_l, &p1_l, &p0_l,
- &q0_l, &q1_l, &q2_l, &q3_l);
+ mbfilter_dspr2(&p3_l, &p2_l, &p1_l, &p0_l,
+ &q0_l, &q1_l, &q2_l, &q3_l);
/* right 2 element operation */
PACK_RIGHT_0TO3()
- vp9_mbfilter_dspr2(&p3_r, &p2_r, &p1_r, &p0_r,
- &q0_r, &q1_r, &q2_r, &q3_r);
+ mbfilter_dspr2(&p3_r, &p2_r, &p1_r, &p0_r,
+ &q0_r, &q1_r, &q2_r, &q3_r);
COMBINE_LEFT_RIGHT_0TO2()
@@ -215,18 +214,18 @@
);
} else if ((flat2 == 0) && (flat != 0) && (mask != 0)) {
/* f0+f1 */
- vp9_filter1_dspr2(mask, hev, p1, p0, q0, q1,
- &p1_f0, &p0_f0, &q0_f0, &q1_f0);
+ filter1_dspr2(mask, hev, p1, p0, q0, q1,
+ &p1_f0, &p0_f0, &q0_f0, &q1_f0);
/* left 2 element operation */
PACK_LEFT_0TO3()
- vp9_mbfilter_dspr2(&p3_l, &p2_l, &p1_l, &p0_l,
- &q0_l, &q1_l, &q2_l, &q3_l);
+ mbfilter_dspr2(&p3_l, &p2_l, &p1_l, &p0_l,
+ &q0_l, &q1_l, &q2_l, &q3_l);
/* right 2 element operation */
PACK_RIGHT_0TO3()
- vp9_mbfilter_dspr2(&p3_r, &p2_r, &p1_r, &p0_r,
- &q0_r, &q1_r, &q2_r, &q3_r);
+ mbfilter_dspr2(&p3_r, &p2_r, &p1_r, &p0_r,
+ &q0_r, &q1_r, &q2_r, &q3_r);
if (mask & flat & 0x000000FF) {
__asm__ __volatile__ (
@@ -399,36 +398,36 @@
} else if ((flat2 != 0) && (flat != 0) && (mask != 0)) {
/* f0 + f1 + f2 */
/* f0 function */
- vp9_filter1_dspr2(mask, hev, p1, p0, q0, q1,
- &p1_f0, &p0_f0, &q0_f0, &q1_f0);
+ filter1_dspr2(mask, hev, p1, p0, q0, q1,
+ &p1_f0, &p0_f0, &q0_f0, &q1_f0);
/* f1 function */
/* left 2 element operation */
PACK_LEFT_0TO3()
- vp9_mbfilter1_dspr2(p3_l, p2_l, p1_l, p0_l,
- q0_l, q1_l, q2_l, q3_l,
- &p2_l_f1, &p1_l_f1, &p0_l_f1,
- &q0_l_f1, &q1_l_f1, &q2_l_f1);
+ mbfilter1_dspr2(p3_l, p2_l, p1_l, p0_l,
+ q0_l, q1_l, q2_l, q3_l,
+ &p2_l_f1, &p1_l_f1, &p0_l_f1,
+ &q0_l_f1, &q1_l_f1, &q2_l_f1);
/* right 2 element operation */
PACK_RIGHT_0TO3()
- vp9_mbfilter1_dspr2(p3_r, p2_r, p1_r, p0_r,
- q0_r, q1_r, q2_r, q3_r,
- &p2_r_f1, &p1_r_f1, &p0_r_f1,
- &q0_r_f1, &q1_r_f1, &q2_r_f1);
+ mbfilter1_dspr2(p3_r, p2_r, p1_r, p0_r,
+ q0_r, q1_r, q2_r, q3_r,
+ &p2_r_f1, &p1_r_f1, &p0_r_f1,
+ &q0_r_f1, &q1_r_f1, &q2_r_f1);
/* f2 function */
PACK_LEFT_4TO7()
- vp9_wide_mbfilter_dspr2(&p7_l, &p6_l, &p5_l, &p4_l,
- &p3_l, &p2_l, &p1_l, &p0_l,
- &q0_l, &q1_l, &q2_l, &q3_l,
- &q4_l, &q5_l, &q6_l, &q7_l);
+ wide_mbfilter_dspr2(&p7_l, &p6_l, &p5_l, &p4_l,
+ &p3_l, &p2_l, &p1_l, &p0_l,
+ &q0_l, &q1_l, &q2_l, &q3_l,
+ &q4_l, &q5_l, &q6_l, &q7_l);
PACK_RIGHT_4TO7()
- vp9_wide_mbfilter_dspr2(&p7_r, &p6_r, &p5_r, &p4_r,
- &p3_r, &p2_r, &p1_r, &p0_r,
- &q0_r, &q1_r, &q2_r, &q3_r,
- &q4_r, &q5_r, &q6_r, &q7_r);
+ wide_mbfilter_dspr2(&p7_r, &p6_r, &p5_r, &p4_r,
+ &p3_r, &p2_r, &p1_r, &p0_r,
+ &q0_r, &q1_r, &q2_r, &q3_r,
+ &q4_r, &q5_r, &q6_r, &q7_r);
if (mask & flat & flat2 & 0x000000FF) {
__asm__ __volatile__ (
diff --git a/vp9/common/mips/dspr2/vp9_mblpf_vert_loopfilter_dspr2.c b/vpx_dsp/mips/loopfilter_mb_vert_dspr2.c
similarity index 90%
rename from vp9/common/mips/dspr2/vp9_mblpf_vert_loopfilter_dspr2.c
rename to vpx_dsp/mips/loopfilter_mb_vert_dspr2.c
index 851fc6c..e580f01 100644
--- a/vp9/common/mips/dspr2/vp9_mblpf_vert_loopfilter_dspr2.c
+++ b/vpx_dsp/mips/loopfilter_mb_vert_dspr2.c
@@ -10,17 +10,16 @@
#include <stdlib.h>
-#include "./vp9_rtcd.h"
-#include "vp9/common/vp9_common.h"
-#include "vp9/common/vp9_loopfilter.h"
-#include "vp9/common/vp9_onyxc_int.h"
-#include "vp9/common/mips/dspr2/vp9_common_dspr2.h"
-#include "vp9/common/mips/dspr2/vp9_loopfilter_macros_dspr2.h"
-#include "vp9/common/mips/dspr2/vp9_loopfilter_masks_dspr2.h"
-#include "vp9/common/mips/dspr2/vp9_loopfilter_filters_dspr2.h"
+#include "./vpx_dsp_rtcd.h"
+#include "vpx/vpx_integer.h"
+#include "vpx_dsp/mips/common_dspr2.h"
+#include "vpx_dsp/mips/loopfilter_filters_dspr2.h"
+#include "vpx_dsp/mips/loopfilter_macros_dspr2.h"
+#include "vpx_dsp/mips/loopfilter_masks_dspr2.h"
+#include "vpx_mem/vpx_mem.h"
#if HAVE_DSPR2
-void vp9_lpf_vertical_16_dspr2(uint8_t *s,
+void vpx_lpf_vertical_16_dspr2(uint8_t *s,
int pitch,
const uint8_t *blimit,
const uint8_t *limit,
@@ -55,7 +54,7 @@
: [uthresh] "r" (uthresh), [uflimit] "r" (uflimit), [ulimit] "r" (ulimit)
);
- vp9_prefetch_store(s + pitch);
+ prefetch_store(s + pitch);
for (i = 0; i < 2; i++) {
s1 = s;
@@ -248,61 +247,61 @@
:
);
- vp9_filter_hev_mask_flatmask4_dspr2(limit_vec, flimit_vec, thresh_vec,
- p1, p0, p3, p2, q0, q1, q2, q3,
- &hev, &mask, &flat);
+ filter_hev_mask_flatmask4_dspr2(limit_vec, flimit_vec, thresh_vec,
+ p1, p0, p3, p2, q0, q1, q2, q3,
+ &hev, &mask, &flat);
- vp9_flatmask5(p7, p6, p5, p4, p0, q0, q4, q5, q6, q7, &flat2);
+ flatmask5(p7, p6, p5, p4, p0, q0, q4, q5, q6, q7, &flat2);
/* f0 */
if (((flat2 == 0) && (flat == 0) && (mask != 0)) ||
((flat2 != 0) && (flat == 0) && (mask != 0))) {
- vp9_filter1_dspr2(mask, hev, p1, p0, q0, q1,
- &p1_f0, &p0_f0, &q0_f0, &q1_f0);
+ filter1_dspr2(mask, hev, p1, p0, q0, q1,
+ &p1_f0, &p0_f0, &q0_f0, &q1_f0);
STORE_F0()
      } else if ((flat2 == 0xFFFFFFFF) && (flat == 0xFFFFFFFF) &&

(mask == 0xFFFFFFFF)) {
/* f2 */
PACK_LEFT_0TO3()
PACK_LEFT_4TO7()
- vp9_wide_mbfilter_dspr2(&p7_l, &p6_l, &p5_l, &p4_l,
- &p3_l, &p2_l, &p1_l, &p0_l,
- &q0_l, &q1_l, &q2_l, &q3_l,
- &q4_l, &q5_l, &q6_l, &q7_l);
+ wide_mbfilter_dspr2(&p7_l, &p6_l, &p5_l, &p4_l,
+ &p3_l, &p2_l, &p1_l, &p0_l,
+ &q0_l, &q1_l, &q2_l, &q3_l,
+ &q4_l, &q5_l, &q6_l, &q7_l);
PACK_RIGHT_0TO3()
PACK_RIGHT_4TO7()
- vp9_wide_mbfilter_dspr2(&p7_r, &p6_r, &p5_r, &p4_r,
- &p3_r, &p2_r, &p1_r, &p0_r,
- &q0_r, &q1_r, &q2_r, &q3_r,
- &q4_r, &q5_r, &q6_r, &q7_r);
+ wide_mbfilter_dspr2(&p7_r, &p6_r, &p5_r, &p4_r,
+ &p3_r, &p2_r, &p1_r, &p0_r,
+ &q0_r, &q1_r, &q2_r, &q3_r,
+ &q4_r, &q5_r, &q6_r, &q7_r);
STORE_F2()
} else if ((flat2 == 0) && (flat == 0xFFFFFFFF) && (mask == 0xFFFFFFFF)) {
/* f1 */
PACK_LEFT_0TO3()
- vp9_mbfilter_dspr2(&p3_l, &p2_l, &p1_l, &p0_l,
- &q0_l, &q1_l, &q2_l, &q3_l);
+ mbfilter_dspr2(&p3_l, &p2_l, &p1_l, &p0_l,
+ &q0_l, &q1_l, &q2_l, &q3_l);
PACK_RIGHT_0TO3()
- vp9_mbfilter_dspr2(&p3_r, &p2_r, &p1_r, &p0_r,
- &q0_r, &q1_r, &q2_r, &q3_r);
+ mbfilter_dspr2(&p3_r, &p2_r, &p1_r, &p0_r,
+ &q0_r, &q1_r, &q2_r, &q3_r);
STORE_F1()
} else if ((flat2 == 0) && (flat != 0) && (mask != 0)) {
/* f0 + f1 */
- vp9_filter1_dspr2(mask, hev, p1, p0, q0, q1,
- &p1_f0, &p0_f0, &q0_f0, &q1_f0);
+ filter1_dspr2(mask, hev, p1, p0, q0, q1,
+ &p1_f0, &p0_f0, &q0_f0, &q1_f0);
/* left 2 element operation */
PACK_LEFT_0TO3()
- vp9_mbfilter_dspr2(&p3_l, &p2_l, &p1_l, &p0_l,
- &q0_l, &q1_l, &q2_l, &q3_l);
+ mbfilter_dspr2(&p3_l, &p2_l, &p1_l, &p0_l,
+ &q0_l, &q1_l, &q2_l, &q3_l);
/* right 2 element operation */
PACK_RIGHT_0TO3()
- vp9_mbfilter_dspr2(&p3_r, &p2_r, &p1_r, &p0_r,
- &q0_r, &q1_r, &q2_r, &q3_r);
+ mbfilter_dspr2(&p3_r, &p2_r, &p1_r, &p0_r,
+ &q0_r, &q1_r, &q2_r, &q3_r);
if (mask & flat & 0x000000FF) {
__asm__ __volatile__ (
@@ -466,32 +465,32 @@
}
} else if ((flat2 != 0) && (flat != 0) && (mask != 0)) {
/* f0+f1+f2 */
- vp9_filter1_dspr2(mask, hev, p1, p0, q0, q1,
- &p1_f0, &p0_f0, &q0_f0, &q1_f0);
+ filter1_dspr2(mask, hev, p1, p0, q0, q1,
+ &p1_f0, &p0_f0, &q0_f0, &q1_f0);
PACK_LEFT_0TO3()
- vp9_mbfilter1_dspr2(p3_l, p2_l, p1_l, p0_l,
- q0_l, q1_l, q2_l, q3_l,
- &p2_l_f1, &p1_l_f1, &p0_l_f1,
- &q0_l_f1, &q1_l_f1, &q2_l_f1);
+ mbfilter1_dspr2(p3_l, p2_l, p1_l, p0_l,
+ q0_l, q1_l, q2_l, q3_l,
+ &p2_l_f1, &p1_l_f1, &p0_l_f1,
+ &q0_l_f1, &q1_l_f1, &q2_l_f1);
PACK_RIGHT_0TO3()
- vp9_mbfilter1_dspr2(p3_r, p2_r, p1_r, p0_r,
- q0_r, q1_r, q2_r, q3_r,
- &p2_r_f1, &p1_r_f1, &p0_r_f1,
- &q0_r_f1, &q1_r_f1, &q2_r_f1);
+ mbfilter1_dspr2(p3_r, p2_r, p1_r, p0_r,
+ q0_r, q1_r, q2_r, q3_r,
+ &p2_r_f1, &p1_r_f1, &p0_r_f1,
+ &q0_r_f1, &q1_r_f1, &q2_r_f1);
PACK_LEFT_4TO7()
- vp9_wide_mbfilter_dspr2(&p7_l, &p6_l, &p5_l, &p4_l,
- &p3_l, &p2_l, &p1_l, &p0_l,
- &q0_l, &q1_l, &q2_l, &q3_l,
- &q4_l, &q5_l, &q6_l, &q7_l);
+ wide_mbfilter_dspr2(&p7_l, &p6_l, &p5_l, &p4_l,
+ &p3_l, &p2_l, &p1_l, &p0_l,
+ &q0_l, &q1_l, &q2_l, &q3_l,
+ &q4_l, &q5_l, &q6_l, &q7_l);
PACK_RIGHT_4TO7()
- vp9_wide_mbfilter_dspr2(&p7_r, &p6_r, &p5_r, &p4_r,
- &p3_r, &p2_r, &p1_r, &p0_r,
- &q0_r, &q1_r, &q2_r, &q3_r,
- &q4_r, &q5_r, &q6_r, &q7_r);
+ wide_mbfilter_dspr2(&p7_r, &p6_r, &p5_r, &p4_r,
+ &p3_r, &p2_r, &p1_r, &p0_r,
+ &q0_r, &q1_r, &q2_r, &q3_r,
+ &q4_r, &q5_r, &q6_r, &q7_r);
if (mask & flat & flat2 & 0x000000FF) {
__asm__ __volatile__ (
diff --git a/vp9/common/mips/msa/vp9_loopfilter_msa.h b/vpx_dsp/mips/loopfilter_msa.h
similarity index 98%
rename from vp9/common/mips/msa/vp9_loopfilter_msa.h
rename to vpx_dsp/mips/loopfilter_msa.h
index bfbe870..62b1706 100644
--- a/vp9/common/mips/msa/vp9_loopfilter_msa.h
+++ b/vpx_dsp/mips/loopfilter_msa.h
@@ -8,8 +8,8 @@
* be found in the AUTHORS file in the root of the source tree.
*/
-#ifndef VP9_COMMON_MIPS_MSA_VP9_LOOPFILTER_MSA_H_
-#define VP9_COMMON_MIPS_MSA_VP9_LOOPFILTER_MSA_H_
+#ifndef VPX_DSP_LOOPFILTER_MSA_H_
+#define VPX_DSP_LOOPFILTER_MSA_H_
#include "vpx_dsp/mips/macros_msa.h"
@@ -243,4 +243,4 @@
mask_out = limit_in < (v16u8)mask_out; \
mask_out = __msa_xori_b(mask_out, 0xff); \
}
-#endif /* VP9_COMMON_MIPS_MSA_VP9_LOOPFILTER_MSA_H_ */
+#endif /* VPX_DSP_LOOPFILTER_MSA_H_ */
diff --git a/vpx_dsp/mips/sub_pixel_variance_msa.c b/vpx_dsp/mips/sub_pixel_variance_msa.c
index 5168192..798c343 100644
--- a/vpx_dsp/mips/sub_pixel_variance_msa.c
+++ b/vpx_dsp/mips/sub_pixel_variance_msa.c
@@ -41,6 +41,357 @@
#define VARIANCE_LARGE_WxH(sse, diff, shift) \
sse - (((int64_t)diff * diff) >> shift)
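Both macros compute variance as sse - sum * sum / N for an N-pixel block, with the division folded into a shift because N is a power of two; the LARGE variant widens to 64 bits before squaring so large blocks cannot overflow. For example, a 64x64 block has N = 4096, so shift = log2(4096) = 12:

/* 64x64: variance = sse - (int64_t)diff * diff / 4096 */
uint32_t var64x64 = VARIANCE_LARGE_WxH(sse, diff, 12);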
+static uint32_t avg_sse_diff_4width_msa(const uint8_t *src_ptr,
+ int32_t src_stride,
+ const uint8_t *ref_ptr,
+ int32_t ref_stride,
+ const uint8_t *sec_pred,
+ int32_t height,
+ int32_t *diff) {
+ int32_t ht_cnt;
+ uint32_t src0, src1, src2, src3;
+ uint32_t ref0, ref1, ref2, ref3;
+ v16u8 pred, src = { 0 };
+ v16u8 ref = { 0 };
+ v8i16 avg = { 0 };
+ v4i32 vec, var = { 0 };
+
+ for (ht_cnt = (height >> 2); ht_cnt--;) {
+ pred = LD_UB(sec_pred);
+ sec_pred += 16;
+ LW4(src_ptr, src_stride, src0, src1, src2, src3);
+ src_ptr += (4 * src_stride);
+ LW4(ref_ptr, ref_stride, ref0, ref1, ref2, ref3);
+ ref_ptr += (4 * ref_stride);
+
+ INSERT_W4_UB(src0, src1, src2, src3, src);
+ INSERT_W4_UB(ref0, ref1, ref2, ref3, ref);
+
+ src = __msa_aver_u_b(src, pred);
+ CALC_MSE_AVG_B(src, ref, var, avg);
+ }
+
+ vec = __msa_hadd_s_w(avg, avg);
+ *diff = HADD_SW_S32(vec);
+
+ return HADD_SW_S32(var);
+}
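The avg_sse_diff_* functions added in this hunk differ only in how many pixels each iteration loads; the per-pixel recipe is fixed: round-average src with the second predictor (what __msa_aver_u_b does), then accumulate the signed difference against ref into *diff and the squared difference into the returned SSE. A scalar reference sketch of that recipe:

static uint32_t avg_sse_diff_scalar(const uint8_t *src_ptr, int32_t src_stride,
                                    const uint8_t *ref_ptr, int32_t ref_stride,
                                    const uint8_t *sec_pred,
                                    int32_t width, int32_t height,
                                    int32_t *diff) {
  uint32_t sse = 0;
  int32_t sum = 0;
  int32_t r, c;

  for (r = 0; r < height; ++r) {
    for (c = 0; c < width; ++c) {
      const int32_t avg = (src_ptr[c] + sec_pred[c] + 1) >> 1;  /* aver_u.b */
      const int32_t d = avg - ref_ptr[c];
      sum += d;
      sse += (uint32_t)(d * d);
    }
    src_ptr += src_stride;
    ref_ptr += ref_stride;
    sec_pred += width;  /* the second predictor is stored contiguously */
  }
  *diff = sum;
  return sse;
}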
+
+static uint32_t avg_sse_diff_8width_msa(const uint8_t *src_ptr,
+ int32_t src_stride,
+ const uint8_t *ref_ptr,
+ int32_t ref_stride,
+ const uint8_t *sec_pred,
+ int32_t height,
+ int32_t *diff) {
+ int32_t ht_cnt;
+ v16u8 src0, src1, src2, src3;
+ v16u8 ref0, ref1, ref2, ref3;
+ v16u8 pred0, pred1;
+ v8i16 avg = { 0 };
+ v4i32 vec, var = { 0 };
+
+ for (ht_cnt = (height >> 2); ht_cnt--;) {
+ LD_UB2(sec_pred, 16, pred0, pred1);
+ sec_pred += 32;
+ LD_UB4(src_ptr, src_stride, src0, src1, src2, src3);
+ src_ptr += (4 * src_stride);
+ LD_UB4(ref_ptr, ref_stride, ref0, ref1, ref2, ref3);
+ ref_ptr += (4 * ref_stride);
+
+ PCKEV_D4_UB(src1, src0, src3, src2, ref1, ref0, ref3, ref2,
+ src0, src1, ref0, ref1);
+ AVER_UB2_UB(src0, pred0, src1, pred1, src0, src1);
+ CALC_MSE_AVG_B(src0, ref0, var, avg);
+ CALC_MSE_AVG_B(src1, ref1, var, avg);
+ }
+
+ vec = __msa_hadd_s_w(avg, avg);
+ *diff = HADD_SW_S32(vec);
+
+ return HADD_SW_S32(var);
+}
+
+static uint32_t avg_sse_diff_16width_msa(const uint8_t *src_ptr,
+ int32_t src_stride,
+ const uint8_t *ref_ptr,
+ int32_t ref_stride,
+ const uint8_t *sec_pred,
+ int32_t height,
+ int32_t *diff) {
+ int32_t ht_cnt;
+ v16u8 src, ref, pred;
+ v8i16 avg = { 0 };
+ v4i32 vec, var = { 0 };
+
+ for (ht_cnt = (height >> 2); ht_cnt--;) {
+ pred = LD_UB(sec_pred);
+ sec_pred += 16;
+ src = LD_UB(src_ptr);
+ src_ptr += src_stride;
+ ref = LD_UB(ref_ptr);
+ ref_ptr += ref_stride;
+ src = __msa_aver_u_b(src, pred);
+ CALC_MSE_AVG_B(src, ref, var, avg);
+
+ pred = LD_UB(sec_pred);
+ sec_pred += 16;
+ src = LD_UB(src_ptr);
+ src_ptr += src_stride;
+ ref = LD_UB(ref_ptr);
+ ref_ptr += ref_stride;
+ src = __msa_aver_u_b(src, pred);
+ CALC_MSE_AVG_B(src, ref, var, avg);
+
+ pred = LD_UB(sec_pred);
+ sec_pred += 16;
+ src = LD_UB(src_ptr);
+ src_ptr += src_stride;
+ ref = LD_UB(ref_ptr);
+ ref_ptr += ref_stride;
+ src = __msa_aver_u_b(src, pred);
+ CALC_MSE_AVG_B(src, ref, var, avg);
+
+ pred = LD_UB(sec_pred);
+ sec_pred += 16;
+ src = LD_UB(src_ptr);
+ src_ptr += src_stride;
+ ref = LD_UB(ref_ptr);
+ ref_ptr += ref_stride;
+ src = __msa_aver_u_b(src, pred);
+ CALC_MSE_AVG_B(src, ref, var, avg);
+ }
+
+ vec = __msa_hadd_s_w(avg, avg);
+ *diff = HADD_SW_S32(vec);
+
+ return HADD_SW_S32(var);
+}
+
+static uint32_t avg_sse_diff_32width_msa(const uint8_t *src_ptr,
+ int32_t src_stride,
+ const uint8_t *ref_ptr,
+ int32_t ref_stride,
+ const uint8_t *sec_pred,
+ int32_t height,
+ int32_t *diff) {
+ int32_t ht_cnt;
+ v16u8 src0, src1, ref0, ref1, pred0, pred1;
+ v8i16 avg = { 0 };
+ v4i32 vec, var = { 0 };
+
+ for (ht_cnt = (height >> 2); ht_cnt--;) {
+ LD_UB2(sec_pred, 16, pred0, pred1);
+ sec_pred += 32;
+ LD_UB2(src_ptr, 16, src0, src1);
+ src_ptr += src_stride;
+ LD_UB2(ref_ptr, 16, ref0, ref1);
+ ref_ptr += ref_stride;
+ AVER_UB2_UB(src0, pred0, src1, pred1, src0, src1);
+ CALC_MSE_AVG_B(src0, ref0, var, avg);
+ CALC_MSE_AVG_B(src1, ref1, var, avg);
+
+ LD_UB2(sec_pred, 16, pred0, pred1);
+ sec_pred += 32;
+ LD_UB2(src_ptr, 16, src0, src1);
+ src_ptr += src_stride;
+ LD_UB2(ref_ptr, 16, ref0, ref1);
+ ref_ptr += ref_stride;
+ AVER_UB2_UB(src0, pred0, src1, pred1, src0, src1);
+ CALC_MSE_AVG_B(src0, ref0, var, avg);
+ CALC_MSE_AVG_B(src1, ref1, var, avg);
+
+ LD_UB2(sec_pred, 16, pred0, pred1);
+ sec_pred += 32;
+ LD_UB2(src_ptr, 16, src0, src1);
+ src_ptr += src_stride;
+ LD_UB2(ref_ptr, 16, ref0, ref1);
+ ref_ptr += ref_stride;
+ AVER_UB2_UB(src0, pred0, src1, pred1, src0, src1);
+ CALC_MSE_AVG_B(src0, ref0, var, avg);
+ CALC_MSE_AVG_B(src1, ref1, var, avg);
+
+ LD_UB2(sec_pred, 16, pred0, pred1);
+ sec_pred += 32;
+ LD_UB2(src_ptr, 16, src0, src1);
+ src_ptr += src_stride;
+ LD_UB2(ref_ptr, 16, ref0, ref1);
+ ref_ptr += ref_stride;
+ AVER_UB2_UB(src0, pred0, src1, pred1, src0, src1);
+ CALC_MSE_AVG_B(src0, ref0, var, avg);
+ CALC_MSE_AVG_B(src1, ref1, var, avg);
+ }
+
+ vec = __msa_hadd_s_w(avg, avg);
+ *diff = HADD_SW_S32(vec);
+
+ return HADD_SW_S32(var);
+}
+
+static uint32_t avg_sse_diff_32x64_msa(const uint8_t *src_ptr,
+ int32_t src_stride,
+ const uint8_t *ref_ptr,
+ int32_t ref_stride,
+ const uint8_t *sec_pred,
+ int32_t *diff) {
+ int32_t ht_cnt;
+ v16u8 src0, src1, ref0, ref1, pred0, pred1;
+ v8i16 avg0 = { 0 };
+ v8i16 avg1 = { 0 };
+ v4i32 vec, var = { 0 };
+
+ for (ht_cnt = 16; ht_cnt--;) {
+ LD_UB2(sec_pred, 16, pred0, pred1);
+ sec_pred += 32;
+ LD_UB2(src_ptr, 16, src0, src1);
+ src_ptr += src_stride;
+ LD_UB2(ref_ptr, 16, ref0, ref1);
+ ref_ptr += ref_stride;
+ AVER_UB2_UB(src0, pred0, src1, pred1, src0, src1);
+ CALC_MSE_AVG_B(src0, ref0, var, avg0);
+ CALC_MSE_AVG_B(src1, ref1, var, avg1);
+
+ LD_UB2(sec_pred, 16, pred0, pred1);
+ sec_pred += 32;
+ LD_UB2(src_ptr, 16, src0, src1);
+ src_ptr += src_stride;
+ LD_UB2(ref_ptr, 16, ref0, ref1);
+ ref_ptr += ref_stride;
+ AVER_UB2_UB(src0, pred0, src1, pred1, src0, src1);
+ CALC_MSE_AVG_B(src0, ref0, var, avg0);
+ CALC_MSE_AVG_B(src1, ref1, var, avg1);
+
+ LD_UB2(sec_pred, 16, pred0, pred1);
+ sec_pred += 32;
+ LD_UB2(src_ptr, 16, src0, src1);
+ src_ptr += src_stride;
+ LD_UB2(ref_ptr, 16, ref0, ref1);
+ ref_ptr += ref_stride;
+ AVER_UB2_UB(src0, pred0, src1, pred1, src0, src1);
+ CALC_MSE_AVG_B(src0, ref0, var, avg0);
+ CALC_MSE_AVG_B(src1, ref1, var, avg1);
+
+ LD_UB2(sec_pred, 16, pred0, pred1);
+ sec_pred += 32;
+ LD_UB2(src_ptr, 16, src0, src1);
+ src_ptr += src_stride;
+ LD_UB2(ref_ptr, 16, ref0, ref1);
+ ref_ptr += ref_stride;
+ AVER_UB2_UB(src0, pred0, src1, pred1, src0, src1);
+ CALC_MSE_AVG_B(src0, ref0, var, avg0);
+ CALC_MSE_AVG_B(src1, ref1, var, avg1);
+ }
+
+ vec = __msa_hadd_s_w(avg0, avg0);
+ vec += __msa_hadd_s_w(avg1, avg1);
+ *diff = HADD_SW_S32(vec);
+
+ return HADD_SW_S32(var);
+}
+
+static uint32_t avg_sse_diff_64x32_msa(const uint8_t *src_ptr,
+ int32_t src_stride,
+ const uint8_t *ref_ptr,
+ int32_t ref_stride,
+ const uint8_t *sec_pred,
+ int32_t *diff) {
+ int32_t ht_cnt;
+ v16u8 src0, src1, src2, src3;
+ v16u8 ref0, ref1, ref2, ref3;
+ v16u8 pred0, pred1, pred2, pred3;
+ v8i16 avg0 = { 0 };
+ v8i16 avg1 = { 0 };
+ v4i32 vec, var = { 0 };
+
+ for (ht_cnt = 16; ht_cnt--;) {
+ LD_UB4(sec_pred, 16, pred0, pred1, pred2, pred3);
+ sec_pred += 64;
+ LD_UB4(src_ptr, 16, src0, src1, src2, src3);
+ src_ptr += src_stride;
+ LD_UB4(ref_ptr, 16, ref0, ref1, ref2, ref3);
+ ref_ptr += ref_stride;
+ AVER_UB4_UB(src0, pred0, src1, pred1, src2, pred2, src3, pred3,
+ src0, src1, src2, src3);
+ CALC_MSE_AVG_B(src0, ref0, var, avg0);
+ CALC_MSE_AVG_B(src2, ref2, var, avg0);
+ CALC_MSE_AVG_B(src1, ref1, var, avg1);
+ CALC_MSE_AVG_B(src3, ref3, var, avg1);
+
+ LD_UB4(sec_pred, 16, pred0, pred1, pred2, pred3);
+ sec_pred += 64;
+ LD_UB4(src_ptr, 16, src0, src1, src2, src3);
+ src_ptr += src_stride;
+ LD_UB4(ref_ptr, 16, ref0, ref1, ref2, ref3);
+ ref_ptr += ref_stride;
+ AVER_UB4_UB(src0, pred0, src1, pred1, src2, pred2, src3, pred3,
+ src0, src1, src2, src3);
+ CALC_MSE_AVG_B(src0, ref0, var, avg0);
+ CALC_MSE_AVG_B(src2, ref2, var, avg0);
+ CALC_MSE_AVG_B(src1, ref1, var, avg1);
+ CALC_MSE_AVG_B(src3, ref3, var, avg1);
+ }
+
+ vec = __msa_hadd_s_w(avg0, avg0);
+ vec += __msa_hadd_s_w(avg1, avg1);
+
+ *diff = HADD_SW_S32(vec);
+
+ return HADD_SW_S32(var);
+}
+
+static uint32_t avg_sse_diff_64x64_msa(const uint8_t *src_ptr,
+ int32_t src_stride,
+ const uint8_t *ref_ptr,
+ int32_t ref_stride,
+ const uint8_t *sec_pred,
+ int32_t *diff) {
+ int32_t ht_cnt;
+ v16u8 src0, src1, src2, src3;
+ v16u8 ref0, ref1, ref2, ref3;
+ v16u8 pred0, pred1, pred2, pred3;
+ v8i16 avg0 = { 0 };
+ v8i16 avg1 = { 0 };
+ v8i16 avg2 = { 0 };
+ v8i16 avg3 = { 0 };
+ v4i32 vec, var = { 0 };
+
+ for (ht_cnt = 32; ht_cnt--;) {
+ LD_UB4(sec_pred, 16, pred0, pred1, pred2, pred3);
+ sec_pred += 64;
+ LD_UB4(src_ptr, 16, src0, src1, src2, src3);
+ src_ptr += src_stride;
+ LD_UB4(ref_ptr, 16, ref0, ref1, ref2, ref3);
+ ref_ptr += ref_stride;
+ AVER_UB4_UB(src0, pred0, src1, pred1, src2, pred2, src3, pred3,
+ src0, src1, src2, src3);
+ CALC_MSE_AVG_B(src0, ref0, var, avg0);
+ CALC_MSE_AVG_B(src1, ref1, var, avg1);
+ CALC_MSE_AVG_B(src2, ref2, var, avg2);
+ CALC_MSE_AVG_B(src3, ref3, var, avg3);
+
+ LD_UB4(sec_pred, 16, pred0, pred1, pred2, pred3);
+ sec_pred += 64;
+ LD_UB4(src_ptr, 16, src0, src1, src2, src3);
+ src_ptr += src_stride;
+ LD_UB4(ref_ptr, 16, ref0, ref1, ref2, ref3);
+ ref_ptr += ref_stride;
+ AVER_UB4_UB(src0, pred0, src1, pred1, src2, pred2, src3, pred3,
+ src0, src1, src2, src3);
+ CALC_MSE_AVG_B(src0, ref0, var, avg0);
+ CALC_MSE_AVG_B(src1, ref1, var, avg1);
+ CALC_MSE_AVG_B(src2, ref2, var, avg2);
+ CALC_MSE_AVG_B(src3, ref3, var, avg3);
+ }
+
+ vec = __msa_hadd_s_w(avg0, avg0);
+ vec += __msa_hadd_s_w(avg1, avg1);
+ vec += __msa_hadd_s_w(avg2, avg2);
+ vec += __msa_hadd_s_w(avg3, avg3);
+ *diff = HADD_SW_S32(vec);
+
+ return HADD_SW_S32(var);
+}
+
static uint32_t sub_pixel_sse_diff_4width_h_msa(const uint8_t *src,
int32_t src_stride,
const uint8_t *dst,
@@ -691,6 +1042,756 @@
return sse;
}
+static uint32_t sub_pixel_avg_sse_diff_4width_h_msa(const uint8_t *src,
+ int32_t src_stride,
+ const uint8_t *dst,
+ int32_t dst_stride,
+ const uint8_t *sec_pred,
+ const uint8_t *filter,
+ int32_t height,
+ int32_t *diff) {
+ int16_t filtval;
+ uint32_t loop_cnt;
+ uint32_t ref0, ref1, ref2, ref3;
+ v16u8 out, pred, filt0, ref = { 0 };
+ v16i8 src0, src1, src2, src3;
+ v16i8 mask = { 0, 1, 1, 2, 2, 3, 3, 4, 4, 5, 5, 6, 6, 7, 7, 8 };
+ v8u16 vec0, vec1, vec2, vec3;
+ v8u16 const255;
+ v8i16 avg = { 0 };
+ v4i32 vec, var = { 0 };
+
+ filtval = LH(filter);
+ filt0 = (v16u8)__msa_fill_h(filtval);
+
+ const255 = (v8u16)__msa_ldi_h(255);
+
+ for (loop_cnt = (height >> 2); loop_cnt--;) {
+ LD_SB4(src, src_stride, src0, src1, src2, src3);
+ src += (4 * src_stride);
+ pred = LD_UB(sec_pred);
+ sec_pred += 16;
+ LW4(dst, dst_stride, ref0, ref1, ref2, ref3);
+ dst += (4 * dst_stride);
+
+ INSERT_W4_UB(ref0, ref1, ref2, ref3, ref);
+ VSHF_B2_UH(src0, src0, src1, src1, mask, mask, vec0, vec1);
+ VSHF_B2_UH(src2, src2, src3, src3, mask, mask, vec2, vec3);
+ DOTP_UB4_UH(vec0, vec1, vec2, vec3, filt0, filt0, filt0, filt0,
+ vec0, vec1, vec2, vec3);
+ SRARI_H4_UH(vec0, vec1, vec2, vec3, FILTER_BITS);
+ MIN_UH4_UH(vec0, vec1, vec2, vec3, const255);
+ PCKEV_B4_SB(vec0, vec0, vec1, vec1, vec2, vec2, vec3, vec3,
+ src0, src1, src2, src3);
+ ILVEV_W2_SB(src0, src1, src2, src3, src0, src2);
+ out = (v16u8)__msa_ilvev_d((v2i64)src2, (v2i64)src0);
+ out = __msa_aver_u_b(out, pred);
+ CALC_MSE_AVG_B(out, ref, var, avg);
+ }
+
+ vec = __msa_hadd_s_w(avg, avg);
+ *diff = HADD_SW_S32(vec);
+
+ return HADD_SW_S32(var);
+}
+
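[Editor's note] Each *_h_msa routine above applies the first-stage bilinear filter: a 2-tap dot product per pixel (DOTP_UB*), a rounded shift by FILTER_BITS (SRARI_H*), a clamp to 8 bits (MIN_UH4_UH against const255), then a rounded average against sec_pred before the SSE/sum accumulation. A scalar sketch of one output pixel, assuming FILTER_BITS == 7 and that filter[] is one 2-tap row of bilinear_filters_msa:

    static int bilinear_h_avg_pixel(const uint8_t *src, int i,
                                    const uint8_t *filter, int second_pred) {
      /* 2-tap dot product, rounded shift by FILTER_BITS (7). */
      int v = (src[i] * filter[0] + src[i + 1] * filter[1] + 64) >> 7;
      if (v > 255) v = 255;              /* MIN_UH4_UH(..., const255) */
      return (v + second_pred + 1) >> 1; /* __msa_aver_u_b with sec_pred */
    }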
+static uint32_t sub_pixel_avg_sse_diff_8width_h_msa(const uint8_t *src,
+ int32_t src_stride,
+ const uint8_t *dst,
+ int32_t dst_stride,
+ const uint8_t *sec_pred,
+ const uint8_t *filter,
+ int32_t height,
+ int32_t *diff) {
+ int16_t filtval;
+ uint32_t loop_cnt;
+ v16u8 out, pred, filt0;
+ v16u8 ref0, ref1, ref2, ref3;
+ v16i8 src0, src1, src2, src3;
+ v16i8 mask = { 0, 1, 1, 2, 2, 3, 3, 4, 4, 5, 5, 6, 6, 7, 7, 8 };
+ v8u16 vec0, vec1, vec2, vec3;
+ v8u16 const255;
+ v8i16 avg = { 0 };
+ v4i32 vec, var = { 0 };
+
+ filtval = LH(filter);
+ filt0 = (v16u8)__msa_fill_h(filtval);
+
+ const255 = (v8u16)__msa_ldi_h(255);
+
+ for (loop_cnt = (height >> 2); loop_cnt--;) {
+ LD_SB4(src, src_stride, src0, src1, src2, src3);
+ src += (4 * src_stride);
+ LD_UB4(dst, dst_stride, ref0, ref1, ref2, ref3);
+ dst += (4 * dst_stride);
+
+ PCKEV_D2_UB(ref1, ref0, ref3, ref2, ref0, ref1);
+ VSHF_B2_UH(src0, src0, src1, src1, mask, mask, vec0, vec1);
+ VSHF_B2_UH(src2, src2, src3, src3, mask, mask, vec2, vec3);
+ DOTP_UB4_UH(vec0, vec1, vec2, vec3, filt0, filt0, filt0, filt0,
+ vec0, vec1, vec2, vec3);
+ SRARI_H4_UH(vec0, vec1, vec2, vec3, FILTER_BITS);
+ MIN_UH4_UH(vec0, vec1, vec2, vec3, const255);
+ PCKEV_B4_SB(vec0, vec0, vec1, vec1, vec2, vec2, vec3, vec3,
+ src0, src1, src2, src3);
+ out = (v16u8)__msa_ilvev_d((v2i64)src1, (v2i64)src0);
+
+ pred = LD_UB(sec_pred);
+ sec_pred += 16;
+ out = __msa_aver_u_b(out, pred);
+ CALC_MSE_AVG_B(out, ref0, var, avg);
+ out = (v16u8)__msa_ilvev_d((v2i64)src3, (v2i64)src2);
+ pred = LD_UB(sec_pred);
+ sec_pred += 16;
+ out = __msa_aver_u_b(out, pred);
+ CALC_MSE_AVG_B(out, ref1, var, avg);
+ }
+
+ vec = __msa_hadd_s_w(avg, avg);
+ *diff = HADD_SW_S32(vec);
+
+ return HADD_SW_S32(var);
+}
+
+static uint32_t subpel_avg_ssediff_16w_h_msa(const uint8_t *src,
+ int32_t src_stride,
+ const uint8_t *dst,
+ int32_t dst_stride,
+ const uint8_t *sec_pred,
+ const uint8_t *filter,
+ int32_t height,
+ int32_t *diff,
+ int32_t width) {
+ int16_t filtval;
+ uint32_t loop_cnt;
+ v16i8 src0, src1, src2, src3, src4, src5, src6, src7;
+ v16i8 mask = { 0, 1, 1, 2, 2, 3, 3, 4, 4, 5, 5, 6, 6, 7, 7, 8 };
+ v16u8 dst0, dst1, dst2, dst3;
+ v16u8 tmp0, tmp1, tmp2, tmp3;
+ v16u8 pred0, pred1, pred2, pred3, filt0;
+ v8u16 vec0, vec1, vec2, vec3, vec4, vec5, vec6, vec7;
+ v8u16 out0, out1, out2, out3, out4, out5, out6, out7;
+ v8u16 const255;
+ v8i16 avg = { 0 };
+ v4i32 vec, var = { 0 };
+
+ filtval = LH(filter);
+ filt0 = (v16u8)__msa_fill_h(filtval);
+
+ const255 = (v8u16)__msa_ldi_h(255);
+
+ for (loop_cnt = (height >> 2); loop_cnt--;) {
+ LD_SB4(src, src_stride, src0, src2, src4, src6);
+ LD_SB4(src + 8, src_stride, src1, src3, src5, src7);
+ src += (4 * src_stride);
+ LD_UB4(dst, dst_stride, dst0, dst1, dst2, dst3);
+ dst += (4 * dst_stride);
+ LD_UB4(sec_pred, width, pred0, pred1, pred2, pred3);
+ sec_pred += (4 * width);
+
+ VSHF_B2_UH(src0, src0, src1, src1, mask, mask, vec0, vec1);
+ VSHF_B2_UH(src2, src2, src3, src3, mask, mask, vec2, vec3);
+ VSHF_B2_UH(src4, src4, src5, src5, mask, mask, vec4, vec5);
+ VSHF_B2_UH(src6, src6, src7, src7, mask, mask, vec6, vec7);
+ DOTP_UB4_UH(vec0, vec1, vec2, vec3, filt0, filt0, filt0, filt0,
+ out0, out1, out2, out3);
+ DOTP_UB4_UH(vec4, vec5, vec6, vec7, filt0, filt0, filt0, filt0,
+ out4, out5, out6, out7);
+ SRARI_H4_UH(out0, out1, out2, out3, FILTER_BITS);
+ SRARI_H4_UH(out4, out5, out6, out7, FILTER_BITS);
+ MIN_UH4_UH(out0, out1, out2, out3, const255);
+ MIN_UH4_UH(out4, out5, out6, out7, const255);
+ PCKEV_B4_UB(out1, out0, out3, out2, out5, out4, out7, out6,
+ tmp0, tmp1, tmp2, tmp3);
+ AVER_UB4_UB(tmp0, pred0, tmp1, pred1, tmp2, pred2, tmp3, pred3,
+ tmp0, tmp1, tmp2, tmp3);
+
+ CALC_MSE_AVG_B(tmp0, dst0, var, avg);
+ CALC_MSE_AVG_B(tmp1, dst1, var, avg);
+ CALC_MSE_AVG_B(tmp2, dst2, var, avg);
+ CALC_MSE_AVG_B(tmp3, dst3, var, avg);
+ }
+
+ vec = __msa_hadd_s_w(avg, avg);
+ *diff = HADD_SW_S32(vec);
+
+ return HADD_SW_S32(var);
+}
+
+static uint32_t sub_pixel_avg_sse_diff_16width_h_msa(const uint8_t *src,
+ int32_t src_stride,
+ const uint8_t *dst,
+ int32_t dst_stride,
+ const uint8_t *sec_pred,
+ const uint8_t *filter,
+ int32_t height,
+ int32_t *diff) {
+ return subpel_avg_ssediff_16w_h_msa(src, src_stride, dst, dst_stride,
+ sec_pred, filter, height, diff, 16);
+}
+
+static uint32_t sub_pixel_avg_sse_diff_32width_h_msa(const uint8_t *src,
+ int32_t src_stride,
+ const uint8_t *dst,
+ int32_t dst_stride,
+ const uint8_t *sec_pred,
+ const uint8_t *filter,
+ int32_t height,
+ int32_t *diff) {
+ uint32_t loop_cnt, sse = 0;
+ int32_t diff0[2];
+
+ for (loop_cnt = 0; loop_cnt < 2; ++loop_cnt) {
+ sse += subpel_avg_ssediff_16w_h_msa(src, src_stride, dst, dst_stride,
+ sec_pred, filter, height,
+ &diff0[loop_cnt], 32);
+ src += 16;
+ dst += 16;
+ sec_pred += 16;
+ }
+
+ *diff = diff0[0] + diff0[1];
+
+ return sse;
+}
+
+static uint32_t sub_pixel_avg_sse_diff_64width_h_msa(const uint8_t *src,
+ int32_t src_stride,
+ const uint8_t *dst,
+ int32_t dst_stride,
+ const uint8_t *sec_pred,
+ const uint8_t *filter,
+ int32_t height,
+ int32_t *diff) {
+ uint32_t loop_cnt, sse = 0;
+ int32_t diff0[4];
+
+ for (loop_cnt = 0; loop_cnt < 4; ++loop_cnt) {
+ sse += subpel_avg_ssediff_16w_h_msa(src, src_stride, dst, dst_stride,
+ sec_pred, filter, height,
+ &diff0[loop_cnt], 64);
+ src += 16;
+ dst += 16;
+ sec_pred += 16;
+ }
+
+ *diff = diff0[0] + diff0[1] + diff0[2] + diff0[3];
+
+ return sse;
+}
+
+static uint32_t sub_pixel_avg_sse_diff_4width_v_msa(const uint8_t *src,
+ int32_t src_stride,
+ const uint8_t *dst,
+ int32_t dst_stride,
+ const uint8_t *sec_pred,
+ const uint8_t *filter,
+ int32_t height,
+ int32_t *diff) {
+ int16_t filtval;
+ uint32_t loop_cnt;
+ uint32_t ref0, ref1, ref2, ref3;
+ v16u8 src0, src1, src2, src3, src4;
+ v16u8 src10_r, src32_r, src21_r, src43_r;
+ v16u8 out, pred, ref = { 0 };
+ v16u8 src2110, src4332, filt0;
+ v8i16 avg = { 0 };
+ v4i32 vec, var = { 0 };
+ v8u16 tmp0, tmp1;
+
+ filtval = LH(filter);
+ filt0 = (v16u8)__msa_fill_h(filtval);
+
+ src0 = LD_UB(src);
+ src += src_stride;
+
+ for (loop_cnt = (height >> 2); loop_cnt--;) {
+ LD_UB4(src, src_stride, src1, src2, src3, src4);
+ src += (4 * src_stride);
+ pred = LD_UB(sec_pred);
+ sec_pred += 16;
+ LW4(dst, dst_stride, ref0, ref1, ref2, ref3);
+ dst += (4 * dst_stride);
+
+ INSERT_W4_UB(ref0, ref1, ref2, ref3, ref);
+ ILVR_B4_UB(src1, src0, src2, src1, src3, src2, src4, src3,
+ src10_r, src21_r, src32_r, src43_r);
+ ILVR_D2_UB(src21_r, src10_r, src43_r, src32_r, src2110, src4332);
+ DOTP_UB2_UH(src2110, src4332, filt0, filt0, tmp0, tmp1);
+ SRARI_H2_UH(tmp0, tmp1, FILTER_BITS);
+ SAT_UH2_UH(tmp0, tmp1, 7);
+
+ out = (v16u8)__msa_pckev_b((v16i8)tmp1, (v16i8)tmp0);
+ out = __msa_aver_u_b(out, pred);
+ CALC_MSE_AVG_B(out, ref, var, avg);
+ src0 = src4;
+ }
+
+ vec = __msa_hadd_s_w(avg, avg);
+ *diff = HADD_SW_S32(vec);
+
+ return HADD_SW_S32(var);
+}
+
+static uint32_t sub_pixel_avg_sse_diff_8width_v_msa(const uint8_t *src,
+ int32_t src_stride,
+ const uint8_t *dst,
+ int32_t dst_stride,
+ const uint8_t *sec_pred,
+ const uint8_t *filter,
+ int32_t height,
+ int32_t *diff) {
+ int16_t filtval;
+ uint32_t loop_cnt;
+ v16u8 src0, src1, src2, src3, src4;
+ v16u8 ref0, ref1, ref2, ref3;
+ v16u8 pred0, pred1, filt0;
+ v8u16 vec0, vec1, vec2, vec3;
+ v8u16 tmp0, tmp1, tmp2, tmp3;
+ v8i16 avg = { 0 };
+ v4i32 vec, var = { 0 };
+
+ filtval = LH(filter);
+ filt0 = (v16u8)__msa_fill_h(filtval);
+
+ src0 = LD_UB(src);
+ src += src_stride;
+
+ for (loop_cnt = (height >> 2); loop_cnt--;) {
+ LD_UB4(src, src_stride, src1, src2, src3, src4);
+ src += (4 * src_stride);
+ LD_UB2(sec_pred, 16, pred0, pred1);
+ sec_pred += 32;
+ LD_UB4(dst, dst_stride, ref0, ref1, ref2, ref3);
+ dst += (4 * dst_stride);
+ PCKEV_D2_UB(ref1, ref0, ref3, ref2, ref0, ref1);
+ ILVR_B4_UH(src1, src0, src2, src1, src3, src2, src4, src3,
+ vec0, vec1, vec2, vec3);
+ DOTP_UB4_UH(vec0, vec1, vec2, vec3, filt0, filt0, filt0, filt0,
+ tmp0, tmp1, tmp2, tmp3);
+ SRARI_H4_UH(tmp0, tmp1, tmp2, tmp3, FILTER_BITS);
+ SAT_UH4_UH(tmp0, tmp1, tmp2, tmp3, 7);
+ PCKEV_B2_UB(tmp1, tmp0, tmp3, tmp2, src0, src1);
+ AVER_UB2_UB(src0, pred0, src1, pred1, src0, src1);
+ CALC_MSE_AVG_B(src0, ref0, var, avg);
+ CALC_MSE_AVG_B(src1, ref1, var, avg);
+
+ src0 = src4;
+ }
+
+ vec = __msa_hadd_s_w(avg, avg);
+ *diff = HADD_SW_S32(vec);
+
+ return HADD_SW_S32(var);
+}
+
+static uint32_t subpel_avg_ssediff_16w_v_msa(const uint8_t *src,
+ int32_t src_stride,
+ const uint8_t *dst,
+ int32_t dst_stride,
+ const uint8_t *sec_pred,
+ const uint8_t *filter,
+ int32_t height,
+ int32_t *diff,
+ int32_t width) {
+ int16_t filtval;
+ uint32_t loop_cnt;
+ v16u8 ref0, ref1, ref2, ref3;
+ v16u8 pred0, pred1, pred2, pred3;
+ v16u8 src0, src1, src2, src3, src4;
+ v16u8 out0, out1, out2, out3, filt0;
+ v8u16 vec0, vec1, vec2, vec3, vec4, vec5, vec6, vec7;
+ v8u16 tmp0, tmp1, tmp2, tmp3;
+ v8i16 avg = { 0 };
+ v4i32 vec, var = { 0 };
+
+ filtval = LH(filter);
+ filt0 = (v16u8)__msa_fill_h(filtval);
+
+ src0 = LD_UB(src);
+ src += src_stride;
+
+ for (loop_cnt = (height >> 2); loop_cnt--;) {
+ LD_UB4(src, src_stride, src1, src2, src3, src4);
+ src += (4 * src_stride);
+ LD_UB4(sec_pred, width, pred0, pred1, pred2, pred3);
+ sec_pred += (4 * width);
+
+ ILVR_B2_UH(src1, src0, src2, src1, vec0, vec2);
+ ILVL_B2_UH(src1, src0, src2, src1, vec1, vec3);
+ DOTP_UB2_UH(vec0, vec1, filt0, filt0, tmp0, tmp1);
+ SRARI_H2_UH(tmp0, tmp1, FILTER_BITS);
+ SAT_UH2_UH(tmp0, tmp1, 7);
+ out0 = (v16u8)__msa_pckev_b((v16i8)tmp1, (v16i8)tmp0);
+
+ ILVR_B2_UH(src3, src2, src4, src3, vec4, vec6);
+ ILVL_B2_UH(src3, src2, src4, src3, vec5, vec7);
+ DOTP_UB2_UH(vec2, vec3, filt0, filt0, tmp2, tmp3);
+ SRARI_H2_UH(tmp2, tmp3, FILTER_BITS);
+ SAT_UH2_UH(tmp2, tmp3, 7);
+ out1 = (v16u8)__msa_pckev_b((v16i8)tmp3, (v16i8)tmp2);
+
+ DOTP_UB2_UH(vec4, vec5, filt0, filt0, tmp0, tmp1);
+ SRARI_H2_UH(tmp0, tmp1, FILTER_BITS);
+ SAT_UH2_UH(tmp0, tmp1, 7);
+ out2 = (v16u8)__msa_pckev_b((v16i8)tmp1, (v16i8)tmp0);
+
+ DOTP_UB2_UH(vec6, vec7, filt0, filt0, tmp2, tmp3);
+ SRARI_H2_UH(tmp2, tmp3, FILTER_BITS);
+ SAT_UH2_UH(tmp2, tmp3, 7);
+ out3 = (v16u8)__msa_pckev_b((v16i8)tmp3, (v16i8)tmp2);
+
+ src0 = src4;
+ LD_UB4(dst, dst_stride, ref0, ref1, ref2, ref3);
+ dst += (4 * dst_stride);
+
+ AVER_UB4_UB(out0, pred0, out1, pred1, out2, pred2, out3, pred3,
+ out0, out1, out2, out3);
+
+ CALC_MSE_AVG_B(out0, ref0, var, avg);
+ CALC_MSE_AVG_B(out1, ref1, var, avg);
+ CALC_MSE_AVG_B(out2, ref2, var, avg);
+ CALC_MSE_AVG_B(out3, ref3, var, avg);
+ }
+
+ vec = __msa_hadd_s_w(avg, avg);
+ *diff = HADD_SW_S32(vec);
+
+ return HADD_SW_S32(var);
+}
+
+static uint32_t sub_pixel_avg_sse_diff_16width_v_msa(const uint8_t *src,
+ int32_t src_stride,
+ const uint8_t *dst,
+ int32_t dst_stride,
+ const uint8_t *sec_pred,
+ const uint8_t *filter,
+ int32_t height,
+ int32_t *diff) {
+ return subpel_avg_ssediff_16w_v_msa(src, src_stride, dst, dst_stride,
+ sec_pred, filter, height, diff, 16);
+}
+
+static uint32_t sub_pixel_avg_sse_diff_32width_v_msa(const uint8_t *src,
+ int32_t src_stride,
+ const uint8_t *dst,
+ int32_t dst_stride,
+ const uint8_t *sec_pred,
+ const uint8_t *filter,
+ int32_t height,
+ int32_t *diff) {
+ uint32_t loop_cnt, sse = 0;
+ int32_t diff0[2];
+
+ for (loop_cnt = 0; loop_cnt < 2; ++loop_cnt) {
+ sse += subpel_avg_ssediff_16w_v_msa(src, src_stride, dst, dst_stride,
+ sec_pred, filter, height,
+ &diff0[loop_cnt], 32);
+ src += 16;
+ dst += 16;
+ sec_pred += 16;
+ }
+
+ *diff = diff0[0] + diff0[1];
+
+ return sse;
+}
+
+static uint32_t sub_pixel_avg_sse_diff_64width_v_msa(const uint8_t *src,
+ int32_t src_stride,
+ const uint8_t *dst,
+ int32_t dst_stride,
+ const uint8_t *sec_pred,
+ const uint8_t *filter,
+ int32_t height,
+ int32_t *diff) {
+ uint32_t loop_cnt, sse = 0;
+ int32_t diff0[4];
+
+ for (loop_cnt = 0; loop_cnt < 4; ++loop_cnt) {
+ sse += subpel_avg_ssediff_16w_v_msa(src, src_stride, dst, dst_stride,
+ sec_pred, filter, height,
+ &diff0[loop_cnt], 64);
+ src += 16;
+ dst += 16;
+ sec_pred += 16;
+ }
+
+ *diff = diff0[0] + diff0[1] + diff0[2] + diff0[3];
+
+ return sse;
+}
+
+static uint32_t sub_pixel_avg_sse_diff_4width_hv_msa(
+ const uint8_t *src, int32_t src_stride,
+ const uint8_t *dst, int32_t dst_stride,
+ const uint8_t *sec_pred,
+ const uint8_t *filter_horiz, const uint8_t *filter_vert,
+ int32_t height, int32_t *diff) {
+ int16_t filtval;
+ uint32_t loop_cnt;
+ uint32_t ref0, ref1, ref2, ref3;
+ v16u8 src0, src1, src2, src3, src4;
+ v16u8 mask = { 0, 1, 1, 2, 2, 3, 3, 4, 16, 17, 17, 18, 18, 19, 19, 20 };
+ v16u8 filt_hz, filt_vt, vec0, vec1;
+ v16u8 out, pred, ref = { 0 };
+ v8u16 hz_out0, hz_out1, hz_out2, hz_out3, hz_out4, tmp0, tmp1;
+ v8i16 avg = { 0 };
+ v4i32 vec, var = { 0 };
+
+ filtval = LH(filter_horiz);
+ filt_hz = (v16u8)__msa_fill_h(filtval);
+ filtval = LH(filter_vert);
+ filt_vt = (v16u8)__msa_fill_h(filtval);
+
+ src0 = LD_UB(src);
+ src += src_stride;
+
+ for (loop_cnt = (height >> 2); loop_cnt--;) {
+ LD_UB4(src, src_stride, src1, src2, src3, src4);
+ src += (4 * src_stride);
+ pred = LD_UB(sec_pred);
+ sec_pred += 16;
+ LW4(dst, dst_stride, ref0, ref1, ref2, ref3);
+ dst += (4 * dst_stride);
+ INSERT_W4_UB(ref0, ref1, ref2, ref3, ref);
+ hz_out0 = HORIZ_2TAP_FILT_UH(src0, src1, mask, filt_hz, FILTER_BITS);
+ hz_out2 = HORIZ_2TAP_FILT_UH(src2, src3, mask, filt_hz, FILTER_BITS);
+ hz_out4 = HORIZ_2TAP_FILT_UH(src4, src4, mask, filt_hz, FILTER_BITS);
+ hz_out1 = (v8u16)__msa_sldi_b((v16i8)hz_out2, (v16i8)hz_out0, 8);
+ hz_out3 = (v8u16)__msa_pckod_d((v2i64)hz_out4, (v2i64)hz_out2);
+ ILVEV_B2_UB(hz_out0, hz_out1, hz_out2, hz_out3, vec0, vec1);
+ DOTP_UB2_UH(vec0, vec1, filt_vt, filt_vt, tmp0, tmp1);
+ SRARI_H2_UH(tmp0, tmp1, FILTER_BITS);
+ SAT_UH2_UH(tmp0, tmp1, 7);
+ out = (v16u8)__msa_pckev_b((v16i8)tmp1, (v16i8)tmp0);
+ out = __msa_aver_u_b(out, pred);
+ CALC_MSE_AVG_B(out, ref, var, avg);
+ src0 = src4;
+ }
+
+ vec = __msa_hadd_s_w(avg, avg);
+ *diff = HADD_SW_S32(vec);
+
+ return HADD_SW_S32(var);
+}
+
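[Editor's note] The *_hv_msa routines chain the two bilinear stages: HORIZ_2TAP_FILT_UH produces horizontally filtered rows, and a second 2-tap dot product across consecutive filtered rows applies the vertical offset, with rounding and saturation after each stage. A scalar sketch, assuming FILTER_BITS == 7:

    static int bilinear_hv_pixel(const uint8_t *above, const uint8_t *below,
                                 int i, const uint8_t *fh, const uint8_t *fv) {
      /* Stage 1 (HORIZ_2TAP_FILT_UH): horizontal taps on two adjacent rows. */
      const int h0 = (above[i] * fh[0] + above[i + 1] * fh[1] + 64) >> 7;
      const int h1 = (below[i] * fh[0] + below[i + 1] * fh[1] + 64) >> 7;
      /* Stage 2 (DOTP_UB2_UH + SRARI_H2_UH + SAT_UH2_UH): vertical taps. */
      const int v = (h0 * fv[0] + h1 * fv[1] + 64) >> 7;
      return v > 255 ? 255 : v;
    }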
+static uint32_t sub_pixel_avg_sse_diff_8width_hv_msa(
+ const uint8_t *src, int32_t src_stride,
+ const uint8_t *dst, int32_t dst_stride,
+ const uint8_t *sec_pred,
+ const uint8_t *filter_horiz, const uint8_t *filter_vert,
+ int32_t height, int32_t *diff) {
+ int16_t filtval;
+ uint32_t loop_cnt;
+ v16u8 ref0, ref1, ref2, ref3;
+ v16u8 src0, src1, src2, src3, src4;
+ v16u8 pred0, pred1, out0, out1;
+ v16u8 filt_hz, filt_vt, vec0;
+ v16u8 mask = { 0, 1, 1, 2, 2, 3, 3, 4, 4, 5, 5, 6, 6, 7, 7, 8 };
+ v8u16 hz_out0, hz_out1, tmp0, tmp1, tmp2, tmp3;
+ v8i16 avg = { 0 };
+ v4i32 vec, var = { 0 };
+
+ filtval = LH(filter_horiz);
+ filt_hz = (v16u8)__msa_fill_h(filtval);
+ filtval = LH(filter_vert);
+ filt_vt = (v16u8)__msa_fill_h(filtval);
+
+ src0 = LD_UB(src);
+ src += src_stride;
+ hz_out0 = HORIZ_2TAP_FILT_UH(src0, src0, mask, filt_hz, FILTER_BITS);
+
+ for (loop_cnt = (height >> 2); loop_cnt--;) {
+ LD_UB4(src, src_stride, src1, src2, src3, src4);
+ src += (4 * src_stride);
+ LD_UB2(sec_pred, 16, pred0, pred1);
+ sec_pred += 32;
+ LD_UB4(dst, dst_stride, ref0, ref1, ref2, ref3);
+ dst += (4 * dst_stride);
+
+ PCKEV_D2_UB(ref1, ref0, ref3, ref2, ref0, ref1);
+ hz_out1 = HORIZ_2TAP_FILT_UH(src1, src1, mask, filt_hz, FILTER_BITS);
+
+ vec0 = (v16u8)__msa_ilvev_b((v16i8)hz_out1, (v16i8)hz_out0);
+ tmp0 = __msa_dotp_u_h(vec0, filt_vt);
+ hz_out0 = HORIZ_2TAP_FILT_UH(src2, src2, mask, filt_hz, FILTER_BITS);
+
+ vec0 = (v16u8)__msa_ilvev_b((v16i8)hz_out0, (v16i8)hz_out1);
+ tmp1 = __msa_dotp_u_h(vec0, filt_vt);
+ SRARI_H2_UH(tmp0, tmp1, FILTER_BITS);
+ SAT_UH2_UH(tmp0, tmp1, 7);
+ hz_out1 = HORIZ_2TAP_FILT_UH(src3, src3, mask, filt_hz, FILTER_BITS);
+
+ vec0 = (v16u8)__msa_ilvev_b((v16i8)hz_out1, (v16i8)hz_out0);
+ tmp2 = __msa_dotp_u_h(vec0, filt_vt);
+ hz_out0 = HORIZ_2TAP_FILT_UH(src4, src4, mask, filt_hz, FILTER_BITS);
+
+ vec0 = (v16u8)__msa_ilvev_b((v16i8)hz_out0, (v16i8)hz_out1);
+ tmp3 = __msa_dotp_u_h(vec0, filt_vt);
+
+ SRARI_H2_UH(tmp2, tmp3, FILTER_BITS);
+ SAT_UH2_UH(tmp2, tmp3, 7);
+ PCKEV_B2_UB(tmp1, tmp0, tmp3, tmp2, out0, out1);
+ AVER_UB2_UB(out0, pred0, out1, pred1, out0, out1);
+
+ CALC_MSE_AVG_B(out0, ref0, var, avg);
+ CALC_MSE_AVG_B(out1, ref1, var, avg);
+ }
+
+ vec = __msa_hadd_s_w(avg, avg);
+ *diff = HADD_SW_S32(vec);
+
+ return HADD_SW_S32(var);
+}
+
+static uint32_t subpel_avg_ssediff_16w_hv_msa(const uint8_t *src,
+ int32_t src_stride,
+ const uint8_t *dst,
+ int32_t dst_stride,
+ const uint8_t *sec_pred,
+ const uint8_t *filter_horiz,
+ const uint8_t *filter_vert,
+ int32_t height,
+ int32_t *diff,
+ int32_t width) {
+ int16_t filtval;
+ uint32_t loop_cnt;
+ v16u8 src0, src1, src2, src3, src4, src5, src6, src7;
+ v16u8 ref0, ref1, ref2, ref3;
+ v16u8 pred0, pred1, pred2, pred3;
+ v16u8 out0, out1, out2, out3;
+ v16u8 filt_hz, filt_vt, vec0, vec1;
+ v16u8 mask = { 0, 1, 1, 2, 2, 3, 3, 4, 4, 5, 5, 6, 6, 7, 7, 8 };
+ v8u16 hz_out0, hz_out1, hz_out2, hz_out3, tmp0, tmp1;
+ v8i16 avg = { 0 };
+ v4i32 vec, var = { 0 };
+
+ filtval = LH(filter_horiz);
+ filt_hz = (v16u8)__msa_fill_h(filtval);
+ filtval = LH(filter_vert);
+ filt_vt = (v16u8)__msa_fill_h(filtval);
+
+ LD_UB2(src, 8, src0, src1);
+ src += src_stride;
+
+ hz_out0 = HORIZ_2TAP_FILT_UH(src0, src0, mask, filt_hz, FILTER_BITS);
+ hz_out2 = HORIZ_2TAP_FILT_UH(src1, src1, mask, filt_hz, FILTER_BITS);
+
+ for (loop_cnt = (height >> 2); loop_cnt--;) {
+ LD_UB4(src, src_stride, src0, src2, src4, src6);
+ LD_UB4(src + 8, src_stride, src1, src3, src5, src7);
+ src += (4 * src_stride);
+ LD_UB4(sec_pred, width, pred0, pred1, pred2, pred3);
+ sec_pred += (4 * width);
+
+ hz_out1 = HORIZ_2TAP_FILT_UH(src0, src0, mask, filt_hz, FILTER_BITS);
+ hz_out3 = HORIZ_2TAP_FILT_UH(src1, src1, mask, filt_hz, FILTER_BITS);
+ ILVEV_B2_UB(hz_out0, hz_out1, hz_out2, hz_out3, vec0, vec1);
+ DOTP_UB2_UH(vec0, vec1, filt_vt, filt_vt, tmp0, tmp1);
+ SRARI_H2_UH(tmp0, tmp1, FILTER_BITS);
+ SAT_UH2_UH(tmp0, tmp1, 7);
+ out0 = (v16u8)__msa_pckev_b((v16i8)tmp1, (v16i8)tmp0);
+
+ hz_out0 = HORIZ_2TAP_FILT_UH(src2, src2, mask, filt_hz, FILTER_BITS);
+ hz_out2 = HORIZ_2TAP_FILT_UH(src3, src3, mask, filt_hz, FILTER_BITS);
+ ILVEV_B2_UB(hz_out1, hz_out0, hz_out3, hz_out2, vec0, vec1);
+ DOTP_UB2_UH(vec0, vec1, filt_vt, filt_vt, tmp0, tmp1);
+ SRARI_H2_UH(tmp0, tmp1, FILTER_BITS);
+ SAT_UH2_UH(tmp0, tmp1, 7);
+ out1 = (v16u8)__msa_pckev_b((v16i8)tmp1, (v16i8)tmp0);
+
+ hz_out1 = HORIZ_2TAP_FILT_UH(src4, src4, mask, filt_hz, FILTER_BITS);
+ hz_out3 = HORIZ_2TAP_FILT_UH(src5, src5, mask, filt_hz, FILTER_BITS);
+ ILVEV_B2_UB(hz_out0, hz_out1, hz_out2, hz_out3, vec0, vec1);
+ DOTP_UB2_UH(vec0, vec1, filt_vt, filt_vt, tmp0, tmp1);
+ SRARI_H2_UH(tmp0, tmp1, FILTER_BITS);
+ SAT_UH2_UH(tmp0, tmp1, 7);
+ out2 = (v16u8)__msa_pckev_b((v16i8)tmp1, (v16i8)tmp0);
+
+ hz_out0 = HORIZ_2TAP_FILT_UH(src6, src6, mask, filt_hz, FILTER_BITS);
+ hz_out2 = HORIZ_2TAP_FILT_UH(src7, src7, mask, filt_hz, FILTER_BITS);
+ ILVEV_B2_UB(hz_out1, hz_out0, hz_out3, hz_out2, vec0, vec1);
+ DOTP_UB2_UH(vec0, vec1, filt_vt, filt_vt, tmp0, tmp1);
+ SRARI_H2_UH(tmp0, tmp1, FILTER_BITS);
+ SAT_UH2_UH(tmp0, tmp1, 7);
+ out3 = (v16u8)__msa_pckev_b((v16i8)tmp1, (v16i8)tmp0);
+
+ LD_UB4(dst, dst_stride, ref0, ref1, ref2, ref3);
+ dst += (4 * dst_stride);
+
+ AVER_UB4_UB(out0, pred0, out1, pred1, out2, pred2, out3, pred3,
+ out0, out1, out2, out3);
+
+ CALC_MSE_AVG_B(out0, ref0, var, avg);
+ CALC_MSE_AVG_B(out1, ref1, var, avg);
+ CALC_MSE_AVG_B(out2, ref2, var, avg);
+ CALC_MSE_AVG_B(out3, ref3, var, avg);
+ }
+
+ vec = __msa_hadd_s_w(avg, avg);
+ *diff = HADD_SW_S32(vec);
+
+ return HADD_SW_S32(var);
+}
+
+static uint32_t sub_pixel_avg_sse_diff_16width_hv_msa(
+ const uint8_t *src, int32_t src_stride,
+ const uint8_t *dst, int32_t dst_stride,
+ const uint8_t *sec_pred,
+ const uint8_t *filter_horiz, const uint8_t *filter_vert,
+ int32_t height, int32_t *diff) {
+ return subpel_avg_ssediff_16w_hv_msa(src, src_stride, dst, dst_stride,
+ sec_pred, filter_horiz, filter_vert,
+ height, diff, 16);
+}
+
+static uint32_t sub_pixel_avg_sse_diff_32width_hv_msa(
+ const uint8_t *src, int32_t src_stride,
+ const uint8_t *dst, int32_t dst_stride,
+ const uint8_t *sec_pred,
+ const uint8_t *filter_horiz, const uint8_t *filter_vert,
+ int32_t height, int32_t *diff) {
+ uint32_t loop_cnt, sse = 0;
+ int32_t diff0[2];
+
+ for (loop_cnt = 0; loop_cnt < 2; ++loop_cnt) {
+ sse += subpel_avg_ssediff_16w_hv_msa(src, src_stride, dst, dst_stride,
+ sec_pred, filter_horiz, filter_vert,
+ height, &diff0[loop_cnt], 32);
+ src += 16;
+ dst += 16;
+ sec_pred += 16;
+ }
+
+ *diff = diff0[0] + diff0[1];
+
+ return sse;
+}
+
+static uint32_t sub_pixel_avg_sse_diff_64width_hv_msa(
+ const uint8_t *src, int32_t src_stride,
+ const uint8_t *dst, int32_t dst_stride,
+ const uint8_t *sec_pred,
+ const uint8_t *filter_horiz, const uint8_t *filter_vert,
+ int32_t height, int32_t *diff) {
+ uint32_t loop_cnt, sse = 0;
+ int32_t diff0[4];
+
+ for (loop_cnt = 0; loop_cnt < 4; ++loop_cnt) {
+ sse += subpel_avg_ssediff_16w_hv_msa(src, src_stride, dst, dst_stride,
+ sec_pred, filter_horiz, filter_vert,
+ height, &diff0[loop_cnt], 64);
+ src += 16;
+ dst += 16;
+ sec_pred += 16;
+ }
+
+ *diff = diff0[0] + diff0[1] + diff0[2] + diff0[3];
+
+ return sse;
+}
+
#define VARIANCE_4Wx4H(sse, diff) VARIANCE_WxH(sse, diff, 4);
#define VARIANCE_4Wx8H(sse, diff) VARIANCE_WxH(sse, diff, 5);
#define VARIANCE_8Wx4H(sse, diff) VARIANCE_WxH(sse, diff, 5);
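[Editor's note] VARIANCE_WxH is defined earlier in this file (not shown in this hunk); in libvpx it subtracts the squared mean from the accumulated SSE, with shift = log2(width * height), hence the 4, 5, ... arguments above. A sketch of the assumed shape plus a worked example:

    /* Assumed shape, matching libvpx's variance helpers: */
    #define VARIANCE_WxH(sse, diff, shift) \
      ((sse) - (((int64_t)(diff) * (diff)) >> (shift)))
    /* Example, 8x8 block (shift 6): sse = 640, diff = 64
     *   640 - (64 * 64 >> 6) = 640 - 64 = 576 */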
@@ -765,3 +1866,138 @@
VPX_SUB_PIXEL_VARIANCE_WDXHT_MSA(64, 32);
VPX_SUB_PIXEL_VARIANCE_WDXHT_MSA(64, 64);
+
+#define VPX_SUB_PIXEL_AVG_VARIANCE_WDXHT_MSA(wd, ht) \
+uint32_t vpx_sub_pixel_avg_variance##wd##x##ht##_msa( \
+ const uint8_t *src_ptr, int32_t src_stride, \
+ int32_t xoffset, int32_t yoffset, \
+ const uint8_t *ref_ptr, int32_t ref_stride, \
+ uint32_t *sse, const uint8_t *sec_pred) { \
+ int32_t diff; \
+ const uint8_t *h_filter = bilinear_filters_msa[xoffset]; \
+ const uint8_t *v_filter = bilinear_filters_msa[yoffset]; \
+ \
+ if (yoffset) { \
+ if (xoffset) { \
+ *sse = sub_pixel_avg_sse_diff_##wd##width_hv_msa(src_ptr, src_stride, \
+ ref_ptr, ref_stride, \
+ sec_pred, h_filter, \
+ v_filter, ht, &diff); \
+ } else { \
+ *sse = sub_pixel_avg_sse_diff_##wd##width_v_msa(src_ptr, src_stride, \
+ ref_ptr, ref_stride, \
+ sec_pred, v_filter, \
+ ht, &diff); \
+ } \
+ } else { \
+ if (xoffset) { \
+ *sse = sub_pixel_avg_sse_diff_##wd##width_h_msa(src_ptr, src_stride, \
+ ref_ptr, ref_stride, \
+ sec_pred, h_filter, \
+ ht, &diff); \
+ } else { \
+ *sse = avg_sse_diff_##wd##width_msa(src_ptr, src_stride, \
+ ref_ptr, ref_stride, \
+ sec_pred, ht, &diff); \
+ } \
+ } \
+ \
+ return VARIANCE_##wd##Wx##ht##H(*sse, diff); \
+}
+
+VPX_SUB_PIXEL_AVG_VARIANCE_WDXHT_MSA(4, 4);
+VPX_SUB_PIXEL_AVG_VARIANCE_WDXHT_MSA(4, 8);
+
+VPX_SUB_PIXEL_AVG_VARIANCE_WDXHT_MSA(8, 4);
+VPX_SUB_PIXEL_AVG_VARIANCE_WDXHT_MSA(8, 8);
+VPX_SUB_PIXEL_AVG_VARIANCE_WDXHT_MSA(8, 16);
+
+VPX_SUB_PIXEL_AVG_VARIANCE_WDXHT_MSA(16, 8);
+VPX_SUB_PIXEL_AVG_VARIANCE_WDXHT_MSA(16, 16);
+VPX_SUB_PIXEL_AVG_VARIANCE_WDXHT_MSA(16, 32);
+
+VPX_SUB_PIXEL_AVG_VARIANCE_WDXHT_MSA(32, 16);
+VPX_SUB_PIXEL_AVG_VARIANCE_WDXHT_MSA(32, 32);
+
+uint32_t vpx_sub_pixel_avg_variance32x64_msa(const uint8_t *src_ptr,
+ int32_t src_stride,
+ int32_t xoffset,
+ int32_t yoffset,
+ const uint8_t *ref_ptr,
+ int32_t ref_stride,
+ uint32_t *sse,
+ const uint8_t *sec_pred) {
+ int32_t diff;
+ const uint8_t *h_filter = bilinear_filters_msa[xoffset];
+ const uint8_t *v_filter = bilinear_filters_msa[yoffset];
+
+ if (yoffset) {
+ if (xoffset) {
+ *sse = sub_pixel_avg_sse_diff_32width_hv_msa(src_ptr, src_stride,
+ ref_ptr, ref_stride,
+ sec_pred, h_filter,
+ v_filter, 64, &diff);
+ } else {
+ *sse = sub_pixel_avg_sse_diff_32width_v_msa(src_ptr, src_stride,
+ ref_ptr, ref_stride,
+ sec_pred, v_filter,
+ 64, &diff);
+ }
+ } else {
+ if (xoffset) {
+ *sse = sub_pixel_avg_sse_diff_32width_h_msa(src_ptr, src_stride,
+ ref_ptr, ref_stride,
+ sec_pred, h_filter,
+ 64, &diff);
+ } else {
+ *sse = avg_sse_diff_32x64_msa(src_ptr, src_stride, ref_ptr, ref_stride,
+ sec_pred, &diff);
+ }
+ }
+
+ return VARIANCE_32Wx64H(*sse, diff);
+}
+
+#define VPX_SUB_PIXEL_AVG_VARIANCE64XHEIGHT_MSA(ht) \
+uint32_t vpx_sub_pixel_avg_variance64x##ht##_msa(const uint8_t *src_ptr, \
+ int32_t src_stride, \
+ int32_t xoffset, \
+ int32_t yoffset, \
+ const uint8_t *ref_ptr, \
+ int32_t ref_stride, \
+ uint32_t *sse, \
+ const uint8_t *sec_pred) { \
+ int32_t diff; \
+ const uint8_t *h_filter = bilinear_filters_msa[xoffset]; \
+ const uint8_t *v_filter = bilinear_filters_msa[yoffset]; \
+ \
+ if (yoffset) { \
+ if (xoffset) { \
+ *sse = sub_pixel_avg_sse_diff_64width_hv_msa(src_ptr, src_stride, \
+ ref_ptr, ref_stride, \
+ sec_pred, h_filter, \
+ v_filter, ht, &diff); \
+ } else { \
+ *sse = sub_pixel_avg_sse_diff_64width_v_msa(src_ptr, src_stride, \
+ ref_ptr, ref_stride, \
+ sec_pred, v_filter, \
+ ht, &diff); \
+ } \
+ } else { \
+ if (xoffset) { \
+ *sse = sub_pixel_avg_sse_diff_64width_h_msa(src_ptr, src_stride, \
+ ref_ptr, ref_stride, \
+ sec_pred, h_filter, \
+ ht, &diff); \
+ } else { \
+ *sse = avg_sse_diff_64x##ht##_msa(src_ptr, src_stride, \
+ ref_ptr, ref_stride, \
+ sec_pred, &diff); \
+ } \
+ } \
+ \
+ return VARIANCE_64Wx##ht##H(*sse, diff); \
+}
+
+VPX_SUB_PIXEL_AVG_VARIANCE64XHEIGHT_MSA(32);
+VPX_SUB_PIXEL_AVG_VARIANCE64XHEIGHT_MSA(64);
diff --git a/vp9/common/vp9_prob.c b/vpx_dsp/prob.c
similarity index 83%
rename from vp9/common/vp9_prob.c
rename to vpx_dsp/prob.c
index 3b7b9bf..639d24d 100644
--- a/vp9/common/vp9_prob.c
+++ b/vpx_dsp/prob.c
@@ -8,9 +8,9 @@
* be found in the AUTHORS file in the root of the source tree.
*/
-#include "vp9/common/vp9_prob.h"
+#include "./prob.h"
-const uint8_t vp9_norm[256] = {
+const uint8_t vpx_norm[256] = {
0, 7, 6, 6, 5, 5, 5, 5, 4, 4, 4, 4, 4, 4, 4, 4,
3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3,
2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
@@ -30,10 +30,10 @@
};
static unsigned int tree_merge_probs_impl(unsigned int i,
- const vp9_tree_index *tree,
- const vp9_prob *pre_probs,
+ const vpx_tree_index *tree,
+ const vpx_prob *pre_probs,
const unsigned int *counts,
- vp9_prob *probs) {
+ vpx_prob *probs) {
const int l = tree[i];
const unsigned int left_count = (l <= 0)
? counts[-l]
@@ -47,7 +47,7 @@
return left_count + right_count;
}
-void vp9_tree_merge_probs(const vp9_tree_index *tree, const vp9_prob *pre_probs,
- const unsigned int *counts, vp9_prob *probs) {
+void vpx_tree_merge_probs(const vpx_tree_index *tree, const vpx_prob *pre_probs,
+ const unsigned int *counts, vpx_prob *probs) {
tree_merge_probs_impl(0, tree, pre_probs, counts, probs);
}
diff --git a/vp9/common/vp9_prob.h b/vpx_dsp/prob.h
similarity index 70%
rename from vp9/common/vp9_prob.h
rename to vpx_dsp/prob.h
index c69c62c..729f90a 100644
--- a/vp9/common/vp9_prob.h
+++ b/vpx_dsp/prob.h
@@ -8,64 +8,63 @@
* be found in the AUTHORS file in the root of the source tree.
*/
-#ifndef VP9_COMMON_VP9_PROB_H_
-#define VP9_COMMON_VP9_PROB_H_
+#ifndef VPX_DSP_PROB_H_
+#define VPX_DSP_PROB_H_
#include "./vpx_config.h"
+#include "./vpx_dsp_common.h"
#include "vpx_ports/mem.h"
-#include "vp9/common/vp9_common.h"
-
#ifdef __cplusplus
extern "C" {
#endif
-typedef uint8_t vp9_prob;
+typedef uint8_t vpx_prob;
#define MAX_PROB 255
-#define vp9_prob_half ((vp9_prob) 128)
+#define vpx_prob_half ((vpx_prob) 128)
-typedef int8_t vp9_tree_index;
+typedef int8_t vpx_tree_index;
#define TREE_SIZE(leaf_count) (2 * (leaf_count) - 2)
-#define vp9_complement(x) (255 - x)
+#define vpx_complement(x) (255 - x)
#define MODE_MV_COUNT_SAT 20
/* We build coding trees compactly in arrays.
- Each node of the tree is a pair of vp9_tree_indices.
+ Each node of the tree is a pair of vpx_tree_indices.
Array index often references a corresponding probability table.
Index <= 0 means done encoding/decoding and value = -Index,
Index > 0 means need another bit, specification at index.
Nonnegative indices are always even; processing begins at node 0. */
-typedef const vp9_tree_index vp9_tree[];
+typedef const vpx_tree_index vpx_tree[];
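[Editor's example, illustrative only and not part of the patch] A 3-leaf tree in the compact array form described in the comment above; TREE_SIZE(3) == 4, and leaves are stored as non-positive negated tokens:

    static const vpx_tree_index example_tree[TREE_SIZE(3)] = {
      0, 2,    /* node 0: bit 0 -> token 0 (-0), bit 1 -> continue at node 2 */
      -1, -2   /* node 2: bit 0 -> token 1,      bit 1 -> token 2 */
    };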
-static INLINE vp9_prob clip_prob(int p) {
+static INLINE vpx_prob clip_prob(int p) {
return (p > 255) ? 255 : (p < 1) ? 1 : p;
}
-static INLINE vp9_prob get_prob(int num, int den) {
+static INLINE vpx_prob get_prob(int num, int den) {
return (den == 0) ? 128u : clip_prob(((int64_t)num * 256 + (den >> 1)) / den);
}
-static INLINE vp9_prob get_binary_prob(int n0, int n1) {
+static INLINE vpx_prob get_binary_prob(int n0, int n1) {
return get_prob(n0, n0 + n1);
}
/* This function assumes prob1 and prob2 are already within [1,255] range. */
-static INLINE vp9_prob weighted_prob(int prob1, int prob2, int factor) {
+static INLINE vpx_prob weighted_prob(int prob1, int prob2, int factor) {
return ROUND_POWER_OF_TWO(prob1 * (256 - factor) + prob2 * factor, 8);
}
-static INLINE vp9_prob merge_probs(vp9_prob pre_prob,
+static INLINE vpx_prob merge_probs(vpx_prob pre_prob,
const unsigned int ct[2],
unsigned int count_sat,
unsigned int max_update_factor) {
- const vp9_prob prob = get_binary_prob(ct[0], ct[1]);
+ const vpx_prob prob = get_binary_prob(ct[0], ct[1]);
const unsigned int count = MIN(ct[0] + ct[1], count_sat);
const unsigned int factor = max_update_factor * count / count_sat;
return weighted_prob(pre_prob, prob, factor);
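[Editor's note] A worked example for merge_probs(), using MODE_MV_COUNT_SAT (20) from above and assuming a max_update_factor of 128 (the MODE_MV default elsewhere in libvpx):

    /* ct = {60, 20}: prob = get_binary_prob(60, 20) = get_prob(60, 80) = 192
     * count = MIN(80, 20) = 20, factor = 128 * 20 / 20 = 128
     * merge_probs(128, ct, 20, 128)
     *   = ROUND_POWER_OF_TWO(128 * (256 - 128) + 192 * 128, 8) = 160 */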
@@ -77,7 +76,7 @@
70, 76, 83, 89, 96, 102, 108, 115, 121, 128
};
-static INLINE vp9_prob mode_mv_merge_probs(vp9_prob pre_prob,
+static INLINE vpx_prob mode_mv_merge_probs(vpx_prob pre_prob,
const unsigned int ct[2]) {
const unsigned int den = ct[0] + ct[1];
if (den == 0) {
@@ -85,20 +84,20 @@
} else {
const unsigned int count = MIN(den, MODE_MV_COUNT_SAT);
const unsigned int factor = count_to_update_factor[count];
- const vp9_prob prob =
+ const vpx_prob prob =
clip_prob(((int64_t)(ct[0]) * 256 + (den >> 1)) / den);
return weighted_prob(pre_prob, prob, factor);
}
}
-void vp9_tree_merge_probs(const vp9_tree_index *tree, const vp9_prob *pre_probs,
- const unsigned int *counts, vp9_prob *probs);
+void vpx_tree_merge_probs(const vpx_tree_index *tree, const vpx_prob *pre_probs,
+ const unsigned int *counts, vpx_prob *probs);
-DECLARE_ALIGNED(16, extern const uint8_t, vp9_norm[256]);
+DECLARE_ALIGNED(16, extern const uint8_t, vpx_norm[256]);
#ifdef __cplusplus
} // extern "C"
#endif
-#endif // VP9_COMMON_VP9_PROB_H_
+#endif // VPX_DSP_PROB_H_
diff --git a/vpx_dsp/quantize.c b/vpx_dsp/quantize.c
new file mode 100644
index 0000000..137f5bc
--- /dev/null
+++ b/vpx_dsp/quantize.c
@@ -0,0 +1,337 @@
+/*
+ * Copyright (c) 2015 The WebM project authors. All Rights Reserved.
+ *
+ * Use of this source code is governed by a BSD-style license
+ * that can be found in the LICENSE file in the root of the source
+ * tree. An additional intellectual property rights grant can be found
+ * in the file PATENTS. All contributing project authors may
+ * be found in the AUTHORS file in the root of the source tree.
+ */
+
+#include "vpx_dsp/quantize.h"
+#include "vpx_mem/vpx_mem.h"
+
+void vp9_quantize_dc(const tran_low_t *coeff_ptr,
+ int n_coeffs, int skip_block,
+ const int16_t *round_ptr, const int16_t quant,
+ tran_low_t *qcoeff_ptr, tran_low_t *dqcoeff_ptr,
+ const int16_t dequant_ptr, uint16_t *eob_ptr) {
+ const int rc = 0;
+ const int coeff = coeff_ptr[rc];
+ const int coeff_sign = (coeff >> 31);
+ const int abs_coeff = (coeff ^ coeff_sign) - coeff_sign;
+ int tmp, eob = -1;
+
+ memset(qcoeff_ptr, 0, n_coeffs * sizeof(*qcoeff_ptr));
+ memset(dqcoeff_ptr, 0, n_coeffs * sizeof(*dqcoeff_ptr));
+
+ if (!skip_block) {
+ tmp = clamp(abs_coeff + round_ptr[rc != 0], INT16_MIN, INT16_MAX);
+ tmp = (tmp * quant) >> 16;
+ qcoeff_ptr[rc] = (tmp ^ coeff_sign) - coeff_sign;
+ dqcoeff_ptr[rc] = qcoeff_ptr[rc] * dequant_ptr;
+ if (tmp)
+ eob = 0;
+ }
+ *eob_ptr = eob + 1;
+}
+
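[Editor's note] A worked example for vp9_quantize_dc() above, with illustrative values (quant is a Q16 fixed-point scale):

    /* coeff = 100, round_ptr[0] = 8, quant = 16384 (0.25 in Q16), dequant = 4:
     *   tmp            = clamp(100 + 8, INT16_MIN, INT16_MAX) = 108
     *   tmp            = (108 * 16384) >> 16                  = 27
     *   qcoeff_ptr[0]  = 27
     *   dqcoeff_ptr[0] = 27 * 4 = 108, eob = 0, *eob_ptr = 1 */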
+#if CONFIG_VP9_HIGHBITDEPTH
+void vp9_highbd_quantize_dc(const tran_low_t *coeff_ptr,
+ int n_coeffs, int skip_block,
+ const int16_t *round_ptr, const int16_t quant,
+ tran_low_t *qcoeff_ptr, tran_low_t *dqcoeff_ptr,
+ const int16_t dequant_ptr, uint16_t *eob_ptr) {
+ int eob = -1;
+
+ memset(qcoeff_ptr, 0, n_coeffs * sizeof(*qcoeff_ptr));
+ memset(dqcoeff_ptr, 0, n_coeffs * sizeof(*dqcoeff_ptr));
+
+ if (!skip_block) {
+ const int coeff = coeff_ptr[0];
+ const int coeff_sign = (coeff >> 31);
+ const int abs_coeff = (coeff ^ coeff_sign) - coeff_sign;
+ const int64_t tmp = abs_coeff + round_ptr[0];
+ const uint32_t abs_qcoeff = (uint32_t)((tmp * quant) >> 16);
+ qcoeff_ptr[0] = (tran_low_t)((abs_qcoeff ^ coeff_sign) - coeff_sign);
+ dqcoeff_ptr[0] = qcoeff_ptr[0] * dequant_ptr;
+ if (abs_qcoeff)
+ eob = 0;
+ }
+ *eob_ptr = eob + 1;
+}
+#endif
+
+void vp9_quantize_dc_32x32(const tran_low_t *coeff_ptr, int skip_block,
+ const int16_t *round_ptr, const int16_t quant,
+ tran_low_t *qcoeff_ptr, tran_low_t *dqcoeff_ptr,
+ const int16_t dequant_ptr, uint16_t *eob_ptr) {
+ const int n_coeffs = 1024;
+ const int rc = 0;
+ const int coeff = coeff_ptr[rc];
+ const int coeff_sign = (coeff >> 31);
+ const int abs_coeff = (coeff ^ coeff_sign) - coeff_sign;
+ int tmp, eob = -1;
+
+ memset(qcoeff_ptr, 0, n_coeffs * sizeof(*qcoeff_ptr));
+ memset(dqcoeff_ptr, 0, n_coeffs * sizeof(*dqcoeff_ptr));
+
+ if (!skip_block) {
+ tmp = clamp(abs_coeff + ROUND_POWER_OF_TWO(round_ptr[rc != 0], 1),
+ INT16_MIN, INT16_MAX);
+ tmp = (tmp * quant) >> 15;
+ qcoeff_ptr[rc] = (tmp ^ coeff_sign) - coeff_sign;
+ dqcoeff_ptr[rc] = qcoeff_ptr[rc] * dequant_ptr / 2;
+ if (tmp)
+ eob = 0;
+ }
+ *eob_ptr = eob + 1;
+}
+
+#if CONFIG_VP9_HIGHBITDEPTH
+void vp9_highbd_quantize_dc_32x32(const tran_low_t *coeff_ptr,
+ int skip_block,
+ const int16_t *round_ptr,
+ const int16_t quant,
+ tran_low_t *qcoeff_ptr,
+ tran_low_t *dqcoeff_ptr,
+ const int16_t dequant_ptr,
+ uint16_t *eob_ptr) {
+ const int n_coeffs = 1024;
+ int eob = -1;
+
+ memset(qcoeff_ptr, 0, n_coeffs * sizeof(*qcoeff_ptr));
+ memset(dqcoeff_ptr, 0, n_coeffs * sizeof(*dqcoeff_ptr));
+
+ if (!skip_block) {
+ const int coeff = coeff_ptr[0];
+ const int coeff_sign = (coeff >> 31);
+ const int abs_coeff = (coeff ^ coeff_sign) - coeff_sign;
+ const int64_t tmp = abs_coeff + ROUND_POWER_OF_TWO(round_ptr[0], 1);
+ const uint32_t abs_qcoeff = (uint32_t)((tmp * quant) >> 15);
+ qcoeff_ptr[0] = (tran_low_t)((abs_qcoeff ^ coeff_sign) - coeff_sign);
+ dqcoeff_ptr[0] = qcoeff_ptr[0] * dequant_ptr / 2;
+ if (abs_qcoeff)
+ eob = 0;
+ }
+ *eob_ptr = eob + 1;
+}
+#endif
+
+void vp9_quantize_b_c(const tran_low_t *coeff_ptr, intptr_t n_coeffs,
+ int skip_block,
+ const int16_t *zbin_ptr, const int16_t *round_ptr,
+ const int16_t *quant_ptr, const int16_t *quant_shift_ptr,
+ tran_low_t *qcoeff_ptr, tran_low_t *dqcoeff_ptr,
+ const int16_t *dequant_ptr,
+ uint16_t *eob_ptr,
+ const int16_t *scan, const int16_t *iscan) {
+ int i, non_zero_count = (int)n_coeffs, eob = -1;
+ const int zbins[2] = {zbin_ptr[0], zbin_ptr[1]};
+ const int nzbins[2] = {zbins[0] * -1, zbins[1] * -1};
+ (void)iscan;
+
+ memset(qcoeff_ptr, 0, n_coeffs * sizeof(*qcoeff_ptr));
+ memset(dqcoeff_ptr, 0, n_coeffs * sizeof(*dqcoeff_ptr));
+
+ if (!skip_block) {
+ // Pre-scan pass
+ for (i = (int)n_coeffs - 1; i >= 0; i--) {
+ const int rc = scan[i];
+ const int coeff = coeff_ptr[rc];
+
+ if (coeff < zbins[rc != 0] && coeff > nzbins[rc != 0])
+ non_zero_count--;
+ else
+ break;
+ }
+
+    // Quantization pass: All coefficients with index >= non_zero_count are
+    // skippable. Note: non_zero_count can be zero.
+ for (i = 0; i < non_zero_count; i++) {
+ const int rc = scan[i];
+ const int coeff = coeff_ptr[rc];
+ const int coeff_sign = (coeff >> 31);
+ const int abs_coeff = (coeff ^ coeff_sign) - coeff_sign;
+
+ if (abs_coeff >= zbins[rc != 0]) {
+ int tmp = clamp(abs_coeff + round_ptr[rc != 0], INT16_MIN, INT16_MAX);
+ tmp = ((((tmp * quant_ptr[rc != 0]) >> 16) + tmp) *
+ quant_shift_ptr[rc != 0]) >> 16; // quantization
+ qcoeff_ptr[rc] = (tmp ^ coeff_sign) - coeff_sign;
+ dqcoeff_ptr[rc] = qcoeff_ptr[rc] * dequant_ptr[rc != 0];
+
+ if (tmp)
+ eob = i;
+ }
+ }
+ }
+ *eob_ptr = eob + 1;
+}
+
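[Editor's note] The two-step multiply in vp9_quantize_b_c() extends the quantizer's precision beyond a single 16-bit factor. With x = abs_coeff + round, Q1 = quant_ptr[rc != 0] and Q2 = quant_shift_ptr[rc != 0]:

    /* tmp = ((((x * Q1) >> 16) + x) * Q2) >> 16
     *     ~= x * (Q1 + 65536) * Q2 / 2^32
     * i.e. the effective scale is (Q1 + 2^16) * Q2 / 2^32, giving finer
     * quantizer granularity than one 16-bit multiply could express. */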
+#if CONFIG_VP9_HIGHBITDEPTH
+void vp9_highbd_quantize_b_c(const tran_low_t *coeff_ptr, intptr_t n_coeffs,
+ int skip_block, const int16_t *zbin_ptr,
+ const int16_t *round_ptr, const int16_t *quant_ptr,
+ const int16_t *quant_shift_ptr,
+ tran_low_t *qcoeff_ptr, tran_low_t *dqcoeff_ptr,
+ const int16_t *dequant_ptr,
+ uint16_t *eob_ptr, const int16_t *scan,
+ const int16_t *iscan) {
+ int i, non_zero_count = (int)n_coeffs, eob = -1;
+ const int zbins[2] = {zbin_ptr[0], zbin_ptr[1]};
+ const int nzbins[2] = {zbins[0] * -1, zbins[1] * -1};
+ (void)iscan;
+
+ memset(qcoeff_ptr, 0, n_coeffs * sizeof(*qcoeff_ptr));
+ memset(dqcoeff_ptr, 0, n_coeffs * sizeof(*dqcoeff_ptr));
+
+ if (!skip_block) {
+ // Pre-scan pass
+ for (i = (int)n_coeffs - 1; i >= 0; i--) {
+ const int rc = scan[i];
+ const int coeff = coeff_ptr[rc];
+
+ if (coeff < zbins[rc != 0] && coeff > nzbins[rc != 0])
+ non_zero_count--;
+ else
+ break;
+ }
+
+    // Quantization pass: All coefficients with index >= non_zero_count are
+    // skippable. Note: non_zero_count can be zero.
+ for (i = 0; i < non_zero_count; i++) {
+ const int rc = scan[i];
+ const int coeff = coeff_ptr[rc];
+ const int coeff_sign = (coeff >> 31);
+ const int abs_coeff = (coeff ^ coeff_sign) - coeff_sign;
+
+ if (abs_coeff >= zbins[rc != 0]) {
+ const int64_t tmp1 = abs_coeff + round_ptr[rc != 0];
+ const int64_t tmp2 = ((tmp1 * quant_ptr[rc != 0]) >> 16) + tmp1;
+ const uint32_t abs_qcoeff =
+ (uint32_t)((tmp2 * quant_shift_ptr[rc != 0]) >> 16);
+ qcoeff_ptr[rc] = (tran_low_t)((abs_qcoeff ^ coeff_sign) - coeff_sign);
+ dqcoeff_ptr[rc] = qcoeff_ptr[rc] * dequant_ptr[rc != 0];
+ if (abs_qcoeff)
+ eob = i;
+ }
+ }
+ }
+ *eob_ptr = eob + 1;
+}
+#endif
+
+void vp9_quantize_b_32x32_c(const tran_low_t *coeff_ptr, intptr_t n_coeffs,
+ int skip_block,
+ const int16_t *zbin_ptr, const int16_t *round_ptr,
+ const int16_t *quant_ptr,
+ const int16_t *quant_shift_ptr,
+ tran_low_t *qcoeff_ptr, tran_low_t *dqcoeff_ptr,
+ const int16_t *dequant_ptr,
+ uint16_t *eob_ptr,
+ const int16_t *scan, const int16_t *iscan) {
+ const int zbins[2] = {ROUND_POWER_OF_TWO(zbin_ptr[0], 1),
+ ROUND_POWER_OF_TWO(zbin_ptr[1], 1)};
+ const int nzbins[2] = {zbins[0] * -1, zbins[1] * -1};
+
+ int idx = 0;
+ int idx_arr[1024];
+ int i, eob = -1;
+ (void)iscan;
+
+ memset(qcoeff_ptr, 0, n_coeffs * sizeof(*qcoeff_ptr));
+ memset(dqcoeff_ptr, 0, n_coeffs * sizeof(*dqcoeff_ptr));
+
+ if (!skip_block) {
+ // Pre-scan pass
+ for (i = 0; i < n_coeffs; i++) {
+ const int rc = scan[i];
+ const int coeff = coeff_ptr[rc];
+
+ // If the coefficient is out of the base ZBIN range, keep it for
+ // quantization.
+ if (coeff >= zbins[rc != 0] || coeff <= nzbins[rc != 0])
+ idx_arr[idx++] = i;
+ }
+
+    // Quantization pass: only process the coefficients selected in the
+    // pre-scan pass. Note: idx can be zero.
+ for (i = 0; i < idx; i++) {
+ const int rc = scan[idx_arr[i]];
+ const int coeff = coeff_ptr[rc];
+ const int coeff_sign = (coeff >> 31);
+ int tmp;
+ int abs_coeff = (coeff ^ coeff_sign) - coeff_sign;
+ abs_coeff += ROUND_POWER_OF_TWO(round_ptr[rc != 0], 1);
+ abs_coeff = clamp(abs_coeff, INT16_MIN, INT16_MAX);
+ tmp = ((((abs_coeff * quant_ptr[rc != 0]) >> 16) + abs_coeff) *
+ quant_shift_ptr[rc != 0]) >> 15;
+
+ qcoeff_ptr[rc] = (tmp ^ coeff_sign) - coeff_sign;
+ dqcoeff_ptr[rc] = qcoeff_ptr[rc] * dequant_ptr[rc != 0] / 2;
+
+ if (tmp)
+ eob = idx_arr[i];
+ }
+ }
+ *eob_ptr = eob + 1;
+}
+
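[Editor's note] Relative to vp9_quantize_b_c(), the 32x32 variant evaluates the same quantizer with one extra fractional bit: zbin and round are halved on the way in, the final shift is 15 instead of 16, and the dequantized value is divided by 2, so step size and reconstruction are both effectively halved:

    /* With p(x) = (((x * Q1) >> 16) + x) * Q2 (the product above):
     *   quantize_b:       q = p(abs + round)     >> 16, dq = q * dequant
     *   quantize_b_32x32: q = p(abs + round / 2) >> 15, dq = q * dequant / 2
     * i.e. one extra fractional bit for the larger transform. */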
+#if CONFIG_VP9_HIGHBITDEPTH
+void vp9_highbd_quantize_b_32x32_c(const tran_low_t *coeff_ptr,
+ intptr_t n_coeffs, int skip_block,
+ const int16_t *zbin_ptr,
+ const int16_t *round_ptr,
+ const int16_t *quant_ptr,
+ const int16_t *quant_shift_ptr,
+ tran_low_t *qcoeff_ptr,
+ tran_low_t *dqcoeff_ptr,
+ const int16_t *dequant_ptr,
+ uint16_t *eob_ptr,
+ const int16_t *scan, const int16_t *iscan) {
+ const int zbins[2] = {ROUND_POWER_OF_TWO(zbin_ptr[0], 1),
+ ROUND_POWER_OF_TWO(zbin_ptr[1], 1)};
+ const int nzbins[2] = {zbins[0] * -1, zbins[1] * -1};
+
+ int idx = 0;
+ int idx_arr[1024];
+ int i, eob = -1;
+ (void)iscan;
+
+ memset(qcoeff_ptr, 0, n_coeffs * sizeof(*qcoeff_ptr));
+ memset(dqcoeff_ptr, 0, n_coeffs * sizeof(*dqcoeff_ptr));
+
+ if (!skip_block) {
+ // Pre-scan pass
+ for (i = 0; i < n_coeffs; i++) {
+ const int rc = scan[i];
+ const int coeff = coeff_ptr[rc];
+
+ // If the coefficient is out of the base ZBIN range, keep it for
+ // quantization.
+ if (coeff >= zbins[rc != 0] || coeff <= nzbins[rc != 0])
+ idx_arr[idx++] = i;
+ }
+
+    // Quantization pass: only process the coefficients selected in the
+    // pre-scan pass. Note: idx can be zero.
+ for (i = 0; i < idx; i++) {
+ const int rc = scan[idx_arr[i]];
+ const int coeff = coeff_ptr[rc];
+ const int coeff_sign = (coeff >> 31);
+ const int abs_coeff = (coeff ^ coeff_sign) - coeff_sign;
+ const int64_t tmp1 = abs_coeff
+ + ROUND_POWER_OF_TWO(round_ptr[rc != 0], 1);
+ const int64_t tmp2 = ((tmp1 * quant_ptr[rc != 0]) >> 16) + tmp1;
+ const uint32_t abs_qcoeff =
+ (uint32_t)((tmp2 * quant_shift_ptr[rc != 0]) >> 15);
+ qcoeff_ptr[rc] = (tran_low_t)((abs_qcoeff ^ coeff_sign) - coeff_sign);
+ dqcoeff_ptr[rc] = qcoeff_ptr[rc] * dequant_ptr[rc != 0] / 2;
+ if (abs_qcoeff)
+ eob = idx_arr[i];
+ }
+ }
+ *eob_ptr = eob + 1;
+}
+#endif
diff --git a/vpx_dsp/quantize.h b/vpx_dsp/quantize.h
new file mode 100644
index 0000000..0ad1744
--- /dev/null
+++ b/vpx_dsp/quantize.h
@@ -0,0 +1,51 @@
+/*
+ * Copyright (c) 2015 The WebM project authors. All Rights Reserved.
+ *
+ * Use of this source code is governed by a BSD-style license
+ * that can be found in the LICENSE file in the root of the source
+ * tree. An additional intellectual property rights grant can be found
+ * in the file PATENTS. All contributing project authors may
+ * be found in the AUTHORS file in the root of the source tree.
+ */
+
+#ifndef VPX_DSP_QUANTIZE_H_
+#define VPX_DSP_QUANTIZE_H_
+
+#include "./vpx_config.h"
+#include "vpx_dsp/vpx_dsp_common.h"
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+void vp9_quantize_dc(const tran_low_t *coeff_ptr,
+ int n_coeffs, int skip_block,
+                     const int16_t *round_ptr, const int16_t quant,
+ tran_low_t *qcoeff_ptr, tran_low_t *dqcoeff_ptr,
+ const int16_t dequant_ptr, uint16_t *eob_ptr);
+void vp9_quantize_dc_32x32(const tran_low_t *coeff_ptr, int skip_block,
+                           const int16_t *round_ptr, const int16_t quant,
+ tran_low_t *qcoeff_ptr, tran_low_t *dqcoeff_ptr,
+ const int16_t dequant_ptr, uint16_t *eob_ptr);
+
+#if CONFIG_VP9_HIGHBITDEPTH
+void vp9_highbd_quantize_dc(const tran_low_t *coeff_ptr,
+ int n_coeffs, int skip_block,
+                            const int16_t *round_ptr, const int16_t quant,
+ tran_low_t *qcoeff_ptr, tran_low_t *dqcoeff_ptr,
+ const int16_t dequant_ptr, uint16_t *eob_ptr);
+void vp9_highbd_quantize_dc_32x32(const tran_low_t *coeff_ptr,
+ int skip_block,
+ const int16_t *round_ptr,
+                                  const int16_t quant,
+ tran_low_t *qcoeff_ptr,
+ tran_low_t *dqcoeff_ptr,
+ const int16_t dequant_ptr,
+ uint16_t *eob_ptr);
+#endif
+
+#ifdef __cplusplus
+} // extern "C"
+#endif
+
+#endif // VPX_DSP_QUANTIZE_H_
diff --git a/vpx_dsp/vpx_dsp.mk b/vpx_dsp/vpx_dsp.mk
index 70a131c..07f2d6a 100644
--- a/vpx_dsp/vpx_dsp.mk
+++ b/vpx_dsp/vpx_dsp.mk
@@ -9,9 +9,86 @@
##
DSP_SRCS-yes += vpx_dsp.mk
+DSP_SRCS-yes += vpx_dsp_common.h
DSP_SRCS-$(HAVE_MSA) += mips/macros_msa.h
+# bit reader
+DSP_SRCS-yes += prob.h
+DSP_SRCS-yes += prob.c
+
+ifeq ($(CONFIG_ENCODERS),yes)
+DSP_SRCS-yes += bitwriter.h
+DSP_SRCS-yes += bitwriter.c
+DSP_SRCS-yes += bitwriter_buffer.c
+DSP_SRCS-yes += bitwriter_buffer.h
+endif
+
+ifeq ($(CONFIG_DECODERS),yes)
+DSP_SRCS-yes += bitreader.h
+DSP_SRCS-yes += bitreader.c
+DSP_SRCS-yes += bitreader_buffer.c
+DSP_SRCS-yes += bitreader_buffer.h
+endif
+
+# loop filters
+DSP_SRCS-yes += loopfilter.c
+
+DSP_SRCS-$(ARCH_X86)$(ARCH_X86_64) += x86/loopfilter_sse2.c
+DSP_SRCS-$(HAVE_AVX2) += x86/loopfilter_avx2.c
+DSP_SRCS-$(HAVE_MMX) += x86/loopfilter_mmx.asm
+
+DSP_SRCS-$(HAVE_NEON) += arm/loopfilter_neon.c
+ifeq ($(HAVE_NEON_ASM),yes)
+DSP_SRCS-yes += arm/loopfilter_mb_neon$(ASM)
+DSP_SRCS-yes += arm/loopfilter_16_neon$(ASM)
+DSP_SRCS-yes += arm/loopfilter_8_neon$(ASM)
+DSP_SRCS-yes += arm/loopfilter_4_neon$(ASM)
+else
+ifeq ($(HAVE_NEON),yes)
+DSP_SRCS-yes += arm/loopfilter_16_neon.c
+DSP_SRCS-yes += arm/loopfilter_8_neon.c
+DSP_SRCS-yes += arm/loopfilter_4_neon.c
+endif # HAVE_NEON
+endif # HAVE_NEON_ASM
+
+DSP_SRCS-$(HAVE_MSA) += mips/loopfilter_msa.h
+DSP_SRCS-$(HAVE_MSA) += mips/loopfilter_16_msa.c
+DSP_SRCS-$(HAVE_MSA) += mips/loopfilter_8_msa.c
+DSP_SRCS-$(HAVE_MSA) += mips/loopfilter_4_msa.c
+
+ifeq ($(CONFIG_VP9_HIGHBITDEPTH),yes)
+DSP_SRCS-$(HAVE_SSE2) += x86/highbd_loopfilter_sse2.c
+endif # CONFIG_VP9_HIGHBITDEPTH
+
+# forward transform
+ifeq ($(CONFIG_VP9_ENCODER),yes)
+DSP_SRCS-yes += fwd_txfm.c
+DSP_SRCS-yes += fwd_txfm.h
+DSP_SRCS-$(HAVE_SSE2) += x86/fwd_txfm_sse2.c
+DSP_SRCS-$(HAVE_SSE2) += x86/fwd_txfm_impl_sse2.h
+ifeq ($(CONFIG_USE_X86INC),yes)
+DSP_SRCS-$(HAVE_SSSE3) += x86/fwd_txfm_ssse3.asm
+endif
+DSP_SRCS-$(HAVE_NEON) += arm/fwd_txfm_neon.c
+endif # CONFIG_VP9_ENCODER
+
+# quantization
+ifeq ($(CONFIG_VP9_ENCODER),yes)
+DSP_SRCS-yes += quantize.c
+DSP_SRCS-yes += quantize.h
+
+DSP_SRCS-$(HAVE_SSE2) += x86/quantize_sse2.c
+ifeq ($(CONFIG_VP9_HIGHBITDEPTH),yes)
+DSP_SRCS-$(HAVE_SSE2) += x86/highbd_quantize_intrin_sse2.c
+endif
+ifeq ($(ARCH_X86_64),yes)
+ifeq ($(CONFIG_USE_X86INC),yes)
+DSP_SRCS-$(HAVE_SSSE3) += x86/quantize_ssse3_x86_64.asm
+endif
+endif
+endif # CONFIG_VP9_ENCODER
+
ifeq ($(CONFIG_ENCODERS),yes)
DSP_SRCS-yes += sad.c
DSP_SRCS-yes += subtract.c
diff --git a/vpx_dsp/vpx_dsp_common.h b/vpx_dsp/vpx_dsp_common.h
new file mode 100644
index 0000000..6793036
--- /dev/null
+++ b/vpx_dsp/vpx_dsp_common.h
@@ -0,0 +1,57 @@
+/*
+ * Copyright (c) 2015 The WebM project authors. All Rights Reserved.
+ *
+ * Use of this source code is governed by a BSD-style license
+ * that can be found in the LICENSE file in the root of the source
+ * tree. An additional intellectual property rights grant can be found
+ * in the file PATENTS. All contributing project authors may
+ * be found in the AUTHORS file in the root of the source tree.
+ */
+
+#ifndef VPX_DSP_COMMON_H_
+#define VPX_DSP_COMMON_H_
+
+#include <stdlib.h>
+
+#include "./vpx_config.h"
+#include "vpx/vpx_integer.h"
+#include "vpx_ports/mem.h"
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+#define MIN(x, y) (((x) < (y)) ? (x) : (y))
+#define MAX(x, y) (((x) > (y)) ? (x) : (y))
+
+#if CONFIG_VP9_HIGHBITDEPTH
+// Note:
+// tran_low_t is the datatype used for final transform coefficients.
+// tran_high_t is the datatype used for intermediate transform stages.
+typedef int64_t tran_high_t;
+typedef int32_t tran_low_t;
+#else
+// Note:
+// tran_low_t is the datatype used for final transform coefficients.
+// tran_high_t is the datatype used for intermediate transform stages.
+typedef int32_t tran_high_t;
+typedef int16_t tran_low_t;
+#endif // CONFIG_VP9_HIGHBITDEPTH
+
+static INLINE uint8_t clip_pixel(int val) {
+ return (val > 255) ? 255 : (val < 0) ? 0 : val;
+}
+
+static INLINE int clamp(int value, int low, int high) {
+ return value < low ? low : (value > high ? high : value);
+}
+
+static INLINE double fclamp(double value, double low, double high) {
+ return value < low ? low : (value > high ? high : value);
+}
+
+#ifdef __cplusplus
+} // extern "C"
+#endif
+
+#endif // VPX_DSP_COMMON_H_
diff --git a/vpx_dsp/vpx_dsp_rtcd_defs.pl b/vpx_dsp/vpx_dsp_rtcd_defs.pl
index 8e4e966..8a17440 100644
--- a/vpx_dsp/vpx_dsp_rtcd_defs.pl
+++ b/vpx_dsp/vpx_dsp_rtcd_defs.pl
@@ -5,12 +5,18 @@
*/
#include "vpx/vpx_integer.h"
+#include "vpx_dsp/vpx_dsp_common.h"
EOF
}
forward_decls qw/vpx_dsp_forward_decls/;
-# Functions which use x86inc.asm instead of x86_abi_support.asm
+# x86inc.asm had specific constraints. Break it out so it's easy to disable.
+# Zero all the variables to avoid tricky else conditions.
+$mmx_x86inc = $sse_x86inc = $sse2_x86inc = $ssse3_x86inc = $avx_x86inc =
+ $avx2_x86inc = '';
+$mmx_x86_64_x86inc = $sse_x86_64_x86inc = $sse2_x86_64_x86inc =
+ $ssse3_x86_64_x86inc = $avx_x86_64_x86inc = $avx2_x86_64_x86inc = '';
if (vpx_config("CONFIG_USE_X86INC") eq "yes") {
$mmx_x86inc = 'mmx';
$sse_x86inc = 'sse';
@@ -18,23 +24,165 @@
$ssse3_x86inc = 'ssse3';
$avx_x86inc = 'avx';
$avx2_x86inc = 'avx2';
-} else {
- $mmx_x86inc = $sse_x86inc = $sse2_x86inc = $ssse3_x86inc =
- $avx_x86inc = $avx2_x86inc = '';
+ if ($opts{arch} eq "x86_64") {
+ $mmx_x86_64_x86inc = 'mmx';
+ $sse_x86_64_x86inc = 'sse';
+ $sse2_x86_64_x86inc = 'sse2';
+ $ssse3_x86_64_x86inc = 'ssse3';
+ $avx_x86_64_x86inc = 'avx';
+ $avx2_x86_64_x86inc = 'avx2';
+ }
}
-# Functions which are 64 bit only.
+# Functions that are 64-bit only.
+$mmx_x86_64 = $sse2_x86_64 = $ssse3_x86_64 = $avx_x86_64 = $avx2_x86_64 = '';
if ($opts{arch} eq "x86_64") {
$mmx_x86_64 = 'mmx';
$sse2_x86_64 = 'sse2';
$ssse3_x86_64 = 'ssse3';
$avx_x86_64 = 'avx';
$avx2_x86_64 = 'avx2';
-} else {
- $mmx_x86_64 = $sse2_x86_64 = $ssse3_x86_64 =
- $avx_x86_64 = $avx2_x86_64 = '';
}
+#
+# Loopfilter
+#
+add_proto qw/void vpx_lpf_vertical_16/, "uint8_t *s, int pitch, const uint8_t *blimit, const uint8_t *limit, const uint8_t *thresh";
+specialize qw/vpx_lpf_vertical_16 sse2 neon_asm msa/;
+$vpx_lpf_vertical_16_neon_asm=vpx_lpf_vertical_16_neon;
+
+add_proto qw/void vpx_lpf_vertical_16_dual/, "uint8_t *s, int pitch, const uint8_t *blimit, const uint8_t *limit, const uint8_t *thresh";
+specialize qw/vpx_lpf_vertical_16_dual sse2 neon_asm msa/;
+$vpx_lpf_vertical_16_dual_neon_asm=vpx_lpf_vertical_16_dual_neon;
+
+add_proto qw/void vpx_lpf_vertical_8/, "uint8_t *s, int pitch, const uint8_t *blimit, const uint8_t *limit, const uint8_t *thresh, int count";
+specialize qw/vpx_lpf_vertical_8 sse2 neon msa/;
+
+add_proto qw/void vpx_lpf_vertical_8_dual/, "uint8_t *s, int pitch, const uint8_t *blimit0, const uint8_t *limit0, const uint8_t *thresh0, const uint8_t *blimit1, const uint8_t *limit1, const uint8_t *thresh1";
+specialize qw/vpx_lpf_vertical_8_dual sse2 neon_asm msa/;
+$vpx_lpf_vertical_8_dual_neon_asm=vpx_lpf_vertical_8_dual_neon;
+
+add_proto qw/void vpx_lpf_vertical_4/, "uint8_t *s, int pitch, const uint8_t *blimit, const uint8_t *limit, const uint8_t *thresh, int count";
+specialize qw/vpx_lpf_vertical_4 mmx neon msa/;
+
+add_proto qw/void vpx_lpf_vertical_4_dual/, "uint8_t *s, int pitch, const uint8_t *blimit0, const uint8_t *limit0, const uint8_t *thresh0, const uint8_t *blimit1, const uint8_t *limit1, const uint8_t *thresh1";
+specialize qw/vpx_lpf_vertical_4_dual sse2 neon msa/;
+
+add_proto qw/void vpx_lpf_horizontal_16/, "uint8_t *s, int pitch, const uint8_t *blimit, const uint8_t *limit, const uint8_t *thresh, int count";
+specialize qw/vpx_lpf_horizontal_16 sse2 avx2 neon_asm msa/;
+$vpx_lpf_horizontal_16_neon_asm=vpx_lpf_horizontal_16_neon;
+
+add_proto qw/void vpx_lpf_horizontal_8/, "uint8_t *s, int pitch, const uint8_t *blimit, const uint8_t *limit, const uint8_t *thresh, int count";
+specialize qw/vpx_lpf_horizontal_8 sse2 neon msa/;
+
+add_proto qw/void vpx_lpf_horizontal_8_dual/, "uint8_t *s, int pitch, const uint8_t *blimit0, const uint8_t *limit0, const uint8_t *thresh0, const uint8_t *blimit1, const uint8_t *limit1, const uint8_t *thresh1";
+specialize qw/vpx_lpf_horizontal_8_dual sse2 neon_asm msa/;
+$vpx_lpf_horizontal_8_dual_neon_asm=vpx_lpf_horizontal_8_dual_neon;
+
+add_proto qw/void vpx_lpf_horizontal_4/, "uint8_t *s, int pitch, const uint8_t *blimit, const uint8_t *limit, const uint8_t *thresh, int count";
+specialize qw/vpx_lpf_horizontal_4 mmx neon msa/;
+
+add_proto qw/void vpx_lpf_horizontal_4_dual/, "uint8_t *s, int pitch, const uint8_t *blimit0, const uint8_t *limit0, const uint8_t *thresh0, const uint8_t *blimit1, const uint8_t *limit1, const uint8_t *thresh1";
+specialize qw/vpx_lpf_horizontal_4_dual sse2 neon msa/;
+
+if (vpx_config("CONFIG_VP9_HIGHBITDEPTH") eq "yes") {
+ add_proto qw/void vpx_highbd_lpf_vertical_16/, "uint16_t *s, int pitch, const uint8_t *blimit, const uint8_t *limit, const uint8_t *thresh, int bd";
+ specialize qw/vpx_highbd_lpf_vertical_16 sse2/;
+
+ add_proto qw/void vpx_highbd_lpf_vertical_16_dual/, "uint16_t *s, int pitch, const uint8_t *blimit, const uint8_t *limit, const uint8_t *thresh, int bd";
+ specialize qw/vpx_highbd_lpf_vertical_16_dual sse2/;
+
+ add_proto qw/void vpx_highbd_lpf_vertical_8/, "uint16_t *s, int pitch, const uint8_t *blimit, const uint8_t *limit, const uint8_t *thresh, int count, int bd";
+ specialize qw/vpx_highbd_lpf_vertical_8 sse2/;
+
+ add_proto qw/void vpx_highbd_lpf_vertical_8_dual/, "uint16_t *s, int pitch, const uint8_t *blimit0, const uint8_t *limit0, const uint8_t *thresh0, const uint8_t *blimit1, const uint8_t *limit1, const uint8_t *thresh1, int bd";
+ specialize qw/vpx_highbd_lpf_vertical_8_dual sse2/;
+
+ add_proto qw/void vpx_highbd_lpf_vertical_4/, "uint16_t *s, int pitch, const uint8_t *blimit, const uint8_t *limit, const uint8_t *thresh, int count, int bd";
+ specialize qw/vpx_highbd_lpf_vertical_4 sse2/;
+
+ add_proto qw/void vpx_highbd_lpf_vertical_4_dual/, "uint16_t *s, int pitch, const uint8_t *blimit0, const uint8_t *limit0, const uint8_t *thresh0, const uint8_t *blimit1, const uint8_t *limit1, const uint8_t *thresh1, int bd";
+ specialize qw/vpx_highbd_lpf_vertical_4_dual sse2/;
+
+ add_proto qw/void vpx_highbd_lpf_horizontal_16/, "uint16_t *s, int pitch, const uint8_t *blimit, const uint8_t *limit, const uint8_t *thresh, int count, int bd";
+ specialize qw/vpx_highbd_lpf_horizontal_16 sse2/;
+
+ add_proto qw/void vpx_highbd_lpf_horizontal_8/, "uint16_t *s, int pitch, const uint8_t *blimit, const uint8_t *limit, const uint8_t *thresh, int count, int bd";
+ specialize qw/vpx_highbd_lpf_horizontal_8 sse2/;
+
+ add_proto qw/void vpx_highbd_lpf_horizontal_8_dual/, "uint16_t *s, int pitch, const uint8_t *blimit0, const uint8_t *limit0, const uint8_t *thresh0, const uint8_t *blimit1, const uint8_t *limit1, const uint8_t *thresh1, int bd";
+ specialize qw/vpx_highbd_lpf_horizontal_8_dual sse2/;
+
+ add_proto qw/void vpx_highbd_lpf_horizontal_4/, "uint16_t *s, int pitch, const uint8_t *blimit, const uint8_t *limit, const uint8_t *thresh, int count, int bd";
+ specialize qw/vpx_highbd_lpf_horizontal_4 sse2/;
+
+ add_proto qw/void vpx_highbd_lpf_horizontal_4_dual/, "uint16_t *s, int pitch, const uint8_t *blimit0, const uint8_t *limit0, const uint8_t *thresh0, const uint8_t *blimit1, const uint8_t *limit1, const uint8_t *thresh1, int bd";
+ specialize qw/vpx_highbd_lpf_horizontal_4_dual sse2/;
+} # CONFIG_VP9_HIGHBITDEPTH
+
+#
+# Encoder functions.
+#
+
+#
+# Forward transform
+#
+if (vpx_config("CONFIG_VP9_ENCODER") eq "yes") {
+if (vpx_config("CONFIG_VP9_HIGHBITDEPTH") eq "yes") {
+ add_proto qw/void vp9_fdct4x4/, "const int16_t *input, tran_low_t *output, int stride";
+ specialize qw/vp9_fdct4x4 sse2/;
+
+ add_proto qw/void vp9_fdct8x8/, "const int16_t *input, tran_low_t *output, int stride";
+ specialize qw/vp9_fdct8x8 sse2/;
+
+ add_proto qw/void vp9_fdct16x16/, "const int16_t *input, tran_low_t *output, int stride";
+ specialize qw/vp9_fdct16x16 sse2/;
+
+ add_proto qw/void vp9_highbd_fdct4x4/, "const int16_t *input, tran_low_t *output, int stride";
+ specialize qw/vp9_highbd_fdct4x4 sse2/;
+
+ add_proto qw/void vp9_highbd_fdct8x8/, "const int16_t *input, tran_low_t *output, int stride";
+ specialize qw/vp9_highbd_fdct8x8 sse2/;
+
+ add_proto qw/void vp9_highbd_fdct16x16/, "const int16_t *input, tran_low_t *output, int stride";
+ specialize qw/vp9_highbd_fdct16x16 sse2/;
+} else {
+ add_proto qw/void vp9_fdct4x4/, "const int16_t *input, tran_low_t *output, int stride";
+ specialize qw/vp9_fdct4x4 sse2/;
+
+ add_proto qw/void vp9_fdct8x8/, "const int16_t *input, tran_low_t *output, int stride";
+ specialize qw/vp9_fdct8x8 sse2 neon/, "$ssse3_x86_64_x86inc";
+
+ add_proto qw/void vp9_fdct16x16/, "const int16_t *input, tran_low_t *output, int stride";
+ specialize qw/vp9_fdct16x16 sse2/;
+} # CONFIG_VP9_HIGHBITDEPTH
+} # CONFIG_VP9_ENCODER
+
+#
+# Quantization
+#
+if (vpx_config("CONFIG_VP9_ENCODER") eq "yes") {
+if (vpx_config("CONFIG_VP9_HIGHBITDEPTH") eq "yes") {
+ add_proto qw/void vp9_quantize_b/, "const tran_low_t *coeff_ptr, intptr_t n_coeffs, int skip_block, const int16_t *zbin_ptr, const int16_t *round_ptr, const int16_t *quant_ptr, const int16_t *quant_shift_ptr, tran_low_t *qcoeff_ptr, tran_low_t *dqcoeff_ptr, const int16_t *dequant_ptr, uint16_t *eob_ptr, const int16_t *scan, const int16_t *iscan";
+ specialize qw/vp9_quantize_b/;
+
+ add_proto qw/void vp9_quantize_b_32x32/, "const tran_low_t *coeff_ptr, intptr_t n_coeffs, int skip_block, const int16_t *zbin_ptr, const int16_t *round_ptr, const int16_t *quant_ptr, const int16_t *quant_shift_ptr, tran_low_t *qcoeff_ptr, tran_low_t *dqcoeff_ptr, const int16_t *dequant_ptr, uint16_t *eob_ptr, const int16_t *scan, const int16_t *iscan";
+ specialize qw/vp9_quantize_b_32x32/;
+
+ add_proto qw/void vp9_highbd_quantize_b/, "const tran_low_t *coeff_ptr, intptr_t n_coeffs, int skip_block, const int16_t *zbin_ptr, const int16_t *round_ptr, const int16_t *quant_ptr, const int16_t *quant_shift_ptr, tran_low_t *qcoeff_ptr, tran_low_t *dqcoeff_ptr, const int16_t *dequant_ptr, uint16_t *eob_ptr, const int16_t *scan, const int16_t *iscan";
+ specialize qw/vp9_highbd_quantize_b sse2/;
+
+ add_proto qw/void vp9_highbd_quantize_b_32x32/, "const tran_low_t *coeff_ptr, intptr_t n_coeffs, int skip_block, const int16_t *zbin_ptr, const int16_t *round_ptr, const int16_t *quant_ptr, const int16_t *quant_shift_ptr, tran_low_t *qcoeff_ptr, tran_low_t *dqcoeff_ptr, const int16_t *dequant_ptr, uint16_t *eob_ptr, const int16_t *scan, const int16_t *iscan";
+ specialize qw/vp9_highbd_quantize_b_32x32 sse2/;
+} else {
+ add_proto qw/void vp9_quantize_b/, "const tran_low_t *coeff_ptr, intptr_t n_coeffs, int skip_block, const int16_t *zbin_ptr, const int16_t *round_ptr, const int16_t *quant_ptr, const int16_t *quant_shift_ptr, tran_low_t *qcoeff_ptr, tran_low_t *dqcoeff_ptr, const int16_t *dequant_ptr, uint16_t *eob_ptr, const int16_t *scan, const int16_t *iscan";
+ specialize qw/vp9_quantize_b sse2/, "$ssse3_x86_64_x86inc";
+
+ add_proto qw/void vp9_quantize_b_32x32/, "const tran_low_t *coeff_ptr, intptr_t n_coeffs, int skip_block, const int16_t *zbin_ptr, const int16_t *round_ptr, const int16_t *quant_ptr, const int16_t *quant_shift_ptr, tran_low_t *qcoeff_ptr, tran_low_t *dqcoeff_ptr, const int16_t *dequant_ptr, uint16_t *eob_ptr, const int16_t *scan, const int16_t *iscan";
+ specialize qw/vp9_quantize_b_32x32/, "$ssse3_x86_64_x86inc";
+} # CONFIG_VP9_HIGHBITDEPTH
+} # CONFIG_VP9_ENCODER
+
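For context on what these add_proto/specialize pairs produce: rtcd.pl emits one C prototype per variant plus a runtime-selected function pointer for each entry. Roughly, as a sketch of the mechanism rather than the literal generated header, `specialize qw/vp9_fdct8x8 sse2/` expands to:

    void vp9_fdct8x8_c(const int16_t *input, tran_low_t *output, int stride);
    void vp9_fdct8x8_sse2(const int16_t *input, tran_low_t *output, int stride);
    RTCD_EXTERN void (*vp9_fdct8x8)(const int16_t *input, tran_low_t *output,
                                    int stride);

    /* in setup_rtcd_internal(), run once at startup: */
    vp9_fdct8x8 = vp9_fdct8x8_c;
    if (flags & HAS_SSE2) vp9_fdct8x8 = vp9_fdct8x8_sse2;

An entry with no SIMD names, such as vp9_quantize_b in the high bit-depth branch, still gets a prototype and a pointer but always resolves to the C version.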
if (vpx_config("CONFIG_ENCODERS") eq "yes") {
#
# Block subtraction
@@ -526,43 +674,43 @@
specialize qw/vpx_sub_pixel_variance4x4 mmx msa/, "$sse_x86inc", "$ssse3_x86inc";
add_proto qw/uint32_t vpx_sub_pixel_avg_variance64x64/, "const uint8_t *src_ptr, int source_stride, int xoffset, int yoffset, const uint8_t *ref_ptr, int ref_stride, uint32_t *sse, const uint8_t *second_pred";
- specialize qw/vpx_sub_pixel_avg_variance64x64 avx2/, "$sse2_x86inc", "$ssse3_x86inc";
+ specialize qw/vpx_sub_pixel_avg_variance64x64 avx2 msa/, "$sse2_x86inc", "$ssse3_x86inc";
add_proto qw/uint32_t vpx_sub_pixel_avg_variance64x32/, "const uint8_t *src_ptr, int source_stride, int xoffset, int yoffset, const uint8_t *ref_ptr, int ref_stride, uint32_t *sse, const uint8_t *second_pred";
- specialize qw/vpx_sub_pixel_avg_variance64x32/, "$sse2_x86inc", "$ssse3_x86inc";
+ specialize qw/vpx_sub_pixel_avg_variance64x32 msa/, "$sse2_x86inc", "$ssse3_x86inc";
add_proto qw/uint32_t vpx_sub_pixel_avg_variance32x64/, "const uint8_t *src_ptr, int source_stride, int xoffset, int yoffset, const uint8_t *ref_ptr, int ref_stride, uint32_t *sse, const uint8_t *second_pred";
- specialize qw/vpx_sub_pixel_avg_variance32x64/, "$sse2_x86inc", "$ssse3_x86inc";
+ specialize qw/vpx_sub_pixel_avg_variance32x64 msa/, "$sse2_x86inc", "$ssse3_x86inc";
add_proto qw/uint32_t vpx_sub_pixel_avg_variance32x32/, "const uint8_t *src_ptr, int source_stride, int xoffset, int yoffset, const uint8_t *ref_ptr, int ref_stride, uint32_t *sse, const uint8_t *second_pred";
- specialize qw/vpx_sub_pixel_avg_variance32x32 avx2/, "$sse2_x86inc", "$ssse3_x86inc";
+ specialize qw/vpx_sub_pixel_avg_variance32x32 avx2 msa/, "$sse2_x86inc", "$ssse3_x86inc";
add_proto qw/uint32_t vpx_sub_pixel_avg_variance32x16/, "const uint8_t *src_ptr, int source_stride, int xoffset, int yoffset, const uint8_t *ref_ptr, int ref_stride, uint32_t *sse, const uint8_t *second_pred";
- specialize qw/vpx_sub_pixel_avg_variance32x16/, "$sse2_x86inc", "$ssse3_x86inc";
+ specialize qw/vpx_sub_pixel_avg_variance32x16 msa/, "$sse2_x86inc", "$ssse3_x86inc";
add_proto qw/uint32_t vpx_sub_pixel_avg_variance16x32/, "const uint8_t *src_ptr, int source_stride, int xoffset, int yoffset, const uint8_t *ref_ptr, int ref_stride, uint32_t *sse, const uint8_t *second_pred";
- specialize qw/vpx_sub_pixel_avg_variance16x32/, "$sse2_x86inc", "$ssse3_x86inc";
+ specialize qw/vpx_sub_pixel_avg_variance16x32 msa/, "$sse2_x86inc", "$ssse3_x86inc";
add_proto qw/uint32_t vpx_sub_pixel_avg_variance16x16/, "const uint8_t *src_ptr, int source_stride, int xoffset, int yoffset, const uint8_t *ref_ptr, int ref_stride, uint32_t *sse, const uint8_t *second_pred";
- specialize qw/vpx_sub_pixel_avg_variance16x16/, "$sse2_x86inc", "$ssse3_x86inc";
+ specialize qw/vpx_sub_pixel_avg_variance16x16 msa/, "$sse2_x86inc", "$ssse3_x86inc";
add_proto qw/uint32_t vpx_sub_pixel_avg_variance16x8/, "const uint8_t *src_ptr, int source_stride, int xoffset, int yoffset, const uint8_t *ref_ptr, int ref_stride, uint32_t *sse, const uint8_t *second_pred";
- specialize qw/vpx_sub_pixel_avg_variance16x8/, "$sse2_x86inc", "$ssse3_x86inc";
+ specialize qw/vpx_sub_pixel_avg_variance16x8 msa/, "$sse2_x86inc", "$ssse3_x86inc";
add_proto qw/uint32_t vpx_sub_pixel_avg_variance8x16/, "const uint8_t *src_ptr, int source_stride, int xoffset, int yoffset, const uint8_t *ref_ptr, int ref_stride, uint32_t *sse, const uint8_t *second_pred";
- specialize qw/vpx_sub_pixel_avg_variance8x16/, "$sse2_x86inc", "$ssse3_x86inc";
+ specialize qw/vpx_sub_pixel_avg_variance8x16 msa/, "$sse2_x86inc", "$ssse3_x86inc";
add_proto qw/uint32_t vpx_sub_pixel_avg_variance8x8/, "const uint8_t *src_ptr, int source_stride, int xoffset, int yoffset, const uint8_t *ref_ptr, int ref_stride, uint32_t *sse, const uint8_t *second_pred";
- specialize qw/vpx_sub_pixel_avg_variance8x8/, "$sse2_x86inc", "$ssse3_x86inc";
+ specialize qw/vpx_sub_pixel_avg_variance8x8 msa/, "$sse2_x86inc", "$ssse3_x86inc";
add_proto qw/uint32_t vpx_sub_pixel_avg_variance8x4/, "const uint8_t *src_ptr, int source_stride, int xoffset, int yoffset, const uint8_t *ref_ptr, int ref_stride, uint32_t *sse, const uint8_t *second_pred";
- specialize qw/vpx_sub_pixel_avg_variance8x4/, "$sse2_x86inc", "$ssse3_x86inc";
+ specialize qw/vpx_sub_pixel_avg_variance8x4 msa/, "$sse2_x86inc", "$ssse3_x86inc";
add_proto qw/uint32_t vpx_sub_pixel_avg_variance4x8/, "const uint8_t *src_ptr, int source_stride, int xoffset, int yoffset, const uint8_t *ref_ptr, int ref_stride, uint32_t *sse, const uint8_t *second_pred";
- specialize qw/vpx_sub_pixel_avg_variance4x8/, "$sse_x86inc", "$ssse3_x86inc";
+ specialize qw/vpx_sub_pixel_avg_variance4x8 msa/, "$sse_x86inc", "$ssse3_x86inc";
add_proto qw/uint32_t vpx_sub_pixel_avg_variance4x4/, "const uint8_t *src_ptr, int source_stride, int xoffset, int yoffset, const uint8_t *ref_ptr, int ref_stride, uint32_t *sse, const uint8_t *second_pred";
- specialize qw/vpx_sub_pixel_avg_variance4x4/, "$sse_x86inc", "$ssse3_x86inc";
+ specialize qw/vpx_sub_pixel_avg_variance4x4 msa/, "$sse_x86inc", "$ssse3_x86inc";
#
# Specialty Subpixel
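The msa additions above extend the same sub-pixel average variance kernels already covered by the x86 paths. As a reminder of what these functions compute, here is a rough scalar model (a sketch assuming the standard eighth-pel bilinear taps of vpx_dsp's C reference; not the library implementation, and exact integer widths and rounding may differ):

    #include <stdint.h>

    static uint32_t subpel_avg_variance_model(
        const uint8_t *src, int src_stride, int xoffset, int yoffset,
        const uint8_t *ref, int ref_stride, uint32_t *sse,
        const uint8_t *second_pred, int w, int h) {
      const int fx0 = 128 - 16 * xoffset, fx1 = 16 * xoffset;  /* 7-bit taps */
      const int fy0 = 128 - 16 * yoffset, fy1 = 16 * yoffset;
      int64_t sum = 0;
      uint64_t sq = 0;
      for (int y = 0; y < h; ++y) {
        for (int x = 0; x < w; ++x) {
          /* first-order bilinear interpolation, horizontal then vertical */
          const int t = (fx0 * src[y * src_stride + x] +
                         fx1 * src[y * src_stride + x + 1] + 64) >> 7;
          const int b = (fx0 * src[(y + 1) * src_stride + x] +
                         fx1 * src[(y + 1) * src_stride + x + 1] + 64) >> 7;
          int pred = (fy0 * t + fy1 * b + 64) >> 7;
          pred = (pred + second_pred[y * w + x] + 1) >> 1;  /* the "avg" part */
          const int diff = pred - ref[y * ref_stride + x];
          sum += diff;
          sq += (uint64_t)(diff * diff);
        }
      }
      *sse = (uint32_t)sq;
      return (uint32_t)(sq - (uint64_t)((sum * sum) / (w * h)));
    }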
diff --git a/vp9/encoder/x86/vp9_dct_sse2_impl.h b/vpx_dsp/x86/fwd_txfm_impl_sse2.h
similarity index 98%
rename from vp9/encoder/x86/vp9_dct_sse2_impl.h
rename to vpx_dsp/x86/fwd_txfm_impl_sse2.h
index 86e9ecf..f67bb7a 100644
--- a/vp9/encoder/x86/vp9_dct_sse2_impl.h
+++ b/vpx_dsp/x86/fwd_txfm_impl_sse2.h
@@ -10,12 +10,15 @@
#include <emmintrin.h> // SSE2
-#include "./vp9_rtcd.h"
+#include "./vpx_dsp_rtcd.h"
#include "vp9/common/vp9_idct.h" // for cospi constants
#include "vp9/encoder/vp9_dct.h"
#include "vp9/encoder/x86/vp9_dct_sse2.h"
#include "vpx_ports/mem.h"
+// TODO(jingning) The high bit-depth functions need rework for performance.
+// After we properly fix the high bit-depth function implementations, this
+// file's dependencies should be substantially simplified.
#if DCT_HIGH_BIT_DEPTH
#define ADD_EPI16 _mm_adds_epi16
#define SUB_EPI16 _mm_subs_epi16
diff --git a/vpx_dsp/x86/fwd_txfm_sse2.c b/vpx_dsp/x86/fwd_txfm_sse2.c
new file mode 100644
index 0000000..a868d22
--- /dev/null
+++ b/vpx_dsp/x86/fwd_txfm_sse2.c
@@ -0,0 +1,34 @@
+/*
+ * Copyright (c) 2015 The WebM project authors. All Rights Reserved.
+ *
+ * Use of this source code is governed by a BSD-style license
+ * that can be found in the LICENSE file in the root of the source
+ * tree. An additional intellectual property rights grant can be found
+ * in the file PATENTS. All contributing project authors may
+ * be found in the AUTHORS file in the root of the source tree.
+ */
+
+#include "./vpx_config.h"
+
+#define DCT_HIGH_BIT_DEPTH 0
+
+#define FDCT4x4_2D vp9_fdct4x4_sse2
+#define FDCT8x8_2D vp9_fdct8x8_sse2
+#define FDCT16x16_2D vp9_fdct16x16_sse2
+#include "vpx_dsp/x86/fwd_txfm_impl_sse2.h"
+#undef FDCT4x4_2D
+#undef FDCT8x8_2D
+#undef FDCT16x16_2D
+#undef DCT_HIGH_BIT_DEPTH
+
+#if CONFIG_VP9_HIGHBITDEPTH
+#define DCT_HIGH_BIT_DEPTH 1
+#define FDCT4x4_2D vp9_highbd_fdct4x4_sse2
+#define FDCT8x8_2D vp9_highbd_fdct8x8_sse2
+#define FDCT16x16_2D vp9_highbd_fdct16x16_sse2
+#include "vpx_dsp/x86/fwd_txfm_impl_sse2.h" // NOLINT
+#undef FDCT4x4_2D
+#undef FDCT8x8_2D
+#undef FDCT16x16_2D
+#undef DCT_HIGH_BIT_DEPTH
+#endif // CONFIG_VP9_HIGHBITDEPTH
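The new file above is template instantiation by macro: fwd_txfm_impl_sse2.h is included twice, each time with different function-name macros and a different DCT_HIGH_BIT_DEPTH setting. A sketch of how the shared header consumes those macros (illustrative; the real header contains the full SSE2 transform bodies):

    /* inside fwd_txfm_impl_sse2.h (sketch) */
    #if DCT_HIGH_BIT_DEPTH
    #define ADD_EPI16 _mm_adds_epi16  /* saturating: 12-bit input must not wrap */
    #define SUB_EPI16 _mm_subs_epi16
    #else
    #define ADD_EPI16 _mm_add_epi16
    #define SUB_EPI16 _mm_sub_epi16
    #endif

    void FDCT8x8_2D(const int16_t *input, tran_low_t *output, int stride) {
      /* ... SSE2 butterflies built on ADD_EPI16/SUB_EPI16 ... */
    }

One source file therefore yields both vp9_fdct*_sse2 and vp9_highbd_fdct*_sse2 from the same body.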
diff --git a/vpx_dsp/x86/fwd_txfm_ssse3.asm b/vpx_dsp/x86/fwd_txfm_ssse3.asm
new file mode 100644
index 0000000..5f63546
--- /dev/null
+++ b/vpx_dsp/x86/fwd_txfm_ssse3.asm
@@ -0,0 +1,182 @@
+;
+; Copyright (c) 2015 The WebM project authors. All Rights Reserved.
+;
+; Use of this source code is governed by a BSD-style license
+; that can be found in the LICENSE file in the root of the source
+; tree. An additional intellectual property rights grant can be found
+; in the file PATENTS. All contributing project authors may
+; be found in the AUTHORS file in the root of the source tree.
+;
+%include "third_party/x86inc/x86inc.asm"
+
+; This file provides the SSSE3 version of the forward transformation. Some
+; of the macro definitions were originally derived from the ffmpeg project.
+; The current version applies to x86 64-bit only.
+
+SECTION_RODATA
+
+pw_11585x2: times 8 dw 23170
+pd_8192: times 4 dd 8192
+
+%macro TRANSFORM_COEFFS 2
+pw_%1_%2: dw %1, %2, %1, %2, %1, %2, %1, %2
+pw_%2_m%1: dw %2, -%1, %2, -%1, %2, -%1, %2, -%1
+%endmacro
+
+TRANSFORM_COEFFS 11585, 11585
+TRANSFORM_COEFFS 15137, 6270
+TRANSFORM_COEFFS 16069, 3196
+TRANSFORM_COEFFS 9102, 13623
+
+SECTION .text
+
+%if ARCH_X86_64
+%macro SUM_SUB 3
+ psubw m%3, m%1, m%2
+ paddw m%1, m%2
+ SWAP %2, %3
+%endmacro
+
+; butterfly operation
+%macro MUL_ADD_2X 6 ; dst1, dst2, src, round, coefs1, coefs2
+ pmaddwd m%1, m%3, %5
+ pmaddwd m%2, m%3, %6
+ paddd m%1, %4
+ paddd m%2, %4
+ psrad m%1, 14
+ psrad m%2, 14
+%endmacro
+
+%macro BUTTERFLY_4X 7 ; dst1, dst2, coef1, coef2, round, tmp1, tmp2
+ punpckhwd m%6, m%2, m%1
+ MUL_ADD_2X %7, %6, %6, %5, [pw_%4_%3], [pw_%3_m%4]
+ punpcklwd m%2, m%1
+ MUL_ADD_2X %1, %2, %2, %5, [pw_%4_%3], [pw_%3_m%4]
+ packssdw m%1, m%7
+ packssdw m%2, m%6
+%endmacro
+
+; matrix transpose
+%macro INTERLEAVE_2X 4
+ punpckh%1 m%4, m%2, m%3
+ punpckl%1 m%2, m%3
+ SWAP %3, %4
+%endmacro
+
+%macro TRANSPOSE8X8 9
+ INTERLEAVE_2X wd, %1, %2, %9
+ INTERLEAVE_2X wd, %3, %4, %9
+ INTERLEAVE_2X wd, %5, %6, %9
+ INTERLEAVE_2X wd, %7, %8, %9
+
+ INTERLEAVE_2X dq, %1, %3, %9
+ INTERLEAVE_2X dq, %2, %4, %9
+ INTERLEAVE_2X dq, %5, %7, %9
+ INTERLEAVE_2X dq, %6, %8, %9
+
+ INTERLEAVE_2X qdq, %1, %5, %9
+ INTERLEAVE_2X qdq, %3, %7, %9
+ INTERLEAVE_2X qdq, %2, %6, %9
+ INTERLEAVE_2X qdq, %4, %8, %9
+
+ SWAP %2, %5
+ SWAP %4, %7
+%endmacro
+
+; 1D forward 8x8 DCT transform
+%macro FDCT8_1D 1
+ SUM_SUB 0, 7, 9
+ SUM_SUB 1, 6, 9
+ SUM_SUB 2, 5, 9
+ SUM_SUB 3, 4, 9
+
+ SUM_SUB 0, 3, 9
+ SUM_SUB 1, 2, 9
+ SUM_SUB 6, 5, 9
+%if %1 == 0
+ SUM_SUB 0, 1, 9
+%endif
+
+ BUTTERFLY_4X 2, 3, 6270, 15137, m8, 9, 10
+
+ pmulhrsw m6, m12
+ pmulhrsw m5, m12
+%if %1 == 0
+ pmulhrsw m0, m12
+ pmulhrsw m1, m12
+%else
+ BUTTERFLY_4X 1, 0, 11585, 11585, m8, 9, 10
+ SWAP 0, 1
+%endif
+
+ SUM_SUB 4, 5, 9
+ SUM_SUB 7, 6, 9
+ BUTTERFLY_4X 4, 7, 3196, 16069, m8, 9, 10
+ BUTTERFLY_4X 5, 6, 13623, 9102, m8, 9, 10
+ SWAP 1, 4
+ SWAP 3, 6
+%endmacro
+
+%macro DIVIDE_ROUND_2X 4 ; dst1, dst2, tmp1, tmp2
+ psraw m%3, m%1, 15
+ psraw m%4, m%2, 15
+ psubw m%1, m%3
+ psubw m%2, m%4
+ psraw m%1, 1
+ psraw m%2, 1
+%endmacro
+
+INIT_XMM ssse3
+cglobal fdct8x8, 3, 5, 13, input, output, stride
+
+ mova m8, [pd_8192]
+ mova m12, [pw_11585x2]
+ pxor m11, m11
+
+ lea r3, [2 * strideq]
+ lea r4, [4 * strideq]
+ mova m0, [inputq]
+ mova m1, [inputq + r3]
+ lea inputq, [inputq + r4]
+ mova m2, [inputq]
+ mova m3, [inputq + r3]
+ lea inputq, [inputq + r4]
+ mova m4, [inputq]
+ mova m5, [inputq + r3]
+ lea inputq, [inputq + r4]
+ mova m6, [inputq]
+ mova m7, [inputq + r3]
+
+ ; left shift by 2 to increase forward transformation precision
+ psllw m0, 2
+ psllw m1, 2
+ psllw m2, 2
+ psllw m3, 2
+ psllw m4, 2
+ psllw m5, 2
+ psllw m6, 2
+ psllw m7, 2
+
+ ; column transform
+ FDCT8_1D 0
+ TRANSPOSE8X8 0, 1, 2, 3, 4, 5, 6, 7, 9
+
+ FDCT8_1D 1
+ TRANSPOSE8X8 0, 1, 2, 3, 4, 5, 6, 7, 9
+
+ DIVIDE_ROUND_2X 0, 1, 9, 10
+ DIVIDE_ROUND_2X 2, 3, 9, 10
+ DIVIDE_ROUND_2X 4, 5, 9, 10
+ DIVIDE_ROUND_2X 6, 7, 9, 10
+
+ mova [outputq + 0], m0
+ mova [outputq + 16], m1
+ mova [outputq + 32], m2
+ mova [outputq + 48], m3
+ mova [outputq + 64], m4
+ mova [outputq + 80], m5
+ mova [outputq + 96], m6
+ mova [outputq + 112], m7
+
+ RET
+%endif
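Two arithmetic idioms in the asm above are worth spelling out. Scalar equivalents (a sketch derived from the macros shown):

    /* MUL_ADD_2X: 14-bit fixed-point butterfly with the pd_8192 rounder. */
    static inline int16_t mul_round_14(int a, int b, int c1, int c2) {
      return (int16_t)((a * c1 + b * c2 + 8192) >> 14);
    }

    /* DIVIDE_ROUND_2X: halve with round toward zero. x >> 15 is -1 for
     * negative x, so this is (x + (x < 0)) >> 1, matching the
     * psraw/psubw/psraw sequence. */
    static inline int16_t div_round_2(int16_t x) {
      return (int16_t)((x - (x >> 15)) >> 1);
    }

Likewise, pw_11585x2 holds 2 * cospi_16_64 so that a single pmulhrsw, which computes (x * y + 16384) >> 15, evaluates round(x * 11585 / 16384) in one instruction.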
diff --git a/vp9/common/x86/vp9_high_loopfilter_intrin_sse2.c b/vpx_dsp/x86/highbd_loopfilter_sse2.c
similarity index 96%
rename from vp9/common/x86/vp9_high_loopfilter_intrin_sse2.c
rename to vpx_dsp/x86/highbd_loopfilter_sse2.c
index b40669c..c4fd5e1 100644
--- a/vp9/common/x86/vp9_high_loopfilter_intrin_sse2.c
+++ b/vpx_dsp/x86/highbd_loopfilter_sse2.c
@@ -10,9 +10,8 @@
#include <emmintrin.h> // SSE2
-#include "./vp9_rtcd.h"
+#include "./vpx_dsp_rtcd.h"
#include "vpx_ports/mem.h"
-#include "vp9/common/vp9_loopfilter.h"
#include "vpx_ports/emmintrin_compat.h"
static INLINE __m128i signed_char_clamp_bd_sse2(__m128i value, int bd) {
@@ -509,7 +508,7 @@
}
// TODO(yunqingwang): remove count and call these 2 functions (8 or 16) directly.
-void vp9_highbd_lpf_horizontal_16_sse2(uint16_t *s, int p,
+void vpx_highbd_lpf_horizontal_16_sse2(uint16_t *s, int p,
const uint8_t *_blimit,
const uint8_t *_limit,
const uint8_t *_thresh,
@@ -520,7 +519,7 @@
highbd_mb_lpf_horizontal_edge_w_sse2_16(s, p, _blimit, _limit, _thresh, bd);
}
-void vp9_highbd_lpf_horizontal_8_sse2(uint16_t *s, int p,
+void vpx_highbd_lpf_horizontal_8_sse2(uint16_t *s, int p,
const uint8_t *_blimit,
const uint8_t *_limit,
const uint8_t *_thresh,
@@ -688,7 +687,7 @@
filt = _mm_adds_epi16(filt, work_a);
filt = _mm_adds_epi16(filt, work_a);
filt = _mm_adds_epi16(filt, work_a);
- // (vp9_filter + 3 * (qs0 - ps0)) & mask
+ // (vpx_filter + 3 * (qs0 - ps0)) & mask
filt = signed_char_clamp_bd_sse2(filt, bd);
filt = _mm_and_si128(filt, mask);
@@ -757,7 +756,7 @@
_mm_store_si128((__m128i *)(s + 2 * p), q2);
}
-void vp9_highbd_lpf_horizontal_8_dual_sse2(uint16_t *s, int p,
+void vpx_highbd_lpf_horizontal_8_dual_sse2(uint16_t *s, int p,
const uint8_t *_blimit0,
const uint8_t *_limit0,
const uint8_t *_thresh0,
@@ -765,12 +764,12 @@
const uint8_t *_limit1,
const uint8_t *_thresh1,
int bd) {
- vp9_highbd_lpf_horizontal_8_sse2(s, p, _blimit0, _limit0, _thresh0, 1, bd);
- vp9_highbd_lpf_horizontal_8_sse2(s + 8, p, _blimit1, _limit1, _thresh1,
+ vpx_highbd_lpf_horizontal_8_sse2(s, p, _blimit0, _limit0, _thresh0, 1, bd);
+ vpx_highbd_lpf_horizontal_8_sse2(s + 8, p, _blimit1, _limit1, _thresh1,
1, bd);
}
-void vp9_highbd_lpf_horizontal_4_sse2(uint16_t *s, int p,
+void vpx_highbd_lpf_horizontal_4_sse2(uint16_t *s, int p,
const uint8_t *_blimit,
const uint8_t *_limit,
const uint8_t *_thresh,
@@ -892,7 +891,7 @@
filt = _mm_adds_epi16(filt, work_a);
filt = signed_char_clamp_bd_sse2(_mm_adds_epi16(filt, work_a), bd);
- // (vp9_filter + 3 * (qs0 - ps0)) & mask
+ // (vpx_filter + 3 * (qs0 - ps0)) & mask
filt = _mm_and_si128(filt, mask);
filter1 = signed_char_clamp_bd_sse2(_mm_adds_epi16(filt, t4), bd);
@@ -937,7 +936,7 @@
_mm_storeu_si128((__m128i *)(s + 1 * p), q1);
}
-void vp9_highbd_lpf_horizontal_4_dual_sse2(uint16_t *s, int p,
+void vpx_highbd_lpf_horizontal_4_dual_sse2(uint16_t *s, int p,
const uint8_t *_blimit0,
const uint8_t *_limit0,
const uint8_t *_thresh0,
@@ -945,8 +944,8 @@
const uint8_t *_limit1,
const uint8_t *_thresh1,
int bd) {
- vp9_highbd_lpf_horizontal_4_sse2(s, p, _blimit0, _limit0, _thresh0, 1, bd);
- vp9_highbd_lpf_horizontal_4_sse2(s + 8, p, _blimit1, _limit1, _thresh1, 1,
+ vpx_highbd_lpf_horizontal_4_sse2(s, p, _blimit0, _limit0, _thresh0, 1, bd);
+ vpx_highbd_lpf_horizontal_4_sse2(s + 8, p, _blimit1, _limit1, _thresh1, 1,
bd);
}
@@ -1055,7 +1054,7 @@
highbd_transpose(src1, in_p, dest1, out_p, 1);
}
-void vp9_highbd_lpf_vertical_4_sse2(uint16_t *s, int p,
+void vpx_highbd_lpf_vertical_4_sse2(uint16_t *s, int p,
const uint8_t *blimit,
const uint8_t *limit,
const uint8_t *thresh,
@@ -1072,7 +1071,7 @@
highbd_transpose(src, p, dst, 8, 1);
// Loop filtering
- vp9_highbd_lpf_horizontal_4_sse2(t_dst + 4 * 8, 8, blimit, limit, thresh, 1,
+ vpx_highbd_lpf_horizontal_4_sse2(t_dst + 4 * 8, 8, blimit, limit, thresh, 1,
bd);
src[0] = t_dst;
@@ -1082,7 +1081,7 @@
highbd_transpose(src, 8, dst, p, 1);
}
-void vp9_highbd_lpf_vertical_4_dual_sse2(uint16_t *s, int p,
+void vpx_highbd_lpf_vertical_4_dual_sse2(uint16_t *s, int p,
const uint8_t *blimit0,
const uint8_t *limit0,
const uint8_t *thresh0,
@@ -1098,7 +1097,7 @@
highbd_transpose8x16(s - 4, s - 4 + p * 8, p, t_dst, 16);
// Loop filtering
- vp9_highbd_lpf_horizontal_4_dual_sse2(t_dst + 4 * 16, 16, blimit0, limit0,
+ vpx_highbd_lpf_horizontal_4_dual_sse2(t_dst + 4 * 16, 16, blimit0, limit0,
thresh0, blimit1, limit1, thresh1, bd);
src[0] = t_dst;
src[1] = t_dst + 8;
@@ -1109,7 +1108,7 @@
highbd_transpose(src, 16, dst, p, 2);
}
-void vp9_highbd_lpf_vertical_8_sse2(uint16_t *s, int p,
+void vpx_highbd_lpf_vertical_8_sse2(uint16_t *s, int p,
const uint8_t *blimit,
const uint8_t *limit,
const uint8_t *thresh,
@@ -1126,7 +1125,7 @@
highbd_transpose(src, p, dst, 8, 1);
// Loop filtering
- vp9_highbd_lpf_horizontal_8_sse2(t_dst + 4 * 8, 8, blimit, limit, thresh, 1,
+ vpx_highbd_lpf_horizontal_8_sse2(t_dst + 4 * 8, 8, blimit, limit, thresh, 1,
bd);
src[0] = t_dst;
@@ -1136,7 +1135,7 @@
highbd_transpose(src, 8, dst, p, 1);
}
-void vp9_highbd_lpf_vertical_8_dual_sse2(uint16_t *s, int p,
+void vpx_highbd_lpf_vertical_8_dual_sse2(uint16_t *s, int p,
const uint8_t *blimit0,
const uint8_t *limit0,
const uint8_t *thresh0,
@@ -1152,7 +1151,7 @@
highbd_transpose8x16(s - 4, s - 4 + p * 8, p, t_dst, 16);
// Loop filtering
- vp9_highbd_lpf_horizontal_8_dual_sse2(t_dst + 4 * 16, 16, blimit0, limit0,
+ vpx_highbd_lpf_horizontal_8_dual_sse2(t_dst + 4 * 16, 16, blimit0, limit0,
thresh0, blimit1, limit1, thresh1, bd);
src[0] = t_dst;
src[1] = t_dst + 8;
@@ -1164,7 +1163,7 @@
highbd_transpose(src, 16, dst, p, 2);
}
-void vp9_highbd_lpf_vertical_16_sse2(uint16_t *s, int p,
+void vpx_highbd_lpf_vertical_16_sse2(uint16_t *s, int p,
const uint8_t *blimit,
const uint8_t *limit,
const uint8_t *thresh,
@@ -1193,7 +1192,7 @@
highbd_transpose(src, 8, dst, p, 2);
}
-void vp9_highbd_lpf_vertical_16_dual_sse2(uint16_t *s,
+void vpx_highbd_lpf_vertical_16_dual_sse2(uint16_t *s,
int p,
const uint8_t *blimit,
const uint8_t *limit,
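Two recurring building blocks in this file: the vertical variants are implemented by transposing an 8-pixel-wide strip into a local buffer, running the matching horizontal filter, and transposing back; and intermediate filter values go through signed_char_clamp_bd_sse2, which clamps to the signed range implied by the bit depth. A scalar sketch of the latter (assuming the usual generalization of the 8-bit signed-char clamp):

    #include <stdint.h>

    static inline int32_t signed_char_clamp_bd_model(int32_t v, int bd) {
      const int32_t hi = (1 << (bd - 1)) - 1;  /* 127 at bd=8, 2047 at bd=12 */
      const int32_t lo = -(1 << (bd - 1));     /* -128 at bd=8 */
      return v < lo ? lo : (v > hi ? hi : v);
    }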
diff --git a/vp9/encoder/x86/vp9_highbd_quantize_intrin_sse2.c b/vpx_dsp/x86/highbd_quantize_intrin_sse2.c
similarity index 97%
rename from vp9/encoder/x86/vp9_highbd_quantize_intrin_sse2.c
rename to vpx_dsp/x86/highbd_quantize_intrin_sse2.c
index 0174cfe..06c748d 100644
--- a/vp9/encoder/x86/vp9_highbd_quantize_intrin_sse2.c
+++ b/vpx_dsp/x86/highbd_quantize_intrin_sse2.c
@@ -1,5 +1,5 @@
/*
- * Copyright (c) 2014 The WebM project authors. All Rights Reserved.
+ * Copyright (c) 2015 The WebM project authors. All Rights Reserved.
*
* Use of this source code is governed by a BSD-style license
* that can be found in the LICENSE file in the root of the source
@@ -10,8 +10,9 @@
#include <emmintrin.h>
+#include "vpx_dsp/vpx_dsp_common.h"
+#include "vpx_mem/vpx_mem.h"
#include "vpx_ports/mem.h"
-#include "vp9/common/vp9_common.h"
#if CONFIG_VP9_HIGHBITDEPTH
// from vp9_idct.h: typedef int32_t tran_low_t;
diff --git a/vp9/common/x86/vp9_loopfilter_intrin_avx2.c b/vpx_dsp/x86/loopfilter_avx2.c
similarity index 98%
rename from vp9/common/x86/vp9_loopfilter_intrin_avx2.c
rename to vpx_dsp/x86/loopfilter_avx2.c
index 770a65f..23a97dd 100644
--- a/vp9/common/x86/vp9_loopfilter_intrin_avx2.c
+++ b/vpx_dsp/x86/loopfilter_avx2.c
@@ -10,7 +10,7 @@
#include <immintrin.h> /* AVX2 */
-#include "./vp9_rtcd.h"
+#include "./vpx_dsp_rtcd.h"
#include "vpx_ports/mem.h"
static void mb_lpf_horizontal_edge_w_avx2_8(unsigned char *s, int p,
@@ -103,7 +103,7 @@
filt = _mm_adds_epi8(filt, work_a);
filt = _mm_adds_epi8(filt, work_a);
filt = _mm_adds_epi8(filt, work_a);
- /* (vp9_filter + 3 * (qs0 - ps0)) & mask */
+ /* (vpx_filter + 3 * (qs0 - ps0)) & mask */
filt = _mm_and_si128(filt, mask);
filter1 = _mm_adds_epi8(filt, t4);
@@ -515,7 +515,7 @@
filt = _mm_adds_epi8(filt, work_a);
filt = _mm_adds_epi8(filt, work_a);
filt = _mm_adds_epi8(filt, work_a);
- /* (vp9_filter + 3 * (qs0 - ps0)) & mask */
+ /* (vpx_filter + 3 * (qs0 - ps0)) & mask */
filt = _mm_and_si128(filt, mask);
filter1 = _mm_adds_epi8(filt, t4);
@@ -976,7 +976,7 @@
}
}
-void vp9_lpf_horizontal_16_avx2(unsigned char *s, int p,
+void vpx_lpf_horizontal_16_avx2(unsigned char *s, int p,
const unsigned char *_blimit, const unsigned char *_limit,
const unsigned char *_thresh, int count) {
if (count == 1)
diff --git a/vp9/common/x86/vp9_loopfilter_mmx.asm b/vpx_dsp/x86/loopfilter_mmx.asm
similarity index 98%
rename from vp9/common/x86/vp9_loopfilter_mmx.asm
rename to vpx_dsp/x86/loopfilter_mmx.asm
index f5f7d5a..b9c18b6 100644
--- a/vp9/common/x86/vp9_loopfilter_mmx.asm
+++ b/vpx_dsp/x86/loopfilter_mmx.asm
@@ -12,7 +12,7 @@
%include "vpx_ports/x86_abi_support.asm"
-;void vp9_lpf_horizontal_4_mmx
+;void vpx_lpf_horizontal_4_mmx
;(
; unsigned char *src_ptr,
; int src_pixel_step,
@@ -21,8 +21,8 @@
; const char *thresh,
; int count
;)
-global sym(vp9_lpf_horizontal_4_mmx) PRIVATE
-sym(vp9_lpf_horizontal_4_mmx):
+global sym(vpx_lpf_horizontal_4_mmx) PRIVATE
+sym(vpx_lpf_horizontal_4_mmx):
push rbp
mov rbp, rsp
SHADOW_ARGS_TO_STACK 6
@@ -224,7 +224,7 @@
ret
-;void vp9_lpf_vertical_4_mmx
+;void vpx_lpf_vertical_4_mmx
;(
; unsigned char *src_ptr,
; int src_pixel_step,
@@ -233,8 +233,8 @@
; const char *thresh,
; int count
;)
-global sym(vp9_lpf_vertical_4_mmx) PRIVATE
-sym(vp9_lpf_vertical_4_mmx):
+global sym(vpx_lpf_vertical_4_mmx) PRIVATE
+sym(vpx_lpf_vertical_4_mmx):
push rbp
mov rbp, rsp
SHADOW_ARGS_TO_STACK 6
diff --git a/vp9/common/x86/vp9_loopfilter_intrin_sse2.c b/vpx_dsp/x86/loopfilter_sse2.c
similarity index 97%
rename from vp9/common/x86/vp9_loopfilter_intrin_sse2.c
rename to vpx_dsp/x86/loopfilter_sse2.c
index fe8af54..ed10127 100644
--- a/vp9/common/x86/vp9_loopfilter_intrin_sse2.c
+++ b/vpx_dsp/x86/loopfilter_sse2.c
@@ -10,8 +10,8 @@
#include <emmintrin.h> // SSE2
-#include "./vp9_rtcd.h"
-#include "vp9/common/vp9_loopfilter.h"
+#include "./vpx_dsp_rtcd.h"
+#include "vpx_ports/mem.h"
#include "vpx_ports/emmintrin_compat.h"
static INLINE __m128i abs_diff(__m128i a, __m128i b) {
@@ -100,7 +100,7 @@
filt = _mm_adds_epi8(filt, work_a);
filt = _mm_adds_epi8(filt, work_a);
filt = _mm_adds_epi8(filt, work_a);
- // (vp9_filter + 3 * (qs0 - ps0)) & mask
+ // (vpx_filter + 3 * (qs0 - ps0)) & mask
filt = _mm_and_si128(filt, mask);
filter1 = _mm_adds_epi8(filt, t4);
@@ -495,7 +495,7 @@
filt = _mm_adds_epi8(filt, work_a);
filt = _mm_adds_epi8(filt, work_a);
filt = _mm_adds_epi8(filt, work_a);
- // (vp9_filter + 3 * (qs0 - ps0)) & mask
+ // (vpx_filter + 3 * (qs0 - ps0)) & mask
filt = _mm_and_si128(filt, mask);
filter1 = _mm_adds_epi8(filt, t4);
filter2 = _mm_adds_epi8(filt, t3);
@@ -717,7 +717,7 @@
}
// TODO(yunqingwang): remove count and call these 2 functions (8 or 16) directly.
-void vp9_lpf_horizontal_16_sse2(unsigned char *s, int p,
+void vpx_lpf_horizontal_16_sse2(unsigned char *s, int p,
const unsigned char *_blimit,
const unsigned char *_limit,
const unsigned char *_thresh, int count) {
@@ -727,7 +727,7 @@
mb_lpf_horizontal_edge_w_sse2_16(s, p, _blimit, _limit, _thresh);
}
-void vp9_lpf_horizontal_8_sse2(unsigned char *s, int p,
+void vpx_lpf_horizontal_8_sse2(unsigned char *s, int p,
const unsigned char *_blimit,
const unsigned char *_limit,
const unsigned char *_thresh, int count) {
@@ -874,7 +874,7 @@
filt = _mm_adds_epi8(filt, work_a);
filt = _mm_adds_epi8(filt, work_a);
filt = _mm_adds_epi8(filt, work_a);
- // (vp9_filter + 3 * (qs0 - ps0)) & mask
+ // (vpx_filter + 3 * (qs0 - ps0)) & mask
filt = _mm_and_si128(filt, mask);
filter1 = _mm_adds_epi8(filt, t4);
@@ -943,7 +943,7 @@
}
}
-void vp9_lpf_horizontal_8_dual_sse2(uint8_t *s, int p,
+void vpx_lpf_horizontal_8_dual_sse2(uint8_t *s, int p,
const uint8_t *_blimit0,
const uint8_t *_limit0,
const uint8_t *_thresh0,
@@ -1115,7 +1115,7 @@
filt = _mm_adds_epi8(filt, work_a);
filt = _mm_adds_epi8(filt, work_a);
filt = _mm_adds_epi8(filt, work_a);
- // (vp9_filter + 3 * (qs0 - ps0)) & mask
+ // (vpx_filter + 3 * (qs0 - ps0)) & mask
filt = _mm_and_si128(filt, mask);
filter1 = _mm_adds_epi8(filt, t4);
@@ -1190,7 +1190,7 @@
}
}
-void vp9_lpf_horizontal_4_dual_sse2(unsigned char *s, int p,
+void vpx_lpf_horizontal_4_dual_sse2(unsigned char *s, int p,
const unsigned char *_blimit0,
const unsigned char *_limit0,
const unsigned char *_thresh0,
@@ -1286,7 +1286,7 @@
filt = _mm_adds_epi8(filt, work_a);
filt = _mm_adds_epi8(filt, work_a);
filt = _mm_adds_epi8(filt, work_a);
- // (vp9_filter + 3 * (qs0 - ps0)) & mask
+ // (vpx_filter + 3 * (qs0 - ps0)) & mask
filt = _mm_and_si128(filt, mask);
filter1 = _mm_adds_epi8(filt, t4);
@@ -1464,7 +1464,7 @@
} while (++idx8x8 < num_8x8_to_transpose);
}
-void vp9_lpf_vertical_4_dual_sse2(uint8_t *s, int p, const uint8_t *blimit0,
+void vpx_lpf_vertical_4_dual_sse2(uint8_t *s, int p, const uint8_t *blimit0,
const uint8_t *limit0,
const uint8_t *thresh0,
const uint8_t *blimit1,
@@ -1478,7 +1478,7 @@
transpose8x16(s - 4, s - 4 + p * 8, p, t_dst, 16);
// Loop filtering
- vp9_lpf_horizontal_4_dual_sse2(t_dst + 4 * 16, 16, blimit0, limit0, thresh0,
+ vpx_lpf_horizontal_4_dual_sse2(t_dst + 4 * 16, 16, blimit0, limit0, thresh0,
blimit1, limit1, thresh1);
src[0] = t_dst;
src[1] = t_dst + 8;
@@ -1489,7 +1489,7 @@
transpose(src, 16, dst, p, 2);
}
-void vp9_lpf_vertical_8_sse2(unsigned char *s, int p,
+void vpx_lpf_vertical_8_sse2(unsigned char *s, int p,
const unsigned char *blimit,
const unsigned char *limit,
const unsigned char *thresh, int count) {
@@ -1505,7 +1505,7 @@
transpose(src, p, dst, 8, 1);
// Loop filtering
- vp9_lpf_horizontal_8_sse2(t_dst + 4 * 8, 8, blimit, limit, thresh, 1);
+ vpx_lpf_horizontal_8_sse2(t_dst + 4 * 8, 8, blimit, limit, thresh, 1);
src[0] = t_dst;
dst[0] = s - 4;
@@ -1514,7 +1514,7 @@
transpose(src, 8, dst, p, 1);
}
-void vp9_lpf_vertical_8_dual_sse2(uint8_t *s, int p, const uint8_t *blimit0,
+void vpx_lpf_vertical_8_dual_sse2(uint8_t *s, int p, const uint8_t *blimit0,
const uint8_t *limit0,
const uint8_t *thresh0,
const uint8_t *blimit1,
@@ -1528,7 +1528,7 @@
transpose8x16(s - 4, s - 4 + p * 8, p, t_dst, 16);
// Loop filtering
- vp9_lpf_horizontal_8_dual_sse2(t_dst + 4 * 16, 16, blimit0, limit0, thresh0,
+ vpx_lpf_horizontal_8_dual_sse2(t_dst + 4 * 16, 16, blimit0, limit0, thresh0,
blimit1, limit1, thresh1);
src[0] = t_dst;
src[1] = t_dst + 8;
@@ -1540,7 +1540,7 @@
transpose(src, 16, dst, p, 2);
}
-void vp9_lpf_vertical_16_sse2(unsigned char *s, int p,
+void vpx_lpf_vertical_16_sse2(unsigned char *s, int p,
const unsigned char *blimit,
const unsigned char *limit,
const unsigned char *thresh) {
@@ -1568,7 +1568,7 @@
transpose(src, 8, dst, p, 2);
}
-void vp9_lpf_vertical_16_dual_sse2(unsigned char *s, int p,
+void vpx_lpf_vertical_16_dual_sse2(unsigned char *s, int p,
const uint8_t *blimit, const uint8_t *limit,
const uint8_t *thresh) {
DECLARE_ALIGNED(16, unsigned char, t_dst[256]);
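The "(vpx_filter + 3 * (qs0 - ps0)) & mask" comment that recurs throughout these files refers to the filter4 core. A scalar model (a sketch following vpx_dsp's C reference; the ps/qs values are pixels biased into signed range, and mask/hev are per-pixel 0x00/0xff masks as in the SIMD code):

    static inline int8_t clamp8(int v) {
      return (int8_t)(v < -128 ? -128 : (v > 127 ? 127 : v));
    }

    static void filter4_model(int8_t mask, int8_t hev,
                              int8_t *ps1, int8_t *ps0,
                              int8_t *qs0, int8_t *qs1) {
      int8_t filt = (int8_t)(clamp8(*ps1 - *qs1) & hev);
      filt = (int8_t)(clamp8(filt + 3 * (*qs0 - *ps0)) & mask);  /* the line */
      const int8_t filter1 = (int8_t)(clamp8(filt + 4) >> 3);
      const int8_t filter2 = (int8_t)(clamp8(filt + 3) >> 3);
      *qs0 = clamp8(*qs0 - filter1);
      *ps0 = clamp8(*ps0 + filter2);
      filt = (int8_t)(((filter1 + 1) >> 1) & ~hev);  /* outer taps skip hev */
      *qs1 = clamp8(*qs1 - filt);
      *ps1 = clamp8(*ps1 + filt);
    }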
diff --git a/vpx_dsp/x86/quantize_sse2.c b/vpx_dsp/x86/quantize_sse2.c
new file mode 100644
index 0000000..8d51aeb
--- /dev/null
+++ b/vpx_dsp/x86/quantize_sse2.c
@@ -0,0 +1,223 @@
+/*
+ * Copyright (c) 2015 The WebM project authors. All Rights Reserved.
+ *
+ * Use of this source code is governed by a BSD-style license
+ * that can be found in the LICENSE file in the root of the source
+ * tree. An additional intellectual property rights grant can be found
+ * in the file PATENTS. All contributing project authors may
+ * be found in the AUTHORS file in the root of the source tree.
+ */
+
+#include <emmintrin.h>
+#include <xmmintrin.h>
+
+#include "./vpx_dsp_rtcd.h"
+#include "vpx/vpx_integer.h"
+
+void vp9_quantize_b_sse2(const int16_t* coeff_ptr, intptr_t n_coeffs,
+ int skip_block, const int16_t* zbin_ptr,
+ const int16_t* round_ptr, const int16_t* quant_ptr,
+ const int16_t* quant_shift_ptr, int16_t* qcoeff_ptr,
+ int16_t* dqcoeff_ptr, const int16_t* dequant_ptr,
+ uint16_t* eob_ptr,
+ const int16_t* scan_ptr,
+ const int16_t* iscan_ptr) {
+ __m128i zero;
+ (void)scan_ptr;
+
+ coeff_ptr += n_coeffs;
+ iscan_ptr += n_coeffs;
+ qcoeff_ptr += n_coeffs;
+ dqcoeff_ptr += n_coeffs;
+ n_coeffs = -n_coeffs;
+ zero = _mm_setzero_si128();
+ if (!skip_block) {
+ __m128i eob;
+ __m128i zbin;
+ __m128i round, quant, dequant, shift;
+ {
+ __m128i coeff0, coeff1;
+
+ // Setup global values
+ {
+ __m128i pw_1;
+ zbin = _mm_load_si128((const __m128i*)zbin_ptr);
+ round = _mm_load_si128((const __m128i*)round_ptr);
+ quant = _mm_load_si128((const __m128i*)quant_ptr);
+ pw_1 = _mm_set1_epi16(1);
+ zbin = _mm_sub_epi16(zbin, pw_1);
+ dequant = _mm_load_si128((const __m128i*)dequant_ptr);
+ shift = _mm_load_si128((const __m128i*)quant_shift_ptr);
+ }
+
+ {
+ __m128i coeff0_sign, coeff1_sign;
+ __m128i qcoeff0, qcoeff1;
+ __m128i qtmp0, qtmp1;
+ __m128i cmp_mask0, cmp_mask1;
+ // Do DC and first 15 AC
+ coeff0 = _mm_load_si128((const __m128i*)(coeff_ptr + n_coeffs));
+ coeff1 = _mm_load_si128((const __m128i*)(coeff_ptr + n_coeffs) + 1);
+
+ // Poor man's sign extract
+ coeff0_sign = _mm_srai_epi16(coeff0, 15);
+ coeff1_sign = _mm_srai_epi16(coeff1, 15);
+ qcoeff0 = _mm_xor_si128(coeff0, coeff0_sign);
+ qcoeff1 = _mm_xor_si128(coeff1, coeff1_sign);
+ qcoeff0 = _mm_sub_epi16(qcoeff0, coeff0_sign);
+ qcoeff1 = _mm_sub_epi16(qcoeff1, coeff1_sign);
+
+ cmp_mask0 = _mm_cmpgt_epi16(qcoeff0, zbin);
+ zbin = _mm_unpackhi_epi64(zbin, zbin); // Switch DC to AC
+ cmp_mask1 = _mm_cmpgt_epi16(qcoeff1, zbin);
+ qcoeff0 = _mm_adds_epi16(qcoeff0, round);
+ round = _mm_unpackhi_epi64(round, round);
+ qcoeff1 = _mm_adds_epi16(qcoeff1, round);
+ qtmp0 = _mm_mulhi_epi16(qcoeff0, quant);
+ quant = _mm_unpackhi_epi64(quant, quant);
+ qtmp1 = _mm_mulhi_epi16(qcoeff1, quant);
+ qtmp0 = _mm_add_epi16(qtmp0, qcoeff0);
+ qtmp1 = _mm_add_epi16(qtmp1, qcoeff1);
+ qcoeff0 = _mm_mulhi_epi16(qtmp0, shift);
+ shift = _mm_unpackhi_epi64(shift, shift);
+ qcoeff1 = _mm_mulhi_epi16(qtmp1, shift);
+
+ // Reinsert signs
+ qcoeff0 = _mm_xor_si128(qcoeff0, coeff0_sign);
+ qcoeff1 = _mm_xor_si128(qcoeff1, coeff1_sign);
+ qcoeff0 = _mm_sub_epi16(qcoeff0, coeff0_sign);
+ qcoeff1 = _mm_sub_epi16(qcoeff1, coeff1_sign);
+
+ // Mask out zbin threshold coeffs
+ qcoeff0 = _mm_and_si128(qcoeff0, cmp_mask0);
+ qcoeff1 = _mm_and_si128(qcoeff1, cmp_mask1);
+
+ _mm_store_si128((__m128i*)(qcoeff_ptr + n_coeffs), qcoeff0);
+ _mm_store_si128((__m128i*)(qcoeff_ptr + n_coeffs) + 1, qcoeff1);
+
+ coeff0 = _mm_mullo_epi16(qcoeff0, dequant);
+ dequant = _mm_unpackhi_epi64(dequant, dequant);
+ coeff1 = _mm_mullo_epi16(qcoeff1, dequant);
+
+ _mm_store_si128((__m128i*)(dqcoeff_ptr + n_coeffs), coeff0);
+ _mm_store_si128((__m128i*)(dqcoeff_ptr + n_coeffs) + 1, coeff1);
+ }
+
+ {
+ // Scan for eob
+ __m128i zero_coeff0, zero_coeff1;
+ __m128i nzero_coeff0, nzero_coeff1;
+ __m128i iscan0, iscan1;
+ __m128i eob1;
+ zero_coeff0 = _mm_cmpeq_epi16(coeff0, zero);
+ zero_coeff1 = _mm_cmpeq_epi16(coeff1, zero);
+ nzero_coeff0 = _mm_cmpeq_epi16(zero_coeff0, zero);
+ nzero_coeff1 = _mm_cmpeq_epi16(zero_coeff1, zero);
+ iscan0 = _mm_load_si128((const __m128i*)(iscan_ptr + n_coeffs));
+ iscan1 = _mm_load_si128((const __m128i*)(iscan_ptr + n_coeffs) + 1);
+ // Add one to convert from indices to counts
+ iscan0 = _mm_sub_epi16(iscan0, nzero_coeff0);
+ iscan1 = _mm_sub_epi16(iscan1, nzero_coeff1);
+ eob = _mm_and_si128(iscan0, nzero_coeff0);
+ eob1 = _mm_and_si128(iscan1, nzero_coeff1);
+ eob = _mm_max_epi16(eob, eob1);
+ }
+ n_coeffs += 8 * 2;
+ }
+
+ // AC only loop
+ while (n_coeffs < 0) {
+ __m128i coeff0, coeff1;
+ {
+ __m128i coeff0_sign, coeff1_sign;
+ __m128i qcoeff0, qcoeff1;
+ __m128i qtmp0, qtmp1;
+ __m128i cmp_mask0, cmp_mask1;
+
+ coeff0 = _mm_load_si128((const __m128i*)(coeff_ptr + n_coeffs));
+ coeff1 = _mm_load_si128((const __m128i*)(coeff_ptr + n_coeffs) + 1);
+
+ // Poor man's sign extract
+ coeff0_sign = _mm_srai_epi16(coeff0, 15);
+ coeff1_sign = _mm_srai_epi16(coeff1, 15);
+ qcoeff0 = _mm_xor_si128(coeff0, coeff0_sign);
+ qcoeff1 = _mm_xor_si128(coeff1, coeff1_sign);
+ qcoeff0 = _mm_sub_epi16(qcoeff0, coeff0_sign);
+ qcoeff1 = _mm_sub_epi16(qcoeff1, coeff1_sign);
+
+ cmp_mask0 = _mm_cmpgt_epi16(qcoeff0, zbin);
+ cmp_mask1 = _mm_cmpgt_epi16(qcoeff1, zbin);
+ qcoeff0 = _mm_adds_epi16(qcoeff0, round);
+ qcoeff1 = _mm_adds_epi16(qcoeff1, round);
+ qtmp0 = _mm_mulhi_epi16(qcoeff0, quant);
+ qtmp1 = _mm_mulhi_epi16(qcoeff1, quant);
+ qtmp0 = _mm_add_epi16(qtmp0, qcoeff0);
+ qtmp1 = _mm_add_epi16(qtmp1, qcoeff1);
+ qcoeff0 = _mm_mulhi_epi16(qtmp0, shift);
+ qcoeff1 = _mm_mulhi_epi16(qtmp1, shift);
+
+ // Reinsert signs
+ qcoeff0 = _mm_xor_si128(qcoeff0, coeff0_sign);
+ qcoeff1 = _mm_xor_si128(qcoeff1, coeff1_sign);
+ qcoeff0 = _mm_sub_epi16(qcoeff0, coeff0_sign);
+ qcoeff1 = _mm_sub_epi16(qcoeff1, coeff1_sign);
+
+ // Mask out zbin threshold coeffs
+ qcoeff0 = _mm_and_si128(qcoeff0, cmp_mask0);
+ qcoeff1 = _mm_and_si128(qcoeff1, cmp_mask1);
+
+ _mm_store_si128((__m128i*)(qcoeff_ptr + n_coeffs), qcoeff0);
+ _mm_store_si128((__m128i*)(qcoeff_ptr + n_coeffs) + 1, qcoeff1);
+
+ coeff0 = _mm_mullo_epi16(qcoeff0, dequant);
+ coeff1 = _mm_mullo_epi16(qcoeff1, dequant);
+
+ _mm_store_si128((__m128i*)(dqcoeff_ptr + n_coeffs), coeff0);
+ _mm_store_si128((__m128i*)(dqcoeff_ptr + n_coeffs) + 1, coeff1);
+ }
+
+ {
+ // Scan for eob
+ __m128i zero_coeff0, zero_coeff1;
+ __m128i nzero_coeff0, nzero_coeff1;
+ __m128i iscan0, iscan1;
+ __m128i eob0, eob1;
+ zero_coeff0 = _mm_cmpeq_epi16(coeff0, zero);
+ zero_coeff1 = _mm_cmpeq_epi16(coeff1, zero);
+ nzero_coeff0 = _mm_cmpeq_epi16(zero_coeff0, zero);
+ nzero_coeff1 = _mm_cmpeq_epi16(zero_coeff1, zero);
+ iscan0 = _mm_load_si128((const __m128i*)(iscan_ptr + n_coeffs));
+ iscan1 = _mm_load_si128((const __m128i*)(iscan_ptr + n_coeffs) + 1);
+ // Add one to convert from indices to counts
+ iscan0 = _mm_sub_epi16(iscan0, nzero_coeff0);
+ iscan1 = _mm_sub_epi16(iscan1, nzero_coeff1);
+ eob0 = _mm_and_si128(iscan0, nzero_coeff0);
+ eob1 = _mm_and_si128(iscan1, nzero_coeff1);
+ eob0 = _mm_max_epi16(eob0, eob1);
+ eob = _mm_max_epi16(eob, eob0);
+ }
+ n_coeffs += 8 * 2;
+ }
+
+ // Accumulate EOB
+ {
+ __m128i eob_shuffled;
+ eob_shuffled = _mm_shuffle_epi32(eob, 0xe);
+ eob = _mm_max_epi16(eob, eob_shuffled);
+ eob_shuffled = _mm_shufflelo_epi16(eob, 0xe);
+ eob = _mm_max_epi16(eob, eob_shuffled);
+ eob_shuffled = _mm_shufflelo_epi16(eob, 0x1);
+ eob = _mm_max_epi16(eob, eob_shuffled);
+ *eob_ptr = _mm_extract_epi16(eob, 1);
+ }
+ } else {
+ do {
+ _mm_store_si128((__m128i*)(dqcoeff_ptr + n_coeffs), zero);
+ _mm_store_si128((__m128i*)(dqcoeff_ptr + n_coeffs) + 1, zero);
+ _mm_store_si128((__m128i*)(qcoeff_ptr + n_coeffs), zero);
+ _mm_store_si128((__m128i*)(qcoeff_ptr + n_coeffs) + 1, zero);
+ n_coeffs += 8 * 2;
+ } while (n_coeffs < 0);
+ *eob_ptr = 0;
+ }
+}
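A per-coefficient scalar model of the vector loop above (a sketch; the SSE2 code processes 16 coefficients per iteration and tracks eob as max(iscan[i] + 1) over nonzero outputs):

    #include <stdint.h>
    #include <stdlib.h>

    static void quantize_b_coeff_model(int16_t coeff, int16_t zbin,
                                       int16_t round, int16_t quant,
                                       int16_t shift, int16_t dequant,
                                       int16_t *qcoeff, int16_t *dqcoeff) {
      const int abs_coeff = abs(coeff);
      if (abs_coeff > zbin - 1) {        /* cmpgt against the pre-biased zbin */
        int tmp = abs_coeff + round;
        if (tmp > 32767) tmp = 32767;    /* _mm_adds_epi16 saturates */
        tmp = ((((tmp * quant) >> 16) + tmp) * shift) >> 16;  /* two mulhi steps */
        *qcoeff = (int16_t)(coeff < 0 ? -tmp : tmp);          /* reinsert sign */
        *dqcoeff = (int16_t)(*qcoeff * dequant);
      } else {
        *qcoeff = 0;                     /* masked out by cmp_mask */
        *dqcoeff = 0;
      }
    }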
diff --git a/vpx_dsp/x86/quantize_ssse3_x86_64.asm b/vpx_dsp/x86/quantize_ssse3_x86_64.asm
new file mode 100644
index 0000000..3784d9d
--- /dev/null
+++ b/vpx_dsp/x86/quantize_ssse3_x86_64.asm
@@ -0,0 +1,216 @@
+;
+; Copyright (c) 2015 The WebM project authors. All Rights Reserved.
+;
+; Use of this source code is governed by a BSD-style license
+; that can be found in the LICENSE file in the root of the source
+; tree. An additional intellectual property rights grant can be found
+; in the file PATENTS. All contributing project authors may
+; be found in the AUTHORS file in the root of the source tree.
+;
+
+%include "third_party/x86inc/x86inc.asm"
+
+SECTION_RODATA
+pw_1: times 8 dw 1
+
+SECTION .text
+
+; TODO(yunqingwang) fix quantize_b code for skip=1 case.
+%macro QUANTIZE_FN 2
+cglobal quantize_%1, 0, %2, 15, coeff, ncoeff, skip, zbin, round, quant, \
+ shift, qcoeff, dqcoeff, dequant, \
+ eob, scan, iscan
+ cmp dword skipm, 0
+ jne .blank
+
+ ; actual quantize loop - setup pointers, rounders, etc.
+ movifnidn coeffq, coeffmp
+ movifnidn ncoeffq, ncoeffmp
+ mov r2, dequantmp
+ movifnidn zbinq, zbinmp
+ movifnidn roundq, roundmp
+ movifnidn quantq, quantmp
+ mova m0, [zbinq] ; m0 = zbin
+ mova m1, [roundq] ; m1 = round
+ mova m2, [quantq] ; m2 = quant
+%ifidn %1, b_32x32
+ pcmpeqw m5, m5
+ psrlw m5, 15
+ paddw m0, m5
+ paddw m1, m5
+ psrlw m0, 1 ; m0 = (m0 + 1) / 2
+ psrlw m1, 1 ; m1 = (m1 + 1) / 2
+%endif
+ mova m3, [r2q] ; m3 = dequant
+ psubw m0, [pw_1]
+ mov r2, shiftmp
+ mov r3, qcoeffmp
+ mova m4, [r2] ; m4 = shift
+ mov r4, dqcoeffmp
+ mov r5, iscanmp
+%ifidn %1, b_32x32
+ psllw m4, 1
+%endif
+ pxor m5, m5 ; m5 = dedicated zero
+ DEFINE_ARGS coeff, ncoeff, d1, qcoeff, dqcoeff, iscan, d2, d3, d4, d5, eob
+ lea coeffq, [ coeffq+ncoeffq*2]
+ lea iscanq, [ iscanq+ncoeffq*2]
+ lea qcoeffq, [ qcoeffq+ncoeffq*2]
+ lea dqcoeffq, [dqcoeffq+ncoeffq*2]
+ neg ncoeffq
+
+ ; get DC and first 15 AC coeffs
+ mova m9, [ coeffq+ncoeffq*2+ 0] ; m9 = c[i]
+ mova m10, [ coeffq+ncoeffq*2+16] ; m10 = c[i]
+ pabsw m6, m9 ; m6 = abs(m9)
+ pabsw m11, m10 ; m11 = abs(m10)
+ pcmpgtw m7, m6, m0 ; m7 = c[i] >= zbin
+ punpckhqdq m0, m0
+ pcmpgtw m12, m11, m0 ; m12 = c[i] >= zbin
+ paddsw m6, m1 ; m6 += round
+ punpckhqdq m1, m1
+ paddsw m11, m1 ; m11 += round
+ pmulhw m8, m6, m2 ; m8 = m6*q>>16
+ punpckhqdq m2, m2
+ pmulhw m13, m11, m2 ; m13 = m11*q>>16
+ paddw m8, m6 ; m8 += m6
+ paddw m13, m11 ; m13 += m11
+ pmulhw m8, m4 ; m8 = m8*qsh>>16
+ punpckhqdq m4, m4
+ pmulhw m13, m4 ; m13 = m13*qsh>>16
+ psignw m8, m9 ; m8 = reinsert sign
+ psignw m13, m10 ; m13 = reinsert sign
+ pand m8, m7
+ pand m13, m12
+ mova [qcoeffq+ncoeffq*2+ 0], m8
+ mova [qcoeffq+ncoeffq*2+16], m13
+%ifidn %1, b_32x32
+ pabsw m8, m8
+ pabsw m13, m13
+%endif
+ pmullw m8, m3 ; dqc[i] = qc[i] * q
+ punpckhqdq m3, m3
+ pmullw m13, m3 ; dqc[i] = qc[i] * q
+%ifidn %1, b_32x32
+ psrlw m8, 1
+ psrlw m13, 1
+ psignw m8, m9
+ psignw m13, m10
+%endif
+ mova [dqcoeffq+ncoeffq*2+ 0], m8
+ mova [dqcoeffq+ncoeffq*2+16], m13
+ pcmpeqw m8, m5 ; m8 = c[i] == 0
+ pcmpeqw m13, m5 ; m13 = c[i] == 0
+ mova m6, [ iscanq+ncoeffq*2+ 0] ; m6 = scan[i]
+ mova m11, [ iscanq+ncoeffq*2+16] ; m11 = scan[i]
+ psubw m6, m7 ; m6 = scan[i] + 1
+ psubw m11, m12 ; m11 = scan[i] + 1
+ pandn m8, m6 ; m8 = max(eob)
+ pandn m13, m11 ; m13 = max(eob)
+ pmaxsw m8, m13
+ add ncoeffq, mmsize
+ jz .accumulate_eob
+
+.ac_only_loop:
+ mova m9, [ coeffq+ncoeffq*2+ 0] ; m9 = c[i]
+ mova m10, [ coeffq+ncoeffq*2+16] ; m10 = c[i]
+ pabsw m6, m9 ; m6 = abs(m9)
+ pabsw m11, m10 ; m11 = abs(m10)
+ pcmpgtw m7, m6, m0 ; m7 = c[i] >= zbin
+ pcmpgtw m12, m11, m0 ; m12 = c[i] >= zbin
+%ifidn %1, b_32x32
+ pmovmskb r6d, m7
+ pmovmskb r2d, m12
+ or r6, r2
+ jz .skip_iter
+%endif
+ paddsw m6, m1 ; m6 += round
+ paddsw m11, m1 ; m11 += round
+ pmulhw m14, m6, m2 ; m14 = m6*q>>16
+ pmulhw m13, m11, m2 ; m13 = m11*q>>16
+ paddw m14, m6 ; m14 += m6
+ paddw m13, m11 ; m13 += m11
+ pmulhw m14, m4 ; m14 = m14*qsh>>16
+ pmulhw m13, m4 ; m13 = m13*qsh>>16
+ psignw m14, m9 ; m14 = reinsert sign
+ psignw m13, m10 ; m13 = reinsert sign
+ pand m14, m7
+ pand m13, m12
+ mova [qcoeffq+ncoeffq*2+ 0], m14
+ mova [qcoeffq+ncoeffq*2+16], m13
+%ifidn %1, b_32x32
+ pabsw m14, m14
+ pabsw m13, m13
+%endif
+ pmullw m14, m3 ; dqc[i] = qc[i] * q
+ pmullw m13, m3 ; dqc[i] = qc[i] * q
+%ifidn %1, b_32x32
+ psrlw m14, 1
+ psrlw m13, 1
+ psignw m14, m9
+ psignw m13, m10
+%endif
+ mova [dqcoeffq+ncoeffq*2+ 0], m14
+ mova [dqcoeffq+ncoeffq*2+16], m13
+ pcmpeqw m14, m5 ; m14 = c[i] == 0
+ pcmpeqw m13, m5 ; m13 = c[i] == 0
+ mova m6, [ iscanq+ncoeffq*2+ 0] ; m6 = scan[i]
+ mova m11, [ iscanq+ncoeffq*2+16] ; m11 = scan[i]
+ psubw m6, m7 ; m6 = scan[i] + 1
+ psubw m11, m12 ; m11 = scan[i] + 1
+ pandn m14, m6 ; m14 = max(eob)
+ pandn m13, m11 ; m13 = max(eob)
+ pmaxsw m8, m14
+ pmaxsw m8, m13
+ add ncoeffq, mmsize
+ jl .ac_only_loop
+
+%ifidn %1, b_32x32
+ jmp .accumulate_eob
+.skip_iter:
+ mova [qcoeffq+ncoeffq*2+ 0], m5
+ mova [qcoeffq+ncoeffq*2+16], m5
+ mova [dqcoeffq+ncoeffq*2+ 0], m5
+ mova [dqcoeffq+ncoeffq*2+16], m5
+ add ncoeffq, mmsize
+ jl .ac_only_loop
+%endif
+
+.accumulate_eob:
+ ; horizontally accumulate/max eobs and write into [eob] memory pointer
+ mov r2, eobmp
+ pshufd m7, m8, 0xe
+ pmaxsw m8, m7
+ pshuflw m7, m8, 0xe
+ pmaxsw m8, m7
+ pshuflw m7, m8, 0x1
+ pmaxsw m8, m7
+ pextrw r6, m8, 0
+ mov [r2], r6
+ RET
+
+ ; skip-block, i.e. just write all zeroes
+.blank:
+ mov r0, dqcoeffmp
+ movifnidn ncoeffq, ncoeffmp
+ mov r2, qcoeffmp
+ mov r3, eobmp
+ DEFINE_ARGS dqcoeff, ncoeff, qcoeff, eob
+ lea dqcoeffq, [dqcoeffq+ncoeffq*2]
+ lea qcoeffq, [ qcoeffq+ncoeffq*2]
+ neg ncoeffq
+ pxor m7, m7
+.blank_loop:
+ mova [dqcoeffq+ncoeffq*2+ 0], m7
+ mova [dqcoeffq+ncoeffq*2+16], m7
+ mova [qcoeffq+ncoeffq*2+ 0], m7
+ mova [qcoeffq+ncoeffq*2+16], m7
+ add ncoeffq, mmsize
+ jl .blank_loop
+ mov word [eobq], 0
+ RET
+%endmacro
+
+INIT_XMM ssse3
+QUANTIZE_FN b, 7
+QUANTIZE_FN b_32x32, 7
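Relative to the plain b kernel, the b_32x32 variant only changes scaling, as the %ifidn branches above show. A scalar sketch of those adjustments (16-bit wraparound of pmullw is ignored here):

    #include <stdlib.h>

    static void quantize_b_32x32_scaling_model(int16_t *zbin, int16_t *round,
                                               int16_t *shift) {
      *zbin = (int16_t)((*zbin + 1) >> 1);   /* paddw pw_1 ; psrlw 1 */
      *round = (int16_t)((*round + 1) >> 1);
      *shift = (int16_t)(*shift << 1);       /* psllw m4, 1 */
    }

    static int16_t dequant_32x32_model(int16_t qcoeff, int16_t dequant) {
      const int mag = (abs(qcoeff) * dequant) >> 1;  /* pabsw ; pmullw ; psrlw 1 */
      return (int16_t)(qcoeff < 0 ? -mag : mag);     /* psignw reapplies sign */
    }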
diff --git a/vpx_util/endian_inl.h b/vpx_util/endian_inl.h
index 91753a6..12cc720 100644
--- a/vpx_util/endian_inl.h
+++ b/vpx_util/endian_inl.h
@@ -44,9 +44,13 @@
#if defined(WORDS_BIGENDIAN)
#define HToLE32 BSwap32
#define HToLE16 BSwap16
+#define HToBE64(x) (x)
+#define HToBE32(x) (x)
#else
#define HToLE32(x) (x)
#define HToLE16(x) (x)
+#define HToBE64(x) BSwap64(x)
+#define HToBE32(x) BSwap32(x)
#endif
// clang-3.3 and gcc-4.3 have builtin functions for swap32/swap64
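A minimal usage sketch for the new macros (hypothetical caller, assuming vpx_util/endian_inl.h is included; HToBE32 is the identity on big-endian hosts and a byte swap elsewhere):

    #include <stdint.h>
    #include "vpx_util/endian_inl.h"

    /* Serialize a host-order word in big-endian byte order. */
    static uint32_t to_wire_be32(uint32_t host_value) {
      return HToBE32(host_value);
    }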