Merge "Fix linker warnings for bilinear filters"
diff --git a/build/make/configure.sh b/build/make/configure.sh
index bb7ab41..f361021 100755
--- a/build/make/configure.sh
+++ b/build/make/configure.sh
@@ -1062,7 +1062,7 @@
setup_gnu_toolchain
add_cflags -use-msasm -use-asm
add_ldflags -i-static
- enabled x86_64 && add_cflags -ipo -no-prec-div -static -xSSE2 -axSSE2
+ enabled x86_64 && add_cflags -ipo -static -O3
enabled x86_64 && AR=xiar
case ${tune_cpu} in
atom*)
diff --git a/build/make/rtcd.sh b/build/make/rtcd.sh
index 6cc3684..9a8d97e 100755
--- a/build/make/rtcd.sh
+++ b/build/make/rtcd.sh
@@ -290,9 +290,11 @@
{
$(set_function_pointers c $ALL_ARCHS)
#if HAVE_DSPR2
+#if CONFIG_VP8
void dsputil_static_init();
dsputil_static_init();
#endif
+#endif
}
#endif
$(common_bottom)
diff --git a/build/make/thumb.pm b/build/make/thumb.pm
index f347287..e1f34c1 100644
--- a/build/make/thumb.pm
+++ b/build/make/thumb.pm
@@ -47,7 +47,7 @@
# this is used, it's used for two subsequent load instructions,
# where a hand-written version of it could merge two subsequent
# add and sub instructions.
- s/^(\s*)((ldr|str)(ne)?)(\s+)(r\d+),\s*\[(\w+), -([^\]]+)\]/$1sub$4$5$7, $7, $8\n$1$2$5$6, [$7]\n$1add$4$5$7, $7, $8/g;
+ s/^(\s*)((ldr|str|pld)(ne)?)(\s+)(r\d+,\s*)?\[(\w+), -([^\]]+)\]/$1sub$4$5$7, $7, $8\n$1$2$5$6\[$7\]\n$1add$4$5$7, $7, $8/g;
# Convert register post indexing to a separate add instruction.
# This converts "ldrneb r9, [r0], r2" into "ldrneb r9, [r0]",
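The widened pattern also matches pld, which is why the destination-register group (r\d+,\s*)? became optional: pld takes no destination. For a line such as "ldrne r9, [r0, -r2]", the substitution emits the three-instruction sequence "subne r0, r0, r2", "ldrne r9, [r0]", "addne r0, r0, r2", since Thumb-2 lacks the negative register-offset addressing mode; as the comment above notes, a hand-written version could fold those add/sub pairs into neighboring instructions.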
diff --git a/examples.mk b/examples.mk
index c17fac9..7b47ade 100644
--- a/examples.mk
+++ b/examples.mk
@@ -49,9 +49,9 @@
UTILS-$(CONFIG_VP8_ENCODER) += vp8_scalable_patterns.c
vp8_scalable_patterns.GUID = 0D6A210B-F482-4D6F-8570-4A9C01ACC88C
vp8_scalable_patterns.DESCRIPTION = Temporal Scalability Encoder
-UTILS-$(CONFIG_VP8_ENCODER) += vp9_spatial_scalable_encoder.c
-vp8_scalable_patterns.GUID = 4A38598D-627D-4505-9C7B-D4020C84100D
-vp8_scalable_patterns.DESCRIPTION = Spatial Scalable Encoder
+UTILS-$(CONFIG_VP9_ENCODER) += vp9_spatial_scalable_encoder.c
+vp9_spatial_scalable_encoder.GUID = 4A38598D-627D-4505-9C7B-D4020C84100D
+vp9_spatial_scalable_encoder.DESCRIPTION = Spatial Scalable Encoder
# Clean up old ivfenc, ivfdec binaries.
ifeq ($(CONFIG_MSVS),yes)
diff --git a/libmkv/EbmlWriter.c b/libmkv/EbmlWriter.c
index 5fc5ed2..27cfe86 100644
--- a/libmkv/EbmlWriter.c
+++ b/libmkv/EbmlWriter.c
@@ -105,7 +105,7 @@
void Ebml_SerializeBinary(EbmlGlobal *glob, unsigned long class_id, unsigned long bin) {
int size;
for (size = 4; size > 1; size--) {
- if (bin & 0x000000ff << ((size - 1) * 8))
+ if (bin & (unsigned int)0x000000ff << ((size - 1) * 8))
break;
}
Ebml_WriteID(glob, class_id);
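The cast matters because 0x000000ff has type int: when size is 4, the old expression shifted set bits into the sign bit of a 32-bit signed int, which is undefined behavior. A minimal standalone sketch of the fixed, well-defined form (not part of the patch):

    #include <stdio.h>

    int main(void) {
      /* 0x000000ff << 24 overflows a 32-bit signed int (undefined);  */
      /* the unsigned shift is well-defined and yields the byte mask  */
      /* that the size loop in Ebml_SerializeBinary is probing for.   */
      unsigned int mask = (unsigned int)0x000000ff << ((4 - 1) * 8);
      printf("%#x\n", mask);  /* 0xff000000 */
      return 0;
    }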
diff --git a/test/dct16x16_test.cc b/test/dct16x16_test.cc
index 7d49c12..0d19aa0 100644
--- a/test/dct16x16_test.cc
+++ b/test/dct16x16_test.cc
@@ -258,7 +258,7 @@
}
typedef void (*fdct_t)(int16_t *in, int16_t *out, int stride);
-typedef void (*idct_t)(int16_t *in, uint8_t *out, int stride);
+typedef void (*idct_t)(int16_t *in, uint8_t *dst, int stride);
typedef void (*fht_t) (int16_t *in, int16_t *out, int stride, int tx_type);
typedef void (*iht_t) (int16_t *in, uint8_t *dst, int stride, int tx_type);
@@ -509,7 +509,8 @@
INSTANTIATE_TEST_CASE_P(
SSE2, Trans16x16DCT,
::testing::Values(
- make_tuple(&vp9_short_fdct16x16_sse2, &vp9_short_idct16x16_add_c, 0)));
+ make_tuple(&vp9_short_fdct16x16_sse2,
+ &vp9_short_idct16x16_add_sse2, 0)));
INSTANTIATE_TEST_CASE_P(
SSE2, Trans16x16HT,
::testing::Values(
diff --git a/test/fdct8x8_test.cc b/test/fdct8x8_test.cc
index ee6c9f6..7edb4d0 100644
--- a/test/fdct8x8_test.cc
+++ b/test/fdct8x8_test.cc
@@ -13,242 +13,309 @@
#include <string.h>
#include "third_party/googletest/src/include/gtest/gtest.h"
+#include "test/acm_random.h"
#include "test/clear_system_state.h"
#include "test/register_state_check.h"
-#include "vpx_ports/mem.h"
+#include "test/util.h"
extern "C" {
+#include "vp9/common/vp9_entropy.h"
#include "./vp9_rtcd.h"
void vp9_short_idct8x8_add_c(int16_t *input, uint8_t *output, int pitch);
}
-
-#include "test/acm_random.h"
#include "vpx/vpx_integer.h"
using libvpx_test::ACMRandom;
namespace {
-void fdct8x8(int16_t *in, int16_t *out, uint8_t* /*dst*/,
- int stride, int /*tx_type*/) {
+typedef void (*fdct_t)(int16_t *in, int16_t *out, int stride);
+typedef void (*idct_t)(int16_t *in, uint8_t *dst, int stride);
+typedef void (*fht_t) (int16_t *in, int16_t *out, int stride, int tx_type);
+typedef void (*iht_t) (int16_t *in, uint8_t *dst, int stride, int tx_type);
+
+void fdct8x8_ref(int16_t *in, int16_t *out, int stride, int tx_type) {
vp9_short_fdct8x8_c(in, out, stride);
}
-void idct8x8_add(int16_t* /*in*/, int16_t *out, uint8_t *dst,
- int stride, int /*tx_type*/) {
- vp9_short_idct8x8_add_c(out, dst, stride >> 1);
-}
-void fht8x8(int16_t *in, int16_t *out, uint8_t* /*dst*/,
- int stride, int tx_type) {
- // TODO(jingning): need to refactor this to test both _c and _sse2 functions,
- // when we have all inverse dct functions done sse2.
-#if HAVE_SSE2
- vp9_short_fht8x8_sse2(in, out, stride >> 1, tx_type);
-#else
- vp9_short_fht8x8_c(in, out, stride >> 1, tx_type);
-#endif
-}
-void iht8x8_add(int16_t* /*in*/, int16_t *out, uint8_t *dst,
- int stride, int tx_type) {
- vp9_short_iht8x8_add_c(out, dst, stride >> 1, tx_type);
+
+void fht8x8_ref(int16_t *in, int16_t *out, int stride, int tx_type) {
+ vp9_short_fht8x8_c(in, out, stride, tx_type);
}
-class FwdTrans8x8Test : public ::testing::TestWithParam<int> {
+class FwdTrans8x8TestBase {
public:
- virtual ~FwdTrans8x8Test() {}
- virtual void SetUp() {
- tx_type_ = GetParam();
- if (tx_type_ == 0) {
- fwd_txfm = fdct8x8;
- inv_txfm = idct8x8_add;
- } else {
- fwd_txfm = fht8x8;
- inv_txfm = iht8x8_add;
- }
- }
- virtual void TearDown() { libvpx_test::ClearSystemState(); }
+ virtual ~FwdTrans8x8TestBase() {}
protected:
- void RunFwdTxfm(int16_t *in, int16_t *out, uint8_t *dst,
- int stride, int tx_type) {
- (*fwd_txfm)(in, out, dst, stride, tx_type);
- }
- void RunInvTxfm(int16_t *in, int16_t *out, uint8_t *dst,
- int stride, int tx_type) {
- (*inv_txfm)(in, out, dst, stride, tx_type);
- }
+ virtual void RunFwdTxfm(int16_t *in, int16_t *out, int stride) = 0;
+ virtual void RunInvTxfm(int16_t *out, uint8_t *dst, int stride) = 0;
- int tx_type_;
- void (*fwd_txfm)(int16_t*, int16_t*, uint8_t*, int, int);
- void (*inv_txfm)(int16_t*, int16_t*, uint8_t*, int, int);
-};
+ void RunSignBiasCheck() {
+ ACMRandom rnd(ACMRandom::DeterministicSeed());
+ DECLARE_ALIGNED_ARRAY(16, int16_t, test_input_block, 64);
+ DECLARE_ALIGNED_ARRAY(16, int16_t, test_output_block, 64);
+ int count_sign_block[64][2];
+ const int count_test_block = 100000;
-TEST_P(FwdTrans8x8Test, SignBiasCheck) {
- ACMRandom rnd(ACMRandom::DeterministicSeed());
- DECLARE_ALIGNED_ARRAY(16, int16_t, test_input_block, 64);
- DECLARE_ALIGNED_ARRAY(16, int16_t, test_output_block, 64);
- const int pitch = 16;
- int count_sign_block[64][2];
- const int count_test_block = 100000;
+ memset(count_sign_block, 0, sizeof(count_sign_block));
- memset(count_sign_block, 0, sizeof(count_sign_block));
+ for (int i = 0; i < count_test_block; ++i) {
+ // Initialize a test block with input range [-255, 255].
+ for (int j = 0; j < 64; ++j)
+ test_input_block[j] = rnd.Rand8() - rnd.Rand8();
+ REGISTER_STATE_CHECK(
+ RunFwdTxfm(test_input_block, test_output_block, pitch_));
- for (int i = 0; i < count_test_block; ++i) {
- // Initialize a test block with input range [-255, 255].
- for (int j = 0; j < 64; ++j)
- test_input_block[j] = rnd.Rand8() - rnd.Rand8();
- REGISTER_STATE_CHECK(
- RunFwdTxfm(test_input_block, test_output_block,
- NULL, pitch, tx_type_));
+ for (int j = 0; j < 64; ++j) {
+ if (test_output_block[j] < 0)
+ ++count_sign_block[j][0];
+ else if (test_output_block[j] > 0)
+ ++count_sign_block[j][1];
+ }
+ }
for (int j = 0; j < 64; ++j) {
- if (test_output_block[j] < 0)
- ++count_sign_block[j][0];
- else if (test_output_block[j] > 0)
- ++count_sign_block[j][1];
+ const int diff = abs(count_sign_block[j][0] - count_sign_block[j][1]);
+ const int max_diff = 1125;
+ EXPECT_LT(diff, max_diff)
+ << "Error: 8x8 FDCT/FHT has a sign bias > "
+ << 1. * max_diff / count_test_block * 100 << "%"
+ << " for input range [-255, 255] at index " << j
+ << " count0: " << count_sign_block[j][0]
+ << " count1: " << count_sign_block[j][1]
+ << " diff: " << diff;
+ }
+
+ memset(count_sign_block, 0, sizeof(count_sign_block));
+
+ for (int i = 0; i < count_test_block; ++i) {
+ // Initialize a test block with input range [-15, 15].
+ for (int j = 0; j < 64; ++j)
+ test_input_block[j] = (rnd.Rand8() >> 4) - (rnd.Rand8() >> 4);
+ REGISTER_STATE_CHECK(
+ RunFwdTxfm(test_input_block, test_output_block, pitch_));
+
+ for (int j = 0; j < 64; ++j) {
+ if (test_output_block[j] < 0)
+ ++count_sign_block[j][0];
+ else if (test_output_block[j] > 0)
+ ++count_sign_block[j][1];
+ }
+ }
+
+ for (int j = 0; j < 64; ++j) {
+ const int diff = abs(count_sign_block[j][0] - count_sign_block[j][1]);
+ const int max_diff = 10000;
+ EXPECT_LT(diff, max_diff)
+ << "Error: 4x4 FDCT/FHT has a sign bias > "
+ << 1. * max_diff / count_test_block * 100 << "%"
+ << " for input range [-15, 15] at index " << j
+ << " count0: " << count_sign_block[j][0]
+ << " count1: " << count_sign_block[j][1]
+ << " diff: " << diff;
}
}
- for (int j = 0; j < 64; ++j) {
- const int diff = abs(count_sign_block[j][0] - count_sign_block[j][1]);
- const int max_diff = 1125;
- EXPECT_LT(diff, max_diff)
- << "Error: 8x8 FDCT/FHT has a sign bias > "
- << 1. * max_diff / count_test_block * 100 << "%"
- << " for input range [-255, 255] at index " << j
- << " count0: " << count_sign_block[j][0]
- << " count1: " << count_sign_block[j][1]
- << " diff: " << diff;
- }
-
- memset(count_sign_block, 0, sizeof(count_sign_block));
-
- for (int i = 0; i < count_test_block; ++i) {
- // Initialize a test block with input range [-15, 15].
- for (int j = 0; j < 64; ++j)
- test_input_block[j] = (rnd.Rand8() >> 4) - (rnd.Rand8() >> 4);
- REGISTER_STATE_CHECK(
- RunFwdTxfm(test_input_block, test_output_block,
- NULL, pitch, tx_type_));
-
- for (int j = 0; j < 64; ++j) {
- if (test_output_block[j] < 0)
- ++count_sign_block[j][0];
- else if (test_output_block[j] > 0)
- ++count_sign_block[j][1];
- }
- }
-
- for (int j = 0; j < 64; ++j) {
- const int diff = abs(count_sign_block[j][0] - count_sign_block[j][1]);
- const int max_diff = 10000;
- EXPECT_LT(diff, max_diff)
- << "Error: 4x4 FDCT/FHT has a sign bias > "
- << 1. * max_diff / count_test_block * 100 << "%"
- << " for input range [-15, 15] at index " << j
- << " count0: " << count_sign_block[j][0]
- << " count1: " << count_sign_block[j][1]
- << " diff: " << diff;
- }
-}
-
-TEST_P(FwdTrans8x8Test, RoundTripErrorCheck) {
- ACMRandom rnd(ACMRandom::DeterministicSeed());
- int max_error = 0;
- int total_error = 0;
- const int count_test_block = 100000;
- for (int i = 0; i < count_test_block; ++i) {
+ void RunRoundTripErrorCheck() {
+ ACMRandom rnd(ACMRandom::DeterministicSeed());
+ int max_error = 0;
+ int total_error = 0;
+ const int count_test_block = 100000;
DECLARE_ALIGNED_ARRAY(16, int16_t, test_input_block, 64);
DECLARE_ALIGNED_ARRAY(16, int16_t, test_temp_block, 64);
DECLARE_ALIGNED_ARRAY(16, uint8_t, dst, 64);
DECLARE_ALIGNED_ARRAY(16, uint8_t, src, 64);
- for (int j = 0; j < 64; ++j) {
- src[j] = rnd.Rand8();
- dst[j] = rnd.Rand8();
- }
- // Initialize a test block with input range [-255, 255].
- for (int j = 0; j < 64; ++j)
- test_input_block[j] = src[j] - dst[j];
+ for (int i = 0; i < count_test_block; ++i) {
+ // Initialize a test block with input range [-255, 255].
+ for (int j = 0; j < 64; ++j) {
+ src[j] = rnd.Rand8();
+ dst[j] = rnd.Rand8();
+ test_input_block[j] = src[j] - dst[j];
+ }
- const int pitch = 16;
- REGISTER_STATE_CHECK(
- RunFwdTxfm(test_input_block, test_temp_block,
- dst, pitch, tx_type_));
- for (int j = 0; j < 64; ++j) {
- if (test_temp_block[j] > 0) {
- test_temp_block[j] += 2;
- test_temp_block[j] /= 4;
- test_temp_block[j] *= 4;
- } else {
- test_temp_block[j] -= 2;
- test_temp_block[j] /= 4;
- test_temp_block[j] *= 4;
- }
- }
- REGISTER_STATE_CHECK(
- RunInvTxfm(test_input_block, test_temp_block,
- dst, pitch, tx_type_));
+ REGISTER_STATE_CHECK(
+ RunFwdTxfm(test_input_block, test_temp_block, pitch_));
+ for (int j = 0; j < 64; ++j) {
+ if (test_temp_block[j] > 0) {
+ test_temp_block[j] += 2;
+ test_temp_block[j] /= 4;
+ test_temp_block[j] *= 4;
+ } else {
+ test_temp_block[j] -= 2;
+ test_temp_block[j] /= 4;
+ test_temp_block[j] *= 4;
+ }
+ }
+ REGISTER_STATE_CHECK(
+ RunInvTxfm(test_temp_block, dst, pitch_));
- for (int j = 0; j < 64; ++j) {
- const int diff = dst[j] - src[j];
- const int error = diff * diff;
- if (max_error < error)
- max_error = error;
- total_error += error;
- }
- }
-
- EXPECT_GE(1, max_error)
- << "Error: 8x8 FDCT/IDCT or FHT/IHT has an individual roundtrip error > 1";
-
- EXPECT_GE(count_test_block/5, total_error)
- << "Error: 8x8 FDCT/IDCT or FHT/IHT has average roundtrip "
- "error > 1/5 per block";
-}
-
-TEST_P(FwdTrans8x8Test, ExtremalCheck) {
- ACMRandom rnd(ACMRandom::DeterministicSeed());
- int max_error = 0;
- int total_error = 0;
- const int count_test_block = 100000;
- for (int i = 0; i < count_test_block; ++i) {
- DECLARE_ALIGNED_ARRAY(16, int16_t, test_input_block, 64);
- DECLARE_ALIGNED_ARRAY(16, int16_t, test_temp_block, 64);
- DECLARE_ALIGNED_ARRAY(16, uint8_t, dst, 64);
- DECLARE_ALIGNED_ARRAY(16, uint8_t, src, 64);
-
- for (int j = 0; j < 64; ++j) {
- src[j] = rnd.Rand8() % 2 ? 255 : 0;
- dst[j] = src[j] > 0 ? 0 : 255;
- }
- // Initialize a test block with input range [-255, 255].
- for (int j = 0; j < 64; ++j)
- test_input_block[j] = src[j] - dst[j];
-
- const int pitch = 16;
- REGISTER_STATE_CHECK(
- RunFwdTxfm(test_input_block, test_temp_block,
- dst, pitch, tx_type_));
- REGISTER_STATE_CHECK(
- RunInvTxfm(test_input_block, test_temp_block,
- dst, pitch, tx_type_));
-
- for (int j = 0; j < 64; ++j) {
- const int diff = dst[j] - src[j];
- const int error = diff * diff;
- if (max_error < error)
- max_error = error;
- total_error += error;
+ for (int j = 0; j < 64; ++j) {
+ const int diff = dst[j] - src[j];
+ const int error = diff * diff;
+ if (max_error < error)
+ max_error = error;
+ total_error += error;
+ }
}
EXPECT_GE(1, max_error)
- << "Error: Extremal 8x8 FDCT/IDCT or FHT/IHT has an"
- << " individual roundtrip error > 1";
+ << "Error: 8x8 FDCT/IDCT or FHT/IHT has an individual"
+ << " roundtrip error > 1";
EXPECT_GE(count_test_block/5, total_error)
- << "Error: Extremal 8x8 FDCT/IDCT or FHT/IHT has average"
- << " roundtrip error > 1/5 per block";
+ << "Error: 8x8 FDCT/IDCT or FHT/IHT has average roundtrip "
+ << "error > 1/5 per block";
}
+
+ void RunExtremalCheck() {
+ ACMRandom rnd(ACMRandom::DeterministicSeed());
+ int max_error = 0;
+ int total_error = 0;
+ const int count_test_block = 100000;
+ DECLARE_ALIGNED_ARRAY(16, int16_t, test_input_block, 64);
+ DECLARE_ALIGNED_ARRAY(16, int16_t, test_temp_block, 64);
+ DECLARE_ALIGNED_ARRAY(16, uint8_t, dst, 64);
+ DECLARE_ALIGNED_ARRAY(16, uint8_t, src, 64);
+
+ for (int i = 0; i < count_test_block; ++i) {
+ // Initialize a test block with input range [-255, 255].
+ for (int j = 0; j < 64; ++j) {
+ src[j] = rnd.Rand8() % 2 ? 255 : 0;
+ dst[j] = src[j] > 0 ? 0 : 255;
+ test_input_block[j] = src[j] - dst[j];
+ }
+
+ REGISTER_STATE_CHECK(
+ RunFwdTxfm(test_input_block, test_temp_block, pitch_));
+ REGISTER_STATE_CHECK(
+ RunInvTxfm(test_temp_block, dst, pitch_));
+
+ for (int j = 0; j < 64; ++j) {
+ const int diff = dst[j] - src[j];
+ const int error = diff * diff;
+ if (max_error < error)
+ max_error = error;
+ total_error += error;
+ }
+
+      EXPECT_GE(1, max_error)
+          << "Error: Extremal 8x8 FDCT/IDCT or FHT/IHT has"
+          << " an individual roundtrip error > 1";
+
+ EXPECT_GE(count_test_block/5, total_error)
+ << "Error: Extremal 8x8 FDCT/IDCT or FHT/IHT has average"
+ << " roundtrip error > 1/5 per block";
+ }
+ }
+
+ int pitch_;
+ int tx_type_;
+ fht_t fwd_txfm_ref;
+};
+
+class FwdTrans8x8DCT : public FwdTrans8x8TestBase,
+ public PARAMS(fdct_t, idct_t, int) {
+ public:
+ virtual ~FwdTrans8x8DCT() {}
+
+ virtual void SetUp() {
+ fwd_txfm_ = GET_PARAM(0);
+ inv_txfm_ = GET_PARAM(1);
+ tx_type_ = GET_PARAM(2);
+ pitch_ = 16;
+ fwd_txfm_ref = fdct8x8_ref;
+ }
+
+ virtual void TearDown() { libvpx_test::ClearSystemState(); }
+
+ protected:
+ void RunFwdTxfm(int16_t *in, int16_t *out, int stride) {
+ fwd_txfm_(in, out, stride);
+ }
+ void RunInvTxfm(int16_t *out, uint8_t *dst, int stride) {
+ inv_txfm_(out, dst, stride >> 1);
+ }
+
+ fdct_t fwd_txfm_;
+ idct_t inv_txfm_;
+};
+
+TEST_P(FwdTrans8x8DCT, SignBiasCheck) {
+ RunSignBiasCheck();
}
-INSTANTIATE_TEST_CASE_P(VP9, FwdTrans8x8Test, ::testing::Range(0, 4));
+TEST_P(FwdTrans8x8DCT, RoundTripErrorCheck) {
+ RunRoundTripErrorCheck();
+}
+
+TEST_P(FwdTrans8x8DCT, ExtremalCheck) {
+ RunExtremalCheck();
+}
+
+class FwdTrans8x8HT : public FwdTrans8x8TestBase,
+ public PARAMS(fht_t, iht_t, int) {
+ public:
+ virtual ~FwdTrans8x8HT() {}
+
+ virtual void SetUp() {
+ fwd_txfm_ = GET_PARAM(0);
+ inv_txfm_ = GET_PARAM(1);
+ tx_type_ = GET_PARAM(2);
+ pitch_ = 8;
+ fwd_txfm_ref = fht8x8_ref;
+ }
+
+ virtual void TearDown() { libvpx_test::ClearSystemState(); }
+
+ protected:
+ void RunFwdTxfm(int16_t *in, int16_t *out, int stride) {
+ fwd_txfm_(in, out, stride, tx_type_);
+ }
+ void RunInvTxfm(int16_t *out, uint8_t *dst, int stride) {
+ inv_txfm_(out, dst, stride, tx_type_);
+ }
+
+ fht_t fwd_txfm_;
+ iht_t inv_txfm_;
+};
+
+TEST_P(FwdTrans8x8HT, SignBiasCheck) {
+ RunSignBiasCheck();
+}
+
+TEST_P(FwdTrans8x8HT, RoundTripErrorCheck) {
+ RunRoundTripErrorCheck();
+}
+
+TEST_P(FwdTrans8x8HT, ExtremalCheck) {
+ RunExtremalCheck();
+}
+
+using std::tr1::make_tuple;
+
+INSTANTIATE_TEST_CASE_P(
+ C, FwdTrans8x8DCT,
+ ::testing::Values(
+ make_tuple(&vp9_short_fdct8x8_c, &vp9_short_idct8x8_add_c, 0)));
+INSTANTIATE_TEST_CASE_P(
+ C, FwdTrans8x8HT,
+ ::testing::Values(
+ make_tuple(&vp9_short_fht8x8_c, &vp9_short_iht8x8_add_c, 0),
+ make_tuple(&vp9_short_fht8x8_c, &vp9_short_iht8x8_add_c, 1),
+ make_tuple(&vp9_short_fht8x8_c, &vp9_short_iht8x8_add_c, 2),
+ make_tuple(&vp9_short_fht8x8_c, &vp9_short_iht8x8_add_c, 3)));
+
+#if HAVE_SSE2
+INSTANTIATE_TEST_CASE_P(
+ SSE2, FwdTrans8x8DCT,
+ ::testing::Values(
+ make_tuple(&vp9_short_fdct8x8_sse2, &vp9_short_idct8x8_add_sse2, 0)));
+INSTANTIATE_TEST_CASE_P(
+ SSE2, FwdTrans8x8HT,
+ ::testing::Values(
+ make_tuple(&vp9_short_fht8x8_sse2, &vp9_short_iht8x8_add_sse2, 0),
+ make_tuple(&vp9_short_fht8x8_sse2, &vp9_short_iht8x8_add_sse2, 1),
+ make_tuple(&vp9_short_fht8x8_sse2, &vp9_short_iht8x8_add_sse2, 2),
+ make_tuple(&vp9_short_fht8x8_sse2, &vp9_short_iht8x8_add_sse2, 3)));
+#endif
} // namespace
diff --git a/test/intrapred_test.cc b/test/intrapred_test.cc
index f5f6d5b..5fba700 100644
--- a/test/intrapred_test.cc
+++ b/test/intrapred_test.cc
@@ -34,13 +34,17 @@
}
protected:
- void SetupMacroblock(uint8_t *data, int block_size, int stride,
+ void SetupMacroblock(MACROBLOCKD *mbptr,
+ MODE_INFO *miptr,
+ uint8_t *data,
+ int block_size,
+ int stride,
int num_planes) {
- memset(&mb_, 0, sizeof(mb_));
- memset(&mi_, 0, sizeof(mi_));
- mb_.up_available = 1;
- mb_.left_available = 1;
- mb_.mode_info_context = &mi_;
+ mbptr_ = mbptr;
+ miptr_ = miptr;
+ mbptr_->up_available = 1;
+ mbptr_->left_available = 1;
+ mbptr_->mode_info_context = miptr_;
stride_ = stride;
block_size_ = block_size;
num_planes_ = num_planes;
@@ -63,14 +67,14 @@
virtual void Predict(MB_PREDICTION_MODE mode) = 0;
void SetLeftUnavailable() {
- mb_.left_available = 0;
+ mbptr_->left_available = 0;
for (int p = 0; p < num_planes_; p++)
for (int i = -1; i < block_size_; ++i)
data_ptr_[p][stride_ * i - 1] = 129;
}
void SetTopUnavailable() {
- mb_.up_available = 0;
+ mbptr_->up_available = 0;
for (int p = 0; p < num_planes_; p++)
memset(&data_ptr_[p][-1 - stride_], 127, block_size_ + 2);
}
@@ -96,13 +100,13 @@
for (int p = 0; p < num_planes_; p++) {
// calculate expected DC
int expected;
- if (mb_.up_available || mb_.left_available) {
- int sum = 0, shift = BlockSizeLog2Min1() + mb_.up_available +
- mb_.left_available;
- if (mb_.up_available)
+ if (mbptr_->up_available || mbptr_->left_available) {
+ int sum = 0, shift = BlockSizeLog2Min1() + mbptr_->up_available +
+ mbptr_->left_available;
+ if (mbptr_->up_available)
for (int x = 0; x < block_size_; x++)
sum += data_ptr_[p][x - stride_];
- if (mb_.left_available)
+ if (mbptr_->left_available)
for (int y = 0; y < block_size_; y++)
sum += data_ptr_[p][y * stride_ - 1];
expected = (sum + (1 << (shift - 1))) >> shift;
@@ -209,8 +213,8 @@
}
}
- MACROBLOCKD mb_;
- MODE_INFO mi_;
+ MACROBLOCKD *mbptr_;
+ MODE_INFO *miptr_;
uint8_t *data_ptr_[2]; // in the case of Y, only [0] is used
int stride_;
int block_size_;
@@ -228,12 +232,18 @@
protected IntraPredBase {
public:
static void SetUpTestCase() {
+ mb_ = reinterpret_cast<MACROBLOCKD*>(
+ vpx_memalign(32, sizeof(MACROBLOCKD)));
+ mi_ = reinterpret_cast<MODE_INFO*>(
+ vpx_memalign(32, sizeof(MODE_INFO)));
data_array_ = reinterpret_cast<uint8_t*>(
vpx_memalign(kDataAlignment, kDataBufferSize));
}
static void TearDownTestCase() {
vpx_free(data_array_);
+ vpx_free(mi_);
+ vpx_free(mb_);
data_array_ = NULL;
}
@@ -250,12 +260,12 @@
virtual void SetUp() {
pred_fn_ = GetParam();
- SetupMacroblock(data_array_, kBlockSize, kStride, 1);
+ SetupMacroblock(mb_, mi_, data_array_, kBlockSize, kStride, 1);
}
virtual void Predict(MB_PREDICTION_MODE mode) {
- mb_.mode_info_context->mbmi.mode = mode;
- REGISTER_STATE_CHECK(pred_fn_(&mb_,
+ mbptr_->mode_info_context->mbmi.mode = mode;
+ REGISTER_STATE_CHECK(pred_fn_(mbptr_,
data_ptr_[0] - kStride,
data_ptr_[0] - 1, kStride,
data_ptr_[0], kStride));
@@ -263,8 +273,12 @@
intra_pred_y_fn_t pred_fn_;
static uint8_t* data_array_;
+  static MACROBLOCKD *mb_;
+ static MODE_INFO *mi_;
};
+MACROBLOCKD* IntraPredYTest::mb_ = NULL;
+MODE_INFO* IntraPredYTest::mi_ = NULL;
uint8_t* IntraPredYTest::data_array_ = NULL;
TEST_P(IntraPredYTest, IntraPredTests) {
@@ -299,12 +313,18 @@
protected IntraPredBase {
public:
static void SetUpTestCase() {
+ mb_ = reinterpret_cast<MACROBLOCKD*>(
+ vpx_memalign(32, sizeof(MACROBLOCKD)));
+ mi_ = reinterpret_cast<MODE_INFO*>(
+ vpx_memalign(32, sizeof(MODE_INFO)));
data_array_ = reinterpret_cast<uint8_t*>(
vpx_memalign(kDataAlignment, kDataBufferSize));
}
static void TearDownTestCase() {
vpx_free(data_array_);
+ vpx_free(mi_);
+ vpx_free(mb_);
data_array_ = NULL;
}
@@ -322,12 +342,12 @@
virtual void SetUp() {
pred_fn_ = GetParam();
- SetupMacroblock(data_array_, kBlockSize, kStride, 2);
+ SetupMacroblock(mb_, mi_, data_array_, kBlockSize, kStride, 2);
}
virtual void Predict(MB_PREDICTION_MODE mode) {
- mb_.mode_info_context->mbmi.uv_mode = mode;
- pred_fn_(&mb_, data_ptr_[0] - kStride, data_ptr_[1] - kStride,
+ mbptr_->mode_info_context->mbmi.uv_mode = mode;
+ pred_fn_(mbptr_, data_ptr_[0] - kStride, data_ptr_[1] - kStride,
data_ptr_[0] - 1, data_ptr_[1] - 1, kStride,
data_ptr_[0], data_ptr_[1], kStride);
}
@@ -340,8 +360,12 @@
// We use 9 lines so we have one line above us for top-prediction.
// [0] = U, [1] = V
static uint8_t* data_array_;
+ static MACROBLOCKD* mb_;
+ static MODE_INFO* mi_;
};
+MACROBLOCKD* IntraPredUVTest::mb_ = NULL;
+MODE_INFO* IntraPredUVTest::mi_ = NULL;
uint8_t* IntraPredUVTest::data_array_ = NULL;
TEST_P(IntraPredUVTest, IntraPredTests) {
diff --git a/test/variance_test.cc b/test/variance_test.cc
index ca53ffb..3f55601 100644
--- a/test/variance_test.cc
+++ b/test/variance_test.cc
@@ -78,34 +78,6 @@
return sse - (((int64_t) se * se) >> (l2w + l2h));
}
-static unsigned int subpel_avg_variance_ref(const uint8_t *ref,
- const uint8_t *src,
- const uint8_t *second_pred,
- int l2w, int l2h,
- int xoff, int yoff,
- unsigned int *sse_ptr) {
- int se = 0;
- unsigned int sse = 0;
- const int w = 1 << l2w, h = 1 << l2h;
- for (int y = 0; y < h; y++) {
- for (int x = 0; x < w; x++) {
- // bilinear interpolation at a 16th pel step
- const int a1 = ref[(w + 1) * (y + 0) + x + 0];
- const int a2 = ref[(w + 1) * (y + 0) + x + 1];
- const int b1 = ref[(w + 1) * (y + 1) + x + 0];
- const int b2 = ref[(w + 1) * (y + 1) + x + 1];
- const int a = a1 + (((a2 - a1) * xoff + 8) >> 4);
- const int b = b1 + (((b2 - b1) * xoff + 8) >> 4);
- const int r = a + (((b - a) * yoff + 8) >> 4);
- int diff = ((r + second_pred[w * y + x] + 1) >> 1) - src[w * y + x];
- se += diff;
- sse += diff * diff;
- }
- }
- *sse_ptr = sse;
- return sse - (((int64_t) se * se) >> (l2w + l2h));
-}
-
template<typename VarianceFunctionType>
class VarianceTest
: public ::testing::TestWithParam<tuple<int, int, VarianceFunctionType> > {
@@ -190,6 +162,36 @@
EXPECT_EQ(expected, var);
}
+#if CONFIG_VP9_ENCODER
+
+unsigned int subpel_avg_variance_ref(const uint8_t *ref,
+ const uint8_t *src,
+ const uint8_t *second_pred,
+ int l2w, int l2h,
+ int xoff, int yoff,
+ unsigned int *sse_ptr) {
+ int se = 0;
+ unsigned int sse = 0;
+ const int w = 1 << l2w, h = 1 << l2h;
+ for (int y = 0; y < h; y++) {
+ for (int x = 0; x < w; x++) {
+ // bilinear interpolation at a 16th pel step
+ const int a1 = ref[(w + 1) * (y + 0) + x + 0];
+ const int a2 = ref[(w + 1) * (y + 0) + x + 1];
+ const int b1 = ref[(w + 1) * (y + 1) + x + 0];
+ const int b2 = ref[(w + 1) * (y + 1) + x + 1];
+ const int a = a1 + (((a2 - a1) * xoff + 8) >> 4);
+ const int b = b1 + (((b2 - b1) * xoff + 8) >> 4);
+ const int r = a + (((b - a) * yoff + 8) >> 4);
+ int diff = ((r + second_pred[w * y + x] + 1) >> 1) - src[w * y + x];
+ se += diff;
+ sse += diff * diff;
+ }
+ }
+ *sse_ptr = sse;
+ return sse - (((int64_t) se * se) >> (l2w + l2h));
+}
+
template<typename SubpelVarianceFunctionType>
class SubpelVarianceTest
: public ::testing::TestWithParam<tuple<int, int,
@@ -280,6 +282,8 @@
}
}
+#endif // CONFIG_VP9_ENCODER
+
// -----------------------------------------------------------------------------
// VP8 test cases.
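A worked check of the reference formula above, with hypothetical pixel values: for horizontal neighbors a1 = 100 and a2 = 116 at xoff = 8 (a half-pel step on the sixteenth-pel grid), the interpolation rounds to 108:

    #include <stdio.h>

    int main(void) {
      const int a1 = 100, a2 = 116;  /* hypothetical neighboring pixels */
      const int xoff = 8;            /* half-pel on the 16th-pel grid */
      /* same rounding as subpel_avg_variance_ref: bias by 8, shift by 4 */
      const int a = a1 + (((a2 - a1) * xoff + 8) >> 4);
      printf("%d\n", a);  /* prints 108: (16 * 8 + 8) >> 4 == 8 */
      return 0;
    }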
diff --git a/third_party/x86inc/x86inc.asm b/third_party/x86inc/x86inc.asm
index b9f4830..9e4918a 100644
--- a/third_party/x86inc/x86inc.asm
+++ b/third_party/x86inc/x86inc.asm
@@ -598,6 +598,10 @@
global %1:function hidden
%elifidn __OUTPUT_FORMAT__,elf64
global %1:function hidden
+ %elifidn __OUTPUT_FORMAT__,macho32
+ global %1:private_extern
+ %elifidn __OUTPUT_FORMAT__,macho64
+ global %1:private_extern
%else
global %1
%endif
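private_extern is the Mach-O analogue of ELF hidden visibility: the symbol stays usable within the image but is not exported, which is what silences the OS X linker warnings that the commit subject refers to.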
diff --git a/vp8/common/findnearmv.h b/vp8/common/findnearmv.h
index 06ef060..c60e463 100644
--- a/vp8/common/findnearmv.h
+++ b/vp8/common/findnearmv.h
@@ -124,7 +124,7 @@
b += 16;
}
- return (cur_mb->bmi + b - 4)->mv.as_int;
+ return (cur_mb->bmi + (b - 4))->mv.as_int;
}
static B_PREDICTION_MODE left_block_mode(const MODE_INFO *cur_mb, int b)
{
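The added parentheses change pointer arithmetic, not the value: on the path where 16 was just added, b lies in [16, 19], so evaluating cur_mb->bmi + b first can form a pointer up to three elements past the end of the 16-entry bmi array before the 4 is subtracted, which is undefined behavior even without a dereference. Grouping as cur_mb->bmi + (b - 4) keeps every intermediate pointer in bounds.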
diff --git a/vp8/common/reconinter.c b/vp8/common/reconinter.c
index 43f84d0..bac3c94 100644
--- a/vp8/common/reconinter.c
+++ b/vp8/common/reconinter.c
@@ -138,14 +138,10 @@
{
for (r = 0; r < 4; r++)
{
-#if !(CONFIG_FAST_UNALIGNED)
pred_ptr[0] = ptr[0];
pred_ptr[1] = ptr[1];
pred_ptr[2] = ptr[2];
pred_ptr[3] = ptr[3];
-#else
- *(uint32_t *)pred_ptr = *(uint32_t *)ptr ;
-#endif
pred_ptr += pitch;
ptr += pre_stride;
}
@@ -196,16 +192,12 @@
{
for (r = 0; r < 4; r++)
{
-#if !(CONFIG_FAST_UNALIGNED)
dst[0] = ptr[0];
dst[1] = ptr[1];
dst[2] = ptr[2];
dst[3] = ptr[3];
-#else
- *(uint32_t *)dst = *(uint32_t *)ptr ;
-#endif
- dst += dst_stride;
- ptr += pre_stride;
+ dst += dst_stride;
+ ptr += pre_stride;
}
}
}
@@ -270,7 +262,7 @@
+ x->block[yoffset+4].bmi.mv.as_mv.row
+ x->block[yoffset+5].bmi.mv.as_mv.row;
- temp += 4 + ((temp >> (sizeof(int) * CHAR_BIT - 1)) << 3);
+ temp += 4 + ((temp >> (sizeof(temp) * CHAR_BIT - 1)) * 8);
x->block[uoffset].bmi.mv.as_mv.row = (temp / 8) & x->fullpixel_mask;
@@ -279,7 +271,7 @@
+ x->block[yoffset+4].bmi.mv.as_mv.col
+ x->block[yoffset+5].bmi.mv.as_mv.col;
- temp += 4 + ((temp >> (sizeof(int) * CHAR_BIT - 1)) << 3);
+ temp += 4 + ((temp >> (sizeof(temp) * CHAR_BIT - 1)) * 8);
x->block[uoffset].bmi.mv.as_mv.col = (temp / 8) & x->fullpixel_mask;
@@ -558,7 +550,7 @@
+ x->mode_info_context->bmi[yoffset + 4].mv.as_mv.row
+ x->mode_info_context->bmi[yoffset + 5].mv.as_mv.row;
- temp += 4 + ((temp >> (sizeof(int) * CHAR_BIT - 1)) << 3);
+ temp += 4 + ((temp >> (sizeof(temp) * CHAR_BIT - 1)) * 8);
x->block[uoffset].bmi.mv.as_mv.row = (temp / 8) & x->fullpixel_mask;
@@ -567,7 +559,7 @@
+ x->mode_info_context->bmi[yoffset + 4].mv.as_mv.col
+ x->mode_info_context->bmi[yoffset + 5].mv.as_mv.col;
- temp += 4 + ((temp >> (sizeof(int) * CHAR_BIT - 1)) << 3);
+ temp += 4 + ((temp >> (sizeof(temp) * CHAR_BIT - 1)) * 8);
x->block[uoffset].bmi.mv.as_mv.col = (temp / 8) & x->fullpixel_mask;
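Two things happen in the rewritten temp lines. The shift term evaluates to 0 for non-negative temp and to -1 for negative temp (relying on the arithmetic right shift the original code already assumed), so the bias becomes +4 or -4 and the following temp / 8 rounds to nearest rather than toward zero. Replacing << 3 with * 8, and sizeof(int) with sizeof(temp), additionally avoids left-shifting a negative value, which is undefined in C. A minimal sketch of the idiom:

    #include <limits.h>
    #include <stdio.h>

    /* Round-to-nearest division by 8, branch-free, as in reconinter.c.
     * Assumes arithmetic right shift of negative ints, as the original
     * code does; the shift yields 0 or -1, so the bias is +4 or -4. */
    static int round_div8(int temp) {
      temp += 4 + ((temp >> (sizeof(temp) * CHAR_BIT - 1)) * 8);
      return temp / 8;
    }

    int main(void) {
      printf("%d %d\n", round_div8(11), round_div8(-11));  /* 1 -1 */
      return 0;
    }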
diff --git a/vp8/common/x86/vp8_asm_stubs.c b/vp8/common/x86/vp8_asm_stubs.c
index c0416b7..b409293 100644
--- a/vp8/common/x86/vp8_asm_stubs.c
+++ b/vp8/common/x86/vp8_asm_stubs.c
@@ -611,16 +611,12 @@
for (r = 0; r < 4; r++)
{
- #if !(CONFIG_FAST_UNALIGNED)
dst_ptr[0] = src_ptr[0];
dst_ptr[1] = src_ptr[1];
dst_ptr[2] = src_ptr[2];
dst_ptr[3] = src_ptr[3];
- #else
- *(uint32_t *)dst_ptr = *(uint32_t *)src_ptr ;
- #endif
- dst_ptr += dst_pitch;
- src_ptr += src_pixels_per_line;
+ dst_ptr += dst_pitch;
+ src_ptr += src_pixels_per_line;
}
}
}
diff --git a/vp8/decoder/decodemv.c b/vp8/decoder/decodemv.c
index 8027a07..759d842 100644
--- a/vp8/decoder/decodemv.c
+++ b/vp8/decoder/decodemv.c
@@ -110,8 +110,8 @@
static void read_mv(vp8_reader *r, MV *mv, const MV_CONTEXT *mvc)
{
- mv->row = (short)(read_mvcomponent(r, mvc) << 1);
- mv->col = (short)(read_mvcomponent(r, ++mvc) << 1);
+ mv->row = (short)(read_mvcomponent(r, mvc) * 2);
+ mv->col = (short)(read_mvcomponent(r, ++mvc) * 2);
}
@@ -292,9 +292,9 @@
blockmv.as_int = 0;
if( vp8_read(bc, prob[2]) )
{
- blockmv.as_mv.row = read_mvcomponent(bc, &mvc[0]) << 1;
+ blockmv.as_mv.row = read_mvcomponent(bc, &mvc[0]) * 2;
blockmv.as_mv.row += best_mv.as_mv.row;
- blockmv.as_mv.col = read_mvcomponent(bc, &mvc[1]) << 1;
+ blockmv.as_mv.col = read_mvcomponent(bc, &mvc[1]) * 2;
blockmv.as_mv.col += best_mv.as_mv.col;
}
}
diff --git a/vp8/decoder/decodframe.c b/vp8/decoder/decodframe.c
index 0050c11..50ee9c5 100644
--- a/vp8/decoder/decodframe.c
+++ b/vp8/decoder/decodframe.c
@@ -576,7 +576,7 @@
xd->left_available = 0;
- xd->mb_to_top_edge = -((mb_row * 16)) << 3;
+ xd->mb_to_top_edge = -((mb_row * 16) << 3);
xd->mb_to_bottom_edge = ((pc->mb_rows - 1 - mb_row) * 16) << 3;
xd->recon_above[0] = dst_buffer[0] + recon_yoffset;
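The moved parenthesis is not cosmetic: -((mb_row * 16)) << 3 negates first and then left-shifts a negative value, which is undefined behavior in C; -((mb_row * 16) << 3) shifts the non-negative product and negates the result, producing the same value without the undefined shift. The same fix appears in vp8/encoder/bitstream.c below.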
diff --git a/vp8/encoder/bitstream.c b/vp8/encoder/bitstream.c
index 5f0c1f7..8ca4f5f 100644
--- a/vp8/encoder/bitstream.c
+++ b/vp8/encoder/bitstream.c
@@ -432,7 +432,7 @@
assert(NEARESTMV <= m && m <= SPLITMV);
#endif
vp8_write_token(w, vp8_mv_ref_tree, p,
- vp8_mv_ref_encoding_array - NEARESTMV + m);
+ vp8_mv_ref_encoding_array + (m - NEARESTMV));
}
static void write_sub_mv_ref
@@ -444,7 +444,7 @@
assert(LEFT4X4 <= m && m <= NEW4X4);
#endif
vp8_write_token(w, vp8_sub_mv_ref_tree, p,
- vp8_sub_mv_ref_encoding_array - LEFT4X4 + m);
+ vp8_sub_mv_ref_encoding_array + (m - LEFT4X4));
}
static void write_mv
@@ -577,7 +577,7 @@
*/
xd->mb_to_left_edge = -((mb_col * 16) << 3);
xd->mb_to_right_edge = ((pc->mb_cols - 1 - mb_col) * 16) << 3;
- xd->mb_to_top_edge = -((mb_row * 16)) << 3;
+ xd->mb_to_top_edge = -((mb_row * 16) << 3);
xd->mb_to_bottom_edge = ((pc->mb_rows - 1 - mb_row) * 16) << 3;
#ifdef VP8_ENTROPY_STATS
diff --git a/vp8/encoder/dct.c b/vp8/encoder/dct.c
index b5a11ae..091554a 100644
--- a/vp8/encoder/dct.c
+++ b/vp8/encoder/dct.c
@@ -20,10 +20,10 @@
for (i = 0; i < 4; i++)
{
- a1 = ((ip[0] + ip[3])<<3);
- b1 = ((ip[1] + ip[2])<<3);
- c1 = ((ip[1] - ip[2])<<3);
- d1 = ((ip[0] - ip[3])<<3);
+ a1 = ((ip[0] + ip[3]) * 8);
+ b1 = ((ip[1] + ip[2]) * 8);
+ c1 = ((ip[1] - ip[2]) * 8);
+ d1 = ((ip[0] - ip[3]) * 8);
op[0] = a1 + b1;
op[2] = a1 - b1;
@@ -72,10 +72,10 @@
for (i = 0; i < 4; i++)
{
- a1 = ((ip[0] + ip[2])<<2);
- d1 = ((ip[1] + ip[3])<<2);
- c1 = ((ip[1] - ip[3])<<2);
- b1 = ((ip[0] - ip[2])<<2);
+ a1 = ((ip[0] + ip[2]) * 4);
+ d1 = ((ip[1] + ip[3]) * 4);
+ c1 = ((ip[1] - ip[3]) * 4);
+ b1 = ((ip[0] - ip[2]) * 4);
op[0] = a1 + d1 + (a1!=0);
op[1] = b1 + c1;
diff --git a/vp8/encoder/firstpass.c b/vp8/encoder/firstpass.c
index ded0c43..968c7f3 100644
--- a/vp8/encoder/firstpass.c
+++ b/vp8/encoder/firstpass.c
@@ -711,8 +711,8 @@
neutral_count++;
}
- d->bmi.mv.as_mv.row <<= 3;
- d->bmi.mv.as_mv.col <<= 3;
+ d->bmi.mv.as_mv.row *= 8;
+ d->bmi.mv.as_mv.col *= 8;
this_error = motion_error;
vp8_set_mbmode_and_mvs(x, NEWMV, &d->bmi.mv);
vp8_encode_inter16x16y(x);
@@ -909,13 +909,16 @@
static double bitcost( double prob )
{
- return -(log( prob ) / log( 2.0 ));
+ if (prob > 0.000122)
+ return -log(prob) / log(2.0);
+ else
+ return 13.0;
}
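The clamp threshold sits just under 2^-13 (about 0.00012207), so the returned cost tops out at roughly 13 bits: bitcost(0.5) is still 1.0, while vanishing probabilities now return the fixed 13.0 instead of growing without bound, and log(0) can no longer be reached.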
static int64_t estimate_modemvcost(VP8_COMP *cpi,
FIRSTPASS_STATS * fpstats)
{
int mv_cost;
- int mode_cost;
+ int64_t mode_cost;
double av_pct_inter = fpstats->pcnt_inter / fpstats->count;
double av_pct_motion = fpstats->pcnt_motion / fpstats->count;
@@ -937,10 +940,9 @@
/* Crude estimate of overhead cost from modes
* << 9 is the normalization to (bits * 512) used in vp8_bits_per_mb
*/
- mode_cost =
- (int)( ( ((av_pct_inter - av_pct_motion) * zz_cost) +
- (av_pct_motion * motion_cost) +
- (av_intra * intra_cost) ) * cpi->common.MBs ) << 9;
+    mode_cost = ((((av_pct_inter - av_pct_motion) * zz_cost) +
+                  (av_pct_motion * motion_cost) +
+                  (av_intra * intra_cost)) * cpi->common.MBs) * 512;
return mv_cost + mode_cost;
}
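The companion mode_cost change avoids 32-bit overflow: the old code truncated the double to int and then shifted left by 9, so a product on the order of 5e6 would need about 2.56e9 after scaling, beyond INT_MAX. A minimal sketch with a hypothetical magnitude:

    #include <stdint.h>
    #include <stdio.h>

    int main(void) {
      const double raw = 5.0e6;  /* hypothetical (cost * MBs) product */
      /* old: (int)raw << 9 would need 2.56e9, past 32-bit int range; */
      /* scaling the double by 512 and widening to int64_t is safe.   */
      const int64_t mode_cost = (int64_t)(raw * 512);
      printf("%lld\n", (long long)mode_cost);  /* 2560000000 */
      return 0;
    }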
diff --git a/vp8/encoder/mcomp.c b/vp8/encoder/mcomp.c
index 83c3989..0b11ea6 100644
--- a/vp8/encoder/mcomp.c
+++ b/vp8/encoder/mcomp.c
@@ -210,7 +210,7 @@
unsigned char *z = (*(b->base_src) + b->src);
int rr = ref_mv->as_mv.row >> 1, rc = ref_mv->as_mv.col >> 1;
- int br = bestmv->as_mv.row << 2, bc = bestmv->as_mv.col << 2;
+ int br = bestmv->as_mv.row * 4, bc = bestmv->as_mv.col * 4;
int tr = br, tc = bc;
unsigned int besterr;
unsigned int left, right, up, down, diag;
@@ -220,10 +220,14 @@
unsigned int quarteriters = 4;
int thismse;
- int minc = MAX(x->mv_col_min << 2, (ref_mv->as_mv.col >> 1) - ((1 << mvlong_width) - 1));
- int maxc = MIN(x->mv_col_max << 2, (ref_mv->as_mv.col >> 1) + ((1 << mvlong_width) - 1));
- int minr = MAX(x->mv_row_min << 2, (ref_mv->as_mv.row >> 1) - ((1 << mvlong_width) - 1));
- int maxr = MIN(x->mv_row_max << 2, (ref_mv->as_mv.row >> 1) + ((1 << mvlong_width) - 1));
+ int minc = MAX(x->mv_col_min * 4,
+ (ref_mv->as_mv.col >> 1) - ((1 << mvlong_width) - 1));
+ int maxc = MIN(x->mv_col_max * 4,
+ (ref_mv->as_mv.col >> 1) + ((1 << mvlong_width) - 1));
+ int minr = MAX(x->mv_row_min * 4,
+ (ref_mv->as_mv.row >> 1) - ((1 << mvlong_width) - 1));
+ int maxr = MIN(x->mv_row_max * 4,
+ (ref_mv->as_mv.row >> 1) + ((1 << mvlong_width) - 1));
int y_stride;
int offset;
@@ -254,8 +258,8 @@
offset = (bestmv->as_mv.row) * y_stride + bestmv->as_mv.col;
/* central mv */
- bestmv->as_mv.row <<= 3;
- bestmv->as_mv.col <<= 3;
+ bestmv->as_mv.row *= 8;
+ bestmv->as_mv.col *= 8;
/* calculate central point error */
besterr = vfp->vf(y, y_stride, z, b->src_stride, sse1);
@@ -337,8 +341,8 @@
tc = bc;
}
- bestmv->as_mv.row = br << 1;
- bestmv->as_mv.col = bc << 1;
+ bestmv->as_mv.row = br * 2;
+ bestmv->as_mv.col = bc * 2;
if ((abs(bestmv->as_mv.col - ref_mv->as_mv.col) > (MAX_FULL_PEL_VAL<<3)) ||
(abs(bestmv->as_mv.row - ref_mv->as_mv.row) > (MAX_FULL_PEL_VAL<<3)))
@@ -699,8 +703,8 @@
#endif
/* central mv */
- bestmv->as_mv.row <<= 3;
- bestmv->as_mv.col <<= 3;
+ bestmv->as_mv.row *= 8;
+ bestmv->as_mv.col *= 8;
startmv = *bestmv;
/* calculate central point error */
@@ -1315,8 +1319,8 @@
(*num00)++;
}
- this_mv.as_mv.row = best_mv->as_mv.row << 3;
- this_mv.as_mv.col = best_mv->as_mv.col << 3;
+ this_mv.as_mv.row = best_mv->as_mv.row * 8;
+ this_mv.as_mv.col = best_mv->as_mv.col * 8;
return fn_ptr->vf(what, what_stride, best_address, in_what_stride, &thissad)
+ mv_err_cost(&this_mv, center_mv, mvcost, x->errorperbit);
@@ -1709,8 +1713,8 @@
}
}
- this_mv.as_mv.row = best_mv->as_mv.row << 3;
- this_mv.as_mv.col = best_mv->as_mv.col << 3;
+ this_mv.as_mv.row = best_mv->as_mv.row * 8;
+ this_mv.as_mv.col = best_mv->as_mv.col * 8;
return fn_ptr->vf(what, what_stride, bestaddress, in_what_stride, &thissad)
+ mv_err_cost(&this_mv, center_mv, mvcost, x->errorperbit);
@@ -1905,8 +1909,8 @@
}
}
- this_mv.as_mv.row = ref_mv->as_mv.row << 3;
- this_mv.as_mv.col = ref_mv->as_mv.col << 3;
+ this_mv.as_mv.row = ref_mv->as_mv.row * 8;
+ this_mv.as_mv.col = ref_mv->as_mv.col * 8;
return fn_ptr->vf(what, what_stride, best_address, in_what_stride, &thissad)
+ mv_err_cost(&this_mv, center_mv, mvcost, x->errorperbit);
diff --git a/vp8/encoder/rdopt.c b/vp8/encoder/rdopt.c
index 521e84f..5016cc4 100644
--- a/vp8/encoder/rdopt.c
+++ b/vp8/encoder/rdopt.c
@@ -935,7 +935,7 @@
assert(NEARESTMV <= m && m <= SPLITMV);
vp8_mv_ref_probs(p, near_mv_ref_ct);
return vp8_cost_token(vp8_mv_ref_tree, p,
- vp8_mv_ref_encoding_array - NEARESTMV + m);
+ vp8_mv_ref_encoding_array + (m - NEARESTMV));
}
void vp8_set_mbmode_and_mvs(MACROBLOCK *x, MB_PREDICTION_MODE mb, int_mv *mv)
diff --git a/vp9/common/arm/neon/vp9_idct16x16_neon.c b/vp9/common/arm/neon/vp9_idct16x16_neon.c
index 3e3e400..fb7b5cd 100644
--- a/vp9/common/arm/neon/vp9_idct16x16_neon.c
+++ b/vp9/common/arm/neon/vp9_idct16x16_neon.c
@@ -20,26 +20,28 @@
int16_t skip_adding,
uint8_t *dest,
int dest_stride);
-extern void vp9_short_idct10_16x16_add_neon_pass1(int16_t *input,
+extern void vp9_short_idct16x16_10_add_neon_pass1(int16_t *input,
int16_t *output,
int output_stride);
-extern void vp9_short_idct10_16x16_add_neon_pass2(int16_t *src,
+extern void vp9_short_idct16x16_10_add_neon_pass2(int16_t *src,
int16_t *output,
int16_t *pass1Output,
int16_t skip_adding,
uint8_t *dest,
int dest_stride);
-extern void save_neon_registers();
-extern void restore_neon_registers();
+/* For ARM NEON, d8-d15 are callee-saved registers and need to be saved. */
+extern void vp9_push_neon(int64_t *store);
+extern void vp9_pop_neon(int64_t *store);
void vp9_short_idct16x16_add_neon(int16_t *input,
uint8_t *dest, int dest_stride) {
+ int64_t store_reg[8];
int16_t pass1_output[16*16] = {0};
int16_t row_idct_output[16*16] = {0};
// save d8-d15 register values.
- save_neon_registers();
+ vp9_push_neon(store_reg);
/* Parallel idct on the upper 8 rows */
// First pass processes even elements 0, 2, 4, 6, 8, 10, 12, 14 and save the
@@ -102,28 +104,29 @@
dest_stride);
// restore d8-d15 register values.
- restore_neon_registers();
+ vp9_pop_neon(store_reg);
return;
}
-void vp9_short_idct10_16x16_add_neon(int16_t *input,
+void vp9_short_idct16x16_10_add_neon(int16_t *input,
uint8_t *dest, int dest_stride) {
+ int64_t store_reg[8];
int16_t pass1_output[16*16] = {0};
int16_t row_idct_output[16*16] = {0};
// save d8-d15 register values.
- save_neon_registers();
+ vp9_push_neon(store_reg);
/* Parallel idct on the upper 8 rows */
// First pass processes even elements 0, 2, 4, 6, 8, 10, 12, 14 and save the
// stage 6 result in pass1_output.
- vp9_short_idct10_16x16_add_neon_pass1(input, pass1_output, 8);
+ vp9_short_idct16x16_10_add_neon_pass1(input, pass1_output, 8);
// Second pass processes odd elements 1, 3, 5, 7, 9, 11, 13, 15 and combines
// with result in pass1(pass1_output) to calculate final result in stage 7
// which will be saved into row_idct_output.
- vp9_short_idct10_16x16_add_neon_pass2(input+1,
+ vp9_short_idct16x16_10_add_neon_pass2(input+1,
row_idct_output,
pass1_output,
0,
@@ -163,7 +166,7 @@
dest_stride);
// restore d8-d15 register values.
- restore_neon_registers();
+ vp9_pop_neon(store_reg);
return;
}
diff --git a/vp9/common/arm/neon/vp9_idct32x32_neon.c b/vp9/common/arm/neon/vp9_idct32x32_neon.c
deleted file mode 100644
index ceecd6f..0000000
--- a/vp9/common/arm/neon/vp9_idct32x32_neon.c
+++ /dev/null
@@ -1,47 +0,0 @@
-/*
- * Copyright (c) 2013 The WebM project authors. All Rights Reserved.
- *
- * Use of this source code is governed by a BSD-style license
- * that can be found in the LICENSE file in the root of the source
- * tree. An additional intellectual property rights grant can be found
- * in the file PATENTS. All contributing project authors may
- * be found in the AUTHORS file in the root of the source tree.
- */
-
-#include "vp9/common/vp9_common.h"
-
-// defined in vp9/common/arm/neon/vp9_short_idct32x32_add_neon.asm
-extern void idct32_transpose_and_transform(int16_t *transpose_buffer,
- int16_t *output, int16_t *input);
-extern void idct32_combine_add(uint8_t *dest, int16_t *out, int dest_stride);
-
-
-// defined in vp9/common/arm/neon/vp9_short_idct16x16_add_neon.asm
-extern void save_neon_registers();
-extern void restore_neon_registers();
-
-void vp9_short_idct32x32_add_neon(int16_t *input, uint8_t *dest,
- int dest_stride) {
- // TODO(cd): move the creation of these buffers within the ASM file
- // internal buffer used to transpose 8 lines into before transforming them
- int16_t transpose_buffer[32 * 8];
- // results of the first pass (transpose and transform rows)
- int16_t pass1[32 * 32];
- // results of the second pass (transpose and transform columns)
- int16_t pass2[32 * 32];
-
- // save register we need to preserve
- save_neon_registers();
- // process rows
- idct32_transpose_and_transform(transpose_buffer, pass1, input);
- // process columns
- // TODO(cd): do these two steps/passes within the ASM file
- idct32_transpose_and_transform(transpose_buffer, pass2, pass1);
- // combine and add to dest
- // TODO(cd): integrate this within the last storage step of the second pass
- idct32_combine_add(dest, pass2, dest_stride);
- // restore register we need to preserve
- restore_neon_registers();
-}
-
-// TODO(cd): Eliminate this file altogether when everything is in ASM file
diff --git a/vp9/common/arm/neon/vp9_save_reg_neon.asm b/vp9/common/arm/neon/vp9_save_reg_neon.asm
new file mode 100644
index 0000000..71c3e70
--- /dev/null
+++ b/vp9/common/arm/neon/vp9_save_reg_neon.asm
@@ -0,0 +1,36 @@
+;
+; Copyright (c) 2010 The WebM project authors. All Rights Reserved.
+;
+; Use of this source code is governed by a BSD-style license
+; that can be found in the LICENSE file in the root of the source
+; tree. An additional intellectual property rights grant can be found
+; in the file PATENTS. All contributing project authors may
+; be found in the AUTHORS file in the root of the source tree.
+;
+
+
+ EXPORT |vp9_push_neon|
+ EXPORT |vp9_pop_neon|
+
+ ARM
+ REQUIRE8
+ PRESERVE8
+
+ AREA ||.text||, CODE, READONLY, ALIGN=2
+
+|vp9_push_neon| PROC
+ vst1.i64 {d8, d9, d10, d11}, [r0]!
+ vst1.i64 {d12, d13, d14, d15}, [r0]!
+ bx lr
+
+ ENDP
+
+|vp9_pop_neon| PROC
+ vld1.i64 {d8, d9, d10, d11}, [r0]!
+ vld1.i64 {d12, d13, d14, d15}, [r0]!
+ bx lr
+
+ ENDP
+
+ END
+
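Per the ARM AAPCS, d8-d15 are callee-saved, so NEON code that clobbers q4-q7 must preserve them. These helpers spill and reload all eight registers (64 bytes) through the caller-supplied int64_t[8] buffer, replacing the old save_neon_registers/restore_neon_registers pair, whose vpush and vpop executed in separate calls and therefore left sp modified across a function return, violating the ABI's stack discipline.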
diff --git a/vp9/common/arm/neon/vp9_short_idct16x16_add_neon.asm b/vp9/common/arm/neon/vp9_short_idct16x16_add_neon.asm
index 7464e80..df2a052 100644
--- a/vp9/common/arm/neon/vp9_short_idct16x16_add_neon.asm
+++ b/vp9/common/arm/neon/vp9_short_idct16x16_add_neon.asm
@@ -10,10 +10,8 @@
EXPORT |vp9_short_idct16x16_add_neon_pass1|
EXPORT |vp9_short_idct16x16_add_neon_pass2|
- EXPORT |vp9_short_idct10_16x16_add_neon_pass1|
- EXPORT |vp9_short_idct10_16x16_add_neon_pass2|
- EXPORT |save_neon_registers|
- EXPORT |restore_neon_registers|
+ EXPORT |vp9_short_idct16x16_10_add_neon_pass1|
+ EXPORT |vp9_short_idct16x16_10_add_neon_pass2|
ARM
REQUIRE8
PRESERVE8
@@ -788,7 +786,7 @@
bx lr
ENDP ; |vp9_short_idct16x16_add_neon_pass2|
-;void |vp9_short_idct10_16x16_add_neon_pass1|(int16_t *input,
+;void |vp9_short_idct16x16_10_add_neon_pass1|(int16_t *input,
; int16_t *output, int output_stride)
;
; r0 int16_t input
@@ -798,7 +796,7 @@
; idct16 stage1 - stage6 on all the elements loaded in q8-q15. The output
; will be stored back into q8-q15 registers. This function will touch q0-q7
; registers and use them as buffer during calculation.
-|vp9_short_idct10_16x16_add_neon_pass1| PROC
+|vp9_short_idct16x16_10_add_neon_pass1| PROC
; TODO(hkuang): Find a better way to load the elements.
; load elements of 0, 2, 4, 6, 8, 10, 12, 14 into q8 - q15
@@ -907,9 +905,9 @@
vst1.64 {d31}, [r1], r2
bx lr
- ENDP ; |vp9_short_idct10_16x16_add_neon_pass1|
+ ENDP ; |vp9_short_idct16x16_10_add_neon_pass1|
-;void vp9_short_idct10_16x16_add_neon_pass2(int16_t *src,
+;void vp9_short_idct16x16_10_add_neon_pass2(int16_t *src,
; int16_t *output,
; int16_t *pass1Output,
; int16_t skip_adding,
@@ -926,7 +924,7 @@
; idct16 stage1 - stage7 on all the elements loaded in q8-q15. The output
; will be stored back into q8-q15 registers. This function will touch q0-q7
; registers and use them as buffer during calculation.
-|vp9_short_idct10_16x16_add_neon_pass2| PROC
+|vp9_short_idct16x16_10_add_neon_pass2| PROC
push {r3-r9}
; TODO(hkuang): Find a better way to load the elements.
@@ -1177,15 +1175,5 @@
end_idct10_16x16_pass2
pop {r3-r9}
bx lr
- ENDP ; |vp9_short_idct10_16x16_add_neon_pass2|
-;void |save_neon_registers|()
-|save_neon_registers| PROC
- vpush {d8-d15}
- bx lr
- ENDP ; |save_registers|
-;void |restore_neon_registers|()
-|restore_neon_registers| PROC
- vpop {d8-d15}
- bx lr
- ENDP ; |restore_registers|
+ ENDP ; |vp9_short_idct16x16_10_add_neon_pass2|
END
diff --git a/vp9/common/arm/neon/vp9_short_idct32x32_add_neon.asm b/vp9/common/arm/neon/vp9_short_idct32x32_add_neon.asm
index 5c097cc..b5a284b 100644
--- a/vp9/common/arm/neon/vp9_short_idct32x32_add_neon.asm
+++ b/vp9/common/arm/neon/vp9_short_idct32x32_add_neon.asm
@@ -43,8 +43,7 @@
cospi_31_64 EQU 804
- EXPORT |idct32_transpose_and_transform|
- EXPORT |idct32_combine_add|
+ EXPORT |vp9_short_idct32x32_add_neon|
ARM
REQUIRE8
PRESERVE8
@@ -100,6 +99,142 @@
vst1.16 {$reg2}, [r1]
MEND
; --------------------------------------------------------------------------
+ ; Combine-add results with current destination content
+ ; q6-q9 contain the results (out[j * 32 + 0-31])
+ MACRO
+ STORE_COMBINE_CENTER_RESULTS
+ ; load dest[j * dest_stride + 0-31]
+ vld1.s16 {d8}, [r10], r2
+ vld1.s16 {d11}, [r9], r11
+ vld1.s16 {d9}, [r10]
+ vld1.s16 {d10}, [r9]
+ ; ROUND_POWER_OF_TWO
+ vrshr.s16 q7, q7, #6
+ vrshr.s16 q8, q8, #6
+ vrshr.s16 q9, q9, #6
+ vrshr.s16 q6, q6, #6
+ ; add to dest[j * dest_stride + 0-31]
+ vaddw.u8 q7, q7, d9
+ vaddw.u8 q8, q8, d10
+ vaddw.u8 q9, q9, d11
+ vaddw.u8 q6, q6, d8
+ ; clip pixel
+ vqmovun.s16 d9, q7
+ vqmovun.s16 d10, q8
+ vqmovun.s16 d11, q9
+ vqmovun.s16 d8, q6
+ ; store back into dest[j * dest_stride + 0-31]
+ vst1.16 {d9}, [r10], r11
+ vst1.16 {d10}, [r9], r2
+ vst1.16 {d8}, [r10]
+ vst1.16 {d11}, [r9]
+ ; update pointers (by dest_stride * 2)
+ sub r9, r9, r2, lsl #1
+ add r10, r10, r2, lsl #1
+ MEND
+ ; --------------------------------------------------------------------------
+ ; Combine-add results with current destination content
+ ; q6-q9 contain the results (out[j * 32 + 0-31])
+ MACRO
+ STORE_COMBINE_CENTER_RESULTS_LAST
+ ; load dest[j * dest_stride + 0-31]
+ vld1.s16 {d8}, [r10], r2
+ vld1.s16 {d11}, [r9], r11
+ vld1.s16 {d9}, [r10]
+ vld1.s16 {d10}, [r9]
+ ; ROUND_POWER_OF_TWO
+ vrshr.s16 q7, q7, #6
+ vrshr.s16 q8, q8, #6
+ vrshr.s16 q9, q9, #6
+ vrshr.s16 q6, q6, #6
+ ; add to dest[j * dest_stride + 0-31]
+ vaddw.u8 q7, q7, d9
+ vaddw.u8 q8, q8, d10
+ vaddw.u8 q9, q9, d11
+ vaddw.u8 q6, q6, d8
+ ; clip pixel
+ vqmovun.s16 d9, q7
+ vqmovun.s16 d10, q8
+ vqmovun.s16 d11, q9
+ vqmovun.s16 d8, q6
+ ; store back into dest[j * dest_stride + 0-31]
+ vst1.16 {d9}, [r10], r11
+ vst1.16 {d10}, [r9], r2
+ vst1.16 {d8}, [r10]!
+ vst1.16 {d11}, [r9]!
+ ; update pointers (by dest_stride * 2)
+ sub r9, r9, r2, lsl #1
+ add r10, r10, r2, lsl #1
+ MEND
+ ; --------------------------------------------------------------------------
+ ; Combine-add results with current destination content
+ ; q4-q7 contain the results (out[j * 32 + 0-31])
+ MACRO
+ STORE_COMBINE_EXTREME_RESULTS
+ ; load dest[j * dest_stride + 0-31]
+ vld1.s16 {d4}, [r7], r2
+ vld1.s16 {d7}, [r6], r11
+ vld1.s16 {d5}, [r7]
+ vld1.s16 {d6}, [r6]
+ ; ROUND_POWER_OF_TWO
+ vrshr.s16 q5, q5, #6
+ vrshr.s16 q6, q6, #6
+ vrshr.s16 q7, q7, #6
+ vrshr.s16 q4, q4, #6
+ ; add to dest[j * dest_stride + 0-31]
+ vaddw.u8 q5, q5, d5
+ vaddw.u8 q6, q6, d6
+ vaddw.u8 q7, q7, d7
+ vaddw.u8 q4, q4, d4
+ ; clip pixel
+ vqmovun.s16 d5, q5
+ vqmovun.s16 d6, q6
+ vqmovun.s16 d7, q7
+ vqmovun.s16 d4, q4
+ ; store back into dest[j * dest_stride + 0-31]
+ vst1.16 {d5}, [r7], r11
+ vst1.16 {d6}, [r6], r2
+ vst1.16 {d7}, [r6]
+ vst1.16 {d4}, [r7]
+ ; update pointers (by dest_stride * 2)
+ sub r6, r6, r2, lsl #1
+ add r7, r7, r2, lsl #1
+ MEND
+ ; --------------------------------------------------------------------------
+ ; Combine-add results with current destination content
+ ; q4-q7 contain the results (out[j * 32 + 0-31])
+ MACRO
+ STORE_COMBINE_EXTREME_RESULTS_LAST
+ ; load dest[j * dest_stride + 0-31]
+ vld1.s16 {d4}, [r7], r2
+ vld1.s16 {d7}, [r6], r11
+ vld1.s16 {d5}, [r7]
+ vld1.s16 {d6}, [r6]
+ ; ROUND_POWER_OF_TWO
+ vrshr.s16 q5, q5, #6
+ vrshr.s16 q6, q6, #6
+ vrshr.s16 q7, q7, #6
+ vrshr.s16 q4, q4, #6
+ ; add to dest[j * dest_stride + 0-31]
+ vaddw.u8 q5, q5, d5
+ vaddw.u8 q6, q6, d6
+ vaddw.u8 q7, q7, d7
+ vaddw.u8 q4, q4, d4
+ ; clip pixel
+ vqmovun.s16 d5, q5
+ vqmovun.s16 d6, q6
+ vqmovun.s16 d7, q7
+ vqmovun.s16 d4, q4
+ ; store back into dest[j * dest_stride + 0-31]
+ vst1.16 {d5}, [r7], r11
+ vst1.16 {d6}, [r6], r2
+ vst1.16 {d7}, [r6]!
+ vst1.16 {d4}, [r7]!
+ ; update pointers (by dest_stride * 2)
+ sub r6, r6, r2, lsl #1
+ add r7, r7, r2, lsl #1
+ MEND
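In scalar terms, all four STORE_COMBINE_* macros perform the usual idct combine-add per pixel: round-divide the transform output by 64, add the predictor from dest, and clip to [0, 255]. A rough C model of that sequence (helper name hypothetical):

    #include <stdint.h>

    /* Rough scalar model of the NEON sequence above: vrshr #6 is the
     * rounding shift (assuming arithmetic shift of negatives in C),
     * vaddw.u8 the widening add, vqmovun.s16 the saturating narrow. */
    static void combine_add(uint8_t *dest, const int16_t *out, int n) {
      int j;
      for (j = 0; j < n; j++) {
        const int v = dest[j] + ((out[j] + 32) >> 6);
        dest[j] = (uint8_t)(v < 0 ? 0 : (v > 255 ? 255 : v));
      }
    }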
+ ; --------------------------------------------------------------------------
; Touches q8-q12, q15 (q13-q14 are preserved)
; valid output registers are anything but q8-q11
MACRO
@@ -110,12 +245,12 @@
      ; additions/subtractions before the multiplies.
; generate the constants
; generate scalar constants
- mov r3, #$first_constant & 0xFF00
- add r3, #$first_constant & 0x00FF
+ mov r8, #$first_constant & 0xFF00
mov r12, #$second_constant & 0xFF00
+ add r8, #$first_constant & 0x00FF
add r12, #$second_constant & 0x00FF
; generate vector constants
- vdup.16 d30, r3
+ vdup.16 d30, r8
vdup.16 d31, r12
; (used) two for inputs (regA-regD), one for constants (q15)
; do some multiplications (ordered for maximum latency hiding)
@@ -153,15 +288,22 @@
MEND
; --------------------------------------------------------------------------
-;void idct32_transpose_and_transform(int16_t *transpose_buffer, int16_t *output, int16_t *input);
+;void vp9_short_idct32x32_add_neon(int16_t *input, uint8_t *dest, int dest_stride);
;
-; r0 int16_t *transpose_buffer
-; r1 int16_t *output
-; r2 int16_t *input)
-; TODO(cd): have more logical parameter ordering but this issue will disappear
-; when functions are combined.
+; r0 int16_t *input,
+; r1 uint8_t *dest,
+; r2 int dest_stride)
+; loop counters
+; r4 bands loop counter
+; r5 pass loop counter
+; r8 transpose loop counter
+; combine-add pointers
+; r6 dest + 31 * dest_stride, descending (30, 29, 28, ...)
+; r7 dest + 0 * dest_stride, ascending (1, 2, 3, ...)
+; r9 dest + 15 * dest_stride, descending (14, 13, 12, ...)
+; r10 dest + 16 * dest_stride, ascending (17, 18, 19, ...)
-|idct32_transpose_and_transform| PROC
+|vp9_short_idct32x32_add_neon| PROC
; This function does one pass of idct32x32 transform.
;
; This is done by transposing the input and then doing a 1d transform on
@@ -171,43 +313,73 @@
; The 1d transform is done by looping over bands of eight columns (the
; idct32_bands loop). For each band, the transform input transposition
; is done on demand, one band of four 8x8 matrices at a time. The four
- ; matrices are trsnposed by pairs (the idct32_transpose_pair loop).
- push {r4}
- mov r4, #0 ; initialize bands loop counter
+ ; matrices are transposed by pairs (the idct32_transpose_pair loop).
+ push {r4-r11}
+ vpush {d8-d15}
+ ; stack operation
+ ; internal buffer used to transpose 8 lines into before transforming them
+ ; int16_t transpose_buffer[32 * 8];
+ ; at sp + [4096, 4607]
+ ; results of the first pass (transpose and transform rows)
+ ; int16_t pass1[32 * 32];
+ ; at sp + [0, 2047]
+ ; results of the second pass (transpose and transform columns)
+ ; int16_t pass2[32 * 32];
+ ; at sp + [2048, 4095]
+ sub sp, sp, #512+2048+2048
+
+ ; r6 = dest + 31 * dest_stride
+ ; r7 = dest + 0 * dest_stride
+ ; r9 = dest + 15 * dest_stride
+ ; r10 = dest + 16 * dest_stride
+ rsb r6, r2, r2, lsl #5
+ rsb r9, r2, r2, lsl #4
+ add r10, r1, r2, lsl #4
+ mov r7, r1
+ add r6, r6, r1
+ add r9, r9, r1
+ ; r11 = -dest_stride
+ neg r11, r2
+ ; r3 = input
+ mov r3, r0
+ ; parameters for first pass
+ ; r0 = transpose_buffer[32 * 8]
+ add r0, sp, #4096
+ ; r1 = pass1[32 * 32]
+ mov r1, sp
+
+ mov r5, #0 ; initialize pass loop counter
+idct32_pass_loop
+ mov r4, #4 ; initialize bands loop counter
idct32_bands_loop
- ; TODO(cd) get rid of these push/pop by properly adjusting register
- ; content at end of loop
- push {r0}
- push {r1}
- push {r2}
- mov r3, #0 ; initialize transpose loop counter
+ mov r8, #2 ; initialize transpose loop counter
idct32_transpose_pair_loop
; Load two horizontally consecutive 8x8 16bit data matrices. The first one
; into q0-q7 and the second one into q8-q15. There is a stride of 64,
; adjusted to 32 because of the two post-increments.
- vld1.s16 {q8}, [r2]!
- vld1.s16 {q0}, [r2]!
- add r2, #32
- vld1.s16 {q9}, [r2]!
- vld1.s16 {q1}, [r2]!
- add r2, #32
- vld1.s16 {q10}, [r2]!
- vld1.s16 {q2}, [r2]!
- add r2, #32
- vld1.s16 {q11}, [r2]!
- vld1.s16 {q3}, [r2]!
- add r2, #32
- vld1.s16 {q12}, [r2]!
- vld1.s16 {q4}, [r2]!
- add r2, #32
- vld1.s16 {q13}, [r2]!
- vld1.s16 {q5}, [r2]!
- add r2, #32
- vld1.s16 {q14}, [r2]!
- vld1.s16 {q6}, [r2]!
- add r2, #32
- vld1.s16 {q15}, [r2]!
- vld1.s16 {q7}, [r2]!
+ vld1.s16 {q8}, [r3]!
+ vld1.s16 {q0}, [r3]!
+ add r3, #32
+ vld1.s16 {q9}, [r3]!
+ vld1.s16 {q1}, [r3]!
+ add r3, #32
+ vld1.s16 {q10}, [r3]!
+ vld1.s16 {q2}, [r3]!
+ add r3, #32
+ vld1.s16 {q11}, [r3]!
+ vld1.s16 {q3}, [r3]!
+ add r3, #32
+ vld1.s16 {q12}, [r3]!
+ vld1.s16 {q4}, [r3]!
+ add r3, #32
+ vld1.s16 {q13}, [r3]!
+ vld1.s16 {q5}, [r3]!
+ add r3, #32
+ vld1.s16 {q14}, [r3]!
+ vld1.s16 {q6}, [r3]!
+ add r3, #32
+ vld1.s16 {q15}, [r3]!
+ vld1.s16 {q7}, [r3]!
; Transpose the two 8x8 16bit data matrices.
vswp d17, d24
@@ -255,11 +427,13 @@
vst1.16 {q7}, [r0]!
; increment pointers by adjusted stride (not necessary for r0/out)
- sub r2, r2, #8*32*2-32-16*2
+    ; go back by 7*32 for the seven lines fully advanced by the reads and adds
+    ; go back by 32 for the eighth line, which was only read
+    ; advance by 16*2 to go to the next pair
+ sub r3, r3, #7*32*2 + 32 - 16*2
; transpose pair loop processing
- add r3, r3, #1
- cmp r3, #1
- BLE idct32_transpose_pair_loop
+ subs r8, r8, #1
+ bne idct32_transpose_pair_loop
; restore r0/input to its original value
sub r0, r0, #32*8*2
@@ -815,21 +989,26 @@
vadd.s16 q9, q5, q0
vsub.s16 q6, q5, q0
vsub.s16 q7, q4, q1
- STORE_IN_OUTPUT 17, 17, 16, q7, q6
- STORE_IN_OUTPUT 16, 15, 14, q9, q8
+
+ cmp r5, #0
+ bgt idct32_bands_end_2nd_pass
+
+idct32_bands_end_1st_pass
+ STORE_IN_OUTPUT 17, 16, 17, q6, q7
+ STORE_IN_OUTPUT 17, 14, 15, q8, q9
; --------------------------------------------------------------------------
; part of final stage
;output[ 0 * 32] = step1b[0][i] + step1b[31][i];
;output[ 1 * 32] = step1b[1][i] + step1b[30][i];
;output[30 * 32] = step1b[1][i] - step1b[30][i];
;output[31 * 32] = step1b[0][i] - step1b[31][i];
- LOAD_FROM_OUTPUT 14, 30, 31, q0, q1
+ LOAD_FROM_OUTPUT 15, 30, 31, q0, q1
vadd.s16 q4, q2, q1
vadd.s16 q5, q3, q0
vsub.s16 q6, q3, q0
vsub.s16 q7, q2, q1
- STORE_IN_OUTPUT 31, 31, 30, q7, q6
- STORE_IN_OUTPUT 30, 0, 1, q4, q5
+ STORE_IN_OUTPUT 31, 30, 31, q6, q7
+ STORE_IN_OUTPUT 31, 0, 1, q4, q5
; --------------------------------------------------------------------------
; part of stage 7
;step1[2] = step1b[2][i] + step1b[13][i];
@@ -848,25 +1027,25 @@
;output[18 * 32] = step1b[13][i] - step1b[18][i];
;output[19 * 32] = step1b[12][i] - step1b[19][i];
LOAD_FROM_OUTPUT 13, 18, 19, q0, q1
- vadd.s16 q6, q4, q1
- vadd.s16 q7, q5, q0
- vsub.s16 q8, q5, q0
- vsub.s16 q9, q4, q1
- STORE_IN_OUTPUT 19, 19, 18, q9, q8
- STORE_IN_OUTPUT 18, 13, 12, q7, q6
+ vadd.s16 q8, q4, q1
+ vadd.s16 q9, q5, q0
+ vsub.s16 q6, q5, q0
+ vsub.s16 q7, q4, q1
+ STORE_IN_OUTPUT 19, 18, 19, q6, q7
+ STORE_IN_OUTPUT 19, 12, 13, q8, q9
; --------------------------------------------------------------------------
; part of final stage
;output[ 2 * 32] = step1b[2][i] + step1b[29][i];
;output[ 3 * 32] = step1b[3][i] + step1b[28][i];
;output[28 * 32] = step1b[3][i] - step1b[28][i];
;output[29 * 32] = step1b[2][i] - step1b[29][i];
- LOAD_FROM_OUTPUT 12, 28, 29, q0, q1
+ LOAD_FROM_OUTPUT 13, 28, 29, q0, q1
vadd.s16 q4, q2, q1
vadd.s16 q5, q3, q0
vsub.s16 q6, q3, q0
vsub.s16 q7, q2, q1
- STORE_IN_OUTPUT 29, 29, 28, q7, q6
- STORE_IN_OUTPUT 28, 2, 3, q4, q5
+ STORE_IN_OUTPUT 29, 28, 29, q6, q7
+ STORE_IN_OUTPUT 29, 2, 3, q4, q5
; --------------------------------------------------------------------------
; part of stage 7
;step1[4] = step1b[4][i] + step1b[11][i];
@@ -885,25 +1064,25 @@
;output[20 * 32] = step1b[11][i] - step1b[20][i];
;output[21 * 32] = step1b[10][i] - step1b[21][i];
LOAD_FROM_OUTPUT 11, 20, 21, q0, q1
- vadd.s16 q6, q4, q1
- vadd.s16 q7, q5, q0
- vsub.s16 q8, q5, q0
- vsub.s16 q9, q4, q1
- STORE_IN_OUTPUT 21, 21, 20, q9, q8
- STORE_IN_OUTPUT 20, 11, 10, q7, q6
+ vadd.s16 q8, q4, q1
+ vadd.s16 q9, q5, q0
+ vsub.s16 q6, q5, q0
+ vsub.s16 q7, q4, q1
+ STORE_IN_OUTPUT 21, 20, 21, q6, q7
+ STORE_IN_OUTPUT 21, 10, 11, q8, q9
; --------------------------------------------------------------------------
; part of final stage
;output[ 4 * 32] = step1b[4][i] + step1b[27][i];
;output[ 5 * 32] = step1b[5][i] + step1b[26][i];
;output[26 * 32] = step1b[5][i] - step1b[26][i];
;output[27 * 32] = step1b[4][i] - step1b[27][i];
- LOAD_FROM_OUTPUT 10, 26, 27, q0, q1
+ LOAD_FROM_OUTPUT 11, 26, 27, q0, q1
vadd.s16 q4, q2, q1
vadd.s16 q5, q3, q0
vsub.s16 q6, q3, q0
vsub.s16 q7, q2, q1
- STORE_IN_OUTPUT 27, 27, 26, q7, q6
- STORE_IN_OUTPUT 26, 4, 5, q4, q5
+ STORE_IN_OUTPUT 27, 26, 27, q6, q7
+ STORE_IN_OUTPUT 27, 4, 5, q4, q5
; --------------------------------------------------------------------------
; part of stage 7
;step1[6] = step1b[6][i] + step1b[9][i];
@@ -922,92 +1101,199 @@
;output[22 * 32] = step1b[9][i] - step1b[22][i];
;output[23 * 32] = step1b[8][i] - step1b[23][i];
LOAD_FROM_OUTPUT 9, 22, 23, q0, q1
- vadd.s16 q6, q4, q1
- vadd.s16 q7, q5, q0
- vsub.s16 q8, q5, q0
- vsub.s16 q9, q4, q1
- STORE_IN_OUTPUT 23, 23, 22, q9, q8
- STORE_IN_OUTPUT 22, 9, 8, q7, q6
+ vadd.s16 q8, q4, q1
+ vadd.s16 q9, q5, q0
+ vsub.s16 q6, q5, q0
+ vsub.s16 q7, q4, q1
+ STORE_IN_OUTPUT 23, 22, 23, q6, q7
+ STORE_IN_OUTPUT 23, 8, 9, q8, q9
; --------------------------------------------------------------------------
; part of final stage
;output[ 6 * 32] = step1b[6][i] + step1b[25][i];
;output[ 7 * 32] = step1b[7][i] + step1b[24][i];
;output[24 * 32] = step1b[7][i] - step1b[24][i];
;output[25 * 32] = step1b[6][i] - step1b[25][i];
- LOAD_FROM_OUTPUT 8, 24, 25, q0, q1
+ LOAD_FROM_OUTPUT 9, 24, 25, q0, q1
vadd.s16 q4, q2, q1
vadd.s16 q5, q3, q0
vsub.s16 q6, q3, q0
vsub.s16 q7, q2, q1
- STORE_IN_OUTPUT 25, 25, 24, q7, q6
- STORE_IN_OUTPUT 24, 6, 7, q4, q5
- ; --------------------------------------------------------------------------
+ STORE_IN_OUTPUT 25, 24, 25, q6, q7
+ STORE_IN_OUTPUT 25, 6, 7, q4, q5
- ; TODO(cd) get rid of these push/pop by properly adjusting register
- ; content at end of loop
- pop {r2}
- pop {r1}
- pop {r0}
- add r1, r1, #8*2
- add r2, r2, #8*32*2
+ ; restore r0 by removing the last offset from the last
+ ; operation (LOAD_FROM_TRANSPOSED 16, 8, 24) => 24*8*2
+ sub r0, r0, #24*8*2
+ ; restore r1 by removing the last offset from the last
+ ; operation (STORE_IN_OUTPUT 24, 6, 7) => 7*32*2
+ ; advance by 8 columns => 8*2
+ sub r1, r1, #7*32*2 - 8*2
+ ; advance by 8 lines (8*32*2)
+ ; go back by the two pairs from the loop (32*2)
+ add r3, r3, #8*32*2 - 32*2
; bands loop processing
- add r4, r4, #1
- cmp r4, #3
- BLE idct32_bands_loop
+ subs r4, r4, #1
+ bne idct32_bands_loop
- pop {r4}
+ ; parameters for second pass
+ ; the input of pass2 is the result of pass1. we have to remove the offset
+ ; of 32 columns induced by the above idct32_bands_loop
+ sub r3, r1, #32*2
+ ; r1 = pass2[32 * 32]
+ add r1, sp, #2048
+
+ ; pass loop processing
+ add r5, r5, #1
+ B idct32_pass_loop
+
+idct32_bands_end_2nd_pass
+ STORE_COMBINE_CENTER_RESULTS
+ ; --------------------------------------------------------------------------
+ ; part of final stage
+ ;output[ 0 * 32] = step1b[0][i] + step1b[31][i];
+ ;output[ 1 * 32] = step1b[1][i] + step1b[30][i];
+ ;output[30 * 32] = step1b[1][i] - step1b[30][i];
+ ;output[31 * 32] = step1b[0][i] - step1b[31][i];
+ LOAD_FROM_OUTPUT 17, 30, 31, q0, q1
+ vadd.s16 q4, q2, q1
+ vadd.s16 q5, q3, q0
+ vsub.s16 q6, q3, q0
+ vsub.s16 q7, q2, q1
+ STORE_COMBINE_EXTREME_RESULTS
+ ; --------------------------------------------------------------------------
+ ; part of stage 7
+ ;step1[2] = step1b[2][i] + step1b[13][i];
+ ;step1[3] = step1b[3][i] + step1b[12][i];
+ ;step1[12] = step1b[3][i] - step1b[12][i];
+ ;step1[13] = step1b[2][i] - step1b[13][i];
+ LOAD_FROM_OUTPUT 31, 12, 13, q0, q1
+ vadd.s16 q2, q10, q1
+ vadd.s16 q3, q11, q0
+ vsub.s16 q4, q11, q0
+ vsub.s16 q5, q10, q1
+ ; --------------------------------------------------------------------------
+ ; part of final stage
+ ;output[12 * 32] = step1b[12][i] + step1b[19][i];
+ ;output[13 * 32] = step1b[13][i] + step1b[18][i];
+ ;output[18 * 32] = step1b[13][i] - step1b[18][i];
+ ;output[19 * 32] = step1b[12][i] - step1b[19][i];
+ LOAD_FROM_OUTPUT 13, 18, 19, q0, q1
+ vadd.s16 q8, q4, q1
+ vadd.s16 q9, q5, q0
+ vsub.s16 q6, q5, q0
+ vsub.s16 q7, q4, q1
+ STORE_COMBINE_CENTER_RESULTS
+ ; --------------------------------------------------------------------------
+ ; part of final stage
+ ;output[ 2 * 32] = step1b[2][i] + step1b[29][i];
+ ;output[ 3 * 32] = step1b[3][i] + step1b[28][i];
+ ;output[28 * 32] = step1b[3][i] - step1b[28][i];
+ ;output[29 * 32] = step1b[2][i] - step1b[29][i];
+ LOAD_FROM_OUTPUT 19, 28, 29, q0, q1
+ vadd.s16 q4, q2, q1
+ vadd.s16 q5, q3, q0
+ vsub.s16 q6, q3, q0
+ vsub.s16 q7, q2, q1
+ STORE_COMBINE_EXTREME_RESULTS
+ ; --------------------------------------------------------------------------
+ ; part of stage 7
+ ;step1[4] = step1b[4][i] + step1b[11][i];
+ ;step1[5] = step1b[5][i] + step1b[10][i];
+ ;step1[10] = step1b[5][i] - step1b[10][i];
+ ;step1[11] = step1b[4][i] - step1b[11][i];
+ LOAD_FROM_OUTPUT 29, 10, 11, q0, q1
+ vadd.s16 q2, q12, q1
+ vadd.s16 q3, q13, q0
+ vsub.s16 q4, q13, q0
+ vsub.s16 q5, q12, q1
+ ; --------------------------------------------------------------------------
+ ; part of final stage
+ ;output[10 * 32] = step1b[10][i] + step1b[21][i];
+ ;output[11 * 32] = step1b[11][i] + step1b[20][i];
+ ;output[20 * 32] = step1b[11][i] - step1b[20][i];
+ ;output[21 * 32] = step1b[10][i] - step1b[21][i];
+ LOAD_FROM_OUTPUT 11, 20, 21, q0, q1
+ vadd.s16 q8, q4, q1
+ vadd.s16 q9, q5, q0
+ vsub.s16 q6, q5, q0
+ vsub.s16 q7, q4, q1
+ STORE_COMBINE_CENTER_RESULTS
+ ; --------------------------------------------------------------------------
+ ; part of final stage
+ ;output[ 4 * 32] = step1b[4][i] + step1b[27][i];
+ ;output[ 5 * 32] = step1b[5][i] + step1b[26][i];
+ ;output[26 * 32] = step1b[5][i] - step1b[26][i];
+ ;output[27 * 32] = step1b[4][i] - step1b[27][i];
+ LOAD_FROM_OUTPUT 21, 26, 27, q0, q1
+ vadd.s16 q4, q2, q1
+ vadd.s16 q5, q3, q0
+ vsub.s16 q6, q3, q0
+ vsub.s16 q7, q2, q1
+ STORE_COMBINE_EXTREME_RESULTS
+ ; --------------------------------------------------------------------------
+ ; part of stage 7
+ ;step1[6] = step1b[6][i] + step1b[9][i];
+ ;step1[7] = step1b[7][i] + step1b[8][i];
+ ;step1[8] = step1b[7][i] - step1b[8][i];
+ ;step1[9] = step1b[6][i] - step1b[9][i];
+ LOAD_FROM_OUTPUT 27, 8, 9, q0, q1
+ vadd.s16 q2, q14, q1
+ vadd.s16 q3, q15, q0
+ vsub.s16 q4, q15, q0
+ vsub.s16 q5, q14, q1
+ ; --------------------------------------------------------------------------
+ ; part of final stage
+ ;output[ 8 * 32] = step1b[8][i] + step1b[23][i];
+ ;output[ 9 * 32] = step1b[9][i] + step1b[22][i];
+ ;output[22 * 32] = step1b[9][i] - step1b[22][i];
+ ;output[23 * 32] = step1b[8][i] - step1b[23][i];
+ LOAD_FROM_OUTPUT 9, 22, 23, q0, q1
+ vadd.s16 q8, q4, q1
+ vadd.s16 q9, q5, q0
+ vsub.s16 q6, q5, q0
+ vsub.s16 q7, q4, q1
+ STORE_COMBINE_CENTER_RESULTS_LAST
+ ; --------------------------------------------------------------------------
+ ; part of final stage
+ ;output[ 6 * 32] = step1b[6][i] + step1b[25][i];
+ ;output[ 7 * 32] = step1b[7][i] + step1b[24][i];
+ ;output[24 * 32] = step1b[7][i] - step1b[24][i];
+ ;output[25 * 32] = step1b[6][i] - step1b[25][i];
+ LOAD_FROM_OUTPUT 23, 24, 25, q0, q1
+ vadd.s16 q4, q2, q1
+ vadd.s16 q5, q3, q0
+ vsub.s16 q6, q3, q0
+ vsub.s16 q7, q2, q1
+ STORE_COMBINE_EXTREME_RESULTS_LAST
+ ; --------------------------------------------------------------------------
+ ; restore pointers to their initial indices for next band pass by
+ ; removing/adding dest_stride * 8. The actual increment by eight
+ ; is taken care of within the _LAST macros.
+ add r6, r6, r2, lsl #3
+ add r9, r9, r2, lsl #3
+ sub r7, r7, r2, lsl #3
+ sub r10, r10, r2, lsl #3
+
+ ; restore r0 by removing the last offset from the last
+ ; operation (LOAD_FROM_TRANSPOSED 16, 8, 24) => 24*8*2
+ sub r0, r0, #24*8*2
+ ; restore r1 by removing the last offset from the last
+ ; operation (LOAD_FROM_OUTPUT 23, 24, 25) => 25*32*2
+ ; advance by 8 columns => 8*2
+ sub r1, r1, #25*32*2 - 8*2
+ ; advance by 8 lines (8*32*2)
+ ; go back by the two pairs from the loop (32*2)
+ add r3, r3, #8*32*2 - 32*2
+
+ ; bands loop processing
+ subs r4, r4, #1
+ bne idct32_bands_loop
+
+ ; stack operation
+ add sp, sp, #512+2048+2048
+ vpop {d8-d15}
+ pop {r4-r11}
bx lr
- ENDP ; |idct32_transpose_and_transform|
-
-;void idct32_combine_add(uint8_t *dest, int16_t *out, int dest_stride);
-;
-; r0 uint8_t *dest
-; r1 int16_t *out
-; r2 int dest_stride)
-
-|idct32_combine_add| PROC
-
- mov r12, r0 ; dest pointer used for stores
- sub r2, r2, #32 ; adjust the stride (remove the post-increments)
- mov r3, #0 ; initialize loop counter
-
-idct32_combine_add_loop
- ; load out[j * 32 + 0-31]
- vld1.s16 {q12}, [r1]!
- vld1.s16 {q13}, [r1]!
- vld1.s16 {q14}, [r1]!
- vld1.s16 {q15}, [r1]!
- ; load dest[j * dest_stride + 0-31]
- vld1.s16 {q6}, [r0]!
- vld1.s16 {q7}, [r0]!
- ; ROUND_POWER_OF_TWO
- vrshr.s16 q12, q12, #6
- vrshr.s16 q13, q13, #6
- vrshr.s16 q14, q14, #6
- vrshr.s16 q15, q15, #6
- ; add to dest[j * dest_stride + 0-31]
- vaddw.u8 q12, q12, d12
- vaddw.u8 q13, q13, d13
- vaddw.u8 q14, q14, d14
- vaddw.u8 q15, q15, d15
- ; clip pixel
- vqmovun.s16 d12, q12
- vqmovun.s16 d13, q13
- vqmovun.s16 d14, q14
- vqmovun.s16 d15, q15
- ; store back into dest[j * dest_stride + 0-31]
- vst1.16 {q6}, [r12]!
- vst1.16 {q7}, [r12]!
- ; increment pointers by adjusted stride (not necessary for r1/out)
- add r0, r0, r2
- add r12, r12, r2
- ; loop processing
- add r3, r3, #1
- cmp r3, #31
- BLE idct32_combine_add_loop
-
- bx lr
- ENDP ; |idct32_transpose|
-
+ ENDP ; |vp9_short_idct32x32_add_neon|
END
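
The STORE_COMBINE_* macros in the second pass fold in what the deleted |idct32_combine_add| helper used to do in a separate pass over memory: round by 2^6, add to the predictor, and saturate to 8 bits. In C that per-row operation is (a sketch, with ROUND_POWER_OF_TWO and clip_pixel written out as in vpx_ports/vp9_common.h):

    #include <stdint.h>

    #define ROUND_POWER_OF_TWO(value, n) (((value) + (1 << ((n) - 1))) >> (n))

    static uint8_t clip_pixel(int val) {
      return (val > 255) ? 255 : (val < 0) ? 0 : (uint8_t)val;
    }

    /* What STORE_COMBINE_* does per output line, matching the deleted
       |idct32_combine_add|: round, add to the predictor, clip. */
    static void combine_add_row(uint8_t *dest, const int16_t *out, int width) {
      for (int i = 0; i < width; ++i)
        dest[i] = clip_pixel(dest[i] + ROUND_POWER_OF_TWO(out[i], 6));
    }

For the 32x32 case each row covers 32 pixels; interleaving this with the final butterfly, instead of re-reading the whole 32x32 buffer afterwards, is what lets the helper be removed.
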
diff --git a/vp9/common/arm/neon/vp9_short_idct8x8_add_neon.asm b/vp9/common/arm/neon/vp9_short_idct8x8_add_neon.asm
index a744f59..c02251a 100644
--- a/vp9/common/arm/neon/vp9_short_idct8x8_add_neon.asm
+++ b/vp9/common/arm/neon/vp9_short_idct8x8_add_neon.asm
@@ -9,7 +9,7 @@
;
EXPORT |vp9_short_idct8x8_add_neon|
- EXPORT |vp9_short_idct10_8x8_add_neon|
+ EXPORT |vp9_short_idct8x8_10_add_neon|
ARM
REQUIRE8
PRESERVE8
@@ -310,13 +310,13 @@
bx lr
ENDP ; |vp9_short_idct8x8_add_neon|
-;void vp9_short_idct10_8x8_add_neon(int16_t *input, uint8_t *dest, int dest_stride)
+;void vp9_short_idct8x8_10_add_neon(int16_t *input, uint8_t *dest, int dest_stride)
;
; r0 int16_t input
; r1 uint8_t *dest
; r2 int dest_stride)
-|vp9_short_idct10_8x8_add_neon| PROC
+|vp9_short_idct8x8_10_add_neon| PROC
push {r4-r9}
vpush {d8-d15}
vld1.s16 {q8,q9}, [r0]!
@@ -514,6 +514,6 @@
vpop {d8-d15}
pop {r4-r9}
bx lr
- ENDP ; |vp9_short_idct10_8x8_add_neon|
+ ENDP ; |vp9_short_idct8x8_10_add_neon|
END
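
The rename moves the coefficient-count qualifier after the block size, matching the C fallbacks renamed in vp9_idct.c below (vp9_short_idct8x8_10_add_c, vp9_short_idct16x16_10_add_c). The "10" variant is believed to handle blocks whose end-of-block is at most ten coefficients; a hypothetical dispatch sketch under that assumption:

    #include <stdint.h>

    void vp9_short_idct8x8_add_neon(int16_t *input, uint8_t *dest,
                                    int dest_stride);
    void vp9_short_idct8x8_10_add_neon(int16_t *input, uint8_t *dest,
                                       int dest_stride);

    /* Hypothetical caller: pick the reduced-coefficient path for small
       end-of-block values (threshold assumed from the "_10" naming). */
    static void idct8x8_add(int16_t *input, uint8_t *dest, int stride,
                            int eob) {
      if (eob <= 10)
        vp9_short_idct8x8_10_add_neon(input, dest, stride);
      else
        vp9_short_idct8x8_add_neon(input, dest, stride);
    }
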
diff --git a/vp9/common/generic/vp9_systemdependent.c b/vp9/common/generic/vp9_systemdependent.c
index f144721..536febb 100644
--- a/vp9/common/generic/vp9_systemdependent.c
+++ b/vp9/common/generic/vp9_systemdependent.c
@@ -10,7 +10,7 @@
#include "./vpx_config.h"
-#include "vp9_rtcd.h"
+#include "./vp9_rtcd.h"
#include "vp9/common/vp9_onyxc_int.h"
void vp9_machine_specific_config(VP9_COMMON *cm) {
diff --git a/vp9/common/vp9_alloccommon.c b/vp9/common/vp9_alloccommon.c
index 864e27e..5e526a8 100644
--- a/vp9/common/vp9_alloccommon.c
+++ b/vp9/common/vp9_alloccommon.c
@@ -170,13 +170,8 @@
void vp9_create_common(VP9_COMMON *cm) {
vp9_machine_specific_config(cm);
- vp9_init_mbmode_probs(cm);
-
cm->tx_mode = ONLY_4X4;
cm->comp_pred_mode = HYBRID_PREDICTION;
-
- // Initialize reference frame sign bias structure to defaults
- vpx_memset(cm->ref_frame_sign_bias, 0, sizeof(cm->ref_frame_sign_bias));
}
void vp9_remove_common(VP9_COMMON *cm) {
diff --git a/vp9/common/vp9_blockd.h b/vp9/common/vp9_blockd.h
index c8d677f..ccd220f 100644
--- a/vp9/common/vp9_blockd.h
+++ b/vp9/common/vp9_blockd.h
@@ -137,7 +137,7 @@
TX_SIZE tx_size;
int_mv mv[2]; // for each reference frame used
int_mv ref_mvs[MAX_REF_FRAMES][MAX_MV_REF_CANDIDATES];
- int_mv best_mv, best_second_mv;
+ int_mv best_mv[2];
uint8_t mode_context[MAX_REF_FRAMES];
@@ -244,10 +244,9 @@
unsigned char ab_index; // index of 4x4 block inside the 8x8 block
int q_index;
-
} MACROBLOCKD;
-static INLINE unsigned char *get_sb_index(MACROBLOCKD *xd, BLOCK_SIZE subsize) {
+static INLINE uint8_t *get_sb_index(MACROBLOCKD *xd, BLOCK_SIZE subsize) {
switch (subsize) {
case BLOCK_64X64:
case BLOCK_64X32:
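
Folding best_mv/best_second_mv into best_mv[2] lets callers index by reference slot instead of special-casing the compound second reference. A hypothetical caller sketch (int_mv simplified to its as_int view):

    #include <stdint.h>

    typedef union { int32_t as_int; } int_mv;  /* simplified stand-in */

    /* Index by reference slot (0 = first, 1 = second/compound) instead of
       two differently named fields. */
    static void set_best_mvs(int_mv best_mv[2], const int_mv cand[2],
                             int has_second_ref) {
      for (int ref = 0; ref < 1 + has_second_ref; ++ref)
        best_mv[ref] = cand[ref];
    }
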
diff --git a/vp9/common/vp9_common_data.h b/vp9/common/vp9_common_data.h
index 3822bfc..2945cd2 100644
--- a/vp9/common/vp9_common_data.h
+++ b/vp9/common/vp9_common_data.h
@@ -29,4 +29,4 @@
extern const TX_SIZE max_uv_txsize_lookup[BLOCK_SIZES];
extern const BLOCK_SIZE ss_size_lookup[BLOCK_SIZES][2][2];
-#endif // VP9_COMMON_VP9_COMMON_DATA_H
+#endif // VP9_COMMON_VP9_COMMON_DATA_H
diff --git a/vp9/common/vp9_convolve.c b/vp9/common/vp9_convolve.c
index abedf6b..1705402 100644
--- a/vp9/common/vp9_convolve.c
+++ b/vp9/common/vp9_convolve.c
@@ -282,7 +282,7 @@
int r;
for (r = h; r > 0; --r) {
- memcpy(dst, src, w);
+ vpx_memcpy(dst, src, w);
src += src_stride;
dst += dst_stride;
}
diff --git a/vp9/common/vp9_convolve.h b/vp9/common/vp9_convolve.h
index 13220e9..3d4cf69 100644
--- a/vp9/common/vp9_convolve.h
+++ b/vp9/common/vp9_convolve.h
@@ -7,8 +7,8 @@
* in the file PATENTS. All contributing project authors may
* be found in the AUTHORS file in the root of the source tree.
*/
-#ifndef VP9_COMMON_CONVOLVE_H_
-#define VP9_COMMON_CONVOLVE_H_
+#ifndef VP9_COMMON_VP9_CONVOLVE_H_
+#define VP9_COMMON_VP9_CONVOLVE_H_
#include "./vpx_config.h"
#include "vpx/vpx_integer.h"
@@ -26,4 +26,4 @@
const int16_t (*filter_y)[8];
};
-#endif // VP9_COMMON_CONVOLVE_H_
+#endif // VP9_COMMON_VP9_CONVOLVE_H_
diff --git a/vp9/common/vp9_debugmodes.c b/vp9/common/vp9_debugmodes.c
index 79f769e..355ac1a 100644
--- a/vp9/common/vp9_debugmodes.c
+++ b/vp9/common/vp9_debugmodes.c
@@ -63,9 +63,9 @@
print_mi_data(cm, mvs, "Transform:", offsetof(MB_MODE_INFO, tx_size));
print_mi_data(cm, mvs, "UV Modes:", offsetof(MB_MODE_INFO, uv_mode));
- log_frame_info(cm, "Vectors ",mvs);
+ log_frame_info(cm, "Vectors ", mvs);
for (mi_row = 0; mi_row < rows; mi_row++) {
- fprintf(mvs,"V ");
+ fprintf(mvs, "V ");
for (mi_col = 0; mi_col < cols; mi_col++) {
fprintf(mvs, "%4d:%4d ", mi_8x8[mi_index]->mbmi.mv[0].as_mv.row,
mi_8x8[mi_index]->mbmi.mv[0].as_mv.col);
diff --git a/vp9/common/vp9_default_coef_probs.h b/vp9/common/vp9_default_coef_probs.h
index 185fced..3b512be 100644
--- a/vp9/common/vp9_default_coef_probs.h
+++ b/vp9/common/vp9_default_coef_probs.h
@@ -7,6 +7,8 @@
* in the file PATENTS. All contributing project authors may
* be found in the AUTHORS file in the root of the source tree.
*/
+#ifndef VP9_COMMON_DEFAULT_COEF_PROBS_H_
+#define VP9_COMMON_DEFAULT_COEF_PROBS_H_
/*Generated file, included by vp9_entropy.c*/
static const vp9_coeff_probs_model default_coef_probs_4x4[BLOCK_TYPES] = {
@@ -694,3 +696,4 @@
}
};
+#endif // VP9_COMMON_DEFAULT_COEF_PROBS_H_
diff --git a/vp9/common/vp9_entropy.c b/vp9/common/vp9_entropy.c
index 32d9e0c..72ea72e 100644
--- a/vp9/common/vp9_entropy.c
+++ b/vp9/common/vp9_entropy.c
@@ -107,101 +107,171 @@
};
DECLARE_ALIGNED(16, const int16_t, vp9_default_scan_16x16[256]) = {
- 0, 16, 1, 32, 17, 2, 48, 33, 18, 3, 64, 34, 49, 19, 65, 80,
- 50, 4, 35, 66, 20, 81, 96, 51, 5, 36, 82, 97, 67, 112, 21, 52,
- 98, 37, 83, 113, 6, 68, 128, 53, 22, 99, 114, 84, 7, 129, 38, 69,
- 100, 115, 144, 130, 85, 54, 23, 8, 145, 39, 70, 116, 101, 131, 160, 146,
- 55, 86, 24, 71, 132, 117, 161, 40, 9, 102, 147, 176, 162, 87, 56, 25,
- 133, 118, 177, 148, 72, 103, 41, 163, 10, 192, 178, 88, 57, 134, 149, 119,
- 26, 164, 73, 104, 193, 42, 179, 208, 11, 135, 89, 165, 120, 150, 58, 194,
- 180, 27, 74, 209, 105, 151, 136, 43, 90, 224, 166, 195, 181, 121, 210, 59,
- 12, 152, 106, 167, 196, 75, 137, 225, 211, 240, 182, 122, 91, 28, 197, 13,
- 226, 168, 183, 153, 44, 212, 138, 107, 241, 60, 29, 123, 198, 184, 227, 169,
- 242, 76, 213, 154, 45, 92, 14, 199, 139, 61, 228, 214, 170, 185, 243, 108,
- 77, 155, 30, 15, 200, 229, 124, 215, 244, 93, 46, 186, 171, 201, 109, 140,
- 230, 62, 216, 245, 31, 125, 78, 156, 231, 47, 187, 202, 217, 94, 246, 141,
- 63, 232, 172, 110, 247, 157, 79, 218, 203, 126, 233, 188, 248, 95, 173, 142,
- 219, 111, 249, 234, 158, 127, 189, 204, 250, 235, 143, 174, 220, 205, 159, 251,
- 190, 221, 175, 236, 237, 191, 206, 252, 222, 253, 207, 238, 223, 254, 239, 255,
+ 0, 16, 1, 32, 17, 2, 48, 33, 18, 3, 64, 34, 49, 19, 65, 80,
+ 50, 4, 35, 66, 20, 81, 96, 51, 5, 36, 82, 97, 67, 112, 21, 52,
+ 98, 37, 83, 113, 6, 68, 128, 53, 22, 99, 114, 84, 7, 129, 38, 69,
+ 100, 115, 144, 130, 85, 54, 23, 8, 145, 39, 70, 116, 101, 131, 160, 146,
+ 55, 86, 24, 71, 132, 117, 161, 40, 9, 102, 147, 176, 162, 87, 56, 25,
+ 133, 118, 177, 148, 72, 103, 41, 163, 10, 192, 178, 88, 57, 134, 149, 119,
+ 26, 164, 73, 104, 193, 42, 179, 208, 11, 135, 89, 165, 120, 150, 58, 194,
+ 180, 27, 74, 209, 105, 151, 136, 43, 90, 224, 166, 195, 181, 121, 210, 59,
+ 12, 152, 106, 167, 196, 75, 137, 225, 211, 240, 182, 122, 91, 28, 197, 13,
+ 226, 168, 183, 153, 44, 212, 138, 107, 241, 60, 29, 123, 198, 184, 227, 169,
+ 242, 76, 213, 154, 45, 92, 14, 199, 139, 61, 228, 214, 170, 185, 243, 108,
+ 77, 155, 30, 15, 200, 229, 124, 215, 244, 93, 46, 186, 171, 201, 109, 140,
+ 230, 62, 216, 245, 31, 125, 78, 156, 231, 47, 187, 202, 217, 94, 246, 141,
+ 63, 232, 172, 110, 247, 157, 79, 218, 203, 126, 233, 188, 248, 95, 173, 142,
+ 219, 111, 249, 234, 158, 127, 189, 204, 250, 235, 143, 174, 220, 205, 159,
+ 251,
+ 190, 221, 175, 236, 237, 191, 206, 252, 222, 253, 207, 238, 223, 254, 239,
+ 255,
};
DECLARE_ALIGNED(16, const int16_t, vp9_col_scan_16x16[256]) = {
- 0, 16, 32, 48, 1, 64, 17, 80, 33, 96, 49, 2, 65, 112, 18, 81,
- 34, 128, 50, 97, 3, 66, 144, 19, 113, 35, 82, 160, 98, 51, 129, 4,
- 67, 176, 20, 114, 145, 83, 36, 99, 130, 52, 192, 5, 161, 68, 115, 21,
- 146, 84, 208, 177, 37, 131, 100, 53, 162, 224, 69, 6, 116, 193, 147, 85,
- 22, 240, 132, 38, 178, 101, 163, 54, 209, 117, 70, 7, 148, 194, 86, 179,
- 225, 23, 133, 39, 164, 8, 102, 210, 241, 55, 195, 118, 149, 71, 180, 24,
- 87, 226, 134, 165, 211, 40, 103, 56, 72, 150, 196, 242, 119, 9, 181, 227,
- 88, 166, 25, 135, 41, 104, 212, 57, 151, 197, 120, 73, 243, 182, 136, 167,
- 213, 89, 10, 228, 105, 152, 198, 26, 42, 121, 183, 244, 168, 58, 137, 229,
- 74, 214, 90, 153, 199, 184, 11, 106, 245, 27, 122, 230, 169, 43, 215, 59,
- 200, 138, 185, 246, 75, 12, 91, 154, 216, 231, 107, 28, 44, 201, 123, 170,
- 60, 247, 232, 76, 139, 13, 92, 217, 186, 248, 155, 108, 29, 124, 45, 202,
- 233, 171, 61, 14, 77, 140, 15, 249, 93, 30, 187, 156, 218, 46, 109, 125,
- 62, 172, 78, 203, 31, 141, 234, 94, 47, 188, 63, 157, 110, 250, 219, 79,
- 126, 204, 173, 142, 95, 189, 111, 235, 158, 220, 251, 127, 174, 143, 205, 236,
- 159, 190, 221, 252, 175, 206, 237, 191, 253, 222, 238, 207, 254, 223, 239, 255,
+ 0, 16, 32, 48, 1, 64, 17, 80, 33, 96, 49, 2, 65, 112, 18, 81,
+ 34, 128, 50, 97, 3, 66, 144, 19, 113, 35, 82, 160, 98, 51, 129, 4,
+ 67, 176, 20, 114, 145, 83, 36, 99, 130, 52, 192, 5, 161, 68, 115, 21,
+ 146, 84, 208, 177, 37, 131, 100, 53, 162, 224, 69, 6, 116, 193, 147, 85,
+ 22, 240, 132, 38, 178, 101, 163, 54, 209, 117, 70, 7, 148, 194, 86, 179,
+ 225, 23, 133, 39, 164, 8, 102, 210, 241, 55, 195, 118, 149, 71, 180, 24,
+ 87, 226, 134, 165, 211, 40, 103, 56, 72, 150, 196, 242, 119, 9, 181, 227,
+ 88, 166, 25, 135, 41, 104, 212, 57, 151, 197, 120, 73, 243, 182, 136, 167,
+ 213, 89, 10, 228, 105, 152, 198, 26, 42, 121, 183, 244, 168, 58, 137, 229,
+ 74, 214, 90, 153, 199, 184, 11, 106, 245, 27, 122, 230, 169, 43, 215, 59,
+ 200, 138, 185, 246, 75, 12, 91, 154, 216, 231, 107, 28, 44, 201, 123, 170,
+ 60, 247, 232, 76, 139, 13, 92, 217, 186, 248, 155, 108, 29, 124, 45, 202,
+ 233, 171, 61, 14, 77, 140, 15, 249, 93, 30, 187, 156, 218, 46, 109, 125,
+ 62, 172, 78, 203, 31, 141, 234, 94, 47, 188, 63, 157, 110, 250, 219, 79,
+ 126, 204, 173, 142, 95, 189, 111, 235, 158, 220, 251, 127, 174, 143, 205,
+ 236,
+ 159, 190, 221, 252, 175, 206, 237, 191, 253, 222, 238, 207, 254, 223, 239,
+ 255,
};
DECLARE_ALIGNED(16, const int16_t, vp9_row_scan_16x16[256]) = {
- 0, 1, 2, 16, 3, 17, 4, 18, 32, 5, 33, 19, 6, 34, 48, 20,
- 49, 7, 35, 21, 50, 64, 8, 36, 65, 22, 51, 37, 80, 9, 66, 52,
- 23, 38, 81, 67, 10, 53, 24, 82, 68, 96, 39, 11, 54, 83, 97, 69,
- 25, 98, 84, 40, 112, 55, 12, 70, 99, 113, 85, 26, 41, 56, 114, 100,
- 13, 71, 128, 86, 27, 115, 101, 129, 42, 57, 72, 116, 14, 87, 130, 102,
- 144, 73, 131, 117, 28, 58, 15, 88, 43, 145, 103, 132, 146, 118, 74, 160,
- 89, 133, 104, 29, 59, 147, 119, 44, 161, 148, 90, 105, 134, 162, 120, 176,
- 75, 135, 149, 30, 60, 163, 177, 45, 121, 91, 106, 164, 178, 150, 192, 136,
- 165, 179, 31, 151, 193, 76, 122, 61, 137, 194, 107, 152, 180, 208, 46, 166,
- 167, 195, 92, 181, 138, 209, 123, 153, 224, 196, 77, 168, 210, 182, 240, 108,
- 197, 62, 154, 225, 183, 169, 211, 47, 139, 93, 184, 226, 212, 241, 198, 170,
- 124, 155, 199, 78, 213, 185, 109, 227, 200, 63, 228, 242, 140, 214, 171, 186,
- 156, 229, 243, 125, 94, 201, 244, 215, 216, 230, 141, 187, 202, 79, 172, 110,
- 157, 245, 217, 231, 95, 246, 232, 126, 203, 247, 233, 173, 218, 142, 111, 158,
- 188, 248, 127, 234, 219, 249, 189, 204, 143, 174, 159, 250, 235, 205, 220, 175,
- 190, 251, 221, 191, 206, 236, 207, 237, 252, 222, 253, 223, 238, 239, 254, 255,
+ 0, 1, 2, 16, 3, 17, 4, 18, 32, 5, 33, 19, 6, 34, 48, 20,
+ 49, 7, 35, 21, 50, 64, 8, 36, 65, 22, 51, 37, 80, 9, 66, 52,
+ 23, 38, 81, 67, 10, 53, 24, 82, 68, 96, 39, 11, 54, 83, 97, 69,
+ 25, 98, 84, 40, 112, 55, 12, 70, 99, 113, 85, 26, 41, 56, 114, 100,
+ 13, 71, 128, 86, 27, 115, 101, 129, 42, 57, 72, 116, 14, 87, 130, 102,
+ 144, 73, 131, 117, 28, 58, 15, 88, 43, 145, 103, 132, 146, 118, 74, 160,
+ 89, 133, 104, 29, 59, 147, 119, 44, 161, 148, 90, 105, 134, 162, 120, 176,
+ 75, 135, 149, 30, 60, 163, 177, 45, 121, 91, 106, 164, 178, 150, 192, 136,
+ 165, 179, 31, 151, 193, 76, 122, 61, 137, 194, 107, 152, 180, 208, 46, 166,
+ 167, 195, 92, 181, 138, 209, 123, 153, 224, 196, 77, 168, 210, 182, 240, 108,
+ 197, 62, 154, 225, 183, 169, 211, 47, 139, 93, 184, 226, 212, 241, 198, 170,
+ 124, 155, 199, 78, 213, 185, 109, 227, 200, 63, 228, 242, 140, 214, 171, 186,
+ 156, 229, 243, 125, 94, 201, 244, 215, 216, 230, 141, 187, 202, 79, 172, 110,
+ 157, 245, 217, 231, 95, 246, 232, 126, 203, 247, 233, 173, 218, 142, 111,
+ 158,
+ 188, 248, 127, 234, 219, 249, 189, 204, 143, 174, 159, 250, 235, 205, 220,
+ 175,
+ 190, 251, 221, 191, 206, 236, 207, 237, 252, 222, 253, 223, 238, 239, 254,
+ 255,
};
DECLARE_ALIGNED(16, const int16_t, vp9_default_scan_32x32[1024]) = {
- 0, 32, 1, 64, 33, 2, 96, 65, 34, 128, 3, 97, 66, 160, 129, 35, 98, 4, 67, 130, 161, 192, 36, 99, 224, 5, 162, 193, 68, 131, 37, 100,
- 225, 194, 256, 163, 69, 132, 6, 226, 257, 288, 195, 101, 164, 38, 258, 7, 227, 289, 133, 320, 70, 196, 165, 290, 259, 228, 39, 321, 102, 352, 8, 197,
- 71, 134, 322, 291, 260, 353, 384, 229, 166, 103, 40, 354, 323, 292, 135, 385, 198, 261, 72, 9, 416, 167, 386, 355, 230, 324, 104, 293, 41, 417, 199, 136,
- 262, 387, 448, 325, 356, 10, 73, 418, 231, 168, 449, 294, 388, 105, 419, 263, 42, 200, 357, 450, 137, 480, 74, 326, 232, 11, 389, 169, 295, 420, 106, 451,
- 481, 358, 264, 327, 201, 43, 138, 512, 482, 390, 296, 233, 170, 421, 75, 452, 359, 12, 513, 265, 483, 328, 107, 202, 514, 544, 422, 391, 453, 139, 44, 234,
- 484, 297, 360, 171, 76, 515, 545, 266, 329, 454, 13, 423, 203, 108, 546, 485, 576, 298, 235, 140, 361, 330, 172, 547, 45, 455, 267, 577, 486, 77, 204, 362,
- 608, 14, 299, 578, 109, 236, 487, 609, 331, 141, 579, 46, 15, 173, 610, 363, 78, 205, 16, 110, 237, 611, 142, 47, 174, 79, 206, 17, 111, 238, 48, 143,
- 80, 175, 112, 207, 49, 18, 239, 81, 113, 19, 50, 82, 114, 51, 83, 115, 640, 516, 392, 268, 144, 20, 672, 641, 548, 517, 424, 393, 300, 269, 176, 145,
- 52, 21, 704, 673, 642, 580, 549, 518, 456, 425, 394, 332, 301, 270, 208, 177, 146, 84, 53, 22, 736, 705, 674, 643, 612, 581, 550, 519, 488, 457, 426, 395,
- 364, 333, 302, 271, 240, 209, 178, 147, 116, 85, 54, 23, 737, 706, 675, 613, 582, 551, 489, 458, 427, 365, 334, 303, 241, 210, 179, 117, 86, 55, 738, 707,
- 614, 583, 490, 459, 366, 335, 242, 211, 118, 87, 739, 615, 491, 367, 243, 119, 768, 644, 520, 396, 272, 148, 24, 800, 769, 676, 645, 552, 521, 428, 397, 304,
- 273, 180, 149, 56, 25, 832, 801, 770, 708, 677, 646, 584, 553, 522, 460, 429, 398, 336, 305, 274, 212, 181, 150, 88, 57, 26, 864, 833, 802, 771, 740, 709,
- 678, 647, 616, 585, 554, 523, 492, 461, 430, 399, 368, 337, 306, 275, 244, 213, 182, 151, 120, 89, 58, 27, 865, 834, 803, 741, 710, 679, 617, 586, 555, 493,
- 462, 431, 369, 338, 307, 245, 214, 183, 121, 90, 59, 866, 835, 742, 711, 618, 587, 494, 463, 370, 339, 246, 215, 122, 91, 867, 743, 619, 495, 371, 247, 123,
- 896, 772, 648, 524, 400, 276, 152, 28, 928, 897, 804, 773, 680, 649, 556, 525, 432, 401, 308, 277, 184, 153, 60, 29, 960, 929, 898, 836, 805, 774, 712, 681,
- 650, 588, 557, 526, 464, 433, 402, 340, 309, 278, 216, 185, 154, 92, 61, 30, 992, 961, 930, 899, 868, 837, 806, 775, 744, 713, 682, 651, 620, 589, 558, 527,
- 496, 465, 434, 403, 372, 341, 310, 279, 248, 217, 186, 155, 124, 93, 62, 31, 993, 962, 931, 869, 838, 807, 745, 714, 683, 621, 590, 559, 497, 466, 435, 373,
- 342, 311, 249, 218, 187, 125, 94, 63, 994, 963, 870, 839, 746, 715, 622, 591, 498, 467, 374, 343, 250, 219, 126, 95, 995, 871, 747, 623, 499, 375, 251, 127,
- 900, 776, 652, 528, 404, 280, 156, 932, 901, 808, 777, 684, 653, 560, 529, 436, 405, 312, 281, 188, 157, 964, 933, 902, 840, 809, 778, 716, 685, 654, 592, 561,
- 530, 468, 437, 406, 344, 313, 282, 220, 189, 158, 996, 965, 934, 903, 872, 841, 810, 779, 748, 717, 686, 655, 624, 593, 562, 531, 500, 469, 438, 407, 376, 345,
- 314, 283, 252, 221, 190, 159, 997, 966, 935, 873, 842, 811, 749, 718, 687, 625, 594, 563, 501, 470, 439, 377, 346, 315, 253, 222, 191, 998, 967, 874, 843, 750,
- 719, 626, 595, 502, 471, 378, 347, 254, 223, 999, 875, 751, 627, 503, 379, 255, 904, 780, 656, 532, 408, 284, 936, 905, 812, 781, 688, 657, 564, 533, 440, 409,
- 316, 285, 968, 937, 906, 844, 813, 782, 720, 689, 658, 596, 565, 534, 472, 441, 410, 348, 317, 286, 1000, 969, 938, 907, 876, 845, 814, 783, 752, 721, 690, 659,
- 628, 597, 566, 535, 504, 473, 442, 411, 380, 349, 318, 287, 1001, 970, 939, 877, 846, 815, 753, 722, 691, 629, 598, 567, 505, 474, 443, 381, 350, 319, 1002, 971,
- 878, 847, 754, 723, 630, 599, 506, 475, 382, 351, 1003, 879, 755, 631, 507, 383, 908, 784, 660, 536, 412, 940, 909, 816, 785, 692, 661, 568, 537, 444, 413, 972,
- 941, 910, 848, 817, 786, 724, 693, 662, 600, 569, 538, 476, 445, 414, 1004, 973, 942, 911, 880, 849, 818, 787, 756, 725, 694, 663, 632, 601, 570, 539, 508, 477,
- 446, 415, 1005, 974, 943, 881, 850, 819, 757, 726, 695, 633, 602, 571, 509, 478, 447, 1006, 975, 882, 851, 758, 727, 634, 603, 510, 479, 1007, 883, 759, 635, 511,
- 912, 788, 664, 540, 944, 913, 820, 789, 696, 665, 572, 541, 976, 945, 914, 852, 821, 790, 728, 697, 666, 604, 573, 542, 1008, 977, 946, 915, 884, 853, 822, 791,
- 760, 729, 698, 667, 636, 605, 574, 543, 1009, 978, 947, 885, 854, 823, 761, 730, 699, 637, 606, 575, 1010, 979, 886, 855, 762, 731, 638, 607, 1011, 887, 763, 639,
- 916, 792, 668, 948, 917, 824, 793, 700, 669, 980, 949, 918, 856, 825, 794, 732, 701, 670, 1012, 981, 950, 919, 888, 857, 826, 795, 764, 733, 702, 671, 1013, 982,
- 951, 889, 858, 827, 765, 734, 703, 1014, 983, 890, 859, 766, 735, 1015, 891, 767, 920, 796, 952, 921, 828, 797, 984, 953, 922, 860, 829, 798, 1016, 985, 954, 923,
- 892, 861, 830, 799, 1017, 986, 955, 893, 862, 831, 1018, 987, 894, 863, 1019, 895, 924, 956, 925, 988, 957, 926, 1020, 989, 958, 927, 1021, 990, 959, 1022, 991, 1023,
+ 0, 32, 1, 64, 33, 2, 96, 65, 34, 128, 3, 97, 66, 160,
+ 129, 35, 98, 4, 67, 130, 161, 192, 36, 99, 224, 5, 162, 193,
+ 68, 131, 37, 100,
+ 225, 194, 256, 163, 69, 132, 6, 226, 257, 288, 195, 101, 164, 38,
+ 258, 7, 227, 289, 133, 320, 70, 196, 165, 290, 259, 228, 39, 321,
+ 102, 352, 8, 197,
+ 71, 134, 322, 291, 260, 353, 384, 229, 166, 103, 40, 354, 323, 292,
+ 135, 385, 198, 261, 72, 9, 416, 167, 386, 355, 230, 324, 104, 293,
+ 41, 417, 199, 136,
+ 262, 387, 448, 325, 356, 10, 73, 418, 231, 168, 449, 294, 388, 105,
+ 419, 263, 42, 200, 357, 450, 137, 480, 74, 326, 232, 11, 389, 169,
+ 295, 420, 106, 451,
+ 481, 358, 264, 327, 201, 43, 138, 512, 482, 390, 296, 233, 170, 421,
+ 75, 452, 359, 12, 513, 265, 483, 328, 107, 202, 514, 544, 422, 391,
+ 453, 139, 44, 234,
+ 484, 297, 360, 171, 76, 515, 545, 266, 329, 454, 13, 423, 203, 108,
+ 546, 485, 576, 298, 235, 140, 361, 330, 172, 547, 45, 455, 267, 577,
+ 486, 77, 204, 362,
+ 608, 14, 299, 578, 109, 236, 487, 609, 331, 141, 579, 46, 15, 173,
+ 610, 363, 78, 205, 16, 110, 237, 611, 142, 47, 174, 79, 206, 17,
+ 111, 238, 48, 143,
+ 80, 175, 112, 207, 49, 18, 239, 81, 113, 19, 50, 82, 114, 51,
+ 83, 115, 640, 516, 392, 268, 144, 20, 672, 641, 548, 517, 424,
+ 393, 300, 269, 176, 145,
+ 52, 21, 704, 673, 642, 580, 549, 518, 456, 425, 394, 332, 301,
+ 270, 208, 177, 146, 84, 53, 22, 736, 705, 674, 643, 612, 581,
+ 550, 519, 488, 457, 426, 395,
+ 364, 333, 302, 271, 240, 209, 178, 147, 116, 85, 54, 23, 737,
+ 706, 675, 613, 582, 551, 489, 458, 427, 365, 334, 303, 241,
+ 210, 179, 117, 86, 55, 738, 707,
+ 614, 583, 490, 459, 366, 335, 242, 211, 118, 87, 739, 615, 491,
+ 367, 243, 119, 768, 644, 520, 396, 272, 148, 24, 800, 769, 676,
+ 645, 552, 521, 428, 397, 304,
+ 273, 180, 149, 56, 25, 832, 801, 770, 708, 677, 646, 584, 553,
+ 522, 460, 429, 398, 336, 305, 274, 212, 181, 150, 88, 57, 26,
+ 864, 833, 802, 771, 740, 709,
+ 678, 647, 616, 585, 554, 523, 492, 461, 430, 399, 368, 337, 306,
+ 275, 244, 213, 182, 151, 120, 89, 58, 27, 865, 834, 803, 741,
+ 710, 679, 617, 586, 555, 493,
+ 462, 431, 369, 338, 307, 245, 214, 183, 121, 90, 59, 866, 835,
+ 742, 711, 618, 587, 494, 463, 370, 339, 246, 215, 122, 91, 867,
+ 743, 619, 495, 371, 247, 123,
+ 896, 772, 648, 524, 400, 276, 152, 28, 928, 897, 804, 773, 680,
+ 649, 556, 525, 432, 401, 308, 277, 184, 153, 60, 29, 960, 929,
+ 898, 836, 805, 774, 712, 681,
+ 650, 588, 557, 526, 464, 433, 402, 340, 309, 278, 216, 185, 154,
+ 92, 61, 30, 992, 961, 930, 899, 868, 837, 806, 775, 744, 713, 682,
+ 651, 620, 589, 558, 527,
+ 496, 465, 434, 403, 372, 341, 310, 279, 248, 217, 186, 155, 124,
+ 93, 62, 31, 993, 962, 931, 869, 838, 807, 745, 714, 683, 621, 590,
+ 559, 497, 466, 435, 373,
+ 342, 311, 249, 218, 187, 125, 94, 63, 994, 963, 870, 839, 746, 715,
+ 622, 591, 498, 467, 374, 343, 250, 219, 126, 95, 995, 871, 747, 623,
+ 499, 375, 251, 127,
+ 900, 776, 652, 528, 404, 280, 156, 932, 901, 808, 777, 684, 653, 560,
+ 529, 436, 405, 312, 281, 188, 157, 964, 933, 902, 840, 809, 778, 716,
+ 685, 654, 592, 561,
+ 530, 468, 437, 406, 344, 313, 282, 220, 189, 158, 996, 965, 934, 903,
+ 872, 841, 810, 779, 748, 717, 686, 655, 624, 593, 562, 531, 500, 469,
+ 438, 407, 376, 345,
+ 314, 283, 252, 221, 190, 159, 997, 966, 935, 873, 842, 811, 749, 718,
+ 687, 625, 594, 563, 501, 470, 439, 377, 346, 315, 253, 222, 191, 998,
+ 967, 874, 843, 750,
+ 719, 626, 595, 502, 471, 378, 347, 254, 223, 999, 875, 751, 627, 503,
+ 379, 255, 904, 780, 656, 532, 408, 284, 936, 905, 812, 781, 688, 657,
+ 564, 533, 440, 409,
+ 316, 285, 968, 937, 906, 844, 813, 782, 720, 689, 658, 596, 565, 534,
+ 472, 441, 410, 348, 317, 286, 1000, 969, 938, 907, 876, 845, 814, 783,
+ 752, 721, 690, 659,
+ 628, 597, 566, 535, 504, 473, 442, 411, 380, 349, 318, 287, 1001, 970,
+ 939, 877, 846, 815, 753, 722, 691, 629, 598, 567, 505, 474, 443, 381,
+ 350, 319, 1002, 971,
+ 878, 847, 754, 723, 630, 599, 506, 475, 382, 351, 1003, 879, 755, 631,
+ 507, 383, 908, 784, 660, 536, 412, 940, 909, 816, 785, 692, 661, 568,
+ 537, 444, 413, 972,
+ 941, 910, 848, 817, 786, 724, 693, 662, 600, 569, 538, 476, 445, 414,
+ 1004, 973, 942, 911, 880, 849, 818, 787, 756, 725, 694, 663, 632, 601,
+ 570, 539, 508, 477,
+ 446, 415, 1005, 974, 943, 881, 850, 819, 757, 726, 695, 633, 602, 571,
+ 509, 478, 447, 1006, 975, 882, 851, 758, 727, 634, 603, 510, 479,
+ 1007, 883, 759, 635, 511,
+ 912, 788, 664, 540, 944, 913, 820, 789, 696, 665, 572, 541, 976, 945,
+ 914, 852, 821, 790, 728, 697, 666, 604, 573, 542, 1008, 977, 946, 915,
+ 884, 853, 822, 791,
+ 760, 729, 698, 667, 636, 605, 574, 543, 1009, 978, 947, 885, 854, 823,
+ 761, 730, 699, 637, 606, 575, 1010, 979, 886, 855, 762, 731, 638, 607,
+ 1011, 887, 763, 639,
+ 916, 792, 668, 948, 917, 824, 793, 700, 669, 980, 949, 918, 856, 825,
+ 794, 732, 701, 670, 1012, 981, 950, 919, 888, 857, 826, 795, 764, 733,
+ 702, 671, 1013, 982,
+ 951, 889, 858, 827, 765, 734, 703, 1014, 983, 890, 859, 766, 735, 1015,
+ 891, 767, 920, 796, 952, 921, 828, 797, 984, 953, 922, 860, 829, 798,
+ 1016, 985, 954, 923,
+ 892, 861, 830, 799, 1017, 986, 955, 893, 862, 831, 1018, 987, 894, 863,
+ 1019, 895, 924, 956, 925, 988, 957, 926, 1020, 989, 958, 927, 1021,
+ 990, 959, 1022, 991, 1023,
};
/* Array indices are identical to previously-existing CONTEXT_NODE indices */
-const vp9_tree_index vp9_coef_tree[ 22] = /* corresponding _CONTEXT_NODEs */
-{
+const vp9_tree_index vp9_coef_tree[ 22] = {
-DCT_EOB_TOKEN, 2, /* 0 = EOB */
-ZERO_TOKEN, 4, /* 1 = ZERO */
-ONE_TOKEN, 6, /* 2 = ONE */
@@ -569,31 +639,6 @@
vp9_default_scan_32x32_neighbors);
}
-const int16_t *vp9_get_coef_neighbors_handle(const int16_t *scan) {
- if (scan == vp9_default_scan_4x4) {
- return vp9_default_scan_4x4_neighbors;
- } else if (scan == vp9_row_scan_4x4) {
- return vp9_row_scan_4x4_neighbors;
- } else if (scan == vp9_col_scan_4x4) {
- return vp9_col_scan_4x4_neighbors;
- } else if (scan == vp9_default_scan_8x8) {
- return vp9_default_scan_8x8_neighbors;
- } else if (scan == vp9_row_scan_8x8) {
- return vp9_row_scan_8x8_neighbors;
- } else if (scan == vp9_col_scan_8x8) {
- return vp9_col_scan_8x8_neighbors;
- } else if (scan == vp9_default_scan_16x16) {
- return vp9_default_scan_16x16_neighbors;
- } else if (scan == vp9_row_scan_16x16) {
- return vp9_row_scan_16x16_neighbors;
- } else if (scan == vp9_col_scan_16x16) {
- return vp9_col_scan_16x16_neighbors;
- } else {
- assert(scan == vp9_default_scan_32x32);
- return vp9_default_scan_32x32_neighbors;
- }
-}
-
void vp9_coef_tree_initialize() {
vp9_init_neighbors();
init_bit_trees();
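
With vp9_get_coef_neighbors_handle() removed, a scan order and its neighbor table now travel together (see get_scan_and_band in vp9_entropy.h below) rather than being recovered by comparing scan pointers. A scan table simply maps token position to a raster coefficient index, so tokens are coded in roughly descending probability-of-nonzero order; a sketch of how a decoder walks one:

    #include <stdint.h>

    extern const int16_t vp9_default_scan_16x16[256];  /* table above */

    /* Place the c-th decoded value at the raster position the scan order
       assigns to token position c. */
    static void descan_block(int16_t block[256], const int16_t *values,
                             int eob) {
      for (int c = 0; c < eob; ++c)
        block[vp9_default_scan_16x16[c]] = values[c];
    }
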
diff --git a/vp9/common/vp9_entropy.h b/vp9/common/vp9_entropy.h
index f138c09..4ed9481 100644
--- a/vp9/common/vp9_entropy.h
+++ b/vp9/common/vp9_entropy.h
@@ -190,9 +190,6 @@
token_cache[neighbors[MAX_NEIGHBORS * c + 1]]) >> 1;
}
-const int16_t *vp9_get_coef_neighbors_handle(const int16_t *scan);
-
-
// 128 lists of probabilities are stored for the following ONE node probs:
// 1, 3, 5, 7, ..., 253, 255
// In between probabilities are interpolated linearly
@@ -336,37 +333,26 @@
}
}
-static int get_entropy_context(const MACROBLOCKD *xd, TX_SIZE tx_size,
- PLANE_TYPE type, int block_idx,
- ENTROPY_CONTEXT *A, ENTROPY_CONTEXT *L,
- const int16_t **scan,
- const uint8_t **band_translate) {
+static int get_entropy_context(TX_SIZE tx_size,
+ ENTROPY_CONTEXT *a, ENTROPY_CONTEXT *l) {
ENTROPY_CONTEXT above_ec = 0, left_ec = 0;
switch (tx_size) {
case TX_4X4:
- *scan = get_scan_4x4(get_tx_type_4x4(type, xd, block_idx));
- *band_translate = vp9_coefband_trans_4x4;
- above_ec = A[0] != 0;
- left_ec = L[0] != 0;
+ above_ec = a[0] != 0;
+ left_ec = l[0] != 0;
break;
case TX_8X8:
- *scan = get_scan_8x8(get_tx_type_8x8(type, xd));
- *band_translate = vp9_coefband_trans_8x8plus;
- above_ec = !!*(uint16_t *)A;
- left_ec = !!*(uint16_t *)L;
+ above_ec = !!*(uint16_t *)a;
+ left_ec = !!*(uint16_t *)l;
break;
case TX_16X16:
- *scan = get_scan_16x16(get_tx_type_16x16(type, xd));
- *band_translate = vp9_coefband_trans_8x8plus;
- above_ec = !!*(uint32_t *)A;
- left_ec = !!*(uint32_t *)L;
+ above_ec = !!*(uint32_t *)a;
+ left_ec = !!*(uint32_t *)l;
break;
case TX_32X32:
- *scan = vp9_default_scan_32x32;
- *band_translate = vp9_coefband_trans_8x8plus;
- above_ec = !!*(uint64_t *)A;
- left_ec = !!*(uint64_t *)L;
+ above_ec = !!*(uint64_t *)a;
+ left_ec = !!*(uint64_t *)l;
break;
default:
assert(!"Invalid transform size.");
@@ -375,6 +361,35 @@
return combine_entropy_contexts(above_ec, left_ec);
}
+static void get_scan_and_band(const MACROBLOCKD *xd, TX_SIZE tx_size,
+ PLANE_TYPE type, int block_idx,
+ const int16_t **scan,
+ const int16_t **scan_nb,
+ const uint8_t **band_translate) {
+ switch (tx_size) {
+ case TX_4X4:
+ get_scan_nb_4x4(get_tx_type_4x4(type, xd, block_idx), scan, scan_nb);
+ *band_translate = vp9_coefband_trans_4x4;
+ break;
+ case TX_8X8:
+ get_scan_nb_8x8(get_tx_type_8x8(type, xd), scan, scan_nb);
+ *band_translate = vp9_coefband_trans_8x8plus;
+ break;
+ case TX_16X16:
+ get_scan_nb_16x16(get_tx_type_16x16(type, xd), scan, scan_nb);
+ *band_translate = vp9_coefband_trans_8x8plus;
+ break;
+ case TX_32X32:
+ *scan = vp9_default_scan_32x32;
+ *scan_nb = vp9_default_scan_32x32_neighbors;
+ *band_translate = vp9_coefband_trans_8x8plus;
+ break;
+ default:
+ assert(!"Invalid transform size.");
+ }
+}
+
+
enum { VP9_COEF_UPDATE_PROB = 252 };
#endif // VP9_COMMON_VP9_ENTROPY_H_
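
Splitting the old get_entropy_context() into a pure context computation plus get_scan_and_band() means callers that only need the above/left context no longer pay for a scan and band lookup, and vice versa. A hypothetical caller of the split API (signatures from the hunk above; the surrounding types come from vp9_entropy.h):

    #include "vp9/common/vp9_entropy.h"

    static int context_then_scan(const MACROBLOCKD *xd, TX_SIZE tx_size,
                                 PLANE_TYPE type, int block_idx,
                                 ENTROPY_CONTEXT *a, ENTROPY_CONTEXT *l,
                                 const int16_t **scan, const int16_t **nb,
                                 const uint8_t **band) {
      const int ctx = get_entropy_context(tx_size, a, l);
      get_scan_and_band(xd, tx_size, type, block_idx, scan, nb, band);
      return ctx;
    }
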
diff --git a/vp9/common/vp9_entropymode.h b/vp9/common/vp9_entropymode.h
index 4cf4c03..31537c7 100644
--- a/vp9/common/vp9_entropymode.h
+++ b/vp9/common/vp9_entropymode.h
@@ -14,7 +14,6 @@
#include "vp9/common/vp9_blockd.h"
#include "vp9/common/vp9_treecoder.h"
-#define SUBMVREF_COUNT 5
#define TX_SIZE_CONTEXTS 2
#define MODE_UPDATE_PROB 252
#define SWITCHABLE_FILTERS 3 // number of switchable filters
diff --git a/vp9/common/vp9_entropymv.c b/vp9/common/vp9_entropymv.c
index 2e973e5..a9e25b7 100644
--- a/vp9/common/vp9_entropymv.c
+++ b/vp9/common/vp9_entropymv.c
@@ -39,12 +39,12 @@
};
struct vp9_token vp9_mv_class_encodings[MV_CLASSES];
-const vp9_tree_index vp9_mv_class0_tree [2 * CLASS0_SIZE - 2] = {
+const vp9_tree_index vp9_mv_class0_tree[2 * CLASS0_SIZE - 2] = {
-0, -1,
};
struct vp9_token vp9_mv_class0_encodings[CLASS0_SIZE];
-const vp9_tree_index vp9_mv_fp_tree [2 * 4 - 2] = {
+const vp9_tree_index vp9_mv_fp_tree[2 * 4 - 2] = {
-0, 2,
-1, 4,
-2, -3
@@ -53,8 +53,8 @@
static const nmv_context default_nmv_context = {
{32, 64, 96},
- {
- { /* vert component */
+ { // NOLINT
+ { /* vert component */ // NOLINT
128, /* sign */
{224, 144, 192, 168, 192, 176, 192, 198, 198, 245}, /* class */
{216}, /* class0 */
@@ -64,7 +64,7 @@
160, /* class0_hp bit */
128, /* hp */
},
- { /* hor component */
+ { /* hor component */ // NOLINT
128, /* sign */
{216, 128, 176, 160, 176, 176, 192, 198, 198, 208}, /* class */
{208}, /* class0 */
@@ -149,7 +149,7 @@
static void inc_mv_component(int v, nmv_component_counts *comp_counts,
int incr, int usehp) {
int s, z, c, o, d, e, f;
- assert (v != 0); /* should not be zero */
+ assert(v != 0); /* should not be zero */
s = v < 0;
comp_counts->sign[s] += incr;
z = (s ? -v : v) - 1; /* magnitude - 1 */
@@ -198,8 +198,6 @@
vp9_prob this_probs[],
const vp9_prob last_probs[],
const unsigned int num_events[]) {
-
-
const unsigned int left = tree[i] <= 0
? num_events[-tree[i]]
: adapt_probs(tree[i], tree, this_probs, last_probs, num_events);
diff --git a/vp9/common/vp9_entropymv.h b/vp9/common/vp9_entropymv.h
index a10c933..5018c20 100644
--- a/vp9/common/vp9_entropymv.h
+++ b/vp9/common/vp9_entropymv.h
@@ -13,7 +13,7 @@
#define VP9_COMMON_VP9_ENTROPYMV_H_
#include "vp9/common/vp9_treecoder.h"
-#include "vpx_config.h"
+#include "./vpx_config.h"
#include "vp9/common/vp9_blockd.h"
struct VP9Common;
diff --git a/vp9/common/vp9_filter.h b/vp9/common/vp9_filter.h
index 7b1ffae..58260ce 100644
--- a/vp9/common/vp9_filter.h
+++ b/vp9/common/vp9_filter.h
@@ -11,7 +11,7 @@
#ifndef VP9_COMMON_VP9_FILTER_H_
#define VP9_COMMON_VP9_FILTER_H_
-#include "vpx_config.h"
+#include "./vpx_config.h"
#include "vpx/vpx_integer.h"
#define SUBPEL_BITS 4
diff --git a/vp9/common/vp9_findnearmv.c b/vp9/common/vp9_findnearmv.c
index 49a731f..73f6b4c 100644
--- a/vp9/common/vp9_findnearmv.c
+++ b/vp9/common/vp9_findnearmv.c
@@ -54,7 +54,7 @@
dst_list[1].as_int = 0;
if (block_idx == 0) {
- memcpy(dst_list, mv_list, MAX_MV_REF_CANDIDATES * sizeof(int_mv));
+ vpx_memcpy(dst_list, mv_list, MAX_MV_REF_CANDIDATES * sizeof(int_mv));
} else if (block_idx == 1 || block_idx == 2) {
int dst = 0, n;
union b_mode_info *bmi = mi->bmi;
diff --git a/vp9/common/vp9_findnearmv.h b/vp9/common/vp9_findnearmv.h
index ad0d882..50dfdc7 100644
--- a/vp9/common/vp9_findnearmv.h
+++ b/vp9/common/vp9_findnearmv.h
@@ -55,13 +55,11 @@
if (!mi)
return DC_PRED;
- if (mi->mbmi.ref_frame[0] != INTRA_FRAME) {
+ if (is_inter_block(&mi->mbmi))
return DC_PRED;
- } else if (mi->mbmi.sb_type < BLOCK_8X8) {
- return ((mi->bmi + 1 + b)->as_mode);
- } else {
- return mi->mbmi.mode;
- }
+ else
+ return mi->mbmi.sb_type < BLOCK_8X8 ? (mi->bmi + 1 + b)->as_mode
+ : mi->mbmi.mode;
}
assert(b == 1 || b == 3);
return (mi->bmi + b - 1)->as_mode;
@@ -77,13 +75,11 @@
if (!mi)
return DC_PRED;
- if (mi->mbmi.ref_frame[0] != INTRA_FRAME) {
+ if (is_inter_block(&mi->mbmi))
return DC_PRED;
- } else if (mi->mbmi.sb_type < BLOCK_8X8) {
- return ((mi->bmi + 2 + b)->as_mode);
- } else {
- return mi->mbmi.mode;
- }
+ else
+ return mi->mbmi.sb_type < BLOCK_8X8 ? (mi->bmi + 2 + b)->as_mode
+ : mi->mbmi.mode;
}
return (mi->bmi + b - 2)->as_mode;
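
The rewritten helpers use is_inter_block() instead of comparing ref_frame[0] directly. Its definition (a sketch, as believed to be declared in vp9_blockd.h) is a one-liner, so the early DC_PRED return is behavior-preserving:

    /* A block is inter if its first reference is a real frame rather than
       INTRA_FRAME (definition assumed from vp9_blockd.h). */
    static INLINE int is_inter_block(const MB_MODE_INFO *mbmi) {
      return mbmi->ref_frame[0] > INTRA_FRAME;
    }
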
diff --git a/vp9/common/vp9_idct.c b/vp9/common/vp9_idct.c
index a224525..10b83f5 100644
--- a/vp9/common/vp9_idct.c
+++ b/vp9/common/vp9_idct.c
@@ -28,10 +28,10 @@
int16_t *op = output;
for (i = 0; i < 4; i++) {
- a1 = ip[0] >> WHT_UPSCALE_FACTOR;
- c1 = ip[1] >> WHT_UPSCALE_FACTOR;
- d1 = ip[2] >> WHT_UPSCALE_FACTOR;
- b1 = ip[3] >> WHT_UPSCALE_FACTOR;
+ a1 = ip[0] >> UNIT_QUANT_SHIFT;
+ c1 = ip[1] >> UNIT_QUANT_SHIFT;
+ d1 = ip[2] >> UNIT_QUANT_SHIFT;
+ b1 = ip[3] >> UNIT_QUANT_SHIFT;
a1 += c1;
d1 -= b1;
e1 = (a1 - d1) >> 1;
@@ -77,7 +77,7 @@
int16_t *ip = in;
int16_t *op = tmp;
- a1 = ip[0] >> WHT_UPSCALE_FACTOR;
+ a1 = ip[0] >> UNIT_QUANT_SHIFT;
e1 = a1 >> 1;
a1 -= e1;
op[0] = a1;
@@ -420,7 +420,7 @@
+ dest[j * dest_stride + i]); }
}
-void vp9_short_idct10_8x8_add_c(int16_t *input, uint8_t *dest,
+void vp9_short_idct8x8_10_add_c(int16_t *input, uint8_t *dest,
int dest_stride) {
int16_t out[8 * 8] = { 0 };
int16_t *outptr = out;
@@ -838,7 +838,7 @@
+ dest[j * dest_stride + i]); }
}
-void vp9_short_idct10_16x16_add_c(int16_t *input, uint8_t *dest,
+void vp9_short_idct16x16_10_add_c(int16_t *input, uint8_t *dest,
int dest_stride) {
int16_t out[16 * 16] = { 0 };
int16_t *outptr = out;
@@ -1269,8 +1269,18 @@
}
}
-void vp9_short_idct1_32x32_c(int16_t *input, int16_t *output) {
+void vp9_short_idct32x32_1_add_c(int16_t *input, uint8_t *dest,
+ int dest_stride) {
+ int i, j;
+ int a1;
+
int16_t out = dct_const_round_shift(input[0] * cospi_16_64);
out = dct_const_round_shift(out * cospi_16_64);
- output[0] = ROUND_POWER_OF_TWO(out, 6);
+ a1 = ROUND_POWER_OF_TWO(out, 6);
+
+ for (j = 0; j < 32; ++j) {
+ for (i = 0; i < 32; ++i)
+ dest[i] = clip_pixel(dest[i] + a1);
+ dest += dest_stride;
+ }
}
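
For a DC-only block the whole 32x32 inverse transform collapses to one rounded constant added to every predictor pixel. A worked example, taking DCT_CONST_BITS = 14 and cospi_16_64 = 11585 as defined in vp9_idct.h:

    #include <assert.h>
    #include <stdint.h>

    #define DCT_CONST_BITS 14
    #define ROUND_POWER_OF_TWO(value, n) (((value) + (1 << ((n) - 1))) >> (n))
    static const int cospi_16_64 = 11585;  /* from vp9_idct.h */

    /* input[0] = 64: (64*11585 + 8192) >> 14 = 45, then
       (45*11585 + 8192) >> 14 = 32, then (32 + 32) >> 6 = 1,
       so every destination pixel is bumped by exactly 1. */
    int main(void) {
      int16_t out = ROUND_POWER_OF_TWO(64 * cospi_16_64, DCT_CONST_BITS);
      int a1;
      out = ROUND_POWER_OF_TWO(out * cospi_16_64, DCT_CONST_BITS);  /* 32 */
      a1 = ROUND_POWER_OF_TWO(out, 6);                              /* 1  */
      assert(a1 == 1);
      return 0;
    }
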
diff --git a/vp9/common/vp9_idct.h b/vp9/common/vp9_idct.h
index 0c47da6..59892cd 100644
--- a/vp9/common/vp9_idct.h
+++ b/vp9/common/vp9_idct.h
@@ -22,10 +22,11 @@
#define DCT_CONST_BITS 14
#define DCT_CONST_ROUNDING (1 << (DCT_CONST_BITS - 1))
-#define WHT_UPSCALE_FACTOR 2
+#define UNIT_QUANT_SHIFT 2
+#define UNIT_QUANT_FACTOR (1 << UNIT_QUANT_SHIFT)
#define pair_set_epi16(a, b) \
- _mm_set1_epi32(((uint16_t)(a)) + (((uint16_t)(b)) << 16))
+ _mm_set_epi16(b, a, b, a, b, a, b, a)
#define pair_set_epi32(a, b) \
_mm_set_epi32(b, a, b, a)
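
The rewritten pair_set_epi16 builds the same register contents without shifting a promoted int left by 16, which overflows (undefined behavior) whenever the truncated b has its top bit set, as it does for the negative filter constants; that is presumably the motivation for the change. A quick SSE2 check of the lane layout (a sketch):

    #include <assert.h>
    #include <emmintrin.h>  /* SSE2 */

    int main(void) {
      const short a = 11585, b = -11585;  /* e.g. cospi_16_64 and its negation */
      __m128i v = _mm_set_epi16(b, a, b, a, b, a, b, a);
      short lanes[8];
      _mm_storeu_si128((__m128i *)lanes, v);
      /* int16 lanes alternate a (even) and b (odd): each 32-bit lane holds
         a in its low half and b in its high half, exactly the pattern the
         old _mm_set1_epi32 expression constructed. */
      assert(lanes[0] == a && lanes[1] == b && lanes[6] == a && lanes[7] == b);
      return 0;
    }
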
diff --git a/vp9/common/vp9_loopfilter.c b/vp9/common/vp9_loopfilter.c
index cfb5cd4..85ac6d2 100644
--- a/vp9/common/vp9_loopfilter.c
+++ b/vp9/common/vp9_loopfilter.c
@@ -8,7 +8,7 @@
* be found in the AUTHORS file in the root of the source tree.
*/
-#include "vpx_config.h"
+#include "./vpx_config.h"
#include "vp9/common/vp9_loopfilter.h"
#include "vp9/common/vp9_onyxc_int.h"
#include "vp9/common/vp9_reconinter.h"
@@ -316,13 +316,13 @@
continue;
}
- intra_lvl = lvl_seg + (lf->ref_deltas[INTRA_FRAME] << n_shift);
+ intra_lvl = lvl_seg + lf->ref_deltas[INTRA_FRAME] * (1 << n_shift);
lfi->lvl[seg_id][INTRA_FRAME][0] = clamp(intra_lvl, 0, MAX_LOOP_FILTER);
for (ref = LAST_FRAME; ref < MAX_REF_FRAMES; ++ref)
for (mode = 0; mode < MAX_MODE_LF_DELTAS; ++mode) {
- const int inter_lvl = lvl_seg + (lf->ref_deltas[ref] << n_shift)
- + (lf->mode_deltas[mode] << n_shift);
+ const int inter_lvl = lvl_seg + lf->ref_deltas[ref] * (1 << n_shift)
+ + lf->mode_deltas[mode] * (1 << n_shift);
lfi->lvl[seg_id][ref][mode] = clamp(inter_lvl, 0, MAX_LOOP_FILTER);
}
}
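
ref_deltas and mode_deltas can be negative, and left-shifting a negative signed int is undefined behavior in C (C99 6.5.7); multiplying by (1 << n_shift) computes the same value with defined behavior, which is presumably why these hunks trade the shifts for multiplies. A minimal sketch:

    /* The multiply form is preferred because the shift form is undefined
       for negative operands; the result is identical where both are valid. */
    static int scale_delta(int delta, int n_shift) {
      /* return delta << n_shift;        UB when delta < 0 */
      return delta * (1 << n_shift);  /* same value, defined behavior */
    }
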
diff --git a/vp9/common/vp9_loopfilter.h b/vp9/common/vp9_loopfilter.h
index 91d40ac..c698090 100644
--- a/vp9/common/vp9_loopfilter.h
+++ b/vp9/common/vp9_loopfilter.h
@@ -12,7 +12,7 @@
#define VP9_COMMON_VP9_LOOPFILTER_H_
#include "vpx_ports/mem.h"
-#include "vpx_config.h"
+#include "./vpx_config.h"
#include "vp9/common/vp9_blockd.h"
#include "vp9/common/vp9_seg_common.h"
diff --git a/vp9/common/vp9_loopfilter_filters.c b/vp9/common/vp9_loopfilter_filters.c
index 88130d8..2c4bf6c 100644
--- a/vp9/common/vp9_loopfilter_filters.c
+++ b/vp9/common/vp9_loopfilter_filters.c
@@ -8,7 +8,7 @@
* be found in the AUTHORS file in the root of the source tree.
*/
-#include "vpx_config.h"
+#include "./vpx_config.h"
#include "vp9/common/vp9_common.h"
#include "vp9/common/vp9_loopfilter.h"
#include "vp9/common/vp9_onyxc_int.h"
diff --git a/vp9/common/vp9_mvref_common.c b/vp9/common/vp9_mvref_common.c
index a444b85..6590796 100644
--- a/vp9/common/vp9_mvref_common.c
+++ b/vp9/common/vp9_mvref_common.c
@@ -108,7 +108,7 @@
};
// clamp_mv_ref
-#define MV_BORDER (16 << 3) // Allow 16 pels in 1/8th pel units
+#define MV_BORDER (16 << 3) // Allow 16 pels in 1/8th pel units
static void clamp_mv_ref(MV *mv, const MACROBLOCKD *xd) {
clamp_mv(mv, xd->mb_to_left_edge - MV_BORDER,
diff --git a/vp9/common/vp9_onyx.h b/vp9/common/vp9_onyx.h
index f424e6a..acb4724 100644
--- a/vp9/common/vp9_onyx.h
+++ b/vp9/common/vp9_onyx.h
@@ -13,7 +13,7 @@
#ifdef __cplusplus
extern "C"
-{
+{ // NOLINT
#endif
#include "./vpx_config.h"
@@ -33,7 +33,6 @@
FOURFIVE = 1,
THREEFIVE = 2,
ONETWO = 3
-
} VPX_SCALING;
typedef enum {
@@ -71,42 +70,48 @@
// 3 - lowest quality/fastest decode
int width; // width of data passed to the compressor
int height; // height of data passed to the compressor
- double framerate; // set to passed in framerate
- int64_t target_bandwidth; // bandwidth to be used in kilobits per second
+ double framerate; // set to passed in framerate
+ int64_t target_bandwidth; // bandwidth to be used in kilobits per second
- int noise_sensitivity; // parameter used for applying pre processing blur: recommendation 0
- int Sharpness; // parameter used for sharpening output: recommendation 0:
+ int noise_sensitivity; // pre-processing blur: recommendation 0
+ int Sharpness; // sharpening output: recommendation 0:
int cpu_used;
unsigned int rc_max_intra_bitrate_pct;
// mode ->
- // (0)=Realtime/Live Encoding. This mode is optimized for realtim encoding (for example, capturing
- // a television signal or feed from a live camera). ( speed setting controls how fast )
- // (1)=Good Quality Fast Encoding. The encoder balances quality with the amount of time it takes to
- // encode the output. ( speed setting controls how fast )
- // (2)=One Pass - Best Quality. The encoder places priority on the quality of the output over encoding
- // speed. The output is compressed at the highest possible quality. This option takes the longest
- // amount of time to encode. ( speed setting ignored )
- // (3)=Two Pass - First Pass. The encoder generates a file of statistics for use in the second encoding
- // pass. ( speed setting controls how fast )
- // (4)=Two Pass - Second Pass. The encoder uses the statistics that were generated in the first encoding
- // pass to create the compressed output. ( speed setting controls how fast )
- // (5)=Two Pass - Second Pass Best. The encoder uses the statistics that were generated in the first
- // encoding pass to create the compressed output using the highest possible quality, and taking a
+ // (0)=Realtime/Live Encoding. This mode is optimized for realtime
+ // encoding (for example, capturing a television signal or feed from
+ // a live camera). ( speed setting controls how fast )
+ // (1)=Good Quality Fast Encoding. The encoder balances quality with the
+ // amount of time it takes to encode the output. ( speed setting
+ // controls how fast )
+ // (2)=One Pass - Best Quality. The encoder places priority on the
+ // quality of the output over encoding speed. The output is compressed
+ // at the highest possible quality. This option takes the longest
+ // amount of time to encode. ( speed setting ignored )
+ // (3)=Two Pass - First Pass. The encoder generates a file of statistics
+ // for use in the second encoding pass. ( speed setting controls how
+ // fast )
+ // (4)=Two Pass - Second Pass. The encoder uses the statistics that were
+ // generated in the first encoding pass to create the compressed
+ // output. ( speed setting controls how fast )
+ // (5)=Two Pass - Second Pass Best. The encoder uses the statistics that
+ // were generated in the first encoding pass to create the compressed
+ // output using the highest possible quality, and taking a
+ // longer amount of time to encode. ( speed setting ignored )
- int Mode; //
+ int Mode;
// Key Framing Operations
- int auto_key; // automatically detect cut scenes and set the keyframes
- int key_freq; // maximum distance to key frame.
+ int auto_key; // autodetect cut scenes and set the keyframes
+ int key_freq; // maximum distance to key frame.
- int allow_lag; // allow lagged compression (if 0 lagin frames is ignored)
- int lag_in_frames; // how many frames lag before we start encoding
+ int allow_lag; // allow lagged compression (if 0, lag_in_frames is ignored)
+ int lag_in_frames; // how many frames lag before we start encoding
// ----------------------------------------------------------------
// DATARATE CONTROL OPTIONS
- int end_usage; // vbr or cbr
+ int end_usage; // vbr or cbr
// buffer targeting aggressiveness
int under_shoot_pct;
@@ -138,7 +143,7 @@
int play_alternate;
int alt_freq;
- int encode_breakout; // early breakout encode threshold : for video conf recommend 800
+ int encode_breakout; // early breakout : for video conf recommend 800
/* Bitfield defining the error resiliency features to enable.
* Can provide decodable frames after losses in previous
@@ -173,8 +178,8 @@
void vp9_change_config(VP9_PTR onyx, VP9_CONFIG *oxcf);
-// receive a frames worth of data caller can assume that a copy of this frame is made
-// and not just a copy of the pointer..
+ // receive a frame's worth of data. caller can assume that a copy of this
+ // frame is made and not just a copy of the pointer.
int vp9_receive_raw_frame(VP9_PTR comp, unsigned int frame_flags,
YV12_BUFFER_CONFIG *sd, int64_t time_stamp,
int64_t end_time_stamp);
diff --git a/vp9/common/vp9_onyxc_int.h b/vp9/common/vp9_onyxc_int.h
index 0431e14..f7d6391 100644
--- a/vp9/common/vp9_onyxc_int.h
+++ b/vp9/common/vp9_onyxc_int.h
@@ -11,9 +11,9 @@
#ifndef VP9_COMMON_VP9_ONYXC_INT_H_
#define VP9_COMMON_VP9_ONYXC_INT_H_
-#include "vpx_config.h"
+#include "./vpx_config.h"
#include "vpx/internal/vpx_codec_internal.h"
-#include "vp9_rtcd.h"
+#include "./vp9_rtcd.h"
#include "vp9/common/vp9_loopfilter.h"
#include "vp9/common/vp9_entropymv.h"
#include "vp9/common/vp9_entropy.h"
@@ -120,7 +120,7 @@
YV12_BUFFER_CONFIG post_proc_buffer;
- FRAME_TYPE last_frame_type; /* Save last frame's frame type for motion search. */
+ FRAME_TYPE last_frame_type; /* last frame's frame type for motion search.*/
FRAME_TYPE frame_type;
int show_frame;
@@ -280,10 +280,10 @@
static void set_mi_row_col(VP9_COMMON *cm, MACROBLOCKD *xd,
int mi_row, int bh,
int mi_col, int bw) {
- xd->mb_to_top_edge = -((mi_row * MI_SIZE) << 3);
- xd->mb_to_bottom_edge = ((cm->mi_rows - bh - mi_row) * MI_SIZE) << 3;
- xd->mb_to_left_edge = -((mi_col * MI_SIZE) << 3);
- xd->mb_to_right_edge = ((cm->mi_cols - bw - mi_col) * MI_SIZE) << 3;
+ xd->mb_to_top_edge = -((mi_row * MI_SIZE) * 8);
+ xd->mb_to_bottom_edge = ((cm->mi_rows - bh - mi_row) * MI_SIZE) * 8;
+ xd->mb_to_left_edge = -((mi_col * MI_SIZE) * 8);
+ xd->mb_to_right_edge = ((cm->mi_cols - bw - mi_col) * MI_SIZE) * 8;
// Are edges available for intra prediction?
xd->up_available = (mi_row != 0);
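
The * 8 here is a units conversion rather than a scaling trick: mi positions are in MI_SIZE-pixel units and the mb_to_*_edge fields are kept in 1/8-pel units, so the conversion is pixels times 8. Writing it as a multiply matches the loop-filter change above. A sketch (assuming MI_SIZE == 8 pixels per mi unit, as in vp9_blockd.h):

    #define MI_SIZE 8  /* assumed: pixels per mi unit */

    /* mi units -> pixels -> 1/8-pel */
    static int mi_to_subpel_edge(int mi_pos) {
      const int pixels = mi_pos * MI_SIZE;
      return pixels * 8;
    }
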
diff --git a/vp9/common/vp9_postproc.c b/vp9/common/vp9_postproc.c
index 955e676..212a28a 100644
--- a/vp9/common/vp9_postproc.c
+++ b/vp9/common/vp9_postproc.c
@@ -8,6 +8,9 @@
* be found in the AUTHORS file in the root of the source tree.
*/
+#include <math.h>
+#include <stdlib.h>
+#include <stdio.h>
#include "./vpx_config.h"
#include "vpx_scale/yv12config.h"
@@ -18,11 +21,6 @@
#include "./vp9_rtcd.h"
#include "./vpx_scale_rtcd.h"
-
-#include <math.h>
-#include <stdlib.h>
-#include <stdio.h>
-
#define RGB_TO_YUV(t) \
( (0.257*(float)(t >> 16)) + (0.504*(float)(t >> 8 & 0xff)) + \
(0.098*(float)(t & 0xff)) + 16), \
@@ -155,7 +153,6 @@
p_dst = dst_ptr;
for (col = 0; col < cols; col++) {
-
int kernel = 4;
int v = p_src[col];
@@ -257,7 +254,7 @@
void vp9_mbpost_proc_down_c(uint8_t *dst, int pitch,
int rows, int cols, int flimit) {
int r, c, i;
- const short *rv3 = &vp9_rv[63 & rand()];
+ const short *rv3 = &vp9_rv[63 & rand()]; // NOLINT
for (c = 0; c < cols; c++) {
uint8_t *s = &dst[c];
@@ -408,7 +405,6 @@
next = next + j;
}
-
}
for (; next < 256; next++)
@@ -416,7 +412,7 @@
}
for (i = 0; i < 3072; i++) {
- state->noise[i] = char_dist[rand() & 0xff];
+ state->noise[i] = char_dist[rand() & 0xff]; // NOLINT
}
for (i = 0; i < 16; i++) {
@@ -680,13 +676,14 @@
#if 0 && CONFIG_POSTPROC_VISUALIZER
if (flags & VP9D_DEBUG_TXT_FRAME_INFO) {
char message[512];
- sprintf(message, "F%1dG%1dQ%3dF%3dP%d_s%dx%d",
- (cm->frame_type == KEY_FRAME),
- cm->refresh_golden_frame,
- cm->base_qindex,
- cm->filter_level,
- flags,
- cm->mb_cols, cm->mb_rows);
+ snprintf(message, sizeof(message) - 1,
+ "F%1dG%1dQ%3dF%3dP%d_s%dx%d",
+ (cm->frame_type == KEY_FRAME),
+ cm->refresh_golden_frame,
+ cm->base_qindex,
+ cm->filter_level,
+ flags,
+ cm->mb_cols, cm->mb_rows);
vp9_blit_text(message, cm->post_proc_buffer.y_buffer,
cm->post_proc_buffer.y_stride);
}
@@ -707,7 +704,7 @@
for (j = 0; j < mb_cols; j++) {
char zz[4];
- sprintf(zz, "%c", mi[mb_index].mbmi.mode + 'a');
+ snprintf(zz, sizeof(zz) - 1, "%c", mi[mb_index].mbmi.mode + 'a');
vp9_blit_text(zz, y_ptr, post->y_stride);
mb_index++;
@@ -716,7 +713,6 @@
mb_index++; /* border */
y_ptr += post->y_stride * 16 - post->y_width;
-
}
}
@@ -740,9 +736,9 @@
mi[mb_index].mbmi.skip_coeff);
if (cm->frame_type == KEY_FRAME)
- sprintf(zz, "a");
+ snprintf(zz, sizeof(zz) - 1, "a");
else
- sprintf(zz, "%c", dc_diff + '0');
+ snprintf(zz, sizeof(zz) - 1, "%c", dc_diff + '0');
vp9_blit_text(zz, y_ptr, post->y_stride);
mb_index++;
@@ -751,7 +747,6 @@
mb_index++; /* border */
y_ptr += post->y_stride * 16 - post->y_width;
-
}
}
@@ -894,8 +889,9 @@
constrain_line(lx0, &x1, ly0 + 1, &y1, width, height);
vp9_blit_line(lx0, x1, ly0 + 1, y1, y_buffer, y_stride);
- } else
+ } else {
vp9_blit_line(lx0, x1, ly0, y1, y_buffer, y_stride);
+ }
}
mi++;
diff --git a/vp9/common/vp9_pred_common.h b/vp9/common/vp9_pred_common.h
index 47ca8ab..3ec9f34 100644
--- a/vp9/common/vp9_pred_common.h
+++ b/vp9/common/vp9_pred_common.h
@@ -69,8 +69,9 @@
const MACROBLOCKD *xd);
-static INLINE vp9_prob vp9_get_pred_prob_comp_inter_inter(const VP9_COMMON *cm,
- const MACROBLOCKD *xd) {
+static INLINE
+vp9_prob vp9_get_pred_prob_comp_inter_inter(const VP9_COMMON *cm,
+ const MACROBLOCKD *xd) {
const int pred_context = vp9_get_pred_context_comp_inter_inter(cm, xd);
return cm->fc.comp_inter_prob[pred_context];
}
diff --git a/vp9/common/vp9_quant_common.c b/vp9/common/vp9_quant_common.c
index bc40854..6dbdb42 100644
--- a/vp9/common/vp9_quant_common.c
+++ b/vp9/common/vp9_quant_common.c
@@ -14,69 +14,69 @@
#if 1
static const int16_t dc_qlookup[QINDEX_RANGE] = {
- 4, 8, 8, 9, 10, 11, 12, 12,
- 13, 14, 15, 16, 17, 18, 19, 19,
- 20, 21, 22, 23, 24, 25, 26, 26,
- 27, 28, 29, 30, 31, 32, 32, 33,
- 34, 35, 36, 37, 38, 38, 39, 40,
- 41, 42, 43, 43, 44, 45, 46, 47,
- 48, 48, 49, 50, 51, 52, 53, 53,
- 54, 55, 56, 57, 57, 58, 59, 60,
- 61, 62, 62, 63, 64, 65, 66, 66,
- 67, 68, 69, 70, 70, 71, 72, 73,
- 74, 74, 75, 76, 77, 78, 78, 79,
- 80, 81, 81, 82, 83, 84, 85, 85,
- 87, 88, 90, 92, 93, 95, 96, 98,
- 99, 101, 102, 104, 105, 107, 108, 110,
- 111, 113, 114, 116, 117, 118, 120, 121,
- 123, 125, 127, 129, 131, 134, 136, 138,
- 140, 142, 144, 146, 148, 150, 152, 154,
- 156, 158, 161, 164, 166, 169, 172, 174,
- 177, 180, 182, 185, 187, 190, 192, 195,
- 199, 202, 205, 208, 211, 214, 217, 220,
- 223, 226, 230, 233, 237, 240, 243, 247,
- 250, 253, 257, 261, 265, 269, 272, 276,
- 280, 284, 288, 292, 296, 300, 304, 309,
- 313, 317, 322, 326, 330, 335, 340, 344,
- 349, 354, 359, 364, 369, 374, 379, 384,
- 389, 395, 400, 406, 411, 417, 423, 429,
- 435, 441, 447, 454, 461, 467, 475, 482,
- 489, 497, 505, 513, 522, 530, 539, 549,
- 559, 569, 579, 590, 602, 614, 626, 640,
- 654, 668, 684, 700, 717, 736, 755, 775,
- 796, 819, 843, 869, 896, 925, 955, 988,
+ 4, 8, 8, 9, 10, 11, 12, 12,
+ 13, 14, 15, 16, 17, 18, 19, 19,
+ 20, 21, 22, 23, 24, 25, 26, 26,
+ 27, 28, 29, 30, 31, 32, 32, 33,
+ 34, 35, 36, 37, 38, 38, 39, 40,
+ 41, 42, 43, 43, 44, 45, 46, 47,
+ 48, 48, 49, 50, 51, 52, 53, 53,
+ 54, 55, 56, 57, 57, 58, 59, 60,
+ 61, 62, 62, 63, 64, 65, 66, 66,
+ 67, 68, 69, 70, 70, 71, 72, 73,
+ 74, 74, 75, 76, 77, 78, 78, 79,
+ 80, 81, 81, 82, 83, 84, 85, 85,
+ 87, 88, 90, 92, 93, 95, 96, 98,
+ 99, 101, 102, 104, 105, 107, 108, 110,
+ 111, 113, 114, 116, 117, 118, 120, 121,
+ 123, 125, 127, 129, 131, 134, 136, 138,
+ 140, 142, 144, 146, 148, 150, 152, 154,
+ 156, 158, 161, 164, 166, 169, 172, 174,
+ 177, 180, 182, 185, 187, 190, 192, 195,
+ 199, 202, 205, 208, 211, 214, 217, 220,
+ 223, 226, 230, 233, 237, 240, 243, 247,
+ 250, 253, 257, 261, 265, 269, 272, 276,
+ 280, 284, 288, 292, 296, 300, 304, 309,
+ 313, 317, 322, 326, 330, 335, 340, 344,
+ 349, 354, 359, 364, 369, 374, 379, 384,
+ 389, 395, 400, 406, 411, 417, 423, 429,
+ 435, 441, 447, 454, 461, 467, 475, 482,
+ 489, 497, 505, 513, 522, 530, 539, 549,
+ 559, 569, 579, 590, 602, 614, 626, 640,
+ 654, 668, 684, 700, 717, 736, 755, 775,
+ 796, 819, 843, 869, 896, 925, 955, 988,
1022, 1058, 1098, 1139, 1184, 1232, 1282, 1336,
};
static const int16_t ac_qlookup[QINDEX_RANGE] = {
- 4, 8, 9, 10, 11, 12, 13, 14,
- 15, 16, 17, 18, 19, 20, 21, 22,
- 23, 24, 25, 26, 27, 28, 29, 30,
- 31, 32, 33, 34, 35, 36, 37, 38,
- 39, 40, 41, 42, 43, 44, 45, 46,
- 47, 48, 49, 50, 51, 52, 53, 54,
- 55, 56, 57, 58, 59, 60, 61, 62,
- 63, 64, 65, 66, 67, 68, 69, 70,
- 71, 72, 73, 74, 75, 76, 77, 78,
- 79, 80, 81, 82, 83, 84, 85, 86,
- 87, 88, 89, 90, 91, 92, 93, 94,
- 95, 96, 97, 98, 99, 100, 101, 102,
- 104, 106, 108, 110, 112, 114, 116, 118,
- 120, 122, 124, 126, 128, 130, 132, 134,
- 136, 138, 140, 142, 144, 146, 148, 150,
- 152, 155, 158, 161, 164, 167, 170, 173,
- 176, 179, 182, 185, 188, 191, 194, 197,
- 200, 203, 207, 211, 215, 219, 223, 227,
- 231, 235, 239, 243, 247, 251, 255, 260,
- 265, 270, 275, 280, 285, 290, 295, 300,
- 305, 311, 317, 323, 329, 335, 341, 347,
- 353, 359, 366, 373, 380, 387, 394, 401,
- 408, 416, 424, 432, 440, 448, 456, 465,
- 474, 483, 492, 501, 510, 520, 530, 540,
- 550, 560, 571, 582, 593, 604, 615, 627,
- 639, 651, 663, 676, 689, 702, 715, 729,
- 743, 757, 771, 786, 801, 816, 832, 848,
- 864, 881, 898, 915, 933, 951, 969, 988,
+ 4, 8, 9, 10, 11, 12, 13, 14,
+ 15, 16, 17, 18, 19, 20, 21, 22,
+ 23, 24, 25, 26, 27, 28, 29, 30,
+ 31, 32, 33, 34, 35, 36, 37, 38,
+ 39, 40, 41, 42, 43, 44, 45, 46,
+ 47, 48, 49, 50, 51, 52, 53, 54,
+ 55, 56, 57, 58, 59, 60, 61, 62,
+ 63, 64, 65, 66, 67, 68, 69, 70,
+ 71, 72, 73, 74, 75, 76, 77, 78,
+ 79, 80, 81, 82, 83, 84, 85, 86,
+ 87, 88, 89, 90, 91, 92, 93, 94,
+ 95, 96, 97, 98, 99, 100, 101, 102,
+ 104, 106, 108, 110, 112, 114, 116, 118,
+ 120, 122, 124, 126, 128, 130, 132, 134,
+ 136, 138, 140, 142, 144, 146, 148, 150,
+ 152, 155, 158, 161, 164, 167, 170, 173,
+ 176, 179, 182, 185, 188, 191, 194, 197,
+ 200, 203, 207, 211, 215, 219, 223, 227,
+ 231, 235, 239, 243, 247, 251, 255, 260,
+ 265, 270, 275, 280, 285, 290, 295, 300,
+ 305, 311, 317, 323, 329, 335, 341, 347,
+ 353, 359, 366, 373, 380, 387, 394, 401,
+ 408, 416, 424, 432, 440, 448, 456, 465,
+ 474, 483, 492, 501, 510, 520, 530, 540,
+ 550, 560, 571, 582, 593, 604, 615, 627,
+ 639, 651, 663, 676, 689, 702, 715, 729,
+ 743, 757, 771, 786, 801, 816, 832, 848,
+ 864, 881, 898, 915, 933, 951, 969, 988,
1007, 1026, 1046, 1066, 1087, 1108, 1129, 1151,
1173, 1196, 1219, 1243, 1267, 1292, 1317, 1343,
1369, 1396, 1423, 1451, 1479, 1508, 1537, 1567,
diff --git a/vp9/common/vp9_reconinter.c b/vp9/common/vp9_reconinter.c
index dc1d46c..18407dd 100644
--- a/vp9/common/vp9_reconinter.c
+++ b/vp9/common/vp9_reconinter.c
@@ -59,8 +59,8 @@
const struct subpix_fn_table *subpix,
enum mv_precision precision) {
const int is_q4 = precision == MV_PRECISION_Q4;
- const MV mv_q4 = { is_q4 ? src_mv->row : src_mv->row << 1,
- is_q4 ? src_mv->col : src_mv->col << 1 };
+ const MV mv_q4 = { is_q4 ? src_mv->row : src_mv->row * 2,
+ is_q4 ? src_mv->col : src_mv->col * 2 };
const MV32 mv = scale->scale_mv(&mv_q4, scale);
const int subpel_x = mv.col & SUBPEL_MASK;
const int subpel_y = mv.row & SUBPEL_MASK;
@@ -100,16 +100,17 @@
const int spel_top = (VP9_INTERP_EXTEND + bh) << SUBPEL_BITS;
const int spel_bottom = spel_top - SUBPEL_SHIFTS;
MV clamped_mv = {
- src_mv->row << (1 - ss_y),
- src_mv->col << (1 - ss_x)
+ src_mv->row * (1 << (1 - ss_y)),
+ src_mv->col * (1 << (1 - ss_x))
};
assert(ss_x <= 1);
assert(ss_y <= 1);
- clamp_mv(&clamped_mv, (xd->mb_to_left_edge << (1 - ss_x)) - spel_left,
- (xd->mb_to_right_edge << (1 - ss_x)) + spel_right,
- (xd->mb_to_top_edge << (1 - ss_y)) - spel_top,
- (xd->mb_to_bottom_edge << (1 - ss_y)) + spel_bottom);
+ clamp_mv(&clamped_mv,
+ xd->mb_to_left_edge * (1 << (1 - ss_x)) - spel_left,
+ xd->mb_to_right_edge * (1 << (1 - ss_x)) + spel_right,
+ xd->mb_to_top_edge * (1 << (1 - ss_y)) - spel_top,
+ xd->mb_to_bottom_edge * (1 << (1 - ss_y)) + spel_bottom);
return clamped_mv;
}
@@ -131,7 +132,7 @@
const int x = 4 * (block & ((1 << bwl) - 1));
const int y = 4 * (block >> bwl);
const MODE_INFO *mi = xd->this_mi;
- const int use_second_ref = mi->mbmi.ref_frame[1] > 0;
+ const int is_compound = has_second_ref(&mi->mbmi);
int ref;
assert(x < bw);
@@ -139,7 +140,7 @@
assert(mi->mbmi.sb_type < BLOCK_8X8 || 4 << pred_w == bw);
assert(mi->mbmi.sb_type < BLOCK_8X8 || 4 << pred_h == bh);
- for (ref = 0; ref < 1 + use_second_ref; ++ref) {
+ for (ref = 0; ref < 1 + is_compound; ++ref) {
struct scale_factors *const scale = &xd->scale_factor[ref];
struct buf_2d *const pre_buf = &pd->pre[ref];
struct buf_2d *const dst_buf = &pd->dst;
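The shift-to-multiply rewrites in this file (and the matching ones in vp9/encoder/vp9_dct.c below) sidestep undefined behavior: left-shifting a negative signed integer is undefined in C, and motion-vector components and the mb_to_*_edge values can be negative, whereas multiplying by a power of two is well-defined and compiles to the same instruction. A minimal standalone C sketch of the equivalence (the function name is illustrative, not part of the patch):

    #include <assert.h>

    /* v * (1 << shift) is the well-defined spelling of v << shift:
     * for negative v, the shift form is undefined behavior in C. */
    static int scale_signed(int v, int shift) {
      return v * (1 << shift);
    }

    int main(void) {
      assert(scale_signed(-7, 1) == -14);  /* MV rows/cols may be negative */
      assert(scale_signed(5, 2) == 20);
      return 0;
    }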
diff --git a/vp9/common/vp9_reconintra.c b/vp9/common/vp9_reconintra.c
index 4a451b9..bd609dc 100644
--- a/vp9/common/vp9_reconintra.c
+++ b/vp9/common/vp9_reconintra.c
@@ -13,7 +13,7 @@
#include "vpx_mem/vpx_mem.h"
#include "vpx_ports/vpx_once.h"
-#include "vp9_rtcd.h"
+#include "./vp9_rtcd.h"
#include "vp9/common/vp9_reconintra.h"
#include "vp9/common/vp9_onyxc_int.h"
diff --git a/vp9/common/vp9_rtcd.c b/vp9/common/vp9_rtcd.c
index 72613ae..dc15a84 100644
--- a/vp9/common/vp9_rtcd.c
+++ b/vp9/common/vp9_rtcd.c
@@ -7,9 +7,9 @@
* in the file PATENTS. All contributing project authors may
* be found in the AUTHORS file in the root of the source tree.
*/
-#include "vpx_config.h"
+#include "./vpx_config.h"
#define RTCD_C
-#include "vp9_rtcd.h"
+#include "./vp9_rtcd.h"
#include "vpx_ports/vpx_once.h"
void vpx_scale_rtcd(void);
diff --git a/vp9/common/vp9_rtcd_defs.sh b/vp9/common/vp9_rtcd_defs.sh
index 042afbb..8dacdd0 100644
--- a/vp9/common/vp9_rtcd_defs.sh
+++ b/vp9/common/vp9_rtcd_defs.sh
@@ -53,7 +53,7 @@
specialize vp9_d45_predictor_4x4 $ssse3_x86inc
prototype void vp9_d63_predictor_4x4 "uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left"
-specialize vp9_d63_predictor_4x4
+specialize vp9_d63_predictor_4x4 $ssse3_x86inc
prototype void vp9_h_predictor_4x4 "uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left"
specialize vp9_h_predictor_4x4 $ssse3_x86inc
@@ -92,7 +92,7 @@
specialize vp9_d45_predictor_8x8 $ssse3_x86inc
prototype void vp9_d63_predictor_8x8 "uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left"
-specialize vp9_d63_predictor_8x8
+specialize vp9_d63_predictor_8x8 $ssse3_x86inc
prototype void vp9_h_predictor_8x8 "uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left"
specialize vp9_h_predictor_8x8 $ssse3_x86inc
@@ -131,7 +131,7 @@
specialize vp9_d45_predictor_16x16 $ssse3_x86inc
prototype void vp9_d63_predictor_16x16 "uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left"
-specialize vp9_d63_predictor_16x16
+specialize vp9_d63_predictor_16x16 $ssse3_x86inc
prototype void vp9_h_predictor_16x16 "uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left"
specialize vp9_h_predictor_16x16 $ssse3_x86inc
@@ -170,7 +170,7 @@
specialize vp9_d45_predictor_32x32 $ssse3_x86inc
prototype void vp9_d63_predictor_32x32 "uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left"
-specialize vp9_d63_predictor_32x32
+specialize vp9_d63_predictor_32x32 $ssse3_x86inc
prototype void vp9_h_predictor_32x32 "uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left"
specialize vp9_h_predictor_32x32 $ssse3_x86inc
@@ -202,17 +202,6 @@
prototype void vp9_dc_128_predictor_32x32 "uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left"
specialize vp9_dc_128_predictor_32x32
-if [ "$CONFIG_VP9_DECODER" = "yes" ]; then
-prototype void vp9_add_constant_residual_8x8 "const int16_t diff, uint8_t *dest, int stride"
-specialize vp9_add_constant_residual_8x8 sse2 neon
-
-prototype void vp9_add_constant_residual_16x16 "const int16_t diff, uint8_t *dest, int stride"
-specialize vp9_add_constant_residual_16x16 sse2 neon
-
-prototype void vp9_add_constant_residual_32x32 "const int16_t diff, uint8_t *dest, int stride"
-specialize vp9_add_constant_residual_32x32 sse2 neon
-fi
-
#
# Loopfilter
#
@@ -306,8 +295,8 @@
prototype void vp9_short_idct8x8_add "int16_t *input, uint8_t *dest, int dest_stride"
specialize vp9_short_idct8x8_add sse2 neon
-prototype void vp9_short_idct10_8x8_add "int16_t *input, uint8_t *dest, int dest_stride"
-specialize vp9_short_idct10_8x8_add sse2 neon
+prototype void vp9_short_idct8x8_10_add "int16_t *input, uint8_t *dest, int dest_stride"
+specialize vp9_short_idct8x8_10_add sse2 neon
prototype void vp9_short_idct16x16_1_add "int16_t *input, uint8_t *dest, int dest_stride"
specialize vp9_short_idct16x16_1_add sse2 neon
@@ -315,14 +304,14 @@
prototype void vp9_short_idct16x16_add "int16_t *input, uint8_t *dest, int dest_stride"
specialize vp9_short_idct16x16_add sse2 neon
-prototype void vp9_short_idct10_16x16_add "int16_t *input, uint8_t *dest, int dest_stride"
-specialize vp9_short_idct10_16x16_add sse2 neon
+prototype void vp9_short_idct16x16_10_add "int16_t *input, uint8_t *dest, int dest_stride"
+specialize vp9_short_idct16x16_10_add sse2 neon
prototype void vp9_short_idct32x32_add "int16_t *input, uint8_t *dest, int dest_stride"
specialize vp9_short_idct32x32_add sse2 neon
-prototype void vp9_short_idct1_32x32 "int16_t *input, int16_t *output"
-specialize vp9_short_idct1_32x32
+prototype void vp9_short_idct32x32_1_add "int16_t *input, uint8_t *dest, int dest_stride"
+specialize vp9_short_idct32x32_1_add
prototype void vp9_short_iht4x4_add "int16_t *input, uint8_t *dest, int dest_stride, int tx_type"
specialize vp9_short_iht4x4_add sse2 neon
diff --git a/vp9/common/vp9_scale.h b/vp9/common/vp9_scale.h
index 7a720d0..ece0114 100644
--- a/vp9/common/vp9_scale.h
+++ b/vp9/common/vp9_scale.h
@@ -48,4 +48,4 @@
sf->y_scale_fp != REF_NO_SCALE;
}
-#endif // VP9_COMMON_VP9_SCALE_H_
+#endif  // VP9_COMMON_VP9_SCALE_H_
diff --git a/vp9/common/vp9_subpelvar.h b/vp9/common/vp9_subpelvar.h
deleted file mode 100644
index fe75481..0000000
--- a/vp9/common/vp9_subpelvar.h
+++ /dev/null
@@ -1,145 +0,0 @@
-/*
- * Copyright (c) 2010 The WebM project authors. All Rights Reserved.
- *
- * Use of this source code is governed by a BSD-style license
- * that can be found in the LICENSE file in the root of the source
- * tree. An additional intellectual property rights grant can be found
- * in the file PATENTS. All contributing project authors may
- * be found in the AUTHORS file in the root of the source tree.
- */
-
-#ifndef VP9_COMMON_VP9_SUBPELVAR_H_
-#define VP9_COMMON_VP9_SUBPELVAR_H_
-
-#include "vp9/common/vp9_common.h"
-#include "vp9/common/vp9_convolve.h"
-
-static void variance(const uint8_t *src_ptr,
- int source_stride,
- const uint8_t *ref_ptr,
- int recon_stride,
- int w,
- int h,
- unsigned int *sse,
- int *sum) {
- int i, j;
- int diff;
-
- *sum = 0;
- *sse = 0;
-
- for (i = 0; i < h; i++) {
- for (j = 0; j < w; j++) {
- diff = src_ptr[j] - ref_ptr[j];
- *sum += diff;
- *sse += diff * diff;
- }
-
- src_ptr += source_stride;
- ref_ptr += recon_stride;
- }
-}
-
-/****************************************************************************
- *
- * ROUTINE : filter_block2d_bil_first_pass
- *
- * INPUTS : uint8_t *src_ptr : Pointer to source block.
- * uint32_t src_pixels_per_line : Stride of input block.
- * uint32_t pixel_step : Offset between filter input samples (see notes).
- * uint32_t output_height : Input block height.
- * uint32_t output_width : Input block width.
- * int32_t *vp9_filter : Array of 2 bi-linear filter taps.
- *
- * OUTPUTS : int32_t *output_ptr : Pointer to filtered block.
- *
- * RETURNS : void
- *
- * FUNCTION : Applies a 1-D 2-tap bi-linear filter to the source block in
- * either horizontal or vertical direction to produce the
- * filtered output block. Used to implement first-pass
- * of 2-D separable filter.
- *
- * SPECIAL NOTES : Produces int32_t output to retain precision for next pass.
- * Two filter taps should sum to VP9_FILTER_WEIGHT.
- * pixel_step defines whether the filter is applied
- * horizontally (pixel_step=1) or vertically (pixel_step=stride).
- * It defines the offset required to move from one input
- * to the next.
- *
- ****************************************************************************/
-static void var_filter_block2d_bil_first_pass(const uint8_t *src_ptr,
- uint16_t *output_ptr,
- unsigned int src_pixels_per_line,
- int pixel_step,
- unsigned int output_height,
- unsigned int output_width,
- const int16_t *vp9_filter) {
- unsigned int i, j;
-
- for (i = 0; i < output_height; i++) {
- for (j = 0; j < output_width; j++) {
- output_ptr[j] = ROUND_POWER_OF_TWO((int)src_ptr[0] * vp9_filter[0] +
- (int)src_ptr[pixel_step] * vp9_filter[1],
- FILTER_BITS);
-
- src_ptr++;
- }
-
- // Next row...
- src_ptr += src_pixels_per_line - output_width;
- output_ptr += output_width;
- }
-}
-
-/****************************************************************************
- *
- * ROUTINE : filter_block2d_bil_second_pass
- *
- * INPUTS : int32_t *src_ptr : Pointer to source block.
- * uint32_t src_pixels_per_line : Stride of input block.
- * uint32_t pixel_step : Offset between filter input samples (see notes).
- * uint32_t output_height : Input block height.
- * uint32_t output_width : Input block width.
- * int32_t *vp9_filter : Array of 2 bi-linear filter taps.
- *
- * OUTPUTS : uint16_t *output_ptr : Pointer to filtered block.
- *
- * RETURNS : void
- *
- * FUNCTION : Applies a 1-D 2-tap bi-linear filter to the source block in
- * either horizontal or vertical direction to produce the
- * filtered output block. Used to implement second-pass
- * of 2-D separable filter.
- *
- * SPECIAL NOTES : Requires 32-bit input as produced by filter_block2d_bil_first_pass.
- * Two filter taps should sum to VP9_FILTER_WEIGHT.
- * pixel_step defines whether the filter is applied
- * horizontally (pixel_step=1) or vertically (pixel_step=stride).
- * It defines the offset required to move from one input
- * to the next.
- *
- ****************************************************************************/
-static void var_filter_block2d_bil_second_pass(const uint16_t *src_ptr,
- uint8_t *output_ptr,
- unsigned int src_pixels_per_line,
- unsigned int pixel_step,
- unsigned int output_height,
- unsigned int output_width,
- const int16_t *vp9_filter) {
- unsigned int i, j;
-
- for (i = 0; i < output_height; i++) {
- for (j = 0; j < output_width; j++) {
- output_ptr[j] = ROUND_POWER_OF_TWO((int)src_ptr[0] * vp9_filter[0] +
- (int)src_ptr[pixel_step] * vp9_filter[1],
- FILTER_BITS);
- src_ptr++;
- }
-
- src_ptr += src_pixels_per_line - output_width;
- output_ptr += output_width;
- }
-}
-
-#endif // VP9_COMMON_VP9_SUBPELVAR_H_
diff --git a/vp9/common/vp9_treecoder.c b/vp9/common/vp9_treecoder.c
index 2e21a5b..da1213d 100644
--- a/vp9/common/vp9_treecoder.c
+++ b/vp9/common/vp9_treecoder.c
@@ -25,8 +25,9 @@
if (j <= 0) {
p[-j].value = v;
p[-j].len = l;
- } else
+ } else {
tree2tok(p, t, j, v, l);
+ }
} while (++v & 1);
}
@@ -65,11 +66,9 @@
return left + right;
}
-void vp9_tree_probs_from_distribution(
- vp9_tree tree,
- vp9_prob probs [ /* n-1 */ ],
- unsigned int branch_ct [ /* n-1 */ ] [2],
- const unsigned int num_events[ /* n */ ],
- unsigned int tok0_offset) {
+void vp9_tree_probs_from_distribution(vp9_tree tree, vp9_prob probs[/* n-1 */],
+ unsigned int branch_ct[/* n-1 */][2],
+ const unsigned int num_events[/* n */],
+ unsigned int tok0_offset) {
convert_distribution(0, tree, probs, branch_ct, num_events, tok0_offset);
}
diff --git a/vp9/common/x86/vp9_idct_intrin_sse2.c b/vp9/common/x86/vp9_idct_intrin_sse2.c
index 8f740f4..9e9d632 100644
--- a/vp9/common/x86/vp9_idct_intrin_sse2.c
+++ b/vp9/common/x86/vp9_idct_intrin_sse2.c
@@ -985,7 +985,7 @@
RECON_AND_STORE(dest, in[7]);
}
-void vp9_short_idct10_8x8_add_sse2(int16_t *input, uint8_t *dest, int stride) {
+void vp9_short_idct8x8_10_add_sse2(int16_t *input, uint8_t *dest, int stride) {
const __m128i zero = _mm_setzero_si128();
const __m128i rounding = _mm_set1_epi32(DCT_CONST_ROUNDING);
const __m128i final_rounding = _mm_set1_epi16(1<<4);
@@ -1014,7 +1014,7 @@
TRANSPOSE_8X4(in0, in1, in2, in3, in0, in1, in2, in3)
// Stage1
- {
+ { // NOLINT
const __m128i lo_17 = _mm_unpackhi_epi16(in0, in3);
const __m128i lo_35 = _mm_unpackhi_epi16(in1, in2);
@@ -1039,7 +1039,7 @@
}
// Stage2
- {
+ { // NOLINT
const __m128i lo_04 = _mm_unpacklo_epi16(in0, in2);
const __m128i lo_26 = _mm_unpacklo_epi16(in1, in3);
@@ -1069,7 +1069,7 @@
}
// Stage3
- {
+ { // NOLINT
const __m128i lo_56 = _mm_unpacklo_epi16(stp2_5, stp2_6);
stp1_0 = _mm_adds_epi16(stp2_0, stp2_3);
stp1_1 = _mm_adds_epi16(stp2_1, stp2_2);
@@ -2456,7 +2456,7 @@
write_buffer_8x16(dest, in1, stride);
}
-void vp9_short_idct10_16x16_add_sse2(int16_t *input, uint8_t *dest,
+void vp9_short_idct16x16_10_add_sse2(int16_t *input, uint8_t *dest,
int stride) {
const __m128i rounding = _mm_set1_epi32(DCT_CONST_ROUNDING);
const __m128i final_rounding = _mm_set1_epi16(1<<5);
@@ -3548,4 +3548,4 @@
dest += 8 - (stride * 32);
}
}
-}
+} // NOLINT
diff --git a/vp9/common/x86/vp9_intrapred_ssse3.asm b/vp9/common/x86/vp9_intrapred_ssse3.asm
index 67c8ab0..c51d011 100644
--- a/vp9/common/x86/vp9_intrapred_ssse3.asm
+++ b/vp9/common/x86/vp9_intrapred_ssse3.asm
@@ -17,8 +17,8 @@
pb_7m1: times 8 db 7, -1
pb_15: times 16 db 15
-sh_b01234577: db 0, 1, 2, 3, 4, 5, 7, 7
-sh_b12345677: db 1, 2, 3, 4, 5, 6, 7, 7
+sh_b01234577: db 0, 1, 2, 3, 4, 5, 7, 7, 0, 0, 0, 0, 0, 0, 0, 0
+sh_b12345677: db 1, 2, 3, 4, 5, 6, 7, 7, 0, 0, 0, 0, 0, 0, 0, 0
sh_b23456777: db 2, 3, 4, 5, 6, 7, 7, 7, 0, 0, 0, 0, 0, 0, 0, 0
sh_b0123456777777777: db 0, 1, 2, 3, 4, 5, 6, 7, 7, 7, 7, 7, 7, 7, 7, 7
sh_b1234567777777777: db 1, 2, 3, 4, 5, 6, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7
@@ -305,3 +305,153 @@
RESTORE_GOT
RET
+
+; ------------------------------------------
+; input: x, y, z, result
+;
+; trick from Pascal
+; (x+2y+z+2)>>2 can be calculated as:
+; result = avg(x,z)
+; result -= xor(x,z) & 1
+; result = avg(result,y)
+; ------------------------------------------
+%macro X_PLUS_2Y_PLUS_Z_PLUS_2_RSH_2 4
+ pavgb %4, %1, %3
+ pxor %3, %1
+ pand %3, [GLOBAL(pb_1)]
+ psubb %4, %3
+ pavgb %4, %2
+%endmacro
+
+INIT_XMM ssse3
+cglobal d63_predictor_4x4, 3, 4, 5, dst, stride, above, goffset
+ GET_GOT goffsetq
+
+ movq m3, [aboveq]
+ pshufb m1, m3, [GLOBAL(sh_b23456777)]
+ pshufb m2, m3, [GLOBAL(sh_b12345677)]
+
+ X_PLUS_2Y_PLUS_Z_PLUS_2_RSH_2 m3, m2, m1, m4
+ pavgb m3, m2
+
+ ; store 4 lines
+ movd [dstq ], m3
+ movd [dstq+strideq], m4
+ lea dstq, [dstq+strideq*2]
+ psrldq m3, 1
+ psrldq m4, 1
+ movd [dstq ], m3
+ movd [dstq+strideq], m4
+ RESTORE_GOT
+ RET
+
+INIT_XMM ssse3
+cglobal d63_predictor_8x8, 3, 4, 5, dst, stride, above, goffset
+ GET_GOT goffsetq
+
+ movq m3, [aboveq]
+ DEFINE_ARGS dst, stride, stride3
+ lea stride3q, [strideq*3]
+ pshufb m1, m3, [GLOBAL(sh_b2345677777777777)]
+ pshufb m0, m3, [GLOBAL(sh_b0123456777777777)]
+ pshufb m2, m3, [GLOBAL(sh_b1234567777777777)]
+ pshufb m3, [GLOBAL(sh_b0123456777777777)]
+
+ X_PLUS_2Y_PLUS_Z_PLUS_2_RSH_2 m0, m2, m1, m4
+ pavgb m3, m2
+
+ ; store 4 lines
+ movq [dstq ], m3
+ movq [dstq+strideq], m4
+ psrldq m3, 1
+ psrldq m4, 1
+ movq [dstq+strideq*2], m3
+ movq [dstq+stride3q ], m4
+ lea dstq, [dstq+strideq*4]
+ psrldq m3, 1
+ psrldq m4, 1
+
+ ; store 4 lines
+ movq [dstq ], m3
+ movq [dstq+strideq], m4
+ psrldq m3, 1
+ psrldq m4, 1
+ movq [dstq+strideq*2], m3
+ movq [dstq+stride3q ], m4
+ RESTORE_GOT
+ RET
+
+INIT_XMM ssse3
+cglobal d63_predictor_16x16, 3, 5, 5, dst, stride, above, line, goffset
+ GET_GOT goffsetq
+
+ mova m0, [aboveq]
+ DEFINE_ARGS dst, stride, stride3, line
+ lea stride3q, [strideq*3]
+ mova m1, [GLOBAL(sh_b123456789abcdeff)]
+ pshufb m2, m0, [GLOBAL(sh_b23456789abcdefff)]
+ pshufb m3, m0, m1
+
+ X_PLUS_2Y_PLUS_Z_PLUS_2_RSH_2 m0, m3, m2, m4
+ pavgb m0, m3
+
+ mov lined, 4
+.loop:
+ mova [dstq ], m0
+ mova [dstq+strideq ], m4
+ pshufb m0, m1
+ pshufb m4, m1
+ mova [dstq+strideq*2], m0
+ mova [dstq+stride3q ], m4
+ pshufb m0, m1
+ pshufb m4, m1
+ lea dstq, [dstq+strideq*4]
+ dec lined
+ jnz .loop
+ RESTORE_GOT
+ REP_RET
+
+INIT_XMM ssse3
+cglobal d63_predictor_32x32, 3, 5, 8, dst, stride, above, line, goffset
+ GET_GOT goffsetq
+
+ mova m0, [aboveq]
+ mova m7, [aboveq+16]
+ DEFINE_ARGS dst, stride, stride3, line
+ mova m1, [GLOBAL(sh_b123456789abcdeff)]
+ lea stride3q, [strideq*3]
+ pshufb m2, m7, [GLOBAL(sh_b23456789abcdefff)]
+ pshufb m3, m7, m1
+
+ X_PLUS_2Y_PLUS_Z_PLUS_2_RSH_2 m7, m3, m2, m4
+ palignr m6, m7, m0, 1
+ palignr m5, m7, m0, 2
+ pavgb m7, m3
+
+ X_PLUS_2Y_PLUS_Z_PLUS_2_RSH_2 m0, m6, m5, m2
+ pavgb m0, m6
+
+ mov lined, 8
+.loop:
+ mova [dstq ], m0
+ mova [dstq +16], m7
+ mova [dstq+strideq ], m2
+ mova [dstq+strideq +16], m4
+ palignr m3, m7, m0, 1
+ palignr m5, m4, m2, 1
+ pshufb m7, m1
+ pshufb m4, m1
+
+ mova [dstq+strideq*2 ], m3
+ mova [dstq+strideq*2+16], m7
+ mova [dstq+stride3q ], m5
+ mova [dstq+stride3q +16], m4
+ palignr m0, m7, m3, 1
+ palignr m2, m4, m5, 1
+ pshufb m7, m1
+ pshufb m4, m1
+ lea dstq, [dstq+strideq*4]
+ dec lined
+ jnz .loop
+ RESTORE_GOT
+ REP_RET
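The X_PLUS_2Y_PLUS_Z_PLUS_2_RSH_2 macro above leans on an identity worth spelling out: with pavgb(a, b) = (a + b + 1) >> 1, the value avg(x, z) - ((x ^ z) & 1) is exactly (x + z) >> 1, so one more rounding average with y yields (x + 2y + z + 2) >> 2 without any intermediate overflow past 8 bits. The identity can be checked exhaustively over all byte inputs with a scalar C sketch (standalone, not part of the patch):

    #include <assert.h>

    /* Scalar model of pavgb: average rounded up. */
    static int avg_round(int a, int b) { return (a + b + 1) >> 1; }

    int main(void) {
      int x, y, z;
      for (x = 0; x < 256; ++x)
        for (y = 0; y < 256; ++y)
          for (z = 0; z < 256; ++z) {
            int r = avg_round(x, z) - ((x ^ z) & 1);  /* == (x + z) >> 1 */
            assert(avg_round(r, y) == (x + 2 * y + z + 2) >> 2);
          }
      return 0;
    }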
diff --git a/vp9/common/x86/vp9_postproc_x86.h b/vp9/common/x86/vp9_postproc_x86.h
index b0e8b18..8870215 100644
--- a/vp9/common/x86/vp9_postproc_x86.h
+++ b/vp9/common/x86/vp9_postproc_x86.h
@@ -61,4 +61,4 @@
#endif
#endif
-#endif
+#endif // VP9_COMMON_X86_VP9_POSTPROC_X86_H_
diff --git a/vp9/common/x86/vp9_subpixel_8t_ssse3.asm b/vp9/common/x86/vp9_subpixel_8t_ssse3.asm
index bbf9888..277902f 100644
--- a/vp9/common/x86/vp9_subpixel_8t_ssse3.asm
+++ b/vp9/common/x86/vp9_subpixel_8t_ssse3.asm
@@ -705,60 +705,53 @@
movsxd rcx, dword ptr arg(4) ;output_height
.loop:
- movq xmm0, [rsi - 3] ; -3 -2 -1 0 1 2 3 4
+ prefetcht0 [rsi + 2 * rax - 3]
- movq xmm3, [rsi + 5] ; 5 6 7 8 9 10 11 12
- punpcklqdq xmm0, xmm3
+ movq xmm0, [rsi - 3] ;load src data
+ movq xmm4, [rsi + 5]
+ movq xmm7, [rsi + 13]
+ punpcklqdq xmm0, xmm4
+ punpcklqdq xmm4, xmm7
movdqa xmm1, xmm0
+ movdqa xmm2, xmm0
+ movdqa xmm3, xmm0
+ movdqa xmm5, xmm4
+ movdqa xmm6, xmm4
+ movdqa xmm7, xmm4
+
pshufb xmm0, [GLOBAL(shuf_t0t1)]
- pmaddubsw xmm0, k0k1
-
- movdqa xmm2, xmm1
pshufb xmm1, [GLOBAL(shuf_t2t3)]
- pmaddubsw xmm1, k2k3
-
- movdqa xmm4, xmm2
pshufb xmm2, [GLOBAL(shuf_t4t5)]
- pmaddubsw xmm2, k4k5
+ pshufb xmm3, [GLOBAL(shuf_t6t7)]
+ pshufb xmm4, [GLOBAL(shuf_t0t1)]
+ pshufb xmm5, [GLOBAL(shuf_t2t3)]
+ pshufb xmm6, [GLOBAL(shuf_t4t5)]
+ pshufb xmm7, [GLOBAL(shuf_t6t7)]
- pshufb xmm4, [GLOBAL(shuf_t6t7)]
- pmaddubsw xmm4, k6k7
+ pmaddubsw xmm0, k0k1
+ pmaddubsw xmm1, k2k3
+ pmaddubsw xmm2, k4k5
+ pmaddubsw xmm3, k6k7
+ pmaddubsw xmm4, k0k1
+ pmaddubsw xmm5, k2k3
+ pmaddubsw xmm6, k4k5
+ pmaddubsw xmm7, k6k7
paddsw xmm0, xmm1
- paddsw xmm0, xmm4
+ paddsw xmm0, xmm3
paddsw xmm0, xmm2
+ paddsw xmm4, xmm5
+ paddsw xmm4, xmm7
+ paddsw xmm4, xmm6
+
paddsw xmm0, krd
+ paddsw xmm4, krd
psraw xmm0, 7
+ psraw xmm4, 7
packuswb xmm0, xmm0
-
-
- movq xmm3, [rsi + 5]
- movq xmm7, [rsi + 13]
- punpcklqdq xmm3, xmm7
-
- movdqa xmm1, xmm3
- pshufb xmm3, [GLOBAL(shuf_t0t1)]
- pmaddubsw xmm3, k0k1
-
- movdqa xmm2, xmm1
- pshufb xmm1, [GLOBAL(shuf_t2t3)]
- pmaddubsw xmm1, k2k3
-
- movdqa xmm4, xmm2
- pshufb xmm2, [GLOBAL(shuf_t4t5)]
- pmaddubsw xmm2, k4k5
-
- pshufb xmm4, [GLOBAL(shuf_t6t7)]
- pmaddubsw xmm4, k6k7
-
- paddsw xmm3, xmm1
- paddsw xmm3, xmm4
- paddsw xmm3, xmm2
- paddsw xmm3, krd
- psraw xmm3, 7
- packuswb xmm3, xmm3
- punpcklqdq xmm0, xmm3
+ packuswb xmm4, xmm4
+ punpcklqdq xmm0, xmm4
%if %1
movdqa xmm1, [rdi]
pavgb xmm0, xmm1
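For orientation, each 8-pixel half that the restructured loop above produces is an 8-tap horizontal filter with a rounding right shift; interleaving the two halves shortens the dependency chains between the pmaddubsw/paddsw pairs, and the prefetcht0 hides part of a later iteration's load latency. A scalar model of one output pixel (assuming the usual libvpx convention that the taps sum to 128 and FILTER_BITS is 7, so krd adds 64; the intermediate 16-bit saturation of paddsw is ignored):

    #include <assert.h>

    static unsigned char filter8_px(const unsigned char *src, const int *taps) {
      int k, sum = 0;
      for (k = 0; k < 8; ++k)
        sum += src[k - 3] * taps[k];  /* matches the [rsi - 3] load offset */
      sum = (sum + 64) >> 7;          /* krd rounding add, then psraw 7 */
      return sum < 0 ? 0 : sum > 255 ? 255 : (unsigned char)sum;  /* packuswb */
    }

    int main(void) {
      const unsigned char row[8] = { 9, 9, 9, 50, 9, 9, 9, 9 };
      const int identity[8] = { 0, 0, 0, 128, 0, 0, 0, 0 };  /* taps sum to 128 */
      assert(filter8_px(row + 3, identity) == 50);
      return 0;
    }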
diff --git a/vp9/decoder/arm/neon/vp9_add_constant_residual_neon.asm b/vp9/decoder/arm/neon/vp9_add_constant_residual_neon.asm
deleted file mode 100644
index 174e747..0000000
--- a/vp9/decoder/arm/neon/vp9_add_constant_residual_neon.asm
+++ /dev/null
@@ -1,230 +0,0 @@
-;
-; Copyright (c) 2013 The WebM project authors. All Rights Reserved.
-;
-; Use of this source code is governed by a BSD-style license
-; that can be found in the LICENSE file in the root of the source
-; tree. An additional intellectual property rights grant can be found
-; in the file PATENTS. All contributing project authors may
-; be found in the AUTHORS file in the root of the source tree.
-;
-
- EXPORT |vp9_add_constant_residual_8x8_neon|
- EXPORT |vp9_add_constant_residual_16x16_neon|
- EXPORT |vp9_add_constant_residual_32x32_neon|
- ARM
-
- AREA ||.text||, CODE, READONLY, ALIGN=2
-
- MACRO
- LD_16x8 $src, $stride
- vld1.8 {q8}, [$src], $stride
- vld1.8 {q9}, [$src], $stride
- vld1.8 {q10}, [$src], $stride
- vld1.8 {q11}, [$src], $stride
- vld1.8 {q12}, [$src], $stride
- vld1.8 {q13}, [$src], $stride
- vld1.8 {q14}, [$src], $stride
- vld1.8 {q15}, [$src], $stride
- MEND
-
- MACRO
- ADD_DIFF_16x8 $diff
- vqadd.u8 q8, q8, $diff
- vqadd.u8 q9, q9, $diff
- vqadd.u8 q10, q10, $diff
- vqadd.u8 q11, q11, $diff
- vqadd.u8 q12, q12, $diff
- vqadd.u8 q13, q13, $diff
- vqadd.u8 q14, q14, $diff
- vqadd.u8 q15, q15, $diff
- MEND
-
- MACRO
- SUB_DIFF_16x8 $diff
- vqsub.u8 q8, q8, $diff
- vqsub.u8 q9, q9, $diff
- vqsub.u8 q10, q10, $diff
- vqsub.u8 q11, q11, $diff
- vqsub.u8 q12, q12, $diff
- vqsub.u8 q13, q13, $diff
- vqsub.u8 q14, q14, $diff
- vqsub.u8 q15, q15, $diff
- MEND
-
- MACRO
- ST_16x8 $dst, $stride
- vst1.8 {q8}, [$dst], $stride
- vst1.8 {q9}, [$dst], $stride
- vst1.8 {q10}, [$dst], $stride
- vst1.8 {q11}, [$dst], $stride
- vst1.8 {q12}, [$dst], $stride
- vst1.8 {q13}, [$dst], $stride
- vst1.8 {q14}, [$dst], $stride
- vst1.8 {q15}, [$dst], $stride
- MEND
-
-; void add_constant_residual(const int16_t diff, uint8_t *dest, int stride,
-; int width, int height) {
-; int r, c;
-;
-; for (r = 0; r < height; r++) {
-; for (c = 0; c < width; c++)
-; dest[c] = clip_pixel(diff + dest[c]);
-;
-; dest += stride;
-; }
-;}
-;void vp9_add_constant_residual_8x8_c(const int16_t diff, uint8_t *dest,
-; int stride) {
-; add_constant_residual(diff, dest, stride, 8, 8);
-;}
-; r0 : const int16_t diff
-; r1 : const uint8_t *dest
-; r2 : int stride
-;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
-|vp9_add_constant_residual_8x8_neon| PROC
- mov r3, r1 ; r3: save dest to r3
- vld1.8 {d0}, [r1], r2
- vld1.8 {d1}, [r1], r2
- vld1.8 {d2}, [r1], r2
- vld1.8 {d3}, [r1], r2
- vld1.8 {d4}, [r1], r2
- vld1.8 {d5}, [r1], r2
- vld1.8 {d6}, [r1], r2
- vld1.8 {d7}, [r1], r2
- cmp r0, #0
- bge DIFF_POSITIVE_8x8
-
-DIFF_NEGATIVE_8x8 ; diff < 0
- neg r0, r0
- usat r0, #8, r0
- vdup.u8 q8, r0
-
- vqsub.u8 q0, q0, q8
- vqsub.u8 q1, q1, q8
- vqsub.u8 q2, q2, q8
- vqsub.u8 q3, q3, q8
- b DIFF_SAVE_8x8
-
-DIFF_POSITIVE_8x8 ; diff >= 0
- usat r0, #8, r0
- vdup.u8 q8, r0
-
- vqadd.u8 q0, q0, q8
- vqadd.u8 q1, q1, q8
- vqadd.u8 q2, q2, q8
- vqadd.u8 q3, q3, q8
-
-DIFF_SAVE_8x8
- vst1.8 {d0}, [r3], r2
- vst1.8 {d1}, [r3], r2
- vst1.8 {d2}, [r3], r2
- vst1.8 {d3}, [r3], r2
- vst1.8 {d4}, [r3], r2
- vst1.8 {d5}, [r3], r2
- vst1.8 {d6}, [r3], r2
- vst1.8 {d7}, [r3], r2
-
- bx lr
- ENDP
-
-;void vp9_add_constant_residual_16x16_c(const int16_t diff, uint8_t *dest,
-; int stride) {
-; add_constant_residual(diff, dest, stride, 16, 16);
-;}
-; r0 : const int16_t diff
-; r1 : const uint8_t *dest
-; r2 : int stride
-;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
-|vp9_add_constant_residual_16x16_neon| PROC
- mov r3, r1
- LD_16x8 r1, r2
- cmp r0, #0
- bge DIFF_POSITIVE_16x16
-
-|DIFF_NEGATIVE_16x16|
- neg r0, r0
- usat r0, #8, r0
- vdup.u8 q0, r0
-
- SUB_DIFF_16x8 q0
- ST_16x8 r3, r2
- LD_16x8 r1, r2
- SUB_DIFF_16x8 q0
- b DIFF_SAVE_16x16
-
-|DIFF_POSITIVE_16x16|
- usat r0, #8, r0
- vdup.u8 q0, r0
-
- ADD_DIFF_16x8 q0
- ST_16x8 r3, r2
- LD_16x8 r1, r2
- ADD_DIFF_16x8 q0
-
-|DIFF_SAVE_16x16|
- ST_16x8 r3, r2
- bx lr
- ENDP
-
-;void vp9_add_constant_residual_32x32_c(const int16_t diff, uint8_t *dest,
-; int stride) {
-; add_constant_residual(diff, dest, stride, 32, 32);
-;}
-; r0 : const int16_t diff
-; r1 : const uint8_t *dest
-; r2 : int stride
-;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
-|vp9_add_constant_residual_32x32_neon| PROC
- push {r4,lr}
- pld [r1]
- mov r3, r1
- add r4, r1, #16 ; r4 dest + 16 for second loop
- cmp r0, #0
- bge DIFF_POSITIVE_32x32
-
-|DIFF_NEGATIVE_32x32|
- neg r0, r0
- usat r0, #8, r0
- vdup.u8 q0, r0
- mov r0, #4
-
-|DIFF_NEGATIVE_32x32_LOOP|
- sub r0, #1
- LD_16x8 r1, r2
- SUB_DIFF_16x8 q0
- ST_16x8 r3, r2
-
- LD_16x8 r1, r2
- SUB_DIFF_16x8 q0
- ST_16x8 r3, r2
- cmp r0, #2
- moveq r1, r4
- moveq r3, r4
- cmp r0, #0
- bne DIFF_NEGATIVE_32x32_LOOP
- pop {r4,pc}
-
-|DIFF_POSITIVE_32x32|
- usat r0, #8, r0
- vdup.u8 q0, r0
- mov r0, #4
-
-|DIFF_POSITIVE_32x32_LOOP|
- sub r0, #1
- LD_16x8 r1, r2
- ADD_DIFF_16x8 q0
- ST_16x8 r3, r2
-
- LD_16x8 r1, r2
- ADD_DIFF_16x8 q0
- ST_16x8 r3, r2
- cmp r0, #2
- moveq r1, r4
- moveq r3, r4
- cmp r0, #0
- bne DIFF_POSITIVE_32x32_LOOP
- pop {r4,pc}
- ENDP
-
- END
diff --git a/vp9/decoder/vp9_decodemv.c b/vp9/decoder/vp9_decodemv.c
index 84a29b1..ebb98d8 100644
--- a/vp9/decoder/vp9_decodemv.c
+++ b/vp9/decoder/vp9_decodemv.c
@@ -75,28 +75,9 @@
return TX_4X4;
}
-static void set_segment_id(VP9_COMMON *cm, BLOCK_SIZE bsize,
- int mi_row, int mi_col, int segment_id) {
- const int mi_offset = mi_row * cm->mi_cols + mi_col;
- const int bw = 1 << mi_width_log2(bsize);
- const int bh = 1 << mi_height_log2(bsize);
- const int xmis = MIN(cm->mi_cols - mi_col, bw);
- const int ymis = MIN(cm->mi_rows - mi_row, bh);
- int x, y;
-
- assert(segment_id >= 0 && segment_id < MAX_SEGMENTS);
-
- for (y = 0; y < ymis; y++)
- for (x = 0; x < xmis; x++)
- cm->last_frame_seg_map[mi_offset + y * cm->mi_cols + x] = segment_id;
-}
-
static int read_intra_segment_id(VP9D_COMP *pbi, int mi_row, int mi_col,
vp9_reader *r) {
- MACROBLOCKD *const xd = &pbi->mb;
struct segmentation *const seg = &pbi->common.seg;
- const BLOCK_SIZE bsize = xd->this_mi->mbmi.sb_type;
- int segment_id;
if (!seg->enabled)
return 0; // Default for disabled segmentation
@@ -104,9 +85,7 @@
if (!seg->update_map)
return 0;
- segment_id = read_segment_id(r, seg);
- set_segment_id(&pbi->common, bsize, mi_row, mi_col, segment_id);
- return segment_id;
+ return read_segment_id(r, seg);
}
static int read_inter_segment_id(VP9D_COMP *pbi, int mi_row, int mi_col,
@@ -115,7 +94,7 @@
MACROBLOCKD *const xd = &pbi->mb;
struct segmentation *const seg = &cm->seg;
const BLOCK_SIZE bsize = xd->this_mi->mbmi.sb_type;
- int pred_segment_id, segment_id;
+ int pred_segment_id;
if (!seg->enabled)
return 0; // Default for disabled segmentation
@@ -129,13 +108,10 @@
const vp9_prob pred_prob = vp9_get_pred_prob_seg_id(seg, xd);
const int pred_flag = vp9_read(r, pred_prob);
vp9_set_pred_flag_seg_id(xd, pred_flag);
- segment_id = pred_flag ? pred_segment_id
- : read_segment_id(r, seg);
+ return pred_flag ? pred_segment_id : read_segment_id(r, seg);
} else {
- segment_id = read_segment_id(r, seg);
+ return read_segment_id(r, seg);
}
- set_segment_id(cm, bsize, mi_row, mi_col, segment_id);
- return segment_id;
}
static uint8_t read_skip_coeff(VP9D_COMP *pbi, int segment_id, vp9_reader *r) {
@@ -200,7 +176,6 @@
static int read_mv_component(vp9_reader *r,
const nmv_component *mvcomp, int usehp) {
-
int mag, d, fr, hp;
const int sign = vp9_read(r, mvcomp->sign);
const int mv_class = treed_read(r, vp9_mv_class_tree, mvcomp->classes);
@@ -443,8 +418,7 @@
const BLOCK_SIZE bsize = mbmi->sb_type;
const int allow_hp = xd->allow_high_precision_mv;
- int_mv nearest, nearby, best_mv;
- int_mv nearest_second, nearby_second, best_mv_second;
+ int_mv nearest[2], nearmv[2], best[2];
uint8_t inter_mode_ctx;
MV_REFERENCE_FRAME ref0;
int is_compound;
@@ -469,8 +443,8 @@
// nearest, nearby
if (bsize < BLOCK_8X8 || mbmi->mode != ZEROMV) {
- vp9_find_best_ref_mvs(xd, mbmi->ref_mvs[ref0], &nearest, &nearby);
- best_mv.as_int = mbmi->ref_mvs[ref0][0].as_int;
+ vp9_find_best_ref_mvs(xd, mbmi->ref_mvs[ref0], &nearest[0], &nearmv[0]);
+ best[0].as_int = nearest[0].as_int;
}
if (is_compound) {
@@ -479,9 +453,8 @@
ref1, mbmi->ref_mvs[ref1], mi_row, mi_col);
if (bsize < BLOCK_8X8 || mbmi->mode != ZEROMV) {
- vp9_find_best_ref_mvs(xd, mbmi->ref_mvs[ref1],
- &nearest_second, &nearby_second);
- best_mv_second.as_int = mbmi->ref_mvs[ref1][0].as_int;
+ vp9_find_best_ref_mvs(xd, mbmi->ref_mvs[ref1], &nearest[1], &nearmv[1]);
+ best[1].as_int = nearest[1].as_int;
}
}
@@ -493,75 +466,76 @@
const int num_4x4_w = num_4x4_blocks_wide_lookup[bsize]; // 1 or 2
const int num_4x4_h = num_4x4_blocks_high_lookup[bsize]; // 1 or 2
int idx, idy;
+ int b_mode;
for (idy = 0; idy < 2; idy += num_4x4_h) {
for (idx = 0; idx < 2; idx += num_4x4_w) {
- int_mv blockmv, secondmv;
+ int_mv block[2];
const int j = idy * 2 + idx;
- const int b_mode = read_inter_mode(cm, r, inter_mode_ctx);
+ b_mode = read_inter_mode(cm, r, inter_mode_ctx);
if (b_mode == NEARESTMV || b_mode == NEARMV) {
- vp9_append_sub8x8_mvs_for_idx(cm, xd, &nearest, &nearby, j, 0,
+ vp9_append_sub8x8_mvs_for_idx(cm, xd, &nearest[0],
+ &nearmv[0], j, 0,
mi_row, mi_col);
if (is_compound)
- vp9_append_sub8x8_mvs_for_idx(cm, xd, &nearest_second,
- &nearby_second, j, 1,
- mi_row, mi_col);
+ vp9_append_sub8x8_mvs_for_idx(cm, xd, &nearest[1],
+ &nearmv[1], j, 1,
+ mi_row, mi_col);
}
switch (b_mode) {
case NEWMV:
- read_mv(r, &blockmv.as_mv, &best_mv.as_mv, nmvc,
- &cm->counts.mv, allow_hp);
-
+ read_mv(r, &block[0].as_mv, &best[0].as_mv, nmvc, &cm->counts.mv,
+ allow_hp);
if (is_compound)
- read_mv(r, &secondmv.as_mv, &best_mv_second.as_mv, nmvc,
- &cm->counts.mv, allow_hp);
+ read_mv(r, &block[1].as_mv, &best[1].as_mv, nmvc, &cm->counts.mv,
+ allow_hp);
break;
case NEARESTMV:
- blockmv.as_int = nearest.as_int;
+ block[0].as_int = nearest[0].as_int;
if (is_compound)
- secondmv.as_int = nearest_second.as_int;
+ block[1].as_int = nearest[1].as_int;
break;
case NEARMV:
- blockmv.as_int = nearby.as_int;
+ block[0].as_int = nearmv[0].as_int;
if (is_compound)
- secondmv.as_int = nearby_second.as_int;
+ block[1].as_int = nearmv[1].as_int;
break;
case ZEROMV:
- blockmv.as_int = 0;
+ block[0].as_int = 0;
if (is_compound)
- secondmv.as_int = 0;
+ block[1].as_int = 0;
break;
default:
assert(!"Invalid inter mode value");
}
- mi->bmi[j].as_mv[0].as_int = blockmv.as_int;
+ mi->bmi[j].as_mv[0].as_int = block[0].as_int;
if (is_compound)
- mi->bmi[j].as_mv[1].as_int = secondmv.as_int;
+ mi->bmi[j].as_mv[1].as_int = block[1].as_int;
if (num_4x4_h == 2)
mi->bmi[j + 2] = mi->bmi[j];
if (num_4x4_w == 2)
mi->bmi[j + 1] = mi->bmi[j];
- mi->mbmi.mode = b_mode;
}
}
+ mi->mbmi.mode = b_mode;
mv0->as_int = mi->bmi[3].as_mv[0].as_int;
mv1->as_int = mi->bmi[3].as_mv[1].as_int;
} else {
switch (mbmi->mode) {
case NEARMV:
- mv0->as_int = nearby.as_int;
+ mv0->as_int = nearmv[0].as_int;
if (is_compound)
- mv1->as_int = nearby_second.as_int;
+ mv1->as_int = nearmv[1].as_int;
break;
case NEARESTMV:
- mv0->as_int = nearest.as_int;
+ mv0->as_int = nearest[0].as_int;
if (is_compound)
- mv1->as_int = nearest_second.as_int;
+ mv1->as_int = nearest[1].as_int;
break;
case ZEROMV:
@@ -571,9 +545,9 @@
break;
case NEWMV:
- read_mv(r, &mv0->as_mv, &best_mv.as_mv, nmvc, &cm->counts.mv, allow_hp);
+ read_mv(r, &mv0->as_mv, &best[0].as_mv, nmvc, &cm->counts.mv, allow_hp);
if (is_compound)
- read_mv(r, &mv1->as_mv, &best_mv_second.as_mv, nmvc, &cm->counts.mv,
+ read_mv(r, &mv1->as_mv, &best[1].as_mv, nmvc, &cm->counts.mv,
allow_hp);
break;
default:
diff --git a/vp9/decoder/vp9_decodframe.c b/vp9/decoder/vp9_decodframe.c
index 34ed0c7..77fec50 100644
--- a/vp9/decoder/vp9_decodframe.c
+++ b/vp9/decoder/vp9_decodframe.c
@@ -436,7 +436,6 @@
static void setup_loopfilter(struct loopfilter *lf,
struct vp9_read_bit_buffer *rb) {
-
lf->filter_level = vp9_rb_read_literal(rb, 6);
lf->sharpness_level = vp9_rb_read_literal(rb, 3);
@@ -935,6 +934,15 @@
}
}
+static void update_segmentation_map(VP9_COMMON *cm) {
+ int i, j;
+
+ for (i = 0; i < cm->mi_rows; ++i)
+ for (j = 0; j < cm->mi_cols; ++j)
+ cm->last_frame_seg_map[i * cm->mi_cols + j] =
+ cm->mi_grid_visible[i * cm->mode_info_stride + j]->mbmi.segment_id;
+}
+
int vp9_decode_frame(VP9D_COMP *pbi, const uint8_t **p_data_end) {
int i;
VP9_COMMON *const cm = &pbi->common;
@@ -1014,5 +1022,7 @@
if (cm->refresh_frame_context)
cm->frame_contexts[cm->frame_context_idx] = cm->fc;
+ update_segmentation_map(cm);
+
return 0;
}
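The update_segmentation_map() added above replaces the per-block set_segment_id() writes removed from vp9_decodemv.c earlier in this change: rather than updating last_frame_seg_map block by block during entropy decoding, the map is now derived in a single pass over mi_grid_visible once the whole frame has been decoded.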
diff --git a/vp9/decoder/vp9_detokenize.c b/vp9/decoder/vp9_detokenize.c
index cd74a0b..8fcf83e 100644
--- a/vp9/decoder/vp9_detokenize.c
+++ b/vp9/decoder/vp9_detokenize.c
@@ -105,9 +105,8 @@
const int16_t *scan, *nb;
const uint8_t *band_translate;
uint8_t token_cache[1024];
- int pt = get_entropy_context(xd, tx_size, type, block_idx, A, L,
- &scan, &band_translate);
- nb = vp9_get_coef_neighbors_handle(scan);
+ int pt = get_entropy_context(tx_size, A, L);
+ get_scan_and_band(xd, tx_size, type, block_idx, &scan, &nb, &band_translate);
while (1) {
int val;
@@ -122,7 +121,7 @@
if (!vp9_read(r, prob[EOB_CONTEXT_NODE]))
break;
-SKIP_START:
+ SKIP_START:
if (c >= seg_eob)
break;
if (c)
diff --git a/vp9/decoder/vp9_dsubexp.c b/vp9/decoder/vp9_dsubexp.c
index 8cc64f7..9a970d4 100644
--- a/vp9/decoder/vp9_dsubexp.c
+++ b/vp9/decoder/vp9_dsubexp.c
@@ -67,7 +67,6 @@
206, 207, 208, 209, 210, 211, 212, 213, 215, 216, 217, 218, 219, 220, 221,
222, 223, 224, 225, 226, 228, 229, 230, 231, 232, 233, 234, 235, 236, 237,
238, 239, 241, 242, 243, 244, 245, 246, 247, 248, 249, 250, 251, 252,
-
};
// v = merge_index(v, MAX_PROBS - 1, MODULUS_PARAM);
v = inv_map_table[v];
diff --git a/vp9/decoder/vp9_idct_blk.c b/vp9/decoder/vp9_idct_blk.c
index 395e636..627873f 100644
--- a/vp9/decoder/vp9_idct_blk.c
+++ b/vp9/decoder/vp9_idct_blk.c
@@ -8,37 +8,10 @@
* be found in the AUTHORS file in the root of the source tree.
*/
-#include "vp9_rtcd.h"
+#include "./vp9_rtcd.h"
#include "vp9/common/vp9_blockd.h"
#include "vp9/decoder/vp9_idct_blk.h"
-static void add_constant_residual(const int16_t diff, uint8_t *dest, int stride,
- int width, int height) {
- int r, c;
-
- for (r = 0; r < height; r++) {
- for (c = 0; c < width; c++)
- dest[c] = clip_pixel(diff + dest[c]);
-
- dest += stride;
- }
-}
-
-void vp9_add_constant_residual_8x8_c(const int16_t diff, uint8_t *dest,
- int stride) {
- add_constant_residual(diff, dest, stride, 8, 8);
-}
-
-void vp9_add_constant_residual_16x16_c(const int16_t diff, uint8_t *dest,
- int stride) {
- add_constant_residual(diff, dest, stride, 16, 16);
-}
-
-void vp9_add_constant_residual_32x32_c(const int16_t diff, uint8_t *dest,
- int stride) {
- add_constant_residual(diff, dest, stride, 32, 32);
-}
-
void vp9_iht_add_c(TX_TYPE tx_type, int16_t *input, uint8_t *dest, int stride,
int eob) {
if (tx_type == DCT_DCT) {
@@ -96,7 +69,7 @@
vp9_short_idct8x8_1_add(input, dest, stride);
input[0] = 0;
} else if (eob <= 10) {
- vp9_short_idct10_8x8_add(input, dest, stride);
+ vp9_short_idct8x8_10_add(input, dest, stride);
vpx_memset(input, 0, 128);
} else {
vp9_short_idct8x8_add(input, dest, stride);
@@ -126,7 +99,7 @@
vp9_short_idct16x16_1_add(input, dest, stride);
input[0] = 0;
} else if (eob <= 10) {
- vp9_short_idct10_16x16_add(input, dest, stride);
+ vp9_short_idct16x16_10_add(input, dest, stride);
vpx_memset(input, 0, 512);
} else {
vp9_short_idct16x16_add(input, dest, stride);
@@ -136,12 +109,9 @@
}
void vp9_idct_add_32x32_c(int16_t *input, uint8_t *dest, int stride, int eob) {
- DECLARE_ALIGNED_ARRAY(16, int16_t, output, 1024);
-
if (eob) {
if (eob == 1) {
- vp9_short_idct1_32x32(input, output);
- vp9_add_constant_residual_32x32(output[0], dest, stride);
+ vp9_short_idct32x32_1_add(input, dest, stride);
input[0] = 0;
} else {
vp9_short_idct32x32_add(input, dest, stride);
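Throughout vp9_idct_blk.c, eob, the end-of-block index (one past the last nonzero coefficient in scan order), selects the cheapest inverse transform. eob == 1 means only the DC coefficient survives, which is why the intermediate output[] buffer and the add_constant_residual helpers deleted above become unnecessary: vp9_short_idct32x32_1_add applies the constant DC contribution to the destination directly.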
diff --git a/vp9/decoder/vp9_idct_blk.h b/vp9/decoder/vp9_idct_blk.h
index 1810bd0..00f1bc6 100644
--- a/vp9/decoder/vp9_idct_blk.h
+++ b/vp9/decoder/vp9_idct_blk.h
@@ -14,17 +14,16 @@
#include "vp9/common/vp9_blockd.h"
+void vp9_idct_add_lossless_c(int16_t *input, uint8_t *dest,
+ int stride, int eob);
-void vp9_idct_add_lossless_c(int16_t *input, unsigned char *dest, int stride,
- int eob);
-
-void vp9_iht_add_c(TX_TYPE tx_type, int16_t *input, unsigned char *dest,
+void vp9_iht_add_c(TX_TYPE tx_type, int16_t *input, uint8_t *dest,
int stride, int eob);
-void vp9_iht_add_8x8_c(TX_TYPE tx_type, int16_t *input, unsigned char *dest,
+void vp9_iht_add_8x8_c(TX_TYPE tx_type, int16_t *input, uint8_t *dest,
int stride, int eob);
-void vp9_iht_add_16x16_c(TX_TYPE tx_type, int16_t *input, unsigned char *dest,
+void vp9_iht_add_16x16_c(TX_TYPE tx_type, int16_t *input, uint8_t *dest,
int stride, int eob);
#endif // VP9_DECODER_VP9_IDCT_BLK_H_
diff --git a/vp9/decoder/vp9_onyxd.h b/vp9/decoder/vp9_onyxd.h
index cd5b750..a4b9c24 100644
--- a/vp9/decoder/vp9_onyxd.h
+++ b/vp9/decoder/vp9_onyxd.h
@@ -8,8 +8,8 @@
* be found in the AUTHORS file in the root of the source tree.
*/
-#ifndef VP9_COMMON_VP9_ONYXD_H_
-#define VP9_COMMON_VP9_ONYXD_H_
+#ifndef VP9_DECODER_VP9_ONYXD_H_
+#define VP9_DECODER_VP9_ONYXD_H_
#ifdef __cplusplus
extern "C" {
@@ -40,7 +40,7 @@
void vp9_initialize_dec();
int vp9_receive_compressed_data(VP9D_PTR comp,
- uint64_t size, const uint8_t **dest,
+ size_t size, const uint8_t **dest,
int64_t time_stamp);
int vp9_get_raw_frame(VP9D_PTR comp, YV12_BUFFER_CONFIG *sd,
@@ -66,4 +66,4 @@
}
#endif
-#endif // VP9_COMMON_VP9_ONYXD_H_
+#endif // VP9_DECODER_VP9_ONYXD_H_
diff --git a/vp9/decoder/vp9_onyxd_if.c b/vp9/decoder/vp9_onyxd_if.c
index 17d5def..a42c2cf 100644
--- a/vp9/decoder/vp9_onyxd_if.c
+++ b/vp9/decoder/vp9_onyxd_if.c
@@ -65,13 +65,12 @@
#endif
#if WRITE_RECON_BUFFER == 2
void write_dx_frame_to_file(YV12_BUFFER_CONFIG *frame, int this_frame) {
-
// write the frame
FILE *yframe;
int i;
char filename[255];
- sprintf(filename, "dx\\y%04d.raw", this_frame);
+ snprintf(filename, sizeof(filename) - 1, "dx\\y%04d.raw", this_frame);
yframe = fopen(filename, "wb");
for (i = 0; i < frame->y_height; i++)
@@ -79,7 +78,7 @@
frame->y_width, 1, yframe);
fclose(yframe);
- sprintf(filename, "dx\\u%04d.raw", this_frame);
+ snprintf(filename, sizeof(filename) - 1, "dx\\u%04d.raw", this_frame);
yframe = fopen(filename, "wb");
for (i = 0; i < frame->uv_height; i++)
@@ -87,7 +86,7 @@
frame->uv_width, 1, yframe);
fclose(yframe);
- sprintf(filename, "dx\\v%04d.raw", this_frame);
+ snprintf(filename, sizeof(filename) - 1, "dx\\v%04d.raw", this_frame);
yframe = fopen(filename, "wb");
for (i = 0; i < frame->uv_height; i++)
@@ -214,13 +213,13 @@
* vpxenc --test-decode functionality working, and will be replaced in a
* later commit that adds VP9-specific controls for this functionality.
*/
- if (ref_frame_flag == VP9_LAST_FLAG)
+ if (ref_frame_flag == VP9_LAST_FLAG) {
ref_fb_ptr = &pbi->common.active_ref_idx[0];
- else if (ref_frame_flag == VP9_GOLD_FLAG)
+ } else if (ref_frame_flag == VP9_GOLD_FLAG) {
ref_fb_ptr = &pbi->common.active_ref_idx[1];
- else if (ref_frame_flag == VP9_ALT_FLAG)
+ } else if (ref_frame_flag == VP9_ALT_FLAG) {
ref_fb_ptr = &pbi->common.active_ref_idx[2];
- else {
+ } else {
vpx_internal_error(&pbi->common.error, VPX_CODEC_ERROR,
"Invalid reference frame");
return pbi->common.error.error_code;
@@ -277,7 +276,7 @@
}
int vp9_receive_compressed_data(VP9D_PTR ptr,
- uint64_t size, const uint8_t **psource,
+ size_t size, const uint8_t **psource,
int64_t time_stamp) {
VP9D_COMP *pbi = (VP9D_COMP *) ptr;
VP9_COMMON *cm = &pbi->common;
diff --git a/vp9/decoder/vp9_onyxd_int.h b/vp9/decoder/vp9_onyxd_int.h
index a051971..76d7c57 100644
--- a/vp9/decoder/vp9_onyxd_int.h
+++ b/vp9/decoder/vp9_onyxd_int.h
@@ -25,7 +25,7 @@
VP9D_CONFIG oxcf;
const uint8_t *source;
- uint32_t source_sz;
+ size_t source_sz;
int64_t last_time_stamp;
int ready_for_new_data;
@@ -41,4 +41,4 @@
VP9Worker lf_worker;
} VP9D_COMP;
-#endif // VP9_DECODER_VP9_TREEREADER_H_
+#endif // VP9_DECODER_VP9_ONYXD_INT_H_
diff --git a/vp9/decoder/vp9_read_bit_buffer.h b/vp9/decoder/vp9_read_bit_buffer.h
index c7fa3aa..41a6868 100644
--- a/vp9/decoder/vp9_read_bit_buffer.h
+++ b/vp9/decoder/vp9_read_bit_buffer.h
@@ -8,8 +8,8 @@
* be found in the AUTHORS file in the root of the source tree.
*/
-#ifndef VP9_READ_BIT_BUFFER_
-#define VP9_READ_BIT_BUFFER_
+#ifndef VP9_DECODER_VP9_READ_BIT_BUFFER_H_
+#define VP9_DECODER_VP9_READ_BIT_BUFFER_H_
#include <limits.h>
@@ -57,4 +57,4 @@
return vp9_rb_read_bit(rb) ? -value : value;
}
-#endif // VP9_READ_BIT_BUFFER_
+#endif // VP9_DECODER_VP9_READ_BIT_BUFFER_H_
diff --git a/vp9/decoder/vp9_thread.h b/vp9/decoder/vp9_thread.h
index a8f7e04..0b5eca0 100644
--- a/vp9/decoder/vp9_thread.h
+++ b/vp9/decoder/vp9_thread.h
@@ -17,7 +17,7 @@
#ifndef VP9_DECODER_VP9_THREAD_H_
#define VP9_DECODER_VP9_THREAD_H_
-#include "vpx_config.h"
+#include "./vpx_config.h"
#if defined(__cplusplus) || defined(c_plusplus)
extern "C" {
@@ -90,4 +90,4 @@
} // extern "C"
#endif
-#endif /* VP9_DECODER_VP9_THREAD_H_ */
+#endif // VP9_DECODER_VP9_THREAD_H_
diff --git a/vp9/decoder/vp9_treereader.h b/vp9/decoder/vp9_treereader.h
index 710cc4c..f612497 100644
--- a/vp9/decoder/vp9_treereader.h
+++ b/vp9/decoder/vp9_treereader.h
@@ -23,7 +23,8 @@
const vp9_prob *const p) {
register vp9_tree_index i = 0;
- while ((i = t[ i + vp9_read(r, p[i >> 1])]) > 0);
+ while ((i = t[ i + vp9_read(r, p[i >> 1])]) > 0)
+ continue;
return -i;
}
diff --git a/vp9/decoder/x86/vp9_dequantize_sse2.c b/vp9/decoder/x86/vp9_dequantize_sse2.c
deleted file mode 100644
index 54ec67f..0000000
--- a/vp9/decoder/x86/vp9_dequantize_sse2.c
+++ /dev/null
@@ -1,220 +0,0 @@
-/*
- * Copyright (c) 2012 The WebM project authors. All Rights Reserved.
- *
- * Use of this source code is governed by a BSD-style license
- * that can be found in the LICENSE file in the root of the source
- * tree. An additional intellectual property rights grant can be found
- * in the file PATENTS. All contributing project authors may
- * be found in the AUTHORS file in the root of the source tree.
- */
-
-#include <assert.h>
-#include <emmintrin.h> // SSE2
-#include "./vpx_config.h"
-#include "vpx/vpx_integer.h"
-#include "vp9/common/vp9_common.h"
-#include "vp9/common/vp9_idct.h"
-
-void vp9_add_constant_residual_8x8_sse2(const int16_t diff, uint8_t *dest,
- int stride) {
- uint8_t abs_diff;
- __m128i d;
-
- // Prediction data.
- __m128i p0 = _mm_loadl_epi64((const __m128i *)(dest + 0 * stride));
- __m128i p1 = _mm_loadl_epi64((const __m128i *)(dest + 1 * stride));
- __m128i p2 = _mm_loadl_epi64((const __m128i *)(dest + 2 * stride));
- __m128i p3 = _mm_loadl_epi64((const __m128i *)(dest + 3 * stride));
- __m128i p4 = _mm_loadl_epi64((const __m128i *)(dest + 4 * stride));
- __m128i p5 = _mm_loadl_epi64((const __m128i *)(dest + 5 * stride));
- __m128i p6 = _mm_loadl_epi64((const __m128i *)(dest + 6 * stride));
- __m128i p7 = _mm_loadl_epi64((const __m128i *)(dest + 7 * stride));
-
- p0 = _mm_unpacklo_epi64(p0, p1);
- p2 = _mm_unpacklo_epi64(p2, p3);
- p4 = _mm_unpacklo_epi64(p4, p5);
- p6 = _mm_unpacklo_epi64(p6, p7);
-
- // Clip diff value to [0, 255] range. Then, do addition or subtraction
- // according to its sign.
- if (diff >= 0) {
- abs_diff = (diff > 255) ? 255 : diff;
- d = _mm_shuffle_epi32(_mm_cvtsi32_si128((int)(abs_diff * 0x01010101u)), 0);
-
- p0 = _mm_adds_epu8(p0, d);
- p2 = _mm_adds_epu8(p2, d);
- p4 = _mm_adds_epu8(p4, d);
- p6 = _mm_adds_epu8(p6, d);
- } else {
- abs_diff = (diff < -255) ? 255 : -diff;
- d = _mm_shuffle_epi32(_mm_cvtsi32_si128((int)(abs_diff * 0x01010101u)), 0);
-
- p0 = _mm_subs_epu8(p0, d);
- p2 = _mm_subs_epu8(p2, d);
- p4 = _mm_subs_epu8(p4, d);
- p6 = _mm_subs_epu8(p6, d);
- }
-
- _mm_storel_epi64((__m128i *)(dest + 0 * stride), p0);
- p0 = _mm_srli_si128(p0, 8);
- _mm_storel_epi64((__m128i *)(dest + 1 * stride), p0);
-
- _mm_storel_epi64((__m128i *)(dest + 2 * stride), p2);
- p2 = _mm_srli_si128(p2, 8);
- _mm_storel_epi64((__m128i *)(dest + 3 * stride), p2);
-
- _mm_storel_epi64((__m128i *)(dest + 4 * stride), p4);
- p4 = _mm_srli_si128(p4, 8);
- _mm_storel_epi64((__m128i *)(dest + 5 * stride), p4);
-
- _mm_storel_epi64((__m128i *)(dest + 6 * stride), p6);
- p6 = _mm_srli_si128(p6, 8);
- _mm_storel_epi64((__m128i *)(dest + 7 * stride), p6);
-}
-
-void vp9_add_constant_residual_16x16_sse2(const int16_t diff, uint8_t *dest,
- int stride) {
- uint8_t abs_diff;
- __m128i d;
-
- // Prediction data.
- __m128i p0 = _mm_load_si128((const __m128i *)(dest + 0 * stride));
- __m128i p1 = _mm_load_si128((const __m128i *)(dest + 1 * stride));
- __m128i p2 = _mm_load_si128((const __m128i *)(dest + 2 * stride));
- __m128i p3 = _mm_load_si128((const __m128i *)(dest + 3 * stride));
- __m128i p4 = _mm_load_si128((const __m128i *)(dest + 4 * stride));
- __m128i p5 = _mm_load_si128((const __m128i *)(dest + 5 * stride));
- __m128i p6 = _mm_load_si128((const __m128i *)(dest + 6 * stride));
- __m128i p7 = _mm_load_si128((const __m128i *)(dest + 7 * stride));
- __m128i p8 = _mm_load_si128((const __m128i *)(dest + 8 * stride));
- __m128i p9 = _mm_load_si128((const __m128i *)(dest + 9 * stride));
- __m128i p10 = _mm_load_si128((const __m128i *)(dest + 10 * stride));
- __m128i p11 = _mm_load_si128((const __m128i *)(dest + 11 * stride));
- __m128i p12 = _mm_load_si128((const __m128i *)(dest + 12 * stride));
- __m128i p13 = _mm_load_si128((const __m128i *)(dest + 13 * stride));
- __m128i p14 = _mm_load_si128((const __m128i *)(dest + 14 * stride));
- __m128i p15 = _mm_load_si128((const __m128i *)(dest + 15 * stride));
-
- // Clip diff value to [0, 255] range. Then, do addition or subtraction
- // according to its sign.
- if (diff >= 0) {
- abs_diff = (diff > 255) ? 255 : diff;
- d = _mm_shuffle_epi32(_mm_cvtsi32_si128((int)(abs_diff * 0x01010101u)), 0);
-
- p0 = _mm_adds_epu8(p0, d);
- p1 = _mm_adds_epu8(p1, d);
- p2 = _mm_adds_epu8(p2, d);
- p3 = _mm_adds_epu8(p3, d);
- p4 = _mm_adds_epu8(p4, d);
- p5 = _mm_adds_epu8(p5, d);
- p6 = _mm_adds_epu8(p6, d);
- p7 = _mm_adds_epu8(p7, d);
- p8 = _mm_adds_epu8(p8, d);
- p9 = _mm_adds_epu8(p9, d);
- p10 = _mm_adds_epu8(p10, d);
- p11 = _mm_adds_epu8(p11, d);
- p12 = _mm_adds_epu8(p12, d);
- p13 = _mm_adds_epu8(p13, d);
- p14 = _mm_adds_epu8(p14, d);
- p15 = _mm_adds_epu8(p15, d);
- } else {
- abs_diff = (diff < -255) ? 255 : -diff;
- d = _mm_shuffle_epi32(_mm_cvtsi32_si128((int)(abs_diff * 0x01010101u)), 0);
-
- p0 = _mm_subs_epu8(p0, d);
- p1 = _mm_subs_epu8(p1, d);
- p2 = _mm_subs_epu8(p2, d);
- p3 = _mm_subs_epu8(p3, d);
- p4 = _mm_subs_epu8(p4, d);
- p5 = _mm_subs_epu8(p5, d);
- p6 = _mm_subs_epu8(p6, d);
- p7 = _mm_subs_epu8(p7, d);
- p8 = _mm_subs_epu8(p8, d);
- p9 = _mm_subs_epu8(p9, d);
- p10 = _mm_subs_epu8(p10, d);
- p11 = _mm_subs_epu8(p11, d);
- p12 = _mm_subs_epu8(p12, d);
- p13 = _mm_subs_epu8(p13, d);
- p14 = _mm_subs_epu8(p14, d);
- p15 = _mm_subs_epu8(p15, d);
- }
-
- // Store results
- _mm_store_si128((__m128i *)(dest + 0 * stride), p0);
- _mm_store_si128((__m128i *)(dest + 1 * stride), p1);
- _mm_store_si128((__m128i *)(dest + 2 * stride), p2);
- _mm_store_si128((__m128i *)(dest + 3 * stride), p3);
- _mm_store_si128((__m128i *)(dest + 4 * stride), p4);
- _mm_store_si128((__m128i *)(dest + 5 * stride), p5);
- _mm_store_si128((__m128i *)(dest + 6 * stride), p6);
- _mm_store_si128((__m128i *)(dest + 7 * stride), p7);
- _mm_store_si128((__m128i *)(dest + 8 * stride), p8);
- _mm_store_si128((__m128i *)(dest + 9 * stride), p9);
- _mm_store_si128((__m128i *)(dest + 10 * stride), p10);
- _mm_store_si128((__m128i *)(dest + 11 * stride), p11);
- _mm_store_si128((__m128i *)(dest + 12 * stride), p12);
- _mm_store_si128((__m128i *)(dest + 13 * stride), p13);
- _mm_store_si128((__m128i *)(dest + 14 * stride), p14);
- _mm_store_si128((__m128i *)(dest + 15 * stride), p15);
-}
-
-void vp9_add_constant_residual_32x32_sse2(const int16_t diff, uint8_t *dest,
- int stride) {
- uint8_t abs_diff;
- __m128i d;
- int i = 8;
-
- if (diff >= 0) {
- abs_diff = (diff > 255) ? 255 : diff;
- d = _mm_shuffle_epi32(_mm_cvtsi32_si128((int)(abs_diff * 0x01010101u)), 0);
- } else {
- abs_diff = (diff < -255) ? 255 : -diff;
- d = _mm_shuffle_epi32(_mm_cvtsi32_si128((int)(abs_diff * 0x01010101u)), 0);
- }
-
- do {
- // Prediction data.
- __m128i p0 = _mm_load_si128((const __m128i *)(dest + 0 * stride));
- __m128i p1 = _mm_load_si128((const __m128i *)(dest + 0 * stride + 16));
- __m128i p2 = _mm_load_si128((const __m128i *)(dest + 1 * stride));
- __m128i p3 = _mm_load_si128((const __m128i *)(dest + 1 * stride + 16));
- __m128i p4 = _mm_load_si128((const __m128i *)(dest + 2 * stride));
- __m128i p5 = _mm_load_si128((const __m128i *)(dest + 2 * stride + 16));
- __m128i p6 = _mm_load_si128((const __m128i *)(dest + 3 * stride));
- __m128i p7 = _mm_load_si128((const __m128i *)(dest + 3 * stride + 16));
-
- // Clip diff value to [0, 255] range. Then, do addition or subtraction
- // according to its sign.
- if (diff >= 0) {
- p0 = _mm_adds_epu8(p0, d);
- p1 = _mm_adds_epu8(p1, d);
- p2 = _mm_adds_epu8(p2, d);
- p3 = _mm_adds_epu8(p3, d);
- p4 = _mm_adds_epu8(p4, d);
- p5 = _mm_adds_epu8(p5, d);
- p6 = _mm_adds_epu8(p6, d);
- p7 = _mm_adds_epu8(p7, d);
- } else {
- p0 = _mm_subs_epu8(p0, d);
- p1 = _mm_subs_epu8(p1, d);
- p2 = _mm_subs_epu8(p2, d);
- p3 = _mm_subs_epu8(p3, d);
- p4 = _mm_subs_epu8(p4, d);
- p5 = _mm_subs_epu8(p5, d);
- p6 = _mm_subs_epu8(p6, d);
- p7 = _mm_subs_epu8(p7, d);
- }
-
- // Store results
- _mm_store_si128((__m128i *)(dest + 0 * stride), p0);
- _mm_store_si128((__m128i *)(dest + 0 * stride + 16), p1);
- _mm_store_si128((__m128i *)(dest + 1 * stride), p2);
- _mm_store_si128((__m128i *)(dest + 1 * stride + 16), p3);
- _mm_store_si128((__m128i *)(dest + 2 * stride), p4);
- _mm_store_si128((__m128i *)(dest + 2 * stride + 16), p5);
- _mm_store_si128((__m128i *)(dest + 3 * stride), p6);
- _mm_store_si128((__m128i *)(dest + 3 * stride + 16), p7);
-
- dest += 4 * stride;
- } while (--i);
-}
diff --git a/vp9/encoder/vp9_bitstream.c b/vp9/encoder/vp9_bitstream.c
index 957cfd2..20dd8e1 100644
--- a/vp9/encoder/vp9_bitstream.c
+++ b/vp9/encoder/vp9_bitstream.c
@@ -283,7 +283,7 @@
const TOKENEXTRA *const stop) {
TOKENEXTRA *p = *tp;
- while (p < stop) {
+ while (p < stop && p->token != EOSB_TOKEN) {
const int t = p->token;
const struct vp9_token *const a = vp9_coef_encodings + t;
const vp9_extra_bit *const b = vp9_extra_bits + t;
@@ -293,10 +293,6 @@
int n = a->len;
vp9_prob probs[ENTROPY_NODES];
- if (t == EOSB_TOKEN) {
- ++p;
- break;
- }
if (t >= TWO_TOKEN) {
vp9_model_to_full_probs(p->context_tree, probs);
pp = probs;
@@ -338,7 +334,7 @@
++p;
}
- *tp = p;
+ *tp = p + (p->token == EOSB_TOKEN);
}
static void write_sb_mv_ref(vp9_writer *w, MB_PREDICTION_MODE mode,
@@ -488,17 +484,13 @@
}
if (bsize < BLOCK_8X8) {
- int j;
- MB_PREDICTION_MODE blockmode;
- int_mv blockmv;
const int num_4x4_blocks_wide = num_4x4_blocks_wide_lookup[bsize];
const int num_4x4_blocks_high = num_4x4_blocks_high_lookup[bsize];
int idx, idy;
for (idy = 0; idy < 2; idy += num_4x4_blocks_high) {
for (idx = 0; idx < 2; idx += num_4x4_blocks_wide) {
- j = idy * 2 + idx;
- blockmode = x->partition_info->bmi[j].mode;
- blockmv = m->bmi[j].as_mv[0];
+ const int j = idy * 2 + idx;
+ const MB_PREDICTION_MODE blockmode = x->partition_info->bmi[j].mode;
write_sb_mv_ref(bc, blockmode, mv_ref_p);
++cm->counts.inter_mode[mi->mode_context[rf]]
[inter_mode_offset(blockmode)];
@@ -507,14 +499,12 @@
#ifdef ENTROPY_STATS
active_section = 11;
#endif
- vp9_encode_mv(cpi, bc, &blockmv.as_mv, &mi->best_mv.as_mv,
- nmvc, allow_hp);
+ vp9_encode_mv(cpi, bc, &m->bmi[j].as_mv[0].as_mv,
+ &mi->best_mv[0].as_mv, nmvc, allow_hp);
- if (mi->ref_frame[1] > INTRA_FRAME)
- vp9_encode_mv(cpi, bc,
- &m->bmi[j].as_mv[1].as_mv,
- &mi->best_second_mv.as_mv,
- nmvc, allow_hp);
+ if (has_second_ref(mi))
+ vp9_encode_mv(cpi, bc, &m->bmi[j].as_mv[1].as_mv,
+ &mi->best_mv[1].as_mv, nmvc, allow_hp);
}
}
}
@@ -522,12 +512,12 @@
#ifdef ENTROPY_STATS
active_section = 5;
#endif
- vp9_encode_mv(cpi, bc, &mi->mv[0].as_mv, &mi->best_mv.as_mv,
- nmvc, allow_hp);
+ vp9_encode_mv(cpi, bc, &mi->mv[0].as_mv,
+ &mi->best_mv[0].as_mv, nmvc, allow_hp);
- if (mi->ref_frame[1] > INTRA_FRAME)
- vp9_encode_mv(cpi, bc, &mi->mv[1].as_mv, &mi->best_second_mv.as_mv,
- nmvc, allow_hp);
+ if (has_second_ref(mi))
+ vp9_encode_mv(cpi, bc, &mi->mv[1].as_mv,
+ &mi->best_mv[1].as_mv, nmvc, allow_hp);
}
}
}
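
The rewritten loop above treats EOSB_TOKEN as a sentinel checked in the loop condition, then steps the caller's cursor past it in a single expression. A sketch of that scan pattern, with an explicit bounds check added for clarity (EOSB is an illustrative sentinel value, not the libvpx enum):

    /* Sentinel-terminated scan: process tokens until the hard stop or the
     * end-of-superblock marker, then advance past the marker if seen. */
    enum { EOSB = -1 };

    static const int *consume_tokens(const int *p, const int *stop) {
      while (p < stop && *p != EOSB) {
        /* ... encode *p ... */
        ++p;
      }
      /* Step over the sentinel; the patch does this unconditionally as
       * p + (p->token == EOSB_TOKEN), assuming a terminated stream. */
      return p + (p < stop && *p == EOSB);
    }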
diff --git a/vp9/encoder/vp9_block.h b/vp9/encoder/vp9_block.h
index 013047e..5a0d746 100644
--- a/vp9/encoder/vp9_block.h
+++ b/vp9/encoder/vp9_block.h
@@ -34,6 +34,7 @@
typedef struct {
MODE_INFO mic;
PARTITION_INFO partition_info;
+ unsigned char zcoeff_blk[256];
int skip;
int_mv best_ref_mv;
int_mv second_best_ref_mv;
@@ -136,6 +137,7 @@
int mv_row_min;
int mv_row_max;
+ unsigned char zcoeff_blk[TX_SIZES][256];
int skip;
int encode_breakout;
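
The new zcoeff_blk arrays cache, per 4x4 block, whether the rate-distortion search quantized every coefficient to zero; 256 entries cover the 4x4 grid of a 64x64 superblock (64 * 64 / 16). A sketch of how such a cache is filled and consulted later (types and names here are illustrative, not the encoder's):

    #include <string.h>

    typedef struct {
      unsigned char zcoeff_blk[256];  /* 1 = block quantized to all zeros */
    } block_ctx;

    static void reset_zero_flags(block_ctx *ctx) {
      memset(ctx->zcoeff_blk, 0, sizeof(ctx->zcoeff_blk));
    }

    static void mark_zero_block(block_ctx *ctx, int block) {
      ctx->zcoeff_blk[block] = 1;  /* recorded during the RD search */
    }

    static int can_skip_xform_quant(const block_ctx *ctx, int block) {
      return ctx->zcoeff_blk[block];  /* skip transform/quant if known zero */
    }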
diff --git a/vp9/encoder/vp9_dct.c b/vp9/encoder/vp9_dct.c
index 4f4ad04..b9c3000 100644
--- a/vp9/encoder/vp9_dct.c
+++ b/vp9/encoder/vp9_dct.c
@@ -58,10 +58,10 @@
for (i = 0; i < 4; ++i) {
// Load inputs.
if (0 == pass) {
- input[0] = in[0 * stride] << 4;
- input[1] = in[1 * stride] << 4;
- input[2] = in[2 * stride] << 4;
- input[3] = in[3 * stride] << 4;
+ input[0] = in[0 * stride] * 16;
+ input[1] = in[1 * stride] * 16;
+ input[2] = in[2 * stride] * 16;
+ input[3] = in[3 * stride] * 16;
if (i == 0 && input[0]) {
input[0] += 1;
}
@@ -160,7 +160,7 @@
// Columns
for (i = 0; i < 4; ++i) {
for (j = 0; j < 4; ++j)
- temp_in[j] = input[j * pitch + i] << 4;
+ temp_in[j] = input[j * pitch + i] * 16;
if (i == 0 && temp_in[0])
temp_in[0] += 1;
ht.cols(temp_in, temp_out);
@@ -250,14 +250,14 @@
int i;
for (i = 0; i < 8; i++) {
// stage 1
- s0 = (input[0 * stride] + input[7 * stride]) << 2;
- s1 = (input[1 * stride] + input[6 * stride]) << 2;
- s2 = (input[2 * stride] + input[5 * stride]) << 2;
- s3 = (input[3 * stride] + input[4 * stride]) << 2;
- s4 = (input[3 * stride] - input[4 * stride]) << 2;
- s5 = (input[2 * stride] - input[5 * stride]) << 2;
- s6 = (input[1 * stride] - input[6 * stride]) << 2;
- s7 = (input[0 * stride] - input[7 * stride]) << 2;
+ s0 = (input[0 * stride] + input[7 * stride]) * 4;
+ s1 = (input[1 * stride] + input[6 * stride]) * 4;
+ s2 = (input[2 * stride] + input[5 * stride]) * 4;
+ s3 = (input[3 * stride] + input[4 * stride]) * 4;
+ s4 = (input[3 * stride] - input[4 * stride]) * 4;
+ s5 = (input[2 * stride] - input[5 * stride]) * 4;
+ s6 = (input[1 * stride] - input[6 * stride]) * 4;
+ s7 = (input[0 * stride] - input[7 * stride]) * 4;
// fdct4_1d(step, step);
x0 = s0 + s3;
@@ -331,23 +331,23 @@
for (i = 0; i < 16; i++) {
if (0 == pass) {
// Calculate input for the first 8 results.
- input[0] = (in[0 * stride] + in[15 * stride]) << 2;
- input[1] = (in[1 * stride] + in[14 * stride]) << 2;
- input[2] = (in[2 * stride] + in[13 * stride]) << 2;
- input[3] = (in[3 * stride] + in[12 * stride]) << 2;
- input[4] = (in[4 * stride] + in[11 * stride]) << 2;
- input[5] = (in[5 * stride] + in[10 * stride]) << 2;
- input[6] = (in[6 * stride] + in[ 9 * stride]) << 2;
- input[7] = (in[7 * stride] + in[ 8 * stride]) << 2;
+ input[0] = (in[0 * stride] + in[15 * stride]) * 4;
+ input[1] = (in[1 * stride] + in[14 * stride]) * 4;
+ input[2] = (in[2 * stride] + in[13 * stride]) * 4;
+ input[3] = (in[3 * stride] + in[12 * stride]) * 4;
+ input[4] = (in[4 * stride] + in[11 * stride]) * 4;
+ input[5] = (in[5 * stride] + in[10 * stride]) * 4;
+ input[6] = (in[6 * stride] + in[ 9 * stride]) * 4;
+ input[7] = (in[7 * stride] + in[ 8 * stride]) * 4;
// Calculate input for the next 8 results.
- step1[0] = (in[7 * stride] - in[ 8 * stride]) << 2;
- step1[1] = (in[6 * stride] - in[ 9 * stride]) << 2;
- step1[2] = (in[5 * stride] - in[10 * stride]) << 2;
- step1[3] = (in[4 * stride] - in[11 * stride]) << 2;
- step1[4] = (in[3 * stride] - in[12 * stride]) << 2;
- step1[5] = (in[2 * stride] - in[13 * stride]) << 2;
- step1[6] = (in[1 * stride] - in[14 * stride]) << 2;
- step1[7] = (in[0 * stride] - in[15 * stride]) << 2;
+ step1[0] = (in[7 * stride] - in[ 8 * stride]) * 4;
+ step1[1] = (in[6 * stride] - in[ 9 * stride]) * 4;
+ step1[2] = (in[5 * stride] - in[10 * stride]) * 4;
+ step1[3] = (in[4 * stride] - in[11 * stride]) * 4;
+ step1[4] = (in[3 * stride] - in[12 * stride]) * 4;
+ step1[5] = (in[2 * stride] - in[13 * stride]) * 4;
+ step1[6] = (in[1 * stride] - in[14 * stride]) * 4;
+ step1[7] = (in[0 * stride] - in[15 * stride]) * 4;
} else {
// Calculate input for the first 8 results.
input[0] = ((in[0 * 16] + 1) >> 2) + ((in[15 * 16] + 1) >> 2);
@@ -575,7 +575,7 @@
// Columns
for (i = 0; i < 8; ++i) {
for (j = 0; j < 8; ++j)
- temp_in[j] = input[j * pitch + i] << 2;
+ temp_in[j] = input[j * pitch + i] * 4;
ht.cols(temp_in, temp_out);
for (j = 0; j < 8; ++j)
outptr[j * 8 + i] = temp_out[j];
@@ -637,10 +637,10 @@
c1 = e1 - c1;
a1 -= c1;
d1 += b1;
- op[0] = a1 << WHT_UPSCALE_FACTOR;
- op[1] = c1 << WHT_UPSCALE_FACTOR;
- op[2] = d1 << WHT_UPSCALE_FACTOR;
- op[3] = b1 << WHT_UPSCALE_FACTOR;
+ op[0] = a1 * UNIT_QUANT_FACTOR;
+ op[1] = c1 * UNIT_QUANT_FACTOR;
+ op[2] = d1 * UNIT_QUANT_FACTOR;
+ op[3] = b1 * UNIT_QUANT_FACTOR;
ip += 4;
op += 4;
@@ -975,7 +975,7 @@
// Columns
for (i = 0; i < 16; ++i) {
for (j = 0; j < 16; ++j)
- temp_in[j] = input[j * pitch + i] << 2;
+ temp_in[j] = input[j * pitch + i] * 4;
ht.cols(temp_in, temp_out);
for (j = 0; j < 16; ++j)
outptr[j * 16 + i] = (temp_out[j] + 1 + (temp_out[j] < 0)) >> 2;
@@ -1335,7 +1335,7 @@
for (i = 0; i < 32; ++i) {
int temp_in[32], temp_out[32];
for (j = 0; j < 32; ++j)
- temp_in[j] = input[j * shortpitch + i] << 2;
+ temp_in[j] = input[j * shortpitch + i] * 4;
dct32_1d(temp_in, temp_out, 0);
for (j = 0; j < 32; ++j)
output[j * 32 + i] = (temp_out[j] + 1 + (temp_out[j] > 0)) >> 2;
@@ -1364,7 +1364,7 @@
for (i = 0; i < 32; ++i) {
int temp_in[32], temp_out[32];
for (j = 0; j < 32; ++j)
- temp_in[j] = input[j * shortpitch + i] << 2;
+ temp_in[j] = input[j * shortpitch + i] * 4;
dct32_1d(temp_in, temp_out, 0);
for (j = 0; j < 32; ++j)
// TODO(cd): see quality impact of only doing
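
The systematic << n to * (1 << n) conversion in this file is not cosmetic: the DCT inputs are signed residuals, and left-shifting a negative signed value is undefined behavior in C (C99 6.5.7), while the equivalent multiplication is fully defined and compiles to the same instruction for in-range values. A two-line illustration:

    #include <stdint.h>

    static int16_t upscale_residual(int16_t in) {
      return (int16_t)(in * 16);  /* defined for negative `in` */
      /* return in << 4; */       /* undefined behavior when `in` < 0 */
    }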
diff --git a/vp9/encoder/vp9_encodeframe.c b/vp9/encoder/vp9_encodeframe.c
index 3b92a39..8950a05 100644
--- a/vp9/encoder/vp9_encodeframe.c
+++ b/vp9/encoder/vp9_encodeframe.c
@@ -390,6 +390,9 @@
}
x->skip = ctx->skip;
+ vpx_memcpy(x->zcoeff_blk[mbmi->tx_size], ctx->zcoeff_blk,
+ sizeof(ctx->zcoeff_blk));
+
if (!output_enabled)
return;
@@ -428,18 +431,19 @@
cpi->mode_chosen_counts[mb_mode_index]++;
if (is_inter_block(mbmi)
&& (mbmi->sb_type < BLOCK_8X8 || mbmi->mode == NEWMV)) {
- int_mv best_mv, best_second_mv;
+ int_mv best_mv[2];
const MV_REFERENCE_FRAME rf1 = mbmi->ref_frame[0];
const MV_REFERENCE_FRAME rf2 = mbmi->ref_frame[1];
- best_mv.as_int = ctx->best_ref_mv.as_int;
- best_second_mv.as_int = ctx->second_best_ref_mv.as_int;
+ best_mv[0].as_int = ctx->best_ref_mv.as_int;
+ best_mv[1].as_int = ctx->second_best_ref_mv.as_int;
if (mbmi->mode == NEWMV) {
- best_mv.as_int = mbmi->ref_mvs[rf1][0].as_int;
- best_second_mv.as_int = mbmi->ref_mvs[rf2][0].as_int;
+ best_mv[0].as_int = mbmi->ref_mvs[rf1][0].as_int;
+ if (rf2 > 0)
+ best_mv[1].as_int = mbmi->ref_mvs[rf2][0].as_int;
}
- mbmi->best_mv.as_int = best_mv.as_int;
- mbmi->best_second_mv.as_int = best_second_mv.as_int;
- vp9_update_nmv_count(cpi, x, &best_mv, &best_second_mv);
+ mbmi->best_mv[0].as_int = best_mv[0].as_int;
+ mbmi->best_mv[1].as_int = best_mv[1].as_int;
+ vp9_update_mv_count(cpi, x, best_mv);
}
if (cm->mcomp_filter_type == SWITCHABLE && is_inter_mode(mbmi->mode)) {
@@ -947,323 +951,6 @@
}
}
-static void set_block_size(VP9_COMMON * const cm, MODE_INFO **mi_8x8,
- BLOCK_SIZE bsize, int mis, int mi_row,
- int mi_col) {
- int r, c;
- const int bs = MAX(num_8x8_blocks_wide_lookup[bsize],
- num_8x8_blocks_high_lookup[bsize]);
- const int idx_str = mis * mi_row + mi_col;
- MODE_INFO **const mi2 = &mi_8x8[idx_str];
-
- mi2[0] = cm->mi + idx_str;
- mi2[0]->mbmi.sb_type = bsize;
-
- for (r = 0; r < bs; r++)
- for (c = 0; c < bs; c++)
- if (mi_row + r < cm->mi_rows && mi_col + c < cm->mi_cols)
- mi2[r * mis + c] = mi2[0];
-}
-
-typedef struct {
- int64_t sum_square_error;
- int64_t sum_error;
- int count;
- int variance;
-} var;
-
-typedef struct {
- var none;
- var horz[2];
- var vert[2];
-} partition_variance;
-
-#define VT(TYPE, BLOCKSIZE) \
- typedef struct { \
- partition_variance vt; \
- BLOCKSIZE split[4]; } TYPE;
-
-VT(v8x8, var)
-VT(v16x16, v8x8)
-VT(v32x32, v16x16)
-VT(v64x64, v32x32)
-
-typedef struct {
- partition_variance *vt;
- var *split[4];
-} vt_node;
-
-typedef enum {
- V16X16,
- V32X32,
- V64X64,
-} TREE_LEVEL;
-
-static void tree_to_node(void *data, BLOCK_SIZE bsize, vt_node *node) {
- int i;
- switch (bsize) {
- case BLOCK_64X64: {
- v64x64 *vt = (v64x64 *) data;
- node->vt = &vt->vt;
- for (i = 0; i < 4; i++)
- node->split[i] = &vt->split[i].vt.none;
- break;
- }
- case BLOCK_32X32: {
- v32x32 *vt = (v32x32 *) data;
- node->vt = &vt->vt;
- for (i = 0; i < 4; i++)
- node->split[i] = &vt->split[i].vt.none;
- break;
- }
- case BLOCK_16X16: {
- v16x16 *vt = (v16x16 *) data;
- node->vt = &vt->vt;
- for (i = 0; i < 4; i++)
- node->split[i] = &vt->split[i].vt.none;
- break;
- }
- case BLOCK_8X8: {
- v8x8 *vt = (v8x8 *) data;
- node->vt = &vt->vt;
- for (i = 0; i < 4; i++)
- node->split[i] = &vt->split[i];
- break;
- }
- default:
- node->vt = 0;
- for (i = 0; i < 4; i++)
- node->split[i] = 0;
- assert(-1);
- }
-}
-
-// Set variance values given sum square error, sum error, count.
-static void fill_variance(var *v, int64_t s2, int64_t s, int c) {
- v->sum_square_error = s2;
- v->sum_error = s;
- v->count = c;
- if (c > 0)
- v->variance = (int)(256
- * (v->sum_square_error - v->sum_error * v->sum_error / v->count)
- / v->count);
- else
- v->variance = 0;
-}
-
-// Combine 2 variance structures by summing the sum_error, sum_square_error,
-// and counts and then calculating the new variance.
-void sum_2_variances(var *r, var *a, var*b) {
- fill_variance(r, a->sum_square_error + b->sum_square_error,
- a->sum_error + b->sum_error, a->count + b->count);
-}
-
-static void fill_variance_tree(void *data, BLOCK_SIZE bsize) {
- vt_node node;
- tree_to_node(data, bsize, &node);
- sum_2_variances(&node.vt->horz[0], node.split[0], node.split[1]);
- sum_2_variances(&node.vt->horz[1], node.split[2], node.split[3]);
- sum_2_variances(&node.vt->vert[0], node.split[0], node.split[2]);
- sum_2_variances(&node.vt->vert[1], node.split[1], node.split[3]);
- sum_2_variances(&node.vt->none, &node.vt->vert[0], &node.vt->vert[1]);
-}
-
-#if PERFORM_RANDOM_PARTITIONING
-static int set_vt_partitioning(VP9_COMP *cpi, void *data, MODE_INFO *m,
- BLOCK_SIZE block_size, int mi_row,
- int mi_col, int mi_size) {
- VP9_COMMON * const cm = &cpi->common;
- vt_node vt;
- const int mis = cm->mode_info_stride;
- int64_t threshold = 4 * cpi->common.base_qindex * cpi->common.base_qindex;
-
- tree_to_node(data, block_size, &vt);
-
- // split none is available only if we have more than half a block size
- // in width and height inside the visible image
- if (mi_col + mi_size < cm->mi_cols && mi_row + mi_size < cm->mi_rows &&
- (rand() & 3) < 1) {
- set_block_size(cm, m, block_size, mis, mi_row, mi_col);
- return 1;
- }
-
- // vertical split is available on all but the bottom border
- if (mi_row + mi_size < cm->mi_rows && vt.vt->vert[0].variance < threshold
- && (rand() & 3) < 1) {
- set_block_size(cm, m, get_subsize(block_size, PARTITION_VERT), mis, mi_row,
- mi_col);
- return 1;
- }
-
- // horizontal split is available on all but the right border
- if (mi_col + mi_size < cm->mi_cols && vt.vt->horz[0].variance < threshold
- && (rand() & 3) < 1) {
- set_block_size(cm, m, get_subsize(block_size, PARTITION_HORZ), mis, mi_row,
- mi_col);
- return 1;
- }
-
- return 0;
-}
-
-#else // !PERFORM_RANDOM_PARTITIONING
-
-static int set_vt_partitioning(VP9_COMP *cpi, void *data, MODE_INFO **m,
- BLOCK_SIZE bsize, int mi_row,
- int mi_col, int mi_size) {
- VP9_COMMON * const cm = &cpi->common;
- vt_node vt;
- const int mis = cm->mode_info_stride;
- int64_t threshold = 50 * cpi->common.base_qindex;
-
- tree_to_node(data, bsize, &vt);
-
- // split none is available only if we have more than half a block size
- // in width and height inside the visible image
- if (mi_col + mi_size < cm->mi_cols && mi_row + mi_size < cm->mi_rows
- && vt.vt->none.variance < threshold) {
- set_block_size(cm, m, bsize, mis, mi_row, mi_col);
- return 1;
- }
-
- // vertical split is available on all but the bottom border
- if (mi_row + mi_size < cm->mi_rows && vt.vt->vert[0].variance < threshold
- && vt.vt->vert[1].variance < threshold) {
- set_block_size(cm, m, get_subsize(bsize, PARTITION_VERT), mis, mi_row,
- mi_col);
- return 1;
- }
-
- // horizontal split is available on all but the right border
- if (mi_col + mi_size < cm->mi_cols && vt.vt->horz[0].variance < threshold
- && vt.vt->horz[1].variance < threshold) {
- set_block_size(cm, m, get_subsize(bsize, PARTITION_HORZ), mis, mi_row,
- mi_col);
- return 1;
- }
-
- return 0;
-}
-#endif // PERFORM_RANDOM_PARTITIONING
-
-static void choose_partitioning(VP9_COMP *cpi, MODE_INFO **mi_8x8,
- int mi_row, int mi_col) {
- VP9_COMMON * const cm = &cpi->common;
- MACROBLOCK *x = &cpi->mb;
- MACROBLOCKD *xd = &cpi->mb.e_mbd;
- const int mis = cm->mode_info_stride;
- // TODO(JBB): More experimentation or testing of this threshold;
- int64_t threshold = 4;
- int i, j, k;
- v64x64 vt;
- unsigned char * s;
- int sp;
- const unsigned char * d;
- int dp;
- int pixels_wide = 64, pixels_high = 64;
-
- vp9_zero(vt);
- set_offsets(cpi, mi_row, mi_col, BLOCK_64X64);
-
- if (xd->mb_to_right_edge < 0)
- pixels_wide += (xd->mb_to_right_edge >> 3);
-
- if (xd->mb_to_bottom_edge < 0)
- pixels_high += (xd->mb_to_bottom_edge >> 3);
-
- s = x->plane[0].src.buf;
- sp = x->plane[0].src.stride;
-
- // TODO(JBB): Clearly the higher the quantizer the fewer partitions we want
- // but this needs more experimentation.
- threshold = threshold * cpi->common.base_qindex * cpi->common.base_qindex;
-
- d = vp9_64x64_zeros;
- dp = 64;
- if (cm->frame_type != KEY_FRAME) {
- int_mv nearest_mv, near_mv;
- const int idx = cm->ref_frame_map[get_ref_frame_idx(cpi, LAST_FRAME)];
- YV12_BUFFER_CONFIG *ref_fb = &cm->yv12_fb[idx];
- YV12_BUFFER_CONFIG *second_ref_fb = NULL;
-
- setup_pre_planes(xd, 0, ref_fb, mi_row, mi_col,
- &xd->scale_factor[0]);
- setup_pre_planes(xd, 1, second_ref_fb, mi_row, mi_col,
- &xd->scale_factor[1]);
-
- xd->this_mi->mbmi.ref_frame[0] = LAST_FRAME;
- xd->this_mi->mbmi.sb_type = BLOCK_64X64;
- vp9_find_best_ref_mvs(xd,
- mi_8x8[0]->mbmi.ref_mvs[mi_8x8[0]->mbmi.ref_frame[0]],
- &nearest_mv, &near_mv);
-
- xd->this_mi->mbmi.mv[0] = nearest_mv;
- vp9_build_inter_predictors_sby(xd, mi_row, mi_col, BLOCK_64X64);
-
- d = xd->plane[0].dst.buf;
- dp = xd->plane[0].dst.stride;
- }
-
- // Fill in the entire tree of 8x8 variances for splits.
- for (i = 0; i < 4; i++) {
- const int x32_idx = ((i & 1) << 5);
- const int y32_idx = ((i >> 1) << 5);
- for (j = 0; j < 4; j++) {
- const int x16_idx = x32_idx + ((j & 1) << 4);
- const int y16_idx = y32_idx + ((j >> 1) << 4);
- v16x16 *vst = &vt.split[i].split[j];
- for (k = 0; k < 4; k++) {
- int x_idx = x16_idx + ((k & 1) << 3);
- int y_idx = y16_idx + ((k >> 1) << 3);
- unsigned int sse = 0;
- int sum = 0;
- if (x_idx < pixels_wide && y_idx < pixels_high)
- vp9_get_sse_sum_8x8(s + y_idx * sp + x_idx, sp,
- d + y_idx * dp + x_idx, dp, &sse, &sum);
- fill_variance(&vst->split[k].vt.none, sse, sum, 64);
- }
- }
- }
- // Fill the rest of the variance tree by summing the split partition
- // values.
- for (i = 0; i < 4; i++) {
- for (j = 0; j < 4; j++) {
- fill_variance_tree(&vt.split[i].split[j], BLOCK_16X16);
- }
- fill_variance_tree(&vt.split[i], BLOCK_32X32);
- }
- fill_variance_tree(&vt, BLOCK_64X64);
- // Now go through the entire structure, splitting every block size until
- // we get to one that's got a variance lower than our threshold, or we
- // hit 8x8.
- if (!set_vt_partitioning(cpi, &vt, mi_8x8, BLOCK_64X64, mi_row, mi_col,
- 4)) {
- for (i = 0; i < 4; ++i) {
- const int x32_idx = ((i & 1) << 2);
- const int y32_idx = ((i >> 1) << 2);
- if (!set_vt_partitioning(cpi, &vt.split[i], mi_8x8, BLOCK_32X32,
- (mi_row + y32_idx), (mi_col + x32_idx), 2)) {
- for (j = 0; j < 4; ++j) {
- const int x16_idx = ((j & 1) << 1);
- const int y16_idx = ((j >> 1) << 1);
- if (!set_vt_partitioning(cpi, &vt.split[i].split[j], mi_8x8,
- BLOCK_16X16,
- (mi_row + y32_idx + y16_idx),
- (mi_col + x32_idx + x16_idx), 1)) {
- for (k = 0; k < 4; ++k) {
- const int x8_idx = (k & 1);
- const int y8_idx = (k >> 1);
- set_block_size(cm, mi_8x8, BLOCK_8X8, mis,
- (mi_row + y32_idx + y16_idx + y8_idx),
- (mi_col + x32_idx + x16_idx + x8_idx));
- }
- }
- }
- }
- }
- }
-}
-
static void rd_use_partition(VP9_COMP *cpi, MODE_INFO **mi_8x8,
TOKENEXTRA **tp, int mi_row, int mi_col,
BLOCK_SIZE bsize, int *rate, int64_t *dist,
@@ -1881,12 +1568,12 @@
best_dist = sum_dist;
best_rd = sum_rd;
*(get_sb_partitioning(x, bsize)) = subsize;
- } else {
- // skip rectangular partition test when larger block size
- // gives better rd cost
- if (cpi->sf.less_rectangular_check)
- do_rect &= !partition_none_allowed;
}
+ } else {
+ // skip rectangular partition test when larger block size
+ // gives better rd cost
+ if (cpi->sf.less_rectangular_check)
+ do_rect &= !partition_none_allowed;
}
partition_split_done = 1;
restore_context(cpi, mi_row, mi_col, a, l, sa, sl, bsize);
@@ -2056,7 +1743,7 @@
if (cpi->sf.reference_masking)
rd_pick_reference_frame(cpi, mi_row, mi_col);
- if (cpi->sf.partition_by_variance || cpi->sf.use_lastframe_partitioning ||
+ if (cpi->sf.use_lastframe_partitioning ||
cpi->sf.use_one_partition_size_always ) {
const int idx_str = cm->mode_info_stride * mi_row + mi_col;
MODE_INFO **mi_8x8 = cm->mi_grid_visible + idx_str;
@@ -2068,10 +1755,6 @@
set_partitioning(cpi, mi_8x8, mi_row, mi_col);
rd_use_partition(cpi, mi_8x8, tp, mi_row, mi_col, BLOCK_64X64,
&dummy_rate, &dummy_dist, 1);
- } else if (cpi->sf.partition_by_variance) {
- choose_partitioning(cpi, cm->mi_grid_visible, mi_row, mi_col);
- rd_use_partition(cpi, mi_8x8, tp, mi_row, mi_col, BLOCK_64X64,
- &dummy_rate, &dummy_dist, 1);
} else {
if ((cpi->common.current_video_frame
% cpi->sf.last_partitioning_redo_frequency) == 0
@@ -2208,7 +1891,7 @@
cpi->inter_zz_count = 0;
vp9_zero(cm->counts.switchable_interp);
- vp9_zero(cpi->txfm_stepdown_count);
+ vp9_zero(cpi->tx_stepdown_count);
xd->mi_8x8 = cm->mi_grid_visible;
// required for vp9_frame_init_quantizer
@@ -2347,18 +2030,19 @@
int mis, TX_SIZE max_tx_size, int bw, int bh,
int mi_row, int mi_col, BLOCK_SIZE bsize) {
VP9_COMMON * const cm = &cpi->common;
- MB_MODE_INFO * const mbmi = &mi_8x8[0]->mbmi;
- if (mi_row >= cm->mi_rows || mi_col >= cm->mi_cols)
+ if (mi_row >= cm->mi_rows || mi_col >= cm->mi_cols) {
return;
+ } else {
+ MB_MODE_INFO * const mbmi = &mi_8x8[0]->mbmi;
+ if (mbmi->tx_size > max_tx_size) {
+ const int ymbs = MIN(bh, cm->mi_rows - mi_row);
+ const int xmbs = MIN(bw, cm->mi_cols - mi_col);
- if (mbmi->tx_size > max_tx_size) {
- const int ymbs = MIN(bh, cm->mi_rows - mi_row);
- const int xmbs = MIN(bw, cm->mi_cols - mi_col);
-
- assert(vp9_segfeature_active(&cm->seg, mbmi->segment_id, SEG_LVL_SKIP) ||
- get_skip_flag(mi_8x8, mis, ymbs, xmbs));
- set_txfm_flag(mi_8x8, mis, ymbs, xmbs, max_tx_size);
+ assert(vp9_segfeature_active(&cm->seg, mbmi->segment_id, SEG_LVL_SKIP) ||
+ get_skip_flag(mi_8x8, mis, ymbs, xmbs));
+ set_txfm_flag(mi_8x8, mis, ymbs, xmbs, max_tx_size);
+ }
}
}
@@ -2453,9 +2137,9 @@
unsigned int total = 0;
int i;
for (i = 0; i < TX_SIZES; ++i)
- total += cpi->txfm_stepdown_count[i];
+ total += cpi->tx_stepdown_count[i];
if (total) {
- double fraction = (double)cpi->txfm_stepdown_count[0] / total;
+ double fraction = (double)cpi->tx_stepdown_count[0] / total;
cpi->common.tx_mode = fraction > 0.90 ? ALLOW_32X32 : TX_MODE_SELECT;
// printf("fraction = %f\n", fraction);
} // else keep unchanged
@@ -2627,7 +2311,6 @@
} else {
encode_frame_internal(cpi);
}
-
}
static void sum_intra_stats(VP9_COMP *cpi, const MODE_INFO *mi) {
@@ -2732,7 +2415,7 @@
int idx = cm->ref_frame_map[get_ref_frame_idx(cpi, mbmi->ref_frame[0])];
YV12_BUFFER_CONFIG *ref_fb = &cm->yv12_fb[idx];
YV12_BUFFER_CONFIG *second_ref_fb = NULL;
- if (mbmi->ref_frame[1] > 0) {
+ if (has_second_ref(mbmi)) {
idx = cm->ref_frame_map[get_ref_frame_idx(cpi, mbmi->ref_frame[1])];
second_ref_fb = &cm->yv12_fb[idx];
}
@@ -2744,7 +2427,6 @@
setup_pre_planes(xd, 1, second_ref_fb, mi_row, mi_col,
&xd->scale_factor[1]);
-
vp9_build_inter_predictors_sb(xd, mi_row, mi_col, MAX(bsize, BLOCK_8X8));
}
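
The partitioning code deleted above merged variance accumulators by summing raw moments and re-deriving the variance, an identity that is exact (no averaging of variances is involved). The merge, as the removed code computed it:

    #include <stdint.h>

    typedef struct {
      int64_t sum_square_error;
      int64_t sum_error;
      int count;
      int variance;
    } var;

    /* variance = 256 * (E[x^2] - E[x]^2), in the fixed-point scale the
     * removed code used. */
    static void fill_variance(var *v, int64_t s2, int64_t s, int c) {
      v->sum_square_error = s2;
      v->sum_error = s;
      v->count = c;
      v->variance = c > 0 ? (int)(256 * (s2 - s * s / c) / c) : 0;
    }

    /* Accumulators combine exactly by summing both moments and the count. */
    static void sum_2_variances(var *r, const var *a, const var *b) {
      fill_variance(r, a->sum_square_error + b->sum_square_error,
                    a->sum_error + b->sum_error, a->count + b->count);
    }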
diff --git a/vp9/encoder/vp9_encodemb.c b/vp9/encoder/vp9_encodemb.c
index 8dd80a5..76a5d33 100644
--- a/vp9/encoder/vp9_encodemb.c
+++ b/vp9/encoder/vp9_encodemb.c
@@ -53,7 +53,7 @@
if (eob <= 1)
vp9_short_idct8x8_1_add(dqcoeff, dest, stride);
else if (eob <= 10)
- vp9_short_idct10_8x8_add(dqcoeff, dest, stride);
+ vp9_short_idct8x8_10_add(dqcoeff, dest, stride);
else
vp9_short_idct8x8_add(dqcoeff, dest, stride);
}
@@ -64,7 +64,7 @@
if (eob <= 1)
vp9_short_idct16x16_1_add(dqcoeff, dest, stride);
else if (eob <= 10)
- vp9_short_idct10_16x16_add(dqcoeff, dest, stride);
+ vp9_short_idct16x16_10_add(dqcoeff, dest, stride);
else
vp9_short_idct16x16_add(dqcoeff, dest, stride);
}
@@ -161,7 +161,7 @@
int best, band, pt;
PLANE_TYPE type = pd->plane_type;
int err_mult = plane_rd_mult[type];
- int default_eob;
+ const int default_eob = 16 << (tx_size << 1);
const int16_t *scan, *nb;
const int mul = 1 + (tx_size == TX_32X32);
uint8_t token_cache[1024];
@@ -172,29 +172,7 @@
assert((!type && !plane) || (type && plane));
dqcoeff_ptr = BLOCK_OFFSET(pd->dqcoeff, block);
qcoeff_ptr = BLOCK_OFFSET(pd->qcoeff, block);
- switch (tx_size) {
- default:
- case TX_4X4:
- default_eob = 16;
- scan = get_scan_4x4(get_tx_type_4x4(type, xd, ib));
- band_translate = vp9_coefband_trans_4x4;
- break;
- case TX_8X8:
- scan = get_scan_8x8(get_tx_type_8x8(type, xd));
- default_eob = 64;
- band_translate = vp9_coefband_trans_8x8plus;
- break;
- case TX_16X16:
- scan = get_scan_16x16(get_tx_type_16x16(type, xd));
- default_eob = 256;
- band_translate = vp9_coefband_trans_8x8plus;
- break;
- case TX_32X32:
- scan = vp9_default_scan_32x32;
- default_eob = 1024;
- band_translate = vp9_coefband_trans_8x8plus;
- break;
- }
+ get_scan_and_band(xd, tx_size, type, ib, &scan, &nb, &band_translate);
assert(eob <= default_eob);
/* Now set up a Viterbi trellis to evaluate alternative roundings. */
@@ -213,7 +191,6 @@
for (i = 0; i < eob; i++)
token_cache[scan[i]] = vp9_pt_energy_class[vp9_dct_value_tokens_ptr[
qcoeff_ptr[scan[i]]].token];
- nb = vp9_get_coef_neighbors_handle(scan);
for (i = eob; i-- > i0;) {
int base_bits, d2, dx;
@@ -387,36 +364,10 @@
const int num_4x4_h = num_4x4_blocks_high_lookup[plane_bsize];
const MB_MODE_INFO *mbmi = &xd->this_mi->mbmi;
const TX_SIZE tx_size = plane ? get_uv_tx_size(mbmi) : mbmi->tx_size;
- int i;
- switch (tx_size) {
- case TX_4X4:
- vpx_memcpy(args->ctx->ta[plane], pd->above_context,
- sizeof(ENTROPY_CONTEXT) * num_4x4_w);
- vpx_memcpy(args->ctx->tl[plane], pd->left_context,
- sizeof(ENTROPY_CONTEXT) * num_4x4_h);
- break;
- case TX_8X8:
- for (i = 0; i < num_4x4_w; i += 2)
- args->ctx->ta[plane][i] = !!*(uint16_t *)&pd->above_context[i];
- for (i = 0; i < num_4x4_h; i += 2)
- args->ctx->tl[plane][i] = !!*(uint16_t *)&pd->left_context[i];
- break;
- case TX_16X16:
- for (i = 0; i < num_4x4_w; i += 4)
- args->ctx->ta[plane][i] = !!*(uint32_t *)&pd->above_context[i];
- for (i = 0; i < num_4x4_h; i += 4)
- args->ctx->tl[plane][i] = !!*(uint32_t *)&pd->left_context[i];
- break;
- case TX_32X32:
- for (i = 0; i < num_4x4_w; i += 8)
- args->ctx->ta[plane][i] = !!*(uint64_t *)&pd->above_context[i];
- for (i = 0; i < num_4x4_h; i += 8)
- args->ctx->tl[plane][i] = !!*(uint64_t *)&pd->left_context[i];
- break;
- default:
- assert(0);
- }
+ vp9_get_entropy_contexts(tx_size, args->ctx->ta[plane], args->ctx->tl[plane],
+ pd->above_context, pd->left_context,
+ num_4x4_w, num_4x4_h);
}
void vp9_xform_quant(int plane, int block, BLOCK_SIZE plane_bsize,
@@ -504,6 +455,14 @@
int16_t *const dqcoeff = BLOCK_OFFSET(pd->dqcoeff, block);
uint8_t *const dst = raster_block_offset_uint8(plane_bsize, raster_block,
pd->dst.buf, pd->dst.stride);
+
+ // TODO(jingning): per transformed block zero forcing is only enabled for
+ // the luma component. Will integrate chroma components as well.
+ if (x->zcoeff_blk[tx_size][block] && plane == 0) {
+ pd->eobs[block] = 0;
+ return;
+ }
+
vp9_xform_quant(plane, block, plane_bsize, tx_size, arg);
if (x->optimize)
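
The new closed form `default_eob = 16 << (tx_size << 1)` reproduces the removed switch exactly: each transform-size step quadruples the coefficient count, so with TX_4X4..TX_32X32 numbered 0..3 the expression yields 16, 64, 256, 1024. A quick check:

    enum { TX_4X4 = 0, TX_8X8 = 1, TX_16X16 = 2, TX_32X32 = 3 };

    static int default_eob_for(int tx_size) {
      return 16 << (tx_size << 1);  /* 16 * 4^tx_size */
    }
    /* default_eob_for(TX_4X4)   == 16
       default_eob_for(TX_8X8)   == 64
       default_eob_for(TX_16X16) == 256
       default_eob_for(TX_32X32) == 1024 */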
diff --git a/vp9/encoder/vp9_encodemv.c b/vp9/encoder/vp9_encodemv.c
index ed3a2bb..db08ee8 100644
--- a/vp9/encoder/vp9_encodemv.c
+++ b/vp9/encoder/vp9_encodemv.c
@@ -314,44 +314,34 @@
build_nmv_component_cost_table(mvcost[1], &mvctx->comps[1], usehp);
}
-void vp9_update_nmv_count(VP9_COMP *cpi, MACROBLOCK *x,
- int_mv *best_ref_mv, int_mv *second_best_ref_mv) {
+static void inc_mvs(int_mv mv[2], int_mv ref[2], int is_compound,
+ nmv_context_counts *counts) {
+ int i;
+ for (i = 0; i < 1 + is_compound; ++i) {
+ const MV diff = { mv[i].as_mv.row - ref[i].as_mv.row,
+ mv[i].as_mv.col - ref[i].as_mv.col };
+ vp9_inc_mv(&diff, counts);
+ }
+}
+
+void vp9_update_mv_count(VP9_COMP *cpi, MACROBLOCK *x, int_mv best_ref_mv[2]) {
MODE_INFO *mi = x->e_mbd.mi_8x8[0];
MB_MODE_INFO *const mbmi = &mi->mbmi;
- MV diff;
- const int num_4x4_blocks_wide = num_4x4_blocks_wide_lookup[mbmi->sb_type];
- const int num_4x4_blocks_high = num_4x4_blocks_high_lookup[mbmi->sb_type];
- int idx, idy;
+ const int is_compound = has_second_ref(mbmi);
if (mbmi->sb_type < BLOCK_8X8) {
- PARTITION_INFO *pi = x->partition_info;
- for (idy = 0; idy < 2; idy += num_4x4_blocks_high) {
- for (idx = 0; idx < 2; idx += num_4x4_blocks_wide) {
- const int i = idy * 2 + idx;
- if (pi->bmi[i].mode == NEWMV) {
- diff.row = mi->bmi[i].as_mv[0].as_mv.row - best_ref_mv->as_mv.row;
- diff.col = mi->bmi[i].as_mv[0].as_mv.col - best_ref_mv->as_mv.col;
- vp9_inc_mv(&diff, &cpi->NMVcount);
+ const int num_4x4_w = num_4x4_blocks_wide_lookup[mbmi->sb_type];
+ const int num_4x4_h = num_4x4_blocks_high_lookup[mbmi->sb_type];
+ int idx, idy;
- if (mi->mbmi.ref_frame[1] > INTRA_FRAME) {
- diff.row = mi->bmi[i].as_mv[1].as_mv.row -
- second_best_ref_mv->as_mv.row;
- diff.col = mi->bmi[i].as_mv[1].as_mv.col -
- second_best_ref_mv->as_mv.col;
- vp9_inc_mv(&diff, &cpi->NMVcount);
- }
- }
+ for (idy = 0; idy < 2; idy += num_4x4_h) {
+ for (idx = 0; idx < 2; idx += num_4x4_w) {
+ const int i = idy * 2 + idx;
+ if (x->partition_info->bmi[i].mode == NEWMV)
+ inc_mvs(mi->bmi[i].as_mv, best_ref_mv, is_compound, &cpi->NMVcount);
}
}
} else if (mbmi->mode == NEWMV) {
- diff.row = mbmi->mv[0].as_mv.row - best_ref_mv->as_mv.row;
- diff.col = mbmi->mv[0].as_mv.col - best_ref_mv->as_mv.col;
- vp9_inc_mv(&diff, &cpi->NMVcount);
-
- if (mbmi->ref_frame[1] > INTRA_FRAME) {
- diff.row = mbmi->mv[1].as_mv.row - second_best_ref_mv->as_mv.row;
- diff.col = mbmi->mv[1].as_mv.col - second_best_ref_mv->as_mv.col;
- vp9_inc_mv(&diff, &cpi->NMVcount);
- }
+ inc_mvs(mbmi->mv, best_ref_mv, is_compound, &cpi->NMVcount);
}
}
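
inc_mvs() above collapses the duplicated single- and compound-reference paths into one loop; the `1 + is_compound` bound runs the body once or twice. A self-contained sketch of the shape (MV mirrors the row/col pair used throughout the patch):

    #include <stdint.h>

    typedef struct { int16_t row, col; } MV;

    /* One loop body covers both prediction directions: one iteration for a
     * single reference, two for a compound one. */
    static void count_mv_diffs(const MV mv[2], const MV ref[2], int is_compound,
                               void (*inc)(const MV *diff)) {
      int i;
      for (i = 0; i < 1 + is_compound; ++i) {
        const MV diff = { mv[i].row - ref[i].row, mv[i].col - ref[i].col };
        inc(&diff);  /* vp9_inc_mv(&diff, counts) in the real code */
      }
    }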
diff --git a/vp9/encoder/vp9_encodemv.h b/vp9/encoder/vp9_encodemv.h
index 2789ce1..6331778 100644
--- a/vp9/encoder/vp9_encodemv.h
+++ b/vp9/encoder/vp9_encodemv.h
@@ -25,7 +25,7 @@
int usehp,
int mvc_flag_v,
int mvc_flag_h);
-void vp9_update_nmv_count(VP9_COMP *cpi, MACROBLOCK *x,
- int_mv *best_ref_mv, int_mv *second_best_ref_mv);
+
+void vp9_update_mv_count(VP9_COMP *cpi, MACROBLOCK *x, int_mv best_ref_mv[2]);
#endif // VP9_ENCODER_VP9_ENCODEMV_H_
diff --git a/vp9/encoder/vp9_firstpass.c b/vp9/encoder/vp9_firstpass.c
index 9cf7b83..eaa3bd1 100644
--- a/vp9/encoder/vp9_firstpass.c
+++ b/vp9/encoder/vp9_firstpass.c
@@ -534,10 +534,11 @@
recon_yoffset = (mb_row * recon_y_stride * 16);
recon_uvoffset = (mb_row * recon_uv_stride * 8);
- // Set up limit values for motion vectors to prevent them extending outside the UMV borders
- x->mv_row_min = -((mb_row * 16) + (VP9BORDERINPIXELS - 8));
+ // Set up limit values for motion vectors to prevent them extending
+ // outside the UMV borders
+ x->mv_row_min = -((mb_row * 16) + BORDER_MV_PIXELS_B16);
x->mv_row_max = ((cm->mb_rows - 1 - mb_row) * 16)
- + (VP9BORDERINPIXELS - 8);
+ + BORDER_MV_PIXELS_B16;
// for each macroblock col in image
for (mb_col = 0; mb_col < cm->mb_cols; mb_col++) {
@@ -583,9 +584,9 @@
intra_error += (int64_t)this_error;
// Set up limit values for motion vectors to prevent them extending outside the UMV borders
- x->mv_col_min = -((mb_col * 16) + (VP9BORDERINPIXELS - 8));
+ x->mv_col_min = -((mb_col * 16) + BORDER_MV_PIXELS_B16);
x->mv_col_max = ((cm->mb_cols - 1 - mb_col) * 16)
- + (VP9BORDERINPIXELS - 8);
+ + BORDER_MV_PIXELS_B16;
// Other than for the first frame do a motion search
if (cm->current_video_frame > 0) {
@@ -660,8 +661,8 @@
neutral_count++;
}
- mv.as_mv.row <<= 3;
- mv.as_mv.col <<= 3;
+ mv.as_mv.row *= 8;
+ mv.as_mv.col *= 8;
this_error = motion_error;
vp9_set_mbmode_and_mvs(x, NEWMV, &mv);
xd->this_mi->mbmi.tx_size = TX_4X4;
@@ -2093,14 +2094,19 @@
cpi->twopass.est_max_qcorrection_factor = 1.0;
// Set a cq_level in constrained quality mode.
+ // Commenting this code out for now since it does not seem to be
+ // working well.
+ /*
if (cpi->oxcf.end_usage == USAGE_CONSTRAINED_QUALITY) {
int est_cq = estimate_cq(cpi, &cpi->twopass.total_left_stats,
- section_target_bandwidth);
+ section_target_bandwidth);
- cpi->cq_target_quality = cpi->oxcf.cq_level;
if (est_cq > cpi->cq_target_quality)
cpi->cq_target_quality = est_cq;
+ else
+ cpi->cq_target_quality = cpi->oxcf.cq_level;
}
+ */
// guess at maxq needed in 2nd pass
cpi->twopass.maxq_max_limit = cpi->worst_quality;
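
The firstpass limits keep full-pel motion search inside the UMV border; the patch replaces the hand-computed `VP9BORDERINPIXELS - 8` with the named BORDER_MV_PIXELS_B16, whose definition is not shown here. A sketch of the row-limit computation with the border treated as an opaque constant (the value below is a placeholder, not the real definition):

    /* Placeholder value; the real BORDER_MV_PIXELS_B16 is defined elsewhere
     * in the tree and is not part of this patch. */
    #define BORDER_MV_PIXELS_B16 (16 + 4)

    static void set_mv_row_limits(int mb_row, int mb_rows,
                                  int *mv_row_min, int *mv_row_max) {
      /* Allow the search to reach the border above and below the image. */
      *mv_row_min = -((mb_row * 16) + BORDER_MV_PIXELS_B16);
      *mv_row_max = ((mb_rows - 1 - mb_row) * 16) + BORDER_MV_PIXELS_B16;
    }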
diff --git a/vp9/encoder/vp9_mbgraph.c b/vp9/encoder/vp9_mbgraph.c
index 5a671f2..0a6576e 100644
--- a/vp9/encoder/vp9_mbgraph.c
+++ b/vp9/encoder/vp9_mbgraph.c
@@ -10,14 +10,17 @@
#include <limits.h>
-#include <vpx_mem/vpx_mem.h>
-#include <vp9/encoder/vp9_encodeintra.h>
-#include <vp9/encoder/vp9_rdopt.h>
-#include <vp9/common/vp9_blockd.h>
-#include <vp9/common/vp9_reconinter.h>
-#include <vp9/common/vp9_reconintra.h>
-#include <vp9/common/vp9_systemdependent.h>
-#include <vp9/encoder/vp9_segmentation.h>
+#include "vpx_mem/vpx_mem.h"
+#include "vp9/encoder/vp9_encodeintra.h"
+#include "vp9/encoder/vp9_rdopt.h"
+#include "vp9/encoder/vp9_segmentation.h"
+#include "vp9/encoder/vp9_mcomp.h"
+#include "vp9/common/vp9_blockd.h"
+#include "vp9/common/vp9_reconinter.h"
+#include "vp9/common/vp9_reconintra.h"
+#include "vp9/common/vp9_systemdependent.h"
+
+
static unsigned int do_16x16_motion_iteration(VP9_COMP *cpi,
int_mv *ref_mv,
@@ -46,9 +49,9 @@
ref_full.as_mv.row = ref_mv->as_mv.row >> 3;
/*cpi->sf.search_method == HEX*/
- best_err = vp9_hex_search(x, &ref_full, step_param, x->errorperbit,
+ best_err = vp9_hex_search(x, &ref_full.as_mv, step_param, x->errorperbit,
0, &v_fn_ptr,
- 0, ref_mv, dst_mv);
+ 0, &ref_mv->as_mv, &dst_mv->as_mv);
// Try sub-pixel MC
// if (bestsme > error_thresh && bestsme < INT_MAX)
@@ -57,7 +60,7 @@
unsigned int sse;
best_err = cpi->find_fractional_mv_step(
x,
- dst_mv, ref_mv,
+ &dst_mv->as_mv, &ref_mv->as_mv,
x->errorperbit, &v_fn_ptr,
0, cpi->sf.subpel_iters_per_step, NULL, NULL,
& distortion, &sse);
@@ -246,9 +249,8 @@
// Set up limit values for motion vectors to prevent them extending outside the UMV borders
arf_top_mv.as_int = 0;
gld_top_mv.as_int = 0;
- x->mv_row_min = -(VP9BORDERINPIXELS - 8 - VP9_INTERP_EXTEND);
- x->mv_row_max = (cm->mb_rows - 1) * 8 + VP9BORDERINPIXELS
- - 8 - VP9_INTERP_EXTEND;
+ x->mv_row_min = -BORDER_MV_PIXELS_B16;
+ x->mv_row_max = (cm->mb_rows - 1) * 8 + BORDER_MV_PIXELS_B16;
xd->up_available = 0;
xd->plane[0].dst.stride = buf->y_stride;
xd->plane[0].pre[0].stride = buf->y_stride;
@@ -267,9 +269,8 @@
// Set up limit values for motion vectors to prevent them extending outside the UMV borders
arf_left_mv.as_int = arf_top_mv.as_int;
gld_left_mv.as_int = gld_top_mv.as_int;
- x->mv_col_min = -(VP9BORDERINPIXELS - 8 - VP9_INTERP_EXTEND);
- x->mv_col_max = (cm->mb_cols - 1) * 8 + VP9BORDERINPIXELS
- - 8 - VP9_INTERP_EXTEND;
+ x->mv_col_min = -BORDER_MV_PIXELS_B16;
+ x->mv_col_max = (cm->mb_cols - 1) * 8 + BORDER_MV_PIXELS_B16;
xd->left_available = 0;
for (mb_col = 0; mb_col < cm->mb_cols; mb_col++) {
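
The hex-search calls above now pass the inner MV (e.g. &ref_full.as_mv) instead of the int_mv wrapper. For context, int_mv is the usual union that overlays a 32-bit integer on the row/col pair so whole vectors can be assigned and compared in one operation; the layout below is reproduced from memory, so treat it as an assumption:

    #include <stdint.h>

    typedef struct { int16_t row, col; } MV;

    typedef union {
      uint32_t as_int;  /* whole-vector assignment and comparison */
      MV as_mv;         /* per-component access */
    } int_mv;

    int main(void) {
      int_mv a = { 0 };
      a.as_mv.row = -3;
      a.as_mv.col = 7;
      return a.as_int == 0;  /* 0 here: a nonzero vector has as_int != 0 */
    }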
diff --git a/vp9/encoder/vp9_mcomp.c b/vp9/encoder/vp9_mcomp.c
index 1360088..44eaa65 100644
--- a/vp9/encoder/vp9_mcomp.c
+++ b/vp9/encoder/vp9_mcomp.c
@@ -59,38 +59,39 @@
return sr;
}
-int vp9_mv_bit_cost(int_mv *mv, int_mv *ref, int *mvjcost, int *mvcost[2],
- int weight) {
- MV v;
- v.row = mv->as_mv.row - ref->as_mv.row;
- v.col = mv->as_mv.col - ref->as_mv.col;
- return ROUND_POWER_OF_TWO((mvjcost[vp9_get_mv_joint(&v)] +
- mvcost[0][v.row] +
- mvcost[1][v.col]) * weight, 7);
+static INLINE int mv_cost(const MV *mv,
+ const int *joint_cost, int *comp_cost[2]) {
+ return joint_cost[vp9_get_mv_joint(mv)] +
+ comp_cost[0][mv->row] + comp_cost[1][mv->col];
}
-static int mv_err_cost(int_mv *mv, int_mv *ref, int *mvjcost, int *mvcost[2],
+int vp9_mv_bit_cost(const MV *mv, const MV *ref,
+ const int *mvjcost, int *mvcost[2], int weight) {
+ const MV diff = { mv->row - ref->row,
+ mv->col - ref->col };
+ return ROUND_POWER_OF_TWO(mv_cost(&diff, mvjcost, mvcost) * weight, 7);
+}
+
+static int mv_err_cost(const MV *mv, const MV *ref,
+ const int *mvjcost, int *mvcost[2],
int error_per_bit) {
if (mvcost) {
- MV v;
- v.row = mv->as_mv.row - ref->as_mv.row;
- v.col = mv->as_mv.col - ref->as_mv.col;
- return ROUND_POWER_OF_TWO((mvjcost[vp9_get_mv_joint(&v)] +
- mvcost[0][v.row] +
- mvcost[1][v.col]) * error_per_bit, 13);
+ const MV diff = { mv->row - ref->row,
+ mv->col - ref->col };
+ return ROUND_POWER_OF_TWO(mv_cost(&diff, mvjcost, mvcost) *
+ error_per_bit, 13);
}
return 0;
}
-static int mvsad_err_cost(int_mv *mv, int_mv *ref, int *mvjsadcost,
- int *mvsadcost[2], int error_per_bit) {
+static int mvsad_err_cost(const MV *mv, const MV *ref,
+ const int *mvjsadcost, int *mvsadcost[2],
+ int error_per_bit) {
if (mvsadcost) {
- MV v;
- v.row = mv->as_mv.row - ref->as_mv.row;
- v.col = mv->as_mv.col - ref->as_mv.col;
- return ROUND_POWER_OF_TWO((mvjsadcost[vp9_get_mv_joint(&v)] +
- mvsadcost[0][v.row] +
- mvsadcost[1][v.col]) * error_per_bit, 8);
+ const MV diff = { mv->row - ref->row,
+ mv->col - ref->col };
+ return ROUND_POWER_OF_TWO(mv_cost(&diff, mvjsadcost, mvsadcost) *
+ error_per_bit, 8);
}
return 0;
}
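
The cost refactor above leaves one raw lookup, mv_cost(), shared by the bit-cost, error-cost, and SAD-cost wrappers, which now differ only in their tables, weight, and final shift. A sketch of the shape; ROUND_POWER_OF_TWO is written out on the assumption that it is the project's usual round-half-up add-then-shift, and get_mv_joint() is a trivial stand-in for vp9_get_mv_joint():

    #include <stdint.h>

    #define ROUND_POWER_OF_TWO(value, n) (((value) + (1 << ((n) - 1))) >> (n))

    typedef struct { int16_t row, col; } MV;

    /* Trivial stand-in: 0..3 encodes which components are nonzero. */
    static int get_mv_joint(const MV *mv) {
      return (mv->row != 0) * 2 + (mv->col != 0);
    }

    /* comp_cost[] tables are centered pointers in the real encoder, so the
     * negative row/col indexing below is valid there. */
    static int mv_cost(const MV *mv, const int *joint_cost,
                       int *comp_cost[2]) {
      return joint_cost[get_mv_joint(mv)] +
             comp_cost[0][mv->row] + comp_cost[1][mv->col];
    }

    static int mv_bit_cost(const MV *mv, const MV *ref, const int *joint_cost,
                           int *comp_cost[2], int weight) {
      const MV diff = { mv->row - ref->row, mv->col - ref->col };
      return ROUND_POWER_OF_TWO(mv_cost(&diff, joint_cost, comp_cost) * weight,
                                7);
    }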
@@ -136,66 +137,26 @@
}
void vp9_init3smotion_compensation(MACROBLOCK *x, int stride) {
- int len;
- int search_site_count = 0;
+ int len, ss_count = 1;
- // Generate offsets for 8 search sites per step.
- x->ss[search_site_count].mv.col = 0;
- x->ss[search_site_count].mv.row = 0;
- x->ss[search_site_count].offset = 0;
- search_site_count++;
+ x->ss[0].mv.col = x->ss[0].mv.row = 0;
+ x->ss[0].offset = 0;
for (len = MAX_FIRST_STEP; len > 0; len /= 2) {
- // Compute offsets for search sites.
- x->ss[search_site_count].mv.col = 0;
- x->ss[search_site_count].mv.row = -len;
- x->ss[search_site_count].offset = -len * stride;
- search_site_count++;
-
- // Compute offsets for search sites.
- x->ss[search_site_count].mv.col = 0;
- x->ss[search_site_count].mv.row = len;
- x->ss[search_site_count].offset = len * stride;
- search_site_count++;
-
- // Compute offsets for search sites.
- x->ss[search_site_count].mv.col = -len;
- x->ss[search_site_count].mv.row = 0;
- x->ss[search_site_count].offset = -len;
- search_site_count++;
-
- // Compute offsets for search sites.
- x->ss[search_site_count].mv.col = len;
- x->ss[search_site_count].mv.row = 0;
- x->ss[search_site_count].offset = len;
- search_site_count++;
-
- // Compute offsets for search sites.
- x->ss[search_site_count].mv.col = -len;
- x->ss[search_site_count].mv.row = -len;
- x->ss[search_site_count].offset = -len * stride - len;
- search_site_count++;
-
- // Compute offsets for search sites.
- x->ss[search_site_count].mv.col = len;
- x->ss[search_site_count].mv.row = -len;
- x->ss[search_site_count].offset = -len * stride + len;
- search_site_count++;
-
- // Compute offsets for search sites.
- x->ss[search_site_count].mv.col = -len;
- x->ss[search_site_count].mv.row = len;
- x->ss[search_site_count].offset = len * stride - len;
- search_site_count++;
-
- // Compute offsets for search sites.
- x->ss[search_site_count].mv.col = len;
- x->ss[search_site_count].mv.row = len;
- x->ss[search_site_count].offset = len * stride + len;
- search_site_count++;
+ // Generate offsets for 8 search sites per step.
+ const MV ss_mvs[8] = {
+ {-len, 0 }, {len, 0 }, { 0, -len}, {0, len},
+ {-len, -len}, {-len, len}, {len, -len}, {len, len}
+ };
+ int i;
+ for (i = 0; i < 8; ++i) {
+ search_site *const ss = &x->ss[ss_count++];
+ ss->mv = ss_mvs[i];
+ ss->offset = ss->mv.row * stride + ss->mv.col;
+ }
}
- x->ss_count = search_site_count;
+ x->ss_count = ss_count;
x->searches_per_step = 8;
}
@@ -313,7 +274,7 @@
}
int vp9_find_best_sub_pixel_iterative(MACROBLOCK *x,
- int_mv *bestmv, int_mv *ref_mv,
+ MV *bestmv, const MV *ref_mv,
int error_per_bit,
const vp9_variance_fn_ptr_t *vfp,
int forced_stop,
@@ -333,30 +294,26 @@
unsigned int eighthiters = iters_per_step;
int thismse;
- uint8_t *y = xd->plane[0].pre[0].buf +
- (bestmv->as_mv.row) * xd->plane[0].pre[0].stride +
- bestmv->as_mv.col;
-
const int y_stride = xd->plane[0].pre[0].stride;
+ const int offset = bestmv->row * y_stride + bestmv->col;
+ uint8_t *y = xd->plane[0].pre[0].buf + offset;
- int rr = ref_mv->as_mv.row;
- int rc = ref_mv->as_mv.col;
- int br = bestmv->as_mv.row << 3;
- int bc = bestmv->as_mv.col << 3;
+ int rr = ref_mv->row;
+ int rc = ref_mv->col;
+ int br = bestmv->row * 8;
+ int bc = bestmv->col * 8;
int hstep = 4;
- const int minc = MAX(x->mv_col_min << 3, ref_mv->as_mv.col - MV_MAX);
- const int maxc = MIN(x->mv_col_max << 3, ref_mv->as_mv.col + MV_MAX);
- const int minr = MAX(x->mv_row_min << 3, ref_mv->as_mv.row - MV_MAX);
- const int maxr = MIN(x->mv_row_max << 3, ref_mv->as_mv.row + MV_MAX);
+ const int minc = MAX(x->mv_col_min * 8, ref_mv->col - MV_MAX);
+ const int maxc = MIN(x->mv_col_max * 8, ref_mv->col + MV_MAX);
+ const int minr = MAX(x->mv_row_min * 8, ref_mv->row - MV_MAX);
+ const int maxr = MIN(x->mv_row_max * 8, ref_mv->row + MV_MAX);
int tr = br;
int tc = bc;
- const int offset = (bestmv->as_mv.row) * y_stride + bestmv->as_mv.col;
-
// central mv
- bestmv->as_mv.row <<= 3;
- bestmv->as_mv.col <<= 3;
+ bestmv->row <<= 3;
+ bestmv->col <<= 3;
// calculate central point error
besterr = vfp->vf(y, y_stride, z, src_stride, sse1);
@@ -391,7 +348,7 @@
}
}
- if (xd->allow_high_precision_mv && vp9_use_mv_hp(&ref_mv->as_mv) &&
+ if (xd->allow_high_precision_mv && vp9_use_mv_hp(ref_mv) &&
forced_stop == 0) {
hstep >>= 1;
while (eighthiters--) {
@@ -404,18 +361,18 @@
}
}
- bestmv->as_mv.row = br;
- bestmv->as_mv.col = bc;
+ bestmv->row = br;
+ bestmv->col = bc;
- if ((abs(bestmv->as_mv.col - ref_mv->as_mv.col) > (MAX_FULL_PEL_VAL << 3)) ||
- (abs(bestmv->as_mv.row - ref_mv->as_mv.row) > (MAX_FULL_PEL_VAL << 3)))
+ if ((abs(bestmv->col - ref_mv->col) > (MAX_FULL_PEL_VAL << 3)) ||
+ (abs(bestmv->row - ref_mv->row) > (MAX_FULL_PEL_VAL << 3)))
return INT_MAX;
return besterr;
}
int vp9_find_best_sub_pixel_tree(MACROBLOCK *x,
- int_mv *bestmv, int_mv *ref_mv,
+ MV *bestmv, const MV *ref_mv,
int error_per_bit,
const vp9_variance_fn_ptr_t *vfp,
int forced_stop,
@@ -424,49 +381,36 @@
int *distortion,
unsigned int *sse1) {
uint8_t *z = x->plane[0].src.buf;
- int src_stride = x->plane[0].src.stride;
+ const int src_stride = x->plane[0].src.stride;
MACROBLOCKD *xd = &x->e_mbd;
- int rr, rc, br, bc, hstep;
- int tr, tc;
unsigned int besterr = INT_MAX;
unsigned int sse;
unsigned int whichdir;
int thismse;
- int maxc, minc, maxr, minr;
- int y_stride;
- int offset;
unsigned int halfiters = iters_per_step;
unsigned int quarteriters = iters_per_step;
unsigned int eighthiters = iters_per_step;
- uint8_t *y = xd->plane[0].pre[0].buf +
- (bestmv->as_mv.row) * xd->plane[0].pre[0].stride +
- bestmv->as_mv.col;
+ const int y_stride = xd->plane[0].pre[0].stride;
+ const int offset = bestmv->row * y_stride + bestmv->col;
+ uint8_t *y = xd->plane[0].pre[0].buf + offset;
- y_stride = xd->plane[0].pre[0].stride;
+ int rr = ref_mv->row;
+ int rc = ref_mv->col;
+ int br = bestmv->row * 8;
+ int bc = bestmv->col * 8;
+ int hstep = 4;
+ const int minc = MAX(x->mv_col_min * 8, ref_mv->col - MV_MAX);
+ const int maxc = MIN(x->mv_col_max * 8, ref_mv->col + MV_MAX);
+ const int minr = MAX(x->mv_row_min * 8, ref_mv->row - MV_MAX);
+ const int maxr = MIN(x->mv_row_max * 8, ref_mv->row + MV_MAX);
- rr = ref_mv->as_mv.row;
- rc = ref_mv->as_mv.col;
- br = bestmv->as_mv.row << 3;
- bc = bestmv->as_mv.col << 3;
- hstep = 4;
- minc = MAX(x->mv_col_min << 3,
- (ref_mv->as_mv.col) - ((1 << MV_MAX_BITS) - 1));
- maxc = MIN(x->mv_col_max << 3,
- (ref_mv->as_mv.col) + ((1 << MV_MAX_BITS) - 1));
- minr = MAX(x->mv_row_min << 3,
- (ref_mv->as_mv.row) - ((1 << MV_MAX_BITS) - 1));
- maxr = MIN(x->mv_row_max << 3,
- (ref_mv->as_mv.row) + ((1 << MV_MAX_BITS) - 1));
-
- tr = br;
- tc = bc;
-
- offset = (bestmv->as_mv.row) * y_stride + bestmv->as_mv.col;
+ int tr = br;
+ int tc = bc;
// central mv
- bestmv->as_mv.row <<= 3;
- bestmv->as_mv.col <<= 3;
+ bestmv->row *= 8;
+ bestmv->col *= 8;
// calculate central point error
besterr = vfp->vf(y, y_stride, z, src_stride, sse1);
@@ -492,7 +436,7 @@
tc = bc;
}
- if (xd->allow_high_precision_mv && vp9_use_mv_hp(&ref_mv->as_mv) &&
+ if (xd->allow_high_precision_mv && vp9_use_mv_hp(ref_mv) &&
forced_stop == 0) {
hstep >>= 1;
FIRST_LEVEL_CHECKS;
@@ -503,11 +447,11 @@
tc = bc;
}
- bestmv->as_mv.row = br;
- bestmv->as_mv.col = bc;
+ bestmv->row = br;
+ bestmv->col = bc;
- if ((abs(bestmv->as_mv.col - ref_mv->as_mv.col) > (MAX_FULL_PEL_VAL << 3)) ||
- (abs(bestmv->as_mv.row - ref_mv->as_mv.row) > (MAX_FULL_PEL_VAL << 3)))
+ if ((abs(bestmv->col - ref_mv->col) > (MAX_FULL_PEL_VAL << 3)) ||
+ (abs(bestmv->row - ref_mv->row) > (MAX_FULL_PEL_VAL << 3)))
return INT_MAX;
return besterr;
@@ -520,7 +464,7 @@
z, src_stride, &sse, second_pred)
int vp9_find_best_sub_pixel_comp_iterative(MACROBLOCK *x,
- int_mv *bestmv, int_mv *ref_mv,
+ MV *bestmv, const MV *ref_mv,
int error_per_bit,
const vp9_variance_fn_ptr_t *vfp,
int forced_stop,
@@ -543,30 +487,26 @@
int thismse;
DECLARE_ALIGNED_ARRAY(16, uint8_t, comp_pred, 64 * 64);
- uint8_t *const y = xd->plane[0].pre[0].buf +
- (bestmv->as_mv.row) * xd->plane[0].pre[0].stride +
- bestmv->as_mv.col;
-
const int y_stride = xd->plane[0].pre[0].stride;
+ const int offset = bestmv->row * y_stride + bestmv->col;
+ uint8_t *const y = xd->plane[0].pre[0].buf + offset;
- int rr = ref_mv->as_mv.row;
- int rc = ref_mv->as_mv.col;
- int br = bestmv->as_mv.row << 3;
- int bc = bestmv->as_mv.col << 3;
+ int rr = ref_mv->row;
+ int rc = ref_mv->col;
+ int br = bestmv->row * 8;
+ int bc = bestmv->col * 8;
int hstep = 4;
- const int minc = MAX(x->mv_col_min << 3, ref_mv->as_mv.col - MV_MAX);
- const int maxc = MIN(x->mv_col_max << 3, ref_mv->as_mv.col + MV_MAX);
- const int minr = MAX(x->mv_row_min << 3, ref_mv->as_mv.row - MV_MAX);
- const int maxr = MIN(x->mv_row_max << 3, ref_mv->as_mv.row + MV_MAX);
+ const int minc = MAX(x->mv_col_min * 8, ref_mv->col - MV_MAX);
+ const int maxc = MIN(x->mv_col_max * 8, ref_mv->col + MV_MAX);
+ const int minr = MAX(x->mv_row_min * 8, ref_mv->row - MV_MAX);
+ const int maxr = MIN(x->mv_row_max * 8, ref_mv->row + MV_MAX);
int tr = br;
int tc = bc;
- const int offset = (bestmv->as_mv.row) * y_stride + bestmv->as_mv.col;
-
// central mv
- bestmv->as_mv.row <<= 3;
- bestmv->as_mv.col <<= 3;
+ bestmv->row *= 8;
+ bestmv->col *= 8;
// calculate central point error
// TODO(yunqingwang): central pointer error was already calculated in full-
@@ -604,7 +544,7 @@
}
}
- if (xd->allow_high_precision_mv && vp9_use_mv_hp(&ref_mv->as_mv) &&
+ if (xd->allow_high_precision_mv && vp9_use_mv_hp(ref_mv) &&
forced_stop == 0) {
hstep >>= 1;
while (eighthiters--) {
@@ -616,18 +556,18 @@
tc = bc;
}
}
- bestmv->as_mv.row = br;
- bestmv->as_mv.col = bc;
+ bestmv->row = br;
+ bestmv->col = bc;
- if ((abs(bestmv->as_mv.col - ref_mv->as_mv.col) > (MAX_FULL_PEL_VAL << 3)) ||
- (abs(bestmv->as_mv.row - ref_mv->as_mv.row) > (MAX_FULL_PEL_VAL << 3)))
+ if ((abs(bestmv->col - ref_mv->col) > (MAX_FULL_PEL_VAL << 3)) ||
+ (abs(bestmv->row - ref_mv->row) > (MAX_FULL_PEL_VAL << 3)))
return INT_MAX;
return besterr;
}
int vp9_find_best_sub_pixel_comp_tree(MACROBLOCK *x,
- int_mv *bestmv, int_mv *ref_mv,
+ MV *bestmv, const MV *ref_mv,
int error_per_bit,
const vp9_variance_fn_ptr_t *vfp,
int forced_stop,
@@ -638,51 +578,37 @@
const uint8_t *second_pred,
int w, int h) {
uint8_t *z = x->plane[0].src.buf;
- int src_stride = x->plane[0].src.stride;
+ const int src_stride = x->plane[0].src.stride;
MACROBLOCKD *xd = &x->e_mbd;
- int rr, rc, br, bc, hstep;
- int tr, tc;
unsigned int besterr = INT_MAX;
unsigned int sse;
unsigned int whichdir;
int thismse;
- int maxc, minc, maxr, minr;
- int y_stride;
- int offset;
unsigned int halfiters = iters_per_step;
unsigned int quarteriters = iters_per_step;
unsigned int eighthiters = iters_per_step;
DECLARE_ALIGNED_ARRAY(16, uint8_t, comp_pred, 64 * 64);
- uint8_t *y = xd->plane[0].pre[0].buf +
- (bestmv->as_mv.row) * xd->plane[0].pre[0].stride +
- bestmv->as_mv.col;
+ const int y_stride = xd->plane[0].pre[0].stride;
+ const int offset = bestmv->row * y_stride + bestmv->col;
+ uint8_t *y = xd->plane[0].pre[0].buf + offset;
- y_stride = xd->plane[0].pre[0].stride;
+ int rr = ref_mv->row;
+ int rc = ref_mv->col;
+ int br = bestmv->row * 8;
+ int bc = bestmv->col * 8;
+ int hstep = 4;
+ const int minc = MAX(x->mv_col_min * 8, ref_mv->col - MV_MAX);
+ const int maxc = MIN(x->mv_col_max * 8, ref_mv->col + MV_MAX);
+ const int minr = MAX(x->mv_row_min * 8, ref_mv->row - MV_MAX);
+ const int maxr = MIN(x->mv_row_max * 8, ref_mv->row + MV_MAX);
- rr = ref_mv->as_mv.row;
- rc = ref_mv->as_mv.col;
- br = bestmv->as_mv.row << 3;
- bc = bestmv->as_mv.col << 3;
- hstep = 4;
- minc = MAX(x->mv_col_min << 3, (ref_mv->as_mv.col) -
- ((1 << MV_MAX_BITS) - 1));
- maxc = MIN(x->mv_col_max << 3, (ref_mv->as_mv.col) +
- ((1 << MV_MAX_BITS) - 1));
- minr = MAX(x->mv_row_min << 3, (ref_mv->as_mv.row) -
- ((1 << MV_MAX_BITS) - 1));
- maxr = MIN(x->mv_row_max << 3, (ref_mv->as_mv.row) +
- ((1 << MV_MAX_BITS) - 1));
-
- tr = br;
- tc = bc;
-
-
- offset = (bestmv->as_mv.row) * y_stride + bestmv->as_mv.col;
+ int tr = br;
+ int tc = bc;
// central mv
- bestmv->as_mv.row <<= 3;
- bestmv->as_mv.col <<= 3;
+ bestmv->row *= 8;
+ bestmv->col *= 8;
// calculate central point error
// TODO(yunqingwang): central pointer error was already calculated in full-
@@ -716,7 +642,7 @@
tc = bc;
}
- if (xd->allow_high_precision_mv && vp9_use_mv_hp(&ref_mv->as_mv) &&
+ if (xd->allow_high_precision_mv && vp9_use_mv_hp(ref_mv) &&
forced_stop == 0) {
hstep >>= 1;
FIRST_LEVEL_CHECKS;
@@ -726,11 +652,11 @@
tr = br;
tc = bc;
}
- bestmv->as_mv.row = br;
- bestmv->as_mv.col = bc;
+ bestmv->row = br;
+ bestmv->col = bc;
- if ((abs(bestmv->as_mv.col - ref_mv->as_mv.col) > (MAX_FULL_PEL_VAL << 3)) ||
- (abs(bestmv->as_mv.row - ref_mv->as_mv.row) > (MAX_FULL_PEL_VAL << 3)))
+ if ((abs(bestmv->col - ref_mv->col) > (MAX_FULL_PEL_VAL << 3)) ||
+ (abs(bestmv->row - ref_mv->row) > (MAX_FULL_PEL_VAL << 3)))
return INT_MAX;
return besterr;
@@ -754,10 +680,10 @@
#define CHECK_POINT \
{\
- if (this_mv.as_mv.col < x->mv_col_min) continue;\
- if (this_mv.as_mv.col > x->mv_col_max) continue;\
- if (this_mv.as_mv.row < x->mv_row_min) continue;\
- if (this_mv.as_mv.row > x->mv_row_max) continue;\
+ if (this_mv.col < x->mv_col_min) continue;\
+ if (this_mv.col > x->mv_col_max) continue;\
+ if (this_mv.row < x->mv_row_min) continue;\
+ if (this_mv.row > x->mv_row_max) continue;\
}
#define CHECK_BETTER \
@@ -765,7 +691,7 @@
if (thissad < bestsad)\
{\
if (use_mvcost) \
- thissad += mvsad_err_cost(&this_mv, &fcenter_mv, \
+ thissad += mvsad_err_cost(&this_mv, &fcenter_mv.as_mv, \
mvjsadcost, mvsadcost, \
sad_per_bit);\
if (thissad < bestsad)\
@@ -790,14 +716,14 @@
// candidates as indicated in the num_candidates and candidates arrays
// passed into this function
static int vp9_pattern_search(MACROBLOCK *x,
- int_mv *ref_mv,
+ MV *ref_mv,
int search_param,
int sad_per_bit,
int do_init_search,
int do_refine,
const vp9_variance_fn_ptr_t *vfp,
int use_mvcost,
- int_mv *center_mv, int_mv *best_mv,
+ const MV *center_mv, MV *best_mv,
const int num_candidates[MAX_PATTERN_SCALES],
const MV candidates[MAX_PATTERN_SCALES]
[MAX_PATTERN_CANDIDATES]) {
@@ -810,7 +736,7 @@
int what_stride = x->plane[0].src.stride;
int in_what_stride = xd->plane[0].pre[0].stride;
int br, bc;
- int_mv this_mv;
+ MV this_mv;
int bestsad = INT_MAX;
int thissad;
uint8_t *base_offset;
@@ -823,24 +749,22 @@
int *mvjsadcost = x->nmvjointsadcost;
int *mvsadcost[2] = {x->nmvsadcost[0], x->nmvsadcost[1]};
- fcenter_mv.as_mv.row = center_mv->as_mv.row >> 3;
- fcenter_mv.as_mv.col = center_mv->as_mv.col >> 3;
+ fcenter_mv.as_mv.row = center_mv->row >> 3;
+ fcenter_mv.as_mv.col = center_mv->col >> 3;
// adjust ref_mv to make sure it is within MV range
- clamp_mv(&ref_mv->as_mv,
- x->mv_col_min, x->mv_col_max, x->mv_row_min, x->mv_row_max);
- br = ref_mv->as_mv.row;
- bc = ref_mv->as_mv.col;
+ clamp_mv(ref_mv, x->mv_col_min, x->mv_col_max, x->mv_row_min, x->mv_row_max);
+ br = ref_mv->row;
+ bc = ref_mv->col;
// Work out the start point for the search
base_offset = (uint8_t *)(xd->plane[0].pre[0].buf);
this_offset = base_offset + (br * in_what_stride) + bc;
- this_mv.as_mv.row = br;
- this_mv.as_mv.col = bc;
- bestsad = vfp->sdf(what, what_stride, this_offset,
- in_what_stride, 0x7fffffff)
- + mvsad_err_cost(&this_mv, &fcenter_mv, mvjsadcost, mvsadcost,
- sad_per_bit);
+ this_mv.row = br;
+ this_mv.col = bc;
+ bestsad = vfp->sdf(what, what_stride, this_offset, in_what_stride, 0x7fffffff)
+ + mvsad_err_cost(&this_mv, &fcenter_mv.as_mv,
+ mvjsadcost, mvsadcost, sad_per_bit);
// Search all possible scales up to the search param around the center point

// pick the scale of the point that is best as the starting scale of
@@ -853,21 +777,21 @@
CHECK_BOUNDS((1 << t))
if (all_in) {
for (i = 0; i < num_candidates[t]; i++) {
- this_mv.as_mv.row = br + candidates[t][i].row;
- this_mv.as_mv.col = bc + candidates[t][i].col;
- this_offset = base_offset + (this_mv.as_mv.row * in_what_stride) +
- this_mv.as_mv.col;
+ this_mv.row = br + candidates[t][i].row;
+ this_mv.col = bc + candidates[t][i].col;
+ this_offset = base_offset + (this_mv.row * in_what_stride) +
+ this_mv.col;
thissad = vfp->sdf(what, what_stride, this_offset, in_what_stride,
bestsad);
CHECK_BETTER
}
} else {
for (i = 0; i < num_candidates[t]; i++) {
- this_mv.as_mv.row = br + candidates[t][i].row;
- this_mv.as_mv.col = bc + candidates[t][i].col;
+ this_mv.row = br + candidates[t][i].row;
+ this_mv.col = bc + candidates[t][i].col;
CHECK_POINT
- this_offset = base_offset + (this_mv.as_mv.row * in_what_stride) +
- this_mv.as_mv.col;
+ this_offset = base_offset + (this_mv.row * in_what_stride) +
+ this_mv.col;
thissad = vfp->sdf(what, what_stride, this_offset, in_what_stride,
bestsad);
CHECK_BETTER
@@ -897,21 +821,21 @@
CHECK_BOUNDS((1 << s))
if (all_in) {
for (i = 0; i < num_candidates[s]; i++) {
- this_mv.as_mv.row = br + candidates[s][i].row;
- this_mv.as_mv.col = bc + candidates[s][i].col;
- this_offset = base_offset + (this_mv.as_mv.row * in_what_stride) +
- this_mv.as_mv.col;
+ this_mv.row = br + candidates[s][i].row;
+ this_mv.col = bc + candidates[s][i].col;
+ this_offset = base_offset + (this_mv.row * in_what_stride) +
+ this_mv.col;
thissad = vfp->sdf(what, what_stride, this_offset, in_what_stride,
bestsad);
CHECK_BETTER
}
} else {
for (i = 0; i < num_candidates[s]; i++) {
- this_mv.as_mv.row = br + candidates[s][i].row;
- this_mv.as_mv.col = bc + candidates[s][i].col;
+ this_mv.row = br + candidates[s][i].row;
+ this_mv.col = bc + candidates[s][i].col;
CHECK_POINT
- this_offset = base_offset + (this_mv.as_mv.row * in_what_stride) +
- this_mv.as_mv.col;
+ this_offset = base_offset + (this_mv.row * in_what_stride) +
+ this_mv.col;
thissad = vfp->sdf(what, what_stride, this_offset, in_what_stride,
bestsad);
CHECK_BETTER
@@ -935,25 +859,21 @@
get_next_chkpts(next_chkpts_indices, k, num_candidates[s]);
if (all_in) {
for (i = 0; i < PATTERN_CANDIDATES_REF; i++) {
- this_mv.as_mv.row = br +
- candidates[s][next_chkpts_indices[i]].row;
- this_mv.as_mv.col = bc +
- candidates[s][next_chkpts_indices[i]].col;
- this_offset = base_offset + (this_mv.as_mv.row * (in_what_stride)) +
- this_mv.as_mv.col;
+ this_mv.row = br + candidates[s][next_chkpts_indices[i]].row;
+ this_mv.col = bc + candidates[s][next_chkpts_indices[i]].col;
+ this_offset = base_offset + (this_mv.row * (in_what_stride)) +
+ this_mv.col;
thissad = vfp->sdf(what, what_stride, this_offset, in_what_stride,
bestsad);
CHECK_BETTER
}
} else {
for (i = 0; i < PATTERN_CANDIDATES_REF; i++) {
- this_mv.as_mv.row = br +
- candidates[s][next_chkpts_indices[i]].row;
- this_mv.as_mv.col = bc +
- candidates[s][next_chkpts_indices[i]].col;
+ this_mv.row = br + candidates[s][next_chkpts_indices[i]].row;
+ this_mv.col = bc + candidates[s][next_chkpts_indices[i]].col;
CHECK_POINT
- this_offset = base_offset + (this_mv.as_mv.row * (in_what_stride)) +
- this_mv.as_mv.col;
+ this_offset = base_offset + (this_mv.row * (in_what_stride)) +
+ this_mv.col;
thissad = vfp->sdf(what, what_stride, this_offset, in_what_stride,
bestsad);
CHECK_BETTER
@@ -980,21 +900,21 @@
CHECK_BOUNDS(1)
if (all_in) {
for (i = 0; i < 4; i++) {
- this_mv.as_mv.row = br + neighbors[i].row;
- this_mv.as_mv.col = bc + neighbors[i].col;
- this_offset = base_offset + (this_mv.as_mv.row * (in_what_stride)) +
- this_mv.as_mv.col;
+ this_mv.row = br + neighbors[i].row;
+ this_mv.col = bc + neighbors[i].col;
+ this_offset = base_offset + (this_mv.row * (in_what_stride)) +
+ this_mv.col;
thissad = vfp->sdf(what, what_stride, this_offset, in_what_stride,
bestsad);
CHECK_BETTER
}
} else {
for (i = 0; i < 4; i++) {
- this_mv.as_mv.row = br + neighbors[i].row;
- this_mv.as_mv.col = bc + neighbors[i].col;
+ this_mv.row = br + neighbors[i].row;
+ this_mv.col = bc + neighbors[i].col;
CHECK_POINT
- this_offset = base_offset + (this_mv.as_mv.row * (in_what_stride)) +
- this_mv.as_mv.col;
+ this_offset = base_offset + (this_mv.row * (in_what_stride)) +
+ this_mv.col;
thissad = vfp->sdf(what, what_stride, this_offset, in_what_stride,
bestsad);
CHECK_BETTER
@@ -1010,31 +930,32 @@
}
}
- best_mv->as_mv.row = br;
- best_mv->as_mv.col = bc;
+ best_mv->row = br;
+ best_mv->col = bc;
- this_offset = base_offset + (best_mv->as_mv.row * (in_what_stride)) +
- best_mv->as_mv.col;
- this_mv.as_mv.row = best_mv->as_mv.row << 3;
- this_mv.as_mv.col = best_mv->as_mv.col << 3;
+ this_offset = base_offset + (best_mv->row * in_what_stride) +
+ best_mv->col;
+ this_mv.row = best_mv->row * 8;
+ this_mv.col = best_mv->col * 8;
if (bestsad == INT_MAX)
return INT_MAX;
- return
- vfp->vf(what, what_stride, this_offset, in_what_stride,
- (unsigned int *)(&bestsad)) +
- use_mvcost ? mv_err_cost(&this_mv, center_mv, x->nmvjointcost, x->mvcost,
- x->errorperbit) : 0;
+
+ return vfp->vf(what, what_stride, this_offset, in_what_stride,
+ (unsigned int *)&bestsad) +
+ use_mvcost ? mv_err_cost(&this_mv, center_mv,
+ x->nmvjointcost, x->mvcost, x->errorperbit)
+ : 0;
}
int vp9_hex_search(MACROBLOCK *x,
- int_mv *ref_mv,
+ MV *ref_mv,
int search_param,
int sad_per_bit,
int do_init_search,
const vp9_variance_fn_ptr_t *vfp,
int use_mvcost,
- int_mv *center_mv, int_mv *best_mv) {
+ const MV *center_mv, MV *best_mv) {
// First scale has 8-closest points, the rest have 6 points in hex shape
// at increasing scales
static const int hex_num_candidates[MAX_PATTERN_SCALES] = {
@@ -1063,14 +984,14 @@
}
int vp9_bigdia_search(MACROBLOCK *x,
- int_mv *ref_mv,
+ MV *ref_mv,
int search_param,
int sad_per_bit,
int do_init_search,
const vp9_variance_fn_ptr_t *vfp,
int use_mvcost,
- int_mv *center_mv,
- int_mv *best_mv) {
+ const MV *center_mv,
+ MV *best_mv) {
// First scale has 4-closest points, the rest have 8 points in diamond
// shape at increasing scales
static const int bigdia_num_candidates[MAX_PATTERN_SCALES] = {
@@ -1097,22 +1018,21 @@
{{-512, -512}, {0, -1024}, {512, -512}, {1024, 0}, {512, 512}, {0, 1024},
{-512, 512}, {-1024, 0}},
};
- return
- vp9_pattern_search(x, ref_mv, search_param, sad_per_bit,
- do_init_search, 0, vfp, use_mvcost,
- center_mv, best_mv,
- bigdia_num_candidates, bigdia_candidates);
+ return vp9_pattern_search(x, ref_mv, search_param, sad_per_bit,
+ do_init_search, 0, vfp, use_mvcost,
+ center_mv, best_mv,
+ bigdia_num_candidates, bigdia_candidates);
}
int vp9_square_search(MACROBLOCK *x,
- int_mv *ref_mv,
+ MV *ref_mv,
int search_param,
int sad_per_bit,
int do_init_search,
const vp9_variance_fn_ptr_t *vfp,
int use_mvcost,
- int_mv *center_mv,
- int_mv *best_mv) {
+ const MV *center_mv,
+ MV *best_mv) {
// All scales have 8 closest points in square shape
static const int square_num_candidates[MAX_PATTERN_SCALES] = {
8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8,
@@ -1139,11 +1059,10 @@
{{-1024, -1024}, {0, -1024}, {1024, -1024}, {1024, 0}, {1024, 1024},
{0, 1024}, {-1024, 1024}, {-1024, 0}},
};
- return
- vp9_pattern_search(x, ref_mv, search_param, sad_per_bit,
- do_init_search, 0, vfp, use_mvcost,
- center_mv, best_mv,
- square_num_candidates, square_candidates);
+ return vp9_pattern_search(x, ref_mv, search_param, sad_per_bit,
+ do_init_search, 0, vfp, use_mvcost,
+ center_mv, best_mv,
+ square_num_candidates, square_candidates);
};
#undef CHECK_BOUNDS
@@ -1199,10 +1118,9 @@
best_address = in_what;
// Check the starting position
- bestsad = fn_ptr->sdf(what, what_stride, in_what,
- in_what_stride, 0x7fffffff)
- + mvsad_err_cost(best_mv, &fcenter_mv, mvjsadcost, mvsadcost,
- sad_per_bit);
+ bestsad = fn_ptr->sdf(what, what_stride, in_what, in_what_stride, 0x7fffffff)
+ + mvsad_err_cost(&best_mv->as_mv, &fcenter_mv.as_mv,
+ mvjsadcost, mvsadcost, sad_per_bit);
// search_param determines the length of the initial step and hence the number of iterations
// 0 = initial step (MAX_FIRST_STEP) pel : 1 = (MAX_FIRST_STEP/2) pel, 2 = (MAX_FIRST_STEP/4) pel... etc.
@@ -1228,7 +1146,7 @@
if (thissad < bestsad) {
this_mv.as_mv.row = this_row_offset;
this_mv.as_mv.col = this_col_offset;
- thissad += mvsad_err_cost(&this_mv, &fcenter_mv,
+ thissad += mvsad_err_cost(&this_mv.as_mv, &fcenter_mv.as_mv,
mvjsadcost, mvsadcost, sad_per_bit);
if (thissad < bestsad) {
@@ -1260,7 +1178,7 @@
if (thissad < bestsad) {
this_mv.as_mv.row = this_row_offset;
this_mv.as_mv.col = this_col_offset;
- thissad += mvsad_err_cost(&this_mv, &fcenter_mv,
+ thissad += mvsad_err_cost(&this_mv.as_mv, &fcenter_mv.as_mv,
mvjsadcost, mvsadcost, sad_per_bit);
if (thissad < bestsad) {
bestsad = thissad;
@@ -1278,15 +1196,16 @@
(*num00)++;
}
- this_mv.as_mv.row = best_mv->as_mv.row << 3;
- this_mv.as_mv.col = best_mv->as_mv.col << 3;
+ this_mv.as_mv.row = best_mv->as_mv.row * 8;
+ this_mv.as_mv.col = best_mv->as_mv.col * 8;
if (bestsad == INT_MAX)
return INT_MAX;
return fn_ptr->vf(what, what_stride, best_address, in_what_stride,
- (unsigned int *)(&thissad)) + mv_err_cost(&this_mv, center_mv, mvjcost,
- mvcost, x->errorperbit);
+ (unsigned int *)(&thissad)) +
+ mv_err_cost(&this_mv.as_mv, &center_mv->as_mv,
+ mvjcost, mvcost, x->errorperbit);
}
int vp9_diamond_search_sadx4(MACROBLOCK *x,
@@ -1340,10 +1259,9 @@
best_address = in_what;
// Check the starting position
- bestsad = fn_ptr->sdf(what, what_stride,
- in_what, in_what_stride, 0x7fffffff)
- + mvsad_err_cost(best_mv, &fcenter_mv, mvjsadcost, mvsadcost,
- sad_per_bit);
+ bestsad = fn_ptr->sdf(what, what_stride, in_what, in_what_stride, 0x7fffffff)
+ + mvsad_err_cost(&best_mv->as_mv, &fcenter_mv.as_mv,
+ mvjsadcost, mvsadcost, sad_per_bit);
// search_param determines the length of the initial step and hence the number of iterations
// 0 = initial step (MAX_FIRST_STEP) pel : 1 = (MAX_FIRST_STEP/2) pel, 2 = (MAX_FIRST_STEP/4) pel... etc.
@@ -1378,7 +1296,7 @@
if (sad_array[t] < bestsad) {
this_mv.as_mv.row = best_mv->as_mv.row + ss[i].mv.row;
this_mv.as_mv.col = best_mv->as_mv.col + ss[i].mv.col;
- sad_array[t] += mvsad_err_cost(&this_mv, &fcenter_mv,
+ sad_array[t] += mvsad_err_cost(&this_mv.as_mv, &fcenter_mv.as_mv,
mvjsadcost, mvsadcost, sad_per_bit);
if (sad_array[t] < bestsad) {
@@ -1402,7 +1320,7 @@
if (thissad < bestsad) {
this_mv.as_mv.row = this_row_offset;
this_mv.as_mv.col = this_col_offset;
- thissad += mvsad_err_cost(&this_mv, &fcenter_mv,
+ thissad += mvsad_err_cost(&this_mv.as_mv, &fcenter_mv.as_mv,
mvjsadcost, mvsadcost, sad_per_bit);
if (thissad < bestsad) {
@@ -1433,7 +1351,7 @@
if (thissad < bestsad) {
this_mv.as_mv.row = this_row_offset;
this_mv.as_mv.col = this_col_offset;
- thissad += mvsad_err_cost(&this_mv, &fcenter_mv,
+ thissad += mvsad_err_cost(&this_mv.as_mv, &fcenter_mv.as_mv,
mvjsadcost, mvsadcost, sad_per_bit);
if (thissad < bestsad) {
bestsad = thissad;
@@ -1451,15 +1369,16 @@
(*num00)++;
}
- this_mv.as_mv.row = best_mv->as_mv.row << 3;
- this_mv.as_mv.col = best_mv->as_mv.col << 3;
+ this_mv.as_mv.row = best_mv->as_mv.row * 8;
+ this_mv.as_mv.col = best_mv->as_mv.col * 8;
if (bestsad == INT_MAX)
return INT_MAX;
return fn_ptr->vf(what, what_stride, best_address, in_what_stride,
- (unsigned int *)(&thissad)) + mv_err_cost(&this_mv,
- center_mv, mvjcost, mvcost, x->errorperbit);
+ (unsigned int *)(&thissad)) +
+ mv_err_cost(&this_mv.as_mv, &center_mv->as_mv,
+ mvjcost, mvcost, x->errorperbit);
}
/* do_refine: If last step (1-away) of n-step search doesn't pick the center
@@ -1570,8 +1489,8 @@
// Baseline value at the centre
bestsad = fn_ptr->sdf(what, what_stride, bestaddress,
in_what_stride, 0x7fffffff)
- + mvsad_err_cost(best_mv, &fcenter_mv, mvjsadcost, mvsadcost,
- sad_per_bit);
+ + mvsad_err_cost(&best_mv->as_mv, &fcenter_mv.as_mv,
+ mvjsadcost, mvsadcost, sad_per_bit);
// Apply further limits to prevent us looking using vectors that stretch
// beyond the UMV border
@@ -1588,8 +1507,8 @@
thissad = fn_ptr->sdf(what, what_stride, check_here, in_what_stride, bestsad);
this_mv.as_mv.col = c;
- thissad += mvsad_err_cost(&this_mv, &fcenter_mv,
- mvjsadcost, mvsadcost, sad_per_bit);
+ thissad += mvsad_err_cost(&this_mv.as_mv, &fcenter_mv.as_mv,
+ mvjsadcost, mvsadcost, sad_per_bit);
if (thissad < bestsad) {
bestsad = thissad;
@@ -1602,14 +1521,14 @@
}
}
- this_mv.as_mv.row = best_mv->as_mv.row << 3;
- this_mv.as_mv.col = best_mv->as_mv.col << 3;
+ this_mv.as_mv.row = best_mv->as_mv.row * 8;
+ this_mv.as_mv.col = best_mv->as_mv.col * 8;
if (bestsad < INT_MAX)
- return
- fn_ptr->vf(what, what_stride, bestaddress, in_what_stride,
- (unsigned int *)(&thissad)) +
- mv_err_cost(&this_mv, center_mv, mvjcost, mvcost, x->errorperbit);
+ return fn_ptr->vf(what, what_stride, bestaddress, in_what_stride,
+ (unsigned int *)(&thissad)) +
+ mv_err_cost(&this_mv.as_mv, &center_mv->as_mv,
+ mvjcost, mvcost, x->errorperbit);
else
return INT_MAX;
}
@@ -1660,8 +1579,8 @@
// Baseline value at the centre
bestsad = fn_ptr->sdf(what, what_stride,
bestaddress, in_what_stride, 0x7fffffff)
- + mvsad_err_cost(best_mv, &fcenter_mv, mvjsadcost, mvsadcost,
- sad_per_bit);
+ + mvsad_err_cost(&best_mv->as_mv, &fcenter_mv.as_mv,
+ mvjsadcost, mvsadcost, sad_per_bit);
// Apply further limits to prevent us looking using vectors that stretch
// beyond the UMV border
@@ -1685,8 +1604,8 @@
if (thissad < bestsad) {
this_mv.as_mv.col = c;
- thissad += mvsad_err_cost(&this_mv, &fcenter_mv,
- mvjsadcost, mvsadcost, sad_per_bit);
+ thissad += mvsad_err_cost(&this_mv.as_mv, &fcenter_mv.as_mv,
+ mvjsadcost, mvsadcost, sad_per_bit);
if (thissad < bestsad) {
bestsad = thissad;
@@ -1706,7 +1625,7 @@
if (thissad < bestsad) {
this_mv.as_mv.col = c;
- thissad += mvsad_err_cost(&this_mv, &fcenter_mv,
+ thissad += mvsad_err_cost(&this_mv.as_mv, &fcenter_mv.as_mv,
mvjsadcost, mvsadcost, sad_per_bit);
if (thissad < bestsad) {
@@ -1723,14 +1642,14 @@
}
- this_mv.as_mv.row = best_mv->as_mv.row << 3;
- this_mv.as_mv.col = best_mv->as_mv.col << 3;
+ this_mv.as_mv.row = best_mv->as_mv.row * 8;
+ this_mv.as_mv.col = best_mv->as_mv.col * 8;
if (bestsad < INT_MAX)
- return
- fn_ptr->vf(what, what_stride, bestaddress, in_what_stride,
- (unsigned int *)(&thissad)) +
- mv_err_cost(&this_mv, center_mv, mvjcost, mvcost, x->errorperbit);
+ return fn_ptr->vf(what, what_stride, bestaddress, in_what_stride,
+ (unsigned int *)(&thissad)) +
+ mv_err_cost(&this_mv.as_mv, &center_mv->as_mv,
+ mvjcost, mvcost, x->errorperbit);
else
return INT_MAX;
}
@@ -1783,8 +1702,8 @@
// Baseline value at the centre
bestsad = fn_ptr->sdf(what, what_stride,
bestaddress, in_what_stride, 0x7fffffff)
- + mvsad_err_cost(best_mv, &fcenter_mv, mvjsadcost, mvsadcost,
- sad_per_bit);
+ + mvsad_err_cost(&best_mv->as_mv, &fcenter_mv.as_mv,
+ mvjsadcost, mvsadcost, sad_per_bit);
// Apply further limits to prevent us looking using vectors that stretch
// beyond the UMV border
@@ -1808,8 +1727,8 @@
if (thissad < bestsad) {
this_mv.as_mv.col = c;
- thissad += mvsad_err_cost(&this_mv, &fcenter_mv,
- mvjsadcost, mvsadcost, sad_per_bit);
+ thissad += mvsad_err_cost(&this_mv.as_mv, &fcenter_mv.as_mv,
+ mvjsadcost, mvsadcost, sad_per_bit);
if (thissad < bestsad) {
bestsad = thissad;
@@ -1834,7 +1753,7 @@
if (thissad < bestsad) {
this_mv.as_mv.col = c;
- thissad += mvsad_err_cost(&this_mv, &fcenter_mv,
+ thissad += mvsad_err_cost(&this_mv.as_mv, &fcenter_mv.as_mv,
mvjsadcost, mvsadcost, sad_per_bit);
if (thissad < bestsad) {
@@ -1855,8 +1774,8 @@
if (thissad < bestsad) {
this_mv.as_mv.col = c;
- thissad += mvsad_err_cost(&this_mv, &fcenter_mv,
- mvjsadcost, mvsadcost, sad_per_bit);
+ thissad += mvsad_err_cost(&this_mv.as_mv, &fcenter_mv.as_mv,
+ mvjsadcost, mvsadcost, sad_per_bit);
if (thissad < bestsad) {
bestsad = thissad;
@@ -1871,14 +1790,14 @@
}
}
- this_mv.as_mv.row = best_mv->as_mv.row << 3;
- this_mv.as_mv.col = best_mv->as_mv.col << 3;
+ this_mv.as_mv.row = best_mv->as_mv.row * 8;
+ this_mv.as_mv.col = best_mv->as_mv.col * 8;
if (bestsad < INT_MAX)
- return
- fn_ptr->vf(what, what_stride, bestaddress, in_what_stride,
- (unsigned int *)(&thissad)) +
- mv_err_cost(&this_mv, center_mv, mvjcost, mvcost, x->errorperbit);
+ return fn_ptr->vf(what, what_stride, bestaddress, in_what_stride,
+ (unsigned int *)(&thissad)) +
+ mv_err_cost(&this_mv.as_mv, &center_mv->as_mv,
+ mvjcost, mvcost, x->errorperbit);
else
return INT_MAX;
}
@@ -1909,8 +1828,10 @@
fcenter_mv.as_mv.row = center_mv->as_mv.row >> 3;
fcenter_mv.as_mv.col = center_mv->as_mv.col >> 3;
- bestsad = fn_ptr->sdf(what, what_stride, best_address, in_what_stride, 0x7fffffff) +
- mvsad_err_cost(ref_mv, &fcenter_mv, mvjsadcost, mvsadcost, error_per_bit);
+ bestsad = fn_ptr->sdf(what, what_stride, best_address,
+ in_what_stride, 0x7fffffff) +
+ mvsad_err_cost(&ref_mv->as_mv, &fcenter_mv.as_mv,
+ mvjsadcost, mvsadcost, error_per_bit);
for (i = 0; i < search_range; i++) {
int best_site = -1;
@@ -1927,8 +1848,8 @@
if (thissad < bestsad) {
this_mv.as_mv.row = this_row_offset;
this_mv.as_mv.col = this_col_offset;
- thissad += mvsad_err_cost(&this_mv, &fcenter_mv, mvjsadcost,
- mvsadcost, error_per_bit);
+ thissad += mvsad_err_cost(&this_mv.as_mv, &fcenter_mv.as_mv,
+ mvjsadcost, mvsadcost, error_per_bit);
if (thissad < bestsad) {
bestsad = thissad;
@@ -1947,14 +1868,14 @@
}
}
- this_mv.as_mv.row = ref_mv->as_mv.row << 3;
- this_mv.as_mv.col = ref_mv->as_mv.col << 3;
+ this_mv.as_mv.row = ref_mv->as_mv.row * 8;
+ this_mv.as_mv.col = ref_mv->as_mv.col * 8;
if (bestsad < INT_MAX)
- return
- fn_ptr->vf(what, what_stride, best_address, in_what_stride,
- (unsigned int *)(&thissad)) +
- mv_err_cost(&this_mv, center_mv, mvjcost, mvcost, x->errorperbit);
+ return fn_ptr->vf(what, what_stride, best_address, in_what_stride,
+ (unsigned int *)(&thissad)) +
+ mv_err_cost(&this_mv.as_mv, &center_mv->as_mv,
+ mvjcost, mvcost, x->errorperbit);
else
return INT_MAX;
}
@@ -1986,8 +1907,10 @@
fcenter_mv.as_mv.row = center_mv->as_mv.row >> 3;
fcenter_mv.as_mv.col = center_mv->as_mv.col >> 3;
- bestsad = fn_ptr->sdf(what, what_stride, best_address, in_what_stride, 0x7fffffff) +
- mvsad_err_cost(ref_mv, &fcenter_mv, mvjsadcost, mvsadcost, error_per_bit);
+ bestsad = fn_ptr->sdf(what, what_stride, best_address,
+ in_what_stride, 0x7fffffff) +
+ mvsad_err_cost(&ref_mv->as_mv, &fcenter_mv.as_mv,
+ mvjsadcost, mvsadcost, error_per_bit);
for (i = 0; i < search_range; i++) {
int best_site = -1;
@@ -2010,8 +1933,8 @@
if (sad_array[j] < bestsad) {
this_mv.as_mv.row = ref_mv->as_mv.row + neighbors[j].row;
this_mv.as_mv.col = ref_mv->as_mv.col + neighbors[j].col;
- sad_array[j] += mvsad_err_cost(&this_mv, &fcenter_mv, mvjsadcost,
- mvsadcost, error_per_bit);
+ sad_array[j] += mvsad_err_cost(&this_mv.as_mv, &fcenter_mv.as_mv,
+ mvjsadcost, mvsadcost, error_per_bit);
if (sad_array[j] < bestsad) {
bestsad = sad_array[j];
@@ -2032,8 +1955,8 @@
if (thissad < bestsad) {
this_mv.as_mv.row = this_row_offset;
this_mv.as_mv.col = this_col_offset;
- thissad += mvsad_err_cost(&this_mv, &fcenter_mv, mvjsadcost,
- mvsadcost, error_per_bit);
+ thissad += mvsad_err_cost(&this_mv.as_mv, &fcenter_mv.as_mv,
+ mvjsadcost, mvsadcost, error_per_bit);
if (thissad < bestsad) {
bestsad = thissad;
@@ -2053,14 +1976,14 @@
}
}
- this_mv.as_mv.row = ref_mv->as_mv.row << 3;
- this_mv.as_mv.col = ref_mv->as_mv.col << 3;
+ this_mv.as_mv.row = ref_mv->as_mv.row * 8;
+ this_mv.as_mv.col = ref_mv->as_mv.col * 8;
if (bestsad < INT_MAX)
- return
- fn_ptr->vf(what, what_stride, best_address, in_what_stride,
- (unsigned int *)(&thissad)) +
- mv_err_cost(&this_mv, center_mv, mvjcost, mvcost, x->errorperbit);
+ return fn_ptr->vf(what, what_stride, best_address, in_what_stride,
+ (unsigned int *)(&thissad)) +
+ mv_err_cost(&this_mv.as_mv, &center_mv->as_mv,
+ mvjcost, mvcost, x->errorperbit);
else
return INT_MAX;
}
@@ -2100,7 +2023,8 @@
/* Get compound pred by averaging two pred blocks. */
bestsad = fn_ptr->sdaf(what, what_stride, best_address, in_what_stride,
second_pred, 0x7fffffff) +
- mvsad_err_cost(ref_mv, &fcenter_mv, mvjsadcost, mvsadcost, error_per_bit);
+ mvsad_err_cost(&ref_mv->as_mv, &fcenter_mv.as_mv,
+ mvjsadcost, mvsadcost, error_per_bit);
for (i = 0; i < search_range; i++) {
int best_site = -1;
@@ -2123,9 +2047,8 @@
if (thissad < bestsad) {
this_mv.as_mv.row = this_row_offset;
this_mv.as_mv.col = this_col_offset;
- thissad += mvsad_err_cost(&this_mv, &fcenter_mv, mvjsadcost,
- mvsadcost, error_per_bit);
-
+ thissad += mvsad_err_cost(&this_mv.as_mv, &fcenter_mv.as_mv,
+ mvjsadcost, mvsadcost, error_per_bit);
if (thissad < bestsad) {
bestsad = thissad;
best_site = j;
@@ -2144,16 +2067,16 @@
}
}
- this_mv.as_mv.row = ref_mv->as_mv.row << 3;
- this_mv.as_mv.col = ref_mv->as_mv.col << 3;
+ this_mv.as_mv.row = ref_mv->as_mv.row * 8;
+ this_mv.as_mv.col = ref_mv->as_mv.col * 8;
if (bestsad < INT_MAX) {
// FIXME(rbultje, yunqing): add full-pixel averaging variance functions
// so we don't have to use the subpixel with xoff=0,yoff=0 here.
- return fn_ptr->svaf(best_address, in_what_stride, 0, 0,
- what, what_stride, (unsigned int *)(&thissad),
- second_pred) +
- mv_err_cost(&this_mv, center_mv, mvjcost, mvcost, x->errorperbit);
+ return fn_ptr->svaf(best_address, in_what_stride, 0, 0, what, what_stride,
+ (unsigned int *)(&thissad), second_pred) +
+ mv_err_cost(&this_mv.as_mv, &center_mv->as_mv,
+ mvjcost, mvcost, x->errorperbit);
} else {
return INT_MAX;
}
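A note on the recurring "<< 3" to "* 8" changes in vp9_mcomp.c above: both forms convert a full-pel motion vector component to 1/8-pel units, but left-shifting a negative signed integer is undefined behavior in C, while the multiplication is well defined for the full range; that is presumably the motivation here. A minimal standalone sketch (hypothetical values, not part of the patch):

    #include <stdio.h>

    int main(void) {
      int col = -3;            /* full-pel MV component, may be negative */
      /* col << 3 would be undefined behavior for negative col */
      int subpel = col * 8;    /* well defined: -24 in 1/8-pel units */
      printf("%d\n", subpel);
      return 0;
    }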
diff --git a/vp9/encoder/vp9_mcomp.h b/vp9/encoder/vp9_mcomp.h
index 3598fa0..77c157c 100644
--- a/vp9/encoder/vp9_mcomp.h
+++ b/vp9/encoder/vp9_mcomp.h
@@ -22,10 +22,14 @@
#define MAX_FULL_PEL_VAL ((1 << (MAX_MVSEARCH_STEPS)) - 1)
// Maximum size of the first step in full pel units
#define MAX_FIRST_STEP (1 << (MAX_MVSEARCH_STEPS-1))
+// Allowed motion vector pixel distance outside image border
+// for Block_16x16
+#define BORDER_MV_PIXELS_B16 (16 + VP9_INTERP_EXTEND)
+
void vp9_clamp_mv_min_max(MACROBLOCK *x, MV *mv);
-int vp9_mv_bit_cost(int_mv *mv, int_mv *ref, int *mvjcost,
- int *mvcost[2], int weight);
+int vp9_mv_bit_cost(const MV *mv, const MV *ref,
+ const int *mvjcost, int *mvcost[2], int weight);
void vp9_init_dsmotion_compensation(MACROBLOCK *x, int stride);
void vp9_init3smotion_compensation(MACROBLOCK *x, int stride);
@@ -40,37 +44,36 @@
int_mv *ref_mv, int_mv *dst_mv);
int vp9_hex_search(MACROBLOCK *x,
- int_mv *ref_mv,
+ MV *ref_mv,
int search_param,
int error_per_bit,
int do_init_search,
const vp9_variance_fn_ptr_t *vf,
int use_mvcost,
- int_mv *center_mv,
- int_mv *best_mv);
+ const MV *center_mv,
+ MV *best_mv);
int vp9_bigdia_search(MACROBLOCK *x,
- int_mv *ref_mv,
+ MV *ref_mv,
int search_param,
int error_per_bit,
int do_init_search,
const vp9_variance_fn_ptr_t *vf,
int use_mvcost,
- int_mv *center_mv,
- int_mv *best_mv);
+ const MV *center_mv,
+ MV *best_mv);
int vp9_square_search(MACROBLOCK *x,
- int_mv *ref_mv,
+ MV *ref_mv,
int search_param,
int error_per_bit,
int do_init_search,
const vp9_variance_fn_ptr_t *vf,
int use_mvcost,
- int_mv *center_mv,
- int_mv *best_mv);
+ const MV *center_mv,
+ MV *best_mv);
typedef int (fractional_mv_step_fp) (
MACROBLOCK *x,
- int_mv *bestmv,
- int_mv *ref_mv,
+ MV *bestmv, const MV *ref_mv,
int error_per_bit,
const vp9_variance_fn_ptr_t *vfp,
int forced_stop, // 0 - full, 1 - qtr only, 2 - half only
@@ -84,7 +87,7 @@
typedef int (fractional_mv_step_comp_fp) (
MACROBLOCK *x,
- int_mv *bestmv, int_mv *ref_mv,
+ MV *bestmv, const MV *ref_mv,
int error_per_bit,
const vp9_variance_fn_ptr_t *vfp,
int forced_stop, // 0 - full, 1 - qtr only, 2 - half only
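For context on the int_mv to MV migration in these prototypes: libvpx keeps a plain row/col pair and a union that overlays it with a 32-bit integer for cheap whole-vector copies and compares. A sketch from memory (see vp9/common/vp9_mv.h for the authoritative definitions):

    #include <stdint.h>

    typedef struct mv {
      int16_t row;
      int16_t col;
    } MV;

    typedef union int_mv {
      uint32_t as_int;  /* whole-vector load/store/compare */
      MV as_mv;         /* per-component access */
    } int_mv;

This is why call sites elsewhere in the patch now pass &some_int_mv.as_mv where they previously passed &some_int_mv.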
diff --git a/vp9/encoder/vp9_onyx_if.c b/vp9/encoder/vp9_onyx_if.c
index bc1b4a8..1a1ea9e 100644
--- a/vp9/encoder/vp9_onyx_if.c
+++ b/vp9/encoder/vp9_onyx_if.c
@@ -122,6 +122,8 @@
static int gf_low_motion_minq[QINDEX_RANGE];
static int gf_high_motion_minq[QINDEX_RANGE];
static int inter_minq[QINDEX_RANGE];
+static int afq_low_motion_minq[QINDEX_RANGE];
+static int afq_high_motion_minq[QINDEX_RANGE];
static INLINE void Scale2Ratio(int mode, int *hr, int *hs) {
switch (mode) {
@@ -193,22 +195,52 @@
gf_low_motion_minq[i] = calculate_minq_index(maxq,
0.0000015,
-0.0009,
- 0.33,
+ 0.32,
0.0);
gf_high_motion_minq[i] = calculate_minq_index(maxq,
0.0000021,
-0.00125,
- 0.45,
+ 0.50,
0.0);
inter_minq[i] = calculate_minq_index(maxq,
0.00000271,
-0.00113,
- 0.697,
+ 0.75,
0.0);
-
+ afq_low_motion_minq[i] = calculate_minq_index(maxq,
+ 0.0000015,
+ -0.0009,
+ 0.33,
+ 0.0);
+ afq_high_motion_minq[i] = calculate_minq_index(maxq,
+ 0.0000021,
+ -0.00125,
+ 0.55,
+ 0.0);
}
}
+static int get_active_quality(int q,
+ int gfu_boost,
+ int low,
+ int high,
+ int *low_motion_minq,
+ int *high_motion_minq) {
+ int active_best_quality;
+ if (gfu_boost > high) {
+ active_best_quality = low_motion_minq[q];
+ } else if (gfu_boost < low) {
+ active_best_quality = high_motion_minq[q];
+ } else {
+ const int gap = high - low;
+ const int offset = high - gfu_boost;
+ const int qdiff = high_motion_minq[q] - low_motion_minq[q];
+ const int adjustment = ((offset * qdiff) + (gap >> 1)) / gap;
+ active_best_quality = low_motion_minq[q] + adjustment;
+ }
+ return active_best_quality;
+}
+
static void set_mvcost(MACROBLOCK *mb) {
if (mb->e_mbd.allow_high_precision_mv) {
mb->mvcost = mb->nmvcost_hp;
@@ -692,7 +724,6 @@
sf->adaptive_motion_search = 0;
sf->use_avoid_tested_higherror = 0;
sf->reference_masking = 0;
- sf->partition_by_variance = 0;
sf->use_one_partition_size_always = 0;
sf->less_rectangular_check = 0;
sf->use_square_partition_only = 0;
@@ -815,8 +846,12 @@
sf->mode_skip_start = 6;
}
if (speed == 3) {
+ sf->less_rectangular_check = 1;
+ sf->use_square_partition_only = 1;
sf->comp_inter_joint_search_thresh = BLOCK_SIZES;
- sf->partition_by_variance = 1;
+ sf->use_lastframe_partitioning = 1;
+ sf->adjust_partitioning_from_last_frame = 1;
+ sf->last_partitioning_redo_frequency = 3;
sf->tx_size_search_method = ((cpi->common.frame_type == KEY_FRAME ||
cpi->common.intra_only ||
cpi->common.show_frame == 0) ?
@@ -828,17 +863,23 @@
FLAG_SKIP_COMP_REFMISMATCH |
FLAG_SKIP_INTRA_LOWVAR |
FLAG_EARLY_TERMINATE;
+ sf->intra_y_mode_mask = INTRA_DC_ONLY;
+ sf->intra_uv_mode_mask = INTRA_DC_ONLY;
+ sf->use_uv_intra_rd_estimate = 1;
sf->use_rd_breakout = 1;
sf->skip_encode_sb = 1;
sf->use_lp32x32fdct = 1;
+ sf->adaptive_motion_search = 1;
+ sf->using_small_partition_info = 0;
sf->disable_splitmv = 1;
sf->auto_mv_step_size = 1;
sf->search_method = BIGDIA;
sf->subpel_iters_per_step = 1;
+ sf->use_fast_lpf_pick = 1;
+ sf->auto_min_max_partition_size = 1;
+ sf->auto_min_max_partition_interval = 2;
sf->disable_split_var_thresh = 64;
sf->disable_filter_search_var_thresh = 64;
- sf->intra_y_mode_mask = INTRA_DC_ONLY;
- sf->intra_uv_mode_mask = INTRA_DC_ONLY;
sf->use_fast_coef_updates = 2;
sf->mode_skip_start = 6;
}
@@ -2691,18 +2732,10 @@
double q_val;
// Baseline value derived from cpi->active_worst_quality and kf boost
- if (cpi->kf_boost > high) {
- cpi->active_best_quality = kf_low_motion_minq[q];
- } else if (cpi->kf_boost < low) {
- cpi->active_best_quality = kf_high_motion_minq[q];
- } else {
- const int gap = high - low;
- const int offset = high - cpi->kf_boost;
- const int qdiff = kf_high_motion_minq[q] - kf_low_motion_minq[q];
- const int adjustment = ((offset * qdiff) + (gap >> 1)) / gap;
-
- cpi->active_best_quality = kf_low_motion_minq[q] + adjustment;
- }
+ cpi->active_best_quality = get_active_quality(q, cpi->kf_boost,
+ low, high,
+ kf_low_motion_minq,
+ kf_high_motion_minq);
// Allow somewhat lower kf minq with small image formats.
if ((cm->width * cm->height) <= (352 * 288)) {
@@ -2737,47 +2770,48 @@
q = cpi->avg_frame_qindex;
}
// For constrained quality dont allow Q less than the cq level
- if (cpi->oxcf.end_usage == USAGE_CONSTRAINED_QUALITY &&
- q < cpi->cq_target_quality) {
- q = cpi->cq_target_quality;
- }
- if (cpi->gfu_boost > high) {
- cpi->active_best_quality = gf_low_motion_minq[q];
- } else if (cpi->gfu_boost < low) {
- cpi->active_best_quality = gf_high_motion_minq[q];
- } else {
- const int gap = high - low;
- const int offset = high - cpi->gfu_boost;
- const int qdiff = gf_high_motion_minq[q] - gf_low_motion_minq[q];
- const int adjustment = ((offset * qdiff) + (gap >> 1)) / gap;
-
- cpi->active_best_quality = gf_low_motion_minq[q] + adjustment;
- }
-
- // Constrained quality use slightly lower active best.
- if (cpi->oxcf.end_usage == USAGE_CONSTRAINED_QUALITY)
+ if (cpi->oxcf.end_usage == USAGE_CONSTRAINED_QUALITY) {
+ if (q < cpi->cq_target_quality)
+ q = cpi->cq_target_quality;
+ if (cpi->frames_since_key > 1) {
+ cpi->active_best_quality = get_active_quality(q, cpi->gfu_boost,
+ low, high,
+ afq_low_motion_minq,
+ afq_high_motion_minq);
+ } else {
+ cpi->active_best_quality = get_active_quality(q, cpi->gfu_boost,
+ low, high,
+ gf_low_motion_minq,
+ gf_high_motion_minq);
+ }
+ // Constrained quality uses a slightly lower active best.
cpi->active_best_quality = cpi->active_best_quality * 15 / 16;
- // TODO(debargha): Refine the logic below
- if (cpi->oxcf.end_usage == USAGE_CONSTANT_QUALITY) {
+ } else if (cpi->oxcf.end_usage == USAGE_CONSTANT_QUALITY) {
if (!cpi->refresh_alt_ref_frame) {
cpi->active_best_quality = cpi->cq_target_quality;
} else {
if (cpi->frames_since_key > 1) {
- if (cpi->gfu_boost > high) {
- cpi->active_best_quality = cpi->cq_target_quality * 6 / 16;
- } else if (cpi->gfu_boost < low) {
- cpi->active_best_quality = cpi->cq_target_quality * 11 / 16;
- } else {
- const int gap = high - low;
- const int offset = high - cpi->gfu_boost;
- const int qdiff = cpi->cq_target_quality * 5 / 16;
- const int adjustment = ((offset * qdiff) + (gap >> 1)) / gap;
- cpi->active_best_quality = cpi->cq_target_quality * 6 / 16
- + adjustment;
- }
+ cpi->active_best_quality = get_active_quality(q, cpi->gfu_boost,
+ low, high,
+ afq_low_motion_minq,
+ afq_high_motion_minq);
+ } else {
+ cpi->active_best_quality = get_active_quality(q, cpi->gfu_boost,
+ low, high,
+ gf_low_motion_minq,
+ gf_high_motion_minq);
}
}
+ } else {
+ if (!cpi->refresh_alt_ref_frame) {
+ cpi->active_best_quality = inter_minq[q];
+ } else {
+ cpi->active_best_quality = get_active_quality(q, cpi->gfu_boost,
+ low, high,
+ gf_low_motion_minq,
+ gf_high_motion_minq);
+ }
}
} else {
if (cpi->oxcf.end_usage == USAGE_CONSTANT_QUALITY) {
@@ -3262,7 +3296,7 @@
// in this frame.
// update_base_skip_probs(cpi);
-#if CONFIG_INTERNAL_STATS
+#if 0 // CONFIG_INTERNAL_STATS
{
FILE *f = fopen("tmp.stt", cm->current_video_frame ? "a" : "w");
int recon_err;
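A worked example of the new get_active_quality() helper above, with purely illustrative numbers: take thresholds low = 400 and high = 2000, table entries low_motion_minq[q] = 20 and high_motion_minq[q] = 36, and gfu_boost = 1200, which falls between the thresholds, so the function interpolates:

    gap        = 2000 - 400                   = 1600
    offset     = 2000 - 1200                  = 800
    qdiff      = 36 - 20                      = 16
    adjustment = (800 * 16 + 1600 / 2) / 1600 = 8    /* gap >> 1 rounds */
    result     = 20 + 8                       = 28

A boost above high selects the low-motion table directly, and a boost below low selects the high-motion table.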
diff --git a/vp9/encoder/vp9_onyx_int.h b/vp9/encoder/vp9_onyx_int.h
index 92edf49..1002ff7 100644
--- a/vp9/encoder/vp9_onyx_int.h
+++ b/vp9/encoder/vp9_onyx_int.h
@@ -61,16 +61,11 @@
#define INTRA_ZBIN_BOOST 0
typedef struct {
- nmv_context nmvc;
int nmvjointcost[MV_JOINTS];
int nmvcosts[2][MV_VALS];
int nmvcosts_hp[2][MV_VALS];
vp9_prob segment_pred_probs[PREDICTION_PROBS];
- vp9_prob intra_inter_prob[INTRA_INTER_CONTEXTS];
- vp9_prob comp_inter_prob[COMP_INTER_CONTEXTS];
- vp9_prob single_ref_prob[REF_CONTEXTS][2];
- vp9_prob comp_ref_prob[REF_CONTEXTS];
unsigned char *last_frame_seg_map_copy;
@@ -79,20 +74,8 @@
// 0 = ZERO_MV, MV
signed char last_mode_lf_deltas[MAX_MODE_LF_DELTAS];
- vp9_coeff_probs_model coef_probs[TX_SIZES][BLOCK_TYPES];
-
- vp9_prob y_mode_prob[4][INTRA_MODES - 1];
- vp9_prob uv_mode_prob[INTRA_MODES][INTRA_MODES - 1];
- vp9_prob partition_prob[2][NUM_PARTITION_CONTEXTS][PARTITION_TYPES - 1];
-
- vp9_prob switchable_interp_prob[SWITCHABLE_FILTERS + 1]
- [SWITCHABLE_FILTERS - 1];
-
int inter_mode_counts[INTER_MODE_CONTEXTS][INTER_MODES - 1][2];
- vp9_prob inter_mode_probs[INTER_MODE_CONTEXTS][INTER_MODES - 1];
-
- struct tx_probs tx_probs;
- vp9_prob mbskip_probs[MBSKIP_CONTEXTS];
+ FRAME_CONTEXT fc;
} CODING_CONTEXT;
typedef struct {
@@ -267,7 +250,6 @@
TX_SIZE_SEARCH_METHOD tx_size_search_method;
int use_lp32x32fdct;
int use_avoid_tested_higherror;
- int partition_by_variance;
int use_one_partition_size_always;
int less_rectangular_check;
int use_square_partition_only;
@@ -392,8 +374,7 @@
int rd_thresh_freq_fact[BLOCK_SIZES][MAX_MODES];
int64_t rd_comp_pred_diff[NB_PREDICTION_TYPES];
- // FIXME(rbultje) int64_t?
- int rd_prediction_type_threshes[4][NB_PREDICTION_TYPES];
+ int64_t rd_prediction_type_threshes[4][NB_PREDICTION_TYPES];
unsigned int intra_inter_count[INTRA_INTER_CONTEXTS][2];
unsigned int comp_inter_count[COMP_INTER_CONTEXTS][2];
unsigned int single_ref_count[REF_CONTEXTS][2][2];
@@ -650,7 +631,7 @@
unsigned int switchable_interp_count[SWITCHABLE_FILTERS + 1]
[SWITCHABLE_FILTERS];
- unsigned int txfm_stepdown_count[TX_SIZES];
+ unsigned int tx_stepdown_count[TX_SIZES];
int initial_width;
int initial_height;
@@ -713,9 +694,8 @@
void vp9_set_speed_features(VP9_COMP *cpi);
-extern int vp9_calc_ss_err(YV12_BUFFER_CONFIG *source,
- YV12_BUFFER_CONFIG *dest);
+int vp9_calc_ss_err(YV12_BUFFER_CONFIG *source, YV12_BUFFER_CONFIG *dest);
-extern void vp9_alloc_compressor_data(VP9_COMP *cpi);
+void vp9_alloc_compressor_data(VP9_COMP *cpi);
#endif // VP9_ENCODER_VP9_ONYX_INT_H_
diff --git a/vp9/encoder/vp9_quantize.c b/vp9/encoder/vp9_quantize.c
index 6c8b2a0..05e893e 100644
--- a/vp9/encoder/vp9_quantize.c
+++ b/vp9/encoder/vp9_quantize.c
@@ -337,10 +337,10 @@
vp9_mb_init_quantizer(cpi, &cpi->mb);
}
-void vp9_set_quantizer(struct VP9_COMP *cpi, int Q) {
+void vp9_set_quantizer(struct VP9_COMP *cpi, int q) {
VP9_COMMON *cm = &cpi->common;
- cm->base_qindex = Q;
+ cm->base_qindex = q;
// if any of the delta_q values are changing update flag will
// have to be set.
diff --git a/vp9/encoder/vp9_quantize.h b/vp9/encoder/vp9_quantize.h
index 3229eaa..3191c49 100644
--- a/vp9/encoder/vp9_quantize.h
+++ b/vp9/encoder/vp9_quantize.h
@@ -30,14 +30,14 @@
int y_blocks);
struct VP9_COMP;
-extern void vp9_set_quantizer(struct VP9_COMP *cpi, int Q);
+void vp9_set_quantizer(struct VP9_COMP *cpi, int q);
-extern void vp9_frame_init_quantizer(struct VP9_COMP *cpi);
+void vp9_frame_init_quantizer(struct VP9_COMP *cpi);
-extern void vp9_update_zbin_extra(struct VP9_COMP *cpi, MACROBLOCK *x);
+void vp9_update_zbin_extra(struct VP9_COMP *cpi, MACROBLOCK *x);
-extern void vp9_mb_init_quantizer(struct VP9_COMP *cpi, MACROBLOCK *x);
+void vp9_mb_init_quantizer(struct VP9_COMP *cpi, MACROBLOCK *x);
-extern void vp9_init_quantizer(struct VP9_COMP *cpi);
+void vp9_init_quantizer(struct VP9_COMP *cpi);
#endif // VP9_ENCODER_VP9_QUANTIZE_H_
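On the extern removals in this header (and the matching ones in vp9_onyx_int.h and vp9_ratectrl.h): extern is redundant on function declarations in C because functions have external linkage by default, so the two declarations below are equivalent:

    extern void vp9_init_quantizer(struct VP9_COMP *cpi);
    void vp9_init_quantizer(struct VP9_COMP *cpi);  /* same meaning */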
diff --git a/vp9/encoder/vp9_ratectrl.c b/vp9/encoder/vp9_ratectrl.c
index 2d12ba9..bbcad17 100644
--- a/vp9/encoder/vp9_ratectrl.c
+++ b/vp9/encoder/vp9_ratectrl.c
@@ -76,35 +76,19 @@
// restored with a call to vp9_restore_coding_context. These functions are
// intended for use in a re-code loop in vp9_compress_frame where the
// quantizer value is adjusted between loop iterations.
-
- cc->nmvc = cm->fc.nmvc;
vp9_copy(cc->nmvjointcost, cpi->mb.nmvjointcost);
vp9_copy(cc->nmvcosts, cpi->mb.nmvcosts);
vp9_copy(cc->nmvcosts_hp, cpi->mb.nmvcosts_hp);
- vp9_copy(cc->inter_mode_probs, cm->fc.inter_mode_probs);
-
- vp9_copy(cc->y_mode_prob, cm->fc.y_mode_prob);
- vp9_copy(cc->uv_mode_prob, cm->fc.uv_mode_prob);
- vp9_copy(cc->partition_prob, cm->fc.partition_prob);
-
vp9_copy(cc->segment_pred_probs, cm->seg.pred_probs);
- vp9_copy(cc->intra_inter_prob, cm->fc.intra_inter_prob);
- vp9_copy(cc->comp_inter_prob, cm->fc.comp_inter_prob);
- vp9_copy(cc->single_ref_prob, cm->fc.single_ref_prob);
- vp9_copy(cc->comp_ref_prob, cm->fc.comp_ref_prob);
-
vpx_memcpy(cpi->coding_context.last_frame_seg_map_copy,
cm->last_frame_seg_map, (cm->mi_rows * cm->mi_cols));
vp9_copy(cc->last_ref_lf_deltas, cm->lf.last_ref_deltas);
vp9_copy(cc->last_mode_lf_deltas, cm->lf.last_mode_deltas);
- vp9_copy(cc->coef_probs, cm->fc.coef_probs);
- vp9_copy(cc->switchable_interp_prob, cm->fc.switchable_interp_prob);
- cc->tx_probs = cm->fc.tx_probs;
- vp9_copy(cc->mbskip_probs, cm->fc.mbskip_probs);
+ cc->fc = cm->fc;
}
void vp9_restore_coding_context(VP9_COMP *cpi) {
@@ -113,25 +97,12 @@
// Restore key state variables to the snapshot state stored in the
// previous call to vp9_save_coding_context.
-
- cm->fc.nmvc = cc->nmvc;
vp9_copy(cpi->mb.nmvjointcost, cc->nmvjointcost);
vp9_copy(cpi->mb.nmvcosts, cc->nmvcosts);
vp9_copy(cpi->mb.nmvcosts_hp, cc->nmvcosts_hp);
- vp9_copy(cm->fc.inter_mode_probs, cc->inter_mode_probs);
-
- vp9_copy(cm->fc.y_mode_prob, cc->y_mode_prob);
- vp9_copy(cm->fc.uv_mode_prob, cc->uv_mode_prob);
- vp9_copy(cm->fc.partition_prob, cc->partition_prob);
-
vp9_copy(cm->seg.pred_probs, cc->segment_pred_probs);
- vp9_copy(cm->fc.intra_inter_prob, cc->intra_inter_prob);
- vp9_copy(cm->fc.comp_inter_prob, cc->comp_inter_prob);
- vp9_copy(cm->fc.single_ref_prob, cc->single_ref_prob);
- vp9_copy(cm->fc.comp_ref_prob, cc->comp_ref_prob);
-
vpx_memcpy(cm->last_frame_seg_map,
cpi->coding_context.last_frame_seg_map_copy,
(cm->mi_rows * cm->mi_cols));
@@ -139,10 +110,7 @@
vp9_copy(cm->lf.last_ref_deltas, cc->last_ref_lf_deltas);
vp9_copy(cm->lf.last_mode_deltas, cc->last_mode_lf_deltas);
- vp9_copy(cm->fc.coef_probs, cc->coef_probs);
- vp9_copy(cm->fc.switchable_interp_prob, cc->switchable_interp_prob);
- cm->fc.tx_probs = cc->tx_probs;
- vp9_copy(cm->fc.mbskip_probs, cc->mbskip_probs);
+ cm->fc = cc->fc;
}
void vp9_setup_key_frame(VP9_COMP *cpi) {
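The save/restore simplification above leans on the fact that C struct assignment copies every member, embedded arrays included, so assigning the whole FRAME_CONTEXT replaces the long list of per-field vp9_copy() calls. A minimal sketch with a hypothetical struct:

    struct frame_ctx {
      unsigned char probs[16][4];  /* arrays are copied by assignment too */
      int nmvc;
    };

    void save_ctx(struct frame_ctx *snapshot, const struct frame_ctx *live) {
      *snapshot = *live;  /* one statement copies probs[][] and nmvc */
    }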
diff --git a/vp9/encoder/vp9_ratectrl.h b/vp9/encoder/vp9_ratectrl.h
index 4733176..ddda713 100644
--- a/vp9/encoder/vp9_ratectrl.h
+++ b/vp9/encoder/vp9_ratectrl.h
@@ -32,8 +32,8 @@
double vp9_convert_qindex_to_q(int qindex);
int vp9_gfboost_qadjust(int qindex);
-extern int vp9_bits_per_mb(FRAME_TYPE frame_type, int qindex,
- double correction_factor);
+int vp9_bits_per_mb(FRAME_TYPE frame_type, int qindex,
+ double correction_factor);
void vp9_setup_inter_frame(VP9_COMP *cpi);
#endif // VP9_ENCODER_VP9_RATECTRL_H_
diff --git a/vp9/encoder/vp9_rdopt.c b/vp9/encoder/vp9_rdopt.c
index 3013794..83cd612 100644
--- a/vp9/encoder/vp9_rdopt.c
+++ b/vp9/encoder/vp9_rdopt.c
@@ -186,6 +186,7 @@
// cpi->common.refresh_alt_ref_frame)
qindex = clamp(qindex, 0, MAXQ);
+ cpi->RDDIV = 100;
cpi->RDMULT = compute_rd_mult(qindex);
if (cpi->pass == 2 && (cpi->common.frame_type != KEY_FRAME)) {
if (cpi->twopass.next_iiratio > 31)
@@ -204,42 +205,18 @@
if (q < 8)
q = 8;
- if (cpi->RDMULT > 1000) {
- cpi->RDDIV = 1;
- cpi->RDMULT /= 100;
+ for (bsize = 0; bsize < BLOCK_SIZES; ++bsize) {
+ for (i = 0; i < MAX_MODES; i++) {
+ // Threshold here seems unnecessarily harsh but fine given actual
+ // range of values used for cpi->sf.thresh_mult[]
+ int thresh_max = INT_MAX / (q * rd_thresh_block_size_factor[bsize]);
- for (bsize = 0; bsize < BLOCK_SIZES; ++bsize) {
- for (i = 0; i < MAX_MODES; ++i) {
- // Threshold here seem unecessarily harsh but fine given actual
- // range of values used for cpi->sf.thresh_mult[]
- int thresh_max = INT_MAX / (q * rd_thresh_block_size_factor[bsize]);
-
- // *4 relates to the scaling of rd_thresh_block_size_factor[]
- if ((int64_t)cpi->sf.thresh_mult[i] < thresh_max) {
- cpi->rd_threshes[bsize][i] =
- cpi->sf.thresh_mult[i] * q *
- rd_thresh_block_size_factor[bsize] / (4 * 100);
- } else {
- cpi->rd_threshes[bsize][i] = INT_MAX;
- }
- }
- }
- } else {
- cpi->RDDIV = 100;
-
- for (bsize = 0; bsize < BLOCK_SIZES; ++bsize) {
- for (i = 0; i < MAX_MODES; i++) {
- // Threshold here seem unecessarily harsh but fine given actual
- // range of values used for cpi->sf.thresh_mult[]
- int thresh_max = INT_MAX / (q * rd_thresh_block_size_factor[bsize]);
-
- if (cpi->sf.thresh_mult[i] < thresh_max) {
- cpi->rd_threshes[bsize][i] =
+ if (cpi->sf.thresh_mult[i] < thresh_max) {
+ cpi->rd_threshes[bsize][i] =
cpi->sf.thresh_mult[i] * q *
rd_thresh_block_size_factor[bsize] / 4;
- } else {
- cpi->rd_threshes[bsize][i] = INT_MAX;
- }
+ } else {
+ cpi->rd_threshes[bsize][i] = INT_MAX;
}
}
}
@@ -269,7 +246,7 @@
cpi->mb.inter_mode_cost[i][m - NEARESTMV] =
cost_token(vp9_inter_mode_tree,
cpi->common.fc.inter_mode_probs[i],
- vp9_inter_mode_encodings - NEARESTMV + m);
+ vp9_inter_mode_encodings + (m - NEARESTMV));
}
}
}
@@ -554,9 +531,13 @@
TX_SIZE tx_size;
int bw;
int bh;
- int rate;
- int64_t dist;
- int64_t sse;
+ int rate[256];
+ int64_t dist[256];
+ int64_t sse[256];
+ int this_rate;
+ int64_t this_dist;
+ int64_t this_sse;
+ int64_t this_rd;
int64_t best_rd;
int skip;
const int16_t *scan, *nb;
@@ -573,17 +554,17 @@
int shift = args->tx_size == TX_32X32 ? 0 : 2;
int16_t *const coeff = BLOCK_OFFSET(p->coeff, block);
int16_t *const dqcoeff = BLOCK_OFFSET(pd->dqcoeff, block);
- args->dist += vp9_block_error(coeff, dqcoeff, 16 << ss_txfrm_size,
+ args->dist[block] = vp9_block_error(coeff, dqcoeff, 16 << ss_txfrm_size,
&this_sse) >> shift;
- args->sse += this_sse >> shift;
+ args->sse[block] = this_sse >> shift;
if (x->skip_encode &&
xd->this_mi->mbmi.ref_frame[0] == INTRA_FRAME) {
// TODO(jingning): tune the model to better capture the distortion.
int64_t p = (pd->dequant[1] * pd->dequant[1] *
(1 << ss_txfrm_size)) >> shift;
- args->dist += p;
- args->sse += p;
+ args->dist[block] = p;
+ args->sse[block] = p;
}
}
@@ -594,10 +575,10 @@
int x_idx, y_idx;
txfrm_block_to_raster_xy(plane_bsize, args->tx_size, block, &x_idx, &y_idx);
- args->rate += cost_coeffs(args->x, plane, block,
- args->t_above + x_idx,
- args->t_left + y_idx, args->tx_size,
- args->scan, args->nb);
+ args->rate[block] = cost_coeffs(args->x, plane, block,
+ args->t_above + x_idx,
+ args->t_left + y_idx, args->tx_size,
+ args->scan, args->nb);
}
static void block_yrd_txfm(int plane, int block, BLOCK_SIZE plane_bsize,
@@ -610,16 +591,6 @@
if (args->skip)
return;
- rd1 = RDCOST(x->rdmult, x->rddiv, args->rate, args->dist);
- rd2 = RDCOST(x->rdmult, x->rddiv, 0, args->sse);
- rd = MIN(rd1, rd2);
- if (rd > args->best_rd) {
- args->skip = 1;
- args->rate = INT_MAX;
- args->dist = INT64_MAX;
- args->sse = INT64_MAX;
- return;
- }
if (!is_inter_block(&xd->this_mi->mbmi))
vp9_encode_block_intra(plane, block, plane_bsize, tx_size, &encode_args);
@@ -628,6 +599,56 @@
dist_block(plane, block, tx_size, args);
rate_block(plane, block, plane_bsize, tx_size, args);
+ rd1 = RDCOST(x->rdmult, x->rddiv, args->rate[block], args->dist[block]);
+ rd2 = RDCOST(x->rdmult, x->rddiv, 0, args->sse[block]);
+
+ // TODO(jingning): temporarily enabled only for luma component
+ rd = MIN(rd1, rd2);
+ if (plane == 0)
+ x->zcoeff_blk[tx_size][block] = rd1 > rd2;
+
+ args->this_rate += args->rate[block];
+ args->this_dist += args->dist[block];
+ args->this_sse += args->sse[block];
+ args->this_rd += rd;
+
+ if (args->this_rd > args->best_rd) {
+ args->skip = 1;
+ return;
+ }
+}
+
+void vp9_get_entropy_contexts(TX_SIZE tx_size,
+ ENTROPY_CONTEXT t_above[16], ENTROPY_CONTEXT t_left[16],
+ const ENTROPY_CONTEXT *above, const ENTROPY_CONTEXT *left,
+ int num_4x4_w, int num_4x4_h) {
+ int i;
+ switch (tx_size) {
+ case TX_4X4:
+ vpx_memcpy(t_above, above, sizeof(ENTROPY_CONTEXT) * num_4x4_w);
+ vpx_memcpy(t_left, left, sizeof(ENTROPY_CONTEXT) * num_4x4_h);
+ break;
+ case TX_8X8:
+ for (i = 0; i < num_4x4_w; i += 2)
+ t_above[i] = !!*(const uint16_t *)&above[i];
+ for (i = 0; i < num_4x4_h; i += 2)
+ t_left[i] = !!*(const uint16_t *)&left[i];
+ break;
+ case TX_16X16:
+ for (i = 0; i < num_4x4_w; i += 4)
+ t_above[i] = !!*(const uint32_t *)&above[i];
+ for (i = 0; i < num_4x4_h; i += 4)
+ t_left[i] = !!*(const uint32_t *)&left[i];
+ break;
+ case TX_32X32:
+ for (i = 0; i < num_4x4_w; i += 8)
+ t_above[i] = !!*(const uint64_t *)&above[i];
+ for (i = 0; i < num_4x4_h; i += 8)
+ t_left[i] = !!*(const uint64_t *)&left[i];
+ break;
+ default:
+ assert(!"Invalid transform size.");
+ }
}
static void txfm_rd_in_plane(MACROBLOCK *x,
@@ -638,45 +659,33 @@
MACROBLOCKD *const xd = &x->e_mbd;
struct macroblockd_plane *const pd = &xd->plane[plane];
const BLOCK_SIZE bs = get_plane_block_size(bsize, pd);
- const int num_4x4_blocks_wide = num_4x4_blocks_wide_lookup[bs];
- const int num_4x4_blocks_high = num_4x4_blocks_high_lookup[bs];
- int i;
+ const int num_4x4_w = num_4x4_blocks_wide_lookup[bs];
+ const int num_4x4_h = num_4x4_blocks_high_lookup[bs];
+
struct rdcost_block_args args = { x, { 0 }, { 0 }, tx_size,
- num_4x4_blocks_wide, num_4x4_blocks_high,
- 0, 0, 0, ref_best_rd, 0 };
+ num_4x4_w, num_4x4_h,
+ { 0 }, { 0 }, { 0 },
+ 0, 0, 0, 0, ref_best_rd, 0 };
if (plane == 0)
xd->this_mi->mbmi.tx_size = tx_size;
+ vp9_get_entropy_contexts(tx_size, args.t_above, args.t_left,
+ pd->above_context, pd->left_context,
+ num_4x4_w, num_4x4_h);
switch (tx_size) {
case TX_4X4:
- vpx_memcpy(&args.t_above, pd->above_context,
- sizeof(ENTROPY_CONTEXT) * num_4x4_blocks_wide);
- vpx_memcpy(&args.t_left, pd->left_context,
- sizeof(ENTROPY_CONTEXT) * num_4x4_blocks_high);
get_scan_nb_4x4(get_tx_type_4x4(pd->plane_type, xd, 0),
&args.scan, &args.nb);
break;
case TX_8X8:
- for (i = 0; i < num_4x4_blocks_wide; i += 2)
- args.t_above[i] = !!*(uint16_t *)&pd->above_context[i];
- for (i = 0; i < num_4x4_blocks_high; i += 2)
- args.t_left[i] = !!*(uint16_t *)&pd->left_context[i];
get_scan_nb_8x8(get_tx_type_8x8(pd->plane_type, xd),
&args.scan, &args.nb);
break;
case TX_16X16:
- for (i = 0; i < num_4x4_blocks_wide; i += 4)
- args.t_above[i] = !!*(uint32_t *)&pd->above_context[i];
- for (i = 0; i < num_4x4_blocks_high; i += 4)
- args.t_left[i] = !!*(uint32_t *)&pd->left_context[i];
get_scan_nb_16x16(get_tx_type_16x16(pd->plane_type, xd),
&args.scan, &args.nb);
break;
case TX_32X32:
- for (i = 0; i < num_4x4_blocks_wide; i += 8)
- args.t_above[i] = !!*(uint64_t *)&pd->above_context[i];
- for (i = 0; i < num_4x4_blocks_high; i += 8)
- args.t_left[i] = !!*(uint64_t *)&pd->left_context[i];
args.scan = vp9_default_scan_32x32;
args.nb = vp9_default_scan_32x32_neighbors;
break;
@@ -685,10 +694,17 @@
}
foreach_transformed_block_in_plane(xd, bsize, plane, block_yrd_txfm, &args);
- *distortion = args.dist;
- *rate = args.rate;
- *sse = args.sse;
- *skippable = vp9_is_skippable_in_plane(xd, bsize, plane) && (!args.skip);
+ if (args.skip) {
+ *rate = INT_MAX;
+ *distortion = INT64_MAX;
+ *sse = INT64_MAX;
+ *skippable = 0;
+ } else {
+ *distortion = args.this_dist;
+ *rate = args.this_rate;
+ *sse = args.this_sse;
+ *skippable = vp9_is_skippable_in_plane(xd, bsize, plane);
+ }
}
static void choose_largest_txfm_size(VP9_COMP *cpi, MACROBLOCK *x,
@@ -696,15 +712,15 @@
int *skip, int64_t *sse,
int64_t ref_best_rd,
BLOCK_SIZE bs) {
- const TX_SIZE max_txfm_size = max_txsize_lookup[bs];
+ const TX_SIZE max_tx_size = max_txsize_lookup[bs];
VP9_COMMON *const cm = &cpi->common;
MACROBLOCKD *const xd = &x->e_mbd;
MB_MODE_INFO *const mbmi = &xd->this_mi->mbmi;
- if (max_txfm_size == TX_32X32 &&
+ if (max_tx_size == TX_32X32 &&
(cm->tx_mode == ALLOW_32X32 ||
cm->tx_mode == TX_MODE_SELECT)) {
mbmi->tx_size = TX_32X32;
- } else if (max_txfm_size >= TX_16X16 &&
+ } else if (max_tx_size >= TX_16X16 &&
(cm->tx_mode == ALLOW_16X16 ||
cm->tx_mode == ALLOW_32X32 ||
cm->tx_mode == TX_MODE_SELECT)) {
@@ -717,7 +733,7 @@
txfm_rd_in_plane(x, rate, distortion, skip,
&sse[mbmi->tx_size], ref_best_rd, 0, bs,
mbmi->tx_size);
- cpi->txfm_stepdown_count[0]++;
+ cpi->tx_stepdown_count[0]++;
}
static void choose_txfm_size_from_rd(VP9_COMP *cpi, MACROBLOCK *x,
@@ -811,15 +827,15 @@
rd[TX_32X32][1] < rd[TX_16X16][1] &&
rd[TX_32X32][1] < rd[TX_8X8][1] &&
rd[TX_32X32][1] < rd[TX_4X4][1]) {
- cpi->txfm_stepdown_count[0]++;
+ cpi->tx_stepdown_count[0]++;
} else if (max_tx_size >= TX_16X16 &&
rd[TX_16X16][1] < rd[TX_8X8][1] &&
rd[TX_16X16][1] < rd[TX_4X4][1]) {
- cpi->txfm_stepdown_count[max_tx_size - TX_16X16]++;
+ cpi->tx_stepdown_count[max_tx_size - TX_16X16]++;
} else if (rd[TX_8X8][1] < rd[TX_4X4][1]) {
- cpi->txfm_stepdown_count[max_tx_size - TX_8X8]++;
+ cpi->tx_stepdown_count[max_tx_size - TX_8X8]++;
} else {
- cpi->txfm_stepdown_count[max_tx_size - TX_4X4]++;
+ cpi->tx_stepdown_count[max_tx_size - TX_4X4]++;
}
}
@@ -829,7 +845,7 @@
int *s, int *skip, int64_t *sse,
int64_t ref_best_rd,
BLOCK_SIZE bs) {
- const TX_SIZE max_txfm_size = max_txsize_lookup[bs];
+ const TX_SIZE max_tx_size = max_txsize_lookup[bs];
VP9_COMMON *const cm = &cpi->common;
MACROBLOCKD *const xd = &x->e_mbd;
MB_MODE_INFO *const mbmi = &xd->this_mi->mbmi;
@@ -845,9 +861,9 @@
// for (n = TX_4X4; n <= max_txfm_size; n++)
// r[n][0] = (r[n][0] * scale_r[n]);
- for (n = TX_4X4; n <= max_txfm_size; n++) {
+ for (n = TX_4X4; n <= max_tx_size; n++) {
r[n][1] = r[n][0];
- for (m = 0; m <= n - (n == max_txfm_size); m++) {
+ for (m = 0; m <= n - (n == max_tx_size); m++) {
if (m == n)
r[n][1] += vp9_cost_zero(tx_probs[m]);
else
@@ -859,7 +875,7 @@
s0 = vp9_cost_bit(skip_prob, 0);
s1 = vp9_cost_bit(skip_prob, 1);
- for (n = TX_4X4; n <= max_txfm_size; n++) {
+ for (n = TX_4X4; n <= max_tx_size; n++) {
if (s[n]) {
rd[n][0] = rd[n][1] = RDCOST(x->rdmult, x->rddiv, s1, d[n]);
} else {
@@ -867,19 +883,19 @@
rd[n][1] = RDCOST(x->rdmult, x->rddiv, r[n][1] + s0, d[n]);
}
}
- for (n = TX_4X4; n <= max_txfm_size; n++) {
+ for (n = TX_4X4; n <= max_tx_size; n++) {
rd[n][0] = (int64_t)(scale_rd[n] * rd[n][0]);
rd[n][1] = (int64_t)(scale_rd[n] * rd[n][1]);
}
- if (max_txfm_size == TX_32X32 &&
+ if (max_tx_size == TX_32X32 &&
(cm->tx_mode == ALLOW_32X32 ||
(cm->tx_mode == TX_MODE_SELECT &&
rd[TX_32X32][1] <= rd[TX_16X16][1] &&
rd[TX_32X32][1] <= rd[TX_8X8][1] &&
rd[TX_32X32][1] <= rd[TX_4X4][1]))) {
mbmi->tx_size = TX_32X32;
- } else if (max_txfm_size >= TX_16X16 &&
+ } else if (max_tx_size >= TX_16X16 &&
(cm->tx_mode == ALLOW_16X16 ||
cm->tx_mode == ALLOW_32X32 ||
(cm->tx_mode == TX_MODE_SELECT &&
@@ -901,19 +917,19 @@
txfm_rd_in_plane(x, rate, distortion, skip, &sse[mbmi->tx_size],
ref_best_rd, 0, bs, mbmi->tx_size);
- if (max_txfm_size == TX_32X32 &&
+ if (max_tx_size == TX_32X32 &&
rd[TX_32X32][1] <= rd[TX_16X16][1] &&
rd[TX_32X32][1] <= rd[TX_8X8][1] &&
rd[TX_32X32][1] <= rd[TX_4X4][1]) {
- cpi->txfm_stepdown_count[0]++;
- } else if (max_txfm_size >= TX_16X16 &&
+ cpi->tx_stepdown_count[0]++;
+ } else if (max_tx_size >= TX_16X16 &&
rd[TX_16X16][1] <= rd[TX_8X8][1] &&
rd[TX_16X16][1] <= rd[TX_4X4][1]) {
- cpi->txfm_stepdown_count[max_txfm_size - TX_16X16]++;
+ cpi->tx_stepdown_count[max_tx_size - TX_16X16]++;
} else if (rd[TX_8X8][1] <= rd[TX_4X4][1]) {
- cpi->txfm_stepdown_count[max_txfm_size - TX_8X8]++;
+ cpi->tx_stepdown_count[max_tx_size - TX_8X8]++;
} else {
- cpi->txfm_stepdown_count[max_txfm_size - TX_4X4]++;
+ cpi->tx_stepdown_count[max_tx_size - TX_4X4]++;
}
}
@@ -1058,6 +1074,7 @@
for (idx = 0; idx < num_4x4_blocks_wide; ++idx) {
int64_t ssz;
const int16_t *scan;
+ const int16_t *nb;
uint8_t *src = src_init + idx * 4 + idy * 4 * src_stride;
uint8_t *dst = dst_init + idx * 4 + idy * 4 * dst_stride;
@@ -1083,10 +1100,10 @@
x->quantize_b_4x4(x, block, tx_type, 16);
}
- scan = get_scan_4x4(get_tx_type_4x4(PLANE_TYPE_Y_WITH_DC, xd, block));
+ get_scan_nb_4x4(get_tx_type_4x4(PLANE_TYPE_Y_WITH_DC, xd, block),
+ &scan, &nb);
ratey += cost_coeffs(x, 0, block,
- tempa + idx, templ + idy, TX_4X4, scan,
- vp9_get_coef_neighbors_handle(scan));
+ tempa + idx, templ + idy, TX_4X4, scan, nb);
distortion += vp9_block_error(coeff, BLOCK_OFFSET(pd->dqcoeff, block),
16, &ssz) >> 2;
if (RDCOST(x->rdmult, x->rddiv, ratey, distortion) >= best_rd)
@@ -1458,11 +1475,12 @@
switch (m = this_mode) {
case NEWMV:
this_mv->as_int = seg_mvs[mbmi->ref_frame[0]].as_int;
- thismvcost = vp9_mv_bit_cost(this_mv, best_ref_mv, mvjcost, mvcost,
- 102);
+ thismvcost = vp9_mv_bit_cost(&this_mv->as_mv, &best_ref_mv->as_mv,
+ mvjcost, mvcost, 102);
if (has_second_rf) {
this_second_mv->as_int = seg_mvs[mbmi->ref_frame[1]].as_int;
- thismvcost += vp9_mv_bit_cost(this_second_mv, second_best_ref_mv,
+ thismvcost += vp9_mv_bit_cost(&this_second_mv->as_mv,
+ &second_best_ref_mv->as_mv,
mvjcost, mvcost, 102);
}
break;
@@ -1515,25 +1533,21 @@
int k;
MACROBLOCKD *xd = &x->e_mbd;
struct macroblockd_plane *const pd = &xd->plane[0];
+ struct macroblock_plane *const p = &x->plane[0];
MODE_INFO *const mi = xd->this_mi;
const BLOCK_SIZE bsize = mi->mbmi.sb_type;
const int width = plane_block_width(bsize, pd);
const int height = plane_block_height(bsize, pd);
int idx, idy;
- const int src_stride = x->plane[0].src.stride;
- uint8_t* const src = raster_block_offset_uint8(BLOCK_8X8, i,
- x->plane[0].src.buf,
- src_stride);
- int16_t* src_diff = raster_block_offset_int16(BLOCK_8X8, i,
- x->plane[0].src_diff);
- int16_t* coeff = BLOCK_OFFSET(x->plane[0].coeff, i);
- uint8_t* const dst = raster_block_offset_uint8(BLOCK_8X8, i,
+
+ uint8_t *const src = raster_block_offset_uint8(BLOCK_8X8, i,
+ p->src.buf, p->src.stride);
+ uint8_t *const dst = raster_block_offset_uint8(BLOCK_8X8, i,
pd->dst.buf, pd->dst.stride);
int64_t thisdistortion = 0, thissse = 0;
- int thisrate = 0;
- int ref, second_ref = has_second_ref(&mi->mbmi);
-
- for (ref = 0; ref < 1 + second_ref; ++ref) {
+ int thisrate = 0, ref;
+ const int is_compound = has_second_ref(&mi->mbmi);
+ for (ref = 0; ref < 1 + is_compound; ++ref) {
const uint8_t *pre = raster_block_offset_uint8(BLOCK_8X8, i,
pd->pre[ref].buf, pd->pre[ref].stride);
vp9_build_inter_predictor(pre, pd->pre[ref].stride,
@@ -1543,19 +1557,21 @@
width, height, ref, &xd->subpix, MV_PRECISION_Q3);
}
- vp9_subtract_block(height, width, src_diff, 8, src, src_stride,
+ vp9_subtract_block(height, width,
+ raster_block_offset_int16(BLOCK_8X8, i, p->src_diff), 8,
+ src, p->src.stride,
dst, pd->dst.stride);
k = i;
for (idy = 0; idy < height / 4; ++idy) {
for (idx = 0; idx < width / 4; ++idx) {
int64_t ssz, rd, rd1, rd2;
+ int16_t* coeff;
k += (idy * 2 + idx);
- src_diff = raster_block_offset_int16(BLOCK_8X8, k,
- x->plane[0].src_diff);
- coeff = BLOCK_OFFSET(x->plane[0].coeff, k);
- x->fwd_txm4x4(src_diff, coeff, 16);
+ coeff = BLOCK_OFFSET(p->coeff, k);
+ x->fwd_txm4x4(raster_block_offset_int16(BLOCK_8X8, k, p->src_diff),
+ coeff, 16);
x->quantize_b_4x4(x, k, DCT_DCT, 16);
thisdistortion += vp9_block_error(coeff, BLOCK_OFFSET(pd->dqcoeff, k),
16, &ssz);
@@ -1572,6 +1588,7 @@
return INT64_MAX;
}
}
+
*distortion = thisdistortion >> 2;
*labelyrate = thisrate;
*sse = thissse >> 2;
@@ -1684,17 +1701,17 @@
i = idy * 2 + idx;
frame_mv[ZEROMV][mbmi->ref_frame[0]].as_int = 0;
- frame_mv[ZEROMV][mbmi->ref_frame[1]].as_int = 0;
vp9_append_sub8x8_mvs_for_idx(&cpi->common, &x->e_mbd,
&frame_mv[NEARESTMV][mbmi->ref_frame[0]],
&frame_mv[NEARMV][mbmi->ref_frame[0]],
i, 0, mi_row, mi_col);
- if (has_second_rf)
+ if (has_second_rf) {
+ frame_mv[ZEROMV][mbmi->ref_frame[1]].as_int = 0;
vp9_append_sub8x8_mvs_for_idx(&cpi->common, &x->e_mbd,
- &frame_mv[NEARESTMV][mbmi->ref_frame[1]],
- &frame_mv[NEARMV][mbmi->ref_frame[1]],
- i, 1, mi_row, mi_col);
-
+ &frame_mv[NEARESTMV][mbmi->ref_frame[1]],
+ &frame_mv[NEARMV][mbmi->ref_frame[1]],
+ i, 1, mi_row, mi_col);
+ }
// search for the best motion vector on this segment
for (this_mode = NEARESTMV; this_mode <= NEWMV; ++this_mode) {
const struct buf_2d orig_src = x->plane[0].src;
@@ -1797,20 +1814,23 @@
// adjust src pointer for this block
mi_buf_shift(x, i);
if (cpi->sf.search_method == HEX) {
- bestsme = vp9_hex_search(x, &mvp_full,
+ bestsme = vp9_hex_search(x, &mvp_full.as_mv,
step_param,
sadpb, 1, v_fn_ptr, 1,
- bsi->ref_mv, &mode_mv[NEWMV]);
+ &bsi->ref_mv->as_mv,
+ &mode_mv[NEWMV].as_mv);
} else if (cpi->sf.search_method == SQUARE) {
- bestsme = vp9_square_search(x, &mvp_full,
+ bestsme = vp9_square_search(x, &mvp_full.as_mv,
step_param,
sadpb, 1, v_fn_ptr, 1,
- bsi->ref_mv, &mode_mv[NEWMV]);
+ &bsi->ref_mv->as_mv,
+ &mode_mv[NEWMV].as_mv);
} else if (cpi->sf.search_method == BIGDIA) {
- bestsme = vp9_bigdia_search(x, &mvp_full,
+ bestsme = vp9_bigdia_search(x, &mvp_full.as_mv,
step_param,
sadpb, 1, v_fn_ptr, 1,
- bsi->ref_mv, &mode_mv[NEWMV]);
+ &bsi->ref_mv->as_mv,
+ &mode_mv[NEWMV].as_mv);
} else {
bestsme = vp9_full_pixel_diamond(cpi, x, &mvp_full, step_param,
sadpb, further_steps, 0, v_fn_ptr,
@@ -1841,8 +1861,10 @@
if (bestsme < INT_MAX) {
int distortion;
unsigned int sse;
- cpi->find_fractional_mv_step(x, &mode_mv[NEWMV],
- bsi->ref_mv, x->errorperbit, v_fn_ptr,
+ cpi->find_fractional_mv_step(x,
+ &mode_mv[NEWMV].as_mv,
+ &bsi->ref_mv->as_mv,
+ x->errorperbit, v_fn_ptr,
0, cpi->sf.subpel_iters_per_step,
x->nmvjointcost, x->mvcost,
&distortion, &sse);
@@ -2221,11 +2243,12 @@
ctx->comp_pred_diff = (int)comp_pred_diff[COMP_PREDICTION_ONLY];
ctx->hybrid_pred_diff = (int)comp_pred_diff[HYBRID_PREDICTION];
- // FIXME(rbultje) does this memcpy the whole array? I believe sizeof()
- // doesn't actually work this way
- memcpy(ctx->tx_rd_diff, tx_size_diff, sizeof(ctx->tx_rd_diff));
- memcpy(ctx->best_filter_diff, best_filter_diff,
- sizeof(*best_filter_diff) * (SWITCHABLE_FILTERS + 1));
+ vpx_memcpy(ctx->zcoeff_blk, x->zcoeff_blk[xd->this_mi->mbmi.tx_size],
+ sizeof(ctx->zcoeff_blk));
+
+ vpx_memcpy(ctx->tx_rd_diff, tx_size_diff, sizeof(ctx->tx_rd_diff));
+ vpx_memcpy(ctx->best_filter_diff, best_filter_diff,
+ sizeof(*best_filter_diff) * (SWITCHABLE_FILTERS + 1));
}
static void setup_pred_block(const MACROBLOCKD *xd,
@@ -2297,7 +2320,7 @@
// Further refinement that is encode side only to test the top few candidates
// in full and choose the best as the centre point for subsequent searches.
// The current implementation doesn't support scaling.
- if (!vp9_is_scaled(&scale[frame_type]))
+ if (!vp9_is_scaled(&scale[frame_type]) && block_size >= BLOCK_8X8)
mv_pred(cpi, x, yv12_mb[frame_type][0].buf, yv12->y_stride,
frame_type, block_size);
}
@@ -2404,23 +2427,23 @@
further_steps = (cpi->sf.max_step_search_steps - 1) - step_param;
if (cpi->sf.search_method == HEX) {
- bestsme = vp9_hex_search(x, &mvp_full,
+ bestsme = vp9_hex_search(x, &mvp_full.as_mv,
step_param,
sadpb, 1,
&cpi->fn_ptr[block_size], 1,
- &ref_mv, tmp_mv);
+ &ref_mv.as_mv, &tmp_mv->as_mv);
} else if (cpi->sf.search_method == SQUARE) {
- bestsme = vp9_square_search(x, &mvp_full,
+ bestsme = vp9_square_search(x, &mvp_full.as_mv,
step_param,
sadpb, 1,
&cpi->fn_ptr[block_size], 1,
- &ref_mv, tmp_mv);
+ &ref_mv.as_mv, &tmp_mv->as_mv);
} else if (cpi->sf.search_method == BIGDIA) {
- bestsme = vp9_bigdia_search(x, &mvp_full,
+ bestsme = vp9_bigdia_search(x, &mvp_full.as_mv,
step_param,
sadpb, 1,
&cpi->fn_ptr[block_size], 1,
- &ref_mv, tmp_mv);
+ &ref_mv.as_mv, &tmp_mv->as_mv);
} else {
bestsme = vp9_full_pixel_diamond(cpi, x, &mvp_full, step_param,
sadpb, further_steps, 1,
@@ -2436,16 +2459,15 @@
if (bestsme < INT_MAX) {
int dis; /* TODO: use dis in distortion calculation later. */
unsigned int sse;
- cpi->find_fractional_mv_step(x, tmp_mv, &ref_mv,
+ cpi->find_fractional_mv_step(x, &tmp_mv->as_mv, &ref_mv.as_mv,
x->errorperbit,
&cpi->fn_ptr[block_size],
0, cpi->sf.subpel_iters_per_step,
x->nmvjointcost, x->mvcost,
&dis, &sse);
}
- *rate_mv = vp9_mv_bit_cost(tmp_mv, &ref_mv,
- x->nmvjointcost, x->mvcost,
- 96);
+ *rate_mv = vp9_mv_bit_cost(&tmp_mv->as_mv, &ref_mv.as_mv,
+ x->nmvjointcost, x->mvcost, 96);
if (cpi->sf.adaptive_motion_search && cpi->common.show_frame)
x->pred_mv[ref].as_int = tmp_mv->as_int;
@@ -2501,7 +2523,7 @@
for (i = 0; i < MAX_MB_PLANE; i++)
backup_second_yv12[i] = xd->plane[i].pre[1];
- setup_pre_planes(xd, 0, scaled_ref_frame[1], mi_row, mi_col, NULL);
+ setup_pre_planes(xd, 1, scaled_ref_frame[1], mi_row, mi_col, NULL);
}
xd->scale_factor[0].set_scaled_offsets(&xd->scale_factor[0],
@@ -2571,8 +2593,8 @@
unsigned int sse;
bestsme = cpi->find_fractional_mv_step_comp(
- x, &tmp_mv,
- &ref_mv[id],
+ x, &tmp_mv.as_mv,
+ &ref_mv[id].as_mv,
x->errorperbit,
&cpi->fn_ptr[block_size],
0, cpi->sf.subpel_iters_per_step,
@@ -2604,11 +2626,11 @@
for (i = 0; i < MAX_MB_PLANE; i++)
xd->plane[i].pre[1] = backup_second_yv12[i];
}
- *rate_mv = vp9_mv_bit_cost(&frame_mv[refs[0]],
- &mbmi->ref_mvs[refs[0]][0],
+ *rate_mv = vp9_mv_bit_cost(&frame_mv[refs[0]].as_mv,
+ &mbmi->ref_mvs[refs[0]][0].as_mv,
x->nmvjointcost, x->mvcost, 96);
- *rate_mv += vp9_mv_bit_cost(&frame_mv[refs[1]],
- &mbmi->ref_mvs[refs[1]][0],
+ *rate_mv += vp9_mv_bit_cost(&frame_mv[refs[1]].as_mv,
+ &mbmi->ref_mvs[refs[1]][0].as_mv,
x->nmvjointcost, x->mvcost, 96);
vpx_free(second_pred);
@@ -2631,7 +2653,7 @@
VP9_COMMON *cm = &cpi->common;
MACROBLOCKD *xd = &x->e_mbd;
MB_MODE_INFO *mbmi = &xd->this_mi->mbmi;
- const int is_comp_pred = (mbmi->ref_frame[1] > 0);
+ const int is_comp_pred = has_second_ref(mbmi);
const int num_refs = is_comp_pred ? 2 : 1;
const int this_mode = mbmi->mode;
int_mv *frame_mv = mode_mv[this_mode];
@@ -2660,11 +2682,11 @@
joint_motion_search(cpi, x, bsize, frame_mv,
mi_row, mi_col, single_newmv, &rate_mv);
} else {
- rate_mv = vp9_mv_bit_cost(&frame_mv[refs[0]],
- &mbmi->ref_mvs[refs[0]][0],
+ rate_mv = vp9_mv_bit_cost(&frame_mv[refs[0]].as_mv,
+ &mbmi->ref_mvs[refs[0]][0].as_mv,
x->nmvjointcost, x->mvcost, 96);
- rate_mv += vp9_mv_bit_cost(&frame_mv[refs[1]],
- &mbmi->ref_mvs[refs[1]][0],
+ rate_mv += vp9_mv_bit_cost(&frame_mv[refs[1]].as_mv,
+ &mbmi->ref_mvs[refs[1]][0].as_mv,
x->nmvjointcost, x->mvcost, 96);
}
if (frame_mv[refs[0]].as_int == INVALID_MV ||
@@ -3072,8 +3094,12 @@
vp9_cost_bit(vp9_get_pred_prob_mbskip(cm, xd), 0);
*returndist = dist_y + dist_uv;
if (cpi->sf.tx_size_search_method == USE_FULL_RD)
- for (i = 0; i < TX_MODES; i++)
- ctx->tx_rd_diff[i] = tx_cache[i] - tx_cache[cm->tx_mode];
+ for (i = 0; i < TX_MODES; i++) {
+ if (tx_cache[i] < INT64_MAX && tx_cache[cm->tx_mode] < INT64_MAX)
+ ctx->tx_rd_diff[i] = tx_cache[i] - tx_cache[cm->tx_mode];
+ else
+ ctx->tx_rd_diff[i] = 0;
+ }
}
ctx->mic = *xd->this_mi;
@@ -3140,8 +3166,11 @@
const int bws = num_8x8_blocks_wide_lookup[bsize] / 2;
const int bhs = num_8x8_blocks_high_lookup[bsize] / 2;
int best_skip2 = 0;
+ unsigned char best_zcoeff_blk[256] = { 0 };
x->skip_encode = cpi->sf.skip_encode_frame && xd->q_index < QIDX_SKIP_THRESH;
+ vpx_memset(x->zcoeff_blk, 0, sizeof(x->zcoeff_blk));
+ vpx_memset(ctx->zcoeff_blk, 0, sizeof(ctx->zcoeff_blk));
for (i = 0; i < 4; i++) {
int j;
@@ -3263,8 +3292,8 @@
continue;
// Test best rd so far against threshold for trying this mode.
- if ((best_rd < ((cpi->rd_threshes[bsize][mode_index] *
- cpi->rd_thresh_freq_fact[bsize][mode_index]) >> 5)) ||
+ if ((best_rd < ((int64_t)cpi->rd_threshes[bsize][mode_index] *
+ cpi->rd_thresh_freq_fact[bsize][mode_index] >> 5)) ||
cpi->rd_threshes[bsize][mode_index] == INT_MAX)
continue;
@@ -3813,6 +3842,8 @@
best_mbmode = *mbmi;
best_skip2 = this_skip2;
best_partition = *x->partition_info;
+ vpx_memcpy(best_zcoeff_blk, x->zcoeff_blk[mbmi->tx_size],
+ sizeof(best_zcoeff_blk));
if (this_mode == RD_I4X4_PRED || this_mode == RD_SPLITMV)
for (i = 0; i < 4; i++)
@@ -3994,13 +4025,11 @@
if (best_mbmode.ref_frame[0] != INTRA_FRAME &&
best_mbmode.sb_type < BLOCK_8X8) {
for (i = 0; i < 4; i++)
- xd->this_mi->bmi[i].as_mv[0].as_int =
- best_bmodes[i].as_mv[0].as_int;
+ xd->this_mi->bmi[i].as_mv[0].as_int = best_bmodes[i].as_mv[0].as_int;
- if (mbmi->ref_frame[1] > 0)
+ if (has_second_ref(mbmi))
for (i = 0; i < 4; i++)
- xd->this_mi->bmi[i].as_mv[1].as_int =
- best_bmodes[i].as_mv[1].as_int;
+ xd->this_mi->bmi[i].as_mv[1].as_int = best_bmodes[i].as_mv[1].as_int;
*x->partition_info = best_partition;
@@ -4008,6 +4037,9 @@
mbmi->mv[1].as_int = xd->this_mi->bmi[3].as_mv[1].as_int;
}
+ vpx_memcpy(x->zcoeff_blk[mbmi->tx_size], best_zcoeff_blk,
+ sizeof(best_zcoeff_blk));
+
for (i = 0; i < NB_PREDICTION_TYPES; ++i) {
if (best_pred_rd[i] == INT64_MAX)
best_pred_diff[i] = INT_MIN;
diff --git a/vp9/encoder/vp9_rdopt.h b/vp9/encoder/vp9_rdopt.h
index eba7df9..9796c0d 100644
--- a/vp9/encoder/vp9_rdopt.h
+++ b/vp9/encoder/vp9_rdopt.h
@@ -33,4 +33,9 @@
void vp9_set_mbmode_and_mvs(MACROBLOCK *x,
MB_PREDICTION_MODE mb, int_mv *mv);
+void vp9_get_entropy_contexts(TX_SIZE tx_size,
+ ENTROPY_CONTEXT t_above[16], ENTROPY_CONTEXT t_left[16],
+ const ENTROPY_CONTEXT *above, const ENTROPY_CONTEXT *left,
+ int num_4x4_w, int num_4x4_h);
+
#endif // VP9_ENCODER_VP9_RDOPT_H_
diff --git a/vp9/encoder/vp9_segmentation.c b/vp9/encoder/vp9_segmentation.c
index 10655e8..874b71a 100644
--- a/vp9/encoder/vp9_segmentation.c
+++ b/vp9/encoder/vp9_segmentation.c
@@ -130,6 +130,8 @@
return;
segment_id = mi_8x8[0]->mbmi.segment_id;
+ xd->mi_8x8 = mi_8x8;
+ xd->this_mi = mi_8x8[0];
set_mi_row_col(cm, xd, mi_row, bh, mi_col, bw);
diff --git a/vp9/encoder/vp9_temporal_filter.c b/vp9/encoder/vp9_temporal_filter.c
index 63826ee..1768b5b 100644
--- a/vp9/encoder/vp9_temporal_filter.c
+++ b/vp9/encoder/vp9_temporal_filter.c
@@ -154,10 +154,10 @@
// TODO Check that the 16x16 vf & sdf are selected here
// Ignore mv costing by sending NULL pointer instead of cost arrays
ref_mv = &x->e_mbd.mi_8x8[0]->bmi[0].as_mv[0];
- bestsme = vp9_hex_search(x, &best_ref_mv1_full,
+ bestsme = vp9_hex_search(x, &best_ref_mv1_full.as_mv,
step_param, sadpb, 1,
&cpi->fn_ptr[BLOCK_16X16],
- 0, &best_ref_mv1, ref_mv);
+ 0, &best_ref_mv1.as_mv, &ref_mv->as_mv);
#if ALT_REF_SUBPEL_ENABLED
// Try sub-pixel MC?
@@ -166,8 +166,8 @@
int distortion;
unsigned int sse;
// Ignore mv costing by sending NULL pointer instead of cost array
- bestsme = cpi->find_fractional_mv_step(x, ref_mv,
- &best_ref_mv1,
+ bestsme = cpi->find_fractional_mv_step(x, &ref_mv->as_mv,
+ &best_ref_mv1.as_mv,
x->errorperbit,
&cpi->fn_ptr[BLOCK_16X16],
0, cpi->sf.subpel_iters_per_step,
diff --git a/vp9/encoder/vp9_tokenize.c b/vp9/encoder/vp9_tokenize.c
index 0c9bf9d..7c14c18 100644
--- a/vp9/encoder/vp9_tokenize.c
+++ b/vp9/encoder/vp9_tokenize.c
@@ -129,19 +129,15 @@
const int ref = is_inter_block(mbmi);
uint8_t token_cache[1024];
const uint8_t *band_translate;
- ENTROPY_CONTEXT *A, *L;
const int seg_eob = get_tx_eob(&cpi->common.seg, segment_id, tx_size);
int aoff, loff;
txfrm_block_to_raster_xy(plane_bsize, tx_size, block, &aoff, &loff);
- A = pd->above_context + aoff;
- L = pd->left_context + loff;
-
assert((!type && !plane) || (type && plane));
- pt = get_entropy_context(xd, tx_size, type, block, A, L,
- &scan, &band_translate);
- nb = vp9_get_coef_neighbors_handle(scan);
+ pt = get_entropy_context(tx_size, pd->above_context + aoff,
+ pd->left_context + loff);
+ get_scan_and_band(xd, tx_size, type, block, &scan, &nb, &band_translate);
c = 0;
do {
const int band = get_coef_band(band_translate, c);
diff --git a/vp9/encoder/vp9_variance_c.c b/vp9/encoder/vp9_variance_c.c
index 155ba8a..991ef4d 100644
--- a/vp9/encoder/vp9_variance_c.c
+++ b/vp9/encoder/vp9_variance_c.c
@@ -8,14 +8,151 @@
* be found in the AUTHORS file in the root of the source tree.
*/
-
-#include "vp9/encoder/vp9_variance.h"
-#include "vp9/common/vp9_filter.h"
-#include "vp9/common/vp9_subpelvar.h"
-#include "vpx/vpx_integer.h"
-#include "vpx_ports/mem.h"
#include "./vp9_rtcd.h"
+#include "vpx_ports/mem.h"
+#include "vpx/vpx_integer.h"
+
+#include "vp9/common/vp9_common.h"
+#include "vp9/common/vp9_convolve.h"
+#include "vp9/common/vp9_filter.h"
+#include "vp9/encoder/vp9_variance.h"
+
+static void variance(const uint8_t *src_ptr,
+ int source_stride,
+ const uint8_t *ref_ptr,
+ int recon_stride,
+ int w,
+ int h,
+ unsigned int *sse,
+ int *sum) {
+ int i, j;
+ int diff;
+
+ *sum = 0;
+ *sse = 0;
+
+ for (i = 0; i < h; i++) {
+ for (j = 0; j < w; j++) {
+ diff = src_ptr[j] - ref_ptr[j];
+ *sum += diff;
+ *sse += diff * diff;
+ }
+
+ src_ptr += source_stride;
+ ref_ptr += recon_stride;
+ }
+}
+
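variance() only accumulates the raw sum and the sum of squared differences; the per-block-size wrappers (not shown in this hunk) derive the actual variance as sse - sum^2 / (w * h). A hedged sketch of that final step for an 8x8 block, with a hypothetical wrapper name:

/* Hypothetical wrapper: turn (sum, sse) from variance() above into a
 * variance for an 8x8 block; 64 pixels, so sum*sum / 64 == sum*sum >> 6. */
static unsigned int variance8x8_sketch(const uint8_t *src, int src_stride,
                                       const uint8_t *ref, int ref_stride,
                                       unsigned int *sse) {
  int sum;
  variance(src, src_stride, ref, ref_stride, 8, 8, sse, &sum);
  return *sse - (unsigned int)(((int64_t)sum * sum) >> 6);
}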
+/****************************************************************************
+ *
+ * ROUTINE : var_filter_block2d_bil_first_pass
+ *
+ * INPUTS : uint8_t *src_ptr : Pointer to source block.
+ * uint32_t src_pixels_per_line : Stride of input block.
+ * uint32_t pixel_step : Offset between filter input
+ * samples (see notes).
+ * uint32_t output_height : Input block height.
+ * uint32_t output_width : Input block width.
+ * int16_t *vp9_filter : Array of 2 bi-linear filter
+ * taps.
+ *
+ * OUTPUTS : uint16_t *output_ptr : Pointer to filtered block.
+ *
+ * RETURNS : void
+ *
+ * FUNCTION : Applies a 1-D 2-tap bi-linear filter to the source block in
+ * either horizontal or vertical direction to produce the
+ * filtered output block. Used to implement first-pass
+ * of 2-D separable filter.
+ *
+ * SPECIAL NOTES : Produces uint16_t output to retain precision for the next pass.
+ * Two filter taps should sum to VP9_FILTER_WEIGHT.
+ * pixel_step defines whether the filter is applied
+ * horizontally (pixel_step=1) or vertically (pixel_step=
+ * stride).
+ * It defines the offset required to move from one input
+ * to the next.
+ *
+ ****************************************************************************/
+static void var_filter_block2d_bil_first_pass(const uint8_t *src_ptr,
+ uint16_t *output_ptr,
+ unsigned int src_pixels_per_line,
+ int pixel_step,
+ unsigned int output_height,
+ unsigned int output_width,
+ const int16_t *vp9_filter) {
+ unsigned int i, j;
+
+ for (i = 0; i < output_height; i++) {
+ for (j = 0; j < output_width; j++) {
+ output_ptr[j] = ROUND_POWER_OF_TWO((int)src_ptr[0] * vp9_filter[0] +
+ (int)src_ptr[pixel_step] * vp9_filter[1],
+ FILTER_BITS);
+
+ src_ptr++;
+ }
+
+ // Next row...
+ src_ptr += src_pixels_per_line - output_width;
+ output_ptr += output_width;
+ }
+}
+
+/****************************************************************************
+ *
+ * ROUTINE : var_filter_block2d_bil_second_pass
+ *
+ * INPUTS : uint16_t *src_ptr : Pointer to source block.
+ * uint32_t src_pixels_per_line : Stride of input block.
+ * uint32_t pixel_step : Offset between filter input
+ * samples (see notes).
+ * uint32_t output_height : Input block height.
+ * uint32_t output_width : Input block width.
+ * int16_t *vp9_filter : Array of 2 bi-linear filter
+ * taps.
+ *
+ * OUTPUTS : uint8_t *output_ptr : Pointer to filtered block.
+ *
+ * RETURNS : void
+ *
+ * FUNCTION : Applies a 1-D 2-tap bi-linear filter to the source block in
+ * either horizontal or vertical direction to produce the
+ * filtered output block. Used to implement second-pass
+ * of 2-D separable filter.
+ *
+ * SPECIAL NOTES : Requires 16-bit input as produced by
+ * var_filter_block2d_bil_first_pass.
+ * Two filter taps should sum to VP9_FILTER_WEIGHT.
+ * pixel_step defines whether the filter is applied
+ * horizontally (pixel_step=1) or vertically (pixel_step=
+ * stride).
+ * It defines the offset required to move from one input
+ * to the next.
+ *
+ ****************************************************************************/
+static void var_filter_block2d_bil_second_pass(const uint16_t *src_ptr,
+ uint8_t *output_ptr,
+ unsigned int src_pixels_per_line,
+ unsigned int pixel_step,
+ unsigned int output_height,
+ unsigned int output_width,
+ const int16_t *vp9_filter) {
+ unsigned int i, j;
+
+ for (i = 0; i < output_height; i++) {
+ for (j = 0; j < output_width; j++) {
+ output_ptr[j] = ROUND_POWER_OF_TWO((int)src_ptr[0] * vp9_filter[0] +
+ (int)src_ptr[pixel_step] * vp9_filter[1],
+ FILTER_BITS);
+ src_ptr++;
+ }
+
+ src_ptr += src_pixels_per_line - output_width;
+ output_ptr += output_width;
+ }
+}
+
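Together the two passes form a separable 2-D bilinear interpolator: the first pass filters horizontally into a uint16_t scratch buffer with one extra row (so the vertical pass has both taps for every output row), and the second pass filters vertically back down to uint8_t. A hedged sketch of how a sub-pixel routine would compose them for an 8x8 block; BILINEAR_FILTERS_2TAP is assumed to resolve to the 2-tap filter pair for a given sub-pel offset:

static void bilinear_predict8x8_sketch(const uint8_t *src, int src_stride,
                                       int xoffset, int yoffset,
                                       uint8_t *dst) {
  uint16_t fdata[9 * 8];  /* 8 wide, 8 + 1 rows for the vertical taps */

  /* Horizontal pass: pixel_step = 1, 9 rows so rows i and i + 1 exist. */
  var_filter_block2d_bil_first_pass(src, fdata, src_stride, 1, 9, 8,
                                    BILINEAR_FILTERS_2TAP(xoffset));
  /* Vertical pass: pixel_step = 8, the stride of the scratch buffer. */
  var_filter_block2d_bil_second_pass(fdata, dst, 8, 8, 8, 8,
                                     BILINEAR_FILTERS_2TAP(yoffset));
}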
unsigned int vp9_get_mb_ss_c(const int16_t *src_ptr) {
unsigned int i, sum = 0;
diff --git a/vp9/encoder/x86/vp9_dct_sse2.c b/vp9/encoder/x86/vp9_dct_sse2.c
index eb271fe..ad3d01d 100644
--- a/vp9/encoder/x86/vp9_dct_sse2.c
+++ b/vp9/encoder/x86/vp9_dct_sse2.c
@@ -171,22 +171,21 @@
void fdct4_1d_sse2(__m128i *in) {
const __m128i k__cospi_p16_p16 = _mm_set1_epi16(cospi_16_64);
const __m128i k__cospi_p16_m16 = pair_set_epi16(cospi_16_64, -cospi_16_64);
- const __m128i k__cospi_p24_p08 = pair_set_epi16(cospi_24_64, cospi_8_64);
- const __m128i k__cospi_m08_p24 = pair_set_epi16(-cospi_8_64, cospi_24_64);
+ const __m128i k__cospi_p08_p24 = pair_set_epi16(cospi_8_64, cospi_24_64);
+ const __m128i k__cospi_p24_m08 = pair_set_epi16(cospi_24_64, -cospi_8_64);
const __m128i k__DCT_CONST_ROUNDING = _mm_set1_epi32(DCT_CONST_ROUNDING);
__m128i u[4], v[4];
- u[0] = _mm_add_epi16(in[0], in[3]);
- u[1] = _mm_add_epi16(in[1], in[2]);
- u[2] = _mm_sub_epi16(in[1], in[2]);
- u[3] = _mm_sub_epi16(in[0], in[3]);
+ u[0] = _mm_unpacklo_epi16(in[0], in[1]);
+ u[1] = _mm_unpacklo_epi16(in[3], in[2]);
- v[0] = _mm_unpacklo_epi16(u[0], u[1]);
- v[1] = _mm_unpacklo_epi16(u[2], u[3]);
+ v[0] = _mm_add_epi16(u[0], u[1]);
+ v[1] = _mm_sub_epi16(u[0], u[1]);
+
u[0] = _mm_madd_epi16(v[0], k__cospi_p16_p16); // 0
u[1] = _mm_madd_epi16(v[0], k__cospi_p16_m16); // 2
- u[2] = _mm_madd_epi16(v[1], k__cospi_p24_p08); // 1
- u[3] = _mm_madd_epi16(v[1], k__cospi_m08_p24); // 3
+ u[2] = _mm_madd_epi16(v[1], k__cospi_p08_p24); // 1
+ u[3] = _mm_madd_epi16(v[1], k__cospi_p24_m08); // 3
v[0] = _mm_add_epi32(u[0], k__DCT_CONST_ROUNDING);
v[1] = _mm_add_epi32(u[1], k__DCT_CONST_ROUNDING);
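The rewritten flow leans on _mm_madd_epi16: after the unpacks, each 32-bit lane holds an interleaved pair (a, b), and multiplying by pair_set_epi16(c0, c1) yields a*c0 + b*c1, i.e. one complete butterfly output per lane. A scalar model of what the four madd results compute for one column, assuming the cospi_* constants from the VP9 sources (rounding and the DCT_CONST_BITS shift follow as in the code above):

static void fdct4_1d_scalar_model(const int16_t s[4], int32_t out[4]) {
  const int sum03 = s[0] + s[3], sum12 = s[1] + s[2];  /* the v[0] pairs */
  const int dif03 = s[0] - s[3], dif12 = s[1] - s[2];  /* the v[1] pairs */
  out[0] = (sum03 + sum12) * cospi_16_64;             /* k__cospi_p16_p16 */
  out[2] = (sum03 - sum12) * cospi_16_64;             /* k__cospi_p16_m16 */
  out[1] = dif03 * cospi_8_64 + dif12 * cospi_24_64;  /* k__cospi_p08_p24 */
  out[3] = dif03 * cospi_24_64 - dif12 * cospi_8_64;  /* k__cospi_p24_m08 */
}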
diff --git a/vp9/vp9_common.mk b/vp9/vp9_common.mk
index 687fb48..7d040f7 100644
--- a/vp9/vp9_common.mk
+++ b/vp9/vp9_common.mk
@@ -48,7 +48,6 @@
VP9_COMMON_SRCS-yes += common/vp9_rtcd.c
VP9_COMMON_SRCS-yes += common/vp9_rtcd_defs.sh
VP9_COMMON_SRCS-yes += common/vp9_sadmxn.h
-VP9_COMMON_SRCS-yes += common/vp9_subpelvar.h
VP9_COMMON_SRCS-yes += common/vp9_scale.h
VP9_COMMON_SRCS-yes += common/vp9_scale.c
VP9_COMMON_SRCS-yes += common/vp9_seg_common.h
@@ -92,7 +91,6 @@
VP9_COMMON_SRCS-$(HAVE_NEON) += common/arm/neon/vp9_convolve_neon.c
VP9_COMMON_SRCS-$(HAVE_NEON) += common/arm/neon/vp9_idct16x16_neon.c
-VP9_COMMON_SRCS-$(HAVE_NEON) += common/arm/neon/vp9_idct32x32_neon.c
VP9_COMMON_SRCS-$(HAVE_NEON) += common/arm/neon/vp9_convolve8_neon$(ASM)
VP9_COMMON_SRCS-$(HAVE_NEON) += common/arm/neon/vp9_convolve8_avg_neon$(ASM)
VP9_COMMON_SRCS-$(HAVE_NEON) += common/arm/neon/vp9_loopfilter_neon$(ASM)
@@ -109,5 +107,6 @@
VP9_COMMON_SRCS-$(HAVE_NEON) += common/arm/neon/vp9_mb_lpf_neon$(ASM)
VP9_COMMON_SRCS-$(HAVE_NEON) += common/arm/neon/vp9_copy_neon$(ASM)
VP9_COMMON_SRCS-$(HAVE_NEON) += common/arm/neon/vp9_avg_neon$(ASM)
+VP9_COMMON_SRCS-$(HAVE_NEON) += common/arm/neon/vp9_save_reg_neon$(ASM)
$(eval $(call rtcd_h_template,vp9_rtcd,vp9/common/vp9_rtcd_defs.sh))
diff --git a/vp9/vp9_cx_iface.c b/vp9/vp9_cx_iface.c
index 28d8f36..157752a 100644
--- a/vp9/vp9_cx_iface.c
+++ b/vp9/vp9_cx_iface.c
@@ -89,7 +89,7 @@
unsigned int fixed_kf_cntr;
};
-static const VP9_REFFRAME ref_frame_to_vp9_reframe(vpx_ref_frame_type_t frame) {
+static VP9_REFFRAME ref_frame_to_vp9_reframe(vpx_ref_frame_type_t frame) {
switch (frame) {
case VP8_LAST_FRAME:
return VP9_LAST_FLAG;
@@ -277,11 +277,9 @@
// CBR code has been deprecated for the experimental phase.
// CQ mode not yet tested
oxcf->end_usage = USAGE_LOCAL_FILE_PLAYBACK;
- /*
if (cfg.rc_end_usage == VPX_CQ)
oxcf->end_usage = USAGE_CONSTRAINED_QUALITY;
- */
- if (cfg.rc_end_usage == VPX_Q)
+ else if (cfg.rc_end_usage == VPX_Q)
oxcf->end_usage = USAGE_CONSTANT_QUALITY;
oxcf->target_bandwidth = cfg.rc_target_bitrate;
@@ -769,7 +767,7 @@
}
/* Add the frame packet to the list of returned packets. */
- round = 1000000 * ctx->cfg.g_timebase.num / 2 - 1;
+ round = (vpx_codec_pts_t)1000000 * ctx->cfg.g_timebase.num / 2 - 1;
delta = (dst_end_time_stamp - dst_time_stamp);
pkt.kind = VPX_CODEC_CX_FRAME_PKT;
pkt.data.frame.pts =
diff --git a/vp9/vp9_dx_iface.c b/vp9/vp9_dx_iface.c
index 10b3238..7a5b786 100644
--- a/vp9/vp9_dx_iface.c
+++ b/vp9/vp9_dx_iface.c
@@ -14,7 +14,7 @@
#include "vpx/vpx_decoder.h"
#include "vpx/vp8dx.h"
#include "vpx/internal/vpx_codec_internal.h"
-#include "vpx_version.h"
+#include "./vpx_version.h"
#include "vp9/decoder/vp9_onyxd.h"
#include "vp9/decoder/vp9_onyxd_int.h"
#include "vp9/decoder/vp9_read_bit_buffer.h"
@@ -205,7 +205,6 @@
static vpx_codec_err_t vp9_get_si(vpx_codec_alg_priv_t *ctx,
vpx_codec_stream_info_t *si) {
-
unsigned int sz;
if (si->sz >= sizeof(vp9_stream_info_t))
@@ -323,15 +322,20 @@
vp9_ppflags_t flags = {0};
if (ctx->base.init_flags & VPX_CODEC_USE_POSTPROC) {
- flags.post_proc_flag = ctx->postproc_cfg.post_proc_flag
+ flags.post_proc_flag =
#if CONFIG_POSTPROC_VISUALIZER
-
- | ((ctx->dbg_color_ref_frame_flag != 0) ? VP9D_DEBUG_CLR_FRM_REF_BLKS : 0)
- | ((ctx->dbg_color_mb_modes_flag != 0) ? VP9D_DEBUG_CLR_BLK_MODES : 0)
- | ((ctx->dbg_color_b_modes_flag != 0) ? VP9D_DEBUG_CLR_BLK_MODES : 0)
- | ((ctx->dbg_display_mv_flag != 0) ? VP9D_DEBUG_DRAW_MV : 0)
+ ((ctx->dbg_color_ref_frame_flag != 0) ?
+ VP9D_DEBUG_CLR_FRM_REF_BLKS : 0)
+ | ((ctx->dbg_color_mb_modes_flag != 0) ?
+ VP9D_DEBUG_CLR_BLK_MODES : 0)
+ | ((ctx->dbg_color_b_modes_flag != 0) ?
+ VP9D_DEBUG_CLR_BLK_MODES : 0)
+ | ((ctx->dbg_display_mv_flag != 0) ?
+ VP9D_DEBUG_DRAW_MV : 0)
+ |
#endif
-;
+ ctx->postproc_cfg.post_proc_flag;
+
flags.deblocking_level = ctx->postproc_cfg.deblocking_level;
flags.noise_level = ctx->postproc_cfg.noise_level;
#if CONFIG_POSTPROC_VISUALIZER
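The restructured initializer fixes the dangling operator of the old form: each conditionally compiled contributor now ends with a trailing `|`, and the unconditional postproc_cfg flag closes the chain, so the expression parses in both configurations. A sketch of the pattern with hypothetical names:

#define FLAG_A 1  /* hypothetical flags */
#define FLAG_B 2

static int build_flags(int debug_a, int debug_b, int base_flags) {
  return
#if defined(EXTRA_DEBUG)  /* hypothetical config macro */
      (debug_a ? FLAG_A : 0) |
      (debug_b ? FLAG_B : 0) |
#endif
      base_flags;  /* unconditional term terminates the | chain */
}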
@@ -496,8 +500,9 @@
mmap->sz = seg_iter->calc_sz(ctx->config.dec, ctx->init_flags);
res = VPX_CODEC_OK;
- } else
+ } else {
res = VPX_CODEC_LIST_END;
+ }
} while (!mmap->sz && res != VPX_CODEC_LIST_END);
return res;
@@ -542,7 +547,6 @@
static vpx_codec_err_t set_reference(vpx_codec_alg_priv_t *ctx,
int ctr_id,
va_list args) {
-
vpx_ref_frame_t *data = va_arg(args, vpx_ref_frame_t *);
if (data) {
@@ -553,15 +557,14 @@
return vp9_set_reference_dec(ctx->pbi,
(VP9_REFFRAME)frame->frame_type, &sd);
- } else
+ } else {
return VPX_CODEC_INVALID_PARAM;
-
+ }
}
static vpx_codec_err_t copy_reference(vpx_codec_alg_priv_t *ctx,
int ctr_id,
va_list args) {
-
vpx_ref_frame_t *data = va_arg(args, vpx_ref_frame_t *);
if (data) {
@@ -572,9 +575,9 @@
return vp9_copy_reference_dec(ctx->pbi,
(VP9_REFFRAME)frame->frame_type, &sd);
- } else
+ } else {
return VPX_CODEC_INVALID_PARAM;
-
+ }
}
static vpx_codec_err_t get_reference(vpx_codec_alg_priv_t *ctx,
@@ -603,9 +606,9 @@
ctx->postproc_cfg_set = 1;
ctx->postproc_cfg = *((vp8_postproc_cfg_t *)data);
return VPX_CODEC_OK;
- } else
+ } else {
return VPX_CODEC_INVALID_PARAM;
-
+ }
#else
return VPX_CODEC_INCAPABLE;
#endif
@@ -642,15 +645,15 @@
*update_info = pbi->refresh_frame_flags;
return VPX_CODEC_OK;
- } else
+ } else {
return VPX_CODEC_INVALID_PARAM;
+ }
}
static vpx_codec_err_t get_frame_corrupted(vpx_codec_alg_priv_t *ctx,
int ctrl_id,
va_list args) {
-
int *corrupted = va_arg(args, int *);
if (corrupted) {
@@ -658,9 +661,9 @@
*corrupted = pbi->common.frame_to_show->corrupted;
return VPX_CODEC_OK;
- } else
+ } else {
return VPX_CODEC_INVALID_PARAM;
-
+ }
}
static vpx_codec_err_t set_invert_tile_order(vpx_codec_alg_priv_t *ctx,
@@ -699,13 +702,13 @@
ctf_maps, /* vpx_codec_ctrl_fn_map_t *ctrl_maps; */
vp9_xma_get_mmap, /* vpx_codec_get_mmap_fn_t get_mmap; */
vp9_xma_set_mmap, /* vpx_codec_set_mmap_fn_t set_mmap; */
- {
+ { // NOLINT
vp9_peek_si, /* vpx_codec_peek_si_fn_t peek_si; */
vp9_get_si, /* vpx_codec_get_si_fn_t get_si; */
vp9_decode, /* vpx_codec_decode_fn_t decode; */
vp9_get_frame, /* vpx_codec_frame_get_fn_t frame_get; */
},
- {
+ { // NOLINT
/* encoder functions */
NOT_IMPLEMENTED,
NOT_IMPLEMENTED,
diff --git a/vp9/vp9dx.mk b/vp9/vp9dx.mk
index be3afe8..54c9706 100644
--- a/vp9/vp9dx.mk
+++ b/vp9/vp9dx.mk
@@ -38,6 +38,3 @@
VP9_DX_SRCS-yes += decoder/vp9_dsubexp.h
VP9_DX_SRCS-yes := $(filter-out $(VP9_DX_SRCS_REMOVE-yes),$(VP9_DX_SRCS-yes))
-
-VP9_DX_SRCS-$(HAVE_SSE2) += decoder/x86/vp9_dequantize_sse2.c
-VP9_DX_SRCS-$(HAVE_NEON) += decoder/arm/neon/vp9_add_constant_residual_neon$(ASM)
diff --git a/vpx_scale/mips/dspr2/yv12extend_dspr2.c b/vpx_scale/mips/dspr2/yv12extend_dspr2.c
new file mode 100644
index 0000000..2c5cd1a
--- /dev/null
+++ b/vpx_scale/mips/dspr2/yv12extend_dspr2.c
@@ -0,0 +1,149 @@
+/*
+ * Copyright (c) 2013 The WebM project authors. All Rights Reserved.
+ *
+ * Use of this source code is governed by a BSD-style license
+ * that can be found in the LICENSE file in the root of the source
+ * tree. An additional intellectual property rights grant can be found
+ * in the file PATENTS. All contributing project authors may
+ * be found in the AUTHORS file in the root of the source tree.
+ */
+
+#include <assert.h>
+
+#include "./vpx_config.h"
+#include "vpx_scale/yv12config.h"
+#include "vpx_mem/vpx_mem.h"
+#include "vpx_scale/vpx_scale.h"
+
+#if HAVE_DSPR2
+static void extend_plane(uint8_t *const src, int src_stride,
+ int width, int height,
+ int extend_top, int extend_left,
+ int extend_bottom, int extend_right) {
+ int i, j;
+ uint8_t *left_src, *right_src;
+ uint8_t *left_dst_start, *right_dst_start;
+ uint8_t *left_dst, *right_dst;
+ uint8_t *top_src, *bot_src;
+ uint8_t *top_dst, *bot_dst;
+ uint32_t left_pix;
+ uint32_t right_pix;
+ uint32_t linesize;
+
+ /* copy the left and right most columns out */
+ left_src = src;
+ right_src = src + width - 1;
+ left_dst_start = src - extend_left;
+ right_dst_start = src + width;
+
+ for (i = height; i--; ) {
+ left_dst = left_dst_start;
+ right_dst = right_dst_start;
+
+ __asm__ __volatile__ (
+ "lb %[left_pix], 0(%[left_src]) \n\t"
+ "lb %[right_pix], 0(%[right_src]) \n\t"
+ "replv.qb %[left_pix], %[left_pix] \n\t"
+ "replv.qb %[right_pix], %[right_pix] \n\t"
+
+ : [left_pix] "=&r" (left_pix), [right_pix] "=&r" (right_pix)
+ : [left_src] "r" (left_src), [right_src] "r" (right_src)
+ );
+
+ for (j = extend_left/4; j--; ) {
+ __asm__ __volatile__ (
+ "sw %[left_pix], 0(%[left_dst]) \n\t"
+ "sw %[right_pix], 0(%[right_dst]) \n\t"
+
+ :
+ : [left_dst] "r" (left_dst), [left_pix] "r" (left_pix),
+ [right_dst] "r" (right_dst), [right_pix] "r" (right_pix)
+ );
+
+ left_dst += 4;
+ right_dst += 4;
+ }
+
+ for (j = extend_left%4; j--; ) {
+ __asm__ __volatile__ (
+ "sb %[left_pix], 0(%[left_dst]) \n\t"
+ "sb %[right_pix], 0(%[right_dst]) \n\t"
+
+ :
+ : [left_dst] "r" (left_dst), [left_pix] "r" (left_pix),
+ [right_dst] "r" (right_dst), [right_pix] "r" (right_pix)
+ );
+
+ left_dst += 1;
+ right_dst += 1;
+ }
+
+ left_src += src_stride;
+ right_src += src_stride;
+ left_dst_start += src_stride;
+ right_dst_start += src_stride;
+ }
+
+ /* Now copy the top and bottom lines into each line of the respective
+ * borders
+ */
+ top_src = src - extend_left;
+ bot_src = src + src_stride * (height - 1) - extend_left;
+ top_dst = src + src_stride * (-extend_top) - extend_left;
+ bot_dst = src + src_stride * (height) - extend_left;
+ linesize = extend_left + extend_right + width;
+
+ for (i = 0; i < extend_top; i++) {
+ vpx_memcpy(top_dst, top_src, linesize);
+ top_dst += src_stride;
+ }
+
+ for (i = 0; i < extend_bottom; i++) {
+ vpx_memcpy(bot_dst, bot_src, linesize);
+ bot_dst += src_stride;
+ }
+}
+
+static void extend_frame(YV12_BUFFER_CONFIG *const ybf,
+ int subsampling_x, int subsampling_y,
+ int ext_size) {
+ const int c_w = (ybf->y_crop_width + subsampling_x) >> subsampling_x;
+ const int c_h = (ybf->y_crop_height + subsampling_y) >> subsampling_y;
+ const int c_et = ext_size >> subsampling_y;
+ const int c_el = ext_size >> subsampling_x;
+ const int c_eb = (ext_size + ybf->y_height - ybf->y_crop_height +
+ subsampling_y) >> subsampling_y;
+ const int c_er = (ext_size + ybf->y_width - ybf->y_crop_width +
+ subsampling_x) >> subsampling_x;
+
+ assert(ybf->y_height - ybf->y_crop_height < 16);
+ assert(ybf->y_width - ybf->y_crop_width < 16);
+ assert(ybf->y_height - ybf->y_crop_height >= 0);
+ assert(ybf->y_width - ybf->y_crop_width >= 0);
+
+ extend_plane(ybf->y_buffer, ybf->y_stride,
+ ybf->y_crop_width, ybf->y_crop_height,
+ ext_size, ext_size,
+ ext_size + ybf->y_height - ybf->y_crop_height,
+ ext_size + ybf->y_width - ybf->y_crop_width);
+
+ extend_plane(ybf->u_buffer, ybf->uv_stride,
+ c_w, c_h, c_et, c_el, c_eb, c_er);
+
+ extend_plane(ybf->v_buffer, ybf->uv_stride,
+ c_w, c_h, c_et, c_el, c_eb, c_er);
+}
+
+void vp9_extend_frame_borders_dspr2(YV12_BUFFER_CONFIG *ybf,
+ int subsampling_x, int subsampling_y) {
+ extend_frame(ybf, subsampling_x, subsampling_y, ybf->border);
+}
+
+void vp9_extend_frame_inner_borders_dspr2(YV12_BUFFER_CONFIG *ybf,
+ int subsampling_x,
+ int subsampling_y) {
+ const int inner_bw = (ybf->border > VP9INNERBORDERINPIXELS) ?
+ VP9INNERBORDERINPIXELS : ybf->border;
+ extend_frame(ybf, subsampling_x, subsampling_y, inner_bw);
+}
+#endif
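For reference, the DSPr2 inner loop is a word-at-a-time border fill: replv.qb replicates the edge byte into all four bytes of a register so each sw stores four border pixels at once, and an sb loop handles the remainder when extend_left is not a multiple of four. A plain-C model of the same loop (an assumed equivalent, not the DSPr2 path):

#include <stdint.h>
#include <string.h>

/* Scalar model of the replv.qb / sw / sb sequence above. */
static void fill_border_row(uint8_t *dst, uint8_t pix, int count) {
  const uint32_t word = pix * 0x01010101u;  /* what replv.qb builds */
  int j;
  for (j = count / 4; j--; dst += 4)
    memcpy(dst, &word, 4);                  /* the sw word stores */
  for (j = count % 4; j--; )
    *dst++ = pix;                           /* the sb byte stores */
}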
diff --git a/vpx_scale/vpx_scale.mk b/vpx_scale/vpx_scale.mk
index 76c11e7..50d3e9d 100644
--- a/vpx_scale/vpx_scale.mk
+++ b/vpx_scale/vpx_scale.mk
@@ -16,6 +16,9 @@
SCALE_SRCS-$(HAVE_NEON) += arm/neon/vp8_vpxyv12_extendframeborders_neon$(ASM)
SCALE_SRCS-$(HAVE_NEON) += arm/neon/yv12extend_arm.c
+#mips(dspr2)
+SCALE_SRCS-$(HAVE_DSPR2) += mips/dspr2/yv12extend_dspr2.c
+
SCALE_SRCS-no += $(SCALE_SRCS_REMOVE-yes)
$(eval $(call asm_offsets_template,\
diff --git a/vpx_scale/vpx_scale_rtcd.sh b/vpx_scale/vpx_scale_rtcd.sh
index ea7b0e2..a5faf11 100644
--- a/vpx_scale/vpx_scale_rtcd.sh
+++ b/vpx_scale/vpx_scale_rtcd.sh
@@ -27,8 +27,8 @@
if [ "$CONFIG_VP9" = "yes" ]; then
prototype void vp9_extend_frame_borders "struct yv12_buffer_config *ybf, int subsampling_x, int subsampling_y"
- specialize vp9_extend_frame_borders
+ specialize vp9_extend_frame_borders dspr2
prototype void vp9_extend_frame_inner_borders "struct yv12_buffer_config *ybf, int subsampling_x, int subsampling_y"
- specialize vp9_extend_frame_inner_borders_c
+ specialize vp9_extend_frame_inner_borders dspr2
fi
diff --git a/vpxenc.c b/vpxenc.c
index 0c742ca..71cf01f 100644
--- a/vpxenc.c
+++ b/vpxenc.c
@@ -725,14 +725,12 @@
{
unsigned int pixelWidth = cfg->g_w;
unsigned int pixelHeight = cfg->g_h;
- float frameRate = (float)fps->num / (float)fps->den;
EbmlLoc videoStart;
Ebml_StartSubElement(glob, &videoStart, Video);
Ebml_SerializeUnsigned(glob, PixelWidth, pixelWidth);
Ebml_SerializeUnsigned(glob, PixelHeight, pixelHeight);
Ebml_SerializeUnsigned(glob, StereoMode, stereo_fmt);
- Ebml_SerializeFloat(glob, FrameRate, frameRate);
Ebml_EndSubElement(glob, &videoStart);
}
Ebml_EndSubElement(glob, &start); /* Track Entry */
@@ -883,10 +881,10 @@
while (len >= 4) {
unsigned int k;
- k = data[0];
- k |= data[1] << 8;
- k |= data[2] << 16;
- k |= data[3] << 24;
+ k = (unsigned int)data[0];
+ k |= (unsigned int)data[1] << 8;
+ k |= (unsigned int)data[2] << 16;
+ k |= (unsigned int)data[3] << 24;
k *= m;
k ^= k >> r;
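The casts address a little-endian load assembled in signed int arithmetic: once data[3] >= 0x80, data[3] << 24 shifts into the sign bit, which is undefined behavior in C. Widening each byte to unsigned int first makes every shift well defined. A minimal self-contained sketch:

/* Sketch: assemble a 32-bit little-endian word without signed-shift UB. */
static unsigned int load_le32(const unsigned char *p) {
  return (unsigned int)p[0] |
         ((unsigned int)p[1] << 8) |
         ((unsigned int)p[2] << 16) |
         ((unsigned int)p[3] << 24);
}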
@@ -1765,9 +1763,13 @@
/* Validate global config */
if (global->passes == 0) {
+#if CONFIG_VP9_ENCODER
// Make default VP9 passes = 2 until there is a better quality 1-pass
// encoder
global->passes = (global->codec->iface == vpx_codec_vp9_cx ? 2 : 1);
+#else
+ global->passes = 1;
+#endif
}
if (global->pass) {
@@ -2671,8 +2673,8 @@
fprintf(stderr, "%7"PRId64" %s %.2f %s ",
cx_time > 9999999 ? cx_time / 1000 : cx_time,
cx_time > 9999999 ? "ms" : "us",
- fps >= 1.0 ? fps : 1000.0 / fps,
- fps >= 1.0 ? "fps" : "ms/f");
+ fps >= 1.0 ? fps : fps * 60,
+ fps >= 1.0 ? "fps" : "fpm");
print_time("ETA", estimated_time_left);
fprintf(stderr, "\033[K");
}