Merge "add vp10 ActiveMap/ActiveMapRefreshTest" into nextgenv2
diff --git a/test/assertion_helpers.h b/test/assertion_helpers.h
new file mode 100644
index 0000000..108c40a
--- /dev/null
+++ b/test/assertion_helpers.h
@@ -0,0 +1,278 @@
+/*
+ * Copyright (c) 2016 The WebM project authors. All Rights Reserved.
+ *
+ * Use of this source code is governed by a BSD-style license
+ * that can be found in the LICENSE file in the root of the source
+ * tree. An additional intellectual property rights grant can be found
+ * in the file PATENTS. All contributing project authors may
+ * be found in the AUTHORS file in the root of the source tree.
+ */
+
+
+#ifndef TEST_ASSERTION_HELPERS_H_
+#define TEST_ASSERTION_HELPERS_H_
+
+#include "third_party/googletest/src/include/gtest/gtest.h"
+
+namespace libvpx_test {
+namespace assertion_helpers {
+
+// Arrays (1D) are element-wise equal
+template<typename E, size_t n>
+::testing::AssertionResult ArraysEq(const E (&a)[n],
+ const E (&b)[n]) {
+ for (size_t i = 0; i < n; i++) {
+ const E &va = a[i];
+ const E &vb = b[i];
+ if (va != vb) {
+ return ::testing::AssertionFailure()
+ << "Arrays do not equal at index "
+ << "[" << i << "]"
+ << " values are: " << va << " vs " << vb;
+ }
+ }
+
+ return ::testing::AssertionSuccess();
+}
+
+// Arrays (1D) are element-wise equal
+// within the index interval [lo, hi)
+template<typename E, size_t n>
+::testing::AssertionResult ArraysEqWithin(const E (&a)[n],
+ const E (&b)[n],
+ const size_t lo,
+ const size_t hi) {
+ assert(hi > lo);
+ assert(hi <= n);
+
+ for (size_t i = lo; i < hi; i++) {
+ const E &va = a[i];
+ const E &vb = b[i];
+ if (va != vb) {
+ return ::testing::AssertionFailure()
+ << "Arrays do not equal at index "
+ << "[" << i << "]"
+ << " values are: " << va << " vs " << vb;
+ }
+ }
+
+ return ::testing::AssertionSuccess();
+}
+
+// Arrays (1D) are element-wise equal
+// outside the index interval [lo, hi)
+template<typename E, size_t n>
+::testing::AssertionResult ArraysEqOutside(const E (&a)[n],
+ const E (&b)[n],
+ const size_t lo,
+ const size_t hi) {
+ assert(hi > lo);
+ assert(hi <= n);
+
+ for (size_t i = 0; i < n; i++) {
+ if (lo <= i && i < hi)
+ continue;
+
+ const E &va = a[i];
+ const E &vb = b[i];
+ if (va != vb) {
+ return ::testing::AssertionFailure()
+ << "Arrays do not equal at index "
+ << "[" << i << "]"
+ << " values are: " << va << " vs " << vb;
+ }
+ }
+
+ return ::testing::AssertionSuccess();
+}
+
+// Arrays (2D) are element-wise equal
+template<typename E, size_t n, size_t m>
+::testing::AssertionResult ArraysEq(const E (&a)[n][m],
+ const E (&b)[n][m]) {
+ for (size_t i = 0; i < n; i++) {
+ for (size_t j = 0; j < m; j++) {
+ const E &va = a[i][j];
+ const E &vb = b[i][j];
+ if (va != vb) {
+ return ::testing::AssertionFailure()
+ << "Arrays do not equal at index "
+ << "[" << i << "][" << j << "]"
+ << " values are: " << va << " vs " << vb;
+ }
+ }
+ }
+
+ return ::testing::AssertionSuccess();
+}
+
+// Arrays (2D) are element-wise equal
+// within the index interval [lo0, hi0) x [lo1, hi1) (Cartesian product)
+template<typename E, size_t n, size_t m>
+::testing::AssertionResult ArraysEqWithin(const E (&a)[n][m],
+ const E (&b)[n][m],
+ const size_t lo0,
+ const size_t hi0,
+ const size_t lo1,
+ const size_t hi1) {
+ assert(hi0 > lo0);
+ assert(hi0 <= n);
+ assert(hi1 > lo1);
+ assert(hi1 <= m);
+
+ for (size_t i = lo0; i < hi0; i++) {
+ for (size_t j = lo1; j < hi1; j++) {
+ const E &va = a[i][j];
+ const E &vb = b[i][j];
+ if (va != vb) {
+ return ::testing::AssertionFailure()
+ << "Arrays do not equal at index "
+ << "[" << i << "][" << j << "]"
+ << " values are: " << va << " vs " << vb;
+ }
+ }
+ }
+
+ return ::testing::AssertionSuccess();
+}
+
+// Arrays (2D) are element-wise equal
+// outside the index interval [lo0, hi0) x [lo1, hi1) (Cartesian product)
+template<typename E, size_t n, size_t m>
+::testing::AssertionResult ArraysEqOutside(const E (&a)[n][m],
+ const E (&b)[n][m],
+ const size_t lo0,
+ const size_t hi0,
+ const size_t lo1,
+ const size_t hi1) {
+ assert(hi0 > lo0);
+ assert(hi0 <= n);
+ assert(hi1 > lo1);
+ assert(hi1 <= m);
+
+ for (size_t i = 0; i < n; i++) {
+ if (lo0 <= i && i < hi0)
+ continue;
+
+ for (size_t j = 0; j < m; j++) {
+ if (lo1 <= j && j < hi1)
+ continue;
+
+ const E &va = a[i][j];
+ const E &vb = b[i][j];
+ if (va != vb) {
+ return ::testing::AssertionFailure()
+ << "Arrays do not equal at index "
+ << "[" << i << "][" << j << "]"
+ << " values are: " << va << " vs " << vb;
+ }
+ }
+ }
+
+ return ::testing::AssertionSuccess();
+}
+
+// Non-contiguous 2D array buffers are element-wise equal at the
+// corresponding linear indices specified by rows/cols/strides/offsets
+template<typename E, size_t n, size_t m>
+::testing::AssertionResult BuffersEqWithin(const E (&a)[n][m],
+ const E (&b)[n][m],
+ const size_t stridea,
+ const size_t strideb,
+ const size_t offseta,
+ const size_t offsetb,
+ const size_t rows,
+ const size_t cols) {
+ assert(rows <= n);
+ assert(cols <= m);
+ assert(stridea <= m);
+ assert(strideb <= m);
+ assert(cols <= stridea);
+ assert(cols <= strideb);
+ assert(offseta < n * m);
+ assert(offsetb < n * m);
+ assert(offseta + (rows - 1) * stridea + (cols - 1) < n * m);
+ assert(offsetb + (rows - 1) * strideb + (cols - 1) < n * m);
+
+ const E *pa = &a[0][0] + offseta;
+ const E *pb = &b[0][0] + offsetb;
+
+ for (size_t r = 0 ; r < rows ; r++) {
+ for (size_t c = 0 ; c < cols ; c++) {
+ const E &va = pa[c];
+ const E &vb = pb[c];
+ if (va != vb) {
+ return ::testing::AssertionFailure()
+ << "Arrays do not equal at linear index "
+ << "[" << pa - &a[0][0] << "] vs [" << pb - &b[0][0] << "]"
+ << " row=" << r << " col=" << c
+ << " values are: " << va << " vs " << vb;
+ }
+ }
+ pa += stridea;
+ pb += strideb;
+ }
+
+ return ::testing::AssertionSuccess();
+}
+
+// Non-contiguous 2D array buffers are element-wise equal everywhere
+// except at the corresponding linear indices specified by
+// rows/cols/stride/offset.
+template<typename E, size_t n, size_t m>
+::testing::AssertionResult BuffersEqOutside(const E (&a)[n][m],
+ const E (&b)[n][m],
+ const size_t stride,
+ const size_t offset,
+ const size_t rows,
+ const size_t cols ) {
+ assert(rows <= n);
+ assert(cols <= m);
+ assert(stride <= m);
+ assert(cols <= stride);
+ assert(offset < n * m);
+ assert(offset + (rows - 1) * stride + (cols - 1) < n * m);
+
+ const E *const pa = &a[0][0];
+ const E *const pb = &b[0][0];
+
+ size_t idx = 0;
+ size_t r = 0;
+ size_t end = offset; // beginning of first row
+
+ while (idx < n * m) {
+ while (idx < end) { // until beginning of row or end of buffer
+ const E &va = pa[idx];
+ const E &vb = pb[idx];
+ if (va != vb) {
+ return ::testing::AssertionFailure()
+ << "Arrays do not equal at index "
+ << "[" << idx / m << "][" << idx % m << "]"
+ << " values are: " << va << " vs " << vb;
+ }
+
+ idx++;
+ }
+
+ // Move past row end
+ idx += cols;
+
+ if (++r < rows) {
+ // Move to next row
+ end += stride;
+ } else {
+ // Move to end of buffer
+ end = n * m;
+ }
+ }
+
+ // Sanity check
+ assert(idx == n * m + cols);
+
+ return ::testing::AssertionSuccess();
+}
+
+} // namespace assertion_helpers
+} // namespace libvpx_test
+
+#endif // TEST_ASSERTION_HELPERS_H_
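
For orientation, here is a minimal sketch of how the 1D helpers above would typically be called from a gtest body. The test name and buffers are hypothetical; only the helper signatures come from assertion_helpers.h.

#include "test/assertion_helpers.h"
#include "third_party/googletest/src/include/gtest/gtest.h"

using libvpx_test::assertion_helpers::ArraysEq;
using libvpx_test::assertion_helpers::ArraysEqOutside;

TEST(AssertionHelpersExample, Usage) {
  int ref[8] = { 0, 1, 2, 3, 4, 5, 6, 7 };
  int tst[8] = { 0, 1, 2, 3, 4, 5, 6, 7 };

  // Whole-array comparison; on mismatch the AssertionResult reports the
  // failing index and both values.
  ASSERT_TRUE(ArraysEq(ref, tst));

  // Deliberately modify only the interval [2, 5)...
  tst[2] = tst[3] = tst[4] = 100;

  // ...and verify that every element outside that interval is untouched.
  ASSERT_TRUE(ArraysEqOutside(ref, tst, 2, 5));
}
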
diff --git a/test/blend_mask6_test.cc b/test/blend_mask6_test.cc
new file mode 100644
index 0000000..d737ddd
--- /dev/null
+++ b/test/blend_mask6_test.cc
@@ -0,0 +1,311 @@
+/*
+ * Copyright (c) 2016 The WebM project authors. All Rights Reserved.
+ *
+ * Use of this source code is governed by a BSD-style license
+ * that can be found in the LICENSE file in the root of the source
+ * tree. An additional intellectual property rights grant can be found
+ * in the file PATENTS. All contributing project authors may
+ * be found in the AUTHORS file in the root of the source tree.
+ */
+
+#include <math.h>
+#include <stdlib.h>
+#include <string.h>
+
+#include "third_party/googletest/src/include/gtest/gtest.h"
+#include "test/register_state_check.h"
+
+#include "test/function_equivalence_test.h"
+#include "test/randomise.h"
+#include "test/snapshot.h"
+
+#include "./vpx_config.h"
+#include "./vpx_dsp_rtcd.h"
+#include "vpx/vpx_integer.h"
+
+#include "./vp10_rtcd.h"
+
+#include "test/assertion_helpers.h"
+#include "vp10/common/enums.h"
+
+using libvpx_test::assertion_helpers::BuffersEqWithin;
+using libvpx_test::assertion_helpers::BuffersEqOutside;
+using libvpx_test::assertion_helpers::ArraysEq;
+using libvpx_test::FunctionEquivalenceTest;
+using libvpx_test::Snapshot;
+using libvpx_test::Randomise;
+using std::tr1::make_tuple;
+
+namespace {
+
+template<typename F, typename T>
+class BlendMask6Test : public FunctionEquivalenceTest<F> {
+ protected:
+ virtual ~BlendMask6Test() {}
+
+ virtual void Execute(T *p_src0, T *p_src1) = 0;
+
+ void Common() {
+ w = 1 << randomise.uniform<int>(2, MAX_SB_SIZE_LOG2 + 1);
+ h = 1 << randomise.uniform<int>(2, MAX_SB_SIZE_LOG2 + 1);
+
+ randomise(subx);
+ randomise(suby);
+
+ randomise(dst_offset, 0, 32);
+ randomise(dst_stride, w, MAX_SB_SIZE * 5 + 1);
+
+ randomise(src0_offset, 0, 32);
+ randomise(src0_stride, w, MAX_SB_SIZE * 5 + 1);
+
+ randomise(src1_offset, 0, 32);
+ randomise(src1_stride, w, MAX_SB_SIZE * 5 + 1);
+
+    randomise(mask_stride, w * (subx ? 2 : 1), 2 * MAX_SB_SIZE + 1);
+
+ T *p_src0;
+ T *p_src1;
+
+ switch (randomise.uniform<int>(3)) {
+ case 0: // Separate sources
+ p_src0 = &src0[0][0];
+ p_src1 = &src1[0][0];
+ break;
+ case 1: // src0 == dst
+ p_src0 = &dst_tst[0][0];
+ src0_stride = dst_stride;
+ src0_offset = dst_offset;
+ p_src1 = &src1[0][0];
+ break;
+ case 2: // src1 == dst
+ p_src0 = &src0[0][0];
+ p_src1 = &dst_tst[0][0];
+ src1_stride = dst_stride;
+ src1_offset = dst_offset;
+ break;
+ default:
+ FAIL();
+ }
+
+ //////////////////////////////////////////////////////////////////////////
+ // Prepare
+ //////////////////////////////////////////////////////////////////////////
+
+ snapshot(dst_ref);
+ snapshot(dst_tst);
+
+ snapshot(src0);
+ snapshot(src1);
+
+ snapshot(mask);
+
+ //////////////////////////////////////////////////////////////////////////
+ // Execute
+ //////////////////////////////////////////////////////////////////////////
+
+ Execute(p_src0, p_src1);
+
+ //////////////////////////////////////////////////////////////////////////
+ // Check
+ //////////////////////////////////////////////////////////////////////////
+
+ ASSERT_TRUE(BuffersEqWithin(dst_ref, dst_tst,
+ dst_stride, dst_stride,
+ dst_offset, dst_offset,
+ h, w));
+
+ ASSERT_TRUE(ArraysEq(snapshot.get(src0), src0));
+ ASSERT_TRUE(ArraysEq(snapshot.get(src1), src1));
+ ASSERT_TRUE(ArraysEq(snapshot.get(mask), mask));
+
+ ASSERT_TRUE(BuffersEqOutside(snapshot.get(dst_ref), dst_ref,
+ dst_stride,
+ dst_offset,
+ h, w));
+
+ ASSERT_TRUE(BuffersEqOutside(snapshot.get(dst_tst), dst_tst,
+ dst_stride,
+ dst_offset,
+ h, w));
+ }
+
+ Snapshot snapshot;
+ Randomise randomise;
+
+ T dst_ref[MAX_SB_SIZE][MAX_SB_SIZE * 5];
+ T dst_tst[MAX_SB_SIZE][MAX_SB_SIZE * 5];
+ size_t dst_stride;
+ size_t dst_offset;
+
+ T src0[MAX_SB_SIZE][MAX_SB_SIZE * 5];
+ size_t src0_stride;
+ size_t src0_offset;
+
+ T src1[MAX_SB_SIZE][MAX_SB_SIZE * 5];
+ size_t src1_stride;
+ size_t src1_offset;
+
+ uint8_t mask[2 * MAX_SB_SIZE][2 * MAX_SB_SIZE];
+ size_t mask_stride;
+
+ int w;
+ int h;
+
+ bool suby;
+ bool subx;
+};
+
+//////////////////////////////////////////////////////////////////////////////
+// 8 bit version
+//////////////////////////////////////////////////////////////////////////////
+
+typedef void (*F8B)(uint8_t *dst, uint32_t dst_stride,
+ uint8_t *src0, uint32_t src0_stride,
+ uint8_t *src1, uint32_t src1_stride,
+ const uint8_t *mask, uint32_t mask_stride,
+ int h, int w, int suby, int subx);
+
+class BlendMask6Test8B : public BlendMask6Test<F8B, uint8_t> {
+ protected:
+ void Execute(uint8_t *p_src0, uint8_t *p_src1) {
+ ref_func_(&dst_ref[0][dst_offset], dst_stride,
+ p_src0 + src0_offset, src0_stride,
+ p_src1 + src1_offset, src1_stride,
+ &mask[0][0], sizeof(mask[0]),
+ h, w, suby, subx);
+
+ ASM_REGISTER_STATE_CHECK(
+ tst_func_(&dst_tst[0][dst_offset], dst_stride,
+ p_src0 + src0_offset, src0_stride,
+ p_src1 + src1_offset, src1_stride,
+ &mask[0][0], sizeof(mask[0]),
+ h, w, suby, subx));
+ }
+};
+
+TEST_P(BlendMask6Test8B, RandomValues) {
+ for (int i = 0 ; i < 10000 && !HasFatalFailure(); i++) {
+ //////////////////////////////////////////////////////////////////////////
+ // Randomise
+ //////////////////////////////////////////////////////////////////////////
+
+ randomise(dst_ref);
+ randomise(dst_tst);
+
+ randomise(src0);
+ randomise(src1);
+
+ randomise(mask, 65);
+
+ Common();
+ }
+}
+
+TEST_P(BlendMask6Test8B, ExtremeValues) {
+ for (int i = 0 ; i < 1000 && !HasFatalFailure(); i++) {
+ //////////////////////////////////////////////////////////////////////////
+ // Randomise
+ //////////////////////////////////////////////////////////////////////////
+
+ randomise(dst_ref, 254, 256);
+ randomise(dst_tst, 254, 256);
+
+ randomise(src0, 254, 256);
+ randomise(src1, 254, 256);
+
+ randomise(mask, 63, 65);
+
+ Common();
+ }
+}
+
+#if HAVE_SSE4_1
+INSTANTIATE_TEST_CASE_P(
+ SSE4_1_C_COMPARE, BlendMask6Test8B,
+ ::testing::Values(make_tuple(&vpx_blend_mask6_c, &vpx_blend_mask6_sse4_1)));
+#endif // HAVE_SSE4_1
+
+#if CONFIG_VP9_HIGHBITDEPTH
+//////////////////////////////////////////////////////////////////////////////
+// High bit-depth version
+//////////////////////////////////////////////////////////////////////////////
+
+typedef void (*FHBD)(uint8_t *dst, uint32_t dst_stride,
+ uint8_t *src0, uint32_t src0_stride,
+ uint8_t *src1, uint32_t src1_stride,
+ const uint8_t *mask, uint32_t mask_stride,
+ int h, int w, int suby, int subx, int bd);
+
+class BlendMask6TestHBD : public BlendMask6Test<FHBD, uint16_t> {
+ protected:
+ void Execute(uint16_t *p_src0, uint16_t *p_src1) {
+ ref_func_(CONVERT_TO_BYTEPTR(&dst_ref[0][dst_offset]), dst_stride,
+ CONVERT_TO_BYTEPTR(p_src0 + src0_offset), src0_stride,
+ CONVERT_TO_BYTEPTR(p_src1 + src1_offset), src1_stride,
+ &mask[0][0], sizeof(mask[0]),
+ h, w, suby, subx, bit_depth);
+
+ ASM_REGISTER_STATE_CHECK(
+ tst_func_(CONVERT_TO_BYTEPTR(&dst_tst[0][dst_offset]), dst_stride,
+ CONVERT_TO_BYTEPTR(p_src0 + src0_offset), src0_stride,
+ CONVERT_TO_BYTEPTR(p_src1 + src1_offset), src1_stride,
+ &mask[0][0], sizeof(mask[0]),
+ h, w, suby, subx, bit_depth));
+ }
+
+ int bit_depth;
+};
+
+TEST_P(BlendMask6TestHBD, RandomValues) {
+ for (int i = 0 ; i < 10000 && !HasFatalFailure(); i++) {
+ //////////////////////////////////////////////////////////////////////////
+ // Randomise
+ //////////////////////////////////////////////////////////////////////////
+
+ bit_depth = randomise.choice(8, 10, 12);
+
+ const int hi = 1 << bit_depth;
+
+ randomise(dst_ref, hi);
+ randomise(dst_tst, hi);
+
+ randomise(src0, hi);
+ randomise(src1, hi);
+
+ randomise(mask, 65);
+
+ Common();
+ }
+}
+
+TEST_P(BlendMask6TestHBD, ExtremeValues) {
+ for (int i = 0 ; i < 1000 && !HasFatalFailure(); i++) {
+ //////////////////////////////////////////////////////////////////////////
+ // Randomise
+ //////////////////////////////////////////////////////////////////////////
+
+ bit_depth = randomise.choice(8, 10, 12);
+
+ const int hi = 1 << bit_depth;
+ const int lo = hi - 2;
+
+ randomise(dst_ref, lo, hi);
+ randomise(dst_tst, lo, hi);
+
+ randomise(src0, lo, hi);
+ randomise(src1, lo, hi);
+
+ randomise(mask, 63, 65);
+
+ Common();
+ }
+}
+
+#if HAVE_SSE4_1
+INSTANTIATE_TEST_CASE_P(
+ SSE4_1_C_COMPARE, BlendMask6TestHBD,
+ ::testing::Values(make_tuple(&vpx_highbd_blend_mask6_c,
+ &vpx_highbd_blend_mask6_sse4_1)));
+#endif // HAVE_SSE4_1
+#endif // CONFIG_VP9_HIGHBITDEPTH
+} // namespace
diff --git a/test/decode_test_driver.cc b/test/decode_test_driver.cc
index ad861c3..7fb3e37 100644
--- a/test/decode_test_driver.cc
+++ b/test/decode_test_driver.cc
@@ -18,6 +18,7 @@
namespace libvpx_test {
const char kVP8Name[] = "WebM Project VP8";
+const char kVP10Name[] = "WebM Project VP10";
vpx_codec_err_t Decoder::PeekStream(const uint8_t *cxdata, size_t size,
vpx_codec_stream_info_t *stream_info) {
@@ -46,6 +47,11 @@
return strncmp(kVP8Name, codec_name, sizeof(kVP8Name) - 1) == 0;
}
+bool Decoder::IsVP10() const {
+ const char *codec_name = GetDecoderName();
+ return strncmp(kVP10Name, codec_name, sizeof(kVP10Name) - 1) == 0;
+}
+
void DecoderTest::HandlePeekResult(Decoder *const decoder,
CompressedVideoSource *video,
const vpx_codec_err_t res_peek) {
diff --git a/test/decode_test_driver.h b/test/decode_test_driver.h
index f566c53..1492c5a 100644
--- a/test/decode_test_driver.h
+++ b/test/decode_test_driver.h
@@ -107,6 +107,8 @@
bool IsVP8() const;
+ bool IsVP10() const;
+
vpx_codec_ctx_t * GetDecoder() {
return &decoder_;
}
diff --git a/test/encode_test_driver.cc b/test/encode_test_driver.cc
index e24c9bf..f4c4c4b 100644
--- a/test/encode_test_driver.cc
+++ b/test/encode_test_driver.cc
@@ -58,8 +58,10 @@
#endif
{
#if CONFIG_VP8_ENCODER
- ASSERT_EQ(&vpx_codec_vp8_cx_algo, CodecInterface())
- << "Unknown Codec Interface";
+ if (CodecInterface() == &vpx_codec_vp8_cx_algo) {
+ ASSERT_EQ(&vpx_codec_vp8_cx_algo, CodecInterface())
+ << "Unknown Codec Interface";
+ }
#endif
}
}
@@ -261,12 +263,6 @@
void EncoderTest::RunLoop(VideoSource *video) {
vpx_codec_dec_cfg_t dec_cfg = vpx_codec_dec_cfg_t();
-#if CONFIG_EXT_TILE
- // Decode all tiles.
- dec_cfg.tile_col = -1;
- dec_cfg.tile_row = -1;
-#endif // CONFIG_EXT_TILE
-
stats_.Reset();
ASSERT_TRUE(passes_ == 1 || passes_ == 2);
@@ -295,6 +291,15 @@
if (init_flags_ & VPX_CODEC_USE_OUTPUT_PARTITION)
dec_init_flags |= VPX_CODEC_USE_INPUT_FRAGMENTS;
Decoder* const decoder = codec_->CreateDecoder(dec_cfg, dec_init_flags, 0);
+#if CONFIG_VP10 && CONFIG_EXT_TILE
+ if (decoder->IsVP10()) {
+    // Set the decode-tile row and column controls to -1 so that the whole
+    // frame is decoded.
+ decoder->Control(VP10_SET_DECODE_TILE_ROW, -1);
+ decoder->Control(VP10_SET_DECODE_TILE_COL, -1);
+ }
+#endif
+
bool again;
for (again = true; again; video->Next()) {
again = (video->img() != NULL);
diff --git a/test/function_equivalence_test.h b/test/function_equivalence_test.h
new file mode 100644
index 0000000..50ad4c5
--- /dev/null
+++ b/test/function_equivalence_test.h
@@ -0,0 +1,40 @@
+/*
+ * Copyright (c) 2016 The WebM project authors. All Rights Reserved.
+ *
+ * Use of this source code is governed by a BSD-style license
+ * that can be found in the LICENSE file in the root of the source
+ * tree. An additional intellectual property rights grant can be found
+ * in the file PATENTS. All contributing project authors may
+ * be found in the AUTHORS file in the root of the source tree.
+ */
+
+#ifndef TEST_FUNCTION_EQUIVALENCE_TEST_H_
+#define TEST_FUNCTION_EQUIVALENCE_TEST_H_
+
+#include "third_party/googletest/src/include/gtest/gtest.h"
+#include "test/clear_system_state.h"
+#include "test/util.h"
+
+namespace libvpx_test {
+template <typename T>
+class FunctionEquivalenceTest :
+ public ::testing::TestWithParam< std::tr1::tuple< T, T > > {
+ public:
+ virtual ~FunctionEquivalenceTest() {}
+
+ virtual void SetUp() {
+ ref_func_ = std::tr1::get<0>(this->GetParam());
+ tst_func_ = std::tr1::get<1>(this->GetParam());
+ }
+
+ virtual void TearDown() {
+ libvpx_test::ClearSystemState();
+ }
+
+ protected:
+ T ref_func_;
+ T tst_func_;
+};
+
+} // namespace libvpx_test
+#endif // TEST_FUNCTION_EQUIVALENCE_TEST_H_
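
To illustrate the intended usage, here is a hedged sketch of a test built on FunctionEquivalenceTest. The function pointer type, the test class, and the referenced sum_c/sum_sse2 functions are hypothetical; only FunctionEquivalenceTest and ASM_REGISTER_STATE_CHECK come from the existing test framework.

#include "test/function_equivalence_test.h"
#include "test/register_state_check.h"

namespace {

typedef unsigned int (*SumFunc)(const unsigned char *src, int n);

class SumEquivalenceTest
    : public libvpx_test::FunctionEquivalenceTest<SumFunc> {};

TEST_P(SumEquivalenceTest, RandomValues) {
  unsigned char buf[64];
  for (int i = 0; i < 64; ++i) buf[i] = static_cast<unsigned char>(i);

  // ref_func_ and tst_func_ are filled in by SetUp() from the parameter tuple.
  const unsigned int ref = ref_func_(buf, 64);
  unsigned int tst;
  ASM_REGISTER_STATE_CHECK(tst = tst_func_(buf, 64));
  ASSERT_EQ(ref, tst);
}

// A hypothetical instantiation would pair a C reference implementation with
// an optimized one, e.g.:
// INSTANTIATE_TEST_CASE_P(
//     SSE2_C_COMPARE, SumEquivalenceTest,
//     ::testing::Values(std::tr1::make_tuple(&sum_c, &sum_sse2)));

}  // namespace
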
diff --git a/test/randomise.h b/test/randomise.h
new file mode 100644
index 0000000..fbf419c
--- /dev/null
+++ b/test/randomise.h
@@ -0,0 +1,207 @@
+/*
+ * Copyright (c) 2016 The WebM project authors. All Rights Reserved.
+ *
+ * Use of this source code is governed by a BSD-style license
+ * that can be found in the LICENSE file in the root of the source
+ * tree. An additional intellectual property rights grant can be found
+ * in the file PATENTS. All contributing project authors may
+ * be found in the AUTHORS file in the root of the source tree.
+ */
+
+#ifndef TEST_RANDOMISE_H_
+#define TEST_RANDOMISE_H_
+
+#include <assert.h>
+#include <stdint.h>
+
+#include <limits>
+
+#include "third_party/googletest/src/include/gtest/gtest.h"
+
+#include "test/acm_random.h"
+
+namespace libvpx_test {
+
+// TODO(any): Replace this when built with C++11
+#define STATIC_ASSERT_INTEGER_TYPE_(T) \
+ GTEST_COMPILE_ASSERT_(std::numeric_limits<T>::is_integer, \
+ integer_type_required);
+
+/**
+ * Deterministic random number generator with various convenience methods.
+ */
+class Randomise {
+ public:
+ Randomise() {
+ rnd_.Reset(ACMRandom::DeterministicSeed());
+ }
+
+ virtual ~Randomise() { }
+
+ // Uniformly distributed random number from the range
+  // [std::numeric_limits<R>::min(), std::numeric_limits<R>::max()]
+ template<typename R>
+ R uniform() {
+ STATIC_ASSERT_INTEGER_TYPE_(R);
+ }
+
+ // Uniformly distributed random number from the range
+ // [0, hi)
+ template<typename R, typename H>
+ R uniform(H hi) {
+ assert(hi > 0);
+ R v = uniform<R>();
+ if (std::numeric_limits<R>::is_signed && v < 0)
+ return -v % hi;
+ else
+ return v % hi;
+ }
+
+ // Uniformly distributed random number from the range
+ // [lo, hi)
+ template<typename R, typename L, typename H>
+ R uniform(L lo, H hi) {
+ assert(hi > lo);
+ return uniform<R, H>(hi - lo) + lo;
+ }
+
+ // Randomly pick and return one of the arguments
+ template<typename T>
+ T choice(T v0, T v1) {
+ switch (uniform<int>(2)) {
+ case 0: return v0;
+ default: return v1;
+ }
+ }
+
+ // Randomly pick and return one of the arguments
+ template<typename T>
+ T choice(T v0, T v1, T v2) {
+ switch (uniform<int>(3)) {
+ case 0: return v0;
+ case 1: return v1;
+ default: return v2;
+ }
+ }
+
+ template<typename T>
+ void operator()(T &e) { // NOLINT
+ STATIC_ASSERT_INTEGER_TYPE_(T);
+ e = uniform<T>();
+ }
+
+ template<typename T, typename H>
+ void operator()(T &e, H hi) { // NOLINT
+ STATIC_ASSERT_INTEGER_TYPE_(T);
+ e = uniform<T, H>(hi);
+ }
+
+ template<typename T, typename L, typename H>
+ void operator()(T &e, L lo, H hi) { // NOLINT
+ STATIC_ASSERT_INTEGER_TYPE_(T);
+ e = uniform<T, L, H>(lo, hi);
+ }
+
+ template<typename T, size_t n>
+ void operator()(T (&arr)[n]) {
+ STATIC_ASSERT_INTEGER_TYPE_(T);
+ for (size_t i = 0; i < n ; i++) {
+ arr[i] = uniform<T>();
+ }
+ }
+
+ template<typename T, size_t n, typename H>
+ void operator()(T (&arr)[n], H hi) {
+ STATIC_ASSERT_INTEGER_TYPE_(T);
+ for (size_t i = 0; i < n ; i++) {
+ arr[i] = uniform<T, H>(hi);
+ }
+ }
+
+ template<typename T, size_t n, typename L, typename H>
+ void operator()(T (&arr)[n], L lo, H hi) {
+ STATIC_ASSERT_INTEGER_TYPE_(T);
+ for (size_t i = 0; i < n ; i++) {
+ arr[i] = uniform<T, L, H>(lo, hi);
+ }
+ }
+
+ template<typename T, size_t n, size_t m>
+ void operator()(T (&arr)[n][m]) {
+ STATIC_ASSERT_INTEGER_TYPE_(T);
+ for (size_t i = 0; i < n ; i++) {
+ for (size_t j = 0; j < m ; j++) {
+ arr[i][j] = uniform<T>();
+ }
+ }
+ }
+
+ template<typename T, size_t n, size_t m, typename H>
+ void operator()(T (&arr)[n][m], H hi) {
+ STATIC_ASSERT_INTEGER_TYPE_(T);
+ for (size_t i = 0; i < n ; i++) {
+ for (size_t j = 0; j < m ; j++) {
+ arr[i][j] = uniform<T, H>(hi);
+ }
+ }
+ }
+
+ template<typename T, size_t n, size_t m, typename L, typename H>
+ void operator()(T (&arr)[n][m], L lo, H hi) {
+ STATIC_ASSERT_INTEGER_TYPE_(T);
+ for (size_t i = 0; i < n ; i++) {
+ for (size_t j = 0; j < m ; j++) {
+ arr[i][j] = uniform<T, L, H>(lo, hi);
+ }
+ }
+ }
+
+ private:
+ libvpx_test::ACMRandom rnd_;
+};
+
+// Add further specialisations as necessary
+
+template<>
+bool Randomise::uniform<bool>() {
+ return rnd_.Rand8() & 1 ? true : false;
+}
+
+template<>
+uint8_t Randomise::uniform<uint8_t>() {
+ return rnd_.Rand8();
+}
+
+template<>
+uint16_t Randomise::uniform<uint16_t>() {
+ return rnd_.Rand16();
+}
+
+template<>
+uint32_t Randomise::uniform<uint32_t>() {
+ const uint32_t l = uniform<uint16_t>();
+ const uint32_t h = uniform<uint16_t>();
+ return h << 16 | l;
+}
+
+template<>
+uint64_t Randomise::uniform<uint64_t>() {
+ const uint64_t l = uniform<uint32_t>();
+ const uint64_t h = uniform<uint32_t>();
+ return h << 32 | l;
+}
+
+template<>
+int8_t Randomise::uniform<int8_t>() { return uniform<uint8_t>(); }
+
+template<>
+int16_t Randomise::uniform<int16_t>() { return uniform<uint16_t>(); }
+
+template<>
+int32_t Randomise::uniform<int32_t>() { return uniform<uint32_t>(); }
+
+template<>
+int64_t Randomise::uniform<int64_t>() { return uniform<uint64_t>(); }
+
+} // namespace libvpx_test
+
+#endif // TEST_RANDOMISE_H_
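
A brief sketch of the call patterns Randomise supports follows; the buffer and variable names are hypothetical, only the class itself comes from randomise.h.

#include "test/randomise.h"

void randomise_example() {
  libvpx_test::Randomise randomise;

  uint8_t mask[16][16];
  int w, h;

  randomise(w, 1, 65);    // scalar uniform in [1, 65)
  randomise(h, 1, 65);
  randomise(mask, 65);    // fill the whole 2D array with values in [0, 65)

  const bool subx = randomise.uniform<bool>();  // coin flip
  const int bd = randomise.choice(8, 10, 12);   // pick one of three values
  (void)subx;
  (void)bd;
}
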
diff --git a/test/snapshot.h b/test/snapshot.h
new file mode 100644
index 0000000..b67edde
--- /dev/null
+++ b/test/snapshot.h
@@ -0,0 +1,104 @@
+/*
+ * Copyright (c) 2016 The WebM project authors. All Rights Reserved.
+ *
+ * Use of this source code is governed by a BSD-style license
+ * that can be found in the LICENSE file in the root of the source
+ * tree. An additional intellectual property rights grant can be found
+ * in the file PATENTS. All contributing project authors may
+ * be found in the AUTHORS file in the root of the source tree.
+ */
+
+#ifndef TEST_SNAPSHOT_H_
+#define TEST_SNAPSHOT_H_
+
+#include <assert.h>
+#include <string.h>
+
+#include <map>
+
+namespace libvpx_test {
+
+/**
+ * Allows capturing and retrieving snapshots of arbitrary blobs of memory;
+ * the blob size is derived from compile-time type information.
+ *
+ * Usage:
+ * void example() {
+ * Snapshot snapshot;
+ *
+ * int foo = 4;
+ *
+ * snapshot(foo);
+ *
+ * foo = 10;
+ *
+ * assert(snapshot.get(foo) == 4); // Pass
+ * assert(snapshot.get(foo) == foo); // Fail (4 != 10)
+ *
+ * char bar[10][10];
+ * memset(bar, 3, sizeof(bar));
+ *
+ * snapshot(bar);
+ *
+ * memset(bar, 8, sizeof(bar));
+ *
+ * assert(sum(bar) == 800); // Pass
+ * assert(sum(snapshot.get(bar)) == 300); // Pass
+ * }
+ */
+class Snapshot {
+ public:
+ virtual ~Snapshot() {
+ for (snapshot_map_t::iterator it = snapshots_.begin();
+ it != snapshots_.end(); it++) {
+ delete[] it->second;
+ }
+ }
+
+ /**
+ * Take new snapshot for object
+ */
+ template<typename E>
+ void take(const E &e) {
+ const void *const key = reinterpret_cast<const void*>(&e);
+
+ snapshot_map_t::iterator it = snapshots_.find(key);
+
+ if (it != snapshots_.end())
+ delete[] it->second;
+
+ char *const buf = new char[sizeof(E)];
+
+ memcpy(buf, &e, sizeof(E));
+
+ snapshots_[key] = buf;
+ }
+
+ /**
+ * Same as 'take'
+ */
+ template<typename E>
+ void operator()(const E &e) {
+ take(e);
+ }
+
+ /**
+   * Retrieve the last snapshot of the object
+ */
+ template<typename E>
+ const E& get(const E &e) const {
+ const void *const key = reinterpret_cast<const void*>(&e);
+
+ snapshot_map_t::const_iterator it = snapshots_.find(key);
+
+ assert(it != snapshots_.end());
+
+ return *reinterpret_cast<const E*>(it->second);
+ }
+
+ private:
+ typedef std::map<const void*, const char*> snapshot_map_t;
+
+ snapshot_map_t snapshots_;
+};
+
+} // namespace libvpx_test
+
+#endif // TEST_SNAPSHOT_H_
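
In the tests added by this change, Snapshot is typically paired with the assertion helpers to prove that the function under test did not write to its read-only inputs. A hedged sketch of that pattern follows; function_under_test is a hypothetical stand-in for an optimized routine.

#include <stdint.h>
#include <string.h>

#include "third_party/googletest/src/include/gtest/gtest.h"
#include "test/assertion_helpers.h"
#include "test/snapshot.h"

using libvpx_test::Snapshot;
using libvpx_test::assertion_helpers::ArraysEq;

// Hypothetical stand-in for a routine that reads, but must not modify,
// the buffer passed to it.
static void function_under_test(const uint8_t * /*src*/, int /*n*/) {}

TEST(SnapshotExample, InputNotClobbered) {
  Snapshot snapshot;
  uint8_t src[32];
  memset(src, 1, sizeof(src));

  snapshot(src);                  // capture the input before the call
  function_under_test(src, 32);   // exercise the code under test

  // The call must have left src bit-exact.
  ASSERT_TRUE(ArraysEq(snapshot.get(src), src));
}
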
diff --git a/test/svc_test.cc b/test/svc_test.cc
index e573e10..1ad17be 100644
--- a/test/svc_test.cc
+++ b/test/svc_test.cc
@@ -61,12 +61,14 @@
codec_enc_.kf_max_dist = 100;
vpx_codec_dec_cfg_t dec_cfg = vpx_codec_dec_cfg_t();
-#if CONFIG_EXT_TILE
- dec_cfg.tile_col = -1;
- dec_cfg.tile_row = -1;
-#endif // CONFIG_EXT_TILE
VP9CodecFactory codec_factory;
decoder_ = codec_factory.CreateDecoder(dec_cfg, 0);
+#if CONFIG_VP10 && CONFIG_EXT_TILE
+ if (decoder_->IsVP10()) {
+ decoder_->Control(VP10_SET_DECODE_TILE_ROW, -1);
+ decoder_->Control(VP10_SET_DECODE_TILE_COL, -1);
+ }
+#endif
tile_columns_ = 0;
tile_rows_ = 0;
diff --git a/test/test.mk b/test/test.mk
index 8682a88..8eda2dd 100644
--- a/test/test.mk
+++ b/test/test.mk
@@ -174,6 +174,7 @@
LIBVPX_TEST_SRCS-$(CONFIG_VP10_ENCODER) += vp10_fht8x8_test.cc
LIBVPX_TEST_SRCS-$(CONFIG_VP10_ENCODER) += vp10_fht16x16_test.cc
LIBVPX_TEST_SRCS-$(CONFIG_ANS) += vp10_ans_test.cc
+LIBVPX_TEST_SRCS-$(CONFIG_EXT_TILE) += vp10_ext_tile_test.cc
LIBVPX_TEST_SRCS-$(CONFIG_VP10_ENCODER) += sum_squares_test.cc
LIBVPX_TEST_SRCS-$(CONFIG_VP10_ENCODER) += subtract_test.cc
@@ -181,7 +182,12 @@
ifeq ($(CONFIG_EXT_INTER),yes)
LIBVPX_TEST_SRCS-$(HAVE_SSSE3) += masked_variance_test.cc
LIBVPX_TEST_SRCS-$(HAVE_SSSE3) += masked_sad_test.cc
+LIBVPX_TEST_SRCS-$(CONFIG_VP10_ENCODER) += blend_mask6_test.cc
endif
+
+ifeq ($(CONFIG_VP9_HIGHBITDEPTH),yes)
+LIBVPX_TEST_SRCS-$(HAVE_SSE4_1) += vp10_highbd_iht_test.cc
+endif # CONFIG_VP9_HIGHBITDEPTH
endif # VP10
## Multi-codec / unconditional whitebox tests.
@@ -193,6 +199,7 @@
endif
LIBVPX_TEST_SRCS-$(CONFIG_ENCODERS) += sad_test.cc
LIBVPX_TEST_SRCS-$(CONFIG_VP10) += vp10_txfm_test.h
+LIBVPX_TEST_SRCS-$(CONFIG_VP10) += vp10_txfm_test.cc
LIBVPX_TEST_SRCS-$(CONFIG_VP10) += vp10_fwd_txfm1d_test.cc
LIBVPX_TEST_SRCS-$(CONFIG_VP10) += vp10_inv_txfm1d_test.cc
LIBVPX_TEST_SRCS-$(CONFIG_VP10) += vp10_fwd_txfm2d_test.cc
diff --git a/test/tile_independence_test.cc b/test/tile_independence_test.cc
index 9a049bf..dc31d06 100644
--- a/test/tile_independence_test.cc
+++ b/test/tile_independence_test.cc
@@ -35,13 +35,18 @@
cfg.w = 704;
cfg.h = 144;
cfg.threads = 1;
-#if CONFIG_EXT_TILE
- cfg.tile_col = -1;
- cfg.tile_row = -1;
-#endif // CONFIG_EXT_TILE
fw_dec_ = codec_->CreateDecoder(cfg, 0);
inv_dec_ = codec_->CreateDecoder(cfg, 0);
inv_dec_->Control(VP9_INVERT_TILE_DECODE_ORDER, 1);
+
+#if CONFIG_VP10 && CONFIG_EXT_TILE
+ if (fw_dec_->IsVP10() && inv_dec_->IsVP10()) {
+ fw_dec_->Control(VP10_SET_DECODE_TILE_ROW, -1);
+ fw_dec_->Control(VP10_SET_DECODE_TILE_COL, -1);
+ inv_dec_->Control(VP10_SET_DECODE_TILE_ROW, -1);
+ inv_dec_->Control(VP10_SET_DECODE_TILE_COL, -1);
+ }
+#endif
}
virtual ~TileIndependenceTest() {
diff --git a/test/vp10_ext_tile_test.cc b/test/vp10_ext_tile_test.cc
new file mode 100644
index 0000000..ad04eeb
--- /dev/null
+++ b/test/vp10_ext_tile_test.cc
@@ -0,0 +1,201 @@
+/*
+ * Copyright (c) 2016 The WebM project authors. All Rights Reserved.
+ *
+ * Use of this source code is governed by a BSD-style license
+ * that can be found in the LICENSE file in the root of the source
+ * tree. An additional intellectual property rights grant can be found
+ * in the file PATENTS. All contributing project authors may
+ * be found in the AUTHORS file in the root of the source tree.
+ */
+
+#include <assert.h>
+#include <string>
+#include <vector>
+#include "third_party/googletest/src/include/gtest/gtest.h"
+#include "test/codec_factory.h"
+#include "test/encode_test_driver.h"
+#include "test/i420_video_source.h"
+#include "test/md5_helper.h"
+#include "test/util.h"
+
+namespace {
+// The number of frames to be encoded/decoded
+const int kLimit = 8;
+// Skip one frame to check frame decoding independence.
+const int kSkip = 5;
+const int kTileSize = 1;
+const int kTIleSizeInPixels = (kTileSize << 6);
+// Fake width and height so that they can be multiples of the tile size.
+const int kImgWidth = 704;
+const int kImgHeight = 576;
+
+class VP10ExtTileTest
+ : public ::libvpx_test::EncoderTest,
+ public ::libvpx_test::CodecTestWith2Params<libvpx_test::TestMode, int> {
+ protected:
+ VP10ExtTileTest()
+ : EncoderTest(GET_PARAM(0)),
+ encoding_mode_(GET_PARAM(1)),
+ set_cpu_used_(GET_PARAM(2)) {
+ init_flags_ = VPX_CODEC_USE_PSNR;
+ vpx_codec_dec_cfg_t cfg = vpx_codec_dec_cfg_t();
+ cfg.w = kImgWidth;
+ cfg.h = kImgHeight;
+
+ decoder_ = codec_->CreateDecoder(cfg, 0);
+ decoder_->Control(VP10_SET_DECODE_TILE_ROW, -1);
+ decoder_->Control(VP10_SET_DECODE_TILE_COL, -1);
+
+ // Allocate buffer to store tile image.
+ vpx_img_alloc(&tile_img_, VPX_IMG_FMT_I420, kImgWidth, kImgHeight, 32);
+
+ md5_.clear();
+ tile_md5_.clear();
+ }
+
+ virtual ~VP10ExtTileTest() {
+ vpx_img_free(&tile_img_);
+ delete decoder_;
+ }
+
+ virtual void SetUp() {
+ InitializeConfig();
+ SetMode(encoding_mode_);
+
+ cfg_.g_lag_in_frames = 0;
+ cfg_.rc_end_usage = VPX_VBR;
+ cfg_.g_error_resilient = 1;
+
+ cfg_.rc_max_quantizer = 56;
+ cfg_.rc_min_quantizer = 0;
+ }
+
+ virtual void PreEncodeFrameHook(::libvpx_test::VideoSource * video,
+ ::libvpx_test::Encoder *encoder) {
+ if (video->frame() == 0) {
+ // Encode setting
+ encoder->Control(VP8E_SET_CPUUSED, set_cpu_used_);
+ encoder->Control(VP8E_SET_ENABLEAUTOALTREF, 0);
+ encoder->Control(VP9E_SET_FRAME_PARALLEL_DECODING, 1);
+
+ // The tile size is 64x64.
+ encoder->Control(VP9E_SET_TILE_COLUMNS, kTileSize);
+ encoder->Control(VP9E_SET_TILE_ROWS, kTileSize);
+#if CONFIG_EXT_PARTITION
+ // Always use 64x64 max partition.
+ encoder->Control(VP10E_SET_SUPERBLOCK_SIZE, VPX_SUPERBLOCK_SIZE_64X64);
+#endif
+ }
+
+ if (video->frame() == 1) {
+ frame_flags_ = VP8_EFLAG_NO_UPD_LAST | VP8_EFLAG_NO_UPD_GF |
+ VP8_EFLAG_NO_UPD_ARF;
+ }
+ }
+
+ virtual void DecompressedFrameHook(const vpx_image_t &img,
+ vpx_codec_pts_t pts) {
+    // Skip the frame at pts == kSkip so that the reference MD5 list stays
+    // consistent with the frames actually decoded in FramePktHook().
+ if (pts == (vpx_codec_pts_t)kSkip)
+ return;
+
+ // Calculate MD5 as the reference.
+ ::libvpx_test::MD5 md5_res;
+ md5_res.Add(&img);
+ md5_.push_back(md5_res.Get());
+ }
+
+ virtual void FramePktHook(const vpx_codec_cx_pkt_t *pkt) {
+ // Skip decoding 1 frame.
+ if (pkt->data.frame.pts == (vpx_codec_pts_t)kSkip)
+ return;
+
+ bool IsLastFrame = (pkt->data.frame.pts == (vpx_codec_pts_t)(kLimit - 1));
+
+    // Decode the first (kLimit - 1) frames as whole frames, and decode the
+    // last frame tile by tile.
+ for (int r = 0; r < kImgHeight / kTIleSizeInPixels; ++r) {
+ for (int c = 0; c < kImgWidth / kTIleSizeInPixels; ++c) {
+ if (!IsLastFrame) {
+ decoder_->Control(VP10_SET_DECODE_TILE_ROW, -1);
+ decoder_->Control(VP10_SET_DECODE_TILE_COL, -1);
+ } else {
+ decoder_->Control(VP10_SET_DECODE_TILE_ROW, r);
+ decoder_->Control(VP10_SET_DECODE_TILE_COL, c);
+ }
+
+ const vpx_codec_err_t res = decoder_->DecodeFrame(
+ reinterpret_cast<uint8_t*>(pkt->data.frame.buf),
+ pkt->data.frame.sz);
+ if (res != VPX_CODEC_OK) {
+ abort_ = true;
+ ASSERT_EQ(VPX_CODEC_OK, res);
+ }
+ const vpx_image_t *img = decoder_->GetDxData().Next();
+
+ if (!IsLastFrame) {
+ if (img) {
+ ::libvpx_test::MD5 md5_res;
+ md5_res.Add(img);
+ tile_md5_.push_back(md5_res.Get());
+ }
+ break;
+ }
+
+ const int kMaxMBPlane = 3;
+ for (int plane = 0; plane < kMaxMBPlane; ++plane) {
+ const int shift = (plane == 0) ? 0 : 1;
+ int tile_height = kTIleSizeInPixels >> shift;
+ int tile_width = kTIleSizeInPixels >> shift;
+
+ for (int tr = 0; tr < tile_height; ++tr) {
+ memcpy(tile_img_.planes[plane] +
+ tile_img_.stride[plane] * (r * tile_height + tr) +
+ c * tile_width,
+ img->planes[plane] + img->stride[plane] * tr, tile_width);
+ }
+ }
+ }
+
+ if (!IsLastFrame)
+ break;
+ }
+
+ if (IsLastFrame) {
+ ::libvpx_test::MD5 md5_res;
+ md5_res.Add(&tile_img_);
+ tile_md5_.push_back(md5_res.Get());
+ }
+ }
+
+ ::libvpx_test::TestMode encoding_mode_;
+ int set_cpu_used_;
+ ::libvpx_test::Decoder *decoder_;
+ vpx_image_t tile_img_;
+ std::vector<std::string> md5_;
+ std::vector<std::string> tile_md5_;
+};
+
+TEST_P(VP10ExtTileTest, DecoderResultTest) {
+ ::libvpx_test::I420VideoSource video("hantro_collage_w352h288.yuv",
+ kImgWidth, kImgHeight, 30, 1, 0, kLimit);
+ cfg_.rc_target_bitrate = 500;
+ cfg_.g_error_resilient = VPX_ERROR_RESILIENT_DEFAULT;
+ cfg_.g_lag_in_frames = 0;
+ cfg_.g_threads = 1;
+
+ // Tile encoding
+ init_flags_ = VPX_CODEC_USE_PSNR;
+ ASSERT_NO_FATAL_FAILURE(RunLoop(&video));
+
+ // Compare to check if two vectors are equal.
+ ASSERT_EQ(md5_, tile_md5_);
+}
+
+VP10_INSTANTIATE_TEST_CASE(
+    // Only the 2-pass mode is tested for now.
+ VP10ExtTileTest,
+ ::testing::Values(::libvpx_test::kTwoPassGood),
+ ::testing::Range(0, 4));
+} // namespace
diff --git a/test/vp10_fht16x16_test.cc b/test/vp10_fht16x16_test.cc
index 8833250..deccc81 100644
--- a/test/vp10_fht16x16_test.cc
+++ b/test/vp10_fht16x16_test.cc
@@ -132,7 +132,7 @@
ACMRandom rnd(ACMRandom::DeterministicSeed());
int i, j;
const int stride = 16;
- const int num_tests = 200000;
+ const int num_tests = 1000;
for (i = 0; i < num_tests; ++i) {
for (j = 0; j < num_coeffs_; ++j) {
@@ -207,7 +207,19 @@
make_tuple(&vp10_fwd_txfm2d_16x16_sse4_1, 2, 10),
make_tuple(&vp10_fwd_txfm2d_16x16_sse4_1, 2, 12),
make_tuple(&vp10_fwd_txfm2d_16x16_sse4_1, 3, 10),
- make_tuple(&vp10_fwd_txfm2d_16x16_sse4_1, 3, 12)
+ make_tuple(&vp10_fwd_txfm2d_16x16_sse4_1, 3, 12),
+#if CONFIG_EXT_TX
+ make_tuple(&vp10_fwd_txfm2d_16x16_sse4_1, 4, 10),
+ make_tuple(&vp10_fwd_txfm2d_16x16_sse4_1, 4, 12),
+ make_tuple(&vp10_fwd_txfm2d_16x16_sse4_1, 5, 10),
+ make_tuple(&vp10_fwd_txfm2d_16x16_sse4_1, 5, 12),
+ make_tuple(&vp10_fwd_txfm2d_16x16_sse4_1, 6, 10),
+ make_tuple(&vp10_fwd_txfm2d_16x16_sse4_1, 6, 12),
+ make_tuple(&vp10_fwd_txfm2d_16x16_sse4_1, 7, 10),
+ make_tuple(&vp10_fwd_txfm2d_16x16_sse4_1, 7, 12),
+ make_tuple(&vp10_fwd_txfm2d_16x16_sse4_1, 8, 10),
+ make_tuple(&vp10_fwd_txfm2d_16x16_sse4_1, 8, 12),
+#endif // CONFIG_EXT_TX
};
INSTANTIATE_TEST_CASE_P(
SSE4_1, VP10HighbdTrans16x16HT,
diff --git a/test/vp10_fht4x4_test.cc b/test/vp10_fht4x4_test.cc
index 5b81095..c5a4382 100644
--- a/test/vp10_fht4x4_test.cc
+++ b/test/vp10_fht4x4_test.cc
@@ -38,8 +38,10 @@
typedef void (*IhighbdHtFunc)(const tran_low_t *in, uint8_t *out, int stride,
int tx_type, int bd);
typedef void (*HBDFhtFunc)(const int16_t *input, int32_t *output, int stride,
- int tx_type, int bd);
-// Target optimized function, tx_type, bit depth
+ int tx_type, int bd);
+
+// HighbdHt4x4Param argument list:
+// <Target optimized function, tx_type, bit depth>
typedef tuple<HBDFhtFunc, int, int> HighbdHt4x4Param;
void highbe_fht4x4_ref(const int16_t *in, int32_t *out, int stride,
@@ -96,12 +98,12 @@
mask_ = (1 << bit_depth_) - 1;
num_coeffs_ = 16;
- input_ = reinterpret_cast<int16_t *>
- (vpx_memalign(16, sizeof(int16_t) * num_coeffs_));
- output_ = reinterpret_cast<int32_t *>
- (vpx_memalign(16, sizeof(int32_t) * num_coeffs_));
- output_ref_ = reinterpret_cast<int32_t *>
- (vpx_memalign(16, sizeof(int32_t) * num_coeffs_));
+ input_ = reinterpret_cast<int16_t *>(
+ vpx_memalign(16, sizeof(int16_t) * num_coeffs_));
+ output_ = reinterpret_cast<int32_t *>(
+ vpx_memalign(16, sizeof(int32_t) * num_coeffs_));
+ output_ref_ = reinterpret_cast<int32_t *>(
+ vpx_memalign(16, sizeof(int32_t) * num_coeffs_));
}
virtual void TearDown() {
@@ -130,7 +132,7 @@
ACMRandom rnd(ACMRandom::DeterministicSeed());
int i, j;
const int stride = 4;
- const int num_tests = 200000;
+ const int num_tests = 1000;
const int num_coeffs = 16;
for (i = 0; i < num_tests; ++i) {
@@ -197,9 +199,7 @@
#endif // HAVE_SSE2
#if HAVE_SSE4_1 && CONFIG_VP9_HIGHBITDEPTH
-INSTANTIATE_TEST_CASE_P(
- SSE4_1, VP10HighbdTrans4x4HT,
- ::testing::Values(
+const HighbdHt4x4Param kArrayHighbdHt4x4Param[] = {
make_tuple(&vp10_fwd_txfm2d_4x4_sse4_1, 0, 10),
make_tuple(&vp10_fwd_txfm2d_4x4_sse4_1, 0, 12),
make_tuple(&vp10_fwd_txfm2d_4x4_sse4_1, 1, 10),
@@ -207,7 +207,25 @@
make_tuple(&vp10_fwd_txfm2d_4x4_sse4_1, 2, 10),
make_tuple(&vp10_fwd_txfm2d_4x4_sse4_1, 2, 12),
make_tuple(&vp10_fwd_txfm2d_4x4_sse4_1, 3, 10),
- make_tuple(&vp10_fwd_txfm2d_4x4_sse4_1, 3, 12)));
+ make_tuple(&vp10_fwd_txfm2d_4x4_sse4_1, 3, 12),
+#if CONFIG_EXT_TX
+ make_tuple(&vp10_fwd_txfm2d_4x4_sse4_1, 4, 10),
+ make_tuple(&vp10_fwd_txfm2d_4x4_sse4_1, 4, 12),
+ make_tuple(&vp10_fwd_txfm2d_4x4_sse4_1, 5, 10),
+ make_tuple(&vp10_fwd_txfm2d_4x4_sse4_1, 5, 12),
+ make_tuple(&vp10_fwd_txfm2d_4x4_sse4_1, 6, 10),
+ make_tuple(&vp10_fwd_txfm2d_4x4_sse4_1, 6, 12),
+ make_tuple(&vp10_fwd_txfm2d_4x4_sse4_1, 7, 10),
+ make_tuple(&vp10_fwd_txfm2d_4x4_sse4_1, 7, 12),
+ make_tuple(&vp10_fwd_txfm2d_4x4_sse4_1, 8, 10),
+ make_tuple(&vp10_fwd_txfm2d_4x4_sse4_1, 8, 12),
+#endif // CONFIG_EXT_TX
+};
+
+INSTANTIATE_TEST_CASE_P(
+ SSE4_1, VP10HighbdTrans4x4HT,
+ ::testing::ValuesIn(kArrayHighbdHt4x4Param));
+
#endif // HAVE_SSE4_1 && CONFIG_VP9_HIGHBITDEPTH
} // namespace
diff --git a/test/vp10_fht8x8_test.cc b/test/vp10_fht8x8_test.cc
index aadd77d..da278c4 100644
--- a/test/vp10_fht8x8_test.cc
+++ b/test/vp10_fht8x8_test.cc
@@ -131,7 +131,7 @@
ACMRandom rnd(ACMRandom::DeterministicSeed());
int i, j;
const int stride = 8;
- const int num_tests = 200000;
+ const int num_tests = 1000;
const int num_coeffs = 64;
for (i = 0; i < num_tests; ++i) {
@@ -207,7 +207,19 @@
make_tuple(&vp10_fwd_txfm2d_8x8_sse4_1, 2, 10),
make_tuple(&vp10_fwd_txfm2d_8x8_sse4_1, 2, 12),
make_tuple(&vp10_fwd_txfm2d_8x8_sse4_1, 3, 10),
- make_tuple(&vp10_fwd_txfm2d_8x8_sse4_1, 3, 12)
+ make_tuple(&vp10_fwd_txfm2d_8x8_sse4_1, 3, 12),
+#if CONFIG_EXT_TX
+ make_tuple(&vp10_fwd_txfm2d_8x8_sse4_1, 4, 10),
+ make_tuple(&vp10_fwd_txfm2d_8x8_sse4_1, 4, 12),
+ make_tuple(&vp10_fwd_txfm2d_8x8_sse4_1, 5, 10),
+ make_tuple(&vp10_fwd_txfm2d_8x8_sse4_1, 5, 12),
+ make_tuple(&vp10_fwd_txfm2d_8x8_sse4_1, 6, 10),
+ make_tuple(&vp10_fwd_txfm2d_8x8_sse4_1, 6, 12),
+ make_tuple(&vp10_fwd_txfm2d_8x8_sse4_1, 7, 10),
+ make_tuple(&vp10_fwd_txfm2d_8x8_sse4_1, 7, 12),
+ make_tuple(&vp10_fwd_txfm2d_8x8_sse4_1, 8, 10),
+ make_tuple(&vp10_fwd_txfm2d_8x8_sse4_1, 8, 12),
+#endif // CONFIG_EXT_TX
};
INSTANTIATE_TEST_CASE_P(
SSE4_1, VP10HighbdTrans8x8HT,
diff --git a/test/vp10_fwd_txfm2d_test.cc b/test/vp10_fwd_txfm2d_test.cc
index 8e90dc2..0e35782 100644
--- a/test/vp10_fwd_txfm2d_test.cc
+++ b/test/vp10_fwd_txfm2d_test.cc
@@ -13,8 +13,9 @@
#include <stdlib.h>
#include "test/acm_random.h"
+#include "test/util.h"
#include "test/vp10_txfm_test.h"
-#include "vp10/common/vp10_fwd_txfm2d_cfg.h"
+#include "vp10/common/vp10_txfm.h"
#include "./vp10_rtcd.h"
using libvpx_test::ACMRandom;
@@ -23,95 +24,156 @@
using libvpx_test::compute_avg_abs_error;
using libvpx_test::Fwd_Txfm2d_Func;
using libvpx_test::TYPE_TXFM;
-using libvpx_test::TYPE_DCT;
-using libvpx_test::TYPE_ADST;
namespace {
-
#if CONFIG_VP9_HIGHBITDEPTH
-const int txfm_size_num = 5;
-const int txfm_size_ls[5] = {4, 8, 16, 32, 64};
-const TXFM_2D_CFG* fwd_txfm_cfg_ls[5][4] = {
- {&fwd_txfm_2d_cfg_dct_dct_4, &fwd_txfm_2d_cfg_dct_adst_4,
- &fwd_txfm_2d_cfg_adst_adst_4, &fwd_txfm_2d_cfg_adst_dct_4},
- {&fwd_txfm_2d_cfg_dct_dct_8, &fwd_txfm_2d_cfg_dct_adst_8,
- &fwd_txfm_2d_cfg_adst_adst_8, &fwd_txfm_2d_cfg_adst_dct_8},
- {&fwd_txfm_2d_cfg_dct_dct_16, &fwd_txfm_2d_cfg_dct_adst_16,
- &fwd_txfm_2d_cfg_adst_adst_16, &fwd_txfm_2d_cfg_adst_dct_16},
- {&fwd_txfm_2d_cfg_dct_dct_32, &fwd_txfm_2d_cfg_dct_adst_32,
- &fwd_txfm_2d_cfg_adst_adst_32, &fwd_txfm_2d_cfg_adst_dct_32},
- {&fwd_txfm_2d_cfg_dct_dct_64, NULL, NULL, NULL}};
+// tx_type_, tx_size_, max_error_, max_avg_error_
+typedef std::tr1::tuple<TX_TYPE, TX_SIZE, double, double> VP10FwdTxfm2dParam;
-const Fwd_Txfm2d_Func fwd_txfm_func_ls[5] = {
- vp10_fwd_txfm2d_4x4_c, vp10_fwd_txfm2d_8x8_c, vp10_fwd_txfm2d_16x16_c,
- vp10_fwd_txfm2d_32x32_c, vp10_fwd_txfm2d_64x64_c};
+class VP10FwdTxfm2d : public ::testing::TestWithParam<VP10FwdTxfm2dParam> {
+ public:
+ virtual void SetUp() {
+ tx_type_ = GET_PARAM(0);
+ tx_size_ = GET_PARAM(1);
+ max_error_ = GET_PARAM(2);
+ max_avg_error_ = GET_PARAM(3);
+ count_ = 500;
+ TXFM_2D_FLIP_CFG fwd_txfm_flip_cfg =
+ vp10_get_fwd_txfm_cfg(tx_type_, tx_size_);
+ const TXFM_2D_CFG *fwd_txfm_cfg = fwd_txfm_flip_cfg.cfg;
+ int amplify_bit = fwd_txfm_cfg->shift[0] + fwd_txfm_cfg->shift[1] +
+ fwd_txfm_cfg->shift[2];
+ ud_flip_ = fwd_txfm_flip_cfg.ud_flip;
+ lr_flip_ = fwd_txfm_flip_cfg.lr_flip;
+ amplify_factor_ =
+ amplify_bit >= 0 ? (1 << amplify_bit) : (1.0 / (1 << -amplify_bit));
-const int txfm_type_num = 4;
-const TYPE_TXFM type_ls_0[4] = {TYPE_DCT, TYPE_DCT, TYPE_ADST, TYPE_ADST};
-const TYPE_TXFM type_ls_1[4] = {TYPE_DCT, TYPE_ADST, TYPE_ADST, TYPE_DCT};
+ fwd_txfm_ = libvpx_test::fwd_txfm_func_ls[tx_size_];
+ txfm1d_size_ = libvpx_test::get_txfm1d_size(tx_size_);
+ txfm2d_size_ = txfm1d_size_ * txfm1d_size_;
+ get_txfm1d_type(tx_type_, &type0_, &type1_);
+ input_ = reinterpret_cast<int16_t *>
+ (vpx_memalign(16, sizeof(int16_t) * txfm2d_size_));
+ output_ = reinterpret_cast<int32_t *>
+ (vpx_memalign(16, sizeof(int32_t) * txfm2d_size_));
+ ref_input_ = reinterpret_cast<double *>
+ (vpx_memalign(16, sizeof(double) * txfm2d_size_));
+ ref_output_ = reinterpret_cast<double *>
+ (vpx_memalign(16, sizeof(double) * txfm2d_size_));
+ }
-TEST(vp10_fwd_txfm2d, accuracy) {
- for (int txfm_size_idx = 0; txfm_size_idx < txfm_size_num; ++txfm_size_idx) {
- int txfm_size = txfm_size_ls[txfm_size_idx];
- int sqr_txfm_size = txfm_size * txfm_size;
- int16_t* input = new int16_t[sqr_txfm_size];
- int32_t* output = new int32_t[sqr_txfm_size];
- double* ref_input = new double[sqr_txfm_size];
- double* ref_output = new double[sqr_txfm_size];
-
- for (int txfm_type_idx = 0; txfm_type_idx < txfm_type_num;
- ++txfm_type_idx) {
- const TXFM_2D_CFG* fwd_txfm_cfg =
- fwd_txfm_cfg_ls[txfm_size_idx][txfm_type_idx];
- if (fwd_txfm_cfg != NULL) {
- Fwd_Txfm2d_Func fwd_txfm_func = fwd_txfm_func_ls[txfm_size_idx];
- TYPE_TXFM type0 = type_ls_0[txfm_type_idx];
- TYPE_TXFM type1 = type_ls_1[txfm_type_idx];
- int amplify_bit = fwd_txfm_cfg->shift[0] + fwd_txfm_cfg->shift[1] +
- fwd_txfm_cfg->shift[2];
- double amplify_factor =
- amplify_bit >= 0 ? (1 << amplify_bit) : (1.0 / (1 << -amplify_bit));
- int tx_type = libvpx_test::get_tx_type(fwd_txfm_cfg);
-
- ACMRandom rnd(ACMRandom::DeterministicSeed());
- int count = 500;
- double avg_abs_error = 0;
- for (int ci = 0; ci < count; ci++) {
- for (int ni = 0; ni < sqr_txfm_size; ++ni) {
- input[ni] = rnd.Rand16() % input_base;
- ref_input[ni] = static_cast<double>(input[ni]);
- output[ni] = 0;
- ref_output[ni] = 0;
- }
-
- fwd_txfm_func(input, output, txfm_size, tx_type, bd);
- reference_hybrid_2d(ref_input, ref_output, txfm_size, type0, type1);
-
- for (int ni = 0; ni < sqr_txfm_size; ++ni) {
- ref_output[ni] = round(ref_output[ni] * amplify_factor);
- EXPECT_LE(fabs(output[ni] - ref_output[ni]) / amplify_factor, 70);
- }
- avg_abs_error += compute_avg_abs_error<int32_t, double>(
- output, ref_output, sqr_txfm_size);
- }
-
- avg_abs_error /= amplify_factor;
- avg_abs_error /= count;
- // max_abs_avg_error comes from upper bound of avg_abs_error
- // printf("type0: %d type1: %d txfm_size: %d accuracy_avg_abs_error:
- // %f\n",
- // type0, type1, txfm_size, avg_abs_error);
- double max_abs_avg_error = 7;
- EXPECT_LE(avg_abs_error, max_abs_avg_error);
+ void RunFwdAccuracyCheck() {
+ ACMRandom rnd(ACMRandom::DeterministicSeed());
+ double avg_abs_error = 0;
+ for (int ci = 0; ci < count_; ci++) {
+ for (int ni = 0; ni < txfm2d_size_; ++ni) {
+ input_[ni] = rnd.Rand16() % input_base;
+ ref_input_[ni] = static_cast<double>(input_[ni]);
+ output_[ni] = 0;
+ ref_output_[ni] = 0;
}
+
+ fwd_txfm_(input_, output_, txfm1d_size_, tx_type_, bd);
+
+ if (lr_flip_ && ud_flip_)
+ libvpx_test::fliplrud(ref_input_, txfm1d_size_, txfm1d_size_);
+ else if (lr_flip_)
+ libvpx_test::fliplr(ref_input_, txfm1d_size_, txfm1d_size_);
+ else if (ud_flip_)
+ libvpx_test::flipud(ref_input_, txfm1d_size_, txfm1d_size_);
+
+ reference_hybrid_2d(ref_input_, ref_output_, txfm1d_size_,
+ type0_, type1_);
+
+ for (int ni = 0; ni < txfm2d_size_; ++ni) {
+ ref_output_[ni] = round(ref_output_[ni] * amplify_factor_);
+ EXPECT_GE(max_error_,
+ fabs(output_[ni] - ref_output_[ni]) / amplify_factor_);
+ }
+ avg_abs_error += compute_avg_abs_error<int32_t, double>(
+ output_, ref_output_, txfm2d_size_);
}
- delete[] input;
- delete[] output;
- delete[] ref_input;
- delete[] ref_output;
+ avg_abs_error /= amplify_factor_;
+ avg_abs_error /= count_;
+ // max_abs_avg_error comes from upper bound of avg_abs_error
+ // printf("type0: %d type1: %d txfm_size: %d accuracy_avg_abs_error:
+ // %f\n", type0_, type1_, txfm1d_size_, avg_abs_error);
+ EXPECT_GE(max_avg_error_, avg_abs_error);
}
+
+ virtual void TearDown() {
+ vpx_free(input_);
+ vpx_free(output_);
+ vpx_free(ref_input_);
+ vpx_free(ref_output_);
+ }
+
+ private:
+ double max_error_;
+ double max_avg_error_;
+ int count_;
+ double amplify_factor_;
+ TX_TYPE tx_type_;
+ TX_SIZE tx_size_;
+ int txfm1d_size_;
+ int txfm2d_size_;
+ Fwd_Txfm2d_Func fwd_txfm_;
+ TYPE_TXFM type0_;
+ TYPE_TXFM type1_;
+ int16_t* input_;
+ int32_t* output_;
+ double* ref_input_;
+ double* ref_output_;
+ int ud_flip_; // flip upside down
+ int lr_flip_; // flip left to right
+};
+
+TEST_P(VP10FwdTxfm2d, RunFwdAccuracyCheck) {
+ RunFwdAccuracyCheck();
}
+
+INSTANTIATE_TEST_CASE_P(
+ C, VP10FwdTxfm2d,
+ ::testing::Values(
+#if CONFIG_EXT_TX
+ VP10FwdTxfm2dParam(FLIPADST_DCT, TX_4X4, 2, 0.2),
+ VP10FwdTxfm2dParam(DCT_FLIPADST, TX_4X4, 2, 0.2),
+ VP10FwdTxfm2dParam(FLIPADST_FLIPADST, TX_4X4, 2, 0.2),
+ VP10FwdTxfm2dParam(ADST_FLIPADST, TX_4X4, 2, 0.2),
+ VP10FwdTxfm2dParam(FLIPADST_ADST, TX_4X4, 2, 0.2),
+ VP10FwdTxfm2dParam(FLIPADST_DCT, TX_8X8, 5, 0.6),
+ VP10FwdTxfm2dParam(DCT_FLIPADST, TX_8X8, 5, 0.6),
+ VP10FwdTxfm2dParam(FLIPADST_FLIPADST, TX_8X8, 5, 0.6),
+ VP10FwdTxfm2dParam(ADST_FLIPADST, TX_8X8, 5, 0.6),
+ VP10FwdTxfm2dParam(FLIPADST_ADST, TX_8X8, 5, 0.6),
+ VP10FwdTxfm2dParam(FLIPADST_DCT, TX_16X16, 11, 1.5),
+ VP10FwdTxfm2dParam(DCT_FLIPADST, TX_16X16, 11, 1.5),
+ VP10FwdTxfm2dParam(FLIPADST_FLIPADST, TX_16X16, 11, 1.5),
+ VP10FwdTxfm2dParam(ADST_FLIPADST, TX_16X16, 11, 1.5),
+ VP10FwdTxfm2dParam(FLIPADST_ADST, TX_16X16, 11, 1.5),
+ VP10FwdTxfm2dParam(FLIPADST_DCT, TX_32X32, 70, 7),
+ VP10FwdTxfm2dParam(DCT_FLIPADST, TX_32X32, 70, 7),
+ VP10FwdTxfm2dParam(FLIPADST_FLIPADST, TX_32X32, 70, 7),
+ VP10FwdTxfm2dParam(ADST_FLIPADST, TX_32X32, 70, 7),
+ VP10FwdTxfm2dParam(FLIPADST_ADST, TX_32X32, 70, 7),
+#endif
+ VP10FwdTxfm2dParam(DCT_DCT, TX_4X4, 2, 0.2),
+ VP10FwdTxfm2dParam(ADST_DCT, TX_4X4, 2, 0.2),
+ VP10FwdTxfm2dParam(DCT_ADST, TX_4X4, 2, 0.2),
+ VP10FwdTxfm2dParam(ADST_ADST, TX_4X4, 2, 0.2),
+ VP10FwdTxfm2dParam(DCT_DCT, TX_8X8, 5, 0.6),
+ VP10FwdTxfm2dParam(ADST_DCT, TX_8X8, 5, 0.6),
+ VP10FwdTxfm2dParam(DCT_ADST, TX_8X8, 5, 0.6),
+ VP10FwdTxfm2dParam(ADST_ADST, TX_8X8, 5, 0.6),
+ VP10FwdTxfm2dParam(DCT_DCT, TX_16X16, 11, 1.5),
+ VP10FwdTxfm2dParam(ADST_DCT, TX_16X16, 11, 1.5),
+ VP10FwdTxfm2dParam(DCT_ADST, TX_16X16, 11, 1.5),
+ VP10FwdTxfm2dParam(ADST_ADST, TX_16X16, 11, 1.5),
+ VP10FwdTxfm2dParam(DCT_DCT, TX_32X32, 70, 7),
+ VP10FwdTxfm2dParam(ADST_DCT, TX_32X32, 70, 7),
+ VP10FwdTxfm2dParam(DCT_ADST, TX_32X32, 70, 7),
+ VP10FwdTxfm2dParam(ADST_ADST, TX_32X32, 70, 7)));
#endif // CONFIG_VP9_HIGHBITDEPTH
} // namespace
diff --git a/test/vp10_highbd_iht_test.cc b/test/vp10_highbd_iht_test.cc
new file mode 100644
index 0000000..0b7597d
--- /dev/null
+++ b/test/vp10_highbd_iht_test.cc
@@ -0,0 +1,185 @@
+/*
+ * Copyright (c) 2016 The WebM project authors. All Rights Reserved.
+ *
+ * Use of this source code is governed by a BSD-style license
+ * that can be found in the LICENSE file in the root of the source
+ * tree. An additional intellectual property rights grant can be found
+ * in the file PATENTS. All contributing project authors may
+ * be found in the AUTHORS file in the root of the source tree.
+ */
+
+#include "third_party/googletest/src/include/gtest/gtest.h"
+
+#include "./vp10_rtcd.h"
+#include "test/acm_random.h"
+#include "test/clear_system_state.h"
+#include "test/register_state_check.h"
+#include "test/util.h"
+#include "vpx_dsp/vpx_dsp_common.h"
+#include "vpx_ports/mem.h"
+
+namespace {
+
+using std::tr1::tuple;
+using libvpx_test::ACMRandom;
+
+typedef void (*HbdHtFunc)(const int16_t *input, int32_t *output, int stride,
+ int tx_type, int bd);
+
+typedef void (*IHbdHtFunc)(const int32_t *coeff, uint16_t *output, int stride,
+ int tx_type, int bd);
+
+// Test parameter argument list:
+// <transform reference function,
+// optimized inverse transform function,
+// inverse transform reference function,
+// num_coeffs,
+// tx_type,
+// bit_depth>
+typedef tuple<HbdHtFunc, IHbdHtFunc, IHbdHtFunc, int, int, int> IHbdHtParam;
+
+class VP10HighbdInvHTNxN : public ::testing::TestWithParam<IHbdHtParam> {
+ public:
+ virtual ~VP10HighbdInvHTNxN() {}
+
+ virtual void SetUp() {
+ txfm_ref_ = GET_PARAM(0);
+ inv_txfm_ = GET_PARAM(1);
+ inv_txfm_ref_ = GET_PARAM(2);
+ num_coeffs_ = GET_PARAM(3);
+ tx_type_ = GET_PARAM(4);
+ bit_depth_ = GET_PARAM(5);
+
+ input_ = reinterpret_cast<int16_t *>(
+ vpx_memalign(16, sizeof(input_[0]) * num_coeffs_));
+
+    // Note: the inverse transform input buffer is 32-byte aligned.
+    // Refer to alloc_mode_context() in <root>/vp10/encoder/context_tree.c.
+ coeffs_ = reinterpret_cast<int32_t *>(
+ vpx_memalign(32, sizeof(coeffs_[0]) * num_coeffs_));
+ output_ = reinterpret_cast<uint16_t *>(
+ vpx_memalign(32, sizeof(output_[0]) * num_coeffs_));
+ output_ref_ = reinterpret_cast<uint16_t *>(
+ vpx_memalign(32, sizeof(output_ref_[0]) * num_coeffs_));
+ }
+
+ virtual void TearDown() {
+ vpx_free(input_);
+ vpx_free(coeffs_);
+ vpx_free(output_);
+ vpx_free(output_ref_);
+ libvpx_test::ClearSystemState();
+ }
+
+ protected:
+ void RunBitexactCheck();
+
+ private:
+ int GetStride() const {
+ if (16 == num_coeffs_) {
+ return 4;
+ } else if (64 == num_coeffs_) {
+ return 8;
+ } else if (256 == num_coeffs_) {
+ return 16;
+ } else {
+ return 0;
+ }
+ }
+
+ HbdHtFunc txfm_ref_;
+ IHbdHtFunc inv_txfm_;
+ IHbdHtFunc inv_txfm_ref_;
+ int num_coeffs_;
+ int tx_type_;
+ int bit_depth_;
+
+ int16_t *input_;
+ int32_t *coeffs_;
+ uint16_t *output_;
+ uint16_t *output_ref_;
+};
+
+void VP10HighbdInvHTNxN::RunBitexactCheck() {
+ ACMRandom rnd(ACMRandom::DeterministicSeed());
+ const int stride = GetStride();
+ const int num_tests = 20000;
+ const uint16_t mask = (1 << bit_depth_) - 1;
+
+ for (int i = 0; i < num_tests; ++i) {
+ for (int j = 0; j < num_coeffs_; ++j) {
+ input_[j] = (rnd.Rand16() & mask) - (rnd.Rand16() & mask);
+ output_ref_[j] = rnd.Rand16() & mask;
+ output_[j] = output_ref_[j];
+ }
+
+ txfm_ref_(input_, coeffs_, stride, tx_type_, bit_depth_);
+ inv_txfm_ref_(coeffs_, output_ref_, stride, tx_type_, bit_depth_);
+ ASM_REGISTER_STATE_CHECK(inv_txfm_(coeffs_, output_, stride, tx_type_,
+ bit_depth_));
+
+ for (int j = 0; j < num_coeffs_; ++j) {
+ EXPECT_EQ(output_ref_[j], output_[j])
+ << "Not bit-exact result at index: " << j
+ << " At test block: " << i;
+ }
+ }
+}
+
+TEST_P(VP10HighbdInvHTNxN, InvTransResultCheck) {
+ RunBitexactCheck();
+}
+
+using std::tr1::make_tuple;
+
+#if HAVE_SSE4_1 && CONFIG_VP9_HIGHBITDEPTH
+#define PARAM_LIST_4X4 &vp10_fwd_txfm2d_4x4_c, \
+ &vp10_inv_txfm2d_add_4x4_sse4_1, \
+ &vp10_inv_txfm2d_add_4x4_c, 16
+
+#define PARAM_LIST_8X8 &vp10_fwd_txfm2d_8x8_c, \
+ &vp10_inv_txfm2d_add_8x8_sse4_1, \
+ &vp10_inv_txfm2d_add_8x8_c, 64
+
+#define PARAM_LIST_16X16 &vp10_fwd_txfm2d_16x16_c, \
+ &vp10_inv_txfm2d_add_16x16_sse4_1, \
+ &vp10_inv_txfm2d_add_16x16_c, 256
+
+const IHbdHtParam kArrayIhtParam[] = {
+ // 16x16
+ make_tuple(PARAM_LIST_16X16, 0, 10),
+ make_tuple(PARAM_LIST_16X16, 0, 12),
+ make_tuple(PARAM_LIST_16X16, 1, 10),
+ make_tuple(PARAM_LIST_16X16, 1, 12),
+ make_tuple(PARAM_LIST_16X16, 2, 10),
+ make_tuple(PARAM_LIST_16X16, 2, 12),
+ make_tuple(PARAM_LIST_16X16, 3, 10),
+ make_tuple(PARAM_LIST_16X16, 3, 12),
+ // 8x8
+ make_tuple(PARAM_LIST_8X8, 0, 10),
+ make_tuple(PARAM_LIST_8X8, 0, 12),
+ make_tuple(PARAM_LIST_8X8, 1, 10),
+ make_tuple(PARAM_LIST_8X8, 1, 12),
+ make_tuple(PARAM_LIST_8X8, 2, 10),
+ make_tuple(PARAM_LIST_8X8, 2, 12),
+ make_tuple(PARAM_LIST_8X8, 3, 10),
+ make_tuple(PARAM_LIST_8X8, 3, 12),
+ // 4x4
+ make_tuple(PARAM_LIST_4X4, 0, 10),
+ make_tuple(PARAM_LIST_4X4, 0, 12),
+ make_tuple(PARAM_LIST_4X4, 1, 10),
+ make_tuple(PARAM_LIST_4X4, 1, 12),
+ make_tuple(PARAM_LIST_4X4, 2, 10),
+ make_tuple(PARAM_LIST_4X4, 2, 12),
+ make_tuple(PARAM_LIST_4X4, 3, 10),
+ make_tuple(PARAM_LIST_4X4, 3, 12),
+};
+
+INSTANTIATE_TEST_CASE_P(
+ SSE4_1, VP10HighbdInvHTNxN,
+ ::testing::ValuesIn(kArrayIhtParam));
+#endif // HAVE_SSE4_1 && CONFIG_VP9_HIGHBITDEPTH
+
+} // namespace
diff --git a/test/vp10_inv_txfm2d_test.cc b/test/vp10_inv_txfm2d_test.cc
index 80ac78b..fef4629 100644
--- a/test/vp10_inv_txfm2d_test.cc
+++ b/test/vp10_inv_txfm2d_test.cc
@@ -14,8 +14,8 @@
#include "./vp10_rtcd.h"
#include "test/acm_random.h"
+#include "test/util.h"
#include "test/vp10_txfm_test.h"
-#include "vp10/common/vp10_fwd_txfm2d_cfg.h"
#include "vp10/common/vp10_inv_txfm2d_cfg.h"
using libvpx_test::ACMRandom;
@@ -28,86 +28,131 @@
namespace {
#if CONFIG_VP9_HIGHBITDEPTH
-const int txfm_size_num = 5;
-const int txfm_size_ls[5] = {4, 8, 16, 32, 64};
-const int txfm_type[4] = {DCT_DCT, DCT_ADST, ADST_ADST, ADST_DCT};
-const TXFM_2D_CFG* inv_txfm_cfg_ls[5][4] = {
- {&inv_txfm_2d_cfg_dct_dct_4, &inv_txfm_2d_cfg_dct_adst_4,
- &inv_txfm_2d_cfg_adst_adst_4, &inv_txfm_2d_cfg_adst_dct_4},
- {&inv_txfm_2d_cfg_dct_dct_8, &inv_txfm_2d_cfg_dct_adst_8,
- &inv_txfm_2d_cfg_adst_adst_8, &inv_txfm_2d_cfg_adst_dct_8},
- {&inv_txfm_2d_cfg_dct_dct_16, &inv_txfm_2d_cfg_dct_adst_16,
- &inv_txfm_2d_cfg_adst_adst_16, &inv_txfm_2d_cfg_adst_dct_16},
- {&inv_txfm_2d_cfg_dct_dct_32, &inv_txfm_2d_cfg_dct_adst_32,
- &inv_txfm_2d_cfg_adst_adst_32, &inv_txfm_2d_cfg_adst_dct_32},
- {&inv_txfm_2d_cfg_dct_dct_64, NULL, NULL, NULL}};
+// VP10InvTxfm2dParam argument list:
+// tx_type_, tx_size_, max_error_, max_avg_error_
+typedef std::tr1::tuple<TX_TYPE, TX_SIZE, double, double> VP10InvTxfm2dParam;
-const Fwd_Txfm2d_Func fwd_txfm_func_ls[5] = {
- vp10_fwd_txfm2d_4x4_c, vp10_fwd_txfm2d_8x8_c, vp10_fwd_txfm2d_16x16_c,
- vp10_fwd_txfm2d_32x32_c, vp10_fwd_txfm2d_64x64_c};
-const Inv_Txfm2d_Func inv_txfm_func_ls[5] = {
- vp10_inv_txfm2d_add_4x4_c, vp10_inv_txfm2d_add_8x8_c,
- vp10_inv_txfm2d_add_16x16_c, vp10_inv_txfm2d_add_32x32_c,
- vp10_inv_txfm2d_add_64x64_c};
+class VP10InvTxfm2d : public ::testing::TestWithParam<VP10InvTxfm2dParam> {
+ public:
+ virtual void SetUp() {
+ tx_type_ = GET_PARAM(0);
+ tx_size_ = GET_PARAM(1);
+ max_error_ = GET_PARAM(2);
+ max_avg_error_ = GET_PARAM(3);
+ txfm1d_size_ = libvpx_test::get_txfm1d_size(tx_size_);
+ txfm2d_size_ = txfm1d_size_ * txfm1d_size_;
+ count_ = 500;
-const int txfm_type_num = 4;
+ input_ = reinterpret_cast<int16_t *>
+ (vpx_memalign(16, sizeof(int16_t) * txfm2d_size_));
+ ref_input_ = reinterpret_cast<uint16_t *>
+ (vpx_memalign(16, sizeof(uint16_t) * txfm2d_size_));
+ output_ = reinterpret_cast<int32_t *>
+ (vpx_memalign(16, sizeof(int32_t) * txfm2d_size_));
+ }
-TEST(vp10_inv_txfm2d, round_trip) {
- for (int txfm_size_idx = 0; txfm_size_idx < txfm_size_num; ++txfm_size_idx) {
- const int txfm_size = txfm_size_ls[txfm_size_idx];
- const int sqr_txfm_size = txfm_size * txfm_size;
- int16_t* input = new int16_t[sqr_txfm_size];
- uint16_t* ref_input = new uint16_t[sqr_txfm_size];
- int32_t* output = new int32_t[sqr_txfm_size];
-
- for (int txfm_type_idx = 0; txfm_type_idx < txfm_type_num;
- ++txfm_type_idx) {
- const TXFM_2D_CFG* inv_txfm_cfg =
- inv_txfm_cfg_ls[txfm_size_idx][txfm_type_idx];
- if (inv_txfm_cfg != NULL) {
- int tx_type = txfm_type[txfm_type_idx];
- const Fwd_Txfm2d_Func fwd_txfm_func = fwd_txfm_func_ls[txfm_size_idx];
- const Inv_Txfm2d_Func inv_txfm_func = inv_txfm_func_ls[txfm_size_idx];
- const int count = 1000;
- double avg_abs_error = 0;
- ACMRandom rnd(ACMRandom::DeterministicSeed());
- for (int ci = 0; ci < count; ci++) {
- for (int ni = 0; ni < sqr_txfm_size; ++ni) {
- if (ci == 0) {
- int extreme_input = input_base - 1;
- input[ni] = extreme_input; // extreme case
- ref_input[ni] = 0;
- } else {
- input[ni] = rnd.Rand16() % input_base;
- ref_input[ni] = 0;
- }
- }
-
- fwd_txfm_func(input, output, txfm_size, tx_type, bd);
- inv_txfm_func(output, ref_input, txfm_size, tx_type, bd);
-
- for (int ni = 0; ni < sqr_txfm_size; ++ni) {
- EXPECT_LE(abs(input[ni] - ref_input[ni]), 4);
- }
- avg_abs_error += compute_avg_abs_error<int16_t, uint16_t>(
- input, ref_input, sqr_txfm_size);
+ void RunRoundtripCheck() {
+ const Fwd_Txfm2d_Func fwd_txfm_func =
+ libvpx_test::fwd_txfm_func_ls[tx_size_];
+ const Inv_Txfm2d_Func inv_txfm_func =
+ libvpx_test::inv_txfm_func_ls[tx_size_];
+ double avg_abs_error = 0;
+ ACMRandom rnd(ACMRandom::DeterministicSeed());
+ for (int ci = 0; ci < count_; ci++) {
+ for (int ni = 0; ni < txfm2d_size_; ++ni) {
+ if (ci == 0) {
+ int extreme_input = input_base - 1;
+ input_[ni] = extreme_input; // extreme case
+ ref_input_[ni] = 0;
+ } else {
+ input_[ni] = rnd.Rand16() % input_base;
+ ref_input_[ni] = 0;
}
-
- avg_abs_error /= count;
- // max_abs_avg_error comes from upper bound of
- // printf("txfm_size: %d accuracy_avg_abs_error: %f\n",
- // txfm_size, avg_abs_error);
- // TODO(angiebird): this upper bound is from adst_adst_8
- const double max_abs_avg_error = 0.4;
- EXPECT_LE(avg_abs_error, max_abs_avg_error);
}
+
+ fwd_txfm_func(input_, output_, txfm1d_size_, tx_type_, bd);
+ inv_txfm_func(output_, ref_input_, txfm1d_size_, tx_type_, bd);
+
+ for (int ni = 0; ni < txfm2d_size_; ++ni) {
+ EXPECT_GE(max_error_, abs(input_[ni] - ref_input_[ni]));
+ }
+ avg_abs_error += compute_avg_abs_error<int16_t, uint16_t>(
+ input_, ref_input_, txfm2d_size_);
}
- delete[] input;
- delete[] ref_input;
- delete[] output;
+ avg_abs_error /= count_;
+    // max_avg_error_ comes from the upper bound of
+ // printf("txfm1d_size: %d accuracy_avg_abs_error: %f\n",
+ // txfm1d_size_, avg_abs_error);
+ EXPECT_GE(max_avg_error_, avg_abs_error);
}
-}
+
+ virtual void TearDown() {
+ vpx_free(input_);
+ vpx_free(output_);
+ vpx_free(ref_input_);
+ }
+
+ private:
+ int count_;
+  double max_error_;
+ double max_avg_error_;
+ TX_TYPE tx_type_;
+ TX_SIZE tx_size_;
+ int txfm1d_size_;
+ int txfm2d_size_;
+ int16_t* input_;
+ uint16_t* ref_input_;
+ int32_t* output_;
+};
+
+TEST_P(VP10InvTxfm2d, RunRoundtripCheck) { RunRoundtripCheck(); }
+
+INSTANTIATE_TEST_CASE_P(
+ C, VP10InvTxfm2d,
+ ::testing::Values(
+#if CONFIG_EXT_TX
+ VP10InvTxfm2dParam(FLIPADST_DCT, TX_4X4, 2, 0.002),
+ VP10InvTxfm2dParam(DCT_FLIPADST, TX_4X4, 2, 0.002),
+ VP10InvTxfm2dParam(FLIPADST_FLIPADST, TX_4X4, 2, 0.002),
+ VP10InvTxfm2dParam(ADST_FLIPADST, TX_4X4, 2, 0.002),
+ VP10InvTxfm2dParam(FLIPADST_ADST, TX_4X4, 2, 0.002),
+ VP10InvTxfm2dParam(FLIPADST_DCT, TX_8X8, 2, 0.02),
+ VP10InvTxfm2dParam(DCT_FLIPADST, TX_8X8, 2, 0.02),
+ VP10InvTxfm2dParam(FLIPADST_FLIPADST, TX_8X8, 2, 0.02),
+ VP10InvTxfm2dParam(ADST_FLIPADST, TX_8X8, 2, 0.02),
+ VP10InvTxfm2dParam(FLIPADST_ADST, TX_8X8, 2, 0.02),
+ VP10InvTxfm2dParam(FLIPADST_DCT, TX_16X16, 2, 0.04),
+ VP10InvTxfm2dParam(DCT_FLIPADST, TX_16X16, 2, 0.04),
+ VP10InvTxfm2dParam(FLIPADST_FLIPADST, TX_16X16, 11, 0.04),
+ VP10InvTxfm2dParam(ADST_FLIPADST, TX_16X16, 2, 0.04),
+ VP10InvTxfm2dParam(FLIPADST_ADST, TX_16X16, 2, 0.04),
+ VP10InvTxfm2dParam(FLIPADST_DCT, TX_32X32, 4, 0.4),
+ VP10InvTxfm2dParam(DCT_FLIPADST, TX_32X32, 4, 0.4),
+ VP10InvTxfm2dParam(FLIPADST_FLIPADST, TX_32X32, 4, 0.4),
+ VP10InvTxfm2dParam(ADST_FLIPADST, TX_32X32, 4, 0.4),
+ VP10InvTxfm2dParam(FLIPADST_ADST, TX_32X32, 4, 0.4),
+#endif  // CONFIG_EXT_TX
+ VP10InvTxfm2dParam(DCT_DCT, TX_4X4, 2, 0.002),
+ VP10InvTxfm2dParam(ADST_DCT, TX_4X4, 2, 0.002),
+ VP10InvTxfm2dParam(DCT_ADST, TX_4X4, 2, 0.002),
+ VP10InvTxfm2dParam(ADST_ADST, TX_4X4, 2, 0.002),
+ VP10InvTxfm2dParam(DCT_DCT, TX_8X8, 2, 0.02),
+ VP10InvTxfm2dParam(ADST_DCT, TX_8X8, 2, 0.02),
+ VP10InvTxfm2dParam(DCT_ADST, TX_8X8, 2, 0.02),
+ VP10InvTxfm2dParam(ADST_ADST, TX_8X8, 2, 0.02),
+ VP10InvTxfm2dParam(DCT_DCT, TX_16X16, 2, 0.04),
+ VP10InvTxfm2dParam(ADST_DCT, TX_16X16, 2, 0.04),
+ VP10InvTxfm2dParam(DCT_ADST, TX_16X16, 2, 0.04),
+ VP10InvTxfm2dParam(ADST_ADST, TX_16X16, 2, 0.04),
+ VP10InvTxfm2dParam(DCT_DCT, TX_32X32, 4, 0.4),
+ VP10InvTxfm2dParam(ADST_DCT, TX_32X32, 4, 0.4),
+ VP10InvTxfm2dParam(DCT_ADST, TX_32X32, 4, 0.4),
+ VP10InvTxfm2dParam(ADST_ADST, TX_32X32, 4, 0.4)));
+
#endif // CONFIG_VP9_HIGHBITDEPTH
} // namespace
diff --git a/test/vp10_txfm_test.cc b/test/vp10_txfm_test.cc
new file mode 100644
index 0000000..6b36126
--- /dev/null
+++ b/test/vp10_txfm_test.cc
@@ -0,0 +1,166 @@
+/*
+ * Copyright (c) 2015 The WebM project authors. All Rights Reserved.
+ *
+ * Use of this source code is governed by a BSD-style license
+ * that can be found in the LICENSE file in the root of the source
+ * tree. An additional intellectual property rights grant can be found
+ * in the file PATENTS. All contributing project authors may
+ * be found in the AUTHORS file in the root of the source tree.
+ */
+
+#include <stdio.h>
+#include "test/vp10_txfm_test.h"
+
+namespace libvpx_test {
+
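+// Map a TX_SIZE (TX_4X4 = 0, TX_8X8, ...) to its 1D transform length.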
+int get_txfm1d_size(TX_SIZE tx_size) {
+ return 1 << (tx_size + 2);
+}
+
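+// Decompose a 2D transform type into its two 1D types: type0 is applied in
+// the first (column) pass and type1 in the second (row) pass of the
+// reference 2D transform below.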
+void get_txfm1d_type(TX_TYPE txfm2d_type, TYPE_TXFM* type0,
+ TYPE_TXFM* type1) {
+ switch (txfm2d_type) {
+ case DCT_DCT:
+ *type0 = TYPE_DCT;
+ *type1 = TYPE_DCT;
+ break;
+ case ADST_DCT:
+ *type0 = TYPE_ADST;
+ *type1 = TYPE_DCT;
+ break;
+ case DCT_ADST:
+ *type0 = TYPE_DCT;
+ *type1 = TYPE_ADST;
+ break;
+ case ADST_ADST:
+ *type0 = TYPE_ADST;
+ *type1 = TYPE_ADST;
+ break;
+#if CONFIG_EXT_TX
+ case FLIPADST_DCT:
+ *type0 = TYPE_ADST;
+ *type1 = TYPE_DCT;
+ break;
+ case DCT_FLIPADST:
+ *type0 = TYPE_DCT;
+ *type1 = TYPE_ADST;
+ break;
+ case FLIPADST_FLIPADST:
+ *type0 = TYPE_ADST;
+ *type1 = TYPE_ADST;
+ break;
+ case ADST_FLIPADST:
+ *type0 = TYPE_ADST;
+ *type1 = TYPE_ADST;
+ break;
+ case FLIPADST_ADST:
+ *type0 = TYPE_ADST;
+ *type1 = TYPE_ADST;
+ break;
+#endif // CONFIG_EXT_TX
+ default:
+ *type0 = TYPE_DCT;
+ *type1 = TYPE_DCT;
+ assert(0);
+ break;
+ }
+}
+
+double invSqrt2 = 1 / pow(2, 0.5);
+
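+// Reference DCT-II: out[k] = sum_n in[n] * cos(pi * (2n + 1) * k / (2 * size)),
+// with the DC term additionally scaled by 1/sqrt(2).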
+void reference_dct_1d(const double* in, double* out, int size) {
+ for (int k = 0; k < size; ++k) {
+ out[k] = 0;
+ for (int n = 0; n < size; ++n) {
+ out[k] += in[n] * cos(M_PI * (2 * n + 1) * k / (2 * size));
+ }
+ if (k == 0) out[k] = out[k] * invSqrt2;
+ }
+}
+
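+// Reference ADST: out[k] = sum_n in[n] * sin(pi * (2n + 1) * (2k + 1) / (4 * size)).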
+void reference_adst_1d(const double* in, double* out, int size) {
+ for (int k = 0; k < size; ++k) {
+ out[k] = 0;
+ for (int n = 0; n < size; ++n) {
+ out[k] += in[n] * sin(M_PI * (2 * n + 1) * (2 * k + 1) / (4 * size));
+ }
+ }
+}
+
+void reference_hybrid_1d(double* in, double* out, int size, int type) {
+ if (type == TYPE_DCT)
+ reference_dct_1d(in, out, size);
+ else
+ reference_adst_1d(in, out, size);
+}
+
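+// Separable 2D reference transform: apply the type0 1D transform to the
+// columns of in, then the type1 1D transform to the rows of the result,
+// using transposes so each pass works on contiguous rows.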
+void reference_hybrid_2d(double* in, double* out, int size,
+ int type0, int type1) {
+ double* tempOut = new double[size * size];
+
+ for (int r = 0; r < size; r++) {
+    // transpose: in -> tempOut
+ for (int c = 0; c < size; c++) {
+ tempOut[r * size + c] = in[c * size + r];
+ }
+ }
+
+  // 1D transform (type0) on each row: tempOut -> out
+ for (int r = 0; r < size; r++) {
+ reference_hybrid_1d(tempOut + r * size, out + r * size, size, type0);
+ }
+
+ for (int r = 0; r < size; r++) {
+    // transpose: out -> tempOut
+ for (int c = 0; c < size; c++) {
+ tempOut[r * size + c] = out[c * size + r];
+ }
+ }
+
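+  // 1D transform (type1) on each row: tempOut -> out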
+ for (int r = 0; r < size; r++) {
+ reference_hybrid_1d(tempOut + r * size, out + r * size, size, type1);
+ }
+ delete[] tempOut;
+}
+
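+// Mirror a length x length block horizontally (left-right), in place.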
+template<typename Type>
+void fliplr(Type *dest, int stride, int length) {
+ int i, j;
+ for (i = 0; i < length; ++i) {
+ for (j = 0; j < length / 2; ++j) {
+ const Type tmp = dest[i * stride + j];
+ dest[i * stride + j] = dest[i * stride + length - 1 - j];
+ dest[i * stride + length - 1 - j] = tmp;
+ }
+ }
+}
+
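+// Mirror a length x length block vertically (up-down), in place.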
+template<typename Type>
+void flipud(Type *dest, int stride, int length) {
+ int i, j;
+ for (j = 0; j < length; ++j) {
+ for (i = 0; i < length / 2; ++i) {
+ const Type tmp = dest[i * stride + j];
+ dest[i * stride + j] = dest[(length - 1 - i) * stride + j];
+ dest[(length - 1 - i) * stride + j] = tmp;
+ }
+ }
+}
+
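+// Rotate a length x length block by 180 degrees (flip both axes), in place.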
+template<typename Type>
+void fliplrud(Type *dest, int stride, int length) {
+ int i, j;
+ for (i = 0; i < length / 2; ++i) {
+ for (j = 0; j < length; ++j) {
+ const Type tmp = dest[i * stride + j];
+ dest[i * stride + j] = dest[(length - 1 - i) * stride + length - 1 - j];
+ dest[(length - 1 - i) * stride + length - 1 - j] = tmp;
+ }
+ }
+}
+
+template void fliplr<double>(double *dest, int stride, int length);
+template void flipud<double>(double *dest, int stride, int length);
+template void fliplrud<double>(double *dest, int stride, int length);
+
+} // namespace libvpx_test
diff --git a/test/vp10_txfm_test.h b/test/vp10_txfm_test.h
index c4d03ce..fb9e12e 100644
--- a/test/vp10_txfm_test.h
+++ b/test/vp10_txfm_test.h
@@ -23,6 +23,7 @@
#include "test/acm_random.h"
#include "vp10/common/enums.h"
#include "vp10/common/vp10_txfm.h"
+#include "./vp10_rtcd.h"
namespace libvpx_test {
typedef enum {
@@ -33,63 +34,19 @@
TYPE_LAST
} TYPE_TXFM;
-static double invSqrt2 = 1 / pow(2, 0.5);
+int get_txfm1d_size(TX_SIZE tx_size);
-static void reference_dct_1d(const double* in, double* out, int size) {
- for (int k = 0; k < size; ++k) {
- out[k] = 0;
- for (int n = 0; n < size; ++n) {
- out[k] += in[n] * cos(M_PI * (2 * n + 1) * k / (2 * size));
- }
- if (k == 0) out[k] = out[k] * invSqrt2;
- }
-}
+void get_txfm1d_type(TX_TYPE txfm2d_type, TYPE_TXFM* type0,
+ TYPE_TXFM* type1);
-static void reference_adst_1d(const double* in, double* out, int size) {
- for (int k = 0; k < size; ++k) {
- out[k] = 0;
- for (int n = 0; n < size; ++n) {
- out[k] += in[n] * sin(M_PI * (2 * n + 1) * (2 * k + 1) / (4 * size));
- }
- }
-}
+void reference_dct_1d(const double* in, double* out, int size);
-static void reference_hybrid_1d(double* in, double* out, int size, int type) {
- if (type == TYPE_DCT)
- reference_dct_1d(in, out, size);
- else
- reference_adst_1d(in, out, size);
-}
+void reference_adst_1d(const double* in, double* out, int size);
-static INLINE void reference_hybrid_2d(double* in, double* out, int size,
- int type0, int type1) {
- double* tempOut = new double[size * size];
+void reference_hybrid_1d(double* in, double* out, int size, int type);
- for (int r = 0; r < size; r++) {
- // out ->tempOut
- for (int c = 0; c < size; c++) {
- tempOut[r * size + c] = in[c * size + r];
- }
- }
-
- // dct each row: in -> out
- for (int r = 0; r < size; r++) {
- reference_hybrid_1d(tempOut + r * size, out + r * size, size, type0);
- }
-
- for (int r = 0; r < size; r++) {
- // out ->tempOut
- for (int c = 0; c < size; c++) {
- tempOut[r * size + c] = out[c * size + r];
- }
- }
-
- for (int r = 0; r < size; r++) {
- reference_hybrid_1d(tempOut + r * size, out + r * size, size, type1);
- }
- delete[] tempOut;
-}
-
+void reference_hybrid_2d(double* in, double* out, int size,
+ int type0, int type1);
template <typename Type1, typename Type2>
static double compute_avg_abs_error(const Type1* a, const Type2* b,
const int size) {
@@ -101,6 +58,15 @@
return error;
}
+template<typename Type>
+void fliplr(Type *dest, int stride, int length);
+
+template<typename Type>
+void flipud(Type *dest, int stride, int length);
+
+template<typename Type>
+void fliplrud(Type *dest, int stride, int length);
+
typedef void (*TxfmFunc)(const int32_t* in, int32_t* out, const int8_t* cos_bit,
const int8_t* range_bit);
@@ -110,22 +76,15 @@
static const int bd = 10;
static const int input_base = (1 << bd);
-static INLINE int get_tx_type(const TXFM_2D_CFG *cfg) {
- int tx_type;
- if (cfg->txfm_type_col <= TXFM_TYPE_DCT64) {
- if (cfg->txfm_type_row <= TXFM_TYPE_DCT64) {
- tx_type = DCT_DCT;
- } else {
- tx_type = DCT_ADST;
- }
- } else {
- if (cfg->txfm_type_row <= TXFM_TYPE_DCT64) {
- tx_type = ADST_DCT;
- } else {
- tx_type = ADST_ADST;
- }
- }
- return tx_type;
-}
+#if CONFIG_VP9_HIGHBITDEPTH
+static const Fwd_Txfm2d_Func fwd_txfm_func_ls[TX_SIZES] = {
+ vp10_fwd_txfm2d_4x4_c, vp10_fwd_txfm2d_8x8_c, vp10_fwd_txfm2d_16x16_c,
+ vp10_fwd_txfm2d_32x32_c};
+
+static const Inv_Txfm2d_Func inv_txfm_func_ls[TX_SIZES] = {
+ vp10_inv_txfm2d_add_4x4_c, vp10_inv_txfm2d_add_8x8_c,
+ vp10_inv_txfm2d_add_16x16_c, vp10_inv_txfm2d_add_32x32_c};
+#endif // CONFIG_VP9_HIGHBITDEPTH
+
} // namespace libvpx_test
#endif // VP10_TXFM_TEST_H_
diff --git a/test/vp9_ethread_test.cc b/test/vp9_ethread_test.cc
index d6b6951..35a6619 100644
--- a/test/vp9_ethread_test.cc
+++ b/test/vp9_ethread_test.cc
@@ -33,11 +33,13 @@
vpx_codec_dec_cfg_t cfg = vpx_codec_dec_cfg_t();
cfg.w = 1280;
cfg.h = 720;
-#if CONFIG_EXT_TILE
- cfg.tile_col = -1;
- cfg.tile_row = -1;
-#endif // CONFIG_EXT_TILE
decoder_ = codec_->CreateDecoder(cfg, 0);
+#if CONFIG_VP10 && CONFIG_EXT_TILE
+ if (decoder_->IsVP10()) {
+ decoder_->Control(VP10_SET_DECODE_TILE_ROW, -1);
+ decoder_->Control(VP10_SET_DECODE_TILE_COL, -1);
+ }
+#endif  // CONFIG_VP10 && CONFIG_EXT_TILE
size_enc_.clear();
md5_dec_.clear();
diff --git a/vp10/common/blockd.h b/vp10/common/blockd.h
index 21147af..87e5d1c 100644
--- a/vp10/common/blockd.h
+++ b/vp10/common/blockd.h
@@ -53,43 +53,6 @@
}
#if CONFIG_EXT_INTER
-#define WEDGE_BITS_SML 2
-#define WEDGE_BITS_MED 3
-#define WEDGE_BITS_BIG 4
-#define WEDGE_NONE -1
-#define WEDGE_WEIGHT_BITS 6
-
-static const int get_wedge_bits_lookup[BLOCK_SIZES] = {
- 0,
- 0,
- 0,
- WEDGE_BITS_SML,
- WEDGE_BITS_MED,
- WEDGE_BITS_MED,
- WEDGE_BITS_MED,
- WEDGE_BITS_MED,
- WEDGE_BITS_MED,
- WEDGE_BITS_MED,
- WEDGE_BITS_BIG,
- WEDGE_BITS_BIG,
- WEDGE_BITS_BIG,
-#if CONFIG_EXT_PARTITION
- WEDGE_BITS_BIG,
- WEDGE_BITS_BIG,
- WEDGE_BITS_BIG,
-#endif // CONFIG_EXT_PARTITION
-};
-
-static INLINE int is_interinter_wedge_used(BLOCK_SIZE sb_type) {
- (void) sb_type;
- return get_wedge_bits_lookup[sb_type] > 0;
-}
-
-static INLINE int is_interintra_wedge_used(BLOCK_SIZE sb_type) {
- (void) sb_type;
- return get_wedge_bits_lookup[sb_type] > 0;
-}
-
static INLINE int is_inter_singleref_mode(PREDICTION_MODE mode) {
return mode >= NEARESTMV && mode <= NEWFROMNEARMV;
}
@@ -133,7 +96,9 @@
#define NONE -1
#define INTRA_FRAME 0
#define LAST_FRAME 1
+
#if CONFIG_EXT_REFS
+
#define LAST2_FRAME 2
#define LAST3_FRAME 3
#define LAST4_FRAME 4
@@ -141,10 +106,24 @@
#define ALTREF_FRAME 6
#define MAX_REF_FRAMES 7
#define LAST_REF_FRAMES (LAST4_FRAME - LAST_FRAME + 1)
-#else
+
+#else // CONFIG_EXT_REFS
+
+#if CONFIG_BIDIR_PRED
+
+#define GOLDEN_FRAME 2
+#define BWDREF_FRAME 3
+#define ALTREF_FRAME 4
+#define MAX_REF_FRAMES 5
+
+#else // CONFIG_BIDIR_PRED
+
#define GOLDEN_FRAME 2
#define ALTREF_FRAME 3
#define MAX_REF_FRAMES 4
+
+#endif // CONFIG_BIDIR_PRED
+
#endif // CONFIG_EXT_REFS
typedef int8_t MV_REFERENCE_FRAME;
diff --git a/vp10/common/divide.c b/vp10/common/divide.c
index 00b43a0..3f144d7 100644
--- a/vp10/common/divide.c
+++ b/vp10/common/divide.c
@@ -26,68 +26,68 @@
}
*/
const struct fastdiv_elem vp10_fastdiv_tab[256] = {
- {0, 0}, {0, 0}, {0, 1}, {1431655766, 2},
- {0, 2}, {2576980378, 3}, {1431655766, 3}, {613566757, 3},
- {0, 3}, {3340530120, 4}, {2576980378, 4}, {1952257862, 4},
- {1431655766, 4}, {991146300, 4}, {613566757, 4}, {286331154, 4},
- {0, 4}, {3789677026, 5}, {3340530120, 5}, {2938661835, 5},
- {2576980378, 5}, {2249744775, 5}, {1952257862, 5}, {1680639377, 5},
- {1431655766, 5}, {1202590843, 5}, {991146300, 5}, {795364315, 5},
- {613566757, 5}, {444306962, 5}, {286331154, 5}, {138547333, 5},
- {0, 5}, {4034666248, 6}, {3789677026, 6}, {3558687189, 6},
- {3340530120, 6}, {3134165325, 6}, {2938661835, 6}, {2753184165, 6},
- {2576980378, 6}, {2409371898, 6}, {2249744775, 6}, {2097542168, 6},
- {1952257862, 6}, {1813430637, 6}, {1680639377, 6}, {1553498810, 6},
- {1431655766, 6}, {1314785907, 6}, {1202590843, 6}, {1094795586, 6},
- {991146300, 6}, {891408307, 6}, {795364315, 6}, {702812831, 6},
- {613566757, 6}, {527452125, 6}, {444306962, 6}, {363980280, 6},
- {286331154, 6}, {211227900, 6}, {138547333, 6}, {68174085, 6},
- {0, 6}, {4162814457, 7}, {4034666248, 7}, {3910343360, 7},
- {3789677026, 7}, {3672508268, 7}, {3558687189, 7}, {3448072337, 7},
- {3340530120, 7}, {3235934265, 7}, {3134165325, 7}, {3035110223, 7},
- {2938661835, 7}, {2844718599, 7}, {2753184165, 7}, {2663967058, 7},
- {2576980378, 7}, {2492141518, 7}, {2409371898, 7}, {2328596727, 7},
- {2249744775, 7}, {2172748162, 7}, {2097542168, 7}, {2024065048, 7},
- {1952257862, 7}, {1882064321, 7}, {1813430637, 7}, {1746305385, 7},
- {1680639377, 7}, {1616385542, 7}, {1553498810, 7}, {1491936009, 7},
- {1431655766, 7}, {1372618415, 7}, {1314785907, 7}, {1258121734, 7},
- {1202590843, 7}, {1148159575, 7}, {1094795586, 7}, {1042467791, 7},
- {991146300, 7}, {940802361, 7}, {891408307, 7}, {842937507, 7},
- {795364315, 7}, {748664025, 7}, {702812831, 7}, {657787785, 7},
- {613566757, 7}, {570128403, 7}, {527452125, 7}, {485518043, 7},
- {444306962, 7}, {403800345, 7}, {363980280, 7}, {324829460, 7},
- {286331154, 7}, {248469183, 7}, {211227900, 7}, {174592167, 7},
- {138547333, 7}, {103079216, 7}, {68174085, 7}, {33818641, 7},
- {0, 7}, {4228378656, 8}, {4162814457, 8}, {4098251237, 8},
- {4034666248, 8}, {3972037425, 8}, {3910343360, 8}, {3849563281, 8},
- {3789677026, 8}, {3730665024, 8}, {3672508268, 8}, {3615188300, 8},
- {3558687189, 8}, {3502987511, 8}, {3448072337, 8}, {3393925206, 8},
- {3340530120, 8}, {3287871517, 8}, {3235934265, 8}, {3184703642, 8},
- {3134165325, 8}, {3084305374, 8}, {3035110223, 8}, {2986566663, 8},
- {2938661835, 8}, {2891383213, 8}, {2844718599, 8}, {2798656110, 8},
- {2753184165, 8}, {2708291480, 8}, {2663967058, 8}, {2620200175, 8},
- {2576980378, 8}, {2534297473, 8}, {2492141518, 8}, {2450502814, 8},
- {2409371898, 8}, {2368739540, 8}, {2328596727, 8}, {2288934667, 8},
- {2249744775, 8}, {2211018668, 8}, {2172748162, 8}, {2134925265, 8},
- {2097542168, 8}, {2060591247, 8}, {2024065048, 8}, {1987956292, 8},
- {1952257862, 8}, {1916962805, 8}, {1882064321, 8}, {1847555765, 8},
- {1813430637, 8}, {1779682582, 8}, {1746305385, 8}, {1713292966, 8},
- {1680639377, 8}, {1648338801, 8}, {1616385542, 8}, {1584774030, 8},
- {1553498810, 8}, {1522554545, 8}, {1491936009, 8}, {1461638086, 8},
- {1431655766, 8}, {1401984144, 8}, {1372618415, 8}, {1343553873, 8},
- {1314785907, 8}, {1286310003, 8}, {1258121734, 8}, {1230216764, 8},
- {1202590843, 8}, {1175239808, 8}, {1148159575, 8}, {1121346142, 8},
- {1094795586, 8}, {1068504060, 8}, {1042467791, 8}, {1016683080, 8},
- {991146300, 8}, {965853890, 8}, {940802361, 8}, {915988286, 8},
- {891408307, 8}, {867059126, 8}, {842937507, 8}, {819040276, 8},
- {795364315, 8}, {771906565, 8}, {748664025, 8}, {725633745, 8},
- {702812831, 8}, {680198441, 8}, {657787785, 8}, {635578121, 8},
- {613566757, 8}, {591751050, 8}, {570128403, 8}, {548696263, 8},
- {527452125, 8}, {506393524, 8}, {485518043, 8}, {464823301, 8},
- {444306962, 8}, {423966729, 8}, {403800345, 8}, {383805589, 8},
- {363980280, 8}, {344322273, 8}, {324829460, 8}, {305499766, 8},
- {286331154, 8}, {267321616, 8}, {248469183, 8}, {229771913, 8},
- {211227900, 8}, {192835267, 8}, {174592167, 8}, {156496785, 8},
- {138547333, 8}, {120742053, 8}, {103079216, 8}, {85557118, 8},
- {68174085, 8}, {50928466, 8}, {33818641, 8}, {16843010, 8},
+ {0, 0}, {0, 0}, {0, 1}, {1431655766, 2},
+ {0, 2}, {2576980378u, 3}, {1431655766, 3}, {613566757, 3},
+ {0, 3}, {3340530120u, 4}, {2576980378u, 4}, {1952257862, 4},
+ {1431655766, 4}, {991146300, 4}, {613566757, 4}, {286331154u, 4},
+ {0, 4}, {3789677026u, 5}, {3340530120u, 5}, {2938661835u, 5},
+ {2576980378u, 5}, {2249744775u, 5}, {1952257862, 5}, {1680639377, 5},
+ {1431655766, 5}, {1202590843, 5}, {991146300, 5}, {795364315, 5},
+ {613566757, 5}, {444306962, 5}, {286331154, 5}, {138547333, 5},
+ {0, 5}, {4034666248u, 6}, {3789677026u, 6}, {3558687189u, 6},
+ {3340530120u, 6}, {3134165325u, 6}, {2938661835u, 6}, {2753184165u, 6},
+ {2576980378u, 6}, {2409371898u, 6}, {2249744775u, 6}, {2097542168u, 6},
+ {1952257862, 6}, {1813430637, 6}, {1680639377, 6}, {1553498810, 6},
+ {1431655766, 6}, {1314785907, 6}, {1202590843, 6}, {1094795586, 6},
+ {991146300, 6}, {891408307, 6}, {795364315, 6}, {702812831, 6},
+ {613566757, 6}, {527452125, 6}, {444306962, 6}, {363980280, 6},
+ {286331154, 6}, {211227900, 6}, {138547333, 6}, {68174085, 6},
+ {0, 6}, {4162814457u, 7}, {4034666248u, 7}, {3910343360u, 7},
+ {3789677026u, 7}, {3672508268u, 7}, {3558687189u, 7}, {3448072337u, 7},
+ {3340530120u, 7}, {3235934265u, 7}, {3134165325u, 7}, {3035110223u, 7},
+ {2938661835u, 7}, {2844718599u, 7}, {2753184165u, 7}, {2663967058u, 7},
+ {2576980378u, 7}, {2492141518u, 7}, {2409371898u, 7}, {2328596727u, 7},
+ {2249744775u, 7}, {2172748162u, 7}, {2097542168, 7}, {2024065048, 7},
+ {1952257862, 7}, {1882064321, 7}, {1813430637, 7}, {1746305385, 7},
+ {1680639377, 7}, {1616385542, 7}, {1553498810, 7}, {1491936009, 7},
+ {1431655766, 7}, {1372618415, 7}, {1314785907, 7}, {1258121734, 7},
+ {1202590843, 7}, {1148159575, 7}, {1094795586, 7}, {1042467791, 7},
+ {991146300, 7}, {940802361, 7}, {891408307, 7}, {842937507, 7},
+ {795364315, 7}, {748664025, 7}, {702812831, 7}, {657787785, 7},
+ {613566757, 7}, {570128403, 7}, {527452125, 7}, {485518043, 7},
+ {444306962, 7}, {403800345, 7}, {363980280, 7}, {324829460, 7},
+ {286331154, 7}, {248469183, 7}, {211227900, 7}, {174592167, 7},
+ {138547333, 7}, {103079216, 7}, {68174085, 7}, {33818641, 7},
+ {0, 7}, {4228378656u, 8}, {4162814457u, 8}, {4098251237u, 8},
+ {4034666248u, 8}, {3972037425u, 8}, {3910343360u, 8}, {3849563281u, 8},
+ {3789677026u, 8}, {3730665024u, 8}, {3672508268u, 8}, {3615188300u, 8},
+ {3558687189u, 8}, {3502987511u, 8}, {3448072337u, 8}, {3393925206u, 8},
+ {3340530120u, 8}, {3287871517u, 8}, {3235934265u, 8}, {3184703642u, 8},
+ {3134165325u, 8}, {3084305374u, 8}, {3035110223u, 8}, {2986566663u, 8},
+ {2938661835u, 8}, {2891383213u, 8}, {2844718599u, 8}, {2798656110u, 8},
+ {2753184165u, 8}, {2708291480u, 8}, {2663967058u, 8}, {2620200175u, 8},
+ {2576980378u, 8}, {2534297473u, 8}, {2492141518u, 8}, {2450502814u, 8},
+ {2409371898u, 8}, {2368739540u, 8}, {2328596727u, 8}, {2288934667u, 8},
+ {2249744775u, 8}, {2211018668u, 8}, {2172748162u, 8}, {2134925265u, 8},
+ {2097542168, 8}, {2060591247, 8}, {2024065048, 8}, {1987956292, 8},
+ {1952257862, 8}, {1916962805, 8}, {1882064321, 8}, {1847555765, 8},
+ {1813430637, 8}, {1779682582, 8}, {1746305385, 8}, {1713292966, 8},
+ {1680639377, 8}, {1648338801, 8}, {1616385542, 8}, {1584774030, 8},
+ {1553498810, 8}, {1522554545, 8}, {1491936009, 8}, {1461638086, 8},
+ {1431655766, 8}, {1401984144, 8}, {1372618415, 8}, {1343553873, 8},
+ {1314785907, 8}, {1286310003, 8}, {1258121734, 8}, {1230216764, 8},
+ {1202590843, 8}, {1175239808, 8}, {1148159575, 8}, {1121346142, 8},
+ {1094795586, 8}, {1068504060, 8}, {1042467791, 8}, {1016683080, 8},
+ {991146300, 8}, {965853890, 8}, {940802361, 8}, {915988286, 8},
+ {891408307, 8}, {867059126, 8}, {842937507, 8}, {819040276, 8},
+ {795364315, 8}, {771906565, 8}, {748664025, 8}, {725633745, 8},
+ {702812831, 8}, {680198441, 8}, {657787785, 8}, {635578121, 8},
+ {613566757, 8}, {591751050, 8}, {570128403, 8}, {548696263, 8},
+ {527452125, 8}, {506393524, 8}, {485518043, 8}, {464823301, 8},
+ {444306962, 8}, {423966729, 8}, {403800345, 8}, {383805589, 8},
+ {363980280, 8}, {344322273, 8}, {324829460, 8}, {305499766, 8},
+ {286331154, 8}, {267321616, 8}, {248469183, 8}, {229771913, 8},
+ {211227900, 8}, {192835267, 8}, {174592167, 8}, {156496785, 8},
+ {138547333, 8}, {120742053, 8}, {103079216, 8}, {85557118, 8},
+ {68174085, 8}, {50928466, 8}, {33818641, 8}, {16843010, 8},
};
diff --git a/vp10/common/entropymode.c b/vp10/common/entropymode.c
index 0ae2572..29fb27e 100644
--- a/vp10/common/entropymode.c
+++ b/vp10/common/entropymode.c
@@ -281,16 +281,16 @@
};
static const vpx_prob default_wedge_interintra_prob[BLOCK_SIZES] = {
- 208, 208, 208, 208, 208, 208, 208, 208, 208, 208, 208, 208, 208,
+ 208, 208, 208, 208, 208, 208, 216, 216, 216, 224, 224, 224, 240,
#if CONFIG_EXT_PARTITION
208, 208, 208
#endif // CONFIG_EXT_PARTITION
};
static const vpx_prob default_wedge_interinter_prob[BLOCK_SIZES] = {
- 208, 208, 208, 208, 208, 208, 208, 208, 208, 208, 208, 208, 208,
+ 208, 208, 208, 208, 208, 208, 216, 216, 216, 224, 224, 224, 240,
#if CONFIG_EXT_PARTITION
- 208, 208, 208
+ 255, 255, 255
#endif // CONFIG_EXT_PARTITION
};
#endif // CONFIG_EXT_INTER
@@ -383,19 +383,36 @@
239, 183, 119, 96, 41
};
-static const vpx_prob default_comp_ref_p[REF_CONTEXTS][COMP_REFS - 1] = {
+
#if CONFIG_EXT_REFS
+static const vpx_prob default_comp_ref_p[REF_CONTEXTS][COMP_REFS - 1] = {
// TODO(zoeliu): To adjust the initial prob values.
{ 33, 16, 16, 16 },
{ 77, 74, 74, 74 },
{ 142, 142, 142, 142 },
{ 172, 170, 170, 170 },
{ 238, 247, 247, 247 }
-#else
- { 50 }, { 126 }, { 123 }, { 221 }, { 226 }
-#endif // CONFIG_EXT_REFS
};
+#else // CONFIG_EXT_REFS
+
+#if CONFIG_BIDIR_PRED
+// TODO(zoeliu): To adjust the initial prob values.
+static const vpx_prob default_comp_ref_p[REF_CONTEXTS][FWD_REFS - 1] = {
+// { 50 }, { 126 }, { 123 }, { 221 }, { 226 }
+ { 33 }, { 77 }, { 142 }, { 172 }, { 238 }
+};
+static const vpx_prob default_comp_bwdref_p[REF_CONTEXTS][BWD_REFS - 1] = {
+ { 16 }, { 74 }, { 142 }, { 170 }, { 247 }
+};
+#else // CONFIG_BIDIR_PRED
+static const vpx_prob default_comp_ref_p[REF_CONTEXTS][COMP_REFS - 1] = {
+ { 50 }, { 126 }, { 123 }, { 221 }, { 226 }
+};
+#endif // CONFIG_BIDIR_PRED
+
+#endif // CONFIG_EXT_REFS
+
static const vpx_prob default_single_ref_p[REF_CONTEXTS][SINGLE_REFS - 1] = {
#if CONFIG_EXT_REFS
{ 33, 16, 16, 16, 16 },
@@ -403,12 +420,20 @@
{ 142, 142, 142, 142, 142 },
{ 172, 170, 170, 170, 170 },
{ 238, 247, 247, 247, 247 }
-#else
+#else // CONFIG_EXT_REFS
+#if CONFIG_BIDIR_PRED
+ { 33, 16, 16 },
+ { 77, 74, 74 },
+ { 142, 142, 142 },
+ { 172, 170, 170 },
+ { 238, 247, 247 }
+#else // CONFIG_BIDIR_PRED
{ 33, 16 },
{ 77, 74 },
{ 142, 142 },
{ 172, 170 },
{ 238, 247 }
+#endif // CONFIG_BIDIR_PRED
#endif // CONFIG_EXT_REFS
};
@@ -904,14 +929,68 @@
#if CONFIG_EXT_INTERP
static const vpx_prob default_switchable_interp_prob[SWITCHABLE_FILTER_CONTEXTS]
[SWITCHABLE_FILTERS - 1] = {
+#if CONFIG_DUAL_FILTER
{ 235, 192, 128, 128},
{ 36, 243, 208, 128},
{ 34, 16, 128, 128},
{ 36, 243, 48, 128},
{ 34, 16, 128, 128},
{ 149, 160, 128, 128},
+
+ { 235, 192, 128, 128},
+ { 36, 243, 208, 128},
+ { 34, 16, 128, 128},
+ { 36, 243, 48, 128},
+ { 34, 16, 128, 128},
+ { 149, 160, 128, 128},
+
+ { 235, 192, 128, 128},
+ { 36, 243, 208, 128},
+ { 34, 16, 128, 128},
+ { 36, 243, 48, 128},
+ { 34, 16, 128, 128},
+ { 149, 160, 128, 128},
+
+ { 235, 192, 128, 128},
+ { 36, 243, 208, 128},
+ { 34, 16, 128, 128},
+ { 36, 243, 48, 128},
+ { 34, 16, 128, 128},
+ { 149, 160, 128, 128},
+#else
+ { 235, 192, 128, 128},
+ { 36, 243, 208, 128},
+ { 34, 16, 128, 128},
+ { 36, 243, 48, 128},
+ { 34, 16, 128, 128},
+ { 149, 160, 128, 128},
+#endif  // CONFIG_DUAL_FILTER
};
#else // CONFIG_EXT_INTERP
+#if CONFIG_DUAL_FILTER
+static const vpx_prob default_switchable_interp_prob[SWITCHABLE_FILTER_CONTEXTS]
+ [SWITCHABLE_FILTERS - 1] = {
+ { 235, 162, },
+ { 36, 255, },
+ { 34, 3, },
+ { 149, 144, },
+
+ { 235, 162, },
+ { 36, 255, },
+ { 34, 3, },
+ { 10, 3, },
+
+ { 235, 162, },
+ { 36, 255, },
+ { 34, 3, },
+ { 149, 144, },
+
+ { 235, 162, },
+ { 36, 255, },
+ { 34, 3, },
+ { 10, 3, },
+};
+#else
static const vpx_prob default_switchable_interp_prob[SWITCHABLE_FILTER_CONTEXTS]
[SWITCHABLE_FILTERS - 1] = {
{ 235, 162, },
@@ -919,6 +998,7 @@
{ 34, 3, },
{ 149, 144, },
};
+#endif  // CONFIG_DUAL_FILTER
#endif // CONFIG_EXT_INTERP
#if CONFIG_EXT_TX
@@ -1188,6 +1268,9 @@
vp10_copy(fc->intra_inter_prob, default_intra_inter_p);
vp10_copy(fc->comp_inter_prob, default_comp_inter_p);
vp10_copy(fc->comp_ref_prob, default_comp_ref_p);
+#if !CONFIG_EXT_REFS && CONFIG_BIDIR_PRED
+ vp10_copy(fc->comp_bwdref_prob, default_comp_bwdref_p);
+#endif // !CONFIG_EXT_REFS && CONFIG_BIDIR_PRED
vp10_copy(fc->single_ref_prob, default_single_ref_p);
vp10_copy(fc->tx_size_probs, default_tx_size_prob);
#if CONFIG_VAR_TX
@@ -1255,10 +1338,23 @@
for (i = 0; i < COMP_INTER_CONTEXTS; i++)
fc->comp_inter_prob[i] = vp10_mode_mv_merge_probs(
pre_fc->comp_inter_prob[i], counts->comp_inter[i]);
+
+#if !CONFIG_EXT_REFS && CONFIG_BIDIR_PRED
+ for (i = 0; i < REF_CONTEXTS; i++)
+ for (j = 0; j < (FWD_REFS - 1); j++)
+ fc->comp_ref_prob[i][j] = mode_mv_merge_probs(
+ pre_fc->comp_ref_prob[i][j], counts->comp_ref[i][j]);
+ for (i = 0; i < REF_CONTEXTS; i++)
+ for (j = 0; j < (BWD_REFS - 1); j++)
+ fc->comp_bwdref_prob[i][j] = mode_mv_merge_probs(
+ pre_fc->comp_bwdref_prob[i][j], counts->comp_bwdref[i][j]);
+#else
for (i = 0; i < REF_CONTEXTS; i++)
for (j = 0; j < (COMP_REFS - 1); j++)
- fc->comp_ref_prob[i][j] = vp10_mode_mv_merge_probs(
+ fc->comp_ref_prob[i][j] = mode_mv_merge_probs(
pre_fc->comp_ref_prob[i][j], counts->comp_ref[i][j]);
+#endif // !CONFIG_EXT_REFS && CONFIG_BIDIR_PRED
+
for (i = 0; i < REF_CONTEXTS; i++)
for (j = 0; j < (SINGLE_REFS - 1); j++)
fc->single_ref_prob[i][j] = vp10_mode_mv_merge_probs(
diff --git a/vp10/common/entropymode.h b/vp10/common/entropymode.h
index 4a6ccae..42b93d6 100644
--- a/vp10/common/entropymode.h
+++ b/vp10/common/entropymode.h
@@ -85,7 +85,12 @@
vpx_prob intra_inter_prob[INTRA_INTER_CONTEXTS];
vpx_prob comp_inter_prob[COMP_INTER_CONTEXTS];
vpx_prob single_ref_prob[REF_CONTEXTS][SINGLE_REFS-1];
+#if !CONFIG_EXT_REFS && CONFIG_BIDIR_PRED
+ vpx_prob comp_ref_prob[REF_CONTEXTS][FWD_REFS-1];
+ vpx_prob comp_bwdref_prob[REF_CONTEXTS][BWD_REFS-1];
+#else
vpx_prob comp_ref_prob[REF_CONTEXTS][COMP_REFS-1];
+#endif // !CONFIG_EXT_REFS && CONFIG_BIDIR_PRED
vpx_prob tx_size_probs[TX_SIZES - 1][TX_SIZE_CONTEXTS][TX_SIZES - 1];
#if CONFIG_VAR_TX
vpx_prob txfm_partition_prob[TXFM_PARTITION_CONTEXTS];
@@ -155,7 +160,12 @@
unsigned int intra_inter[INTRA_INTER_CONTEXTS][2];
unsigned int comp_inter[COMP_INTER_CONTEXTS][2];
unsigned int single_ref[REF_CONTEXTS][SINGLE_REFS-1][2];
+#if !CONFIG_EXT_REFS && CONFIG_BIDIR_PRED
+ unsigned int comp_ref[REF_CONTEXTS][FWD_REFS-1][2];
+ unsigned int comp_bwdref[REF_CONTEXTS][BWD_REFS-1][2];
+#else
unsigned int comp_ref[REF_CONTEXTS][COMP_REFS-1][2];
+#endif // !CONFIG_EXT_REFS && CONFIG_BIDIR_PRED
unsigned int tx_size_totals[TX_SIZES];
unsigned int tx_size[TX_SIZES - 1][TX_SIZE_CONTEXTS][TX_SIZES];
#if CONFIG_VAR_TX
diff --git a/vp10/common/enums.h b/vp10/common/enums.h
index 3b2ef29..cdebc69 100644
--- a/vp10/common/enums.h
+++ b/vp10/common/enums.h
@@ -204,10 +204,17 @@
VP9_GOLD_FLAG = 1 << 4,
VP9_ALT_FLAG = 1 << 5,
VP9_REFFRAME_ALL = (1 << 6) - 1
-#else
+#else // CONFIG_EXT_REFS
+#if CONFIG_BIDIR_PRED
+ VP9_GOLD_FLAG = 1 << 1,
+ VP9_BWD_FLAG = 1 << 2,
+ VP9_ALT_FLAG = 1 << 3,
+ VP9_REFFRAME_ALL = (1 << 4) - 1
+#else // CONFIG_BIDIR_PRED
VP9_GOLD_FLAG = 1 << 1,
VP9_ALT_FLAG = 1 << 2,
VP9_REFFRAME_ALL = (1 << 3) - 1
+#endif // CONFIG_BIDIR_PRED
#endif // CONFIG_EXT_REFS
} VP9_REFFRAME;
@@ -367,11 +374,24 @@
#endif
#if CONFIG_EXT_REFS
+
#define SINGLE_REFS 6
#define COMP_REFS 5
-#else
+
+#else // CONFIG_EXT_REFS
+
+#if CONFIG_BIDIR_PRED
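+// With bidirectional prediction the single references split into two forward
+// (past) and two backward (future) references.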
+#define FWD_REFS 2
+#define BWD_REFS 2
+#define SINGLE_REFS (FWD_REFS + BWD_REFS)
+#define COMP_REFS (FWD_REFS * BWD_REFS)
+
+#else // CONFIG_BIDIR_PRED
+
#define SINGLE_REFS 3
#define COMP_REFS 2
+#endif // CONFIG_BIDIR_PRED
+
#endif // CONFIG_EXT_REFS
#if CONFIG_SUPERTX
diff --git a/vp10/common/filter.h b/vp10/common/filter.h
index f70d0cc..a51e2d0 100644
--- a/vp10/common/filter.h
+++ b/vp10/common/filter.h
@@ -47,7 +47,13 @@
#define BILINEAR (SWITCHABLE_FILTERS)
#define SWITCHABLE (SWITCHABLE_FILTERS + 1) /* the last one */
+#if CONFIG_DUAL_FILTER
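+// With dual filters, contexts are grouped by {single, compound} prediction and
+// by filter direction, giving 4 groups of (SWITCHABLE_FILTERS + 1) contexts.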
+#define SWITCHABLE_FILTER_CONTEXTS ((SWITCHABLE_FILTERS + 1) * 4)
+#define INTER_FILTER_COMP_OFFSET (SWITCHABLE_FILTERS + 1)
+#define INTER_FILTER_DIR_OFFSET ((SWITCHABLE_FILTERS + 1) * 2)
+#else
#define SWITCHABLE_FILTER_CONTEXTS (SWITCHABLE_FILTERS + 1)
+#endif  // CONFIG_DUAL_FILTER
typedef uint8_t INTERP_FILTER;
diff --git a/vp10/common/idct.c b/vp10/common/idct.c
index a5d50bb..717c914 100644
--- a/vp10/common/idct.c
+++ b/vp10/common/idct.c
@@ -1297,7 +1297,8 @@
case FLIPADST_FLIPADST:
case ADST_FLIPADST:
case FLIPADST_ADST:
- vp10_highbd_iht4x4_16_add(input, dest, stride, tx_type, bd);
+ vp10_inv_txfm2d_add_4x4_c(input, CONVERT_TO_SHORTPTR(dest), stride,
+ tx_type, bd);
break;
case V_DCT:
case H_DCT:
@@ -1336,7 +1337,8 @@
case FLIPADST_FLIPADST:
case ADST_FLIPADST:
case FLIPADST_ADST:
- vp10_highbd_iht8x8_64_add(input, dest, stride, tx_type, bd);
+ vp10_inv_txfm2d_add_8x8_c(input, CONVERT_TO_SHORTPTR(dest), stride,
+ tx_type, bd);
break;
case V_DCT:
case H_DCT:
@@ -1375,7 +1377,8 @@
case FLIPADST_FLIPADST:
case ADST_FLIPADST:
case FLIPADST_ADST:
- vp10_highbd_iht16x16_256_add(input, dest, stride, tx_type, bd);
+ vp10_inv_txfm2d_add_16x16_c(input, CONVERT_TO_SHORTPTR(dest), stride,
+ tx_type, bd);
break;
case V_DCT:
case H_DCT:
diff --git a/vp10/common/loopfilter.h b/vp10/common/loopfilter.h
index 2a88003..8fb5ef3 100644
--- a/vp10/common/loopfilter.h
+++ b/vp10/common/loopfilter.h
@@ -45,7 +45,7 @@
uint8_t mode_ref_delta_update;
// 0 = Intra, Last, Last2+Last3+LAST4(CONFIG_EXT_REFS),
- // GF, ARF
+ // GF, BRF(CONFIG_BIDIR_PRED), ARF
signed char ref_deltas[MAX_REF_FRAMES];
signed char last_ref_deltas[MAX_REF_FRAMES];
diff --git a/vp10/common/onyxc_int.h b/vp10/common/onyxc_int.h
index 455ca2d..d3b407a 100644
--- a/vp10/common/onyxc_int.h
+++ b/vp10/common/onyxc_int.h
@@ -186,6 +186,9 @@
int show_frame;
int last_show_frame;
int show_existing_frame;
+#if !CONFIG_EXT_REFS && CONFIG_BIDIR_PRED
+ int is_reference_frame; // A frame used as a reference
+#endif // !CONFIG_EXT_REFS && CONFIG_BIDIR_PRED
// Flag signaling that the frame is encoded using only INTRA modes.
uint8_t intra_only;
@@ -270,8 +273,13 @@
int frame_parallel_decode; // frame-based threading.
// Context probabilities for reference frame prediction
+#if !CONFIG_EXT_REFS && CONFIG_BIDIR_PRED
+ MV_REFERENCE_FRAME comp_fwd_ref[FWD_REFS];
+ MV_REFERENCE_FRAME comp_bwd_ref[BWD_REFS];
+#else
MV_REFERENCE_FRAME comp_fixed_ref;
MV_REFERENCE_FRAME comp_var_ref[COMP_REFS];
+#endif // !CONFIG_EXT_REFS && CONFIG_BIDIR_PRED
REFERENCE_MODE reference_mode;
FRAME_CONTEXT *fc; /* this frame entropy */
diff --git a/vp10/common/pred_common.c b/vp10/common/pred_common.c
index 0c698a7..37ae288 100644
--- a/vp10/common/pred_common.c
+++ b/vp10/common/pred_common.c
@@ -11,45 +11,61 @@
#include "vp10/common/common.h"
#include "vp10/common/pred_common.h"
+#include "vp10/common/reconinter.h"
#include "vp10/common/seg_common.h"
// Returns a context number for the given MB prediction signal
#if CONFIG_DUAL_FILTER
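+// Returns the interpolation filter a neighboring block uses for the given
+// reference frame and filter direction, or SWITCHABLE_FILTERS if that block
+// does not apply a subpel filter for this reference.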
+static INTERP_FILTER get_ref_filter_type(const MODE_INFO *mi,
+ const MACROBLOCKD *xd,
+ int dir,
+ MV_REFERENCE_FRAME ref_frame) {
+ INTERP_FILTER ref_type = SWITCHABLE_FILTERS;
+ const MB_MODE_INFO *ref_mbmi = &mi->mbmi;
+ int use_subpel[2] = {
+ has_subpel_mv_component(mi, xd, dir),
+ has_subpel_mv_component(mi, xd, dir + 2),
+ };
+
+ if (ref_mbmi->ref_frame[0] == ref_frame && use_subpel[0])
+ ref_type = ref_mbmi->interp_filter[(dir & 0x01)];
+ else if (ref_mbmi->ref_frame[1] == ref_frame && use_subpel[1])
+ ref_type = ref_mbmi->interp_filter[(dir & 0x01) + 2];
+
+ return ref_type;
+}
+
int vp10_get_pred_context_switchable_interp(const MACROBLOCKD *xd, int dir) {
const MB_MODE_INFO *const mbmi = &xd->mi[0]->mbmi;
+ const int ctx_offset =
+ (mbmi->ref_frame[1] > INTRA_FRAME) * INTER_FILTER_COMP_OFFSET;
MV_REFERENCE_FRAME ref_frame = (dir < 2) ?
mbmi->ref_frame[0] : mbmi->ref_frame[1];
// Note:
// The mode info data structure has a one element border above and to the
// left of the entries corresponding to real macroblocks.
// The prediction flags in these dummy entries are initialized to 0.
- const MB_MODE_INFO *const left_mbmi = xd->left_mbmi;
- const MB_MODE_INFO *const above_mbmi = xd->above_mbmi;
+ int filter_type_ctx = ctx_offset + (dir & 0x01) * INTER_FILTER_DIR_OFFSET;
int left_type = SWITCHABLE_FILTERS;
int above_type = SWITCHABLE_FILTERS;
- if (xd->left_available) {
- if (left_mbmi->ref_frame[0] == ref_frame)
- left_type = left_mbmi->interp_filter[(dir & 0x01)];
- else if (left_mbmi->ref_frame[1] == ref_frame)
- left_type = left_mbmi->interp_filter[(dir & 0x01) + 2];
- }
+ if (xd->left_available)
+ left_type = get_ref_filter_type(xd->mi[-1], xd, dir, ref_frame);
- if (xd->up_available) {
- if (above_mbmi->ref_frame[0] == ref_frame)
- above_type = above_mbmi->interp_filter[(dir & 0x01)];
- else if (above_mbmi->ref_frame[1] == ref_frame)
- above_type = above_mbmi->interp_filter[(dir & 0x01) + 2];
- }
+ if (xd->up_available)
+ above_type = get_ref_filter_type(xd->mi[-xd->mi_stride], xd,
+ dir, ref_frame);
if (left_type == above_type)
- return left_type;
+ filter_type_ctx += left_type;
else if (left_type == SWITCHABLE_FILTERS && above_type != SWITCHABLE_FILTERS)
- return above_type;
+ filter_type_ctx += above_type;
else if (left_type != SWITCHABLE_FILTERS && above_type == SWITCHABLE_FILTERS)
- return left_type;
+ filter_type_ctx += left_type;
else
- return SWITCHABLE_FILTERS;
+ filter_type_ctx += SWITCHABLE_FILTERS;
+
+ return filter_type_ctx;
}
#else
int vp10_get_pred_context_switchable_interp(const MACROBLOCKD *xd) {
@@ -161,8 +177,57 @@
}
}
+#if !CONFIG_EXT_REFS && CONFIG_BIDIR_PRED
+
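+// True iff the given reference frame is one of the two backward references
+// used for compound prediction.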
+#define CHECK_COMP_BWD_REF(ref_frame) \
+ (((ref_frame) == cm->comp_bwd_ref[0]) || ((ref_frame) == cm->comp_bwd_ref[1]))
+
int vp10_get_reference_mode_context(const VP10_COMMON *cm,
- const MACROBLOCKD *xd) {
+ const MACROBLOCKD *xd) {
+ int ctx;
+ const MB_MODE_INFO *const above_mbmi = xd->above_mbmi;
+ const MB_MODE_INFO *const left_mbmi = xd->left_mbmi;
+ const int has_above = xd->up_available;
+ const int has_left = xd->left_available;
+ // Note:
+ // The mode info data structure has a one element border above and to the
+ // left of the entries corresponding to real macroblocks.
+ // The prediction flags in these dummy entries are initialized to 0.
+ if (has_above && has_left) { // both edges available
+ if (!has_second_ref(above_mbmi) && !has_second_ref(left_mbmi))
+ // neither edge uses comp pred (0/1)
+ ctx = CHECK_COMP_BWD_REF(above_mbmi->ref_frame[0]) ^
+ CHECK_COMP_BWD_REF(left_mbmi->ref_frame[0]);
+ else if (!has_second_ref(above_mbmi))
+ // one of two edges uses comp pred (2/3)
+ ctx = 2 + (CHECK_COMP_BWD_REF(above_mbmi->ref_frame[0]) ||
+ !is_inter_block(above_mbmi));
+ else if (!has_second_ref(left_mbmi))
+ // one of two edges uses comp pred (2/3)
+ ctx = 2 + (CHECK_COMP_BWD_REF(left_mbmi->ref_frame[0]) ||
+ !is_inter_block(left_mbmi));
+ else // both edges use comp pred (4)
+ ctx = 4;
+ } else if (has_above || has_left) { // one edge available
+ const MB_MODE_INFO *edge_mbmi = has_above ? above_mbmi : left_mbmi;
+
+ if (!has_second_ref(edge_mbmi))
+ // edge does not use comp pred (0/1)
+ ctx = CHECK_COMP_BWD_REF(edge_mbmi->ref_frame[0]);
+ else
+ // edge uses comp pred (3)
+ ctx = 3;
+ } else { // no edges available (1)
+ ctx = 1;
+ }
+ assert(ctx >= 0 && ctx < COMP_INTER_CONTEXTS);
+ return ctx;
+}
+
+#else // !CONFIG_EXT_REFS && CONFIG_BIDIR_PRED
+
+int vp10_get_reference_mode_context(const VP10_COMMON *cm,
+ const MACROBLOCKD *xd) {
int ctx;
const MB_MODE_INFO *const above_mbmi = xd->above_mbmi;
const MB_MODE_INFO *const left_mbmi = xd->left_mbmi;
@@ -203,6 +268,8 @@
return ctx;
}
+#endif // !CONFIG_EXT_REFS && CONFIG_BIDIR_PRED
+
#if CONFIG_EXT_REFS
// TODO(zoeliu): Future work will be conducted to optimize the context design
@@ -618,6 +685,200 @@
#else // CONFIG_EXT_REFS
+#if CONFIG_BIDIR_PRED
+
+// Returns a context number for the given MB prediction signal
+int vp10_get_pred_context_comp_ref_p(const VP10_COMMON *cm,
+ const MACROBLOCKD *xd) {
+ int pred_context;
+ const MB_MODE_INFO *const above_mbmi = xd->above_mbmi;
+ const MB_MODE_INFO *const left_mbmi = xd->left_mbmi;
+ const int above_in_image = xd->up_available;
+ const int left_in_image = xd->left_available;
+
+ // Note:
+ // The mode info data structure has a one element border above and to the
+ // left of the entries corresponding to real macroblocks.
+ // The prediction flags in these dummy entries are initialized to 0.
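+  // Index of the backward reference within ref_frame[], derived from the
+  // sign bias of the first backward reference.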
+ const int bwd_ref_sign_idx = cm->ref_frame_sign_bias[cm->comp_bwd_ref[0]];
+ const int fwd_ref_sign_idx = !bwd_ref_sign_idx;
+
+ if (above_in_image && left_in_image) { // both edges available
+ const int above_intra = !is_inter_block(above_mbmi);
+ const int left_intra = !is_inter_block(left_mbmi);
+
+ if (above_intra && left_intra) { // intra/intra (2)
+ pred_context = 2;
+ } else if (above_intra || left_intra) { // intra/inter
+ const MB_MODE_INFO *edge_mbmi = above_intra ? left_mbmi : above_mbmi;
+
+ if (!has_second_ref(edge_mbmi)) // single pred (1/3)
+ pred_context = 1 + 2 * (edge_mbmi->ref_frame[0] != cm->comp_fwd_ref[1]);
+ else // comp pred (1/3)
+ pred_context = 1 + 2 * (edge_mbmi->ref_frame[fwd_ref_sign_idx]
+ != cm->comp_fwd_ref[1]);
+ } else { // inter/inter
+ const int l_sg = !has_second_ref(left_mbmi);
+ const int a_sg = !has_second_ref(above_mbmi);
+ const MV_REFERENCE_FRAME frfa = a_sg ?
+ above_mbmi->ref_frame[0] : above_mbmi->ref_frame[fwd_ref_sign_idx];
+ const MV_REFERENCE_FRAME frfl = l_sg ?
+ left_mbmi->ref_frame[0] : left_mbmi->ref_frame[fwd_ref_sign_idx];
+
+ if (frfa == frfl && frfa == cm->comp_fwd_ref[1]) {
+ pred_context = 0;
+ } else if (l_sg && a_sg) { // single/single
+ if ((frfa != frfl) &&
+ (frfa != cm->comp_fwd_ref[1]) && (frfl != cm->comp_fwd_ref[1]))
+ pred_context = 4;
+ else if (frfa == frfl)
+ pred_context = 3;
+ else
+ pred_context = 1;
+ } else if (l_sg || a_sg) { // single/comp
+ const MV_REFERENCE_FRAME frfc = l_sg ? frfa : frfl;
+ const MV_REFERENCE_FRAME rfs = a_sg ? frfa : frfl;
+ if (frfc == cm->comp_fwd_ref[1] && rfs != cm->comp_fwd_ref[1])
+ pred_context = 1;
+ else if (rfs == cm->comp_fwd_ref[1] && frfc != cm->comp_fwd_ref[1])
+ pred_context = 2;
+ else
+ pred_context = 4;
+ } else if (frfa == frfl) { // comp/comp
+ pred_context = 4;
+ } else {
+ pred_context = 2;
+ }
+ }
+ } else if (above_in_image || left_in_image) { // one edge available
+ const MB_MODE_INFO *edge_mbmi = above_in_image ? above_mbmi : left_mbmi;
+
+ if (!is_inter_block(edge_mbmi)) {
+ pred_context = 2;
+ } else {
+ if (has_second_ref(edge_mbmi))
+ pred_context = 4 * (edge_mbmi->ref_frame[fwd_ref_sign_idx]
+ != cm->comp_fwd_ref[1]);
+ else
+ pred_context = 3 * (edge_mbmi->ref_frame[0] != cm->comp_fwd_ref[1]);
+ }
+ } else { // no edges available (2)
+ pred_context = 2;
+ }
+ assert(pred_context >= 0 && pred_context < REF_CONTEXTS);
+
+ return pred_context;
+}
+
+// Returns a context number for the given MB prediction signal
+int vp10_get_pred_context_comp_bwdref_p(const VP10_COMMON *cm,
+ const MACROBLOCKD *xd) {
+ int pred_context;
+ const MB_MODE_INFO *const above_mbmi = xd->above_mbmi;
+ const MB_MODE_INFO *const left_mbmi = xd->left_mbmi;
+ const int above_in_image = xd->up_available;
+ const int left_in_image = xd->left_available;
+
+ // Note:
+ // The mode info data structure has a one element border above and to the
+ // left of the entries corresponding to real macroblocks.
+ // The prediction flags in these dummy entries are initialized to 0.
+ const int bwd_ref_sign_idx = cm->ref_frame_sign_bias[cm->comp_bwd_ref[0]];
+ const int fwd_ref_sign_idx = !bwd_ref_sign_idx;
+
+ if (above_in_image && left_in_image) { // both edges available
+ const int above_intra = !is_inter_block(above_mbmi);
+ const int left_intra = !is_inter_block(left_mbmi);
+
+ if (above_intra && left_intra) { // intra/intra (2)
+ pred_context = 2;
+ } else if (above_intra || left_intra) { // intra/inter
+ const MB_MODE_INFO *edge_mbmi = above_intra ? left_mbmi : above_mbmi;
+
+ if (!has_second_ref(edge_mbmi)) // single pred (1/3)
+ pred_context = 1 + 2 * (edge_mbmi->ref_frame[1] != cm->comp_bwd_ref[1]);
+ else // comp pred (1/3)
+ pred_context = 1 + 2 * (edge_mbmi->ref_frame[bwd_ref_sign_idx]
+ != cm->comp_bwd_ref[1]);
+ } else { // inter/inter
+ const int l_comp = has_second_ref(left_mbmi);
+ const int a_comp = has_second_ref(above_mbmi);
+
+ const MV_REFERENCE_FRAME l_brf = l_comp ?
+ left_mbmi->ref_frame[bwd_ref_sign_idx] : NONE;
+ const MV_REFERENCE_FRAME a_brf = a_comp ?
+ above_mbmi->ref_frame[bwd_ref_sign_idx] : NONE;
+
+ const MV_REFERENCE_FRAME l_frf = !l_comp ?
+ left_mbmi->ref_frame[0] : left_mbmi->ref_frame[fwd_ref_sign_idx];
+ const MV_REFERENCE_FRAME a_frf = !a_comp ?
+ above_mbmi->ref_frame[0] : above_mbmi->ref_frame[fwd_ref_sign_idx];
+
+ if (l_comp && a_comp) {
+ if (l_brf == a_brf && l_brf == cm->comp_bwd_ref[1]) {
+ pred_context = 0;
+ } else if (l_brf == cm->comp_bwd_ref[1] ||
+ a_brf == cm->comp_bwd_ref[1]) {
+ pred_context = 1;
+ } else {
+ // NOTE: Backward ref should be either BWDREF or ALTREF.
+ assert(l_brf == a_brf && l_brf != cm->comp_bwd_ref[1]);
+ pred_context = 3;
+ }
+ } else if (!l_comp && !a_comp) {
+ if (l_frf == a_frf && l_frf == cm->comp_bwd_ref[1]) {
+ pred_context = 0;
+ } else if (l_frf == cm->comp_bwd_ref[1] ||
+ a_frf == cm->comp_bwd_ref[1]) {
+ pred_context = 1;
+ } else if (l_frf == a_frf) {
+ pred_context = 3;
+ } else {
+ assert(l_frf != a_frf &&
+ l_frf != cm->comp_bwd_ref[1] && a_frf != cm->comp_bwd_ref[1]);
+ pred_context = 4;
+ }
+ } else {
+ assert((l_comp && !a_comp) || (!l_comp && a_comp));
+
+ if ((l_comp && l_brf == cm->comp_bwd_ref[1] &&
+ a_frf == cm->comp_bwd_ref[1]) ||
+ (a_comp && a_brf == cm->comp_bwd_ref[1] &&
+ l_frf == cm->comp_bwd_ref[1])) {
+ pred_context = 1;
+ } else if ((l_comp && l_brf == cm->comp_bwd_ref[1]) ||
+ (a_comp && a_brf == cm->comp_bwd_ref[1]) ||
+ (!l_comp && l_frf == cm->comp_bwd_ref[1]) ||
+ (!a_comp && a_frf == cm->comp_bwd_ref[1])) {
+ pred_context = 2;
+ } else {
+ pred_context = 4;
+ }
+ }
+ }
+ } else if (above_in_image || left_in_image) { // one edge available
+ const MB_MODE_INFO *edge_mbmi = above_in_image ? above_mbmi : left_mbmi;
+
+ if (!is_inter_block(edge_mbmi)) {
+ pred_context = 2;
+ } else {
+ if (has_second_ref(edge_mbmi)) {
+ pred_context = 4 * (edge_mbmi->ref_frame[bwd_ref_sign_idx]
+ != cm->comp_bwd_ref[1]);
+ } else {
+ pred_context = 3 * (edge_mbmi->ref_frame[0] != cm->comp_bwd_ref[1]);
+ }
+ }
+ } else { // no edges available (2)
+ pred_context = 2;
+ }
+ assert(pred_context >= 0 && pred_context < REF_CONTEXTS);
+
+ return pred_context;
+}
+
+#else // CONFIG_BIDIR_PRED
+
// Returns a context number for the given MB prediction signal
int vp10_get_pred_context_comp_ref_p(const VP10_COMMON *cm,
const MACROBLOCKD *xd) {
@@ -701,6 +962,8 @@
return pred_context;
}
+#endif // CONFIG_BIDIR_PRED
+
#endif // CONFIG_EXT_REFS
#if CONFIG_EXT_REFS
@@ -1284,7 +1547,7 @@
if (rfs == GOLDEN_FRAME)
pred_context = 3 + (crf1 == GOLDEN_FRAME || crf2 == GOLDEN_FRAME);
- else if (rfs == ALTREF_FRAME)
+ else if (rfs != GOLDEN_FRAME && rfs != LAST_FRAME)
pred_context = crf1 == GOLDEN_FRAME || crf2 == GOLDEN_FRAME;
else
pred_context = 1 + 2 * (crf1 == GOLDEN_FRAME || crf2 == GOLDEN_FRAME);
@@ -1296,8 +1559,8 @@
: above0;
pred_context = 4 * (edge0 == GOLDEN_FRAME);
} else {
- pred_context = 2 * (above0 == GOLDEN_FRAME) +
- 2 * (left0 == GOLDEN_FRAME);
+ pred_context =
+ 2 * (above0 == GOLDEN_FRAME) + 2 * (left0 == GOLDEN_FRAME);
}
}
}
@@ -1319,4 +1582,105 @@
return pred_context;
}
+#if CONFIG_BIDIR_PRED
+
+#define CHECK_BWDREF_OR_ALTREF(ref_frame) \
+  (((ref_frame) == BWDREF_FRAME) || ((ref_frame) == ALTREF_FRAME))
+// For the bit to signal whether the single reference is ALTREF_FRAME or
+// BWDREF_FRAME, knowing that it shall be either of these 2 choices.
+//
+// NOTE(zoeliu): The probability that ref_frame[0] is ALTREF_FRAME, conditioned
+// on it being either ALTREF_FRAME or BWDREF_FRAME.
+int vp10_get_pred_context_single_ref_p3(const MACROBLOCKD *xd) {
+ int pred_context;
+ const MB_MODE_INFO *const above_mbmi = xd->above_mbmi;
+ const MB_MODE_INFO *const left_mbmi = xd->left_mbmi;
+ const int has_above = xd->up_available;
+ const int has_left = xd->left_available;
+
+ // Note:
+ // The mode info data structure has a one element border above and to the
+  // left of the entries corresponding to real macroblocks.
+  // The prediction flags in these dummy entries are initialized to 0.
+ if (has_above && has_left) { // both edges available
+ const int above_intra = !is_inter_block(above_mbmi);
+ const int left_intra = !is_inter_block(left_mbmi);
+
+ if (above_intra && left_intra) { // intra/intra
+ pred_context = 2;
+ } else if (above_intra || left_intra) { // intra/inter or inter/intra
+ const MB_MODE_INFO *edge_mbmi = above_intra ? left_mbmi : above_mbmi;
+ if (!has_second_ref(edge_mbmi)) {
+ if (!CHECK_BWDREF_OR_ALTREF(edge_mbmi->ref_frame[0]))
+ pred_context = 3;
+ else
+ pred_context = 4 * (edge_mbmi->ref_frame[0] == BWDREF_FRAME);
+ } else {
+ pred_context = 1 +
+ 2 * (edge_mbmi->ref_frame[0] == BWDREF_FRAME ||
+ edge_mbmi->ref_frame[1] == BWDREF_FRAME);
+ }
+ } else { // inter/inter
+ const int above_has_second = has_second_ref(above_mbmi);
+ const int left_has_second = has_second_ref(left_mbmi);
+ const MV_REFERENCE_FRAME above0 = above_mbmi->ref_frame[0];
+ const MV_REFERENCE_FRAME above1 = above_mbmi->ref_frame[1];
+ const MV_REFERENCE_FRAME left0 = left_mbmi->ref_frame[0];
+ const MV_REFERENCE_FRAME left1 = left_mbmi->ref_frame[1];
+
+ if (above_has_second && left_has_second) {
+ if (above0 == left0 && above1 == left1)
+ pred_context =
+ 3 * (above0 == BWDREF_FRAME || above1 == BWDREF_FRAME ||
+ left0 == BWDREF_FRAME || left1 == BWDREF_FRAME);
+ else
+ pred_context = 2;
+ } else if (above_has_second || left_has_second) {
+ const MV_REFERENCE_FRAME srf = !above_has_second ? above0 : left0;
+ const MV_REFERENCE_FRAME crf0 = above_has_second ? above0 : left0;
+ const MV_REFERENCE_FRAME crf1 = above_has_second ? above1 : left1;
+
+ if (srf == BWDREF_FRAME)
+        pred_context = 3 + (crf0 == BWDREF_FRAME || crf1 == BWDREF_FRAME);
+ else if (srf == ALTREF_FRAME)
+ pred_context = (crf0 == BWDREF_FRAME || crf1 == BWDREF_FRAME);
+ else
+ pred_context = 1 + 2 * (crf0 == BWDREF_FRAME || crf1 == BWDREF_FRAME);
+ } else {
+ if (!CHECK_BWDREF_OR_ALTREF(above0) &&
+ !CHECK_BWDREF_OR_ALTREF(left0)) {
+ pred_context = 2 + (above0 == left0);
+ } else if (!CHECK_BWDREF_OR_ALTREF(above0) ||
+ !CHECK_BWDREF_OR_ALTREF(left0)) {
+ const MV_REFERENCE_FRAME edge0 =
+ !CHECK_BWDREF_OR_ALTREF(above0) ? left0 : above0;
+ pred_context = 4 * (edge0 == BWDREF_FRAME);
+ } else {
+ pred_context =
+ 2 * (above0 == BWDREF_FRAME) + 2 * (left0 == BWDREF_FRAME);
+ }
+ }
+ }
+ } else if (has_above || has_left) { // one edge available
+ const MB_MODE_INFO *edge_mbmi = has_above ? above_mbmi : left_mbmi;
+
+ if (!is_inter_block(edge_mbmi) ||
+ (!CHECK_BWDREF_OR_ALTREF(edge_mbmi->ref_frame[0]) &&
+ !has_second_ref(edge_mbmi)))
+ pred_context = 2;
+ else if (!has_second_ref(edge_mbmi))
+ pred_context = 4 * (edge_mbmi->ref_frame[0] == BWDREF_FRAME);
+ else
+ pred_context = 3 * (edge_mbmi->ref_frame[0] == BWDREF_FRAME ||
+ edge_mbmi->ref_frame[1] == BWDREF_FRAME);
+ } else { // no edges available (2)
+ pred_context = 2;
+ }
+
+ assert(pred_context >= 0 && pred_context < REF_CONTEXTS);
+ return pred_context;
+}
+
+#endif // CONFIG_BIDIR_PRED
+
#endif // CONFIG_EXT_REFS
diff --git a/vp10/common/pred_common.h b/vp10/common/pred_common.h
index f321599..1c3c721 100644
--- a/vp10/common/pred_common.h
+++ b/vp10/common/pred_common.h
@@ -87,7 +87,7 @@
const MACROBLOCKD *xd);
static INLINE vpx_prob vp10_get_reference_mode_prob(const VP10_COMMON *cm,
- const MACROBLOCKD *xd) {
+ const MACROBLOCKD *xd) {
return cm->fc->comp_inter_prob[vp10_get_reference_mode_context(cm, xd)];
}
@@ -127,6 +127,20 @@
const int pred_context = vp10_get_pred_context_comp_ref_p3(cm, xd);
return cm->fc->comp_ref_prob[pred_context][3];
}
+
+#else // CONFIG_EXT_REFS
+
+#if CONFIG_BIDIR_PRED
+int vp10_get_pred_context_comp_bwdref_p(const VP10_COMMON *cm,
+ const MACROBLOCKD *xd);
+
+static INLINE vpx_prob vp10_get_pred_prob_comp_bwdref_p(const VP10_COMMON *cm,
+ const MACROBLOCKD *xd) {
+ const int pred_context = vp10_get_pred_context_comp_bwdref_p(cm, xd);
+ return cm->fc->comp_bwdref_prob[pred_context][0];
+}
+#endif // CONFIG_BIDIR_PRED
+
#endif // CONFIG_EXT_REFS
int vp10_get_pred_context_single_ref_p1(const MACROBLOCKD *xd);
@@ -143,14 +157,16 @@
return cm->fc->single_ref_prob[vp10_get_pred_context_single_ref_p2(xd)][1];
}
-#if CONFIG_EXT_REFS
+#if CONFIG_EXT_REFS || CONFIG_BIDIR_PRED
int vp10_get_pred_context_single_ref_p3(const MACROBLOCKD *xd);
static INLINE vpx_prob vp10_get_pred_prob_single_ref_p3(const VP10_COMMON *cm,
const MACROBLOCKD *xd) {
return cm->fc->single_ref_prob[vp10_get_pred_context_single_ref_p3(xd)][2];
}
+#endif  // CONFIG_EXT_REFS || CONFIG_BIDIR_PRED
+#if CONFIG_EXT_REFS
int vp10_get_pred_context_single_ref_p4(const MACROBLOCKD *xd);
static INLINE vpx_prob vp10_get_pred_prob_single_ref_p4(const VP10_COMMON *cm,
diff --git a/vp10/common/reconinter.c b/vp10/common/reconinter.c
index ecfb54c..713831b 100644
--- a/vp10/common/reconinter.c
+++ b/vp10/common/reconinter.c
@@ -11,6 +11,7 @@
#include <assert.h>
#include "./vpx_scale_rtcd.h"
+#include "./vpx_dsp_rtcd.h"
#include "./vpx_config.h"
#include "vpx/vpx_integer.h"
@@ -23,217 +24,287 @@
#endif // CONFIG_OBMC
#if CONFIG_EXT_INTER
-static int get_masked_weight(int m) {
+
+// Set to one to use larger codebooks
+#define USE_LARGE_WEDGE_CODEBOOK 0
+
+#define NSMOOTHERS 1
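+// Map a signed distance from the wedge boundary to a blend weight in
+// [0, 1 << WEDGE_WEIGHT_BITS], via a per-smoother ramp table.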
+static int get_masked_weight(int m, int smoothness) {
#define SMOOTHER_LEN 32
- static const uint8_t smoothfn[2 * SMOOTHER_LEN + 1] = {
- 0, 0, 0, 0, 0, 0, 0, 0,
- 0, 0, 0, 0, 0, 0, 0, 0,
- 0, 0, 0, 0, 0, 0, 0, 0,
- 1, 1, 2, 4, 6, 10, 16, 23,
- 32,
- 41, 48, 54, 58, 60, 62, 63, 63,
- 64, 64, 64, 64, 64, 64, 64, 64,
- 64, 64, 64, 64, 64, 64, 64, 64,
- 64, 64, 64, 64, 64, 64, 64, 64,
+ static const uint8_t smoothfn[NSMOOTHERS][2 * SMOOTHER_LEN + 1] = {
+ {
+ 0, 0, 0, 0, 0, 0, 0, 0,
+ 0, 0, 0, 0, 0, 0, 0, 0,
+ 0, 0, 0, 0, 0, 0, 0, 0,
+ 0, 0, 1, 2, 4, 7, 13, 21,
+ 32,
+ 43, 51, 57, 60, 62, 63, 64, 64,
+ 64, 64, 64, 64, 64, 64, 64, 64,
+ 64, 64, 64, 64, 64, 64, 64, 64,
+ 64, 64, 64, 64, 64, 64, 64, 64,
+ }
};
if (m < -SMOOTHER_LEN)
return 0;
else if (m > SMOOTHER_LEN)
return (1 << WEDGE_WEIGHT_BITS);
else
- return smoothfn[m + SMOOTHER_LEN];
+ return smoothfn[smoothness][m + SMOOTHER_LEN];
}
-#define WEDGE_OBLIQUE 1
-#define WEDGE_STRAIGHT 0
-#define WEDGE_PARMS 5
+// [smoother][negative][direction]
+DECLARE_ALIGNED(
+ 16, static uint8_t,
+ wedge_mask_obl[NSMOOTHERS][2][WEDGE_DIRECTIONS]
+ [MASK_MASTER_SIZE * MASK_MASTER_SIZE]);
-// [negative][transpose][reverse]
-DECLARE_ALIGNED(16, static uint8_t,
- wedge_mask_obl[2][2][2][MASK_MASTER_SIZE * MASK_MASTER_SIZE]);
-// [negative][transpose]
-DECLARE_ALIGNED(16, static uint8_t,
- wedge_mask_str[2][2][MASK_MASTER_SIZE * MASK_MASTER_SIZE]);
+DECLARE_ALIGNED(
+ 16, static uint8_t,
+ wedge_signflip_lookup[BLOCK_SIZES][MAX_WEDGE_TYPES]);
-// Equation of line: f(x, y) = a[0]*(x - a[2]*w/8) + a[1]*(y - a[3]*h/8) = 0
-void vp10_init_wedge_masks() {
- int i, j;
- const int w = MASK_MASTER_SIZE;
- const int h = MASK_MASTER_SIZE;
- const int stride = MASK_MASTER_STRIDE;
- const int a[4] = {2, 1, 4, 4};
- for (i = 0; i < h; ++i)
- for (j = 0; j < w; ++j) {
- int x = (2 * j + 1 - (a[2] * w) / 4);
- int y = (2 * i + 1 - (a[3] * h) / 4);
- int m = (a[0] * x + a[1] * y) / 2;
- wedge_mask_obl[1][0][0][i * stride + j] =
- wedge_mask_obl[1][1][0][j * stride + i] =
- get_masked_weight(m);
- wedge_mask_obl[1][0][1][i * stride + w - 1 - j] =
- wedge_mask_obl[1][1][1][(w - 1 - j) * stride + i] =
- (1 << WEDGE_WEIGHT_BITS) - get_masked_weight(m);
- wedge_mask_obl[0][0][0][i * stride + j] =
- wedge_mask_obl[0][1][0][j * stride + i] =
- (1 << WEDGE_WEIGHT_BITS) - get_masked_weight(m);
- wedge_mask_obl[0][0][1][i * stride + w - 1 - j] =
- wedge_mask_obl[0][1][1][(w - 1 - j) * stride + i] =
- get_masked_weight(m);
- wedge_mask_str[1][0][i * stride + j] =
- wedge_mask_str[1][1][j * stride + i] =
- get_masked_weight(x);
- wedge_mask_str[0][0][i * stride + j] =
- wedge_mask_str[0][1][j * stride + i] =
- (1 << WEDGE_WEIGHT_BITS) - get_masked_weight(x);
- }
-}
-
-static const int wedge_params_sml[1 << WEDGE_BITS_SML]
- [WEDGE_PARMS] = {
- {WEDGE_OBLIQUE, 1, 1, 4, 4},
- {WEDGE_OBLIQUE, 1, 0, 4, 4},
- {WEDGE_OBLIQUE, 0, 1, 4, 4},
- {WEDGE_OBLIQUE, 0, 0, 4, 4},
+// Some unused wedge codebooks are left in temporarily to facilitate
+// experiments. To be removed when settled.
+static wedge_code_type wedge_codebook_8_hgtw[8] = {
+ {WEDGE_OBLIQUE27, 4, 4},
+ {WEDGE_OBLIQUE63, 4, 4},
+ {WEDGE_OBLIQUE117, 4, 4},
+ {WEDGE_OBLIQUE153, 4, 4},
+ {WEDGE_OBLIQUE27, 4, 2},
+ {WEDGE_OBLIQUE27, 4, 6},
+ {WEDGE_OBLIQUE153, 4, 2},
+ {WEDGE_OBLIQUE153, 4, 6},
};
-static const int wedge_params_med_hgtw[1 << WEDGE_BITS_MED]
- [WEDGE_PARMS] = {
- {WEDGE_OBLIQUE, 1, 1, 4, 4},
- {WEDGE_OBLIQUE, 1, 0, 4, 4},
- {WEDGE_OBLIQUE, 0, 1, 4, 4},
- {WEDGE_OBLIQUE, 0, 0, 4, 4},
-
- {WEDGE_OBLIQUE, 1, 1, 4, 2},
- {WEDGE_OBLIQUE, 1, 1, 4, 6},
- {WEDGE_OBLIQUE, 1, 0, 4, 2},
- {WEDGE_OBLIQUE, 1, 0, 4, 6},
+static wedge_code_type wedge_codebook_8_hltw[8] = {
+ {WEDGE_OBLIQUE27, 4, 4},
+ {WEDGE_OBLIQUE63, 4, 4},
+ {WEDGE_OBLIQUE117, 4, 4},
+ {WEDGE_OBLIQUE153, 4, 4},
+ {WEDGE_OBLIQUE63, 2, 4},
+ {WEDGE_OBLIQUE63, 6, 4},
+ {WEDGE_OBLIQUE117, 2, 4},
+ {WEDGE_OBLIQUE117, 6, 4},
};
-static const int wedge_params_med_hltw[1 << WEDGE_BITS_MED]
- [WEDGE_PARMS] = {
- {WEDGE_OBLIQUE, 1, 1, 4, 4},
- {WEDGE_OBLIQUE, 1, 0, 4, 4},
- {WEDGE_OBLIQUE, 0, 1, 4, 4},
- {WEDGE_OBLIQUE, 0, 0, 4, 4},
-
- {WEDGE_OBLIQUE, 0, 1, 2, 4},
- {WEDGE_OBLIQUE, 0, 1, 6, 4},
- {WEDGE_OBLIQUE, 0, 0, 2, 4},
- {WEDGE_OBLIQUE, 0, 0, 6, 4},
+static wedge_code_type wedge_codebook_8_heqw[8] = {
+ {WEDGE_OBLIQUE27, 4, 4},
+ {WEDGE_OBLIQUE63, 4, 4},
+ {WEDGE_OBLIQUE117, 4, 4},
+ {WEDGE_OBLIQUE153, 4, 4},
+ {WEDGE_HORIZONTAL, 4, 2},
+ {WEDGE_HORIZONTAL, 4, 6},
+ {WEDGE_VERTICAL, 2, 4},
+ {WEDGE_VERTICAL, 6, 4},
};
-static const int wedge_params_med_heqw[1 << WEDGE_BITS_MED]
- [WEDGE_PARMS] = {
- {WEDGE_OBLIQUE, 1, 1, 4, 4},
- {WEDGE_OBLIQUE, 1, 0, 4, 4},
- {WEDGE_OBLIQUE, 0, 1, 4, 4},
- {WEDGE_OBLIQUE, 0, 0, 4, 4},
-
- {WEDGE_STRAIGHT, 1, 0, 4, 2},
- {WEDGE_STRAIGHT, 1, 0, 4, 6},
- {WEDGE_STRAIGHT, 0, 0, 2, 4},
- {WEDGE_STRAIGHT, 0, 0, 6, 4},
+#if !USE_LARGE_WEDGE_CODEBOOK
+static const wedge_code_type wedge_codebook_16_hgtw[16] = {
+ {WEDGE_OBLIQUE27, 4, 4},
+ {WEDGE_OBLIQUE63, 4, 4},
+ {WEDGE_OBLIQUE117, 4, 4},
+ {WEDGE_OBLIQUE153, 4, 4},
+ {WEDGE_HORIZONTAL, 4, 2},
+ {WEDGE_HORIZONTAL, 4, 4},
+ {WEDGE_HORIZONTAL, 4, 6},
+ {WEDGE_VERTICAL, 4, 4},
+ {WEDGE_OBLIQUE27, 4, 2},
+ {WEDGE_OBLIQUE27, 4, 6},
+ {WEDGE_OBLIQUE153, 4, 2},
+ {WEDGE_OBLIQUE153, 4, 6},
+ {WEDGE_OBLIQUE63, 2, 4},
+ {WEDGE_OBLIQUE63, 6, 4},
+ {WEDGE_OBLIQUE117, 2, 4},
+ {WEDGE_OBLIQUE117, 6, 4},
};
-static const int wedge_params_big_hgtw[1 << WEDGE_BITS_BIG]
- [WEDGE_PARMS] = {
- {WEDGE_OBLIQUE, 1, 1, 4, 4},
- {WEDGE_OBLIQUE, 1, 0, 4, 4},
- {WEDGE_OBLIQUE, 0, 1, 4, 4},
- {WEDGE_OBLIQUE, 0, 0, 4, 4},
-
- {WEDGE_OBLIQUE, 1, 1, 4, 2},
- {WEDGE_OBLIQUE, 1, 1, 4, 6},
- {WEDGE_OBLIQUE, 1, 0, 4, 2},
- {WEDGE_OBLIQUE, 1, 0, 4, 6},
-
- {WEDGE_OBLIQUE, 0, 1, 2, 4},
- {WEDGE_OBLIQUE, 0, 1, 6, 4},
- {WEDGE_OBLIQUE, 0, 0, 2, 4},
- {WEDGE_OBLIQUE, 0, 0, 6, 4},
-
- {WEDGE_STRAIGHT, 1, 0, 4, 2},
- {WEDGE_STRAIGHT, 1, 0, 4, 4},
- {WEDGE_STRAIGHT, 1, 0, 4, 6},
- {WEDGE_STRAIGHT, 0, 0, 4, 4},
+static const wedge_code_type wedge_codebook_16_hltw[16] = {
+ {WEDGE_OBLIQUE27, 4, 4},
+ {WEDGE_OBLIQUE63, 4, 4},
+ {WEDGE_OBLIQUE117, 4, 4},
+ {WEDGE_OBLIQUE153, 4, 4},
+ {WEDGE_VERTICAL, 2, 4},
+ {WEDGE_VERTICAL, 4, 4},
+ {WEDGE_VERTICAL, 6, 4},
+ {WEDGE_HORIZONTAL, 4, 4},
+ {WEDGE_OBLIQUE27, 4, 2},
+ {WEDGE_OBLIQUE27, 4, 6},
+ {WEDGE_OBLIQUE153, 4, 2},
+ {WEDGE_OBLIQUE153, 4, 6},
+ {WEDGE_OBLIQUE63, 2, 4},
+ {WEDGE_OBLIQUE63, 6, 4},
+ {WEDGE_OBLIQUE117, 2, 4},
+ {WEDGE_OBLIQUE117, 6, 4},
};
-static const int wedge_params_big_hltw[1 << WEDGE_BITS_BIG]
- [WEDGE_PARMS] = {
- {WEDGE_OBLIQUE, 1, 1, 4, 4},
- {WEDGE_OBLIQUE, 1, 0, 4, 4},
- {WEDGE_OBLIQUE, 0, 1, 4, 4},
- {WEDGE_OBLIQUE, 0, 0, 4, 4},
-
- {WEDGE_OBLIQUE, 1, 1, 4, 2},
- {WEDGE_OBLIQUE, 1, 1, 4, 6},
- {WEDGE_OBLIQUE, 1, 0, 4, 2},
- {WEDGE_OBLIQUE, 1, 0, 4, 6},
-
- {WEDGE_OBLIQUE, 0, 1, 2, 4},
- {WEDGE_OBLIQUE, 0, 1, 6, 4},
- {WEDGE_OBLIQUE, 0, 0, 2, 4},
- {WEDGE_OBLIQUE, 0, 0, 6, 4},
-
- {WEDGE_STRAIGHT, 0, 0, 2, 4},
- {WEDGE_STRAIGHT, 0, 0, 4, 4},
- {WEDGE_STRAIGHT, 0, 0, 6, 4},
- {WEDGE_STRAIGHT, 1, 0, 4, 4},
+static const wedge_code_type wedge_codebook_16_heqw[16] = {
+ {WEDGE_OBLIQUE27, 4, 4},
+ {WEDGE_OBLIQUE63, 4, 4},
+ {WEDGE_OBLIQUE117, 4, 4},
+ {WEDGE_OBLIQUE153, 4, 4},
+ {WEDGE_HORIZONTAL, 4, 2},
+ {WEDGE_HORIZONTAL, 4, 6},
+ {WEDGE_VERTICAL, 2, 4},
+ {WEDGE_VERTICAL, 6, 4},
+ {WEDGE_OBLIQUE27, 4, 2},
+ {WEDGE_OBLIQUE27, 4, 6},
+ {WEDGE_OBLIQUE153, 4, 2},
+ {WEDGE_OBLIQUE153, 4, 6},
+ {WEDGE_OBLIQUE63, 2, 4},
+ {WEDGE_OBLIQUE63, 6, 4},
+ {WEDGE_OBLIQUE117, 2, 4},
+ {WEDGE_OBLIQUE117, 6, 4},
};
-static const int wedge_params_big_heqw[1 << WEDGE_BITS_BIG]
- [WEDGE_PARMS] = {
- {WEDGE_OBLIQUE, 1, 1, 4, 4},
- {WEDGE_OBLIQUE, 1, 0, 4, 4},
- {WEDGE_OBLIQUE, 0, 1, 4, 4},
- {WEDGE_OBLIQUE, 0, 0, 4, 4},
-
- {WEDGE_OBLIQUE, 1, 1, 4, 2},
- {WEDGE_OBLIQUE, 1, 1, 4, 6},
- {WEDGE_OBLIQUE, 1, 0, 4, 2},
- {WEDGE_OBLIQUE, 1, 0, 4, 6},
-
- {WEDGE_OBLIQUE, 0, 1, 2, 4},
- {WEDGE_OBLIQUE, 0, 1, 6, 4},
- {WEDGE_OBLIQUE, 0, 0, 2, 4},
- {WEDGE_OBLIQUE, 0, 0, 6, 4},
-
- {WEDGE_STRAIGHT, 1, 0, 4, 2},
- {WEDGE_STRAIGHT, 1, 0, 4, 6},
- {WEDGE_STRAIGHT, 0, 0, 2, 4},
- {WEDGE_STRAIGHT, 0, 0, 6, 4},
-};
-
-static const int *get_wedge_params_lookup[BLOCK_SIZES] = {
- NULL,
- NULL,
- NULL,
- &wedge_params_sml[0][0],
- &wedge_params_med_hgtw[0][0],
- &wedge_params_med_hltw[0][0],
- &wedge_params_med_heqw[0][0],
- &wedge_params_med_hgtw[0][0],
- &wedge_params_med_hltw[0][0],
- &wedge_params_med_heqw[0][0],
- &wedge_params_big_hgtw[0][0],
- &wedge_params_big_hltw[0][0],
- &wedge_params_big_heqw[0][0],
+const wedge_params_type wedge_params_lookup[BLOCK_SIZES] = {
+ {0, NULL, NULL, 0},
+ {0, NULL, NULL, 0},
+ {0, NULL, NULL, 0},
+ {4, wedge_codebook_16_heqw, wedge_signflip_lookup[3], 0},
+ {4, wedge_codebook_16_hgtw, wedge_signflip_lookup[4], 0},
+ {4, wedge_codebook_16_hltw, wedge_signflip_lookup[5], 0},
+ {4, wedge_codebook_16_heqw, wedge_signflip_lookup[6], 0},
+ {4, wedge_codebook_16_hgtw, wedge_signflip_lookup[7], 0},
+ {4, wedge_codebook_16_hltw, wedge_signflip_lookup[8], 0},
+ {4, wedge_codebook_16_heqw, wedge_signflip_lookup[9], 0},
+ {0, wedge_codebook_8_hgtw, wedge_signflip_lookup[10], 0},
+ {0, wedge_codebook_8_hltw, wedge_signflip_lookup[11], 0},
+ {0, wedge_codebook_8_heqw, wedge_signflip_lookup[12], 0},
#if CONFIG_EXT_PARTITION
- &wedge_params_big_hgtw[0][0],
- &wedge_params_big_hltw[0][0],
- &wedge_params_big_heqw[0][0],
+ {0, NULL, NULL, 0},
+ {0, NULL, NULL, 0},
+ {0, NULL, NULL, 0},
#endif // CONFIG_EXT_PARTITION
};
-static const int *get_wedge_params(int wedge_index,
- BLOCK_SIZE sb_type) {
- const int *a = NULL;
- if (wedge_index != WEDGE_NONE) {
- return get_wedge_params_lookup[sb_type] + WEDGE_PARMS * wedge_index;
- }
- return a;
-}
+#else
+
+static const wedge_code_type wedge_codebook_32_hgtw[32] = {
+ {WEDGE_OBLIQUE27, 4, 4},
+ {WEDGE_OBLIQUE63, 4, 4},
+ {WEDGE_OBLIQUE117, 4, 4},
+ {WEDGE_OBLIQUE153, 4, 4},
+ {WEDGE_HORIZONTAL, 4, 2},
+ {WEDGE_HORIZONTAL, 4, 4},
+ {WEDGE_HORIZONTAL, 4, 6},
+ {WEDGE_VERTICAL, 4, 4},
+ {WEDGE_OBLIQUE27, 4, 1},
+ {WEDGE_OBLIQUE27, 4, 2},
+ {WEDGE_OBLIQUE27, 4, 3},
+ {WEDGE_OBLIQUE27, 4, 5},
+ {WEDGE_OBLIQUE27, 4, 6},
+ {WEDGE_OBLIQUE27, 4, 7},
+ {WEDGE_OBLIQUE153, 4, 1},
+ {WEDGE_OBLIQUE153, 4, 2},
+ {WEDGE_OBLIQUE153, 4, 3},
+ {WEDGE_OBLIQUE153, 4, 5},
+ {WEDGE_OBLIQUE153, 4, 6},
+ {WEDGE_OBLIQUE153, 4, 7},
+ {WEDGE_OBLIQUE63, 1, 4},
+ {WEDGE_OBLIQUE63, 2, 4},
+ {WEDGE_OBLIQUE63, 3, 4},
+ {WEDGE_OBLIQUE63, 5, 4},
+ {WEDGE_OBLIQUE63, 6, 4},
+ {WEDGE_OBLIQUE63, 7, 4},
+ {WEDGE_OBLIQUE117, 1, 4},
+ {WEDGE_OBLIQUE117, 2, 4},
+ {WEDGE_OBLIQUE117, 3, 4},
+ {WEDGE_OBLIQUE117, 5, 4},
+ {WEDGE_OBLIQUE117, 6, 4},
+ {WEDGE_OBLIQUE117, 7, 4},
+};
+
+static const wedge_code_type wedge_codebook_32_hltw[32] = {
+ {WEDGE_OBLIQUE27, 4, 4},
+ {WEDGE_OBLIQUE63, 4, 4},
+ {WEDGE_OBLIQUE117, 4, 4},
+ {WEDGE_OBLIQUE153, 4, 4},
+ {WEDGE_VERTICAL, 2, 4},
+ {WEDGE_VERTICAL, 4, 4},
+ {WEDGE_VERTICAL, 6, 4},
+ {WEDGE_HORIZONTAL, 4, 4},
+ {WEDGE_OBLIQUE27, 4, 1},
+ {WEDGE_OBLIQUE27, 4, 2},
+ {WEDGE_OBLIQUE27, 4, 3},
+ {WEDGE_OBLIQUE27, 4, 5},
+ {WEDGE_OBLIQUE27, 4, 6},
+ {WEDGE_OBLIQUE27, 4, 7},
+ {WEDGE_OBLIQUE153, 4, 1},
+ {WEDGE_OBLIQUE153, 4, 2},
+ {WEDGE_OBLIQUE153, 4, 3},
+ {WEDGE_OBLIQUE153, 4, 5},
+ {WEDGE_OBLIQUE153, 4, 6},
+ {WEDGE_OBLIQUE153, 4, 7},
+ {WEDGE_OBLIQUE63, 1, 4},
+ {WEDGE_OBLIQUE63, 2, 4},
+ {WEDGE_OBLIQUE63, 3, 4},
+ {WEDGE_OBLIQUE63, 5, 4},
+ {WEDGE_OBLIQUE63, 6, 4},
+ {WEDGE_OBLIQUE63, 7, 4},
+ {WEDGE_OBLIQUE117, 1, 4},
+ {WEDGE_OBLIQUE117, 2, 4},
+ {WEDGE_OBLIQUE117, 3, 4},
+ {WEDGE_OBLIQUE117, 5, 4},
+ {WEDGE_OBLIQUE117, 6, 4},
+ {WEDGE_OBLIQUE117, 7, 4},
+};
+
+static const wedge_code_type wedge_codebook_32_heqw[32] = {
+ {WEDGE_OBLIQUE27, 4, 4},
+ {WEDGE_OBLIQUE63, 4, 4},
+ {WEDGE_OBLIQUE117, 4, 4},
+ {WEDGE_OBLIQUE153, 4, 4},
+ {WEDGE_HORIZONTAL, 4, 2},
+ {WEDGE_HORIZONTAL, 4, 6},
+ {WEDGE_VERTICAL, 2, 4},
+ {WEDGE_VERTICAL, 6, 4},
+ {WEDGE_OBLIQUE27, 4, 1},
+ {WEDGE_OBLIQUE27, 4, 2},
+ {WEDGE_OBLIQUE27, 4, 3},
+ {WEDGE_OBLIQUE27, 4, 5},
+ {WEDGE_OBLIQUE27, 4, 6},
+ {WEDGE_OBLIQUE27, 4, 7},
+ {WEDGE_OBLIQUE153, 4, 1},
+ {WEDGE_OBLIQUE153, 4, 2},
+ {WEDGE_OBLIQUE153, 4, 3},
+ {WEDGE_OBLIQUE153, 4, 5},
+ {WEDGE_OBLIQUE153, 4, 6},
+ {WEDGE_OBLIQUE153, 4, 7},
+ {WEDGE_OBLIQUE63, 1, 4},
+ {WEDGE_OBLIQUE63, 2, 4},
+ {WEDGE_OBLIQUE63, 3, 4},
+ {WEDGE_OBLIQUE63, 5, 4},
+ {WEDGE_OBLIQUE63, 6, 4},
+ {WEDGE_OBLIQUE63, 7, 4},
+ {WEDGE_OBLIQUE117, 1, 4},
+ {WEDGE_OBLIQUE117, 2, 4},
+ {WEDGE_OBLIQUE117, 3, 4},
+ {WEDGE_OBLIQUE117, 5, 4},
+ {WEDGE_OBLIQUE117, 6, 4},
+ {WEDGE_OBLIQUE117, 7, 4},
+};
+
+const wedge_params_type wedge_params_lookup[BLOCK_SIZES] = {
+ {0, NULL, NULL, 0},
+ {0, NULL, NULL, 0},
+ {0, NULL, NULL, 0},
+ {5, wedge_codebook_32_heqw, wedge_signflip_lookup[3], 0},
+ {5, wedge_codebook_32_hgtw, wedge_signflip_lookup[4], 0},
+ {5, wedge_codebook_32_hltw, wedge_signflip_lookup[5], 0},
+ {5, wedge_codebook_32_heqw, wedge_signflip_lookup[6], 0},
+ {5, wedge_codebook_32_hgtw, wedge_signflip_lookup[7], 0},
+ {5, wedge_codebook_32_hltw, wedge_signflip_lookup[8], 0},
+ {5, wedge_codebook_32_heqw, wedge_signflip_lookup[9], 0},
+ {0, wedge_codebook_8_hgtw, wedge_signflip_lookup[10], 0},
+ {0, wedge_codebook_8_hltw, wedge_signflip_lookup[11], 0},
+ {0, wedge_codebook_8_heqw, wedge_signflip_lookup[12], 0},
+#if CONFIG_EXT_PARTITION
+ {0, NULL, NULL, 0},
+ {0, NULL, NULL, 0},
+ {0, NULL, NULL, 0},
+#endif // CONFIG_EXT_PARTITION
+};
+#endif // USE_LARGE_WEDGE_CODEBOOK
static const uint8_t *get_wedge_mask_inplace(int wedge_index,
int neg,
@@ -241,14 +312,17 @@
const uint8_t *master;
const int bh = 4 << b_height_log2_lookup[sb_type];
const int bw = 4 << b_width_log2_lookup[sb_type];
- const int *a = get_wedge_params(wedge_index, sb_type);
+ const wedge_code_type *a =
+ wedge_params_lookup[sb_type].codebook + wedge_index;
+ const int smoother = wedge_params_lookup[sb_type].smoother;
int woff, hoff;
- if (!a) return NULL;
- woff = (a[3] * bw) >> 3;
- hoff = (a[4] * bh) >> 3;
- master = (a[0] ?
- wedge_mask_obl[neg][a[1]][a[2]] :
- wedge_mask_str[neg][a[1]]) +
+ const uint8_t wsignflip = wedge_params_lookup[sb_type].signflip[wedge_index];
+
+ assert(wedge_index >= 0 &&
+ wedge_index < (1 << get_wedge_bits_lookup(sb_type)));
+ woff = (a->x_offset * bw) >> 3;
+ hoff = (a->y_offset * bh) >> 3;
+ master = wedge_mask_obl[smoother][neg ^ wsignflip][a->direction] +
MASK_MASTER_STRIDE * (MASK_MASTER_SIZE / 2 - hoff) +
MASK_MASTER_SIZE / 2 - woff;
return master;
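A worked example of the offset arithmetic above, using hypothetical values and
assuming a 64x64 master mask (MASK_MASTER_SIZE == MASK_MASTER_STRIDE == 64; the
actual constants are defined elsewhere):

/* Worked example with hypothetical values: 32x32 block, codebook entry
 * with x_offset == 6 and y_offset == 4, 64x64 master mask assumed. */
static const uint8_t *wedge_offset_example(const uint8_t *mask_origin) {
  const int bw = 32, bh = 32;
  const int x_offset = 6, y_offset = 4;
  const int woff = (x_offset * bw) >> 3;   /* (6 * 32) >> 3 == 24 */
  const int hoff = (y_offset * bh) >> 3;   /* (4 * 32) >> 3 == 16 */
  /* Start reading the master mask 16 rows and 24 columns before its
   * center, so block position (24, 16) lines up with the mask center. */
  return mask_origin + 64 * (64 / 2 - hoff) + (64 / 2 - woff);
}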
@@ -266,124 +340,76 @@
return mask;
}
-static void build_masked_compound(uint8_t *dst, int dst_stride,
- uint8_t *dst1, int dst1_stride,
- uint8_t *dst2, int dst2_stride,
- const uint8_t *mask,
- int h, int w, int subh, int subw) {
- int i, j;
- if (subw == 0 && subh == 0) {
- for (i = 0; i < h; ++i)
- for (j = 0; j < w; ++j) {
- int m = mask[i * MASK_MASTER_STRIDE + j];
- dst[i * dst_stride + j] = (dst1[i * dst1_stride + j] * m +
- dst2[i * dst2_stride + j] *
- ((1 << WEDGE_WEIGHT_BITS) - m) +
- (1 << (WEDGE_WEIGHT_BITS - 1))) >>
- WEDGE_WEIGHT_BITS;
-
- }
- } else if (subw == 1 && subh == 1) {
- for (i = 0; i < h; ++i)
- for (j = 0; j < w; ++j) {
- int m = (mask[(2 * i) * MASK_MASTER_STRIDE + (2 * j)] +
- mask[(2 * i + 1) * MASK_MASTER_STRIDE + (2 * j)] +
- mask[(2 * i) * MASK_MASTER_STRIDE + (2 * j + 1)] +
- mask[(2 * i + 1) * MASK_MASTER_STRIDE + (2 * j + 1)] + 2) >> 2;
- dst[i * dst_stride + j] = (dst1[i * dst1_stride + j] * m +
- dst2[i * dst2_stride + j] *
- ((1 << WEDGE_WEIGHT_BITS) - m) +
- (1 << (WEDGE_WEIGHT_BITS - 1))) >>
- WEDGE_WEIGHT_BITS;
- }
- } else if (subw == 1 && subh == 0) {
- for (i = 0; i < h; ++i)
- for (j = 0; j < w; ++j) {
- int m = (mask[i * MASK_MASTER_STRIDE + (2 * j)] +
- mask[i * MASK_MASTER_STRIDE + (2 * j + 1)] + 1) >> 1;
- dst[i * dst_stride + j] = (dst1[i * dst1_stride + j] * m +
- dst2[i * dst2_stride + j] *
- ((1 << WEDGE_WEIGHT_BITS) - m) +
- (1 << (WEDGE_WEIGHT_BITS - 1))) >>
- WEDGE_WEIGHT_BITS;
- }
- } else {
- for (i = 0; i < h; ++i)
- for (j = 0; j < w; ++j) {
- int m = (mask[(2 * i) * MASK_MASTER_STRIDE + j] +
- mask[(2 * i + 1) * MASK_MASTER_STRIDE + j] + 1) >> 1;
- dst[i * dst_stride + j] = (dst1[i * dst1_stride + j] * m +
- dst2[i * dst2_stride + j] *
- ((1 << WEDGE_WEIGHT_BITS) - m) +
- (1 << (WEDGE_WEIGHT_BITS - 1))) >>
- WEDGE_WEIGHT_BITS;
- }
+// If the signs of the wedge masks are inconsistent across block sizes,
+// flip the sign flag so the convention stays uniform. Do this only once
+// for every wedge codebook.
+static void init_wedge_signs(void) {
+ BLOCK_SIZE sb_type;
+ memset(wedge_signflip_lookup, 0, sizeof(wedge_signflip_lookup));
+ for (sb_type = BLOCK_4X4; sb_type < BLOCK_SIZES; ++sb_type) {
+ const int bw = 4 * num_4x4_blocks_wide_lookup[sb_type];
+ const int bh = 4 * num_4x4_blocks_high_lookup[sb_type];
+ const wedge_params_type wedge_params = wedge_params_lookup[sb_type];
+ const int wbits = wedge_params.bits;
+ const int wtypes = 1 << wbits;
+ int i, w;
+ if (wbits == 0) continue;
+ for (w = 0; w < wtypes; ++w) {
+ const uint8_t *mask = get_wedge_mask_inplace(w, 0, sb_type);
+ int sum = 0;
+ for (i = 0; i < bw; ++i)
+ sum += mask[i];
+ for (i = 0; i < bh; ++i)
+ sum += mask[i * MASK_MASTER_STRIDE];
+ sum = (sum + (bw + bh) / 2) / (bw + bh);
+ wedge_params.signflip[w] = (sum < 32);
+ }
}
}
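The decision rule implemented by the loop above, restated as a standalone sketch:
a wedge's sign is flipped when the mean mask weight along the block's top row and
left column falls below half of the maximum weight.

/* Sketch of the sign-flip rule: average the mask weights over the top
 * row and the left column (the top-left sample is counted twice, as in
 * the loop above) and flip when the rounded mean is below 32 (half of
 * 1 << WEDGE_WEIGHT_BITS). */
static int wedge_sign_should_flip(const uint8_t *mask, int bw, int bh) {
  int i, sum = 0;
  for (i = 0; i < bw; ++i) sum += mask[i];                       /* top row  */
  for (i = 0; i < bh; ++i) sum += mask[i * MASK_MASTER_STRIDE];  /* left col */
  return ((sum + (bw + bh) / 2) / (bw + bh)) < 32;
}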
-#if CONFIG_VP9_HIGHBITDEPTH
-static void build_masked_compound_highbd(uint8_t *dst_8, int dst_stride,
- uint8_t *dst1_8, int dst1_stride,
- uint8_t *dst2_8, int dst2_stride,
- const uint8_t *mask,
- int h, int w, int subh, int subw) {
- int i, j;
- uint16_t *dst = CONVERT_TO_SHORTPTR(dst_8);
- uint16_t *dst1 = CONVERT_TO_SHORTPTR(dst1_8);
- uint16_t *dst2 = CONVERT_TO_SHORTPTR(dst2_8);
- if (subw == 0 && subh == 0) {
+// Line through the mask center: f(x, y) = a[0]*x + a[1]*y = 0, with (x, y)
+// measured in half-pel units from the center; m below is the rounded
+// perpendicular distance from this line.
+void vp10_init_wedge_masks() {
+ int i, j, s;
+ const int w = MASK_MASTER_SIZE;
+ const int h = MASK_MASTER_SIZE;
+ const int stride = MASK_MASTER_STRIDE;
+ const int a[2] = {2, 1};
+ const double asqrt = sqrt(a[0] * a[0] + a[1] * a[1]);
+ for (s = 0; s < NSMOOTHERS; s++) {
for (i = 0; i < h; ++i)
for (j = 0; j < w; ++j) {
- int m = mask[i * MASK_MASTER_STRIDE + j];
- dst[i * dst_stride + j] = (dst1[i * dst1_stride + j] * m +
- dst2[i * dst2_stride + j] *
- ((1 << WEDGE_WEIGHT_BITS) - m) +
- (1 << (WEDGE_WEIGHT_BITS - 1))) >>
- WEDGE_WEIGHT_BITS;
- }
- } else if (subw == 1 && subh == 1) {
- for (i = 0; i < h; ++i)
- for (j = 0; j < w; ++j) {
- int m = (mask[(2 * i) * MASK_MASTER_STRIDE + (2 * j)] +
- mask[(2 * i + 1) * MASK_MASTER_STRIDE + (2 * j)] +
- mask[(2 * i) * MASK_MASTER_STRIDE + (2 * j + 1)] +
- mask[(2 * i + 1) * MASK_MASTER_STRIDE + (2 * j + 1)] + 2) >> 2;
- dst[i * dst_stride + j] = (dst1[i * dst1_stride + j] * m +
- dst2[i * dst2_stride + j] *
- ((1 << WEDGE_WEIGHT_BITS) - m) +
- (1 << (WEDGE_WEIGHT_BITS - 1))) >>
- WEDGE_WEIGHT_BITS;
- }
- } else if (subw == 1 && subh == 0) {
- for (i = 0; i < h; ++i)
- for (j = 0; j < w; ++j) {
- int m = (mask[i * MASK_MASTER_STRIDE + (2 * j)] +
- mask[i * MASK_MASTER_STRIDE + (2 * j + 1)] + 1) >> 1;
- dst[i * dst_stride + j] = (dst1[i * dst1_stride + j] * m +
- dst2[i * dst2_stride + j] *
- ((1 << WEDGE_WEIGHT_BITS) - m) +
- (1 << (WEDGE_WEIGHT_BITS - 1))) >>
- WEDGE_WEIGHT_BITS;
- }
- } else {
- for (i = 0; i < h; ++i)
- for (j = 0; j < w; ++j) {
- int m = (mask[(2 * i) * MASK_MASTER_STRIDE + j] +
- mask[(2 * i + 1) * MASK_MASTER_STRIDE + j] + 1) >> 1;
- dst[i * dst_stride + j] = (dst1[i * dst1_stride + j] * m +
- dst2[i * dst2_stride + j] *
- ((1 << WEDGE_WEIGHT_BITS) - m) +
- (1 << (WEDGE_WEIGHT_BITS - 1))) >>
- WEDGE_WEIGHT_BITS;
+ int x = (2 * j + 1 - w);
+ int y = (2 * i + 1 - h);
+ int m = (int)rint((a[0] * x + a[1] * y) / asqrt);
+ wedge_mask_obl[s][1][WEDGE_OBLIQUE63][i * stride + j] =
+ wedge_mask_obl[s][1][WEDGE_OBLIQUE27][j * stride + i] =
+ get_masked_weight(m, s);
+ wedge_mask_obl[s][1][WEDGE_OBLIQUE117][i * stride + w - 1 - j] =
+ wedge_mask_obl[s][1][WEDGE_OBLIQUE153][(w - 1 - j) * stride + i] =
+ (1 << WEDGE_WEIGHT_BITS) - get_masked_weight(m, s);
+ wedge_mask_obl[s][0][WEDGE_OBLIQUE63][i * stride + j] =
+ wedge_mask_obl[s][0][WEDGE_OBLIQUE27][j * stride + i] =
+ (1 << WEDGE_WEIGHT_BITS) - get_masked_weight(m, s);
+ wedge_mask_obl[s][0][WEDGE_OBLIQUE117][i * stride + w - 1 - j] =
+ wedge_mask_obl[s][0][WEDGE_OBLIQUE153][(w - 1 - j) * stride + i] =
+ get_masked_weight(m, s);
+ wedge_mask_obl[s][1][WEDGE_VERTICAL][i * stride + j] =
+ wedge_mask_obl[s][1][WEDGE_HORIZONTAL][j * stride + i] =
+ get_masked_weight(x, s);
+ wedge_mask_obl[s][0][WEDGE_VERTICAL][i * stride + j] =
+ wedge_mask_obl[s][0][WEDGE_HORIZONTAL][j * stride + i] =
+ (1 << WEDGE_WEIGHT_BITS) - get_masked_weight(x, s);
}
}
+ init_wedge_signs();
}
-#endif // CONFIG_VP9_HIGHBITDEPTH
+
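Two invariants fall out of the generation loop above: the two sign variants of
each mask are exact complements, and the four oblique directions come from one
distance computation via transposition and horizontal mirroring. A small
self-check sketch (it would sit next to the tables in reconinter.c):

/* Sketch of invariants established by vp10_init_wedge_masks(),
 * checked for smoother 0 at an arbitrary mask position (i, j). */
static void check_wedge_mask_invariants(int i, int j) {
  const int s = 0, stride = MASK_MASTER_STRIDE, w = MASK_MASTER_SIZE;
  /* sign 0 and sign 1 masks sum to 1 << WEDGE_WEIGHT_BITS everywhere */
  assert(wedge_mask_obl[s][0][WEDGE_OBLIQUE63][i * stride + j] +
         wedge_mask_obl[s][1][WEDGE_OBLIQUE63][i * stride + j] == 64);
  /* OBLIQUE27 is the transpose of OBLIQUE63 */
  assert(wedge_mask_obl[s][1][WEDGE_OBLIQUE27][j * stride + i] ==
         wedge_mask_obl[s][1][WEDGE_OBLIQUE63][i * stride + j]);
  /* OBLIQUE117 is the left-right mirror of OBLIQUE63 with complemented weights */
  assert(wedge_mask_obl[s][1][WEDGE_OBLIQUE117][i * stride + (w - 1 - j)] ==
         64 - wedge_mask_obl[s][1][WEDGE_OBLIQUE63][i * stride + j]);
}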
#if CONFIG_SUPERTX
static void build_masked_compound_wedge_extend(
uint8_t *dst, int dst_stride,
- uint8_t *dst2, int dst2_stride,
+ uint8_t *src0, int src0_stride,
+ uint8_t *src1, int src1_stride,
int wedge_index,
int wedge_sign,
BLOCK_SIZE sb_type,
@@ -393,33 +419,39 @@
const int subw = (2 << b_width_log2_lookup[sb_type]) == w;
const uint8_t *mask = vp10_get_soft_mask(
wedge_index, wedge_sign, sb_type, wedge_offset_x, wedge_offset_y);
- build_masked_compound(dst, dst_stride,
- dst, dst_stride, dst2, dst2_stride, mask,
- h, w, subh, subw);
+ vpx_blend_mask6(dst, dst_stride,
+ src0, src0_stride,
+ src1, src1_stride,
+ mask, MASK_MASTER_STRIDE,
+ h, w, subh, subw);
}
#if CONFIG_VP9_HIGHBITDEPTH
static void build_masked_compound_wedge_extend_highbd(
uint8_t *dst_8, int dst_stride,
- uint8_t *dst2_8, int dst2_stride,
+ uint8_t *src0_8, int src0_stride,
+ uint8_t *src1_8, int src1_stride,
int wedge_index, int wedge_sign,
BLOCK_SIZE sb_type,
int wedge_offset_x, int wedge_offset_y,
- int h, int w) {
+ int h, int w, int bd) {
const int subh = (2 << b_height_log2_lookup[sb_type]) == h;
const int subw = (2 << b_width_log2_lookup[sb_type]) == w;
const uint8_t *mask = vp10_get_soft_mask(
wedge_index, wedge_sign, sb_type, wedge_offset_x, wedge_offset_y);
- build_masked_compound_highbd(dst_8, dst_stride,
- dst_8, dst_stride, dst2_8, dst2_stride, mask,
- h, w, subh, subw);
+ vpx_highbd_blend_mask6(dst_8, dst_stride,
+ src0_8, src0_stride,
+ src1_8, src1_stride,
+ mask, MASK_MASTER_STRIDE,
+ h, w, subh, subw, bd);
}
#endif // CONFIG_VP9_HIGHBITDEPTH
#else // CONFIG_SUPERTX
static void build_masked_compound_wedge(uint8_t *dst, int dst_stride,
- uint8_t *dst2, int dst2_stride,
+ uint8_t *src0, int src0_stride,
+ uint8_t *src1, int src1_stride,
int wedge_index, int wedge_sign,
BLOCK_SIZE sb_type,
int h, int w) {
@@ -429,26 +461,31 @@
const int subw = (2 << b_width_log2_lookup[sb_type]) == w;
const uint8_t *mask = vp10_get_soft_mask(wedge_index, wedge_sign,
sb_type, 0, 0);
- build_masked_compound(dst, dst_stride,
- dst, dst_stride, dst2, dst2_stride, mask,
- h, w, subh, subw);
+ vpx_blend_mask6(dst, dst_stride,
+ src0, src0_stride,
+ src1, src1_stride,
+ mask, MASK_MASTER_STRIDE,
+ h, w, subh, subw);
}
#if CONFIG_VP9_HIGHBITDEPTH
static void build_masked_compound_wedge_highbd(uint8_t *dst_8, int dst_stride,
- uint8_t *dst2_8, int dst2_stride,
+ uint8_t *src0_8, int src0_stride,
+ uint8_t *src1_8, int src1_stride,
int wedge_index, int wedge_sign,
BLOCK_SIZE sb_type,
- int h, int w) {
+ int h, int w, int bd) {
// Derive subsampling from h and w passed in. May be refactored to
// pass in subsampling factors directly.
const int subh = (2 << b_height_log2_lookup[sb_type]) == h;
const int subw = (2 << b_width_log2_lookup[sb_type]) == w;
const uint8_t *mask = vp10_get_soft_mask(wedge_index, wedge_sign,
sb_type, 0, 0);
- build_masked_compound_highbd(dst_8, dst_stride,
- dst_8, dst_stride, dst2_8, dst2_stride, mask,
- h, w, subh, subw);
+ vpx_highbd_blend_mask6(dst_8, dst_stride,
+ src0_8, src0_stride,
+ src1_8, src1_stride,
+ mask, MASK_MASTER_STRIDE,
+ h, w, subh, subw, bd);
}
#endif // CONFIG_VP9_HIGHBITDEPTH
#endif // CONFIG_SUPERTX
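The removed build_masked_compound()/build_masked_compound_highbd() helpers are
superseded by vpx_blend_mask6()/vpx_highbd_blend_mask6(), which take both
sources and the mask stride explicitly. Assuming the new routines keep the
arithmetic of the removed code, the full-resolution path (subw == 0, subh == 0)
reduces to the sketch below; the subsampled paths average two or four mask
samples first, as the removed code did.

/* Sketch of the per-pixel blend when subw == 0 and subh == 0, reconstructed
 * from the removed build_masked_compound(); vpx_blend_mask6() is assumed to
 * match this behaviour. */
static void blend_mask6_sketch(uint8_t *dst, int dst_stride,
                               const uint8_t *src0, int src0_stride,
                               const uint8_t *src1, int src1_stride,
                               const uint8_t *mask, int mask_stride,
                               int h, int w) {
  int i, j;
  for (i = 0; i < h; ++i) {
    for (j = 0; j < w; ++j) {
      const int m = mask[i * mask_stride + j];   /* 0 .. 64 */
      dst[i * dst_stride + j] =
          (src0[i * src0_stride + j] * m +
           src1[i * src1_stride + j] * (64 - m) + 32) >> 6;
    }
  }
}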
@@ -493,14 +530,18 @@
#if CONFIG_SUPERTX
if (xd->cur_buf->flags & YV12_FLAG_HIGHBITDEPTH)
build_masked_compound_wedge_extend_highbd(
- dst, dst_stride, tmp_dst, MAX_SB_SIZE,
+ dst, dst_stride,
+ dst, dst_stride,
+ tmp_dst, MAX_SB_SIZE,
mi->mbmi.interinter_wedge_index,
mi->mbmi.interinter_wedge_sign,
mi->mbmi.sb_type,
- wedge_offset_x, wedge_offset_y, h, w);
+ wedge_offset_x, wedge_offset_y, h, w, xd->bd);
else
build_masked_compound_wedge_extend(
- dst, dst_stride, tmp_dst, MAX_SB_SIZE,
+ dst, dst_stride,
+ dst, dst_stride,
+ tmp_dst, MAX_SB_SIZE,
mi->mbmi.interinter_wedge_index,
mi->mbmi.interinter_wedge_sign,
mi->mbmi.sb_type,
@@ -508,13 +549,17 @@
#else
if (xd->cur_buf->flags & YV12_FLAG_HIGHBITDEPTH)
build_masked_compound_wedge_highbd(
- dst, dst_stride, tmp_dst, MAX_SB_SIZE,
+ dst, dst_stride,
+ dst, dst_stride,
+ tmp_dst, MAX_SB_SIZE,
mi->mbmi.interinter_wedge_index,
mi->mbmi.interinter_wedge_sign,
- mi->mbmi.sb_type, h, w);
+ mi->mbmi.sb_type, h, w, xd->bd);
else
build_masked_compound_wedge(
- dst, dst_stride, tmp_dst, MAX_SB_SIZE,
+ dst, dst_stride,
+ dst, dst_stride,
+ tmp_dst, MAX_SB_SIZE,
mi->mbmi.interinter_wedge_index,
mi->mbmi.interinter_wedge_sign,
mi->mbmi.sb_type, h, w);
@@ -526,14 +571,18 @@
tmp_ipf, xs, ys, xd);
#if CONFIG_SUPERTX
build_masked_compound_wedge_extend(
- dst, dst_stride, tmp_dst, MAX_SB_SIZE,
+ dst, dst_stride,
+ dst, dst_stride,
+ tmp_dst, MAX_SB_SIZE,
mi->mbmi.interinter_wedge_index,
mi->mbmi.interinter_wedge_sign,
mi->mbmi.sb_type,
wedge_offset_x, wedge_offset_y, h, w);
#else
build_masked_compound_wedge(
- dst, dst_stride, tmp_dst, MAX_SB_SIZE,
+ dst, dst_stride,
+ dst, dst_stride,
+ tmp_dst, MAX_SB_SIZE,
mi->mbmi.interinter_wedge_index,
mi->mbmi.interinter_wedge_sign,
mi->mbmi.sb_type, h, w);
@@ -615,6 +664,74 @@
const int is_compound = has_second_ref(&mi->mbmi);
int ref;
+#if CONFIG_DUAL_FILTER
+ if (mi->mbmi.sb_type < BLOCK_8X8 && plane > 0) {
+ int blk_num = 1 << (pd->subsampling_x + pd->subsampling_y);
+ int chr_idx;
+ int x_base = x;
+ int y_base = y;
+ int x_step = w >> pd->subsampling_x;
+ int y_step = h >> pd->subsampling_y;
+
+ for (chr_idx = 0; chr_idx < blk_num; ++chr_idx) {
+ for (ref = 0; ref < 1 + is_compound; ++ref) {
+ const struct scale_factors *const sf = &xd->block_refs[ref]->sf;
+ struct buf_2d *const pre_buf = &pd->pre[ref];
+ struct buf_2d *const dst_buf = &pd->dst;
+ uint8_t *dst = dst_buf->buf;
+ const MV mv = mi->bmi[chr_idx].as_mv[ref].as_mv;
+ const MV mv_q4 = clamp_mv_to_umv_border_sb(xd, &mv, bw, bh,
+ pd->subsampling_x,
+ pd->subsampling_y);
+ uint8_t *pre;
+ MV32 scaled_mv;
+ int xs, ys, subpel_x, subpel_y;
+ const int is_scaled = vp10_is_scaled(sf);
+
+ x = x_base + (chr_idx & 0x01) * x_step;
+ y = y_base + (chr_idx >> 1) * y_step;
+
+ dst += dst_buf->stride * y + x;
+
+ if (is_scaled) {
+ pre = pre_buf->buf + scaled_buffer_offset(x, y, pre_buf->stride, sf);
+ scaled_mv = vp10_scale_mv(&mv_q4, mi_x + x, mi_y + y, sf);
+ xs = sf->x_step_q4;
+ ys = sf->y_step_q4;
+ } else {
+ pre = pre_buf->buf + y * pre_buf->stride + x;
+ scaled_mv.row = mv_q4.row;
+ scaled_mv.col = mv_q4.col;
+ xs = ys = 16;
+ }
+
+ subpel_x = scaled_mv.col & SUBPEL_MASK;
+ subpel_y = scaled_mv.row & SUBPEL_MASK;
+ pre += (scaled_mv.row >> SUBPEL_BITS) * pre_buf->stride
+ + (scaled_mv.col >> SUBPEL_BITS);
+
+ #if CONFIG_EXT_INTER
+ if (ref && is_interinter_wedge_used(mi->mbmi.sb_type) &&
+ mi->mbmi.use_wedge_interinter)
+ vp10_make_masked_inter_predictor(
+ pre, pre_buf->stride, dst, dst_buf->stride,
+ subpel_x, subpel_y, sf, w, h,
+ mi->mbmi.interp_filter, xs, ys,
+ #if CONFIG_SUPERTX
+ wedge_offset_x, wedge_offset_y,
+ #endif // CONFIG_SUPERTX
+ xd);
+ else
+ #endif // CONFIG_EXT_INTER
+ vp10_make_inter_predictor(pre, pre_buf->stride, dst, dst_buf->stride,
+ subpel_x, subpel_y, sf, x_step, y_step, ref,
+ mi->mbmi.interp_filter, xs, ys, xd);
+ }
+ }
+ return;
+ }
+#endif
+
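For the sub-8x8 chroma path added above: with 4:2:0 subsampling blk_num is 4,
and each chroma sub-block reuses the motion vector of the corresponding luma
4x4 sub-block. The chr_idx-to-offset mapping is simply:

/* Illustration of the chr_idx -> (x, y) mapping used in the loop above
 * (4:2:0 example, so blk_num == 4). */
static void chroma_subblock_offsets(int x_base, int y_base,
                                    int x_step, int y_step,
                                    int xs[4], int ys[4]) {
  int chr_idx;
  for (chr_idx = 0; chr_idx < 4; ++chr_idx) {
    xs[chr_idx] = x_base + (chr_idx & 0x01) * x_step;  /* columns 0,1,0,1 */
    ys[chr_idx] = y_base + (chr_idx >> 1) * y_step;    /* rows    0,0,1,1 */
  }
}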
for (ref = 0; ref < 1 + is_compound; ++ref) {
const struct scale_factors *const sf = &xd->block_refs[ref]->sf;
struct buf_2d *const pre_buf = &pd->pre[ref];
@@ -1728,10 +1845,11 @@
bsize, 0, 0);
const int subw = 2 * num_4x4_blocks_wide_lookup[bsize] == bw;
const int subh = 2 * num_4x4_blocks_high_lookup[bsize] == bh;
- build_masked_compound(comppred, compstride,
- intrapred, intrastride,
- interpred, interstride, mask,
- bh, bw, subh, subw);
+ vpx_blend_mask6(comppred, compstride,
+ intrapred, intrastride,
+ interpred, interstride,
+ mask, MASK_MASTER_STRIDE,
+ bh, bw, subh, subw);
}
return;
}
@@ -1851,7 +1969,6 @@
uint16_t *comppred = CONVERT_TO_SHORTPTR(comppred8);
uint16_t *interpred = CONVERT_TO_SHORTPTR(interpred8);
uint16_t *intrapred = CONVERT_TO_SHORTPTR(intrapred8);
- (void) bd;
if (use_wedge_interintra) {
if (is_interintra_wedge_used(bsize)) {
@@ -1859,10 +1976,11 @@
bsize, 0, 0);
const int subh = 2 * num_4x4_blocks_high_lookup[bsize] == bh;
const int subw = 2 * num_4x4_blocks_wide_lookup[bsize] == bw;
- build_masked_compound_highbd(comppred8, compstride,
- intrapred8, intrastride,
- interpred8, interstride, mask,
- bh, bw, subh, subw);
+ vpx_highbd_blend_mask6(comppred8, compstride,
+ intrapred8, intrastride,
+ interpred8, interstride,
+ mask, MASK_MASTER_STRIDE,
+ bh, bw, subh, subw, bd);
}
return;
}
@@ -2263,113 +2381,74 @@
int ext_dst_stride0,
uint8_t *ext_dst1,
int ext_dst_stride1) {
- struct macroblockd_plane *const pd = &xd->plane[plane];
- const MODE_INFO *mi = xd->mi[0];
- const int is_compound = has_second_ref(&mi->mbmi);
- int ref;
+ const MB_MODE_INFO *const mbmi = &xd->mi[0]->mbmi;
+ const int is_compound = has_second_ref(mbmi);
+ MACROBLOCKD_PLANE *const pd = &xd->plane[plane];
+ struct buf_2d *const dst_buf = &pd->dst;
+ uint8_t *const dst = dst_buf->buf + dst_buf->stride * y + x;
+
(void) block;
(void) bw;
(void) bh;
(void) mi_x;
(void) mi_y;
- for (ref = 0; ref < 1 + is_compound; ++ref) {
- struct buf_2d *const dst_buf = &pd->dst;
- uint8_t *const dst = dst_buf->buf + dst_buf->stride * y + x;
-
- if (ref && is_interinter_wedge_used(mi->mbmi.sb_type)
- && mi->mbmi.use_wedge_interinter) {
-#if CONFIG_VP9_HIGHBITDEPTH
- DECLARE_ALIGNED(16, uint8_t, tmp_dst_[2 * MAX_SB_SQUARE]);
- uint8_t *tmp_dst =
- (xd->cur_buf->flags & YV12_FLAG_HIGHBITDEPTH) ?
- CONVERT_TO_BYTEPTR(tmp_dst_) : tmp_dst_;
-#else
- DECLARE_ALIGNED(16, uint8_t, tmp_dst[MAX_SB_SQUARE]);
-#endif // CONFIG_VP9_HIGHBITDEPTH
-#if CONFIG_VP9_HIGHBITDEPTH
- if (xd->cur_buf->flags & YV12_FLAG_HIGHBITDEPTH) {
- int k;
- for (k = 0; k < h; ++k)
- memcpy(tmp_dst_ + 2 * MAX_SB_SIZE * k, ext_dst1 +
- ext_dst_stride1 * 2 * k, w * 2);
- } else {
- int k;
- for (k = 0; k < h; ++k)
- memcpy(tmp_dst_ + MAX_SB_SIZE * k, ext_dst1 +
- ext_dst_stride1 * k, w);
- }
-#else
- {
- int k;
- for (k = 0; k < h; ++k)
- memcpy(tmp_dst + MAX_SB_SIZE * k, ext_dst1 +
- ext_dst_stride1 * k, w);
- }
-#endif // CONFIG_VP9_HIGHBITDEPTH
-
+ if (is_compound
+ && is_interinter_wedge_used(mbmi->sb_type)
+ && mbmi->use_wedge_interinter) {
#if CONFIG_SUPERTX
#if CONFIG_VP9_HIGHBITDEPTH
- if (xd->cur_buf->flags & YV12_FLAG_HIGHBITDEPTH) {
- build_masked_compound_wedge_extend_highbd(
- dst, dst_buf->stride, tmp_dst, MAX_SB_SIZE,
- mi->mbmi.interinter_wedge_index,
- mi->mbmi.interinter_wedge_sign,
- mi->mbmi.sb_type,
- wedge_offset_x, wedge_offset_y, h, w);
- } else {
- build_masked_compound_wedge_extend(
- dst, dst_buf->stride, tmp_dst, MAX_SB_SIZE,
- mi->mbmi.interinter_wedge_index,
- mi->mbmi.interinter_wedge_sign,
- mi->mbmi.sb_type,
- wedge_offset_x, wedge_offset_y, h, w);
- }
-#else
- build_masked_compound_wedge_extend(dst, dst_buf->stride,
- tmp_dst, MAX_SB_SIZE,
- mi->mbmi.interinter_wedge_index,
- mi->mbmi.interinter_wedge_sign,
- mi->mbmi.sb_type,
- wedge_offset_x, wedge_offset_y, h, w);
+ if (xd->cur_buf->flags & YV12_FLAG_HIGHBITDEPTH)
+ build_masked_compound_wedge_extend_highbd(
+ dst, dst_buf->stride,
+ CONVERT_TO_BYTEPTR(ext_dst0), ext_dst_stride0,
+ CONVERT_TO_BYTEPTR(ext_dst1), ext_dst_stride1,
+ mbmi->interinter_wedge_index,
+ mbmi->interinter_wedge_sign,
+ mbmi->sb_type,
+ wedge_offset_x, wedge_offset_y, h, w,
+ xd->bd);
+ else
#endif // CONFIG_VP9_HIGHBITDEPTH
+ build_masked_compound_wedge_extend(
+ dst, dst_buf->stride,
+ ext_dst0, ext_dst_stride0,
+ ext_dst1, ext_dst_stride1,
+ mbmi->interinter_wedge_index,
+ mbmi->interinter_wedge_sign,
+ mbmi->sb_type,
+ wedge_offset_x, wedge_offset_y, h, w);
#else // CONFIG_SUPERTX
#if CONFIG_VP9_HIGHBITDEPTH
- if (xd->cur_buf->flags & YV12_FLAG_HIGHBITDEPTH)
- build_masked_compound_wedge_highbd(dst, dst_buf->stride, tmp_dst,
- MAX_SB_SIZE,
- mi->mbmi.interinter_wedge_index,
- mi->mbmi.interinter_wedge_sign,
- mi->mbmi.sb_type, h, w);
- else
+ if (xd->cur_buf->flags & YV12_FLAG_HIGHBITDEPTH)
+ build_masked_compound_wedge_highbd(
+ dst, dst_buf->stride,
+ CONVERT_TO_BYTEPTR(ext_dst0), ext_dst_stride0,
+ CONVERT_TO_BYTEPTR(ext_dst1), ext_dst_stride1,
+ mbmi->interinter_wedge_index,
+ mbmi->interinter_wedge_sign,
+ mbmi->sb_type, h, w,
+ xd->bd);
+ else
#endif // CONFIG_VP9_HIGHBITDEPTH
- build_masked_compound_wedge(dst, dst_buf->stride, tmp_dst, MAX_SB_SIZE,
- mi->mbmi.interinter_wedge_index,
- mi->mbmi.interinter_wedge_sign,
- mi->mbmi.sb_type, h, w);
+ build_masked_compound_wedge(
+ dst, dst_buf->stride,
+ ext_dst0, ext_dst_stride0,
+ ext_dst1, ext_dst_stride1,
+ mbmi->interinter_wedge_index,
+ mbmi->interinter_wedge_sign,
+ mbmi->sb_type, h, w);
#endif // CONFIG_SUPERTX
- } else {
+ } else {
#if CONFIG_VP9_HIGHBITDEPTH
- if (xd->cur_buf->flags & YV12_FLAG_HIGHBITDEPTH) {
- int k;
- for (k = 0; k < h; ++k)
- memcpy(CONVERT_TO_SHORTPTR(dst + dst_buf->stride * k),
- ext_dst0 + ext_dst_stride0 * 2 * k, w * 2);
- } else {
- int k;
- for (k = 0; k < h; ++k)
- memcpy(dst + dst_buf->stride * k,
- ext_dst0 + ext_dst_stride0 * k, w);
- }
-#else
- {
- int k;
- for (k = 0; k < h; ++k)
- memcpy(dst + dst_buf->stride * k,
- ext_dst0 + ext_dst_stride0 * k, w);
- }
+ if (xd->cur_buf->flags & YV12_FLAG_HIGHBITDEPTH)
+ vpx_highbd_convolve_copy(CONVERT_TO_BYTEPTR(ext_dst0), ext_dst_stride0,
+ dst, dst_buf->stride, NULL, 0, NULL, 0, w, h,
+ xd->bd);
+ else
#endif // CONFIG_VP9_HIGHBITDEPTH
- }
+ vpx_convolve_copy(ext_dst0, ext_dst_stride0,
+ dst, dst_buf->stride, NULL, 0, NULL, 0, w, h);
}
}
diff --git a/vp10/common/reconinter.h b/vp10/common/reconinter.h
index 1e8679b..5d9a6f9 100644
--- a/vp10/common/reconinter.h
+++ b/vp10/common/reconinter.h
@@ -44,7 +44,8 @@
#if CONFIG_DUAL_FILTER
if (interp_filter_params_x.taps == SUBPEL_TAPS &&
- interp_filter_params_y.taps == SUBPEL_TAPS) {
+ interp_filter_params_y.taps == SUBPEL_TAPS &&
+ w > 2 && h > 2) {
const int16_t *kernel_x =
vp10_get_interp_filter_subpel_kernel(interp_filter_params_x, subpel_x);
const int16_t *kernel_y =
@@ -106,7 +107,8 @@
#if CONFIG_DUAL_FILTER
if (interp_filter_params_x.taps == SUBPEL_TAPS &&
- interp_filter_params_y.taps == SUBPEL_TAPS) {
+ interp_filter_params_y.taps == SUBPEL_TAPS &&
+ w > 2 && h > 2) {
const int16_t *kernel_x =
vp10_get_interp_filter_subpel_kernel(interp_filter_params_x, subpel_x);
const int16_t *kernel_y =
@@ -146,6 +148,64 @@
}
#endif // CONFIG_VP9_HIGHBITDEPTH
+#if CONFIG_EXT_INTER
+#define MAX_WEDGE_TYPES (1 << 5)
+
+#define WEDGE_WEIGHT_BITS 6
+
+#define WEDGE_NONE -1
+
+// Angles are measured anti-clockwise from the horizontal axis.
+typedef enum {
+ WEDGE_HORIZONTAL = 0,
+ WEDGE_VERTICAL = 1,
+ WEDGE_OBLIQUE27 = 2,
+ WEDGE_OBLIQUE63 = 3,
+ WEDGE_OBLIQUE117 = 4,
+ WEDGE_OBLIQUE153 = 5,
+ WEDGE_DIRECTIONS
+} WedgeDirectionType;
+
+// 3-tuple: {direction, x_offset, y_offset}
+typedef struct {
+ WedgeDirectionType direction;
+ int x_offset;
+ int y_offset;
+} wedge_code_type;
+
+typedef struct {
+ int bits;
+ const wedge_code_type *codebook;
+ uint8_t *signflip;
+ int smoother;
+} wedge_params_type;
+
+extern const wedge_params_type wedge_params_lookup[BLOCK_SIZES];
+
+static INLINE int get_wedge_bits_lookup(BLOCK_SIZE sb_type) {
+ return wedge_params_lookup[sb_type].bits;
+}
+
+static INLINE int is_interinter_wedge_used(BLOCK_SIZE sb_type) {
+ (void) sb_type;
+ return wedge_params_lookup[sb_type].bits > 0;
+}
+
+static INLINE int get_interinter_wedge_bits(BLOCK_SIZE sb_type) {
+ const int wbits = wedge_params_lookup[sb_type].bits;
+ return (wbits > 0) ? wbits + 1 : 0;
+}
+
+static INLINE int is_interintra_wedge_used(BLOCK_SIZE sb_type) {
+ (void) sb_type;
+ return wedge_params_lookup[sb_type].bits > 0;
+}
+
+static INLINE int get_interintra_wedge_bits(BLOCK_SIZE sb_type) {
+ return wedge_params_lookup[sb_type].bits;
+}
+#endif // CONFIG_EXT_INTER
+
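Usage note for the accessors above: bits is the base-2 log of the number of
wedge patterns in the codebook for that block size, and inter-inter signaling
spends one extra bit on the sign, hence get_interinter_wedge_bits() returns
bits + 1. A small illustration:

/* Example: number of (pattern, sign) combinations for a block size.
 * With bits == 4 this is 16 patterns * 2 signs == 32 combinations. */
static int num_interinter_wedge_combinations(BLOCK_SIZE bsize) {
  const int bits = get_wedge_bits_lookup(bsize);  /* patterns: 1 << bits */
  if (bits == 0) return 0;                        /* wedges disabled     */
  return (1 << bits) * 2;
}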
void build_inter_predictors(MACROBLOCKD *xd, int plane,
#if CONFIG_OBMC
int mi_col_offset, int mi_row_offset,
@@ -385,10 +445,10 @@
#if CONFIG_DUAL_FILTER
// Detect if the block have sub-pixel level motion vectors
// per component.
-static INLINE int has_subpel_mv_component(const MACROBLOCKD *const xd,
+static INLINE int has_subpel_mv_component(const MODE_INFO *const mi,
+ const MACROBLOCKD *const xd,
int dir) {
- MODE_INFO *const mi = xd->mi[0];
- MB_MODE_INFO *const mbmi = &mi->mbmi;
+ const MB_MODE_INFO *const mbmi = &mi->mbmi;
const BLOCK_SIZE bsize = mbmi->sb_type;
int plane;
int ref = (dir >> 1);
diff --git a/vp10/common/reconintra.c b/vp10/common/reconintra.c
index 6b4a460..fa20f2c 100644
--- a/vp10/common/reconintra.c
+++ b/vp10/common/reconintra.c
@@ -391,7 +391,6 @@
#if CONFIG_EXT_INTRA
#define FILTER_INTRA_PREC_BITS 10
-#define FILTER_INTRA_ROUND_VAL 511
static const uint8_t ext_intra_extend_modes[FILTER_INTRA_MODES] = {
NEED_LEFT | NEED_ABOVE, // FILTER_DC
@@ -774,9 +773,7 @@
for (c = 1; c < 2 * bs + 1 - r; ++c) {
ipred = c0 * pred[r - 1][c] + c1 * pred[r][c - 1] +
c2 * pred[r - 1][c - 1] + c3 * pred[r - 1][c + 1];
- pred[r][c] = ipred < 0 ?
- -((-ipred + FILTER_INTRA_ROUND_VAL) >> FILTER_INTRA_PREC_BITS) :
- ((ipred + FILTER_INTRA_ROUND_VAL) >> FILTER_INTRA_PREC_BITS);
+ pred[r][c] = ROUND_POWER_OF_TWO_SIGNED(ipred, FILTER_INTRA_PREC_BITS);
}
for (r = 0; r < bs; ++r) {
@@ -1050,9 +1047,7 @@
for (c = 1; c < 2 * bs + 1 - r; ++c) {
ipred = c0 * pred[r - 1][c] + c1 * pred[r][c - 1] +
c2 * pred[r - 1][c - 1] + c3 * pred[r - 1][c + 1];
- pred[r][c] = ipred < 0 ?
- -((-ipred + FILTER_INTRA_ROUND_VAL) >> FILTER_INTRA_PREC_BITS) :
- ((ipred + FILTER_INTRA_ROUND_VAL) >> FILTER_INTRA_PREC_BITS);
+ pred[r][c] = ROUND_POWER_OF_TWO_SIGNED(ipred, FILTER_INTRA_PREC_BITS);
}
for (r = 0; r < bs; ++r) {
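The open-coded rounding with FILTER_INTRA_ROUND_VAL (bias 511) is replaced by
ROUND_POWER_OF_TWO_SIGNED. Assuming the macro mirrors the usual
ROUND_POWER_OF_TWO for negative inputs (an assumption; its definition is not
part of this change), the semantics are roughly:

/* Sketch of the assumed rounding (n == FILTER_INTRA_PREC_BITS == 10).
 * Note the old code biased by 511 rather than 512, so results may differ
 * by one for values exactly halfway between representable outputs. */
#define SKETCH_ROUND_POW2(v, n)        (((v) + (1 << ((n) - 1))) >> (n))
#define SKETCH_ROUND_POW2_SIGNED(v, n) \
  (((v) < 0) ? -SKETCH_ROUND_POW2(-(v), (n)) : SKETCH_ROUND_POW2((v), (n)))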
diff --git a/vp10/common/vp10_convolve.c b/vp10/common/vp10_convolve.c
index 6514a60..d7e2eaf 100644
--- a/vp10/common/vp10_convolve.c
+++ b/vp10/common/vp10_convolve.c
@@ -142,13 +142,21 @@
MAX_BLOCK_WIDTH];
int temp_stride = MAX_BLOCK_WIDTH;
#if CONFIG_DUAL_FILTER
- InterpFilterParams filter_params =
+ InterpFilterParams filter_params_x =
vp10_get_interp_filter_params(interp_filter[1 + 2 * ref_idx]);
+ InterpFilterParams filter_params_y =
+ vp10_get_interp_filter_params(interp_filter[0 + 2 * ref_idx]);
+ InterpFilterParams filter_params = filter_params_x;
+
+  // The vertical filter size determines how many intermediate rows the
+  // first (horizontal) pass must produce for the second (vertical) pass.
+  // The two directions may require different filter sizes.
+ int filter_size = filter_params_y.taps;
#else
InterpFilterParams filter_params =
vp10_get_interp_filter_params(interp_filter);
-#endif
int filter_size = filter_params.taps;
+#endif
int intermediate_height =
(((h - 1) * y_step_q4 + subpel_y_q4) >> SUBPEL_BITS) + filter_size;
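A worked example of the intermediate buffer height, with illustrative values
(SUBPEL_BITS is 4): an unscaled 16-row block with a half-pel vertical offset
and an 8-tap vertical filter needs 23 intermediate rows.

/* Worked example (illustrative values only). */
static int intermediate_height_example(void) {
  const int h = 16, y_step_q4 = 16, subpel_y_q4 = 8;  /* unscaled, half-pel */
  const int filter_size = 8;                          /* vertical taps      */
  return (((h - 1) * y_step_q4 + subpel_y_q4) >> 4) + filter_size;  /* 23 */
}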
@@ -159,7 +167,7 @@
subpel_x_q4, x_step_q4, 0);
#if CONFIG_DUAL_FILTER
- filter_params = vp10_get_interp_filter_params(interp_filter[2 * ref_idx]);
+ filter_params = filter_params_y;
#else
filter_params = vp10_get_interp_filter_params(interp_filter);
#endif
@@ -312,13 +320,17 @@
int temp_stride = MAX_BLOCK_WIDTH;
#if CONFIG_DUAL_FILTER
- InterpFilterParams filter_params =
+ InterpFilterParams filter_params_x =
vp10_get_interp_filter_params(interp_filter[1 + 2 * ref_idx]);
+ InterpFilterParams filter_params_y =
+ vp10_get_interp_filter_params(interp_filter[0 + 2 * ref_idx]);
+ InterpFilterParams filter_params = filter_params_x;
+ int filter_size = filter_params_y.taps;
#else
InterpFilterParams filter_params =
vp10_get_interp_filter_params(interp_filter);
-#endif
int filter_size = filter_params.taps;
+#endif
int intermediate_height =
(((h - 1) * y_step_q4 + subpel_y_q4) >> SUBPEL_BITS) + filter_size;
@@ -328,9 +340,7 @@
filter_params, subpel_x_q4, x_step_q4, 0, bd);
#if CONFIG_DUAL_FILTER
- filter_params = vp10_get_interp_filter_params(interp_filter[2 * ref_idx]);
-#else
- filter_params = vp10_get_interp_filter_params(interp_filter);
+ filter_params = filter_params_y;
#endif
filter_size = filter_params.taps;
assert(filter_params.taps <= MAX_FILTER_TAP);
diff --git a/vp10/common/vp10_fwd_txfm2d.c b/vp10/common/vp10_fwd_txfm2d.c
index cd5ce71..91a5357 100644
--- a/vp10/common/vp10_fwd_txfm2d.c
+++ b/vp10/common/vp10_fwd_txfm2d.c
@@ -19,31 +19,22 @@
switch (txfm_type) {
case TXFM_TYPE_DCT4:
return vp10_fdct4_new;
- break;
case TXFM_TYPE_DCT8:
return vp10_fdct8_new;
- break;
case TXFM_TYPE_DCT16:
return vp10_fdct16_new;
- break;
case TXFM_TYPE_DCT32:
return vp10_fdct32_new;
- break;
case TXFM_TYPE_DCT64:
return vp10_fdct64_new;
- break;
case TXFM_TYPE_ADST4:
return vp10_fadst4_new;
- break;
case TXFM_TYPE_ADST8:
return vp10_fadst8_new;
- break;
case TXFM_TYPE_ADST16:
return vp10_fadst16_new;
- break;
case TXFM_TYPE_ADST32:
return vp10_fadst32_new;
- break;
default:
assert(0);
return NULL;
@@ -51,180 +42,153 @@
}
static INLINE void fwd_txfm2d_c(const int16_t *input, int32_t *output,
- const int stride, const TXFM_2D_CFG *cfg,
+ const int stride, const TXFM_2D_FLIP_CFG *cfg,
int32_t *buf) {
- int i, j;
- const int txfm_size = cfg->txfm_size;
- const int8_t *shift = cfg->shift;
- const int8_t *stage_range_col = cfg->stage_range_col;
- const int8_t *stage_range_row = cfg->stage_range_row;
- const int8_t *cos_bit_col = cfg->cos_bit_col;
- const int8_t *cos_bit_row = cfg->cos_bit_row;
- const TxfmFunc txfm_func_col = fwd_txfm_type_to_func(cfg->txfm_type_col);
- const TxfmFunc txfm_func_row = fwd_txfm_type_to_func(cfg->txfm_type_row);
+ int c, r;
+ const int txfm_size = cfg->cfg->txfm_size;
+ const int8_t *shift = cfg->cfg->shift;
+ const int8_t *stage_range_col = cfg->cfg->stage_range_col;
+ const int8_t *stage_range_row = cfg->cfg->stage_range_row;
+ const int8_t *cos_bit_col = cfg->cfg->cos_bit_col;
+ const int8_t *cos_bit_row = cfg->cfg->cos_bit_row;
+ const TxfmFunc txfm_func_col = fwd_txfm_type_to_func(cfg->cfg->txfm_type_col);
+ const TxfmFunc txfm_func_row = fwd_txfm_type_to_func(cfg->cfg->txfm_type_row);
// use output buffer as temp buffer
int32_t* temp_in = output;
int32_t* temp_out = output + txfm_size;
// Columns
- for (i = 0; i < txfm_size; ++i) {
- for (j = 0; j < txfm_size; ++j)
- temp_in[j] = input[j * stride + i];
+ for (c = 0; c < txfm_size; ++c) {
+ if (cfg->ud_flip == 0) {
+ for (r = 0; r < txfm_size; ++r)
+ temp_in[r] = input[r * stride + c];
+ } else {
+ for (r = 0; r < txfm_size; ++r)
+ // flip upside down
+ temp_in[r] = input[(txfm_size - r - 1) * stride + c];
+ }
round_shift_array(temp_in, txfm_size, -shift[0]);
txfm_func_col(temp_in, temp_out, cos_bit_col, stage_range_col);
round_shift_array(temp_out, txfm_size, -shift[1]);
- for (j = 0; j < txfm_size; ++j)
- buf[j * txfm_size + i] = temp_out[j];
+ if (cfg->lr_flip == 0) {
+ for (r = 0; r < txfm_size; ++r)
+ buf[r * txfm_size + c] = temp_out[r];
+ } else {
+ for (r = 0; r < txfm_size; ++r)
+ // flip from left to right
+ buf[r * txfm_size + (txfm_size - c - 1)] = temp_out[r];
+ }
}
// Rows
- for (i = 0; i < txfm_size; ++i) {
- txfm_func_row(buf + i * txfm_size, output + i * txfm_size, cos_bit_row,
+ for (r = 0; r < txfm_size; ++r) {
+ txfm_func_row(buf + r * txfm_size, output + r * txfm_size, cos_bit_row,
stage_range_row);
- round_shift_array(output + i * txfm_size, txfm_size, -shift[2]);
+ round_shift_array(output + r * txfm_size, txfm_size, -shift[2]);
}
}
void vp10_fwd_txfm2d_4x4_c(const int16_t *input, int32_t *output,
- const int stride, int tx_type,
- const int bd) {
+ const int stride, int tx_type,
+ const int bd) {
int32_t txfm_buf[4 * 4];
- const TXFM_2D_CFG* cfg = vp10_get_txfm_4x4_cfg(tx_type);
+ TXFM_2D_FLIP_CFG cfg = vp10_get_fwd_txfm_cfg(tx_type, TX_4X4);
(void)bd;
- fwd_txfm2d_c(input, output, stride, cfg, txfm_buf);
+ fwd_txfm2d_c(input, output, stride, &cfg, txfm_buf);
}
void vp10_fwd_txfm2d_8x8_c(const int16_t *input, int32_t *output,
- const int stride, int tx_type,
- const int bd) {
+ const int stride, int tx_type,
+ const int bd) {
int32_t txfm_buf[8 * 8];
- const TXFM_2D_CFG* cfg = vp10_get_txfm_8x8_cfg(tx_type);
+ TXFM_2D_FLIP_CFG cfg = vp10_get_fwd_txfm_cfg(tx_type, TX_8X8);
(void)bd;
- fwd_txfm2d_c(input, output, stride, cfg, txfm_buf);
+ fwd_txfm2d_c(input, output, stride, &cfg, txfm_buf);
}
void vp10_fwd_txfm2d_16x16_c(const int16_t *input, int32_t *output,
- const int stride, int tx_type,
- const int bd) {
+ const int stride, int tx_type,
+ const int bd) {
int32_t txfm_buf[16 * 16];
- const TXFM_2D_CFG* cfg = vp10_get_txfm_16x16_cfg(tx_type);
+ TXFM_2D_FLIP_CFG cfg = vp10_get_fwd_txfm_cfg(tx_type, TX_16X16);
(void)bd;
- fwd_txfm2d_c(input, output, stride, cfg, txfm_buf);
+ fwd_txfm2d_c(input, output, stride, &cfg, txfm_buf);
}
void vp10_fwd_txfm2d_32x32_c(const int16_t *input, int32_t *output,
- const int stride, int tx_type,
- const int bd) {
+ const int stride, int tx_type,
+ const int bd) {
int32_t txfm_buf[32 * 32];
- const TXFM_2D_CFG* cfg = vp10_get_txfm_32x32_cfg(tx_type);
+ TXFM_2D_FLIP_CFG cfg = vp10_get_fwd_txfm_cfg(tx_type, TX_32X32);
(void)bd;
- fwd_txfm2d_c(input, output, stride, cfg, txfm_buf);
+ fwd_txfm2d_c(input, output, stride, &cfg, txfm_buf);
}
void vp10_fwd_txfm2d_64x64_c(const int16_t *input, int32_t *output,
- const int stride, int tx_type,
- const int bd) {
+ const int stride, int tx_type,
+ const int bd) {
int32_t txfm_buf[64 * 64];
- const TXFM_2D_CFG* cfg = vp10_get_txfm_64x64_cfg(tx_type);
+ TXFM_2D_FLIP_CFG cfg = vp10_get_fwd_txfm_64x64_cfg(tx_type);
(void)bd;
- fwd_txfm2d_c(input, output, stride, cfg, txfm_buf);
+ fwd_txfm2d_c(input, output, stride, &cfg, txfm_buf);
}
-const TXFM_2D_CFG* vp10_get_txfm_4x4_cfg(int tx_type) {
- const TXFM_2D_CFG* cfg = NULL;
- switch (tx_type) {
- case DCT_DCT:
- cfg = &fwd_txfm_2d_cfg_dct_dct_4;
- break;
- case ADST_DCT:
- cfg = &fwd_txfm_2d_cfg_adst_dct_4;
- break;
- case DCT_ADST:
- cfg = &fwd_txfm_2d_cfg_dct_adst_4;
- break;
- case ADST_ADST:
- cfg = &fwd_txfm_2d_cfg_adst_adst_4;
- break;
- default:
- assert(0);
- }
+#if CONFIG_EXT_TX
+static const TXFM_2D_CFG* fwd_txfm_cfg_ls[FLIPADST_ADST + 1][TX_SIZES] = {
+ {&fwd_txfm_2d_cfg_dct_dct_4 , &fwd_txfm_2d_cfg_dct_dct_8,
+ &fwd_txfm_2d_cfg_dct_dct_16 , &fwd_txfm_2d_cfg_dct_dct_32},
+ {&fwd_txfm_2d_cfg_adst_dct_4 , &fwd_txfm_2d_cfg_adst_dct_8,
+ &fwd_txfm_2d_cfg_adst_dct_16 , &fwd_txfm_2d_cfg_adst_dct_32},
+ {&fwd_txfm_2d_cfg_dct_adst_4 , &fwd_txfm_2d_cfg_dct_adst_8,
+ &fwd_txfm_2d_cfg_dct_adst_16 , &fwd_txfm_2d_cfg_dct_adst_32},
+ {&fwd_txfm_2d_cfg_adst_adst_4, &fwd_txfm_2d_cfg_adst_adst_8,
+ &fwd_txfm_2d_cfg_adst_adst_16, &fwd_txfm_2d_cfg_adst_adst_32},
+ {&fwd_txfm_2d_cfg_adst_dct_4 , &fwd_txfm_2d_cfg_adst_dct_8,
+ &fwd_txfm_2d_cfg_adst_dct_16 , &fwd_txfm_2d_cfg_adst_dct_32},
+ {&fwd_txfm_2d_cfg_dct_adst_4 , &fwd_txfm_2d_cfg_dct_adst_8,
+ &fwd_txfm_2d_cfg_dct_adst_16 , &fwd_txfm_2d_cfg_dct_adst_32},
+ {&fwd_txfm_2d_cfg_adst_adst_4, &fwd_txfm_2d_cfg_adst_adst_8,
+ &fwd_txfm_2d_cfg_adst_adst_16, &fwd_txfm_2d_cfg_adst_adst_32},
+ {&fwd_txfm_2d_cfg_adst_adst_4, &fwd_txfm_2d_cfg_adst_adst_8,
+ &fwd_txfm_2d_cfg_adst_adst_16, &fwd_txfm_2d_cfg_adst_adst_32},
+ {&fwd_txfm_2d_cfg_adst_adst_4, &fwd_txfm_2d_cfg_adst_adst_8,
+ &fwd_txfm_2d_cfg_adst_adst_16, &fwd_txfm_2d_cfg_adst_adst_32},
+};
+#else // CONFIG_EXT_TX
+static const TXFM_2D_CFG* fwd_txfm_cfg_ls[TX_TYPES][TX_SIZES] = {
+ {&fwd_txfm_2d_cfg_dct_dct_4 , &fwd_txfm_2d_cfg_dct_dct_8,
+ &fwd_txfm_2d_cfg_dct_dct_16 , &fwd_txfm_2d_cfg_dct_dct_32},
+ {&fwd_txfm_2d_cfg_adst_dct_4 , &fwd_txfm_2d_cfg_adst_dct_8,
+ &fwd_txfm_2d_cfg_adst_dct_16 , &fwd_txfm_2d_cfg_adst_dct_32},
+ {&fwd_txfm_2d_cfg_dct_adst_4 , &fwd_txfm_2d_cfg_dct_adst_8,
+ &fwd_txfm_2d_cfg_dct_adst_16 , &fwd_txfm_2d_cfg_dct_adst_32},
+ {&fwd_txfm_2d_cfg_adst_adst_4, &fwd_txfm_2d_cfg_adst_adst_8,
+ &fwd_txfm_2d_cfg_adst_adst_16, &fwd_txfm_2d_cfg_adst_adst_32},
+};
+#endif // CONFIG_EXT_TX
+
+TXFM_2D_FLIP_CFG vp10_get_fwd_txfm_cfg(int tx_type, int tx_size) {
+ TXFM_2D_FLIP_CFG cfg;
+ set_flip_cfg(tx_type, &cfg);
+ cfg.cfg = fwd_txfm_cfg_ls[tx_type][tx_size];
return cfg;
}
-const TXFM_2D_CFG* vp10_get_txfm_8x8_cfg(int tx_type) {
- const TXFM_2D_CFG* cfg = NULL;
+TXFM_2D_FLIP_CFG vp10_get_fwd_txfm_64x64_cfg(int tx_type) {
+ TXFM_2D_FLIP_CFG cfg;
switch (tx_type) {
case DCT_DCT:
- cfg = &fwd_txfm_2d_cfg_dct_dct_8;
- break;
- case ADST_DCT:
- cfg = &fwd_txfm_2d_cfg_adst_dct_8;
- break;
- case DCT_ADST:
- cfg = &fwd_txfm_2d_cfg_dct_adst_8;
- break;
- case ADST_ADST:
- cfg = &fwd_txfm_2d_cfg_adst_adst_8;
- break;
- default:
- assert(0);
- }
- return cfg;
-}
-
-const TXFM_2D_CFG* vp10_get_txfm_16x16_cfg(int tx_type) {
- const TXFM_2D_CFG* cfg = NULL;
- switch (tx_type) {
- case DCT_DCT:
- cfg = &fwd_txfm_2d_cfg_dct_dct_16;
- break;
- case ADST_DCT:
- cfg = &fwd_txfm_2d_cfg_adst_dct_16;
- break;
- case DCT_ADST:
- cfg = &fwd_txfm_2d_cfg_dct_adst_16;
- break;
- case ADST_ADST:
- cfg = &fwd_txfm_2d_cfg_adst_adst_16;
- break;
- default:
- assert(0);
- }
- return cfg;
-}
-
-const TXFM_2D_CFG* vp10_get_txfm_32x32_cfg(int tx_type) {
- const TXFM_2D_CFG* cfg = NULL;
- switch (tx_type) {
- case DCT_DCT:
- cfg = &fwd_txfm_2d_cfg_dct_dct_32;
- break;
- case ADST_DCT:
- cfg = &fwd_txfm_2d_cfg_adst_dct_32;
- break;
- case DCT_ADST:
- cfg = &fwd_txfm_2d_cfg_dct_adst_32;
- break;
- case ADST_ADST:
- cfg = &fwd_txfm_2d_cfg_adst_adst_32;
- break;
- default:
- assert(0);
- }
- return cfg;
-}
-
-const TXFM_2D_CFG* vp10_get_txfm_64x64_cfg(int tx_type) {
- const TXFM_2D_CFG* cfg = NULL;
- switch (tx_type) {
- case DCT_DCT:
- cfg = &fwd_txfm_2d_cfg_dct_dct_64;
+ cfg.cfg = &fwd_txfm_2d_cfg_dct_dct_64;
+ cfg.ud_flip = 0;
+ cfg.lr_flip = 0;
break;
case ADST_DCT:
case DCT_ADST:
case ADST_ADST:
default:
+ cfg.ud_flip = 0;
+ cfg.lr_flip = 0;
assert(0);
}
return cfg;
diff --git a/vp10/common/vp10_fwd_txfm2d_cfg.h b/vp10/common/vp10_fwd_txfm2d_cfg.h
index ed976df..e15e4ba 100644
--- a/vp10/common/vp10_fwd_txfm2d_cfg.h
+++ b/vp10/common/vp10_fwd_txfm2d_cfg.h
@@ -399,11 +399,4 @@
fwd_cos_bit_row_adst_dct_32, // .cos_bit_row
TXFM_TYPE_ADST32, // .txfm_type_col
TXFM_TYPE_DCT32}; // .txfm_type_row
-
-const TXFM_2D_CFG* vp10_get_txfm_4x4_cfg(int tx_type);
-const TXFM_2D_CFG* vp10_get_txfm_8x8_cfg(int tx_type);
-const TXFM_2D_CFG* vp10_get_txfm_16x16_cfg(int tx_type);
-const TXFM_2D_CFG* vp10_get_txfm_32x32_cfg(int tx_type);
-const TXFM_2D_CFG* vp10_get_txfm_64x64_cfg(int tx_type);
-
#endif // VP10_FWD_TXFM2D_CFG_H_
diff --git a/vp10/common/vp10_inv_txfm2d.c b/vp10/common/vp10_inv_txfm2d.c
index 3ae54c9..ccf4614 100644
--- a/vp10/common/vp10_inv_txfm2d.c
+++ b/vp10/common/vp10_inv_txfm2d.c
@@ -17,51 +17,75 @@
switch (txfm_type) {
case TXFM_TYPE_DCT4:
return vp10_idct4_new;
- break;
case TXFM_TYPE_DCT8:
return vp10_idct8_new;
- break;
case TXFM_TYPE_DCT16:
return vp10_idct16_new;
- break;
case TXFM_TYPE_DCT32:
return vp10_idct32_new;
- break;
case TXFM_TYPE_DCT64:
return vp10_idct64_new;
- break;
case TXFM_TYPE_ADST4:
return vp10_iadst4_new;
- break;
case TXFM_TYPE_ADST8:
return vp10_iadst8_new;
- break;
case TXFM_TYPE_ADST16:
return vp10_iadst16_new;
- break;
case TXFM_TYPE_ADST32:
return vp10_iadst32_new;
- break;
default:
assert(0);
return NULL;
}
}
-static const TXFM_2D_CFG* vp10_get_inv_txfm_4x4_cfg(int tx_type) {
- const TXFM_2D_CFG* cfg = NULL;
+#if CONFIG_EXT_TX
+static const TXFM_2D_CFG* inv_txfm_cfg_ls[FLIPADST_ADST + 1][TX_SIZES] = {
+ {&inv_txfm_2d_cfg_dct_dct_4 , &inv_txfm_2d_cfg_dct_dct_8,
+ &inv_txfm_2d_cfg_dct_dct_16 , &inv_txfm_2d_cfg_dct_dct_32},
+ {&inv_txfm_2d_cfg_adst_dct_4 , &inv_txfm_2d_cfg_adst_dct_8,
+ &inv_txfm_2d_cfg_adst_dct_16 , &inv_txfm_2d_cfg_adst_dct_32},
+ {&inv_txfm_2d_cfg_dct_adst_4 , &inv_txfm_2d_cfg_dct_adst_8,
+ &inv_txfm_2d_cfg_dct_adst_16 , &inv_txfm_2d_cfg_dct_adst_32},
+ {&inv_txfm_2d_cfg_adst_adst_4, &inv_txfm_2d_cfg_adst_adst_8,
+ &inv_txfm_2d_cfg_adst_adst_16, &inv_txfm_2d_cfg_adst_adst_32},
+ {&inv_txfm_2d_cfg_adst_dct_4 , &inv_txfm_2d_cfg_adst_dct_8,
+ &inv_txfm_2d_cfg_adst_dct_16 , &inv_txfm_2d_cfg_adst_dct_32},
+ {&inv_txfm_2d_cfg_dct_adst_4 , &inv_txfm_2d_cfg_dct_adst_8,
+ &inv_txfm_2d_cfg_dct_adst_16 , &inv_txfm_2d_cfg_dct_adst_32},
+ {&inv_txfm_2d_cfg_adst_adst_4, &inv_txfm_2d_cfg_adst_adst_8,
+ &inv_txfm_2d_cfg_adst_adst_16, &inv_txfm_2d_cfg_adst_adst_32},
+ {&inv_txfm_2d_cfg_adst_adst_4, &inv_txfm_2d_cfg_adst_adst_8,
+ &inv_txfm_2d_cfg_adst_adst_16, &inv_txfm_2d_cfg_adst_adst_32},
+ {&inv_txfm_2d_cfg_adst_adst_4, &inv_txfm_2d_cfg_adst_adst_8,
+ &inv_txfm_2d_cfg_adst_adst_16, &inv_txfm_2d_cfg_adst_adst_32},
+};
+#else
+static const TXFM_2D_CFG* inv_txfm_cfg_ls[TX_TYPES][TX_SIZES] = {
+ {&inv_txfm_2d_cfg_dct_dct_4 , &inv_txfm_2d_cfg_dct_dct_8,
+ &inv_txfm_2d_cfg_dct_dct_16 , &inv_txfm_2d_cfg_dct_dct_32},
+ {&inv_txfm_2d_cfg_adst_dct_4 , &inv_txfm_2d_cfg_adst_dct_8,
+ &inv_txfm_2d_cfg_adst_dct_16 , &inv_txfm_2d_cfg_adst_dct_32},
+ {&inv_txfm_2d_cfg_dct_adst_4 , &inv_txfm_2d_cfg_dct_adst_8,
+ &inv_txfm_2d_cfg_dct_adst_16 , &inv_txfm_2d_cfg_dct_adst_32},
+ {&inv_txfm_2d_cfg_adst_adst_4, &inv_txfm_2d_cfg_adst_adst_8,
+ &inv_txfm_2d_cfg_adst_adst_16, &inv_txfm_2d_cfg_adst_adst_32},
+};
+#endif
+
+TXFM_2D_FLIP_CFG vp10_get_inv_txfm_cfg(int tx_type, int tx_size) {
+ TXFM_2D_FLIP_CFG cfg;
+ set_flip_cfg(tx_type, &cfg);
+ cfg.cfg = inv_txfm_cfg_ls[tx_type][tx_size];
+ return cfg;
+}
+
+TXFM_2D_FLIP_CFG vp10_get_inv_txfm_64x64_cfg(int tx_type) {
+ TXFM_2D_FLIP_CFG cfg;
switch (tx_type) {
case DCT_DCT:
- cfg = &inv_txfm_2d_cfg_dct_dct_4;
- break;
- case ADST_DCT:
- cfg = &inv_txfm_2d_cfg_adst_dct_4;
- break;
- case DCT_ADST:
- cfg = &inv_txfm_2d_cfg_dct_adst_4;
- break;
- case ADST_ADST:
- cfg = &inv_txfm_2d_cfg_adst_adst_4;
+ cfg.cfg = &inv_txfm_2d_cfg_dct_dct_64;
+ set_flip_cfg(tx_type, &cfg);
break;
default:
assert(0);
@@ -69,95 +93,17 @@
return cfg;
}
-static const TXFM_2D_CFG* vp10_get_inv_txfm_8x8_cfg(int tx_type) {
- const TXFM_2D_CFG* cfg = NULL;
- switch (tx_type) {
- case DCT_DCT:
- cfg = &inv_txfm_2d_cfg_dct_dct_8;
- break;
- case ADST_DCT:
- cfg = &inv_txfm_2d_cfg_adst_dct_8;
- break;
- case DCT_ADST:
- cfg = &inv_txfm_2d_cfg_dct_adst_8;
- break;
- case ADST_ADST:
- cfg = &inv_txfm_2d_cfg_adst_adst_8;
- break;
- default:
- assert(0);
- }
- return cfg;
-}
-
-static const TXFM_2D_CFG* vp10_get_inv_txfm_16x16_cfg(int tx_type) {
- const TXFM_2D_CFG* cfg = NULL;
- switch (tx_type) {
- case DCT_DCT:
- cfg = &inv_txfm_2d_cfg_dct_dct_16;
- break;
- case ADST_DCT:
- cfg = &inv_txfm_2d_cfg_adst_dct_16;
- break;
- case DCT_ADST:
- cfg = &inv_txfm_2d_cfg_dct_adst_16;
- break;
- case ADST_ADST:
- cfg = &inv_txfm_2d_cfg_adst_adst_16;
- break;
- default:
- assert(0);
- }
- return cfg;
-}
-
-static const TXFM_2D_CFG* vp10_get_inv_txfm_32x32_cfg(int tx_type) {
- const TXFM_2D_CFG* cfg = NULL;
- switch (tx_type) {
- case DCT_DCT:
- cfg = &inv_txfm_2d_cfg_dct_dct_32;
- break;
- case ADST_DCT:
- cfg = &inv_txfm_2d_cfg_adst_dct_32;
- break;
- case DCT_ADST:
- cfg = &inv_txfm_2d_cfg_dct_adst_32;
- break;
- case ADST_ADST:
- cfg = &inv_txfm_2d_cfg_adst_adst_32;
- break;
- default:
- assert(0);
- }
- return cfg;
-}
-
-static const TXFM_2D_CFG* vp10_get_inv_txfm_64x64_cfg(int tx_type) {
- const TXFM_2D_CFG* cfg = NULL;
- switch (tx_type) {
- case DCT_DCT:
- cfg = &inv_txfm_2d_cfg_dct_dct_64;
- case ADST_DCT:
- case DCT_ADST:
- case ADST_ADST:
- default:
- assert(0);
- }
- return cfg;
-}
-
-
static INLINE void inv_txfm2d_add_c(const int32_t *input, int16_t *output,
- int stride, const TXFM_2D_CFG *cfg,
+ int stride, TXFM_2D_FLIP_CFG *cfg,
int32_t *txfm_buf) {
- const int txfm_size = cfg->txfm_size;
- const int8_t *shift = cfg->shift;
- const int8_t *stage_range_col = cfg->stage_range_col;
- const int8_t *stage_range_row = cfg->stage_range_row;
- const int8_t *cos_bit_col = cfg->cos_bit_col;
- const int8_t *cos_bit_row = cfg->cos_bit_row;
- const TxfmFunc txfm_func_col = inv_txfm_type_to_func(cfg->txfm_type_col);
- const TxfmFunc txfm_func_row = inv_txfm_type_to_func(cfg->txfm_type_row);
+ const int txfm_size = cfg->cfg->txfm_size;
+ const int8_t *shift = cfg->cfg->shift;
+ const int8_t *stage_range_col = cfg->cfg->stage_range_col;
+ const int8_t *stage_range_row = cfg->cfg->stage_range_row;
+ const int8_t *cos_bit_col = cfg->cfg->cos_bit_col;
+ const int8_t *cos_bit_row = cfg->cfg->cos_bit_row;
+ const TxfmFunc txfm_func_col = inv_txfm_type_to_func(cfg->cfg->txfm_type_col);
+ const TxfmFunc txfm_func_row = inv_txfm_type_to_func(cfg->cfg->txfm_type_row);
// txfm_buf's length is txfm_size * txfm_size + 2 * txfm_size
// it is used for intermediate data buffering
@@ -165,10 +111,10 @@
int32_t *temp_out = temp_in + txfm_size;
int32_t *buf = temp_out + txfm_size;
int32_t *buf_ptr = buf;
- int i, j;
+ int c, r;
// Rows
- for (i = 0; i < txfm_size; ++i) {
+ for (r = 0; r < txfm_size; ++r) {
txfm_func_row(input, buf_ptr, cos_bit_row, stage_range_row);
round_shift_array(buf_ptr, txfm_size, -shift[0]);
input += txfm_size;
@@ -176,13 +122,25 @@
}
// Columns
- for (i = 0; i < txfm_size; ++i) {
- for (j = 0; j < txfm_size; ++j)
- temp_in[j] = buf[j * txfm_size + i];
+ for (c = 0; c < txfm_size; ++c) {
+ if (cfg->lr_flip == 0) {
+ for (r = 0; r < txfm_size; ++r)
+ temp_in[r] = buf[r * txfm_size + c];
+ } else {
+ // flip left right
+ for (r = 0; r < txfm_size; ++r)
+ temp_in[r] = buf[r * txfm_size + (txfm_size - c - 1)];
+ }
txfm_func_col(temp_in, temp_out, cos_bit_col, stage_range_col);
round_shift_array(temp_out, txfm_size, -shift[1]);
- for (j = 0; j < txfm_size; ++j)
- output[j * stride + i] += temp_out[j];
+ if (cfg->ud_flip == 0) {
+ for (r = 0; r < txfm_size; ++r)
+ output[r * stride + c] += temp_out[r];
+ } else {
+ // flip upside down
+ for (r = 0; r < txfm_size; ++r)
+ output[r * stride + c] += temp_out[txfm_size - r - 1];
+ }
}
}
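To make the flip handling concrete: lr_flip makes the column pass read the
intermediate buffer's columns right to left, and ud_flip accumulates the column
result into the output rows in reverse. For txfm_size == 4 with ud_flip set,
the accumulation unrolls to:

/* Equivalent unrolled form of the ud_flip branch for one column c of a
 * 4-point inverse transform. */
static void add_column_flipped_4(int16_t *output, int stride, int c,
                                 const int32_t *temp_out) {
  output[0 * stride + c] += (int16_t)temp_out[3];
  output[1 * stride + c] += (int16_t)temp_out[2];
  output[2 * stride + c] += (int16_t)temp_out[1];
  output[3 * stride + c] += (int16_t)temp_out[0];
}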
@@ -194,8 +152,8 @@
// than (1 << bd) - 1
// since bd < 16-1, therefore we can treat the uint16_t* output buffer as an
// int16_t*
- const TXFM_2D_CFG* cfg = vp10_get_inv_txfm_4x4_cfg(tx_type);
- inv_txfm2d_add_c(input, (int16_t *)output, stride, cfg, txfm_buf);
+ TXFM_2D_FLIP_CFG cfg = vp10_get_inv_txfm_cfg(tx_type, TX_4X4);
+ inv_txfm2d_add_c(input, (int16_t *)output, stride, &cfg, txfm_buf);
clamp_block((int16_t *)output, 4, stride, 0, (1 << bd) - 1);
}
@@ -207,8 +165,8 @@
// than (1 << bd) - 1
// since bd < 16-1, therefore we can treat the uint16_t* output buffer as an
// int16_t*
- const TXFM_2D_CFG* cfg = vp10_get_inv_txfm_8x8_cfg(tx_type);
- inv_txfm2d_add_c(input, (int16_t *)output, stride, cfg, txfm_buf);
+ TXFM_2D_FLIP_CFG cfg = vp10_get_inv_txfm_cfg(tx_type, TX_8X8);
+ inv_txfm2d_add_c(input, (int16_t *)output, stride, &cfg, txfm_buf);
clamp_block((int16_t *)output, 8, stride, 0, (1 << bd) - 1);
}
@@ -220,8 +178,8 @@
// than (1 << bd) - 1
// since bd < 16-1, therefore we can treat the uint16_t* output buffer as an
// int16_t*
- const TXFM_2D_CFG* cfg = vp10_get_inv_txfm_16x16_cfg(tx_type);
- inv_txfm2d_add_c(input, (int16_t *)output, stride, cfg, txfm_buf);
+ TXFM_2D_FLIP_CFG cfg = vp10_get_inv_txfm_cfg(tx_type, TX_16X16);
+ inv_txfm2d_add_c(input, (int16_t *)output, stride, &cfg, txfm_buf);
clamp_block((int16_t *)output, 16, stride, 0, (1 << bd) - 1);
}
@@ -233,8 +191,8 @@
// than (1 << bd) - 1
// since bd < 16-1, therefore we can treat the uint16_t* output buffer as an
// int16_t*
- const TXFM_2D_CFG* cfg = vp10_get_inv_txfm_32x32_cfg(tx_type);
- inv_txfm2d_add_c(input, (int16_t *)output, stride, cfg, txfm_buf);
+ TXFM_2D_FLIP_CFG cfg = vp10_get_inv_txfm_cfg(tx_type, TX_32X32);
+ inv_txfm2d_add_c(input, (int16_t *)output, stride, &cfg, txfm_buf);
clamp_block((int16_t *)output, 32, stride, 0, (1 << bd) - 1);
}
@@ -246,7 +204,7 @@
// than (1 << bd) - 1
// since bd < 16-1, therefore we can treat the uint16_t* output buffer as an
// int16_t*
- const TXFM_2D_CFG* cfg = vp10_get_inv_txfm_64x64_cfg(tx_type);
- inv_txfm2d_add_c(input, (int16_t *)output, stride, cfg, txfm_buf);
+ TXFM_2D_FLIP_CFG cfg = vp10_get_inv_txfm_64x64_cfg(tx_type);
+ inv_txfm2d_add_c(input, (int16_t *)output, stride, &cfg, txfm_buf);
clamp_block((int16_t *)output, 64, stride, 0, (1 << bd) - 1);
}
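
For reference, the flip handling introduced above is just an index remap: lr_flip mirrors the column gather out of the row-transform buffer, and ud_flip mirrors which intermediate value is accumulated into each output row. A minimal scalar sketch of that indexing for one output column c (illustrative only, not part of the patch; n stands for txfm_size):

    for (r = 0; r < n; ++r)
      temp_in[r] = buf[r * n + (cfg->lr_flip ? (n - 1 - c) : c)];
    /* column transform on temp_in -> temp_out, then round_shift_array */
    for (r = 0; r < n; ++r)
      output[r * stride + c] += temp_out[cfg->ud_flip ? (n - 1 - r) : r];
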
diff --git a/vp10/common/vp10_rtcd_defs.pl b/vp10/common/vp10_rtcd_defs.pl
index d843dfe..1e93f7d 100644
--- a/vp10/common/vp10_rtcd_defs.pl
+++ b/vp10/common/vp10_rtcd_defs.pl
@@ -627,11 +627,11 @@
#inv txfm
add_proto qw/void vp10_inv_txfm2d_add_4x4/, "const int32_t *input, uint16_t *output, int stride, int tx_type, int bd";
- specialize qw/vp10_inv_txfm2d_add_4x4/;
+ specialize qw/vp10_inv_txfm2d_add_4x4 sse4_1/;
add_proto qw/void vp10_inv_txfm2d_add_8x8/, "const int32_t *input, uint16_t *output, int stride, int tx_type, int bd";
- specialize qw/vp10_inv_txfm2d_add_8x8/;
+ specialize qw/vp10_inv_txfm2d_add_8x8 sse4_1/;
add_proto qw/void vp10_inv_txfm2d_add_16x16/, "const int32_t *input, uint16_t *output, int stride, int tx_type, int bd";
- specialize qw/vp10_inv_txfm2d_add_16x16/;
+ specialize qw/vp10_inv_txfm2d_add_16x16 sse4_1/;
add_proto qw/void vp10_inv_txfm2d_add_32x32/, "const int32_t *input, uint16_t *output, int stride, int tx_type, int bd";
specialize qw/vp10_inv_txfm2d_add_32x32/;
add_proto qw/void vp10_inv_txfm2d_add_64x64/, "const int32_t *input, uint16_t *output, int stride, int tx_type, int bd";
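
Listing sse4_1 in the specialize lines lets the RTCD layer select the new SIMD implementations at run time while callers keep using the generic names. Roughly, the generated glue amounts to the following sketch (the real code is emitted by the rtcd scripts; the flag check is shown for illustration only):

    vp10_inv_txfm2d_add_4x4 = vp10_inv_txfm2d_add_4x4_c;
    if (flags & HAS_SSE4_1)
      vp10_inv_txfm2d_add_4x4 = vp10_inv_txfm2d_add_4x4_sse4_1;
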
diff --git a/vp10/common/vp10_txfm.h b/vp10/common/vp10_txfm.h
index 9944bdd..2ac8f81 100644
--- a/vp10/common/vp10_txfm.h
+++ b/vp10/common/vp10_txfm.h
@@ -7,7 +7,6 @@
* in the file PATENTS. All contributing project authors may
* be found in the AUTHORS file in the root of the source tree.
*/
-
#ifndef VP10_TXFM_H_
#define VP10_TXFM_H_
@@ -15,6 +14,7 @@
#include <math.h>
#include <assert.h>
+#include "vp10/common/enums.h"
#include "vpx/vpx_integer.h"
#include "vpx_dsp/vpx_dsp_common.h"
@@ -166,4 +166,57 @@
const TXFM_TYPE txfm_type_row;
} TXFM_2D_CFG;
+typedef struct TXFM_2D_FLIP_CFG {
+ int ud_flip; // flip upside down
+ int lr_flip; // flip left to right
+ const TXFM_2D_CFG* cfg;
+} TXFM_2D_FLIP_CFG;
+
+static INLINE void set_flip_cfg(int tx_type, TXFM_2D_FLIP_CFG* cfg) {
+ switch (tx_type) {
+ case DCT_DCT:
+ case ADST_DCT:
+ case DCT_ADST:
+ case ADST_ADST:
+ cfg->ud_flip = 0;
+ cfg->lr_flip = 0;
+ break;
+#if CONFIG_EXT_TX
+ case FLIPADST_DCT:
+ cfg->ud_flip = 1;
+ cfg->lr_flip = 0;
+ break;
+ case DCT_FLIPADST:
+ cfg->ud_flip = 0;
+ cfg->lr_flip = 1;
+ break;
+ case FLIPADST_FLIPADST:
+ cfg->ud_flip = 1;
+ cfg->lr_flip = 1;
+ break;
+ case ADST_FLIPADST:
+ cfg->ud_flip = 0;
+ cfg->lr_flip = 1;
+ break;
+ case FLIPADST_ADST:
+ cfg->ud_flip = 1;
+ cfg->lr_flip = 0;
+ break;
+#endif // CONFIG_EXT_TX
+ default:
+ cfg->ud_flip = 0;
+ cfg->lr_flip = 0;
+ assert(0);
+ }
+}
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+TXFM_2D_FLIP_CFG vp10_get_fwd_txfm_cfg(int tx_type, int tx_size);
+TXFM_2D_FLIP_CFG vp10_get_fwd_txfm_64x64_cfg(int tx_type);
+#ifdef __cplusplus
+}
+#endif // __cplusplus
+
#endif // VP10_TXFM_H_
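
TXFM_2D_FLIP_CFG simply pairs an existing TXFM_2D_CFG with the two flip flags, and set_flip_cfg() derives those flags from the tx_type alone. A hedged sketch of how a getter can be assembled on top of it (the function below is hypothetical; the real getters live in the forward/inverse txfm2d sources):

    TXFM_2D_FLIP_CFG get_cfg_sketch(int tx_type, const TXFM_2D_CFG *base) {
      TXFM_2D_FLIP_CFG cfg;
      set_flip_cfg(tx_type, &cfg);   /* fills ud_flip / lr_flip */
      cfg.cfg = base;                /* e.g. &inv_txfm_2d_cfg_dct_dct_4 */
      return cfg;
    }
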
diff --git a/vp10/common/x86/highbd_inv_txfm_sse4.c b/vp10/common/x86/highbd_inv_txfm_sse4.c
new file mode 100644
index 0000000..9ece108
--- /dev/null
+++ b/vp10/common/x86/highbd_inv_txfm_sse4.c
@@ -0,0 +1,1245 @@
+/*
+ * Copyright (c) 2016 The WebM project authors. All Rights Reserved.
+ *
+ * Use of this source code is governed by a BSD-style license
+ * that can be found in the LICENSE file in the root of the source
+ * tree. An additional intellectual property rights grant can be found
+ * in the file PATENTS. All contributing project authors may
+ * be found in the AUTHORS file in the root of the source tree.
+ */
+
+#include <assert.h>
+#include <smmintrin.h> /* SSE4.1 */
+
+#include "./vp10_rtcd.h"
+#include "./vpx_config.h"
+#include "vp10/common/vp10_inv_txfm2d_cfg.h"
+#include "vp10/common/x86/highbd_txfm_utility_sse4.h"
+
+static INLINE void load_buffer_4x4(const int32_t *coeff, __m128i *in) {
+ in[0] = _mm_load_si128((const __m128i *)(coeff + 0));
+ in[1] = _mm_load_si128((const __m128i *)(coeff + 4));
+ in[2] = _mm_load_si128((const __m128i *)(coeff + 8));
+ in[3] = _mm_load_si128((const __m128i *)(coeff + 12));
+}
+
+static void idct4x4_sse4_1(__m128i *in, int bit) {
+ const int32_t *cospi = cospi_arr[bit - cos_bit_min];
+ const __m128i cospi32 = _mm_set1_epi32(cospi[32]);
+ const __m128i cospi48 = _mm_set1_epi32(cospi[48]);
+ const __m128i cospi16 = _mm_set1_epi32(cospi[16]);
+ const __m128i cospim16 = _mm_set1_epi32(-cospi[16]);
+ const __m128i rnding = _mm_set1_epi32(1 << (bit - 1));
+ __m128i u0, u1, u2, u3;
+ __m128i v0, v1, v2, v3, x, y;
+
+ v0 = _mm_unpacklo_epi32(in[0], in[1]);
+ v1 = _mm_unpackhi_epi32(in[0], in[1]);
+ v2 = _mm_unpacklo_epi32(in[2], in[3]);
+ v3 = _mm_unpackhi_epi32(in[2], in[3]);
+
+ u0 = _mm_unpacklo_epi64(v0, v2);
+ u1 = _mm_unpackhi_epi64(v0, v2);
+ u2 = _mm_unpacklo_epi64(v1, v3);
+ u3 = _mm_unpackhi_epi64(v1, v3);
+
+ x = _mm_mullo_epi32(u0, cospi32);
+ y = _mm_mullo_epi32(u2, cospi32);
+ v0 = _mm_add_epi32(x, y);
+ v0 = _mm_add_epi32(v0, rnding);
+ v0 = _mm_srai_epi32(v0, bit);
+
+ v1 = _mm_sub_epi32(x, y);
+ v1 = _mm_add_epi32(v1, rnding);
+ v1 = _mm_srai_epi32(v1, bit);
+
+ x = _mm_mullo_epi32(u1, cospi48);
+ y = _mm_mullo_epi32(u3, cospim16);
+ v2 = _mm_add_epi32(x, y);
+ v2 = _mm_add_epi32(v2, rnding);
+ v2 = _mm_srai_epi32(v2, bit);
+
+ x = _mm_mullo_epi32(u1, cospi16);
+ y = _mm_mullo_epi32(u3, cospi48);
+ v3 = _mm_add_epi32(x, y);
+ v3 = _mm_add_epi32(v3, rnding);
+ v3 = _mm_srai_epi32(v3, bit);
+
+ in[0] = _mm_add_epi32(v0, v3);
+ in[1] = _mm_add_epi32(v1, v2);
+ in[2] = _mm_sub_epi32(v1, v2);
+ in[3] = _mm_sub_epi32(v0, v3);
+}
+
+static void iadst4x4_sse4_1(__m128i *in, int bit) {
+ const int32_t *cospi = cospi_arr[bit - cos_bit_min];
+ const __m128i cospi32 = _mm_set1_epi32(cospi[32]);
+ const __m128i cospi8 = _mm_set1_epi32(cospi[8]);
+ const __m128i cospim8 = _mm_set1_epi32(-cospi[8]);
+ const __m128i cospi40 = _mm_set1_epi32(cospi[40]);
+ const __m128i cospim40 = _mm_set1_epi32(-cospi[40]);
+ const __m128i cospi24 = _mm_set1_epi32(cospi[24]);
+ const __m128i cospi56 = _mm_set1_epi32(cospi[56]);
+ const __m128i rnding = _mm_set1_epi32(1 << (bit - 1));
+ const __m128i zero = _mm_setzero_si128();
+ __m128i u0, u1, u2, u3;
+ __m128i v0, v1, v2, v3, x, y;
+
+ v0 = _mm_unpacklo_epi32(in[0], in[1]);
+ v1 = _mm_unpackhi_epi32(in[0], in[1]);
+ v2 = _mm_unpacklo_epi32(in[2], in[3]);
+ v3 = _mm_unpackhi_epi32(in[2], in[3]);
+
+ u0 = _mm_unpacklo_epi64(v0, v2);
+ u1 = _mm_unpackhi_epi64(v0, v2);
+ u2 = _mm_unpacklo_epi64(v1, v3);
+ u3 = _mm_unpackhi_epi64(v1, v3);
+
+ // stage 0
+ // stage 1
+ u1 = _mm_sub_epi32(zero, u1);
+ u3 = _mm_sub_epi32(zero, u3);
+
+ // stage 2
+ v0 = u0;
+ v1 = u3;
+ x = _mm_mullo_epi32(u1, cospi32);
+ y = _mm_mullo_epi32(u2, cospi32);
+ v2 = _mm_add_epi32(x, y);
+ v2 = _mm_add_epi32(v2, rnding);
+ v2 = _mm_srai_epi32(v2, bit);
+
+ v3 = _mm_sub_epi32(x, y);
+ v3 = _mm_add_epi32(v3, rnding);
+ v3 = _mm_srai_epi32(v3, bit);
+
+ // stage 3
+ u0 = _mm_add_epi32(v0, v2);
+ u1 = _mm_add_epi32(v1, v3);
+ u2 = _mm_sub_epi32(v0, v2);
+ u3 = _mm_sub_epi32(v1, v3);
+
+ // stage 4
+ x = _mm_mullo_epi32(u0, cospi8);
+ y = _mm_mullo_epi32(u1, cospi56);
+ in[3] = _mm_add_epi32(x, y);
+ in[3] = _mm_add_epi32(in[3], rnding);
+ in[3] = _mm_srai_epi32(in[3], bit);
+
+ x = _mm_mullo_epi32(u0, cospi56);
+ y = _mm_mullo_epi32(u1, cospim8);
+ in[0] = _mm_add_epi32(x, y);
+ in[0] = _mm_add_epi32(in[0], rnding);
+ in[0] = _mm_srai_epi32(in[0], bit);
+
+ x = _mm_mullo_epi32(u2, cospi40);
+ y = _mm_mullo_epi32(u3, cospi24);
+ in[1] = _mm_add_epi32(x, y);
+ in[1] = _mm_add_epi32(in[1], rnding);
+ in[1] = _mm_srai_epi32(in[1], bit);
+
+ x = _mm_mullo_epi32(u2, cospi24);
+ y = _mm_mullo_epi32(u3, cospim40);
+ in[2] = _mm_add_epi32(x, y);
+ in[2] = _mm_add_epi32(in[2], rnding);
+ in[2] = _mm_srai_epi32(in[2], bit);
+}
+
+static INLINE void round_shift_4x4(__m128i *in, int shift) {
+ __m128i rnding = _mm_set1_epi32(1 << (shift - 1));
+
+ in[0] = _mm_add_epi32(in[0], rnding);
+ in[1] = _mm_add_epi32(in[1], rnding);
+ in[2] = _mm_add_epi32(in[2], rnding);
+ in[3] = _mm_add_epi32(in[3], rnding);
+
+ in[0] = _mm_srai_epi32(in[0], shift);
+ in[1] = _mm_srai_epi32(in[1], shift);
+ in[2] = _mm_srai_epi32(in[2], shift);
+ in[3] = _mm_srai_epi32(in[3], shift);
+}
+
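+// Clamp each packed 16-bit lane to the valid pixel range [0, (1 << bd) - 1].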
+static INLINE __m128i highbd_clamp_epi16(__m128i u, int bd) {
+ const __m128i zero = _mm_setzero_si128();
+ const __m128i one = _mm_set1_epi16(1);
+ const __m128i max = _mm_sub_epi16(_mm_slli_epi16(one, bd), one);
+ __m128i clamped, mask;
+
+ mask = _mm_cmpgt_epi16(u, max);
+ clamped = _mm_andnot_si128(mask, u);
+ mask = _mm_and_si128(mask, max);
+ clamped = _mm_or_si128(mask, clamped);
+ mask = _mm_cmpgt_epi16(clamped, zero);
+ clamped = _mm_and_si128(clamped, mask);
+
+ return clamped;
+}
+
+static void write_buffer_4x4(__m128i *in, uint16_t *output, int stride,
+ int shift, int bd) {
+ const __m128i zero = _mm_setzero_si128();
+ __m128i u0, u1, u2, u3;
+ __m128i v0, v1, v2, v3;
+
+ round_shift_4x4(in, shift);
+
+ v0 = _mm_loadl_epi64((__m128i const *)(output + 0 * stride));
+ v1 = _mm_loadl_epi64((__m128i const *)(output + 1 * stride));
+ v2 = _mm_loadl_epi64((__m128i const *)(output + 2 * stride));
+ v3 = _mm_loadl_epi64((__m128i const *)(output + 3 * stride));
+
+ v0 = _mm_unpacklo_epi16(v0, zero);
+ v1 = _mm_unpacklo_epi16(v1, zero);
+ v2 = _mm_unpacklo_epi16(v2, zero);
+ v3 = _mm_unpacklo_epi16(v3, zero);
+
+ u0 = _mm_add_epi32(in[0], v0);
+ u1 = _mm_add_epi32(in[1], v1);
+ u2 = _mm_add_epi32(in[2], v2);
+ u3 = _mm_add_epi32(in[3], v3);
+
+ v0 = _mm_packus_epi32(u0, u1);
+ v2 = _mm_packus_epi32(u2, u3);
+
+ u0 = highbd_clamp_epi16(v0, bd);
+ u2 = highbd_clamp_epi16(v2, bd);
+
+ v0 = _mm_unpacklo_epi64(u0, u0);
+ v1 = _mm_unpackhi_epi64(u0, u0);
+ v2 = _mm_unpacklo_epi64(u2, u2);
+ v3 = _mm_unpackhi_epi64(u2, u2);
+
+ _mm_storel_epi64((__m128i *)(output + 0 * stride), v0);
+ _mm_storel_epi64((__m128i *)(output + 1 * stride), v1);
+ _mm_storel_epi64((__m128i *)(output + 2 * stride), v2);
+ _mm_storel_epi64((__m128i *)(output + 3 * stride), v3);
+}
+
+void vp10_inv_txfm2d_add_4x4_sse4_1(const int32_t *coeff, uint16_t *output,
+ int stride, int tx_type, int bd) {
+ __m128i in[4];
+ const TXFM_2D_CFG *cfg = NULL;
+
+ switch (tx_type) {
+ case DCT_DCT:
+ cfg = &inv_txfm_2d_cfg_dct_dct_4;
+ load_buffer_4x4(coeff, in);
+ idct4x4_sse4_1(in, cfg->cos_bit_row[2]);
+ idct4x4_sse4_1(in, cfg->cos_bit_col[2]);
+ write_buffer_4x4(in, output, stride, -cfg->shift[1], bd);
+ break;
+ case ADST_DCT:
+ cfg = &inv_txfm_2d_cfg_adst_dct_4;
+ load_buffer_4x4(coeff, in);
+ idct4x4_sse4_1(in, cfg->cos_bit_row[2]);
+ iadst4x4_sse4_1(in, cfg->cos_bit_col[2]);
+ write_buffer_4x4(in, output, stride, -cfg->shift[1], bd);
+ break;
+ case DCT_ADST:
+ cfg = &inv_txfm_2d_cfg_dct_adst_4;
+ load_buffer_4x4(coeff, in);
+ iadst4x4_sse4_1(in, cfg->cos_bit_row[2]);
+ idct4x4_sse4_1(in, cfg->cos_bit_col[2]);
+ write_buffer_4x4(in, output, stride, -cfg->shift[1], bd);
+ break;
+ case ADST_ADST:
+ cfg = &inv_txfm_2d_cfg_adst_adst_4;
+ load_buffer_4x4(coeff, in);
+ iadst4x4_sse4_1(in, cfg->cos_bit_row[2]);
+ iadst4x4_sse4_1(in, cfg->cos_bit_col[2]);
+ write_buffer_4x4(in, output, stride, -cfg->shift[1], bd);
+ break;
+ default:
+ assert(0);
+ }
+}
+
+// 8x8
+static void load_buffer_8x8(const int32_t *coeff, __m128i *in) {
+ in[0] = _mm_load_si128((const __m128i *)(coeff + 0));
+ in[1] = _mm_load_si128((const __m128i *)(coeff + 4));
+ in[2] = _mm_load_si128((const __m128i *)(coeff + 8));
+ in[3] = _mm_load_si128((const __m128i *)(coeff + 12));
+ in[4] = _mm_load_si128((const __m128i *)(coeff + 16));
+ in[5] = _mm_load_si128((const __m128i *)(coeff + 20));
+ in[6] = _mm_load_si128((const __m128i *)(coeff + 24));
+ in[7] = _mm_load_si128((const __m128i *)(coeff + 28));
+ in[8] = _mm_load_si128((const __m128i *)(coeff + 32));
+ in[9] = _mm_load_si128((const __m128i *)(coeff + 36));
+ in[10] = _mm_load_si128((const __m128i *)(coeff + 40));
+ in[11] = _mm_load_si128((const __m128i *)(coeff + 44));
+ in[12] = _mm_load_si128((const __m128i *)(coeff + 48));
+ in[13] = _mm_load_si128((const __m128i *)(coeff + 52));
+ in[14] = _mm_load_si128((const __m128i *)(coeff + 56));
+ in[15] = _mm_load_si128((const __m128i *)(coeff + 60));
+}
+
+static void idct8x8_sse4_1(__m128i *in, __m128i *out, int bit) {
+ const int32_t *cospi = cospi_arr[bit - cos_bit_min];
+ const __m128i cospi56 = _mm_set1_epi32(cospi[56]);
+ const __m128i cospim8 = _mm_set1_epi32(-cospi[8]);
+ const __m128i cospi24 = _mm_set1_epi32(cospi[24]);
+ const __m128i cospim40 = _mm_set1_epi32(-cospi[40]);
+ const __m128i cospi40 = _mm_set1_epi32(cospi[40]);
+ const __m128i cospi8 = _mm_set1_epi32(cospi[8]);
+ const __m128i cospi32 = _mm_set1_epi32(cospi[32]);
+ const __m128i cospi48 = _mm_set1_epi32(cospi[48]);
+ const __m128i cospim16 = _mm_set1_epi32(-cospi[16]);
+ const __m128i cospi16 = _mm_set1_epi32(cospi[16]);
+ const __m128i rnding = _mm_set1_epi32(1 << (bit - 1));
+ __m128i u0, u1, u2, u3, u4, u5, u6, u7;
+ __m128i v0, v1, v2, v3, v4, v5, v6, v7;
+ __m128i x, y;
+ int col;
+
+  // Note:
+  // Even-indexed registers: 0, 2, ..., 14
+  // Odd-indexed registers: 1, 3, ..., 15
+  // One even register plus one odd register holds one row of 8 coefficients,
+  // so there are 8 rows in total (8x8).
+ for (col = 0; col < 2; ++col) {
+ // stage 0
+ // stage 1
+ // stage 2
+ u0 = in[0 * 2 + col];
+ u1 = in[4 * 2 + col];
+ u2 = in[2 * 2 + col];
+ u3 = in[6 * 2 + col];
+
+ x = _mm_mullo_epi32(in[1 * 2 + col], cospi56);
+ y = _mm_mullo_epi32(in[7 * 2 + col], cospim8);
+ u4 = _mm_add_epi32(x, y);
+ u4 = _mm_add_epi32(u4, rnding);
+ u4 = _mm_srai_epi32(u4, bit);
+
+ x = _mm_mullo_epi32(in[1 * 2 + col], cospi8);
+ y = _mm_mullo_epi32(in[7 * 2 + col], cospi56);
+ u7 = _mm_add_epi32(x, y);
+ u7 = _mm_add_epi32(u7, rnding);
+ u7 = _mm_srai_epi32(u7, bit);
+
+ x = _mm_mullo_epi32(in[5 * 2 + col], cospi24);
+ y = _mm_mullo_epi32(in[3 * 2 + col], cospim40);
+ u5 = _mm_add_epi32(x, y);
+ u5 = _mm_add_epi32(u5, rnding);
+ u5 = _mm_srai_epi32(u5, bit);
+
+ x = _mm_mullo_epi32(in[5 * 2 + col], cospi40);
+ y = _mm_mullo_epi32(in[3 * 2 + col], cospi24);
+ u6 = _mm_add_epi32(x, y);
+ u6 = _mm_add_epi32(u6, rnding);
+ u6 = _mm_srai_epi32(u6, bit);
+
+ // stage 3
+ x = _mm_mullo_epi32(u0, cospi32);
+ y = _mm_mullo_epi32(u1, cospi32);
+ v0 = _mm_add_epi32(x, y);
+ v0 = _mm_add_epi32(v0, rnding);
+ v0 = _mm_srai_epi32(v0, bit);
+
+ v1 = _mm_sub_epi32(x, y);
+ v1 = _mm_add_epi32(v1, rnding);
+ v1 = _mm_srai_epi32(v1, bit);
+
+ x = _mm_mullo_epi32(u2, cospi48);
+ y = _mm_mullo_epi32(u3, cospim16);
+ v2 = _mm_add_epi32(x, y);
+ v2 = _mm_add_epi32(v2, rnding);
+ v2 = _mm_srai_epi32(v2, bit);
+
+ x = _mm_mullo_epi32(u2, cospi16);
+ y = _mm_mullo_epi32(u3, cospi48);
+ v3 = _mm_add_epi32(x, y);
+ v3 = _mm_add_epi32(v3, rnding);
+ v3 = _mm_srai_epi32(v3, bit);
+
+ v4 = _mm_add_epi32(u4, u5);
+ v5 = _mm_sub_epi32(u4, u5);
+ v6 = _mm_sub_epi32(u7, u6);
+ v7 = _mm_add_epi32(u6, u7);
+
+ // stage 4
+ u0 = _mm_add_epi32(v0, v3);
+ u1 = _mm_add_epi32(v1, v2);
+ u2 = _mm_sub_epi32(v1, v2);
+ u3 = _mm_sub_epi32(v0, v3);
+ u4 = v4;
+ u7 = v7;
+
+ x = _mm_mullo_epi32(v5, cospi32);
+ y = _mm_mullo_epi32(v6, cospi32);
+ u6 = _mm_add_epi32(y, x);
+ u6 = _mm_add_epi32(u6, rnding);
+ u6 = _mm_srai_epi32(u6, bit);
+
+ u5 = _mm_sub_epi32(y, x);
+ u5 = _mm_add_epi32(u5, rnding);
+ u5 = _mm_srai_epi32(u5, bit);
+
+ // stage 5
+ out[0 * 2 + col] = _mm_add_epi32(u0, u7);
+ out[1 * 2 + col] = _mm_add_epi32(u1, u6);
+ out[2 * 2 + col] = _mm_add_epi32(u2, u5);
+ out[3 * 2 + col] = _mm_add_epi32(u3, u4);
+ out[4 * 2 + col] = _mm_sub_epi32(u3, u4);
+ out[5 * 2 + col] = _mm_sub_epi32(u2, u5);
+ out[6 * 2 + col] = _mm_sub_epi32(u1, u6);
+ out[7 * 2 + col] = _mm_sub_epi32(u0, u7);
+ }
+}
+
+static void iadst8x8_sse4_1(__m128i *in, __m128i *out, int bit) {
+ const int32_t *cospi = cospi_arr[bit - cos_bit_min];
+ const __m128i cospi32 = _mm_set1_epi32(cospi[32]);
+ const __m128i cospi16 = _mm_set1_epi32(cospi[16]);
+ const __m128i cospim16 = _mm_set1_epi32(-cospi[16]);
+ const __m128i cospi48 = _mm_set1_epi32(cospi[48]);
+ const __m128i cospim48 = _mm_set1_epi32(-cospi[48]);
+ const __m128i cospi4 = _mm_set1_epi32(cospi[4]);
+ const __m128i cospim4 = _mm_set1_epi32(-cospi[4]);
+ const __m128i cospi60 = _mm_set1_epi32(cospi[60]);
+ const __m128i cospi20 = _mm_set1_epi32(cospi[20]);
+ const __m128i cospim20 = _mm_set1_epi32(-cospi[20]);
+ const __m128i cospi44 = _mm_set1_epi32(cospi[44]);
+ const __m128i cospi28 = _mm_set1_epi32(cospi[28]);
+ const __m128i cospi36 = _mm_set1_epi32(cospi[36]);
+ const __m128i cospim36 = _mm_set1_epi32(-cospi[36]);
+ const __m128i cospi52 = _mm_set1_epi32(cospi[52]);
+ const __m128i cospim52 = _mm_set1_epi32(-cospi[52]);
+ const __m128i cospi12 = _mm_set1_epi32(cospi[12]);
+ const __m128i rnding = _mm_set1_epi32(1 << (bit - 1));
+ const __m128i zero = _mm_setzero_si128();
+ __m128i u0, u1, u2, u3, u4, u5, u6, u7;
+ __m128i v0, v1, v2, v3, v4, v5, v6, v7;
+ __m128i x, y;
+ int col;
+
+  // Note:
+  // Even-indexed registers: 0, 2, ..., 14
+  // Odd-indexed registers: 1, 3, ..., 15
+  // One even register plus one odd register holds one row of 8 coefficients,
+  // so there are 8 rows in total (8x8).
+ for (col = 0; col < 2; ++col) {
+ // stage 0
+ // stage 1
+ u0 = in[2 * 0 + col];
+ u1 = _mm_sub_epi32(zero, in[2 * 7 + col]);
+ u2 = _mm_sub_epi32(zero, in[2 * 3 + col]);
+ u3 = in[2 * 4 + col];
+ u4 = _mm_sub_epi32(zero, in[2 * 1 + col]);
+ u5 = in[2 * 6 + col];
+ u6 = in[2 * 2 + col];
+ u7 = _mm_sub_epi32(zero, in[2 * 5 + col]);
+
+ // stage 2
+ v0 = u0;
+ v1 = u1;
+
+ x = _mm_mullo_epi32(u2, cospi32);
+ y = _mm_mullo_epi32(u3, cospi32);
+ v2 = _mm_add_epi32(x, y);
+ v2 = _mm_add_epi32(v2, rnding);
+ v2 = _mm_srai_epi32(v2, bit);
+
+ v3 = _mm_sub_epi32(x, y);
+ v3 = _mm_add_epi32(v3, rnding);
+ v3 = _mm_srai_epi32(v3, bit);
+
+ v4 = u4;
+ v5 = u5;
+
+ x = _mm_mullo_epi32(u6, cospi32);
+ y = _mm_mullo_epi32(u7, cospi32);
+ v6 = _mm_add_epi32(x, y);
+ v6 = _mm_add_epi32(v6, rnding);
+ v6 = _mm_srai_epi32(v6, bit);
+
+ v7 = _mm_sub_epi32(x, y);
+ v7 = _mm_add_epi32(v7, rnding);
+ v7 = _mm_srai_epi32(v7, bit);
+
+ // stage 3
+ u0 = _mm_add_epi32(v0, v2);
+ u1 = _mm_add_epi32(v1, v3);
+ u2 = _mm_sub_epi32(v0, v2);
+ u3 = _mm_sub_epi32(v1, v3);
+ u4 = _mm_add_epi32(v4, v6);
+ u5 = _mm_add_epi32(v5, v7);
+ u6 = _mm_sub_epi32(v4, v6);
+ u7 = _mm_sub_epi32(v5, v7);
+
+ // stage 4
+ v0 = u0;
+ v1 = u1;
+ v2 = u2;
+ v3 = u3;
+
+ x = _mm_mullo_epi32(u4, cospi16);
+ y = _mm_mullo_epi32(u5, cospi48);
+ v4 = _mm_add_epi32(x, y);
+ v4 = _mm_add_epi32(v4, rnding);
+ v4 = _mm_srai_epi32(v4, bit);
+
+ x = _mm_mullo_epi32(u4, cospi48);
+ y = _mm_mullo_epi32(u5, cospim16);
+ v5 = _mm_add_epi32(x, y);
+ v5 = _mm_add_epi32(v5, rnding);
+ v5 = _mm_srai_epi32(v5, bit);
+
+ x = _mm_mullo_epi32(u6, cospim48);
+ y = _mm_mullo_epi32(u7, cospi16);
+ v6 = _mm_add_epi32(x, y);
+ v6 = _mm_add_epi32(v6, rnding);
+ v6 = _mm_srai_epi32(v6, bit);
+
+ x = _mm_mullo_epi32(u6, cospi16);
+ y = _mm_mullo_epi32(u7, cospi48);
+ v7 = _mm_add_epi32(x, y);
+ v7 = _mm_add_epi32(v7, rnding);
+ v7 = _mm_srai_epi32(v7, bit);
+
+ // stage 5
+ u0 = _mm_add_epi32(v0, v4);
+ u1 = _mm_add_epi32(v1, v5);
+ u2 = _mm_add_epi32(v2, v6);
+ u3 = _mm_add_epi32(v3, v7);
+ u4 = _mm_sub_epi32(v0, v4);
+ u5 = _mm_sub_epi32(v1, v5);
+ u6 = _mm_sub_epi32(v2, v6);
+ u7 = _mm_sub_epi32(v3, v7);
+
+ // stage 6
+ x = _mm_mullo_epi32(u0, cospi4);
+ y = _mm_mullo_epi32(u1, cospi60);
+ v0 = _mm_add_epi32(x, y);
+ v0 = _mm_add_epi32(v0, rnding);
+ v0 = _mm_srai_epi32(v0, bit);
+
+ x = _mm_mullo_epi32(u0, cospi60);
+ y = _mm_mullo_epi32(u1, cospim4);
+ v1 = _mm_add_epi32(x, y);
+ v1 = _mm_add_epi32(v1, rnding);
+ v1 = _mm_srai_epi32(v1, bit);
+
+ x = _mm_mullo_epi32(u2, cospi20);
+ y = _mm_mullo_epi32(u3, cospi44);
+ v2 = _mm_add_epi32(x, y);
+ v2 = _mm_add_epi32(v2, rnding);
+ v2 = _mm_srai_epi32(v2, bit);
+
+ x = _mm_mullo_epi32(u2, cospi44);
+ y = _mm_mullo_epi32(u3, cospim20);
+ v3 = _mm_add_epi32(x, y);
+ v3 = _mm_add_epi32(v3, rnding);
+ v3 = _mm_srai_epi32(v3, bit);
+
+ x = _mm_mullo_epi32(u4, cospi36);
+ y = _mm_mullo_epi32(u5, cospi28);
+ v4 = _mm_add_epi32(x, y);
+ v4 = _mm_add_epi32(v4, rnding);
+ v4 = _mm_srai_epi32(v4, bit);
+
+ x = _mm_mullo_epi32(u4, cospi28);
+ y = _mm_mullo_epi32(u5, cospim36);
+ v5 = _mm_add_epi32(x, y);
+ v5 = _mm_add_epi32(v5, rnding);
+ v5 = _mm_srai_epi32(v5, bit);
+
+ x = _mm_mullo_epi32(u6, cospi52);
+ y = _mm_mullo_epi32(u7, cospi12);
+ v6 = _mm_add_epi32(x, y);
+ v6 = _mm_add_epi32(v6, rnding);
+ v6 = _mm_srai_epi32(v6, bit);
+
+ x = _mm_mullo_epi32(u6, cospi12);
+ y = _mm_mullo_epi32(u7, cospim52);
+ v7 = _mm_add_epi32(x, y);
+ v7 = _mm_add_epi32(v7, rnding);
+ v7 = _mm_srai_epi32(v7, bit);
+
+ // stage 7
+ out[2 * 0 + col] = v1;
+ out[2 * 1 + col] = v6;
+ out[2 * 2 + col] = v3;
+ out[2 * 3 + col] = v4;
+ out[2 * 4 + col] = v5;
+ out[2 * 5 + col] = v2;
+ out[2 * 6 + col] = v7;
+ out[2 * 7 + col] = v0;
+ }
+}
+
+static void round_shift_8x8(__m128i *in, int shift) {
+ round_shift_4x4(&in[0], shift);
+ round_shift_4x4(&in[4], shift);
+ round_shift_4x4(&in[8], shift);
+ round_shift_4x4(&in[12], shift);
+}
+
+static void write_buffer_8x8(__m128i *in, uint16_t *output, int stride,
+ int shift, int bd) {
+ const __m128i zero = _mm_setzero_si128();
+ __m128i u0, u1, u2, u3, u4, u5, u6, u7;
+ __m128i v0, v1, v2, v3, v4, v5, v6, v7;
+ __m128i x0, x1;
+
+ round_shift_8x8(in, shift);
+
+ v0 = _mm_load_si128((__m128i const *)(output + 0 * stride));
+ v1 = _mm_load_si128((__m128i const *)(output + 1 * stride));
+ v2 = _mm_load_si128((__m128i const *)(output + 2 * stride));
+ v3 = _mm_load_si128((__m128i const *)(output + 3 * stride));
+ v4 = _mm_load_si128((__m128i const *)(output + 4 * stride));
+ v5 = _mm_load_si128((__m128i const *)(output + 5 * stride));
+ v6 = _mm_load_si128((__m128i const *)(output + 6 * stride));
+ v7 = _mm_load_si128((__m128i const *)(output + 7 * stride));
+
+ x0 = _mm_unpacklo_epi16(v0, zero);
+ x1 = _mm_unpackhi_epi16(v0, zero);
+ x0 = _mm_add_epi32(in[0], x0);
+ x1 = _mm_add_epi32(in[1], x1);
+ x0 = _mm_packus_epi32(x0, x1);
+ u0 = highbd_clamp_epi16(x0, bd);
+
+ x0 = _mm_unpacklo_epi16(v1, zero);
+ x1 = _mm_unpackhi_epi16(v1, zero);
+ x0 = _mm_add_epi32(in[2], x0);
+ x1 = _mm_add_epi32(in[3], x1);
+ x0 = _mm_packus_epi32(x0, x1);
+ u1 = highbd_clamp_epi16(x0, bd);
+
+ x0 = _mm_unpacklo_epi16(v2, zero);
+ x1 = _mm_unpackhi_epi16(v2, zero);
+ x0 = _mm_add_epi32(in[4], x0);
+ x1 = _mm_add_epi32(in[5], x1);
+ x0 = _mm_packus_epi32(x0, x1);
+ u2 = highbd_clamp_epi16(x0, bd);
+
+ x0 = _mm_unpacklo_epi16(v3, zero);
+ x1 = _mm_unpackhi_epi16(v3, zero);
+ x0 = _mm_add_epi32(in[6], x0);
+ x1 = _mm_add_epi32(in[7], x1);
+ x0 = _mm_packus_epi32(x0, x1);
+ u3 = highbd_clamp_epi16(x0, bd);
+
+ x0 = _mm_unpacklo_epi16(v4, zero);
+ x1 = _mm_unpackhi_epi16(v4, zero);
+ x0 = _mm_add_epi32(in[8], x0);
+ x1 = _mm_add_epi32(in[9], x1);
+ x0 = _mm_packus_epi32(x0, x1);
+ u4 = highbd_clamp_epi16(x0, bd);
+
+ x0 = _mm_unpacklo_epi16(v5, zero);
+ x1 = _mm_unpackhi_epi16(v5, zero);
+ x0 = _mm_add_epi32(in[10], x0);
+ x1 = _mm_add_epi32(in[11], x1);
+ x0 = _mm_packus_epi32(x0, x1);
+ u5 = highbd_clamp_epi16(x0, bd);
+
+ x0 = _mm_unpacklo_epi16(v6, zero);
+ x1 = _mm_unpackhi_epi16(v6, zero);
+ x0 = _mm_add_epi32(in[12], x0);
+ x1 = _mm_add_epi32(in[13], x1);
+ x0 = _mm_packus_epi32(x0, x1);
+ u6 = highbd_clamp_epi16(x0, bd);
+
+ x0 = _mm_unpacklo_epi16(v7, zero);
+ x1 = _mm_unpackhi_epi16(v7, zero);
+ x0 = _mm_add_epi32(in[14], x0);
+ x1 = _mm_add_epi32(in[15], x1);
+ x0 = _mm_packus_epi32(x0, x1);
+ u7 = highbd_clamp_epi16(x0, bd);
+
+ _mm_store_si128((__m128i *)(output + 0 * stride), u0);
+ _mm_store_si128((__m128i *)(output + 1 * stride), u1);
+ _mm_store_si128((__m128i *)(output + 2 * stride), u2);
+ _mm_store_si128((__m128i *)(output + 3 * stride), u3);
+ _mm_store_si128((__m128i *)(output + 4 * stride), u4);
+ _mm_store_si128((__m128i *)(output + 5 * stride), u5);
+ _mm_store_si128((__m128i *)(output + 6 * stride), u6);
+ _mm_store_si128((__m128i *)(output + 7 * stride), u7);
+}
+
+void vp10_inv_txfm2d_add_8x8_sse4_1(const int32_t *coeff, uint16_t *output,
+ int stride, int tx_type, int bd) {
+ __m128i in[16], out[16];
+ const TXFM_2D_CFG *cfg = NULL;
+
+ switch (tx_type) {
+ case DCT_DCT:
+ cfg = &inv_txfm_2d_cfg_dct_dct_8;
+ load_buffer_8x8(coeff, in);
+ transpose_8x8(in, out);
+ idct8x8_sse4_1(out, in, cfg->cos_bit_row[2]);
+ transpose_8x8(in, out);
+ idct8x8_sse4_1(out, in, cfg->cos_bit_col[2]);
+ write_buffer_8x8(in, output, stride, -cfg->shift[1], bd);
+ break;
+ case DCT_ADST:
+ cfg = &inv_txfm_2d_cfg_dct_adst_8;
+ load_buffer_8x8(coeff, in);
+ transpose_8x8(in, out);
+ iadst8x8_sse4_1(out, in, cfg->cos_bit_row[2]);
+ transpose_8x8(in, out);
+ idct8x8_sse4_1(out, in, cfg->cos_bit_col[2]);
+ write_buffer_8x8(in, output, stride, -cfg->shift[1], bd);
+ break;
+ case ADST_DCT:
+ cfg = &inv_txfm_2d_cfg_adst_dct_8;
+ load_buffer_8x8(coeff, in);
+ transpose_8x8(in, out);
+ idct8x8_sse4_1(out, in, cfg->cos_bit_row[2]);
+ transpose_8x8(in, out);
+ iadst8x8_sse4_1(out, in, cfg->cos_bit_col[2]);
+ write_buffer_8x8(in, output, stride, -cfg->shift[1], bd);
+ break;
+ case ADST_ADST:
+ cfg = &inv_txfm_2d_cfg_adst_adst_8;
+ load_buffer_8x8(coeff, in);
+ transpose_8x8(in, out);
+ iadst8x8_sse4_1(out, in, cfg->cos_bit_row[2]);
+ transpose_8x8(in, out);
+ iadst8x8_sse4_1(out, in, cfg->cos_bit_col[2]);
+ write_buffer_8x8(in, output, stride, -cfg->shift[1], bd);
+ break;
+ default:
+ assert(0);
+ }
+}
+
+// 16x16
+static void load_buffer_16x16(const int32_t *coeff, __m128i *in) {
+ int i;
+ for (i = 0; i < 64; ++i) {
+ in[i] = _mm_load_si128((const __m128i *)(coeff + (i << 2)));
+ }
+}
+
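+// Copy one 8x8 quadrant out of the 16x16 register layout: two __m128i per
+// row for eight rows, starting at register offset 'col' (0, 2, 32 or 34).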
+static void assign_8x8_input_from_16x16(const __m128i *in, __m128i *in8x8,
+ int col) {
+ int i;
+ for (i = 0; i < 16; i += 2) {
+ in8x8[i] = in[col];
+ in8x8[i + 1] = in[col + 1];
+ col += 4;
+ }
+}
+
+static void write_buffer_16x16(__m128i *in, uint16_t *output, int stride,
+ int shift, int bd) {
+ __m128i in8x8[16];
+
+  // Upper-left quarter
+ assign_8x8_input_from_16x16(in, in8x8, 0);
+ write_buffer_8x8(in8x8, &output[0], stride, shift, bd);
+
+  // Upper-right quarter
+ assign_8x8_input_from_16x16(in, in8x8, 2);
+ write_buffer_8x8(in8x8, &output[8], stride, shift, bd);
+
+  // Lower-left quarter
+ assign_8x8_input_from_16x16(in, in8x8, 32);
+ write_buffer_8x8(in8x8, &output[8 * stride], stride, shift, bd);
+
+  // Lower-right quarter
+ assign_8x8_input_from_16x16(in, in8x8, 34);
+ write_buffer_8x8(in8x8, &output[8 * stride + 8], stride, shift, bd);
+}
+
+static void idct16x16_sse4_1(__m128i *in, __m128i *out, int bit) {
+ const int32_t *cospi = cospi_arr[bit - cos_bit_min];
+ const __m128i cospi60 = _mm_set1_epi32(cospi[60]);
+ const __m128i cospim4 = _mm_set1_epi32(-cospi[4]);
+ const __m128i cospi28 = _mm_set1_epi32(cospi[28]);
+ const __m128i cospim36 = _mm_set1_epi32(-cospi[36]);
+ const __m128i cospi44 = _mm_set1_epi32(cospi[44]);
+ const __m128i cospi20 = _mm_set1_epi32(cospi[20]);
+ const __m128i cospim20 = _mm_set1_epi32(-cospi[20]);
+ const __m128i cospi12 = _mm_set1_epi32(cospi[12]);
+ const __m128i cospim52 = _mm_set1_epi32(-cospi[52]);
+ const __m128i cospi52 = _mm_set1_epi32(cospi[52]);
+ const __m128i cospi36 = _mm_set1_epi32(cospi[36]);
+ const __m128i cospi4 = _mm_set1_epi32(cospi[4]);
+ const __m128i cospi56 = _mm_set1_epi32(cospi[56]);
+ const __m128i cospim8 = _mm_set1_epi32(-cospi[8]);
+ const __m128i cospi24 = _mm_set1_epi32(cospi[24]);
+ const __m128i cospim40 = _mm_set1_epi32(-cospi[40]);
+ const __m128i cospi40 = _mm_set1_epi32(cospi[40]);
+ const __m128i cospi8 = _mm_set1_epi32(cospi[8]);
+ const __m128i cospi32 = _mm_set1_epi32(cospi[32]);
+ const __m128i cospi48 = _mm_set1_epi32(cospi[48]);
+ const __m128i cospi16 = _mm_set1_epi32(cospi[16]);
+ const __m128i cospim16 = _mm_set1_epi32(-cospi[16]);
+ const __m128i cospim48 = _mm_set1_epi32(-cospi[48]);
+ const __m128i rnding = _mm_set1_epi32(1 << (bit - 1));
+ __m128i u[16], v[16], x, y;
+ int col;
+
+ for (col = 0; col < 4; ++col) {
+ // stage 0
+ // stage 1
+ u[0] = in[0 * 4 + col];
+ u[1] = in[8 * 4 + col];
+ u[2] = in[4 * 4 + col];
+ u[3] = in[12 * 4 + col];
+ u[4] = in[2 * 4 + col];
+ u[5] = in[10 * 4 + col];
+ u[6] = in[6 * 4 + col];
+ u[7] = in[14 * 4 + col];
+ u[8] = in[1 * 4 + col];
+ u[9] = in[9 * 4 + col];
+ u[10] = in[5 * 4 + col];
+ u[11] = in[13 * 4 + col];
+ u[12] = in[3 * 4 + col];
+ u[13] = in[11 * 4 + col];
+ u[14] = in[7 * 4 + col];
+ u[15] = in[15 * 4 + col];
+
+ // stage 2
+ v[0] = u[0];
+ v[1] = u[1];
+ v[2] = u[2];
+ v[3] = u[3];
+ v[4] = u[4];
+ v[5] = u[5];
+ v[6] = u[6];
+ v[7] = u[7];
+
+ v[8] = half_btf_sse4_1(cospi60, u[8], cospim4, u[15], rnding, bit);
+ v[9] = half_btf_sse4_1(cospi28, u[9], cospim36, u[14], rnding, bit);
+ v[10] = half_btf_sse4_1(cospi44, u[10], cospim20, u[13], rnding, bit);
+ v[11] = half_btf_sse4_1(cospi12, u[11], cospim52, u[12], rnding, bit);
+ v[12] = half_btf_sse4_1(cospi52, u[11], cospi12, u[12], rnding, bit);
+ v[13] = half_btf_sse4_1(cospi20, u[10], cospi44, u[13], rnding, bit);
+ v[14] = half_btf_sse4_1(cospi36, u[9], cospi28, u[14], rnding, bit);
+ v[15] = half_btf_sse4_1(cospi4, u[8], cospi60, u[15], rnding, bit);
+
+ // stage 3
+ u[0] = v[0];
+ u[1] = v[1];
+ u[2] = v[2];
+ u[3] = v[3];
+ u[4] = half_btf_sse4_1(cospi56, v[4], cospim8, v[7], rnding, bit);
+ u[5] = half_btf_sse4_1(cospi24, v[5], cospim40, v[6], rnding, bit);
+ u[6] = half_btf_sse4_1(cospi40, v[5], cospi24, v[6], rnding, bit);
+ u[7] = half_btf_sse4_1(cospi8, v[4], cospi56, v[7], rnding, bit);
+ u[8] = _mm_add_epi32(v[8], v[9]);
+ u[9] = _mm_sub_epi32(v[8], v[9]);
+ u[10] = _mm_sub_epi32(v[11], v[10]);
+ u[11] = _mm_add_epi32(v[10], v[11]);
+ u[12] = _mm_add_epi32(v[12], v[13]);
+ u[13] = _mm_sub_epi32(v[12], v[13]);
+ u[14] = _mm_sub_epi32(v[15], v[14]);
+ u[15] = _mm_add_epi32(v[14], v[15]);
+
+ // stage 4
+ x = _mm_mullo_epi32(u[0], cospi32);
+ y = _mm_mullo_epi32(u[1], cospi32);
+ v[0] = _mm_add_epi32(x, y);
+ v[0] = _mm_add_epi32(v[0], rnding);
+ v[0] = _mm_srai_epi32(v[0], bit);
+
+ v[1] = _mm_sub_epi32(x, y);
+ v[1] = _mm_add_epi32(v[1], rnding);
+ v[1] = _mm_srai_epi32(v[1], bit);
+
+ v[2] = half_btf_sse4_1(cospi48, u[2], cospim16, u[3], rnding, bit);
+ v[3] = half_btf_sse4_1(cospi16, u[2], cospi48, u[3], rnding, bit);
+ v[4] = _mm_add_epi32(u[4], u[5]);
+ v[5] = _mm_sub_epi32(u[4], u[5]);
+ v[6] = _mm_sub_epi32(u[7], u[6]);
+ v[7] = _mm_add_epi32(u[6], u[7]);
+ v[8] = u[8];
+ v[9] = half_btf_sse4_1(cospim16, u[9], cospi48, u[14], rnding, bit);
+ v[10] = half_btf_sse4_1(cospim48, u[10], cospim16, u[13], rnding, bit);
+ v[11] = u[11];
+ v[12] = u[12];
+ v[13] = half_btf_sse4_1(cospim16, u[10], cospi48, u[13], rnding, bit);
+ v[14] = half_btf_sse4_1(cospi48, u[9], cospi16, u[14], rnding, bit);
+ v[15] = u[15];
+
+ // stage 5
+ u[0] = _mm_add_epi32(v[0], v[3]);
+ u[1] = _mm_add_epi32(v[1], v[2]);
+ u[2] = _mm_sub_epi32(v[1], v[2]);
+ u[3] = _mm_sub_epi32(v[0], v[3]);
+ u[4] = v[4];
+
+ x = _mm_mullo_epi32(v[5], cospi32);
+ y = _mm_mullo_epi32(v[6], cospi32);
+ u[5] = _mm_sub_epi32(y, x);
+ u[5] = _mm_add_epi32(u[5], rnding);
+ u[5] = _mm_srai_epi32(u[5], bit);
+
+ u[6] = _mm_add_epi32(y, x);
+ u[6] = _mm_add_epi32(u[6], rnding);
+ u[6] = _mm_srai_epi32(u[6], bit);
+
+ u[7] = v[7];
+ u[8] = _mm_add_epi32(v[8], v[11]);
+ u[9] = _mm_add_epi32(v[9], v[10]);
+ u[10] = _mm_sub_epi32(v[9], v[10]);
+ u[11] = _mm_sub_epi32(v[8], v[11]);
+ u[12] = _mm_sub_epi32(v[15], v[12]);
+ u[13] = _mm_sub_epi32(v[14], v[13]);
+ u[14] = _mm_add_epi32(v[13], v[14]);
+ u[15] = _mm_add_epi32(v[12], v[15]);
+
+ // stage 6
+ v[0] = _mm_add_epi32(u[0], u[7]);
+ v[1] = _mm_add_epi32(u[1], u[6]);
+ v[2] = _mm_add_epi32(u[2], u[5]);
+ v[3] = _mm_add_epi32(u[3], u[4]);
+ v[4] = _mm_sub_epi32(u[3], u[4]);
+ v[5] = _mm_sub_epi32(u[2], u[5]);
+ v[6] = _mm_sub_epi32(u[1], u[6]);
+ v[7] = _mm_sub_epi32(u[0], u[7]);
+ v[8] = u[8];
+ v[9] = u[9];
+
+ x = _mm_mullo_epi32(u[10], cospi32);
+ y = _mm_mullo_epi32(u[13], cospi32);
+ v[10] = _mm_sub_epi32(y, x);
+ v[10] = _mm_add_epi32(v[10], rnding);
+ v[10] = _mm_srai_epi32(v[10], bit);
+
+ v[13] = _mm_add_epi32(x, y);
+ v[13] = _mm_add_epi32(v[13], rnding);
+ v[13] = _mm_srai_epi32(v[13], bit);
+
+ x = _mm_mullo_epi32(u[11], cospi32);
+ y = _mm_mullo_epi32(u[12], cospi32);
+ v[11] = _mm_sub_epi32(y, x);
+ v[11] = _mm_add_epi32(v[11], rnding);
+ v[11] = _mm_srai_epi32(v[11], bit);
+
+ v[12] = _mm_add_epi32(x, y);
+ v[12] = _mm_add_epi32(v[12], rnding);
+ v[12] = _mm_srai_epi32(v[12], bit);
+
+ v[14] = u[14];
+ v[15] = u[15];
+
+ // stage 7
+ out[0 * 4 + col] = _mm_add_epi32(v[0], v[15]);
+ out[1 * 4 + col] = _mm_add_epi32(v[1], v[14]);
+ out[2 * 4 + col] = _mm_add_epi32(v[2], v[13]);
+ out[3 * 4 + col] = _mm_add_epi32(v[3], v[12]);
+ out[4 * 4 + col] = _mm_add_epi32(v[4], v[11]);
+ out[5 * 4 + col] = _mm_add_epi32(v[5], v[10]);
+ out[6 * 4 + col] = _mm_add_epi32(v[6], v[9]);
+ out[7 * 4 + col] = _mm_add_epi32(v[7], v[8]);
+ out[8 * 4 + col] = _mm_sub_epi32(v[7], v[8]);
+ out[9 * 4 + col] = _mm_sub_epi32(v[6], v[9]);
+ out[10 * 4 + col] = _mm_sub_epi32(v[5], v[10]);
+ out[11 * 4 + col] = _mm_sub_epi32(v[4], v[11]);
+ out[12 * 4 + col] = _mm_sub_epi32(v[3], v[12]);
+ out[13 * 4 + col] = _mm_sub_epi32(v[2], v[13]);
+ out[14 * 4 + col] = _mm_sub_epi32(v[1], v[14]);
+ out[15 * 4 + col] = _mm_sub_epi32(v[0], v[15]);
+ }
+}
+
+static void iadst16x16_sse4_1(__m128i *in, __m128i *out, int bit) {
+ const int32_t *cospi = cospi_arr[bit - cos_bit_min];
+ const __m128i cospi32 = _mm_set1_epi32(cospi[32]);
+ const __m128i cospi48 = _mm_set1_epi32(cospi[48]);
+ const __m128i cospi16 = _mm_set1_epi32(cospi[16]);
+ const __m128i cospim16 = _mm_set1_epi32(-cospi[16]);
+ const __m128i cospim48 = _mm_set1_epi32(-cospi[48]);
+ const __m128i cospi8 = _mm_set1_epi32(cospi[8]);
+ const __m128i cospi56 = _mm_set1_epi32(cospi[56]);
+ const __m128i cospim56 = _mm_set1_epi32(-cospi[56]);
+ const __m128i cospim8 = _mm_set1_epi32(-cospi[8]);
+ const __m128i cospi24 = _mm_set1_epi32(cospi[24]);
+ const __m128i cospim24 = _mm_set1_epi32(-cospi[24]);
+ const __m128i cospim40 = _mm_set1_epi32(-cospi[40]);
+ const __m128i cospi40 = _mm_set1_epi32(cospi[40]);
+ const __m128i cospi2 = _mm_set1_epi32(cospi[2]);
+ const __m128i cospi62 = _mm_set1_epi32(cospi[62]);
+ const __m128i cospim2 = _mm_set1_epi32(-cospi[2]);
+ const __m128i cospi10 = _mm_set1_epi32(cospi[10]);
+ const __m128i cospi54 = _mm_set1_epi32(cospi[54]);
+ const __m128i cospim10 = _mm_set1_epi32(-cospi[10]);
+ const __m128i cospi18 = _mm_set1_epi32(cospi[18]);
+ const __m128i cospi46 = _mm_set1_epi32(cospi[46]);
+ const __m128i cospim18 = _mm_set1_epi32(-cospi[18]);
+ const __m128i cospi26 = _mm_set1_epi32(cospi[26]);
+ const __m128i cospi38 = _mm_set1_epi32(cospi[38]);
+ const __m128i cospim26 = _mm_set1_epi32(-cospi[26]);
+ const __m128i cospi34 = _mm_set1_epi32(cospi[34]);
+ const __m128i cospi30 = _mm_set1_epi32(cospi[30]);
+ const __m128i cospim34 = _mm_set1_epi32(-cospi[34]);
+ const __m128i cospi42 = _mm_set1_epi32(cospi[42]);
+ const __m128i cospi22 = _mm_set1_epi32(cospi[22]);
+ const __m128i cospim42 = _mm_set1_epi32(-cospi[42]);
+ const __m128i cospi50 = _mm_set1_epi32(cospi[50]);
+ const __m128i cospi14 = _mm_set1_epi32(cospi[14]);
+ const __m128i cospim50 = _mm_set1_epi32(-cospi[50]);
+ const __m128i cospi58 = _mm_set1_epi32(cospi[58]);
+ const __m128i cospi6 = _mm_set1_epi32(cospi[6]);
+ const __m128i cospim58 = _mm_set1_epi32(-cospi[58]);
+ const __m128i rnding = _mm_set1_epi32(1 << (bit - 1));
+ const __m128i zero = _mm_setzero_si128();
+
+ __m128i u[16], v[16], x, y;
+ int col;
+
+ for (col = 0; col < 4; ++col) {
+ // stage 0
+ // stage 1
+ u[0] = in[0 * 4 + col];
+ u[1] = _mm_sub_epi32(zero, in[15 * 4 + col]);
+ u[2] = _mm_sub_epi32(zero, in[7 * 4 + col]);
+ u[3] = in[8 * 4 + col];
+ u[4] = _mm_sub_epi32(zero, in[3 * 4 + col]);
+ u[5] = in[12 * 4 + col];
+ u[6] = in[4 * 4 + col];
+ u[7] = _mm_sub_epi32(zero, in[11 * 4 + col]);
+ u[8] = _mm_sub_epi32(zero, in[1 * 4 + col]);
+ u[9] = in[14 * 4 + col];
+ u[10] = in[6 * 4 + col];
+ u[11] = _mm_sub_epi32(zero, in[9 * 4 + col]);
+ u[12] = in[2 * 4 + col];
+ u[13] = _mm_sub_epi32(zero, in[13 * 4 + col]);
+ u[14] = _mm_sub_epi32(zero, in[5 * 4 + col]);
+ u[15] = in[10 * 4 + col];
+
+ // stage 2
+ v[0] = u[0];
+ v[1] = u[1];
+
+ x = _mm_mullo_epi32(u[2], cospi32);
+ y = _mm_mullo_epi32(u[3], cospi32);
+ v[2] = _mm_add_epi32(x, y);
+ v[2] = _mm_add_epi32(v[2], rnding);
+ v[2] = _mm_srai_epi32(v[2], bit);
+
+ v[3] = _mm_sub_epi32(x, y);
+ v[3] = _mm_add_epi32(v[3], rnding);
+ v[3] = _mm_srai_epi32(v[3], bit);
+
+ v[4] = u[4];
+ v[5] = u[5];
+
+ x = _mm_mullo_epi32(u[6], cospi32);
+ y = _mm_mullo_epi32(u[7], cospi32);
+ v[6] = _mm_add_epi32(x, y);
+ v[6] = _mm_add_epi32(v[6], rnding);
+ v[6] = _mm_srai_epi32(v[6], bit);
+
+ v[7] = _mm_sub_epi32(x, y);
+ v[7] = _mm_add_epi32(v[7], rnding);
+ v[7] = _mm_srai_epi32(v[7], bit);
+
+ v[8] = u[8];
+ v[9] = u[9];
+
+ x = _mm_mullo_epi32(u[10], cospi32);
+ y = _mm_mullo_epi32(u[11], cospi32);
+ v[10] = _mm_add_epi32(x, y);
+ v[10] = _mm_add_epi32(v[10], rnding);
+ v[10] = _mm_srai_epi32(v[10], bit);
+
+ v[11] = _mm_sub_epi32(x, y);
+ v[11] = _mm_add_epi32(v[11], rnding);
+ v[11] = _mm_srai_epi32(v[11], bit);
+
+ v[12] = u[12];
+ v[13] = u[13];
+
+ x = _mm_mullo_epi32(u[14], cospi32);
+ y = _mm_mullo_epi32(u[15], cospi32);
+ v[14] = _mm_add_epi32(x, y);
+ v[14] = _mm_add_epi32(v[14], rnding);
+ v[14] = _mm_srai_epi32(v[14], bit);
+
+ v[15] = _mm_sub_epi32(x, y);
+ v[15] = _mm_add_epi32(v[15], rnding);
+ v[15] = _mm_srai_epi32(v[15], bit);
+
+ // stage 3
+ u[0] = _mm_add_epi32(v[0], v[2]);
+ u[1] = _mm_add_epi32(v[1], v[3]);
+ u[2] = _mm_sub_epi32(v[0], v[2]);
+ u[3] = _mm_sub_epi32(v[1], v[3]);
+ u[4] = _mm_add_epi32(v[4], v[6]);
+ u[5] = _mm_add_epi32(v[5], v[7]);
+ u[6] = _mm_sub_epi32(v[4], v[6]);
+ u[7] = _mm_sub_epi32(v[5], v[7]);
+ u[8] = _mm_add_epi32(v[8], v[10]);
+ u[9] = _mm_add_epi32(v[9], v[11]);
+ u[10] = _mm_sub_epi32(v[8], v[10]);
+ u[11] = _mm_sub_epi32(v[9], v[11]);
+ u[12] = _mm_add_epi32(v[12], v[14]);
+ u[13] = _mm_add_epi32(v[13], v[15]);
+ u[14] = _mm_sub_epi32(v[12], v[14]);
+ u[15] = _mm_sub_epi32(v[13], v[15]);
+
+ // stage 4
+ v[0] = u[0];
+ v[1] = u[1];
+ v[2] = u[2];
+ v[3] = u[3];
+ v[4] = half_btf_sse4_1(cospi16, u[4], cospi48, u[5], rnding, bit);
+ v[5] = half_btf_sse4_1(cospi48, u[4], cospim16, u[5], rnding, bit);
+ v[6] = half_btf_sse4_1(cospim48, u[6], cospi16, u[7], rnding, bit);
+ v[7] = half_btf_sse4_1(cospi16, u[6], cospi48, u[7], rnding, bit);
+ v[8] = u[8];
+ v[9] = u[9];
+ v[10] = u[10];
+ v[11] = u[11];
+ v[12] = half_btf_sse4_1(cospi16, u[12], cospi48, u[13], rnding, bit);
+ v[13] = half_btf_sse4_1(cospi48, u[12], cospim16, u[13], rnding, bit);
+ v[14] = half_btf_sse4_1(cospim48, u[14], cospi16, u[15], rnding, bit);
+ v[15] = half_btf_sse4_1(cospi16, u[14], cospi48, u[15], rnding, bit);
+
+ // stage 5
+ u[0] = _mm_add_epi32(v[0], v[4]);
+ u[1] = _mm_add_epi32(v[1], v[5]);
+ u[2] = _mm_add_epi32(v[2], v[6]);
+ u[3] = _mm_add_epi32(v[3], v[7]);
+ u[4] = _mm_sub_epi32(v[0], v[4]);
+ u[5] = _mm_sub_epi32(v[1], v[5]);
+ u[6] = _mm_sub_epi32(v[2], v[6]);
+ u[7] = _mm_sub_epi32(v[3], v[7]);
+ u[8] = _mm_add_epi32(v[8], v[12]);
+ u[9] = _mm_add_epi32(v[9], v[13]);
+ u[10] = _mm_add_epi32(v[10], v[14]);
+ u[11] = _mm_add_epi32(v[11], v[15]);
+ u[12] = _mm_sub_epi32(v[8], v[12]);
+ u[13] = _mm_sub_epi32(v[9], v[13]);
+ u[14] = _mm_sub_epi32(v[10], v[14]);
+ u[15] = _mm_sub_epi32(v[11], v[15]);
+
+ // stage 6
+ v[0] = u[0];
+ v[1] = u[1];
+ v[2] = u[2];
+ v[3] = u[3];
+ v[4] = u[4];
+ v[5] = u[5];
+ v[6] = u[6];
+ v[7] = u[7];
+ v[8] = half_btf_sse4_1(cospi8, u[8], cospi56, u[9], rnding, bit);
+ v[9] = half_btf_sse4_1(cospi56, u[8], cospim8, u[9], rnding, bit);
+ v[10] = half_btf_sse4_1(cospi40, u[10], cospi24, u[11], rnding, bit);
+ v[11] = half_btf_sse4_1(cospi24, u[10], cospim40, u[11], rnding, bit);
+ v[12] = half_btf_sse4_1(cospim56, u[12], cospi8, u[13], rnding, bit);
+ v[13] = half_btf_sse4_1(cospi8, u[12], cospi56, u[13], rnding, bit);
+ v[14] = half_btf_sse4_1(cospim24, u[14], cospi40, u[15], rnding, bit);
+ v[15] = half_btf_sse4_1(cospi40, u[14], cospi24, u[15], rnding, bit);
+
+ // stage 7
+ u[0] = _mm_add_epi32(v[0], v[8]);
+ u[1] = _mm_add_epi32(v[1], v[9]);
+ u[2] = _mm_add_epi32(v[2], v[10]);
+ u[3] = _mm_add_epi32(v[3], v[11]);
+ u[4] = _mm_add_epi32(v[4], v[12]);
+ u[5] = _mm_add_epi32(v[5], v[13]);
+ u[6] = _mm_add_epi32(v[6], v[14]);
+ u[7] = _mm_add_epi32(v[7], v[15]);
+ u[8] = _mm_sub_epi32(v[0], v[8]);
+ u[9] = _mm_sub_epi32(v[1], v[9]);
+ u[10] = _mm_sub_epi32(v[2], v[10]);
+ u[11] = _mm_sub_epi32(v[3], v[11]);
+ u[12] = _mm_sub_epi32(v[4], v[12]);
+ u[13] = _mm_sub_epi32(v[5], v[13]);
+ u[14] = _mm_sub_epi32(v[6], v[14]);
+ u[15] = _mm_sub_epi32(v[7], v[15]);
+
+ // stage 8
+ v[0] = half_btf_sse4_1(cospi2, u[0], cospi62, u[1], rnding, bit);
+ v[1] = half_btf_sse4_1(cospi62, u[0], cospim2, u[1], rnding, bit);
+ v[2] = half_btf_sse4_1(cospi10, u[2], cospi54, u[3], rnding, bit);
+ v[3] = half_btf_sse4_1(cospi54, u[2], cospim10, u[3], rnding, bit);
+ v[4] = half_btf_sse4_1(cospi18, u[4], cospi46, u[5], rnding, bit);
+ v[5] = half_btf_sse4_1(cospi46, u[4], cospim18, u[5], rnding, bit);
+ v[6] = half_btf_sse4_1(cospi26, u[6], cospi38, u[7], rnding, bit);
+ v[7] = half_btf_sse4_1(cospi38, u[6], cospim26, u[7], rnding, bit);
+ v[8] = half_btf_sse4_1(cospi34, u[8], cospi30, u[9], rnding, bit);
+ v[9] = half_btf_sse4_1(cospi30, u[8], cospim34, u[9], rnding, bit);
+ v[10] = half_btf_sse4_1(cospi42, u[10], cospi22, u[11], rnding, bit);
+ v[11] = half_btf_sse4_1(cospi22, u[10], cospim42, u[11], rnding, bit);
+ v[12] = half_btf_sse4_1(cospi50, u[12], cospi14, u[13], rnding, bit);
+ v[13] = half_btf_sse4_1(cospi14, u[12], cospim50, u[13], rnding, bit);
+ v[14] = half_btf_sse4_1(cospi58, u[14], cospi6, u[15], rnding, bit);
+ v[15] = half_btf_sse4_1(cospi6, u[14], cospim58, u[15], rnding, bit);
+
+ // stage 9
+ out[0 * 4 + col] = v[1];
+ out[1 * 4 + col] = v[14];
+ out[2 * 4 + col] = v[3];
+ out[3 * 4 + col] = v[12];
+ out[4 * 4 + col] = v[5];
+ out[5 * 4 + col] = v[10];
+ out[6 * 4 + col] = v[7];
+ out[7 * 4 + col] = v[8];
+ out[8 * 4 + col] = v[9];
+ out[9 * 4 + col] = v[6];
+ out[10 * 4 + col] = v[11];
+ out[11 * 4 + col] = v[4];
+ out[12 * 4 + col] = v[13];
+ out[13 * 4 + col] = v[2];
+ out[14 * 4 + col] = v[15];
+ out[15 * 4 + col] = v[0];
+ }
+}
+
+static void round_shift_16x16(__m128i *in, int shift) {
+ round_shift_8x8(&in[0], shift);
+ round_shift_8x8(&in[16], shift);
+ round_shift_8x8(&in[32], shift);
+ round_shift_8x8(&in[48], shift);
+}
+
+void vp10_inv_txfm2d_add_16x16_sse4_1(const int32_t *coeff, uint16_t *output,
+ int stride, int tx_type, int bd) {
+ __m128i in[64], out[64];
+ const TXFM_2D_CFG *cfg = NULL;
+
+ switch (tx_type) {
+ case DCT_DCT:
+ cfg = &inv_txfm_2d_cfg_dct_dct_16;
+ load_buffer_16x16(coeff, in);
+ transpose_16x16(in, out);
+ idct16x16_sse4_1(out, in, cfg->cos_bit_row[2]);
+ round_shift_16x16(in, -cfg->shift[0]);
+ transpose_16x16(in, out);
+ idct16x16_sse4_1(out, in, cfg->cos_bit_col[2]);
+ write_buffer_16x16(in, output, stride, -cfg->shift[1], bd);
+ break;
+ case DCT_ADST:
+ cfg = &inv_txfm_2d_cfg_dct_adst_16;
+ load_buffer_16x16(coeff, in);
+ transpose_16x16(in, out);
+ iadst16x16_sse4_1(out, in, cfg->cos_bit_row[2]);
+ round_shift_16x16(in, -cfg->shift[0]);
+ transpose_16x16(in, out);
+ idct16x16_sse4_1(out, in, cfg->cos_bit_col[2]);
+ write_buffer_16x16(in, output, stride, -cfg->shift[1], bd);
+ break;
+ case ADST_DCT:
+ cfg = &inv_txfm_2d_cfg_adst_dct_16;
+ load_buffer_16x16(coeff, in);
+ transpose_16x16(in, out);
+ idct16x16_sse4_1(out, in, cfg->cos_bit_row[2]);
+ round_shift_16x16(in, -cfg->shift[0]);
+ transpose_16x16(in, out);
+ iadst16x16_sse4_1(out, in, cfg->cos_bit_col[2]);
+ write_buffer_16x16(in, output, stride, -cfg->shift[1], bd);
+ break;
+ case ADST_ADST:
+ cfg = &inv_txfm_2d_cfg_adst_adst_16;
+ load_buffer_16x16(coeff, in);
+ transpose_16x16(in, out);
+ iadst16x16_sse4_1(out, in, cfg->cos_bit_row[2]);
+ round_shift_16x16(in, -cfg->shift[0]);
+ transpose_16x16(in, out);
+ iadst16x16_sse4_1(out, in, cfg->cos_bit_col[2]);
+ write_buffer_16x16(in, output, stride, -cfg->shift[1], bd);
+ break;
+ default:
+ assert(0);
+ }
+}
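
All three entry points above share the same shape: load the 32-bit coefficients, run the row pass with cfg->cos_bit_row[2], transpose (inside the kernel for 4x4, explicitly for 8x8 and 16x16), run the column pass with cfg->cos_bit_col[2] (the 16x16 path also round-shifts by -shift[0] between the passes), and let write_buffer_* round-shift by -shift[1], add the prediction already sitting in output, and clamp to the bit depth. A scalar sketch of that final write-back for one pixel (illustrative only; the SIMD code handles four or eight lanes at a time):

    v = (resid + (1 << (shift - 1))) >> shift;   /* round shift */
    p = output[r * stride + c] + v;              /* add prediction */
    if (p < 0) p = 0;                            /* clamp to [0, (1 << bd) - 1] */
    if (p > (1 << bd) - 1) p = (1 << bd) - 1;
    output[r * stride + c] = (uint16_t)p;
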
diff --git a/vp10/common/x86/highbd_txfm_utility_sse4.h b/vp10/common/x86/highbd_txfm_utility_sse4.h
new file mode 100644
index 0000000..319b50a
--- /dev/null
+++ b/vp10/common/x86/highbd_txfm_utility_sse4.h
@@ -0,0 +1,96 @@
+/*
+ * Copyright (c) 2016 The WebM project authors. All Rights Reserved.
+ *
+ * Use of this source code is governed by a BSD-style license
+ * that can be found in the LICENSE file in the root of the source
+ * tree. An additional intellectual property rights grant can be found
+ * in the file PATENTS. All contributing project authors may
+ * be found in the AUTHORS file in the root of the source tree.
+ */
+
+#ifndef _HIGHBD_TXFM_UTILITY_SSE4_H
+#define _HIGHBD_TXFM_UTILITY_SSE4_H
+
+#include <smmintrin.h> /* SSE4.1 */
+
+#define TRANSPOSE_4X4(x0, x1, x2, x3, y0, y1, y2, y3) \
+ do { \
+ __m128i u0, u1, u2, u3; \
+ u0 = _mm_unpacklo_epi32(x0, x1); \
+ u1 = _mm_unpackhi_epi32(x0, x1); \
+ u2 = _mm_unpacklo_epi32(x2, x3); \
+ u3 = _mm_unpackhi_epi32(x2, x3); \
+ y0 = _mm_unpacklo_epi64(u0, u2); \
+ y1 = _mm_unpackhi_epi64(u0, u2); \
+ y2 = _mm_unpacklo_epi64(u1, u3); \
+ y3 = _mm_unpackhi_epi64(u1, u3); \
+ } while (0)
+
+static INLINE void transpose_8x8(const __m128i *in, __m128i *out) {
+ TRANSPOSE_4X4(in[0], in[2], in[4], in[6],
+ out[0], out[2], out[4], out[6]);
+ TRANSPOSE_4X4(in[1], in[3], in[5], in[7],
+ out[8], out[10], out[12], out[14]);
+ TRANSPOSE_4X4(in[8], in[10], in[12], in[14],
+ out[1], out[3], out[5], out[7]);
+ TRANSPOSE_4X4(in[9], in[11], in[13], in[15],
+ out[9], out[11], out[13], out[15]);
+}
+
+static INLINE void transpose_16x16(const __m128i *in, __m128i *out) {
+ // Upper left 8x8
+ TRANSPOSE_4X4(in[0], in[4], in[8], in[12],
+ out[0], out[4], out[8], out[12]);
+ TRANSPOSE_4X4(in[1], in[5], in[9], in[13],
+ out[16], out[20], out[24], out[28]);
+ TRANSPOSE_4X4(in[16], in[20], in[24], in[28],
+ out[1], out[5], out[9], out[13]);
+ TRANSPOSE_4X4(in[17], in[21], in[25], in[29],
+ out[17], out[21], out[25], out[29]);
+
+ // Upper right 8x8
+ TRANSPOSE_4X4(in[2], in[6], in[10], in[14],
+ out[32], out[36], out[40], out[44]);
+ TRANSPOSE_4X4(in[3], in[7], in[11], in[15],
+ out[48], out[52], out[56], out[60]);
+ TRANSPOSE_4X4(in[18], in[22], in[26], in[30],
+ out[33], out[37], out[41], out[45]);
+ TRANSPOSE_4X4(in[19], in[23], in[27], in[31],
+ out[49], out[53], out[57], out[61]);
+
+ // Lower left 8x8
+ TRANSPOSE_4X4(in[32], in[36], in[40], in[44],
+ out[2], out[6], out[10], out[14]);
+ TRANSPOSE_4X4(in[33], in[37], in[41], in[45],
+ out[18], out[22], out[26], out[30]);
+ TRANSPOSE_4X4(in[48], in[52], in[56], in[60],
+ out[3], out[7], out[11], out[15]);
+ TRANSPOSE_4X4(in[49], in[53], in[57], in[61],
+ out[19], out[23], out[27], out[31]);
+ // Lower right 8x8
+ TRANSPOSE_4X4(in[34], in[38], in[42], in[46],
+ out[34], out[38], out[42], out[46]);
+ TRANSPOSE_4X4(in[35], in[39], in[43], in[47],
+ out[50], out[54], out[58], out[62]);
+ TRANSPOSE_4X4(in[50], in[54], in[58], in[62],
+ out[35], out[39], out[43], out[47]);
+ TRANSPOSE_4X4(in[51], in[55], in[59], in[63],
+ out[51], out[55], out[59], out[63]);
+}
+
+// Note:
+// rounding = 1 << (bit - 1)
+static INLINE __m128i half_btf_sse4_1(__m128i w0, __m128i n0,
+ __m128i w1, __m128i n1,
+ __m128i rounding, int bit) {
+ __m128i x, y;
+
+ x = _mm_mullo_epi32(w0, n0);
+ y = _mm_mullo_epi32(w1, n1);
+ x = _mm_add_epi32(x, y);
+ x = _mm_add_epi32(x, rounding);
+ x = _mm_srai_epi32(x, bit);
+ return x;
+}
+
+#endif // _HIGHBD_TXFM_UTILITY_SSE4_H
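
half_btf_sse4_1 above is the half-butterfly primitive used throughout the 16x16 transforms (the 4x4 and 8x8 kernels spell the same operation out inline). A scalar view of one 32-bit lane, matching the low-32-bit products of _mm_mullo_epi32:

    /* (w0 * n0 + w1 * n1 + rnd) >> bit, with rnd = 1 << (bit - 1) */
    static int32_t half_btf_scalar(int32_t w0, int32_t n0,
                                   int32_t w1, int32_t n1, int bit) {
      return (w0 * n0 + w1 * n1 + (1 << (bit - 1))) >> bit;
    }
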
diff --git a/vp10/common/x86/vp10_fwd_txfm2d_sse4.c b/vp10/common/x86/vp10_fwd_txfm2d_sse4.c
index 499e58d..1d70f14 100644
--- a/vp10/common/x86/vp10_fwd_txfm2d_sse4.c
+++ b/vp10/common/x86/vp10_fwd_txfm2d_sse4.c
@@ -8,7 +8,8 @@
* be found in the AUTHORS file in the root of the source tree.
*/
-#include "vp10/common/vp10_fwd_txfm2d_cfg.h"
+#include "vp10/common/enums.h"
+#include "vp10/common/vp10_txfm.h"
#include "vp10/common/x86/vp10_txfm1d_sse4.h"
static INLINE void int16_array_with_stride_to_int32_array_without_stride(
@@ -91,16 +92,16 @@
const int stride, int tx_type,
const int bd) {
int32_t txfm_buf[1024];
- const TXFM_2D_CFG* cfg = vp10_get_txfm_32x32_cfg(tx_type);
+ TXFM_2D_FLIP_CFG cfg = vp10_get_fwd_txfm_cfg(tx_type, TX_32X32);
(void)bd;
- fwd_txfm2d_sse4_1(input, output, stride, cfg, txfm_buf);
+ fwd_txfm2d_sse4_1(input, output, stride, cfg.cfg, txfm_buf);
}
void vp10_fwd_txfm2d_64x64_sse4_1(const int16_t *input, int32_t *output,
const int stride, int tx_type,
const int bd) {
int32_t txfm_buf[4096];
- const TXFM_2D_CFG* cfg = vp10_get_txfm_64x64_cfg(tx_type);
+ TXFM_2D_FLIP_CFG cfg = vp10_get_fwd_txfm_64x64_cfg(tx_type);
(void)bd;
- fwd_txfm2d_sse4_1(input, output, stride, cfg, txfm_buf);
+ fwd_txfm2d_sse4_1(input, output, stride, cfg.cfg, txfm_buf);
}
diff --git a/vp10/decoder/decodeframe.c b/vp10/decoder/decodeframe.c
index d3d9780..e3dadaf 100644
--- a/vp10/decoder/decodeframe.c
+++ b/vp10/decoder/decodeframe.c
@@ -57,6 +57,14 @@
}
static void setup_compound_reference_mode(VP10_COMMON *cm) {
+#if !CONFIG_EXT_REFS && CONFIG_BIDIR_PRED
+ cm->comp_fwd_ref[0] = LAST_FRAME;
+ cm->comp_fwd_ref[1] = GOLDEN_FRAME;
+ cm->comp_bwd_ref[0] = BWDREF_FRAME;
+ cm->comp_bwd_ref[1] = ALTREF_FRAME;
+
+#else // !CONFIG_EXT_REFS && CONFIG_BIDIR_PRED
+
if (cm->ref_frame_sign_bias[LAST_FRAME] ==
cm->ref_frame_sign_bias[GOLDEN_FRAME]) {
cm->comp_fixed_ref = ALTREF_FRAME;
@@ -66,7 +74,7 @@
cm->comp_var_ref[2] = LAST3_FRAME;
cm->comp_var_ref[3] = LAST4_FRAME;
cm->comp_var_ref[4] = GOLDEN_FRAME;
-#else
+#else // CONFIG_EXT_REFS
cm->comp_var_ref[1] = GOLDEN_FRAME;
#endif // CONFIG_EXT_REFS
} else if (cm->ref_frame_sign_bias[LAST_FRAME] ==
@@ -85,6 +93,7 @@
cm->comp_var_ref[0] = GOLDEN_FRAME;
cm->comp_var_ref[1] = ALTREF_FRAME;
}
+#endif // !CONFIG_EXT_REFS && CONFIG_BIDIR_PRED
}
static int read_is_valid(const uint8_t *start, size_t len, const uint8_t *end) {
@@ -171,9 +180,15 @@
if (cm->reference_mode != SINGLE_REFERENCE) {
for (i = 0; i < REF_CONTEXTS; ++i) {
- for (j = 0; j < (COMP_REFS - 1); ++j) {
+#if !CONFIG_EXT_REFS && CONFIG_BIDIR_PRED
+ for (j = 0; j < (FWD_REFS - 1); ++j)
vp10_diff_update_prob(r, &fc->comp_ref_prob[i][j]);
- }
+ for (j = 0; j < (BWD_REFS - 1); ++j)
+ vp10_diff_update_prob(r, &fc->comp_bwdref_prob[i][j]);
+#else
+ for (j = 0; j < (COMP_REFS - 1); ++j)
+ vp10_diff_update_prob(r, &fc->comp_ref_prob[i][j]);
+#endif // !CONFIG_EXT_REFS && CONFIG_BIDIR_PRED
}
}
}
@@ -380,572 +395,6 @@
}
#endif // !CONFIG_VAR_TX || CONFIG_SUPER_TX
-#if CONFIG_SUPERTX
-static void build_mc_border(const uint8_t *src, int src_stride,
- uint8_t *dst, int dst_stride,
- int x, int y, int b_w, int b_h, int w, int h) {
- // Get a pointer to the start of the real data for this row.
- const uint8_t *ref_row = src - x - y * src_stride;
-
- if (y >= h)
- ref_row += (h - 1) * src_stride;
- else if (y > 0)
- ref_row += y * src_stride;
-
- do {
- int right = 0, copy;
- int left = x < 0 ? -x : 0;
-
- if (left > b_w)
- left = b_w;
-
- if (x + b_w > w)
- right = x + b_w - w;
-
- if (right > b_w)
- right = b_w;
-
- copy = b_w - left - right;
-
- if (left)
- memset(dst, ref_row[0], left);
-
- if (copy)
- memcpy(dst + left, ref_row + x + left, copy);
-
- if (right)
- memset(dst + left + copy, ref_row[w - 1], right);
-
- dst += dst_stride;
- ++y;
-
- if (y > 0 && y < h)
- ref_row += src_stride;
- } while (--b_h);
-}
-
-#if CONFIG_VP9_HIGHBITDEPTH
-static void build_mc_border_highbd(const uint8_t *src8, int src_stride,
- uint16_t *dst, int dst_stride,
- int x, int y, int b_w, int b_h,
- int w, int h) {
- // Get a pointer to the start of the real data for this row.
- const uint16_t *src = CONVERT_TO_SHORTPTR(src8);
- const uint16_t *ref_row = src - x - y * src_stride;
-
- if (y >= h)
- ref_row += (h - 1) * src_stride;
- else if (y > 0)
- ref_row += y * src_stride;
-
- do {
- int right = 0, copy;
- int left = x < 0 ? -x : 0;
-
- if (left > b_w)
- left = b_w;
-
- if (x + b_w > w)
- right = x + b_w - w;
-
- if (right > b_w)
- right = b_w;
-
- copy = b_w - left - right;
-
- if (left)
- vpx_memset16(dst, ref_row[0], left);
-
- if (copy)
- memcpy(dst + left, ref_row + x + left, copy * sizeof(uint16_t));
-
- if (right)
- vpx_memset16(dst + left + copy, ref_row[w - 1], right);
-
- dst += dst_stride;
- ++y;
-
- if (y > 0 && y < h)
- ref_row += src_stride;
- } while (--b_h);
-}
-
-static void extend_and_predict_highbd(const uint8_t *buf_ptr1,
- int pre_buf_stride,
- int x0, int y0, int b_w, int b_h,
- int frame_width, int frame_height,
- int border_offset,
- uint8_t *const dst, int dst_buf_stride,
- int subpel_x, int subpel_y,
-#if CONFIG_DUAL_FILTER
- const INTERP_FILTER *interp_filter,
-#else
- const INTERP_FILTER interp_filter,
-#endif
- const struct scale_factors *sf,
-#if CONFIG_EXT_INTER
- int wedge_offset_x, int wedge_offset_y,
-#endif // CONFIG_EXT_INTER
- MACROBLOCKD *xd,
- int w, int h, int ref, int xs, int ys) {
- DECLARE_ALIGNED(16, uint16_t,
- mc_buf_high[(MAX_SB_SIZE + 16) * 2 * (MAX_SB_SIZE + 16) * 2]);
- const uint8_t *buf_ptr;
-
- if (xd->cur_buf->flags & YV12_FLAG_HIGHBITDEPTH) {
- build_mc_border_highbd(buf_ptr1, pre_buf_stride, mc_buf_high, b_w,
- x0, y0, b_w, b_h, frame_width, frame_height);
- buf_ptr = CONVERT_TO_BYTEPTR(mc_buf_high) + border_offset;
- } else {
- build_mc_border(buf_ptr1, pre_buf_stride, (uint8_t *)mc_buf_high, b_w,
- x0, y0, b_w, b_h, frame_width, frame_height);
- buf_ptr = ((uint8_t *)mc_buf_high) + border_offset;
- }
-#if CONFIG_EXT_INTER
- if (ref && is_interinter_wedge_used(xd->mi[0]->mbmi.sb_type) &&
- xd->mi[0]->mbmi.use_wedge_interinter)
- vp10_make_masked_inter_predictor(
- buf_ptr, b_w, dst, dst_buf_stride,
- subpel_x, subpel_y, sf, w, h,
- interp_filter, xs, ys,
- wedge_offset_x, wedge_offset_y,
- xd);
- else
-#endif // CONFIG_EXT_INTER
- vp10_make_inter_predictor(buf_ptr, b_w, dst, dst_buf_stride,
- subpel_x, subpel_y, sf, w, h, ref,
- interp_filter, xs, ys, xd);
-}
-
-#else
-
-static void extend_and_predict(const uint8_t *buf_ptr1, int pre_buf_stride,
- int x0, int y0, int b_w, int b_h,
- int frame_width, int frame_height,
- int border_offset,
- uint8_t *const dst, int dst_buf_stride,
- int subpel_x, int subpel_y,
-#if CONFIG_DUAL_FILTER
- const INTERP_FILTER *interp_filter,
-#else
- const INTERP_FILTER interp_filter,
-#endif
- const struct scale_factors *sf,
-#if CONFIG_EXT_INTER
- int wedge_offset_x, int wedge_offset_y,
-#endif // CONFIG_EXT_INTER
- MACROBLOCKD *xd,
- int w, int h, int ref, int xs, int ys) {
- DECLARE_ALIGNED(16, uint8_t,
- mc_buf[(MAX_SB_SIZE + 16) * 2 * (MAX_SB_SIZE + 16) * 2]);
- const uint8_t *buf_ptr;
-
- build_mc_border(buf_ptr1, pre_buf_stride, mc_buf, b_w,
- x0, y0, b_w, b_h, frame_width, frame_height);
- buf_ptr = mc_buf + border_offset;
-#if CONFIG_EXT_INTER
- if (ref && is_interinter_wedge_used(xd->mi[0]->mbmi.sb_type) &&
- xd->mi[0]->mbmi.use_wedge_interinter)
- vp10_make_masked_inter_predictor(
- buf_ptr, b_w, dst, dst_buf_stride,
- subpel_x, subpel_y, sf, w, h,
- interp_filter, xs, ys,
- wedge_offset_x, wedge_offset_y,
- xd);
- else
-#endif // CONFIG_EXT_INTER
- vp10_make_inter_predictor(buf_ptr, b_w, dst, dst_buf_stride,
- subpel_x, subpel_y, sf, w, h, ref,
- interp_filter, xs, ys, xd);
-}
-#endif // CONFIG_VP9_HIGHBITDEPTH
-
-static void dec_build_inter_predictors(VP10Decoder *const pbi,
- MACROBLOCKD *xd, int plane,
-#if CONFIG_OBMC
- int mi_col_offset, int mi_row_offset,
-#endif // CONFIG_OBMC
- int bw, int bh,
- int x, int y, int w, int h,
-#if CONFIG_EXT_INTER
- int wedge_offset_x, int wedge_offset_y,
-#endif // CONFIG_EXT_INTER
- int mi_x, int mi_y,
-#if CONFIG_DUAL_FILTER
- const INTERP_FILTER *interp_filter,
-#else
- const INTERP_FILTER interp_filter,
-#endif
- const struct scale_factors *sf,
- struct buf_2d *pre_buf,
- struct buf_2d *dst_buf, const MV* mv,
- RefCntBuffer *ref_frame_buf,
- int is_scaled, int ref) {
- VP10_COMMON *const cm = &pbi->common;
- struct macroblockd_plane *const pd = &xd->plane[plane];
- uint8_t *const dst = dst_buf->buf + dst_buf->stride * y + x;
- MV32 scaled_mv;
- MV mv_q4;
- int xs, ys, x0, y0, x0_16, y0_16, frame_width, frame_height,
- buf_stride, subpel_x, subpel_y;
- uint8_t *ref_frame, *buf_ptr;
-#if CONFIG_EXT_INTER
-#if CONFIG_OBMC
- const MODE_INFO *mi = xd->mi[mi_col_offset + xd->mi_stride * mi_row_offset];
-#else
- const MODE_INFO *mi = xd->mi[0];
-#endif // CONFIG_OBMC
-#endif // CONFIG_EXT_INTER
-#if CONFIG_EXT_INTERP
- const int i_filter = IsInterpolatingFilter(interp_filter);
-#endif // CONFIG_EXT_INTERP
-#if CONFIG_OBMC
- (void) mi_col_offset;
- (void) mi_row_offset;
-#endif // CONFIG_OBMC
-
- // Get reference frame pointer, width and height.
- if (plane == 0) {
- frame_width = ref_frame_buf->buf.y_crop_width;
- frame_height = ref_frame_buf->buf.y_crop_height;
- ref_frame = ref_frame_buf->buf.y_buffer;
- } else {
- frame_width = ref_frame_buf->buf.uv_crop_width;
- frame_height = ref_frame_buf->buf.uv_crop_height;
- ref_frame = plane == 1 ? ref_frame_buf->buf.u_buffer
- : ref_frame_buf->buf.v_buffer;
- }
-
- mv_q4 = clamp_mv_to_umv_border_sb(xd, mv, bw, bh,
- pd->subsampling_x,
- pd->subsampling_y);
- if (is_scaled) {
- // Co-ordinate of containing block to pixel precision.
- int x_start = (-xd->mb_to_left_edge >> (3 + pd->subsampling_x));
- int y_start = (-xd->mb_to_top_edge >> (3 + pd->subsampling_y));
-
- // Co-ordinate of the block to 1/16th pixel precision.
- x0_16 = (x_start + x) << SUBPEL_BITS;
- y0_16 = (y_start + y) << SUBPEL_BITS;
-
- // Co-ordinate of current block in reference frame
- // to 1/16th pixel precision.
- x0_16 = sf->scale_value_x(x0_16, sf);
- y0_16 = sf->scale_value_y(y0_16, sf);
-
- // Map the top left corner of the block into the reference frame.
- x0 = sf->scale_value_x(x_start + x, sf);
- y0 = sf->scale_value_y(y_start + y, sf);
-
- // Scale the MV and incorporate the sub-pixel offset of the block
- // in the reference frame.
- scaled_mv = vp10_scale_mv(&mv_q4, mi_x + x, mi_y + y, sf);
- xs = sf->x_step_q4;
- ys = sf->y_step_q4;
- } else {
- // Co-ordinate of containing block to pixel precision.
- x0 = (-xd->mb_to_left_edge >> (3 + pd->subsampling_x)) + x;
- y0 = (-xd->mb_to_top_edge >> (3 + pd->subsampling_y)) + y;
-
- // Co-ordinate of the block to 1/16th pixel precision.
- x0_16 = x0 << SUBPEL_BITS;
- y0_16 = y0 << SUBPEL_BITS;
-
- scaled_mv.row = mv_q4.row;
- scaled_mv.col = mv_q4.col;
- xs = ys = 16;
- }
- subpel_x = scaled_mv.col & SUBPEL_MASK;
- subpel_y = scaled_mv.row & SUBPEL_MASK;
-
- // Calculate the top left corner of the best matching block in the
- // reference frame.
- x0 += scaled_mv.col >> SUBPEL_BITS;
- y0 += scaled_mv.row >> SUBPEL_BITS;
- x0_16 += scaled_mv.col;
- y0_16 += scaled_mv.row;
-
- // Get reference block pointer.
- buf_ptr = ref_frame + y0 * pre_buf->stride + x0;
- buf_stride = pre_buf->stride;
-
- // Do border extension if there is motion or the
- // width/height is not a multiple of 8 pixels.
- if (is_scaled || scaled_mv.col || scaled_mv.row ||
-#if CONFIG_EXT_INTERP
- !i_filter ||
-#endif
- (frame_width & 0x7) || (frame_height & 0x7)) {
- int y1 = ((y0_16 + (h - 1) * ys) >> SUBPEL_BITS) + 1;
-
- // Get reference block bottom right horizontal coordinate.
- int x1 = ((x0_16 + (w - 1) * xs) >> SUBPEL_BITS) + 1;
- int x_pad = 0, y_pad = 0;
-
-#if CONFIG_DUAL_FILTER
- InterpFilterParams filter_params_y =
- vp10_get_interp_filter_params(interp_filter[0]);
- InterpFilterParams filter_params_x =
- vp10_get_interp_filter_params(interp_filter[1]);
- int filter_size = VPXMAX(filter_params_y.taps, filter_params_x.taps);
-#else
- InterpFilterParams filter_params =
- vp10_get_interp_filter_params(interp_filter);
- int filter_size = filter_params.taps;
-#endif
-
- if (subpel_x ||
-#if CONFIG_EXT_INTERP
- !i_filter ||
-#endif
- (sf->x_step_q4 != SUBPEL_SHIFTS)) {
- x0 -= filter_size / 2 - 1;
- x1 += filter_size / 2;
- x_pad = 1;
- }
-
- if (subpel_y ||
-#if CONFIG_EXT_INTERP
- !i_filter ||
-#endif
- (sf->y_step_q4 != SUBPEL_SHIFTS)) {
- y0 -= filter_size / 2 - 1;
- y1 += filter_size / 2;
- y_pad = 1;
- }
-
- // Wait until reference block is ready. Pad 7 more pixels as last 7
- // pixels of each superblock row can be changed by next superblock row.
- if (cm->frame_parallel_decode)
- vp10_frameworker_wait(pbi->frame_worker_owner, ref_frame_buf,
- VPXMAX(0, (y1 + 7)) << (plane == 0 ? 0 : 1));
-
- // Skip border extension if block is inside the frame.
- if (x0 < 0 || x0 > frame_width - 1 || x1 < 0 || x1 > frame_width - 1 ||
- y0 < 0 || y0 > frame_height - 1 || y1 < 0 || y1 > frame_height - 1) {
- // Extend the border.
- const uint8_t *const buf_ptr1 = ref_frame + y0 * buf_stride + x0;
- const int b_w = x1 - x0 + 1;
- const int b_h = y1 - y0 + 1;
- const int border_offset = y_pad * (filter_size / 2 - 1) * b_w +
- x_pad * (filter_size / 2 - 1);
-
-#if CONFIG_VP9_HIGHBITDEPTH
- extend_and_predict_highbd(buf_ptr1, buf_stride, x0, y0, b_w, b_h,
- frame_width, frame_height, border_offset,
- dst, dst_buf->stride,
- subpel_x, subpel_y,
- interp_filter, sf,
-#if CONFIG_EXT_INTER
- wedge_offset_x, wedge_offset_y,
-#endif // CONFIG_EXT_INTER
- xd, w, h, ref, xs, ys);
-#else
- extend_and_predict(buf_ptr1, buf_stride, x0, y0, b_w, b_h,
- frame_width, frame_height, border_offset,
- dst, dst_buf->stride,
- subpel_x, subpel_y,
- interp_filter, sf,
-#if CONFIG_EXT_INTER
- wedge_offset_x, wedge_offset_y,
-#endif // CONFIG_EXT_INTER
- xd, w, h, ref, xs, ys);
-#endif // CONFIG_VP9_HIGHBITDEPTH
- return;
- }
- } else {
- // Wait until reference block is ready. Pad 7 more pixels as last 7
- // pixels of each superblock row can be changed by next superblock row.
- if (cm->frame_parallel_decode) {
- const int y1 = (y0_16 + (h - 1) * ys) >> SUBPEL_BITS;
- vp10_frameworker_wait(pbi->frame_worker_owner, ref_frame_buf,
- VPXMAX(0, (y1 + 7)) << (plane == 0 ? 0 : 1));
- }
- }
-#if CONFIG_EXT_INTER
- if (ref && is_interinter_wedge_used(mi->mbmi.sb_type) &&
- mi->mbmi.use_wedge_interinter)
- vp10_make_masked_inter_predictor(
- buf_ptr, buf_stride, dst, dst_buf->stride,
- subpel_x, subpel_y, sf, w, h,
- interp_filter, xs, ys,
- wedge_offset_x, wedge_offset_y,
- xd);
- else
-#endif // CONFIG_EXT_INTER
- vp10_make_inter_predictor(buf_ptr, buf_stride, dst, dst_buf->stride,
- subpel_x, subpel_y, sf, w, h, ref,
- interp_filter, xs, ys, xd);
-}
-
-static void dec_build_inter_predictors_sb_extend(
- VP10Decoder *const pbi, MACROBLOCKD *xd,
-#if CONFIG_EXT_INTER
- int mi_row_ori, int mi_col_ori,
-#endif // CONFIG_EXT_INTER
- int mi_row, int mi_col) {
- int plane;
- const int mi_x = mi_col * MI_SIZE;
- const int mi_y = mi_row * MI_SIZE;
-#if CONFIG_EXT_INTER
- const int wedge_offset_x = (mi_col_ori - mi_col) * MI_SIZE;
- const int wedge_offset_y = (mi_row_ori - mi_row) * MI_SIZE;
-#endif // CONFIG_EXT_INTER
- const MODE_INFO *mi = xd->mi[0];
- const BLOCK_SIZE sb_type = mi->mbmi.sb_type;
- const int is_compound = has_second_ref(&mi->mbmi);
-
- for (plane = 0; plane < MAX_MB_PLANE; ++plane) {
- struct macroblockd_plane *const pd = &xd->plane[plane];
-
- struct buf_2d *const dst_buf = &pd->dst;
- const int num_4x4_w = pd->n4_w;
- const int num_4x4_h = pd->n4_h;
-
- const int n4w_x4 = 4 * num_4x4_w;
- const int n4h_x4 = 4 * num_4x4_h;
- int ref;
-
- for (ref = 0; ref < 1 + is_compound; ++ref) {
- const struct scale_factors *const sf = &xd->block_refs[ref]->sf;
- struct buf_2d *const pre_buf = &pd->pre[ref];
- const int idx = xd->block_refs[ref]->idx;
- BufferPool *const pool = pbi->common.buffer_pool;
- RefCntBuffer *const ref_frame_buf = &pool->frame_bufs[idx];
- const int is_scaled = vp10_is_scaled(sf);
-
- if (sb_type < BLOCK_8X8) {
- const PARTITION_TYPE bp = BLOCK_8X8 - sb_type;
- const int have_vsplit = bp != PARTITION_HORZ;
- const int have_hsplit = bp != PARTITION_VERT;
- const int num_4x4_w = 2 >> ((!have_vsplit) | pd->subsampling_x);
- const int num_4x4_h = 2 >> ((!have_hsplit) | pd->subsampling_y);
- const int pw = 8 >> (have_vsplit | pd->subsampling_x);
- const int ph = 8 >> (have_hsplit | pd->subsampling_y);
- int x, y;
- for (y = 0; y < num_4x4_h; ++y) {
- for (x = 0; x < num_4x4_w; ++x) {
- const MV mv = average_split_mvs(pd, mi, ref, y * 2 + x);
- dec_build_inter_predictors(
- pbi, xd, plane,
-#if CONFIG_OBMC
- 0, 0,
-#endif // CONFIG_OBMC
- n4w_x4, n4h_x4,
- 4 * x, 4 * y, pw, ph,
-#if CONFIG_EXT_INTER
- wedge_offset_x,
- wedge_offset_y,
-#endif // CONFIG_EXT_INTER
- mi_x, mi_y,
- mi->mbmi.interp_filter, sf, pre_buf, dst_buf,
- &mv, ref_frame_buf, is_scaled, ref);
- }
- }
- } else {
- const MV mv = mi->mbmi.mv[ref].as_mv;
- dec_build_inter_predictors(
- pbi, xd, plane,
-#if CONFIG_OBMC
- 0, 0,
-#endif // CONFIG_OBMC
- n4w_x4, n4h_x4,
- 0, 0, n4w_x4, n4h_x4,
-#if CONFIG_EXT_INTER
- wedge_offset_x,
- wedge_offset_y,
-#endif // CONFIG_EXT_INTER
- mi_x, mi_y,
- mi->mbmi.interp_filter, sf, pre_buf, dst_buf,
- &mv, ref_frame_buf,
- is_scaled, ref);
- }
- }
- }
-#if CONFIG_EXT_INTER
- if (is_interintra_pred(&mi->mbmi))
- vp10_build_interintra_predictors(xd,
- xd->plane[0].dst.buf,
- xd->plane[1].dst.buf,
- xd->plane[2].dst.buf,
- xd->plane[0].dst.stride,
- xd->plane[1].dst.stride,
- xd->plane[2].dst.stride,
- sb_type);
-#endif // CONFIG_EXT_INTER
-}
-
-static void dec_build_inter_predictors_sb_sub8x8_extend(
- VP10Decoder *const pbi,
- MACROBLOCKD *xd,
-#if CONFIG_EXT_INTER
- int mi_row_ori, int mi_col_ori,
-#endif // CONFIG_EXT_INTER
- int mi_row, int mi_col,
- int block) {
- // Prediction function used in supertx:
- // Use the mv at current block (which is less than 8x8)
- int plane;
- const int mi_x = mi_col * MI_SIZE;
- const int mi_y = mi_row * MI_SIZE;
-#if CONFIG_EXT_INTER
- const int wedge_offset_x = (mi_col_ori - mi_col) * MI_SIZE;
- const int wedge_offset_y = (mi_row_ori - mi_row) * MI_SIZE;
-#endif // CONFIG_EXT_INTER
- const MODE_INFO *mi = xd->mi[0];
- const int is_compound = has_second_ref(&mi->mbmi);
-
- // For sub8x8 uv:
- // Skip uv prediction in supertx except the first block (block = 0)
- int max_plane = block ? 1 : MAX_MB_PLANE;
-
- for (plane = 0; plane < max_plane; ++plane) {
- struct macroblockd_plane *const pd = &xd->plane[plane];
- struct buf_2d *const dst_buf = &pd->dst;
- const int num_4x4_w = pd->n4_w;
- const int num_4x4_h = pd->n4_h;
-
- const int n4w_x4 = 4 * num_4x4_w;
- const int n4h_x4 = 4 * num_4x4_h;
- int ref;
-
- for (ref = 0; ref < 1 + is_compound; ++ref) {
- const struct scale_factors *const sf = &xd->block_refs[ref]->sf;
- struct buf_2d *const pre_buf = &pd->pre[ref];
- const int idx = xd->block_refs[ref]->idx;
- BufferPool *const pool = pbi->common.buffer_pool;
- RefCntBuffer *const ref_frame_buf = &pool->frame_bufs[idx];
- const int is_scaled = vp10_is_scaled(sf);
- const MV mv = average_split_mvs(pd, mi, ref, block);
- dec_build_inter_predictors(pbi, xd, plane,
-#if CONFIG_OBMC
- 0, 0,
-#endif // CONFIG_OBMC
- n4w_x4, n4h_x4,
- 0, 0, n4w_x4, n4h_x4,
-#if CONFIG_EXT_INTER
- wedge_offset_x,
- wedge_offset_y,
-#endif // CONFIG_EXT_INTER
- mi_x, mi_y,
- mi->mbmi.interp_filter, sf, pre_buf, dst_buf,
- &mv, ref_frame_buf, is_scaled, ref);
- }
- }
-#if CONFIG_EXT_INTER
- if (is_interintra_pred(&mi->mbmi))
- vp10_build_interintra_predictors(xd,
- xd->plane[0].dst.buf,
- xd->plane[1].dst.buf,
- xd->plane[2].dst.buf,
- xd->plane[0].dst.stride,
- xd->plane[1].dst.stride,
- xd->plane[2].dst.stride,
- mi->mbmi.sb_type);
-#endif // CONFIG_EXT_INTER
-}
-#endif // CONFIG_SUPERTX
-
static INLINE TX_SIZE dec_get_uv_tx_size(const MB_MODE_INFO *mbmi,
int n4_wl, int n4_hl) {
// get minimum log2 num4x4s dimension
@@ -1171,19 +620,19 @@
(c >> xd->plane[2].subsampling_x);
if (!b_sub8x8)
- dec_build_inter_predictors_sb_extend(
- pbi, xd,
+ vp10_build_inter_predictors_sb_extend(
+ xd,
#if CONFIG_EXT_INTER
mi_row_ori, mi_col_ori,
#endif // CONFIG_EXT_INTER
- mi_row_pred, mi_col_pred);
+ mi_row_pred, mi_col_pred, bsize_pred);
else
- dec_build_inter_predictors_sb_sub8x8_extend(
- pbi, xd,
+ vp10_build_inter_predictors_sb_sub8x8_extend(
+ xd,
#if CONFIG_EXT_INTER
mi_row_ori, mi_col_ori,
#endif // CONFIG_EXT_INTER
- mi_row_pred, mi_col_pred, block);
+ mi_row_pred, mi_col_pred, bsize_pred, block);
}
static void dec_extend_dir(VP10Decoder *const pbi, MACROBLOCKD *const xd,
@@ -3615,6 +3064,11 @@
cm->last_frame_type = cm->frame_type;
cm->last_intra_only = cm->intra_only;
+#if !CONFIG_EXT_REFS && CONFIG_BIDIR_PRED
+ // NOTE: By default, all coded frames are to be used as reference frames.
+ cm->is_reference_frame = 1;
+#endif // !CONFIG_EXT_REFS && CONFIG_BIDIR_PRED
+
if (vpx_rb_read_literal(rb, 2) != VP9_FRAME_MARKER)
vpx_internal_error(&cm->error, VPX_CODEC_UNSUP_BITSTREAM,
"Invalid frame marker");
@@ -3631,9 +3085,11 @@
#endif
cm->show_existing_frame = vpx_rb_read_bit(rb);
+
if (cm->show_existing_frame) {
// Show an existing frame directly.
const int frame_to_show = cm->ref_frame_map[vpx_rb_read_literal(rb, 3)];
+
lock_buffer_pool(pool);
if (frame_to_show < 0 || frame_bufs[frame_to_show].ref_count < 1) {
unlock_buffer_pool(pool);
@@ -3641,17 +3097,72 @@
"Buffer %d does not contain a decoded frame",
frame_to_show);
}
-
ref_cnt_fb(frame_bufs, &cm->new_fb_idx, frame_to_show);
unlock_buffer_pool(pool);
- pbi->refresh_frame_flags = 0;
+
cm->lf.filter_level = 0;
cm->show_frame = 1;
+#if !CONFIG_EXT_REFS && CONFIG_BIDIR_PRED
+ // NOTE(zoeliu): The existing frame to show is adopted as a reference frame.
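+ // The refresh mask, reference map indexes and sign-bias bits read below
+ // are expected to mirror what the encoder writes for the
+ // show_existing_frame case in write_uncompressed_header().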
+ pbi->refresh_frame_flags = vpx_rb_read_literal(rb, REF_FRAMES);
+
+ for (i = 0; i < REFS_PER_FRAME; ++i) {
+ const int ref = vpx_rb_read_literal(rb, REF_FRAMES_LOG2);
+ const int idx = cm->ref_frame_map[ref];
+ RefBuffer *const ref_frame = &cm->frame_refs[i];
+ ref_frame->idx = idx;
+ ref_frame->buf = &frame_bufs[idx].buf;
+ cm->ref_frame_sign_bias[LAST_FRAME + i] = vpx_rb_read_bit(rb);
+ }
+
+ for (i = 0; i < REFS_PER_FRAME; ++i) {
+ RefBuffer *const ref_buf = &cm->frame_refs[i];
+#if CONFIG_VP9_HIGHBITDEPTH
+ vp10_setup_scale_factors_for_frame(&ref_buf->sf,
+ ref_buf->buf->y_crop_width,
+ ref_buf->buf->y_crop_height,
+ cm->width, cm->height,
+ cm->use_highbitdepth);
+#else // CONFIG_VP9_HIGHBITDEPTH
+ vp10_setup_scale_factors_for_frame(&ref_buf->sf,
+ ref_buf->buf->y_crop_width,
+ ref_buf->buf->y_crop_height,
+ cm->width, cm->height);
+#endif // CONFIG_VP9_HIGHBITDEPTH
+ }
+
+ // Generate next_ref_frame_map.
+ lock_buffer_pool(pool);
+ for (mask = pbi->refresh_frame_flags; mask; mask >>= 1) {
+ if (mask & 1) {
+ cm->next_ref_frame_map[ref_index] = cm->new_fb_idx;
+ ++frame_bufs[cm->new_fb_idx].ref_count;
+ } else {
+ cm->next_ref_frame_map[ref_index] = cm->ref_frame_map[ref_index];
+ }
+ // Current thread holds the reference frame.
+ if (cm->ref_frame_map[ref_index] >= 0)
+ ++frame_bufs[cm->ref_frame_map[ref_index]].ref_count;
+ ++ref_index;
+ }
+
+ for (; ref_index < REF_FRAMES; ++ref_index) {
+ cm->next_ref_frame_map[ref_index] = cm->ref_frame_map[ref_index];
+ // Current thread holds the reference frame.
+ if (cm->ref_frame_map[ref_index] >= 0)
+ ++frame_bufs[cm->ref_frame_map[ref_index]].ref_count;
+ }
+ unlock_buffer_pool(pool);
+ pbi->hold_ref_buf = 1;
+#else
+ pbi->refresh_frame_flags = 0;
if (cm->frame_parallel_decode) {
for (i = 0; i < REF_FRAMES; ++i)
cm->next_ref_frame_map[i] = cm->ref_frame_map[i];
}
+#endif // !CONFIG_EXT_REFS && CONFIG_BIDIR_PRED
+
return 0;
}
@@ -3715,6 +3226,15 @@
}
} else if (pbi->need_resync != 1) { /* Skip if need resync */
pbi->refresh_frame_flags = vpx_rb_read_literal(rb, REF_FRAMES);
+
+#if !CONFIG_EXT_REFS && CONFIG_BIDIR_PRED
+ if (!pbi->refresh_frame_flags) {
+ // NOTE: "pbi->refresh_frame_flags == 0" indicates that the coded frame
+ // will not be used as a reference
+ cm->is_reference_frame = 0;
+ }
+#endif // !CONFIG_EXT_REFS && CONFIG_BIDIR_PRED
+
for (i = 0; i < REFS_PER_FRAME; ++i) {
const int ref = vpx_rb_read_literal(rb, REF_FRAMES_LOG2);
const int idx = cm->ref_frame_map[ref];
@@ -4036,6 +3556,7 @@
if (cm->reference_mode != SINGLE_REFERENCE)
setup_compound_reference_mode(cm);
+
read_frame_reference_mode_probs(cm, &r);
for (j = 0; j < BLOCK_SIZE_GROUPS; j++)
@@ -4107,6 +3628,10 @@
sizeof(cm->counts.comp_ref)));
assert(!memcmp(&cm->counts.tx_size, &zero_counts.tx_size,
sizeof(cm->counts.tx_size)));
+#if !CONFIG_EXT_REFS && CONFIG_BIDIR_PRED
+ assert(!memcmp(cm->counts.comp_bwdref, zero_counts.comp_bwdref,
+ sizeof(cm->counts.comp_bwdref)));
+#endif // !CONFIG_EXT_REFS && CONFIG_BIDIR_PRED
assert(!memcmp(cm->counts.skip, zero_counts.skip, sizeof(cm->counts.skip)));
#if CONFIG_REF_MV
assert(!memcmp(&cm->counts.mv[0], &zero_counts.mv[0],
@@ -4181,7 +3706,13 @@
if (!first_partition_size) {
// showing a frame directly
- *p_data_end = data + (cm->profile <= PROFILE_2 ? 1 : 2);
+#if !CONFIG_EXT_REFS && CONFIG_BIDIR_PRED
+ if (cm->show_existing_frame)
+ *p_data_end = data + vpx_rb_bytes_read(&rb);
+ else
+#endif // !CONFIG_EXT_REFS && CONFIG_BIDIR_PRED
+ *p_data_end = data + (cm->profile <= PROFILE_2 ? 1 : 2);
+
return;
}
diff --git a/vp10/decoder/decodemv.c b/vp10/decoder/decodemv.c
index fd14ef5..a25fe7a 100644
--- a/vp10/decoder/decodemv.c
+++ b/vp10/decoder/decodemv.c
@@ -759,11 +759,28 @@
const REFERENCE_MODE mode = read_block_reference_mode(cm, xd, r);
// FIXME(rbultje) I'm pretty sure this breaks segmentation ref frame coding
if (mode == COMPOUND_REFERENCE) {
+#if !CONFIG_EXT_REFS && CONFIG_BIDIR_PRED
+ const int idx = cm->ref_frame_sign_bias[cm->comp_bwd_ref[0]];
+#else
const int idx = cm->ref_frame_sign_bias[cm->comp_fixed_ref];
+#endif // !CONFIG_EXT_REFS && CONFIG_BIDIR_PRED
const int ctx = vp10_get_pred_context_comp_ref_p(cm, xd);
const int bit = vp10_read(r, fc->comp_ref_prob[ctx][0]);
if (counts)
++counts->comp_ref[ctx][0][bit];
+
+#if !CONFIG_EXT_REFS && CONFIG_BIDIR_PRED
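+ // 'bit' (read above) selects the forward reference; a second bit with its
+ // own context then selects the backward reference.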
+ ref_frame[!idx] = cm->comp_fwd_ref[bit];
+ {
+ const int ctx1 = vp10_get_pred_context_comp_bwdref_p(cm, xd);
+ const int bit1 = vpx_read(r, fc->comp_bwdref_prob[ctx1][0]);
+ if (counts)
+ ++counts->comp_bwdref[ctx1][0][bit1];
+ ref_frame[idx] = cm->comp_bwd_ref[bit1];
+ }
+
+#else // !CONFIG_EXT_REFS && CONFIG_BIDIR_PRED
+
ref_frame[idx] = cm->comp_fixed_ref;
#if CONFIG_EXT_REFS
@@ -788,9 +805,10 @@
ref_frame[!idx] = cm->comp_var_ref[4];
}
}
-#else
+#else // CONFIG_EXT_REFS
ref_frame[!idx] = cm->comp_var_ref[bit];
#endif // CONFIG_EXT_REFS
+#endif // !CONFIG_EXT_REFS && CONFIG_BIDIR_PRED
} else if (mode == SINGLE_REFERENCE) {
#if CONFIG_EXT_REFS
const int ctx0 = vp10_get_pred_context_single_ref_p1(xd);
@@ -822,7 +840,7 @@
ref_frame[0] = bit3 ? LAST2_FRAME : LAST_FRAME;
}
}
-#else
+#else // CONFIG_EXT_REFS
const int ctx0 = vp10_get_pred_context_single_ref_p1(xd);
const int bit0 = vp10_read(r, fc->single_ref_prob[ctx0][0]);
if (counts)
@@ -832,7 +850,19 @@
const int bit1 = vp10_read(r, fc->single_ref_prob[ctx1][1]);
if (counts)
++counts->single_ref[ctx1][1][bit1];
+#if CONFIG_BIDIR_PRED
+ if (bit1) {
+ const int ctx2 = vp10_get_pred_context_single_ref_p3(xd);
+ const int bit2 = vpx_read(r, fc->single_ref_prob[ctx2][2]);
+ if (counts)
+ ++counts->single_ref[ctx2][2][bit2];
+ ref_frame[0] = bit2 ? ALTREF_FRAME : BWDREF_FRAME;
+ } else {
+ ref_frame[0] = GOLDEN_FRAME;
+ }
+#else // CONFIG_BIDIR_PRED
ref_frame[0] = bit1 ? ALTREF_FRAME : GOLDEN_FRAME;
+#endif // CONFIG_BIDIR_PRED
} else {
ref_frame[0] = LAST_FRAME;
}
@@ -1577,7 +1607,7 @@
xd->counts->wedge_interintra[bsize][mbmi->use_wedge_interintra]++;
if (mbmi->use_wedge_interintra) {
mbmi->interintra_wedge_index =
- vp10_read_literal(r, get_wedge_bits_lookup[bsize]);
+ vp10_read_literal(r, get_wedge_bits_lookup(bsize));
mbmi->interintra_wedge_sign = 0;
}
}
@@ -1610,22 +1640,27 @@
xd->counts->wedge_interinter[bsize][mbmi->use_wedge_interinter]++;
if (mbmi->use_wedge_interinter) {
mbmi->interinter_wedge_index =
- vp10_read_literal(r, get_wedge_bits_lookup[bsize]);
+ vp10_read_literal(r, get_wedge_bits_lookup(bsize));
mbmi->interinter_wedge_sign = vp10_read_bit(r);
}
}
#endif // CONFIG_EXT_INTER
#if CONFIG_DUAL_FILTER
- for (ref = 0; ref < 4; ++ref) {
- const int frame_idx = (ref >> 1);
+ for (ref = 0; ref < 2; ++ref) {
mbmi->interp_filter[ref] = (cm->interp_filter == SWITCHABLE) ?
EIGHTTAP_REGULAR : cm->interp_filter;
- if (mbmi->ref_frame[frame_idx] > INTRA_FRAME &&
- has_subpel_mv_component(xd, ref))
+ if (has_subpel_mv_component(xd->mi[0], xd, ref) ||
+ (mbmi->ref_frame[1] > INTRA_FRAME &&
+ has_subpel_mv_component(xd->mi[0], xd, ref + 2)))
mbmi->interp_filter[ref] = read_interp_filter(cm, xd, ref, r);
}
+ // The index system works as:
+ // (0, 1) -> (vertical, horizontal) filter types for the first ref frame.
+ // (2, 3) -> (vertical, horizontal) filter types for the second ref frame.
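+ // Only one filter pair is read per block; copying it into slots (2, 3)
+ // below presumably keeps the second-reference entries consistent with
+ // this layout.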
+ mbmi->interp_filter[2] = mbmi->interp_filter[0];
+ mbmi->interp_filter[3] = mbmi->interp_filter[1];
#else
#if CONFIG_EXT_INTERP
mbmi->interp_filter = read_interp_filter(cm, xd, r);
diff --git a/vp10/decoder/decoder.c b/vp10/decoder/decoder.c
index 6d567d6..573266e 100644
--- a/vp10/decoder/decoder.c
+++ b/vp10/decoder/decoder.c
@@ -214,6 +214,9 @@
// #else // CONFIG_EXT_REFS
// cpi->gld_fb_idx = 1;
// cpi->alt_fb_idx = 2;
+
+ // TODO(zoeliu): To revisit the following code and reconsider what
+ // assumptions we may make about the reference frame buffer virtual indexes.
if (ref_frame_flag == VP9_LAST_FLAG) {
idx = cm->ref_frame_map[0];
#if CONFIG_EXT_REFS
@@ -227,11 +230,18 @@
idx = cm->ref_frame_map[4];
} else if (ref_frame_flag == VP9_ALT_FLAG) {
idx = cm->ref_frame_map[5];
-#else
+#else // CONFIG_EXT_REFS
} else if (ref_frame_flag == VP9_GOLD_FLAG) {
idx = cm->ref_frame_map[1];
+#if CONFIG_BIDIR_PRED
+ } else if (ref_frame_flag == VP9_BWD_FLAG) {
+ idx = cm->ref_frame_map[2];
+ } else if (ref_frame_flag == VP9_ALT_FLAG) {
+ idx = cm->ref_frame_map[3];
+#else // CONFIG_BIDIR_PRED
} else if (ref_frame_flag == VP9_ALT_FLAG) {
idx = cm->ref_frame_map[2];
+#endif // CONFIG_BIDIR_PRED
#endif // CONFIG_EXT_REFS
} else {
vpx_internal_error(&cm->error, VPX_CODEC_ERROR,
@@ -281,15 +291,25 @@
}
// Current thread releases the holding of reference frame.
+#if !CONFIG_EXT_REFS && CONFIG_BIDIR_PRED
+ for (; ref_index < REF_FRAMES; ++ref_index) {
+ const int old_idx = cm->ref_frame_map[ref_index];
+ decrease_ref_count(old_idx, frame_bufs, pool);
+ cm->ref_frame_map[ref_index] = cm->next_ref_frame_map[ref_index];
+ }
+#else
for (; ref_index < REF_FRAMES && !cm->show_existing_frame; ++ref_index) {
const int old_idx = cm->ref_frame_map[ref_index];
decrease_ref_count(old_idx, frame_bufs, pool);
cm->ref_frame_map[ref_index] = cm->next_ref_frame_map[ref_index];
}
+#endif // !CONFIG_EXT_REFS && CONFIG_BIDIR_PRED
unlock_buffer_pool(pool);
pbi->hold_ref_buf = 0;
cm->frame_to_show = get_frame_new_buffer(cm);
+ // TODO(zoeliu): To fix the ref frame buffer update for the scenario of
+ // cm->frame_parallel_decode == 1
if (!cm->frame_parallel_decode || !cm->show_frame) {
lock_buffer_pool(pool);
--frame_bufs[cm->new_fb_idx].ref_count;
@@ -297,8 +317,10 @@
}
// Invalidate these references until the next frame starts.
- for (ref_index = 0; ref_index < REFS_PER_FRAME; ref_index++)
- cm->frame_refs[ref_index].idx = -1;
+ for (ref_index = 0; ref_index < REFS_PER_FRAME; ref_index++) {
+ cm->frame_refs[ref_index].idx = INVALID_IDX;
+ cm->frame_refs[ref_index].buf = NULL;
+ }
}
int vp10_receive_compressed_data(VP10Decoder *pbi,
@@ -327,12 +349,16 @@
pbi->ready_for_new_data = 0;
+ // Find a free buffer for the new frame, releasing the reference previously
+ // held.
+
// Check if the previous frame was a frame without any references to it.
// Release frame buffer if not decoding in frame parallel mode.
if (!cm->frame_parallel_decode && cm->new_fb_idx >= 0
&& frame_bufs[cm->new_fb_idx].ref_count == 0)
pool->release_fb_cb(pool->cb_priv,
&frame_bufs[cm->new_fb_idx].raw_frame_buffer);
+
// Find a free frame buffer. Return error if can not find any.
cm->new_fb_idx = get_free_fb(cm);
if (cm->new_fb_idx == INVALID_IDX)
@@ -386,10 +412,17 @@
}
// Current thread releases the holding of reference frame.
+#if !CONFIG_EXT_REFS && CONFIG_BIDIR_PRED
+ for (; ref_index < REF_FRAMES; ++ref_index) {
+ const int old_idx = cm->ref_frame_map[ref_index];
+ decrease_ref_count(old_idx, frame_bufs, pool);
+ }
+#else
for (; ref_index < REF_FRAMES && !cm->show_existing_frame; ++ref_index) {
const int old_idx = cm->ref_frame_map[ref_index];
decrease_ref_count(old_idx, frame_bufs, pool);
}
+#endif // !CONFIG_EXT_REFS && CONFIG_BIDIR_PRED
pbi->hold_ref_buf = 0;
}
// Release current frame.
@@ -417,7 +450,13 @@
if (!cm->show_existing_frame) {
cm->last_show_frame = cm->show_frame;
- cm->prev_frame = cm->cur_frame;
+
+#if !CONFIG_EXT_REFS && CONFIG_BIDIR_PRED
+ // NOTE: It is not supposed to ref to any frame not used as reference
+ if (cm->is_reference_frame)
+#endif // !CONFIG_EXT_REFS && CONFIG_BIDIR_PRED
+ cm->prev_frame = cm->cur_frame;
+
if (cm->seg.enabled && !cm->frame_parallel_decode)
vp10_swap_current_and_last_seg_map(cm);
}
@@ -483,6 +522,17 @@
return ret;
}
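+// Copies the decoder's current frame-to-show into *frame; returns 0 on
+// success, or -1 when no displayable frame is available. (Presumably used
+// to compare encoder and decoder output for mismatch checks.)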
+int vp10_get_frame_to_show(VP10Decoder *pbi,
+ YV12_BUFFER_CONFIG *frame) {
+ VP10_COMMON *const cm = &pbi->common;
+
+ if (!cm->show_frame || !cm->frame_to_show)
+ return -1;
+
+ *frame = *cm->frame_to_show;
+ return 0;
+}
+
vpx_codec_err_t vp10_parse_superframe_index(const uint8_t *data,
size_t data_sz,
uint32_t sizes[8], int *count,
diff --git a/vp10/decoder/decoder.h b/vp10/decoder/decoder.h
index 0839e46..b34b009 100644
--- a/vp10/decoder/decoder.h
+++ b/vp10/decoder/decoder.h
@@ -104,6 +104,8 @@
int vp10_get_raw_frame(struct VP10Decoder *pbi, YV12_BUFFER_CONFIG *sd,
vp10_ppflags_t *flags);
+int vp10_get_frame_to_show(struct VP10Decoder *pbi, YV12_BUFFER_CONFIG *frame);
+
vpx_codec_err_t vp10_copy_reference_dec(struct VP10Decoder *pbi,
VP9_REFFRAME ref_frame_flag,
YV12_BUFFER_CONFIG *sd);
diff --git a/vp10/decoder/dthread.c b/vp10/decoder/dthread.c
index 4206adc..a4555c8 100644
--- a/vp10/decoder/dthread.c
+++ b/vp10/decoder/dthread.c
@@ -159,6 +159,10 @@
#if CONFIG_VP9_HIGHBITDEPTH
dst_cm->use_highbitdepth = src_cm->use_highbitdepth;
#endif
+#if !CONFIG_EXT_REFS && CONFIG_BIDIR_PRED
+ // TODO(zoeliu): To handle parallel decoding
+ assert(0);
+#endif // !CONFIG_EXT_REFS && CONFIG_BIDIR_PRED
dst_cm->prev_frame = src_cm->show_existing_frame ?
src_cm->prev_frame : src_cm->cur_frame;
dst_cm->last_width = !src_cm->show_existing_frame ?
diff --git a/vp10/encoder/bitstream.c b/vp10/encoder/bitstream.c
index 4f8e89c..bca36df 100644
--- a/vp10/encoder/bitstream.c
+++ b/vp10/encoder/bitstream.c
@@ -835,8 +835,11 @@
const int bit = (mbmi->ref_frame[0] == GOLDEN_FRAME ||
mbmi->ref_frame[0] == LAST3_FRAME ||
mbmi->ref_frame[0] == LAST4_FRAME);
-#else
+#else // CONFIG_EXT_REFS
const int bit = mbmi->ref_frame[0] == GOLDEN_FRAME;
+#if CONFIG_BIDIR_PRED
+ const int bit_bwd = mbmi->ref_frame[1] == ALTREF_FRAME;
+#endif // CONFIG_BIDIR_PRED
#endif // CONFIG_EXT_REFS
vp10_write(w, bit, vp10_get_pred_prob_comp_ref_p(cm, xd));
@@ -852,6 +855,10 @@
vp10_write(w, bit3, vp10_get_pred_prob_comp_ref_p3(cm, xd));
}
}
+#else // CONFIG_EXT_REFS
+#if CONFIG_BIDIR_PRED
+ vpx_write(w, bit_bwd, vp10_get_pred_prob_comp_bwdref_p(cm, xd));
+#endif // CONFIG_BIDIR_PRED
#endif // CONFIG_EXT_REFS
} else {
#if CONFIG_EXT_REFS
@@ -875,12 +882,18 @@
vp10_write(w, bit4, vp10_get_pred_prob_single_ref_p5(cm, xd));
}
}
-#else
+#else // CONFIG_EXT_REFS
const int bit0 = mbmi->ref_frame[0] != LAST_FRAME;
vp10_write(w, bit0, vp10_get_pred_prob_single_ref_p1(cm, xd));
if (bit0) {
const int bit1 = mbmi->ref_frame[0] != GOLDEN_FRAME;
vp10_write(w, bit1, vp10_get_pred_prob_single_ref_p2(cm, xd));
+#if CONFIG_BIDIR_PRED
+ if (bit1) {
+ const int bit2 = mbmi->ref_frame[0] != BWDREF_FRAME;
+ vp10_write(w, bit2, vp10_get_pred_prob_single_ref_p3(cm, xd));
+ }
+#endif // CONFIG_BIDIR_PRED
}
#endif // CONFIG_EXT_REFS
}
@@ -938,10 +951,10 @@
#endif // CONFIG_DUAL_FILTER
#endif // CONFIG_EXT_INTERP
#if CONFIG_DUAL_FILTER
- for (dir = 0; dir < 4; ++dir) {
- const int frame_idx = (dir >> 1);
- if (mbmi->ref_frame[frame_idx] > INTRA_FRAME &&
- has_subpel_mv_component(xd, dir)) {
+ for (dir = 0; dir < 2; ++dir) {
+ if (has_subpel_mv_component(xd->mi[0], xd, dir) ||
+ (mbmi->ref_frame[1] > INTRA_FRAME &&
+ has_subpel_mv_component(xd->mi[0], xd, dir + 2))) {
const int ctx = vp10_get_pred_context_switchable_interp(xd, dir);
vp10_write_token(w, vp10_switchable_interp_tree,
cm->fc->switchable_interp_prob[ctx],
@@ -1338,7 +1351,7 @@
cm->fc->wedge_interintra_prob[bsize]);
if (mbmi->use_wedge_interintra) {
vp10_write_literal(w, mbmi->interintra_wedge_index,
- get_wedge_bits_lookup[bsize]);
+ get_wedge_bits_lookup(bsize));
assert(mbmi->interintra_wedge_sign == 0);
}
}
@@ -1368,7 +1381,7 @@
cm->fc->wedge_interinter_prob[bsize]);
if (mbmi->use_wedge_interinter) {
vp10_write_literal(w, mbmi->interinter_wedge_index,
- get_wedge_bits_lookup[bsize]);
+ get_wedge_bits_lookup(bsize));
vp10_write_bit(w, mbmi->interinter_wedge_sign);
}
}
@@ -1578,7 +1591,31 @@
// up if they are scaled. vp10_is_interp_needed is in turn needed by
// write_switchable_interp_filter, which is called by pack_inter_mode_mvs.
set_ref_ptrs(cm, xd, m->mbmi.ref_frame[0], m->mbmi.ref_frame[1]);
-#endif // CONFIG_EXT_INTER
+#endif // CONFIG_EXT_INTERP
+#if 0
+ // NOTE(zoeliu): For debug
+ if (cm->current_video_frame == FRAME_TO_CHECK && cm->show_frame == 1) {
+ const PREDICTION_MODE mode = m->mbmi.mode;
+ const int segment_id = m->mbmi.segment_id;
+ const BLOCK_SIZE bsize = m->mbmi.sb_type;
+
+ // For sub8x8, simply dump out the first sub8x8 block info
+ const PREDICTION_MODE b_mode =
+ (bsize < BLOCK_8X8) ? m->bmi[0].as_mode : -1;
+ const int mv_x = (bsize < BLOCK_8X8) ?
+ m->bmi[0].as_mv[0].as_mv.row : m->mbmi.mv[0].as_mv.row;
+ const int mv_y = (bsize < BLOCK_8X8) ?
+ m->bmi[0].as_mv[0].as_mv.col : m->mbmi.mv[0].as_mv.col;
+
+ printf("Before pack_inter_mode_mvs(): "
+ "Frame=%d, (mi_row,mi_col)=(%d,%d), "
+ "mode=%d, segment_id=%d, bsize=%d, b_mode=%d, "
+ "mv[0]=(%d, %d), ref[0]=%d, ref[1]=%d\n",
+ cm->current_video_frame, mi_row, mi_col,
+ mode, segment_id, bsize, b_mode, mv_x, mv_y,
+ m->mbmi.ref_frame[0], m->mbmi.ref_frame[1]);
+ }
+#endif // 0
pack_inter_mode_mvs(cpi, m,
#if CONFIG_SUPERTX
supertx_enabled,
@@ -2643,8 +2680,12 @@
// LAST4_FRAME.
refresh_mask |= (cpi->refresh_last_frame <<
cpi->lst_fb_idxes[LAST4_FRAME - LAST_FRAME]);
-#else
+#else // CONFIG_EXT_REFS
refresh_mask |= (cpi->refresh_last_frame << cpi->lst_fb_idx);
+
+#if CONFIG_BIDIR_PRED
+ refresh_mask |= (cpi->refresh_bwd_ref_frame << cpi->bwd_fb_idx);
+#endif // CONFIG_BIDIR_PRED
#endif // CONFIG_EXT_REFS
if (vp10_preserve_existing_gf(cpi)) {
@@ -2997,7 +3038,46 @@
write_profile(cm->profile, wb);
- vpx_wb_write_bit(wb, 0); // show_existing_frame
+#if !CONFIG_EXT_REFS && CONFIG_BIDIR_PRED
+ // NOTE: By default, all coded frames are to be used as reference frames.
+ cm->is_reference_frame = 1;
+
+ if (cm->show_existing_frame) {
+ RefCntBuffer *const frame_bufs = cm->buffer_pool->frame_bufs;
+ const int frame_to_show =
+ cm->ref_frame_map[cpi->existing_fb_idx_to_show];
+
+ if (frame_to_show < 0 || frame_bufs[frame_to_show].ref_count < 1) {
+ vpx_internal_error(&cm->error, VPX_CODEC_UNSUP_BITSTREAM,
+ "Buffer %d does not contain a reconstructed frame",
+ frame_to_show);
+ }
+ ref_cnt_fb(frame_bufs, &cm->new_fb_idx, frame_to_show);
+
+ vpx_wb_write_bit(wb, 1); // show_existing_frame
+ vpx_wb_write_literal(wb, cpi->existing_fb_idx_to_show, 3);
+
+ cpi->refresh_frame_mask = get_refresh_mask(cpi);
+ vpx_wb_write_literal(wb, cpi->refresh_frame_mask, REF_FRAMES);
+ {
+ MV_REFERENCE_FRAME ref_frame;
+ for (ref_frame = LAST_FRAME; ref_frame <= ALTREF_FRAME; ++ref_frame) {
+ assert(get_ref_frame_map_idx(cpi, ref_frame) != INVALID_IDX);
+ vpx_wb_write_literal(wb, get_ref_frame_map_idx(cpi, ref_frame),
+ REF_FRAMES_LOG2);
+ // TODO(zoeliu): To further explore whether sign bias bits are needed.
+ vpx_wb_write_bit(wb, cm->ref_frame_sign_bias[ref_frame]);
+ }
+ }
+
+ return;
+ } else {
+#endif // !CONFIG_EXT_REFS && CONFIG_BIDIR_PRED
+ vpx_wb_write_bit(wb, 0); // show_existing_frame
+#if !CONFIG_EXT_REFS && CONFIG_BIDIR_PRED
+ }
+#endif // !CONFIG_EXT_REFS && CONFIG_BIDIR_PRED
+
vpx_wb_write_bit(wb, cm->frame_type);
vpx_wb_write_bit(wb, cm->show_frame);
vpx_wb_write_bit(wb, cm->error_resilient_mode);
@@ -3025,15 +3105,37 @@
}
}
+#if !CONFIG_EXT_REFS && CONFIG_BIDIR_PRED
+ cpi->refresh_frame_mask = get_refresh_mask(cpi);
+#endif // !CONFIG_EXT_REFS && CONFIG_BIDIR_PRED
+
if (cm->intra_only) {
write_sync_code(wb);
write_bitdepth_colorspace_sampling(cm, wb);
+#if !CONFIG_EXT_REFS && CONFIG_BIDIR_PRED
+ vpx_wb_write_literal(wb, cpi->refresh_frame_mask, REF_FRAMES);
+#else
vpx_wb_write_literal(wb, get_refresh_mask(cpi), REF_FRAMES);
+#endif // !CONFIG_EXT_REFS && CONFIG_BIDIR_PRED
write_frame_size(cm, wb);
} else {
MV_REFERENCE_FRAME ref_frame;
+
+#if !CONFIG_EXT_REFS && CONFIG_BIDIR_PRED
+ vpx_wb_write_literal(wb, cpi->refresh_frame_mask, REF_FRAMES);
+#else
vpx_wb_write_literal(wb, get_refresh_mask(cpi), REF_FRAMES);
+#endif // !CONFIG_EXT_REFS && CONFIG_BIDIR_PRED
+
+#if !CONFIG_EXT_REFS && CONFIG_BIDIR_PRED
+ if (!cpi->refresh_frame_mask) {
+ // NOTE: "cpi->refresh_frame_mask == 0" indicates that the coded frame
+ // will not be used as a reference
+ cm->is_reference_frame = 0;
+ }
+#endif // !CONFIG_EXT_REFS && CONFIG_BIDIR_PRED
+
for (ref_frame = LAST_FRAME; ref_frame <= ALTREF_FRAME; ++ref_frame) {
assert(get_ref_frame_map_idx(cpi, ref_frame) != INVALID_IDX);
vpx_wb_write_literal(wb, get_ref_frame_map_idx(cpi, ref_frame),
@@ -3076,6 +3178,7 @@
cm->tx_mode = TX_4X4;
else
write_txfm_mode(cm->tx_mode, wb);
+
if (cpi->allow_comp_inter_inter) {
const int use_hybrid_pred = cm->reference_mode == REFERENCE_MODE_SELECT;
const int use_compound_pred = cm->reference_mode != SINGLE_REFERENCE;
@@ -3221,10 +3324,21 @@
if (cm->reference_mode != SINGLE_REFERENCE) {
for (i = 0; i < REF_CONTEXTS; i++) {
- for (j = 0; j < (COMP_REFS - 1); j ++) {
+#if !CONFIG_EXT_REFS && CONFIG_BIDIR_PRED
+ for (j = 0; j < (FWD_REFS - 1); j++) {
vp10_cond_prob_diff_update(header_bc, &fc->comp_ref_prob[i][j],
counts->comp_ref[i][j]);
}
+ for (j = 0; j < (BWD_REFS - 1); j++) {
+ vp10_cond_prob_diff_update(header_bc, &fc->comp_bwdref_prob[i][j],
+ counts->comp_bwdref[i][j]);
+ }
+#else // !CONFIG_EXT_REFS && CONFIG_BIDIR_PRED
+ for (j = 0; j < (COMP_REFS - 1); j++) {
+ vp10_cond_prob_diff_update(header_bc, &fc->comp_ref_prob[i][j],
+ counts->comp_ref[i][j]);
+ }
+#endif // !CONFIG_EXT_REFS && CONFIG_BIDIR_PRED
}
}
@@ -3418,6 +3532,13 @@
// Write the uncompressed header
write_uncompressed_header(cpi, &wb);
+#if !CONFIG_EXT_REFS && CONFIG_BIDIR_PRED
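+ // A show_existing_frame frame carries only the uncompressed header, so
+ // the compressed header and tile data are skipped here.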
+ if (cm->show_existing_frame) {
+ *size = vpx_wb_bytes_written(&wb);
+ return;
+ }
+#endif // !CONFIG_EXT_REFS && CONFIG_BIDIR_PRED
+
// We do not know these in advance. Output placeholder bit.
saved_wb = wb;
// Write tile size magnitudes
diff --git a/vp10/encoder/denoiser.c b/vp10/encoder/denoiser.c
index 43c94b1..5a6ae4a 100644
--- a/vp10/encoder/denoiser.c
+++ b/vp10/encoder/denoiser.c
@@ -388,6 +388,9 @@
YV12_BUFFER_CONFIG src,
FRAME_TYPE frame_type,
int refresh_last_frame,
+#if !CONFIG_EXT_REFS && CONFIG_BIDIR_PRED
+ int refresh_bwd_ref_frame,
+#endif // !CONFIG_EXT_REFS && CONFIG_BIDIR_PRED
int refresh_alt_ref_frame,
int refresh_golden_frame) {
if (frame_type == KEY_FRAME) {
@@ -411,6 +414,12 @@
swap_frame_buffer(&denoiser->running_avg_y[LAST_FRAME],
&denoiser->running_avg_y[INTRA_FRAME]);
}
+#if !CONFIG_EXT_REFS && CONFIG_BIDIR_PRED
+ if (refresh_bwd_ref_frame) {
+ swap_frame_buffer(&denoiser->running_avg_y[BWDREF_FRAME],
+ &denoiser->running_avg_y[INTRA_FRAME]);
+ }
+#endif // !CONFIG_EXT_REFS && CONFIG_BIDIR_PRED
}
void vp10_denoiser_reset_frame_stats(PICK_MODE_CONTEXT *ctx) {
diff --git a/vp10/encoder/denoiser.h b/vp10/encoder/denoiser.h
index 8182762..ceef451 100644
--- a/vp10/encoder/denoiser.h
+++ b/vp10/encoder/denoiser.h
@@ -36,6 +36,9 @@
YV12_BUFFER_CONFIG src,
FRAME_TYPE frame_type,
int refresh_last_frame,
+#if !CONFIG_EXT_REFS && CONFIG_BIDIR_PRED
+ int refresh_bwd_ref_frame,
+#endif // !CONFIG_EXT_REFS && CONFIG_BIDIR_PRED
int refresh_alt_ref_frame,
int refresh_golden_frame);
diff --git a/vp10/encoder/encodeframe.c b/vp10/encoder/encodeframe.c
index a6ff9b6..e68de82 100644
--- a/vp10/encoder/encodeframe.c
+++ b/vp10/encoder/encodeframe.c
@@ -1023,12 +1023,13 @@
static void reset_intmv_filter_type(VP10_COMMON *cm,
MACROBLOCKD *xd, MB_MODE_INFO *mbmi) {
int dir;
- for (dir = 0; dir < 4; ++dir) {
- const int frame_idx = (dir >> 1);
- if (mbmi->ref_frame[frame_idx] > INTRA_FRAME &&
- !has_subpel_mv_component(xd, dir))
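+ // dir 0/1 address the (vertical, horizontal) filters of the first
+ // reference frame; dir + 2 addresses the second reference, matching the
+ // slot layout documented in decodemv.c.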
+ for (dir = 0; dir < 2; ++dir) {
+ if (!has_subpel_mv_component(xd->mi[0], xd, dir) &&
+ (mbmi->ref_frame[1] == NONE ||
+ !has_subpel_mv_component(xd->mi[0], xd, dir + 2)))
mbmi->interp_filter[dir] = (cm->interp_filter == SWITCHABLE) ?
EIGHTTAP_REGULAR : cm->interp_filter;
+ mbmi->interp_filter[dir + 2] = mbmi->interp_filter[dir];
}
}
@@ -1036,10 +1037,10 @@
const MACROBLOCKD *xd,
const MB_MODE_INFO *mbmi) {
int dir;
- for (dir = 0; dir < 4; ++dir) {
- const int frame_idx = (dir >> 1);
- if (mbmi->ref_frame[frame_idx] > INTRA_FRAME &&
- has_subpel_mv_component(xd, dir)) {
+ for (dir = 0; dir < 2; ++dir) {
+ if (has_subpel_mv_component(xd->mi[0], xd, dir) ||
+ (mbmi->ref_frame[1] > INTRA_FRAME &&
+ has_subpel_mv_component(xd->mi[0], xd, dir + 2))) {
const int ctx = vp10_get_pred_context_switchable_interp(xd, dir);
++counts->switchable_interp[ctx][mbmi->interp_filter[dir]];
}
@@ -1882,6 +1883,9 @@
// the reference frame counts used to work out probabilities.
if (inter_block) {
const MV_REFERENCE_FRAME ref0 = mbmi->ref_frame[0];
+#if !CONFIG_EXT_REFS && CONFIG_BIDIR_PRED
+ const MV_REFERENCE_FRAME ref1 = mbmi->ref_frame[1];
+#endif // !CONFIG_EXT_REFS && CONFIG_BIDIR_PRED
if (cm->reference_mode == REFERENCE_MODE_SELECT)
counts->comp_inter[vp10_get_reference_mode_context(cm, xd)]
[has_second_ref(mbmi)]++;
@@ -1902,9 +1906,13 @@
[ref0 == LAST3_FRAME]++;
}
}
-#else
+#else // CONFIG_EXT_REFS
counts->comp_ref[vp10_get_pred_context_comp_ref_p(cm, xd)][0]
[ref0 == GOLDEN_FRAME]++;
+#if CONFIG_BIDIR_PRED
+ counts->comp_bwdref[vp10_get_pred_context_comp_bwdref_p(cm, xd)][0]
+ [ref1 == ALTREF_FRAME]++;
+#endif // CONFIG_BIDIR_PRED
#endif // CONFIG_EXT_REFS
} else {
#if CONFIG_EXT_REFS
@@ -1925,12 +1933,19 @@
[ref0 != LAST3_FRAME]++;
}
}
-#else
+#else // CONFIG_EXT_REFS
counts->single_ref[vp10_get_pred_context_single_ref_p1(xd)][0]
[ref0 != LAST_FRAME]++;
- if (ref0 != LAST_FRAME)
+ if (ref0 != LAST_FRAME) {
counts->single_ref[vp10_get_pred_context_single_ref_p2(xd)][1]
[ref0 != GOLDEN_FRAME]++;
+#if CONFIG_BIDIR_PRED
+ if (ref0 != GOLDEN_FRAME) {
+ counts->single_ref[vp10_get_pred_context_single_ref_p3(xd)][2]
+ [ref0 != BWDREF_FRAME]++;
+ }
+#endif // CONFIG_BIDIR_PRED
+ }
#endif // CONFIG_EXT_REFS
}
@@ -4331,6 +4346,10 @@
!!(ref_flags & VP9_LAST2_FLAG) +
!!(ref_flags & VP9_LAST3_FLAG) +
!!(ref_flags & VP9_LAST4_FLAG) +
+#else // CONFIG_EXT_REFS
+#if CONFIG_BIDIR_PRED
+ !!(ref_flags & VP9_BWD_FLAG) +
+#endif // CONFIG_BIDIR_PRED
#endif // CONFIG_EXT_REFS
!!(ref_flags & VP9_ALT_FLAG)) >= 2;
}
@@ -4359,7 +4378,7 @@
else if (cpi->refresh_golden_frame || cpi->refresh_alt_ref_frame)
return GOLDEN_FRAME;
else
- // TODO(zoeliu): TO investigate whether a frame_type other than
+ // TODO(zoeliu): To investigate whether a frame_type other than
 // INTRA/ALTREF/GOLDEN/LAST needs to be specified separately.
return LAST_FRAME;
}
@@ -4513,6 +4532,7 @@
cm->height == cm->last_height &&
!cm->intra_only &&
cm->last_show_frame;
+
// Special case: set prev_mi to NULL when the previous mode info
// context cannot be used.
cm->prev_mi = cm->use_prev_frame_mvs ?
@@ -4560,13 +4580,6 @@
#endif
}
-#if !CONFIG_DUAL_FILTER
-static INTERP_FILTER get_cm_interp_filter(VP10_COMP *cpi) {
- (void)cpi;
- return SWITCHABLE;
-}
-#endif
-
void vp10_encode_frame(VP10_COMP *cpi) {
VP10_COMMON *const cm = &cpi->common;
@@ -4584,6 +4597,13 @@
cpi->allow_comp_inter_inter = 0;
} else {
cpi->allow_comp_inter_inter = 1;
+
+#if !CONFIG_EXT_REFS && CONFIG_BIDIR_PRED
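+ // Under BIDIR_PRED, compound prediction pairs a forward reference
+ // (LAST or GOLDEN) with a backward reference (BWDREF or ALTREF).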
+ cm->comp_fwd_ref[0] = LAST_FRAME;
+ cm->comp_fwd_ref[1] = GOLDEN_FRAME;
+ cm->comp_bwd_ref[0] = BWDREF_FRAME;
+ cm->comp_bwd_ref[1] = ALTREF_FRAME;
+#else // !CONFIG_EXT_REFS && CONFIG_BIDIR_PRED
cm->comp_fixed_ref = ALTREF_FRAME;
cm->comp_var_ref[0] = LAST_FRAME;
#if CONFIG_EXT_REFS
@@ -4591,9 +4611,10 @@
cm->comp_var_ref[2] = LAST3_FRAME;
cm->comp_var_ref[3] = LAST4_FRAME;
cm->comp_var_ref[4] = GOLDEN_FRAME;
-#else
+#else // CONFIG_EXT_REFS
cm->comp_var_ref[1] = GOLDEN_FRAME;
#endif // CONFIG_EXT_REFS
+#endif // !CONFIG_EXT_REFS && CONFIG_BIDIR_PRED
}
} else {
cpi->allow_comp_inter_inter = 0;
@@ -4613,7 +4634,7 @@
// that for subsequent frames.
// It does the same analysis for transform size selection also.
//
- // TODO(zoeliu): TO investigate whether a frame_type other than
+ // TODO(zoeliu): To investigate whether a frame_type other than
 // INTRA/ALTREF/GOLDEN/LAST needs to be specified separately.
const MV_REFERENCE_FRAME frame_type = get_frame_type(cpi);
int64_t *const mode_thrs = rd_opt->prediction_type_threshes[frame_type];
@@ -4633,10 +4654,8 @@
else
cm->reference_mode = REFERENCE_MODE_SELECT;
-#if !CONFIG_DUAL_FILTER
- if (cm->interp_filter == SWITCHABLE) {
- cm->interp_filter = get_cm_interp_filter(cpi);
- }
+#if CONFIG_DUAL_FILTER
+ cm->interp_filter = SWITCHABLE;
#endif
encode_frame_internal(cpi);
diff --git a/vp10/encoder/encoder.c b/vp10/encoder/encoder.c
index 43b5401..dda82de 100644
--- a/vp10/encoder/encoder.c
+++ b/vp10/encoder/encoder.c
@@ -716,9 +716,9 @@
cpi->lookahead = vp10_lookahead_init(oxcf->width, oxcf->height,
cm->subsampling_x, cm->subsampling_y,
#if CONFIG_VP9_HIGHBITDEPTH
- cm->use_highbitdepth,
+ cm->use_highbitdepth,
#endif
- oxcf->lag_in_frames);
+ oxcf->lag_in_frames);
if (!cpi->lookahead)
vpx_internal_error(&cm->error, VPX_CODEC_MEM_ERROR,
"Failed to allocate lag buffers");
@@ -902,10 +902,15 @@
cpi->lst_fb_idxes[fb_idx] = fb_idx;
cpi->gld_fb_idx = LAST_REF_FRAMES;
cpi->alt_fb_idx = cpi->gld_fb_idx + 1;
-#else
+#else // CONFIG_EXT_REFS
cpi->lst_fb_idx = 0;
cpi->gld_fb_idx = 1;
+#if CONFIG_BIDIR_PRED
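+ // With BIDIR_PRED the virtual reference buffer indexes are laid out as
+ // LAST (0), GOLDEN (1), BWDREF (2) and ALTREF (3).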
+ cpi->bwd_fb_idx = 2;
+ cpi->alt_fb_idx = 3;
+#else // CONFIG_BIDIR_PRED
cpi->alt_fb_idx = 2;
+#endif // CONFIG_BIDIR_PRED
#endif // CONFIG_EXT_REFS
}
@@ -2232,6 +2237,9 @@
cpi->refresh_golden_frame = 0;
cpi->refresh_last_frame = 1;
+#if !CONFIG_EXT_REFS && CONFIG_BIDIR_PRED
+ cpi->refresh_bwd_ref_frame = 0;
+#endif // !CONFIG_EXT_REFS && CONFIG_BIDIR_PRED
cm->refresh_frame_context =
(oxcf->error_resilient_mode || oxcf->frame_parallel_decoding_mode) ?
@@ -2301,6 +2309,12 @@
cpi->alt_ref_source = NULL;
rc->is_src_frame_alt_ref = 0;
+#if !CONFIG_EXT_REFS && CONFIG_BIDIR_PRED
+ rc->is_bwd_ref_frame = 0;
+ rc->is_last_nonref_frame = 0;
+ rc->is_nonref_frame = 0;
+#endif // !CONFIG_EXT_REFS && CONFIG_BIDIR_PRED
+
#if 0
// Experimental RD Code
cpi->frame_distortion = 0;
@@ -2409,6 +2423,7 @@
cm->current_video_frame = 0;
cpi->partition_search_skippable_frame = 0;
cpi->tile_data = NULL;
+ cpi->last_show_frame_buf_idx = INVALID_IDX;
realloc_segmentation_maps(cpi);
@@ -2766,6 +2781,7 @@
return cpi;
}
+
#define SNPRINT(H, T) \
snprintf((H) + strlen(H), sizeof(H) - strlen(H), (T))
@@ -2967,8 +2983,8 @@
cpi->ext_refresh_frame_flags_pending = 1;
}
-static YV12_BUFFER_CONFIG *get_vp10_ref_frame_buffer(VP10_COMP *cpi,
- VP9_REFFRAME ref_frame_flag) {
+static YV12_BUFFER_CONFIG *get_vp10_ref_frame_buffer(
+ VP10_COMP *cpi, VP9_REFFRAME ref_frame_flag) {
MV_REFERENCE_FRAME ref_frame = NONE;
if (ref_frame_flag == VP9_LAST_FLAG)
ref_frame = LAST_FRAME;
@@ -2982,6 +2998,10 @@
#endif // CONFIG_EXT_REFS
else if (ref_frame_flag == VP9_GOLD_FLAG)
ref_frame = GOLDEN_FRAME;
+#if !CONFIG_EXT_REFS && CONFIG_BIDIR_PRED
+ else if (ref_frame_flag == VP9_BWD_FLAG)
+ ref_frame = BWDREF_FRAME;
+#endif // !CONFIG_EXT_REFS && CONFIG_BIDIR_PRED
else if (ref_frame_flag == VP9_ALT_FLAG)
ref_frame = ALTREF_FRAME;
@@ -3322,12 +3342,44 @@
int ref_frame;
#endif // CONFIG_EXT_REFS
- if (use_upsampled_ref) {
- // Up-sample the current encoded frame.
- RefCntBuffer *bufs = pool->frame_bufs;
- const YV12_BUFFER_CONFIG *const ref = &bufs[cm->new_fb_idx].buf;
+ // NOTE: Save the new show frame buffer index for --test-code=warn, i.e.,
+ // to verify that there is no mismatch between the encoder and the decoder.
+ if (cm->show_frame)
+ cpi->last_show_frame_buf_idx = cm->new_fb_idx;
- new_uidx = upsample_ref_frame(cpi, ref);
+#if !CONFIG_EXT_REFS && CONFIG_BIDIR_PRED
+ // TODO(zoeliu): To remove the reference buffer update for the
+ // show_existing_frame==1 case.
+#if 0
+ if (cpi->rc.is_last_nonref_frame) {
+ // NOTE: After the encoding of the LAST_NONREF_FRAME, the flag of
+ // show_existing_frame will be set, to notify the decoder to show the
+ // coded BWDREF_FRAME. During the handling of the show_existing_frame,
+ // no update will be conducted on the reference frame buffer.
+ // The following makes the BWDREF_FRAME to be shown be treated as the
+ // LAST_FRAME, preparing for the encoding of the next BWDREF_FRAME.
+ cpi->lst_fb_idx = cpi->bwd_fb_idx;
+ return;
+ }
+#endif // 0
+#endif // !CONFIG_EXT_REFS && CONFIG_BIDIR_PRED
+
+ if (use_upsampled_ref) {
+#if !CONFIG_EXT_REFS && CONFIG_BIDIR_PRED
+ if (cm->show_existing_frame) {
+ new_uidx = cpi->upsampled_ref_idx[cpi->existing_fb_idx_to_show];
+ // TODO(zoeliu): Once the following is confirmed, remove it.
+ assert(cpi->upsampled_ref_bufs[new_uidx].ref_count > 0);
+ } else {
+#endif // !CONFIG_EXT_REFS && CONFIG_BIDIR_PRED
+ // Up-sample the current encoded frame.
+ RefCntBuffer *bufs = pool->frame_bufs;
+ const YV12_BUFFER_CONFIG *const ref = &bufs[cm->new_fb_idx].buf;
+
+ new_uidx = upsample_ref_frame(cpi, ref);
+#if !CONFIG_EXT_REFS && CONFIG_BIDIR_PRED
+ }
+#endif // !CONFIG_EXT_REFS && CONFIG_BIDIR_PRED
}
// At this point the new frame has been encoded.
@@ -3335,12 +3387,20 @@
if (cm->frame_type == KEY_FRAME) {
ref_cnt_fb(pool->frame_bufs,
&cm->ref_frame_map[cpi->gld_fb_idx], cm->new_fb_idx);
+#if !CONFIG_EXT_REFS && CONFIG_BIDIR_PRED
+ ref_cnt_fb(pool->frame_bufs,
+ &cm->ref_frame_map[cpi->bwd_fb_idx], cm->new_fb_idx);
+#endif // !CONFIG_EXT_REFS && CONFIG_BIDIR_PRED
ref_cnt_fb(pool->frame_bufs,
&cm->ref_frame_map[cpi->alt_fb_idx], cm->new_fb_idx);
if (use_upsampled_ref) {
uref_cnt_fb(cpi->upsampled_ref_bufs,
&cpi->upsampled_ref_idx[cpi->gld_fb_idx], new_uidx);
+#if !CONFIG_EXT_REFS && CONFIG_BIDIR_PRED
+ uref_cnt_fb(cpi->upsampled_ref_bufs,
+ &cpi->upsampled_ref_idx[cpi->bwd_fb_idx], new_uidx);
+#endif // !CONFIG_EXT_REFS && CONFIG_BIDIR_PRED
uref_cnt_fb(cpi->upsampled_ref_bufs,
&cpi->upsampled_ref_idx[cpi->alt_fb_idx], new_uidx);
}
@@ -3364,6 +3424,9 @@
tmp = cpi->alt_fb_idx;
cpi->alt_fb_idx = cpi->gld_fb_idx;
cpi->gld_fb_idx = tmp;
+
+ // TODO(zoeliu): Do we need to copy cpi->interp_filter_selected[0] over to
+ // cpi->interp_filter_selected[GOLDEN_FRAME]?
} else { /* For non key/golden frames */
if (cpi->refresh_alt_ref_frame) {
int arf_idx = cpi->alt_fb_idx;
@@ -3399,6 +3462,20 @@
cpi->interp_filter_selected[ALTREF_FRAME],
sizeof(cpi->interp_filter_selected[ALTREF_FRAME]));
}
+
+#if !CONFIG_EXT_REFS && CONFIG_BIDIR_PRED
+ if (cpi->refresh_bwd_ref_frame) {
+ ref_cnt_fb(pool->frame_bufs,
+ &cm->ref_frame_map[cpi->bwd_fb_idx], cm->new_fb_idx);
+ if (use_upsampled_ref)
+ uref_cnt_fb(cpi->upsampled_ref_bufs,
+ &cpi->upsampled_ref_idx[cpi->bwd_fb_idx], new_uidx);
+
+ memcpy(cpi->interp_filter_selected[BWDREF_FRAME],
+ cpi->interp_filter_selected[0],
+ sizeof(cpi->interp_filter_selected[0]));
+ }
+#endif // !CONFIG_EXT_REFS && CONFIG_BIDIR_PRED
}
if (cpi->refresh_last_frame) {
@@ -3474,17 +3551,43 @@
sizeof(cpi->interp_filter_selected[0]));
}
#else // CONFIG_EXT_REFS
- ref_cnt_fb(pool->frame_bufs,
- &cm->ref_frame_map[cpi->lst_fb_idx], cm->new_fb_idx);
- if (use_upsampled_ref)
- uref_cnt_fb(cpi->upsampled_ref_bufs,
- &cpi->upsampled_ref_idx[cpi->lst_fb_idx], new_uidx);
+#if CONFIG_BIDIR_PRED
+ // TODO(zoeliu): To remove the reference buffer update for the
+ // show_existing_frame==1 case; instead, move the reference buffer update
+ // to the previously coded frame, i.e. the last non-reference frame. In
+ // that case, no bit should be set in the refresh mask, but the virtual
+ // ref-idx should be updated and written to the bitstream accordingly, as
+ // the virtual ref-idx for LAST_FRAME and BWDREF_FRAME should be switched,
+ // i.e. cpi->lst_fb_idx and cpi->bwd_fb_idx should be swapped.
+ if (cm->show_existing_frame) {
+ ref_cnt_fb(pool->frame_bufs,
+ &cm->ref_frame_map[cpi->lst_fb_idx], cm->new_fb_idx);
- if (!cpi->rc.is_src_frame_alt_ref) {
+ if (use_upsampled_ref)
+ uref_cnt_fb(cpi->upsampled_ref_bufs,
+ &cpi->upsampled_ref_idx[cpi->lst_fb_idx], new_uidx);
+
+ // NOTE(zoeliu): OVERLAY should not be the last non-reference frame.
+ assert(!cpi->rc.is_src_frame_alt_ref);
+
memcpy(cpi->interp_filter_selected[LAST_FRAME],
- cpi->interp_filter_selected[0],
- sizeof(cpi->interp_filter_selected[0]));
+ cpi->interp_filter_selected[BWDREF_FRAME],
+ sizeof(cpi->interp_filter_selected[BWDREF_FRAME]));
+ } else {
+#endif // CONFIG_BIDIR_PRED
+ ref_cnt_fb(pool->frame_bufs,
+ &cm->ref_frame_map[cpi->lst_fb_idx], cm->new_fb_idx);
+ if (use_upsampled_ref)
+ uref_cnt_fb(cpi->upsampled_ref_bufs,
+ &cpi->upsampled_ref_idx[cpi->lst_fb_idx], new_uidx);
+ if (!cpi->rc.is_src_frame_alt_ref) {
+ memcpy(cpi->interp_filter_selected[LAST_FRAME],
+ cpi->interp_filter_selected[0],
+ sizeof(cpi->interp_filter_selected[0]));
+ }
+#if CONFIG_BIDIR_PRED
}
+#endif // CONFIG_BIDIR_PRED
#endif // CONFIG_EXT_REFS
}
@@ -3494,6 +3597,9 @@
*cpi->Source,
cpi->common.frame_type,
cpi->refresh_last_frame,
+#if !CONFIG_EXT_REFS && CONFIG_BIDIR_PRED
+ cpi->refresh_bwd_ref_frame,
+#endif // !CONFIG_EXT_REFS && CONFIG_BIDIR_PRED
cpi->refresh_alt_ref_frame,
cpi->refresh_golden_frame);
}
@@ -3572,6 +3678,9 @@
VP9_LAST4_FLAG,
#endif // CONFIG_EXT_REFS
VP9_GOLD_FLAG,
+#if !CONFIG_EXT_REFS && CONFIG_BIDIR_PRED
+ VP9_BWD_FLAG,
+#endif // !CONFIG_EXT_REFS && CONFIG_BIDIR_PRED
VP9_ALT_FLAG
};
@@ -3693,9 +3802,14 @@
refresh[1] = refresh[2] = refresh[3] = 0;
refresh[4] = (cpi->refresh_golden_frame) ? 1 : 0;
refresh[5] = (cpi->refresh_alt_ref_frame) ? 1 : 0;
-#else
+#else // CONFIG_EXT_REFS
refresh[1] = (cpi->refresh_golden_frame) ? 1 : 0;
+#if CONFIG_BIDIR_PRED
+ refresh[2] = (cpi->refresh_bwd_ref_frame) ? 1 : 0;
+ refresh[3] = (cpi->refresh_alt_ref_frame) ? 1 : 0;
+#else // CONFIG_BIDIR_PRED
refresh[2] = (cpi->refresh_alt_ref_frame) ? 1 : 0;
+#endif // CONFIG_BIDIR_PRED
#endif // CONFIG_EXT_REFS
for (i = LAST_FRAME; i <= ALTREF_FRAME; ++i) {
const int idx = cpi->scaled_ref_idx[i - 1];
@@ -3830,7 +3944,12 @@
// after a key/intra-only frame.
cpi->max_mv_magnitude = max_mv_def;
} else {
+#if !CONFIG_EXT_REFS && CONFIG_BIDIR_PRED
+ // TODO(zoeliu): Maybe we should leave it the same as base.
+ if (cm->show_frame || cpi->rc.is_bwd_ref_frame) {
+#else
if (cm->show_frame) {
+#endif // !CONFIG_EXT_REFS && CONFIG_BIDIR_PRED
// Allow mv_steps to correspond to twice the max mv magnitude found
// in the previous frame, capped by the default max_mv_magnitude based
// on resolution.
@@ -3922,7 +4041,7 @@
// There has been a change in frame size.
vp10_set_size_literal(cpi, oxcf->scaled_frame_width,
- oxcf->scaled_frame_height);
+ oxcf->scaled_frame_height);
}
if (oxcf->pass == 0 &&
@@ -4159,7 +4278,7 @@
}
cpi->Source = vp10_scale_if_required(cm, cpi->un_scaled_source,
- &cpi->scaled_source);
+ &cpi->scaled_source);
if (cpi->unscaled_last_source != NULL)
cpi->Last_Source = vp10_scale_if_required(cm, cpi->unscaled_last_source,
@@ -4434,16 +4553,25 @@
map[cpi->lst_fb_idxes[3]] == map[cpi->lst_fb_idxes[1]];
const int last4_is_last3 =
map[cpi->lst_fb_idxes[3]] == map[cpi->lst_fb_idxes[2]];
- const int gld_is_last4 = map[cpi->gld_fb_idx] == map[cpi->lst_fb_idxes[3]];
+
const int last4_is_alt = map[cpi->alt_fb_idx] == map[cpi->lst_fb_idxes[3]];
-#else
+ const int gld_is_last4 = map[cpi->gld_fb_idx] == map[cpi->lst_fb_idxes[3]];
+#else // CONFIG_EXT_REFS
const int gld_is_last = map[cpi->gld_fb_idx] == map[cpi->lst_fb_idx];
+#if CONFIG_BIDIR_PRED
+ const int bwd_is_last = map[cpi->bwd_fb_idx] == map[cpi->lst_fb_idx];
+#endif // CONFIG_BIDIR_PRED
const int alt_is_last = map[cpi->alt_fb_idx] == map[cpi->lst_fb_idx];
#endif // CONFIG_EXT_REFS
const int gld_is_alt = map[cpi->gld_fb_idx] == map[cpi->alt_fb_idx];
int flags = VP9_REFFRAME_ALL;
+#if !CONFIG_EXT_REFS && CONFIG_BIDIR_PRED
+ if (!cpi->rc.is_bwd_ref_frame)
+ flags &= ~VP9_BWD_FLAG;
+#endif // !CONFIG_EXT_REFS && CONFIG_BIDIR_PRED
+
if (gld_is_alt || gld_is_last)
flags &= ~VP9_GOLD_FLAG;
@@ -4465,6 +4593,11 @@
if (gld_is_last4 || gld_is_last3 || gld_is_last2)
flags &= ~VP9_GOLD_FLAG;
+#else // CONFIG_EXT_REFS
+#if CONFIG_BIDIR_PRED
+ if (bwd_is_last && (flags & VP9_BWD_FLAG))
+ flags &= ~VP9_BWD_FLAG;
+#endif // CONFIG_BIDIR_PRED
#endif // CONFIG_EXT_REFS
return flags;
@@ -4532,6 +4665,9 @@
(cpi->rc.source_alt_ref_active && !cpi->refresh_alt_ref_frame);
}
cm->ref_frame_sign_bias[ALTREF_FRAME] = arf_sign_bias;
+#if !CONFIG_EXT_REFS && CONFIG_BIDIR_PRED
+ cm->ref_frame_sign_bias[BWDREF_FRAME] = cm->ref_frame_sign_bias[ALTREF_FRAME];
+#endif // !CONFIG_EXT_REFS && CONFIG_BIDIR_PRED
}
static int setup_interp_filter_search_mask(VP10_COMP *cpi) {
@@ -4563,6 +4699,11 @@
(ref_total[GOLDEN_FRAME] == 0 ||
cpi->interp_filter_selected[GOLDEN_FRAME][ifilter] * 50
< ref_total[GOLDEN_FRAME]) &&
+#if !CONFIG_EXT_REFS && CONFIG_BIDIR_PRED
+ (ref_total[BWDREF_FRAME] == 0 ||
+ cpi->interp_filter_selected[BWDREF_FRAME][ifilter] * 50
+ < ref_total[BWDREF_FRAME]) &&
+#endif // !CONFIG_EXT_REFS && CONFIG_BIDIR_PRED
(ref_total[ALTREF_FRAME] == 0 ||
cpi->interp_filter_selected[ALTREF_FRAME][ifilter] * 50
< ref_total[ALTREF_FRAME]))
@@ -4571,6 +4712,61 @@
return mask;
}
+#define DUMP_RECON_FRAMES 0
+
+#if DUMP_RECON_FRAMES == 1
+// NOTE(zoeliu): For debug - Output the filtered reconstructed video.
+static void dump_filtered_recon_frames(VP10_COMP *cpi) {
+ VP10_COMMON *const cm = &cpi->common;
+ const YV12_BUFFER_CONFIG *recon_buf = cm->frame_to_show;
+ int h;
+ char file_name[256] = "/tmp/enc_filtered_recon.yuv";
+ FILE *f_recon = NULL;
+
+ if (recon_buf == NULL || !cm->show_frame) {
+ printf("Frame %d is not ready or no show to dump.\n",
+ cm->current_video_frame);
+ return;
+ }
+
+ if (cm->current_video_frame == 0) {
+ if ((f_recon = fopen(file_name, "wb")) == NULL) {
+ printf("Unable to open file %s to write.\n", file_name);
+ return;
+ }
+ } else {
+ if ((f_recon = fopen(file_name, "ab")) == NULL) {
+ printf("Unable to open file %s to append.\n", file_name);
+ return;
+ }
+ }
+ printf("\nFrame=%5d, encode_update_type[%5d]=%1d, show_existing_frame=%d, "
+ "y_stride=%4d, uv_stride=%4d, width=%4d, height=%4d\n",
+ cm->current_video_frame, cpi->twopass.gf_group.index,
+ cpi->twopass.gf_group.update_type[cpi->twopass.gf_group.index],
+ cm->show_existing_frame,
+ recon_buf->y_stride, recon_buf->uv_stride, cm->width, cm->height);
+
+ // --- Y ---
+ for (h = 0; h < cm->height; ++h) {
+ fwrite(&recon_buf->y_buffer[h*recon_buf->y_stride],
+ 1, cm->width, f_recon);
+ }
+ // --- U ---
+ for (h = 0; h < (cm->height >> 1); ++h) {
+ fwrite(&recon_buf->u_buffer[h*recon_buf->uv_stride],
+ 1, (cm->width >> 1), f_recon);
+ }
+ // --- V ---
+ for (h = 0; h < (cm->height >> 1); ++h) {
+ fwrite(&recon_buf->v_buffer[h*recon_buf->uv_stride],
+ 1, (cm->width >> 1), f_recon);
+ }
+
+ fclose(f_recon);
+}
+#endif // DUMP_RECON_FRAMES
+
static void encode_frame_to_data_rate(VP10_COMP *cpi,
size_t *size,
uint8_t *dest,
@@ -4586,6 +4782,56 @@
// Set the arf sign bias for this frame.
set_arf_sign_bias(cpi);
+#if !CONFIG_EXT_REFS && CONFIG_BIDIR_PRED
+ if (cm->show_existing_frame) {
+ // NOTE(zoeliu): In BIDIR_PRED, the existing frame to show is the current
+ // BWDREF_FRAME in the reference frame buffer.
+
+ cm->frame_type = INTER_FRAME;
+ cm->show_frame = 1;
+ cpi->frame_flags = *frame_flags;
+
+ cpi->refresh_last_frame = 1;
+ cpi->refresh_golden_frame = 0;
+ cpi->refresh_bwd_ref_frame = 0;
+ cpi->refresh_alt_ref_frame = 0;
+
+ cpi->rc.is_bwd_ref_frame = 0;
+ cpi->rc.is_last_nonref_frame = 0;
+ cpi->rc.is_nonref_frame = 0;
+
+ // Build the bitstream
+ vp10_pack_bitstream(cpi, dest, size);
+
+ // Set up frame to show to get ready for stats collection.
+ cm->frame_to_show = get_frame_new_buffer(cm);
+
+ // Update the LAST_FRAME in the reference frame buffer.
+ vp10_update_reference_frames(cpi);
+
+ cpi->frame_flags &= ~FRAMEFLAGS_GOLDEN;
+ cpi->frame_flags &= ~FRAMEFLAGS_BWDREF;
+ cpi->frame_flags &= ~FRAMEFLAGS_ALTREF;
+
+ *frame_flags = cpi->frame_flags & ~FRAMEFLAGS_KEY;
+
+#if DUMP_RECON_FRAMES == 1
+ // NOTE(zoeliu): For debug - Output the filtered reconstructed video.
+ dump_filtered_recon_frames(cpi);
+#endif // DUMP_RECON_FRAMES
+
+ // Update the frame type
+ cm->last_frame_type = cm->frame_type;
+
+ cm->last_width = cm->width;
+ cm->last_height = cm->height;
+
+ ++cm->current_video_frame;
+
+ return;
+ }
+#endif // !CONFIG_EXT_REFS && CONFIG_BIDIR_PRED
+
// Set default state for segment based loop filter update flags.
cm->lf.mode_ref_delta_update = 0;
@@ -4651,13 +4897,14 @@
vp10_write_yuv_frame_420(&cpi->denoiser.running_avg_y[INTRA_FRAME],
yuv_denoised_file);
}
-#endif
-#endif
+#endif // OUTPUT_YUV_DENOISED
+#endif // CONFIG_VP9_TEMPORAL_DENOISING
+
#ifdef OUTPUT_YUV_SKINMAP
if (cpi->common.current_video_frame > 1) {
vp10_compute_skin_map(cpi, yuv_skinmap_file);
}
-#endif
+#endif // OUTPUT_YUV_SKINMAP
// Special case code to reduce pulsing when key frames are forced at a
// fixed interval. Note the reconstruction error if it is the frame before
@@ -4686,18 +4933,38 @@
cm->frame_to_show->render_width = cm->render_width;
cm->frame_to_show->render_height = cm->render_height;
+#if !CONFIG_EXT_REFS && CONFIG_BIDIR_PRED
+ // TODO(zoeliu): For non-ref frames, loop filtering may need to be turned
+ // off.
+#endif // !CONFIG_EXT_REFS && CONFIG_BIDIR_PRED
+
// Pick the loop filter level for the frame.
loopfilter_frame(cpi, cm);
- // build the bitstream
+ // Build the bitstream
vp10_pack_bitstream(cpi, dest, size);
+#if DUMP_RECON_FRAMES == 1
+ // NOTE(zoeliu): For debug - Output the filtered reconstructed video.
+ if (cm->show_frame)
+ dump_filtered_recon_frames(cpi);
+#endif // DUMP_RECON_FRAMES
+
+#if !CONFIG_EXT_REFS && CONFIG_BIDIR_PRED
+ if (cpi->rc.is_last_nonref_frame) {
+    // NOTE: If the current frame is a LAST_NONREF_FRAME, the frame to show
+    //       next is the BWDREF_FRAME already in the reference buffer.
+ cpi->existing_fb_idx_to_show = cpi->bwd_fb_idx;
+ }
+#endif // !CONFIG_EXT_REFS && CONFIG_BIDIR_PRED
+
if (cm->seg.update_map)
update_reference_segmentation_map(cpi);
if (frame_is_intra_only(cm) == 0) {
release_scaled_references(cpi);
}
+
vp10_update_reference_frames(cpi);
for (t = TX_4X4; t <= TX_32X32; t++)
@@ -4729,6 +4996,13 @@
else
cpi->frame_flags &= ~FRAMEFLAGS_ALTREF;
+#if !CONFIG_EXT_REFS && CONFIG_BIDIR_PRED
+ if (cpi->refresh_bwd_ref_frame == 1)
+ cpi->frame_flags |= FRAMEFLAGS_BWDREF;
+ else
+ cpi->frame_flags &= ~FRAMEFLAGS_BWDREF;
+#endif // !CONFIG_EXT_REFS && CONFIG_BIDIR_PRED
+
cpi->ref_frame_flags = get_ref_frame_flags(cpi);
#if CONFIG_EXT_REFS
@@ -4764,13 +5038,37 @@
if (!cm->show_existing_frame)
cm->last_show_frame = cm->show_frame;
+#if 0
+#if !CONFIG_EXT_REFS && CONFIG_BIDIR_PRED
+  if ((cm->show_frame &&
+       !(cpi->rc.is_nonref_frame || cpi->rc.is_last_nonref_frame)) ||
+      cpi->rc.is_bwd_ref_frame) {
+ vp10_swap_mi_and_prev_mi(cm);
+ }
+ if (cm->show_frame || cpi->rc.is_bwd_ref_frame) {
+ // Don't increment frame counters if this was an altref buffer
+ // update not a real frame
+ ++cm->current_video_frame;
+ }
+#endif // !CONFIG_EXT_REFS && CONFIG_BIDIR_PRED
+#endif // 0
+
if (cm->show_frame) {
+#if !CONFIG_EXT_REFS && CONFIG_BIDIR_PRED
+    // TODO(zoeliu): We may only need to swap mi and prev_mi for those frames
+    //               that are being used as reference.
+#endif // !CONFIG_EXT_REFS && CONFIG_BIDIR_PRED
vp10_swap_mi_and_prev_mi(cm);
// Don't increment frame counters if this was an altref buffer
// update not a real frame
++cm->current_video_frame;
}
- cm->prev_frame = cm->cur_frame;
+
+#if !CONFIG_EXT_REFS && CONFIG_BIDIR_PRED
+  // NOTE: A frame that is not used as a reference is never supposed to be
+  //       referenced, so prev_frame is only updated for reference frames.
+ if (cm->is_reference_frame)
+#endif // !CONFIG_EXT_REFS && CONFIG_BIDIR_PRED
+ cm->prev_frame = cm->cur_frame;
}
static void Pass0Encode(VP10_COMP *cpi, size_t *size, uint8_t *dest,
@@ -4788,7 +5086,10 @@
cpi->allow_encode_breakout = ENCODE_BREAKOUT_ENABLED;
encode_frame_to_data_rate(cpi, size, dest, frame_flags);
- vp10_twopass_postencode_update(cpi);
+#if !CONFIG_EXT_REFS && CONFIG_BIDIR_PRED
+ if (!cpi->common.show_existing_frame)
+#endif // !CONFIG_EXT_REFS && CONFIG_BIDIR_PRED
+ vp10_twopass_postencode_update(cpi);
}
static void init_ref_frame_bufs(VP10_COMMON *cm) {
@@ -4904,6 +5205,9 @@
return cm->frame_type == KEY_FRAME ||
cpi->refresh_last_frame ||
cpi->refresh_golden_frame ||
+#if !CONFIG_EXT_REFS && CONFIG_BIDIR_PRED
+ cpi->refresh_bwd_ref_frame ||
+#endif // !CONFIG_EXT_REFS && CONFIG_BIDIR_PRED
cpi->refresh_alt_ref_frame ||
!cm->error_resilient_mode ||
cm->lf.mode_ref_delta_update ||
@@ -4968,6 +5272,27 @@
return arf_src_index;
}
+#if !CONFIG_EXT_REFS && CONFIG_BIDIR_PRED
+static int get_brf_src_index(VP10_COMP *cpi) {
+ int brf_src_index = 0;
+ const GF_GROUP *const gf_group = &cpi->twopass.gf_group;
+
+  // TODO(zoeliu): Add a check on the -bwd_ref command line setup flag.
+ if (gf_group->bidir_pred_enabled[gf_group->index]) {
+ if (cpi->oxcf.pass == 2) {
+ if (gf_group->update_type[gf_group->index] == BRF_UPDATE)
+ brf_src_index = gf_group->brf_src_offset[gf_group->index];
+ } else {
+ // TODO(zoeliu): To re-visit the setup for this scenario
+ brf_src_index = BIDIR_PRED_PERIOD - 1;
+ }
+ }
+
+ return brf_src_index;
+}
+#endif // !CONFIG_EXT_REFS && CONFIG_BIDIR_PRED
+
static void check_src_altref(VP10_COMP *cpi,
const struct lookahead_entry *source) {
RATE_CONTROL *const rc = &cpi->rc;
@@ -5117,6 +5442,9 @@
struct lookahead_entry *last_source = NULL;
struct lookahead_entry *source = NULL;
int arf_src_index;
+#if !CONFIG_EXT_REFS && CONFIG_BIDIR_PRED
+ int brf_src_index;
+#endif // !CONFIG_EXT_REFS && CONFIG_BIDIR_PRED
int i;
vpx_usec_timer_start(&cmptimer);
@@ -5138,11 +5466,63 @@
cpi->refresh_last_frame = 1;
cpi->refresh_golden_frame = 0;
+#if !CONFIG_EXT_REFS && CONFIG_BIDIR_PRED
+ cpi->refresh_bwd_ref_frame = 0;
+#endif // !CONFIG_EXT_REFS && CONFIG_BIDIR_PRED
cpi->refresh_alt_ref_frame = 0;
+#if !CONFIG_EXT_REFS && CONFIG_BIDIR_PRED
+ if (oxcf->pass == 2 && cm->show_existing_frame) {
+    // Manage the source buffer and flush out the source frame that has
+    // already been coded; also prepare for the PSNR calculation if needed.
+ if ((source = vp10_lookahead_pop(cpi->lookahead, flush)) == NULL) {
+ *size = 0;
+ return -1;
+ }
+ cpi->Source = &source->img;
+
+    // TODO(zoeliu): Determine whether the frame rate needs to be adjusted
+    // here.
+ *time_stamp = source->ts_start;
+ *time_end = source->ts_end;
+
+ // Find a free buffer for the new frame, releasing the reference previously
+ // held.
+ if (cm->new_fb_idx != INVALID_IDX) {
+ --pool->frame_bufs[cm->new_fb_idx].ref_count;
+ }
+ cm->new_fb_idx = get_free_fb(cm);
+
+ if (cm->new_fb_idx == INVALID_IDX)
+ return -1;
+
+ // Clear down mmx registers
+ vpx_clear_system_state();
+
+ // Start with a 0 size frame.
+ *size = 0;
+
+ Pass2Encode(cpi, size, dest, frame_flags);
+
+ if (cpi->b_calculate_psnr)
+ generate_psnr_packet(cpi);
+
+#if CONFIG_INTERNAL_STATS
+ compute_internal_stats(cpi);
+ cpi->bytes += (int)(*size);
+#endif // CONFIG_INTERNAL_STATS
+
+ // Clear down mmx registers
+ vpx_clear_system_state();
+
+ cm->show_existing_frame = 0;
+
+ return 0;
+ }
+#endif // !CONFIG_EXT_REFS && CONFIG_BIDIR_PRED
+
// Should we encode an arf frame.
arf_src_index = get_arf_src_index(cpi);
-
if (arf_src_index) {
for (i = 0; i <= arf_src_index; ++i) {
struct lookahead_entry *e = vp10_lookahead_peek(cpi->lookahead, i);
@@ -5180,6 +5560,27 @@
rc->source_alt_ref_pending = 0;
}
+#if !CONFIG_EXT_REFS && CONFIG_BIDIR_PRED
+ rc->is_bwd_ref_frame = 0;
+ brf_src_index = get_brf_src_index(cpi);
+  // TODO(zoeliu): Need to handle the case when alt-ref is disabled; currently
+  // bwd-ref works only when alt-ref is on.
+ if (brf_src_index) {
+ assert(brf_src_index <= rc->frames_to_key);
+ if ((source = vp10_lookahead_peek(cpi->lookahead, brf_src_index)) != NULL) {
+ cm->show_frame = 0;
+ cm->intra_only = 0;
+
+ cpi->refresh_bwd_ref_frame = 1;
+ cpi->refresh_last_frame = 0;
+ cpi->refresh_golden_frame = 0;
+ cpi->refresh_alt_ref_frame = 0;
+
+ rc->is_bwd_ref_frame = 1;
+ }
+ }
+#endif // !CONFIG_EXT_REFS && CONFIG_BIDIR_PRED
+
if (!source) {
// Get last frame source.
if (cm->current_video_frame > 0) {
@@ -5227,9 +5628,8 @@
vpx_clear_system_state();
// adjust frame rates based on timestamps given
- if (cm->show_frame) {
+ if (cm->show_frame)
adjust_frame_rate(cpi, source);
- }
// Find a free buffer for the new frame, releasing the reference previously
// held.
@@ -5301,8 +5701,21 @@
compute_internal_stats(cpi);
cpi->bytes += (int)(*size);
}
-#endif
+#endif // CONFIG_INTERNAL_STATS
+
vpx_clear_system_state();
+
+#if !CONFIG_EXT_REFS && CONFIG_BIDIR_PRED
+ if (cpi->rc.is_last_nonref_frame) {
+    // NOTE(zoeliu): If the current frame is a last non-reference frame, the
+    // next frame to show is the BWDREF_FRAME.
+ cpi->rc.is_last_nonref_frame = 0;
+ cm->show_existing_frame = 1;
+ } else {
+ cm->show_existing_frame = 0;
+ }
+#endif // !CONFIG_EXT_REFS && CONFIG_BIDIR_PRED
+
return 0;
}
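Together with the show_existing_frame early-exit added to encode_frame_to_data_rate earlier in this diff, the logic above forms a two-call handshake: one call codes the last non-reference frame of a BIDIR_PRED period and raises show_existing_frame, and the following call merely signals that the already-coded BWDREF buffer should be displayed. A toy trace of that handshake (a sketch, not encoder code; the every-other-frame pattern is an assumption):

#include <stdio.h>

int main(void) {
  int show_existing_frame = 0;
  int call;
  for (call = 0; call < 4; ++call) {
    if (show_existing_frame) {
      /* Mirrors the early-exit path: no new frame is coded. */
      printf("call %d: emit show_existing_frame (display the BWDREF buffer)\n",
             call);
      show_existing_frame = 0;
    } else {
      /* Hypothetical pattern: every other coded frame ends a BIDIR period. */
      const int is_last_nonref_frame = (call % 2 == 0);
      printf("call %d: encode a new frame\n", call);
      if (is_last_nonref_frame)
        show_existing_frame = 1;
    }
  }
  return 0;
}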
@@ -5336,6 +5749,15 @@
}
}
+int vp10_get_last_show_frame(VP10_COMP *cpi, YV12_BUFFER_CONFIG *frame) {
+ if (cpi->last_show_frame_buf_idx == INVALID_IDX)
+ return -1;
+
+ *frame =
+ cpi->common.buffer_pool->frame_bufs[cpi->last_show_frame_buf_idx].buf;
+ return 0;
+}
+
int vp10_set_internal_size(VP10_COMP *cpi,
VPX_SCALING horiz_mode, VPX_SCALING vert_mode) {
VP10_COMMON *cm = &cpi->common;
diff --git a/vp10/encoder/encoder.h b/vp10/encoder/encoder.h
index 67ebe6d..f1508af 100644
--- a/vp10/encoder/encoder.h
+++ b/vp10/encoder/encoder.h
@@ -75,6 +75,9 @@
FRAME_CONTEXT fc;
} CODING_CONTEXT;
+#if !CONFIG_EXT_REFS && CONFIG_BIDIR_PRED
+#define BIDIR_PRED_PERIOD 2
+#endif // !CONFIG_EXT_REFS && CONFIG_BIDIR_PRED
typedef enum {
// encode_breakout is disabled.
@@ -111,7 +114,12 @@
typedef enum {
FRAMEFLAGS_KEY = 1 << 0,
FRAMEFLAGS_GOLDEN = 1 << 1,
+#if !CONFIG_EXT_REFS && CONFIG_BIDIR_PRED
+ FRAMEFLAGS_BWDREF = 1 << 2,
+ FRAMEFLAGS_ALTREF = 1 << 3,
+#else
FRAMEFLAGS_ALTREF = 1 << 2,
+#endif // !CONFIG_EXT_REFS && CONFIG_BIDIR_PRED
} FRAMETYPE_FLAGS;
typedef enum {
@@ -197,6 +205,9 @@
// ----------------------------------------------------------------
int enable_auto_arf;
+#if !CONFIG_EXT_REFS && CONFIG_BIDIR_PRED
+ int enable_auto_brf; // (b)ackward (r)ef (f)rame
+#endif // !CONFIG_EXT_REFS && CONFIG_BIDIR_PRED
int encode_breakout; // early breakout : for video conf recommend 800
@@ -347,14 +358,22 @@
int scaled_ref_idx[MAX_REF_FRAMES];
#if CONFIG_EXT_REFS
int lst_fb_idxes[LAST_REF_FRAMES];
-#else
+#else // CONFIG_EXT_REFS
int lst_fb_idx;
#endif // CONFIG_EXT_REFS
int gld_fb_idx;
+#if !CONFIG_EXT_REFS && CONFIG_BIDIR_PRED
+ int bwd_fb_idx; // BWD_REF_FRAME
+#endif // !CONFIG_EXT_REFS && CONFIG_BIDIR_PRED
int alt_fb_idx;
+ int last_show_frame_buf_idx; // last show frame buffer index
+
int refresh_last_frame;
int refresh_golden_frame;
+#if !CONFIG_EXT_REFS && CONFIG_BIDIR_PRED
+ int refresh_bwd_ref_frame;
+#endif // !CONFIG_EXT_REFS && CONFIG_BIDIR_PRED
int refresh_alt_ref_frame;
int ext_refresh_frame_flags_pending;
@@ -587,6 +606,10 @@
#if CONFIG_ANS
struct BufAnsCoder buf_ans;
#endif
+#if !CONFIG_EXT_REFS && CONFIG_BIDIR_PRED
+ int refresh_frame_mask;
+ int existing_fb_idx_to_show;
+#endif // !CONFIG_EXT_REFS && CONFIG_BIDIR_PRED
} VP10_COMP;
void vp10_initialize_enc(void);
@@ -610,6 +633,8 @@
int vp10_get_preview_raw_frame(VP10_COMP *cpi, YV12_BUFFER_CONFIG *dest,
vp10_ppflags_t *flags);
+int vp10_get_last_show_frame(VP10_COMP *cpi, YV12_BUFFER_CONFIG *frame);
+
int vp10_use_as_reference(VP10_COMP *cpi, int ref_frame_flags);
void vp10_update_reference(VP10_COMP *cpi, int ref_frame_flags);
@@ -654,12 +679,16 @@
#endif // CONFIG_EXT_REFS
else if (ref_frame == GOLDEN_FRAME)
return cpi->gld_fb_idx;
+#if !CONFIG_EXT_REFS && CONFIG_BIDIR_PRED
+ else if (ref_frame == BWDREF_FRAME)
+ return cpi->bwd_fb_idx;
+#endif // !CONFIG_EXT_REFS && CONFIG_BIDIR_PRED
else
return cpi->alt_fb_idx;
}
static INLINE int get_ref_frame_buf_idx(const VP10_COMP *const cpi,
- int ref_frame) {
+ MV_REFERENCE_FRAME ref_frame) {
const VP10_COMMON *const cm = &cpi->common;
const int map_idx = get_ref_frame_map_idx(cpi, ref_frame);
return (map_idx != INVALID_IDX) ? cm->ref_frame_map[map_idx] : INVALID_IDX;
@@ -673,6 +702,14 @@
buf_idx != INVALID_IDX ? &cm->buffer_pool->frame_bufs[buf_idx].buf : NULL;
}
+static INLINE const YV12_BUFFER_CONFIG *get_upsampled_ref(
+ VP10_COMP *cpi, const MV_REFERENCE_FRAME ref_frame) {
+ // Use up-sampled reference frames.
+ const int buf_idx =
+ cpi->upsampled_ref_idx[get_ref_frame_map_idx(cpi, ref_frame)];
+ return &cpi->upsampled_ref_bufs[buf_idx].buf;
+}
+
static INLINE unsigned int get_token_alloc(int mb_rows, int mb_cols) {
// TODO(JBB): double check we can't exceed this token count if we have a
// 32x32 transform crossing a boundary at a multiple of 16.
@@ -714,6 +751,16 @@
cpi->oxcf.enable_auto_arf;
}
+#if !CONFIG_EXT_REFS && CONFIG_BIDIR_PRED
+static INLINE int is_bwdref_enabled(const VP10_COMP *const cpi) {
+ // NOTE(zoeliu): The enabling of backward prediction depends on the alt_ref
+ // period, and will be off when the alt_ref period is not sufficiently large.
+ return cpi->oxcf.mode != REALTIME && cpi->oxcf.lag_in_frames > 0;
+ // (zoeliu):
+ // && cpi->oxcf.enable_auto_brf && cpi->rc.bidir_pred_enabled;
+}
+#endif // !CONFIG_EXT_REFS && CONFIG_BIDIR_PRED
+
static INLINE void set_ref_ptrs(VP10_COMMON *cm, MACROBLOCKD *xd,
MV_REFERENCE_FRAME ref0,
MV_REFERENCE_FRAME ref1) {
diff --git a/vp10/encoder/firstpass.c b/vp10/encoder/firstpass.c
index 68e8107..f0d3ab9 100644
--- a/vp10/encoder/firstpass.c
+++ b/vp10/encoder/firstpass.c
@@ -1620,7 +1620,7 @@
GF_GROUP *const gf_group = &twopass->gf_group;
FIRSTPASS_STATS frame_stats;
int i;
- int frame_index = 1;
+ int frame_index = 0;
int target_frame_size;
int key_frame;
const int max_bits = frame_max_bits(&cpi->rc, &cpi->oxcf);
@@ -1630,6 +1630,9 @@
int mid_boost_bits = 0;
int mid_frame_idx;
unsigned char arf_buffer_indices[MAX_ACTIVE_ARFS];
+#if !CONFIG_EXT_REFS && CONFIG_BIDIR_PRED
+ int bidir_pred_frame_index = 0;
+#endif // !CONFIG_EXT_REFS && CONFIG_BIDIR_PRED
key_frame = cpi->common.frame_type == KEY_FRAME;
@@ -1639,27 +1642,38 @@
// is also the golden frame.
if (!key_frame) {
if (rc->source_alt_ref_active) {
- gf_group->update_type[0] = OVERLAY_UPDATE;
- gf_group->rf_level[0] = INTER_NORMAL;
- gf_group->bit_allocation[0] = 0;
+ gf_group->update_type[frame_index] = OVERLAY_UPDATE;
+ gf_group->rf_level[frame_index] = INTER_NORMAL;
+ gf_group->bit_allocation[frame_index] = 0;
} else {
- gf_group->update_type[0] = GF_UPDATE;
- gf_group->rf_level[0] = GF_ARF_STD;
- gf_group->bit_allocation[0] = gf_arf_bits;
+ gf_group->update_type[frame_index] = GF_UPDATE;
+ gf_group->rf_level[frame_index] = GF_ARF_STD;
+ gf_group->bit_allocation[frame_index] = gf_arf_bits;
}
- gf_group->arf_update_idx[0] = arf_buffer_indices[0];
- gf_group->arf_ref_idx[0] = arf_buffer_indices[0];
+ gf_group->arf_update_idx[frame_index] = arf_buffer_indices[0];
+ gf_group->arf_ref_idx[frame_index] = arf_buffer_indices[0];
// Step over the golden frame / overlay frame
if (EOF == input_stats(twopass, &frame_stats))
return;
}
+#if !CONFIG_EXT_REFS && CONFIG_BIDIR_PRED
+ gf_group->bidir_pred_enabled[frame_index] = 0;
+ gf_group->brf_src_offset[frame_index] = 0;
+#endif // !CONFIG_EXT_REFS && CONFIG_BIDIR_PRED
+
// Deduct the boost bits for arf (or gf if it is not a key frame)
// from the group total.
if (rc->source_alt_ref_pending || !key_frame)
total_group_bits -= gf_arf_bits;
+ frame_index++;
+
+#if !CONFIG_EXT_REFS && CONFIG_BIDIR_PRED
+ bidir_pred_frame_index++;
+#endif // !CONFIG_EXT_REFS && CONFIG_BIDIR_PRED
+
// Store the bits to spend on the ARF if there is one.
if (rc->source_alt_ref_pending) {
gf_group->update_type[frame_index] = ARF_UPDATE;
@@ -1673,6 +1687,13 @@
gf_group->arf_ref_idx[frame_index] =
arf_buffer_indices[cpi->multi_arf_last_grp_enabled &&
rc->source_alt_ref_active];
+
+#if !CONFIG_EXT_REFS && CONFIG_BIDIR_PRED
+ gf_group->bidir_pred_enabled[frame_index] = 0;
+ gf_group->brf_src_offset[frame_index] = 0;
+ // NOTE: "bidir_pred_frame_index" stays unchanged for ARF_UPDATE frames.
+#endif // !CONFIG_EXT_REFS && CONFIG_BIDIR_PRED
+
++frame_index;
if (cpi->multi_arf_enabled) {
@@ -1718,10 +1739,67 @@
target_frame_size = clamp(target_frame_size, 0,
VPXMIN(max_bits, (int)total_group_bits));
- gf_group->update_type[frame_index] = LF_UPDATE;
- gf_group->rf_level[frame_index] = INTER_NORMAL;
+#if !CONFIG_EXT_REFS && CONFIG_BIDIR_PRED
+ // TODO(zoeliu): Currently only support BIDIR_PRED_PERIOD = 2
+ assert(BIDIR_PRED_PERIOD == 2);
+ // NOTE: BIDIR_PRED is only enabled when its interval is strictly
+ // less than the GOLDEN_FRAME group interval.
+ // TODO(zoeliu): Currently BIDIR_PRED is only enabled when alt-ref is on.
+ if (rc->source_alt_ref_pending && BIDIR_PRED_PERIOD <
+ (rc->baseline_gf_interval - rc->source_alt_ref_pending)) {
+ if (bidir_pred_frame_index == 1) {
+ const int curr_brf_src_offset = BIDIR_PRED_PERIOD - 1;
+ if ((i + curr_brf_src_offset) >=
+ (rc->baseline_gf_interval - rc->source_alt_ref_pending)) {
+ gf_group->update_type[frame_index] = LF_UPDATE;
+ gf_group->bidir_pred_enabled[frame_index] = 0;
+ gf_group->brf_src_offset[frame_index] = 0;
+ } else {
+ gf_group->update_type[frame_index] = BRF_UPDATE;
+ gf_group->bidir_pred_enabled[frame_index] = 1;
+ gf_group->brf_src_offset[frame_index] = curr_brf_src_offset;
+ }
+ } else if (bidir_pred_frame_index == BIDIR_PRED_PERIOD) {
+ gf_group->update_type[frame_index] = LASTNRF_UPDATE;
+ gf_group->bidir_pred_enabled[frame_index] = 1;
+ gf_group->brf_src_offset[frame_index] = 0;
+ // Reset the bidir_pred index.
+ bidir_pred_frame_index = 0;
+ } else {
+ gf_group->update_type[frame_index] = NRF_UPDATE;
+ gf_group->bidir_pred_enabled[frame_index] = 1;
+ gf_group->brf_src_offset[frame_index] = 0;
+ }
- gf_group->bit_allocation[frame_index] = target_frame_size;
+ bidir_pred_frame_index++;
+ } else {
+#endif // !CONFIG_EXT_REFS && CONFIG_BIDIR_PRED
+ gf_group->update_type[frame_index] = LF_UPDATE;
+#if !CONFIG_EXT_REFS && CONFIG_BIDIR_PRED
+ gf_group->bidir_pred_enabled[frame_index] = 0;
+ gf_group->brf_src_offset[frame_index] = 0;
+ }
+#endif // !CONFIG_EXT_REFS && CONFIG_BIDIR_PRED
+
+#if !CONFIG_EXT_REFS && CONFIG_BIDIR_PRED
+ if (gf_group->update_type[frame_index] == BRF_UPDATE) {
+ // Boost up the allocated bits on BWDREF_FRAME
+ // (zoeliu)gf_group->rf_level[frame_index] = GF_ARF_LOW;
+ gf_group->rf_level[frame_index] = INTER_HIGH;
+ gf_group->bit_allocation[frame_index] =
+ target_frame_size + (target_frame_size >> 2);
+ } else if (gf_group->update_type[frame_index] == LASTNRF_UPDATE) {
+ gf_group->rf_level[frame_index] = INTER_NORMAL;
+ gf_group->bit_allocation[frame_index] =
+ VPXMAX(0, target_frame_size - (target_frame_size >> 1));
+ } else {
+#endif // !CONFIG_EXT_REFS && CONFIG_BIDIR_PRED
+ gf_group->rf_level[frame_index] = INTER_NORMAL;
+ gf_group->bit_allocation[frame_index] = target_frame_size;
+#if !CONFIG_EXT_REFS && CONFIG_BIDIR_PRED
+ }
+#endif // !CONFIG_EXT_REFS && CONFIG_BIDIR_PRED
+
++frame_index;
}
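To make the branch above concrete, the standalone sketch below replays its update-type decisions for a hypothetical GF group (interval 8, one pending alt-ref, BIDIR_PRED_PERIOD of 2 -- all assumed values). It prints the BRF_UPDATE/LASTNRF_UPDATE pairs and the LF_UPDATE fallback near the end of the group; bit allocation and the ARF/overlay frames themselves are not modelled:

#include <stdio.h>

#define BIDIR_PRED_PERIOD 2

int main(void) {
  const int gf_interval = 8;      /* stands in for rc->baseline_gf_interval */
  const int alt_ref_pending = 1;  /* stands in for rc->source_alt_ref_pending */
  int bidir_idx = 1;              /* bidir_pred_frame_index after the GF frame */
  int i;

  for (i = 0; i < gf_interval - alt_ref_pending; ++i) {
    const char *type;
    if (alt_ref_pending &&
        BIDIR_PRED_PERIOD < (gf_interval - alt_ref_pending)) {
      if (bidir_idx == 1) {
        const int brf_offset = BIDIR_PRED_PERIOD - 1;
        type = (i + brf_offset >= gf_interval - alt_ref_pending)
                   ? "LF_UPDATE" : "BRF_UPDATE";
      } else if (bidir_idx == BIDIR_PRED_PERIOD) {
        type = "LASTNRF_UPDATE";
        bidir_idx = 0;
      } else {
        type = "NRF_UPDATE";
      }
      ++bidir_idx;
    } else {
      type = "LF_UPDATE";
    }
    printf("gf-group frame %d: %s\n", i, type);
  }
  return 0;
}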
@@ -1747,6 +1825,10 @@
gf_group->update_type[frame_index] = GF_UPDATE;
gf_group->rf_level[frame_index] = GF_ARF_STD;
}
+#if !CONFIG_EXT_REFS && CONFIG_BIDIR_PRED
+ gf_group->bidir_pred_enabled[frame_index] = 0;
+ gf_group->brf_src_offset[frame_index] = 0;
+#endif // !CONFIG_EXT_REFS && CONFIG_BIDIR_PRED
// Note whether multi-arf was enabled this group for next time.
cpi->multi_arf_last_grp_enabled = cpi->multi_arf_enabled;
@@ -1837,6 +1919,7 @@
int int_lbq =
(int)(vp10_convert_qindex_to_q(rc->last_boosted_qindex,
cpi->common.bit_depth));
+
active_min_gf_interval = rc->min_gf_interval + VPXMIN(2, int_max_q / 200);
if (active_min_gf_interval > rc->max_gf_interval)
active_min_gf_interval = rc->max_gf_interval;
@@ -2399,33 +2482,88 @@
TWO_PASS *const twopass = &cpi->twopass;
cpi->rc.is_src_frame_alt_ref = 0;
+#if !CONFIG_EXT_REFS && CONFIG_BIDIR_PRED
+ cpi->rc.is_bwd_ref_frame = 0;
+ cpi->rc.is_last_nonref_frame = 0;
+ cpi->rc.is_nonref_frame = 0;
+#endif // !CONFIG_EXT_REFS && CONFIG_BIDIR_PRED
+
switch (twopass->gf_group.update_type[twopass->gf_group.index]) {
case KF_UPDATE:
cpi->refresh_last_frame = 1;
cpi->refresh_golden_frame = 1;
+#if !CONFIG_EXT_REFS && CONFIG_BIDIR_PRED
+ cpi->refresh_bwd_ref_frame = 1;
+#endif // !CONFIG_EXT_REFS && CONFIG_BIDIR_PRED
cpi->refresh_alt_ref_frame = 1;
break;
+
case LF_UPDATE:
cpi->refresh_last_frame = 1;
cpi->refresh_golden_frame = 0;
+#if !CONFIG_EXT_REFS && CONFIG_BIDIR_PRED
+ cpi->refresh_bwd_ref_frame = 0;
+#endif // !CONFIG_EXT_REFS && CONFIG_BIDIR_PRED
cpi->refresh_alt_ref_frame = 0;
break;
+
case GF_UPDATE:
cpi->refresh_last_frame = 1;
cpi->refresh_golden_frame = 1;
+#if !CONFIG_EXT_REFS && CONFIG_BIDIR_PRED
+ cpi->refresh_bwd_ref_frame = 0;
+#endif // !CONFIG_EXT_REFS && CONFIG_BIDIR_PRED
cpi->refresh_alt_ref_frame = 0;
break;
+
case OVERLAY_UPDATE:
cpi->refresh_last_frame = 0;
cpi->refresh_golden_frame = 1;
+#if !CONFIG_EXT_REFS && CONFIG_BIDIR_PRED
+ cpi->refresh_bwd_ref_frame = 0;
+#endif // !CONFIG_EXT_REFS && CONFIG_BIDIR_PRED
cpi->refresh_alt_ref_frame = 0;
cpi->rc.is_src_frame_alt_ref = 1;
break;
+
case ARF_UPDATE:
cpi->refresh_last_frame = 0;
cpi->refresh_golden_frame = 0;
+#if !CONFIG_EXT_REFS && CONFIG_BIDIR_PRED
+ cpi->refresh_bwd_ref_frame = 0;
+#endif // !CONFIG_EXT_REFS && CONFIG_BIDIR_PRED
cpi->refresh_alt_ref_frame = 1;
break;
+
+#if !CONFIG_EXT_REFS && CONFIG_BIDIR_PRED
+ case BRF_UPDATE:
+ cpi->refresh_last_frame = 0;
+ cpi->refresh_golden_frame = 0;
+ cpi->refresh_bwd_ref_frame = 1;
+ cpi->refresh_alt_ref_frame = 0;
+ cpi->rc.is_bwd_ref_frame = 1;
+ break;
+
+ // TODO(zoeliu): When BIDIR_PRED and EXT_REFS start to work together, we
+ // may take both LASTNRF and NRF as one of the last ref
+
+ case LASTNRF_UPDATE:
+ cpi->refresh_last_frame = 0;
+ cpi->refresh_golden_frame = 0;
+ cpi->refresh_bwd_ref_frame = 0;
+ cpi->refresh_alt_ref_frame = 0;
+ cpi->rc.is_last_nonref_frame = 1;
+ break;
+
+ case NRF_UPDATE:
+ cpi->refresh_last_frame = 0;
+ cpi->refresh_golden_frame = 0;
+ cpi->refresh_bwd_ref_frame = 0;
+ cpi->refresh_alt_ref_frame = 0;
+ cpi->rc.is_nonref_frame = 1;
+ break;
+#endif // !CONFIG_EXT_REFS && CONFIG_BIDIR_PRED
+
default:
assert(0);
break;
@@ -2515,6 +2653,7 @@
rc->last_q[KEY_FRAME] = (tmp_q + cpi->oxcf.best_allowed_q) / 2;
rc->avg_frame_qindex[KEY_FRAME] = rc->last_q[KEY_FRAME];
}
+
vp10_zero(this_frame);
if (EOF == input_stats(twopass, &this_frame))
return;
diff --git a/vp10/encoder/firstpass.h b/vp10/encoder/firstpass.h
index 68a8887..a09e523 100644
--- a/vp10/encoder/firstpass.h
+++ b/vp10/encoder/firstpass.h
@@ -72,7 +72,16 @@
GF_UPDATE = 2,
ARF_UPDATE = 3,
OVERLAY_UPDATE = 4,
+#if CONFIG_BIDIR_PRED
+ BRF_UPDATE = 5, // Backward Reference Frame
+  // For an NRF within a BIDIR_PRED period, only the last one needs to get
+  // LAST_FRAME updated; otherwise no reference update is needed at all.
+ LASTNRF_UPDATE = 6, // Last Non-Reference Frame
+ NRF_UPDATE = 7, // Non-Reference Frame, but not the last one
+ FRAME_UPDATE_TYPES = 8
+#else
FRAME_UPDATE_TYPES = 5
+#endif // CONFIG_BIDIR_PRED
} FRAME_UPDATE_TYPE;
#define FC_ANIMATION_THRESH 0.15
@@ -89,6 +98,10 @@
unsigned char arf_src_offset[(MAX_LAG_BUFFERS * 2) + 1];
unsigned char arf_update_idx[(MAX_LAG_BUFFERS * 2) + 1];
unsigned char arf_ref_idx[(MAX_LAG_BUFFERS * 2) + 1];
+#if CONFIG_BIDIR_PRED
+ unsigned char brf_src_offset[(MAX_LAG_BUFFERS * 2) + 1];
+ unsigned char bidir_pred_enabled[(MAX_LAG_BUFFERS * 2) + 1];
+#endif // CONFIG_BIDIR_PRED
int bit_allocation[(MAX_LAG_BUFFERS * 2) + 1];
} GF_GROUP;
diff --git a/vp10/encoder/hybrid_fwd_txfm.c b/vp10/encoder/hybrid_fwd_txfm.c
index ff0a7c6..33bcab4 100644
--- a/vp10/encoder/hybrid_fwd_txfm.c
+++ b/vp10/encoder/hybrid_fwd_txfm.c
@@ -13,7 +13,6 @@
#include "./vpx_dsp_rtcd.h"
#include "vp10/common/idct.h"
-#include "vp10/common/vp10_fwd_txfm2d_cfg.h"
#include "vp10/encoder/hybrid_fwd_txfm.h"
static INLINE void fdct32x32(int rd_transform, const int16_t *src,
@@ -196,7 +195,7 @@
case FLIPADST_FLIPADST:
case ADST_FLIPADST:
case FLIPADST_ADST:
- vp10_highbd_fht4x4_c(src_diff, coeff, diff_stride, tx_type);
+ vp10_fwd_txfm2d_4x4(src_diff, coeff, diff_stride, tx_type, bd);
break;
case V_DCT:
case H_DCT:
@@ -212,7 +211,6 @@
#endif // CONFIG_EXT_TX
default:
assert(0);
- break;
}
}
@@ -233,7 +231,7 @@
case FLIPADST_FLIPADST:
case ADST_FLIPADST:
case FLIPADST_ADST:
- vp10_highbd_fht8x8(src_diff, coeff, diff_stride, tx_type);
+ vp10_fwd_txfm2d_8x8(src_diff, coeff, diff_stride, tx_type, bd);
break;
case V_DCT:
case H_DCT:
@@ -250,7 +248,6 @@
#endif // CONFIG_EXT_TX
default:
assert(0);
- break;
}
}
@@ -271,7 +268,7 @@
case FLIPADST_FLIPADST:
case ADST_FLIPADST:
case FLIPADST_ADST:
- vp10_highbd_fht16x16(src_diff, coeff, diff_stride, tx_type);
+ vp10_fwd_txfm2d_16x16(src_diff, coeff, diff_stride, tx_type, bd);
break;
case V_DCT:
case H_DCT:
@@ -288,7 +285,6 @@
#endif // CONFIG_EXT_TX
default:
assert(0);
- break;
}
}
diff --git a/vp10/encoder/lookahead.c b/vp10/encoder/lookahead.c
index dce0139..bc7b404 100644
--- a/vp10/encoder/lookahead.c
+++ b/vp10/encoder/lookahead.c
@@ -47,13 +47,13 @@
struct lookahead_ctx *vp10_lookahead_init(unsigned int width,
- unsigned int height,
- unsigned int subsampling_x,
- unsigned int subsampling_y,
+ unsigned int height,
+ unsigned int subsampling_x,
+ unsigned int subsampling_y,
#if CONFIG_VP9_HIGHBITDEPTH
- int use_highbitdepth,
+ int use_highbitdepth,
#endif
- unsigned int depth) {
+ unsigned int depth) {
struct lookahead_ctx *ctx = NULL;
// Clamp the lookahead queue depth
diff --git a/vp10/encoder/ratectrl.c b/vp10/encoder/ratectrl.c
index 6068775..c1dc71e 100644
--- a/vp10/encoder/ratectrl.c
+++ b/vp10/encoder/ratectrl.c
@@ -240,11 +240,14 @@
RATE_CONTROL *const rc = &cpi->rc;
// Non-viewable frames are a special case and are treated as pure overhead.
- if (!cm->show_frame) {
+#if !CONFIG_EXT_REFS && CONFIG_BIDIR_PRED
+ if (!cm->show_frame && !rc->is_bwd_ref_frame)
+#else
+ if (!cm->show_frame)
+#endif // !CONFIG_EXT_REFS && CONFIG_BIDIR_PRED
rc->bits_off_target -= encoded_frame_size;
- } else {
+ else
rc->bits_off_target += rc->avg_frame_bandwidth - encoded_frame_size;
- }
// Clip the buffer level to the maximum specified buffer size.
rc->bits_off_target = VPXMIN(rc->bits_off_target, rc->maximum_buffer_size);
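With the change above, a coded-but-not-shown BWDREF frame is charged against the rate-control buffer like a shown frame, while other hidden frames (an ARF, for instance) remain pure overhead. A minimal standalone sketch of that bookkeeping with made-up numbers:

#include <stdio.h>

static long update_buffer_level(long bits_off_target, int show_frame,
                                int is_bwd_ref_frame,
                                long avg_frame_bandwidth,
                                long encoded_frame_size) {
  if (!show_frame && !is_bwd_ref_frame)
    return bits_off_target - encoded_frame_size;  /* pure overhead */
  return bits_off_target + avg_frame_bandwidth - encoded_frame_size;
}

int main(void) {
  long level = 200000;  /* hypothetical starting buffer level, in bits */
  level = update_buffer_level(level, 0, 1, 40000, 55000);  /* hidden BWDREF */
  printf("after BWDREF frame: %ld\n", level);              /* 185000 */
  level = update_buffer_level(level, 0, 0, 40000, 5000);   /* hidden ARF */
  printf("after ARF frame:    %ld\n", level);              /* 180000 */
  return 0;
}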
@@ -946,8 +949,13 @@
int vp10_frame_type_qdelta(const VP10_COMP *cpi, int rf_level, int q) {
static const double rate_factor_deltas[RATE_FACTOR_LEVELS] = {
+#if !CONFIG_EXT_REFS && CONFIG_BIDIR_PRED
+ 0.80, // INTER_NORMAL
+ 1.25, // INTER_HIGH
+#else
1.00, // INTER_NORMAL
1.00, // INTER_HIGH
+#endif // !CONFIG_EXT_REFS && CONFIG_BIDIR_PRED
1.50, // GF_ARF_LOW
1.75, // GF_ARF_STD
2.00, // KF_STD
@@ -1282,7 +1290,7 @@
}
}
- // Keep record of last boosted (KF/KF/ARF) Q value.
+ // Keep record of last boosted (KF/GF/ARF) Q value.
// If the current frame is coded at a lower Q then we also update it.
// If all mbs in this group are skipped only update if the Q value is
// better than that already stored.
@@ -1314,7 +1322,12 @@
// Actual bits spent
rc->total_actual_bits += rc->projected_frame_size;
+#if !CONFIG_EXT_REFS && CONFIG_BIDIR_PRED
+ rc->total_target_bits += (cm->show_frame || rc->is_bwd_ref_frame) ?
+ rc->avg_frame_bandwidth : 0;
+#else
rc->total_target_bits += cm->show_frame ? rc->avg_frame_bandwidth : 0;
+#endif // !CONFIG_EXT_REFS && CONFIG_BIDIR_PRED
rc->total_target_vs_actual = rc->total_actual_bits - rc->total_target_bits;
@@ -1328,7 +1341,12 @@
if (cm->frame_type == KEY_FRAME)
rc->frames_since_key = 0;
+
+#if !CONFIG_EXT_REFS && CONFIG_BIDIR_PRED
+ if (cm->show_frame || rc->is_bwd_ref_frame) {
+#else
if (cm->show_frame) {
+#endif // !CONFIG_EXT_REFS && CONFIG_BIDIR_PRED
rc->frames_since_key++;
rc->frames_to_key--;
}
diff --git a/vp10/encoder/ratectrl.h b/vp10/encoder/ratectrl.h
index 0b9fd45..ed51f12 100644
--- a/vp10/encoder/ratectrl.h
+++ b/vp10/encoder/ratectrl.h
@@ -90,6 +90,14 @@
int source_alt_ref_active;
int is_src_frame_alt_ref;
+#if CONFIG_BIDIR_PRED
+  // NOTE: Different frame types may be allocated different numbers of bits,
+  //       aiming to achieve the overall optimal RD performance.
+ int is_bwd_ref_frame;
+ int is_last_nonref_frame;
+ int is_nonref_frame;
+#endif // CONFIG_BIDIR_PRED
+
int avg_frame_bandwidth; // Average frame size target for clip
int min_frame_bandwidth; // Minimum allocation used for any frame
int max_frame_bandwidth; // Maximum burst rate allowed for a frame.
diff --git a/vp10/encoder/rd.c b/vp10/encoder/rd.c
index f935e35..37ee4fc 100644
--- a/vp10/encoder/rd.c
+++ b/vp10/encoder/rd.c
@@ -223,7 +223,11 @@
8, 8, 4, 4, 2, 2, 1, 0
};
static const int rd_frame_type_factor[FRAME_UPDATE_TYPES] = {
- 128, 144, 128, 128, 144
+ 128, 144, 128, 128, 144,
+#if !CONFIG_EXT_REFS && CONFIG_BIDIR_PRED
+ // TODO(zoeliu): To adjust further following factor values.
+ 128, 128, 128
+#endif // !CONFIG_EXT_REFS && CONFIG_BIDIR_PRED
};
int vp10_compute_rd_mult(const VP10_COMP *cpi, int qindex) {
@@ -729,10 +733,10 @@
int inter_filter_cost = 0;
int dir;
- for (dir = 0; dir < 4; ++dir) {
- const int frame_idx = (dir >> 1);
- if (mbmi->ref_frame[frame_idx] > INTRA_FRAME &&
- has_subpel_mv_component(xd, dir)) {
+ for (dir = 0; dir < 2; ++dir) {
+ if (has_subpel_mv_component(xd->mi[0], xd, dir) ||
+ (mbmi->ref_frame[1] > INTRA_FRAME &&
+ has_subpel_mv_component(xd->mi[0], xd, dir + 2))) {
const int ctx = vp10_get_pred_context_switchable_interp(xd, dir);
inter_filter_cost +=
cpi->switchable_interp_costs[ctx][mbmi->interp_filter[dir]];
@@ -768,6 +772,10 @@
rd->thresh_mult[THR_NEARESTL2] = 300;
rd->thresh_mult[THR_NEARESTL3] = 300;
rd->thresh_mult[THR_NEARESTL4] = 300;
+#else // CONFIG_EXT_REFS
+#if CONFIG_BIDIR_PRED
+ rd->thresh_mult[THR_NEARESTB] = 300;
+#endif // CONFIG_BIDIR_PRED
#endif // CONFIG_EXT_REFS
rd->thresh_mult[THR_NEARESTG] = 300;
rd->thresh_mult[THR_NEARESTA] = 300;
@@ -777,6 +785,10 @@
rd->thresh_mult[THR_NEARESTL2] = 0;
rd->thresh_mult[THR_NEARESTL3] = 0;
rd->thresh_mult[THR_NEARESTL4] = 0;
+#else // CONFIG_EXT_REFS
+#if CONFIG_BIDIR_PRED
+ rd->thresh_mult[THR_NEARESTB] = 0;
+#endif // CONFIG_BIDIR_PRED
#endif // CONFIG_EXT_REFS
rd->thresh_mult[THR_NEARESTG] = 0;
rd->thresh_mult[THR_NEARESTA] = 0;
@@ -789,6 +801,10 @@
rd->thresh_mult[THR_NEWL2] += 1000;
rd->thresh_mult[THR_NEWL3] += 1000;
rd->thresh_mult[THR_NEWL4] += 1000;
+#else // CONFIG_EXT_REFS
+#if CONFIG_BIDIR_PRED
+ rd->thresh_mult[THR_NEWB] += 1000;
+#endif // CONFIG_BIDIR_PRED
#endif // CONFIG_EXT_REFS
rd->thresh_mult[THR_NEWA] += 1000;
rd->thresh_mult[THR_NEWG] += 1000;
@@ -798,6 +814,10 @@
rd->thresh_mult[THR_NEARL2] += 1000;
rd->thresh_mult[THR_NEARL3] += 1000;
rd->thresh_mult[THR_NEARL4] += 1000;
+#else // CONFIG_EXT_REFS
+#if CONFIG_BIDIR_PRED
+ rd->thresh_mult[THR_NEARB] += 1000;
+#endif // CONFIG_BIDIR_PRED
#endif // CONFIG_EXT_REFS
rd->thresh_mult[THR_NEARA] += 1000;
rd->thresh_mult[THR_NEARG] += 1000;
@@ -808,6 +828,10 @@
rd->thresh_mult[THR_NEWFROMNEARL2] += 1000;
rd->thresh_mult[THR_NEWFROMNEARL3] += 1000;
rd->thresh_mult[THR_NEWFROMNEARL4] += 1000;
+#else // CONFIG_EXT_REFS
+#if CONFIG_BIDIR_PRED
+ rd->thresh_mult[THR_NEWFROMNEARB] += 1000;
+#endif // CONFIG_BIDIR_PRED
#endif // CONFIG_EXT_REFS
rd->thresh_mult[THR_NEWFROMNEARG] += 1000;
rd->thresh_mult[THR_NEWFROMNEARA] += 1000;
@@ -818,6 +842,10 @@
rd->thresh_mult[THR_ZEROL2] += 2000;
rd->thresh_mult[THR_ZEROL3] += 2000;
rd->thresh_mult[THR_ZEROL4] += 2000;
+#else // CONFIG_EXT_REFS
+#if CONFIG_BIDIR_PRED
+ rd->thresh_mult[THR_ZEROB] += 2000;
+#endif // CONFIG_BIDIR_PRED
#endif // CONFIG_EXT_REFS
rd->thresh_mult[THR_ZEROG] += 2000;
rd->thresh_mult[THR_ZEROA] += 2000;
@@ -879,20 +907,53 @@
rd->thresh_mult[THR_COMP_NEW_NEARL4A] += 1700;
rd->thresh_mult[THR_COMP_NEW_NEWL4A] += 2000;
rd->thresh_mult[THR_COMP_ZERO_ZEROL4A] += 2500;
+
+#else // CONFIG_EXT_REFS
+
+#if CONFIG_BIDIR_PRED
+ rd->thresh_mult[THR_COMP_NEAREST_NEARESTLB] += 1000;
+ rd->thresh_mult[THR_COMP_NEAREST_NEARESTGB] += 1000;
+
+ rd->thresh_mult[THR_COMP_NEAREST_NEARLB] += 1200;
+ rd->thresh_mult[THR_COMP_NEAREST_NEARGB] += 1200;
+ rd->thresh_mult[THR_COMP_NEAR_NEARESTLB] += 1200;
+ rd->thresh_mult[THR_COMP_NEAR_NEARESTGB] += 1200;
+
+ rd->thresh_mult[THR_COMP_NEAREST_NEWLB] += 1500;
+ rd->thresh_mult[THR_COMP_NEAREST_NEWGB] += 1500;
+ rd->thresh_mult[THR_COMP_NEW_NEARESTLB] += 1500;
+ rd->thresh_mult[THR_COMP_NEW_NEARESTGB] += 1500;
+
+ rd->thresh_mult[THR_COMP_NEAR_NEWLB] += 1700;
+ rd->thresh_mult[THR_COMP_NEAR_NEWGB] += 1700;
+ rd->thresh_mult[THR_COMP_NEW_NEARLB] += 1700;
+ rd->thresh_mult[THR_COMP_NEW_NEARGB] += 1700;
+
+ rd->thresh_mult[THR_COMP_NEW_NEWLB] += 2000;
+ rd->thresh_mult[THR_COMP_NEW_NEWGB] += 2000;
+
+ rd->thresh_mult[THR_COMP_ZERO_ZEROLB] += 2500;
+ rd->thresh_mult[THR_COMP_ZERO_ZEROGB] += 2500;
+#endif // CONFIG_BIDIR_PRED
#endif // CONFIG_EXT_REFS
-#else
+
+#else // CONFIG_EXT_INTER
+
rd->thresh_mult[THR_COMP_NEARESTLA] += 1000;
#if CONFIG_EXT_REFS
rd->thresh_mult[THR_COMP_NEARESTL2A] += 1000;
rd->thresh_mult[THR_COMP_NEARESTL3A] += 1000;
rd->thresh_mult[THR_COMP_NEARESTL4A] += 1000;
+#else // CONFIG_EXT_REFS
+#if CONFIG_BIDIR_PRED
+ rd->thresh_mult[THR_COMP_NEARESTLB] += 1000;
+ rd->thresh_mult[THR_COMP_NEARESTGB] += 1000;
+#endif // CONFIG_BIDIR_PRED
#endif // CONFIG_EXT_REFS
rd->thresh_mult[THR_COMP_NEARESTGA] += 1000;
rd->thresh_mult[THR_COMP_NEARLA] += 1500;
rd->thresh_mult[THR_COMP_NEWLA] += 2000;
- rd->thresh_mult[THR_COMP_NEARGA] += 1500;
- rd->thresh_mult[THR_COMP_NEWGA] += 2000;
#if CONFIG_EXT_REFS
rd->thresh_mult[THR_COMP_NEARL2A] += 1500;
rd->thresh_mult[THR_COMP_NEWL2A] += 2000;
@@ -900,15 +961,30 @@
rd->thresh_mult[THR_COMP_NEWL3A] += 2000;
rd->thresh_mult[THR_COMP_NEARL4A] += 1500;
rd->thresh_mult[THR_COMP_NEWL4A] += 2000;
+#else // CONFIG_EXT_REFS
+#if CONFIG_BIDIR_PRED
+ rd->thresh_mult[THR_COMP_NEARLB] += 1500;
+ rd->thresh_mult[THR_COMP_NEARGB] += 1500;
+ rd->thresh_mult[THR_COMP_NEWLB] += 2000;
+ rd->thresh_mult[THR_COMP_NEWGB] += 2000;
+#endif // CONFIG_BIDIR_PRED
#endif // CONFIG_EXT_REFS
+ rd->thresh_mult[THR_COMP_NEARGA] += 1500;
+ rd->thresh_mult[THR_COMP_NEWGA] += 2000;
rd->thresh_mult[THR_COMP_ZEROLA] += 2500;
#if CONFIG_EXT_REFS
rd->thresh_mult[THR_COMP_ZEROL2A] += 2500;
rd->thresh_mult[THR_COMP_ZEROL3A] += 2500;
rd->thresh_mult[THR_COMP_ZEROL4A] += 2500;
+#else // CONFIG_EXT_REFS
+#if CONFIG_BIDIR_PRED
+ rd->thresh_mult[THR_COMP_ZEROLB] += 2500;
+ rd->thresh_mult[THR_COMP_ZEROGB] += 2500;
+#endif // CONFIG_BIDIR_PRED
#endif // CONFIG_EXT_REFS
rd->thresh_mult[THR_COMP_ZEROGA] += 2500;
+
#endif // CONFIG_EXT_INTER
rd->thresh_mult[THR_H_PRED] += 2000;
@@ -964,9 +1040,14 @@
#if CONFIG_EXT_REFS
{2500, 2500, 2500, 2500, 2500, 2500, 4500, 4500, 4500, 4500, 4500, 2500},
{2000, 2000, 2000, 2000, 2000, 2000, 4000, 4000, 4000, 4000, 4000, 2000}
-#else
+#else // CONFIG_EXT_REFS
+#if CONFIG_BIDIR_PRED
+ {2500, 2500, 2500, 2500, 4500, 4500, 4500, 4500, 2500},
+ {2000, 2000, 2000, 2000, 4000, 4000, 4000, 4000, 2000}
+#else // CONFIG_BIDIR_PRED
{2500, 2500, 2500, 4500, 4500, 2500},
{2000, 2000, 2000, 4000, 4000, 2000}
+#endif // CONFIG_BIDIR_PRED
#endif // CONFIG_EXT_REFS
};
RD_OPT *const rd = &cpi->rd;
diff --git a/vp10/encoder/rd.h b/vp10/encoder/rd.h
index 2e67663..624f2d2 100644
--- a/vp10/encoder/rd.h
+++ b/vp10/encoder/rd.h
@@ -44,23 +44,43 @@
#define INVALID_MV 0x80008000
#if CONFIG_EXT_REFS
+
#if CONFIG_EXT_INTER
#define MAX_MODES 114
-#else
+#else // CONFIG_EXT_INTER
#define MAX_MODES 54
#endif // CONFIG_EXT_INTER
-#else
+
+#else // CONFIG_EXT_REFS
+
+#if CONFIG_BIDIR_PRED
+
+#if CONFIG_EXT_INTER
+#define MAX_MODES 80
+#else // CONFIG_EXT_INTER
+#define MAX_MODES 42
+#endif // CONFIG_EXT_INTER
+
+#else // CONFIG_BIDIR_PRED
+
#if CONFIG_EXT_INTER
#define MAX_MODES 57
-#else
+#else // CONFIG_EXT_INTER
#define MAX_MODES 30
#endif // CONFIG_EXT_INTER
+
+#endif // CONFIG_BIDIR_PRED
+
#endif // CONFIG_EXT_REFS
#if CONFIG_EXT_REFS
#define MAX_REFS 12
-#else
+#else // CONFIG_EXT_REFS
+#if CONFIG_BIDIR_PRED
+#define MAX_REFS 9
+#else // CONFIG_BIDIR_PRED
#define MAX_REFS 6
+#endif // CONFIG_BIDIR_PRED
#endif // CONFIG_EXT_REFS
#define RD_THRESH_MAX_FACT 64
@@ -74,6 +94,10 @@
THR_NEARESTL2,
THR_NEARESTL3,
THR_NEARESTL4,
+#else // CONFIG_EXT_REFS
+#if CONFIG_BIDIR_PRED
+ THR_NEARESTB,
+#endif // CONFIG_BIDIR_PRED
#endif // CONFIG_EXT_REFS
THR_NEARESTA,
THR_NEARESTG,
@@ -85,6 +109,10 @@
THR_NEWL2,
THR_NEWL3,
THR_NEWL4,
+#else // CONFIG_EXT_REFS
+#if CONFIG_BIDIR_PRED
+ THR_NEWB,
+#endif // CONFIG_BIDIR_PRED
#endif // CONFIG_EXT_REFS
THR_NEWA,
THR_NEWG,
@@ -94,6 +122,10 @@
THR_NEARL2,
THR_NEARL3,
THR_NEARL4,
+#else // CONFIG_EXT_REFS
+#if CONFIG_BIDIR_PRED
+ THR_NEARB,
+#endif // CONFIG_BIDIR_PRED
#endif // CONFIG_EXT_REFS
THR_NEARA,
THR_NEARG,
@@ -104,6 +136,10 @@
THR_NEWFROMNEARL2,
THR_NEWFROMNEARL3,
THR_NEWFROMNEARL4,
+#else // CONFIG_EXT_REFS
+#if CONFIG_BIDIR_PRED
+ THR_NEWFROMNEARB,
+#endif // CONFIG_BIDIR_PRED
#endif // CONFIG_EXT_REFS
THR_NEWFROMNEARA,
THR_NEWFROMNEARG,
@@ -114,6 +150,10 @@
THR_ZEROL2,
THR_ZEROL3,
THR_ZEROL4,
+#else // CONFIG_EXT_REFS
+#if CONFIG_BIDIR_PRED
+ THR_ZEROB,
+#endif // CONFIG_BIDIR_PRED
#endif // CONFIG_EXT_REFS
THR_ZEROG,
THR_ZEROA,
@@ -124,6 +164,11 @@
THR_COMP_NEAREST_NEARESTL2A,
THR_COMP_NEAREST_NEARESTL3A,
THR_COMP_NEAREST_NEARESTL4A,
+#else // CONFIG_EXT_REFS
+#if CONFIG_BIDIR_PRED
+ THR_COMP_NEAREST_NEARESTLB,
+ THR_COMP_NEAREST_NEARESTGB,
+#endif // CONFIG_BIDIR_PRED
#endif // CONFIG_EXT_REFS
THR_COMP_NEAREST_NEARESTGA,
#else // CONFIG_EXT_INTER
@@ -132,6 +177,11 @@
THR_COMP_NEARESTL2A,
THR_COMP_NEARESTL3A,
THR_COMP_NEARESTL4A,
+#else // CONFIG_EXT_REFS
+#if CONFIG_BIDIR_PRED
+ THR_COMP_NEARESTLB,
+ THR_COMP_NEARESTGB,
+#endif // CONFIG_BIDIR_PRED
#endif // CONFIG_EXT_REFS
THR_COMP_NEARESTGA,
#endif // CONFIG_EXT_INTER
@@ -188,8 +238,31 @@
THR_COMP_NEAR_NEWL4A,
THR_COMP_NEW_NEWL4A,
THR_COMP_ZERO_ZEROL4A,
+
+#else // CONFIG_EXT_REFS
+
+#if CONFIG_BIDIR_PRED
+ THR_COMP_NEAR_NEARESTLB,
+ THR_COMP_NEAR_NEARESTGB,
+ THR_COMP_NEAREST_NEARLB,
+ THR_COMP_NEAREST_NEARGB,
+ THR_COMP_NEW_NEARESTLB,
+ THR_COMP_NEW_NEARESTGB,
+ THR_COMP_NEAREST_NEWLB,
+ THR_COMP_NEAREST_NEWGB,
+ THR_COMP_NEW_NEARLB,
+ THR_COMP_NEW_NEARGB,
+ THR_COMP_NEAR_NEWLB,
+ THR_COMP_NEAR_NEWGB,
+ THR_COMP_NEW_NEWLB,
+ THR_COMP_NEW_NEWGB,
+ THR_COMP_ZERO_ZEROLB,
+ THR_COMP_ZERO_ZEROGB,
+#endif // CONFIG_BIDIR_PRED
#endif // CONFIG_EXT_REFS
-#else
+
+#else // CONFIG_EXT_INTER
+
THR_COMP_NEARLA,
THR_COMP_NEWLA,
#if CONFIG_EXT_REFS
@@ -199,6 +272,13 @@
THR_COMP_NEWL3A,
THR_COMP_NEARL4A,
THR_COMP_NEWL4A,
+#else // CONFIG_EXT_REFS
+#if CONFIG_BIDIR_PRED
+ THR_COMP_NEARLB,
+ THR_COMP_NEWLB,
+ THR_COMP_NEARGB,
+ THR_COMP_NEWGB,
+#endif // CONFIG_BIDIR_PRED
#endif // CONFIG_EXT_REFS
THR_COMP_NEARGA,
THR_COMP_NEWGA,
@@ -208,6 +288,11 @@
THR_COMP_ZEROL2A,
THR_COMP_ZEROL3A,
THR_COMP_ZEROL4A,
+#else // CONFIG_EXT_REFS
+#if CONFIG_BIDIR_PRED
+ THR_COMP_ZEROLB,
+ THR_COMP_ZEROGB,
+#endif // CONFIG_BIDIR_PRED
#endif // CONFIG_EXT_REFS
THR_COMP_ZEROGA,
#endif // CONFIG_EXT_INTER
@@ -262,14 +347,24 @@
THR_LAST2,
THR_LAST3,
THR_LAST4,
+#else // CONFIG_EXT_REFS
+#if CONFIG_BIDIR_PRED
+ THR_BWDR,
+#endif // CONFIG_BIDIR_PRED
#endif // CONFIG_EXT_REFS
THR_GOLD,
THR_ALTR,
+
THR_COMP_LA,
#if CONFIG_EXT_REFS
THR_COMP_L2A,
THR_COMP_L3A,
THR_COMP_L4A,
+#else // CONFIG_EXT_REFS
+#if CONFIG_BIDIR_PRED
+ THR_COMP_LB,
+ THR_COMP_GB,
+#endif // CONFIG_BIDIR_PRED
#endif // CONFIG_EXT_REFS
THR_COMP_GA,
THR_INTRA,
diff --git a/vp10/encoder/rdopt.c b/vp10/encoder/rdopt.c
index c111b56..bfd34c9 100644
--- a/vp10/encoder/rdopt.c
+++ b/vp10/encoder/rdopt.c
@@ -65,7 +65,22 @@
(1 << LAST2_FRAME) | (1 << INTRA_FRAME) | \
(1 << LAST3_FRAME) | (1 << LAST4_FRAME))
-#else
+#else // CONFIG_EXT_REFS
+
+#if CONFIG_BIDIR_PRED
+
+#define LAST_FRAME_MODE_MASK ((1 << GOLDEN_FRAME) | (1 << ALTREF_FRAME) | \
+ (1 << BWDREF_FRAME) | (1 << INTRA_FRAME))
+#define GOLDEN_FRAME_MODE_MASK ((1 << LAST_FRAME) | (1 << ALTREF_FRAME) | \
+ (1 << BWDREF_FRAME) | (1 << INTRA_FRAME))
+#define BWD_REF_MODE_MASK ((1 << LAST_FRAME) | (1 << GOLDEN_FRAME) | \
+ (1 << ALTREF_FRAME) | (1 << INTRA_FRAME))
+// TODO(zoeliu): To rename the following to ALTREF_MODE_MASK
+#define ALT_REF_MODE_MASK ((1 << LAST_FRAME) | (1 << GOLDEN_FRAME) | \
+ (1 << BWDREF_FRAME) | (1 << INTRA_FRAME))
+
+
+#else // CONFIG_BIDIR_PRED
#define LAST_FRAME_MODE_MASK ((1 << GOLDEN_FRAME) | (1 << ALTREF_FRAME) | \
(1 << INTRA_FRAME))
@@ -74,9 +89,16 @@
#define ALT_REF_MODE_MASK ((1 << LAST_FRAME) | (1 << GOLDEN_FRAME) | \
(1 << INTRA_FRAME))
+#endif // CONFIG_BIDIR_PRED
+
#endif // CONFIG_EXT_REFS
+#if !CONFIG_EXT_REFS && CONFIG_BIDIR_PRED
+#define SECOND_REF_FRAME_MASK ((1 << ALTREF_FRAME) | (1 << BWDREF_FRAME) | \
+ 0x01)
+#else
#define SECOND_REF_FRAME_MASK ((1 << ALTREF_FRAME) | 0x01)
+#endif // !CONFIG_EXT_REFS && CONFIG_BIDIR_PRED
#define MIN_EARLY_TERM_INDEX 3
#define NEW_MV_DISCOUNT_FACTOR 8
@@ -122,6 +144,10 @@
{NEARESTMV, {LAST2_FRAME, NONE}},
{NEARESTMV, {LAST3_FRAME, NONE}},
{NEARESTMV, {LAST4_FRAME, NONE}},
+#else // CONFIG_EXT_REFS
+#if CONFIG_BIDIR_PRED
+ {NEARESTMV, {BWDREF_FRAME, NONE}},
+#endif // CONFIG_BIDIR_PRED
#endif // CONFIG_EXT_REFS
{NEARESTMV, {ALTREF_FRAME, NONE}},
{NEARESTMV, {GOLDEN_FRAME, NONE}},
@@ -133,6 +159,10 @@
{NEWMV, {LAST2_FRAME, NONE}},
{NEWMV, {LAST3_FRAME, NONE}},
{NEWMV, {LAST4_FRAME, NONE}},
+#else // CONFIG_EXT_REFS
+#if CONFIG_BIDIR_PRED
+ {NEWMV, {BWDREF_FRAME, NONE}},
+#endif // CONFIG_BIDIR_PRED
#endif // CONFIG_EXT_REFS
{NEWMV, {ALTREF_FRAME, NONE}},
{NEWMV, {GOLDEN_FRAME, NONE}},
@@ -142,6 +172,10 @@
{NEARMV, {LAST2_FRAME, NONE}},
{NEARMV, {LAST3_FRAME, NONE}},
{NEARMV, {LAST4_FRAME, NONE}},
+#else // CONFIG_EXT_REFS
+#if CONFIG_BIDIR_PRED
+ {NEARMV, {BWDREF_FRAME, NONE}},
+#endif // CONFIG_BIDIR_PRED
#endif // CONFIG_EXT_REFS
{NEARMV, {ALTREF_FRAME, NONE}},
{NEARMV, {GOLDEN_FRAME, NONE}},
@@ -152,6 +186,10 @@
{NEWFROMNEARMV, {LAST2_FRAME, NONE}},
{NEWFROMNEARMV, {LAST3_FRAME, NONE}},
{NEWFROMNEARMV, {LAST4_FRAME, NONE}},
+#else // CONFIG_EXT_REFS
+#if CONFIG_BIDIR_PRED
+ {NEWFROMNEARMV, {BWDREF_FRAME, NONE}},
+#endif // CONFIG_BIDIR_PRED
#endif // CONFIG_EXT_REFS
{NEWFROMNEARMV, {ALTREF_FRAME, NONE}},
{NEWFROMNEARMV, {GOLDEN_FRAME, NONE}},
@@ -162,16 +200,27 @@
{ZEROMV, {LAST2_FRAME, NONE}},
{ZEROMV, {LAST3_FRAME, NONE}},
{ZEROMV, {LAST4_FRAME, NONE}},
+#else // CONFIG_EXT_REFS
+#if CONFIG_BIDIR_PRED
+ {ZEROMV, {BWDREF_FRAME, NONE}},
+#endif // CONFIG_BIDIR_PRED
#endif // CONFIG_EXT_REFS
{ZEROMV, {GOLDEN_FRAME, NONE}},
{ZEROMV, {ALTREF_FRAME, NONE}},
+  // TODO(zoeliu): May need to reconsider the order in which modes are checked.
+
#if CONFIG_EXT_INTER
{NEAREST_NEARESTMV, {LAST_FRAME, ALTREF_FRAME}},
#if CONFIG_EXT_REFS
{NEAREST_NEARESTMV, {LAST2_FRAME, ALTREF_FRAME}},
{NEAREST_NEARESTMV, {LAST3_FRAME, ALTREF_FRAME}},
{NEAREST_NEARESTMV, {LAST4_FRAME, ALTREF_FRAME}},
+#else // CONFIG_EXT_REFS
+#if CONFIG_BIDIR_PRED
+ {NEAREST_NEARESTMV, {LAST_FRAME, BWDREF_FRAME}},
+ {NEAREST_NEARESTMV, {GOLDEN_FRAME, BWDREF_FRAME}},
+#endif // CONFIG_BIDIR_PRED
#endif // CONFIG_EXT_REFS
{NEAREST_NEARESTMV, {GOLDEN_FRAME, ALTREF_FRAME}},
#else // CONFIG_EXT_INTER
@@ -180,6 +229,11 @@
{NEARESTMV, {LAST2_FRAME, ALTREF_FRAME}},
{NEARESTMV, {LAST3_FRAME, ALTREF_FRAME}},
{NEARESTMV, {LAST4_FRAME, ALTREF_FRAME}},
+#else // CONFIG_EXT_REFS
+#if CONFIG_BIDIR_PRED
+ {NEARESTMV, {LAST_FRAME, BWDREF_FRAME}},
+ {NEARESTMV, {GOLDEN_FRAME, BWDREF_FRAME}},
+#endif // CONFIG_BIDIR_PRED
#endif // CONFIG_EXT_REFS
{NEARESTMV, {GOLDEN_FRAME, ALTREF_FRAME}},
#endif // CONFIG_EXT_INTER
@@ -193,14 +247,14 @@
{NEAREST_NEARMV, {GOLDEN_FRAME, ALTREF_FRAME}},
{NEAR_NEARMV, {LAST_FRAME, ALTREF_FRAME}},
{NEAR_NEARMV, {GOLDEN_FRAME, ALTREF_FRAME}},
- {NEW_NEARESTMV, {LAST_FRAME, ALTREF_FRAME}},
- {NEW_NEARESTMV, {GOLDEN_FRAME, ALTREF_FRAME}},
- {NEAREST_NEWMV, {LAST_FRAME, ALTREF_FRAME}},
- {NEAREST_NEWMV, {GOLDEN_FRAME, ALTREF_FRAME}},
- {NEW_NEARMV, {LAST_FRAME, ALTREF_FRAME}},
- {NEW_NEARMV, {GOLDEN_FRAME, ALTREF_FRAME}},
- {NEAR_NEWMV, {LAST_FRAME, ALTREF_FRAME}},
- {NEAR_NEWMV, {GOLDEN_FRAME, ALTREF_FRAME}},
+ {NEW_NEARESTMV, {LAST_FRAME, ALTREF_FRAME}},
+ {NEW_NEARESTMV, {GOLDEN_FRAME, ALTREF_FRAME}},
+ {NEAREST_NEWMV, {LAST_FRAME, ALTREF_FRAME}},
+ {NEAREST_NEWMV, {GOLDEN_FRAME, ALTREF_FRAME}},
+ {NEW_NEARMV, {LAST_FRAME, ALTREF_FRAME}},
+ {NEW_NEARMV, {GOLDEN_FRAME, ALTREF_FRAME}},
+ {NEAR_NEWMV, {LAST_FRAME, ALTREF_FRAME}},
+ {NEAR_NEWMV, {GOLDEN_FRAME, ALTREF_FRAME}},
{NEW_NEWMV, {LAST_FRAME, ALTREF_FRAME}},
{NEW_NEWMV, {GOLDEN_FRAME, ALTREF_FRAME}},
{ZERO_ZEROMV, {LAST_FRAME, ALTREF_FRAME}},
@@ -235,8 +289,29 @@
{NEAR_NEWMV, {LAST4_FRAME, ALTREF_FRAME}},
{NEW_NEWMV, {LAST4_FRAME, ALTREF_FRAME}},
{ZERO_ZEROMV, {LAST4_FRAME, ALTREF_FRAME}},
+#else // CONFIG_EXT_REFS
+#if CONFIG_BIDIR_PRED
+ {NEAR_NEARESTMV, {LAST_FRAME, BWDREF_FRAME}},
+ {NEAR_NEARESTMV, {GOLDEN_FRAME, BWDREF_FRAME}},
+ {NEAREST_NEARMV, {LAST_FRAME, BWDREF_FRAME}},
+ {NEAREST_NEARMV, {GOLDEN_FRAME, BWDREF_FRAME}},
+ {NEW_NEARESTMV, {LAST_FRAME, BWDREF_FRAME}},
+ {NEW_NEARESTMV, {GOLDEN_FRAME, BWDREF_FRAME}},
+ {NEAREST_NEWMV, {LAST_FRAME, BWDREF_FRAME}},
+ {NEAREST_NEWMV, {GOLDEN_FRAME, BWDREF_FRAME}},
+ {NEW_NEARMV, {LAST_FRAME, BWDREF_FRAME}},
+ {NEW_NEARMV, {GOLDEN_FRAME, BWDREF_FRAME}},
+ {NEAR_NEWMV, {LAST_FRAME, BWDREF_FRAME}},
+ {NEAR_NEWMV, {GOLDEN_FRAME, BWDREF_FRAME}},
+ {NEW_NEWMV, {LAST_FRAME, BWDREF_FRAME}},
+ {NEW_NEWMV, {GOLDEN_FRAME, BWDREF_FRAME}},
+ {ZERO_ZEROMV, {LAST_FRAME, BWDREF_FRAME}},
+ {ZERO_ZEROMV, {GOLDEN_FRAME, BWDREF_FRAME}},
+#endif // CONFIG_BIDIR_PRED
#endif // CONFIG_EXT_REFS
-#else
+
+#else // CONFIG_EXT_INTER
+
{NEARMV, {LAST_FRAME, ALTREF_FRAME}},
{NEWMV, {LAST_FRAME, ALTREF_FRAME}},
#if CONFIG_EXT_REFS
@@ -246,6 +321,13 @@
{NEWMV, {LAST3_FRAME, ALTREF_FRAME}},
{NEARMV, {LAST4_FRAME, ALTREF_FRAME}},
{NEWMV, {LAST4_FRAME, ALTREF_FRAME}},
+#else // CONFIG_EXT_REFS
+#if CONFIG_BIDIR_PRED
+ {NEARMV, {LAST_FRAME, BWDREF_FRAME}},
+ {NEWMV, {LAST_FRAME, BWDREF_FRAME}},
+ {NEARMV, {GOLDEN_FRAME, BWDREF_FRAME}},
+ {NEWMV, {GOLDEN_FRAME, BWDREF_FRAME}},
+#endif // CONFIG_BIDIR_PRED
#endif // CONFIG_EXT_REFS
{NEARMV, {GOLDEN_FRAME, ALTREF_FRAME}},
{NEWMV, {GOLDEN_FRAME, ALTREF_FRAME}},
@@ -255,6 +337,11 @@
{ZEROMV, {LAST3_FRAME, ALTREF_FRAME}},
{ZEROMV, {LAST2_FRAME, ALTREF_FRAME}},
{ZEROMV, {LAST4_FRAME, ALTREF_FRAME}},
+#else // CONFIG_EXT_REFS
+#if CONFIG_BIDIR_PRED
+ {ZEROMV, {LAST_FRAME, BWDREF_FRAME}},
+ {ZEROMV, {GOLDEN_FRAME, BWDREF_FRAME}},
+#endif // CONFIG_BIDIR_PRED
#endif // CONFIG_EXT_REFS
{ZEROMV, {GOLDEN_FRAME, ALTREF_FRAME}},
#endif // CONFIG_EXT_INTER
@@ -309,19 +396,41 @@
{{LAST2_FRAME, NONE}},
{{LAST3_FRAME, NONE}},
{{LAST4_FRAME, NONE}},
+#else // CONFIG_EXT_REFS
+#if CONFIG_BIDIR_PRED
+ {{BWDREF_FRAME, NONE}},
+#endif // CONFIG_BIDIR_PRED
#endif // CONFIG_EXT_REFS
{{GOLDEN_FRAME, NONE}},
{{ALTREF_FRAME, NONE}},
+
{{LAST_FRAME, ALTREF_FRAME}},
#if CONFIG_EXT_REFS
{{LAST2_FRAME, ALTREF_FRAME}},
{{LAST3_FRAME, ALTREF_FRAME}},
{{LAST4_FRAME, ALTREF_FRAME}},
+#else // CONFIG_EXT_REFS
+#if CONFIG_BIDIR_PRED
+ {{LAST_FRAME, BWDREF_FRAME}},
+ {{GOLDEN_FRAME, BWDREF_FRAME}},
+#endif // CONFIG_BIDIR_PRED
#endif // CONFIG_EXT_REFS
{{GOLDEN_FRAME, ALTREF_FRAME}},
{{INTRA_FRAME, NONE}},
};
+#if CONFIG_DUAL_FILTER
+// TODO(jingning): The magic number 9 is the number of combinations of
+// vertical and horizontal prediction filter types (3 x 3). It will be
+// replaced once the dual filter experiment is integrated with the
+// ext-interp experiment.
+static int filter_sets[9][2] = {
+ {0, 0}, {0, 1}, {0, 2},
+ {1, 0}, {1, 1}, {1, 2},
+ {2, 0}, {2, 1}, {2, 2},
+};
+#endif
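The interpolation-filter search in handle_inter_mode (later in this diff) walks these nine entries and copies each pair into the four per-direction filter slots. A standalone sketch of that expansion, with indices 0..2 simply standing in for the three switchable filter types:

#include <stdio.h>

static const int filter_sets[9][2] = {
  {0, 0}, {0, 1}, {0, 2},
  {1, 0}, {1, 1}, {1, 2},
  {2, 0}, {2, 1}, {2, 2},
};

int main(void) {
  int interp_filter[4];
  int i;
  for (i = 0; i < 9; ++i) {
    /* Slots 0/1: (vertical, horizontal) filters for the first reference;
     * slots 2/3: the same pair reused for the second reference. */
    interp_filter[0] = filter_sets[i][0];
    interp_filter[1] = filter_sets[i][1];
    interp_filter[2] = filter_sets[i][0];
    interp_filter[3] = filter_sets[i][1];
    printf("candidate %d: [%d %d %d %d]\n", i, interp_filter[0],
           interp_filter[1], interp_filter[2], interp_filter[3]);
  }
  return 0;
}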
+
static INLINE int write_uniform_cost(int n, int v) {
int l = get_unsigned_bits(n), m = (1 << l) - n;
if (l == 0)
@@ -540,8 +649,6 @@
get_energy_distribution_fine(cpi, bsize, src, src_stride,
dst, dst_stride, hdist, vdist);
-
-
svm_proj_v = vdist[0] * ADST_FLIP_SVM[0] +
vdist[1] * ADST_FLIP_SVM[1] +
vdist[2] * ADST_FLIP_SVM[2] + ADST_FLIP_SVM[3];
@@ -717,7 +824,7 @@
static void model_rd_for_sb(VP10_COMP *cpi, BLOCK_SIZE bsize,
MACROBLOCK *x, MACROBLOCKD *xd,
- int num_planes,
+ int plane_from, int plane_to,
int *out_rate_sum, int64_t *out_dist_sum,
int *skip_txfm_sb, int64_t *skip_sse_sb) {
// Note our transform coeffs are 8 times an orthogonal transform.
@@ -744,7 +851,7 @@
x->pred_sse[ref] = 0;
- for (i = 0; i < num_planes; ++i) {
+ for (i = plane_from; i <= plane_to; ++i) {
struct macroblock_plane *const p = &x->plane[i];
struct macroblockd_plane *const pd = &xd->plane[i];
const BLOCK_SIZE bs = get_plane_block_size(bsize, pd);
@@ -1997,8 +2104,8 @@
uint16_t best_dst16[8 * 8];
#endif
- memcpy(ta, a, sizeof(ta));
- memcpy(tl, l, sizeof(tl));
+ memcpy(ta, a, num_4x4_blocks_wide * sizeof(a[0]));
+ memcpy(tl, l, num_4x4_blocks_high * sizeof(l[0]));
xd->mi[0]->mbmi.tx_size = TX_4X4;
xd->mi[0]->mbmi.palette_mode_info.palette_size[0] = 0;
@@ -2020,8 +2127,8 @@
continue;
}
- memcpy(tempa, ta, sizeof(ta));
- memcpy(templ, tl, sizeof(tl));
+ memcpy(tempa, ta, num_4x4_blocks_wide * sizeof(ta[0]));
+ memcpy(templ, tl, num_4x4_blocks_high * sizeof(tl[0]));
for (idy = 0; idy < num_4x4_blocks_high; ++idy) {
for (idx = 0; idx < num_4x4_blocks_wide; ++idx) {
@@ -2105,8 +2212,8 @@
*bestdistortion = distortion;
best_rd = this_rd;
*best_mode = mode;
- memcpy(a, tempa, sizeof(tempa));
- memcpy(l, templ, sizeof(templ));
+ memcpy(a, tempa, num_4x4_blocks_wide * sizeof(tempa[0]));
+ memcpy(l, templ, num_4x4_blocks_high * sizeof(templ[0]));
for (idy = 0; idy < num_4x4_blocks_high * 4; ++idy) {
memcpy(best_dst16 + idy * 8,
CONVERT_TO_SHORTPTR(dst_init + idy * dst_stride),
@@ -2146,8 +2253,8 @@
continue;
}
- memcpy(tempa, ta, sizeof(ta));
- memcpy(templ, tl, sizeof(tl));
+ memcpy(tempa, ta, num_4x4_blocks_wide * sizeof(ta[0]));
+ memcpy(templ, tl, num_4x4_blocks_high * sizeof(tl[0]));
for (idy = 0; idy < num_4x4_blocks_high; ++idy) {
for (idx = 0; idx < num_4x4_blocks_wide; ++idx) {
@@ -2230,8 +2337,8 @@
*bestdistortion = distortion;
best_rd = this_rd;
*best_mode = mode;
- memcpy(a, tempa, sizeof(tempa));
- memcpy(l, templ, sizeof(templ));
+ memcpy(a, tempa, num_4x4_blocks_wide * sizeof(tempa[0]));
+ memcpy(l, templ, num_4x4_blocks_high * sizeof(templ[0]));
for (idy = 0; idy < num_4x4_blocks_high * 4; ++idy)
memcpy(best_dst + idy * 8, dst_init + idy * dst_stride,
num_4x4_blocks_wide * 4);
@@ -2267,12 +2374,8 @@
int64_t total_distortion = 0;
int tot_rate_y = 0;
int64_t total_rd = 0;
- ENTROPY_CONTEXT t_above[4], t_left[4];
const int *bmode_costs = cpi->mbmode_cost[0];
- memcpy(t_above, xd->plane[0].above_context, sizeof(t_above));
- memcpy(t_left, xd->plane[0].left_context, sizeof(t_left));
-
#if CONFIG_EXT_INTRA
mic->mbmi.ext_intra_mode_info.use_ext_intra_mode[0] = 0;
mic->mbmi.intra_filter = INTRA_FILTER_LINEAR;
@@ -2298,7 +2401,9 @@
}
this_rd = rd_pick_intra4x4block(cpi, mb, idy, idx, &best_mode,
- bmode_costs, t_above + idx, t_left + idy,
+ bmode_costs,
+ xd->plane[0].above_context + idx,
+ xd->plane[0].left_context + idy,
&r, &ry, &d, bsize, best_rd - total_rd);
if (this_rd >= best_rd - total_rd)
return INT64_MAX;
@@ -5691,8 +5796,10 @@
if (cm->reference_mode != COMPOUND_REFERENCE) {
vpx_prob ref_single_p1 = vp10_get_pred_prob_single_ref_p1(cm, xd);
vpx_prob ref_single_p2 = vp10_get_pred_prob_single_ref_p2(cm, xd);
-#if CONFIG_EXT_REFS
+#if CONFIG_EXT_REFS || CONFIG_BIDIR_PRED
vpx_prob ref_single_p3 = vp10_get_pred_prob_single_ref_p3(cm, xd);
+#endif // CONFIG_EXT_REFS || CONFIG_BIDIR_PRED
+#if CONFIG_EXT_REFS
vpx_prob ref_single_p4 = vp10_get_pred_prob_single_ref_p4(cm, xd);
vpx_prob ref_single_p5 = vp10_get_pred_prob_single_ref_p5(cm, xd);
#endif // CONFIG_EXT_REFS
@@ -5703,6 +5810,10 @@
ref_costs_single[LAST2_FRAME] =
ref_costs_single[LAST3_FRAME] =
ref_costs_single[LAST4_FRAME] =
+#else // CONFIG_EXT_REFS
+#if CONFIG_BIDIR_PRED
+ ref_costs_single[BWDREF_FRAME] =
+#endif // CONFIG_BIDIR_PRED
#endif // CONFIG_EXT_REFS
ref_costs_single[GOLDEN_FRAME] =
ref_costs_single[ALTREF_FRAME] = base_cost;
@@ -5726,12 +5837,18 @@
ref_costs_single[LAST2_FRAME] += vp10_cost_bit(ref_single_p4, 1);
ref_costs_single[LAST3_FRAME] += vp10_cost_bit(ref_single_p5, 0);
ref_costs_single[LAST4_FRAME] += vp10_cost_bit(ref_single_p5, 1);
-#else
+#else // CONFIG_EXT_REFS
ref_costs_single[LAST_FRAME] += vp10_cost_bit(ref_single_p1, 0);
ref_costs_single[GOLDEN_FRAME] += vp10_cost_bit(ref_single_p1, 1);
- ref_costs_single[ALTREF_FRAME] += vp10_cost_bit(ref_single_p1, 1);
ref_costs_single[GOLDEN_FRAME] += vp10_cost_bit(ref_single_p2, 0);
+ ref_costs_single[ALTREF_FRAME] += vp10_cost_bit(ref_single_p1, 1);
ref_costs_single[ALTREF_FRAME] += vp10_cost_bit(ref_single_p2, 1);
+#if CONFIG_BIDIR_PRED
+ ref_costs_single[ALTREF_FRAME] += vp10_cost_bit(ref_single_p3, 1);
+ ref_costs_single[BWDREF_FRAME] += vp10_cost_bit(ref_single_p1, 1);
+ ref_costs_single[BWDREF_FRAME] += vp10_cost_bit(ref_single_p2, 1);
+ ref_costs_single[BWDREF_FRAME] += vp10_cost_bit(ref_single_p3, 0);
+#endif // CONFIG_BIDIR_PRED
#endif // CONFIG_EXT_REFS
} else {
ref_costs_single[LAST_FRAME] = 512;
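Taken together, the single-reference cost assignments above imply a three-level tree when CONFIG_BIDIR_PRED is on (and CONFIG_EXT_REFS is off): ref_single_p1 splits off LAST_FRAME, ref_single_p2 splits off GOLDEN_FRAME, and ref_single_p3 separates BWDREF_FRAME from ALTREF_FRAME. A small sketch of the resulting bit patterns (bits only; actual costs depend on the probabilities):

#include <stdio.h>

typedef enum { LAST, GOLDEN, BWDREF, ALTREF } SingleRef;

/* Bits implied by the cost assignments, most significant first. */
static const char *ref_bits(SingleRef r) {
  switch (r) {
    case LAST:   return "0";    /* p1 = 0 */
    case GOLDEN: return "10";   /* p1 = 1, p2 = 0 */
    case BWDREF: return "110";  /* p1 = 1, p2 = 1, p3 = 0 */
    case ALTREF: return "111";  /* p1 = 1, p2 = 1, p3 = 1 */
    default:     return "?";
  }
}

int main(void) {
  printf("LAST=%s GOLDEN=%s BWDREF=%s ALTREF=%s\n",
         ref_bits(LAST), ref_bits(GOLDEN), ref_bits(BWDREF), ref_bits(ALTREF));
  return 0;
}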
@@ -5739,6 +5856,10 @@
ref_costs_single[LAST2_FRAME] = 512;
ref_costs_single[LAST3_FRAME] = 512;
ref_costs_single[LAST4_FRAME] = 512;
+#else // CONFIG_EXT_REFS
+#if CONFIG_BIDIR_PRED
+ ref_costs_single[BWDREF_FRAME] = 512;
+#endif // CONFIG_BIDIR_PRED
#endif // CONFIG_EXT_REFS
ref_costs_single[GOLDEN_FRAME] = 512;
ref_costs_single[ALTREF_FRAME] = 512;
@@ -5750,7 +5871,12 @@
vpx_prob ref_comp_p1 = vp10_get_pred_prob_comp_ref_p1(cm, xd);
vpx_prob ref_comp_p2 = vp10_get_pred_prob_comp_ref_p2(cm, xd);
vpx_prob ref_comp_p3 = vp10_get_pred_prob_comp_ref_p3(cm, xd);
+#else // CONFIG_EXT_REFS
+#if CONFIG_BIDIR_PRED
+ vpx_prob bwdref_comp_p = vp10_get_pred_prob_comp_bwdref_p(cm, xd);
+#endif // CONFIG_BIDIR_PRED
#endif // CONFIG_EXT_REFS
+
unsigned int base_cost = vp10_cost_bit(intra_inter_p, 1);
ref_costs_comp[LAST_FRAME] =
@@ -5761,6 +5887,12 @@
#endif // CONFIG_EXT_REFS
ref_costs_comp[GOLDEN_FRAME] = base_cost;
+#if !CONFIG_EXT_REFS && CONFIG_BIDIR_PRED
+  // NOTE(zoeliu): BWDREF and ALTREF each add an extra cost of coding one
+  //               more bit.
+ ref_costs_comp[BWDREF_FRAME] = ref_costs_comp[ALTREF_FRAME] = 0;
+#endif // !CONFIG_EXT_REFS && CONFIG_BIDIR_PRED
+
#if CONFIG_EXT_REFS
ref_costs_comp[LAST_FRAME] += vp10_cost_bit(ref_comp_p, 0);
ref_costs_comp[LAST2_FRAME] += vp10_cost_bit(ref_comp_p, 0);
@@ -5776,9 +5908,13 @@
ref_costs_comp[LAST3_FRAME] += vp10_cost_bit(ref_comp_p3, 1);
ref_costs_comp[LAST4_FRAME] += vp10_cost_bit(ref_comp_p3, 0);
-#else
+#else // CONFIG_EXT_REFS
ref_costs_comp[LAST_FRAME] += vp10_cost_bit(ref_comp_p, 0);
ref_costs_comp[GOLDEN_FRAME] += vp10_cost_bit(ref_comp_p, 1);
+#if CONFIG_BIDIR_PRED
+ ref_costs_comp[BWDREF_FRAME] += vp10_cost_bit(bwdref_comp_p, 0);
+ ref_costs_comp[ALTREF_FRAME] += vp10_cost_bit(bwdref_comp_p, 1);
+#endif // CONFIG_BIDIR_PRED
#endif // CONFIG_EXT_REFS
} else {
ref_costs_comp[LAST_FRAME] = 512;
@@ -5786,6 +5922,11 @@
ref_costs_comp[LAST2_FRAME] = 512;
ref_costs_comp[LAST3_FRAME] = 512;
ref_costs_comp[LAST4_FRAME] = 512;
+#else // CONFIG_EXT_REFS
+#if CONFIG_BIDIR_PRED
+ ref_costs_comp[BWDREF_FRAME] = 512;
+ ref_costs_comp[ALTREF_FRAME] = 512;
+#endif // CONFIG_BIDIR_PRED
#endif // CONFIG_EXT_REFS
ref_costs_comp[GOLDEN_FRAME] = 512;
}
@@ -6384,6 +6525,56 @@
xd->mb_to_bottom_edge + RIGHT_BOTTOM_MARGIN);
}
+#if CONFIG_EXT_INTER
+static int estimate_wedge_sign(const VP10_COMP *cpi,
+ const MACROBLOCK *x,
+ const BLOCK_SIZE bsize,
+ uint8_t *pred0, int stride0,
+ uint8_t *pred1, int stride1) {
+ const struct macroblock_plane *const p = &x->plane[0];
+ const uint8_t *src = p->src.buf;
+ int src_stride = p->src.stride;
+ const int f_index = bsize - BLOCK_8X8;
+ const int bw = 4 << (b_width_log2_lookup[bsize]);
+ const int bh = 4 << (b_height_log2_lookup[bsize]);
+ uint32_t esq[2][4], var;
+ int64_t tl, br;
+
+ var = cpi->fn_ptr[f_index].vf(
+ src, src_stride,
+ pred0, stride0, &esq[0][0]);
+ var = cpi->fn_ptr[f_index].vf(
+ src + bw / 2, src_stride,
+ pred0 + bw / 2, stride0, &esq[0][1]);
+ var = cpi->fn_ptr[f_index].vf(
+ src + bh / 2 * src_stride, src_stride,
+ pred0 + bh / 2 * stride0, stride0, &esq[0][2]);
+ var = cpi->fn_ptr[f_index].vf(
+ src + bh / 2 * src_stride + bw / 2, src_stride,
+ pred0 + bh / 2 * stride0 + bw / 2, stride0, &esq[0][3]);
+ var = cpi->fn_ptr[f_index].vf(
+ src, src_stride,
+ pred1, stride1, &esq[1][0]);
+ var = cpi->fn_ptr[f_index].vf(
+ src + bw / 2, src_stride,
+ pred1 + bw / 2, stride1, &esq[1][1]);
+ var = cpi->fn_ptr[f_index].vf(
+ src + bh / 2 * src_stride, src_stride,
+      pred1 + bh / 2 * stride1, stride1, &esq[1][2]);
+ var = cpi->fn_ptr[f_index].vf(
+ src + bh / 2 * src_stride + bw / 2, src_stride,
+ pred1 + bh / 2 * stride1 + bw / 2, stride1, &esq[1][3]);
+ (void) var;
+
+ tl = (int64_t)(esq[0][0] + esq[0][1] + esq[0][2]) -
+ (int64_t)(esq[1][0] + esq[1][1] + esq[1][2]);
+ br = (int64_t)(esq[1][3] + esq[1][1] + esq[1][2]) -
+ (int64_t)(esq[0][3] + esq[0][1] + esq[0][2]);
+ return (tl + br > 0);
+}
+#endif // CONFIG_EXT_INTER
+
+#if !CONFIG_DUAL_FILTER
static INTERP_FILTER predict_interp_filter(const VP10_COMP *cpi,
const MACROBLOCK *x,
const BLOCK_SIZE bsize,
@@ -6503,6 +6694,7 @@
}
return best_filter;
}
+#endif
static int64_t handle_inter_mode(VP10_COMP *cpi, MACROBLOCK *x,
BLOCK_SIZE bsize,
@@ -6590,7 +6782,16 @@
uint8_t *orig_dst[MAX_MB_PLANE];
int orig_dst_stride[MAX_MB_PLANE];
int rs = 0;
+#if CONFIG_DUAL_FILTER
+ // Index use case:
+ // {0, 1} -> (vertical, horizontal) filter types for the first ref frame
+ // {2, 3} -> (vertical, horizontal) filter types for the second ref frame
+ INTERP_FILTER best_filter[4] = {SWITCHABLE, SWITCHABLE,
+ SWITCHABLE, SWITCHABLE,
+ };
+#else
INTERP_FILTER best_filter = SWITCHABLE;
+#endif
uint8_t skip_txfm[MAX_MB_PLANE][MAX_TX_BLOCKS_IN_MAX_SB] = {{0}};
int64_t bsse[MAX_MB_PLANE][MAX_TX_BLOCKS_IN_MAX_SB] = {{0}};
@@ -6887,22 +7088,31 @@
if (is_comp_pred)
intpel_mv &= !mv_has_subpel(&mbmi->mv[1].as_mv);
+#if !CONFIG_DUAL_FILTER
best_filter = predict_interp_filter(cpi, x, bsize, mi_row, mi_col,
single_filter);
- if (cm->interp_filter != BILINEAR && best_filter == SWITCHABLE) {
+#endif
+
+ if (cm->interp_filter != BILINEAR) {
int newbest;
int tmp_rate_sum = 0;
int64_t tmp_dist_sum = 0;
+#if CONFIG_DUAL_FILTER
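+ // With dual filters the search loops over the 9 entries of filter_sets[],
+ // each a (vertical, horizontal) filter pair applied to both references,
+ // instead of the SWITCHABLE_FILTERS single-filter candidates.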
+ for (i = 0; i < 9; ++i) {
+#else
for (i = 0; i < SWITCHABLE_FILTERS; ++i) {
+#endif
int j;
int64_t rs_rd;
int tmp_skip_sb = 0;
int64_t tmp_skip_sse = INT64_MAX;
#if CONFIG_DUAL_FILTER
- for (j = 0; j < 4; ++j)
- mbmi->interp_filter[j] = i;
+ mbmi->interp_filter[0] = filter_sets[i][0];
+ mbmi->interp_filter[1] = filter_sets[i][1];
+ mbmi->interp_filter[2] = filter_sets[i][0];
+ mbmi->interp_filter[3] = filter_sets[i][1];
#else
mbmi->interp_filter = i;
#endif
@@ -6946,8 +7156,8 @@
}
}
vp10_build_inter_predictors_sb(xd, mi_row, mi_col, bsize);
- model_rd_for_sb(cpi, bsize, x, xd, MAX_MB_PLANE, &rate_sum, &dist_sum,
- &tmp_skip_sb, &tmp_skip_sse);
+ model_rd_for_sb(cpi, bsize, x, xd, 0, MAX_MB_PLANE - 1,
+ &rate_sum, &dist_sum, &tmp_skip_sb, &tmp_skip_sse);
rd = RDCOST(x->rdmult, x->rddiv, rate_sum, dist_sum);
if (cm->interp_filter == SWITCHABLE)
@@ -6970,7 +7180,10 @@
if (newbest) {
best_rd = rd;
#if CONFIG_DUAL_FILTER
- best_filter = mbmi->interp_filter[0];
+ best_filter[0] = mbmi->interp_filter[0];
+ best_filter[1] = mbmi->interp_filter[1];
+ best_filter[2] = mbmi->interp_filter[2];
+ best_filter[3] = mbmi->interp_filter[3];
#else
best_filter = mbmi->interp_filter;
#endif
@@ -7002,11 +7215,15 @@
// Set the appropriate filter
#if CONFIG_DUAL_FILTER
- for (i = 0; i < 4; ++i) {
- const int frame_idx = (i >> 1);
- if (mbmi->ref_frame[frame_idx] > INTRA_FRAME)
- mbmi->interp_filter[i] = cm->interp_filter != SWITCHABLE ?
- cm->interp_filter : best_filter;
+ mbmi->interp_filter[0] = cm->interp_filter != SWITCHABLE ?
+ cm->interp_filter : best_filter[0];
+ mbmi->interp_filter[1] = cm->interp_filter != SWITCHABLE ?
+ cm->interp_filter : best_filter[1];
+ if (mbmi->ref_frame[1] > INTRA_FRAME) {
+ mbmi->interp_filter[2] = cm->interp_filter != SWITCHABLE ?
+ cm->interp_filter : best_filter[2];
+ mbmi->interp_filter[3] = cm->interp_filter != SWITCHABLE ?
+ cm->interp_filter : best_filter[3];
}
#else
mbmi->interp_filter = cm->interp_filter != SWITCHABLE ?
@@ -7024,8 +7241,9 @@
#endif // CONFIG_OBMC
if (is_comp_pred && is_interinter_wedge_used(bsize)) {
- int wedge_index, best_wedge_index = WEDGE_NONE, rs;
- int rate_sum;
+ int wedge_index, best_wedge_index = WEDGE_NONE;
+ int wedge_sign, best_wedge_sign = 0;
+ int rate_sum, rs;
int64_t dist_sum;
int64_t best_rd_nowedge = INT64_MAX;
int64_t best_rd_wedge = INT64_MAX;
@@ -7034,6 +7252,7 @@
int64_t tmp_skip_sse_sb;
rs = vp10_cost_bit(cm->fc->wedge_interinter_prob[bsize], 0);
+ mbmi->use_wedge_interinter = 0;
vp10_build_inter_predictors_sby(xd, mi_row, mi_col, bsize);
vp10_subtract_plane(x, bsize, 0);
rd = estimate_yrd_for_sb(cpi, bsize, x, &rate_sum, &dist_sum,
@@ -7042,41 +7261,43 @@
if (rd != INT64_MAX)
rd = RDCOST(x->rdmult, x->rddiv, rs + rate_mv + rate_sum, dist_sum);
best_rd_nowedge = rd;
- mbmi->use_wedge_interinter = 0;
// Disable wedge search if the source variance is small
if (x->source_variance > cpi->sf.disable_wedge_search_var_thresh &&
- best_rd_nowedge < 3 * ref_best_rd) {
+ best_rd_nowedge / 3 < ref_best_rd) {
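+ // (Presumably dividing best_rd_nowedge, rather than multiplying
+ // ref_best_rd by 3, avoids signed overflow when ref_best_rd is INT64_MAX.)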
+ uint8_t pred0[2 * MAX_SB_SQUARE * 3];
+ uint8_t pred1[2 * MAX_SB_SQUARE * 3];
+ uint8_t *preds0[3] = {pred0,
+ pred0 + 2 * MAX_SB_SQUARE,
+ pred0 + 4 * MAX_SB_SQUARE};
+ uint8_t *preds1[3] = {pred1,
+ pred1 + 2 * MAX_SB_SQUARE,
+ pred1 + 4 * MAX_SB_SQUARE};
+ int strides[3] = {MAX_SB_SIZE, MAX_SB_SIZE, MAX_SB_SIZE};
+ int est_wedge_sign;
mbmi->use_wedge_interinter = 1;
- rs = vp10_cost_literal(1 + get_wedge_bits_lookup[bsize]) +
+ rs = vp10_cost_literal(get_interinter_wedge_bits(bsize)) +
vp10_cost_bit(cm->fc->wedge_interinter_prob[bsize], 1);
- wedge_types = (1 << get_wedge_bits_lookup[bsize]);
- if (have_newmv_in_inter_mode(this_mode)) {
- int_mv tmp_mv[2];
- int rate_mvs[2], tmp_rate_mv = 0;
- uint8_t pred0[2 * MAX_SB_SQUARE * 3];
- uint8_t pred1[2 * MAX_SB_SQUARE * 3];
- uint8_t *preds0[3] = {pred0,
- pred0 + 2 * MAX_SB_SQUARE,
- pred0 + 4 * MAX_SB_SQUARE};
- uint8_t *preds1[3] = {pred1,
- pred1 + 2 * MAX_SB_SQUARE,
- pred1 + 4 * MAX_SB_SQUARE};
- int strides[3] = {MAX_SB_SIZE, MAX_SB_SIZE, MAX_SB_SIZE};
- vp10_build_inter_predictors_for_planes_single_buf(
- xd, bsize, 0, 0, mi_row, mi_col, 0, preds0, strides);
- vp10_build_inter_predictors_for_planes_single_buf(
- xd, bsize, 0, 0, mi_row, mi_col, 1, preds1, strides);
+ wedge_types = (1 << get_wedge_bits_lookup(bsize));
- for (wedge_index = 0; wedge_index < 2 * wedge_types; ++wedge_index) {
- mbmi->interinter_wedge_index = wedge_index >> 1;
- mbmi->interinter_wedge_sign = wedge_index & 1;
- vp10_build_wedge_inter_predictor_from_buf(xd, bsize, 0, 0,
- mi_row, mi_col,
+ vp10_build_inter_predictors_for_planes_single_buf(
+ xd, bsize, 0, 0, mi_row, mi_col, 0, preds0, strides);
+ vp10_build_inter_predictors_for_planes_single_buf(
+ xd, bsize, 0, 0, mi_row, mi_col, 1, preds1, strides);
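+ // The two single-reference predictions above are built once into scratch
+ // buffers; the wedge search below only re-blends them, so motion
+ // compensation is not repeated for every candidate wedge.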
+
+ // Choose the best wedge
+ if (cpi->sf.fast_wedge_sign_estimate) {
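+ // Fast path: fix the wedge sign from the quadrant-SSE estimate and
+ // search only the wedge shapes, halving the number of candidates
+ // relative to the exhaustive (shape, sign) search below.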
+ est_wedge_sign = estimate_wedge_sign(
+ cpi, x, bsize, pred0, MAX_SB_SIZE, pred1, MAX_SB_SIZE);
+ best_wedge_sign = mbmi->interinter_wedge_sign = est_wedge_sign;
+ for (wedge_index = 0; wedge_index < wedge_types; ++wedge_index) {
+ mbmi->interinter_wedge_index = wedge_index;
+ vp10_build_wedge_inter_predictor_from_buf(xd, bsize,
+ 0, 0, mi_row, mi_col,
preds0, strides,
preds1, strides);
- model_rd_for_sb(cpi, bsize, x, xd, 1,
+ model_rd_for_sb(cpi, bsize, x, xd, 0, 0,
&rate_sum, &dist_sum,
&tmp_skip_txfm_sb, &tmp_skip_sse_sb);
rd = RDCOST(x->rdmult, x->rddiv, rs + rate_mv + rate_sum, dist_sum);
@@ -7085,8 +7306,34 @@
best_rd_wedge = rd;
}
}
- mbmi->interinter_wedge_index = best_wedge_index >> 1;
- mbmi->interinter_wedge_sign = best_wedge_index & 1;
+ } else {
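+ // Exhaustive path: evaluate every (wedge shape, sign) combination.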
+ for (wedge_index = 0; wedge_index < wedge_types; ++wedge_index) {
+ for (wedge_sign = 0; wedge_sign < 2; ++wedge_sign) {
+ mbmi->interinter_wedge_index = wedge_index;
+ mbmi->interinter_wedge_sign = wedge_sign;
+ vp10_build_wedge_inter_predictor_from_buf(xd, bsize,
+ 0, 0, mi_row, mi_col,
+ preds0, strides,
+ preds1, strides);
+ model_rd_for_sb(cpi, bsize, x, xd, 0, 0,
+ &rate_sum, &dist_sum,
+ &tmp_skip_txfm_sb, &tmp_skip_sse_sb);
+ rd = RDCOST(x->rdmult, x->rddiv,
+ rs + rate_mv + rate_sum, dist_sum);
+ if (rd < best_rd_wedge) {
+ best_wedge_index = wedge_index;
+ best_wedge_sign = wedge_sign;
+ best_rd_wedge = rd;
+ }
+ }
+ }
+ }
+ mbmi->interinter_wedge_index = best_wedge_index;
+ mbmi->interinter_wedge_sign = best_wedge_sign;
+
+ if (have_newmv_in_inter_mode(this_mode)) {
+ int_mv tmp_mv[2];
+ int rate_mvs[2], tmp_rate_mv = 0;
if (this_mode == NEW_NEWMV) {
int mv_idxs[2] = {0, 0};
do_masked_motion_search_indexed(cpi, x,
@@ -7117,7 +7364,7 @@
mbmi->mv[1].as_int = tmp_mv[1].as_int;
}
vp10_build_inter_predictors_sby(xd, mi_row, mi_col, bsize);
- model_rd_for_sb(cpi, bsize, x, xd, 1, &rate_sum, &dist_sum,
+ model_rd_for_sb(cpi, bsize, x, xd, 0, 0, &rate_sum, &dist_sum,
&tmp_skip_txfm_sb, &tmp_skip_sse_sb);
rd = RDCOST(x->rdmult, x->rddiv, rs + tmp_rate_mv + rate_sum, dist_sum);
if (rd < best_rd_wedge) {
@@ -7131,7 +7378,6 @@
preds0, strides,
preds1, strides);
}
-
vp10_subtract_plane(x, bsize, 0);
rd = estimate_yrd_for_sb(cpi, bsize, x, &rate_sum, &dist_sum,
&tmp_skip_txfm_sb, &tmp_skip_sse_sb,
@@ -7143,8 +7389,8 @@
if (best_rd_wedge < best_rd_nowedge) {
mbmi->use_wedge_interinter = 1;
- mbmi->interinter_wedge_index = best_wedge_index >> 1;
- mbmi->interinter_wedge_sign = best_wedge_index & 1;
+ mbmi->interinter_wedge_index = best_wedge_index;
+ mbmi->interinter_wedge_sign = best_wedge_sign;
xd->mi[0]->bmi[0].as_mv[0].as_int = mbmi->mv[0].as_int;
xd->mi[0]->bmi[0].as_mv[1].as_int = mbmi->mv[1].as_int;
*rate2 += tmp_rate_mv - rate_mv;
@@ -7157,37 +7403,6 @@
xd->mi[0]->bmi[0].as_mv[1].as_int = mbmi->mv[1].as_int;
}
} else {
- uint8_t pred0[2 * MAX_SB_SQUARE * 3];
- uint8_t pred1[2 * MAX_SB_SQUARE * 3];
- uint8_t *preds0[3] = {pred0,
- pred0 + 2 * MAX_SB_SQUARE,
- pred0 + 4 * MAX_SB_SQUARE};
- uint8_t *preds1[3] = {pred1,
- pred1 + 2 * MAX_SB_SQUARE,
- pred1 + 4 * MAX_SB_SQUARE};
- int strides[3] = {MAX_SB_SIZE, MAX_SB_SIZE, MAX_SB_SIZE};
- vp10_build_inter_predictors_for_planes_single_buf(
- xd, bsize, 0, 0, mi_row, mi_col, 0, preds0, strides);
- vp10_build_inter_predictors_for_planes_single_buf(
- xd, bsize, 0, 0, mi_row, mi_col, 1, preds1, strides);
- for (wedge_index = 0; wedge_index < 2 * wedge_types; ++wedge_index) {
- mbmi->interinter_wedge_index = wedge_index >> 1;
- mbmi->interinter_wedge_sign = wedge_index & 1;
- vp10_build_wedge_inter_predictor_from_buf(xd, bsize,
- 0, 0, mi_row, mi_col,
- preds0, strides,
- preds1, strides);
- model_rd_for_sb(cpi, bsize, x, xd, 1,
- &rate_sum, &dist_sum,
- &tmp_skip_txfm_sb, &tmp_skip_sse_sb);
- rd = RDCOST(x->rdmult, x->rddiv, rs + rate_mv + rate_sum, dist_sum);
- if (rd < best_rd_wedge) {
- best_wedge_index = wedge_index;
- best_rd_wedge = rd;
- }
- }
- mbmi->interinter_wedge_sign = best_wedge_index & 1;
- mbmi->interinter_wedge_index = best_wedge_index >> 1;
vp10_build_wedge_inter_predictor_from_buf(xd, bsize,
0, 0, mi_row, mi_col,
preds0, strides,
@@ -7197,12 +7412,12 @@
&tmp_skip_txfm_sb, &tmp_skip_sse_sb,
INT64_MAX);
if (rd != INT64_MAX)
- rd = RDCOST(x->rdmult, x->rddiv, rs + rate_mv + rate_sum, dist_sum);
- best_rd_wedge = rd;
+ rd = RDCOST(x->rdmult, x->rddiv, rs + rate_mv + rate_sum, dist_sum);
+ best_rd_wedge = rd;
if (best_rd_wedge < best_rd_nowedge) {
mbmi->use_wedge_interinter = 1;
- mbmi->interinter_wedge_index = best_wedge_index >> 1;
- mbmi->interinter_wedge_sign = best_wedge_index & 1;
+ mbmi->interinter_wedge_index = best_wedge_index;
+ mbmi->interinter_wedge_sign = best_wedge_sign;
} else {
mbmi->use_wedge_interinter = 0;
}
@@ -7217,7 +7432,7 @@
if (mbmi->use_wedge_interinter)
*compmode_wedge_cost =
- vp10_cost_literal(1 + get_wedge_bits_lookup[bsize]) +
+ vp10_cost_literal(get_interinter_wedge_bits(bsize)) +
vp10_cost_bit(cm->fc->wedge_interinter_prob[bsize], 1);
else
*compmode_wedge_cost =
@@ -7241,6 +7456,7 @@
DECLARE_ALIGNED(16, uint8_t,
intrapred_[2 * MAX_MB_PLANE * MAX_SB_SQUARE]);
uint8_t *intrapred;
+
#if CONFIG_VP9_HIGHBITDEPTH
if (xd->cur_buf->flags & YV12_FLAG_HIGHBITDEPTH)
intrapred = CONVERT_TO_BYTEPTR(intrapred_);
@@ -7265,34 +7481,34 @@
xd, bsize, 0, intrapred, MAX_SB_SIZE);
vp10_combine_interintra(xd, bsize, 0, tmp_buf, MAX_SB_SIZE,
intrapred, MAX_SB_SIZE);
- vp10_subtract_plane(x, bsize, 0);
- rd = estimate_yrd_for_sb(cpi, bsize, x, &rate_sum, &dist_sum,
- &tmp_skip_txfm_sb, &tmp_skip_sse_sb,
- INT64_MAX);
- if (rd != INT64_MAX)
- rd = RDCOST(x->rdmult, x->rddiv, rate_mv + rmode + rate_sum, dist_sum);
+ model_rd_for_sb(cpi, bsize, x, xd, 0, 0, &rate_sum, &dist_sum,
+ &tmp_skip_txfm_sb, &tmp_skip_sse_sb);
+ rd = RDCOST(x->rdmult, x->rddiv, rs + tmp_rate_mv + rate_sum, dist_sum);
if (rd < best_interintra_rd) {
best_interintra_rd = rd;
best_interintra_mode = mbmi->interintra_mode;
}
}
mbmi->interintra_mode = best_interintra_mode;
+ rmode = interintra_mode_cost[mbmi->interintra_mode];
+ vp10_build_intra_predictors_for_interintra(
+ xd, bsize, 0, intrapred, MAX_SB_SIZE);
+ vp10_combine_interintra(xd, bsize, 0, tmp_buf, MAX_SB_SIZE,
+ intrapred, MAX_SB_SIZE);
+ vp10_subtract_plane(x, bsize, 0);
+ rd = estimate_yrd_for_sb(cpi, bsize, x, &rate_sum, &dist_sum,
+ &tmp_skip_txfm_sb, &tmp_skip_sse_sb,
+ INT64_MAX);
+ if (rd != INT64_MAX)
+ rd = RDCOST(x->rdmult, x->rddiv, rate_mv + rmode + rate_sum, dist_sum);
+ best_interintra_rd = rd;
+
if (ref_best_rd < INT64_MAX &&
best_interintra_rd > 2 * ref_best_rd) {
return INT64_MAX;
}
- vp10_build_intra_predictors_for_interintra(
- xd, bsize, 0, intrapred, MAX_SB_SIZE);
-
- rmode = interintra_mode_cost[mbmi->interintra_mode];
if (is_interintra_wedge_used(bsize)) {
rwedge = vp10_cost_bit(cm->fc->wedge_interintra_prob[bsize], 0);
- vp10_combine_interintra(xd, bsize, 0, tmp_buf, MAX_SB_SIZE,
- intrapred, MAX_SB_SIZE);
- vp10_subtract_plane(x, bsize, 0);
- rd = estimate_yrd_for_sb(cpi, bsize, x, &rate_sum, &dist_sum,
- &tmp_skip_txfm_sb, &tmp_skip_sse_sb,
- INT64_MAX);
if (rd != INT64_MAX)
rd = RDCOST(x->rdmult, x->rddiv,
rmode + rate_mv + rwedge + rate_sum, dist_sum);
@@ -7300,10 +7516,9 @@
// Disable wedge search if the source variance is small
if (x->source_variance > cpi->sf.disable_wedge_search_var_thresh) {
-
mbmi->use_wedge_interintra = 1;
- wedge_types = (1 << get_wedge_bits_lookup[bsize]);
- rwedge = vp10_cost_literal(get_wedge_bits_lookup[bsize]) +
+ wedge_types = (1 << get_wedge_bits_lookup(bsize));
+ rwedge = vp10_cost_literal(get_interintra_wedge_bits(bsize)) +
vp10_cost_bit(cm->fc->wedge_interintra_prob[bsize], 1);
for (wedge_index = 0; wedge_index < wedge_types; ++wedge_index) {
mbmi->interintra_wedge_index = wedge_index;
@@ -7311,7 +7526,7 @@
vp10_combine_interintra(xd, bsize, 0,
tmp_buf, MAX_SB_SIZE,
intrapred, MAX_SB_SIZE);
- model_rd_for_sb(cpi, bsize, x, xd, 1,
+ model_rd_for_sb(cpi, bsize, x, xd, 0, 0,
&rate_sum, &dist_sum,
&tmp_skip_txfm_sb, &tmp_skip_sse_sb);
rd = RDCOST(x->rdmult, x->rddiv,
@@ -7333,7 +7548,7 @@
0, mv_idx);
mbmi->mv[0].as_int = tmp_mv.as_int;
vp10_build_inter_predictors_sby(xd, mi_row, mi_col, bsize);
- model_rd_for_sb(cpi, bsize, x, xd, 1, &rate_sum, &dist_sum,
+ model_rd_for_sb(cpi, bsize, x, xd, 0, 0, &rate_sum, &dist_sum,
&tmp_skip_txfm_sb, &tmp_skip_sse_sb);
rd = RDCOST(x->rdmult, x->rddiv,
rmode + tmp_rate_mv + rwedge + rate_sum, dist_sum);
@@ -7390,7 +7605,7 @@
cm->fc->wedge_interintra_prob[bsize], mbmi->use_wedge_interintra);
if (mbmi->use_wedge_interintra) {
*compmode_interintra_cost +=
- vp10_cost_literal(get_wedge_bits_lookup[bsize]);
+ vp10_cost_literal(get_interintra_wedge_bits(bsize));
}
}
} else if (is_interintra_allowed(mbmi)) {
@@ -7428,8 +7643,8 @@
// switchable list (ex. bilinear) is indicated at the frame level, or
// skip condition holds.
vp10_build_inter_predictors_sb(xd, mi_row, mi_col, bsize);
- model_rd_for_sb(cpi, bsize, x, xd, MAX_MB_PLANE, &tmp_rate, &tmp_dist,
- &skip_txfm_sb, &skip_sse_sb);
+ model_rd_for_sb(cpi, bsize, x, xd, 0, MAX_MB_PLANE - 1,
+ &tmp_rate, &tmp_dist, &skip_txfm_sb, &skip_sse_sb);
rd = RDCOST(x->rdmult, x->rddiv, rs + tmp_rate, tmp_dist);
memcpy(skip_txfm, x->skip_txfm, sizeof(skip_txfm));
memcpy(bsse, x->bsse, sizeof(bsse));
@@ -7537,6 +7752,19 @@
#else
int tmp_rate2 = rate2_nocoeff;
#endif // CONFIG_EXT_INTER
+#if CONFIG_EXT_INTERP
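+ // Record the interpolation filters chosen for the obmc == 0 and obmc == 1
+ // passes separately, so that the filters of whichever pass wins the RD
+ // comparison can be restored below.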
+#if CONFIG_DUAL_FILTER
+ INTERP_FILTER obmc_interp_filter[2][2] = {
+ {mbmi->interp_filter[0], mbmi->interp_filter[1]}, // obmc == 0
+ {mbmi->interp_filter[0], mbmi->interp_filter[1]} // obmc == 1
+ };
+#else
+ INTERP_FILTER obmc_interp_filter[2] = {
+ mbmi->interp_filter, // obmc == 0
+ mbmi->interp_filter // obmc == 1
+ };
+#endif // CONFIG_DUAL_FILTER
+#endif // CONFIG_EXT_INTERP
if (mbmi->obmc) {
#if CONFIG_EXT_INTER
@@ -7565,6 +7793,21 @@
#else
tmp_rate2 = rate2_nocoeff - rate_mv + tmp_rate_mv;
#endif // CONFIG_EXT_INTER
+#if CONFIG_EXT_INTERP
+#if CONFIG_DUAL_FILTER
+ if (!has_subpel_mv_component(xd->mi[0], xd, 0))
+ obmc_interp_filter[1][0] = mbmi->interp_filter[0] = EIGHTTAP_REGULAR;
+ if (!has_subpel_mv_component(xd->mi[0], xd, 1))
+ obmc_interp_filter[1][1] = mbmi->interp_filter[1] = EIGHTTAP_REGULAR;
+#else
+ if (!vp10_is_interp_needed(xd))
+ obmc_interp_filter[1] = mbmi->interp_filter = EIGHTTAP_REGULAR;
+#endif // CONFIG_DUAL_FILTER
+ // This is not quite correct with CONFIG_DUAL_FILTER when a filter
+ // is needed in only one direction
+ if (!vp10_is_interp_needed(xd))
+ tmp_rate2 -= rs;
+#endif // CONFIG_EXT_INTERP
vp10_build_inter_predictors_sb(xd, mi_row, mi_col, bsize);
#if CONFIG_EXT_INTER
} else {
@@ -7575,8 +7818,8 @@
NULL, NULL,
dst_buf1, dst_stride1,
dst_buf2, dst_stride2);
- model_rd_for_sb(cpi, bsize, x, xd, MAX_MB_PLANE, &tmp_rate, &tmp_dist,
- &skip_txfm_sb, &skip_sse_sb);
+ model_rd_for_sb(cpi, bsize, x, xd, 0, MAX_MB_PLANE - 1,
+ &tmp_rate, &tmp_dist, &skip_txfm_sb, &skip_sse_sb);
}
#if CONFIG_VP9_HIGHBITDEPTH
if (xd->cur_buf->flags & YV12_FLAG_HIGHBITDEPTH) {
@@ -7705,6 +7948,14 @@
#if CONFIG_OBMC
tmp_rd = RDCOST(x->rdmult, x->rddiv, *rate2, *distortion);
if (mbmi->obmc == 0 || (tmp_rd < best_rd)) {
+#if CONFIG_EXT_INTERP
+#if CONFIG_DUAL_FILTER
+ mbmi->interp_filter[0] = obmc_interp_filter[mbmi->obmc][0];
+ mbmi->interp_filter[1] = obmc_interp_filter[mbmi->obmc][1];
+#else
+ mbmi->interp_filter = obmc_interp_filter[mbmi->obmc];
+#endif // CONFIG_DUAL_FILTER
+#endif // CONFIG_EXT_INTERP
best_mbmi = *mbmi;
best_rd = tmp_rd;
best_rate2 = *rate2;
@@ -8042,6 +8293,9 @@
VP9_LAST4_FLAG,
#endif // CONFIG_EXT_REFS
VP9_GOLD_FLAG,
+#if !CONFIG_EXT_REFS && CONFIG_BIDIR_PRED
+ VP9_BWD_FLAG,
+#endif // !CONFIG_EXT_REFS && CONFIG_BIDIR_PRED
VP9_ALT_FLAG
};
int64_t best_rd = best_rd_so_far;
@@ -8220,8 +8474,17 @@
// Skip checking missing references in both single and compound reference
// modes. Note that a mode will be skipped iff both reference frames
// are masked out.
- ref_frame_skip_mask[0] |= (1 << ref_frame);
- ref_frame_skip_mask[1] |= SECOND_REF_FRAME_MASK;
+#if !CONFIG_EXT_REFS && CONFIG_BIDIR_PRED
+ if (ref_frame == BWDREF_FRAME || ref_frame == ALTREF_FRAME) {
+ ref_frame_skip_mask[0] |= (1 << ref_frame);
+ ref_frame_skip_mask[1] |= ((1 << ref_frame) | 0x01);
+ } else {
+#endif // !CONFIG_EXT_REFS && CONFIG_BIDIR_PRED
+ ref_frame_skip_mask[0] |= (1 << ref_frame);
+ ref_frame_skip_mask[1] |= SECOND_REF_FRAME_MASK;
+#if !CONFIG_EXT_REFS && CONFIG_BIDIR_PRED
+ }
+#endif // !CONFIG_EXT_REFS && CONFIG_BIDIR_PRED
} else {
for (i = LAST_FRAME; i <= ALTREF_FRAME; ++i) {
// Skip fixed mv modes for poor references
@@ -8255,9 +8518,15 @@
(1 << LAST2_FRAME) |
(1 << LAST3_FRAME) |
(1 << LAST4_FRAME) |
+#else // CONFIG_EXT_REFS
+#if CONFIG_BIDIR_PRED
+ (1 << BWDREF_FRAME) |
+#endif // CONFIG_BIDIR_PRED
#endif // CONFIG_EXT_REFS
(1 << GOLDEN_FRAME);
ref_frame_skip_mask[1] = SECOND_REF_FRAME_MASK;
+ // TODO(zoeliu): Further explore whether the following needs to be done
+ // for BWDREF_FRAME as well.
mode_skip_mask[ALTREF_FRAME] = ~INTER_NEAREST_NEAR_ZERO;
if (frame_mv[NEARMV][ALTREF_FRAME].as_int != 0)
mode_skip_mask[ALTREF_FRAME] |= (1 << NEARMV);
@@ -8433,8 +8702,17 @@
ref_frame_skip_mask[0] |= GOLDEN_FRAME_MODE_MASK;
ref_frame_skip_mask[1] |= SECOND_REF_FRAME_MASK;
break;
+#if !CONFIG_EXT_REFS && CONFIG_BIDIR_PRED
+ case BWDREF_FRAME:
+ ref_frame_skip_mask[0] |= BWD_REF_MODE_MASK;
+ ref_frame_skip_mask[1] |= SECOND_REF_FRAME_MASK;
+ break;
+#endif // !CONFIG_EXT_REFS && CONFIG_BIDIR_PRED
case ALTREF_FRAME:
ref_frame_skip_mask[0] |= ALT_REF_MODE_MASK;
+#if !CONFIG_EXT_REFS && CONFIG_BIDIR_PRED
+ ref_frame_skip_mask[1] |= SECOND_REF_FRAME_MASK;
+#endif // !CONFIG_EXT_REFS && CONFIG_BIDIR_PRED
break;
case NONE:
case MAX_REF_FRAMES:
@@ -8459,6 +8737,14 @@
comp_pred = second_ref_frame > INTRA_FRAME;
if (comp_pred) {
+#if !CONFIG_EXT_REFS && CONFIG_BIDIR_PRED
+ // TODO(zoeliu): Further justify whether the following check is needed.
+ if (cpi->twopass.gf_group.update_type[cpi->twopass.gf_group.index]
+ != LASTNRF_UPDATE && second_ref_frame == BWDREF_FRAME) {
+ continue;
+ }
+#endif // !CONFIG_EXT_REFS && CONFIG_BIDIR_PRED
+
if (!cpi->allow_comp_inter_inter)
continue;
@@ -9688,6 +9974,9 @@
VP9_LAST4_FLAG,
#endif // CONFIG_EXT_REFS
VP9_GOLD_FLAG,
+#if !CONFIG_EXT_REFS && CONFIG_BIDIR_PRED
+ VP9_BWD_FLAG,
+#endif // !CONFIG_EXT_REFS && CONFIG_BIDIR_PRED
VP9_ALT_FLAG
};
int64_t best_rd = best_rd_so_far;
@@ -9698,7 +9987,11 @@
int ref_index, best_ref_index = 0;
unsigned int ref_costs_single[MAX_REF_FRAMES], ref_costs_comp[MAX_REF_FRAMES];
vpx_prob comp_mode_p;
+#if CONFIG_DUAL_FILTER
+ INTERP_FILTER tmp_best_filter[4] = { 0 };
+#else
INTERP_FILTER tmp_best_filter = SWITCHABLE;
+#endif
int rate_uv_intra, rate_uv_tokenonly;
int64_t dist_uv;
int skip_uv;
@@ -9814,6 +10107,10 @@
(1 << LAST2_FRAME) |
(1 << LAST3_FRAME) |
(1 << LAST4_FRAME) |
+#else // CONFIG_EXT_REFS
+#if CONFIG_BIDIR_PRED
+ (1 << BWDREF_FRAME) |
+#endif // CONFIG_BIDIR_PRED
#endif // CONFIG_EXT_REFS
(1 << ALTREF_FRAME);
ref_frame_skip_mask[1] |= SECOND_REF_FRAME_MASK;
@@ -9850,18 +10147,37 @@
(1 << LAST2_FRAME) |
(1 << LAST3_FRAME) |
(1 << LAST4_FRAME) |
+#else // CONFIG_EXT_REFS
+#if CONFIG_BIDIR_PRED
+ (1 << BWDREF_FRAME) |
+#endif // CONFIG_BIDIR_PRED
#endif // CONFIG_EXT_REFS
(1 << ALTREF_FRAME);
ref_frame_skip_mask[1] |= SECOND_REF_FRAME_MASK;
break;
+#if !CONFIG_EXT_REFS && CONFIG_BIDIR_PRED
+ case BWDREF_FRAME:
+ ref_frame_skip_mask[0] |= (1 << LAST_FRAME) |
+ (1 << GOLDEN_FRAME) |
+ (1 << ALTREF_FRAME);
+ ref_frame_skip_mask[1] |= SECOND_REF_FRAME_MASK;
+ break;
+#endif // !CONFIG_EXT_REFS && CONFIG_BIDIR_PRED
case ALTREF_FRAME:
ref_frame_skip_mask[0] |= (1 << GOLDEN_FRAME) |
#if CONFIG_EXT_REFS
(1 << LAST2_FRAME) |
(1 << LAST3_FRAME) |
(1 << LAST4_FRAME) |
+#else // CONFIG_EXT_REFS
+#if CONFIG_BIDIR_PRED
+ (1 << BWDREF_FRAME) |
+#endif // CONFIG_BIDIR_PRED
#endif // CONFIG_EXT_REFS
(1 << LAST_FRAME);
+#if !CONFIG_EXT_REFS && CONFIG_BIDIR_PRED
+ ref_frame_skip_mask[1] |= SECOND_REF_FRAME_MASK;
+#endif // !CONFIG_EXT_REFS && CONFIG_BIDIR_PRED
break;
case NONE:
case MAX_REF_FRAMES:
@@ -9884,6 +10200,13 @@
comp_pred = second_ref_frame > INTRA_FRAME;
if (comp_pred) {
+#if !CONFIG_EXT_REFS && CONFIG_BIDIR_PRED
+ // TODO(zoeliu): Further justify whether the following check is needed.
+ if (cpi->twopass.gf_group.update_type[cpi->twopass.gf_group.index]
+ != LASTNRF_UPDATE && second_ref_frame == BWDREF_FRAME) {
+ continue;
+ }
+#endif // !CONFIG_EXT_REFS && CONFIG_BIDIR_PRED
if (!cpi->allow_comp_inter_inter)
continue;
if (!(cpi->ref_frame_flags & flag_list[second_ref_frame]))
@@ -9992,7 +10315,11 @@
&x->mbmi_ext->ref_mvs[second_ref_frame][0] : NULL;
b_mode_info tmp_best_bmodes[16]; // Should this be 4 ?
MB_MODE_INFO tmp_best_mbmode;
+#if CONFIG_DUAL_FILTER
+ BEST_SEG_INFO bsi[9];
+#else
BEST_SEG_INFO bsi[SWITCHABLE_FILTERS];
+#endif
int pred_exists = 0;
int uv_skippable;
#if CONFIG_EXT_INTER
@@ -10022,26 +10349,53 @@
mbmi->tx_type = DCT_DCT;
if (cm->interp_filter != BILINEAR) {
+#if CONFIG_DUAL_FILTER
+ tmp_best_filter[0] = EIGHTTAP_REGULAR;
+ tmp_best_filter[1] = EIGHTTAP_REGULAR;
+ tmp_best_filter[2] = EIGHTTAP_REGULAR;
+ tmp_best_filter[3] = EIGHTTAP_REGULAR;
+#else
tmp_best_filter = EIGHTTAP_REGULAR;
+#endif
if (x->source_variance < sf->disable_filter_search_var_thresh) {
+#if CONFIG_DUAL_FILTER
+ tmp_best_filter[0] = EIGHTTAP_REGULAR;
+#else
tmp_best_filter = EIGHTTAP_REGULAR;
+#endif
} else if (sf->adaptive_pred_interp_filter == 1 &&
ctx->pred_interp_filter < SWITCHABLE) {
+#if CONFIG_DUAL_FILTER
+ tmp_best_filter[0] = ctx->pred_interp_filter;
+#else
tmp_best_filter = ctx->pred_interp_filter;
+#endif
} else if (sf->adaptive_pred_interp_filter == 2) {
- tmp_best_filter = ctx->pred_interp_filter < SWITCHABLE ?
+#if CONFIG_DUAL_FILTER
+ tmp_best_filter[0] = ctx->pred_interp_filter < SWITCHABLE ?
ctx->pred_interp_filter : 0;
+#else
+ tmp_best_filter = ctx->pred_interp_filter < SWITCHABLE ?
+ ctx->pred_interp_filter : 0;
+#endif
} else {
+#if CONFIG_DUAL_FILTER
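+ // As above, when dual filters are enabled the loop covers the 9
+ // (vertical, horizontal) filter pairs in filter_sets[].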
+ for (switchable_filter_index = 0;
+ switchable_filter_index < 9;
+ ++switchable_filter_index) {
+#else
for (switchable_filter_index = 0;
switchable_filter_index < SWITCHABLE_FILTERS;
++switchable_filter_index) {
+#endif
int newbest, rs;
int64_t rs_rd;
MB_MODE_INFO_EXT *mbmi_ext = x->mbmi_ext;
#if CONFIG_DUAL_FILTER
- int dir;
- for (dir = 0; dir < 4; ++dir)
- mbmi->interp_filter[dir] = switchable_filter_index;
+ mbmi->interp_filter[0] = filter_sets[switchable_filter_index][0];
+ mbmi->interp_filter[1] = filter_sets[switchable_filter_index][1];
+ mbmi->interp_filter[2] = filter_sets[switchable_filter_index][0];
+ mbmi->interp_filter[3] = filter_sets[switchable_filter_index][1];
#else
mbmi->interp_filter = switchable_filter_index;
#endif
@@ -10077,7 +10431,10 @@
newbest = (tmp_rd < tmp_best_rd);
if (newbest) {
#if CONFIG_DUAL_FILTER
- tmp_best_filter = mbmi->interp_filter[0];
+ tmp_best_filter[0] = mbmi->interp_filter[0];
+ tmp_best_filter[1] = mbmi->interp_filter[1];
+ tmp_best_filter[2] = mbmi->interp_filter[2];
+ tmp_best_filter[3] = mbmi->interp_filter[3];
#else
tmp_best_filter = mbmi->interp_filter;
#endif
@@ -10113,9 +10470,14 @@
continue;
#if CONFIG_DUAL_FILTER
- for (i = 0; i < 4; ++i)
- mbmi->interp_filter[i] = (cm->interp_filter == SWITCHABLE ?
- tmp_best_filter : cm->interp_filter);
+ mbmi->interp_filter[0] = (cm->interp_filter == SWITCHABLE ?
+ tmp_best_filter[0] : cm->interp_filter);
+ mbmi->interp_filter[1] = (cm->interp_filter == SWITCHABLE ?
+ tmp_best_filter[1] : cm->interp_filter);
+ mbmi->interp_filter[2] = (cm->interp_filter == SWITCHABLE ?
+ tmp_best_filter[2] : cm->interp_filter);
+ mbmi->interp_filter[3] = (cm->interp_filter == SWITCHABLE ?
+ tmp_best_filter[3] : cm->interp_filter);
#else
mbmi->interp_filter = (cm->interp_filter == SWITCHABLE ?
tmp_best_filter : cm->interp_filter);
diff --git a/vp10/encoder/rdopt.h b/vp10/encoder/rdopt.h
index 2ca39a5..be6227b 100644
--- a/vp10/encoder/rdopt.h
+++ b/vp10/encoder/rdopt.h
@@ -90,24 +90,6 @@
int use_fast_coef_casting);
#endif // CONFIG_SUPERTX
-static INLINE const YV12_BUFFER_CONFIG *get_upsampled_ref(VP10_COMP *cpi,
- const int ref) {
- // Use up-sampled reference frames.
- int ref_idx = 0;
- if (ref == LAST_FRAME)
-#if CONFIG_EXT_REFS
- ref_idx = cpi->lst_fb_idxes[ref - LAST_FRAME];
-#else
- ref_idx = cpi->lst_fb_idx;
-#endif // CONFIG_EXT_REFS
- else if (ref == GOLDEN_FRAME)
- ref_idx = cpi->gld_fb_idx;
- else if (ref == ALTREF_FRAME)
- ref_idx = cpi->alt_fb_idx;
-
- return &cpi->upsampled_ref_bufs[cpi->upsampled_ref_idx[ref_idx]].buf;
-}
-
#if CONFIG_OBMC
void calc_target_weighted_pred(VP10_COMMON *cm,
MACROBLOCK *x,
diff --git a/vp10/encoder/speed_features.c b/vp10/encoder/speed_features.c
index 155f28e..b766cae 100644
--- a/vp10/encoder/speed_features.c
+++ b/vp10/encoder/speed_features.c
@@ -165,6 +165,7 @@
sf->use_transform_domain_distortion = 1;
#if CONFIG_EXT_INTER
sf->disable_wedge_search_var_thresh = 100;
+ sf->fast_wedge_sign_estimate = 1;
#endif // CONFIG_EXT_INTER
}
@@ -283,6 +284,7 @@
sf->use_upsampled_references = 0;
#if CONFIG_EXT_INTER
sf->disable_wedge_search_var_thresh = 100;
+ sf->fast_wedge_sign_estimate = 1;
#endif // CONFIG_EXT_INTER
// Use transform domain distortion computation
@@ -517,6 +519,7 @@
#endif // CONFIG_EXT_TILE
#if CONFIG_EXT_INTER
sf->disable_wedge_search_var_thresh = 0;
+ sf->fast_wedge_sign_estimate = 0;
#endif // CONFIG_EXT_INTER
for (i = 0; i < TX_SIZES; i++) {
diff --git a/vp10/encoder/speed_features.h b/vp10/encoder/speed_features.h
index 6cee748..ca6adbe 100644
--- a/vp10/encoder/speed_features.h
+++ b/vp10/encoder/speed_features.h
@@ -402,6 +402,9 @@
#if CONFIG_EXT_INTER
// A source variance threshold below which wedge search is disabled
unsigned int disable_wedge_search_var_thresh;
+
+ // Whether fast wedge sign estimate is used
+ int fast_wedge_sign_estimate;
#endif // CONFIG_EXT_INTER
// These bit masks allow you to enable or disable intra modes for each
diff --git a/vp10/encoder/x86/highbd_fwd_txfm_sse4.c b/vp10/encoder/x86/highbd_fwd_txfm_sse4.c
index ce9089e..dffdf20 100644
--- a/vp10/encoder/x86/highbd_fwd_txfm_sse4.c
+++ b/vp10/encoder/x86/highbd_fwd_txfm_sse4.c
@@ -15,6 +15,7 @@
#include "./vpx_config.h"
#include "vp10/common/vp10_fwd_txfm2d_cfg.h"
#include "vp10/common/vp10_txfm.h"
+#include "vp10/common/x86/highbd_txfm_utility_sse4.h"
#include "vpx_dsp/txfm_common.h"
#include "vpx_dsp/x86/txfm_common_sse2.h"
#include "vpx_ports/mem.h"
@@ -239,6 +240,43 @@
fadst4x4_sse4_1(in, cfg->cos_bit_row[2]);
write_buffer_4x4(in, coeff);
break;
+#if CONFIG_EXT_TX
+ case FLIPADST_DCT:
+ cfg = &fwd_txfm_2d_cfg_adst_dct_4;
+ load_buffer_4x4(input, in, input_stride, 1, 0, cfg->shift[0]);
+ fadst4x4_sse4_1(in, cfg->cos_bit_col[2]);
+ fdct4x4_sse4_1(in, cfg->cos_bit_row[2]);
+ write_buffer_4x4(in, coeff);
+ break;
+ case DCT_FLIPADST:
+ cfg = &fwd_txfm_2d_cfg_dct_adst_4;
+ load_buffer_4x4(input, in, input_stride, 0, 1, cfg->shift[0]);
+ fdct4x4_sse4_1(in, cfg->cos_bit_col[2]);
+ fadst4x4_sse4_1(in, cfg->cos_bit_row[2]);
+ write_buffer_4x4(in, coeff);
+ break;
+ case FLIPADST_FLIPADST:
+ cfg = &fwd_txfm_2d_cfg_adst_adst_4;
+ load_buffer_4x4(input, in, input_stride, 1, 1, cfg->shift[0]);
+ fadst4x4_sse4_1(in, cfg->cos_bit_col[2]);
+ fadst4x4_sse4_1(in, cfg->cos_bit_row[2]);
+ write_buffer_4x4(in, coeff);
+ break;
+ case ADST_FLIPADST:
+ cfg = &fwd_txfm_2d_cfg_adst_adst_4;
+ load_buffer_4x4(input, in, input_stride, 0, 1, cfg->shift[0]);
+ fadst4x4_sse4_1(in, cfg->cos_bit_col[2]);
+ fadst4x4_sse4_1(in, cfg->cos_bit_row[2]);
+ write_buffer_4x4(in, coeff);
+ break;
+ case FLIPADST_ADST:
+ cfg = &fwd_txfm_2d_cfg_adst_adst_4;
+ load_buffer_4x4(input, in, input_stride, 1, 0, cfg->shift[0]);
+ fadst4x4_sse4_1(in, cfg->cos_bit_col[2]);
+ fadst4x4_sse4_1(in, cfg->cos_bit_row[2]);
+ write_buffer_4x4(in, coeff);
+ break;
+#endif
default:
assert(0);
}
@@ -369,30 +407,6 @@
in[15] = _mm_srai_epi32(in[15], shift);
}
-#define TRANSPOSE_4X4(x0, x1, x2, x3, y0, y1, y2, y3) \
- do { \
- __m128i u0, u1, u2, u3; \
- u0 = _mm_unpacklo_epi32(x0, x1); \
- u1 = _mm_unpackhi_epi32(x0, x1); \
- u2 = _mm_unpacklo_epi32(x2, x3); \
- u3 = _mm_unpackhi_epi32(x2, x3); \
- y0 = _mm_unpacklo_epi64(u0, u2); \
- y1 = _mm_unpackhi_epi64(u0, u2); \
- y2 = _mm_unpacklo_epi64(u1, u3); \
- y3 = _mm_unpackhi_epi64(u1, u3); \
- } while (0)
-
-static INLINE void transpose_8x8(const __m128i *in, __m128i *out) {
- TRANSPOSE_4X4(in[0], in[2], in[4], in[6],
- out[0], out[2], out[4], out[6]);
- TRANSPOSE_4X4(in[1], in[3], in[5], in[7],
- out[8], out[10], out[12], out[14]);
- TRANSPOSE_4X4(in[8], in[10], in[12], in[14],
- out[1], out[3], out[5], out[7]);
- TRANSPOSE_4X4(in[9], in[11], in[13], in[15],
- out[9], out[11], out[13], out[15]);
-}
-
static INLINE void write_buffer_8x8(const __m128i *res, tran_low_t *output) {
_mm_store_si128((__m128i *)(output + 0 * 4), res[0]);
_mm_store_si128((__m128i *)(output + 1 * 4), res[1]);
@@ -960,6 +974,58 @@
transpose_8x8(out, in);
write_buffer_8x8(in, coeff);
break;
+#if CONFIG_EXT_TX
+ case FLIPADST_DCT:
+ cfg = &fwd_txfm_2d_cfg_adst_dct_8;
+ load_buffer_8x8(input, in, stride, 1, 0, cfg->shift[0]);
+ fadst8x8_sse4_1(in, out, cfg->cos_bit_col[2]);
+ col_txfm_8x8_rounding(out, -cfg->shift[1]);
+ transpose_8x8(out, in);
+ fdct8x8_sse4_1(in, out, cfg->cos_bit_row[2]);
+ transpose_8x8(out, in);
+ write_buffer_8x8(in, coeff);
+ break;
+ case DCT_FLIPADST:
+ cfg = &fwd_txfm_2d_cfg_dct_adst_8;
+ load_buffer_8x8(input, in, stride, 0, 1, cfg->shift[0]);
+ fdct8x8_sse4_1(in, out, cfg->cos_bit_col[2]);
+ col_txfm_8x8_rounding(out, -cfg->shift[1]);
+ transpose_8x8(out, in);
+ fadst8x8_sse4_1(in, out, cfg->cos_bit_row[2]);
+ transpose_8x8(out, in);
+ write_buffer_8x8(in, coeff);
+ break;
+ case FLIPADST_FLIPADST:
+ cfg = &fwd_txfm_2d_cfg_adst_adst_8;
+ load_buffer_8x8(input, in, stride, 1, 1, cfg->shift[0]);
+ fadst8x8_sse4_1(in, out, cfg->cos_bit_col[2]);
+ col_txfm_8x8_rounding(out, -cfg->shift[1]);
+ transpose_8x8(out, in);
+ fadst8x8_sse4_1(in, out, cfg->cos_bit_row[2]);
+ transpose_8x8(out, in);
+ write_buffer_8x8(in, coeff);
+ break;
+ case ADST_FLIPADST:
+ cfg = &fwd_txfm_2d_cfg_adst_adst_8;
+ load_buffer_8x8(input, in, stride, 0, 1, cfg->shift[0]);
+ fadst8x8_sse4_1(in, out, cfg->cos_bit_col[2]);
+ col_txfm_8x8_rounding(out, -cfg->shift[1]);
+ transpose_8x8(out, in);
+ fadst8x8_sse4_1(in, out, cfg->cos_bit_row[2]);
+ transpose_8x8(out, in);
+ write_buffer_8x8(in, coeff);
+ break;
+ case FLIPADST_ADST:
+ cfg = &fwd_txfm_2d_cfg_adst_adst_8;
+ load_buffer_8x8(input, in, stride, 1, 0, cfg->shift[0]);
+ fadst8x8_sse4_1(in, out, cfg->cos_bit_col[2]);
+ col_txfm_8x8_rounding(out, -cfg->shift[1]);
+ transpose_8x8(out, in);
+ fadst8x8_sse4_1(in, out, cfg->cos_bit_row[2]);
+ transpose_8x8(out, in);
+ write_buffer_8x8(in, coeff);
+ break;
+#endif // CONFIG_EXT_TX
default:
assert(0);
}
@@ -1708,47 +1774,6 @@
col_txfm_8x8_rounding(&in[48], shift);
}
-static void transpose_16x16(const __m128i *in, __m128i *out) {
- // Upper left 8x8
- TRANSPOSE_4X4(in[0], in[4], in[8], in[12],
- out[0], out[4], out[8], out[12]);
- TRANSPOSE_4X4(in[1], in[5], in[9], in[13],
- out[16], out[20], out[24], out[28]);
- TRANSPOSE_4X4(in[16], in[20], in[24], in[28],
- out[1], out[5], out[9], out[13]);
- TRANSPOSE_4X4(in[17], in[21], in[25], in[29],
- out[17], out[21], out[25], out[29]);
-
- // Upper right 8x8
- TRANSPOSE_4X4(in[2], in[6], in[10], in[14],
- out[32], out[36], out[40], out[44]);
- TRANSPOSE_4X4(in[3], in[7], in[11], in[15],
- out[48], out[52], out[56], out[60]);
- TRANSPOSE_4X4(in[18], in[22], in[26], in[30],
- out[33], out[37], out[41], out[45]);
- TRANSPOSE_4X4(in[19], in[23], in[27], in[31],
- out[49], out[53], out[57], out[61]);
-
- // Lower left 8x8
- TRANSPOSE_4X4(in[32], in[36], in[40], in[44],
- out[2], out[6], out[10], out[14]);
- TRANSPOSE_4X4(in[33], in[37], in[41], in[45],
- out[18], out[22], out[26], out[30]);
- TRANSPOSE_4X4(in[48], in[52], in[56], in[60],
- out[3], out[7], out[11], out[15]);
- TRANSPOSE_4X4(in[49], in[53], in[57], in[61],
- out[19], out[23], out[27], out[31]);
- // Lower right 8x8
- TRANSPOSE_4X4(in[34], in[38], in[42], in[46],
- out[34], out[38], out[42], out[46]);
- TRANSPOSE_4X4(in[35], in[39], in[43], in[47],
- out[50], out[54], out[58], out[62]);
- TRANSPOSE_4X4(in[50], in[54], in[58], in[62],
- out[35], out[39], out[43], out[47]);
- TRANSPOSE_4X4(in[51], in[55], in[59], in[63],
- out[51], out[55], out[59], out[63]);
-}
-
static void write_buffer_16x16(const __m128i *in, tran_low_t *output) {
const int size_8x8 = 16 * 4;
write_buffer_8x8(&in[0], output);
@@ -1806,6 +1831,58 @@
transpose_16x16(out, in);
write_buffer_16x16(in, coeff);
break;
+#if CONFIG_EXT_TX
+ case FLIPADST_DCT:
+ cfg = &fwd_txfm_2d_cfg_adst_dct_16;
+ load_buffer_16x16(input, in, stride, 1, 0, cfg->shift[0]);
+ fadst16x16_sse4_1(in, out, cfg->cos_bit_col[0]);
+ col_txfm_16x16_rounding(out, -cfg->shift[1]);
+ transpose_16x16(out, in);
+ fdct16x16_sse4_1(in, out, cfg->cos_bit_row[0]);
+ transpose_16x16(out, in);
+ write_buffer_16x16(in, coeff);
+ break;
+ case DCT_FLIPADST:
+ cfg = &fwd_txfm_2d_cfg_dct_adst_16;
+ load_buffer_16x16(input, in, stride, 0, 1, cfg->shift[0]);
+ fdct16x16_sse4_1(in, out, cfg->cos_bit_col[0]);
+ col_txfm_16x16_rounding(out, -cfg->shift[1]);
+ transpose_16x16(out, in);
+ fadst16x16_sse4_1(in, out, cfg->cos_bit_row[0]);
+ transpose_16x16(out, in);
+ write_buffer_16x16(in, coeff);
+ break;
+ case FLIPADST_FLIPADST:
+ cfg = &fwd_txfm_2d_cfg_adst_adst_16;
+ load_buffer_16x16(input, in, stride, 1, 1, cfg->shift[0]);
+ fadst16x16_sse4_1(in, out, cfg->cos_bit_col[0]);
+ col_txfm_16x16_rounding(out, -cfg->shift[1]);
+ transpose_16x16(out, in);
+ fadst16x16_sse4_1(in, out, cfg->cos_bit_row[0]);
+ transpose_16x16(out, in);
+ write_buffer_16x16(in, coeff);
+ break;
+ case ADST_FLIPADST:
+ cfg = &fwd_txfm_2d_cfg_adst_adst_16;
+ load_buffer_16x16(input, in, stride, 0, 1, cfg->shift[0]);
+ fadst16x16_sse4_1(in, out, cfg->cos_bit_col[0]);
+ col_txfm_16x16_rounding(out, -cfg->shift[1]);
+ transpose_16x16(out, in);
+ fadst16x16_sse4_1(in, out, cfg->cos_bit_row[0]);
+ transpose_16x16(out, in);
+ write_buffer_16x16(in, coeff);
+ break;
+ case FLIPADST_ADST:
+ cfg = &fwd_txfm_2d_cfg_adst_adst_16;
+ load_buffer_16x16(input, in, stride, 1, 0, cfg->shift[0]);
+ fadst16x16_sse4_1(in, out, cfg->cos_bit_col[0]);
+ col_txfm_16x16_rounding(out, -cfg->shift[1]);
+ transpose_16x16(out, in);
+ fadst16x16_sse4_1(in, out, cfg->cos_bit_row[0]);
+ transpose_16x16(out, in);
+ write_buffer_16x16(in, coeff);
+ break;
+#endif // CONFIG_EXT_TX
default:
assert(0);
}
diff --git a/vp10/vp10_common.mk b/vp10/vp10_common.mk
index 89b0edb..650b6f3 100644
--- a/vp10/vp10_common.mk
+++ b/vp10/vp10_common.mk
@@ -112,6 +112,10 @@
VP10_COMMON_SRCS-$(HAVE_SSE4_1) += common/x86/vp10_fwd_txfm1d_sse4.c
VP10_COMMON_SRCS-$(HAVE_SSE4_1) += common/x86/vp10_fwd_txfm2d_sse4.c
+ifeq ($(CONFIG_VP9_HIGHBITDEPTH),yes)
+VP10_COMMON_SRCS-$(HAVE_SSE4_1) += common/x86/highbd_txfm_utility_sse4.h
+endif
+
ifneq ($(CONFIG_VP9_HIGHBITDEPTH),yes)
VP10_COMMON_SRCS-$(HAVE_NEON) += common/arm/neon/iht4x4_add_neon.c
VP10_COMMON_SRCS-$(HAVE_NEON) += common/arm/neon/iht8x8_add_neon.c
diff --git a/vp10/vp10_cx_iface.c b/vp10/vp10_cx_iface.c
index 0cad961..bed6648 100644
--- a/vp10/vp10_cx_iface.c
+++ b/vp10/vp10_cx_iface.c
@@ -25,6 +25,9 @@
struct vp10_extracfg {
int cpu_used; // available cpu percentage in 1/16
unsigned int enable_auto_alt_ref;
+#if !CONFIG_EXT_REFS && CONFIG_BIDIR_PRED
+ unsigned int enable_auto_bwd_ref;
+#endif // !CONFIG_EXT_REFS && CONFIG_BIDIR_PRED
unsigned int noise_sensitivity;
unsigned int sharpness;
unsigned int static_thresh;
@@ -55,6 +58,9 @@
static struct vp10_extracfg default_extra_cfg = {
0, // cpu_used
1, // enable_auto_alt_ref
+#if !CONFIG_EXT_REFS && CONFIG_BIDIR_PRED
+ 0, // enable_auto_bwd_ref
+#endif // !CONFIG_EXT_REFS && CONFIG_BIDIR_PRED
0, // noise_sensitivity
0, // sharpness
0, // static_thresh
@@ -199,6 +205,9 @@
"or kf_max_dist instead.");
RANGE_CHECK(extra_cfg, enable_auto_alt_ref, 0, 2);
+#if !CONFIG_EXT_REFS && CONFIG_BIDIR_PRED
+ RANGE_CHECK(extra_cfg, enable_auto_bwd_ref, 0, 2);
+#endif // !CONFIG_EXT_REFS && CONFIG_BIDIR_PRED
RANGE_CHECK(extra_cfg, cpu_used, -8, 8);
RANGE_CHECK_HI(extra_cfg, noise_sensitivity, 6);
RANGE_CHECK(extra_cfg, superblock_size,
@@ -411,6 +420,9 @@
oxcf->speed = abs(extra_cfg->cpu_used);
oxcf->encode_breakout = extra_cfg->static_thresh;
oxcf->enable_auto_arf = extra_cfg->enable_auto_alt_ref;
+#if !CONFIG_EXT_REFS && CONFIG_BIDIR_PRED
+ oxcf->enable_auto_brf = extra_cfg->enable_auto_bwd_ref;
+#endif // !CONFIG_EXT_REFS && CONFIG_BIDIR_PRED
oxcf->noise_sensitivity = extra_cfg->noise_sensitivity;
oxcf->sharpness = extra_cfg->sharpness;
@@ -574,6 +586,15 @@
return update_extra_cfg(ctx, &extra_cfg);
}
+#if !CONFIG_EXT_REFS && CONFIG_BIDIR_PRED
+static vpx_codec_err_t ctrl_set_enable_auto_bwd_ref(vpx_codec_alg_priv_t *ctx,
+ va_list args) {
+ struct vp10_extracfg extra_cfg = ctx->extra_cfg;
+ extra_cfg.enable_auto_bwd_ref = CAST(VP8E_SET_ENABLEAUTOBWDREF, args);
+ return update_extra_cfg(ctx, &extra_cfg);
+}
+#endif // !CONFIG_EXT_REFS && CONFIG_BIDIR_PRED
+
static vpx_codec_err_t ctrl_set_noise_sensitivity(vpx_codec_alg_priv_t *ctx,
va_list args) {
struct vp10_extracfg extra_cfg = ctx->extra_cfg;
@@ -924,8 +945,12 @@
if (res == VPX_CODEC_OK) {
// There's no codec control for multiple alt-refs so check the encoder
// instance for its status to determine the compressed data size.
+#if !CONFIG_EXT_REFS && CONFIG_BIDIR_PRED
+ data_sz = ctx->cfg.g_w * ctx->cfg.g_h * get_image_bps(img);
+#else
data_sz = ctx->cfg.g_w * ctx->cfg.g_h * get_image_bps(img) / 8 *
(cpi->multi_arf_allowed ? 8 : 2);
+#endif // !CONFIG_EXT_REFS && CONFIG_BIDIR_PRED
if (data_sz < 4096)
data_sz = 4096;
if (ctx->cx_data == NULL || ctx->cx_data_sz < data_sz) {
@@ -1141,6 +1166,24 @@
}
}
+static vpx_codec_err_t ctrl_get_new_frame_image(vpx_codec_alg_priv_t *ctx,
+ va_list args) {
+ vpx_image_t *const new_img = va_arg(args, vpx_image_t *);
+
+ if (new_img != NULL) {
+ YV12_BUFFER_CONFIG new_frame;
+
+ if (vp10_get_last_show_frame(ctx->cpi, &new_frame) == 0) {
+ yuvconfig2image(new_img, &new_frame, NULL);
+ return VPX_CODEC_OK;
+ } else {
+ return VPX_CODEC_ERROR;
+ }
+ } else {
+ return VPX_CODEC_INVALID_PARAM;
+ }
+}
+
static vpx_codec_err_t ctrl_set_previewpp(vpx_codec_alg_priv_t *ctx,
va_list args) {
#if CONFIG_VP9_POSTPROC
@@ -1299,6 +1342,9 @@
{VP8E_SET_SCALEMODE, ctrl_set_scale_mode},
{VP8E_SET_CPUUSED, ctrl_set_cpuused},
{VP8E_SET_ENABLEAUTOALTREF, ctrl_set_enable_auto_alt_ref},
+#if !CONFIG_EXT_REFS && CONFIG_BIDIR_PRED
+ {VP8E_SET_ENABLEAUTOBWDREF, ctrl_set_enable_auto_bwd_ref},
+#endif // !CONFIG_EXT_REFS && CONFIG_BIDIR_PRED
{VP8E_SET_SHARPNESS, ctrl_set_sharpness},
{VP8E_SET_STATIC_THRESHOLD, ctrl_set_static_thresh},
{VP9E_SET_TILE_COLUMNS, ctrl_set_tile_columns},
@@ -1330,6 +1376,7 @@
{VP8E_GET_LAST_QUANTIZER_64, ctrl_get_quantizer64},
{VP9_GET_REFERENCE, ctrl_get_reference},
{VP9E_GET_ACTIVEMAP, ctrl_get_active_map},
+ {VP10_GET_NEW_FRAME_IMAGE, ctrl_get_new_frame_image},
{ -1, NULL},
};
diff --git a/vp10/vp10_dx_iface.c b/vp10/vp10_dx_iface.c
index cf6ab56..d5c4c1c 100644
--- a/vp10/vp10_dx_iface.c
+++ b/vp10/vp10_dx_iface.c
@@ -58,6 +58,8 @@
int last_show_frame; // Index of last output frame.
int byte_alignment;
int skip_loop_filter;
+ int decode_tile_row;
+ int decode_tile_col;
// Frame parallel related.
int frame_parallel_decode; // frame-based threading.
@@ -501,8 +503,8 @@
frame_worker_data->pbi->decrypt_state = ctx->decrypt_state;
#if CONFIG_EXT_TILE
- frame_worker_data->pbi->dec_tile_row = ctx->cfg.tile_row;
- frame_worker_data->pbi->dec_tile_col = ctx->cfg.tile_col;
+ frame_worker_data->pbi->dec_tile_row = ctx->decode_tile_row;
+ frame_worker_data->pbi->dec_tile_col = ctx->decode_tile_col;
#endif // CONFIG_EXT_TILE
worker->had_error = 0;
@@ -919,6 +921,32 @@
}
}
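+
+// Hedged usage sketch (handle names assumed): an application that wants the
+// most recently decoded frame could call, after vpx_codec_decode():
+//   vpx_image_t img;
+//   if (vpx_codec_control(&codec, VP10_GET_NEW_FRAME_IMAGE, &img) == VPX_CODEC_OK) {
+//     /* img now aliases the decoder's internal frame buffers */
+//   }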
+static vpx_codec_err_t ctrl_get_new_frame_image(vpx_codec_alg_priv_t *ctx,
+ va_list args) {
+ vpx_image_t *new_img = va_arg(args, vpx_image_t *);
+
+ // Only support this function in serial decode.
+ if (ctx->frame_parallel_decode) {
+ set_error_detail(ctx, "Not supported in frame parallel decode");
+ return VPX_CODEC_INCAPABLE;
+ }
+
+ if (new_img) {
+ YV12_BUFFER_CONFIG new_frame;
+ VPxWorker *const worker = ctx->frame_workers;
+ FrameWorkerData *const frame_worker_data = (FrameWorkerData *)worker->data1;
+
+ if (vp10_get_frame_to_show(frame_worker_data->pbi, &new_frame) == 0) {
+ yuvconfig2image(new_img, &new_frame, NULL);
+ return VPX_CODEC_OK;
+ } else {
+ return VPX_CODEC_ERROR;
+ }
+ } else {
+ return VPX_CODEC_INVALID_PARAM;
+ }
+}
+
static vpx_codec_err_t ctrl_set_postproc(vpx_codec_alg_priv_t *ctx,
va_list args) {
#if CONFIG_VP9_POSTPROC
@@ -1118,6 +1146,18 @@
return VPX_CODEC_OK;
}
+static vpx_codec_err_t ctrl_set_decode_tile_row(vpx_codec_alg_priv_t *ctx,
+ va_list args) {
+ ctx->decode_tile_row = va_arg(args, int);
+ return VPX_CODEC_OK;
+}
+
+static vpx_codec_err_t ctrl_set_decode_tile_col(vpx_codec_alg_priv_t *ctx,
+ va_list args) {
+ ctx->decode_tile_col = va_arg(args, int);
+ return VPX_CODEC_OK;
+}
+
static vpx_codec_ctrl_fn_map_t decoder_ctrl_maps[] = {
{VP8_COPY_REFERENCE, ctrl_copy_reference},
@@ -1132,6 +1172,8 @@
{VPXD_SET_DECRYPTOR, ctrl_set_decryptor},
{VP9_SET_BYTE_ALIGNMENT, ctrl_set_byte_alignment},
{VP9_SET_SKIP_LOOP_FILTER, ctrl_set_skip_loop_filter},
+ {VP10_SET_DECODE_TILE_ROW, ctrl_set_decode_tile_row},
+ {VP10_SET_DECODE_TILE_COL, ctrl_set_decode_tile_col},
// Getters
{VP8D_GET_LAST_REF_UPDATES, ctrl_get_last_ref_updates},
@@ -1140,6 +1182,7 @@
{VP9D_GET_DISPLAY_SIZE, ctrl_get_render_size},
{VP9D_GET_BIT_DEPTH, ctrl_get_bit_depth},
{VP9D_GET_FRAME_SIZE, ctrl_get_frame_size},
+ {VP10_GET_NEW_FRAME_IMAGE, ctrl_get_new_frame_image},
{ -1, NULL},
};
diff --git a/vp10/vp10cx.mk b/vp10/vp10cx.mk
index da90fe6..1aaac15 100644
--- a/vp10/vp10cx.mk
+++ b/vp10/vp10cx.mk
@@ -118,6 +118,7 @@
VP10_CX_SRCS-$(HAVE_SSSE3) += encoder/x86/dct_ssse3.c
ifeq ($(CONFIG_VP9_HIGHBITDEPTH),yes)
VP10_CX_SRCS-$(HAVE_SSE4_1) += encoder/x86/highbd_fwd_txfm_sse4.c
+VP10_CX_SRCS-$(HAVE_SSE4_1) += common/x86/highbd_inv_txfm_sse4.c
endif
ifeq ($(CONFIG_VP9_TEMPORAL_DENOISING),yes)
diff --git a/vpx/vp8.h b/vpx/vp8.h
index 8a035f9..ba67c38 100644
--- a/vpx/vp8.h
+++ b/vpx/vp8.h
@@ -56,6 +56,9 @@
*/
VP9_GET_REFERENCE = 128, /**< get a pointer to a reference frame */
VP8_COMMON_CTRL_ID_MAX,
+
+ VP10_GET_NEW_FRAME_IMAGE = 192, /**< get a pointer to the new frame */
+
VP8_DECODER_CTRL_ID_START = 256
};
@@ -137,6 +140,8 @@
#define VPX_CTRL_VP8_SET_DBG_DISPLAY_MV
VPX_CTRL_USE_TYPE(VP9_GET_REFERENCE, vp9_ref_frame_t *)
#define VPX_CTRL_VP9_GET_REFERENCE
+VPX_CTRL_USE_TYPE(VP10_GET_NEW_FRAME_IMAGE, vpx_image_t *)
+#define VPX_CTRL_VP10_GET_NEW_FRAME_IMAGE
/*!\endcond */
/*! @} - end defgroup vp8 */
diff --git a/vpx/vp8cx.h b/vpx/vp8cx.h
index 1306481..4d9a2a7 100644
--- a/vpx/vp8cx.h
+++ b/vpx/vp8cx.h
@@ -184,6 +184,15 @@
*/
VP8E_SET_ENABLEAUTOALTREF,
+#if !CONFIG_EXT_REFS && CONFIG_BIDIR_PRED
+ /*!\brief Codec control function to enable automatic setting and use of
+ * bwd-pred (backward-prediction) frames.
+ *
+ * Supported in codecs: VP10
+ */
+ VP8E_SET_ENABLEAUTOBWDREF,
+#endif // !CONFIG_EXT_REFS && CONFIG_BIDIR_PRED
+
/*!\brief control function to set noise sensitivity
*
* 0: off, 1: OnYOnly, 2: OnYUV,
@@ -744,6 +753,12 @@
#define VPX_CTRL_VP8E_SET_CPUUSED
VPX_CTRL_USE_TYPE(VP8E_SET_ENABLEAUTOALTREF, unsigned int)
#define VPX_CTRL_VP8E_SET_ENABLEAUTOALTREF
+
+#if !CONFIG_EXT_REFS && CONFIG_BIDIR_PRED
+VPX_CTRL_USE_TYPE(VP8E_SET_ENABLEAUTOBWDREF, unsigned int)
+#define VPX_CTRL_VP8E_SET_ENABLEAUTOBWDREF
+#endif // !CONFIG_EXT_REFS && CONFIG_BIDIR_PRED
+
VPX_CTRL_USE_TYPE(VP8E_SET_NOISE_SENSITIVITY, unsigned int)
#define VPX_CTRL_VP8E_SET_NOISE_SENSITIVITY
VPX_CTRL_USE_TYPE(VP8E_SET_SHARPNESS, unsigned int)
diff --git a/vpx/vp8dx.h b/vpx/vp8dx.h
index 1f02fd5..347521e 100644
--- a/vpx/vp8dx.h
+++ b/vpx/vp8dx.h
@@ -121,7 +121,16 @@
*/
VP9_SET_SKIP_LOOP_FILTER,
- VP8_DECODER_CTRL_ID_MAX
+ VP8_DECODER_CTRL_ID_MAX,
+
+ /** control function to set the range of tile decoding. A value that is
+ * greater than or equal to zero indicates that only the specified
+ * row/column is decoded. A value of -1 indicates that the whole row/column
+ * is decoded. As a special case, when both values are -1 the whole frame
+ * is decoded.
+ */
+ VP10_SET_DECODE_TILE_ROW,
+ VP10_SET_DECODE_TILE_COL
};
/** Decrypt n bytes of data from input -> output, using the decrypt_state
@@ -174,7 +183,10 @@
#define VPX_CTRL_VP9D_GET_FRAME_SIZE
VPX_CTRL_USE_TYPE(VP9_INVERT_TILE_DECODE_ORDER, int)
#define VPX_CTRL_VP9_INVERT_TILE_DECODE_ORDER
-
+VPX_CTRL_USE_TYPE(VP10_SET_DECODE_TILE_ROW, int)
+#define VPX_CTRL_VP10_SET_DECODE_TILE_ROW
+VPX_CTRL_USE_TYPE(VP10_SET_DECODE_TILE_COL, int)
+#define VPX_CTRL_VP10_SET_DECODE_TILE_COL
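+// Hedged usage sketch (decoder handle name assumed): to decode only the tile
+// at row 1, column 2 of each frame:
+//   vpx_codec_control(&decoder, VP10_SET_DECODE_TILE_ROW, 1);
+//   vpx_codec_control(&decoder, VP10_SET_DECODE_TILE_COL, 2);
+// Passing -1 for either control restores decoding of all rows/columns.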
/*!\endcond */
/*! @} - end defgroup vp8_decoder */
diff --git a/vpx/vpx_decoder.h b/vpx/vpx_decoder.h
index d4ba986..62fd919 100644
--- a/vpx/vpx_decoder.h
+++ b/vpx/vpx_decoder.h
@@ -108,10 +108,6 @@
unsigned int threads; /**< Maximum number of threads to use, default 1 */
unsigned int w; /**< Width */
unsigned int h; /**< Height */
- int tile_row; /**< The index of row tile to be decoded.
- Value -1 means to decode all row tiles. */
- int tile_col; /**< The index of column tile to be decoded.
- Value -1 means to decode all column tiles */
} vpx_codec_dec_cfg_t; /**< alias for struct vpx_codec_dec_cfg */
diff --git a/vpx_dsp/blend_mask6.c b/vpx_dsp/blend_mask6.c
new file mode 100644
index 0000000..584ee6a
--- /dev/null
+++ b/vpx_dsp/blend_mask6.c
@@ -0,0 +1,152 @@
+/*
+ * Copyright (c) 2016 The WebM project authors. All Rights Reserved.
+ *
+ * Use of this source code is governed by a BSD-style license
+ * that can be found in the LICENSE file in the root of the source
+ * tree. An additional intellectual property rights grant can be found
+ * in the file PATENTS. All contributing project authors may
+ * be found in the AUTHORS file in the root of the source tree.
+ */
+
+#include <assert.h>
+
+#include "vpx/vpx_integer.h"
+#include "vpx_ports/mem.h"
+#include "vpx_dsp/vpx_dsp_common.h"
+
+#include "./vpx_dsp_rtcd.h"
+
+#define MASK_BITS 6
+
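+// Each output pixel is a 6-bit weighted average of the two sources:
+//   dst = (src0 * m0 + src1 * (64 - m0) + 32) >> 6
+// When subw/subh are set, the mask is supplied at twice the resolution in
+// that dimension and is first averaged down (with rounding) to one weight
+// per output pixel.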
+void vpx_blend_mask6_c(uint8_t *dst, uint32_t dst_stride,
+ uint8_t *src0, uint32_t src0_stride,
+ uint8_t *src1, uint32_t src1_stride,
+ const uint8_t *mask, uint32_t mask_stride,
+ int h, int w, int subh, int subw) {
+ int i, j;
+
+ assert(IMPLIES(src0 == dst, src0_stride == dst_stride));
+ assert(IMPLIES(src1 == dst, src1_stride == dst_stride));
+
+ assert(h >= 4);
+ assert(w >= 4);
+ assert(IS_POWER_OF_TWO(h));
+ assert(IS_POWER_OF_TWO(w));
+
+ if (subw == 0 && subh == 0) {
+ for (i = 0; i < h; ++i)
+ for (j = 0; j < w; ++j) {
+ const int m0 = mask[i * mask_stride + j];
+ const int m1 = ((1 << MASK_BITS) - m0);
+ dst[i * dst_stride + j] =
+ ROUND_POWER_OF_TWO(src0[i * src0_stride + j] * m0 +
+ src1[i * src1_stride + j] * m1, MASK_BITS);
+ }
+ } else if (subw == 1 && subh == 1) {
+ for (i = 0; i < h; ++i)
+ for (j = 0; j < w; ++j) {
+ const int m0 =
+ ROUND_POWER_OF_TWO(mask[(2 * i) * mask_stride + (2 * j)] +
+ mask[(2 * i + 1) * mask_stride + (2 * j)] +
+ mask[(2 * i) * mask_stride + (2 * j + 1)] +
+ mask[(2 * i + 1) * mask_stride + (2 * j + 1)],
+ 2);
+ const int m1 = ((1 << MASK_BITS) - m0);
+ dst[i * dst_stride + j] =
+ ROUND_POWER_OF_TWO(src0[i * src0_stride + j] * m0 +
+ src1[i * src1_stride + j] * m1, MASK_BITS);
+ }
+ } else if (subw == 1 && subh == 0) {
+ for (i = 0; i < h; ++i)
+ for (j = 0; j < w; ++j) {
+ const int m0 =
+ ROUND_POWER_OF_TWO(mask[i * mask_stride + (2 * j)] +
+ mask[i * mask_stride + (2 * j + 1)], 1);
+ const int m1 = ((1 << MASK_BITS) - m0);
+ dst[i * dst_stride + j] =
+ ROUND_POWER_OF_TWO(src0[i * src0_stride + j] * m0 +
+ src1[i * src1_stride + j] * m1, MASK_BITS);
+ }
+ } else {
+ for (i = 0; i < h; ++i)
+ for (j = 0; j < w; ++j) {
+ const int m0 =
+ ROUND_POWER_OF_TWO(mask[(2 * i) * mask_stride + j] +
+ mask[(2 * i + 1) * mask_stride + j], 1);
+ const int m1 = ((1 << MASK_BITS) - m0);
+ dst[i * dst_stride + j] =
+ ROUND_POWER_OF_TWO(src0[i * src0_stride + j] * m0 +
+ src1[i * src1_stride + j] * m1, MASK_BITS);
+ }
+ }
+}
+
+#if CONFIG_VP9_HIGHBITDEPTH
+void vpx_highbd_blend_mask6_c(uint8_t *dst_8, uint32_t dst_stride,
+ uint8_t *src0_8, uint32_t src0_stride,
+ uint8_t *src1_8, uint32_t src1_stride,
+ const uint8_t *mask, uint32_t mask_stride,
+ int h, int w, int subh, int subw, int bd) {
+ int i, j;
+ uint16_t *dst = CONVERT_TO_SHORTPTR(dst_8);
+ uint16_t *src0 = CONVERT_TO_SHORTPTR(src0_8);
+ uint16_t *src1 = CONVERT_TO_SHORTPTR(src1_8);
+
+ assert(IMPLIES(src0 == dst, src0_stride == dst_stride));
+ assert(IMPLIES(src1 == dst, src1_stride == dst_stride));
+
+ assert(h >= 4);
+ assert(w >= 4);
+ assert(IS_POWER_OF_TWO(h));
+ assert(IS_POWER_OF_TWO(w));
+
+ assert(bd == 8 || bd == 10 || bd == 12);
+
+ if (subw == 0 && subh == 0) {
+ for (i = 0; i < h; ++i)
+ for (j = 0; j < w; ++j) {
+ const int m0 = mask[i * mask_stride + j];
+ const int m1 = ((1 << MASK_BITS) - m0);
+ dst[i * dst_stride + j] =
+ ROUND_POWER_OF_TWO(src0[i * src0_stride + j] * m0 +
+ src1[i * src1_stride + j] * m1, MASK_BITS);
+ }
+ } else if (subw == 1 && subh == 1) {
+ for (i = 0; i < h; ++i)
+ for (j = 0; j < w; ++j) {
+ const int m0 =
+ ROUND_POWER_OF_TWO(mask[(2 * i) * mask_stride + (2 * j)] +
+ mask[(2 * i + 1) * mask_stride + (2 * j)] +
+ mask[(2 * i) * mask_stride + (2 * j + 1)] +
+ mask[(2 * i + 1) * mask_stride + (2 * j + 1)],
+ 2);
+ const int m1 = ((1 << MASK_BITS) - m0);
+ dst[i * dst_stride + j] =
+ ROUND_POWER_OF_TWO(src0[i * src0_stride + j] * m0 +
+ src1[i * src1_stride + j] * m1, MASK_BITS);
+ }
+ } else if (subw == 1 && subh == 0) {
+ for (i = 0; i < h; ++i)
+ for (j = 0; j < w; ++j) {
+ const int m0 =
+ ROUND_POWER_OF_TWO(mask[i * mask_stride + (2 * j)] +
+ mask[i * mask_stride + (2 * j + 1)], 1);
+ const int m1 = ((1 << MASK_BITS) - m0);
+ dst[i * dst_stride + j] =
+ ROUND_POWER_OF_TWO(src0[i * src0_stride + j] * m0 +
+ src1[i * src1_stride + j] * m1, MASK_BITS);
+ }
+ } else {
+ for (i = 0; i < h; ++i)
+ for (j = 0; j < w; ++j) {
+ const int m0 =
+ ROUND_POWER_OF_TWO(mask[(2 * i) * mask_stride + j] +
+ mask[(2 * i + 1) * mask_stride + j], 1);
+ const int m1 = ((1 << MASK_BITS) - m0);
+ dst[i * dst_stride + j] =
+ ROUND_POWER_OF_TWO(src0[i * src0_stride + j] * m0 +
+ src1[i * src1_stride + j] * m1, MASK_BITS);
+ }
+ }
+}
+#endif // CONFIG_VP9_HIGHBITDEPTH
diff --git a/vpx_dsp/fwd_txfm.c b/vpx_dsp/fwd_txfm.c
index a5802e1..4c0d5db 100644
--- a/vpx_dsp/fwd_txfm.c
+++ b/vpx_dsp/fwd_txfm.c
@@ -8,6 +8,7 @@
* be found in the AUTHORS file in the root of the source tree.
*/
+#include "./vpx_dsp_rtcd.h"
#include "vpx_dsp/fwd_txfm.h"
void vpx_fdct4x4_c(const int16_t *input, tran_low_t *output, int stride) {
diff --git a/vpx_dsp/inv_txfm.c b/vpx_dsp/inv_txfm.c
index 402fd9a..533f762 100644
--- a/vpx_dsp/inv_txfm.c
+++ b/vpx_dsp/inv_txfm.c
@@ -11,6 +11,7 @@
#include <math.h>
#include <string.h>
+#include "./vpx_dsp_rtcd.h"
#include "vpx_dsp/inv_txfm.h"
void vpx_iwht4x4_16_add_c(const tran_low_t *input, uint8_t *dest, int stride) {
diff --git a/vpx_dsp/loopfilter.c b/vpx_dsp/loopfilter.c
index 46ef646..645a1ab 100644
--- a/vpx_dsp/loopfilter.c
+++ b/vpx_dsp/loopfilter.c
@@ -11,6 +11,7 @@
#include <stdlib.h>
#include "./vpx_config.h"
+#include "./vpx_dsp_rtcd.h"
#include "vpx_dsp/vpx_dsp_common.h"
#include "vpx_ports/mem.h"
diff --git a/vpx_dsp/quantize.c b/vpx_dsp/quantize.c
index e4e741a..6426ccc 100644
--- a/vpx_dsp/quantize.c
+++ b/vpx_dsp/quantize.c
@@ -8,6 +8,7 @@
* be found in the AUTHORS file in the root of the source tree.
*/
+#include "./vpx_dsp_rtcd.h"
#include "vpx_dsp/quantize.h"
#include "vpx_mem/vpx_mem.h"
diff --git a/vpx_dsp/vpx_dsp.mk b/vpx_dsp/vpx_dsp.mk
index 46ef5fc..430cae1 100644
--- a/vpx_dsp/vpx_dsp.mk
+++ b/vpx_dsp/vpx_dsp.mk
@@ -65,6 +65,15 @@
DSP_SRCS-$(HAVE_DSPR2) += mips/common_dspr2.h
DSP_SRCS-$(HAVE_DSPR2) += mips/common_dspr2.c
+# inter predictions
+
+ifeq ($(CONFIG_VP10),yes)
+ifeq ($(CONFIG_EXT_INTER),yes)
+DSP_SRCS-yes += blend_mask6.c
+DSP_SRCS-$(HAVE_SSE4_1) += x86/blend_mask6_sse4.c
+endif #CONFIG_EXT_INTER
+endif #CONFIG_VP10
+
# interpolation filters
DSP_SRCS-yes += vpx_convolve.c
DSP_SRCS-yes += vpx_convolve.h
diff --git a/vpx_dsp/vpx_dsp_common.h b/vpx_dsp/vpx_dsp_common.h
index 3571eea..7aaa89f 100644
--- a/vpx_dsp/vpx_dsp_common.h
+++ b/vpx_dsp/vpx_dsp_common.h
@@ -32,6 +32,8 @@
#define IMPLIES(a, b) (!(a) || (b)) // Logical 'a implies b' (or 'a -> b')
+#define IS_POWER_OF_TWO(x) (((x) & ((x) - 1)) == 0)
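+// (Note: the expression above also evaluates to true for x == 0.)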
+
// These can be used to give a hint about branch outcomes.
// This can have an effect, even if your target processor has a
// good branch predictor, as these hints can affect basic block
diff --git a/vpx_dsp/vpx_dsp_rtcd_defs.pl b/vpx_dsp/vpx_dsp_rtcd_defs.pl
index ad524a2..7bae037 100644
--- a/vpx_dsp/vpx_dsp_rtcd_defs.pl
+++ b/vpx_dsp/vpx_dsp_rtcd_defs.pl
@@ -1358,10 +1358,10 @@
}
} # CONFIG_VP9_HIGHBITDEPTH
+if (vpx_config("CONFIG_EXT_INTER") eq "yes") {
#
# Masked Variance / Masked Subpixel Variance
#
-if (vpx_config("CONFIG_EXT_INTER") eq "yes") {
foreach (@block_sizes) {
($w, $h) = @$_;
add_proto qw/unsigned int/, "vpx_masked_variance${w}x${h}", "const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, const uint8_t *mask, int mask_stride, unsigned int *sse";
@@ -1381,6 +1381,14 @@
}
}
}
+
+ add_proto qw/void vpx_blend_mask6/, "uint8_t *dst, uint32_t dst_stride, uint8_t *src0, uint32_t src0_stride, uint8_t *src1, uint32_t src1_stride, const uint8_t *mask, uint32_t mask_stride, int h, int w, int suby, int subx";
+ specialize "vpx_blend_mask6", qw/sse4_1/;
+
+ if (vpx_config("CONFIG_VP9_HIGHBITDEPTH") eq "yes") {
+ add_proto qw/void vpx_highbd_blend_mask6/, "uint8_t *dst, uint32_t dst_stride, uint8_t *src0, uint32_t src0_stride, uint8_t *src1, uint32_t src1_stride, const uint8_t *mask, uint32_t mask_stride, int h, int w, int suby, int subx, int bd";
+ specialize "vpx_highbd_blend_mask6", qw/sse4_1/;
+ }
}
#
diff --git a/vpx_dsp/x86/blend_mask6_sse4.c b/vpx_dsp/x86/blend_mask6_sse4.c
new file mode 100644
index 0000000..5de3e23
--- /dev/null
+++ b/vpx_dsp/x86/blend_mask6_sse4.c
@@ -0,0 +1,1146 @@
+/*
+ * Copyright (c) 2016 The WebM project authors. All Rights Reserved.
+ *
+ * Use of this source code is governed by a BSD-style license
+ * that can be found in the LICENSE file in the root of the source
+ * tree. An additional intellectual property rights grant can be found
+ * in the file PATENTS. All contributing project authors may
+ * be found in the AUTHORS file in the root of the source tree.
+ */
+
+#include <smmintrin.h> // SSE4.1
+
+#include <assert.h>
+
+#include "vpx/vpx_integer.h"
+#include "vpx_ports/mem.h"
+#include "vpx_dsp/vpx_dsp_common.h"
+
+#include "./vpx_dsp_rtcd.h"
+
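+// Each mask value is a 6-bit weight in [0, 1 << MASK_BITS]; a blended pixel
+// is (src0 * m + src1 * ((1 << MASK_BITS) - m)), rounded and shifted down
+// by MASK_BITS.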
+#define MASK_BITS 6
+
+static INLINE __m128i mm_loadl_32(const void *a) {
+ return _mm_cvtsi32_si128(*(const uint32_t*)a);
+}
+
+static INLINE __m128i mm_loadl_64(const void *a) {
+ return _mm_loadl_epi64((const __m128i*)a);
+}
+
+static INLINE __m128i mm_loadu_128(const void *a) {
+ return _mm_loadu_si128((const __m128i*)a);
+}
+
+static INLINE void mm_storel_32(void *const a, const __m128i v) {
+ *(uint32_t*)a = _mm_cvtsi128_si32(v);
+}
+
+static INLINE void mm_storel_64(void *const a, const __m128i v) {
+ _mm_storel_epi64((__m128i*)a, v);
+}
+
+static INLINE void mm_storeu_128(void *const a, const __m128i v) {
+ _mm_storeu_si128((__m128i*)a, v);
+}
+
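+// Rounding helpers: _mm_avg_epu16(v, 0) computes (v + 1) >> 1, so
+// mm_roundn_epu16(v, bits) yields (v + (1 << (bits - 1))) >> bits without
+// an explicit add.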
+static INLINE __m128i mm_round_epu16(__m128i v_val_w) {
+ return _mm_avg_epu16(v_val_w, _mm_setzero_si128());
+}
+
+static INLINE __m128i mm_roundn_epu16(__m128i v_val_w, int bits) {
+ const __m128i v_s_w = _mm_srli_epi16(v_val_w, bits - 1);
+ return _mm_avg_epu16(v_s_w, _mm_setzero_si128());
+}
+
+//////////////////////////////////////////////////////////////////////////////
+// Common kernels
+//////////////////////////////////////////////////////////////////////////////
+
+static INLINE __m128i blend_4(uint8_t *src0, uint8_t *src1,
+ const __m128i v_m0_w, const __m128i v_m1_w) {
+ const __m128i v_s0_b = mm_loadl_32(src0);
+ const __m128i v_s1_b = mm_loadl_32(src1);
+ const __m128i v_s0_w = _mm_cvtepu8_epi16(v_s0_b);
+ const __m128i v_s1_w = _mm_cvtepu8_epi16(v_s1_b);
+
+ const __m128i v_p0_w = _mm_mullo_epi16(v_s0_w, v_m0_w);
+ const __m128i v_p1_w = _mm_mullo_epi16(v_s1_w, v_m1_w);
+
+ const __m128i v_sum_w = _mm_add_epi16(v_p0_w, v_p1_w);
+
+ const __m128i v_res_w = mm_roundn_epu16(v_sum_w, MASK_BITS);
+
+ return v_res_w;
+}
+
+static INLINE __m128i blend_8(uint8_t *src0, uint8_t *src1,
+ const __m128i v_m0_w, const __m128i v_m1_w) {
+ const __m128i v_s0_b = mm_loadl_64(src0);
+ const __m128i v_s1_b = mm_loadl_64(src1);
+ const __m128i v_s0_w = _mm_cvtepu8_epi16(v_s0_b);
+ const __m128i v_s1_w = _mm_cvtepu8_epi16(v_s1_b);
+
+ const __m128i v_p0_w = _mm_mullo_epi16(v_s0_w, v_m0_w);
+ const __m128i v_p1_w = _mm_mullo_epi16(v_s1_w, v_m1_w);
+
+ const __m128i v_sum_w = _mm_add_epi16(v_p0_w, v_p1_w);
+
+ const __m128i v_res_w = mm_roundn_epu16(v_sum_w, MASK_BITS);
+
+ return v_res_w;
+}
+
+//////////////////////////////////////////////////////////////////////////////
+// No sub-sampling
+//////////////////////////////////////////////////////////////////////////////
+
+static void blend_mask6_w4_sse4_1(
+ uint8_t *dst, uint32_t dst_stride,
+ uint8_t *src0, uint32_t src0_stride,
+ uint8_t *src1, uint32_t src1_stride,
+ const uint8_t *mask, uint32_t mask_stride,
+ int h, int w) {
+ const __m128i v_maxval_w = _mm_set1_epi16(1 << MASK_BITS);
+
+ (void)w;
+
+ do {
+ const __m128i v_m0_b = mm_loadl_32(mask);
+ const __m128i v_m0_w = _mm_cvtepu8_epi16(v_m0_b);
+ const __m128i v_m1_w = _mm_sub_epi16(v_maxval_w, v_m0_w);
+
+ const __m128i v_res_w = blend_4(src0, src1, v_m0_w, v_m1_w);
+
+ const __m128i v_res_b = _mm_packus_epi16(v_res_w, v_res_w);
+
+ mm_storel_32(dst, v_res_b);
+
+ dst += dst_stride;
+ src0 += src0_stride;
+ src1 += src1_stride;
+ mask += mask_stride;
+ } while (--h);
+}
+
+static void blend_mask6_w8_sse4_1(
+ uint8_t *dst, uint32_t dst_stride,
+ uint8_t *src0, uint32_t src0_stride,
+ uint8_t *src1, uint32_t src1_stride,
+ const uint8_t *mask, uint32_t mask_stride,
+ int h, int w) {
+ const __m128i v_maxval_w = _mm_set1_epi16(1 << MASK_BITS);
+
+ (void)w;
+
+ do {
+ const __m128i v_m0_b = mm_loadl_64(mask);
+ const __m128i v_m0_w = _mm_cvtepu8_epi16(v_m0_b);
+ const __m128i v_m1_w = _mm_sub_epi16(v_maxval_w, v_m0_w);
+
+ const __m128i v_res_w = blend_8(src0, src1, v_m0_w, v_m1_w);
+
+ const __m128i v_res_b = _mm_packus_epi16(v_res_w, v_res_w);
+
+ mm_storel_64(dst, v_res_b);
+
+ dst += dst_stride;
+ src0 += src0_stride;
+ src1 += src1_stride;
+ mask += mask_stride;
+ } while (--h);
+}
+
+static void blend_mask6_w16n_sse4_1(
+ uint8_t *dst, uint32_t dst_stride,
+ uint8_t *src0, uint32_t src0_stride,
+ uint8_t *src1, uint32_t src1_stride,
+ const uint8_t *mask, uint32_t mask_stride,
+ int h, int w) {
+ const __m128i v_maxval_w = _mm_set1_epi16(1 << MASK_BITS);
+
+ do {
+ int c;
+ for (c = 0; c < w; c += 16) {
+ const __m128i v_m0l_b = mm_loadl_64(mask + c);
+ const __m128i v_m0h_b = mm_loadl_64(mask + c + 8);
+ const __m128i v_m0l_w = _mm_cvtepu8_epi16(v_m0l_b);
+ const __m128i v_m0h_w = _mm_cvtepu8_epi16(v_m0h_b);
+ const __m128i v_m1l_w = _mm_sub_epi16(v_maxval_w, v_m0l_w);
+ const __m128i v_m1h_w = _mm_sub_epi16(v_maxval_w, v_m0h_w);
+
+ const __m128i v_resl_w = blend_8(src0 + c, src1 + c,
+ v_m0l_w, v_m1l_w);
+ const __m128i v_resh_w = blend_8(src0 + c + 8, src1 + c + 8,
+ v_m0h_w, v_m1h_w);
+
+ const __m128i v_res_b = _mm_packus_epi16(v_resl_w, v_resh_w);
+
+ mm_storeu_128(dst + c, v_res_b);
+ }
+ dst += dst_stride;
+ src0 += src0_stride;
+ src1 += src1_stride;
+ mask += mask_stride;
+ } while (--h);
+}
+
+//////////////////////////////////////////////////////////////////////////////
+// Horizontal sub-sampling
+//////////////////////////////////////////////////////////////////////////////
+
+static void blend_mask6_sx_w4_sse4_1(
+ uint8_t *dst, uint32_t dst_stride,
+ uint8_t *src0, uint32_t src0_stride,
+ uint8_t *src1, uint32_t src1_stride,
+ const uint8_t *mask, uint32_t mask_stride,
+ int h, int w) {
+ const __m128i v_zmask_b = _mm_set_epi8(0, 0xff, 0, 0xff, 0, 0xff, 0, 0xff,
+ 0, 0xff, 0, 0xff, 0, 0xff, 0, 0xff);
+ const __m128i v_maxval_w = _mm_set1_epi16(1 << MASK_BITS);
+
+ (void)w;
+
+ do {
+ const __m128i v_r_b = mm_loadl_64(mask);
+ const __m128i v_a_b = _mm_avg_epu8(v_r_b, _mm_srli_si128(v_r_b, 1));
+
+ const __m128i v_m0_w = _mm_and_si128(v_a_b, v_zmask_b);
+ const __m128i v_m1_w = _mm_sub_epi16(v_maxval_w, v_m0_w);
+
+ const __m128i v_res_w = blend_4(src0, src1, v_m0_w, v_m1_w);
+
+ const __m128i v_res_b = _mm_packus_epi16(v_res_w, v_res_w);
+
+ mm_storel_32(dst, v_res_b);
+
+ dst += dst_stride;
+ src0 += src0_stride;
+ src1 += src1_stride;
+ mask += mask_stride;
+ } while (--h);
+}
+
+static void blend_mask6_sx_w8_sse4_1(
+ uint8_t *dst, uint32_t dst_stride,
+ uint8_t *src0, uint32_t src0_stride,
+ uint8_t *src1, uint32_t src1_stride,
+ const uint8_t *mask, uint32_t mask_stride,
+ int h, int w) {
+ const __m128i v_zmask_b = _mm_set_epi8(0, 0xff, 0, 0xff, 0, 0xff, 0, 0xff,
+ 0, 0xff, 0, 0xff, 0, 0xff, 0, 0xff);
+ const __m128i v_maxval_w = _mm_set1_epi16(1 << MASK_BITS);
+
+ (void)w;
+
+ do {
+ const __m128i v_r_b = mm_loadu_128(mask);
+ const __m128i v_a_b = _mm_avg_epu8(v_r_b, _mm_srli_si128(v_r_b, 1));
+
+ const __m128i v_m0_w = _mm_and_si128(v_a_b, v_zmask_b);
+ const __m128i v_m1_w = _mm_sub_epi16(v_maxval_w, v_m0_w);
+
+ const __m128i v_res_w = blend_8(src0, src1, v_m0_w, v_m1_w);
+
+ const __m128i v_res_b = _mm_packus_epi16(v_res_w, v_res_w);
+
+ mm_storel_64(dst, v_res_b);
+
+ dst += dst_stride;
+ src0 += src0_stride;
+ src1 += src1_stride;
+ mask += mask_stride;
+ } while (--h);
+}
+
+static void blend_mask6_sx_w16n_sse4_1(
+ uint8_t *dst, uint32_t dst_stride,
+ uint8_t *src0, uint32_t src0_stride,
+ uint8_t *src1, uint32_t src1_stride,
+ const uint8_t *mask, uint32_t mask_stride,
+ int h, int w) {
+ const __m128i v_zmask_b = _mm_set_epi8(0, 0xff, 0, 0xff, 0, 0xff, 0, 0xff,
+ 0, 0xff, 0, 0xff, 0, 0xff, 0, 0xff);
+ const __m128i v_maxval_w = _mm_set1_epi16(1 << MASK_BITS);
+
+ do {
+ int c;
+ for (c = 0; c < w; c += 16) {
+ const __m128i v_rl_b = mm_loadu_128(mask + 2 * c);
+ const __m128i v_rh_b = mm_loadu_128(mask + 2 * c + 16);
+ const __m128i v_al_b = _mm_avg_epu8(v_rl_b, _mm_srli_si128(v_rl_b, 1));
+ const __m128i v_ah_b = _mm_avg_epu8(v_rh_b, _mm_srli_si128(v_rh_b, 1));
+
+ const __m128i v_m0l_w = _mm_and_si128(v_al_b, v_zmask_b);
+ const __m128i v_m0h_w = _mm_and_si128(v_ah_b, v_zmask_b);
+ const __m128i v_m1l_w = _mm_sub_epi16(v_maxval_w, v_m0l_w);
+ const __m128i v_m1h_w = _mm_sub_epi16(v_maxval_w, v_m0h_w);
+
+ const __m128i v_resl_w = blend_8(src0 + c, src1 + c,
+ v_m0l_w, v_m1l_w);
+ const __m128i v_resh_w = blend_8(src0 + c + 8, src1 + c + 8,
+ v_m0h_w, v_m1h_w);
+
+ const __m128i v_res_b = _mm_packus_epi16(v_resl_w, v_resh_w);
+
+ mm_storeu_128(dst + c, v_res_b);
+ }
+ dst += dst_stride;
+ src0 += src0_stride;
+ src1 += src1_stride;
+ mask += mask_stride;
+ } while (--h);
+}
+
+//////////////////////////////////////////////////////////////////////////////
+// Vertical sub-sampling
+//////////////////////////////////////////////////////////////////////////////
+
+static void blend_mask6_sy_w4_sse4_1(
+ uint8_t *dst, uint32_t dst_stride,
+ uint8_t *src0, uint32_t src0_stride,
+ uint8_t *src1, uint32_t src1_stride,
+ const uint8_t *mask, uint32_t mask_stride,
+ int h, int w) {
+ const __m128i v_maxval_w = _mm_set1_epi16(1 << MASK_BITS);
+
+ (void)w;
+
+ do {
+ const __m128i v_ra_b = mm_loadl_32(mask);
+ const __m128i v_rb_b = mm_loadl_32(mask + mask_stride);
+ const __m128i v_a_b = _mm_avg_epu8(v_ra_b, v_rb_b);
+
+ const __m128i v_m0_w = _mm_cvtepu8_epi16(v_a_b);
+ const __m128i v_m1_w = _mm_sub_epi16(v_maxval_w, v_m0_w);
+
+ const __m128i v_res_w = blend_4(src0, src1, v_m0_w, v_m1_w);
+
+ const __m128i v_res_b = _mm_packus_epi16(v_res_w, v_res_w);
+
+ mm_storel_32(dst, v_res_b);
+
+ dst += dst_stride;
+ src0 += src0_stride;
+ src1 += src1_stride;
+ mask += 2 * mask_stride;
+ } while (--h);
+}
+
+static void blend_mask6_sy_w8_sse4_1(
+ uint8_t *dst, uint32_t dst_stride,
+ uint8_t *src0, uint32_t src0_stride,
+ uint8_t *src1, uint32_t src1_stride,
+ const uint8_t *mask, uint32_t mask_stride,
+ int h, int w) {
+ const __m128i v_maxval_w = _mm_set1_epi16(1 << MASK_BITS);
+
+ (void)w;
+
+ do {
+ const __m128i v_ra_b = mm_loadl_64(mask);
+ const __m128i v_rb_b = mm_loadl_64(mask + mask_stride);
+ const __m128i v_a_b = _mm_avg_epu8(v_ra_b, v_rb_b);
+
+ const __m128i v_m0_w = _mm_cvtepu8_epi16(v_a_b);
+ const __m128i v_m1_w = _mm_sub_epi16(v_maxval_w, v_m0_w);
+
+ const __m128i v_res_w = blend_8(src0, src1, v_m0_w, v_m1_w);
+
+ const __m128i v_res_b = _mm_packus_epi16(v_res_w, v_res_w);
+
+ mm_storel_64(dst, v_res_b);
+
+ dst += dst_stride;
+ src0 += src0_stride;
+ src1 += src1_stride;
+ mask += 2 * mask_stride;
+ } while (--h);
+}
+
+static void blend_mask6_sy_w16n_sse4_1(
+ uint8_t *dst, uint32_t dst_stride,
+ uint8_t *src0, uint32_t src0_stride,
+ uint8_t *src1, uint32_t src1_stride,
+ const uint8_t *mask, uint32_t mask_stride,
+ int h, int w) {
+ const __m128i v_zero = _mm_setzero_si128();
+ const __m128i v_maxval_w = _mm_set1_epi16(1 << MASK_BITS);
+
+ do {
+ int c;
+ for (c = 0; c < w; c += 16) {
+ const __m128i v_ra_b = mm_loadu_128(mask + c);
+ const __m128i v_rb_b = mm_loadu_128(mask + c + mask_stride);
+ const __m128i v_a_b = _mm_avg_epu8(v_ra_b, v_rb_b);
+
+ const __m128i v_m0l_w = _mm_cvtepu8_epi16(v_a_b);
+ const __m128i v_m0h_w = _mm_unpackhi_epi8(v_a_b, v_zero);
+ const __m128i v_m1l_w = _mm_sub_epi16(v_maxval_w, v_m0l_w);
+ const __m128i v_m1h_w = _mm_sub_epi16(v_maxval_w, v_m0h_w);
+
+ const __m128i v_resl_w = blend_8(src0 + c, src1 + c,
+ v_m0l_w, v_m1l_w);
+ const __m128i v_resh_w = blend_8(src0 + c + 8, src1 + c + 8,
+ v_m0h_w, v_m1h_w);
+
+ const __m128i v_res_b = _mm_packus_epi16(v_resl_w, v_resh_w);
+
+ mm_storeu_128(dst + c, v_res_b);
+ }
+ dst += dst_stride;
+ src0 += src0_stride;
+ src1 += src1_stride;
+ mask += 2 * mask_stride;
+ } while (--h);
+}
+
+//////////////////////////////////////////////////////////////////////////////
+// Horizontal and Vertical sub-sampling
+//////////////////////////////////////////////////////////////////////////////
+
+static void blend_mask6_sx_sy_w4_sse4_1(
+ uint8_t *dst, uint32_t dst_stride,
+ uint8_t *src0, uint32_t src0_stride,
+ uint8_t *src1, uint32_t src1_stride,
+ const uint8_t *mask, uint32_t mask_stride,
+ int h, int w) {
+ const __m128i v_zmask_b = _mm_set_epi8(0, 0xff, 0, 0xff, 0, 0xff, 0, 0xff,
+ 0, 0xff, 0, 0xff, 0, 0xff, 0, 0xff);
+ const __m128i v_maxval_w = _mm_set1_epi16(1 << MASK_BITS);
+
+ (void)w;
+
+ do {
+ const __m128i v_ra_b = mm_loadl_64(mask);
+ const __m128i v_rb_b = mm_loadl_64(mask + mask_stride);
+ const __m128i v_rvs_b = _mm_add_epi8(v_ra_b, v_rb_b);
+ const __m128i v_rvsa_w = _mm_and_si128(v_rvs_b, v_zmask_b);
+ const __m128i v_rvsb_w = _mm_and_si128(_mm_srli_si128(v_rvs_b, 1),
+ v_zmask_b);
+ const __m128i v_rs_w = _mm_add_epi16(v_rvsa_w, v_rvsb_w);
+
+ const __m128i v_m0_w = mm_roundn_epu16(v_rs_w, 2);
+ const __m128i v_m1_w = _mm_sub_epi16(v_maxval_w, v_m0_w);
+
+ const __m128i v_res_w = blend_4(src0, src1, v_m0_w, v_m1_w);
+
+ const __m128i v_res_b = _mm_packus_epi16(v_res_w, v_res_w);
+
+ mm_storel_32(dst, v_res_b);
+
+ dst += dst_stride;
+ src0 += src0_stride;
+ src1 += src1_stride;
+ mask += 2 * mask_stride;
+ } while (--h);
+}
+
+static void blend_mask6_sx_sy_w8_sse4_1(
+ uint8_t *dst, uint32_t dst_stride,
+ uint8_t *src0, uint32_t src0_stride,
+ uint8_t *src1, uint32_t src1_stride,
+ const uint8_t *mask, uint32_t mask_stride,
+ int h, int w) {
+ const __m128i v_zmask_b = _mm_set_epi8(0, 0xff, 0, 0xff, 0, 0xff, 0, 0xff,
+ 0, 0xff, 0, 0xff, 0, 0xff, 0, 0xff);
+ const __m128i v_maxval_w = _mm_set1_epi16(1 << MASK_BITS);
+
+ (void)w;
+
+ do {
+ const __m128i v_ra_b = mm_loadu_128(mask);
+ const __m128i v_rb_b = mm_loadu_128(mask + mask_stride);
+ const __m128i v_rvs_b = _mm_add_epi8(v_ra_b, v_rb_b);
+ const __m128i v_rvsa_w = _mm_and_si128(v_rvs_b, v_zmask_b);
+ const __m128i v_rvsb_w = _mm_and_si128(_mm_srli_si128(v_rvs_b, 1),
+ v_zmask_b);
+ const __m128i v_rs_w = _mm_add_epi16(v_rvsa_w, v_rvsb_w);
+
+ const __m128i v_m0_w = mm_roundn_epu16(v_rs_w, 2);
+ const __m128i v_m1_w = _mm_sub_epi16(v_maxval_w, v_m0_w);
+
+ const __m128i v_res_w = blend_8(src0, src1, v_m0_w, v_m1_w);
+
+ const __m128i v_res_b = _mm_packus_epi16(v_res_w, v_res_w);
+
+ mm_storel_64(dst, v_res_b);
+
+ dst += dst_stride;
+ src0 += src0_stride;
+ src1 += src1_stride;
+ mask += 2 * mask_stride;
+ } while (--h);
+}
+
+static void blend_mask6_sx_sy_w16n_sse4_1(
+ uint8_t *dst, uint32_t dst_stride,
+ uint8_t *src0, uint32_t src0_stride,
+ uint8_t *src1, uint32_t src1_stride,
+ const uint8_t *mask, uint32_t mask_stride,
+ int h, int w) {
+ const __m128i v_zmask_b = _mm_set_epi8(0, 0xff, 0, 0xff, 0, 0xff, 0, 0xff,
+ 0, 0xff, 0, 0xff, 0, 0xff, 0, 0xff);
+ const __m128i v_maxval_w = _mm_set1_epi16(1 << MASK_BITS);
+
+ do {
+ int c;
+ for (c = 0; c < w; c += 16) {
+ const __m128i v_ral_b = mm_loadu_128(mask + 2 * c);
+ const __m128i v_rah_b = mm_loadu_128(mask + 2 * c + 16);
+ const __m128i v_rbl_b = mm_loadu_128(mask + mask_stride + 2 * c);
+ const __m128i v_rbh_b = mm_loadu_128(mask + mask_stride + 2 * c + 16);
+ const __m128i v_rvsl_b = _mm_add_epi8(v_ral_b, v_rbl_b);
+ const __m128i v_rvsh_b = _mm_add_epi8(v_rah_b, v_rbh_b);
+ const __m128i v_rvsal_w = _mm_and_si128(v_rvsl_b, v_zmask_b);
+ const __m128i v_rvsah_w = _mm_and_si128(v_rvsh_b, v_zmask_b);
+ const __m128i v_rvsbl_w = _mm_and_si128(_mm_srli_si128(v_rvsl_b, 1),
+ v_zmask_b);
+ const __m128i v_rvsbh_w = _mm_and_si128(_mm_srli_si128(v_rvsh_b, 1),
+ v_zmask_b);
+ const __m128i v_rsl_w = _mm_add_epi16(v_rvsal_w, v_rvsbl_w);
+ const __m128i v_rsh_w = _mm_add_epi16(v_rvsah_w, v_rvsbh_w);
+
+ const __m128i v_m0l_w = mm_roundn_epu16(v_rsl_w, 2);
+ const __m128i v_m0h_w = mm_roundn_epu16(v_rsh_w, 2);
+ const __m128i v_m1l_w = _mm_sub_epi16(v_maxval_w, v_m0l_w);
+ const __m128i v_m1h_w = _mm_sub_epi16(v_maxval_w, v_m0h_w);
+
+ const __m128i v_resl_w = blend_8(src0 + c, src1 + c,
+ v_m0l_w, v_m1l_w);
+ const __m128i v_resh_w = blend_8(src0 + c + 8, src1 + c + 8,
+ v_m0h_w, v_m1h_w);
+
+ const __m128i v_res_b = _mm_packus_epi16(v_resl_w, v_resh_w);
+
+ mm_storeu_128(dst + c, v_res_b);
+ }
+ dst += dst_stride;
+ src0 += src0_stride;
+ src1 += src1_stride;
+ mask += 2 * mask_stride;
+ } while (--h);
+}
+
+//////////////////////////////////////////////////////////////////////////////
+// Dispatch
+//////////////////////////////////////////////////////////////////////////////
+
+void vpx_blend_mask6_sse4_1(uint8_t *dst, uint32_t dst_stride,
+ uint8_t *src0, uint32_t src0_stride,
+ uint8_t *src1, uint32_t src1_stride,
+ const uint8_t *mask, uint32_t mask_stride,
+ int h, int w, int suby, int subx) {
+ typedef void (*blend_fn)(uint8_t *dst, uint32_t dst_stride,
+ uint8_t *src0, uint32_t src0_stride,
+ uint8_t *src1, uint32_t src1_stride,
+ const uint8_t *mask, uint32_t mask_stride,
+ int h, int w);
+
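+ // (w >> 2) & 3 maps w == 4 to row 1, w == 8 to row 2 and any multiple of
+ // 16 to row 0 of the table below.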
+ static blend_fn blend[3][2][2] = { // width_index X subx X suby
+ { // w % 16 == 0
+ {blend_mask6_w16n_sse4_1, blend_mask6_sy_w16n_sse4_1},
+ {blend_mask6_sx_w16n_sse4_1, blend_mask6_sx_sy_w16n_sse4_1}
+ }, { // w == 4
+ {blend_mask6_w4_sse4_1, blend_mask6_sy_w4_sse4_1},
+ {blend_mask6_sx_w4_sse4_1, blend_mask6_sx_sy_w4_sse4_1}
+ }, { // w == 8
+ {blend_mask6_w8_sse4_1, blend_mask6_sy_w8_sse4_1},
+ {blend_mask6_sx_w8_sse4_1, blend_mask6_sx_sy_w8_sse4_1}
+ }
+ };
+
+ assert(IMPLIES(src0 == dst, src0_stride == dst_stride));
+ assert(IMPLIES(src1 == dst, src1_stride == dst_stride));
+
+ assert(h >= 4);
+ assert(w >= 4);
+ assert(IS_POWER_OF_TWO(h));
+ assert(IS_POWER_OF_TWO(w));
+
+ blend[(w >> 2) & 3][subx != 0][suby != 0](dst, dst_stride,
+ src0, src0_stride,
+ src1, src1_stride,
+ mask, mask_stride,
+ h, w);
+}
+
+#if CONFIG_VP9_HIGHBITDEPTH
+//////////////////////////////////////////////////////////////////////////////
+// Common kernels
+//////////////////////////////////////////////////////////////////////////////
+
+typedef __m128i (*blend_unit_fn)(uint16_t *src0, uint16_t *src1,
+ const __m128i v_m0_w, const __m128i v_m1_w);
+
+static INLINE __m128i blend_4_b10(uint16_t *src0, uint16_t *src1,
+ const __m128i v_m0_w, const __m128i v_m1_w) {
+ const __m128i v_s0_w = mm_loadl_64(src0);
+ const __m128i v_s1_w = mm_loadl_64(src1);
+
+ const __m128i v_p0_w = _mm_mullo_epi16(v_s0_w, v_m0_w);
+ const __m128i v_p1_w = _mm_mullo_epi16(v_s1_w, v_m1_w);
+
+ const __m128i v_sum_w = _mm_add_epi16(v_p0_w, v_p1_w);
+
+ const __m128i v_res_w = mm_roundn_epu16(v_sum_w, MASK_BITS);
+
+ return v_res_w;
+}
+
+static INLINE __m128i blend_8_b10(uint16_t *src0, uint16_t *src1,
+ const __m128i v_m0_w, const __m128i v_m1_w) {
+ const __m128i v_s0_w = mm_loadu_128(src0);
+ const __m128i v_s1_w = mm_loadu_128(src1);
+
+ const __m128i v_p0_w = _mm_mullo_epi16(v_s0_w, v_m0_w);
+ const __m128i v_p1_w = _mm_mullo_epi16(v_s1_w, v_m1_w);
+
+ const __m128i v_sum_w = _mm_add_epi16(v_p0_w, v_p1_w);
+
+ const __m128i v_res_w = mm_roundn_epu16(v_sum_w, MASK_BITS);
+
+ return v_res_w;
+}
+
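+// A 12-bit sample times a 6-bit mask weight does not fit in 16 bits
+// (4095 * 64 > 65535), so the b12 kernels interleave the operands and use a
+// 32-bit multiply-add, whereas the b10 kernels can use _mm_mullo_epi16.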
+static INLINE __m128i blend_4_b12(uint16_t *src0, uint16_t *src1,
+ const __m128i v_m0_w, const __m128i v_m1_w) {
+ const __m128i v_s0_w = mm_loadl_64(src0);
+ const __m128i v_s1_w = mm_loadl_64(src1);
+
+ // Interleave
+ const __m128i v_m01_w = _mm_unpacklo_epi16(v_m0_w, v_m1_w);
+ const __m128i v_s01_w = _mm_unpacklo_epi16(v_s0_w, v_s1_w);
+
+ // Multiply-Add
+ const __m128i v_sum_d = _mm_madd_epi16(v_s01_w, v_m01_w);
+
+ // Scale
+ const __m128i v_ssum_d = _mm_srli_epi32(v_sum_d, MASK_BITS - 1);
+
+ // Pack
+ const __m128i v_pssum_d = _mm_packs_epi32(v_ssum_d, v_ssum_d);
+
+ // Round
+ const __m128i v_res_w = mm_round_epu16(v_pssum_d);
+
+ return v_res_w;
+}
+
+static INLINE __m128i blend_8_b12(uint16_t *src0, uint16_t *src1,
+ const __m128i v_m0_w, const __m128i v_m1_w) {
+ const __m128i v_s0_w = mm_loadu_128(src0);
+ const __m128i v_s1_w = mm_loadu_128(src1);
+
+ // Interleave
+ const __m128i v_m01l_w = _mm_unpacklo_epi16(v_m0_w, v_m1_w);
+ const __m128i v_m01h_w = _mm_unpackhi_epi16(v_m0_w, v_m1_w);
+ const __m128i v_s01l_w = _mm_unpacklo_epi16(v_s0_w, v_s1_w);
+ const __m128i v_s01h_w = _mm_unpackhi_epi16(v_s0_w, v_s1_w);
+
+ // Multiply-Add
+ const __m128i v_suml_d = _mm_madd_epi16(v_s01l_w, v_m01l_w);
+ const __m128i v_sumh_d = _mm_madd_epi16(v_s01h_w, v_m01h_w);
+
+ // Scale
+ const __m128i v_ssuml_d = _mm_srli_epi32(v_suml_d, MASK_BITS - 1);
+ const __m128i v_ssumh_d = _mm_srli_epi32(v_sumh_d, MASK_BITS - 1);
+
+ // Pack
+ const __m128i v_pssum_d = _mm_packs_epi32(v_ssuml_d, v_ssumh_d);
+
+ // Round
+ const __m128i v_res_w = mm_round_epu16(v_pssum_d);
+
+ return v_res_w;
+}
+
+//////////////////////////////////////////////////////////////////////////////
+// No sub-sampling
+//////////////////////////////////////////////////////////////////////////////
+
+static INLINE void blend_mask6_bn_w4_sse4_1(
+ uint16_t *dst, uint32_t dst_stride,
+ uint16_t *src0, uint32_t src0_stride,
+ uint16_t *src1, uint32_t src1_stride,
+ const uint8_t *mask, uint32_t mask_stride,
+ int h, blend_unit_fn blend) {
+ const __m128i v_maxval_w = _mm_set1_epi16(1 << MASK_BITS);
+
+ do {
+ const __m128i v_m0_b = mm_loadl_32(mask);
+ const __m128i v_m0_w = _mm_cvtepu8_epi16(v_m0_b);
+ const __m128i v_m1_w = _mm_sub_epi16(v_maxval_w, v_m0_w);
+
+ const __m128i v_res_w = blend(src0, src1, v_m0_w, v_m1_w);
+
+ mm_storel_64(dst, v_res_w);
+
+ dst += dst_stride;
+ src0 += src0_stride;
+ src1 += src1_stride;
+ mask += mask_stride;
+ } while (--h);
+}
+
+static void blend_mask6_b10_w4_sse4_1(
+ uint16_t *dst, uint32_t dst_stride,
+ uint16_t *src0, uint32_t src0_stride,
+ uint16_t *src1, uint32_t src1_stride,
+ const uint8_t *mask, uint32_t mask_stride,
+ int h, int w) {
+ (void)w;
+ blend_mask6_bn_w4_sse4_1(dst, dst_stride, src0, src0_stride, src1,
+ src1_stride, mask, mask_stride, h,
+ blend_4_b10);
+}
+
+static void blend_mask6_b12_w4_sse4_1(
+ uint16_t *dst, uint32_t dst_stride,
+ uint16_t *src0, uint32_t src0_stride,
+ uint16_t *src1, uint32_t src1_stride,
+ const uint8_t *mask, uint32_t mask_stride,
+ int h, int w) {
+ (void)w;
+ blend_mask6_bn_w4_sse4_1(dst, dst_stride, src0, src0_stride, src1,
+ src1_stride, mask, mask_stride, h,
+ blend_4_b12);
+}
+
+static INLINE void blend_mask6_bn_w8n_sse4_1(
+ uint16_t *dst, uint32_t dst_stride,
+ uint16_t *src0, uint32_t src0_stride,
+ uint16_t *src1, uint32_t src1_stride,
+ const uint8_t *mask, uint32_t mask_stride,
+ int h, int w, blend_unit_fn blend) {
+ const __m128i v_maxval_w = _mm_set1_epi16(1 << MASK_BITS);
+
+ do {
+ int c;
+ for (c = 0; c < w; c += 8) {
+ const __m128i v_m0_b = mm_loadl_64(mask + c);
+ const __m128i v_m0_w = _mm_cvtepu8_epi16(v_m0_b);
+ const __m128i v_m1_w = _mm_sub_epi16(v_maxval_w, v_m0_w);
+
+ const __m128i v_res_w = blend(src0 + c, src1 + c, v_m0_w, v_m1_w);
+
+ mm_storeu_128(dst + c, v_res_w);
+ }
+ dst += dst_stride;
+ src0 += src0_stride;
+ src1 += src1_stride;
+ mask += mask_stride;
+ } while (--h);
+}
+
+static void blend_mask6_b10_w8n_sse4_1(
+ uint16_t *dst, uint32_t dst_stride,
+ uint16_t *src0, uint32_t src0_stride,
+ uint16_t *src1, uint32_t src1_stride,
+ const uint8_t *mask, uint32_t mask_stride,
+ int h, int w) {
+ blend_mask6_bn_w8n_sse4_1(dst, dst_stride, src0, src0_stride, src1,
+ src1_stride, mask, mask_stride, h, w,
+ blend_8_b10);
+}
+
+static void blend_mask6_b12_w8n_sse4_1(
+ uint16_t *dst, uint32_t dst_stride,
+ uint16_t *src0, uint32_t src0_stride,
+ uint16_t *src1, uint32_t src1_stride,
+ const uint8_t *mask, uint32_t mask_stride,
+ int h, int w) {
+ blend_mask6_bn_w8n_sse4_1(dst, dst_stride, src0, src0_stride, src1,
+ src1_stride, mask, mask_stride, h, w,
+ blend_8_b12);
+}
+
+//////////////////////////////////////////////////////////////////////////////
+// Horizontal sub-sampling
+//////////////////////////////////////////////////////////////////////////////
+
+static INLINE void blend_mask6_bn_sx_w4_sse4_1(
+ uint16_t *dst, uint32_t dst_stride,
+ uint16_t *src0, uint32_t src0_stride,
+ uint16_t *src1, uint32_t src1_stride,
+ const uint8_t *mask, uint32_t mask_stride,
+ int h, blend_unit_fn blend) {
+ const __m128i v_zmask_b = _mm_set_epi8(0, 0xff, 0, 0xff, 0, 0xff, 0, 0xff,
+ 0, 0xff, 0, 0xff, 0, 0xff, 0, 0xff);
+ const __m128i v_maxval_w = _mm_set1_epi16(1 << MASK_BITS);
+
+ do {
+ const __m128i v_r_b = mm_loadl_64(mask);
+ const __m128i v_a_b = _mm_avg_epu8(v_r_b, _mm_srli_si128(v_r_b, 1));
+
+ const __m128i v_m0_w = _mm_and_si128(v_a_b, v_zmask_b);
+ const __m128i v_m1_w = _mm_sub_epi16(v_maxval_w, v_m0_w);
+
+ const __m128i v_res_w = blend(src0, src1, v_m0_w, v_m1_w);
+
+ mm_storel_64(dst, v_res_w);
+
+ dst += dst_stride;
+ src0 += src0_stride;
+ src1 += src1_stride;
+ mask += mask_stride;
+ } while (--h);
+}
+
+static void blend_mask6_b10_sx_w4_sse4_1(
+ uint16_t *dst, uint32_t dst_stride,
+ uint16_t *src0, uint32_t src0_stride,
+ uint16_t *src1, uint32_t src1_stride,
+ const uint8_t *mask, uint32_t mask_stride,
+ int h, int w) {
+ (void)w;
+ blend_mask6_bn_sx_w4_sse4_1(dst, dst_stride, src0, src0_stride, src1,
+ src1_stride, mask, mask_stride, h,
+ blend_4_b10);
+}
+
+static void blend_mask6_b12_sx_w4_sse4_1(
+ uint16_t *dst, uint32_t dst_stride,
+ uint16_t *src0, uint32_t src0_stride,
+ uint16_t *src1, uint32_t src1_stride,
+ const uint8_t *mask, uint32_t mask_stride,
+ int h, int w) {
+ (void)w;
+ blend_mask6_bn_sx_w4_sse4_1(dst, dst_stride, src0, src0_stride, src1,
+ src1_stride, mask, mask_stride, h,
+ blend_4_b12);
+}
+
+static INLINE void blend_mask6_bn_sx_w8n_sse4_1(
+ uint16_t *dst, uint32_t dst_stride,
+ uint16_t *src0, uint32_t src0_stride,
+ uint16_t *src1, uint32_t src1_stride,
+ const uint8_t *mask, uint32_t mask_stride,
+ int h, int w, blend_unit_fn blend) {
+ const __m128i v_zmask_b = _mm_set_epi8(0, 0xff, 0, 0xff, 0, 0xff, 0, 0xff,
+ 0, 0xff, 0, 0xff, 0, 0xff, 0, 0xff);
+ const __m128i v_maxval_w = _mm_set1_epi16(1 << MASK_BITS);
+
+ do {
+ int c;
+ for (c = 0; c < w; c += 8) {
+ const __m128i v_r_b = mm_loadu_128(mask + 2 * c);
+ const __m128i v_a_b = _mm_avg_epu8(v_r_b, _mm_srli_si128(v_r_b, 1));
+
+ const __m128i v_m0_w = _mm_and_si128(v_a_b, v_zmask_b);
+ const __m128i v_m1_w = _mm_sub_epi16(v_maxval_w, v_m0_w);
+
+ const __m128i v_res_w = blend(src0 + c, src1 + c, v_m0_w, v_m1_w);
+
+ mm_storeu_128(dst + c, v_res_w);
+ }
+ dst += dst_stride;
+ src0 += src0_stride;
+ src1 += src1_stride;
+ mask += mask_stride;
+ } while (--h);
+}
+
+static void blend_mask6_b10_sx_w8n_sse4_1(
+ uint16_t *dst, uint32_t dst_stride,
+ uint16_t *src0, uint32_t src0_stride,
+ uint16_t *src1, uint32_t src1_stride,
+ const uint8_t *mask, uint32_t mask_stride,
+ int h, int w) {
+ blend_mask6_bn_sx_w8n_sse4_1(dst, dst_stride, src0, src0_stride, src1,
+ src1_stride, mask, mask_stride, h, w,
+ blend_8_b10);
+}
+
+static void blend_mask6_b12_sx_w8n_sse4_1(
+ uint16_t *dst, uint32_t dst_stride,
+ uint16_t *src0, uint32_t src0_stride,
+ uint16_t *src1, uint32_t src1_stride,
+ const uint8_t *mask, uint32_t mask_stride,
+ int h, int w) {
+ blend_mask6_bn_sx_w8n_sse4_1(dst, dst_stride, src0, src0_stride, src1,
+ src1_stride, mask, mask_stride, h, w,
+ blend_8_b12);
+}
+
+//////////////////////////////////////////////////////////////////////////////
+// Vertical sub-sampling
+//////////////////////////////////////////////////////////////////////////////
+
+static INLINE void blend_mask6_bn_sy_w4_sse4_1(
+ uint16_t *dst, uint32_t dst_stride,
+ uint16_t *src0, uint32_t src0_stride,
+ uint16_t *src1, uint32_t src1_stride,
+ const uint8_t *mask, uint32_t mask_stride,
+ int h, blend_unit_fn blend) {
+ const __m128i v_maxval_w = _mm_set1_epi16(1 << MASK_BITS);
+
+ do {
+ const __m128i v_ra_b = mm_loadl_32(mask);
+ const __m128i v_rb_b = mm_loadl_32(mask + mask_stride);
+ const __m128i v_a_b = _mm_avg_epu8(v_ra_b, v_rb_b);
+
+ const __m128i v_m0_w = _mm_cvtepu8_epi16(v_a_b);
+ const __m128i v_m1_w = _mm_sub_epi16(v_maxval_w, v_m0_w);
+
+ const __m128i v_res_w = blend(src0, src1, v_m0_w, v_m1_w);
+
+ mm_storel_64(dst, v_res_w);
+
+ dst += dst_stride;
+ src0 += src0_stride;
+ src1 += src1_stride;
+ mask += 2 * mask_stride;
+ } while (--h);
+}
+
+static void blend_mask6_b10_sy_w4_sse4_1(
+ uint16_t *dst, uint32_t dst_stride,
+ uint16_t *src0, uint32_t src0_stride,
+ uint16_t *src1, uint32_t src1_stride,
+ const uint8_t *mask, uint32_t mask_stride,
+ int h, int w) {
+ (void)w;
+ blend_mask6_bn_sy_w4_sse4_1(dst, dst_stride, src0, src0_stride, src1,
+ src1_stride, mask, mask_stride, h,
+ blend_4_b10);
+}
+
+static void blend_mask6_b12_sy_w4_sse4_1(
+ uint16_t *dst, uint32_t dst_stride,
+ uint16_t *src0, uint32_t src0_stride,
+ uint16_t *src1, uint32_t src1_stride,
+ const uint8_t *mask, uint32_t mask_stride,
+ int h, int w) {
+ (void)w;
+ blend_mask6_bn_sy_w4_sse4_1(dst, dst_stride, src0, src0_stride, src1,
+ src1_stride, mask, mask_stride, h,
+ blend_4_b12);
+}
+
+static INLINE void blend_mask6_bn_sy_w8n_sse4_1(
+ uint16_t *dst, uint32_t dst_stride,
+ uint16_t *src0, uint32_t src0_stride,
+ uint16_t *src1, uint32_t src1_stride,
+ const uint8_t *mask, uint32_t mask_stride,
+ int h, int w, blend_unit_fn blend) {
+ const __m128i v_maxval_w = _mm_set1_epi16(1 << MASK_BITS);
+
+ do {
+ int c;
+ for (c = 0; c < w; c += 8) {
+ const __m128i v_ra_b = mm_loadl_64(mask + c);
+ const __m128i v_rb_b = mm_loadl_64(mask + c + mask_stride);
+ const __m128i v_a_b = _mm_avg_epu8(v_ra_b, v_rb_b);
+
+ const __m128i v_m0_w = _mm_cvtepu8_epi16(v_a_b);
+ const __m128i v_m1_w = _mm_sub_epi16(v_maxval_w, v_m0_w);
+
+ const __m128i v_res_w = blend(src0 + c, src1 + c, v_m0_w, v_m1_w);
+
+ mm_storeu_128(dst + c, v_res_w);
+ }
+ dst += dst_stride;
+ src0 += src0_stride;
+ src1 += src1_stride;
+ mask += 2 * mask_stride;
+ } while (--h);
+}
+
+static void blend_mask6_b10_sy_w8n_sse4_1(
+ uint16_t *dst, uint32_t dst_stride,
+ uint16_t *src0, uint32_t src0_stride,
+ uint16_t *src1, uint32_t src1_stride,
+ const uint8_t *mask, uint32_t mask_stride,
+ int h, int w) {
+ blend_mask6_bn_sy_w8n_sse4_1(dst, dst_stride, src0, src0_stride, src1,
+ src1_stride, mask, mask_stride, h, w,
+ blend_8_b10);
+}
+
+static void blend_mask6_b12_sy_w8n_sse4_1(
+ uint16_t *dst, uint32_t dst_stride,
+ uint16_t *src0, uint32_t src0_stride,
+ uint16_t *src1, uint32_t src1_stride,
+ const uint8_t *mask, uint32_t mask_stride,
+ int h, int w) {
+ blend_mask6_bn_sy_w8n_sse4_1(dst, dst_stride, src0, src0_stride, src1,
+ src1_stride, mask, mask_stride, h, w,
+ blend_8_b12);
+}
+
+//////////////////////////////////////////////////////////////////////////////
+// Horizontal and Vertical sub-sampling
+//////////////////////////////////////////////////////////////////////////////
+
+static INLINE void blend_mask6_bn_sx_sy_w4_sse4_1(
+ uint16_t *dst, uint32_t dst_stride,
+ uint16_t *src0, uint32_t src0_stride,
+ uint16_t *src1, uint32_t src1_stride,
+ const uint8_t *mask, uint32_t mask_stride,
+ int h, blend_unit_fn blend) {
+ const __m128i v_zmask_b = _mm_set_epi8(0, 0xff, 0, 0xff, 0, 0xff, 0, 0xff,
+ 0, 0xff, 0, 0xff, 0, 0xff, 0, 0xff);
+ const __m128i v_maxval_w = _mm_set1_epi16(1 << MASK_BITS);
+
+ do {
+ const __m128i v_ra_b = mm_loadl_64(mask);
+ const __m128i v_rb_b = mm_loadl_64(mask + mask_stride);
+ const __m128i v_rvs_b = _mm_add_epi8(v_ra_b, v_rb_b);
+ const __m128i v_rvsa_w = _mm_and_si128(v_rvs_b, v_zmask_b);
+ const __m128i v_rvsb_w = _mm_and_si128(_mm_srli_si128(v_rvs_b, 1),
+ v_zmask_b);
+ const __m128i v_rs_w = _mm_add_epi16(v_rvsa_w, v_rvsb_w);
+
+ const __m128i v_m0_w = mm_roundn_epu16(v_rs_w, 2);
+ const __m128i v_m1_w = _mm_sub_epi16(v_maxval_w, v_m0_w);
+
+ const __m128i v_res_w = blend(src0, src1, v_m0_w, v_m1_w);
+
+ mm_storel_64(dst, v_res_w);
+
+ dst += dst_stride;
+ src0 += src0_stride;
+ src1 += src1_stride;
+ mask += 2 * mask_stride;
+ } while (--h);
+}
+
+static void blend_mask6_b10_sx_sy_w4_sse4_1(
+ uint16_t *dst, uint32_t dst_stride,
+ uint16_t *src0, uint32_t src0_stride,
+ uint16_t *src1, uint32_t src1_stride,
+ const uint8_t *mask, uint32_t mask_stride,
+ int h, int w) {
+ (void)w;
+ blend_mask6_bn_sx_sy_w4_sse4_1(dst, dst_stride, src0, src0_stride, src1,
+ src1_stride, mask, mask_stride, h,
+ blend_4_b10);
+}
+
+static void blend_mask6_b12_sx_sy_w4_sse4_1(
+ uint16_t *dst, uint32_t dst_stride,
+ uint16_t *src0, uint32_t src0_stride,
+ uint16_t *src1, uint32_t src1_stride,
+ const uint8_t *mask, uint32_t mask_stride,
+ int h, int w) {
+ (void)w;
+ blend_mask6_bn_sx_sy_w4_sse4_1(dst, dst_stride, src0, src0_stride, src1,
+ src1_stride, mask, mask_stride, h,
+ blend_4_b12);
+}
+
+static INLINE void blend_mask6_bn_sx_sy_w8n_sse4_1(
+ uint16_t *dst, uint32_t dst_stride,
+ uint16_t *src0, uint32_t src0_stride,
+ uint16_t *src1, uint32_t src1_stride,
+ const uint8_t *mask, uint32_t mask_stride,
+ int h, int w, blend_unit_fn blend) {
+ const __m128i v_zmask_b = _mm_set_epi8(0, 0xff, 0, 0xff, 0, 0xff, 0, 0xff,
+ 0, 0xff, 0, 0xff, 0, 0xff, 0, 0xff);
+ const __m128i v_maxval_w = _mm_set1_epi16(1 << MASK_BITS);
+
+ do {
+ int c;
+ for (c = 0; c < w; c += 8) {
+ const __m128i v_ra_b = mm_loadu_128(mask + 2 * c);
+ const __m128i v_rb_b = mm_loadu_128(mask + 2 * c + mask_stride);
+ const __m128i v_rvs_b = _mm_add_epi8(v_ra_b, v_rb_b);
+ const __m128i v_rvsa_w = _mm_and_si128(v_rvs_b, v_zmask_b);
+ const __m128i v_rvsb_w = _mm_and_si128(_mm_srli_si128(v_rvs_b, 1),
+ v_zmask_b);
+ const __m128i v_rs_w = _mm_add_epi16(v_rvsa_w, v_rvsb_w);
+
+ const __m128i v_m0_w = mm_roundn_epu16(v_rs_w, 2);
+ const __m128i v_m1_w = _mm_sub_epi16(v_maxval_w, v_m0_w);
+
+ const __m128i v_res_w = blend(src0 + c, src1 + c, v_m0_w, v_m1_w);
+
+ mm_storeu_128(dst + c, v_res_w);
+ }
+ dst += dst_stride;
+ src0 += src0_stride;
+ src1 += src1_stride;
+ mask += 2 * mask_stride;
+ } while (--h);
+}
+
+static void blend_mask6_b10_sx_sy_w8n_sse4_1(
+ uint16_t *dst, uint32_t dst_stride,
+ uint16_t *src0, uint32_t src0_stride,
+ uint16_t *src1, uint32_t src1_stride,
+ const uint8_t *mask, uint32_t mask_stride,
+ int h, int w) {
+ blend_mask6_bn_sx_sy_w8n_sse4_1(dst, dst_stride, src0, src0_stride, src1,
+ src1_stride, mask, mask_stride, h, w,
+ blend_8_b10);
+}
+
+static void blend_mask6_b12_sx_sy_w8n_sse4_1(
+ uint16_t *dst, uint32_t dst_stride,
+ uint16_t *src0, uint32_t src0_stride,
+ uint16_t *src1, uint32_t src1_stride,
+ const uint8_t *mask, uint32_t mask_stride,
+ int h, int w) {
+ blend_mask6_bn_sx_sy_w8n_sse4_1(dst, dst_stride, src0, src0_stride, src1,
+ src1_stride, mask, mask_stride, h, w,
+ blend_8_b12);
+}
+
+//////////////////////////////////////////////////////////////////////////////
+// Dispatch
+//////////////////////////////////////////////////////////////////////////////
+
+void vpx_highbd_blend_mask6_sse4_1(uint8_t *dst_8, uint32_t dst_stride,
+ uint8_t *src0_8, uint32_t src0_stride,
+ uint8_t *src1_8, uint32_t src1_stride,
+ const uint8_t *mask, uint32_t mask_stride,
+ int h, int w, int suby, int subx, int bd) {
+ uint16_t *const dst = CONVERT_TO_SHORTPTR(dst_8);
+ uint16_t *const src0 = CONVERT_TO_SHORTPTR(src0_8);
+ uint16_t *const src1 = CONVERT_TO_SHORTPTR(src1_8);
+
+ typedef void (*blend_fn)(uint16_t *dst, uint32_t dst_stride,
+ uint16_t *src0, uint32_t src0_stride,
+ uint16_t *src1, uint32_t src1_stride,
+ const uint8_t *mask, uint32_t mask_stride,
+ int h, int w);
+
+ static blend_fn blend[2][2][2][2] = { // bd_index X width_index X subx X suby
+ { // bd == 8 or 10
+ { // w % 8 == 0
+ {blend_mask6_b10_w8n_sse4_1, blend_mask6_b10_sy_w8n_sse4_1},
+ {blend_mask6_b10_sx_w8n_sse4_1, blend_mask6_b10_sx_sy_w8n_sse4_1}
+ }, { // w == 4
+ {blend_mask6_b10_w4_sse4_1, blend_mask6_b10_sy_w4_sse4_1},
+ {blend_mask6_b10_sx_w4_sse4_1, blend_mask6_b10_sx_sy_w4_sse4_1}
+ }
+ },
+ { // bd == 12
+ { // w % 8 == 0
+ {blend_mask6_b12_w8n_sse4_1, blend_mask6_b12_sy_w8n_sse4_1},
+ {blend_mask6_b12_sx_w8n_sse4_1, blend_mask6_b12_sx_sy_w8n_sse4_1}
+ }, { // w == 4
+ {blend_mask6_b12_w4_sse4_1, blend_mask6_b12_sy_w4_sse4_1},
+ {blend_mask6_b12_sx_w4_sse4_1, blend_mask6_b12_sx_sy_w4_sse4_1}
+ }
+ }
+ };
+
+ assert(IMPLIES(src0 == dst, src0_stride == dst_stride));
+ assert(IMPLIES(src1 == dst, src1_stride == dst_stride));
+
+ assert(h >= 4);
+ assert(w >= 4);
+ assert(IS_POWER_OF_TWO(h));
+ assert(IS_POWER_OF_TWO(w));
+
+ assert(bd == 8 || bd == 10 || bd == 12);
+
+ blend[bd == 12][(w >> 2) & 1][subx != 0][suby != 0](dst, dst_stride,
+ src0, src0_stride,
+ src1, src1_stride,
+ mask, mask_stride,
+ h, w);
+}
+#endif // CONFIG_VP9_HIGHBITDEPTH
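A hypothetical caller sketch for the new entry point (example values only;
blend_two_preds is a name invented for this note). Dimensions must satisfy
the asserts in the dispatch above (w and h powers of two, at least 4), and
mask entries must lie in [0, 64]:

#include <stdint.h>
#include "./vpx_dsp_rtcd.h"

static void blend_two_preds(uint8_t *dst, uint8_t *pred0, uint8_t *pred1,
                            const uint8_t *mask) {
  // Blend two 16x16 predictors with a full-resolution mask
  // (suby == subx == 0); all buffers use a stride of 16.
  vpx_blend_mask6(dst, 16,
                  pred0, 16,
                  pred1, 16,
                  mask, 16,
                  16 /* h */, 16 /* w */,
                  0 /* suby */, 0 /* subx */);
}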
diff --git a/vpx_dsp/x86/fwd_dct32x32_impl_avx2.h b/vpx_dsp/x86/fwd_dct32x32_impl_avx2.h
index 4df39df..951af3a 100644
--- a/vpx_dsp/x86/fwd_dct32x32_impl_avx2.h
+++ b/vpx_dsp/x86/fwd_dct32x32_impl_avx2.h
@@ -10,6 +10,7 @@
#include <immintrin.h> // AVX2
+#include "./vpx_dsp_rtcd.h"
#include "vpx_dsp/txfm_common.h"
#define pair256_set_epi16(a, b) \
diff --git a/vpx_dsp/x86/fwd_txfm_sse2.c b/vpx_dsp/x86/fwd_txfm_sse2.c
index e4deeec..3e4f49b 100644
--- a/vpx_dsp/x86/fwd_txfm_sse2.c
+++ b/vpx_dsp/x86/fwd_txfm_sse2.c
@@ -11,6 +11,7 @@
#include <emmintrin.h> // SSE2
#include "./vpx_config.h"
+#include "./vpx_dsp_rtcd.h"
#include "vpx_dsp/vpx_dsp_common.h"
#include "vpx_dsp/x86/fwd_txfm_sse2.h"
diff --git a/vpx_dsp/x86/masked_variance_intrin_ssse3.c b/vpx_dsp/x86/masked_variance_intrin_ssse3.c
index 47e2c32..a0c2b6e 100644
--- a/vpx_dsp/x86/masked_variance_intrin_ssse3.c
+++ b/vpx_dsp/x86/masked_variance_intrin_ssse3.c
@@ -1380,8 +1380,9 @@
#endif // CONFIG_EXT_PARTITION
#if CONFIG_VP9_HIGHBITDEPTH
-typedef int (*highbd_calc_masked_var_t)(__m128i v_sum_d, __m128i v_sse_q,
- unsigned int* sse, const int w, const int h);
+typedef uint32_t (*highbd_calc_masked_var_t)(__m128i v_sum_d, __m128i v_sse_q,
+ uint32_t *sse,
+ const int w, const int h);
typedef unsigned int (*highbd_variance_fn_t)(
const uint8_t *a8, int a_stride,
const uint8_t *b8, int b_stride,
diff --git a/vpx_ports/mem.h b/vpx_ports/mem.h
index 4dce9c2..993124a 100644
--- a/vpx_ports/mem.h
+++ b/vpx_ports/mem.h
@@ -46,6 +46,11 @@
#define ROUNDZ_POWER_OF_TWO(value, n) \
((n) ? (((value) + (1 << ((n) - 1))) >> (n)) : (value))
+/* Shift down with rounding for signed integers, for use when n > 0 */
+#define ROUND_POWER_OF_TWO_SIGNED(value, n) \
+ (((value) < 0) ? -ROUND_POWER_OF_TWO(-(value), (n)) \
+ : ROUND_POWER_OF_TWO((value), (n)))
+
#define ALIGN_POWER_OF_TWO(value, n) \
(((value) + ((1 << (n)) - 1)) & ~((1 << (n)) - 1))
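Worked values for the new macro, assuming the existing ROUND_POWER_OF_TWO
is the usual ((value) + (1 << ((n) - 1))) >> (n):

  ROUND_POWER_OF_TWO_SIGNED( 3, 1) ==  2   //  1.5 rounds up to 2
  ROUND_POWER_OF_TWO_SIGNED(-3, 1) == -2   // -1.5 rounds to -2; the unsigned
                                           // form would yield -1
  ROUND_POWER_OF_TWO_SIGNED(-5, 2) == -1   // -1.25 rounds to -1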
diff --git a/vpx_scale/generic/yv12extend.c b/vpx_scale/generic/yv12extend.c
index 5212075..d6a88b8 100644
--- a/vpx_scale/generic/yv12extend.c
+++ b/vpx_scale/generic/yv12extend.c
@@ -235,7 +235,7 @@
}
#if CONFIG_VP9_HIGHBITDEPTH
-void memcpy_short_addr(uint8_t *dst8, const uint8_t *src8, int num) {
+static void memcpy_short_addr(uint8_t *dst8, const uint8_t *src8, int num) {
uint16_t *dst = CONVERT_TO_SHORTPTR(dst8);
uint16_t *src = CONVERT_TO_SHORTPTR(src8);
memcpy(dst, src, num * sizeof(uint16_t));
diff --git a/vpxdec.c b/vpxdec.c
index 13b020b..235d17a 100644
--- a/vpxdec.c
+++ b/vpxdec.c
@@ -818,11 +818,6 @@
if (!interface)
interface = get_vpx_decoder_by_index(0);
-#if CONFIG_EXT_TILE
- cfg.tile_row = tile_row;
- cfg.tile_col = tile_col;
-#endif // CONFIG_EXT_TILE
-
dec_flags = (postproc ? VPX_CODEC_USE_POSTPROC : 0) |
(ec_enabled ? VPX_CODEC_USE_ERROR_CONCEALMENT : 0) |
(frame_parallel ? VPX_CODEC_USE_FRAME_THREADING : 0);
@@ -877,6 +872,21 @@
}
#endif
+#if CONFIG_VP10_DECODER && CONFIG_EXT_TILE
+ if (strncmp(decoder.name, "WebM Project VP10", 17) == 0) {
+ if (vpx_codec_control(&decoder, VP10_SET_DECODE_TILE_ROW, tile_row)) {
+ fprintf(stderr, "Failed to set decode_tile_row: %s\n",
+ vpx_codec_error(&decoder));
+ return EXIT_FAILURE;
+ }
+
+ if (vpx_codec_control(&decoder, VP10_SET_DECODE_TILE_COL, tile_col)) {
+ fprintf(stderr, "Failed to set decode_tile_col: %s\n",
+ vpx_codec_error(&decoder));
+ return EXIT_FAILURE;
+ }
+ }
+#endif
if (arg_skip)
fprintf(stderr, "Skipping first %d frames.\n", arg_skip);
diff --git a/vpxenc.c b/vpxenc.c
index 7fb28cd..d988b30 100644
--- a/vpxenc.c
+++ b/vpxenc.c
@@ -1580,8 +1580,18 @@
#if CONFIG_DECODERS
if (global->test_decode != TEST_DECODE_OFF) {
const VpxInterface *decoder = get_vpx_decoder_by_name(global->codec->name);
- vpx_codec_dec_cfg_t cfg = { 0, 0, 0, -1, -1 };
+ vpx_codec_dec_cfg_t cfg = { 0, 0, 0 };
vpx_codec_dec_init(&stream->decoder, decoder->codec_interface(), &cfg, 0);
+
+#if CONFIG_VP10_DECODER && CONFIG_EXT_TILE
+ if (strcmp(global->codec->name, "vp10") == 0) {
+ vpx_codec_control(&stream->decoder, VP10_SET_DECODE_TILE_ROW, -1);
+ ctx_exit_on_error(&stream->decoder, "Failed to set decode_tile_row");
+
+ vpx_codec_control(&stream->decoder, VP10_SET_DECODE_TILE_COL, -1);
+ ctx_exit_on_error(&stream->decoder, "Failed to set decode_tile_col");
+ }
+#endif
}
#endif
}
@@ -1846,26 +1856,25 @@
vpx_codec_control(&stream->encoder, VP8_COPY_REFERENCE, &ref_enc);
vpx_codec_control(&stream->decoder, VP8_COPY_REFERENCE, &ref_dec);
} else {
- struct vp9_ref_frame ref_enc, ref_dec;
+ vpx_codec_control(&stream->encoder, VP10_GET_NEW_FRAME_IMAGE, &enc_img);
+ vpx_codec_control(&stream->decoder, VP10_GET_NEW_FRAME_IMAGE, &dec_img);
- ref_enc.idx = 0;
- ref_dec.idx = 0;
- vpx_codec_control(&stream->encoder, VP9_GET_REFERENCE, &ref_enc);
- enc_img = ref_enc.img;
- vpx_codec_control(&stream->decoder, VP9_GET_REFERENCE, &ref_dec);
- dec_img = ref_dec.img;
#if CONFIG_VP9_HIGHBITDEPTH
if ((enc_img.fmt & VPX_IMG_FMT_HIGHBITDEPTH) !=
(dec_img.fmt & VPX_IMG_FMT_HIGHBITDEPTH)) {
if (enc_img.fmt & VPX_IMG_FMT_HIGHBITDEPTH) {
- vpx_img_alloc(&enc_img, enc_img.fmt - VPX_IMG_FMT_HIGHBITDEPTH,
+ vpx_image_t enc_hbd_img;
+ vpx_img_alloc(&enc_hbd_img, enc_img.fmt - VPX_IMG_FMT_HIGHBITDEPTH,
enc_img.d_w, enc_img.d_h, 16);
- vpx_img_truncate_16_to_8(&enc_img, &ref_enc.img);
+ vpx_img_truncate_16_to_8(&enc_hbd_img, &enc_img);
+ enc_img = enc_hbd_img;
}
if (dec_img.fmt & VPX_IMG_FMT_HIGHBITDEPTH) {
- vpx_img_alloc(&dec_img, dec_img.fmt - VPX_IMG_FMT_HIGHBITDEPTH,
+ vpx_image_t dec_hbd_img;
+ vpx_img_alloc(&dec_hbd_img, dec_img.fmt - VPX_IMG_FMT_HIGHBITDEPTH,
dec_img.d_w, dec_img.d_h, 16);
- vpx_img_truncate_16_to_8(&dec_img, &ref_dec.img);
+ vpx_img_truncate_16_to_8(&dec_hbd_img, &dec_img);
+ dec_img = dec_hbd_img;
}
}
#endif