Merge "Updates for 1-pass CBR rate control."
diff --git a/args.h b/args.h
index ad591af..ea909cb 100644
--- a/args.h
+++ b/args.h
@@ -9,8 +9,8 @@
*/
-#ifndef ARGS_H
-#define ARGS_H
+#ifndef ARGS_H_
+#define ARGS_H_
#include <stdio.h>
struct arg {
@@ -48,4 +48,4 @@
int arg_parse_int(const struct arg *arg);
struct vpx_rational arg_parse_rational(const struct arg *arg);
int arg_parse_enum_or_int(const struct arg *arg);
-#endif
+#endif // ARGS_H_
diff --git a/ivfdec.h b/ivfdec.h
index b1468a9..5da9acc 100644
--- a/ivfdec.h
+++ b/ivfdec.h
@@ -27,4 +27,4 @@
} /* extern "C" */
#endif
-#endif /* IVFDEC_H_ */
+#endif // IVFDEC_H_
diff --git a/ivfenc.h b/ivfenc.h
index a332c7d..b486bc8 100644
--- a/ivfenc.h
+++ b/ivfenc.h
@@ -30,4 +30,4 @@
} /* extern "C" */
#endif
-#endif /* IVFENC_H_ */
+#endif // IVFENC_H_
diff --git a/md5_utils.h b/md5_utils.h
index 81792c4..9935eae 100644
--- a/md5_utils.h
+++ b/md5_utils.h
@@ -20,8 +20,8 @@
* Still in the public domain.
*/
-#ifndef MD5_H
-#define MD5_H
+#ifndef MD5_UTILS_H_
+#define MD5_UTILS_H_
#define md5byte unsigned char
#define UWORD32 unsigned int
@@ -38,4 +38,4 @@
void MD5Final(unsigned char digest[16], struct MD5Context *context);
void MD5Transform(UWORD32 buf[4], UWORD32 const in[16]);
-#endif /* !MD5_H */
+#endif // MD5_UTILS_H_
diff --git a/test/acm_random.h b/test/acm_random.h
index de94186..496dae3 100644
--- a/test/acm_random.h
+++ b/test/acm_random.h
@@ -26,6 +26,11 @@
void Reset(int seed) {
random_.Reseed(seed);
}
+ // Returns a pseudo-random 16-bit value taken from one Generate() draw.
+ // The draw is requested below Random::kMaxRange (1u << 31 in gtest), so it
+ // carries 31 random bits; shifting by 15 keeps the top 16 of them. A shift
+ // of 16 would leave bit 15 of the result permanently clear.
+ uint16_t Rand16(void) {
+ const uint32_t value =
+ random_.Generate(testing::internal::Random::kMaxRange);
+ return (value >> 15) & 0xffff;
+ }
uint8_t Rand8(void) {
const uint32_t value =
diff --git a/test/external_frame_buffer_test.cc b/test/external_frame_buffer_test.cc
index 874d199..48eb853 100644
--- a/test/external_frame_buffer_test.cc
+++ b/test/external_frame_buffer_test.cc
@@ -232,8 +232,10 @@
const int num_buffers = 13;
set_num_buffers(num_buffers);
+#if CONFIG_VP8_DECODER
// Tell compiler we are not using kVP8TestVectors.
(void)libvpx_test::kVP8TestVectors;
+#endif
// Open compressed video file.
if (filename.substr(filename.length() - 3, 3) == "ivf") {
@@ -252,42 +254,51 @@
delete video;
}
-TEST_F(ExternalFrameBufferTest, EightFrameBuffers) {
- // Minimum number of reference buffers for VP9 is 8.
- const int num_buffers = 8;
+TEST_F(ExternalFrameBufferTest, NineFrameBuffers) {
+ // Minimum number of external frame buffers for VP9 is
+ // #VP9_MAXIMUM_REF_BUFFERS + #VPX_MAXIMUM_WORK_BUFFERS.
+ const int num_buffers = VP9_MAXIMUM_REF_BUFFERS + VPX_MAXIMUM_WORK_BUFFERS;
ASSERT_EQ(VPX_CODEC_OK,
SetExternalFrameBuffers(num_buffers, realloc_vp9_frame_buffer));
ASSERT_EQ(VPX_CODEC_OK, DecodeRemainingFrames());
}
TEST_F(ExternalFrameBufferTest, EightJitterBuffers) {
- // Number of buffers equals number of possible reference buffers(8), plus
- // one working buffer, plus eight jitter buffers.
- const int num_buffers = 17;
+ // Number of buffers equals #VP9_MAXIMUM_REF_BUFFERS +
+ // #VPX_MAXIMUM_WORK_BUFFERS + eight jitter buffers.
+ const int jitter_buffers = 8;
+ const int num_buffers =
+ VP9_MAXIMUM_REF_BUFFERS + VPX_MAXIMUM_WORK_BUFFERS + jitter_buffers;
ASSERT_EQ(VPX_CODEC_OK,
SetExternalFrameBuffers(num_buffers, realloc_vp9_frame_buffer));
ASSERT_EQ(VPX_CODEC_OK, DecodeRemainingFrames());
}
TEST_F(ExternalFrameBufferTest, NotEnoughBuffers) {
- // Minimum number of reference buffers for VP9 is 8.
- const int num_buffers = 7;
+ // Minimum number of external frame buffers for VP9 is
+ // #VP9_MAXIMUM_REF_BUFFERS + #VPX_MAXIMUM_WORK_BUFFERS. Set one less.
+ const int num_buffers =
+ VP9_MAXIMUM_REF_BUFFERS + VPX_MAXIMUM_WORK_BUFFERS - 1;
ASSERT_EQ(VPX_CODEC_INVALID_PARAM,
SetExternalFrameBuffers(num_buffers, realloc_vp9_frame_buffer));
}
TEST_F(ExternalFrameBufferTest, NullFrameBufferList) {
- // Number of buffers equals number of possible reference buffers(8), plus
- // one working buffer, plus four jitter buffers.
- const int num_buffers = 13;
+ // Number of buffers equals #VP9_MAXIMUM_REF_BUFFERS +
+ // #VPX_MAXIMUM_WORK_BUFFERS + four jitter buffers.
+ const int jitter_buffers = 4;
+ const int num_buffers =
+ VP9_MAXIMUM_REF_BUFFERS + VPX_MAXIMUM_WORK_BUFFERS + jitter_buffers;
ASSERT_EQ(VPX_CODEC_INVALID_PARAM,
SetNullFrameBuffers(num_buffers, realloc_vp9_frame_buffer));
}
TEST_F(ExternalFrameBufferTest, NullRealloc) {
- // Number of buffers equals number of possible reference buffers(8), plus
- // one working buffer, plus four jitter buffers.
- const int num_buffers = 13;
+ // Number of buffers equals #VP9_MAXIMUM_REF_BUFFERS +
+ // #VPX_MAXIMUM_WORK_BUFFERS + four jitter buffers.
+ const int jitter_buffers = 4;
+ const int num_buffers =
+ VP9_MAXIMUM_REF_BUFFERS + VPX_MAXIMUM_WORK_BUFFERS + jitter_buffers;
ASSERT_EQ(VPX_CODEC_OK,
SetExternalFrameBuffers(num_buffers,
zero_realloc_vp9_frame_buffer));
@@ -295,9 +306,11 @@
}
TEST_F(ExternalFrameBufferTest, ReallocOneLessByte) {
- // Number of buffers equals number of possible reference buffers(8), plus
- // one working buffer, plus four jitter buffers.
- const int num_buffers = 13;
+ // Number of buffers equals #VP9_MAXIMUM_REF_BUFFERS +
+ // #VPX_MAXIMUM_WORK_BUFFERS + four jitter buffers.
+ const int jitter_buffers = 4;
+ const int num_buffers =
+ VP9_MAXIMUM_REF_BUFFERS + VPX_MAXIMUM_WORK_BUFFERS + jitter_buffers;
ASSERT_EQ(VPX_CODEC_OK,
SetExternalFrameBuffers(num_buffers,
one_less_byte_realloc_vp9_frame_buffer));
diff --git a/test/partial_idct_test.cc b/test/partial_idct_test.cc
new file mode 100644
index 0000000..14b78f6
--- /dev/null
+++ b/test/partial_idct_test.cc
@@ -0,0 +1,172 @@
+/*
+ * Copyright (c) 2013 The WebM project authors. All Rights Reserved.
+ *
+ * Use of this source code is governed by a BSD-style license
+ * that can be found in the LICENSE file in the root of the source
+ * tree. An additional intellectual property rights grant can be found
+ * in the file PATENTS. All contributing project authors may
+ * be found in the AUTHORS file in the root of the source tree.
+ */
+
+#include <math.h>
+#include <stdlib.h>
+#include <string.h>
+
+#include "third_party/googletest/src/include/gtest/gtest.h"
+#include "test/acm_random.h"
+#include "test/clear_system_state.h"
+#include "test/register_state_check.h"
+#include "test/util.h"
+
+extern "C" {
+#include "./vp9_rtcd.h"
+#include "vp9/common/vp9_blockd.h"
+#include "vp9/common/vp9_scan.h"
+}
+
+#include "vpx/vpx_integer.h"
+
+using libvpx_test::ACMRandom;
+
+namespace {
+// Function-pointer type for a transform writing int16_t output.
+// NOTE(review): fwd_txfm_t is not referenced anywhere else in this file.
+typedef void (*fwd_txfm_t)(const int16_t *in, int16_t *out, int stride);
+// Function-pointer type for an inverse transform: int16_t coefficients in,
+// uint8_t destination out, with a destination stride.
+typedef void (*inv_txfm_t)(const int16_t *in, uint8_t *out, int stride);
+// Test parameter: (full inverse transform, partial inverse transform,
+// transform size, number of leading scan positions that may be non-zero).
+typedef std::tr1::tuple<inv_txfm_t,
+ inv_txfm_t,
+ TX_SIZE, int> partial_itxfm_param_t;
+// Capacity of the coefficient and destination buffers used by the test.
+const int kMaxNumCoeffs = 1024;
+// Fixture that unpacks one partial_itxfm_param_t into members for the test
+// body and clears system state after each test.
+class PartialIDctTest : public ::testing::TestWithParam<partial_itxfm_param_t> {
+ public:
+ virtual ~PartialIDctTest() {}
+ // Copies the four tuple elements into the members, in tuple order.
+ virtual void SetUp() {
+ full_itxfm_ = GET_PARAM(0);
+ partial_itxfm_ = GET_PARAM(1);
+ tx_size_ = GET_PARAM(2);
+ last_nonzero_ = GET_PARAM(3);
+ }
+
+ virtual void TearDown() { libvpx_test::ClearSystemState(); }
+
+ protected:
+ // Number of scan-order positions the test fills with coefficients.
+ int last_nonzero_;
+ // Transform size selecting both the block dimension and the scan order.
+ TX_SIZE tx_size_;
+ // Transform whose output is treated as the expected result.
+ inv_txfm_t full_itxfm_;
+ // Transform under test; its output must match full_itxfm_ exactly.
+ inv_txfm_t partial_itxfm_;
+};
+
+// Checks that the partial inverse transform produces exactly the same pixels
+// as the full inverse transform when only the first last_nonzero_ positions
+// of the default scan order hold non-zero coefficients.
+TEST_P(PartialIDctTest, ResultsMatch) {
+ ACMRandom rnd(ACMRandom::DeterministicSeed());
+ // Block dimension (width == height) derived from the transform size.
+ int size;
+ switch (tx_size_) {
+ case TX_4X4:
+ size = 4;
+ break;
+ case TX_8X8:
+ size = 8;
+ break;
+ case TX_16X16:
+ size = 16;
+ break;
+ case TX_32X32:
+ size = 32;
+ break;
+ default:
+ ASSERT_TRUE(0) << "Wrong Size!";
+ break;
+ }
+ DECLARE_ALIGNED_ARRAY(16, int16_t, test_coef_block1, kMaxNumCoeffs);
+ DECLARE_ALIGNED_ARRAY(16, int16_t, test_coef_block2, kMaxNumCoeffs);
+ DECLARE_ALIGNED_ARRAY(16, uint8_t, dst1, kMaxNumCoeffs);
+ DECLARE_ALIGNED_ARRAY(16, uint8_t, dst2, kMaxNumCoeffs);
+ const int count_test_block = 1000;
+ const int max_coeff = 32766 / 4;
+ const int block_size = size * size;
+ // Largest squared pixel difference seen across all iterations.
+ int max_error = 0;
+ for (int i = 0; i < count_test_block; ++i) {
+ // clear out destination buffer
+ memset(dst1, 0, sizeof(*dst1) * block_size);
+ memset(dst2, 0, sizeof(*dst2) * block_size);
+ memset(test_coef_block1, 0, sizeof(*test_coef_block1) * block_size);
+ memset(test_coef_block2, 0, sizeof(*test_coef_block2) * block_size);
+ // Energy budget: starts at max_coeff^2 and shrinks by coef^2 for every
+ // coefficient generated, bounding the magnitude of later coefficients.
+ int max_energy_leftover = max_coeff * max_coeff;
+ for (int j = 0; j < last_nonzero_; ++j) {
+ // 1.0 * selects the double overload of sqrt(); passing the int directly
+ // is an ambiguous call on pre-C++11 toolchains.
+ int16_t coef = static_cast<int16_t>(sqrt(1.0 * max_energy_leftover) *
+ (rnd.Rand16() - 32768) / 65536);
+ max_energy_leftover -= coef * coef;
+ // If the budget is overdrawn, drop this coefficient and pin the budget
+ // at zero.
+ if (max_energy_leftover < 0) {
+ max_energy_leftover = 0;
+ coef = 0;
+ }
+ // Place the coefficient at the j-th position of the default scan order.
+ test_coef_block1[vp9_default_scan_orders[tx_size_].scan[j]] = coef;
+ }
+
+ // Give the partial transform its own identical copy of the input.
+ memcpy(test_coef_block2, test_coef_block1,
+ sizeof(*test_coef_block2) * block_size);
+
+ REGISTER_STATE_CHECK(full_itxfm_(test_coef_block1, dst1, size));
+ REGISTER_STATE_CHECK(partial_itxfm_(test_coef_block2, dst2, size));
+
+ for (int j = 0; j < block_size; ++j) {
+ const int diff = dst1[j] - dst2[j];
+ const int error = diff * diff;
+ if (max_error < error)
+ max_error = error;
+ }
+ }
+
+ EXPECT_EQ(0, max_error)
+ << "Error: partial inverse transform produces different results";
+}
+using std::tr1::make_tuple;
+
+// C instantiation. Each tuple is (full transform, partial transform,
+// transform size, number of leading scan positions filled by the test).
+INSTANTIATE_TEST_CASE_P(
+ C, PartialIDctTest,
+ ::testing::Values(
+ make_tuple(vp9_idct32x32_1024_add_c,
+ vp9_idct32x32_34_add_c,
+ TX_32X32, 34),
+ make_tuple(vp9_idct32x32_1024_add_c,
+ vp9_idct32x32_1_add_c,
+ TX_32X32, 1),
+ make_tuple(vp9_idct16x16_256_add_c,
+ vp9_idct16x16_10_add_c,
+ TX_16X16, 10),
+ make_tuple(vp9_idct16x16_256_add_c,
+ vp9_idct16x16_1_add_c,
+ TX_16X16, 1),
+ make_tuple(vp9_idct8x8_64_add_c,
+ vp9_idct8x8_10_add_c,
+ TX_8X8, 10),
+ make_tuple(vp9_idct8x8_64_add_c,
+ vp9_idct8x8_1_add_c,
+ TX_8X8, 1),
+ make_tuple(vp9_idct4x4_16_add_c,
+ vp9_idct4x4_1_add_c,
+ TX_4X4, 1)));
+// SSE2 instantiation, compiled only when HAVE_SSE2 is set. The expected
+// result still comes from the full C transform; only the partial transform
+// under test is the SSE2 version.
+#if HAVE_SSE2
+INSTANTIATE_TEST_CASE_P(
+ SSE2, PartialIDctTest,
+ ::testing::Values(
+ make_tuple(vp9_idct32x32_1024_add_c,
+ vp9_idct32x32_34_add_sse2,
+ TX_32X32, 34),
+ make_tuple(vp9_idct32x32_1024_add_c,
+ vp9_idct32x32_1_add_sse2,
+ TX_32X32, 1),
+ make_tuple(vp9_idct16x16_256_add_c,
+ vp9_idct16x16_10_add_sse2,
+ TX_16X16, 10),
+ make_tuple(vp9_idct16x16_256_add_c,
+ vp9_idct16x16_1_add_sse2,
+ TX_16X16, 1),
+ make_tuple(vp9_idct8x8_64_add_c,
+ vp9_idct8x8_10_add_sse2,
+ TX_8X8, 10),
+ make_tuple(vp9_idct8x8_64_add_c,
+ vp9_idct8x8_1_add_sse2,
+ TX_8X8, 1),
+ make_tuple(vp9_idct4x4_16_add_c,
+ vp9_idct4x4_1_add_sse2,
+ TX_4X4, 1)));
+#endif
+} // namespace
diff --git a/test/test.mk b/test/test.mk
index 2905a1a..13e1e3a 100644
--- a/test/test.mk
+++ b/test/test.mk
@@ -92,6 +92,7 @@
# IDCT test currently depends on FDCT function
LIBVPX_TEST_SRCS-yes += idct8x8_test.cc
+LIBVPX_TEST_SRCS-yes += partial_idct_test.cc
LIBVPX_TEST_SRCS-yes += superframe_test.cc
LIBVPX_TEST_SRCS-yes += tile_independence_test.cc
endif
diff --git a/vp9/common/arm/neon/vp9_loopfilter_16_neon.asm b/vp9/common/arm/neon/vp9_loopfilter_16_neon.asm
index 751bc74..5b8ec20 100644
--- a/vp9/common/arm/neon/vp9_loopfilter_16_neon.asm
+++ b/vp9/common/arm/neon/vp9_loopfilter_16_neon.asm
@@ -8,18 +8,18 @@
; be found in the AUTHORS file in the root of the source tree.
;
- EXPORT |vp9_loop_filter_horizontal_edge_16_neon|
+ EXPORT |vp9_lpf_horizontal_4_dual_neon|
ARM
AREA ||.text||, CODE, READONLY, ALIGN=2
-;void vp9_loop_filter_horizontal_edge_16_neon(uint8_t *s, int p,
-; const uint8_t *blimit0,
-; const uint8_t *limit0,
-; const uint8_t *thresh0,
-; const uint8_t *blimit1,
-; const uint8_t *limit1,
-; const uint8_t *thresh1)
+;void vp9_lpf_horizontal_4_dual_neon(uint8_t *s, int p,
+; const uint8_t *blimit0,
+; const uint8_t *limit0,
+; const uint8_t *thresh0,
+; const uint8_t *blimit1,
+; const uint8_t *limit1,
+; const uint8_t *thresh1)
; r0 uint8_t *s,
; r1 int p,
; r2 const uint8_t *blimit0,
@@ -29,7 +29,7 @@
; sp+8 const uint8_t *limit1,
; sp+12 const uint8_t *thresh1,
-|vp9_loop_filter_horizontal_edge_16_neon| PROC
+|vp9_lpf_horizontal_4_dual_neon| PROC
push {lr}
ldr r12, [sp, #4] ; load thresh0
@@ -76,7 +76,7 @@
vpop {d8-d15} ; restore neon registers
pop {pc}
- ENDP ; |vp9_loop_filter_horizontal_edge_16_neon|
+ ENDP ; |vp9_lpf_horizontal_4_dual_neon|
; void vp9_loop_filter_neon_16();
; This is a helper function for the loopfilters. The invidual functions do the
diff --git a/vp9/common/arm/neon/vp9_loopfilter_16_neon.c b/vp9/common/arm/neon/vp9_loopfilter_16_neon.c
index b97e7aa..0820db2 100644
--- a/vp9/common/arm/neon/vp9_loopfilter_16_neon.c
+++ b/vp9/common/arm/neon/vp9_loopfilter_16_neon.c
@@ -10,44 +10,43 @@
#include "./vp9_rtcd.h"
-void vp9_mbloop_filter_horizontal_edge_16_neon(uint8_t *s, int p /* pitch */,
- const uint8_t *blimit0,
- const uint8_t *limit0,
- const uint8_t *thresh0,
- const uint8_t *blimit1,
- const uint8_t *limit1,
- const uint8_t *thresh1) {
- vp9_mbloop_filter_horizontal_edge(s, p, blimit0, limit0, thresh0, 1);
- vp9_mbloop_filter_horizontal_edge(s + 8, p, blimit1, limit1, thresh1, 1);
+// Dual variant: calls vp9_lpf_horizontal_8 once at s with the
+// blimit0/limit0/thresh0 set and once at s + 8 with the
+// blimit1/limit1/thresh1 set, each with a count of 1.
+// NOTE(review): the callee has no _neon suffix, unlike the other dual
+// wrappers in this file -- presumably the dispatch symbol; confirm.
+void vp9_lpf_horizontal_8_dual_neon(uint8_t *s, int p /* pitch */,
+ const uint8_t *blimit0,
+ const uint8_t *limit0,
+ const uint8_t *thresh0,
+ const uint8_t *blimit1,
+ const uint8_t *limit1,
+ const uint8_t *thresh1) {
+ vp9_lpf_horizontal_8(s, p, blimit0, limit0, thresh0, 1);
+ vp9_lpf_horizontal_8(s + 8, p, blimit1, limit1, thresh1, 1);
}
-void vp9_loop_filter_vertical_edge_16_neon(uint8_t *s, int p,
- const uint8_t *blimit0,
- const uint8_t *limit0,
- const uint8_t *thresh0,
- const uint8_t *blimit1,
- const uint8_t *limit1,
- const uint8_t *thresh1) {
- vp9_loop_filter_vertical_edge_neon(s, p, blimit0, limit0, thresh0, 1);
- vp9_loop_filter_vertical_edge_neon(s + 8 * p, p, blimit1, limit1, thresh1, 1);
+// Dual variant: calls vp9_lpf_vertical_4_neon once at s with the
+// blimit0/limit0/thresh0 set and once at s + 8 * p with the
+// blimit1/limit1/thresh1 set, each with a count of 1.
+void vp9_lpf_vertical_4_dual_neon(uint8_t *s, int p,
+ const uint8_t *blimit0,
+ const uint8_t *limit0,
+ const uint8_t *thresh0,
+ const uint8_t *blimit1,
+ const uint8_t *limit1,
+ const uint8_t *thresh1) {
+ vp9_lpf_vertical_4_neon(s, p, blimit0, limit0, thresh0, 1);
+ vp9_lpf_vertical_4_neon(s + 8 * p, p, blimit1, limit1, thresh1, 1);
}
-void vp9_mbloop_filter_vertical_edge_16_neon(uint8_t *s, int p,
- const uint8_t *blimit0,
- const uint8_t *limit0,
- const uint8_t *thresh0,
- const uint8_t *blimit1,
- const uint8_t *limit1,
- const uint8_t *thresh1) {
- vp9_mbloop_filter_vertical_edge_neon(s, p, blimit0, limit0, thresh0, 1);
- vp9_mbloop_filter_vertical_edge_neon(s + 8 * p, p, blimit1, limit1, thresh1,
- 1);
+// Dual variant: calls vp9_lpf_vertical_8_neon once at s with the
+// blimit0/limit0/thresh0 set and once at s + 8 * p with the
+// blimit1/limit1/thresh1 set, each with a count of 1.
+void vp9_lpf_vertical_8_dual_neon(uint8_t *s, int p,
+ const uint8_t *blimit0,
+ const uint8_t *limit0,
+ const uint8_t *thresh0,
+ const uint8_t *blimit1,
+ const uint8_t *limit1,
+ const uint8_t *thresh1) {
+ vp9_lpf_vertical_8_neon(s, p, blimit0, limit0, thresh0, 1);
+ vp9_lpf_vertical_8_neon(s + 8 * p, p, blimit1, limit1, thresh1, 1);
}
-void vp9_mb_lpf_vertical_edge_w_16_neon(uint8_t *s, int p,
- const uint8_t *blimit,
- const uint8_t *limit,
- const uint8_t *thresh) {
- vp9_mb_lpf_vertical_edge_w_neon(s, p, blimit, limit, thresh);
- vp9_mb_lpf_vertical_edge_w_neon(s + 8 * p, p, blimit, limit, thresh);
+// Dual variant: calls vp9_lpf_vertical_16_neon at s and again at s + 8 * p.
+// Unlike the 4 and 8 dual wrappers, both calls share one
+// blimit/limit/thresh set.
+void vp9_lpf_vertical_16_dual_neon(uint8_t *s, int p,
+ const uint8_t *blimit,
+ const uint8_t *limit,
+ const uint8_t *thresh) {
+ vp9_lpf_vertical_16_neon(s, p, blimit, limit, thresh);
+ vp9_lpf_vertical_16_neon(s + 8 * p, p, blimit, limit, thresh);
}
diff --git a/vp9/common/arm/neon/vp9_loopfilter_neon.asm b/vp9/common/arm/neon/vp9_loopfilter_neon.asm
index 8b4fe5d..4430322 100644
--- a/vp9/common/arm/neon/vp9_loopfilter_neon.asm
+++ b/vp9/common/arm/neon/vp9_loopfilter_neon.asm
@@ -8,10 +8,10 @@
; be found in the AUTHORS file in the root of the source tree.
;
- EXPORT |vp9_loop_filter_horizontal_edge_neon|
- EXPORT |vp9_loop_filter_vertical_edge_neon|
- EXPORT |vp9_mbloop_filter_horizontal_edge_neon|
- EXPORT |vp9_mbloop_filter_vertical_edge_neon|
+ EXPORT |vp9_lpf_horizontal_4_neon|
+ EXPORT |vp9_lpf_vertical_4_neon|
+ EXPORT |vp9_lpf_horizontal_8_neon|
+ EXPORT |vp9_lpf_vertical_8_neon|
ARM
AREA ||.text||, CODE, READONLY, ALIGN=2
@@ -21,12 +21,12 @@
; TODO(fgalligan): See about removing the count code as this function is only
; called with a count of 1.
;
-; void vp9_loop_filter_horizontal_edge_neon(uint8_t *s,
-; int p /* pitch */,
-; const uint8_t *blimit,
-; const uint8_t *limit,
-; const uint8_t *thresh,
-; int count)
+; void vp9_lpf_horizontal_4_neon(uint8_t *s,
+; int p /* pitch */,
+; const uint8_t *blimit,
+; const uint8_t *limit,
+; const uint8_t *thresh,
+; int count)
;
; r0 uint8_t *s,
; r1 int p, /* pitch */
@@ -34,7 +34,7 @@
; r3 const uint8_t *limit,
; sp const uint8_t *thresh,
; sp+4 int count
-|vp9_loop_filter_horizontal_edge_neon| PROC
+|vp9_lpf_horizontal_4_neon| PROC
push {lr}
vld1.8 {d0[]}, [r2] ; duplicate *blimit
@@ -77,19 +77,19 @@
end_vp9_lf_h_edge
pop {pc}
- ENDP ; |vp9_loop_filter_horizontal_edge_neon|
+ ENDP ; |vp9_lpf_horizontal_4_neon|
; Currently vp9 only works on iterations 8 at a time. The vp8 loop filter
; works on 16 iterations at a time.
; TODO(fgalligan): See about removing the count code as this function is only
; called with a count of 1.
;
-; void vp9_loop_filter_vertical_edge_neon(uint8_t *s,
-; int p /* pitch */,
-; const uint8_t *blimit,
-; const uint8_t *limit,
-; const uint8_t *thresh,
-; int count)
+; void vp9_lpf_vertical_4_neon(uint8_t *s,
+; int p /* pitch */,
+; const uint8_t *blimit,
+; const uint8_t *limit,
+; const uint8_t *thresh,
+; int count)
;
; r0 uint8_t *s,
; r1 int p, /* pitch */
@@ -97,7 +97,7 @@
; r3 const uint8_t *limit,
; sp const uint8_t *thresh,
; sp+4 int count
-|vp9_loop_filter_vertical_edge_neon| PROC
+|vp9_lpf_vertical_4_neon| PROC
push {lr}
vld1.8 {d0[]}, [r2] ; duplicate *blimit
@@ -158,7 +158,7 @@
end_vp9_lf_v_edge
pop {pc}
- ENDP ; |vp9_loop_filter_vertical_edge_neon|
+ ENDP ; |vp9_lpf_vertical_4_neon|
; void vp9_loop_filter_neon();
; This is a helper function for the loopfilters. The invidual functions do the
@@ -276,18 +276,18 @@
bx lr
ENDP ; |vp9_loop_filter_neon|
-; void vp9_mbloop_filter_horizontal_edge_neon(uint8_t *s, int p,
-; const uint8_t *blimit,
-; const uint8_t *limit,
-; const uint8_t *thresh,
-; int count)
+; void vp9_lpf_horizontal_8_neon(uint8_t *s, int p,
+; const uint8_t *blimit,
+; const uint8_t *limit,
+; const uint8_t *thresh,
+; int count)
; r0 uint8_t *s,
; r1 int p, /* pitch */
; r2 const uint8_t *blimit,
; r3 const uint8_t *limit,
; sp const uint8_t *thresh,
; sp+4 int count
-|vp9_mbloop_filter_horizontal_edge_neon| PROC
+|vp9_lpf_horizontal_8_neon| PROC
push {r4-r5, lr}
vld1.8 {d0[]}, [r2] ; duplicate *blimit
@@ -333,14 +333,14 @@
end_vp9_mblf_h_edge
pop {r4-r5, pc}
- ENDP ; |vp9_mbloop_filter_horizontal_edge_neon|
+ ENDP ; |vp9_lpf_horizontal_8_neon|
-; void vp9_mbloop_filter_vertical_edge_neon(uint8_t *s,
-; int pitch,
-; const uint8_t *blimit,
-; const uint8_t *limit,
-; const uint8_t *thresh,
-; int count)
+; void vp9_lpf_vertical_8_neon(uint8_t *s,
+; int pitch,
+; const uint8_t *blimit,
+; const uint8_t *limit,
+; const uint8_t *thresh,
+; int count)
;
; r0 uint8_t *s,
; r1 int pitch,
@@ -348,7 +348,7 @@
; r3 const uint8_t *limit,
; sp const uint8_t *thresh,
; sp+4 int count
-|vp9_mbloop_filter_vertical_edge_neon| PROC
+|vp9_lpf_vertical_8_neon| PROC
push {r4-r5, lr}
vld1.8 {d0[]}, [r2] ; duplicate *blimit
@@ -420,7 +420,7 @@
end_vp9_mblf_v_edge
pop {r4-r5, pc}
- ENDP ; |vp9_mbloop_filter_vertical_edge_neon|
+ ENDP ; |vp9_lpf_vertical_8_neon|
; void vp9_mbloop_filter_neon();
; This is a helper function for the loopfilters. The invidual functions do the
diff --git a/vp9/common/arm/neon/vp9_mb_lpf_neon.asm b/vp9/common/arm/neon/vp9_mb_lpf_neon.asm
index 2e8001b..8cb913c 100644
--- a/vp9/common/arm/neon/vp9_mb_lpf_neon.asm
+++ b/vp9/common/arm/neon/vp9_mb_lpf_neon.asm
@@ -8,23 +8,23 @@
; be found in the AUTHORS file in the root of the source tree.
;
- EXPORT |vp9_mb_lpf_horizontal_edge_w_neon|
- EXPORT |vp9_mb_lpf_vertical_edge_w_neon|
+ EXPORT |vp9_lpf_horizontal_16_neon|
+ EXPORT |vp9_lpf_vertical_16_neon|
ARM
AREA ||.text||, CODE, READONLY, ALIGN=2
-; void vp9_mb_lpf_horizontal_edge_w_neon(uint8_t *s, int p,
-; const uint8_t *blimit,
-; const uint8_t *limit,
-; const uint8_t *thresh
-; int count)
+; void vp9_lpf_horizontal_16_neon(uint8_t *s, int p,
+; const uint8_t *blimit,
+; const uint8_t *limit,
+; const uint8_t *thresh,
+; int count)
; r0 uint8_t *s,
; r1 int p, /* pitch */
; r2 const uint8_t *blimit,
; r3 const uint8_t *limit,
; sp const uint8_t *thresh,
-|vp9_mb_lpf_horizontal_edge_w_neon| PROC
+|vp9_lpf_horizontal_16_neon| PROC
push {r4-r8, lr}
vpush {d8-d15}
ldr r4, [sp, #88] ; load thresh
@@ -115,18 +115,18 @@
vpop {d8-d15}
pop {r4-r8, pc}
- ENDP ; |vp9_mb_lpf_horizontal_edge_w_neon|
+ ENDP ; |vp9_lpf_horizontal_16_neon|
-; void vp9_mb_lpf_vertical_edge_w_neon(uint8_t *s, int p,
-; const uint8_t *blimit,
-; const uint8_t *limit,
-; const uint8_t *thresh)
+; void vp9_lpf_vertical_16_neon(uint8_t *s, int p,
+; const uint8_t *blimit,
+; const uint8_t *limit,
+; const uint8_t *thresh)
; r0 uint8_t *s,
; r1 int p, /* pitch */
; r2 const uint8_t *blimit,
; r3 const uint8_t *limit,
; sp const uint8_t *thresh,
-|vp9_mb_lpf_vertical_edge_w_neon| PROC
+|vp9_lpf_vertical_16_neon| PROC
push {r4-r8, lr}
vpush {d8-d15}
ldr r4, [sp, #88] ; load thresh
@@ -279,7 +279,7 @@
vpop {d8-d15}
pop {r4-r8, pc}
- ENDP ; |vp9_mb_lpf_vertical_edge_w_neon|
+ ENDP ; |vp9_lpf_vertical_16_neon|
; void vp9_wide_mbfilter_neon();
; This is a helper function for the loopfilters. The invidual functions do the
diff --git a/vp9/common/mips/dspr2/vp9_loopfilter_filters_dspr2.c b/vp9/common/mips/dspr2/vp9_loopfilter_filters_dspr2.c
index 0c0f155..3df7f4c 100644
--- a/vp9/common/mips/dspr2/vp9_loopfilter_filters_dspr2.c
+++ b/vp9/common/mips/dspr2/vp9_loopfilter_filters_dspr2.c
@@ -20,12 +20,12 @@
#include "vp9/common/mips/dspr2/vp9_loopfilter_filters_dspr2.h"
#if HAVE_DSPR2
-void vp9_loop_filter_horizontal_edge_dspr2(unsigned char *s,
- int pitch,
- const uint8_t *blimit,
- const uint8_t *limit,
- const uint8_t *thresh,
- int count) {
+void vp9_lpf_horizontal_4_dspr2(unsigned char *s,
+ int pitch,
+ const uint8_t *blimit,
+ const uint8_t *limit,
+ const uint8_t *thresh,
+ int count) {
uint8_t i;
uint32_t mask;
uint32_t hev;
@@ -114,12 +114,12 @@
}
}
-void vp9_loop_filter_vertical_edge_dspr2(unsigned char *s,
- int pitch,
- const uint8_t *blimit,
- const uint8_t *limit,
- const uint8_t *thresh,
- int count) {
+void vp9_lpf_vertical_4_dspr2(unsigned char *s,
+ int pitch,
+ const uint8_t *blimit,
+ const uint8_t *limit,
+ const uint8_t *thresh,
+ int count) {
uint8_t i;
uint32_t mask, hev;
uint32_t pm1, p0, p1, p2, p3, p4, p5, p6;
@@ -307,58 +307,56 @@
}
}
-void vp9_loop_filter_horizontal_edge_16_dspr2(uint8_t *s, int p /* pitch */,
- const uint8_t *blimit0,
- const uint8_t *limit0,
- const uint8_t *thresh0,
- const uint8_t *blimit1,
- const uint8_t *limit1,
- const uint8_t *thresh1) {
- vp9_loop_filter_horizontal_edge_dspr2(s, p, blimit0, limit0, thresh0, 1);
- vp9_loop_filter_horizontal_edge_dspr2(s + 8, p, blimit1, limit1, thresh1, 1);
+// Dual variant: calls vp9_lpf_horizontal_4_dspr2 once at s with the
+// blimit0/limit0/thresh0 set and once at s + 8 with the
+// blimit1/limit1/thresh1 set, each with a count of 1.
+void vp9_lpf_horizontal_4_dual_dspr2(uint8_t *s, int p /* pitch */,
+ const uint8_t *blimit0,
+ const uint8_t *limit0,
+ const uint8_t *thresh0,
+ const uint8_t *blimit1,
+ const uint8_t *limit1,
+ const uint8_t *thresh1) {
+ vp9_lpf_horizontal_4_dspr2(s, p, blimit0, limit0, thresh0, 1);
+ vp9_lpf_horizontal_4_dspr2(s + 8, p, blimit1, limit1, thresh1, 1);
}
-void vp9_mbloop_filter_horizontal_edge_16_dspr2(uint8_t *s, int p /* pitch */,
- const uint8_t *blimit0,
- const uint8_t *limit0,
- const uint8_t *thresh0,
- const uint8_t *blimit1,
- const uint8_t *limit1,
- const uint8_t *thresh1) {
- vp9_mbloop_filter_horizontal_edge_dspr2(s, p, blimit0, limit0, thresh0, 1);
- vp9_mbloop_filter_horizontal_edge_dspr2(s + 8, p, blimit1, limit1, thresh1,
- 1);
+// Dual variant: calls vp9_lpf_horizontal_8_dspr2 once at s with the
+// blimit0/limit0/thresh0 set and once at s + 8 with the
+// blimit1/limit1/thresh1 set, each with a count of 1.
+void vp9_lpf_horizontal_8_dual_dspr2(uint8_t *s, int p /* pitch */,
+ const uint8_t *blimit0,
+ const uint8_t *limit0,
+ const uint8_t *thresh0,
+ const uint8_t *blimit1,
+ const uint8_t *limit1,
+ const uint8_t *thresh1) {
+ vp9_lpf_horizontal_8_dspr2(s, p, blimit0, limit0, thresh0, 1);
+ vp9_lpf_horizontal_8_dspr2(s + 8, p, blimit1, limit1, thresh1, 1);
}
-void vp9_loop_filter_vertical_edge_16_dspr2(uint8_t *s, int p,
- const uint8_t *blimit0,
- const uint8_t *limit0,
- const uint8_t *thresh0,
- const uint8_t *blimit1,
- const uint8_t *limit1,
- const uint8_t *thresh1) {
- vp9_loop_filter_vertical_edge_dspr2(s, p, blimit0, limit0, thresh0, 1);
- vp9_loop_filter_vertical_edge_dspr2(s + 8 * p, p, blimit1, limit1, thresh1,
- 1);
+// Dual variant: calls vp9_lpf_vertical_4_dspr2 once at s with the
+// blimit0/limit0/thresh0 set and once at s + 8 * p with the
+// blimit1/limit1/thresh1 set, each with a count of 1.
+void vp9_lpf_vertical_4_dual_dspr2(uint8_t *s, int p,
+ const uint8_t *blimit0,
+ const uint8_t *limit0,
+ const uint8_t *thresh0,
+ const uint8_t *blimit1,
+ const uint8_t *limit1,
+ const uint8_t *thresh1) {
+ vp9_lpf_vertical_4_dspr2(s, p, blimit0, limit0, thresh0, 1);
+ vp9_lpf_vertical_4_dspr2(s + 8 * p, p, blimit1, limit1, thresh1, 1);
}
-void vp9_mbloop_filter_vertical_edge_16_dspr2(uint8_t *s, int p,
- const uint8_t *blimit0,
- const uint8_t *limit0,
- const uint8_t *thresh0,
- const uint8_t *blimit1,
- const uint8_t *limit1,
- const uint8_t *thresh1) {
- vp9_mbloop_filter_vertical_edge_dspr2(s, p, blimit0, limit0, thresh0, 1);
- vp9_mbloop_filter_vertical_edge_dspr2(s + 8 * p, p, blimit1, limit1, thresh1,
+void vp9_lpf_vertical_8_dual_dspr2(uint8_t *s, int p,
+ const uint8_t *blimit0,
+ const uint8_t *limit0,
+ const uint8_t *thresh0,
+ const uint8_t *blimit1,
+ const uint8_t *limit1,
+ const uint8_t *thresh1) {
+ vp9_lpf_vertical_8_dspr2(s, p, blimit0, limit0, thresh0, 1);
+ vp9_lpf_vertical_8_dspr2(s + 8 * p, p, blimit1, limit1, thresh1,
1);
}
-void vp9_mb_lpf_vertical_edge_w_16_dspr2(uint8_t *s, int p,
- const uint8_t *blimit,
- const uint8_t *limit,
- const uint8_t *thresh) {
- vp9_mb_lpf_vertical_edge_w_dspr2(s, p, blimit, limit, thresh);
- vp9_mb_lpf_vertical_edge_w_dspr2(s + 8 * p, p, blimit, limit, thresh);
+// Dual variant: calls vp9_lpf_vertical_16_dspr2 at s and again at
+// s + 8 * p. Both calls share one blimit/limit/thresh set.
+void vp9_lpf_vertical_16_dual_dspr2(uint8_t *s, int p,
+ const uint8_t *blimit,
+ const uint8_t *limit,
+ const uint8_t *thresh) {
+ vp9_lpf_vertical_16_dspr2(s, p, blimit, limit, thresh);
+ vp9_lpf_vertical_16_dspr2(s + 8 * p, p, blimit, limit, thresh);
}
#endif // #if HAVE_DSPR2
diff --git a/vp9/common/mips/dspr2/vp9_mbloop_loopfilter_dspr2.c b/vp9/common/mips/dspr2/vp9_mbloop_loopfilter_dspr2.c
index adfd755..7cd0b63 100644
--- a/vp9/common/mips/dspr2/vp9_mbloop_loopfilter_dspr2.c
+++ b/vp9/common/mips/dspr2/vp9_mbloop_loopfilter_dspr2.c
@@ -20,12 +20,12 @@
#include "vp9/common/mips/dspr2/vp9_loopfilter_filters_dspr2.h"
#if HAVE_DSPR2
-void vp9_mbloop_filter_horizontal_edge_dspr2(unsigned char *s,
- int pitch,
- const uint8_t *blimit,
- const uint8_t *limit,
- const uint8_t *thresh,
- int count) {
+void vp9_lpf_horizontal_8_dspr2(unsigned char *s,
+ int pitch,
+ const uint8_t *blimit,
+ const uint8_t *limit,
+ const uint8_t *thresh,
+ int count) {
uint32_t mask;
uint32_t hev, flat;
uint8_t i;
@@ -319,12 +319,12 @@
}
}
-void vp9_mbloop_filter_vertical_edge_dspr2(unsigned char *s,
- int pitch,
- const uint8_t *blimit,
- const uint8_t *limit,
- const uint8_t *thresh,
- int count) {
+void vp9_lpf_vertical_8_dspr2(unsigned char *s,
+ int pitch,
+ const uint8_t *blimit,
+ const uint8_t *limit,
+ const uint8_t *thresh,
+ int count) {
uint8_t i;
uint32_t mask, hev, flat;
uint8_t *s1, *s2, *s3, *s4;
diff --git a/vp9/common/mips/dspr2/vp9_mblpf_horiz_loopfilter_dspr2.c b/vp9/common/mips/dspr2/vp9_mblpf_horiz_loopfilter_dspr2.c
index 0759755..6c94674 100644
--- a/vp9/common/mips/dspr2/vp9_mblpf_horiz_loopfilter_dspr2.c
+++ b/vp9/common/mips/dspr2/vp9_mblpf_horiz_loopfilter_dspr2.c
@@ -20,12 +20,12 @@
#include "vp9/common/mips/dspr2/vp9_loopfilter_filters_dspr2.h"
#if HAVE_DSPR2
-void vp9_mb_lpf_horizontal_edge_w_dspr2(unsigned char *s,
- int pitch,
- const uint8_t *blimit,
- const uint8_t *limit,
- const uint8_t *thresh,
- int count) {
+void vp9_lpf_horizontal_16_dspr2(unsigned char *s,
+ int pitch,
+ const uint8_t *blimit,
+ const uint8_t *limit,
+ const uint8_t *thresh,
+ int count) {
uint32_t mask;
uint32_t hev, flat, flat2;
uint8_t i;
diff --git a/vp9/common/mips/dspr2/vp9_mblpf_vert_loopfilter_dspr2.c b/vp9/common/mips/dspr2/vp9_mblpf_vert_loopfilter_dspr2.c
index 9e9171c..851fc6c 100644
--- a/vp9/common/mips/dspr2/vp9_mblpf_vert_loopfilter_dspr2.c
+++ b/vp9/common/mips/dspr2/vp9_mblpf_vert_loopfilter_dspr2.c
@@ -20,11 +20,11 @@
#include "vp9/common/mips/dspr2/vp9_loopfilter_filters_dspr2.h"
#if HAVE_DSPR2
-void vp9_mb_lpf_vertical_edge_w_dspr2(uint8_t *s,
- int pitch,
- const uint8_t *blimit,
- const uint8_t *limit,
- const uint8_t *thresh) {
+void vp9_lpf_vertical_16_dspr2(uint8_t *s,
+ int pitch,
+ const uint8_t *blimit,
+ const uint8_t *limit,
+ const uint8_t *thresh) {
uint8_t i;
uint32_t mask, hev, flat, flat2;
uint8_t *s1, *s2, *s3, *s4;
diff --git a/vp9/common/vp9_loopfilter.c b/vp9/common/vp9_loopfilter.c
index 72adf92..2266e0e 100644
--- a/vp9/common/vp9_loopfilter.c
+++ b/vp9/common/vp9_loopfilter.c
@@ -221,23 +221,10 @@
static const uint16_t left_border_uv = 0x1111;
static const uint16_t above_border_uv = 0x000f;
-
-static void lf_init_lut(loop_filter_info_n *lfi) {
- lfi->mode_lf_lut[DC_PRED] = 0;
- lfi->mode_lf_lut[D45_PRED] = 0;
- lfi->mode_lf_lut[D135_PRED] = 0;
- lfi->mode_lf_lut[D117_PRED] = 0;
- lfi->mode_lf_lut[D153_PRED] = 0;
- lfi->mode_lf_lut[D207_PRED] = 0;
- lfi->mode_lf_lut[D63_PRED] = 0;
- lfi->mode_lf_lut[V_PRED] = 0;
- lfi->mode_lf_lut[H_PRED] = 0;
- lfi->mode_lf_lut[TM_PRED] = 0;
- lfi->mode_lf_lut[ZEROMV] = 0;
- lfi->mode_lf_lut[NEARESTMV] = 1;
- lfi->mode_lf_lut[NEARMV] = 1;
- lfi->mode_lf_lut[NEWMV] = 1;
-}
+// Per-prediction-mode flag table with MB_MODE_COUNT entries: ten zeros for
+// the intra modes, then 1, 1, 0, 1 for the inter modes, with ZEROMV being
+// the zero entry.
+// NOTE(review): the positional initializer assumes the mode enum lists ten
+// intra modes followed by NEARESTMV, NEARMV, ZEROMV, NEWMV in that order --
+// confirm against the enum declaration.
+static const int mode_lf_lut[MB_MODE_COUNT] = {
+ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, // INTRA_MODES
+ 1, 1, 0, 1 // INTER_MODES (ZEROMV == 0)
+};
static void update_sharpness(loop_filter_info_n *lfi, int sharpness_lvl) {
int lvl;
@@ -270,9 +257,6 @@
update_sharpness(lfi, lf->sharpness_level);
lf->last_sharpness_level = lf->sharpness_level;
- // init LUT for lvl and hev thr picking
- lf_init_lut(lfi);
-
// init hev threshold const vectors
for (lvl = 0; lvl <= MAX_LOOP_FILTER; lvl++)
vpx_memset(lfi->lfthr[lvl].hev_thr, (lvl >> 4), SIMD_WIDTH);
@@ -355,56 +339,56 @@
if (mask & 1) {
if ((mask_16x16_0 | mask_16x16_1) & 1) {
if ((mask_16x16_0 & mask_16x16_1) & 1) {
- vp9_mb_lpf_vertical_edge_w_16(s, pitch, lfi0->mblim, lfi0->lim,
- lfi0->hev_thr);
+ vp9_lpf_vertical_16_dual(s, pitch, lfi0->mblim, lfi0->lim,
+ lfi0->hev_thr);
} else if (mask_16x16_0 & 1) {
- vp9_mb_lpf_vertical_edge_w(s, pitch, lfi0->mblim, lfi0->lim,
- lfi0->hev_thr);
+ vp9_lpf_vertical_16(s, pitch, lfi0->mblim, lfi0->lim,
+ lfi0->hev_thr);
} else {
- vp9_mb_lpf_vertical_edge_w(s + 8 *pitch, pitch, lfi1->mblim,
- lfi1->lim, lfi1->hev_thr);
+ vp9_lpf_vertical_16(s + 8 *pitch, pitch, lfi1->mblim,
+ lfi1->lim, lfi1->hev_thr);
}
}
if ((mask_8x8_0 | mask_8x8_1) & 1) {
if ((mask_8x8_0 & mask_8x8_1) & 1) {
- vp9_mbloop_filter_vertical_edge_16(s, pitch, lfi0->mblim, lfi0->lim,
- lfi0->hev_thr, lfi1->mblim,
- lfi1->lim, lfi1->hev_thr);
+ vp9_lpf_vertical_8_dual(s, pitch, lfi0->mblim, lfi0->lim,
+ lfi0->hev_thr, lfi1->mblim, lfi1->lim,
+ lfi1->hev_thr);
} else if (mask_8x8_0 & 1) {
- vp9_mbloop_filter_vertical_edge(s, pitch, lfi0->mblim, lfi0->lim,
- lfi0->hev_thr, 1);
+ vp9_lpf_vertical_8(s, pitch, lfi0->mblim, lfi0->lim, lfi0->hev_thr,
+ 1);
} else {
- vp9_mbloop_filter_vertical_edge(s + 8 *pitch, pitch, lfi1->mblim,
- lfi1->lim, lfi1->hev_thr, 1);
+ vp9_lpf_vertical_8(s + 8 * pitch, pitch, lfi1->mblim, lfi1->lim,
+ lfi1->hev_thr, 1);
}
}
if ((mask_4x4_0 | mask_4x4_1) & 1) {
if ((mask_4x4_0 & mask_4x4_1) & 1) {
- vp9_loop_filter_vertical_edge_16(s, pitch, lfi0->mblim, lfi0->lim,
- lfi0->hev_thr, lfi1->mblim,
- lfi1->lim, lfi1->hev_thr);
+ vp9_lpf_vertical_4_dual(s, pitch, lfi0->mblim, lfi0->lim,
+ lfi0->hev_thr, lfi1->mblim, lfi1->lim,
+ lfi1->hev_thr);
} else if (mask_4x4_0 & 1) {
- vp9_loop_filter_vertical_edge(s, pitch, lfi0->mblim, lfi0->lim,
- lfi0->hev_thr, 1);
+ vp9_lpf_vertical_4(s, pitch, lfi0->mblim, lfi0->lim, lfi0->hev_thr,
+ 1);
} else {
- vp9_loop_filter_vertical_edge(s + 8 *pitch, pitch, lfi1->mblim,
- lfi1->lim, lfi1->hev_thr, 1);
+ vp9_lpf_vertical_4(s + 8 * pitch, pitch, lfi1->mblim, lfi1->lim,
+ lfi1->hev_thr, 1);
}
}
if ((mask_4x4_int_0 | mask_4x4_int_1) & 1) {
if ((mask_4x4_int_0 & mask_4x4_int_1) & 1) {
- vp9_loop_filter_vertical_edge_16(s + 4, pitch, lfi0->mblim, lfi0->lim,
- lfi0->hev_thr, lfi1->mblim,
- lfi1->lim, lfi1->hev_thr);
+ vp9_lpf_vertical_4_dual(s + 4, pitch, lfi0->mblim, lfi0->lim,
+ lfi0->hev_thr, lfi1->mblim, lfi1->lim,
+ lfi1->hev_thr);
} else if (mask_4x4_int_0 & 1) {
- vp9_loop_filter_vertical_edge(s + 4, pitch, lfi0->mblim, lfi0->lim,
- lfi0->hev_thr, 1);
+ vp9_lpf_vertical_4(s + 4, pitch, lfi0->mblim, lfi0->lim,
+ lfi0->hev_thr, 1);
} else {
- vp9_loop_filter_vertical_edge(s + 8 *pitch + 4, pitch, lfi1->mblim,
- lfi1->lim, lfi1->hev_thr, 1);
+ vp9_lpf_vertical_4(s + 8 * pitch + 4, pitch, lfi1->mblim, lfi1->lim,
+ lfi1->hev_thr, 1);
}
}
}
@@ -440,81 +424,73 @@
if (mask & 1) {
if (mask_16x16 & 1) {
if ((mask_16x16 & 3) == 3) {
- vp9_mb_lpf_horizontal_edge_w(s, pitch, lfi->mblim, lfi->lim,
- lfi->hev_thr, 2);
+ vp9_lpf_horizontal_16(s, pitch, lfi->mblim, lfi->lim,
+ lfi->hev_thr, 2);
count = 2;
} else {
- vp9_mb_lpf_horizontal_edge_w(s, pitch, lfi->mblim, lfi->lim,
- lfi->hev_thr, 1);
+ vp9_lpf_horizontal_16(s, pitch, lfi->mblim, lfi->lim,
+ lfi->hev_thr, 1);
}
} else if (mask_8x8 & 1) {
if ((mask_8x8 & 3) == 3) {
// Next block's thresholds
const loop_filter_thresh *lfin = lfi_n->lfthr + *(lfl + 1);
- vp9_mbloop_filter_horizontal_edge_16(s, pitch, lfi->mblim,
- lfi->lim, lfi->hev_thr,
- lfin->mblim, lfin->lim,
- lfin->hev_thr);
+ vp9_lpf_horizontal_8_dual(s, pitch, lfi->mblim, lfi->lim,
+ lfi->hev_thr, lfin->mblim, lfin->lim,
+ lfin->hev_thr);
if ((mask_4x4_int & 3) == 3) {
- vp9_loop_filter_horizontal_edge_16(s + 4 * pitch, pitch, lfi->mblim,
- lfi->lim, lfi->hev_thr,
- lfin->mblim, lfin->lim,
- lfin->hev_thr);
+ vp9_lpf_horizontal_4_dual(s + 4 * pitch, pitch, lfi->mblim,
+ lfi->lim, lfi->hev_thr, lfin->mblim,
+ lfin->lim, lfin->hev_thr);
} else {
if (mask_4x4_int & 1)
- vp9_loop_filter_horizontal_edge(s + 4 * pitch, pitch, lfi->mblim,
- lfi->lim, lfi->hev_thr, 1);
+ vp9_lpf_horizontal_4(s + 4 * pitch, pitch, lfi->mblim, lfi->lim,
+ lfi->hev_thr, 1);
else if (mask_4x4_int & 2)
- vp9_loop_filter_horizontal_edge(s + 8 + 4 * pitch, pitch,
- lfin->mblim, lfin->lim,
- lfin->hev_thr, 1);
+ vp9_lpf_horizontal_4(s + 8 + 4 * pitch, pitch, lfin->mblim,
+ lfin->lim, lfin->hev_thr, 1);
}
count = 2;
} else {
- vp9_mbloop_filter_horizontal_edge(s, pitch, lfi->mblim, lfi->lim,
- lfi->hev_thr, 1);
+ vp9_lpf_horizontal_8(s, pitch, lfi->mblim, lfi->lim, lfi->hev_thr, 1);
if (mask_4x4_int & 1)
- vp9_loop_filter_horizontal_edge(s + 4 * pitch, pitch, lfi->mblim,
- lfi->lim, lfi->hev_thr, 1);
+ vp9_lpf_horizontal_4(s + 4 * pitch, pitch, lfi->mblim, lfi->lim,
+ lfi->hev_thr, 1);
}
} else if (mask_4x4 & 1) {
if ((mask_4x4 & 3) == 3) {
// Next block's thresholds
const loop_filter_thresh *lfin = lfi_n->lfthr + *(lfl + 1);
- vp9_loop_filter_horizontal_edge_16(s, pitch, lfi->mblim,
- lfi->lim, lfi->hev_thr,
- lfin->mblim, lfin->lim,
- lfin->hev_thr);
+ vp9_lpf_horizontal_4_dual(s, pitch, lfi->mblim, lfi->lim,
+ lfi->hev_thr, lfin->mblim, lfin->lim,
+ lfin->hev_thr);
if ((mask_4x4_int & 3) == 3) {
- vp9_loop_filter_horizontal_edge_16(s + 4 * pitch, pitch, lfi->mblim,
- lfi->lim, lfi->hev_thr,
- lfin->mblim, lfin->lim,
- lfin->hev_thr);
+ vp9_lpf_horizontal_4_dual(s + 4 * pitch, pitch, lfi->mblim,
+ lfi->lim, lfi->hev_thr, lfin->mblim,
+ lfin->lim, lfin->hev_thr);
} else {
if (mask_4x4_int & 1)
- vp9_loop_filter_horizontal_edge(s + 4 * pitch, pitch, lfi->mblim,
- lfi->lim, lfi->hev_thr, 1);
+ vp9_lpf_horizontal_4(s + 4 * pitch, pitch, lfi->mblim, lfi->lim,
+ lfi->hev_thr, 1);
else if (mask_4x4_int & 2)
- vp9_loop_filter_horizontal_edge(s + 8 + 4 * pitch, pitch,
- lfin->mblim, lfin->lim,
- lfin->hev_thr, 1);
+ vp9_lpf_horizontal_4(s + 8 + 4 * pitch, pitch, lfin->mblim,
+ lfin->lim, lfin->hev_thr, 1);
}
count = 2;
} else {
- vp9_loop_filter_horizontal_edge(s, pitch, lfi->mblim, lfi->lim,
- lfi->hev_thr, 1);
+ vp9_lpf_horizontal_4(s, pitch, lfi->mblim, lfi->lim, lfi->hev_thr, 1);
- if (mask_4x4_int & 1)
- vp9_loop_filter_horizontal_edge(s + 4 * pitch, pitch, lfi->mblim,
- lfi->lim, lfi->hev_thr, 1);
+ if (mask_4x4_int & 1)
+ vp9_lpf_horizontal_4(s + 4 * pitch, pitch, lfi->mblim, lfi->lim,
+ lfi->hev_thr, 1);
}
} else if (mask_4x4_int & 1) {
- vp9_loop_filter_horizontal_edge(s + 4 * pitch, pitch, lfi->mblim,
- lfi->lim, lfi->hev_thr, 1);
+ vp9_lpf_horizontal_4(s + 4 * pitch, pitch, lfi->mblim, lfi->lim,
+ lfi->hev_thr, 1);
}
}
s += 8 * count;
@@ -543,8 +519,7 @@
const int skip = mi->mbmi.skip_coeff;
const int seg = mi->mbmi.segment_id;
const int ref = mi->mbmi.ref_frame[0];
- const int mode = lfi_n->mode_lf_lut[mi->mbmi.mode];
- const int filter_level = lfi_n->lvl[seg][ref][mode];
+ const int filter_level = lfi_n->lvl[seg][ref][mode_lf_lut[mi->mbmi.mode]];
uint64_t *left_y = &lfm->left_y[tx_size_y];
uint64_t *above_y = &lfm->above_y[tx_size_y];
uint64_t *int_4x4_y = &lfm->int_4x4_y;
@@ -625,8 +600,7 @@
const int skip = mi->mbmi.skip_coeff;
const int seg = mi->mbmi.segment_id;
const int ref = mi->mbmi.ref_frame[0];
- const int mode = lfi_n->mode_lf_lut[mi->mbmi.mode];
- const int filter_level = lfi_n->lvl[seg][ref][mode];
+ const int filter_level = lfi_n->lvl[seg][ref][mode_lf_lut[mi->mbmi.mode]];
uint64_t *left_y = &lfm->left_y[tx_size_y];
uint64_t *above_y = &lfm->above_y[tx_size_y];
uint64_t *int_4x4_y = &lfm->int_4x4_y;
@@ -919,10 +893,7 @@
const MB_MODE_INFO *mbmi) {
const int seg = mbmi->segment_id;
const int ref = mbmi->ref_frame[0];
- const int mode = lfi_n->mode_lf_lut[mbmi->mode];
- const int filter_level = lfi_n->lvl[seg][ref][mode];
-
- return filter_level;
+ return lfi_n->lvl[seg][ref][mode_lf_lut[mbmi->mode]];
}
static void filter_selectively_vert(uint8_t *s, int pitch,
@@ -940,19 +911,15 @@
if (mask & 1) {
if (mask_16x16 & 1) {
- vp9_mb_lpf_vertical_edge_w(s, pitch, lfi->mblim, lfi->lim,
- lfi->hev_thr);
+ vp9_lpf_vertical_16(s, pitch, lfi->mblim, lfi->lim, lfi->hev_thr);
} else if (mask_8x8 & 1) {
- vp9_mbloop_filter_vertical_edge(s, pitch, lfi->mblim, lfi->lim,
- lfi->hev_thr, 1);
+ vp9_lpf_vertical_8(s, pitch, lfi->mblim, lfi->lim, lfi->hev_thr, 1);
} else if (mask_4x4 & 1) {
- vp9_loop_filter_vertical_edge(s, pitch, lfi->mblim, lfi->lim,
- lfi->hev_thr, 1);
+ vp9_lpf_vertical_4(s, pitch, lfi->mblim, lfi->lim, lfi->hev_thr, 1);
}
}
if (mask_4x4_int & 1)
- vp9_loop_filter_vertical_edge(s + 4, pitch, lfi->mblim, lfi->lim,
- lfi->hev_thr, 1);
+ vp9_lpf_vertical_4(s + 4, pitch, lfi->mblim, lfi->lim, lfi->hev_thr, 1);
s += 8;
lfl += 1;
mask_16x16 >>= 1;
diff --git a/vp9/common/vp9_loopfilter.h b/vp9/common/vp9_loopfilter.h
index 62389ea..98fac96 100644
--- a/vp9/common/vp9_loopfilter.h
+++ b/vp9/common/vp9_loopfilter.h
@@ -54,7 +54,6 @@
typedef struct {
loop_filter_thresh lfthr[MAX_LOOP_FILTER + 1];
uint8_t lvl[MAX_SEGMENTS][MAX_REF_FRAMES][MAX_MODE_LF_DELTAS];
- uint8_t mode_lf_lut[MB_MODE_COUNT];
} loop_filter_info_n;
/* assorted loopfilter functions which get used elsewhere */
diff --git a/vp9/common/vp9_loopfilter_filters.c b/vp9/common/vp9_loopfilter_filters.c
index f2e910f..bbbad01 100644
--- a/vp9/common/vp9_loopfilter_filters.c
+++ b/vp9/common/vp9_loopfilter_filters.c
@@ -101,11 +101,9 @@
*op1 = signed_char_clamp(ps1 + filter) ^ 0x80;
}
-void vp9_loop_filter_horizontal_edge_c(uint8_t *s, int p /* pitch */,
- const uint8_t *blimit,
- const uint8_t *limit,
- const uint8_t *thresh,
- int count) {
+void vp9_lpf_horizontal_4_c(uint8_t *s, int p /* pitch */,
+ const uint8_t *blimit, const uint8_t *limit,
+ const uint8_t *thresh, int count) {
int i;
// loop filter designed to work using chars so that we can make maximum use
@@ -121,22 +119,17 @@
}
}
-void vp9_loop_filter_horizontal_edge_16_c(uint8_t *s, int p,
- const uint8_t *blimit0,
- const uint8_t *limit0,
- const uint8_t *thresh0,
- const uint8_t *blimit1,
- const uint8_t *limit1,
- const uint8_t *thresh1) {
- vp9_loop_filter_horizontal_edge_c(s, p, blimit0, limit0, thresh0, 1);
- vp9_loop_filter_horizontal_edge_c(s + 8, p, blimit1, limit1, thresh1, 1);
+void vp9_lpf_horizontal_4_dual_c(uint8_t *s, int p, const uint8_t *blimit0,
+ const uint8_t *limit0, const uint8_t *thresh0,
+ const uint8_t *blimit1, const uint8_t *limit1,
+ const uint8_t *thresh1) {
+ vp9_lpf_horizontal_4_c(s, p, blimit0, limit0, thresh0, 1);
+ vp9_lpf_horizontal_4_c(s + 8, p, blimit1, limit1, thresh1, 1);
}
-void vp9_loop_filter_vertical_edge_c(uint8_t *s, int pitch,
- const uint8_t *blimit,
- const uint8_t *limit,
- const uint8_t *thresh,
- int count) {
+void vp9_lpf_vertical_4_c(uint8_t *s, int pitch, const uint8_t *blimit,
+ const uint8_t *limit, const uint8_t *thresh,
+ int count) {
int i;
// loop filter designed to work using chars so that we can make maximum use
@@ -152,15 +145,12 @@
}
}
-void vp9_loop_filter_vertical_edge_16_c(uint8_t *s, int pitch,
- const uint8_t *blimit0,
- const uint8_t *limit0,
- const uint8_t *thresh0,
- const uint8_t *blimit1,
- const uint8_t *limit1,
- const uint8_t *thresh1) {
- vp9_loop_filter_vertical_edge_c(s, pitch, blimit0, limit0, thresh0, 1);
- vp9_loop_filter_vertical_edge_c(s + 8 * pitch, pitch, blimit1, limit1,
+void vp9_lpf_vertical_4_dual_c(uint8_t *s, int pitch, const uint8_t *blimit0,
+ const uint8_t *limit0, const uint8_t *thresh0,
+ const uint8_t *blimit1, const uint8_t *limit1,
+ const uint8_t *thresh1) {
+ vp9_lpf_vertical_4_c(s, pitch, blimit0, limit0, thresh0, 1);
+ vp9_lpf_vertical_4_c(s + 8 * pitch, pitch, blimit1, limit1,
thresh1, 1);
}
@@ -185,11 +175,9 @@
}
}
-void vp9_mbloop_filter_horizontal_edge_c(uint8_t *s, int p,
- const uint8_t *blimit,
- const uint8_t *limit,
- const uint8_t *thresh,
- int count) {
+void vp9_lpf_horizontal_8_c(uint8_t *s, int p, const uint8_t *blimit,
+ const uint8_t *limit, const uint8_t *thresh,
+ int count) {
int i;
// loop filter designed to work using chars so that we can make maximum use
@@ -208,22 +196,17 @@
}
}
-void vp9_mbloop_filter_horizontal_edge_16_c(uint8_t *s, int p,
- const uint8_t *blimit0,
- const uint8_t *limit0,
- const uint8_t *thresh0,
- const uint8_t *blimit1,
- const uint8_t *limit1,
- const uint8_t *thresh1) {
- vp9_mbloop_filter_horizontal_edge_c(s, p, blimit0, limit0, thresh0, 1);
- vp9_mbloop_filter_horizontal_edge_c(s + 8, p, blimit1, limit1, thresh1, 1);
+void vp9_lpf_horizontal_8_dual_c(uint8_t *s, int p, const uint8_t *blimit0,
+ const uint8_t *limit0, const uint8_t *thresh0,
+ const uint8_t *blimit1, const uint8_t *limit1,
+ const uint8_t *thresh1) {
+ vp9_lpf_horizontal_8_c(s, p, blimit0, limit0, thresh0, 1);
+ vp9_lpf_horizontal_8_c(s + 8, p, blimit1, limit1, thresh1, 1);
}
-void vp9_mbloop_filter_vertical_edge_c(uint8_t *s, int pitch,
- const uint8_t *blimit,
- const uint8_t *limit,
- const uint8_t *thresh,
- int count) {
+void vp9_lpf_vertical_8_c(uint8_t *s, int pitch, const uint8_t *blimit,
+ const uint8_t *limit, const uint8_t *thresh,
+ int count) {
int i;
for (i = 0; i < 8 * count; ++i) {
@@ -239,15 +222,12 @@
}
}
-void vp9_mbloop_filter_vertical_edge_16_c(uint8_t *s, int pitch,
- const uint8_t *blimit0,
- const uint8_t *limit0,
- const uint8_t *thresh0,
- const uint8_t *blimit1,
- const uint8_t *limit1,
- const uint8_t *thresh1) {
- vp9_mbloop_filter_vertical_edge_c(s, pitch, blimit0, limit0, thresh0, 1);
- vp9_mbloop_filter_vertical_edge_c(s + 8 * pitch, pitch, blimit1, limit1,
+void vp9_lpf_vertical_8_dual_c(uint8_t *s, int pitch, const uint8_t *blimit0,
+ const uint8_t *limit0, const uint8_t *thresh0,
+ const uint8_t *blimit1, const uint8_t *limit1,
+ const uint8_t *thresh1) {
+ vp9_lpf_vertical_8_c(s, pitch, blimit0, limit0, thresh0, 1);
+ vp9_lpf_vertical_8_c(s + 8 * pitch, pitch, blimit1, limit1,
thresh1, 1);
}
@@ -302,11 +282,9 @@
}
}
-void vp9_mb_lpf_horizontal_edge_w_c(uint8_t *s, int p,
- const uint8_t *blimit,
- const uint8_t *limit,
- const uint8_t *thresh,
- int count) {
+void vp9_lpf_horizontal_16_c(uint8_t *s, int p, const uint8_t *blimit,
+ const uint8_t *limit, const uint8_t *thresh,
+ int count) {
int i;
// loop filter designed to work using chars so that we can make maximum use
@@ -355,16 +333,12 @@
}
}
-void vp9_mb_lpf_vertical_edge_w_c(uint8_t *s, int p,
- const uint8_t *blimit,
- const uint8_t *limit,
- const uint8_t *thresh) {
+void vp9_lpf_vertical_16_c(uint8_t *s, int p, const uint8_t *blimit,
+ const uint8_t *limit, const uint8_t *thresh) {
mb_lpf_vertical_edge_w(s, p, blimit, limit, thresh, 8);
}
-void vp9_mb_lpf_vertical_edge_w_16_c(uint8_t *s, int p,
- const uint8_t *blimit,
- const uint8_t *limit,
- const uint8_t *thresh) {
+void vp9_lpf_vertical_16_dual_c(uint8_t *s, int p, const uint8_t *blimit,
+ const uint8_t *limit, const uint8_t *thresh) {
mb_lpf_vertical_edge_w(s, p, blimit, limit, thresh, 16);
}
diff --git a/vp9/common/vp9_pred_common.c b/vp9/common/vp9_pred_common.c
index 449b945..03f4cc2 100644
--- a/vp9/common/vp9_pred_common.c
+++ b/vp9/common/vp9_pred_common.c
@@ -42,29 +42,30 @@
else
return SWITCHABLE_FILTERS;
}
-// Returns a context number for the given MB prediction signal
+
+// The mode info data structure has a one element border above and to the
+// left of the entries corresponding to real macroblocks.
+// The prediction flags in these dummy entries are initialized to 0.
+// 0 - inter/inter, inter/--, --/inter, --/--
+// 1 - intra/inter, inter/intra
+// 2 - intra/--, --/intra
+// 3 - intra/intra
int vp9_get_intra_inter_context(const MACROBLOCKD *xd) {
const MB_MODE_INFO *const above_mbmi = get_mbmi(get_above_mi(xd));
const MB_MODE_INFO *const left_mbmi = get_mbmi(get_left_mi(xd));
const int has_above = above_mbmi != NULL;
const int has_left = left_mbmi != NULL;
- const int above_intra = has_above ? !is_inter_block(above_mbmi) : 1;
- const int left_intra = has_left ? !is_inter_block(left_mbmi) : 1;
- // The mode info data structure has a one element border above and to the
- // left of the entries corresponding to real macroblocks.
- // The prediction flags in these dummy entries are initialized to 0.
- // 0 - inter/inter, inter/--, --/inter, --/--
- // 1 - intra/inter, inter/intra
- // 2 - intra/--, --/intra
- // 3 - intra/intra
- if (has_above && has_left) // both edges available
+ if (has_above && has_left) { // both edges available
+ const int above_intra = !is_inter_block(above_mbmi);
+ const int left_intra = !is_inter_block(left_mbmi);
return left_intra && above_intra ? 3
: left_intra || above_intra;
- else if (has_above || has_left) // one edge available
- return 2 * (has_above ? above_intra : left_intra);
- else
+ } else if (has_above || has_left) { // one edge available
+ return 2 * !is_inter_block(has_above ? above_mbmi : left_mbmi);
+ } else {
return 0;
+ }
}
int vp9_get_reference_mode_context(const VP9_COMMON *cm,
@@ -117,8 +118,7 @@
const MB_MODE_INFO *const left_mbmi = get_mbmi(get_left_mi(xd));
const int above_in_image = above_mbmi != NULL;
const int left_in_image = left_mbmi != NULL;
- const int above_intra = above_in_image ? !is_inter_block(above_mbmi) : 1;
- const int left_intra = left_in_image ? !is_inter_block(left_mbmi) : 1;
+
// Note:
// The mode info data structure has a one element border above and to the
// left of the entries correpsonding to real macroblocks.
@@ -127,6 +127,9 @@
const int var_ref_idx = !fix_ref_idx;
if (above_in_image && left_in_image) { // both edges available
+ const int above_intra = !is_inter_block(above_mbmi);
+ const int left_intra = !is_inter_block(left_mbmi);
+
if (above_intra && left_intra) { // intra/intra (2)
pred_context = 2;
} else if (above_intra || left_intra) { // intra/inter
@@ -196,13 +199,14 @@
const MB_MODE_INFO *const left_mbmi = get_mbmi(get_left_mi(xd));
const int has_above = above_mbmi != NULL;
const int has_left = left_mbmi != NULL;
- const int above_intra = has_above ? !is_inter_block(above_mbmi) : 1;
- const int left_intra = has_left ? !is_inter_block(left_mbmi) : 1;
// Note:
// The mode info data structure has a one element border above and to the
// left of the entries correpsonding to real macroblocks.
// The prediction flags in these dummy entries are initialised to 0.
if (has_above && has_left) { // both edges available
+ const int above_intra = !is_inter_block(above_mbmi);
+ const int left_intra = !is_inter_block(left_mbmi);
+
if (above_intra && left_intra) { // intra/intra
pred_context = 2;
} else if (above_intra || left_intra) { // intra/inter or inter/intra
@@ -260,14 +264,15 @@
const MB_MODE_INFO *const left_mbmi = get_mbmi(get_left_mi(xd));
const int has_above = above_mbmi != NULL;
const int has_left = left_mbmi != NULL;
- const int above_intra = has_above ? !is_inter_block(above_mbmi) : 1;
- const int left_intra = has_left ? !is_inter_block(left_mbmi) : 1;
// Note:
// The mode info data structure has a one element border above and to the
// left of the entries correpsonding to real macroblocks.
// The prediction flags in these dummy entries are initialised to 0.
if (has_above && has_left) { // both edges available
+ const int above_intra = !is_inter_block(above_mbmi);
+ const int left_intra = !is_inter_block(left_mbmi);
+
if (above_intra && left_intra) { // intra/intra
pred_context = 2;
} else if (above_intra || left_intra) { // intra/inter or inter/intra
diff --git a/vp9/common/vp9_rtcd_defs.sh b/vp9/common/vp9_rtcd_defs.sh
index 727f5c4..3025ed4 100644
--- a/vp9/common/vp9_rtcd_defs.sh
+++ b/vp9/common/vp9_rtcd_defs.sh
@@ -191,38 +191,38 @@
#
# Loopfilter
#
-prototype void vp9_mb_lpf_vertical_edge_w "uint8_t *s, int pitch, const uint8_t *blimit, const uint8_t *limit, const uint8_t *thresh"
-specialize vp9_mb_lpf_vertical_edge_w sse2 neon dspr2
+prototype void vp9_lpf_vertical_16 "uint8_t *s, int pitch, const uint8_t *blimit, const uint8_t *limit, const uint8_t *thresh"
+specialize vp9_lpf_vertical_16 sse2 neon dspr2
-prototype void vp9_mb_lpf_vertical_edge_w_16 "uint8_t *s, int pitch, const uint8_t *blimit, const uint8_t *limit, const uint8_t *thresh"
-specialize vp9_mb_lpf_vertical_edge_w_16 sse2 neon dspr2
+prototype void vp9_lpf_vertical_16_dual "uint8_t *s, int pitch, const uint8_t *blimit, const uint8_t *limit, const uint8_t *thresh"
+specialize vp9_lpf_vertical_16_dual sse2 neon dspr2
-prototype void vp9_mbloop_filter_vertical_edge "uint8_t *s, int pitch, const uint8_t *blimit, const uint8_t *limit, const uint8_t *thresh, int count"
-specialize vp9_mbloop_filter_vertical_edge sse2 neon dspr2
+prototype void vp9_lpf_vertical_8 "uint8_t *s, int pitch, const uint8_t *blimit, const uint8_t *limit, const uint8_t *thresh, int count"
+specialize vp9_lpf_vertical_8 sse2 neon dspr2
-prototype void vp9_mbloop_filter_vertical_edge_16 "uint8_t *s, int pitch, const uint8_t *blimit0, const uint8_t *limit0, const uint8_t *thresh0, const uint8_t *blimit1, const uint8_t *limit1, const uint8_t *thresh1"
-specialize vp9_mbloop_filter_vertical_edge_16 sse2 neon dspr2
+prototype void vp9_lpf_vertical_8_dual "uint8_t *s, int pitch, const uint8_t *blimit0, const uint8_t *limit0, const uint8_t *thresh0, const uint8_t *blimit1, const uint8_t *limit1, const uint8_t *thresh1"
+specialize vp9_lpf_vertical_8_dual sse2 neon dspr2
-prototype void vp9_loop_filter_vertical_edge "uint8_t *s, int pitch, const uint8_t *blimit, const uint8_t *limit, const uint8_t *thresh, int count"
-specialize vp9_loop_filter_vertical_edge mmx neon dspr2
+prototype void vp9_lpf_vertical_4 "uint8_t *s, int pitch, const uint8_t *blimit, const uint8_t *limit, const uint8_t *thresh, int count"
+specialize vp9_lpf_vertical_4 mmx neon dspr2
-prototype void vp9_loop_filter_vertical_edge_16 "uint8_t *s, int pitch, const uint8_t *blimit0, const uint8_t *limit0, const uint8_t *thresh0, const uint8_t *blimit1, const uint8_t *limit1, const uint8_t *thresh1"
-specialize vp9_loop_filter_vertical_edge_16 sse2 neon dspr2
+prototype void vp9_lpf_vertical_4_dual "uint8_t *s, int pitch, const uint8_t *blimit0, const uint8_t *limit0, const uint8_t *thresh0, const uint8_t *blimit1, const uint8_t *limit1, const uint8_t *thresh1"
+specialize vp9_lpf_vertical_4_dual sse2 neon dspr2
-prototype void vp9_mb_lpf_horizontal_edge_w "uint8_t *s, int pitch, const uint8_t *blimit, const uint8_t *limit, const uint8_t *thresh, int count"
-specialize vp9_mb_lpf_horizontal_edge_w sse2 avx2 neon dspr2
+prototype void vp9_lpf_horizontal_16 "uint8_t *s, int pitch, const uint8_t *blimit, const uint8_t *limit, const uint8_t *thresh, int count"
+specialize vp9_lpf_horizontal_16 sse2 avx2 neon dspr2
-prototype void vp9_mbloop_filter_horizontal_edge "uint8_t *s, int pitch, const uint8_t *blimit, const uint8_t *limit, const uint8_t *thresh, int count"
-specialize vp9_mbloop_filter_horizontal_edge sse2 neon dspr2
+prototype void vp9_lpf_horizontal_8 "uint8_t *s, int pitch, const uint8_t *blimit, const uint8_t *limit, const uint8_t *thresh, int count"
+specialize vp9_lpf_horizontal_8 sse2 neon dspr2
-prototype void vp9_mbloop_filter_horizontal_edge_16 "uint8_t *s, int pitch, const uint8_t *blimit0, const uint8_t *limit0, const uint8_t *thresh0, const uint8_t *blimit1, const uint8_t *limit1, const uint8_t *thresh1"
-specialize vp9_mbloop_filter_horizontal_edge_16 sse2 neon dspr2
+prototype void vp9_lpf_horizontal_8_dual "uint8_t *s, int pitch, const uint8_t *blimit0, const uint8_t *limit0, const uint8_t *thresh0, const uint8_t *blimit1, const uint8_t *limit1, const uint8_t *thresh1"
+specialize vp9_lpf_horizontal_8_dual sse2 neon dspr2
-prototype void vp9_loop_filter_horizontal_edge "uint8_t *s, int pitch, const uint8_t *blimit, const uint8_t *limit, const uint8_t *thresh, int count"
-specialize vp9_loop_filter_horizontal_edge mmx neon dspr2
+prototype void vp9_lpf_horizontal_4 "uint8_t *s, int pitch, const uint8_t *blimit, const uint8_t *limit, const uint8_t *thresh, int count"
+specialize vp9_lpf_horizontal_4 mmx neon dspr2
-prototype void vp9_loop_filter_horizontal_edge_16 "uint8_t *s, int pitch, const uint8_t *blimit0, const uint8_t *limit0, const uint8_t *thresh0, const uint8_t *blimit1, const uint8_t *limit1, const uint8_t *thresh1"
-specialize vp9_loop_filter_horizontal_edge_16 sse2 neon dspr2
+prototype void vp9_lpf_horizontal_4_dual "uint8_t *s, int pitch, const uint8_t *blimit0, const uint8_t *limit0, const uint8_t *thresh0, const uint8_t *blimit1, const uint8_t *limit1, const uint8_t *thresh1"
+specialize vp9_lpf_horizontal_4_dual sse2 neon dspr2
#
# post proc
diff --git a/vp9/common/x86/vp9_loopfilter_intrin_avx2.c b/vp9/common/x86/vp9_loopfilter_intrin_avx2.c
index 3c5cb8f..439c028 100644
--- a/vp9/common/x86/vp9_loopfilter_intrin_avx2.c
+++ b/vp9/common/x86/vp9_loopfilter_intrin_avx2.c
@@ -933,7 +933,7 @@
}
}
-void vp9_mb_lpf_horizontal_edge_w_avx2(unsigned char *s, int p,
+void vp9_lpf_horizontal_16_avx2(unsigned char *s, int p,
const unsigned char *_blimit, const unsigned char *_limit,
const unsigned char *_thresh, int count) {
if (count == 1)
diff --git a/vp9/common/x86/vp9_loopfilter_intrin_sse2.c b/vp9/common/x86/vp9_loopfilter_intrin_sse2.c
index 3ca55cf..448ad5a 100644
--- a/vp9/common/x86/vp9_loopfilter_intrin_sse2.c
+++ b/vp9/common/x86/vp9_loopfilter_intrin_sse2.c
@@ -846,24 +846,20 @@
}
// TODO(yunqingwang): remove count and call these 2 functions(8 or 16) directly.
-void vp9_mb_lpf_horizontal_edge_w_sse2(unsigned char *s,
- int p,
- const unsigned char *_blimit,
- const unsigned char *_limit,
- const unsigned char *_thresh,
- int count) {
+void vp9_lpf_horizontal_16_sse2(unsigned char *s, int p,
+ const unsigned char *_blimit,
+ const unsigned char *_limit,
+ const unsigned char *_thresh, int count) {
if (count == 1)
mb_lpf_horizontal_edge_w_sse2_8(s, p, _blimit, _limit, _thresh);
else
mb_lpf_horizontal_edge_w_sse2_16(s, p, _blimit, _limit, _thresh);
}
-void vp9_mbloop_filter_horizontal_edge_sse2(unsigned char *s,
- int p,
- const unsigned char *_blimit,
- const unsigned char *_limit,
- const unsigned char *_thresh,
- int count) {
+void vp9_lpf_horizontal_8_sse2(unsigned char *s, int p,
+ const unsigned char *_blimit,
+ const unsigned char *_limit,
+ const unsigned char *_thresh, int count) {
DECLARE_ALIGNED_ARRAY(16, unsigned char, flat_op2, 16);
DECLARE_ALIGNED_ARRAY(16, unsigned char, flat_op1, 16);
DECLARE_ALIGNED_ARRAY(16, unsigned char, flat_op0, 16);
@@ -1083,13 +1079,13 @@
}
}
-void vp9_mbloop_filter_horizontal_edge_16_sse2(uint8_t *s, int p,
- const uint8_t *_blimit0,
- const uint8_t *_limit0,
- const uint8_t *_thresh0,
- const uint8_t *_blimit1,
- const uint8_t *_limit1,
- const uint8_t *_thresh1) {
+void vp9_lpf_horizontal_8_dual_sse2(uint8_t *s, int p,
+ const uint8_t *_blimit0,
+ const uint8_t *_limit0,
+ const uint8_t *_thresh0,
+ const uint8_t *_blimit1,
+ const uint8_t *_limit1,
+ const uint8_t *_thresh1) {
DECLARE_ALIGNED_ARRAY(16, unsigned char, flat_op2, 16);
DECLARE_ALIGNED_ARRAY(16, unsigned char, flat_op1, 16);
DECLARE_ALIGNED_ARRAY(16, unsigned char, flat_op0, 16);
@@ -1330,14 +1326,13 @@
}
}
-void vp9_loop_filter_horizontal_edge_16_sse2(unsigned char *s,
- int p,
- const unsigned char *_blimit0,
- const unsigned char *_limit0,
- const unsigned char *_thresh0,
- const unsigned char *_blimit1,
- const unsigned char *_limit1,
- const unsigned char *_thresh1) {
+void vp9_lpf_horizontal_4_dual_sse2(unsigned char *s, int p,
+ const unsigned char *_blimit0,
+ const unsigned char *_limit0,
+ const unsigned char *_thresh0,
+ const unsigned char *_blimit1,
+ const unsigned char *_limit1,
+ const unsigned char *_thresh1) {
const __m128i blimit =
_mm_unpacklo_epi64(_mm_load_si128((const __m128i *)_blimit0),
_mm_load_si128((const __m128i *)_blimit1));
@@ -1598,13 +1593,12 @@
} while (++idx8x8 < num_8x8_to_transpose);
}
-void vp9_loop_filter_vertical_edge_16_sse2(uint8_t *s, int p,
- const uint8_t *blimit0,
- const uint8_t *limit0,
- const uint8_t *thresh0,
- const uint8_t *blimit1,
- const uint8_t *limit1,
- const uint8_t *thresh1) {
+void vp9_lpf_vertical_4_dual_sse2(uint8_t *s, int p, const uint8_t *blimit0,
+ const uint8_t *limit0,
+ const uint8_t *thresh0,
+ const uint8_t *blimit1,
+ const uint8_t *limit1,
+ const uint8_t *thresh1) {
DECLARE_ALIGNED_ARRAY(16, unsigned char, t_dst, 16 * 8);
unsigned char *src[2];
unsigned char *dst[2];
@@ -1613,8 +1607,8 @@
transpose8x16(s - 4, s - 4 + p * 8, p, t_dst, 16);
// Loop filtering
- vp9_loop_filter_horizontal_edge_16_sse2(t_dst + 4 * 16, 16, blimit0, limit0,
- thresh0, blimit1, limit1, thresh1);
+ vp9_lpf_horizontal_4_dual_sse2(t_dst + 4 * 16, 16, blimit0, limit0, thresh0,
+ blimit1, limit1, thresh1);
src[0] = t_dst;
src[1] = t_dst + 8;
dst[0] = s - 4;
@@ -1624,11 +1618,10 @@
transpose(src, 16, dst, p, 2);
}
-void vp9_mbloop_filter_vertical_edge_sse2(unsigned char *s, int p,
- const unsigned char *blimit,
- const unsigned char *limit,
- const unsigned char *thresh,
- int count) {
+void vp9_lpf_vertical_8_sse2(unsigned char *s, int p,
+ const unsigned char *blimit,
+ const unsigned char *limit,
+ const unsigned char *thresh, int count) {
DECLARE_ALIGNED_ARRAY(8, unsigned char, t_dst, 8 * 8);
unsigned char *src[1];
unsigned char *dst[1];
@@ -1641,8 +1634,7 @@
transpose(src, p, dst, 8, 1);
// Loop filtering
- vp9_mbloop_filter_horizontal_edge_sse2(t_dst + 4 * 8, 8, blimit, limit,
- thresh, 1);
+ vp9_lpf_horizontal_8_sse2(t_dst + 4 * 8, 8, blimit, limit, thresh, 1);
src[0] = t_dst;
dst[0] = s - 4;
@@ -1651,13 +1643,12 @@
transpose(src, 8, dst, p, 1);
}
-void vp9_mbloop_filter_vertical_edge_16_sse2(uint8_t *s, int p,
- const uint8_t *blimit0,
- const uint8_t *limit0,
- const uint8_t *thresh0,
- const uint8_t *blimit1,
- const uint8_t *limit1,
- const uint8_t *thresh1) {
+void vp9_lpf_vertical_8_dual_sse2(uint8_t *s, int p, const uint8_t *blimit0,
+ const uint8_t *limit0,
+ const uint8_t *thresh0,
+ const uint8_t *blimit1,
+ const uint8_t *limit1,
+ const uint8_t *thresh1) {
DECLARE_ALIGNED_ARRAY(16, unsigned char, t_dst, 16 * 8);
unsigned char *src[2];
unsigned char *dst[2];
@@ -1666,8 +1657,8 @@
transpose8x16(s - 4, s - 4 + p * 8, p, t_dst, 16);
// Loop filtering
- vp9_mbloop_filter_horizontal_edge_16_sse2(t_dst + 4 * 16, 16, blimit0, limit0,
- thresh0, blimit1, limit1, thresh1);
+ vp9_lpf_horizontal_8_dual_sse2(t_dst + 4 * 16, 16, blimit0, limit0, thresh0,
+ blimit1, limit1, thresh1);
src[0] = t_dst;
src[1] = t_dst + 8;
@@ -1678,10 +1669,10 @@
transpose(src, 16, dst, p, 2);
}
-void vp9_mb_lpf_vertical_edge_w_sse2(unsigned char *s, int p,
- const unsigned char *blimit,
- const unsigned char *limit,
- const unsigned char *thresh) {
+void vp9_lpf_vertical_16_sse2(unsigned char *s, int p,
+ const unsigned char *blimit,
+ const unsigned char *limit,
+ const unsigned char *thresh) {
DECLARE_ALIGNED_ARRAY(8, unsigned char, t_dst, 8 * 16);
unsigned char *src[2];
unsigned char *dst[2];
@@ -1706,10 +1697,9 @@
transpose(src, 8, dst, p, 2);
}
-void vp9_mb_lpf_vertical_edge_w_16_sse2(unsigned char *s, int p,
- const uint8_t *blimit,
- const uint8_t *limit,
- const uint8_t *thresh) {
+void vp9_lpf_vertical_16_dual_sse2(unsigned char *s, int p,
+ const uint8_t *blimit, const uint8_t *limit,
+ const uint8_t *thresh) {
DECLARE_ALIGNED_ARRAY(16, unsigned char, t_dst, 256);
// Transpose 16x16
diff --git a/vp9/common/x86/vp9_loopfilter_mmx.asm b/vp9/common/x86/vp9_loopfilter_mmx.asm
index 4ebb51b..a7f6930 100644
--- a/vp9/common/x86/vp9_loopfilter_mmx.asm
+++ b/vp9/common/x86/vp9_loopfilter_mmx.asm
@@ -12,7 +12,7 @@
%include "vpx_ports/x86_abi_support.asm"
-;void vp9_loop_filter_horizontal_edge_mmx
+;void vp9_lpf_horizontal_4_mmx
;(
; unsigned char *src_ptr,
; int src_pixel_step,
@@ -21,8 +21,8 @@
; const char *thresh,
; int count
;)
-global sym(vp9_loop_filter_horizontal_edge_mmx) PRIVATE
-sym(vp9_loop_filter_horizontal_edge_mmx):
+global sym(vp9_lpf_horizontal_4_mmx) PRIVATE
+sym(vp9_lpf_horizontal_4_mmx):
push rbp
mov rbp, rsp
SHADOW_ARGS_TO_STACK 6
@@ -224,7 +224,7 @@
ret
-;void vp9_loop_filter_vertical_edge_mmx
+;void vp9_lpf_vertical_4_mmx
;(
; unsigned char *src_ptr,
; int src_pixel_step,
@@ -233,8 +233,8 @@
; const char *thresh,
; int count
;)
-global sym(vp9_loop_filter_vertical_edge_mmx) PRIVATE
-sym(vp9_loop_filter_vertical_edge_mmx):
+global sym(vp9_lpf_vertical_4_mmx) PRIVATE
+sym(vp9_lpf_vertical_4_mmx):
push rbp
mov rbp, rsp
SHADOW_ARGS_TO_STACK 6
diff --git a/vp9/decoder/vp9_decodeframe.c b/vp9/decoder/vp9_decodeframe.c
index c167004..eb2d8b5 100644
--- a/vp9/decoder/vp9_decodeframe.c
+++ b/vp9/decoder/vp9_decodeframe.c
@@ -76,9 +76,8 @@
}
}
-// len == 0 is not allowed
static int read_is_valid(const uint8_t *start, size_t len, const uint8_t *end) {
- return start + len > start && start + len <= end;
+ // A zero-length read is never valid. The end >= start guard keeps a
+ // negative pointer difference from being converted to a huge size_t,
+ // which would otherwise make the bounds comparison pass spuriously.
+ return len != 0 && end >= start && len <= (size_t)(end - start);
}
static int decode_unsigned_max(struct vp9_read_bit_buffer *rb, int max) {
@@ -855,10 +854,14 @@
if (!is_last) {
if (!read_is_valid(*data, 4, data_end))
vpx_internal_error(error_info, VPX_CODEC_CORRUPT_FRAME,
- "Truncated packet or corrupt tile length");
+ "Truncated packet or corrupt tile length");
size = read_be32(*data);
*data += 4;
+
+ if (size > data_end - *data)
+ vpx_internal_error(error_info, VPX_CODEC_CORRUPT_FRAME,
+ "Truncated packet or corrupt tile size");
} else {
size = data_end - *data;
}
diff --git a/vp9/encoder/vp9_onyx_if.c b/vp9/encoder/vp9_onyx_if.c
index daf4465..308bc6a 100644
--- a/vp9/encoder/vp9_onyx_if.c
+++ b/vp9/encoder/vp9_onyx_if.c
@@ -42,8 +42,6 @@
void vp9_entropy_mode_init();
void vp9_coef_tree_initialize();
-static void set_default_lf_deltas(struct loopfilter *lf);
-
#define DEFAULT_INTERP_FILTER SWITCHABLE
#define SHARP_FILTER_QTHRESH 0 /* Q threshold for 8-tap sharp filter */
@@ -172,27 +170,15 @@
}
}
-static void setup_features(VP9_COMMON *cm) {
- struct loopfilter *const lf = &cm->lf;
+static void reset_segment_features(VP9_COMMON *cm) {
struct segmentation *const seg = &cm->seg;
// Set up default state for MB feature flags
seg->enabled = 0;
-
seg->update_map = 0;
seg->update_data = 0;
vpx_memset(seg->tree_probs, 255, sizeof(seg->tree_probs));
-
vp9_clearall_segfeatures(seg);
-
- lf->mode_ref_delta_enabled = 0;
- lf->mode_ref_delta_update = 0;
- vp9_zero(lf->ref_deltas);
- vp9_zero(lf->mode_deltas);
- vp9_zero(lf->last_ref_deltas);
- vp9_zero(lf->last_mode_deltas);
-
- set_default_lf_deltas(lf);
}
static void dealloc_compressor_data(VP9_COMP *cpi) {
@@ -489,23 +475,6 @@
}
}
-static void set_default_lf_deltas(struct loopfilter *lf) {
- lf->mode_ref_delta_enabled = 1;
- lf->mode_ref_delta_update = 1;
-
- vp9_zero(lf->ref_deltas);
- vp9_zero(lf->mode_deltas);
-
- // Test of ref frame deltas
- lf->ref_deltas[INTRA_FRAME] = 2;
- lf->ref_deltas[LAST_FRAME] = 0;
- lf->ref_deltas[GOLDEN_FRAME] = -2;
- lf->ref_deltas[ALTREF_FRAME] = -2;
-
- lf->mode_deltas[0] = 0; // Zero
- lf->mode_deltas[1] = 0; // New mv
-}
-
static void set_rd_speed_thresholds(VP9_COMP *cpi, int mode) {
SPEED_FEATURES *sf = &cpi->sf;
int i;
@@ -1219,7 +1188,7 @@
cm->refresh_frame_context = 1;
cm->reset_frame_context = 0;
- setup_features(cm);
+ reset_segment_features(cm);
set_high_precision_mv(cpi, 0);
{
@@ -2955,7 +2924,7 @@
if (frame_is_intra_only(cm)) {
vp9_setup_key_frame(cpi);
- // Reset the loop filter deltas and segmentation map.
+ // Reset the segmentation features.
- setup_features(cm);
+ reset_segment_features(cm);
// If segmentation is enabled force a map update for key frames.
if (seg->enabled) {
diff --git a/vp9/vp9_dx_iface.c b/vp9/vp9_dx_iface.c
index c123c46..445198b 100644
--- a/vp9/vp9_dx_iface.c
+++ b/vp9/vp9_dx_iface.c
@@ -508,8 +508,9 @@
vpx_codec_alg_priv_t *ctx,
vpx_codec_frame_buffer_t *fb_list, int fb_count,
vpx_realloc_frame_buffer_cb_fn_t cb, void *user_priv) {
- if (fb_count < REF_FRAMES) {
- /* The application must pass in at least REF_FRAMES frame buffers. */
+ if (fb_count < (VP9_MAXIMUM_REF_BUFFERS + VPX_MAXIMUM_WORK_BUFFERS)) {
+ /* The application must pass in at least VP9_MAXIMUM_REF_BUFFERS +
+ * VPX_MAXIMUM_WORK_BUFFERS frame buffers. */
return VPX_CODEC_INVALID_PARAM;
} else if (!ctx->pbi) {
/* If the decoder has already been initialized, do not accept external
diff --git a/vpx/internal/vpx_codec_internal.h b/vpx/internal/vpx_codec_internal.h
index 5ab3256..6c644f0 100644
--- a/vpx/internal/vpx_codec_internal.h
+++ b/vpx/internal/vpx_codec_internal.h
@@ -238,8 +238,9 @@
* using external frame buffers.
*
* \note
- * When decoding VP9, the application must pass in at least 8 external
- * frame buffers, as VP9 can have up to 8 reference frames.
+ * When decoding VP9, the application must pass in at least
+ * #VP9_MAXIMUM_REF_BUFFERS + #VPX_MAXIMUM_WORK_BUFFERS external frame
+ * buffers.
*/
typedef vpx_codec_err_t (*vpx_codec_set_frame_buffers_fn_t)(
vpx_codec_alg_priv_t *ctx,
diff --git a/vpx/vpx_decoder.h b/vpx/vpx_decoder.h
index 30a74ea..24be82d 100644
--- a/vpx/vpx_decoder.h
+++ b/vpx/vpx_decoder.h
@@ -362,8 +362,9 @@
* using external frame buffers.
*
* \note
- * When decoding VP9, the application must pass in at least 8 external
- * frame buffers, as VP9 can have up to 8 reference frames.
+ * When decoding VP9, the application must pass in at least
+ * #VP9_MAXIMUM_REF_BUFFERS + #VPX_MAXIMUM_WORK_BUFFERS external frame
+ * buffers.
*/
vpx_codec_err_t vpx_codec_set_frame_buffers(
vpx_codec_ctx_t *ctx,
diff --git a/vpx/vpx_external_frame_buffer.h b/vpx/vpx_external_frame_buffer.h
index 0b787b8..98ce5fd 100644
--- a/vpx/vpx_external_frame_buffer.h
+++ b/vpx/vpx_external_frame_buffer.h
@@ -17,6 +17,14 @@
#include "./vpx_integer.h"
+/*!\brief The maximum number of work buffers used by libvpx.
+ */
+#define VPX_MAXIMUM_WORK_BUFFERS 1
+
+/*!\brief The maximum number of reference buffers that a VP9 encoder may use.
+ */
+#define VP9_MAXIMUM_REF_BUFFERS 8
+
/*!\brief External frame buffer
*
* This structure is used to hold external frame buffers passed into the
diff --git a/vpx_scale/vpx_scale.h b/vpx_scale/vpx_scale.h
index 9ddf62e..43fcf9d 100644
--- a/vpx_scale/vpx_scale.h
+++ b/vpx_scale/vpx_scale.h
@@ -9,8 +9,8 @@
*/
-#ifndef VPXSCALE_H
-#define VPXSCALE_H
+#ifndef VPX_SCALE_VPX_SCALE_H_
+#define VPX_SCALE_VPX_SCALE_H_
#include "vpx_scale/yv12config.h"
@@ -24,4 +24,4 @@
unsigned int vratio,
unsigned int interlaced);
-#endif
+#endif // VPX_SCALE_VPX_SCALE_H_
diff --git a/vpx_scale/yv12config.h b/vpx_scale/yv12config.h
index f23e116..bf5fc07 100644
--- a/vpx_scale/yv12config.h
+++ b/vpx_scale/yv12config.h
@@ -8,8 +8,8 @@
* be found in the AUTHORS file in the root of the source tree.
*/
-#ifndef YV12_CONFIG_H
-#define YV12_CONFIG_H
+#ifndef VPX_SCALE_YV12CONFIG_H_
+#define VPX_SCALE_YV12CONFIG_H_
#ifdef __cplusplus
extern "C" {
@@ -85,4 +85,4 @@
}
#endif
-#endif // YV12_CONFIG_H
+#endif // VPX_SCALE_YV12CONFIG_H_
diff --git a/y4minput.h b/y4minput.h
index b2a390c..615debe 100644
--- a/y4minput.h
+++ b/y4minput.h
@@ -10,8 +10,10 @@
* Based on code from the OggTheora software codec source code,
* Copyright (C) 2002-2010 The Xiph.Org Foundation and contributors.
*/
-#if !defined(_y4minput_H)
-# define _y4minput_H (1)
+
+#ifndef Y4MINPUT_H_
+#define Y4MINPUT_H_
+
# include <stdio.h>
# include "vpx/vpx_image.h"
@@ -60,4 +62,4 @@
void y4m_input_close(y4m_input *_y4m);
int y4m_input_fetch_frame(y4m_input *_y4m, FILE *_fin, vpx_image_t *img);
-#endif
+#endif // Y4MINPUT_H_