Merge "Change for adding QP settings for key frames"
diff --git a/build/make/gen_msvs_vcxproj.sh b/build/make/gen_msvs_vcxproj.sh
index ce5f93f..4558aa1 100755
--- a/build/make/gen_msvs_vcxproj.sh
+++ b/build/make/gen_msvs_vcxproj.sh
@@ -156,6 +156,10 @@
objf=$(echo ${f%.*}.obj | sed -e 's/^[\./]\+//g' -e 's,/,_,g')
if ([ "$pat" == "asm" ] || [ "$pat" == "s" ]) && $asm_use_custom_step; then
+ # Avoid object file name collisions, i.e. vpx_config.c and
+ # vpx_config.asm produce the same object file without
+ # this additional suffix.
+ objf=${objf%.obj}_asm.obj
open_tag CustomBuild \
Include=".\\$f"
for plat in "${platforms[@]}"; do
diff --git a/examples/vpx_temporal_scalable_patterns.c b/examples/vpx_temporal_scalable_patterns.c
index 29a266d..6ec1b62 100644
--- a/examples/vpx_temporal_scalable_patterns.c
+++ b/examples/vpx_temporal_scalable_patterns.c
@@ -52,6 +52,12 @@
double layer_encoding_bitrate[VPX_TS_MAX_LAYERS];
};
+// Note: these rate control metrics assume only 1 key frame in the
+// sequence (i.e., first frame only). So for temporal pattern #7
+// (which has key frame for every frame on base layer), the metrics
+// computation will be off/wrong.
+// TODO(marpan): Update these metrics to account for multiple key frames
+// in the stream.
static void set_rate_control_metrics(struct RateControlMetrics *rc,
vpx_codec_enc_cfg_t *cfg) {
unsigned int i = 0;
@@ -565,6 +571,9 @@
}
vpx_codec_control(&codec, VP8E_SET_STATIC_THRESHOLD, 1);
vpx_codec_control(&codec, VP8E_SET_TOKEN_PARTITIONS, 1);
+ // This controls the maximum target size of the key frame.
+ // For generating smaller key frames, use a smaller max_intra_size_pct
+ // value, like 100 or 200.
max_intra_size_pct = (int) (((double)cfg.rc_buf_optimal_sz * 0.5)
* ((double) cfg.g_timebase.den / cfg.g_timebase.num) / 10.0);
vpx_codec_control(&codec, VP8E_SET_MAX_INTRA_BITRATE_PCT, max_intra_size_pct);
diff --git a/test/cq_test.cc b/test/cq_test.cc
index a2c8291..7da7b80 100644
--- a/test/cq_test.cc
+++ b/test/cq_test.cc
@@ -20,7 +20,7 @@
const int kCQLevelMin = 4;
const int kCQLevelMax = 63;
const int kCQLevelStep = 8;
-const int kCQTargetBitrate = 2000;
+const unsigned int kCQTargetBitrate = 2000;
class CQTest : public ::libvpx_test::EncoderTest,
public ::libvpx_test::CodecTestWithParam<int> {
@@ -66,17 +66,17 @@
return pow(10.0, avg_psnr / 10.0) / file_size_;
}
- int file_size() const { return file_size_; }
+ size_t file_size() const { return file_size_; }
int n_frames() const { return n_frames_; }
private:
int cq_level_;
- int file_size_;
+ size_t file_size_;
double psnr_;
int n_frames_;
};
-int prev_actual_bitrate = kCQTargetBitrate;
+unsigned int prev_actual_bitrate = kCQTargetBitrate;
TEST_P(CQTest, LinearPSNRIsHigherForCQLevel) {
const vpx_rational timebase = { 33333333, 1000000000 };
cfg_.g_timebase = timebase;
@@ -88,7 +88,8 @@
timebase.den, timebase.num, 0, 30);
ASSERT_NO_FATAL_FAILURE(RunLoop(&video));
const double cq_psnr_lin = GetLinearPSNROverBitrate();
- const int cq_actual_bitrate = file_size() * 8 * 30 / (n_frames() * 1000);
+ const unsigned int cq_actual_bitrate =
+ static_cast<unsigned int>(file_size()) * 8 * 30 / (n_frames() * 1000);
EXPECT_LE(cq_actual_bitrate, kCQTargetBitrate);
EXPECT_LE(cq_actual_bitrate, prev_actual_bitrate);
prev_actual_bitrate = cq_actual_bitrate;
diff --git a/test/dct16x16_test.cc b/test/dct16x16_test.cc
index 8d115fa..cb5562e 100644
--- a/test/dct16x16_test.cc
+++ b/test/dct16x16_test.cc
@@ -512,6 +512,14 @@
make_tuple(&vp9_fht16x16_c, &vp9_iht16x16_256_add_c, 2),
make_tuple(&vp9_fht16x16_c, &vp9_iht16x16_256_add_c, 3)));
+#if HAVE_NEON
+INSTANTIATE_TEST_CASE_P(
+ NEON, Trans16x16DCT,
+ ::testing::Values(
+ make_tuple(&vp9_fdct16x16_c,
+ &vp9_idct16x16_256_add_neon, 0)));
+#endif
+
#if HAVE_SSE2
INSTANTIATE_TEST_CASE_P(
SSE2, Trans16x16DCT,
diff --git a/test/dct32x32_test.cc b/test/dct32x32_test.cc
index a2608ac..013f451 100644
--- a/test/dct32x32_test.cc
+++ b/test/dct32x32_test.cc
@@ -248,6 +248,16 @@
make_tuple(&vp9_fdct32x32_c, &vp9_idct32x32_1024_add_c, 0),
make_tuple(&vp9_fdct32x32_rd_c, &vp9_idct32x32_1024_add_c, 1)));
+#if HAVE_NEON
+INSTANTIATE_TEST_CASE_P(
+ NEON, Trans32x32Test,
+ ::testing::Values(
+ make_tuple(&vp9_fdct32x32_c,
+ &vp9_idct32x32_1024_add_neon, 0),
+ make_tuple(&vp9_fdct32x32_rd_c,
+ &vp9_idct32x32_1024_add_neon, 1)));
+#endif
+
#if HAVE_SSE2
INSTANTIATE_TEST_CASE_P(
SSE2, Trans32x32Test,
diff --git a/test/error_resilience_test.cc b/test/error_resilience_test.cc
index 30c20e9..4cd9efb 100644
--- a/test/error_resilience_test.cc
+++ b/test/error_resilience_test.cc
@@ -16,8 +16,8 @@
namespace {
-const int kMaxErrorFrames = 8;
-const int kMaxDroppableFrames = 8;
+const int kMaxErrorFrames = 12;
+const int kMaxDroppableFrames = 12;
class ErrorResilienceTest : public ::libvpx_test::EncoderTest,
public ::libvpx_test::CodecTestWithParam<libvpx_test::TestMode> {
@@ -175,6 +175,10 @@
}
}
+// Check for successful decoding and no encoder/decoder mismatch
+// if we lose (i.e., drop before decoding) a set of droppable
+// frames (i.e., frames that don't update any reference buffers).
+// Check both isolated and consecutive loss.
TEST_P(ErrorResilienceTest, DropFramesWithoutRecovery) {
const vpx_rational timebase = { 33333333, 1000000000 };
cfg_.g_timebase = timebase;
@@ -186,14 +190,18 @@
init_flags_ = VPX_CODEC_USE_PSNR;
libvpx_test::I420VideoSource video("hantro_collage_w352h288.yuv", 352, 288,
- timebase.den, timebase.num, 0, 30);
+ timebase.den, timebase.num, 0, 40);
// Error resilient mode ON.
cfg_.g_error_resilient = 1;
+ cfg_.kf_mode = VPX_KF_DISABLED;
- // Set an arbitrary set of error frames same as droppable frames
- unsigned int num_droppable_frames = 2;
- unsigned int droppable_frame_list[] = {5, 16};
+ // Set an arbitrary set of error frames same as droppable frames.
+ // In addition to isolated loss/drop, add a long consecutive series
+ // (of size 9) of dropped frames.
+ unsigned int num_droppable_frames = 11;
+ unsigned int droppable_frame_list[] = {5, 16, 22, 23, 24, 25, 26, 27, 28,
+ 29, 30};
SetDroppableFrames(num_droppable_frames, droppable_frame_list);
SetErrorFrames(num_droppable_frames, droppable_frame_list);
ASSERT_NO_FATAL_FAILURE(RunLoop(&video));
@@ -202,7 +210,7 @@
<< GetMismatchFrames() << "\n";
EXPECT_EQ(GetMismatchFrames(), (unsigned int) 0);
- // reset previously set error/droppable frames
+ // Reset the previously set error/droppable frames.
Reset();
#if 0
diff --git a/test/fdct4x4_test.cc b/test/fdct4x4_test.cc
index dc66687..127775c 100644
--- a/test/fdct4x4_test.cc
+++ b/test/fdct4x4_test.cc
@@ -286,6 +286,21 @@
make_tuple(&vp9_fht4x4_c, &vp9_iht4x4_16_add_c, 2),
make_tuple(&vp9_fht4x4_c, &vp9_iht4x4_16_add_c, 3)));
+#if HAVE_NEON
+INSTANTIATE_TEST_CASE_P(
+ NEON, Trans4x4DCT,
+ ::testing::Values(
+ make_tuple(&vp9_fdct4x4_c,
+ &vp9_idct4x4_16_add_neon, 0)));
+INSTANTIATE_TEST_CASE_P(
+ DISABLED_NEON, Trans4x4HT,
+ ::testing::Values(
+ make_tuple(&vp9_fht4x4_c, &vp9_iht4x4_16_add_neon, 0),
+ make_tuple(&vp9_fht4x4_c, &vp9_iht4x4_16_add_neon, 1),
+ make_tuple(&vp9_fht4x4_c, &vp9_iht4x4_16_add_neon, 2),
+ make_tuple(&vp9_fht4x4_c, &vp9_iht4x4_16_add_neon, 3)));
+#endif
+
#if HAVE_SSE2
INSTANTIATE_TEST_CASE_P(
SSE2, Trans4x4DCT,
diff --git a/test/fdct8x8_test.cc b/test/fdct8x8_test.cc
index 98aabe6..6f2d7d1 100644
--- a/test/fdct8x8_test.cc
+++ b/test/fdct8x8_test.cc
@@ -313,6 +313,20 @@
make_tuple(&vp9_fht8x8_c, &vp9_iht8x8_64_add_c, 2),
make_tuple(&vp9_fht8x8_c, &vp9_iht8x8_64_add_c, 3)));
+#if HAVE_NEON
+INSTANTIATE_TEST_CASE_P(
+ NEON, FwdTrans8x8DCT,
+ ::testing::Values(
+ make_tuple(&vp9_fdct8x8_c, &vp9_idct8x8_64_add_neon, 0)));
+INSTANTIATE_TEST_CASE_P(
+ DISABLED_NEON, FwdTrans8x8HT,
+ ::testing::Values(
+ make_tuple(&vp9_fht8x8_c, &vp9_iht8x8_64_add_neon, 0),
+ make_tuple(&vp9_fht8x8_c, &vp9_iht8x8_64_add_neon, 1),
+ make_tuple(&vp9_fht8x8_c, &vp9_iht8x8_64_add_neon, 2),
+ make_tuple(&vp9_fht8x8_c, &vp9_iht8x8_64_add_neon, 3)));
+#endif
+
#if HAVE_SSE2
INSTANTIATE_TEST_CASE_P(
SSE2, FwdTrans8x8DCT,
diff --git a/test/partial_idct_test.cc b/test/partial_idct_test.cc
index 2a32410..8849ce6 100644
--- a/test/partial_idct_test.cc
+++ b/test/partial_idct_test.cc
@@ -140,6 +140,30 @@
make_tuple(&vp9_idct4x4_16_add_c,
&vp9_idct4x4_1_add_c,
TX_4X4, 1)));
+#if HAVE_NEON
+INSTANTIATE_TEST_CASE_P(
+ NEON, PartialIDctTest,
+ ::testing::Values(
+ make_tuple(&vp9_idct32x32_1024_add_c,
+ &vp9_idct32x32_1_add_neon,
+ TX_32X32, 1),
+ make_tuple(&vp9_idct16x16_256_add_c,
+ &vp9_idct16x16_10_add_neon,
+ TX_16X16, 10),
+ make_tuple(&vp9_idct16x16_256_add_c,
+ &vp9_idct16x16_1_add_neon,
+ TX_16X16, 1),
+ make_tuple(&vp9_idct8x8_64_add_c,
+ &vp9_idct8x8_10_add_neon,
+ TX_8X8, 10),
+ make_tuple(&vp9_idct8x8_64_add_c,
+ &vp9_idct8x8_1_add_neon,
+ TX_8X8, 1),
+ make_tuple(&vp9_idct4x4_16_add_c,
+ &vp9_idct4x4_1_add_neon,
+ TX_4X4, 1)));
+#endif
+
#if HAVE_SSE2
INSTANTIATE_TEST_CASE_P(
SSE2, PartialIDctTest,
diff --git a/test/sixtap_predict_test.cc b/test/sixtap_predict_test.cc
index 3434662..1b2f03f 100644
--- a/test/sixtap_predict_test.cc
+++ b/test/sixtap_predict_test.cc
@@ -193,6 +193,16 @@
make_tuple(8, 8, sixtap_8x8_c),
make_tuple(8, 4, sixtap_8x4_c),
make_tuple(4, 4, sixtap_4x4_c)));
+#if HAVE_NEON
+const sixtap_predict_fn_t sixtap_16x16_neon = vp8_sixtap_predict16x16_neon;
+const sixtap_predict_fn_t sixtap_8x8_neon = vp8_sixtap_predict8x8_neon;
+const sixtap_predict_fn_t sixtap_8x4_neon = vp8_sixtap_predict8x4_neon;
+INSTANTIATE_TEST_CASE_P(
+ NEON, SixtapPredictTest, ::testing::Values(
+ make_tuple(16, 16, sixtap_16x16_neon),
+ make_tuple(8, 8, sixtap_8x8_neon),
+ make_tuple(8, 4, sixtap_8x4_neon)));
+#endif
#if HAVE_MMX
const sixtap_predict_fn_t sixtap_16x16_mmx = vp8_sixtap_predict16x16_mmx;
const sixtap_predict_fn_t sixtap_8x8_mmx = vp8_sixtap_predict8x8_mmx;
diff --git a/test/subtract_test.cc b/test/subtract_test.cc
index 9e242a2..3efb955 100644
--- a/test/subtract_test.cc
+++ b/test/subtract_test.cc
@@ -105,6 +105,11 @@
INSTANTIATE_TEST_CASE_P(C, SubtractBlockTest,
::testing::Values(vp8_subtract_b_c));
+#if HAVE_NEON
+INSTANTIATE_TEST_CASE_P(NEON, SubtractBlockTest,
+ ::testing::Values(vp8_subtract_b_neon));
+#endif
+
#if HAVE_MMX
INSTANTIATE_TEST_CASE_P(MMX, SubtractBlockTest,
::testing::Values(vp8_subtract_b_mmx));
diff --git a/test/variance_test.cc b/test/variance_test.cc
index b9144ff..c9bf13a 100644
--- a/test/variance_test.cc
+++ b/test/variance_test.cc
@@ -307,6 +307,19 @@
make_tuple(4, 3, variance16x8_c),
make_tuple(4, 4, variance16x16_c)));
+#if HAVE_NEON
+const vp8_variance_fn_t variance8x8_neon = vp8_variance8x8_neon;
+const vp8_variance_fn_t variance8x16_neon = vp8_variance8x16_neon;
+const vp8_variance_fn_t variance16x8_neon = vp8_variance16x8_neon;
+const vp8_variance_fn_t variance16x16_neon = vp8_variance16x16_neon;
+INSTANTIATE_TEST_CASE_P(
+ NEON, VP8VarianceTest,
+ ::testing::Values(make_tuple(3, 3, variance8x8_neon),
+ make_tuple(3, 4, variance8x16_neon),
+ make_tuple(4, 3, variance16x8_neon),
+ make_tuple(4, 4, variance16x16_neon)));
+#endif
+
#if HAVE_MMX
const vp8_variance_fn_t variance4x4_mmx = vp8_variance4x4_mmx;
const vp8_variance_fn_t variance8x8_mmx = vp8_variance8x8_mmx;
diff --git a/vp8/common/arm/neon/dc_only_idct_add_neon.asm b/vp8/common/arm/neon/dc_only_idct_add_neon.asm
deleted file mode 100644
index 79ff02c..0000000
--- a/vp8/common/arm/neon/dc_only_idct_add_neon.asm
+++ /dev/null
@@ -1,54 +0,0 @@
-;
-; Copyright (c) 2010 The WebM project authors. All Rights Reserved.
-;
-; Use of this source code is governed by a BSD-style license and patent
-; grant that can be found in the LICENSE file in the root of the source
-; tree. All contributing project authors may be found in the AUTHORS
-; file in the root of the source tree.
-;
-
-
- EXPORT |vp8_dc_only_idct_add_neon|
- ARM
- REQUIRE8
- PRESERVE8
-
- AREA ||.text||, CODE, READONLY, ALIGN=2
-
-;void vp8_dc_only_idct_add_c(short input_dc, unsigned char *pred_ptr,
-; int pred_stride, unsigned char *dst_ptr,
-; int dst_stride)
-
-; r0 input_dc
-; r1 pred_ptr
-; r2 pred_stride
-; r3 dst_ptr
-; sp dst_stride
-
-|vp8_dc_only_idct_add_neon| PROC
- add r0, r0, #4
- asr r0, r0, #3
- ldr r12, [sp]
- vdup.16 q0, r0
-
- vld1.32 {d2[0]}, [r1], r2
- vld1.32 {d2[1]}, [r1], r2
- vld1.32 {d4[0]}, [r1], r2
- vld1.32 {d4[1]}, [r1]
-
- vaddw.u8 q1, q0, d2
- vaddw.u8 q2, q0, d4
-
- vqmovun.s16 d2, q1
- vqmovun.s16 d4, q2
-
- vst1.32 {d2[0]}, [r3], r12
- vst1.32 {d2[1]}, [r3], r12
- vst1.32 {d4[0]}, [r3], r12
- vst1.32 {d4[1]}, [r3]
-
- bx lr
-
- ENDP
-
- END
diff --git a/vp8/common/arm/neon/dc_only_idct_add_neon.c b/vp8/common/arm/neon/dc_only_idct_add_neon.c
new file mode 100644
index 0000000..ad5f41d
--- /dev/null
+++ b/vp8/common/arm/neon/dc_only_idct_add_neon.c
@@ -0,0 +1,42 @@
+/*
+ * Copyright (c) 2014 The WebM project authors. All Rights Reserved.
+ *
+ * Use of this source code is governed by a BSD-style license
+ * that can be found in the LICENSE file in the root of the source
+ * tree. An additional intellectual property rights grant can be found
+ * in the file PATENTS. All contributing project authors may
+ * be found in the AUTHORS file in the root of the source tree.
+ */
+
+#include <arm_neon.h>
+
+void vp8_dc_only_idct_add_neon( /* adds one rounded DC value to a 4x4 block read from pred_ptr and writes it to dst_ptr */
+ int16_t input_dc, /* value that is rounded as (input_dc + 4) >> 3 before being added */
+ unsigned char *pred_ptr, /* source rows; 4 bytes are read from each of 4 rows */
+ int pred_stride, /* byte step between consecutive source rows */
+ unsigned char *dst_ptr, /* destination rows; 4 bytes are written to each of 4 rows */
+ int dst_stride) { /* byte step between consecutive destination rows */
+ int i;
+ uint16_t a1 = ((input_dc + 4) >> 3); /* held as uint16_t; the sum is reinterpreted as signed before narrowing below */
+ uint32x2_t d2u32 = vdup_n_u32(0); /* two 32-bit lanes, each holding one 4-byte row */
+ uint8x8_t d2u8;
+ uint16x8_t q1u16;
+ uint16x8_t qAdd;
+
+ qAdd = vdupq_n_u16(a1); /* broadcast a1 to all eight 16-bit lanes */
+
+ for (i = 0; i < 2; i++) { /* each iteration handles two rows, so four rows in total */
+ d2u32 = vld1_lane_u32((const uint32_t *)pred_ptr, d2u32, 0); /* first row of the pair into lane 0 */
+ pred_ptr += pred_stride;
+ d2u32 = vld1_lane_u32((const uint32_t *)pred_ptr, d2u32, 1); /* second row of the pair into lane 1 */
+ pred_ptr += pred_stride;
+
+ q1u16 = vaddw_u8(qAdd, vreinterpret_u8_u32(d2u32)); /* widen the 8 source bytes to 16 bits and add a1 */
+ d2u8 = vqmovun_s16(vreinterpretq_s16_u16(q1u16)); /* treat sums as signed and saturate to 0..255 */
+
+ vst1_lane_u32((uint32_t *)dst_ptr, vreinterpret_u32_u8(d2u8), 0); /* write first row of the pair */
+ dst_ptr += dst_stride;
+ vst1_lane_u32((uint32_t *)dst_ptr, vreinterpret_u32_u8(d2u8), 1); /* write second row of the pair */
+ dst_ptr += dst_stride;
+ }
+}
diff --git a/vp8/common/arm/neon/dequant_idct_neon.asm b/vp8/common/arm/neon/dequant_idct_neon.asm
deleted file mode 100644
index 602cce6..0000000
--- a/vp8/common/arm/neon/dequant_idct_neon.asm
+++ /dev/null
@@ -1,131 +0,0 @@
-;
-; Copyright (c) 2010 The WebM project authors. All Rights Reserved.
-;
-; Use of this source code is governed by a BSD-style license
-; that can be found in the LICENSE file in the root of the source
-; tree. An additional intellectual property rights grant can be found
-; in the file PATENTS. All contributing project authors may
-; be found in the AUTHORS file in the root of the source tree.
-;
-
-
- EXPORT |vp8_dequant_idct_add_neon|
- ARM
- REQUIRE8
- PRESERVE8
-
- AREA ||.text||, CODE, READONLY, ALIGN=2
-;void vp8_dequant_idct_add_neon(short *input, short *dq,
-; unsigned char *dest, int stride)
-; r0 short *input,
-; r1 short *dq,
-; r2 unsigned char *dest
-; r3 int stride
-
-|vp8_dequant_idct_add_neon| PROC
- vld1.16 {q3, q4}, [r0]
- vld1.16 {q5, q6}, [r1]
-
- add r1, r2, r3 ; r1 = dest + stride
- lsl r3, #1 ; 2x stride
-
- vld1.32 {d14[0]}, [r2], r3
- vld1.32 {d14[1]}, [r1], r3
- vld1.32 {d15[0]}, [r2]
- vld1.32 {d15[1]}, [r1]
-
- adr r12, cospi8sqrt2minus1 ; pointer to the first constant
-
- vmul.i16 q1, q3, q5 ;input for short_idct4x4llm_neon
- vmul.i16 q2, q4, q6
-
-;|short_idct4x4llm_neon| PROC
- vld1.16 {d0}, [r12]
- vswp d3, d4 ;q2(vp[4] vp[12])
-
- vqdmulh.s16 q3, q2, d0[2]
- vqdmulh.s16 q4, q2, d0[0]
-
- vqadd.s16 d12, d2, d3 ;a1
- vqsub.s16 d13, d2, d3 ;b1
-
- vshr.s16 q3, q3, #1
- vshr.s16 q4, q4, #1
-
- vqadd.s16 q3, q3, q2
- vqadd.s16 q4, q4, q2
-
- vqsub.s16 d10, d6, d9 ;c1
- vqadd.s16 d11, d7, d8 ;d1
-
- vqadd.s16 d2, d12, d11
- vqadd.s16 d3, d13, d10
- vqsub.s16 d4, d13, d10
- vqsub.s16 d5, d12, d11
-
- vtrn.32 d2, d4
- vtrn.32 d3, d5
- vtrn.16 d2, d3
- vtrn.16 d4, d5
-
-; memset(input, 0, 32) -- 32bytes
- vmov.i16 q14, #0
-
- vswp d3, d4
- vqdmulh.s16 q3, q2, d0[2]
- vqdmulh.s16 q4, q2, d0[0]
-
- vqadd.s16 d12, d2, d3 ;a1
- vqsub.s16 d13, d2, d3 ;b1
-
- vmov q15, q14
-
- vshr.s16 q3, q3, #1
- vshr.s16 q4, q4, #1
-
- vqadd.s16 q3, q3, q2
- vqadd.s16 q4, q4, q2
-
- vqsub.s16 d10, d6, d9 ;c1
- vqadd.s16 d11, d7, d8 ;d1
-
- vqadd.s16 d2, d12, d11
- vqadd.s16 d3, d13, d10
- vqsub.s16 d4, d13, d10
- vqsub.s16 d5, d12, d11
-
- vst1.16 {q14, q15}, [r0]
-
- vrshr.s16 d2, d2, #3
- vrshr.s16 d3, d3, #3
- vrshr.s16 d4, d4, #3
- vrshr.s16 d5, d5, #3
-
- vtrn.32 d2, d4
- vtrn.32 d3, d5
- vtrn.16 d2, d3
- vtrn.16 d4, d5
-
- vaddw.u8 q1, q1, d14
- vaddw.u8 q2, q2, d15
-
- sub r2, r2, r3
- sub r1, r1, r3
-
- vqmovun.s16 d0, q1
- vqmovun.s16 d1, q2
-
- vst1.32 {d0[0]}, [r2], r3
- vst1.32 {d0[1]}, [r1], r3
- vst1.32 {d1[0]}, [r2]
- vst1.32 {d1[1]}, [r1]
-
- bx lr
-
- ENDP ; |vp8_dequant_idct_add_neon|
-
-; Constant Pool
-cospi8sqrt2minus1 DCD 0x4e7b4e7b
-sinpi8sqrt2 DCD 0x8a8c8a8c
-
- END
diff --git a/vp8/common/arm/neon/dequant_idct_neon.c b/vp8/common/arm/neon/dequant_idct_neon.c
new file mode 100644
index 0000000..58e1192
--- /dev/null
+++ b/vp8/common/arm/neon/dequant_idct_neon.c
@@ -0,0 +1,142 @@
+/*
+ * Copyright (c) 2014 The WebM project authors. All Rights Reserved.
+ *
+ * Use of this source code is governed by a BSD-style license
+ * that can be found in the LICENSE file in the root of the source
+ * tree. An additional intellectual property rights grant can be found
+ * in the file PATENTS. All contributing project authors may
+ * be found in the AUTHORS file in the root of the source tree.
+ */
+
+#include <arm_neon.h>
+
+static const int16_t cospi8sqrt2minus1 = 20091; // multiplier passed to vqdmulhq_n_s16 below
+static const int16_t sinpi8sqrt2 = 35468; // NOTE(review): 35468 (0x8a8c) is above INT16_MAX; presumably relies on conversion wrapping to a negative value - confirm
+
+void vp8_dequant_idct_add_neon( // dequantizes 16 coefficients, runs two transform passes, adds the result to the 4x4 block at dst
+ int16_t *input, // 16 coefficients; read and then overwritten with zeros
+ int16_t *dq, // 16 factors multiplied element-wise with input
+ unsigned char *dst, // 4 rows of 4 bytes; read first, then overwritten with the sum
+ int stride) { // byte step between consecutive dst rows
+ unsigned char *dst0;
+ int32x2_t d14, d15;
+ int16x4_t d2, d3, d4, d5, d10, d11, d12, d13;
+ int16x8_t q1, q2, q3, q4, q5, q6;
+ int16x8_t qEmpty = vdupq_n_s16(0); // all-zero vector used to clear input
+ int32x2x2_t d2tmp0, d2tmp1;
+ int16x4x2_t d2tmp2, d2tmp3;
+
+ d14 = d15 = vdup_n_s32(0);
+
+ // load input
+ q3 = vld1q_s16(input);
+ vst1q_s16(input, qEmpty); // clear the first 8 coefficients right after reading them
+ input += 8;
+ q4 = vld1q_s16(input);
+ vst1q_s16(input, qEmpty); // clear the last 8 coefficients
+
+ // load dq
+ q5 = vld1q_s16(dq);
+ dq += 8;
+ q6 = vld1q_s16(dq);
+
+ // load src from dst
+ dst0 = dst;
+ d14 = vld1_lane_s32((const int32_t *)dst0, d14, 0); // row 0
+ dst0 += stride;
+ d14 = vld1_lane_s32((const int32_t *)dst0, d14, 1); // row 1
+ dst0 += stride;
+ d15 = vld1_lane_s32((const int32_t *)dst0, d15, 0); // row 2
+ dst0 += stride;
+ d15 = vld1_lane_s32((const int32_t *)dst0, d15, 1); // row 3
+
+ q1 = vreinterpretq_s16_u16(vmulq_u16(vreinterpretq_u16_s16(q3), // dequantize: input[0..7] * dq[0..7], low 16 bits kept
+ vreinterpretq_u16_s16(q5)));
+ q2 = vreinterpretq_s16_u16(vmulq_u16(vreinterpretq_u16_s16(q4), // dequantize: input[8..15] * dq[8..15], low 16 bits kept
+ vreinterpretq_u16_s16(q6)));
+
+ d12 = vqadd_s16(vget_low_s16(q1), vget_low_s16(q2)); // saturating sum of elements 0..3 and 8..11
+ d13 = vqsub_s16(vget_low_s16(q1), vget_low_s16(q2)); // saturating difference of the same elements
+
+ q2 = vcombine_s16(vget_high_s16(q1), vget_high_s16(q2)); // elements 4..7 in the low half, 12..15 in the high half
+
+ q3 = vqdmulhq_n_s16(q2, sinpi8sqrt2); // high half of the doubled product
+ q4 = vqdmulhq_n_s16(q2, cospi8sqrt2minus1);
+
+ q3 = vshrq_n_s16(q3, 1); // shift right by one after the doubling multiply
+ q4 = vshrq_n_s16(q4, 1);
+
+ q3 = vqaddq_s16(q3, q2); // add the unscaled q2 values back in
+ q4 = vqaddq_s16(q4, q2);
+
+ d10 = vqsub_s16(vget_low_s16(q3), vget_high_s16(q4));
+ d11 = vqadd_s16(vget_high_s16(q3), vget_low_s16(q4));
+
+ d2 = vqadd_s16(d12, d11); // combine the two halves into four output vectors
+ d3 = vqadd_s16(d13, d10);
+ d4 = vqsub_s16(d13, d10);
+ d5 = vqsub_s16(d12, d11);
+
+ d2tmp0 = vtrn_s32(vreinterpret_s32_s16(d2), vreinterpret_s32_s16(d4)); // 32-bit then 16-bit vtrn steps rearrange d2..d5 for the second pass
+ d2tmp1 = vtrn_s32(vreinterpret_s32_s16(d3), vreinterpret_s32_s16(d5));
+ d2tmp2 = vtrn_s16(vreinterpret_s16_s32(d2tmp0.val[0]),
+ vreinterpret_s16_s32(d2tmp1.val[0]));
+ d2tmp3 = vtrn_s16(vreinterpret_s16_s32(d2tmp0.val[1]),
+ vreinterpret_s16_s32(d2tmp1.val[1]));
+
+ // loop 2
+ q2 = vcombine_s16(d2tmp2.val[1], d2tmp3.val[1]); // second pass repeats the arithmetic above on the rearranged data
+
+ q3 = vqdmulhq_n_s16(q2, sinpi8sqrt2);
+ q4 = vqdmulhq_n_s16(q2, cospi8sqrt2minus1);
+
+ d12 = vqadd_s16(d2tmp2.val[0], d2tmp3.val[0]);
+ d13 = vqsub_s16(d2tmp2.val[0], d2tmp3.val[0]);
+
+ q3 = vshrq_n_s16(q3, 1);
+ q4 = vshrq_n_s16(q4, 1);
+
+ q3 = vqaddq_s16(q3, q2);
+ q4 = vqaddq_s16(q4, q2);
+
+ d10 = vqsub_s16(vget_low_s16(q3), vget_high_s16(q4));
+ d11 = vqadd_s16(vget_high_s16(q3), vget_low_s16(q4));
+
+ d2 = vqadd_s16(d12, d11);
+ d3 = vqadd_s16(d13, d10);
+ d4 = vqsub_s16(d13, d10);
+ d5 = vqsub_s16(d12, d11);
+
+ d2 = vrshr_n_s16(d2, 3); // rounding shift right by 3 on the second-pass outputs
+ d3 = vrshr_n_s16(d3, 3);
+ d4 = vrshr_n_s16(d4, 3);
+ d5 = vrshr_n_s16(d5, 3);
+
+ d2tmp0 = vtrn_s32(vreinterpret_s32_s16(d2), vreinterpret_s32_s16(d4)); // same vtrn sequence as after the first pass
+ d2tmp1 = vtrn_s32(vreinterpret_s32_s16(d3), vreinterpret_s32_s16(d5));
+ d2tmp2 = vtrn_s16(vreinterpret_s16_s32(d2tmp0.val[0]),
+ vreinterpret_s16_s32(d2tmp1.val[0]));
+ d2tmp3 = vtrn_s16(vreinterpret_s16_s32(d2tmp0.val[1]),
+ vreinterpret_s16_s32(d2tmp1.val[1]));
+
+ q1 = vcombine_s16(d2tmp2.val[0], d2tmp2.val[1]); // q1 is paired with d14 (rows 0-1) below
+ q2 = vcombine_s16(d2tmp3.val[0], d2tmp3.val[1]); // q2 is paired with d15 (rows 2-3) below
+
+ q1 = vreinterpretq_s16_u16(vaddw_u8(vreinterpretq_u16_s16(q1), // add the dst bytes, widened to 16 bits
+ vreinterpret_u8_s32(d14)));
+ q2 = vreinterpretq_s16_u16(vaddw_u8(vreinterpretq_u16_s16(q2),
+ vreinterpret_u8_s32(d15)));
+
+ d14 = vreinterpret_s32_u8(vqmovun_s16(q1)); // saturate the signed sums to 0..255
+ d15 = vreinterpret_s32_u8(vqmovun_s16(q2));
+
+ dst0 = dst;
+ vst1_lane_s32((int32_t *)dst0, d14, 0); // write row 0
+ dst0 += stride;
+ vst1_lane_s32((int32_t *)dst0, d14, 1); // write row 1
+ dst0 += stride;
+ vst1_lane_s32((int32_t *)dst0, d15, 0); // write row 2
+ dst0 += stride;
+ vst1_lane_s32((int32_t *)dst0, d15, 1); // write row 3
+ return;
+}
diff --git a/vp8/common/arm/neon/dequantizeb_neon.asm b/vp8/common/arm/neon/dequantizeb_neon.asm
deleted file mode 100644
index c8e0c31..0000000
--- a/vp8/common/arm/neon/dequantizeb_neon.asm
+++ /dev/null
@@ -1,34 +0,0 @@
-;
-; Copyright (c) 2010 The WebM project authors. All Rights Reserved.
-;
-; Use of this source code is governed by a BSD-style license
-; that can be found in the LICENSE file in the root of the source
-; tree. An additional intellectual property rights grant can be found
-; in the file PATENTS. All contributing project authors may
-; be found in the AUTHORS file in the root of the source tree.
-;
-
-
- EXPORT |vp8_dequantize_b_loop_neon|
- ARM
- REQUIRE8
- PRESERVE8
-
- AREA ||.text||, CODE, READONLY, ALIGN=2
-; r0 short *Q,
-; r1 short *DQC
-; r2 short *DQ
-|vp8_dequantize_b_loop_neon| PROC
- vld1.16 {q0, q1}, [r0]
- vld1.16 {q2, q3}, [r1]
-
- vmul.i16 q4, q0, q2
- vmul.i16 q5, q1, q3
-
- vst1.16 {q4, q5}, [r2]
-
- bx lr
-
- ENDP
-
- END
diff --git a/vp8/common/arm/neon/dequantizeb_neon.c b/vp8/common/arm/neon/dequantizeb_neon.c
new file mode 100644
index 0000000..60f69c8
--- /dev/null
+++ b/vp8/common/arm/neon/dequantizeb_neon.c
@@ -0,0 +1,27 @@
+/*
+ * Copyright (c) 2014 The WebM project authors. All Rights Reserved.
+ *
+ * Use of this source code is governed by a BSD-style license
+ * that can be found in the LICENSE file in the root of the source
+ * tree. An additional intellectual property rights grant can be found
+ * in the file PATENTS. All contributing project authors may
+ * be found in the AUTHORS file in the root of the source tree.
+ */
+
+#include <arm_neon.h>
+
+void vp8_dequantize_b_loop_neon( /* element-wise multiply of 16 int16 values: DQ[i] = Q[i] * DQC[i] */
+ int16_t *Q, /* 16 input values */
+ int16_t *DQC, /* 16 factors */
+ int16_t *DQ) { /* 16 output products, low 16 bits of each */
+ int16x8x2_t qQ, qDQC, qDQ;
+
+ qQ = vld2q_s16(Q); /* de-interleaving load: even-indexed values in val[0], odd-indexed in val[1] */
+ qDQC = vld2q_s16(DQC); /* same split, so lanes of Q and DQC still pair up index for index */
+
+ qDQ.val[0] = vmulq_s16(qQ.val[0], qDQC.val[0]);
+ qDQ.val[1] = vmulq_s16(qQ.val[1], qDQC.val[1]);
+
+ vst2q_s16(DQ, qDQ); /* interleaving store restores the original element order */
+ return;
+}
diff --git a/vp8/encoder/onyx_if.c b/vp8/encoder/onyx_if.c
index 7f39646..849a0ed 100644
--- a/vp8/encoder/onyx_if.c
+++ b/vp8/encoder/onyx_if.c
@@ -19,7 +19,7 @@
#include "vp8/common/alloccommon.h"
#include "mcomp.h"
#include "firstpass.h"
-#include "psnr.h"
+#include "vpx/internal/vpx_psnr.h"
#include "vpx_scale/vpx_scale.h"
#include "vp8/common/extend.h"
#include "ratectrl.h"
@@ -2170,10 +2170,12 @@
8.0 / 1000.0 / time_encoded;
double samples = 3.0 / 2 * cpi->frames_in_layer[i] *
lst_yv12->y_width * lst_yv12->y_height;
- double total_psnr = vp8_mse2psnr(samples, 255.0,
- cpi->total_error2[i]);
- double total_psnr2 = vp8_mse2psnr(samples, 255.0,
- cpi->total_error2_p[i]);
+ double total_psnr =
+ vpx_sse_to_psnr(samples, 255.0,
+ cpi->total_error2[i]);
+ double total_psnr2 =
+ vpx_sse_to_psnr(samples, 255.0,
+ cpi->total_error2_p[i]);
double total_ssim = 100 * pow(cpi->sum_ssim[i] /
cpi->sum_weights[i], 8.0);
@@ -2190,9 +2192,9 @@
{
double samples = 3.0 / 2 * cpi->count *
lst_yv12->y_width * lst_yv12->y_height;
- double total_psnr = vp8_mse2psnr(samples, 255.0,
- cpi->total_sq_error);
- double total_psnr2 = vp8_mse2psnr(samples, 255.0,
+ double total_psnr = vpx_sse_to_psnr(samples, 255.0,
+ cpi->total_sq_error);
+ double total_psnr2 = vpx_sse_to_psnr(samples, 255.0,
cpi->total_sq_error2);
double total_ssim = 100 * pow(cpi->summed_quality /
cpi->summed_weights, 8.0);
@@ -2522,8 +2524,8 @@
pkt.data.psnr.samples[3] = width * height;
for (i = 0; i < 4; i++)
- pkt.data.psnr.psnr[i] = vp8_mse2psnr(pkt.data.psnr.samples[i], 255.0,
- (double)(pkt.data.psnr.sse[i]));
+ pkt.data.psnr.psnr[i] = vpx_sse_to_psnr(pkt.data.psnr.samples[i], 255.0,
+ (double)(pkt.data.psnr.sse[i]));
vpx_codec_pkt_list_add(cpi->output_pkt_list, &pkt);
}
@@ -5284,11 +5286,11 @@
sq_error = (double)(ye + ue + ve);
- frame_psnr = vp8_mse2psnr(t_samples, 255.0, sq_error);
+ frame_psnr = vpx_sse_to_psnr(t_samples, 255.0, sq_error);
- cpi->total_y += vp8_mse2psnr(y_samples, 255.0, (double)ye);
- cpi->total_u += vp8_mse2psnr(uv_samples, 255.0, (double)ue);
- cpi->total_v += vp8_mse2psnr(uv_samples, 255.0, (double)ve);
+ cpi->total_y += vpx_sse_to_psnr(y_samples, 255.0, (double)ye);
+ cpi->total_u += vpx_sse_to_psnr(uv_samples, 255.0, (double)ue);
+ cpi->total_v += vpx_sse_to_psnr(uv_samples, 255.0, (double)ve);
cpi->total_sq_error += sq_error;
cpi->total += frame_psnr;
#if CONFIG_POSTPROC
@@ -5311,14 +5313,14 @@
sq_error2 = (double)(ye + ue + ve);
- frame_psnr2 = vp8_mse2psnr(t_samples, 255.0, sq_error2);
+ frame_psnr2 = vpx_sse_to_psnr(t_samples, 255.0, sq_error2);
- cpi->totalp_y += vp8_mse2psnr(y_samples,
- 255.0, (double)ye);
- cpi->totalp_u += vp8_mse2psnr(uv_samples,
- 255.0, (double)ue);
- cpi->totalp_v += vp8_mse2psnr(uv_samples,
- 255.0, (double)ve);
+ cpi->totalp_y += vpx_sse_to_psnr(y_samples,
+ 255.0, (double)ye);
+ cpi->totalp_u += vpx_sse_to_psnr(uv_samples,
+ 255.0, (double)ue);
+ cpi->totalp_v += vpx_sse_to_psnr(uv_samples,
+ 255.0, (double)ve);
cpi->total_sq_error2 += sq_error2;
cpi->totalp += frame_psnr2;
diff --git a/vp8/encoder/psnr.c b/vp8/encoder/psnr.c
deleted file mode 100644
index b3a3d95..0000000
--- a/vp8/encoder/psnr.c
+++ /dev/null
@@ -1,31 +0,0 @@
-/*
- * Copyright (c) 2010 The WebM project authors. All Rights Reserved.
- *
- * Use of this source code is governed by a BSD-style license
- * that can be found in the LICENSE file in the root of the source
- * tree. An additional intellectual property rights grant can be found
- * in the file PATENTS. All contributing project authors may
- * be found in the AUTHORS file in the root of the source tree.
- */
-
-
-#include "vpx_scale/yv12config.h"
-#include "math.h"
-#include "vp8/common/systemdependent.h" /* for vp8_clear_system_state() */
-
-#define MAX_PSNR 100
-
-double vp8_mse2psnr(double Samples, double Peak, double Mse)
-{
- double psnr;
-
- if ((double)Mse > 0.0)
- psnr = 10.0 * log10(Peak * Peak * Samples / Mse);
- else
- psnr = MAX_PSNR; /* Limit to prevent / 0 */
-
- if (psnr > MAX_PSNR)
- psnr = MAX_PSNR;
-
- return psnr;
-}
diff --git a/vp8/encoder/psnr.h b/vp8/encoder/psnr.h
deleted file mode 100644
index 0c6c088..0000000
--- a/vp8/encoder/psnr.h
+++ /dev/null
@@ -1,25 +0,0 @@
-/*
- * Copyright (c) 2010 The WebM project authors. All Rights Reserved.
- *
- * Use of this source code is governed by a BSD-style license
- * that can be found in the LICENSE file in the root of the source
- * tree. An additional intellectual property rights grant can be found
- * in the file PATENTS. All contributing project authors may
- * be found in the AUTHORS file in the root of the source tree.
- */
-
-
-#ifndef VP8_ENCODER_PSNR_H_
-#define VP8_ENCODER_PSNR_H_
-
-#ifdef __cplusplus
-extern "C" {
-#endif
-
-extern double vp8_mse2psnr(double Samples, double Peak, double Mse);
-
-#ifdef __cplusplus
-} // extern "C"
-#endif
-
-#endif // VP8_ENCODER_PSNR_H_
diff --git a/vp8/encoder/temporal_filter.c b/vp8/encoder/temporal_filter.c
index 7e3af71..513b2bf 100644
--- a/vp8/encoder/temporal_filter.c
+++ b/vp8/encoder/temporal_filter.c
@@ -16,7 +16,6 @@
#include "vp8/common/alloccommon.h"
#include "mcomp.h"
#include "firstpass.h"
-#include "psnr.h"
#include "vpx_scale/vpx_scale.h"
#include "vp8/common/extend.h"
#include "ratectrl.h"
diff --git a/vp8/vp8_common.mk b/vp8/vp8_common.mk
index d1eb445..ac91d7a 100644
--- a/vp8/vp8_common.mk
+++ b/vp8/vp8_common.mk
@@ -159,7 +159,6 @@
VP8_COMMON_SRCS-$(HAVE_MEDIA) += common/arm/armv6/vp8_variance_halfpixvar16x16_hv_armv6$(ASM)
# common (neon)
-VP8_COMMON_SRCS-$(HAVE_NEON) += common/arm/neon/dc_only_idct_add_neon$(ASM)
VP8_COMMON_SRCS-$(HAVE_NEON) += common/arm/neon/iwalsh_neon$(ASM)
VP8_COMMON_SRCS-$(HAVE_NEON) += common/arm/neon/loopfilter_neon$(ASM)
VP8_COMMON_SRCS-$(HAVE_NEON) += common/arm/neon/loopfiltersimplehorizontaledge_neon$(ASM)
@@ -174,10 +173,8 @@
VP8_COMMON_SRCS-$(HAVE_NEON) += common/arm/neon/sixtappredict16x16_neon$(ASM)
VP8_COMMON_SRCS-$(HAVE_NEON) += common/arm/neon/buildintrapredictorsmby_neon$(ASM)
VP8_COMMON_SRCS-$(HAVE_NEON) += common/arm/neon/save_reg_neon$(ASM)
-VP8_COMMON_SRCS-$(HAVE_NEON) += common/arm/neon/dequant_idct_neon$(ASM)
VP8_COMMON_SRCS-$(HAVE_NEON) += common/arm/neon/idct_dequant_full_2x_neon$(ASM)
VP8_COMMON_SRCS-$(HAVE_NEON) += common/arm/neon/idct_dequant_0_2x_neon$(ASM)
-VP8_COMMON_SRCS-$(HAVE_NEON) += common/arm/neon/dequantizeb_neon$(ASM)
VP8_COMMON_SRCS-$(HAVE_NEON) += common/arm/neon/idct_blk_neon.c
VP8_COMMON_SRCS-$(HAVE_NEON) += common/arm/neon/variance_neon$(ASM)
VP8_COMMON_SRCS-$(HAVE_NEON) += common/arm/neon/vp8_subpixelvariance8x8_neon$(ASM)
@@ -187,6 +184,9 @@
# common (neon intrinsics)
VP8_COMMON_SRCS-$(HAVE_NEON) += common/arm/neon/bilinearpredict_neon.c
VP8_COMMON_SRCS-$(HAVE_NEON) += common/arm/neon/copymem_neon.c
+VP8_COMMON_SRCS-$(HAVE_NEON) += common/arm/neon/dc_only_idct_add_neon.c
+VP8_COMMON_SRCS-$(HAVE_NEON) += common/arm/neon/dequant_idct_neon.c
+VP8_COMMON_SRCS-$(HAVE_NEON) += common/arm/neon/dequantizeb_neon.c
$(eval $(call rtcd_h_template,vp8_rtcd,vp8/common/rtcd_defs.sh))
diff --git a/vp8/vp8cx.mk b/vp8/vp8cx.mk
index cd091f3..d7c6dd1 100644
--- a/vp8/vp8cx.mk
+++ b/vp8/vp8cx.mk
@@ -50,7 +50,6 @@
VP8_CX_SRCS-yes += encoder/modecosts.h
VP8_CX_SRCS-yes += encoder/onyx_int.h
VP8_CX_SRCS-yes += encoder/pickinter.h
-VP8_CX_SRCS-yes += encoder/psnr.h
VP8_CX_SRCS-yes += encoder/quantize.h
VP8_CX_SRCS-yes += encoder/ratectrl.h
VP8_CX_SRCS-yes += encoder/rdopt.h
@@ -61,7 +60,6 @@
VP8_CX_SRCS-yes += encoder/onyx_if.c
VP8_CX_SRCS-yes += encoder/pickinter.c
VP8_CX_SRCS-yes += encoder/picklpf.c
-VP8_CX_SRCS-yes += encoder/psnr.c
VP8_CX_SRCS-yes += encoder/quantize.c
VP8_CX_SRCS-yes += encoder/ratectrl.c
VP8_CX_SRCS-yes += encoder/rdopt.c
diff --git a/vp9/common/generic/vp9_systemdependent.c b/vp9/common/generic/vp9_systemdependent.c
deleted file mode 100644
index 536febb..0000000
--- a/vp9/common/generic/vp9_systemdependent.c
+++ /dev/null
@@ -1,19 +0,0 @@
-/*
- * Copyright (c) 2010 The WebM project authors. All Rights Reserved.
- *
- * Use of this source code is governed by a BSD-style license
- * that can be found in the LICENSE file in the root of the source
- * tree. An additional intellectual property rights grant can be found
- * in the file PATENTS. All contributing project authors may
- * be found in the AUTHORS file in the root of the source tree.
- */
-
-
-#include "./vpx_config.h"
-#include "./vp9_rtcd.h"
-#include "vp9/common/vp9_onyxc_int.h"
-
-void vp9_machine_specific_config(VP9_COMMON *cm) {
- (void)cm;
- vp9_rtcd();
-}
diff --git a/vp9/common/vp9_alloccommon.c b/vp9/common/vp9_alloccommon.c
index 6f77199..ff4b7c1 100644
--- a/vp9/common/vp9_alloccommon.c
+++ b/vp9/common/vp9_alloccommon.c
@@ -201,7 +201,7 @@
}
void vp9_create_common(VP9_COMMON *cm) {
- vp9_machine_specific_config(cm);
+ vp9_rtcd();
}
void vp9_remove_common(VP9_COMMON *cm) {
diff --git a/vp9/common/vp9_reconintra.c b/vp9/common/vp9_reconintra.c
index 96ba3e4..71a41a9 100644
--- a/vp9/common/vp9_reconintra.c
+++ b/vp9/common/vp9_reconintra.c
@@ -382,34 +382,34 @@
/* slower path if the block needs border extension */
if (x0 + 2 * bs <= frame_width) {
if (right_available && bs == 4) {
- vpx_memcpy(above_row - 1, above_ref - 1, 2 * bs + 1);
+ vpx_memcpy(above_row, above_ref, 2 * bs);
} else {
- vpx_memcpy(above_row - 1, above_ref - 1, bs + 1);
+ vpx_memcpy(above_row, above_ref, bs);
vpx_memset(above_row + bs, above_row[bs - 1], bs);
}
} else if (x0 + bs <= frame_width) {
const int r = frame_width - x0;
if (right_available && bs == 4) {
- vpx_memcpy(above_row - 1, above_ref - 1, r + 1);
+ vpx_memcpy(above_row, above_ref, r);
vpx_memset(above_row + r, above_row[r - 1],
x0 + 2 * bs - frame_width);
} else {
- vpx_memcpy(above_row - 1, above_ref - 1, bs + 1);
+ vpx_memcpy(above_row, above_ref, bs);
vpx_memset(above_row + bs, above_row[bs - 1], bs);
}
} else if (x0 <= frame_width) {
const int r = frame_width - x0;
if (right_available && bs == 4) {
- vpx_memcpy(above_row - 1, above_ref - 1, r + 1);
+ vpx_memcpy(above_row, above_ref, r);
vpx_memset(above_row + r, above_row[r - 1],
x0 + 2 * bs - frame_width);
} else {
- vpx_memcpy(above_row - 1, above_ref - 1, r + 1);
+ vpx_memcpy(above_row, above_ref, r);
vpx_memset(above_row + r, above_row[r - 1],
x0 + 2 * bs - frame_width);
}
- above_row[-1] = left_available ? above_ref[-1] : 129;
}
+ above_row[-1] = left_available ? above_ref[-1] : 129;
} else {
/* faster path if the block does not need extension */
if (bs == 4 && right_available && left_available) {
diff --git a/vp9/common/vp9_systemdependent.h b/vp9/common/vp9_systemdependent.h
index 7455abc..72edbca5 100644
--- a/vp9/common/vp9_systemdependent.h
+++ b/vp9/common/vp9_systemdependent.h
@@ -76,9 +76,6 @@
}
#endif
-struct VP9Common;
-void vp9_machine_specific_config(struct VP9Common *cm);
-
#ifdef __cplusplus
} // extern "C"
#endif
diff --git a/vp9/encoder/vp9_encodeframe.c b/vp9/encoder/vp9_encodeframe.c
index a9b51e0..7b15d0a 100644
--- a/vp9/encoder/vp9_encodeframe.c
+++ b/vp9/encoder/vp9_encodeframe.c
@@ -94,7 +94,8 @@
128, 128, 128, 128, 128, 128, 128, 128
};
-static unsigned int get_sby_perpixel_variance(VP9_COMP *cpi, MACROBLOCK *x,
+static unsigned int get_sby_perpixel_variance(VP9_COMP *cpi,
+ MACROBLOCK *x,
BLOCK_SIZE bs) {
unsigned int var, sse;
var = cpi->fn_ptr[bs].vf(x->plane[0].src.buf, x->plane[0].src.stride,
@@ -102,19 +103,49 @@
return ROUND_POWER_OF_TWO(var, num_pels_log2_lookup[bs]);
}
-static BLOCK_SIZE get_rd_var_based_fixed_partition(VP9_COMP *cpi) {
- unsigned int var = get_sby_perpixel_variance(cpi, &cpi->mb, BLOCK_64X64);
- if (var < 256)
- return BLOCK_64X64;
- else
- return BLOCK_32X32;
+static unsigned int get_sby_perpixel_diff_variance(VP9_COMP *cpi,
+ MACROBLOCK *x,
+ int mi_row,
+ int mi_col,
+ BLOCK_SIZE bs) {
+ const YV12_BUFFER_CONFIG *yv12 = get_ref_frame_buffer(cpi, LAST_FRAME);
+ int offset = (mi_row * MI_SIZE) * yv12->y_stride + (mi_col * MI_SIZE);
+ unsigned int var, sse;
+ var = cpi->fn_ptr[bs].vf(x->plane[0].src.buf,
+ x->plane[0].src.stride,
+ yv12->y_buffer + offset,
+ yv12->y_stride,
+ &sse);
+ return ROUND_POWER_OF_TWO(var, num_pels_log2_lookup[bs]);
}
-static BLOCK_SIZE get_nonrd_var_based_fixed_partition(VP9_COMP *cpi) {
- unsigned int var = get_sby_perpixel_variance(cpi, &cpi->mb, BLOCK_64X64);
- if (var < 1024)
+static BLOCK_SIZE get_rd_var_based_fixed_partition(VP9_COMP *cpi,
+ int mi_row,
+ int mi_col) {
+ unsigned int var = get_sby_perpixel_diff_variance(cpi, &cpi->mb,
+ mi_row, mi_col,
+ BLOCK_64X64);
+ if (var < 8)
+ return BLOCK_64X64;
+ else if (var < 128)
return BLOCK_32X32;
- else if (var < 4096)
+ else if (var < 2048)
+ return BLOCK_16X16;
+ else
+ return BLOCK_8X8;
+}
+
+static BLOCK_SIZE get_nonrd_var_based_fixed_partition(VP9_COMP *cpi,
+ int mi_row,
+ int mi_col) {
+ unsigned int var = get_sby_perpixel_diff_variance(cpi, &cpi->mb,
+ mi_row, mi_col,
+ BLOCK_64X64);
+ if (var < 8)
+ return BLOCK_64X64;
+ else if (var < 64)
+ return BLOCK_32X32;
+ else if (var < 2048)
return BLOCK_16X16;
else
return BLOCK_8X8;
@@ -1230,10 +1261,13 @@
PARTITION_CONTEXT sl[8], sa[8];
int last_part_rate = INT_MAX;
int64_t last_part_dist = INT64_MAX;
+ int64_t last_part_rd = INT64_MAX;
int none_rate = INT_MAX;
int64_t none_dist = INT64_MAX;
+ int64_t none_rd = INT64_MAX;
int chosen_rate = INT_MAX;
int64_t chosen_dist = INT64_MAX;
+ int64_t chosen_rd = INT64_MAX;
BLOCK_SIZE sub_subsize = BLOCK_4X4;
int splits_below = 0;
BLOCK_SIZE bs_type = mi_8x8[0]->mbmi.sb_type;
@@ -1262,7 +1296,8 @@
x->mb_energy = vp9_block_energy(cpi, x, bsize);
}
- if (cpi->sf.adjust_partitioning_from_last_frame) {
+ if (cpi->sf.partition_search_type == SEARCH_PARTITION &&
+ cpi->sf.adjust_partitioning_from_last_frame) {
// Check if any of the sub blocks are further split.
if (partition == PARTITION_SPLIT && subsize > BLOCK_8X8) {
sub_subsize = get_subsize(subsize, PARTITION_SPLIT);
@@ -1288,7 +1323,11 @@
pl = partition_plane_context(cpi->above_seg_context,
cpi->left_seg_context,
mi_row, mi_col, bsize);
- none_rate += x->partition_cost[pl][PARTITION_NONE];
+
+ if (none_rate < INT_MAX) {
+ none_rate += x->partition_cost[pl][PARTITION_NONE];
+ none_rd = RDCOST(x->rdmult, x->rddiv, none_rate, none_dist);
+ }
restore_context(cpi, mi_row, mi_col, a, l, sa, sl, bsize);
mi_8x8[0]->mbmi.sb_type = bs_type;
@@ -1383,10 +1422,13 @@
pl = partition_plane_context(cpi->above_seg_context, cpi->left_seg_context,
mi_row, mi_col, bsize);
- if (last_part_rate < INT_MAX)
+ if (last_part_rate < INT_MAX) {
last_part_rate += x->partition_cost[pl][partition];
+ last_part_rd = RDCOST(x->rdmult, x->rddiv, last_part_rate, last_part_dist);
+ }
if (cpi->sf.adjust_partitioning_from_last_frame
+ && cpi->sf.partition_search_type == SEARCH_PARTITION
&& partition != PARTITION_SPLIT && bsize > BLOCK_8X8
&& (mi_row + ms < cm->mi_rows || mi_row + (ms >> 1) == cm->mi_rows)
&& (mi_col + ms < cm->mi_cols || mi_col + (ms >> 1) == cm->mi_cols)) {
@@ -1442,21 +1484,21 @@
mi_row, mi_col, bsize);
if (chosen_rate < INT_MAX) {
chosen_rate += x->partition_cost[pl][PARTITION_SPLIT];
+ chosen_rd = RDCOST(x->rdmult, x->rddiv, chosen_rate, chosen_dist);
}
}
// If last_part is better set the partitioning to that...
- if (RDCOST(x->rdmult, x->rddiv, last_part_rate, last_part_dist)
- < RDCOST(x->rdmult, x->rddiv, chosen_rate, chosen_dist)) {
+ if (last_part_rd < chosen_rd) {
mi_8x8[0]->mbmi.sb_type = bsize;
if (bsize >= BLOCK_8X8)
*(get_sb_partitioning(x, bsize)) = subsize;
chosen_rate = last_part_rate;
chosen_dist = last_part_dist;
+ chosen_rd = last_part_rd;
}
// If none was better set the partitioning to that...
- if (RDCOST(x->rdmult, x->rddiv, chosen_rate, chosen_dist)
- > RDCOST(x->rdmult, x->rddiv, none_rate, none_dist)) {
+ if (none_rd < chosen_rd) {
if (bsize >= BLOCK_8X8)
*(get_sb_partitioning(x, bsize)) = bsize;
chosen_rate = none_rate;
@@ -1986,7 +2028,7 @@
// map to the same thing.
BLOCK_SIZE bsize;
set_offsets(cpi, tile, mi_row, mi_col, BLOCK_64X64);
- bsize = get_rd_var_based_fixed_partition(cpi);
+ bsize = get_rd_var_based_fixed_partition(cpi, mi_row, mi_col);
set_partitioning(cpi, tile, mi_8x8, mi_row, mi_col, bsize);
rd_use_partition(cpi, tile, mi_8x8, tp, mi_row, mi_col, BLOCK_64X64,
&dummy_rate, &dummy_dist, 1);
@@ -2304,8 +2346,8 @@
int bw = num_8x8_blocks_wide_lookup[bsize];
int bh = num_8x8_blocks_high_lookup[bsize];
- int brate;
- int64_t bdist;
+ int brate = 0;
+ int64_t bdist = 0;
*rate = 0;
*dist = 0;
@@ -2368,7 +2410,9 @@
// TODO(debargha): Implement VAR_BASED_PARTITION as a separate case.
// Currently both VAR_BASED_FIXED_PARTITION/VAR_BASED_PARTITION
// map to the same thing.
- BLOCK_SIZE bsize = get_nonrd_var_based_fixed_partition(cpi);
+ BLOCK_SIZE bsize = get_nonrd_var_based_fixed_partition(cpi,
+ mi_row,
+ mi_col);
nonrd_use_partition(cpi, tile, mi_8x8, tp, mi_row, mi_col,
bsize, &dummy_rate, &dummy_dist, 1);
} else {
diff --git a/vp9/encoder/vp9_encodemb.c b/vp9/encoder/vp9_encodemb.c
index 8cb2843..b3964ad 100644
--- a/vp9/encoder/vp9_encodemb.c
+++ b/vp9/encoder/vp9_encodemb.c
@@ -105,10 +105,9 @@
return pt;
}
-static void optimize_b(MACROBLOCK *mb,
- int plane, int block, BLOCK_SIZE plane_bsize,
- ENTROPY_CONTEXT *a, ENTROPY_CONTEXT *l,
- TX_SIZE tx_size) {
+static void optimize_b(int plane, int block, BLOCK_SIZE plane_bsize,
+ TX_SIZE tx_size, MACROBLOCK *mb,
+ struct optimize_ctx *ctx) {
MACROBLOCKD *const xd = &mb->e_mbd;
struct macroblock_plane *p = &mb->plane[plane];
struct macroblockd_plane *pd = &xd->plane[plane];
@@ -134,6 +133,11 @@
const scan_order *so = get_scan(xd, tx_size, type, block);
const int16_t *scan = so->scan;
const int16_t *nb = so->neighbors;
+ ENTROPY_CONTEXT *a, *l;
+ int tx_x, tx_y;
+ txfrm_block_to_raster_xy(plane_bsize, tx_size, block, &tx_x, &tx_y);
+ a = &ctx->ta[plane][tx_x];
+ l = &ctx->tl[plane][tx_y];
assert((!type && !plane) || (type && plane));
assert(eob <= default_eob);
@@ -307,27 +311,15 @@
*a = *l = (final_eob > 0);
}
-void vp9_optimize_b(int plane, int block, BLOCK_SIZE plane_bsize,
- TX_SIZE tx_size, MACROBLOCK *mb, struct optimize_ctx *ctx) {
- int x, y;
- txfrm_block_to_raster_xy(plane_bsize, tx_size, block, &x, &y);
- optimize_b(mb, plane, block, plane_bsize,
- &ctx->ta[plane][x], &ctx->tl[plane][y], tx_size);
-}
-
static void optimize_init_b(int plane, BLOCK_SIZE bsize,
struct encode_b_args *args) {
const MACROBLOCKD *xd = &args->x->e_mbd;
const struct macroblockd_plane* const pd = &xd->plane[plane];
- const BLOCK_SIZE plane_bsize = get_plane_block_size(bsize, pd);
- const int num_4x4_w = num_4x4_blocks_wide_lookup[plane_bsize];
- const int num_4x4_h = num_4x4_blocks_high_lookup[plane_bsize];
const MB_MODE_INFO *mbmi = &xd->mi_8x8[0]->mbmi;
const TX_SIZE tx_size = plane ? get_uv_tx_size(mbmi) : mbmi->tx_size;
- vp9_get_entropy_contexts(tx_size, args->ctx->ta[plane], args->ctx->tl[plane],
- pd->above_context, pd->left_context,
- num_4x4_w, num_4x4_h);
+ vp9_get_entropy_contexts(bsize, tx_size, pd,
+ args->ctx->ta[plane], args->ctx->tl[plane]);
}
static INLINE void fdct32x32(int rd_transform,
@@ -419,7 +411,7 @@
vp9_xform_quant(x, plane, block, plane_bsize, tx_size);
if (x->optimize && (!x->skip_recode || !x->skip_optimize)) {
- vp9_optimize_b(plane, block, plane_bsize, tx_size, x, ctx);
+ optimize_b(plane, block, plane_bsize, tx_size, x, ctx);
} else {
ctx->ta[plane][i] = p->eobs[block] > 0;
ctx->tl[plane][j] = p->eobs[block] > 0;
@@ -453,8 +445,7 @@
}
static void encode_block_pass1(int plane, int block, BLOCK_SIZE plane_bsize,
TX_SIZE tx_size, void *arg) {
- struct encode_b_args *const args = arg;
- MACROBLOCK *const x = args->x;
+ MACROBLOCK *const x = (MACROBLOCK *)arg;
MACROBLOCKD *const xd = &x->e_mbd;
struct macroblock_plane *const p = &x->plane[plane];
struct macroblockd_plane *const pd = &xd->plane[plane];
@@ -466,24 +457,14 @@
vp9_xform_quant(x, plane, block, plane_bsize, tx_size);
- if (p->eobs[block] == 0)
- return;
-
- xd->itxm_add(dqcoeff, dst, pd->dst.stride, p->eobs[block]);
+ if (p->eobs[block] > 0)
+ xd->itxm_add(dqcoeff, dst, pd->dst.stride, p->eobs[block]);
}
-void vp9_encode_sby(MACROBLOCK *x, BLOCK_SIZE bsize) {
- MACROBLOCKD *const xd = &x->e_mbd;
- struct optimize_ctx ctx;
- MB_MODE_INFO *mbmi = &xd->mi_8x8[0]->mbmi;
- struct encode_b_args arg = {x, &ctx, &mbmi->skip};
-
+void vp9_encode_sby_pass1(MACROBLOCK *x, BLOCK_SIZE bsize) {
vp9_subtract_plane(x, bsize, 0);
- if (x->optimize)
- optimize_init_b(0, bsize, &arg);
-
- vp9_foreach_transformed_block_in_plane(xd, bsize, 0, encode_block_pass1,
- &arg);
+ vp9_foreach_transformed_block_in_plane(&x->e_mbd, bsize, 0,
+ encode_block_pass1, x);
}
void vp9_encode_sb(MACROBLOCK *x, BLOCK_SIZE bsize) {
@@ -533,7 +514,7 @@
src_diff = &p->src_diff[4 * (j * diff_stride + i)];
// if (x->optimize)
- // vp9_optimize_b(plane, block, plane_bsize, tx_size, x, args->ctx);
+ // optimize_b(plane, block, plane_bsize, tx_size, x, args->ctx);
switch (tx_size) {
case TX_32X32:
diff --git a/vp9/encoder/vp9_encodemb.h b/vp9/encoder/vp9_encodemb.h
index a61f776..dcf6e87 100644
--- a/vp9/encoder/vp9_encodemb.h
+++ b/vp9/encoder/vp9_encodemb.h
@@ -21,7 +21,7 @@
#endif
void vp9_encode_sb(MACROBLOCK *x, BLOCK_SIZE bsize);
-void vp9_encode_sby(MACROBLOCK *x, BLOCK_SIZE bsize);
+void vp9_encode_sby_pass1(MACROBLOCK *x, BLOCK_SIZE bsize);
void vp9_xform_quant(MACROBLOCK *x, int plane, int block,
BLOCK_SIZE plane_bsize, TX_SIZE tx_size);
diff --git a/vp9/encoder/vp9_encodemv.c b/vp9/encoder/vp9_encodemv.c
index af710a8..be6abc2 100644
--- a/vp9/encoder/vp9_encodemv.c
+++ b/vp9/encoder/vp9_encodemv.c
@@ -224,18 +224,11 @@
}
}
-void vp9_build_nmv_cost_table(int *mvjoint,
- int *mvcost[2],
- const nmv_context* const mvctx,
- int usehp,
- int mvc_flag_v,
- int mvc_flag_h) {
- vp9_clear_system_state();
- vp9_cost_tokens(mvjoint, mvctx->joints, vp9_mv_joint_tree);
- if (mvc_flag_v)
- build_nmv_component_cost_table(mvcost[0], &mvctx->comps[0], usehp);
- if (mvc_flag_h)
- build_nmv_component_cost_table(mvcost[1], &mvctx->comps[1], usehp);
+void vp9_build_nmv_cost_table(int *mvjoint, int *mvcost[2],
+ const nmv_context* ctx, int usehp) {
+ vp9_cost_tokens(mvjoint, ctx->joints, vp9_mv_joint_tree);
+ build_nmv_component_cost_table(mvcost[0], &ctx->comps[0], usehp);
+ build_nmv_component_cost_table(mvcost[1], &ctx->comps[1], usehp);
}
static void inc_mvs(int_mv mv[2], int_mv ref[2], int is_compound,
diff --git a/vp9/encoder/vp9_encodemv.h b/vp9/encoder/vp9_encodemv.h
index f0463bbd..7f997ff 100644
--- a/vp9/encoder/vp9_encodemv.h
+++ b/vp9/encoder/vp9_encodemv.h
@@ -25,12 +25,8 @@
void vp9_encode_mv(VP9_COMP *cpi, vp9_writer* w, const MV* mv, const MV* ref,
const nmv_context* mvctx, int usehp);
-void vp9_build_nmv_cost_table(int *mvjoint,
- int *mvcost[2],
- const nmv_context* const mvctx,
- int usehp,
- int mvc_flag_v,
- int mvc_flag_h);
+void vp9_build_nmv_cost_table(int *mvjoint, int *mvcost[2],
+ const nmv_context* mvctx, int usehp);
void vp9_update_mv_count(VP9_COMP *cpi, MACROBLOCK *x, int_mv best_ref_mv[2]);
diff --git a/vp9/encoder/vp9_firstpass.c b/vp9/encoder/vp9_firstpass.c
index ddb901d..32ed969 100644
--- a/vp9/encoder/vp9_firstpass.c
+++ b/vp9/encoder/vp9_firstpass.c
@@ -675,7 +675,7 @@
xd->mi_8x8[0]->mbmi.ref_frame[0] = LAST_FRAME;
xd->mi_8x8[0]->mbmi.ref_frame[1] = NONE;
vp9_build_inter_predictors_sby(xd, mb_row << 1, mb_col << 1, bsize);
- vp9_encode_sby(x, bsize);
+ vp9_encode_sby_pass1(x, bsize);
sum_mvr += mv.as_mv.row;
sum_mvr_abs += abs(mv.as_mv.row);
sum_mvc += mv.as_mv.col;
diff --git a/vp9/encoder/vp9_onyx_if.c b/vp9/encoder/vp9_onyx_if.c
index 33f588f..8f7a2bc 100644
--- a/vp9/encoder/vp9_onyx_if.c
+++ b/vp9/encoder/vp9_onyx_if.c
@@ -14,6 +14,8 @@
#include "./vpx_config.h"
#include "./vpx_scale_rtcd.h"
+#include "vpx/internal/vpx_psnr.h"
+#include "vpx_ports/vpx_timer.h"
#include "vp9/common/vp9_alloccommon.h"
#include "vp9/common/vp9_filter.h"
@@ -30,7 +32,6 @@
#include "vp9/encoder/vp9_mbgraph.h"
#include "vp9/encoder/vp9_onyx_int.h"
#include "vp9/encoder/vp9_picklpf.h"
-#include "vp9/encoder/vp9_psnr.h"
#include "vp9/encoder/vp9_ratectrl.h"
#include "vp9/encoder/vp9_rdopt.h"
#include "vp9/encoder/vp9_segmentation.h"
@@ -38,8 +39,6 @@
#include "vp9/encoder/vp9_vaq.h"
#include "vp9/encoder/vp9_resize.h"
-#include "vpx_ports/vpx_timer.h"
-
void vp9_entropy_mode_init();
void vp9_coef_tree_initialize();
@@ -491,18 +490,6 @@
sf->thresh_mult[THR_D207_PRED] += 2500;
sf->thresh_mult[THR_D63_PRED] += 2500;
- // disable using golden frame modes if golden frames are not being used
- if (cpi->rc.frames_till_gf_update_due == INT_MAX) {
- sf->thresh_mult[THR_NEARESTG ] = INT_MAX;
- sf->thresh_mult[THR_ZEROG ] = INT_MAX;
- sf->thresh_mult[THR_NEARG ] = INT_MAX;
- sf->thresh_mult[THR_NEWG ] = INT_MAX;
- sf->thresh_mult[THR_COMP_ZEROGA ] = INT_MAX;
- sf->thresh_mult[THR_COMP_NEARESTGA] = INT_MAX;
- sf->thresh_mult[THR_COMP_NEARGA ] = INT_MAX;
- sf->thresh_mult[THR_COMP_NEWGA ] = INT_MAX;
- }
-
/* disable frame modes if flags not set */
if (!(cpi->ref_frame_flags & VP9_LAST_FLAG)) {
sf->thresh_mult[THR_NEWMV ] = INT_MAX;
@@ -852,6 +839,9 @@
sf->adaptive_rd_thresh = 5;
sf->auto_min_max_partition_size = frame_is_intra_only(cm) ?
RELAXED_NEIGHBORING_MIN_MAX : STRICT_NEIGHBORING_MIN_MAX;
+ sf->adjust_partitioning_from_last_frame =
+ cm->last_frame_type == KEY_FRAME || (0 ==
+ (cm->current_video_frame + 1) % sf->last_partitioning_redo_frequency);
sf->subpel_force_stop = 1;
for (i = 0; i < TX_SIZES; i++) {
sf->intra_y_mode_mask[i] = INTRA_DC_H_V;
@@ -859,7 +849,6 @@
}
sf->frame_parameter_update = 0;
sf->encode_breakout_thresh = 1000;
-
sf->search_method = FAST_HEX;
}
if (speed >= 6) {
@@ -2043,11 +2032,11 @@
if (cpi->b_calculate_psnr) {
const double total_psnr =
- vp9_mse2psnr((double)cpi->total_samples, 255.0,
- (double)cpi->total_sq_error);
+ vpx_sse_to_psnr((double)cpi->total_samples, 255.0,
+ (double)cpi->total_sq_error);
const double totalp_psnr =
- vp9_mse2psnr((double)cpi->totalp_samples, 255.0,
- (double)cpi->totalp_sq_error);
+ vpx_sse_to_psnr((double)cpi->totalp_samples, 255.0,
+ (double)cpi->totalp_sq_error);
const double total_ssim = 100 * pow(cpi->summed_quality /
cpi->summed_weights, 8.0);
const double totalp_ssim = 100 * pow(cpi->summedp_quality /
@@ -2228,7 +2217,7 @@
w, h);
psnr->sse[1 + i] = sse;
psnr->samples[1 + i] = samples;
- psnr->psnr[1 + i] = vp9_mse2psnr(samples, 255.0, (double) sse);
+ psnr->psnr[1 + i] = vpx_sse_to_psnr(samples, 255.0, (double)sse);
total_sse += sse;
total_samples += samples;
@@ -2236,7 +2225,8 @@
psnr->sse[0] = total_sse;
psnr->samples[0] = total_samples;
- psnr->psnr[0] = vp9_mse2psnr((double)total_samples, 255.0, (double)total_sse);
+ psnr->psnr[0] = vpx_sse_to_psnr((double)total_samples, 255.0,
+ (double)total_sse);
}
static void generate_psnr_packet(VP9_COMP *cpi) {
@@ -3072,6 +3062,9 @@
if (cpi->gold_is_last)
cpi->ref_frame_flags &= ~VP9_GOLD_FLAG;
+ if (cpi->rc.frames_till_gf_update_due == INT_MAX)
+ cpi->ref_frame_flags &= ~VP9_GOLD_FLAG;
+
if (cpi->alt_is_last)
cpi->ref_frame_flags &= ~VP9_ALT_FLAG;
diff --git a/vp9/encoder/vp9_pickmode.c b/vp9/encoder/vp9_pickmode.c
index 383d927..0d0dc0c 100644
--- a/vp9/encoder/vp9_pickmode.c
+++ b/vp9/encoder/vp9_pickmode.c
@@ -183,9 +183,11 @@
BLOCK_SIZE bsize) {
MACROBLOCKD *xd = &x->e_mbd;
MB_MODE_INFO *mbmi = &xd->mi_8x8[0]->mbmi;
+ struct macroblock_plane *const p = &x->plane[0];
+ struct macroblockd_plane *const pd = &xd->plane[0];
const BLOCK_SIZE block_size = get_plane_block_size(bsize, &xd->plane[0]);
- MB_PREDICTION_MODE this_mode;
- MV_REFERENCE_FRAME ref_frame;
+ MB_PREDICTION_MODE this_mode, best_mode = ZEROMV;
+ MV_REFERENCE_FRAME ref_frame, best_ref_frame = LAST_FRAME;
int_mv frame_mv[MB_MODE_COUNT][MAX_REF_FRAMES];
struct buf_2d yv12_mb[4][MAX_MB_PLANE];
static const int flag_list[4] = { 0, VP9_LAST_FLAG, VP9_GOLD_FLAG,
@@ -240,6 +242,8 @@
clamp_mv2(&frame_mv[NEARESTMV][ref_frame].as_mv, xd);
clamp_mv2(&frame_mv[NEARMV][ref_frame].as_mv, xd);
+ mbmi->ref_frame[0] = ref_frame;
+
for (this_mode = NEARESTMV; this_mode <= NEWMV; ++this_mode) {
int rate = cost[INTER_OFFSET(this_mode)];
int64_t dist;
@@ -253,25 +257,32 @@
continue;
}
- dist = x->mode_sad[ref_frame][INTER_OFFSET(this_mode)];
+ mbmi->mode = this_mode;
+ mbmi->mv[0].as_int = frame_mv[this_mode][ref_frame].as_int;
+
+ vp9_build_inter_predictors_sby(xd, mi_row, mi_col, bsize);
+
+ dist = cpi->fn_ptr[bsize].sdf(p->src.buf, p->src.stride,
+ pd->dst.buf, pd->dst.stride, INT_MAX);
+
this_rd = rate + dist;
if (this_rd < best_rd) {
best_rd = this_rd;
- mbmi->mode = this_mode;
- mbmi->ref_frame[0] = ref_frame;
- mbmi->mv[0].as_int = frame_mv[this_mode][ref_frame].as_int;
- xd->mi_8x8[0]->bmi[0].as_mv[0].as_int = mbmi->mv[0].as_int;
- mbmi->uv_mode = this_mode;
+ best_mode = this_mode;
+ best_ref_frame = ref_frame;
}
}
}
+ mbmi->mode = best_mode;
+ mbmi->ref_frame[0] = best_ref_frame;
+ mbmi->mv[0].as_int = frame_mv[best_mode][best_ref_frame].as_int;
+ xd->mi_8x8[0]->bmi[0].as_mv[0].as_int = mbmi->mv[0].as_int;
+
// Perform intra prediction search, if the best SAD is above a certain
// threshold.
if (best_rd > inter_mode_thresh) {
- struct macroblock_plane *const p = &x->plane[0];
- struct macroblockd_plane *const pd = &xd->plane[0];
for (this_mode = DC_PRED; this_mode <= DC_PRED; ++this_mode) {
vp9_predict_intra_block(xd, 0, b_width_log2(bsize),
mbmi->tx_size, this_mode,
diff --git a/vp9/encoder/vp9_psnr.c b/vp9/encoder/vp9_psnr.c
deleted file mode 100644
index 58294e1..0000000
--- a/vp9/encoder/vp9_psnr.c
+++ /dev/null
@@ -1,29 +0,0 @@
-/*
- * Copyright (c) 2010 The WebM project authors. All Rights Reserved.
- *
- * Use of this source code is governed by a BSD-style license
- * that can be found in the LICENSE file in the root of the source
- * tree. An additional intellectual property rights grant can be found
- * in the file PATENTS. All contributing project authors may
- * be found in the AUTHORS file in the root of the source tree.
- */
-
-#include <math.h>
-
-#include "vpx_scale/yv12config.h"
-
-#define MAX_PSNR 100
-
-double vp9_mse2psnr(double samples, double peak, double mse) {
- double psnr;
-
- if (mse > 0.0)
- psnr = 10.0 * log10(peak * peak * samples / mse);
- else
- psnr = MAX_PSNR; // Limit to prevent / 0
-
- if (psnr > MAX_PSNR)
- psnr = MAX_PSNR;
-
- return psnr;
-}
diff --git a/vp9/encoder/vp9_psnr.h b/vp9/encoder/vp9_psnr.h
deleted file mode 100644
index ffe00ed..0000000
--- a/vp9/encoder/vp9_psnr.h
+++ /dev/null
@@ -1,25 +0,0 @@
-/*
- * Copyright (c) 2010 The WebM project authors. All Rights Reserved.
- *
- * Use of this source code is governed by a BSD-style license
- * that can be found in the LICENSE file in the root of the source
- * tree. An additional intellectual property rights grant can be found
- * in the file PATENTS. All contributing project authors may
- * be found in the AUTHORS file in the root of the source tree.
- */
-
-
-#ifndef VP9_ENCODER_VP9_PSNR_H_
-#define VP9_ENCODER_VP9_PSNR_H_
-
-#ifdef __cplusplus
-extern "C" {
-#endif
-
-double vp9_mse2psnr(double samples, double peak, double mse);
-
-#ifdef __cplusplus
-} // extern "C"
-#endif
-
-#endif // VP9_ENCODER_VP9_PSNR_H_
diff --git a/vp9/encoder/vp9_ratectrl.c b/vp9/encoder/vp9_ratectrl.c
index f3c5684..dc6c118 100644
--- a/vp9/encoder/vp9_ratectrl.c
+++ b/vp9/encoder/vp9_ratectrl.c
@@ -958,17 +958,10 @@
}
// Clip the active best and worst quality values to limits.
- if (active_worst_quality > rc->worst_quality)
- active_worst_quality = rc->worst_quality;
-
- if (active_best_quality < rc->best_quality)
- active_best_quality = rc->best_quality;
-
- if (active_best_quality > rc->worst_quality)
- active_best_quality = rc->worst_quality;
-
- if (active_worst_quality < active_best_quality)
- active_worst_quality = active_best_quality;
+ active_best_quality = clamp(active_best_quality,
+ rc->best_quality, rc->worst_quality);
+ active_worst_quality = clamp(active_worst_quality,
+ active_best_quality, rc->worst_quality);
*top_index = active_worst_quality;
*bottom_index = active_best_quality;
diff --git a/vp9/encoder/vp9_rdopt.c b/vp9/encoder/vp9_rdopt.c
index 753cd7c..76d7777 100644
--- a/vp9/encoder/vp9_rdopt.c
+++ b/vp9/encoder/vp9_rdopt.c
@@ -310,8 +310,7 @@
vp9_build_nmv_cost_table(x->nmvjointcost,
cm->allow_high_precision_mv ? x->nmvcost_hp
: x->nmvcost,
- &cm->fc.nmvc,
- cm->allow_high_precision_mv, 1, 1);
+ &cm->fc.nmvc, cm->allow_high_precision_mv);
for (i = 0; i < INTER_MODE_CONTEXTS; ++i)
vp9_cost_tokens((int *)x->inter_mode_cost[i],
@@ -565,18 +564,16 @@
const PLANE_TYPE type = pd->plane_type;
const int16_t *band_count = &band_counts[tx_size][1];
const int eob = p->eobs[block];
- const int16_t *const qcoeff_ptr = BLOCK_OFFSET(p->qcoeff, block);
- const int ref = mbmi->ref_frame[0] != INTRA_FRAME;
+ const int16_t *const qcoeff = BLOCK_OFFSET(p->qcoeff, block);
unsigned int (*token_costs)[2][COEFF_CONTEXTS][ENTROPY_TOKENS] =
- x->token_costs[tx_size][type][ref];
- const ENTROPY_CONTEXT above_ec = !!*A, left_ec = !!*L;
+ x->token_costs[tx_size][type][is_inter_block(mbmi)];
uint8_t *p_tok = x->token_cache;
- int pt = combine_entropy_contexts(above_ec, left_ec);
+ int pt = combine_entropy_contexts(*A, *L);
int c, cost;
// Check for consistency of tx_size with mode info
assert(type == PLANE_TYPE_Y ? mbmi->tx_size == tx_size
- : get_uv_tx_size(mbmi) == tx_size);
+ : get_uv_tx_size(mbmi) == tx_size);
if (eob == 0) {
// single eob token
@@ -586,7 +583,7 @@
int band_left = *band_count++;
// dc token
- int v = qcoeff_ptr[0];
+ int v = qcoeff[0];
int prev_t = vp9_dct_value_tokens_ptr[v].token;
cost = (*token_costs)[0][pt][prev_t] + vp9_dct_value_cost_ptr[v];
p_tok[0] = vp9_pt_energy_class[prev_t];
@@ -597,7 +594,7 @@
const int rc = scan[c];
int t;
- v = qcoeff_ptr[rc];
+ v = qcoeff[rc];
t = vp9_dct_value_tokens_ptr[v].token;
pt = get_coef_context(nb, p_tok, c);
cost += (*token_costs)[!prev_t][pt][t] + vp9_dct_value_cost_ptr[v];
@@ -694,10 +691,16 @@
}
}
-void vp9_get_entropy_contexts(TX_SIZE tx_size,
- ENTROPY_CONTEXT t_above[16], ENTROPY_CONTEXT t_left[16],
- const ENTROPY_CONTEXT *above, const ENTROPY_CONTEXT *left,
- int num_4x4_w, int num_4x4_h) {
+void vp9_get_entropy_contexts(BLOCK_SIZE bsize, TX_SIZE tx_size,
+ const struct macroblockd_plane *pd,
+ ENTROPY_CONTEXT t_above[16],
+ ENTROPY_CONTEXT t_left[16]) {
+ const BLOCK_SIZE plane_bsize = get_plane_block_size(bsize, pd);
+ const int num_4x4_w = num_4x4_blocks_wide_lookup[plane_bsize];
+ const int num_4x4_h = num_4x4_blocks_high_lookup[plane_bsize];
+ const ENTROPY_CONTEXT *const above = pd->above_context;
+ const ENTROPY_CONTEXT *const left = pd->left_context;
+
int i;
switch (tx_size) {
case TX_4X4:
@@ -734,9 +737,6 @@
BLOCK_SIZE bsize, TX_SIZE tx_size) {
MACROBLOCKD *const xd = &x->e_mbd;
struct macroblockd_plane *const pd = &xd->plane[plane];
- const BLOCK_SIZE bs = get_plane_block_size(bsize, pd);
- const int num_4x4_w = num_4x4_blocks_wide_lookup[bs];
- const int num_4x4_h = num_4x4_blocks_high_lookup[bs];
struct rdcost_block_args args = { 0 };
args.x = x;
args.best_rd = ref_best_rd;
@@ -744,9 +744,7 @@
if (plane == 0)
xd->mi_8x8[0]->mbmi.tx_size = tx_size;
- vp9_get_entropy_contexts(tx_size, args.t_above, args.t_left,
- pd->above_context, pd->left_context,
- num_4x4_w, num_4x4_h);
+ vp9_get_entropy_contexts(bsize, tx_size, pd, args.t_above, args.t_left);
args.so = get_scan(xd, tx_size, pd->plane_type, 0);
@@ -937,27 +935,23 @@
}
}
-static void super_block_yrd(VP9_COMP *cpi,
- MACROBLOCK *x, int *rate, int64_t *distortion,
- int *skip, int64_t *psse, BLOCK_SIZE bs,
- int64_t txfm_cache[TX_MODES],
- int64_t ref_best_rd) {
+static void inter_super_block_yrd(VP9_COMP *cpi, MACROBLOCK *x, int *rate,
+ int64_t *distortion, int *skip,
+ int64_t *psse, BLOCK_SIZE bs,
+ int64_t txfm_cache[TX_MODES],
+ int64_t ref_best_rd) {
int r[TX_SIZES][2], s[TX_SIZES];
int64_t d[TX_SIZES], sse[TX_SIZES];
MACROBLOCKD *xd = &x->e_mbd;
MB_MODE_INFO *const mbmi = &xd->mi_8x8[0]->mbmi;
- const int b_inter_mode = is_inter_block(mbmi);
const TX_SIZE max_tx_size = max_txsize_lookup[bs];
TX_SIZE tx_size;
-
assert(bs == mbmi->sb_type);
- if (b_inter_mode)
- vp9_subtract_plane(x, bs, 0);
- if (cpi->sf.tx_size_search_method == USE_LARGESTALL ||
- (cpi->sf.tx_size_search_method != USE_FULL_RD &&
- !b_inter_mode)) {
+ vp9_subtract_plane(x, bs, 0);
+
+ if (cpi->sf.tx_size_search_method == USE_LARGESTALL) {
vpx_memset(txfm_cache, 0, TX_MODES * sizeof(int64_t));
choose_largest_txfm_size(cpi, x, rate, distortion, skip, sse,
ref_best_rd, bs);
@@ -966,8 +960,7 @@
return;
}
- if (cpi->sf.tx_size_search_method == USE_LARGESTINTRA_MODELINTER &&
- b_inter_mode) {
+ if (cpi->sf.tx_size_search_method == USE_LARGESTINTRA_MODELINTER) {
for (tx_size = TX_4X4; tx_size <= max_tx_size; ++tx_size)
model_rd_for_sb_y_tx(cpi, bs, tx_size, x, xd,
&r[tx_size][0], &d[tx_size], &s[tx_size]);
@@ -985,6 +978,36 @@
*psse = sse[mbmi->tx_size];
}
+static void intra_super_block_yrd(VP9_COMP *cpi, MACROBLOCK *x, int *rate,
+ int64_t *distortion, int *skip,
+ int64_t *psse, BLOCK_SIZE bs,
+ int64_t txfm_cache[TX_MODES],
+ int64_t ref_best_rd) {
+ int64_t sse[TX_SIZES];
+ MACROBLOCKD *xd = &x->e_mbd;
+ MB_MODE_INFO *const mbmi = &xd->mi_8x8[0]->mbmi;
+
+ assert(bs == mbmi->sb_type);
+ if (cpi->sf.tx_size_search_method != USE_FULL_RD) {
+ vpx_memset(txfm_cache, 0, TX_MODES * sizeof(int64_t));
+ choose_largest_txfm_size(cpi, x, rate, distortion, skip, sse,
+ ref_best_rd, bs);
+ } else {
+ int r[TX_SIZES][2], s[TX_SIZES];
+ int64_t d[TX_SIZES];
+ TX_SIZE tx_size;
+ for (tx_size = TX_4X4; tx_size <= max_txsize_lookup[bs]; ++tx_size)
+ txfm_rd_in_plane(x, &r[tx_size][0], &d[tx_size],
+ &s[tx_size], &sse[tx_size],
+ ref_best_rd, 0, bs, tx_size);
+ choose_txfm_size_from_rd(cpi, x, r, rate, d, distortion, s,
+ skip, txfm_cache, bs);
+ }
+ if (psse)
+ *psse = sse[mbmi->tx_size];
+}
+
+
static int conditional_skipintra(MB_PREDICTION_MODE mode,
MB_PREDICTION_MODE best_intra_mode) {
if (mode == D117_PRED &&
@@ -1245,8 +1268,8 @@
}
mic->mbmi.mode = mode;
- super_block_yrd(cpi, x, &this_rate_tokenonly, &this_distortion, &s, NULL,
- bsize, local_tx_cache, best_rd);
+ intra_super_block_yrd(cpi, x, &this_rate_tokenonly, &this_distortion,
+ &s, NULL, bsize, local_tx_cache, best_rd);
if (this_rate_tokenonly == INT_MAX)
continue;
@@ -1281,7 +1304,7 @@
return best_rd;
}
-static void super_block_uvrd(VP9_COMP *const cpi, MACROBLOCK *x,
+static void super_block_uvrd(MACROBLOCK *x,
int *rate, int64_t *distortion, int *skippable,
int64_t *sse, BLOCK_SIZE bsize,
int64_t ref_best_rd) {
@@ -1331,6 +1354,7 @@
int *rate, int *rate_tokenonly,
int64_t *distortion, int *skippable,
BLOCK_SIZE bsize, TX_SIZE max_tx_size) {
+ MACROBLOCKD *xd = &x->e_mbd;
MB_PREDICTION_MODE mode;
MB_PREDICTION_MODE mode_selected = DC_PRED;
int64_t best_rd = INT64_MAX, this_rd;
@@ -1341,9 +1365,9 @@
if (!(cpi->sf.intra_uv_mode_mask[max_tx_size] & (1 << mode)))
continue;
- x->e_mbd.mi_8x8[0]->mbmi.uv_mode = mode;
+ xd->mi_8x8[0]->mbmi.uv_mode = mode;
- super_block_uvrd(cpi, x, &this_rate_tokenonly,
+ super_block_uvrd(x, &this_rate_tokenonly,
&this_distortion, &s, &this_sse, bsize, best_rd);
if (this_rate_tokenonly == INT_MAX)
continue;
@@ -1361,7 +1385,7 @@
if (!x->select_txfm_size) {
int i;
struct macroblock_plane *const p = x->plane;
- struct macroblockd_plane *const pd = x->e_mbd.plane;
+ struct macroblockd_plane *const pd = xd->plane;
for (i = 1; i < MAX_MB_PLANE; ++i) {
p[i].coeff = ctx->coeff_pbuf[i][2];
p[i].qcoeff = ctx->qcoeff_pbuf[i][2];
@@ -1382,25 +1406,21 @@
}
}
- x->e_mbd.mi_8x8[0]->mbmi.uv_mode = mode_selected;
+ xd->mi_8x8[0]->mbmi.uv_mode = mode_selected;
return best_rd;
}
-static int64_t rd_sbuv_dcpred(VP9_COMP *cpi, MACROBLOCK *x,
+static int64_t rd_sbuv_dcpred(const VP9_COMMON *cm, MACROBLOCK *x,
int *rate, int *rate_tokenonly,
int64_t *distortion, int *skippable,
BLOCK_SIZE bsize) {
- int64_t this_rd;
- int64_t this_sse;
+ int64_t unused;
x->e_mbd.mi_8x8[0]->mbmi.uv_mode = DC_PRED;
- super_block_uvrd(cpi, x, rate_tokenonly, distortion,
- skippable, &this_sse, bsize, INT64_MAX);
- *rate = *rate_tokenonly +
- x->intra_uv_mode_cost[cpi->common.frame_type][DC_PRED];
- this_rd = RDCOST(x->rdmult, x->rddiv, *rate, *distortion);
-
- return this_rd;
+ super_block_uvrd(x, rate_tokenonly, distortion,
+ skippable, &unused, bsize, INT64_MAX);
+ *rate = *rate_tokenonly + x->intra_uv_mode_cost[cm->frame_type][DC_PRED];
+ return RDCOST(x->rdmult, x->rddiv, *rate, *distortion);
}
static void choose_intra_uv_mode(VP9_COMP *cpi, PICK_MODE_CONTEXT *ctx,
@@ -1413,8 +1433,8 @@
// Use an estimated rd for uv_intra based on DC_PRED if the
// appropriate speed flag is set.
if (cpi->sf.use_uv_intra_rd_estimate) {
- rd_sbuv_dcpred(cpi, x, rate_uv, rate_uv_tokenonly, dist_uv, skip_uv,
- bsize < BLOCK_8X8 ? BLOCK_8X8 : bsize);
+ rd_sbuv_dcpred(&cpi->common, x, rate_uv, rate_uv_tokenonly, dist_uv,
+ skip_uv, bsize < BLOCK_8X8 ? BLOCK_8X8 : bsize);
// Else do a proper rd search for each possible transform size that may
// be considered in the main rd loop.
} else {
@@ -1428,8 +1448,7 @@
static int cost_mv_ref(VP9_COMP *cpi, MB_PREDICTION_MODE mode,
int mode_context) {
MACROBLOCK *const x = &cpi->mb;
- MACROBLOCKD *const xd = &x->e_mbd;
- const int segment_id = xd->mi_8x8[0]->mbmi.segment_id;
+ const int segment_id = x->e_mbd.mi_8x8[0]->mbmi.segment_id;
// Don't account for mode here if segment skip is enabled.
if (!vp9_segfeature_active(&cpi->common.seg, segment_id, SEG_LVL_SKIP)) {
@@ -1454,7 +1473,7 @@
int *rate_mv);
static int labels2mode(MACROBLOCK *x, int i,
- MB_PREDICTION_MODE this_mode,
+ MB_PREDICTION_MODE mode,
int_mv *this_mv, int_mv *this_second_mv,
int_mv frame_mv[MB_MODE_COUNT][MAX_REF_FRAMES],
int_mv seg_mvs[MAX_REF_FRAMES],
@@ -1464,23 +1483,18 @@
MACROBLOCKD *const xd = &x->e_mbd;
MODE_INFO *const mic = xd->mi_8x8[0];
MB_MODE_INFO *mbmi = &mic->mbmi;
- int cost = 0, thismvcost = 0;
+ int thismvcost = 0;
int idx, idy;
const int num_4x4_blocks_wide = num_4x4_blocks_wide_lookup[mbmi->sb_type];
const int num_4x4_blocks_high = num_4x4_blocks_high_lookup[mbmi->sb_type];
const int has_second_rf = has_second_ref(mbmi);
- /* We have to be careful retrieving previously-encoded motion vectors.
- Ones from this macroblock have to be pulled from the BLOCKD array
- as they have not yet made it to the bmi array in our MB_MODE_INFO. */
- MB_PREDICTION_MODE m;
-
// the only time we should do costing for new motion vector or mode
// is when we are on a new label (jbb May 08, 2007)
- switch (m = this_mode) {
+ switch (mode) {
case NEWMV:
this_mv->as_int = seg_mvs[mbmi->ref_frame[0]].as_int;
- thismvcost = vp9_mv_bit_cost(&this_mv->as_mv, &best_ref_mv->as_mv,
+ thismvcost += vp9_mv_bit_cost(&this_mv->as_mv, &best_ref_mv->as_mv,
mvjcost, mvcost, MV_COST_WEIGHT_SUB);
if (has_second_rf) {
this_second_mv->as_int = seg_mvs[mbmi->ref_frame[1]].as_int;
@@ -1492,14 +1506,12 @@
case NEARESTMV:
this_mv->as_int = frame_mv[NEARESTMV][mbmi->ref_frame[0]].as_int;
if (has_second_rf)
- this_second_mv->as_int =
- frame_mv[NEARESTMV][mbmi->ref_frame[1]].as_int;
+ this_second_mv->as_int = frame_mv[NEARESTMV][mbmi->ref_frame[1]].as_int;
break;
case NEARMV:
this_mv->as_int = frame_mv[NEARMV][mbmi->ref_frame[0]].as_int;
if (has_second_rf)
- this_second_mv->as_int =
- frame_mv[NEARMV][mbmi->ref_frame[1]].as_int;
+ this_second_mv->as_int = frame_mv[NEARMV][mbmi->ref_frame[1]].as_int;
break;
case ZEROMV:
this_mv->as_int = 0;
@@ -1510,22 +1522,19 @@
break;
}
- cost = cost_mv_ref(cpi, this_mode,
- mbmi->mode_context[mbmi->ref_frame[0]]);
-
mic->bmi[i].as_mv[0].as_int = this_mv->as_int;
if (has_second_rf)
mic->bmi[i].as_mv[1].as_int = this_second_mv->as_int;
- mic->bmi[i].as_mode = m;
+ mic->bmi[i].as_mode = mode;
for (idy = 0; idy < num_4x4_blocks_high; ++idy)
for (idx = 0; idx < num_4x4_blocks_wide; ++idx)
vpx_memcpy(&mic->bmi[i + idy * 2 + idx],
&mic->bmi[i], sizeof(mic->bmi[i]));
- cost += thismvcost;
- return cost;
+ return cost_mv_ref(cpi, mode, mbmi->mode_context[mbmi->ref_frame[0]]) +
+ thismvcost;
}
static int64_t encode_inter_mb_segment(VP9_COMP *cpi,
@@ -2374,7 +2383,7 @@
int sadpb = x->sadperbit16;
MV mvp_full;
int ref = mbmi->ref_frame[0];
- int_mv ref_mv = mbmi->ref_mvs[ref][0];
+ MV ref_mv = mbmi->ref_mvs[ref][0].as_mv;
int tmp_col_min = x->mv_col_min;
int tmp_col_max = x->mv_col_max;
@@ -2384,10 +2393,10 @@
const YV12_BUFFER_CONFIG *scaled_ref_frame = vp9_get_scaled_ref_frame(cpi,
ref);
- int_mv pred_mv[3];
- pred_mv[0] = mbmi->ref_mvs[ref][0];
- pred_mv[1] = mbmi->ref_mvs[ref][1];
- pred_mv[2] = x->pred_mv[ref];
+ MV pred_mv[3];
+ pred_mv[0] = mbmi->ref_mvs[ref][0].as_mv;
+ pred_mv[1] = mbmi->ref_mvs[ref][1].as_mv;
+ pred_mv[2] = x->pred_mv[ref].as_mv;
if (scaled_ref_frame) {
int i;
@@ -2400,7 +2409,7 @@
setup_pre_planes(xd, 0, scaled_ref_frame, mi_row, mi_col, NULL);
}
- vp9_set_mv_search_range(x, &ref_mv.as_mv);
+ vp9_set_mv_search_range(x, &ref_mv);
// Work out the size of the first step in the mv step search.
// 0 here is maximum length first step. 1 is MAX >> 1 etc.
@@ -2445,7 +2454,7 @@
}
}
- mvp_full = pred_mv[x->mv_best_ref_index[ref]].as_mv;
+ mvp_full = pred_mv[x->mv_best_ref_index[ref]];
mvp_full.col >>= 3;
mvp_full.row >>= 3;
@@ -2456,24 +2465,24 @@
if (cpi->sf.search_method == FAST_HEX) {
bestsme = vp9_fast_hex_search(x, &mvp_full, step_param, sadpb,
&cpi->fn_ptr[bsize], 1,
- &ref_mv.as_mv, &tmp_mv->as_mv);
+ &ref_mv, &tmp_mv->as_mv);
} else if (cpi->sf.search_method == HEX) {
bestsme = vp9_hex_search(x, &mvp_full, step_param, sadpb, 1,
&cpi->fn_ptr[bsize], 1,
- &ref_mv.as_mv, &tmp_mv->as_mv);
+ &ref_mv, &tmp_mv->as_mv);
} else if (cpi->sf.search_method == SQUARE) {
bestsme = vp9_square_search(x, &mvp_full, step_param, sadpb, 1,
&cpi->fn_ptr[bsize], 1,
- &ref_mv.as_mv, &tmp_mv->as_mv);
+ &ref_mv, &tmp_mv->as_mv);
} else if (cpi->sf.search_method == BIGDIA) {
bestsme = vp9_bigdia_search(x, &mvp_full, step_param, sadpb, 1,
&cpi->fn_ptr[bsize], 1,
- &ref_mv.as_mv, &tmp_mv->as_mv);
+ &ref_mv, &tmp_mv->as_mv);
} else {
bestsme = vp9_full_pixel_diamond(cpi, x, &mvp_full, step_param,
sadpb, further_steps, 1,
&cpi->fn_ptr[bsize],
- &ref_mv.as_mv, &tmp_mv->as_mv);
+ &ref_mv, &tmp_mv->as_mv);
}
x->mv_col_min = tmp_col_min;
@@ -2483,7 +2492,7 @@
if (bestsme < INT_MAX) {
int dis; /* TODO: use dis in distortion calculation later. */
- cpi->find_fractional_mv_step(x, &tmp_mv->as_mv, &ref_mv.as_mv,
+ cpi->find_fractional_mv_step(x, &tmp_mv->as_mv, &ref_mv,
cm->allow_high_precision_mv,
x->errorperbit,
&cpi->fn_ptr[bsize],
@@ -2492,7 +2501,7 @@
x->nmvjointcost, x->mvcost,
&dis, &x->pred_sse[ref]);
}
- *rate_mv = vp9_mv_bit_cost(&tmp_mv->as_mv, &ref_mv.as_mv,
+ *rate_mv = vp9_mv_bit_cost(&tmp_mv->as_mv, &ref_mv,
x->nmvjointcost, x->mvcost, MV_COST_WEIGHT);
if (cpi->sf.adaptive_motion_search && cpi->common.show_frame)
@@ -3008,8 +3017,8 @@
int64_t rdcosty = INT64_MAX;
// Y cost and distortion
- super_block_yrd(cpi, x, rate_y, distortion_y, &skippable_y, psse,
- bsize, txfm_cache, ref_best_rd);
+ inter_super_block_yrd(cpi, x, rate_y, distortion_y, &skippable_y, psse,
+ bsize, txfm_cache, ref_best_rd);
if (*rate_y == INT_MAX) {
*rate2 = INT_MAX;
@@ -3024,7 +3033,7 @@
rdcosty = RDCOST(x->rdmult, x->rddiv, *rate2, *distortion);
rdcosty = MIN(rdcosty, RDCOST(x->rdmult, x->rddiv, 0, *psse));
- super_block_uvrd(cpi, x, rate_uv, distortion_uv, &skippable_uv, &sseuv,
+ super_block_uvrd(x, rate_uv, distortion_uv, &skippable_uv, &sseuv,
bsize, ref_best_rd - rdcosty);
if (*rate_uv == INT_MAX) {
*rate2 = INT_MAX;
@@ -3394,8 +3403,8 @@
if (ref_frame == INTRA_FRAME) {
TX_SIZE uv_tx;
- super_block_yrd(cpi, x, &rate_y, &distortion_y, &skippable, NULL,
- bsize, tx_cache, best_rd);
+ intra_super_block_yrd(cpi, x, &rate_y, &distortion_y, &skippable, NULL,
+ bsize, tx_cache, best_rd);
if (rate_y == INT_MAX)
continue;
@@ -4151,7 +4160,7 @@
// then dont bother looking at UV
vp9_build_inter_predictors_sbuv(&x->e_mbd, mi_row, mi_col,
BLOCK_8X8);
- super_block_uvrd(cpi, x, &rate_uv, &distortion_uv, &uv_skippable,
+ super_block_uvrd(x, &rate_uv, &distortion_uv, &uv_skippable,
&uv_sse, BLOCK_8X8, tmp_best_rdu);
if (rate_uv == INT_MAX)
continue;
diff --git a/vp9/encoder/vp9_rdopt.h b/vp9/encoder/vp9_rdopt.h
index 96cea42..6b85d67 100644
--- a/vp9/encoder/vp9_rdopt.h
+++ b/vp9/encoder/vp9_rdopt.h
@@ -80,10 +80,10 @@
void vp9_set_mbmode_and_mvs(MACROBLOCKD *xd, MB_PREDICTION_MODE mode,
const MV *mv);
-void vp9_get_entropy_contexts(TX_SIZE tx_size,
- ENTROPY_CONTEXT t_above[16], ENTROPY_CONTEXT t_left[16],
- const ENTROPY_CONTEXT *above, const ENTROPY_CONTEXT *left,
- int num_4x4_w, int num_4x4_h);
+void vp9_get_entropy_contexts(BLOCK_SIZE bsize, TX_SIZE tx_size,
+ const struct macroblockd_plane *pd,
+ ENTROPY_CONTEXT t_above[16],
+ ENTROPY_CONTEXT t_left[16]);
#ifdef __cplusplus
} // extern "C"
diff --git a/vp9/encoder/vp9_temporal_filter.c b/vp9/encoder/vp9_temporal_filter.c
index 0040477..502e4b6 100644
--- a/vp9/encoder/vp9_temporal_filter.c
+++ b/vp9/encoder/vp9_temporal_filter.c
@@ -20,7 +20,6 @@
#include "vp9/encoder/vp9_firstpass.h"
#include "vp9/encoder/vp9_mcomp.h"
#include "vp9/encoder/vp9_onyx_int.h"
-#include "vp9/encoder/vp9_psnr.h"
#include "vp9/encoder/vp9_quantize.h"
#include "vp9/encoder/vp9_ratectrl.h"
#include "vp9/encoder/vp9_segmentation.h"
diff --git a/vp9/vp9_common.mk b/vp9/vp9_common.mk
index a448b3c..9fb6115 100644
--- a/vp9/vp9_common.mk
+++ b/vp9/vp9_common.mk
@@ -25,7 +25,6 @@
VP9_COMMON_SRCS-yes += common/vp9_filter.h
VP9_COMMON_SRCS-yes += common/vp9_frame_buffers.c
VP9_COMMON_SRCS-yes += common/vp9_frame_buffers.h
-VP9_COMMON_SRCS-yes += common/generic/vp9_systemdependent.c
VP9_COMMON_SRCS-yes += common/vp9_idct.c
VP9_COMMON_SRCS-yes += common/vp9_alloccommon.h
VP9_COMMON_SRCS-yes += common/vp9_blockd.h
diff --git a/vp9/vp9cx.mk b/vp9/vp9cx.mk
index 27dd6f6..6679f89 100644
--- a/vp9/vp9cx.mk
+++ b/vp9/vp9cx.mk
@@ -38,7 +38,6 @@
VP9_CX_SRCS-yes += encoder/vp9_lookahead.h
VP9_CX_SRCS-yes += encoder/vp9_mcomp.h
VP9_CX_SRCS-yes += encoder/vp9_onyx_int.h
-VP9_CX_SRCS-yes += encoder/vp9_psnr.h
VP9_CX_SRCS-yes += encoder/vp9_quantize.h
VP9_CX_SRCS-yes += encoder/vp9_ratectrl.h
VP9_CX_SRCS-yes += encoder/vp9_rdopt.h
@@ -50,7 +49,6 @@
VP9_CX_SRCS-yes += encoder/vp9_onyx_if.c
VP9_CX_SRCS-yes += encoder/vp9_picklpf.c
VP9_CX_SRCS-yes += encoder/vp9_picklpf.h
-VP9_CX_SRCS-yes += encoder/vp9_psnr.c
VP9_CX_SRCS-yes += encoder/vp9_quantize.c
VP9_CX_SRCS-yes += encoder/vp9_ratectrl.c
VP9_CX_SRCS-yes += encoder/vp9_rdopt.c
diff --git a/vpx/internal/vpx_psnr.h b/vpx/internal/vpx_psnr.h
new file mode 100644
index 0000000..07d81bb
--- /dev/null
+++ b/vpx/internal/vpx_psnr.h
@@ -0,0 +1,34 @@
+/*
+ * Copyright (c) 2014 The WebM project authors. All Rights Reserved.
+ *
+ * Use of this source code is governed by a BSD-style license
+ * that can be found in the LICENSE file in the root of the source
+ * tree. An additional intellectual property rights grant can be found
+ * in the file PATENTS. All contributing project authors may
+ * be found in the AUTHORS file in the root of the source tree.
+ */
+
+#ifndef VPX_INTERNAL_VPX_PSNR_H_
+#define VPX_INTERNAL_VPX_PSNR_H_
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+// TODO(dkovalev) change vpx_sse_to_psnr signature: double -> int64_t
+
+/*!\brief Converts SSE to PSNR
+ *
+ * Converts sum of squared errors (SSE) to peak signal-to-noise ratio (PSNR).
+ *
+ * \param[in] samples Number of samples
+ * \param[in] peak Max sample value
+ * \param[in] sse Sum of squared errors
+ */
+double vpx_sse_to_psnr(double samples, double peak, double sse);
+
+#ifdef __cplusplus
+} // extern "C"
+#endif
+
+#endif // VPX_INTERNAL_VPX_PSNR_H_
diff --git a/vpx/src/svc_encodeframe.c b/vpx/src/svc_encodeframe.c
index c12d8a3..edc18db 100644
--- a/vpx/src/svc_encodeframe.c
+++ b/vpx/src/svc_encodeframe.c
@@ -13,6 +13,7 @@
* VP9 SVC encoding support via libvpx
*/
+#include <math.h>
#include <stdarg.h>
#include <stdio.h>
#include <stdlib.h>
@@ -40,6 +41,7 @@
#define SUPERFRAME_SLOTS (8)
#define SUPERFRAME_BUFFER_SIZE (SUPERFRAME_SLOTS * sizeof(uint32_t) + 2)
#define OPTION_BUFFER_SIZE 256
+#define COMPONENTS 4 // psnr & sse statistics maintained for total, y, u, v
static const char *DEFAULT_QUANTIZER_VALUES = "60,53,39,33,27";
static const char *DEFAULT_SCALE_FACTORS = "4/16,5/16,7/16,11/16,16/16";
@@ -58,8 +60,9 @@
int quantizer[VPX_SS_MAX_LAYERS];
// accumulated statistics
- double psnr_in_layer[VPX_SS_MAX_LAYERS];
- uint32_t bytes_in_layer[VPX_SS_MAX_LAYERS];
+ double psnr_sum[VPX_SS_MAX_LAYERS][COMPONENTS]; // total/Y/U/V
+ uint64_t sse_sum[VPX_SS_MAX_LAYERS][COMPONENTS];
+ uint32_t bytes_sum[VPX_SS_MAX_LAYERS];
// codec encoding values
int width; // width of highest layer
@@ -855,7 +858,7 @@
switch (cx_pkt->kind) {
case VPX_CODEC_CX_FRAME_PKT: {
const uint32_t frame_pkt_size = (uint32_t)(cx_pkt->data.frame.sz);
- si->bytes_in_layer[si->layer] += frame_pkt_size;
+ si->bytes_sum[si->layer] += frame_pkt_size;
svc_log(svc_ctx, SVC_LOG_DEBUG,
"SVC frame: %d, layer: %d, size: %u\n",
si->encode_frame_count, si->layer, frame_pkt_size);
@@ -873,13 +876,23 @@
break;
}
case VPX_CODEC_PSNR_PKT: {
+ int i;
svc_log(svc_ctx, SVC_LOG_DEBUG,
"SVC frame: %d, layer: %d, PSNR(Total/Y/U/V): "
"%2.3f %2.3f %2.3f %2.3f \n",
si->encode_frame_count, si->layer,
cx_pkt->data.psnr.psnr[0], cx_pkt->data.psnr.psnr[1],
cx_pkt->data.psnr.psnr[2], cx_pkt->data.psnr.psnr[3]);
- si->psnr_in_layer[si->layer] += cx_pkt->data.psnr.psnr[0];
+ svc_log(svc_ctx, SVC_LOG_DEBUG,
+ "SVC frame: %d, layer: %d, SSE(Total/Y/U/V): "
+ "%2.3f %2.3f %2.3f %2.3f \n",
+ si->encode_frame_count, si->layer,
+ (double)cx_pkt->data.psnr.sse[0], (double)cx_pkt->data.psnr.sse[1],
+ (double)cx_pkt->data.psnr.sse[2], (double)cx_pkt->data.psnr.sse[3]);
+ for (i = 0; i < COMPONENTS; i++) {
+ si->psnr_sum[si->layer][i] += cx_pkt->data.psnr.psnr[i];
+ si->sse_sum[si->layer][i] += cx_pkt->data.psnr.sse[i];
+ }
break;
}
default: {
@@ -957,11 +970,21 @@
si->frame_within_gop = 0;
}
+static double calc_psnr(double d) {
+ if (d == 0) return 100;
+ return -10.0 * log(d) / log(10.0);
+}
+
// dump accumulated statistics and reset accumulated values
const char *vpx_svc_dump_statistics(SvcContext *svc_ctx) {
int number_of_frames, number_of_keyframes, encode_frame_count;
- int i;
+ int i, j;
uint32_t bytes_total = 0;
+ double scale[COMPONENTS];
+ double psnr[COMPONENTS];
+ double mse[COMPONENTS];
+ double y_scale;
+
SvcInternal *const si = get_svc_internal(svc_ctx);
if (svc_ctx == NULL || si == NULL) return NULL;
@@ -979,12 +1002,36 @@
(i == 1 || i == 3)) {
number_of_frames -= number_of_keyframes;
}
- svc_log(svc_ctx, SVC_LOG_INFO, "Layer %d PSNR=[%2.3f], Bytes=[%u]\n", i,
- (double)si->psnr_in_layer[i] / number_of_frames,
- si->bytes_in_layer[i]);
- bytes_total += si->bytes_in_layer[i];
- si->psnr_in_layer[i] = 0;
- si->bytes_in_layer[i] = 0;
+ svc_log(svc_ctx, SVC_LOG_INFO,
+ "Layer %d Average PSNR=[%2.3f, %2.3f, %2.3f, %2.3f], Bytes=[%u]\n",
+ i, (double)si->psnr_sum[i][0] / number_of_frames,
+ (double)si->psnr_sum[i][1] / number_of_frames,
+ (double)si->psnr_sum[i][2] / number_of_frames,
+ (double)si->psnr_sum[i][3] / number_of_frames, si->bytes_sum[i]);
+ // the following psnr calculation is deduced from ffmpeg.c#print_report
+ y_scale = si->width * si->height * 255.0 * 255.0 * number_of_frames;
+ scale[1] = y_scale;
+ scale[2] = scale[3] = y_scale / 4; // U or V
+ scale[0] = y_scale * 1.5; // total
+
+ for (j = 0; j < COMPONENTS; j++) {
+ psnr[j] = calc_psnr(si->sse_sum[i][j] / scale[j]);
+ mse[j] = si->sse_sum[i][j] * 255.0 * 255.0 / scale[j];
+ }
+ svc_log(svc_ctx, SVC_LOG_INFO,
+ "Layer %d Overall PSNR=[%2.3f, %2.3f, %2.3f, %2.3f]\n", i, psnr[0],
+ psnr[1], psnr[2], psnr[3]);
+ svc_log(svc_ctx, SVC_LOG_INFO,
+ "Layer %d Overall MSE=[%2.3f, %2.3f, %2.3f, %2.3f]\n", i, mse[0],
+ mse[1], mse[2], mse[3]);
+
+ bytes_total += si->bytes_sum[i];
+ // clear sums for next time
+ si->bytes_sum[i] = 0;
+ for (j = 0; j < COMPONENTS; ++j) {
+ si->psnr_sum[i][j] = 0;
+ si->sse_sum[i][j] = 0;
+ }
}
// only display statistics once
diff --git a/vpx/src/vpx_psnr.c b/vpx/src/vpx_psnr.c
new file mode 100644
index 0000000..05843ac
--- /dev/null
+++ b/vpx/src/vpx_psnr.c
@@ -0,0 +1,24 @@
+/*
+ * Copyright (c) 2014 The WebM project authors. All Rights Reserved.
+ *
+ * Use of this source code is governed by a BSD-style license
+ * that can be found in the LICENSE file in the root of the source
+ * tree. An additional intellectual property rights grant can be found
+ * in the file PATENTS. All contributing project authors may
+ * be found in the AUTHORS file in the root of the source tree.
+ */
+
+#include <math.h>
+
+#include "vpx/internal/vpx_psnr.h"
+
+#define MAX_PSNR 100.0
+
+double vpx_sse_to_psnr(double samples, double peak, double sse) {
+ if (sse > 0.0) {
+ const double psnr = 10.0 * log10(samples * peak * peak / sse);
+ return psnr > MAX_PSNR ? MAX_PSNR : psnr;
+ } else {
+ return MAX_PSNR;
+ }
+}
diff --git a/vpx/vpx_codec.mk b/vpx/vpx_codec.mk
index 111c87e..98d1d56 100644
--- a/vpx/vpx_codec.mk
+++ b/vpx/vpx_codec.mk
@@ -34,8 +34,10 @@
API_SRCS-yes += src/vpx_encoder.c
API_SRCS-yes += vpx_encoder.h
API_SRCS-yes += internal/vpx_codec_internal.h
+API_SRCS-yes += internal/vpx_psnr.h
API_SRCS-yes += src/vpx_codec.c
API_SRCS-yes += src/vpx_image.c
+API_SRCS-yes += src/vpx_psnr.c
API_SRCS-yes += vpx_codec.h
API_SRCS-yes += vpx_codec.mk
API_SRCS-yes += vpx_frame_buffer.h