Merge "vpxdec: add --keep-going option"
diff --git a/configure b/configure
index 0dca4c3..b98480e 100755
--- a/configure
+++ b/configure
@@ -274,6 +274,7 @@
multiple_arf
spatial_svc
denoising
+ fp_mb_stats
"
CONFIG_LIST="
external_build
diff --git a/test/md5_helper.h b/test/md5_helper.h
index dd446f4..dc95582 100644
--- a/test/md5_helper.h
+++ b/test/md5_helper.h
@@ -28,10 +28,11 @@
// plane, we never want to round down and thus skip a pixel so if
// we are shifting by 1 (chroma_shift) we add 1 before doing the shift.
// This works only for chroma_shift of 0 and 1.
+ const int bytes_per_sample = (img->fmt & VPX_IMG_FMT_HIGH) ? 2 : 1;
const int h = plane ? (img->d_h + img->y_chroma_shift) >>
img->y_chroma_shift : img->d_h;
- const int w = plane ? (img->d_w + img->x_chroma_shift) >>
- img->x_chroma_shift : img->d_w;
+ const int w = (plane ? (img->d_w + img->x_chroma_shift) >>
+ img->x_chroma_shift : img->d_w) * bytes_per_sample;
for (int y = 0; y < h; ++y) {
MD5Update(&md5_, buf, w);
diff --git a/test/test-data.sha1 b/test/test-data.sha1
index af1815c..f9c09c6 100644
--- a/test/test-data.sha1
+++ b/test/test-data.sha1
@@ -6,6 +6,15 @@
2dadee5306245fa5eeb0f99652d0e17afbcba96d invalid-vp90-02.webm.res
df1a1453feb3c00d7d89746c7003b4163523bff3 invalid-vp90-03.webm
8fe6fd82bf537340f586f97a7ae31fb37ccda302 invalid-vp90-03.webm.res
+a432f96ff0a787268e2f94a8092ab161a18d1b06 park_joy_90p_10_420.y4m
+0b194cc312c3a2e84d156a221b0a5eb615dfddc5 park_joy_90p_10_422.y4m
+ff0e0a21dc2adc95b8c1b37902713700655ced17 park_joy_90p_10_444.y4m
+614c32ae1eca391e867c70d19974f0d62664dd99 park_joy_90p_12_420.y4m
+c92825f1ea25c5c37855083a69faac6ac4641a9e park_joy_90p_12_422.y4m
+b592189b885b6cc85db55cc98512a197d73d3b34 park_joy_90p_12_444.y4m
+4e0eb61e76f0684188d9bc9f3ce61f6b6b77bb2c park_joy_90p_8_420.y4m
+7a193ff7dfeb96ba5f82b2afd7afa9e1fe83d947 park_joy_90p_8_422.y4m
+bdb7856e6bc93599bdda05c2e773a9f22b6c6d03 park_joy_90p_8_444.y4m
b1f1c3ec79114b9a0651af24ce634afb44a9a419 rush_hour_444.y4m
5184c46ddca8b1fadd16742e8500115bc8f749da vp80-00-comprehensive-001.ivf
65bf1bbbced81b97bd030f376d1b7f61a224793f vp80-00-comprehensive-002.ivf
diff --git a/test/test.mk b/test/test.mk
index f06e28e..85212d9 100644
--- a/test/test.mk
+++ b/test/test.mk
@@ -15,7 +15,7 @@
##
## Black box tests only use the public API.
##
-LIBVPX_TEST_SRCS-$(CONFIG_DECODERS) += ../md5_utils.h ../md5_utils.c
+LIBVPX_TEST_SRCS-yes += ../md5_utils.h ../md5_utils.c
LIBVPX_TEST_SRCS-$(CONFIG_DECODERS) += ivf_video_source.h
LIBVPX_TEST_SRCS-$(CONFIG_ENCODERS) += ../y4minput.h ../y4minput.c
LIBVPX_TEST_SRCS-$(CONFIG_ENCODERS) += aq_segment_test.cc
@@ -42,6 +42,9 @@
LIBVPX_TEST_SRCS-yes += encode_test_driver.cc
LIBVPX_TEST_SRCS-yes += encode_test_driver.h
+## Y4m parsing.
+LIBVPX_TEST_SRCS-$(CONFIG_ENCODERS) += y4m_test.cc ../y4menc.c ../y4menc.h
+
## WebM Parsing
ifeq ($(CONFIG_WEBM_IO), yes)
LIBWEBM_PARSER_SRCS += ../third_party/libwebm/mkvparser.cpp
@@ -134,6 +137,17 @@
##
LIBVPX_TEST_DATA-$(CONFIG_ENCODERS) += hantro_collage_w352h288.yuv
LIBVPX_TEST_DATA-$(CONFIG_ENCODERS) += hantro_odd.yuv
+
+LIBVPX_TEST_DATA-$(CONFIG_ENCODERS) += park_joy_90p_10_420.y4m
+LIBVPX_TEST_DATA-$(CONFIG_ENCODERS) += park_joy_90p_10_422.y4m
+LIBVPX_TEST_DATA-$(CONFIG_ENCODERS) += park_joy_90p_10_444.y4m
+LIBVPX_TEST_DATA-$(CONFIG_ENCODERS) += park_joy_90p_12_420.y4m
+LIBVPX_TEST_DATA-$(CONFIG_ENCODERS) += park_joy_90p_12_422.y4m
+LIBVPX_TEST_DATA-$(CONFIG_ENCODERS) += park_joy_90p_12_444.y4m
+LIBVPX_TEST_DATA-$(CONFIG_ENCODERS) += park_joy_90p_8_420.y4m
+LIBVPX_TEST_DATA-$(CONFIG_ENCODERS) += park_joy_90p_8_422.y4m
+LIBVPX_TEST_DATA-$(CONFIG_ENCODERS) += park_joy_90p_8_444.y4m
+
LIBVPX_TEST_DATA-$(CONFIG_VP9_ENCODER) += rush_hour_444.y4m
LIBVPX_TEST_DATA-$(CONFIG_VP9_ENCODER) += screendata.y4m
diff --git a/test/video_source.h b/test/video_source.h
index 6d1855a..4250cb7 100644
--- a/test/video_source.h
+++ b/test/video_source.h
@@ -50,6 +50,15 @@
return fopen(path_to_source.c_str(), "rb");
}
+// Opens |file_name| for binary writing inside the directory reported by
+// GetDataPath(). Returns NULL if fopen() fails.
+static FILE *OpenTestOutFile(const std::string& file_name) {
+  const std::string out_path = GetDataPath() + "/" + file_name;
+  return fopen(out_path.c_str(), "wb");
+}
+
+static FILE *OpenTempOutFile() {  // Thin wrapper: hands back tmpfile()'s result unchanged (NULL on failure).
+ return tmpfile();
+}
+
// Abstract base class for test video sources, which provide a stream of
// vpx_image_t images with associated timestamps and duration.
class VideoSource {
diff --git a/test/vp9_thread_test.cc b/test/vp9_thread_test.cc
index 68ee99a..72719a6 100644
--- a/test/vp9_thread_test.cc
+++ b/test/vp9_thread_test.cc
@@ -28,11 +28,11 @@
protected:
virtual ~VP9WorkerThreadTest() {}
virtual void SetUp() {
- vp9_worker_init(&worker_);
+ vp9_get_worker_interface()->init(&worker_);
}
virtual void TearDown() {
- vp9_worker_end(&worker_);
+ vp9_get_worker_interface()->end(&worker_);
}
VP9Worker worker_;
@@ -45,10 +45,11 @@
}
TEST_P(VP9WorkerThreadTest, HookSuccess) {
- EXPECT_NE(vp9_worker_sync(&worker_), 0); // should be a no-op.
+ // should be a no-op.
+ EXPECT_NE(vp9_get_worker_interface()->sync(&worker_), 0);
for (int i = 0; i < 2; ++i) {
- EXPECT_NE(vp9_worker_reset(&worker_), 0);
+ EXPECT_NE(vp9_get_worker_interface()->reset(&worker_), 0);
int hook_data = 0;
int return_value = 1; // return successfully from the hook
@@ -58,20 +59,21 @@
const bool synchronous = GetParam();
if (synchronous) {
- vp9_worker_execute(&worker_);
+ vp9_get_worker_interface()->execute(&worker_);
} else {
- vp9_worker_launch(&worker_);
+ vp9_get_worker_interface()->launch(&worker_);
}
- EXPECT_NE(vp9_worker_sync(&worker_), 0);
+ EXPECT_NE(vp9_get_worker_interface()->sync(&worker_), 0);
EXPECT_FALSE(worker_.had_error);
EXPECT_EQ(5, hook_data);
- EXPECT_NE(vp9_worker_sync(&worker_), 0); // should be a no-op.
+ // should be a no-op.
+ EXPECT_NE(vp9_get_worker_interface()->sync(&worker_), 0);
}
}
TEST_P(VP9WorkerThreadTest, HookFailure) {
- EXPECT_NE(vp9_worker_reset(&worker_), 0);
+ EXPECT_NE(vp9_get_worker_interface()->reset(&worker_), 0);
int hook_data = 0;
int return_value = 0; // return failure from the hook
@@ -81,26 +83,49 @@
const bool synchronous = GetParam();
if (synchronous) {
- vp9_worker_execute(&worker_);
+ vp9_get_worker_interface()->execute(&worker_);
} else {
- vp9_worker_launch(&worker_);
+ vp9_get_worker_interface()->launch(&worker_);
}
- EXPECT_FALSE(vp9_worker_sync(&worker_));
+ EXPECT_FALSE(vp9_get_worker_interface()->sync(&worker_));
EXPECT_EQ(1, worker_.had_error);
// Ensure _reset() clears the error and _launch() can be called again.
return_value = 1;
- EXPECT_NE(vp9_worker_reset(&worker_), 0);
+ EXPECT_NE(vp9_get_worker_interface()->reset(&worker_), 0);
EXPECT_FALSE(worker_.had_error);
- vp9_worker_launch(&worker_);
- EXPECT_NE(vp9_worker_sync(&worker_), 0);
+ vp9_get_worker_interface()->launch(&worker_);
+ EXPECT_NE(vp9_get_worker_interface()->sync(&worker_), 0);
EXPECT_FALSE(worker_.had_error);
}
+// Checks that vp9_set_worker_interface() refuses a NULL interface as well as
+// any interface in which one of the six entry points has been cleared.
+TEST(VP9WorkerThreadTest, TestInterfaceAPI) {
+  EXPECT_EQ(0, vp9_set_worker_interface(NULL));
+  EXPECT_TRUE(vp9_get_worker_interface() != NULL);
+  for (int missing = 0; missing < 6; ++missing) {
+    // Start from a copy of the installed interface and knock out one hook.
+    VP9WorkerInterface candidate = *vp9_get_worker_interface();
+    if (missing == 1) {
+      candidate.reset = NULL;
+    } else if (missing == 2) {
+      candidate.sync = NULL;
+    } else if (missing == 3) {
+      candidate.launch = NULL;
+    } else if (missing == 4) {
+      candidate.execute = NULL;
+    } else if (missing == 5) {
+      candidate.end = NULL;
+    } else {
+      // Index 0 (and any unexpected index) clears init.
+      candidate.init = NULL;
+    }
+    EXPECT_EQ(0, vp9_set_worker_interface(&candidate));
+  }
+}
+
// -----------------------------------------------------------------------------
// Multi-threaded decode tests
#if CONFIG_WEBM_IO
+struct FileList {  // One decode-test entry; DecodeFiles() stops at an entry whose |name| is NULL.
+ const char *name;  // File name handed to DecodeFile().
+ const char *expected_md5;  // MD5 string that DecodeFile() is expected to return.
+};
+
// Decodes |filename| with |num_threads|. Returns the md5 of the decoded frames.
string DecodeFile(const string& filename, int num_threads) {
libvpx_test::WebMVideoSource video(filename);
@@ -130,39 +155,77 @@
return string(md5.Get());
}
+// Decodes every entry of |files| with 2 through 8 threads and expects each
+// result to equal the entry's md5. |files| must be terminated by an entry
+// whose name is NULL.
+void DecodeFiles(const FileList files[]) {
+  const FileList *entry = files;
+  while (entry->name != NULL) {
+    SCOPED_TRACE(entry->name);
+    for (int num_threads = 2; num_threads <= 8; ++num_threads) {
+      EXPECT_EQ(entry->expected_md5, DecodeFile(entry->name, num_threads))
+          << "threads = " << num_threads;
+    }
+    ++entry;
+  }
+}
+
+// Trivial serialized thread worker interface implementation: Launch() runs
+// the hook inline. Note any worker that requires synchronization between
+// other workers will hang.
+namespace impl {
+
+void Init(VP9Worker *const worker) { memset(worker, 0, sizeof(*worker)); }
+int Reset(VP9Worker *const /*worker*/) { return 1; }
+int Sync(VP9Worker *const worker) { return !worker->had_error; }
+
+// The hook returns false on failure; negate it so had_error records errors.
+void Execute(VP9Worker *const worker) {
+  worker->had_error |= !worker->hook(worker->data1, worker->data2);
+}
+void Launch(VP9Worker *const worker) { Execute(worker); }
+void End(VP9Worker *const /*worker*/) {}
+
+} // namespace impl
+
+// Installs the serialized impl:: hooks, checks that a 2-thread decode still
+// produces the expected md5, then reinstalls the previous interface and
+// checks the same decode once more.
+TEST(VP9WorkerThreadTest, TestSerialInterface) {
+  // Snapshot the installed interface first so it can be put back afterwards.
+  const VP9WorkerInterface saved_interface = *vp9_get_worker_interface();
+  static const VP9WorkerInterface kSerialInterface = {
+    impl::Init, impl::Reset, impl::Sync, impl::Launch, impl::Execute, impl::End
+  };
+  // TODO(jzern): Avoid using a file that will use the row-based thread
+  // loopfilter, with the simple serialized implementation it will hang. This is
+  // due to its expectation that rows will be run in parallel as they wait on
+  // progress in the row above before proceeding.
+  static const char kFilename[] = "vp90-2-03-size-226x226.webm";
+  static const char kExpectedMd5[] = "b35a1b707b28e82be025d960aba039bc";
+
+  EXPECT_NE(vp9_set_worker_interface(&kSerialInterface), 0);
+  EXPECT_EQ(kExpectedMd5, DecodeFile(kFilename, 2));
+
+  // Reset the interface.
+  EXPECT_NE(vp9_set_worker_interface(&saved_interface), 0);
+  EXPECT_EQ(kExpectedMd5, DecodeFile(kFilename, 2));
+}
+
TEST(VP9DecodeMultiThreadedTest, Decode) {
// no tiles or frame parallel; this exercises loop filter threading.
- EXPECT_STREQ("b35a1b707b28e82be025d960aba039bc",
- DecodeFile("vp90-2-03-size-226x226.webm", 2).c_str());
+ EXPECT_EQ("b35a1b707b28e82be025d960aba039bc",
+ DecodeFile("vp90-2-03-size-226x226.webm", 2));
}
TEST(VP9DecodeMultiThreadedTest, Decode2) {
- static const struct {
- const char *name;
- const char *expected_md5;
- } files[] = {
+ static const FileList files[] = {
{ "vp90-2-08-tile_1x2_frame_parallel.webm",
"68ede6abd66bae0a2edf2eb9232241b6" },
{ "vp90-2-08-tile_1x4_frame_parallel.webm",
"368ebc6ebf3a5e478d85b2c3149b2848" },
{ "vp90-2-08-tile_1x8_frame_parallel.webm",
"17e439da2388aff3a0f69cb22579c6c1" },
+ { NULL, NULL }
};
- for (int i = 0; i < static_cast<int>(sizeof(files) / sizeof(files[0])); ++i) {
- for (int t = 2; t <= 8; ++t) {
- EXPECT_STREQ(files[i].expected_md5, DecodeFile(files[i].name, t).c_str())
- << "threads = " << t;
- }
- }
+ DecodeFiles(files);
}
// Test tile quantity changes within one file.
TEST(VP9DecodeMultiThreadedTest, Decode3) {
- static const struct {
- const char *name;
- const char *expected_md5;
- } files[] = {
+ static const FileList files[] = {
{ "vp90-2-14-resize-fp-tiles-1-16.webm",
"0cd5e632c326297e975f38949c31ea94" },
{ "vp90-2-14-resize-fp-tiles-1-2-4-8-16.webm",
@@ -207,14 +270,10 @@
"ae96f21f21b6370cc0125621b441fc52" },
{ "vp90-2-14-resize-fp-tiles-8-4.webm",
"3eb4f24f10640d42218f7fd7b9fd30d4" },
+ { NULL, NULL }
};
- for (int i = 0; i < static_cast<int>(sizeof(files) / sizeof(files[0])); ++i) {
- for (int t = 2; t <= 8; ++t) {
- EXPECT_STREQ(files[i].expected_md5, DecodeFile(files[i].name, t).c_str())
- << "threads = " << t;
- }
- }
+ DecodeFiles(files);
}
#endif // CONFIG_WEBM_IO
diff --git a/test/y4m_test.cc b/test/y4m_test.cc
new file mode 100644
index 0000000..cfa30e8
--- /dev/null
+++ b/test/y4m_test.cc
@@ -0,0 +1,193 @@
+/*
+ * Copyright (c) 2012 The WebM project authors. All Rights Reserved.
+ *
+ * Use of this source code is governed by a BSD-style license
+ * that can be found in the LICENSE file in the root of the source
+ * tree. An additional intellectual property rights grant can be found
+ * in the file PATENTS. All contributing project authors may
+ * be found in the AUTHORS file in the root of the source tree.
+ */
+
+#include <string>
+#include "test/md5_helper.h"
+#include "test/util.h"
+#include "test/y4m_video_source.h"
+#include "third_party/googletest/src/include/gtest/gtest.h"
+#include "./vpx_config.h"
+#include "./y4menc.h"
+
+namespace {
+
+using std::string;
+using std::tr1::make_tuple;
+
+static const unsigned int kWidth = 160;
+static const unsigned int kHeight = 90;
+static const unsigned int kFrames = 10;
+
+typedef std::tr1::tuple<const char *, const unsigned int,
+ const vpx_img_fmt, const char *> test_entry_type;  // <0> file name, <1> bit depth, <2> image format, <3> expected md5
+
+static const test_entry_type kY4mTestVectors[] = {
+ make_tuple("park_joy_90p_8_420.y4m", 8, VPX_IMG_FMT_I420,
+ "e5406275b9fc6bb3436c31d4a05c1cab"),
+ make_tuple("park_joy_90p_8_422.y4m", 8, VPX_IMG_FMT_I422,
+ "284a47a47133b12884ec3a14e959a0b6"),
+ make_tuple("park_joy_90p_8_444.y4m", 8, VPX_IMG_FMT_I444,
+ "90517ff33843d85de712fd4fe60dbed0"),
+ make_tuple("park_joy_90p_10_420.y4m", 10, VPX_IMG_FMT_I42016,
+ "63f21f9f717d8b8631bd2288ee87137b"),
+ make_tuple("park_joy_90p_10_422.y4m", 10, VPX_IMG_FMT_I42216,
+ "48ab51fb540aed07f7ff5af130c9b605"),
+ make_tuple("park_joy_90p_10_444.y4m", 10, VPX_IMG_FMT_I44416,
+ "067bfd75aa85ff9bae91fa3e0edd1e3e"),
+ make_tuple("park_joy_90p_12_420.y4m", 12, VPX_IMG_FMT_I42016,
+ "9e6d8f6508c6e55625f6b697bc461cef"),
+ make_tuple("park_joy_90p_12_422.y4m", 12, VPX_IMG_FMT_I42216,
+ "b239c6b301c0b835485be349ca83a7e3"),
+ make_tuple("park_joy_90p_12_444.y4m", 12, VPX_IMG_FMT_I44416,
+ "5a6481a550821dab6d0192f5c63845e9")
+};
+
+// Writes the three planes of |img| to |file| one row at a time. Only the
+// displayed width and height of each plane are written; bytes between the
+// row width and the stride are skipped.
+static void write_image_file(const vpx_image_t *img, FILE *file) {
+  // Formats flagged VPX_IMG_FMT_HIGH use two bytes per sample.
+  const int sample_size = (img->fmt & VPX_IMG_FMT_HIGH) ? 2 : 1;
+  for (int plane = 0; plane < 3; ++plane) {
+    // Planes 1 and 2 are subsampled; adding the shift before shifting rounds
+    // the dimension up instead of down.
+    const int rows = plane ?
+        (img->d_h + img->y_chroma_shift) >> img->y_chroma_shift : img->d_h;
+    const int cols = plane ?
+        (img->d_w + img->x_chroma_shift) >> img->x_chroma_shift : img->d_w;
+    const unsigned char *row = img->planes[plane];
+    for (int y = 0; y < rows; ++y) {
+      fwrite(row, sample_size, cols, file);
+      row += img->stride[plane];
+    }
+  }
+}
+
+// Parameterized fixture that is itself a Y4mVideoSource, so a test can open a
+// y4m file and inspect the parsed header fields and frame data directly.
+class Y4mVideoSourceTest
+    : public ::testing::TestWithParam<test_entry_type>,
+      public ::libvpx_test::Y4mVideoSource {
+ protected:
+  Y4mVideoSourceTest() : Y4mVideoSource("", 0, 0) {}
+
+  virtual ~Y4mVideoSourceTest() { CloseSource(); }
+
+  // Points the source at |file_name|, starting at frame 0 and stopping at
+  // |limit|, then calls Begin().
+  virtual void Init(const std::string &file_name, int limit) {
+    file_name_ = file_name;
+    start_ = 0;
+    limit_ = limit;
+    frame_ = 0;
+    Begin();
+  }
+
+  // Checks y4m header information
+  void HeaderChecks(unsigned int bit_depth, vpx_img_fmt_t fmt) {
+    ASSERT_TRUE(input_file_ != NULL);
+    ASSERT_EQ(y4m_.pic_w, static_cast<int>(kWidth));
+    ASSERT_EQ(y4m_.pic_h, static_cast<int>(kHeight));
+    ASSERT_EQ(img()->d_w, kWidth);
+    ASSERT_EQ(img()->d_h, kHeight);
+    ASSERT_EQ(y4m_.bit_depth, bit_depth);
+    ASSERT_EQ(y4m_.vpx_fmt, fmt);
+    // 4:2:0: both chroma axes subsampled, 1.5 samples per pixel.
+    if (fmt == VPX_IMG_FMT_I420 || fmt == VPX_IMG_FMT_I42016) {
+      ASSERT_EQ(y4m_.bps, static_cast<int>(y4m_.bit_depth) * 3 / 2);
+      ASSERT_EQ(img()->x_chroma_shift, 1U);
+      ASSERT_EQ(img()->y_chroma_shift, 1U);
+    }
+    // 4:2:2: only the horizontal chroma axis subsampled, 2 samples per pixel.
+    if (fmt == VPX_IMG_FMT_I422 || fmt == VPX_IMG_FMT_I42216) {
+      ASSERT_EQ(y4m_.bps, static_cast<int>(y4m_.bit_depth) * 2);
+      ASSERT_EQ(img()->x_chroma_shift, 1U);
+      ASSERT_EQ(img()->y_chroma_shift, 0U);
+    }
+    // 4:4:4: no chroma subsampling, 3 samples per pixel.
+    if (fmt == VPX_IMG_FMT_I444 || fmt == VPX_IMG_FMT_I44416) {
+      ASSERT_EQ(y4m_.bps, static_cast<int>(y4m_.bit_depth) * 3);
+      ASSERT_EQ(img()->x_chroma_shift, 0U);
+      ASSERT_EQ(img()->y_chroma_shift, 0U);
+    }
+  }
+
+  // Checks MD5 of the raw frame data
+  void Md5Check(const string &expected_md5) {
+    ASSERT_TRUE(input_file_ != NULL);
+    libvpx_test::MD5 frame_md5;
+    // Fold each frame from start_ up to limit_ into a single digest.
+    for (unsigned int frame_index = start_; frame_index < limit_;
+         ++frame_index) {
+      frame_md5.Add(img());
+      Next();
+    }
+    ASSERT_EQ(string(frame_md5.Get()), expected_md5);
+  }
+};
+
+// Opens each test vector, then validates its parsed header and frame md5.
+TEST_P(Y4mVideoSourceTest, SourceTest) {
+  // Tuple layout: <0> file name, <1> bit depth, <2> image format, <3> md5.
+  Init(GET_PARAM(0), kFrames);
+  HeaderChecks(GET_PARAM(1), GET_PARAM(2));
+  Md5Check(GET_PARAM(3));
+}
+
+INSTANTIATE_TEST_CASE_P(C, Y4mVideoSourceTest,
+ ::testing::ValuesIn(kY4mTestVectors));
+
+// Variant of Y4mVideoSourceTest that first rewrites the source as a y4m stream
+// in a temporary file and then switches the input over to that copy, so the
+// inherited checks run on what the y4m writer produced.
+class Y4mVideoWriteTest : public Y4mVideoSourceTest {
+ protected:
+  Y4mVideoWriteTest() : Y4mVideoSourceTest() {}
+
+  // Replaces the current input with |input_file| and re-reads it from its
+  // beginning.
+  virtual void ReplaceInputFp(FILE *input_file) {
+    CloseSource();
+    frame_ = 0;
+    input_file_ = input_file;
+    rewind(input_file_);
+    ReadSourceToStart();
+  }
+
+  // Writes out a y4m file and then reads it back
+  void WriteY4mAndReadBack() {
+    ASSERT_TRUE(input_file_ != NULL);
+    char header[Y4M_BUFFER_SIZE] = {0};
+    const struct VpxRational frame_rate = {y4m_.fps_n, y4m_.fps_d};
+    FILE *const temp_file = libvpx_test::OpenTempOutFile();
+    ASSERT_TRUE(temp_file != NULL);
+    // Stream header first, then one frame header plus raw planes per frame.
+    y4m_write_file_header(header, sizeof(header), kWidth, kHeight,
+                          &frame_rate, y4m_.vpx_fmt, y4m_.bit_depth);
+    fputs(header, temp_file);
+    for (unsigned int frame_index = start_; frame_index < limit_;
+         ++frame_index) {
+      y4m_write_frame_header(header, sizeof(header));
+      fputs(header, temp_file);
+      write_image_file(img(), temp_file);
+      Next();
+    }
+    ReplaceInputFp(temp_file);
+  }
+
+  // Runs the base-class Init() and then swaps in the rewritten copy.
+  virtual void Init(const std::string &file_name, int limit) {
+    Y4mVideoSourceTest::Init(file_name, limit);
+    WriteY4mAndReadBack();
+  }
+};
+
+// Runs the header and md5 checks on the y4m copy that Init() wrote out and
+// read back.
+TEST_P(Y4mVideoWriteTest, WriteTest) {
+  // Tuple layout: <0> file name, <1> bit depth, <2> image format, <3> md5.
+  Init(GET_PARAM(0), kFrames);
+  HeaderChecks(GET_PARAM(1), GET_PARAM(2));
+  Md5Check(GET_PARAM(3));
+}
+
+INSTANTIATE_TEST_CASE_P(C, Y4mVideoWriteTest,
+ ::testing::ValuesIn(kY4mTestVectors));
+
+} // namespace
diff --git a/test/y4m_video_source.h b/test/y4m_video_source.h
index 7419043..378e75b 100644
--- a/test/y4m_video_source.h
+++ b/test/y4m_video_source.h
@@ -38,24 +38,30 @@
CloseSource();
}
- virtual void Begin() {
+ virtual void OpenSource() {
CloseSource();
input_file_ = OpenTestDataFile(file_name_);
ASSERT_TRUE(input_file_ != NULL) << "Input file open failed. Filename: "
- << file_name_;
+ << file_name_;
+ }
- y4m_input_open(&y4m_, input_file_, NULL, 0, 0);
+ virtual void ReadSourceToStart() {
+ ASSERT_TRUE(input_file_ != NULL);
+ ASSERT_FALSE(y4m_input_open(&y4m_, input_file_, NULL, 0, 0));
framerate_numerator_ = y4m_.fps_n;
framerate_denominator_ = y4m_.fps_d;
-
frame_ = 0;
for (unsigned int i = 0; i < start_; i++) {
- Next();
+ Next();
}
-
FillFrame();
}
+ virtual void Begin() {
+ OpenSource();
+ ReadSourceToStart();
+ }
+
virtual void Next() {
++frame_;
FillFrame();
diff --git a/tools_common.h b/tools_common.h
index e033de2..6a9f4f7 100644
--- a/tools_common.h
+++ b/tools_common.h
@@ -90,6 +90,7 @@
uint32_t width;
uint32_t height;
vpx_img_fmt_t fmt;
+ vpx_bit_depth_t bit_depth;
int only_i420;
uint32_t fourcc;
struct VpxRational framerate;
diff --git a/vp8/encoder/bitstream.h b/vp8/encoder/bitstream.h
index eef2d79..66f4bf6 100644
--- a/vp8/encoder/bitstream.h
+++ b/vp8/encoder/bitstream.h
@@ -18,18 +18,18 @@
#if HAVE_EDSP
void vp8cx_pack_tokens_armv5(vp8_writer *w, const TOKENEXTRA *p, int xcount,
- const vp8_token *,
+ vp8_token *,
const vp8_extra_bit_struct *,
const vp8_tree_index *);
void vp8cx_pack_tokens_into_partitions_armv5(VP8_COMP *,
unsigned char * cx_data,
const unsigned char *cx_data_end,
int num_parts,
- const vp8_token *,
+ vp8_token *,
const vp8_extra_bit_struct *,
const vp8_tree_index *);
void vp8cx_pack_mb_row_tokens_armv5(VP8_COMP *cpi, vp8_writer *w,
- const vp8_token *,
+ vp8_token *,
const vp8_extra_bit_struct *,
const vp8_tree_index *);
# define pack_tokens(a,b,c) \
diff --git a/vp9/common/vp9_rtcd_defs.pl b/vp9/common/vp9_rtcd_defs.pl
index 68613ec..f52dccb 100644
--- a/vp9/common/vp9_rtcd_defs.pl
+++ b/vp9/common/vp9_rtcd_defs.pl
@@ -714,6 +714,9 @@
add_proto qw/void vp9_subtract_block/, "int rows, int cols, int16_t *diff_ptr, ptrdiff_t diff_stride, const uint8_t *src_ptr, ptrdiff_t src_stride, const uint8_t *pred_ptr, ptrdiff_t pred_stride";
specialize qw/vp9_subtract_block/, "$sse2_x86inc";
+add_proto qw/void vp9_quantize_fp/, "const int16_t *coeff_ptr, intptr_t n_coeffs, int skip_block, const int16_t *zbin_ptr, const int16_t *round_ptr, const int16_t *quant_ptr, const int16_t *quant_shift_ptr, int16_t *qcoeff_ptr, int16_t *dqcoeff_ptr, const int16_t *dequant_ptr, int zbin_oq_value, uint16_t *eob_ptr, const int16_t *scan, const int16_t *iscan";
+specialize qw/vp9_quantize_fp/, "$ssse3_x86_64";
+
add_proto qw/void vp9_quantize_b/, "const int16_t *coeff_ptr, intptr_t n_coeffs, int skip_block, const int16_t *zbin_ptr, const int16_t *round_ptr, const int16_t *quant_ptr, const int16_t *quant_shift_ptr, int16_t *qcoeff_ptr, int16_t *dqcoeff_ptr, const int16_t *dequant_ptr, int zbin_oq_value, uint16_t *eob_ptr, const int16_t *scan, const int16_t *iscan";
specialize qw/vp9_quantize_b/, "$ssse3_x86_64";
diff --git a/vp9/decoder/vp9_decodeframe.c b/vp9/decoder/vp9_decodeframe.c
index 8efae95..8b96abb 100644
--- a/vp9/decoder/vp9_decodeframe.c
+++ b/vp9/decoder/vp9_decodeframe.c
@@ -771,6 +771,7 @@
const uint8_t *data,
const uint8_t *data_end) {
VP9_COMMON *const cm = &pbi->common;
+ const VP9WorkerInterface *const winterface = vp9_get_worker_interface();
const int aligned_cols = mi_cols_aligned_to_sb(cm->mi_cols);
const int tile_cols = 1 << cm->log2_tile_cols;
const int tile_rows = 1 << cm->log2_tile_rows;
@@ -783,7 +784,7 @@
CHECK_MEM_ERROR(cm, pbi->lf_worker.data1,
vpx_memalign(32, sizeof(LFWorkerData)));
pbi->lf_worker.hook = (VP9WorkerHook)vp9_loop_filter_worker;
- if (pbi->max_threads > 1 && !vp9_worker_reset(&pbi->lf_worker)) {
+ if (pbi->max_threads > 1 && !winterface->reset(&pbi->lf_worker)) {
vpx_internal_error(&cm->error, VPX_CODEC_ERROR,
"Loop filter thread creation failed");
}
@@ -869,13 +870,13 @@
// decoding has completed: finish up the loop filter in this thread.
if (mi_row + MI_BLOCK_SIZE >= cm->mi_rows) continue;
- vp9_worker_sync(&pbi->lf_worker);
+ winterface->sync(&pbi->lf_worker);
lf_data->start = lf_start;
lf_data->stop = mi_row;
if (pbi->max_threads > 1) {
- vp9_worker_launch(&pbi->lf_worker);
+ winterface->launch(&pbi->lf_worker);
} else {
- vp9_worker_execute(&pbi->lf_worker);
+ winterface->execute(&pbi->lf_worker);
}
}
}
@@ -884,10 +885,10 @@
// Loopfilter remaining rows in the frame.
if (cm->lf.filter_level) {
LFWorkerData *const lf_data = (LFWorkerData*)pbi->lf_worker.data1;
- vp9_worker_sync(&pbi->lf_worker);
+ winterface->sync(&pbi->lf_worker);
lf_data->start = lf_data->stop;
lf_data->stop = cm->mi_rows;
- vp9_worker_execute(&pbi->lf_worker);
+ winterface->execute(&pbi->lf_worker);
}
// Get last tile data.
@@ -931,6 +932,7 @@
const uint8_t *data,
const uint8_t *data_end) {
VP9_COMMON *const cm = &pbi->common;
+ const VP9WorkerInterface *const winterface = vp9_get_worker_interface();
const uint8_t *bit_reader_end = NULL;
const int aligned_mi_cols = mi_cols_aligned_to_sb(cm->mi_cols);
const int tile_cols = 1 << cm->log2_tile_cols;
@@ -957,11 +959,11 @@
VP9Worker *const worker = &pbi->tile_workers[i];
++pbi->num_tile_workers;
- vp9_worker_init(worker);
+ winterface->init(worker);
CHECK_MEM_ERROR(cm, worker->data1,
vpx_memalign(32, sizeof(TileWorkerData)));
CHECK_MEM_ERROR(cm, worker->data2, vpx_malloc(sizeof(TileInfo)));
- if (i < num_threads - 1 && !vp9_worker_reset(worker)) {
+ if (i < num_threads - 1 && !winterface->reset(worker)) {
vpx_internal_error(&cm->error, VPX_CODEC_ERROR,
"Tile decoder thread creation failed");
}
@@ -1024,9 +1026,9 @@
worker->had_error = 0;
if (i == num_workers - 1 || n == tile_cols - 1) {
- vp9_worker_execute(worker);
+ winterface->execute(worker);
} else {
- vp9_worker_launch(worker);
+ winterface->launch(worker);
}
if (buf->col == tile_cols - 1) {
@@ -1038,7 +1040,7 @@
for (; i > 0; --i) {
VP9Worker *const worker = &pbi->tile_workers[i - 1];
- pbi->mb.corrupted |= !vp9_worker_sync(worker);
+ pbi->mb.corrupted |= !winterface->sync(worker);
}
if (final_worker > -1) {
TileWorkerData *const tile_data =
diff --git a/vp9/decoder/vp9_decoder.c b/vp9/decoder/vp9_decoder.c
index 84cb84a..d154e9d 100644
--- a/vp9/decoder/vp9_decoder.c
+++ b/vp9/decoder/vp9_decoder.c
@@ -76,7 +76,7 @@
cm->error.setjmp = 0;
- vp9_worker_init(&pbi->lf_worker);
+ vp9_get_worker_interface()->init(&pbi->lf_worker);
return pbi;
}
@@ -86,12 +86,12 @@
int i;
vp9_remove_common(cm);
- vp9_worker_end(&pbi->lf_worker);
+ vp9_get_worker_interface()->end(&pbi->lf_worker);
vpx_free(pbi->lf_worker.data1);
vpx_free(pbi->tile_data);
for (i = 0; i < pbi->num_tile_workers; ++i) {
VP9Worker *const worker = &pbi->tile_workers[i];
- vp9_worker_end(worker);
+ vp9_get_worker_interface()->end(worker);
vpx_free(worker->data1);
vpx_free(worker->data2);
}
diff --git a/vp9/decoder/vp9_dthread.c b/vp9/decoder/vp9_dthread.c
index 170ccb5..5dda49a 100644
--- a/vp9/decoder/vp9_dthread.c
+++ b/vp9/decoder/vp9_dthread.c
@@ -138,6 +138,7 @@
int frame_filter_level,
int y_only) {
VP9LfSync *const lf_sync = &pbi->lf_row_sync;
+ const VP9WorkerInterface *const winterface = vp9_get_worker_interface();
// Number of superblock rows and cols
const int sb_rows = mi_cols_aligned_to_sb(cm->mi_rows) >> MI_BLOCK_SIZE_LOG2;
const int tile_cols = 1 << cm->log2_tile_cols;
@@ -197,15 +198,15 @@
// Start loopfiltering
if (i == num_workers - 1) {
- vp9_worker_execute(worker);
+ winterface->execute(worker);
} else {
- vp9_worker_launch(worker);
+ winterface->launch(worker);
}
}
// Wait till all rows are finished
for (i = 0; i < num_workers; ++i) {
- vp9_worker_sync(&pbi->tile_workers[i]);
+ winterface->sync(&pbi->tile_workers[i]);
}
}
diff --git a/vp9/decoder/vp9_thread.c b/vp9/decoder/vp9_thread.c
index 5d31d3d..348bdf6 100644
--- a/vp9/decoder/vp9_thread.c
+++ b/vp9/decoder/vp9_thread.c
@@ -11,71 +11,79 @@
//
// Original source:
// http://git.chromium.org/webm/libwebp.git
-// 100644 blob eff8f2a8c20095aade3c292b0e9292dac6cb3587 src/utils/thread.c
-
+// 100644 blob 08ad4e1fecba302bf1247645e84a7d2779956bc3 src/utils/thread.c
#include <assert.h>
#include <string.h> // for memset()
#include "./vp9_thread.h"
-
-#if defined(__cplusplus) || defined(c_plusplus)
-extern "C" {
-#endif
+#include "vpx_mem/vpx_mem.h"
#if CONFIG_MULTITHREAD
+struct VP9WorkerImpl {  // pthread state for one worker; allocated in reset(), freed in end().
+ pthread_mutex_t mutex_;  // Held by thread_loop() and change_state() while they read or write status_.
+ pthread_cond_t condition_;  // Waited on and signalled under mutex_ when status_ changes.
+ pthread_t thread_;  // Created in reset() running thread_loop(); joined in end().
+};
+
//------------------------------------------------------------------------------
-static THREADFN thread_loop(void *ptr) { // thread loop
- VP9Worker* const worker = (VP9Worker*)ptr;
+static void execute(VP9Worker *const worker); // Forward declaration.
+
+static THREADFN thread_loop(void *ptr) {
+ VP9Worker *const worker = (VP9Worker*)ptr;
int done = 0;
while (!done) {
- pthread_mutex_lock(&worker->mutex_);
+ pthread_mutex_lock(&worker->impl_->mutex_);
while (worker->status_ == OK) { // wait in idling mode
- pthread_cond_wait(&worker->condition_, &worker->mutex_);
+ pthread_cond_wait(&worker->impl_->condition_, &worker->impl_->mutex_);
}
if (worker->status_ == WORK) {
- vp9_worker_execute(worker);
+ execute(worker);
worker->status_ = OK;
} else if (worker->status_ == NOT_OK) { // finish the worker
done = 1;
}
- // signal to the main thread that we're done (for Sync())
- pthread_cond_signal(&worker->condition_);
- pthread_mutex_unlock(&worker->mutex_);
+ // signal to the main thread that we're done (for sync())
+ pthread_cond_signal(&worker->impl_->condition_);
+ pthread_mutex_unlock(&worker->impl_->mutex_);
}
return THREAD_RETURN(NULL); // Thread is finished
}
// main thread state control
-static void change_state(VP9Worker* const worker,
+static void change_state(VP9Worker *const worker,
VP9WorkerStatus new_status) {
- // no-op when attempting to change state on a thread that didn't come up
- if (worker->status_ < OK) return;
+ // No-op when attempting to change state on a thread that didn't come up.
+ // Checking status_ without acquiring the lock first would result in a data
+ // race.
+ if (worker->impl_ == NULL) return;
- pthread_mutex_lock(&worker->mutex_);
- // wait for the worker to finish
- while (worker->status_ != OK) {
- pthread_cond_wait(&worker->condition_, &worker->mutex_);
+ pthread_mutex_lock(&worker->impl_->mutex_);
+ if (worker->status_ >= OK) {
+ // wait for the worker to finish
+ while (worker->status_ != OK) {
+ pthread_cond_wait(&worker->impl_->condition_, &worker->impl_->mutex_);
+ }
+ // assign new status and release the working thread if needed
+ if (new_status != OK) {
+ worker->status_ = new_status;
+ pthread_cond_signal(&worker->impl_->condition_);
+ }
}
- // assign new status and release the working thread if needed
- if (new_status != OK) {
- worker->status_ = new_status;
- pthread_cond_signal(&worker->condition_);
- }
- pthread_mutex_unlock(&worker->mutex_);
+ pthread_mutex_unlock(&worker->impl_->mutex_);
}
#endif // CONFIG_MULTITHREAD
//------------------------------------------------------------------------------
-void vp9_worker_init(VP9Worker* const worker) {
+static void init(VP9Worker *const worker) {
memset(worker, 0, sizeof(*worker));
worker->status_ = NOT_OK;
}
-int vp9_worker_sync(VP9Worker* const worker) {
+static int sync(VP9Worker *const worker) {
#if CONFIG_MULTITHREAD
change_state(worker, OK);
#endif
@@ -83,59 +91,93 @@
return !worker->had_error;
}
-int vp9_worker_reset(VP9Worker* const worker) {
+static int reset(VP9Worker *const worker) {
int ok = 1;
worker->had_error = 0;
if (worker->status_ < OK) {
#if CONFIG_MULTITHREAD
- if (pthread_mutex_init(&worker->mutex_, NULL) ||
- pthread_cond_init(&worker->condition_, NULL)) {
+ worker->impl_ = (VP9WorkerImpl*)vpx_calloc(1, sizeof(*worker->impl_));
+ if (worker->impl_ == NULL) {
return 0;
}
- pthread_mutex_lock(&worker->mutex_);
- ok = !pthread_create(&worker->thread_, NULL, thread_loop, worker);
+ if (pthread_mutex_init(&worker->impl_->mutex_, NULL)) {
+ goto Error;
+ }
+ if (pthread_cond_init(&worker->impl_->condition_, NULL)) {
+ pthread_mutex_destroy(&worker->impl_->mutex_);
+ goto Error;
+ }
+ pthread_mutex_lock(&worker->impl_->mutex_);
+ ok = !pthread_create(&worker->impl_->thread_, NULL, thread_loop, worker);
if (ok) worker->status_ = OK;
- pthread_mutex_unlock(&worker->mutex_);
+ pthread_mutex_unlock(&worker->impl_->mutex_);
+ if (!ok) {
+ pthread_mutex_destroy(&worker->impl_->mutex_);
+ pthread_cond_destroy(&worker->impl_->condition_);
+ Error:
+ vpx_free(worker->impl_);
+ worker->impl_ = NULL;
+ return 0;
+ }
#else
worker->status_ = OK;
#endif
} else if (worker->status_ > OK) {
- ok = vp9_worker_sync(worker);
+ ok = sync(worker);
}
assert(!ok || (worker->status_ == OK));
return ok;
}
-void vp9_worker_execute(VP9Worker* const worker) {
+static void execute(VP9Worker *const worker) {
if (worker->hook != NULL) {
worker->had_error |= !worker->hook(worker->data1, worker->data2);
}
}
-void vp9_worker_launch(VP9Worker* const worker) {
+static void launch(VP9Worker *const worker) {
#if CONFIG_MULTITHREAD
change_state(worker, WORK);
#else
- vp9_worker_execute(worker);
+ execute(worker);
#endif
}
-void vp9_worker_end(VP9Worker* const worker) {
+static void end(VP9Worker *const worker) {
if (worker->status_ >= OK) {
#if CONFIG_MULTITHREAD
change_state(worker, NOT_OK);
- pthread_join(worker->thread_, NULL);
- pthread_mutex_destroy(&worker->mutex_);
- pthread_cond_destroy(&worker->condition_);
+ pthread_join(worker->impl_->thread_, NULL);
+ pthread_mutex_destroy(&worker->impl_->mutex_);
+ pthread_cond_destroy(&worker->impl_->condition_);
#else
worker->status_ = NOT_OK;
#endif
}
+ vpx_free(worker->impl_);
+ worker->impl_ = NULL;
assert(worker->status_ == NOT_OK);
}
//------------------------------------------------------------------------------
-#if defined(__cplusplus) || defined(c_plusplus)
-} // extern "C"
-#endif
+static VP9WorkerInterface g_worker_interface = {  // Active interface; starts as this file's static implementations.
+ init, reset, sync, launch, execute, end
+};
+
+// Installs |winterface| as the active worker interface by copying it.
+// Returns 1 on success, or 0 (leaving the active interface untouched) when
+// |winterface| is NULL or any of its six entry points is NULL.
+int vp9_set_worker_interface(const VP9WorkerInterface* const winterface) {
+  if (winterface == NULL) return 0;
+  if (winterface->init == NULL || winterface->reset == NULL ||
+      winterface->sync == NULL || winterface->launch == NULL ||
+      winterface->execute == NULL || winterface->end == NULL) {
+    return 0;
+  }
+  g_worker_interface = *winterface;
+  return 1;
+}
+
+const VP9WorkerInterface *vp9_get_worker_interface(void) {  // Returns the address of the active interface; never NULL.
+ return &g_worker_interface;
+}
+
+//------------------------------------------------------------------------------
diff --git a/vp9/decoder/vp9_thread.h b/vp9/decoder/vp9_thread.h
index 2f8728d..864579c 100644
--- a/vp9/decoder/vp9_thread.h
+++ b/vp9/decoder/vp9_thread.h
@@ -11,8 +11,7 @@
//
// Original source:
// http://git.chromium.org/webm/libwebp.git
-// 100644 blob 13a61a4c84194c3374080cbf03d881d3cd6af40d src/utils/thread.h
-
+// 100644 blob 7bd451b124ae3b81596abfbcc823e3cb129d3a38 src/utils/thread.h
#ifndef VP9_DECODER_VP9_THREAD_H_
#define VP9_DECODER_VP9_THREAD_H_
@@ -163,40 +162,53 @@
// arguments (data1 and data2), and should return false in case of error.
typedef int (*VP9WorkerHook)(void*, void*);
-// Synchronize object used to launch job in the worker thread
+// Platform-dependent implementation details for the worker.
+typedef struct VP9WorkerImpl VP9WorkerImpl;
+
+// Synchronization object used to launch job in the worker thread
typedef struct {
-#if CONFIG_MULTITHREAD
- pthread_mutex_t mutex_;
- pthread_cond_t condition_;
- pthread_t thread_;
-#endif
+ VP9WorkerImpl *impl_;
VP9WorkerStatus status_;
VP9WorkerHook hook; // hook to call
- void* data1; // first argument passed to 'hook'
- void* data2; // second argument passed to 'hook'
+ void *data1; // first argument passed to 'hook'
+ void *data2; // second argument passed to 'hook'
int had_error; // return value of the last call to 'hook'
} VP9Worker;
-// Must be called first, before any other method.
-void vp9_worker_init(VP9Worker* const worker);
-// Must be called to initialize the object and spawn the thread. Re-entrant.
-// Will potentially launch the thread. Returns false in case of error.
-int vp9_worker_reset(VP9Worker* const worker);
-// Makes sure the previous work is finished. Returns true if worker->had_error
-// was not set and no error condition was triggered by the working thread.
-int vp9_worker_sync(VP9Worker* const worker);
-// Triggers the thread to call hook() with data1 and data2 argument. These
-// hook/data1/data2 can be changed at any time before calling this function,
-// but not be changed afterward until the next call to vp9_worker_sync().
-void vp9_worker_launch(VP9Worker* const worker);
-// This function is similar to vp9_worker_launch() except that it calls the
-// hook directly instead of using a thread. Convenient to bypass the thread
-// mechanism while still using the VP9Worker structs. vp9_worker_sync() must
-// still be called afterward (for error reporting).
-void vp9_worker_execute(VP9Worker* const worker);
-// Kill the thread and terminate the object. To use the object again, one
-// must call vp9_worker_reset() again.
-void vp9_worker_end(VP9Worker* const worker);
+// The interface for all thread-worker related functions. All these functions
+// must be implemented.
+typedef struct {
+ // Must be called first, before any other method.
+ void (*init)(VP9Worker *const worker);
+ // Must be called to initialize the object and spawn the thread. Re-entrant.
+ // Will potentially launch the thread. Returns false in case of error.
+ int (*reset)(VP9Worker *const worker);
+ // Makes sure the previous work is finished. Returns true if worker->had_error
+ // was not set and no error condition was triggered by the working thread.
+ int (*sync)(VP9Worker *const worker);
+ // Triggers the thread to call hook() with data1 and data2 arguments. These
+ // hook/data1/data2 values can be changed at any time before calling this
+ // function, but must not be changed afterward until the next call to sync().
+ void (*launch)(VP9Worker *const worker);
+ // This function is similar to launch() except that it calls the
+ // hook directly instead of using a thread. Convenient to bypass the thread
+ // mechanism while still using the VP9Worker structs. sync() must
+ // still be called afterward (for error reporting).
+ void (*execute)(VP9Worker *const worker);
+ // Kill the thread and terminate the object. To use the object again, one
+ // must call reset() again.
+ void (*end)(VP9Worker *const worker);
+} VP9WorkerInterface;
+
+// Install a new set of threading functions, overriding the defaults. This
+// should be done before any workers are started, i.e., before any encoding or
+// decoding takes place. The contents of the interface struct are copied, so it
+// is safe to free the corresponding memory after this call. This function is
+// not thread-safe. Return false in case of invalid pointer or methods.
+int vp9_set_worker_interface(const VP9WorkerInterface *const winterface);
+
+// Retrieve the currently set thread worker interface.
+const VP9WorkerInterface *vp9_get_worker_interface(void);
//------------------------------------------------------------------------------
diff --git a/vp9/encoder/vp9_aq_complexity.c b/vp9/encoder/vp9_aq_complexity.c
index 0d6b41d..33f9239 100644
--- a/vp9/encoder/vp9_aq_complexity.c
+++ b/vp9/encoder/vp9_aq_complexity.c
@@ -15,8 +15,19 @@
#include "vp9/encoder/vp9_segmentation.h"
-static const double in_frame_q_adj_ratio[MAX_SEGMENTS] =
- {1.0, 2.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0};
+#define AQ_C_SEGMENTS 3
+#define AQ_C_STRENGTHS 3
+static const int aq_c_active_segments[AQ_C_STRENGTHS] = {1, 2, 3};
+static const double aq_c_q_adj_factor[AQ_C_STRENGTHS][AQ_C_SEGMENTS] =
+ {{1.0, 1.0, 1.0}, {1.0, 2.0, 1.0}, {1.0, 1.5, 2.5}};
+static const double aq_c_transitions[AQ_C_STRENGTHS][AQ_C_SEGMENTS] =
+ {{1.0, 1.0, 1.0}, {1.0, 0.25, 0.0}, {1.0, 0.5, 0.25}};
+
+static int get_aq_c_strength(int q_index) {
+ // Approximate base quantizer (truncated to int)
+ int base_quant = vp9_ac_quant(q_index, 0) / 4;
+ return (base_quant > 20) + (base_quant > 45);
+}
void vp9_setup_in_frame_q_adj(VP9_COMP *cpi) {
VP9_COMMON *const cm = &cpi->common;
@@ -29,6 +40,8 @@
cpi->refresh_alt_ref_frame ||
(cpi->refresh_golden_frame && !cpi->rc.is_src_frame_alt_ref)) {
int segment;
+ const int aq_strength = get_aq_c_strength(cm->base_qindex);
+ const int active_segments = aq_c_active_segments[aq_strength];
// Clear down the segment map.
vpx_memset(cpi->segmentation_map, 0, cm->mi_rows * cm->mi_cols);
@@ -36,9 +49,17 @@
// Clear down the complexity map used for rd.
vpx_memset(cpi->complexity_map, 0, cm->mi_rows * cm->mi_cols);
- vp9_enable_segmentation(seg);
vp9_clearall_segfeatures(seg);
+ // Segmentation only makes sense if the target bits per SB is above a
+ // threshold. Below this the overheads will usually outweigh any benefit.
+ if (cpi->rc.sb64_target_rate < 256) {
+ vp9_disable_segmentation(seg);
+ return;
+ }
+
+ vp9_enable_segmentation(seg);
+
// Select delta coding method.
seg->abs_delta = SEGMENT_DELTADATA;
@@ -46,14 +67,14 @@
vp9_disable_segfeature(seg, 0, SEG_LVL_ALT_Q);
// Use some of the segments for in frame Q adjustment.
- for (segment = 1; segment < 2; segment++) {
+ for (segment = 1; segment < active_segments; ++segment) {
int qindex_delta =
vp9_compute_qdelta_by_rate(&cpi->rc, cm->frame_type, cm->base_qindex,
- in_frame_q_adj_ratio[segment]);
+ aq_c_q_adj_factor[aq_strength][segment]);
- // For AQ mode 2, we dont allow Q0 in a segment if the base Q is not 0.
- // Q0 (lossless) implies 4x4 only and in AQ mode 2 a segment Q delta
- // is sometimes applied without going back around the rd loop.
+ // For AQ complexity mode, we don't allow Q0 in a segment if the base
+ // Q is not 0. Q0 (lossless) implies 4x4 only and in this mode a segment
+ // Q delta is sometimes applied without going back around the rd loop.
// This could lead to an illegal combination of partition size and q.
if ((cm->base_qindex != 0) && ((cm->base_qindex + qindex_delta) == 0)) {
qindex_delta = -cm->base_qindex + 1;
@@ -66,10 +87,15 @@
}
}
-// Select a segment for the current SB64
+// Select a segment for the current SB64 block.
+// The choice of segment for a block depends on the ratio of the projected
+// bits for the block vs a target average.
+// An "aq_strength" value determines how many segments are supported,
+// the set of transition points to use and the extent of the quantizer
+// adjustment for each segment (configured in vp9_setup_in_frame_q_adj()).
void vp9_select_in_frame_q_segment(VP9_COMP *cpi,
- int mi_row, int mi_col,
- int output_enabled, int projected_rate) {
+ int mi_row, int mi_col,
+ int output_enabled, int projected_rate) {
VP9_COMMON *const cm = &cpi->common;
const int mi_offset = mi_row * cm->mi_cols + mi_col;
@@ -89,11 +115,22 @@
// It is converted to bits * 256 units.
const int target_rate = (cpi->rc.sb64_target_rate * xmis * ymis * 256) /
(bw * bh);
+ const int aq_strength = get_aq_c_strength(cm->base_qindex);
+ const int active_segments = aq_c_active_segments[aq_strength];
- if (projected_rate < (target_rate / 4)) {
- segment = 1;
- } else {
- segment = 0;
+ // The number of segments considered and the transition points used to
+ // select them is determined by the "aq_strength" value.
+ // Currently this loop only supports segments that reduce Q (i.e. where
+ // there is undershoot).
+ // The loop counts down towards segment 0 which is the default segment
+ // with no Q adjustment.
+ segment = active_segments - 1;
+ while (segment > 0) {
+ if (projected_rate <
+ (target_rate * aq_c_transitions[aq_strength][segment])) {
+ break;
+ }
+ --segment;
}
if (target_rate > 0) {
diff --git a/vp9/encoder/vp9_block.h b/vp9/encoder/vp9_block.h
index 61d9d5d..ab7991e 100644
--- a/vp9/encoder/vp9_block.h
+++ b/vp9/encoder/vp9_block.h
@@ -35,6 +35,7 @@
// Quantizer setings
int16_t *quant_fp;
+ int16_t *round_fp;
int16_t *quant;
int16_t *quant_shift;
int16_t *zbin;
@@ -110,6 +111,9 @@
int use_lp32x32fdct;
int skip_encode;
+ // use fast quantization process
+ int quant_fp;
+
// skip forward transform and quantization
int skip_txfm;
diff --git a/vp9/encoder/vp9_denoiser.c b/vp9/encoder/vp9_denoiser.c
index ff66abb..f6393e0 100644
--- a/vp9/encoder/vp9_denoiser.c
+++ b/vp9/encoder/vp9_denoiser.c
@@ -15,34 +15,84 @@
#include "vp9/common/vp9_reconinter.h"
#include "vp9/encoder/vp9_denoiser.h"
+/* The VP9 denoiser is a work-in-progress. It currently is only designed to work
+ * with speed 6, though it (inexplicably) seems to also work with speed 5 (one
+ * would need to modify the source code in vp9_pickmode.c and vp9_encoder.c to
+ * make the calls to the vp9_denoiser_* functions when in speed 5).
+ *
+ * The implementation is very similar to that of the VP8 denoiser. While
+ * choosing the motion vectors / reference frames, the denoiser is run, and if
+ * it did not modify the signal too much, the denoised block is copied to the
+ * signal.
+ */
+
+#ifdef OUTPUT_YUV_DENOISED
+static void make_grayscale(YV12_BUFFER_CONFIG *yuv);
+#endif
+
static const int widths[] = {4, 4, 8, 8, 8, 16, 16, 16, 32, 32, 32, 64, 64};
static const int heights[] = {4, 8, 4, 8, 16, 8, 16, 32, 16, 32, 64, 32, 64};
-int vp9_denoiser_filter() {
- return 0;
+static int absdiff_thresh(BLOCK_SIZE bs, int increase_denoising) {
+ (void)bs;
+ return 3 + (increase_denoising ? 1 : 0);
}
-static int update_running_avg(const uint8_t *mc_avg, int mc_avg_stride,
- uint8_t *avg, int avg_stride,
- const uint8_t *sig, int sig_stride,
- int increase_denoising, BLOCK_SIZE bs) {
+static int delta_thresh(BLOCK_SIZE bs, int increase_denoising) {
+ (void)bs;
+ (void)increase_denoising;
+ return 4;
+}
+
+static int noise_motion_thresh(BLOCK_SIZE bs, int increase_denoising) {
+ (void)bs;
+ (void)increase_denoising;
+ return 25 * 25;
+}
+
+static unsigned int sse_thresh(BLOCK_SIZE bs, int increase_denoising) {
+ return widths[bs] * heights[bs] * (increase_denoising ? 60 : 40);
+}
+
+static int sse_diff_thresh(BLOCK_SIZE bs, int increase_denoising,
+ int mv_row, int mv_col) {
+ if (mv_row * mv_row + mv_col * mv_col >
+ noise_motion_thresh(bs, increase_denoising)) {
+ return 0;
+ } else {
+ return widths[bs] * heights[bs] * 20;
+ }
+}
+
+static int total_adj_strong_thresh(BLOCK_SIZE bs, int increase_denoising) {
+ return widths[bs] * heights[bs] * (increase_denoising ? 3 : 2);
+}
+
+static int total_adj_weak_thresh(BLOCK_SIZE bs, int increase_denoising) {
+ return widths[bs] * heights[bs] * (increase_denoising ? 3 : 2);
+}
+
+static VP9_DENOISER_DECISION denoiser_filter(const uint8_t *sig, int sig_stride,
+ const uint8_t *mc_avg,
+ int mc_avg_stride,
+ uint8_t *avg, int avg_stride,
+ int increase_denoising,
+ BLOCK_SIZE bs) {
int r, c;
- int diff, adj, absdiff;
- int shift_inc1 = 0, shift_inc2 = 1;
+ const uint8_t *sig_start = sig;
+ const uint8_t *mc_avg_start = mc_avg;
+ uint8_t *avg_start = avg;
+ int diff, adj, absdiff, delta;
int adj_val[] = {3, 4, 6};
int total_adj = 0;
- if (increase_denoising) {
- shift_inc1 = 1;
- shift_inc2 = 2;
- }
-
+ // First attempt to apply a strong temporal denoising filter.
for (r = 0; r < heights[bs]; ++r) {
for (c = 0; c < widths[bs]; ++c) {
diff = mc_avg[c] - sig[c];
absdiff = abs(diff);
- if (absdiff <= 3 + shift_inc1) {
+ if (absdiff <= absdiff_thresh(bs, increase_denoising)) {
avg[c] = mc_avg[c];
total_adj += diff;
} else {
@@ -70,7 +120,47 @@
avg += avg_stride;
mc_avg += mc_avg_stride;
}
- return total_adj;
+
+ // If the strong filter did not modify the signal too much, we're all set.
+ if (abs(total_adj) <= total_adj_strong_thresh(bs, increase_denoising)) {
+ return FILTER_BLOCK;
+ }
+
+ // Otherwise, we try to dampen the filter if the delta is not too high.
+ delta = ((abs(total_adj) - total_adj_strong_thresh(bs, increase_denoising))
+ >> 8) + 1;
+ if (delta > delta_thresh(bs, increase_denoising)) {
+ return COPY_BLOCK;
+ }
+
+ mc_avg = mc_avg_start;
+ avg = avg_start;
+ sig = sig_start;
+ for (r = 0; r < heights[bs]; ++r) {
+ for (c = 0; c < widths[bs]; ++c) {
+ diff = mc_avg[c] - sig[c];
+ adj = abs(diff);
+ if (adj > delta) {
+ adj = delta;
+ }
+ if (diff > 0) {
+ avg[c] = MAX(0, avg[c] - adj);
+ total_adj += adj;
+ } else {
+ avg[c] = MIN(UINT8_MAX, avg[c] + adj);
+ total_adj -= adj;
+ }
+ }
+ sig += sig_stride;
+ avg += avg_stride;
+ mc_avg += mc_avg_stride;
+ }
+
+ // We can use the filter if it has been sufficiently dampened
+ if (abs(total_adj) <= total_adj_weak_thresh(bs, increase_denoising)) {
+ return FILTER_BLOCK;
+ }
+ return COPY_BLOCK;
}
static uint8_t *block_start(uint8_t *framebuf, int stride,
@@ -78,33 +168,24 @@
return framebuf + (stride * mi_row * 8) + (mi_col * 8);
}
-void copy_block(uint8_t *dest, int dest_stride,
- uint8_t *src, int src_stride, BLOCK_SIZE bs) {
- int r, c;
+static void copy_block(uint8_t *dest, int dest_stride,
+ const uint8_t *src, int src_stride, BLOCK_SIZE bs) {
+ int r;
for (r = 0; r < heights[bs]; ++r) {
- for (c = 0; c < widths[bs]; ++c) {
- dest[c] = src[c];
- }
+ vpx_memcpy(dest, src, widths[bs]);
dest += dest_stride;
src += src_stride;
}
}
-static int perform_motion_compensation(VP9_DENOISER *denoiser, MACROBLOCK *mb,
- BLOCK_SIZE bs, int increase_denoising,
- int mi_row, int mi_col) {
- // constants
- // TODO(tkopp): empirically determine good constants, or functions of block
- // size.
- int NOISE_MOTION_THRESHOLD = 25 * 25;
- int SSE_DIFF_THRESHOLD = heights[bs] * widths[bs] * 20;
- unsigned int SSE_THRESH = heights[bs] * widths[bs] * 40;
- unsigned int SSE_THRESH_HI = heights[bs] * widths[bs] * 60;
-
+static VP9_DENOISER_DECISION perform_motion_compensation(VP9_DENOISER *denoiser,
+ MACROBLOCK *mb,
+ BLOCK_SIZE bs,
+ int increase_denoising,
+ int mi_row,
+ int mi_col) {
int mv_col, mv_row;
int sse_diff = denoiser->zero_mv_sse - denoiser->best_sse;
- int sse_diff_thresh;
- int sse_thresh;
MV_REFERENCE_FRAME frame;
MACROBLOCKD *filter_mbd = &mb->e_mbd;
MB_MODE_INFO *mbmi = &filter_mbd->mi[0]->mbmi;
@@ -116,20 +197,15 @@
saved_pre[0] = filter_mbd->plane[0].pre[0];
saved_pre[1] = filter_mbd->plane[0].pre[1];
- // Decide the threshold for sum squared error.
mv_col = denoiser->best_sse_mv.as_mv.col;
mv_row = denoiser->best_sse_mv.as_mv.row;
- if (mv_row * mv_row + mv_col * mv_col > NOISE_MOTION_THRESHOLD) {
- sse_diff_thresh = 0;
- } else {
- sse_diff_thresh = SSE_DIFF_THRESHOLD;
- }
frame = denoiser->best_reference_frame;
// If the best reference frame uses inter-prediction and there is enough of a
// difference in sum-squared-error, use it.
- if (frame != INTRA_FRAME && sse_diff > sse_diff_thresh) {
+ if (frame != INTRA_FRAME &&
+ sse_diff > sse_diff_thresh(bs, increase_denoising, mv_row, mv_col)) {
mbmi->ref_frame[0] = denoiser->best_reference_frame;
mbmi->mode = denoiser->best_sse_inter_mode;
mbmi->mv[0] = denoiser->best_sse_mv;
@@ -212,11 +288,12 @@
mv_row = denoiser->best_sse_mv.as_mv.row;
mv_col = denoiser->best_sse_mv.as_mv.col;
- sse_thresh = denoiser->increase_denoising ? SSE_THRESH_HI : SSE_THRESH;
- // TODO(tkopp) why 8?
- if (denoiser->best_sse > sse_thresh ||
- mv_row * mv_row + mv_col * mv_col > 8 * NOISE_MOTION_THRESHOLD) {
+ if (denoiser->best_sse > sse_thresh(bs, increase_denoising)) {
+ return COPY_BLOCK;
+ }
+ if (mv_row * mv_row + mv_col * mv_col >
+ 8 * noise_motion_thresh(bs, increase_denoising)) {
return COPY_BLOCK;
}
return FILTER_BLOCK;
@@ -224,8 +301,7 @@
void vp9_denoiser_denoise(VP9_DENOISER *denoiser, MACROBLOCK *mb,
int mi_row, int mi_col, BLOCK_SIZE bs) {
- int decision = COPY_BLOCK;
-
+ VP9_DENOISER_DECISION decision = FILTER_BLOCK;
YV12_BUFFER_CONFIG avg = denoiser->running_avg_y[INTRA_FRAME];
YV12_BUFFER_CONFIG mc_avg = denoiser->mc_running_avg_y;
uint8_t *avg_start = block_start(avg.y_buffer, avg.y_stride, mi_row, mi_col);
@@ -236,28 +312,30 @@
decision = perform_motion_compensation(denoiser, mb, bs,
denoiser->increase_denoising,
mi_row, mi_col);
- update_running_avg(mc_avg_start, mc_avg.y_stride, avg_start, avg.y_stride,
- mb->plane[0].src.buf, mb->plane[0].src.stride, 0, bs);
if (decision == FILTER_BLOCK) {
- // TODO(tkopp)
+ decision = denoiser_filter(src.buf, src.stride,
+ mc_avg_start, mc_avg.y_stride,
+ avg_start, avg.y_stride,
+ 0, bs);
}
- if (decision == COPY_BLOCK) {
+
+ if (decision == FILTER_BLOCK) {
+ copy_block(src.buf, src.stride, avg_start, avg.y_stride, bs);
+ } else { // COPY_BLOCK
copy_block(avg_start, avg.y_stride, src.buf, src.stride, bs);
}
}
static void copy_frame(YV12_BUFFER_CONFIG dest, const YV12_BUFFER_CONFIG src) {
- int r, c;
+ int r;
const uint8_t *srcbuf = src.y_buffer;
uint8_t *destbuf = dest.y_buffer;
assert(dest.y_width == src.y_width);
assert(dest.y_height == src.y_height);
for (r = 0; r < dest.y_height; ++r) {
- for (c = 0; c < dest.y_width; ++c) {
- destbuf[c] = srcbuf[c];
- }
+ vpx_memcpy(destbuf, srcbuf, dest.y_width);
destbuf += dest.y_stride;
srcbuf += src.y_stride;
}
@@ -325,6 +403,9 @@
vp9_denoiser_free(denoiser);
return 1;
}
+#ifdef OUTPUT_YUV_DENOISED
+ make_grayscale(&denoiser->running_avg_y[i]);
+#endif
}
fail = vp9_alloc_frame_buffer(&denoiser->mc_running_avg_y, width, height,
@@ -333,7 +414,9 @@
vp9_denoiser_free(denoiser);
return 1;
}
-
+#ifdef OUTPUT_YUV_DENOISED
+ make_grayscale(&denoiser->mc_running_avg_y);
+#endif
denoiser->increase_denoising = 0;
return 0;
@@ -353,3 +436,22 @@
vp9_free_frame_buffer(&denoiser->mc_running_avg_y);
}
}
+
+#ifdef OUTPUT_YUV_DENOISED
+static void make_grayscale(YV12_BUFFER_CONFIG *yuv) {
+ int r, c;
+ uint8_t *u = yuv->u_buffer;
+ uint8_t *v = yuv->v_buffer;
+
+ // The '/2's are there because we have a 440 buffer, but we want to output
+ // 420.
+ for (r = 0; r < yuv->uv_height / 2; ++r) {
+ for (c = 0; c < yuv->uv_width / 2; ++c) {
+ u[c] = UINT8_MAX / 2;
+ v[c] = UINT8_MAX / 2;
+ }
+ u += yuv->uv_stride + yuv->uv_width / 2;
+ v += yuv->uv_stride + yuv->uv_width / 2;
+ }
+}
+#endif
diff --git a/vp9/encoder/vp9_denoiser.h b/vp9/encoder/vp9_denoiser.h
index 7855989..cbb6423 100644
--- a/vp9/encoder/vp9_denoiser.h
+++ b/vp9/encoder/vp9_denoiser.h
@@ -18,10 +18,10 @@
extern "C" {
#endif
-enum vp9_denoiser_decision {
+typedef enum vp9_denoiser_decision {
COPY_BLOCK,
FILTER_BLOCK
-};
+} VP9_DENOISER_DECISION;
typedef struct vp9_denoiser {
YV12_BUFFER_CONFIG running_avg_y[MAX_REF_FRAMES];
diff --git a/vp9/encoder/vp9_encodeframe.c b/vp9/encoder/vp9_encodeframe.c
index 94143d9..dab3ff7 100644
--- a/vp9/encoder/vp9_encodeframe.c
+++ b/vp9/encoder/vp9_encodeframe.c
@@ -3074,6 +3074,7 @@
init_encode_frame_mb_context(cpi);
set_prev_mi(cm);
+ x->quant_fp = cpi->sf.use_quant_fp;
x->skip_txfm = 0;
if (sf->use_nonrd_pick_mode) {
// Initialize internal buffer pointers for rtc coding, where non-RD
diff --git a/vp9/encoder/vp9_encodemb.c b/vp9/encoder/vp9_encodemb.c
index 0961f3b..d97226e 100644
--- a/vp9/encoder/vp9_encodemb.c
+++ b/vp9/encoder/vp9_encodemb.c
@@ -306,6 +306,56 @@
MACROBLOCKD *const xd = &x->e_mbd;
const struct macroblock_plane *const p = &x->plane[plane];
const struct macroblockd_plane *const pd = &xd->plane[plane];
+ const scan_order *const scan_order = &vp9_default_scan_orders[tx_size];
+ int16_t *const coeff = BLOCK_OFFSET(p->coeff, block);
+ int16_t *const qcoeff = BLOCK_OFFSET(p->qcoeff, block);
+ int16_t *const dqcoeff = BLOCK_OFFSET(pd->dqcoeff, block);
+ uint16_t *const eob = &p->eobs[block];
+ const int diff_stride = 4 * num_4x4_blocks_wide_lookup[plane_bsize];
+ int i, j;
+ const int16_t *src_diff;
+ txfrm_block_to_raster_xy(plane_bsize, tx_size, block, &i, &j);
+ src_diff = &p->src_diff[4 * (j * diff_stride + i)];
+
+ switch (tx_size) {
+ case TX_32X32:
+ fdct32x32(x->use_lp32x32fdct, src_diff, coeff, diff_stride);
+ vp9_quantize_b_32x32(coeff, 1024, x->skip_block, p->zbin, p->round,
+ p->quant, p->quant_shift, qcoeff, dqcoeff,
+ pd->dequant, p->zbin_extra, eob, scan_order->scan,
+ scan_order->iscan);
+ break;
+ case TX_16X16:
+ vp9_fdct16x16(src_diff, coeff, diff_stride);
+ vp9_quantize_fp(coeff, 256, x->skip_block, p->zbin, p->round_fp,
+ p->quant_fp, p->quant_shift, qcoeff, dqcoeff,
+ pd->dequant, p->zbin_extra, eob,
+ scan_order->scan, scan_order->iscan);
+ break;
+ case TX_8X8:
+ vp9_fdct8x8(src_diff, coeff, diff_stride);
+ vp9_quantize_fp(coeff, 64, x->skip_block, p->zbin, p->round_fp,
+ p->quant_fp, p->quant_shift, qcoeff, dqcoeff,
+ pd->dequant, p->zbin_extra, eob,
+ scan_order->scan, scan_order->iscan);
+ break;
+ case TX_4X4:
+ x->fwd_txm4x4(src_diff, coeff, diff_stride);
+ vp9_quantize_fp(coeff, 16, x->skip_block, p->zbin, p->round_fp,
+ p->quant_fp, p->quant_shift, qcoeff, dqcoeff,
+ pd->dequant, p->zbin_extra, eob,
+ scan_order->scan, scan_order->iscan);
+ break;
+ default:
+ assert(0);
+ }
+}
+
+void vp9_xform_quant_dc(MACROBLOCK *x, int plane, int block,
+ BLOCK_SIZE plane_bsize, TX_SIZE tx_size) {
+ MACROBLOCKD *const xd = &x->e_mbd;
+ const struct macroblock_plane *const p = &x->plane[plane];
+ const struct macroblockd_plane *const pd = &xd->plane[plane];
int16_t *const coeff = BLOCK_OFFSET(p->coeff, block);
int16_t *const qcoeff = BLOCK_OFFSET(p->qcoeff, block);
int16_t *const dqcoeff = BLOCK_OFFSET(pd->dqcoeff, block);
@@ -424,11 +474,15 @@
if (x->skip_txfm == 0) {
// full forward transform and quantization
- if (!x->skip_recode)
- vp9_xform_quant(x, plane, block, plane_bsize, tx_size);
+ if (!x->skip_recode) {
+ if (x->quant_fp)
+ vp9_xform_quant_fp(x, plane, block, plane_bsize, tx_size);
+ else
+ vp9_xform_quant(x, plane, block, plane_bsize, tx_size);
+ }
} else if (x->skip_txfm == 2) {
// fast path forward transform and quantization
- vp9_xform_quant_fp(x, plane, block, plane_bsize, tx_size);
+ vp9_xform_quant_dc(x, plane, block, plane_bsize, tx_size);
} else {
// skip forward transform
p->eobs[block] = 0;
diff --git a/vp9/encoder/vp9_encodemb.h b/vp9/encoder/vp9_encodemb.h
index 3196c99..0b8c3d2 100644
--- a/vp9/encoder/vp9_encodemb.h
+++ b/vp9/encoder/vp9_encodemb.h
@@ -24,6 +24,8 @@
void vp9_encode_sby_pass1(MACROBLOCK *x, BLOCK_SIZE bsize);
void vp9_xform_quant_fp(MACROBLOCK *x, int plane, int block,
BLOCK_SIZE plane_bsize, TX_SIZE tx_size);
+void vp9_xform_quant_dc(MACROBLOCK *x, int plane, int block,
+ BLOCK_SIZE plane_bsize, TX_SIZE tx_size);
void vp9_xform_quant(MACROBLOCK *x, int plane, int block,
BLOCK_SIZE plane_bsize, TX_SIZE tx_size);
diff --git a/vp9/encoder/vp9_encoder.c b/vp9/encoder/vp9_encoder.c
index 54fb68b..a1007c0 100644
--- a/vp9/encoder/vp9_encoder.c
+++ b/vp9/encoder/vp9_encoder.c
@@ -65,7 +65,7 @@
// #define OUTPUT_YUV_REC
#ifdef OUTPUT_YUV_DENOISED
-FILE *yuv_denoised_file;
+FILE *yuv_denoised_file = NULL;
#endif
#ifdef OUTPUT_YUV_SRC
FILE *yuv_file;
@@ -199,6 +199,13 @@
vpx_free(cpi->source_diff_var);
cpi->source_diff_var = NULL;
}
+
+#if CONFIG_FP_MB_STATS
+ if (cpi->use_fp_mb_stats) {
+ vpx_free(cpi->twopass.this_frame_mb_stats.mb_stats);
+ cpi->twopass.this_frame_mb_stats.mb_stats = NULL;
+ }
+#endif
}
static void save_coding_context(VP9_COMP *cpi) {
@@ -657,9 +664,11 @@
cpi->ext_refresh_frame_context_pending = 0;
#if CONFIG_DENOISING
- vp9_denoiser_alloc(&(cpi->denoiser), cm->width, cm->height,
- cm->subsampling_x, cm->subsampling_y,
- VP9_ENC_BORDER_IN_PIXELS);
+ if (cpi->oxcf.noise_sensitivity > 0) {
+ vp9_denoiser_alloc(&(cpi->denoiser), cm->width, cm->height,
+ cm->subsampling_x, cm->subsampling_y,
+ VP9_ENC_BORDER_IN_PIXELS);
+ }
#endif
}
@@ -766,6 +775,17 @@
sizeof(*cpi->mbgraph_stats[i].mb_stats), 1));
}
+#if CONFIG_FP_MB_STATS
+ cpi->use_fp_mb_stats = 0;
+ if (cpi->use_fp_mb_stats) {
+ // a place holder for the mb stats obtained from the first pass
+ CHECK_MEM_ERROR(cm, cpi->twopass.this_frame_mb_stats.mb_stats,
+ vpx_calloc(cm->MBs * sizeof(FIRSTPASS_MB_STATS), 1));
+ } else {
+ cpi->twopass.this_frame_mb_stats.mb_stats = NULL;
+ }
+#endif
+
cpi->refresh_alt_ref_frame = 0;
// Note that at the moment multi_arf will not work with svc.
@@ -839,8 +859,12 @@
cpi->mb.nmvsadcost_hp[1] = &cpi->mb.nmvsadcosts_hp[1][MV_MAX];
cal_nmvsadcosts_hp(cpi->mb.nmvsadcost_hp);
+#if CONFIG_DENOISING
#ifdef OUTPUT_YUV_DENOISED
- yuv_denoised_file = fopen("denoised.yuv", "ab");
+ if (cpi->oxcf.noise_sensitivity > 0) {
+ yuv_denoised_file = fopen("denoised.yuv", "ab");
+ }
+#endif
#endif
#ifdef OUTPUT_YUV_SRC
yuv_file = fopen("bd.yuv", "ab");
@@ -1079,7 +1103,9 @@
}
#if CONFIG_DENOISING
- vp9_denoiser_free(&(cpi->denoiser));
+ if (cpi->oxcf.noise_sensitivity > 0) {
+ vp9_denoiser_free(&(cpi->denoiser));
+ }
#endif
dealloc_compressor_data(cpi);
@@ -1093,8 +1119,12 @@
vp9_remove_common(&cpi->common);
vpx_free(cpi);
+#if CONFIG_DENOISING
#ifdef OUTPUT_YUV_DENOISED
- fclose(yuv_denoised_file);
+ if (yuv_denoised_file != NULL) {  // cpi was freed above; test the handle.
+ fclose(yuv_denoised_file);
+ }
+#endif
#endif
#ifdef OUTPUT_YUV_SRC
fclose(yuv_file);
@@ -1305,6 +1335,7 @@
}
#endif
+#if CONFIG_DENOISING
#if defined(OUTPUT_YUV_DENOISED)
// The denoiser buffer is allocated as a YUV 440 buffer. This function writes it
// as YUV 420. We simply use the top-left pixels of the UV buffers, since we do
@@ -1336,6 +1367,7 @@
} while (--h);
}
#endif
+#endif
#ifdef OUTPUT_YUV_REC
void vp9_write_yuv_rec_frame(VP9_COMMON *cm) {
@@ -1574,12 +1606,14 @@
&cm->ref_frame_map[cpi->lst_fb_idx], cm->new_fb_idx);
}
#if CONFIG_DENOISING
- vp9_denoiser_update_frame_info(&cpi->denoiser,
- *cpi->Source,
- cpi->common.frame_type,
- cpi->refresh_alt_ref_frame,
- cpi->refresh_golden_frame,
- cpi->refresh_last_frame);
+ if (cpi->oxcf.noise_sensitivity > 0) {
+ vp9_denoiser_update_frame_info(&cpi->denoiser,
+ *cpi->Source,
+ cpi->common.frame_type,
+ cpi->refresh_alt_ref_frame,
+ cpi->refresh_golden_frame,
+ cpi->refresh_last_frame);
+ }
#endif
}
@@ -2171,16 +2205,21 @@
}
#endif
-#ifdef OUTPUT_YUV_DENOISED
- vp9_write_yuv_frame_420(&cpi->denoiser.running_avg_y[INTRA_FRAME],
- yuv_denoised_file);
-#endif
#ifdef OUTPUT_YUV_SRC
vp9_write_yuv_frame(cpi->Source, yuv_file);
#endif
set_speed_features(cpi);
+#if CONFIG_DENOISING
+#ifdef OUTPUT_YUV_DENOISED
+ if (cpi->oxcf.noise_sensitivity > 0) {
+ vp9_write_yuv_frame_420(&cpi->denoiser.running_avg_y[INTRA_FRAME],
+ yuv_denoised_file);
+ }
+#endif
+#endif
+
// Decide q and q bounds.
q = vp9_rc_pick_q_and_bounds(cpi, &bottom_index, &top_index);
diff --git a/vp9/encoder/vp9_encoder.h b/vp9/encoder/vp9_encoder.h
index 5e8430a..b38f9c2 100644
--- a/vp9/encoder/vp9_encoder.h
+++ b/vp9/encoder/vp9_encoder.h
@@ -348,6 +348,10 @@
uint64_t time_pick_lpf;
uint64_t time_encode_sb_row;
+#if CONFIG_FP_MB_STATS
+ int use_fp_mb_stats;
+#endif
+
TWO_PASS twopass;
YV12_BUFFER_CONFIG alt_ref_buffer;
diff --git a/vp9/encoder/vp9_firstpass.c b/vp9/encoder/vp9_firstpass.c
index 971b159..d505ebf 100644
--- a/vp9/encoder/vp9_firstpass.c
+++ b/vp9/encoder/vp9_firstpass.c
@@ -89,23 +89,43 @@
// Read frame stats at an offset from the current position.
-static int read_frame_stats(const TWO_PASS *p,
- FIRSTPASS_STATS *frame_stats, int offset) {
- const FIRSTPASS_STATS *fps_ptr = p->stats_in;
-
- // Check legality of offset.
- if (offset >= 0) {
- if (&fps_ptr[offset] >= p->stats_in_end)
- return EOF;
- } else if (offset < 0) {
- if (&fps_ptr[offset] < p->stats_in_start)
- return EOF;
+static const FIRSTPASS_STATS *read_frame_stats(const TWO_PASS *p, int offset) {
+ if ((offset >= 0 && p->stats_in + offset >= p->stats_in_end) ||
+ (offset < 0 && p->stats_in + offset < p->stats_in_start)) {
+ return NULL;
}
- *frame_stats = fps_ptr[offset];
+ return &p->stats_in[offset];
+}
+
+#if CONFIG_FP_MB_STATS
+static int input_mb_stats(FIRSTPASS_FRAME_MB_STATS *fp_frame_stats,
+ const VP9_COMMON *const cm) {
+ // Binary mode: the file holds raw FIRSTPASS_MB_STATS records.
+ FILE *const fpfile = fopen("firstpass_mb.stt", "rb");
+ int ret;
+ if (fpfile == NULL) return EOF;  // No stats file: report as end of data.
+ fseek(fpfile, cm->current_video_frame * cm->MBs * sizeof(FIRSTPASS_MB_STATS),
+ SEEK_SET);
+ ret = fread(fp_frame_stats->mb_stats, sizeof(FIRSTPASS_MB_STATS), cm->MBs,
+ fpfile);
+ fclose(fpfile);
+ if (ret < cm->MBs) {
+ return EOF;
+ }
return 1;
}
+static void output_mb_stats(FIRSTPASS_FRAME_MB_STATS *fp_frame_stats,
+ const VP9_COMMON *const cm) {
+ // Append raw records in binary mode; skip the dump if the open fails.
+ FILE *const fpfile = fopen("firstpass_mb.stt", "ab");
+ if (fpfile == NULL) return;
+ fwrite(fp_frame_stats->mb_stats, sizeof(FIRSTPASS_MB_STATS), cm->MBs, fpfile);
+ fclose(fpfile);
+}
+#endif
+
static int input_stats(TWO_PASS *p, FIRSTPASS_STATS *fps) {
if (p->stats_in >= p->stats_in_end)
return EOF;
@@ -452,6 +472,10 @@
const MV zero_mv = {0, 0};
const YV12_BUFFER_CONFIG *first_ref_buf = lst_yv12;
+#if CONFIG_FP_MB_STATS
+ FIRSTPASS_FRAME_MB_STATS *this_frame_mb_stats = &twopass->this_frame_mb_stats;
+#endif
+
vp9_clear_system_state();
set_first_pass_params(cpi);
@@ -579,6 +603,17 @@
// Accumulate the intra error.
intra_error += (int64_t)this_error;
+#if CONFIG_FP_MB_STATS
+ if (cpi->use_fp_mb_stats) {
+ this_frame_mb_stats->mb_stats[mb_row * cm->mb_cols + mb_col].mode =
+ DC_PRED;
+ this_frame_mb_stats->mb_stats[mb_row * cm->mb_cols + mb_col].err =
+ this_error;
+ this_frame_mb_stats->mb_stats[mb_row * cm->mb_cols + mb_col].mv.as_int
+ = 0;
+ }
+#endif
+
// Set up limit values for motion vectors to prevent them extending
// outside the UMV borders.
x->mv_col_min = -((mb_col * 16) + BORDER_MV_PIXELS_B16);
@@ -704,6 +739,17 @@
best_ref_mv.as_int = mv.as_int;
+#if CONFIG_FP_MB_STATS
+ if (cpi->use_fp_mb_stats) {
+ this_frame_mb_stats->mb_stats[mb_row * cm->mb_cols + mb_col].mode =
+ NEWMV;
+ this_frame_mb_stats->mb_stats[mb_row * cm->mb_cols + mb_col].err =
+ motion_error;
+ this_frame_mb_stats->mb_stats[mb_row * cm->mb_cols + mb_col].mv.
+ as_int = mv.as_int;
+ }
+#endif
+
if (mv.as_int) {
++mvcount;
@@ -808,6 +854,12 @@
twopass->this_frame_stats = fps;
output_stats(&twopass->this_frame_stats, cpi->output_pkt_list);
accumulate_stats(&twopass->total_stats, &fps);
+
+#if CONFIG_FP_MB_STATS
+ if (cpi->use_fp_mb_stats) {
+ output_mb_stats(this_frame_mb_stats, cm);
+ }
+#endif
}
// Copy the previous Last Frame back into gf and and arf buffers if
@@ -1053,24 +1105,16 @@
// score in the frame following a flash frame. The offset passed in should
// reflect this.
static int detect_flash(const TWO_PASS *twopass, int offset) {
- FIRSTPASS_STATS next_frame;
+ const FIRSTPASS_STATS *const next_frame = read_frame_stats(twopass, offset);
- int flash_detected = 0;
-
- // Read the frame data.
- // The return is FALSE (no flash detected) if not a valid frame
- if (read_frame_stats(twopass, &next_frame, offset) != EOF) {
- // What we are looking for here is a situation where there is a
- // brief break in prediction (such as a flash) but subsequent frames
- // are reasonably well predicted by an earlier (pre flash) frame.
- // The recovery after a flash is indicated by a high pcnt_second_ref
- // compared to pcnt_inter.
- if (next_frame.pcnt_second_ref > next_frame.pcnt_inter &&
- next_frame.pcnt_second_ref >= 0.5)
- flash_detected = 1;
- }
-
- return flash_detected;
+ // What we are looking for here is a situation where there is a
+ // brief break in prediction (such as a flash) but subsequent frames
+ // are reasonably well predicted by an earlier (pre flash) frame.
+ // The recovery after a flash is indicated by a high pcnt_second_ref
+ // compared to pcnt_inter.
+ return next_frame != NULL &&
+ next_frame->pcnt_second_ref > next_frame->pcnt_inter &&
+ next_frame->pcnt_second_ref >= 0.5;
}
// Update the motion related elements to the GF arf boost calculation.
@@ -1130,7 +1174,6 @@
static int calc_arf_boost(VP9_COMP *cpi, int offset,
int f_frames, int b_frames,
int *f_boost, int *b_boost) {
- FIRSTPASS_STATS this_frame;
TWO_PASS *const twopass = &cpi->twopass;
int i;
double boost_score = 0.0;
@@ -1144,11 +1187,12 @@
// Search forward from the proposed arf/next gf position.
for (i = 0; i < f_frames; ++i) {
- if (read_frame_stats(twopass, &this_frame, (i + offset)) == EOF)
+ const FIRSTPASS_STATS *this_frame = read_frame_stats(twopass, i + offset);
+ if (this_frame == NULL)
break;
// Update the motion related elements to the boost calculation.
- accumulate_frame_motion_stats(&this_frame,
+ accumulate_frame_motion_stats(this_frame,
&this_frame_mv_in_out, &mv_in_out_accumulator,
&abs_mv_in_out_accumulator,
&mv_ratio_accumulator);
@@ -1160,12 +1204,12 @@
// Accumulate the effect of prediction quality decay.
if (!flash_detected) {
- decay_accumulator *= get_prediction_decay_rate(&cpi->common, &this_frame);
+ decay_accumulator *= get_prediction_decay_rate(&cpi->common, this_frame);
decay_accumulator = decay_accumulator < MIN_DECAY_FACTOR
? MIN_DECAY_FACTOR : decay_accumulator;
}
- boost_score += decay_accumulator * calc_frame_boost(twopass, &this_frame,
+ boost_score += decay_accumulator * calc_frame_boost(twopass, this_frame,
this_frame_mv_in_out);
}
@@ -1181,11 +1225,12 @@
// Search backward towards last gf position.
for (i = -1; i >= -b_frames; --i) {
- if (read_frame_stats(twopass, &this_frame, (i + offset)) == EOF)
+ const FIRSTPASS_STATS *this_frame = read_frame_stats(twopass, i + offset);
+ if (this_frame == NULL)
break;
// Update the motion related elements to the boost calculation.
- accumulate_frame_motion_stats(&this_frame,
+ accumulate_frame_motion_stats(this_frame,
&this_frame_mv_in_out, &mv_in_out_accumulator,
&abs_mv_in_out_accumulator,
&mv_ratio_accumulator);
@@ -1197,12 +1242,12 @@
// Cumulative effect of prediction quality decay.
if (!flash_detected) {
- decay_accumulator *= get_prediction_decay_rate(&cpi->common, &this_frame);
+ decay_accumulator *= get_prediction_decay_rate(&cpi->common, this_frame);
decay_accumulator = decay_accumulator < MIN_DECAY_FACTOR
? MIN_DECAY_FACTOR : decay_accumulator;
}
- boost_score += decay_accumulator * calc_frame_boost(twopass, &this_frame,
+ boost_score += decay_accumulator * calc_frame_boost(twopass, this_frame,
this_frame_mv_in_out);
}
*b_boost = (int)boost_score;
@@ -2167,6 +2212,12 @@
// Update the total stats remaining structure.
subtract_stats(&twopass->total_left_stats, &this_frame);
+
+#if CONFIG_FP_MB_STATS
+ if (cpi->use_fp_mb_stats) {
+ input_mb_stats(&twopass->this_frame_mb_stats, cm);
+ }
+#endif
}
void vp9_twopass_postencode_update(VP9_COMP *cpi) {
diff --git a/vp9/encoder/vp9_firstpass.h b/vp9/encoder/vp9_firstpass.h
index 1ee56a3..7e4c9ee 100644
--- a/vp9/encoder/vp9_firstpass.h
+++ b/vp9/encoder/vp9_firstpass.h
@@ -18,6 +18,18 @@
extern "C" {
#endif
+#if CONFIG_FP_MB_STATS
+typedef struct {
+ PREDICTION_MODE mode;
+ int err;
+ int_mv mv;
+} FIRSTPASS_MB_STATS;
+
+typedef struct {
+ FIRSTPASS_MB_STATS *mb_stats;
+} FIRSTPASS_FRAME_MB_STATS;
+#endif
+
typedef struct {
double frame;
double intra_error;
@@ -76,6 +88,10 @@
double kf_intra_err_min;
double gf_intra_err_min;
+#if CONFIG_FP_MB_STATS
+ FIRSTPASS_FRAME_MB_STATS this_frame_mb_stats;
+#endif
+
// Projected total bits available for a key frame group of frames
int64_t kf_group_bits;
diff --git a/vp9/encoder/vp9_pickmode.c b/vp9/encoder/vp9_pickmode.c
index d9edeae..22ad064 100644
--- a/vp9/encoder/vp9_pickmode.c
+++ b/vp9/encoder/vp9_pickmode.c
@@ -141,7 +141,7 @@
// TODO(jingning) exploiting adaptive motion search control in non-RD
// mode decision too.
- step_param = 6;
+ step_param = cpi->sf.mv.fullpel_search_step_param;
for (i = LAST_FRAME; i <= LAST_FRAME && cpi->common.show_frame; ++i) {
if ((x->pred_mv_sad[ref] >> 3) > x->pred_mv_sad[i]) {
@@ -440,7 +440,9 @@
int i;
#if CONFIG_DENOISING
- vp9_denoiser_reset_frame_stats(&cpi->denoiser);
+ if (cpi->oxcf.noise_sensitivity > 0) {
+ vp9_denoiser_reset_frame_stats(&cpi->denoiser);
+ }
#endif
if (cpi->sf.reuse_inter_pred_sby) {
@@ -658,7 +660,9 @@
}
#if CONFIG_DENOISING
- vp9_denoiser_update_frame_stats(&cpi->denoiser, mbmi, sse_y, this_mode);
+ if (cpi->oxcf.noise_sensitivity > 0) {
+ vp9_denoiser_update_frame_stats(&cpi->denoiser, mbmi, sse_y, this_mode);
+ }
#endif
if (this_rd < best_rd || x->skip) {
@@ -774,7 +778,9 @@
}
#if CONFIG_DENOISING
- vp9_denoiser_denoise(&cpi->denoiser, x, mi_row, mi_col, bsize);
+ if (cpi->oxcf.noise_sensitivity > 0) {
+ vp9_denoiser_denoise(&cpi->denoiser, x, mi_row, mi_col, bsize);
+ }
#endif
return INT64_MAX;
diff --git a/vp9/encoder/vp9_quantize.c b/vp9/encoder/vp9_quantize.c
index f817bcc..1846da9 100644
--- a/vp9/encoder/vp9_quantize.c
+++ b/vp9/encoder/vp9_quantize.c
@@ -42,9 +42,9 @@
}
void vp9_quantize_dc_32x32(const int16_t *coeff_ptr, int skip_block,
- const int16_t *round_ptr, const int16_t quant,
- int16_t *qcoeff_ptr, int16_t *dqcoeff_ptr,
- const int16_t dequant_ptr, uint16_t *eob_ptr) {
+ const int16_t *round_ptr, const int16_t quant,
+ int16_t *qcoeff_ptr, int16_t *dqcoeff_ptr,
+ const int16_t dequant_ptr, uint16_t *eob_ptr) {
int eob = -1;
if (!skip_block) {
@@ -63,6 +63,47 @@
*eob_ptr = eob + 1;
}
+void vp9_quantize_fp_c(const int16_t *coeff_ptr, intptr_t count,
+ int skip_block,
+ const int16_t *zbin_ptr, const int16_t *round_ptr,
+ const int16_t *quant_ptr, const int16_t *quant_shift_ptr,
+ int16_t *qcoeff_ptr, int16_t *dqcoeff_ptr,
+ const int16_t *dequant_ptr,
+ int zbin_oq_value, uint16_t *eob_ptr,
+ const int16_t *scan, const int16_t *iscan) {
+ int i, eob = -1;
+ // TODO(jingning) Decide the need of these arguments after the
+ // quantization process is completed.
+ (void)zbin_ptr;
+ (void)quant_shift_ptr;
+ (void)zbin_oq_value;
+ (void)iscan;
+
+ vpx_memset(qcoeff_ptr, 0, count * sizeof(int16_t));
+ vpx_memset(dqcoeff_ptr, 0, count * sizeof(int16_t));
+
+ if (!skip_block) {
+ // Quantization pass: add the rounding offset to each coefficient's
+ // magnitude, scale by quant_ptr (>> 16), and record the last nonzero index.
+ for (i = 0; i < count; i++) {
+ const int rc = scan[i];
+ const int coeff = coeff_ptr[rc];
+ const int coeff_sign = (coeff >> 31);
+ const int abs_coeff = (coeff ^ coeff_sign) - coeff_sign;
+
+ int tmp = clamp(abs_coeff + round_ptr[rc != 0], INT16_MIN, INT16_MAX);
+ tmp = (tmp * quant_ptr[rc != 0]) >> 16;
+
+ qcoeff_ptr[rc] = (tmp ^ coeff_sign) - coeff_sign;
+ dqcoeff_ptr[rc] = qcoeff_ptr[rc] * dequant_ptr[rc != 0];
+
+ if (tmp)
+ eob = i;
+ }
+ }
+ *eob_ptr = eob + 1;
+}
+
void vp9_quantize_b_c(const int16_t *coeff_ptr, intptr_t count,
int skip_block,
const int16_t *zbin_ptr, const int16_t *round_ptr,
@@ -207,11 +248,16 @@
const int qrounding_factor = q == 0 ? 64 : 48;
for (i = 0; i < 2; ++i) {
+ int qrounding_factor_fp = i == 0 ? 48 : 42;
+ if (q == 0)
+ qrounding_factor_fp = 64;
+
// y
quant = i == 0 ? vp9_dc_quant(q, cm->y_dc_delta_q)
: vp9_ac_quant(q, 0);
invert_quant(&quants->y_quant[q][i], &quants->y_quant_shift[q][i], quant);
quants->y_quant_fp[q][i] = (1 << 16) / quant;
+ quants->y_round_fp[q][i] = (qrounding_factor_fp * quant) >> 7;
quants->y_zbin[q][i] = ROUND_POWER_OF_TWO(qzbin_factor * quant, 7);
quants->y_round[q][i] = (qrounding_factor * quant) >> 7;
cm->y_dequant[q][i] = quant;
@@ -222,6 +268,7 @@
invert_quant(&quants->uv_quant[q][i],
&quants->uv_quant_shift[q][i], quant);
quants->uv_quant_fp[q][i] = (1 << 16) / quant;
+ quants->uv_round_fp[q][i] = (qrounding_factor_fp * quant) >> 7;
quants->uv_zbin[q][i] = ROUND_POWER_OF_TWO(qzbin_factor * quant, 7);
quants->uv_round[q][i] = (qrounding_factor * quant) >> 7;
cm->uv_dequant[q][i] = quant;
@@ -240,6 +287,7 @@
for (i = 2; i < 8; i++) {
quants->y_quant[q][i] = quants->y_quant[q][1];
quants->y_quant_fp[q][i] = quants->y_quant_fp[q][1];
+ quants->y_round_fp[q][i] = quants->y_round_fp[q][1];
quants->y_quant_shift[q][i] = quants->y_quant_shift[q][1];
quants->y_zbin[q][i] = quants->y_zbin[q][1];
quants->y_round[q][i] = quants->y_round[q][1];
@@ -247,6 +295,7 @@
quants->uv_quant[q][i] = quants->uv_quant[q][1];
quants->uv_quant_fp[q][i] = quants->uv_quant_fp[q][1];
+ quants->uv_round_fp[q][i] = quants->uv_round_fp[q][1];
quants->uv_quant_shift[q][i] = quants->uv_quant_shift[q][1];
quants->uv_zbin[q][i] = quants->uv_zbin[q][1];
quants->uv_round[q][i] = quants->uv_round[q][1];
@@ -276,6 +325,7 @@
// Y
x->plane[0].quant = quants->y_quant[qindex];
x->plane[0].quant_fp = quants->y_quant_fp[qindex];
+ x->plane[0].round_fp = quants->y_round_fp[qindex];
x->plane[0].quant_shift = quants->y_quant_shift[qindex];
x->plane[0].zbin = quants->y_zbin[qindex];
x->plane[0].round = quants->y_round[qindex];
@@ -286,6 +336,7 @@
for (i = 1; i < 3; i++) {
x->plane[i].quant = quants->uv_quant[qindex];
x->plane[i].quant_fp = quants->uv_quant_fp[qindex];
+ x->plane[i].round_fp = quants->uv_round_fp[qindex];
x->plane[i].quant_shift = quants->uv_quant_shift[qindex];
x->plane[i].zbin = quants->uv_zbin[qindex];
x->plane[i].round = quants->uv_round[qindex];
diff --git a/vp9/encoder/vp9_quantize.h b/vp9/encoder/vp9_quantize.h
index 0e90462..24e4491 100644
--- a/vp9/encoder/vp9_quantize.h
+++ b/vp9/encoder/vp9_quantize.h
@@ -28,6 +28,8 @@
// if we want to deprecate the current use of y_quant.
DECLARE_ALIGNED(16, int16_t, y_quant_fp[QINDEX_RANGE][8]);
DECLARE_ALIGNED(16, int16_t, uv_quant_fp[QINDEX_RANGE][8]);
+ DECLARE_ALIGNED(16, int16_t, y_round_fp[QINDEX_RANGE][8]);
+ DECLARE_ALIGNED(16, int16_t, uv_round_fp[QINDEX_RANGE][8]);
DECLARE_ALIGNED(16, int16_t, uv_quant[QINDEX_RANGE][8]);
DECLARE_ALIGNED(16, int16_t, uv_quant_shift[QINDEX_RANGE][8]);
diff --git a/vp9/encoder/vp9_rdopt.c b/vp9/encoder/vp9_rdopt.c
index e1a03a6..a4cdd33 100644
--- a/vp9/encoder/vp9_rdopt.c
+++ b/vp9/encoder/vp9_rdopt.c
@@ -4289,6 +4289,10 @@
rd->thresh_mult[THR_NEWA] += 1000;
rd->thresh_mult[THR_NEWG] += 1000;
+ // Adjust threshold only in real time mode, which only uses the last
+ // reference frame.
+ rd->thresh_mult[THR_NEWMV] += sf->elevate_newmv_thresh;
+
rd->thresh_mult[THR_NEARMV] += 1000;
rd->thresh_mult[THR_NEARA] += 1000;
rd->thresh_mult[THR_COMP_NEARESTLA] += 1000;
@@ -4351,10 +4355,6 @@
rd->thresh_mult[THR_COMP_NEARGA ] = INT_MAX;
rd->thresh_mult[THR_COMP_NEWGA ] = INT_MAX;
}
-
- // Adjust threshold only in real time mode, which only use last reference
- // frame.
- rd->thresh_mult[THR_NEWMV] += sf->elevate_newmv_thresh;
}
void vp9_set_rd_speed_thresholds_sub8x8(VP9_COMP *cpi) {
diff --git a/vp9/encoder/vp9_segmentation.c b/vp9/encoder/vp9_segmentation.c
index 574df62..897ae01 100644
--- a/vp9/encoder/vp9_segmentation.c
+++ b/vp9/encoder/vp9_segmentation.c
@@ -27,6 +27,8 @@
void vp9_disable_segmentation(struct segmentation *seg) {
seg->enabled = 0;
+ seg->update_map = 0;
+ seg->update_data = 0;
}
void vp9_set_segment_data(struct segmentation *seg,
diff --git a/vp9/encoder/vp9_speed_features.c b/vp9/encoder/vp9_speed_features.c
index 8111870..1eac02f 100644
--- a/vp9/encoder/vp9_speed_features.c
+++ b/vp9/encoder/vp9_speed_features.c
@@ -282,6 +282,8 @@
sf->elevate_newmv_thresh = 2000;
}
if (speed >= 7) {
+ sf->use_quant_fp = cm->frame_type == KEY_FRAME ? 0 : 1;
+ sf->mv.fullpel_search_step_param = 10;
sf->lpf_pick = LPF_PICK_MINIMAL_LPF;
sf->encode_breakout_thresh = (MIN(cm->width, cm->height) >= 720) ?
800 : 300;
@@ -311,6 +313,7 @@
sf->mv.reduce_first_step_size = 0;
sf->mv.auto_mv_step_size = 0;
sf->mv.max_step_search_steps = MAX_MVSEARCH_STEPS;
+ sf->mv.fullpel_search_step_param = 6;
sf->comp_inter_joint_search_thresh = BLOCK_4X4;
sf->adaptive_rd_thresh = 0;
sf->use_lastframe_partitioning = LAST_FRAME_PARTITION_OFF;
@@ -318,6 +321,7 @@
sf->use_lp32x32fdct = 0;
sf->adaptive_motion_search = 0;
sf->adaptive_pred_interp_filter = 0;
+ sf->use_quant_fp = 0;
sf->reference_masking = 0;
sf->partition_search_type = SEARCH_PARTITION;
sf->less_rectangular_check = 0;
@@ -357,7 +361,6 @@
sf->search_type_check_frequency = 50;
sf->encode_breakout_thresh = 0;
sf->elevate_newmv_thresh = 0;
-
// Recode loop tolerence %.
sf->recode_tolerance = 25;
diff --git a/vp9/encoder/vp9_speed_features.h b/vp9/encoder/vp9_speed_features.h
index f6d6311..4ccb77a 100644
--- a/vp9/encoder/vp9_speed_features.h
+++ b/vp9/encoder/vp9_speed_features.h
@@ -162,6 +162,9 @@
// Control when to stop subpel search
int subpel_force_stop;
+
+ // This variable sets the step_param used in full pel motion search.
+ int fullpel_search_step_param;
} MV_SPEED_FEATURES;
typedef struct SPEED_FEATURES {
@@ -284,6 +287,9 @@
// was selected, and 2 means we use 8 tap if no 8x8 filter mode was selected.
int adaptive_pred_interp_filter;
+ // Fast quantization process path
+ int use_quant_fp;
+
// Search through variable block partition types in non-RD mode decision
// encoding process for RTC.
int partition_check;
diff --git a/vp9/encoder/x86/vp9_quantize_ssse3_x86_64.asm b/vp9/encoder/x86/vp9_quantize_ssse3_x86_64.asm
index 48ccef8..62da865 100644
--- a/vp9/encoder/x86/vp9_quantize_ssse3_x86_64.asm
+++ b/vp9/encoder/x86/vp9_quantize_ssse3_x86_64.asm
@@ -217,3 +217,185 @@
INIT_XMM ssse3
QUANTIZE_FN b, 7
QUANTIZE_FN b_32x32, 7
+
+%macro QUANTIZE_FP 2
+cglobal quantize_%1, 0, %2, 15, coeff, ncoeff, skip, zbin, round, quant, \
+ shift, qcoeff, dqcoeff, dequant, zbin_oq, \
+ eob, scan, iscan
+ cmp dword skipm, 0
+ jne .blank
+
+ ; actual quantize loop - setup pointers, rounders, etc.
+ movifnidn coeffq, coeffmp
+ movifnidn ncoeffq, ncoeffmp
+ mov r2, dequantmp
+ movifnidn zbinq, zbinmp
+ movifnidn roundq, roundmp
+ movifnidn quantq, quantmp
+ mova m1, [roundq] ; m1 = round
+ mova m2, [quantq] ; m2 = quant
+%ifidn %1, b_32x32
+; TODO(jingning) to be continued with 32x32 quantization process
+ pcmpeqw m5, m5
+ psrlw m5, 15
+ paddw m0, m5
+ paddw m1, m5
+ psrlw m0, 1 ; m0 = (m0 + 1) / 2
+ psrlw m1, 1 ; m1 = (m1 + 1) / 2
+%endif
+ mova m3, [r2q] ; m3 = dequant
+ mov r3, qcoeffmp
+ mov r4, dqcoeffmp
+ mov r5, iscanmp
+%ifidn %1, b_32x32
+ psllw m4, 1
+%endif
+ pxor m5, m5 ; m5 = dedicated zero
+ DEFINE_ARGS coeff, ncoeff, d1, qcoeff, dqcoeff, iscan, d2, d3, d4, d5, d6, eob
+ lea coeffq, [ coeffq+ncoeffq*2]
+ lea iscanq, [ iscanq+ncoeffq*2]
+ lea qcoeffq, [ qcoeffq+ncoeffq*2]
+ lea dqcoeffq, [dqcoeffq+ncoeffq*2]
+ neg ncoeffq
+
+ ; get DC and first 15 AC coeffs
+ mova m9, [ coeffq+ncoeffq*2+ 0] ; m9 = c[i]
+ mova m10, [ coeffq+ncoeffq*2+16] ; m10 = c[i]
+ pabsw m6, m9 ; m6 = abs(m9)
+ pabsw m11, m10 ; m11 = abs(m10)
+ pcmpeqw m7, m7
+ pcmpeqw m12, m12
+
+ paddsw m6, m1 ; m6 += round
+ punpckhqdq m1, m1
+ paddsw m11, m1 ; m11 += round
+ pmulhw m8, m6, m2 ; m8 = m6*q>>16
+ punpckhqdq m2, m2
+ pmulhw m13, m11, m2 ; m13 = m11*q>>16
+ psignw m8, m9 ; m8 = reinsert sign
+ psignw m13, m10 ; m13 = reinsert sign
+ mova [qcoeffq+ncoeffq*2+ 0], m8
+ mova [qcoeffq+ncoeffq*2+16], m13
+%ifidn %1, b_32x32
+ pabsw m8, m8
+ pabsw m13, m13
+%endif
+ pmullw m8, m3 ; dqc[i] = qc[i] * q
+ punpckhqdq m3, m3
+ pmullw m13, m3 ; dqc[i] = qc[i] * q
+%ifidn %1, b_32x32
+ psrlw m8, 1
+ psrlw m13, 1
+ psignw m8, m9
+ psignw m13, m10
+%endif
+ mova [dqcoeffq+ncoeffq*2+ 0], m8
+ mova [dqcoeffq+ncoeffq*2+16], m13
+ pcmpeqw m8, m5 ; m8 = c[i] == 0
+ pcmpeqw m13, m5 ; m13 = c[i] == 0
+ mova m6, [ iscanq+ncoeffq*2+ 0] ; m6 = scan[i]
+ mova m11, [ iscanq+ncoeffq*2+16] ; m11 = scan[i]
+ psubw m6, m7 ; m6 = scan[i] + 1
+ psubw m11, m12 ; m11 = scan[i] + 1
+ pandn m8, m6 ; m8 = max(eob)
+ pandn m13, m11 ; m13 = max(eob)
+ pmaxsw m8, m13
+ add ncoeffq, mmsize
+ jz .accumulate_eob
+
+.ac_only_loop:
+ mova m9, [ coeffq+ncoeffq*2+ 0] ; m9 = c[i]
+ mova m10, [ coeffq+ncoeffq*2+16] ; m10 = c[i]
+ pabsw m6, m9 ; m6 = abs(m9)
+ pabsw m11, m10 ; m11 = abs(m10)
+ pcmpeqw m7, m7
+ pcmpeqw m12, m12
+%ifidn %1, b_32x32
+ pmovmskb r6, m7
+ pmovmskb r2, m12
+ or r6, r2
+ jz .skip_iter
+%endif
+ paddsw m6, m1 ; m6 += round
+ paddsw m11, m1 ; m11 += round
+ pmulhw m14, m6, m2 ; m14 = m6*q>>16
+ pmulhw m13, m11, m2 ; m13 = m11*q>>16
+ psignw m14, m9 ; m14 = reinsert sign
+ psignw m13, m10 ; m13 = reinsert sign
+ mova [qcoeffq+ncoeffq*2+ 0], m14
+ mova [qcoeffq+ncoeffq*2+16], m13
+%ifidn %1, b_32x32
+ pabsw m14, m14
+ pabsw m13, m13
+%endif
+ pmullw m14, m3 ; dqc[i] = qc[i] * q
+ pmullw m13, m3 ; dqc[i] = qc[i] * q
+%ifidn %1, b_32x32
+ psrlw m14, 1
+ psrlw m13, 1
+ psignw m14, m9
+ psignw m13, m10
+%endif
+ mova [dqcoeffq+ncoeffq*2+ 0], m14
+ mova [dqcoeffq+ncoeffq*2+16], m13
+ pcmpeqw m14, m5 ; m14 = c[i] == 0
+ pcmpeqw m13, m5 ; m13 = c[i] == 0
+ mova m6, [ iscanq+ncoeffq*2+ 0] ; m6 = scan[i]
+ mova m11, [ iscanq+ncoeffq*2+16] ; m11 = scan[i]
+ psubw m6, m7 ; m6 = scan[i] + 1
+ psubw m11, m12 ; m11 = scan[i] + 1
+ pandn m14, m6 ; m14 = max(eob)
+ pandn m13, m11 ; m13 = max(eob)
+ pmaxsw m8, m14
+ pmaxsw m8, m13
+ add ncoeffq, mmsize
+ jl .ac_only_loop
+
+%ifidn %1, b_32x32
+ jmp .accumulate_eob
+.skip_iter:
+ mova [qcoeffq+ncoeffq*2+ 0], m5
+ mova [qcoeffq+ncoeffq*2+16], m5
+ mova [dqcoeffq+ncoeffq*2+ 0], m5
+ mova [dqcoeffq+ncoeffq*2+16], m5
+ add ncoeffq, mmsize
+ jl .ac_only_loop
+%endif
+
+.accumulate_eob:
+ ; horizontally accumulate/max eobs and write into [eob] memory pointer
+ mov r2, eobmp
+ pshufd m7, m8, 0xe
+ pmaxsw m8, m7
+ pshuflw m7, m8, 0xe
+ pmaxsw m8, m7
+ pshuflw m7, m8, 0x1
+ pmaxsw m8, m7
+ pextrw r6, m8, 0
+ mov [r2], r6
+ RET
+
+ ; skip-block, i.e. just write all zeroes
+.blank:
+ mov r0, dqcoeffmp
+ movifnidn ncoeffq, ncoeffmp
+ mov r2, qcoeffmp
+ mov r3, eobmp
+ DEFINE_ARGS dqcoeff, ncoeff, qcoeff, eob
+ lea dqcoeffq, [dqcoeffq+ncoeffq*2]
+ lea qcoeffq, [ qcoeffq+ncoeffq*2]
+ neg ncoeffq
+ pxor m7, m7
+.blank_loop:
+ mova [dqcoeffq+ncoeffq*2+ 0], m7
+ mova [dqcoeffq+ncoeffq*2+16], m7
+ mova [qcoeffq+ncoeffq*2+ 0], m7
+ mova [qcoeffq+ncoeffq*2+16], m7
+ add ncoeffq, mmsize
+ jl .blank_loop
+ mov word [eobq], 0
+ RET
+%endmacro
+
+INIT_XMM ssse3
+QUANTIZE_FP fp, 7
diff --git a/vp9/vp9_iface_common.h b/vp9/vp9_iface_common.h
index d60883c..b90c37b 100644
--- a/vp9/vp9_iface_common.h
+++ b/vp9/vp9_iface_common.h
@@ -31,6 +31,7 @@
img->fmt = VPX_IMG_FMT_I420;
bps = 12;
}
+ img->bit_depth = 8;
img->w = yv12->y_stride;
img->h = ALIGN_POWER_OF_TWO(yv12->y_height + 2 * VP9_ENC_BORDER_IN_PIXELS, 3);
img->d_w = yv12->y_crop_width;
diff --git a/vpx/src/vpx_image.c b/vpx/src/vpx_image.c
index 36eda95..dc8fcbc 100644
--- a/vpx/src/vpx_image.c
+++ b/vpx/src/vpx_image.c
@@ -40,13 +40,13 @@
}
}
-static vpx_image_t *img_alloc_helper(vpx_image_t *img,
- vpx_img_fmt_t fmt,
- unsigned int d_w,
- unsigned int d_h,
- unsigned int buf_align,
- unsigned int stride_align,
- unsigned char *img_data) {
+static vpx_image_t *img_alloc_helper(vpx_image_t *img,
+ vpx_img_fmt_t fmt,
+ unsigned int d_w,
+ unsigned int d_h,
+ unsigned int buf_align,
+ unsigned int stride_align,
+ unsigned char *img_data) {
unsigned int h, w, s, xcs, ycs, bps;
int align;
@@ -94,6 +94,21 @@
case VPX_IMG_FMT_VPXYV12:
bps = 12;
break;
+ case VPX_IMG_FMT_I422:
+ bps = 16;
+ break;
+ case VPX_IMG_FMT_I444:
+ bps = 24;
+ break;
+ case VPX_IMG_FMT_I42016:
+ bps = 24;
+ break;
+ case VPX_IMG_FMT_I42216:
+ bps = 32;
+ break;
+ case VPX_IMG_FMT_I44416:
+ bps = 48;
+ break;
default:
bps = 16;
break;
@@ -105,6 +120,9 @@
case VPX_IMG_FMT_YV12:
case VPX_IMG_FMT_VPXI420:
case VPX_IMG_FMT_VPXYV12:
+ case VPX_IMG_FMT_I422:
+ case VPX_IMG_FMT_I42016:
+ case VPX_IMG_FMT_I42216:
xcs = 1;
break;
default:
@@ -156,6 +174,7 @@
goto fail;
img->fmt = fmt;
+ img->bit_depth = (fmt & VPX_IMG_FMT_HIGH) ? 16 : 8;
img->w = w;
img->h = h;
img->x_chroma_shift = xcs;
diff --git a/vpx/vpx_codec.h b/vpx/vpx_codec.h
index 03d2dec..45e7023 100644
--- a/vpx/vpx_codec.h
+++ b/vpx/vpx_codec.h
@@ -212,6 +212,15 @@
vpx_codec_priv_t *priv; /**< Algorithm private storage */
} vpx_codec_ctx_t;
+ /*!\brief Bit depth for codec
+ *
+ * This enumeration determines the bit depth of the codec.
+ */
+ typedef enum vpx_bit_depth {
+ VPX_BITS_8, /**< 8 bits */
+ VPX_BITS_10, /**< 10 bits */
+ VPX_BITS_12 /**< 12 bits */
+ } vpx_bit_depth_t;
/*
* Library Version Number Interface
diff --git a/vpx/vpx_image.h b/vpx/vpx_image.h
index d45b003..7b04b70 100644
--- a/vpx/vpx_image.h
+++ b/vpx/vpx_image.h
@@ -103,8 +103,9 @@
vpx_img_fmt_t fmt; /**< Image Format */
/* Image storage dimensions */
- unsigned int w; /**< Stored image width */
- unsigned int h; /**< Stored image height */
+ unsigned int w; /**< Stored image width */
+ unsigned int h; /**< Stored image height */
+ unsigned int bit_depth; /**< Stored image bit-depth */
/* Image display dimensions */
unsigned int d_w; /**< Displayed image width */
diff --git a/vpxdec.c b/vpxdec.c
index a3a1da5..1213ab6 100644
--- a/vpxdec.c
+++ b/vpxdec.c
@@ -900,7 +900,8 @@
len = y4m_write_file_header(buf, sizeof(buf),
vpx_input_ctx.width,
vpx_input_ctx.height,
- &vpx_input_ctx.framerate, img->fmt);
+ &vpx_input_ctx.framerate,
+ img->fmt, 8);
if (do_md5) {
MD5Update(&md5_ctx, (md5byte *)buf, (unsigned int)len);
} else {
diff --git a/vpxenc.c b/vpxenc.c
index d46a83e..fce6807 100644
--- a/vpxenc.c
+++ b/vpxenc.c
@@ -756,6 +756,7 @@
input->framerate.numerator = input->y4m.fps_n;
input->framerate.denominator = input->y4m.fps_d;
input->fmt = input->y4m.vpx_fmt;
+ input->bit_depth = input->y4m.bit_depth;
} else
fatal("Unsupported Y4M stream.");
} else if (input->detect.buf_read == 4 && fourcc_is_ivf(input->detect.buf)) {
@@ -1533,6 +1534,7 @@
input.framerate.numerator = 30;
input.framerate.denominator = 1;
input.only_i420 = 1;
+ input.bit_depth = 0;
/* First parse the global configuration values, because we want to apply
* other parameters on top of the default configuration provided by the
diff --git a/y4menc.c b/y4menc.c
index 8b1c95e..9211452 100644
--- a/y4menc.c
+++ b/y4menc.c
@@ -8,16 +8,48 @@
* be found in the AUTHORS file in the root of the source tree.
*/
+#include <assert.h>
#include "./y4menc.h"
int y4m_write_file_header(char *buf, size_t len, int width, int height,
const struct VpxRational *framerate,
- vpx_img_fmt_t fmt) {
- const char *const color = fmt == VPX_IMG_FMT_444A ? "C444alpha\n" :
- fmt == VPX_IMG_FMT_I444 ? "C444\n" :
- fmt == VPX_IMG_FMT_I422 ? "C422\n" :
- "C420jpeg\n";
-
+ vpx_img_fmt_t fmt, unsigned int bit_depth) {
+ const char *color;
+ switch (bit_depth) {
+ case 8:
+ color = fmt == VPX_IMG_FMT_444A ? "C444alpha\n" :
+ fmt == VPX_IMG_FMT_I444 ? "C444\n" :
+ fmt == VPX_IMG_FMT_I422 ? "C422\n" :
+ "C420jpeg\n";
+ break;
+ case 9:
+ color = fmt == VPX_IMG_FMT_I44416 ? "C444p9 XYSCSS=444P9\n" :
+ fmt == VPX_IMG_FMT_I42216 ? "C422p9 XYSCSS=422P9\n" :
+ "C420p9 XYSCSS=420P9\n";
+ break;
+ case 10:
+ color = fmt == VPX_IMG_FMT_I44416 ? "C444p10 XYSCSS=444P10\n" :
+ fmt == VPX_IMG_FMT_I42216 ? "C422p10 XYSCSS=422P10\n" :
+ "C420p10 XYSCSS=420P10\n";
+ break;
+ case 12:
+ color = fmt == VPX_IMG_FMT_I44416 ? "C444p12 XYSCSS=444P12\n" :
+ fmt == VPX_IMG_FMT_I42216 ? "C422p12 XYSCSS=422P12\n" :
+ "C420p12 XYSCSS=420P12\n";
+ break;
+ case 14:
+ color = fmt == VPX_IMG_FMT_I44416 ? "C444p14 XYSCSS=444P14\n" :
+ fmt == VPX_IMG_FMT_I42216 ? "C422p14 XYSCSS=422P14\n" :
+ "C420p14 XYSCSS=420P14\n";
+ break;
+ case 16:
+ color = fmt == VPX_IMG_FMT_I44416 ? "C444p16 XYSCSS=444P16\n" :
+ fmt == VPX_IMG_FMT_I42216 ? "C422p16 XYSCSS=422P16\n" :
+ "C420p16 XYSCSS=420P16\n";
+ break;
+ default:
+ assert(0);
+ }
return snprintf(buf, len, "YUV4MPEG2 W%u H%u F%u:%u I%c %s", width, height,
framerate->numerator, framerate->denominator, 'p', color);
}
diff --git a/y4menc.h b/y4menc.h
index 0fabf56..69d5904 100644
--- a/y4menc.h
+++ b/y4menc.h
@@ -23,7 +23,7 @@
int y4m_write_file_header(char *buf, size_t len, int width, int height,
const struct VpxRational *framerate,
- vpx_img_fmt_t fmt);
+ vpx_img_fmt_t fmt, unsigned int bit_depth);
int y4m_write_frame_header(char *buf, size_t len);
#ifdef __cplusplus
diff --git a/y4minput.c b/y4minput.c
index 90c5310..b005b71 100644
--- a/y4minput.c
+++ b/y4minput.c
@@ -737,15 +737,52 @@
return -1;
}
_y4m->vpx_fmt = VPX_IMG_FMT_I420;
- _y4m->vpx_bps = 12;
+ _y4m->bps = 12;
+ _y4m->bit_depth = 8;
if (strcmp(_y4m->chroma_type, "420") == 0 ||
strcmp(_y4m->chroma_type, "420jpeg") == 0) {
_y4m->src_c_dec_h = _y4m->dst_c_dec_h = _y4m->src_c_dec_v = _y4m->dst_c_dec_v = 2;
_y4m->dst_buf_read_sz = _y4m->pic_w * _y4m->pic_h
+ 2 * ((_y4m->pic_w + 1) / 2) * ((_y4m->pic_h + 1) / 2);
- /*Natively supported: no conversion required.*/
+ /* Natively supported: no conversion required. */
_y4m->aux_buf_sz = _y4m->aux_buf_read_sz = 0;
_y4m->convert = y4m_convert_null;
+ } else if (strcmp(_y4m->chroma_type, "420p10") == 0) {
+ _y4m->src_c_dec_h = 2;
+ _y4m->dst_c_dec_h = 2;
+ _y4m->src_c_dec_v = 2;
+ _y4m->dst_c_dec_v = 2;
+ _y4m->dst_buf_read_sz = 2 * (_y4m->pic_w * _y4m->pic_h +
+ 2 * ((_y4m->pic_w + 1) / 2) *
+ ((_y4m->pic_h + 1) / 2));
+ /* Natively supported: no conversion required. */
+ _y4m->aux_buf_sz = _y4m->aux_buf_read_sz = 0;
+ _y4m->convert = y4m_convert_null;
+ _y4m->bit_depth = 10;
+ _y4m->bps = 15;
+ _y4m->vpx_fmt = VPX_IMG_FMT_I42016;
+ if (only_420) {
+ fprintf(stderr, "Unsupported conversion from 420p10 to 420jpeg\n");
+ return -1;
+ }
+ } else if (strcmp(_y4m->chroma_type, "420p12") == 0) {
+ _y4m->src_c_dec_h = 2;
+ _y4m->dst_c_dec_h = 2;
+ _y4m->src_c_dec_v = 2;
+ _y4m->dst_c_dec_v = 2;
+ _y4m->dst_buf_read_sz = 2 * (_y4m->pic_w * _y4m->pic_h +
+ 2 * ((_y4m->pic_w + 1) / 2) *
+ ((_y4m->pic_h + 1) / 2));
+ /* Natively supported: no conversion required. */
+ _y4m->aux_buf_sz = _y4m->aux_buf_read_sz = 0;
+ _y4m->convert = y4m_convert_null;
+ _y4m->bit_depth = 12;
+ _y4m->bps = 18;
+ _y4m->vpx_fmt = VPX_IMG_FMT_I42016;
+ if (only_420) {
+ fprintf(stderr, "Unsupported conversion from 420p12 to 420jpeg\n");
+ return -1;
+ }
} else if (strcmp(_y4m->chroma_type, "420mpeg2") == 0) {
_y4m->src_c_dec_h = _y4m->dst_c_dec_h = _y4m->src_c_dec_v = _y4m->dst_c_dec_v = 2;
_y4m->dst_buf_read_sz = _y4m->pic_w * _y4m->pic_h;
@@ -786,7 +823,7 @@
_y4m->convert = y4m_convert_422_420jpeg;
} else {
_y4m->vpx_fmt = VPX_IMG_FMT_I422;
- _y4m->vpx_bps = 16;
+ _y4m->bps = 16;
_y4m->dst_c_dec_h = _y4m->src_c_dec_h;
_y4m->dst_c_dec_v = _y4m->src_c_dec_v;
_y4m->dst_buf_read_sz = _y4m->pic_w * _y4m->pic_h
@@ -794,7 +831,39 @@
/*Natively supported: no conversion required.*/
_y4m->aux_buf_sz = _y4m->aux_buf_read_sz = 0;
_y4m->convert = y4m_convert_null;
- }
+ }
+ } else if (strcmp(_y4m->chroma_type, "422p10") == 0) {
+ _y4m->src_c_dec_h = 2;
+ _y4m->src_c_dec_v = 1;
+ _y4m->vpx_fmt = VPX_IMG_FMT_I42216;
+ _y4m->bps = 20;
+ _y4m->bit_depth = 10;
+ _y4m->dst_c_dec_h = _y4m->src_c_dec_h;
+ _y4m->dst_c_dec_v = _y4m->src_c_dec_v;
+ _y4m->dst_buf_read_sz = 2 * (_y4m->pic_w * _y4m->pic_h +
+ 2 * ((_y4m->pic_w + 1) / 2) * _y4m->pic_h);
+ _y4m->aux_buf_sz = _y4m->aux_buf_read_sz = 0;
+ _y4m->convert = y4m_convert_null;
+ if (only_420) {
+ fprintf(stderr, "Unsupported conversion from 422p10 to 420jpeg\n");
+ return -1;
+ }
+ } else if (strcmp(_y4m->chroma_type, "422p12") == 0) {
+ _y4m->src_c_dec_h = 2;
+ _y4m->src_c_dec_v = 1;
+ _y4m->vpx_fmt = VPX_IMG_FMT_I42216;
+ _y4m->bps = 24;
+ _y4m->bit_depth = 12;
+ _y4m->dst_c_dec_h = _y4m->src_c_dec_h;
+ _y4m->dst_c_dec_v = _y4m->src_c_dec_v;
+ _y4m->dst_buf_read_sz = 2 * (_y4m->pic_w * _y4m->pic_h +
+ 2 * ((_y4m->pic_w + 1) / 2) * _y4m->pic_h);
+ _y4m->aux_buf_sz = _y4m->aux_buf_read_sz = 0;
+ _y4m->convert = y4m_convert_null;
+ if (only_420) {
+ fprintf(stderr, "Unsupported conversion from 422p12 to 420jpeg\n");
+ return -1;
+ }
} else if (strcmp(_y4m->chroma_type, "411") == 0) {
_y4m->src_c_dec_h = 4;
_y4m->dst_c_dec_h = 2;
@@ -823,7 +892,7 @@
_y4m->convert = y4m_convert_444_420jpeg;
} else {
_y4m->vpx_fmt = VPX_IMG_FMT_I444;
- _y4m->vpx_bps = 24;
+ _y4m->bps = 24;
_y4m->dst_c_dec_h = _y4m->src_c_dec_h;
_y4m->dst_c_dec_v = _y4m->src_c_dec_v;
_y4m->dst_buf_read_sz = 3 * _y4m->pic_w * _y4m->pic_h;
@@ -831,6 +900,36 @@
_y4m->aux_buf_sz = _y4m->aux_buf_read_sz = 0;
_y4m->convert = y4m_convert_null;
}
+ } else if (strcmp(_y4m->chroma_type, "444p10") == 0) {
+ _y4m->src_c_dec_h = 1;
+ _y4m->src_c_dec_v = 1;
+ _y4m->vpx_fmt = VPX_IMG_FMT_I44416;
+ _y4m->bps = 30;
+ _y4m->bit_depth = 10;
+ _y4m->dst_c_dec_h = _y4m->src_c_dec_h;
+ _y4m->dst_c_dec_v = _y4m->src_c_dec_v;
+ _y4m->dst_buf_read_sz = 2 * 3 * _y4m->pic_w * _y4m->pic_h;
+ _y4m->aux_buf_sz = _y4m->aux_buf_read_sz = 0;
+ _y4m->convert = y4m_convert_null;
+ if (only_420) {
+ fprintf(stderr, "Unsupported conversion from 444p10 to 420jpeg\n");
+ return -1;
+ }
+ } else if (strcmp(_y4m->chroma_type, "444p12") == 0) {
+ _y4m->src_c_dec_h = 1;
+ _y4m->src_c_dec_v = 1;
+ _y4m->vpx_fmt = VPX_IMG_FMT_I44416;
+ _y4m->bps = 36;
+ _y4m->bit_depth = 12;
+ _y4m->dst_c_dec_h = _y4m->src_c_dec_h;
+ _y4m->dst_c_dec_v = _y4m->src_c_dec_v;
+ _y4m->dst_buf_read_sz = 2 * 3 * _y4m->pic_w * _y4m->pic_h;
+ _y4m->aux_buf_sz = _y4m->aux_buf_read_sz = 0;
+ _y4m->convert = y4m_convert_null;
+ if (only_420) {
+ fprintf(stderr, "Unsupported conversion from 444p12 to 420jpeg\n");
+ return -1;
+ }
} else if (strcmp(_y4m->chroma_type, "444alpha") == 0) {
_y4m->src_c_dec_h = 1;
_y4m->src_c_dec_v = 1;
@@ -847,7 +946,7 @@
_y4m->convert = y4m_convert_444_420jpeg;
} else {
_y4m->vpx_fmt = VPX_IMG_FMT_444A;
- _y4m->vpx_bps = 32;
+ _y4m->bps = 32;
_y4m->dst_c_dec_h = _y4m->src_c_dec_h;
_y4m->dst_c_dec_v = _y4m->src_c_dec_v;
_y4m->dst_buf_read_sz = 4 * _y4m->pic_w * _y4m->pic_h;
@@ -871,7 +970,10 @@
_y4m->dst_buf_sz = _y4m->pic_w * _y4m->pic_h
+ 2 * ((_y4m->pic_w + _y4m->dst_c_dec_h - 1) / _y4m->dst_c_dec_h) *
((_y4m->pic_h + _y4m->dst_c_dec_v - 1) / _y4m->dst_c_dec_v);
- _y4m->dst_buf = (unsigned char *)malloc(_y4m->dst_buf_sz);
+ if (_y4m->bit_depth == 8)
+ _y4m->dst_buf = (unsigned char *)malloc(_y4m->dst_buf_sz);
+ else
+ _y4m->dst_buf = (unsigned char *)malloc(2 * _y4m->dst_buf_sz);
_y4m->aux_buf = (unsigned char *)malloc(_y4m->aux_buf_sz);
return 0;
}
@@ -887,6 +989,7 @@
int c_w;
int c_h;
int c_sz;
+ int bytes_per_sample = _y4m->bit_depth > 8 ? 2 : 1;
/*Read and skip the frame header.*/
if (!file_read(frame, 6, _fin)) return 0;
if (memcmp(frame, "FRAME", 5)) {
@@ -924,14 +1027,16 @@
_img->h = _img->d_h = _y4m->pic_h;
_img->x_chroma_shift = _y4m->dst_c_dec_h >> 1;
_img->y_chroma_shift = _y4m->dst_c_dec_v >> 1;
- _img->bps = _y4m->vpx_bps;
+ _img->bps = _y4m->bps;
/*Set up the buffer pointers.*/
- pic_sz = _y4m->pic_w * _y4m->pic_h;
+ pic_sz = _y4m->pic_w * _y4m->pic_h * bytes_per_sample;
c_w = (_y4m->pic_w + _y4m->dst_c_dec_h - 1) / _y4m->dst_c_dec_h;
+ c_w *= bytes_per_sample;
c_h = (_y4m->pic_h + _y4m->dst_c_dec_v - 1) / _y4m->dst_c_dec_v;
c_sz = c_w * c_h;
- _img->stride[PLANE_Y] = _img->stride[PLANE_ALPHA] = _y4m->pic_w;
+ _img->stride[PLANE_Y] = _img->stride[PLANE_ALPHA] =
+ _y4m->pic_w * bytes_per_sample;
_img->stride[PLANE_U] = _img->stride[PLANE_V] = c_w;
_img->planes[PLANE_Y] = _y4m->dst_buf;
_img->planes[PLANE_U] = _y4m->dst_buf + pic_sz;
diff --git a/y4minput.h b/y4minput.h
index d53eb65..356cebb 100644
--- a/y4minput.h
+++ b/y4minput.h
@@ -58,7 +58,8 @@
unsigned char *dst_buf;
unsigned char *aux_buf;
enum vpx_img_fmt vpx_fmt;
- int vpx_bps;
+ int bps;
+ unsigned int bit_depth;
};
int y4m_input_open(y4m_input *_y4m, FILE *_fin, char *_skip, int _nskip,