Merge "Alternate reference frame" into nextgenv2
diff --git a/.mailmap b/.mailmap
index 0bfda12..42f3617 100644
--- a/.mailmap
+++ b/.mailmap
@@ -1,14 +1,21 @@
Adrian Grange <agrange@google.com>
-Alex Converse <aconverse@google.com> <alex.converse@gmail.com>
+Adrian Grange <agrange@google.com> <agrange@agrange-macbookpro.roam.corp.google.com>
+Aℓex Converse <aconverse@google.com>
+Aℓex Converse <aconverse@google.com> <alex.converse@gmail.com>
Alexis Ballier <aballier@gentoo.org> <alexis.ballier@gmail.com>
Alpha Lam <hclam@google.com> <hclam@chromium.org>
Deb Mukherjee <debargha@google.com>
Erik Niemeyer <erik.a.niemeyer@intel.com> <erik.a.niemeyer@gmail.com>
Guillaume Martres <gmartres@google.com> <smarter3@gmail.com>
Hangyu Kuang <hkuang@google.com>
+Hangyu Kuang <hkuang@google.com> <hkuang@hkuang-macbookpro.roam.corp.google.com>
+Hui Su <huisu@google.com>
+Jacky Chen <jackychen@google.com>
Jim Bankoski <jimbankoski@google.com>
Johann Koenig <johannkoenig@google.com>
Johann Koenig <johannkoenig@google.com> <johann.koenig@duck.com>
+Johann Koenig <johannkoenig@google.com> <johannkoenig@dhcp-172-19-7-52.mtv.corp.google.com>
+Johann Koenig <johannkoenig@google.com> <johann.koenig@gmail.com>
John Koleszar <jkoleszar@google.com>
Joshua Litt <joshualitt@google.com> <joshualitt@chromium.org>
Marco Paniconi <marpan@google.com>
@@ -17,10 +24,13 @@
Paul Wilkins <paulwilkins@google.com>
Ralph Giles <giles@xiph.org> <giles@entropywave.com>
Ralph Giles <giles@xiph.org> <giles@mozilla.com>
+Ronald S. Bultje <rsbultje@gmail.com> <rbultje@google.com>
Sami Pietilä <samipietila@google.com>
Tamar Levy <tamar.levy@intel.com>
Tamar Levy <tamar.levy@intel.com> <levytamar82@gmail.com>
Tero Rintaluoma <teror@google.com> <tero.rintaluoma@on2.com>
Timothy B. Terriberry <tterribe@xiph.org> Tim Terriberry <tterriberry@mozilla.com>
Tom Finegan <tomfinegan@google.com>
+Tom Finegan <tomfinegan@google.com> <tomfinegan@chromium.org>
Yaowu Xu <yaowu@google.com> <yaowu@xuyaowu.com>
+Yaowu Xu <yaowu@google.com> <yaowu@YAOWU2-W.ad.corp.google.com>
diff --git a/AUTHORS b/AUTHORS
index 2f63d7c..f89b677 100644
--- a/AUTHORS
+++ b/AUTHORS
@@ -5,9 +5,9 @@
Abo Talib Mahfoodh <ab.mahfoodh@gmail.com>
Adam Xu <adam@xuyaowu.com>
Adrian Grange <agrange@google.com>
+Aℓex Converse <aconverse@google.com>
Ahmad Sharif <asharif@google.com>
Alexander Voronov <avoronov@graphics.cs.msu.ru>
-Alex Converse <aconverse@google.com>
Alexis Ballier <aballier@gentoo.org>
Alok Ahuja <waveletcoeff@gmail.com>
Alpha Lam <hclam@google.com>
@@ -16,8 +16,10 @@
Andoni Morales Alastruey <ylatuya@gmail.com>
Andres Mejia <mcitadel@gmail.com>
Andrew Russell <anrussell@google.com>
+Angie Chiang <angiebird@google.com>
Aron Rosenberg <arosenberg@logitech.com>
Attila Nagy <attilanagy@google.com>
+Brion Vibber <bvibber@wikimedia.org>
changjun.yang <changjun.yang@intel.com>
Charles 'Buck' Krasic <ckrasic@google.com>
chm <chm@rock-chips.com>
@@ -27,6 +29,7 @@
Dim Temp <dimtemp0@gmail.com>
Dmitry Kovalev <dkovalev@google.com>
Dragan Mrdjan <dmrdjan@mips.com>
+Ed Baker <edward.baker@intel.com>
Ehsan Akhgari <ehsan.akhgari@gmail.com>
Erik Niemeyer <erik.a.niemeyer@intel.com>
Fabio Pedretti <fabio.ped@libero.it>
@@ -34,6 +37,8 @@
Fredrik Söderquist <fs@opera.com>
Fritz Koenig <frkoenig@google.com>
Gaute Strokkenes <gaute.strokkenes@broadcom.com>
+Geza Lore <gezalore@gmail.com>
+Ghislain MARY <ghislainmary2@gmail.com>
Giuseppe Scrivano <gscrivano@gnu.org>
Gordana Cmiljanovic <gordana.cmiljanovic@imgtec.com>
Guillaume Martres <gmartres@google.com>
@@ -44,7 +49,7 @@
Hui Su <huisu@google.com>
Ivan Maltz <ivanmaltz@google.com>
Jacek Caban <cjacek@gmail.com>
-JackyChen <jackychen@google.com>
+Jacky Chen <jackychen@google.com>
James Berry <jamesberry@google.com>
James Yu <james.yu@linaro.org>
James Zern <jzern@google.com>
@@ -60,9 +65,11 @@
Joey Parrish <joeyparrish@google.com>
Johann Koenig <johannkoenig@google.com>
John Koleszar <jkoleszar@google.com>
+Johnny Klonaris <google@jawknee.com>
John Stark <jhnstrk@gmail.com>
Joshua Bleecher Snyder <josh@treelinelabs.com>
Joshua Litt <joshualitt@google.com>
+Julia Robson <juliamrobson@gmail.com>
Justin Clift <justin@salasaga.org>
Justin Lebar <justin.lebar@gmail.com>
KO Myung-Hun <komh@chollian.net>
@@ -82,6 +89,7 @@
Mikhal Shemer <mikhal@google.com>
Minghai Shang <minghai@google.com>
Morton Jonuschat <yabawock@gmail.com>
+Nico Weber <thakis@chromium.org>
Parag Salasakar <img.mips1@gmail.com>
Pascal Massimino <pascal.massimino@gmail.com>
Patrik Westin <patrik.westin@gmail.com>
@@ -96,7 +104,7 @@
Rafaël Carré <funman@videolan.org>
Ralph Giles <giles@xiph.org>
Rob Bradford <rob@linux.intel.com>
-Ronald S. Bultje <rbultje@google.com>
+Ronald S. Bultje <rsbultje@gmail.com>
Rui Ueyama <ruiu@google.com>
Sami Pietilä <samipietila@google.com>
Scott Graham <scottmg@chromium.org>
@@ -104,6 +112,7 @@
Sean McGovern <gseanmcg@gmail.com>
Sergey Ulanov <sergeyu@chromium.org>
Shimon Doodkin <helpmepro1@gmail.com>
+Shunyao Li <shunyaoli@google.com>
Stefan Holmer <holmer@google.com>
Suman Sunkara <sunkaras@google.com>
Taekhyun Kim <takim@nvidia.com>
diff --git a/CHANGELOG b/CHANGELOG
index b0d3064..7746cc6 100644
--- a/CHANGELOG
+++ b/CHANGELOG
@@ -1,7 +1,19 @@
-xxxx-yy-zz v1.4.0 "Changes for next release"
- vpxenc is changed to use VP9 by default.
- Encoder controls added for 1 pass SVC.
- Decoder control to toggle on/off loopfilter.
+2015-11-09 v1.5.0 "Javan Whistling Duck"
+ This release improves upon the VP9 encoder and speeds up the encoding and
+ decoding processes.
+
+ - Upgrading:
+ This release is ABI incompatible with 1.4.0. It drops deprecated VP8
+ controls and adds a variety of VP9 controls for testing.
+
+ The vpxenc utility now prefers VP9 by default.
+
+ - Enhancements:
+ Faster VP9 encoding and decoding
+ Smaller library size by combining functions used by VP8 and VP9
+
+ - Bug Fixes:
+ A variety of fuzzing issues
2015-04-03 v1.4.0 "Indian Runner Duck"
This release includes significant improvements to the VP9 codec.
diff --git a/configure b/configure
index eca4a2b..315c427 100755
--- a/configure
+++ b/configure
@@ -273,6 +273,7 @@
ext_inter
ext_interp
ext_refs
+ supertx
"
CONFIG_LIST="
dependency_tracking
diff --git a/libs.mk b/libs.mk
index c65df6b..e6fb068 100644
--- a/libs.mk
+++ b/libs.mk
@@ -260,7 +260,7 @@
LIBS-$(if yes,$(CONFIG_STATIC)) += $(BUILD_PFX)libvpx.a $(BUILD_PFX)libvpx_g.a
$(BUILD_PFX)libvpx_g.a: $(LIBVPX_OBJS)
-SO_VERSION_MAJOR := 2
+SO_VERSION_MAJOR := 3
SO_VERSION_MINOR := 0
SO_VERSION_PATCH := 0
ifeq ($(filter darwin%,$(TGT_OS)),$(TGT_OS))
@@ -429,12 +429,10 @@
if [ -n "$${sha1sum}" ]; then\
set -e;\
echo "Checking test data:";\
- if [ -n "$(LIBVPX_TEST_DATA)" ]; then\
- for f in $(call enabled,LIBVPX_TEST_DATA); do\
- grep $$f $(SRC_PATH_BARE)/test/test-data.sha1 |\
- (cd $(LIBVPX_TEST_DATA_PATH); $${sha1sum} -c);\
- done; \
- fi; \
+ for f in $(call enabled,LIBVPX_TEST_DATA); do\
+ grep $$f $(SRC_PATH_BARE)/test/test-data.sha1 |\
+ (cd $(LIBVPX_TEST_DATA_PATH); $${sha1sum} -c);\
+ done; \
else\
echo "Skipping test data integrity check, sha1sum not found.";\
fi
diff --git a/test/resize_test.cc b/test/resize_test.cc
index 98b6f87..bc91fe2 100644
--- a/test/resize_test.cc
+++ b/test/resize_test.cc
@@ -286,11 +286,11 @@
ASSERT_NO_FATAL_FAILURE(RunLoop(&video));
}
-class ResizeInternalRealtimeTest : public ::libvpx_test::EncoderTest,
+class ResizeRealtimeTest : public ::libvpx_test::EncoderTest,
public ::libvpx_test::CodecTestWith2Params<libvpx_test::TestMode, int> {
protected:
- ResizeInternalRealtimeTest() : EncoderTest(GET_PARAM(0)) {}
- virtual ~ResizeInternalRealtimeTest() {}
+ ResizeRealtimeTest() : EncoderTest(GET_PARAM(0)) {}
+ virtual ~ResizeRealtimeTest() {}
virtual void PreEncodeFrameHook(libvpx_test::VideoSource *video,
libvpx_test::Encoder *encoder) {
@@ -318,8 +318,6 @@
}
void DefaultConfig() {
- cfg_.g_w = 352;
- cfg_.g_h = 288;
cfg_.rc_buf_initial_sz = 500;
cfg_.rc_buf_optimal_sz = 600;
cfg_.rc_buf_sz = 1000;
@@ -346,13 +344,34 @@
bool change_bitrate_;
};
+TEST_P(ResizeRealtimeTest, TestExternalResizeWorks) {
+ ResizingVideoSource video;
+ DefaultConfig();
+ change_bitrate_ = false;
+ ASSERT_NO_FATAL_FAILURE(RunLoop(&video));
+ // Compare every recorded frame size with ScaleForFrameNumber()'s result.
+ for (std::vector<FrameInfo>::const_iterator info = frame_info_list_.begin();
+ info != frame_info_list_.end(); ++info) {
+ const unsigned int frame = static_cast<unsigned>(info->pts);
+ const unsigned int expected_w = ScaleForFrameNumber(frame, kInitialWidth);
+ const unsigned int expected_h = ScaleForFrameNumber(frame, kInitialHeight);
+ // EXPECT (not ASSERT) so one mismatch does not stop the remaining checks.
+ EXPECT_EQ(expected_w, info->w)
+ << "Frame " << frame << " had unexpected width";
+ EXPECT_EQ(expected_h, info->h)
+ << "Frame " << frame << " had unexpected height";
+ }
+}
+
// Verify the dynamic resizer behavior for real time, 1 pass CBR mode.
// Run at low bitrate, with resize_allowed = 1, and verify that we get
// one resize down event.
-TEST_P(ResizeInternalRealtimeTest, TestInternalResizeDown) {
+TEST_P(ResizeRealtimeTest, TestInternalResizeDown) {
::libvpx_test::I420VideoSource video("hantro_collage_w352h288.yuv", 352, 288,
30, 1, 0, 299);
DefaultConfig();
+ cfg_.g_w = 352;
+ cfg_.g_h = 288;
change_bitrate_ = false;
ASSERT_NO_FATAL_FAILURE(RunLoop(&video));
@@ -378,15 +397,17 @@
// Verify the dynamic resizer behavior for real time, 1 pass CBR mode.
// Start at low target bitrate, raise the bitrate in the middle of the clip,
// scaling-up should occur after bitrate changed.
-TEST_P(ResizeInternalRealtimeTest, TestInternalResizeDownUpChangeBitRate) {
+TEST_P(ResizeRealtimeTest, TestInternalResizeDownUpChangeBitRate) {
::libvpx_test::I420VideoSource video("hantro_collage_w352h288.yuv", 352, 288,
- 30, 1, 0, 299);
+ 30, 1, 0, 359);
DefaultConfig();
+ cfg_.g_w = 352;
+ cfg_.g_h = 288;
change_bitrate_ = true;
// Disable dropped frames.
cfg_.rc_dropframe_thresh = 0;
// Starting bitrate low.
- cfg_.rc_target_bitrate = 100;
+ cfg_.rc_target_bitrate = 80;
ASSERT_NO_FATAL_FAILURE(RunLoop(&video));
unsigned int last_w = cfg_.g_w;
@@ -411,7 +432,7 @@
}
// Verify that we get 2 resize events in this test.
- ASSERT_EQ(2, resize_count) << "Resizing should occur twice.";
+ ASSERT_EQ(resize_count, 2) << "Resizing should occur twice.";
}
vpx_img_fmt_t CspForFrameNumber(int frame) {
@@ -524,7 +545,7 @@
::testing::Values(::libvpx_test::kRealTime));
VP9_INSTANTIATE_TEST_CASE(ResizeInternalTest,
::testing::Values(::libvpx_test::kOnePassBest));
-VP9_INSTANTIATE_TEST_CASE(ResizeInternalRealtimeTest,
+VP9_INSTANTIATE_TEST_CASE(ResizeRealtimeTest,
::testing::Values(::libvpx_test::kRealTime),
::testing::Range(5, 9));
VP9_INSTANTIATE_TEST_CASE(ResizeCspTest,
diff --git a/test/test.mk b/test/test.mk
index 0ac9a8a..face2ad 100644
--- a/test/test.mk
+++ b/test/test.mk
@@ -173,6 +173,7 @@
LIBVPX_TEST_SRCS-$(CONFIG_VP10) += vp10_txfm_test.h
LIBVPX_TEST_SRCS-$(CONFIG_VP10) += vp10_fwd_txfm1d_test.cc
LIBVPX_TEST_SRCS-$(CONFIG_VP10) += vp10_inv_txfm1d_test.cc
+LIBVPX_TEST_SRCS-$(CONFIG_VP10) += vp10_fwd_txfm2d_test.cc
endif # CONFIG_SHARED
diff --git a/test/vp10_fwd_txfm2d_test.cc b/test/vp10_fwd_txfm2d_test.cc
new file mode 100644
index 0000000..e6416cc
--- /dev/null
+++ b/test/vp10_fwd_txfm2d_test.cc
@@ -0,0 +1,104 @@
+/*
+ * Copyright (c) 2015 The WebM project authors. All Rights Reserved.
+ *
+ * Use of this source code is governed by a BSD-style license
+ * that can be found in the LICENSE file in the root of the source
+ * tree. An additional intellectual property rights grant can be found
+ * in the file PATENTS. All contributing project authors may
+ * be found in the AUTHORS file in the root of the source tree.
+ */
+
+#include <stdio.h>
+#include <stdlib.h>
+#include <math.h>
+
+#include "third_party/googletest/src/include/gtest/gtest.h"
+
+#include "test/acm_random.h"
+#include "test/vp10_txfm_test.h"
+#include "vp10/common/vp10_fwd_txfm2d.h"
+#include "vp10/common/vp10_fwd_txfm2d_cfg.h"
+
+using libvpx_test::ACMRandom;
+
+namespace {
+
+const int txfm_size_num = 4;
+const int txfm_size_ls[4] = {4, 8, 16, 32};
+const TXFM_2D_CFG fwd_txfm_cfg_ls[4][4] = {
+ {fwd_txfm_2d_cfg_dct_dct_4, fwd_txfm_2d_cfg_dct_adst_4,
+ fwd_txfm_2d_cfg_adst_adst_4, fwd_txfm_2d_cfg_adst_dct_4},
+ {fwd_txfm_2d_cfg_dct_dct_8, fwd_txfm_2d_cfg_dct_adst_8,
+ fwd_txfm_2d_cfg_adst_adst_8, fwd_txfm_2d_cfg_adst_dct_8},
+ {fwd_txfm_2d_cfg_dct_dct_16, fwd_txfm_2d_cfg_dct_adst_16,
+ fwd_txfm_2d_cfg_adst_adst_16, fwd_txfm_2d_cfg_adst_dct_16},
+ {fwd_txfm_2d_cfg_dct_dct_32, fwd_txfm_2d_cfg_dct_adst_32,
+ fwd_txfm_2d_cfg_adst_adst_32, fwd_txfm_2d_cfg_adst_dct_32}};
+
+const Fwd_Txfm2d_Func fwd_txfm_func_ls[4] = {
+ vp10_fwd_txfm2d_4x4, vp10_fwd_txfm2d_8x8, vp10_fwd_txfm2d_16x16,
+ vp10_fwd_txfm2d_32x32};
+
+const int txfm_type_num = 4;
+const TYPE_TXFM type_ls_0[4] = {TYPE_DCT, TYPE_DCT, TYPE_ADST, TYPE_ADST};
+const TYPE_TXFM type_ls_1[4] = {TYPE_DCT, TYPE_ADST, TYPE_ADST, TYPE_DCT};
+
+TEST(vp10_fwd_txfm2d, accuracy) {
+ for (int txfm_size_idx = 0; txfm_size_idx < txfm_size_num; ++txfm_size_idx) {
+ int txfm_size = txfm_size_ls[txfm_size_idx];
+ int sqr_txfm_size = txfm_size * txfm_size;
+ int16_t* input = new int16_t[sqr_txfm_size];
+ int32_t* output = new int32_t[sqr_txfm_size];
+ double* ref_input = new double[sqr_txfm_size];
+ double* ref_output = new double[sqr_txfm_size];
+ // Check every (type0, type1) transform-type pair at this block size.
+ for (int txfm_type_idx = 0; txfm_type_idx < txfm_type_num;
+ ++txfm_type_idx) {
+ TXFM_2D_CFG fwd_txfm_cfg = fwd_txfm_cfg_ls[txfm_size_idx][txfm_type_idx];
+ Fwd_Txfm2d_Func fwd_txfm_func = fwd_txfm_func_ls[txfm_size_idx];
+ TYPE_TXFM type0 = type_ls_0[txfm_type_idx];
+ TYPE_TXFM type1 = type_ls_1[txfm_type_idx];
+ int amplify_bit =
+ fwd_txfm_cfg.shift[0] + fwd_txfm_cfg.shift[1] + fwd_txfm_cfg.shift[2];
+ double amplify_factor =
+ amplify_bit >= 0 ? (1 << amplify_bit) : (1.0 / (1 << -amplify_bit));
+ // amplify_factor = 2^(sum of cfg shifts); the reference is scaled by it.
+ ACMRandom rnd(ACMRandom::DeterministicSeed());
+ int count = 5000;
+ double avg_abs_error = 0;
+ for (int ci = 0; ci < count; ci++) {
+ for (int ni = 0; ni < sqr_txfm_size; ++ni) {
+ input[ni] = rnd.Rand16() % base;
+ ref_input[ni] = static_cast<double>(input[ni]);
+ output[ni] = 0;
+ ref_output[ni] = 0;
+ }
+ // Transform under test vs. the double-precision reference.
+ fwd_txfm_func(input, output, txfm_size, &fwd_txfm_cfg, bd);
+ reference_hybrid_2d(ref_input, ref_output, txfm_size, type0, type1);
+ // Scale the reference to the output's range; error is judged unscaled.
+ for (int ni = 0; ni < sqr_txfm_size; ++ni) {
+ ref_output[ni] = round(ref_output[ni] * amplify_factor);
+ EXPECT_LE(fabs(output[ni] - ref_output[ni]) / amplify_factor, 30);
+ }
+ avg_abs_error += compute_avg_abs_error<int32_t, double>(
+ output, ref_output, sqr_txfm_size);
+ }
+ // Normalize the accumulated error by the scale and the trial count.
+ avg_abs_error /= amplify_factor;
+ avg_abs_error /= count;
+ // max_abs_avg_error comes from upper bound of avg_abs_error
+ // printf("type0: %d type1: %d txfm_size: %d accuracy_avg_abs_error:
+ // %f\n", type0, type1, txfm_size, avg_abs_error);
+ double max_abs_avg_error = 1.5;
+ EXPECT_LE(avg_abs_error, max_abs_avg_error);
+ }
+ // Buffers are allocated per block size; release them before the next one.
+ delete[] input;
+ delete[] output;
+ delete[] ref_input;
+ delete[] ref_output;
+ }
+}
+
+} // anonymous namespace
diff --git a/vp10/common/blockd.h b/vp10/common/blockd.h
index c4dce60..03e34e0 100644
--- a/vp10/common/blockd.h
+++ b/vp10/common/blockd.h
@@ -89,7 +89,6 @@
// 1: an ext intra mode is used; 0: otherwise.
uint8_t use_ext_intra_mode[PLANE_TYPES];
EXT_INTRA_MODE ext_intra_mode[PLANE_TYPES];
- uint8_t ext_intra_angle[PLANE_TYPES];
} EXT_INTRA_MODE_INFO;
#endif // CONFIG_EXT_INTRA
@@ -124,6 +123,7 @@
#if CONFIG_EXT_INTRA
EXT_INTRA_MODE_INFO ext_intra_mode_info;
+ int8_t angle_delta[2];
#endif // CONFIG_EXT_INTRA
// TODO(slavarnway): Delete and use bmi[3].as_mv[] instead.
@@ -328,14 +328,14 @@
#endif // CONFIG_EXT_TX
#if CONFIG_EXT_INTRA
-// 0: use both directional and filter modes; 1: use directional modes only.
-#define DR_ONLY 0
-// 0: use slow exhaustive search; 1: use fast sub-optimal search.
+#define ALLOW_FILTER_INTRA_MODES 1
+#define ANGLE_STEP 3
+#define MAX_ANGLE_DELTAS 3
#define ANGLE_FAST_SEARCH 1
-// A parameter to adjust early termination in the fast search of angles.
-#define RD_ADJUSTER 1.4
-// Number of different angles that are supported
-#define EXT_INTRA_ANGLES 128
+
+static const uint8_t mode_to_angle_map[INTRA_MODES] = {  // degrees, per mode
+ 0, 90, 180, 45, 135, 111, 157, 203, 67, 0,  // 0: presumably DC/TM (no angle)
+};
static const TX_TYPE filter_intra_mode_to_tx_type_lookup[FILTER_INTRA_MODES] = {
DCT_DCT, // FILTER_DC
@@ -349,13 +349,6 @@
ADST_DCT, // FILTER_D63
ADST_ADST, // FILTER_TM
};
-
-// Maps the angle index to the actual prediction angle (in degrees).
-// Angle index is in the range [0, EXT_INTRA_ANGLES); the actual prediction
-// angle is in the range (0, 270).
-static INLINE int prediction_angle_map(int angle_in) {
- return (10 + 2 * angle_in);
-}
#endif // CONFIG_EXT_INTRA
static INLINE TX_TYPE get_tx_type(PLANE_TYPE plane_type,
@@ -363,33 +356,44 @@
int block_idx, TX_SIZE tx_size) {
const MODE_INFO *const mi = xd->mi[0];
const MB_MODE_INFO *const mbmi = &mi->mbmi;
-#if CONFIG_EXT_INTRA
- const int use_ext_intra_mode_info =
- mbmi->ext_intra_mode_info.use_ext_intra_mode[plane_type];
- const EXT_INTRA_MODE ext_intra_mode =
- mbmi->ext_intra_mode_info.ext_intra_mode[plane_type];
- if (!is_inter_block(mbmi) && use_ext_intra_mode_info) {
- if (!xd->lossless[mbmi->segment_id] && tx_size < TX_32X32
+#if CONFIG_EXT_INTRA
+ if (!is_inter_block(mbmi)) {
+ const int use_ext_intra_mode_info =
+ mbmi->ext_intra_mode_info.use_ext_intra_mode[plane_type];
+ const EXT_INTRA_MODE ext_intra_mode =
+ mbmi->ext_intra_mode_info.ext_intra_mode[plane_type];
+ const PREDICTION_MODE mode = (plane_type == PLANE_TYPE_Y) ?
+ get_y_mode(mi, block_idx) : mbmi->uv_mode;
+
+ if (xd->lossless[mbmi->segment_id] || tx_size >= TX_32X32)
+ return DCT_DCT;
+
#if CONFIG_EXT_TX
- && !(mbmi->sb_type >= BLOCK_8X8 && plane_type == PLANE_TYPE_Y)
+ if (mbmi->sb_type >= BLOCK_8X8 && plane_type == PLANE_TYPE_Y)
+ return mbmi->tx_type;
#endif // CONFIG_EXT_TX
- ) {
- if (ext_intra_mode > FILTER_TM_PRED) {
- int angle = mbmi->ext_intra_mode_info.ext_intra_angle[plane_type];
- angle = prediction_angle_map(angle);
- assert(angle > 0 && angle < 270);
- if (angle == 135)
- return ADST_ADST;
- else if (angle < 45 || angle > 225)
- return DCT_DCT;
- else if (angle < 135)
- return ADST_DCT;
- else
- return DCT_ADST;
- } else {
- return filter_intra_mode_to_tx_type_lookup[ext_intra_mode];
- }
+
+ if (use_ext_intra_mode_info)
+ return filter_intra_mode_to_tx_type_lookup[ext_intra_mode];
+
+ if (mode == DC_PRED) {
+ return DCT_DCT;
+ } else if (mode == TM_PRED) {
+ return ADST_ADST;
+ } else {
+ int angle = mode_to_angle_map[mode];
+ if (mbmi->sb_type >= BLOCK_8X8)
+ angle += mbmi->angle_delta[plane_type] * ANGLE_STEP;
+ assert(angle > 0 && angle < 270);
+ if (angle == 135)
+ return ADST_ADST;
+ else if (angle < 45 || angle > 225)
+ return DCT_DCT;
+ else if (angle < 135)
+ return ADST_DCT;
+ else
+ return DCT_ADST;
}
}
#endif // CONFIG_EXT_INTRA
diff --git a/vp10/common/entropymode.c b/vp10/common/entropymode.c
index f2502b9..ceb55df 100644
--- a/vp10/common/entropymode.c
+++ b/vp10/common/entropymode.c
@@ -774,7 +774,23 @@
#if CONFIG_EXT_TX
const vpx_tree_index vp10_ext_tx_inter_tree[EXT_TX_SETS_INTER]
[TREE_SIZE(TX_TYPES)] = {
- {
+ { // ToDo(yaowu): remove unused entry 0.
+ -IDTX, 2,
+ -DCT_DCT, 4,
+ -DST_DST, 6,
+ 8, 18,
+ 10, 12,
+ -DST_DCT, -DCT_DST,
+ 14, 16,
+ -ADST_DCT, -DCT_ADST,
+ -FLIPADST_DCT, -DCT_FLIPADST,
+ 20, 26,
+ 22, 24,
+ -DST_ADST, -ADST_DST,
+ -DST_FLIPADST, -FLIPADST_DST,
+ 28, 30,
+ -ADST_ADST, -FLIPADST_FLIPADST,
+ -ADST_FLIPADST, -FLIPADST_ADST,
}, {
-IDTX, 2,
-DCT_DCT, 4,
@@ -809,7 +825,23 @@
const vpx_tree_index vp10_ext_tx_intra_tree[EXT_TX_SETS_INTRA]
[TREE_SIZE(TX_TYPES)] = {
- {
+ { // ToDo(yaowu): remove unused entry 0.
+ -IDTX, 2,
+ -DCT_DCT, 4,
+ -DST_DST, 6,
+ 8, 18,
+ 10, 12,
+ -DST_DCT, -DCT_DST,
+ 14, 16,
+ -ADST_DCT, -DCT_ADST,
+ -FLIPADST_DCT, -DCT_FLIPADST,
+ 20, 26,
+ 22, 24,
+ -DST_ADST, -ADST_DST,
+ -DST_FLIPADST, -FLIPADST_DST,
+ 28, 30,
+ -ADST_ADST, -FLIPADST_FLIPADST,
+ -ADST_FLIPADST, -FLIPADST_ADST,
}, {
-IDTX, 2,
-DCT_DCT, 4,
@@ -832,8 +864,17 @@
static const vpx_prob
default_inter_ext_tx_prob[EXT_TX_SETS_INTER][EXT_TX_SIZES][TX_TYPES - 1] = {
- {
- // unused
+ { // ToDo(yaowu): remove unused entry 0.
+ { 12, 112, 16, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128,
+ 128 },
+ { 12, 112, 16, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128,
+ 128 },
+ { 12, 112, 16, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128,
+ 128 },
+#if EXT_TX_SIZES == 4
+ { 12, 112, 16, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128,
+ 128 },
+#endif
}, {
{ 12, 112, 16, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128,
128 },
@@ -865,8 +906,94 @@
static const vpx_prob
default_intra_ext_tx_prob[EXT_TX_SETS_INTRA][EXT_TX_SIZES]
[INTRA_MODES][TX_TYPES - 1] = {
- {
- // unused
+ { // ToDo(yaowu): remove unused entry 0.
+ {
+ { 8, 11, 24, 112, 87, 137, 127, 134,
+ 128, 86, 128, 124, 125, 133, 176, 123, },
+ { 10, 9, 39, 106, 73, 155, 163, 228,
+ 35, 62, 129, 127, 133, 114, 213, 234, },
+ { 10, 9, 14, 88, 91, 127, 151, 51,
+ 210, 89, 126, 58, 52, 116, 217, 24, },
+ { 9, 6, 29, 113, 98, 131, 149, 210,
+ 119, 60, 124, 93, 90, 143, 170, 197, },
+ { 8, 8, 38, 101, 111, 166, 167, 141,
+ 130, 105, 128, 75, 75, 118, 197, 117, },
+ { 7, 8, 39, 91, 101, 153, 166, 200,
+ 99, 77, 123, 90, 83, 144, 224, 192, },
+ { 7, 10, 26, 86, 119, 154, 130, 101,
+ 152, 91, 129, 75, 79, 137, 219, 77, },
+ { 10, 13, 20, 86, 102, 162, 112, 76,
+ 171, 86, 134, 122, 106, 124, 196, 44, },
+ { 8, 9, 33, 108, 100, 144, 148, 215,
+ 77, 60, 125, 125, 128, 126, 198, 220, },
+ { 3, 10, 29, 111, 69, 141, 204, 141,
+ 139, 93, 120, 75, 77, 163, 242, 124, },
+ }, {
+ { 2, 53, 18, 147, 96, 98, 136, 133,
+ 131, 120, 153, 163, 169, 137, 173, 124, },
+ { 4, 18, 34, 133, 54, 130, 179, 228,
+ 28, 72, 153, 164, 168, 118, 227, 239, },
+ { 4, 18, 13, 125, 72, 110, 176, 36,
+ 221, 104, 148, 75, 72, 117, 225, 19, },
+ { 8, 33, 24, 162, 113, 99, 147, 226,
+ 103, 85, 153, 143, 153, 124, 155, 210, },
+ { 2, 15, 35, 107, 127, 158, 192, 128,
+ 126, 116, 151, 95, 88, 182, 241, 119, },
+ { 3, 15, 36, 112, 100, 146, 194, 189,
+ 90, 98, 152, 99, 100, 165, 235, 175, },
+ { 3, 16, 29, 109, 103, 140, 182, 76,
+ 173, 104, 147, 82, 85, 159, 235, 70, },
+ { 9, 24, 14, 120, 86, 156, 161, 34,
+ 177, 121, 142, 128, 128, 126, 185, 37, },
+ { 5, 24, 29, 152, 98, 99, 174, 228,
+ 82, 76, 147, 149, 128, 132, 191, 225, },
+ { 2, 15, 29, 111, 77, 126, 200, 135,
+ 117, 93, 152, 96, 84, 191, 245, 135, },
+ }, {
+ { 2, 69, 13, 173, 111, 69, 137, 159,
+ 159, 146, 151, 193, 203, 131, 180, 123, },
+ { 1, 12, 33, 164, 32, 98, 204, 242,
+ 23, 99, 149, 215, 232, 110, 239, 245, },
+ { 1, 17, 9, 136, 82, 83, 171, 28,
+ 231, 128, 135, 76, 64, 118, 235, 17, },
+ { 4, 41, 17, 195, 131, 58, 161, 237,
+ 141, 97, 153, 189, 191, 117, 182, 202, },
+ { 2, 17, 36, 104, 149, 137, 217, 139,
+ 191, 119, 125, 107, 115, 223, 249, 110, },
+ { 2, 14, 24, 127, 91, 135, 219, 198,
+ 113, 91, 164, 125, 173, 211, 250, 116, },
+ { 3, 19, 24, 120, 102, 130, 209, 81,
+ 187, 95, 143, 102, 50, 190, 244, 56, },
+ { 4, 27, 10, 128, 91, 157, 181, 33,
+ 181, 150, 141, 141, 166, 114, 215, 25, },
+ { 2, 34, 27, 187, 102, 77, 210, 245,
+ 113, 107, 136, 184, 188, 121, 210, 234, },
+ { 1, 15, 22, 141, 59, 94, 208, 133,
+ 154, 95, 152, 112, 105, 191, 242, 111, },
+#if EXT_TX_SIZES == 4
+ }, {
+ { 2, 69, 13, 173, 111, 69, 137, 159,
+ 159, 146, 151, 193, 203, 131, 180, 123, },
+ { 1, 12, 33, 164, 32, 98, 204, 242,
+ 23, 99, 149, 215, 232, 110, 239, 245, },
+ { 1, 17, 9, 136, 82, 83, 171, 28,
+ 231, 128, 135, 76, 64, 118, 235, 17, },
+ { 4, 41, 17, 195, 131, 58, 161, 237,
+ 141, 97, 153, 189, 191, 117, 182, 202, },
+ { 2, 17, 36, 104, 149, 137, 217, 139,
+ 191, 119, 125, 107, 115, 223, 249, 110, },
+ { 2, 14, 24, 127, 91, 135, 219, 198,
+ 113, 91, 164, 125, 173, 211, 250, 116, },
+ { 3, 19, 24, 120, 102, 130, 209, 81,
+ 187, 95, 143, 102, 50, 190, 244, 56, },
+ { 4, 27, 10, 128, 91, 157, 181, 33,
+ 181, 150, 141, 141, 166, 114, 215, 25, },
+ { 2, 34, 27, 187, 102, 77, 210, 245,
+ 113, 107, 136, 184, 188, 121, 210, 234, },
+ { 1, 15, 22, 141, 59, 94, 208, 133,
+ 154, 95, 152, 112, 105, 191, 242, 111, },
+#endif
+ },
}, {
{
{ 8, 11, 24, 112, 87, 137, 127, 134,
@@ -967,7 +1094,7 @@
#endif
#if CONFIG_EXT_INTRA
-static const vpx_prob default_ext_intra_probs[2] = {200, 200};
+static const vpx_prob default_ext_intra_probs[2] = {230, 230};
#endif // CONFIG_EXT_INTRA
static void init_mode_probs(FRAME_CONTEXT *fc) {
diff --git a/vp10/common/entropymode.h b/vp10/common/entropymode.h
index 00eacc5..2b5c948 100644
--- a/vp10/common/entropymode.h
+++ b/vp10/common/entropymode.h
@@ -32,11 +32,6 @@
#define PALETTE_BLOCK_SIZES (BLOCK_64X64 - BLOCK_8X8 + 1)
#define PALETTE_Y_MODE_CONTEXTS 3
-#if CONFIG_EXT_INTRA
-// Probability that an ext_intra mode is a directional prediction mode
-#define DR_EXT_INTRA_PROB 144
-#endif // CONFIG_EXT_INTRA
-
struct VP10Common;
struct tx_probs {
diff --git a/vp10/common/enums.h b/vp10/common/enums.h
index 56e9c90..3f9395e 100644
--- a/vp10/common/enums.h
+++ b/vp10/common/enums.h
@@ -168,7 +168,6 @@
FILTER_D207_PRED,
FILTER_D63_PRED,
FILTER_TM_PRED,
- EXT_DR_PRED,
EXT_INTRA_MODES,
} EXT_INTRA_MODE;
diff --git a/vp10/common/reconintra.c b/vp10/common/reconintra.c
index 28aa915..87dc13a 100644
--- a/vp10/common/reconintra.c
+++ b/vp10/common/reconintra.c
@@ -264,7 +264,7 @@
}
#if CONFIG_MISC_FIXES
-static inline void memset16(uint16_t *dst, int val, int n) {
+static INLINE void memset16(uint16_t *dst, int val, int n) {
while (n--)
*dst++ = val;
}
@@ -405,32 +405,11 @@
}
}
-static INLINE void v_predictor(uint8_t *dst, ptrdiff_t stride, int bs,
- const uint8_t *above, const uint8_t *left) {
- int r;
- (void) left;
-
- for (r = 0; r < bs; r++) {
- memcpy(dst, above, bs);
- dst += stride;
- }
-}
-
-static INLINE void h_predictor(uint8_t *dst, ptrdiff_t stride, int bs,
- const uint8_t *above, const uint8_t *left) {
- int r;
- (void) above;
-
- for (r = 0; r < bs; r++) {
- memset(dst, left[r], bs);
- dst += stride;
- }
-}
-
-static void dr_predictor(uint8_t *dst, ptrdiff_t stride, int bs,
+static void dr_predictor(uint8_t *dst, ptrdiff_t stride, TX_SIZE tx_size,
const uint8_t *above, const uint8_t *left, int angle) {
double t = 0;
int dx, dy;
+ int bs = 4 << tx_size;
if (angle != 90 && angle != 180)
t = tan(angle * PI / 180.0);
@@ -448,9 +427,9 @@
dy = -((int)(256 * t));
dr_prediction_z3(dst, stride, bs, above, left, dx, dy);
} else if (angle == 90) {
- v_predictor(dst, stride, bs, above, left);
+ pred[V_PRED][tx_size](dst, stride, above, left);
} else if (angle == 180) {
- h_predictor(dst, stride, bs, above, left);
+ pred[H_PRED][tx_size](dst, stride, above, left);
}
}
@@ -915,11 +894,7 @@
int i;
uint16_t *dst = CONVERT_TO_SHORTPTR(dst8);
uint16_t *ref = CONVERT_TO_SHORTPTR(ref8);
-#if CONFIG_MISC_FIXES
- DECLARE_ALIGNED(16, uint16_t, left_col[32]);
-#else
DECLARE_ALIGNED(16, uint16_t, left_col[64]);
-#endif
DECLARE_ALIGNED(16, uint16_t, above_data[64 + 16]);
uint16_t *above_row = above_data + 16;
const uint16_t *const_above_row = above_row;
@@ -946,37 +921,38 @@
&xd->mi[0]->mbmi.ext_intra_mode_info;
const EXT_INTRA_MODE ext_intra_mode =
ext_intra_mode_info->ext_intra_mode[plane != 0];
- const int angle =
- prediction_angle_map(ext_intra_mode_info->ext_intra_angle[plane != 0]);
+ int p_angle = 0;
+
+ if (mode != DC_PRED && mode != TM_PRED &&
+ xd->mi[0]->mbmi.sb_type >= BLOCK_8X8) {
+ p_angle = mode_to_angle_map[mode] +
+ xd->mi[0]->mbmi.angle_delta[plane != 0] * ANGLE_STEP;
+#if CONFIG_MISC_FIXES
+ if (p_angle <= 90)
+ need_above = 1, need_left = 0;
+ else if (p_angle < 180)
+ need_above = 1, need_left = 1;
+ else
+ need_above = 0, need_left = 1;
+#else
+ if (p_angle < 90)
+ need_above = 0, need_aboveright = 1, need_left = 0;
+ else if (p_angle == 90)
+ need_above = 1, need_aboveright = 0, need_left = 0;
+ else if (p_angle < 180)
+ need_above = 1, need_aboveright = 0, need_left = 1;
+ else
+ need_above = 0, need_aboveright = 0, need_left = 1;
+#endif // CONFIG_MISC_FIXES
+ }
if (ext_intra_mode_info->use_ext_intra_mode[plane != 0]) {
EXT_INTRA_MODE ext_intra_mode =
ext_intra_mode_info->ext_intra_mode[plane != 0];
- if (ext_intra_mode <= FILTER_TM_PRED) {
- need_left = ext_intra_extend_modes[ext_intra_mode] & NEED_LEFT;
- need_above = ext_intra_extend_modes[ext_intra_mode] & NEED_ABOVE;
- need_aboveright =
- ext_intra_extend_modes[ext_intra_mode] & NEED_ABOVERIGHT;
- } else {
- assert(angle > 0 && angle < 270);
-#if CONFIG_MISC_FIXES
- if (angle <= 90)
- need_above = 1, need_left = 0;
- else if (angle < 180)
- need_above = 1, need_left = 1;
- else
- need_above = 0, need_left = 1;
-#else
- if (angle < 90)
- need_above = 0, need_aboveright = 1, need_left = 0;
- else if (angle == 90)
- need_above = 1, need_aboveright = 0, need_left = 0;
- else if (angle < 180)
- need_above = 1, need_aboveright = 0, need_left = 1;
- else
- need_above = 0, need_aboveright = 0, need_left = 1;
-#endif // CONFIG_MISC_FIXES
- }
+ need_left = ext_intra_extend_modes[ext_intra_mode] & NEED_LEFT;
+ need_above = ext_intra_extend_modes[ext_intra_mode] & NEED_ABOVE;
+ need_aboveright =
+ ext_intra_extend_modes[ext_intra_mode] & NEED_ABOVERIGHT;
}
#endif // CONFIG_EXT_INTRA
@@ -993,10 +969,10 @@
#if CONFIG_EXT_INTRA
int need_bottom;
if (ext_intra_mode_info->use_ext_intra_mode[plane != 0]) {
- if (ext_intra_mode <= FILTER_TM_PRED)
need_bottom = 0;
- else
- need_bottom = angle > 180;
+ } else if (mode != DC_PRED && mode != TM_PRED &&
+ xd->mi[0]->mbmi.sb_type >= BLOCK_8X8) {
+ need_bottom = p_angle > 180;
} else {
need_bottom = !!(extend_modes[mode] & NEED_BOTTOMLEFT);
}
@@ -1024,10 +1000,10 @@
#if CONFIG_EXT_INTRA
int need_right;
if (ext_intra_mode_info->use_ext_intra_mode[plane != 0]) {
- if (ext_intra_mode <= FILTER_TM_PRED)
- need_right = 1;
- else
- need_right = angle < 90;
+ need_right = 1;
+ } else if (mode != DC_PRED && mode != TM_PRED &&
+ xd->mi[0]->mbmi.sb_type >= BLOCK_8X8) {
+ need_right = p_angle < 90;
} else {
need_right = !!(extend_modes[mode] & NEED_ABOVERIGHT);
}
@@ -1052,7 +1028,9 @@
(void)need_aboveright;
#if CONFIG_EXT_INTRA
if (ext_intra_mode_info->use_ext_intra_mode[plane != 0] ||
- (extend_modes[mode] & NEED_ABOVELEFT)) {
+ (extend_modes[mode] & NEED_ABOVELEFT) ||
+ (mode != DC_PRED && mode != TM_PRED &&
+ xd->mi[0]->mbmi.sb_type >= BLOCK_8X8)) {
above_row[-1] = n_top_px > 0 ?
(n_left_px > 0 ? above_ref[-1] : base + 1) : base - 1;
}
@@ -1185,13 +1163,15 @@
#if CONFIG_EXT_INTRA
if (ext_intra_mode_info->use_ext_intra_mode[plane != 0]) {
- if (ext_intra_mode <= FILTER_TM_PRED)
- highbd_filter_intra_predictors[ext_intra_mode](dst, dst_stride, bs,
- const_above_row, left_col,
- bd);
- else
- highbd_dr_predictor(dst, dst_stride, bs, const_above_row, left_col,
- angle, bd);
+ highbd_filter_intra_predictors[ext_intra_mode](dst, dst_stride, bs,
+ const_above_row, left_col, bd);
+ return;
+ }
+
+ if (mode != DC_PRED && mode != TM_PRED &&
+ xd->mi[0]->mbmi.sb_type >= BLOCK_8X8) {
+ highbd_dr_predictor(dst, dst_stride, bs, const_above_row, left_col,
+ p_angle, bd);
return;
}
#endif // CONFIG_EXT_INTRA
@@ -1247,37 +1227,39 @@
&xd->mi[0]->mbmi.ext_intra_mode_info;
const EXT_INTRA_MODE ext_intra_mode =
ext_intra_mode_info->ext_intra_mode[plane != 0];
- const int angle =
- prediction_angle_map(ext_intra_mode_info->ext_intra_angle[plane != 0]);
+ int p_angle = 0;
+
+ if (mode != DC_PRED && mode != TM_PRED &&
+ xd->mi[0]->mbmi.sb_type >= BLOCK_8X8) {
+ p_angle = mode_to_angle_map[mode] +
+ xd->mi[0]->mbmi.angle_delta[plane != 0] * ANGLE_STEP;
+
+#if CONFIG_MISC_FIXES
+ if (p_angle <= 90)
+ need_above = 1, need_left = 0;
+ else if (p_angle < 180)
+ need_above = 1, need_left = 1;
+ else
+ need_above = 0, need_left = 1;
+#else
+ if (p_angle < 90)
+ need_above = 0, need_aboveright = 1, need_left = 0;
+ else if (p_angle == 90)
+ need_above = 1, need_aboveright = 0, need_left = 0;
+ else if (p_angle < 180)
+ need_above = 1, need_aboveright = 0, need_left = 1;
+ else
+ need_above = 0, need_aboveright = 0, need_left = 1;
+#endif // CONFIG_MISC_FIXES
+ }
if (ext_intra_mode_info->use_ext_intra_mode[plane != 0]) {
EXT_INTRA_MODE ext_intra_mode =
ext_intra_mode_info->ext_intra_mode[plane != 0];
- if (ext_intra_mode <= FILTER_TM_PRED) {
- need_left = ext_intra_extend_modes[ext_intra_mode] & NEED_LEFT;
- need_above = ext_intra_extend_modes[ext_intra_mode] & NEED_ABOVE;
- need_aboveright =
- ext_intra_extend_modes[ext_intra_mode] & NEED_ABOVERIGHT;
- } else {
- assert(angle > 0 && angle < 270);
-#if CONFIG_MISC_FIXES
- if (angle <= 90)
- need_above = 1, need_left = 0;
- else if (angle < 180)
- need_above = 1, need_left = 1;
- else
- need_above = 0, need_left = 1;
-#else
- if (angle < 90)
- need_above = 0, need_aboveright = 1, need_left = 0;
- else if (angle == 90)
- need_above = 1, need_aboveright = 0, need_left = 0;
- else if (angle < 180)
- need_above = 1, need_aboveright = 0, need_left = 1;
- else
- need_above = 0, need_aboveright = 0, need_left = 1;
-#endif // CONFIG_MISC_FIXES
- }
+ need_left = ext_intra_extend_modes[ext_intra_mode] & NEED_LEFT;
+ need_above = ext_intra_extend_modes[ext_intra_mode] & NEED_ABOVE;
+ need_aboveright =
+ ext_intra_extend_modes[ext_intra_mode] & NEED_ABOVERIGHT;
}
#endif // CONFIG_EXT_INTRA
@@ -1318,10 +1300,10 @@
#if CONFIG_EXT_INTRA
int need_bottom;
if (ext_intra_mode_info->use_ext_intra_mode[plane != 0]) {
- if (ext_intra_mode <= FILTER_TM_PRED)
- need_bottom = 0;
- else
- need_bottom = angle > 180;
+ need_bottom = 0;
+ } else if (mode != DC_PRED && mode != TM_PRED &&
+ xd->mi[0]->mbmi.sb_type >= BLOCK_8X8) {
+ need_bottom = p_angle > 180;
} else {
need_bottom = !!(extend_modes[mode] & NEED_BOTTOMLEFT);
}
@@ -1373,10 +1355,10 @@
#if CONFIG_EXT_INTRA
int need_right;
if (ext_intra_mode_info->use_ext_intra_mode[plane != 0]) {
- if (ext_intra_mode <= FILTER_TM_PRED)
- need_right = 1;
- else
- need_right = angle < 90;
+ need_right = 1;
+ } else if (mode != DC_PRED && mode != TM_PRED &&
+ xd->mi[0]->mbmi.sb_type >= BLOCK_8X8) {
+ need_right = p_angle < 90;
} else {
need_right = !!(extend_modes[mode] & NEED_ABOVERIGHT);
}
@@ -1428,7 +1410,9 @@
(void)need_aboveright;
#if CONFIG_EXT_INTRA
if (ext_intra_mode_info->use_ext_intra_mode[plane != 0] ||
- (extend_modes[mode] & NEED_ABOVELEFT)) {
+ (extend_modes[mode] & NEED_ABOVELEFT) ||
+ (mode != DC_PRED && mode != TM_PRED &&
+ xd->mi[0]->mbmi.sb_type >= BLOCK_8X8)) {
above_row[-1] = n_top_px > 0 ? (n_left_px > 0 ? above_ref[-1] : 129) : 127;
}
#else
@@ -1486,11 +1470,14 @@
#if CONFIG_EXT_INTRA
if (ext_intra_mode_info->use_ext_intra_mode[plane != 0]) {
- if (ext_intra_mode <= FILTER_TM_PRED)
- filter_intra_predictors[ext_intra_mode](dst, dst_stride, bs,
- const_above_row, left_col);
- else
- dr_predictor(dst, dst_stride, bs, const_above_row, left_col, angle);
+ filter_intra_predictors[ext_intra_mode](dst, dst_stride, bs,
+ const_above_row, left_col);
+ return;
+ }
+
+ if (mode != DC_PRED && mode != TM_PRED &&
+ xd->mi[0]->mbmi.sb_type >= BLOCK_8X8) {
+ dr_predictor(dst, dst_stride, tx_size, const_above_row, left_col, p_angle);
return;
}
#endif // CONFIG_EXT_INTRA
@@ -1510,10 +1497,10 @@
}
void vp10_predict_intra_block(const MACROBLOCKD *xd, int bwl_in, int bhl_in,
- TX_SIZE tx_size, PREDICTION_MODE mode,
- const uint8_t *ref, int ref_stride,
- uint8_t *dst, int dst_stride,
- int aoff, int loff, int plane) {
+ TX_SIZE tx_size, PREDICTION_MODE mode,
+ const uint8_t *ref, int ref_stride,
+ uint8_t *dst, int dst_stride,
+ int aoff, int loff, int plane) {
const int txw = (1 << tx_size);
const int have_top = loff || xd->up_available;
const int have_left = aoff || xd->left_available;
diff --git a/vp10/common/vp10_inv_txfm2d_cfg.h b/vp10/common/vp10_inv_txfm2d_cfg.h
new file mode 100644
index 0000000..8cd76b5
--- /dev/null
+++ b/vp10/common/vp10_inv_txfm2d_cfg.h
@@ -0,0 +1,377 @@
+/*
+ * Copyright (c) 2015 The WebM project authors. All Rights Reserved.
+ *
+ * Use of this source code is governed by a BSD-style license
+ * that can be found in the LICENSE file in the root of the source
+ * tree. An additional intellectual property rights grant can be found
+ * in the file PATENTS. All contributing project authors may
+ * be found in the AUTHORS file in the root of the source tree.
+ */
+
+#ifndef VP10_INV_TXFM2D_CFG_H_
+#define VP10_INV_TXFM2D_CFG_H_
+#include "vp10/common/vp10_inv_txfm1d.h"
+
+// ---- config inv_dct_dct_4: size 4, col idct4, row idct4 ----
+static const int8_t inv_shift_dct_dct_4[2] = {1, -5};
+static const int8_t inv_stage_range_col_dct_dct_4[4] = {17, 17, 16, 16};
+static const int8_t inv_stage_range_row_dct_dct_4[4] = {16, 16, 16, 16};
+static const int8_t inv_cos_bit_col_dct_dct_4[4] = {15, 15, 15, 15};
+static const int8_t inv_cos_bit_row_dct_dct_4[4] = {15, 15, 15, 15};
+
+static const TXFM_2D_CFG inv_txfm_2d_cfg_dct_dct_4 = {
+ .txfm_size = 4,
+ .stage_num_col = 4,  // entries in stage_range_col / cos_bit_col
+ .stage_num_row = 4,  // entries in stage_range_row / cos_bit_row
+
+ .shift = inv_shift_dct_dct_4,
+ .stage_range_col = inv_stage_range_col_dct_dct_4,
+ .stage_range_row = inv_stage_range_row_dct_dct_4,
+ .cos_bit_col = inv_cos_bit_col_dct_dct_4,
+ .cos_bit_row = inv_cos_bit_row_dct_dct_4,
+ .txfm_func_col = vp10_idct4_new,
+ .txfm_func_row = vp10_idct4_new};
+
+// ---- config inv_dct_dct_8: size 8, col idct8, row idct8 ----
+static const int8_t inv_shift_dct_dct_8[2] = {0, -5};
+static const int8_t inv_stage_range_col_dct_dct_8[6] = {17, 17, 17, 17, 16, 16};
+static const int8_t inv_stage_range_row_dct_dct_8[6] = {17, 17, 17, 17, 17, 17};
+static const int8_t inv_cos_bit_col_dct_dct_8[6] = {15, 15, 15, 15, 15, 15};
+static const int8_t inv_cos_bit_row_dct_dct_8[6] = {15, 15, 15, 15, 15, 15};
+
+static const TXFM_2D_CFG inv_txfm_2d_cfg_dct_dct_8 = {
+ .txfm_size = 8,
+ .stage_num_col = 6,  // entries in stage_range_col / cos_bit_col
+ .stage_num_row = 6,  // entries in stage_range_row / cos_bit_row
+
+ .shift = inv_shift_dct_dct_8,
+ .stage_range_col = inv_stage_range_col_dct_dct_8,
+ .stage_range_row = inv_stage_range_row_dct_dct_8,
+ .cos_bit_col = inv_cos_bit_col_dct_dct_8,
+ .cos_bit_row = inv_cos_bit_row_dct_dct_8,
+ .txfm_func_col = vp10_idct8_new,
+ .txfm_func_row = vp10_idct8_new};
+
+// ---- config inv_dct_dct_16: size 16, col idct16, row idct16 ----
+static const int8_t inv_shift_dct_dct_16[2] = {0, -6};
+static const int8_t inv_stage_range_col_dct_dct_16[8] = {18, 18, 18, 18,
+ 18, 18, 17, 17};
+static const int8_t inv_stage_range_row_dct_dct_16[8] = {18, 18, 18, 18,
+ 18, 18, 18, 18};
+static const int8_t inv_cos_bit_col_dct_dct_16[8] = {14, 14, 14, 14,
+ 14, 14, 14, 15};
+static const int8_t inv_cos_bit_row_dct_dct_16[8] = {14, 14, 14, 14,
+ 14, 14, 14, 14};
+
+static const TXFM_2D_CFG inv_txfm_2d_cfg_dct_dct_16 = {
+ .txfm_size = 16,
+ .stage_num_col = 8,  // entries in stage_range_col / cos_bit_col
+ .stage_num_row = 8,  // entries in stage_range_row / cos_bit_row
+
+ .shift = inv_shift_dct_dct_16,
+ .stage_range_col = inv_stage_range_col_dct_dct_16,
+ .stage_range_row = inv_stage_range_row_dct_dct_16,
+ .cos_bit_col = inv_cos_bit_col_dct_dct_16,
+ .cos_bit_row = inv_cos_bit_row_dct_dct_16,
+ .txfm_func_col = vp10_idct16_new,
+ .txfm_func_row = vp10_idct16_new};
+
+// ---- config inv_dct_dct_32: size 32, col idct32, row idct32 ----
+static const int8_t inv_shift_dct_dct_32[2] = {-1, -6};
+static const int8_t inv_stage_range_col_dct_dct_32[10] = {18, 18, 18, 18, 18,
+ 18, 18, 18, 17, 17};
+static const int8_t inv_stage_range_row_dct_dct_32[10] = {19, 19, 19, 19, 19,
+ 19, 19, 19, 19, 19};
+static const int8_t inv_cos_bit_col_dct_dct_32[10] = {14, 14, 14, 14, 14,
+ 14, 14, 14, 14, 15};
+static const int8_t inv_cos_bit_row_dct_dct_32[10] = {13, 13, 13, 13, 13,
+ 13, 13, 13, 13, 13};
+
+static const TXFM_2D_CFG inv_txfm_2d_cfg_dct_dct_32 = {
+ .txfm_size = 32,
+ .stage_num_col = 10,  // entries in stage_range_col / cos_bit_col
+ .stage_num_row = 10,  // entries in stage_range_row / cos_bit_row
+
+ .shift = inv_shift_dct_dct_32,
+ .stage_range_col = inv_stage_range_col_dct_dct_32,
+ .stage_range_row = inv_stage_range_row_dct_dct_32,
+ .cos_bit_col = inv_cos_bit_col_dct_dct_32,
+ .cos_bit_row = inv_cos_bit_row_dct_dct_32,
+ .txfm_func_col = vp10_idct32_new,
+ .txfm_func_row = vp10_idct32_new};
+
+// ---- config inv_dct_adst_4: size 4, col idct4, row iadst4 ----
+static const int8_t inv_shift_dct_adst_4[2] = {1, -5};
+static const int8_t inv_stage_range_col_dct_adst_4[4] = {17, 17, 16, 16};
+static const int8_t inv_stage_range_row_dct_adst_4[6] = {16, 16, 16,
+ 16, 16, 16};
+static const int8_t inv_cos_bit_col_dct_adst_4[4] = {15, 15, 15, 15};
+static const int8_t inv_cos_bit_row_dct_adst_4[6] = {15, 15, 15, 15, 15, 15};
+
+static const TXFM_2D_CFG inv_txfm_2d_cfg_dct_adst_4 = {
+ .txfm_size = 4,
+ .stage_num_col = 4,  // entries in stage_range_col / cos_bit_col
+ .stage_num_row = 6,  // entries in stage_range_row / cos_bit_row
+
+ .shift = inv_shift_dct_adst_4,
+ .stage_range_col = inv_stage_range_col_dct_adst_4,
+ .stage_range_row = inv_stage_range_row_dct_adst_4,
+ .cos_bit_col = inv_cos_bit_col_dct_adst_4,
+ .cos_bit_row = inv_cos_bit_row_dct_adst_4,
+ .txfm_func_col = vp10_idct4_new,
+ .txfm_func_row = vp10_iadst4_new};
+
+// ---- config inv_dct_adst_8: size 8, col idct8, row iadst8 ----
+static const int8_t inv_shift_dct_adst_8[2] = {-1, -4};
+static const int8_t inv_stage_range_col_dct_adst_8[6] = {16, 16, 16,
+ 16, 15, 15};
+static const int8_t inv_stage_range_row_dct_adst_8[8] = {17, 17, 17, 17,
+ 17, 17, 17, 17};
+static const int8_t inv_cos_bit_col_dct_adst_8[6] = {15, 15, 15, 15, 15, 15};
+static const int8_t inv_cos_bit_row_dct_adst_8[8] = {15, 15, 15, 15,
+ 15, 15, 15, 15};
+
+static const TXFM_2D_CFG inv_txfm_2d_cfg_dct_adst_8 = {
+ .txfm_size = 8,
+ .stage_num_col = 6,  // entries in stage_range_col / cos_bit_col
+ .stage_num_row = 8,  // entries in stage_range_row / cos_bit_row
+
+ .shift = inv_shift_dct_adst_8,
+ .stage_range_col = inv_stage_range_col_dct_adst_8,
+ .stage_range_row = inv_stage_range_row_dct_adst_8,
+ .cos_bit_col = inv_cos_bit_col_dct_adst_8,
+ .cos_bit_row = inv_cos_bit_row_dct_adst_8,
+ .txfm_func_col = vp10_idct8_new,
+ .txfm_func_row = vp10_iadst8_new};
+
+// ---- config inv_dct_adst_16: size 16, col idct16, row iadst16 ----
+static const int8_t inv_shift_dct_adst_16[2] = {1, -7};
+static const int8_t inv_stage_range_col_dct_adst_16[8] = {19, 19, 19, 19,
+ 19, 19, 18, 18};
+static const int8_t inv_stage_range_row_dct_adst_16[10] = {18, 18, 18, 18, 18,
+ 18, 18, 18, 18, 18};
+static const int8_t inv_cos_bit_col_dct_adst_16[8] = {13, 13, 13, 13,
+ 13, 13, 13, 14};
+static const int8_t inv_cos_bit_row_dct_adst_16[10] = {14, 14, 14, 14, 14,
+ 14, 14, 14, 14, 14};
+
+static const TXFM_2D_CFG inv_txfm_2d_cfg_dct_adst_16 = {
+ .txfm_size = 16,
+ .stage_num_col = 8,  // entries in stage_range_col / cos_bit_col
+ .stage_num_row = 10,  // entries in stage_range_row / cos_bit_row
+
+ .shift = inv_shift_dct_adst_16,
+ .stage_range_col = inv_stage_range_col_dct_adst_16,
+ .stage_range_row = inv_stage_range_row_dct_adst_16,
+ .cos_bit_col = inv_cos_bit_col_dct_adst_16,
+ .cos_bit_row = inv_cos_bit_row_dct_adst_16,
+ .txfm_func_col = vp10_idct16_new,
+ .txfm_func_row = vp10_iadst16_new};
+
+// ---- config inv_dct_adst_32: size 32, col idct32, row iadst32 ----
+static const int8_t inv_shift_dct_adst_32[2] = {-1, -6};
+static const int8_t inv_stage_range_col_dct_adst_32[10] = {18, 18, 18, 18, 18,
+ 18, 18, 18, 17, 17};
+static const int8_t inv_stage_range_row_dct_adst_32[12] = {
+ 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19};
+static const int8_t inv_cos_bit_col_dct_adst_32[10] = {14, 14, 14, 14, 14,
+ 14, 14, 14, 14, 15};
+static const int8_t inv_cos_bit_row_dct_adst_32[12] = {13, 13, 13, 13, 13, 13,
+ 13, 13, 13, 13, 13, 13};
+
+static const TXFM_2D_CFG inv_txfm_2d_cfg_dct_adst_32 = {
+ .txfm_size = 32,
+ .stage_num_col = 10,  // entries in stage_range_col / cos_bit_col
+ .stage_num_row = 12,  // entries in stage_range_row / cos_bit_row
+
+ .shift = inv_shift_dct_adst_32,
+ .stage_range_col = inv_stage_range_col_dct_adst_32,
+ .stage_range_row = inv_stage_range_row_dct_adst_32,
+ .cos_bit_col = inv_cos_bit_col_dct_adst_32,
+ .cos_bit_row = inv_cos_bit_row_dct_adst_32,
+ .txfm_func_col = vp10_idct32_new,
+ .txfm_func_row = vp10_iadst32_new};
+
+// ---- config inv_adst_adst_4: size 4, col iadst4, row iadst4 ----
+static const int8_t inv_shift_adst_adst_4[2] = {0, -4};
+static const int8_t inv_stage_range_col_adst_adst_4[6] = {16, 16, 16,
+ 16, 15, 15};
+static const int8_t inv_stage_range_row_adst_adst_4[6] = {16, 16, 16,
+ 16, 16, 16};
+static const int8_t inv_cos_bit_col_adst_adst_4[6] = {15, 15, 15, 15, 15, 15};
+static const int8_t inv_cos_bit_row_adst_adst_4[6] = {15, 15, 15, 15, 15, 15};
+
+static const TXFM_2D_CFG inv_txfm_2d_cfg_adst_adst_4 = {
+ .txfm_size = 4,
+ .stage_num_col = 6,  // entries in stage_range_col / cos_bit_col
+ .stage_num_row = 6,  // entries in stage_range_row / cos_bit_row
+
+ .shift = inv_shift_adst_adst_4,
+ .stage_range_col = inv_stage_range_col_adst_adst_4,
+ .stage_range_row = inv_stage_range_row_adst_adst_4,
+ .cos_bit_col = inv_cos_bit_col_adst_adst_4,
+ .cos_bit_row = inv_cos_bit_row_adst_adst_4,
+ .txfm_func_col = vp10_iadst4_new,
+ .txfm_func_row = vp10_iadst4_new};
+
+// ---- config inv_adst_adst_8: size 8, col iadst8, row iadst8 ----
+static const int8_t inv_shift_adst_adst_8[2] = {-1, -4};
+static const int8_t inv_stage_range_col_adst_adst_8[8] = {16, 16, 16, 16,
+ 16, 16, 15, 15};
+static const int8_t inv_stage_range_row_adst_adst_8[8] = {17, 17, 17, 17,
+ 17, 17, 17, 17};
+static const int8_t inv_cos_bit_col_adst_adst_8[8] = {15, 15, 15, 15,
+ 15, 15, 15, 15};
+static const int8_t inv_cos_bit_row_adst_adst_8[8] = {15, 15, 15, 15,
+ 15, 15, 15, 15};
+
+static const TXFM_2D_CFG inv_txfm_2d_cfg_adst_adst_8 = {
+ .txfm_size = 8,
+ .stage_num_col = 8,  // entries in stage_range_col / cos_bit_col
+ .stage_num_row = 8,  // entries in stage_range_row / cos_bit_row
+
+ .shift = inv_shift_adst_adst_8,
+ .stage_range_col = inv_stage_range_col_adst_adst_8,
+ .stage_range_row = inv_stage_range_row_adst_adst_8,
+ .cos_bit_col = inv_cos_bit_col_adst_adst_8,
+ .cos_bit_row = inv_cos_bit_row_adst_adst_8,
+ .txfm_func_col = vp10_iadst8_new,
+ .txfm_func_row = vp10_iadst8_new};
+
+// ---- config inv_adst_adst_16: size 16, col iadst16, row iadst16 ----
+static const int8_t inv_shift_adst_adst_16[2] = {0, -6};
+static const int8_t inv_stage_range_col_adst_adst_16[10] = {18, 18, 18, 18, 18,
+ 18, 18, 18, 17, 17};
+static const int8_t inv_stage_range_row_adst_adst_16[10] = {18, 18, 18, 18, 18,
+ 18, 18, 18, 18, 18};
+static const int8_t inv_cos_bit_col_adst_adst_16[10] = {14, 14, 14, 14, 14,
+ 14, 14, 14, 14, 15};
+static const int8_t inv_cos_bit_row_adst_adst_16[10] = {14, 14, 14, 14, 14,
+ 14, 14, 14, 14, 14};
+
+static const TXFM_2D_CFG inv_txfm_2d_cfg_adst_adst_16 = {
+ .txfm_size = 16,
+ .stage_num_col = 10,  // entries in stage_range_col / cos_bit_col
+ .stage_num_row = 10,  // entries in stage_range_row / cos_bit_row
+
+ .shift = inv_shift_adst_adst_16,
+ .stage_range_col = inv_stage_range_col_adst_adst_16,
+ .stage_range_row = inv_stage_range_row_adst_adst_16,
+ .cos_bit_col = inv_cos_bit_col_adst_adst_16,
+ .cos_bit_row = inv_cos_bit_row_adst_adst_16,
+ .txfm_func_col = vp10_iadst16_new,
+ .txfm_func_row = vp10_iadst16_new};
+
+// ---- config inv_adst_adst_32: size 32, col iadst32, row iadst32 ----
+static const int8_t inv_shift_adst_adst_32[2] = {-1, -6};
+static const int8_t inv_stage_range_col_adst_adst_32[12] = {
+ 18, 18, 18, 18, 18, 18, 18, 18, 18, 18, 17, 17};
+static const int8_t inv_stage_range_row_adst_adst_32[12] = {
+ 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19};
+static const int8_t inv_cos_bit_col_adst_adst_32[12] = {14, 14, 14, 14, 14, 14,
+ 14, 14, 14, 14, 14, 15};
+static const int8_t inv_cos_bit_row_adst_adst_32[12] = {13, 13, 13, 13, 13, 13,
+ 13, 13, 13, 13, 13, 13};
+
+static const TXFM_2D_CFG inv_txfm_2d_cfg_adst_adst_32 = {
+ .txfm_size = 32,
+ .stage_num_col = 12,  // entries in stage_range_col / cos_bit_col
+ .stage_num_row = 12,  // entries in stage_range_row / cos_bit_row
+
+ .shift = inv_shift_adst_adst_32,
+ .stage_range_col = inv_stage_range_col_adst_adst_32,
+ .stage_range_row = inv_stage_range_row_adst_adst_32,
+ .cos_bit_col = inv_cos_bit_col_adst_adst_32,
+ .cos_bit_row = inv_cos_bit_row_adst_adst_32,
+ .txfm_func_col = vp10_iadst32_new,
+ .txfm_func_row = vp10_iadst32_new};
+
+// ---- config inv_adst_dct_4: size 4, col iadst4, row idct4 ----
+static const int8_t inv_shift_adst_dct_4[2] = {1, -5};
+static const int8_t inv_stage_range_col_adst_dct_4[6] = {17, 17, 17, 17, 16, 16};
+static const int8_t inv_stage_range_row_adst_dct_4[4] = {16, 16, 16, 16};
+static const int8_t inv_cos_bit_col_adst_dct_4[6] = {15, 15, 15, 15, 15, 15};
+static const int8_t inv_cos_bit_row_adst_dct_4[4] = {15, 15, 15, 15};
+
+static const TXFM_2D_CFG inv_txfm_2d_cfg_adst_dct_4 = {
+ .txfm_size = 4,
+ .stage_num_col = 6,  // entries in stage_range_col / cos_bit_col
+ .stage_num_row = 4,  // entries in stage_range_row / cos_bit_row
+
+ .shift = inv_shift_adst_dct_4,
+ .stage_range_col = inv_stage_range_col_adst_dct_4,
+ .stage_range_row = inv_stage_range_row_adst_dct_4,
+ .cos_bit_col = inv_cos_bit_col_adst_dct_4,
+ .cos_bit_row = inv_cos_bit_row_adst_dct_4,
+ .txfm_func_col = vp10_iadst4_new,
+ .txfm_func_row = vp10_idct4_new};
+
+// ---- config inv_adst_dct_8: size 8, col iadst8, row idct8 ----
+static const int8_t inv_shift_adst_dct_8[2] = {-1, -4};
+static const int8_t inv_stage_range_col_adst_dct_8[8] = {16, 16, 16, 16,
+ 16, 16, 15, 15};
+static const int8_t inv_stage_range_row_adst_dct_8[6] = {17, 17, 17, 17, 17, 17};
+static const int8_t inv_cos_bit_col_adst_dct_8[8] = {15, 15, 15, 15, 15, 15, 15, 15};
+static const int8_t inv_cos_bit_row_adst_dct_8[6] = {15, 15, 15, 15, 15, 15};
+
+static const TXFM_2D_CFG inv_txfm_2d_cfg_adst_dct_8 = {
+ .txfm_size = 8,
+ .stage_num_col = 8,  // entries in stage_range_col / cos_bit_col
+ .stage_num_row = 6,  // entries in stage_range_row / cos_bit_row
+
+ .shift = inv_shift_adst_dct_8,
+ .stage_range_col = inv_stage_range_col_adst_dct_8,
+ .stage_range_row = inv_stage_range_row_adst_dct_8,
+ .cos_bit_col = inv_cos_bit_col_adst_dct_8,
+ .cos_bit_row = inv_cos_bit_row_adst_dct_8,
+ .txfm_func_col = vp10_iadst8_new,
+ .txfm_func_row = vp10_idct8_new};
+
+// ---- config inv_adst_dct_16: size 16, col iadst16, row idct16 ----
+static const int8_t inv_shift_adst_dct_16[2] = {-1, -5};
+static const int8_t inv_stage_range_col_adst_dct_16[10] = {17, 17, 17, 17, 17,
+ 17, 17, 17, 16, 16};
+static const int8_t inv_stage_range_row_adst_dct_16[8] = {18, 18, 18, 18,
+ 18, 18, 18, 18};
+static const int8_t inv_cos_bit_col_adst_dct_16[10] = {15, 15, 15, 15, 15,
+ 15, 15, 15, 15, 15};
+static const int8_t inv_cos_bit_row_adst_dct_16[8] = {14, 14, 14, 14, 14, 14, 14, 14};
+
+static const TXFM_2D_CFG inv_txfm_2d_cfg_adst_dct_16 = {
+ .txfm_size = 16,
+ .stage_num_col = 10,  // entries in stage_range_col / cos_bit_col
+ .stage_num_row = 8,  // entries in stage_range_row / cos_bit_row
+
+ .shift = inv_shift_adst_dct_16,
+ .stage_range_col = inv_stage_range_col_adst_dct_16,
+ .stage_range_row = inv_stage_range_row_adst_dct_16,
+ .cos_bit_col = inv_cos_bit_col_adst_dct_16,
+ .cos_bit_row = inv_cos_bit_row_adst_dct_16,
+ .txfm_func_col = vp10_iadst16_new,
+ .txfm_func_row = vp10_idct16_new};
+
+// ---- config inv_adst_dct_32: size 32, col iadst32, row idct32 ----
+static const int8_t inv_shift_adst_dct_32[2] = {-1, -6};
+static const int8_t inv_stage_range_col_adst_dct_32[12] = {18, 18, 18, 18, 18, 18,
+ 18, 18, 18, 18, 17, 17};
+static const int8_t inv_stage_range_row_adst_dct_32[10] = {19, 19, 19, 19, 19,
+ 19, 19, 19, 19, 19};
+static const int8_t inv_cos_bit_col_adst_dct_32[12] = {14, 14, 14, 14, 14, 14,
+ 14, 14, 14, 14, 14, 15};
+static const int8_t inv_cos_bit_row_adst_dct_32[10] = {13, 13, 13, 13, 13,
+ 13, 13, 13, 13, 13};
+
+static const TXFM_2D_CFG inv_txfm_2d_cfg_adst_dct_32 = {
+ .txfm_size = 32,
+ .stage_num_col = 12,  // entries in stage_range_col / cos_bit_col
+ .stage_num_row = 10,  // entries in stage_range_row / cos_bit_row
+
+ .shift = inv_shift_adst_dct_32,
+ .stage_range_col = inv_stage_range_col_adst_dct_32,
+ .stage_range_row = inv_stage_range_row_adst_dct_32,
+ .cos_bit_col = inv_cos_bit_col_adst_dct_32,
+ .cos_bit_row = inv_cos_bit_row_adst_dct_32,
+ .txfm_func_col = vp10_iadst32_new,
+ .txfm_func_row = vp10_idct32_new};
+
+#endif // VP10_INV_TXFM2D_CFG_H_
diff --git a/vp10/decoder/decodemv.c b/vp10/decoder/decodemv.c
index cc0f3f0..a8868d4 100644
--- a/vp10/decoder/decodemv.c
+++ b/vp10/decoder/decodemv.c
@@ -346,18 +346,16 @@
MODE_INFO *const mi = xd->mi[0];
MB_MODE_INFO *const mbmi = &mi->mbmi;
FRAME_COUNTS *counts = xd->counts;
+
+#if !ALLOW_FILTER_INTRA_MODES
+ return;
+#endif
if (mbmi->mode == DC_PRED) {
mbmi->ext_intra_mode_info.use_ext_intra_mode[0] =
vpx_read(r, cm->fc->ext_intra_probs[0]);
if (mbmi->ext_intra_mode_info.use_ext_intra_mode[0]) {
- if (DR_ONLY ? 1 : vpx_read(r, DR_EXT_INTRA_PROB)) {
- mbmi->ext_intra_mode_info.ext_intra_mode[0] = EXT_DR_PRED;
- mbmi->ext_intra_mode_info.ext_intra_angle[0] =
- read_uniform(r, EXT_INTRA_ANGLES);
- } else {
- mbmi->ext_intra_mode_info.ext_intra_mode[0] =
- read_uniform(r, FILTER_INTRA_MODES);
- }
+ mbmi->ext_intra_mode_info.ext_intra_mode[0] =
+ read_uniform(r, FILTER_INTRA_MODES);
}
if (counts)
++counts->ext_intra[0][mbmi->ext_intra_mode_info.use_ext_intra_mode[0]];
@@ -366,14 +364,8 @@
mbmi->ext_intra_mode_info.use_ext_intra_mode[1] =
vpx_read(r, cm->fc->ext_intra_probs[1]);
if (mbmi->ext_intra_mode_info.use_ext_intra_mode[1]) {
- if (DR_ONLY ? 1 : vpx_read(r, DR_EXT_INTRA_PROB)) {
- mbmi->ext_intra_mode_info.ext_intra_mode[1] = EXT_DR_PRED;
- mbmi->ext_intra_mode_info.ext_intra_angle[1] =
- read_uniform(r, EXT_INTRA_ANGLES);
- } else {
- mbmi->ext_intra_mode_info.ext_intra_mode[1] =
- read_uniform(r, FILTER_INTRA_MODES);
- }
+ mbmi->ext_intra_mode_info.ext_intra_mode[1] =
+ read_uniform(r, FILTER_INTRA_MODES);
}
if (counts)
++counts->ext_intra[1][mbmi->ext_intra_mode_info.use_ext_intra_mode[1]];
@@ -426,9 +418,20 @@
default:
mbmi->mode = read_intra_mode(r,
get_y_mode_probs(cm, mi, above_mi, left_mi, 0));
+#if CONFIG_EXT_INTRA
+ if (mbmi->mode != DC_PRED && mbmi->mode != TM_PRED)
+ mbmi->angle_delta[0] =
+ read_uniform(r, 2 * MAX_ANGLE_DELTAS + 1) - MAX_ANGLE_DELTAS;
+#endif // CONFIG_EXT_INTRA
}
mbmi->uv_mode = read_intra_mode_uv(cm, xd, r, mbmi->mode);
+#if CONFIG_EXT_INTRA
+ if (mbmi->uv_mode != DC_PRED && mbmi->uv_mode != TM_PRED &&
+ bsize >= BLOCK_8X8)
+ mbmi->angle_delta[1] =
+ read_uniform(r, 2 * MAX_ANGLE_DELTAS + 1) - MAX_ANGLE_DELTAS;
+#endif  // CONFIG_EXT_INTRA
mbmi->palette_mode_info.palette_size[0] = 0;
mbmi->palette_mode_info.palette_size[1] = 0;
@@ -591,8 +594,6 @@
cm->fc->switchable_interp_prob[ctx]);
if (counts)
++counts->switchable_interp[ctx][type];
- // printf("%d/%d -> %d, %d\n", cm->current_video_frame, cm->show_frame,
- // xd->mi[0]->mbmi.sb_type, xd->mi[0]->mbmi.interp_filter);
return type;
}
@@ -626,9 +627,22 @@
break;
default:
mbmi->mode = read_intra_mode_y(cm, xd, r, size_group_lookup[bsize]);
+#if CONFIG_EXT_INTRA
+ mbmi->angle_delta[0] = 0;
+ if (mbmi->mode != DC_PRED && mbmi->mode != TM_PRED)
+ mbmi->angle_delta[0] =
+ read_uniform(r, 2 * MAX_ANGLE_DELTAS + 1) - MAX_ANGLE_DELTAS;
+#endif // CONFIG_EXT_INTRA
}
mbmi->uv_mode = read_intra_mode_uv(cm, xd, r, mbmi->mode);
+#if CONFIG_EXT_INTRA
+ if (mbmi->uv_mode != DC_PRED && mbmi->uv_mode != TM_PRED &&
+ bsize >= BLOCK_8X8)
+ mbmi->angle_delta[1] =
+ read_uniform(r, 2 * MAX_ANGLE_DELTAS + 1) - MAX_ANGLE_DELTAS;
+#endif // CONFIG_EXT_INTRA
+
mbmi->palette_mode_info.palette_size[0] = 0;
mbmi->palette_mode_info.palette_size[1] = 0;
#if CONFIG_EXT_INTRA
diff --git a/vp10/encoder/bitstream.c b/vp10/encoder/bitstream.c
index beb3414..a7b1f24 100644
--- a/vp10/encoder/bitstream.c
+++ b/vp10/encoder/bitstream.c
@@ -520,19 +520,15 @@
static void write_ext_intra_mode_info(const VP10_COMMON *const cm,
const MB_MODE_INFO *const mbmi,
vpx_writer *w) {
+#if !ALLOW_FILTER_INTRA_MODES
+ return;
+#endif
if (mbmi->mode == DC_PRED) {
vpx_write(w, mbmi->ext_intra_mode_info.use_ext_intra_mode[0],
cm->fc->ext_intra_probs[0]);
if (mbmi->ext_intra_mode_info.use_ext_intra_mode[0]) {
EXT_INTRA_MODE mode = mbmi->ext_intra_mode_info.ext_intra_mode[0];
- int dr_mode = mode > FILTER_TM_PRED;
- if (!DR_ONLY)
- vpx_write(w, dr_mode, DR_EXT_INTRA_PROB);
- if (dr_mode)
- write_uniform(w, EXT_INTRA_ANGLES,
- mbmi->ext_intra_mode_info.ext_intra_angle[0]);
- else
- write_uniform(w, FILTER_INTRA_MODES, mode);
+ write_uniform(w, FILTER_INTRA_MODES, mode);
}
}
if (mbmi->uv_mode == DC_PRED) {
@@ -540,14 +536,7 @@
cm->fc->ext_intra_probs[1]);
if (mbmi->ext_intra_mode_info.use_ext_intra_mode[1]) {
EXT_INTRA_MODE mode = mbmi->ext_intra_mode_info.ext_intra_mode[1];
- int dr_mode = mode > FILTER_TM_PRED;
- if (!DR_ONLY)
- vpx_write(w, dr_mode, DR_EXT_INTRA_PROB);
- if (dr_mode)
- write_uniform(w, EXT_INTRA_ANGLES,
- mbmi->ext_intra_mode_info.ext_intra_angle[1]);
- else
- write_uniform(w, FILTER_INTRA_MODES, mode);
+ write_uniform(w, FILTER_INTRA_MODES, mode);
}
}
}
@@ -644,6 +633,12 @@
if (!is_inter) {
if (bsize >= BLOCK_8X8) {
write_intra_mode(w, mode, cm->fc->y_mode_prob[size_group_lookup[bsize]]);
+#if CONFIG_EXT_INTRA
+ if (mode != DC_PRED && mode != TM_PRED) {
+ write_uniform(w, 2 * MAX_ANGLE_DELTAS + 1,
+ MAX_ANGLE_DELTAS + mbmi->angle_delta[0]);
+ }
+#endif // CONFIG_EXT_INTRA
} else {
int idx, idy;
const int num_4x4_w = num_4x4_blocks_wide_lookup[bsize];
@@ -657,6 +652,11 @@
}
write_intra_mode(w, mbmi->uv_mode, cm->fc->uv_mode_prob[mode]);
#if CONFIG_EXT_INTRA
+ if (mbmi->uv_mode != DC_PRED && mbmi->uv_mode != TM_PRED &&
+ bsize >= BLOCK_8X8)
+ write_uniform(w, 2 * MAX_ANGLE_DELTAS + 1,
+ MAX_ANGLE_DELTAS + mbmi->angle_delta[1]);
+
if (bsize >= BLOCK_8X8)
write_ext_intra_mode_info(cm, mbmi, w);
#endif // CONFIG_EXT_INTRA
@@ -782,6 +782,11 @@
if (bsize >= BLOCK_8X8) {
write_intra_mode(w, mbmi->mode,
get_y_mode_probs(cm, mi, above_mi, left_mi, 0));
+#if CONFIG_EXT_INTRA
+ if (mbmi->mode != DC_PRED && mbmi->mode != TM_PRED)
+ write_uniform(w, 2 * MAX_ANGLE_DELTAS + 1,
+ MAX_ANGLE_DELTAS + mbmi->angle_delta[0]);
+#endif // CONFIG_EXT_INTRA
} else {
const int num_4x4_w = num_4x4_blocks_wide_lookup[bsize];
const int num_4x4_h = num_4x4_blocks_high_lookup[bsize];
@@ -797,6 +802,12 @@
}
write_intra_mode(w, mbmi->uv_mode, cm->fc->uv_mode_prob[mbmi->mode]);
+#if CONFIG_EXT_INTRA
+ if (mbmi->uv_mode != DC_PRED && mbmi->uv_mode != TM_PRED &&
+ bsize >= BLOCK_8X8)
+ write_uniform(w, 2 * MAX_ANGLE_DELTAS + 1,
+ MAX_ANGLE_DELTAS + mbmi->angle_delta[1]);
+#endif // CONFIG_EXT_INTRA
if (bsize >= BLOCK_8X8 && cm->allow_screen_content_tools &&
mbmi->mode == DC_PRED)
diff --git a/vp10/encoder/rdopt.c b/vp10/encoder/rdopt.c
index adcd547..5c447b2 100644
--- a/vp10/encoder/rdopt.c
+++ b/vp10/encoder/rdopt.c
@@ -912,9 +912,9 @@
if (cpi->sf.tx_size_search_breakout &&
(rd == INT64_MAX ||
#if CONFIG_EXT_TX
- (s == 1 && tx_type != DCT_DCT) ||
+ (s == 1 && tx_type != DCT_DCT && n < start_tx) ||
#else
- (s == 1) ||
+ (s == 1 && n < start_tx) ||
#endif
(n < (int) max_tx_size && rd > last_rd)))
break;
@@ -1507,10 +1507,7 @@
MB_MODE_INFO *mbmi = &mic->mbmi;
int this_rate, this_rate_tokenonly, s;
int ext_intra_selected_flag = 0;
- int i, step, delta, angle, best_angle, best_angle_dir;
- int deltas[3] = {25, 5, 1};
- int branches[3] = {2, 2, 2};
- int64_t this_distortion, this_rd, best_angle_rd = INT64_MAX;
+ int64_t this_distortion, this_rd;
EXT_INTRA_MODE mode;
TX_SIZE best_tx_size = TX_4X4;
EXT_INTRA_MODE_INFO ext_intra_mode_info;
@@ -1522,123 +1519,30 @@
mbmi->ext_intra_mode_info.use_ext_intra_mode[0] = 1;
mbmi->mode = DC_PRED;
- if (!DR_ONLY) {
- for (mode = 0; mode < FILTER_INTRA_MODES; ++mode) {
- mbmi->ext_intra_mode_info.ext_intra_mode[0] = mode;
- super_block_yrd(cpi, x, &this_rate_tokenonly, &this_distortion,
- &s, NULL, bsize, *best_rd);
- if (this_rate_tokenonly == INT_MAX)
- continue;
+ for (mode = 0; mode < FILTER_INTRA_MODES; ++mode) {
+ mbmi->ext_intra_mode_info.ext_intra_mode[0] = mode;
+ super_block_yrd(cpi, x, &this_rate_tokenonly, &this_distortion,
+ &s, NULL, bsize, *best_rd);
+ if (this_rate_tokenonly == INT_MAX)
+ continue;
- this_rate = this_rate_tokenonly +
- vp10_cost_bit(cpi->common.fc->ext_intra_probs[0], 1) +
- vp10_cost_bit(DR_EXT_INTRA_PROB, 0) +
- write_uniform_cost(FILTER_INTRA_MODES, mode) + mode_cost;
- this_rd = RDCOST(x->rdmult, x->rddiv, this_rate, this_distortion);
+ this_rate = this_rate_tokenonly +
+ vp10_cost_bit(cpi->common.fc->ext_intra_probs[0], 1) +
+ write_uniform_cost(FILTER_INTRA_MODES, mode) + mode_cost;
+ this_rd = RDCOST(x->rdmult, x->rddiv, this_rate, this_distortion);
- if (this_rd < *best_rd) {
- *best_rd = this_rd;
- best_tx_size = mic->mbmi.tx_size;
- ext_intra_mode_info = mbmi->ext_intra_mode_info;
+ if (this_rd < *best_rd) {
+ *best_rd = this_rd;
+ best_tx_size = mic->mbmi.tx_size;
+ ext_intra_mode_info = mbmi->ext_intra_mode_info;
#if CONFIG_EXT_TX
- best_tx_type = mic->mbmi.tx_type;
+ best_tx_type = mic->mbmi.tx_type;
#endif // CONFIG_EXT_TX
- *rate = this_rate;
- *rate_tokenonly = this_rate_tokenonly;
- *distortion = this_distortion;
- *skippable = s;
- ext_intra_selected_flag = 1;
- }
- }
- }
-
- mbmi->ext_intra_mode_info.ext_intra_mode[0] = EXT_DR_PRED;
- if (ANGLE_FAST_SEARCH) {
- best_angle = EXT_INTRA_ANGLES / 2;
- for (step = 0; step < 3; ++step) {
- delta = deltas[step];
- for (i = -branches[step]; i <= branches[step]; ++i) {
- int64_t rd_thresh;
- if (i == 0 && step != 0)
- continue;
- angle = best_angle + i * delta;
- if (angle < 0)
- angle = 0;
- if (angle >= EXT_INTRA_ANGLES)
- angle = EXT_INTRA_ANGLES - 1;
- if (angle == best_angle && step != 0)
- continue;
- mbmi->ext_intra_mode_info.ext_intra_angle[0] = angle;
- if (*best_rd == INT64_MAX)
- rd_thresh = best_angle_rd;
- else
- rd_thresh = VPXMIN(best_angle_rd, *best_rd * RD_ADJUSTER);
- super_block_yrd(cpi, x, &this_rate_tokenonly, &this_distortion,
- &s, NULL, bsize, rd_thresh);
- if (this_rate_tokenonly == INT_MAX)
- continue;
- this_rate = this_rate_tokenonly +
- vp10_cost_bit(cpi->common.fc->ext_intra_probs[0], 1) +
- (DR_ONLY ? 0: vp10_cost_bit(DR_EXT_INTRA_PROB, 1)) +
- write_uniform_cost(EXT_INTRA_ANGLES, angle) + mode_cost;
- this_rd = RDCOST(x->rdmult, x->rddiv, this_rate, this_distortion);
- if (this_rd < *best_rd) {
- *best_rd = this_rd;
- best_tx_size = mic->mbmi.tx_size;
- ext_intra_mode_info = mbmi->ext_intra_mode_info;
-#if CONFIG_EXT_TX
- best_tx_type = mic->mbmi.tx_type;
-#endif // CONFIG_EXT_TX
- *rate = this_rate;
- *rate_tokenonly = this_rate_tokenonly;
- *distortion = this_distortion;
- *skippable = s;
- ext_intra_selected_flag = 1;
- }
- if (this_rd < best_angle_rd) {
- best_angle_rd = this_rd;
- best_angle_dir = i;
- }
- }
-
- best_angle += best_angle_dir * delta;
- if (best_angle < 0)
- best_angle = 0;
- if (best_angle >= EXT_INTRA_ANGLES)
- best_angle = EXT_INTRA_ANGLES - 1;
- if (*best_rd < best_angle_rd / RD_ADJUSTER)
- break;
- }
- } else {
- for (angle = 0; angle < EXT_INTRA_ANGLES; ++angle) {
- mbmi->ext_intra_mode_info.ext_intra_angle[0] = angle;
- if (prediction_angle_map(angle) == 90 ||
- prediction_angle_map(angle) == 180)
- continue;
- super_block_yrd(cpi, x, &this_rate_tokenonly, &this_distortion,
- &s, NULL, bsize, *best_rd);
- if (this_rate_tokenonly == INT_MAX)
- continue;
-
- this_rate = this_rate_tokenonly +
- vp10_cost_bit(cpi->common.fc->ext_intra_probs[0], 1) +
- (DR_ONLY ? 0: vp10_cost_bit(DR_EXT_INTRA_PROB, 1)) +
- write_uniform_cost(EXT_INTRA_ANGLES, angle) + mode_cost;
- this_rd = RDCOST(x->rdmult, x->rddiv, this_rate, this_distortion);
-
- if (this_rd < *best_rd) {
- *best_rd = this_rd;
- best_tx_size = mic->mbmi.tx_size;
- ext_intra_mode_info = mbmi->ext_intra_mode_info;
-#if CONFIG_EXT_TX
- best_tx_type = mic->mbmi.tx_type;
-#endif // CONFIG_EXT_TX
- *rate = this_rate;
- *rate_tokenonly = this_rate_tokenonly;
- *distortion = this_distortion;
- *skippable = s;
- ext_intra_selected_flag = 1;
- }
+ *rate = this_rate;
+ *rate_tokenonly = this_rate_tokenonly;
+ *distortion = this_distortion;
+ *skippable = s;
+ ext_intra_selected_flag = 1;
}
}
@@ -1649,8 +1553,6 @@
ext_intra_mode_info.use_ext_intra_mode[0];
mbmi->ext_intra_mode_info.ext_intra_mode[0] =
ext_intra_mode_info.ext_intra_mode[0];
- mbmi->ext_intra_mode_info.ext_intra_angle[0] =
- ext_intra_mode_info.ext_intra_angle[0];
#if CONFIG_EXT_TX
mbmi->tx_type = best_tx_type;
#endif // CONFIG_EXT_TX
@@ -1659,6 +1561,132 @@
return 0;
}
}
+// Picks the best luma angle_delta[0] for the current directional mode by RD.
+static int64_t rd_pick_intra_angle_sby(VP10_COMP *cpi, MACROBLOCK *x,
+ int *rate, int *rate_tokenonly,
+ int64_t *distortion, int *skippable,
+ BLOCK_SIZE bsize, int rate_overhead,
+ int64_t best_rd) {  // RD cost to beat; outputs written only if beaten
+ MACROBLOCKD *const xd = &x->e_mbd;
+ MODE_INFO *const mic = xd->mi[0];
+ MB_MODE_INFO *mbmi = &mic->mbmi;
+ int this_rate, this_rate_tokenonly, s;
+ int angle_delta, best_angle_delta = 0;
+ const double rd_adjust = 1.2;  // slack factor on best_rd, used for delta 0
+ int64_t this_distortion, this_rd, sse_dummy;
+ TX_SIZE best_tx_size = mic->mbmi.tx_size;
+#if CONFIG_EXT_TX
+ TX_TYPE best_tx_type = mbmi->tx_type;
+#endif // CONFIG_EXT_TX
+
+ if (ANGLE_FAST_SEARCH) {  // two-level coarse-to-fine search
+ int deltas_level1[3] = {0, -2, 2};  // level 1: coarse deltas, 0 first
+ int deltas_level2[3][2] = {  // level 2: neighbours of each level-1 delta
+ {-1, 1}, {-3, -1}, {1, 3},
+ };
+ const int level1 = 3, level2 = 2;
+ int i, j, best_i = -1;  // best_i: level-1 index that improved best_rd
+
+ for (i = 0; i < level1; ++i) {
+ mic->mbmi.angle_delta[0] = deltas_level1[i];
+ super_block_yrd(cpi, x, &this_rate_tokenonly, &this_distortion,
+ &s, NULL, bsize,
+ (i == 0 && best_rd < INT64_MAX) ? best_rd * rd_adjust :
+ best_rd);
+ if (this_rate_tokenonly == INT_MAX) {
+ if (i == 0)
+ break;  // no valid rate for delta 0: stop, best_i stays -1
+ else
+ continue;
+ }
+ this_rate = this_rate_tokenonly + rate_overhead;
+ this_rd = RDCOST(x->rdmult, x->rddiv, this_rate, this_distortion);
+ if (i == 0 && best_rd < INT64_MAX && this_rd > best_rd * rd_adjust)
+ break;  // delta 0 exceeds best_rd * rd_adjust: give up early
+ if (this_rd < best_rd) {
+ best_i = i;
+ best_rd = this_rd;
+ best_angle_delta = mbmi->angle_delta[0];
+ best_tx_size = mbmi->tx_size;
+#if CONFIG_EXT_TX
+ best_tx_type = mbmi->tx_type;
+#endif // CONFIG_EXT_TX
+ *rate = this_rate;
+ *rate_tokenonly = this_rate_tokenonly;
+ *distortion = this_distortion;
+ *skippable = s;
+ }
+ }
+
+ if (best_i >= 0) {  // refine only around an improving level-1 delta
+ for (j = 0; j < level2; ++j) {
+ mic->mbmi.angle_delta[0] = deltas_level2[best_i][j];
+ super_block_yrd(cpi, x, &this_rate_tokenonly, &this_distortion,
+ &s, NULL, bsize, best_rd);
+ if (this_rate_tokenonly == INT_MAX)
+ continue;
+ this_rate = this_rate_tokenonly + rate_overhead;
+ this_rd = RDCOST(x->rdmult, x->rddiv, this_rate, this_distortion);
+ if (this_rd < best_rd) {
+ best_rd = this_rd;
+ best_angle_delta = mbmi->angle_delta[0];
+ best_tx_size = mbmi->tx_size;
+#if CONFIG_EXT_TX
+ best_tx_type = mbmi->tx_type;
+#endif // CONFIG_EXT_TX
+ *rate = this_rate;
+ *rate_tokenonly = this_rate_tokenonly;
+ *distortion = this_distortion;
+ *skippable = s;
+ }
+ }
+ }
+ } else {  // exhaustive search over every delta in the allowed range
+ for (angle_delta = -MAX_ANGLE_DELTAS; angle_delta <= MAX_ANGLE_DELTAS;
+ ++angle_delta) {
+ mic->mbmi.angle_delta[0] = angle_delta;
+
+ super_block_yrd(cpi, x, &this_rate_tokenonly, &this_distortion,
+ &s, NULL, bsize, best_rd);
+ if (this_rate_tokenonly == INT_MAX)
+ continue;
+
+ this_rate = this_rate_tokenonly + rate_overhead;
+ this_rd = RDCOST(x->rdmult, x->rddiv, this_rate, this_distortion);
+
+ if (this_rd < best_rd) {
+ best_rd = this_rd;
+ best_angle_delta = mbmi->angle_delta[0];
+ best_tx_size = mbmi->tx_size;
+#if CONFIG_EXT_TX
+ best_tx_type = mbmi->tx_type;
+#endif // CONFIG_EXT_TX
+ *rate = this_rate;
+ *rate_tokenonly = this_rate_tokenonly;
+ *distortion = this_distortion;
+ *skippable = s;
+ }
+ }
+ }
+
+ mbmi->tx_size = best_tx_size;  // leave the best settings found in mbmi
+ mbmi->angle_delta[0] = best_angle_delta;
+#if CONFIG_EXT_TX
+ mbmi->tx_type = best_tx_type;
+#endif // CONFIG_EXT_TX
+
+ if (*rate_tokenonly < INT_MAX) {  // callers in view preset this to INT_MAX
+ txfm_rd_in_plane(x,  // outputs discarded; presumably refreshes state
+#if CONFIG_VAR_TX
+ cpi,
+#endif
+ &this_rate_tokenonly, &this_distortion, &s,
+ &sse_dummy, INT64_MAX, 0, bsize, mbmi->tx_size,
+ cpi->sf.use_fast_coef_costing);
+ }
+
+ return best_rd;  // equals the input best_rd if no candidate beat it
+}
#endif // CONFIG_EXT_INTRA
// This function is used only for intra_only frames
@@ -1676,6 +1704,7 @@
TX_SIZE best_tx = TX_4X4;
#if CONFIG_EXT_INTRA
EXT_INTRA_MODE_INFO ext_intra_mode_info;
+ int is_directional_mode, rate_overhead, best_angle_delta = 0;
#endif // CONFIG_EXT_INTRA
#if CONFIG_EXT_TX
TX_TYPE best_tx_type = DCT_DCT;
@@ -1696,6 +1725,7 @@
#if CONFIG_EXT_INTRA
ext_intra_mode_info.use_ext_intra_mode[0] = 0;
mic->mbmi.ext_intra_mode_info.use_ext_intra_mode[0] = 0;
+ mic->mbmi.angle_delta[0] = 0;
#endif // CONFIG_EXT_INTRA
memset(x->skip_txfm, SKIP_TXFM_NONE, sizeof(x->skip_txfm));
palette_mode_info.palette_size[0] = 0;
@@ -1708,9 +1738,24 @@
/* Y Search for intra prediction mode */
for (mode = DC_PRED; mode <= TM_PRED; ++mode) {
mic->mbmi.mode = mode;
-
+#if CONFIG_EXT_INTRA
+ is_directional_mode = (mode != DC_PRED && mode != TM_PRED);
+ if (is_directional_mode) {
+ rate_overhead = bmode_costs[mode] +
+ write_uniform_cost(2 * MAX_ANGLE_DELTAS + 1, 0);
+ this_rate_tokenonly = INT_MAX;
+ this_rd =
+ rd_pick_intra_angle_sby(cpi, x, &this_rate, &this_rate_tokenonly,
+ &this_distortion, &s, bsize, rate_overhead,
+ best_rd);
+ } else {
+ mic->mbmi.angle_delta[0] = 0;
+ super_block_yrd(cpi, x, &this_rate_tokenonly, &this_distortion,
+ &s, NULL, bsize, best_rd);
+ }
+#endif // CONFIG_EXT_INTRA
super_block_yrd(cpi, x, &this_rate_tokenonly, &this_distortion,
- &s, NULL, bsize, best_rd);
+ &s, NULL, bsize, best_rd);
if (this_rate_tokenonly == INT_MAX)
continue;
@@ -1721,8 +1766,12 @@
vp10_cost_bit(vp10_default_palette_y_mode_prob[bsize - BLOCK_8X8]
[palette_ctx], 0);
#if CONFIG_EXT_INTRA
- if (mode == DC_PRED)
+ if (mode == DC_PRED && ALLOW_FILTER_INTRA_MODES)
this_rate += vp10_cost_bit(cpi->common.fc->ext_intra_probs[0], 0);
+ if (is_directional_mode)
+ this_rate += write_uniform_cost(2 * MAX_ANGLE_DELTAS + 1,
+ MAX_ANGLE_DELTAS +
+ mic->mbmi.angle_delta[0]);
#endif // CONFIG_EXT_INTRA
this_rd = RDCOST(x->rdmult, x->rddiv, this_rate, this_distortion);
@@ -1730,6 +1779,9 @@
mode_selected = mode;
best_rd = this_rd;
best_tx = mic->mbmi.tx_size;
+#if CONFIG_EXT_INTRA
+ best_angle_delta = mic->mbmi.angle_delta[0];
+#endif // CONFIG_EXT_INTRA
#if CONFIG_EXT_TX
best_tx_type = mic->mbmi.tx_type;
#endif // CONFIG_EXT_TX
@@ -1746,7 +1798,7 @@
&best_tx, &mode_selected, &best_rd);
#if CONFIG_EXT_INTRA
- if (!palette_mode_info.palette_size[0] > 0) {
+ if (!palette_mode_info.palette_size[0] > 0 && ALLOW_FILTER_INTRA_MODES) {
if (rd_pick_ext_intra_sby(cpi, x, rate, rate_tokenonly, distortion,
skippable, bsize, bmode_costs[DC_PRED],
&best_rd)) {
@@ -1764,13 +1816,14 @@
if (ext_intra_mode_info.use_ext_intra_mode[0]) {
mic->mbmi.ext_intra_mode_info.ext_intra_mode[0] =
ext_intra_mode_info.ext_intra_mode[0];
- mic->mbmi.ext_intra_mode_info.ext_intra_angle[0] =
- ext_intra_mode_info.ext_intra_angle[0];
}
#endif // CONFIG_EXT_INTRA
mic->mbmi.mode = mode_selected;
mic->mbmi.tx_size = best_tx;
+#if CONFIG_EXT_INTRA
+ mic->mbmi.angle_delta[0] = best_angle_delta;
+#endif // CONFIG_EXT_INTRA
#if CONFIG_EXT_TX
mic->mbmi.tx_type = best_tx_type;
#endif // CONFIG_EXT_TX
@@ -2468,127 +2521,38 @@
MB_MODE_INFO *mbmi = &xd->mi[0]->mbmi;
int ext_intra_selected_flag = 0;
int this_rate_tokenonly, this_rate, s;
- int64_t this_distortion, this_sse, this_rd, best_angle_rd = INT64_MAX;
+ int64_t this_distortion, this_sse, this_rd;
EXT_INTRA_MODE mode;
- int i, step, delta, angle, best_angle, best_angle_dir;
- int deltas[3] = {25, 5, 1};
- int branches[3] = {2, 2, 2};
EXT_INTRA_MODE_INFO ext_intra_mode_info;
vp10_zero(ext_intra_mode_info);
mbmi->ext_intra_mode_info.use_ext_intra_mode[1] = 1;
mbmi->uv_mode = DC_PRED;
- if (!DR_ONLY) {
- for (mode = 0; mode < FILTER_INTRA_MODES; ++mode) {
- mbmi->ext_intra_mode_info.ext_intra_mode[1] = mode;
- if (!super_block_uvrd(cpi, x, &this_rate_tokenonly,
- &this_distortion, &s, &this_sse, bsize, *best_rd))
- continue;
+ for (mode = 0; mode < FILTER_INTRA_MODES; ++mode) {
+ mbmi->ext_intra_mode_info.ext_intra_mode[1] = mode;
+ if (!super_block_uvrd(cpi, x, &this_rate_tokenonly,
+ &this_distortion, &s, &this_sse, bsize, *best_rd))
+ continue;
- this_rate = this_rate_tokenonly +
- vp10_cost_bit(cpi->common.fc->ext_intra_probs[1], 1) +
- vp10_cost_bit(DR_EXT_INTRA_PROB, 0) +
- cpi->intra_uv_mode_cost[mbmi->mode][mbmi->uv_mode] +
- write_uniform_cost(FILTER_INTRA_MODES, mode);
- this_rd = RDCOST(x->rdmult, x->rddiv, this_rate, this_distortion);
- if (this_rd < *best_rd) {
- *best_rd = this_rd;
- *rate = this_rate;
- *rate_tokenonly = this_rate_tokenonly;
- *distortion = this_distortion;
- *skippable = s;
- ext_intra_mode_info = mbmi->ext_intra_mode_info;
- ext_intra_selected_flag = 1;
- if (!x->select_tx_size)
- swap_block_ptr(x, ctx, 2, 0, 1, MAX_MB_PLANE);
- }
+ this_rate = this_rate_tokenonly +
+ vp10_cost_bit(cpi->common.fc->ext_intra_probs[1], 1) +
+ cpi->intra_uv_mode_cost[mbmi->mode][mbmi->uv_mode] +
+ write_uniform_cost(FILTER_INTRA_MODES, mode);
+ this_rd = RDCOST(x->rdmult, x->rddiv, this_rate, this_distortion);
+ if (this_rd < *best_rd) {
+ *best_rd = this_rd;
+ *rate = this_rate;
+ *rate_tokenonly = this_rate_tokenonly;
+ *distortion = this_distortion;
+ *skippable = s;
+ ext_intra_mode_info = mbmi->ext_intra_mode_info;
+ ext_intra_selected_flag = 1;
+ if (!x->select_tx_size)
+ swap_block_ptr(x, ctx, 2, 0, 1, MAX_MB_PLANE);
}
}
- mbmi->ext_intra_mode_info.ext_intra_mode[1] = EXT_DR_PRED;
- if (ANGLE_FAST_SEARCH) {
- best_angle = EXT_INTRA_ANGLES / 2;
- for (step = 0; step < 3; ++step) {
- delta = deltas[step];
- for (i = -branches[step]; i <= branches[step]; ++i) {
- int64_t rd_thresh;
- if (i == 0 && step != 0)
- continue;
- angle = best_angle + i * delta;
- if (angle < 0)
- angle = 0;
- if (angle >= EXT_INTRA_ANGLES)
- angle = EXT_INTRA_ANGLES - 1;
- if (angle == best_angle && step != 0)
- continue;
- mbmi->ext_intra_mode_info.ext_intra_angle[1] = angle;
- if (*best_rd == INT64_MAX)
- rd_thresh = best_angle_rd;
- else
- rd_thresh = VPXMIN(best_angle_rd, *best_rd * RD_ADJUSTER);
- if (!super_block_uvrd(cpi, x, &this_rate_tokenonly, &this_distortion,
- &s, &this_sse, bsize, rd_thresh))
- continue;
- this_rate = this_rate_tokenonly +
- vp10_cost_bit(cpi->common.fc->ext_intra_probs[1], 1) +
- (DR_ONLY ? 0: vp10_cost_bit(DR_EXT_INTRA_PROB, 1)) +
- cpi->intra_uv_mode_cost[mbmi->mode][mbmi->uv_mode] +
- write_uniform_cost(EXT_INTRA_ANGLES, angle);
- this_rd = RDCOST(x->rdmult, x->rddiv, this_rate, this_distortion);
- if (this_rd < *best_rd) {
- *best_rd = this_rd;
- *rate = this_rate;
- *rate_tokenonly = this_rate_tokenonly;
- *distortion = this_distortion;
- *skippable = s;
- ext_intra_mode_info = mbmi->ext_intra_mode_info;
- ext_intra_selected_flag = 1;
- if (!x->select_tx_size)
- swap_block_ptr(x, ctx, 2, 0, 1, MAX_MB_PLANE);
- }
- if (this_rd < best_angle_rd) {
- best_angle_rd = this_rd;
- best_angle_dir = i;
- }
- }
- best_angle += best_angle_dir * delta;
- if (best_angle < 0)
- best_angle = 0;
- if (best_angle >= EXT_INTRA_ANGLES)
- best_angle = EXT_INTRA_ANGLES - 1;
- if (*best_rd < best_angle_rd / RD_ADJUSTER)
- break;
- }
- } else {
- for (angle = 0; angle < EXT_INTRA_ANGLES; ++angle) {
- mbmi->ext_intra_mode_info.ext_intra_angle[1] = angle;
- if (prediction_angle_map(angle) == 90 ||
- prediction_angle_map(angle) == 180)
- continue;
- if (!super_block_uvrd(cpi, x, &this_rate_tokenonly,
- &this_distortion, &s, &this_sse, bsize, *best_rd))
- continue;
-
- this_rate = this_rate_tokenonly +
- vp10_cost_bit(cpi->common.fc->ext_intra_probs[1], 1) +
- (DR_ONLY ? 0: vp10_cost_bit(DR_EXT_INTRA_PROB, 1)) +
- cpi->intra_uv_mode_cost[mbmi->mode][mbmi->uv_mode] +
- write_uniform_cost(EXT_INTRA_ANGLES, angle);
- this_rd = RDCOST(x->rdmult, x->rddiv, this_rate, this_distortion);
- if (this_rd < *best_rd) {
- *best_rd = this_rd;
- *rate = this_rate;
- *rate_tokenonly = this_rate_tokenonly;
- *distortion = this_distortion;
- *skippable = s;
- ext_intra_mode_info = mbmi->ext_intra_mode_info;
- ext_intra_selected_flag = 1;
- if (!x->select_tx_size)
- swap_block_ptr(x, ctx, 2, 0, 1, MAX_MB_PLANE);
- }
- }
- }
if (ext_intra_selected_flag) {
mbmi->uv_mode = DC_PRED;
@@ -2596,14 +2560,105 @@
ext_intra_mode_info.use_ext_intra_mode[1];
mbmi->ext_intra_mode_info.ext_intra_mode[1] =
ext_intra_mode_info.ext_intra_mode[1];
- mbmi->ext_intra_mode_info.ext_intra_angle[1] =
- ext_intra_mode_info.ext_intra_angle[1];
-
return 1;
} else {
return 0;
}
}
+// Picks the best chroma angle_delta[1] by RD; returns 0 if none beats best_rd.
+static int rd_pick_intra_angle_sbuv(VP10_COMP *cpi, MACROBLOCK *x,
+ PICK_MODE_CONTEXT *ctx,
+ int *rate, int *rate_tokenonly,
+ int64_t *distortion, int *skippable,
+ BLOCK_SIZE bsize, int rate_overhead,
+ int64_t best_rd) {  // RD cost to beat
+ MACROBLOCKD *const xd = &x->e_mbd;
+ MB_MODE_INFO *mbmi = &xd->mi[0]->mbmi;
+ int this_rate_tokenonly, this_rate, s;
+ int64_t this_distortion, this_sse, this_rd;
+ int angle_delta, best_angle_delta = 0;
+ const double rd_adjust = 1.2;  // slack factor on best_rd, used for delta 0
+
+ (void)ctx;
+ *rate_tokenonly = INT_MAX;  // sentinel: no candidate accepted yet
+ if (ANGLE_FAST_SEARCH) {  // two-level coarse-to-fine search
+ int deltas_level1[3] = {0, -2, 2};  // level 1: coarse deltas, 0 first
+ int deltas_level2[3][2] = {  // level 2: neighbours of each level-1 delta
+ {-1, 1}, {-3, -1}, {1, 3},
+ };
+ const int level1 = 3, level2 = 2;
+ int i, j, best_i = -1;  // best_i: level-1 index that improved best_rd
+
+ for (i = 0; i < level1; ++i) {
+ mbmi->angle_delta[1] = deltas_level1[i];
+ if (!super_block_uvrd(cpi, x, &this_rate_tokenonly,
+ &this_distortion, &s, &this_sse, bsize,
+ (i == 0 && best_rd < INT64_MAX) ?
+ best_rd * rd_adjust : best_rd)) {
+ if (i == 0)
+ break;  // delta 0 rejected: stop, best_i stays -1
+ else
+ continue;
+ }
+ this_rate = this_rate_tokenonly + rate_overhead;
+ this_rd = RDCOST(x->rdmult, x->rddiv, this_rate, this_distortion);
+ if (i == 0 && best_rd < INT64_MAX && this_rd > best_rd * rd_adjust)
+ break;  // delta 0 exceeds best_rd * rd_adjust: give up early
+ if (this_rd < best_rd) {
+ best_i = i;
+ best_rd = this_rd;
+ best_angle_delta = mbmi->angle_delta[1];
+ *rate = this_rate;
+ *rate_tokenonly = this_rate_tokenonly;
+ *distortion = this_distortion;
+ *skippable = s;
+ }
+ }
+
+ if (best_i >= 0) {  // refine only around an improving level-1 delta
+ for (j = 0; j < level2; ++j) {
+ mbmi->angle_delta[1] = deltas_level2[best_i][j];
+ if (!super_block_uvrd(cpi, x, &this_rate_tokenonly,
+ &this_distortion, &s, &this_sse, bsize, best_rd))
+ continue;
+ this_rate = this_rate_tokenonly + rate_overhead;
+ this_rd = RDCOST(x->rdmult, x->rddiv, this_rate, this_distortion);
+ if (this_rd < best_rd) {
+ best_rd = this_rd;
+ best_angle_delta = mbmi->angle_delta[1];
+ *rate = this_rate;
+ *rate_tokenonly = this_rate_tokenonly;
+ *distortion = this_distortion;
+ *skippable = s;
+ }
+ }
+ }
+ } else {  // exhaustive search over every delta in the allowed range
+ for (angle_delta = -MAX_ANGLE_DELTAS; angle_delta <= MAX_ANGLE_DELTAS;
+ ++angle_delta) {
+ mbmi->angle_delta[1] = angle_delta;
+ if (!super_block_uvrd(cpi, x, &this_rate_tokenonly,
+ &this_distortion, &s, &this_sse, bsize, best_rd))
+ continue;
+ this_rate = this_rate_tokenonly + rate_overhead;
+ this_rd = RDCOST(x->rdmult, x->rddiv, this_rate, this_distortion);
+ if (this_rd < best_rd) {
+ best_rd = this_rd;
+ best_angle_delta = mbmi->angle_delta[1];
+ *rate = this_rate;
+ *rate_tokenonly = this_rate_tokenonly;
+ *distortion = this_distortion;
+ *skippable = s;
+ }
+ }
+ }
+
+ mbmi->angle_delta[1] = best_angle_delta;  // leave the winning delta in mbmi
+ if (*rate_tokenonly != INT_MAX)  // re-run for the winner, outputs discarded
+ super_block_uvrd(cpi, x, &this_rate_tokenonly,  // 64-bit limit: no cutoff
+ &this_distortion, &s, &this_sse, bsize, INT64_MAX);
+ return *rate_tokenonly != INT_MAX;  // 1 if some delta beat best_rd
+}
#endif // CONFIG_EXT_INTRA
static int64_t rd_pick_intra_sbuv_mode(VP10_COMP *cpi, MACROBLOCK *x,
@@ -2619,6 +2674,7 @@
int this_rate_tokenonly, this_rate, s;
int64_t this_distortion, this_sse;
#if CONFIG_EXT_INTRA
+ int is_directional_mode, rate_overhead, best_angle_delta = 0;
EXT_INTRA_MODE_INFO ext_intra_mode_info;
ext_intra_mode_info.use_ext_intra_mode[1] = 0;
@@ -2631,20 +2687,44 @@
continue;
mbmi->uv_mode = mode;
-
+#if CONFIG_EXT_INTRA
+ is_directional_mode = (mode != DC_PRED && mode != TM_PRED);
+ rate_overhead = cpi->intra_uv_mode_cost[mbmi->mode][mode] +
+ write_uniform_cost(2 * MAX_ANGLE_DELTAS + 1, 0);
+ mbmi->angle_delta[1] = 0;
+ if (mbmi->sb_type >= BLOCK_8X8 && is_directional_mode) {
+ if (!rd_pick_intra_angle_sbuv(cpi, x, ctx, &this_rate,
+ &this_rate_tokenonly, &this_distortion, &s,
+ bsize, rate_overhead, best_rd))
+ continue;
+ } else {
+ if (!super_block_uvrd(cpi, x, &this_rate_tokenonly,
+ &this_distortion, &s, &this_sse, bsize, best_rd))
+ continue;
+ }
+ this_rate = this_rate_tokenonly +
+ cpi->intra_uv_mode_cost[mbmi->mode][mode];
+ if (mbmi->sb_type >= BLOCK_8X8 && is_directional_mode)
+ this_rate += write_uniform_cost(2 * MAX_ANGLE_DELTAS + 1,
+ MAX_ANGLE_DELTAS +
+ mbmi->angle_delta[1]);
+ if (mode == DC_PRED && 0)
+ this_rate += vp10_cost_bit(cpi->common.fc->ext_intra_probs[1], 0);
+#else
if (!super_block_uvrd(cpi, x, &this_rate_tokenonly,
&this_distortion, &s, &this_sse, bsize, best_rd))
continue;
this_rate = this_rate_tokenonly +
cpi->intra_uv_mode_cost[xd->mi[0]->mbmi.mode][mode];
-#if CONFIG_EXT_INTRA
- if (mode == DC_PRED)
- this_rate += vp10_cost_bit(cpi->common.fc->ext_intra_probs[1], 0);
#endif // CONFIG_EXT_INTRA
+
this_rd = RDCOST(x->rdmult, x->rddiv, this_rate, this_distortion);
if (this_rd < best_rd) {
mode_selected = mode;
+#if CONFIG_EXT_INTRA
+ best_angle_delta = mbmi->angle_delta[1];
+#endif // CONFIG_EXT_INTRA
best_rd = this_rd;
*rate = this_rate;
*rate_tokenonly = this_rate_tokenonly;
@@ -2656,7 +2736,7 @@
}
#if CONFIG_EXT_INTRA
- if (mbmi->sb_type >= BLOCK_8X8) {
+ if (mbmi->sb_type >= BLOCK_8X8 && ALLOW_FILTER_INTRA_MODES) {
if (rd_pick_ext_intra_sbuv(cpi, x, ctx, rate, rate_tokenonly, distortion,
skippable, bsize, &best_rd)) {
mode_selected = mbmi->uv_mode;
@@ -2669,6 +2749,7 @@
if (ext_intra_mode_info.use_ext_intra_mode[1])
mbmi->ext_intra_mode_info.ext_intra_mode[1] =
ext_intra_mode_info.ext_intra_mode[1];
+ mbmi->angle_delta[1] = best_angle_delta;
#endif // CONFIG_EXT_INTRA
mbmi->uv_mode = mode_selected;
return best_rd;
@@ -4478,6 +4559,9 @@
PREDICTION_MODE mode_uv[TX_SIZES];
#if CONFIG_EXT_INTRA
EXT_INTRA_MODE_INFO ext_intra_mode_info_uv[TX_SIZES];
+ int8_t uv_angle_delta[TX_SIZES];
+ int is_directional_mode;
+ int rate_overhead, rate_dummy;
#endif // CONFIG_EXT_INTRA
const int intra_cost_penalty = vp10_get_intra_cost_penalty(
cm->base_qindex, cm->y_dc_delta_q, cm->bit_depth);
@@ -4762,17 +4846,31 @@
TX_SIZE uv_tx;
struct macroblockd_plane *const pd = &xd->plane[1];
memset(x->skip_txfm, 0, sizeof(x->skip_txfm));
- super_block_yrd(cpi, x, &rate_y, &distortion_y, &skippable,
- NULL, bsize, best_rd);
+
#if CONFIG_EXT_INTRA
+ is_directional_mode = (mbmi->mode != DC_PRED && mbmi->mode != TM_PRED);
+ if (is_directional_mode) {
+ rate_overhead = write_uniform_cost(2 * MAX_ANGLE_DELTAS + 1, 0) +
+ cpi->mbmode_cost[mbmi->mode];
+ rate_y = INT_MAX;
+ this_rd =
+ rd_pick_intra_angle_sby(cpi, x, &rate_dummy, &rate_y, &distortion_y,
+ &skippable, bsize, rate_overhead, best_rd);
+ } else {
+ mbmi->angle_delta[0] = 0;
+ super_block_yrd(cpi, x, &rate_y, &distortion_y, &skippable,
+ NULL, bsize, best_rd);
+ }
+
// TODO(huisu): ext-intra is turned off in lossless mode for now to
// avoid a unit test failure
- if (mbmi->mode == DC_PRED && !xd->lossless[mbmi->segment_id]) {
+ if (mbmi->mode == DC_PRED && !xd->lossless[mbmi->segment_id] &&
+ ALLOW_FILTER_INTRA_MODES) {
MB_MODE_INFO mbmi_copy = *mbmi;
- int rate_dummy;
if (rate_y != INT_MAX) {
- int this_rate = rate_y + cpi->mbmode_cost[mbmi->mode] +
+ int this_rate = rate_y +
+ cpi->mbmode_cost[mbmi->mode] +
vp10_cost_bit(cm->fc->ext_intra_probs[0], 0);
this_rd = RDCOST(x->rdmult, x->rddiv, this_rate, distortion_y);
} else {
@@ -4784,7 +4882,11 @@
cpi->mbmode_cost[mbmi->mode], &this_rd))
*mbmi = mbmi_copy;
}
+#else
+ super_block_yrd(cpi, x, &rate_y, &distortion_y, &skippable,
+ NULL, bsize, best_rd);
#endif // CONFIG_EXT_INTRA
+
if (rate_y == INT_MAX)
continue;
uv_tx = get_uv_tx_size_impl(mbmi->tx_size, bsize, pd->subsampling_x,
@@ -4795,6 +4897,7 @@
&dist_uv[uv_tx], &skip_uv[uv_tx], &mode_uv[uv_tx]);
#if CONFIG_EXT_INTRA
ext_intra_mode_info_uv[uv_tx] = mbmi->ext_intra_mode_info;
+ uv_angle_delta[uv_tx] = mbmi->angle_delta[1];
#endif // CONFIG_EXT_INTRA
}
@@ -4803,32 +4906,29 @@
skippable = skippable && skip_uv[uv_tx];
mbmi->uv_mode = mode_uv[uv_tx];
#if CONFIG_EXT_INTRA
+ mbmi->angle_delta[1] = uv_angle_delta[uv_tx];
mbmi->ext_intra_mode_info.use_ext_intra_mode[1] =
ext_intra_mode_info_uv[uv_tx].use_ext_intra_mode[1];
if (ext_intra_mode_info_uv[uv_tx].use_ext_intra_mode[1]) {
mbmi->ext_intra_mode_info.ext_intra_mode[1] =
ext_intra_mode_info_uv[uv_tx].ext_intra_mode[1];
- mbmi->ext_intra_mode_info.ext_intra_angle[1] =
- ext_intra_mode_info_uv[uv_tx].ext_intra_angle[1];
}
#endif // CONFIG_EXT_INTRA
rate2 = rate_y + cpi->mbmode_cost[mbmi->mode] + rate_uv_intra[uv_tx];
#if CONFIG_EXT_INTRA
- if (mbmi->mode == DC_PRED) {
+ if (is_directional_mode)
+ rate2 += write_uniform_cost(2 * MAX_ANGLE_DELTAS + 1,
+ MAX_ANGLE_DELTAS +
+ mbmi->angle_delta[0]);
+
+ if (mbmi->mode == DC_PRED && ALLOW_FILTER_INTRA_MODES) {
rate2 += vp10_cost_bit(cm->fc->ext_intra_probs[0],
mbmi->ext_intra_mode_info.use_ext_intra_mode[0]);
if (mbmi->ext_intra_mode_info.use_ext_intra_mode[0]) {
EXT_INTRA_MODE ext_intra_mode =
mbmi->ext_intra_mode_info.ext_intra_mode[0];
- int angle = mbmi->ext_intra_mode_info.ext_intra_angle[0];
- if (!DR_ONLY)
- rate2 += vp10_cost_bit(DR_EXT_INTRA_PROB,
- ext_intra_mode > FILTER_TM_PRED);
- if (ext_intra_mode > FILTER_TM_PRED)
- rate2 += write_uniform_cost(EXT_INTRA_ANGLES, angle);
- else
- rate2 += write_uniform_cost(FILTER_INTRA_MODES, ext_intra_mode);
+ rate2 += write_uniform_cost(FILTER_INTRA_MODES, ext_intra_mode);
}
}
#endif // CONFIG_EXT_INTRA
diff --git a/vp10/vp10_common.mk b/vp10/vp10_common.mk
index 4b7a784..461815c 100644
--- a/vp10/vp10_common.mk
+++ b/vp10/vp10_common.mk
@@ -71,6 +71,7 @@
VP10_COMMON_SRCS-yes += common/vp10_fwd_txfm2d.h
VP10_COMMON_SRCS-yes += common/vp10_fwd_txfm2d.c
VP10_COMMON_SRCS-yes += common/vp10_fwd_txfm2d_cfg.h
+VP10_COMMON_SRCS-yes += common/vp10_inv_txfm2d_cfg.h
VP10_COMMON_SRCS-$(CONFIG_VP9_POSTPROC) += common/postproc.h
VP10_COMMON_SRCS-$(CONFIG_VP9_POSTPROC) += common/postproc.c
diff --git a/vp9/common/vp9_rtcd_defs.pl b/vp9/common/vp9_rtcd_defs.pl
index 5bf71ef..f41ee09 100644
--- a/vp9/common/vp9_rtcd_defs.pl
+++ b/vp9/common/vp9_rtcd_defs.pl
@@ -312,7 +312,7 @@
$vp9_full_search_sad_sse4_1=vp9_full_search_sadx8;
add_proto qw/int vp9_diamond_search_sad/, "const struct macroblock *x, const struct search_site_config *cfg, struct mv *ref_mv, struct mv *best_mv, int search_param, int sad_per_bit, int *num00, const struct vp9_variance_vtable *fn_ptr, const struct mv *center_mv";
-specialize qw/vp9_diamond_search_sad/;
+specialize qw/vp9_diamond_search_sad avx/;
add_proto qw/int vp9_full_range_search/, "const struct macroblock *x, const struct search_site_config *cfg, struct mv *ref_mv, struct mv *best_mv, int search_param, int sad_per_bit, int *num00, const struct vp9_variance_vtable *fn_ptr, const struct mv *center_mv";
specialize qw/vp9_full_range_search/;
diff --git a/vp9/decoder/vp9_decoder.c b/vp9/decoder/vp9_decoder.c
index 4e88819..f5da07e 100644
--- a/vp9/decoder/vp9_decoder.c
+++ b/vp9/decoder/vp9_decoder.c
@@ -243,7 +243,7 @@
decrease_ref_count(old_idx, frame_bufs, pool);
// Release the reference frame in reference map.
- if ((mask & 1) && old_idx >= 0) {
+ if (mask & 1) {
decrease_ref_count(old_idx, frame_bufs, pool);
}
cm->ref_frame_map[ref_index] = cm->next_ref_frame_map[ref_index];
@@ -350,7 +350,7 @@
decrease_ref_count(old_idx, frame_bufs, pool);
// Release the reference frame in reference map.
- if ((mask & 1) && old_idx >= 0) {
+ if (mask & 1) {
decrease_ref_count(old_idx, frame_bufs, pool);
}
++ref_index;
diff --git a/vp9/decoder/vp9_decoder.h b/vp9/decoder/vp9_decoder.h
index 4a5188f..afa4009 100644
--- a/vp9/decoder/vp9_decoder.h
+++ b/vp9/decoder/vp9_decoder.h
@@ -128,7 +128,7 @@
static INLINE void decrease_ref_count(int idx, RefCntBuffer *const frame_bufs,
BufferPool *const pool) {
- if (idx >= 0) {
+ if (idx >= 0 && frame_bufs[idx].ref_count > 0) {
--frame_bufs[idx].ref_count;
// A worker may only get a free framebuffer index when calling get_free_fb.
// But the private buffer is not set up until finish decoding header.
diff --git a/vp9/encoder/vp9_aq_cyclicrefresh.c b/vp9/encoder/vp9_aq_cyclicrefresh.c
index 0791677..2270a06 100644
--- a/vp9/encoder/vp9_aq_cyclicrefresh.c
+++ b/vp9/encoder/vp9_aq_cyclicrefresh.c
@@ -411,8 +411,11 @@
assert(cr->sb_index < sbs_in_frame);
i = cr->sb_index;
cr->target_num_seg_blocks = 0;
- if (cpi->oxcf.content != VP9E_CONTENT_SCREEN)
+ if (cpi->oxcf.content != VP9E_CONTENT_SCREEN) {
consec_zero_mv_thresh = 100;
+ if (cpi->noise_estimate.enabled && cpi->noise_estimate.level >= kMedium)
+ consec_zero_mv_thresh = 80;
+ }
qindex_thresh =
cpi->oxcf.content == VP9E_CONTENT_SCREEN
? vp9_get_qindex(&cm->seg, CR_SEGMENT_ID_BOOST2, cm->base_qindex)
diff --git a/vp9/encoder/vp9_bitstream.c b/vp9/encoder/vp9_bitstream.c
index 4615554..1a14ea9 100644
--- a/vp9/encoder/vp9_bitstream.c
+++ b/vp9/encoder/vp9_bitstream.c
@@ -123,72 +123,66 @@
static void pack_mb_tokens(vpx_writer *w,
TOKENEXTRA **tp, const TOKENEXTRA *const stop,
vpx_bit_depth_t bit_depth) {
- TOKENEXTRA *p = *tp;
-
- while (p < stop && p->token != EOSB_TOKEN) {
- const int t = p->token;
- const struct vp9_token *const a = &vp9_coef_encodings[t];
- int i = 0;
- int v = a->value;
- int n = a->len;
+ const TOKENEXTRA *p;  // cursor over the tokens starting at *tp
+ const vp9_extra_bit *const extra_bits =  // table chosen by bit_depth
#if CONFIG_VP9_HIGHBITDEPTH
- const vp9_extra_bit *b;
- if (bit_depth == VPX_BITS_12)
- b = &vp9_extra_bits_high12[t];
- else if (bit_depth == VPX_BITS_10)
- b = &vp9_extra_bits_high10[t];
- else
- b = &vp9_extra_bits[t];
+ (bit_depth == VPX_BITS_12) ? vp9_extra_bits_high12 :
+ (bit_depth == VPX_BITS_10) ? vp9_extra_bits_high10 :
+ vp9_extra_bits;
#else
- const vp9_extra_bit *const b = &vp9_extra_bits[t];
+ vp9_extra_bits;
(void) bit_depth;
#endif // CONFIG_VP9_HIGHBITDEPTH
- /* skip one or two nodes */
- if (p->skip_eob_node) {
- n -= p->skip_eob_node;
- i = 2 * p->skip_eob_node;
+ for (p = *tp; p < stop && p->token != EOSB_TOKEN; ++p) {
+ if (p->token == EOB_TOKEN) {  // end of block: a single 0 at node 0
+ vpx_write(w, 0, p->context_tree[0]);
+ continue;
}
-
- // TODO(jbb): expanding this can lead to big gains. It allows
- // much better branch prediction and would enable us to avoid numerous
- // lookups and compares.
-
- // If we have a token that's in the constrained set, the coefficient tree
- // is split into two treed writes. The first treed write takes care of the
- // unconstrained nodes. The second treed write takes care of the
- // constrained nodes.
- if (t >= TWO_TOKEN && t < EOB_TOKEN) {
- int len = UNCONSTRAINED_NODES - p->skip_eob_node;
- int bits = v >> (n - len);
- vp9_write_tree(w, vp9_coef_tree, p->context_tree, bits, len, i);
- vp9_write_tree(w, vp9_coef_con_tree,
- vp9_pareto8_full[p->context_tree[PIVOT_NODE] - 1],
- v, n - len, 0);
- } else {
- vp9_write_tree(w, vp9_coef_tree, p->context_tree, v, n, i);
- }
-
- if (b->base_val) {
- const int e = p->extra, l = b->len;
-
- if (l) {
- const unsigned char *pb = b->prob;
- int v = e >> 1;
- int n = l; /* number of bits in v, assumed nonzero */
-
- do {
- const int bb = (v >> --n) & 1;
- vpx_write(w, bb, *pb++);
- } while (n);
+ vpx_write(w, 1, p->context_tree[0]);  // node 0: not EOB
+ while (p->token == ZERO_TOKEN) {  // emit a run of zero tokens
+ vpx_write(w, 0, p->context_tree[1]);
+ ++p;
+ if (p == stop || p->token == EOSB_TOKEN) {  // stream ended in the run
+ *tp = (TOKENEXTRA*)(uintptr_t)p + (p->token == EOSB_TOKEN);
+ return;  // NOTE(review): p->token is read above even when p == stop
}
-
- vpx_write_bit(w, e & 1);
}
- ++p;
- }
- *tp = p + (p->token == EOSB_TOKEN);
+ {
+ const int t = p->token;
+ const vpx_prob *const context_tree = p->context_tree;
+ assert(t != ZERO_TOKEN);
+ assert(t != EOB_TOKEN);
+ assert(t != EOSB_TOKEN);
+ vpx_write(w, 1, context_tree[1]);  // node 1: not ZERO_TOKEN
+ if (t == ONE_TOKEN) {
+ vpx_write(w, 0, context_tree[2]);
+ vpx_write_bit(w, p->extra & 1);  // low bit of extra; presumably sign
+ } else { // t >= TWO_TOKEN && t < EOB_TOKEN
+ const struct vp9_token *const a = &vp9_coef_encodings[t];
+ const int v = a->value;
+ const int n = a->len;
+ const int e = p->extra;
+ vpx_write(w, 1, context_tree[2]);
+ vp9_write_tree(w, vp9_coef_con_tree,
+ vp9_pareto8_full[context_tree[PIVOT_NODE] - 1], v,
+ n - UNCONSTRAINED_NODES, 0);
+ if (t >= CATEGORY1_TOKEN) {  // category tokens also write e >> 1
+ const vp9_extra_bit *const b = &extra_bits[t];
+ const unsigned char *pb = b->prob;
+ int v = e >> 1;
+ int n = b->len; // number of bits in v, assumed nonzero
+ do {
+ const int bb = (v >> --n) & 1;
+ vpx_write(w, bb, *pb++);
+ } while (n);
+ }
+ vpx_write_bit(w, e & 1);
+ }
+ }
+ }
+ *tp = (TOKENEXTRA*)(uintptr_t)p + (p->token == EOSB_TOKEN);  // skip EOSB
}
static void write_segment_id(vpx_writer *w, const struct segmentation *seg,
diff --git a/vp9/encoder/vp9_denoiser.c b/vp9/encoder/vp9_denoiser.c
index c592832..c382b77 100644
--- a/vp9/encoder/vp9_denoiser.c
+++ b/vp9/encoder/vp9_denoiser.c
@@ -323,7 +323,7 @@
struct buf_2d src = mb->plane[0].src;
int is_skin = 0;
- if (bs <= BLOCK_16X16 && denoiser->denoising_level >= kDenMedium) {
+ if (bs <= BLOCK_16X16 && denoiser->denoising_level >= kDenLow) {
// Take center pixel in block to determine is_skin.
const int y_width_shift = (4 << b_width_log2_lookup[bs]) >> 1;
const int y_height_shift = (4 << b_height_log2_lookup[bs]) >> 1;
@@ -349,7 +349,7 @@
denoiser->increase_denoising = 0;
}
- if (denoiser->denoising_level >= kDenMedium)
+ if (denoiser->denoising_level >= kDenLow)
decision = perform_motion_compensation(denoiser, mb, bs,
denoiser->increase_denoising,
mi_row, mi_col, ctx,
@@ -524,6 +524,7 @@
#endif
denoiser->increase_denoising = 0;
denoiser->frame_buffer_initialized = 1;
+ denoiser->denoising_level = kDenLow;
return 0;
}
diff --git a/vp9/encoder/vp9_denoiser.h b/vp9/encoder/vp9_denoiser.h
index 8bed9e8..bc676e9 100644
--- a/vp9/encoder/vp9_denoiser.h
+++ b/vp9/encoder/vp9_denoiser.h
@@ -27,6 +27,7 @@
} VP9_DENOISER_DECISION;
typedef enum vp9_denoiser_level {
+ kDenLowLow,
kDenLow,
kDenMedium,
kDenHigh
diff --git a/vp9/encoder/vp9_encodeframe.c b/vp9/encoder/vp9_encodeframe.c
index 9d66839..0475883 100644
--- a/vp9/encoder/vp9_encodeframe.c
+++ b/vp9/encoder/vp9_encodeframe.c
@@ -492,14 +492,14 @@
// Increase base variance threshold if estimated noise level is high.
if (cpi->noise_estimate.enabled) {
if (cpi->noise_estimate.level == kHigh)
- threshold_base = threshold_base << 2;
+ threshold_base = 3 * threshold_base;
else
if (cpi->noise_estimate.level == kMedium)
threshold_base = threshold_base << 1;
}
- thresholds[1] = threshold_base;
if (cm->width <= 352 && cm->height <= 288) {
- thresholds[0] = threshold_base >> 2;
+ thresholds[0] = threshold_base >> 3;
+ thresholds[1] = threshold_base >> 1;
thresholds[2] = threshold_base << 3;
} else {
thresholds[0] = threshold_base;
@@ -526,7 +526,7 @@
cpi->vbp_bsize_min = BLOCK_8X8;
} else {
if (cm->width <= 352 && cm->height <= 288)
- cpi->vbp_threshold_sad = 100;
+ cpi->vbp_threshold_sad = 10;
else
cpi->vbp_threshold_sad = (cpi->y_dequant[q][1] << 1) > 1000 ?
(cpi->y_dequant[q][1] << 1) : 1000;
diff --git a/vp9/encoder/vp9_encoder.c b/vp9/encoder/vp9_encoder.c
index d86a7a7..eebd7c5 100644
--- a/vp9/encoder/vp9_encoder.c
+++ b/vp9/encoder/vp9_encoder.c
@@ -1570,7 +1570,30 @@
#endif
#define log2f(x) (log (x) / (float) M_LOG2_E)
+/***********************************************************************
+ * Read before modifying 'cal_nmvjointsadcost' or 'cal_nmvsadcosts' *
+ ***********************************************************************
+ * The following 2 functions ('cal_nmvjointsadcost' and *
+ * 'cal_nmvsadcosts') are used to calculate cost lookup tables *
+ * used by 'vp9_diamond_search_sad'. The C implementation of the *
+ * function is generic, but the AVX intrinsics optimised version *
+ * relies on the following properties of the computed tables: *
+ * For cal_nmvjointsadcost: *
+ * - mvjointsadcost[1] == mvjointsadcost[2] == mvjointsadcost[3] *
+ * For cal_nmvsadcosts: *
+ * - For all i: mvsadcost[0][i] == mvsadcost[1][i] *
+ * (Equal costs for both components) *
+ * - For all i: mvsadcost[0][i] == mvsadcost[0][-i] *
+ * (Cost function is even) *
+ * If these do not hold, then the AVX optimised version of the *
+ * 'vp9_diamond_search_sad' function cannot be used as it is, in which *
+ * case you can revert to using the C function instead. *
+ ***********************************************************************/
+
static void cal_nmvjointsadcost(int *mvjointsadcost) {
+ /*********************************************************************
+ * Warning: Read the comments above before modifying this function *
+ *********************************************************************/
mvjointsadcost[0] = 600;
mvjointsadcost[1] = 300;
mvjointsadcost[2] = 300;
@@ -1578,6 +1601,9 @@
}
static void cal_nmvsadcosts(int *mvsadcost[2]) {
+ /*********************************************************************
+ * Warning: Read the comments above before modifying this function *
+ *********************************************************************/
int i = 1;
mvsadcost[0][0] = 0;
@@ -1739,6 +1765,10 @@
cpi->first_time_stamp_ever = INT64_MAX;
+ /*********************************************************************
+ * Warning: Read the comments around 'cal_nmvjointsadcost' and *
+ * 'cal_nmvsadcosts' before modifying how these tables are computed. *
+ *********************************************************************/
cal_nmvjointsadcost(cpi->td.mb.nmvjointsadcost);
cpi->td.mb.nmvcost[0] = &cpi->nmvcosts[0][MV_MAX];
cpi->td.mb.nmvcost[1] = &cpi->nmvcosts[1][MV_MAX];
diff --git a/vp9/encoder/vp9_mcomp.c b/vp9/encoder/vp9_mcomp.c
index 9744e43..b9a104a 100644
--- a/vp9/encoder/vp9_mcomp.c
+++ b/vp9/encoder/vp9_mcomp.c
@@ -101,11 +101,8 @@
}
void vp9_init_dsmotion_compensation(search_site_config *cfg, int stride) {
- int len, ss_count = 1;
-
- cfg->ss_mv[0].col = 0;
- cfg->ss_mv[0].row = 0;
- cfg->ss_os[0] = 0;
+ int len;
+ int ss_count = 0;
for (len = MAX_FIRST_STEP; len > 0; len /= 2) {
// Generate offsets for 4 search sites per step.
@@ -117,16 +114,13 @@
}
}
- cfg->ss_count = ss_count;
cfg->searches_per_step = 4;
+ cfg->total_steps = ss_count / cfg->searches_per_step;
}
void vp9_init3smotion_compensation(search_site_config *cfg, int stride) {
- int len, ss_count = 1;
-
- cfg->ss_mv[0].col = 0;
- cfg->ss_mv[0].row = 0;
- cfg->ss_os[0] = 0;
+ int len;
+ int ss_count = 0;
for (len = MAX_FIRST_STEP; len > 0; len /= 2) {
// Generate offsets for 8 search sites per step.
@@ -141,8 +135,8 @@
}
}
- cfg->ss_count = ss_count;
cfg->searches_per_step = 8;
+ cfg->total_steps = ss_count / cfg->searches_per_step;
}
/*
@@ -1612,8 +1606,8 @@
const uint8_t *best_address;
unsigned int bestsad = INT_MAX;
- int best_site = 0;
- int last_site = 0;
+ int best_site = -1;
+ int last_site = -1;
int ref_row;
int ref_col;
@@ -1626,7 +1620,7 @@
// const search_site *ss = &cfg->ss[search_param * cfg->searches_per_step];
const MV *ss_mv = &cfg->ss_mv[search_param * cfg->searches_per_step];
const intptr_t *ss_os = &cfg->ss_os[search_param * cfg->searches_per_step];
- const int tot_steps = (cfg->ss_count / cfg->searches_per_step) - search_param;
+ const int tot_steps = cfg->total_steps - search_param;
const MV fcenter_mv = {center_mv->row >> 3, center_mv->col >> 3};
clamp_mv(ref_mv, x->mv_col_min, x->mv_col_max, x->mv_row_min, x->mv_row_max);
@@ -1644,7 +1638,7 @@
bestsad = fn_ptr->sdf(what, what_stride, in_what, in_what_stride)
+ mvsad_err_cost(x, best_mv, &fcenter_mv, sad_per_bit);
- i = 1;
+ i = 0;
for (step = 0; step < tot_steps; step++) {
int all_in = 1, t;
diff --git a/vp9/encoder/vp9_mcomp.h b/vp9/encoder/vp9_mcomp.h
index e48259f..1c101f2 100644
--- a/vp9/encoder/vp9_mcomp.h
+++ b/vp9/encoder/vp9_mcomp.h
@@ -33,10 +33,10 @@
typedef struct search_site_config {
// motion search sites
- MV ss_mv[8 * MAX_MVSEARCH_STEPS + 1]; // Motion vector
- intptr_t ss_os[8 * MAX_MVSEARCH_STEPS + 1]; // Offset
- int ss_count;
+ MV ss_mv[8 * MAX_MVSEARCH_STEPS]; // Motion vector
+ intptr_t ss_os[8 * MAX_MVSEARCH_STEPS]; // Offset
int searches_per_step;
+ int total_steps;
} search_site_config;
void vp9_init_dsmotion_compensation(search_site_config *cfg, int stride);
diff --git a/vp9/encoder/vp9_noise_estimate.c b/vp9/encoder/vp9_noise_estimate.c
index 8ba7de7..b41ffd0 100644
--- a/vp9/encoder/vp9_noise_estimate.c
+++ b/vp9/encoder/vp9_noise_estimate.c
@@ -29,11 +29,14 @@
ne->value = 0;
ne->count = 0;
ne->thresh = 90;
+ ne->last_w = 0;
+ ne->last_h = 0;
if (width * height >= 1920 * 1080) {
ne->thresh = 200;
} else if (width * height >= 1280 * 720) {
ne->thresh = 130;
}
+ ne->num_frames_estimate = 20;
}
int enable_noise_estimation(VP9_COMP *const cpi) {
@@ -86,10 +89,9 @@
// Estimate of noise level every frame_period frames.
int frame_period = 10;
int thresh_consec_zeromv = 8;
- unsigned int thresh_sum_diff = 128;
+ unsigned int thresh_sum_diff = 100;
unsigned int thresh_sum_spatial = (200 * 200) << 8;
unsigned int thresh_spatial_var = (32 * 32) << 8;
- int num_frames_estimate = 20;
int min_blocks_estimate = cm->mi_rows * cm->mi_cols >> 7;
// Estimate is between current source and last source.
YV12_BUFFER_CONFIG *last_source = cpi->Last_Source;
@@ -100,11 +102,17 @@
ne->enabled = enable_noise_estimation(cpi);
if (!ne->enabled ||
cm->current_video_frame % frame_period != 0 ||
- last_source == NULL) {
+ last_source == NULL ||
+ ne->last_w != cm->width ||
+ ne->last_h != cm->height) {
#if CONFIG_VP9_TEMPORAL_DENOISING
if (cpi->oxcf.noise_sensitivity > 0)
copy_frame(&cpi->denoiser.last_source, cpi->Source);
#endif
+ if (last_source != NULL) {
+ ne->last_w = cm->width;
+ ne->last_h = cm->height;
+ }
return;
} else {
int num_samples = 0;
@@ -127,6 +135,17 @@
const int uv_width_shift = y_width_shift >> 1;
const int uv_height_shift = y_height_shift >> 1;
int mi_row, mi_col;
+ int num_low_motion = 0;
+ int frame_low_motion = 1;
+ for (mi_row = 0; mi_row < cm->mi_rows; mi_row++) {
+ for (mi_col = 0; mi_col < cm->mi_cols; mi_col++) {
+ int bl_index = mi_row * cm->mi_cols + mi_col;
+ if (cr->consec_zero_mv[bl_index] > thresh_consec_zeromv)
+ num_low_motion++;
+ }
+ }
+ if (num_low_motion < ((3 * cm->mi_rows * cm->mi_cols) >> 3))
+ frame_low_motion = 0;
for (mi_row = 0; mi_row < cm->mi_rows; mi_row++) {
for (mi_col = 0; mi_col < cm->mi_cols; mi_col++) {
// 16x16 blocks, 1/4 sample of frame.
@@ -146,7 +165,8 @@
const uint8_t vsource =
src_v[uv_height_shift * src_uvstride + uv_width_shift];
int is_skin = vp9_skin_pixel(ysource, usource, vsource);
- if (cr->consec_zero_mv[bl_index] > thresh_consec_zeromv &&
+ if (frame_low_motion &&
+ cr->consec_zero_mv[bl_index] > thresh_consec_zeromv &&
cr->consec_zero_mv[bl_index1] > thresh_consec_zeromv &&
cr->consec_zero_mv[bl_index2] > thresh_consec_zeromv &&
cr->consec_zero_mv[bl_index3] > thresh_consec_zeromv &&
@@ -185,6 +205,8 @@
src_u += (src_uvstride << 2) - (cm->mi_cols << 2);
src_v += (src_uvstride << 2) - (cm->mi_cols << 2);
}
+ ne->last_w = cm->width;
+ ne->last_h = cm->height;
// Update noise estimate if we have at a minimum number of block samples,
// and avg_est > 0 (avg_est == 0 can happen if the application inputs
// duplicate frames).
@@ -192,18 +214,21 @@
// Normalize.
avg_est = avg_est / num_samples;
// Update noise estimate.
- ne->value = (int)((3 * ne->value + avg_est) >> 2);
+ ne->value = (int)((15 * ne->value + avg_est) >> 4);
ne->count++;
- if (ne->count == num_frames_estimate) {
+ if (ne->count == ne->num_frames_estimate) {
// Reset counter and check noise level condition.
+ ne->num_frames_estimate = 30;
ne->count = 0;
if (ne->value > (ne->thresh << 1))
ne->level = kHigh;
else
if (ne->value > ne->thresh)
ne->level = kMedium;
- else
+ else if (ne->value > (ne->thresh >> 1))
ne->level = kLow;
+ else
+ ne->level = kLowLow;
}
}
}
diff --git a/vp9/encoder/vp9_noise_estimate.h b/vp9/encoder/vp9_noise_estimate.h
index 2acc2ea..0d22ef0 100644
--- a/vp9/encoder/vp9_noise_estimate.h
+++ b/vp9/encoder/vp9_noise_estimate.h
@@ -24,6 +24,7 @@
#endif
typedef enum noise_level {
+ kLowLow,
kLow,
kMedium,
kHigh
@@ -35,6 +36,9 @@
int value;
int thresh;
int count;
+ int last_w;
+ int last_h;
+ int num_frames_estimate;
} NOISE_ESTIMATE;
struct VP9_COMP;
diff --git a/vp9/encoder/vp9_pickmode.c b/vp9/encoder/vp9_pickmode.c
index 8c4782d..9db044f 100644
--- a/vp9/encoder/vp9_pickmode.c
+++ b/vp9/encoder/vp9_pickmode.c
@@ -1483,18 +1483,30 @@
this_rdc.rate += ref_frame_cost[ref_frame];
this_rdc.rdcost = RDCOST(x->rdmult, x->rddiv, this_rdc.rate, this_rdc.dist);
- // Bias against non-zero (above some threshold) motion for large blocks.
- // This is temporary fix to avoid selection of large mv for big blocks.
- if (cpi->oxcf.speed > 5 &&
- cpi->oxcf.content != VP9E_CONTENT_SCREEN &&
- (frame_mv[this_mode][ref_frame].as_mv.row > 64 ||
- frame_mv[this_mode][ref_frame].as_mv.row < -64 ||
- frame_mv[this_mode][ref_frame].as_mv.col > 64 ||
- frame_mv[this_mode][ref_frame].as_mv.col < -64)) {
- if (bsize == BLOCK_64X64)
- this_rdc.rdcost = this_rdc.rdcost << 1;
- else if (bsize >= BLOCK_32X32)
- this_rdc.rdcost = 3 * this_rdc.rdcost >> 1;
+ if (cpi->oxcf.speed >= 5 &&
+ cpi->oxcf.content != VP9E_CONTENT_SCREEN) {
+ // Bias against non-zero (above some threshold) motion for large blocks.
+ // This is a temporary fix to avoid selection of large mv for big blocks.
+ if (frame_mv[this_mode][ref_frame].as_mv.row > 64 ||
+ frame_mv[this_mode][ref_frame].as_mv.row < -64 ||
+ frame_mv[this_mode][ref_frame].as_mv.col > 64 ||
+ frame_mv[this_mode][ref_frame].as_mv.col < -64) {
+ if (bsize == BLOCK_64X64)
+ this_rdc.rdcost = this_rdc.rdcost << 1;
+ else if (bsize >= BLOCK_32X32)
+ this_rdc.rdcost = 3 * this_rdc.rdcost >> 1;
+ }
+ // If noise estimation is enabled, and estimated level is above threshold,
+ // add a bias to LAST reference with small motion, for large blocks.
+ if (cpi->noise_estimate.enabled &&
+ cpi->noise_estimate.level >= kMedium &&
+ bsize >= BLOCK_32X32 &&
+ ref_frame == LAST_FRAME &&
+ frame_mv[this_mode][ref_frame].as_mv.row < 8 &&
+ frame_mv[this_mode][ref_frame].as_mv.row > -8 &&
+ frame_mv[this_mode][ref_frame].as_mv.col < 8 &&
+ frame_mv[this_mode][ref_frame].as_mv.col > -8)
+ this_rdc.rdcost = 7 * this_rdc.rdcost >> 3;
}
// Skipping checking: test to see if this block can be reconstructed by
diff --git a/vp9/encoder/vp9_ratectrl.c b/vp9/encoder/vp9_ratectrl.c
index c6fe76c..0377cb5 100644
--- a/vp9/encoder/vp9_ratectrl.c
+++ b/vp9/encoder/vp9_ratectrl.c
@@ -1873,7 +1873,7 @@
// Resize based on average buffer underflow and QP over some window.
// Ignore samples close to key frame, since QP is usually high after key.
- if (cpi->rc.frames_since_key > 1 * cpi->framerate) {
+ if (cpi->rc.frames_since_key > 2 * cpi->framerate) {
const int window = (int)(4 * cpi->framerate);
cpi->resize_avg_qp += cm->base_qindex;
if (cpi->rc.buffer_level < (int)(30 * rc->optimal_buffer_level / 100))
diff --git a/vp9/encoder/x86/vp9_diamond_search_sad_avx.c b/vp9/encoder/x86/vp9_diamond_search_sad_avx.c
new file mode 100644
index 0000000..2ed3f1a
--- /dev/null
+++ b/vp9/encoder/x86/vp9_diamond_search_sad_avx.c
@@ -0,0 +1,322 @@
+/*
+ * Copyright (c) 2015 The WebM project authors. All Rights Reserved.
+ *
+ * Use of this source code is governed by a BSD-style license
+ * that can be found in the LICENSE file in the root of the source
+ * tree. An additional intellectual property rights grant can be found
+ * in the file PATENTS. All contributing project authors may
+ * be found in the AUTHORS file in the root of the source tree.
+ */
+
+#if defined(_MSC_VER)
+# include <intrin.h>
+#endif
+#include <emmintrin.h>
+#include <smmintrin.h>
+
+#include "vpx_dsp/vpx_dsp_common.h"
+#include "vp9/encoder/vp9_encoder.h"
+#include "vpx_ports/mem.h"
+
+#ifdef __GNUC__
+# define __likely__(v) __builtin_expect(v, 1)
+# define __unlikely__(v) __builtin_expect(v, 0)
+#else
+# define __likely__(v) (v)
+# define __unlikely__(v) (v)
+#endif
+
+static INLINE int_mv pack_int_mv(int16_t row, int16_t col) {
+ int_mv result;
+ result.as_mv.row = row;
+ result.as_mv.col = col;
+ return result;
+}
+
+static INLINE MV_JOINT_TYPE get_mv_joint(const int_mv mv) {
+ // This is simplified from the C implementation by relying on the fact that
+ // x->nmvjointsadcost[1] == x->nmvjointsadcost[2] and
+ // x->nmvjointsadcost[1] == x->nmvjointsadcost[3]
+ return mv.as_int == 0 ? 0 : 1;
+}
+
+static INLINE int mv_cost(const int_mv mv,
+ const int *joint_cost, int *const comp_cost[2]) {
+ return joint_cost[get_mv_joint(mv)] +
+ comp_cost[0][mv.as_mv.row] + comp_cost[1][mv.as_mv.col];
+}
+
+static int mvsad_err_cost(const MACROBLOCK *x, const int_mv mv, const MV *ref,
+ int error_per_bit) {
+ const int_mv diff = pack_int_mv(mv.as_mv.row - ref->row,
+ mv.as_mv.col - ref->col);
+ return ROUND_POWER_OF_TWO(mv_cost(diff, x->nmvjointsadcost,
+ x->nmvsadcost) * error_per_bit, 8);
+}
+
+/*****************************************************************************
+ * This function utilises 3 properties of the cost function lookup tables, *
+ * constructed using 'cal_nmvjointsadcost' and 'cal_nmvsadcosts' in    *
+ * vp9_encoder.c. *
+ * For the joint cost: *
+ * - mvjointsadcost[1] == mvjointsadcost[2] == mvjointsadcost[3] *
+ * For the component costs: *
+ * - For all i: mvsadcost[0][i] == mvsadcost[1][i] *
+ * (Equal costs for both components) *
+ * - For all i: mvsadcost[0][i] == mvsadcost[0][-i] *
+ * (Cost function is even) *
+ * If these do not hold, then this function cannot be used without *
+ * modification, in which case you can revert to using the C implementation, *
+ * which does not rely on these properties. *
+ *****************************************************************************/
+int vp9_diamond_search_sad_avx(const MACROBLOCK *x,
+ const search_site_config *cfg,
+ MV *ref_mv, MV *best_mv, int search_param,
+ int sad_per_bit, int *num00,
+ const vp9_variance_fn_ptr_t *fn_ptr,
+ const MV *center_mv) {
+ const int_mv maxmv = pack_int_mv(x->mv_row_max, x->mv_col_max);
+ const __m128i v_max_mv_w = _mm_set1_epi32(maxmv.as_int);
+ const int_mv minmv = pack_int_mv(x->mv_row_min, x->mv_col_min);
+ const __m128i v_min_mv_w = _mm_set1_epi32(minmv.as_int);
+
+ const __m128i v_spb_d = _mm_set1_epi32(sad_per_bit);
+
+ const __m128i v_joint_cost_0_d = _mm_set1_epi32(x->nmvjointsadcost[0]);
+ const __m128i v_joint_cost_1_d = _mm_set1_epi32(x->nmvjointsadcost[1]);
+
+ // search_param determines the length of the initial step and hence the number
+ // of iterations.
+ // 0 = initial step (MAX_FIRST_STEP) pel
+ // 1 = (MAX_FIRST_STEP/2) pel,
+ // 2 = (MAX_FIRST_STEP/4) pel...
+ const MV *ss_mv = &cfg->ss_mv[cfg->searches_per_step * search_param];
+ const intptr_t *ss_os = &cfg->ss_os[cfg->searches_per_step * search_param];
+ const int tot_steps = cfg->total_steps - search_param;
+
+ const int_mv fcenter_mv = pack_int_mv(center_mv->row >> 3,
+ center_mv->col >> 3);
+ const __m128i vfcmv = _mm_set1_epi32(fcenter_mv.as_int);
+
+ const int ref_row = clamp(ref_mv->row, minmv.as_mv.row, maxmv.as_mv.row);
+ const int ref_col = clamp(ref_mv->col, minmv.as_mv.col, maxmv.as_mv.col);
+
+ int_mv bmv = pack_int_mv(ref_row, ref_col);
+ int_mv new_bmv = bmv;
+ __m128i v_bmv_w = _mm_set1_epi32(bmv.as_int);
+
+ const int what_stride = x->plane[0].src.stride;
+ const int in_what_stride = x->e_mbd.plane[0].pre[0].stride;
+ const uint8_t *const what = x->plane[0].src.buf;
+ const uint8_t *const in_what = x->e_mbd.plane[0].pre[0].buf +
+ ref_row * in_what_stride + ref_col;
+
+ // Work out the start point for the search
+ const uint8_t *best_address = in_what;
+ const uint8_t *new_best_address = best_address;
+#if ARCH_X86_64
+ __m128i v_ba_q = _mm_set1_epi64x((intptr_t)best_address);
+#else
+ __m128i v_ba_d = _mm_set1_epi32((intptr_t)best_address);
+#endif
+
+ unsigned int best_sad;
+
+ int i;
+ int j;
+ int step;
+
+ // Check the prerequisite cost function properties that are easy to check
+ // in an assert. See the function-level documentation for details on all
+ // prerequisites.
+ assert(x->nmvjointsadcost[1] == x->nmvjointsadcost[2]);
+ assert(x->nmvjointsadcost[1] == x->nmvjointsadcost[3]);
+
+ // Check the starting position
+ best_sad = fn_ptr->sdf(what, what_stride, in_what, in_what_stride);
+ best_sad += mvsad_err_cost(x, bmv, &fcenter_mv.as_mv, sad_per_bit);
+
+ *num00 = 0;
+
+ for (i = 0, step = 0; step < tot_steps; step++) {
+ for (j = 0; j < cfg->searches_per_step; j += 4, i += 4) {
+ __m128i v_sad_d;
+ __m128i v_cost_d;
+ __m128i v_outside_d;
+ __m128i v_inside_d;
+ __m128i v_diff_mv_w;
+#if ARCH_X86_64
+ __m128i v_blocka[2];
+#else
+ __m128i v_blocka[1];
+#endif
+
+ // Compute the candidate motion vectors
+ const __m128i v_ss_mv_w = _mm_loadu_si128((const __m128i*)&ss_mv[i]);
+ const __m128i v_these_mv_w = _mm_add_epi16(v_bmv_w, v_ss_mv_w);
+ // Clamp them to the search bounds
+ __m128i v_these_mv_clamp_w = v_these_mv_w;
+ v_these_mv_clamp_w = _mm_min_epi16(v_these_mv_clamp_w, v_max_mv_w);
+ v_these_mv_clamp_w = _mm_max_epi16(v_these_mv_clamp_w, v_min_mv_w);
+ // The ones that did not change are inside the search area
+ v_inside_d = _mm_cmpeq_epi32(v_these_mv_clamp_w, v_these_mv_w);
+
+ // If none of them are inside, then move on
+ if (__likely__(_mm_test_all_zeros(v_inside_d, v_inside_d))) {
+ continue;
+ }
+
+ // The inverse mask indicates which of the MVs are outside
+ v_outside_d = _mm_xor_si128(v_inside_d, _mm_set1_epi8(0xff));
+ // Shift right so outside lanes hold 0x7fffffff (sign bit clear); this
+ // is or'ed into the SAD later to force the maximum cost on them.
+ v_outside_d = _mm_srli_epi32(v_outside_d, 1);
+
+ // Compute the difference MV
+ v_diff_mv_w = _mm_sub_epi16(v_these_mv_clamp_w, vfcmv);
+ // We utilise the fact that the cost function is even, and use the
+ // absolute difference. This allows us to use unsigned indexes later
+ // and reduces cache pressure somewhat as only half of the table
+ // is ever referenced.
+ v_diff_mv_w = _mm_abs_epi16(v_diff_mv_w);
+
+ // Compute the SIMD pointer offsets.
+ {
+#if ARCH_X86_64 // sizeof(intptr_t) == 8
+ // Load the offsets
+ __m128i v_bo10_q = _mm_loadu_si128((const __m128i*)&ss_os[i+0]);
+ __m128i v_bo32_q = _mm_loadu_si128((const __m128i*)&ss_os[i+2]);
+ // Set the ones falling outside to zero
+ v_bo10_q = _mm_and_si128(v_bo10_q,
+ _mm_cvtepi32_epi64(v_inside_d));
+ v_bo32_q = _mm_and_si128(v_bo32_q,
+ _mm_unpackhi_epi32(v_inside_d, v_inside_d));
+ // Compute the candidate addresses
+ v_blocka[0] = _mm_add_epi64(v_ba_q, v_bo10_q);
+ v_blocka[1] = _mm_add_epi64(v_ba_q, v_bo32_q);
+#else // ARCH_X86 // sizeof(intptr_t) == 4
+ __m128i v_bo_d = _mm_loadu_si128((const __m128i*)&ss_os[i]);
+ v_bo_d = _mm_and_si128(v_bo_d, v_inside_d);
+ v_blocka[0] = _mm_add_epi32(v_ba_d, v_bo_d);
+#endif
+ }
+
+ fn_ptr->sdx4df(what, what_stride,
+ (const uint8_t **)&v_blocka[0], in_what_stride,
+ (uint32_t*)&v_sad_d);
+
+ // Look up the component cost of the residual motion vector
+ {
+ const int32_t row0 = _mm_extract_epi16(v_diff_mv_w, 0);
+ const int32_t col0 = _mm_extract_epi16(v_diff_mv_w, 1);
+ const int32_t row1 = _mm_extract_epi16(v_diff_mv_w, 2);
+ const int32_t col1 = _mm_extract_epi16(v_diff_mv_w, 3);
+ const int32_t row2 = _mm_extract_epi16(v_diff_mv_w, 4);
+ const int32_t col2 = _mm_extract_epi16(v_diff_mv_w, 5);
+ const int32_t row3 = _mm_extract_epi16(v_diff_mv_w, 6);
+ const int32_t col3 = _mm_extract_epi16(v_diff_mv_w, 7);
+
+ // Note: This is a use case for vpgather in AVX2
+ const uint32_t cost0 = x->nmvsadcost[0][row0] + x->nmvsadcost[0][col0];
+ const uint32_t cost1 = x->nmvsadcost[0][row1] + x->nmvsadcost[0][col1];
+ const uint32_t cost2 = x->nmvsadcost[0][row2] + x->nmvsadcost[0][col2];
+ const uint32_t cost3 = x->nmvsadcost[0][row3] + x->nmvsadcost[0][col3];
+
+ __m128i v_cost_10_d, v_cost_32_d;
+
+ v_cost_10_d = _mm_cvtsi32_si128(cost0);
+ v_cost_10_d = _mm_insert_epi32(v_cost_10_d, cost1, 1);
+
+ v_cost_32_d = _mm_cvtsi32_si128(cost2);
+ v_cost_32_d = _mm_insert_epi32(v_cost_32_d, cost3, 1);
+
+ v_cost_d = _mm_unpacklo_epi64(v_cost_10_d, v_cost_32_d);
+ }
+
+ // Now add in the joint cost
+ {
+ const __m128i v_sel_d = _mm_cmpeq_epi32(v_diff_mv_w,
+ _mm_setzero_si128());
+ const __m128i v_joint_cost_d = _mm_blendv_epi8(v_joint_cost_1_d,
+ v_joint_cost_0_d,
+ v_sel_d);
+ v_cost_d = _mm_add_epi32(v_cost_d, v_joint_cost_d);
+ }
+
+ // Multiply by sad_per_bit
+ v_cost_d = _mm_mullo_epi32(v_cost_d, v_spb_d);
+ // ROUND_POWER_OF_TWO(v_cost_d, 8)
+ v_cost_d = _mm_add_epi32(v_cost_d, _mm_set1_epi32(0x80));
+ v_cost_d = _mm_srai_epi32(v_cost_d, 8);
+ // Add the cost to the sad
+ v_sad_d = _mm_add_epi32(v_sad_d, v_cost_d);
+
+ // Make the motion vectors outside the search area have max cost
+ // by or'ing in the comparison mask, this way the minimum search won't
+ // pick them.
+ v_sad_d = _mm_or_si128(v_sad_d, v_outside_d);
+
+ // Find the minimum value and index horizontally in v_sad_d
+ {
+ // Try speculatively on 16 bits, so we can use the minpos intrinsic
+ const __m128i v_sad_w = _mm_packus_epi32(v_sad_d, v_sad_d);
+ const __m128i v_minp_w = _mm_minpos_epu16(v_sad_w);
+
+ uint32_t local_best_sad = _mm_extract_epi16(v_minp_w, 0);
+ uint32_t local_best_idx = _mm_extract_epi16(v_minp_w, 1);
+
+ // If the local best value is not saturated, just use it, otherwise
+ // find the horizontal minimum again the hard way on 32 bits.
+ // This is executed rarely.
+ if (__unlikely__(local_best_sad == 0xffff)) {
+ __m128i v_loval_d, v_hival_d, v_loidx_d, v_hiidx_d, v_sel_d;
+
+ v_loval_d = v_sad_d;
+ v_loidx_d = _mm_set_epi32(3, 2, 1, 0);
+ v_hival_d = _mm_srli_si128(v_loval_d, 8);
+ v_hiidx_d = _mm_srli_si128(v_loidx_d, 8);
+
+ v_sel_d = _mm_cmplt_epi32(v_hival_d, v_loval_d);
+
+ v_loval_d = _mm_blendv_epi8(v_loval_d, v_hival_d, v_sel_d);
+ v_loidx_d = _mm_blendv_epi8(v_loidx_d, v_hiidx_d, v_sel_d);
+ v_hival_d = _mm_srli_si128(v_loval_d, 4);
+ v_hiidx_d = _mm_srli_si128(v_loidx_d, 4);
+
+ v_sel_d = _mm_cmplt_epi32(v_hival_d, v_loval_d);
+
+ v_loval_d = _mm_blendv_epi8(v_loval_d, v_hival_d, v_sel_d);
+ v_loidx_d = _mm_blendv_epi8(v_loidx_d, v_hiidx_d, v_sel_d);
+
+ local_best_sad = _mm_extract_epi32(v_loval_d, 0);
+ local_best_idx = _mm_extract_epi32(v_loidx_d, 0);
+ }
+
+ // Update the global minimum if the local minimum is smaller
+ if (__likely__(local_best_sad < best_sad)) {
+ new_bmv = ((const int_mv *)&v_these_mv_w)[local_best_idx];
+ new_best_address = ((const uint8_t **)v_blocka)[local_best_idx];
+
+ best_sad = local_best_sad;
+ }
+ }
+ }
+
+ bmv = new_bmv;
+ best_address = new_best_address;
+
+ v_bmv_w = _mm_set1_epi32(bmv.as_int);
+#if ARCH_X86_64
+ v_ba_q = _mm_set1_epi64x((intptr_t)best_address);
+#else
+ v_ba_d = _mm_set1_epi32((intptr_t)best_address);
+#endif
+
+ if (__unlikely__(best_address == in_what)) {
+ (*num00)++;
+ }
+ }
+
+ *best_mv = bmv.as_mv;
+ return best_sad;
+}
diff --git a/vp9/vp9cx.mk b/vp9/vp9cx.mk
index 3f3bdef..5918240 100644
--- a/vp9/vp9cx.mk
+++ b/vp9/vp9cx.mk
@@ -96,6 +96,7 @@
VP9_CX_SRCS-$(HAVE_SSE2) += encoder/x86/vp9_avg_intrin_sse2.c
VP9_CX_SRCS-$(HAVE_SSE2) += encoder/x86/vp9_temporal_filter_apply_sse2.asm
VP9_CX_SRCS-$(HAVE_SSE2) += encoder/x86/vp9_quantize_sse2.c
+VP9_CX_SRCS-$(HAVE_AVX) += encoder/x86/vp9_diamond_search_sad_avx.c
ifeq ($(CONFIG_VP9_HIGHBITDEPTH),yes)
VP9_CX_SRCS-$(HAVE_SSE2) += encoder/x86/vp9_highbd_block_error_intrin_sse2.c
endif
diff --git a/vpx_dsp/x86/vpx_convolve_copy_sse2.asm b/vpx_dsp/x86/vpx_convolve_copy_sse2.asm
index 9c5b414..abc0270 100644
--- a/vpx_dsp/x86/vpx_convolve_copy_sse2.asm
+++ b/vpx_dsp/x86/vpx_convolve_copy_sse2.asm
@@ -13,15 +13,21 @@
SECTION .text
%macro convolve_fn 1-2
-INIT_XMM sse2
+%ifidn %1, avg
+%define AUX_XMM_REGS 4
+%else
+%define AUX_XMM_REGS 0
+%endif
%ifidn %2, highbd
%define pavg pavgw
-cglobal %2_convolve_%1, 4, 7, 4, src, src_stride, dst, dst_stride, \
- fx, fxs, fy, fys, w, h, bd
+cglobal %2_convolve_%1, 4, 7, 4+AUX_XMM_REGS, src, src_stride, \
+ dst, dst_stride, \
+ fx, fxs, fy, fys, w, h, bd
%else
%define pavg pavgb
-cglobal convolve_%1, 4, 7, 4, src, src_stride, dst, dst_stride, \
- fx, fxs, fy, fys, w, h
+cglobal convolve_%1, 4, 7, 4+AUX_XMM_REGS, src, src_stride, \
+ dst, dst_stride, \
+ fx, fxs, fy, fys, w, h
%endif
mov r4d, dword wm
%ifidn %2, highbd
@@ -152,38 +158,11 @@
jnz .loop16
RET
-INIT_MMX sse
.w8:
mov r4d, dword hm
lea r5q, [src_strideq*3]
lea r6q, [dst_strideq*3]
.loop8:
- movu m0, [srcq]
- movu m1, [srcq+src_strideq]
- movu m2, [srcq+src_strideq*2]
- movu m3, [srcq+r5q]
- lea srcq, [srcq+src_strideq*4]
-%ifidn %1, avg
- pavg m0, [dstq]
- pavg m1, [dstq+dst_strideq]
- pavg m2, [dstq+dst_strideq*2]
- pavg m3, [dstq+r6q]
-%endif
- mova [dstq ], m0
- mova [dstq+dst_strideq ], m1
- mova [dstq+dst_strideq*2], m2
- mova [dstq+r6q ], m3
- lea dstq, [dstq+dst_strideq*4]
- sub r4d, 4
- jnz .loop8
- RET
-
-%ifnidn %2, highbd
-.w4:
- mov r4d, dword hm
- lea r5q, [src_strideq*3]
- lea r6q, [dst_strideq*3]
-.loop4:
movh m0, [srcq]
movh m1, [srcq+src_strideq]
movh m2, [srcq+src_strideq*2]
@@ -205,11 +184,42 @@
movh [dstq+r6q ], m3
lea dstq, [dstq+dst_strideq*4]
sub r4d, 4
+ jnz .loop8
+ RET
+
+%ifnidn %2, highbd
+.w4:
+ mov r4d, dword hm
+ lea r5q, [src_strideq*3]
+ lea r6q, [dst_strideq*3]
+.loop4:
+ movd m0, [srcq]
+ movd m1, [srcq+src_strideq]
+ movd m2, [srcq+src_strideq*2]
+ movd m3, [srcq+r5q]
+ lea srcq, [srcq+src_strideq*4]
+%ifidn %1, avg
+ movd m4, [dstq]
+ movd m5, [dstq+dst_strideq]
+ movd m6, [dstq+dst_strideq*2]
+ movd m7, [dstq+r6q]
+ pavg m0, m4
+ pavg m1, m5
+ pavg m2, m6
+ pavg m3, m7
+%endif
+ movd [dstq ], m0
+ movd [dstq+dst_strideq ], m1
+ movd [dstq+dst_strideq*2], m2
+ movd [dstq+r6q ], m3
+ lea dstq, [dstq+dst_strideq*4]
+ sub r4d, 4
jnz .loop4
RET
%endif
%endmacro
+INIT_XMM sse2
convolve_fn copy
convolve_fn avg
%if CONFIG_VP9_HIGHBITDEPTH