Merge "vp9_reconintra/d45_predictor: remove temp storage"
diff --git a/CHANGELOG b/CHANGELOG
index 48dd543..b0d3064 100644
--- a/CHANGELOG
+++ b/CHANGELOG
@@ -1,6 +1,7 @@
xxxx-yy-zz v1.4.0 "Changes for next release"
vpxenc is changed to use VP9 by default.
Encoder controls added for 1 pass SVC.
+ Decoder control to toggle on/off loopfilter.
2015-04-03 v1.4.0 "Indian Runner Duck"
This release includes significant improvements to the VP9 codec.
diff --git a/build/make/Android.mk b/build/make/Android.mk
index 0add523..e971c9d 100644
--- a/build/make/Android.mk
+++ b/build/make/Android.mk
@@ -163,6 +163,7 @@
endif
# Add a dependency to force generation of the RTCD files.
+define rtcd_dep_template
ifeq ($(CONFIG_VP8), yes)
$(foreach file, $(LOCAL_SRC_FILES), $(LOCAL_PATH)/$(file)): vp8_rtcd.h
endif
@@ -175,6 +176,9 @@
ifeq ($(TARGET_ARCH_ABI),x86)
$(foreach file, $(LOCAL_SRC_FILES), $(LOCAL_PATH)/$(file)): vpx_config.asm
endif
+endef
+
+$(eval $(call rtcd_dep_template))
.PHONY: clean
clean:
diff --git a/build/make/gen_msvs_vcxproj.sh b/build/make/gen_msvs_vcxproj.sh
index 643ebd6..b653651 100755
--- a/build/make/gen_msvs_vcxproj.sh
+++ b/build/make/gen_msvs_vcxproj.sh
@@ -263,8 +263,8 @@
;;
arm*)
platforms[0]="ARM"
- asm_Debug_cmdline="armasm -nologo "%(FullPath)""
- asm_Release_cmdline="armasm -nologo "%(FullPath)""
+ asm_Debug_cmdline="armasm -nologo -oldit "%(FullPath)""
+ asm_Release_cmdline="armasm -nologo -oldit "%(FullPath)""
;;
*) die "Unsupported target $target!"
;;
diff --git a/libs.mk b/libs.mk
index 0f87a8a..0ca8379 100644
--- a/libs.mk
+++ b/libs.mk
@@ -373,6 +373,7 @@
TEST_INTRA_PRED_SPEED_BIN=./test_intra_pred_speed$(EXE_SFX)
TEST_INTRA_PRED_SPEED_SRCS=$(addprefix test/,$(call enabled,TEST_INTRA_PRED_SPEED_SRCS))
+TEST_INTRA_PRED_SPEED_OBJS := $(sort $(call objs,$(TEST_INTRA_PRED_SPEED_SRCS)))
libvpx_test_srcs.txt:
@echo " [CREATE] $@"
@@ -486,7 +487,6 @@
$(LIBVPX_TEST_OBJS) \
-L. -lvpx -lgtest $(extralibs) -lm))
-TEST_INTRA_PRED_SPEED_OBJS := $(sort $(call objs,$(TEST_INTRA_PRED_SPEED_SRCS)))
ifneq ($(strip $(TEST_INTRA_PRED_SPEED_OBJS)),)
$(TEST_INTRA_PRED_SPEED_OBJS) $(TEST_INTRA_PRED_SPEED_OBJS:.o=.d): CXXFLAGS += $(GTEST_INCLUDES)
OBJS-yes += $(TEST_INTRA_PRED_SPEED_OBJS)
diff --git a/rate_hist.c b/rate_hist.c
index 1cef19b..a77222b 100644
--- a/rate_hist.c
+++ b/rate_hist.c
@@ -88,6 +88,9 @@
if (now < cfg->rc_buf_initial_sz)
return;
+ if (!cfg->rc_target_bitrate)
+ return;
+
then = now;
/* Sum the size over the past rc_buf_sz ms */
diff --git a/test/android/Android.mk b/test/android/Android.mk
index af85634..48872a2 100644
--- a/test/android/Android.mk
+++ b/test/android/Android.mk
@@ -51,4 +51,6 @@
LOCAL_C_INCLUDES := $(BINDINGS_DIR)
FILTERED_SRC := $(sort $(filter %.cc %.c, $(LIBVPX_TEST_SRCS-yes)))
LOCAL_SRC_FILES := $(addprefix ./test/, $(FILTERED_SRC))
+# some test files depend on *_rtcd.h, ensure they're generated first.
+$(eval $(call rtcd_dep_template))
include $(BUILD_EXECUTABLE)
diff --git a/test/lpf_8_test.cc b/test/lpf_8_test.cc
index 726f472..ba51309 100644
--- a/test/lpf_8_test.cc
+++ b/test/lpf_8_test.cc
@@ -694,9 +694,23 @@
INSTANTIATE_TEST_CASE_P(
MSA, Loop8Test6Param,
::testing::Values(
+ make_tuple(&vp9_lpf_horizontal_8_msa, &vp9_lpf_horizontal_8_c, 8, 1),
make_tuple(&vp9_lpf_horizontal_16_msa, &vp9_lpf_horizontal_16_c, 8, 1),
make_tuple(&vp9_lpf_horizontal_16_msa, &vp9_lpf_horizontal_16_c, 8, 2),
+ make_tuple(&vp9_lpf_vertical_8_msa, &vp9_lpf_vertical_8_c, 8, 1),
make_tuple(&wrapper_vertical_16_msa, &wrapper_vertical_16_c, 8, 1)));
+
+INSTANTIATE_TEST_CASE_P(
+ MSA, Loop8Test9Param,
+ ::testing::Values(
+ make_tuple(&vp9_lpf_horizontal_4_dual_msa,
+ &vp9_lpf_horizontal_4_dual_c, 8),
+ make_tuple(&vp9_lpf_horizontal_8_dual_msa,
+ &vp9_lpf_horizontal_8_dual_c, 8),
+ make_tuple(&vp9_lpf_vertical_4_dual_msa,
+ &vp9_lpf_vertical_4_dual_c, 8),
+ make_tuple(&vp9_lpf_vertical_8_dual_msa,
+ &vp9_lpf_vertical_8_dual_c, 8)));
#endif // HAVE_MSA && (!CONFIG_VP9_HIGHBITDEPTH)
} // namespace
diff --git a/test/test-data.mk b/test/test-data.mk
index 1055e12..dda1c18 100644
--- a/test/test-data.mk
+++ b/test/test-data.mk
@@ -12,6 +12,7 @@
LIBVPX_TEST_DATA-$(CONFIG_ENCODERS) += park_joy_90p_12_422.y4m
LIBVPX_TEST_DATA-$(CONFIG_ENCODERS) += park_joy_90p_12_444.y4m
LIBVPX_TEST_DATA-$(CONFIG_ENCODERS) += park_joy_90p_12_440.yuv
+LIBVPX_TEST_DATA-$(CONFIG_ENCODERS) += park_joy_90p_8_420_a10-1.y4m
LIBVPX_TEST_DATA-$(CONFIG_ENCODERS) += park_joy_90p_8_420.y4m
LIBVPX_TEST_DATA-$(CONFIG_ENCODERS) += park_joy_90p_8_422.y4m
LIBVPX_TEST_DATA-$(CONFIG_ENCODERS) += park_joy_90p_8_444.y4m
diff --git a/test/test-data.sha1 b/test/test-data.sha1
index 0bb4481..3590f4e 100644
--- a/test/test-data.sha1
+++ b/test/test-data.sha1
@@ -22,6 +22,7 @@
c92825f1ea25c5c37855083a69faac6ac4641a9e *park_joy_90p_12_422.y4m
b592189b885b6cc85db55cc98512a197d73d3b34 *park_joy_90p_12_444.y4m
82c1bfcca368c2f22bad7d693d690d5499ecdd11 *park_joy_90p_12_440.yuv
+b9e1e90aece2be6e2c90d89e6ab2372d5f8c792d *park_joy_90p_8_420_a10-1.y4m
4e0eb61e76f0684188d9bc9f3ce61f6b6b77bb2c *park_joy_90p_8_420.y4m
7a193ff7dfeb96ba5f82b2afd7afa9e1fe83d947 *park_joy_90p_8_422.y4m
bdb7856e6bc93599bdda05c2e773a9f22b6c6d03 *park_joy_90p_8_444.y4m
diff --git a/test/test.mk b/test/test.mk
index 0dad062..4d63a63 100644
--- a/test/test.mk
+++ b/test/test.mk
@@ -66,6 +66,7 @@
LIBVPX_TEST_SRCS-$(CONFIG_DECODERS) += ../webmdec.cc
LIBVPX_TEST_SRCS-$(CONFIG_DECODERS) += ../webmdec.h
LIBVPX_TEST_SRCS-$(CONFIG_DECODERS) += webm_video_source.h
+LIBVPX_TEST_SRCS-$(CONFIG_VP9_DECODER) += vp9_skip_loopfilter_test.cc
endif
LIBVPX_TEST_SRCS-$(CONFIG_DECODERS) += decode_api_test.cc
diff --git a/test/test_intra_pred_speed.cc b/test/test_intra_pred_speed.cc
index a684ea4..cd562da 100644
--- a/test/test_intra_pred_speed.cc
+++ b/test/test_intra_pred_speed.cc
@@ -311,17 +311,20 @@
vp9_d63_predictor_32x32_c, vp9_tm_predictor_32x32_c)
#if HAVE_SSE2
+#if ARCH_X86_64
INTRA_PRED_TEST(SSE2, TestIntraPred32, vp9_dc_predictor_32x32_sse2,
vp9_dc_left_predictor_32x32_sse2,
vp9_dc_top_predictor_32x32_sse2,
vp9_dc_128_predictor_32x32_sse2, vp9_v_predictor_32x32_sse2,
NULL, NULL, NULL, NULL, NULL, NULL, NULL,
-#if ARCH_X86_64
- vp9_tm_predictor_32x32_sse2
+ vp9_tm_predictor_32x32_sse2)
#else
- NULL
-#endif
- )
+INTRA_PRED_TEST(SSE2, TestIntraPred32, vp9_dc_predictor_32x32_sse2,
+ vp9_dc_left_predictor_32x32_sse2,
+ vp9_dc_top_predictor_32x32_sse2,
+ vp9_dc_128_predictor_32x32_sse2, vp9_v_predictor_32x32_sse2,
+ NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL)
+#endif // ARCH_X86_64
#endif // HAVE_SSE2
#if HAVE_SSSE3
diff --git a/test/tools_common.sh b/test/tools_common.sh
index 0ae011e..60424ed 100755
--- a/test/tools_common.sh
+++ b/test/tools_common.sh
@@ -408,6 +408,8 @@
YUV_RAW_INPUT_WIDTH=352
YUV_RAW_INPUT_HEIGHT=288
+Y4M_NOSQ_PAR_INPUT="${LIBVPX_TEST_DATA_PATH}/park_joy_90p_8_420_a10-1.y4m"
+
# Setup a trap function to clean up after tests complete.
trap cleanup EXIT
@@ -429,6 +431,7 @@
VPX_TEST_VERBOSE_OUTPUT=${VPX_TEST_VERBOSE_OUTPUT}
YUV_RAW_INPUT=${YUV_RAW_INPUT}
YUV_RAW_INPUT_WIDTH=${YUV_RAW_INPUT_WIDTH}
- YUV_RAW_INPUT_HEIGHT=${YUV_RAW_INPUT_HEIGHT}"
+ YUV_RAW_INPUT_HEIGHT=${YUV_RAW_INPUT_HEIGHT}
+ Y4M_NOSQ_PAR_INPUT=${Y4M_NOSQ_PAR_INPUT}"
fi # End $VPX_TEST_TOOLS_COMMON_SH pseudo include guard.
diff --git a/test/variance_test.cc b/test/variance_test.cc
index 08a247d..2d17119 100644
--- a/test/variance_test.cc
+++ b/test/variance_test.cc
@@ -1976,12 +1976,9 @@
#if HAVE_NEON_ASM
const SubpixVarMxNFunc vp8_subpel_variance16x16_neon =
vp8_sub_pixel_variance16x16_neon;
-const SubpixVarMxNFunc vp8_subpel_variance8x8_neon =
- vp8_sub_pixel_variance8x8_neon;
INSTANTIATE_TEST_CASE_P(
NEON, VP8SubpelVarianceTest,
- ::testing::Values(make_tuple(3, 3, vp8_subpel_variance8x8_neon, 0),
- make_tuple(4, 4, vp8_subpel_variance16x16_neon, 0)));
+ ::testing::Values(make_tuple(4, 4, vp8_subpel_variance16x16_neon, 0)));
#endif // HAVE_NEON_ASM
#endif // CONFIG_VP8
diff --git a/test/vp9_skip_loopfilter_test.cc b/test/vp9_skip_loopfilter_test.cc
new file mode 100644
index 0000000..b0cc7ba
--- /dev/null
+++ b/test/vp9_skip_loopfilter_test.cc
@@ -0,0 +1,180 @@
+/*
+ * Copyright (c) 2015 The WebM project authors. All Rights Reserved.
+ *
+ * Use of this source code is governed by a BSD-style license
+ * that can be found in the LICENSE file in the root of the source
+ * tree. An additional intellectual property rights grant can be found
+ * in the file PATENTS. All contributing project authors may
+ * be found in the AUTHORS file in the root of the source tree.
+ */
+
+#include <string>
+
+#include "test/codec_factory.h"
+#include "test/decode_test_driver.h"
+#include "test/md5_helper.h"
+#include "test/util.h"
+#include "test/webm_video_source.h"
+
+namespace {
+
+const char kVp9TestFile[] = "vp90-2-08-tile_1x8_frame_parallel.webm";
+const char kVp9Md5File[] = "vp90-2-08-tile_1x8_frame_parallel.webm.md5";
+
+// Class for testing shutting off the loop filter.
+class SkipLoopFilterTest {
+ public:
+ SkipLoopFilterTest()
+ : video_(NULL),
+ decoder_(NULL),
+ md5_file_(NULL) {}
+
+ ~SkipLoopFilterTest() {
+ if (md5_file_ != NULL)
+ fclose(md5_file_);
+ delete decoder_;
+ delete video_;
+ }
+
+ // If |threads| > 0 then set the decoder with that number of threads.
+ void Init(int num_threads) {
+ expected_md5_[0] = '\0';
+ junk_[0] = '\0';
+ video_ = new libvpx_test::WebMVideoSource(kVp9TestFile);
+ ASSERT_TRUE(video_ != NULL);
+ video_->Init();
+ video_->Begin();
+
+ vpx_codec_dec_cfg_t cfg = vpx_codec_dec_cfg_t();
+ if (num_threads > 0)
+ cfg.threads = num_threads;
+ decoder_ = new libvpx_test::VP9Decoder(cfg, 0);
+ ASSERT_TRUE(decoder_ != NULL);
+
+ OpenMd5File(kVp9Md5File);
+ }
+
+ // Set the VP9 skipLoopFilter control value.
+ void SetSkipLoopFilter(int value, vpx_codec_err_t expected_value) {
+ decoder_->Control(VP9_SET_SKIP_LOOP_FILTER, value, expected_value);
+ }
+
+ vpx_codec_err_t DecodeOneFrame() {
+ const vpx_codec_err_t res =
+ decoder_->DecodeFrame(video_->cxdata(), video_->frame_size());
+ if (res == VPX_CODEC_OK) {
+ ReadMd5();
+ video_->Next();
+ }
+ return res;
+ }
+
+ vpx_codec_err_t DecodeRemainingFrames() {
+ for (; video_->cxdata() != NULL; video_->Next()) {
+ const vpx_codec_err_t res =
+ decoder_->DecodeFrame(video_->cxdata(), video_->frame_size());
+ if (res != VPX_CODEC_OK)
+ return res;
+ ReadMd5();
+ }
+ return VPX_CODEC_OK;
+ }
+
+ // Checks if MD5 matches or doesn't.
+ void CheckMd5(bool matches) {
+ libvpx_test::DxDataIterator dec_iter = decoder_->GetDxData();
+ const vpx_image_t *img = dec_iter.Next();
+ CheckMd5Vpx(*img, matches);
+ }
+
+ private:
+ // TODO(fgalligan): Move the MD5 testing code into another class.
+ void OpenMd5File(const std::string &md5_file_name) {
+ md5_file_ = libvpx_test::OpenTestDataFile(md5_file_name);
+ ASSERT_TRUE(md5_file_ != NULL) << "MD5 file open failed. Filename: "
+ << md5_file_name;
+ }
+
+ // Reads the next line of the MD5 file.
+ void ReadMd5() {
+ ASSERT_TRUE(md5_file_ != NULL);
+ const int res = fscanf(md5_file_, "%s %s", expected_md5_, junk_);
+ ASSERT_NE(EOF, res) << "Read md5 data failed";
+ expected_md5_[32] = '\0';
+ }
+
+ // Checks if the last read MD5 matches |img| or doesn't.
+ void CheckMd5Vpx(const vpx_image_t &img, bool matches) {
+ ::libvpx_test::MD5 md5_res;
+ md5_res.Add(&img);
+ const char *const actual_md5 = md5_res.Get();
+
+ // Check MD5.
+ if (matches)
+ ASSERT_STREQ(expected_md5_, actual_md5) << "MD5 checksums don't match";
+ else
+ ASSERT_STRNE(expected_md5_, actual_md5) << "MD5 checksums match";
+ }
+
+ libvpx_test::WebMVideoSource *video_;
+ libvpx_test::VP9Decoder *decoder_;
+ FILE *md5_file_;
+ char expected_md5_[33];
+ char junk_[128];
+};
+
+TEST(SkipLoopFilterTest, ShutOffLoopFilter) {
+ const int non_zero_value = 1;
+ const int num_threads = 0;
+ SkipLoopFilterTest skip_loop_filter;
+ skip_loop_filter.Init(num_threads);
+ skip_loop_filter.SetSkipLoopFilter(non_zero_value, VPX_CODEC_OK);
+ ASSERT_EQ(VPX_CODEC_OK, skip_loop_filter.DecodeRemainingFrames());
+ skip_loop_filter.CheckMd5(false);
+}
+
+TEST(SkipLoopFilterTest, ShutOffLoopFilterSingleThread) {
+ const int non_zero_value = 1;
+ const int num_threads = 1;
+ SkipLoopFilterTest skip_loop_filter;
+ skip_loop_filter.Init(num_threads);
+ skip_loop_filter.SetSkipLoopFilter(non_zero_value, VPX_CODEC_OK);
+ ASSERT_EQ(VPX_CODEC_OK, skip_loop_filter.DecodeRemainingFrames());
+ skip_loop_filter.CheckMd5(false);
+}
+
+TEST(SkipLoopFilterTest, ShutOffLoopFilter8Threads) {
+ const int non_zero_value = 1;
+ const int num_threads = 8;
+ SkipLoopFilterTest skip_loop_filter;
+ skip_loop_filter.Init(num_threads);
+ skip_loop_filter.SetSkipLoopFilter(non_zero_value, VPX_CODEC_OK);
+ ASSERT_EQ(VPX_CODEC_OK, skip_loop_filter.DecodeRemainingFrames());
+ skip_loop_filter.CheckMd5(false);
+}
+
+TEST(SkipLoopFilterTest, WithLoopFilter) {
+ const int non_zero_value = 1;
+ const int num_threads = 0;
+ SkipLoopFilterTest skip_loop_filter;
+ skip_loop_filter.Init(num_threads);
+ skip_loop_filter.SetSkipLoopFilter(non_zero_value, VPX_CODEC_OK);
+ skip_loop_filter.SetSkipLoopFilter(0, VPX_CODEC_OK);
+ ASSERT_EQ(VPX_CODEC_OK, skip_loop_filter.DecodeRemainingFrames());
+ skip_loop_filter.CheckMd5(true);
+}
+
+TEST(SkipLoopFilterTest, ToggleLoopFilter) {
+ const int num_threads = 0;
+ SkipLoopFilterTest skip_loop_filter;
+ skip_loop_filter.Init(num_threads);
+
+ for (int i = 0; i < 10; ++i) {
+ skip_loop_filter.SetSkipLoopFilter(i % 2, VPX_CODEC_OK);
+ ASSERT_EQ(VPX_CODEC_OK, skip_loop_filter.DecodeOneFrame());
+ }
+ ASSERT_EQ(VPX_CODEC_OK, skip_loop_filter.DecodeRemainingFrames());
+ skip_loop_filter.CheckMd5(false);
+}
+
+} // namespace
diff --git a/test/vpxenc.sh b/test/vpxenc.sh
index 9674bdce..1faa145 100755
--- a/test/vpxenc.sh
+++ b/test/vpxenc.sh
@@ -23,6 +23,13 @@
elog "The file ${YUV_RAW_INPUT##*/} must exist in LIBVPX_TEST_DATA_PATH."
return 1
fi
+ if [ "$(vpxenc_can_encode_vp9)" = "yes" ]; then
+ if [ ! -e "${Y4M_NOSQ_PAR_INPUT}" ]; then
+ elog "The file ${Y4M_NOSQ_PAR_INPUT##*/} must exist in"
+ elog "LIBVPX_TEST_DATA_PATH."
+ return 1
+ fi
+ fi
if [ -z "$(vpx_tool_path vpxenc)" ]; then
elog "vpxenc not found. It must exist in LIBVPX_BIN_PATH or its parent."
return 1
@@ -49,6 +56,10 @@
--height="${YUV_RAW_INPUT_HEIGHT}""
}
+y4m_input_non_square_par() {
+ echo ""${Y4M_NOSQ_PAR_INPUT}""
+}
+
# Echo default vpxenc real time encoding params. $1 is the codec, which defaults
# to vp8 if unspecified.
vpxenc_rt_params() {
@@ -320,6 +331,23 @@
fi
}
+# TODO(fgalligan): Test that DisplayWidth is different than video width.
+vpxenc_vp9_webm_non_square_par() {
+ if [ "$(vpxenc_can_encode_vp9)" = "yes" ] && \
+ [ "$(webm_io_available)" = "yes" ]; then
+ local readonly output="${VPX_TEST_OUTPUT_DIR}/vp9_non_square_par.webm"
+ vpxenc $(y4m_input_non_square_par) \
+ --codec=vp9 \
+ --limit="${TEST_FRAMES}" \
+ --output="${output}"
+
+ if [ ! -e "${output}" ]; then
+ elog "Output file does not exist."
+ return 1
+ fi
+ fi
+}
+
vpxenc_tests="vpxenc_vp8_ivf
vpxenc_vp8_webm
vpxenc_vp8_webm_rt
@@ -332,6 +360,7 @@
vpxenc_vp9_webm_2pass
vpxenc_vp9_ivf_lossless
vpxenc_vp9_ivf_minq0_maxq0
- vpxenc_vp9_webm_lag10_frames20"
+ vpxenc_vp9_webm_lag10_frames20
+ vpxenc_vp9_webm_non_square_par"
run_tests vpxenc_verify_environment "${vpxenc_tests}"
diff --git a/tools_common.h b/tools_common.h
index aa7f025..adccec8 100644
--- a/tools_common.h
+++ b/tools_common.h
@@ -89,6 +89,7 @@
enum VideoFileType file_type;
uint32_t width;
uint32_t height;
+ struct VpxRational pixel_aspect_ratio;
vpx_img_fmt_t fmt;
vpx_bit_depth_t bit_depth;
int only_i420;
diff --git a/vp8/common/rtcd_defs.pl b/vp8/common/rtcd_defs.pl
index 245677f..fed2088 100644
--- a/vp8/common/rtcd_defs.pl
+++ b/vp8/common/rtcd_defs.pl
@@ -245,10 +245,9 @@
$vp8_sub_pixel_variance4x4_sse2=vp8_sub_pixel_variance4x4_wmt;
add_proto qw/unsigned int vp8_sub_pixel_variance8x8/, "const unsigned char *src_ptr, int source_stride, int xoffset, int yoffset, const unsigned char *ref_ptr, int Refstride, unsigned int *sse";
-specialize qw/vp8_sub_pixel_variance8x8 mmx sse2 media neon_asm/;
+specialize qw/vp8_sub_pixel_variance8x8 mmx sse2 media/;
$vp8_sub_pixel_variance8x8_sse2=vp8_sub_pixel_variance8x8_wmt;
$vp8_sub_pixel_variance8x8_media=vp8_sub_pixel_variance8x8_armv6;
-$vp8_sub_pixel_variance8x8_neon_asm=vp8_sub_pixel_variance8x8_neon;
add_proto qw/unsigned int vp8_sub_pixel_variance8x16/, "const unsigned char *src_ptr, int source_stride, int xoffset, int yoffset, const unsigned char *ref_ptr, int Refstride, unsigned int *sse";
specialize qw/vp8_sub_pixel_variance8x16 mmx sse2/;
diff --git a/vp9/common/mips/msa/vp9_loopfilter_4_msa.c b/vp9/common/mips/msa/vp9_loopfilter_4_msa.c
new file mode 100644
index 0000000..7f69135
--- /dev/null
+++ b/vp9/common/mips/msa/vp9_loopfilter_4_msa.c
@@ -0,0 +1,152 @@
+/*
+ * Copyright (c) 2015 The WebM project authors. All Rights Reserved.
+ *
+ * Use of this source code is governed by a BSD-style license
+ * that can be found in the LICENSE file in the root of the source
+ * tree. An additional intellectual property rights grant can be found
+ * in the file PATENTS. All contributing project authors may
+ * be found in the AUTHORS file in the root of the source tree.
+ */
+
+#include "vp9/common/mips/msa/vp9_loopfilter_msa.h"
+
+void vp9_lpf_horizontal_4_msa(uint8_t *src, int32_t pitch,
+ const uint8_t *b_limit_ptr,
+ const uint8_t *limit_ptr,
+ const uint8_t *thresh_ptr,
+ int32_t count) {
+ uint64_t p1_d, p0_d, q0_d, q1_d;
+ v16u8 mask, hev, flat, thresh, b_limit, limit;
+ v16u8 p3, p2, p1, p0, q3, q2, q1, q0, p1_out, p0_out, q0_out, q1_out;
+
+ (void)count;
+
+ /* load vector elements */
+ LD_UB8((src - 4 * pitch), pitch, p3, p2, p1, p0, q0, q1, q2, q3);
+
+ thresh = (v16u8)__msa_fill_b(*thresh_ptr);
+ b_limit = (v16u8)__msa_fill_b(*b_limit_ptr);
+ limit = (v16u8)__msa_fill_b(*limit_ptr);
+
+ LPF_MASK_HEV(p3, p2, p1, p0, q0, q1, q2, q3, limit, b_limit, thresh,
+ hev, mask, flat);
+ VP9_LPF_FILTER4_8W(p1, p0, q0, q1, mask, hev, p1_out, p0_out, q0_out, q1_out);
+
+ p1_d = __msa_copy_u_d((v2i64)p1_out, 0);
+ p0_d = __msa_copy_u_d((v2i64)p0_out, 0);
+ q0_d = __msa_copy_u_d((v2i64)q0_out, 0);
+ q1_d = __msa_copy_u_d((v2i64)q1_out, 0);
+ SD4(p1_d, p0_d, q0_d, q1_d, (src - 2 * pitch), pitch);
+}
+
+void vp9_lpf_horizontal_4_dual_msa(uint8_t *src, int32_t pitch,
+ const uint8_t *b_limit0_ptr,
+ const uint8_t *limit0_ptr,
+ const uint8_t *thresh0_ptr,
+ const uint8_t *b_limit1_ptr,
+ const uint8_t *limit1_ptr,
+ const uint8_t *thresh1_ptr) {
+ v16u8 mask, hev, flat, thresh0, b_limit0, limit0, thresh1, b_limit1, limit1;
+ v16u8 p3, p2, p1, p0, q3, q2, q1, q0;
+
+ /* load vector elements */
+ LD_UB8((src - 4 * pitch), pitch, p3, p2, p1, p0, q0, q1, q2, q3);
+
+ thresh0 = (v16u8)__msa_fill_b(*thresh0_ptr);
+ thresh1 = (v16u8)__msa_fill_b(*thresh1_ptr);
+ thresh0 = (v16u8)__msa_ilvr_d((v2i64)thresh1, (v2i64)thresh0);
+
+ b_limit0 = (v16u8)__msa_fill_b(*b_limit0_ptr);
+ b_limit1 = (v16u8)__msa_fill_b(*b_limit1_ptr);
+ b_limit0 = (v16u8)__msa_ilvr_d((v2i64)b_limit1, (v2i64)b_limit0);
+
+ limit0 = (v16u8)__msa_fill_b(*limit0_ptr);
+ limit1 = (v16u8)__msa_fill_b(*limit1_ptr);
+ limit0 = (v16u8)__msa_ilvr_d((v2i64)limit1, (v2i64)limit0);
+
+ LPF_MASK_HEV(p3, p2, p1, p0, q0, q1, q2, q3, limit0, b_limit0, thresh0,
+ hev, mask, flat);
+ VP9_LPF_FILTER4_4W(p1, p0, q0, q1, mask, hev, p1, p0, q0, q1);
+
+ ST_UB4(p1, p0, q0, q1, (src - 2 * pitch), pitch);
+}
+
+void vp9_lpf_vertical_4_msa(uint8_t *src, int32_t pitch,
+ const uint8_t *b_limit_ptr,
+ const uint8_t *limit_ptr,
+ const uint8_t *thresh_ptr,
+ int32_t count) {
+ v16u8 mask, hev, flat, limit, thresh, b_limit;
+ v16u8 p3, p2, p1, p0, q3, q2, q1, q0;
+ v8i16 vec0, vec1, vec2, vec3;
+
+ (void)count;
+
+ LD_UB8((src - 4), pitch, p3, p2, p1, p0, q0, q1, q2, q3);
+
+ thresh = (v16u8)__msa_fill_b(*thresh_ptr);
+ b_limit = (v16u8)__msa_fill_b(*b_limit_ptr);
+ limit = (v16u8)__msa_fill_b(*limit_ptr);
+
+ TRANSPOSE8x8_UB_UB(p3, p2, p1, p0, q0, q1, q2, q3,
+ p3, p2, p1, p0, q0, q1, q2, q3);
+ LPF_MASK_HEV(p3, p2, p1, p0, q0, q1, q2, q3, limit, b_limit, thresh,
+ hev, mask, flat);
+ VP9_LPF_FILTER4_8W(p1, p0, q0, q1, mask, hev, p1, p0, q0, q1);
+ ILVR_B2_SH(p0, p1, q1, q0, vec0, vec1);
+ ILVRL_H2_SH(vec1, vec0, vec2, vec3);
+
+ src -= 2;
+ ST4x4_UB(vec2, vec2, 0, 1, 2, 3, src, pitch);
+ src += 4 * pitch;
+ ST4x4_UB(vec3, vec3, 0, 1, 2, 3, src, pitch);
+}
+
+void vp9_lpf_vertical_4_dual_msa(uint8_t *src, int32_t pitch,
+ const uint8_t *b_limit0_ptr,
+ const uint8_t *limit0_ptr,
+ const uint8_t *thresh0_ptr,
+ const uint8_t *b_limit1_ptr,
+ const uint8_t *limit1_ptr,
+ const uint8_t *thresh1_ptr) {
+ v16u8 mask, hev, flat;
+ v16u8 thresh0, b_limit0, limit0, thresh1, b_limit1, limit1;
+ v16u8 p3, p2, p1, p0, q3, q2, q1, q0;
+ v16u8 row0, row1, row2, row3, row4, row5, row6, row7;
+ v16u8 row8, row9, row10, row11, row12, row13, row14, row15;
+ v8i16 tmp0, tmp1, tmp2, tmp3, tmp4, tmp5;
+
+ LD_UB8(src - 4, pitch, row0, row1, row2, row3, row4, row5, row6, row7);
+ LD_UB8(src - 4 + (8 * pitch), pitch,
+ row8, row9, row10, row11, row12, row13, row14, row15);
+
+ TRANSPOSE16x8_UB_UB(row0, row1, row2, row3, row4, row5, row6, row7,
+ row8, row9, row10, row11, row12, row13, row14, row15,
+ p3, p2, p1, p0, q0, q1, q2, q3);
+
+ thresh0 = (v16u8)__msa_fill_b(*thresh0_ptr);
+ thresh1 = (v16u8)__msa_fill_b(*thresh1_ptr);
+ thresh0 = (v16u8)__msa_ilvr_d((v2i64)thresh1, (v2i64)thresh0);
+
+ b_limit0 = (v16u8)__msa_fill_b(*b_limit0_ptr);
+ b_limit1 = (v16u8)__msa_fill_b(*b_limit1_ptr);
+ b_limit0 = (v16u8)__msa_ilvr_d((v2i64)b_limit1, (v2i64)b_limit0);
+
+ limit0 = (v16u8)__msa_fill_b(*limit0_ptr);
+ limit1 = (v16u8)__msa_fill_b(*limit1_ptr);
+ limit0 = (v16u8)__msa_ilvr_d((v2i64)limit1, (v2i64)limit0);
+
+ LPF_MASK_HEV(p3, p2, p1, p0, q0, q1, q2, q3, limit0, b_limit0, thresh0,
+ hev, mask, flat);
+ VP9_LPF_FILTER4_4W(p1, p0, q0, q1, mask, hev, p1, p0, q0, q1);
+ ILVR_B2_SH(p0, p1, q1, q0, tmp0, tmp1);
+ ILVRL_H2_SH(tmp1, tmp0, tmp2, tmp3);
+ ILVL_B2_SH(p0, p1, q1, q0, tmp0, tmp1);
+ ILVRL_H2_SH(tmp1, tmp0, tmp4, tmp5);
+
+ src -= 2;
+
+ ST4x8_UB(tmp2, tmp3, src, pitch);
+ src += (8 * pitch);
+ ST4x8_UB(tmp4, tmp5, src, pitch);
+}
diff --git a/vp9/common/mips/msa/vp9_loopfilter_8_msa.c b/vp9/common/mips/msa/vp9_loopfilter_8_msa.c
new file mode 100644
index 0000000..26a858d
--- /dev/null
+++ b/vp9/common/mips/msa/vp9_loopfilter_8_msa.c
@@ -0,0 +1,348 @@
+/*
+ * Copyright (c) 2015 The WebM project authors. All Rights Reserved.
+ *
+ * Use of this source code is governed by a BSD-style license
+ * that can be found in the LICENSE file in the root of the source
+ * tree. An additional intellectual property rights grant can be found
+ * in the file PATENTS. All contributing project authors may
+ * be found in the AUTHORS file in the root of the source tree.
+ */
+
+#include "vp9/common/mips/msa/vp9_loopfilter_msa.h"
+
+void vp9_lpf_horizontal_8_msa(uint8_t *src, int32_t pitch,
+ const uint8_t *b_limit_ptr,
+ const uint8_t *limit_ptr,
+ const uint8_t *thresh_ptr,
+ int32_t count) {
+ uint64_t p2_d, p1_d, p0_d, q0_d, q1_d, q2_d;
+ v16u8 mask, hev, flat, thresh, b_limit, limit;
+ v16u8 p3, p2, p1, p0, q3, q2, q1, q0;
+ v16u8 p2_out, p1_out, p0_out, q0_out, q1_out, q2_out;
+ v8i16 p2_filter8, p1_filter8, p0_filter8, q0_filter8, q1_filter8, q2_filter8;
+ v8u16 p3_r, p2_r, p1_r, p0_r, q3_r, q2_r, q1_r, q0_r;
+ v16i8 zero = { 0 };
+
+ (void)count;
+
+ /* load vector elements */
+ LD_UB8((src - 4 * pitch), pitch, p3, p2, p1, p0, q0, q1, q2, q3);
+
+ thresh = (v16u8)__msa_fill_b(*thresh_ptr);
+ b_limit = (v16u8)__msa_fill_b(*b_limit_ptr);
+ limit = (v16u8)__msa_fill_b(*limit_ptr);
+
+ LPF_MASK_HEV(p3, p2, p1, p0, q0, q1, q2, q3, limit, b_limit, thresh,
+ hev, mask, flat);
+ VP9_FLAT4(p3, p2, p0, q0, q2, q3, flat);
+ VP9_LPF_FILTER4_8W(p1, p0, q0, q1, mask, hev, p1_out, p0_out, q0_out, q1_out);
+
+ flat = (v16u8)__msa_ilvr_d((v2i64)zero, (v2i64)flat);
+
+ if (__msa_test_bz_v(flat)) {
+ p1_d = __msa_copy_u_d((v2i64)p1_out, 0);
+ p0_d = __msa_copy_u_d((v2i64)p0_out, 0);
+ q0_d = __msa_copy_u_d((v2i64)q0_out, 0);
+ q1_d = __msa_copy_u_d((v2i64)q1_out, 0);
+ SD4(p1_d, p0_d, q0_d, q1_d, (src - 2 * pitch), pitch);
+ } else {
+ ILVR_B8_UH(zero, p3, zero, p2, zero, p1, zero, p0, zero, q0, zero, q1,
+ zero, q2, zero, q3, p3_r, p2_r, p1_r, p0_r, q0_r, q1_r,
+ q2_r, q3_r);
+ VP9_FILTER8(p3_r, p2_r, p1_r, p0_r, q0_r, q1_r, q2_r, q3_r, p2_filter8,
+ p1_filter8, p0_filter8, q0_filter8, q1_filter8, q2_filter8);
+
+ /* convert 16 bit output data into 8 bit */
+ PCKEV_B4_SH(zero, p2_filter8, zero, p1_filter8, zero, p0_filter8,
+ zero, q0_filter8, p2_filter8, p1_filter8, p0_filter8,
+ q0_filter8);
+ PCKEV_B2_SH(zero, q1_filter8, zero, q2_filter8, q1_filter8, q2_filter8);
+
+ /* store pixel values */
+ p2_out = __msa_bmnz_v(p2, (v16u8)p2_filter8, flat);
+ p1_out = __msa_bmnz_v(p1_out, (v16u8)p1_filter8, flat);
+ p0_out = __msa_bmnz_v(p0_out, (v16u8)p0_filter8, flat);
+ q0_out = __msa_bmnz_v(q0_out, (v16u8)q0_filter8, flat);
+ q1_out = __msa_bmnz_v(q1_out, (v16u8)q1_filter8, flat);
+ q2_out = __msa_bmnz_v(q2, (v16u8)q2_filter8, flat);
+
+ p2_d = __msa_copy_u_d((v2i64)p2_out, 0);
+ p1_d = __msa_copy_u_d((v2i64)p1_out, 0);
+ p0_d = __msa_copy_u_d((v2i64)p0_out, 0);
+ q0_d = __msa_copy_u_d((v2i64)q0_out, 0);
+ q1_d = __msa_copy_u_d((v2i64)q1_out, 0);
+ q2_d = __msa_copy_u_d((v2i64)q2_out, 0);
+
+ src -= 3 * pitch;
+
+ SD4(p2_d, p1_d, p0_d, q0_d, src, pitch);
+ src += (4 * pitch);
+ SD(q1_d, src);
+ src += pitch;
+ SD(q2_d, src);
+ }
+}
+
+void vp9_lpf_horizontal_8_dual_msa(uint8_t *src, int32_t pitch,
+ const uint8_t *b_limit0,
+ const uint8_t *limit0,
+ const uint8_t *thresh0,
+ const uint8_t *b_limit1,
+ const uint8_t *limit1,
+ const uint8_t *thresh1) {
+ v16u8 p3, p2, p1, p0, q3, q2, q1, q0;
+ v16u8 p2_out, p1_out, p0_out, q0_out, q1_out, q2_out;
+ v16u8 flat, mask, hev, tmp, thresh, b_limit, limit;
+ v8u16 p3_r, p2_r, p1_r, p0_r, q0_r, q1_r, q2_r, q3_r;
+ v8u16 p3_l, p2_l, p1_l, p0_l, q0_l, q1_l, q2_l, q3_l;
+ v8i16 p2_filt8_r, p1_filt8_r, p0_filt8_r, q0_filt8_r, q1_filt8_r, q2_filt8_r;
+ v8i16 p2_filt8_l, p1_filt8_l, p0_filt8_l, q0_filt8_l, q1_filt8_l, q2_filt8_l;
+ v16u8 zero = { 0 };
+
+ /* load vector elements */
+ LD_UB8(src - (4 * pitch), pitch, p3, p2, p1, p0, q0, q1, q2, q3);
+
+ thresh = (v16u8)__msa_fill_b(*thresh0);
+ tmp = (v16u8)__msa_fill_b(*thresh1);
+ thresh = (v16u8)__msa_ilvr_d((v2i64)tmp, (v2i64)thresh);
+
+ b_limit = (v16u8)__msa_fill_b(*b_limit0);
+ tmp = (v16u8)__msa_fill_b(*b_limit1);
+ b_limit = (v16u8)__msa_ilvr_d((v2i64)tmp, (v2i64)b_limit);
+
+ limit = (v16u8)__msa_fill_b(*limit0);
+ tmp = (v16u8)__msa_fill_b(*limit1);
+ limit = (v16u8)__msa_ilvr_d((v2i64)tmp, (v2i64)limit);
+
+ /* mask and hev */
+ LPF_MASK_HEV(p3, p2, p1, p0, q0, q1, q2, q3, limit, b_limit, thresh,
+ hev, mask, flat);
+ VP9_FLAT4(p3, p2, p0, q0, q2, q3, flat);
+ VP9_LPF_FILTER4_4W(p1, p0, q0, q1, mask, hev, p1_out, p0_out, q0_out, q1_out);
+
+ if (__msa_test_bz_v(flat)) {
+ ST_UB4(p1_out, p0_out, q0_out, q1_out, (src - 2 * pitch), pitch);
+ } else {
+ ILVR_B8_UH(zero, p3, zero, p2, zero, p1, zero, p0, zero, q0, zero, q1,
+ zero, q2, zero, q3, p3_r, p2_r, p1_r, p0_r, q0_r, q1_r,
+ q2_r, q3_r);
+ VP9_FILTER8(p3_r, p2_r, p1_r, p0_r, q0_r, q1_r, q2_r, q3_r, p2_filt8_r,
+ p1_filt8_r, p0_filt8_r, q0_filt8_r, q1_filt8_r, q2_filt8_r);
+
+ ILVL_B4_UH(zero, p3, zero, p2, zero, p1, zero, p0, p3_l, p2_l, p1_l, p0_l);
+ ILVL_B4_UH(zero, q0, zero, q1, zero, q2, zero, q3, q0_l, q1_l, q2_l, q3_l);
+ VP9_FILTER8(p3_l, p2_l, p1_l, p0_l, q0_l, q1_l, q2_l, q3_l, p2_filt8_l,
+ p1_filt8_l, p0_filt8_l, q0_filt8_l, q1_filt8_l, q2_filt8_l);
+
+ /* convert 16 bit output data into 8 bit */
+ PCKEV_B4_SH(p2_filt8_l, p2_filt8_r, p1_filt8_l, p1_filt8_r, p0_filt8_l,
+ p0_filt8_r, q0_filt8_l, q0_filt8_r, p2_filt8_r, p1_filt8_r,
+ p0_filt8_r, q0_filt8_r);
+ PCKEV_B2_SH(q1_filt8_l, q1_filt8_r, q2_filt8_l, q2_filt8_r, q1_filt8_r,
+ q2_filt8_r);
+
+ /* store pixel values */
+ p2_out = __msa_bmnz_v(p2, (v16u8)p2_filt8_r, flat);
+ p1_out = __msa_bmnz_v(p1_out, (v16u8)p1_filt8_r, flat);
+ p0_out = __msa_bmnz_v(p0_out, (v16u8)p0_filt8_r, flat);
+ q0_out = __msa_bmnz_v(q0_out, (v16u8)q0_filt8_r, flat);
+ q1_out = __msa_bmnz_v(q1_out, (v16u8)q1_filt8_r, flat);
+ q2_out = __msa_bmnz_v(q2, (v16u8)q2_filt8_r, flat);
+
+ src -= 3 * pitch;
+
+ ST_UB4(p2_out, p1_out, p0_out, q0_out, src, pitch);
+ src += (4 * pitch);
+ ST_UB2(q1_out, q2_out, src, pitch);
+ src += (2 * pitch);
+ }
+}
+
+void vp9_lpf_vertical_8_msa(uint8_t *src, int32_t pitch,
+ const uint8_t *b_limit_ptr,
+ const uint8_t *limit_ptr,
+ const uint8_t *thresh_ptr,
+ int32_t count) {
+ v16u8 p3, p2, p1, p0, q3, q2, q1, q0;
+ v16u8 p1_out, p0_out, q0_out, q1_out;
+ v16u8 flat, mask, hev, thresh, b_limit, limit;
+ v8u16 p3_r, p2_r, p1_r, p0_r, q0_r, q1_r, q2_r, q3_r;
+ v8i16 p2_filt8_r, p1_filt8_r, p0_filt8_r, q0_filt8_r, q1_filt8_r, q2_filt8_r;
+ v16u8 zero = { 0 };
+ v8i16 vec0, vec1, vec2, vec3, vec4;
+
+ (void)count;
+
+ /* load vector elements */
+ LD_UB8(src - 4, pitch, p3, p2, p1, p0, q0, q1, q2, q3);
+
+ TRANSPOSE8x8_UB_UB(p3, p2, p1, p0, q0, q1, q2, q3,
+ p3, p2, p1, p0, q0, q1, q2, q3);
+
+ thresh = (v16u8)__msa_fill_b(*thresh_ptr);
+ b_limit = (v16u8)__msa_fill_b(*b_limit_ptr);
+ limit = (v16u8)__msa_fill_b(*limit_ptr);
+
+ /* mask and hev */
+ LPF_MASK_HEV(p3, p2, p1, p0, q0, q1, q2, q3, limit, b_limit, thresh,
+ hev, mask, flat);
+ /* flat4 */
+ VP9_FLAT4(p3, p2, p0, q0, q2, q3, flat);
+ /* filter4 */
+ VP9_LPF_FILTER4_8W(p1, p0, q0, q1, mask, hev, p1_out, p0_out, q0_out, q1_out);
+
+ flat = (v16u8)__msa_ilvr_d((v2i64)zero, (v2i64)flat);
+
+ if (__msa_test_bz_v(flat)) {
+ /* Store 4 pixels p1-_q1 */
+ ILVR_B2_SH(p0_out, p1_out, q1_out, q0_out, vec0, vec1);
+ ILVRL_H2_SH(vec1, vec0, vec2, vec3);
+
+ src -= 2;
+ ST4x4_UB(vec2, vec2, 0, 1, 2, 3, src, pitch);
+ src += 4 * pitch;
+ ST4x4_UB(vec3, vec3, 0, 1, 2, 3, src, pitch);
+ } else {
+ ILVR_B8_UH(zero, p3, zero, p2, zero, p1, zero, p0, zero, q0, zero, q1,
+ zero, q2, zero, q3, p3_r, p2_r, p1_r, p0_r, q0_r, q1_r, q2_r,
+ q3_r);
+ VP9_FILTER8(p3_r, p2_r, p1_r, p0_r, q0_r, q1_r, q2_r, q3_r, p2_filt8_r,
+ p1_filt8_r, p0_filt8_r, q0_filt8_r, q1_filt8_r, q2_filt8_r);
+ /* convert 16 bit output data into 8 bit */
+ PCKEV_B4_SH(p2_filt8_r, p2_filt8_r, p1_filt8_r, p1_filt8_r, p0_filt8_r,
+ p0_filt8_r, q0_filt8_r, q0_filt8_r, p2_filt8_r, p1_filt8_r,
+ p0_filt8_r, q0_filt8_r);
+ PCKEV_B2_SH(q1_filt8_r, q1_filt8_r, q2_filt8_r, q2_filt8_r, q1_filt8_r,
+ q2_filt8_r);
+
+ /* store pixel values */
+ p2 = __msa_bmnz_v(p2, (v16u8)p2_filt8_r, flat);
+ p1 = __msa_bmnz_v(p1_out, (v16u8)p1_filt8_r, flat);
+ p0 = __msa_bmnz_v(p0_out, (v16u8)p0_filt8_r, flat);
+ q0 = __msa_bmnz_v(q0_out, (v16u8)q0_filt8_r, flat);
+ q1 = __msa_bmnz_v(q1_out, (v16u8)q1_filt8_r, flat);
+ q2 = __msa_bmnz_v(q2, (v16u8)q2_filt8_r, flat);
+
+ /* Store 6 pixels p2-_q2 */
+ ILVR_B2_SH(p1, p2, q0, p0, vec0, vec1);
+ ILVRL_H2_SH(vec1, vec0, vec2, vec3);
+ vec4 = (v8i16)__msa_ilvr_b((v16i8)q2, (v16i8)q1);
+
+ src -= 3;
+ ST4x4_UB(vec2, vec2, 0, 1, 2, 3, src, pitch);
+ ST2x4_UB(vec4, 0, src + 4, pitch);
+ src += (4 * pitch);
+ ST4x4_UB(vec3, vec3, 0, 1, 2, 3, src, pitch);
+ ST2x4_UB(vec4, 4, src + 4, pitch);
+ }
+}
+
+void vp9_lpf_vertical_8_dual_msa(uint8_t *src, int32_t pitch,
+ const uint8_t *b_limit0,
+ const uint8_t *limit0,
+ const uint8_t *thresh0,
+ const uint8_t *b_limit1,
+ const uint8_t *limit1,
+ const uint8_t *thresh1) {
+ uint8_t *temp_src;
+ v16u8 p3, p2, p1, p0, q3, q2, q1, q0;
+ v16u8 p1_out, p0_out, q0_out, q1_out;
+ v16u8 flat, mask, hev, thresh, b_limit, limit;
+ v16u8 row4, row5, row6, row7, row12, row13, row14, row15;
+ v8u16 p3_r, p2_r, p1_r, p0_r, q0_r, q1_r, q2_r, q3_r;
+ v8u16 p3_l, p2_l, p1_l, p0_l, q0_l, q1_l, q2_l, q3_l;
+ v8i16 p2_filt8_r, p1_filt8_r, p0_filt8_r, q0_filt8_r, q1_filt8_r, q2_filt8_r;
+ v8i16 p2_filt8_l, p1_filt8_l, p0_filt8_l, q0_filt8_l, q1_filt8_l, q2_filt8_l;
+ v16u8 zero = { 0 };
+ v8i16 vec0, vec1, vec2, vec3, vec4, vec5, vec6, vec7;
+
+ temp_src = src - 4;
+
+ LD_UB8(temp_src, pitch, p0, p1, p2, p3, row4, row5, row6, row7);
+ temp_src += (8 * pitch);
+ LD_UB8(temp_src, pitch, q3, q2, q1, q0, row12, row13, row14, row15);
+
+ /* transpose 16x8 matrix into 8x16 */
+ TRANSPOSE16x8_UB_UB(p0, p1, p2, p3, row4, row5, row6, row7,
+ q3, q2, q1, q0, row12, row13, row14, row15,
+ p3, p2, p1, p0, q0, q1, q2, q3);
+
+ thresh = (v16u8)__msa_fill_b(*thresh0);
+ vec0 = (v8i16)__msa_fill_b(*thresh1);
+ thresh = (v16u8)__msa_ilvr_d((v2i64)vec0, (v2i64)thresh);
+
+ b_limit = (v16u8)__msa_fill_b(*b_limit0);
+ vec0 = (v8i16)__msa_fill_b(*b_limit1);
+ b_limit = (v16u8)__msa_ilvr_d((v2i64)vec0, (v2i64)b_limit);
+
+ limit = (v16u8)__msa_fill_b(*limit0);
+ vec0 = (v8i16)__msa_fill_b(*limit1);
+ limit = (v16u8)__msa_ilvr_d((v2i64)vec0, (v2i64)limit);
+
+ /* mask and hev */
+ LPF_MASK_HEV(p3, p2, p1, p0, q0, q1, q2, q3, limit, b_limit, thresh,
+ hev, mask, flat);
+ /* flat4 */
+ VP9_FLAT4(p3, p2, p0, q0, q2, q3, flat);
+ /* filter4 */
+ VP9_LPF_FILTER4_4W(p1, p0, q0, q1, mask, hev, p1_out, p0_out, q0_out, q1_out);
+
+ if (__msa_test_bz_v(flat)) {
+ ILVR_B2_SH(p0_out, p1_out, q1_out, q0_out, vec0, vec1);
+ ILVRL_H2_SH(vec1, vec0, vec2, vec3);
+ ILVL_B2_SH(p0_out, p1_out, q1_out, q0_out, vec0, vec1);
+ ILVRL_H2_SH(vec1, vec0, vec4, vec5);
+
+ src -= 2;
+ ST4x8_UB(vec2, vec3, src, pitch);
+ src += 8 * pitch;
+ ST4x8_UB(vec4, vec5, src, pitch);
+ } else {
+ ILVR_B8_UH(zero, p3, zero, p2, zero, p1, zero, p0, zero, q0, zero, q1,
+ zero, q2, zero, q3, p3_r, p2_r, p1_r, p0_r, q0_r, q1_r, q2_r,
+ q3_r);
+ VP9_FILTER8(p3_r, p2_r, p1_r, p0_r, q0_r, q1_r, q2_r, q3_r, p2_filt8_r,
+ p1_filt8_r, p0_filt8_r, q0_filt8_r, q1_filt8_r, q2_filt8_r);
+
+ ILVL_B4_UH(zero, p3, zero, p2, zero, p1, zero, p0, p3_l, p2_l, p1_l, p0_l);
+ ILVL_B4_UH(zero, q0, zero, q1, zero, q2, zero, q3, q0_l, q1_l, q2_l, q3_l);
+
+ /* filter8 */
+ VP9_FILTER8(p3_l, p2_l, p1_l, p0_l, q0_l, q1_l, q2_l, q3_l, p2_filt8_l,
+ p1_filt8_l, p0_filt8_l, q0_filt8_l, q1_filt8_l, q2_filt8_l);
+
+ /* convert 16 bit output data into 8 bit */
+ PCKEV_B4_SH(p2_filt8_l, p2_filt8_r, p1_filt8_l, p1_filt8_r, p0_filt8_l,
+ p0_filt8_r, q0_filt8_l, q0_filt8_r, p2_filt8_r, p1_filt8_r,
+ p0_filt8_r, q0_filt8_r);
+ PCKEV_B2_SH(q1_filt8_l, q1_filt8_r, q2_filt8_l, q2_filt8_r, q1_filt8_r,
+ q2_filt8_r);
+
+ /* store pixel values */
+ p2 = __msa_bmnz_v(p2, (v16u8)p2_filt8_r, flat);
+ p1 = __msa_bmnz_v(p1_out, (v16u8)p1_filt8_r, flat);
+ p0 = __msa_bmnz_v(p0_out, (v16u8)p0_filt8_r, flat);
+ q0 = __msa_bmnz_v(q0_out, (v16u8)q0_filt8_r, flat);
+ q1 = __msa_bmnz_v(q1_out, (v16u8)q1_filt8_r, flat);
+ q2 = __msa_bmnz_v(q2, (v16u8)q2_filt8_r, flat);
+
+ ILVR_B2_SH(p1, p2, q0, p0, vec0, vec1);
+ ILVRL_H2_SH(vec1, vec0, vec3, vec4);
+ ILVL_B2_SH(p1, p2, q0, p0, vec0, vec1);
+ ILVRL_H2_SH(vec1, vec0, vec6, vec7);
+ ILVRL_B2_SH(q2, q1, vec2, vec5);
+
+ src -= 3;
+ ST4x4_UB(vec3, vec3, 0, 1, 2, 3, src, pitch);
+ ST2x4_UB(vec2, 0, src + 4, pitch);
+ src += (4 * pitch);
+ ST4x4_UB(vec4, vec4, 0, 1, 2, 3, src, pitch);
+ ST2x4_UB(vec2, 4, src + 4, pitch);
+ src += (4 * pitch);
+ ST4x4_UB(vec6, vec6, 0, 1, 2, 3, src, pitch);
+ ST2x4_UB(vec5, 0, src + 4, pitch);
+ src += (4 * pitch);
+ ST4x4_UB(vec7, vec7, 0, 1, 2, 3, src, pitch);
+ ST2x4_UB(vec5, 4, src + 4, pitch);
+ }
+}
diff --git a/vp9/common/vp9_blockd.h b/vp9/common/vp9_blockd.h
index e53e15d..53ae921 100644
--- a/vp9/common/vp9_blockd.h
+++ b/vp9/common/vp9_blockd.h
@@ -50,16 +50,16 @@
} b_mode_info;
// Note that the rate-distortion optimization loop, bit-stream writer, and
-// decoder implementation modules critically rely on the enum entry values
+// decoder implementation modules critically rely on the defined entry values
// specified herein. They should be refactored concurrently.
-typedef enum {
- NONE = -1,
- INTRA_FRAME = 0,
- LAST_FRAME = 1,
- GOLDEN_FRAME = 2,
- ALTREF_FRAME = 3,
- MAX_REF_FRAMES = 4
-} MV_REFERENCE_FRAME;
+
+#define NONE -1
+#define INTRA_FRAME 0
+#define LAST_FRAME 1
+#define GOLDEN_FRAME 2
+#define ALTREF_FRAME 3
+#define MAX_REF_FRAMES 4
+typedef int8_t MV_REFERENCE_FRAME;
// This structure now relates to 8x8 block regions.
typedef struct {
@@ -75,12 +75,17 @@
PREDICTION_MODE uv_mode;
// Only for INTER blocks
+ INTERP_FILTER interp_filter;
MV_REFERENCE_FRAME ref_frame[2];
+
+ // TODO(slavarnway): Delete and use bmi[3].as_mv[] instead.
int_mv mv[2];
+
+#if CONFIG_VP9_ENCODER
+ // TODO(slavarnway): Move to encoder
int_mv ref_mvs[MAX_REF_FRAMES][MAX_MV_REF_CANDIDATES];
uint8_t mode_context[MAX_REF_FRAMES];
- INTERP_FILTER interp_filter;
-
+#endif
} MB_MODE_INFO;
typedef struct MODE_INFO {
diff --git a/vp9/common/vp9_enums.h b/vp9/common/vp9_enums.h
index 0482025..d089f23 100644
--- a/vp9/common/vp9_enums.h
+++ b/vp9/common/vp9_enums.h
@@ -12,6 +12,7 @@
#define VP9_COMMON_VP9_ENUMS_H_
#include "./vpx_config.h"
+#include "vpx/vpx_integer.h"
#ifdef __cplusplus
extern "C" {
@@ -40,23 +41,22 @@
MAX_PROFILES
} BITSTREAM_PROFILE;
-typedef enum BLOCK_SIZE {
- BLOCK_4X4,
- BLOCK_4X8,
- BLOCK_8X4,
- BLOCK_8X8,
- BLOCK_8X16,
- BLOCK_16X8,
- BLOCK_16X16,
- BLOCK_16X32,
- BLOCK_32X16,
- BLOCK_32X32,
- BLOCK_32X64,
- BLOCK_64X32,
- BLOCK_64X64,
- BLOCK_SIZES,
- BLOCK_INVALID = BLOCK_SIZES
-} BLOCK_SIZE;
+#define BLOCK_4X4 0
+#define BLOCK_4X8 1
+#define BLOCK_8X4 2
+#define BLOCK_8X8 3
+#define BLOCK_8X16 4
+#define BLOCK_16X8 5
+#define BLOCK_16X16 6
+#define BLOCK_16X32 7
+#define BLOCK_32X16 8
+#define BLOCK_32X32 9
+#define BLOCK_32X64 10
+#define BLOCK_64X32 11
+#define BLOCK_64X64 12
+#define BLOCK_SIZES 13
+#define BLOCK_INVALID BLOCK_SIZES
+typedef uint8_t BLOCK_SIZE;
typedef enum PARTITION_TYPE {
PARTITION_NONE,
@@ -72,13 +72,12 @@
#define PARTITION_CONTEXTS (4 * PARTITION_PLOFFSET)
// block transform size
-typedef enum {
- TX_4X4 = 0, // 4x4 transform
- TX_8X8 = 1, // 8x8 transform
- TX_16X16 = 2, // 16x16 transform
- TX_32X32 = 3, // 32x32 transform
- TX_SIZES
-} TX_SIZE;
+typedef uint8_t TX_SIZE;
+#define TX_4X4 ((TX_SIZE)0) // 4x4 transform
+#define TX_8X8 ((TX_SIZE)1) // 8x8 transform
+#define TX_16X16 ((TX_SIZE)2) // 16x16 transform
+#define TX_32X32 ((TX_SIZE)3) // 32x32 transform
+#define TX_SIZES ((TX_SIZE)4)
// frame transform mode
typedef enum {
@@ -110,23 +109,22 @@
PLANE_TYPES
} PLANE_TYPE;
-typedef enum {
- DC_PRED, // Average of above and left pixels
- V_PRED, // Vertical
- H_PRED, // Horizontal
- D45_PRED, // Directional 45 deg = round(arctan(1/1) * 180/pi)
- D135_PRED, // Directional 135 deg = 180 - 45
- D117_PRED, // Directional 117 deg = 180 - 63
- D153_PRED, // Directional 153 deg = 180 - 27
- D207_PRED, // Directional 207 deg = 180 + 27
- D63_PRED, // Directional 63 deg = round(arctan(2/1) * 180/pi)
- TM_PRED, // True-motion
- NEARESTMV,
- NEARMV,
- ZEROMV,
- NEWMV,
- MB_MODE_COUNT
-} PREDICTION_MODE;
+#define DC_PRED 0 // Average of above and left pixels
+#define V_PRED 1 // Vertical
+#define H_PRED 2 // Horizontal
+#define D45_PRED 3 // Directional 45 deg = round(arctan(1/1) * 180/pi)
+#define D135_PRED 4 // Directional 135 deg = 180 - 45
+#define D117_PRED 5 // Directional 117 deg = 180 - 63
+#define D153_PRED 6 // Directional 153 deg = 180 - 27
+#define D207_PRED 7 // Directional 207 deg = 180 + 27
+#define D63_PRED 8 // Directional 63 deg = round(arctan(2/1) * 180/pi)
+#define TM_PRED 9 // True-motion
+#define NEARESTMV 10
+#define NEARMV 11
+#define ZEROMV 12
+#define NEWMV 13
+#define MB_MODE_COUNT 14
+typedef uint8_t PREDICTION_MODE;
#define INTRA_MODES (TM_PRED + 1)
diff --git a/vp9/common/vp9_filter.h b/vp9/common/vp9_filter.h
index 808a270..13d38af 100644
--- a/vp9/common/vp9_filter.h
+++ b/vp9/common/vp9_filter.h
@@ -27,17 +27,16 @@
#define SUBPEL_SHIFTS (1 << SUBPEL_BITS)
#define SUBPEL_TAPS 8
-typedef enum {
- EIGHTTAP = 0,
- EIGHTTAP_SMOOTH = 1,
- EIGHTTAP_SHARP = 2,
- SWITCHABLE_FILTERS = 3, /* Number of switchable filters */
- BILINEAR = 3,
- // The codec can operate in four possible inter prediction filter mode:
- // 8-tap, 8-tap-smooth, 8-tap-sharp, and switching between the three.
- SWITCHABLE_FILTER_CONTEXTS = SWITCHABLE_FILTERS + 1,
- SWITCHABLE = 4 /* should be the last one */
-} INTERP_FILTER;
+#define EIGHTTAP 0
+#define EIGHTTAP_SMOOTH 1
+#define EIGHTTAP_SHARP 2
+#define SWITCHABLE_FILTERS 3 /* Number of switchable filters */
+#define BILINEAR 3
+// The codec can operate in four possible inter prediction filter mode:
+// 8-tap, 8-tap-smooth, 8-tap-sharp, and switching between the three.
+#define SWITCHABLE_FILTER_CONTEXTS (SWITCHABLE_FILTERS + 1)
+#define SWITCHABLE 4 /* should be the last one */
+typedef uint8_t INTERP_FILTER;
typedef int16_t InterpKernel[SUBPEL_TAPS];
diff --git a/vp9/common/vp9_mvref_common.c b/vp9/common/vp9_mvref_common.c
index ce69527..5f8ee0f 100644
--- a/vp9/common/vp9_mvref_common.c
+++ b/vp9/common/vp9_mvref_common.c
@@ -18,7 +18,8 @@
MODE_INFO *mi, MV_REFERENCE_FRAME ref_frame,
int_mv *mv_ref_list,
int block, int mi_row, int mi_col,
- find_mv_refs_sync sync, void *const data) {
+ find_mv_refs_sync sync, void *const data,
+ uint8_t *mode_context) {
const int *ref_sign_bias = cm->ref_frame_sign_bias;
int i, refmv_count = 0;
const POSITION *const mv_ref_search = mv_ref_blocks[mi->mbmi.sb_type];
@@ -138,7 +139,7 @@
Done:
- mi->mbmi.mode_context[ref_frame] = counter_to_context[context_counter];
+ mode_context[ref_frame] = counter_to_context[context_counter];
// Clamp vectors
for (i = 0; i < MAX_MV_REF_CANDIDATES; ++i)
@@ -150,9 +151,10 @@
MODE_INFO *mi, MV_REFERENCE_FRAME ref_frame,
int_mv *mv_ref_list,
int mi_row, int mi_col,
- find_mv_refs_sync sync, void *const data) {
+ find_mv_refs_sync sync, void *const data,
+ uint8_t *mode_context) {
find_mv_refs_idx(cm, xd, tile, mi, ref_frame, mv_ref_list, -1,
- mi_row, mi_col, sync, data);
+ mi_row, mi_col, sync, data, mode_context);
}
static void lower_mv_precision(MV *mv, int allow_hp) {
@@ -181,7 +183,8 @@
void vp9_append_sub8x8_mvs_for_idx(VP9_COMMON *cm, MACROBLOCKD *xd,
const TileInfo *const tile,
int block, int ref, int mi_row, int mi_col,
- int_mv *nearest_mv, int_mv *near_mv) {
+ int_mv *nearest_mv, int_mv *near_mv,
+ uint8_t *mode_context) {
int_mv mv_list[MAX_MV_REF_CANDIDATES];
MODE_INFO *const mi = xd->mi[0];
b_mode_info *bmi = mi->bmi;
@@ -190,7 +193,7 @@
assert(MAX_MV_REF_CANDIDATES == 2);
find_mv_refs_idx(cm, xd, tile, mi, mi->mbmi.ref_frame[ref], mv_list, block,
- mi_row, mi_col, NULL, NULL);
+ mi_row, mi_col, NULL, NULL, mode_context);
near_mv->as_int = 0;
switch (block) {
diff --git a/vp9/common/vp9_mvref_common.h b/vp9/common/vp9_mvref_common.h
index f1df521..621dc14 100644
--- a/vp9/common/vp9_mvref_common.h
+++ b/vp9/common/vp9_mvref_common.h
@@ -212,7 +212,8 @@
const TileInfo *const tile,
MODE_INFO *mi, MV_REFERENCE_FRAME ref_frame,
int_mv *mv_ref_list, int mi_row, int mi_col,
- find_mv_refs_sync sync, void *const data);
+ find_mv_refs_sync sync, void *const data,
+ uint8_t *mode_context);
// check a list of motion vectors by sad score using a number rows of pixels
// above and a number cols of pixels in the left to select the one with best
@@ -223,7 +224,8 @@
void vp9_append_sub8x8_mvs_for_idx(VP9_COMMON *cm, MACROBLOCKD *xd,
const TileInfo *const tile,
int block, int ref, int mi_row, int mi_col,
- int_mv *nearest_mv, int_mv *near_mv);
+ int_mv *nearest_mv, int_mv *near_mv,
+ uint8_t *mode_context);
#ifdef __cplusplus
} // extern "C"
diff --git a/vp9/common/vp9_onyxc_int.h b/vp9/common/vp9_onyxc_int.h
index 045d350..3af2a41 100644
--- a/vp9/common/vp9_onyxc_int.h
+++ b/vp9/common/vp9_onyxc_int.h
@@ -264,6 +264,7 @@
int log2_tile_cols, log2_tile_rows;
int byte_alignment;
+ int skip_loop_filter;
// Private data associated with the frame buffer callbacks.
void *cb_priv;
diff --git a/vp9/common/vp9_rtcd_defs.pl b/vp9/common/vp9_rtcd_defs.pl
index f088c56..bbe200d 100644
--- a/vp9/common/vp9_rtcd_defs.pl
+++ b/vp9/common/vp9_rtcd_defs.pl
@@ -224,36 +224,36 @@
$vp9_lpf_vertical_16_dual_neon_asm=vp9_lpf_vertical_16_dual_neon;
add_proto qw/void vp9_lpf_vertical_8/, "uint8_t *s, int pitch, const uint8_t *blimit, const uint8_t *limit, const uint8_t *thresh, int count";
-specialize qw/vp9_lpf_vertical_8 sse2 neon_asm dspr2/;
+specialize qw/vp9_lpf_vertical_8 sse2 neon_asm dspr2 msa/;
$vp9_lpf_vertical_8_neon_asm=vp9_lpf_vertical_8_neon;
add_proto qw/void vp9_lpf_vertical_8_dual/, "uint8_t *s, int pitch, const uint8_t *blimit0, const uint8_t *limit0, const uint8_t *thresh0, const uint8_t *blimit1, const uint8_t *limit1, const uint8_t *thresh1";
-specialize qw/vp9_lpf_vertical_8_dual sse2 neon_asm dspr2/;
+specialize qw/vp9_lpf_vertical_8_dual sse2 neon_asm dspr2 msa/;
$vp9_lpf_vertical_8_dual_neon_asm=vp9_lpf_vertical_8_dual_neon;
add_proto qw/void vp9_lpf_vertical_4/, "uint8_t *s, int pitch, const uint8_t *blimit, const uint8_t *limit, const uint8_t *thresh, int count";
-specialize qw/vp9_lpf_vertical_4 mmx neon dspr2/;
+specialize qw/vp9_lpf_vertical_4 mmx neon dspr2 msa/;
add_proto qw/void vp9_lpf_vertical_4_dual/, "uint8_t *s, int pitch, const uint8_t *blimit0, const uint8_t *limit0, const uint8_t *thresh0, const uint8_t *blimit1, const uint8_t *limit1, const uint8_t *thresh1";
-specialize qw/vp9_lpf_vertical_4_dual sse2 neon dspr2/;
+specialize qw/vp9_lpf_vertical_4_dual sse2 neon dspr2 msa/;
add_proto qw/void vp9_lpf_horizontal_16/, "uint8_t *s, int pitch, const uint8_t *blimit, const uint8_t *limit, const uint8_t *thresh, int count";
specialize qw/vp9_lpf_horizontal_16 sse2 avx2 neon_asm dspr2 msa/;
$vp9_lpf_horizontal_16_neon_asm=vp9_lpf_horizontal_16_neon;
add_proto qw/void vp9_lpf_horizontal_8/, "uint8_t *s, int pitch, const uint8_t *blimit, const uint8_t *limit, const uint8_t *thresh, int count";
-specialize qw/vp9_lpf_horizontal_8 sse2 neon_asm dspr2/;
+specialize qw/vp9_lpf_horizontal_8 sse2 neon_asm dspr2 msa/;
$vp9_lpf_horizontal_8_neon_asm=vp9_lpf_horizontal_8_neon;
add_proto qw/void vp9_lpf_horizontal_8_dual/, "uint8_t *s, int pitch, const uint8_t *blimit0, const uint8_t *limit0, const uint8_t *thresh0, const uint8_t *blimit1, const uint8_t *limit1, const uint8_t *thresh1";
-specialize qw/vp9_lpf_horizontal_8_dual sse2 neon_asm dspr2/;
+specialize qw/vp9_lpf_horizontal_8_dual sse2 neon_asm dspr2 msa/;
$vp9_lpf_horizontal_8_dual_neon_asm=vp9_lpf_horizontal_8_dual_neon;
add_proto qw/void vp9_lpf_horizontal_4/, "uint8_t *s, int pitch, const uint8_t *blimit, const uint8_t *limit, const uint8_t *thresh, int count";
-specialize qw/vp9_lpf_horizontal_4 mmx neon dspr2/;
+specialize qw/vp9_lpf_horizontal_4 mmx neon dspr2 msa/;
add_proto qw/void vp9_lpf_horizontal_4_dual/, "uint8_t *s, int pitch, const uint8_t *blimit0, const uint8_t *limit0, const uint8_t *thresh0, const uint8_t *blimit1, const uint8_t *limit1, const uint8_t *thresh1";
-specialize qw/vp9_lpf_horizontal_4_dual sse2 neon dspr2/;
+specialize qw/vp9_lpf_horizontal_4_dual sse2 neon dspr2 msa/;
#
# post proc
diff --git a/vp9/decoder/vp9_decodeframe.c b/vp9/decoder/vp9_decodeframe.c
index 4005537..30ca2d0 100644
--- a/vp9/decoder/vp9_decodeframe.c
+++ b/vp9/decoder/vp9_decodeframe.c
@@ -381,6 +381,15 @@
VP9_COMMON *const cm = &pbi->common;
const int less8x8 = bsize < BLOCK_8X8;
MB_MODE_INFO *mbmi = set_offsets(cm, xd, tile, bsize, mi_row, mi_col);
+
+ if (bsize >= BLOCK_8X8 && (cm->subsampling_x || cm->subsampling_y)) {
+ const BLOCK_SIZE uv_subsize =
+ ss_size_lookup[bsize][cm->subsampling_x][cm->subsampling_y];
+ if (uv_subsize == BLOCK_INVALID)
+ vpx_internal_error(xd->error_info,
+ VPX_CODEC_CORRUPT_FRAME, "Invalid block size.");
+ }
+
vp9_read_mode_info(pbi, xd, tile, mi_row, mi_col, r);
if (less8x8)
@@ -444,18 +453,14 @@
VP9_COMMON *const cm = &pbi->common;
const int hbs = num_8x8_blocks_wide_lookup[bsize] / 2;
PARTITION_TYPE partition;
- BLOCK_SIZE subsize, uv_subsize;
+ BLOCK_SIZE subsize;
if (mi_row >= cm->mi_rows || mi_col >= cm->mi_cols)
return;
partition = read_partition(cm, xd, hbs, mi_row, mi_col, bsize, r);
subsize = get_subsize(bsize, partition);
- uv_subsize = ss_size_lookup[subsize][cm->subsampling_x][cm->subsampling_y];
- if (subsize >= BLOCK_8X8 && uv_subsize == BLOCK_INVALID)
- vpx_internal_error(xd->error_info,
- VPX_CODEC_CORRUPT_FRAME, "Invalid block size.");
- if (subsize < BLOCK_8X8) {
+ if (bsize == BLOCK_8X8) {
decode_block(pbi, xd, tile, mi_row, mi_col, r, subsize);
} else {
switch (partition) {
@@ -921,7 +926,8 @@
int mi_row, mi_col;
TileData *tile_data = NULL;
- if (cm->lf.filter_level && pbi->lf_worker.data1 == NULL) {
+ if (cm->lf.filter_level && !cm->skip_loop_filter &&
+ pbi->lf_worker.data1 == NULL) {
CHECK_MEM_ERROR(cm, pbi->lf_worker.data1,
vpx_memalign(32, sizeof(LFWorkerData)));
pbi->lf_worker.hook = (VP9WorkerHook)vp9_loop_filter_worker;
@@ -931,7 +937,7 @@
}
}
- if (cm->lf.filter_level) {
+ if (cm->lf.filter_level && !cm->skip_loop_filter) {
LFWorkerData *const lf_data = (LFWorkerData*)pbi->lf_worker.data1;
// Be sure to sync as we might be resuming after a failed frame decode.
winterface->sync(&pbi->lf_worker);
@@ -1004,7 +1010,7 @@
"Failed to decode tile data");
}
// Loopfilter one row.
- if (cm->lf.filter_level) {
+ if (cm->lf.filter_level && !cm->skip_loop_filter) {
const int lf_start = mi_row - MI_BLOCK_SIZE;
LFWorkerData *const lf_data = (LFWorkerData*)pbi->lf_worker.data1;
@@ -1033,7 +1039,7 @@
}
// Loopfilter remaining rows in the frame.
- if (cm->lf.filter_level) {
+ if (cm->lf.filter_level && !cm->skip_loop_filter) {
LFWorkerData *const lf_data = (LFWorkerData*)pbi->lf_worker.data1;
winterface->sync(&pbi->lf_worker);
lf_data->start = lf_data->stop;
@@ -1657,7 +1663,7 @@
vpx_internal_error(&cm->error, VPX_CODEC_CORRUPT_FRAME,
"Decode failed. Frame data header is corrupted.");
- if (cm->lf.filter_level) {
+ if (cm->lf.filter_level && !cm->skip_loop_filter) {
vp9_loop_filter_frame_init(cm, cm->lf.filter_level);
}
@@ -1683,11 +1689,13 @@
// Multi-threaded tile decoder
*p_data_end = decode_tiles_mt(pbi, data + first_partition_size, data_end);
if (!xd->corrupted) {
- // If multiple threads are used to decode tiles, then we use those threads
- // to do parallel loopfiltering.
- vp9_loop_filter_frame_mt(new_fb, cm, pbi->mb.plane, cm->lf.filter_level,
- 0, 0, pbi->tile_workers, pbi->num_tile_workers,
- &pbi->lf_row_sync);
+ if (!cm->skip_loop_filter) {
+ // If multiple threads are used to decode tiles, then we use those
+ // threads to do parallel loopfiltering.
+ vp9_loop_filter_frame_mt(new_fb, cm, pbi->mb.plane,
+ cm->lf.filter_level, 0, 0, pbi->tile_workers,
+ pbi->num_tile_workers, &pbi->lf_row_sync);
+ }
} else {
vpx_internal_error(&cm->error, VPX_CODEC_CORRUPT_FRAME,
"Decode failed. Frame data is corrupted.");
diff --git a/vp9/decoder/vp9_decodemv.c b/vp9/decoder/vp9_decodemv.c
index 7ce3389..bc03caf 100644
--- a/vp9/decoder/vp9_decodemv.c
+++ b/vp9/decoder/vp9_decodemv.c
@@ -473,7 +473,9 @@
const BLOCK_SIZE bsize = mbmi->sb_type;
const int allow_hp = cm->allow_high_precision_mv;
int_mv nearestmv[2], nearmv[2];
- int inter_mode_ctx, ref, is_compound;
+ int_mv ref_mvs[MAX_REF_FRAMES][MAX_MV_REF_CANDIDATES];
+ int ref, is_compound;
+ uint8_t inter_mode_ctx[MAX_REF_FRAMES];
read_ref_frames(cm, xd, r, mbmi->segment_id, mbmi->ref_frame);
is_compound = has_second_ref(mbmi);
@@ -487,12 +489,10 @@
"Reference frame has invalid dimensions");
vp9_setup_pre_planes(xd, ref, ref_buf->buf, mi_row, mi_col,
&ref_buf->sf);
- vp9_find_mv_refs(cm, xd, tile, mi, frame, mbmi->ref_mvs[frame],
- mi_row, mi_col, fpm_sync, (void *)pbi);
+ vp9_find_mv_refs(cm, xd, tile, mi, frame, ref_mvs[frame],
+ mi_row, mi_col, fpm_sync, (void *)pbi, inter_mode_ctx);
}
- inter_mode_ctx = mbmi->mode_context[mbmi->ref_frame[0]];
-
if (vp9_segfeature_active(&cm->seg, mbmi->segment_id, SEG_LVL_SKIP)) {
mbmi->mode = ZEROMV;
if (bsize < BLOCK_8X8) {
@@ -502,12 +502,13 @@
}
} else {
if (bsize >= BLOCK_8X8)
- mbmi->mode = read_inter_mode(cm, xd, r, inter_mode_ctx);
+ mbmi->mode = read_inter_mode(cm, xd, r,
+ inter_mode_ctx[mbmi->ref_frame[0]]);
}
if (bsize < BLOCK_8X8 || mbmi->mode != ZEROMV) {
for (ref = 0; ref < 1 + is_compound; ++ref) {
- vp9_find_best_ref_mvs(xd, allow_hp, mbmi->ref_mvs[mbmi->ref_frame[ref]],
+ vp9_find_best_ref_mvs(xd, allow_hp, ref_mvs[mbmi->ref_frame[ref]],
&nearestmv[ref], &nearmv[ref]);
}
}
@@ -526,13 +527,16 @@
for (idx = 0; idx < 2; idx += num_4x4_w) {
int_mv block[2];
const int j = idy * 2 + idx;
- b_mode = read_inter_mode(cm, xd, r, inter_mode_ctx);
+ b_mode = read_inter_mode(cm, xd, r, inter_mode_ctx[mbmi->ref_frame[0]]);
- if (b_mode == NEARESTMV || b_mode == NEARMV)
+ if (b_mode == NEARESTMV || b_mode == NEARMV) {
+ uint8_t dummy_mode_ctx[MAX_REF_FRAMES];
for (ref = 0; ref < 1 + is_compound; ++ref)
vp9_append_sub8x8_mvs_for_idx(cm, xd, tile, j, ref, mi_row, mi_col,
&nearest_sub8x8[ref],
- &near_sub8x8[ref]);
+ &near_sub8x8[ref],
+ dummy_mode_ctx);
+ }
if (!assign_mv(cm, xd, b_mode, block, nearestmv,
nearest_sub8x8, near_sub8x8,
diff --git a/vp9/decoder/vp9_detokenize.c b/vp9/decoder/vp9_detokenize.c
index 54ad835..3304e64 100644
--- a/vp9/decoder/vp9_detokenize.c
+++ b/vp9/decoder/vp9_detokenize.c
@@ -59,10 +59,8 @@
const vp9_prob (*coef_probs)[COEFF_CONTEXTS][UNCONSTRAINED_NODES] =
fc->coef_probs[tx_size][type][ref];
const vp9_prob *prob;
- unsigned int (*coef_counts)[COEFF_CONTEXTS][UNCONSTRAINED_NODES + 1] =
- counts->coef[tx_size][type][ref];
- unsigned int (*eob_branch_count)[COEFF_CONTEXTS] =
- counts->eob_branch[tx_size][type][ref];
+ unsigned int (*coef_counts)[COEFF_CONTEXTS][UNCONSTRAINED_NODES + 1];
+ unsigned int (*eob_branch_count)[COEFF_CONTEXTS];
uint8_t token_cache[32 * 32];
const uint8_t *band_translate = get_band_translate(tx_size);
const int dq_shift = (tx_size == TX_32X32);
@@ -75,6 +73,11 @@
const uint8_t *cat5_prob;
const uint8_t *cat6_prob;
+ if (counts) {
+ coef_counts = counts->coef[tx_size][type][ref];
+ eob_branch_count = counts->eob_branch[tx_size][type][ref];
+ }
+
#if CONFIG_VP9_HIGHBITDEPTH
if (xd->bd > VPX_BITS_8) {
if (xd->bd == VPX_BITS_10) {
diff --git a/vp9/encoder/vp9_aq_cyclicrefresh.c b/vp9/encoder/vp9_aq_cyclicrefresh.c
index 4b1c959..71c1e0b 100644
--- a/vp9/encoder/vp9_aq_cyclicrefresh.c
+++ b/vp9/encoder/vp9_aq_cyclicrefresh.c
@@ -48,6 +48,8 @@
int16_t motion_thresh;
// Rate target ratio to set q delta.
double rate_ratio_qdelta;
+ // Boost factor for rate target ratio, for segment CR_SEGMENT_ID_BOOST2.
+ double rate_boost_fac;
double low_content_avg;
int qindex_delta_seg1;
int qindex_delta_seg2;
@@ -93,6 +95,19 @@
return 1;
}
+static void adjust_cyclic_refresh_parameters(VP9_COMP *const cpi) {
+ const VP9_COMMON *const cm = &cpi->common;
+ const RATE_CONTROL *const rc = &cpi->rc;
+ CYCLIC_REFRESH *const cr = cpi->cyclic_refresh;
+ // Adjust some parameters, currently only for low resolutions at low bitrates.
+ if (cm->width <= 352 &&
+ cm->height <= 288 &&
+ rc->avg_frame_bandwidth < 3400) {
+ cr->motion_thresh = 4;
+ cr->rate_boost_fac = 1.25;
+ }
+}
+
// Check if this coding block, of size bsize, should be considered for refresh
// (lower-qp coding). Decision can be based on various factors, such as
// size of the coding block (i.e., below min_block size rejected), coding
@@ -462,6 +477,7 @@
vp9_clear_system_state();
cr->max_qdelta_perc = 50;
cr->time_for_refresh = 0;
+ cr->rate_boost_fac = 1.7;
// Set rate threshold to some multiple (set to 2 for now) of the target
// rate (target is given by sb64_target_rate and scaled by 256).
cr->thresh_rate_sb = ((int64_t)(rc->sb64_target_rate) << 8) << 2;
@@ -470,6 +486,9 @@
// vp9_convert_qindex_to_q(), vp9_ac_quant(), ac_qlookup*[].
cr->thresh_dist_sb = ((int64_t)(q * q)) << 2;
cr->motion_thresh = 32;
+
+ adjust_cyclic_refresh_parameters(cpi);
+
// Set up segmentation.
// Clear down the segment map.
vp9_enable_segmentation(&cm->seg);
@@ -505,7 +524,7 @@
// Set a more aggressive (higher) q delta for segment BOOST2.
qindex_delta = compute_deltaq(cpi, cm->base_qindex,
MIN(CR_MAX_RATE_TARGET_RATIO,
- CR_BOOST2_FAC * cr->rate_ratio_qdelta));
+ cr->rate_boost_fac * cr->rate_ratio_qdelta));
cr->qindex_delta_seg2 = qindex_delta;
vp9_set_segdata(seg, CR_SEGMENT_ID_BOOST2, SEG_LVL_ALT_Q, qindex_delta);
diff --git a/vp9/encoder/vp9_aq_cyclicrefresh.h b/vp9/encoder/vp9_aq_cyclicrefresh.h
index 21f114b..99bb98e 100644
--- a/vp9/encoder/vp9_aq_cyclicrefresh.h
+++ b/vp9/encoder/vp9_aq_cyclicrefresh.h
@@ -27,9 +27,6 @@
// Maximum rate target ratio for setting segment delta-qp.
#define CR_MAX_RATE_TARGET_RATIO 4.0
-// Boost factor for rate target ratio, for segment CR_SEGMENT_ID_BOOST2.
-#define CR_BOOST2_FAC 1.7
-
struct VP9_COMP;
struct CYCLIC_REFRESH;
diff --git a/vp9/encoder/vp9_aq_variance.c b/vp9/encoder/vp9_aq_variance.c
index 9e5d9ee..f072717 100644
--- a/vp9/encoder/vp9_aq_variance.c
+++ b/vp9/encoder/vp9_aq_variance.c
@@ -82,6 +82,61 @@
}
}
+/* TODO(agrange, paulwilkins): The block_variance calls the unoptimized versions
+ * of variance() and highbd_8_variance(). It should not.
+ */
+static void aq_variance(const uint8_t *a, int a_stride,
+ const uint8_t *b, int b_stride,
+ int w, int h, unsigned int *sse, int *sum) {
+ int i, j;
+
+ *sum = 0;
+ *sse = 0;
+
+ for (i = 0; i < h; i++) {
+ for (j = 0; j < w; j++) {
+ const int diff = a[j] - b[j];
+ *sum += diff;
+ *sse += diff * diff;
+ }
+
+ a += a_stride;
+ b += b_stride;
+ }
+}
+
+#if CONFIG_VP9_HIGHBITDEPTH
+static void aq_highbd_variance64(const uint8_t *a8, int a_stride,
+ const uint8_t *b8, int b_stride,
+ int w, int h, uint64_t *sse, uint64_t *sum) {
+ int i, j;
+
+ uint16_t *a = CONVERT_TO_SHORTPTR(a8);
+ uint16_t *b = CONVERT_TO_SHORTPTR(b8);
+ *sum = 0;
+ *sse = 0;
+
+ for (i = 0; i < h; i++) {
+ for (j = 0; j < w; j++) {
+ const int diff = a[j] - b[j];
+ *sum += diff;
+ *sse += diff * diff;
+ }
+ a += a_stride;
+ b += b_stride;
+ }
+}
+
+static void aq_highbd_8_variance(const uint8_t *a8, int a_stride,
+ const uint8_t *b8, int b_stride,
+ int w, int h, unsigned int *sse, int *sum) {
+ uint64_t sse_long = 0;
+ uint64_t sum_long = 0;
+ aq_highbd_variance64(a8, a_stride, b8, b_stride, w, h, &sse_long, &sum_long);
+ *sse = (unsigned int)sse_long;
+ *sum = (int)sum_long;
+}
+#endif // CONFIG_VP9_HIGHBITDEPTH
static unsigned int block_variance(VP9_COMP *cpi, MACROBLOCK *x,
BLOCK_SIZE bs) {
@@ -98,18 +153,18 @@
int avg;
#if CONFIG_VP9_HIGHBITDEPTH
if (xd->cur_buf->flags & YV12_FLAG_HIGHBITDEPTH) {
- highbd_8_variance(x->plane[0].src.buf, x->plane[0].src.stride,
- CONVERT_TO_BYTEPTR(vp9_highbd_64_zeros), 0, bw, bh,
- &sse, &avg);
+ aq_highbd_8_variance(x->plane[0].src.buf, x->plane[0].src.stride,
+ CONVERT_TO_BYTEPTR(vp9_highbd_64_zeros), 0, bw, bh,
+ &sse, &avg);
sse >>= 2 * (xd->bd - 8);
avg >>= (xd->bd - 8);
} else {
- variance(x->plane[0].src.buf, x->plane[0].src.stride,
- vp9_64_zeros, 0, bw, bh, &sse, &avg);
+ aq_variance(x->plane[0].src.buf, x->plane[0].src.stride,
+ vp9_64_zeros, 0, bw, bh, &sse, &avg);
}
#else
- variance(x->plane[0].src.buf, x->plane[0].src.stride,
- vp9_64_zeros, 0, bw, bh, &sse, &avg);
+ aq_variance(x->plane[0].src.buf, x->plane[0].src.stride,
+ vp9_64_zeros, 0, bw, bh, &sse, &avg);
#endif // CONFIG_VP9_HIGHBITDEPTH
var = sse - (((int64_t)avg * avg) / (bw * bh));
return (256 * var) / (bw * bh);
diff --git a/vp9/encoder/vp9_encodeframe.c b/vp9/encoder/vp9_encodeframe.c
index 49e8887..8864e0e 100644
--- a/vp9/encoder/vp9_encodeframe.c
+++ b/vp9/encoder/vp9_encodeframe.c
@@ -370,6 +370,7 @@
static void fill_variance_tree(void *data, BLOCK_SIZE bsize) {
variance_node node;
+ memset(&node, 0, sizeof(node));
tree_to_node(data, bsize, &node);
sum_2_variances(node.split[0], node.split[1], &node.part_variances->horz[0]);
sum_2_variances(node.split[2], node.split[3], &node.part_variances->horz[1]);
diff --git a/vp9/encoder/vp9_encoder.c b/vp9/encoder/vp9_encoder.c
index 95f9bc4..ba38d64 100644
--- a/vp9/encoder/vp9_encoder.c
+++ b/vp9/encoder/vp9_encoder.c
@@ -2056,6 +2056,65 @@
#endif
}
+/* TODO(yaowu): The block_variance calls the unoptimized versions of variance()
+ * and highbd_8_variance(). It should not.
+ */
+static void encoder_variance(const uint8_t *a, int a_stride,
+ const uint8_t *b, int b_stride,
+ int w, int h, unsigned int *sse, int *sum) {
+ int i, j;
+
+ *sum = 0;
+ *sse = 0;
+
+ for (i = 0; i < h; i++) {
+ for (j = 0; j < w; j++) {
+ const int diff = a[j] - b[j];
+ *sum += diff;
+ *sse += diff * diff;
+ }
+
+ a += a_stride;
+ b += b_stride;
+ }
+}
+
+#if CONFIG_VP9_HIGHBITDEPTH
+static void encoder_highbd_variance64(const uint8_t *a8, int a_stride,
+ const uint8_t *b8, int b_stride,
+ int w, int h, uint64_t *sse,
+ uint64_t *sum) {
+ int i, j;
+
+ uint16_t *a = CONVERT_TO_SHORTPTR(a8);
+ uint16_t *b = CONVERT_TO_SHORTPTR(b8);
+ *sum = 0;
+ *sse = 0;
+
+ for (i = 0; i < h; i++) {
+ for (j = 0; j < w; j++) {
+ const int diff = a[j] - b[j];
+ *sum += diff;
+ *sse += diff * diff;
+ }
+ a += a_stride;
+ b += b_stride;
+ }
+}
+
+static void encoder_highbd_8_variance(const uint8_t *a8, int a_stride,
+ const uint8_t *b8, int b_stride,
+ int w, int h,
+ unsigned int *sse, int *sum) {
+ uint64_t sse_long = 0;
+ uint64_t sum_long = 0;
+ encoder_highbd_variance64(a8, a_stride, b8, b_stride, w, h,
+ &sse_long, &sum_long);
+ *sse = (unsigned int)sse_long;
+ *sum = (int)sum_long;
+}
+#endif // CONFIG_VP9_HIGHBITDEPTH
+
static int64_t get_sse(const uint8_t *a, int a_stride,
const uint8_t *b, int b_stride,
int width, int height) {
@@ -2067,15 +2126,15 @@
int x, y;
if (dw > 0) {
- variance(&a[width - dw], a_stride, &b[width - dw], b_stride,
- dw, height, &sse, &sum);
+ encoder_variance(&a[width - dw], a_stride, &b[width - dw], b_stride,
+ dw, height, &sse, &sum);
total_sse += sse;
}
if (dh > 0) {
- variance(&a[(height - dh) * a_stride], a_stride,
- &b[(height - dh) * b_stride], b_stride,
- width - dw, dh, &sse, &sum);
+ encoder_variance(&a[(height - dh) * a_stride], a_stride,
+ &b[(height - dh) * b_stride], b_stride,
+ width - dw, dh, &sse, &sum);
total_sse += sse;
}
@@ -2128,14 +2187,15 @@
unsigned int sse = 0;
int sum = 0;
if (dw > 0) {
- highbd_8_variance(&a[width - dw], a_stride, &b[width - dw], b_stride,
- dw, height, &sse, &sum);
+ encoder_highbd_8_variance(&a[width - dw], a_stride,
+ &b[width - dw], b_stride,
+ dw, height, &sse, &sum);
total_sse += sse;
}
if (dh > 0) {
- highbd_8_variance(&a[(height - dh) * a_stride], a_stride,
- &b[(height - dh) * b_stride], b_stride,
- width - dw, dh, &sse, &sum);
+ encoder_highbd_8_variance(&a[(height - dh) * a_stride], a_stride,
+ &b[(height - dh) * b_stride], b_stride,
+ width - dw, dh, &sse, &sum);
total_sse += sse;
}
for (y = 0; y < height / 16; ++y) {
@@ -2812,7 +2872,7 @@
"%7.2lf %7.2lf %7.2lf %7.2lf %7.2lf"
"%6d %6d %5d %5d %5d "
"%10"PRId64" %10.3lf"
- "%10lf %8u %10"PRId64" %10d %10d\n",
+ "%10lf %8u %10"PRId64" %10d %10d %10d\n",
cpi->common.current_video_frame,
cm->width, cm->height,
cpi->rc.source_alt_ref_pending,
@@ -2841,7 +2901,8 @@
cpi->twopass.bits_left /
(1 + cpi->twopass.total_left_stats.coded_error),
cpi->tot_recode_hits, recon_err, cpi->rc.kf_boost,
- cpi->twopass.kf_zeromotion_pct);
+ cpi->twopass.kf_zeromotion_pct,
+ cpi->twopass.fr_content_type);
fclose(f);
diff --git a/vp9/encoder/vp9_firstpass.c b/vp9/encoder/vp9_firstpass.c
index 856a665..3d7843e 100644
--- a/vp9/encoder/vp9_firstpass.c
+++ b/vp9/encoder/vp9_firstpass.c
@@ -113,8 +113,8 @@
fpfile = fopen("firstpass.stt", "a");
fprintf(fpfile, "%12.0lf %12.4lf %12.0lf %12.0lf %12.0lf %12.4lf %12.4lf"
- "%12.4lf %12.4lf %12.4lf %12.4lf %12.4lf %12.4lf %12.4lf"
- "%12.4lf %12.0lf %12.0lf %12.0lf %12.4lf\n",
+ "%12.4lf %12.4lf %12.4lf %12.4lf %12.4lf %12.4lf %12.4lf %12.4lf"
+ "%12.4lf %12.4lf %12.0lf %12.0lf %12.0lf %12.4lf\n",
stats->frame,
stats->weight,
stats->intra_error,
@@ -124,6 +124,8 @@
stats->pcnt_motion,
stats->pcnt_second_ref,
stats->pcnt_neutral,
+ stats->intra_skip_pct,
+ stats->inactive_zone_rows,
stats->MVr,
stats->mvr_abs,
stats->MVc,
@@ -160,7 +162,9 @@
section->pcnt_motion = 0.0;
section->pcnt_second_ref = 0.0;
section->pcnt_neutral = 0.0;
- section->MVr = 0.0;
+ section->intra_skip_pct = 0.0;
+ section->inactive_zone_rows = 0.0;
+ section->MVr = 0.0;
section->mvr_abs = 0.0;
section->MVc = 0.0;
section->mvc_abs = 0.0;
@@ -185,7 +189,9 @@
section->pcnt_motion += frame->pcnt_motion;
section->pcnt_second_ref += frame->pcnt_second_ref;
section->pcnt_neutral += frame->pcnt_neutral;
- section->MVr += frame->MVr;
+ section->intra_skip_pct += frame->intra_skip_pct;
+ section->inactive_zone_rows += frame->inactive_zone_rows;
+ section->MVr += frame->MVr;
section->mvr_abs += frame->mvr_abs;
section->MVc += frame->MVc;
section->mvc_abs += frame->mvc_abs;
@@ -208,7 +214,9 @@
section->pcnt_motion -= frame->pcnt_motion;
section->pcnt_second_ref -= frame->pcnt_second_ref;
section->pcnt_neutral -= frame->pcnt_neutral;
- section->MVr -= frame->MVr;
+ section->intra_skip_pct -= frame->intra_skip_pct;
+ section->inactive_zone_rows -= frame->inactive_zone_rows;
+ section->MVr -= frame->MVr;
section->mvr_abs -= frame->mvr_abs;
section->MVc -= frame->MVc;
section->mvc_abs -= frame->mvc_abs;
@@ -453,6 +461,8 @@
cpi->rc.frames_to_key = INT_MAX;
}
+#define UL_INTRA_THRESH 50
+#define INVALID_ROW -1
void vp9_first_pass(VP9_COMP *cpi, const struct lookahead_entry *source) {
int mb_row, mb_col;
MACROBLOCK *const x = &cpi->td.mb;
@@ -477,6 +487,8 @@
int second_ref_count = 0;
const int intrapenalty = INTRA_MODE_PENALTY;
double neutral_count;
+ int intra_skip_count = 0;
+ int image_data_start_row = INVALID_ROW;
int new_mv_count = 0;
int sum_in_vectors = 0;
MV lastmv = {0, 0};
@@ -636,6 +648,18 @@
(bsize >= BLOCK_16X16 ? TX_16X16 : TX_8X8) : TX_4X4;
vp9_encode_intra_block_plane(x, bsize, 0);
this_error = vpx_get_mb_ss(x->plane[0].src_diff);
+
+ // Keep a record of blocks that have almost no intra error residual
+ // (i.e. are in effect completely flat and untextured in the intra
+ // domain). In natural videos this is uncommon, but it is much more
+ // common in animations, graphics and screen content, so may be used
+ // as a signal to detect these types of content.
+ if (this_error < UL_INTRA_THRESH) {
+ ++intra_skip_count;
+ } else if ((mb_col > 0) && (image_data_start_row == INVALID_ROW)) {
+ image_data_start_row = mb_row;
+ }
+
#if CONFIG_VP9_HIGHBITDEPTH
if (cm->use_highbitdepth) {
switch (cm->bit_depth) {
@@ -963,6 +987,18 @@
vp9_clear_system_state();
}
+ // Clamp the image start to rows/2. This number of rows is discarded top
+ // and bottom as dead data so rows / 2 means the frame is blank.
+ if ((image_data_start_row > cm->mb_rows / 2) ||
+ (image_data_start_row == INVALID_ROW)) {
+ image_data_start_row = cm->mb_rows / 2;
+ }
+ // Exclude any image dead zone
+ if (image_data_start_row > 0) {
+ intra_skip_count =
+ MAX(0, intra_skip_count - (image_data_start_row * cm->mb_cols * 2));
+ }
+
{
FIRSTPASS_STATS fps;
// The minimum error here insures some bit allocation to frames even
@@ -987,6 +1023,8 @@
fps.pcnt_inter = (double)intercount / num_mbs;
fps.pcnt_second_ref = (double)second_ref_count / num_mbs;
fps.pcnt_neutral = (double)neutral_count / num_mbs;
+ fps.intra_skip_pct = (double)intra_skip_count / num_mbs;
+ fps.inactive_zone_rows = (double)image_data_start_row;
if (mvcount > 0) {
fps.MVr = (double)sum_mvr / mvcount;
@@ -1108,21 +1146,25 @@
static int get_twopass_worst_quality(const VP9_COMP *cpi,
const double section_err,
+ double inactive_zone,
int section_target_bandwidth,
double group_weight_factor) {
const RATE_CONTROL *const rc = &cpi->rc;
const VP9EncoderConfig *const oxcf = &cpi->oxcf;
+ inactive_zone = fclamp(inactive_zone, 0.0, 1.0);
+
if (section_target_bandwidth <= 0) {
return rc->worst_quality; // Highest value allowed
} else {
const int num_mbs = (cpi->oxcf.resize_mode != RESIZE_NONE)
? cpi->initial_mbs : cpi->common.MBs;
- const double err_per_mb = section_err / num_mbs;
+ const int active_mbs = MAX(1, num_mbs - (int)(num_mbs * inactive_zone));
+ const double av_err_per_mb = section_err / active_mbs;
const double speed_term = 1.0 + 0.04 * oxcf->speed;
- const double ediv_size_correction = num_mbs / EDIV_SIZE_FACTOR;
+ const double ediv_size_correction = (double)num_mbs / EDIV_SIZE_FACTOR;
const int target_norm_bits_per_mb = ((uint64_t)section_target_bandwidth <<
- BPER_MB_NORMBITS) / num_mbs;
+ BPER_MB_NORMBITS) / active_mbs;
int q;
int is_svc_upper_layer = 0;
@@ -1135,7 +1177,7 @@
// content at the given rate.
for (q = rc->best_quality; q < rc->worst_quality; ++q) {
const double factor =
- calc_correction_factor(err_per_mb,
+ calc_correction_factor(av_err_per_mb,
ERR_DIVISOR - ediv_size_correction,
is_svc_upper_layer ? SVC_FACTOR_PT_LOW :
FACTOR_PT_LOW, FACTOR_PT_HIGH, q,
@@ -1414,6 +1456,8 @@
const int num_mbs = (cpi->oxcf.resize_mode != RESIZE_NONE)
? cpi->initial_mbs : cpi->common.MBs;
+ // TODO(paulwilkins): correct for dead zone
+
// Underlying boost factor is based on inter error ratio.
frame_boost = (BASELINE_ERR_PER_MB * num_mbs) /
DOUBLE_DIVIDE_CHECK(this_frame->coded_error);
@@ -1779,6 +1823,8 @@
#if GROUP_ADAPTIVE_MAXQ
double gf_group_raw_error = 0.0;
#endif
+ double gf_group_skip_pct = 0.0;
+ double gf_group_inactive_zone_rows = 0.0;
double gf_first_frame_err = 0.0;
double mod_frame_err = 0.0;
@@ -1828,6 +1874,8 @@
#if GROUP_ADAPTIVE_MAXQ
gf_group_raw_error -= this_frame->coded_error;
#endif
+ gf_group_skip_pct -= this_frame->intra_skip_pct;
+ gf_group_inactive_zone_rows -= this_frame->inactive_zone_rows;
}
// Motion breakout threshold for loop below depends on image size.
@@ -1872,6 +1920,8 @@
#if GROUP_ADAPTIVE_MAXQ
gf_group_raw_error += this_frame->coded_error;
#endif
+ gf_group_skip_pct += this_frame->intra_skip_pct;
+ gf_group_inactive_zone_rows += this_frame->inactive_zone_rows;
if (EOF == input_stats(twopass, &next_frame))
break;
@@ -1974,6 +2024,8 @@
#if GROUP_ADAPTIVE_MAXQ
gf_group_raw_error += this_frame->coded_error;
#endif
+ gf_group_skip_pct += this_frame->intra_skip_pct;
+ gf_group_inactive_zone_rows += this_frame->inactive_zone_rows;
}
rc->baseline_gf_interval = new_gf_interval;
}
@@ -1996,6 +2048,12 @@
const int vbr_group_bits_per_frame =
(int)(gf_group_bits / rc->baseline_gf_interval);
const double group_av_err = gf_group_raw_error / rc->baseline_gf_interval;
+ const double group_av_skip_pct =
+ gf_group_skip_pct / rc->baseline_gf_interval;
+ const double group_av_inactive_zone =
+ ((gf_group_inactive_zone_rows * 2) /
+ (rc->baseline_gf_interval * (double)cm->mb_rows));
+
int tmp_q;
// rc factor is a weight factor that corrects for local rate control drift.
double rc_factor = 1.0;
@@ -2007,7 +2065,9 @@
(double)(100 - rc->rate_error_estimate) / 100.0);
}
tmp_q =
- get_twopass_worst_quality(cpi, group_av_err, vbr_group_bits_per_frame,
+ get_twopass_worst_quality(cpi, group_av_err,
+ (group_av_skip_pct + group_av_inactive_zone),
+ vbr_group_bits_per_frame,
twopass->kfgroup_inter_fraction * rc_factor);
twopass->active_worst_quality =
MAX(tmp_q, twopass->active_worst_quality >> 1);
@@ -2546,10 +2606,17 @@
// Special case code for first frame.
const int section_target_bandwidth = (int)(twopass->bits_left /
frames_left);
+ const double section_length = twopass->total_left_stats.count;
const double section_error =
- twopass->total_left_stats.coded_error / twopass->total_left_stats.count;
+ twopass->total_left_stats.coded_error / section_length;
+ const double section_intra_skip =
+ twopass->total_left_stats.intra_skip_pct / section_length;
+ const double section_inactive_zone =
+ (twopass->total_left_stats.inactive_zone_rows * 2) /
+ ((double)cm->mb_rows * section_length);
const int tmp_q =
get_twopass_worst_quality(cpi, section_error,
+ section_intra_skip + section_inactive_zone,
section_target_bandwidth, DEFAULT_GRP_WEIGHT);
twopass->active_worst_quality = tmp_q;
@@ -2565,6 +2632,12 @@
if (EOF == input_stats(twopass, &this_frame))
return;
+ // Set the frame content type flag.
+ if (this_frame.intra_skip_pct >= FC_ANIMATION_THRESH)
+ twopass->fr_content_type = FC_GRAPHICS_ANIMATION;
+ else
+ twopass->fr_content_type = FC_NORMAL;
+
// Keyframe and section processing.
if (rc->frames_to_key == 0 || (cpi->frame_flags & FRAMEFLAGS_KEY)) {
FIRSTPASS_STATS this_frame_copy;
diff --git a/vp9/encoder/vp9_firstpass.h b/vp9/encoder/vp9_firstpass.h
index 4a03855..0047932 100644
--- a/vp9/encoder/vp9_firstpass.h
+++ b/vp9/encoder/vp9_firstpass.h
@@ -51,6 +51,8 @@
double pcnt_motion;
double pcnt_second_ref;
double pcnt_neutral;
+ double intra_skip_pct;
+ double inactive_zone_rows; // Image mask rows top and bottom.
double MVr;
double mvr_abs;
double MVc;
@@ -73,6 +75,13 @@
FRAME_UPDATE_TYPES = 5
} FRAME_UPDATE_TYPE;
+#define FC_ANIMATION_THRESH 0.15
+typedef enum {
+ FC_NORMAL = 0,
+ FC_GRAPHICS_ANIMATION = 1,
+ FRAME_CONTENT_TYPES = 2
+} FRAME_CONTENT_TYPE;
+
typedef struct {
unsigned char index;
RATE_FACTOR_LEVEL rf_level[(MAX_LAG_BUFFERS * 2) + 1];
@@ -103,6 +112,8 @@
uint8_t *this_frame_mb_stats;
FIRSTPASS_MB_STATS firstpass_mb_stats;
#endif
+ // An indication of the content type of the current frame
+ FRAME_CONTENT_TYPE fr_content_type;
// Projected total bits available for a key frame group of frames
int64_t kf_group_bits;
diff --git a/vp9/encoder/vp9_pickmode.c b/vp9/encoder/vp9_pickmode.c
index 1af6094..60bff57 100644
--- a/vp9/encoder/vp9_pickmode.c
+++ b/vp9/encoder/vp9_pickmode.c
@@ -1058,6 +1058,7 @@
int mi_row, int mi_col, RD_COST *rd_cost,
BLOCK_SIZE bsize, PICK_MODE_CONTEXT *ctx) {
VP9_COMMON *const cm = &cpi->common;
+ SPEED_FEATURES *const sf = &cpi->sf;
TileInfo *const tile_info = &tile_data->tile_info;
MACROBLOCKD *const xd = &x->e_mbd;
MB_MODE_INFO *const mbmi = &xd->mi[0]->mbmi;
@@ -1178,7 +1179,8 @@
if (cm->use_prev_frame_mvs)
vp9_find_mv_refs(cm, xd, tile_info, xd->mi[0], ref_frame,
- candidates, mi_row, mi_col, NULL, NULL);
+ candidates, mi_row, mi_col, NULL, NULL,
+ xd->mi[0]->mbmi.mode_context);
else
const_motion[ref_frame] = mv_refs_rt(cm, xd, tile_info,
xd->mi[0],
@@ -1217,7 +1219,7 @@
continue;
i = (ref_frame == LAST_FRAME) ? GOLDEN_FRAME : LAST_FRAME;
- if (cpi->ref_frame_flags & flag_list[i])
+ if ((cpi->ref_frame_flags & flag_list[i]) && sf->reference_masking)
if (x->pred_mv_sad[ref_frame] > (x->pred_mv_sad[i] << 1))
ref_frame_skip_mask |= (1 << ref_frame);
if (ref_frame_skip_mask & (1 << ref_frame))
@@ -1657,7 +1659,8 @@
vp9_setup_pred_block(xd, yv12_mb[ref_frame], yv12, mi_row, mi_col,
sf, sf);
vp9_find_mv_refs(cm, xd, tile_info, xd->mi[0], ref_frame,
- candidates, mi_row, mi_col, NULL, NULL);
+ candidates, mi_row, mi_col, NULL, NULL,
+ xd->mi[0]->mbmi.mode_context);
vp9_find_best_ref_mvs(xd, cm->allow_high_precision_mv, candidates,
&dummy_mv[0], &dummy_mv[1]);
@@ -1731,7 +1734,8 @@
b_mv[NEWMV].as_int = INVALID_MV;
vp9_append_sub8x8_mvs_for_idx(cm, xd, tile_info, i, 0, mi_row, mi_col,
&b_mv[NEARESTMV],
- &b_mv[NEARMV]);
+ &b_mv[NEARMV],
+ xd->mi[0]->mbmi.mode_context);
for (this_mode = NEARESTMV; this_mode <= NEWMV; ++this_mode) {
int b_rate = 0;
diff --git a/vp9/encoder/vp9_rdopt.c b/vp9/encoder/vp9_rdopt.c
index 9fa258c..eacc63f 100644
--- a/vp9/encoder/vp9_rdopt.c
+++ b/vp9/encoder/vp9_rdopt.c
@@ -1804,7 +1804,8 @@
frame_mv[ZEROMV][frame].as_int = 0;
vp9_append_sub8x8_mvs_for_idx(cm, xd, tile, i, ref, mi_row, mi_col,
&frame_mv[NEARESTMV][frame],
- &frame_mv[NEARMV][frame]);
+ &frame_mv[NEARMV][frame],
+ xd->mi[0]->mbmi.mode_context);
}
// search for the best motion vector on this segment
@@ -2220,7 +2221,7 @@
// Gets an initial list of candidate vectors from neighbours and orders them
vp9_find_mv_refs(cm, xd, tile, mi, ref_frame, candidates, mi_row, mi_col,
- NULL, NULL);
+ NULL, NULL, xd->mi[0]->mbmi.mode_context);
// Candidate refinement carried out at encoder and decoder
vp9_find_best_ref_mvs(xd, cm->allow_high_precision_mv, candidates,
diff --git a/vp9/encoder/vp9_speed_features.c b/vp9/encoder/vp9_speed_features.c
index 61c066e..6c6c4ed 100644
--- a/vp9/encoder/vp9_speed_features.c
+++ b/vp9/encoder/vp9_speed_features.c
@@ -19,9 +19,33 @@
return frame_is_kf_gf_arf(cpi) || vp9_is_upper_layer_key_frame(cpi);
}
-static void set_good_speed_feature_framesize_dependent(VP9_COMMON *cm,
+// Sets a partition size down to which the auto partition code will always
+// search (can go lower), based on the image dimensions. The logic here
+// is that the extent to which ringing artefacts are offensive, depends
+// partly on the screen area that over which they propogate. Propogation is
+// limited by transform block size but the screen area take up by a given block
+// size will be larger for a small image format stretched to full screen.
+static BLOCK_SIZE set_partition_min_limit(VP9_COMMON *const cm) {
+ unsigned int screen_area = (cm->width * cm->height);
+
+ // Select block size based on image format size.
+ if (screen_area < 1280 * 720) {
+ // Formats smaller in area than 720P
+ return BLOCK_4X4;
+ } else if (screen_area < 1920 * 1080) {
+ // Format >= 720P and < 1080P
+ return BLOCK_8X8;
+ } else {
+ // Formats 1080P and up
+ return BLOCK_16X16;
+ }
+}
+
+static void set_good_speed_feature_framesize_dependent(VP9_COMP *cpi,
SPEED_FEATURES *sf,
int speed) {
+ VP9_COMMON *const cm = &cpi->common;
+
if (speed >= 1) {
if (MIN(cm->width, cm->height) >= 720) {
sf->disable_split_mask = cm->show_frame ? DISABLE_ALL_SPLIT
@@ -45,6 +69,7 @@
sf->partition_search_breakout_dist_thr = (1 << 22);
sf->partition_search_breakout_rate_thr = 100;
}
+ sf->rd_auto_partition_min_limit = set_partition_min_limit(cm);
}
if (speed >= 3) {
@@ -62,6 +87,13 @@
}
}
+ // If this is a two pass clip that fits the criteria for animated or
+ // graphics content then reset disable_split_mask for speeds 1-4.
+ if ((speed >= 1) && (cpi->oxcf.pass == 2) &&
+ (cpi->twopass.fr_content_type == FC_GRAPHICS_ANIMATION)) {
+ sf->disable_split_mask = DISABLE_COMPOUND_SPLIT;
+ }
+
if (speed >= 4) {
if (MIN(cm->width, cm->height) >= 720) {
sf->partition_search_breakout_dist_thr = (1 << 26);
@@ -72,29 +104,6 @@
}
}
-// Sets a partition size down to which the auto partition code will always
-// search (can go lower), based on the image dimensions. The logic here
-// is that the extent to which ringing artefacts are offensive, depends
-// partly on the screen area that over which they propogate. Propogation is
-// limited by transform block size but the screen area take up by a given block
-// size will be larger for a small image format stretched to full screen.
-static BLOCK_SIZE set_partition_min_limit(VP9_COMP *cpi) {
- VP9_COMMON *const cm = &cpi->common;
- unsigned int screen_area = (cm->width * cm->height);
-
- // Select block size based on image format size.
- if (screen_area < 1280 * 720) {
- // Formats smaller in area than 720P
- return BLOCK_4X4;
- } else if (screen_area < 1920 * 1080) {
- // Format >= 720P and < 1080P
- return BLOCK_8X8;
- } else {
- // Formats 1080P and up
- return BLOCK_16X16;
- }
-}
-
static void set_good_speed_feature(VP9_COMP *cpi, VP9_COMMON *cm,
SPEED_FEATURES *sf, int speed) {
const int boosted = frame_is_boosted(cpi);
@@ -139,7 +148,6 @@
sf->disable_filter_search_var_thresh = 100;
sf->comp_inter_joint_search_thresh = BLOCK_SIZES;
sf->auto_min_max_partition_size = RELAXED_NEIGHBORING_MIN_MAX;
- sf->rd_auto_partition_min_limit = set_partition_min_limit(cpi);
sf->allow_partition_search_skip = 1;
}
@@ -260,8 +268,12 @@
FLAG_SKIP_INTRA_LOWVAR;
sf->adaptive_pred_interp_filter = 2;
- // Reference masking is not supported in dynamic scaling mode.
- sf->reference_masking = cpi->oxcf.resize_mode != RESIZE_DYNAMIC ? 1 : 0;
+ // Disable reference masking if using spatial scaling since
+ // pred_mv_sad will not be set (since vp9_mv_pred will not
+ // be called).
+ // TODO(marpan/agrange): Fix this condition.
+ sf->reference_masking = (cpi->oxcf.resize_mode != RESIZE_DYNAMIC &&
+ cpi->svc.number_spatial_layers == 1) ? 1 : 0;
sf->disable_filter_search_var_thresh = 50;
sf->comp_inter_joint_search_thresh = BLOCK_SIZES;
@@ -379,7 +391,6 @@
void vp9_set_speed_features_framesize_dependent(VP9_COMP *cpi) {
SPEED_FEATURES *const sf = &cpi->sf;
- VP9_COMMON *const cm = &cpi->common;
const VP9EncoderConfig *const oxcf = &cpi->oxcf;
RD_OPT *const rd = &cpi->rd;
int i;
@@ -387,7 +398,7 @@
if (oxcf->mode == REALTIME) {
set_rt_speed_feature_framesize_dependent(cpi, sf, oxcf->speed);
} else if (oxcf->mode == GOOD) {
- set_good_speed_feature_framesize_dependent(cm, sf, oxcf->speed);
+ set_good_speed_feature_framesize_dependent(cpi, sf, oxcf->speed);
}
if (sf->disable_split_mask == DISABLE_ALL_SPLIT) {
diff --git a/vp9/encoder/vp9_svc_layercontext.c b/vp9/encoder/vp9_svc_layercontext.c
index 9197048..cb1b0df 100644
--- a/vp9/encoder/vp9_svc_layercontext.c
+++ b/vp9/encoder/vp9_svc_layercontext.c
@@ -131,7 +131,6 @@
LAYER_CONTEXT *const lc =
&svc->layer_context[sl * oxcf->ts_number_layers + tl];
RATE_CONTROL *const lrc = &lc->rc;
- layer = LAYER_IDS_TO_IDX(sl, tl, oxcf->ts_number_layers);
lc->spatial_layer_target_bandwidth = spatial_layer_target;
bitrate_alloc = (float)lc->target_bandwidth / spatial_layer_target;
@@ -144,7 +143,7 @@
lrc->bits_off_target =
MIN(lrc->bits_off_target, lrc->maximum_buffer_size);
lrc->buffer_level = MIN(lrc->buffer_level, lrc->maximum_buffer_size);
- lc->framerate = cpi->framerate / oxcf->ts_rate_decimator[layer];
+ lc->framerate = cpi->framerate / oxcf->ts_rate_decimator[tl];
lrc->avg_frame_bandwidth = (int)(lc->target_bandwidth / lc->framerate);
lrc->max_frame_bandwidth = rc->max_frame_bandwidth;
lrc->worst_quality = rc->worst_quality;
diff --git a/vp9/encoder/vp9_variance.h b/vp9/encoder/vp9_variance.h
index 8fc47a8..0a87395 100644
--- a/vp9/encoder/vp9_variance.h
+++ b/vp9/encoder/vp9_variance.h
@@ -18,60 +18,6 @@
extern "C" {
#endif
-// TODO(johannkoenig): All functions which depend on
-// [highbd_][8|10|12_]variance should be refactored or moved to vpx_dsp.
-static void variance(const uint8_t *a, int a_stride,
- const uint8_t *b, int b_stride,
- int w, int h, unsigned int *sse, int *sum) {
- int i, j;
-
- *sum = 0;
- *sse = 0;
-
- for (i = 0; i < h; i++) {
- for (j = 0; j < w; j++) {
- const int diff = a[j] - b[j];
- *sum += diff;
- *sse += diff * diff;
- }
-
- a += a_stride;
- b += b_stride;
- }
-}
-
-#if CONFIG_VP9_HIGHBITDEPTH
-static void highbd_variance64(const uint8_t *a8, int a_stride,
- const uint8_t *b8, int b_stride,
- int w, int h, uint64_t *sse, uint64_t *sum) {
- int i, j;
-
- uint16_t *a = CONVERT_TO_SHORTPTR(a8);
- uint16_t *b = CONVERT_TO_SHORTPTR(b8);
- *sum = 0;
- *sse = 0;
-
- for (i = 0; i < h; i++) {
- for (j = 0; j < w; j++) {
- const int diff = a[j] - b[j];
- *sum += diff;
- *sse += diff * diff;
- }
- a += a_stride;
- b += b_stride;
- }
-}
-static void highbd_8_variance(const uint8_t *a8, int a_stride,
- const uint8_t *b8, int b_stride,
- int w, int h, unsigned int *sse, int *sum) {
- uint64_t sse_long = 0;
- uint64_t sum_long = 0;
- highbd_variance64(a8, a_stride, b8, b_stride, w, h, &sse_long, &sum_long);
- *sse = (unsigned int)sse_long;
- *sum = (int)sum_long;
-}
-#endif
-
typedef unsigned int(*vp9_sad_fn_t)(const uint8_t *src_ptr,
int source_stride,
const uint8_t *ref_ptr,
diff --git a/vp9/vp9_common.mk b/vp9/vp9_common.mk
index 5e7210d..d018699 100644
--- a/vp9/vp9_common.mk
+++ b/vp9/vp9_common.mk
@@ -146,6 +146,8 @@
VP9_COMMON_SRCS-$(HAVE_MSA) += common/mips/msa/vp9_idct16x16_msa.c
VP9_COMMON_SRCS-$(HAVE_MSA) += common/mips/msa/vp9_idct32x32_msa.c
VP9_COMMON_SRCS-$(HAVE_MSA) += common/mips/msa/vp9_idct_msa.h
+VP9_COMMON_SRCS-$(HAVE_MSA) += common/mips/msa/vp9_loopfilter_4_msa.c
+VP9_COMMON_SRCS-$(HAVE_MSA) += common/mips/msa/vp9_loopfilter_8_msa.c
VP9_COMMON_SRCS-$(HAVE_MSA) += common/mips/msa/vp9_loopfilter_16_msa.c
VP9_COMMON_SRCS-$(HAVE_MSA) += common/mips/msa/vp9_loopfilter_msa.h
diff --git a/vp9/vp9_dx_iface.c b/vp9/vp9_dx_iface.c
index 8cff965..4080d64 100644
--- a/vp9/vp9_dx_iface.c
+++ b/vp9/vp9_dx_iface.c
@@ -55,6 +55,7 @@
int invert_tile_order;
int last_show_frame; // Index of last output frame.
int byte_alignment;
+ int skip_loop_filter;
// Frame parallel related.
int frame_parallel_decode; // frame-based threading.
@@ -285,6 +286,7 @@
cm->new_fb_idx = INVALID_IDX;
cm->byte_alignment = ctx->byte_alignment;
+ cm->skip_loop_filter = ctx->skip_loop_filter;
if (ctx->get_ext_fb_cb != NULL && ctx->release_ext_fb_cb != NULL) {
pool->get_fb_cb = ctx->get_ext_fb_cb;
@@ -1059,6 +1061,19 @@
return VPX_CODEC_OK;
}
+static vpx_codec_err_t ctrl_set_skip_loop_filter(vpx_codec_alg_priv_t *ctx,
+ va_list args) {
+ ctx->skip_loop_filter = va_arg(args, int);
+
+ if (ctx->frame_workers) {
+ VP9Worker *const worker = ctx->frame_workers;
+ FrameWorkerData *const frame_worker_data = (FrameWorkerData *)worker->data1;
+ frame_worker_data->pbi->common.skip_loop_filter = ctx->skip_loop_filter;
+ }
+
+ return VPX_CODEC_OK;
+}
+
static vpx_codec_ctrl_fn_map_t decoder_ctrl_maps[] = {
{VP8_COPY_REFERENCE, ctrl_copy_reference},
@@ -1072,6 +1087,7 @@
{VP9_INVERT_TILE_DECODE_ORDER, ctrl_set_invert_tile_order},
{VPXD_SET_DECRYPTOR, ctrl_set_decryptor},
{VP9_SET_BYTE_ALIGNMENT, ctrl_set_byte_alignment},
+ {VP9_SET_SKIP_LOOP_FILTER, ctrl_set_skip_loop_filter},
// Getters
{VP8D_GET_LAST_REF_UPDATES, ctrl_get_last_ref_updates},
diff --git a/vpx/vp8dx.h b/vpx/vp8dx.h
index 83898bf..bc9cb1a 100644
--- a/vpx/vp8dx.h
+++ b/vpx/vp8dx.h
@@ -106,6 +106,13 @@
*/
VP9_INVERT_TILE_DECODE_ORDER,
+ /** control function to set the skip loop filter flag. Valid values are
+ * integers. The decoder will skip the loop filter when its value is set to
+ * nonzero. If the loop filter is skipped the decoder may accumulate decode
+ * artifacts. The default value is 0.
+ */
+ VP9_SET_SKIP_LOOP_FILTER,
+
VP8_DECODER_CTRL_ID_MAX
};
diff --git a/vpxdec.c b/vpxdec.c
index f219c2e..baa6115 100644
--- a/vpxdec.c
+++ b/vpxdec.c
@@ -106,24 +106,25 @@
};
#if CONFIG_VP8_DECODER
-static const arg_def_t addnoise_level = ARG_DEF(NULL, "noise-level", 1,
- "Enable VP8 postproc add noise");
-static const arg_def_t deblock = ARG_DEF(NULL, "deblock", 0,
- "Enable VP8 deblocking");
-static const arg_def_t demacroblock_level = ARG_DEF(NULL, "demacroblock-level", 1,
- "Enable VP8 demacroblocking, w/ level");
-static const arg_def_t pp_debug_info = ARG_DEF(NULL, "pp-debug-info", 1,
- "Enable VP8 visible debug info");
-static const arg_def_t pp_disp_ref_frame = ARG_DEF(NULL, "pp-dbg-ref-frame", 1,
- "Display only selected reference frame per macro block");
-static const arg_def_t pp_disp_mb_modes = ARG_DEF(NULL, "pp-dbg-mb-modes", 1,
- "Display only selected macro block modes");
-static const arg_def_t pp_disp_b_modes = ARG_DEF(NULL, "pp-dbg-b-modes", 1,
- "Display only selected block modes");
-static const arg_def_t pp_disp_mvs = ARG_DEF(NULL, "pp-dbg-mvs", 1,
- "Draw only selected motion vectors");
-static const arg_def_t mfqe = ARG_DEF(NULL, "mfqe", 0,
- "Enable multiframe quality enhancement");
+static const arg_def_t addnoise_level = ARG_DEF(
+ NULL, "noise-level", 1, "Enable VP8 postproc add noise");
+static const arg_def_t deblock = ARG_DEF(
+ NULL, "deblock", 0, "Enable VP8 deblocking");
+static const arg_def_t demacroblock_level = ARG_DEF(
+ NULL, "demacroblock-level", 1, "Enable VP8 demacroblocking, w/ level");
+static const arg_def_t pp_debug_info = ARG_DEF(
+ NULL, "pp-debug-info", 1, "Enable VP8 visible debug info");
+static const arg_def_t pp_disp_ref_frame = ARG_DEF(
+ NULL, "pp-dbg-ref-frame", 1,
+ "Display only selected reference frame per macro block");
+static const arg_def_t pp_disp_mb_modes = ARG_DEF(
+ NULL, "pp-dbg-mb-modes", 1, "Display only selected macro block modes");
+static const arg_def_t pp_disp_b_modes = ARG_DEF(
+ NULL, "pp-dbg-b-modes", 1, "Display only selected block modes");
+static const arg_def_t pp_disp_mvs = ARG_DEF(
+ NULL, "pp-dbg-mvs", 1, "Draw only selected motion vectors");
+static const arg_def_t mfqe = ARG_DEF(
+ NULL, "mfqe", 0, "Enable multiframe quality enhancement");
static const arg_def_t *vp8_pp_args[] = {
&addnoise_level, &deblock, &demacroblock_level, &pp_debug_info,
@@ -813,34 +814,42 @@
fprintf(stderr, "%s\n", decoder.name);
#if CONFIG_VP8_DECODER
-
if (vp8_pp_cfg.post_proc_flag
&& vpx_codec_control(&decoder, VP8_SET_POSTPROC, &vp8_pp_cfg)) {
- fprintf(stderr, "Failed to configure postproc: %s\n", vpx_codec_error(&decoder));
+ fprintf(stderr, "Failed to configure postproc: %s\n",
+ vpx_codec_error(&decoder));
return EXIT_FAILURE;
}
if (vp8_dbg_color_ref_frame
- && vpx_codec_control(&decoder, VP8_SET_DBG_COLOR_REF_FRAME, vp8_dbg_color_ref_frame)) {
- fprintf(stderr, "Failed to configure reference block visualizer: %s\n", vpx_codec_error(&decoder));
+ && vpx_codec_control(&decoder, VP8_SET_DBG_COLOR_REF_FRAME,
+ vp8_dbg_color_ref_frame)) {
+ fprintf(stderr, "Failed to configure reference block visualizer: %s\n",
+ vpx_codec_error(&decoder));
return EXIT_FAILURE;
}
if (vp8_dbg_color_mb_modes
- && vpx_codec_control(&decoder, VP8_SET_DBG_COLOR_MB_MODES, vp8_dbg_color_mb_modes)) {
- fprintf(stderr, "Failed to configure macro block visualizer: %s\n", vpx_codec_error(&decoder));
+ && vpx_codec_control(&decoder, VP8_SET_DBG_COLOR_MB_MODES,
+ vp8_dbg_color_mb_modes)) {
+ fprintf(stderr, "Failed to configure macro block visualizer: %s\n",
+ vpx_codec_error(&decoder));
return EXIT_FAILURE;
}
if (vp8_dbg_color_b_modes
- && vpx_codec_control(&decoder, VP8_SET_DBG_COLOR_B_MODES, vp8_dbg_color_b_modes)) {
- fprintf(stderr, "Failed to configure block visualizer: %s\n", vpx_codec_error(&decoder));
+ && vpx_codec_control(&decoder, VP8_SET_DBG_COLOR_B_MODES,
+ vp8_dbg_color_b_modes)) {
+ fprintf(stderr, "Failed to configure block visualizer: %s\n",
+ vpx_codec_error(&decoder));
return EXIT_FAILURE;
}
if (vp8_dbg_display_mv
- && vpx_codec_control(&decoder, VP8_SET_DBG_DISPLAY_MV, vp8_dbg_display_mv)) {
- fprintf(stderr, "Failed to configure motion vector visualizer: %s\n", vpx_codec_error(&decoder));
+ && vpx_codec_control(&decoder, VP8_SET_DBG_DISPLAY_MV,
+ vp8_dbg_display_mv)) {
+ fprintf(stderr, "Failed to configure motion vector visualizer: %s\n",
+ vpx_codec_error(&decoder));
return EXIT_FAILURE;
}
#endif
diff --git a/vpxenc.c b/vpxenc.c
index be9b8b6..8bbb9fc 100644
--- a/vpxenc.c
+++ b/vpxenc.c
@@ -330,10 +330,6 @@
NULL, "sharpness", 1, "Loop filter sharpness (0..7)");
static const arg_def_t static_thresh = ARG_DEF(
NULL, "static-thresh", 1, "Motion detection threshold");
-static const arg_def_t cpu_used_vp8 = ARG_DEF(
- NULL, "cpu-used", 1, "CPU Used (-16..16)");
-static const arg_def_t cpu_used_vp9 = ARG_DEF(
- NULL, "cpu-used", 1, "CPU Used (-8..8)");
static const arg_def_t auto_altref = ARG_DEF(
NULL, "auto-alt-ref", 1, "Enable automatic alt reference frames");
static const arg_def_t arnr_maxframes = ARG_DEF(
@@ -353,17 +349,14 @@
NULL, "cq-level", 1, "Constant/Constrained Quality level");
static const arg_def_t max_intra_rate_pct = ARG_DEF(
NULL, "max-intra-rate", 1, "Max I-frame bitrate (pct)");
-static const arg_def_t max_inter_rate_pct = ARG_DEF(
- NULL, "max-inter-rate", 1, "Max P-frame bitrate (pct)");
-static const arg_def_t gf_cbr_boost_pct = ARG_DEF(
- NULL, "gf-cbr-boost", 1, "Boost for Golden Frame in CBR mode (pct)");
-
-static const arg_def_t screen_content_mode = ARG_DEF(NULL, "screen-content-mode", 1,
- "Screen content mode");
#if CONFIG_VP8_ENCODER
+static const arg_def_t cpu_used_vp8 = ARG_DEF(
+ NULL, "cpu-used", 1, "CPU Used (-16..16)");
static const arg_def_t token_parts = ARG_DEF(
NULL, "token-parts", 1, "Number of token partitions to use, log2");
+static const arg_def_t screen_content_mode = ARG_DEF(
+ NULL, "screen-content-mode", 1, "Screen content mode");
static const arg_def_t *vp8_args[] = {
&cpu_used_vp8, &auto_altref, &noise_sens, &sharpness, &static_thresh,
&token_parts, &arnr_maxframes, &arnr_strength, &arnr_type,
@@ -382,6 +375,8 @@
#endif
#if CONFIG_VP9_ENCODER
+static const arg_def_t cpu_used_vp9 = ARG_DEF(
+ NULL, "cpu-used", 1, "CPU Used (-8..8)");
static const arg_def_t tile_cols = ARG_DEF(
NULL, "tile-columns", 1, "Number of tile columns to use, log2");
static const arg_def_t tile_rows = ARG_DEF(
@@ -397,6 +392,10 @@
static const arg_def_t frame_periodic_boost = ARG_DEF(
NULL, "frame-boost", 1,
"Enable frame periodic boost (0: off (default), 1: on)");
+static const arg_def_t gf_cbr_boost_pct = ARG_DEF(
+ NULL, "gf-cbr-boost", 1, "Boost for Golden Frame in CBR mode (pct)");
+static const arg_def_t max_inter_rate_pct = ARG_DEF(
+ NULL, "max-inter-rate", 1, "Max P-frame bitrate (pct)");
static const struct arg_enum_list color_space_enum[] = {
{ "unknown", VPX_CS_UNKNOWN },
@@ -944,6 +943,10 @@
rewind(input->file);
}
+ /* Default to 1:1 pixel aspect ratio. */
+ input->pixel_aspect_ratio.numerator = 1;
+ input->pixel_aspect_ratio.denominator = 1;
+
/* For RAW input sources, these bytes will applied on the first frame
* in read_frame().
*/
@@ -957,6 +960,8 @@
input->file_type = FILE_TYPE_Y4M;
input->width = input->y4m.pic_w;
input->height = input->y4m.pic_h;
+ input->pixel_aspect_ratio.numerator = input->y4m.par_n;
+ input->pixel_aspect_ratio.denominator = input->y4m.par_d;
input->framerate.numerator = input->y4m.fps_n;
input->framerate.denominator = input->y4m.fps_d;
input->fmt = input->y4m.vpx_fmt;
@@ -1390,7 +1395,8 @@
static void open_output_file(struct stream_state *stream,
- struct VpxEncoderConfig *global) {
+ struct VpxEncoderConfig *global,
+ const struct VpxRational *pixel_aspect_ratio) {
const char *fn = stream->config.out_fn;
const struct vpx_codec_enc_cfg *const cfg = &stream->config.cfg;
@@ -1411,7 +1417,8 @@
write_webm_file_header(&stream->ebml, cfg,
&global->framerate,
stream->config.stereo_fmt,
- global->codec->fourcc);
+ global->codec->fourcc,
+ pixel_aspect_ratio);
}
#endif
@@ -2044,7 +2051,8 @@
}
FOREACH_STREAM(setup_pass(stream, &global, pass));
- FOREACH_STREAM(open_output_file(stream, &global));
+ FOREACH_STREAM(open_output_file(stream, &global,
+ &input.pixel_aspect_ratio));
FOREACH_STREAM(initialize_encoder(stream, &global));
#if CONFIG_VP9 && CONFIG_VP9_HIGHBITDEPTH
diff --git a/webmenc.cc b/webmenc.cc
index a0e542b..8212ee3 100644
--- a/webmenc.cc
+++ b/webmenc.cc
@@ -24,7 +24,8 @@
const vpx_codec_enc_cfg_t *cfg,
const struct vpx_rational *fps,
stereo_format_t stereo_fmt,
- unsigned int fourcc) {
+ unsigned int fourcc,
+ const struct VpxRational *par) {
mkvmuxer::MkvWriter *const writer = new mkvmuxer::MkvWriter(glob->stream);
mkvmuxer::Segment *const segment = new mkvmuxer::Segment();
segment->Init(writer);
@@ -49,6 +50,15 @@
segment->GetTrackByNumber(video_track_id));
video_track->SetStereoMode(stereo_fmt);
video_track->set_codec_id(fourcc == VP8_FOURCC ? "V_VP8" : "V_VP9");
+ if (par->numerator > 1 || par->denominator > 1) {
+ // TODO(fgalligan): Add support of DisplayUnit, Display Aspect Ratio type
+ // to WebM format.
+ const uint64_t display_width =
+ static_cast<uint64_t>(((cfg->g_w * par->numerator * 1.0) /
+ par->denominator) + .5);
+ video_track->set_display_width(display_width);
+ video_track->set_display_height(cfg->g_h);
+ }
if (glob->debug) {
video_track->set_uid(kDebugTrackUid);
}
diff --git a/webmenc.h b/webmenc.h
index 0ac606b..c255d3d 100644
--- a/webmenc.h
+++ b/webmenc.h
@@ -42,7 +42,8 @@
const vpx_codec_enc_cfg_t *cfg,
const struct vpx_rational *fps,
stereo_format_t stereo_fmt,
- unsigned int fourcc);
+ unsigned int fourcc,
+ const struct VpxRational *par);
void write_webm_block(struct EbmlGlobal *glob,
const vpx_codec_enc_cfg_t *cfg,