Merge "mips msa vp9 block error optimization"
diff --git a/build/make/Makefile b/build/make/Makefile
index b56b490..f1b1cca 100644
--- a/build/make/Makefile
+++ b/build/make/Makefile
@@ -22,8 +22,10 @@
exampletest: .DEFAULT
install:: .DEFAULT
test:: .DEFAULT
+test-no-data-check:: .DEFAULT
testdata:: .DEFAULT
utiltest: .DEFAULT
+exampletest-no-data-check utiltest-no-data-check: .DEFAULT
# Note: md5sum is not installed on OS X, but openssl is. Openssl may not be
@@ -113,6 +115,9 @@
testdata::
.PHONY: utiltest
utiltest:
+.PHONY: test-no-data-check exampletest-no-data-check utiltest-no-data-check
+test-no-data-check::
+exampletest-no-data-check utiltest-no-data-check:
# Add compiler flags for intrinsic files
ifeq ($(TOOLCHAIN), x86-os2-gcc)
diff --git a/build/make/configure.sh b/build/make/configure.sh
index c5bed61..688fa12 100644
--- a/build/make/configure.sh
+++ b/build/make/configure.sh
@@ -728,6 +728,13 @@
# Handle darwin variants. Newer SDKs allow targeting older
# platforms, so use the newest one available.
case ${toolchain} in
+ arm*-darwin*)
+ ios_sdk_dir="$(show_darwin_sdk_path iphoneos)"
+ if [ -d "${ios_sdk_dir}" ]; then
+ add_cflags "-isysroot ${ios_sdk_dir}"
+ add_ldflags "-isysroot ${ios_sdk_dir}"
+ fi
+ ;;
*-darwin*)
osx_sdk_dir="$(show_darwin_sdk_path macosx)"
if [ -d "${osx_sdk_dir}" ]; then
@@ -803,7 +810,14 @@
if disabled neon && enabled neon_asm; then
die "Disabling neon while keeping neon-asm is not supported"
fi
- soft_enable media
+ case ${toolchain} in
+ *-darwin*)
+ # Neon is guaranteed on iOS 6+ devices, while old media extensions
+ # no longer assemble with iOS 9 SDK
+ ;;
+ *)
+ soft_enable media
+ esac
;;
armv6)
soft_enable media
diff --git a/libs.mk b/libs.mk
index 0ca8379..6215990 100644
--- a/libs.mk
+++ b/libs.mk
@@ -508,11 +508,13 @@
define test_shard_template
test:: test_shard.$(1)
-test_shard.$(1): $(LIBVPX_TEST_BIN) testdata
+test-no-data-check:: test_shard_ndc.$(1)
+test_shard.$(1) test_shard_ndc.$(1): $(LIBVPX_TEST_BIN)
@set -e; \
export GTEST_SHARD_INDEX=$(1); \
export GTEST_TOTAL_SHARDS=$(2); \
$(LIBVPX_TEST_BIN)
+test_shard.$(1): testdata
-.PHONY: test_shard.$(1)
+.PHONY: test_shard.$(1) test_shard_ndc.$(1)
endef
@@ -557,15 +559,16 @@
# TODO(tomfinegan): Support running the debug versions of tools?
TEST_BIN_PATH := $(addsuffix /$(TGT_OS:win64=x64)/Release, $(TEST_BIN_PATH))
endif
-utiltest: testdata
+utiltest utiltest-no-data-check:
$(qexec)$(SRC_PATH_BARE)/test/vpxdec.sh \
--test-data-path $(LIBVPX_TEST_DATA_PATH) \
--bin-path $(TEST_BIN_PATH)
$(qexec)$(SRC_PATH_BARE)/test/vpxenc.sh \
--test-data-path $(LIBVPX_TEST_DATA_PATH) \
--bin-path $(TEST_BIN_PATH)
+utiltest: testdata
else
-utiltest:
+utiltest utiltest-no-data-check:
@echo Unit tests must be enabled to make the utiltest target.
endif
@@ -583,11 +586,12 @@
# TODO(tomfinegan): Support running the debug versions of tools?
EXAMPLES_BIN_PATH := $(TGT_OS:win64=x64)/Release
endif
-exampletest: examples testdata
+exampletest exampletest-no-data-check: examples
$(qexec)$(SRC_PATH_BARE)/test/examples.sh \
--test-data-path $(LIBVPX_TEST_DATA_PATH) \
--bin-path $(EXAMPLES_BIN_PATH)
+exampletest: testdata
else
-exampletest:
+exampletest exampletest-no-data-check:
@echo Unit tests must be enabled to make the exampletest target.
endif
diff --git a/test/test_intra_pred_speed.cc b/test/test_intra_pred_speed.cc
index 7b4c435..46d4a25 100644
--- a/test/test_intra_pred_speed.cc
+++ b/test/test_intra_pred_speed.cc
@@ -316,8 +316,8 @@
vp9_dc_left_predictor_16x16_neon,
vp9_dc_top_predictor_16x16_neon,
vp9_dc_128_predictor_16x16_neon, vp9_v_predictor_16x16_neon,
- vp9_h_predictor_16x16_neon, NULL, NULL, NULL, NULL, NULL, NULL,
- vp9_tm_predictor_16x16_neon)
+ vp9_h_predictor_16x16_neon, vp9_d45_predictor_16x16_neon, NULL,
+ NULL, NULL, NULL, NULL, vp9_tm_predictor_16x16_neon)
#endif // HAVE_NEON
#if HAVE_MSA
diff --git a/test/tools_common.sh b/test/tools_common.sh
index 60424ed..0bdcc08 100755
--- a/test/tools_common.sh
+++ b/test/tools_common.sh
@@ -409,6 +409,7 @@
YUV_RAW_INPUT_HEIGHT=288
Y4M_NOSQ_PAR_INPUT="${LIBVPX_TEST_DATA_PATH}/park_joy_90p_8_420_a10-1.y4m"
+Y4M_720P_INPUT="${LIBVPX_TEST_DATA_PATH}/niklas_1280_720_30.y4m"
# Setup a trap function to clean up after tests complete.
trap cleanup EXIT
diff --git a/test/vpxenc.sh b/test/vpxenc.sh
index 1faa145..bf551a8 100755
--- a/test/vpxenc.sh
+++ b/test/vpxenc.sh
@@ -60,6 +60,10 @@
echo ""${Y4M_NOSQ_PAR_INPUT}""
}
+y4m_input_720p() {  # Echo the path held in Y4M_720P_INPUT.
+  echo ""${Y4M_720P_INPUT}""
+}
+
# Echo default vpxenc real time encoding params. $1 is the codec, which defaults
# to vp8 if unspecified.
vpxenc_rt_params() {
@@ -68,7 +72,7 @@
--buf-initial-sz=500
--buf-optimal-sz=600
--buf-sz=1000
- --cpu-used=-5
+ --cpu-used=-6
--end-usage=cbr
--error-resilient=1
--kf-max-dist=90000
@@ -258,6 +262,34 @@
fi
}
+# Real-time VP9 WebM encode for each threads x tile-columns pair in 2..4.
+vpxenc_vp9_webm_rt_multithread_tiled() {
+  if [ "$(vpxenc_can_encode_vp9)" = "yes" ] && \
+     [ "$(webm_io_available)" = "yes" ]; then
+    local output="${VPX_TEST_OUTPUT_DIR}/vp9_rt_multithread_tiled.webm"
+    local tilethread_min=2
+    local tilethread_max=4
+    local num_threads="$(seq ${tilethread_min} ${tilethread_max})"
+    local num_tile_cols="$(seq ${tilethread_min} ${tilethread_max})"
+    for threads in ${num_threads}; do
+      for tile_cols in ${num_tile_cols}; do
+        vpxenc $(y4m_input_720p) \
+          $(vpxenc_rt_params vp9) \
+          --threads=${threads} \
+          --tile-columns=${tile_cols} \
+          --output="${output}" || return 1
+        # Validate and delete the output after every encode so a file left by
+        # an earlier combination cannot hide a later failure.
+        if [ ! -e "${output}" ]; then
+          elog "Output file does not exist."
+          return 1
+        fi
+        rm "${output}"
+      done
+    done
+  fi
+}
+
vpxenc_vp9_webm_2pass() {
if [ "$(vpxenc_can_encode_vp9)" = "yes" ] && \
[ "$(webm_io_available)" = "yes" ]; then
@@ -357,6 +389,7 @@
vpxenc_vp9_ivf
vpxenc_vp9_webm
vpxenc_vp9_webm_rt
+ vpxenc_vp9_webm_rt_multithread_tiled
vpxenc_vp9_webm_2pass
vpxenc_vp9_ivf_lossless
vpxenc_vp9_ivf_minq0_maxq0
diff --git a/vp8/common/arm/neon/vp8_subpixelvariance_neon.c b/vp8/common/arm/neon/vp8_subpixelvariance_neon.c
index 974d3b6..3c8ed11 100644
--- a/vp8/common/arm/neon/vp8_subpixelvariance_neon.c
+++ b/vp8/common/arm/neon/vp8_subpixelvariance_neon.c
@@ -12,7 +12,7 @@
#include "vpx_ports/mem.h"
#include "vpx/vpx_integer.h"
-static const uint16_t bilinear_taps_coeff[8][2] = {
+static const uint8_t bilinear_taps_coeff[8][2] = {
{128, 0},
{112, 16},
{ 96, 32},
@@ -972,9 +972,9 @@
int pixel_step,
unsigned int output_height,
unsigned int output_width,
- const uint16_t *vpx_filter) {
- const uint8x8_t f0 = vmov_n_u8((uint8_t)vpx_filter[0]);
- const uint8x8_t f1 = vmov_n_u8((uint8_t)vpx_filter[1]);
+ const uint8_t *vpx_filter) {
+ const uint8x8_t f0 = vmov_n_u8(vpx_filter[0]);
+ const uint8x8_t f1 = vmov_n_u8(vpx_filter[1]);
unsigned int i;
for (i = 0; i < output_height; ++i) {
const uint8x8_t src_0 = vld1_u8(&src_ptr[0]);
diff --git a/vp9/common/arm/neon/vp9_reconintra_neon.c b/vp9/common/arm/neon/vp9_reconintra_neon.c
index cfd5905..92706bf 100644
--- a/vp9/common/arm/neon/vp9_reconintra_neon.c
+++ b/vp9/common/arm/neon/vp9_reconintra_neon.c
@@ -358,6 +358,23 @@
vst1_u8(dst + i * stride, row);
}
+void vp9_d45_predictor_16x16_neon(uint8_t *dst, ptrdiff_t stride,
+                                  const uint8_t *above, const uint8_t *left) {
+  const uint8x16_t top = vld1q_u8(above);  // above[0..15]
+  const uint8x16_t pad = vld1q_dup_u8(above + 15);  // above[15] in every lane
+  const uint8x16_t top_plus1 = vextq_u8(top, pad, 1);
+  const uint8x16_t top_plus2 = vextq_u8(top, pad, 2);
+  // Halving add of the outer taps, then rounding halving add with the centre.
+  uint8x16_t out = vrhaddq_u8(vhaddq_u8(top, top_plus2), top_plus1);
+  int rows_left;
+  (void)left;  // Not read by this predictor.
+  for (rows_left = 15; rows_left > 0; --rows_left, dst += stride) {
+    vst1q_u8(dst, out);
+    out = vextq_u8(out, pad, 1);  // Drop lane 0, append one pad byte.
+  }
+  vst1q_u8(dst, out);
+}
+
// -----------------------------------------------------------------------------
void vp9_d135_predictor_4x4_neon(uint8_t *dst, ptrdiff_t stride,
diff --git a/vp9/common/vp9_rtcd_defs.pl b/vp9/common/vp9_rtcd_defs.pl
index 0de072a..604c03e 100644
--- a/vp9/common/vp9_rtcd_defs.pl
+++ b/vp9/common/vp9_rtcd_defs.pl
@@ -138,7 +138,7 @@
specialize qw/vp9_d207_predictor_16x16/, "$ssse3_x86inc";
add_proto qw/void vp9_d45_predictor_16x16/, "uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left";
-specialize qw/vp9_d45_predictor_16x16/, "$ssse3_x86inc";
+specialize qw/vp9_d45_predictor_16x16 neon/, "$ssse3_x86inc";
add_proto qw/void vp9_d63_predictor_16x16/, "uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left";
specialize qw/vp9_d63_predictor_16x16/, "$ssse3_x86inc";
diff --git a/vp9/encoder/vp9_aq_cyclicrefresh.c b/vp9/encoder/vp9_aq_cyclicrefresh.c
index 0e4d863..6270bf4 100644
--- a/vp9/encoder/vp9_aq_cyclicrefresh.c
+++ b/vp9/encoder/vp9_aq_cyclicrefresh.c
@@ -51,7 +51,7 @@
// Rate target ratio to set q delta.
double rate_ratio_qdelta;
// Boost factor for rate target ratio, for segment CR_SEGMENT_ID_BOOST2.
- double rate_boost_fac;
+ int rate_boost_fac;
double low_content_avg;
int qindex_delta[3];
};
@@ -129,7 +129,8 @@
else if (bsize >= BLOCK_16X16 &&
rate < cr->thresh_rate_sb &&
is_inter_block(mbmi) &&
- mbmi->mv[0].as_int == 0)
+ mbmi->mv[0].as_int == 0 &&
+ cr->rate_boost_fac > 10)
// More aggressive delta-q for bigger blocks with zero motion.
return CR_SEGMENT_ID_BOOST2;
else
@@ -464,10 +465,10 @@
cm->height <= 288 &&
rc->avg_frame_bandwidth < 3400) {
cr->motion_thresh = 4;
- cr->rate_boost_fac = 1.25;
+ cr->rate_boost_fac = 10;
} else {
cr->motion_thresh = 32;
- cr->rate_boost_fac = 1.7;
+ cr->rate_boost_fac = 17;
}
}
@@ -541,9 +542,9 @@
vp9_set_segdata(seg, CR_SEGMENT_ID_BOOST1, SEG_LVL_ALT_Q, qindex_delta);
// Set a more aggressive (higher) q delta for segment BOOST2.
- qindex_delta = compute_deltaq(cpi, cm->base_qindex,
- MIN(CR_MAX_RATE_TARGET_RATIO,
- cr->rate_boost_fac * cr->rate_ratio_qdelta));
+ qindex_delta = compute_deltaq(
+ cpi, cm->base_qindex, MIN(CR_MAX_RATE_TARGET_RATIO,
+ 0.1 * cr->rate_boost_fac * cr->rate_ratio_qdelta));
cr->qindex_delta[2] = qindex_delta;
vp9_set_segdata(seg, CR_SEGMENT_ID_BOOST2, SEG_LVL_ALT_Q, qindex_delta);
diff --git a/vp9/encoder/vp9_ratectrl.c b/vp9/encoder/vp9_ratectrl.c
index 425073f..85003f6 100644
--- a/vp9/encoder/vp9_ratectrl.c
+++ b/vp9/encoder/vp9_ratectrl.c
@@ -1596,7 +1596,10 @@
target = calc_pframe_target_size_one_pass_cbr(cpi);
vp9_rc_set_frame_target(cpi, target);
- cpi->resize_state = vp9_resize_one_pass_cbr(cpi);
+ if (cpi->oxcf.resize_mode == RESIZE_DYNAMIC)
+ cpi->resize_state = vp9_resize_one_pass_cbr(cpi);
+ else
+ cpi->resize_state = 0;
}
int vp9_compute_qdelta(const RATE_CONTROL *rc, double qstart, double qtarget,
@@ -1781,7 +1784,7 @@
++cpi->resize_buffer_underflow;
++cpi->resize_count;
// Check for resize action every "window" frames.
- if (cpi->resize_count == window) {
+ if (cpi->resize_count >= window) {
int avg_qp = cpi->resize_avg_qp / cpi->resize_count;
// Resize down if buffer level has underflowed sufficent amount in past
// window, and we are at original resolution.