Merge "Change size on first frame and change config cause crash."
diff --git a/build/make/configure.sh b/build/make/configure.sh
index 9c30441..c73e787 100644
--- a/build/make/configure.sh
+++ b/build/make/configure.sh
@@ -201,6 +201,10 @@
eval test "x\$$1" = "xno"
}
+# Iterates through positional parameters, checks to confirm the parameter has
+# not been explicitly (force) disabled, and enables the setting controlled by
+# the parameter when the setting is not disabled.
+# Note: Does NOT alter RTCD generation options ($RTCD_OPTIONS).
soft_enable() {
for var in $*; do
if ! disabled $var; then
@@ -210,6 +214,10 @@
done
}
+# Iterates through positional parameters, checks to confirm the parameter has
+# not been explicitly (force) enabled, and disables the setting controlled by
+# the parameter when the setting is not enabled.
+# Note: Does NOT alter RTCD generation options ($RTCD_OPTIONS).
soft_disable() {
for var in $*; do
if ! enabled $var; then
@@ -625,6 +633,11 @@
xcodebuild -sdk $1 -version Path 2>/dev/null
}
+# Print the major version number of the Darwin SDK specified by $1.
+show_darwin_sdk_major_version() {
+ xcrun --sdk "$1" --show-sdk-version 2>/dev/null | cut -d. -f1
+}
+
process_common_toolchain() {
if [ -z "$toolchain" ]; then
gcctarget="${CHOST:-$(gcc -dumpmachine 2> /dev/null)}"
@@ -736,7 +749,15 @@
# Handle darwin variants. Newer SDKs allow targeting older
# platforms, so use the newest one available.
case ${toolchain} in
- *-darwin*)
+ arm*-darwin*)
+ add_cflags "-miphoneos-version-min=${IOS_VERSION_MIN}"
+ iphoneos_sdk_dir="$(show_darwin_sdk_path iphoneos)"
+ if [ -d "${iphoneos_sdk_dir}" ]; then
+ add_cflags "-isysroot ${iphoneos_sdk_dir}"
+ add_ldflags "-isysroot ${iphoneos_sdk_dir}"
+ fi
+ ;;
+ x86*-darwin*)
osx_sdk_dir="$(show_darwin_sdk_path macosx)"
if [ -d "${osx_sdk_dir}" ]; then
add_cflags "-isysroot ${osx_sdk_dir}"
@@ -811,10 +832,36 @@
if disabled neon && enabled neon_asm; then
die "Disabling neon while keeping neon-asm is not supported"
fi
- soft_enable media
+ case ${toolchain} in
+ # Apple iOS SDKs no longer support armv6 as of the version 9
+ # release (coincides with release of Xcode 7). Only enable media
+ # when using earlier SDK releases.
+ *-darwin*)
+ if [ "$(show_darwin_sdk_major_version iphoneos)" -lt 9 ]; then
+ soft_enable media
+ else
+ soft_disable media
+ RTCD_OPTIONS="${RTCD_OPTIONS}--disable-media "
+ fi
+ ;;
+ *)
+ soft_enable media
+ ;;
+ esac
;;
armv6)
- soft_enable media
+ case ${toolchain} in
+ *-darwin*)
+ if [ "$(show_darwin_sdk_major_version iphoneos)" -lt 9 ]; then
+ soft_enable media
+ else
+ die "Your iOS SDK does not support armv6."
+ fi
+ ;;
+ *)
+ soft_enable media
+ ;;
+ esac
;;
esac
diff --git a/test/test.mk b/test/test.mk
index bb5186b..fde97031 100644
--- a/test/test.mk
+++ b/test/test.mk
@@ -168,7 +168,7 @@
TEST_INTRA_PRED_SPEED_SRCS-$(CONFIG_VP9) += ../md5_utils.h ../md5_utils.c
## VP10
-LIBVPX_TEST_SRCS-$(CONFIG_VP10_ENCODER) += vp10_dct_test.cc
+LIBVPX_TEST_SRCS-$(CONFIG_VP10_ENCODER) += vp10_dct_test.cc
LIBVPX_TEST_SRCS-$(CONFIG_VP10) += vp10_inv_txfm_test.cc
endif # CONFIG_SHARED
diff --git a/test/vp10_dct_test.cc b/test/vp10_dct_test.cc
index 8e49609..8fb5f4f 100644
--- a/test/vp10_dct_test.cc
+++ b/test/vp10_dct_test.cc
@@ -17,10 +17,7 @@
#include "test/util.h"
#include "./vpx_config.h"
#include "vpx_ports/msvc.h"
-
-#undef CONFIG_COEFFICIENT_RANGE_CHECKING
-#define CONFIG_COEFFICIENT_RANGE_CHECKING 1
-#include "vp10/encoder/dct.c"
+#include "vp10/encoder/dct.h"
using libvpx_test::ACMRandom;
@@ -105,8 +102,8 @@
INSTANTIATE_TEST_CASE_P(
C, Vp10FwdTxfm,
::testing::Values(
- FdctParam(&fdct4, &reference_dct_1d, 4, 1),
- FdctParam(&fdct8, &reference_dct_1d, 8, 1),
- FdctParam(&fdct16, &reference_dct_1d, 16, 2),
- FdctParam(&fdct32, &reference_dct_1d, 32, 4)));
+ FdctParam(&vp10_fdct4, &reference_dct_1d, 4, 1),
+ FdctParam(&vp10_fdct8, &reference_dct_1d, 8, 1),
+ FdctParam(&vp10_fdct16, &reference_dct_1d, 16, 2),
+ FdctParam(&vp10_fdct32_local, &reference_dct_1d, 32, 4)));
} // namespace
diff --git a/vp10/encoder/dct.c b/vp10/encoder/dct.c
index 80ace06..78f151e 100644
--- a/vp10/encoder/dct.c
+++ b/vp10/encoder/dct.c
@@ -34,7 +34,7 @@
#endif
}
-static void fdct4(const tran_low_t *input, tran_low_t *output) {
+void vp10_fdct4(const tran_low_t *input, tran_low_t *output) {
tran_high_t temp;
tran_low_t step[4];
@@ -70,7 +70,7 @@
range_check(output, 4, 13);
}
-static void fdct8(const tran_low_t *input, tran_low_t *output) {
+void vp10_fdct8(const tran_low_t *input, tran_low_t *output) {
tran_high_t temp;
tran_low_t step[8];
@@ -148,7 +148,7 @@
range_check(output, 8, 14);
}
-static void fdct16(const tran_low_t *input, tran_low_t *output) {
+void vp10_fdct16(const tran_low_t *input, tran_low_t *output) {
tran_high_t temp;
tran_low_t step[16];
@@ -322,7 +322,8 @@
range_check(output, 16, 16);
}
-static void fdct32(const tran_low_t *input, tran_low_t *output) {
+// TODO(angiebird): Unify this with vp10_fwd_txfm.c: vp10_fdct32
+void vp10_fdct32_local(const tran_low_t *input, tran_low_t *output) {
tran_high_t temp;
tran_low_t step[32];
@@ -995,24 +996,24 @@
}
static const transform_2d FHT_4[] = {
- { fdct4, fdct4 }, // DCT_DCT = 0
- { fadst4, fdct4 }, // ADST_DCT = 1
- { fdct4, fadst4 }, // DCT_ADST = 2
- { fadst4, fadst4 } // ADST_ADST = 3
+ { vp10_fdct4, vp10_fdct4 }, // DCT_DCT = 0
+ { fadst4, vp10_fdct4 }, // ADST_DCT = 1
+ { vp10_fdct4, fadst4 }, // DCT_ADST = 2
+ { fadst4, fadst4 } // ADST_ADST = 3
};
static const transform_2d FHT_8[] = {
- { fdct8, fdct8 }, // DCT_DCT = 0
- { fadst8, fdct8 }, // ADST_DCT = 1
- { fdct8, fadst8 }, // DCT_ADST = 2
- { fadst8, fadst8 } // ADST_ADST = 3
+ { vp10_fdct8, vp10_fdct8 }, // DCT_DCT = 0
+ { fadst8, vp10_fdct8 }, // ADST_DCT = 1
+ { vp10_fdct8, fadst8 }, // DCT_ADST = 2
+ { fadst8, fadst8 } // ADST_ADST = 3
};
static const transform_2d FHT_16[] = {
- { fdct16, fdct16 }, // DCT_DCT = 0
- { fadst16, fdct16 }, // ADST_DCT = 1
- { fdct16, fadst16 }, // DCT_ADST = 2
- { fadst16, fadst16 } // ADST_ADST = 3
+ { vp10_fdct16, vp10_fdct16 }, // DCT_DCT = 0
+ { fadst16, vp10_fdct16 }, // ADST_DCT = 1
+ { vp10_fdct16, fadst16 }, // DCT_ADST = 2
+ { fadst16, fadst16 } // ADST_ADST = 3
};
void vp10_fht4x4_c(const int16_t *input, tran_low_t *output,
@@ -1123,7 +1124,7 @@
// Rows
for (i = 0; i < 8; ++i) {
- fdct8(&intermediate[i * 8], &coeff_ptr[i * 8]);
+ vp10_fdct8(&intermediate[i * 8], &coeff_ptr[i * 8]);
for (j = 0; j < 8; ++j)
coeff_ptr[j + i * 8] /= 2;
}
diff --git a/vp10/encoder/dct.h b/vp10/encoder/dct.h
new file mode 100644
index 0000000..ab0db93
--- /dev/null
+++ b/vp10/encoder/dct.h
@@ -0,0 +1,28 @@
+/*
+ * Copyright (c) 2015 The WebM project authors. All Rights Reserved.
+ *
+ * Use of this source code is governed by a BSD-style license
+ * that can be found in the LICENSE file in the root of the source
+ * tree. An additional intellectual property rights grant can be found
+ * in the file PATENTS. All contributing project authors may
+ * be found in the AUTHORS file in the root of the source tree.
+ */
+#ifndef VP10_ENCODER_DCT_H_
+#define VP10_ENCODER_DCT_H_
+
+#include "vpx_dsp/vpx_dsp_common.h"
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+// 1-D forward DCT kernels; non-static so test/vp10_dct_test.cc can call them.
+void vp10_fdct4(const tran_low_t *input, tran_low_t *output);
+void vp10_fdct8(const tran_low_t *input, tran_low_t *output);
+void vp10_fdct16(const tran_low_t *input, tran_low_t *output);
+void vp10_fdct32_local(const tran_low_t *input, tran_low_t *output);
+
+#ifdef __cplusplus
+} // extern "C"
+#endif
+
+#endif // VP10_ENCODER_DCT_H_
diff --git a/vp10/encoder/rdopt.c b/vp10/encoder/rdopt.c
index ea527b1..20b7d50 100644
--- a/vp10/encoder/rdopt.c
+++ b/vp10/encoder/rdopt.c
@@ -653,6 +653,7 @@
} else if (s[n]) {
if (is_inter_block(mbmi)) {
rd[n][0] = rd[n][1] = RDCOST(x->rdmult, x->rddiv, s1, sse[n]);
+ r[n][1] -= r_tx_size;
} else {
rd[n][0] = RDCOST(x->rdmult, x->rddiv, s1, sse[n]);
rd[n][1] = RDCOST(x->rdmult, x->rddiv, s1 + r_tx_size, sse[n]);
@@ -662,6 +663,11 @@
rd[n][1] = RDCOST(x->rdmult, x->rddiv, r[n][1] + s0, d[n]);
}
+ if (is_inter_block(mbmi) && !xd->lossless && !s[n] && sse[n] != INT64_MAX) {
+ rd[n][0] = VPXMIN(rd[n][0], RDCOST(x->rdmult, x->rddiv, s1, sse[n]));
+ rd[n][1] = VPXMIN(rd[n][1], RDCOST(x->rdmult, x->rddiv, s1, sse[n]));
+ }
+
// Early termination in transform size search.
if (cpi->sf.tx_size_search_breakout &&
(rd[n][1] == INT64_MAX ||
diff --git a/vp10/vp10cx.mk b/vp10/vp10cx.mk
index ead993a..7393a4e 100644
--- a/vp10/vp10cx.mk
+++ b/vp10/vp10cx.mk
@@ -23,6 +23,7 @@
VP10_CX_SRCS-yes += encoder/context_tree.h
VP10_CX_SRCS-yes += encoder/cost.h
VP10_CX_SRCS-yes += encoder/cost.c
+VP10_CX_SRCS-yes += encoder/dct.h
VP10_CX_SRCS-yes += encoder/dct.c
VP10_CX_SRCS-$(CONFIG_VP9_TEMPORAL_DENOISING) += encoder/denoiser.c
VP10_CX_SRCS-$(CONFIG_VP9_TEMPORAL_DENOISING) += encoder/denoiser.h
diff --git a/vp9/common/vp9_blockd.h b/vp9/common/vp9_blockd.h
index c8ef367..5683736 100644
--- a/vp9/common/vp9_blockd.h
+++ b/vp9/common/vp9_blockd.h
@@ -176,7 +176,6 @@
int mb_to_bottom_edge;
FRAME_CONTEXT *fc;
- int frame_parallel_decoding_mode;
/* pointers to reference frames */
RefBuffer *block_refs[2];
diff --git a/vp9/common/vp9_onyxc_int.h b/vp9/common/vp9_onyxc_int.h
index 5d8eb90..61e731a 100644
--- a/vp9/common/vp9_onyxc_int.h
+++ b/vp9/common/vp9_onyxc_int.h
@@ -364,7 +364,6 @@
memcpy(xd->plane[i].seg_dequant, cm->uv_dequant, sizeof(cm->uv_dequant));
}
xd->fc = cm->fc;
- xd->frame_parallel_decoding_mode = cm->frame_parallel_decoding_mode;
}
xd->above_seg_context = cm->above_seg_context;
diff --git a/vp9/encoder/vp9_aq_cyclicrefresh.c b/vp9/encoder/vp9_aq_cyclicrefresh.c
index e17b397..bb1e179 100644
--- a/vp9/encoder/vp9_aq_cyclicrefresh.c
+++ b/vp9/encoder/vp9_aq_cyclicrefresh.c
@@ -447,7 +447,7 @@
cr->rate_boost_fac = 10;
} else {
cr->motion_thresh = 32;
- cr->rate_boost_fac = 17;
+ cr->rate_boost_fac = 15;
}
if (cpi->svc.spatial_layer_id > 0) {
cr->motion_thresh = 4;
diff --git a/vp9/encoder/vp9_pickmode.c b/vp9/encoder/vp9_pickmode.c
index 19b0beb..fc4d9ae 100644
--- a/vp9/encoder/vp9_pickmode.c
+++ b/vp9/encoder/vp9_pickmode.c
@@ -1238,10 +1238,12 @@
if (const_motion[ref_frame] && this_mode == NEARMV)
continue;
- i = (ref_frame == LAST_FRAME) ? GOLDEN_FRAME : LAST_FRAME;
- if ((cpi->ref_frame_flags & flag_list[i]) && sf->reference_masking)
- if (x->pred_mv_sad[ref_frame] > (x->pred_mv_sad[i] << 1))
- ref_frame_skip_mask |= (1 << ref_frame);
+ if (!(this_mode == ZEROMV && ref_frame == LAST_FRAME)) {
+ i = (ref_frame == LAST_FRAME) ? GOLDEN_FRAME : LAST_FRAME;
+ if ((cpi->ref_frame_flags & flag_list[i]) && sf->reference_masking)
+ if (x->pred_mv_sad[ref_frame] > (x->pred_mv_sad[i] << 1))
+ ref_frame_skip_mask |= (1 << ref_frame);
+ }
if (ref_frame_skip_mask & (1 << ref_frame))
continue;
@@ -1530,11 +1532,13 @@
(!x->skip && best_rdc.rdcost > inter_mode_thresh &&
bsize <= cpi->sf.max_intra_bsize)) {
struct estimate_block_intra_args args = { cpi, x, DC_PRED, 0, 0 };
- const TX_SIZE intra_tx_size =
- VPXMIN(max_txsize_lookup[bsize],
- tx_mode_to_biggest_tx_size[cpi->common.tx_mode]);
int i;
TX_SIZE best_intra_tx_size = TX_SIZES;
+ TX_SIZE intra_tx_size =
+ VPXMIN(max_txsize_lookup[bsize],
+ tx_mode_to_biggest_tx_size[cpi->common.tx_mode]);
+ if (cpi->oxcf.content != VP9E_CONTENT_SCREEN && intra_tx_size > TX_16X16)
+ intra_tx_size = TX_16X16;
if (reuse_inter_pred && best_pred != NULL) {
if (best_pred->data == orig_dst.buf) {
diff --git a/vp9/encoder/vp9_rdopt.c b/vp9/encoder/vp9_rdopt.c
index 3c84a77..0bffcba 100644
--- a/vp9/encoder/vp9_rdopt.c
+++ b/vp9/encoder/vp9_rdopt.c
@@ -664,6 +664,7 @@
} else if (s[n]) {
if (is_inter_block(mbmi)) {
rd[n][0] = rd[n][1] = RDCOST(x->rdmult, x->rddiv, s1, sse[n]);
+ r[n][1] -= r_tx_size;
} else {
rd[n][0] = RDCOST(x->rdmult, x->rddiv, s1, sse[n]);
rd[n][1] = RDCOST(x->rdmult, x->rddiv, s1 + r_tx_size, sse[n]);
@@ -673,6 +674,11 @@
rd[n][1] = RDCOST(x->rdmult, x->rddiv, r[n][1] + s0, d[n]);
}
+ if (is_inter_block(mbmi) && !xd->lossless && !s[n] && sse[n] != INT64_MAX) {
+ rd[n][0] = VPXMIN(rd[n][0], RDCOST(x->rdmult, x->rddiv, s1, sse[n]));
+ rd[n][1] = VPXMIN(rd[n][1], RDCOST(x->rdmult, x->rddiv, s1, sse[n]));
+ }
+
// Early termination in transform size search.
if (cpi->sf.tx_size_search_breakout &&
(rd[n][1] == INT64_MAX ||
diff --git a/vpx_dsp/x86/vpx_subpixel_8t_intrin_ssse3.c b/vpx_dsp/x86/vpx_subpixel_8t_intrin_ssse3.c
index 8d5c7c2..6fd5208 100644
--- a/vpx_dsp/x86/vpx_subpixel_8t_intrin_ssse3.c
+++ b/vpx_dsp/x86/vpx_subpixel_8t_intrin_ssse3.c
@@ -291,123 +291,6 @@
}
}
-#if ARCH_X86_64
-static void vpx_filter_block1d16_v8_intrin_ssse3(const uint8_t *src_ptr,
- ptrdiff_t src_pitch,
- uint8_t *output_ptr,
- ptrdiff_t out_pitch,
- uint32_t output_height,
- const int16_t *filter) {
- __m128i addFilterReg64, filtersReg, srcRegFilt1, srcRegFilt3;
- __m128i firstFilters, secondFilters, thirdFilters, forthFilters;
- __m128i srcRegFilt5, srcRegFilt6, srcRegFilt7, srcRegFilt8;
- __m128i srcReg1, srcReg2, srcReg3, srcReg4, srcReg5, srcReg6, srcReg7;
- __m128i srcReg8;
- unsigned int i;
-
- // create a register with 0,64,0,64,0,64,0,64,0,64,0,64,0,64,0,64
- addFilterReg64 = _mm_set1_epi32((int)0x0400040u);
- filtersReg = _mm_loadu_si128((const __m128i *)filter);
- // converting the 16 bit (short) to 8 bit (byte) and have the same data
- // in both lanes of 128 bit register.
- filtersReg =_mm_packs_epi16(filtersReg, filtersReg);
-
- // duplicate only the first 16 bits in the filter
- firstFilters = _mm_shuffle_epi8(filtersReg, _mm_set1_epi16(0x100u));
- // duplicate only the second 16 bits in the filter
- secondFilters = _mm_shuffle_epi8(filtersReg, _mm_set1_epi16(0x302u));
- // duplicate only the third 16 bits in the filter
- thirdFilters = _mm_shuffle_epi8(filtersReg, _mm_set1_epi16(0x504u));
- // duplicate only the forth 16 bits in the filter
- forthFilters = _mm_shuffle_epi8(filtersReg, _mm_set1_epi16(0x706u));
-
- // load the first 7 rows of 16 bytes
- srcReg1 = _mm_loadu_si128((const __m128i *)(src_ptr));
- srcReg2 = _mm_loadu_si128((const __m128i *)(src_ptr + src_pitch));
- srcReg3 = _mm_loadu_si128((const __m128i *)(src_ptr + src_pitch * 2));
- srcReg4 = _mm_loadu_si128((const __m128i *)(src_ptr + src_pitch * 3));
- srcReg5 = _mm_loadu_si128((const __m128i *)(src_ptr + src_pitch * 4));
- srcReg6 = _mm_loadu_si128((const __m128i *)(src_ptr + src_pitch * 5));
- srcReg7 = _mm_loadu_si128((const __m128i *)(src_ptr + src_pitch * 6));
-
- for (i = 0; i < output_height; i++) {
- // load the last 16 bytes
- srcReg8 = _mm_loadu_si128((const __m128i *)(src_ptr + src_pitch * 7));
-
- // merge the result together
- srcRegFilt5 = _mm_unpacklo_epi8(srcReg1, srcReg2);
- srcRegFilt6 = _mm_unpacklo_epi8(srcReg7, srcReg8);
- srcRegFilt1 = _mm_unpackhi_epi8(srcReg1, srcReg2);
- srcRegFilt3 = _mm_unpackhi_epi8(srcReg7, srcReg8);
-
- // multiply 2 adjacent elements with the filter and add the result
- srcRegFilt5 = _mm_maddubs_epi16(srcRegFilt5, firstFilters);
- srcRegFilt6 = _mm_maddubs_epi16(srcRegFilt6, forthFilters);
- srcRegFilt1 = _mm_maddubs_epi16(srcRegFilt1, firstFilters);
- srcRegFilt3 = _mm_maddubs_epi16(srcRegFilt3, forthFilters);
-
- // add and saturate the results together
- srcRegFilt5 = _mm_adds_epi16(srcRegFilt5, srcRegFilt6);
- srcRegFilt1 = _mm_adds_epi16(srcRegFilt1, srcRegFilt3);
-
- // merge the result together
- srcRegFilt3 = _mm_unpacklo_epi8(srcReg3, srcReg4);
- srcRegFilt6 = _mm_unpackhi_epi8(srcReg3, srcReg4);
-
- // multiply 2 adjacent elements with the filter and add the result
- srcRegFilt3 = _mm_maddubs_epi16(srcRegFilt3, secondFilters);
- srcRegFilt6 = _mm_maddubs_epi16(srcRegFilt6, secondFilters);
-
- // merge the result together
- srcRegFilt7 = _mm_unpacklo_epi8(srcReg5, srcReg6);
- srcRegFilt8 = _mm_unpackhi_epi8(srcReg5, srcReg6);
-
- // multiply 2 adjacent elements with the filter and add the result
- srcRegFilt7 = _mm_maddubs_epi16(srcRegFilt7, thirdFilters);
- srcRegFilt8 = _mm_maddubs_epi16(srcRegFilt8, thirdFilters);
-
- // add and saturate the results together
- srcRegFilt5 = _mm_adds_epi16(srcRegFilt5,
- _mm_min_epi16(srcRegFilt3, srcRegFilt7));
- srcRegFilt1 = _mm_adds_epi16(srcRegFilt1,
- _mm_min_epi16(srcRegFilt6, srcRegFilt8));
-
- // add and saturate the results together
- srcRegFilt5 = _mm_adds_epi16(srcRegFilt5,
- _mm_max_epi16(srcRegFilt3, srcRegFilt7));
- srcRegFilt1 = _mm_adds_epi16(srcRegFilt1,
- _mm_max_epi16(srcRegFilt6, srcRegFilt8));
- srcRegFilt5 = _mm_adds_epi16(srcRegFilt5, addFilterReg64);
- srcRegFilt1 = _mm_adds_epi16(srcRegFilt1, addFilterReg64);
-
- // shift by 7 bit each 16 bit
- srcRegFilt5 = _mm_srai_epi16(srcRegFilt5, 7);
- srcRegFilt1 = _mm_srai_epi16(srcRegFilt1, 7);
-
- // shrink to 8 bit each 16 bits, the first lane contain the first
- // convolve result and the second lane contain the second convolve
- // result
- srcRegFilt1 = _mm_packus_epi16(srcRegFilt5, srcRegFilt1);
-
- src_ptr+=src_pitch;
-
- // shift down a row
- srcReg1 = srcReg2;
- srcReg2 = srcReg3;
- srcReg3 = srcReg4;
- srcReg4 = srcReg5;
- srcReg5 = srcReg6;
- srcReg6 = srcReg7;
- srcReg7 = srcReg8;
-
- // save 16 bytes convolve result
- _mm_store_si128((__m128i*)output_ptr, srcRegFilt1);
-
- output_ptr+=out_pitch;
- }
-}
-#endif // ARCH_X86_64
-
filter8_1dfunction vpx_filter_block1d16_v8_ssse3;
filter8_1dfunction vpx_filter_block1d16_h8_ssse3;
filter8_1dfunction vpx_filter_block1d8_v8_ssse3;
diff --git a/vpx_ports/bitops.h b/vpx_ports/bitops.h
index 0d3223e..84ff365 100644
--- a/vpx_ports/bitops.h
+++ b/vpx_ports/bitops.h
@@ -11,6 +11,8 @@
#ifndef VPX_PORTS_BITOPS_H_
#define VPX_PORTS_BITOPS_H_
+#include <assert.h>
+
#include "vpx_ports/msvc.h"
#ifdef _MSC_VER
@@ -25,10 +27,15 @@
extern "C" {
#endif
+// These versions of get_msb() are only valid when n != 0 because all
+// of the optimized versions are undefined when n == 0:
+// https://gcc.gnu.org/onlinedocs/gcc/Other-Builtins.html
+
// use GNU builtins where available.
#if defined(__GNUC__) && \
((__GNUC__ == 3 && __GNUC_MINOR__ >= 4) || __GNUC__ >= 4)
static INLINE int get_msb(unsigned int n) {
+ assert(n != 0);
return 31 ^ __builtin_clz(n);
}
#elif defined(USE_MSC_INTRINSICS)
@@ -36,6 +43,7 @@
static INLINE int get_msb(unsigned int n) {
unsigned long first_set_bit;
+ assert(n != 0);
_BitScanReverse(&first_set_bit, n);
return first_set_bit;
}
@@ -47,6 +55,8 @@
unsigned int value = n;
int i;
+ assert(n != 0);
+
for (i = 4; i >= 0; --i) {
const int shift = (1 << i);
const unsigned int x = value >> shift;