Merge "Moving #defines to *.c from *.h."
diff --git a/test/decode_to_md5.sh b/test/decode_to_md5.sh
index da1a870..f64acc8 100755
--- a/test/decode_to_md5.sh
+++ b/test/decode_to_md5.sh
@@ -24,22 +24,25 @@
fi
}
-# Runs decode_to_md5 on $1 and echoes the MD5 sum for the final frame. $2 is
-# interpreted as codec name and used solely to name the output file.
+# Runs decode_to_md5 on $1 and captures the md5 sum for the final frame. $2 is
+# interpreted as codec name and used solely to name the output file. $3 is the
+# expected md5 sum: It must match that of the final frame.
decode_to_md5() {
local decoder="${LIBVPX_BIN_PATH}/decode_to_md5${VPX_TEST_EXE_SUFFIX}"
local input_file="$1"
local codec="$2"
+ local expected_md5="$3"
local output_file="${VPX_TEST_OUTPUT_DIR}/decode_to_md5_${codec}"
[ -x "${decoder}" ] || return 1
- "${decoder}" "${input_file}" "${output_file}" > /dev/null 2>&1
+ eval "${decoder}" "${input_file}" "${output_file}" ${devnull}
[ -e "${output_file}" ] || return 1
local md5_last_frame=$(tail -n1 "${output_file}")
- echo "${md5_last_frame% *}" | tr -d [:space:]
+ local actual_md5=$(echo "${md5_last_frame% *}" | tr -d [:space:])
+ [ "${actual_md5}" = "${expected_md5}" ] || return 1
}
decode_to_md5_vp8() {
@@ -47,8 +50,7 @@
local expected_md5="56794d911b02190212bca92f88ad60c6"
if [ "$(vp8_decode_available)" = "yes" ]; then
- local actual_md5="$(decode_to_md5 "${VP8_IVF_FILE}" vp8)" || return 1
- [ "${actual_md5}" = "${expected_md5}" ] || return 1
+ decode_to_md5 "${VP8_IVF_FILE}" "vp8" "${expected_md5}"
fi
}
@@ -57,8 +59,7 @@
local expected_md5="2952c0eae93f3dadd1aa84c50d3fd6d2"
if [ "$(vp9_decode_available)" = "yes" ]; then
- local actual_md5="$(decode_to_md5 "${VP9_IVF_FILE}" vp9)" || return 1
- [ "${actual_md5}" = "${expected_md5}" ] || return 1
+ decode_to_md5 "${VP9_IVF_FILE}" "vp9" "${expected_md5}"
fi
}
diff --git a/test/decode_with_drops.sh b/test/decode_with_drops.sh
index d0321bf..82e934d 100755
--- a/test/decode_with_drops.sh
+++ b/test/decode_with_drops.sh
@@ -36,7 +36,7 @@
[ -x "${decoder}" ] || return 1
- "${decoder}" "${input_file}" "${output_file}" "${drop_mode}" > /dev/null 2>&1
+ eval "${decoder}" "${input_file}" "${output_file}" "${drop_mode}" ${devnull}
[ -e "${output_file}" ] || return 1
}
diff --git a/test/simple_decoder.sh b/test/simple_decoder.sh
index a0db58f..24b17c5 100755
--- a/test/simple_decoder.sh
+++ b/test/simple_decoder.sh
@@ -34,7 +34,7 @@
[ -x "${decoder}" ] || return 1
- "${decoder}" "${input_file}" "${output_file}" > /dev/null 2>&1
+ eval "${decoder}" "${input_file}" "${output_file}" ${devnull}
[ -e "${output_file}" ] || return 1
}
diff --git a/test/simple_encoder.sh b/test/simple_encoder.sh
index 13f5e29..6232093 100755
--- a/test/simple_encoder.sh
+++ b/test/simple_encoder.sh
@@ -31,8 +31,9 @@
[ -x "${encoder}" ] || return 1
- "${encoder}" "${codec}" "${YUV_RAW_INPUT_WIDTH}" "${YUV_RAW_INPUT_HEIGHT}" \
- "${YUV_RAW_INPUT}" "${output_file}" 9999 > /dev/null 2>&1
+ eval "${encoder}" "${codec}" "${YUV_RAW_INPUT_WIDTH}" \
+ "${YUV_RAW_INPUT_HEIGHT}" "${YUV_RAW_INPUT}" "${output_file}" 9999 \
+ ${devnull}
[ -e "${output_file}" ] || return 1
}
diff --git a/test/tools_common.sh b/test/tools_common.sh
index 1ed0176..30f0fae 100755
--- a/test/tools_common.sh
+++ b/test/tools_common.sh
@@ -15,6 +15,7 @@
VPX_TEST_TOOLS_COMMON_SH=included
set -e
+devnull='> /dev/null 2>&1'
vlog() {
[ "${VPX_TEST_VERBOSE_OUTPUT}" = "yes" ] && echo "$@"
@@ -197,9 +198,9 @@
local decoder="${LIBVPX_BIN_PATH}/vpxdec${VPX_TEST_EXE_SUFFIX}"
if [ -z "${pipe_input}" ]; then
- "${decoder}" "$input" --summary --noblit "$@" > /dev/null 2>&1
+ eval "${decoder}" "$input" --summary --noblit "$@" ${devnull}
else
- cat "${input}" | "${decoder}" - --summary --noblit "$@" > /dev/null 2>&1
+ cat "${input}" | eval "${decoder}" - --summary --noblit "$@" ${devnull}
fi
}
@@ -245,14 +246,16 @@
fi
if [ -z "${pipe_input}" ]; then
- "${encoder}" --codec=${codec} --width=${width} --height=${height} \
+ eval "${encoder}" --codec=${codec} --width=${width} --height=${height} \
--limit=${frames} ${use_ivf} ${extra_flags} --output="${output}" \
- "${input}" > /dev/null 2>&1
+ "${input}" \
+ ${devnull}
else
cat "${input}" \
- | "${encoder}" --codec=${codec} --width=${width} --height=${height} \
- --limit=${frames} ${use_ivf} ${extra_flags} --output="${output}" - \
- > /dev/null 2>&1
+ | eval "${encoder}" --codec=${codec} --width=${width} \
+ --height=${height} --limit=${frames} ${use_ivf} ${extra_flags} \
+ --output="${output}" - \
+ ${devnull}
fi
if [ ! -e "${output}" ]; then
@@ -336,6 +339,7 @@
--run-disabled-tests: Run disabled tests.
--help: Display this message and exit.
--test-data-path <path to libvpx test data directory>
+ --show-program-output: Shows output from all programs being tested.
--verbose: Verbose output.
When the --bin-path option is not specified the script attempts to use
@@ -388,6 +392,9 @@
--verbose)
VPX_TEST_VERBOSE_OUTPUT=yes
;;
+ --show-program-output)
+ devnull=
+ ;;
*)
vpx_test_usage
exit 1
@@ -445,6 +452,7 @@
VPX_TEST_OUTPUT_DIR=${VPX_TEST_OUTPUT_DIR}
VPX_TEST_VERBOSE_OUTPUT=${VPX_TEST_VERBOSE_OUTPUT}
VPX_TEST_FILTER=${VPX_TEST_FILTER}
- VPX_TEST_RUN_DISABLED_TESTS=${VPX_TEST_RUN_DISABLED_TESTS}"
+ VPX_TEST_RUN_DISABLED_TESTS=${VPX_TEST_RUN_DISABLED_TESTS}
+ VPX_TEST_SHOW_PROGRAM_OUTPUT=${VPX_TEST_SHOW_PROGRAM_OUTPUT}"
fi # End $VPX_TEST_TOOLS_COMMON_SH pseudo include guard.
diff --git a/vp9/common/vp9_rtcd_defs.pl b/vp9/common/vp9_rtcd_defs.pl
index 99fd6ca..d4c3065 100644
--- a/vp9/common/vp9_rtcd_defs.pl
+++ b/vp9/common/vp9_rtcd_defs.pl
@@ -707,7 +707,7 @@
specialize qw/vp9_fdct4x4 sse2 avx2/;
add_proto qw/void vp9_fdct8x8/, "const int16_t *input, int16_t *output, int stride";
-specialize qw/vp9_fdct8x8 sse2 avx2/;
+specialize qw/vp9_fdct8x8 sse2 avx2/, "$ssse3_x86_64";
add_proto qw/void vp9_fdct16x16/, "const int16_t *input, int16_t *output, int stride";
specialize qw/vp9_fdct16x16 sse2 avx2/;
diff --git a/vp9/decoder/vp9_decodeframe.c b/vp9/decoder/vp9_decodeframe.c
index 1cc7ab7..45ebb2f 100644
--- a/vp9/decoder/vp9_decodeframe.c
+++ b/vp9/decoder/vp9_decodeframe.c
@@ -1370,7 +1370,8 @@
"A stream must start with a complete key frame");
}
- if (!cm->error_resilient_mode && !cm->frame_parallel_decoding_mode) {
+ if (!cm->error_resilient_mode && !cm->frame_parallel_decoding_mode &&
+ !new_fb->corrupted) {
vp9_adapt_coef_probs(cm);
if (!frame_is_intra_only(cm)) {
diff --git a/vp9/decoder/vp9_decoder.c b/vp9/decoder/vp9_decoder.c
index 385b2eb..abcff9f 100644
--- a/vp9/decoder/vp9_decoder.c
+++ b/vp9/decoder/vp9_decoder.c
@@ -331,10 +331,6 @@
ret = vp9_post_proc_frame(&pbi->common, sd, flags);
#else
*sd = *pbi->common.frame_to_show;
- sd->y_width = pbi->common.width;
- sd->y_height = pbi->common.height;
- sd->uv_width = sd->y_width >> pbi->common.subsampling_x;
- sd->uv_height = sd->y_height >> pbi->common.subsampling_y;
ret = 0;
#endif /*!CONFIG_POSTPROC*/
vp9_clear_system_state();
diff --git a/vp9/encoder/vp9_encodeframe.c b/vp9/encoder/vp9_encodeframe.c
index 2e44f7d..19aa592 100644
--- a/vp9/encoder/vp9_encodeframe.c
+++ b/vp9/encoder/vp9_encodeframe.c
@@ -560,11 +560,6 @@
return act < (8 << 12) ? MIN(act, 5 << 12) : act;
}
-// Stub for alternative experimental activity measures.
-static unsigned int alt_activity_measure(MACROBLOCK *x, int use_dc_pred) {
- return vp9_encode_intra(x, use_dc_pred);
-}
-
static void update_state(VP9_COMP *cpi, PICK_MODE_CONTEXT *ctx,
int mi_row, int mi_col, BLOCK_SIZE bsize,
int output_enabled) {
diff --git a/vp9/encoder/vp9_encodemb.c b/vp9/encoder/vp9_encodemb.c
index baa4665..d71b16f 100644
--- a/vp9/encoder/vp9_encodemb.c
+++ b/vp9/encoder/vp9_encodemb.c
@@ -601,15 +601,3 @@
vp9_foreach_transformed_block_in_plane(xd, bsize, plane, encode_block_intra,
&arg);
}
-
-int vp9_encode_intra(MACROBLOCK *x, int use_16x16_pred) {
- MB_MODE_INFO * mbmi = &x->e_mbd.mi[0]->mbmi;
- x->skip_encode = 0;
- mbmi->mode = DC_PRED;
- mbmi->ref_frame[0] = INTRA_FRAME;
- mbmi->tx_size = use_16x16_pred ? (mbmi->sb_type >= BLOCK_16X16 ? TX_16X16
- : TX_8X8)
- : TX_4X4;
- vp9_encode_intra_block_plane(x, mbmi->sb_type, 0);
- return vp9_get_mb_ss(x->plane[0].src_diff);
-}
diff --git a/vp9/encoder/vp9_encodemb.h b/vp9/encoder/vp9_encodemb.h
index edef1e2..8021459 100644
--- a/vp9/encoder/vp9_encodemb.h
+++ b/vp9/encoder/vp9_encodemb.h
@@ -34,8 +34,6 @@
void vp9_encode_intra_block_plane(MACROBLOCK *x, BLOCK_SIZE bsize, int plane);
-int vp9_encode_intra(MACROBLOCK *x, int use_16x16_pred);
-
#ifdef __cplusplus
} // extern "C"
#endif
diff --git a/vp9/encoder/vp9_encoder.c b/vp9/encoder/vp9_encoder.c
index 395d26a..f3ab4ed 100644
--- a/vp9/encoder/vp9_encoder.c
+++ b/vp9/encoder/vp9_encoder.c
@@ -162,6 +162,7 @@
vp9_rc_init_minq_luts();
vp9_entropy_mv_init();
vp9_entropy_mode_init();
+ vp9_temporal_filter_init();
init_done = 1;
}
}
@@ -536,7 +537,6 @@
static void init_config(struct VP9_COMP *cpi, VP9EncoderConfig *oxcf) {
VP9_COMMON *const cm = &cpi->common;
- int i;
cpi->oxcf = *oxcf;
@@ -571,10 +571,6 @@
cpi->alt_fb_idx = 2;
set_tile_limits(cpi);
-
- cpi->fixed_divide[0] = 0;
- for (i = 1; i < 512; i++)
- cpi->fixed_divide[i] = 0x80000 / i;
}
static int get_pass(MODE mode) {
diff --git a/vp9/encoder/vp9_encoder.h b/vp9/encoder/vp9_encoder.h
index 132b479..6b97370 100644
--- a/vp9/encoder/vp9_encoder.h
+++ b/vp9/encoder/vp9_encoder.h
@@ -443,7 +443,6 @@
YV12_BUFFER_CONFIG alt_ref_buffer;
YV12_BUFFER_CONFIG *frames[MAX_LAG_BUFFERS];
- int fixed_divide[512];
#if CONFIG_INTERNAL_STATS
unsigned int mode_chosen_counts[MAX_MODES];
@@ -594,7 +593,8 @@
// alt ref frames tend to be coded at a higher than ambient quality
static INLINE int frame_is_boosted(const VP9_COMP *cpi) {
return frame_is_intra_only(&cpi->common) || cpi->refresh_alt_ref_frame ||
- (cpi->refresh_golden_frame && !cpi->rc.is_src_frame_alt_ref);
+ (cpi->refresh_golden_frame && !cpi->rc.is_src_frame_alt_ref) ||
+ vp9_is_upper_layer_key_frame(cpi);
}
static INLINE int get_token_alloc(int mb_rows, int mb_cols) {
diff --git a/vp9/encoder/vp9_firstpass.c b/vp9/encoder/vp9_firstpass.c
index b408ced..1f995c9 100644
--- a/vp9/encoder/vp9_firstpass.c
+++ b/vp9/encoder/vp9_firstpass.c
@@ -604,7 +604,13 @@
}
// Do intra 16x16 prediction.
- this_error = vp9_encode_intra(x, use_dc_pred);
+ x->skip_encode = 0;
+ xd->mi[0]->mbmi.mode = DC_PRED;
+ xd->mi[0]->mbmi.tx_size = use_dc_pred ?
+ (bsize >= BLOCK_16X16 ? TX_16X16 : TX_8X8) : TX_4X4;
+ vp9_encode_intra_block_plane(x, bsize, 0);
+ this_error = vp9_get_mb_ss(x->plane[0].src_diff);
+
if (cpi->oxcf.aq_mode == VARIANCE_AQ) {
vp9_clear_system_state();
this_error = (int)(this_error * error_weight);
@@ -1734,8 +1740,8 @@
twopass->gf_bits = gf_bits;
}
if (i == 1 ||
- (!rc->source_alt_ref_pending &&
- cpi->common.frame_type != KEY_FRAME)) {
+ (!rc->source_alt_ref_pending && cpi->common.frame_type != KEY_FRAME &&
+ !vp9_is_upper_layer_key_frame(cpi))) {
// Calculate the per frame bit target for this frame.
vp9_rc_set_frame_target(cpi, gf_bits);
}
@@ -2297,11 +2303,16 @@
this_frame_copy = this_frame;
find_next_key_frame(cpi, &this_frame_copy);
// Don't place key frame in any enhancement layers in spatial svc
- if (cpi->use_svc && cpi->svc.number_temporal_layers == 1 &&
- cpi->svc.spatial_layer_id > 0) {
- cm->frame_type = INTER_FRAME;
+ if (cpi->use_svc && cpi->svc.number_temporal_layers == 1) {
+ lc->is_key_frame = 1;
+ if (cpi->svc.spatial_layer_id > 0) {
+ cm->frame_type = INTER_FRAME;
+ }
}
} else {
+ if (cpi->use_svc && cpi->svc.number_temporal_layers == 1) {
+ lc->is_key_frame = 0;
+ }
cm->frame_type = INTER_FRAME;
}
@@ -2399,9 +2410,11 @@
cpi->twopass.bits_left = MAX(cpi->twopass.bits_left, 0);
#ifdef LONG_TERM_VBR_CORRECTION
- if (cpi->common.frame_type != KEY_FRAME) {
+ if (cpi->common.frame_type != KEY_FRAME &&
+ !vp9_is_upper_layer_key_frame(cpi)) {
#else
- if (cpi->common.frame_type == KEY_FRAME) {
+ if (cpi->common.frame_type == KEY_FRAME ||
+ vp9_is_upper_layer_key_frame(cpi)) {
// For key frames kf_group_bits already had the target bits subtracted out.
// So now update to the correct value based on the actual bits used.
cpi->twopass.kf_group_bits += cpi->rc.this_frame_target - bits_used;
diff --git a/vp9/encoder/vp9_ratectrl.c b/vp9/encoder/vp9_ratectrl.c
index 3658ee6..24e75ae 100644
--- a/vp9/encoder/vp9_ratectrl.c
+++ b/vp9/encoder/vp9_ratectrl.c
@@ -809,7 +809,7 @@
int active_worst_quality = cpi->twopass.active_worst_quality;
int q;
- if (frame_is_intra_only(cm)) {
+ if (frame_is_intra_only(cm) || vp9_is_upper_layer_key_frame(cpi)) {
#if !CONFIG_MULTIPLE_ARF
// Handle the special case for key frames forced when we have75 reached
// the maximum key frame interval. Here force the Q to a range
@@ -930,7 +930,8 @@
vp9_clear_system_state();
// Limit Q range for the adaptive loop.
- if (cm->frame_type == KEY_FRAME && !rc->this_key_frame_forced) {
+ if ((cm->frame_type == KEY_FRAME || vp9_is_upper_layer_key_frame(cpi)) &&
+ !rc->this_key_frame_forced) {
qdelta = vp9_compute_qdelta_by_rate(&cpi->rc, cm->frame_type,
active_worst_quality, 2.0);
} else if (!rc->is_src_frame_alt_ref &&
diff --git a/vp9/encoder/vp9_rdopt.c b/vp9/encoder/vp9_rdopt.c
index f309aac..a2fc1bb 100644
--- a/vp9/encoder/vp9_rdopt.c
+++ b/vp9/encoder/vp9_rdopt.c
@@ -1806,7 +1806,7 @@
// motion search for newmv (single predictor case only)
if (!has_second_rf && this_mode == NEWMV &&
seg_mvs[i][mbmi->ref_frame[0]].as_int == INVALID_MV) {
- int_mv *const new_mv = &mode_mv[NEWMV][0];
+ MV *const new_mv = &mode_mv[NEWMV][0].as_mv;
int step_param = 0;
int further_steps;
int thissme, bestsme = INT_MAX;
@@ -1862,9 +1862,9 @@
step_param,
sadpb, 1, v_fn_ptr, 1,
&bsi->ref_mv[0]->as_mv,
- &new_mv->as_mv);
+ new_mv);
if (bestsme < INT_MAX)
- bestsme = vp9_get_mvpred_var(x, &new_mv->as_mv,
+ bestsme = vp9_get_mvpred_var(x, new_mv,
&bsi->ref_mv[0]->as_mv,
v_fn_ptr, 1);
} else if (cpi->sf.search_method == SQUARE) {
@@ -1872,9 +1872,9 @@
step_param,
sadpb, 1, v_fn_ptr, 1,
&bsi->ref_mv[0]->as_mv,
- &new_mv->as_mv);
+ new_mv);
if (bestsme < INT_MAX)
- bestsme = vp9_get_mvpred_var(x, &new_mv->as_mv,
+ bestsme = vp9_get_mvpred_var(x, new_mv,
&bsi->ref_mv[0]->as_mv,
v_fn_ptr, 1);
} else if (cpi->sf.search_method == BIGDIA) {
@@ -1882,16 +1882,16 @@
step_param,
sadpb, 1, v_fn_ptr, 1,
&bsi->ref_mv[0]->as_mv,
- &new_mv->as_mv);
+ new_mv);
if (bestsme < INT_MAX)
- bestsme = vp9_get_mvpred_var(x, &new_mv->as_mv,
+ bestsme = vp9_get_mvpred_var(x, new_mv,
&bsi->ref_mv[0]->as_mv,
v_fn_ptr, 1);
} else {
bestsme = vp9_full_pixel_diamond(cpi, x, &mvp_full, step_param,
sadpb, further_steps, 0, v_fn_ptr,
&bsi->ref_mv[0]->as_mv,
- &new_mv->as_mv);
+ new_mv);
}
// Should we do a full search (best quality only)
@@ -1906,18 +1906,18 @@
&best_mv->as_mv);
if (thissme < bestsme) {
bestsme = thissme;
- new_mv->as_int = best_mv->as_int;
+ *new_mv = best_mv->as_mv;
} else {
// The full search result is actually worse so re-instate the
// previous best vector
- best_mv->as_int = new_mv->as_int;
+ best_mv->as_mv = *new_mv;
}
}
if (bestsme < INT_MAX) {
int distortion;
cpi->find_fractional_mv_step(x,
- &new_mv->as_mv,
+ new_mv,
&bsi->ref_mv[0]->as_mv,
cm->allow_high_precision_mv,
x->errorperbit, v_fn_ptr,
@@ -1928,11 +1928,11 @@
&x->pred_sse[mbmi->ref_frame[0]]);
// save motion search result for use in compound prediction
- seg_mvs[i][mbmi->ref_frame[0]].as_int = new_mv->as_int;
+ seg_mvs[i][mbmi->ref_frame[0]].as_mv = *new_mv;
}
if (cpi->sf.adaptive_motion_search)
- x->pred_mv[mbmi->ref_frame[0]].as_int = new_mv->as_int;
+ x->pred_mv[mbmi->ref_frame[0]].as_mv = *new_mv;
// restore src pointers
mi_buf_restore(x, orig_src, orig_pre);
diff --git a/vp9/encoder/vp9_svc_layercontext.c b/vp9/encoder/vp9_svc_layercontext.c
index 2379f35..792e8d2 100644
--- a/vp9/encoder/vp9_svc_layercontext.c
+++ b/vp9/encoder/vp9_svc_layercontext.c
@@ -220,3 +220,10 @@
: &svc->layer_context[svc->spatial_layer_id];
++lc->current_video_frame_in_layer;
}
+
+int vp9_is_upper_layer_key_frame(const VP9_COMP *cpi) {
+ return cpi->use_svc &&
+ cpi->svc.number_temporal_layers == 1 &&
+ cpi->svc.spatial_layer_id > 0 &&
+ cpi->svc.layer_context[cpi->svc.spatial_layer_id].is_key_frame;
+}
diff --git a/vp9/encoder/vp9_svc_layercontext.h b/vp9/encoder/vp9_svc_layercontext.h
index 2abed30..74d9c1c 100644
--- a/vp9/encoder/vp9_svc_layercontext.h
+++ b/vp9/encoder/vp9_svc_layercontext.h
@@ -30,6 +30,7 @@
struct twopass_rc twopass;
struct vpx_fixed_buf rc_twopass_stats_in;
unsigned int current_video_frame_in_layer;
+ int is_key_frame;
} LAYER_CONTEXT;
typedef struct {
@@ -73,6 +74,9 @@
// Increment number of video frames in layer
void vp9_inc_frame_in_layer(SVC *svc);
+// Check if current layer is key frame in spatial upper layer
+int vp9_is_upper_layer_key_frame(const struct VP9_COMP *const cpi);
+
#ifdef __cplusplus
} // extern "C"
#endif
diff --git a/vp9/encoder/vp9_temporal_filter.c b/vp9/encoder/vp9_temporal_filter.c
index ca93391..a176bbf 100644
--- a/vp9/encoder/vp9_temporal_filter.c
+++ b/vp9/encoder/vp9_temporal_filter.c
@@ -27,6 +27,8 @@
#include "vpx_ports/vpx_timer.h"
#include "vpx_scale/vpx_scale.h"
+static int fixed_divide[512];
+
static void temporal_filter_predictors_mb_c(MACROBLOCKD *xd,
uint8_t *y_mb_ptr,
uint8_t *u_mb_ptr,
@@ -78,6 +80,14 @@
kernel, mv_precision_uv, x, y);
}
+void vp9_temporal_filter_init() {
+ int i;
+
+ fixed_divide[0] = 0;
+ for (i = 1; i < 512; ++i)
+ fixed_divide[i] = 0x80000 / i;
+}
+
void vp9_temporal_filter_apply_c(uint8_t *frame1,
unsigned int stride,
uint8_t *frame2,
@@ -294,7 +304,7 @@
for (i = 0, k = 0; i < 16; i++) {
for (j = 0; j < 16; j++, k++) {
unsigned int pval = accumulator[k] + (count[k] >> 1);
- pval *= cpi->fixed_divide[count[k]];
+ pval *= fixed_divide[count[k]];
pval >>= 19;
dst1[byte] = (uint8_t)pval;
@@ -315,13 +325,13 @@
// U
unsigned int pval = accumulator[k] + (count[k] >> 1);
- pval *= cpi->fixed_divide[count[k]];
+ pval *= fixed_divide[count[k]];
pval >>= 19;
dst1[byte] = (uint8_t)pval;
// V
pval = accumulator[m] + (count[m] >> 1);
- pval *= cpi->fixed_divide[count[m]];
+ pval *= fixed_divide[count[m]];
pval >>= 19;
dst2[byte] = (uint8_t)pval;
diff --git a/vp9/encoder/vp9_temporal_filter.h b/vp9/encoder/vp9_temporal_filter.h
index 3028d78..9453dc1 100644
--- a/vp9/encoder/vp9_temporal_filter.h
+++ b/vp9/encoder/vp9_temporal_filter.h
@@ -15,6 +15,7 @@
extern "C" {
#endif
+void vp9_temporal_filter_init();
void vp9_temporal_filter_prepare(VP9_COMP *cpi, int distance);
void vp9_configure_arnr_filter(VP9_COMP *cpi,
const unsigned int frames_to_arnr,
diff --git a/vp9/encoder/vp9_variance.h b/vp9/encoder/vp9_variance.h
index 152c3d9..c47fe13 100644
--- a/vp9/encoder/vp9_variance.h
+++ b/vp9/encoder/vp9_variance.h
@@ -69,13 +69,6 @@
unsigned int *sse,
const uint8_t *second_pred);
-typedef unsigned int (*vp9_getmbss_fn_t)(const short *);
-
-typedef unsigned int (*vp9_get16x16prederror_fn_t)(const uint8_t *src_ptr,
- int source_stride,
- const uint8_t *ref_ptr,
- int ref_stride);
-
typedef struct vp9_variance_vtable {
vp9_sad_fn_t sdf;
vp9_sad_avg_fn_t sdaf;
diff --git a/vp9/encoder/x86/vp9_dct_ssse3.asm b/vp9/encoder/x86/vp9_dct_ssse3.asm
new file mode 100644
index 0000000..1400071
--- /dev/null
+++ b/vp9/encoder/x86/vp9_dct_ssse3.asm
@@ -0,0 +1,174 @@
+;
+; Copyright (c) 2014 The WebM project authors. All Rights Reserved.
+;
+; Use of this source code is governed by a BSD-style license
+; that can be found in the LICENSE file in the root of the source
+; tree. An additional intellectual property rights grant can be found
+; in the file PATENTS. All contributing project authors may
+; be found in the AUTHORS file in the root of the source tree.
+;
+%include "third_party/x86inc/x86inc.asm"
+
+; This file provides SSSE3 version of the forward transformation. Part
+; of the macro definitions are originally derived from ffmpeg project.
+; The current version applies to x86 64-bit only.
+
+SECTION_RODATA
+
+pw_11585x2: times 8 dw 23170
+pd_8192: times 4 dd 8192
+
+%macro TRANSFORM_COEFFS 2
+pw_%1_%2: dw %1, %2, %1, %2, %1, %2, %1, %2
+pw_%2_m%1: dw %2, -%1, %2, -%1, %2, -%1, %2, -%1
+%endmacro
+
+TRANSFORM_COEFFS 15137, 6270
+TRANSFORM_COEFFS 16069, 3196
+TRANSFORM_COEFFS 9102, 13623
+
+SECTION .text
+
+%if ARCH_X86_64
+%macro SUM_SUB 3
+ psubw m%3, m%1, m%2
+ paddw m%1, m%2
+ SWAP %2, %3
+%endmacro
+
+; butterfly operation
+%macro MUL_ADD_2X 6 ; dst1, dst2, src, round, coefs1, coefs2
+ pmaddwd m%1, m%3, %5
+ pmaddwd m%2, m%3, %6
+ paddd m%1, %4
+ paddd m%2, %4
+ psrad m%1, 14
+ psrad m%2, 14
+%endmacro
+
+%macro BUTTERFLY_4X 7 ; dst1, dst2, coef1, coef2, round, tmp1, tmp2
+ punpckhwd m%6, m%2, m%1
+ MUL_ADD_2X %7, %6, %6, %5, [pw_%4_%3], [pw_%3_m%4]
+ punpcklwd m%2, m%1
+ MUL_ADD_2X %1, %2, %2, %5, [pw_%4_%3], [pw_%3_m%4]
+ packssdw m%1, m%7
+ packssdw m%2, m%6
+%endmacro
+
+; matrix transpose
+%macro INTERLEAVE_2X 4
+ punpckh%1 m%4, m%2, m%3
+ punpckl%1 m%2, m%3
+ SWAP %3, %4
+%endmacro
+
+%macro TRANSPOSE8X8 9
+ INTERLEAVE_2X wd, %1, %2, %9
+ INTERLEAVE_2X wd, %3, %4, %9
+ INTERLEAVE_2X wd, %5, %6, %9
+ INTERLEAVE_2X wd, %7, %8, %9
+
+ INTERLEAVE_2X dq, %1, %3, %9
+ INTERLEAVE_2X dq, %2, %4, %9
+ INTERLEAVE_2X dq, %5, %7, %9
+ INTERLEAVE_2X dq, %6, %8, %9
+
+ INTERLEAVE_2X qdq, %1, %5, %9
+ INTERLEAVE_2X qdq, %3, %7, %9
+ INTERLEAVE_2X qdq, %2, %6, %9
+ INTERLEAVE_2X qdq, %4, %8, %9
+
+ SWAP %2, %5
+ SWAP %4, %7
+%endmacro
+
+; 1D forward 8x8 DCT transform
+%macro FDCT8_1D 0
+ SUM_SUB 0, 7, 9
+ SUM_SUB 1, 6, 9
+ SUM_SUB 2, 5, 9
+ SUM_SUB 3, 4, 9
+
+ SUM_SUB 0, 3, 9
+ SUM_SUB 1, 2, 9
+ SUM_SUB 6, 5, 9
+ SUM_SUB 0, 1, 9
+
+ BUTTERFLY_4X 2, 3, 6270, 15137, m8, 9, 10
+
+ pmulhrsw m6, m12
+ pmulhrsw m5, m12
+ pmulhrsw m0, m12
+ pmulhrsw m1, m12
+
+ SUM_SUB 4, 5, 9
+ SUM_SUB 7, 6, 9
+ BUTTERFLY_4X 4, 7, 3196, 16069, m8, 9, 10
+ BUTTERFLY_4X 5, 6, 13623, 9102, m8, 9, 10
+ SWAP 1, 4
+ SWAP 3, 6
+%endmacro
+
+%macro DIVIDE_ROUND_2X 4 ; dst1, dst2, tmp1, tmp2
+ psraw m%3, m%1, 15
+ psraw m%4, m%2, 15
+ psubw m%1, m%3
+ psubw m%2, m%4
+ psraw m%1, 1
+ psraw m%2, 1
+%endmacro
+
+INIT_XMM ssse3
+cglobal fdct8x8, 3, 5, 13, input, output, stride
+
+ mova m8, [pd_8192]
+ mova m12, [pw_11585x2]
+ pxor m11, m11
+
+ lea r3, [2 * strideq]
+ lea r4, [4 * strideq]
+ mova m0, [inputq]
+ mova m1, [inputq + r3]
+ lea inputq, [inputq + r4]
+ mova m2, [inputq]
+ mova m3, [inputq + r3]
+ lea inputq, [inputq + r4]
+ mova m4, [inputq]
+ mova m5, [inputq + r3]
+ lea inputq, [inputq + r4]
+ mova m6, [inputq]
+ mova m7, [inputq + r3]
+
+ ; left shift by 2 to increase forward transformation precision
+ psllw m0, 2
+ psllw m1, 2
+ psllw m2, 2
+ psllw m3, 2
+ psllw m4, 2
+ psllw m5, 2
+ psllw m6, 2
+ psllw m7, 2
+
+ ; column transform
+ FDCT8_1D
+ TRANSPOSE8X8 0, 1, 2, 3, 4, 5, 6, 7, 9
+
+ FDCT8_1D
+ TRANSPOSE8X8 0, 1, 2, 3, 4, 5, 6, 7, 9
+
+ DIVIDE_ROUND_2X 0, 1, 9, 10
+ DIVIDE_ROUND_2X 2, 3, 9, 10
+ DIVIDE_ROUND_2X 4, 5, 9, 10
+ DIVIDE_ROUND_2X 6, 7, 9, 10
+
+ mova [outputq + 0], m0
+ mova [outputq + 16], m1
+ mova [outputq + 32], m2
+ mova [outputq + 48], m3
+ mova [outputq + 64], m4
+ mova [outputq + 80], m5
+ mova [outputq + 96], m6
+ mova [outputq + 112], m7
+
+ RET
+%endif
diff --git a/vp9/encoder/x86/vp9_subpel_variance_impl_sse2.asm b/vp9/encoder/x86/vp9_subpel_variance_impl_sse2.asm
deleted file mode 100644
index 2ecc23e..0000000
--- a/vp9/encoder/x86/vp9_subpel_variance_impl_sse2.asm
+++ /dev/null
@@ -1,337 +0,0 @@
-;
-; Copyright (c) 2010 The WebM project authors. All Rights Reserved.
-;
-; Use of this source code is governed by a BSD-style license
-; that can be found in the LICENSE file in the root of the source
-; tree. An additional intellectual property rights grant can be found
-; in the file PATENTS. All contributing project authors may
-; be found in the AUTHORS file in the root of the source tree.
-;
-
-%include "vpx_ports/x86_abi_support.asm"
-
-;void vp9_half_horiz_vert_variance16x_h_sse2
-;(
-; unsigned char *ref_ptr,
-; int ref_pixels_per_line,
-; unsigned char *src_ptr,
-; int src_pixels_per_line,
-; unsigned int Height,
-; int *sum,
-; unsigned int *sumsquared
-;)
-global sym(vp9_half_horiz_vert_variance16x_h_sse2) PRIVATE
-sym(vp9_half_horiz_vert_variance16x_h_sse2):
- push rbp
- mov rbp, rsp
- SHADOW_ARGS_TO_STACK 7
- SAVE_XMM 7
- GET_GOT rbx
- push rsi
- push rdi
- ; end prolog
-
- pxor xmm6, xmm6 ; error accumulator
- pxor xmm7, xmm7 ; sse eaccumulator
- mov rsi, arg(0) ;ref_ptr ;
-
- mov rdi, arg(2) ;src_ptr ;
- movsxd rcx, dword ptr arg(4) ;Height ;
- movsxd rax, dword ptr arg(1) ;ref_pixels_per_line
- movsxd rdx, dword ptr arg(3) ;src_pixels_per_line
-
- pxor xmm0, xmm0 ;
-
- movdqu xmm5, XMMWORD PTR [rsi]
- movdqu xmm3, XMMWORD PTR [rsi+1]
- pavgb xmm5, xmm3 ; xmm5 = avg(xmm1,xmm3) horizontal line 1
-
- lea rsi, [rsi + rax]
-
-.half_horiz_vert_variance16x_h_1:
- movdqu xmm1, XMMWORD PTR [rsi] ;
- movdqu xmm2, XMMWORD PTR [rsi+1] ;
- pavgb xmm1, xmm2 ; xmm1 = avg(xmm1,xmm3) horizontal line i+1
-
- pavgb xmm5, xmm1 ; xmm = vertical average of the above
-
- movdqa xmm4, xmm5
- punpcklbw xmm5, xmm0 ; xmm5 = words of above
- punpckhbw xmm4, xmm0
-
- movq xmm3, QWORD PTR [rdi] ; xmm3 = d0,d1,d2..d7
- punpcklbw xmm3, xmm0 ; xmm3 = words of above
- psubw xmm5, xmm3 ; xmm5 -= xmm3
-
- movq xmm3, QWORD PTR [rdi+8]
- punpcklbw xmm3, xmm0
- psubw xmm4, xmm3
-
- paddw xmm6, xmm5 ; xmm6 += accumulated column differences
- paddw xmm6, xmm4
- pmaddwd xmm5, xmm5 ; xmm5 *= xmm5
- pmaddwd xmm4, xmm4
- paddd xmm7, xmm5 ; xmm7 += accumulated square column differences
- paddd xmm7, xmm4
-
- movdqa xmm5, xmm1 ; save xmm1 for use on the next row
-
- lea rsi, [rsi + rax]
- lea rdi, [rdi + rdx]
-
- sub rcx, 1 ;
- jnz .half_horiz_vert_variance16x_h_1 ;
-
- pxor xmm1, xmm1
- pxor xmm5, xmm5
-
- punpcklwd xmm0, xmm6
- punpckhwd xmm1, xmm6
- psrad xmm0, 16
- psrad xmm1, 16
- paddd xmm0, xmm1
- movdqa xmm1, xmm0
-
- movdqa xmm6, xmm7
- punpckldq xmm6, xmm5
- punpckhdq xmm7, xmm5
- paddd xmm6, xmm7
-
- punpckldq xmm0, xmm5
- punpckhdq xmm1, xmm5
- paddd xmm0, xmm1
-
- movdqa xmm7, xmm6
- movdqa xmm1, xmm0
-
- psrldq xmm7, 8
- psrldq xmm1, 8
-
- paddd xmm6, xmm7
- paddd xmm0, xmm1
-
- mov rsi, arg(5) ;[Sum]
- mov rdi, arg(6) ;[SSE]
-
- movd [rsi], xmm0
- movd [rdi], xmm6
-
- ; begin epilog
- pop rdi
- pop rsi
- RESTORE_GOT
- RESTORE_XMM
- UNSHADOW_ARGS
- pop rbp
- ret
-
-;void vp9_half_vert_variance16x_h_sse2
-;(
-; unsigned char *ref_ptr,
-; int ref_pixels_per_line,
-; unsigned char *src_ptr,
-; int src_pixels_per_line,
-; unsigned int Height,
-; int *sum,
-; unsigned int *sumsquared
-;)
-global sym(vp9_half_vert_variance16x_h_sse2) PRIVATE
-sym(vp9_half_vert_variance16x_h_sse2):
- push rbp
- mov rbp, rsp
- SHADOW_ARGS_TO_STACK 7
- SAVE_XMM 7
- GET_GOT rbx
- push rsi
- push rdi
- ; end prolog
-
- pxor xmm6, xmm6 ; error accumulator
- pxor xmm7, xmm7 ; sse eaccumulator
- mov rsi, arg(0) ;ref_ptr
-
- mov rdi, arg(2) ;src_ptr
- movsxd rcx, dword ptr arg(4) ;Height
- movsxd rax, dword ptr arg(1) ;ref_pixels_per_line
- movsxd rdx, dword ptr arg(3) ;src_pixels_per_line
-
- movdqu xmm5, XMMWORD PTR [rsi]
- lea rsi, [rsi + rax ]
- pxor xmm0, xmm0
-
-.half_vert_variance16x_h_1:
- movdqu xmm3, XMMWORD PTR [rsi]
-
- pavgb xmm5, xmm3 ; xmm5 = avg(xmm1,xmm3)
- movdqa xmm4, xmm5
- punpcklbw xmm5, xmm0
- punpckhbw xmm4, xmm0
-
- movq xmm2, QWORD PTR [rdi]
- punpcklbw xmm2, xmm0
- psubw xmm5, xmm2
- movq xmm2, QWORD PTR [rdi+8]
- punpcklbw xmm2, xmm0
- psubw xmm4, xmm2
-
- paddw xmm6, xmm5 ; xmm6 += accumulated column differences
- paddw xmm6, xmm4
- pmaddwd xmm5, xmm5 ; xmm5 *= xmm5
- pmaddwd xmm4, xmm4
- paddd xmm7, xmm5 ; xmm7 += accumulated square column differences
- paddd xmm7, xmm4
-
- movdqa xmm5, xmm3
-
- lea rsi, [rsi + rax]
- lea rdi, [rdi + rdx]
-
- sub rcx, 1
- jnz .half_vert_variance16x_h_1
-
- pxor xmm1, xmm1
- pxor xmm5, xmm5
-
- punpcklwd xmm0, xmm6
- punpckhwd xmm1, xmm6
- psrad xmm0, 16
- psrad xmm1, 16
- paddd xmm0, xmm1
- movdqa xmm1, xmm0
-
- movdqa xmm6, xmm7
- punpckldq xmm6, xmm5
- punpckhdq xmm7, xmm5
- paddd xmm6, xmm7
-
- punpckldq xmm0, xmm5
- punpckhdq xmm1, xmm5
- paddd xmm0, xmm1
-
- movdqa xmm7, xmm6
- movdqa xmm1, xmm0
-
- psrldq xmm7, 8
- psrldq xmm1, 8
-
- paddd xmm6, xmm7
- paddd xmm0, xmm1
-
- mov rsi, arg(5) ;[Sum]
- mov rdi, arg(6) ;[SSE]
-
- movd [rsi], xmm0
- movd [rdi], xmm6
-
- ; begin epilog
- pop rdi
- pop rsi
- RESTORE_GOT
- RESTORE_XMM
- UNSHADOW_ARGS
- pop rbp
- ret
-
-;void vp9_half_horiz_variance16x_h_sse2
-;(
-; unsigned char *ref_ptr,
-; int ref_pixels_per_line,
-; unsigned char *src_ptr,
-; int src_pixels_per_line,
-; unsigned int Height,
-; int *sum,
-; unsigned int *sumsquared
-;)
-global sym(vp9_half_horiz_variance16x_h_sse2) PRIVATE
-sym(vp9_half_horiz_variance16x_h_sse2):
- push rbp
- mov rbp, rsp
- SHADOW_ARGS_TO_STACK 7
- SAVE_XMM 7
- GET_GOT rbx
- push rsi
- push rdi
- ; end prolog
-
- pxor xmm6, xmm6 ; error accumulator
- pxor xmm7, xmm7 ; sse eaccumulator
- mov rsi, arg(0) ;ref_ptr ;
-
- mov rdi, arg(2) ;src_ptr ;
- movsxd rcx, dword ptr arg(4) ;Height ;
- movsxd rax, dword ptr arg(1) ;ref_pixels_per_line
- movsxd rdx, dword ptr arg(3) ;src_pixels_per_line
-
- pxor xmm0, xmm0 ;
-
-.half_horiz_variance16x_h_1:
- movdqu xmm5, XMMWORD PTR [rsi] ; xmm5 = s0,s1,s2..s15
- movdqu xmm3, XMMWORD PTR [rsi+1] ; xmm3 = s1,s2,s3..s16
-
- pavgb xmm5, xmm3 ; xmm5 = avg(xmm1,xmm3)
- movdqa xmm1, xmm5
- punpcklbw xmm5, xmm0 ; xmm5 = words of above
- punpckhbw xmm1, xmm0
-
- movq xmm3, QWORD PTR [rdi] ; xmm3 = d0,d1,d2..d7
- punpcklbw xmm3, xmm0 ; xmm3 = words of above
- movq xmm2, QWORD PTR [rdi+8]
- punpcklbw xmm2, xmm0
-
- psubw xmm5, xmm3 ; xmm5 -= xmm3
- psubw xmm1, xmm2
- paddw xmm6, xmm5 ; xmm6 += accumulated column differences
- paddw xmm6, xmm1
- pmaddwd xmm5, xmm5 ; xmm5 *= xmm5
- pmaddwd xmm1, xmm1
- paddd xmm7, xmm5 ; xmm7 += accumulated square column differences
- paddd xmm7, xmm1
-
- lea rsi, [rsi + rax]
- lea rdi, [rdi + rdx]
-
- sub rcx, 1 ;
- jnz .half_horiz_variance16x_h_1 ;
-
- pxor xmm1, xmm1
- pxor xmm5, xmm5
-
- punpcklwd xmm0, xmm6
- punpckhwd xmm1, xmm6
- psrad xmm0, 16
- psrad xmm1, 16
- paddd xmm0, xmm1
- movdqa xmm1, xmm0
-
- movdqa xmm6, xmm7
- punpckldq xmm6, xmm5
- punpckhdq xmm7, xmm5
- paddd xmm6, xmm7
-
- punpckldq xmm0, xmm5
- punpckhdq xmm1, xmm5
- paddd xmm0, xmm1
-
- movdqa xmm7, xmm6
- movdqa xmm1, xmm0
-
- psrldq xmm7, 8
- psrldq xmm1, 8
-
- paddd xmm6, xmm7
- paddd xmm0, xmm1
-
- mov rsi, arg(5) ;[Sum]
- mov rdi, arg(6) ;[SSE]
-
- movd [rsi], xmm0
- movd [rdi], xmm6
-
- ; begin epilog
- pop rdi
- pop rsi
- RESTORE_GOT
- RESTORE_XMM
- UNSHADOW_ARGS
- pop rbp
- ret
diff --git a/vp9/encoder/x86/vp9_variance_impl_sse2.asm b/vp9/encoder/x86/vp9_variance_impl_sse2.asm
index 2c50881..4830412 100644
--- a/vp9/encoder/x86/vp9_variance_impl_sse2.asm
+++ b/vp9/encoder/x86/vp9_variance_impl_sse2.asm
@@ -398,337 +398,4 @@
pop rbp
ret
-;void vp9_half_horiz_vert_variance8x_h_sse2
-;(
-; unsigned char *ref_ptr,
-; int ref_pixels_per_line,
-; unsigned char *src_ptr,
-; int src_pixels_per_line,
-; unsigned int Height,
-; int *sum,
-; unsigned int *sumsquared
-;)
-global sym(vp9_half_horiz_vert_variance8x_h_sse2) PRIVATE
-sym(vp9_half_horiz_vert_variance8x_h_sse2):
- push rbp
- mov rbp, rsp
- SHADOW_ARGS_TO_STACK 7
- SAVE_XMM 7
- GET_GOT rbx
- push rsi
- push rdi
- ; end prolog
-
-%if ABI_IS_32BIT=0
- movsxd r8, dword ptr arg(1) ;ref_pixels_per_line
- movsxd r9, dword ptr arg(3) ;src_pixels_per_line
-%endif
-
- pxor xmm6, xmm6 ; error accumulator
- pxor xmm7, xmm7 ; sse eaccumulator
- mov rsi, arg(0) ;ref_ptr ;
-
- mov rdi, arg(2) ;src_ptr ;
- movsxd rcx, dword ptr arg(4) ;Height ;
- movsxd rax, dword ptr arg(1) ;ref_pixels_per_line
-
- pxor xmm0, xmm0 ;
-
- movq xmm5, QWORD PTR [rsi] ; xmm5 = s0,s1,s2..s8
- movq xmm3, QWORD PTR [rsi+1] ; xmm3 = s1,s2,s3..s9
- pavgb xmm5, xmm3 ; xmm5 = avg(xmm1,xmm3) horizontal line 1
-
-%if ABI_IS_32BIT
- add rsi, dword ptr arg(1) ;ref_pixels_per_line ; next source
-%else
- add rsi, r8
-%endif
-
-.half_horiz_vert_variance8x_h_1:
-
- movq xmm1, QWORD PTR [rsi] ;
- movq xmm2, QWORD PTR [rsi+1] ;
- pavgb xmm1, xmm2 ; xmm1 = avg(xmm1,xmm3) horizontal line i+1
-
- pavgb xmm5, xmm1 ; xmm = vertical average of the above
- punpcklbw xmm5, xmm0 ; xmm5 = words of above
-
- movq xmm3, QWORD PTR [rdi] ; xmm3 = d0,d1,d2..d8
- punpcklbw xmm3, xmm0 ; xmm3 = words of above
-
- psubw xmm5, xmm3 ; xmm5 -= xmm3
- paddw xmm6, xmm5 ; xmm6 += accumulated column differences
- pmaddwd xmm5, xmm5 ; xmm5 *= xmm5
- paddd xmm7, xmm5 ; xmm7 += accumulated square column differences
-
- movdqa xmm5, xmm1 ; save xmm1 for use on the next row
-
-%if ABI_IS_32BIT
- add esi, dword ptr arg(1) ;ref_pixels_per_line ; next source
- add edi, dword ptr arg(3) ;src_pixels_per_line ; next destination
-%else
- add rsi, r8
- add rdi, r9
-%endif
-
- sub rcx, 1 ;
- jnz .half_horiz_vert_variance8x_h_1 ;
-
- movdq2q mm6, xmm6 ;
- movdq2q mm7, xmm7 ;
-
- psrldq xmm6, 8
- psrldq xmm7, 8
-
- movdq2q mm2, xmm6
- movdq2q mm3, xmm7
-
- paddw mm6, mm2
- paddd mm7, mm3
-
- pxor mm3, mm3 ;
- pxor mm2, mm2 ;
-
- punpcklwd mm2, mm6 ;
- punpckhwd mm3, mm6 ;
-
- paddd mm2, mm3 ;
- movq mm6, mm2 ;
-
- psrlq mm6, 32 ;
- paddd mm2, mm6 ;
-
- psrad mm2, 16 ;
- movq mm4, mm7 ;
-
- psrlq mm4, 32 ;
- paddd mm4, mm7 ;
-
- mov rsi, arg(5) ; sum
- mov rdi, arg(6) ; sumsquared
-
- movd [rsi], mm2 ;
- movd [rdi], mm4 ;
-
-
- ; begin epilog
- pop rdi
- pop rsi
- RESTORE_GOT
- RESTORE_XMM
- UNSHADOW_ARGS
- pop rbp
- ret
-
-;void vp9_half_vert_variance8x_h_sse2
-;(
-; unsigned char *ref_ptr,
-; int ref_pixels_per_line,
-; unsigned char *src_ptr,
-; int src_pixels_per_line,
-; unsigned int Height,
-; int *sum,
-; unsigned int *sumsquared
-;)
-global sym(vp9_half_vert_variance8x_h_sse2) PRIVATE
-sym(vp9_half_vert_variance8x_h_sse2):
- push rbp
- mov rbp, rsp
- SHADOW_ARGS_TO_STACK 7
- SAVE_XMM 7
- GET_GOT rbx
- push rsi
- push rdi
- ; end prolog
-
-%if ABI_IS_32BIT=0
- movsxd r8, dword ptr arg(1) ;ref_pixels_per_line
- movsxd r9, dword ptr arg(3) ;src_pixels_per_line
-%endif
-
- pxor xmm6, xmm6 ; error accumulator
- pxor xmm7, xmm7 ; sse eaccumulator
- mov rsi, arg(0) ;ref_ptr ;
-
- mov rdi, arg(2) ;src_ptr ;
- movsxd rcx, dword ptr arg(4) ;Height ;
- movsxd rax, dword ptr arg(1) ;ref_pixels_per_line
-
- pxor xmm0, xmm0 ;
-.half_vert_variance8x_h_1:
- movq xmm5, QWORD PTR [rsi] ; xmm5 = s0,s1,s2..s8
- movq xmm3, QWORD PTR [rsi+rax] ; xmm3 = s1,s2,s3..s9
-
- pavgb xmm5, xmm3 ; xmm5 = avg(xmm1,xmm3)
- punpcklbw xmm5, xmm0 ; xmm5 = words of above
-
- movq xmm3, QWORD PTR [rdi] ; xmm3 = d0,d1,d2..d8
- punpcklbw xmm3, xmm0 ; xmm3 = words of above
-
- psubw xmm5, xmm3 ; xmm5 -= xmm3
- paddw xmm6, xmm5 ; xmm6 += accumulated column differences
- pmaddwd xmm5, xmm5 ; xmm5 *= xmm5
- paddd xmm7, xmm5 ; xmm7 += accumulated square column differences
-
-%if ABI_IS_32BIT
- add esi, dword ptr arg(1) ;ref_pixels_per_line ; next source
- add edi, dword ptr arg(3) ;src_pixels_per_line ; next destination
-%else
- add rsi, r8
- add rdi, r9
-%endif
-
- sub rcx, 1 ;
- jnz .half_vert_variance8x_h_1 ;
-
- movdq2q mm6, xmm6 ;
- movdq2q mm7, xmm7 ;
-
- psrldq xmm6, 8
- psrldq xmm7, 8
-
- movdq2q mm2, xmm6
- movdq2q mm3, xmm7
-
- paddw mm6, mm2
- paddd mm7, mm3
-
- pxor mm3, mm3 ;
- pxor mm2, mm2 ;
-
- punpcklwd mm2, mm6 ;
- punpckhwd mm3, mm6 ;
-
- paddd mm2, mm3 ;
- movq mm6, mm2 ;
-
- psrlq mm6, 32 ;
- paddd mm2, mm6 ;
-
- psrad mm2, 16 ;
- movq mm4, mm7 ;
-
- psrlq mm4, 32 ;
- paddd mm4, mm7 ;
-
- mov rsi, arg(5) ; sum
- mov rdi, arg(6) ; sumsquared
-
- movd [rsi], mm2 ;
- movd [rdi], mm4 ;
-
-
- ; begin epilog
- pop rdi
- pop rsi
- RESTORE_GOT
- RESTORE_XMM
- UNSHADOW_ARGS
- pop rbp
- ret
-
-
-;void vp9_half_horiz_variance8x_h_sse2
-;(
-; unsigned char *ref_ptr,
-; int ref_pixels_per_line,
-; unsigned char *src_ptr,
-; int src_pixels_per_line,
-; unsigned int Height,
-; int *sum,
-; unsigned int *sumsquared
-;)
-global sym(vp9_half_horiz_variance8x_h_sse2) PRIVATE
-sym(vp9_half_horiz_variance8x_h_sse2):
- push rbp
- mov rbp, rsp
- SHADOW_ARGS_TO_STACK 7
- SAVE_XMM 7
- GET_GOT rbx
- push rsi
- push rdi
- ; end prolog
-
-%if ABI_IS_32BIT=0
- movsxd r8, dword ptr arg(1) ;ref_pixels_per_line
- movsxd r9, dword ptr arg(3) ;src_pixels_per_line
-%endif
-
- pxor xmm6, xmm6 ; error accumulator
- pxor xmm7, xmm7 ; sse eaccumulator
- mov rsi, arg(0) ;ref_ptr ;
-
- mov rdi, arg(2) ;src_ptr ;
- movsxd rcx, dword ptr arg(4) ;Height ;
-
- pxor xmm0, xmm0 ;
-.half_horiz_variance8x_h_1:
- movq xmm5, QWORD PTR [rsi] ; xmm5 = s0,s1,s2..s8
- movq xmm3, QWORD PTR [rsi+1] ; xmm3 = s1,s2,s3..s9
-
- pavgb xmm5, xmm3 ; xmm5 = avg(xmm1,xmm3)
- punpcklbw xmm5, xmm0 ; xmm5 = words of above
-
- movq xmm3, QWORD PTR [rdi] ; xmm3 = d0,d1,d2..d8
- punpcklbw xmm3, xmm0 ; xmm3 = words of above
-
- psubw xmm5, xmm3 ; xmm5 -= xmm3
- paddw xmm6, xmm5 ; xmm6 += accumulated column differences
- pmaddwd xmm5, xmm5 ; xmm5 *= xmm5
- paddd xmm7, xmm5 ; xmm7 += accumulated square column differences
-
-%if ABI_IS_32BIT
- add esi, dword ptr arg(1) ;ref_pixels_per_line ; next source
- add edi, dword ptr arg(3) ;src_pixels_per_line ; next destination
-%else
- add rsi, r8
- add rdi, r9
-%endif
- sub rcx, 1 ;
- jnz .half_horiz_variance8x_h_1 ;
-
- movdq2q mm6, xmm6 ;
- movdq2q mm7, xmm7 ;
-
- psrldq xmm6, 8
- psrldq xmm7, 8
-
- movdq2q mm2, xmm6
- movdq2q mm3, xmm7
-
- paddw mm6, mm2
- paddd mm7, mm3
-
- pxor mm3, mm3 ;
- pxor mm2, mm2 ;
-
- punpcklwd mm2, mm6 ;
- punpckhwd mm3, mm6 ;
-
- paddd mm2, mm3 ;
- movq mm6, mm2 ;
-
- psrlq mm6, 32 ;
- paddd mm2, mm6 ;
-
- psrad mm2, 16 ;
- movq mm4, mm7 ;
-
- psrlq mm4, 32 ;
- paddd mm4, mm7 ;
-
- mov rsi, arg(5) ; sum
- mov rdi, arg(6) ; sumsquared
-
- movd [rsi], mm2 ;
- movd [rdi], mm4 ;
-
- ; begin epilog
- pop rdi
- pop rsi
- RESTORE_GOT
- RESTORE_XMM
- UNSHADOW_ARGS
- pop rbp
- ret
diff --git a/vp9/encoder/x86/vp9_variance_sse2.c b/vp9/encoder/x86/vp9_variance_sse2.c
index 25d5946..41f2259 100644
--- a/vp9/encoder/x86/vp9_variance_sse2.c
+++ b/vp9/encoder/x86/vp9_variance_sse2.c
@@ -42,66 +42,6 @@
unsigned int *SSE,
int *Sum
);
-void vp9_half_horiz_vert_variance8x_h_sse2
-(
- const unsigned char *ref_ptr,
- int ref_pixels_per_line,
- const unsigned char *src_ptr,
- int src_pixels_per_line,
- unsigned int Height,
- int *sum,
- unsigned int *sumsquared
-);
-void vp9_half_horiz_vert_variance16x_h_sse2
-(
- const unsigned char *ref_ptr,
- int ref_pixels_per_line,
- const unsigned char *src_ptr,
- int src_pixels_per_line,
- unsigned int Height,
- int *sum,
- unsigned int *sumsquared
-);
-void vp9_half_horiz_variance8x_h_sse2
-(
- const unsigned char *ref_ptr,
- int ref_pixels_per_line,
- const unsigned char *src_ptr,
- int src_pixels_per_line,
- unsigned int Height,
- int *sum,
- unsigned int *sumsquared
-);
-void vp9_half_horiz_variance16x_h_sse2
-(
- const unsigned char *ref_ptr,
- int ref_pixels_per_line,
- const unsigned char *src_ptr,
- int src_pixels_per_line,
- unsigned int Height,
- int *sum,
- unsigned int *sumsquared
-);
-void vp9_half_vert_variance8x_h_sse2
-(
- const unsigned char *ref_ptr,
- int ref_pixels_per_line,
- const unsigned char *src_ptr,
- int src_pixels_per_line,
- unsigned int Height,
- int *sum,
- unsigned int *sumsquared
-);
-void vp9_half_vert_variance16x_h_sse2
-(
- const unsigned char *ref_ptr,
- int ref_pixels_per_line,
- const unsigned char *src_ptr,
- int src_pixels_per_line,
- unsigned int Height,
- int *sum,
- unsigned int *sumsquared
-);
typedef unsigned int (*get_var_sse2) (
const unsigned char *src_ptr,
diff --git a/vp9/vp9_iface_common.h b/vp9/vp9_iface_common.h
index 58256b2..d60883c 100644
--- a/vp9/vp9_iface_common.h
+++ b/vp9/vp9_iface_common.h
@@ -16,9 +16,11 @@
* the Y, U, and V planes, nor other alignment adjustments that
* might be representable by a YV12_BUFFER_CONFIG, so we just
* initialize all the fields.*/
- int bps = 12;
- if (yv12->uv_height == yv12->y_height) {
- if (yv12->uv_width == yv12->y_width) {
+ const int ss_x = yv12->uv_crop_width < yv12->y_crop_width;
+ const int ss_y = yv12->uv_crop_height < yv12->y_crop_height;
+ int bps;
+ if (!ss_y) {
+ if (!ss_x) {
img->fmt = VPX_IMG_FMT_I444;
bps = 24;
} else {
@@ -27,13 +29,14 @@
}
} else {
img->fmt = VPX_IMG_FMT_I420;
+ bps = 12;
}
img->w = yv12->y_stride;
img->h = ALIGN_POWER_OF_TWO(yv12->y_height + 2 * VP9_ENC_BORDER_IN_PIXELS, 3);
img->d_w = yv12->y_crop_width;
img->d_h = yv12->y_crop_height;
- img->x_chroma_shift = yv12->uv_width < yv12->y_width;
- img->y_chroma_shift = yv12->uv_height < yv12->y_height;
+ img->x_chroma_shift = ss_x;
+ img->y_chroma_shift = ss_y;
img->planes[VPX_PLANE_Y] = yv12->y_buffer;
img->planes[VPX_PLANE_U] = yv12->u_buffer;
img->planes[VPX_PLANE_V] = yv12->v_buffer;
diff --git a/vp9/vp9cx.mk b/vp9/vp9cx.mk
index c444fe4..5e88793 100644
--- a/vp9/vp9cx.mk
+++ b/vp9/vp9cx.mk
@@ -96,7 +96,6 @@
VP9_CX_SRCS-$(HAVE_AVX2) += encoder/x86/vp9_variance_impl_intrin_avx2.c
VP9_CX_SRCS-$(HAVE_SSE2) += encoder/x86/vp9_sad4d_sse2.asm
VP9_CX_SRCS-$(HAVE_AVX2) += encoder/x86/vp9_sad4d_intrin_avx2.c
-VP9_CX_SRCS-$(HAVE_SSE2) += encoder/x86/vp9_subpel_variance_impl_sse2.asm
VP9_CX_SRCS-$(HAVE_AVX2) += encoder/x86/vp9_subpel_variance_impl_intrin_avx2.c
VP9_CX_SRCS-$(HAVE_SSE2) += encoder/x86/vp9_temporal_filter_apply_sse2.asm
VP9_CX_SRCS-$(HAVE_SSE3) += encoder/x86/vp9_sad_sse3.asm
@@ -112,6 +111,7 @@
ifeq ($(ARCH_X86_64),yes)
VP9_CX_SRCS-$(HAVE_SSSE3) += encoder/x86/vp9_quantize_ssse3.asm
+VP9_CX_SRCS-$(HAVE_SSSE3) += encoder/x86/vp9_dct_ssse3.asm
endif
VP9_CX_SRCS-$(HAVE_SSSE3) += encoder/x86/vp9_sad_ssse3.asm
VP9_CX_SRCS-$(HAVE_SSE4_1) += encoder/x86/vp9_sad_sse4.asm
diff --git a/vpx/vpx_image.h b/vpx/vpx_image.h
index 8d0f4ec..d45b003 100644
--- a/vpx/vpx_image.h
+++ b/vpx/vpx_image.h
@@ -34,7 +34,7 @@
#define VPX_IMG_FMT_PLANAR 0x100 /**< Image is a planar format */
#define VPX_IMG_FMT_UV_FLIP 0x200 /**< V plane precedes U plane in memory */
#define VPX_IMG_FMT_HAS_ALPHA 0x400 /**< Image has an alpha channel component */
-
+#define VPX_IMG_FMT_HIGH 0x800 /**< Image uses 16bit framebuffer */
/*!\brief List of supported image formats */
typedef enum vpx_img_fmt {
@@ -58,7 +58,10 @@
VPX_IMG_FMT_VPXI420 = VPX_IMG_FMT_PLANAR | 4,
VPX_IMG_FMT_I422 = VPX_IMG_FMT_PLANAR | 5,
VPX_IMG_FMT_I444 = VPX_IMG_FMT_PLANAR | 6,
- VPX_IMG_FMT_444A = VPX_IMG_FMT_PLANAR | VPX_IMG_FMT_HAS_ALPHA | 7
+ VPX_IMG_FMT_444A = VPX_IMG_FMT_PLANAR | VPX_IMG_FMT_HAS_ALPHA | 7,
+ VPX_IMG_FMT_I42016 = VPX_IMG_FMT_I420 | VPX_IMG_FMT_HIGH,
+ VPX_IMG_FMT_I42216 = VPX_IMG_FMT_I422 | VPX_IMG_FMT_HIGH,
+ VPX_IMG_FMT_I44416 = VPX_IMG_FMT_I444 | VPX_IMG_FMT_HIGH
} vpx_img_fmt_t; /**< alias for enum vpx_img_fmt */
#if !defined(VPX_CODEC_DISABLE_COMPAT) || !VPX_CODEC_DISABLE_COMPAT