Merge "remove mmx variance functions"
diff --git a/test/sad_test.cc b/test/sad_test.cc
index 3f0f74c..e6bd0d7 100644
--- a/test/sad_test.cc
+++ b/test/sad_test.cc
@@ -689,17 +689,6 @@
//------------------------------------------------------------------------------
// x86 functions
-#if HAVE_MMX
-const SadMxNParam mmx_tests[] = {
- make_tuple(16, 16, &vpx_sad16x16_mmx, -1),
- make_tuple(16, 8, &vpx_sad16x8_mmx, -1),
- make_tuple(8, 16, &vpx_sad8x16_mmx, -1),
- make_tuple(8, 8, &vpx_sad8x8_mmx, -1),
- make_tuple(4, 4, &vpx_sad4x4_mmx, -1),
-};
-INSTANTIATE_TEST_CASE_P(MMX, SADTest, ::testing::ValuesIn(mmx_tests));
-#endif // HAVE_MMX
-
#if HAVE_SSE2
#if CONFIG_USE_X86INC
const SadMxNParam sse2_tests[] = {
diff --git a/vp10/encoder/encoder.c b/vp10/encoder/encoder.c
index 31a9390..e7fff82 100644
--- a/vp10/encoder/encoder.c
+++ b/vp10/encoder/encoder.c
@@ -1858,6 +1858,8 @@
const double dr =
(double)cpi->bytes * (double) 8 / (double)1000 / time_encoded;
const double peak = (double)((1 << cpi->oxcf.input_bit_depth) - 1);
+ const double target_rate = (double)cpi->oxcf.target_bandwidth / 1000;
+ const double rate_err = ((100.0 * (dr - target_rate)) / target_rate);
if (cpi->b_calculate_psnr) {
const double total_psnr =
@@ -1909,8 +1911,9 @@
SNPRINT2(results, "\t%7.3f", cpi->ssimg.worst);
}
- fprintf(f, "%s\t Time\n", headings);
- fprintf(f, "%s\t%8.0f\n", results, total_encode_time);
+ fprintf(f, "%s\t Time Rc-Err Abs Err\n", headings);
+ fprintf(f, "%s\t%8.0f %7.2f %7.2f\n", results,
+ total_encode_time, rate_err, fabs(rate_err));
}
fclose(f);
diff --git a/vp10/encoder/firstpass.c b/vp10/encoder/firstpass.c
index bc1ce00..7c5d3c0 100644
--- a/vp10/encoder/firstpass.c
+++ b/vp10/encoder/firstpass.c
@@ -45,7 +45,6 @@
#define BOOST_BREAKOUT 12.5
#define BOOST_FACTOR 12.5
-#define ERR_DIVISOR 128.0
#define FACTOR_PT_LOW 0.70
#define FACTOR_PT_HIGH 0.90
#define FIRST_PASS_Q 10.0
@@ -231,6 +230,13 @@
section->duration -= frame->duration;
}
+// Calculate the linear size relative to a baseline of 1080P
+#define BASE_SIZE 2073600.0 // 1920x1080
+static double get_linear_size_factor(const VP10_COMP *cpi) {
+ const double this_area = cpi->initial_width * cpi->initial_height;
+ return pow(this_area / BASE_SIZE, 0.5);
+}
+
// Calculate an active area of the image that discounts formatting
// bars and partially discounts other 0 energy areas.
#define MIN_ACTIVE_AREA 0.5
@@ -1103,11 +1109,7 @@
return fclamp(pow(error_term, power_term), 0.05, 5.0);
}
-// Larger image formats are expected to be a little harder to code relatively
-// given the same prediction error score. This in part at least relates to the
-// increased size and hence coding cost of motion vectors.
-#define EDIV_SIZE_FACTOR 800
-
+#define ERR_DIVISOR 100.0
static int get_twopass_worst_quality(const VP10_COMP *cpi,
const double section_err,
double inactive_zone,
@@ -1126,12 +1128,22 @@
const int active_mbs = VPXMAX(1, num_mbs - (int)(num_mbs * inactive_zone));
const double av_err_per_mb = section_err / active_mbs;
const double speed_term = 1.0 + 0.04 * oxcf->speed;
- const double ediv_size_correction = (double)num_mbs / EDIV_SIZE_FACTOR;
+ double ediv_size_correction;
const int target_norm_bits_per_mb = ((uint64_t)section_target_bandwidth <<
BPER_MB_NORMBITS) / active_mbs;
-
int q;
+ // Larger image formats are expected to be a little harder to code
+ // relatively given the same prediction error score. This in part at
+ // least relates to the increased size and hence coding overheads of
+ // motion vectors. Some account of this is made through adjustment of
+ // the error divisor.
+ ediv_size_correction =
+ VPXMAX(0.2, VPXMIN(5.0, get_linear_size_factor(cpi)));
+ if (ediv_size_correction < 1.0)
+ ediv_size_correction = -(1.0 / ediv_size_correction);
+ ediv_size_correction *= 4.0;
+
// Try and pick a max Q that will be high enough to encode the
// content at the given rate.
for (q = rc->best_quality; q < rc->worst_quality; ++q) {
diff --git a/vp10/encoder/mcomp.c b/vp10/encoder/mcomp.c
index 2c1c591..1ba2e2f 100644
--- a/vp10/encoder/mcomp.c
+++ b/vp10/encoder/mcomp.c
@@ -145,16 +145,6 @@
cfg->searches_per_step = 8;
}
-/*
- * To avoid the penalty for crossing cache-line read, preload the reference
- * area in a small buffer, which is aligned to make sure there won't be crossing
- * cache-line read while reading from this buffer. This reduced the cpu
- * cycles spent on reading ref data in sub-pixel filter functions.
- * TODO: Currently, since sub-pixel search range here is -3 ~ 3, copy 22 rows x
- * 32 cols area that is enough for 16x16 macroblock. Later, for SPLITMV, we
- * could reduce the area.
- */
-
/* estimated cost of a motion vector (r,c) */
#define MVC(r, c) \
(mvcost ? \
@@ -790,7 +780,6 @@
}
#undef MVC
-#undef PRE
#undef CHECK_BETTER
static INLINE int check_bounds(const MACROBLOCK *x, int row, int col,
diff --git a/vp8/encoder/onyx_if.c b/vp8/encoder/onyx_if.c
index 88c191e..6617422 100644
--- a/vp8/encoder/onyx_if.c
+++ b/vp8/encoder/onyx_if.c
@@ -2257,6 +2257,8 @@
double total_encode_time = (cpi->time_receive_data +
cpi->time_compress_data) / 1000.000;
double dr = (double)cpi->bytes * 8.0 / 1000.0 / time_encoded;
+ const double target_rate = (double)cpi->oxcf.target_bandwidth / 1000;
+ const double rate_err = ((100.0 * (dr - target_rate)) / target_rate);
if (cpi->b_calculate_psnr)
{
@@ -2302,12 +2304,14 @@
cpi->summed_weights, 8.0);
fprintf(f, "Bitrate\tAVGPsnr\tGLBPsnr\tAVPsnrP\t"
- "GLPsnrP\tVPXSSIM\t Time(us)\n");
+ "GLPsnrP\tVPXSSIM\t Time(us) Rc-Err "
+ "Abs Err\n");
fprintf(f, "%7.3f\t%7.3f\t%7.3f\t%7.3f\t%7.3f\t"
- "%7.3f\t%8.0f\n",
+ "%7.3f\t%8.0f %7.2f %7.2f\n",
dr, cpi->total / cpi->count, total_psnr,
cpi->totalp / cpi->count, total_psnr2,
- total_ssim, total_encode_time);
+ total_ssim, total_encode_time,
+ rate_err, fabs(rate_err));
}
}
diff --git a/vp9/common/vp9_common.h b/vp9/common/vp9_common.h
index 76e7cd4..2aff132 100644
--- a/vp9/common/vp9_common.h
+++ b/vp9/common/vp9_common.h
@@ -67,6 +67,24 @@
#define VP9_FRAME_MARKER 0x2
+typedef enum {
+ LEVEL_UNKNOWN = 0,
+ LEVEL_1 = 10,
+ LEVEL_1_1 = 11,
+ LEVEL_2 = 20,
+ LEVEL_2_1 = 21,
+ LEVEL_3 = 30,
+ LEVEL_3_1 = 31,
+ LEVEL_4 = 40,
+ LEVEL_4_1 = 41,
+ LEVEL_5 = 50,
+ LEVEL_5_1 = 51,
+ LEVEL_5_2 = 52,
+ LEVEL_6 = 60,
+ LEVEL_6_1 = 61,
+ LEVEL_6_2 = 62,
+ LEVEL_NOT_CARE = 255,
+} VP9_LEVEL;
#ifdef __cplusplus
} // extern "C"
diff --git a/vp9/common/vp9_onyxc_int.h b/vp9/common/vp9_onyxc_int.h
index 3fd935e..1df6f08 100644
--- a/vp9/common/vp9_onyxc_int.h
+++ b/vp9/common/vp9_onyxc_int.h
@@ -168,6 +168,8 @@
int allow_high_precision_mv;
+ int keep_level_stats;
+
// Flag signaling that the frame context should be reset to default values.
// 0 or 1 implies don't reset, 2 reset just the context specified in the
// frame header, 3 reset all contexts.
diff --git a/vp9/encoder/vp9_encodeframe.c b/vp9/encoder/vp9_encodeframe.c
index e96c96c..67069e7 100644
--- a/vp9/encoder/vp9_encodeframe.c
+++ b/vp9/encoder/vp9_encodeframe.c
@@ -4035,7 +4035,6 @@
rdc->m_search_count = 0; // Count of motion search hits.
rdc->ex_search_count = 0; // Exhaustive mesh search hits.
-
xd->lossless = cm->base_qindex == 0 &&
cm->y_dc_delta_q == 0 &&
cm->uv_dc_delta_q == 0 &&
diff --git a/vp9/encoder/vp9_encoder.c b/vp9/encoder/vp9_encoder.c
index 4be043d..8201794 100644
--- a/vp9/encoder/vp9_encoder.c
+++ b/vp9/encoder/vp9_encoder.c
@@ -774,7 +774,6 @@
cpi->oxcf = *oxcf;
cpi->framerate = oxcf->init_framerate;
-
cm->profile = oxcf->profile;
cm->bit_depth = oxcf->bit_depth;
#if CONFIG_VP9_HIGHBITDEPTH
@@ -783,6 +782,9 @@
cm->color_space = oxcf->color_space;
cm->color_range = oxcf->color_range;
+ cpi->target_level = oxcf->target_level;
+ cm->keep_level_stats = oxcf->target_level != LEVEL_NOT_CARE;
+
cm->width = oxcf->width;
cm->height = oxcf->height;
alloc_compressor_data(cpi);
@@ -1473,6 +1475,9 @@
cm->color_space = oxcf->color_space;
cm->color_range = oxcf->color_range;
+ cpi->target_level = oxcf->target_level;
+ cm->keep_level_stats = oxcf->target_level != LEVEL_NOT_CARE;
+
if (cm->profile <= PROFILE_1)
assert(cm->bit_depth == VPX_BITS_8);
else
@@ -2021,6 +2026,8 @@
const double dr =
(double)cpi->bytes * (double) 8 / (double)1000 / time_encoded;
const double peak = (double)((1 << cpi->oxcf.input_bit_depth) - 1);
+ const double target_rate = (double)cpi->oxcf.target_bandwidth / 1000;
+ const double rate_err = ((100.0 * (dr - target_rate)) / target_rate);
if (cpi->b_calculate_psnr) {
const double total_psnr =
@@ -2072,8 +2079,9 @@
SNPRINT2(results, "\t%7.3f", cpi->ssimg.worst);
}
- fprintf(f, "%s\t Time\n", headings);
- fprintf(f, "%s\t%8.0f\n", results, total_encode_time);
+ fprintf(f, "%s\t Time Rc-Err Abs Err\n", headings);
+ fprintf(f, "%s\t%8.0f %7.2f %7.2f\n", results,
+ total_encode_time, rate_err, fabs(rate_err));
}
fclose(f);
diff --git a/vp9/encoder/vp9_encoder.h b/vp9/encoder/vp9_encoder.h
index add4102..6be61ac 100644
--- a/vp9/encoder/vp9_encoder.h
+++ b/vp9/encoder/vp9_encoder.h
@@ -227,6 +227,8 @@
int max_threads;
+ int target_level;
+
vpx_fixed_buf_t two_pass_stats_in;
struct vpx_codec_pkt_list *output_pkt_list;
@@ -497,6 +499,8 @@
int use_skin_detection;
+ int target_level;
+
NOISE_ESTIMATE noise_estimate;
// Count on how many consecutive times a block uses small/zeromv for encoding.
diff --git a/vp9/encoder/vp9_firstpass.c b/vp9/encoder/vp9_firstpass.c
index 14ddaa2..f456f37 100644
--- a/vp9/encoder/vp9_firstpass.c
+++ b/vp9/encoder/vp9_firstpass.c
@@ -45,7 +45,6 @@
#define BOOST_BREAKOUT 12.5
#define BOOST_FACTOR 12.5
-#define ERR_DIVISOR 128.0
#define FACTOR_PT_LOW 0.70
#define FACTOR_PT_HIGH 0.90
#define FIRST_PASS_Q 10.0
@@ -237,6 +236,13 @@
section->duration -= frame->duration;
}
+// Calculate the linear size relative to a baseline of 1080P
+#define BASE_SIZE 2073600.0 // 1920x1080
+static double get_linear_size_factor(const VP9_COMP *cpi) {
+ const double this_area = cpi->initial_width * cpi->initial_height;
+ return pow(this_area / BASE_SIZE, 0.5);
+}
+
// Calculate an active area of the image that discounts formatting
// bars and partially discounts other 0 energy areas.
#define MIN_ACTIVE_AREA 0.5
@@ -1241,11 +1247,7 @@
return fclamp(pow(error_term, power_term), 0.05, 5.0);
}
-// Larger image formats are expected to be a little harder to code relatively
-// given the same prediction error score. This in part at least relates to the
-// increased size and hence coding cost of motion vectors.
-#define EDIV_SIZE_FACTOR 800
-
+#define ERR_DIVISOR 100.0
static int get_twopass_worst_quality(const VP9_COMP *cpi,
const double section_err,
double inactive_zone,
@@ -1267,16 +1269,25 @@
const int active_mbs = VPXMAX(1, num_mbs - (int)(num_mbs * inactive_zone));
const double av_err_per_mb = section_err / active_mbs;
const double speed_term = 1.0 + 0.04 * oxcf->speed;
- const double ediv_size_correction = (double)num_mbs / EDIV_SIZE_FACTOR;
+ double ediv_size_correction;
const int target_norm_bits_per_mb = ((uint64_t)target_rate <<
BPER_MB_NORMBITS) / active_mbs;
-
int q;
int is_svc_upper_layer = 0;
if (is_two_pass_svc(cpi) && cpi->svc.spatial_layer_id > 0)
is_svc_upper_layer = 1;
+ // Larger image formats are expected to be a little harder to code
+ // relatively given the same prediction error score. This in part at
+ // least relates to the increased size and hence coding overheads of
+ // motion vectors. Some account of this is made through adjustment of
+ // the error divisor.
+ ediv_size_correction =
+ VPXMAX(0.2, VPXMIN(5.0, get_linear_size_factor(cpi)));
+ if (ediv_size_correction < 1.0)
+ ediv_size_correction = -(1.0 / ediv_size_correction);
+ ediv_size_correction *= 4.0;
// Try and pick a max Q that will be high enough to encode the
// content at the given rate.
diff --git a/vp9/encoder/vp9_mcomp.c b/vp9/encoder/vp9_mcomp.c
index 4669145..f3ffe35 100644
--- a/vp9/encoder/vp9_mcomp.c
+++ b/vp9/encoder/vp9_mcomp.c
@@ -144,16 +144,6 @@
cfg->total_steps = ss_count / cfg->searches_per_step;
}
-/*
- * To avoid the penalty for crossing cache-line read, preload the reference
- * area in a small buffer, which is aligned to make sure there won't be crossing
- * cache-line read while reading from this buffer. This reduced the cpu
- * cycles spent on reading ref data in sub-pixel filter functions.
- * TODO: Currently, since sub-pixel search range here is -3 ~ 3, copy 22 rows x
- * 32 cols area that is enough for 16x16 macroblock. Later, for SPLITMV, we
- * could reduce the area.
- */
-
/* Estimated (square) error cost of a motion vector (r,c). The 14 scale comes
* from the same math as in mv_err_cost(). */
#define MVC(r, c) \
@@ -835,7 +825,6 @@
}
#undef MVC
-#undef PRE
#undef CHECK_BETTER
static INLINE int check_bounds(const MACROBLOCK *x, int row, int col,
diff --git a/vp9/encoder/vp9_ratectrl.c b/vp9/encoder/vp9_ratectrl.c
index 1e6c152..0ec93a9 100644
--- a/vp9/encoder/vp9_ratectrl.c
+++ b/vp9/encoder/vp9_ratectrl.c
@@ -953,11 +953,10 @@
FIXED_GF_INTERVAL], cm->bit_depth);
active_best_quality = VPXMAX(qindex + delta_qindex, rc->best_quality);
} else {
- // Use the min of the average Q (with some increase) and
- // active_worst_quality as basis for active_best.
+ // Use the min of the average Q and active_worst_quality as basis for
+ // active_best.
if (cm->current_video_frame > 1) {
- q = VPXMIN(((17 * rc->avg_frame_qindex[INTER_FRAME]) >> 4),
- active_worst_quality);
+ q = VPXMIN(rc->avg_frame_qindex[INTER_FRAME], active_worst_quality);
active_best_quality = inter_minq[q];
} else {
active_best_quality = inter_minq[rc->avg_frame_qindex[KEY_FRAME]];
diff --git a/vp9/vp9_cx_iface.c b/vp9/vp9_cx_iface.c
index 5921636..5e44ffd 100644
--- a/vp9/vp9_cx_iface.c
+++ b/vp9/vp9_cx_iface.c
@@ -40,6 +40,7 @@
unsigned int rc_max_inter_bitrate_pct;
unsigned int gf_cbr_boost_pct;
unsigned int lossless;
+ unsigned int target_level;
unsigned int frame_parallel_decoding_mode;
AQ_MODE aq_mode;
unsigned int frame_periodic_boost;
@@ -69,6 +70,7 @@
0, // rc_max_inter_bitrate_pct
0, // gf_cbr_boost_pct
0, // lossless
+ 255, // target_level
1, // frame_parallel_decoding_mode
NO_AQ, // aq_mode
0, // frame_periodic_delta_q
@@ -196,6 +198,17 @@
RANGE_CHECK(cfg, ss_number_layers, 1, VPX_SS_MAX_LAYERS);
RANGE_CHECK(cfg, ts_number_layers, 1, VPX_TS_MAX_LAYERS);
+ {
+ unsigned int level = extra_cfg->target_level;
+ if (level != LEVEL_1 && level != LEVEL_1_1 && level != LEVEL_2 &&
+ level != LEVEL_2_1 && level != LEVEL_3 && level != LEVEL_3_1 &&
+ level != LEVEL_4 && level != LEVEL_4_1 && level != LEVEL_5 &&
+ level != LEVEL_5_1 && level != LEVEL_5_2 && level != LEVEL_6 &&
+ level != LEVEL_6_1 && level != LEVEL_6_2 &&
+ level != LEVEL_UNKNOWN && level != LEVEL_NOT_CARE)
+ ERROR("target_level is invalid");
+ }
+
if (cfg->ss_number_layers * cfg->ts_number_layers > VPX_MAX_LAYERS)
ERROR("ss_number_layers * ts_number_layers is out of range");
if (cfg->ts_number_layers > 1) {
@@ -509,6 +522,8 @@
oxcf->temporal_layering_mode = (enum vp9e_temporal_layering_mode)
cfg->temporal_layering_mode;
+ oxcf->target_level = extra_cfg->target_level;
+
for (sl = 0; sl < oxcf->ss_number_layers; ++sl) {
#if CONFIG_SPATIAL_SVC
oxcf->ss_enable_auto_arf[sl] = cfg->ss_enable_auto_alt_ref[sl];
@@ -535,6 +550,7 @@
/*
printf("Current VP9 Settings: \n");
printf("target_bandwidth: %d\n", oxcf->target_bandwidth);
+ printf("target_level: %d\n", oxcf->target_level);
printf("noise_sensitivity: %d\n", oxcf->noise_sensitivity);
printf("sharpness: %d\n", oxcf->sharpness);
printf("cpu_used: %d\n", oxcf->cpu_used);
@@ -784,6 +800,13 @@
return update_extra_cfg(ctx, &extra_cfg);
}
+static vpx_codec_err_t ctrl_set_target_level(vpx_codec_alg_priv_t *ctx,
+ va_list args) {
+ struct vp9_extracfg extra_cfg = ctx->extra_cfg;
+ extra_cfg.target_level = CAST(VP9E_SET_LEVEL_STATS, args);
+ return update_extra_cfg(ctx, &extra_cfg);
+}
+
static vpx_codec_err_t encoder_init(vpx_codec_ctx_t *ctx,
vpx_codec_priv_enc_mr_cfg_t *data) {
vpx_codec_err_t res = VPX_CODEC_OK;
@@ -1516,6 +1539,7 @@
{VP9E_SET_MAX_GF_INTERVAL, ctrl_set_max_gf_interval},
{VP9E_SET_SVC_REF_FRAME_CONFIG, ctrl_set_svc_ref_frame_config},
{VP9E_SET_RENDER_SIZE, ctrl_set_render_size},
+ {VP9E_SET_TARGET_LEVEL, ctrl_set_target_level},
// Getters
{VP8E_GET_LAST_QUANTIZER, ctrl_get_quantizer},
diff --git a/vpx/vp8cx.h b/vpx/vp8cx.h
index bd99c6d..109306f 100644
--- a/vpx/vp8cx.h
+++ b/vpx/vp8cx.h
@@ -554,6 +554,15 @@
* Supported in codecs: VP9
*/
VP9E_SET_RENDER_SIZE,
+
+ /*!\brief Codec control function to set target level.
+ *
+ * 255: off (default); 0: only keep level stats; 10: target for level 1.0;
+ * 11: target for level 1.1; ... 62: target for level 6.2
+ *
+ * Supported in codecs: VP9
+ */
+ VP9E_SET_TARGET_LEVEL,
};
/*!\brief vpx 1-D scaling mode
@@ -809,6 +818,9 @@
VPX_CTRL_USE_TYPE(VP9E_SET_RENDER_SIZE, int *)
#define VPX_CTRL_VP9E_SET_RENDER_SIZE
+VPX_CTRL_USE_TYPE(VP9E_SET_LEVEL_STATS, unsigned int)
+#define VPX_CTRL_VP9E_SET_LEVEL_STATS
+
/*!\endcond */
/*! @} - end defgroup vp8_encoder */
#ifdef __cplusplus
diff --git a/vpx_dsp/vpx_dsp.mk b/vpx_dsp/vpx_dsp.mk
index dd8c6e8..43802d7 100644
--- a/vpx_dsp/vpx_dsp.mk
+++ b/vpx_dsp/vpx_dsp.mk
@@ -285,7 +285,6 @@
DSP_SRCS-$(HAVE_MSA) += mips/sad_msa.c
DSP_SRCS-$(HAVE_MSA) += mips/subtract_msa.c
-DSP_SRCS-$(HAVE_MMX) += x86/sad_mmx.asm
DSP_SRCS-$(HAVE_SSE3) += x86/sad_sse3.asm
DSP_SRCS-$(HAVE_SSSE3) += x86/sad_ssse3.asm
DSP_SRCS-$(HAVE_SSE4_1) += x86/sad_sse4.asm
diff --git a/vpx_dsp/vpx_dsp_rtcd_defs.pl b/vpx_dsp/vpx_dsp_rtcd_defs.pl
index ab5b687..aeadbaf 100644
--- a/vpx_dsp/vpx_dsp_rtcd_defs.pl
+++ b/vpx_dsp/vpx_dsp_rtcd_defs.pl
@@ -983,16 +983,16 @@
specialize qw/vpx_sad16x32 msa/, "$sse2_x86inc";
add_proto qw/unsigned int vpx_sad16x16/, "const uint8_t *src_ptr, int src_stride, const uint8_t *ref_ptr, int ref_stride";
-specialize qw/vpx_sad16x16 mmx media neon msa/, "$sse2_x86inc";
+specialize qw/vpx_sad16x16 media neon msa/, "$sse2_x86inc";
add_proto qw/unsigned int vpx_sad16x8/, "const uint8_t *src_ptr, int src_stride, const uint8_t *ref_ptr, int ref_stride";
-specialize qw/vpx_sad16x8 mmx neon msa/, "$sse2_x86inc";
+specialize qw/vpx_sad16x8 neon msa/, "$sse2_x86inc";
add_proto qw/unsigned int vpx_sad8x16/, "const uint8_t *src_ptr, int src_stride, const uint8_t *ref_ptr, int ref_stride";
-specialize qw/vpx_sad8x16 mmx neon msa/, "$sse2_x86inc";
+specialize qw/vpx_sad8x16 neon msa/, "$sse2_x86inc";
add_proto qw/unsigned int vpx_sad8x8/, "const uint8_t *src_ptr, int src_stride, const uint8_t *ref_ptr, int ref_stride";
-specialize qw/vpx_sad8x8 mmx neon msa/, "$sse2_x86inc";
+specialize qw/vpx_sad8x8 neon msa/, "$sse2_x86inc";
add_proto qw/unsigned int vpx_sad8x4/, "const uint8_t *src_ptr, int src_stride, const uint8_t *ref_ptr, int ref_stride";
specialize qw/vpx_sad8x4 msa/, "$sse2_x86inc";
@@ -1001,7 +1001,7 @@
specialize qw/vpx_sad4x8 msa/, "$sse2_x86inc";
add_proto qw/unsigned int vpx_sad4x4/, "const uint8_t *src_ptr, int src_stride, const uint8_t *ref_ptr, int ref_stride";
-specialize qw/vpx_sad4x4 mmx neon msa/, "$sse2_x86inc";
+specialize qw/vpx_sad4x4 neon msa/, "$sse2_x86inc";
#
# Avg
diff --git a/vpx_dsp/x86/sad_mmx.asm b/vpx_dsp/x86/sad_mmx.asm
deleted file mode 100644
index 9968992..0000000
--- a/vpx_dsp/x86/sad_mmx.asm
+++ /dev/null
@@ -1,427 +0,0 @@
-;
-; Copyright (c) 2010 The WebM project authors. All Rights Reserved.
-;
-; Use of this source code is governed by a BSD-style license
-; that can be found in the LICENSE file in the root of the source
-; tree. An additional intellectual property rights grant can be found
-; in the file PATENTS. All contributing project authors may
-; be found in the AUTHORS file in the root of the source tree.
-;
-
-
-%include "vpx_ports/x86_abi_support.asm"
-
-global sym(vpx_sad16x16_mmx) PRIVATE
-global sym(vpx_sad8x16_mmx) PRIVATE
-global sym(vpx_sad8x8_mmx) PRIVATE
-global sym(vpx_sad4x4_mmx) PRIVATE
-global sym(vpx_sad16x8_mmx) PRIVATE
-
-;unsigned int vpx_sad16x16_mmx(
-; unsigned char *src_ptr,
-; int src_stride,
-; unsigned char *ref_ptr,
-; int ref_stride)
-sym(vpx_sad16x16_mmx):
- push rbp
- mov rbp, rsp
- SHADOW_ARGS_TO_STACK 4
- push rsi
- push rdi
- ; end prolog
-
- mov rsi, arg(0) ;src_ptr
- mov rdi, arg(2) ;ref_ptr
-
- movsxd rax, dword ptr arg(1) ;src_stride
- movsxd rdx, dword ptr arg(3) ;ref_stride
-
- lea rcx, [rsi+rax*8]
-
- lea rcx, [rcx+rax*8]
- pxor mm7, mm7
-
- pxor mm6, mm6
-
-.x16x16sad_mmx_loop:
-
- movq mm0, QWORD PTR [rsi]
- movq mm2, QWORD PTR [rsi+8]
-
- movq mm1, QWORD PTR [rdi]
- movq mm3, QWORD PTR [rdi+8]
-
- movq mm4, mm0
- movq mm5, mm2
-
- psubusb mm0, mm1
- psubusb mm1, mm4
-
- psubusb mm2, mm3
- psubusb mm3, mm5
-
- por mm0, mm1
- por mm2, mm3
-
- movq mm1, mm0
- movq mm3, mm2
-
- punpcklbw mm0, mm6
- punpcklbw mm2, mm6
-
- punpckhbw mm1, mm6
- punpckhbw mm3, mm6
-
- paddw mm0, mm2
- paddw mm1, mm3
-
-
- lea rsi, [rsi+rax]
- add rdi, rdx
-
- paddw mm7, mm0
- paddw mm7, mm1
-
- cmp rsi, rcx
- jne .x16x16sad_mmx_loop
-
-
- movq mm0, mm7
-
- punpcklwd mm0, mm6
- punpckhwd mm7, mm6
-
- paddw mm0, mm7
- movq mm7, mm0
-
-
- psrlq mm0, 32
- paddw mm7, mm0
-
- movq rax, mm7
-
- pop rdi
- pop rsi
- mov rsp, rbp
- ; begin epilog
- UNSHADOW_ARGS
- pop rbp
- ret
-
-
-;unsigned int vpx_sad8x16_mmx(
-; unsigned char *src_ptr,
-; int src_stride,
-; unsigned char *ref_ptr,
-; int ref_stride)
-sym(vpx_sad8x16_mmx):
- push rbp
- mov rbp, rsp
- SHADOW_ARGS_TO_STACK 4
- push rsi
- push rdi
- ; end prolog
-
- mov rsi, arg(0) ;src_ptr
- mov rdi, arg(2) ;ref_ptr
-
- movsxd rax, dword ptr arg(1) ;src_stride
- movsxd rdx, dword ptr arg(3) ;ref_stride
-
- lea rcx, [rsi+rax*8]
-
- lea rcx, [rcx+rax*8]
- pxor mm7, mm7
-
- pxor mm6, mm6
-
-.x8x16sad_mmx_loop:
-
- movq mm0, QWORD PTR [rsi]
- movq mm1, QWORD PTR [rdi]
-
- movq mm2, mm0
- psubusb mm0, mm1
-
- psubusb mm1, mm2
- por mm0, mm1
-
- movq mm2, mm0
- punpcklbw mm0, mm6
-
- punpckhbw mm2, mm6
- lea rsi, [rsi+rax]
-
- add rdi, rdx
- paddw mm7, mm0
-
- paddw mm7, mm2
- cmp rsi, rcx
-
- jne .x8x16sad_mmx_loop
-
- movq mm0, mm7
- punpcklwd mm0, mm6
-
- punpckhwd mm7, mm6
- paddw mm0, mm7
-
- movq mm7, mm0
- psrlq mm0, 32
-
- paddw mm7, mm0
- movq rax, mm7
-
- pop rdi
- pop rsi
- mov rsp, rbp
- ; begin epilog
- UNSHADOW_ARGS
- pop rbp
- ret
-
-
-;unsigned int vpx_sad8x8_mmx(
-; unsigned char *src_ptr,
-; int src_stride,
-; unsigned char *ref_ptr,
-; int ref_stride)
-sym(vpx_sad8x8_mmx):
- push rbp
- mov rbp, rsp
- SHADOW_ARGS_TO_STACK 4
- push rsi
- push rdi
- ; end prolog
-
- mov rsi, arg(0) ;src_ptr
- mov rdi, arg(2) ;ref_ptr
-
- movsxd rax, dword ptr arg(1) ;src_stride
- movsxd rdx, dword ptr arg(3) ;ref_stride
-
- lea rcx, [rsi+rax*8]
- pxor mm7, mm7
-
- pxor mm6, mm6
-
-.x8x8sad_mmx_loop:
-
- movq mm0, QWORD PTR [rsi]
- movq mm1, QWORD PTR [rdi]
-
- movq mm2, mm0
- psubusb mm0, mm1
-
- psubusb mm1, mm2
- por mm0, mm1
-
- movq mm2, mm0
- punpcklbw mm0, mm6
-
- punpckhbw mm2, mm6
- paddw mm0, mm2
-
- lea rsi, [rsi+rax]
- add rdi, rdx
-
- paddw mm7, mm0
- cmp rsi, rcx
-
- jne .x8x8sad_mmx_loop
-
- movq mm0, mm7
- punpcklwd mm0, mm6
-
- punpckhwd mm7, mm6
- paddw mm0, mm7
-
- movq mm7, mm0
- psrlq mm0, 32
-
- paddw mm7, mm0
- movq rax, mm7
-
- pop rdi
- pop rsi
- mov rsp, rbp
- ; begin epilog
- UNSHADOW_ARGS
- pop rbp
- ret
-
-
-;unsigned int vpx_sad4x4_mmx(
-; unsigned char *src_ptr,
-; int src_stride,
-; unsigned char *ref_ptr,
-; int ref_stride)
-sym(vpx_sad4x4_mmx):
- push rbp
- mov rbp, rsp
- SHADOW_ARGS_TO_STACK 4
- push rsi
- push rdi
- ; end prolog
-
- mov rsi, arg(0) ;src_ptr
- mov rdi, arg(2) ;ref_ptr
-
- movsxd rax, dword ptr arg(1) ;src_stride
- movsxd rdx, dword ptr arg(3) ;ref_stride
-
- movd mm0, DWORD PTR [rsi]
- movd mm1, DWORD PTR [rdi]
-
- movd mm2, DWORD PTR [rsi+rax]
- movd mm3, DWORD PTR [rdi+rdx]
-
- punpcklbw mm0, mm2
- punpcklbw mm1, mm3
-
- movq mm2, mm0
- psubusb mm0, mm1
-
- psubusb mm1, mm2
- por mm0, mm1
-
- movq mm2, mm0
- pxor mm3, mm3
-
- punpcklbw mm0, mm3
- punpckhbw mm2, mm3
-
- paddw mm0, mm2
-
- lea rsi, [rsi+rax*2]
- lea rdi, [rdi+rdx*2]
-
- movd mm4, DWORD PTR [rsi]
- movd mm5, DWORD PTR [rdi]
-
- movd mm6, DWORD PTR [rsi+rax]
- movd mm7, DWORD PTR [rdi+rdx]
-
- punpcklbw mm4, mm6
- punpcklbw mm5, mm7
-
- movq mm6, mm4
- psubusb mm4, mm5
-
- psubusb mm5, mm6
- por mm4, mm5
-
- movq mm5, mm4
- punpcklbw mm4, mm3
-
- punpckhbw mm5, mm3
- paddw mm4, mm5
-
- paddw mm0, mm4
- movq mm1, mm0
-
- punpcklwd mm0, mm3
- punpckhwd mm1, mm3
-
- paddw mm0, mm1
- movq mm1, mm0
-
- psrlq mm0, 32
- paddw mm0, mm1
-
- movq rax, mm0
-
- pop rdi
- pop rsi
- mov rsp, rbp
- ; begin epilog
- UNSHADOW_ARGS
- pop rbp
- ret
-
-
-;unsigned int vpx_sad16x8_mmx(
-; unsigned char *src_ptr,
-; int src_stride,
-; unsigned char *ref_ptr,
-; int ref_stride)
-sym(vpx_sad16x8_mmx):
- push rbp
- mov rbp, rsp
- SHADOW_ARGS_TO_STACK 4
- push rsi
- push rdi
- ; end prolog
-
- mov rsi, arg(0) ;src_ptr
- mov rdi, arg(2) ;ref_ptr
-
- movsxd rax, dword ptr arg(1) ;src_stride
- movsxd rdx, dword ptr arg(3) ;ref_stride
-
- lea rcx, [rsi+rax*8]
- pxor mm7, mm7
-
- pxor mm6, mm6
-
-.x16x8sad_mmx_loop:
-
- movq mm0, [rsi]
- movq mm1, [rdi]
-
- movq mm2, [rsi+8]
- movq mm3, [rdi+8]
-
- movq mm4, mm0
- movq mm5, mm2
-
- psubusb mm0, mm1
- psubusb mm1, mm4
-
- psubusb mm2, mm3
- psubusb mm3, mm5
-
- por mm0, mm1
- por mm2, mm3
-
- movq mm1, mm0
- movq mm3, mm2
-
- punpcklbw mm0, mm6
- punpckhbw mm1, mm6
-
- punpcklbw mm2, mm6
- punpckhbw mm3, mm6
-
-
- paddw mm0, mm2
- paddw mm1, mm3
-
- paddw mm0, mm1
- lea rsi, [rsi+rax]
-
- add rdi, rdx
- paddw mm7, mm0
-
- cmp rsi, rcx
- jne .x16x8sad_mmx_loop
-
- movq mm0, mm7
- punpcklwd mm0, mm6
-
- punpckhwd mm7, mm6
- paddw mm0, mm7
-
- movq mm7, mm0
- psrlq mm0, 32
-
- paddw mm7, mm0
- movq rax, mm7
-
- pop rdi
- pop rsi
- mov rsp, rbp
- ; begin epilog
- UNSHADOW_ARGS
- pop rbp
- ret
diff --git a/vpxenc.c b/vpxenc.c
index a6cbbc7..f2def54 100644
--- a/vpxenc.c
+++ b/vpxenc.c
@@ -444,6 +444,11 @@
static const arg_def_t tune_content = ARG_DEF_ENUM(
NULL, "tune-content", 1, "Tune content type", tune_content_enum);
+
+static const arg_def_t target_level = ARG_DEF(
+ NULL, "target-level", 1,
+ "Target level (255: off (default); 0: only keep level stats; 10: level 1.0;"
+ " 11: level 1.1; ... 62: level 6.2)");
#endif
#if CONFIG_VP9_ENCODER
@@ -454,7 +459,7 @@
&gf_cbr_boost_pct, &lossless,
&frame_parallel_decoding, &aq_mode, &frame_periodic_boost,
&noise_sens, &tune_content, &input_color_space,
- &min_gf_interval, &max_gf_interval,
+ &min_gf_interval, &max_gf_interval, &target_level,
#if CONFIG_VP9_HIGHBITDEPTH
&bitdeptharg, &inbitdeptharg,
#endif // CONFIG_VP9_HIGHBITDEPTH
@@ -470,7 +475,7 @@
VP9E_SET_LOSSLESS, VP9E_SET_FRAME_PARALLEL_DECODING, VP9E_SET_AQ_MODE,
VP9E_SET_FRAME_PERIODIC_BOOST, VP9E_SET_NOISE_SENSITIVITY,
VP9E_SET_TUNE_CONTENT, VP9E_SET_COLOR_SPACE,
- VP9E_SET_MIN_GF_INTERVAL, VP9E_SET_MAX_GF_INTERVAL,
+ VP9E_SET_MIN_GF_INTERVAL, VP9E_SET_MAX_GF_INTERVAL, VP9E_SET_TARGET_LEVEL,
0
};
#endif