Merge "Remove unnecessary border extension when frame size change."
diff --git a/vp9/common/vp9_debugmodes.c b/vp9/common/vp9_debugmodes.c
index d9dace6..e96bc4f 100644
--- a/vp9/common/vp9_debugmodes.c
+++ b/vp9/common/vp9_debugmodes.c
@@ -25,55 +25,65 @@
static void print_mi_data(VP9_COMMON *cm, FILE *file, const char *descriptor,
size_t member_offset) {
int mi_row, mi_col;
- int mi_index = 0;
- // TODO(hkuang): Fix this debug function.
- MODE_INFO **mi = &cm->mi;
+ MODE_INFO *mi = cm->mi;
int rows = cm->mi_rows;
int cols = cm->mi_cols;
char prefix = descriptor[0];
log_frame_info(cm, descriptor, file);
- mi_index = 0;
for (mi_row = 0; mi_row < rows; mi_row++) {
fprintf(file, "%c ", prefix);
for (mi_col = 0; mi_col < cols; mi_col++) {
fprintf(file, "%2d ",
- *((int*) ((char *) (&mi[mi_index]->mbmi) +
- member_offset)));
- mi_index++;
+ *((int*) ((char *) (&mi->src_mi->mbmi) +
+ member_offset)));
+ mi++;
}
fprintf(file, "\n");
- mi_index += 8;
+ mi += 8;
}
fprintf(file, "\n");
}
+
void vp9_print_modes_and_motion_vectors(VP9_COMMON *cm, const char *file) {
int mi_row;
int mi_col;
- int mi_index = 0;
FILE *mvs = fopen(file, "a");
- // TODO(hkuang): Fix this debug function.
- MODE_INFO **mi = &cm->mi;
+ MODE_INFO *mi = cm->mi;
int rows = cm->mi_rows;
int cols = cm->mi_cols;
print_mi_data(cm, mvs, "Partitions:", offsetof(MB_MODE_INFO, sb_type));
print_mi_data(cm, mvs, "Modes:", offsetof(MB_MODE_INFO, mode));
- print_mi_data(cm, mvs, "Skips:", offsetof(MB_MODE_INFO, skip));
print_mi_data(cm, mvs, "Ref frame:", offsetof(MB_MODE_INFO, ref_frame[0]));
print_mi_data(cm, mvs, "Transform:", offsetof(MB_MODE_INFO, tx_size));
print_mi_data(cm, mvs, "UV Modes:", offsetof(MB_MODE_INFO, uv_mode));
+ // output skip infomation.
+ log_frame_info(cm, "Skips:", mvs);
+ for (mi_row = 0; mi_row < rows; mi_row++) {
+ fprintf(mvs, "S ");
+ for (mi_col = 0; mi_col < cols; mi_col++) {
+ fprintf(mvs, "%2d ", mi->src_mi->mbmi.skip);
+ mi++;
+ }
+ fprintf(mvs, "\n");
+ mi += 8;
+ }
+ fprintf(mvs, "\n");
+
+ // output motion vectors.
log_frame_info(cm, "Vectors ", mvs);
+ mi = cm->mi;
for (mi_row = 0; mi_row < rows; mi_row++) {
fprintf(mvs, "V ");
for (mi_col = 0; mi_col < cols; mi_col++) {
- fprintf(mvs, "%4d:%4d ", mi[mi_index]->mbmi.mv[0].as_mv.row,
- mi[mi_index]->mbmi.mv[0].as_mv.col);
- mi_index++;
+ fprintf(mvs, "%4d:%4d ", mi->src_mi->mbmi.mv[0].as_mv.row,
+ mi->src_mi->mbmi.mv[0].as_mv.col);
+ mi++;
}
fprintf(mvs, "\n");
- mi_index += 8;
+ mi += 8;
}
fprintf(mvs, "\n");
diff --git a/vp9/common/vp9_thread.h b/vp9/common/vp9_thread.h
index c24ef5f..12848fe 100644
--- a/vp9/common/vp9_thread.h
+++ b/vp9/common/vp9_thread.h
@@ -28,7 +28,7 @@
#if CONFIG_MULTITHREAD
-#if defined(_WIN32)
+#if defined(_WIN32) && !HAVE_PTHREAD_H
#include <errno.h> // NOLINT
#include <process.h> // NOLINT
#include <windows.h> // NOLINT
diff --git a/vp9/common/x86/vp9_high_intrapred_sse2.asm b/vp9/common/x86/vp9_high_intrapred_sse2.asm
index 721126c..b12d29c 100644
--- a/vp9/common/x86/vp9_high_intrapred_sse2.asm
+++ b/vp9/common/x86/vp9_high_intrapred_sse2.asm
@@ -345,7 +345,7 @@
%if ARCH_X86_64
INIT_XMM sse2
-cglobal highbd_tm_predictor_16x16, 5, 6, 8, dst, stride, above, left, bps, one
+cglobal highbd_tm_predictor_16x16, 5, 6, 9, dst, stride, above, left, bps, one
movd m2, [aboveq-2]
mova m0, [aboveq]
mova m1, [aboveq+16]
diff --git a/vp9/encoder/vp9_encodeframe.c b/vp9/encoder/vp9_encodeframe.c
index 0885909..99bb930 100644
--- a/vp9/encoder/vp9_encodeframe.c
+++ b/vp9/encoder/vp9_encodeframe.c
@@ -471,6 +471,43 @@
return 0;
}
+
+void vp9_set_vbp_thresholds(VP9_COMP *cpi, int q) {
+ SPEED_FEATURES *const sf = &cpi->sf;
+ if (sf->partition_search_type != VAR_BASED_PARTITION) {
+ return;
+ } else {
+ VP9_COMMON *const cm = &cpi->common;
+ const VP9EncoderConfig *const oxcf = &cpi->oxcf;
+ const int is_key_frame = (cm->frame_type == KEY_FRAME);
+ const int use_4x4_partition = is_key_frame;
+ const int low_res = (cm->width <= 352 && cm->height <= 288);
+ const int threshold_multiplier = is_key_frame ? 80 : 4;
+ const int64_t threshold_base = (int64_t)(threshold_multiplier *
+ vp9_convert_qindex_to_q(q, cm->bit_depth));
+ cpi->vbp_threshold = threshold_base;
+ cpi->vbp_threshold_bsize_min = threshold_base << oxcf->speed;
+ cpi->vbp_threshold_bsize_max = threshold_base;
+
+ if (is_key_frame) {
+ cpi->vbp_threshold = threshold_base >> 2;
+ cpi->vbp_threshold_bsize_min = threshold_base << 2;
+ } else if (low_res) {
+ cpi->vbp_threshold_bsize_min = threshold_base << 3;
+ cpi->vbp_threshold_bsize_max = threshold_base >> 2;
+ }
+ // TODO(marpan): Allow 4x4 partitions for inter-frames.
+ // use_4x4_partition = (variance4x4downsample[i2 + j] == 1);
+ // If 4x4 partition is not used, then 8x8 partition will be selected
+ // if variance of 16x16 block is very high, so use larger threshold
+ // for 16x16 (threshold_bsize_min) in that case.
+ cpi->vbp_threshold_16x16 = (use_4x4_partition) ?
+ cpi->vbp_threshold : cpi->vbp_threshold_bsize_min;
+ cpi->vbp_bsize_min = (use_4x4_partition) ? BLOCK_8X8 : BLOCK_16X16;
+ }
+}
+
+
// This function chooses partitioning based on the variance between source and
// reconstructed last, where variance is computed for downs-sampled inputs.
static void choose_partitioning(VP9_COMP *cpi,
@@ -479,7 +516,6 @@
int mi_row, int mi_col) {
VP9_COMMON * const cm = &cpi->common;
MACROBLOCKD *xd = &x->e_mbd;
-
int i, j, k, m;
v64x64 vt;
v16x16 vt2[16];
@@ -489,34 +525,12 @@
int dp;
int pixels_wide = 64, pixels_high = 64;
const YV12_BUFFER_CONFIG *yv12 = get_ref_frame_buffer(cpi, LAST_FRAME);
- const struct scale_factors *const sf = &cm->frame_refs[LAST_FRAME - 1].sf;
+
// Always use 4x4 partition for key frame.
const int is_key_frame = (cm->frame_type == KEY_FRAME);
const int use_4x4_partition = is_key_frame;
+ const int low_res = (cm->width <= 352 && cm->height <= 288);
int variance4x4downsample[16];
- int low_res = (cm->width <= 352 && cm->height <= 288) ? 1 : 0;
- const int threshold_multiplier = is_key_frame ? 80 : 4;
- int64_t threshold_base;
- int64_t threshold;
- int64_t threshold_bsize_min;
- int64_t threshold_bsize_max;
-
- vp9_clear_system_state();
- threshold_base = (int64_t)(threshold_multiplier *
- vp9_convert_qindex_to_q(cm->base_qindex, cm->bit_depth));
- threshold = threshold_base;
- threshold_bsize_min = threshold_base << cpi->oxcf.speed;
- threshold_bsize_max = threshold_base;
-
- // Modify thresholds for key frame and for low-resolutions (set lower
- // thresholds to favor split).
- if (is_key_frame) {
- threshold = threshold_base >> 2;
- threshold_bsize_min = threshold_base << 2;
- } else if (low_res) {
- threshold_bsize_min = threshold_base << 3;
- threshold_bsize_max = threshold_base >> 2;
- }
set_offsets(cpi, tile, x, mi_row, mi_col, BLOCK_64X64);
@@ -531,7 +545,8 @@
if (!is_key_frame) {
MB_MODE_INFO *mbmi = &xd->mi[0].src_mi->mbmi;
unsigned int var = 0, sse;
- vp9_setup_pre_planes(xd, 0, yv12, mi_row, mi_col, sf);
+ vp9_setup_pre_planes(xd, 0, yv12, mi_row, mi_col,
+ &cm->frame_refs[LAST_FRAME - 1].sf);
mbmi->ref_frame[0] = LAST_FRAME;
mbmi->ref_frame[1] = NONE;
mbmi->sb_type = BLOCK_64X64;
@@ -619,7 +634,7 @@
}
if (is_key_frame || (low_res &&
vt.split[i].split[j].part_variances.none.variance >
- (threshold << 1))) {
+ (cpi->vbp_threshold << 1))) {
// Go down to 4x4 down-sampling for variance.
variance4x4downsample[i2 + j] = 1;
for (k = 0; k < 4; k++) {
@@ -680,30 +695,22 @@
}
fill_variance_tree(&vt, BLOCK_64X64);
-
// Now go through the entire structure, splitting every block size until
// we get to one that's got a variance lower than our threshold.
if ( mi_col + 8 > cm->mi_cols || mi_row + 8 > cm->mi_rows ||
!set_vt_partitioning(cpi, xd, &vt, BLOCK_64X64, mi_row, mi_col,
- threshold_bsize_max, BLOCK_16X16)) {
+ cpi->vbp_threshold_bsize_max, BLOCK_16X16)) {
for (i = 0; i < 4; ++i) {
const int x32_idx = ((i & 1) << 2);
const int y32_idx = ((i >> 1) << 2);
const int i2 = i << 2;
if (!set_vt_partitioning(cpi, xd, &vt.split[i], BLOCK_32X32,
(mi_row + y32_idx), (mi_col + x32_idx),
- threshold, BLOCK_16X16)) {
+ cpi->vbp_threshold,
+ BLOCK_16X16)) {
for (j = 0; j < 4; ++j) {
const int x16_idx = ((j & 1) << 1);
const int y16_idx = ((j >> 1) << 1);
- // TODO(marpan): Allow 4x4 partitions for inter-frames.
- // use_4x4_partition = (variance4x4downsample[i2 + j] == 1);
- // If 4x4 partition is not used, then 8x8 partition will be selected
- // if variance of 16x16 block is very high, so use larger threshold
- // for 16x16 (threshold_bsize_min) in that case.
- uint64_t threshold_16x16 = (use_4x4_partition) ? threshold :
- threshold_bsize_min;
- BLOCK_SIZE bsize_min = (use_4x4_partition) ? BLOCK_8X8 : BLOCK_16X16;
// For inter frames: if variance4x4downsample[] == 1 for this 16x16
// block, then the variance is based on 4x4 down-sampling, so use vt2
// in set_vt_partioning(), otherwise use vt.
@@ -713,7 +720,8 @@
if (!set_vt_partitioning(cpi, xd, vtemp, BLOCK_16X16,
mi_row + y32_idx + y16_idx,
mi_col + x32_idx + x16_idx,
- threshold_16x16, bsize_min)) {
+ cpi->vbp_threshold_16x16,
+ cpi->vbp_bsize_min)) {
for (k = 0; k < 4; ++k) {
const int x8_idx = (k & 1);
const int y8_idx = (k >> 1);
@@ -722,7 +730,8 @@
BLOCK_8X8,
mi_row + y32_idx + y16_idx + y8_idx,
mi_col + x32_idx + x16_idx + x8_idx,
- threshold_bsize_min, BLOCK_8X8)) {
+ cpi->vbp_threshold_bsize_min,
+ BLOCK_8X8)) {
set_block_size(cpi, xd,
(mi_row + y32_idx + y16_idx + y8_idx),
(mi_col + x32_idx + x16_idx + x8_idx),
diff --git a/vp9/encoder/vp9_encodeframe.h b/vp9/encoder/vp9_encodeframe.h
index 556f3a5..8d545b6 100644
--- a/vp9/encoder/vp9_encodeframe.h
+++ b/vp9/encoder/vp9_encodeframe.h
@@ -38,6 +38,8 @@
void vp9_encode_tile(struct VP9_COMP *cpi, struct ThreadData *td,
int tile_row, int tile_col);
+void vp9_set_vbp_thresholds(struct VP9_COMP *cpi, int q);
+
#ifdef __cplusplus
} // extern "C"
#endif
diff --git a/vp9/encoder/vp9_encoder.c b/vp9/encoder/vp9_encoder.c
index 65b6605..e2ed95c 100644
--- a/vp9/encoder/vp9_encoder.c
+++ b/vp9/encoder/vp9_encoder.c
@@ -50,6 +50,7 @@
#include "vp9/encoder/vp9_temporal_filter.h"
#include "vp9/encoder/vp9_resize.h"
#include "vp9/encoder/vp9_svc_layercontext.h"
+#include "vp9/encoder/vp9_skin_detection.h"
#define SHARP_FILTER_QTHRESH 0 /* Q threshold for 8-tap sharp filter */
@@ -60,12 +61,14 @@
// mv. Choose a very high value for
// now so that HIGH_PRECISION is always
// chosen.
-
// #define OUTPUT_YUV_REC
#ifdef OUTPUT_YUV_DENOISED
FILE *yuv_denoised_file = NULL;
#endif
+#ifdef OUTPUT_YUV_SKINMAP
+FILE *yuv_skinmap_file = NULL;
+#endif
#ifdef OUTPUT_YUV_REC
FILE *yuv_rec_file;
#endif
@@ -1541,6 +1544,9 @@
yuv_denoised_file = fopen("denoised.yuv", "ab");
#endif
#endif
+#ifdef OUTPUT_YUV_SKINMAP
+ yuv_skinmap_file = fopen("skinmap.yuv", "ab");
+#endif
#ifdef OUTPUT_YUV_REC
yuv_rec_file = fopen("rec.yuv", "wb");
#endif
@@ -1829,6 +1835,9 @@
fclose(yuv_denoised_file);
#endif
#endif
+#ifdef OUTPUT_YUV_SKINMAP
+ fclose(yuv_skinmap_file);
+#endif
#ifdef OUTPUT_YUV_REC
fclose(yuv_rec_file);
#endif
@@ -2116,8 +2125,7 @@
return 0;
}
-#if CONFIG_VP9_TEMPORAL_DENOISING
-#if defined(OUTPUT_YUV_DENOISED)
+#if defined(OUTPUT_YUV_DENOISED) || defined(OUTPUT_YUV_SKINMAP)
// The denoiser buffer is allocated as a YUV 440 buffer. This function writes it
// as YUV 420. We simply use the top-left pixels of the UV buffers, since we do
// not denoise the UV channels at this time. If ever we implement UV channel
@@ -2148,7 +2156,6 @@
} while (--h);
}
#endif
-#endif
#ifdef OUTPUT_YUV_REC
void vp9_write_yuv_rec_frame(VP9_COMMON *cm) {
@@ -2800,7 +2807,10 @@
set_size_dependent_vars(cpi, &q, &bottom_index, &top_index);
vp9_set_quantizer(cm, q);
+ vp9_set_vbp_thresholds(cpi, q);
+
setup_frame(cpi);
+
// Variance adaptive and in frame q adjustment experiments are mutually
// exclusive.
if (cpi->oxcf.aq_mode == VARIANCE_AQ) {
@@ -3296,7 +3306,11 @@
}
#endif
#endif
-
+#ifdef OUTPUT_YUV_SKINMAP
+ if (cpi->common.current_video_frame > 1) {
+ vp9_compute_skin_map(cpi, yuv_skinmap_file);
+ }
+#endif
// Special case code to reduce pulsing when key frames are forced at a
// fixed interval. Note the reconstruction error if it is the frame before
diff --git a/vp9/encoder/vp9_encoder.h b/vp9/encoder/vp9_encoder.h
index 1e4c982..35c5a48 100644
--- a/vp9/encoder/vp9_encoder.h
+++ b/vp9/encoder/vp9_encoder.h
@@ -450,6 +450,13 @@
int resize_pending;
+ // VAR_BASED_PARTITION thresholds
+ int64_t vbp_threshold;
+ int64_t vbp_threshold_bsize_min;
+ int64_t vbp_threshold_bsize_max;
+ int64_t vbp_threshold_16x16;
+ BLOCK_SIZE vbp_bsize_min;
+
// Multi-threading
int num_workers;
VP9Worker *workers;
diff --git a/vp9/encoder/vp9_pickmode.c b/vp9/encoder/vp9_pickmode.c
index 70e90b6..071747e 100644
--- a/vp9/encoder/vp9_pickmode.c
+++ b/vp9/encoder/vp9_pickmode.c
@@ -577,6 +577,9 @@
ZEROMV, NEARESTMV, NEARMV, NEWMV,
};
+static const int ref_frame_cost[MAX_REF_FRAMES] = {
+ 1235, 229, 530, 615,
+};
// TODO(jingning) placeholder for inter-frame non-RD mode decision.
// this needs various further optimizations. to be continued..
void vp9_pick_inter_mode(VP9_COMP *cpi, MACROBLOCK *x,
@@ -807,9 +810,8 @@
vp9_build_inter_predictors_sby(xd, mi_row, mi_col, bsize);
model_rd_for_sb_y(cpi, bsize, x, xd, &pf_rate[filter],
&pf_dist[filter], &pf_var[filter], &pf_sse[filter]);
- cost = RDCOST(x->rdmult, x->rddiv,
- vp9_get_switchable_rate(cpi, xd) + pf_rate[filter],
- pf_dist[filter]);
+ pf_rate[filter] += vp9_get_switchable_rate(cpi, xd);
+ cost = RDCOST(x->rdmult, x->rddiv, pf_rate[filter], pf_dist[filter]);
pf_tx_size[filter] = mbmi->tx_size;
if (cost < best_cost) {
best_filter = filter;
@@ -846,6 +848,8 @@
vp9_build_inter_predictors_sby(xd, mi_row, mi_col, bsize);
model_rd_for_sb_y(cpi, bsize, x, xd, &this_rdc.rate, &this_rdc.dist,
&var_y, &sse_y);
+ this_rdc.rate += cm->interp_filter == SWITCHABLE ?
+ vp9_get_switchable_rate(cpi, xd) : 0;
}
// chroma component rate-distortion cost modeling
@@ -865,6 +869,7 @@
this_rdc.rate += rate_mv;
this_rdc.rate += cpi->inter_mode_cost[mbmi->mode_context[ref_frame]]
[INTER_OFFSET(this_mode)];
+ this_rdc.rate += ref_frame_cost[ref_frame];
this_rdc.rdcost = RDCOST(x->rdmult, x->rddiv,
this_rdc.rate, this_rdc.dist);
@@ -971,6 +976,7 @@
this_rdc.rate = args.rate;
this_rdc.dist = args.dist;
this_rdc.rate += cpi->mbmode_cost[this_mode];
+ this_rdc.rate += ref_frame_cost[INTRA_FRAME];
this_rdc.rate += intra_cost_penalty;
this_rdc.rdcost = RDCOST(x->rdmult, x->rddiv,
this_rdc.rate, this_rdc.dist);
@@ -1124,8 +1130,6 @@
int64_t b_best_rd = INT64_MAX;
const int i = idy * 2 + idx;
PREDICTION_MODE this_mode;
- int b_rate = 0;
- int64_t b_dist = 0;
RD_COST this_rdc;
unsigned int var_y, sse_y;
@@ -1153,6 +1157,7 @@
&b_mv[NEARMV]);
for (this_mode = NEARESTMV; this_mode <= NEWMV; ++this_mode) {
+ int b_rate = 0;
xd->mi[0].bmi[i].as_mv[0].as_int = b_mv[this_mode].as_int;
if (this_mode == NEWMV) {
@@ -1214,6 +1219,9 @@
&x->pred_sse[ref_frame], NULL, 0, 0);
xd->mi[0].bmi[i].as_mv[0].as_mv = tmp_mv;
+ } else {
+ b_rate += cpi->inter_mode_cost[mbmi->mode_context[ref_frame]]
+ [INTER_OFFSET(this_mode)];
}
vp9_build_inter_predictor(pd->pre[0].buf, pd->pre[0].stride,
@@ -1230,7 +1238,6 @@
&var_y, &sse_y);
this_rdc.rate += b_rate;
- this_rdc.dist += b_dist;
this_rdc.rdcost = RDCOST(x->rdmult, x->rddiv,
this_rdc.rate, this_rdc.dist);
if (this_rdc.rdcost < b_best_rd) {
diff --git a/vp9/encoder/vp9_ratectrl.c b/vp9/encoder/vp9_ratectrl.c
index 8d316d6..6975137 100644
--- a/vp9/encoder/vp9_ratectrl.c
+++ b/vp9/encoder/vp9_ratectrl.c
@@ -1173,8 +1173,8 @@
// Modify frame size target when down-scaling.
if (cpi->oxcf.resize_mode == RESIZE_DYNAMIC &&
rc->frame_size_selector != UNSCALED)
- rc->this_frame_target =
- rc->this_frame_target * rate_thresh_mult[rc->frame_size_selector];
+ rc->this_frame_target = (int)(rc->this_frame_target
+ * rate_thresh_mult[rc->frame_size_selector]);
// Target rate per SB64 (including partial SB64s.
rc->sb64_target_rate = ((int64_t)rc->this_frame_target * 64 * 64) /
diff --git a/vp9/encoder/vp9_rd.c b/vp9/encoder/vp9_rd.c
index 20ca4ca..0b6d11e 100644
--- a/vp9/encoder/vp9_rd.c
+++ b/vp9/encoder/vp9_rd.c
@@ -279,9 +279,11 @@
set_block_thresholds(cm, rd);
- if (!cpi->sf.use_nonrd_pick_mode || cm->frame_type == KEY_FRAME) {
+ if (!cpi->sf.use_nonrd_pick_mode || cm->frame_type == KEY_FRAME)
fill_token_costs(x->token_costs, cm->fc->coef_probs);
+ if (cpi->sf.partition_search_type != VAR_BASED_PARTITION ||
+ cm->frame_type == KEY_FRAME) {
for (i = 0; i < PARTITION_CONTEXTS; ++i)
vp9_cost_tokens(cpi->partition_cost[i], get_partition_probs(cm, i),
vp9_partition_tree);
diff --git a/vp9/encoder/vp9_skin_detection.c b/vp9/encoder/vp9_skin_detection.c
new file mode 100644
index 0000000..08ebcf8
--- /dev/null
+++ b/vp9/encoder/vp9_skin_detection.c
@@ -0,0 +1,104 @@
+/*
+ * Copyright (c) 2015 The WebM project authors. All Rights Reserved.
+ *
+ * Use of this source code is governed by a BSD-style license
+ * that can be found in the LICENSE file in the root of the source
+ * tree. An additional intellectual property rights grant can be found
+ * in the file PATENTS. All contributing project authors may
+ * be found in the AUTHORS file in the root of the source tree.
+ */
+
+#include <limits.h>
+#include <math.h>
+
+#include "vp9/common/vp9_blockd.h"
+#include "vp9/encoder/vp9_encoder.h"
+#include "vp9/encoder/vp9_skin_detection.h"
+
+// Fixed-point skin color model parameters.
+static const int skin_mean[2] = {7463, 9614}; // q6
+static const int skin_inv_cov[4] = {4107, 1663, 1663, 2157}; // q16
+static const int skin_threshold = 1570636; // q18
+
+// Thresholds on luminance.
+static const int y_low = 20;
+static const int y_high = 220;
+
+// Evaluates the Mahalanobis distance measure for the input CbCr values.
+static int evaluate_skin_color_difference(int cb, int cr) {
+ const int cb_q6 = cb << 6;
+ const int cr_q6 = cr << 6;
+ const int cb_diff_q12 = (cb_q6 - skin_mean[0]) * (cb_q6 - skin_mean[0]);
+ const int cbcr_diff_q12 = (cb_q6 - skin_mean[0]) * (cr_q6 - skin_mean[1]);
+ const int cr_diff_q12 = (cr_q6 - skin_mean[1]) * (cr_q6 - skin_mean[1]);
+ const int cb_diff_q2 = (cb_diff_q12 + (1 << 9)) >> 10;
+ const int cbcr_diff_q2 = (cbcr_diff_q12 + (1 << 9)) >> 10;
+ const int cr_diff_q2 = (cr_diff_q12 + (1 << 9)) >> 10;
+ const int skin_diff = skin_inv_cov[0] * cb_diff_q2 +
+ skin_inv_cov[1] * cbcr_diff_q2 +
+ skin_inv_cov[2] * cbcr_diff_q2 +
+ skin_inv_cov[3] * cr_diff_q2;
+ return skin_diff;
+}
+
+int vp9_skin_pixel(const uint8_t y, const uint8_t cb, const uint8_t cr) {
+ if (y < y_low || y > y_high)
+ return 0;
+ else
+ return (evaluate_skin_color_difference(cb, cr) < skin_threshold);
+}
+
+#ifdef OUTPUT_YUV_SKINMAP
+// For viewing skin map on input source.
+void vp9_compute_skin_map(VP9_COMP *const cpi, FILE *yuv_skinmap_file) {
+ int i, j, mi_row, mi_col;
+ VP9_COMMON *const cm = &cpi->common;
+ uint8_t *y;
+ const uint8_t *src_y = cpi->Source->y_buffer;
+ const uint8_t *src_u = cpi->Source->u_buffer;
+ const uint8_t *src_v = cpi->Source->v_buffer;
+ const int src_ystride = cpi->Source->y_stride;
+ const int src_uvstride = cpi->Source->uv_stride;
+ YV12_BUFFER_CONFIG skinmap;
+ vpx_memset(&skinmap, 0, sizeof(YV12_BUFFER_CONFIG));
+ if (vp9_alloc_frame_buffer(&skinmap, cm->width, cm->height,
+ cm->subsampling_x, cm->subsampling_y,
+ VP9_ENC_BORDER_IN_PIXELS, cm->byte_alignment)) {
+ vp9_free_frame_buffer(&skinmap);
+ return;
+ }
+ vpx_memset(skinmap.buffer_alloc, 128, skinmap.frame_size);
+ y = skinmap.y_buffer;
+ // Loop through 8x8 blocks and set skin map based on center pixel of block.
+ // Set y to white for skin block, otherwise set to source with gray scale.
+ // Ignore rightmost/bottom boundary blocks.
+ for (mi_row = 0; mi_row < cm->mi_rows - 1; ++mi_row) {
+ for (mi_col = 0; mi_col < cm->mi_cols - 1; ++mi_col) {
+ // Use middle pixel for each 8x8 block for skin detection.
+ // If middle pixel is skin, assign whole 8x8 block to skin.
+ const uint8_t ysource = src_y[4 * src_ystride + 4];
+ const uint8_t usource = src_u[2 * src_uvstride + 2];
+ const uint8_t vsource = src_v[2 * src_uvstride + 2];
+ const int is_skin = vp9_skin_pixel(ysource, usource, vsource);
+ for (i = 0; i < 8; i++) {
+ for (j = 0; j < 8; j++) {
+ if (is_skin)
+ y[i * src_ystride + j] = 255;
+ else
+ y[i * src_ystride + j] = src_y[i * src_ystride + j];
+ }
+ }
+ y += 8;
+ src_y += 8;
+ src_u += 4;
+ src_v += 4;
+ }
+ y += (src_ystride << 3) - ((cm->mi_cols - 1) << 3);
+ src_y += (src_ystride << 3) - ((cm->mi_cols - 1) << 3);
+ src_u += (src_uvstride << 2) - ((cm->mi_cols - 1) << 2);
+ src_v += (src_uvstride << 2) - ((cm->mi_cols - 1) << 2);
+ }
+ vp9_write_yuv_frame_420(&skinmap, yuv_skinmap_file);
+ vp9_free_frame_buffer(&skinmap);
+}
+#endif
diff --git a/vp9/encoder/vp9_skin_detection.h b/vp9/encoder/vp9_skin_detection.h
new file mode 100644
index 0000000..3d4e737
--- /dev/null
+++ b/vp9/encoder/vp9_skin_detection.h
@@ -0,0 +1,35 @@
+/*
+ * Copyright (c) 2015 The WebM project authors. All Rights Reserved.
+ *
+ * Use of this source code is governed by a BSD-style license
+ * that can be found in the LICENSE file in the root of the source
+ * tree. An additional intellectual property rights grant can be found
+ * in the file PATENTS. All contributing project authors may
+ * be found in the AUTHORS file in the root of the source tree.
+ */
+
+#ifndef VP9_ENCODER_VP9_SKIN_MAP_H_
+#define VP9_ENCODER_VP9_SKIN_MAP_H_
+
+#include "vp9/common/vp9_blockd.h"
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+struct VP9_COMP;
+
+// #define OUTPUT_YUV_SKINMAP
+
+int vp9_skin_pixel(const uint8_t y, const uint8_t cb, const uint8_t cr);
+
+#ifdef OUTPUT_YUV_SKINMAP
+// For viewing skin map on input source.
+void vp9_compute_skin_map(VP9_COMP *const cpi, FILE *yuv_skinmap_file);
+#endif
+
+#ifdef __cplusplus
+} // extern "C"
+#endif
+
+#endif // VP9_ENCODER_VP9_SKIN_MAP_H_
diff --git a/vp9/encoder/vp9_speed_features.h b/vp9/encoder/vp9_speed_features.h
index eaa0acc..8722d9c 100644
--- a/vp9/encoder/vp9_speed_features.h
+++ b/vp9/encoder/vp9_speed_features.h
@@ -421,4 +421,3 @@
#endif
#endif // VP9_ENCODER_VP9_SPEED_FEATURES_H_
-
diff --git a/vp9/vp9cx.mk b/vp9/vp9cx.mk
index fbdd4ba..2b3f894 100644
--- a/vp9/vp9cx.mk
+++ b/vp9/vp9cx.mk
@@ -88,6 +88,8 @@
VP9_CX_SRCS-yes += encoder/vp9_aq_cyclicrefresh.h
VP9_CX_SRCS-yes += encoder/vp9_aq_complexity.c
VP9_CX_SRCS-yes += encoder/vp9_aq_complexity.h
+VP9_CX_SRCS-yes += encoder/vp9_skin_detection.c
+VP9_CX_SRCS-yes += encoder/vp9_skin_detection.h
ifeq ($(CONFIG_VP9_POSTPROC),yes)
VP9_CX_SRCS-$(CONFIG_INTERNAL_STATS) += common/vp9_postproc.h
VP9_CX_SRCS-$(CONFIG_INTERNAL_STATS) += common/vp9_postproc.c
diff --git a/vpxenc.c b/vpxenc.c
index f42e02c..46af600 100644
--- a/vpxenc.c
+++ b/vpxenc.c
@@ -398,6 +398,22 @@
NULL, "frame-boost", 1,
"Enable frame periodic boost (0: off (default), 1: on)");
+static const struct arg_enum_list color_space_enum[] = {
+ { "unknown", VPX_CS_UNKNOWN },
+ { "bt601", VPX_CS_BT_601 },
+ { "bt709", VPX_CS_BT_709 },
+ { "smpte170", VPX_CS_SMPTE_170 },
+ { "smpte240", VPX_CS_SMPTE_240 },
+ { "bt2020", VPX_CS_BT_2020 },
+ { "reserved", VPX_CS_RESERVED },
+ { "sRGB", VPX_CS_SRGB },
+ { NULL, 0 }
+};
+
+static const arg_def_t input_color_space = ARG_DEF_ENUM(
+ NULL, "color-space", 1,
+ "The color space of input content:", color_space_enum);
+
#if CONFIG_VP9 && CONFIG_VP9_HIGHBITDEPTH
static const struct arg_enum_list bitdepth_enum[] = {
{"8", VPX_BITS_8},
@@ -429,7 +445,7 @@
&tune_ssim, &cq_level, &max_intra_rate_pct, &max_inter_rate_pct,
&gf_cbr_boost_pct, &lossless,
&frame_parallel_decoding, &aq_mode, &frame_periodic_boost,
- &noise_sens, &tune_content,
+ &noise_sens, &tune_content, &input_color_space,
#if CONFIG_VP9 && CONFIG_VP9_HIGHBITDEPTH
&bitdeptharg, &inbitdeptharg,
#endif