Merge "Resolve a couple of TODOs in firstpass.c"
diff --git a/test/minmax_test.cc b/test/minmax_test.cc
new file mode 100644
index 0000000..dbe4342
--- /dev/null
+++ b/test/minmax_test.cc
@@ -0,0 +1,132 @@
+/*
+ * Copyright (c) 2016 The WebM project authors. All Rights Reserved.
+ *
+ * Use of this source code is governed by a BSD-style license
+ * that can be found in the LICENSE file in the root of the source
+ * tree. An additional intellectual property rights grant can be found
+ * in the file PATENTS. All contributing project authors may
+ * be found in the AUTHORS file in the root of the source tree.
+ */
+
+#include <stdlib.h>
+#include <string.h>
+
+#include "third_party/googletest/src/include/gtest/gtest.h"
+
+#include "./vpx_dsp_rtcd.h"
+#include "vpx/vpx_integer.h"
+
+#include "test/acm_random.h"
+#include "test/register_state_check.h"
+
+namespace {
+
+using ::libvpx_test::ACMRandom;
+
+typedef void (*MinMaxFunc)(const uint8_t *a, int a_stride,
+ const uint8_t *b, int b_stride,
+ int *min, int *max);
+
+class MinMaxTest : public ::testing::TestWithParam<MinMaxFunc> {
+ public:
+ virtual void SetUp() {
+ mm_func_ = GetParam();
+ rnd_.Reset(ACMRandom::DeterministicSeed());
+ }
+
+ protected:
+ MinMaxFunc mm_func_;
+ ACMRandom rnd_;
+};
+
+void reference_minmax(const uint8_t *a, int a_stride,
+ const uint8_t *b, int b_stride,
+ int *min_ret, int *max_ret) {
+ int min = 255;
+ int max = 0;
+ for (int i = 0; i < 8; i++) {
+ for (int j = 0; j < 8; j++) {
+ const int diff = abs(a[i * a_stride + j] - b[i * b_stride + j]);
+ if (min > diff) min = diff;
+ if (max < diff) max = diff;
+ }
+ }
+
+ *min_ret = min;
+ *max_ret = max;
+}
+
+TEST_P(MinMaxTest, MinValue) {
+ for (int i = 0; i < 64; i++) {
+ uint8_t a[64], b[64];
+ memset(a, 0, sizeof(a));
+ memset(b, 255, sizeof(b));
+ b[i] = i; // Set a minimum difference of i.
+
+ int min, max;
+ ASM_REGISTER_STATE_CHECK(mm_func_(a, 8, b, 8, &min, &max));
+ EXPECT_EQ(255, max);
+ EXPECT_EQ(i, min);
+ }
+}
+
+TEST_P(MinMaxTest, MaxValue) {
+ for (int i = 0; i < 64; i++) {
+ uint8_t a[64], b[64];
+ memset(a, 0, sizeof(a));
+ memset(b, 0, sizeof(b));
+ b[i] = i; // Set a maximum difference of i.
+
+ int min, max;
+ ASM_REGISTER_STATE_CHECK(mm_func_(a, 8, b, 8, &min, &max));
+ EXPECT_EQ(i, max);
+ EXPECT_EQ(0, min);
+ }
+}
+
+TEST_P(MinMaxTest, CompareReference) {
+ uint8_t a[64], b[64];
+ for (int j = 0; j < 64; j++) {
+ a[j] = rnd_.Rand8();
+ b[j] = rnd_.Rand8();
+ }
+
+ int min_ref, max_ref, min, max;
+ reference_minmax(a, 8, b, 8, &min_ref, &max_ref);
+ ASM_REGISTER_STATE_CHECK(mm_func_(a, 8, b, 8, &min, &max));
+ EXPECT_EQ(max_ref, max);
+ EXPECT_EQ(min_ref, min);
+}
+
+TEST_P(MinMaxTest, CompareReferenceAndVaryStride) {
+ uint8_t a[8 * 64], b[8 * 64];
+ for (int i = 0; i < 8 * 64; i++) {
+ a[i] = rnd_.Rand8();
+ b[i] = rnd_.Rand8();
+ }
+ for (int a_stride = 8; a_stride <= 64; a_stride += 8) {
+ for (int b_stride = 8; b_stride <= 64; b_stride += 8) {
+ int min_ref, max_ref, min, max;
+ reference_minmax(a, a_stride, b, b_stride, &min_ref, &max_ref);
+ ASM_REGISTER_STATE_CHECK(mm_func_(a, a_stride, b, b_stride, &min, &max));
+ EXPECT_EQ(max_ref, max) << "when a_stride = " << a_stride
+ << " and b_stride = " << b_stride;;
+ EXPECT_EQ(min_ref, min) << "when a_stride = " << a_stride
+ << " and b_stride = " << b_stride;;
+ }
+ }
+}
+
+INSTANTIATE_TEST_CASE_P(C, MinMaxTest, ::testing::Values(&vpx_minmax_8x8_c));
+
+#if HAVE_SSE2
+INSTANTIATE_TEST_CASE_P(SSE2, MinMaxTest,
+ ::testing::Values(&vpx_minmax_8x8_sse2));
+#endif
+
+#if HAVE_NEON
+INSTANTIATE_TEST_CASE_P(NEON, MinMaxTest,
+ ::testing::Values(&vpx_minmax_8x8_neon));
+#endif
+
+} // namespace
diff --git a/test/test.mk b/test/test.mk
index d28ab11..7c22ca5 100644
--- a/test/test.mk
+++ b/test/test.mk
@@ -144,6 +144,7 @@
LIBVPX_TEST_SRCS-$(CONFIG_VP9_ENCODER) += fdct4x4_test.cc
LIBVPX_TEST_SRCS-$(CONFIG_VP9_ENCODER) += fdct8x8_test.cc
LIBVPX_TEST_SRCS-$(CONFIG_VP9_ENCODER) += hadamard_test.cc
+LIBVPX_TEST_SRCS-$(CONFIG_VP9_ENCODER) += minmax_test.cc
LIBVPX_TEST_SRCS-$(CONFIG_VP9_ENCODER) += variance_test.cc
LIBVPX_TEST_SRCS-$(CONFIG_VP9_ENCODER) += vp9_error_block_test.cc
LIBVPX_TEST_SRCS-$(CONFIG_VP9_ENCODER) += vp9_quantize_test.cc
diff --git a/vp9/common/vp9_loopfilter.c b/vp9/common/vp9_loopfilter.c
index 2d98e77..4614625 100644
--- a/vp9/common/vp9_loopfilter.c
+++ b/vp9/common/vp9_loopfilter.c
@@ -1072,8 +1072,6 @@
}
break;
}
-
- vp9_adjust_mask(cm, mi_row, mi_col, lfm);
}
static void filter_selectively_vert(uint8_t *s, int pitch,
diff --git a/vp9/encoder/vp9_denoiser.c b/vp9/encoder/vp9_denoiser.c
index 8b70062..42d456e 100644
--- a/vp9/encoder/vp9_denoiser.c
+++ b/vp9/encoder/vp9_denoiser.c
@@ -21,12 +21,6 @@
#include "vp9/encoder/vp9_denoiser.h"
#include "vp9/encoder/vp9_encoder.h"
-/* The VP9 denoiser is similar to that of the VP8 denoiser. While
- * choosing the motion vectors / reference frames, the denoiser is run, and if
- * it did not modify the signal to much, the denoised block is copied to the
- * signal.
- */
-
#ifdef OUTPUT_YUV_DENOISED
static void make_grayscale(YV12_BUFFER_CONFIG *yuv);
#endif
@@ -49,16 +43,19 @@
}
static unsigned int sse_thresh(BLOCK_SIZE bs, int increase_denoising) {
- return (1 << num_pels_log2_lookup[bs]) * (increase_denoising ? 60 : 40);
+ return (1 << num_pels_log2_lookup[bs]) * (increase_denoising ? 80 : 40);
}
static int sse_diff_thresh(BLOCK_SIZE bs, int increase_denoising,
int motion_magnitude) {
if (motion_magnitude >
noise_motion_thresh(bs, increase_denoising)) {
- return 0;
+ if (increase_denoising)
+ return (1 << num_pels_log2_lookup[bs]) << 2;
+ else
+ return 0;
} else {
- return (1 << num_pels_log2_lookup[bs]) * 20;
+ return (1 << num_pels_log2_lookup[bs]) << 4;
}
}
@@ -222,6 +219,7 @@
// If the best reference frame uses inter-prediction and there is enough of a
// difference in sum-squared-error, use it.
if (frame != INTRA_FRAME &&
+ ctx->newmv_sse != UINT_MAX &&
sse_diff > sse_diff_thresh(bs, increase_denoising, motion_magnitude)) {
mi->ref_frame[0] = ctx->best_reference_frame;
mi->mode = ctx->best_sse_inter_mode;
@@ -496,12 +494,12 @@
ctx->zeromv_sse = UINT_MAX;
ctx->newmv_sse = UINT_MAX;
ctx->zeromv_lastref_sse = UINT_MAX;
+ ctx->best_sse_mv.as_int = 0;
}
void vp9_denoiser_update_frame_stats(MODE_INFO *mi, unsigned int sse,
PREDICTION_MODE mode,
PICK_MODE_CONTEXT *ctx) {
- // TODO(tkopp): Use both MVs if possible
if (mi->mv[0].as_int == 0 && sse < ctx->zeromv_sse) {
ctx->zeromv_sse = sse;
ctx->best_zeromv_reference_frame = mi->ref_frame[0];
diff --git a/vp9/encoder/vp9_noise_estimate.c b/vp9/encoder/vp9_noise_estimate.c
index a96b912..d4fee6f 100644
--- a/vp9/encoder/vp9_noise_estimate.c
+++ b/vp9/encoder/vp9_noise_estimate.c
@@ -128,6 +128,14 @@
ne->last_h = cm->height;
}
return;
+ } else if (cpi->rc.avg_frame_low_motion < 50) {
+ // Force noise estimation to 0 and denoiser off if content has high motion.
+ ne->level = kLowLow;
+#if CONFIG_VP9_TEMPORAL_DENOISING
+ if (cpi->oxcf.noise_sensitivity > 0)
+ vp9_denoiser_set_noise_level(&cpi->denoiser, ne->level);
+#endif
+ return;
} else {
int num_samples = 0;
uint64_t avg_est = 0;
diff --git a/vp9/encoder/vp9_ratectrl.c b/vp9/encoder/vp9_ratectrl.c
index 3bd1753..d53e60a 100644
--- a/vp9/encoder/vp9_ratectrl.c
+++ b/vp9/encoder/vp9_ratectrl.c
@@ -338,6 +338,7 @@
rc->total_target_bits = 0;
rc->total_target_vs_actual = 0;
rc->avg_intersize_gfint = 0;
+ rc->avg_frame_low_motion = 0;
rc->frames_since_key = 8; // Sensible default for first frame.
rc->this_key_frame_forced = 0;
@@ -1334,6 +1335,26 @@
}
}
+static void compute_frame_low_motion(VP9_COMP *const cpi) {
+ VP9_COMMON *const cm = &cpi->common;
+ int mi_row, mi_col;
+ MODE_INFO **mi = cm->mi_grid_visible;
+ RATE_CONTROL *const rc = &cpi->rc;
+ const int rows = cm->mi_rows, cols = cm->mi_cols;
+ int cnt_zeromv = 0;
+ for (mi_row = 0; mi_row < rows; mi_row++) {
+ for (mi_col = 0; mi_col < cols; mi_col++) {
+ if (abs(mi[0]->mv[0].as_mv.row) < 16 &&
+ abs(mi[0]->mv[0].as_mv.col) < 16)
+ cnt_zeromv++;
+ mi++;
+ }
+ mi += 8;
+ }
+ cnt_zeromv = 100 * cnt_zeromv / (rows * cols);
+ rc->avg_frame_low_motion = (3 * rc->avg_frame_low_motion + cnt_zeromv) >> 2;
+}
+
void vp9_rc_postencode_update(VP9_COMP *cpi, uint64_t bytes_used) {
const VP9_COMMON *const cm = &cpi->common;
const VP9EncoderConfig *const oxcf = &cpi->oxcf;
@@ -1420,10 +1441,6 @@
rc->total_target_vs_actual = rc->total_actual_bits - rc->total_target_bits;
- if (!cpi->refresh_golden_frame && !cpi->refresh_alt_ref_frame) {
- rc->avg_intersize_gfint += rc->projected_frame_size;
- }
-
if (!cpi->use_svc || is_two_pass_svc(cpi)) {
if (is_altref_enabled(cpi) && cpi->refresh_alt_ref_frame &&
(cm->frame_type != KEY_FRAME))
@@ -1447,6 +1464,13 @@
rc->next_frame_size_selector != rc->frame_size_selector;
rc->frame_size_selector = rc->next_frame_size_selector;
}
+
+ if (oxcf->pass == 0) {
+ if (cm->frame_type != KEY_FRAME)
+ compute_frame_low_motion(cpi);
+ if (!cpi->refresh_golden_frame && !cpi->refresh_alt_ref_frame)
+ rc->avg_intersize_gfint += rc->projected_frame_size;
+ }
}
void vp9_rc_postencode_update_drop_frame(VP9_COMP *cpi) {
@@ -1507,6 +1531,7 @@
if (rc->frames_till_gf_update_due == 0) {
rc->avg_intersize_gfint =
rc->avg_intersize_gfint / (rc->baseline_gf_interval + 1);
+ rc->gfu_boost = DEFAULT_GF_BOOST;
if (cpi->oxcf.aq_mode == CYCLIC_REFRESH_AQ && cpi->oxcf.pass == 0) {
vp9_cyclic_refresh_set_golden_update(cpi);
} else {
@@ -1523,6 +1548,11 @@
rc->avg_frame_qindex[INTER_FRAME] > (7 * rc->worst_quality) >> 3 &&
rc->avg_intersize_gfint > (5 * rc->avg_frame_bandwidth) >> 1) {
rc->baseline_gf_interval = (3 * rc->baseline_gf_interval) >> 1;
+ } else if (cm->current_video_frame > 30 &&
+ rc->avg_frame_low_motion < 20) {
+ // Decrease boost and gf interval for high motion case.
+ rc->gfu_boost = DEFAULT_GF_BOOST >> 1;
+ rc->baseline_gf_interval = VPXMIN(6, rc->baseline_gf_interval >> 1);
}
}
rc->frames_till_gf_update_due = rc->baseline_gf_interval;
@@ -1535,7 +1565,6 @@
}
cpi->refresh_golden_frame = 1;
rc->source_alt_ref_pending = USE_ALTREF_FOR_ONE_PASS;
- rc->gfu_boost = DEFAULT_GF_BOOST;
rc->avg_intersize_gfint = 0;
}
if (cm->frame_type == KEY_FRAME)
diff --git a/vp9/encoder/vp9_ratectrl.h b/vp9/encoder/vp9_ratectrl.h
index 7b6f165..eef1940 100644
--- a/vp9/encoder/vp9_ratectrl.h
+++ b/vp9/encoder/vp9_ratectrl.h
@@ -163,6 +163,7 @@
int high_source_sad;
int count_last_scene_change;
int avg_intersize_gfint;
+ int avg_frame_low_motion;
} RATE_CONTROL;
struct VP9_COMP;
diff --git a/vp9/encoder/vp9_skin_detection.c b/vp9/encoder/vp9_skin_detection.c
index ff0dfce..268aa91 100644
--- a/vp9/encoder/vp9_skin_detection.c
+++ b/vp9/encoder/vp9_skin_detection.c
@@ -112,7 +112,6 @@
void vp9_compute_skin_map(VP9_COMP *const cpi, FILE *yuv_skinmap_file) {
int i, j, mi_row, mi_col, num_bl;
VP9_COMMON *const cm = &cpi->common;
- CYCLIC_REFRESH *const cr = cpi->cyclic_refresh;
uint8_t *y;
const uint8_t *src_y = cpi->Source->y_buffer;
const uint8_t *src_u = cpi->Source->u_buffer;
@@ -166,19 +165,17 @@
} else {
int block_size = BLOCK_8X8;
int consec_zeromv = 0;
- if (cpi->oxcf.aq_mode == CYCLIC_REFRESH_AQ && cm->seg.enabled) {
- int bl_index = mi_row * cm->mi_cols + mi_col;
- int bl_index1 = bl_index + 1;
- int bl_index2 = bl_index + cm->mi_cols;
- int bl_index3 = bl_index2 + 1;
- if (y_bsize == 8)
- consec_zeromv = cr->consec_zero_mv[bl_index];
- else
- consec_zeromv = VPXMIN(cr->consec_zero_mv[bl_index],
- VPXMIN(cr->consec_zero_mv[bl_index1],
- VPXMIN(cr->consec_zero_mv[bl_index2],
- cr->consec_zero_mv[bl_index3])));
- }
+ int bl_index = mi_row * cm->mi_cols + mi_col;
+ int bl_index1 = bl_index + 1;
+ int bl_index2 = bl_index + cm->mi_cols;
+ int bl_index3 = bl_index2 + 1;
+ if (y_bsize == 8)
+ consec_zeromv = cpi->consec_zero_mv[bl_index];
+ else
+ consec_zeromv = VPXMIN(cpi->consec_zero_mv[bl_index],
+ VPXMIN(cpi->consec_zero_mv[bl_index1],
+ VPXMIN(cpi->consec_zero_mv[bl_index2],
+ cpi->consec_zero_mv[bl_index3])));
if (y_bsize == 16)
block_size = BLOCK_16X16;
is_skin = vp9_compute_skin_block(src_y, src_u, src_v, src_ystride,
diff --git a/vpx_dsp/arm/avg_neon.c b/vpx_dsp/arm/avg_neon.c
index d054c41..e52958c 100644
--- a/vpx_dsp/arm/avg_neon.c
+++ b/vpx_dsp/arm/avg_neon.c
@@ -197,3 +197,60 @@
return s - ((t * t) >> shift_factor);
}
}
+
+void vpx_minmax_8x8_neon(const uint8_t *a, int a_stride,
+ const uint8_t *b, int b_stride,
+ int *min, int *max) {
+ // Load and concatenate.
+ const uint8x16_t a01 = vcombine_u8(vld1_u8(a),
+ vld1_u8(a + a_stride));
+ const uint8x16_t a23 = vcombine_u8(vld1_u8(a + 2 * a_stride),
+ vld1_u8(a + 3 * a_stride));
+ const uint8x16_t a45 = vcombine_u8(vld1_u8(a + 4 * a_stride),
+ vld1_u8(a + 5 * a_stride));
+ const uint8x16_t a67 = vcombine_u8(vld1_u8(a + 6 * a_stride),
+ vld1_u8(a + 7 * a_stride));
+
+ const uint8x16_t b01 = vcombine_u8(vld1_u8(b),
+ vld1_u8(b + b_stride));
+ const uint8x16_t b23 = vcombine_u8(vld1_u8(b + 2 * b_stride),
+ vld1_u8(b + 3 * b_stride));
+ const uint8x16_t b45 = vcombine_u8(vld1_u8(b + 4 * b_stride),
+ vld1_u8(b + 5 * b_stride));
+ const uint8x16_t b67 = vcombine_u8(vld1_u8(b + 6 * b_stride),
+ vld1_u8(b + 7 * b_stride));
+
+ // Absolute difference.
+ const uint8x16_t ab01_diff = vabdq_u8(a01, b01);
+ const uint8x16_t ab23_diff = vabdq_u8(a23, b23);
+ const uint8x16_t ab45_diff = vabdq_u8(a45, b45);
+ const uint8x16_t ab67_diff = vabdq_u8(a67, b67);
+
+ // Max values between the Q vectors.
+ const uint8x16_t ab0123_max = vmaxq_u8(ab01_diff, ab23_diff);
+ const uint8x16_t ab4567_max = vmaxq_u8(ab45_diff, ab67_diff);
+ const uint8x16_t ab0123_min = vminq_u8(ab01_diff, ab23_diff);
+ const uint8x16_t ab4567_min = vminq_u8(ab45_diff, ab67_diff);
+
+ const uint8x16_t ab07_max = vmaxq_u8(ab0123_max, ab4567_max);
+ const uint8x16_t ab07_min = vminq_u8(ab0123_min, ab4567_min);
+
+ // Split to D and start doing pairwise.
+ uint8x8_t ab_max = vmax_u8(vget_high_u8(ab07_max), vget_low_u8(ab07_max));
+ uint8x8_t ab_min = vmin_u8(vget_high_u8(ab07_min), vget_low_u8(ab07_min));
+
+ // Enough runs of vpmax/min propogate the max/min values to every position.
+ ab_max = vpmax_u8(ab_max, ab_max);
+ ab_min = vpmin_u8(ab_min, ab_min);
+
+ ab_max = vpmax_u8(ab_max, ab_max);
+ ab_min = vpmin_u8(ab_min, ab_min);
+
+ ab_max = vpmax_u8(ab_max, ab_max);
+ ab_min = vpmin_u8(ab_min, ab_min);
+
+ *min = *max = 0; // Clear high bits
+ // Store directly to avoid costly neon->gpr transfer.
+ vst1_lane_u8((uint8_t *)max, ab_max, 0);
+ vst1_lane_u8((uint8_t *)min, ab_min, 0);
+}
diff --git a/vpx_dsp/vpx_dsp_rtcd_defs.pl b/vpx_dsp/vpx_dsp_rtcd_defs.pl
index 2b13192..9ea80a0 100644
--- a/vpx_dsp/vpx_dsp_rtcd_defs.pl
+++ b/vpx_dsp/vpx_dsp_rtcd_defs.pl
@@ -1014,7 +1014,7 @@
specialize qw/vpx_avg_4x4 sse2 neon msa/;
add_proto qw/void vpx_minmax_8x8/, "const uint8_t *s, int p, const uint8_t *d, int dp, int *min, int *max";
- specialize qw/vpx_minmax_8x8 sse2/;
+ specialize qw/vpx_minmax_8x8 sse2 neon/;
add_proto qw/void vpx_hadamard_8x8/, "const int16_t *src_diff, int src_stride, int16_t *coeff";
specialize qw/vpx_hadamard_8x8 sse2/, "$ssse3_x86_64_x86inc";