Merge "vp9_denoiser_sse2.c: improve code style."
diff --git a/test/sad_test.cc b/test/sad_test.cc
index 5377c1e..c7042fe 100644
--- a/test/sad_test.cc
+++ b/test/sad_test.cc
@@ -625,6 +625,20 @@
#if HAVE_AVX2
#if CONFIG_VP9_ENCODER
+const SadMxNVp9Func sad_64x64_avx2_vp9 = vp9_sad64x64_avx2;
+const SadMxNVp9Func sad_64x32_avx2_vp9 = vp9_sad64x32_avx2;
+const SadMxNVp9Func sad_32x64_avx2_vp9 = vp9_sad32x64_avx2;
+const SadMxNVp9Func sad_32x32_avx2_vp9 = vp9_sad32x32_avx2;
+const SadMxNVp9Func sad_32x16_avx2_vp9 = vp9_sad32x16_avx2;
+const SadMxNVp9Param avx2_vp9_tests[] = {
+ make_tuple(64, 64, sad_64x64_avx2_vp9),
+ make_tuple(64, 32, sad_64x32_avx2_vp9),
+ make_tuple(32, 64, sad_32x64_avx2_vp9),
+ make_tuple(32, 32, sad_32x32_avx2_vp9),
+ make_tuple(32, 16, sad_32x16_avx2_vp9),
+};
+INSTANTIATE_TEST_CASE_P(AVX2, SADVP9Test, ::testing::ValuesIn(avx2_vp9_tests));
+
const SadMxNx4Func sad_64x64x4d_avx2 = vp9_sad64x64x4d_avx2;
const SadMxNx4Func sad_32x32x4d_avx2 = vp9_sad32x32x4d_avx2;
INSTANTIATE_TEST_CASE_P(AVX2, SADx4Test, ::testing::Values(
diff --git a/test/test-data.mk b/test/test-data.mk
index e4dae3a..e2da193 100644
--- a/test/test-data.mk
+++ b/test/test-data.mk
@@ -653,8 +653,30 @@
LIBVPX_TEST_DATA-$(CONFIG_VP9_DECODER) += vp90-2-19-skip-01.webm.md5
LIBVPX_TEST_DATA-$(CONFIG_VP9_DECODER) += vp90-2-19-skip-02.webm
LIBVPX_TEST_DATA-$(CONFIG_VP9_DECODER) += vp90-2-19-skip-02.webm.md5
+LIBVPX_TEST_DATA-$(CONFIG_VP9_DECODER) += vp91-2-04-yuv422.webm
+LIBVPX_TEST_DATA-$(CONFIG_VP9_DECODER) += vp91-2-04-yuv422.webm.md5
+LIBVPX_TEST_DATA-$(CONFIG_VP9_DECODER) += vp91-2-04-yuv440.webm
+LIBVPX_TEST_DATA-$(CONFIG_VP9_DECODER) += vp91-2-04-yuv440.webm.md5
LIBVPX_TEST_DATA-$(CONFIG_VP9_DECODER) += vp91-2-04-yuv444.webm
LIBVPX_TEST_DATA-$(CONFIG_VP9_DECODER) += vp91-2-04-yuv444.webm.md5
+ifeq ($(CONFIG_VP9_HIGHBITDEPTH),yes)
+LIBVPX_TEST_DATA-$(CONFIG_VP9_DECODER) += vp92-2-20-10bit-yuv420.webm
+LIBVPX_TEST_DATA-$(CONFIG_VP9_DECODER) += vp92-2-20-10bit-yuv420.webm.md5
+LIBVPX_TEST_DATA-$(CONFIG_VP9_DECODER) += vp92-2-20-12bit-yuv420.webm
+LIBVPX_TEST_DATA-$(CONFIG_VP9_DECODER) += vp92-2-20-12bit-yuv420.webm.md5
+LIBVPX_TEST_DATA-$(CONFIG_VP9_DECODER) += vp93-2-20-10bit-yuv422.webm
+LIBVPX_TEST_DATA-$(CONFIG_VP9_DECODER) += vp93-2-20-10bit-yuv422.webm.md5
+LIBVPX_TEST_DATA-$(CONFIG_VP9_DECODER) += vp93-2-20-12bit-yuv422.webm
+LIBVPX_TEST_DATA-$(CONFIG_VP9_DECODER) += vp93-2-20-12bit-yuv422.webm.md5
+LIBVPX_TEST_DATA-$(CONFIG_VP9_DECODER) += vp93-2-20-10bit-yuv440.webm
+LIBVPX_TEST_DATA-$(CONFIG_VP9_DECODER) += vp93-2-20-10bit-yuv440.webm.md5
+LIBVPX_TEST_DATA-$(CONFIG_VP9_DECODER) += vp93-2-20-12bit-yuv440.webm
+LIBVPX_TEST_DATA-$(CONFIG_VP9_DECODER) += vp93-2-20-12bit-yuv440.webm.md5
+LIBVPX_TEST_DATA-$(CONFIG_VP9_DECODER) += vp93-2-20-10bit-yuv444.webm
+LIBVPX_TEST_DATA-$(CONFIG_VP9_DECODER) += vp93-2-20-10bit-yuv444.webm.md5
+LIBVPX_TEST_DATA-$(CONFIG_VP9_DECODER) += vp93-2-20-12bit-yuv444.webm
+LIBVPX_TEST_DATA-$(CONFIG_VP9_DECODER) += vp93-2-20-12bit-yuv444.webm.md5
+endif # CONFIG_VP9_HIGHBITDEPTH
# Invalid files for testing libvpx error checking.
LIBVPX_TEST_DATA-$(CONFIG_VP9_DECODER) += invalid-vp90-01-v2.webm
diff --git a/test/test-data.sha1 b/test/test-data.sha1
index 69d1d2f..3d1cd65 100644
--- a/test/test-data.sha1
+++ b/test/test-data.sha1
@@ -670,8 +670,28 @@
5661b0168752969f055eec37b05fa9fa947dc7eb vp90-2-16-intra-only.webm.md5
c01bb7938f9a9f25e0c37afdec2f2fb73b6cc7fa vp90-2-17-show-existing-frame.webm
cc75f351818b9a619818f5cc77b9bc013d0c1e11 vp90-2-17-show-existing-frame.webm.md5
+013708bd043f0821a3e56fb8404d82e7a0c7af6c vp91-2-04-yuv422.webm
+1e58a7d23adad830a672f1733c9d2ae17890d59c vp91-2-04-yuv422.webm.md5
+25d78f28948789d159a9453ebc13048b818251b1 vp91-2-04-yuv440.webm
+81b3870b27a7f695ef6a43e87ab04bbdb5aee2f5 vp91-2-04-yuv440.webm.md5
0321d507ce62dedc8a51b4e9011f7a19aed9c3dc vp91-2-04-yuv444.webm
367e423dd41fdb49aa028574a2cfec5c2f325c5c vp91-2-04-yuv444.webm.md5
+f77673b566f686853adefe0c578ad251b7241281 vp92-2-20-10bit-yuv420.webm
+abdedfaddacbbe1a15ac7a54e86360f03629fb7a vp92-2-20-10bit-yuv420.webm.md5
+0c2c355a1b17b28537c5a3b19997c8783b69f1af vp92-2-20-12bit-yuv420.webm
+afb2c2798703e039189b0a15c8ac5685aa51d33f vp92-2-20-12bit-yuv420.webm.md5
+0d661bc6e83da33238981481efd1b1802d323d88 vp93-2-20-10bit-yuv422.webm
+10318907063db22eb02fad332556edbbecd443cc vp93-2-20-10bit-yuv422.webm.md5
+ebc6be2f7511a0bdeac0b18c67f84ba7168839c7 vp93-2-20-12bit-yuv422.webm
+235232267c6a1dc8a11e45d600f1c99d2f8b42d4 vp93-2-20-12bit-yuv422.webm.md5
+f76b11b26d4beaceac7a7e7729dd5054d095164f vp93-2-20-10bit-yuv440.webm
+757b33b5ac969c5999999488a731a3d1e6d9fb88 vp93-2-20-10bit-yuv440.webm.md5
+df8807dbd29bec795c2db9c3c18e511fbb988101 vp93-2-20-12bit-yuv440.webm
+ea4100930c3f59a1c23fbb33ab0ea01151cae159 vp93-2-20-12bit-yuv440.webm.md5
+189c1b5f404ff41a50a7fc96341085ad541314a9 vp93-2-20-10bit-yuv444.webm
+2dd0177c2f9d970b6e698892634c653630f91f40 vp93-2-20-10bit-yuv444.webm.md5
+bd44cf6e1c27343e3639df9ac21346aedd5d6973 vp93-2-20-12bit-yuv444.webm
+f36e5bdf5ec3213f32c0ddc82f95d82c5133bf27 vp93-2-20-12bit-yuv444.webm.md5
eb438c6540eb429f74404eedfa3228d409c57874 desktop_640_360_30.yuv
89e70ebd22c27d275fe14dc2f1a41841a6d8b9ab kirland_640_480_30.yuv
33c533192759e5bb4f07abfbac389dc259db4686 macmarcomoving_640_480_30.yuv
diff --git a/test/test_vectors.cc b/test/test_vectors.cc
index 7efa8c0..432522c 100644
--- a/test/test_vectors.cc
+++ b/test/test_vectors.cc
@@ -184,6 +184,13 @@
"vp90-2-18-resize.ivf", "vp90-2-19-skip.webm",
"vp90-2-19-skip-01.webm", "vp90-2-19-skip-02.webm",
"vp91-2-04-yuv444.webm",
+ "vp91-2-04-yuv422.webm", "vp91-2-04-yuv440.webm",
+#if CONFIG_VP9_HIGHBITDEPTH
+ "vp92-2-20-10bit-yuv420.webm", "vp92-2-20-12bit-yuv420.webm",
+ "vp93-2-20-10bit-yuv422.webm", "vp93-2-20-12bit-yuv422.webm",
+ "vp93-2-20-10bit-yuv440.webm", "vp93-2-20-12bit-yuv440.webm",
+ "vp93-2-20-10bit-yuv444.webm", "vp93-2-20-12bit-yuv444.webm",
+#endif // CONFIG_VP9_HIGHBITDEPTH`
};
const int kNumVP9TestVectors = NELEMENTS(kVP9TestVectors);
#endif // CONFIG_VP9_DECODER
diff --git a/vp8/encoder/denoising.c b/vp8/encoder/denoising.c
index 12f9734..75b2a3b 100644
--- a/vp8/encoder/denoising.c
+++ b/vp8/encoder/denoising.c
@@ -390,9 +390,9 @@
denoiser->denoise_pars.scale_motion_thresh = 16;
denoiser->denoise_pars.scale_increase_filter = 1;
denoiser->denoise_pars.denoise_mv_bias = 60;
- denoiser->denoise_pars.pickmode_mv_bias = 60;
- denoiser->denoise_pars.qp_thresh = 100;
- denoiser->denoise_pars.consec_zerolast = 10;
+ denoiser->denoise_pars.pickmode_mv_bias = 75;
+ denoiser->denoise_pars.qp_thresh = 85;
+ denoiser->denoise_pars.consec_zerolast = 15;
denoiser->denoise_pars.spatial_blur = 20;
}
}
@@ -453,17 +453,17 @@
// Bitrate thresholds and noise metric (nmse) thresholds for switching to
// aggressive mode.
// TODO(marpan): Adjust thresholds, including effect on resolution.
- denoiser->bitrate_threshold = 200000; // (bits/sec).
+ denoiser->bitrate_threshold = 300000; // (bits/sec).
denoiser->threshold_aggressive_mode = 35;
- if (width * height > 640 * 480) {
- denoiser->bitrate_threshold = 500000;
- denoiser->threshold_aggressive_mode = 100;
+ if (width * height > 1280 * 720) {
+ denoiser->bitrate_threshold = 2000000;
+ denoiser->threshold_aggressive_mode = 1400;
} else if (width * height > 960 * 540) {
denoiser->bitrate_threshold = 800000;
denoiser->threshold_aggressive_mode = 150;
- } else if (width * height > 1280 * 720) {
- denoiser->bitrate_threshold = 2000000;
- denoiser->threshold_aggressive_mode = 1400;
+ } else if (width * height > 640 * 480) {
+ denoiser->bitrate_threshold = 500000;
+ denoiser->threshold_aggressive_mode = 100;
}
return 0;
}
diff --git a/vp8/encoder/onyx_if.c b/vp8/encoder/onyx_if.c
index d8eff66..010c01a 100644
--- a/vp8/encoder/onyx_if.c
+++ b/vp8/encoder/onyx_if.c
@@ -3334,11 +3334,6 @@
int index = block_index_row + (j >> 4);
if (cpi->consec_zero_last[index] >= min_consec_zero_last) {
unsigned int sse;
- const unsigned int mse = vp8_mse16x16(src + j,
- ystride,
- dst + j,
- ystride,
- &sse);
const unsigned int var = vp8_variance16x16(src + j,
ystride,
dst + j,
@@ -3347,14 +3342,15 @@
// Only consider this block as valid for noise measurement
// if the sum_diff average of the current and previous frame
// is small (to avoid effects from lighting change).
- if ((mse - var) < 256) {
+ if ((sse - var) < 256) {
+ unsigned int sse2;
const unsigned int act = vp8_variance16x16(src + j,
ystride,
const_source,
0,
- &sse);
+ &sse2);
if (act > 0)
- total += mse / act;
+ total += sse / act;
num_blocks++;
}
}
diff --git a/vp9/common/vp9_rtcd_defs.pl b/vp9/common/vp9_rtcd_defs.pl
index de389e7..0530f3a 100644
--- a/vp9/common/vp9_rtcd_defs.pl
+++ b/vp9/common/vp9_rtcd_defs.pl
@@ -931,22 +931,22 @@
specialize qw/vp9_sub_pixel_avg_variance4x4/, "$sse_x86inc", "$ssse3_x86inc";
add_proto qw/unsigned int vp9_sad64x64/, "const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride";
-specialize qw/vp9_sad64x64 neon/, "$sse2_x86inc";
+specialize qw/vp9_sad64x64 neon avx2/, "$sse2_x86inc";
add_proto qw/unsigned int vp9_sad32x64/, "const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride";
-specialize qw/vp9_sad32x64/, "$sse2_x86inc";
+specialize qw/vp9_sad32x64 avx2/, "$sse2_x86inc";
add_proto qw/unsigned int vp9_sad64x32/, "const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride";
-specialize qw/vp9_sad64x32/, "$sse2_x86inc";
+specialize qw/vp9_sad64x32 avx2/, "$sse2_x86inc";
add_proto qw/unsigned int vp9_sad32x16/, "const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride";
-specialize qw/vp9_sad32x16/, "$sse2_x86inc";
+specialize qw/vp9_sad32x16 avx2/, "$sse2_x86inc";
add_proto qw/unsigned int vp9_sad16x32/, "const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride";
specialize qw/vp9_sad16x32/, "$sse2_x86inc";
add_proto qw/unsigned int vp9_sad32x32/, "const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride";
-specialize qw/vp9_sad32x32 neon/, "$sse2_x86inc";
+specialize qw/vp9_sad32x32 neon avx2/, "$sse2_x86inc";
add_proto qw/unsigned int vp9_sad16x16/, "const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride";
specialize qw/vp9_sad16x16 neon/, "$sse2_x86inc";
@@ -970,22 +970,22 @@
specialize qw/vp9_sad4x4/, "$sse_x86inc";
add_proto qw/unsigned int vp9_sad64x64_avg/, "const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, const uint8_t *second_pred";
-specialize qw/vp9_sad64x64_avg/, "$sse2_x86inc";
+specialize qw/vp9_sad64x64_avg avx2/, "$sse2_x86inc";
add_proto qw/unsigned int vp9_sad32x64_avg/, "const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, const uint8_t *second_pred";
-specialize qw/vp9_sad32x64_avg/, "$sse2_x86inc";
+specialize qw/vp9_sad32x64_avg avx2/, "$sse2_x86inc";
add_proto qw/unsigned int vp9_sad64x32_avg/, "const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, const uint8_t *second_pred";
-specialize qw/vp9_sad64x32_avg/, "$sse2_x86inc";
+specialize qw/vp9_sad64x32_avg avx2/, "$sse2_x86inc";
add_proto qw/unsigned int vp9_sad32x16_avg/, "const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, const uint8_t *second_pred";
-specialize qw/vp9_sad32x16_avg/, "$sse2_x86inc";
+specialize qw/vp9_sad32x16_avg avx2/, "$sse2_x86inc";
add_proto qw/unsigned int vp9_sad16x32_avg/, "const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, const uint8_t *second_pred";
specialize qw/vp9_sad16x32_avg/, "$sse2_x86inc";
add_proto qw/unsigned int vp9_sad32x32_avg/, "const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, const uint8_t *second_pred";
-specialize qw/vp9_sad32x32_avg/, "$sse2_x86inc";
+specialize qw/vp9_sad32x32_avg avx2/, "$sse2_x86inc";
add_proto qw/unsigned int vp9_sad16x16_avg/, "const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, const uint8_t *second_pred";
specialize qw/vp9_sad16x16_avg/, "$sse2_x86inc";
@@ -1114,6 +1114,11 @@
add_proto qw/unsigned int vp9_avg_8x8/, "const uint8_t *, int p";
specialize qw/vp9_avg_8x8 sse2/;
+if (vpx_config("CONFIG_VP9_HIGHBITDEPTH") eq "yes") {
+ add_proto qw/unsigned int vp9_highbd_avg_8x8/, "const uint8_t *, int p";
+ specialize qw/vp9_highbd_avg_8x8/;
+}
+
# ENCODEMB INVOKE
add_proto qw/void vp9_subtract_block/, "int rows, int cols, int16_t *diff_ptr, ptrdiff_t diff_stride, const uint8_t *src_ptr, ptrdiff_t src_stride, const uint8_t *pred_ptr, ptrdiff_t pred_stride";
diff --git a/vp9/decoder/vp9_decoder.c b/vp9/decoder/vp9_decoder.c
index 3c9469c..baf6ab7 100644
--- a/vp9/decoder/vp9_decoder.c
+++ b/vp9/decoder/vp9_decoder.c
@@ -232,6 +232,8 @@
cm->frame_refs[0].buf->corrupted = 1;
}
+ pbi->ready_for_new_data = 0;
+
// Check if the previous frame was a frame without any references to it.
if (cm->new_fb_idx >= 0 && cm->frame_bufs[cm->new_fb_idx].ref_count == 0)
cm->release_fb_cb(cm->cb_priv,
@@ -279,8 +281,6 @@
cm->current_video_frame++;
}
- pbi->ready_for_new_data = 0;
-
cm->error.setjmp = 0;
return retcode;
}
@@ -296,12 +296,12 @@
if (pbi->ready_for_new_data == 1)
return ret;
+ pbi->ready_for_new_data = 1;
+
/* no raw frame to show!!! */
if (!cm->show_frame)
return ret;
- pbi->ready_for_new_data = 1;
-
#if CONFIG_VP9_POSTPROC
if (!cm->show_existing_frame) {
ret = vp9_post_proc_frame(cm, sd, flags);
diff --git a/vp9/encoder/vp9_avg.c b/vp9/encoder/vp9_avg.c
index 22c6cc4..e9810c8 100644
--- a/vp9/encoder/vp9_avg.c
+++ b/vp9/encoder/vp9_avg.c
@@ -7,6 +7,7 @@
* in the file PATENTS. All contributing project authors may
* be found in the AUTHORS file in the root of the source tree.
*/
+#include "vp9/common/vp9_common.h"
#include "vpx_ports/mem.h"
unsigned int vp9_avg_8x8_c(const uint8_t *s, int p) {
@@ -17,3 +18,16 @@
return (sum + 32) >> 6;
}
+
+#if CONFIG_VP9_HIGHBITDEPTH
+unsigned int vp9_highbd_avg_8x8_c(const uint8_t *s8, int p) {
+ int i, j;
+ int sum = 0;
+ const uint16_t* s = CONVERT_TO_SHORTPTR(s8);
+ for (i = 0; i < 8; ++i, s+=p)
+ for (j = 0; j < 8; sum += s[j], ++j) {}
+
+ return (sum + 32) >> 6;
+}
+#endif // CONFIG_VP9_HIGHBITDEPTH
+
diff --git a/vp9/encoder/vp9_bitstream.c b/vp9/encoder/vp9_bitstream.c
index 3954fe6..421e049 100644
--- a/vp9/encoder/vp9_bitstream.c
+++ b/vp9/encoder/vp9_bitstream.c
@@ -935,26 +935,27 @@
size_t total_size = 0;
const int tile_cols = 1 << cm->log2_tile_cols;
const int tile_rows = 1 << cm->log2_tile_rows;
+ TileInfo tile[4][1 << 6];
+ TOKENEXTRA *pre_tok = cpi->tok;
+ int tile_tok = 0;
vpx_memset(cm->above_seg_context, 0, sizeof(*cm->above_seg_context) *
mi_cols_aligned_to_sb(cm->mi_cols));
- tok[0][0] = cpi->tok;
- for (tile_row = 0; tile_row < tile_rows; tile_row++) {
- if (tile_row)
- tok[tile_row][0] = tok[tile_row - 1][tile_cols - 1] +
- cpi->tok_count[tile_row - 1][tile_cols - 1];
+ for (tile_row = 0; tile_row < tile_rows; ++tile_row) {
+ for (tile_col = 0; tile_col < tile_cols; ++tile_col) {
+ vp9_tile_init(&tile[tile_row][tile_col], cm, tile_row, tile_col);
- for (tile_col = 1; tile_col < tile_cols; tile_col++)
- tok[tile_row][tile_col] = tok[tile_row][tile_col - 1] +
- cpi->tok_count[tile_row][tile_col - 1];
+ tok[tile_row][tile_col] = pre_tok + tile_tok;
+ pre_tok = tok[tile_row][tile_col];
+ tile_tok = allocated_tokens(tile[tile_row][tile_col]);
+ }
}
for (tile_row = 0; tile_row < tile_rows; tile_row++) {
for (tile_col = 0; tile_col < tile_cols; tile_col++) {
- TileInfo tile;
+ const TileInfo * const ptile = &tile[tile_row][tile_col];
- vp9_tile_init(&tile, cm, tile_row, tile_col);
tok_end = tok[tile_row][tile_col] + cpi->tok_count[tile_row][tile_col];
if (tile_col < tile_cols - 1 || tile_row < tile_rows - 1)
@@ -962,7 +963,7 @@
else
vp9_start_encode(&residual_bc, data_ptr + total_size);
- write_modes(cpi, &tile, &residual_bc, &tok[tile_row][tile_col], tok_end);
+ write_modes(cpi, ptile, &residual_bc, &tok[tile_row][tile_col], tok_end);
assert(tok[tile_row][tile_col] == tok_end);
vp9_stop_encode(&residual_bc);
if (tile_col < tile_cols - 1 || tile_row < tile_rows - 1) {
diff --git a/vp9/encoder/vp9_bitstream.h b/vp9/encoder/vp9_bitstream.h
index b488261..4f0e46f 100644
--- a/vp9/encoder/vp9_bitstream.h
+++ b/vp9/encoder/vp9_bitstream.h
@@ -29,7 +29,7 @@
(is_two_pass_svc(cpi) &&
cpi->svc.spatial_layer_id == 0 &&
cpi->svc.layer_context[0].gold_ref_idx >=0 &&
- cpi->oxcf.ss_play_alternate[0]));
+ cpi->oxcf.ss_enable_auto_arf[0]));
}
#ifdef __cplusplus
diff --git a/vp9/encoder/vp9_context_tree.h b/vp9/encoder/vp9_context_tree.h
index 6b28ee5..47d9580 100644
--- a/vp9/encoder/vp9_context_tree.h
+++ b/vp9/encoder/vp9_context_tree.h
@@ -34,6 +34,7 @@
int is_coded;
int num_4x4_blk;
int skip;
+ int pred_pixel_ready;
// For current partition, only if all Y, U, and V transform blocks'
// coefficients are quantized to 0, skippable is set to 0.
int skippable;
diff --git a/vp9/encoder/vp9_encodeframe.c b/vp9/encoder/vp9_encodeframe.c
index b4fe70a..a7370bf 100644
--- a/vp9/encoder/vp9_encodeframe.c
+++ b/vp9/encoder/vp9_encodeframe.c
@@ -533,8 +533,19 @@
int sum = 0;
if (x_idx < pixels_wide && y_idx < pixels_high) {
- int s_avg = vp9_avg_8x8(s + y_idx * sp + x_idx, sp);
- int d_avg = vp9_avg_8x8(d + y_idx * dp + x_idx, dp);
+ int s_avg, d_avg;
+#if CONFIG_VP9_HIGHBITDEPTH
+ if (xd->cur_buf->flags & YV12_FLAG_HIGHBITDEPTH) {
+ s_avg = vp9_highbd_avg_8x8(s + y_idx * sp + x_idx, sp);
+ d_avg = vp9_highbd_avg_8x8(d + y_idx * dp + x_idx, dp);
+ } else {
+ s_avg = vp9_avg_8x8(s + y_idx * sp + x_idx, sp);
+ d_avg = vp9_avg_8x8(d + y_idx * dp + x_idx, dp);
+ }
+#else
+ s_avg = vp9_avg_8x8(s + y_idx * sp + x_idx, sp);
+ d_avg = vp9_avg_8x8(d + y_idx * dp + x_idx, dp);
+#endif
sum = s_avg - d_avg;
sse = sum * sum;
}
@@ -739,8 +750,8 @@
x->e_mbd.plane[i].subsampling_y);
}
-static void set_mode_info_seg_skip(MACROBLOCK *x, TX_MODE tx_mode, int *rate,
- int64_t *dist, BLOCK_SIZE bsize) {
+static void set_mode_info_seg_skip(MACROBLOCK *x, TX_MODE tx_mode,
+ RD_COST *rd_cost, BLOCK_SIZE bsize) {
MACROBLOCKD *const xd = &x->e_mbd;
MB_MODE_INFO *const mbmi = &xd->mi[0].src_mi->mbmi;
INTERP_FILTER filter_ref;
@@ -766,8 +777,7 @@
xd->mi[0].src_mi->bmi[0].as_mv[0].as_int = 0;
x->skip = 1;
- *rate = 0;
- *dist = 0;
+ vp9_rd_cost_init(rd_cost);
}
static void rd_pick_sb_modes(VP9_COMP *cpi, const TileInfo *const tile,
@@ -802,6 +812,7 @@
}
ctx->is_coded = 0;
ctx->skippable = 0;
+ ctx->pred_pixel_ready = 0;
x->skip_recode = 0;
// Set to zero to make sure we do not use the previous encoded frame stats
@@ -1150,79 +1161,6 @@
}
}
-static void copy_partitioning(VP9_COMMON *cm, MODE_INFO *mi_8x8,
- MODE_INFO *prev_mi_8x8) {
- const int mis = cm->mi_stride;
- int block_row, block_col;
-
- for (block_row = 0; block_row < 8; ++block_row) {
- for (block_col = 0; block_col < 8; ++block_col) {
- MODE_INFO *const prev_mi =
- prev_mi_8x8[block_row * mis + block_col].src_mi;
- const BLOCK_SIZE sb_type = prev_mi ? prev_mi->mbmi.sb_type : 0;
-
- if (prev_mi) {
- const ptrdiff_t offset = prev_mi - cm->prev_mi;
- mi_8x8[block_row * mis + block_col].src_mi = cm->mi + offset;
- mi_8x8[block_row * mis + block_col].src_mi->mbmi.sb_type = sb_type;
- }
- }
- }
-}
-
-static void constrain_copy_partitioning(VP9_COMP *const cpi,
- const TileInfo *const tile,
- MODE_INFO *mi_8x8,
- MODE_INFO *prev_mi_8x8,
- int mi_row, int mi_col,
- BLOCK_SIZE bsize) {
- VP9_COMMON *const cm = &cpi->common;
- const int mis = cm->mi_stride;
- const int row8x8_remaining = tile->mi_row_end - mi_row;
- const int col8x8_remaining = tile->mi_col_end - mi_col;
- MODE_INFO *const mi_upper_left = cm->mi + mi_row * mis + mi_col;
- const int bh = num_8x8_blocks_high_lookup[bsize];
- const int bw = num_8x8_blocks_wide_lookup[bsize];
- int block_row, block_col;
-
- assert((row8x8_remaining > 0) && (col8x8_remaining > 0));
-
- // If the SB64 if it is all "in image".
- if ((col8x8_remaining >= MI_BLOCK_SIZE) &&
- (row8x8_remaining >= MI_BLOCK_SIZE)) {
- for (block_row = 0; block_row < MI_BLOCK_SIZE; block_row += bh) {
- for (block_col = 0; block_col < MI_BLOCK_SIZE; block_col += bw) {
- const int index = block_row * mis + block_col;
- MODE_INFO *prev_mi = prev_mi_8x8[index].src_mi;
- const BLOCK_SIZE sb_type = prev_mi ? prev_mi->mbmi.sb_type : 0;
- // Use previous partition if block size is not larger than bsize.
- if (prev_mi && sb_type <= bsize) {
- int block_row2, block_col2;
- for (block_row2 = 0; block_row2 < bh; ++block_row2) {
- for (block_col2 = 0; block_col2 < bw; ++block_col2) {
- const int index2 = (block_row + block_row2) * mis +
- block_col + block_col2;
- prev_mi = prev_mi_8x8[index2].src_mi;
- if (prev_mi) {
- const ptrdiff_t offset = prev_mi - cm->prev_mi;
- mi_8x8[index2].src_mi = cm->mi + offset;
- mi_8x8[index2].src_mi->mbmi.sb_type = prev_mi->mbmi.sb_type;
- }
- }
- }
- } else {
- // Otherwise, use fixed partition of size bsize.
- mi_8x8[index].src_mi = mi_upper_left + index;
- mi_8x8[index].src_mi->mbmi.sb_type = bsize;
- }
- }
- }
- } else {
- // Else this is a partial SB64, copy previous partition.
- copy_partitioning(cm, mi_8x8, prev_mi_8x8);
- }
-}
-
const struct {
int row;
int col;
@@ -1353,27 +1291,6 @@
return this_sad < 2 * threshold;
}
-static int sb_has_motion(const VP9_COMMON *cm, MODE_INFO *prev_mi_8x8,
- const int motion_thresh) {
- const int mis = cm->mi_stride;
- int block_row, block_col;
-
- if (cm->prev_mi) {
- for (block_row = 0; block_row < 8; ++block_row) {
- for (block_col = 0; block_col < 8; ++block_col) {
- const MODE_INFO *prev_mi =
- prev_mi_8x8[block_row * mis + block_col].src_mi;
- if (prev_mi) {
- if (abs(prev_mi->mbmi.mv[0].as_mv.row) > motion_thresh ||
- abs(prev_mi->mbmi.mv[0].as_mv.col) > motion_thresh)
- return 1;
- }
- }
- }
- }
- return 0;
-}
-
static void update_state_rt(VP9_COMP *cpi, PICK_MODE_CONTEXT *ctx,
int mi_row, int mi_col, int bsize) {
VP9_COMMON *const cm = &cpi->common;
@@ -2542,10 +2459,6 @@
const int idx_str = cm->mi_stride * mi_row + mi_col;
MODE_INFO *mi = cm->mi + idx_str;
- MODE_INFO *prev_mi = NULL;
-
- if (cm->frame_type != KEY_FRAME)
- prev_mi = (cm->prev_mip + cm->mi_stride + 1 + idx_str)->src_mi;
if (sf->adaptive_pred_interp_filter) {
for (i = 0; i < 64; ++i)
@@ -2562,9 +2475,6 @@
vp9_zero(cpi->mb.pred_mv);
cpi->pc_root->index = 0;
- // TODO(yunqingwang): use_lastframe_partitioning is no longer used in good-
- // quality encoding. Need to evaluate it in real-time encoding later to
- // decide if it can be removed too. And then, do the code cleanup.
cpi->mb.source_variance = UINT_MAX;
if (sf->partition_search_type == FIXED_PARTITION) {
set_offsets(cpi, tile, mi_row, mi_col, BLOCK_64X64);
@@ -2572,38 +2482,18 @@
sf->always_this_block_size);
rd_use_partition(cpi, tile, mi, tp, mi_row, mi_col, BLOCK_64X64,
&dummy_rate, &dummy_dist, 1, cpi->pc_root);
- } else if (cpi->partition_search_skippable_frame ||
- sf->partition_search_type == VAR_BASED_FIXED_PARTITION) {
+ } else if (cpi->partition_search_skippable_frame) {
BLOCK_SIZE bsize;
set_offsets(cpi, tile, mi_row, mi_col, BLOCK_64X64);
bsize = get_rd_var_based_fixed_partition(cpi, mi_row, mi_col);
set_fixed_partitioning(cpi, tile, mi, mi_row, mi_col, bsize);
rd_use_partition(cpi, tile, mi, tp, mi_row, mi_col, BLOCK_64X64,
&dummy_rate, &dummy_dist, 1, cpi->pc_root);
- } else if (sf->partition_search_type == VAR_BASED_PARTITION &&
- cm->frame_type != KEY_FRAME ) {
+ } else if (sf->partition_search_type == VAR_BASED_PARTITION &&
+ cm->frame_type != KEY_FRAME ) {
choose_partitioning(cpi, tile, mi_row, mi_col);
rd_use_partition(cpi, tile, mi, tp, mi_row, mi_col, BLOCK_64X64,
&dummy_rate, &dummy_dist, 1, cpi->pc_root);
- } else if (sf->partition_search_type == SEARCH_PARTITION &&
- sf->use_lastframe_partitioning &&
- (cpi->rc.frames_since_key %
- sf->last_partitioning_redo_frequency) &&
- cm->prev_mi &&
- cm->show_frame &&
- cm->frame_type != KEY_FRAME &&
- !cpi->rc.is_src_frame_alt_ref &&
- ((sf->use_lastframe_partitioning !=
- LAST_FRAME_PARTITION_LOW_MOTION) ||
- !sb_has_motion(cm, prev_mi, sf->lf_motion_threshold))) {
- if (sf->constrain_copy_partition &&
- sb_has_motion(cm, prev_mi, sf->lf_motion_threshold))
- constrain_copy_partitioning(cpi, tile, mi, prev_mi,
- mi_row, mi_col, BLOCK_16X16);
- else
- copy_partitioning(cm, mi, prev_mi);
- rd_use_partition(cpi, tile, mi, tp, mi_row, mi_col, BLOCK_64X64,
- &dummy_rate, &dummy_dist, 1, cpi->pc_root);
} else {
// If required set upper and lower partition size limits
if (sf->auto_min_max_partition_size) {
@@ -2686,8 +2576,7 @@
}
static void nonrd_pick_sb_modes(VP9_COMP *cpi, const TileInfo *const tile,
- int mi_row, int mi_col,
- int *rate, int64_t *dist,
+ int mi_row, int mi_col, RD_COST *rd_cost,
BLOCK_SIZE bsize, PICK_MODE_CONTEXT *ctx) {
VP9_COMMON *const cm = &cpi->common;
MACROBLOCK *const x = &cpi->mb;
@@ -2702,11 +2591,14 @@
x->rdmult = vp9_cyclic_refresh_get_rdmult(cpi->cyclic_refresh);
if (vp9_segfeature_active(&cm->seg, mbmi->segment_id, SEG_LVL_SKIP))
- set_mode_info_seg_skip(x, cm->tx_mode, rate, dist, bsize);
+ set_mode_info_seg_skip(x, cm->tx_mode, rd_cost, bsize);
else
- vp9_pick_inter_mode(cpi, x, tile, mi_row, mi_col, rate, dist, bsize, ctx);
+ vp9_pick_inter_mode(cpi, x, tile, mi_row, mi_col, rd_cost, bsize, ctx);
duplicate_mode_info_in_sb(cm, xd, mi_row, mi_col, bsize);
+
+ if (rd_cost->rate == INT_MAX)
+ vp9_rd_cost_reset(rd_cost);
}
static void fill_mode_info_sb(VP9_COMMON *cm, MACROBLOCK *x,
@@ -2768,8 +2660,8 @@
static void nonrd_pick_partition(VP9_COMP *cpi, const TileInfo *const tile,
TOKENEXTRA **tp, int mi_row,
- int mi_col, BLOCK_SIZE bsize, int *rate,
- int64_t *dist, int do_recon, int64_t best_rd,
+ int mi_col, BLOCK_SIZE bsize, RD_COST *rd_cost,
+ int do_recon, int64_t best_rd,
PC_TREE *pc_tree) {
const SPEED_FEATURES *const sf = &cpi->sf;
const VP9EncoderConfig *const oxcf = &cpi->oxcf;
@@ -2781,9 +2673,7 @@
PICK_MODE_CONTEXT *ctx = &pc_tree->none;
int i;
BLOCK_SIZE subsize = bsize;
- int this_rate, sum_rate = 0, best_rate = INT_MAX;
- int64_t this_dist, sum_dist = 0, best_dist = INT64_MAX;
- int64_t sum_rd = 0;
+ RD_COST this_rdc, sum_rdc, best_rdc;
int do_split = bsize >= BLOCK_8X8;
int do_rect = 1;
// Override skipping rectangular partition operations for edge blocks
@@ -2802,6 +2692,10 @@
assert(num_8x8_blocks_wide_lookup[bsize] ==
num_8x8_blocks_high_lookup[bsize]);
+ vp9_rd_cost_init(&sum_rdc);
+ vp9_rd_cost_reset(&best_rdc);
+ best_rdc.rdcost = best_rd;
+
// Determine partition types in search according to the speed features.
// The threshold set here has to be of square block size.
if (sf->auto_min_max_partition_size) {
@@ -2823,33 +2717,33 @@
// PARTITION_NONE
if (partition_none_allowed) {
nonrd_pick_sb_modes(cpi, tile, mi_row, mi_col,
- &this_rate, &this_dist, bsize, ctx);
+ &this_rdc, bsize, ctx);
ctx->mic.mbmi = xd->mi[0].src_mi->mbmi;
ctx->skip_txfm[0] = x->skip_txfm[0];
ctx->skip = x->skip;
+ ctx->pred_pixel_ready = 0;
- if (this_rate != INT_MAX) {
+ if (this_rdc.rate != INT_MAX) {
int pl = partition_plane_context(xd, mi_row, mi_col, bsize);
- this_rate += cpi->partition_cost[pl][PARTITION_NONE];
- sum_rd = RDCOST(x->rdmult, x->rddiv, this_rate, this_dist);
- if (sum_rd < best_rd) {
- int64_t stop_thresh = 4096;
- int64_t stop_thresh_rd;
+ this_rdc.rate += cpi->partition_cost[pl][PARTITION_NONE];
+ this_rdc.rdcost = RDCOST(x->rdmult, x->rddiv,
+ this_rdc.rate, this_rdc.dist);
+ if (this_rdc.rdcost < best_rdc.rdcost) {
+ int64_t dist_breakout_thr = sf->partition_search_breakout_dist_thr;
+ int64_t rate_breakout_thr = sf->partition_search_breakout_rate_thr;
- best_rate = this_rate;
- best_dist = this_dist;
- best_rd = sum_rd;
+ dist_breakout_thr >>= 8 - (b_width_log2_lookup[bsize] +
+ b_height_log2_lookup[bsize]);
+
+ rate_breakout_thr *= num_pels_log2_lookup[bsize];
+
+ best_rdc = this_rdc;
if (bsize >= BLOCK_8X8)
pc_tree->partitioning = PARTITION_NONE;
- // Adjust threshold according to partition size.
- stop_thresh >>= 8 - (b_width_log2_lookup[bsize] +
- b_height_log2_lookup[bsize]);
-
- stop_thresh_rd = RDCOST(x->rdmult, x->rddiv, 0, stop_thresh);
- // If obtained distortion is very small, choose current partition
- // and stop splitting.
- if (!x->e_mbd.lossless && best_rd < stop_thresh_rd) {
+ if (!x->e_mbd.lossless &&
+ this_rdc.rate < rate_breakout_thr &&
+ this_rdc.dist < dist_breakout_thr) {
do_split = 0;
do_rect = 0;
}
@@ -2861,12 +2755,12 @@
store_pred_mv(x, ctx);
// PARTITION_SPLIT
- sum_rd = 0;
if (do_split) {
int pl = partition_plane_context(xd, mi_row, mi_col, bsize);
- sum_rate += cpi->partition_cost[pl][PARTITION_SPLIT];
+ sum_rdc.rate += cpi->partition_cost[pl][PARTITION_SPLIT];
+ sum_rdc.rdcost = RDCOST(x->rdmult, x->rddiv, sum_rdc.rate, sum_rdc.dist);
subsize = get_subsize(bsize, PARTITION_SPLIT);
- for (i = 0; i < 4 && sum_rd < best_rd; ++i) {
+ for (i = 0; i < 4 && sum_rdc.rdcost < best_rdc.rdcost; ++i) {
const int x_idx = (i & 1) * ms;
const int y_idx = (i >> 1) * ms;
@@ -2874,22 +2768,20 @@
continue;
load_pred_mv(x, ctx);
nonrd_pick_partition(cpi, tile, tp, mi_row + y_idx, mi_col + x_idx,
- subsize, &this_rate, &this_dist, 0,
- best_rd - sum_rd, pc_tree->split[i]);
+ subsize, &this_rdc, 0,
+ best_rdc.rdcost - sum_rdc.rdcost, pc_tree->split[i]);
- if (this_rate == INT_MAX) {
- sum_rd = INT64_MAX;
+ if (this_rdc.rate == INT_MAX) {
+ vp9_rd_cost_reset(&sum_rdc);
} else {
- sum_rate += this_rate;
- sum_dist += this_dist;
- sum_rd = RDCOST(x->rdmult, x->rddiv, sum_rate, sum_dist);
+ sum_rdc.rate += this_rdc.rate;
+ sum_rdc.dist += this_rdc.dist;
+ sum_rdc.rdcost += this_rdc.rdcost;
}
}
- if (sum_rd < best_rd) {
- best_rate = sum_rate;
- best_dist = sum_dist;
- best_rd = sum_rd;
+ if (sum_rdc.rdcost < best_rdc.rdcost) {
+ best_rdc = sum_rdc;
pc_tree->partitioning = PARTITION_SPLIT;
} else {
// skip rectangular partition test when larger block size
@@ -2905,40 +2797,38 @@
if (sf->adaptive_motion_search)
load_pred_mv(x, ctx);
- nonrd_pick_sb_modes(cpi, tile, mi_row, mi_col,
- &this_rate, &this_dist, subsize,
+ nonrd_pick_sb_modes(cpi, tile, mi_row, mi_col, &sum_rdc, subsize,
&pc_tree->horizontal[0]);
pc_tree->horizontal[0].mic.mbmi = xd->mi[0].src_mi->mbmi;
pc_tree->horizontal[0].skip_txfm[0] = x->skip_txfm[0];
pc_tree->horizontal[0].skip = x->skip;
+ pc_tree->horizontal[0].pred_pixel_ready = 0;
- sum_rd = RDCOST(x->rdmult, x->rddiv, sum_rate, sum_dist);
-
- if (sum_rd < best_rd && mi_row + ms < cm->mi_rows) {
+ if (sum_rdc.rdcost < best_rdc.rdcost && mi_row + ms < cm->mi_rows) {
load_pred_mv(x, ctx);
- nonrd_pick_sb_modes(cpi, tile, mi_row + ms, mi_col,
- &this_rate, &this_dist, subsize,
+ nonrd_pick_sb_modes(cpi, tile, mi_row + ms, mi_col, &this_rdc, subsize,
&pc_tree->horizontal[1]);
pc_tree->horizontal[1].mic.mbmi = xd->mi[0].src_mi->mbmi;
pc_tree->horizontal[1].skip_txfm[0] = x->skip_txfm[0];
pc_tree->horizontal[1].skip = x->skip;
+ pc_tree->horizontal[1].pred_pixel_ready = 0;
- if (this_rate == INT_MAX) {
- sum_rd = INT64_MAX;
+ if (this_rdc.rate == INT_MAX) {
+ vp9_rd_cost_reset(&sum_rdc);
} else {
int pl = partition_plane_context(xd, mi_row, mi_col, bsize);
- this_rate += cpi->partition_cost[pl][PARTITION_HORZ];
- sum_rate += this_rate;
- sum_dist += this_dist;
- sum_rd = RDCOST(x->rdmult, x->rddiv, sum_rate, sum_dist);
+ this_rdc.rate += cpi->partition_cost[pl][PARTITION_HORZ];
+ sum_rdc.rate += this_rdc.rate;
+ sum_rdc.dist += this_rdc.dist;
+ sum_rdc.rdcost = RDCOST(x->rdmult, x->rddiv,
+ sum_rdc.rate, sum_rdc.dist);
}
}
- if (sum_rd < best_rd) {
- best_rd = sum_rd;
- best_rate = sum_rate;
- best_dist = sum_dist;
+
+ if (sum_rdc.rdcost < best_rdc.rdcost) {
+ best_rdc = sum_rdc;
pc_tree->partitioning = PARTITION_HORZ;
}
}
@@ -2950,55 +2840,53 @@
if (sf->adaptive_motion_search)
load_pred_mv(x, ctx);
- nonrd_pick_sb_modes(cpi, tile, mi_row, mi_col,
- &this_rate, &this_dist, subsize,
+ nonrd_pick_sb_modes(cpi, tile, mi_row, mi_col, &sum_rdc, subsize,
&pc_tree->vertical[0]);
pc_tree->vertical[0].mic.mbmi = xd->mi[0].src_mi->mbmi;
pc_tree->vertical[0].skip_txfm[0] = x->skip_txfm[0];
pc_tree->vertical[0].skip = x->skip;
- sum_rd = RDCOST(x->rdmult, x->rddiv, sum_rate, sum_dist);
- if (sum_rd < best_rd && mi_col + ms < cm->mi_cols) {
+ pc_tree->vertical[0].pred_pixel_ready = 0;
+
+ if (sum_rdc.rdcost < best_rdc.rdcost && mi_col + ms < cm->mi_cols) {
load_pred_mv(x, ctx);
- nonrd_pick_sb_modes(cpi, tile, mi_row, mi_col + ms,
- &this_rate, &this_dist, subsize,
+ nonrd_pick_sb_modes(cpi, tile, mi_row, mi_col + ms, &this_rdc, subsize,
&pc_tree->vertical[1]);
pc_tree->vertical[1].mic.mbmi = xd->mi[0].src_mi->mbmi;
pc_tree->vertical[1].skip_txfm[0] = x->skip_txfm[0];
pc_tree->vertical[1].skip = x->skip;
- if (this_rate == INT_MAX) {
- sum_rd = INT64_MAX;
+ pc_tree->vertical[1].pred_pixel_ready = 0;
+
+ if (this_rdc.rate == INT_MAX) {
+ vp9_rd_cost_reset(&sum_rdc);
} else {
int pl = partition_plane_context(xd, mi_row, mi_col, bsize);
- this_rate += cpi->partition_cost[pl][PARTITION_VERT];
- sum_rate += this_rate;
- sum_dist += this_dist;
- sum_rd = RDCOST(x->rdmult, x->rddiv, sum_rate, sum_dist);
+ sum_rdc.rate += cpi->partition_cost[pl][PARTITION_VERT];
+ sum_rdc.rate += this_rdc.rate;
+ sum_rdc.dist += this_rdc.dist;
+ sum_rdc.rdcost = RDCOST(x->rdmult, x->rddiv,
+ sum_rdc.rate, sum_rdc.dist);
}
}
- if (sum_rd < best_rd) {
- best_rate = sum_rate;
- best_dist = sum_dist;
- best_rd = sum_rd;
+
+ if (sum_rdc.rdcost < best_rdc.rdcost) {
+ best_rdc = sum_rdc;
pc_tree->partitioning = PARTITION_VERT;
}
}
- // TODO(JBB): The following line is here just to avoid a static warning
- // that occurs because at this point we never again reuse best_rd
- // despite setting it here. The code should be refactored to avoid this.
- (void) best_rd;
- *rate = best_rate;
- *dist = best_dist;
+ *rd_cost = best_rdc;
- if (best_rate == INT_MAX)
+ if (best_rdc.rate == INT_MAX) {
+ vp9_rd_cost_reset(rd_cost);
return;
+ }
// update mode info array
subsize = get_subsize(bsize, pc_tree->partitioning);
fill_mode_info_sb(cm, x, mi_row, mi_col, bsize, subsize,
pc_tree);
- if (best_rate < INT_MAX && best_dist < INT64_MAX && do_recon) {
+ if (best_rdc.rate < INT_MAX && best_rdc.dist < INT64_MAX && do_recon) {
int output_enabled = (bsize == BLOCK_64X64);
// Check the projected output rate for this SB against it's target
@@ -3006,33 +2894,32 @@
// closer to the target.
if ((oxcf->aq_mode == COMPLEXITY_AQ) && cm->seg.update_map) {
vp9_select_in_frame_q_segment(cpi, mi_row, mi_col, output_enabled,
- best_rate);
+ best_rdc.rate);
}
if (oxcf->aq_mode == CYCLIC_REFRESH_AQ)
vp9_cyclic_refresh_set_rate_and_dist_sb(cpi->cyclic_refresh,
- best_rate, best_dist);
+ best_rdc.rate, best_rdc.dist);
encode_sb_rt(cpi, tile, tp, mi_row, mi_col, output_enabled, bsize, pc_tree);
}
if (bsize == BLOCK_64X64) {
assert(tp_orig < *tp);
- assert(best_rate < INT_MAX);
- assert(best_dist < INT64_MAX);
+ assert(best_rdc.rate < INT_MAX);
+ assert(best_rdc.dist < INT64_MAX);
} else {
assert(tp_orig == *tp);
}
}
-static void nonrd_use_partition(VP9_COMP *cpi,
- const TileInfo *const tile,
- MODE_INFO *mi,
- TOKENEXTRA **tp,
- int mi_row, int mi_col,
- BLOCK_SIZE bsize, int output_enabled,
- int *totrate, int64_t *totdist,
- PC_TREE *pc_tree) {
+static void nonrd_select_partition(VP9_COMP *cpi,
+ const TileInfo *const tile,
+ MODE_INFO *mi,
+ TOKENEXTRA **tp,
+ int mi_row, int mi_col,
+ BLOCK_SIZE bsize, int output_enabled,
+ RD_COST *rd_cost, PC_TREE *pc_tree) {
VP9_COMMON *const cm = &cpi->common;
MACROBLOCK *const x = &cpi->mb;
MACROBLOCKD *const xd = &x->e_mbd;
@@ -3040,9 +2927,140 @@
const int mis = cm->mi_stride;
PARTITION_TYPE partition;
BLOCK_SIZE subsize;
- int rate = INT_MAX;
- int64_t dist = INT64_MAX;
+ RD_COST this_rdc;
+ vp9_rd_cost_reset(&this_rdc);
+ if (mi_row >= cm->mi_rows || mi_col >= cm->mi_cols)
+ return;
+
+ subsize = (bsize >= BLOCK_8X8) ? mi[0].src_mi->mbmi.sb_type : BLOCK_4X4;
+ partition = partition_lookup[bsl][subsize];
+
+ if (bsize == BLOCK_32X32 && partition != PARTITION_NONE &&
+ subsize >= BLOCK_16X16) {
+ cpi->sf.max_partition_size = BLOCK_32X32;
+ cpi->sf.min_partition_size = BLOCK_8X8;
+ nonrd_pick_partition(cpi, tile, tp, mi_row, mi_col, bsize,
+ rd_cost, 0, INT64_MAX, pc_tree);
+ } else if (bsize == BLOCK_16X16 && partition != PARTITION_NONE) {
+ cpi->sf.max_partition_size = BLOCK_16X16;
+ cpi->sf.min_partition_size = BLOCK_8X8;
+ nonrd_pick_partition(cpi, tile, tp, mi_row, mi_col, bsize,
+ rd_cost, 0, INT64_MAX, pc_tree);
+ } else {
+ switch (partition) {
+ case PARTITION_NONE:
+ nonrd_pick_sb_modes(cpi, tile, mi_row, mi_col, rd_cost,
+ subsize, &pc_tree->none);
+ pc_tree->none.mic.mbmi = xd->mi[0].src_mi->mbmi;
+ pc_tree->none.skip_txfm[0] = x->skip_txfm[0];
+ pc_tree->none.skip = x->skip;
+ pc_tree->none.pred_pixel_ready = 1;
+ break;
+ case PARTITION_VERT:
+ nonrd_pick_sb_modes(cpi, tile, mi_row, mi_col, rd_cost,
+ subsize, &pc_tree->vertical[0]);
+ pc_tree->vertical[0].mic.mbmi = xd->mi[0].src_mi->mbmi;
+ pc_tree->vertical[0].skip_txfm[0] = x->skip_txfm[0];
+ pc_tree->vertical[0].skip = x->skip;
+ pc_tree->vertical[0].pred_pixel_ready = 1;
+ if (mi_col + hbs < cm->mi_cols) {
+ nonrd_pick_sb_modes(cpi, tile, mi_row, mi_col + hbs,
+ &this_rdc, subsize, &pc_tree->vertical[1]);
+ pc_tree->vertical[1].mic.mbmi = xd->mi[0].src_mi->mbmi;
+ pc_tree->vertical[1].skip_txfm[0] = x->skip_txfm[0];
+ pc_tree->vertical[1].skip = x->skip;
+ pc_tree->vertical[1].pred_pixel_ready = 1;
+ if (this_rdc.rate != INT_MAX && this_rdc.dist != INT64_MAX &&
+ rd_cost->rate != INT_MAX && rd_cost->dist != INT64_MAX) {
+ rd_cost->rate += this_rdc.rate;
+ rd_cost->dist += this_rdc.dist;
+ }
+ }
+ break;
+ case PARTITION_HORZ:
+ nonrd_pick_sb_modes(cpi, tile, mi_row, mi_col, rd_cost,
+ subsize, &pc_tree->horizontal[0]);
+ pc_tree->horizontal[0].mic.mbmi = xd->mi[0].src_mi->mbmi;
+ pc_tree->horizontal[0].skip_txfm[0] = x->skip_txfm[0];
+ pc_tree->horizontal[0].skip = x->skip;
+ pc_tree->horizontal[0].pred_pixel_ready = 1;
+ if (mi_row + hbs < cm->mi_rows) {
+ nonrd_pick_sb_modes(cpi, tile, mi_row + hbs, mi_col,
+ &this_rdc, subsize, &pc_tree->horizontal[0]);
+ pc_tree->horizontal[1].mic.mbmi = xd->mi[0].src_mi->mbmi;
+ pc_tree->horizontal[1].skip_txfm[0] = x->skip_txfm[0];
+ pc_tree->horizontal[1].skip = x->skip;
+ pc_tree->horizontal[1].pred_pixel_ready = 1;
+ if (this_rdc.rate != INT_MAX && this_rdc.dist != INT64_MAX &&
+ rd_cost->rate != INT_MAX && rd_cost->dist != INT64_MAX) {
+ rd_cost->rate += this_rdc.rate;
+ rd_cost->dist += this_rdc.dist;
+ }
+ }
+ break;
+ case PARTITION_SPLIT:
+ subsize = get_subsize(bsize, PARTITION_SPLIT);
+ nonrd_select_partition(cpi, tile, mi, tp, mi_row, mi_col,
+ subsize, output_enabled, rd_cost,
+ pc_tree->split[0]);
+ nonrd_select_partition(cpi, tile, mi + hbs, tp,
+ mi_row, mi_col + hbs, subsize, output_enabled,
+ &this_rdc, pc_tree->split[1]);
+ if (this_rdc.rate != INT_MAX && this_rdc.dist != INT64_MAX &&
+ rd_cost->rate != INT_MAX && rd_cost->dist != INT64_MAX) {
+ rd_cost->rate += this_rdc.rate;
+ rd_cost->dist += this_rdc.dist;
+ }
+ nonrd_select_partition(cpi, tile, mi + hbs * mis, tp,
+ mi_row + hbs, mi_col, subsize, output_enabled,
+ &this_rdc, pc_tree->split[2]);
+ if (this_rdc.rate != INT_MAX && this_rdc.dist != INT64_MAX &&
+ rd_cost->rate != INT_MAX && rd_cost->dist != INT64_MAX) {
+ rd_cost->rate += this_rdc.rate;
+ rd_cost->dist += this_rdc.dist;
+ }
+ nonrd_select_partition(cpi, tile, mi + hbs * mis + hbs, tp,
+ mi_row + hbs, mi_col + hbs, subsize,
+ output_enabled, &this_rdc, pc_tree->split[3]);
+ if (this_rdc.rate != INT_MAX && this_rdc.dist != INT64_MAX &&
+ rd_cost->rate != INT_MAX && rd_cost->dist != INT64_MAX) {
+ rd_cost->rate += this_rdc.rate;
+ rd_cost->dist += this_rdc.dist;
+ }
+ break;
+ default:
+ assert("Invalid partition type.");
+ break;
+ }
+ }
+
+ if (bsize == BLOCK_64X64 && output_enabled) {
+ if (cpi->oxcf.aq_mode == CYCLIC_REFRESH_AQ)
+ vp9_cyclic_refresh_set_rate_and_dist_sb(cpi->cyclic_refresh,
+ rd_cost->rate, rd_cost->dist);
+ encode_sb_rt(cpi, tile, tp, mi_row, mi_col, 1, bsize, pc_tree);
+ }
+}
+
+
+static void nonrd_use_partition(VP9_COMP *cpi,
+ const TileInfo *const tile,
+ MODE_INFO *mi,
+ TOKENEXTRA **tp,
+ int mi_row, int mi_col,
+ BLOCK_SIZE bsize, int output_enabled,
+ RD_COST *rd_cost, PC_TREE *pc_tree) {
+ VP9_COMMON *const cm = &cpi->common;
+ MACROBLOCK *const x = &cpi->mb;
+ MACROBLOCKD *const xd = &x->e_mbd;
+ const int bsl = b_width_log2_lookup[bsize], hbs = (1 << bsl) / 4;
+ const int mis = cm->mi_stride;
+ PARTITION_TYPE partition;
+ BLOCK_SIZE subsize;
+ RD_COST this_rdc;
+
+ vp9_rd_cost_reset(&this_rdc);
if (mi_row >= cm->mi_rows || mi_col >= cm->mi_cols)
return;
@@ -3051,78 +3069,78 @@
switch (partition) {
case PARTITION_NONE:
- nonrd_pick_sb_modes(cpi, tile, mi_row, mi_col, totrate, totdist,
+ nonrd_pick_sb_modes(cpi, tile, mi_row, mi_col, rd_cost,
subsize, &pc_tree->none);
pc_tree->none.mic.mbmi = xd->mi[0].src_mi->mbmi;
pc_tree->none.skip_txfm[0] = x->skip_txfm[0];
pc_tree->none.skip = x->skip;
break;
case PARTITION_VERT:
- nonrd_pick_sb_modes(cpi, tile, mi_row, mi_col, totrate, totdist,
+ nonrd_pick_sb_modes(cpi, tile, mi_row, mi_col, rd_cost,
subsize, &pc_tree->vertical[0]);
pc_tree->vertical[0].mic.mbmi = xd->mi[0].src_mi->mbmi;
pc_tree->vertical[0].skip_txfm[0] = x->skip_txfm[0];
pc_tree->vertical[0].skip = x->skip;
if (mi_col + hbs < cm->mi_cols) {
nonrd_pick_sb_modes(cpi, tile, mi_row, mi_col + hbs,
- &rate, &dist, subsize, &pc_tree->vertical[1]);
+ &this_rdc, subsize, &pc_tree->vertical[1]);
pc_tree->vertical[1].mic.mbmi = xd->mi[0].src_mi->mbmi;
pc_tree->vertical[1].skip_txfm[0] = x->skip_txfm[0];
pc_tree->vertical[1].skip = x->skip;
- if (rate != INT_MAX && dist != INT64_MAX &&
- *totrate != INT_MAX && *totdist != INT64_MAX) {
- *totrate += rate;
- *totdist += dist;
+ if (this_rdc.rate != INT_MAX && this_rdc.dist != INT64_MAX &&
+ rd_cost->rate != INT_MAX && rd_cost->dist != INT64_MAX) {
+ rd_cost->rate += this_rdc.rate;
+ rd_cost->dist += this_rdc.dist;
}
}
break;
case PARTITION_HORZ:
- nonrd_pick_sb_modes(cpi, tile, mi_row, mi_col, totrate, totdist,
+ nonrd_pick_sb_modes(cpi, tile, mi_row, mi_col, rd_cost,
subsize, &pc_tree->horizontal[0]);
pc_tree->horizontal[0].mic.mbmi = xd->mi[0].src_mi->mbmi;
pc_tree->horizontal[0].skip_txfm[0] = x->skip_txfm[0];
pc_tree->horizontal[0].skip = x->skip;
if (mi_row + hbs < cm->mi_rows) {
nonrd_pick_sb_modes(cpi, tile, mi_row + hbs, mi_col,
- &rate, &dist, subsize, &pc_tree->horizontal[0]);
+ &this_rdc, subsize, &pc_tree->horizontal[0]);
pc_tree->horizontal[1].mic.mbmi = xd->mi[0].src_mi->mbmi;
pc_tree->horizontal[1].skip_txfm[0] = x->skip_txfm[0];
pc_tree->horizontal[1].skip = x->skip;
- if (rate != INT_MAX && dist != INT64_MAX &&
- *totrate != INT_MAX && *totdist != INT64_MAX) {
- *totrate += rate;
- *totdist += dist;
+ if (this_rdc.rate != INT_MAX && this_rdc.dist != INT64_MAX &&
+ rd_cost->rate != INT_MAX && rd_cost->dist != INT64_MAX) {
+ rd_cost->rate += this_rdc.rate;
+ rd_cost->dist += this_rdc.dist;
}
}
break;
case PARTITION_SPLIT:
subsize = get_subsize(bsize, PARTITION_SPLIT);
nonrd_use_partition(cpi, tile, mi, tp, mi_row, mi_col,
- subsize, output_enabled, totrate, totdist,
+ subsize, output_enabled, rd_cost,
pc_tree->split[0]);
nonrd_use_partition(cpi, tile, mi + hbs, tp,
mi_row, mi_col + hbs, subsize, output_enabled,
- &rate, &dist, pc_tree->split[1]);
- if (rate != INT_MAX && dist != INT64_MAX &&
- *totrate != INT_MAX && *totdist != INT64_MAX) {
- *totrate += rate;
- *totdist += dist;
+ &this_rdc, pc_tree->split[1]);
+ if (this_rdc.rate != INT_MAX && this_rdc.dist != INT64_MAX &&
+ rd_cost->rate != INT_MAX && rd_cost->dist != INT64_MAX) {
+ rd_cost->rate += this_rdc.rate;
+ rd_cost->dist += this_rdc.dist;
}
nonrd_use_partition(cpi, tile, mi + hbs * mis, tp,
mi_row + hbs, mi_col, subsize, output_enabled,
- &rate, &dist, pc_tree->split[2]);
- if (rate != INT_MAX && dist != INT64_MAX &&
- *totrate != INT_MAX && *totdist != INT64_MAX) {
- *totrate += rate;
- *totdist += dist;
+ &this_rdc, pc_tree->split[2]);
+ if (this_rdc.rate != INT_MAX && this_rdc.dist != INT64_MAX &&
+ rd_cost->rate != INT_MAX && rd_cost->dist != INT64_MAX) {
+ rd_cost->rate += this_rdc.rate;
+ rd_cost->dist += this_rdc.dist;
}
nonrd_use_partition(cpi, tile, mi + hbs * mis + hbs, tp,
mi_row + hbs, mi_col + hbs, subsize, output_enabled,
- &rate, &dist, pc_tree->split[3]);
- if (rate != INT_MAX && dist != INT64_MAX &&
- *totrate != INT_MAX && *totdist != INT64_MAX) {
- *totrate += rate;
- *totdist += dist;
+ &this_rdc, pc_tree->split[3]);
+ if (this_rdc.rate != INT_MAX && this_rdc.dist != INT64_MAX &&
+ rd_cost->rate != INT_MAX && rd_cost->dist != INT64_MAX) {
+ rd_cost->rate += this_rdc.rate;
+ rd_cost->dist += this_rdc.dist;
}
break;
default:
@@ -3133,7 +3151,7 @@
if (bsize == BLOCK_64X64 && output_enabled) {
if (cpi->oxcf.aq_mode == CYCLIC_REFRESH_AQ)
vp9_cyclic_refresh_set_rate_and_dist_sb(cpi->cyclic_refresh,
- *totrate, *totdist);
+ rd_cost->rate, rd_cost->dist);
encode_sb_rt(cpi, tile, tp, mi_row, mi_col, 1, bsize, pc_tree);
}
}
@@ -3153,52 +3171,53 @@
// Code each SB in the row
for (mi_col = tile->mi_col_start; mi_col < tile->mi_col_end;
mi_col += MI_BLOCK_SIZE) {
- int dummy_rate = 0;
- int64_t dummy_dist = 0;
+ RD_COST dummy_rdc;
const int idx_str = cm->mi_stride * mi_row + mi_col;
MODE_INFO *mi = cm->mi + idx_str;
BLOCK_SIZE bsize;
x->in_static_area = 0;
x->source_variance = UINT_MAX;
vp9_zero(x->pred_mv);
+ vp9_rd_cost_init(&dummy_rdc);
// Set the partition type of the 64X64 block
switch (sf->partition_search_type) {
case VAR_BASED_PARTITION:
choose_partitioning(cpi, tile, mi_row, mi_col);
nonrd_use_partition(cpi, tile, mi, tp, mi_row, mi_col, BLOCK_64X64,
- 1, &dummy_rate, &dummy_dist, cpi->pc_root);
+ 1, &dummy_rdc, cpi->pc_root);
break;
case SOURCE_VAR_BASED_PARTITION:
set_source_var_based_partition(cpi, tile, mi, mi_row, mi_col);
nonrd_use_partition(cpi, tile, mi, tp, mi_row, mi_col, BLOCK_64X64,
- 1, &dummy_rate, &dummy_dist, cpi->pc_root);
+ 1, &dummy_rdc, cpi->pc_root);
break;
- case VAR_BASED_FIXED_PARTITION:
case FIXED_PARTITION:
bsize = sf->partition_search_type == FIXED_PARTITION ?
sf->always_this_block_size :
get_nonrd_var_based_fixed_partition(cpi, mi_row, mi_col);
set_fixed_partitioning(cpi, tile, mi, mi_row, mi_col, bsize);
nonrd_use_partition(cpi, tile, mi, tp, mi_row, mi_col, BLOCK_64X64,
- 1, &dummy_rate, &dummy_dist, cpi->pc_root);
+ 1, &dummy_rdc, cpi->pc_root);
break;
case REFERENCE_PARTITION:
- if (sf->partition_check ||
- !(x->in_static_area = is_background(cpi, tile, mi_row, mi_col))) {
- set_modeinfo_offsets(cm, xd, mi_row, mi_col);
+ set_offsets(cpi, tile, mi_row, mi_col, BLOCK_64X64);
+ x->in_static_area = is_background(cpi, tile, mi_row, mi_col);
+
+ if (cpi->oxcf.aq_mode == CYCLIC_REFRESH_AQ && cm->seg.enabled &&
+ xd->mi[0].src_mi->mbmi.segment_id && x->in_static_area) {
auto_partition_range(cpi, tile, mi_row, mi_col,
&sf->min_partition_size,
&sf->max_partition_size);
nonrd_pick_partition(cpi, tile, tp, mi_row, mi_col, BLOCK_64X64,
- &dummy_rate, &dummy_dist, 1, INT64_MAX,
- cpi->pc_root);
+ &dummy_rdc, 1,
+ INT64_MAX, cpi->pc_root);
} else {
choose_partitioning(cpi, tile, mi_row, mi_col);
- nonrd_use_partition(cpi, tile, mi, tp, mi_row, mi_col,
- BLOCK_64X64, 1, &dummy_rate, &dummy_dist,
- cpi->pc_root);
+ nonrd_select_partition(cpi, tile, mi, tp, mi_row, mi_col, BLOCK_64X64,
+ 1, &dummy_rdc, cpi->pc_root);
}
+
break;
default:
assert(0);
@@ -3338,25 +3357,39 @@
const VP9_COMMON *const cm = &cpi->common;
const int tile_cols = 1 << cm->log2_tile_cols;
const int tile_rows = 1 << cm->log2_tile_rows;
+
int tile_col, tile_row;
- TOKENEXTRA *tok = cpi->tok;
+ TileInfo tile[4][1 << 6];
+ TOKENEXTRA *tok[4][1 << 6];
+ TOKENEXTRA *pre_tok = cpi->tok;
+ int tile_tok = 0;
for (tile_row = 0; tile_row < tile_rows; ++tile_row) {
for (tile_col = 0; tile_col < tile_cols; ++tile_col) {
- TileInfo tile;
- TOKENEXTRA *old_tok = tok;
+ vp9_tile_init(&tile[tile_row][tile_col], cm, tile_row, tile_col);
+
+ tok[tile_row][tile_col] = pre_tok + tile_tok;
+ pre_tok = tok[tile_row][tile_col];
+ tile_tok = allocated_tokens(tile[tile_row][tile_col]);
+ }
+ }
+
+ for (tile_row = 0; tile_row < tile_rows; ++tile_row) {
+ for (tile_col = 0; tile_col < tile_cols; ++tile_col) {
+ const TileInfo * const ptile = &tile[tile_row][tile_col];
+ TOKENEXTRA * const old_tok = tok[tile_row][tile_col];
int mi_row;
- vp9_tile_init(&tile, cm, tile_row, tile_col);
- for (mi_row = tile.mi_row_start; mi_row < tile.mi_row_end;
+ for (mi_row = ptile->mi_row_start; mi_row < ptile->mi_row_end;
mi_row += MI_BLOCK_SIZE) {
if (cpi->sf.use_nonrd_pick_mode && !frame_is_intra_only(cm))
- encode_nonrd_sb_row(cpi, &tile, mi_row, &tok);
+ encode_nonrd_sb_row(cpi, ptile, mi_row, &tok[tile_row][tile_col]);
else
- encode_rd_sb_row(cpi, &tile, mi_row, &tok);
+ encode_rd_sb_row(cpi, ptile, mi_row, &tok[tile_row][tile_col]);
}
- cpi->tok_count[tile_row][tile_col] = (unsigned int)(tok - old_tok);
- assert(tok - cpi->tok <= get_token_alloc(cm->mb_rows, cm->mb_cols));
+ cpi->tok_count[tile_row][tile_col] =
+ (unsigned int)(tok[tile_row][tile_col] - old_tok);
+ assert(tok[tile_row][tile_col] - old_tok <= allocated_tokens(*ptile));
}
}
}
@@ -3714,7 +3747,7 @@
vp9_setup_pre_planes(xd, ref, cfg, mi_row, mi_col,
&xd->block_refs[ref]->sf);
}
- if (!cpi->sf.reuse_inter_pred_sby || seg_skip)
+ if (!(cpi->sf.reuse_inter_pred_sby && ctx->pred_pixel_ready) || seg_skip)
vp9_build_inter_predictors_sby(xd, mi_row, mi_col, MAX(bsize, BLOCK_8X8));
vp9_build_inter_predictors_sbuv(xd, mi_row, mi_col, MAX(bsize, BLOCK_8X8));
diff --git a/vp9/encoder/vp9_encoder.c b/vp9/encoder/vp9_encoder.c
index 1758e3f..c5e8726 100644
--- a/vp9/encoder/vp9_encoder.c
+++ b/vp9/encoder/vp9_encoder.c
@@ -1436,25 +1436,6 @@
cpi->refresh_alt_ref_frame = 0;
- // Note that at the moment multi_arf will not work with svc.
- // For the current check in all the execution paths are defaulted to 0
- // pending further tuning and testing. The code is left in place here
- // as a place holder in regard to the required paths.
- cpi->multi_arf_last_grp_enabled = 0;
- if (oxcf->pass == 2) {
- if (cpi->use_svc) {
- cpi->multi_arf_allowed = 0;
- cpi->multi_arf_enabled = 0;
- } else {
- // Disable by default for now.
- cpi->multi_arf_allowed = 0;
- cpi->multi_arf_enabled = 0;
- }
- } else {
- cpi->multi_arf_allowed = 0;
- cpi->multi_arf_enabled = 0;
- }
-
cpi->b_calculate_psnr = CONFIG_INTERNAL_STATS;
#if CONFIG_INTERNAL_STATS
cpi->b_calculate_ssimg = 0;
@@ -3431,6 +3412,16 @@
vp9_set_high_precision_mv(cpi, ALTREF_HIGH_PRECISION_MV);
+ // Is multi-arf enabled.
+ // Note that at the moment multi_arf is only configured for 2 pass VBR and
+ // will not work properly with svc.
+ if ((oxcf->pass == 2) && !cpi->use_svc &&
+ (cpi->oxcf.enable_auto_arf > 1) && (cpi->oxcf.rc_mode == VPX_VBR))
+ cpi->multi_arf_allowed = 1;
+ else
+ cpi->multi_arf_allowed = 0;
+ cpi->multi_arf_last_grp_enabled = 0;
+
// Normal defaults
cm->reset_frame_context = 0;
cm->refresh_frame_context = 1;
@@ -3456,7 +3447,7 @@
int i;
// Reference a hidden frame from a lower layer
for (i = cpi->svc.spatial_layer_id - 1; i >= 0; --i) {
- if (oxcf->ss_play_alternate[i]) {
+ if (oxcf->ss_enable_auto_arf[i]) {
cpi->gld_fb_idx = cpi->svc.layer_context[i].alt_ref_idx;
break;
}
diff --git a/vp9/encoder/vp9_encoder.h b/vp9/encoder/vp9_encoder.h
index 6091ae2..1e60474 100644
--- a/vp9/encoder/vp9_encoder.h
+++ b/vp9/encoder/vp9_encoder.h
@@ -178,13 +178,12 @@
int ts_number_layers; // Number of temporal layers.
// Bitrate allocation for spatial layers.
int ss_target_bitrate[VPX_SS_MAX_LAYERS];
- int ss_play_alternate[VPX_SS_MAX_LAYERS];
+ int ss_enable_auto_arf[VPX_SS_MAX_LAYERS];
// Bitrate allocation (CBR mode) and framerate factor, for temporal layers.
int ts_target_bitrate[VPX_TS_MAX_LAYERS];
int ts_rate_decimator[VPX_TS_MAX_LAYERS];
- // these parameters aren't to be used in final build don't use!!!
- int play_alternate;
+ int enable_auto_arf;
int encode_breakout; // early breakout : for video conf recommend 800
@@ -481,6 +480,15 @@
return mb_rows * mb_cols * (16 * 16 * 3 + 4);
}
+// Get the allocated token size for a tile. It does the same calculation as in
+// the frame token allocation.
+static INLINE int allocated_tokens(TileInfo tile) {
+ int tile_mb_rows = (tile.mi_row_end - tile.mi_row_start + 1) >> 1;
+ int tile_mb_cols = (tile.mi_col_end - tile.mi_col_start + 1) >> 1;
+
+ return get_token_alloc(tile_mb_rows, tile_mb_cols);
+}
+
int vp9_get_y_sse(const YV12_BUFFER_CONFIG *a, const YV12_BUFFER_CONFIG *b);
#if CONFIG_VP9_HIGHBITDEPTH
int vp9_highbd_get_y_sse(const YV12_BUFFER_CONFIG *a,
@@ -511,9 +519,9 @@
static INLINE int is_altref_enabled(const VP9_COMP *const cpi) {
return cpi->oxcf.mode != REALTIME && cpi->oxcf.lag_in_frames > 0 &&
- (cpi->oxcf.play_alternate &&
+ (cpi->oxcf.enable_auto_arf &&
(!is_two_pass_svc(cpi) ||
- cpi->oxcf.ss_play_alternate[cpi->svc.spatial_layer_id]));
+ cpi->oxcf.ss_enable_auto_arf[cpi->svc.spatial_layer_id]));
}
static INLINE void set_ref_ptrs(VP9_COMMON *cm, MACROBLOCKD *xd,
diff --git a/vp9/encoder/vp9_firstpass.c b/vp9/encoder/vp9_firstpass.c
index 96c3e0a..f1baf83 100644
--- a/vp9/encoder/vp9_firstpass.c
+++ b/vp9/encoder/vp9_firstpass.c
@@ -66,13 +66,6 @@
*b = temp;
}
-static int gfboost_qadjust(int qindex, vpx_bit_depth_t bit_depth) {
- const double q = vp9_convert_qindex_to_q(qindex, bit_depth);
- return (int)((0.00000828 * q * q * q) +
- (-0.0055 * q * q) +
- (1.32 * q) + 79.3);
-}
-
// Resets the first pass file to the given position using a relative seek from
// the current position.
static void reset_fpf_position(TWO_PASS *p,
@@ -1317,14 +1310,15 @@
double this_frame_mv_in_out,
double max_boost) {
double frame_boost;
- const double lq = vp9_convert_qindex_to_q(cpi->rc.last_q[INTER_FRAME],
- cpi->common.bit_depth);
- const double q_correction = MIN((0.8 + (lq * 0.001)), 1.0);
+ const double lq =
+ vp9_convert_qindex_to_q(cpi->rc.avg_frame_qindex[INTER_FRAME],
+ cpi->common.bit_depth);
+ const double boost_correction = MIN((0.5 + (lq * 0.015)), 1.5);
// Underlying boost factor is based on inter error ratio.
frame_boost = (BASELINE_ERR_PER_MB * cpi->common.MBs) /
DOUBLE_DIVIDE_CHECK(this_frame->coded_error);
- frame_boost = frame_boost * BOOST_FACTOR * q_correction;
+ frame_boost = frame_boost * BOOST_FACTOR * boost_correction;
// Increase boost for frames where new data coming into frame (e.g. zoom out).
// Slightly reduce boost if there is a net balance of motion out of the frame
@@ -1335,7 +1329,7 @@
else
frame_boost += frame_boost * (this_frame_mv_in_out / 2.0);
- return MIN(frame_boost, max_boost * q_correction);
+ return MIN(frame_boost, max_boost * boost_correction);
}
static int calc_arf_boost(VP9_COMP *cpi, int offset,
@@ -1874,19 +1868,8 @@
gf_group_bits = calculate_total_gf_group_bits(cpi, gf_group_err);
// Calculate the extra bits to be used for boosted frame(s)
- {
- int q = rc->last_q[INTER_FRAME];
- int boost =
- (rc->gfu_boost * gfboost_qadjust(q, cpi->common.bit_depth)) / 100;
-
- // Set max and minimum boost and hence minimum allocation.
- boost = clamp(boost, MIN_ARF_GF_BOOST,
- (rc->baseline_gf_interval + 1) * 200);
-
- // Calculate the extra bits to be used for boosted frame(s)
- gf_arf_bits = calculate_boost_bits(rc->baseline_gf_interval,
- boost, gf_group_bits);
- }
+ gf_arf_bits = calculate_boost_bits(rc->baseline_gf_interval,
+ rc->gfu_boost, gf_group_bits);
// Adjust KF group bits and error remaining.
twopass->kf_group_error_left -= (int64_t)gf_group_err;
@@ -2380,7 +2363,11 @@
section_target_bandwidth);
twopass->active_worst_quality = tmp_q;
rc->ni_av_qi = tmp_q;
+ rc->last_q[INTER_FRAME] = tmp_q;
rc->avg_q = vp9_convert_qindex_to_q(tmp_q, cm->bit_depth);
+ rc->avg_frame_qindex[INTER_FRAME] = tmp_q;
+ rc->last_q[KEY_FRAME] = (tmp_q + cpi->oxcf.best_allowed_q) / 2;
+ rc->avg_frame_qindex[KEY_FRAME] = rc->last_q[KEY_FRAME];
}
vp9_zero(this_frame);
if (EOF == input_stats(twopass, &this_frame))
diff --git a/vp9/encoder/vp9_pickmode.c b/vp9/encoder/vp9_pickmode.c
index b74b2dd..9d20bae 100644
--- a/vp9/encoder/vp9_pickmode.c
+++ b/vp9/encoder/vp9_pickmode.c
@@ -460,11 +460,8 @@
// this needs various further optimizations. to be continued..
void vp9_pick_inter_mode(VP9_COMP *cpi, MACROBLOCK *x,
const TileInfo *const tile,
- int mi_row, int mi_col,
- int *returnrate,
- int64_t *returndistortion,
- BLOCK_SIZE bsize,
- PICK_MODE_CONTEXT *ctx) {
+ int mi_row, int mi_col, RD_COST *rd_cost,
+ BLOCK_SIZE bsize, PICK_MODE_CONTEXT *ctx) {
VP9_COMMON *const cm = &cpi->common;
MACROBLOCKD *const xd = &x->e_mbd;
MB_MODE_INFO *const mbmi = &xd->mi[0].src_mi->mbmi;
@@ -478,11 +475,8 @@
struct buf_2d yv12_mb[4][MAX_MB_PLANE];
static const int flag_list[4] = { 0, VP9_LAST_FLAG, VP9_GOLD_FLAG,
VP9_ALT_FLAG };
- int64_t best_rd = INT64_MAX;
- int64_t this_rd = INT64_MAX;
+ RD_COST this_rdc, best_rdc;
uint8_t skip_txfm = 0;
- int rate = INT_MAX;
- int64_t dist = INT64_MAX;
// var_y and sse_y are saved to be used in skipping checking
unsigned int var_y = UINT_MAX;
unsigned int sse_y = UINT_MAX;
@@ -543,8 +537,9 @@
x->skip = 0;
// initialize mode decisions
- *returnrate = INT_MAX;
- *returndistortion = INT64_MAX;
+ vp9_rd_cost_reset(&best_rdc);
+ vp9_rd_cost_reset(&this_rdc);
+ vp9_rd_cost_reset(rd_cost);
vpx_memset(mbmi, 0, sizeof(MB_MODE_INFO));
mbmi->sb_type = bsize;
mbmi->ref_frame[0] = NONE;
@@ -614,17 +609,17 @@
mode_rd_thresh =
rd_threshes[mode_idx[ref_frame -
LAST_FRAME][INTER_OFFSET(this_mode)]];
- if (rd_less_than_thresh(best_rd, mode_rd_thresh,
+ if (rd_less_than_thresh(best_rdc.rdcost, mode_rd_thresh,
rd_thresh_freq_fact[this_mode]))
continue;
if (this_mode == NEWMV) {
if (cpi->sf.partition_search_type != VAR_BASED_PARTITION &&
- this_rd < (int64_t)(1 << num_pels_log2_lookup[bsize]))
+ this_rdc.rdcost < (int64_t)(1 << num_pels_log2_lookup[bsize]))
continue;
if (!combined_motion_search(cpi, x, bsize, mi_row, mi_col,
&frame_mv[NEWMV][ref_frame],
- &rate_mv, best_rd))
+ &rate_mv, best_rdc.rdcost))
continue;
}
@@ -697,30 +692,34 @@
mbmi->interp_filter = best_filter;
mbmi->tx_size = pf_tx_size[mbmi->interp_filter];
- rate = pf_rate[mbmi->interp_filter];
- dist = pf_dist[mbmi->interp_filter];
+ this_rdc.rate = pf_rate[mbmi->interp_filter];
+ this_rdc.dist = pf_dist[mbmi->interp_filter];
var_y = pf_var[mbmi->interp_filter];
sse_y = pf_sse[mbmi->interp_filter];
x->skip_txfm[0] = skip_txfm;
} else {
mbmi->interp_filter = (filter_ref == SWITCHABLE) ? EIGHTTAP: filter_ref;
vp9_build_inter_predictors_sby(xd, mi_row, mi_col, bsize);
- model_rd_for_sb_y(cpi, bsize, x, xd, &rate, &dist, &var_y, &sse_y);
+ model_rd_for_sb_y(cpi, bsize, x, xd, &this_rdc.rate, &this_rdc.dist,
+ &var_y, &sse_y);
}
- rate += rate_mv;
- rate += cpi->inter_mode_cost[mbmi->mode_context[ref_frame]]
+ this_rdc.rate += rate_mv;
+ this_rdc.rate += cpi->inter_mode_cost[mbmi->mode_context[ref_frame]]
[INTER_OFFSET(this_mode)];
- this_rd = RDCOST(x->rdmult, x->rddiv, rate, dist);
+ this_rdc.rdcost = RDCOST(x->rdmult, x->rddiv,
+ this_rdc.rate, this_rdc.dist);
// Skipping checking: test to see if this block can be reconstructed by
// prediction only.
if (cpi->allow_encode_breakout) {
encode_breakout_test(cpi, x, bsize, mi_row, mi_col, ref_frame,
- this_mode, var_y, sse_y, yv12_mb, &rate, &dist);
+ this_mode, var_y, sse_y, yv12_mb,
+ &this_rdc.rate, &this_rdc.dist);
if (x->skip) {
- rate += rate_mv;
- this_rd = RDCOST(x->rdmult, x->rddiv, rate, dist);
+ this_rdc.rate += rate_mv;
+ this_rdc.rdcost = RDCOST(x->rdmult, x->rddiv,
+ this_rdc.rate, this_rdc.dist);
}
}
@@ -732,10 +731,8 @@
(void)ctx;
#endif
- if (this_rd < best_rd || x->skip) {
- best_rd = this_rd;
- *returnrate = rate;
- *returndistortion = dist;
+ if (this_rdc.rdcost < best_rdc.rdcost || x->skip) {
+ best_rdc = this_rdc;
best_mode = this_mode;
best_pred_filter = mbmi->interp_filter;
best_tx_size = mbmi->tx_size;
@@ -757,7 +754,7 @@
}
// If the current reference frame is valid and we found a usable mode,
// we are done.
- if (best_rd < INT64_MAX)
+ if (best_rdc.rdcost < INT64_MAX)
break;
}
@@ -790,7 +787,7 @@
// Perform intra prediction search, if the best SAD is above a certain
// threshold.
- if (!x->skip && best_rd > inter_mode_thresh &&
+ if (!x->skip && best_rdc.rdcost > inter_mode_thresh &&
bsize <= cpi->sf.max_intra_bsize) {
PREDICTION_MODE this_mode;
struct estimate_block_intra_args args = { cpi, x, DC_PRED, 0, 0 };
@@ -812,16 +809,15 @@
vp9_foreach_transformed_block_in_plane(xd, bsize, 0,
estimate_block_intra, &args);
mbmi->tx_size = saved_tx_size;
- rate = args.rate;
- dist = args.dist;
- rate += cpi->mbmode_cost[this_mode];
- rate += intra_cost_penalty;
- this_rd = RDCOST(x->rdmult, x->rddiv, rate, dist);
+ this_rdc.rate = args.rate;
+ this_rdc.dist = args.dist;
+ this_rdc.rate += cpi->mbmode_cost[this_mode];
+ this_rdc.rate += intra_cost_penalty;
+ this_rdc.rdcost = RDCOST(x->rdmult, x->rddiv,
+ this_rdc.rate, this_rdc.dist);
- if (this_rd + intra_mode_cost < best_rd) {
- best_rd = this_rd;
- *returnrate = rate;
- *returndistortion = dist;
+ if (this_rdc.rdcost + intra_mode_cost < best_rdc.rdcost) {
+ best_rdc = this_rdc;
mbmi->mode = this_mode;
mbmi->tx_size = intra_tx_size;
mbmi->ref_frame[0] = INTRA_FRAME;
@@ -834,4 +830,6 @@
if (cpi->sf.reuse_inter_pred_sby)
pd->dst = orig_dst;
}
+
+ *rd_cost = best_rdc;
}
diff --git a/vp9/encoder/vp9_pickmode.h b/vp9/encoder/vp9_pickmode.h
index 97aeca7..27a319d 100644
--- a/vp9/encoder/vp9_pickmode.h
+++ b/vp9/encoder/vp9_pickmode.h
@@ -19,9 +19,7 @@
void vp9_pick_inter_mode(VP9_COMP *cpi, MACROBLOCK *x,
const struct TileInfo *const tile,
- int mi_row, int mi_col,
- int *returnrate,
- int64_t *returndistortion,
+ int mi_row, int mi_col, RD_COST *rd_cost,
BLOCK_SIZE bsize,
PICK_MODE_CONTEXT *ctx);
diff --git a/vp9/encoder/vp9_speed_features.c b/vp9/encoder/vp9_speed_features.c
index bec77d7..e148bf9 100644
--- a/vp9/encoder/vp9_speed_features.c
+++ b/vp9/encoder/vp9_speed_features.c
@@ -142,8 +142,6 @@
if (speed >= 5) {
int i;
-
- sf->partition_search_type = FIXED_PARTITION;
sf->optimize_coefficients = 0;
sf->mv.search_method = HEX;
sf->disable_filter_search_var_thresh = 500;
@@ -151,8 +149,7 @@
sf->intra_y_mode_mask[i] = INTRA_DC;
sf->intra_uv_mode_mask[i] = INTRA_DC;
}
- }
- if (speed >= 6) {
+ sf->partition_search_breakout_rate_thr = 500;
sf->mv.reduce_first_step_size = 1;
}
}
@@ -205,7 +202,6 @@
sf->disable_filter_search_var_thresh = 50;
sf->comp_inter_joint_search_thresh = BLOCK_SIZES;
sf->auto_min_max_partition_size = RELAXED_NEIGHBORING_MIN_MAX;
- sf->use_lastframe_partitioning = LAST_FRAME_PARTITION_LOW_MOTION;
sf->lf_motion_threshold = LOW_MOTION_THRESHOLD;
sf->adjust_partitioning_from_last_frame = 1;
sf->last_partitioning_redo_frequency = 3;
@@ -217,8 +213,6 @@
if (speed >= 3) {
sf->use_square_partition_only = 1;
sf->disable_filter_search_var_thresh = 100;
- sf->use_lastframe_partitioning = LAST_FRAME_PARTITION_ALL;
- sf->constrain_copy_partition = 1;
sf->use_uv_intra_rd_estimate = 1;
sf->skip_encode_sb = 1;
sf->mv.subpel_iters_per_step = 1;
@@ -275,6 +269,15 @@
sf->inter_mode_mask[BLOCK_32X64] = INTER_NEAREST_NEW_ZERO;
sf->inter_mode_mask[BLOCK_64X32] = INTER_NEAREST_NEW_ZERO;
sf->inter_mode_mask[BLOCK_64X64] = INTER_NEAREST_NEW_ZERO;
+
+ // This feature is only enabled when partition search is disabled.
+ sf->reuse_inter_pred_sby = 1;
+
+ if (MIN(cm->width, cm->height) >= 720)
+ sf->partition_search_breakout_dist_thr = (1 << 25);
+ else
+ sf->partition_search_breakout_dist_thr = (1 << 23);
+ sf->partition_search_breakout_rate_thr = 200;
}
if (speed >= 6) {
@@ -292,9 +295,6 @@
sf->tx_size_search_method = is_keyframe ? USE_LARGESTALL : USE_TX_8X8;
- // This feature is only enabled when partition search is disabled.
- sf->reuse_inter_pred_sby = 1;
-
// Increase mode checking threshold for NEWMV.
sf->elevate_newmv_thresh = 1000;
@@ -342,7 +342,6 @@
sf->mv.fullpel_search_step_param = 6;
sf->comp_inter_joint_search_thresh = BLOCK_4X4;
sf->adaptive_rd_thresh = 0;
- sf->use_lastframe_partitioning = LAST_FRAME_PARTITION_OFF;
sf->tx_size_search_method = USE_FULL_RD;
sf->use_lp32x32fdct = 0;
sf->adaptive_motion_search = 0;
@@ -362,7 +361,6 @@
sf->min_partition_size = BLOCK_4X4;
sf->adjust_partitioning_from_last_frame = 0;
sf->last_partitioning_redo_frequency = 4;
- sf->constrain_copy_partition = 0;
sf->disable_split_mask = 0;
sf->mode_search_skip_flags = 0;
sf->force_frame_boost = 0;
diff --git a/vp9/encoder/vp9_speed_features.h b/vp9/encoder/vp9_speed_features.h
index cc6c2e5..1712f87 100644
--- a/vp9/encoder/vp9_speed_features.h
+++ b/vp9/encoder/vp9_speed_features.h
@@ -93,12 +93,6 @@
} MOTION_THRESHOLD;
typedef enum {
- LAST_FRAME_PARTITION_OFF = 0,
- LAST_FRAME_PARTITION_LOW_MOTION = 1,
- LAST_FRAME_PARTITION_ALL = 2
-} LAST_FRAME_PARTITION_METHOD;
-
-typedef enum {
USE_FULL_RD = 0,
USE_LARGESTALL,
USE_TX_8X8
@@ -149,16 +143,12 @@
typedef enum {
// Search partitions using RD/NONRD criterion
- SEARCH_PARTITION = 0,
+ SEARCH_PARTITION,
// Always use a fixed size partition
- FIXED_PARTITION = 1,
+ FIXED_PARTITION,
- // Use a fixed size partition in every 64X64 SB, where the size is
- // determined based on source variance
- VAR_BASED_FIXED_PARTITION = 2,
-
- REFERENCE_PARTITION = 3,
+ REFERENCE_PARTITION,
// Use an arbitrary partitioning scheme based on source variance within
// a 64X64 SB
@@ -246,15 +236,6 @@
// level within a frame.
int allow_skip_recode;
- // This variable allows us to reuse the last frames partition choices
- // (64x64 v 32x32 etc) for this frame. It can be set to only use the last
- // frame as a starting point in low motion scenes or always use it. If set
- // we use last partitioning_redo frequency to determine how often to redo
- // the partitioning from scratch. Adjust_partitioning_from_last_frame
- // enables us to adjust up or down one partitioning from the last frames
- // partitioning.
- LAST_FRAME_PARTITION_METHOD use_lastframe_partitioning;
-
// The threshold is to determine how slow the motino is, it is used when
// use_lastframe_partitioning is set to LAST_FRAME_PARTITION_LOW_MOTION
MOTION_THRESHOLD lf_motion_threshold;
@@ -268,8 +249,6 @@
// precise but significantly faster than the non lp version.
int use_lp32x32fdct;
- // TODO(JBB): remove this as its no longer used.
-
// After looking at the first set of modes (set by index here), skip
// checking modes for reference frames that don't match the reference frame
// of the best so far.
@@ -307,12 +286,6 @@
// use_lastframe_partitioning is set.
int last_partitioning_redo_frequency;
- // This enables constrained copy partitioning, which, given an input block
- // size bsize, will copy previous partition for partitions less than bsize,
- // otherwise bsize partition is used. bsize is currently set to 16x16.
- // Used for the case where motion is detected in superblock.
- int constrain_copy_partition;
-
// Disables sub 8x8 blocksizes in different scenarios: Choices are to disable
// it always, to allow it for only Last frame and Intra, disable it for all
// inter modes or to enable it always.
diff --git a/vp9/encoder/vp9_svc_layercontext.c b/vp9/encoder/vp9_svc_layercontext.c
index 1573557..8d3ca0d 100644
--- a/vp9/encoder/vp9_svc_layercontext.c
+++ b/vp9/encoder/vp9_svc_layercontext.c
@@ -85,7 +85,7 @@
oxcf->best_allowed_q) / 2;
lrc->avg_frame_qindex[INTER_FRAME] = (oxcf->worst_allowed_q +
oxcf->best_allowed_q) / 2;
- if (oxcf->ss_play_alternate[layer])
+ if (oxcf->ss_enable_auto_arf[layer])
lc->alt_ref_idx = alt_ref_idx++;
else
lc->alt_ref_idx = -1;
@@ -305,7 +305,7 @@
cpi->ref_frame_flags &= (~VP9_LAST_FLAG & ~VP9_ALT_FLAG);
}
} else {
- if (cpi->oxcf.ss_play_alternate[cpi->svc.spatial_layer_id]) {
+ if (cpi->oxcf.ss_enable_auto_arf[cpi->svc.spatial_layer_id]) {
cpi->alt_fb_idx = lc->alt_ref_idx;
if (!lc->has_alt_frame)
cpi->ref_frame_flags &= (~VP9_ALT_FLAG);
@@ -317,7 +317,7 @@
LAYER_CONTEXT *lc_lower =
&cpi->svc.layer_context[cpi->svc.spatial_layer_id - 1];
- if (cpi->oxcf.ss_play_alternate[cpi->svc.spatial_layer_id - 1] &&
+ if (cpi->oxcf.ss_enable_auto_arf[cpi->svc.spatial_layer_id - 1] &&
lc_lower->alt_ref_source != NULL)
cpi->alt_fb_idx = lc_lower->alt_ref_idx;
else if (cpi->svc.spatial_layer_id >= 2)
diff --git a/vp9/encoder/x86/vp9_sad_intrin_avx2.c b/vp9/encoder/x86/vp9_sad_intrin_avx2.c
new file mode 100644
index 0000000..1131930
--- /dev/null
+++ b/vp9/encoder/x86/vp9_sad_intrin_avx2.c
@@ -0,0 +1,180 @@
+/*
+ * Copyright (c) 2012 The WebM project authors. All Rights Reserved.
+ *
+ * Use of this source code is governed by a BSD-style license
+ * that can be found in the LICENSE file in the root of the source
+ * tree. An additional intellectual property rights grant can be found
+ * in the file PATENTS. All contributing project authors may
+ * be found in the AUTHORS file in the root of the source tree.
+ */
+#include <immintrin.h>
+#include "vpx_ports/mem.h"
+
+#define FSAD64_H(h) \
+unsigned int vp9_sad64x##h##_avx2(const uint8_t *src_ptr, \
+ int src_stride, \
+ const uint8_t *ref_ptr, \
+ int ref_stride) { \
+ int i, res; \
+ __m256i sad1_reg, sad2_reg, ref1_reg, ref2_reg; \
+ __m256i sum_sad = _mm256_setzero_si256(); \
+ __m256i sum_sad_h; \
+ __m128i sum_sad128; \
+ for (i = 0 ; i < h ; i++) { \
+ ref1_reg = _mm256_loadu_si256((__m256i const *)ref_ptr); \
+ ref2_reg = _mm256_loadu_si256((__m256i const *)(ref_ptr + 32)); \
+ sad1_reg = _mm256_sad_epu8(ref1_reg, \
+ _mm256_loadu_si256((__m256i const *)src_ptr)); \
+ sad2_reg = _mm256_sad_epu8(ref2_reg, \
+ _mm256_loadu_si256((__m256i const *)(src_ptr + 32))); \
+ sum_sad = _mm256_add_epi32(sum_sad, _mm256_add_epi32(sad1_reg, sad2_reg)); \
+ ref_ptr+= ref_stride; \
+ src_ptr+= src_stride; \
+ } \
+ sum_sad_h = _mm256_srli_si256(sum_sad, 8); \
+ sum_sad = _mm256_add_epi32(sum_sad, sum_sad_h); \
+ sum_sad128 = _mm256_extracti128_si256(sum_sad, 1); \
+ sum_sad128 = _mm_add_epi32(_mm256_castsi256_si128(sum_sad), sum_sad128); \
+ res = _mm_cvtsi128_si32(sum_sad128); \
+ return res; \
+}
+
+#define FSAD32_H(h) \
+unsigned int vp9_sad32x##h##_avx2(const uint8_t *src_ptr, \
+ int src_stride, \
+ const uint8_t *ref_ptr, \
+ int ref_stride) { \
+ int i, res; \
+ __m256i sad1_reg, sad2_reg, ref1_reg, ref2_reg; \
+ __m256i sum_sad = _mm256_setzero_si256(); \
+ __m256i sum_sad_h; \
+ __m128i sum_sad128; \
+ int ref2_stride = ref_stride << 1; \
+ int src2_stride = src_stride << 1; \
+ int max = h >> 1; \
+ for (i = 0 ; i < max ; i++) { \
+ ref1_reg = _mm256_loadu_si256((__m256i const *)ref_ptr); \
+ ref2_reg = _mm256_loadu_si256((__m256i const *)(ref_ptr + ref_stride)); \
+ sad1_reg = _mm256_sad_epu8(ref1_reg, \
+ _mm256_loadu_si256((__m256i const *)src_ptr)); \
+ sad2_reg = _mm256_sad_epu8(ref2_reg, \
+ _mm256_loadu_si256((__m256i const *)(src_ptr + src_stride))); \
+ sum_sad = _mm256_add_epi32(sum_sad, _mm256_add_epi32(sad1_reg, sad2_reg)); \
+ ref_ptr+= ref2_stride; \
+ src_ptr+= src2_stride; \
+ } \
+ sum_sad_h = _mm256_srli_si256(sum_sad, 8); \
+ sum_sad = _mm256_add_epi32(sum_sad, sum_sad_h); \
+ sum_sad128 = _mm256_extracti128_si256(sum_sad, 1); \
+ sum_sad128 = _mm_add_epi32(_mm256_castsi256_si128(sum_sad), sum_sad128); \
+ res = _mm_cvtsi128_si32(sum_sad128); \
+ return res; \
+}
+
+#define FSAD64 \
+FSAD64_H(64); \
+FSAD64_H(32);
+
+#define FSAD32 \
+FSAD32_H(64); \
+FSAD32_H(32); \
+FSAD32_H(16);
+
+FSAD64;
+FSAD32;
+
+#undef FSAD64
+#undef FSAD32
+#undef FSAD64_H
+#undef FSAD32_H
+
+#define FSADAVG64_H(h) \
+unsigned int vp9_sad64x##h##_avg_avx2(const uint8_t *src_ptr, \
+ int src_stride, \
+ const uint8_t *ref_ptr, \
+ int ref_stride, \
+ const uint8_t *second_pred) { \
+ int i, res; \
+ __m256i sad1_reg, sad2_reg, ref1_reg, ref2_reg; \
+ __m256i sum_sad = _mm256_setzero_si256(); \
+ __m256i sum_sad_h; \
+ __m128i sum_sad128; \
+ for (i = 0 ; i < h ; i++) { \
+ ref1_reg = _mm256_loadu_si256((__m256i const *)ref_ptr); \
+ ref2_reg = _mm256_loadu_si256((__m256i const *)(ref_ptr + 32)); \
+ ref1_reg = _mm256_avg_epu8(ref1_reg, \
+ _mm256_loadu_si256((__m256i const *)second_pred)); \
+ ref2_reg = _mm256_avg_epu8(ref2_reg, \
+ _mm256_loadu_si256((__m256i const *)(second_pred +32))); \
+ sad1_reg = _mm256_sad_epu8(ref1_reg, \
+ _mm256_loadu_si256((__m256i const *)src_ptr)); \
+ sad2_reg = _mm256_sad_epu8(ref2_reg, \
+ _mm256_loadu_si256((__m256i const *)(src_ptr + 32))); \
+ sum_sad = _mm256_add_epi32(sum_sad, _mm256_add_epi32(sad1_reg, sad2_reg)); \
+ ref_ptr+= ref_stride; \
+ src_ptr+= src_stride; \
+ second_pred+= 64; \
+ } \
+ sum_sad_h = _mm256_srli_si256(sum_sad, 8); \
+ sum_sad = _mm256_add_epi32(sum_sad, sum_sad_h); \
+ sum_sad128 = _mm256_extracti128_si256(sum_sad, 1); \
+ sum_sad128 = _mm_add_epi32(_mm256_castsi256_si128(sum_sad), sum_sad128); \
+ res = _mm_cvtsi128_si32(sum_sad128); \
+ return res; \
+}
+
+#define FSADAVG32_H(h) \
+unsigned int vp9_sad32x##h##_avg_avx2(const uint8_t *src_ptr, \
+ int src_stride, \
+ const uint8_t *ref_ptr, \
+ int ref_stride, \
+ const uint8_t *second_pred) { \
+ int i, res; \
+ __m256i sad1_reg, sad2_reg, ref1_reg, ref2_reg; \
+ __m256i sum_sad = _mm256_setzero_si256(); \
+ __m256i sum_sad_h; \
+ __m128i sum_sad128; \
+ int ref2_stride = ref_stride << 1; \
+ int src2_stride = src_stride << 1; \
+ int max = h >> 1; \
+ for (i = 0 ; i < max ; i++) { \
+ ref1_reg = _mm256_loadu_si256((__m256i const *)ref_ptr); \
+ ref2_reg = _mm256_loadu_si256((__m256i const *)(ref_ptr + ref_stride)); \
+ ref1_reg = _mm256_avg_epu8(ref1_reg, \
+ _mm256_loadu_si256((__m256i const *)second_pred)); \
+ ref2_reg = _mm256_avg_epu8(ref2_reg, \
+ _mm256_loadu_si256((__m256i const *)(second_pred +32))); \
+ sad1_reg = _mm256_sad_epu8(ref1_reg, \
+ _mm256_loadu_si256((__m256i const *)src_ptr)); \
+ sad2_reg = _mm256_sad_epu8(ref2_reg, \
+ _mm256_loadu_si256((__m256i const *)(src_ptr + src_stride))); \
+ sum_sad = _mm256_add_epi32(sum_sad, \
+ _mm256_add_epi32(sad1_reg, sad2_reg)); \
+ ref_ptr+= ref2_stride; \
+ src_ptr+= src2_stride; \
+ second_pred+= 64; \
+ } \
+ sum_sad_h = _mm256_srli_si256(sum_sad, 8); \
+ sum_sad = _mm256_add_epi32(sum_sad, sum_sad_h); \
+ sum_sad128 = _mm256_extracti128_si256(sum_sad, 1); \
+ sum_sad128 = _mm_add_epi32(_mm256_castsi256_si128(sum_sad), sum_sad128); \
+ res = _mm_cvtsi128_si32(sum_sad128); \
+ return res; \
+}
+
+#define FSADAVG64 \
+FSADAVG64_H(64); \
+FSADAVG64_H(32);
+
+#define FSADAVG32 \
+FSADAVG32_H(64); \
+FSADAVG32_H(32); \
+FSADAVG32_H(16);
+
+FSADAVG64;
+FSADAVG32;
+
+#undef FSADAVG64
+#undef FSADAVG32
+#undef FSADAVG64_H
+#undef FSADAVG32_H
diff --git a/vp9/vp9_cx_iface.c b/vp9/vp9_cx_iface.c
index d0ca524..adae18b 100644
--- a/vp9/vp9_cx_iface.c
+++ b/vp9/vp9_cx_iface.c
@@ -202,7 +202,7 @@
ERROR("kf_min_dist not supported in auto mode, use 0 "
"or kf_max_dist instead.");
- RANGE_CHECK_BOOL(extra_cfg, enable_auto_alt_ref);
+ RANGE_CHECK(extra_cfg, enable_auto_alt_ref, 0, 2);
RANGE_CHECK(extra_cfg, cpu_used, -16, 16);
RANGE_CHECK_HI(extra_cfg, noise_sensitivity, 6);
RANGE_CHECK(extra_cfg, tile_columns, 0, 6);
@@ -412,7 +412,7 @@
oxcf->speed = abs(extra_cfg->cpu_used);
oxcf->encode_breakout = extra_cfg->static_thresh;
- oxcf->play_alternate = extra_cfg->enable_auto_alt_ref;
+ oxcf->enable_auto_arf = extra_cfg->enable_auto_alt_ref;
oxcf->noise_sensitivity = extra_cfg->noise_sensitivity;
oxcf->sharpness = extra_cfg->sharpness;
@@ -445,13 +445,13 @@
for (i = 0; i < VPX_SS_MAX_LAYERS; ++i) {
oxcf->ss_target_bitrate[i] = 1000 * cfg->ss_target_bitrate[i];
#if CONFIG_SPATIAL_SVC
- oxcf->ss_play_alternate[i] = cfg->ss_enable_auto_alt_ref[i];
+ oxcf->ss_enable_auto_arf[i] = cfg->ss_enable_auto_alt_ref[i];
#endif
}
} else if (oxcf->ss_number_layers == 1) {
oxcf->ss_target_bitrate[0] = (int)oxcf->target_bandwidth;
#if CONFIG_SPATIAL_SVC
- oxcf->ss_play_alternate[0] = extra_cfg->enable_auto_alt_ref;
+ oxcf->ss_enable_auto_arf[0] = extra_cfg->enable_auto_alt_ref;
#endif
}
@@ -493,7 +493,7 @@
printf("two_pass_vbrmin_section: %d\n", oxcf->two_pass_vbrmin_section);
printf("two_pass_vbrmax_section: %d\n", oxcf->two_pass_vbrmax_section);
printf("lag_in_frames: %d\n", oxcf->lag_in_frames);
- printf("play_alternate: %d\n", oxcf->play_alternate);
+ printf("enable_auto_arf: %d\n", oxcf->enable_auto_arf);
printf("Version: %d\n", oxcf->Version);
printf("encode_breakout: %d\n", oxcf->encode_breakout);
printf("error resilient: %d\n", oxcf->error_resilient_mode);
diff --git a/vp9/vp9cx.mk b/vp9/vp9cx.mk
index ad76722..e72cb00 100644
--- a/vp9/vp9cx.mk
+++ b/vp9/vp9cx.mk
@@ -118,6 +118,7 @@
endif
VP9_CX_SRCS-$(HAVE_SSSE3) += encoder/x86/vp9_sad_ssse3.asm
VP9_CX_SRCS-$(HAVE_SSE4_1) += encoder/x86/vp9_sad_sse4.asm
+VP9_CX_SRCS-$(HAVE_AVX2) += encoder/x86/vp9_sad_intrin_avx2.c
VP9_CX_SRCS-$(ARCH_X86_64) += encoder/x86/vp9_ssim_opt_x86_64.asm
VP9_CX_SRCS-$(HAVE_SSE2) += encoder/x86/vp9_dct_sse2.c
diff --git a/vpxdec.c b/vpxdec.c
index 2afdb71..c4d2a9e 100644
--- a/vpxdec.c
+++ b/vpxdec.c
@@ -276,7 +276,8 @@
const int plane = planes[i];
const unsigned char *buf = img->planes[plane];
const int stride = img->stride[plane];
- const int w = vpx_img_plane_width(img, plane);
+ const int w = vpx_img_plane_width(img, plane) *
+ ((img->fmt & VPX_IMG_FMT_HIGHBITDEPTH) ? 2 : 1);
const int h = vpx_img_plane_height(img, plane);
for (y = 0; y < h; ++y) {