AV1 RT: Introducing speed 5 for RT. Speed 5 has the same features as speed 6, plus an extra step in the adjustment of variance-based partitioning. This gives a ~40% slowdown and a 5% BD-rate improvement on low-resolution content, with some clips improving by more than 10% in BD-rate. Change-Id: I2d911baee71f04ccdbd52a802c865bb2cc567989
diff --git a/av1/encoder/encodeframe.c b/av1/encoder/encodeframe.c index c1c740e..2f23751 100644 --- a/av1/encoder/encodeframe.c +++ b/av1/encoder/encodeframe.c
@@ -2069,7 +2069,7 @@ } if ((cpi->sf.part_sf.partition_search_type == VAR_BASED_PARTITION && - cpi->sf.part_sf.adjust_var_based_rd_partitioning == 2) && + cpi->sf.part_sf.adjust_var_based_rd_partitioning > 2) && partition != PARTITION_SPLIT && bsize > BLOCK_8X8 && (mi_row + bs < mi_params->mi_rows || mi_row + hbs == mi_params->mi_rows) &&
diff --git a/av1/encoder/speed_features.c b/av1/encoder/speed_features.c index 59cfd50..81b3f17 100644 --- a/av1/encoder/speed_features.c +++ b/av1/encoder/speed_features.c
@@ -763,26 +763,13 @@ if (speed >= 5) { sf->hl_sf.recode_loop = ALLOW_RECODE_KFMAXBW; - sf->part_sf.partition_search_breakout_rate_thr = 300; - - sf->mv_sf.search_method = BIGDIA; - sf->mv_sf.subpel_search_method = SUBPEL_TREE_PRUNED_MORE; - sf->inter_sf.adaptive_rd_thresh = 4; sf->interp_sf.disable_filter_search_var_thresh = 200; - sf->intra_sf.intra_y_mode_mask[TX_64X64] = INTRA_DC_H_V; - sf->intra_sf.intra_uv_mode_mask[TX_64X64] = UV_INTRA_DC_H_V_CFL; - sf->intra_sf.intra_y_mode_mask[TX_32X32] = INTRA_DC_H_V; - sf->intra_sf.intra_uv_mode_mask[TX_32X32] = UV_INTRA_DC_H_V_CFL; - sf->intra_sf.intra_y_mode_mask[TX_16X16] = INTRA_DC_H_V; - sf->intra_sf.intra_uv_mode_mask[TX_16X16] = UV_INTRA_DC_H_V_CFL; - sf->rd_sf.use_fast_coef_costing = 1; sf->rd_sf.tx_domain_dist_level = 2; sf->rd_sf.tx_domain_dist_thres_level = 2; - - sf->winner_mode_sf.tx_size_search_level = 2; + sf->winner_mode_sf.tx_size_search_level = 1; sf->rt_sf.mode_search_skip_flags = (cm->current_frame.frame_type == KEY_FRAME) @@ -790,9 +777,6 @@ : FLAG_SKIP_INTRA_DIRMISMATCH | FLAG_SKIP_INTRA_BESTINTER | FLAG_SKIP_COMP_BESTINTRA | FLAG_SKIP_INTRA_LOWVAR | FLAG_EARLY_TERMINATE; - } - - if (speed >= 6) { sf->hl_sf.frame_parameter_update = 0; sf->part_sf.default_max_partition_size = BLOCK_128X128; @@ -800,11 +784,12 @@ sf->part_sf.max_intra_bsize = BLOCK_32X32; sf->part_sf.partition_search_breakout_rate_thr = 500; sf->part_sf.partition_search_type = VAR_BASED_PARTITION; - sf->part_sf.adjust_var_based_rd_partitioning = 1; + sf->part_sf.adjust_var_based_rd_partitioning = 2; sf->mv_sf.search_method = FAST_DIAMOND; sf->mv_sf.subpel_force_stop = QUARTER_PEL; sf->mv_sf.use_fullpel_costlist = 1; + sf->mv_sf.subpel_search_method = SUBPEL_TREE_PRUNED_MORE; sf->inter_sf.adaptive_mode_search = 2; sf->inter_sf.inter_mode_rd_model_estimation = 2; @@ -821,8 +806,6 @@ sf->rd_sf.optimize_coefficients = NO_TRELLIS_OPT; sf->rd_sf.simple_model_rd_from_var = 1; - sf->winner_mode_sf.tx_size_search_level = 1; - 
sf->lpf_sf.cdef_pick_method = CDEF_PICK_FROM_Q; sf->lpf_sf.lpf_pick = LPF_PICK_FROM_Q; @@ -834,6 +817,10 @@ sf->rt_sf.use_simple_rd_model = 1; } + if (speed >= 6) { + sf->part_sf.adjust_var_based_rd_partitioning = 1; + } + if (speed >= 7) { sf->hl_sf.frame_parameter_update = 0;
diff --git a/av1/encoder/speed_features.h b/av1/encoder/speed_features.h index 9174df5..e43fb10 100644 --- a/av1/encoder/speed_features.h +++ b/av1/encoder/speed_features.h
@@ -368,8 +368,9 @@ BLOCK_SIZE default_max_partition_size; // Sets level of adjustmet of variace-based partitioning during - // rd_use_partition 0 - no partition adjusment 1 - adjust partition for small - // blocks and high QP 2 - always adjust partitioning + // rd_use_partition 0 - no partition adjustment, 1 - try to merge partitions + // for small blocks and high QP, 2 - always try to merge leaf partitions, 3 - + // try to merge and split leaf partitions int adjust_var_based_rd_partitioning; // Partition search early breakout thresholds.
diff --git a/test/rt_end_to_end_test.cc b/test/rt_end_to_end_test.cc index 28fade9..5cd0382 100644 --- a/test/rt_end_to_end_test.cc +++ b/test/rt_end_to_end_test.cc
@@ -32,15 +32,18 @@ std::unordered_map<std::string, std::unordered_map<int, std::unordered_map<int, double>>> kPsnrThreshold = { { "park_joy_90p_8_420.y4m", - { { 6, { { 0, 35.3 }, { 3, 36.2 } } }, + { { 5, { { 0, 35.4 }, { 3, 36.4 } } }, + { 6, { { 0, 35.3 }, { 3, 36.2 } } }, { 7, { { 0, 34.9 }, { 3, 35.8 } } }, { 8, { { 0, 35.0 }, { 3, 35.8 } } } } }, { "paris_352_288_30.y4m", - { { 6, { { 0, 36.1 }, { 3, 36.6 } } }, + { { 5, { { 0, 36.2 }, { 3, 36.7 } } }, + { 6, { { 0, 36.1 }, { 3, 36.6 } } }, { 7, { { 0, 35.5 }, { 3, 36.0 } } }, { 8, { { 0, 36.0 }, { 3, 36.5 } } } } }, { "niklas_1280_720_30.y4m", - { { 6, { { 0, 34.2 }, { 3, 34.2 } } }, + { { 5, { { 0, 34.6 }, { 3, 34.6 } } }, + { 6, { { 0, 34.2 }, { 3, 34.2 } } }, { 7, { { 0, 33.7 }, { 3, 33.7 } } }, { 8, { { 0, 33.6 }, { 3, 33.4 } } } } } }; @@ -159,13 +162,13 @@ TEST_P(RTEndToEndTestThreaded, EndtoEndPSNRTest) { DoTest(); } AV1_INSTANTIATE_TEST_CASE(RTEndToEndTest, ::testing::ValuesIn(kTestVectors), - ::testing::Range(6, 9), + ::testing::Range(5, 9), ::testing::Values<unsigned int>(0, 3), ::testing::Values(1), ::testing::Values(1)); AV1_INSTANTIATE_TEST_CASE(RTEndToEndTestThreaded, ::testing::ValuesIn(kTestVectors), - ::testing::Range(6, 9), + ::testing::Range(5, 9), ::testing::Values<unsigned int>(0, 3), ::testing::Range(2, 5), ::testing::Range(2, 5)); } // namespace