Merge tag 'v3.2.0' into HEAD
Create the signed v3.2.0 tag
BUG=aomedia:3165
Change-Id: I619563ba6d72d994d749242f66ab9befcc71a200
diff --git a/aom/aom_external_partition.h b/aom/aom_external_partition.h
index 1bb31c4..34503b5 100644
--- a/aom/aom_external_partition.h
+++ b/aom/aom_external_partition.h
@@ -30,7 +30,7 @@
* types, removing or reassigning enums, adding/removing/rearranging
* fields to structures.
*/
-#define AOM_EXT_PART_ABI_VERSION 3
+#define AOM_EXT_PART_ABI_VERSION 5
#ifdef __cplusplus
extern "C" {
@@ -240,6 +240,18 @@
int frame_width; ///< Frame width
int frame_height; ///< Frame height
int block_size; ///< As "BLOCK_SIZE" in av1/common/enums.h
+ /*!
+ * Valid partition types. A bitmask is used. A "1" bit indicates that the
+ * corresponding type is valid. The bitmask follows the enum order for
+ * PARTITION_TYPE in "enums.h" to represent one partition type at a bit.
+ * For example, 0x01 stands for only PARTITION_NONE is valid,
+ * 0x09 (00...001001) stands for PARTITION_NONE and PARTITION_SPLIT are valid.
+ */
+ int valid_partition_types;
+ int update_type; ///< Frame update type, defined in ratectrl.h
+ int qindex; ///< Quantization index, range: [0, 255]
+ int rdmult; ///< Rate-distortion multiplier
+ int pyramid_level; ///< The level of this frame in the hierarchical structure
} aom_partition_features_t;
/*!\brief Partition decisions received from the external model.
diff --git a/aom_dsp/aom_dsp_rtcd_defs.pl b/aom_dsp/aom_dsp_rtcd_defs.pl
index ba6027c..b39bfaa 100755
--- a/aom_dsp/aom_dsp_rtcd_defs.pl
+++ b/aom_dsp/aom_dsp_rtcd_defs.pl
@@ -1113,7 +1113,7 @@
specialize qw/aom_hadamard_lp_8x8 sse2 neon/;
add_proto qw/void aom_hadamard_lp_16x16/, "const int16_t *src_diff, ptrdiff_t src_stride, int16_t *coeff";
- specialize qw/aom_hadamard_lp_16x16 avx2 neon/;
+ specialize qw/aom_hadamard_lp_16x16 sse2 avx2 neon/;
if (aom_config("CONFIG_AV1_HIGHBITDEPTH") eq "yes") {
@@ -1127,10 +1127,10 @@
specialize qw/aom_highbd_hadamard_32x32 avx2/;
}
add_proto qw/int aom_satd/, "const tran_low_t *coeff, int length";
- specialize qw/aom_satd neon avx2/;
+ specialize qw/aom_satd neon sse2 avx2/;
add_proto qw/int aom_satd_lp/, "const int16_t *coeff, int length";
- specialize qw/aom_satd_lp avx2 neon/;
+ specialize qw/aom_satd_lp sse2 avx2 neon/;
#
diff --git a/aom_dsp/x86/avg_intrin_sse2.c b/aom_dsp/x86/avg_intrin_sse2.c
index 260ca2a..a52abd0 100644
--- a/aom_dsp/x86/avg_intrin_sse2.c
+++ b/aom_dsp/x86/avg_intrin_sse2.c
@@ -272,8 +272,8 @@
hadamard_8x8_sse2(src_diff, src_stride, coeff, 1);
}
-void aom_hadamard_lp_8x8_sse2(const int16_t *src_diff, ptrdiff_t src_stride,
- int16_t *coeff) {
+static INLINE void hadamard_lp_8x8_sse2(const int16_t *src_diff,
+ ptrdiff_t src_stride, int16_t *coeff) {
__m128i src[8];
src[0] = _mm_load_si128((const __m128i *)src_diff);
src[1] = _mm_load_si128((const __m128i *)(src_diff += src_stride));
@@ -304,6 +304,50 @@
_mm_store_si128((__m128i *)coeff, src[7]);
}
+void aom_hadamard_lp_8x8_sse2(const int16_t *src_diff, ptrdiff_t src_stride,
+ int16_t *coeff) {
+ hadamard_lp_8x8_sse2(src_diff, src_stride, coeff);
+}
+
+void aom_hadamard_lp_16x16_sse2(const int16_t *src_diff, ptrdiff_t src_stride,
+ int16_t *coeff) {
+ for (int idx = 0; idx < 4; ++idx) {
+ const int16_t *src_ptr =
+ src_diff + (idx >> 1) * 8 * src_stride + (idx & 0x01) * 8;
+ hadamard_lp_8x8_sse2(src_ptr, src_stride, coeff + idx * 64);
+ }
+
+ int16_t *t_coeff = coeff;
+ for (int idx = 0; idx < 64; idx += 8) {
+ __m128i coeff0 = _mm_load_si128((const __m128i *)t_coeff);
+ __m128i coeff1 = _mm_load_si128((const __m128i *)(t_coeff + 64));
+ __m128i coeff2 = _mm_load_si128((const __m128i *)(t_coeff + 128));
+ __m128i coeff3 = _mm_load_si128((const __m128i *)(t_coeff + 192));
+
+ __m128i b0 = _mm_add_epi16(coeff0, coeff1);
+ __m128i b1 = _mm_sub_epi16(coeff0, coeff1);
+ __m128i b2 = _mm_add_epi16(coeff2, coeff3);
+ __m128i b3 = _mm_sub_epi16(coeff2, coeff3);
+
+ b0 = _mm_srai_epi16(b0, 1);
+ b1 = _mm_srai_epi16(b1, 1);
+ b2 = _mm_srai_epi16(b2, 1);
+ b3 = _mm_srai_epi16(b3, 1);
+
+ coeff0 = _mm_add_epi16(b0, b2);
+ coeff1 = _mm_add_epi16(b1, b3);
+ coeff2 = _mm_sub_epi16(b0, b2);
+ coeff3 = _mm_sub_epi16(b1, b3);
+
+ _mm_store_si128((__m128i *)t_coeff, coeff0);
+ _mm_store_si128((__m128i *)(t_coeff + 64), coeff1);
+ _mm_store_si128((__m128i *)(t_coeff + 128), coeff2);
+ _mm_store_si128((__m128i *)(t_coeff + 192), coeff3);
+
+ t_coeff += 8;
+ }
+}
+
static INLINE void hadamard_16x16_sse2(const int16_t *src_diff,
ptrdiff_t src_stride, tran_low_t *coeff,
int is_final) {
@@ -416,17 +460,50 @@
int aom_satd_sse2(const tran_low_t *coeff, int length) {
int i;
const __m128i zero = _mm_setzero_si128();
+ const __m128i one = _mm_set1_epi16(1);
__m128i accum = zero;
- for (i = 0; i < length; i += 8) {
- const __m128i src_line = load_tran_low(coeff);
- const __m128i inv = _mm_sub_epi16(zero, src_line);
- const __m128i abs = _mm_max_epi16(src_line, inv); // abs(src_line)
- const __m128i abs_lo = _mm_unpacklo_epi16(abs, zero);
- const __m128i abs_hi = _mm_unpackhi_epi16(abs, zero);
- const __m128i sum = _mm_add_epi32(abs_lo, abs_hi);
- accum = _mm_add_epi32(accum, sum);
- coeff += 8;
+ for (i = 0; i < length; i += 16) {
+ const __m128i src_line0 = load_tran_low(coeff);
+ const __m128i src_line1 = load_tran_low(coeff + 8);
+ const __m128i inv0 = _mm_sub_epi16(zero, src_line0);
+ const __m128i inv1 = _mm_sub_epi16(zero, src_line1);
+ const __m128i abs0 = _mm_max_epi16(src_line0, inv0); // abs(src_line)
+ const __m128i abs1 = _mm_max_epi16(src_line1, inv1); // abs(src_line)
+ const __m128i sum0 = _mm_madd_epi16(abs0, one);
+ const __m128i sum1 = _mm_madd_epi16(abs1, one);
+ accum = _mm_add_epi32(accum, sum0);
+ accum = _mm_add_epi32(accum, sum1);
+ coeff += 16;
+ }
+
+ { // cascading summation of accum
+ __m128i hi = _mm_srli_si128(accum, 8);
+ accum = _mm_add_epi32(accum, hi);
+ hi = _mm_srli_epi64(accum, 32);
+ accum = _mm_add_epi32(accum, hi);
+ }
+
+ return _mm_cvtsi128_si32(accum);
+}
+
+int aom_satd_lp_sse2(const int16_t *coeff, int length) {
+ const __m128i zero = _mm_setzero_si128();
+ const __m128i one = _mm_set1_epi16(1);
+ __m128i accum = zero;
+
+ for (int i = 0; i < length; i += 16) {
+ const __m128i src_line0 = _mm_loadu_si128((const __m128i *)coeff);
+ const __m128i src_line1 = _mm_loadu_si128((const __m128i *)(coeff + 8));
+ const __m128i inv0 = _mm_sub_epi16(zero, src_line0);
+ const __m128i inv1 = _mm_sub_epi16(zero, src_line1);
+ const __m128i abs0 = _mm_max_epi16(src_line0, inv0); // abs(src_line)
+ const __m128i abs1 = _mm_max_epi16(src_line1, inv1); // abs(src_line)
+ const __m128i sum0 = _mm_madd_epi16(abs0, one);
+ const __m128i sum1 = _mm_madd_epi16(abs1, one);
+ accum = _mm_add_epi32(accum, sum0);
+ accum = _mm_add_epi32(accum, sum1);
+ coeff += 16;
}
{ // cascading summation of accum
diff --git a/aom_scale/yv12config.h b/aom_scale/yv12config.h
index 376cb74..c0e0361 100644
--- a/aom_scale/yv12config.h
+++ b/aom_scale/yv12config.h
@@ -29,6 +29,7 @@
#define AOM_INTERP_EXTEND 4
#define AOM_BORDER_IN_PIXELS 288
#define AOM_ENC_NO_SCALE_BORDER 160
+#define AOM_ENC_ALLINTRA_BORDER 64
#define AOM_DEC_BORDER_IN_PIXELS 64
/*!\endcond */
diff --git a/av1/av1.cmake b/av1/av1.cmake
index 089dea4..bcffa76 100644
--- a/av1/av1.cmake
+++ b/av1/av1.cmake
@@ -392,13 +392,14 @@
list(APPEND AOM_AV1_ENCODER_INTRIN_SSE2
"${AOM_ROOT}/av1/encoder/x86/av1_fwd_txfm_sse2.c"
"${AOM_ROOT}/av1/encoder/x86/av1_fwd_txfm_sse2.h"
+ "${AOM_ROOT}/av1/encoder/x86/av1_k_means_sse2.c"
"${AOM_ROOT}/av1/encoder/x86/av1_quantize_sse2.c"
"${AOM_ROOT}/av1/encoder/x86/encodetxb_sse2.c"
+ "${AOM_ROOT}/av1/encoder/x86/error_intrin_sse2.c"
"${AOM_ROOT}/av1/encoder/x86/highbd_block_error_intrin_sse2.c"
- "${AOM_ROOT}/av1/encoder/x86/temporal_filter_sse2.c"
- "${AOM_ROOT}/av1/encoder/x86/av1_k_means_sse2.c"
"${AOM_ROOT}/av1/encoder/x86/highbd_temporal_filter_sse2.c"
"${AOM_ROOT}/av1/encoder/x86/reconinter_enc_sse2.c"
+ "${AOM_ROOT}/av1/encoder/x86/temporal_filter_sse2.c"
"${AOM_ROOT}/av1/encoder/x86/wedge_utils_sse2.c")
if(CONFIG_AV1_TEMPORAL_DENOISING)
diff --git a/av1/av1_cx_iface.c b/av1/av1_cx_iface.c
index 5e01eef..4764ed5 100644
--- a/av1/av1_cx_iface.c
+++ b/av1/av1_cx_iface.c
@@ -1110,6 +1110,10 @@
q_cfg->deltaq_mode = extra_cfg->deltaq_mode;
q_cfg->use_fixed_qp_offsets =
cfg->use_fixed_qp_offsets && (rc_cfg->mode == AOM_Q);
+ q_cfg->enable_hdr_deltaq =
+ (q_cfg->deltaq_mode == DELTA_Q_HDR) &&
+ (cfg->g_bit_depth == AOM_BITS_10) &&
+ (extra_cfg->color_primaries == AOM_CICP_CP_BT_2020);
for (int i = 0; i < FIXED_QP_OFFSET_COUNT; ++i) {
if (q_cfg->use_fixed_qp_offsets) {
if (cfg->fixed_qp_offsets[i] >= 0) { // user-provided qp offset
@@ -1130,7 +1134,10 @@
// Set cost update frequency configuration.
oxcf->cost_upd_freq.coeff = (COST_UPDATE_TYPE)extra_cfg->coeff_cost_upd_freq;
oxcf->cost_upd_freq.mode = (COST_UPDATE_TYPE)extra_cfg->mode_cost_upd_freq;
- oxcf->cost_upd_freq.mv = (COST_UPDATE_TYPE)extra_cfg->mv_cost_upd_freq;
+ // Avoid MV cost update for allintra encoding mode.
+ oxcf->cost_upd_freq.mv = (cfg->kf_max_dist != 0)
+ ? (COST_UPDATE_TYPE)extra_cfg->mv_cost_upd_freq
+ : COST_UPD_OFF;
oxcf->cost_upd_freq.dv = (COST_UPDATE_TYPE)extra_cfg->dv_cost_upd_freq;
// Set frame resize mode configuration.
@@ -1364,10 +1371,14 @@
oxcf->unit_test_cfg.sb_multipass_unit_test =
extra_cfg->sb_multipass_unit_test;
+ // For allintra encoding mode, inter-frame motion search is not applicable and
+ // the intraBC motion vectors are restricted within the tile boundaries. Hence
+ // a smaller frame border size (AOM_ENC_ALLINTRA_BORDER) is used in this case.
oxcf->border_in_pixels =
(resize_cfg->resize_mode || superres_cfg->superres_mode)
? AOM_BORDER_IN_PIXELS
- : AOM_ENC_NO_SCALE_BORDER;
+ : (oxcf->kf_cfg.key_freq_max == 0) ? AOM_ENC_ALLINTRA_BORDER
+ : AOM_ENC_NO_SCALE_BORDER;
memcpy(oxcf->target_seq_level_idx, extra_cfg->target_seq_level_idx,
sizeof(oxcf->target_seq_level_idx));
oxcf->tier_mask = extra_cfg->tier_mask;
@@ -2701,7 +2712,7 @@
#endif
// Handle fixed keyframe intervals
- if (is_stat_generation_stage(ppi->cpi)) {
+ if (is_stat_generation_stage(ppi->cpi) || is_one_pass_rt_params(ppi->cpi)) {
if (ctx->cfg.kf_mode == AOM_KF_AUTO &&
ctx->cfg.kf_min_dist == ctx->cfg.kf_max_dist) {
if (ppi->cpi->common.spatial_layer_id == 0 &&
diff --git a/av1/common/av1_rtcd_defs.pl b/av1/common/av1_rtcd_defs.pl
index 6ea67e1..a4e1bbf 100644
--- a/av1/common/av1_rtcd_defs.pl
+++ b/av1/common/av1_rtcd_defs.pl
@@ -325,13 +325,13 @@
specialize qw/av1_block_error sse2 avx2 neon/;
add_proto qw/int64_t av1_block_error_lp/, "const int16_t *coeff, const int16_t *dqcoeff, intptr_t block_size";
- specialize qw/av1_block_error_lp avx2 neon/;
+ specialize qw/av1_block_error_lp sse2 avx2 neon/;
add_proto qw/void av1_quantize_fp/, "const tran_low_t *coeff_ptr, intptr_t n_coeffs, const int16_t *zbin_ptr, const int16_t *round_ptr, const int16_t *quant_ptr, const int16_t *quant_shift_ptr, tran_low_t *qcoeff_ptr, tran_low_t *dqcoeff_ptr, const int16_t *dequant_ptr, uint16_t *eob_ptr, const int16_t *scan, const int16_t *iscan";
specialize qw/av1_quantize_fp sse2 avx2 neon/;
add_proto qw/void av1_quantize_lp/, "const int16_t *coeff_ptr, intptr_t n_coeffs, const int16_t *round_ptr, const int16_t *quant_ptr, int16_t *qcoeff_ptr, int16_t *dqcoeff_ptr, const int16_t *dequant_ptr, uint16_t *eob_ptr, const int16_t *scan, const int16_t *iscan";
- specialize qw/av1_quantize_lp avx2 neon/;
+ specialize qw/av1_quantize_lp sse2 avx2 neon/;
add_proto qw/void av1_quantize_fp_32x32/, "const tran_low_t *coeff_ptr, intptr_t n_coeffs, const int16_t *zbin_ptr, const int16_t *round_ptr, const int16_t *quant_ptr, const int16_t *quant_shift_ptr, tran_low_t *qcoeff_ptr, tran_low_t *dqcoeff_ptr, const int16_t *dequant_ptr, uint16_t *eob_ptr, const int16_t *scan, const int16_t *iscan";
specialize qw/av1_quantize_fp_32x32 neon avx2/;
diff --git a/av1/encoder/allintra_vis.c b/av1/encoder/allintra_vis.c
index 780ef1c..84fa763 100644
--- a/av1/encoder/allintra_vis.c
+++ b/av1/encoder/allintra_vis.c
@@ -357,20 +357,6 @@
weber_stats->distortion -= (dist_mean * dist_mean) / pix_num;
weber_stats->satd = best_intra_cost;
- double reg = sqrt((double)weber_stats->distortion) *
- sqrt((double)weber_stats->src_pix_max) * 0.1;
- double alpha_den = fabs(weber_stats->rec_pix_max *
- sqrt((double)weber_stats->src_variance) -
- weber_stats->src_pix_max *
- sqrt((double)weber_stats->rec_variance)) +
- reg;
- double alpha_num = ((double)weber_stats->distortion) *
- sqrt((double)weber_stats->src_variance) *
- weber_stats->rec_pix_max +
- reg;
-
- weber_stats->alpha = AOMMAX(alpha_num, 1.0) / AOMMAX(alpha_den, 1.0);
-
qcoeff[0] = 0;
for (idx = 1; idx < coeff_count; ++idx) qcoeff[idx] = abs(qcoeff[idx]);
qsort(qcoeff, coeff_count, sizeof(*coeff), qsort_comp);
@@ -408,8 +394,8 @@
get_var_perceptual_ai(cpi, cm->seq_params->sb_size, mi_row, mi_col);
double beta = (double)cpi->norm_wiener_variance / sb_wiener_var;
- double min_max_scale =
- AOMMAX(1.0, get_max_scale(cpi, bsize, mi_row, mi_col));
+ double min_max_scale = AOMMAX(
+ 1.0, get_max_scale(cpi, cm->seq_params->sb_size, mi_row, mi_col));
beta = 1.0 / AOMMIN(1.0 / beta, min_max_scale);
beta = AOMMIN(beta, 4);
beta = AOMMAX(beta, 0.25);
@@ -468,7 +454,8 @@
}
void av1_set_mb_ur_variance(AV1_COMP *cpi) {
- const CommonModeInfoParams *const mi_params = &cpi->common.mi_params;
+ const AV1_COMMON *cm = &cpi->common;
+ const CommonModeInfoParams *const mi_params = &cm->mi_params;
ThreadData *td = &cpi->td;
MACROBLOCK *x = &td->mb;
MACROBLOCKD *xd = &x->e_mbd;
@@ -482,8 +469,21 @@
const int num_rows = (mi_params->mi_rows + num_mi_h - 1) / num_mi_h;
const int use_hbd = cpi->source->flags & YV12_FLAG_HIGHBITDEPTH;
- double a = -23.06 * 4.0, b = 0.004065, c = 30.516 * 4.0;
- int delta_q_avg = 0;
+ int *mb_delta_q[2];
+ CHECK_MEM_ERROR(cm, mb_delta_q[0],
+ aom_calloc(num_rows * num_cols, sizeof(*mb_delta_q[0])));
+ CHECK_MEM_ERROR(cm, mb_delta_q[1],
+ aom_calloc(num_rows * num_cols, sizeof(*mb_delta_q[1])));
+
+ // Approximates the model change between current version (Sept 2021) and the
+ // baseline (July 2021).
+ const double model_change[] = { 3.0, 3.0 };
+ // The following parameters are fitted from user labeled data.
+ const double a[] = { -24.50 * 4.0, -17.20 * 4.0 };
+ const double b[] = { 0.004898, 0.003093 };
+ const double c[] = { (29.932 + model_change[0]) * 4.0,
+ (42.100 + model_change[1]) * 4.0 };
+ int delta_q_avg[2] = { 0, 0 };
// Loop through each SB block.
for (int row = 0; row < num_rows; ++row) {
for (int col = 0; col < num_cols; ++col) {
@@ -504,7 +504,7 @@
buf.buf = y_buffer + row_offset_y * y_stride + col_offset_y;
buf.stride = y_stride;
- double block_variance;
+ unsigned int block_variance;
if (use_hbd) {
block_variance = av1_high_get_sby_perpixel_variance(
cpi, &buf, BLOCK_8X8, xd->bd);
@@ -513,25 +513,57 @@
av1_get_sby_perpixel_variance(cpi, &buf, BLOCK_8X8);
}
- block_variance = block_variance < 1.0 ? 1.0 : block_variance;
- var += log(block_variance);
+ block_variance = AOMMAX(block_variance, 1);
+ var += log((double)block_variance);
num_of_var += 1.0;
}
}
var = exp(var / num_of_var);
- cpi->mb_delta_q[index] = (int)(a * exp(-b * var) + c + 0.5);
- delta_q_avg += cpi->mb_delta_q[index];
+ mb_delta_q[0][index] = (int)(a[0] * exp(-b[0] * var) + c[0] + 0.5);
+ mb_delta_q[1][index] = (int)(a[1] * exp(-b[1] * var) + c[1] + 0.5);
+ delta_q_avg[0] += mb_delta_q[0][index];
+ delta_q_avg[1] += mb_delta_q[1][index];
}
}
- delta_q_avg = (int)((double)delta_q_avg / (num_rows * num_cols) + 0.5);
+ delta_q_avg[0] = RINT((double)delta_q_avg[0] / (num_rows * num_cols));
+ delta_q_avg[1] = RINT((double)delta_q_avg[1] / (num_rows * num_cols));
+ int model_idx;
+ double scaling_factor;
+ const int cq_level = cpi->oxcf.rc_cfg.cq_level;
+ if (cq_level < delta_q_avg[0]) {
+ model_idx = 0;
+ scaling_factor = (double)cq_level / delta_q_avg[0];
+ } else if (cq_level < delta_q_avg[1]) {
+ model_idx = 2;
+ scaling_factor =
+ (double)(cq_level - delta_q_avg[0]) / (delta_q_avg[1] - delta_q_avg[0]);
+ } else {
+ model_idx = 1;
+ scaling_factor = (double)(MAXQ - cq_level) / (MAXQ - delta_q_avg[1]);
+ }
+
+ const double new_delta_q_avg =
+ delta_q_avg[0] + scaling_factor * (delta_q_avg[1] - delta_q_avg[0]);
for (int row = 0; row < num_rows; ++row) {
for (int col = 0; col < num_cols; ++col) {
const int index = row * num_cols + col;
- cpi->mb_delta_q[index] -= delta_q_avg;
+ if (model_idx == 2) {
+ const double delta_q =
+ mb_delta_q[0][index] +
+ scaling_factor * (mb_delta_q[1][index] - mb_delta_q[0][index]);
+ cpi->mb_delta_q[index] = RINT(delta_q - new_delta_q_avg);
+ } else {
+ cpi->mb_delta_q[index] =
+ RINT(scaling_factor *
+ (mb_delta_q[model_idx][index] - delta_q_avg[model_idx]));
+ }
}
}
+
+ aom_free(mb_delta_q[0]);
+ aom_free(mb_delta_q[1]);
}
int av1_get_sbq_user_rating_based(AV1_COMP *const cpi, int mi_row, int mi_col) {
diff --git a/av1/encoder/aq_complexity.c b/av1/encoder/aq_complexity.c
index 3573b2a..37bc309 100644
--- a/av1/encoder/aq_complexity.c
+++ b/av1/encoder/aq_complexity.c
@@ -46,11 +46,11 @@
static bool is_frame_aq_enabled(const AV1_COMP *const cpi) {
const AV1_COMMON *const cm = &cpi->common;
- const RefreshFrameFlagsInfo *const refresh_frame_flags = &cpi->refresh_frame;
+ const RefreshFrameInfo *const refresh_frame = &cpi->refresh_frame;
return frame_is_intra_only(cm) || cm->features.error_resilient_mode ||
- refresh_frame_flags->alt_ref_frame ||
- (refresh_frame_flags->golden_frame && !cpi->rc.is_src_frame_alt_ref);
+ refresh_frame->alt_ref_frame ||
+ (refresh_frame->golden_frame && !cpi->rc.is_src_frame_alt_ref);
}
// Segmentation only makes sense if the target bits per SB is above a threshold.
diff --git a/av1/encoder/aq_cyclicrefresh.c b/av1/encoder/aq_cyclicrefresh.c
index 04df183..cc97802 100644
--- a/av1/encoder/aq_cyclicrefresh.c
+++ b/av1/encoder/aq_cyclicrefresh.c
@@ -21,7 +21,6 @@
#include "aom_dsp/aom_dsp_common.h"
CYCLIC_REFRESH *av1_cyclic_refresh_alloc(int mi_rows, int mi_cols) {
- size_t last_coded_q_map_size;
CYCLIC_REFRESH *const cr = aom_calloc(1, sizeof(*cr));
if (cr == NULL) return NULL;
@@ -30,21 +29,12 @@
av1_cyclic_refresh_free(cr);
return NULL;
}
- last_coded_q_map_size = mi_rows * mi_cols * sizeof(*cr->last_coded_q_map);
- cr->last_coded_q_map = aom_malloc(last_coded_q_map_size);
- if (cr->last_coded_q_map == NULL) {
- av1_cyclic_refresh_free(cr);
- return NULL;
- }
- assert(MAXQ <= 255);
- memset(cr->last_coded_q_map, MAXQ, last_coded_q_map_size);
return cr;
}
void av1_cyclic_refresh_free(CYCLIC_REFRESH *cr) {
if (cr != NULL) {
aom_free(cr->map);
- aom_free(cr->last_coded_q_map);
aom_free(cr);
}
}
@@ -155,6 +145,7 @@
const AV1_COMMON *const cm = &cpi->common;
MACROBLOCKD *const xd = &x->e_mbd;
MB_MODE_INFO *const mbmi = xd->mi[0];
+ int sh = cpi->cyclic_refresh->skip_over4x4 ? 2 : 1;
const int prev_segment_id = mbmi->segment_id;
mbmi->segment_id = av1_get_spatial_seg_pred(cm, xd, &cdf_num);
if (prev_segment_id != mbmi->segment_id) {
@@ -164,8 +155,8 @@
const int xmis = AOMMIN(cm->mi_params.mi_cols - mi_col, bw);
const int ymis = AOMMIN(cm->mi_params.mi_rows - mi_row, bh);
const int block_index = mi_row * cm->mi_params.mi_cols + mi_col;
- for (int mi_y = 0; mi_y < ymis; mi_y++) {
- for (int mi_x = 0; mi_x < xmis; mi_x++) {
+ for (int mi_y = 0; mi_y < ymis; mi_y += sh) {
+ for (int mi_x = 0; mi_x < xmis; mi_x += sh) {
const int map_offset =
block_index + mi_y * cm->mi_params.mi_cols + mi_x;
cr->map[map_offset] = 0;
@@ -200,6 +191,7 @@
const int block_index = mi_row * cm->mi_params.mi_cols + mi_col;
const int refresh_this_block =
candidate_refresh_aq(cr, mbmi, rate, dist, bsize);
+ int sh = cpi->cyclic_refresh->skip_over4x4 ? 2 : 1;
// Default is to not update the refresh map.
int new_map_value = cr->map[block_index];
@@ -229,8 +221,8 @@
// Update entries in the cyclic refresh map with new_map_value, and
// copy mbmi->segment_id into global segmentation map.
- for (int mi_y = 0; mi_y < ymis; mi_y++) {
- for (int mi_x = 0; mi_x < xmis; mi_x++) {
+ for (int mi_y = 0; mi_y < ymis; mi_y += sh) {
+ for (int mi_x = 0; mi_x < xmis; mi_x += sh) {
const int map_offset = block_index + mi_y * cm->mi_params.mi_cols + mi_x;
cr->map[map_offset] = new_map_value;
cpi->enc_seg.map[map_offset] = mbmi->segment_id;
@@ -276,7 +268,9 @@
!cpi->svc.layer_context[cpi->svc.temporal_layer_id].is_key_frame &&
cpi->svc.spatial_layer_id == cpi->svc.number_spatial_layers - 1)) {
rc->avg_frame_low_motion =
- (3 * rc->avg_frame_low_motion + avg_cnt_zeromv) / 4;
+ (rc->avg_frame_low_motion == 0)
+ ? avg_cnt_zeromv
+ : (3 * rc->avg_frame_low_motion + avg_cnt_zeromv) / 4;
// For SVC: set avg_frame_low_motion (only computed on top spatial layer)
// to all lower spatial layers.
if (cpi->ppi->use_svc &&
@@ -299,11 +293,16 @@
// Set minimum gf_interval for GF update to a multiple of the refresh period,
// with some max limit. Depending on past encoding stats, GF flag may be
// reset and update may not occur until next baseline_gf_interval.
+ const int gf_length_mult[2] = { 8, 4 };
if (cr->percent_refresh > 0)
- p_rc->baseline_gf_interval = AOMMIN(2 * (100 / cr->percent_refresh), 40);
+ p_rc->baseline_gf_interval =
+ AOMMIN(gf_length_mult[cpi->sf.rt_sf.gf_length_lvl] *
+ (100 / cr->percent_refresh),
+ MAX_GF_INTERVAL_RT);
else
- p_rc->baseline_gf_interval = 20;
- if (rc->avg_frame_low_motion < 40) p_rc->baseline_gf_interval = 8;
+ p_rc->baseline_gf_interval = FIXED_GF_INTERVAL_RT;
+ if (rc->avg_frame_low_motion && rc->avg_frame_low_motion < 40)
+ p_rc->baseline_gf_interval = 16;
}
// Update the segmentation map, and related quantities: cyclic refresh map,
@@ -342,13 +341,6 @@
int sb_col_index = i - sb_row_index * sb_cols;
int mi_row = sb_row_index * cm->seq_params->mib_size;
int mi_col = sb_col_index * cm->seq_params->mib_size;
- // TODO(any): Ensure the population of
- // cpi->common.features.allow_screen_content_tools and use the same instead
- // of cpi->oxcf.tune_cfg.content == AOM_CONTENT_SCREEN
- int qindex_thresh = cpi->oxcf.tune_cfg.content == AOM_CONTENT_SCREEN
- ? av1_get_qindex(&cm->seg, CR_SEGMENT_ID_BOOST2,
- cm->quant_params.base_qindex)
- : 0;
assert(mi_row >= 0 && mi_row < mi_params->mi_rows);
assert(mi_col >= 0 && mi_col < mi_params->mi_cols);
bl_index = mi_row * mi_params->mi_cols + mi_col;
@@ -363,7 +355,7 @@
// for possible boost/refresh (segment 1). The segment id may get
// reset to 0 later if block gets coded anything other than GLOBALMV.
if (cr->map[bl_index2] == 0) {
- if (cr->last_coded_q_map[bl_index2] > qindex_thresh) sum_map += 4;
+ sum_map += 4;
} else if (cr->map[bl_index2] < 0) {
cr->map[bl_index2]++;
}
@@ -399,10 +391,21 @@
double weight_segment = 0;
int qp_thresh = AOMMIN(20, rc->best_quality << 1);
int qp_max_thresh = 118 * MAXQ >> 7;
+ // Although this segment feature for RTC is only used for
+ // blocks >= 8X8, for more efficient coding of the seg map
+ // cur_frame->seg_map needs to be set at 4x4 along with the
+ // function av1_cyclic_reset_segment_skip(). Skipping over
+ // 4x4 will therefore have small bdrate loss (~0.2%), so
+ // we use it only for speed > 9 for now.
+ // Also if loop-filter deltas are applied via segment, then
+ // we need to set cr->skip_over4x4 = 1.
+ cr->skip_over4x4 = (cpi->oxcf.speed > 9) ? 1 : 0;
cr->apply_cyclic_refresh = 1;
if (frame_is_intra_only(cm) || is_lossless_requested(&cpi->oxcf.rc_cfg) ||
cpi->svc.temporal_layer_id > 0 ||
p_rc->avg_frame_qindex[INTER_FRAME] < qp_thresh ||
+ (cpi->svc.number_spatial_layers > 1 &&
+ cpi->svc.layer_context[cpi->svc.temporal_layer_id].is_key_frame) ||
(rc->frames_since_key > 20 &&
p_rc->avg_frame_qindex[INTER_FRAME] > qp_max_thresh) ||
(rc->avg_frame_low_motion < 45 && rc->frames_since_key > 40)) {
@@ -410,6 +413,7 @@
return;
}
cr->percent_refresh = 10;
+ if (cpi->svc.number_temporal_layers > 2) cr->percent_refresh = 15;
cr->max_qdelta_perc = 60;
cr->time_for_refresh = 0;
cr->motion_thresh = 32;
@@ -418,7 +422,8 @@
// periods of the refresh cycle, after a key frame.
// Account for larger interval on base layer for temporal layers.
if (cr->percent_refresh > 0 &&
- rc->frames_since_key < 400 / cr->percent_refresh) {
+ rc->frames_since_key <
+ (4 * cpi->svc.number_temporal_layers) * (100 / cr->percent_refresh)) {
cr->rate_ratio_qdelta = 3.0;
} else {
cr->rate_ratio_qdelta = 2.0;
@@ -477,9 +482,6 @@
memset(seg_map, 0, cm->mi_params.mi_rows * cm->mi_params.mi_cols);
av1_disable_segmentation(&cm->seg);
if (cm->current_frame.frame_type == KEY_FRAME) {
- memset(cr->last_coded_q_map, MAXQ,
- cm->mi_params.mi_rows * cm->mi_params.mi_cols *
- sizeof(*cr->last_coded_q_map));
cr->sb_index = 0;
}
return;
diff --git a/av1/encoder/aq_cyclicrefresh.h b/av1/encoder/aq_cyclicrefresh.h
index 1c0d5cb..4e4e1f2 100644
--- a/av1/encoder/aq_cyclicrefresh.h
+++ b/av1/encoder/aq_cyclicrefresh.h
@@ -80,10 +80,6 @@
*/
int8_t *map;
/*!
- * Map of the last q a block was coded at.
- */
- uint8_t *last_coded_q_map;
- /*!
* Threshold applied to the projected rate of the coding block,
* when deciding whether block should be refreshed.
*/
@@ -111,6 +107,7 @@
int qindex_delta[3];
double weight_segment;
int apply_cyclic_refresh;
+ int skip_over4x4;
/*!\endcond */
};
diff --git a/av1/encoder/aq_variance.c b/av1/encoder/aq_variance.c
index c2d0a07..3273ef8 100644
--- a/av1/encoder/aq_variance.c
+++ b/av1/encoder/aq_variance.c
@@ -43,7 +43,7 @@
void av1_vaq_frame_setup(AV1_COMP *cpi) {
AV1_COMMON *cm = &cpi->common;
- const RefreshFrameFlagsInfo *const refresh_frame_flags = &cpi->refresh_frame;
+ const RefreshFrameInfo *const refresh_frame = &cpi->refresh_frame;
const int base_qindex = cm->quant_params.base_qindex;
struct segmentation *seg = &cm->seg;
int i;
@@ -64,8 +64,8 @@
return;
}
if (frame_is_intra_only(cm) || cm->features.error_resilient_mode ||
- refresh_frame_flags->alt_ref_frame ||
- (refresh_frame_flags->golden_frame && !cpi->rc.is_src_frame_alt_ref)) {
+ refresh_frame->alt_ref_frame ||
+ (refresh_frame->golden_frame && !cpi->rc.is_src_frame_alt_ref)) {
cpi->vaq_refresh = 1;
av1_enable_segmentation(seg);
@@ -124,13 +124,13 @@
x->plane[0].src.buf + i * x->plane[0].src.stride + j,
x->plane[0].src.stride,
CONVERT_TO_BYTEPTR(av1_highbd_all_zeros), 0, &sse) /
- 16);
+ 16.0);
} else {
var +=
log(1.0 + cpi->ppi->fn_ptr[BLOCK_4X4].vf(
x->plane[0].src.buf + i * x->plane[0].src.stride + j,
x->plane[0].src.stride, av1_all_zeros, 0, &sse) /
- 16);
+ 16.0);
}
}
}
@@ -141,6 +141,34 @@
return (int)(var);
}
+int av1_log_block_avg(const AV1_COMP *cpi, MACROBLOCK *x, BLOCK_SIZE bs,
+ int mi_row, int mi_col) {
+ // This function returns the average pixel value of the luma block
+ unsigned int sum, avg, num_pix;
+ int r, c;
+ const int pic_w = cpi->common.width;
+ const int pic_h = cpi->common.height;
+ const int bw = MI_SIZE * mi_size_wide[bs];
+ const int bh = MI_SIZE * mi_size_high[bs];
+ const uint16_t *x16 = CONVERT_TO_SHORTPTR(x->plane[0].src.buf);
+
+ sum = 0;
+ num_pix = 0;
+ avg = 0;
+ int row = mi_row << MI_SIZE_LOG2;
+ int col = mi_col << MI_SIZE_LOG2;
+ for (r = row; (r < (row + bh)) && (r < pic_h); r++) {
+ for (c = col; (c < (col + bw)) && (c < pic_w); c++) {
+ sum += *(x16 + r * x->plane[0].src.stride + c);
+ num_pix++;
+ }
+ }
+ if (num_pix != 0) {
+ avg = sum / num_pix;
+ }
+ return avg;
+}
+
#define DEFAULT_E_MIDPOINT 10.0
static unsigned int haar_ac_energy(MACROBLOCK *x, BLOCK_SIZE bs) {
diff --git a/av1/encoder/aq_variance.h b/av1/encoder/aq_variance.h
index 543eb0b..aa0535a 100644
--- a/av1/encoder/aq_variance.h
+++ b/av1/encoder/aq_variance.h
@@ -21,6 +21,8 @@
void av1_vaq_frame_setup(AV1_COMP *cpi);
int av1_log_block_var(const AV1_COMP *cpi, MACROBLOCK *x, BLOCK_SIZE bs);
+int av1_log_block_avg(const AV1_COMP *cpi, MACROBLOCK *x, BLOCK_SIZE bs,
+ int mi_row, int mi_col);
int av1_compute_q_from_energy_level_deltaq_mode(const AV1_COMP *const cpi,
int block_var_level);
int av1_block_wavelet_energy_level(const AV1_COMP *cpi, MACROBLOCK *x,
diff --git a/av1/encoder/av1_noise_estimate.c b/av1/encoder/av1_noise_estimate.c
index bfa0a74..4419085 100644
--- a/av1/encoder/av1_noise_estimate.c
+++ b/av1/encoder/av1_noise_estimate.c
@@ -53,12 +53,7 @@
}
static int enable_noise_estimation(AV1_COMP *const cpi) {
- ResizePendingParams *const resize_pending_params =
- &cpi->resize_pending_params;
- const int resize_pending =
- (resize_pending_params->width && resize_pending_params->height &&
- (cpi->common.width != resize_pending_params->width ||
- cpi->common.height != resize_pending_params->height));
+ const int resize_pending = is_frame_resize_pending(cpi);
#if CONFIG_AV1_HIGHBITDEPTH
if (cpi->common.seq_params->use_highbitdepth) return 0;
diff --git a/av1/encoder/av1_quantize.c b/av1/encoder/av1_quantize.c
index 66be3b6..105897e 100644
--- a/av1/encoder/av1_quantize.c
+++ b/av1/encoder/av1_quantize.c
@@ -764,14 +764,34 @@
av1_init_plane_quantizers(cpi, x, xd->mi[0]->segment_id);
}
+static int adjust_hdr_cb_deltaq(int base_qindex) {
+ double baseQp = base_qindex / QP_SCALE_FACTOR;
+ const double chromaQp = CHROMA_QP_SCALE * baseQp + CHROMA_QP_OFFSET;
+ const double dcbQP = CHROMA_CB_QP_SCALE * chromaQp * QP_SCALE_FACTOR;
+ int dqpCb = (int)(dcbQP + (dcbQP < 0 ? -0.5 : 0.5));
+ dqpCb = AOMMIN(0, dqpCb);
+ dqpCb = (int)CLIP(dqpCb, -12 * QP_SCALE_FACTOR, 12 * QP_SCALE_FACTOR);
+ return dqpCb;
+}
+
+static int adjust_hdr_cr_deltaq(int base_qindex) {
+ double baseQp = base_qindex / QP_SCALE_FACTOR;
+ const double chromaQp = CHROMA_QP_SCALE * baseQp + CHROMA_QP_OFFSET;
+ const double dcrQP = CHROMA_CR_QP_SCALE * chromaQp * QP_SCALE_FACTOR;
+ int dqpCr = (int)(dcrQP + (dcrQP < 0 ? -0.5 : 0.5));
+ dqpCr = AOMMIN(0, dqpCr);
+ dqpCr = (int)CLIP(dqpCr, -12 * QP_SCALE_FACTOR, 12 * QP_SCALE_FACTOR);
+ return dqpCr;
+}
+
void av1_set_quantizer(AV1_COMMON *const cm, int min_qmlevel, int max_qmlevel,
- int q, int enable_chroma_deltaq) {
+ int q, int enable_chroma_deltaq, int enable_hdr_deltaq) {
// quantizer has to be reinitialized with av1_init_quantizer() if any
// delta_q changes.
CommonQuantParams *quant_params = &cm->quant_params;
quant_params->base_qindex = AOMMAX(cm->delta_q_info.delta_q_present_flag, q);
-
quant_params->y_dc_delta_q = 0;
+
if (enable_chroma_deltaq) {
// TODO(aomedia:2717): need to design better delta
quant_params->u_dc_delta_q = 2;
@@ -785,6 +805,18 @@
quant_params->v_ac_delta_q = 0;
}
+ // following section 8.3.2 in T-REC-H.Sup15 document
+ // to apply to AV1 qindex in the range of [0, 255]
+ if (enable_hdr_deltaq) {
+ int dqpCb = adjust_hdr_cb_deltaq(quant_params->base_qindex);
+ int dqpCr = adjust_hdr_cr_deltaq(quant_params->base_qindex);
+ quant_params->u_dc_delta_q = quant_params->u_ac_delta_q = dqpCb;
+ quant_params->v_dc_delta_q = quant_params->v_ac_delta_q = dqpCr;
+ if (dqpCb != dqpCr) {
+ cm->seq_params->separate_uv_delta_q = 1;
+ }
+ }
+
quant_params->qmatrix_level_y =
aom_get_qmlevel(quant_params->base_qindex, min_qmlevel, max_qmlevel);
quant_params->qmatrix_level_u =
diff --git a/av1/encoder/av1_quantize.h b/av1/encoder/av1_quantize.h
index 62b53b8..085ab17 100644
--- a/av1/encoder/av1_quantize.h
+++ b/av1/encoder/av1_quantize.h
@@ -106,7 +106,8 @@
aom_bit_depth_t bit_depth);
void av1_set_quantizer(struct AV1Common *const cm, int min_qmlevel,
- int max_qmlevel, int q, int enable_chroma_deltaq);
+ int max_qmlevel, int q, int enable_chroma_deltaq,
+ int enable_hdr_deltaq);
int av1_quantizer_to_qindex(int quantizer);
diff --git a/av1/encoder/av1_temporal_denoiser.c b/av1/encoder/av1_temporal_denoiser.c
index 96f3d7d..26e0eda7 100644
--- a/av1/encoder/av1_temporal_denoiser.c
+++ b/av1/encoder/av1_temporal_denoiser.c
@@ -718,11 +718,7 @@
? KEY_FRAME
: cm->current_frame.frame_type;
cpi->denoiser.current_denoiser_frame++;
- const int resize_pending =
- (cpi->resize_pending_params.width &&
- cpi->resize_pending_params.height &&
- (cpi->common.width != cpi->resize_pending_params.width ||
- cpi->common.height != cpi->resize_pending_params.height));
+ const int resize_pending = is_frame_resize_pending(cpi);
if (cpi->ppi->use_svc) {
// TODO(kyslov) Enable when SVC temporal denosing is implemented
diff --git a/av1/encoder/block.h b/av1/encoder/block.h
index 3198c80..9696859 100644
--- a/av1/encoder/block.h
+++ b/av1/encoder/block.h
@@ -252,8 +252,8 @@
*/
typedef struct {
//! Circular buffer that stores the txfm search results.
- MB_RD_INFO tx_rd_info[RD_RECORD_BUFFER_LEN]; // Circular buffer.
- //! Index to insert the newest \ref TXB_RD_INFO.
+ MB_RD_INFO tx_rd_info[RD_RECORD_BUFFER_LEN];
+ //! Index to insert the newest rd record.
int index_start;
//! Number of info stored in this record.
int num;
@@ -261,44 +261,6 @@
CRC32C crc_calculator;
} MB_RD_RECORD;
-/*! \brief Txfm search results for a tx block.
- */
-typedef struct {
- //! Distortion after the txfm process
- int64_t dist;
- //! SSE of the prediction before the txfm process
- int64_t sse;
- //! Rate used to encode the txfm.
- int rate;
- //! Location of the end of non-zero entries.
- uint16_t eob;
- //! Transform type used on the current block.
- TX_TYPE tx_type;
- //! Unknown usage
- uint16_t entropy_context;
- //! Context used to code the coefficients.
- uint8_t txb_entropy_ctx;
- //! Whether the current info block contains valid info
- uint8_t valid;
- //! Unused
- uint8_t fast;
- //! Whether trellis optimization is done.
- uint8_t perform_block_coeff_opt;
-} TXB_RD_INFO;
-
-/*! \brief Hash records of txfm search result for each tx block.
- */
-typedef struct {
- //! The hash values.
- uint32_t hash_vals[TX_SIZE_RD_RECORD_BUFFER_LEN];
- //! The txfm search results
- TXB_RD_INFO tx_rd_info[TX_SIZE_RD_RECORD_BUFFER_LEN];
- //! Index to insert the newest \ref TXB_RD_INFO.
- int index_start;
- //! Number of info stored in this record.
- int num;
-} TXB_RD_RECORD;
-
//! Number of compound rd stats
#define MAX_COMP_RD_STATS 64
/*! \brief Rdcost stats in compound mode.
@@ -499,16 +461,6 @@
//! Txfm hash record for the whole coding block.
MB_RD_RECORD mb_rd_record;
- //! Inter mode txfm hash record for TX_8X8 blocks.
- TXB_RD_RECORD txb_rd_record_8X8[MAX_NUM_8X8_TXBS];
- //! Inter mode txfm hash record for TX_16X16 blocks.
- TXB_RD_RECORD txb_rd_record_16X16[MAX_NUM_16X16_TXBS];
- //! Inter mode txfm hash record for TX_32X32 blocks.
- TXB_RD_RECORD txb_rd_record_32X32[MAX_NUM_32X32_TXBS];
- //! Inter mode txfm hash record for TX_64X64 blocks.
- TXB_RD_RECORD txb_rd_record_64X64[MAX_NUM_64X64_TXBS];
- //! Intra mode txfm hash record for square tx blocks.
- TXB_RD_RECORD txb_rd_record_intra;
/**@}*/
} TxbRdRecords;
diff --git a/av1/encoder/encode_strategy.c b/av1/encoder/encode_strategy.c
index b38bd3e..fe2b9ae 100644
--- a/av1/encoder/encode_strategy.c
+++ b/av1/encoder/encode_strategy.c
@@ -40,17 +40,18 @@
#define TEMPORAL_FILTER_KEY_FRAME (CONFIG_REALTIME_ONLY ? 0 : 1)
static INLINE void set_refresh_frame_flags(
- RefreshFrameFlagsInfo *const refresh_frame_flags, bool refresh_gf,
- bool refresh_bwdref, bool refresh_arf) {
- refresh_frame_flags->golden_frame = refresh_gf;
- refresh_frame_flags->bwd_ref_frame = refresh_bwdref;
- refresh_frame_flags->alt_ref_frame = refresh_arf;
+ RefreshFrameInfo *const refresh_frame, bool refresh_gf, bool refresh_bwdref,
+ bool refresh_arf) {
+ refresh_frame->golden_frame = refresh_gf;
+ refresh_frame->bwd_ref_frame = refresh_bwdref;
+ refresh_frame->alt_ref_frame = refresh_arf;
}
-void av1_configure_buffer_updates(
- AV1_COMP *const cpi, RefreshFrameFlagsInfo *const refresh_frame_flags,
- const FRAME_UPDATE_TYPE type, const REFBUF_STATE refbuf_state,
- int force_refresh_all) {
+void av1_configure_buffer_updates(AV1_COMP *const cpi,
+ RefreshFrameInfo *const refresh_frame,
+ const FRAME_UPDATE_TYPE type,
+ const REFBUF_STATE refbuf_state,
+ int force_refresh_all) {
// NOTE(weitinglin): Should we define another function to take care of
// cpi->rc.is_$Source_Type to make this function as it is in the comment?
const ExtRefreshFrameFlagsInfo *const ext_refresh_frame_flags =
@@ -59,22 +60,22 @@
switch (type) {
case KF_UPDATE:
- set_refresh_frame_flags(refresh_frame_flags, true, true, true);
+ set_refresh_frame_flags(refresh_frame, true, true, true);
break;
case LF_UPDATE:
- set_refresh_frame_flags(refresh_frame_flags, false, false, false);
+ set_refresh_frame_flags(refresh_frame, false, false, false);
break;
case GF_UPDATE:
- set_refresh_frame_flags(refresh_frame_flags, true, false, false);
+ set_refresh_frame_flags(refresh_frame, true, false, false);
break;
case OVERLAY_UPDATE:
if (refbuf_state == REFBUF_RESET)
- set_refresh_frame_flags(refresh_frame_flags, true, true, true);
+ set_refresh_frame_flags(refresh_frame, true, true, true);
else
- set_refresh_frame_flags(refresh_frame_flags, true, false, false);
+ set_refresh_frame_flags(refresh_frame, true, false, false);
cpi->rc.is_src_frame_alt_ref = 1;
break;
@@ -82,19 +83,19 @@
case ARF_UPDATE:
// NOTE: BWDREF does not get updated along with ALTREF_FRAME.
if (refbuf_state == REFBUF_RESET)
- set_refresh_frame_flags(refresh_frame_flags, true, true, true);
+ set_refresh_frame_flags(refresh_frame, true, true, true);
else
- set_refresh_frame_flags(refresh_frame_flags, false, false, true);
+ set_refresh_frame_flags(refresh_frame, false, false, true);
break;
case INTNL_OVERLAY_UPDATE:
- set_refresh_frame_flags(refresh_frame_flags, false, false, false);
+ set_refresh_frame_flags(refresh_frame, false, false, false);
cpi->rc.is_src_frame_alt_ref = 1;
break;
case INTNL_ARF_UPDATE:
- set_refresh_frame_flags(refresh_frame_flags, false, true, false);
+ set_refresh_frame_flags(refresh_frame, false, true, false);
break;
default: assert(0); break;
@@ -102,7 +103,7 @@
if (ext_refresh_frame_flags->update_pending &&
(!is_stat_generation_stage(cpi))) {
- set_refresh_frame_flags(refresh_frame_flags,
+ set_refresh_frame_flags(refresh_frame,
ext_refresh_frame_flags->golden_frame,
ext_refresh_frame_flags->bwd_ref_frame,
ext_refresh_frame_flags->alt_ref_frame);
@@ -116,7 +117,7 @@
}
if (force_refresh_all)
- set_refresh_frame_flags(refresh_frame_flags, true, true, true);
+ set_refresh_frame_flags(refresh_frame, true, true, true);
}
static void set_additional_frame_flags(const AV1_COMMON *const cm,
@@ -222,8 +223,12 @@
if (this_duration) {
if (step) {
+#if CONFIG_FRAME_PARALLEL_ENCODE
+ cpi->new_framerate = 10000000.0 / this_duration;
+#endif
av1_new_framerate(cpi, 10000000.0 / this_duration);
} else {
+ double framerate;
// Average this frame's rate into the last second's average
// frame rate. If we haven't seen 1 second yet, then average
// over the whole interval seen.
@@ -232,10 +237,21 @@
double avg_duration = 10000000.0 / cpi->framerate;
avg_duration *= (interval - avg_duration + this_duration);
avg_duration /= interval;
-
- av1_new_framerate(cpi, 10000000.0 / avg_duration);
+#if CONFIG_FRAME_PARALLEL_ENCODE
+ cpi->new_framerate = (10000000.0 / avg_duration);
+ // For parallel frames update cpi->framerate with new_framerate
+ // during av1_post_encode_updates()
+ framerate =
+ (cpi->ppi->gf_group.frame_parallel_level[cpi->gf_frame_index] > 0)
+ ? cpi->framerate
+ : cpi->new_framerate;
+#else
+ framerate = (10000000.0 / avg_duration);
+#endif
+ av1_new_framerate(cpi, framerate);
}
}
+
time_stamps->prev_ts_start = ts_start;
time_stamps->prev_ts_end = ts_end;
}
@@ -362,10 +378,9 @@
// Update frame_flags to tell the encoder's caller what sort of frame was
// encoded.
-static void update_frame_flags(
- const AV1_COMMON *const cm,
- const RefreshFrameFlagsInfo *const refresh_frame_flags,
- unsigned int *frame_flags) {
+static void update_frame_flags(const AV1_COMMON *const cm,
+ const RefreshFrameInfo *const refresh_frame,
+ unsigned int *frame_flags) {
if (encode_show_existing_frame(cm)) {
*frame_flags &= ~FRAMEFLAGS_GOLDEN;
*frame_flags &= ~FRAMEFLAGS_BWDREF;
@@ -374,19 +389,19 @@
return;
}
- if (refresh_frame_flags->golden_frame) {
+ if (refresh_frame->golden_frame) {
*frame_flags |= FRAMEFLAGS_GOLDEN;
} else {
*frame_flags &= ~FRAMEFLAGS_GOLDEN;
}
- if (refresh_frame_flags->alt_ref_frame) {
+ if (refresh_frame->alt_ref_frame) {
*frame_flags |= FRAMEFLAGS_ALTREF;
} else {
*frame_flags &= ~FRAMEFLAGS_ALTREF;
}
- if (refresh_frame_flags->bwd_ref_frame) {
+ if (refresh_frame->bwd_ref_frame) {
*frame_flags |= FRAMEFLAGS_BWDREF;
} else {
*frame_flags &= ~FRAMEFLAGS_BWDREF;
@@ -969,9 +984,9 @@
gf_group->refbuf_state[cpi->gf_frame_index] == REFBUF_UPDATE)
is_forward_keyframe = 1;
- const int code_arf =
- av1_temporal_filter(cpi, arf_src_index, update_type,
- is_forward_keyframe, &show_existing_alt_ref);
+ const int code_arf = av1_temporal_filter(
+ cpi, arf_src_index, update_type, is_forward_keyframe,
+ &show_existing_alt_ref, &cpi->ppi->alt_ref_buffer);
if (code_arf) {
aom_extend_frame_borders(&cpi->ppi->alt_ref_buffer, av1_num_planes(cm));
frame_input->source = &cpi->ppi->alt_ref_buffer;
@@ -1229,9 +1244,7 @@
gf_group->frame_parallel_level[gf_index - 1] == 1 &&
gf_group->update_type[gf_index - 1] == INTNL_ARF_UPDATE) {
assert(gf_group->update_type[gf_index] == INTNL_ARF_UPDATE);
- // TODO(Remya): Use original value of is_parallel_encode when FPMT is
- // enabled.
- is_parallel_encode = 0;
+
// If parallel cpis are active, use ref_idx_to_skip, else, use display
// index.
assert(IMPLIES(is_parallel_encode, cpi->ref_idx_to_skip != INVALID_IDX));
@@ -1827,6 +1840,9 @@
}
#endif // CONFIG_REALTIME_ONLY
+ // Re-fetch frame_update_type, as it can get modified as part of
+ // av1_adjust_gf_refresh_qp_one_pass_rt.
+ frame_update_type = get_frame_update_type(gf_group, cpi->gf_frame_index);
if (!is_stat_generation_stage(cpi)) {
// First pass doesn't modify reference buffer assignment or produce frame
// flags
diff --git a/av1/encoder/encode_strategy.h b/av1/encoder/encode_strategy.h
index 0ae4c05..15681c3 100644
--- a/av1/encoder/encode_strategy.h
+++ b/av1/encoder/encode_strategy.h
@@ -62,10 +62,11 @@
// Set individual buffer update flags based on frame reference type.
// force_refresh_all is used when we have a KEY_FRAME or S_FRAME. It forces all
// refresh_*_frame flags to be set, because we refresh all buffers in this case.
-void av1_configure_buffer_updates(
- AV1_COMP *const cpi, RefreshFrameFlagsInfo *const refresh_frame_flags,
- const FRAME_UPDATE_TYPE type, const REFBUF_STATE refbuf_state,
- int force_refresh_all);
+void av1_configure_buffer_updates(AV1_COMP *const cpi,
+ RefreshFrameInfo *const refresh_frame,
+ const FRAME_UPDATE_TYPE type,
+ const REFBUF_STATE refbuf_state,
+ int force_refresh_all);
int av1_get_refresh_frame_flags(const AV1_COMP *const cpi,
const EncodeFrameParams *const frame_params,
diff --git a/av1/encoder/encodeframe.c b/av1/encoder/encodeframe.c
index 04e7e5e..ff9b8e0 100644
--- a/av1/encoder/encodeframe.c
+++ b/av1/encoder/encodeframe.c
@@ -241,6 +241,8 @@
current_qindex = av1_get_sbq_perceptual_ai(cpi, sb_size, mi_row, mi_col);
} else if (cpi->oxcf.q_cfg.deltaq_mode == DELTA_Q_USER_RATING_BASED) {
current_qindex = av1_get_sbq_user_rating_based(cpi, mi_row, mi_col);
+ } else if (cpi->oxcf.q_cfg.enable_hdr_deltaq) {
+ current_qindex = av1_get_q_for_hdr(cpi, x, sb_size, mi_row, mi_col);
}
MACROBLOCKD *const xd = &x->e_mbd;
@@ -565,13 +567,8 @@
}
#if !CONFIG_REALTIME_ONLY
- if (has_no_stats_stage(cpi) && cpi->oxcf.mode == REALTIME &&
- cpi->oxcf.gf_cfg.lag_in_frames == 0) {
- (void)tile_info;
- (void)mi_row;
- (void)mi_col;
- (void)gather_tpl_data;
- } else {
+ if (!(has_no_stats_stage(cpi) && cpi->oxcf.mode == REALTIME &&
+ cpi->oxcf.gf_cfg.lag_in_frames == 0)) {
init_ref_frame_space(cpi, td, mi_row, mi_col);
x->sb_energy_level = 0;
x->part_search_info.cnn_output_valid = 0;
@@ -594,8 +591,7 @@
(void)gather_tpl_data;
#endif
- // Reset hash state for transform/mode rd hash information
- reset_hash_records(&x->txfm_search_info, cpi->sf.tx_sf.use_inter_txb_hash);
+ reset_hash_records(&x->txfm_search_info);
av1_zero(x->picked_ref_frames_mask);
av1_invalid_rd_stats(rd_cost);
}
@@ -1367,7 +1363,8 @@
cm->delta_q_info.delta_q_res = DEFAULT_DELTA_Q_RES_PERCEPTUAL;
else if (deltaq_mode == DELTA_Q_USER_RATING_BASED)
cm->delta_q_info.delta_q_res = DEFAULT_DELTA_Q_RES_PERCEPTUAL;
-
+ else if (deltaq_mode == DELTA_Q_HDR)
+ cm->delta_q_info.delta_q_res = DEFAULT_DELTA_Q_RES_PERCEPTUAL;
// Set delta_q_present_flag before it is used for the first time
cm->delta_q_info.delta_lf_res = DEFAULT_DELTA_LF_RES;
cm->delta_q_info.delta_q_present_flag = deltaq_mode != NO_DELTA_Q;
diff --git a/av1/encoder/encodeframe_utils.c b/av1/encoder/encodeframe_utils.c
index 89c2322..9fbd68c 100644
--- a/av1/encoder/encodeframe_utils.c
+++ b/av1/encoder/encodeframe_utils.c
@@ -15,6 +15,7 @@
#include "av1/encoder/encodeframe_utils.h"
#include "av1/encoder/partition_strategy.h"
#include "av1/encoder/rdopt.h"
+#include "av1/encoder/aq_variance.h"
void av1_set_ssim_rdmult(const AV1_COMP *const cpi, int *errorperbit,
const BLOCK_SIZE bsize, const int mi_row,
@@ -957,6 +958,49 @@
return qindex;
}
+
+#if !DISABLE_HDR_LUMA_DELTAQ
+// Offset table defined in Table 3 of the T-REC-H.Sup15 document.
+static const int hdr_thres[HDR_QP_LEVELS + 1] = { 0, 301, 367, 434, 501, 567,
+ 634, 701, 767, 834, 1024 };
+
+static const int hdr10_qp_offset[HDR_QP_LEVELS] = { 3, 2, 1, 0, -1,
+ -2, -3, -4, -5, -6 };
+#endif
+
+int av1_get_q_for_hdr(AV1_COMP *const cpi, MACROBLOCK *const x,
+ BLOCK_SIZE bsize, int mi_row, int mi_col) {
+ AV1_COMMON *const cm = &cpi->common;
+ assert(cm->seq_params->bit_depth == AOM_BITS_10);
+
+#if DISABLE_HDR_LUMA_DELTAQ
+ (void)x;
+ (void)bsize;
+ (void)mi_row;
+ (void)mi_col;
+ return cm->quant_params.base_qindex;
+#else
+ // calculate pixel average
+ const int block_luma_avg = av1_log_block_avg(cpi, x, bsize, mi_row, mi_col);
+ // adjust offset based on average of the pixel block
+ int offset = 0;
+ for (int i = 0; i < HDR_QP_LEVELS; i++) {
+ if (block_luma_avg >= hdr_thres[i] && block_luma_avg < hdr_thres[i + 1]) {
+ offset = (int)(hdr10_qp_offset[i] * QP_SCALE_FACTOR);
+ break;
+ }
+ }
+
+ const DeltaQInfo *const delta_q_info = &cm->delta_q_info;
+ offset = AOMMIN(offset, delta_q_info->delta_q_res * 9 - 1);
+ offset = AOMMAX(offset, -delta_q_info->delta_q_res * 9 + 1);
+ int qindex = cm->quant_params.base_qindex + offset;
+ qindex = AOMMIN(qindex, MAXQ);
+ qindex = AOMMAX(qindex, MINQ);
+
+ return qindex;
+#endif
+}
#endif // !CONFIG_REALTIME_ONLY
void av1_reset_simple_motion_tree_partition(SIMPLE_MOTION_DATA_TREE *sms_tree,
diff --git a/av1/encoder/encodeframe_utils.h b/av1/encoder/encodeframe_utils.h
index 94abc42..3604616 100644
--- a/av1/encoder/encodeframe_utils.h
+++ b/av1/encoder/encodeframe_utils.h
@@ -337,6 +337,9 @@
int av1_get_q_for_deltaq_objective(AV1_COMP *const cpi, BLOCK_SIZE bsize,
int mi_row, int mi_col);
+int av1_get_q_for_hdr(AV1_COMP *const cpi, MACROBLOCK *const x,
+ BLOCK_SIZE bsize, int mi_row, int mi_col);
+
int av1_get_hier_tpl_rdmult(const AV1_COMP *const cpi, MACROBLOCK *const x,
const BLOCK_SIZE bsize, const int mi_row,
const int mi_col, int orig_rdmult);
diff --git a/av1/encoder/encoder.c b/av1/encoder/encoder.c
index 6b40ea0..4b657c8 100644
--- a/av1/encoder/encoder.c
+++ b/av1/encoder/encoder.c
@@ -682,7 +682,7 @@
MACROBLOCK *const x = &cpi->td.mb;
AV1LevelParams *const level_params = &cpi->ppi->level_params;
InitialDimensions *const initial_dimensions = &cpi->initial_dimensions;
- RefreshFrameFlagsInfo *const refresh_frame_flags = &cpi->refresh_frame;
+ RefreshFrameInfo *const refresh_frame = &cpi->refresh_frame;
const FrameDimensionCfg *const frm_dim_cfg = &cpi->oxcf.frm_dim_cfg;
const RateControlCfg *const rc_cfg = &oxcf->rc_cfg;
@@ -735,8 +735,8 @@
p_rc->baseline_gf_interval = (MIN_GF_INTERVAL + MAX_GF_INTERVAL) / 2;
}
- refresh_frame_flags->golden_frame = false;
- refresh_frame_flags->bwd_ref_frame = false;
+ refresh_frame->golden_frame = false;
+ refresh_frame->bwd_ref_frame = false;
cm->features.refresh_frame_context =
(oxcf->tool_cfg.frame_parallel_decoding_mode)
@@ -760,12 +760,17 @@
aom_memalign(32, MAX_SB_SIZE * MAX_SB_SIZE * sizeof(*x->tmp_conv_dst)));
x->e_mbd.tmp_conv_dst = x->tmp_conv_dst;
}
- for (int i = 0; i < 2; ++i) {
- if (x->tmp_pred_bufs[i] == NULL) {
- CHECK_MEM_ERROR(cm, x->tmp_pred_bufs[i],
- aom_memalign(32, 2 * MAX_MB_PLANE * MAX_SB_SQUARE *
- sizeof(*x->tmp_pred_bufs[i])));
- x->e_mbd.tmp_obmc_bufs[i] = x->tmp_pred_bufs[i];
+ // The buffers 'tmp_pred_bufs[]' are used in inter frames to store temporary
+ // prediction results. Hence, the memory allocation is avoided for allintra
+ // encode.
+ if (cpi->oxcf.kf_cfg.key_freq_max != 0) {
+ for (int i = 0; i < 2; ++i) {
+ if (x->tmp_pred_bufs[i] == NULL) {
+ CHECK_MEM_ERROR(cm, x->tmp_pred_bufs[i],
+ aom_memalign(32, 2 * MAX_MB_PLANE * MAX_SB_SQUARE *
+ sizeof(*x->tmp_pred_bufs[i])));
+ x->e_mbd.tmp_obmc_bufs[i] = x->tmp_pred_bufs[i];
+ }
}
}
@@ -2074,10 +2079,7 @@
if (!is_stat_generation_stage(cpi)) av1_init_cdef_worker(cpi);
#if !CONFIG_REALTIME_ONLY
- const int use_restoration = cm->seq_params->enable_restoration &&
- !cm->features.all_lossless &&
- !cm->tiles.large_scale;
- if (use_restoration) {
+ if (is_restoration_used(cm)) {
const int frame_width = cm->superres_upscaled_width;
const int frame_height = cm->superres_upscaled_height;
set_restoration_unit_size(frame_width, frame_height,
@@ -2209,9 +2211,7 @@
!cm->features.coded_lossless && !cm->tiles.large_scale;
const int use_cdef = cm->seq_params->enable_cdef &&
!cm->features.coded_lossless && !cm->tiles.large_scale;
- const int use_restoration = cm->seq_params->enable_restoration &&
- !cm->features.all_lossless &&
- !cm->tiles.large_scale;
+ const int use_restoration = is_restoration_used(cm);
const int cur_width = cm->cur_frame->width;
const int cur_height = cm->cur_frame->height;
const int cur_width_mib = cm->mi_params.mi_cols * MI_SIZE;
@@ -2261,12 +2261,7 @@
AV1_COMMON *const cm = &cpi->common;
const QuantizationCfg *const q_cfg = &cpi->oxcf.q_cfg;
SVC *const svc = &cpi->svc;
- ResizePendingParams *const resize_pending_params =
- &cpi->resize_pending_params;
- const int resize_pending =
- (resize_pending_params->width && resize_pending_params->height &&
- (cpi->common.width != resize_pending_params->width ||
- cpi->common.height != resize_pending_params->height));
+ const int resize_pending = is_frame_resize_pending(cpi);
int top_index = 0, bottom_index = 0, q = 0;
YV12_BUFFER_CONFIG *unscaled = cpi->unscaled_source;
@@ -2309,7 +2304,9 @@
if (sf->part_sf.partition_search_type == VAR_BASED_PARTITION)
variance_partition_alloc(cpi);
- if (cm->current_frame.frame_type == KEY_FRAME) copy_frame_prob_info(cpi);
+ if (cm->current_frame.frame_type == KEY_FRAME ||
+ ((sf->inter_sf.extra_prune_warped && cpi->refresh_frame.golden_frame)))
+ copy_frame_prob_info(cpi);
#if CONFIG_COLLECT_COMPONENT_TIMING
printf("\n Encoding a frame:");
@@ -2379,7 +2376,7 @@
}
av1_set_quantizer(cm, q_cfg->qm_minlevel, q_cfg->qm_maxlevel, q,
- q_cfg->enable_chroma_deltaq);
+ q_cfg->enable_chroma_deltaq, q_cfg->enable_hdr_deltaq);
av1_set_speed_features_qindex_dependent(cpi, cpi->oxcf.speed);
if ((q_cfg->deltaq_mode != NO_DELTA_Q) || q_cfg->enable_chroma_deltaq)
av1_init_quantizer(&cpi->enc_quant_dequant_params, &cm->quant_params,
@@ -2394,7 +2391,7 @@
cpi->rc.high_source_sad) {
if (av1_encodedframe_overshoot_cbr(cpi, &q)) {
av1_set_quantizer(cm, q_cfg->qm_minlevel, q_cfg->qm_maxlevel, q,
- q_cfg->enable_chroma_deltaq);
+ q_cfg->enable_chroma_deltaq, q_cfg->enable_hdr_deltaq);
av1_set_speed_features_qindex_dependent(cpi, cpi->oxcf.speed);
if (q_cfg->deltaq_mode != NO_DELTA_Q || q_cfg->enable_chroma_deltaq)
av1_init_quantizer(&cpi->enc_quant_dequant_params, &cm->quant_params,
@@ -2438,6 +2435,13 @@
if (q_cfg->aq_mode == CYCLIC_REFRESH_AQ && !frame_is_intra_only(cm))
av1_cyclic_refresh_postencode(cpi);
+ // Adjust the refresh of the golden (longer-term) reference based on QP
+ // selected for this frame. This is for CBR with 1 layer/non-svc RTC mode.
+ if (!frame_is_intra_only(cm) && cpi->oxcf.rc_cfg.mode == AOM_CBR &&
+ cpi->oxcf.mode == REALTIME && svc->number_spatial_layers == 1 &&
+ svc->number_temporal_layers == 1)
+ av1_adjust_gf_refresh_qp_one_pass_rt(cpi);
+
#if CONFIG_COLLECT_COMPONENT_TIMING
end_timing(cpi, av1_encode_frame_time);
#endif
@@ -2613,7 +2617,7 @@
}
#endif
av1_set_quantizer(cm, q_cfg->qm_minlevel, q_cfg->qm_maxlevel, q,
- q_cfg->enable_chroma_deltaq);
+ q_cfg->enable_chroma_deltaq, q_cfg->enable_hdr_deltaq);
av1_set_speed_features_qindex_dependent(cpi, oxcf->speed);
if (q_cfg->deltaq_mode != NO_DELTA_Q || q_cfg->enable_chroma_deltaq)
@@ -2750,7 +2754,7 @@
#if 0
vbr_rc_info_log(&cpi->vbr_rc_info, cpi->gf_frame_index,
- cpi->ppi->gf_group.size, &cpi->ppi->gf_group.update_type);
+ cpi->ppi->gf_group.size, cpi->ppi->gf_group.update_type);
#endif
#endif
@@ -3139,6 +3143,33 @@
return err;
}
+// Conditions to disable cdf_update mode in selective mode for real-time.
+// Handle case for layers, scene change, and resizing.
+static int selective_disable_cdf_rtc(AV1_COMP *cpi) {
+ AV1_COMMON *const cm = &cpi->common;
+ RATE_CONTROL *const rc = &cpi->rc;
+ // For single layer.
+ if (cpi->svc.number_spatial_layers == 1 &&
+ cpi->svc.number_temporal_layers == 1) {
+ // Don't disable on intra_only, scene change (high_source_sad = 1),
+ // or resized frame. Don't disable for some consecutive frames after
+ // key, or for some consecutive frames before the golden_refresh
+ // (cpi->rc.frames_till_gf_update_due < 5).
+ // To avoid quality loss for now, force enable at every 10th frame.
+ if (frame_is_intra_only(cm) || is_frame_resize_pending(cpi) ||
+ rc->high_source_sad || rc->frames_since_key < 10 ||
+ rc->frames_till_gf_update_due < 5 ||
+ cm->current_frame.frame_number % 10 == 0)
+ return 0;
+ else
+ return 1;
+ } else if (cpi->svc.number_temporal_layers > 1) {
+ // Disable only on top temporal enhancement layer for now.
+ return cpi->svc.temporal_layer_id == cpi->svc.number_temporal_layers - 1;
+ }
+ return 1;
+}
+
#if !CONFIG_REALTIME_ONLY
static void subtract_stats(FIRSTPASS_STATS *section,
const FIRSTPASS_STATS *frame) {
@@ -3356,7 +3387,7 @@
// Never drop on key frame.
if (has_no_stats_stage(cpi) && oxcf->rc_cfg.mode == AOM_CBR &&
current_frame->frame_type != KEY_FRAME) {
- if (av1_rc_drop_frame(cpi)) {
+ if (cpi->oxcf.rc_cfg.target_bandwidth == 0 || av1_rc_drop_frame(cpi)) {
av1_setup_frame_size(cpi);
av1_rc_postencode_update_drop_frame(cpi);
release_scaled_references(cpi);
@@ -3431,21 +3462,12 @@
case 2:
// Strategically determine at which frames to do CDF update.
// Currently only enable CDF update for all-intra and no-show frames(1.5%
- // compression loss).
- // TODO(huisu@google.com): design schemes for various trade-offs between
- // compression quality and decoding speed.
+ // compression loss) for good quality or allintra mode.
if (oxcf->mode == GOOD || oxcf->mode == ALLINTRA) {
features->disable_cdf_update =
(frame_is_intra_only(cm) || !cm->show_frame) ? 0 : 1;
} else {
- if (cpi->svc.number_spatial_layers == 1 &&
- cpi->svc.number_temporal_layers == 1)
- features->disable_cdf_update =
- !((cm->current_frame.frame_number % 2) == 0);
- else if (cpi->svc.number_temporal_layers > 1)
- // Disable only on top temporal enhancement layer for now.
- features->disable_cdf_update = (cpi->svc.temporal_layer_id ==
- cpi->svc.number_temporal_layers - 1);
+ features->disable_cdf_update = selective_disable_cdf_rtc(cpi);
}
break;
}
@@ -4552,12 +4574,18 @@
AV1_COMP_DATA *cur_cpi_data =
&ppi->parallel_frames_data[parallel_frame_count - 1];
cur_cpi->gf_frame_index = i;
+ cur_cpi->framerate = first_cpi->framerate;
cur_cpi->common.current_frame.frame_number = cur_frame_num;
cur_cpi->frame_index_set.show_frame_count = show_frame_count;
cur_cpi->rc.frames_since_key = frames_since_key;
cur_cpi->rc.frames_to_key = frames_to_key;
cur_cpi->rc.frames_to_fwd_kf = frames_to_fwd_kf;
cur_cpi->rc.active_worst_quality = first_cpi->rc.active_worst_quality;
+ cur_cpi->rc.avg_frame_bandwidth = first_cpi->rc.avg_frame_bandwidth;
+ cur_cpi->rc.max_frame_bandwidth = first_cpi->rc.max_frame_bandwidth;
+ cur_cpi->rc.min_frame_bandwidth = first_cpi->rc.min_frame_bandwidth;
+ cur_cpi->rc.intervals_till_gf_calculate_due =
+ first_cpi->rc.intervals_till_gf_calculate_due;
cur_cpi->mv_search_params.max_mv_magnitude =
first_cpi->mv_search_params.max_mv_magnitude;
if (gf_group->update_type[cur_cpi->gf_frame_index] == INTNL_ARF_UPDATE) {
diff --git a/av1/encoder/encoder.h b/av1/encoder/encoder.h
index 1bb2ace..26a66db 100644
--- a/av1/encoder/encoder.h
+++ b/av1/encoder/encoder.h
@@ -87,6 +87,14 @@
// Lookahead index threshold to enable temporal filtering for second arf.
#define TF_LOOKAHEAD_IDX_THR 7
+#define HDR_QP_LEVELS 10
+#define CHROMA_CB_QP_SCALE 1.04
+#define CHROMA_CR_QP_SCALE 1.04
+#define CHROMA_QP_SCALE -0.46
+#define CHROMA_QP_OFFSET 9.26
+#define QP_SCALE_FACTOR 2.0
+#define DISABLE_HDR_LUMA_DELTAQ 1
+
// Rational number with an int64 numerator
// This structure holds a fractional value
typedef struct aom_rational64 {
@@ -159,6 +167,7 @@
DELTA_Q_PERCEPTUAL = 2, // Modulation to improve video perceptual quality
DELTA_Q_PERCEPTUAL_AI = 3, // Perceptual quality opt for all intra mode
DELTA_Q_USER_RATING_BASED = 4, // User rating based delta q mode
+ DELTA_Q_HDR = 5, // QP adjustment based on HDR block pixel average
DELTA_Q_MODE_COUNT // This should always be the last member of the enum
} UENUM1BYTE(DELTAQ_MODE);
@@ -752,6 +761,8 @@
DELTAQ_MODE deltaq_mode;
// Indicates if delta quantization should be enabled in chroma planes.
bool enable_chroma_deltaq;
+ // Indicates if delta quantization should be enabled for hdr video
+ bool enable_hdr_deltaq;
// Indicates if encoding with quantization matrices should be enabled.
bool using_qm;
} QuantizationCfg;
@@ -1933,7 +1944,7 @@
bool golden_frame; /*!< Refresh flag for golden frame */
bool bwd_ref_frame; /*!< Refresh flag for bwd-ref frame */
bool alt_ref_frame; /*!< Refresh flag for alt-ref frame */
-} RefreshFrameFlagsInfo;
+} RefreshFrameInfo;
/*!
* \brief Desired dimensions for an externally triggered resize.
@@ -2139,7 +2150,6 @@
int16_t rec_pix_max;
int64_t distortion;
int64_t satd;
- double alpha;
double max_scale;
} WeberStats;
@@ -2682,7 +2692,7 @@
/*!
* Refresh frame flags for golden, bwd-ref and alt-ref frames.
*/
- RefreshFrameFlagsInfo refresh_frame;
+ RefreshFrameInfo refresh_frame;
/*!
* Flags signalled by the external interface at frame level.
@@ -2904,6 +2914,13 @@
* Retain condition for fast_extra_bits calculation.
*/
int do_update_vbr_bits_off_target_fast;
+
+ /*!
+ * Updated framerate for the current parallel frame.
+ * cpi->framerate is updated with new_framerate during
+ * post encode updates for parallel frames.
+ */
+ double new_framerate;
#endif
/*!
* Multi-threading parameters.
@@ -3238,7 +3255,7 @@
* Flags which determine which reference buffers are refreshed by this
* frame.
*/
- RefreshFrameFlagsInfo refresh_frame;
+ RefreshFrameInfo refresh_frame;
/*!
* Speed level to use for this frame: Bigger number means faster.
@@ -3439,7 +3456,7 @@
// Once the keyframe is coded, the slots in ref_frame_map will all
// point to the same frame. In that case, all subsequent pointers
// matching the current are considered "free" slots. This will find
- // the next occurance of the current pointer if ref_count indicates
+ // the next occurrence of the current pointer if ref_count indicates
// there are multiple instances of it and mark it as free.
for (int idx2 = map_idx + 1; idx2 < REF_FRAMES; ++idx2) {
const RefCntBuffer *const buf2 = cpi->common.ref_frame_map[idx2];
@@ -3453,30 +3470,6 @@
ref_frame_map_pairs[map_idx].pyr_level = buf->pyramid_level;
}
}
-
-static AOM_INLINE void calc_frame_data_update_flag(
- GF_GROUP *const gf_group, int gf_frame_index,
- bool *const do_frame_data_update) {
- *do_frame_data_update = true;
- // Set the flag to false for all frames in a given parallel encode set except
- // the last frame in the set with frame_parallel_level = 2.
- if (gf_group->frame_parallel_level[gf_frame_index] == 1) {
- *do_frame_data_update = false;
- } else if (gf_group->frame_parallel_level[gf_frame_index] == 2) {
- // Check if this is the last frame in the set with frame_parallel_level = 2.
- for (int i = gf_frame_index + 1; i < gf_group->size; i++) {
- if ((gf_group->frame_parallel_level[i] == 0 &&
- (gf_group->update_type[i] == ARF_UPDATE ||
- gf_group->update_type[i] == INTNL_ARF_UPDATE)) ||
- gf_group->frame_parallel_level[i] == 1) {
- break;
- } else if (gf_group->frame_parallel_level[i] == 2) {
- *do_frame_data_update = false;
- break;
- }
- }
- }
-}
#endif // CONFIG_FRAME_PARALLEL_ENCODE
// TODO(jingning): Move these functions as primitive members for the new cpi
@@ -3842,6 +3835,20 @@
cm->show_frame;
}
+static INLINE int is_frame_resize_pending(AV1_COMP *const cpi) {
+ ResizePendingParams *const resize_pending_params =
+ &cpi->resize_pending_params;
+ return (resize_pending_params->width && resize_pending_params->height &&
+ (cpi->common.width != resize_pending_params->width ||
+ cpi->common.height != resize_pending_params->height));
+}
+
+// Check if loop restoration filter is used.
+static INLINE int is_restoration_used(const AV1_COMMON *const cm) {
+ return cm->seq_params->enable_restoration && !cm->features.all_lossless &&
+ !cm->tiles.large_scale;
+}
+
#if CONFIG_AV1_TEMPORAL_DENOISING
static INLINE int denoise_svc(const struct AV1_COMP *const cpi) {
return (!cpi->ppi->use_svc ||
diff --git a/av1/encoder/encoder_alloc.h b/av1/encoder/encoder_alloc.h
index 01719c5..22406d9 100644
--- a/av1/encoder/encoder_alloc.h
+++ b/av1/encoder/encoder_alloc.h
@@ -77,8 +77,11 @@
aom_free(cpi->td.mb.mv_costs);
cpi->td.mb.mv_costs = NULL;
}
- CHECK_MEM_ERROR(cm, cpi->td.mb.mv_costs,
- (MvCosts *)aom_calloc(1, sizeof(MvCosts)));
+ // Avoid the memory allocation of 'mv_costs' for allintra encoding mode.
+ if (cpi->oxcf.kf_cfg.key_freq_max != 0) {
+ CHECK_MEM_ERROR(cm, cpi->td.mb.mv_costs,
+ (MvCosts *)aom_calloc(1, sizeof(MvCosts)));
+ }
if (cpi->td.mb.dv_costs) {
aom_free(cpi->td.mb.dv_costs);
@@ -354,19 +357,10 @@
AV1_COMMON *const cm = &cpi->common;
const SequenceHeader *const seq_params = cm->seq_params;
const int byte_alignment = cm->features.byte_alignment;
- if (aom_realloc_frame_buffer(
- &cpi->last_frame_uf, cm->width, cm->height, seq_params->subsampling_x,
- seq_params->subsampling_y, seq_params->use_highbitdepth,
- cpi->oxcf.border_in_pixels, byte_alignment, NULL, NULL, NULL, 0))
- aom_internal_error(cm->error, AOM_CODEC_MEM_ERROR,
- "Failed to allocate last frame buffer");
// The frame buffer trial_frame_rst is used during loop restoration filter
// search. Hence it is allocated only when loop restoration is used.
- const int use_restoration = cm->seq_params->enable_restoration &&
- !cm->features.all_lossless &&
- !cm->tiles.large_scale;
- if (use_restoration) {
+ if (is_restoration_used(cm)) {
if (aom_realloc_frame_buffer(
&cpi->trial_frame_rst, cm->superres_upscaled_width,
cm->superres_upscaled_height, seq_params->subsampling_x,
diff --git a/av1/encoder/encoder_utils.c b/av1/encoder/encoder_utils.c
index 633125d..66cd272 100644
--- a/av1/encoder/encoder_utils.c
+++ b/av1/encoder/encoder_utils.c
@@ -1009,7 +1009,7 @@
set_encoding_params_for_screen_content(cpi, pass);
av1_set_quantizer(cm, q_cfg->qm_minlevel, q_cfg->qm_maxlevel,
q_for_screen_content_quick_run,
- q_cfg->enable_chroma_deltaq);
+ q_cfg->enable_chroma_deltaq, q_cfg->enable_hdr_deltaq);
av1_set_speed_features_qindex_dependent(cpi, oxcf->speed);
if (q_cfg->deltaq_mode != NO_DELTA_Q || q_cfg->enable_chroma_deltaq)
av1_init_quantizer(&cpi->enc_quant_dequant_params, &cm->quant_params,
diff --git a/av1/encoder/ethread.c b/av1/encoder/ethread.c
index 25ecee9..98638bc 100644
--- a/av1/encoder/ethread.c
+++ b/av1/encoder/ethread.c
@@ -638,21 +638,23 @@
}
#if !CONFIG_REALTIME_ONLY
- // Initialize loop restoration MT object.
- AV1LrSync *lr_sync = &mt_info->lr_row_sync;
- int rst_unit_size;
- if (cm->width * cm->height > 352 * 288)
- rst_unit_size = RESTORATION_UNITSIZE_MAX;
- else
- rst_unit_size = (RESTORATION_UNITSIZE_MAX >> 1);
- int num_rows_lr = av1_lr_count_units_in_tile(rst_unit_size, cm->height);
- int num_lr_workers = av1_get_num_mod_workers_for_alloc(p_mt_info, MOD_LR);
- if (!lr_sync->sync_range || num_rows_lr > lr_sync->rows ||
- num_lr_workers > lr_sync->num_workers ||
- MAX_MB_PLANE > lr_sync->num_planes) {
- av1_loop_restoration_dealloc(lr_sync, num_lr_workers);
- av1_loop_restoration_alloc(lr_sync, cm, num_lr_workers, num_rows_lr,
- MAX_MB_PLANE, cm->width);
+ if (is_restoration_used(cm)) {
+ // Initialize loop restoration MT object.
+ AV1LrSync *lr_sync = &mt_info->lr_row_sync;
+ int rst_unit_size;
+ if (cm->width * cm->height > 352 * 288)
+ rst_unit_size = RESTORATION_UNITSIZE_MAX;
+ else
+ rst_unit_size = (RESTORATION_UNITSIZE_MAX >> 1);
+ int num_rows_lr = av1_lr_count_units_in_tile(rst_unit_size, cm->height);
+ int num_lr_workers = av1_get_num_mod_workers_for_alloc(p_mt_info, MOD_LR);
+ if (!lr_sync->sync_range || num_rows_lr > lr_sync->rows ||
+ num_lr_workers > lr_sync->num_workers ||
+ MAX_MB_PLANE > lr_sync->num_planes) {
+ av1_loop_restoration_dealloc(lr_sync, num_lr_workers);
+ av1_loop_restoration_alloc(lr_sync, cm, num_lr_workers, num_rows_lr,
+ MAX_MB_PLANE, cm->width);
+ }
}
#endif
@@ -742,11 +744,17 @@
alloc_compound_type_rd_buffers(&ppi->error,
&thread_data->td->comp_rd_buffer);
- for (int j = 0; j < 2; ++j) {
- AOM_CHECK_MEM_ERROR(
- &ppi->error, thread_data->td->tmp_pred_bufs[j],
- aom_memalign(32, 2 * MAX_MB_PLANE * MAX_SB_SQUARE *
- sizeof(*thread_data->td->tmp_pred_bufs[j])));
+ // The buffers 'tmp_pred_bufs[]' are used in inter frames to store
+ // temporary prediction results. Hence, the memory allocation is avoided
+ // for allintra encode.
+ if (ppi->cpi->oxcf.kf_cfg.key_freq_max != 0) {
+ for (int j = 0; j < 2; ++j) {
+ AOM_CHECK_MEM_ERROR(
+ &ppi->error, thread_data->td->tmp_pred_bufs[j],
+ aom_memalign(32,
+ 2 * MAX_MB_PLANE * MAX_SB_SQUARE *
+ sizeof(*thread_data->td->tmp_pred_bufs[j])));
+ }
}
const SPEED_FEATURES *sf = &ppi->cpi->sf;
@@ -828,7 +836,7 @@
#if CONFIG_FRAME_PARALLEL_ENCODE
// This function returns 1 if frame parallel encode is supported for
// the current configuration. Returns 0 otherwise.
-static AOM_INLINE int is_fp_config(AV1_PRIMARY *ppi, AV1EncoderConfig *oxcf) {
+static AOM_INLINE int is_fpmt_config(AV1_PRIMARY *ppi, AV1EncoderConfig *oxcf) {
// FPMT is enabled for AOM_Q and AOM_VBR.
// TODO(Mufaddal, Aasaipriya): Test and enable multi-tile and resize config.
if (oxcf->rc_cfg.mode == AOM_CBR || oxcf->rc_cfg.mode == AOM_CQ) {
@@ -837,7 +845,7 @@
if (ppi->use_svc) {
return 0;
}
- if (oxcf->tile_cfg.tile_columns > 0 || oxcf->tile_cfg.tile_rows > 0) {
+ if (oxcf->tile_cfg.enable_large_scale_tile) {
return 0;
}
if (oxcf->dec_model_cfg.timing_info_present) {
@@ -867,7 +875,7 @@
// based on the number of max_enc_workers.
int av1_compute_num_fp_contexts(AV1_PRIMARY *ppi, AV1EncoderConfig *oxcf) {
ppi->p_mt_info.num_mod_workers[MOD_FRAME_ENC] = 0;
- if (!is_fp_config(ppi, oxcf)) {
+ if (!is_fpmt_config(ppi, oxcf)) {
return 1;
}
int max_num_enc_workers =
@@ -919,7 +927,8 @@
&p_mt_info->workers[i];
AV1_COMP *cur_cpi = ppi->parallel_cpi[frame_idx];
MultiThreadInfo *mt_info = &cur_cpi->mt_info;
- const int num_planes = av1_num_planes(&cur_cpi->common);
+ AV1_COMMON *const cm = &cur_cpi->common;
+ const int num_planes = av1_num_planes(cm);
// Assign start of level 2 worker pool
mt_info->workers = &p_mt_info->workers[i];
@@ -941,24 +950,25 @@
mt_info->cdef_worker->colbuf[plane];
}
#if !CONFIG_REALTIME_ONLY
- // Back up the original LR buffers before update.
- int idx = i + mt_info->num_workers - 1;
- mt_info->restore_state_buf.rst_tmpbuf =
- mt_info->lr_row_sync.lrworkerdata[idx].rst_tmpbuf;
- mt_info->restore_state_buf.rlbs =
- mt_info->lr_row_sync.lrworkerdata[idx].rlbs;
+ if (is_restoration_used(cm)) {
+ // Back up the original LR buffers before update.
+ int idx = i + mt_info->num_workers - 1;
+ mt_info->restore_state_buf.rst_tmpbuf =
+ mt_info->lr_row_sync.lrworkerdata[idx].rst_tmpbuf;
+ mt_info->restore_state_buf.rlbs =
+ mt_info->lr_row_sync.lrworkerdata[idx].rlbs;
- // Update LR buffers.
- mt_info->lr_row_sync.lrworkerdata[idx].rst_tmpbuf =
- cur_cpi->common.rst_tmpbuf;
- mt_info->lr_row_sync.lrworkerdata[idx].rlbs = cur_cpi->common.rlbs;
+ // Update LR buffers.
+ mt_info->lr_row_sync.lrworkerdata[idx].rst_tmpbuf = cm->rst_tmpbuf;
+ mt_info->lr_row_sync.lrworkerdata[idx].rlbs = cm->rlbs;
+ }
#endif
// At this stage, the thread specific CDEF buffers for the current frame's
// 'common' and 'cdef_sync' only need to be allocated. 'cdef_worker' has
// already been allocated across parallel frames.
- av1_alloc_cdef_buffers(&cur_cpi->common, &p_mt_info->cdef_worker,
- &mt_info->cdef_sync, p_mt_info->num_workers, 0);
+ av1_alloc_cdef_buffers(cm, &p_mt_info->cdef_worker, &mt_info->cdef_sync,
+ p_mt_info->num_workers, 0);
frame_worker->hook = hook;
frame_worker->data1 = cur_cpi;
@@ -1021,7 +1031,8 @@
while (i < num_workers) {
AV1_COMP *cur_cpi = ppi->parallel_cpi[frame_idx];
MultiThreadInfo *mt_info = &cur_cpi->mt_info;
- const int num_planes = av1_num_planes(&cur_cpi->common);
+ const AV1_COMMON *const cm = &cur_cpi->common;
+ const int num_planes = av1_num_planes(cm);
// Restore the original cdef_worker pointers.
if (ppi->p_mt_info.cdef_worker != NULL) {
@@ -1031,12 +1042,14 @@
mt_info->restore_state_buf.cdef_colbuf[plane];
}
#if !CONFIG_REALTIME_ONLY
- // Restore the original LR buffers.
- int idx = i + mt_info->num_workers - 1;
- mt_info->lr_row_sync.lrworkerdata[idx].rst_tmpbuf =
- mt_info->restore_state_buf.rst_tmpbuf;
- mt_info->lr_row_sync.lrworkerdata[idx].rlbs =
- mt_info->restore_state_buf.rlbs;
+ if (is_restoration_used(cm)) {
+ // Restore the original LR buffers.
+ int idx = i + mt_info->num_workers - 1;
+ mt_info->lr_row_sync.lrworkerdata[idx].rst_tmpbuf =
+ mt_info->restore_state_buf.rst_tmpbuf;
+ mt_info->lr_row_sync.lrworkerdata[idx].rlbs =
+ mt_info->restore_state_buf.rlbs;
+ }
#endif
frame_idx++;
diff --git a/av1/encoder/ethread.h b/av1/encoder/ethread.h
index bf73a60..bd75664 100644
--- a/av1/encoder/ethread.h
+++ b/av1/encoder/ethread.h
@@ -100,8 +100,6 @@
void av1_init_tile_thread_data(AV1_PRIMARY *ppi, int is_first_pass);
-int av1_compute_num_fp_contexts(AV1_PRIMARY *ppi, AV1EncoderConfig *oxcf);
-
void av1_cdef_mse_calc_frame_mt(AV1_COMMON *cm, MultiThreadInfo *mt_info,
CdefSearchCtx *cdef_search_ctx);
@@ -117,6 +115,8 @@
int av1_compute_num_enc_workers(AV1_COMP *cpi, int max_workers);
#if CONFIG_FRAME_PARALLEL_ENCODE
+int av1_compute_num_fp_contexts(AV1_PRIMARY *ppi, AV1EncoderConfig *oxcf);
+
int av1_compress_parallel_frames(AV1_PRIMARY *const ppi,
AV1_COMP_DATA *const first_cpi_data);
#endif
diff --git a/av1/encoder/firstpass.c b/av1/encoder/firstpass.c
index 28d527a..af1e8c1 100644
--- a/av1/encoder/firstpass.c
+++ b/av1/encoder/firstpass.c
@@ -1271,9 +1271,9 @@
// Do not use periodic key frames.
cpi->rc.frames_to_key = INT_MAX;
- av1_set_quantizer(cm, cpi->oxcf.q_cfg.qm_minlevel,
- cpi->oxcf.q_cfg.qm_maxlevel, qindex,
- cpi->oxcf.q_cfg.enable_chroma_deltaq);
+ av1_set_quantizer(
+ cm, cpi->oxcf.q_cfg.qm_minlevel, cpi->oxcf.q_cfg.qm_maxlevel, qindex,
+ cpi->oxcf.q_cfg.enable_chroma_deltaq, cpi->oxcf.q_cfg.enable_hdr_deltaq);
av1_setup_block_planes(xd, seq_params->subsampling_x,
seq_params->subsampling_y, num_planes);
diff --git a/av1/encoder/gop_structure.c b/av1/encoder/gop_structure.c
index c9198b4..2e53c70 100644
--- a/av1/encoder/gop_structure.c
+++ b/av1/encoder/gop_structure.c
@@ -61,10 +61,12 @@
#if CONFIG_FRAME_PARALLEL_ENCODE_2
// Sets the GF_GROUP params for LF_UPDATE frames.
static AOM_INLINE void set_params_for_leaf_frames(
+ const TWO_PASS *twopass, const TWO_PASS_FRAME *twopass_frame,
+ const PRIMARY_RATE_CONTROL *p_rc, FRAME_INFO *frame_info,
GF_GROUP *const gf_group, int *cur_frame_idx, int *frame_ind,
int *parallel_frame_count, int max_parallel_frames,
int do_frame_parallel_encode, int *first_frame_index, int *cur_disp_index,
- int layer_depth) {
+ int layer_depth, int start, int end) {
gf_group->update_type[*frame_ind] = LF_UPDATE;
gf_group->arf_src_offset[*frame_ind] = 0;
gf_group->cur_frame_idx[*frame_ind] = *cur_frame_idx;
@@ -73,6 +75,9 @@
gf_group->refbuf_state[*frame_ind] = REFBUF_UPDATE;
gf_group->max_layer_depth = AOMMAX(gf_group->max_layer_depth, layer_depth);
gf_group->display_idx[*frame_ind] = (*cur_disp_index);
+ gf_group->arf_boost[*frame_ind] =
+ av1_calc_arf_boost(twopass, twopass_frame, p_rc, frame_info, start,
+ end - start, 0, NULL, NULL, 0);
++(*cur_disp_index);
// Set the level of parallelism for the LF_UPDATE frame.
@@ -108,10 +113,13 @@
// Sets the GF_GROUP params for INTNL_ARF_UPDATE frames.
static AOM_INLINE void set_params_for_internal_arfs(
+ const TWO_PASS *twopass, const TWO_PASS_FRAME *twopass_frame,
+ const PRIMARY_RATE_CONTROL *p_rc, FRAME_INFO *frame_info,
GF_GROUP *const gf_group, int *cur_frame_idx, int *frame_ind,
int *parallel_frame_count, int max_parallel_frames,
int do_frame_parallel_encode, int *first_frame_index, int depth_thr,
- int *cur_disp_idx, int layer_depth, int arf_src_offset) {
+ int *cur_disp_idx, int layer_depth, int arf_src_offset, int offset,
+ int f_frames, int b_frames) {
gf_group->update_type[*frame_ind] = INTNL_ARF_UPDATE;
gf_group->arf_src_offset[*frame_ind] = arf_src_offset;
gf_group->cur_frame_idx[*frame_ind] = *cur_frame_idx;
@@ -120,6 +128,9 @@
gf_group->refbuf_state[*frame_ind] = REFBUF_UPDATE;
gf_group->display_idx[*frame_ind] =
(*cur_disp_idx) + gf_group->arf_src_offset[*frame_ind];
+ gf_group->arf_boost[*frame_ind] =
+ av1_calc_arf_boost(twopass, twopass_frame, p_rc, frame_info, offset,
+ f_frames, b_frames, NULL, NULL, 0);
if (do_frame_parallel_encode) {
if (depth_thr != INT_MAX) {
@@ -162,12 +173,12 @@
// Set parameters for frames between 'start' and 'end' (excluding both).
static void set_multi_layer_params_for_fp(
- const TWO_PASS *twopass, GF_GROUP *const gf_group,
- const PRIMARY_RATE_CONTROL *p_rc, RATE_CONTROL *rc, FRAME_INFO *frame_info,
- int start, int end, int *cur_frame_idx, int *frame_ind,
- int *parallel_frame_count, int max_parallel_frames,
- int do_frame_parallel_encode, int *first_frame_index, int depth_thr,
- int *cur_disp_idx, int layer_depth) {
+ const TWO_PASS *twopass, const TWO_PASS_FRAME *twopass_frame,
+ GF_GROUP *const gf_group, const PRIMARY_RATE_CONTROL *p_rc,
+ RATE_CONTROL *rc, FRAME_INFO *frame_info, int start, int end,
+ int *cur_frame_idx, int *frame_ind, int *parallel_frame_count,
+ int max_parallel_frames, int do_frame_parallel_encode,
+ int *first_frame_index, int depth_thr, int *cur_disp_idx, int layer_depth) {
const int num_frames_to_process = end - start;
// Either we are at the last level of the pyramid, or we don't have enough
@@ -176,10 +187,11 @@
num_frames_to_process < 3) {
// Leaf nodes.
while (start < end) {
- set_params_for_leaf_frames(gf_group, cur_frame_idx, frame_ind,
+ set_params_for_leaf_frames(twopass, twopass_frame, p_rc, frame_info,
+ gf_group, cur_frame_idx, frame_ind,
parallel_frame_count, max_parallel_frames,
do_frame_parallel_encode, first_frame_index,
- cur_disp_idx, layer_depth);
+ cur_disp_idx, layer_depth, start, end);
++start;
}
} else {
@@ -188,9 +200,10 @@
// Internal ARF.
int arf_src_offset = m - start;
set_params_for_internal_arfs(
- gf_group, cur_frame_idx, frame_ind, parallel_frame_count,
- max_parallel_frames, do_frame_parallel_encode, first_frame_index,
- INT_MAX, cur_disp_idx, layer_depth, arf_src_offset);
+ twopass, twopass_frame, p_rc, frame_info, gf_group, cur_frame_idx,
+ frame_ind, parallel_frame_count, max_parallel_frames,
+ do_frame_parallel_encode, first_frame_index, INT_MAX, cur_disp_idx,
+ layer_depth, arf_src_offset, m, end - m, m - start);
// If encode reordering is enabled, configure the multi-layers accordingly
// and return. For e.g., the encode order for gf-interval 16 after
@@ -200,13 +213,19 @@
int m1 = (m + start - 1) / 2;
int m2 = (m + 1 + end) / 2;
int arf_src_offsets[2] = { m1 - start, m2 - start };
+ // Parameters to compute arf_boost.
+ int offset[2] = { m1, m2 };
+ int f_frames[2] = { m - m1, end - m2 };
+ int b_frames[2] = { m1 - start, m2 - (m + 1) };
// Set GF_GROUP params for INTNL_ARF_UPDATE frames which are reordered.
for (int i = 0; i < 2; i++) {
set_params_for_internal_arfs(
- gf_group, cur_frame_idx, frame_ind, parallel_frame_count,
- max_parallel_frames, do_frame_parallel_encode, first_frame_index,
- depth_thr, cur_disp_idx, layer_depth + 1, arf_src_offsets[i]);
+ twopass, twopass_frame, p_rc, frame_info, gf_group, cur_frame_idx,
+ frame_ind, parallel_frame_count, max_parallel_frames,
+ do_frame_parallel_encode, first_frame_index, depth_thr,
+ cur_disp_idx, layer_depth + 1, arf_src_offsets[i], offset[i],
+ f_frames[i], b_frames[i]);
}
// Initialize the start and end indices to configure LF_UPDATE frames.
@@ -219,10 +238,10 @@
// frames after reordering.
for (int i = 0; i < 4; i++) {
set_multi_layer_params_for_fp(
- twopass, gf_group, p_rc, rc, frame_info, start_idx[i], end_idx[i],
- cur_frame_idx, frame_ind, parallel_frame_count, max_parallel_frames,
- do_frame_parallel_encode, first_frame_index, depth_thr,
- cur_disp_idx, layer_depth + 2);
+ twopass, twopass_frame, gf_group, p_rc, rc, frame_info,
+ start_idx[i], end_idx[i], cur_frame_idx, frame_ind,
+ parallel_frame_count, max_parallel_frames, do_frame_parallel_encode,
+ first_frame_index, depth_thr, cur_disp_idx, layer_depth + 2);
if (layer_depth_for_intnl_overlay[i] != INVALID_IDX)
set_params_for_intnl_overlay_frames(
gf_group, cur_frame_idx, frame_ind, first_frame_index,
@@ -232,11 +251,11 @@
}
// Frames displayed before this internal ARF.
- set_multi_layer_params_for_fp(twopass, gf_group, p_rc, rc, frame_info,
- start, m, cur_frame_idx, frame_ind,
- parallel_frame_count, max_parallel_frames,
- do_frame_parallel_encode, first_frame_index,
- depth_thr, cur_disp_idx, layer_depth + 1);
+ set_multi_layer_params_for_fp(
+ twopass, twopass_frame, gf_group, p_rc, rc, frame_info, start, m,
+ cur_frame_idx, frame_ind, parallel_frame_count, max_parallel_frames,
+ do_frame_parallel_encode, first_frame_index, depth_thr, cur_disp_idx,
+ layer_depth + 1);
// Overlay for internal ARF.
set_params_for_intnl_overlay_frames(gf_group, cur_frame_idx, frame_ind,
@@ -244,11 +263,11 @@
layer_depth);
// Frames displayed after this internal ARF.
- set_multi_layer_params_for_fp(twopass, gf_group, p_rc, rc, frame_info,
- m + 1, end, cur_frame_idx, frame_ind,
- parallel_frame_count, max_parallel_frames,
- do_frame_parallel_encode, first_frame_index,
- depth_thr, cur_disp_idx, layer_depth + 1);
+ set_multi_layer_params_for_fp(
+ twopass, twopass_frame, gf_group, p_rc, rc, frame_info, m + 1, end,
+ cur_frame_idx, frame_ind, parallel_frame_count, max_parallel_frames,
+ do_frame_parallel_encode, first_frame_index, depth_thr, cur_disp_idx,
+ layer_depth + 1);
}
}
@@ -356,6 +375,8 @@
// Configures multi-layers of the GF_GROUP when consecutive encode of frames in
// the same layer depth is enbaled.
static AOM_INLINE void set_multi_layer_params_for_gf14(
+ const TWO_PASS *twopass, const TWO_PASS_FRAME *twopass_frame,
+ const PRIMARY_RATE_CONTROL *p_rc, FRAME_INFO *frame_info,
GF_GROUP *const gf_group, FRAME_REORDER_INFO *arf_frame_stats,
int *cur_frame_idx, int *frame_ind, int *count_arf_frames,
int *doh_gf_index_map, int *parallel_frame_count, int *first_frame_index,
@@ -391,9 +412,13 @@
// LF_UPDATE frame.
if (doh_gf_index_map[i] == INVALID_IDX) {
// LF_UPDATE frames.
- set_params_for_leaf_frames(gf_group, cur_frame_idx, frame_ind,
- parallel_frame_count, max_parallel_frames, 1,
- first_frame_index, cur_disp_index, layer);
+ // TODO(Remya): Correct start and end parameters passed to
+ // set_params_for_leaf_frames() once encode reordering for gf-interval 14
+ // is enabled for parallel encode of lower layer frames.
+ set_params_for_leaf_frames(
+ twopass, twopass_frame, p_rc, frame_info, gf_group, cur_frame_idx,
+ frame_ind, parallel_frame_count, max_parallel_frames, 1,
+ first_frame_index, cur_disp_index, layer, 0, 0);
} else {
// In order to obtain the layer depths of INTNL_OVERLAY_UPDATE frames, get
// the gf index of corresponding INTNL_ARF_UPDATE frames.
@@ -529,7 +554,9 @@
#if CONFIG_FRAME_PARALLEL_ENCODE
#if CONFIG_FRAME_PARALLEL_ENCODE_2
// Set the display order hint for the first frame in the GF_GROUP.
- int cur_disp_index = cpi->common.current_frame.frame_number;
+ int cur_disp_index = (first_frame_update_type == KF_UPDATE)
+ ? 0
+ : cpi->common.current_frame.frame_number;
#endif // CONFIG_FRAME_PARALLEL_ENCODE_2
#endif // CONFIG_FRAME_PARALLEL_ENCODE
@@ -686,9 +713,10 @@
// 7-> 3-> 10-> 5-> 12-> 1-> 2-> 4-> 6-> 8-> 9-> 11-> 13.
// TODO(Remya): Set GF_GROUP param 'arf_boost' for all frames.
set_multi_layer_params_for_gf14(
- gf_group, arf_frame_stats, &cur_frame_index, &frame_index,
- &count_arf_frames, doh_gf_index_map, &parallel_frame_count,
- &first_frame_index, &cur_disp_index, actual_gf_length, use_altref + 1,
+ twopass, &cpi->twopass_frame, p_rc, frame_info, gf_group,
+ arf_frame_stats, &cur_frame_index, &frame_index, &count_arf_frames,
+ doh_gf_index_map, &parallel_frame_count, &first_frame_index,
+ &cur_disp_index, actual_gf_length, use_altref + 1,
cpi->ppi->num_fp_contexts);
// Set gf_group->skip_frame_refresh.
@@ -711,12 +739,12 @@
int depth_thr =
(actual_gf_length == 16) ? 3 : (actual_gf_length == 32) ? 4 : INT_MAX;
- // TODO(Remya): Set GF_GROUP param 'arf_boost' for all frames.
set_multi_layer_params_for_fp(
- twopass, gf_group, p_rc, rc, frame_info, cur_frame_index, gf_interval,
- &cur_frame_index, &frame_index, &parallel_frame_count,
- cpi->ppi->num_fp_contexts, do_frame_parallel_encode,
- &first_frame_index, depth_thr, &cur_disp_index, use_altref + 1);
+ twopass, &cpi->twopass_frame, gf_group, p_rc, rc, frame_info,
+ cur_frame_index, gf_interval, &cur_frame_index, &frame_index,
+ &parallel_frame_count, cpi->ppi->num_fp_contexts,
+ do_frame_parallel_encode, &first_frame_index, depth_thr,
+ &cur_disp_index, use_altref + 1);
}
is_multi_layer_configured = 1;
}
diff --git a/av1/encoder/interp_search.h b/av1/encoder/interp_search.h
index 902b699..e25d44d 100644
--- a/av1/encoder/interp_search.h
+++ b/av1/encoder/interp_search.h
@@ -149,6 +149,11 @@
* MACROBLOCK::pred_sse due to different interpolation filter used.
*/
unsigned int best_single_sse_in_refs[REF_FRAMES];
+ /*!
+ * Holds the sse of best mode so far in the mode evaluation process. This is
+ * used in intermediate termination of NEWMV mode evaluation.
+ */
+ unsigned int best_pred_sse;
} HandleInterModeArgs;
/*!\cond */
diff --git a/av1/encoder/intra_mode_search.c b/av1/encoder/intra_mode_search.c
index 25a6e2c..4e86b77 100644
--- a/av1/encoder/intra_mode_search.c
+++ b/av1/encoder/intra_mode_search.c
@@ -1118,49 +1118,49 @@
x->plane[0].src.buf + i * x->plane[0].src.stride + j,
x->plane[0].src.stride,
CONVERT_TO_BYTEPTR(highbd_all_zeros), 0, &sse) /
- 16);
+ 16.0);
rec_var += log(
1.0 + cpi->ppi->fn_ptr[BLOCK_4X4].vf(
xd->plane[0].dst.buf + i * xd->plane[0].dst.stride + j,
xd->plane[0].dst.stride,
CONVERT_TO_BYTEPTR(highbd_all_zeros), 0, &sse) /
- 16);
+ 16.0);
} else {
src_var +=
log(1.0 + cpi->ppi->fn_ptr[BLOCK_4X4].vf(
x->plane[0].src.buf + i * x->plane[0].src.stride + j,
x->plane[0].src.stride, all_zeros, 0, &sse) /
- 16);
+ 16.0);
rec_var += log(
1.0 + cpi->ppi->fn_ptr[BLOCK_4X4].vf(
xd->plane[0].dst.buf + i * xd->plane[0].dst.stride + j,
xd->plane[0].dst.stride, all_zeros, 0, &sse) /
- 16);
+ 16.0);
}
}
}
src_var /= (double)blocks;
rec_var /= (double)blocks;
- // Only take action when the spatial complexity is low
- if ((rec_var < threshold) || (src_var < threshold)) {
- // Dont allow 0 to prevent / 0 below.
- src_var += 0.000001;
- rec_var += 0.000001;
+ // Don't allow 0, to prevent division by zero below.
+ src_var += 0.000001;
+ rec_var += 0.000001;
- // Heavier weigth if the reconstruction has lower variance.
- if (src_var >= rec_var) {
- var_diff = (src_var - rec_var) * 2;
- variance_rd_factor = 1.0 + (var_diff / src_var);
- } else {
- var_diff = (rec_var - src_var) / 2;
- variance_rd_factor = 1.0 + (var_diff / src_var);
+ if (src_var >= rec_var) {
+ var_diff = (src_var - rec_var);
+ if ((var_diff > 0.5) && (rec_var < threshold)) {
+ variance_rd_factor = 1.0 + ((var_diff * 2) / src_var);
}
-
- // Limit adjustment;
- variance_rd_factor = AOMMIN(3.0, variance_rd_factor);
+ } else {
+ var_diff = (rec_var - src_var);
+ if ((var_diff > 0.5) && (src_var < threshold)) {
+ variance_rd_factor = 1.0 + (var_diff / (2 * src_var));
+ }
}
+ // Limit adjustment;
+ variance_rd_factor = AOMMIN(3.0, variance_rd_factor);
+
return variance_rd_factor;
}
diff --git a/av1/encoder/mcomp.c b/av1/encoder/mcomp.c
index 1a53c23..ef7cb1f 100644
--- a/av1/encoder/mcomp.c
+++ b/av1/encoder/mcomp.c
@@ -41,9 +41,14 @@
mv_cost_params->mv_cost_type = MV_COST_ENTROPY;
mv_cost_params->error_per_bit = errorperbit;
mv_cost_params->sad_per_bit = sadperbit;
- mv_cost_params->mvjcost = mv_costs->nmv_joint_cost;
- mv_cost_params->mvcost[0] = mv_costs->mv_cost_stack[0];
- mv_cost_params->mvcost[1] = mv_costs->mv_cost_stack[1];
+ // For allintra encoding mode, 'mv_costs' is not allocated. Hence, the
+ // population of mvjcost and mvcost are avoided. In case of IntraBC, these
+ // values are populated from 'dv_costs' in av1_set_ms_to_intra_mode().
+ if (mv_costs != NULL) {
+ mv_cost_params->mvjcost = mv_costs->nmv_joint_cost;
+ mv_cost_params->mvcost[0] = mv_costs->mv_cost_stack[0];
+ mv_cost_params->mvcost[1] = mv_costs->mv_cost_stack[1];
+ }
}
static INLINE void init_ms_buffers(MSBuffers *ms_buffers, const MACROBLOCK *x) {
diff --git a/av1/encoder/mv_prec.h b/av1/encoder/mv_prec.h
index 11dcdd8..55108b6 100644
--- a/av1/encoder/mv_prec.h
+++ b/av1/encoder/mv_prec.h
@@ -33,6 +33,9 @@
AV1_COMP *cpi, int allow_high_precision_mv,
int cur_frame_force_integer_mv) {
MvCosts *const mv_costs = cpi->td.mb.mv_costs;
+ // Avoid accessing 'mv_costs' when it is not allocated.
+ if (mv_costs == NULL) return;
+
const int copy_hp = cpi->common.features.allow_high_precision_mv =
allow_high_precision_mv && !cur_frame_force_integer_mv;
diff --git a/av1/encoder/nonrd_pickmode.c b/av1/encoder/nonrd_pickmode.c
index 3d710dd..5ee5761 100644
--- a/av1/encoder/nonrd_pickmode.c
+++ b/av1/encoder/nonrd_pickmode.c
@@ -757,13 +757,14 @@
int eob_cost = 0;
const int bw = 4 * num_4x4_w;
const int bh = 4 * num_4x4_h;
+ const int use_hbd = is_cur_buf_hbd(xd);
(void)mi_row;
(void)mi_col;
(void)cpi;
#if CONFIG_AV1_HIGHBITDEPTH
- if (xd->cur_buf->flags & YV12_FLAG_HIGHBITDEPTH) {
+ if (use_hbd) {
aom_highbd_subtract_block(bh, bw, p->src_diff, bw, p->src.buf,
p->src.stride, pd->dst.buf, pd->dst.stride,
x->e_mbd.bd);
@@ -784,14 +785,15 @@
if (c < max_blocks_wide) {
const SCAN_ORDER *const scan_order = &av1_scan_orders[tx_size][DCT_DCT];
const int block_offset = BLOCK_OFFSET(block);
+ int16_t *const low_coeff = (int16_t *)p->coeff + block_offset;
+ int16_t *const low_qcoeff = (int16_t *)p->qcoeff + block_offset;
+ int16_t *const low_dqcoeff = (int16_t *)p->dqcoeff + block_offset;
#if CONFIG_AV1_HIGHBITDEPTH
tran_low_t *const coeff = p->coeff + block_offset;
tran_low_t *const qcoeff = p->qcoeff + block_offset;
tran_low_t *const dqcoeff = p->dqcoeff + block_offset;
#else
- int16_t *const low_coeff = (int16_t *)p->coeff + block_offset;
- int16_t *const low_qcoeff = (int16_t *)p->qcoeff + block_offset;
- int16_t *const low_dqcoeff = (int16_t *)p->dqcoeff + block_offset;
+ (void)use_hbd;
#endif
uint16_t *const eob = &p->eobs[block];
const int diff_stride = bw;
@@ -805,28 +807,53 @@
case TX_32X32:
assert(0); // Not used
break;
+
#if CONFIG_AV1_HIGHBITDEPTH
case TX_16X16:
- aom_hadamard_16x16(src_diff, diff_stride, coeff);
- av1_quantize_fp(coeff, 16 * 16, p->zbin_QTX, p->round_fp_QTX,
- p->quant_fp_QTX, p->quant_shift_QTX, qcoeff,
- dqcoeff, p->dequant_QTX, eob, scan_order->scan,
- scan_order->iscan);
+ if (use_hbd) {
+ aom_hadamard_16x16(src_diff, diff_stride, coeff);
+ av1_quantize_fp(coeff, 16 * 16, p->zbin_QTX, p->round_fp_QTX,
+ p->quant_fp_QTX, p->quant_shift_QTX, qcoeff,
+ dqcoeff, p->dequant_QTX, eob, scan_order->scan,
+ scan_order->iscan);
+ } else {
+ aom_hadamard_lp_16x16(src_diff, diff_stride, low_coeff);
+ av1_quantize_lp(low_coeff, 16 * 16, p->round_fp_QTX,
+ p->quant_fp_QTX, low_qcoeff, low_dqcoeff,
+ p->dequant_QTX, eob, scan_order->scan,
+ scan_order->iscan);
+ }
break;
case TX_8X8:
- aom_hadamard_8x8(src_diff, diff_stride, coeff);
- av1_quantize_fp(coeff, 8 * 8, p->zbin_QTX, p->round_fp_QTX,
- p->quant_fp_QTX, p->quant_shift_QTX, qcoeff,
- dqcoeff, p->dequant_QTX, eob, scan_order->scan,
- scan_order->iscan);
+ if (use_hbd) {
+ aom_hadamard_8x8(src_diff, diff_stride, coeff);
+ av1_quantize_fp(coeff, 8 * 8, p->zbin_QTX, p->round_fp_QTX,
+ p->quant_fp_QTX, p->quant_shift_QTX, qcoeff,
+ dqcoeff, p->dequant_QTX, eob, scan_order->scan,
+ scan_order->iscan);
+ } else {
+ aom_hadamard_lp_8x8(src_diff, diff_stride, low_coeff);
+ av1_quantize_lp(low_coeff, 8 * 8, p->round_fp_QTX,
+ p->quant_fp_QTX, low_qcoeff, low_dqcoeff,
+ p->dequant_QTX, eob, scan_order->scan,
+ scan_order->iscan);
+ }
break;
default:
assert(tx_size == TX_4X4);
- aom_fdct4x4(src_diff, coeff, diff_stride);
- av1_quantize_fp(coeff, 4 * 4, p->zbin_QTX, p->round_fp_QTX,
- p->quant_fp_QTX, p->quant_shift_QTX, qcoeff,
- dqcoeff, p->dequant_QTX, eob, scan_order->scan,
- scan_order->iscan);
+ if (use_hbd) {
+ aom_fdct4x4(src_diff, coeff, diff_stride);
+ av1_quantize_fp(coeff, 4 * 4, p->zbin_QTX, p->round_fp_QTX,
+ p->quant_fp_QTX, p->quant_shift_QTX, qcoeff,
+ dqcoeff, p->dequant_QTX, eob, scan_order->scan,
+ scan_order->iscan);
+ } else {
+ aom_fdct4x4_lp(src_diff, low_coeff, diff_stride);
+ av1_quantize_lp(low_coeff, 4 * 4, p->round_fp_QTX,
+ p->quant_fp_QTX, low_qcoeff, low_dqcoeff,
+ p->dequant_QTX, eob, scan_order->scan,
+ scan_order->iscan);
+ }
break;
#else
case TX_16X16:
@@ -876,18 +903,32 @@
const int block_offset = BLOCK_OFFSET(block);
uint16_t *const eob = &p->eobs[block];
#if CONFIG_AV1_HIGHBITDEPTH
- int64_t dummy;
- tran_low_t *const coeff = p->coeff + block_offset;
- tran_low_t *const qcoeff = p->qcoeff + block_offset;
- tran_low_t *const dqcoeff = p->dqcoeff + block_offset;
+ if (use_hbd) {
+ int64_t dummy;
+ tran_low_t *const coeff = p->coeff + block_offset;
+ tran_low_t *const qcoeff = p->qcoeff + block_offset;
+ tran_low_t *const dqcoeff = p->dqcoeff + block_offset;
- if (*eob == 1)
- this_rdc->rate += (int)abs(qcoeff[0]);
- else if (*eob > 1)
- this_rdc->rate += aom_satd(qcoeff, step << 4);
+ if (*eob == 1)
+ this_rdc->rate += (int)abs(qcoeff[0]);
+ else if (*eob > 1)
+ this_rdc->rate += aom_satd(qcoeff, step << 4);
- this_rdc->dist +=
- av1_block_error(coeff, dqcoeff, step << 4, &dummy) >> 2;
+ this_rdc->dist +=
+ av1_block_error(coeff, dqcoeff, step << 4, &dummy) >> 2;
+ } else {
+ int16_t *const low_coeff = (int16_t *)p->coeff + block_offset;
+ int16_t *const low_qcoeff = (int16_t *)p->qcoeff + block_offset;
+ int16_t *const low_dqcoeff = (int16_t *)p->dqcoeff + block_offset;
+
+ if (*eob == 1)
+ this_rdc->rate += (int)abs(low_qcoeff[0]);
+ else if (*eob > 1)
+ this_rdc->rate += aom_satd_lp(low_qcoeff, step << 4);
+
+ this_rdc->dist +=
+ av1_block_error_lp(low_coeff, low_dqcoeff, step << 4) >> 2;
+ }
#else
int16_t *const low_coeff = (int16_t *)p->coeff + block_offset;
int16_t *const low_qcoeff = (int16_t *)p->qcoeff + block_offset;
@@ -2367,11 +2408,7 @@
AV1_PICKMODE_CTX_DEN ctx_den;
int64_t zero_last_cost_orig = INT64_MAX;
int denoise_svc_pickmode = 1;
- const int resize_pending =
- (cpi->resize_pending_params.width && cpi->resize_pending_params.height &&
- (cpi->common.width != cpi->resize_pending_params.width ||
- cpi->common.height != cpi->resize_pending_params.height));
-
+ const int resize_pending = is_frame_resize_pending(cpi);
#endif
x->color_sensitivity[0] = x->color_sensitivity_sb[0];
x->color_sensitivity[1] = x->color_sensitivity_sb[1];
@@ -2728,8 +2765,11 @@
mi->interp_filters = av1_broadcast_interp_filter(EIGHTTAP_SMOOTH);
}
}
-
- av1_enc_build_inter_predictor(cm, xd, mi_row, mi_col, NULL, bsize, 0, 0);
+ if (!comp_pred)
+ av1_enc_build_inter_predictor_y(xd, mi_row, mi_col);
+ else
+ av1_enc_build_inter_predictor(cm, xd, mi_row, mi_col, NULL, bsize, 0,
+ 0);
if (use_model_yrd_large) {
model_skip_for_sb_y_large(cpi, bsize, mi_row, mi_col, x, xd, &this_rdc,
diff --git a/av1/encoder/partition_search.c b/av1/encoder/partition_search.c
index 7582517..001c4c1 100644
--- a/av1/encoder/partition_search.c
+++ b/av1/encoder/partition_search.c
@@ -1660,11 +1660,11 @@
part_sf->adjust_var_based_rd_partitioning > 3)
return 0;
- const int is_larger_qindex = cm->quant_params.base_qindex > 190;
if (part_sf->adjust_var_based_rd_partitioning == 1) {
- return !frame_is_intra_only(cm) && is_larger_qindex && bsize <= BLOCK_32X32;
+ return bsize <= BLOCK_32X32;
} else {
if (bsize <= BLOCK_32X32) return 1;
+ const int is_larger_qindex = cm->quant_params.base_qindex > 190;
if (part_sf->adjust_var_based_rd_partitioning == 2) {
const int is_360p_or_larger = AOMMIN(cm->width, cm->height) >= 360;
return is_360p_or_larger && is_larger_qindex && bsize == BLOCK_64X64;
@@ -2083,7 +2083,8 @@
}
if (tile_data->allow_update_cdf) update_stats(&cpi->common, td);
}
- if (cpi->oxcf.q_cfg.aq_mode == CYCLIC_REFRESH_AQ && mbmi->skip_txfm)
+ if (cpi->oxcf.q_cfg.aq_mode == CYCLIC_REFRESH_AQ && mbmi->skip_txfm &&
+ !cpi->cyclic_refresh->skip_over4x4)
av1_cyclic_reset_segment_skip(cpi, x, mi_row, mi_col, bsize);
// TODO(Ravi/Remya): Move this copy function to a better logical place
// This function will copy the best mode information from block
@@ -3939,6 +3940,7 @@
fscanf(pfile, "%d,%d,%d", &read_bsize, &num_nodes, &num_configs);
assert(read_bsize == cpi->common.seq_params->sb_size);
BLOCK_SIZE bsize = (BLOCK_SIZE)read_bsize;
+ assert(bsize == pc_tree->block_size);
PC_TREE *tree_node_queue[NUM_NODES] = { NULL };
int last_idx = 1;
@@ -3950,7 +3952,10 @@
assert(partitioning >= PARTITION_NONE &&
partitioning < EXT_PARTITION_TYPES);
PC_TREE *node = tree_node_queue[q_idx];
- if (node != NULL) node->partitioning = partitioning;
+ if (node != NULL) {
+ node->partitioning = partitioning;
+ bsize = node->block_size;
+ }
if (partitioning == PARTITION_SPLIT) {
const BLOCK_SIZE subsize = get_partition_subsize(bsize, PARTITION_SPLIT);
for (int i = 0; i < 4; ++i) {
@@ -3961,7 +3966,6 @@
++last_idx;
}
}
- bsize = subsize;
}
--num_nodes;
++q_idx;
@@ -4102,10 +4106,10 @@
}
static void prepare_sb_features_before_search(
- AV1_COMP *const cpi, ThreadData *td, int mi_row, int mi_col,
- const BLOCK_SIZE bsize, aom_partition_features_t *features) {
- av1_collect_motion_search_features_sb(cpi, td, mi_row, mi_col, bsize,
- features);
+ AV1_COMP *const cpi, ThreadData *td, TileDataEnc *tile_data, int mi_row,
+ int mi_col, const BLOCK_SIZE bsize, aom_partition_features_t *features) {
+ av1_collect_motion_search_features_sb(cpi, td, tile_data, mi_row, mi_col,
+ bsize, features);
collect_tpl_stats_sb(cpi, bsize, mi_row, mi_col, features);
}
@@ -4130,7 +4134,10 @@
assert(partitioning >= PARTITION_NONE &&
partitioning < EXT_PARTITION_TYPES);
PC_TREE *node = tree_node_queue[q_idx];
- if (node != NULL) node->partitioning = partitioning;
+ if (node != NULL) {
+ node->partitioning = partitioning;
+ bsize = node->block_size;
+ }
if (partitioning == PARTITION_SPLIT) {
const BLOCK_SIZE subsize = get_partition_subsize(bsize, PARTITION_SPLIT);
for (int i = 0; i < 4; ++i) {
@@ -4141,7 +4148,6 @@
++last_idx;
}
}
- bsize = subsize;
}
--num_nodes;
++q_idx;
@@ -4159,7 +4165,8 @@
MACROBLOCK *const x = &td->mb;
ExtPartController *const ext_part_controller = &cpi->ext_part_controller;
aom_partition_features_t features;
- prepare_sb_features_before_search(cpi, td, mi_row, mi_col, bsize, &features);
+ prepare_sb_features_before_search(cpi, td, tile_data, mi_row, mi_col, bsize,
+ &features);
features.mi_row = mi_row;
features.mi_col = mi_col;
features.frame_width = cpi->frame_info.frame_width;
@@ -4201,6 +4208,63 @@
return true;
}
+// Builds the bitmask of partition types that are valid for the current block.
+// Bit i is set when the PARTITION_TYPE with enum value i (see "enums.h") is
+// valid: bit 0 is PARTITION_NONE and bit 9 is PARTITION_VERT_4. For example,
+// 0x09 means only PARTITION_NONE and PARTITION_SPLIT are valid. The result is
+// reported to the external partition model as "valid_partition_types".
+static int get_valid_partition_types(
+    const AV1_COMP *const cpi,
+    const PartitionSearchState *const part_search_state,
+    const BLOCK_SIZE bsize) {
+  const PartitionCfg *const part_cfg = &cpi->oxcf.part_cfg;
+  const PartitionBlkParams blk_params = part_search_state->part_blk_params;
+  int valid_types = 0;
+  // PARTITION_NONE
+  valid_types |= (part_search_state->partition_none_allowed << 0);
+  // PARTITION_HORZ
+  valid_types |= (part_search_state->partition_rect_allowed[HORZ] << 1);
+  // PARTITION_VERT
+  valid_types |= (part_search_state->partition_rect_allowed[VERT] << 2);
+  // PARTITION_SPLIT
+  valid_types |= (part_search_state->do_square_split << 3);
+  // PARTITION_HORZ_A (shares one condition with PARTITION_HORZ_B)
+  const int ext_partition_allowed = part_search_state->do_rectangular_split &&
+                                    av1_blk_has_rows_and_cols(&blk_params);
+  const int horzab_partition_allowed =
+      ext_partition_allowed && part_cfg->enable_ab_partitions &&
+      part_search_state->partition_rect_allowed[HORZ];
+  valid_types |= (horzab_partition_allowed << 4);
+  // PARTITION_HORZ_B
+  valid_types |= (horzab_partition_allowed << 5);
+  // PARTITION_VERT_A (shares one condition with PARTITION_VERT_B)
+  const int vertab_partition_allowed =
+      ext_partition_allowed && part_cfg->enable_ab_partitions &&
+      part_search_state->partition_rect_allowed[VERT];
+  valid_types |= (vertab_partition_allowed << 6);
+  // PARTITION_VERT_B
+  valid_types |= (vertab_partition_allowed << 7);
+  // PARTITION_HORZ_4
+  const int partition4_allowed = part_cfg->enable_1to4_partitions &&
+                                 ext_partition_allowed &&
+                                 bsize != BLOCK_128X128;
+  const int horz4_allowed =
+      partition4_allowed && part_search_state->partition_rect_allowed[HORZ] &&
+      get_plane_block_size(get_partition_subsize(bsize, PARTITION_HORZ_4),
+                           part_search_state->ss_x,
+                           part_search_state->ss_y) != BLOCK_INVALID;
+  valid_types |= (horz4_allowed << 8);
+  // PARTITION_VERT_4: gated on the VERT rectangular flag, mirroring HORZ_4.
+  const int vert4_allowed =
+      partition4_allowed && part_search_state->partition_rect_allowed[VERT] &&
+      get_plane_block_size(get_partition_subsize(bsize, PARTITION_VERT_4),
+                           part_search_state->ss_x,
+                           part_search_state->ss_y) != BLOCK_INVALID;
+  valid_types |= (vert4_allowed << 9);
+
+  return valid_types;
+}
+
static bool recursive_partition(AV1_COMP *const cpi, ThreadData *td,
TileDataEnc *tile_data, TokenExtra **tp,
SIMPLE_MOTION_DATA_TREE *sms_root,
@@ -4209,24 +4273,13 @@
const AV1_COMMON *const cm = &cpi->common;
ExtPartController *const ext_part_controller = &cpi->ext_part_controller;
MACROBLOCK *const x = &td->mb;
+ MACROBLOCKD *const xd = &x->e_mbd;
if (mi_row >= cm->mi_params.mi_rows || mi_col >= cm->mi_params.mi_cols) {
return false;
}
aom_partition_decision_t partition_decision;
do {
- aom_partition_features_t features;
- features.mi_row = mi_row;
- features.mi_col = mi_col;
- features.frame_width = cpi->frame_info.frame_width;
- features.frame_height = cpi->frame_info.frame_height;
- features.block_size = bsize;
- av1_ext_part_send_features(ext_part_controller, &features);
- const bool valid_decision = av1_ext_part_get_partition_decision(
- ext_part_controller, &partition_decision);
- if (!valid_decision) return false;
- pc_tree->partitioning = partition_decision.current_decision;
PartitionSearchState part_search_state;
-
// Initialization of state variables used in partition search.
// TODO(chengchen): check if there is hidden conditions that don't allow
// all possible partition types.
@@ -4237,6 +4290,37 @@
PartitionBlkParams blk_params = part_search_state.part_blk_params;
if (!av1_blk_has_rows_and_cols(&blk_params))
set_partition_cost_for_edge_blk(cm, &part_search_state);
+ const int orig_rdmult = x->rdmult;
+ setup_block_rdmult(cpi, x, mi_row, mi_col, bsize, NO_AQ, NULL);
+ const int valid_partition_types =
+ get_valid_partition_types(cpi, &part_search_state, bsize);
+ const FRAME_UPDATE_TYPE update_type =
+ get_frame_update_type(&cpi->ppi->gf_group, cpi->gf_frame_index);
+ const int qindex = av1_get_qindex(&cm->seg, xd->mi[0]->segment_id,
+ cm->quant_params.base_qindex);
+ // RD multiplier
+ const int rdmult = x->rdmult;
+ // pyramid level
+ const int pyramid_level =
+ cpi->ppi->gf_group.layer_depth[cpi->gf_frame_index];
+ x->rdmult = orig_rdmult;
+
+ aom_partition_features_t features;
+ features.mi_row = mi_row;
+ features.mi_col = mi_col;
+ features.frame_width = cpi->frame_info.frame_width;
+ features.frame_height = cpi->frame_info.frame_height;
+ features.block_size = bsize;
+ features.valid_partition_types = valid_partition_types;
+ features.update_type = update_type;
+ features.qindex = qindex;
+ features.rdmult = rdmult;
+ features.pyramid_level = pyramid_level;
+ av1_ext_part_send_features(ext_part_controller, &features);
+ const bool valid_decision = av1_ext_part_get_partition_decision(
+ ext_part_controller, &partition_decision);
+ if (!valid_decision) return false;
+ pc_tree->partitioning = partition_decision.current_decision;
av1_init_rd_stats(this_rdcost);
if (partition_decision.current_decision == PARTITION_SPLIT) {
@@ -4249,9 +4333,8 @@
pc_tree->split[i] = av1_alloc_pc_tree_node(subsize);
pc_tree->split[i]->index = i;
}
- const int orig_rdmult = x->rdmult;
+ const int orig_rdmult_tmp = x->rdmult;
setup_block_rdmult(cpi, x, mi_row, mi_col, bsize, NO_AQ, NULL);
- (void)orig_rdmult;
// TODO(chengchen): check boundary conditions
// top-left
recursive_partition(cpi, td, tile_data, tp, sms_root, pc_tree->split[0],
@@ -4276,7 +4359,7 @@
this_rdcost->dist += split_rdc[i].dist;
av1_rd_cost_update(x->rdmult, this_rdcost);
}
- x->rdmult = orig_rdmult;
+ x->rdmult = orig_rdmult_tmp;
} else {
*this_rdcost = rd_search_for_fixed_partition(
cpi, td, tile_data, tp, sms_root, mi_row, mi_col, bsize, pc_tree);
@@ -4311,7 +4394,8 @@
MACROBLOCK *const x = &td->mb;
ExtPartController *const ext_part_controller = &cpi->ext_part_controller;
aom_partition_features_t features;
- prepare_sb_features_before_search(cpi, td, mi_row, mi_col, bsize, &features);
+ prepare_sb_features_before_search(cpi, td, tile_data, mi_row, mi_col, bsize,
+ &features);
features.mi_row = mi_row;
features.mi_col = mi_col;
features.frame_width = cpi->frame_info.frame_width;
@@ -4437,7 +4521,7 @@
const int bh = MI_SIZE * mi_size_high[bs] - bottom_overflow;
// Initialize min to a large value and max to 0 at
- *var_min = 10.0;
+ *var_min = 99.0;
*var_max = 0.0;
for (i = 0; i < bh; i += 4) {
@@ -4448,13 +4532,13 @@
x->plane[0].src.buf + i * x->plane[0].src.stride + j,
x->plane[0].src.stride,
CONVERT_TO_BYTEPTR(highbd_all_zeros), 0, &sse) /
- 16);
+ 16.0);
} else {
var =
log(1.0 + cpi->ppi->fn_ptr[BLOCK_4X4].vf(
x->plane[0].src.buf + i * x->plane[0].src.stride + j,
x->plane[0].src.stride, all_zeros, 0, &sse) /
- 16);
+ 16.0);
}
*var_min = AOMMIN(*var_min, var);
*var_max = AOMMAX(*var_max, var);
@@ -4579,8 +4663,8 @@
// av1_get_max_min_partition_features().
if (COLLECT_MOTION_SEARCH_FEATURE_SB && !frame_is_intra_only(cm) &&
bsize == cm->seq_params->sb_size) {
- av1_collect_motion_search_features_sb(cpi, td, mi_row, mi_col, bsize,
- /*features=*/NULL);
+ av1_collect_motion_search_features_sb(cpi, td, tile_data, mi_row, mi_col,
+ bsize, /*features=*/NULL);
collect_tpl_stats_sb(cpi, bsize, mi_row, mi_col, /*features=*/NULL);
}
@@ -4648,7 +4732,7 @@
double var_min, var_max;
log_sub_block_var(cpi, x, bsize, &var_min, &var_max);
- if ((var_min < 0.5) && ((var_max - var_min) > 3.0)) {
+ if ((var_min < 0.272) && ((var_max - var_min) > 3.0)) {
part_search_state.partition_none_allowed = 0;
part_search_state.terminate_partition_search = 0;
part_search_state.do_square_split = 1;
@@ -4687,7 +4771,7 @@
// newmv mode and is skippable.
if ((cpi->sf.part_sf.skip_non_sq_part_based_on_none >= 2) &&
(pc_tree->none != NULL)) {
- if (is_inter_mode(pc_tree->none->mic.mode) &&
+ if (x->qindex <= 200 && is_inter_mode(pc_tree->none->mic.mode) &&
!have_newmv_in_inter_mode(pc_tree->none->mic.mode) &&
pc_tree->none->skippable && !x->must_find_valid_partition &&
bsize >= BLOCK_16X16)
diff --git a/av1/encoder/partition_strategy.c b/av1/encoder/partition_strategy.c
index 9e9e8eb..91d1a74 100644
--- a/av1/encoder/partition_strategy.c
+++ b/av1/encoder/partition_strategy.c
@@ -115,6 +115,7 @@
snprintf(filename, sizeof(filename), "%s/%s", path,
get_feature_file_name(id));
FILE *pfile = fopen(filename, "a");
+ if (pfile == NULL) return;
if (!is_test_mode) {
fprintf(pfile, "%d,%d,%d,%d,%d\n", id, bsize, mi_row, mi_col, feature_size);
}
@@ -2296,6 +2297,7 @@
}
void av1_collect_motion_search_features_sb(AV1_COMP *const cpi, ThreadData *td,
+ TileDataEnc *tile_data,
const int mi_row, const int mi_col,
const BLOCK_SIZE bsize,
aom_partition_features_t *features) {
@@ -2308,6 +2310,8 @@
const int row_step = mi_size_high[fixed_block_size];
SIMPLE_MOTION_DATA_TREE *sms_tree = NULL;
SIMPLE_MOTION_DATA_TREE *sms_root = setup_sms_tree(cpi, sms_tree);
+ TileInfo *const tile_info = &tile_data->tile_info;
+ av1_set_offsets_without_segment_id(cpi, tile_info, x, mi_row, mi_col, bsize);
av1_init_simple_motion_search_mvs_for_sb(cpi, NULL, x, sms_root, mi_row,
mi_col);
av1_reset_simple_motion_tree_partition(sms_root, bsize);
diff --git a/av1/encoder/partition_strategy.h b/av1/encoder/partition_strategy.h
index 1958abb..f7daf37 100644
--- a/av1/encoder/partition_strategy.h
+++ b/av1/encoder/partition_strategy.h
@@ -128,6 +128,7 @@
int *ab_partitions_allowed);
void av1_collect_motion_search_features_sb(AV1_COMP *const cpi, ThreadData *td,
+ TileDataEnc *tile_data,
const int mi_row, const int mi_col,
const BLOCK_SIZE bsize,
aom_partition_features_t *features);
diff --git a/av1/encoder/pass2_strategy.c b/av1/encoder/pass2_strategy.c
index b0125f2..134c6ce 100644
--- a/av1/encoder/pass2_strategy.c
+++ b/av1/encoder/pass2_strategy.c
@@ -1002,7 +1002,7 @@
gf_group->update_type[gf_group->arf_index];
int is_forward_keyframe = 0;
av1_temporal_filter(cpi, arf_src_index, arf_update_type,
- is_forward_keyframe, NULL);
+ is_forward_keyframe, NULL, &cpi->ppi->alt_ref_buffer);
aom_extend_frame_borders(&cpi->ppi->alt_ref_buffer,
av1_num_planes(&cpi->common));
}
@@ -4051,5 +4051,10 @@
}
}
}
+
+ // Update framerate obtained from parallel encode frames
+ if (cpi->common.show_frame &&
+ cpi->ppi->gf_group.frame_parallel_level[cpi->gf_frame_index] > 0)
+ cpi->framerate = cpi->new_framerate;
#endif
}
diff --git a/av1/encoder/pickcdef.c b/av1/encoder/pickcdef.c
index 4346caf..676c55d 100644
--- a/av1/encoder/pickcdef.c
+++ b/av1/encoder/pickcdef.c
@@ -28,9 +28,11 @@
int *pri_strength,
int *sec_strength,
int strength_idx) {
- const int tot_sec_filter = (pick_method >= CDEF_FAST_SEARCH_LVL3)
- ? REDUCED_SEC_STRENGTHS_LVL3
- : CDEF_SEC_STRENGTHS;
+ const int tot_sec_filter =
+ (pick_method == CDEF_FAST_SEARCH_LVL5)
+ ? REDUCED_SEC_STRENGTHS_LVL5
+ : ((pick_method >= CDEF_FAST_SEARCH_LVL3) ? REDUCED_SEC_STRENGTHS_LVL3
+ : CDEF_SEC_STRENGTHS);
const int pri_idx = strength_idx / tot_sec_filter;
const int sec_idx = strength_idx % tot_sec_filter;
*pri_strength = pri_idx;
@@ -48,6 +50,10 @@
*pri_strength = priconv_lvl4[pri_idx];
*sec_strength = secconv_lvl3[sec_idx];
break;
+ case CDEF_FAST_SEARCH_LVL5:
+ *pri_strength = priconv_lvl5[pri_idx];
+ *sec_strength = secconv_lvl5[sec_idx];
+ break;
default: assert(0 && "Invalid CDEF search method");
}
}
@@ -153,7 +159,7 @@
CDEF_PICK_METHOD pick_method) {
uint64_t best_tot_mse;
int fast = (pick_method >= CDEF_FAST_SEARCH_LVL1 &&
- pick_method <= CDEF_FAST_SEARCH_LVL4);
+ pick_method <= CDEF_FAST_SEARCH_LVL5);
int i;
best_tot_mse = (uint64_t)1 << 63;
/* Greedy search: add one strength options at a time. */
@@ -574,7 +580,7 @@
const CommonModeInfoParams *const mi_params = &cm->mi_params;
const int damping = 3 + (cm->quant_params.base_qindex >> 6);
const int fast = (pick_method >= CDEF_FAST_SEARCH_LVL1 &&
- pick_method <= CDEF_FAST_SEARCH_LVL4);
+ pick_method <= CDEF_FAST_SEARCH_LVL5);
const int num_planes = av1_num_planes(cm);
CdefSearchCtx cdef_search_ctx;
// Initialize parameters related to CDEF search context.
@@ -642,7 +648,6 @@
mi_params->mi_grid_base[cdef_search_ctx.sb_index[i]]->cdef_strength =
best_gi;
}
-
if (fast) {
for (int j = 0; j < cdef_info->nb_cdef_strengths; j++) {
const int luma_strength = cdef_info->cdef_strengths[j];
diff --git a/av1/encoder/pickcdef.h b/av1/encoder/pickcdef.h
index 6bea1b0..a287870 100644
--- a/av1/encoder/pickcdef.h
+++ b/av1/encoder/pickcdef.h
@@ -24,6 +24,7 @@
#define REDUCED_PRI_STRENGTHS_LVL1 8
#define REDUCED_PRI_STRENGTHS_LVL2 5
#define REDUCED_SEC_STRENGTHS_LVL3 2
+#define REDUCED_SEC_STRENGTHS_LVL5 1
#define REDUCED_PRI_STRENGTHS_LVL4 2
#define REDUCED_TOTAL_STRENGTHS_LVL1 \
@@ -34,19 +35,24 @@
(REDUCED_PRI_STRENGTHS_LVL2 * REDUCED_SEC_STRENGTHS_LVL3)
#define REDUCED_TOTAL_STRENGTHS_LVL4 \
(REDUCED_PRI_STRENGTHS_LVL4 * REDUCED_SEC_STRENGTHS_LVL3)
+#define REDUCED_TOTAL_STRENGTHS_LVL5 \
+ (REDUCED_PRI_STRENGTHS_LVL4 * REDUCED_SEC_STRENGTHS_LVL5)
#define TOTAL_STRENGTHS (CDEF_PRI_STRENGTHS * CDEF_SEC_STRENGTHS)
static const int priconv_lvl1[REDUCED_PRI_STRENGTHS_LVL1] = { 0, 1, 2, 3,
5, 7, 10, 13 };
static const int priconv_lvl2[REDUCED_PRI_STRENGTHS_LVL2] = { 0, 2, 4, 8, 14 };
static const int priconv_lvl4[REDUCED_PRI_STRENGTHS_LVL4] = { 0, 11 };
+static const int priconv_lvl5[REDUCED_PRI_STRENGTHS_LVL4] = { 0, 5 };
static const int secconv_lvl3[REDUCED_SEC_STRENGTHS_LVL3] = { 0, 2 };
+static const int secconv_lvl5[REDUCED_SEC_STRENGTHS_LVL5] = { 0 };
static const int nb_cdef_strengths[CDEF_PICK_METHODS] = {
TOTAL_STRENGTHS,
REDUCED_TOTAL_STRENGTHS_LVL1,
REDUCED_TOTAL_STRENGTHS_LVL2,
REDUCED_TOTAL_STRENGTHS_LVL3,
REDUCED_TOTAL_STRENGTHS_LVL4,
+ REDUCED_TOTAL_STRENGTHS_LVL5,
TOTAL_STRENGTHS
};
diff --git a/av1/encoder/picklpf.c b/av1/encoder/picklpf.c
index 7608749..75c1f8d 100644
--- a/av1/encoder/picklpf.c
+++ b/av1/encoder/picklpf.c
@@ -84,15 +84,14 @@
static int search_filter_level(const YV12_BUFFER_CONFIG *sd, AV1_COMP *cpi,
int partial_frame,
- const int *last_frame_filter_level,
- double *best_cost_ret, int plane, int dir) {
+ const int *last_frame_filter_level, int plane,
+ int dir) {
const AV1_COMMON *const cm = &cpi->common;
const int min_filter_level = 0;
const int max_filter_level = av1_get_max_filter_level(cpi);
int filt_direction = 0;
int64_t best_err;
int filt_best;
- MACROBLOCK *x = &cpi->td.mb;
// Start the search at the previous frame filter level unless it is now out of
// range.
@@ -187,18 +186,13 @@
}
}
- // Update best error
- best_err = ss_err[filt_best];
-
- if (best_cost_ret)
- *best_cost_ret = RDCOST_DBL_WITH_NATIVE_BD_DIST(
- x->rdmult, 0, (best_err << 4), cm->seq_params->bit_depth);
return filt_best;
}
void av1_pick_filter_level(const YV12_BUFFER_CONFIG *sd, AV1_COMP *cpi,
LPF_PICK_METHOD method) {
AV1_COMMON *const cm = &cpi->common;
+ const SequenceHeader *const seq_params = cm->seq_params;
const int num_planes = av1_num_planes(cm);
struct loopfilter *const lf = &cm->lf;
(void)sd;
@@ -213,7 +207,7 @@
const int min_filter_level = 0;
const int max_filter_level = av1_get_max_filter_level(cpi);
const int q = av1_ac_quant_QTX(cm->quant_params.base_qindex, 0,
- cm->seq_params->bit_depth);
+ seq_params->bit_depth);
// based on tests result for rtc test set
// 0.04590 boosted or 0.02295 non-booseted in 18-bit fixed point
const int strength_boost_q_treshold = 0;
@@ -231,7 +225,7 @@
// And high bit depth separately:
// filt_guess = q * 0.316206 + 3.87252
int filt_guess;
- switch (cm->seq_params->bit_depth) {
+ switch (seq_params->bit_depth) {
case AOM_BITS_8:
filt_guess =
(cm->current_frame.frame_type == KEY_FRAME)
@@ -250,7 +244,7 @@
"or AOM_BITS_12");
return;
}
- if (cm->seq_params->bit_depth != AOM_BITS_8 &&
+ if (seq_params->bit_depth != AOM_BITS_8 &&
cm->current_frame.frame_type == KEY_FRAME)
filt_guess -= 4;
// TODO(chengchen): retrain the model for Y, U, V filter levels
@@ -273,26 +267,35 @@
last_frame_filter_level[3] = lf->filter_level_v;
#endif
}
+ // The frame buffer last_frame_uf is used to store the non-loop filtered
+ // reconstructed frame in search_filter_level().
+ if (aom_realloc_frame_buffer(
+ &cpi->last_frame_uf, cm->width, cm->height,
+ seq_params->subsampling_x, seq_params->subsampling_y,
+ seq_params->use_highbitdepth, cpi->oxcf.border_in_pixels,
+ cm->features.byte_alignment, NULL, NULL, NULL, 0))
+ aom_internal_error(cm->error, AOM_CODEC_MEM_ERROR,
+ "Failed to allocate last frame buffer");
lf->filter_level[0] = lf->filter_level[1] =
search_filter_level(sd, cpi, method == LPF_PICK_FROM_SUBIMAGE,
- last_frame_filter_level, NULL, 0, 2);
+ last_frame_filter_level, 0, 2);
if (method != LPF_PICK_FROM_FULL_IMAGE_NON_DUAL) {
lf->filter_level[0] =
search_filter_level(sd, cpi, method == LPF_PICK_FROM_SUBIMAGE,
- last_frame_filter_level, NULL, 0, 0);
+ last_frame_filter_level, 0, 0);
lf->filter_level[1] =
search_filter_level(sd, cpi, method == LPF_PICK_FROM_SUBIMAGE,
- last_frame_filter_level, NULL, 0, 1);
+ last_frame_filter_level, 0, 1);
}
if (num_planes > 1) {
lf->filter_level_u =
search_filter_level(sd, cpi, method == LPF_PICK_FROM_SUBIMAGE,
- last_frame_filter_level, NULL, 1, 0);
+ last_frame_filter_level, 1, 0);
lf->filter_level_v =
search_filter_level(sd, cpi, method == LPF_PICK_FROM_SUBIMAGE,
- last_frame_filter_level, NULL, 2, 0);
+ last_frame_filter_level, 2, 0);
}
}
}
diff --git a/av1/encoder/ratectrl.c b/av1/encoder/ratectrl.c
index eebaf39..9ee0c3a 100644
--- a/av1/encoder/ratectrl.c
+++ b/av1/encoder/ratectrl.c
@@ -412,7 +412,7 @@
const RATE_CONTROL *const rc = &cpi->rc;
const PRIMARY_RATE_CONTROL *const p_rc = &cpi->ppi->p_rc;
const AV1_COMMON *const cm = &cpi->common;
- const RefreshFrameFlagsInfo *const refresh_frame_flags = &cpi->refresh_frame;
+ const RefreshFrameInfo *const refresh_frame = &cpi->refresh_frame;
const int max_delta = 16;
const int change_avg_frame_bandwidth =
abs(rc->avg_frame_bandwidth - rc->prev_avg_frame_bandwidth) >
@@ -427,8 +427,7 @@
if (cm->current_frame.frame_type != KEY_FRAME && !cpi->ppi->use_svc &&
rc->frames_since_key > 1 && !change_target_bits_mb &&
(!cpi->oxcf.rc_cfg.gf_cbr_boost_pct ||
- !(refresh_frame_flags->alt_ref_frame ||
- refresh_frame_flags->golden_frame))) {
+ !(refresh_frame->alt_ref_frame || refresh_frame->golden_frame))) {
// Make sure q is between oscillating Qs to prevent resonance.
if (rc->rc_1_frame * rc->rc_2_frame == -1 &&
rc->q_1_frame != rc->q_2_frame) {
@@ -504,7 +503,7 @@
int height) {
const RATE_CONTROL *const rc = &cpi->rc;
const PRIMARY_RATE_CONTROL *const p_rc = &cpi->ppi->p_rc;
- const RefreshFrameFlagsInfo *const refresh_frame_flags = &cpi->refresh_frame;
+ const RefreshFrameInfo *const refresh_frame = &cpi->refresh_frame;
double rcf;
double rate_correction_factors_kfstd;
double rate_correction_factors_gfarfstd;
@@ -545,8 +544,7 @@
#endif
rcf = rate_correction_factors_rflvl;
} else {
- if ((refresh_frame_flags->alt_ref_frame ||
- refresh_frame_flags->golden_frame) &&
+ if ((refresh_frame->alt_ref_frame || refresh_frame->golden_frame) &&
!rc->is_src_frame_alt_ref && !cpi->ppi->use_svc &&
(cpi->oxcf.rc_cfg.mode != AOM_CBR ||
cpi->oxcf.rc_cfg.gf_cbr_boost_pct > 20))
@@ -580,7 +578,7 @@
double factor, int width, int height) {
RATE_CONTROL *const rc = &cpi->rc;
PRIMARY_RATE_CONTROL *const p_rc = &cpi->ppi->p_rc;
- const RefreshFrameFlagsInfo *const refresh_frame_flags = &cpi->refresh_frame;
+ const RefreshFrameInfo *const refresh_frame = &cpi->refresh_frame;
int update_default_rcf = 1;
// Normalize RCF to account for the size-dependent scaling factor.
factor /= resize_rate_factor(&cpi->oxcf.frm_dim_cfg, width, height);
@@ -601,8 +599,7 @@
#endif
if (update_default_rcf) p_rc->rate_correction_factors[rf_lvl] = factor;
} else {
- if ((refresh_frame_flags->alt_ref_frame ||
- refresh_frame_flags->golden_frame) &&
+ if ((refresh_frame->alt_ref_frame || refresh_frame->golden_frame) &&
!rc->is_src_frame_alt_ref && !cpi->ppi->use_svc &&
(cpi->oxcf.rc_cfg.mode != AOM_CBR ||
cpi->oxcf.rc_cfg.gf_cbr_boost_pct > 20)) {
@@ -847,7 +844,7 @@
static int calc_active_worst_quality_no_stats_vbr(const AV1_COMP *cpi) {
const RATE_CONTROL *const rc = &cpi->rc;
const PRIMARY_RATE_CONTROL *const p_rc = &cpi->ppi->p_rc;
- const RefreshFrameFlagsInfo *const refresh_frame_flags = &cpi->refresh_frame;
+ const RefreshFrameInfo *const refresh_frame = &cpi->refresh_frame;
const unsigned int curr_frame = cpi->common.current_frame.frame_number;
int active_worst_quality;
int last_q_key_frame;
@@ -859,9 +856,9 @@
active_worst_quality =
curr_frame == 0 ? rc->worst_quality : last_q_key_frame * 2;
} else {
- if (!rc->is_src_frame_alt_ref && (refresh_frame_flags->golden_frame ||
- refresh_frame_flags->bwd_ref_frame ||
- refresh_frame_flags->alt_ref_frame)) {
+ if (!rc->is_src_frame_alt_ref &&
+ (refresh_frame->golden_frame || refresh_frame->bwd_ref_frame ||
+ refresh_frame->alt_ref_frame)) {
active_worst_quality =
curr_frame == 1 ? last_q_key_frame * 5 / 4 : last_q_inter_frame;
} else {
@@ -882,6 +879,8 @@
const AV1_COMMON *const cm = &cpi->common;
const RATE_CONTROL *rc = &cpi->rc;
const PRIMARY_RATE_CONTROL *p_rc = &cpi->ppi->p_rc;
+ const SVC *const svc = &cpi->svc;
+ unsigned int num_frames_weight_key = 5 * cpi->svc.number_temporal_layers;
// Buffer level below which we push active_worst to worst_quality.
int64_t critical_level = p_rc->optimal_buffer_level >> 3;
int64_t buff_lvl_step = 0;
@@ -893,10 +892,20 @@
// for the first few frames following key frame. These are both initialized
// to worst_quality and updated with (3/4, 1/4) average in postencode_update.
// So for first few frames following key, the qp of that key frame is weighted
- // into the active_worst_quality setting.
- ambient_qp = (cm->current_frame.frame_number < 5)
- ? AOMMIN(p_rc->avg_frame_qindex[INTER_FRAME],
- p_rc->avg_frame_qindex[KEY_FRAME])
+ // into the active_worst_quality setting. For SVC the key frame should
+ // correspond to layer (0, 0), so use that for layer context.
+ int avg_qindex_key = p_rc->avg_frame_qindex[KEY_FRAME];
+ if (svc->number_temporal_layers > 1) {
+ int layer = LAYER_IDS_TO_IDX(0, 0, svc->number_temporal_layers);
+ const LAYER_CONTEXT *lc = &svc->layer_context[layer];
+ const PRIMARY_RATE_CONTROL *const lp_rc = &lc->p_rc;
+ avg_qindex_key = lp_rc->avg_frame_qindex[KEY_FRAME];
+ if (svc->temporal_layer_id == 0)
+ avg_qindex_key =
+ AOMMIN(lp_rc->avg_frame_qindex[KEY_FRAME], lp_rc->last_q[KEY_FRAME]);
+ }
+ ambient_qp = (cm->current_frame.frame_number < num_frames_weight_key)
+ ? AOMMIN(p_rc->avg_frame_qindex[INTER_FRAME], avg_qindex_key)
: p_rc->avg_frame_qindex[INTER_FRAME];
active_worst_quality = AOMMIN(rc->worst_quality, ambient_qp * 5 / 4);
if (p_rc->buffer_level > p_rc->optimal_buffer_level) {
@@ -937,7 +946,7 @@
const AV1_COMMON *const cm = &cpi->common;
const RATE_CONTROL *const rc = &cpi->rc;
const PRIMARY_RATE_CONTROL *const p_rc = &cpi->ppi->p_rc;
- const RefreshFrameFlagsInfo *const refresh_frame_flags = &cpi->refresh_frame;
+ const RefreshFrameInfo *const refresh_frame = &cpi->refresh_frame;
const CurrentFrame *const current_frame = &cm->current_frame;
int *rtc_minq;
const int bit_depth = cm->seq_params->bit_depth;
@@ -972,8 +981,7 @@
}
} else if (!rc->is_src_frame_alt_ref && !cpi->ppi->use_svc &&
cpi->oxcf.rc_cfg.gf_cbr_boost_pct &&
- (refresh_frame_flags->golden_frame ||
- refresh_frame_flags->alt_ref_frame)) {
+ (refresh_frame->golden_frame || refresh_frame->alt_ref_frame)) {
// Use the lower of active_worst_quality and recent
// average Q as basis for GF/ARF best Q limit unless last frame was
// a key frame.
@@ -1130,7 +1138,7 @@
const PRIMARY_RATE_CONTROL *const p_rc = &cpi->ppi->p_rc;
const CurrentFrame *const current_frame = &cm->current_frame;
const AV1EncoderConfig *const oxcf = &cpi->oxcf;
- const RefreshFrameFlagsInfo *const refresh_frame_flags = &cpi->refresh_frame;
+ const RefreshFrameInfo *const refresh_frame = &cpi->refresh_frame;
const enum aom_rc_mode rc_mode = oxcf->rc_cfg.mode;
assert(has_no_stats_stage(cpi));
@@ -1182,8 +1190,7 @@
}
}
} else if (!rc->is_src_frame_alt_ref &&
- (refresh_frame_flags->golden_frame ||
- refresh_frame_flags->alt_ref_frame)) {
+ (refresh_frame->golden_frame || refresh_frame->alt_ref_frame)) {
// Use the lower of active_worst_quality and recent
// average Q as basis for GF/ARF best Q limit unless last frame was
// a key frame.
@@ -1201,7 +1208,7 @@
const int qindex = cq_level;
const double q_val = av1_convert_qindex_to_q(qindex, bit_depth);
const int delta_qindex =
- (refresh_frame_flags->alt_ref_frame)
+ (refresh_frame->alt_ref_frame)
? av1_compute_qdelta(rc, q_val, q_val * 0.40, bit_depth)
: av1_compute_qdelta(rc, q_val, q_val * 0.50, bit_depth);
active_best_quality = AOMMAX(qindex + delta_qindex, rc->best_quality);
@@ -1251,8 +1258,7 @@
&cpi->rc, current_frame->frame_type, active_worst_quality, 2.0,
cpi->is_screen_content_type, bit_depth);
} else if (!rc->is_src_frame_alt_ref &&
- (refresh_frame_flags->golden_frame ||
- refresh_frame_flags->alt_ref_frame)) {
+ (refresh_frame->golden_frame || refresh_frame->alt_ref_frame)) {
qdelta = av1_compute_qdelta_by_rate(
&cpi->rc, current_frame->frame_type, active_worst_quality, 1.75,
cpi->is_screen_content_type, bit_depth);
@@ -1332,7 +1338,7 @@
#define STATIC_MOTION_THRESH 95
static void get_intra_q_and_bounds(const AV1_COMP *cpi, int width, int height,
int *active_best, int *active_worst,
- int cq_level, int is_fwd_kf) {
+ int cq_level) {
const AV1_COMMON *const cm = &cpi->common;
const RATE_CONTROL *const rc = &cpi->rc;
const PRIMARY_RATE_CONTROL *const p_rc = &cpi->ppi->p_rc;
@@ -1347,15 +1353,6 @@
// as q.
active_best_quality = cq_level;
active_worst_quality = cq_level;
- } else if (is_fwd_kf) {
- // Handle the special case for forward reference key frames.
- // Increase the boost because this keyframe is used as a forward and
- // backward reference.
- int qindex = p_rc->last_boosted_qindex;
- const double last_boosted_q = av1_convert_qindex_to_q(qindex, bit_depth);
- const int delta_qindex = av1_compute_qdelta(
- rc, last_boosted_q, last_boosted_q * 0.25, bit_depth);
- active_best_quality = AOMMAX(qindex + delta_qindex, rc->best_quality);
} else if (p_rc->this_key_frame_forced) {
// Handle the special case for key frames forced when we have reached
// the maximum key frame interval. Here force the Q to a range
@@ -1437,7 +1434,7 @@
const AV1_COMMON *const cm = &cpi->common;
const RATE_CONTROL *const rc = &cpi->rc;
const PRIMARY_RATE_CONTROL *const p_rc = &cpi->ppi->p_rc;
- const RefreshFrameFlagsInfo *const refresh_frame_flags = &cpi->refresh_frame;
+ const RefreshFrameInfo *const refresh_frame = &cpi->refresh_frame;
const int bit_depth = cpi->common.seq_params->bit_depth;
int active_best_quality = *active_best;
int active_worst_quality = *active_worst;
@@ -1446,8 +1443,8 @@
if (cpi->oxcf.rc_cfg.mode != AOM_Q) {
if (frame_is_intra_only(cm) ||
(!rc->is_src_frame_alt_ref &&
- (refresh_frame_flags->golden_frame || is_intrl_arf_boost ||
- refresh_frame_flags->alt_ref_frame))) {
+ (refresh_frame->golden_frame || is_intrl_arf_boost ||
+ refresh_frame->alt_ref_frame))) {
active_best_quality -=
(cpi->ppi->twopass.extend_minq + cpi->ppi->twopass.extend_minq_fast);
active_worst_quality += (cpi->ppi->twopass.extend_maxq / 2);
@@ -1553,7 +1550,7 @@
const RATE_CONTROL *const rc = &cpi->rc;
const PRIMARY_RATE_CONTROL *const p_rc = &cpi->ppi->p_rc;
const AV1EncoderConfig *const oxcf = &cpi->oxcf;
- const RefreshFrameFlagsInfo *const refresh_frame_flags = &cpi->refresh_frame;
+ const RefreshFrameInfo *const refresh_frame = &cpi->refresh_frame;
const GF_GROUP *gf_group = &cpi->ppi->gf_group;
const enum aom_rc_mode rc_mode = oxcf->rc_cfg.mode;
int *inter_minq;
@@ -1568,8 +1565,8 @@
// TODO(jingning): Consider to rework this hack that covers issues incurred
// in lightfield setting.
if (cm->tiles.large_scale) {
- is_leaf_frame = !(refresh_frame_flags->golden_frame ||
- refresh_frame_flags->alt_ref_frame || is_intrl_arf_boost);
+ is_leaf_frame = !(refresh_frame->golden_frame ||
+ refresh_frame->alt_ref_frame || is_intrl_arf_boost);
}
const int is_overlay_frame = rc->is_src_frame_alt_ref;
@@ -1658,13 +1655,10 @@
int active_best_quality = 0;
int active_worst_quality = rc->active_worst_quality;
int q;
- GF_GROUP *gf_group = &cpi->ppi->gf_group;
if (frame_is_intra_only(cm)) {
- const int is_fwd_kf = gf_group->update_type[gf_index] == ARF_UPDATE &&
- gf_group->refbuf_state[gf_index] == REFBUF_UPDATE;
get_intra_q_and_bounds(cpi, width, height, &active_best_quality,
- &active_worst_quality, cq_level, is_fwd_kf);
+ &active_worst_quality, cq_level);
} else {
// Active best quality limited by previous layer.
active_best_quality =
@@ -1715,7 +1709,7 @@
const RATE_CONTROL *const rc = &cpi->rc;
const PRIMARY_RATE_CONTROL *const p_rc = &cpi->ppi->p_rc;
const AV1EncoderConfig *const oxcf = &cpi->oxcf;
- const RefreshFrameFlagsInfo *const refresh_frame_flags = &cpi->refresh_frame;
+ const RefreshFrameInfo *const refresh_frame = &cpi->refresh_frame;
const GF_GROUP *gf_group = &cpi->ppi->gf_group;
assert(IMPLIES(has_no_stats_stage(cpi),
cpi->oxcf.rc_cfg.mode == AOM_Q &&
@@ -1737,10 +1731,8 @@
gf_group->update_type[gf_index] == INTNL_ARF_UPDATE;
if (frame_is_intra_only(cm)) {
- const int is_fwd_kf = gf_group->update_type[gf_index] == ARF_UPDATE &&
- gf_group->refbuf_state[gf_index] == REFBUF_UPDATE;
get_intra_q_and_bounds(cpi, width, height, &active_best_quality,
- &active_worst_quality, cq_level, is_fwd_kf);
+ &active_worst_quality, cq_level);
#ifdef STRICT_RC
active_best_quality = 0;
#endif
@@ -1767,8 +1759,8 @@
// leaf (non arf) frames. This is important to the TPL model which assumes
// Q drops with each arf level.
if (!(rc->is_src_frame_alt_ref) &&
- (refresh_frame_flags->golden_frame ||
- refresh_frame_flags->alt_ref_frame || is_intrl_arf_boost)) {
+ (refresh_frame->golden_frame || refresh_frame->alt_ref_frame ||
+ is_intrl_arf_boost)) {
active_worst_quality =
(active_best_quality + (3 * active_worst_quality) + 2) / 4;
}
@@ -1885,7 +1877,7 @@
RATE_CONTROL *const rc = &cpi->rc;
PRIMARY_RATE_CONTROL *const p_rc = &cpi->ppi->p_rc;
const GF_GROUP *const gf_group = &cpi->ppi->gf_group;
- const RefreshFrameFlagsInfo *const refresh_frame_flags = &cpi->refresh_frame;
+ const RefreshFrameInfo *const refresh_frame = &cpi->refresh_frame;
const int is_intrnl_arf =
gf_group->update_type[cpi->gf_frame_index] == INTNL_ARF_UPDATE;
@@ -1910,8 +1902,8 @@
} else {
if ((cpi->ppi->use_svc && cpi->oxcf.rc_cfg.mode == AOM_CBR) ||
(!rc->is_src_frame_alt_ref &&
- !(refresh_frame_flags->golden_frame || is_intrnl_arf ||
- refresh_frame_flags->alt_ref_frame))) {
+ !(refresh_frame->golden_frame || is_intrnl_arf ||
+ refresh_frame->alt_ref_frame))) {
p_rc->last_q[INTER_FRAME] = qindex;
p_rc->avg_frame_qindex[INTER_FRAME] = ROUND_POWER_OF_TWO(
3 * p_rc->avg_frame_qindex[INTER_FRAME] + qindex, 2);
@@ -1932,8 +1924,8 @@
if ((qindex < p_rc->last_boosted_qindex) ||
(current_frame->frame_type == KEY_FRAME) ||
(!p_rc->constrained_gf_group &&
- (refresh_frame_flags->alt_ref_frame || is_intrnl_arf ||
- (refresh_frame_flags->golden_frame && !rc->is_src_frame_alt_ref)))) {
+ (refresh_frame->alt_ref_frame || is_intrnl_arf ||
+ (refresh_frame->golden_frame && !rc->is_src_frame_alt_ref)))) {
p_rc->last_boosted_qindex = qindex;
}
if (current_frame->frame_type == KEY_FRAME) p_rc->last_kf_qindex = qindex;
@@ -1960,7 +1952,7 @@
if (is_altref_enabled(cpi->oxcf.gf_cfg.lag_in_frames,
cpi->oxcf.gf_cfg.enable_auto_arf) &&
- refresh_frame_flags->alt_ref_frame &&
+ refresh_frame->alt_ref_frame &&
(current_frame->frame_type != KEY_FRAME && !frame_is_sframe(cm)))
// Update the alternate reference frame stats as appropriate.
update_alt_ref_frame_stats(cpi);
@@ -2284,6 +2276,9 @@
target = ((p_rc->starting_buffer_level / 2) > INT_MAX)
? INT_MAX
: (int)(p_rc->starting_buffer_level / 2);
+ if (cpi->svc.number_temporal_layers > 1 && target < (INT_MAX >> 2)) {
+ target = target << AOMMIN(2, (cpi->svc.number_temporal_layers - 1));
+ }
} else {
int kf_boost = 32;
double framerate = cpi->framerate;
@@ -2297,6 +2292,93 @@
return av1_rc_clamp_iframe_target_size(cpi, target);
}
+#define DEFAULT_KF_BOOST_RT 2300
+#define DEFAULT_GF_BOOST_RT 2000
+
+static void set_baseline_gf_interval(AV1_COMP *cpi, FRAME_TYPE frame_type) {
+ RATE_CONTROL *const rc = &cpi->rc;
+ PRIMARY_RATE_CONTROL *const p_rc = &cpi->ppi->p_rc;
+ GF_GROUP *const gf_group = &cpi->ppi->gf_group;
+ if (cpi->oxcf.q_cfg.aq_mode == CYCLIC_REFRESH_AQ)
+ av1_cyclic_refresh_set_golden_update(cpi);
+ else
+ p_rc->baseline_gf_interval = FIXED_GF_INTERVAL;
+ if (p_rc->baseline_gf_interval > rc->frames_to_key &&
+ cpi->oxcf.kf_cfg.auto_key)
+ p_rc->baseline_gf_interval = rc->frames_to_key;
+ p_rc->gfu_boost = DEFAULT_GF_BOOST_RT;
+ p_rc->constrained_gf_group =
+ (p_rc->baseline_gf_interval >= rc->frames_to_key &&
+ cpi->oxcf.kf_cfg.auto_key)
+ ? 1
+ : 0;
+ rc->frames_till_gf_update_due = p_rc->baseline_gf_interval;
+ cpi->gf_frame_index = 0;
+ // SVC does not use GF as periodic boost.
+ // TODO(marpan): Find better way to disable this for SVC.
+ if (cpi->ppi->use_svc) {
+ SVC *const svc = &cpi->svc;
+ p_rc->baseline_gf_interval = MAX_STATIC_GF_GROUP_LENGTH - 1;
+ p_rc->gfu_boost = 1;
+ p_rc->constrained_gf_group = 0;
+ rc->frames_till_gf_update_due = p_rc->baseline_gf_interval;
+ for (int layer = 0;
+ layer < svc->number_spatial_layers * svc->number_temporal_layers;
+ ++layer) {
+ LAYER_CONTEXT *const lc = &svc->layer_context[layer];
+ lc->p_rc.baseline_gf_interval = p_rc->baseline_gf_interval;
+ lc->p_rc.gfu_boost = p_rc->gfu_boost;
+ lc->p_rc.constrained_gf_group = p_rc->constrained_gf_group;
+ lc->rc.frames_till_gf_update_due = rc->frames_till_gf_update_due;
+ lc->group_index = 0;
+ }
+ }
+ gf_group->size = p_rc->baseline_gf_interval;
+ gf_group->update_type[0] = (frame_type == KEY_FRAME) ? KF_UPDATE : GF_UPDATE;
+ gf_group->refbuf_state[cpi->gf_frame_index] =
+ (frame_type == KEY_FRAME) ? REFBUF_RESET : REFBUF_UPDATE;
+}
+
+void av1_adjust_gf_refresh_qp_one_pass_rt(AV1_COMP *cpi) {
+ AV1_COMMON *const cm = &cpi->common;
+ RATE_CONTROL *const rc = &cpi->rc;
+ SVC *const svc = &cpi->svc;
+ const int resize_pending = is_frame_resize_pending(cpi);
+ if (!resize_pending && !rc->high_source_sad) {
+ // Check if we should disable GF refresh (if period is up),
+ // or force a GF refresh update (if we are at least halfway through
+ // period) based on QP. Look into adding info on segment deltaq.
+ PRIMARY_RATE_CONTROL *p_rc = &cpi->ppi->p_rc;
+ const int avg_qp = p_rc->avg_frame_qindex[INTER_FRAME];
+ const int allow_gf_update =
+ rc->frames_till_gf_update_due <= (p_rc->baseline_gf_interval - 10);
+ int gf_update_changed = 0;
+ int thresh = 87;
+ if (rc->frames_till_gf_update_due == 1 &&
+ cm->quant_params.base_qindex > avg_qp) {
+ // Disable GF refresh since QP is above the running average QP.
+ svc->refresh[svc->gld_idx_1layer] = 0;
+ gf_update_changed = 1;
+ } else if (allow_gf_update &&
+ ((cm->quant_params.base_qindex < thresh * avg_qp / 100) ||
+ (rc->avg_frame_low_motion < 20))) {
+ // Force refresh since QP is well below average QP or this is a high
+ // motion frame.
+ svc->refresh[svc->gld_idx_1layer] = 1;
+ gf_update_changed = 1;
+ }
+ if (gf_update_changed) {
+ set_baseline_gf_interval(cpi, INTER_FRAME);
+ int refresh_mask = 0;
+ for (unsigned int i = 0; i < INTER_REFS_PER_FRAME; i++) {
+ int ref_frame_map_idx = svc->ref_idx[i];
+ refresh_mask |= svc->refresh[ref_frame_map_idx] << ref_frame_map_idx;
+ }
+ cm->current_frame.refresh_frame_flags = refresh_mask;
+ }
+ }
+}
+
/*!\brief Setup the reference prediction structure for 1 pass real-time
*
* Set the reference prediction structure for 1 layer.
@@ -2338,8 +2420,7 @@
// Set the reference frame flags.
ext_flags->ref_frame_flags ^= AOM_LAST_FLAG;
ext_flags->ref_frame_flags ^= AOM_ALT_FLAG;
- if (cpi->sf.rt_sf.use_golden_frame)
- ext_flags->ref_frame_flags ^= AOM_GOLD_FLAG;
+ ext_flags->ref_frame_flags ^= AOM_GOLD_FLAG;
if (cpi->sf.rt_sf.ref_frame_comp_nonrd[1])
ext_flags->ref_frame_flags ^= AOM_LAST2_FLAG;
const int sh = 7 - gld_fixed_slot;
@@ -2379,6 +2460,7 @@
ext_refresh_frame_flags->golden_frame = 1;
svc->refresh[gld_idx] = 1;
}
+ svc->gld_idx_1layer = gld_idx;
}
/*!\brief Check for scene detection, for 1 pass real-time mode.
@@ -2491,9 +2573,6 @@
}
}
-#define DEFAULT_KF_BOOST_RT 2300
-#define DEFAULT_GF_BOOST_RT 2000
-
/*!\brief Set the GF baseline interval for 1 pass real-time mode.
*
*
@@ -2507,55 +2586,14 @@
static int set_gf_interval_update_onepass_rt(AV1_COMP *cpi,
FRAME_TYPE frame_type) {
RATE_CONTROL *const rc = &cpi->rc;
- PRIMARY_RATE_CONTROL *const p_rc = &cpi->ppi->p_rc;
- GF_GROUP *const gf_group = &cpi->ppi->gf_group;
- ResizePendingParams *const resize_pending_params =
- &cpi->resize_pending_params;
int gf_update = 0;
- const int resize_pending =
- (resize_pending_params->width && resize_pending_params->height &&
- (cpi->common.width != resize_pending_params->width ||
- cpi->common.height != resize_pending_params->height));
+ const int resize_pending = is_frame_resize_pending(cpi);
// GF update based on frames_till_gf_update_due, also
// force upddate on resize pending frame or for scene change.
if ((resize_pending || rc->high_source_sad ||
rc->frames_till_gf_update_due == 0) &&
cpi->svc.temporal_layer_id == 0 && cpi->svc.spatial_layer_id == 0) {
- if (cpi->oxcf.q_cfg.aq_mode == CYCLIC_REFRESH_AQ)
- av1_cyclic_refresh_set_golden_update(cpi);
- else
- p_rc->baseline_gf_interval = FIXED_GF_INTERVAL;
- if (p_rc->baseline_gf_interval > rc->frames_to_key)
- p_rc->baseline_gf_interval = rc->frames_to_key;
- p_rc->gfu_boost = DEFAULT_GF_BOOST_RT;
- p_rc->constrained_gf_group =
- (p_rc->baseline_gf_interval >= rc->frames_to_key) ? 1 : 0;
- rc->frames_till_gf_update_due = p_rc->baseline_gf_interval;
- cpi->gf_frame_index = 0;
- // SVC does not use GF as periodic boost.
- // TODO(marpan): Find better way to disable this for SVC.
- if (cpi->ppi->use_svc) {
- SVC *const svc = &cpi->svc;
- p_rc->baseline_gf_interval = MAX_STATIC_GF_GROUP_LENGTH - 1;
- p_rc->gfu_boost = 1;
- p_rc->constrained_gf_group = 0;
- rc->frames_till_gf_update_due = p_rc->baseline_gf_interval;
- for (int layer = 0;
- layer < svc->number_spatial_layers * svc->number_temporal_layers;
- ++layer) {
- LAYER_CONTEXT *const lc = &svc->layer_context[layer];
- lc->p_rc.baseline_gf_interval = p_rc->baseline_gf_interval;
- lc->p_rc.gfu_boost = p_rc->gfu_boost;
- lc->p_rc.constrained_gf_group = p_rc->constrained_gf_group;
- lc->rc.frames_till_gf_update_due = rc->frames_till_gf_update_due;
- lc->group_index = 0;
- }
- }
- gf_group->size = p_rc->baseline_gf_interval;
- gf_group->update_type[0] =
- (frame_type == KEY_FRAME) ? KF_UPDATE : GF_UPDATE;
- gf_group->refbuf_state[cpi->gf_frame_index] =
- (frame_type == KEY_FRAME) ? REFBUF_RESET : REFBUF_UPDATE;
+ set_baseline_gf_interval(cpi, frame_type);
gf_update = 1;
}
return gf_update;
@@ -2714,6 +2752,30 @@
return;
}
+static INLINE int set_key_frame(AV1_COMP *cpi, unsigned int frame_flags) {
+ RATE_CONTROL *const rc = &cpi->rc;
+ AV1_COMMON *const cm = &cpi->common;
+ SVC *const svc = &cpi->svc;
+
+ // Very first frame has to be key frame.
+ if (cm->current_frame.frame_number == 0) return 1;
+ // Set key frame if forced by frame flags.
+ if (frame_flags & FRAMEFLAGS_KEY) return 1;
+ if (!cpi->ppi->use_svc) {
+ // Non-SVC
+ if (cpi->oxcf.kf_cfg.auto_key && rc->frames_to_key == 0) return 1;
+ } else {
+ // SVC
+ if (svc->spatial_layer_id == 0 &&
+ (cpi->oxcf.kf_cfg.auto_key &&
+ (cpi->oxcf.kf_cfg.key_freq_max == 0 ||
+ svc->current_superframe % cpi->oxcf.kf_cfg.key_freq_max == 0)))
+ return 1;
+ }
+
+ return 0;
+}
+
void av1_get_one_pass_rt_params(AV1_COMP *cpi,
EncodeFrameParams *const frame_params,
unsigned int frame_flags) {
@@ -2735,11 +2797,7 @@
av1_restore_layer_context(cpi);
}
// Set frame type.
- if ((!cpi->ppi->use_svc && rc->frames_to_key == 0) ||
- (cpi->ppi->use_svc && svc->spatial_layer_id == 0 &&
- (cpi->oxcf.kf_cfg.key_freq_max == 0 ||
- svc->current_superframe % cpi->oxcf.kf_cfg.key_freq_max == 0)) ||
- (frame_flags & FRAMEFLAGS_KEY)) {
+ if (set_key_frame(cpi, frame_flags)) {
frame_params->frame_type = KEY_FRAME;
p_rc->this_key_frame_forced =
cm->current_frame.frame_number != 0 && rc->frames_to_key == 0;
@@ -2785,9 +2843,7 @@
resize_pending_params->width = cpi->oxcf.frm_dim_cfg.width;
resize_pending_params->height = cpi->oxcf.frm_dim_cfg.height;
}
- } else if (resize_pending_params->width && resize_pending_params->height &&
- (cpi->common.width != resize_pending_params->width ||
- cpi->common.height != resize_pending_params->height)) {
+ } else if (is_frame_resize_pending(cpi)) {
resize_reset_rc(cpi, resize_pending_params->width,
resize_pending_params->height, cm->width, cm->height);
}
@@ -2886,16 +2942,13 @@
* Intended to be used only with AOM_Q mode.
*/
void av1_q_mode_compute_gop_q_indices(int gf_frame_index, int base_q_index,
- double arf_qstep_ratio,
+ const double *qstep_ratio_list,
aom_bit_depth_t bit_depth,
const struct GF_GROUP *gf_group,
int *q_index_list) {
- const int arf_q = av1_get_q_index_from_qstep_ratio(
- base_q_index, arf_qstep_ratio, bit_depth);
- for (int gf_index = gf_frame_index; gf_index < gf_group->size; ++gf_index) {
- const int height = gf_group_pyramid_level(gf_group, gf_index);
- q_index_list[gf_index] = av1_q_mode_get_q_index(
- base_q_index, gf_group->update_type[gf_index], height, arf_q);
+ for (int i = gf_frame_index; i < gf_group->size; ++i) {
+ q_index_list[i] = av1_get_q_index_from_qstep_ratio(
+ base_q_index, qstep_ratio_list[i], bit_depth);
}
}
#endif // !CONFIG_REALTIME_ONLY
diff --git a/av1/encoder/ratectrl.h b/av1/encoder/ratectrl.h
index f668968..651f19b 100644
--- a/av1/encoder/ratectrl.h
+++ b/av1/encoder/ratectrl.h
@@ -46,6 +46,9 @@
#define FIXED_GF_INTERVAL 16
#define MAX_GF_LENGTH_LAP 16
+#define FIXED_GF_INTERVAL_RT 80
+#define MAX_GF_INTERVAL_RT 160
+
#define MAX_NUM_GF_INTERVALS 15
#define MAX_ARF_LAYERS 6
@@ -564,6 +567,8 @@
void av1_rc_set_frame_target(struct AV1_COMP *cpi, int target, int width,
int height);
+void av1_adjust_gf_refresh_qp_one_pass_rt(struct AV1_COMP *cpi);
+
void av1_set_reference_structure_one_pass_rt(struct AV1_COMP *cpi,
int gf_update);
@@ -661,8 +666,7 @@
*
* \param[in] gf_frame_index Index of the current frame
* \param[in] base_q_index Base q index
- * \param[in] arf_qstep_ratio The quantize step ratio between arf q
- * index and base q index
+ * \param[in] qstep_ratio_list Stores the qstep_ratio for each frame
* \param[in] bit_depth Bit depth
* \param[in] gf_group Pointer to the GOP
* \param[out] q_index_list An array to store output gop q indices.
@@ -670,7 +674,7 @@
* greater than gf_group.size()
*/
void av1_q_mode_compute_gop_q_indices(int gf_frame_index, int base_q_index,
- double arf_qstep_ratio,
+ const double *qstep_ratio_list,
aom_bit_depth_t bit_depth,
const struct GF_GROUP *gf_group,
int *q_index_list);
diff --git a/av1/encoder/rd.c b/av1/encoder/rd.c
index d88f563..8d28f43 100644
--- a/av1/encoder/rd.c
+++ b/av1/encoder/rd.c
@@ -620,6 +620,9 @@
void av1_fill_mv_costs(const nmv_context *nmvc, int integer_mv, int usehp,
MvCosts *mv_costs) {
+ // Avoid accessing 'mv_costs' when it is not allocated.
+ if (mv_costs == NULL) return;
+
mv_costs->nmv_cost[0] = &mv_costs->nmv_cost_alloc[0][MV_MAX];
mv_costs->nmv_cost[1] = &mv_costs->nmv_cost_alloc[1][MV_MAX];
mv_costs->nmv_cost_hp[0] = &mv_costs->nmv_cost_hp_alloc[0][MV_MAX];
diff --git a/av1/encoder/rd.h b/av1/encoder/rd.h
index 93ab739..7256ff9 100644
--- a/av1/encoder/rd.h
+++ b/av1/encoder/rd.h
@@ -299,37 +299,8 @@
}
// Used to reset the state of tx/mb rd hash information
-static INLINE void reset_hash_records(TxfmSearchInfo *const txfm_info,
- int use_inter_txb_hash) {
- int32_t record_idx;
+static INLINE void reset_hash_records(TxfmSearchInfo *const txfm_info) {
if (!txfm_info->txb_rd_records) return;
- // Reset the state for use_inter_txb_hash
- if (use_inter_txb_hash) {
- for (record_idx = 0;
- record_idx < ((MAX_MIB_SIZE >> 1) * (MAX_MIB_SIZE >> 1)); record_idx++)
- txfm_info->txb_rd_records->txb_rd_record_8X8[record_idx].num =
- txfm_info->txb_rd_records->txb_rd_record_8X8[record_idx].index_start =
- 0;
- for (record_idx = 0;
- record_idx < ((MAX_MIB_SIZE >> 2) * (MAX_MIB_SIZE >> 2)); record_idx++)
- txfm_info->txb_rd_records->txb_rd_record_16X16[record_idx].num =
- txfm_info->txb_rd_records->txb_rd_record_16X16[record_idx]
- .index_start = 0;
- for (record_idx = 0;
- record_idx < ((MAX_MIB_SIZE >> 3) * (MAX_MIB_SIZE >> 3)); record_idx++)
- txfm_info->txb_rd_records->txb_rd_record_32X32[record_idx].num =
- txfm_info->txb_rd_records->txb_rd_record_32X32[record_idx]
- .index_start = 0;
- for (record_idx = 0;
- record_idx < ((MAX_MIB_SIZE >> 4) * (MAX_MIB_SIZE >> 4)); record_idx++)
- txfm_info->txb_rd_records->txb_rd_record_64X64[record_idx].num =
- txfm_info->txb_rd_records->txb_rd_record_64X64[record_idx]
- .index_start = 0;
- }
-
- // Reset the state for use_intra_txb_hash
- txfm_info->txb_rd_records->txb_rd_record_intra.num =
- txfm_info->txb_rd_records->txb_rd_record_intra.index_start = 0;
// Reset the state for use_mb_rd_hash
txfm_info->txb_rd_records->mb_rd_record.num =
diff --git a/av1/encoder/rdopt.c b/av1/encoder/rdopt.c
index e6cf62b..c346c92 100644
--- a/av1/encoder/rdopt.c
+++ b/av1/encoder/rdopt.c
@@ -1167,12 +1167,10 @@
return 0;
}
-static INLINE void update_mode_start_end_index(const AV1_COMP *const cpi,
- int *mode_index_start,
- int *mode_index_end,
- int last_motion_mode_allowed,
- int interintra_allowed,
- int eval_motion_mode) {
+static INLINE void update_mode_start_end_index(
+ const AV1_COMP *const cpi, const MB_MODE_INFO *const mbmi,
+ int *mode_index_start, int *mode_index_end, int last_motion_mode_allowed,
+ int interintra_allowed, int eval_motion_mode) {
*mode_index_start = (int)SIMPLE_TRANSLATION;
*mode_index_end = (int)last_motion_mode_allowed + interintra_allowed;
if (cpi->sf.winner_mode_sf.motion_mode_for_winner_cand) {
@@ -1184,6 +1182,8 @@
*mode_index_start = 1;
}
}
+ if (cpi->sf.inter_sf.extra_prune_warped && mbmi->bsize > BLOCK_16X16)
+ *mode_index_end = SIMPLE_TRANSLATION;
}
/*!\brief AV1 motion mode search
@@ -1328,7 +1328,7 @@
// if SIMPLE_TRANSLATION has already been searched according to
// the motion_mode_for_winner_cand speed feature, update the mode_index_start
// to avoid searching it again.
- update_mode_start_end_index(cpi, &mode_index_start, &mode_index_end,
+ update_mode_start_end_index(cpi, mbmi, &mode_index_start, &mode_index_end,
last_motion_mode_allowed, interintra_allowed,
eval_motion_mode);
// Main function loop. This loops over all of the possible motion modes and
@@ -1758,12 +1758,13 @@
}
// nearest_refmv_count indicates the closeness of block motion characteristics
- // with respect to its spatial neighbor. Lower value of nearest_refmv_count
- // means less correlation with its spatial neighbors. Hence less possibility
- // for NEARESTMV and NEARMV modes becoming the best mode since these modes
- // work well for blocks that shares similar motion characteristics with its
- // neighbor. Thus, when nearest_refmv_count is less w.r.t ref_mv_count prune
- // the mode.
+ // with respect to its spatial neighbor. Smaller value of nearest_refmv_count
+ // w.r.t. ref_mv_count means less correlation with its spatial neighbors.
+ // Hence less possibility for NEARESTMV and NEARMV modes becoming the best
+ // mode since these modes work well for blocks that share similar motion
+ // characteristics with its neighbor. Thus, NEARMV mode is pruned when
+ // nearest_refmv_count is relatively smaller than ref_mv_count and NEARESTMV
+ // mode is pruned if none of the ref mvs are populated from nearest candidate.
const int prune_thresh = 1 + (ref_mv_count >= 2);
if (nearest_refmv_count < prune_thresh) return 1;
return 0;
@@ -2661,6 +2662,16 @@
const int base_rate =
args->ref_frame_cost + args->single_comp_cost + ref_mv_cost;
+ // As per the experiments, in real-time preset impact of model rd based
+ // breakouts is less on encoding time if the following conditions are true.
+ // (1) compound mode is disabled
+ // (2) interpolation filter search is disabled
+ // TODO(any): Check the impact of model rd based breakouts in other presets
+ const int skip_interp_search_modelrd_calc =
+ cpi->oxcf.mode == REALTIME &&
+ cm->current_frame.reference_mode == SINGLE_REFERENCE &&
+ cpi->sf.rt_sf.skip_interp_filter_search;
+
for (i = 0; i < MAX_REF_MV_SEARCH - 1; ++i) {
save_mv[i][0].as_int = INVALID_MV;
save_mv[i][1].as_int = INVALID_MV;
@@ -2746,6 +2757,15 @@
if (this_sse < args->best_single_sse_in_refs[ref]) {
args->best_single_sse_in_refs[ref] = this_sse;
}
+
+ if (cpi->sf.rt_sf.skip_newmv_mode_based_on_sse) {
+ const double scale_factor[11] = { 0.7, 0.7, 0.7, 0.7, 0.7, 0.8,
+ 0.8, 0.9, 0.9, 0.9, 0.9 };
+ assert(num_pels_log2_lookup[bsize] >= 4);
+ if (args->best_pred_sse <
+ scale_factor[num_pels_log2_lookup[bsize] - 4] * this_sse)
+ continue;
+ }
}
rd_stats->rate += rate_mv;
@@ -2795,39 +2815,41 @@
if (not_best_mode) continue;
}
+ if (!skip_interp_search_modelrd_calc) {
#if CONFIG_COLLECT_COMPONENT_TIMING
- start_timing(cpi, interpolation_filter_search_time);
+ start_timing(cpi, interpolation_filter_search_time);
#endif
- // Determine the interpolation filter for this mode
- ret_val = av1_interpolation_filter_search(
- x, cpi, tile_data, bsize, &tmp_dst, &orig_dst, &rd, &rs,
- &skip_build_pred, args, ref_best_rd);
+ // Determine the interpolation filter for this mode
+ ret_val = av1_interpolation_filter_search(
+ x, cpi, tile_data, bsize, &tmp_dst, &orig_dst, &rd, &rs,
+ &skip_build_pred, args, ref_best_rd);
#if CONFIG_COLLECT_COMPONENT_TIMING
- end_timing(cpi, interpolation_filter_search_time);
+ end_timing(cpi, interpolation_filter_search_time);
#endif
- if (args->modelled_rd != NULL && !is_comp_pred) {
- args->modelled_rd[this_mode][ref_mv_idx][refs[0]] = rd;
- }
- if (ret_val != 0) {
- restore_dst_buf(xd, orig_dst, num_planes);
- continue;
- } else if (cpi->sf.inter_sf.model_based_post_interp_filter_breakout &&
- ref_best_rd != INT64_MAX && (rd >> 3) * 3 > ref_best_rd) {
- restore_dst_buf(xd, orig_dst, num_planes);
- continue;
- }
+ if (args->modelled_rd != NULL && !is_comp_pred) {
+ args->modelled_rd[this_mode][ref_mv_idx][refs[0]] = rd;
+ }
+ if (ret_val != 0) {
+ restore_dst_buf(xd, orig_dst, num_planes);
+ continue;
+ } else if (cpi->sf.inter_sf.model_based_post_interp_filter_breakout &&
+ ref_best_rd != INT64_MAX && (rd >> 3) * 3 > ref_best_rd) {
+ restore_dst_buf(xd, orig_dst, num_planes);
+ continue;
+ }
- // Compute modelled RD if enabled
- if (args->modelled_rd != NULL) {
- if (is_comp_pred) {
- const int mode0 = compound_ref0_mode(this_mode);
- const int mode1 = compound_ref1_mode(this_mode);
- const int64_t mrd =
- AOMMIN(args->modelled_rd[mode0][ref_mv_idx][refs[0]],
- args->modelled_rd[mode1][ref_mv_idx][refs[1]]);
- if ((rd >> 3) * 6 > mrd && ref_best_rd < INT64_MAX) {
- restore_dst_buf(xd, orig_dst, num_planes);
- continue;
+ // Compute modelled RD if enabled
+ if (args->modelled_rd != NULL) {
+ if (is_comp_pred) {
+ const int mode0 = compound_ref0_mode(this_mode);
+ const int mode1 = compound_ref1_mode(this_mode);
+ const int64_t mrd =
+ AOMMIN(args->modelled_rd[mode0][ref_mv_idx][refs[0]],
+ args->modelled_rd[mode1][ref_mv_idx][refs[1]]);
+ if ((rd >> 3) * 6 > mrd && ref_best_rd < INT64_MAX) {
+ restore_dst_buf(xd, orig_dst, num_planes);
+ continue;
+ }
}
}
}
@@ -3667,6 +3689,13 @@
disable_reference(INTRA_FRAME, mask->ref_combo);
}
+ if (!cpi->oxcf.tool_cfg.enable_global_motion) {
+ for (ref_frame = LAST_FRAME; ref_frame <= ALTREF_FRAME; ++ref_frame) {
+ mask->pred_modes[ref_frame] |= (1 << GLOBALMV);
+ mask->pred_modes[ref_frame] |= (1 << GLOBAL_GLOBALMV);
+ }
+ }
+
mask->pred_modes[INTRA_FRAME] |=
~(sf->intra_sf.intra_y_mode_mask[max_txsize_lookup[bsize]]);
}
@@ -5379,7 +5408,8 @@
-1,
-1,
{ 0 },
- { 0 } };
+ { 0 },
+ UINT_MAX };
for (i = 0; i < MODE_CTX_REF_FRAMES; ++i) args.cmp_mode[i] = -1;
// Indicates the appropriate number of simple translation winner modes for
// exhaustive motion mode evaluation
@@ -5598,6 +5628,7 @@
args.single_newmv_valid = search_state.single_newmv_valid;
args.single_comp_cost = real_compmode_cost;
args.ref_frame_cost = ref_frame_cost;
+ args.best_pred_sse = search_state.best_pred_sse;
int64_t skip_rd[2] = { search_state.best_skip_rd[0],
search_state.best_skip_rd[1] };
diff --git a/av1/encoder/rdopt_utils.h b/av1/encoder/rdopt_utils.h
index 059ddb1..8dacc2e 100644
--- a/av1/encoder/rdopt_utils.h
+++ b/av1/encoder/rdopt_utils.h
@@ -553,14 +553,7 @@
set_tx_type_prune(sf, txfm_params,
sf->tx_sf.tx_type_search.winner_mode_tx_type_pruning,
1);
-
- // Reset hash state for winner mode processing. Winner mode and subsequent
- // transform/mode evaluations (palette/IntraBC) cann't reuse old data as
- // the decisions would have been sub-optimal
- // TODO(any): Move the evaluation of palette/IntraBC modes before winner
- // mode is processed and clean-up the code below
- reset_hash_records(txfm_info, cpi->sf.tx_sf.use_inter_txb_hash);
-
+ reset_hash_records(txfm_info);
break;
default: assert(0);
}
diff --git a/av1/encoder/speed_features.c b/av1/encoder/speed_features.c
index cb2644c..dc6c2ae 100644
--- a/av1/encoder/speed_features.c
+++ b/av1/encoder/speed_features.c
@@ -519,12 +519,6 @@
sf->rt_sf.nonrd_check_partition_merge_mode = 0;
sf->rt_sf.hybrid_intra_pickmode = 0;
}
-
- // Intra txb hash is currently not compatible with multi-winner mode as the
- // hashes got reset during multi-winner mode processing.
- assert(IMPLIES(
- sf->winner_mode_sf.multi_winner_mode_type != MULTI_WINNER_MODE_OFF,
- !sf->tx_sf.use_intra_txb_hash));
}
static void set_good_speed_feature_framesize_dependent(
@@ -663,7 +657,7 @@
if (is_720p_or_larger) {
sf->part_sf.partition_search_breakout_dist_thr = (1 << 25);
sf->part_sf.partition_search_breakout_rate_thr = 200;
- sf->part_sf.skip_non_sq_part_based_on_none = boosted ? 0 : 2;
+ sf->part_sf.skip_non_sq_part_based_on_none = is_lf_frame ? 2 : 0;
} else {
sf->part_sf.max_intra_bsize = BLOCK_32X32;
sf->part_sf.partition_search_breakout_dist_thr = (1 << 23);
@@ -924,7 +918,6 @@
// clips (e.g. 5% loss on dinner_1080p). We need to examine the sequence a
// bit more closely to figure out why.
sf->inter_sf.adaptive_rd_thresh = 1;
- sf->inter_sf.comp_inter_joint_search_thresh = BLOCK_SIZES_ALL;
sf->inter_sf.disable_interinter_wedge_var_thresh = 100;
sf->inter_sf.fast_interintra_wedge_search = 1;
sf->inter_sf.prune_comp_search_by_single_result = boosted ? 4 : 1;
@@ -954,6 +947,7 @@
sf->tpl_sf.search_method = DIAMOND;
sf->rd_sf.perform_coeff_opt = is_boosted_arf2_bwd_type ? 3 : 4;
+ sf->rd_sf.use_mb_rd_hash = 1;
sf->lpf_sf.prune_wiener_based_on_src_var = 1;
sf->lpf_sf.prune_sgr_based_on_wiener = 1;
@@ -1017,7 +1011,6 @@
sf->tx_sf.adaptive_txb_search_level = boosted ? 2 : 3;
sf->tx_sf.tx_type_search.use_skip_flag_prediction = 2;
- sf->tx_sf.use_intra_txb_hash = 1;
sf->tx_sf.tx_type_search.prune_2d_txfm_mode = TX_TYPE_PRUNE_3;
sf->tx_sf.tx_type_search.winner_mode_tx_type_pruning = 1;
@@ -1083,9 +1076,6 @@
sf->tx_sf.tx_type_search.winner_mode_tx_type_pruning = 2;
sf->tx_sf.tx_type_search.fast_intra_tx_type_search = 1;
- // TODO(any): Experiment with enabling of this speed feature as hash state
- // is reset during winner mode processing
- sf->tx_sf.use_intra_txb_hash = 0;
sf->rd_sf.perform_coeff_opt = is_boosted_arf2_bwd_type ? 5 : 7;
sf->rd_sf.tx_domain_dist_thres_level = 2;
@@ -1168,8 +1158,6 @@
sf->tpl_sf.disable_filtered_key_tpl = 1;
sf->tx_sf.tx_type_search.winner_mode_tx_type_pruning = 4;
- sf->tx_sf.use_intra_txb_hash = 1;
-
sf->rd_sf.perform_coeff_opt = is_boosted_arf2_bwd_type ? 6 : 8;
sf->winner_mode_sf.dc_blk_pred_level = 2;
@@ -1179,12 +1167,6 @@
sf->fp_sf.skip_zeromv_motion_search = 1;
}
-
- // Intra txb hash is currently not compatible with multi-winner mode as the
- // hashes got reset during multi-winner mode processing.
- assert(IMPLIES(
- sf->winner_mode_sf.multi_winner_mode_type != MULTI_WINNER_MODE_OFF,
- !sf->tx_sf.use_intra_txb_hash));
}
static void set_rt_speed_feature_framesize_dependent(const AV1_COMP *const cpi,
@@ -1200,6 +1182,7 @@
if (!is_360p_or_larger) {
if (speed >= 6) sf->rt_sf.force_tx_search_off = 1;
+ if (speed >= 7) sf->lpf_sf.cdef_pick_method = CDEF_PICK_FROM_Q;
if (speed >= 8) {
sf->rt_sf.use_modeled_non_rd_cost = 0;
sf->rt_sf.use_nonrd_filter_search = 0;
@@ -1232,6 +1215,7 @@
sf->rt_sf.use_nonrd_altref_frame = 1;
}
if (speed >= 9) {
+ sf->rt_sf.gf_length_lvl = 1;
sf->rt_sf.skip_cdef_sb = 1;
}
}
@@ -1290,7 +1274,6 @@
sf->inter_sf.prune_ref_frame_for_rect_partitions = !boosted;
sf->inter_sf.reduce_inter_modes = 1;
sf->inter_sf.reuse_inter_intra_mode = 1;
- sf->inter_sf.comp_inter_joint_search_thresh = BLOCK_SIZES_ALL;
sf->inter_sf.disable_interinter_wedge_var_thresh = 100;
sf->inter_sf.fast_wedge_sign_estimate = 1;
sf->inter_sf.prune_comp_type_by_comp_avg = 2;
@@ -1328,8 +1311,10 @@
sf->part_sf.reuse_prev_rd_results_for_part_ab = 1;
sf->part_sf.use_best_rd_for_pruning = 1;
sf->part_sf.prune_ext_partition_types_search_level = 2;
- sf->part_sf.partition_search_breakout_rate_thr = 80;
sf->part_sf.less_rectangular_check_level = 2;
+ sf->part_sf.early_term_after_none_split = 1;
+ sf->part_sf.partition_search_breakout_dist_thr = (1 << 25);
+ sf->part_sf.partition_search_breakout_rate_thr = 200;
sf->rd_sf.tx_domain_dist_thres_level = 1;
sf->rd_sf.tx_domain_dist_level = 1;
@@ -1350,14 +1335,10 @@
sf->tx_sf.tx_size_search_lgr_block = 1;
sf->tx_sf.tx_type_search.ml_tx_split_thresh = 4000;
sf->tx_sf.tx_type_search.skip_tx_search = 1;
- sf->tx_sf.use_intra_txb_hash = 1;
sf->tx_sf.inter_tx_size_search_init_depth_rect = 1;
sf->tx_sf.inter_tx_size_search_init_depth_sqr = 1;
sf->tx_sf.model_based_prune_tx_search_level = 0;
sf->tx_sf.tx_type_search.prune_2d_txfm_mode = TX_TYPE_PRUNE_2;
- // TODO(any, yunqing): somehow this is needed by sf->rt_sf.use_nonrd_pick_mode
- // at speed 7? Need more investigation.
- sf->tx_sf.use_intra_txb_hash = 0;
sf->winner_mode_sf.tx_size_search_level = frame_is_intra_only(cm) ? 0 : 2;
@@ -1401,7 +1382,6 @@
sf->intra_sf.skip_filter_intra_in_inter_frames = 1;
sf->tx_sf.tx_type_search.prune_2d_txfm_mode = TX_TYPE_PRUNE_3;
- sf->tx_sf.use_inter_txb_hash = 0;
sf->tx_sf.refine_fast_tx_search_results = 0;
sf->tx_sf.tx_type_search.fast_intra_tx_type_search = 1;
sf->tx_sf.tx_type_search.use_skip_flag_prediction = 2;
@@ -1414,7 +1394,7 @@
sf->lpf_sf.lpf_pick = LPF_PICK_FROM_Q;
sf->lpf_sf.disable_lr_filter = 1;
- sf->winner_mode_sf.dc_blk_pred_level = 1;
+ sf->winner_mode_sf.dc_blk_pred_level = frame_is_intra_only(cm) ? 0 : 2;
sf->winner_mode_sf.enable_winner_mode_for_tx_size_srch = 1;
sf->winner_mode_sf.tx_size_search_level = 1;
@@ -1435,20 +1415,31 @@
if (cpi->oxcf.kf_cfg.key_freq_max != 0 &&
cm->width * cm->height > 640 * 480)
sf->rt_sf.use_temporal_noise_estimate = 1;
- sf->rt_sf.use_golden_frame = 0;
sf->rt_sf.skip_tx_no_split_var_based_partition = 1;
+ sf->rt_sf.skip_newmv_mode_based_on_sse = 1;
+
+ // For SVC: use better mv search on base temporal layers, and only
+ // on base spatial layer if highest resolution is above 640x360.
+ if (cpi->svc.number_temporal_layers > 1 &&
+ cpi->svc.temporal_layer_id < cpi->svc.number_temporal_layers - 1 &&
+ (cpi->svc.spatial_layer_id == 0 ||
+ cpi->oxcf.frm_dim_cfg.width * cpi->oxcf.frm_dim_cfg.height <=
+ 640 * 360)) {
+ sf->mv_sf.search_method = NSTEP;
+ sf->mv_sf.subpel_search_method = SUBPEL_TREE;
+ sf->rt_sf.fullpel_search_step_param = 6;
+ }
}
if (speed >= 6) {
sf->mv_sf.use_fullpel_costlist = 1;
sf->tx_sf.tx_type_search.fast_inter_tx_type_prob_thresh = 0;
-
- sf->part_sf.adjust_var_based_rd_partitioning = 1;
+ sf->inter_sf.prune_warped_prob_thresh = 8;
+ sf->inter_sf.extra_prune_warped = 1;
}
if (speed >= 7) {
- sf->rt_sf.use_golden_frame = 1;
sf->part_sf.default_max_partition_size = BLOCK_128X128;
sf->part_sf.default_min_partition_size = BLOCK_8X8;
sf->part_sf.partition_search_type = VAR_BASED_PARTITION;
@@ -1469,8 +1460,8 @@
sf->intra_sf.intra_y_mode_mask[i] = INTRA_ALL;
}
- sf->lpf_sf.cdef_pick_method = CDEF_PICK_FROM_Q;
sf->lpf_sf.lpf_pick = LPF_PICK_FROM_Q;
+ sf->lpf_sf.cdef_pick_method = CDEF_FAST_SEARCH_LVL5;
sf->rt_sf.mode_search_skip_flags |= FLAG_SKIP_INTRA_DIRMISMATCH;
sf->rt_sf.nonrd_prune_ref_frame_search = 1;
@@ -1487,26 +1478,26 @@
sf->rt_sf.nonrd_check_partition_merge_mode = 1;
sf->rt_sf.nonrd_check_partition_split = 0;
sf->rt_sf.skip_intra_pred_if_tx_skip = 1;
- // For SVC: use better mv search on base temporal layer, and only
+ // For SVC: use better mv search on base temporal layers, and only
// on base spatial layer if highest resolution is above 640x360.
- if (cpi->svc.number_temporal_layers > 1) {
- if (cpi->svc.temporal_layer_id == 0 &&
- (cpi->svc.spatial_layer_id == 0 ||
- cpi->oxcf.frm_dim_cfg.width * cpi->oxcf.frm_dim_cfg.height <=
- 640 * 360)) {
- sf->mv_sf.search_method = NSTEP;
- sf->mv_sf.subpel_search_method = SUBPEL_TREE;
- sf->rt_sf.fullpel_search_step_param = 6;
- } else if (cpi->svc.non_reference_frame) {
- sf->mv_sf.subpel_search_method = SUBPEL_TREE_PRUNED_MORE;
- sf->rt_sf.fullpel_search_step_param = 10;
- }
+ if (cpi->svc.number_temporal_layers > 1 &&
+ cpi->svc.temporal_layer_id < cpi->svc.number_temporal_layers - 1 &&
+ (cpi->svc.spatial_layer_id == 0 ||
+ cpi->oxcf.frm_dim_cfg.width * cpi->oxcf.frm_dim_cfg.height <=
+ 640 * 360)) {
+ sf->mv_sf.search_method = NSTEP;
+ sf->mv_sf.subpel_search_method = SUBPEL_TREE;
+ sf->rt_sf.fullpel_search_step_param = 6;
+ } else if (cpi->svc.non_reference_frame) {
+ sf->mv_sf.subpel_search_method = SUBPEL_TREE_PRUNED_MORE;
+ sf->rt_sf.fullpel_search_step_param = 10;
}
// TODO(marpan): Look into why enabling skip_loopfilter_non_reference is
// not bitexact on rtc testset, its very close (< ~0.01 bdrate), but not
// always bitexact.
if (cpi->ppi->use_svc && cpi->svc.non_reference_frame &&
- sf->lpf_sf.cdef_pick_method == CDEF_PICK_FROM_Q &&
+ (sf->lpf_sf.cdef_pick_method == CDEF_PICK_FROM_Q ||
+ sf->lpf_sf.cdef_pick_method == CDEF_FAST_SEARCH_LVL5) &&
sf->lpf_sf.lpf_pick == LPF_PICK_FROM_Q)
sf->rt_sf.skip_loopfilter_non_reference = 1;
// Set mask for intra modes.
@@ -1543,6 +1534,7 @@
sf->interp_sf.cb_pred_filter_search = 1;
}
if (speed >= 9) {
+ sf->lpf_sf.cdef_pick_method = CDEF_PICK_FROM_Q;
sf->rt_sf.estimate_motion_for_var_based_partition = 0;
sf->rt_sf.force_large_partition_blocks = 1;
for (int i = 0; i < BLOCK_SIZES; ++i)
@@ -1553,6 +1545,7 @@
sf->rt_sf.skip_intra_pred_if_tx_skip = 1;
sf->rt_sf.nonrd_agressive_skip = 1;
sf->rt_sf.nonrd_prune_ref_frame_search = 3;
+ sf->mv_sf.subpel_search_method = SUBPEL_TREE_PRUNED_MORE;
}
}
@@ -1659,7 +1652,6 @@
}
static AOM_INLINE void init_inter_sf(INTER_MODE_SPEED_FEATURES *inter_sf) {
- inter_sf->comp_inter_joint_search_thresh = BLOCK_4X4;
inter_sf->adaptive_rd_thresh = 0;
inter_sf->model_based_post_interp_filter_breakout = 0;
inter_sf->reduce_inter_modes = 0;
@@ -1756,8 +1748,6 @@
tx_sf->tx_type_search.winner_mode_tx_type_pruning = 0;
tx_sf->txb_split_cap = 1;
tx_sf->adaptive_txb_search_level = 0;
- tx_sf->use_intra_txb_hash = 0;
- tx_sf->use_inter_txb_hash = 1;
tx_sf->refine_fast_tx_search_results = 1;
tx_sf->prune_tx_size_level = 0;
}
@@ -1784,7 +1774,7 @@
} else {
assert(0 && "Invalid disable_trellis_quant value");
}
- rd_sf->use_mb_rd_hash = 1;
+ rd_sf->use_mb_rd_hash = 0;
rd_sf->simple_model_rd_from_var = 0;
rd_sf->tx_domain_dist_level = 0;
rd_sf->tx_domain_dist_thres_level = 0;
@@ -1828,7 +1818,6 @@
rt_sf->nonrd_prune_ref_frame_search = 0;
rt_sf->use_nonrd_pick_mode = 0;
rt_sf->use_nonrd_altref_frame = 0;
- rt_sf->use_golden_frame = 0;
rt_sf->use_comp_ref_nonrd = 0;
rt_sf->use_real_time_ref_set = 0;
rt_sf->short_circuit_low_temp_var = 0;
@@ -1853,6 +1842,8 @@
rt_sf->skip_cdef_sb = 0;
rt_sf->force_large_partition_blocks_intra = 0;
rt_sf->skip_tx_no_split_var_based_partition = 0;
+ rt_sf->skip_newmv_mode_based_on_sse = 0;
+ rt_sf->gf_length_lvl = 0;
}
void av1_set_speed_features_framesize_dependent(AV1_COMP *cpi, int speed) {
@@ -2057,7 +2048,16 @@
const int is_arf2_bwd_type =
cpi->ppi->gf_group.update_type[cpi->gf_frame_index] == INTNL_ARF_UPDATE;
- if (cpi->oxcf.mode == REALTIME) return;
+ if (cpi->oxcf.mode == REALTIME) {
+ if (speed >= 6) {
+ const int qindex_thresh = boosted ? 190 : (is_720p_or_larger ? 120 : 150);
+ sf->part_sf.adjust_var_based_rd_partitioning =
+ frame_is_intra_only(cm)
+ ? 0
+ : cm->quant_params.base_qindex > qindex_thresh;
+ }
+ return;
+ }
if (speed == 0) {
// qindex_thresh for resolution < 720p
@@ -2090,7 +2090,6 @@
sf->interp_sf.cb_pred_filter_search = 0;
sf->tx_sf.tx_type_search.prune_2d_txfm_mode = TX_TYPE_PRUNE_2;
sf->tx_sf.tx_type_search.skip_tx_search = 1;
- sf->tx_sf.use_intra_txb_hash = 1;
}
}
}
diff --git a/av1/encoder/speed_features.h b/av1/encoder/speed_features.h
index c1861d7..41a56a6 100644
--- a/av1/encoder/speed_features.h
+++ b/av1/encoder/speed_features.h
@@ -160,6 +160,7 @@
CDEF_FAST_SEARCH_LVL3, /**< Search reduced subset of secondary filters than
Level 2. */
CDEF_FAST_SEARCH_LVL4, /**< Search reduced subset of filters than Level 3. */
+ CDEF_FAST_SEARCH_LVL5, /**< Search reduced subset of filters than Level 4. */
CDEF_PICK_FROM_Q, /**< Estimate filter strength based on quantizer. */
CDEF_PICK_METHODS
} CDEF_PICK_METHOD;
@@ -793,12 +794,6 @@
// same single inter mode as a group.
int prune_comp_search_by_single_result;
- // If 1 we iterate finding a best reference for 2 ref frames together - via
- // a log search that iterates 4 times (check around mv for last for best
- // error of combined predictor then check around mv for alt). If 0 we
- // we just use the best motion vector found for each frame by itself.
- BLOCK_SIZE comp_inter_joint_search_thresh;
-
// Instead of performing a full MV search, do a simple translation first
// and only perform a full MV search on the motion vectors that performed
// well.
@@ -914,6 +909,9 @@
// Cap the no. of txfm searches for a given prediction mode.
// 0: no cap, 1: cap beyond first 4 searches, 2: cap beyond first 3 searches.
int limit_txfm_eval_per_mode;
+
+ // Prune warped motion search based on block size.
+ int extra_prune_warped;
} INTER_MODE_SPEED_FEATURES;
typedef struct INTERP_FILTER_SPEED_FEATURES {
@@ -1072,16 +1070,6 @@
// 1-2: progressively increasing aggressiveness of pruning
int model_based_prune_tx_search_level;
- // Use hash table to store intra(keyframe only) txb transform search results
- // to avoid repeated search on the same residue signal. This is currently not
- // compatible with multi-winner mode as the hash states are reset during
- // winner mode processing.
- int use_intra_txb_hash;
-
- // Use hash table to store inter txb transform search results
- // to avoid repeated search on the same residue signal.
- int use_inter_txb_hash;
-
// Refine TX type after fast TX search.
int refine_fast_tx_search_results;
@@ -1233,9 +1221,6 @@
// Use ALTREF frame in non-RD mode decision.
int use_nonrd_altref_frame;
- // Use GOLDEN frame in pickmode decision.
- int use_golden_frame;
-
// Use compound reference for non-RD mode.
int use_comp_ref_nonrd;
@@ -1320,6 +1305,14 @@
// Skip evaluation of no split in tx size selection for merge partition
int skip_tx_no_split_var_based_partition;
+
+ // Intermediate termination of newMV mode evaluation based on the SSE of the
+ // best mode found so far.
+ int skip_newmv_mode_based_on_sse;
+
+ // Define gf length multiplier.
+ // Level 0: use large multiplier, level 1: use medium multiplier.
+ int gf_length_lvl;
} REAL_TIME_SPEED_FEATURES;
/*!\endcond */
diff --git a/av1/encoder/svc_layercontext.c b/av1/encoder/svc_layercontext.c
index 67b3082..4e48218 100644
--- a/av1/encoder/svc_layercontext.c
+++ b/av1/encoder/svc_layercontext.c
@@ -62,7 +62,6 @@
// (i.e., ss_number_layers > 1), these need to be updated per spatial
// layer. Cyclic refresh is only applied on base temporal layer.
if (svc->number_spatial_layers > 1 && tl == 0) {
- size_t last_coded_q_map_size;
lc->sb_index = 0;
lc->actual_num_seg1_blocks = 0;
lc->actual_num_seg2_blocks = 0;
@@ -71,13 +70,6 @@
CHECK_MEM_ERROR(cm, lc->map,
aom_malloc(mi_rows * mi_cols * sizeof(*lc->map)));
memset(lc->map, 0, mi_rows * mi_cols);
- last_coded_q_map_size =
- mi_rows * mi_cols * sizeof(*lc->last_coded_q_map);
- if (lc->last_coded_q_map) aom_free(lc->last_coded_q_map);
- CHECK_MEM_ERROR(cm, lc->last_coded_q_map,
- aom_malloc(last_coded_q_map_size));
- assert(MAXQ <= 255);
- memset(lc->last_coded_q_map, MAXQ, last_coded_q_map_size);
}
}
svc->downsample_filter_type[sl] = BILINEAR;
@@ -195,7 +187,6 @@
svc->number_spatial_layers > 1 && svc->temporal_layer_id == 0) {
CYCLIC_REFRESH *const cr = cpi->cyclic_refresh;
swap_ptr(&cr->map, &lc->map);
- swap_ptr(&cr->last_coded_q_map, &lc->last_coded_q_map);
cr->sb_index = lc->sb_index;
cr->actual_num_seg1_blocks = lc->actual_num_seg1_blocks;
cr->actual_num_seg2_blocks = lc->actual_num_seg2_blocks;
@@ -234,11 +225,8 @@
cpi->svc.number_spatial_layers > 1 && svc->temporal_layer_id == 0) {
CYCLIC_REFRESH *const cr = cpi->cyclic_refresh;
signed char *temp = lc->map;
- uint8_t *temp2 = lc->last_coded_q_map;
lc->map = cr->map;
cr->map = temp;
- lc->last_coded_q_map = cr->last_coded_q_map;
- cr->last_coded_q_map = temp2;
lc->sb_index = cr->sb_index;
lc->actual_num_seg1_blocks = cr->actual_num_seg1_blocks;
lc->actual_num_seg2_blocks = cr->actual_num_seg2_blocks;
@@ -301,7 +289,6 @@
int layer = LAYER_IDS_TO_IDX(sl, tl, svc->number_temporal_layers);
LAYER_CONTEXT *const lc = &svc->layer_context[layer];
if (lc->map) aom_free(lc->map);
- if (lc->last_coded_q_map) aom_free(lc->last_coded_q_map);
}
}
}
diff --git a/av1/encoder/svc_layercontext.h b/av1/encoder/svc_layercontext.h
index a1dff40..310d08a 100644
--- a/av1/encoder/svc_layercontext.h
+++ b/av1/encoder/svc_layercontext.h
@@ -49,11 +49,6 @@
*/
int8_t *map;
/*!
- * Segmentation map for last coded quantization paramters.
- */
- uint8_t *last_coded_q_map;
-
- /*!
* Number of blocks on segment 1
*/
int actual_num_seg1_blocks;
@@ -110,6 +105,7 @@
/*!\cond */
int ref_idx[INTER_REFS_PER_FRAME];
int refresh[REF_FRAMES];
+ int gld_idx_1layer;
double base_framerate;
unsigned int current_superframe;
unsigned int buffer_time_index[REF_FRAMES];
diff --git a/av1/encoder/temporal_filter.c b/av1/encoder/temporal_filter.c
index d0648df..b229d43 100644
--- a/av1/encoder/temporal_filter.c
+++ b/av1/encoder/temporal_filter.c
@@ -861,13 +861,13 @@
}
}
tf_normalize_filtered_frame(mbd, block_size, mb_row, mb_col, num_planes,
- accum, count, &cpi->ppi->alt_ref_buffer);
+ accum, count, tf_ctx->output_frame);
if (check_show_existing) {
const int y_height = mb_height >> mbd->plane[0].subsampling_y;
const int y_width = mb_width >> mbd->plane[0].subsampling_x;
const int source_y_stride = frame_to_filter->y_stride;
- const int filter_y_stride = cpi->ppi->alt_ref_buffer.y_stride;
+ const int filter_y_stride = tf_ctx->output_frame->y_stride;
const int source_offset =
mb_row * y_height * source_y_stride + mb_col * y_width;
const int filter_offset =
@@ -875,7 +875,7 @@
unsigned int sse = 0;
cpi->ppi->fn_ptr[block_size].vf(
frame_to_filter->y_buffer + source_offset, source_y_stride,
- cpi->ppi->alt_ref_buffer.y_buffer + filter_offset, filter_y_stride,
+ tf_ctx->output_frame->y_buffer + filter_offset, filter_y_stride,
&sse);
diff->sum += sse;
diff->sse += sse * (int64_t)sse;
@@ -1130,12 +1130,14 @@
// Nothing will be returned. But the contents of cpi->tf_ctx will be modified.
static void init_tf_ctx(AV1_COMP *cpi, int filter_frame_lookahead_idx,
int is_second_arf, FRAME_UPDATE_TYPE update_type,
- int is_forward_keyframe) {
+ int is_forward_keyframe,
+ YV12_BUFFER_CONFIG *output_frame) {
TemporalFilterCtx *tf_ctx = &cpi->tf_ctx;
// Setup frame buffer for filtering.
YV12_BUFFER_CONFIG **frames = tf_ctx->frames;
tf_ctx->num_frames = 0;
tf_ctx->filter_frame_idx = -1;
+ tf_ctx->output_frame = output_frame;
tf_setup_filtering_buffer(cpi, filter_frame_lookahead_idx, is_second_arf,
update_type, is_forward_keyframe);
assert(tf_ctx->num_frames > 0);
@@ -1184,7 +1186,8 @@
int av1_temporal_filter(AV1_COMP *cpi, const int filter_frame_lookahead_idx,
FRAME_UPDATE_TYPE update_type, int is_forward_keyframe,
- int *show_existing_arf) {
+ int *show_existing_arf,
+ YV12_BUFFER_CONFIG *output_frame) {
MultiThreadInfo *const mt_info = &cpi->mt_info;
// Basic informaton of the current frame.
const GF_GROUP *const gf_group = &cpi->ppi->gf_group;
@@ -1211,7 +1214,7 @@
// Initialize temporal filter context structure.
init_tf_ctx(cpi, filter_frame_lookahead_idx, is_second_arf, update_type,
- is_forward_keyframe);
+ is_forward_keyframe, output_frame);
// Set showable frame.
if (is_forward_keyframe == 0 && update_type != KF_UPDATE) {
diff --git a/av1/encoder/temporal_filter.h b/av1/encoder/temporal_filter.h
index 3b95637..bc9ff5c 100644
--- a/av1/encoder/temporal_filter.h
+++ b/av1/encoder/temporal_filter.h
@@ -89,6 +89,12 @@
* Number of frames in the frame buffer.
*/
int num_frames;
+
+ /*!
+ * Output filtered frame
+ */
+ YV12_BUFFER_CONFIG *output_frame;
+
/*!
* Index of the frame to be filtered.
*/
@@ -207,21 +213,22 @@
* -15 means to filter the 17-th frame, which is a key frame in one-pass mode.
*
* \ingroup src_frame_proc
- * \param[in] cpi Top level encoder instance structure
- * \param[in] filter_frame_lookahead_idx The index of the to-filter frame in
- * the lookahead buffer cpi->lookahead.
- * \param[in] update_type This frame's update type.
- * \param[in] is_forward_keyframe Indicate whether this is a forward
- * keyframe.
- * \param[in,out] show_existing_arf Whether to show existing ARF. This
- * field is updated in this function.
+ * \param[in] cpi Top level encoder instance structure
+ * \param[in] filter_frame_lookahead_idx The index of the to-filter frame
+ * in the lookahead buffer cpi->lookahead.
+ * \param[in] update_type This frame's update type.
+ * \param[in] is_forward_keyframe Indicate whether this is a forward keyframe.
+ * \param[in,out] show_existing_arf Whether to show existing ARF. This
+ * field is updated in this function.
+ * \param[out] output_frame Output filtered frame.
*
* \return Whether temporal filtering is successfully done.
*/
int av1_temporal_filter(struct AV1_COMP *cpi,
const int filter_frame_lookahead_idx,
FRAME_UPDATE_TYPE update_type, int is_forward_keyframe,
- int *show_existing_arf);
+ int *show_existing_arf,
+ YV12_BUFFER_CONFIG *output_frame);
/*!\cond */
// Helper function to get `q` used for encoding.
diff --git a/av1/encoder/tpl_model.c b/av1/encoder/tpl_model.c
index 70817d6..468dfe6 100644
--- a/av1/encoder/tpl_model.c
+++ b/av1/encoder/tpl_model.c
@@ -314,7 +314,8 @@
tran_low_t *dqcoeff, AV1_COMMON *cm, MACROBLOCK *x,
const YV12_BUFFER_CONFIG *ref_frame_ptr[2], uint8_t *rec_buffer_pool[3],
const int rec_stride_pool[3], TX_SIZE tx_size, PREDICTION_MODE best_mode,
- int mi_row, int mi_col, int use_y_only_rate_distortion) {
+ int mi_row, int mi_col, int use_y_only_rate_distortion,
+ TplTxfmStats *tpl_txfm_stats) {
const SequenceHeader *seq_params = cm->seq_params;
*rate_cost = 0;
*recon_error = 1;
@@ -401,6 +402,11 @@
block_size_high[bsize_plane], max_txsize_rect_lookup[bsize_plane],
&this_rate, &this_recon_error, &sse);
+ if (plane == 0 && tpl_txfm_stats) {
+ // We only collect the Y plane's transform coefficients.
+ av1_record_tpl_txfm_block(tpl_txfm_stats, coeff);
+ }
+
*recon_error += this_recon_error;
*pred_error += sse;
*rate_cost += this_rate;
@@ -763,7 +769,7 @@
get_rate_distortion(&rate_cost, &recon_error, &pred_error, src_diff, coeff,
qcoeff, dqcoeff, cm, x, ref_frame_ptr, rec_buffer_pool,
rec_stride_pool, tx_size, best_mode, mi_row, mi_col,
- use_y_only_rate_distortion);
+ use_y_only_rate_distortion, NULL);
tpl_stats->srcrf_rate = rate_cost << TPL_DEP_COST_SCALE_LOG2;
}
@@ -790,9 +796,7 @@
get_rate_distortion(&rate_cost, &recon_error, &pred_error, src_diff, coeff,
qcoeff, dqcoeff, cm, x, ref_frame_ptr, rec_buffer_pool,
rec_stride_pool, tx_size, best_mode, mi_row, mi_col,
- use_y_only_rate_distortion);
-
- av1_record_tpl_txfm_block(tpl_txfm_stats, coeff);
+ use_y_only_rate_distortion, tpl_txfm_stats);
tpl_stats->recrf_dist = recon_error << (TPL_DEP_COST_SCALE_LOG2);
tpl_stats->recrf_rate = rate_cost << TPL_DEP_COST_SCALE_LOG2;
@@ -812,7 +816,7 @@
get_rate_distortion(&rate_cost, &recon_error, &pred_error, src_diff, coeff,
qcoeff, dqcoeff, cm, x, ref_frame_ptr, rec_buffer_pool,
rec_stride_pool, tx_size, best_mode, mi_row, mi_col,
- use_y_only_rate_distortion);
+ use_y_only_rate_distortion, NULL);
tpl_stats->cmp_recrf_dist[0] = recon_error << TPL_DEP_COST_SCALE_LOG2;
tpl_stats->cmp_recrf_rate[0] = rate_cost << TPL_DEP_COST_SCALE_LOG2;
@@ -833,7 +837,7 @@
get_rate_distortion(&rate_cost, &recon_error, &pred_error, src_diff, coeff,
qcoeff, dqcoeff, cm, x, ref_frame_ptr, rec_buffer_pool,
rec_stride_pool, tx_size, best_mode, mi_row, mi_col,
- use_y_only_rate_distortion);
+ use_y_only_rate_distortion, NULL);
tpl_stats->cmp_recrf_dist[1] = recon_error << TPL_DEP_COST_SCALE_LOG2;
tpl_stats->cmp_recrf_rate[1] = rate_cost << TPL_DEP_COST_SCALE_LOG2;
@@ -1960,25 +1964,26 @@
int av1_q_mode_estimate_base_q(const GF_GROUP *gf_group,
const TplTxfmStats *txfm_stats_list,
const int *stats_valid_list, double bit_budget,
- int gf_frame_index, double arf_qstep_ratio,
- aom_bit_depth_t bit_depth, double scale_factor,
+ int gf_frame_index, aom_bit_depth_t bit_depth,
+ double scale_factor,
+ const double *qstep_ratio_list,
int *q_index_list,
double *estimated_bitrate_byframe) {
int q_max = 255; // Maximum q value.
int q_min = 0; // Minimum q value.
int q = (q_max + q_min) / 2;
- av1_q_mode_compute_gop_q_indices(gf_frame_index, q_max, arf_qstep_ratio,
+ av1_q_mode_compute_gop_q_indices(gf_frame_index, q_max, qstep_ratio_list,
bit_depth, gf_group, q_index_list);
double q_max_estimate = av1_estimate_gop_bitrate(
q_index_list, gf_group->size, txfm_stats_list, stats_valid_list, NULL);
- av1_q_mode_compute_gop_q_indices(gf_frame_index, q_min, arf_qstep_ratio,
+ av1_q_mode_compute_gop_q_indices(gf_frame_index, q_min, qstep_ratio_list,
bit_depth, gf_group, q_index_list);
double q_min_estimate = av1_estimate_gop_bitrate(
q_index_list, gf_group->size, txfm_stats_list, stats_valid_list, NULL);
while (true) {
- av1_q_mode_compute_gop_q_indices(gf_frame_index, q, arf_qstep_ratio,
+ av1_q_mode_compute_gop_q_indices(gf_frame_index, q, qstep_ratio_list,
bit_depth, gf_group, q_index_list);
double estimate = av1_estimate_gop_bitrate(
@@ -2009,7 +2014,7 @@
}
// Update q_index_list and vbr_rc_info.
- av1_q_mode_compute_gop_q_indices(gf_frame_index, q, arf_qstep_ratio,
+ av1_q_mode_compute_gop_q_indices(gf_frame_index, q, qstep_ratio_list,
bit_depth, gf_group, q_index_list);
av1_estimate_gop_bitrate(q_index_list, gf_group->size, txfm_stats_list,
stats_valid_list, estimated_bitrate_byframe);
@@ -2017,6 +2022,10 @@
}
double av1_tpl_get_qstep_ratio(const TplParams *tpl_data, int gf_frame_index) {
+ if (!av1_tpl_stats_ready(tpl_data, gf_frame_index)) {
+ return 1;
+ }
+
const TplDepFrame *tpl_frame = &tpl_data->tpl_frame[gf_frame_index];
const TplDepStats *tpl_stats = tpl_frame->tpl_stats_ptr;
@@ -2069,9 +2078,11 @@
if (gf_frame_index == 0) {
vbr_rc_info->q_index_list_ready = 1;
double gop_bit_budget = vbr_rc_info->gop_bit_budget;
- // Use the gop_bit_budget to determine q_index_list.
- const double arf_qstep_ratio =
- av1_tpl_get_qstep_ratio(tpl_data, gf_frame_index);
+
+ for (int i = gf_frame_index; i < gf_group->size; i++) {
+ vbr_rc_info->qstep_ratio_list[i] = av1_tpl_get_qstep_ratio(tpl_data, i);
+ }
+
// We update the q indices in vbr_rc_info in vbr_rc_info->q_index_list
// rather than gf_group->q_val to avoid conflicts with the existing code.
int stats_valid_list[MAX_LENGTH_TPL_FRAME_STATS] = { 0 };
@@ -2087,10 +2098,17 @@
double scale_factor =
vbr_rc_info->scale_factors[gf_group->update_type[gf_frame_index]];
- av1_q_mode_estimate_base_q(
+ vbr_rc_info->base_q_index = av1_q_mode_estimate_base_q(
gf_group, tpl_data->txfm_stats_list, stats_valid_list, gop_bit_budget,
- gf_frame_index, arf_qstep_ratio, bit_depth, scale_factor,
+ gf_frame_index, bit_depth, scale_factor, vbr_rc_info->qstep_ratio_list,
vbr_rc_info->q_index_list, vbr_rc_info->estimated_bitrate_byframe);
+ } else if (gf_frame_index == 1) {
+ for (int i = gf_frame_index; i < gf_group->size; i++) {
+ vbr_rc_info->qstep_ratio_list[i] = av1_tpl_get_qstep_ratio(tpl_data, i);
+ }
+ av1_q_mode_compute_gop_q_indices(gf_frame_index, vbr_rc_info->base_q_index,
+ vbr_rc_info->qstep_ratio_list, bit_depth,
+ gf_group, vbr_rc_info->q_index_list);
}
}
diff --git a/av1/encoder/tpl_model.h b/av1/encoder/tpl_model.h
index 718d447..1cd6a6d 100644
--- a/av1/encoder/tpl_model.h
+++ b/av1/encoder/tpl_model.h
@@ -245,6 +245,7 @@
// MV entropy estimation
// === Below this line are GOP related data that will be updated per GOP ===
+ int base_q_index; // Stores the base q index.
int q_index_list_ready;
int q_index_list[MAX_LENGTH_TPL_FRAME_STATS]; // q indices for the current
// GOP
@@ -254,6 +255,9 @@
int actual_bitrate_byframe[MAX_LENGTH_TPL_FRAME_STATS];
int actual_mv_bitrate_byframe[MAX_LENGTH_TPL_FRAME_STATS];
int actual_coeff_bitrate_byframe[MAX_LENGTH_TPL_FRAME_STATS];
+
+ // Array to store qstep_ratio for each frame in a GOP
+ double qstep_ratio_list[MAX_LENGTH_TPL_FRAME_STATS];
} VBR_RATECTRL_INFO;
static INLINE void vbr_rc_reset_gop_data(VBR_RATECTRL_INFO *vbr_rc_info) {
@@ -271,10 +275,10 @@
vbr_rc_info->total_bit_budget = total_bit_budget;
vbr_rc_info->show_frame_count = show_frame_count;
vbr_rc_info->keyframe_bitrate = 0;
- const double scale_factors[FRAME_UPDATE_TYPES] = { 1.2, 1.2, 1.2, 1.2,
- 1.2, 1.2, 1.2 };
- const double mv_scale_factors[FRAME_UPDATE_TYPES] = { 5.0, 5.0, 5.0, 5.0,
- 5.0, 5.0, 5.0 };
+ const double scale_factors[FRAME_UPDATE_TYPES] = { 0.94559, 0.12040, 1,
+ 1.10199, 1, 1,
+ 0.16393 };
+ const double mv_scale_factors[FRAME_UPDATE_TYPES] = { 3, 3, 3, 3, 3, 3, 3 };
memcpy(vbr_rc_info->scale_factors, scale_factors,
sizeof(scale_factors[0]) * FRAME_UPDATE_TYPES);
memcpy(vbr_rc_info->mv_scale_factors, mv_scale_factors,
@@ -298,7 +302,7 @@
static INLINE void vbr_rc_info_log(const VBR_RATECTRL_INFO *vbr_rc_info,
int gf_frame_index, int gf_group_size,
- int *update_type) {
+ FRAME_UPDATE_TYPE *update_type) {
// Add +2 here because this is the last frame this method is called at.
if (gf_frame_index + 2 >= gf_group_size) {
printf(
@@ -550,10 +554,10 @@
* exists
* \param[in] bit_budget The specified bit budget to achieve
* \param[in] gf_frame_index current frame in the GOP
- * \param[in] arf_qstep_ratio ARF q step ratio
* \param[in] bit_depth bit depth
* \param[in] scale_factor Scale factor to improve budget estimation
- * \param[out] q_index_list array of q_index, one per frame
+ * \param[in] qstep_ratio_list Stores the qstep_ratio for each frame
+ * \param[out] q_index_list array of q_index, one per frame
* \param[out] estimated_bitrate_byframe bits usage per frame in the GOP
*
* \return Returns the optimal base q index to use.
@@ -561,8 +565,9 @@
int av1_q_mode_estimate_base_q(const struct GF_GROUP *gf_group,
const TplTxfmStats *txfm_stats_list,
const int *stats_valid_list, double bit_budget,
- int gf_frame_index, double arf_qstep_ratio,
- aom_bit_depth_t bit_depth, double scale_factor,
+ int gf_frame_index, aom_bit_depth_t bit_depth,
+ double scale_factor,
+ const double *qstep_ratio_list,
int *q_index_list,
double *estimated_bitrate_byframe);
diff --git a/av1/encoder/tx_search.c b/av1/encoder/tx_search.c
index 0e6e647..a3ac0d9 100644
--- a/av1/encoder/tx_search.c
+++ b/av1/encoder/tx_search.c
@@ -49,11 +49,6 @@
int8_t children[4];
} RD_RECORD_IDX_NODE;
-typedef struct tx_size_rd_info_node {
- TXB_RD_INFO *rd_info_array; // Points to array of size TX_TYPES.
- struct tx_size_rd_info_node *children[4];
-} TXB_RD_INFO_NODE;
-
// origin_threshold * 128 / 100
static const uint32_t skip_pred_threshold[3][BLOCK_SIZES_ALL] = {
{
@@ -87,256 +82,6 @@
12, 12, 23, 23, 32, 32, 8,
8, 16, 16, 23, 23 };
-static int find_tx_size_rd_info(TXB_RD_RECORD *cur_record,
- const uint32_t hash) {
- // Linear search through the circular buffer to find matching hash.
- for (int i = cur_record->index_start - 1; i >= 0; i--) {
- if (cur_record->hash_vals[i] == hash) return i;
- }
- for (int i = cur_record->num - 1; i >= cur_record->index_start; i--) {
- if (cur_record->hash_vals[i] == hash) return i;
- }
- int index;
- // If not found - add new RD info into the buffer and return its index
- if (cur_record->num < TX_SIZE_RD_RECORD_BUFFER_LEN) {
- index = (cur_record->index_start + cur_record->num) %
- TX_SIZE_RD_RECORD_BUFFER_LEN;
- cur_record->num++;
- } else {
- index = cur_record->index_start;
- cur_record->index_start =
- (cur_record->index_start + 1) % TX_SIZE_RD_RECORD_BUFFER_LEN;
- }
-
- cur_record->hash_vals[index] = hash;
- av1_zero(cur_record->tx_rd_info[index]);
- return index;
-}
-
-static const RD_RECORD_IDX_NODE rd_record_tree_8x8[] = {
- { 1, { 0 } },
-};
-
-static const RD_RECORD_IDX_NODE rd_record_tree_8x16[] = {
- { 0, { 1, 2, -1, -1 } },
- { 1, { 0, 0, 0, 0 } },
- { 1, { 0, 0, 0, 0 } },
-};
-
-static const RD_RECORD_IDX_NODE rd_record_tree_16x8[] = {
- { 0, { 1, 2, -1, -1 } },
- { 1, { 0 } },
- { 1, { 0 } },
-};
-
-static const RD_RECORD_IDX_NODE rd_record_tree_16x16[] = {
- { 0, { 1, 2, 3, 4 } }, { 1, { 0 } }, { 1, { 0 } }, { 1, { 0 } }, { 1, { 0 } },
-};
-
-static const RD_RECORD_IDX_NODE rd_record_tree_1_2[] = {
- { 0, { 1, 2, -1, -1 } },
- { 0, { 3, 4, 5, 6 } },
- { 0, { 7, 8, 9, 10 } },
-};
-
-static const RD_RECORD_IDX_NODE rd_record_tree_2_1[] = {
- { 0, { 1, 2, -1, -1 } },
- { 0, { 3, 4, 7, 8 } },
- { 0, { 5, 6, 9, 10 } },
-};
-
-static const RD_RECORD_IDX_NODE rd_record_tree_sqr[] = {
- { 0, { 1, 2, 3, 4 } }, { 0, { 5, 6, 9, 10 } }, { 0, { 7, 8, 11, 12 } },
- { 0, { 13, 14, 17, 18 } }, { 0, { 15, 16, 19, 20 } },
-};
-
-static const RD_RECORD_IDX_NODE rd_record_tree_64x128[] = {
- { 0, { 2, 3, 4, 5 } }, { 0, { 6, 7, 8, 9 } },
- { 0, { 10, 11, 14, 15 } }, { 0, { 12, 13, 16, 17 } },
- { 0, { 18, 19, 22, 23 } }, { 0, { 20, 21, 24, 25 } },
- { 0, { 26, 27, 30, 31 } }, { 0, { 28, 29, 32, 33 } },
- { 0, { 34, 35, 38, 39 } }, { 0, { 36, 37, 40, 41 } },
-};
-
-static const RD_RECORD_IDX_NODE rd_record_tree_128x64[] = {
- { 0, { 2, 3, 6, 7 } }, { 0, { 4, 5, 8, 9 } },
- { 0, { 10, 11, 18, 19 } }, { 0, { 12, 13, 20, 21 } },
- { 0, { 14, 15, 22, 23 } }, { 0, { 16, 17, 24, 25 } },
- { 0, { 26, 27, 34, 35 } }, { 0, { 28, 29, 36, 37 } },
- { 0, { 30, 31, 38, 39 } }, { 0, { 32, 33, 40, 41 } },
-};
-
-static const RD_RECORD_IDX_NODE rd_record_tree_128x128[] = {
- { 0, { 4, 5, 8, 9 } }, { 0, { 6, 7, 10, 11 } },
- { 0, { 12, 13, 16, 17 } }, { 0, { 14, 15, 18, 19 } },
- { 0, { 20, 21, 28, 29 } }, { 0, { 22, 23, 30, 31 } },
- { 0, { 24, 25, 32, 33 } }, { 0, { 26, 27, 34, 35 } },
- { 0, { 36, 37, 44, 45 } }, { 0, { 38, 39, 46, 47 } },
- { 0, { 40, 41, 48, 49 } }, { 0, { 42, 43, 50, 51 } },
- { 0, { 52, 53, 60, 61 } }, { 0, { 54, 55, 62, 63 } },
- { 0, { 56, 57, 64, 65 } }, { 0, { 58, 59, 66, 67 } },
- { 0, { 68, 69, 76, 77 } }, { 0, { 70, 71, 78, 79 } },
- { 0, { 72, 73, 80, 81 } }, { 0, { 74, 75, 82, 83 } },
-};
-
-static const RD_RECORD_IDX_NODE rd_record_tree_1_4[] = {
- { 0, { 1, -1, 2, -1 } },
- { 0, { 3, 4, -1, -1 } },
- { 0, { 5, 6, -1, -1 } },
-};
-
-static const RD_RECORD_IDX_NODE rd_record_tree_4_1[] = {
- { 0, { 1, 2, -1, -1 } },
- { 0, { 3, 4, -1, -1 } },
- { 0, { 5, 6, -1, -1 } },
-};
-
-static const RD_RECORD_IDX_NODE *rd_record_tree[BLOCK_SIZES_ALL] = {
- NULL, // BLOCK_4X4
- NULL, // BLOCK_4X8
- NULL, // BLOCK_8X4
- rd_record_tree_8x8, // BLOCK_8X8
- rd_record_tree_8x16, // BLOCK_8X16
- rd_record_tree_16x8, // BLOCK_16X8
- rd_record_tree_16x16, // BLOCK_16X16
- rd_record_tree_1_2, // BLOCK_16X32
- rd_record_tree_2_1, // BLOCK_32X16
- rd_record_tree_sqr, // BLOCK_32X32
- rd_record_tree_1_2, // BLOCK_32X64
- rd_record_tree_2_1, // BLOCK_64X32
- rd_record_tree_sqr, // BLOCK_64X64
- rd_record_tree_64x128, // BLOCK_64X128
- rd_record_tree_128x64, // BLOCK_128X64
- rd_record_tree_128x128, // BLOCK_128X128
- NULL, // BLOCK_4X16
- NULL, // BLOCK_16X4
- rd_record_tree_1_4, // BLOCK_8X32
- rd_record_tree_4_1, // BLOCK_32X8
- rd_record_tree_1_4, // BLOCK_16X64
- rd_record_tree_4_1, // BLOCK_64X16
-};
-
-static const int rd_record_tree_size[BLOCK_SIZES_ALL] = {
- 0, // BLOCK_4X4
- 0, // BLOCK_4X8
- 0, // BLOCK_8X4
- sizeof(rd_record_tree_8x8) / sizeof(RD_RECORD_IDX_NODE), // BLOCK_8X8
- sizeof(rd_record_tree_8x16) / sizeof(RD_RECORD_IDX_NODE), // BLOCK_8X16
- sizeof(rd_record_tree_16x8) / sizeof(RD_RECORD_IDX_NODE), // BLOCK_16X8
- sizeof(rd_record_tree_16x16) / sizeof(RD_RECORD_IDX_NODE), // BLOCK_16X16
- sizeof(rd_record_tree_1_2) / sizeof(RD_RECORD_IDX_NODE), // BLOCK_16X32
- sizeof(rd_record_tree_2_1) / sizeof(RD_RECORD_IDX_NODE), // BLOCK_32X16
- sizeof(rd_record_tree_sqr) / sizeof(RD_RECORD_IDX_NODE), // BLOCK_32X32
- sizeof(rd_record_tree_1_2) / sizeof(RD_RECORD_IDX_NODE), // BLOCK_32X64
- sizeof(rd_record_tree_2_1) / sizeof(RD_RECORD_IDX_NODE), // BLOCK_64X32
- sizeof(rd_record_tree_sqr) / sizeof(RD_RECORD_IDX_NODE), // BLOCK_64X64
- sizeof(rd_record_tree_64x128) / sizeof(RD_RECORD_IDX_NODE), // BLOCK_64X128
- sizeof(rd_record_tree_128x64) / sizeof(RD_RECORD_IDX_NODE), // BLOCK_128X64
- sizeof(rd_record_tree_128x128) / sizeof(RD_RECORD_IDX_NODE), // BLOCK_128X128
- 0, // BLOCK_4X16
- 0, // BLOCK_16X4
- sizeof(rd_record_tree_1_4) / sizeof(RD_RECORD_IDX_NODE), // BLOCK_8X32
- sizeof(rd_record_tree_4_1) / sizeof(RD_RECORD_IDX_NODE), // BLOCK_32X8
- sizeof(rd_record_tree_1_4) / sizeof(RD_RECORD_IDX_NODE), // BLOCK_16X64
- sizeof(rd_record_tree_4_1) / sizeof(RD_RECORD_IDX_NODE), // BLOCK_64X16
-};
-
-static INLINE void init_rd_record_tree(TXB_RD_INFO_NODE *tree,
- BLOCK_SIZE bsize) {
- const RD_RECORD_IDX_NODE *rd_record = rd_record_tree[bsize];
- const int size = rd_record_tree_size[bsize];
- for (int i = 0; i < size; ++i) {
- if (rd_record[i].leaf) {
- av1_zero(tree[i].children);
- } else {
- for (int j = 0; j < 4; ++j) {
- const int8_t idx = rd_record[i].children[j];
- tree[i].children[j] = idx > 0 ? &tree[idx] : NULL;
- }
- }
- }
-}
-
-// Go through all TX blocks that could be used in TX size search, compute
-// residual hash values for them and find matching RD info that stores previous
-// RD search results for these TX blocks. The idea is to prevent repeated
-// rate/distortion computations that happen because of the combination of
-// partition and TX size search. The resulting RD info records are returned in
-// the form of a quadtree for easier access in actual TX size search.
-static int find_tx_size_rd_records(MACROBLOCK *x, BLOCK_SIZE bsize,
- TXB_RD_INFO_NODE *dst_rd_info) {
- TxfmSearchInfo *txfm_info = &x->txfm_search_info;
- TXB_RD_RECORD *rd_records_table[4] = {
- txfm_info->txb_rd_records->txb_rd_record_8X8,
- txfm_info->txb_rd_records->txb_rd_record_16X16,
- txfm_info->txb_rd_records->txb_rd_record_32X32,
- txfm_info->txb_rd_records->txb_rd_record_64X64
- };
- const TX_SIZE max_square_tx_size = max_txsize_lookup[bsize];
- const int bw = block_size_wide[bsize];
- const int bh = block_size_high[bsize];
-
- // Hashing is performed only for square TX sizes larger than TX_4X4
- if (max_square_tx_size < TX_8X8) return 0;
- const int diff_stride = bw;
- const struct macroblock_plane *const p = &x->plane[0];
- const int16_t *diff = &p->src_diff[0];
- init_rd_record_tree(dst_rd_info, bsize);
- // Coordinates of the top-left corner of current block within the superblock
- // measured in pixels:
- const int mi_row = x->e_mbd.mi_row;
- const int mi_col = x->e_mbd.mi_col;
- const int mi_row_in_sb = (mi_row % MAX_MIB_SIZE) << MI_SIZE_LOG2;
- const int mi_col_in_sb = (mi_col % MAX_MIB_SIZE) << MI_SIZE_LOG2;
- int cur_rd_info_idx = 0;
- int cur_tx_depth = 0;
- TX_SIZE cur_tx_size = max_txsize_rect_lookup[bsize];
- while (cur_tx_depth <= MAX_VARTX_DEPTH) {
- const int cur_tx_bw = tx_size_wide[cur_tx_size];
- const int cur_tx_bh = tx_size_high[cur_tx_size];
- if (cur_tx_bw < 8 || cur_tx_bh < 8) break;
- const TX_SIZE next_tx_size = sub_tx_size_map[cur_tx_size];
- const int tx_size_idx = cur_tx_size - TX_8X8;
- for (int row = 0; row < bh; row += cur_tx_bh) {
- for (int col = 0; col < bw; col += cur_tx_bw) {
- if (cur_tx_bw != cur_tx_bh) {
- // Use dummy nodes for all rectangular transforms within the
- // TX size search tree.
- dst_rd_info[cur_rd_info_idx].rd_info_array = NULL;
- } else {
- // Get spatial location of this TX block within the superblock
- // (measured in cur_tx_bsize units).
- const int row_in_sb = (mi_row_in_sb + row) / cur_tx_bh;
- const int col_in_sb = (mi_col_in_sb + col) / cur_tx_bw;
-
- int16_t hash_data[MAX_SB_SQUARE];
- int16_t *cur_hash_row = hash_data;
- const int16_t *cur_diff_row = diff + row * diff_stride + col;
- for (int i = 0; i < cur_tx_bh; i++) {
- memcpy(cur_hash_row, cur_diff_row, sizeof(*hash_data) * cur_tx_bw);
- cur_hash_row += cur_tx_bw;
- cur_diff_row += diff_stride;
- }
- const int hash = av1_get_crc32c_value(
- &txfm_info->txb_rd_records->mb_rd_record.crc_calculator,
- (uint8_t *)hash_data, 2 * cur_tx_bw * cur_tx_bh);
- // Find corresponding RD info based on the hash value.
- const int record_idx =
- row_in_sb * (MAX_MIB_SIZE >> (tx_size_idx + 1)) + col_in_sb;
- TXB_RD_RECORD *records = &rd_records_table[tx_size_idx][record_idx];
- int idx = find_tx_size_rd_info(records, hash);
- dst_rd_info[cur_rd_info_idx].rd_info_array =
- &records->tx_rd_info[idx];
- }
- ++cur_rd_info_idx;
- }
- }
- cur_tx_size = next_tx_size;
- ++cur_tx_depth;
- }
- return 1;
-}
-
static INLINE uint32_t get_block_residue_hash(MACROBLOCK *x, BLOCK_SIZE bsize) {
const int rows = block_size_high[bsize];
const int cols = block_size_wide[bsize];
@@ -595,8 +340,7 @@
TX_SIZE tx_size, int depth, BLOCK_SIZE plane_bsize, ENTROPY_CONTEXT *ta,
ENTROPY_CONTEXT *tl, TXFM_CONTEXT *tx_above, TXFM_CONTEXT *tx_left,
RD_STATS *rd_stats, int64_t prev_level_rd, int64_t ref_best_rd,
- int *is_cost_valid, FAST_TX_SEARCH_MODE ftxs_mode,
- TXB_RD_INFO_NODE *rd_info_node);
+ int *is_cost_valid, FAST_TX_SEARCH_MODE ftxs_mode);
// NOTE: CONFIG_COLLECT_RD_STATS has 3 possible values
// 0: Do not collect any RD stats
@@ -1277,66 +1021,10 @@
blk_row, blk_col, plane_bsize, tx_bsize);
}
-static uint32_t get_intra_txb_hash(MACROBLOCK *x, int plane, int blk_row,
- int blk_col, BLOCK_SIZE plane_bsize,
- TX_SIZE tx_size) {
- int16_t tmp_data[64 * 64];
- const int diff_stride = block_size_wide[plane_bsize];
- const int16_t *diff = x->plane[plane].src_diff;
- const int16_t *cur_diff_row = diff + 4 * blk_row * diff_stride + 4 * blk_col;
- const int txb_w = tx_size_wide[tx_size];
- const int txb_h = tx_size_high[tx_size];
- uint8_t *hash_data = (uint8_t *)cur_diff_row;
- if (txb_w != diff_stride) {
- int16_t *cur_hash_row = tmp_data;
- for (int i = 0; i < txb_h; i++) {
- memcpy(cur_hash_row, cur_diff_row, sizeof(*diff) * txb_w);
- cur_hash_row += txb_w;
- cur_diff_row += diff_stride;
- }
- hash_data = (uint8_t *)tmp_data;
- }
- CRC32C *crc =
- &x->txfm_search_info.txb_rd_records->mb_rd_record.crc_calculator;
- const uint32_t hash = av1_get_crc32c_value(crc, hash_data, 2 * txb_w * txb_h);
- return (hash << 5) + tx_size;
-}
-
// pruning thresholds for prune_txk_type and prune_txk_type_separ
static const int prune_factors[5] = { 200, 200, 120, 80, 40 }; // scale 1000
static const int mul_factors[5] = { 80, 80, 70, 50, 30 }; // scale 100
-static INLINE int is_intra_hash_match(const AV1_COMP *cpi, MACROBLOCK *x,
- int plane, int blk_row, int blk_col,
- BLOCK_SIZE plane_bsize, TX_SIZE tx_size,
- const TXB_CTX *const txb_ctx,
- TXB_RD_INFO **intra_txb_rd_info,
- const int tx_type_map_idx,
- uint16_t *cur_joint_ctx) {
- MACROBLOCKD *xd = &x->e_mbd;
- TxfmSearchInfo *txfm_info = &x->txfm_search_info;
- assert(cpi->sf.tx_sf.use_intra_txb_hash &&
- frame_is_intra_only(&cpi->common) && !is_inter_block(xd->mi[0]) &&
- plane == 0 && tx_size_wide[tx_size] == tx_size_high[tx_size]);
- const uint32_t intra_hash =
- get_intra_txb_hash(x, plane, blk_row, blk_col, plane_bsize, tx_size);
- const int intra_hash_idx = find_tx_size_rd_info(
- &txfm_info->txb_rd_records->txb_rd_record_intra, intra_hash);
- *intra_txb_rd_info = &txfm_info->txb_rd_records->txb_rd_record_intra
- .tx_rd_info[intra_hash_idx];
- *cur_joint_ctx = (txb_ctx->dc_sign_ctx << 8) + txb_ctx->txb_skip_ctx;
- if ((*intra_txb_rd_info)->entropy_context == *cur_joint_ctx &&
- txfm_info->txb_rd_records->txb_rd_record_intra.tx_rd_info[intra_hash_idx]
- .valid) {
- xd->tx_type_map[tx_type_map_idx] = (*intra_txb_rd_info)->tx_type;
- const TX_TYPE ref_tx_type =
- av1_get_tx_type(xd, get_plane_type(plane), blk_row, blk_col, tx_size,
- cpi->common.features.reduced_tx_set_used);
- return (ref_tx_type == (*intra_txb_rd_info)->tx_type);
- }
- return 0;
-}
-
// R-D costs are sorted in ascending order.
static INLINE void sort_rd(int64_t rds[], int txk[], int len) {
int i, j, k;
@@ -1398,6 +1086,7 @@
int reduced_tx_set_used, int64_t ref_best_rd,
int num_sel) {
const AV1_COMMON *cm = &cpi->common;
+ MACROBLOCKD *xd = &x->e_mbd;
int idx;
@@ -1439,6 +1128,9 @@
tx_type = idx_map[idx];
txfm_param.tx_type = tx_type;
+ av1_setup_qmatrix(&cm->quant_params, xd, plane, tx_size, tx_type,
+ &quant_param);
+
av1_xform_quant(x, plane, block, blk_row, blk_col, plane_bsize, &txfm_param,
&quant_param);
@@ -1469,6 +1161,9 @@
tx_type = idx_map_v[idx_v[idx] * 4];
txfm_param.tx_type = tx_type;
+ av1_setup_qmatrix(&cm->quant_params, xd, plane, tx_size, tx_type,
+ &quant_param);
+
av1_xform_quant(x, plane, block, blk_row, blk_col, plane_bsize, &txfm_param,
&quant_param);
@@ -1529,6 +1224,7 @@
uint16_t allowed_tx_mask, int prune_factor,
const TXB_CTX *const txb_ctx, int reduced_tx_set_used) {
const AV1_COMMON *cm = &cpi->common;
+ MACROBLOCKD *xd = &x->e_mbd;
int tx_type;
int64_t rds[TX_TYPES];
@@ -1553,6 +1249,9 @@
}
txfm_param.tx_type = tx_type;
+ av1_setup_qmatrix(&cm->quant_params, xd, plane, tx_size, tx_type,
+ &quant_param);
+
// do txfm and quantization
av1_xform_quant(x, plane, block, blk_row, blk_col, plane_bsize, &txfm_param,
&quant_param);
@@ -2283,45 +1982,6 @@
skip_trellis |= !is_trellis_used(cpi->optimize_seg_arr[xd->mi[0]->segment_id],
DRY_RUN_NORMAL);
- // Hashing based speed feature for intra block. If the hash of the residue
- // is found in the hash table, use the previous RD search results stored in
- // the table and terminate early.
- TXB_RD_INFO *intra_txb_rd_info = NULL;
- uint16_t cur_joint_ctx = 0;
- const int is_inter = is_inter_block(mbmi);
- const int use_intra_txb_hash =
- cpi->sf.tx_sf.use_intra_txb_hash && frame_is_intra_only(cm) &&
- !is_inter && plane == 0 && tx_size_wide[tx_size] == tx_size_high[tx_size];
- if (use_intra_txb_hash) {
- const int mi_row = xd->mi_row;
- const int mi_col = xd->mi_col;
- const int within_border =
- mi_row >= xd->tile.mi_row_start &&
- (mi_row + mi_size_high[plane_bsize] < xd->tile.mi_row_end) &&
- mi_col >= xd->tile.mi_col_start &&
- (mi_col + mi_size_wide[plane_bsize] < xd->tile.mi_col_end);
- if (within_border &&
- is_intra_hash_match(cpi, x, plane, blk_row, blk_col, plane_bsize,
- tx_size, txb_ctx, &intra_txb_rd_info,
- tx_type_map_idx, &cur_joint_ctx)) {
- best_rd_stats->rate = intra_txb_rd_info->rate;
- best_rd_stats->dist = intra_txb_rd_info->dist;
- best_rd_stats->sse = intra_txb_rd_info->sse;
- best_rd_stats->skip_txfm = intra_txb_rd_info->eob == 0;
- x->plane[plane].eobs[block] = intra_txb_rd_info->eob;
- x->plane[plane].txb_entropy_ctx[block] =
- intra_txb_rd_info->txb_entropy_ctx;
- best_eob = intra_txb_rd_info->eob;
- best_tx_type = intra_txb_rd_info->tx_type;
- skip_trellis |= !intra_txb_rd_info->perform_block_coeff_opt;
- update_txk_array(xd, blk_row, blk_col, tx_size, best_tx_type);
- recon_intra(cpi, x, plane, block, blk_row, blk_col, plane_bsize, tx_size,
- txb_ctx, skip_trellis, best_tx_type, 1, &rate_cost, best_eob);
- p->dqcoeff = orig_dqcoeff;
- return;
- }
- }
-
uint8_t best_txb_ctx = 0;
// txk_allowed = TX_TYPES: >1 tx types are allowed
// txk_allowed < TX_TYPES: only that specific tx type is allowed.
@@ -2595,18 +2255,6 @@
best_rd_stats->sse = block_sse;
}
- if (intra_txb_rd_info != NULL) {
- intra_txb_rd_info->valid = 1;
- intra_txb_rd_info->entropy_context = cur_joint_ctx;
- intra_txb_rd_info->rate = best_rd_stats->rate;
- intra_txb_rd_info->dist = best_rd_stats->dist;
- intra_txb_rd_info->sse = best_rd_stats->sse;
- intra_txb_rd_info->eob = best_eob;
- intra_txb_rd_info->txb_entropy_ctx = best_txb_ctx;
- intra_txb_rd_info->perform_block_coeff_opt = perform_block_coeff_opt;
- if (plane == 0) intra_txb_rd_info->tx_type = best_tx_type;
- }
-
// Intra mode needs decoded pixels such that the next transform block
// can use them for prediction.
recon_intra(cpi, x, plane, block, blk_row, blk_col, plane_bsize, tx_size,
@@ -2621,51 +2269,13 @@
int block, int plane_bsize, TXB_CTX *txb_ctx,
RD_STATS *rd_stats,
FAST_TX_SEARCH_MODE ftxs_mode,
- int64_t ref_rdcost,
- TXB_RD_INFO *rd_info_array) {
- const struct macroblock_plane *const p = &x->plane[0];
- const uint16_t cur_joint_ctx =
- (txb_ctx->dc_sign_ctx << 8) + txb_ctx->txb_skip_ctx;
- MACROBLOCKD *xd = &x->e_mbd;
- assert(is_inter_block(xd->mi[0]));
- const int tx_type_map_idx = blk_row * xd->tx_type_map_stride + blk_col;
- // Look up RD and terminate early in case when we've already processed exactly
- // the same residue with exactly the same entropy context.
- if (rd_info_array != NULL && rd_info_array->valid &&
- rd_info_array->entropy_context == cur_joint_ctx) {
- xd->tx_type_map[tx_type_map_idx] = rd_info_array->tx_type;
- const TX_TYPE ref_tx_type =
- av1_get_tx_type(&x->e_mbd, get_plane_type(0), blk_row, blk_col, tx_size,
- cpi->common.features.reduced_tx_set_used);
- if (ref_tx_type == rd_info_array->tx_type) {
- rd_stats->rate += rd_info_array->rate;
- rd_stats->dist += rd_info_array->dist;
- rd_stats->sse += rd_info_array->sse;
- rd_stats->skip_txfm &= rd_info_array->eob == 0;
- p->eobs[block] = rd_info_array->eob;
- p->txb_entropy_ctx[block] = rd_info_array->txb_entropy_ctx;
- return;
- }
- }
-
+ int64_t ref_rdcost) {
RD_STATS this_rd_stats;
const int skip_trellis = 0;
search_tx_type(cpi, x, 0, block, blk_row, blk_col, plane_bsize, tx_size,
txb_ctx, ftxs_mode, skip_trellis, ref_rdcost, &this_rd_stats);
av1_merge_rd_stats(rd_stats, &this_rd_stats);
-
- // Save RD results for possible reuse in future.
- if (rd_info_array != NULL) {
- rd_info_array->valid = 1;
- rd_info_array->entropy_context = cur_joint_ctx;
- rd_info_array->rate = this_rd_stats.rate;
- rd_info_array->dist = this_rd_stats.dist;
- rd_info_array->sse = this_rd_stats.sse;
- rd_info_array->eob = p->eobs[block];
- rd_info_array->txb_entropy_ctx = p->txb_entropy_ctx[block];
- rd_info_array->tx_type = xd->tx_type_map[tx_type_map_idx];
- }
}
static AOM_INLINE void try_tx_block_no_split(
@@ -2673,8 +2283,7 @@
TX_SIZE tx_size, int depth, BLOCK_SIZE plane_bsize,
const ENTROPY_CONTEXT *ta, const ENTROPY_CONTEXT *tl,
int txfm_partition_ctx, RD_STATS *rd_stats, int64_t ref_best_rd,
- FAST_TX_SEARCH_MODE ftxs_mode, TXB_RD_INFO_NODE *rd_info_node,
- TxCandidateInfo *no_split) {
+ FAST_TX_SEARCH_MODE ftxs_mode, TxCandidateInfo *no_split) {
MACROBLOCKD *const xd = &x->e_mbd;
MB_MODE_INFO *const mbmi = xd->mi[0];
struct macroblock_plane *const p = &x->plane[0];
@@ -2690,8 +2299,7 @@
const int index = av1_get_txb_size_index(plane_bsize, blk_row, blk_col);
mbmi->inter_tx_size[index] = tx_size;
tx_type_rd(cpi, x, tx_size, blk_row, blk_col, block, plane_bsize, &txb_ctx,
- rd_stats, ftxs_mode, ref_best_rd,
- rd_info_node != NULL ? rd_info_node->rd_info_array : NULL);
+ rd_stats, ftxs_mode, ref_best_rd);
assert(rd_stats->rate < INT_MAX);
const int pick_skip_txfm =
@@ -2726,8 +2334,7 @@
TX_SIZE tx_size, int depth, BLOCK_SIZE plane_bsize, ENTROPY_CONTEXT *ta,
ENTROPY_CONTEXT *tl, TXFM_CONTEXT *tx_above, TXFM_CONTEXT *tx_left,
int txfm_partition_ctx, int64_t no_split_rd, int64_t ref_best_rd,
- FAST_TX_SEARCH_MODE ftxs_mode, TXB_RD_INFO_NODE *rd_info_node,
- RD_STATS *split_rd_stats) {
+ FAST_TX_SEARCH_MODE ftxs_mode, RD_STATS *split_rd_stats) {
assert(tx_size < TX_SIZES_ALL);
MACROBLOCKD *const xd = &x->e_mbd;
const int max_blocks_high = max_block_high(xd, plane_bsize, 0);
@@ -2755,11 +2362,10 @@
RD_STATS this_rd_stats;
int this_cost_valid = 1;
- select_tx_block(
- cpi, x, offsetr, offsetc, block, sub_txs, depth + 1, plane_bsize, ta,
- tl, tx_above, tx_left, &this_rd_stats, no_split_rd / nblks,
- ref_best_rd - split_rd_stats->rdcost, &this_cost_valid, ftxs_mode,
- (rd_info_node != NULL) ? rd_info_node->children[blk_idx] : NULL);
+ select_tx_block(cpi, x, offsetr, offsetc, block, sub_txs, depth + 1,
+ plane_bsize, ta, tl, tx_above, tx_left, &this_rd_stats,
+ no_split_rd / nblks, ref_best_rd - split_rd_stats->rdcost,
+ &this_cost_valid, ftxs_mode);
if (!this_cost_valid) {
split_rd_stats->rdcost = INT64_MAX;
return;
@@ -2874,8 +2480,7 @@
TX_SIZE tx_size, int depth, BLOCK_SIZE plane_bsize, ENTROPY_CONTEXT *ta,
ENTROPY_CONTEXT *tl, TXFM_CONTEXT *tx_above, TXFM_CONTEXT *tx_left,
RD_STATS *rd_stats, int64_t prev_level_rd, int64_t ref_best_rd,
- int *is_cost_valid, FAST_TX_SEARCH_MODE ftxs_mode,
- TXB_RD_INFO_NODE *rd_info_node) {
+ int *is_cost_valid, FAST_TX_SEARCH_MODE ftxs_mode) {
assert(tx_size < TX_SIZES_ALL);
av1_init_rd_stats(rd_stats);
if (ref_best_rd < 0) {
@@ -2914,7 +2519,7 @@
if (try_no_split) {
try_tx_block_no_split(cpi, x, blk_row, blk_col, block, tx_size, depth,
plane_bsize, ta, tl, ctx, rd_stats, ref_best_rd,
- ftxs_mode, rd_info_node, &no_split);
+ ftxs_mode, &no_split);
// Speed features for early termination.
const int search_level = cpi->sf.tx_sf.adaptive_txb_search_level;
@@ -2950,7 +2555,7 @@
try_tx_block_split(cpi, x, blk_row, blk_col, block, tx_size, depth,
plane_bsize, ta, tl, tx_above, tx_left, ctx, no_split.rd,
AOMMIN(no_split.rd, ref_best_rd), ftxs_mode,
- rd_info_node, &split_rd_stats);
+ &split_rd_stats);
}
if (no_split.rd < split_rd_stats.rdcost) {
@@ -3457,7 +3062,7 @@
.txb_skip_cost[txb_ctx.txb_skip_ctx][1];
rd_stats->zero_rate = zero_blk_rate;
tx_type_rd(cpi, x, tx_size, blk_row, blk_col, block, plane_bsize, &txb_ctx,
- rd_stats, ftxs_mode, ref_best_rd, NULL);
+ rd_stats, ftxs_mode, ref_best_rd);
const int mi_width = mi_size_wide[plane_bsize];
TxfmSearchInfo *txfm_info = &x->txfm_search_info;
if (RDCOST(x->rdmult, rd_stats->rate, rd_stats->dist) >=
@@ -3596,8 +3201,7 @@
// will be saved in rd_stats. The returned value is the corresponding RD cost.
static int64_t select_tx_size_and_type(const AV1_COMP *cpi, MACROBLOCK *x,
RD_STATS *rd_stats, BLOCK_SIZE bsize,
- int64_t ref_best_rd,
- TXB_RD_INFO_NODE *rd_info_tree) {
+ int64_t ref_best_rd) {
MACROBLOCKD *const xd = &x->e_mbd;
const TxfmSearchParams *txfm_params = &x->txfm_search_params;
assert(is_inter_block(xd->mi[0]));
@@ -3650,7 +3254,7 @@
// Search for the best transform block size and type for the sub-block.
select_tx_block(cpi, x, idy, idx, block, max_tx_size, init_depth, bsize,
ctxa, ctxl, tx_above, tx_left, &pn_rd_stats, INT64_MAX,
- best_rd_sofar, &is_cost_valid, ftxs_mode, rd_info_tree);
+ best_rd_sofar, &is_cost_valid, ftxs_mode);
if (!is_cost_valid || pn_rd_stats.rate == INT_MAX) {
av1_invalid_rd_stats(rd_stats);
return INT64_MAX;
@@ -3660,7 +3264,6 @@
no_skip_txfm_rd =
RDCOST(x->rdmult, rd_stats->rate + no_skip_txfm_cost, rd_stats->dist);
block += step;
- if (rd_info_tree != NULL) rd_info_tree += 1;
}
}
@@ -3771,19 +3374,8 @@
++x->txfm_search_info.tx_search_count;
#endif // CONFIG_SPEED_STATS
- // Pre-compute residue hashes (transform block level) and find existing or
- // add new RD records to store and reuse rate and distortion values to speed
- // up TX size/type search.
- TXB_RD_INFO_NODE matched_rd_info[4 + 16 + 64];
- int found_rd_info = 0;
- if (ref_best_rd != INT64_MAX && within_border &&
- cpi->sf.tx_sf.use_inter_txb_hash) {
- found_rd_info = find_tx_size_rd_records(x, bsize, matched_rd_info);
- }
-
const int64_t rd =
- select_tx_size_and_type(cpi, x, rd_stats, bsize, ref_best_rd,
- found_rd_info ? matched_rd_info : NULL);
+ select_tx_size_and_type(cpi, x, rd_stats, bsize, ref_best_rd);
if (rd == INT64_MAX) {
// We should always find at least one candidate unless ref_best_rd is less
diff --git a/av1/encoder/x86/av1_quantize_sse2.c b/av1/encoder/x86/av1_quantize_sse2.c
index 5497c7e..b533894 100644
--- a/av1/encoder/x86/av1_quantize_sse2.c
+++ b/av1/encoder/x86/av1_quantize_sse2.c
@@ -15,6 +15,7 @@
#include "config/av1_rtcd.h"
#include "aom/aom_integer.h"
+#include "aom_dsp/x86/quantize_x86.h"
static INLINE void read_coeff(const tran_low_t *coeff, intptr_t offset,
__m128i *c0, __m128i *c1) {
@@ -187,3 +188,102 @@
*eob_ptr = _mm_extract_epi16(eob, 1);
}
}
+
+static INLINE void quantize_lp(const int16_t *iscan_ptr,
+ const int16_t *coeff_ptr, intptr_t n_coeffs,
+ int16_t *qcoeff_ptr, int16_t *dqcoeff_ptr,
+ const __m128i *round0, const __m128i *round1,
+ const __m128i *quant0, const __m128i *quant1,
+ const __m128i *dequant0, const __m128i *dequant1,
+ __m128i *eob) {  // Quantize + dequantize 16 coeffs at offset n_coeffs; update running *eob
+ const int16_t *read = coeff_ptr + n_coeffs;
+ __m128i coeff0 = _mm_load_si128((const __m128i *)read);  // aligned load, lanes 0..7
+ __m128i coeff1 = _mm_load_si128((const __m128i *)read + 1);  // aligned load, lanes 8..15
+
+ // Poor man's sign extract: the arithmetic shift gives 0 or -1 per lane,
+ const __m128i coeff0_sign = _mm_srai_epi16(coeff0, 15);
+ const __m128i coeff1_sign = _mm_srai_epi16(coeff1, 15);
+ __m128i qcoeff0 = _mm_xor_si128(coeff0, coeff0_sign);  // xor then subtract the mask
+ __m128i qcoeff1 = _mm_xor_si128(coeff1, coeff1_sign);  // yields the absolute value
+ qcoeff0 = _mm_sub_epi16(qcoeff0, coeff0_sign);
+ qcoeff1 = _mm_sub_epi16(qcoeff1, coeff1_sign);
+
+ qcoeff0 = _mm_adds_epi16(qcoeff0, *round0);  // saturating add of the rounding term
+ qcoeff1 = _mm_adds_epi16(qcoeff1, *round1);
+ const __m128i qtmp0 = _mm_mulhi_epi16(qcoeff0, *quant0);  // keep high 16 bits of the product
+ const __m128i qtmp1 = _mm_mulhi_epi16(qcoeff1, *quant1);
+
+ // Reinsert signs
+ qcoeff0 = _mm_xor_si128(qtmp0, coeff0_sign);
+ qcoeff1 = _mm_xor_si128(qtmp1, coeff1_sign);
+ qcoeff0 = _mm_sub_epi16(qcoeff0, coeff0_sign);
+ qcoeff1 = _mm_sub_epi16(qcoeff1, coeff1_sign);
+
+ int16_t *addr = qcoeff_ptr + n_coeffs;
+ _mm_store_si128((__m128i *)addr, qcoeff0);  // write the quantized coefficients
+ _mm_store_si128((__m128i *)addr + 1, qcoeff1);
+
+ coeff0 = _mm_mullo_epi16(qcoeff0, *dequant0);  // coeff0/coeff1 are reused for the
+ coeff1 = _mm_mullo_epi16(qcoeff1, *dequant1);  // dequantized values (low 16 bits)
+
+ addr = dqcoeff_ptr + n_coeffs;
+ _mm_store_si128((__m128i *)addr, coeff0);  // write the dequantized coefficients
+ _mm_store_si128((__m128i *)addr + 1, coeff1);
+
+ const __m128i zero = _mm_setzero_si128();
+ // Scan for eob (the non-zero mask is built from the dequantized values)
+ const __m128i zero_coeff0 = _mm_cmpeq_epi16(coeff0, zero);
+ const __m128i zero_coeff1 = _mm_cmpeq_epi16(coeff1, zero);
+ const __m128i nzero_coeff0 = _mm_cmpeq_epi16(zero_coeff0, zero);  // all-ones where non-zero
+ const __m128i nzero_coeff1 = _mm_cmpeq_epi16(zero_coeff1, zero);
+
+ const __m128i iscan0 =
+ _mm_load_si128((const __m128i *)(iscan_ptr + n_coeffs));
+ const __m128i iscan1 =
+ _mm_load_si128((const __m128i *)(iscan_ptr + n_coeffs) + 1);
+
+ // Add one to convert from indices to counts (subtracting the -1 mask adds 1)
+ const __m128i iscan0_nz = _mm_sub_epi16(iscan0, nzero_coeff0);
+ const __m128i iscan1_nz = _mm_sub_epi16(iscan1, nzero_coeff1);
+ const __m128i eob0 = _mm_and_si128(iscan0_nz, nzero_coeff0);  // zero lanes contribute 0
+ const __m128i eob1 = _mm_and_si128(iscan1_nz, nzero_coeff1);
+ const __m128i eob2 = _mm_max_epi16(eob0, eob1);
+ *eob = _mm_max_epi16(*eob, eob2);  // per-lane running maximum across calls
+}
+
+void av1_quantize_lp_sse2(const int16_t *coeff_ptr, intptr_t n_coeffs,
+ const int16_t *round_ptr, const int16_t *quant_ptr,
+ int16_t *qcoeff_ptr, int16_t *dqcoeff_ptr,
+ const int16_t *dequant_ptr, uint16_t *eob_ptr,
+ const int16_t *scan, const int16_t *iscan) {  // SSE2 int16 quantizer; writes qcoeff, dqcoeff and *eob_ptr
+ (void)scan;  // unused here; only iscan is read
+ coeff_ptr += n_coeffs;  // move every buffer pointer to its end ...
+ iscan += n_coeffs;
+ qcoeff_ptr += n_coeffs;
+ dqcoeff_ptr += n_coeffs;
+ n_coeffs = -n_coeffs;  // ... and index with a negative offset that counts up to 0
+
+ // Setup global values: *0 is the vector as loaded, *1 repeats its upper four lanes
+ const __m128i round0 = _mm_load_si128((const __m128i *)round_ptr);
+ const __m128i round1 = _mm_unpackhi_epi64(round0, round0);
+ const __m128i quant0 = _mm_load_si128((const __m128i *)quant_ptr);
+ const __m128i quant1 = _mm_unpackhi_epi64(quant0, quant0);
+ const __m128i dequant0 = _mm_load_si128((const __m128i *)dequant_ptr);
+ const __m128i dequant1 = _mm_unpackhi_epi64(dequant0, dequant0);
+ __m128i eob = _mm_setzero_si128();
+
+ // DC and first 15 AC (runs unconditionally; assumes n_coeffs >= 16 - TODO confirm)
+ quantize_lp(iscan, coeff_ptr, n_coeffs, qcoeff_ptr, dqcoeff_ptr, &round0,
+ &round1, &quant0, &quant1, &dequant0, &dequant1, &eob);
+ n_coeffs += 8 * 2;
+
+ // AC only loop: remaining groups of 16 use the *1 vectors for both halves
+ while (n_coeffs < 0) {
+ quantize_lp(iscan, coeff_ptr, n_coeffs, qcoeff_ptr, dqcoeff_ptr, &round1,
+ &round1, &quant1, &quant1, &dequant1, &dequant1, &eob);
+ n_coeffs += 8 * 2;
+ }
+
+ // Accumulate EOB: collapse the per-lane vector into the single value returned
+ *eob_ptr = accumulate_eob(eob);
+}
diff --git a/av1/encoder/x86/error_intrin_sse2.c b/av1/encoder/x86/error_intrin_sse2.c
new file mode 100644
index 0000000..e876db1
--- /dev/null
+++ b/av1/encoder/x86/error_intrin_sse2.c
@@ -0,0 +1,75 @@
+/*
+ * Copyright (c) 2021, Alliance for Open Media. All rights reserved
+ *
+ * This source code is subject to the terms of the BSD 2 Clause License and
+ * the Alliance for Open Media Patent License 1.0. If the BSD 2 Clause License
+ * was not distributed with this source code in the LICENSE file, you can
+ * obtain it at www.aomedia.org/license/software. If the Alliance for Open
+ * Media Patent License 1.0 was not distributed with this source code in the
+ * PATENTS file, you can obtain it at www.aomedia.org/license/patent.
+ */
+
+#include <emmintrin.h> // SSE2
+
+#include "config/av1_rtcd.h"
+
+#include "aom/aom_integer.h"
+
+static AOM_INLINE __m128i reduce_sum_epi64(__m128i reg) {  // Sum the two 64-bit lanes of reg
+ __m128i reg_hi = _mm_srli_si128(reg, 8);  // byte shift: upper 64-bit lane moves to the low lane
+ reg = _mm_add_epi64(reg, reg_hi);  // low lane now holds lane0 + lane1
+
+ return reg;  // callers should read only the low 64 bits
+}
+
+int64_t av1_block_error_lp_sse2(const int16_t *coeff, const int16_t *dqcoeff,
+ intptr_t block_size) {  // Returns the sum of squared (dqcoeff - coeff) over block_size int16 values
+ assert(block_size % 16 == 0);  // the loop consumes 16 values per iteration
+ assert(block_size >= 16);
+
+ const __m128i zero = _mm_setzero_si128();
+ __m128i accum_0 = zero;  // two independent 64-bit accumulators,
+ __m128i accum_1 = zero;  // combined after the loop
+
+ for (int i = 0; i < block_size; i += 16) {
+ // Load 16 elements (two vectors of 8) from each of coeff and dqcoeff.
+ const __m128i _coeff_0 = _mm_loadu_si128((const __m128i *)coeff);
+ const __m128i _coeff_1 = _mm_loadu_si128((const __m128i *)(coeff + 8));
+ const __m128i _dqcoeff_0 = _mm_loadu_si128((const __m128i *)dqcoeff);
+ const __m128i _dqcoeff_1 = _mm_loadu_si128((const __m128i *)(dqcoeff + 8));
+ // Compute the diff (per 16-bit lane)
+ const __m128i diff_0 = _mm_sub_epi16(_dqcoeff_0, _coeff_0);
+ const __m128i diff_1 = _mm_sub_epi16(_dqcoeff_1, _coeff_1);
+ // Compute the error: madd squares each diff and sums adjacent pairs into 32-bit lanes
+ const __m128i error_0 = _mm_madd_epi16(diff_0, diff_0);
+ const __m128i error_1 = _mm_madd_epi16(diff_1, diff_1);
+
+ const __m128i error_lo_0 = _mm_unpacklo_epi32(error_0, zero);  // interleaving with zero
+ const __m128i error_lo_1 = _mm_unpacklo_epi32(error_1, zero);  // widens each 32-bit sum
+ const __m128i error_hi_0 = _mm_unpackhi_epi32(error_0, zero);  // to a 64-bit lane
+ const __m128i error_hi_1 = _mm_unpackhi_epi32(error_1, zero);
+
+ // Accumulate (64-bit adds)
+ accum_0 = _mm_add_epi64(accum_0, error_lo_0);
+ accum_1 = _mm_add_epi64(accum_1, error_lo_1);
+ accum_0 = _mm_add_epi64(accum_0, error_hi_0);
+ accum_1 = _mm_add_epi64(accum_1, error_hi_1);
+
+ // Advance
+ coeff += 16;
+ dqcoeff += 16;
+ }
+
+ __m128i accum = _mm_add_epi64(accum_0, accum_1);
+ // Reduce sum the register
+ accum = reduce_sum_epi64(accum);
+
+ // Return the low 64-bit lane, which holds the total.
+#if ARCH_X86_64
+ return _mm_cvtsi128_si64(accum);
+#else
+ int64_t result;
+ _mm_storel_epi64((__m128i *)&result, accum);  // store path used when ARCH_X86_64 is 0
+ return result;
+#endif // ARCH_X86_64
+}
diff --git a/build/cmake/aom_config_defaults.cmake b/build/cmake/aom_config_defaults.cmake
index a5e1b7b..fde8a45 100644
--- a/build/cmake/aom_config_defaults.cmake
+++ b/build/cmake/aom_config_defaults.cmake
@@ -73,11 +73,9 @@
set_aom_config_var(CONFIG_BIG_ENDIAN 0 "Internal flag.")
set_aom_config_var(CONFIG_FRAME_PARALLEL_ENCODE 0
"Enable frame parallelism during encode.")
-if(CONFIG_FRAME_PARALLEL_ENCODE)
- set_aom_config_var(
- CONFIG_FRAME_PARALLEL_ENCODE_2 0
- "Enable frame parallelism during encode for frames in lower layer depths.")
-endif() # CONFIG_FRAME_PARALLEL_ENCODE
+set_aom_config_var(
+ CONFIG_FRAME_PARALLEL_ENCODE_2 0
+ "Enable frame parallelism during encode for frames in lower layer depths.")
set_aom_config_var(CONFIG_GCC 0 "Building with GCC (detect).")
set_aom_config_var(CONFIG_GCOV 0 "Enable gcov support.")
set_aom_config_var(CONFIG_GPROF 0 "Enable gprof support.")
diff --git a/examples/svc_encoder_rtc.c b/examples/svc_encoder_rtc.c
index 1516b58..e2da3ae 100644
--- a/examples/svc_encoder_rtc.c
+++ b/examples/svc_encoder_rtc.c
@@ -560,7 +560,7 @@
int layering_mode, int superframe_cnt, aom_svc_layer_id_t *layer_id,
aom_svc_ref_frame_config_t *ref_frame_config,
aom_svc_ref_frame_comp_pred_t *ref_frame_comp_pred, int *use_svc_control,
- int spatial_layer_id, int is_key_frame, int ksvc_mode) {
+ int spatial_layer_id, int is_key_frame, int ksvc_mode, int speed) {
int i;
int enable_longterm_temporal_ref = 1;
int shift = (layering_mode == 8) ? 2 : 0;
@@ -646,8 +646,12 @@
// Keep golden fixed at slot 3.
ref_frame_config->ref_idx[SVC_GOLDEN_FRAME] = 3;
- // Cyclically refresh slots 4, 5, 6, 7, for lag altref.
- lag_index = 4 + (base_count % 4);
+ // Cyclically refresh slots 5, 6, 7, for lag altref.
+ lag_index = 5;
+ if (base_count > 0) {
+ lag_index = 5 + (base_count % 3);
+ if (superframe_cnt % 4 != 0) lag_index = 5 + ((base_count + 1) % 3);
+ }
// Set the altref slot to lag_index.
ref_frame_config->ref_idx[SVC_ALTREF_FRAME] = lag_index;
if (superframe_cnt % 4 == 0) {
@@ -681,6 +685,8 @@
// Every frame can reference GOLDEN AND ALTREF.
ref_frame_config->reference[SVC_GOLDEN_FRAME] = 1;
ref_frame_config->reference[SVC_ALTREF_FRAME] = 1;
+ // Allow for compound prediction using LAST and ALTREF.
+ if (speed >= 7) ref_frame_comp_pred->use_comp_pred[2] = 1;
break;
case 4:
// 3-temporal layer: but middle layer updates GF, so 2nd TL2 will
@@ -1307,7 +1313,7 @@
set_layer_pattern(app_input.layering_mode, frame_cnt, &layer_id,
&ref_frame_config, &ref_frame_comp_pred,
&use_svc_control, slx, is_key_frame,
- (app_input.layering_mode == 10));
+ (app_input.layering_mode == 10), app_input.speed);
aom_codec_control(&codec, AV1E_SET_SVC_LAYER_ID, &layer_id);
if (use_svc_control) {
aom_codec_control(&codec, AV1E_SET_SVC_REF_FRAME_CONFIG,
diff --git a/test/avg_test.cc b/test/avg_test.cc
index 4c6d9b5..92d5c59 100644
--- a/test/avg_test.cc
+++ b/test/avg_test.cc
@@ -9,6 +9,7 @@
*/
#include <stdlib.h>
+#include <ostream>
#include <tuple>
#include "third_party/googletest/src/googletest/include/gtest/gtest.h"
@@ -521,22 +522,39 @@
#endif
typedef int (*SatdFunc)(const tran_low_t *coeffs, int length);
-typedef ::testing::tuple<int, SatdFunc, SatdFunc> SatdTestParam;
-class SatdTest : public ::testing::Test,
- public ::testing::WithParamInterface<SatdTestParam> {
- protected:
- virtual void SetUp() {
- satd_size_ = GET_PARAM(0);
- satd_func_ref_ = GET_PARAM(1);
- satd_func_simd_ = GET_PARAM(2);
+typedef int (*SatdLpFunc)(const int16_t *coeffs, int length);
+template <typename SatdFuncType>
+struct SatdTestParam {  // One test case: block size plus the reference and SIMD functions
+ SatdTestParam(int s, SatdFuncType f1, SatdFuncType f2)
+ : satd_size(s), func_ref(f1), func_simd(f2) {}
+ friend std::ostream &operator<<(std::ostream &os,  // streams only satd_size
+ const SatdTestParam<SatdFuncType> &param) {
+ return os << "satd_size: " << param.satd_size;
+ }
+ int satd_size;  // number of coefficients passed to the functions
+ SatdFuncType func_ref;  // reference implementation
+ SatdFuncType func_simd;  // implementation under test
+};
+
+template <typename CoeffType, typename SatdFuncType>
+class SatdTestBase
+ : public ::testing::Test,
+ public ::testing::WithParamInterface<SatdTestParam<SatdFuncType>> {
+ protected:
+ explicit SatdTestBase(const SatdTestParam<SatdFuncType> &func_param) {
+ satd_size_ = func_param.satd_size;
+ satd_func_ref_ = func_param.func_ref;
+ satd_func_simd_ = func_param.func_simd;
+ }
+ virtual void SetUp() {
rnd_.Reset(ACMRandom::DeterministicSeed());
- src_ = reinterpret_cast<tran_low_t *>(
+ src_ = reinterpret_cast<CoeffType *>(
aom_memalign(32, sizeof(*src_) * satd_size_));
ASSERT_TRUE(src_ != NULL);
}
virtual void TearDown() { aom_free(src_); }
- void FillConstant(const tran_low_t val) {
+ void FillConstant(const CoeffType val) {
for (int i = 0; i < satd_size_; ++i) src_[i] = val;
}
void FillRandom() {
@@ -597,12 +615,17 @@
int satd_size_;
private:
- tran_low_t *src_;
- SatdFunc satd_func_ref_;
- SatdFunc satd_func_simd_;
+ CoeffType *src_;
+ SatdFuncType satd_func_ref_;
+ SatdFuncType satd_func_simd_;
ACMRandom rnd_;
};
+class SatdTest : public SatdTestBase<tran_low_t, SatdFunc> {
+ public:
+ SatdTest() : SatdTestBase(GetParam()) {}
+};
+
TEST_P(SatdTest, MinValue) {
const int kMin = -32640;
const int expected = -kMin * satd_size_;
@@ -639,13 +662,21 @@
}
GTEST_ALLOW_UNINSTANTIATED_PARAMETERIZED_TEST(SatdTest);
+INSTANTIATE_TEST_SUITE_P(
+ C, SatdTest,
+ ::testing::Values(SatdTestParam<SatdFunc>(16, &aom_satd_c, &aom_satd_c),
+ SatdTestParam<SatdFunc>(64, &aom_satd_c, &aom_satd_c),
+ SatdTestParam<SatdFunc>(256, &aom_satd_c, &aom_satd_c),
+ SatdTestParam<SatdFunc>(1024, &aom_satd_c, &aom_satd_c)));
+
#if HAVE_NEON
INSTANTIATE_TEST_SUITE_P(
NEON, SatdTest,
- ::testing::Values(make_tuple(16, &aom_satd_c, &aom_satd_neon),
- make_tuple(64, &aom_satd_c, &aom_satd_neon),
- make_tuple(256, &aom_satd_c, &aom_satd_neon),
- make_tuple(1024, &aom_satd_c, &aom_satd_neon)));
+ ::testing::Values(SatdTestParam<SatdFunc>(16, &aom_satd_c, &aom_satd_neon),
+ SatdTestParam<SatdFunc>(64, &aom_satd_c, &aom_satd_neon),
+ SatdTestParam<SatdFunc>(256, &aom_satd_c, &aom_satd_neon),
+ SatdTestParam<SatdFunc>(1024, &aom_satd_c,
+ &aom_satd_neon)));
INSTANTIATE_TEST_SUITE_P(
NEON, VectorVarTest,
::testing::Values(make_tuple(2, &aom_vector_var_c, &aom_vector_var_neon),
@@ -654,4 +685,104 @@
make_tuple(5, &aom_vector_var_c, &aom_vector_var_neon)));
#endif
+#if HAVE_AVX2
+INSTANTIATE_TEST_SUITE_P(
+ AVX2, SatdTest,
+ ::testing::Values(SatdTestParam<SatdFunc>(16, &aom_satd_c, &aom_satd_avx2),
+ SatdTestParam<SatdFunc>(64, &aom_satd_c, &aom_satd_avx2),
+ SatdTestParam<SatdFunc>(256, &aom_satd_c, &aom_satd_avx2),
+ SatdTestParam<SatdFunc>(1024, &aom_satd_c,
+ &aom_satd_avx2)));
+#endif
+
+#if HAVE_SSE2
+INSTANTIATE_TEST_SUITE_P(
+ SSE2, SatdTest,
+ ::testing::Values(SatdTestParam<SatdFunc>(16, &aom_satd_c, &aom_satd_sse2),
+ SatdTestParam<SatdFunc>(64, &aom_satd_c, &aom_satd_sse2),
+ SatdTestParam<SatdFunc>(256, &aom_satd_c, &aom_satd_sse2),
+ SatdTestParam<SatdFunc>(1024, &aom_satd_c,
+ &aom_satd_sse2)));
+#endif
+
+class SatdLpTest : public SatdTestBase<int16_t, SatdLpFunc> {  // int16_t (low precision) SATD fixture
+ public:
+ SatdLpTest() : SatdTestBase(GetParam()) {}  // forwards the current test parameter to the base
+};
+
+TEST_P(SatdLpTest, MinValue) {
+ const int kMin = -32640;
+ const int expected = -kMin * satd_size_;
+ FillConstant(kMin);
+ Check(expected);
+}
+TEST_P(SatdLpTest, MaxValue) {
+ const int kMax = 32640;
+ const int expected = kMax * satd_size_;
+ FillConstant(kMax);
+ Check(expected);
+}
+TEST_P(SatdLpTest, Random) {  // Checks random input against fixed per-size expected sums
+ int expected;
+ switch (satd_size_) {  // constants presumably tied to the deterministic seed set in SetUp() - confirm
+ case 16: expected = 205298; break;
+ case 64: expected = 1113950; break;
+ case 256: expected = 4268415; break;
+ case 1024: expected = 16954082; break;
+ default:
+ FAIL() << "Invalid satd size (" << satd_size_
+ << ") valid: 16/64/256/1024";
+ }
+ FillRandom();
+ Check(expected);
+}
+TEST_P(SatdLpTest, Match) {
+ FillRandom();
+ RunComparison();
+}
+TEST_P(SatdLpTest, DISABLED_Speed) {
+ FillRandom();
+ RunSpeedTest();
+}
+GTEST_ALLOW_UNINSTANTIATED_PARAMETERIZED_TEST(SatdLpTest);
+
+// Add the following C instantiation to avoid the gtest uninstantiated-test warning.
+INSTANTIATE_TEST_SUITE_P(
+ C, SatdLpTest,
+ ::testing::Values(
+ SatdTestParam<SatdLpFunc>(16, &aom_satd_lp_c, &aom_satd_lp_c),
+ SatdTestParam<SatdLpFunc>(64, &aom_satd_lp_c, &aom_satd_lp_c),
+ SatdTestParam<SatdLpFunc>(256, &aom_satd_lp_c, &aom_satd_lp_c),
+ SatdTestParam<SatdLpFunc>(1024, &aom_satd_lp_c, &aom_satd_lp_c)));
+
+#if HAVE_NEON
+INSTANTIATE_TEST_SUITE_P(
+ NEON, SatdLpTest,
+ ::testing::Values(
+ SatdTestParam<SatdLpFunc>(16, &aom_satd_lp_c, &aom_satd_lp_neon),
+ SatdTestParam<SatdLpFunc>(64, &aom_satd_lp_c, &aom_satd_lp_neon),
+ SatdTestParam<SatdLpFunc>(256, &aom_satd_lp_c, &aom_satd_lp_neon),
+ SatdTestParam<SatdLpFunc>(1024, &aom_satd_lp_c, &aom_satd_lp_neon)));
+#endif
+
+#if HAVE_AVX2
+INSTANTIATE_TEST_SUITE_P(
+ AVX2, SatdLpTest,
+ ::testing::Values(
+ SatdTestParam<SatdLpFunc>(16, &aom_satd_lp_c, &aom_satd_lp_avx2),
+ SatdTestParam<SatdLpFunc>(64, &aom_satd_lp_c, &aom_satd_lp_avx2),
+ SatdTestParam<SatdLpFunc>(256, &aom_satd_lp_c, &aom_satd_lp_avx2),
+ SatdTestParam<SatdLpFunc>(1024, &aom_satd_lp_c, &aom_satd_lp_avx2)));
+#endif
+
+#if HAVE_SSE2
+INSTANTIATE_TEST_SUITE_P(
+ SSE2, SatdLpTest,
+ ::testing::Values(
+ SatdTestParam<SatdLpFunc>(16, &aom_satd_lp_c, &aom_satd_lp_sse2),
+ SatdTestParam<SatdLpFunc>(64, &aom_satd_lp_c, &aom_satd_lp_sse2),
+ SatdTestParam<SatdLpFunc>(256, &aom_satd_lp_c, &aom_satd_lp_sse2),
+ SatdTestParam<SatdLpFunc>(1024, &aom_satd_lp_c, &aom_satd_lp_sse2)));
+#endif
+
} // namespace
diff --git a/test/error_block_test.cc b/test/error_block_test.cc
index 3ca4340..e4befd5 100644
--- a/test/error_block_test.cc
+++ b/test/error_block_test.cc
@@ -31,16 +31,20 @@
namespace {
const int kNumIterations = 1000;
-typedef int64_t (*ErrorBlockFunc)(const tran_low_t *coeff,
- const tran_low_t *dqcoeff,
- intptr_t block_size, int64_t *ssz, int bps);
+using ErrorBlockFunc = int64_t (*)(const tran_low_t *coeff,
+ const tran_low_t *dqcoeff,
+ intptr_t block_size, int64_t *ssz, int bps);
-typedef int64_t (*ErrorBlockFunc8Bits)(const tran_low_t *coeff,
- const tran_low_t *dqcoeff,
- intptr_t block_size, int64_t *ssz);
+using ErrorBlockFunc8Bits = int64_t (*)(const tran_low_t *coeff,
+ const tran_low_t *dqcoeff,
+ intptr_t block_size, int64_t *ssz);
-typedef std::tuple<ErrorBlockFunc, ErrorBlockFunc, aom_bit_depth_t>
- ErrorBlockParam;
+using ErrorBlockLpFunc = int64_t (*)(const int16_t *coeff,
+ const int16_t *dqcoeff,
+ intptr_t block_size);
+
+using ErrorBlockParam =
+ std::tuple<ErrorBlockFunc, ErrorBlockFunc, aom_bit_depth_t>;
template <ErrorBlockFunc8Bits fn>
int64_t BlockError8BitWrapper(const tran_low_t *coeff,
@@ -50,6 +54,15 @@
return fn(coeff, dqcoeff, block_size, ssz);
}
+template <ErrorBlockLpFunc fn>
+int64_t BlockErrorLpWrapper(const tran_low_t *coeff, const tran_low_t *dqcoeff,  // adapts an LP function to ErrorBlockFunc
+ intptr_t block_size, int64_t *ssz, int bps) {
+ EXPECT_EQ(bps, 8);  // this wrapper is only registered with AOM_BITS_8 in this file
+ *ssz = -1;  // fn has no ssz output, so a fixed value is written
+ return fn(reinterpret_cast<const int16_t *>(coeff),  // NOTE(review): buffers are re-read as int16_t;
+ reinterpret_cast<const int16_t *>(dqcoeff), block_size);  // confirm this is intended when tran_low_t is wider
+}
+
class ErrorBlockTest : public ::testing::TestWithParam<ErrorBlockParam> {
public:
virtual ~ErrorBlockTest() {}
@@ -246,7 +259,9 @@
AOM_BITS_8),
#endif
make_tuple(&BlockError8BitWrapper<av1_block_error_sse2>,
- &BlockError8BitWrapper<av1_block_error_c>, AOM_BITS_8)
+ &BlockError8BitWrapper<av1_block_error_c>, AOM_BITS_8),
+ make_tuple(&BlockErrorLpWrapper<av1_block_error_lp_sse2>,
+ &BlockErrorLpWrapper<av1_block_error_lp_c>, AOM_BITS_8)
};
INSTANTIATE_TEST_SUITE_P(SSE2, ErrorBlockTest,
@@ -264,7 +279,9 @@
AOM_BITS_8),
#endif
make_tuple(&BlockError8BitWrapper<av1_block_error_avx2>,
- &BlockError8BitWrapper<av1_block_error_c>, AOM_BITS_8)
+ &BlockError8BitWrapper<av1_block_error_c>, AOM_BITS_8),
+ make_tuple(&BlockErrorLpWrapper<av1_block_error_lp_avx2>,
+ &BlockErrorLpWrapper<av1_block_error_lp_c>, AOM_BITS_8)
};
INSTANTIATE_TEST_SUITE_P(AVX2, ErrorBlockTest,
@@ -280,10 +297,14 @@
#endif // HAVE_MSA
#if (HAVE_NEON)
-INSTANTIATE_TEST_SUITE_P(
- NEON, ErrorBlockTest,
- ::testing::Values(make_tuple(&BlockError8BitWrapper<av1_block_error_neon>,
- &BlockError8BitWrapper<av1_block_error_c>,
- AOM_BITS_8)));
+const ErrorBlockParam kErrorBlockTestParamsNeon[] = {
+ make_tuple(&BlockError8BitWrapper<av1_block_error_neon>,
+ &BlockError8BitWrapper<av1_block_error_c>, AOM_BITS_8),
+ make_tuple(&BlockErrorLpWrapper<av1_block_error_lp_neon>,
+ &BlockErrorLpWrapper<av1_block_error_lp_c>, AOM_BITS_8)
+};
+
+INSTANTIATE_TEST_SUITE_P(NEON, ErrorBlockTest,
+ ::testing::ValuesIn(kErrorBlockTestParamsNeon));
#endif // HAVE_NEON
} // namespace
diff --git a/test/hadamard_test.cc b/test/hadamard_test.cc
index 0141125..8813f33 100644
--- a/test/hadamard_test.cc
+++ b/test/hadamard_test.cc
@@ -23,16 +23,20 @@
using libaom_test::ACMRandom;
-typedef void (*HadamardFunc)(const int16_t *a, ptrdiff_t a_stride,
- tran_low_t *b);
+using HadamardFunc = void (*)(const int16_t *a, ptrdiff_t a_stride,
+ tran_low_t *b);
+// Low precision version of Hadamard Transform
+using HadamardLPFunc = void (*)(const int16_t *a, ptrdiff_t a_stride,
+ int16_t *b);
-void HadamardLoop(const tran_low_t *a, tran_low_t *out) {
- tran_low_t b[8];
+template <typename OutputType>
+void HadamardLoop(const OutputType *a, OutputType *out) {
+ OutputType b[8];
for (int i = 0; i < 8; i += 2) {
b[i + 0] = a[i * 8] + a[(i + 1) * 8];
b[i + 1] = a[i * 8] - a[(i + 1) * 8];
}
- tran_low_t c[8];
+ OutputType c[8];
for (int i = 0; i < 8; i += 4) {
c[i + 0] = b[i + 0] + b[i + 2];
c[i + 1] = b[i + 1] + b[i + 3];
@@ -49,19 +53,21 @@
out[5] = c[3] - c[7];
}
-void ReferenceHadamard8x8(const int16_t *a, int a_stride, tran_low_t *b) {
- tran_low_t input[64];
- tran_low_t buf[64];
+template <typename OutputType>
+void ReferenceHadamard8x8(const int16_t *a, int a_stride, OutputType *b) {
+ OutputType input[64];
+ OutputType buf[64];
for (int i = 0; i < 8; ++i) {
for (int j = 0; j < 8; ++j) {
- input[i * 8 + j] = static_cast<tran_low_t>(a[i * a_stride + j]);
+ input[i * 8 + j] = static_cast<OutputType>(a[i * a_stride + j]);
}
}
for (int i = 0; i < 8; ++i) HadamardLoop(input + i, buf + i * 8);
for (int i = 0; i < 8; ++i) HadamardLoop(buf + i, b + i * 8);
}
-void ReferenceHadamard16x16(const int16_t *a, int a_stride, tran_low_t *b) {
+template <typename OutputType>
+void ReferenceHadamard16x16(const int16_t *a, int a_stride, OutputType *b) {
/* The source is a 16x16 block. The destination is rearranged to 8x32.
* Input is 9 bit. */
ReferenceHadamard8x8(a + 0 + 0 * a_stride, a_stride, b + 0);
@@ -72,16 +78,16 @@
/* Overlay the 8x8 blocks and combine. */
for (int i = 0; i < 64; ++i) {
/* 8x8 steps the range up to 15 bits. */
- const tran_low_t a0 = b[0];
- const tran_low_t a1 = b[64];
- const tran_low_t a2 = b[128];
- const tran_low_t a3 = b[192];
+ const OutputType a0 = b[0];
+ const OutputType a1 = b[64];
+ const OutputType a2 = b[128];
+ const OutputType a3 = b[192];
/* Prevent the result from escaping int16_t. */
- const tran_low_t b0 = (a0 + a1) >> 1;
- const tran_low_t b1 = (a0 - a1) >> 1;
- const tran_low_t b2 = (a2 + a3) >> 1;
- const tran_low_t b3 = (a2 - a3) >> 1;
+ const OutputType b0 = (a0 + a1) >> 1;
+ const OutputType b1 = (a0 - a1) >> 1;
+ const OutputType b2 = (a2 + a3) >> 1;
+ const OutputType b3 = (a2 - a3) >> 1;
/* Store a 16 bit value. */
b[0] = b0 + b2;
@@ -93,22 +99,23 @@
}
}
-void ReferenceHadamard32x32(const int16_t *a, int a_stride, tran_low_t *b) {
+template <typename OutputType>
+void ReferenceHadamard32x32(const int16_t *a, int a_stride, OutputType *b) {
ReferenceHadamard16x16(a + 0 + 0 * a_stride, a_stride, b + 0);
ReferenceHadamard16x16(a + 16 + 0 * a_stride, a_stride, b + 256);
ReferenceHadamard16x16(a + 0 + 16 * a_stride, a_stride, b + 512);
ReferenceHadamard16x16(a + 16 + 16 * a_stride, a_stride, b + 768);
for (int i = 0; i < 256; ++i) {
- const tran_low_t a0 = b[0];
- const tran_low_t a1 = b[256];
- const tran_low_t a2 = b[512];
- const tran_low_t a3 = b[768];
+ const OutputType a0 = b[0];
+ const OutputType a1 = b[256];
+ const OutputType a2 = b[512];
+ const OutputType a3 = b[768];
- const tran_low_t b0 = (a0 + a1) >> 2;
- const tran_low_t b1 = (a0 - a1) >> 2;
- const tran_low_t b2 = (a2 + a3) >> 2;
- const tran_low_t b3 = (a2 - a3) >> 2;
+ const OutputType b0 = (a0 + a1) >> 2;
+ const OutputType b1 = (a0 - a1) >> 2;
+ const OutputType b2 = (a2 + a3) >> 2;
+ const OutputType b3 = (a2 - a3) >> 2;
b[0] = b0 + b2;
b[256] = b1 + b3;
@@ -119,45 +126,57 @@
}
}
-struct HadamardFuncWithSize {
- HadamardFuncWithSize(HadamardFunc f, int s) : func(f), block_size(s) {}
- HadamardFunc func;
+// Dispatches to the reference Hadamard transform for a bwh x bwh block;
+// any size other than 8, 16 or 32 is reported as a fatal test failure.
+template <typename OutputType>
+void ReferenceHadamard(const int16_t *a, int a_stride, OutputType *b, int bwh) {
+  switch (bwh) {
+    case 32: ReferenceHadamard32x32(a, a_stride, b); break;
+    case 16: ReferenceHadamard16x16(a, a_stride, b); break;
+    case 8: ReferenceHadamard8x8(a, a_stride, b); break;
+    default:
+      GTEST_FAIL() << "Invalid Hadamard transform size " << bwh << std::endl;
+  }
+}
+
+template <typename HadamardFuncType>
+struct FuncWithSize {
+ FuncWithSize(HadamardFuncType f, int s) : func(f), block_size(s) {}
+ HadamardFuncType func;
int block_size;
};
-std::ostream &operator<<(std::ostream &os, const HadamardFuncWithSize &hfs) {
+using HadamardFuncWithSize = FuncWithSize<HadamardFunc>;
+using HadamardLPFuncWithSize = FuncWithSize<HadamardLPFunc>;
+
+template <typename HadamardFuncType>
+std::ostream &operator<<(std::ostream &os,
+ const FuncWithSize<HadamardFuncType> &hfs) {
return os << "block size: " << hfs.block_size;
}
-class HadamardTestBase : public ::testing::TestWithParam<HadamardFuncWithSize> {
+template <typename OutputType, typename HadamardFuncType>
+class HadamardTestBase
+ : public ::testing::TestWithParam<FuncWithSize<HadamardFuncType>> {
public:
- virtual void SetUp() {
- h_func_ = GetParam().func;
- bwh_ = GetParam().block_size;
+ explicit HadamardTestBase(const FuncWithSize<HadamardFuncType> &func_param) {
+ h_func_ = func_param.func;
+ bwh_ = func_param.block_size;
block_size_ = bwh_ * bwh_;
- rnd_.Reset(ACMRandom::DeterministicSeed());
}
+ virtual void SetUp() { rnd_.Reset(ACMRandom::DeterministicSeed()); }
+
virtual int16_t Rand() = 0;
- void ReferenceHadamard(const int16_t *a, int a_stride, tran_low_t *b,
- int bwh) {
- if (bwh == 32)
- ReferenceHadamard32x32(a, a_stride, b);
- else if (bwh == 16)
- ReferenceHadamard16x16(a, a_stride, b);
- else
- ReferenceHadamard8x8(a, a_stride, b);
- }
-
void CompareReferenceRandom() {
const int kMaxBlockSize = 32 * 32;
DECLARE_ALIGNED(16, int16_t, a[kMaxBlockSize]);
- DECLARE_ALIGNED(16, tran_low_t, b[kMaxBlockSize]);
+ DECLARE_ALIGNED(16, OutputType, b[kMaxBlockSize]);
memset(a, 0, sizeof(a));
memset(b, 0, sizeof(b));
- tran_low_t b_ref[kMaxBlockSize];
+ OutputType b_ref[kMaxBlockSize];
memset(b_ref, 0, sizeof(b_ref));
for (int i = 0; i < block_size_; ++i) a[i] = Rand();
@@ -174,11 +193,11 @@
void VaryStride() {
const int kMaxBlockSize = 32 * 32;
DECLARE_ALIGNED(16, int16_t, a[kMaxBlockSize * 8]);
- DECLARE_ALIGNED(16, tran_low_t, b[kMaxBlockSize]);
+ DECLARE_ALIGNED(16, OutputType, b[kMaxBlockSize]);
memset(a, 0, sizeof(a));
for (int i = 0; i < block_size_ * 8; ++i) a[i] = Rand();
- tran_low_t b_ref[kMaxBlockSize];
+ OutputType b_ref[kMaxBlockSize];
for (int i = 8; i < 64; i += 8) {
memset(b, 0, sizeof(b));
memset(b_ref, 0, sizeof(b_ref));
@@ -196,7 +215,7 @@
void SpeedTest(int times) {
const int kMaxBlockSize = 32 * 32;
DECLARE_ALIGNED(16, int16_t, input[kMaxBlockSize]);
- DECLARE_ALIGNED(16, tran_low_t, output[kMaxBlockSize]);
+ DECLARE_ALIGNED(16, OutputType, output[kMaxBlockSize]);
memset(input, 1, sizeof(input));
memset(output, 0, sizeof(output));
@@ -217,11 +236,12 @@
private:
int bwh_;
int block_size_;
- HadamardFunc h_func_;
+ HadamardFuncType h_func_;
};
-class HadamardLowbdTest : public HadamardTestBase {
+class HadamardLowbdTest : public HadamardTestBase<tran_low_t, HadamardFunc> {
public:
+ HadamardLowbdTest() : HadamardTestBase(GetParam()) {}
virtual int16_t Rand() { return rnd_.Rand9Signed(); }
};
@@ -229,6 +249,8 @@
TEST_P(HadamardLowbdTest, VaryStride) { VaryStride(); }
+TEST_P(HadamardLowbdTest, DISABLED_SpeedTest) { SpeedTest(1000000); }
+
INSTANTIATE_TEST_SUITE_P(
C, HadamardLowbdTest,
::testing::Values(HadamardFuncWithSize(&aom_hadamard_8x8_c, 8),
@@ -257,4 +279,44 @@
HadamardFuncWithSize(&aom_hadamard_16x16_neon, 16)));
#endif // HAVE_NEON
+// Tests for low precision
+class HadamardLowbdLPTest : public HadamardTestBase<int16_t, HadamardLPFunc> {
+ public:
+ HadamardLowbdLPTest() : HadamardTestBase(GetParam()) {}
+ virtual int16_t Rand() { return rnd_.Rand9Signed(); }
+};
+
+TEST_P(HadamardLowbdLPTest, CompareReferenceRandom) {
+ CompareReferenceRandom();
+}
+
+TEST_P(HadamardLowbdLPTest, VaryStride) { VaryStride(); }
+
+TEST_P(HadamardLowbdLPTest, DISABLED_SpeedTest) { SpeedTest(1000000); }
+
+INSTANTIATE_TEST_SUITE_P(
+ C, HadamardLowbdLPTest,
+ ::testing::Values(HadamardLPFuncWithSize(&aom_hadamard_lp_8x8_c, 8),
+ HadamardLPFuncWithSize(&aom_hadamard_lp_16x16_c, 16)));
+
+#if HAVE_SSE2
+INSTANTIATE_TEST_SUITE_P(
+ SSE2, HadamardLowbdLPTest,
+ ::testing::Values(HadamardLPFuncWithSize(&aom_hadamard_lp_8x8_sse2, 8),
+ HadamardLPFuncWithSize(&aom_hadamard_lp_16x16_sse2, 16)));
+#endif // HAVE_SSE2
+
+#if HAVE_AVX2
+INSTANTIATE_TEST_SUITE_P(
+ AVX2, HadamardLowbdLPTest,
+ ::testing::Values(HadamardLPFuncWithSize(&aom_hadamard_lp_16x16_avx2, 16)));
+#endif // HAVE_AVX2
+
+#if HAVE_NEON
+INSTANTIATE_TEST_SUITE_P(
+ NEON, HadamardLowbdLPTest,
+ ::testing::Values(HadamardLPFuncWithSize(&aom_hadamard_lp_8x8_neon, 8),
+ HadamardLPFuncWithSize(&aom_hadamard_lp_16x16_neon, 16)));
+#endif // HAVE_NEON
+
} // namespace
diff --git a/test/quantize_lp_func_test.cc b/test/quantize_lp_func_test.cc
index 850ec94..f398623 100644
--- a/test/quantize_lp_func_test.cc
+++ b/test/quantize_lp_func_test.cc
@@ -334,6 +334,20 @@
::testing::ValuesIn(kQParamArrayAVX2));
#endif
+#if HAVE_SSE2
+const QuantizeParam kQParamArraySSE2[] = {
+ make_tuple(&av1_quantize_lp_c, &av1_quantize_lp_sse2,
+ static_cast<TX_SIZE>(TX_16X16), AOM_BITS_8),
+ make_tuple(&av1_quantize_lp_c, &av1_quantize_lp_sse2,
+ static_cast<TX_SIZE>(TX_8X8), AOM_BITS_8),
+ make_tuple(&av1_quantize_lp_c, &av1_quantize_lp_sse2,
+ static_cast<TX_SIZE>(TX_4X4), AOM_BITS_8)
+};
+
+INSTANTIATE_TEST_SUITE_P(SSE2, FullPrecisionQuantizeLpTest,
+ ::testing::ValuesIn(kQParamArraySSE2));
+#endif
+
#if HAVE_NEON
const QuantizeParam kQParamArrayNEON[] = {
make_tuple(&av1_quantize_lp_c, &av1_quantize_lp_neon,
diff --git a/test/ratectrl_test.cc b/test/ratectrl_test.cc
index 67adf27..4b462e3 100644
--- a/test/ratectrl_test.cc
+++ b/test/ratectrl_test.cc
@@ -41,7 +41,7 @@
// TODO(angiebird): Move this test to tpl_mode_test.cc
TEST(RatectrlTest, QModeComputeGOPQIndicesTest) {
const int base_q_index = 80;
- const double arf_qstep_ratio = 0.5;
+ double qstep_ratio_list[5] = { 0.5, 1, 1, 1, 0.5 };
const aom_bit_depth_t bit_depth = AOM_BITS_8;
const int gf_frame_index = 0;
@@ -58,10 +58,10 @@
}
const int arf_q = av1_get_q_index_from_qstep_ratio(
- base_q_index, arf_qstep_ratio, bit_depth);
+ base_q_index, qstep_ratio_list[0], bit_depth);
av1_q_mode_compute_gop_q_indices(gf_frame_index, base_q_index,
- arf_qstep_ratio, bit_depth, &gf_group,
+ qstep_ratio_list, bit_depth, &gf_group,
gf_group.q_val);
for (int i = 0; i < gf_group.size; i++) {
diff --git a/test/rt_end_to_end_test.cc b/test/rt_end_to_end_test.cc
index b0b8ab2..5e360f2 100644
--- a/test/rt_end_to_end_test.cc
+++ b/test/rt_end_to_end_test.cc
@@ -36,19 +36,22 @@
{ 6, { { 0, 35.3 }, { 3, 36.2 } } },
{ 7, { { 0, 34.9 }, { 3, 35.8 } } },
{ 8, { { 0, 35.0 }, { 3, 35.8 } } },
- { 9, { { 0, 34.9 }, { 3, 35.5 } } } } },
+ { 9, { { 0, 34.9 }, { 3, 35.5 } } },
+ { 10, { { 0, 34.7 }, { 3, 35.3 } } } } },
{ "paris_352_288_30.y4m",
{ { 5, { { 0, 36.2 }, { 3, 36.7 } } },
{ 6, { { 0, 36.1 }, { 3, 36.5 } } },
{ 7, { { 0, 35.5 }, { 3, 36.0 } } },
{ 8, { { 0, 36.0 }, { 3, 36.5 } } },
- { 9, { { 0, 35.5 }, { 3, 36.0 } } } } },
+ { 9, { { 0, 35.5 }, { 3, 36.0 } } },
+ { 10, { { 0, 35.3 }, { 3, 35.9 } } } } },
{ "niklas_1280_720_30.y4m",
{ { 5, { { 0, 34.4 }, { 3, 34.30 } } },
{ 6, { { 0, 34.2 }, { 3, 34.2 } } },
{ 7, { { 0, 33.6 }, { 3, 33.6 } } },
{ 8, { { 0, 33.48 }, { 3, 33.48 } } },
- { 9, { { 0, 33.4 }, { 3, 33.4 } } } } } };
+ { 9, { { 0, 33.4 }, { 3, 33.4 } } },
+ { 10, { { 0, 33.2 }, { 3, 33.2 } } } } } };
typedef struct {
const char *filename;
@@ -176,13 +179,13 @@
TEST_P(RTEndToEndTestThreaded, EndtoEndPSNRTest) { DoTest(); }
AV1_INSTANTIATE_TEST_SUITE(RTEndToEndTest, ::testing::ValuesIn(kTestVectors),
- ::testing::Range(5, 10),
+ ::testing::Range(5, 11),
::testing::Values<unsigned int>(0, 3),
::testing::Values(1), ::testing::Values(1));
AV1_INSTANTIATE_TEST_SUITE(RTEndToEndTestThreaded,
::testing::ValuesIn(kTestVectors),
- ::testing::Range(5, 10),
+ ::testing::Range(5, 11),
::testing::Values<unsigned int>(0, 3),
::testing::Range(2, 5), ::testing::Range(2, 5));
} // namespace
diff --git a/test/svc_datarate_test.cc b/test/svc_datarate_test.cc
index 19e7de8..243fcc1 100644
--- a/test/svc_datarate_test.cc
+++ b/test/svc_datarate_test.cc
@@ -474,7 +474,7 @@
for (int i = 0; i < number_temporal_layers_ * number_spatial_layers_; i++) {
ASSERT_GE(effective_datarate_tl[i], target_layer_bitrate_[i] * 0.80)
<< " The datarate for the file is lower than target by too much!";
- ASSERT_LE(effective_datarate_tl[i], target_layer_bitrate_[i] * 1.30)
+ ASSERT_LE(effective_datarate_tl[i], target_layer_bitrate_[i] * 1.35)
<< " The datarate for the file is greater than target by too much!";
}
// Top temporal layers are non_reference, so exlcude them from
@@ -996,7 +996,7 @@
for (int i = 0; i < number_temporal_layers_ * number_spatial_layers_; i++) {
ASSERT_GE(effective_datarate_tl[i], target_layer_bitrate_[i] * 0.80)
<< " The datarate for the file is lower than target by too much!";
- ASSERT_LE(effective_datarate_tl[i], target_layer_bitrate_[i] * 1.30)
+ ASSERT_LE(effective_datarate_tl[i], target_layer_bitrate_[i] * 1.35)
<< " The datarate for the file is greater than target by too much!";
}
// Test that no mismatches have been found.
@@ -1041,7 +1041,7 @@
for (int i = 0; i < number_temporal_layers_ * number_spatial_layers_; i++) {
ASSERT_GE(effective_datarate_tl[i], target_layer_bitrate_[i] * 0.80)
<< " The datarate for the file is lower than target by too much!";
- ASSERT_LE(effective_datarate_tl[i], target_layer_bitrate_[i] * 1.30)
+ ASSERT_LE(effective_datarate_tl[i], target_layer_bitrate_[i] * 1.35)
<< " The datarate for the file is greater than target by too much!";
}
// Test that no mismatches have been found.
@@ -1086,7 +1086,7 @@
for (int i = 0; i < number_temporal_layers_ * number_spatial_layers_; i++) {
ASSERT_GE(effective_datarate_tl[i], target_layer_bitrate_[i] * 0.80)
<< " The datarate for the file is lower than target by too much!";
- ASSERT_LE(effective_datarate_tl[i], target_layer_bitrate_[i] * 1.30)
+ ASSERT_LE(effective_datarate_tl[i], target_layer_bitrate_[i] * 1.35)
<< " The datarate for the file is greater than target by too much!";
}
// Test that no mismatches have been found.
@@ -1132,7 +1132,7 @@
for (int i = 0; i < number_temporal_layers_ * number_spatial_layers_; i++) {
ASSERT_GE(effective_datarate_tl[i], target_layer_bitrate_[i] * 0.80)
<< " The datarate for the file is lower than target by too much!";
- ASSERT_LE(effective_datarate_tl[i], target_layer_bitrate_[i] * 1.30)
+ ASSERT_LE(effective_datarate_tl[i], target_layer_bitrate_[i] * 1.35)
<< " The datarate for the file is greater than target by too much!";
}
// Test that no mismatches have been found.
diff --git a/test/tpl_model_test.cc b/test/tpl_model_test.cc
index 3094c6c..da5a821 100644
--- a/test/tpl_model_test.cc
+++ b/test/tpl_model_test.cc
@@ -235,7 +235,7 @@
* Helper method to brute-force search for the closest q_index
* that achieves the specified bit budget.
*/
-int find_gop_q_iterative(double bit_budget, double arf_qstep_ratio,
+int find_gop_q_iterative(double bit_budget, const double *qstep_ratio_list,
GF_GROUP gf_group, const int *stats_valid_list,
TplTxfmStats *stats_list, int gf_frame_index,
aom_bit_depth_t bit_depth) {
@@ -243,7 +243,7 @@
// Use the result to test against the binary search result.
// Initial estimate when q = 255
- av1_q_mode_compute_gop_q_indices(gf_frame_index, 255, arf_qstep_ratio,
+ av1_q_mode_compute_gop_q_indices(gf_frame_index, 255, qstep_ratio_list,
bit_depth, &gf_group, gf_group.q_val);
double curr_estimate = av1_estimate_gop_bitrate(
gf_group.q_val, gf_group.size, stats_list, stats_valid_list, NULL);
@@ -252,7 +252,7 @@
// Start at q = 254 because we already have an estimate for q = 255.
for (int q = 254; q >= 0; q--) {
- av1_q_mode_compute_gop_q_indices(gf_frame_index, q, arf_qstep_ratio,
+ av1_q_mode_compute_gop_q_indices(gf_frame_index, q, qstep_ratio_list,
bit_depth, &gf_group, gf_group.q_val);
curr_estimate = av1_estimate_gop_bitrate(
gf_group.q_val, gf_group.size, stats_list, stats_valid_list, NULL);
@@ -274,10 +274,14 @@
5, 6, 6, 1, 5, 1, 5, 6, 1, 5, 1, 4 };
int stats_valid_list[25] = { 0 };
const int gf_frame_index = 0;
- const double arf_qstep_ratio = 2;
const aom_bit_depth_t bit_depth = AOM_BITS_8;
const double scale_factor = 1.0;
+ double qstep_ratio_list[25];
+ for (int i = 0; i < 25; i++) {
+ qstep_ratio_list[i] = 1;
+ }
+
for (int i = 0; i < gf_group.size; i++) {
stats_valid_list[i] = 1;
gf_group.update_type[i] = gf_group_update_types[i];
@@ -297,9 +301,9 @@
// Binary search method to find the optimal q.
const int result = av1_q_mode_estimate_base_q(
&gf_group, stats_list, stats_valid_list, bit_budget, gf_frame_index,
- arf_qstep_ratio, bit_depth, scale_factor, q_index_list, NULL);
+ bit_depth, scale_factor, qstep_ratio_list, q_index_list, NULL);
const int test_result = find_gop_q_iterative(
- bit_budget, arf_qstep_ratio, gf_group, stats_valid_list, stats_list,
+ bit_budget, qstep_ratio_list, gf_group, stats_valid_list, stats_list,
gf_frame_index, bit_depth);
if (bit_budget == 0) {
diff --git a/tools/gop_bitrate/python/bitrate_accuracy.py b/tools/gop_bitrate/python/bitrate_accuracy.py
index 565794f..2a5da6a 100644
--- a/tools/gop_bitrate/python/bitrate_accuracy.py
+++ b/tools/gop_bitrate/python/bitrate_accuracy.py
@@ -1,40 +1,185 @@
import numpy as np
+# Model A only.
# Uses least squares regression to find the solution
# when there is one unknown variable.
-def print_lstsq_solution(A, B):
+def lstsq_solution(A, B):
A_inv = np.linalg.pinv(A)
x = np.matmul(A_inv, B)
- print("least squares solution:", x[0][0])
+ return x[0][0]
+# Model B only.
# Uses the pseudoinverse matrix to find the solution
# when there are two unknown variables.
-def print_pinv_solution(A, mv, B):
+def pinv_solution(A, mv, B):
new_A = np.concatenate((A, mv), axis=1)
new_A_inv = np.linalg.pinv(new_A)
new_x = np.matmul(new_A_inv, B)
print("pinv solution:", new_x[0][0], new_x[1][0])
+ return (new_x[0][0], new_x[1][0])
+
+# Model A only.
+# Finds the coefficient to multiply A by to minimize
+# the percentage error between A and B.
+def minimize_percentage_error_model_a(A, B):
+  # Per-sample ratios a_i / b_i; B should contain no zeros (elementwise divide).
+  R = np.divide(A, B)
+  # x = sum(r_i) / sum(r_i^2) is the minimizer of sum((1 - x*r_i)^2) over x.
+  num = 0
+  den = 0
+  for r_i in R:
+    num += r_i
+    den += r_i**2
+  if den == 0:
+    return 0
+  return (num/den)[0]
+
+# Model B only.
+# Finds the coefficients by which to multiply the frame bitrate
+# and the motion vector bitrate to minimize the percent error.
+def minimize_percentage_error_model_b(r_e, r_m, r_f):
+ r_ef = np.divide(r_e, r_f)
+ r_mf = np.divide(r_m, r_f)
+ sum_ef = np.sum(r_ef)
+ sum_ef_sq = np.sum(np.square(r_ef))
+ sum_mf = np.sum(r_mf)
+ sum_mf_sq = np.sum(np.square(r_mf))
+ sum_ef_mf = np.sum(np.multiply(r_ef, r_mf))
+ # Divides x by y. If y is zero, returns 0.
+ divide = lambda x, y : 0 if y == 0 else x / y
+ # Set up and solve the matrix equation
+ A = np.array([[1, divide(sum_ef_mf, sum_ef_sq)],[divide(sum_ef_mf, sum_mf_sq), 1]])
+ B = np.array([divide(sum_ef, sum_ef_sq), divide(sum_mf, sum_mf_sq)])
+ A_inv = np.linalg.pinv(A)
+ x = np.matmul(A_inv, B)
+ return x
+
+# Model A only.
+# Calculates the mean squared error between x*A and B,
+# skipping rows where B is zero.
+def average_lstsq_error(A, B, x):
+ error = 0
+ n = 0
+ for i, a in enumerate(A):
+ a = a[0]
+ b = B[i][0]
+ if b == 0:
+ continue
+ n += 1
+ error += (b - x*a)**2
+ if n == 0:
+ return None
+ error /= n
+ return error
+
+# Model A only.
+# Calculates the average percentage error between A and B.
+def average_percent_error_model_a(A, B, x):
+ error = 0
+ n = 0
+ for i, a in enumerate(A):
+ a = a[0]
+ b = B[i][0]
+ if b == 0:
+ continue
+ n += 1
+ error_i = (abs(x*a-b)/b)*100
+ error += error_i
+ error /= n
+ return error
+
+# Model B only.
+# Calculates the average percentage error between x[0]*A + x[1]*M and B.
+def average_percent_error_model_b(A, M, B, x):
+  error = 0
+  n = 0
+  for i, row in enumerate(A):
+    b = B[i][0]
+    # Rows where b is zero have no defined percentage error; skip them.
+    if b == 0:
+      continue
+    n += 1
+    estimate = x[0]*row[0] + x[1]*M[i]
+    error += abs(estimate - b) / b
+  if n == 0:
+    return 0
+  return 100 * error / n
+
+def average_squared_error_model_a(A, B, x):
+ error = 0
+ n = 0
+ for i, a in enumerate(A):
+ a = a[0]
+ b = B[i][0]
+ if b == 0:
+ continue
+ n += 1
+ error_i = (1 - x*(a/b))**2
+ error += error_i
+ error /= n
+ error = error**0.5
+ return error * 100
+
+def average_squared_error_model_b(A, M, B, x):
+ error = 0
+ n = 0
+ for i, a in enumerate(A):
+ a = a[0]
+ b = B[i][0]
+ mv = M[i]
+ if b == 0:
+ continue
+ n += 1
+ error_i = 1 - ((x[0]*a + x[1]*mv)/b)
+ error_i = error_i**2
+ error += error_i
+ error /= n
+ error = error**0.5
+ return error * 100
# Traverses the data and prints out one value for
# each update type.
def print_solutions(file_path):
data = np.genfromtxt(file_path, delimiter="\t")
-
prev_update = 0
split_list_indices = list()
for i, val in enumerate(data):
if prev_update != val[3]:
split_list_indices.append(i)
prev_update = val[3]
-
split = np.split(data, split_list_indices)
-
for array in split:
A, mv, B, update = np.hsplit(array, 4)
+ z = np.where(B == 0)[0]
+ r_e = np.delete(A, z, axis=0)
+ r_m = np.delete(mv, z, axis=0)
+ r_f = np.delete(B, z, axis=0)
+ A = r_e
+ mv = r_m
+ B = r_f
+ all_zeros = not A.any()
+ if all_zeros:
+ continue
print("update type:", update[0][0])
- print_lstsq_solution(A, B)
- print_pinv_solution(A, mv, B)
+ x_ls = lstsq_solution(A, B)
+ x_a = minimize_percentage_error_model_a(A, B)
+ x_b = minimize_percentage_error_model_b(A, mv, B)
+ percent_error_a = average_percent_error_model_a(A, B, x_a)
+ percent_error_b = average_percent_error_model_b(A, mv, B, x_b)[0]
+ baseline_percent_error_a = average_percent_error_model_a(A, B, 1)
+ baseline_percent_error_b = average_percent_error_model_b(A, mv, B, [1, 1])[0]
+
+ squared_error_a = average_squared_error_model_a(A, B, x_a)
+ squared_error_b = average_squared_error_model_b(A, mv, B, x_b)[0]
+ baseline_squared_error_a = average_squared_error_model_a(A, B, 1)
+ baseline_squared_error_b = average_squared_error_model_b(A, mv, B, [1, 1])[0]
+
+ print("model,\tframe_coeff,\tmv_coeff,\terror,\tbaseline_error")
+ print("Model A %_error,\t" + str(x_a) + ",\t" + str(0) + ",\t" + str(percent_error_a) + ",\t" + str(baseline_percent_error_a))
+ print("Model A sq_error,\t" + str(x_a) + ",\t" + str(0) + ",\t" + str(squared_error_a) + ",\t" + str(baseline_squared_error_a))
+ print("Model B %_error,\t" + str(x_b[0]) + ",\t" + str(x_b[1]) + ",\t" + str(percent_error_b) + ",\t" + str(baseline_percent_error_b))
+ print("Model B sq_error,\t" + str(x_b[0]) + ",\t" + str(x_b[1]) + ",\t" + str(squared_error_b) + ",\t" + str(baseline_squared_error_b))
print()
if __name__ == "__main__":
- print_solutions("data/lowres_64f_target150_data.txt")
+ print_solutions("data2/all_lowres_target_lt600_data.txt")
diff --git a/tools/gop_bitrate/python/bitrate_accuracy_percentage_error.py b/tools/gop_bitrate/python/bitrate_accuracy_percentage_error.py
deleted file mode 100644
index fcde9797..0000000
--- a/tools/gop_bitrate/python/bitrate_accuracy_percentage_error.py
+++ /dev/null
@@ -1,106 +0,0 @@
-import numpy as np
-
-# Finds the coefficient to multiply A by to minimize
-# the percentage error between A and B.
-def minimize_percentage_error_model_a(A, B):
- z = np.where(B == 0)[0]
- A = np.delete(A, z, axis=0)
- B = np.delete(B, z, axis=0)
- z = np.where(A == 0)[0]
- A = np.delete(A, z, axis=0)
- B = np.delete(B, z, axis=0)
-
- R = np.divide(A, B)
- num = 0
- den = 0
- for r_i in R:
- num += r_i
- den += r_i**2
- if den == 0:
- x = 0
- else:
- x = (num / den)[0]
- return x
-
-def minimize_percentage_error_model_b(r_e, r_m, r_f):
- z = np.where(r_f == 0)[0]
- r_e = np.delete(r_e, z, axis=0)
- r_m = np.delete(r_m, z, axis=0)
- r_f = np.delete(r_f, z, axis=0)
-
- r_ef = np.divide(r_e, r_f)
- r_mf = np.divide(r_m, r_f)
- sum_ef = np.sum(r_ef)
- sum_ef_sq = np.sum(np.square(r_ef))
- sum_mf = np.sum(r_mf)
- sum_mf_sq = np.sum(np.square(r_mf))
- sum_ef_mf = np.sum(np.multiply(r_ef, r_mf))
-
- # Divides x by y. If y is zero, returns 0.
- divide = lambda x, y : 0 if y == 0 else x / y
-
- # Set up and solve the matrix equation
- A = np.array([[1, divide(sum_ef_mf, sum_ef_sq)],[divide(sum_ef_mf, sum_mf_sq), 1]])
- B = np.array([divide(sum_ef, sum_ef_sq), divide(sum_mf, sum_mf_sq)])
- A_inv = np.linalg.pinv(A)
- x = np.matmul(A_inv, B)
- return x
-
-# Calculates the average percentage error between A and B.
-def average_error_model_a(A, B, x):
- error = 0
- for i, a in enumerate(A):
- a = a[0]
- b = B[i][0]
- if b == 0:
- continue
- error += abs(x*a - b) / b
- error *= 100
- error /= A.shape[0]
- return error
-
-def average_error_model_b(A, M, B, x):
- error = 0
- for i, a in enumerate(A):
- a = a[0]
- mv = M[i]
- b = B[i][0]
- if b == 0:
- continue
- estimate = x[0]*a
- estimate += x[1]*mv
- error += abs(estimate - b) / b
- error *= 100
- error /= A.shape[0]
- return error
-
-# Traverses the data and prints out one value for
-# each update type.
-def print_solutions(file_path):
- data = np.genfromtxt(file_path, delimiter="\t")
-
- prev_update = 0
- split_list_indices = list()
- for i, val in enumerate(data):
- if prev_update != val[3]:
- split_list_indices.append(i)
- prev_update = val[3]
-
- split = np.split(data, split_list_indices)
-
- for array in split:
- A, mv, B, update = np.hsplit(array, 4)
- print("update type:", update[0][0])
- xa = minimize_percentage_error_model_a(A, B)
- xb = minimize_percentage_error_model_b(A, mv, B)
- print("Model A coefficients:", xa, " | Model B coefficients:", xb)
- error_a = average_error_model_a(A, B, xa)
- error_b = average_error_model_b(A, mv, B, xb)
- baseline_error_a = average_error_model_a(A, B, 1)
- baseline_error_b = average_error_model_b(A, mv, B, [1, 1])
- print("error a:", error_a, " | error b:", error_b)
- print("baseline error a:", baseline_error_a, "baseline error b:", baseline_error_b)
- print()
-
-if __name__ == "__main__":
- print_solutions("data2/lowres_17f_target150_data.txt")