Initialize quantization matrix only when it is used
Initializing the quantization matrices (QM) takes a significant share of decoding time, so we now initialize them only when they are actually used.
For all-intra coding on the A3 test set, the decoder profile taken before this change shows that about 5-6% of decoding time is spent on QM initialization.
This change avoids the unnecessary computation for most use cases.
diff --git a/av1/av1_cx_iface.c b/av1/av1_cx_iface.c
index 831978d..973849a 100644
--- a/av1/av1_cx_iface.c
+++ b/av1/av1_cx_iface.c
@@ -2372,6 +2372,25 @@
AV1_COMP *cpi = ctx->cpi;
SequenceHeader *seq_params = &cpi->common.seq_params;
+ if (num_planes != av1_num_planes(&cpi->common)) {
+ return AOM_CODEC_INVALID_PARAM;
+ }
+ if (!cpi->common.quant_params.qmatrix_allocated) {
+ seq_params->quantizer_matrix_8x8 = av1_alloc_qm(8, 8);
+ seq_params->quantizer_matrix_8x4 = av1_alloc_qm(8, 4);
+ seq_params->quantizer_matrix_4x8 = av1_alloc_qm(4, 8);
+ cpi->common.quant_params.qmatrix_allocated = true;
+ }
+ if (!cpi->common.quant_params.qmatrix_initialized) {
+ av1_init_qmatrix(seq_params->quantizer_matrix_8x8,
+ seq_params->quantizer_matrix_8x4,
+ seq_params->quantizer_matrix_4x8, num_planes);
+ qm_val_t ***fund_mat[3] = { cpi->common.seq_params.quantizer_matrix_8x8,
+ cpi->common.seq_params.quantizer_matrix_8x4,
+ cpi->common.seq_params.quantizer_matrix_4x8 };
+ av1_qm_init(&cpi->common.quant_params, num_planes, fund_mat);
+ cpi->common.quant_params.qmatrix_initialized = true;
+ }
// Copy user-defined QMs for level.
for (int c = 0; c < num_planes; c++) {
if (!user_defined_qm->qm_8x8[c]) {
diff --git a/av1/common/av1_common_int.h b/av1/common/av1_common_int.h
index 2577cf9..893ae82 100644
--- a/av1/common/av1_common_int.h
+++ b/av1/common/av1_common_int.h
@@ -1402,6 +1402,19 @@
* matrix tables.
*/
+ /*!
+ * Flag indicating whether quantization matrices are allocated.
+ */
+ bool qmatrix_allocated;
+
+ /*!
+ * Flag indicating whether quantization matrices are initialized.
+ * To avoid unnecessary computation, for the decoder we want to initialize
+ * quantization matrices only when they are used.
+ * Note that when the sequence header changes, this flag must be reset to false.
+ */
+ bool qmatrix_initialized;
+
/**@{*/
/*!
* Number of QM levels available for use by the segments in the frame.
diff --git a/av1/decoder/decodeframe.c b/av1/decoder/decodeframe.c
index 3aa866c..4d16673 100644
--- a/av1/decoder/decodeframe.c
+++ b/av1/decoder/decodeframe.c
@@ -3778,13 +3778,34 @@
}
}
-static AOM_INLINE void setup_qm_params(CommonQuantParams *quant_params,
+static AOM_INLINE void setup_qm_params(SequenceHeader *seq_params,
+ CommonQuantParams *quant_params,
#if CONFIG_F311_QM_PARAMS
bool segmentation_enabled,
#endif // CONFIG_F311_QM_PARAMS
- int num_planes, bool separate_uv_delta_q,
+ int num_planes,
struct aom_read_bit_buffer *rb) {
quant_params->using_qmatrix = aom_rb_read_bit(rb);
+ if (quant_params->using_qmatrix) {
+ if (!quant_params->qmatrix_allocated) {
+ seq_params->quantizer_matrix_8x8 = av1_alloc_qm(8, 8);
+ seq_params->quantizer_matrix_8x4 = av1_alloc_qm(8, 4);
+ seq_params->quantizer_matrix_4x8 = av1_alloc_qm(4, 8);
+ quant_params->qmatrix_allocated = true;
+ }
+ if (!quant_params->qmatrix_initialized) {
+ if (!seq_params->user_defined_qmatrix) {
+ av1_init_qmatrix(seq_params->quantizer_matrix_8x8,
+ seq_params->quantizer_matrix_8x4,
+ seq_params->quantizer_matrix_4x8, num_planes);
+ }
+ qm_val_t ***fund_mat[3] = { seq_params->quantizer_matrix_8x8,
+ seq_params->quantizer_matrix_8x4,
+ seq_params->quantizer_matrix_4x8 };
+ av1_qm_init_dequant_only(quant_params, num_planes, fund_mat);
+ quant_params->qmatrix_initialized = true;
+ }
+ }
#if CONFIG_QM_DEBUG
printf("[DEC-FRM] using_qmatrix: %d\n", quant_params->using_qmatrix);
#endif
@@ -3814,7 +3835,7 @@
quant_params->qm_v[i] = quant_params->qm_y[i];
} else {
quant_params->qm_u[i] = aom_rb_read_literal(rb, QM_LEVEL_BITS);
- if (!separate_uv_delta_q) {
+ if (!seq_params->separate_uv_delta_q) {
quant_params->qm_v[i] = quant_params->qm_u[i];
} else {
quant_params->qm_v[i] = aom_rb_read_literal(rb, QM_LEVEL_BITS);
@@ -6775,6 +6796,7 @@
void av1_read_sequence_header_beyond_av1(
struct aom_read_bit_buffer *rb, SequenceHeader *seq_params,
+ CommonQuantParams *quant_params,
struct aom_internal_error_info *error_info) {
// printf("print sps\n");
seq_params->enable_refmvbank = aom_rb_read_bit(rb);
@@ -6972,16 +6994,22 @@
#if CONFIG_EXT_SEG
seq_params->enable_ext_seg = aom_rb_read_bit(rb);
#endif // CONFIG_EXT_SEG
- int num_planes = seq_params->monochrome ? 1 : MAX_MB_PLANE;
- av1_init_qmatrix(seq_params->quantizer_matrix_8x8,
- seq_params->quantizer_matrix_8x4,
- seq_params->quantizer_matrix_4x8, num_planes);
seq_params->user_defined_qmatrix = aom_rb_read_bit(rb);
#if CONFIG_QM_DEBUG
printf("[DEC-SEQ] user_defined_qmatrix=%d\n",
seq_params->user_defined_qmatrix);
#endif
if (seq_params->user_defined_qmatrix) {
+ int num_planes = seq_params->monochrome ? 1 : MAX_MB_PLANE;
+ if (!quant_params->qmatrix_allocated) {
+ seq_params->quantizer_matrix_8x8 = av1_alloc_qm(8, 8);
+ seq_params->quantizer_matrix_8x4 = av1_alloc_qm(8, 4);
+ seq_params->quantizer_matrix_4x8 = av1_alloc_qm(4, 8);
+ quant_params->qmatrix_allocated = true;
+ }
+ av1_init_qmatrix(seq_params->quantizer_matrix_8x8,
+ seq_params->quantizer_matrix_8x4,
+ seq_params->quantizer_matrix_4x8, num_planes);
decode_user_defined_qm(seq_params, rb, num_planes, error_info);
} else {
for (uint16_t i = 0; i < NUM_CUSTOM_QMS; i++) {
@@ -8934,8 +8962,7 @@
CommonQuantParams *const quant_params = &cm->quant_params;
setup_quantization(quant_params, av1_num_planes(cm), &cm->seq_params, rb);
#if !CONFIG_F311_QM_PARAMS
- setup_qm_params(quant_params, av1_num_planes(cm),
- cm->seq_params.separate_uv_delta_q, rb);
+ setup_qm_params(&cm->seq_params, quant_params, av1_num_planes(cm), rb);
#endif // !CONFIG_F311_QM_PARAMS
cm->cur_frame->base_qindex = quant_params->base_qindex;
cm->cur_frame->u_ac_delta_q = quant_params->u_ac_delta_q;
@@ -8960,8 +8987,8 @@
setup_segmentation(cm, rb);
#if CONFIG_F311_QM_PARAMS
- setup_qm_params(quant_params, cm->seg.enabled, av1_num_planes(cm),
- cm->seq_params.separate_uv_delta_q, rb);
+ setup_qm_params(&cm->seq_params, quant_params, cm->seg.enabled,
+ av1_num_planes(cm), rb);
#endif // CONFIG_F311_QM_PARAMS
cm->delta_q_info.delta_q_res = 1;
diff --git a/av1/decoder/decodeframe.h b/av1/decoder/decodeframe.h
index d6e0c16..42e596f 100644
--- a/av1/decoder/decodeframe.h
+++ b/av1/decoder/decodeframe.h
@@ -45,6 +45,7 @@
// Reads additional sequence header for coding tools beyond AV1
void av1_read_sequence_header_beyond_av1(
struct aom_read_bit_buffer *rb, SequenceHeader *seq_params,
+ CommonQuantParams *quant_params,
struct aom_internal_error_info *error_info);
void av1_read_frame_size(struct aom_read_bit_buffer *rb, int num_bits_width,
diff --git a/av1/decoder/decoder.c b/av1/decoder/decoder.c
index 64be01f..025eb5b 100644
--- a/av1/decoder/decoder.c
+++ b/av1/decoder/decoder.c
@@ -231,13 +231,9 @@
av1_loop_filter_init(cm);
- SequenceHeader *seq_params = &cm->seq_params;
- // Allocate memory for fundamental QM arrays, in case the coded video
- // sequence requires custom QMs (i.e. seq->user_defined_qmatrix == 1)
- // av1_qm_init_dequant_only() is called when we process a sequence header OBU.
- seq_params->quantizer_matrix_8x8 = av1_alloc_qm(8, 8);
- seq_params->quantizer_matrix_8x4 = av1_alloc_qm(8, 4);
- seq_params->quantizer_matrix_4x8 = av1_alloc_qm(4, 8);
+ cm->quant_params.qmatrix_allocated = false;
+ cm->quant_params.qmatrix_initialized = false;
+
#if CONFIG_ACCOUNTING
pbi->acct_enabled = 1;
aom_accounting_init(&pbi->accounting);
@@ -452,9 +448,11 @@
}
#endif
- av1_free_qm(pbi->common.seq_params.quantizer_matrix_8x8);
- av1_free_qm(pbi->common.seq_params.quantizer_matrix_8x4);
- av1_free_qm(pbi->common.seq_params.quantizer_matrix_4x8);
+ if (pbi->common.quant_params.qmatrix_allocated) {
+ av1_free_qm(pbi->common.seq_params.quantizer_matrix_8x8);
+ av1_free_qm(pbi->common.seq_params.quantizer_matrix_8x4);
+ av1_free_qm(pbi->common.seq_params.quantizer_matrix_4x8);
+ }
aom_free(pbi);
}
diff --git a/av1/decoder/obu.c b/av1/decoder/obu.c
index 8b3c739..0f33873 100644
--- a/av1/decoder/obu.c
+++ b/av1/decoder/obu.c
@@ -362,12 +362,8 @@
seq_params->film_grain_params_present = aom_rb_read_bit(rb);
// Sequence header for coding tools beyond AV1
- av1_read_sequence_header_beyond_av1(rb, seq_params, &cm->error);
- int num_planes = seq_params->monochrome ? 1 : MAX_MB_PLANE;
- qm_val_t ***fund_mat[3] = { seq_params->quantizer_matrix_8x8,
- seq_params->quantizer_matrix_8x4,
- seq_params->quantizer_matrix_4x8 };
- av1_qm_init_dequant_only(&cm->quant_params, num_planes, fund_mat);
+ av1_read_sequence_header_beyond_av1(rb, seq_params, &cm->quant_params,
+ &cm->error);
#if CONFIG_OUTPUT_FRAME_BASED_ON_ORDER_HINT_ENHANCEMENT
#if !CONFIG_CWG_F243_REMOVE_ENABLE_ORDER_HINT
if (!seq_params->order_hint_info.enable_order_hint &&
@@ -396,8 +392,10 @@
// If a sequence header has been decoded before, we check if the new
// one is consistent with the old one.
if (pbi->sequence_header_ready) {
- if (!are_seq_headers_consistent(&cm->seq_params, seq_params))
+ if (!are_seq_headers_consistent(&cm->seq_params, seq_params)) {
pbi->sequence_header_changed = 1;
+ cm->quant_params.qmatrix_initialized = false;
+ }
}
cm->seq_params = *seq_params;
diff --git a/av1/encoder/encode_strategy.c b/av1/encoder/encode_strategy.c
index 1c28fa7..b1d9f9e 100644
--- a/av1/encoder/encode_strategy.c
+++ b/av1/encoder/encode_strategy.c
@@ -1360,6 +1360,24 @@
av1_set_lr_tools(cm->seq_params.lr_tools_disable_mask[1], 1, &cm->features);
av1_set_lr_tools(cm->seq_params.lr_tools_disable_mask[1], 2, &cm->features);
}
+ if (cm->quant_params.using_qmatrix) {
+ if (!cm->quant_params.qmatrix_allocated) {
+ cm->seq_params.quantizer_matrix_8x8 = av1_alloc_qm(8, 8);
+ cm->seq_params.quantizer_matrix_8x4 = av1_alloc_qm(8, 4);
+ cm->seq_params.quantizer_matrix_4x8 = av1_alloc_qm(4, 8);
+ cm->quant_params.qmatrix_allocated = true;
+ }
+ if (!cm->quant_params.qmatrix_initialized) {
+ av1_init_qmatrix(cm->seq_params.quantizer_matrix_8x8,
+ cm->seq_params.quantizer_matrix_8x4,
+ cm->seq_params.quantizer_matrix_4x8, av1_num_planes(cm));
+ qm_val_t ***fund_mat[3] = { cm->seq_params.quantizer_matrix_8x8,
+ cm->seq_params.quantizer_matrix_8x4,
+ cm->seq_params.quantizer_matrix_4x8 };
+ av1_qm_init(&cm->quant_params, av1_num_planes(cm), fund_mat);
+ cm->quant_params.qmatrix_initialized = true;
+ }
+ }
if (denoise_and_encode(cpi, dest, &frame_input, &frame_params,
&frame_results) != AOM_CODEC_OK) {
return AOM_CODEC_ERROR;
diff --git a/av1/encoder/encoder.c b/av1/encoder/encoder.c
index 79bc2de..c1dca3b 100644
--- a/av1/encoder/encoder.c
+++ b/av1/encoder/encoder.c
@@ -1462,19 +1462,8 @@
seq->qm_data_present[i] = false;
}
- // Allocate memory for fundamental QM arrays, in case the coded video
- // sequence requires custom QMs (i.e. seq->user_defined_qmatrix == 1)
- seq->quantizer_matrix_8x8 = av1_alloc_qm(8, 8);
- seq->quantizer_matrix_8x4 = av1_alloc_qm(8, 4);
- seq->quantizer_matrix_4x8 = av1_alloc_qm(4, 8);
-
- // Initialize QMs with default fundamental matrices.
- av1_init_qmatrix(seq->quantizer_matrix_8x8, seq->quantizer_matrix_8x4,
- seq->quantizer_matrix_4x8, av1_num_planes(cm));
- qm_val_t ***fund_mat[3] = { seq->quantizer_matrix_8x8,
- seq->quantizer_matrix_8x4,
- seq->quantizer_matrix_4x8 };
- av1_qm_init(&cm->quant_params, av1_num_planes(cm), fund_mat);
+ cm->quant_params.qmatrix_allocated = false;
+ cm->quant_params.qmatrix_initialized = false;
cm->seq_params.df_par_bits_minus2 = DF_PAR_BITS - 2;
av1_loop_filter_init(cm);
diff --git a/av1/encoder/encoder_alloc.h b/av1/encoder/encoder_alloc.h
index c543479..bba5814 100644
--- a/av1/encoder/encoder_alloc.h
+++ b/av1/encoder/encoder_alloc.h
@@ -317,9 +317,11 @@
cpi->alloc_height = 0;
cpi->alloc_sb_size = 0;
- av1_free_qm(cm->seq_params.quantizer_matrix_8x8);
- av1_free_qm(cm->seq_params.quantizer_matrix_8x4);
- av1_free_qm(cm->seq_params.quantizer_matrix_4x8);
+ if (cm->quant_params.qmatrix_allocated) {
+ av1_free_qm(cm->seq_params.quantizer_matrix_8x8);
+ av1_free_qm(cm->seq_params.quantizer_matrix_8x4);
+ av1_free_qm(cm->seq_params.quantizer_matrix_4x8);
+ }
}
static AOM_INLINE void alloc_altref_frame_buffer(AV1_COMP *cpi) {