Collect transform stats in tpl model
Change-Id: I7a1a19dd7545ff22b58964c066c3f7e80b3a4cae
diff --git a/av1/common/quant_common.h b/av1/common/quant_common.h
index 9c30204..8f36eb1 100644
--- a/av1/common/quant_common.h
+++ b/av1/common/quant_common.h
@@ -36,6 +36,7 @@
#define DEFAULT_QM_V 12
#define DEFAULT_QM_FIRST 5
#define DEFAULT_QM_LAST 9
+#define LOSSLESS_Q_STEP 4 // this should be equal to dc/ac_qlookup_QTX[0]
struct AV1Common;
struct CommonQuantParams;
diff --git a/av1/encoder/tpl_model.c b/av1/encoder/tpl_model.c
index 50ed63d..c869f06 100644
--- a/av1/encoder/tpl_model.c
+++ b/av1/encoder/tpl_model.c
@@ -35,6 +35,39 @@
#include "av1/encoder/reconinter_enc.h"
#include "av1/encoder/tpl_model.h"
+// Returns 1 when the encoder has more than one worker and compound
+// prediction is disabled for TPL, 0 otherwise. Callers use this to choose
+// between the multi-threaded and single-threaded TPL paths.
+static AOM_INLINE int tpl_use_multithread(const AV1_COMP *cpi) {
+  if (cpi->mt_info.num_workers <= 1) return 0;
+  return !cpi->sf.tpl_sf.allow_compound_pred;
+}
+
+// Folds one transform block into the frame-level transform statistics.
+//
+// The magnitude of each of the first tpl_frame->coeff_num entries of coeff is
+// divided by LOSSLESS_Q_STEP and added to the matching slot of
+// tpl_frame->abs_coeff_sum; tpl_frame->txfm_block_count is then incremented.
+static AOM_INLINE void tpl_stats_record_txfm_block(TplDepFrame *tpl_frame,
+                                                   const tran_low_t *coeff) {
+  aom_clear_system_state();
+  const int num_coeffs = tpl_frame->coeff_num;
+  // Transforms larger than 16x16 need a coefficient scale other than
+  // LOSSLESS_Q_STEP, so only up to 256 coefficients are accepted here.
+  assert(num_coeffs <= 256);
+  double *const sums = tpl_frame->abs_coeff_sum;
+  const double q_step = (double)LOSSLESS_Q_STEP;
+  for (int pos = 0; pos < num_coeffs; ++pos) {
+    const int magnitude = abs(coeff[pos]);
+    sums[pos] += magnitude / q_step;
+  }
+  tpl_frame->txfm_block_count += 1;
+}
+
+// Turns the accumulated per-position coefficient sums of tpl_frame into means
+// by dividing each abs_coeff_sum entry by txfm_block_count and storing the
+// result in abs_coeff_mean (coeff_num entries are written).
+static AOM_INLINE void tpl_stats_update_abs_coeff_mean(TplDepFrame *tpl_frame) {
+  aom_clear_system_state();
+  const int block_count = tpl_frame->txfm_block_count;
+  for (int i = 0; i < tpl_frame->coeff_num; ++i) {
+    // With no recorded transform blocks there is nothing to average; store a
+    // zero mean instead of dividing by zero and propagating NaN/inf.
+    tpl_frame->abs_coeff_mean[i] =
+        block_count > 0 ? tpl_frame->abs_coeff_sum[i] / block_count : 0.0;
+  }
+}
+
+// Resets the transform statistics stored in tpl_frame.
+//
+// tpl_frame     frame whose txfm_block_count, coeff_num and abs_coeff_sum are
+//               (re)initialized
+// tpl_bsize_1d  1-D tpl block size; coeff_num is set to its square
+void av1_tpl_stats_init_txfm_stats(TplDepFrame *tpl_frame, int tpl_bsize_1d) {
+  aom_clear_system_state();
+  tpl_frame->txfm_block_count = 0;
+  tpl_frame->coeff_num = tpl_bsize_1d * tpl_bsize_1d;
+  // The block count restarts at zero, so the accumulator must restart too;
+  // otherwise sums left over from an earlier use of this frame buffer would
+  // inflate the next mean.
+  memset(tpl_frame->abs_coeff_sum, 0, sizeof(tpl_frame->abs_coeff_sum));
+  // The fixed-size stats arrays must hold exactly one slot per coefficient.
+  assert(sizeof(tpl_frame->abs_coeff_mean) /
+             sizeof(tpl_frame->abs_coeff_mean[0]) ==
+         tpl_frame->coeff_num);
+}
+
static AOM_INLINE void get_quantize_error(const MACROBLOCK *x, int plane,
const tran_low_t *coeff,
tran_low_t *qcoeff,
@@ -98,14 +131,14 @@
ALIGN_POWER_OF_TWO(mi_params->mi_cols, MAX_MIB_SIZE_LOG2);
const int mi_rows =
ALIGN_POWER_OF_TWO(mi_params->mi_rows, MAX_MIB_SIZE_LOG2);
-
- tpl_data->tpl_stats_buffer[frame].is_valid = 0;
- tpl_data->tpl_stats_buffer[frame].width = mi_cols >> block_mis_log2;
- tpl_data->tpl_stats_buffer[frame].height = mi_rows >> block_mis_log2;
- tpl_data->tpl_stats_buffer[frame].stride =
- tpl_data->tpl_stats_buffer[frame].width;
- tpl_data->tpl_stats_buffer[frame].mi_rows = mi_params->mi_rows;
- tpl_data->tpl_stats_buffer[frame].mi_cols = mi_params->mi_cols;
+ TplDepFrame *tpl_frame = &tpl_data->tpl_stats_buffer[frame];
+ tpl_frame->is_valid = 0;
+ tpl_frame->width = mi_cols >> block_mis_log2;
+ tpl_frame->height = mi_rows >> block_mis_log2;
+ tpl_frame->stride = tpl_data->tpl_stats_buffer[frame].width;
+ tpl_frame->mi_rows = mi_params->mi_rows;
+ tpl_frame->mi_cols = mi_params->mi_cols;
+ av1_tpl_stats_init_txfm_stats(tpl_frame, tpl_data->tpl_bsize_1d);
}
tpl_data->tpl_frame = &tpl_data->tpl_stats_buffer[REF_FRAMES + 1];
@@ -756,6 +789,11 @@
rec_stride_pool, tx_size, best_mode, mi_row, mi_col,
use_y_only_rate_distortion);
+ if (!tpl_use_multithread(cpi)) {
+ // TODO(angiebird): make this work for multithread
+ tpl_stats_record_txfm_block(tpl_frame, coeff);
+ }
+
tpl_stats->recrf_dist = recon_error << (TPL_DEP_COST_SCALE_LOG2);
tpl_stats->recrf_rate = rate_cost << TPL_DEP_COST_SCALE_LOG2;
if (!is_inter_mode(best_mode)) {
@@ -1236,6 +1274,11 @@
GET_MV_SUBPEL((mi_params->mi_rows - mi_height - mi_row) * MI_SIZE);
av1_mc_flow_dispenser_row(cpi, x, mi_row, bsize, tx_size);
}
+ if (!tpl_use_multithread(cpi)) {
+ // TODO(angiebird): make this work for multithread
+ TplDepFrame *tpl_frame = &cpi->tpl_data.tpl_frame[cpi->tpl_data.frame_idx];
+ tpl_stats_update_abs_coeff_mean(tpl_frame);
+ }
}
static void mc_flow_synthesizer(AV1_COMP *cpi, int frame_idx) {
@@ -1436,8 +1479,11 @@
}
void av1_init_tpl_stats(TplParams *const tpl_data) {
+ set_tpl_stats_block_size(&tpl_data->tpl_stats_block_mis_log2,
+ &tpl_data->tpl_bsize_1d);
for (int frame_idx = 0; frame_idx < MAX_LAG_BUFFERS; ++frame_idx) {
TplDepFrame *tpl_frame = &tpl_data->tpl_stats_buffer[frame_idx];
+ av1_tpl_stats_init_txfm_stats(tpl_frame, tpl_data->tpl_bsize_1d);
if (tpl_data->tpl_stats_pool[frame_idx] == NULL) continue;
memset(tpl_data->tpl_stats_pool[frame_idx], 0,
tpl_frame->height * tpl_frame->width *
@@ -1513,7 +1559,7 @@
continue;
init_mc_flow_dispenser(cpi, frame_idx, pframe_qindex);
- if (mt_info->num_workers > 1 && !cpi->sf.tpl_sf.allow_compound_pred) {
+ if (tpl_use_multithread(cpi)) {
tpl_row_mt->sync_read_ptr = av1_tpl_row_mt_sync_read;
tpl_row_mt->sync_write_ptr = av1_tpl_row_mt_sync_write;
av1_mc_flow_dispenser_mt(cpi);
diff --git a/av1/encoder/tpl_model.h b/av1/encoder/tpl_model.h
index 8492b4f..4b85740 100644
--- a/av1/encoder/tpl_model.h
+++ b/av1/encoder/tpl_model.h
@@ -111,6 +111,10 @@
int mi_cols;
int base_rdmult;
uint32_t frame_display_index;
+ double abs_coeff_sum[256]; // Assume we are using 16x16 transform block
+ double abs_coeff_mean[256];
+ int coeff_num; // number of coefficients in a transform block
+ int txfm_block_count;
} TplDepFrame;
/*!\endcond */
@@ -284,6 +288,17 @@
double av1_laplace_estimate_frame_rate(int q_index, int block_count,
const double *abs_coeff_mean,
int coeff_num);
+
+/*!\brief Init data structure storing transform stats
+ *
+ *\ingroup tpl_modelling
+ *
+ * \param[in]    tpl_frame       pointer of tpl frame data structure
+ * \param[in]    tpl_bsize_1d    1-D tpl block size; the number of coefficients
+ *                               per transform block is set to
+ *                               tpl_bsize_1d * tpl_bsize_1d
+ *
+ */
+void av1_tpl_stats_init_txfm_stats(TplDepFrame *tpl_frame, int tpl_bsize_1d);
+
/*!\endcond */
#ifdef __cplusplus
} // extern "C"