Collect transform stats in tpl model

Change-Id: I7a1a19dd7545ff22b58964c066c3f7e80b3a4cae
diff --git a/av1/common/quant_common.h b/av1/common/quant_common.h
index 9c30204..8f36eb1 100644
--- a/av1/common/quant_common.h
+++ b/av1/common/quant_common.h
@@ -36,6 +36,7 @@
 #define DEFAULT_QM_V 12
 #define DEFAULT_QM_FIRST 5
 #define DEFAULT_QM_LAST 9
+#define LOSSLESS_Q_STEP 4  // this should equal to dc/ac_qlookup_QTX[0]
 
 struct AV1Common;
 struct CommonQuantParams;
diff --git a/av1/encoder/tpl_model.c b/av1/encoder/tpl_model.c
index 50ed63d..c869f06 100644
--- a/av1/encoder/tpl_model.c
+++ b/av1/encoder/tpl_model.c
@@ -35,6 +35,39 @@
 #include "av1/encoder/reconinter_enc.h"
 #include "av1/encoder/tpl_model.h"
 
+static AOM_INLINE int tpl_use_multithread(const AV1_COMP *cpi) {
+  return cpi->mt_info.num_workers > 1 && !cpi->sf.tpl_sf.allow_compound_pred;
+}
+
+static AOM_INLINE void tpl_stats_record_txfm_block(TplDepFrame *tpl_frame,
+                                                   const tran_low_t *coeff) {
+  aom_clear_system_state();
+  // For transform larger than 16x16, the scale of coeff need to be adjusted.
+  // It's not LOSSLESS_Q_STEP.
+  assert(tpl_frame->coeff_num <= 256);
+  for (int i = 0; i < tpl_frame->coeff_num; ++i) {
+    tpl_frame->abs_coeff_sum[i] += abs(coeff[i]) / (double)LOSSLESS_Q_STEP;
+  }
+  ++tpl_frame->txfm_block_count;
+}
+
+static AOM_INLINE void tpl_stats_update_abs_coeff_mean(TplDepFrame *tpl_frame) {
+  aom_clear_system_state();
+  for (int i = 0; i < tpl_frame->coeff_num; ++i) {
+    tpl_frame->abs_coeff_mean[i] =
+        tpl_frame->abs_coeff_sum[i] / tpl_frame->txfm_block_count;
+  }
+}
+
+void av1_tpl_stats_init_txfm_stats(TplDepFrame *tpl_frame, int tpl_bsize_1d) {
+  aom_clear_system_state();
+  tpl_frame->txfm_block_count = 0;
+  tpl_frame->coeff_num = tpl_bsize_1d * tpl_bsize_1d;
+  assert(sizeof(tpl_frame->abs_coeff_mean) /
+             sizeof(tpl_frame->abs_coeff_mean[0]) ==
+         tpl_frame->coeff_num);
+}
+
 static AOM_INLINE void get_quantize_error(const MACROBLOCK *x, int plane,
                                           const tran_low_t *coeff,
                                           tran_low_t *qcoeff,
@@ -98,14 +131,14 @@
         ALIGN_POWER_OF_TWO(mi_params->mi_cols, MAX_MIB_SIZE_LOG2);
     const int mi_rows =
         ALIGN_POWER_OF_TWO(mi_params->mi_rows, MAX_MIB_SIZE_LOG2);
-
-    tpl_data->tpl_stats_buffer[frame].is_valid = 0;
-    tpl_data->tpl_stats_buffer[frame].width = mi_cols >> block_mis_log2;
-    tpl_data->tpl_stats_buffer[frame].height = mi_rows >> block_mis_log2;
-    tpl_data->tpl_stats_buffer[frame].stride =
-        tpl_data->tpl_stats_buffer[frame].width;
-    tpl_data->tpl_stats_buffer[frame].mi_rows = mi_params->mi_rows;
-    tpl_data->tpl_stats_buffer[frame].mi_cols = mi_params->mi_cols;
+    TplDepFrame *tpl_frame = &tpl_data->tpl_stats_buffer[frame];
+    tpl_frame->is_valid = 0;
+    tpl_frame->width = mi_cols >> block_mis_log2;
+    tpl_frame->height = mi_rows >> block_mis_log2;
+    tpl_frame->stride = tpl_data->tpl_stats_buffer[frame].width;
+    tpl_frame->mi_rows = mi_params->mi_rows;
+    tpl_frame->mi_cols = mi_params->mi_cols;
+    av1_tpl_stats_init_txfm_stats(tpl_frame, tpl_data->tpl_bsize_1d);
   }
   tpl_data->tpl_frame = &tpl_data->tpl_stats_buffer[REF_FRAMES + 1];
 
@@ -756,6 +789,11 @@
                       rec_stride_pool, tx_size, best_mode, mi_row, mi_col,
                       use_y_only_rate_distortion);
 
+  if (!tpl_use_multithread(cpi)) {
+    // TODO(angiebird): make this work for multithread
+    tpl_stats_record_txfm_block(tpl_frame, coeff);
+  }
+
   tpl_stats->recrf_dist = recon_error << (TPL_DEP_COST_SCALE_LOG2);
   tpl_stats->recrf_rate = rate_cost << TPL_DEP_COST_SCALE_LOG2;
   if (!is_inter_mode(best_mode)) {
@@ -1236,6 +1274,11 @@
         GET_MV_SUBPEL((mi_params->mi_rows - mi_height - mi_row) * MI_SIZE);
     av1_mc_flow_dispenser_row(cpi, x, mi_row, bsize, tx_size);
   }
+  if (!tpl_use_multithread(cpi)) {
+    // TODO(angiebird): make this work for multithread
+    TplDepFrame *tpl_frame = &cpi->tpl_data.tpl_frame[cpi->tpl_data.frame_idx];
+    tpl_stats_update_abs_coeff_mean(tpl_frame);
+  }
 }
 
 static void mc_flow_synthesizer(AV1_COMP *cpi, int frame_idx) {
@@ -1436,8 +1479,11 @@
 }
 
 void av1_init_tpl_stats(TplParams *const tpl_data) {
+  set_tpl_stats_block_size(&tpl_data->tpl_stats_block_mis_log2,
+                           &tpl_data->tpl_bsize_1d);
   for (int frame_idx = 0; frame_idx < MAX_LAG_BUFFERS; ++frame_idx) {
     TplDepFrame *tpl_frame = &tpl_data->tpl_stats_buffer[frame_idx];
+    av1_tpl_stats_init_txfm_stats(tpl_frame, tpl_data->tpl_bsize_1d);
     if (tpl_data->tpl_stats_pool[frame_idx] == NULL) continue;
     memset(tpl_data->tpl_stats_pool[frame_idx], 0,
            tpl_frame->height * tpl_frame->width *
@@ -1513,7 +1559,7 @@
       continue;
 
     init_mc_flow_dispenser(cpi, frame_idx, pframe_qindex);
-    if (mt_info->num_workers > 1 && !cpi->sf.tpl_sf.allow_compound_pred) {
+    if (tpl_use_multithread(cpi)) {
       tpl_row_mt->sync_read_ptr = av1_tpl_row_mt_sync_read;
       tpl_row_mt->sync_write_ptr = av1_tpl_row_mt_sync_write;
       av1_mc_flow_dispenser_mt(cpi);
diff --git a/av1/encoder/tpl_model.h b/av1/encoder/tpl_model.h
index 8492b4f..4b85740 100644
--- a/av1/encoder/tpl_model.h
+++ b/av1/encoder/tpl_model.h
@@ -111,6 +111,10 @@
   int mi_cols;
   int base_rdmult;
   uint32_t frame_display_index;
+  double abs_coeff_sum[256];  // Assume we are using 16x16 transform block
+  double abs_coeff_mean[256];
+  int coeff_num;  // number of coefficients in a transform block
+  int txfm_block_count;
 } TplDepFrame;
 
 /*!\endcond */
@@ -284,6 +288,17 @@
 double av1_laplace_estimate_frame_rate(int q_index, int block_count,
                                        const double *abs_coeff_mean,
                                        int coeff_num);
+
+/*!\brief  Init data structure storing transform stats
+ *
+ *\ingroup tpl_modelling
+ *
+ * \param[in]    tpl_frame       pointer of tpl frame data structure
+ * \param[in]    coeff_num       number of coefficients per transform block
+ *
+ */
+void av1_tpl_stats_init_txfm_stats(TplDepFrame *tpl_frame, int coeff_num);
+
 /*!\endcond */
 #ifdef __cplusplus
 }  // extern "C"