Add a macro to gather per-transform-unit stats.
When COLLECT_RD_STATS is set to 1, the stats are printed to stderr for
luma (plane 0) transform units only. The macro defaults to 0 (disabled).
Change-Id: I90b1f2d3428d112074731fee9d91d4a96e2e0ab8
diff --git a/av1/encoder/rdopt.c b/av1/encoder/rdopt.c
index f257161..55b508b 100644
--- a/av1/encoder/rdopt.c
+++ b/av1/encoder/rdopt.c
@@ -785,7 +785,8 @@
static void get_energy_distribution_fine(const AV1_COMP *cpi, BLOCK_SIZE bsize,
const uint8_t *src, int src_stride,
const uint8_t *dst, int dst_stride,
- double *hordist, double *verdist) {
+ int need_4th, double *hordist,
+ double *verdist) {
const int bw = block_size_wide[bsize];
const int bh = block_size_high[bsize];
unsigned int esq[16] = { 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 };
@@ -860,13 +861,22 @@
hordist[0] = ((double)esq[0] + esq[4] + esq[8] + esq[12]) * e_recip;
hordist[1] = ((double)esq[1] + esq[5] + esq[9] + esq[13]) * e_recip;
hordist[2] = ((double)esq[2] + esq[6] + esq[10] + esq[14]) * e_recip;
+ if (need_4th) {
+ hordist[3] = ((double)esq[3] + esq[7] + esq[11] + esq[15]) * e_recip;
+ }
verdist[0] = ((double)esq[0] + esq[1] + esq[2] + esq[3]) * e_recip;
verdist[1] = ((double)esq[4] + esq[5] + esq[6] + esq[7]) * e_recip;
verdist[2] = ((double)esq[8] + esq[9] + esq[10] + esq[11]) * e_recip;
+ if (need_4th) {
+ verdist[3] = ((double)esq[12] + esq[13] + esq[14] + esq[15]) * e_recip;
+ }
} else {
hordist[0] = verdist[0] = 0.25;
hordist[1] = verdist[1] = 0.25;
hordist[2] = verdist[2] = 0.25;
+ if (need_4th) {
+ hordist[3] = verdist[3] = 0.25;
+ }
}
}
@@ -876,7 +886,7 @@
int prune_bitmask = 0;
double svm_proj_h = 0, svm_proj_v = 0;
double hdist[3] = { 0, 0, 0 }, vdist[3] = { 0, 0, 0 };
- get_energy_distribution_fine(cpi, bsize, src, src_stride, dst, dst_stride,
+ get_energy_distribution_fine(cpi, bsize, src, src_stride, dst, dst_stride, 0,
hdist, vdist);
svm_proj_v = vdist[0] * ADST_FLIP_SVM[0] + vdist[1] * ADST_FLIP_SVM[1] +
@@ -1860,6 +1870,81 @@
}
}
+// This macro has 3 possible values:
+// 0: Do not collect any RD stats
+// 1: Collect RD stats for transform units
+// 2: Collect RD stats for partition units
+#define COLLECT_RD_STATS 0
+
+#if COLLECT_RD_STATS == 1
+
+static void get_mean(const int16_t *diff, int stride, int w, int h,
+ double *mean) {
+ double sum = 0.0;
+ for (int j = 0; j < h; ++j) {
+ for (int i = 0; i < w; ++i) {
+ sum += diff[j * stride + i];
+ }
+ }
+ assert(w > 0 && h > 0);
+ *mean = sum / (w * h);
+}
+
+static void PrintTransformUnitStats(const AV1_COMP *const cpi, MACROBLOCK *x,
+ const RD_STATS *const rd_stats,
+ TX_SIZE tx_size, TX_TYPE tx_type) {
+ const BLOCK_SIZE fake_bsize = txsize_to_bsize[tx_size];
+ const MACROBLOCKD *const xd = &x->e_mbd;
+ const int plane = 0;
+ struct macroblock_plane *const p = &x->plane[plane];
+ const struct macroblockd_plane *const pd = &xd->plane[plane];
+ const int txw = tx_size_wide[tx_size];
+ const int txh = tx_size_high[tx_size];
+ const int dequant_shift =
+ (xd->cur_buf->flags & YV12_FLAG_HIGHBITDEPTH) ? xd->bd - 5 : 3;
+ const int q_step = pd->dequant_Q3[1] >> dequant_shift;
+ const double num_samples = txw * txh;
+
+ const double rate_norm = (double)rd_stats->rate / num_samples;
+ const double dist_norm = (double)rd_stats->dist / num_samples;
+
+ unsigned int sse;
+ cpi->fn_ptr[fake_bsize].vf(p->src.buf, p->src.stride, pd->dst.buf,
+ pd->dst.stride, &sse);
+ const double sse_norm = (double)sse / num_samples;
+
+ const TX_TYPE_1D tx_type_1d_row = htx_tab[tx_type];
+ const TX_TYPE_1D tx_type_1d_col = vtx_tab[tx_type];
+
+ fprintf(stderr, "%g %g %g %d %d %d %d %d", rate_norm, dist_norm, sse_norm,
+ q_step, tx_size_wide[tx_size], tx_size_high[tx_size], tx_type_1d_row,
+ tx_type_1d_col);
+
+ int model_rate;
+ int64_t model_dist;
+ model_rd_from_sse(cpi, xd, fake_bsize, plane, sse, &model_rate, &model_dist);
+ const double model_rate_norm = (double)model_rate / num_samples;
+ const double model_dist_norm = (double)model_dist / num_samples;
+ fprintf(stderr, " %g %g", model_rate_norm, model_dist_norm);
+
+ // TODO(urvang): Check if 'src_diff' needs an offset and a non-txw stride.
+ double mean;
+ get_mean(p->src_diff, txw, txw, txh, &mean);
+ double hor_corr, vert_corr;
+ get_horver_correlation(p->src_diff, txw, txw, txh, &hor_corr, &vert_corr);
+ fprintf(stderr, " %g %g %g", mean, hor_corr, vert_corr);
+
+ double hdist[4] = { 0 }, vdist[4] = { 0 };
+ get_energy_distribution_fine(cpi, fake_bsize, p->src.buf, p->src.stride,
+ pd->dst.buf, pd->dst.stride, 1, hdist, vdist);
+ fprintf(stderr, " %g %g %g %g %g %g %g %g", hdist[0], hdist[1], hdist[2],
+ hdist[3], vdist[0], vdist[1], vdist[2], vdist[3]);
+
+ fprintf(stderr, "\n");
+}
+
+#endif // COLLECT_RD_STATS == 1
+
static int64_t search_txk_type(const AV1_COMP *cpi, MACROBLOCK *x, int plane,
int block, int blk_row, int blk_col,
BLOCK_SIZE plane_bsize, TX_SIZE tx_size,
@@ -2058,6 +2143,12 @@
best_eob = x->plane[plane].eobs[block];
}
+#if COLLECT_RD_STATS == 1
+ if (plane == 0) {
+ PrintTransformUnitStats(cpi, x, &this_rd_stats, tx_size, tx_type);
+ }
+#endif // COLLECT_RD_STATS == 1
+
if (cpi->sf.adaptive_txb_search)
if ((best_rd - (best_rd >> 2)) > ref_best_rd) break;
@@ -2125,6 +2216,8 @@
return best_rd;
}
+#undef COLLECT_RD_STATS
+
static void block_rd_txfm(int plane, int block, int blk_row, int blk_col,
BLOCK_SIZE plane_bsize, TX_SIZE tx_size, void *arg) {
struct rdcost_block_args *args = arg;