Allocate mc_count and mc_saved conditionally to save memory
mc_count and mc_saved are not used for the default tpl model computation.
Therefore, we hide them behind a macro (USE_TPL_CLASSIC_MODEL) and only
compile them in when the non-classic tpl model is used. (Later we may
consider deleting the non-classic model completely.)
Memory usage for encoding a 4k video is reduced by 8.1%.
BUG=aomedia:2453
Change-Id: I9bb2d753f5c37b3bf80607ebe04924428c2d10d2
diff --git a/av1/encoder/block.h b/av1/encoder/block.h
index e462c9b..503889e 100644
--- a/av1/encoder/block.h
+++ b/av1/encoder/block.h
@@ -30,6 +30,9 @@
extern "C" {
#endif
+// 1: classic tpl model only; 0: also compile the mc_count / mc_saved stats.
+#define USE_TPL_CLASSIC_MODEL 0
+
typedef struct {
unsigned int sse;
int sum;
diff --git a/av1/encoder/encodeframe.c b/av1/encoder/encodeframe.c
index 93988ab..0c63a5c 100644
--- a/av1/encoder/encodeframe.c
+++ b/av1/encoder/encodeframe.c
@@ -3454,7 +3454,9 @@
if (cpi->gf_group.index >= MAX_LAG_BUFFERS) return orig_rdmult;
+#if !USE_TPL_CLASSIC_MODEL
int64_t mc_count = 0, mc_saved = 0;
+#endif // !USE_TPL_CLASSIC_MODEL
int mi_count = 0;
const int mi_col_sr =
av1_coded_to_superres_mi(mi_col, cm->superres_scale_denominator);
@@ -3467,8 +3469,10 @@
TplDepStats *this_stats = &tpl_stats[row * tpl_stride + col];
intra_cost += this_stats->intra_cost;
mc_dep_cost += this_stats->intra_cost + this_stats->mc_flow;
+#if !USE_TPL_CLASSIC_MODEL
mc_count += this_stats->mc_count;
mc_saved += this_stats->mc_saved;
+#endif // !USE_TPL_CLASSIC_MODEL
mi_count++;
}
}
@@ -3482,6 +3486,7 @@
const double rk = (double)intra_cost / mc_dep_cost;
beta = (r0 / rk);
}
+#if !USE_TPL_CLASSIC_MODEL
} else if (analysis_type == 1) {
const double mc_count_base = (mi_count * cpi->rd.mc_count_base);
beta = (mc_count + 1.0) / (mc_count_base + 1.0);
@@ -3490,6 +3495,7 @@
const double mc_saved_base = (mi_count * cpi->rd.mc_saved_base);
beta = (mc_saved + 1.0) / (mc_saved_base + 1.0);
beta = pow(beta, 0.5);
+#endif // !USE_TPL_CLASSIC_MODEL
}
int rdmult = av1_get_adaptive_rdmult(cpi, beta);
@@ -3536,7 +3542,9 @@
if (cpi->gf_group.index >= MAX_LAG_BUFFERS) return cm->base_qindex;
+#if !USE_TPL_CLASSIC_MODEL
int64_t mc_count = 0, mc_saved = 0;
+#endif // !USE_TPL_CLASSIC_MODEL
int mi_count = 0;
const int mi_col_sr =
av1_coded_to_superres_mi(mi_col, cm->superres_scale_denominator);
@@ -3549,8 +3557,10 @@
TplDepStats *this_stats = &tpl_stats[row * tpl_stride + col];
intra_cost += this_stats->intra_cost;
mc_dep_cost += this_stats->intra_cost + this_stats->mc_flow;
+#if !USE_TPL_CLASSIC_MODEL
mc_count += this_stats->mc_count;
mc_saved += this_stats->mc_saved;
+#endif // !USE_TPL_CLASSIC_MODEL
mi_count++;
}
}
@@ -3566,6 +3576,7 @@
beta = (r0 / rk);
assert(beta > 0.0);
}
+#if !USE_TPL_CLASSIC_MODEL
} else if (analysis_type == 1) {
const double mc_count_base = (mi_count * cpi->rd.mc_count_base);
beta = (mc_count + 1.0) / (mc_count_base + 1.0);
@@ -3574,6 +3585,7 @@
const double mc_saved_base = (mi_count * cpi->rd.mc_saved_base);
beta = (mc_saved + 1.0) / (mc_saved_base + 1.0);
beta = pow(beta, 0.5);
+#endif // !USE_TPL_CLASSIC_MODEL
}
offset = (7 * av1_get_deltaq_offset(cpi, cm->base_qindex, beta)) / 8;
// printf("[%d/%d]: beta %g offset %d\n", pyr_lev_from_top,
diff --git a/av1/encoder/encoder.c b/av1/encoder/encoder.c
index a8cc1f8..469e6a8 100644
--- a/av1/encoder/encoder.c
+++ b/av1/encoder/encoder.c
@@ -3619,8 +3619,10 @@
int tpl_stride = tpl_frame->stride;
int64_t intra_cost_base = 0;
int64_t mc_dep_cost_base = 0;
+#if !USE_TPL_CLASSIC_MODEL
int64_t mc_saved_base = 0;
int64_t mc_count_base = 0;
+#endif // !USE_TPL_CLASSIC_MODEL
int row, col;
const int mi_cols_sr = av1_pixels_to_mi(cm->superres_upscaled_width);
@@ -3629,8 +3631,10 @@
TplDepStats *this_stats = &tpl_stats[row * tpl_stride + col];
intra_cost_base += this_stats->intra_cost;
mc_dep_cost_base += this_stats->intra_cost + this_stats->mc_flow;
+#if !USE_TPL_CLASSIC_MODEL
mc_count_base += this_stats->mc_count;
mc_saved_base += this_stats->mc_saved;
+#endif // !USE_TPL_CLASSIC_MODEL
}
}
@@ -3660,10 +3664,12 @@
cpi->rc.kf_boost, kf_boost, cpi->rc.frames_to_key);
}
}
+#if !USE_TPL_CLASSIC_MODEL
cpi->rd.mc_count_base =
(double)mc_count_base / (cm->mi_rows * cm->mi_cols);
cpi->rd.mc_saved_base =
(double)mc_saved_base / (cm->mi_rows * cm->mi_cols);
+#endif // !USE_TPL_CLASSIC_MODEL
aom_clear_system_state();
}
}
diff --git a/av1/encoder/encoder.h b/av1/encoder/encoder.h
index ca865d9..87b1aff 100644
--- a/av1/encoder/encoder.h
+++ b/av1/encoder/encoder.h
@@ -188,8 +188,10 @@
int64_t inter_cost;
int64_t mc_flow;
int64_t mc_dep_cost;
+#if !USE_TPL_CLASSIC_MODEL
int64_t mc_count;
int64_t mc_saved;
+#endif // !USE_TPL_CLASSIC_MODEL
int ref_frame_index;
int ref_disp_frame_index;
diff --git a/av1/encoder/rd.h b/av1/encoder/rd.h
index 5162e61..fb8ed62 100644
--- a/av1/encoder/rd.h
+++ b/av1/encoder/rd.h
@@ -64,7 +64,9 @@
int RDMULT;
double r0, arf_r0;
+#if !USE_TPL_CLASSIC_MODEL
double mc_saved_base, mc_count_base;
+#endif // !USE_TPL_CLASSIC_MODEL
} RD_OPT;
static INLINE void av1_init_rd_stats(RD_STATS *rd_stats) {
diff --git a/av1/encoder/tpl_model.c b/av1/encoder/tpl_model.c
index 03c4849..9f9eb81 100644
--- a/av1/encoder/tpl_model.c
+++ b/av1/encoder/tpl_model.c
@@ -376,7 +376,9 @@
(tpl_stats_ptr->mc_dep_cost * tpl_stats_ptr->inter_cost) /
tpl_stats_ptr->intra_cost;
*/
+#if !USE_TPL_CLASSIC_MODEL
int64_t mc_saved = tpl_stats_ptr->intra_cost - tpl_stats_ptr->inter_cost;
+#endif // !USE_TPL_CLASSIC_MODEL
int idx, idy;
for (idy = 0; idy < mi_height; ++idy) {
for (idx = 0; idx < mi_width; ++idx) {
@@ -384,8 +386,10 @@
&ref_stats_ptr[(ref_mi_row + idy) * ref_tpl_frame->stride +
(ref_mi_col + idx)];
des_stats->mc_flow += (mc_flow * overlap_area) / pix_num;
+#if !USE_TPL_CLASSIC_MODEL
des_stats->mc_count += overlap_area << TPL_DEP_COST_SCALE_LOG2;
des_stats->mc_saved += (mc_saved * overlap_area) / pix_num;
+#endif // !USE_TPL_CLASSIC_MODEL
assert(overlap_area >= 0);
}
}
@@ -727,8 +731,9 @@
YV12_BUFFER_CONFIG *ref,
YV12_BUFFER_CONFIG *src,
TplDepFrame *ref_tpl_frame) {
+// TODO(yuec) Consider deleting forward tpl model completely
+#if !USE_TPL_CLASSIC_MODEL
AV1_COMMON *cm = &cpi->common;
-
const int bw = 4 << mi_size_wide_log2[bsize];
const int bh = 4 << mi_size_high_log2[bsize];
const int mi_height = mi_size_high[bsize];
@@ -890,6 +895,7 @@
(ref_mi_col + idx)];
des_stats->mc_count += overlap_area << TPL_DEP_COST_SCALE_LOG2;
des_stats->mc_saved += (mc_saved * overlap_area) / pix_num;
+
assert(overlap_area >= 0);
}
}
@@ -897,6 +903,16 @@
}
}
}
+#else
+ (void)cpi;
+ (void)x;
+ (void)xd;
+ (void)bsize;
+ (void)use_satd;
+ (void)ref;
+ (void)src;
+ (void)ref_tpl_frame;
+#endif // !USE_TPL_CLASSIC_MODEL
}
void av1_tpl_setup_forward_stats(AV1_COMP *cpi) {