Allocate mc_count and mc_saved conditionally to save memory. mc_count and mc_saved are not used for the default tpl model computation. Therefore, we hide them behind a macro (USE_TPL_CLASSIC_MODEL) and only use them when the non-classic tpl model is used. (Later we may consider deleting the non-classic model completely.) Memory usage for encoding a 4k video will be reduced by 8.1%. BUG=aomedia:2453 Change-Id: I9bb2d753f5c37b3bf80607ebe04924428c2d10d2
diff --git a/av1/encoder/block.h b/av1/encoder/block.h index e462c9b..503889e 100644 --- a/av1/encoder/block.h +++ b/av1/encoder/block.h
@@ -30,6 +30,9 @@ extern "C" { #endif +// 1: use classic model 0: use count or saving stats +#define USE_TPL_CLASSIC_MODEL 0 + typedef struct { unsigned int sse; int sum;
diff --git a/av1/encoder/encodeframe.c b/av1/encoder/encodeframe.c index 93988ab..0c63a5c 100644 --- a/av1/encoder/encodeframe.c +++ b/av1/encoder/encodeframe.c
@@ -3454,7 +3454,9 @@ if (cpi->gf_group.index >= MAX_LAG_BUFFERS) return orig_rdmult; +#if !USE_TPL_CLASSIC_MODEL int64_t mc_count = 0, mc_saved = 0; +#endif // !USE_TPL_CLASSIC_MODEL int mi_count = 0; const int mi_col_sr = av1_coded_to_superres_mi(mi_col, cm->superres_scale_denominator); @@ -3467,8 +3469,10 @@ TplDepStats *this_stats = &tpl_stats[row * tpl_stride + col]; intra_cost += this_stats->intra_cost; mc_dep_cost += this_stats->intra_cost + this_stats->mc_flow; +#if !USE_TPL_CLASSIC_MODEL mc_count += this_stats->mc_count; mc_saved += this_stats->mc_saved; +#endif // !USE_TPL_CLASSIC_MODEL mi_count++; } } @@ -3482,6 +3486,7 @@ const double rk = (double)intra_cost / mc_dep_cost; beta = (r0 / rk); } +#if !USE_TPL_CLASSIC_MODEL } else if (analysis_type == 1) { const double mc_count_base = (mi_count * cpi->rd.mc_count_base); beta = (mc_count + 1.0) / (mc_count_base + 1.0); @@ -3490,6 +3495,7 @@ const double mc_saved_base = (mi_count * cpi->rd.mc_saved_base); beta = (mc_saved + 1.0) / (mc_saved_base + 1.0); beta = pow(beta, 0.5); +#endif // !USE_TPL_CLASSIC_MODEL } int rdmult = av1_get_adaptive_rdmult(cpi, beta); @@ -3536,7 +3542,9 @@ if (cpi->gf_group.index >= MAX_LAG_BUFFERS) return cm->base_qindex; +#if !USE_TPL_CLASSIC_MODEL int64_t mc_count = 0, mc_saved = 0; +#endif // !USE_TPL_CLASSIC_MODEL int mi_count = 0; const int mi_col_sr = av1_coded_to_superres_mi(mi_col, cm->superres_scale_denominator); @@ -3549,8 +3557,10 @@ TplDepStats *this_stats = &tpl_stats[row * tpl_stride + col]; intra_cost += this_stats->intra_cost; mc_dep_cost += this_stats->intra_cost + this_stats->mc_flow; +#if !USE_TPL_CLASSIC_MODEL mc_count += this_stats->mc_count; mc_saved += this_stats->mc_saved; +#endif // !USE_TPL_CLASSIC_MODEL mi_count++; } } @@ -3566,6 +3576,7 @@ beta = (r0 / rk); assert(beta > 0.0); } +#if !USE_TPL_CLASSIC_MODEL } else if (analysis_type == 1) { const double mc_count_base = (mi_count * cpi->rd.mc_count_base); beta = (mc_count + 1.0) / (mc_count_base + 1.0); @@ 
-3574,6 +3585,7 @@ const double mc_saved_base = (mi_count * cpi->rd.mc_saved_base); beta = (mc_saved + 1.0) / (mc_saved_base + 1.0); beta = pow(beta, 0.5); +#endif // !USE_TPL_CLASSIC_MODEL } offset = (7 * av1_get_deltaq_offset(cpi, cm->base_qindex, beta)) / 8; // printf("[%d/%d]: beta %g offset %d\n", pyr_lev_from_top,
diff --git a/av1/encoder/encoder.c b/av1/encoder/encoder.c index a8cc1f8..469e6a8 100644 --- a/av1/encoder/encoder.c +++ b/av1/encoder/encoder.c
@@ -3619,8 +3619,10 @@ int tpl_stride = tpl_frame->stride; int64_t intra_cost_base = 0; int64_t mc_dep_cost_base = 0; +#if !USE_TPL_CLASSIC_MODEL int64_t mc_saved_base = 0; int64_t mc_count_base = 0; +#endif // !USE_TPL_CLASSIC_MODEL int row, col; const int mi_cols_sr = av1_pixels_to_mi(cm->superres_upscaled_width); @@ -3629,8 +3631,10 @@ TplDepStats *this_stats = &tpl_stats[row * tpl_stride + col]; intra_cost_base += this_stats->intra_cost; mc_dep_cost_base += this_stats->intra_cost + this_stats->mc_flow; +#if !USE_TPL_CLASSIC_MODEL mc_count_base += this_stats->mc_count; mc_saved_base += this_stats->mc_saved; +#endif // !USE_TPL_CLASSIC_MODEL } } @@ -3660,10 +3664,12 @@ cpi->rc.kf_boost, kf_boost, cpi->rc.frames_to_key); } } +#if !USE_TPL_CLASSIC_MODEL cpi->rd.mc_count_base = (double)mc_count_base / (cm->mi_rows * cm->mi_cols); cpi->rd.mc_saved_base = (double)mc_saved_base / (cm->mi_rows * cm->mi_cols); +#endif // !USE_TPL_CLASSIC_MODEL aom_clear_system_state(); } }
diff --git a/av1/encoder/encoder.h b/av1/encoder/encoder.h index ca865d9..87b1aff 100644 --- a/av1/encoder/encoder.h +++ b/av1/encoder/encoder.h
@@ -188,8 +188,10 @@ int64_t inter_cost; int64_t mc_flow; int64_t mc_dep_cost; +#if !USE_TPL_CLASSIC_MODEL int64_t mc_count; int64_t mc_saved; +#endif // !USE_TPL_CLASSIC_MODEL int ref_frame_index; int ref_disp_frame_index;
diff --git a/av1/encoder/rd.h b/av1/encoder/rd.h index 5162e61..fb8ed62 100644 --- a/av1/encoder/rd.h +++ b/av1/encoder/rd.h
@@ -64,7 +64,9 @@ int RDMULT; double r0, arf_r0; +#if !USE_TPL_CLASSIC_MODEL double mc_saved_base, mc_count_base; +#endif // !USE_TPL_CLASSIC_MODEL } RD_OPT; static INLINE void av1_init_rd_stats(RD_STATS *rd_stats) {
diff --git a/av1/encoder/tpl_model.c b/av1/encoder/tpl_model.c index 03c4849..9f9eb81 100644 --- a/av1/encoder/tpl_model.c +++ b/av1/encoder/tpl_model.c
@@ -376,7 +376,9 @@ (tpl_stats_ptr->mc_dep_cost * tpl_stats_ptr->inter_cost) / tpl_stats_ptr->intra_cost; */ +#if !USE_TPL_CLASSIC_MODEL int64_t mc_saved = tpl_stats_ptr->intra_cost - tpl_stats_ptr->inter_cost; +#endif // #if !USE_TPL_CLASSIC_MODEL int idx, idy; for (idy = 0; idy < mi_height; ++idy) { for (idx = 0; idx < mi_width; ++idx) { @@ -384,8 +386,10 @@ &ref_stats_ptr[(ref_mi_row + idy) * ref_tpl_frame->stride + (ref_mi_col + idx)]; des_stats->mc_flow += (mc_flow * overlap_area) / pix_num; +#if !USE_TPL_CLASSIC_MODEL des_stats->mc_count += overlap_area << TPL_DEP_COST_SCALE_LOG2; des_stats->mc_saved += (mc_saved * overlap_area) / pix_num; +#endif // !USE_TPL_CLASSIC_MODEL assert(overlap_area >= 0); } } @@ -727,8 +731,9 @@ YV12_BUFFER_CONFIG *ref, YV12_BUFFER_CONFIG *src, TplDepFrame *ref_tpl_frame) { +// TODO(yuec) Consider deleting forward tpl model completely +#if !USE_TPL_CLASSIC_MODEL AV1_COMMON *cm = &cpi->common; - const int bw = 4 << mi_size_wide_log2[bsize]; const int bh = 4 << mi_size_high_log2[bsize]; const int mi_height = mi_size_high[bsize]; @@ -890,6 +895,7 @@ (ref_mi_col + idx)]; des_stats->mc_count += overlap_area << TPL_DEP_COST_SCALE_LOG2; des_stats->mc_saved += (mc_saved * overlap_area) / pix_num; + assert(overlap_area >= 0); } } @@ -897,6 +903,16 @@ } } } +#else + (void)cpi; + (void)x; + (void)xd; + (void)bsize; + (void)use_satd; + (void)ref; + (void)src; + (void)ref_tpl_frame; +#endif // !USE_TPL_CLASSIC_MODEL } void av1_tpl_setup_forward_stats(AV1_COMP *cpi) {