Fix maximum TPL buffer length macros.
Add documentation for the macros that define the maximum TPL buffer length.
Also fix several places that check the availability of tpl_frame.
Note that one of them guards the prune_inter_modes_based_on_tpl
speed feature: its index comparison was inverted (>= instead of <),
so the feature was previously not used correctly.
This patch enables it for all eligible frames.
At speed 5, the borg test shows a 0.3% to 0.4% BD-rate increase.
A local speed test shows a 4% speedup at speed 5 in both
instruction count and run time.
STATS_CHANGED
Change-Id: I38ba5b05a1d5e37946bd5fede0d706721560c7e9
diff --git a/av1/encoder/encodeframe.c b/av1/encoder/encodeframe.c
index 87af91d..352de50 100644
--- a/av1/encoder/encodeframe.c
+++ b/av1/encoder/encodeframe.c
@@ -294,7 +294,7 @@
const int deltaq_rdmult = set_deltaq_rdmult(cpi, xd);
if (tpl_frame->is_valid == 0) return deltaq_rdmult;
if (!is_frame_tpl_eligible((AV1_COMP *)cpi)) return deltaq_rdmult;
- if (tpl_idx >= MAX_LAG_BUFFERS) return deltaq_rdmult;
+ if (tpl_idx >= MAX_TPL_FRAME_IDX) return deltaq_rdmult;
if (cpi->superres_mode != SUPERRES_NONE) return deltaq_rdmult;
if (cpi->oxcf.aq_mode != NO_AQ) return deltaq_rdmult;
@@ -4013,7 +4013,7 @@
if (!is_frame_tpl_eligible(cpi)) return orig_rdmult;
- if (cpi->gf_group.index >= MAX_LAG_BUFFERS) return orig_rdmult;
+ if (cpi->gf_group.index >= MAX_TPL_FRAME_IDX) return orig_rdmult;
int64_t mc_count = 0, mc_saved = 0;
int mi_count = 0;
@@ -4093,7 +4093,7 @@
const int mi_high = mi_size_high[bsize];
if (tpl_frame->is_valid == 0) return 0;
- if (gf_group_index >= MAX_LAG_BUFFERS) return 0;
+ if (gf_group_index >= MAX_TPL_FRAME_IDX) return 0;
int mi_count = 0;
int count = 0;
@@ -4166,7 +4166,7 @@
if (!is_frame_tpl_eligible(cpi)) return base_qindex;
- if (cpi->gf_group.index >= MAX_LAG_BUFFERS) return base_qindex;
+ if (cpi->gf_group.index >= MAX_TPL_FRAME_IDX) return base_qindex;
int64_t mc_count = 0, mc_saved = 0;
int mi_count = 0;
@@ -4689,7 +4689,7 @@
if (tpl_frame->is_valid == 0) return;
if (!is_frame_tpl_eligible(cpi)) return;
- if (frame_idx >= MAX_LAG_BUFFERS) return;
+ if (frame_idx >= MAX_TPL_FRAME_IDX) return;
if (cpi->superres_mode != SUPERRES_NONE) return;
if (cpi->oxcf.aq_mode != NO_AQ) return;
diff --git a/av1/encoder/encoder.h b/av1/encoder/encoder.h
index 88f024f..8213ade 100644
--- a/av1/encoder/encoder.h
+++ b/av1/encoder/encoder.h
@@ -150,7 +150,20 @@
} UENUM1BYTE(SS_CFG_OFFSET);
// TODO(jingning): This needs to be cleaned up next.
-#define MAX_LENGTH_TPL_FRAME_STATS (MAX_TOTAL_BUFFERS + REF_FRAMES + 1)
+
+// TPL stats buffers are prepared for every frame in the GOP,
+// including (internal) overlays and (internal) arfs.
+// In addition, frames in the lookahead that are outside of the GOP
+// are also used.
+// Thus the number of buffers needed is
+// (gop_length) + (# overlays) + (MAX_LAG_BUFFERS - gop_length) =
+// MAX_LAG_BUFFERS + (# overlays).
+// 2 * MAX_LAG_BUFFERS is therefore a safe estimate.
+// TODO(bohanli): test setting it to 1.5 * MAX_LAG_BUFFERS
+#define MAX_TPL_FRAME_IDX (2 * MAX_LAG_BUFFERS)
+// The first REF_FRAMES + 1 buffers are reserved;
+// tpl_data->tpl_frame starts after those REF_FRAMES + 1 buffers.
+#define MAX_LENGTH_TPL_FRAME_STATS (MAX_TPL_FRAME_IDX + REF_FRAMES + 1)
typedef struct TplDepStats {
int64_t intra_cost;
diff --git a/av1/encoder/rdopt.c b/av1/encoder/rdopt.c
index 02afcd1..036fa77 100644
--- a/av1/encoder/rdopt.c
+++ b/av1/encoder/rdopt.c
@@ -2126,7 +2126,7 @@
const int tpl_idx = gf_group->index;
TplParams *const tpl_data = &cpi->tpl_data;
const TplDepFrame *tpl_frame = &tpl_data->tpl_frame[tpl_idx];
- if (tpl_idx >= MAX_LAG_BUFFERS || !tpl_frame->is_valid) {
+ if (tpl_idx >= MAX_TPL_FRAME_IDX || !tpl_frame->is_valid) {
return;
}
@@ -2240,7 +2240,7 @@
TplDepFrame *tpl_frame = &cpi->tpl_data.tpl_frame[tpl_idx];
const int prune_modes_based_on_tpl =
cpi->sf.inter_sf.prune_inter_modes_based_on_tpl &&
- tpl_idx >= MAX_LAG_BUFFERS && tpl_frame->is_valid;
+ tpl_idx < MAX_TPL_FRAME_IDX && tpl_frame->is_valid;
int i;
const int refs[2] = { mbmi->ref_frame[0],
(mbmi->ref_frame[1] < 0 ? 0 : mbmi->ref_frame[1]) };
diff --git a/av1/encoder/tpl_model.c b/av1/encoder/tpl_model.c
index 79b94f3..5cc7c4e 100644
--- a/av1/encoder/tpl_model.c
+++ b/av1/encoder/tpl_model.c
@@ -833,7 +833,7 @@
int anc_frame_offset = gf_group->cur_frame_idx[cur_frame_idx] + 1;
int process_frame_count = 0;
const int gop_length =
- AOMMIN(gf_group->size - 1 + use_arf, MAX_LENGTH_TPL_FRAME_STATS - 1);
+ AOMMIN(gf_group->size - 1 + use_arf, MAX_TPL_FRAME_IDX - 1);
for (gf_index = cur_frame_idx; gf_index <= gop_length; ++gf_index) {
TplDepFrame *tpl_frame = &tpl_data->tpl_frame[gf_index];
FRAME_UPDATE_TYPE frame_update_type = gf_group->update_type[gf_index];
@@ -904,8 +904,8 @@
cpi->rc.frames_to_key - cpi->rc.baseline_gf_interval);
int frame_display_index = cpi->rc.baseline_gf_interval + 1;
- for (; gf_index < MAX_LENGTH_TPL_FRAME_STATS &&
- extend_frame_count < extend_frame_length;
+ for (;
+ gf_index < MAX_TPL_FRAME_IDX && extend_frame_count < extend_frame_length;
++gf_index) {
TplDepFrame *tpl_frame = &tpl_data->tpl_frame[gf_index];
FRAME_UPDATE_TYPE frame_update_type = LF_UPDATE;
@@ -1137,7 +1137,7 @@
if (tpl_frame->is_valid == 0) return;
if (!is_frame_tpl_eligible(cpi)) return;
- if (tpl_idx >= MAX_LAG_BUFFERS) return;
+ if (tpl_idx >= MAX_TPL_FRAME_IDX) return;
if (cpi->superres_mode != SUPERRES_NONE) return;
if (cpi->oxcf.aq_mode != NO_AQ) return;