Fix maximum TPL buffer length macros.

Added documentation for the macros that define the maximum TPL buffer
length.

Also fixed a few places checking availability of tpl_frame.

Note that one of them relates to the prune_inter_modes_based_on_tpl
speed feature, whose index check was inverted
(tpl_idx >= MAX_LAG_BUFFERS), so the feature was not applied to
frames with an in-range index.
This patch enables it for all eligible frames.

At speed 5, the borg test shows a 0.3% to 0.4% BD-rate increase.
A local speed test shows a 4% speedup at speed 5 in both
instruction count and run time.

STATS_CHANGED

Change-Id: I38ba5b05a1d5e37946bd5fede0d706721560c7e9
diff --git a/av1/encoder/encodeframe.c b/av1/encoder/encodeframe.c
index 87af91d..352de50 100644
--- a/av1/encoder/encodeframe.c
+++ b/av1/encoder/encodeframe.c
@@ -294,7 +294,7 @@
   const int deltaq_rdmult = set_deltaq_rdmult(cpi, xd);
   if (tpl_frame->is_valid == 0) return deltaq_rdmult;
   if (!is_frame_tpl_eligible((AV1_COMP *)cpi)) return deltaq_rdmult;
-  if (tpl_idx >= MAX_LAG_BUFFERS) return deltaq_rdmult;
+  if (tpl_idx >= MAX_TPL_FRAME_IDX) return deltaq_rdmult;
   if (cpi->superres_mode != SUPERRES_NONE) return deltaq_rdmult;
   if (cpi->oxcf.aq_mode != NO_AQ) return deltaq_rdmult;
 
@@ -4013,7 +4013,7 @@
 
   if (!is_frame_tpl_eligible(cpi)) return orig_rdmult;
 
-  if (cpi->gf_group.index >= MAX_LAG_BUFFERS) return orig_rdmult;
+  if (cpi->gf_group.index >= MAX_TPL_FRAME_IDX) return orig_rdmult;
 
   int64_t mc_count = 0, mc_saved = 0;
   int mi_count = 0;
@@ -4093,7 +4093,7 @@
   const int mi_high = mi_size_high[bsize];
 
   if (tpl_frame->is_valid == 0) return 0;
-  if (gf_group_index >= MAX_LAG_BUFFERS) return 0;
+  if (gf_group_index >= MAX_TPL_FRAME_IDX) return 0;
 
   int mi_count = 0;
   int count = 0;
@@ -4166,7 +4166,7 @@
 
   if (!is_frame_tpl_eligible(cpi)) return base_qindex;
 
-  if (cpi->gf_group.index >= MAX_LAG_BUFFERS) return base_qindex;
+  if (cpi->gf_group.index >= MAX_TPL_FRAME_IDX) return base_qindex;
 
   int64_t mc_count = 0, mc_saved = 0;
   int mi_count = 0;
@@ -4689,7 +4689,7 @@
 
   if (tpl_frame->is_valid == 0) return;
   if (!is_frame_tpl_eligible(cpi)) return;
-  if (frame_idx >= MAX_LAG_BUFFERS) return;
+  if (frame_idx >= MAX_TPL_FRAME_IDX) return;
   if (cpi->superres_mode != SUPERRES_NONE) return;
   if (cpi->oxcf.aq_mode != NO_AQ) return;
 
diff --git a/av1/encoder/encoder.h b/av1/encoder/encoder.h
index 88f024f..8213ade 100644
--- a/av1/encoder/encoder.h
+++ b/av1/encoder/encoder.h
@@ -150,7 +150,20 @@
 } UENUM1BYTE(SS_CFG_OFFSET);
 
 // TODO(jingning): This needs to be cleaned up next.
-#define MAX_LENGTH_TPL_FRAME_STATS (MAX_TOTAL_BUFFERS + REF_FRAMES + 1)
+
+// TPL stats buffers are prepared for every frame in the GOP,
+// including (internal) overlays and (internal) arfs.
+// In addition, frames in the lookahead that are outside of the GOP
+// are also used.
+// Thus it should use
+// (gop_length) + (# overlays) + (MAX_LAG_BUFFERS - gop_length) =
+// MAX_LAG_BUFFERS + (# overlays)
+// 2 * MAX_LAG_BUFFERS is therefore a safe estimate.
+// TODO(bohanli): test setting it to 1.5 * MAX_LAG_BUFFERS
+#define MAX_TPL_FRAME_IDX (2 * MAX_LAG_BUFFERS)
+// The first REF_FRAMES + 1 buffers are reserved.
+// tpl_data->tpl_frame starts after REF_FRAMES + 1
+#define MAX_LENGTH_TPL_FRAME_STATS (MAX_TPL_FRAME_IDX + REF_FRAMES + 1)
 
 typedef struct TplDepStats {
   int64_t intra_cost;
diff --git a/av1/encoder/rdopt.c b/av1/encoder/rdopt.c
index 02afcd1..036fa77 100644
--- a/av1/encoder/rdopt.c
+++ b/av1/encoder/rdopt.c
@@ -2126,7 +2126,7 @@
   const int tpl_idx = gf_group->index;
   TplParams *const tpl_data = &cpi->tpl_data;
   const TplDepFrame *tpl_frame = &tpl_data->tpl_frame[tpl_idx];
-  if (tpl_idx >= MAX_LAG_BUFFERS || !tpl_frame->is_valid) {
+  if (tpl_idx >= MAX_TPL_FRAME_IDX || !tpl_frame->is_valid) {
     return;
   }
 
@@ -2240,7 +2240,7 @@
   TplDepFrame *tpl_frame = &cpi->tpl_data.tpl_frame[tpl_idx];
   const int prune_modes_based_on_tpl =
       cpi->sf.inter_sf.prune_inter_modes_based_on_tpl &&
-      tpl_idx >= MAX_LAG_BUFFERS && tpl_frame->is_valid;
+      tpl_idx < MAX_TPL_FRAME_IDX && tpl_frame->is_valid;
   int i;
   const int refs[2] = { mbmi->ref_frame[0],
                         (mbmi->ref_frame[1] < 0 ? 0 : mbmi->ref_frame[1]) };
diff --git a/av1/encoder/tpl_model.c b/av1/encoder/tpl_model.c
index 79b94f3..5cc7c4e 100644
--- a/av1/encoder/tpl_model.c
+++ b/av1/encoder/tpl_model.c
@@ -833,7 +833,7 @@
   int anc_frame_offset = gf_group->cur_frame_idx[cur_frame_idx] + 1;
   int process_frame_count = 0;
   const int gop_length =
-      AOMMIN(gf_group->size - 1 + use_arf, MAX_LENGTH_TPL_FRAME_STATS - 1);
+      AOMMIN(gf_group->size - 1 + use_arf, MAX_TPL_FRAME_IDX - 1);
   for (gf_index = cur_frame_idx; gf_index <= gop_length; ++gf_index) {
     TplDepFrame *tpl_frame = &tpl_data->tpl_frame[gf_index];
     FRAME_UPDATE_TYPE frame_update_type = gf_group->update_type[gf_index];
@@ -904,8 +904,8 @@
              cpi->rc.frames_to_key - cpi->rc.baseline_gf_interval);
   int frame_display_index = cpi->rc.baseline_gf_interval + 1;
 
-  for (; gf_index < MAX_LENGTH_TPL_FRAME_STATS &&
-         extend_frame_count < extend_frame_length;
+  for (;
+       gf_index < MAX_TPL_FRAME_IDX && extend_frame_count < extend_frame_length;
        ++gf_index) {
     TplDepFrame *tpl_frame = &tpl_data->tpl_frame[gf_index];
     FRAME_UPDATE_TYPE frame_update_type = LF_UPDATE;
@@ -1137,7 +1137,7 @@
 
   if (tpl_frame->is_valid == 0) return;
   if (!is_frame_tpl_eligible(cpi)) return;
-  if (tpl_idx >= MAX_LAG_BUFFERS) return;
+  if (tpl_idx >= MAX_TPL_FRAME_IDX) return;
   if (cpi->superres_mode != SUPERRES_NONE) return;
   if (cpi->oxcf.aq_mode != NO_AQ) return;