Assign references to named slots without stacks
Neutral performance on enh and default subgop configs on speed 0,
0.2% gain on speed 3. Still needs a follow-up to reduce performance
drop with TPL model.
Also reduces the performance drop in aomedia:2819 to 0.15% loss.
BUG=aomedia:2819
Change-Id: I1317c00bcd91257374df39a031a85f1890b763c3
diff --git a/av1/encoder/encode_strategy.c b/av1/encoder/encode_strategy.c
index 7338d15..ef516e8 100644
--- a/av1/encoder/encode_strategy.c
+++ b/av1/encoder/encode_strategy.c
@@ -313,11 +313,6 @@
const int frame_order = (int)buf->display_order_hint;
frame_level = buf->pyramid_level;
- // Handle special cases where the pyramid level computed in the gf group
- // won't match the assumptions made in the subgop cfg.
- frame_level =
- get_true_pyr_level(frame_level, frame_order, gf_group.max_layer_depth);
-
// Sometimes a frame index is in multiple reference buffers.
// Do not add a frame to the pyramid list multiple times.
int found = 0;
@@ -1235,7 +1230,218 @@
return INVALID_IDX;
}
-void av1_get_ref_frames(AV1_COMP *const cpi, RefBufferStack *ref_buffer_stack) {
+/*!\cond */
+// Struct to keep track of relevant reference frame data
+typedef struct {
+ int map_idx;
+ int disp_order;
+ int pyr_level;
+ int used;
+} RefBufMapData;
+/*!\endcond */
+
+// Comparison function to sort reference frames in ascending display order
+static int compare_map_idx_pair_asc(const void *a, const void *b) {
+ if (((RefBufMapData *)a)->disp_order == ((RefBufMapData *)b)->disp_order) {
+ return 0;
+ } else if (((const RefBufMapData *)a)->disp_order >
+ ((const RefBufMapData *)b)->disp_order) {
+ return 1;
+ } else {
+ return -1;
+ }
+}
+
+// Checks to see if a particular reference frame is already in the reference
+// frame map
+static int is_in_ref_map(RefBufMapData *map, int disp_order, int n_frames) {
+ for (int i = 0; i < n_frames; i++) {
+ if (disp_order == map[i].disp_order) return 1;
+ }
+ return 0;
+}
+
+// Add a reference buffer index to a named reference slot
+static void add_ref_to_slot(RefBufMapData *ref, int *const remapped_ref_idx,
+ int frame) {
+ remapped_ref_idx[frame - LAST_FRAME] = ref->map_idx;
+ ref->used = 1;
+}
+
+// Find which reference buffer should be left out of the named mapping.
+// This is because there are 8 reference buffers and only 7 named slots.
+static void set_unmapped_ref(RefBufMapData *buffer_map, int n_bufs,
+ int min_level, int cur_frame_disp) {
+ int max_dist = 0;
+ int unmapped_idx = -1;
+ if (n_bufs <= ALTREF_FRAME) return;
+ for (int i = 0; i < n_bufs; i++) {
+ if (buffer_map[i].used) continue;
+ if (buffer_map[i].pyr_level != min_level) {
+ int dist = abs(cur_frame_disp - buffer_map[i].disp_order);
+ if (dist > max_dist) {
+ max_dist = dist;
+ unmapped_idx = i;
+ }
+ }
+ }
+ assert(unmapped_idx >= 0 && "Unmapped reference not found");
+ buffer_map[unmapped_idx].used = 1;
+}
+
+static void get_ref_frames_subgop(
+ AV1_COMP *const cpi, RefFrameMapPair ref_frame_map_pairs[REF_FRAMES]) {
+ AV1_COMMON *cm = &cpi->common;
+ GF_GROUP gf_group = cpi->gf_group;
+ int *const remapped_ref_idx = cm->remapped_ref_idx;
+
+ // The current display index stored has not yet been updated. We must add
+ // the order offset to get the correct value here.
+ const int order_offset = gf_group.arf_src_offset[gf_group.index];
+ const int cur_frame_disp =
+ cpi->common.current_frame.frame_number + order_offset;
+ int buf_map_idx = 0;
+
+ // Initialize reference frame mappings
+ for (int i = 0; i < REF_FRAMES; ++i) remapped_ref_idx[i] = INVALID_IDX;
+
+ RefBufMapData buffer_map[REF_FRAMES];
+ int n_bufs = 0;
+ memset(buffer_map, 0, REF_FRAMES * sizeof(buffer_map[0]));
+ int min_level = MAX_ARF_LAYERS;
+ int max_level = 0;
+
+ // Go through current reference buffers and store display order, pyr level,
+ // and map index.
+ for (int map_idx = 0; map_idx < REF_FRAMES; map_idx++) {
+ // Get reference frame buffer
+ RefFrameMapPair ref_pair = ref_frame_map_pairs[map_idx];
+ if (ref_pair.disp_order == -1) continue;
+ const int frame_order = ref_pair.disp_order;
+ // Avoid duplicates
+ if (is_in_ref_map(buffer_map, frame_order, n_bufs)) continue;
+ const int reference_frame_level = ref_pair.pyr_level;
+
+ // Keep track of the lowest and highest levels that currently exist
+ if (reference_frame_level < min_level) min_level = reference_frame_level;
+ if (reference_frame_level > max_level) max_level = reference_frame_level;
+
+ buffer_map[n_bufs].map_idx = map_idx;
+ buffer_map[n_bufs].disp_order = frame_order;
+ buffer_map[n_bufs].pyr_level = reference_frame_level;
+ buffer_map[n_bufs].used = 0;
+ n_bufs++;
+ }
+
+ // Sort frames in ascending display order
+ qsort(buffer_map, n_bufs, sizeof(buffer_map[0]), compare_map_idx_pair_asc);
+
+ int n_min_level_refs = 0;
+ int n_past_high_level = 0;
+ int closest_past_ref = -1;
+
+ // Find the GOLDEN_FRAME and BWDREF_FRAME.
+ // Also collect various stats about the reference frames for the remaining
+ // mappings
+ for (int i = n_bufs - 1; i >= 0; i--) {
+ if (buffer_map[i].pyr_level == min_level) {
+ // Keep track of the number of lowest level frames
+ n_min_level_refs++;
+ if (buffer_map[i].disp_order < cur_frame_disp &&
+ remapped_ref_idx[GOLDEN_FRAME - LAST_FRAME] == INVALID_IDX) {
+ // Map the GOLDEN_FRAME
+ add_ref_to_slot(&buffer_map[i], remapped_ref_idx, GOLDEN_FRAME);
+ }
+ } else if (buffer_map[i].disp_order == cur_frame_disp) {
+ // Map the BWDREF_FRAME if this is the show_existing_frame
+ add_ref_to_slot(&buffer_map[i], remapped_ref_idx, BWDREF_FRAME);
+ }
+
+ // Keep track of the number of past frames that are not at the lowest level
+ if (buffer_map[i].disp_order < cur_frame_disp &&
+ buffer_map[i].pyr_level != min_level)
+ n_past_high_level++;
+
+ // Keep track of where the frames change from being past frames to future
+ // frames
+ if (buffer_map[i].disp_order < cur_frame_disp && closest_past_ref < 0)
+ closest_past_ref = i;
+ }
+
+ // Find the buffer to be excluded from the mapping
+ set_unmapped_ref(buffer_map, n_bufs, min_level, cur_frame_disp);
+
+ // Map LAST3_FRAME
+ if (n_bufs >= ALTREF_FRAME) {
+ const int use_low_level_last3 =
+ n_past_high_level < 4 && n_bufs > ALTREF_FRAME;
+ for (int i = 0; i < n_bufs; i++) {
+ if (buffer_map[i].used) continue;
+ if ((buffer_map[i].pyr_level != min_level ||
+ (use_low_level_last3 && buffer_map[i].pyr_level == min_level))) {
+ add_ref_to_slot(&buffer_map[i], remapped_ref_idx, LAST3_FRAME);
+ break;
+ }
+ }
+ }
+
+ // Assign low level frames
+ buf_map_idx = n_bufs - 1;
+ for (int frame = ALTREF_FRAME; frame >= LAST_FRAME; frame--) {
+ // Continue if the current ref slot is already full
+ if (remapped_ref_idx[frame - LAST_FRAME] != INVALID_IDX) continue;
+ // Find the next unmapped low level reference buffer
+ for (; buf_map_idx >= 0; buf_map_idx--) {
+ if (buffer_map[buf_map_idx].pyr_level != min_level) continue;
+ if (!buffer_map[buf_map_idx].used) break;
+ }
+ if (buf_map_idx < 0) break;
+ if (buffer_map[buf_map_idx].used) break;
+ add_ref_to_slot(&buffer_map[buf_map_idx], remapped_ref_idx, frame);
+ }
+
+ // Place remaining past frames
+ buf_map_idx = closest_past_ref;
+ for (int frame = LAST_FRAME; frame < REF_FRAMES; frame++) {
+ // Continue if the current ref slot is already full
+ if (remapped_ref_idx[frame - LAST_FRAME] != INVALID_IDX) continue;
+ // Find the next unmapped reference buffer
+ for (; buf_map_idx >= 0; buf_map_idx--) {
+ if (!buffer_map[buf_map_idx].used) break;
+ }
+ if (buf_map_idx < 0) break;
+ if (buffer_map[buf_map_idx].used) break;
+ add_ref_to_slot(&buffer_map[buf_map_idx], remapped_ref_idx, frame);
+ }
+
+ // Place remaining future frames
+ buf_map_idx = n_bufs - 1;
+ for (int frame = ALTREF_FRAME; frame >= LAST_FRAME; frame--) {
+ // Continue if the current ref slot is already full
+ if (remapped_ref_idx[frame - LAST_FRAME] != INVALID_IDX) continue;
+ // Find the next unmapped reference buffer
+ for (; buf_map_idx > closest_past_ref; buf_map_idx--) {
+ if (!buffer_map[buf_map_idx].used) break;
+ }
+ if (buf_map_idx < 0) break;
+ if (buffer_map[buf_map_idx].used) break;
+ add_ref_to_slot(&buffer_map[buf_map_idx], remapped_ref_idx, frame);
+ }
+
+ // Fill any slots that are empty (should only happen for the first 7 frames)
+ for (int i = 0; i < REF_FRAMES; ++i)
+ if (remapped_ref_idx[i] == INVALID_IDX) remapped_ref_idx[i] = 0;
+}
+
+void av1_get_ref_frames(AV1_COMP *const cpi, RefBufferStack *ref_buffer_stack,
+ RefFrameMapPair ref_frame_map_pairs[REF_FRAMES]) {
+ // TODO(sarahparker) Fix compatibility with tpl model
+ if (!cpi->oxcf.algo_cfg.enable_tpl_model &&
+ use_subgop_cfg(&cpi->gf_group, cpi->gf_group.index)) {
+ get_ref_frames_subgop(cpi, ref_frame_map_pairs);
+ return;
+ }
+
AV1_COMMON *cm = &cpi->common;
int *const remapped_ref_idx = cm->remapped_ref_idx;
int *const arf_stack = ref_buffer_stack->arf_stack;
@@ -1489,12 +1695,15 @@
frame_update_type, frame_params.frame_type,
force_refresh_all);
+ RefFrameMapPair ref_frame_map_pairs[REF_FRAMES];
+ init_ref_map_pair(cpi, ref_frame_map_pairs);
+
if (!is_stat_generation_stage(cpi)) {
const RefCntBuffer *ref_frames[INTER_REFS_PER_FRAME];
const YV12_BUFFER_CONFIG *ref_frame_buf[INTER_REFS_PER_FRAME];
if (!ext_flags->refresh_frame.update_pending) {
- av1_get_ref_frames(cpi, &cpi->ref_buffer_stack);
+ av1_get_ref_frames(cpi, &cpi->ref_buffer_stack, ref_frame_map_pairs);
} else if (cpi->svc.external_ref_frame_config) {
for (unsigned int i = 0; i < INTER_REFS_PER_FRAME; i++)
cm->remapped_ref_idx[i] = cpi->svc.ref_idx[i];
@@ -1531,9 +1740,6 @@
const int cur_frame_disp =
cpi->common.current_frame.frame_number + frame_params.order_offset;
- RefFrameMapPair ref_frame_map_pairs[REF_FRAMES];
- init_ref_map_pair(cpi, ref_frame_map_pairs);
-
frame_params.refresh_frame_flags = av1_get_refresh_frame_flags(
cpi, &frame_params, frame_update_type, cpi->gf_group.index,
cur_frame_disp, ref_frame_map_pairs, &cpi->ref_buffer_stack);
diff --git a/av1/encoder/encode_strategy.h b/av1/encoder/encode_strategy.h
index 59ee094..e236fc5 100644
--- a/av1/encoder/encode_strategy.h
+++ b/av1/encoder/encode_strategy.h
@@ -81,7 +81,8 @@
int show_existing_frame, int ref_map_index,
RefBufferStack *ref_buffer_stack);
-void av1_get_ref_frames(AV1_COMP *const cpi, RefBufferStack *ref_buffer_stack);
+void av1_get_ref_frames(AV1_COMP *const cpi, RefBufferStack *ref_buffer_stack,
+ RefFrameMapPair ref_frame_map_pairs[REF_FRAMES]);
int is_forced_keyframe_pending(struct lookahead_ctx *lookahead,
const int up_to_index,
diff --git a/av1/encoder/encoder.c b/av1/encoder/encoder.c
index 2e42c9d..b5e8287 100644
--- a/av1/encoder/encoder.c
+++ b/av1/encoder/encoder.c
@@ -3176,7 +3176,9 @@
current_frame->order_hint =
current_frame->frame_number + frame_params->order_offset;
current_frame->display_order_hint = current_frame->order_hint;
- current_frame->pyramid_level = cpi->gf_group.layer_depth[cpi->gf_group.index];
+ current_frame->pyramid_level = get_true_pyr_level(
+ cpi->gf_group.layer_depth[cpi->gf_group.index],
+ current_frame->display_order_hint, cpi->gf_group.max_layer_depth);
current_frame->absolute_poc =
current_frame->key_frame_number + current_frame->display_order_hint;
diff --git a/av1/encoder/encoder.h b/av1/encoder/encoder.h
index bc88f74..f14da5d 100644
--- a/av1/encoder/encoder.h
+++ b/av1/encoder/encoder.h
@@ -118,6 +118,21 @@
FRAMEFLAGS_ERROR_RESILIENT = 1 << 6,
} UENUM1BYTE(FRAMETYPE_FLAGS);
+static INLINE int get_true_pyr_level(int frame_level, int frame_order,
+ int max_layer_depth) {
+ if (frame_order == 0) {
+ // Keyframe case
+ return 1;
+ } else if (frame_level == MAX_ARF_LAYERS) {
+ // Leaves
+ return max_layer_depth;
+ } else if (frame_level == (MAX_ARF_LAYERS + 1)) {
+ // Altrefs
+ return 1;
+ }
+ return frame_level;
+}
+
enum {
NO_AQ = 0,
VARIANCE_AQ = 1,
@@ -2812,21 +2827,6 @@
void av1_set_screen_content_options(const struct AV1_COMP *cpi,
FeatureFlags *features);
-static INLINE int get_true_pyr_level(int frame_level, int frame_order,
- int max_layer_depth) {
- if (frame_order == 0) {
- // Keyframe case
- return 1;
- } else if (frame_level == MAX_ARF_LAYERS) {
- // Leaves
- return max_layer_depth;
- } else if (frame_level == (MAX_ARF_LAYERS + 1)) {
- // Altrefs
- return 1;
- }
- return frame_level;
-}
-
typedef struct {
int pyr_level;
int disp_order;
@@ -2840,9 +2840,7 @@
const RefCntBuffer *const buf = cpi->common.ref_frame_map[map_idx];
if (buf == NULL) continue;
ref_frame_map_pairs[map_idx].disp_order = (int)buf->display_order_hint;
- const int reference_frame_level = get_true_pyr_level(
- buf->pyramid_level, ref_frame_map_pairs[map_idx].disp_order,
- cpi->gf_group.max_layer_depth);
+ const int reference_frame_level = buf->pyramid_level;
ref_frame_map_pairs[map_idx].pyr_level = reference_frame_level;
}
}
@@ -3301,6 +3299,7 @@
cpi->frame_component_time[component] +=
aom_usec_timer_elapsed(&cpi->component_timer[component]);
}
+
static INLINE char const *get_frame_type_enum(int type) {
switch (type) {
case 0: return "KEY_FRAME";
diff --git a/av1/encoder/tpl_model.c b/av1/encoder/tpl_model.c
index 0a39855..e4e2732 100644
--- a/av1/encoder/tpl_model.c
+++ b/av1/encoder/tpl_model.c
@@ -1010,7 +1010,7 @@
++process_frame_count;
}
- av1_get_ref_frames(cpi, &ref_buffer_stack);
+ av1_get_ref_frames(cpi, &ref_buffer_stack, ref_frame_map_pairs);
const int true_disp =
(int)(tpl_frame->frame_display_index) - frame_params.show_frame;
int refresh_mask = av1_get_refresh_frame_flags(
@@ -1082,7 +1082,7 @@
gf_group->update_type[gf_index] = LF_UPDATE;
gf_group->q_val[gf_index] = *pframe_qindex;
- av1_get_ref_frames(cpi, &ref_buffer_stack);
+ av1_get_ref_frames(cpi, &ref_buffer_stack, ref_frame_map_pairs);
// TODO(sarahparker) av1_get_refresh_frame_flags() and
// av1_update_ref_frame_map() will execute default behavior even when
// subgop cfg is enabled. This should be addressed if we ever remove the
@@ -1119,7 +1119,7 @@
++frame_display_index;
}
- av1_get_ref_frames(cpi, &cpi->ref_buffer_stack);
+ av1_get_ref_frames(cpi, &cpi->ref_buffer_stack, ref_frame_map_pairs);
}
static AOM_INLINE void init_tpl_stats(TplParams *const tpl_data) {