Hoist mi_params reads out of loops in seg_map functions
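This commit hoists the remaining accesses to mi_params->mi_(row|col)s
out of loops related to seg_map. The per-block segment-map fill is also
factored into a shared set_segment_id() helper in seg_common.h, which
the decoder and encoder call sites now use.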
Performance:
| SPD_SET | TESTSET | AVG_PSNR | OVR_PSNR | SSIM | ENC_T |
|---------|------------|----------|----------|---------|-------|
| 5 | rtc | +0.000% | +0.000% | +0.000% | -0.0% |
| 5 | rtc_1080p | +0.000% | +0.000% | +0.000% | -0.1% |
| 5 | rtc_derf | +0.000% | +0.000% | +0.000% | -0.0% |
| 5 | rtc_screen | +0.000% | +0.000% | +0.000% | -0.1% |
|---------|------------|----------|----------|---------|-------|
| 6 | rtc | +0.000% | +0.000% | +0.000% | -0.0% |
| 6 | rtc_1080p | +0.000% | +0.000% | +0.000% | -0.1% |
| 6 | rtc_derf | +0.000% | +0.000% | +0.000% | -0.0% |
| 6 | rtc_screen | +0.000% | +0.000% | +0.000% | -0.2% |
|---------|------------|----------|----------|---------|-------|
| 7 | rtc | +0.000% | +0.000% | +0.000% | -0.1% |
| 7 | rtc_1080p | +0.000% | +0.000% | +0.000% | -0.1% |
| 7 | rtc_derf | +0.000% | +0.000% | +0.000% | -0.1% |
| 7 | rtc_screen | +0.000% | +0.000% | +0.000% | -0.2% |
|---------|------------|----------|----------|---------|-------|
| 8 | rtc | +0.000% | +0.000% | +0.000% | -0.1% |
| 8 | rtc_1080p | +0.000% | +0.000% | +0.000% | -0.1% |
| 8 | rtc_derf | +0.000% | +0.000% | +0.000% | -0.1% |
| 8 | rtc_screen | +0.000% | +0.000% | +0.000% | -0.2% |
|---------|------------|----------|----------|---------|-------|
| 9 | rtc | +0.000% | +0.000% | +0.000% | -0.1% |
| 9 | rtc_1080p | +0.000% | +0.000% | +0.000% | -0.1% |
| 9 | rtc_derf | +0.000% | +0.000% | +0.000% | -0.1% |
| 9 | rtc_screen | +0.000% | +0.000% | +0.000% | -0.3% |
|---------|------------|----------|----------|---------|-------|
| 10 | rtc | +0.000% | +0.000% | +0.000% | -0.1% |
| 10 | rtc_1080p | +0.000% | +0.000% | +0.000% | -0.1% |
| 10 | rtc_derf | +0.000% | +0.000% | +0.000% | -0.1% |
| 10 | rtc_screen | +0.000% | +0.000% | +0.000% | -0.3% |
Change-Id: I8c2e1ee97fabba03661f3799fdba56f2332f402e
diff --git a/av1/common/seg_common.h b/av1/common/seg_common.h
index 6ce408c..44b508b 100644
--- a/av1/common/seg_common.h
+++ b/av1/common/seg_common.h
@@ -96,6 +96,16 @@
return seg->feature_data[segment_id][feature_id];
}
+static AOM_INLINE void set_segment_id(uint8_t *segment_ids, int mi_offset,
+ int x_mis, int y_mis, int mi_stride,
+ uint8_t segment_id) {
+ segment_ids += mi_offset;
+ for (int y = 0; y < y_mis; ++y) {
+ memset(&segment_ids[y * mi_stride], segment_id,
+ x_mis * sizeof(segment_ids[0]));
+ }
+}
+
#ifdef __cplusplus
} // extern "C"
#endif
diff --git a/av1/decoder/decodemv.c b/av1/decoder/decodemv.c
index f73a711..5f114f9 100644
--- a/av1/decoder/decodemv.c
+++ b/av1/decoder/decodemv.c
@@ -310,16 +310,6 @@
return segment_id;
}
-static void set_segment_id(AV1_COMMON *cm, int mi_offset, int x_mis, int y_mis,
- int segment_id) {
- assert(segment_id >= 0 && segment_id < MAX_SEGMENTS);
-
- for (int y = 0; y < y_mis; y++)
- for (int x = 0; x < x_mis; x++)
- cm->cur_frame->seg_map[mi_offset + y * cm->mi_params.mi_cols + x] =
- segment_id;
-}
-
static int read_intra_segment_id(AV1_COMMON *const cm,
const MACROBLOCKD *const xd, int bsize,
aom_reader *r, int skip) {
@@ -330,13 +320,15 @@
const CommonModeInfoParams *const mi_params = &cm->mi_params;
const int mi_row = xd->mi_row;
const int mi_col = xd->mi_col;
- const int mi_offset = mi_row * mi_params->mi_cols + mi_col;
+ const int mi_stride = cm->mi_params.mi_cols;
+ const int mi_offset = mi_row * mi_stride + mi_col;
const int bw = mi_size_wide[bsize];
const int bh = mi_size_high[bsize];
const int x_mis = AOMMIN(mi_params->mi_cols - mi_col, bw);
const int y_mis = AOMMIN(mi_params->mi_rows - mi_row, bh);
const int segment_id = read_segment_id(cm, xd, r, skip);
- set_segment_id(cm, mi_offset, x_mis, y_mis, segment_id);
+ set_segment_id(cm->cur_frame->seg_map, mi_offset, x_mis, y_mis, mi_stride,
+ segment_id);
return segment_id;
}
@@ -344,12 +336,20 @@
const uint8_t *last_segment_ids,
uint8_t *current_segment_ids, int mi_offset,
int x_mis, int y_mis) {
- for (int y = 0; y < y_mis; y++)
- for (int x = 0; x < x_mis; x++)
- current_segment_ids[mi_offset + y * mi_params->mi_cols + x] =
- last_segment_ids
- ? last_segment_ids[mi_offset + y * mi_params->mi_cols + x]
- : 0;
+ const int stride = mi_params->mi_cols;
+ if (last_segment_ids) {
+ assert(last_segment_ids != current_segment_ids);
+ for (int y = 0; y < y_mis; y++) {
+ memcpy(¤t_segment_ids[mi_offset + y * stride],
+ &last_segment_ids[mi_offset + y * stride],
+ sizeof(current_segment_ids[0]) * x_mis);
+ }
+ } else {
+ for (int y = 0; y < y_mis; y++) {
+ memset(¤t_segment_ids[mi_offset + y * stride], 0,
+ sizeof(current_segment_ids[0]) * x_mis);
+ }
+ }
}
static int get_predicted_segment_id(AV1_COMMON *const cm, int mi_offset,
@@ -382,7 +382,8 @@
return get_predicted_segment_id(cm, mi_offset, x_mis, y_mis);
}
- int segment_id;
+ uint8_t segment_id;
+ const int mi_stride = cm->mi_params.mi_cols;
if (preskip) {
if (!seg->segid_preskip) return 0;
} else {
@@ -391,7 +392,8 @@
mbmi->seg_id_predicted = 0;
}
segment_id = read_segment_id(cm, xd, r, 1);
- set_segment_id(cm, mi_offset, x_mis, y_mis, segment_id);
+ set_segment_id(cm->cur_frame->seg_map, mi_offset, x_mis, y_mis, mi_stride,
+ segment_id);
return segment_id;
}
}
@@ -410,7 +412,8 @@
} else {
segment_id = read_segment_id(cm, xd, r, 0);
}
- set_segment_id(cm, mi_offset, x_mis, y_mis, segment_id);
+ set_segment_id(cm->cur_frame->seg_map, mi_offset, x_mis, y_mis, mi_stride,
+ segment_id);
return segment_id;
}
diff --git a/av1/encoder/aq_complexity.c b/av1/encoder/aq_complexity.c
index 9f3e910..4cf6bd5 100644
--- a/av1/encoder/aq_complexity.c
+++ b/av1/encoder/aq_complexity.c
@@ -136,48 +136,40 @@
const int mi_offset = mi_row * cm->mi_params.mi_cols + mi_col;
const int xmis = AOMMIN(cm->mi_params.mi_cols - mi_col, mi_size_wide[bs]);
const int ymis = AOMMIN(cm->mi_params.mi_rows - mi_row, mi_size_high[bs]);
- int x, y;
int i;
unsigned char segment;
- if (0) {
- segment = DEFAULT_AQ2_SEG;
- } else {
- // Rate depends on fraction of a SB64 in frame (xmis * ymis / bw * bh).
- // It is converted to bits << AV1_PROB_COST_SHIFT units.
- const int64_t num = (int64_t)(cpi->rc.sb64_target_rate * xmis * ymis)
- << AV1_PROB_COST_SHIFT;
- const int denom = cm->seq_params->mib_size * cm->seq_params->mib_size;
- const int target_rate = (int)(num / denom);
- double logvar;
- double low_var_thresh;
- const int aq_strength = get_aq_c_strength(cm->quant_params.base_qindex,
- cm->seq_params->bit_depth);
+ // Rate depends on fraction of a SB64 in frame (xmis * ymis / bw * bh).
+ // It is converted to bits << AV1_PROB_COST_SHIFT units.
+ const int64_t num = (int64_t)(cpi->rc.sb64_target_rate * xmis * ymis)
+ << AV1_PROB_COST_SHIFT;
+ const int denom = cm->seq_params->mib_size * cm->seq_params->mib_size;
+ const int target_rate = (int)(num / denom);
+ double logvar;
+ double low_var_thresh;
+ const int aq_strength = get_aq_c_strength(cm->quant_params.base_qindex,
+ cm->seq_params->bit_depth);
- low_var_thresh = (is_stat_consumption_stage_twopass(cpi))
- ? AOMMAX(exp(cpi->twopass_frame.mb_av_energy),
- MIN_DEFAULT_LV_THRESH)
- : DEFAULT_LV_THRESH;
+ low_var_thresh =
+ (is_stat_consumption_stage_twopass(cpi))
+ ? AOMMAX(exp(cpi->twopass_frame.mb_av_energy), MIN_DEFAULT_LV_THRESH)
+ : DEFAULT_LV_THRESH;
- av1_setup_src_planes(mb, cpi->source, mi_row, mi_col, num_planes, bs);
- logvar = av1_log_block_var(cpi, mb, bs);
+ av1_setup_src_planes(mb, cpi->source, mi_row, mi_col, num_planes, bs);
+ logvar = av1_log_block_var(cpi, mb, bs);
- segment = AQ_C_SEGMENTS - 1; // Just in case no break out below.
- for (i = 0; i < AQ_C_SEGMENTS; ++i) {
- // Test rate against a threshold value and variance against a threshold.
- // Increasing segment number (higher variance and complexity) = higher Q.
- if ((projected_rate < target_rate * aq_c_transitions[aq_strength][i]) &&
- (logvar < (low_var_thresh + aq_c_var_thresholds[aq_strength][i]))) {
- segment = i;
- break;
- }
+ segment = AQ_C_SEGMENTS - 1; // Just in case no break out below.
+ for (i = 0; i < AQ_C_SEGMENTS; ++i) {
+ // Test rate against a threshold value and variance against a threshold.
+ // Increasing segment number (higher variance and complexity) = higher Q.
+ if ((projected_rate < target_rate * aq_c_transitions[aq_strength][i]) &&
+ (logvar < (low_var_thresh + aq_c_var_thresholds[aq_strength][i]))) {
+ segment = i;
+ break;
}
}
// Fill in the entires in the segment map corresponding to this SB64.
- for (y = 0; y < ymis; y++) {
- for (x = 0; x < xmis; x++) {
- cpi->enc_seg.map[mi_offset + y * cm->mi_params.mi_cols + x] = segment;
- }
- }
+ const int mi_stride = cm->mi_params.mi_cols;
+ set_segment_id(cpi->enc_seg.map, mi_offset, xmis, ymis, mi_stride, segment);
}
diff --git a/av1/encoder/aq_cyclicrefresh.c b/av1/encoder/aq_cyclicrefresh.c
index 3f03969..41ccaab 100644
--- a/av1/encoder/aq_cyclicrefresh.c
+++ b/av1/encoder/aq_cyclicrefresh.c
@@ -166,11 +166,13 @@
av1_get_spatial_seg_pred(cm, xd, &cdf_num, cr->skip_over4x4);
if (prev_segment_id != mbmi->segment_id) {
const int block_index = mi_row * cm->mi_params.mi_cols + mi_col;
+ const int mi_stride = cm->mi_params.mi_cols;
+ const uint8_t segment_id = mbmi->segment_id;
for (int mi_y = 0; mi_y < ymis; mi_y++) {
- const int map_offset = block_index + mi_y * cm->mi_params.mi_cols;
+ const int map_offset = block_index + mi_y * mi_stride;
memset(&cr->map[map_offset], 0, xmis);
- memset(&cpi->enc_seg.map[map_offset], mbmi->segment_id, xmis);
- memset(&cm->cur_frame->seg_map[map_offset], mbmi->segment_id, xmis);
+ memset(&cpi->enc_seg.map[map_offset], segment_id, xmis);
+ memset(&cm->cur_frame->seg_map[map_offset], segment_id, xmis);
}
}
}
@@ -210,12 +212,13 @@
// Reset segment_id if will be skipped.
if (skip) mbmi->segment_id = CR_SEGMENT_ID_BASE;
}
+ const uint8_t segment_id = mbmi->segment_id;
// Update the cyclic refresh map, to be used for setting segmentation map
// for the next frame. If the block will be refreshed this frame, mark it
// as clean. The magnitude of the -ve influences how long before we consider
// it for refresh again.
- if (cyclic_refresh_segment_id_boosted(mbmi->segment_id)) {
+ if (cyclic_refresh_segment_id_boosted(segment_id)) {
new_map_value = -cr->time_for_refresh;
} else if (refresh_this_block) {
// Else if it is accepted as candidate for refresh, and has not already
@@ -229,19 +232,19 @@
// Update entries in the cyclic refresh map with new_map_value, and
// copy mbmi->segment_id into global segmentation map.
+ const int mi_stride = cm->mi_params.mi_cols;
for (int mi_y = 0; mi_y < ymis; mi_y += sh) {
- const int map_offset = block_index + mi_y * cm->mi_params.mi_cols;
+ const int map_offset = block_index + mi_y * mi_stride;
memset(&cr->map[map_offset], new_map_value, xmis);
- memset(&cpi->enc_seg.map[map_offset], mbmi->segment_id, xmis);
- memset(&cm->cur_frame->seg_map[map_offset], mbmi->segment_id, xmis);
+ memset(&cpi->enc_seg.map[map_offset], segment_id, xmis);
+ memset(&cm->cur_frame->seg_map[map_offset], segment_id, xmis);
}
// Accumulate cyclic refresh update counters.
if (!dry_run) {
- if (cyclic_refresh_segment_id(mbmi->segment_id) == CR_SEGMENT_ID_BOOST1)
+ if (cyclic_refresh_segment_id(segment_id) == CR_SEGMENT_ID_BOOST1)
x->actual_num_seg1_blocks += xmis * ymis;
- else if (cyclic_refresh_segment_id(mbmi->segment_id) ==
- CR_SEGMENT_ID_BOOST2)
+ else if (cyclic_refresh_segment_id(segment_id) == CR_SEGMENT_ID_BOOST2)
x->actual_num_seg2_blocks += xmis * ymis;
}
}
@@ -294,15 +297,14 @@
uint64_t sb_sad = 0;
uint64_t thresh_sad_low = 0;
uint64_t thresh_sad = INT64_MAX;
- memset(seg_map, CR_SEGMENT_ID_BASE, mi_params->mi_rows * mi_params->mi_cols);
- sb_cols = (mi_params->mi_cols + cm->seq_params->mib_size - 1) /
- cm->seq_params->mib_size;
- sb_rows = (mi_params->mi_rows + cm->seq_params->mib_size - 1) /
- cm->seq_params->mib_size;
+ const int mi_rows = mi_params->mi_rows, mi_cols = mi_params->mi_cols;
+ const int mi_stride = mi_cols;
+ memset(seg_map, CR_SEGMENT_ID_BASE, mi_rows * mi_cols);
+ sb_cols = (mi_cols + cm->seq_params->mib_size - 1) / cm->seq_params->mib_size;
+ sb_rows = (mi_rows + cm->seq_params->mib_size - 1) / cm->seq_params->mib_size;
sbs_in_frame = sb_cols * sb_rows;
// Number of target blocks to get the q delta (segment 1).
- block_count =
- cr->percent_refresh * mi_params->mi_rows * mi_params->mi_cols / 100;
+ block_count = cr->percent_refresh * mi_rows * mi_cols / 100;
// Set the segmentation map: cycle through the superblocks, starting at
// cr->mb_index, and stopping when either block_count blocks have been found
// to be refreshed, or we have passed through whole frame.
@@ -317,12 +319,12 @@
int sb_col_index = i - sb_row_index * sb_cols;
int mi_row = sb_row_index * cm->seq_params->mib_size;
int mi_col = sb_col_index * cm->seq_params->mib_size;
- assert(mi_row >= 0 && mi_row < mi_params->mi_rows);
- assert(mi_col >= 0 && mi_col < mi_params->mi_cols);
- bl_index = mi_row * mi_params->mi_cols + mi_col;
+ assert(mi_row >= 0 && mi_row < mi_rows);
+ assert(mi_col >= 0 && mi_col < mi_cols);
+ bl_index = mi_row * mi_stride + mi_col;
// Loop through all MI blocks in superblock and update map.
- xmis = AOMMIN(mi_params->mi_cols - mi_col, cm->seq_params->mib_size);
- ymis = AOMMIN(mi_params->mi_rows - mi_row, cm->seq_params->mib_size);
+ xmis = AOMMIN(mi_cols - mi_col, cm->seq_params->mib_size);
+ ymis = AOMMIN(mi_rows - mi_row, cm->seq_params->mib_size);
if (cr->use_block_sad_scene_det && cpi->rc.frames_since_key > 30 &&
cr->counter_encode_maxq_scene_change > 30 &&
cpi->src_sad_blk_64x64 != NULL &&
@@ -337,7 +339,7 @@
// cr_map only needed at 8x8 blocks.
for (y = 0; y < ymis; y += 2) {
for (x = 0; x < xmis; x += 2) {
- const int bl_index2 = bl_index + y * mi_params->mi_cols + x;
+ const int bl_index2 = bl_index + y * mi_stride + x;
// If the block is as a candidate for clean up then mark it
// for possible boost/refresh (segment 1). The segment id may get
// reset to 0 later if block gets coded anything other than low motion.
@@ -353,10 +355,8 @@
// If segment is at least half of superblock, set to 1.
// Enforce that block sad (sb_sad) is not too high.
if (sum_map >= (xmis * ymis) >> 1 && sb_sad < thresh_sad) {
- for (y = 0; y < ymis; y++)
- for (x = 0; x < xmis; x++) {
- seg_map[bl_index + y * mi_params->mi_cols + x] = CR_SEGMENT_ID_BOOST1;
- }
+ set_segment_id(seg_map, bl_index, xmis, ymis, mi_stride,
+ CR_SEGMENT_ID_BOOST1);
cr->target_num_seg_blocks += xmis * ymis;
}
i++;
diff --git a/av1/encoder/bitstream.c b/av1/encoder/bitstream.c
index 2ce6057..4f85307 100644
--- a/av1/encoder/bitstream.c
+++ b/av1/encoder/bitstream.c
@@ -424,11 +424,7 @@
const int mi_stride = mi_params->mi_cols;
- segment_ids += mi_offset;
- for (int y = 0; y < ymis; ++y) {
- memset(&segment_ids[y * mi_stride], segment_id,
- xmis * sizeof(segment_ids[0]));
- }
+ set_segment_id(segment_ids, mi_offset, xmis, ymis, mi_stride, segment_id);
}
int av1_neg_interleave(int x, int ref, int max) {
diff --git a/av1/encoder/encoder_utils.c b/av1/encoder/encoder_utils.c
index 0c69e4d..530ef59 100644
--- a/av1/encoder/encoder_utils.c
+++ b/av1/encoder/encoder_utils.c
@@ -432,9 +432,9 @@
if (cpi->active_map.update) {
if (cpi->active_map.enabled) {
- for (i = 0;
- i < cpi->common.mi_params.mi_rows * cpi->common.mi_params.mi_cols;
- ++i)
+ const int num_mis =
+ cpi->common.mi_params.mi_rows * cpi->common.mi_params.mi_cols;
+ for (i = 0; i < num_mis; ++i)
if (seg_map[i] == AM_SEGMENT_ID_ACTIVE) seg_map[i] = active_map[i];
av1_enable_segmentation(seg);
av1_enable_segfeature(seg, AM_SEGMENT_ID_INACTIVE, SEG_LVL_SKIP);
diff --git a/av1/encoder/encoder_utils.h b/av1/encoder/encoder_utils.h
index dd91bb1..5b59289 100644
--- a/av1/encoder/encoder_utils.h
+++ b/av1/encoder/encoder_utils.h
@@ -42,9 +42,10 @@
static AOM_INLINE void suppress_active_map(AV1_COMP *cpi) {
unsigned char *const seg_map = cpi->enc_seg.map;
int i;
+ const int num_mis =
+ cpi->common.mi_params.mi_rows * cpi->common.mi_params.mi_cols;
if (cpi->active_map.enabled || cpi->active_map.update)
- for (i = 0;
- i < cpi->common.mi_params.mi_rows * cpi->common.mi_params.mi_cols; ++i)
+ for (i = 0; i < num_mis; ++i)
if (seg_map[i] == AM_SEGMENT_ID_INACTIVE)
seg_map[i] = AM_SEGMENT_ID_ACTIVE;
}