rtc: Cleanup/fixes for aq-mode=3
Remove the last_coded_q_map, as this was not
currently being updated and hence not used.
This removes an allocation for 1 layer and
also for the multi-layer case.
Also skip looping over 4x4 blocks: the aq-mode=3
segment feature for RTC is only used for blocks >= 8X8,
so skipping over 4x4 blocks can give a small speedup.
But there is a small bdrate loss (~0.2%), since for more
efficient coding of the seg map, the cur_frame->seg_map
needs to be set at 4x4, along with the function
av1_cyclic_reset_segment_skip().
This was mentioned here:
https://aomedia-review.googlesource.com/c/aom/+/136503
Speedup is ~1.5% on vga, speed 10.
Change-Id: I15efe2909aab108678fa15f043fd1ab00f3eec8c
diff --git a/av1/encoder/aq_cyclicrefresh.c b/av1/encoder/aq_cyclicrefresh.c
index 01afb99..452a66f 100644
--- a/av1/encoder/aq_cyclicrefresh.c
+++ b/av1/encoder/aq_cyclicrefresh.c
@@ -21,7 +21,6 @@
#include "aom_dsp/aom_dsp_common.h"
CYCLIC_REFRESH *av1_cyclic_refresh_alloc(int mi_rows, int mi_cols) {
- size_t last_coded_q_map_size;
CYCLIC_REFRESH *const cr = aom_calloc(1, sizeof(*cr));
if (cr == NULL) return NULL;
@@ -30,21 +29,12 @@
av1_cyclic_refresh_free(cr);
return NULL;
}
- last_coded_q_map_size = mi_rows * mi_cols * sizeof(*cr->last_coded_q_map);
- cr->last_coded_q_map = aom_malloc(last_coded_q_map_size);
- if (cr->last_coded_q_map == NULL) {
- av1_cyclic_refresh_free(cr);
- return NULL;
- }
- assert(MAXQ <= 255);
- memset(cr->last_coded_q_map, MAXQ, last_coded_q_map_size);
return cr;
}
void av1_cyclic_refresh_free(CYCLIC_REFRESH *cr) {
if (cr != NULL) {
aom_free(cr->map);
- aom_free(cr->last_coded_q_map);
aom_free(cr);
}
}
@@ -155,6 +145,7 @@
const AV1_COMMON *const cm = &cpi->common;
MACROBLOCKD *const xd = &x->e_mbd;
MB_MODE_INFO *const mbmi = xd->mi[0];
+ int sh = cpi->cyclic_refresh->skip_over4x4 ? 2 : 1;
const int prev_segment_id = mbmi->segment_id;
mbmi->segment_id = av1_get_spatial_seg_pred(cm, xd, &cdf_num);
if (prev_segment_id != mbmi->segment_id) {
@@ -164,8 +155,8 @@
const int xmis = AOMMIN(cm->mi_params.mi_cols - mi_col, bw);
const int ymis = AOMMIN(cm->mi_params.mi_rows - mi_row, bh);
const int block_index = mi_row * cm->mi_params.mi_cols + mi_col;
- for (int mi_y = 0; mi_y < ymis; mi_y++) {
- for (int mi_x = 0; mi_x < xmis; mi_x++) {
+ for (int mi_y = 0; mi_y < ymis; mi_y += sh) {
+ for (int mi_x = 0; mi_x < xmis; mi_x += sh) {
const int map_offset =
block_index + mi_y * cm->mi_params.mi_cols + mi_x;
cr->map[map_offset] = 0;
@@ -200,6 +191,7 @@
const int block_index = mi_row * cm->mi_params.mi_cols + mi_col;
const int refresh_this_block =
candidate_refresh_aq(cr, mbmi, rate, dist, bsize);
+ int sh = cpi->cyclic_refresh->skip_over4x4 ? 2 : 1;
// Default is to not update the refresh map.
int new_map_value = cr->map[block_index];
@@ -229,8 +221,8 @@
// Update entries in the cyclic refresh map with new_map_value, and
// copy mbmi->segment_id into global segmentation map.
- for (int mi_y = 0; mi_y < ymis; mi_y++) {
- for (int mi_x = 0; mi_x < xmis; mi_x++) {
+ for (int mi_y = 0; mi_y < ymis; mi_y += sh) {
+ for (int mi_x = 0; mi_x < xmis; mi_x += sh) {
const int map_offset = block_index + mi_y * cm->mi_params.mi_cols + mi_x;
cr->map[map_offset] = new_map_value;
cpi->enc_seg.map[map_offset] = mbmi->segment_id;
@@ -342,13 +334,6 @@
int sb_col_index = i - sb_row_index * sb_cols;
int mi_row = sb_row_index * cm->seq_params->mib_size;
int mi_col = sb_col_index * cm->seq_params->mib_size;
- // TODO(any): Ensure the population of
- // cpi->common.features.allow_screen_content_tools and use the same instead
- // of cpi->oxcf.tune_cfg.content == AOM_CONTENT_SCREEN
- int qindex_thresh = cpi->oxcf.tune_cfg.content == AOM_CONTENT_SCREEN
- ? av1_get_qindex(&cm->seg, CR_SEGMENT_ID_BOOST2,
- cm->quant_params.base_qindex)
- : 0;
assert(mi_row >= 0 && mi_row < mi_params->mi_rows);
assert(mi_col >= 0 && mi_col < mi_params->mi_cols);
bl_index = mi_row * mi_params->mi_cols + mi_col;
@@ -363,7 +348,7 @@
// for possible boost/refresh (segment 1). The segment id may get
// reset to 0 later if block gets coded anything other than GLOBALMV.
if (cr->map[bl_index2] == 0) {
- if (cr->last_coded_q_map[bl_index2] > qindex_thresh) sum_map += 4;
+ sum_map += 4;
} else if (cr->map[bl_index2] < 0) {
cr->map[bl_index2]++;
}
@@ -399,6 +384,15 @@
double weight_segment = 0;
int qp_thresh = AOMMIN(20, rc->best_quality << 1);
int qp_max_thresh = 118 * MAXQ >> 7;
+ // Although this segment feature for RTC is only used for
+ // blocks >= 8X8, for more efficient coding of the seg map
+ // cur_frame->seg_map needs to be set at 4x4 along with the
+ // function av1_cyclic_reset_segment_skip(). Skipping over
+ // 4x4 will therefore have small bdrate loss (~0.2%), so
+ // we use it only for speed > 9 for now.
+ // Also if loop-filter deltas are applied via segment, then
+ // we need to set cr->skip_over4x4 = 1.
+ cr->skip_over4x4 = (cpi->oxcf.speed > 9) ? 1 : 0;
cr->apply_cyclic_refresh = 1;
if (frame_is_intra_only(cm) || is_lossless_requested(&cpi->oxcf.rc_cfg) ||
cpi->svc.temporal_layer_id > 0 ||
@@ -481,9 +475,6 @@
memset(seg_map, 0, cm->mi_params.mi_rows * cm->mi_params.mi_cols);
av1_disable_segmentation(&cm->seg);
if (cm->current_frame.frame_type == KEY_FRAME) {
- memset(cr->last_coded_q_map, MAXQ,
- cm->mi_params.mi_rows * cm->mi_params.mi_cols *
- sizeof(*cr->last_coded_q_map));
cr->sb_index = 0;
}
return;
diff --git a/av1/encoder/aq_cyclicrefresh.h b/av1/encoder/aq_cyclicrefresh.h
index 1c0d5cb..4e4e1f2 100644
--- a/av1/encoder/aq_cyclicrefresh.h
+++ b/av1/encoder/aq_cyclicrefresh.h
@@ -80,10 +80,6 @@
*/
int8_t *map;
/*!
- * Map of the last q a block was coded at.
- */
- uint8_t *last_coded_q_map;
- /*!
* Threshold applied to the projected rate of the coding block,
* when deciding whether block should be refreshed.
*/
@@ -111,6 +107,7 @@
int qindex_delta[3];
double weight_segment;
int apply_cyclic_refresh;
+ int skip_over4x4;
/*!\endcond */
};
diff --git a/av1/encoder/partition_search.c b/av1/encoder/partition_search.c
index 8c5fa63..1bc6b32 100644
--- a/av1/encoder/partition_search.c
+++ b/av1/encoder/partition_search.c
@@ -2083,7 +2083,8 @@
}
if (tile_data->allow_update_cdf) update_stats(&cpi->common, td);
}
- if (cpi->oxcf.q_cfg.aq_mode == CYCLIC_REFRESH_AQ && mbmi->skip_txfm)
+ if (cpi->oxcf.q_cfg.aq_mode == CYCLIC_REFRESH_AQ && mbmi->skip_txfm &&
+ !cpi->cyclic_refresh->skip_over4x4)
av1_cyclic_reset_segment_skip(cpi, x, mi_row, mi_col, bsize);
// TODO(Ravi/Remya): Move this copy function to a better logical place
// This function will copy the best mode information from block
diff --git a/av1/encoder/svc_layercontext.c b/av1/encoder/svc_layercontext.c
index 67b3082..4e48218 100644
--- a/av1/encoder/svc_layercontext.c
+++ b/av1/encoder/svc_layercontext.c
@@ -62,7 +62,6 @@
// (i.e., ss_number_layers > 1), these need to be updated per spatial
// layer. Cyclic refresh is only applied on base temporal layer.
if (svc->number_spatial_layers > 1 && tl == 0) {
- size_t last_coded_q_map_size;
lc->sb_index = 0;
lc->actual_num_seg1_blocks = 0;
lc->actual_num_seg2_blocks = 0;
@@ -71,13 +70,6 @@
CHECK_MEM_ERROR(cm, lc->map,
aom_malloc(mi_rows * mi_cols * sizeof(*lc->map)));
memset(lc->map, 0, mi_rows * mi_cols);
- last_coded_q_map_size =
- mi_rows * mi_cols * sizeof(*lc->last_coded_q_map);
- if (lc->last_coded_q_map) aom_free(lc->last_coded_q_map);
- CHECK_MEM_ERROR(cm, lc->last_coded_q_map,
- aom_malloc(last_coded_q_map_size));
- assert(MAXQ <= 255);
- memset(lc->last_coded_q_map, MAXQ, last_coded_q_map_size);
}
}
svc->downsample_filter_type[sl] = BILINEAR;
@@ -195,7 +187,6 @@
svc->number_spatial_layers > 1 && svc->temporal_layer_id == 0) {
CYCLIC_REFRESH *const cr = cpi->cyclic_refresh;
swap_ptr(&cr->map, &lc->map);
- swap_ptr(&cr->last_coded_q_map, &lc->last_coded_q_map);
cr->sb_index = lc->sb_index;
cr->actual_num_seg1_blocks = lc->actual_num_seg1_blocks;
cr->actual_num_seg2_blocks = lc->actual_num_seg2_blocks;
@@ -234,11 +225,8 @@
cpi->svc.number_spatial_layers > 1 && svc->temporal_layer_id == 0) {
CYCLIC_REFRESH *const cr = cpi->cyclic_refresh;
signed char *temp = lc->map;
- uint8_t *temp2 = lc->last_coded_q_map;
lc->map = cr->map;
cr->map = temp;
- lc->last_coded_q_map = cr->last_coded_q_map;
- cr->last_coded_q_map = temp2;
lc->sb_index = cr->sb_index;
lc->actual_num_seg1_blocks = cr->actual_num_seg1_blocks;
lc->actual_num_seg2_blocks = cr->actual_num_seg2_blocks;
@@ -301,7 +289,6 @@
int layer = LAYER_IDS_TO_IDX(sl, tl, svc->number_temporal_layers);
LAYER_CONTEXT *const lc = &svc->layer_context[layer];
if (lc->map) aom_free(lc->map);
- if (lc->last_coded_q_map) aom_free(lc->last_coded_q_map);
}
}
}
diff --git a/av1/encoder/svc_layercontext.h b/av1/encoder/svc_layercontext.h
index a4fe654..310d08a 100644
--- a/av1/encoder/svc_layercontext.h
+++ b/av1/encoder/svc_layercontext.h
@@ -49,11 +49,6 @@
*/
int8_t *map;
/*!
- * Segmentation map for last coded quantization paramters.
- */
- uint8_t *last_coded_q_map;
-
- /*!
* Number of blocks on segment 1
*/
int actual_num_seg1_blocks;
diff --git a/test/rt_end_to_end_test.cc b/test/rt_end_to_end_test.cc
index bf8443c..5e360f2 100644
--- a/test/rt_end_to_end_test.cc
+++ b/test/rt_end_to_end_test.cc
@@ -37,21 +37,21 @@
{ 7, { { 0, 34.9 }, { 3, 35.8 } } },
{ 8, { { 0, 35.0 }, { 3, 35.8 } } },
{ 9, { { 0, 34.9 }, { 3, 35.5 } } },
- { 10, { { 0, 34.8 }, { 3, 35.4 } } } } },
+ { 10, { { 0, 34.7 }, { 3, 35.3 } } } } },
{ "paris_352_288_30.y4m",
{ { 5, { { 0, 36.2 }, { 3, 36.7 } } },
{ 6, { { 0, 36.1 }, { 3, 36.5 } } },
{ 7, { { 0, 35.5 }, { 3, 36.0 } } },
{ 8, { { 0, 36.0 }, { 3, 36.5 } } },
{ 9, { { 0, 35.5 }, { 3, 36.0 } } },
- { 10, { { 0, 35.4 }, { 3, 36.0 } } } } },
+ { 10, { { 0, 35.3 }, { 3, 35.9 } } } } },
{ "niklas_1280_720_30.y4m",
{ { 5, { { 0, 34.4 }, { 3, 34.30 } } },
{ 6, { { 0, 34.2 }, { 3, 34.2 } } },
{ 7, { { 0, 33.6 }, { 3, 33.6 } } },
{ 8, { { 0, 33.48 }, { 3, 33.48 } } },
{ 9, { { 0, 33.4 }, { 3, 33.4 } } },
- { 10, { { 0, 33.3 }, { 3, 33.3 } } } } } };
+ { 10, { { 0, 33.2 }, { 3, 33.2 } } } } } };
typedef struct {
const char *filename;