Move large buffers from stack to heap

This commit moves a number of large buffers from the stack to the heap to fix crashes caused by stack overflow.

Change-Id: I9d1592e4f6dbfa18a475d0fc5674f6d3632f39ed
diff --git a/av1/common/restoration.c b/av1/common/restoration.c index a85d597..c4f79c3 100644 --- a/av1/common/restoration.c +++ b/av1/common/restoration.c
@@ -641,8 +641,8 @@ void av1_domaintxfmrf_restoration(uint8_t *dgd, int width, int height, int stride, int param, uint8_t *dst, - int dst_stride) { - int32_t dat[RESTORATION_TILEPELS_MAX]; + int dst_stride, int32_t *tmpbuf) { + int32_t *dat = tmpbuf; int i, j, t; for (i = 0; i < height; ++i) { for (j = 0; j < width; ++j) { @@ -664,7 +664,8 @@ static void loop_domaintxfmrf_filter_tile(uint8_t *data, int tile_idx, int width, int height, int stride, RestorationInternal *rst, - uint8_t *dst, int dst_stride) { + uint8_t *dst, int dst_stride, + int32_t *tmpbuf) { const int tile_width = rst->tile_width >> rst->subsampling_x; const int tile_height = rst->tile_height >> rst->subsampling_y; int h_start, h_end, v_start, v_end; @@ -680,17 +681,21 @@ av1_domaintxfmrf_restoration( data + h_start + v_start * stride, h_end - h_start, v_end - v_start, stride, rst->rsi->domaintxfmrf_info[tile_idx].sigma_r, - dst + h_start + v_start * dst_stride, dst_stride); + dst + h_start + v_start * dst_stride, dst_stride, tmpbuf); } static void loop_domaintxfmrf_filter(uint8_t *data, int width, int height, int stride, RestorationInternal *rst, uint8_t *dst, int dst_stride) { int tile_idx; + int32_t *tmpbuf = + (int32_t *)aom_malloc(RESTORATION_TILEPELS_MAX * sizeof(*tmpbuf)); + for (tile_idx = 0; tile_idx < rst->ntiles; ++tile_idx) { loop_domaintxfmrf_filter_tile(data, tile_idx, width, height, stride, rst, - dst, dst_stride); + dst, dst_stride, tmpbuf); } + aom_free(tmpbuf); } static void loop_switchable_filter(uint8_t *data, int width, int height, @@ -698,6 +703,8 @@ uint8_t *dst, int dst_stride) { int tile_idx; uint8_t *tmpbuf = aom_malloc(SGRPROJ_TMPBUF_SIZE); + int32_t *tmpbuf32 = + (int32_t *)aom_malloc(RESTORATION_TILEPELS_MAX * sizeof(*tmpbuf32)); extend_frame(data, width, height, stride); copy_border(data, width, height, stride, dst, dst_stride); for (tile_idx = 0; tile_idx < rst->ntiles; ++tile_idx) { @@ -712,10 +719,11 @@ tmpbuf, dst, dst_stride); } else if 
(rst->rsi->restoration_type[tile_idx] == RESTORE_DOMAINTXFMRF) { loop_domaintxfmrf_filter_tile(data, tile_idx, width, height, stride, rst, - dst, dst_stride); + dst, dst_stride, tmpbuf32); } } aom_free(tmpbuf); + aom_free(tmpbuf32); } #if CONFIG_AOM_HIGHBITDEPTH @@ -955,8 +963,9 @@ void av1_domaintxfmrf_restoration_highbd(uint16_t *dgd, int width, int height, int stride, int param, int bit_depth, - uint16_t *dst, int dst_stride) { - int32_t dat[RESTORATION_TILEPELS_MAX]; + uint16_t *dst, int dst_stride, + int32_t *tmpbuf) { + int32_t *dat = tmpbuf; int i, j, t; for (i = 0; i < height; ++i) { for (j = 0; j < width; ++j) { @@ -980,7 +989,8 @@ static void loop_domaintxfmrf_filter_tile_highbd( uint16_t *data, int tile_idx, int width, int height, int stride, - RestorationInternal *rst, int bit_depth, uint16_t *dst, int dst_stride) { + RestorationInternal *rst, int bit_depth, uint16_t *dst, int dst_stride, + int32_t *tmpbuf) { const int tile_width = rst->tile_width >> rst->subsampling_x; const int tile_height = rst->tile_height >> rst->subsampling_y; int h_start, h_end, v_start, v_end; @@ -996,7 +1006,7 @@ av1_domaintxfmrf_restoration_highbd( data + h_start + v_start * stride, h_end - h_start, v_end - v_start, stride, rst->rsi->domaintxfmrf_info[tile_idx].sigma_r, bit_depth, - dst + h_start + v_start * dst_stride, dst_stride); + dst + h_start + v_start * dst_stride, dst_stride, tmpbuf); } static void loop_domaintxfmrf_filter_highbd(uint8_t *data8, int width, @@ -1005,12 +1015,16 @@ int bit_depth, uint8_t *dst8, int dst_stride) { int tile_idx; + int32_t *tmpbuf = + (int32_t *)aom_malloc(RESTORATION_TILEPELS_MAX * sizeof(*tmpbuf)); uint16_t *data = CONVERT_TO_SHORTPTR(data8); uint16_t *dst = CONVERT_TO_SHORTPTR(dst8); for (tile_idx = 0; tile_idx < rst->ntiles; ++tile_idx) { loop_domaintxfmrf_filter_tile_highbd(data, tile_idx, width, height, stride, - rst, bit_depth, dst, dst_stride); + rst, bit_depth, dst, dst_stride, + tmpbuf); } + aom_free(tmpbuf); } static void 
loop_switchable_filter_highbd(uint8_t *data8, int width, int height, @@ -1019,6 +1033,8 @@ int dst_stride) { uint16_t *data = CONVERT_TO_SHORTPTR(data8); uint8_t *tmpbuf = aom_malloc(SGRPROJ_TMPBUF_SIZE); + int32_t *tmpbuf32 = + (int32_t *)aom_malloc(RESTORATION_TILEPELS_MAX * sizeof(*tmpbuf32)); uint16_t *dst = CONVERT_TO_SHORTPTR(dst8); int i, tile_idx; copy_border_highbd(data, width, height, stride, dst, dst_stride); @@ -1036,10 +1052,11 @@ } else if (rst->rsi->restoration_type[tile_idx] == RESTORE_DOMAINTXFMRF) { loop_domaintxfmrf_filter_tile_highbd(data, tile_idx, width, height, stride, rst, bit_depth, dst, - dst_stride); + dst_stride, tmpbuf32); } } aom_free(tmpbuf); + aom_free(tmpbuf32); } #endif // CONFIG_AOM_HIGHBITDEPTH
diff --git a/av1/common/restoration.h b/av1/common/restoration.h index 2c5f32f..5773c77 100644 --- a/av1/common/restoration.h +++ b/av1/common/restoration.h
@@ -193,11 +193,12 @@ int bit_depth, int r, int eps, void *tmpbuf); void av1_domaintxfmrf_restoration(uint8_t *dgd, int width, int height, int stride, int param, uint8_t *dst, - int dst_stride); + int dst_stride, int32_t *tmpbuf); #if CONFIG_AOM_HIGHBITDEPTH void av1_domaintxfmrf_restoration_highbd(uint16_t *dgd, int width, int height, int stride, int param, int bit_depth, - uint16_t *dst, int dst_stride); + uint16_t *dst, int dst_stride, + int32_t *tmpbuf); #endif // CONFIG_AOM_HIGHBITDEPTH void decode_xq(int *xqd, int *xq); void av1_loop_restoration_init(RestorationInternal *rst, RestorationInfo *rsi,
diff --git a/av1/encoder/pickrst.c b/av1/encoder/pickrst.c index ed4a849..3b25efa 100644 --- a/av1/encoder/pickrst.c +++ b/av1/encoder/pickrst.c
@@ -178,11 +178,11 @@ int dat_stride, uint8_t *src8, int src_stride, int bit_depth, int *eps, int *xqd, void *tmpbuf) { - int64_t *flt1 = (int64_t *)tmpbuf; + int64_t *srd = (int64_t *)tmpbuf; + int64_t *dgd = srd + RESTORATION_TILEPELS_MAX; + int64_t *flt1 = dgd + RESTORATION_TILEPELS_MAX; int64_t *flt2 = flt1 + RESTORATION_TILEPELS_MAX; uint8_t *tmpbuf2 = (uint8_t *)(flt2 + RESTORATION_TILEPELS_MAX); - int64_t srd[RESTORATION_TILEPELS_MAX]; - int64_t dgd[RESTORATION_TILEPELS_MAX]; int i, j, ep, bestep = 0; int64_t err, besterr = -1; int exqd[2], bestxqd[2] = { 0, 0 }; @@ -249,7 +249,8 @@ RestorationInfo rsi; int tile_idx, tile_width, tile_height, nhtiles, nvtiles; int h_start, h_end, v_start, v_end; - uint8_t *tmpbuf = aom_malloc(SGRPROJ_TMPBUF_SIZE); + uint8_t *tmpbuf = aom_malloc(SGRPROJ_TMPBUF_SIZE + + RESTORATION_TILEPELS_MAX * sizeof(int64_t) * 2); const int ntiles = av1_get_rest_ntiles(cm->width, cm->height, &tile_width, &tile_height, &nhtiles, &nvtiles); // Make a copy of the unfiltered / processed recon buffer @@ -370,12 +371,14 @@ int64_t best_sse = INT64_MAX, sse; if (bit_depth == 8) { uint8_t *tmp = (uint8_t *)aom_malloc(width * height * sizeof(*tmp)); + int32_t *tmpbuf = + (int32_t *)aom_malloc(RESTORATION_TILEPELS_MAX * sizeof(*tmpbuf)); uint8_t *dgd = dgd8; uint8_t *src = src8; // First phase for (p = first_p_step / 2; p < DOMAINTXFMRF_PARAMS; p += first_p_step) { av1_domaintxfmrf_restoration(dgd, width, height, dgd_stride, p, tmp, - width); + width, tmpbuf); sse = compute_sse(tmp, width, height, width, src, src_stride); if (sse < best_sse || best_p == -1) { best_p = p; @@ -388,7 +391,7 @@ p += second_p_step) { if (p < 0 || p == best_p || p >= DOMAINTXFMRF_PARAMS) continue; av1_domaintxfmrf_restoration(dgd, width, height, dgd_stride, p, tmp, - width); + width, tmpbuf); sse = compute_sse(tmp, width, height, width, src, src_stride); if (sse < best_sse) { best_p = p; @@ -401,7 +404,7 @@ p += third_p_step) { if (p < 0 || p == best_p || p >= 
DOMAINTXFMRF_PARAMS) continue; av1_domaintxfmrf_restoration(dgd, width, height, dgd_stride, p, tmp, - width); + width, tmpbuf); sse = compute_sse(tmp, width, height, width, src, src_stride); if (sse < best_sse) { best_p = p; @@ -412,12 +415,14 @@ } else { #if CONFIG_AOM_HIGHBITDEPTH uint16_t *tmp = (uint16_t *)aom_malloc(width * height * sizeof(*tmp)); + int32_t *tmpbuf = + (int32_t *)aom_malloc(RESTORATION_TILEPELS_MAX * sizeof(*tmpbuf)); uint16_t *dgd = CONVERT_TO_SHORTPTR(dgd8); uint16_t *src = CONVERT_TO_SHORTPTR(src8); // First phase for (p = first_p_step / 2; p < DOMAINTXFMRF_PARAMS; p += first_p_step) { av1_domaintxfmrf_restoration_highbd(dgd, width, height, dgd_stride, p, - bit_depth, tmp, width); + bit_depth, tmp, width, tmpbuf); sse = compute_sse_highbd(tmp, width, height, width, src, src_stride); if (sse < best_sse || best_p == -1) { best_p = p; @@ -430,7 +435,7 @@ p += second_p_step) { if (p < 0 || p == best_p || p >= DOMAINTXFMRF_PARAMS) continue; av1_domaintxfmrf_restoration_highbd(dgd, width, height, dgd_stride, p, - bit_depth, tmp, width); + bit_depth, tmp, width, tmpbuf); sse = compute_sse_highbd(tmp, width, height, width, src, src_stride); if (sse < best_sse) { best_p = p; @@ -443,7 +448,7 @@ p += third_p_step) { if (p < 0 || p == best_p || p >= DOMAINTXFMRF_PARAMS) continue; av1_domaintxfmrf_restoration_highbd(dgd, width, height, dgd_stride, p, - bit_depth, tmp, width); + bit_depth, tmp, width, tmpbuf); sse = compute_sse_highbd(tmp, width, height, width, src, src_stride); if (sse < best_sse) { best_p = p;