Move large buffers from stack to heap
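This commit moves several large scratch buffers, each holding
RESTORATION_TILEPELS_MAX elements, from the stack to the heap to fix
crashes due to stack overflow:
- av1/common/restoration.c: the int32_t dat[] array in
  av1_domaintxfmrf_restoration() and its highbd variant is replaced by
  a new tmpbuf argument, heap-allocated by the callers with
  aom_malloc().
- av1/encoder/pickrst.c: the int64_t srd[] and dgd[] arrays are now
  carved out of the caller-supplied tmpbuf, whose allocation grows by
  2 * RESTORATION_TILEPELS_MAX * sizeof(int64_t).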
Change-Id: I9d1592e4f6dbfa18a475d0fc5674f6d3632f39ed
diff --git a/av1/common/restoration.c b/av1/common/restoration.c
index a85d597..c4f79c3 100644
--- a/av1/common/restoration.c
+++ b/av1/common/restoration.c
@@ -641,8 +641,8 @@
void av1_domaintxfmrf_restoration(uint8_t *dgd, int width, int height,
int stride, int param, uint8_t *dst,
- int dst_stride) {
- int32_t dat[RESTORATION_TILEPELS_MAX];
+ int dst_stride, int32_t *tmpbuf) {
+ int32_t *dat = tmpbuf;
int i, j, t;
for (i = 0; i < height; ++i) {
for (j = 0; j < width; ++j) {
@@ -664,7 +664,8 @@
static void loop_domaintxfmrf_filter_tile(uint8_t *data, int tile_idx,
int width, int height, int stride,
RestorationInternal *rst,
- uint8_t *dst, int dst_stride) {
+ uint8_t *dst, int dst_stride,
+ int32_t *tmpbuf) {
const int tile_width = rst->tile_width >> rst->subsampling_x;
const int tile_height = rst->tile_height >> rst->subsampling_y;
int h_start, h_end, v_start, v_end;
@@ -680,17 +681,21 @@
av1_domaintxfmrf_restoration(
data + h_start + v_start * stride, h_end - h_start, v_end - v_start,
stride, rst->rsi->domaintxfmrf_info[tile_idx].sigma_r,
- dst + h_start + v_start * dst_stride, dst_stride);
+ dst + h_start + v_start * dst_stride, dst_stride, tmpbuf);
}
static void loop_domaintxfmrf_filter(uint8_t *data, int width, int height,
int stride, RestorationInternal *rst,
uint8_t *dst, int dst_stride) {
int tile_idx;
+ int32_t *tmpbuf =
+ (int32_t *)aom_malloc(RESTORATION_TILEPELS_MAX * sizeof(*tmpbuf));
+
for (tile_idx = 0; tile_idx < rst->ntiles; ++tile_idx) {
loop_domaintxfmrf_filter_tile(data, tile_idx, width, height, stride, rst,
- dst, dst_stride);
+ dst, dst_stride, tmpbuf);
}
+ aom_free(tmpbuf);
}
static void loop_switchable_filter(uint8_t *data, int width, int height,
@@ -698,6 +703,8 @@
uint8_t *dst, int dst_stride) {
int tile_idx;
uint8_t *tmpbuf = aom_malloc(SGRPROJ_TMPBUF_SIZE);
+ int32_t *tmpbuf32 =
+ (int32_t *)aom_malloc(RESTORATION_TILEPELS_MAX * sizeof(*tmpbuf32));
extend_frame(data, width, height, stride);
copy_border(data, width, height, stride, dst, dst_stride);
for (tile_idx = 0; tile_idx < rst->ntiles; ++tile_idx) {
@@ -712,10 +719,11 @@
tmpbuf, dst, dst_stride);
} else if (rst->rsi->restoration_type[tile_idx] == RESTORE_DOMAINTXFMRF) {
loop_domaintxfmrf_filter_tile(data, tile_idx, width, height, stride, rst,
- dst, dst_stride);
+ dst, dst_stride, tmpbuf32);
}
}
aom_free(tmpbuf);
+ aom_free(tmpbuf32);
}
#if CONFIG_AOM_HIGHBITDEPTH
@@ -955,8 +963,9 @@
void av1_domaintxfmrf_restoration_highbd(uint16_t *dgd, int width, int height,
int stride, int param, int bit_depth,
- uint16_t *dst, int dst_stride) {
- int32_t dat[RESTORATION_TILEPELS_MAX];
+ uint16_t *dst, int dst_stride,
+ int32_t *tmpbuf) {
+ int32_t *dat = tmpbuf;
int i, j, t;
for (i = 0; i < height; ++i) {
for (j = 0; j < width; ++j) {
@@ -980,7 +989,8 @@
static void loop_domaintxfmrf_filter_tile_highbd(
uint16_t *data, int tile_idx, int width, int height, int stride,
- RestorationInternal *rst, int bit_depth, uint16_t *dst, int dst_stride) {
+ RestorationInternal *rst, int bit_depth, uint16_t *dst, int dst_stride,
+ int32_t *tmpbuf) {
const int tile_width = rst->tile_width >> rst->subsampling_x;
const int tile_height = rst->tile_height >> rst->subsampling_y;
int h_start, h_end, v_start, v_end;
@@ -996,7 +1006,7 @@
av1_domaintxfmrf_restoration_highbd(
data + h_start + v_start * stride, h_end - h_start, v_end - v_start,
stride, rst->rsi->domaintxfmrf_info[tile_idx].sigma_r, bit_depth,
- dst + h_start + v_start * dst_stride, dst_stride);
+ dst + h_start + v_start * dst_stride, dst_stride, tmpbuf);
}
static void loop_domaintxfmrf_filter_highbd(uint8_t *data8, int width,
@@ -1005,12 +1015,16 @@
int bit_depth, uint8_t *dst8,
int dst_stride) {
int tile_idx;
+ int32_t *tmpbuf =
+ (int32_t *)aom_malloc(RESTORATION_TILEPELS_MAX * sizeof(*tmpbuf));
uint16_t *data = CONVERT_TO_SHORTPTR(data8);
uint16_t *dst = CONVERT_TO_SHORTPTR(dst8);
for (tile_idx = 0; tile_idx < rst->ntiles; ++tile_idx) {
loop_domaintxfmrf_filter_tile_highbd(data, tile_idx, width, height, stride,
- rst, bit_depth, dst, dst_stride);
+ rst, bit_depth, dst, dst_stride,
+ tmpbuf);
}
+ aom_free(tmpbuf);
}
static void loop_switchable_filter_highbd(uint8_t *data8, int width, int height,
@@ -1019,6 +1033,8 @@
int dst_stride) {
uint16_t *data = CONVERT_TO_SHORTPTR(data8);
uint8_t *tmpbuf = aom_malloc(SGRPROJ_TMPBUF_SIZE);
+ int32_t *tmpbuf32 =
+ (int32_t *)aom_malloc(RESTORATION_TILEPELS_MAX * sizeof(*tmpbuf32));
uint16_t *dst = CONVERT_TO_SHORTPTR(dst8);
int i, tile_idx;
copy_border_highbd(data, width, height, stride, dst, dst_stride);
@@ -1036,10 +1052,11 @@
} else if (rst->rsi->restoration_type[tile_idx] == RESTORE_DOMAINTXFMRF) {
loop_domaintxfmrf_filter_tile_highbd(data, tile_idx, width, height,
stride, rst, bit_depth, dst,
- dst_stride);
+ dst_stride, tmpbuf32);
}
}
aom_free(tmpbuf);
+ aom_free(tmpbuf32);
}
#endif // CONFIG_AOM_HIGHBITDEPTH
diff --git a/av1/common/restoration.h b/av1/common/restoration.h
index 2c5f32f..5773c77 100644
--- a/av1/common/restoration.h
+++ b/av1/common/restoration.h
@@ -193,11 +193,12 @@
int bit_depth, int r, int eps, void *tmpbuf);
void av1_domaintxfmrf_restoration(uint8_t *dgd, int width, int height,
int stride, int param, uint8_t *dst,
- int dst_stride);
+ int dst_stride, int32_t *tmpbuf);
#if CONFIG_AOM_HIGHBITDEPTH
void av1_domaintxfmrf_restoration_highbd(uint16_t *dgd, int width, int height,
int stride, int param, int bit_depth,
- uint16_t *dst, int dst_stride);
+ uint16_t *dst, int dst_stride,
+ int32_t *tmpbuf);
#endif // CONFIG_AOM_HIGHBITDEPTH
void decode_xq(int *xqd, int *xq);
void av1_loop_restoration_init(RestorationInternal *rst, RestorationInfo *rsi,
diff --git a/av1/encoder/pickrst.c b/av1/encoder/pickrst.c
index ed4a849..3b25efa 100644
--- a/av1/encoder/pickrst.c
+++ b/av1/encoder/pickrst.c
@@ -178,11 +178,11 @@
int dat_stride, uint8_t *src8,
int src_stride, int bit_depth,
int *eps, int *xqd, void *tmpbuf) {
- int64_t *flt1 = (int64_t *)tmpbuf;
+ int64_t *srd = (int64_t *)tmpbuf;
+ int64_t *dgd = srd + RESTORATION_TILEPELS_MAX;
+ int64_t *flt1 = dgd + RESTORATION_TILEPELS_MAX;
int64_t *flt2 = flt1 + RESTORATION_TILEPELS_MAX;
uint8_t *tmpbuf2 = (uint8_t *)(flt2 + RESTORATION_TILEPELS_MAX);
- int64_t srd[RESTORATION_TILEPELS_MAX];
- int64_t dgd[RESTORATION_TILEPELS_MAX];
int i, j, ep, bestep = 0;
int64_t err, besterr = -1;
int exqd[2], bestxqd[2] = { 0, 0 };
@@ -249,7 +249,8 @@
RestorationInfo rsi;
int tile_idx, tile_width, tile_height, nhtiles, nvtiles;
int h_start, h_end, v_start, v_end;
- uint8_t *tmpbuf = aom_malloc(SGRPROJ_TMPBUF_SIZE);
+ uint8_t *tmpbuf = aom_malloc(SGRPROJ_TMPBUF_SIZE +
+ RESTORATION_TILEPELS_MAX * sizeof(int64_t) * 2);
const int ntiles = av1_get_rest_ntiles(cm->width, cm->height, &tile_width,
&tile_height, &nhtiles, &nvtiles);
// Make a copy of the unfiltered / processed recon buffer
@@ -370,12 +371,14 @@
int64_t best_sse = INT64_MAX, sse;
if (bit_depth == 8) {
uint8_t *tmp = (uint8_t *)aom_malloc(width * height * sizeof(*tmp));
+ int32_t *tmpbuf =
+ (int32_t *)aom_malloc(RESTORATION_TILEPELS_MAX * sizeof(*tmpbuf));
uint8_t *dgd = dgd8;
uint8_t *src = src8;
// First phase
for (p = first_p_step / 2; p < DOMAINTXFMRF_PARAMS; p += first_p_step) {
av1_domaintxfmrf_restoration(dgd, width, height, dgd_stride, p, tmp,
- width);
+ width, tmpbuf);
sse = compute_sse(tmp, width, height, width, src, src_stride);
if (sse < best_sse || best_p == -1) {
best_p = p;
@@ -388,7 +391,7 @@
p += second_p_step) {
if (p < 0 || p == best_p || p >= DOMAINTXFMRF_PARAMS) continue;
av1_domaintxfmrf_restoration(dgd, width, height, dgd_stride, p, tmp,
- width);
+ width, tmpbuf);
sse = compute_sse(tmp, width, height, width, src, src_stride);
if (sse < best_sse) {
best_p = p;
@@ -401,7 +404,7 @@
p += third_p_step) {
if (p < 0 || p == best_p || p >= DOMAINTXFMRF_PARAMS) continue;
av1_domaintxfmrf_restoration(dgd, width, height, dgd_stride, p, tmp,
- width);
+ width, tmpbuf);
sse = compute_sse(tmp, width, height, width, src, src_stride);
if (sse < best_sse) {
best_p = p;
@@ -412,12 +415,14 @@
} else {
#if CONFIG_AOM_HIGHBITDEPTH
uint16_t *tmp = (uint16_t *)aom_malloc(width * height * sizeof(*tmp));
+ int32_t *tmpbuf =
+ (int32_t *)aom_malloc(RESTORATION_TILEPELS_MAX * sizeof(*tmpbuf));
uint16_t *dgd = CONVERT_TO_SHORTPTR(dgd8);
uint16_t *src = CONVERT_TO_SHORTPTR(src8);
// First phase
for (p = first_p_step / 2; p < DOMAINTXFMRF_PARAMS; p += first_p_step) {
av1_domaintxfmrf_restoration_highbd(dgd, width, height, dgd_stride, p,
- bit_depth, tmp, width);
+ bit_depth, tmp, width, tmpbuf);
sse = compute_sse_highbd(tmp, width, height, width, src, src_stride);
if (sse < best_sse || best_p == -1) {
best_p = p;
@@ -430,7 +435,7 @@
p += second_p_step) {
if (p < 0 || p == best_p || p >= DOMAINTXFMRF_PARAMS) continue;
av1_domaintxfmrf_restoration_highbd(dgd, width, height, dgd_stride, p,
- bit_depth, tmp, width);
+ bit_depth, tmp, width, tmpbuf);
sse = compute_sse_highbd(tmp, width, height, width, src, src_stride);
if (sse < best_sse) {
best_p = p;
@@ -443,7 +448,7 @@
p += third_p_step) {
if (p < 0 || p == best_p || p >= DOMAINTXFMRF_PARAMS) continue;
av1_domaintxfmrf_restoration_highbd(dgd, width, height, dgd_stride, p,
- bit_depth, tmp, width);
+ bit_depth, tmp, width, tmpbuf);
sse = compute_sse_highbd(tmp, width, height, width, src, src_stride);
if (sse < best_sse) {
best_p = p;