Move large buffers from stack to heap

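This commit moves a number of large buffers from stack to heap to fix
crashes due to stack overflow. The RESTORATION_TILEPELS_MAX-sized
scratch arrays (`dat` in av1_domaintxfmrf_restoration() and
av1_domaintxfmrf_restoration_highbd(), `srd` and `dgd` in
av1/encoder/pickrst.c) now live in caller-supplied heap buffers; both
restoration functions gain a trailing `int32_t *tmpbuf` parameter for
this purpose.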

Change-Id: I9d1592e4f6dbfa18a475d0fc5674f6d3632f39ed
diff --git a/av1/common/restoration.c b/av1/common/restoration.c
index a85d597..c4f79c3 100644
--- a/av1/common/restoration.c
+++ b/av1/common/restoration.c
@@ -641,8 +641,8 @@
 
 void av1_domaintxfmrf_restoration(uint8_t *dgd, int width, int height,
                                   int stride, int param, uint8_t *dst,
-                                  int dst_stride) {
-  int32_t dat[RESTORATION_TILEPELS_MAX];
+                                  int dst_stride, int32_t *tmpbuf) {
+  int32_t *dat = tmpbuf;
   int i, j, t;
   for (i = 0; i < height; ++i) {
     for (j = 0; j < width; ++j) {
@@ -664,7 +664,8 @@
 static void loop_domaintxfmrf_filter_tile(uint8_t *data, int tile_idx,
                                           int width, int height, int stride,
                                           RestorationInternal *rst,
-                                          uint8_t *dst, int dst_stride) {
+                                          uint8_t *dst, int dst_stride,
+                                          int32_t *tmpbuf) {
   const int tile_width = rst->tile_width >> rst->subsampling_x;
   const int tile_height = rst->tile_height >> rst->subsampling_y;
   int h_start, h_end, v_start, v_end;
@@ -680,17 +681,21 @@
   av1_domaintxfmrf_restoration(
       data + h_start + v_start * stride, h_end - h_start, v_end - v_start,
       stride, rst->rsi->domaintxfmrf_info[tile_idx].sigma_r,
-      dst + h_start + v_start * dst_stride, dst_stride);
+      dst + h_start + v_start * dst_stride, dst_stride, tmpbuf);
 }
 
 static void loop_domaintxfmrf_filter(uint8_t *data, int width, int height,
                                      int stride, RestorationInternal *rst,
                                      uint8_t *dst, int dst_stride) {
   int tile_idx;
+  int32_t *tmpbuf =
+      (int32_t *)aom_malloc(RESTORATION_TILEPELS_MAX * sizeof(*tmpbuf));
+
   for (tile_idx = 0; tile_idx < rst->ntiles; ++tile_idx) {
     loop_domaintxfmrf_filter_tile(data, tile_idx, width, height, stride, rst,
-                                  dst, dst_stride);
+                                  dst, dst_stride, tmpbuf);
   }
+  aom_free(tmpbuf);
 }
 
 static void loop_switchable_filter(uint8_t *data, int width, int height,
@@ -698,6 +703,8 @@
                                    uint8_t *dst, int dst_stride) {
   int tile_idx;
   uint8_t *tmpbuf = aom_malloc(SGRPROJ_TMPBUF_SIZE);
+  int32_t *tmpbuf32 =
+      (int32_t *)aom_malloc(RESTORATION_TILEPELS_MAX * sizeof(*tmpbuf32));
   extend_frame(data, width, height, stride);
   copy_border(data, width, height, stride, dst, dst_stride);
   for (tile_idx = 0; tile_idx < rst->ntiles; ++tile_idx) {
@@ -712,10 +719,11 @@
                                tmpbuf, dst, dst_stride);
     } else if (rst->rsi->restoration_type[tile_idx] == RESTORE_DOMAINTXFMRF) {
       loop_domaintxfmrf_filter_tile(data, tile_idx, width, height, stride, rst,
-                                    dst, dst_stride);
+                                    dst, dst_stride, tmpbuf32);
     }
   }
   aom_free(tmpbuf);
+  aom_free(tmpbuf32);
 }
 
 #if CONFIG_AOM_HIGHBITDEPTH
@@ -955,8 +963,9 @@
 
 void av1_domaintxfmrf_restoration_highbd(uint16_t *dgd, int width, int height,
                                          int stride, int param, int bit_depth,
-                                         uint16_t *dst, int dst_stride) {
-  int32_t dat[RESTORATION_TILEPELS_MAX];
+                                         uint16_t *dst, int dst_stride,
+                                         int32_t *tmpbuf) {
+  int32_t *dat = tmpbuf;
   int i, j, t;
   for (i = 0; i < height; ++i) {
     for (j = 0; j < width; ++j) {
@@ -980,7 +989,8 @@
 
 static void loop_domaintxfmrf_filter_tile_highbd(
     uint16_t *data, int tile_idx, int width, int height, int stride,
-    RestorationInternal *rst, int bit_depth, uint16_t *dst, int dst_stride) {
+    RestorationInternal *rst, int bit_depth, uint16_t *dst, int dst_stride,
+    int32_t *tmpbuf) {
   const int tile_width = rst->tile_width >> rst->subsampling_x;
   const int tile_height = rst->tile_height >> rst->subsampling_y;
   int h_start, h_end, v_start, v_end;
@@ -996,7 +1006,7 @@
   av1_domaintxfmrf_restoration_highbd(
       data + h_start + v_start * stride, h_end - h_start, v_end - v_start,
       stride, rst->rsi->domaintxfmrf_info[tile_idx].sigma_r, bit_depth,
-      dst + h_start + v_start * dst_stride, dst_stride);
+      dst + h_start + v_start * dst_stride, dst_stride, tmpbuf);
 }
 
 static void loop_domaintxfmrf_filter_highbd(uint8_t *data8, int width,
@@ -1005,12 +1015,16 @@
                                             int bit_depth, uint8_t *dst8,
                                             int dst_stride) {
   int tile_idx;
+  int32_t *tmpbuf =
+      (int32_t *)aom_malloc(RESTORATION_TILEPELS_MAX * sizeof(*tmpbuf));
   uint16_t *data = CONVERT_TO_SHORTPTR(data8);
   uint16_t *dst = CONVERT_TO_SHORTPTR(dst8);
   for (tile_idx = 0; tile_idx < rst->ntiles; ++tile_idx) {
     loop_domaintxfmrf_filter_tile_highbd(data, tile_idx, width, height, stride,
-                                         rst, bit_depth, dst, dst_stride);
+                                         rst, bit_depth, dst, dst_stride,
+                                         tmpbuf);
   }
+  aom_free(tmpbuf);
 }
 
 static void loop_switchable_filter_highbd(uint8_t *data8, int width, int height,
@@ -1019,6 +1033,8 @@
                                           int dst_stride) {
   uint16_t *data = CONVERT_TO_SHORTPTR(data8);
   uint8_t *tmpbuf = aom_malloc(SGRPROJ_TMPBUF_SIZE);
+  int32_t *tmpbuf32 =
+      (int32_t *)aom_malloc(RESTORATION_TILEPELS_MAX * sizeof(*tmpbuf32));
   uint16_t *dst = CONVERT_TO_SHORTPTR(dst8);
   int i, tile_idx;
   copy_border_highbd(data, width, height, stride, dst, dst_stride);
@@ -1036,10 +1052,11 @@
     } else if (rst->rsi->restoration_type[tile_idx] == RESTORE_DOMAINTXFMRF) {
       loop_domaintxfmrf_filter_tile_highbd(data, tile_idx, width, height,
                                            stride, rst, bit_depth, dst,
-                                           dst_stride);
+                                           dst_stride, tmpbuf32);
     }
   }
   aom_free(tmpbuf);
+  aom_free(tmpbuf32);
 }
 #endif  // CONFIG_AOM_HIGHBITDEPTH
 
diff --git a/av1/common/restoration.h b/av1/common/restoration.h
index 2c5f32f..5773c77 100644
--- a/av1/common/restoration.h
+++ b/av1/common/restoration.h
@@ -193,11 +193,12 @@
                                 int bit_depth, int r, int eps, void *tmpbuf);
 void av1_domaintxfmrf_restoration(uint8_t *dgd, int width, int height,
                                   int stride, int param, uint8_t *dst,
-                                  int dst_stride);
+                                  int dst_stride, int32_t *tmpbuf);
 #if CONFIG_AOM_HIGHBITDEPTH
 void av1_domaintxfmrf_restoration_highbd(uint16_t *dgd, int width, int height,
                                          int stride, int param, int bit_depth,
-                                         uint16_t *dst, int dst_stride);
+                                         uint16_t *dst, int dst_stride,
+                                         int32_t *tmpbuf);
 #endif  // CONFIG_AOM_HIGHBITDEPTH
 void decode_xq(int *xqd, int *xq);
 void av1_loop_restoration_init(RestorationInternal *rst, RestorationInfo *rsi,
diff --git a/av1/encoder/pickrst.c b/av1/encoder/pickrst.c
index ed4a849..3b25efa 100644
--- a/av1/encoder/pickrst.c
+++ b/av1/encoder/pickrst.c
@@ -178,11 +178,11 @@
                                           int dat_stride, uint8_t *src8,
                                           int src_stride, int bit_depth,
                                           int *eps, int *xqd, void *tmpbuf) {
-  int64_t *flt1 = (int64_t *)tmpbuf;
+  int64_t *srd = (int64_t *)tmpbuf;
+  int64_t *dgd = srd + RESTORATION_TILEPELS_MAX;
+  int64_t *flt1 = dgd + RESTORATION_TILEPELS_MAX;
   int64_t *flt2 = flt1 + RESTORATION_TILEPELS_MAX;
   uint8_t *tmpbuf2 = (uint8_t *)(flt2 + RESTORATION_TILEPELS_MAX);
-  int64_t srd[RESTORATION_TILEPELS_MAX];
-  int64_t dgd[RESTORATION_TILEPELS_MAX];
   int i, j, ep, bestep = 0;
   int64_t err, besterr = -1;
   int exqd[2], bestxqd[2] = { 0, 0 };
@@ -249,7 +249,8 @@
   RestorationInfo rsi;
   int tile_idx, tile_width, tile_height, nhtiles, nvtiles;
   int h_start, h_end, v_start, v_end;
-  uint8_t *tmpbuf = aom_malloc(SGRPROJ_TMPBUF_SIZE);
+  uint8_t *tmpbuf = aom_malloc(SGRPROJ_TMPBUF_SIZE +
+                               RESTORATION_TILEPELS_MAX * sizeof(int64_t) * 2);
   const int ntiles = av1_get_rest_ntiles(cm->width, cm->height, &tile_width,
                                          &tile_height, &nhtiles, &nvtiles);
   //  Make a copy of the unfiltered / processed recon buffer
@@ -370,12 +371,14 @@
   int64_t best_sse = INT64_MAX, sse;
   if (bit_depth == 8) {
     uint8_t *tmp = (uint8_t *)aom_malloc(width * height * sizeof(*tmp));
+    int32_t *tmpbuf =
+        (int32_t *)aom_malloc(RESTORATION_TILEPELS_MAX * sizeof(*tmpbuf));
     uint8_t *dgd = dgd8;
     uint8_t *src = src8;
     // First phase
     for (p = first_p_step / 2; p < DOMAINTXFMRF_PARAMS; p += first_p_step) {
       av1_domaintxfmrf_restoration(dgd, width, height, dgd_stride, p, tmp,
-                                   width);
+                                   width, tmpbuf);
       sse = compute_sse(tmp, width, height, width, src, src_stride);
       if (sse < best_sse || best_p == -1) {
         best_p = p;
@@ -388,7 +391,7 @@
          p += second_p_step) {
       if (p < 0 || p == best_p || p >= DOMAINTXFMRF_PARAMS) continue;
       av1_domaintxfmrf_restoration(dgd, width, height, dgd_stride, p, tmp,
-                                   width);
+                                   width, tmpbuf);
       sse = compute_sse(tmp, width, height, width, src, src_stride);
       if (sse < best_sse) {
         best_p = p;
@@ -401,7 +404,7 @@
          p += third_p_step) {
       if (p < 0 || p == best_p || p >= DOMAINTXFMRF_PARAMS) continue;
       av1_domaintxfmrf_restoration(dgd, width, height, dgd_stride, p, tmp,
-                                   width);
+                                   width, tmpbuf);
       sse = compute_sse(tmp, width, height, width, src, src_stride);
       if (sse < best_sse) {
         best_p = p;
@@ -412,12 +415,14 @@
   } else {
 #if CONFIG_AOM_HIGHBITDEPTH
     uint16_t *tmp = (uint16_t *)aom_malloc(width * height * sizeof(*tmp));
+    int32_t *tmpbuf =
+        (int32_t *)aom_malloc(RESTORATION_TILEPELS_MAX * sizeof(*tmpbuf));
     uint16_t *dgd = CONVERT_TO_SHORTPTR(dgd8);
     uint16_t *src = CONVERT_TO_SHORTPTR(src8);
     // First phase
     for (p = first_p_step / 2; p < DOMAINTXFMRF_PARAMS; p += first_p_step) {
       av1_domaintxfmrf_restoration_highbd(dgd, width, height, dgd_stride, p,
-                                          bit_depth, tmp, width);
+                                          bit_depth, tmp, width, tmpbuf);
       sse = compute_sse_highbd(tmp, width, height, width, src, src_stride);
       if (sse < best_sse || best_p == -1) {
         best_p = p;
@@ -430,7 +435,7 @@
          p += second_p_step) {
       if (p < 0 || p == best_p || p >= DOMAINTXFMRF_PARAMS) continue;
       av1_domaintxfmrf_restoration_highbd(dgd, width, height, dgd_stride, p,
-                                          bit_depth, tmp, width);
+                                          bit_depth, tmp, width, tmpbuf);
       sse = compute_sse_highbd(tmp, width, height, width, src, src_stride);
       if (sse < best_sse) {
         best_p = p;
@@ -443,7 +448,7 @@
          p += third_p_step) {
       if (p < 0 || p == best_p || p >= DOMAINTXFMRF_PARAMS) continue;
       av1_domaintxfmrf_restoration_highbd(dgd, width, height, dgd_stride, p,
-                                          bit_depth, tmp, width);
+                                          bit_depth, tmp, width, tmpbuf);
       sse = compute_sse_highbd(tmp, width, height, width, src, src_stride);
       if (sse < best_sse) {
         best_p = p;