Move this_dqcoeff in search_tx_type to MACROBLOCK

Consume less stack memory by replacing the 64 KiB local array this_dqcoeff
in search_tx_type() with a heap-allocated buffer, dqcoeff_buf, owned by
the MACROBLOCK struct.

Bug: aomedia:2703
Change-Id: I520b759fb1ea23db356191e8e596c4e16fc204c9
diff --git a/av1/encoder/block.h b/av1/encoder/block.h
index 0fe362d..395ab73 100644
--- a/av1/encoder/block.h
+++ b/av1/encoder/block.h
@@ -1324,6 +1324,10 @@
   uint8_t color_sensitivity[MAX_MB_PLANE - 1];
   //! Coding block distortion value for uv/color, minimum over the inter modes.
   int64_t min_dist_inter_uv;
+
+  //! The buffer used by search_tx_type() to swap dqcoeff in macroblock_plane
+  //! so we can keep dqcoeff of the best tx_type.
+  tran_low_t *dqcoeff_buf;
   /**@}*/
 
   /*****************************************************************************
diff --git a/av1/encoder/encodeframe_utils.h b/av1/encoder/encodeframe_utils.h
index a195952..14c71b8 100644
--- a/av1/encoder/encodeframe_utils.h
+++ b/av1/encoder/encodeframe_utils.h
@@ -447,6 +447,9 @@
 
   aom_free(mb->winner_mode_stats);
   mb->winner_mode_stats = NULL;
+
+  aom_free(mb->dqcoeff_buf);
+  mb->dqcoeff_buf = NULL;
 }
 
 static AOM_INLINE void allocate_winner_mode_stats(const AV1_COMP *cpi,
@@ -491,6 +494,12 @@
                       16, 2 * MAX_SB_SQUARE * sizeof(mb->e_mbd.seg_mask[0])));
 
   allocate_winner_mode_stats(cpi, mb);
+
+  const int max_sb_square_y = 1
+                              << num_pels_log2_lookup[cm->seq_params->sb_size];
+  CHECK_MEM_ERROR(
+      cm, mb->dqcoeff_buf,
+      (tran_low_t *)aom_memalign(32, max_sb_square_y * sizeof(tran_low_t)));
 }
 
 // This function will compute the number of reference frames to be disabled
diff --git a/av1/encoder/tx_search.c b/av1/encoder/tx_search.c
index 245cf3a..7292c01 100644
--- a/av1/encoder/tx_search.c
+++ b/av1/encoder/tx_search.c
@@ -2027,12 +2027,9 @@
   uint16_t best_eob = 0;
   TX_TYPE best_tx_type = DCT_DCT;
   int rate_cost = 0;
-  // The buffer used to swap dqcoeff in macroblockd_plane so we can keep dqcoeff
-  // of the best tx_type
-  DECLARE_ALIGNED(32, tran_low_t, this_dqcoeff[MAX_SB_SQUARE]);
   struct macroblock_plane *const p = &x->plane[plane];
   tran_low_t *orig_dqcoeff = p->dqcoeff;
-  tran_low_t *best_dqcoeff = this_dqcoeff;
+  tran_low_t *best_dqcoeff = x->dqcoeff_buf;
   const int tx_type_map_idx =
       plane ? 0 : blk_row * xd->tx_type_map_stride + blk_col;
   av1_invalid_rd_stats(best_rd_stats);