Fix breakage caused by 8089315a6c2b when daala_tx is enabled.

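The inverse transform API was changed to pass in an unpadded 32x32
block of coefficients for transforms larger than 32x32, but the
code path actually used for daala_tx was not modified to zero-pad
that block out to the full transform size like the others were.
Add that padding to the daala_tx inverse transform, and stop
excluding daala_tx from the 1024-coefficient limit that
av1_get_max_eob() returns for TX_64X64, TX_64X32 and TX_32X64.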

Change-Id: Ibda5d20a9d839ba41f8a1a0308c414111219da92
diff --git a/av1/common/blockd.h b/av1/common/blockd.h
index eb5dbb8..3645f61 100644
--- a/av1/common/blockd.h
+++ b/av1/common/blockd.h
@@ -1430,7 +1430,7 @@
 }
 
 static INLINE int av1_get_max_eob(TX_SIZE tx_size) {
-#if CONFIG_TX64X64 && !CONFIG_DAALA_TX
+#if CONFIG_TX64X64
   if (tx_size == TX_64X64 || tx_size == TX_64X32 || tx_size == TX_32X64) {
     return 1024;
   }
diff --git a/av1/common/daala_inv_txfm.c b/av1/common/daala_inv_txfm.c
index 6b75cf6..04092e0 100644
--- a/av1/common/daala_inv_txfm.c
+++ b/av1/common/daala_inv_txfm.c
@@ -87,12 +87,42 @@
     int col_flip = tx_flip(vtx_tab[tx_type]);
     int row_flip = tx_flip(htx_tab[tx_type]);
     od_coeff tmpsq[MAX_TX_SQUARE];
+#if CONFIG_TX64X64
+    tran_low_t pad_input[MAX_TX_SQUARE];
+#endif
     int r;
     int c;
 
     assert(col_tx);
     assert(row_tx);
 
+#if CONFIG_TX64X64
+    if (rows > 32 || cols > 32) {
+      int avail_rows;
+      int avail_cols;
+      // TODO(urvang): Can the same array be reused, instead of using a new
+      // array?
+      // Remap 32x32 input into a modified input by:
+      // - Copying over these values in top-left 32x32 locations.
+      // - Setting the rest of the locations to 0.
+      avail_rows = AOMMIN(rows, 32);
+      avail_cols = AOMMIN(cols, 32);
+      for (r = 0; r < avail_rows; r++) {
+        memcpy(pad_input + r * cols, input_coeffs + r * avail_cols,
+               avail_cols * sizeof(*pad_input));
+        if (cols > avail_cols) {
+          memset(pad_input + r * cols + avail_cols, 0,
+                 (cols - avail_cols) * sizeof(*pad_input));
+        }
+      }
+      if (rows > avail_rows) {
+        memset(pad_input + avail_rows * cols, 0,
+               (rows - avail_rows) * cols * sizeof(*pad_input));
+      }
+      input_coeffs = pad_input;
+    }
+#endif
+
     // Inverse-transform rows
     for (r = 0; r < rows; ++r) {
       // The output addressing transposes