ext-tile: output decoded tiles in output frame

As defined in the Spec, the decoded tiles in a tile list are stored in
the output frame buffer.

The variable outputW is defined as
  ( 1 + output_frame_width_in_tiles_minus_1 ) * TileWidth.
The variable outputH is defined as
  ( 1 + output_frame_height_in_tiles_minus_1 ) * TileHeight.
Each decoded tile is written into the output frame in raster
order.

This patch outputs the decoded tiles as defined above.

BUG=aomedia:2047

Change-Id: I4eabfe406575487555b507a9e334b40840a1fcd1
diff --git a/aom/src/aom_image.c b/aom/src/aom_image.c
index 521eade..6504cdd 100644
--- a/aom/src/aom_image.c
+++ b/aom/src/aom_image.c
@@ -120,6 +120,7 @@
 
     img->img_data = (uint8_t *)aom_memalign(buf_align, (size_t)alloc_size);
     img->img_data_owner = 1;
+    img->sz = (size_t)alloc_size;
   }
 
   if (!img->img_data) goto fail;
diff --git a/aom_scale/aom_scale_rtcd.pl b/aom_scale/aom_scale_rtcd.pl
index c5990b1..27378c7e 100644
--- a/aom_scale/aom_scale_rtcd.pl
+++ b/aom_scale/aom_scale_rtcd.pl
@@ -36,11 +36,12 @@
 
 add_proto qw/void aom_yv12_copy_v/, "const struct yv12_buffer_config *src_bc, struct yv12_buffer_config *dst_bc";
 
-add_proto qw/void aom_yv12_partial_copy_y/, "const struct yv12_buffer_config *src_ybc, struct yv12_buffer_config *dst_ybc, int hstart, int hend, int vstart, int vend";
-
-add_proto qw/void aom_yv12_partial_copy_u/, "const struct yv12_buffer_config *src_bc, struct yv12_buffer_config *dst_bc, int hstart, int hend, int vstart, int vend";
-
-add_proto qw/void aom_yv12_partial_copy_v/, "const struct yv12_buffer_config *src_bc, struct yv12_buffer_config *dst_bc, int hstart, int hend, int vstart, int vend";
+add_proto qw/void aom_yv12_partial_copy_y/, "const struct yv12_buffer_config *src_ybc, int hstart1, int hend1, int vstart1, int vend1, struct yv12_buffer_config *dst_ybc, int hstart2, int vstart2";
+add_proto qw/void aom_yv12_partial_coloc_copy_y/, "const struct yv12_buffer_config *src_ybc, struct yv12_buffer_config *dst_ybc, int hstart, int hend, int vstart, int vend";
+add_proto qw/void aom_yv12_partial_copy_u/, "const struct yv12_buffer_config *src_bc, int hstart1, int hend1, int vstart1, int vend1, struct yv12_buffer_config *dst_bc, int hstart2, int vstart2";
+add_proto qw/void aom_yv12_partial_coloc_copy_u/, "const struct yv12_buffer_config *src_bc, struct yv12_buffer_config *dst_bc, int hstart, int hend, int vstart, int vend";
+add_proto qw/void aom_yv12_partial_copy_v/, "const struct yv12_buffer_config *src_bc, int hstart1, int hend1, int vstart1, int vend1, struct yv12_buffer_config *dst_bc, int hstart2, int vstart2";
+add_proto qw/void aom_yv12_partial_coloc_copy_v/, "const struct yv12_buffer_config *src_bc, struct yv12_buffer_config *dst_bc, int hstart, int hend, int vstart, int vend";
 
 add_proto qw/void aom_extend_frame_borders/, "struct yv12_buffer_config *ybf, const int num_planes";
 specialize qw/aom_extend_frame_borders dspr2/;
diff --git a/aom_scale/generic/yv12extend.c b/aom_scale/generic/yv12extend.c
index ba18352..127ca23 100644
--- a/aom_scale/generic/yv12extend.c
+++ b/aom_scale/generic/yv12extend.c
@@ -321,91 +321,116 @@
   }
 }
 
-void aom_yv12_partial_copy_y_c(const YV12_BUFFER_CONFIG *src_ybc,
-                               YV12_BUFFER_CONFIG *dst_ybc, int hstart,
-                               int hend, int vstart, int vend) {
+void aom_yv12_partial_copy_y_c(const YV12_BUFFER_CONFIG *src_ybc, int hstart1,
+                               int hend1, int vstart1, int vend1,
+                               YV12_BUFFER_CONFIG *dst_ybc, int hstart2,
+                               int vstart2) {
   int row;
   const uint8_t *src = src_ybc->y_buffer;
   uint8_t *dst = dst_ybc->y_buffer;
 
   if (src_ybc->flags & YV12_FLAG_HIGHBITDEPTH) {
     const uint16_t *src16 =
-        CONVERT_TO_SHORTPTR(src + vstart * src_ybc->y_stride + hstart);
+        CONVERT_TO_SHORTPTR(src + vstart1 * src_ybc->y_stride + hstart1);
     uint16_t *dst16 =
-        CONVERT_TO_SHORTPTR(dst + vstart * dst_ybc->y_stride + hstart);
-    for (row = vstart; row < vend; ++row) {
-      memcpy(dst16, src16, (hend - hstart) * sizeof(uint16_t));
+        CONVERT_TO_SHORTPTR(dst + vstart2 * dst_ybc->y_stride + hstart2);
+
+    for (row = vstart1; row < vend1; ++row) {
+      memcpy(dst16, src16, (hend1 - hstart1) * sizeof(uint16_t));
       src16 += src_ybc->y_stride;
       dst16 += dst_ybc->y_stride;
     }
     return;
   }
-  src = (src + vstart * src_ybc->y_stride + hstart);
-  dst = (dst + vstart * dst_ybc->y_stride + hstart);
+  src = (src + vstart1 * src_ybc->y_stride + hstart1);
+  dst = (dst + vstart2 * dst_ybc->y_stride + hstart2);
 
-  for (row = vstart; row < vend; ++row) {
-    memcpy(dst, src, (hend - hstart));
+  for (row = vstart1; row < vend1; ++row) {
+    memcpy(dst, src, (hend1 - hstart1));
     src += src_ybc->y_stride;
     dst += dst_ybc->y_stride;
   }
 }
 
-void aom_yv12_partial_copy_u_c(const YV12_BUFFER_CONFIG *src_bc,
-                               YV12_BUFFER_CONFIG *dst_bc, int hstart, int hend,
-                               int vstart, int vend) {
+void aom_yv12_partial_coloc_copy_y_c(const YV12_BUFFER_CONFIG *src_ybc,
+                                     YV12_BUFFER_CONFIG *dst_ybc, int hstart,
+                                     int hend, int vstart, int vend) {
+  aom_yv12_partial_copy_y_c(src_ybc, hstart, hend, vstart, vend, dst_ybc,
+                            hstart, vstart);
+}
+
+void aom_yv12_partial_copy_u_c(const YV12_BUFFER_CONFIG *src_bc, int hstart1,
+                               int hend1, int vstart1, int vend1,
+                               YV12_BUFFER_CONFIG *dst_bc, int hstart2,
+                               int vstart2) {
   int row;
   const uint8_t *src = src_bc->u_buffer;
   uint8_t *dst = dst_bc->u_buffer;
 
   if (src_bc->flags & YV12_FLAG_HIGHBITDEPTH) {
     const uint16_t *src16 =
-        CONVERT_TO_SHORTPTR(src + vstart * src_bc->uv_stride + hstart);
+        CONVERT_TO_SHORTPTR(src + vstart1 * src_bc->uv_stride + hstart1);
     uint16_t *dst16 =
-        CONVERT_TO_SHORTPTR(dst + vstart * dst_bc->uv_stride + hstart);
-    for (row = vstart; row < vend; ++row) {
-      memcpy(dst16, src16, (hend - hstart) * sizeof(uint16_t));
+        CONVERT_TO_SHORTPTR(dst + vstart2 * dst_bc->uv_stride + hstart2);
+    for (row = vstart1; row < vend1; ++row) {
+      memcpy(dst16, src16, (hend1 - hstart1) * sizeof(uint16_t));
       src16 += src_bc->uv_stride;
       dst16 += dst_bc->uv_stride;
     }
     return;
   }
 
-  src = (src + vstart * src_bc->uv_stride + hstart);
-  dst = (dst + vstart * dst_bc->uv_stride + hstart);
+  src = (src + vstart1 * src_bc->uv_stride + hstart1);
+  dst = (dst + vstart2 * dst_bc->uv_stride + hstart2);
 
-  for (row = vstart; row < vend; ++row) {
-    memcpy(dst, src, (hend - hstart));
+  for (row = vstart1; row < vend1; ++row) {
+    memcpy(dst, src, (hend1 - hstart1));
     src += src_bc->uv_stride;
     dst += dst_bc->uv_stride;
   }
 }
 
-void aom_yv12_partial_copy_v_c(const YV12_BUFFER_CONFIG *src_bc,
-                               YV12_BUFFER_CONFIG *dst_bc, int hstart, int hend,
-                               int vstart, int vend) {
+void aom_yv12_partial_coloc_copy_u_c(const YV12_BUFFER_CONFIG *src_bc,
+                                     YV12_BUFFER_CONFIG *dst_bc, int hstart,
+                                     int hend, int vstart, int vend) {
+  aom_yv12_partial_copy_u_c(src_bc, hstart, hend, vstart, vend, dst_bc, hstart,
+                            vstart);
+}
+
+void aom_yv12_partial_copy_v_c(const YV12_BUFFER_CONFIG *src_bc, int hstart1,
+                               int hend1, int vstart1, int vend1,
+                               YV12_BUFFER_CONFIG *dst_bc, int hstart2,
+                               int vstart2) {
   int row;
   const uint8_t *src = src_bc->v_buffer;
   uint8_t *dst = dst_bc->v_buffer;
 
   if (src_bc->flags & YV12_FLAG_HIGHBITDEPTH) {
     const uint16_t *src16 =
-        CONVERT_TO_SHORTPTR(src + vstart * src_bc->uv_stride + hstart);
+        CONVERT_TO_SHORTPTR(src + vstart1 * src_bc->uv_stride + hstart1);
     uint16_t *dst16 =
-        CONVERT_TO_SHORTPTR(dst + vstart * dst_bc->uv_stride + hstart);
-    for (row = vstart; row < vend; ++row) {
-      memcpy(dst16, src16, (hend - hstart) * sizeof(uint16_t));
+        CONVERT_TO_SHORTPTR(dst + vstart2 * dst_bc->uv_stride + hstart2);
+    for (row = vstart1; row < vend1; ++row) {
+      memcpy(dst16, src16, (hend1 - hstart1) * sizeof(uint16_t));
       src16 += src_bc->uv_stride;
       dst16 += dst_bc->uv_stride;
     }
     return;
   }
 
-  src = (src + vstart * src_bc->uv_stride + hstart);
-  dst = (dst + vstart * dst_bc->uv_stride + hstart);
+  src = (src + vstart1 * src_bc->uv_stride + hstart1);
+  dst = (dst + vstart2 * dst_bc->uv_stride + hstart2);
 
-  for (row = vstart; row < vend; ++row) {
-    memcpy(dst, src, (hend - hstart));
+  for (row = vstart1; row < vend1; ++row) {
+    memcpy(dst, src, (hend1 - hstart1));
     src += src_bc->uv_stride;
     dst += dst_bc->uv_stride;
   }
 }
+
+void aom_yv12_partial_coloc_copy_v_c(const YV12_BUFFER_CONFIG *src_bc,
+                                     YV12_BUFFER_CONFIG *dst_bc, int hstart,
+                                     int hend, int vstart, int vend) {
+  aom_yv12_partial_copy_v_c(src_bc, hstart, hend, vstart, vend, dst_bc, hstart,
+                            vstart);
+}
diff --git a/av1/av1_dx_iface.c b/av1/av1_dx_iface.c
index 0c14a97..c68cdee 100644
--- a/av1/av1_dx_iface.c
+++ b/av1/av1_dx_iface.c
@@ -713,9 +713,9 @@
 
           if (!pbi->ext_tile_debug && cm->large_scale_tile) {
             *index += 1;  // Advance the iterator to point to the next image
+
+            yuvconfig2image(&ctx->img, &pbi->tile_list_outbuf, NULL);
             img = &ctx->img;
-            img->img_data = pbi->tile_list_output;
-            img->sz = pbi->tile_list_size;
             return img;
           }
 
diff --git a/av1/av1_iface_common.h b/av1/av1_iface_common.h
index 32a207d..713d8c3 100644
--- a/av1/av1_iface_common.h
+++ b/av1/av1_iface_common.h
@@ -72,6 +72,7 @@
   img->img_data = yv12->buffer_alloc;
   img->img_data_owner = 0;
   img->self_allocd = 0;
+  img->sz = yv12->frame_size;
 }
 
 static aom_codec_err_t image2yuvconfig(const aom_image_t *img,
diff --git a/av1/common/restoration.c b/av1/common/restoration.c
index 0727cf9..c62862b 100644
--- a/av1/common/restoration.c
+++ b/av1/common/restoration.c
@@ -1143,9 +1143,9 @@
   typedef void (*copy_fun)(const YV12_BUFFER_CONFIG *src_ybc,
                            YV12_BUFFER_CONFIG *dst_ybc, int hstart, int hend,
                            int vstart, int vend);
-  static const copy_fun copy_funs[3] = {
-    aom_yv12_partial_copy_y, aom_yv12_partial_copy_u, aom_yv12_partial_copy_v
-  };
+  static const copy_fun copy_funs[3] = { aom_yv12_partial_coloc_copy_y,
+                                         aom_yv12_partial_coloc_copy_u,
+                                         aom_yv12_partial_coloc_copy_v };
 
   for (int plane = 0; plane < num_planes; ++plane) {
     if (cm->rst_info[plane].frame_restoration_type == RESTORE_NONE) continue;
diff --git a/av1/common/thread_common.c b/av1/common/thread_common.c
index 258a8d5..14406e6 100644
--- a/av1/common/thread_common.c
+++ b/av1/common/thread_common.c
@@ -801,9 +801,9 @@
   typedef void (*copy_fun)(const YV12_BUFFER_CONFIG *src_ybc,
                            YV12_BUFFER_CONFIG *dst_ybc, int hstart, int hend,
                            int vstart, int vend);
-  static const copy_fun copy_funs[3] = {
-    aom_yv12_partial_copy_y, aom_yv12_partial_copy_u, aom_yv12_partial_copy_v
-  };
+  static const copy_fun copy_funs[3] = { aom_yv12_partial_coloc_copy_y,
+                                         aom_yv12_partial_coloc_copy_u,
+                                         aom_yv12_partial_coloc_copy_v };
 
   while (1) {
     AV1LrMTInfo *cur_job_info = get_lr_job_info(lr_sync);
diff --git a/av1/decoder/decoder.c b/av1/decoder/decoder.c
index c0b55b6..50bee81 100644
--- a/av1/decoder/decoder.c
+++ b/av1/decoder/decoder.c
@@ -158,8 +158,7 @@
   if (!pbi) return;
 
   // Free the tile list output buffer.
-  if (pbi->tile_list_output != NULL) aom_free(pbi->tile_list_output);
-  pbi->tile_list_output = NULL;
+  aom_free_frame_buffer(&pbi->tile_list_outbuf);
 
   aom_get_worker_interface()->end(&pbi->lf_worker);
   aom_free(pbi->lf_worker.data1);
diff --git a/av1/decoder/decoder.h b/av1/decoder/decoder.h
index cc34bbe..ca1fcd6 100644
--- a/av1/decoder/decoder.h
+++ b/av1/decoder/decoder.h
@@ -242,9 +242,7 @@
   unsigned int ext_tile_debug;  // for ext-tile software debug & testing
   unsigned int row_mt;
   EXTERNAL_REFERENCES ext_refs;
-  size_t tile_list_size;
-  uint8_t *tile_list_output;
-  size_t buffer_sz;
+  YV12_BUFFER_CONFIG tile_list_outbuf;
 
   CB_BUFFER *cb_buffer_base;
   int cb_buffer_alloc_size;
diff --git a/av1/decoder/obu.c b/av1/decoder/obu.c
index 28e3d57..b148bd6 100644
--- a/av1/decoder/obu.c
+++ b/av1/decoder/obu.c
@@ -12,6 +12,7 @@
 #include <assert.h>
 
 #include "config/aom_config.h"
+#include "config/aom_scale_rtcd.h"
 
 #include "aom/aom_codec.h"
 #include "aom_dsp/bitreader_buffer.h"
@@ -358,38 +359,34 @@
 }
 
 static void alloc_tile_list_buffer(AV1Decoder *pbi) {
-  // TODO(yunqing): for now, copy each tile's decoded YUV data directly to the
-  // output buffer. This needs to be modified according to the application
-  // requirement.
+  // The resolution of the output frame is read out from the bitstream. The data
+  // are stored in the order of Y plane, U plane and V plane. As an example, for
+  // image format 4:2:0, the output frame of U plane and V plane is 1/4 of the
+  // output frame.
   AV1_COMMON *const cm = &pbi->common;
   const int tile_width_in_pixels = cm->tile_width * MI_SIZE;
   const int tile_height_in_pixels = cm->tile_height * MI_SIZE;
-  const int ssy = cm->seq_params.subsampling_y;
-  const int ssx = cm->seq_params.subsampling_x;
-  const int num_planes = av1_num_planes(cm);
-  const size_t yplane_tile_size = tile_height_in_pixels * tile_width_in_pixels;
-  const size_t uvplane_tile_size =
-      (num_planes > 1)
-          ? (tile_height_in_pixels >> ssy) * (tile_width_in_pixels >> ssx)
-          : 0;
-  const size_t tile_size = (cm->seq_params.use_highbitdepth ? 2 : 1) *
-                           (yplane_tile_size + 2 * uvplane_tile_size);
-  pbi->tile_list_size = tile_size * (pbi->tile_count_minus_1 + 1);
+  const int output_frame_width =
+      (pbi->output_frame_width_in_tiles_minus_1 + 1) * tile_width_in_pixels;
+  const int output_frame_height =
+      (pbi->output_frame_height_in_tiles_minus_1 + 1) * tile_height_in_pixels;
+  // The output frame is used to store the decoded tile list. The decoded tile
+  // list has to fit into 1 output frame.
+  assert((pbi->tile_count_minus_1 + 1) <=
+         (pbi->output_frame_width_in_tiles_minus_1 + 1) *
+             (pbi->output_frame_height_in_tiles_minus_1 + 1));
 
-  if (pbi->tile_list_size > pbi->buffer_sz) {
-    if (pbi->tile_list_output != NULL) aom_free(pbi->tile_list_output);
-    pbi->tile_list_output = NULL;
-
-    pbi->tile_list_output = (uint8_t *)aom_memalign(32, pbi->tile_list_size);
-    if (pbi->tile_list_output == NULL)
-      aom_internal_error(&cm->error, AOM_CODEC_MEM_ERROR,
-                         "Failed to allocate the tile list output buffer");
-    pbi->buffer_sz = pbi->tile_list_size;
-  }
+  // Allocate the tile list output buffer.
+  if (aom_alloc_frame_buffer(
+          &pbi->tile_list_outbuf, output_frame_width, output_frame_height,
+          cm->seq_params.subsampling_x, cm->seq_params.subsampling_y,
+          cm->seq_params.use_highbitdepth, 0, cm->byte_alignment))
+    aom_internal_error(&cm->error, AOM_CODEC_MEM_ERROR,
+                       "Failed to allocate the tile list output buffer");
 }
 
 static void copy_decoded_tile_to_tile_list_buffer(AV1Decoder *pbi,
-                                                  uint8_t **output) {
+                                                  int tile_idx) {
   AV1_COMMON *const cm = &pbi->common;
   const int tile_width_in_pixels = cm->tile_width * MI_SIZE;
   const int tile_height_in_pixels = cm->tile_height * MI_SIZE;
@@ -397,43 +394,41 @@
   const int ssx = cm->seq_params.subsampling_x;
   const int num_planes = av1_num_planes(cm);
 
-  // Copy decoded tile to the tile list output buffer.
   YV12_BUFFER_CONFIG *cur_frame = get_frame_new_buffer(cm);
-  const int mi_row = pbi->dec_tile_row * cm->tile_height;
-  const int mi_col = pbi->dec_tile_col * cm->tile_width;
-  const int is_hbd = (cur_frame->flags & YV12_FLAG_HIGHBITDEPTH) ? 1 : 0;
-  uint8_t *bufs[MAX_MB_PLANE] = { NULL, NULL, NULL };
-  int strides[MAX_MB_PLANE] = { 0, 0, 0 };
+  const int tr = tile_idx / (pbi->output_frame_width_in_tiles_minus_1 + 1);
+  const int tc = tile_idx % (pbi->output_frame_width_in_tiles_minus_1 + 1);
   int plane;
 
+  // Copy decoded tile to the tile list output buffer.
   for (plane = 0; plane < num_planes; ++plane) {
-    int shift_x = plane > 0 ? ssx : 0;
-    int shift_y = plane > 0 ? ssy : 0;
+    const int shift_x = plane > 0 ? ssx : 0;
+    const int shift_y = plane > 0 ? ssy : 0;
+    const int h = tile_height_in_pixels >> shift_y;
+    const int w = tile_width_in_pixels >> shift_x;
 
-    bufs[plane] = cur_frame->buffers[plane];
-    strides[plane] =
-        (plane > 0) ? cur_frame->strides[1] : cur_frame->strides[0];
+    // src offset
+    int vstart1 = pbi->dec_tile_row * h;
+    int vend1 = vstart1 + h;
+    int hstart1 = pbi->dec_tile_col * w;
+    int hend1 = hstart1 + w;
+    // dst offset
+    int vstart2 = tr * h;
+    int hstart2 = tc * w;
 
-    bufs[plane] += mi_row * (MI_SIZE >> shift_y) * strides[plane] +
-                   mi_col * (MI_SIZE >> shift_x);
-
-    if (is_hbd) {
-      bufs[plane] = (uint8_t *)CONVERT_TO_SHORTPTR(bufs[plane]);
-      strides[plane] *= 2;
-    }
-
-    int w, h;
-    w = (plane > 0 && shift_x > 0) ? ((tile_width_in_pixels + 1) >> shift_x)
-                                   : tile_width_in_pixels;
-    w *= (1 + is_hbd);
-    h = (plane > 0 && shift_y > 0) ? ((tile_height_in_pixels + 1) >> shift_y)
-                                   : tile_height_in_pixels;
-    int j;
-
-    for (j = 0; j < h; ++j) {
-      memcpy(*output, bufs[plane], w);
-      bufs[plane] += strides[plane];
-      *output += w;
+    switch (plane) {
+      case 0:
+        aom_yv12_partial_copy_y(cur_frame, hstart1, hend1, vstart1, vend1,
+                                &pbi->tile_list_outbuf, hstart2, vstart2);
+        break;
+      case 1:
+        aom_yv12_partial_copy_u(cur_frame, hstart1, hend1, vstart1, vend1,
+                                &pbi->tile_list_outbuf, hstart2, vstart2);
+        break;
+      case 2:
+        aom_yv12_partial_copy_v(cur_frame, hstart1, hend1, vstart1, vend1,
+                                &pbi->tile_list_outbuf, hstart2, vstart2);
+        break;
+      default: assert(0);
     }
   }
 }
@@ -470,8 +465,8 @@
   uint32_t tile_list_info_bytes = 4;
   tile_list_payload_size += tile_list_info_bytes;
   data += tile_list_info_bytes;
-  uint8_t *output = pbi->tile_list_output;
 
+  int tile_idx = 0;
   for (i = 0; i <= pbi->tile_count_minus_1; i++) {
     // Process 1 tile.
     // Reset the bit reader.
@@ -515,7 +510,8 @@
     assert(data <= data_end);
 
     // Copy the decoded tile to the tile list output buffer.
-    copy_decoded_tile_to_tile_list_buffer(pbi, &output);
+    copy_decoded_tile_to_tile_list_buffer(pbi, tile_idx);
+    tile_idx++;
   }
 
   *frame_decoding_finished = 1;
diff --git a/examples/lightfield_bitstream_parsing.c b/examples/lightfield_bitstream_parsing.c
index 5d2c79c..9c90671 100644
--- a/examples/lightfield_bitstream_parsing.c
+++ b/examples/lightfield_bitstream_parsing.c
@@ -62,9 +62,8 @@
 #define ALIGN_POWER_OF_TWO(value, n) \
   (((value) + ((1 << (n)) - 1)) & ~((1 << (n)) - 1))
 
-// SB size: 64x64
-const uint8_t output_frame_width_in_tiles_minus_1 = 512 / 64 - 1;
-const uint8_t output_frame_height_in_tiles_minus_1 = 512 / 64 - 1;
+const int output_frame_width = 512;
+const int output_frame_height = 512;
 
 // Spec:
 // typedef struct {
@@ -99,7 +98,9 @@
 void process_tile_list(const TILE_LIST_INFO *tiles, int num_tiles,
                        aom_codec_pts_t tl_pts, unsigned char **frames,
                        const size_t *frame_sizes, aom_codec_ctx_t *codec,
-                       unsigned char *tl_buf, AvxVideoWriter *writer) {
+                       unsigned char *tl_buf, AvxVideoWriter *writer,
+                       uint8_t output_frame_width_in_tiles_minus_1,
+                       uint8_t output_frame_height_in_tiles_minus_1) {
   unsigned char *tl = tl_buf;
   struct aom_write_bit_buffer wb = { tl, 0 };
   unsigned char *saved_obu_size_loc = NULL;
@@ -345,6 +346,10 @@
   if (tl_buf == NULL) die_codec(&codec, "Failed to allocate tile list buffer.");
 
   aom_codec_pts_t tl_pts = num_references;
+  const uint8_t output_frame_width_in_tiles_minus_1 =
+      output_frame_width / tile_width - 1;
+  const uint8_t output_frame_height_in_tiles_minus_1 =
+      output_frame_height / tile_height - 1;
 
   printf("Reading tile list from file.\n");
   char line[1024];
@@ -358,7 +363,8 @@
       // new render frame or because we've hit our max number of tiles per list.
       if (num_tiles > 0) {
         process_tile_list(tiles, num_tiles, tl_pts, frames, frame_sizes, &codec,
-                          tl_buf, writer);
+                          tl_buf, writer, output_frame_width_in_tiles_minus_1,
+                          output_frame_height_in_tiles_minus_1);
         ++tl_pts;
       }
       num_tiles = 0;
@@ -383,7 +389,8 @@
   if (num_tiles > 0) {
     // Flush out the last tile list.
     process_tile_list(tiles, num_tiles, tl_pts, frames, frame_sizes, &codec,
-                      tl_buf, writer);
+                      tl_buf, writer, output_frame_width_in_tiles_minus_1,
+                      output_frame_height_in_tiles_minus_1);
     ++tl_pts;
   }
 
diff --git a/examples/lightfield_decoder.c b/examples/lightfield_decoder.c
index 22dc430..83542ba 100644
--- a/examples/lightfield_decoder.c
+++ b/examples/lightfield_decoder.c
@@ -52,9 +52,44 @@
   exit(EXIT_FAILURE);
 }
 
+// Output frame size
+const int output_frame_width = 512;
+const int output_frame_height = 512;
+
+static void aom_img_copy_tile(const aom_image_t *src, const aom_image_t *dst,
+                              int dst_row_offset, int dst_col_offset) {
+  const int shift = (src->fmt & AOM_IMG_FMT_HIGHBITDEPTH) ? 1 : 0;
+  int plane;
+
+  for (plane = 0; plane < 3; ++plane) {
+    const unsigned char *src_buf = src->planes[plane];
+    const int src_stride = src->stride[plane];
+    unsigned char *dst_buf = dst->planes[plane];
+    const int dst_stride = dst->stride[plane];
+    const int roffset =
+        (plane > 0) ? dst_row_offset >> dst->y_chroma_shift : dst_row_offset;
+    const int coffset =
+        (plane > 0) ? dst_col_offset >> dst->x_chroma_shift : dst_col_offset;
+
+    // col offset needs to be adjusted for HBD.
+    dst_buf += roffset * dst_stride + (coffset << shift);
+
+    const int w = (aom_img_plane_width(src, plane) << shift);
+    const int h = aom_img_plane_height(src, plane);
+    int y;
+
+    for (y = 0; y < h; ++y) {
+      memcpy(dst_buf, src_buf, w);
+      src_buf += src_stride;
+      dst_buf += dst_stride;
+    }
+  }
+}
+
 void decode_tile(aom_codec_ctx_t *codec, const unsigned char *frame,
                  size_t frame_size, int tr, int tc, int ref_idx,
-                 aom_image_t *reference_images, FILE *outfile) {
+                 aom_image_t *reference_images, aom_image_t *output,
+                 int *tile_idx) {
   aom_codec_control_(codec, AV1_SET_TILE_MODE, 1);
   aom_codec_control_(codec, AV1D_EXT_TILE_DEBUG, 1);
   aom_codec_control_(codec, AV1_SET_DECODE_TILE_ROW, tr);
@@ -73,7 +108,23 @@
 
   aom_codec_iter_t iter = NULL;
   aom_image_t *img = aom_codec_get_frame(codec, &iter);
-  aom_img_write(img, outfile);
+  if (!img) die_codec(codec, "Failed to get frame.");
+
+  // read out the tile size.
+  unsigned int tile_size = 0;
+  if (aom_codec_control(codec, AV1D_GET_TILE_SIZE, &tile_size))
+    die_codec(codec, "Failed to get the tile size");
+  const unsigned int tile_width = tile_size >> 16;
+  const unsigned int tile_height = tile_size & 65535;
+  const uint8_t output_frame_width_in_tiles = output_frame_width / tile_width;
+
+  // Copy the tile to the output frame.
+  const int row_offset =
+      (*tile_idx / output_frame_width_in_tiles) * tile_height;
+  const int col_offset = (*tile_idx % output_frame_width_in_tiles) * tile_width;
+
+  aom_img_copy_tile(img, output, row_offset, col_offset);
+  (*tile_idx)++;
 }
 
 int main(int argc, char **argv) {
@@ -83,7 +134,9 @@
   const AvxInterface *decoder = NULL;
   const AvxVideoInfo *info = NULL;
   int num_references;
+  aom_img_fmt_t ref_fmt = 0;
   aom_image_t reference_images[MAX_EXTERNAL_REFERENCES];
+  aom_image_t output;
   size_t frame_size = 0;
   const unsigned char *frame = NULL;
   int i, j;
@@ -123,7 +176,6 @@
       die_codec(&codec, "Failed to decode frame.");
 
     if (i == 0) {
-      aom_img_fmt_t ref_fmt = 0;
       if (aom_codec_control(&codec, AV1D_GET_IMG_FORMAT, &ref_fmt))
         die_codec(&codec, "Failed to get the image format");
 
@@ -187,11 +239,31 @@
   }
   printf("Read %d frames.\n", num_frames);
 
+  // Allocate the output frame.
+  aom_img_fmt_t out_fmt = ref_fmt;
+  if (!CONFIG_LOWBITDEPTH) out_fmt |= AOM_IMG_FMT_HIGHBITDEPTH;
+  if (!aom_img_alloc(&output, out_fmt, output_frame_width, output_frame_height,
+                     32))
+    die("Failed to allocate output image.");
+
   printf("Decoding tile list from file.\n");
   char line[1024];
   FILE *tile_list_fptr = fopen(tile_list_file, "r");
+  int tile_list_cnt = 0;
+  int tile_list_writes = 0;
+  int tile_idx = 0;
   while ((fgets(line, 1024, tile_list_fptr)) != NULL) {
     if (line[0] == 'F') {
+      // Write out the tile list.
+      if (tile_list_cnt) {
+        aom_img_write(&output, outfile);
+        tile_list_writes++;
+      }
+
+      tile_list_cnt++;
+      tile_idx = 0;
+      // Then memset the frame.
+      memset(output.img_data, 0, output.sz);
       continue;
     }
 
@@ -212,9 +284,13 @@
     frame = frames[image_idx];
     frame_size = frame_sizes[image_idx];
     decode_tile(&codec, frame, frame_size, tr, tc, ref_idx, reference_images,
-                outfile);
+                &output, &tile_idx);
   }
 
+  // Write out the last tile list.
+  if (tile_list_writes < tile_list_cnt) aom_img_write(&output, outfile);
+
+  aom_img_free(&output);
   for (i = 0; i < num_references; i++) aom_img_free(&reference_images[i]);
   for (int f = 0; f < num_frames; ++f) {
     free(frames[f]);
diff --git a/examples/lightfield_tile_list_decoder.c b/examples/lightfield_tile_list_decoder.c
index 5556bf0..d8f244f 100644
--- a/examples/lightfield_tile_list_decoder.c
+++ b/examples/lightfield_tile_list_decoder.c
@@ -147,9 +147,8 @@
     if (aom_codec_decode(&codec, frame, frame_size, NULL))
       die_codec(&codec, "Failed to decode the tile list.");
     aom_codec_iter_t iter = NULL;
-    aom_image_t *img;
-    while ((img = aom_codec_get_frame(&codec, &iter)))
-      fwrite(img->img_data, 1, img->sz, outfile);
+    aom_image_t *img = aom_codec_get_frame(&codec, &iter);
+    aom_img_write(img, outfile);
   }
 
   for (i = 0; i < num_references; i++) aom_img_free(&reference_images[i]);