ext-tile: output 8bit content at 1 byte per pixel

BUG=aomedia:1999

Change-Id: Icd119460f4d56da913685532c0b0cc60ccda509b
diff --git a/av1/decoder/obu.c b/av1/decoder/obu.c
index b148bd6..e182e50 100644
--- a/av1/decoder/obu.c
+++ b/av1/decoder/obu.c
@@ -377,14 +377,41 @@
              (pbi->output_frame_height_in_tiles_minus_1 + 1));
 
   // Allocate the tile list output buffer.
-  if (aom_alloc_frame_buffer(
-          &pbi->tile_list_outbuf, output_frame_width, output_frame_height,
-          cm->seq_params.subsampling_x, cm->seq_params.subsampling_y,
-          cm->seq_params.use_highbitdepth, 0, cm->byte_alignment))
+  // Note: if cm->seq_params.use_highbitdepth is 1 and cm->seq_params.bit_depth
+  // is 8, we could allocate less memory, namely, 8 bits/pixel.
+  if (aom_alloc_frame_buffer(&pbi->tile_list_outbuf, output_frame_width,
+                             output_frame_height, cm->seq_params.subsampling_x,
+                             cm->seq_params.subsampling_y,
+                             (cm->seq_params.use_highbitdepth &&
+                              (cm->seq_params.bit_depth > AOM_BITS_8)),
+                             0, cm->byte_alignment))
     aom_internal_error(&cm->error, AOM_CODEC_MEM_ERROR,
                        "Failed to allocate the tile list output buffer");
 }
 
+// Copies one plane's tile rectangle (columns [hstart1, hend1), rows [vstart1,
+// vend1)) from a 16-bit src buffer into an 8-bit dst at (hstart2, vstart2).
+static void yv12_tile_copy(const YV12_BUFFER_CONFIG *src, int hstart1,
+                           int hend1, int vstart1, int vend1,
+                           YV12_BUFFER_CONFIG *dst, int hstart2, int vstart2,
+                           int plane) {
+  assert(src->flags & YV12_FLAG_HIGHBITDEPTH);
+  assert(!(dst->flags & YV12_FLAG_HIGHBITDEPTH));
+  const int src_stride = (plane > 0) ? src->strides[1] : src->strides[0];
+  const int dst_stride = (plane > 0) ? dst->strides[1] : dst->strides[0];
+  const int width = hend1 - hstart1;
+  const uint16_t *src16 =
+      CONVERT_TO_SHORTPTR(src->buffers[plane] + vstart1 * src_stride + hstart1);
+  uint8_t *dst8 = dst->buffers[plane] + vstart2 * dst_stride + hstart2;
+
+  for (int row = vstart1; row < vend1; ++row) {
+    // Samples fit in 8 bits here: the caller checks bit_depth == AOM_BITS_8.
+    for (int col = 0; col < width; ++col) dst8[col] = (uint8_t)src16[col];
+    src16 += src_stride;
+    dst8 += dst_stride;
+  }
+}
+
 static void copy_decoded_tile_to_tile_list_buffer(AV1Decoder *pbi,
                                                   int tile_idx) {
   AV1_COMMON *const cm = &pbi->common;
@@ -415,20 +442,26 @@
     int vstart2 = tr * h;
     int hstart2 = tc * w;
 
-    switch (plane) {
-      case 0:
-        aom_yv12_partial_copy_y(cur_frame, hstart1, hend1, vstart1, vend1,
-                                &pbi->tile_list_outbuf, hstart2, vstart2);
-        break;
-      case 1:
-        aom_yv12_partial_copy_u(cur_frame, hstart1, hend1, vstart1, vend1,
-                                &pbi->tile_list_outbuf, hstart2, vstart2);
-        break;
-      case 2:
-        aom_yv12_partial_copy_v(cur_frame, hstart1, hend1, vstart1, vend1,
-                                &pbi->tile_list_outbuf, hstart2, vstart2);
-        break;
-      default: assert(0);
+    if (cm->seq_params.use_highbitdepth &&
+        cm->seq_params.bit_depth == AOM_BITS_8) {
+      yv12_tile_copy(cur_frame, hstart1, hend1, vstart1, vend1,
+                     &pbi->tile_list_outbuf, hstart2, vstart2, plane);
+    } else {
+      switch (plane) {
+        case 0:
+          aom_yv12_partial_copy_y(cur_frame, hstart1, hend1, vstart1, vend1,
+                                  &pbi->tile_list_outbuf, hstart2, vstart2);
+          break;
+        case 1:
+          aom_yv12_partial_copy_u(cur_frame, hstart1, hend1, vstart1, vend1,
+                                  &pbi->tile_list_outbuf, hstart2, vstart2);
+          break;
+        case 2:
+          aom_yv12_partial_copy_v(cur_frame, hstart1, hend1, vstart1, vend1,
+                                  &pbi->tile_list_outbuf, hstart2, vstart2);
+          break;
+        default: assert(0);
+      }
     }
   }
 }
diff --git a/examples/lightfield_decoder.c b/examples/lightfield_decoder.c
index 83542ba..b57accb 100644
--- a/examples/lightfield_decoder.c
+++ b/examples/lightfield_decoder.c
@@ -89,7 +89,7 @@
 void decode_tile(aom_codec_ctx_t *codec, const unsigned char *frame,
                  size_t frame_size, int tr, int tc, int ref_idx,
                  aom_image_t *reference_images, aom_image_t *output,
-                 int *tile_idx) {
+                 int *tile_idx, unsigned int *output_bit_depth) {
   aom_codec_control_(codec, AV1_SET_TILE_MODE, 1);
   aom_codec_control_(codec, AV1D_EXT_TILE_DEBUG, 1);
   aom_codec_control_(codec, AV1_SET_DECODE_TILE_ROW, tr);
@@ -110,6 +110,13 @@
   aom_image_t *img = aom_codec_get_frame(codec, &iter);
   if (!img) die_codec(codec, "Failed to get frame.");
 
+  // aom_img_alloc() sets bit_depth as follows:
+  // output->bit_depth = (fmt & AOM_IMG_FMT_HIGHBITDEPTH) ? 16 : 8;
+  // Use img->bit_depth (read from the bitstream) instead, so that
+  // aom_shift_img() works as expected.
+  output->bit_depth = img->bit_depth;
+  *output_bit_depth = img->bit_depth;
+
   // read out the tile size.
   unsigned int tile_size = 0;
   if (aom_codec_control(codec, AV1D_GET_TILE_SIZE, &tile_size))
@@ -137,6 +144,7 @@
   aom_img_fmt_t ref_fmt = 0;
   aom_image_t reference_images[MAX_EXTERNAL_REFERENCES];
   aom_image_t output;
+  aom_image_t *output_shifted = NULL;
   size_t frame_size = 0;
   const unsigned char *frame = NULL;
   int i, j;
@@ -252,11 +260,18 @@
   int tile_list_cnt = 0;
   int tile_list_writes = 0;
   int tile_idx = 0;
+  aom_image_t *out = NULL;
+  unsigned int output_bit_depth = 0;
+
   while ((fgets(line, 1024, tile_list_fptr)) != NULL) {
     if (line[0] == 'F') {
       // Write out the tile list.
       if (tile_list_cnt) {
-        aom_img_write(&output, outfile);
+        out = &output;
+        // Shift up or down if necessary
+        if (output_bit_depth != 0)
+          aom_shift_img(output_bit_depth, &out, &output_shifted);
+        aom_img_write(out, outfile);
         tile_list_writes++;
       }
 
@@ -267,11 +282,7 @@
       continue;
     }
 
-    int image_idx;
-    int ref_idx;
-    int tc;
-    int tr;
-
+    int image_idx, ref_idx, tc, tr;
     sscanf(line, "%d %d %d %d", &image_idx, &ref_idx, &tc, &tr);
     if (image_idx >= num_frames) {
       die("Tile list image_idx out of bounds: %d >= %d.", image_idx,
@@ -284,12 +295,19 @@
     frame = frames[image_idx];
     frame_size = frame_sizes[image_idx];
     decode_tile(&codec, frame, frame_size, tr, tc, ref_idx, reference_images,
-                &output, &tile_idx);
+                &output, &tile_idx, &output_bit_depth);
   }
 
   // Write out the last tile list.
-  if (tile_list_writes < tile_list_cnt) aom_img_write(&output, outfile);
+  if (tile_list_writes < tile_list_cnt) {
+    out = &output;
+    // Shift up or down if necessary
+    if (output_bit_depth != 0)
+      aom_shift_img(output_bit_depth, &out, &output_shifted);
+    aom_img_write(out, outfile);
+  }
 
+  if (output_shifted) aom_img_free(output_shifted);
   aom_img_free(&output);
   for (i = 0; i < num_references; i++) aom_img_free(&reference_images[i]);
   for (int f = 0; f < num_frames; ++f) {