OBU: Fix a few bugs

* The function av1_decode_tg_tiles_and_wrapup performs some per-frame
  initialization; some of this was mistakenly being performed
  once per tile group instead, leading to strange behaviour
  (e.g., forgetting loop-restoration coefficients, forgetting
  the boundary information for all but the last tile group, etc.)

  Fix this by pulling all of the initialization code into its
  own function and calling it only if the initialize_flag is set.

* While fixing the above, I realized that the 'context_updated'
  flag in av1_decode_tg_tiles_and_wrapup was not behaving as intended:
  The idea is that, when using frame parallel mode, we save the
  frame context early so that the next frame can start decoding.
  Then we don't need to store the frame context at the end of
  the frame, since we already dealt with it at the start of the frame.

  However, this 'context_updated' flag was local to one tile group,
  i.e., it got reset to 0 once we started decoding the second tile
  group. So we'd end up storing the frame context again at the end of
  the frame if there was more than one tile group.

  This didn't break anything, but it is a bit weird. So, to match
  the original intent, we ditch the 'context_updated' flag and
  directly check if we're in frame parallel mode when necessary.

* Fix a bug where we read one byte too many from a tile group
  OBU when the extended OBU header was used.

BUG=aomedia:892

Change-Id: Ifbe561de0de35525d809e23915ac5263273e8de7
diff --git a/av1/decoder/decodeframe.c b/av1/decoder/decodeframe.c
index d8356fa..2bf8918 100644
--- a/av1/decoder/decodeframe.c
+++ b/av1/decoder/decodeframe.c
@@ -1749,6 +1749,7 @@
   pool->frame_bufs[cm->new_fb_idx].buf.render_height = cm->render_height;
 }
 
+#if !CONFIG_OBU
 static void read_tile_group_range(AV1Decoder *pbi,
                                   struct aom_read_bit_buffer *const rb) {
   AV1_COMMON *const cm = &pbi->common;
@@ -1761,6 +1762,7 @@
     aom_internal_error(&cm->error, AOM_CODEC_CORRUPT_FRAME,
                        "Tile group extends past last tile in frame");
 }
+#endif  // !CONFIG_OBU
 
 #if CONFIG_MAX_TILE
 
@@ -2152,13 +2154,11 @@
   const int tile_rows = cm->tile_rows;
   int tc = 0;
   int first_tile_in_tg = 0;
+#if !CONFIG_OBU
   struct aom_read_bit_buffer rb_tg_hdr;
   uint8_t clear_data[MAX_AV1_HEADER_SIZE];
-#if !CONFIG_OBU
   const size_t hdr_size = pbi->uncomp_hdr_size + pbi->first_partition_size;
   const int tg_size_bit_offset = pbi->tg_size_bit_offset;
-#else
-  const int tg_size_bit_offset = 0;
 #endif
 
 #if CONFIG_DEPENDENT_HORZTILES
@@ -2187,6 +2187,14 @@
         aom_internal_error(&cm->error, AOM_CODEC_CORRUPT_FRAME,
                            "Data ended before all tiles were read.");
       buf->col = c;
+#if CONFIG_OBU
+#if CONFIG_DEPENDENT_HORZTILES
+      if (tc == startTile) {
+        tile_group_start_row = r;
+        tile_group_start_col = c;
+      }
+#endif  // CONFIG_DEPENDENT_HORZTILES
+#else   // CONFIG_OBU
       if (hdr_offset) {
         init_read_bit_buffer(pbi, &rb_tg_hdr, data, data_end, clear_data);
         rb_tg_hdr.bit_offset = tg_size_bit_offset;
@@ -2196,6 +2204,7 @@
         tile_group_start_col = c;
 #endif
       }
+#endif  // CONFIG_OBU
       first_tile_in_tg += tc == first_tile_in_tg ? pbi->tg_size : 0;
       data += hdr_offset;
       get_tile_buffer(data_end, pbi->tile_size_bytes, is_last,
@@ -3771,13 +3780,9 @@
   return first_partition_size;
 }
 
-void av1_decode_tg_tiles_and_wrapup(AV1Decoder *pbi, const uint8_t *data,
-                                    const uint8_t *data_end,
-                                    const uint8_t **p_data_end, int startTile,
-                                    int endTile, int initialize_flag) {
+// Once-per-frame initialization
+static void setup_frame_info(AV1Decoder *pbi) {
   AV1_COMMON *const cm = &pbi->common;
-  MACROBLOCKD *const xd = &pbi->mb;
-  int context_updated = 0;
 
 #if CONFIG_LOOP_RESTORATION
   if (cm->rst_info[0].frame_restoration_type != RESTORE_NONE ||
@@ -3795,12 +3800,11 @@
 
   // If encoded in frame parallel mode, frame context is ready after decoding
   // the frame header.
-  if (cm->frame_parallel_decode && initialize_flag &&
+  if (cm->frame_parallel_decode &&
       cm->refresh_frame_context != REFRESH_FRAME_CONTEXT_BACKWARD) {
     AVxWorker *const worker = pbi->frame_worker_owner;
     FrameWorkerData *const frame_worker_data = worker->data1;
     if (cm->refresh_frame_context == REFRESH_FRAME_CONTEXT_FORWARD) {
-      context_updated = 1;
 #if CONFIG_NO_FRAME_CONTEXT_SIGNALING
       cm->frame_contexts[cm->new_fb_idx] = *cm->fc;
 #else
@@ -3817,6 +3821,16 @@
   }
 
   dec_setup_frame_boundary_info(cm);
+}
+
+void av1_decode_tg_tiles_and_wrapup(AV1Decoder *pbi, const uint8_t *data,
+                                    const uint8_t *data_end,
+                                    const uint8_t **p_data_end, int startTile,
+                                    int endTile, int initialize_flag) {
+  AV1_COMMON *const cm = &pbi->common;
+  MACROBLOCKD *const xd = &pbi->mb;
+
+  if (initialize_flag) setup_frame_info(pbi);
 
 #if CONFIG_OBU
   *p_data_end = decode_tiles(pbi, data, data_end, startTile, endTile);
@@ -3914,13 +3928,15 @@
   }
 #endif
 
-// Non frame parallel update frame context here.
+  // Non frame parallel update frame context here.
+  if (cm->refresh_frame_context != REFRESH_FRAME_CONTEXT_FORWARD) {
 #if CONFIG_NO_FRAME_CONTEXT_SIGNALING
-  if (!context_updated) cm->frame_contexts[cm->new_fb_idx] = *cm->fc;
+    cm->frame_contexts[cm->new_fb_idx] = *cm->fc;
 #else
-  if (!cm->error_resilient_mode && !context_updated)
-    cm->frame_contexts[cm->frame_context_idx] = *cm->fc;
+    if (!cm->error_resilient_mode)
+      cm->frame_contexts[cm->frame_context_idx] = *cm->fc;
 #endif
+  }
 }
 
 #if CONFIG_OBU
@@ -4127,9 +4143,10 @@
         if (cm->show_existing_frame) frame_decoding_finished = 1;
         break;
       case OBU_TILE_GROUP:
-        obu_payload_size = read_one_tile_group_obu(
-            pbi, &rb, is_first_tg_obu_received, data, data + obu_size - 1,
-            p_data_end, &frame_decoding_finished);
+        obu_payload_size =
+            read_one_tile_group_obu(pbi, &rb, is_first_tg_obu_received, data,
+                                    data + obu_size - obu_header_size,
+                                    p_data_end, &frame_decoding_finished);
         is_first_tg_obu_received = 0;
         break;
       case OBU_METADATA: