Remove code used by the frame_parallel_decode option.

Commit 5521a18fcd6c0248119f34a2584c174f183f4163 removed the
frame_parallel_decode option on the decoder side:
https://aomedia-review.googlesource.com/c/aom/+/50001

Remove more code used by the frame_parallel_decode option on the decoder
side.

Remove the AOM_CODEC_CAP_FRAME_THREADING flag. Change the value of the
AOM_CODEC_CAP_EXTERNAL_FRAME_BUFFER flag from 0x400000 to 0x200000 (the
value previously used by AOM_CODEC_CAP_FRAME_THREADING).

Remove next_submit_worker_id, last_submit_worker_id, and
available_threads. This is the counterpart of
https://chromium-review.googlesource.com/c/webm/libvpx/+/557041.

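Remove dthread.c and the related prototypes in dthread.h. This is the
counterpart of
https://chromium-review.googlesource.com/c/webm/libvpx/+/560621.
Also remove the scratch_buffer, scratch_buffer_size, stats_mutex, and
stats_cond fields of FrameWorkerData.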

Remove the frame_worker_owner, 'row', and 'col' fields of RefCntBuffer.
This is the counterpart of
https://chromium-review.googlesource.com/c/webm/libvpx/+/560622.
Also remove the frame_worker_owner field of AV1Decoder.

Reduce FRAME_BUFFERS by 3. This is the counterpart of
https://chromium-review.googlesource.com/c/webm/libvpx/+/560620.

BUG=aomedia:2110

Change-Id: Iaec00020e5b2926507fea849fc3ab976042e1f81
diff --git a/aom/aom_decoder.h b/aom/aom_decoder.h
index 212b3b2..70420c3 100644
--- a/aom/aom_decoder.h
+++ b/aom/aom_decoder.h
@@ -63,10 +63,8 @@
  *
  *  The available flags are specified by AOM_CODEC_USE_* defines.
  */
-/*!\brief Can support frame-based multi-threading */
-#define AOM_CODEC_CAP_FRAME_THREADING 0x200000
 /*!brief Can support external frame buffers */
-#define AOM_CODEC_CAP_EXTERNAL_FRAME_BUFFER 0x400000
+#define AOM_CODEC_CAP_EXTERNAL_FRAME_BUFFER 0x200000
 
 #define AOM_CODEC_USE_POSTPROC 0x10000 /**< Postprocess decoded frame */
 
diff --git a/apps/aomdec.c b/apps/aomdec.c
index 848fb35..4b7fc40 100644
--- a/apps/aomdec.c
+++ b/apps/aomdec.c
@@ -779,7 +779,7 @@
     aom_usec_timer_start(&timer);
 
     if (flush_decoder) {
-      // Flush the decoder in frame parallel decode.
+      // Flush the decoder.
       if (aom_codec_decode(&decoder, NULL, 0, NULL)) {
         warn("Failed to flush decoder: %s", aom_codec_error(&decoder));
       }
diff --git a/av1/av1.cmake b/av1/av1.cmake
index 3a7cd7e..13bdee9 100644
--- a/av1/av1.cmake
+++ b/av1/av1.cmake
@@ -100,7 +100,6 @@
             "${AOM_ROOT}/av1/decoder/decodetxb.h"
             "${AOM_ROOT}/av1/decoder/detokenize.c"
             "${AOM_ROOT}/av1/decoder/detokenize.h"
-            "${AOM_ROOT}/av1/decoder/dthread.c"
             "${AOM_ROOT}/av1/decoder/dthread.h"
             "${AOM_ROOT}/av1/decoder/obu.h"
             "${AOM_ROOT}/av1/decoder/obu.c")
diff --git a/av1/av1_dx_iface.c b/av1/av1_dx_iface.c
index 4a8817c..904d12f 100644
--- a/av1/av1_dx_iface.c
+++ b/av1/av1_dx_iface.c
@@ -58,12 +58,13 @@
   int operating_point;
   int output_all_layers;
 
+  // TODO(wtc): This can be simplified. num_frame_workers is always 1, and
+  // next_output_worker_id is always 0. The frame_workers array of size 1 can
+  // be replaced by a single AVxWorker.
   AVxWorker *frame_workers;
   int num_frame_workers;
-  int next_submit_worker_id;
-  int last_submit_worker_id;
   int next_output_worker_id;
-  int available_threads;
+
   aom_image_t *image_with_grain[MAX_NUM_SPATIAL_LAYERS];
   int need_resync;  // wait for key/intra-only frame
   // BufferPool that holds all reference frames. Shared by all the FrameWorkers.
@@ -132,11 +133,6 @@
       av1_remove_common(&frame_worker_data->pbi->common);
       av1_free_restoration_buffers(&frame_worker_data->pbi->common);
       av1_decoder_remove(frame_worker_data->pbi);
-      aom_free(frame_worker_data->scratch_buffer);
-#if CONFIG_MULTITHREAD
-      pthread_mutex_destroy(&frame_worker_data->stats_mutex);
-      pthread_cond_destroy(&frame_worker_data->stats_cond);
-#endif
       aom_free(frame_worker_data);
     }
 #if CONFIG_MULTITHREAD
@@ -371,14 +367,11 @@
   const AVxWorkerInterface *const winterface = aom_get_worker_interface();
 
   ctx->last_show_frame = -1;
-  ctx->next_submit_worker_id = 0;
-  ctx->last_submit_worker_id = 0;
   ctx->next_output_worker_id = 0;
   ctx->need_resync = 1;
   ctx->num_frame_workers = 1;
   if (ctx->num_frame_workers > MAX_DECODE_THREADS)
     ctx->num_frame_workers = MAX_DECODE_THREADS;
-  ctx->available_threads = ctx->num_frame_workers;
   ctx->flushed = 0;
 
   ctx->buffer_pool = (BufferPool *)aom_calloc(1, sizeof(BufferPool));
@@ -415,23 +408,9 @@
       return AOM_CODEC_MEM_ERROR;
     }
     frame_worker_data->pbi->common.options = &ctx->cfg.cfg;
-    frame_worker_data->pbi->frame_worker_owner = worker;
     frame_worker_data->worker_id = i;
-    frame_worker_data->scratch_buffer = NULL;
-    frame_worker_data->scratch_buffer_size = 0;
     frame_worker_data->frame_context_ready = 0;
     frame_worker_data->received_frame = 0;
-#if CONFIG_MULTITHREAD
-    if (pthread_mutex_init(&frame_worker_data->stats_mutex, NULL)) {
-      set_error_detail(ctx, "Failed to allocate frame_worker_data mutex");
-      return AOM_CODEC_MEM_ERROR;
-    }
-
-    if (pthread_cond_init(&frame_worker_data->stats_cond, NULL)) {
-      set_error_detail(ctx, "Failed to allocate frame_worker_data cond");
-      return AOM_CODEC_MEM_ERROR;
-    }
-#endif
     frame_worker_data->pbi->allow_lowbitdepth = ctx->cfg.allow_lowbitdepth;
 
     // If decoding in serial mode, FrameWorker thread could create tile worker
@@ -684,7 +663,6 @@
       if (winterface->sync(worker)) {
         // Check if worker has received any frames.
         if (frame_worker_data->received_frame == 1) {
-          ++ctx->available_threads;
           frame_worker_data->received_frame = 0;
           check_resync(ctx, frame_worker_data->pbi);
         }
@@ -760,11 +738,10 @@
       } else {
         // Decoding failed. Release the worker thread.
         frame_worker_data->received_frame = 0;
-        ++ctx->available_threads;
         ctx->need_resync = 1;
         if (ctx->flushed != 1) return NULL;
       }
-    } while (ctx->next_output_worker_id != ctx->next_submit_worker_id);
+    } while (ctx->next_output_worker_id != 0);
   }
   return NULL;
 }
diff --git a/av1/common/enums.h b/av1/common/enums.h
index 869c06e..8a991e3 100644
--- a/av1/common/enums.h
+++ b/av1/common/enums.h
@@ -66,13 +66,10 @@
 #define REF_FRAMES_LOG2 3
 #define REF_FRAMES (1 << REF_FRAMES_LOG2)
 
-// 4 scratch frames for the new frames to support a maximum of 4 cores decoding
-// in parallel, 3 for scaled references on the encoder.
-// TODO(hkuang): Add ondemand frame buffers instead of hardcoding the number
-// of framebuffers.
+// 1 scratch frame for the new frame, 3 for scaled references on the encoder.
 // TODO(jkoleszar): These 3 extra references could probably come from the
 // normal reference pool.
-#define FRAME_BUFFERS (REF_FRAMES + 7)
+#define FRAME_BUFFERS (REF_FRAMES + 4)
 
 // 4 frame filter levels: y plane vertical, y plane horizontal,
 // u plane, and v plane
diff --git a/av1/common/onyxc_int.h b/av1/common/onyxc_int.h
index d906dc6..cf600d1 100644
--- a/av1/common/onyxc_int.h
+++ b/av1/common/onyxc_int.h
@@ -133,17 +133,6 @@
   hash_table hash_table;
   uint8_t intra_only;
   FRAME_TYPE frame_type;
-  // The Following variables will only be used in frame parallel decode.
-
-  // frame_worker_owner indicates which FrameWorker owns this buffer. NULL means
-  // that no FrameWorker owns, or is decoding, this buffer.
-  AVxWorker *frame_worker_owner;
-
-  // row and col indicate which position frame has been decoded to in real
-  // pixel unit. They are reset to -1 when decoding begins and set to INT_MAX
-  // when the frame is fully decoded.
-  int row;
-  int col;
 
   // Inter frame reference frame delta for loop filter
   int8_t ref_deltas[REF_FRAMES];
@@ -156,6 +145,8 @@
 // Protect BufferPool from being accessed by several FrameWorkers at
 // the same time during frame parallel decode.
 // TODO(hkuang): Try to use atomic variable instead of locking the whole pool.
+// TODO(wtc): Remove this. See
+// https://chromium-review.googlesource.com/c/webm/libvpx/+/560630.
 #if CONFIG_MULTITHREAD
   pthread_mutex_t pool_mutex;
 #endif
diff --git a/av1/decoder/decoder.h b/av1/decoder/decoder.h
index b715b6e..dcd8dcc 100644
--- a/av1/decoder/decoder.h
+++ b/av1/decoder/decoder.h
@@ -169,7 +169,6 @@
   // the same.
   RefCntBuffer *cur_buf;  //  Current decoding frame buffer.
 
-  AVxWorker *frame_worker_owner;  // frame_worker that owns this pbi.
   AVxWorker lf_worker;
   AV1LfSync lf_row_sync;
   AV1LrSync lr_row_sync;
diff --git a/av1/decoder/dthread.c b/av1/decoder/dthread.c
deleted file mode 100644
index 3946c78..0000000
--- a/av1/decoder/dthread.c
+++ /dev/null
@@ -1,192 +0,0 @@
-/*
- * Copyright (c) 2016, Alliance for Open Media. All rights reserved
- *
- * This source code is subject to the terms of the BSD 2 Clause License and
- * the Alliance for Open Media Patent License 1.0. If the BSD 2 Clause License
- * was not distributed with this source code in the LICENSE file, you can
- * obtain it at www.aomedia.org/license/software. If the Alliance for Open
- * Media Patent License 1.0 was not distributed with this source code in the
- * PATENTS file, you can obtain it at www.aomedia.org/license/patent.
- */
-
-#include "config/aom_config.h"
-
-#include "aom_mem/aom_mem.h"
-#include "av1/common/reconinter.h"
-#include "av1/decoder/dthread.h"
-#include "av1/decoder/decoder.h"
-
-// #define DEBUG_THREAD
-
-// TODO(hkuang): Clean up all the #ifdef in this file.
-void av1_frameworker_lock_stats(AVxWorker *const worker) {
-#if CONFIG_MULTITHREAD
-  FrameWorkerData *const worker_data = worker->data1;
-  pthread_mutex_lock(&worker_data->stats_mutex);
-#else
-  (void)worker;
-#endif
-}
-
-void av1_frameworker_unlock_stats(AVxWorker *const worker) {
-#if CONFIG_MULTITHREAD
-  FrameWorkerData *const worker_data = worker->data1;
-  pthread_mutex_unlock(&worker_data->stats_mutex);
-#else
-  (void)worker;
-#endif
-}
-
-void av1_frameworker_signal_stats(AVxWorker *const worker) {
-#if CONFIG_MULTITHREAD
-  FrameWorkerData *const worker_data = worker->data1;
-
-// TODO(hkuang): Fix the pthread_cond_broadcast in windows wrapper.
-#if defined(_WIN32) && !HAVE_PTHREAD_H
-  pthread_cond_signal(&worker_data->stats_cond);
-#else
-  pthread_cond_broadcast(&worker_data->stats_cond);
-#endif
-
-#else
-  (void)worker;
-#endif
-}
-
-// This macro prevents thread_sanitizer from reporting known concurrent writes.
-#if defined(__has_feature)
-#if __has_feature(thread_sanitizer)
-#define BUILDING_WITH_TSAN
-#endif
-#endif
-
-// TODO(hkuang): Remove worker parameter as it is only used in debug code.
-void av1_frameworker_wait(AVxWorker *const worker, RefCntBuffer *const ref_buf,
-                          int row) {
-#if CONFIG_MULTITHREAD
-  if (!ref_buf) return;
-
-#ifndef BUILDING_WITH_TSAN
-  // The following line of code will get harmless tsan error but it is the key
-  // to get best performance.
-  if (ref_buf->row >= row && ref_buf->buf.corrupted != 1) return;
-#endif
-
-  {
-    // Find the worker thread that owns the reference frame. If the reference
-    // frame has been fully decoded, it may not have owner.
-    AVxWorker *const ref_worker = ref_buf->frame_worker_owner;
-    FrameWorkerData *const ref_worker_data =
-        (FrameWorkerData *)ref_worker->data1;
-    const AV1Decoder *const pbi = ref_worker_data->pbi;
-
-#ifdef DEBUG_THREAD
-    {
-      FrameWorkerData *const worker_data = (FrameWorkerData *)worker->data1;
-      printf("%d %p worker is waiting for %d %p worker (%d)  ref %d \r\n",
-             worker_data->worker_id, worker, ref_worker_data->worker_id,
-             ref_buf->frame_worker_owner, row, ref_buf->row);
-    }
-#endif
-
-    av1_frameworker_lock_stats(ref_worker);
-    while (ref_buf->row < row && pbi->cur_buf == ref_buf &&
-           ref_buf->buf.corrupted != 1) {
-      pthread_cond_wait(&ref_worker_data->stats_cond,
-                        &ref_worker_data->stats_mutex);
-    }
-
-    if (ref_buf->buf.corrupted == 1) {
-      FrameWorkerData *const worker_data = (FrameWorkerData *)worker->data1;
-      av1_frameworker_unlock_stats(ref_worker);
-      aom_internal_error(&worker_data->pbi->common.error,
-                         AOM_CODEC_CORRUPT_FRAME,
-                         "Worker %p failed to decode frame", worker);
-    }
-    av1_frameworker_unlock_stats(ref_worker);
-  }
-#else
-  (void)worker;
-  (void)ref_buf;
-  (void)row;
-  (void)ref_buf;
-#endif  // CONFIG_MULTITHREAD
-}
-
-void av1_frameworker_broadcast(RefCntBuffer *const buf, int row) {
-#if CONFIG_MULTITHREAD
-  AVxWorker *worker = buf->frame_worker_owner;
-
-#ifdef DEBUG_THREAD
-  {
-    FrameWorkerData *const worker_data = (FrameWorkerData *)worker->data1;
-    printf("%d %p worker decode to (%d) \r\n", worker_data->worker_id,
-           buf->frame_worker_owner, row);
-  }
-#endif
-
-  av1_frameworker_lock_stats(worker);
-  buf->row = row;
-  av1_frameworker_signal_stats(worker);
-  av1_frameworker_unlock_stats(worker);
-#else
-  (void)buf;
-  (void)row;
-#endif  // CONFIG_MULTITHREAD
-}
-
-void av1_frameworker_copy_context(AVxWorker *const dst_worker,
-                                  AVxWorker *const src_worker) {
-#if CONFIG_MULTITHREAD
-  FrameWorkerData *const src_worker_data = (FrameWorkerData *)src_worker->data1;
-  FrameWorkerData *const dst_worker_data = (FrameWorkerData *)dst_worker->data1;
-  AV1_COMMON *const src_cm = &src_worker_data->pbi->common;
-  AV1_COMMON *const dst_cm = &dst_worker_data->pbi->common;
-  int i;
-
-  // Wait until source frame's context is ready.
-  av1_frameworker_lock_stats(src_worker);
-  while (!src_worker_data->frame_context_ready) {
-    pthread_cond_wait(&src_worker_data->stats_cond,
-                      &src_worker_data->stats_mutex);
-  }
-
-  dst_cm->last_frame_seg_map = src_cm->seg.enabled
-                                   ? src_cm->current_frame_seg_map
-                                   : src_cm->last_frame_seg_map;
-  dst_worker_data->pbi->need_resync = src_worker_data->pbi->need_resync;
-  av1_frameworker_unlock_stats(src_worker);
-
-  dst_cm->seq_params.bit_depth = src_cm->seq_params.bit_depth;
-  dst_cm->seq_params.use_highbitdepth = src_cm->seq_params.use_highbitdepth;
-  // TODO(zoeliu): To handle parallel decoding
-  dst_cm->prev_frame =
-      src_cm->show_existing_frame ? src_cm->prev_frame : src_cm->cur_frame;
-  dst_cm->last_width =
-      !src_cm->show_existing_frame ? src_cm->width : src_cm->last_width;
-  dst_cm->last_height =
-      !src_cm->show_existing_frame ? src_cm->height : src_cm->last_height;
-  dst_cm->seq_params.subsampling_x = src_cm->seq_params.subsampling_x;
-  dst_cm->seq_params.subsampling_y = src_cm->seq_params.subsampling_y;
-  dst_cm->frame_type = src_cm->frame_type;
-  dst_cm->last_show_frame = !src_cm->show_existing_frame
-                                ? src_cm->show_frame
-                                : src_cm->last_show_frame;
-  for (i = 0; i < REF_FRAMES; ++i)
-    dst_cm->ref_frame_map[i] = src_cm->next_ref_frame_map[i];
-
-  memcpy(dst_cm->lf_info.lfthr, src_cm->lf_info.lfthr,
-         (MAX_LOOP_FILTER + 1) * sizeof(loop_filter_thresh));
-  dst_cm->lf.sharpness_level = src_cm->lf.sharpness_level;
-  dst_cm->lf.filter_level[0] = src_cm->lf.filter_level[0];
-  dst_cm->lf.filter_level[1] = src_cm->lf.filter_level[1];
-  memcpy(dst_cm->lf.ref_deltas, src_cm->lf.ref_deltas, REF_FRAMES);
-  memcpy(dst_cm->lf.mode_deltas, src_cm->lf.mode_deltas, MAX_MODE_LF_DELTAS);
-  dst_cm->seg = src_cm->seg;
-  memcpy(dst_cm->frame_contexts, src_cm->frame_contexts,
-         FRAME_CONTEXTS * sizeof(dst_cm->frame_contexts[0]));
-#else
-  (void)dst_worker;
-  (void)src_worker;
-#endif  // CONFIG_MULTITHREAD
-}
diff --git a/av1/decoder/dthread.h b/av1/decoder/dthread.h
index 1d264b07..c1b8719 100644
--- a/av1/decoder/dthread.h
+++ b/av1/decoder/dthread.h
@@ -41,40 +41,10 @@
   void *user_priv;
   int worker_id;
   int received_frame;
-
-  // scratch_buffer is used in frame parallel mode only.
-  // It is used to make a copy of the compressed data.
-  uint8_t *scratch_buffer;
-  size_t scratch_buffer_size;
-
-#if CONFIG_MULTITHREAD
-  pthread_mutex_t stats_mutex;
-  pthread_cond_t stats_cond;
-#endif
-
   int frame_context_ready;  // Current frame's context is ready to read.
   int frame_decoded;        // Finished decoding current frame.
 } FrameWorkerData;
 
-void av1_frameworker_lock_stats(AVxWorker *const worker);
-void av1_frameworker_unlock_stats(AVxWorker *const worker);
-void av1_frameworker_signal_stats(AVxWorker *const worker);
-
-// Wait until ref_buf has been decoded to row in real pixel unit.
-// Note: worker may already finish decoding ref_buf and release it in order to
-// start decoding next frame. So need to check whether worker is still decoding
-// ref_buf.
-void av1_frameworker_wait(AVxWorker *const worker, RefCntBuffer *const ref_buf,
-                          int row);
-
-// FrameWorker broadcasts its decoding progress so other workers that are
-// waiting on it can resume decoding.
-void av1_frameworker_broadcast(RefCntBuffer *const buf, int row);
-
-// Copy necessary decoding context from src worker to dst worker.
-void av1_frameworker_copy_context(AVxWorker *const dst_worker,
-                                  AVxWorker *const src_worker);
-
 #ifdef __cplusplus
 }  // extern "C"
 #endif