Remove code used by frame_parallel_decode option.
Commit 5521a18fcd6c0248119f34a2584c174f183f4163 removed the
frame_parallel_decode option on the decoder side:
https://aomedia-review.googlesource.com/c/aom/+/50001
Remove more code used by the frame_parallel_decode option on the decoder
side.
Remove the AOM_CODEC_CAP_FRAME_THREADING flag. Change the value of the
AOM_CODEC_CAP_EXTERNAL_FRAME_BUFFER flag from 0x400000 to 0x200000, the
value previously used by AOM_CODEC_CAP_FRAME_THREADING.
Remove next_submit_worker_id, last_submit_worker_id, and
available_threads. This is the counterpart of
https://chromium-review.googlesource.com/c/webm/libvpx/+/557041.
Remove dthread.c and the related prototypes in dthread.h. This is the
counterpart of
https://chromium-review.googlesource.com/c/webm/libvpx/+/560621.
Remove the frame_worker_owner, row, and col fields of RefCntBuffer, and
the frame_worker_owner field of AV1Decoder.
This is the counterpart of
https://chromium-review.googlesource.com/c/webm/libvpx/+/560622.
Reduce FRAME_BUFFERS by 3 (from REF_FRAMES + 7 to REF_FRAMES + 4): only
one scratch frame is needed now instead of four. This is the counterpart of
https://chromium-review.googlesource.com/c/webm/libvpx/+/560620.
BUG=aomedia:2110
Change-Id: Iaec00020e5b2926507fea849fc3ab976042e1f81
diff --git a/aom/aom_decoder.h b/aom/aom_decoder.h
index 212b3b2..70420c3 100644
--- a/aom/aom_decoder.h
+++ b/aom/aom_decoder.h
@@ -63,10 +63,8 @@
*
* The available flags are specified by AOM_CODEC_USE_* defines.
*/
-/*!\brief Can support frame-based multi-threading */
-#define AOM_CODEC_CAP_FRAME_THREADING 0x200000
/*!brief Can support external frame buffers */
-#define AOM_CODEC_CAP_EXTERNAL_FRAME_BUFFER 0x400000
+#define AOM_CODEC_CAP_EXTERNAL_FRAME_BUFFER 0x200000
#define AOM_CODEC_USE_POSTPROC 0x10000 /**< Postprocess decoded frame */
diff --git a/apps/aomdec.c b/apps/aomdec.c
index 848fb35..4b7fc40 100644
--- a/apps/aomdec.c
+++ b/apps/aomdec.c
@@ -779,7 +779,7 @@
aom_usec_timer_start(&timer);
if (flush_decoder) {
- // Flush the decoder in frame parallel decode.
+ // Flush the decoder.
if (aom_codec_decode(&decoder, NULL, 0, NULL)) {
warn("Failed to flush decoder: %s", aom_codec_error(&decoder));
}
diff --git a/av1/av1.cmake b/av1/av1.cmake
index 3a7cd7e..13bdee9 100644
--- a/av1/av1.cmake
+++ b/av1/av1.cmake
@@ -100,7 +100,6 @@
"${AOM_ROOT}/av1/decoder/decodetxb.h"
"${AOM_ROOT}/av1/decoder/detokenize.c"
"${AOM_ROOT}/av1/decoder/detokenize.h"
- "${AOM_ROOT}/av1/decoder/dthread.c"
"${AOM_ROOT}/av1/decoder/dthread.h"
"${AOM_ROOT}/av1/decoder/obu.h"
"${AOM_ROOT}/av1/decoder/obu.c")
diff --git a/av1/av1_dx_iface.c b/av1/av1_dx_iface.c
index 4a8817c..904d12f 100644
--- a/av1/av1_dx_iface.c
+++ b/av1/av1_dx_iface.c
@@ -58,12 +58,13 @@
int operating_point;
int output_all_layers;
+ // TODO(wtc): This can be simplified. num_frame_workers is always 1, and
+ // next_output_worker_id is always 0. The frame_workers array of size 1 can
+ // be replaced by a single AVxWorker.
AVxWorker *frame_workers;
int num_frame_workers;
- int next_submit_worker_id;
- int last_submit_worker_id;
int next_output_worker_id;
- int available_threads;
+
aom_image_t *image_with_grain[MAX_NUM_SPATIAL_LAYERS];
int need_resync; // wait for key/intra-only frame
// BufferPool that holds all reference frames. Shared by all the FrameWorkers.
@@ -132,11 +133,6 @@
av1_remove_common(&frame_worker_data->pbi->common);
av1_free_restoration_buffers(&frame_worker_data->pbi->common);
av1_decoder_remove(frame_worker_data->pbi);
- aom_free(frame_worker_data->scratch_buffer);
-#if CONFIG_MULTITHREAD
- pthread_mutex_destroy(&frame_worker_data->stats_mutex);
- pthread_cond_destroy(&frame_worker_data->stats_cond);
-#endif
aom_free(frame_worker_data);
}
#if CONFIG_MULTITHREAD
@@ -371,14 +367,11 @@
const AVxWorkerInterface *const winterface = aom_get_worker_interface();
ctx->last_show_frame = -1;
- ctx->next_submit_worker_id = 0;
- ctx->last_submit_worker_id = 0;
ctx->next_output_worker_id = 0;
ctx->need_resync = 1;
ctx->num_frame_workers = 1;
if (ctx->num_frame_workers > MAX_DECODE_THREADS)
ctx->num_frame_workers = MAX_DECODE_THREADS;
- ctx->available_threads = ctx->num_frame_workers;
ctx->flushed = 0;
ctx->buffer_pool = (BufferPool *)aom_calloc(1, sizeof(BufferPool));
@@ -415,23 +408,9 @@
return AOM_CODEC_MEM_ERROR;
}
frame_worker_data->pbi->common.options = &ctx->cfg.cfg;
- frame_worker_data->pbi->frame_worker_owner = worker;
frame_worker_data->worker_id = i;
- frame_worker_data->scratch_buffer = NULL;
- frame_worker_data->scratch_buffer_size = 0;
frame_worker_data->frame_context_ready = 0;
frame_worker_data->received_frame = 0;
-#if CONFIG_MULTITHREAD
- if (pthread_mutex_init(&frame_worker_data->stats_mutex, NULL)) {
- set_error_detail(ctx, "Failed to allocate frame_worker_data mutex");
- return AOM_CODEC_MEM_ERROR;
- }
-
- if (pthread_cond_init(&frame_worker_data->stats_cond, NULL)) {
- set_error_detail(ctx, "Failed to allocate frame_worker_data cond");
- return AOM_CODEC_MEM_ERROR;
- }
-#endif
frame_worker_data->pbi->allow_lowbitdepth = ctx->cfg.allow_lowbitdepth;
// If decoding in serial mode, FrameWorker thread could create tile worker
@@ -684,7 +663,6 @@
if (winterface->sync(worker)) {
// Check if worker has received any frames.
if (frame_worker_data->received_frame == 1) {
- ++ctx->available_threads;
frame_worker_data->received_frame = 0;
check_resync(ctx, frame_worker_data->pbi);
}
@@ -760,11 +738,10 @@
} else {
// Decoding failed. Release the worker thread.
frame_worker_data->received_frame = 0;
- ++ctx->available_threads;
ctx->need_resync = 1;
if (ctx->flushed != 1) return NULL;
}
- } while (ctx->next_output_worker_id != ctx->next_submit_worker_id);
+ } while (ctx->next_output_worker_id != 0);
}
return NULL;
}
diff --git a/av1/common/enums.h b/av1/common/enums.h
index 869c06e..8a991e3 100644
--- a/av1/common/enums.h
+++ b/av1/common/enums.h
@@ -66,13 +66,10 @@
#define REF_FRAMES_LOG2 3
#define REF_FRAMES (1 << REF_FRAMES_LOG2)
-// 4 scratch frames for the new frames to support a maximum of 4 cores decoding
-// in parallel, 3 for scaled references on the encoder.
-// TODO(hkuang): Add ondemand frame buffers instead of hardcoding the number
-// of framebuffers.
+// 1 scratch frame for the new frame, 3 for scaled references on the encoder.
// TODO(jkoleszar): These 3 extra references could probably come from the
// normal reference pool.
-#define FRAME_BUFFERS (REF_FRAMES + 7)
+#define FRAME_BUFFERS (REF_FRAMES + 4)
// 4 frame filter levels: y plane vertical, y plane horizontal,
// u plane, and v plane
diff --git a/av1/common/onyxc_int.h b/av1/common/onyxc_int.h
index d906dc6..cf600d1 100644
--- a/av1/common/onyxc_int.h
+++ b/av1/common/onyxc_int.h
@@ -133,17 +133,6 @@
hash_table hash_table;
uint8_t intra_only;
FRAME_TYPE frame_type;
- // The Following variables will only be used in frame parallel decode.
-
- // frame_worker_owner indicates which FrameWorker owns this buffer. NULL means
- // that no FrameWorker owns, or is decoding, this buffer.
- AVxWorker *frame_worker_owner;
-
- // row and col indicate which position frame has been decoded to in real
- // pixel unit. They are reset to -1 when decoding begins and set to INT_MAX
- // when the frame is fully decoded.
- int row;
- int col;
// Inter frame reference frame delta for loop filter
int8_t ref_deltas[REF_FRAMES];
@@ -156,6 +145,8 @@
// Protect BufferPool from being accessed by several FrameWorkers at
// the same time during frame parallel decode.
// TODO(hkuang): Try to use atomic variable instead of locking the whole pool.
+// TODO(wtc): Remove this. See
+// https://chromium-review.googlesource.com/c/webm/libvpx/+/560630.
#if CONFIG_MULTITHREAD
pthread_mutex_t pool_mutex;
#endif
diff --git a/av1/decoder/decoder.h b/av1/decoder/decoder.h
index b715b6e..dcd8dcc 100644
--- a/av1/decoder/decoder.h
+++ b/av1/decoder/decoder.h
@@ -169,7 +169,6 @@
// the same.
RefCntBuffer *cur_buf; // Current decoding frame buffer.
- AVxWorker *frame_worker_owner; // frame_worker that owns this pbi.
AVxWorker lf_worker;
AV1LfSync lf_row_sync;
AV1LrSync lr_row_sync;
diff --git a/av1/decoder/dthread.c b/av1/decoder/dthread.c
deleted file mode 100644
index 3946c78..0000000
--- a/av1/decoder/dthread.c
+++ /dev/null
@@ -1,192 +0,0 @@
-/*
- * Copyright (c) 2016, Alliance for Open Media. All rights reserved
- *
- * This source code is subject to the terms of the BSD 2 Clause License and
- * the Alliance for Open Media Patent License 1.0. If the BSD 2 Clause License
- * was not distributed with this source code in the LICENSE file, you can
- * obtain it at www.aomedia.org/license/software. If the Alliance for Open
- * Media Patent License 1.0 was not distributed with this source code in the
- * PATENTS file, you can obtain it at www.aomedia.org/license/patent.
- */
-
-#include "config/aom_config.h"
-
-#include "aom_mem/aom_mem.h"
-#include "av1/common/reconinter.h"
-#include "av1/decoder/dthread.h"
-#include "av1/decoder/decoder.h"
-
-// #define DEBUG_THREAD
-
-// TODO(hkuang): Clean up all the #ifdef in this file.
-void av1_frameworker_lock_stats(AVxWorker *const worker) {
-#if CONFIG_MULTITHREAD
- FrameWorkerData *const worker_data = worker->data1;
- pthread_mutex_lock(&worker_data->stats_mutex);
-#else
- (void)worker;
-#endif
-}
-
-void av1_frameworker_unlock_stats(AVxWorker *const worker) {
-#if CONFIG_MULTITHREAD
- FrameWorkerData *const worker_data = worker->data1;
- pthread_mutex_unlock(&worker_data->stats_mutex);
-#else
- (void)worker;
-#endif
-}
-
-void av1_frameworker_signal_stats(AVxWorker *const worker) {
-#if CONFIG_MULTITHREAD
- FrameWorkerData *const worker_data = worker->data1;
-
-// TODO(hkuang): Fix the pthread_cond_broadcast in windows wrapper.
-#if defined(_WIN32) && !HAVE_PTHREAD_H
- pthread_cond_signal(&worker_data->stats_cond);
-#else
- pthread_cond_broadcast(&worker_data->stats_cond);
-#endif
-
-#else
- (void)worker;
-#endif
-}
-
-// This macro prevents thread_sanitizer from reporting known concurrent writes.
-#if defined(__has_feature)
-#if __has_feature(thread_sanitizer)
-#define BUILDING_WITH_TSAN
-#endif
-#endif
-
-// TODO(hkuang): Remove worker parameter as it is only used in debug code.
-void av1_frameworker_wait(AVxWorker *const worker, RefCntBuffer *const ref_buf,
- int row) {
-#if CONFIG_MULTITHREAD
- if (!ref_buf) return;
-
-#ifndef BUILDING_WITH_TSAN
- // The following line of code will get harmless tsan error but it is the key
- // to get best performance.
- if (ref_buf->row >= row && ref_buf->buf.corrupted != 1) return;
-#endif
-
- {
- // Find the worker thread that owns the reference frame. If the reference
- // frame has been fully decoded, it may not have owner.
- AVxWorker *const ref_worker = ref_buf->frame_worker_owner;
- FrameWorkerData *const ref_worker_data =
- (FrameWorkerData *)ref_worker->data1;
- const AV1Decoder *const pbi = ref_worker_data->pbi;
-
-#ifdef DEBUG_THREAD
- {
- FrameWorkerData *const worker_data = (FrameWorkerData *)worker->data1;
- printf("%d %p worker is waiting for %d %p worker (%d) ref %d \r\n",
- worker_data->worker_id, worker, ref_worker_data->worker_id,
- ref_buf->frame_worker_owner, row, ref_buf->row);
- }
-#endif
-
- av1_frameworker_lock_stats(ref_worker);
- while (ref_buf->row < row && pbi->cur_buf == ref_buf &&
- ref_buf->buf.corrupted != 1) {
- pthread_cond_wait(&ref_worker_data->stats_cond,
- &ref_worker_data->stats_mutex);
- }
-
- if (ref_buf->buf.corrupted == 1) {
- FrameWorkerData *const worker_data = (FrameWorkerData *)worker->data1;
- av1_frameworker_unlock_stats(ref_worker);
- aom_internal_error(&worker_data->pbi->common.error,
- AOM_CODEC_CORRUPT_FRAME,
- "Worker %p failed to decode frame", worker);
- }
- av1_frameworker_unlock_stats(ref_worker);
- }
-#else
- (void)worker;
- (void)ref_buf;
- (void)row;
- (void)ref_buf;
-#endif // CONFIG_MULTITHREAD
-}
-
-void av1_frameworker_broadcast(RefCntBuffer *const buf, int row) {
-#if CONFIG_MULTITHREAD
- AVxWorker *worker = buf->frame_worker_owner;
-
-#ifdef DEBUG_THREAD
- {
- FrameWorkerData *const worker_data = (FrameWorkerData *)worker->data1;
- printf("%d %p worker decode to (%d) \r\n", worker_data->worker_id,
- buf->frame_worker_owner, row);
- }
-#endif
-
- av1_frameworker_lock_stats(worker);
- buf->row = row;
- av1_frameworker_signal_stats(worker);
- av1_frameworker_unlock_stats(worker);
-#else
- (void)buf;
- (void)row;
-#endif // CONFIG_MULTITHREAD
-}
-
-void av1_frameworker_copy_context(AVxWorker *const dst_worker,
- AVxWorker *const src_worker) {
-#if CONFIG_MULTITHREAD
- FrameWorkerData *const src_worker_data = (FrameWorkerData *)src_worker->data1;
- FrameWorkerData *const dst_worker_data = (FrameWorkerData *)dst_worker->data1;
- AV1_COMMON *const src_cm = &src_worker_data->pbi->common;
- AV1_COMMON *const dst_cm = &dst_worker_data->pbi->common;
- int i;
-
- // Wait until source frame's context is ready.
- av1_frameworker_lock_stats(src_worker);
- while (!src_worker_data->frame_context_ready) {
- pthread_cond_wait(&src_worker_data->stats_cond,
- &src_worker_data->stats_mutex);
- }
-
- dst_cm->last_frame_seg_map = src_cm->seg.enabled
- ? src_cm->current_frame_seg_map
- : src_cm->last_frame_seg_map;
- dst_worker_data->pbi->need_resync = src_worker_data->pbi->need_resync;
- av1_frameworker_unlock_stats(src_worker);
-
- dst_cm->seq_params.bit_depth = src_cm->seq_params.bit_depth;
- dst_cm->seq_params.use_highbitdepth = src_cm->seq_params.use_highbitdepth;
- // TODO(zoeliu): To handle parallel decoding
- dst_cm->prev_frame =
- src_cm->show_existing_frame ? src_cm->prev_frame : src_cm->cur_frame;
- dst_cm->last_width =
- !src_cm->show_existing_frame ? src_cm->width : src_cm->last_width;
- dst_cm->last_height =
- !src_cm->show_existing_frame ? src_cm->height : src_cm->last_height;
- dst_cm->seq_params.subsampling_x = src_cm->seq_params.subsampling_x;
- dst_cm->seq_params.subsampling_y = src_cm->seq_params.subsampling_y;
- dst_cm->frame_type = src_cm->frame_type;
- dst_cm->last_show_frame = !src_cm->show_existing_frame
- ? src_cm->show_frame
- : src_cm->last_show_frame;
- for (i = 0; i < REF_FRAMES; ++i)
- dst_cm->ref_frame_map[i] = src_cm->next_ref_frame_map[i];
-
- memcpy(dst_cm->lf_info.lfthr, src_cm->lf_info.lfthr,
- (MAX_LOOP_FILTER + 1) * sizeof(loop_filter_thresh));
- dst_cm->lf.sharpness_level = src_cm->lf.sharpness_level;
- dst_cm->lf.filter_level[0] = src_cm->lf.filter_level[0];
- dst_cm->lf.filter_level[1] = src_cm->lf.filter_level[1];
- memcpy(dst_cm->lf.ref_deltas, src_cm->lf.ref_deltas, REF_FRAMES);
- memcpy(dst_cm->lf.mode_deltas, src_cm->lf.mode_deltas, MAX_MODE_LF_DELTAS);
- dst_cm->seg = src_cm->seg;
- memcpy(dst_cm->frame_contexts, src_cm->frame_contexts,
- FRAME_CONTEXTS * sizeof(dst_cm->frame_contexts[0]));
-#else
- (void)dst_worker;
- (void)src_worker;
-#endif // CONFIG_MULTITHREAD
-}
diff --git a/av1/decoder/dthread.h b/av1/decoder/dthread.h
index 1d264b07..c1b8719 100644
--- a/av1/decoder/dthread.h
+++ b/av1/decoder/dthread.h
@@ -41,40 +41,10 @@
void *user_priv;
int worker_id;
int received_frame;
-
- // scratch_buffer is used in frame parallel mode only.
- // It is used to make a copy of the compressed data.
- uint8_t *scratch_buffer;
- size_t scratch_buffer_size;
-
-#if CONFIG_MULTITHREAD
- pthread_mutex_t stats_mutex;
- pthread_cond_t stats_cond;
-#endif
-
int frame_context_ready; // Current frame's context is ready to read.
int frame_decoded; // Finished decoding current frame.
} FrameWorkerData;
-void av1_frameworker_lock_stats(AVxWorker *const worker);
-void av1_frameworker_unlock_stats(AVxWorker *const worker);
-void av1_frameworker_signal_stats(AVxWorker *const worker);
-
-// Wait until ref_buf has been decoded to row in real pixel unit.
-// Note: worker may already finish decoding ref_buf and release it in order to
-// start decoding next frame. So need to check whether worker is still decoding
-// ref_buf.
-void av1_frameworker_wait(AVxWorker *const worker, RefCntBuffer *const ref_buf,
- int row);
-
-// FrameWorker broadcasts its decoding progress so other workers that are
-// waiting on it can resume decoding.
-void av1_frameworker_broadcast(RefCntBuffer *const buf, int row);
-
-// Copy necessary decoding context from src worker to dst worker.
-void av1_frameworker_copy_context(AVxWorker *const dst_worker,
- AVxWorker *const src_worker);
-
#ifdef __cplusplus
} // extern "C"
#endif