Merge "Renaming "mbskip" to "skip"."
diff --git a/ivfenc.c b/ivfenc.c
index 0041ff0..4a97c42 100644
--- a/ivfenc.c
+++ b/ivfenc.c
@@ -10,7 +10,6 @@
#include "./ivfenc.h"
-#include "./tools_common.h"
#include "vpx/vpx_encoder.h"
#include "vpx_ports/mem_ops.h"
@@ -24,33 +23,31 @@
header[1] = 'K';
header[2] = 'I';
header[3] = 'F';
- mem_put_le16(header + 4, 0); /* version */
- mem_put_le16(header + 6, 32); /* headersize */
- mem_put_le32(header + 8, fourcc); /* four CC */
- mem_put_le16(header + 12, cfg->g_w); /* width */
- mem_put_le16(header + 14, cfg->g_h); /* height */
- mem_put_le32(header + 16, cfg->g_timebase.den); /* rate */
- mem_put_le32(header + 20, cfg->g_timebase.num); /* scale */
- mem_put_le32(header + 24, frame_cnt); /* length */
- mem_put_le32(header + 28, 0); /* unused */
+ mem_put_le16(header + 4, 0); // version
+ mem_put_le16(header + 6, 32); // header size
+ mem_put_le32(header + 8, fourcc); // fourcc
+ mem_put_le16(header + 12, cfg->g_w); // width
+ mem_put_le16(header + 14, cfg->g_h); // height
+ mem_put_le32(header + 16, cfg->g_timebase.den); // rate
+ mem_put_le32(header + 20, cfg->g_timebase.num); // scale
+ mem_put_le32(header + 24, frame_cnt); // length
+ mem_put_le32(header + 28, 0); // unused
- (void) fwrite(header, 1, 32, outfile);
+ fwrite(header, 1, 32, outfile);
}
-void ivf_write_frame_header(FILE *outfile, const struct vpx_codec_cx_pkt *pkt) {
+void ivf_write_frame_header(FILE *outfile, int64_t pts, size_t frame_size) {
char header[12];
- vpx_codec_pts_t pts;
- pts = pkt->data.frame.pts;
- mem_put_le32(header, (int)pkt->data.frame.sz);
- mem_put_le32(header + 4, pts & 0xFFFFFFFF);
- mem_put_le32(header + 8, pts >> 32);
-
- (void) fwrite(header, 1, 12, outfile);
+ mem_put_le32(header, (int)frame_size);
+ mem_put_le32(header + 4, (int)(pts & 0xFFFFFFFF));
+ mem_put_le32(header + 8, (int)(pts >> 32));
+ fwrite(header, 1, 12, outfile);
}
-void ivf_write_frame_size(FILE *outfile, size_t size) {
+void ivf_write_frame_size(FILE *outfile, size_t frame_size) {
char header[4];
- mem_put_le32(header, (int)size);
- (void) fwrite(header, 1, 4, outfile);
+
+ mem_put_le32(header, (int)frame_size);
+ fwrite(header, 1, 4, outfile);
}
diff --git a/ivfenc.h b/ivfenc.h
index b486bc8..6623687 100644
--- a/ivfenc.h
+++ b/ivfenc.h
@@ -23,8 +23,10 @@
const struct vpx_codec_enc_cfg *cfg,
uint32_t fourcc,
int frame_cnt);
-void ivf_write_frame_header(FILE *outfile, const struct vpx_codec_cx_pkt *pkt);
-void ivf_write_frame_size(FILE *outfile, size_t size);
+
+void ivf_write_frame_header(FILE *outfile, int64_t pts, size_t frame_size);
+
+void ivf_write_frame_size(FILE *outfile, size_t frame_size);
#ifdef __cplusplus
} /* extern "C" */
diff --git a/libs.mk b/libs.mk
index 470066a..cc40451 100644
--- a/libs.mk
+++ b/libs.mk
@@ -214,8 +214,11 @@
ifeq ($(CONFIG_EXTERNAL_BUILD),yes)
ifeq ($(CONFIG_MSVS),yes)
+obj_int_extract.bat: $(SRC_PATH_BARE)/build/$(MSVS_ARCH_DIR)/obj_int_extract.bat
+ @cp $^ $@
+
+obj_int_extract.$(VCPROJ_SFX): obj_int_extract.bat
obj_int_extract.$(VCPROJ_SFX): $(SRC_PATH_BARE)/build/make/obj_int_extract.c
- @cp $(SRC_PATH_BARE)/build/$(MSVS_ARCH_DIR)/obj_int_extract.bat .
@echo " [CREATE] $@"
$(qexec)$(GEN_VCPROJ) \
--exe \
diff --git a/test/i420_video_source.h b/test/i420_video_source.h
index 2bf2a03..c3315f9 100644
--- a/test/i420_video_source.h
+++ b/test/i420_video_source.h
@@ -52,7 +52,7 @@
ASSERT_TRUE(input_file_ != NULL) << "Input file open failed. Filename: "
<< file_name_;
if (start_) {
- fseek(input_file_, raw_sz_ * start_, SEEK_SET);
+ fseek(input_file_, static_cast<unsigned>(raw_sz_) * start_, SEEK_SET);
}
frame_ = start_;
diff --git a/test/subtract_test.cc b/test/subtract_test.cc
index 3211c5c..9e242a2 100644
--- a/test/subtract_test.cc
+++ b/test/subtract_test.cc
@@ -59,7 +59,7 @@
int16_t *src_diff = be.src_diff;
for (int r = 0; r < kBlockHeight; ++r) {
for (int c = 0; c < kBlockWidth; ++c) {
- src_diff[c] = static_cast<int16_t>(0xa5a5);
+ src_diff[c] = static_cast<int16_t>(0xa5a5u);
}
src_diff += kDiffPredStride;
}
diff --git a/test/svc_test.cc b/test/svc_test.cc
index 3ddd9c1..75659d5 100644
--- a/test/svc_test.cc
+++ b/test/svc_test.cc
@@ -234,7 +234,7 @@
video.Begin();
res = vpx_svc_encode(&svc_, &codec_, video.img(), video.pts(),
- video.duration(), VPX_DL_REALTIME);
+ video.duration(), VPX_DL_GOOD_QUALITY);
EXPECT_EQ(VPX_CODEC_OK, res);
const vpx_codec_err_t res_dec = decoder_->DecodeFrame(
@@ -262,7 +262,7 @@
video.Begin();
// This frame is a keyframe.
res = vpx_svc_encode(&svc_, &codec_, video.img(), video.pts(),
- video.duration(), VPX_DL_REALTIME);
+ video.duration(), VPX_DL_GOOD_QUALITY);
ASSERT_EQ(VPX_CODEC_OK, res);
EXPECT_EQ(1, vpx_svc_is_keyframe(&svc_));
@@ -275,7 +275,7 @@
video.Next();
// This is a P-frame.
res = vpx_svc_encode(&svc_, &codec_, video.img(), video.pts(),
- video.duration(), VPX_DL_REALTIME);
+ video.duration(), VPX_DL_GOOD_QUALITY);
ASSERT_EQ(VPX_CODEC_OK, res);
EXPECT_EQ(0, vpx_svc_is_keyframe(&svc_));
@@ -288,7 +288,7 @@
video.Next();
// This is a P-frame.
res = vpx_svc_encode(&svc_, &codec_, video.img(), video.pts(),
- video.duration(), VPX_DL_REALTIME);
+ video.duration(), VPX_DL_GOOD_QUALITY);
ASSERT_EQ(VPX_CODEC_OK, res);
EXPECT_EQ(0, vpx_svc_is_keyframe(&svc_));
diff --git a/test/test-data.sha1 b/test/test-data.sha1
index 6daf69e..a287731 100644
--- a/test/test-data.sha1
+++ b/test/test-data.sha1
@@ -574,3 +574,5 @@
84c1599298aac78f2fc05ae2274575d10569dfa0 vp90-2-09-aq2.webm.md5
55fc55ed73d578ed60fad05692579873f8bad758 vp90-2-09-lf_deltas.webm
54638c38009198c38c8f3b25c182b709b6c1fd2e vp90-2-09-lf_deltas.webm.md5
+510d95f3beb3b51c572611fdaeeece12277dac30 vp90-2-10-show-existing-frame.webm
+14d631096f4bfa2d71f7f739aec1448fb3c33bad vp90-2-10-show-existing-frame.webm.md5
diff --git a/test/test.mk b/test/test.mk
index cb62615..a65decf 100644
--- a/test/test.mk
+++ b/test/test.mk
@@ -676,6 +676,8 @@
LIBVPX_TEST_DATA-$(CONFIG_VP9_DECODER) += vp90-2-09-lf_deltas.webm.md5
LIBVPX_TEST_DATA-$(CONFIG_VP9_DECODER) += vp91-2-04-yv444.webm
LIBVPX_TEST_DATA-$(CONFIG_VP9_DECODER) += vp91-2-04-yv444.webm.md5
+LIBVPX_TEST_DATA-$(CONFIG_VP9_DECODER) += vp90-2-10-show-existing-frame.webm
+LIBVPX_TEST_DATA-$(CONFIG_VP9_DECODER) += vp90-2-10-show-existing-frame.webm.md5
ifeq ($(CONFIG_DECODE_PERF_TESTS),yes)
# BBB VP9 streams
diff --git a/test/test_vector_test.cc b/test/test_vector_test.cc
index 4adf9af..53b7636 100644
--- a/test/test_vector_test.cc
+++ b/test/test_vector_test.cc
@@ -35,7 +35,7 @@
void OpenMD5File(const std::string& md5_file_name_) {
md5_file_ = libvpx_test::OpenTestDataFile(md5_file_name_);
- ASSERT_TRUE(md5_file_) << "Md5 file open failed. Filename: "
+ ASSERT_TRUE(md5_file_ != NULL) << "Md5 file open failed. Filename: "
<< md5_file_name_;
}
diff --git a/test/test_vectors.cc b/test/test_vectors.cc
index 16298d0..3227f52 100644
--- a/test/test_vectors.cc
+++ b/test/test_vectors.cc
@@ -160,6 +160,7 @@
"vp90-2-02-size-lf-1920x1080.webm",
"vp90-2-09-aq2.webm",
"vp90-2-09-lf_deltas.webm",
+ "vp90-2-10-show-existing-frame.webm",
#if CONFIG_NON420
"vp91-2-04-yv444.webm"
#endif
diff --git a/test/test_vectors.h b/test/test_vectors.h
index 5f62e99..eb592de 100644
--- a/test/test_vectors.h
+++ b/test/test_vectors.h
@@ -22,9 +22,9 @@
#if CONFIG_VP9_DECODER
#if CONFIG_NON420
-const int kNumVp9TestVectors = 216;
+const int kNumVp9TestVectors = 217;
#else
-const int kNumVp9TestVectors = 215;
+const int kNumVp9TestVectors = 216;
#endif
extern const char *kVP9TestVectors[kNumVp9TestVectors];
diff --git a/test/vp8_boolcoder_test.cc b/test/vp8_boolcoder_test.cc
index fa7ee6e..7c6c601 100644
--- a/test/vp8_boolcoder_test.cc
+++ b/test/vp8_boolcoder_test.cc
@@ -43,7 +43,7 @@
void test_decrypt_cb(void *decrypt_state, const uint8_t *input,
uint8_t *output, int count) {
- int offset = input - reinterpret_cast<uint8_t *>(decrypt_state);
+ const size_t offset = input - reinterpret_cast<uint8_t*>(decrypt_state);
for (int i = 0; i < count; i++) {
output[i] = input[i] ^ secret_key[(offset + i) & 15];
}
diff --git a/vp9/common/vp9_loopfilter.c b/vp9/common/vp9_loopfilter.c
index 2266e0e..dd304c9 100644
--- a/vp9/common/vp9_loopfilter.c
+++ b/vp9/common/vp9_loopfilter.c
@@ -16,26 +16,6 @@
#include "vp9/common/vp9_seg_common.h"
-// This structure holds bit masks for all 8x8 blocks in a 64x64 region.
-// Each 1 bit represents a position in which we want to apply the loop filter.
-// Left_ entries refer to whether we apply a filter on the border to the
-// left of the block. Above_ entries refer to whether or not to apply a
-// filter on the above border. Int_ entries refer to whether or not to
-// apply borders on the 4x4 edges within the 8x8 block that each bit
-// represents.
-// Since each transform is accompanied by a potentially different type of
-// loop filter there is a different entry in the array for each transform size.
-typedef struct {
- uint64_t left_y[TX_SIZES];
- uint64_t above_y[TX_SIZES];
- uint64_t int_4x4_y;
- uint16_t left_uv[TX_SIZES];
- uint16_t above_uv[TX_SIZES];
- uint16_t int_4x4_uv;
- uint8_t lfl_y[64];
- uint8_t lfl_uv[16];
-} LOOP_FILTER_MASK;
-
// 64 bit masks for left transform size. Each 1 represents a position where
// we should apply a loop filter across the left border of an 8x8 block
// boundary.
@@ -638,9 +618,9 @@
// This function sets up the bit masks for the entire 64x64 region represented
// by mi_row, mi_col.
// TODO(JBB): This function only works for yv12.
-static void setup_mask(VP9_COMMON *const cm, const int mi_row, const int mi_col,
- MODE_INFO **mi_8x8, const int mode_info_stride,
- LOOP_FILTER_MASK *lfm) {
+void vp9_setup_mask(VP9_COMMON *const cm, const int mi_row, const int mi_col,
+ MODE_INFO **mi_8x8, const int mode_info_stride,
+ LOOP_FILTER_MASK *lfm) {
int idx_32, idx_16, idx_8;
const loop_filter_info_n *const lfi_n = &cm->lf_info;
MODE_INFO **mip = mi_8x8;
@@ -1069,10 +1049,10 @@
}
#endif
-static void filter_block_plane(VP9_COMMON *const cm,
- struct macroblockd_plane *const plane,
- int mi_row,
- LOOP_FILTER_MASK *lfm) {
+void vp9_filter_block_plane(VP9_COMMON *const cm,
+ struct macroblockd_plane *const plane,
+ int mi_row,
+ LOOP_FILTER_MASK *lfm) {
struct buf_2d *const dst = &plane->dst;
uint8_t* const dst0 = dst->buf;
int r, c;
@@ -1244,14 +1224,14 @@
#if CONFIG_NON420
if (use_420)
#endif
- setup_mask(cm, mi_row, mi_col, mi_8x8 + mi_col, cm->mode_info_stride,
- &lfm);
+ vp9_setup_mask(cm, mi_row, mi_col, mi_8x8 + mi_col,
+ cm->mode_info_stride, &lfm);
for (plane = 0; plane < num_planes; ++plane) {
#if CONFIG_NON420
if (use_420)
#endif
- filter_block_plane(cm, &xd->plane[plane], mi_row, &lfm);
+ vp9_filter_block_plane(cm, &xd->plane[plane], mi_row, &lfm);
#if CONFIG_NON420
else
filter_block_plane_non420(cm, &xd->plane[plane], mi_8x8 + mi_col,
diff --git a/vp9/common/vp9_loopfilter.h b/vp9/common/vp9_loopfilter.h
index 43373f4..668e898 100644
--- a/vp9/common/vp9_loopfilter.h
+++ b/vp9/common/vp9_loopfilter.h
@@ -60,9 +60,42 @@
uint8_t lvl[MAX_SEGMENTS][MAX_REF_FRAMES][MAX_MODE_LF_DELTAS];
} loop_filter_info_n;
+// This structure holds bit masks for all 8x8 blocks in a 64x64 region.
+// Each 1 bit represents a position in which we want to apply the loop filter.
+// Left_ entries refer to whether we apply a filter on the border to the
+// left of the block. Above_ entries refer to whether or not to apply a
+// filter on the above border. Int_ entries refer to whether or not to
+// apply a filter on the 4x4 edges within the 8x8 block that each bit
+// represents.
+// Since each transform is accompanied by a potentially different type of
+// loop filter there is a different entry in the array for each transform size.
+typedef struct {
+ uint64_t left_y[TX_SIZES];  // luma left-edge mask, one 64-bit mask per transform size
+ uint64_t above_y[TX_SIZES];  // luma above-edge mask, one 64-bit mask per transform size
+ uint64_t int_4x4_y;  // luma internal 4x4-edge mask
+ uint16_t left_uv[TX_SIZES];  // chroma left-edge mask, 16 bits per transform size
+ uint16_t above_uv[TX_SIZES];  // chroma above-edge mask, 16 bits per transform size
+ uint16_t int_4x4_uv;  // chroma internal 4x4-edge mask
+ uint8_t lfl_y[64];  // NOTE(review): presumably one luma filter level per 8x8 block - confirm
+ uint8_t lfl_uv[16];  // NOTE(review): presumably one chroma filter level per block - confirm
+} LOOP_FILTER_MASK;
+
/* assorted loopfilter functions which get used elsewhere */
struct VP9Common;
struct macroblockd;
+struct VP9LfSyncData;
+
+// This function sets up the bit masks for the entire 64x64 region represented
+// by mi_row, mi_col.
+void vp9_setup_mask(struct VP9Common *const cm,
+ const int mi_row, const int mi_col,
+ MODE_INFO **mi_8x8, const int mode_info_stride,
+ LOOP_FILTER_MASK *lfm);
+
+void vp9_filter_block_plane(struct VP9Common *const cm,
+ struct macroblockd_plane *const plane,
+ int mi_row,
+ LOOP_FILTER_MASK *lfm);
void vp9_loop_filter_init(struct VP9Common *cm);
@@ -90,6 +123,9 @@
int start;
int stop;
int y_only;
+
+ struct VP9LfSyncData *lf_sync;
+ int num_lf_workers;
} LFWorkerData;
// Operates on the rows described by LFWorkerData passed as 'arg1'.
diff --git a/vp9/common/vp9_onyx.h b/vp9/common/vp9_onyx.h
index 9df76de..564e419 100644
--- a/vp9/common/vp9_onyx.h
+++ b/vp9/common/vp9_onyx.h
@@ -55,6 +55,7 @@
MODE_FIRSTPASS = 0x3,
MODE_SECONDPASS = 0x4,
MODE_SECONDPASS_BEST = 0x5,
+ MODE_REALTIME = 0x6,
} MODE;
typedef enum {
diff --git a/vp9/decoder/vp9_decodeframe.c b/vp9/decoder/vp9_decodeframe.c
index 3b2b48c..a4df051 100644
--- a/vp9/decoder/vp9_decodeframe.c
+++ b/vp9/decoder/vp9_decodeframe.c
@@ -33,18 +33,12 @@
#include "vp9/decoder/vp9_detokenize.h"
#include "vp9/decoder/vp9_decodemv.h"
#include "vp9/decoder/vp9_dsubexp.h"
+#include "vp9/decoder/vp9_dthread.h"
#include "vp9/decoder/vp9_onyxd_int.h"
#include "vp9/decoder/vp9_read_bit_buffer.h"
#include "vp9/decoder/vp9_reader.h"
#include "vp9/decoder/vp9_thread.h"
-typedef struct TileWorkerData {
- VP9_COMMON *cm;
- vp9_reader bit_reader;
- DECLARE_ALIGNED(16, MACROBLOCKD, xd);
- DECLARE_ALIGNED(16, int16_t, dqcoeff[MAX_MB_PLANE][64 * 64]);
-} TileWorkerData;
-
static int read_be32(const uint8_t *p) {
return (p[0] << 24) | (p[1] << 16) | (p[2] << 8) | p[3];
}
@@ -982,7 +976,6 @@
++pbi->num_tile_workers;
vp9_worker_init(worker);
- worker->hook = (VP9WorkerHook)tile_worker_hook;
CHECK_MEM_ERROR(cm, worker->data1,
vpx_memalign(32, sizeof(TileWorkerData)));
CHECK_MEM_ERROR(cm, worker->data2, vpx_malloc(sizeof(TileInfo)));
@@ -993,6 +986,11 @@
}
}
+ // Reset tile decoding hook
+ for (n = 0; n < pbi->num_tile_workers; ++n) {
+ pbi->tile_workers[n].hook = (VP9WorkerHook)tile_worker_hook;
+ }
+
// Note: this memset assumes above_context[0], [1] and [2]
// are allocated as part of the same buffer.
vpx_memset(pbi->above_context[0], 0,
@@ -1392,9 +1390,6 @@
*p_data_end = decode_tiles(pbi, data + first_partition_size);
}
- cm->last_width = cm->width;
- cm->last_height = cm->height;
-
new_fb->corrupted |= xd->corrupted;
if (!pbi->decoded_key_frame) {
diff --git a/vp9/decoder/vp9_dthread.c b/vp9/decoder/vp9_dthread.c
new file mode 100644
index 0000000..280e351
--- /dev/null
+++ b/vp9/decoder/vp9_dthread.c
@@ -0,0 +1,259 @@
+/*
+ * Copyright (c) 2014 The WebM project authors. All Rights Reserved.
+ *
+ * Use of this source code is governed by a BSD-style license
+ * that can be found in the LICENSE file in the root of the source
+ * tree. An additional intellectual property rights grant can be found
+ * in the file PATENTS. All contributing project authors may
+ * be found in the AUTHORS file in the root of the source tree.
+ */
+
+#include "./vpx_config.h"
+#include "vp9/common/vp9_reconinter.h"
+#include "vp9/decoder/vp9_dthread.h"
+#include "vp9/decoder/vp9_onyxd_int.h"
+#include "vpx_mem/vpx_mem.h"
+
+#if CONFIG_MULTITHREAD
+static INLINE void mutex_lock(pthread_mutex_t *const mutex) {  // Acquire mutex: spin on trylock first, then block.
+ const int kMaxTryLocks = 4000;  // upper bound on non-blocking attempts
+ int locked = 0;
+ int i;
+
+ for (i = 0; i < kMaxTryLocks; ++i) {
+ if (!pthread_mutex_trylock(mutex)) {  // 0 means the lock was taken
+ locked = 1;
+ break;
+ }
+ }
+
+ if (!locked)
+ pthread_mutex_lock(mutex);  // spin budget exhausted: fall back to a blocking lock
+}
+#endif // CONFIG_MULTITHREAD
+
+static INLINE void sync_read(VP9LfSync *const lf_sync, int r, int c) {  // Wait until row r - 1 is far enough ahead of column c.
+#if CONFIG_MULTITHREAD
+ const int nsync = lf_sync->sync_range;
+
+ if (r && !(c & (nsync - 1))) {  // row 0 never waits; the mask test assumes nsync is a power of two
+ mutex_lock(&lf_sync->mutex_[r - 1]);
+
+ while (c > lf_sync->cur_sb_col[r - 1] - nsync) {  // proceed once the row above has published a column >= c + nsync
+ pthread_cond_wait(&lf_sync->cond_[r - 1],
+ &lf_sync->mutex_[r - 1]);
+ }
+ pthread_mutex_unlock(&lf_sync->mutex_[r - 1]);
+ }
+#else
+ (void)lf_sync;  // without CONFIG_MULTITHREAD this function is a no-op
+ (void)r;
+ (void)c;
+#endif // CONFIG_MULTITHREAD
+}
+
+static INLINE void sync_write(VP9LfSync *const lf_sync, int r, int c,
+ const int sb_cols) {  // Publish row r's progress after filtering column c and signal its condition.
+#if CONFIG_MULTITHREAD
+ const int nsync = lf_sync->sync_range;
+ int cur;
+ // Only signal when there are enough filtered SB for next row to run.
+ int sig = 1;
+
+ if (c < sb_cols - 1) {
+ cur = c;
+ if (c % nsync)  // publish only at multiples of nsync
+ sig = 0;
+ } else {
+ cur = sb_cols + nsync;  // last column: large enough that sync_read() stops waiting for any c < sb_cols
+ }
+
+ if (sig) {
+ mutex_lock(&lf_sync->mutex_[r]);
+
+ lf_sync->cur_sb_col[r] = cur;
+
+ pthread_cond_signal(&lf_sync->cond_[r]);  // return value is not checked
+ pthread_mutex_unlock(&lf_sync->mutex_[r]);
+ }
+#else
+ (void)lf_sync;  // without CONFIG_MULTITHREAD this function is a no-op
+ (void)r;
+ (void)c;
+ (void)sb_cols;
+#endif // CONFIG_MULTITHREAD
+}
+
+// Loop-filter SB rows start, start + num_lf_workers, ... (below stop) for one worker.
+static void loop_filter_rows_mt(const YV12_BUFFER_CONFIG *const frame_buffer,
+ VP9_COMMON *const cm, MACROBLOCKD *const xd,
+ int start, int stop, int y_only,
+ VP9LfSync *const lf_sync, int num_lf_workers) {
+ const int num_planes = y_only ? 1 : MAX_MB_PLANE;  // only the first plane when y_only is set
+ int r, c; // SB row and col
+ LOOP_FILTER_MASK lfm;
+ const int sb_cols = mi_cols_aligned_to_sb(cm->mi_cols) >> MI_BLOCK_SIZE_LOG2;
+
+ for (r = start; r < stop; r += num_lf_workers) {  // rows are interleaved across workers
+ const int mi_row = r << MI_BLOCK_SIZE_LOG2;
+ MODE_INFO **mi_8x8 = cm->mi_grid_visible + mi_row * cm->mode_info_stride;
+
+ for (c = 0; c < sb_cols; ++c) {
+ const int mi_col = c << MI_BLOCK_SIZE_LOG2;
+ int plane;
+
+ sync_read(lf_sync, r, c);  // may block until the row above is far enough ahead
+
+ setup_dst_planes(xd, frame_buffer, mi_row, mi_col);
+ vp9_setup_mask(cm, mi_row, mi_col, mi_8x8 + mi_col, cm->mode_info_stride,
+ &lfm);
+
+ for (plane = 0; plane < num_planes; ++plane) {
+ vp9_filter_block_plane(cm, &xd->plane[plane], mi_row, &lfm);
+ }
+
+ sync_write(lf_sync, r, c, sb_cols);  // publish progress for the row below
+ }
+ }
+}
+
+// Row-based multi-threaded loopfilter hook: arg1 is a TileWorkerData, arg2 is not read.
+static int loop_filter_row_worker(void *arg1, void *arg2) {
+ TileWorkerData *const tile_data = (TileWorkerData*)arg1;
+ LFWorkerData *const lf_data = &tile_data->lfdata;  // row range and sync state for this worker
+
+ loop_filter_rows_mt(lf_data->frame_buffer, lf_data->cm, &lf_data->xd,
+ lf_data->start, lf_data->stop, lf_data->y_only,
+ lf_data->lf_sync, lf_data->num_lf_workers);
+ return 1;  // always 1; NOTE(review): presumably "success" for VP9WorkerHook - confirm
+}
+
+// VP9 decoder: multi-threaded loopfilter that reuses the tile worker threads,
+// giving worker i the SB rows i, i + num_tile_workers, ...
+void vp9_loop_filter_frame_mt(VP9D_COMP *pbi,
+ VP9_COMMON *cm,
+ MACROBLOCKD *xd,  // not read here; each worker gets a copy of pbi->mb instead
+ int frame_filter_level,
+ int y_only, int partial) {  // partial is not read in this function
+ // Number of superblock rows
+ const int sb_rows = mi_cols_aligned_to_sb(cm->mi_rows) >> MI_BLOCK_SIZE_LOG2;  // column-named helper applied to the row count
+ int i;
+
+ // Allocate memory used in thread synchronization.
+ // This always needs to be done even if frame_filter_level is 0.
+ if (!cm->current_video_frame || cm->last_height != cm->height) {  // first frame, or height changed since the last frame
+ VP9LfSync *const lf_sync = &pbi->lf_row_sync;
+
+ if (cm->last_height != cm->height) {
+ const int aligned_last_height =
+ ALIGN_POWER_OF_TWO(cm->last_height, MI_SIZE_LOG2);
+ const int last_sb_rows =
+ mi_cols_aligned_to_sb(aligned_last_height >> MI_SIZE_LOG2) >>
+ MI_BLOCK_SIZE_LOG2;
+
+ vp9_loop_filter_dealloc(lf_sync, last_sb_rows);  // release arrays sized for the previous height
+ }
+
+ vp9_loop_filter_alloc(cm, lf_sync, sb_rows, cm->width);
+ }
+
+ if (!frame_filter_level) return;  // level 0: nothing is filtered
+
+ vp9_loop_filter_frame_init(cm, frame_filter_level);
+
+ // Initialize cur_sb_col to -1 for all SB rows.
+ vpx_memset(pbi->lf_row_sync.cur_sb_col, -1,
+ sizeof(*pbi->lf_row_sync.cur_sb_col) * sb_rows);  // all-ones bytes, so each int reads back as -1
+
+ // Set up loopfilter thread data.
+ for (i = 0; i < pbi->num_tile_workers; ++i) {
+ VP9Worker *const worker = &pbi->tile_workers[i];
+ TileWorkerData *const tile_data = (TileWorkerData*)worker->data1;
+ LFWorkerData *const lf_data = &tile_data->lfdata;
+
+ worker->hook = (VP9WorkerHook)loop_filter_row_worker;  // replaces whatever hook the worker had
+
+ // Loopfilter data
+ lf_data->frame_buffer = get_frame_new_buffer(cm);
+ lf_data->cm = cm;
+ lf_data->xd = pbi->mb;  // struct copy, one MACROBLOCKD per worker
+ lf_data->start = i;
+ lf_data->stop = sb_rows;
+ lf_data->y_only = y_only; // passed through unchanged; the decoder call in this patch passes 0 (all planes)
+
+ lf_data->lf_sync = &pbi->lf_row_sync;
+ lf_data->num_lf_workers = pbi->num_tile_workers;
+
+ // Start loopfiltering
+ if (i == pbi->num_tile_workers - 1) {
+ vp9_worker_execute(worker);  // NOTE(review): presumably runs the hook on the calling thread - confirm in vp9_thread.c
+ } else {
+ vp9_worker_launch(worker);
+ }
+ }
+
+ // Wait till all rows are finished
+ for (i = 0; i < pbi->num_tile_workers; ++i) {
+ vp9_worker_sync(&pbi->tile_workers[i]);
+ }
+}
+
+// Choose nsync (columns between row-sync points) from the frame width.
+static int get_sync_range(int width) {
+ // nsync numbers are picked by testing. For example, for 4k
+ // video, using 4 gives best performance.
+ if (width < 640)
+ return 1;
+ else if (width <= 1280)  // 640..1280
+ return 2;
+ else if (width <= 4096)  // 1281..4096
+ return 4;
+ else
+ return 8;  // wider than 4096
+}
+
+// Allocate per-row sync arrays ('rows' entries each) and set sync_range from 'width'.
+void vp9_loop_filter_alloc(VP9_COMMON *cm, VP9LfSync *lf_sync, int rows,
+ int width) {
+#if CONFIG_MULTITHREAD
+ int i;
+
+ CHECK_MEM_ERROR(cm, lf_sync->mutex_,
+ vpx_malloc(sizeof(*lf_sync->mutex_) * rows));
+ CHECK_MEM_ERROR(cm, lf_sync->cond_,
+ vpx_malloc(sizeof(*lf_sync->cond_) * rows));
+
+ for (i = 0; i < rows; ++i) {
+ pthread_mutex_init(&lf_sync->mutex_[i], NULL);  // return value is not checked
+ pthread_cond_init(&lf_sync->cond_[i], NULL);  // return value is not checked
+ }
+#endif // CONFIG_MULTITHREAD
+
+ CHECK_MEM_ERROR(cm, lf_sync->cur_sb_col,
+ vpx_malloc(sizeof(*lf_sync->cur_sb_col) * rows));  // contents uninitialised here; the frame_mt caller memsets them before use
+
+ // Set up nsync.
+ lf_sync->sync_range = get_sync_range(width);
+}
+
+// Deallocate lf synchronization related mutex and data.
+// 'rows' must be the row count the arrays were allocated with. A NULL lf_sync
+// is ignored. Arrays whose pointer is NULL (never allocated, or already
+// released) are skipped rather than indexed, and every pointer member is
+// reset to NULL afterwards so a second call cannot double-free.
+void vp9_loop_filter_dealloc(VP9LfSync *lf_sync, int rows) {
+  if (lf_sync == NULL)
+    return;
+
+#if CONFIG_MULTITHREAD
+  {
+    int i;
+
+    // Destroy only objects that exist: the decoder teardown path calls this
+    // whenever tile workers were created, even if the multi-threaded loop
+    // filter (and therefore vp9_loop_filter_alloc) never ran.
+    if (lf_sync->mutex_ != NULL) {
+      for (i = 0; i < rows; ++i)
+        pthread_mutex_destroy(&lf_sync->mutex_[i]);
+      vpx_free(lf_sync->mutex_);
+      lf_sync->mutex_ = NULL;
+    }
+
+    if (lf_sync->cond_ != NULL) {
+      for (i = 0; i < rows; ++i)
+        pthread_cond_destroy(&lf_sync->cond_[i]);
+      vpx_free(lf_sync->cond_);
+      lf_sync->cond_ = NULL;
+    }
+  }
+#else
+  (void)rows;
+#endif  // CONFIG_MULTITHREAD
+
+  vpx_free(lf_sync->cur_sb_col);
+  lf_sync->cur_sb_col = NULL;
+}
diff --git a/vp9/decoder/vp9_dthread.h b/vp9/decoder/vp9_dthread.h
new file mode 100644
index 0000000..4478354
--- /dev/null
+++ b/vp9/decoder/vp9_dthread.h
@@ -0,0 +1,60 @@
+/*
+ * Copyright (c) 2014 The WebM project authors. All Rights Reserved.
+ *
+ * Use of this source code is governed by a BSD-style license
+ * that can be found in the LICENSE file in the root of the source
+ * tree. An additional intellectual property rights grant can be found
+ * in the file PATENTS. All contributing project authors may
+ * be found in the AUTHORS file in the root of the source tree.
+ */
+
+#ifndef VP9_DECODER_VP9_DTHREAD_H_
+#define VP9_DECODER_VP9_DTHREAD_H_
+
+#include "./vpx_config.h"
+#include "vp9/common/vp9_loopfilter.h"
+#include "vp9/decoder/vp9_reader.h"
+#include "vp9/decoder/vp9_thread.h"
+
+struct macroblockd;
+struct VP9Common;
+struct VP9Decompressor;
+
+typedef struct TileWorkerData {  // Per-worker state; the decoder stores one in each tile worker's data1.
+ struct VP9Common *cm;
+ vp9_reader bit_reader;
+ DECLARE_ALIGNED(16, struct macroblockd, xd);
+ DECLARE_ALIGNED(16, int16_t, dqcoeff[MAX_MB_PLANE][64 * 64]);
+
+ // Row-based parallel loopfilter data
+ LFWorkerData lfdata;  // read by the row loopfilter worker hook
+} TileWorkerData;
+
+// Loopfilter row synchronization state shared by all loopfilter workers.
+typedef struct VP9LfSyncData {
+#if CONFIG_MULTITHREAD
+ pthread_mutex_t *mutex_;  // array with one mutex per superblock row
+ pthread_cond_t *cond_;  // array with one condition variable per superblock row
+#endif
+ // Per-row progress: the last published loop-filtered superblock column.
+ int *cur_sb_col;
+ // The optimal sync_range for different resolution and platform should be
+ // determined by testing. Currently, it is chosen to be a power-of-2 number.
+ int sync_range;
+} VP9LfSync;
+
+// Allocate memory for loopfilter row synchronization.
+void vp9_loop_filter_alloc(struct VP9Common *cm, struct VP9LfSyncData *lf_sync,
+ int rows, int width);
+
+// Deallocate loopfilter synchronization related mutex and data.
+void vp9_loop_filter_dealloc(struct VP9LfSyncData *lf_sync, int rows);
+
+// Multi-threaded loopfilter that uses the tile threads.
+void vp9_loop_filter_frame_mt(struct VP9Decompressor *pbi,
+ struct VP9Common *cm,
+ struct macroblockd *xd,
+ int frame_filter_level,
+ int y_only, int partial);
+
+#endif // VP9_DECODER_VP9_DTHREAD_H_
diff --git a/vp9/decoder/vp9_onyxd_if.c b/vp9/decoder/vp9_onyxd_if.c
index 75d52c2..c14a05d 100644
--- a/vp9/decoder/vp9_onyxd_if.c
+++ b/vp9/decoder/vp9_onyxd_if.c
@@ -27,6 +27,7 @@
#include "vpx_ports/vpx_timer.h"
#include "vp9/decoder/vp9_decodeframe.h"
#include "vp9/decoder/vp9_detokenize.h"
+#include "vp9/decoder/vp9_dthread.h"
#include "./vpx_scale_rtcd.h"
#define WRITE_RECON_BUFFER 0
@@ -177,6 +178,16 @@
vpx_free(worker->data2);
}
vpx_free(pbi->tile_workers);
+
+ if (pbi->num_tile_workers) {
+ VP9_COMMON *const cm = &pbi->common;
+ const int sb_rows =
+ mi_cols_aligned_to_sb(cm->mi_rows) >> MI_BLOCK_SIZE_LOG2;
+ VP9LfSync *const lf_sync = &pbi->lf_row_sync;
+
+ vp9_loop_filter_dealloc(lf_sync, sb_rows);
+ }
+
vpx_free(pbi->mi_streams);
vpx_free(pbi->above_context[0]);
vpx_free(pbi->above_seg_context);
@@ -370,7 +381,13 @@
#endif
if (!pbi->do_loopfilter_inline) {
- vp9_loop_filter_frame(cm, &pbi->mb, pbi->common.lf.filter_level, 0, 0);
+ // If multiple threads are used to decode tiles, then we use those threads
+ // to do parallel loopfiltering.
+ if (pbi->num_tile_workers) {
+ vp9_loop_filter_frame_mt(pbi, cm, &pbi->mb, cm->lf.filter_level, 0, 0);
+ } else {
+ vp9_loop_filter_frame(cm, &pbi->mb, cm->lf.filter_level, 0, 0);
+ }
}
#if WRITE_RECON_BUFFER == 2
@@ -390,7 +407,11 @@
vp9_clear_system_state();
- cm->last_show_frame = cm->show_frame;
+ cm->last_width = cm->width;
+ cm->last_height = cm->height;
+
+ if (!cm->show_existing_frame)
+ cm->last_show_frame = cm->show_frame;
if (cm->show_frame) {
if (!cm->show_existing_frame) {
// current mip will be the prev_mip for the next frame
diff --git a/vp9/decoder/vp9_onyxd_int.h b/vp9/decoder/vp9_onyxd_int.h
index 242c600..6c6c239 100644
--- a/vp9/decoder/vp9_onyxd_int.h
+++ b/vp9/decoder/vp9_onyxd_int.h
@@ -14,6 +14,7 @@
#include "./vpx_config.h"
#include "vp9/common/vp9_onyxc_int.h"
+#include "vp9/decoder/vp9_dthread.h"
#include "vp9/decoder/vp9_onyxd.h"
#include "vp9/decoder/vp9_thread.h"
@@ -49,6 +50,8 @@
VP9Worker *tile_workers;
int num_tile_workers;
+ VP9LfSync lf_row_sync;
+
/* Each tile column has its own MODE_INFO stream. This array indexes them by
tile column index. */
MODE_INFO **mi_streams;
diff --git a/vp9/decoder/vp9_thread.c b/vp9/decoder/vp9_thread.c
index d953e72..5d31d3d 100644
--- a/vp9/decoder/vp9_thread.c
+++ b/vp9/decoder/vp9_thread.c
@@ -24,116 +24,6 @@
#if CONFIG_MULTITHREAD
-#if defined(_WIN32)
-
-//------------------------------------------------------------------------------
-// simplistic pthread emulation layer
-
-#include <process.h> // NOLINT
-
-// _beginthreadex requires __stdcall
-#define THREADFN unsigned int __stdcall
-#define THREAD_RETURN(val) (unsigned int)((DWORD_PTR)val)
-
-static int pthread_create(pthread_t* const thread, const void* attr,
- unsigned int (__stdcall *start)(void*), void* arg) {
- (void)attr;
- *thread = (pthread_t)_beginthreadex(NULL, /* void *security */
- 0, /* unsigned stack_size */
- start,
- arg,
- 0, /* unsigned initflag */
- NULL); /* unsigned *thrdaddr */
- if (*thread == NULL) return 1;
- SetThreadPriority(*thread, THREAD_PRIORITY_ABOVE_NORMAL);
- return 0;
-}
-
-static int pthread_join(pthread_t thread, void** value_ptr) {
- (void)value_ptr;
- return (WaitForSingleObject(thread, INFINITE) != WAIT_OBJECT_0 ||
- CloseHandle(thread) == 0);
-}
-
-// Mutex
-static int pthread_mutex_init(pthread_mutex_t* const mutex, void* mutexattr) {
- (void)mutexattr;
- InitializeCriticalSection(mutex);
- return 0;
-}
-
-static int pthread_mutex_lock(pthread_mutex_t* const mutex) {
- EnterCriticalSection(mutex);
- return 0;
-}
-
-static int pthread_mutex_unlock(pthread_mutex_t* const mutex) {
- LeaveCriticalSection(mutex);
- return 0;
-}
-
-static int pthread_mutex_destroy(pthread_mutex_t* const mutex) {
- DeleteCriticalSection(mutex);
- return 0;
-}
-
-// Condition
-static int pthread_cond_destroy(pthread_cond_t* const condition) {
- int ok = 1;
- ok &= (CloseHandle(condition->waiting_sem_) != 0);
- ok &= (CloseHandle(condition->received_sem_) != 0);
- ok &= (CloseHandle(condition->signal_event_) != 0);
- return !ok;
-}
-
-static int pthread_cond_init(pthread_cond_t* const condition, void* cond_attr) {
- (void)cond_attr;
- condition->waiting_sem_ = CreateSemaphore(NULL, 0, 1, NULL);
- condition->received_sem_ = CreateSemaphore(NULL, 0, 1, NULL);
- condition->signal_event_ = CreateEvent(NULL, FALSE, FALSE, NULL);
- if (condition->waiting_sem_ == NULL ||
- condition->received_sem_ == NULL ||
- condition->signal_event_ == NULL) {
- pthread_cond_destroy(condition);
- return 1;
- }
- return 0;
-}
-
-static int pthread_cond_signal(pthread_cond_t* const condition) {
- int ok = 1;
- if (WaitForSingleObject(condition->waiting_sem_, 0) == WAIT_OBJECT_0) {
- // a thread is waiting in pthread_cond_wait: allow it to be notified
- ok = SetEvent(condition->signal_event_);
- // wait until the event is consumed so the signaler cannot consume
- // the event via its own pthread_cond_wait.
- ok &= (WaitForSingleObject(condition->received_sem_, INFINITE) !=
- WAIT_OBJECT_0);
- }
- return !ok;
-}
-
-static int pthread_cond_wait(pthread_cond_t* const condition,
- pthread_mutex_t* const mutex) {
- int ok;
- // note that there is a consumer available so the signal isn't dropped in
- // pthread_cond_signal
- if (!ReleaseSemaphore(condition->waiting_sem_, 1, NULL))
- return 1;
- // now unlock the mutex so pthread_cond_signal may be issued
- pthread_mutex_unlock(mutex);
- ok = (WaitForSingleObject(condition->signal_event_, INFINITE) ==
- WAIT_OBJECT_0);
- ok &= ReleaseSemaphore(condition->received_sem_, 1, NULL);
- pthread_mutex_lock(mutex);
- return !ok;
-}
-
-#else // _WIN32
-# define THREADFN void*
-# define THREAD_RETURN(val) val
-#endif
-
//------------------------------------------------------------------------------
static THREADFN thread_loop(void *ptr) { // thread loop
diff --git a/vp9/decoder/vp9_thread.h b/vp9/decoder/vp9_thread.h
index bc69cfa..2f8728d 100644
--- a/vp9/decoder/vp9_thread.h
+++ b/vp9/decoder/vp9_thread.h
@@ -26,7 +26,8 @@
#if CONFIG_MULTITHREAD
#if defined(_WIN32)
-
+#include <errno.h> // NOLINT
+#include <process.h> // NOLINT
#include <windows.h> // NOLINT
typedef HANDLE pthread_t;
typedef CRITICAL_SECTION pthread_mutex_t;
@@ -36,12 +37,120 @@
HANDLE signal_event_;
} pthread_cond_t;
-#else
+//------------------------------------------------------------------------------
+// simplistic pthread emulation layer
+// _beginthreadex requires __stdcall
+#define THREADFN unsigned int __stdcall
+#define THREAD_RETURN(val) (unsigned int)((DWORD_PTR)val)
+
+static INLINE int pthread_create(pthread_t* const thread, const void* attr,
+ unsigned int (__stdcall *start)(void*),
+ void* arg) {  // Start a thread with _beginthreadex; returns 0 on success, 1 on failure.
+ (void)attr;  // thread attributes are ignored by this emulation
+ *thread = (pthread_t)_beginthreadex(NULL, /* void *security */
+ 0, /* unsigned stack_size */
+ start,
+ arg,
+ 0, /* unsigned initflag */
+ NULL); /* unsigned *thrdaddr */
+ if (*thread == NULL) return 1;
+ SetThreadPriority(*thread, THREAD_PRIORITY_ABOVE_NORMAL);  // result is not checked
+ return 0;
+}
+
+static INLINE int pthread_join(pthread_t thread, void** value_ptr) {  // Wait for the thread and close its handle; 0 on success.
+ (void)value_ptr;  // the thread's exit value is not reported
+ return (WaitForSingleObject(thread, INFINITE) != WAIT_OBJECT_0 ||
+ CloseHandle(thread) == 0);  // non-zero if the wait or the close failed
+}
+
+// Mutex
+static INLINE int pthread_mutex_init(pthread_mutex_t *const mutex,
+ void* mutexattr) {  // Initialise the CRITICAL_SECTION behind the mutex; always returns 0.
+ (void)mutexattr;  // mutex attributes are ignored
+ InitializeCriticalSection(mutex);
+ return 0;
+}
+
+static INLINE int pthread_mutex_trylock(pthread_mutex_t *const mutex) {  // Non-blocking lock attempt.
+ return TryEnterCriticalSection(mutex) ? 0 : EBUSY;  // 0 if acquired, EBUSY otherwise
+}
+
+static INLINE int pthread_mutex_lock(pthread_mutex_t *const mutex) {  // Blocking lock; always returns 0.
+ EnterCriticalSection(mutex);
+ return 0;
+}
+
+static INLINE int pthread_mutex_unlock(pthread_mutex_t *const mutex) {  // Release the lock; always returns 0.
+ LeaveCriticalSection(mutex);
+ return 0;
+}
+
+static INLINE int pthread_mutex_destroy(pthread_mutex_t *const mutex) {  // Delete the CRITICAL_SECTION; always returns 0.
+ DeleteCriticalSection(mutex);
+ return 0;
+}
+
+// Condition
+static INLINE int pthread_cond_destroy(pthread_cond_t *const condition) {  // Close all three handles; 0 only if every close succeeded.
+ int ok = 1;
+ ok &= (CloseHandle(condition->waiting_sem_) != 0);
+ ok &= (CloseHandle(condition->received_sem_) != 0);
+ ok &= (CloseHandle(condition->signal_event_) != 0);
+ return !ok;  // all three closes are attempted even after a failure
+}
+
+static INLINE int pthread_cond_init(pthread_cond_t *const condition,
+ void* cond_attr) {  // Create the two semaphores and the event; 0 on success, 1 on failure.
+ (void)cond_attr;  // condition attributes are ignored
+ condition->waiting_sem_ = CreateSemaphore(NULL, 0, 1, NULL);  // initial count 0, maximum count 1
+ condition->received_sem_ = CreateSemaphore(NULL, 0, 1, NULL);  // initial count 0, maximum count 1
+ condition->signal_event_ = CreateEvent(NULL, FALSE, FALSE, NULL);  // auto-reset event, initially non-signalled
+ if (condition->waiting_sem_ == NULL ||
+ condition->received_sem_ == NULL ||
+ condition->signal_event_ == NULL) {
+ pthread_cond_destroy(condition);  // closes all three handles, including any that are NULL
+ return 1;
+ }
+ return 0;
+}
+
+// Wakes a thread blocked in pthread_cond_wait() on 'condition', if there is
+// one; a signal with no registered waiter is a no-op. Returns 0 on success
+// and non-zero on failure, matching the other wrappers in this header.
+static INLINE int pthread_cond_signal(pthread_cond_t *const condition) {
+  int ok = 1;
+  if (WaitForSingleObject(condition->waiting_sem_, 0) == WAIT_OBJECT_0) {
+    // a thread is waiting in pthread_cond_wait: allow it to be notified
+    ok = SetEvent(condition->signal_event_);
+    // wait until the event is consumed so the signaler cannot consume
+    // the event via its own pthread_cond_wait.
+    // WAIT_OBJECT_0 is the success result, so compare for equality; the
+    // previous '!=' made every successful signal return non-zero.
+    ok &= (WaitForSingleObject(condition->received_sem_, INFINITE) ==
+           WAIT_OBJECT_0);
+  }
+  return !ok;
+}
+
+static INLINE int pthread_cond_wait(pthread_cond_t *const condition,
+ pthread_mutex_t *const mutex) {  // Release 'mutex', wait for a signal, re-acquire 'mutex'; 0 on success.
+ int ok;
+ // note that there is a consumer available so the signal isn't dropped in
+ // pthread_cond_signal
+ if (!ReleaseSemaphore(condition->waiting_sem_, 1, NULL))
+ return 1;  // early exit: the mutex is still held and no wait took place
+ // now unlock the mutex so pthread_cond_signal may be issued
+ pthread_mutex_unlock(mutex);
+ ok = (WaitForSingleObject(condition->signal_event_, INFINITE) ==
+ WAIT_OBJECT_0);
+ ok &= ReleaseSemaphore(condition->received_sem_, 1, NULL);  // tell the signaler the event was consumed
+ pthread_mutex_lock(mutex);  // re-acquired before returning, on success or failure
+ return !ok;
+}
+#else // _WIN32
#include <pthread.h> // NOLINT
+# define THREADFN void*
+# define THREAD_RETURN(val) val
+#endif
-#endif /* _WIN32 */
-#endif /* CONFIG_MULTITHREAD */
+#endif // CONFIG_MULTITHREAD
// State of the worker thread object
typedef enum {
diff --git a/vp9/encoder/vp9_block.h b/vp9/encoder/vp9_block.h
index 716ad61..713cc51 100644
--- a/vp9/encoder/vp9_block.h
+++ b/vp9/encoder/vp9_block.h
@@ -239,25 +239,6 @@
}
}
-struct rdcost_block_args {
- MACROBLOCK *x;
- ENTROPY_CONTEXT t_above[16];
- ENTROPY_CONTEXT t_left[16];
- TX_SIZE tx_size;
- int bw;
- int bh;
- int rate;
- int64_t dist;
- int64_t sse;
- int this_rate;
- int64_t this_dist;
- int64_t this_sse;
- int64_t this_rd;
- int64_t best_rd;
- int skip;
- const int16_t *scan, *nb;
-};
-
#ifdef __cplusplus
} // extern "C"
#endif
diff --git a/vp9/encoder/vp9_encodeframe.c b/vp9/encoder/vp9_encodeframe.c
index 724a115..35c6d6f 100644
--- a/vp9/encoder/vp9_encodeframe.c
+++ b/vp9/encoder/vp9_encodeframe.c
@@ -1031,131 +1031,171 @@
}
return 0;
}
-
-// TODO(jingning) This currently serves as a test framework for non-RD mode
-// decision. To be continued on optimizing the partition type decisions.
-static void pick_partition_type(VP9_COMP *cpi,
- const TileInfo *const tile,
- MODE_INFO **mi_8x8, TOKENEXTRA **tp,
- int mi_row, int mi_col,
- BLOCK_SIZE bsize, int *rate, int64_t *dist,
- int do_recon) {
+static void update_state_rt(VP9_COMP *cpi, PICK_MODE_CONTEXT *ctx,  // installs the coefficient buffers and skip flag saved in |ctx|, then updates mode/mv/filter counters
+ BLOCK_SIZE bsize, int output_enabled) {  // neither bsize nor output_enabled is read in this body
+ int i;
VP9_COMMON *const cm = &cpi->common;
MACROBLOCK *const x = &cpi->mb;
- const int mi_stride = cm->mode_info_stride;
- const int num_8x8_subsize = (num_8x8_blocks_wide_lookup[bsize] >> 1);
- int i;
- PARTITION_TYPE partition = PARTITION_NONE;
- BLOCK_SIZE subsize;
- BLOCK_SIZE bs_type = mi_8x8[0]->mbmi.sb_type;
- int sub_rate[4] = {0};
- int64_t sub_dist[4] = {0};
- int mi_offset;
+ MACROBLOCKD *const xd = &x->e_mbd;
+ struct macroblock_plane *const p = x->plane;
+ struct macroblockd_plane *const pd = xd->plane;
+ MB_MODE_INFO *const mbmi = &xd->mi_8x8[0]->mbmi;
- if (mi_row >= cm->mi_rows || mi_col >= cm->mi_cols)
- return;
+ const int mb_mode_index = ctx->best_mode_index;
+ int max_plane;
- partition = partition_lookup[b_width_log2(bsize)][bs_type];
- subsize = get_subsize(bsize, partition);
+ max_plane = is_inter_block(mbmi) ? MAX_MB_PLANE : 1;  // inter blocks: every plane uses buffer slot 1; otherwise only plane 0 does
+ for (i = 0; i < max_plane; ++i) {
+ p[i].coeff = ctx->coeff_pbuf[i][1];
+ p[i].qcoeff = ctx->qcoeff_pbuf[i][1];
+ pd[i].dqcoeff = ctx->dqcoeff_pbuf[i][1];
+ p[i].eobs = ctx->eobs_pbuf[i][1];
+ }
+
+ for (i = max_plane; i < MAX_MB_PLANE; ++i) {  // remaining planes (non-inter case only) use buffer slot 2
+ p[i].coeff = ctx->coeff_pbuf[i][2];
+ p[i].qcoeff = ctx->qcoeff_pbuf[i][2];
+ pd[i].dqcoeff = ctx->dqcoeff_pbuf[i][2];
+ p[i].eobs = ctx->eobs_pbuf[i][2];
+ }
+
+ x->skip = ctx->skip;
+
+ if (frame_is_intra_only(cm)) {
+#if CONFIG_INTERNAL_STATS
+ static const int kf_mode_index[] = {
+ THR_DC /*DC_PRED*/,
+ THR_V_PRED /*V_PRED*/,
+ THR_H_PRED /*H_PRED*/,
+ THR_D45_PRED /*D45_PRED*/,
+ THR_D135_PRED /*D135_PRED*/,
+ THR_D117_PRED /*D117_PRED*/,
+ THR_D153_PRED /*D153_PRED*/,
+ THR_D207_PRED /*D207_PRED*/,
+ THR_D63_PRED /*D63_PRED*/,
+ THR_TM /*TM_PRED*/,
+ };
+ ++cpi->mode_chosen_counts[kf_mode_index[mbmi->mode]];
+#endif
+ } else {
+ // Note how often each mode is chosen as best
+ cpi->mode_chosen_counts[mb_mode_index]++;  // NOTE(review): not guarded by CONFIG_INTERNAL_STATS, unlike the intra branch -- confirm
+ if (is_inter_block(mbmi) &&
+ (mbmi->sb_type < BLOCK_8X8 || mbmi->mode == NEWMV)) {
+ int_mv best_mv[2];
+ for (i = 0; i < 1 + has_second_ref(mbmi); ++i)
+ best_mv[i].as_int = mbmi->ref_mvs[mbmi->ref_frame[i]][0].as_int;
+ vp9_update_mv_count(cpi, x, best_mv);
+ }
+
+ if (cm->interp_filter == SWITCHABLE && is_inter_mode(mbmi->mode)) {
+ const int interp_ctx = vp9_get_pred_context_switchable_interp(xd);  // named so it no longer shadows the |ctx| parameter
+ ++cm->counts.switchable_interp[interp_ctx][mbmi->interp_filter];
+ }
+ }
+}
+
+static void encode_b_rt(VP9_COMP *cpi, const TileInfo *const tile,  // sets offsets, restores the block's saved state, encodes it and appends an EOSB token
+ TOKENEXTRA **tp, int mi_row, int mi_col,
+ int output_enabled, BLOCK_SIZE bsize) {
+ MACROBLOCK *const x = &cpi->mb;
if (bsize < BLOCK_8X8) {
// When ab_index = 0 all sub-blocks are handled, so for ab_index != 0
// there is nothing to be done.
- if (x->ab_index != 0) {
- *rate = 0;
- *dist = 0;
+ if (x->ab_index > 0)
return;
- }
- } else {
- *(get_sb_partitioning(x, bsize)) = subsize;
}
+ set_offsets(cpi, tile, mi_row, mi_col, bsize);
+ update_state_rt(cpi, get_block_context(x, bsize), bsize, output_enabled);
+
+ encode_superblock(cpi, tp, output_enabled, mi_row, mi_col, bsize);
+ update_stats(cpi);  // NOTE(review): runs regardless of output_enabled, as does the EOSB write below -- confirm intended
+
+ (*tp)->token = EOSB_TOKEN;  // mark the end of this block's tokens and advance the write pointer
+ (*tp)++;
+}
+
+static void encode_sb_rt(VP9_COMP *cpi, const TileInfo *const tile,  // recursively encodes the |bsize| region at (mi_row, mi_col) following the partitioning already stored in the mode-info grid
+ TOKENEXTRA **tp, int mi_row, int mi_col,
+ int output_enabled, BLOCK_SIZE bsize) {
+ VP9_COMMON *const cm = &cpi->common;
+ MACROBLOCK *const x = &cpi->mb;
+ const int bsl = b_width_log2(bsize), hbs = (1 << bsl) / 4;  // hbs: offset added to mi_row/mi_col to reach the second half of the block
+ int ctx;
+ PARTITION_TYPE partition;
+ BLOCK_SIZE subsize;
+
+ if (mi_row >= cm->mi_rows || mi_col >= cm->mi_cols)  // nothing to encode outside the frame
+ return;
+
+ if (bsize >= BLOCK_8X8) {
+ MACROBLOCKD *const xd = &cpi->mb.e_mbd;
+ const int idx_str = xd->mode_info_stride * mi_row + mi_col;
+ MODE_INFO ** mi_8x8 = cm->mi_grid_visible + idx_str;
+ ctx = partition_plane_context(cpi->above_seg_context, cpi->left_seg_context,
+ mi_row, mi_col, bsize);
+ subsize = mi_8x8[0]->mbmi.sb_type;  // the stored block size selects the partition type below
+
+ } else {
+ ctx = 0;
+ subsize = BLOCK_4X4;
+ }
+
+ partition = partition_lookup[bsl][subsize];
switch (partition) {
case PARTITION_NONE:
- rd_pick_sb_modes(cpi, tile, mi_row, mi_col, rate, dist,
- bsize, get_block_context(x, bsize), INT64_MAX);
- break;
- case PARTITION_HORZ:
- *get_sb_index(x, subsize) = 0;
- rd_pick_sb_modes(cpi, tile, mi_row, mi_col, &sub_rate[0], &sub_dist[0],
- subsize, get_block_context(x, subsize), INT64_MAX);
- if (bsize >= BLOCK_8X8 && mi_row + num_8x8_subsize < cm->mi_rows) {
- update_state(cpi, get_block_context(x, subsize), subsize, 0);
- encode_superblock(cpi, tp, 0, mi_row, mi_col, subsize);
- *get_sb_index(x, subsize) = 1;
- rd_pick_sb_modes(cpi, tile, mi_row + num_8x8_subsize, mi_col,
- &sub_rate[1], &sub_dist[1], subsize,
- get_block_context(x, subsize), INT64_MAX);
- }
- *rate = sub_rate[0] + sub_rate[1];
- *dist = sub_dist[0] + sub_dist[1];
+ if (output_enabled && bsize >= BLOCK_8X8)
+ cm->counts.partition[ctx][PARTITION_NONE]++;
+ encode_b_rt(cpi, tile, tp, mi_row, mi_col, output_enabled, subsize);
break;
case PARTITION_VERT:
+ if (output_enabled)
+ cm->counts.partition[ctx][PARTITION_VERT]++;
*get_sb_index(x, subsize) = 0;
- rd_pick_sb_modes(cpi, tile, mi_row, mi_col, &sub_rate[0], &sub_dist[0],
- subsize, get_block_context(x, subsize), INT64_MAX);
- if (bsize >= BLOCK_8X8 && mi_col + num_8x8_subsize < cm->mi_cols) {
- update_state(cpi, get_block_context(x, subsize), subsize, 0);
- encode_superblock(cpi, tp, 0, mi_row, mi_col, subsize);
+ encode_b_rt(cpi, tile, tp, mi_row, mi_col, output_enabled, subsize);
+ if (mi_col + hbs < cm->mi_cols) {  // second half only when it starts inside the frame
*get_sb_index(x, subsize) = 1;
- rd_pick_sb_modes(cpi, tile, mi_row, mi_col + num_8x8_subsize,
- &sub_rate[1], &sub_dist[1], subsize,
- get_block_context(x, subsize), INT64_MAX);
+ encode_b_rt(cpi, tile, tp, mi_row, mi_col + hbs, output_enabled,
+ subsize);
}
- *rate = sub_rate[0] + sub_rate[1];
- *dist = sub_dist[1] + sub_dist[1];
+ break;
+ case PARTITION_HORZ:
+ if (output_enabled)
+ cm->counts.partition[ctx][PARTITION_HORZ]++;
+ *get_sb_index(x, subsize) = 0;
+ encode_b_rt(cpi, tile, tp, mi_row, mi_col, output_enabled, subsize);
+ if (mi_row + hbs < cm->mi_rows) {  // second half only when it starts inside the frame
+ *get_sb_index(x, subsize) = 1;
+ encode_b_rt(cpi, tile, tp, mi_row + hbs, mi_col, output_enabled,
+ subsize);
+ }
break;
case PARTITION_SPLIT:
+ subsize = get_subsize(bsize, PARTITION_SPLIT);
+ if (output_enabled)
+ cm->counts.partition[ctx][PARTITION_SPLIT]++;
+
*get_sb_index(x, subsize) = 0;
- pick_partition_type(cpi, tile, mi_8x8, tp, mi_row, mi_col, subsize,
- &sub_rate[0], &sub_dist[0], 0);
-
- if ((mi_col + num_8x8_subsize) < cm->mi_cols) {
- *get_sb_index(x, subsize) = 1;
- pick_partition_type(cpi, tile, mi_8x8 + num_8x8_subsize, tp,
- mi_row, mi_col + num_8x8_subsize, subsize,
- &sub_rate[1], &sub_dist[1], 0);
- }
-
- if ((mi_row + num_8x8_subsize) < cm->mi_rows) {
- *get_sb_index(x, subsize) = 2;
- pick_partition_type(cpi, tile, mi_8x8 + num_8x8_subsize * mi_stride, tp,
- mi_row + num_8x8_subsize, mi_col, subsize,
- &sub_rate[2], &sub_dist[2], 0);
- }
-
- if ((mi_col + num_8x8_subsize) < cm->mi_cols &&
- (mi_row + num_8x8_subsize) < cm->mi_rows) {
- *get_sb_index(x, subsize) = 3;
- mi_offset = num_8x8_subsize * mi_stride + num_8x8_subsize;
- pick_partition_type(cpi, tile, mi_8x8 + mi_offset, tp,
- mi_row + num_8x8_subsize, mi_col + num_8x8_subsize,
- subsize, &sub_rate[3], &sub_dist[3], 0);
- }
-
- for (i = 0; i < 4; ++i) {
- *rate += sub_rate[i];
- *dist += sub_dist[i];
- }
-
+ encode_sb_rt(cpi, tile, tp, mi_row, mi_col, output_enabled, subsize);  // quadrants outside the frame return immediately via the bounds check at the top
+ *get_sb_index(x, subsize) = 1;
+ encode_sb_rt(cpi, tile, tp, mi_row, mi_col + hbs, output_enabled,
+ subsize);
+ *get_sb_index(x, subsize) = 2;
+ encode_sb_rt(cpi, tile, tp, mi_row + hbs, mi_col, output_enabled,
+ subsize);
+ *get_sb_index(x, subsize) = 3;
+ encode_sb_rt(cpi, tile, tp, mi_row + hbs, mi_col + hbs, output_enabled,
+ subsize);
break;
default:
- assert(0);
+ assert(0 && "Invalid partition type.");  // a bare string literal is always true, so "0 &&" is needed for the assert to fire
}
- if (do_recon) {
- int output_enabled = (bsize == BLOCK_64X64);
-
- // Check the projected output rate for this SB against it's target
- // and and if necessary apply a Q delta using segmentation to get
- // closer to the target.
- if ((cpi->oxcf.aq_mode == COMPLEXITY_AQ) && cm->seg.update_map) {
- select_in_frame_q_segment(cpi, mi_row, mi_col,
- output_enabled, *rate);
- }
-
- encode_sb(cpi, tile, tp, mi_row, mi_col, output_enabled, bsize);
- }
+ if (partition != PARTITION_SPLIT || bsize == BLOCK_8X8)  // for larger split blocks the recursive calls update the context instead
+ update_partition_context(cpi->above_seg_context, cpi->left_seg_context,
+ mi_row, mi_col, subsize, bsize);
}
static void rd_use_partition(VP9_COMP *cpi,
@@ -1446,15 +1486,19 @@
}
static const BLOCK_SIZE min_partition_size[BLOCK_SIZES] = {
- BLOCK_4X4, BLOCK_4X4, BLOCK_4X4, BLOCK_4X4,
- BLOCK_4X4, BLOCK_4X4, BLOCK_8X8, BLOCK_8X8,
- BLOCK_8X8, BLOCK_16X16, BLOCK_16X16, BLOCK_16X16, BLOCK_16X16
+ BLOCK_4X4, BLOCK_4X4, BLOCK_4X4,
+ BLOCK_4X4, BLOCK_4X4, BLOCK_4X4,
+ BLOCK_8X8, BLOCK_8X8, BLOCK_8X8,
+ BLOCK_16X16, BLOCK_16X16, BLOCK_16X16,
+ BLOCK_16X16
};
static const BLOCK_SIZE max_partition_size[BLOCK_SIZES] = {
- BLOCK_8X8, BLOCK_16X16, BLOCK_16X16, BLOCK_16X16,
- BLOCK_32X32, BLOCK_32X32, BLOCK_32X32, BLOCK_64X64,
- BLOCK_64X64, BLOCK_64X64, BLOCK_64X64, BLOCK_64X64, BLOCK_64X64
+ BLOCK_8X8, BLOCK_16X16, BLOCK_16X16,
+ BLOCK_16X16, BLOCK_32X32, BLOCK_32X32,
+ BLOCK_32X32, BLOCK_64X64, BLOCK_64X64,
+ BLOCK_64X64, BLOCK_64X64, BLOCK_64X64,
+ BLOCK_64X64
};
// Look at all the mode_info entries for blocks that are part of this
@@ -1540,9 +1584,11 @@
}
}
- // Give a bit of leaway either side of the observed min and max
- *min_block_size = min_partition_size[*min_block_size];
- *max_block_size = max_partition_size[*max_block_size];
+ // adjust observed min and max
+ if (cpi->sf.auto_min_max_partition_size == RELAXED_NEIGHBORING_MIN_MAX) {
+ *min_block_size = min_partition_size[*min_block_size];
+ *max_block_size = max_partition_size[*max_block_size];
+ }
// Check border cases where max and min from neighbours may not be legal.
*max_block_size = find_partition_size(*max_block_size,
@@ -1998,34 +2044,6 @@
restore_context(cpi, mi_row, mi_col, a, l, sa, sl, BLOCK_64X64);
}
-static void encode_sb_row_rt(VP9_COMP *cpi, const TileInfo *const tile,
- int mi_row, TOKENEXTRA **tp) {
- VP9_COMMON *const cm = &cpi->common;
- int mi_col;
-
- cpi->sf.always_this_block_size = BLOCK_8X8;
-
- // Initialize the left context for the new SB row
- vpx_memset(&cpi->left_context, 0, sizeof(cpi->left_context));
- vpx_memset(cpi->left_seg_context, 0, sizeof(cpi->left_seg_context));
-
- // Code each SB in the row
- for (mi_col = tile->mi_col_start; mi_col < tile->mi_col_end;
- mi_col += MI_BLOCK_SIZE) {
- int dummy_rate;
- int64_t dummy_dist;
- const int idx_str = cm->mode_info_stride * mi_row + mi_col;
- MODE_INFO **mi_8x8 = cm->mi_grid_visible + idx_str;
-
- vp9_zero(cpi->mb.pred_mv);
-
- set_offsets(cpi, tile, mi_row, mi_col, BLOCK_64X64);
- set_partitioning(cpi, tile, mi_8x8, mi_row, mi_col);
- pick_partition_type(cpi, tile, mi_8x8, tp, mi_row, mi_col, BLOCK_64X64,
- &dummy_rate, &dummy_dist, 1);
- }
-}
-
static void encode_sb_row(VP9_COMP *cpi, const TileInfo *const tile,
int mi_row, TOKENEXTRA **tp) {
VP9_COMMON *const cm = &cpi->common;
@@ -2252,11 +2270,7 @@
vp9_tile_init(&tile, cm, tile_row, tile_col);
for (mi_row = tile.mi_row_start;
mi_row < tile.mi_row_end; mi_row += 8)
-#if 1
encode_sb_row(cpi, &tile, mi_row, &tp);
-#else
- encode_sb_row_rt(cpi, &tile, mi_row, &tp);
-#endif
cpi->tok_count[tile_row][tile_col] = (unsigned int)(tp - tp_old);
assert(tp - cpi->tok <= get_token_alloc(cm->mb_rows, cm->mb_cols));
@@ -2397,15 +2411,15 @@
}
}
-static int get_frame_type(VP9_COMP *cpi) {
+static MV_REFERENCE_FRAME get_frame_type(VP9_COMP *cpi) {  // classifies the frame being coded; vp9_encode_frame uses the result to index its per-type threshold tables
if (frame_is_intra_only(&cpi->common))
- return 0;
+ return INTRA_FRAME;
else if (cpi->rc.is_src_frame_alt_ref && cpi->refresh_golden_frame)
- return 3;
+ return ALTREF_FRAME;
else if (cpi->refresh_golden_frame || cpi->refresh_alt_ref_frame)
- return 1;
+ return LAST_FRAME;  // replaces the literal 1: golden or alt-ref refresh frames
else
- return 2;
+ return GOLDEN_FRAME;  // replaces the literal 2: every remaining inter frame
}
static void select_tx_mode(VP9_COMP *cpi) {
@@ -2435,6 +2449,264 @@
}
}
}
+// Start RTC Exploration
+typedef enum {  // NOTE(review): not referenced anywhere in the visible part of this change -- confirm it is needed
+ BOTH_ZERO = 0,
+ ZERO_PLUS_PREDICTED = 1,
+ BOTH_PREDICTED = 2,
+ NEW_PLUS_NON_INTRA = 3,
+ BOTH_NEW = 4,
+ INTRA_PLUS_NON_INTRA = 5,
+ BOTH_INTRA = 6,
+ INVALID_CASE = 9  // values 7 and 8 are left unassigned
+} motion_vector_context;
+
+static void set_mode_info(MB_MODE_INFO *mbmi, BLOCK_SIZE bsize,  // fills |mbmi| with fixed defaults for a block of size |bsize| predicted with |mode|
+ MB_PREDICTION_MODE mode, int mi_row, int mi_col) {  // mi_row and mi_col are not read in this body
+ mbmi->interp_filter = EIGHTTAP;
+ mbmi->mode = mode;
+ mbmi->mv[0].as_int = 0;
+ mbmi->mv[1].as_int = 0;
+ if (mode < NEARESTMV) {  // modes ordered before NEARESTMV get the intra reference
+ mbmi->ref_frame[0] = INTRA_FRAME;
+ } else {
+ mbmi->ref_frame[0] = LAST_FRAME;
+ }
+
+ mbmi->ref_frame[1] = INTRA_FRAME;
+ mbmi->tx_size = max_txsize_lookup[bsize];
+ mbmi->uv_mode = mode;  // chroma uses the same mode value as luma
+ mbmi->skip_coeff = 0;
+ mbmi->sb_type = bsize;
+ mbmi->segment_id = 0;
+}
+static INLINE int get_block_row(int b32i, int b16i, int b8i) {  // row offset built from bit 1 of each index, weighted 4, 2 and 1; callers add it to mi_row
+ return ((b32i >> 1) << 2) + ((b16i >> 1) << 1) + (b8i >> 1);
+}
+static INLINE int get_block_col(int b32i, int b16i, int b8i) {  // column offset built from bit 0 of each index, weighted 4, 2 and 1; callers add it to mi_col
+ return ((b32i & 1) << 2) + ((b16i & 1) << 1) + (b8i & 1);
+}
+static void rtc_use_partition(VP9_COMP *cpi,  // fills mode info for each 16x16 position of the superblock at (mi_row, mi_col), then encodes the superblock
+ const TileInfo *const tile,
+ MODE_INFO **mi_8x8,
+ TOKENEXTRA **tp, int mi_row, int mi_col,
+ BLOCK_SIZE bsize, int *rate, int64_t *dist,
+ int do_recon) {  // do_recon is not read; the incoming bsize is overwritten inside the loop
+ VP9_COMMON *const cm = &cpi->common;
+ MACROBLOCK *const x = &cpi->mb;
+ MACROBLOCKD *const xd = &cpi->mb.e_mbd;
+ const int mis = cm->mode_info_stride;
+ int mi_width = num_8x8_blocks_wide_lookup[cpi->sf.always_this_block_size];
+ int mi_height = num_8x8_blocks_high_lookup[cpi->sf.always_this_block_size];
+ int i, j;
+ int chosen_rate = INT_MAX;
+ int64_t chosen_dist = INT_MAX;
+ MB_PREDICTION_MODE mode = DC_PRED;
+ int row8x8_remaining = tile->mi_row_end - mi_row;
+ int col8x8_remaining = tile->mi_col_end - mi_col;
+ int b32i;
+ x->fast_ms = 0;
+ x->subblock_ref = 0;
+ for (b32i = 0; b32i < 4; b32i++) {
+ int b16i;
+ for (b16i = 0; b16i < 4; b16i++) {
+ int b8i;
+ int block_row = get_block_row(b32i, b16i, 0);
+ int block_col = get_block_col(b32i, b16i, 0);
+ int index = block_row * mis + block_col;
+ int blk_rate;  // per-block scratch result; named so it no longer shadows the |rate| output parameter
+ int64_t blk_dist;  // likewise for |dist|
+
+ int_mv frame_nearest_mv[MAX_REF_FRAMES];
+ int_mv frame_near_mv[MAX_REF_FRAMES];
+ struct buf_2d yv12_mb[MAX_REF_FRAMES][MAX_MB_PLANE];
+
+ // Find a partition size that fits
+ bsize = find_partition_size(cpi->sf.always_this_block_size,
+ (row8x8_remaining - block_row),
+ (col8x8_remaining - block_col),
+ &mi_height, &mi_width);
+ mi_8x8[index] = mi_8x8[0] + index;
+
+ set_mi_row_col(xd, tile, mi_row + block_row, mi_height,
+ mi_col + block_col, mi_width, cm->mi_rows, cm->mi_cols);
+
+ xd->mi_8x8 = mi_8x8 + index;
+
+ if (cm->frame_type != KEY_FRAME) {
+ set_offsets(cpi, tile, mi_row + block_row, mi_col + block_col, bsize);
+
+ vp9_pick_inter_mode(cpi, x, tile,
+ mi_row + block_row, mi_col + block_col,
+ &blk_rate, &blk_dist, cpi->sf.always_this_block_size);
+ } else {
+ set_mode_info(&mi_8x8[index]->mbmi, bsize, mode,
+ mi_row + block_row, mi_col + block_col);
+ vp9_setup_buffer_inter(cpi, x, tile,
+ LAST_FRAME, cpi->sf.always_this_block_size,
+ mi_row + block_row, mi_col + block_col,
+ frame_nearest_mv, frame_near_mv, yv12_mb);
+ }
+
+ for (j = 0; j < mi_height; j++)
+ for (i = 0; i < mi_width; i++)
+ if ((xd->mb_to_right_edge >> (3 + MI_SIZE_LOG2)) + mi_width > i
+ && (xd->mb_to_bottom_edge >> (3 + MI_SIZE_LOG2)) + mi_height > j) {
+ mi_8x8[index+ i + j * mis] = mi_8x8[index];  // point every covered grid entry at this block's mode info
+ }
+
+ for (b8i = 0; b8i < 4; b8i++) {  // empty loop body, kept as in the original
+ }
+ }
+ }
+ encode_sb_rt(cpi, tile, tp, mi_row, mi_col, 1, BLOCK_64X64);
+
+ *rate = chosen_rate;  // chosen_rate/chosen_dist are never updated, so both outputs are always INT_MAX
+ *dist = chosen_dist;
+}
+
+static void encode_rtc_sb_row(VP9_COMP *cpi, const TileInfo *const tile,  // encodes every superblock of one row of |tile|, left to right, through rtc_use_partition
+ int mi_row, TOKENEXTRA **tp) {
+ VP9_COMMON * const cm = &cpi->common;
+ int mi_col;
+
+ // Initialize the left context for the new SB row
+ vpx_memset(&cpi->left_context, 0, sizeof(cpi->left_context));
+ vpx_memset(cpi->left_seg_context, 0, sizeof(cpi->left_seg_context));
+
+ // Code each SB in the row
+ for (mi_col = tile->mi_col_start; mi_col < tile->mi_col_end;
+ mi_col += MI_BLOCK_SIZE) {
+ int dummy_rate;  // rate/dist outputs of rtc_use_partition are discarded
+ int64_t dummy_dist;
+
+ const int idx_str = cm->mode_info_stride * mi_row + mi_col;
+ MODE_INFO **mi_8x8 = cm->mi_grid_visible + idx_str;  // grid entry of this superblock's top-left position
+
+ cpi->mb.source_variance = UINT_MAX;
+ set_offsets(cpi, tile, mi_row, mi_col, BLOCK_64X64);
+ set_partitioning(cpi, tile, mi_8x8, mi_row, mi_col);
+ rtc_use_partition(cpi, tile, mi_8x8, tp, mi_row, mi_col, BLOCK_64X64,
+ &dummy_rate, &dummy_dist, 1);
+ }
+}
+
+
+static void encode_rtc_frame_internal(VP9_COMP *cpi) {  // frame driver for the RTC path: resets counters, sets up quantizer/RD state, then encodes each tile row by row with encode_rtc_sb_row
+ int mi_row;
+ MACROBLOCK * const x = &cpi->mb;
+ VP9_COMMON * const cm = &cpi->common;
+ MACROBLOCKD * const xd = &x->e_mbd;
+
+// fprintf(stderr, "encode_frame_internal frame %d (%d) type %d\n",
+// cpi->common.current_video_frame, cpi->common.show_frame,
+// cm->frame_type);
+
+// debug output
+#if DBG_PRNT_SEGMAP
+ {
+ FILE *statsfile;
+ statsfile = fopen("segmap2.stt", "a");
+ fprintf(statsfile, "\n");
+ fclose(statsfile);
+ }
+#endif
+
+ vp9_zero(cm->counts.switchable_interp);
+ vp9_zero(cpi->tx_stepdown_count);
+
+ xd->mi_8x8 = cm->mi_grid_visible;
+ // required for vp9_frame_init_quantizer
+ xd->mi_8x8[0] = cm->mi;
+
+ xd->last_mi = cm->prev_mi;
+
+ vp9_zero(cpi->common.counts.mv);
+ vp9_zero(cpi->coef_counts);
+ vp9_zero(cm->counts.eob_branch);
+
+ cpi->mb.e_mbd.lossless = cm->base_qindex == 0 && cm->y_dc_delta_q == 0  // lossless only when the base q index and all three delta-q values are zero
+ && cm->uv_dc_delta_q == 0 && cm->uv_ac_delta_q == 0;
+ switch_lossless_mode(cpi, cpi->mb.e_mbd.lossless);
+
+ vp9_frame_init_quantizer(cpi);
+
+ vp9_initialize_rd_consts(cpi);
+ vp9_initialize_me_consts(cpi, cm->base_qindex);
+ switch_tx_mode(cpi);
+ cpi->sf.always_this_block_size = BLOCK_16X16;  // rtc_use_partition reads this as its fixed block size
+
+ if (cpi->oxcf.tuning == VP8_TUNE_SSIM) {
+ // Initialize encode frame context.
+ init_encode_frame_mb_context(cpi);
+
+ // Build a frame level activity map
+ build_activity_map(cpi);
+ }
+
+ // Re-initialize encode frame context.
+ init_encode_frame_mb_context(cpi);
+
+ vp9_zero(cpi->rd_comp_pred_diff);
+ vp9_zero(cpi->rd_filter_diff);
+ vp9_zero(cpi->rd_tx_select_diff);
+ vp9_zero(cpi->rd_tx_select_threshes);
+
+ set_prev_mi(cm);
+
+ {
+ struct vpx_usec_timer emr_timer;
+ vpx_usec_timer_start(&emr_timer);  // the elapsed time is added to cpi->time_encode_sb_row below
+
+ {
+ // Take tiles into account and give start/end MB
+ int tile_col, tile_row;
+ TOKENEXTRA *tp = cpi->tok;
+ const int tile_cols = 1 << cm->log2_tile_cols;
+ const int tile_rows = 1 << cm->log2_tile_rows;
+
+ for (tile_row = 0; tile_row < tile_rows; tile_row++) {
+ for (tile_col = 0; tile_col < tile_cols; tile_col++) {
+ TileInfo tile;
+ TOKENEXTRA *tp_old = tp;
+
+ // For each row of SBs in the frame
+ vp9_tile_init(&tile, cm, tile_row, tile_col);
+ for (mi_row = tile.mi_row_start;
+ mi_row < tile.mi_row_end; mi_row += 8)  // NOTE(review): 8 presumably equals MI_BLOCK_SIZE, the column step in encode_rtc_sb_row -- confirm
+ encode_rtc_sb_row(cpi, &tile, mi_row, &tp);
+
+ cpi->tok_count[tile_row][tile_col] = (unsigned int)(tp - tp_old);  // number of tokens written for this tile
+ assert(tp - cpi->tok <= get_token_alloc(cm->mb_rows, cm->mb_cols));
+ }
+ }
+ }
+
+ vpx_usec_timer_mark(&emr_timer);
+ cpi->time_encode_sb_row += vpx_usec_timer_elapsed(&emr_timer);
+ }
+
+ if (cpi->sf.skip_encode_sb) {
+ int j;
+ unsigned int intra_count = 0, inter_count = 0;
+ for (j = 0; j < INTRA_INTER_CONTEXTS; ++j) {
+ intra_count += cm->counts.intra_inter[j][0];
+ inter_count += cm->counts.intra_inter[j][1];
+ }
+ cpi->sf.skip_encode_frame = ((intra_count << 2) < inter_count);  // set when inter blocks outnumber intra blocks by more than 4 to 1
+ cpi->sf.skip_encode_frame &= (cm->frame_type != KEY_FRAME);
+ cpi->sf.skip_encode_frame &= cm->show_frame;
+ } else {
+ cpi->sf.skip_encode_frame = 0;
+ }
+
+#if 0
+ // Keep record of the total distortion this time around for future use
+ cpi->last_frame_distortion = cpi->frame_distortion;
+#endif
+}
+// end RTC play code
+
void vp9_encode_frame(VP9_COMP *cpi) {
VP9_COMMON *const cm = &cpi->common;
@@ -2462,7 +2734,6 @@
if (cpi->sf.RD) {
int i;
REFERENCE_MODE reference_mode;
- INTERP_FILTER interp_filter;
/*
* This code does a single RD pass over the whole frame assuming
* either compound, single or hybrid prediction as per whatever has
@@ -2472,7 +2743,7 @@
* that for subsequent frames.
* It does the same analysis for transform size selection also.
*/
- const int frame_type = get_frame_type(cpi);
+ const MV_REFERENCE_FRAME frame_type = get_frame_type(cpi);
const int64_t *mode_thresh = cpi->rd_prediction_type_threshes[frame_type];
const int64_t *filter_thresh = cpi->rd_filter_threshes[frame_type];
@@ -2490,22 +2761,18 @@
else
reference_mode = REFERENCE_MODE_SELECT;
- /* filter type selection */
- // FIXME(rbultje) for some odd reason, we often select smooth_filter
- // as default filter for ARF overlay frames. This is a REALLY BAD
- // IDEA so we explicitly disable it here.
- if (frame_type != 3 &&
- filter_thresh[EIGHTTAP_SMOOTH] > filter_thresh[EIGHTTAP] &&
- filter_thresh[EIGHTTAP_SMOOTH] > filter_thresh[EIGHTTAP_SHARP] &&
- filter_thresh[EIGHTTAP_SMOOTH] > filter_thresh[SWITCHABLE - 1]) {
- interp_filter = EIGHTTAP_SMOOTH;
- } else if (filter_thresh[EIGHTTAP_SHARP] > filter_thresh[EIGHTTAP] &&
- filter_thresh[EIGHTTAP_SHARP] > filter_thresh[SWITCHABLE - 1]) {
- interp_filter = EIGHTTAP_SHARP;
- } else if (filter_thresh[EIGHTTAP] > filter_thresh[SWITCHABLE - 1]) {
- interp_filter = EIGHTTAP;
- } else {
- interp_filter = SWITCHABLE;
+ if (cm->interp_filter == SWITCHABLE) {
+ if (frame_type != ALTREF_FRAME &&
+ filter_thresh[EIGHTTAP_SMOOTH] > filter_thresh[EIGHTTAP] &&
+ filter_thresh[EIGHTTAP_SMOOTH] > filter_thresh[EIGHTTAP_SHARP] &&
+ filter_thresh[EIGHTTAP_SMOOTH] > filter_thresh[SWITCHABLE - 1]) {
+ cm->interp_filter = EIGHTTAP_SMOOTH;
+ } else if (filter_thresh[EIGHTTAP_SHARP] > filter_thresh[EIGHTTAP] &&
+ filter_thresh[EIGHTTAP_SHARP] > filter_thresh[SWITCHABLE - 1]) {
+ cm->interp_filter = EIGHTTAP_SHARP;
+ } else if (filter_thresh[EIGHTTAP] > filter_thresh[SWITCHABLE - 1]) {
+ cm->interp_filter = EIGHTTAP;
+ }
}
cpi->mb.e_mbd.lossless = cpi->oxcf.lossless;
@@ -2513,8 +2780,11 @@
/* transform size selection (4x4, 8x8, 16x16 or select-per-mb) */
select_tx_mode(cpi);
cm->reference_mode = reference_mode;
- cm->interp_filter = interp_filter;
- encode_frame_internal(cpi);
+
+ if (cpi->sf.super_fast_rtc)
+ encode_rtc_frame_internal(cpi);
+ else
+ encode_frame_internal(cpi);
for (i = 0; i < REFERENCE_MODES; ++i) {
const int diff = (int) (cpi->rd_comp_pred_diff[i] / cm->MBs);
@@ -2592,7 +2862,12 @@
}
}
} else {
- encode_frame_internal(cpi);
+ // Force the usage of the BILINEAR interp_filter.
+ cm->interp_filter = BILINEAR;
+ if (cpi->sf.super_fast_rtc)
+ encode_rtc_frame_internal(cpi);
+ else
+ encode_frame_internal(cpi);
}
}
@@ -2668,7 +2943,8 @@
const int mi_width = num_8x8_blocks_wide_lookup[bsize];
const int mi_height = num_8x8_blocks_high_lookup[bsize];
x->skip_recode = !x->select_txfm_size && mbmi->sb_type >= BLOCK_8X8 &&
- (cpi->oxcf.aq_mode != COMPLEXITY_AQ);
+ (cpi->oxcf.aq_mode != COMPLEXITY_AQ) &&
+ !cpi->sf.super_fast_rtc;
x->skip_optimize = ctx->is_coded;
ctx->is_coded = 1;
x->use_lp32x32fdct = cpi->sf.use_lp32x32fdct;
diff --git a/vp9/encoder/vp9_encodemb.c b/vp9/encoder/vp9_encodemb.c
index 4488189..8ff23c7 100644
--- a/vp9/encoder/vp9_encodemb.c
+++ b/vp9/encoder/vp9_encodemb.c
@@ -338,7 +338,6 @@
pd->above_context, pd->left_context,
num_4x4_w, num_4x4_h);
}
-
void vp9_xform_quant(int plane, int block, BLOCK_SIZE plane_bsize,
TX_SIZE tx_size, void *arg) {
struct encode_b_args* const args = arg;
diff --git a/vp9/encoder/vp9_firstpass.c b/vp9/encoder/vp9_firstpass.c
index af9fa1b..a03cbdd 100644
--- a/vp9/encoder/vp9_firstpass.c
+++ b/vp9/encoder/vp9_firstpass.c
@@ -49,6 +49,9 @@
#define DOUBLE_DIVIDE_CHECK(x) ((x) < 0 ? (x) - 0.000001 : (x) + 0.000001)
+#define MIN_BOOST 300
+#define KEY_FRAME_BOOST 2000
+
static void swap_yv12(YV12_BUFFER_CONFIG *a, YV12_BUFFER_CONFIG *b) {
YV12_BUFFER_CONFIG temp = *a;
*a = *b;
@@ -2219,8 +2222,8 @@
if (kf_boost < (rc->frames_to_key * 3))
kf_boost = (rc->frames_to_key * 3);
- if (kf_boost < 300) // Min KF boost
- kf_boost = 300;
+ if (kf_boost < MIN_BOOST)
+ kf_boost = MIN_BOOST;
// Make a note of baseline boost and the zero motion
// accumulator value for use elsewhere.
@@ -2331,7 +2334,7 @@
cpi->rc.this_key_frame_forced = cm->current_video_frame != 0 &&
cpi->rc.frames_to_key == 0;
cpi->rc.frames_to_key = cpi->key_frame_frequency;
- cpi->rc.kf_boost = 2000;
+ cpi->rc.kf_boost = KEY_FRAME_BOOST;
cpi->rc.source_alt_ref_active = 0;
} else {
cm->frame_type = INTER_FRAME;
@@ -2358,7 +2361,7 @@
cpi->rc.this_key_frame_forced = cm->current_video_frame != 0 &&
cpi->rc.frames_to_key == 0;
cpi->rc.frames_to_key = cpi->key_frame_frequency;
- cpi->rc.kf_boost = 2000;
+ cpi->rc.kf_boost = KEY_FRAME_BOOST;
cpi->rc.source_alt_ref_active = 0;
} else {
cm->frame_type = INTER_FRAME;
diff --git a/vp9/encoder/vp9_onyx_if.c b/vp9/encoder/vp9_onyx_if.c
index c28d01c..b135b67 100644
--- a/vp9/encoder/vp9_onyx_if.c
+++ b/vp9/encoder/vp9_onyx_if.c
@@ -455,14 +455,17 @@
cache_ptr += cm->mi_cols;
}
}
+static int is_slowest_mode(int mode) {  // nonzero for the two best-quality modes; the rd threshold setters use it to pick a -500 baseline instead of 0
+ return (mode == MODE_SECONDPASS_BEST || mode == MODE_BESTQUALITY);
+}
-static void set_rd_speed_thresholds(VP9_COMP *cpi, int mode) {
+static void set_rd_speed_thresholds(VP9_COMP *cpi) {
SPEED_FEATURES *sf = &cpi->sf;
int i;
// Set baseline threshold values
for (i = 0; i < MAX_MODES; ++i)
- sf->thresh_mult[i] = mode == 0 ? -500 : 0;
+ sf->thresh_mult[i] = is_slowest_mode(cpi->oxcf.mode) ? -500 : 0;
sf->thresh_mult[THR_NEARESTMV] = 0;
sf->thresh_mult[THR_NEARESTG] = 0;
@@ -538,12 +541,12 @@
}
}
-static void set_rd_speed_thresholds_sub8x8(VP9_COMP *cpi, int mode) {
+static void set_rd_speed_thresholds_sub8x8(VP9_COMP *cpi) {
SPEED_FEATURES *sf = &cpi->sf;
int i;
for (i = 0; i < MAX_REFS; ++i)
- sf->thresh_mult_sub8x8[i] = mode == 0 ? -500 : 0;
+ sf->thresh_mult_sub8x8[i] = is_slowest_mode(cpi->oxcf.mode) ? -500 : 0;
sf->thresh_mult_sub8x8[THR_LAST] += 2500;
sf->thresh_mult_sub8x8[THR_GOLD] += 2500;
@@ -626,7 +629,7 @@
sf->disable_filter_search_var_thresh = 50;
sf->comp_inter_joint_search_thresh = BLOCK_SIZES;
- sf->auto_min_max_partition_size = 1;
+ sf->auto_min_max_partition_size = RELAXED_NEIGHBORING_MIN_MAX;
sf->use_lastframe_partitioning = LAST_FRAME_PARTITION_LOW_MOTION;
sf->adjust_partitioning_from_last_frame = 1;
sf->last_partitioning_redo_frequency = 3;
@@ -663,7 +666,7 @@
sf->disable_filter_search_var_thresh = 100;
sf->comp_inter_joint_search_thresh = BLOCK_SIZES;
- sf->auto_min_max_partition_size = 1;
+ sf->auto_min_max_partition_size = RELAXED_NEIGHBORING_MIN_MAX;
sf->use_lastframe_partitioning = LAST_FRAME_PARTITION_ALL;
sf->adjust_partitioning_from_last_frame = 1;
sf->last_partitioning_redo_frequency = 3;
@@ -698,7 +701,7 @@
sf->disable_filter_search_var_thresh = 200;
sf->comp_inter_joint_search_thresh = BLOCK_SIZES;
- sf->auto_min_max_partition_size = 1;
+ sf->auto_min_max_partition_size = RELAXED_NEIGHBORING_MIN_MAX;
sf->use_lastframe_partitioning = LAST_FRAME_PARTITION_ALL;
sf->adjust_partitioning_from_last_frame = 1;
sf->last_partitioning_redo_frequency = 3;
@@ -797,7 +800,7 @@
sf->disable_filter_search_var_thresh = 50;
sf->comp_inter_joint_search_thresh = BLOCK_SIZES;
- sf->auto_min_max_partition_size = 1;
+ sf->auto_min_max_partition_size = RELAXED_NEIGHBORING_MIN_MAX;
sf->use_lastframe_partitioning = LAST_FRAME_PARTITION_LOW_MOTION;
sf->adjust_partitioning_from_last_frame = 1;
sf->last_partitioning_redo_frequency = 3;
@@ -839,19 +842,24 @@
if (speed >= 5) {
int i;
sf->disable_split_mask = DISABLE_ALL_SPLIT;
+ sf->auto_min_max_partition_size = frame_is_intra_only(cm) ?
+ RELAXED_NEIGHBORING_MIN_MAX : STRICT_NEIGHBORING_MIN_MAX;
sf->subpel_force_stop = 1;
for (i = 0; i < TX_SIZES; i++) {
sf->intra_y_mode_mask[i] = INTRA_DC_H_V;
sf->intra_uv_mode_mask[i] = INTRA_DC_ONLY;
}
sf->use_fast_lpf_pick = 2;
+ sf->RD = 0;
+ }
+ if (speed >= 6) {
+ sf->super_fast_rtc = 1;
}
}
void vp9_set_speed_features(VP9_COMP *cpi) {
SPEED_FEATURES *sf = &cpi->sf;
VP9_COMMON *cm = &cpi->common;
- int mode = cpi->compressor_speed;
int speed = cpi->speed;
int i;
@@ -884,7 +892,7 @@
sf->use_one_partition_size_always = 0;
sf->less_rectangular_check = 0;
sf->use_square_partition_only = 0;
- sf->auto_min_max_partition_size = 0;
+ sf->auto_min_max_partition_size = NOT_IN_USE;
sf->max_partition_size = BLOCK_64X64;
sf->min_partition_size = BLOCK_4X4;
sf->adjust_partitioning_from_last_frame = 0;
@@ -904,23 +912,26 @@
sf->use_fast_coef_updates = 0;
sf->using_small_partition_info = 0;
sf->mode_skip_start = MAX_MODES; // Mode index at which mode skip mask set
+ sf->super_fast_rtc = 0;
- switch (mode) {
- case 0: // This is the best quality mode.
+ switch (cpi->oxcf.mode) {
+ case MODE_BESTQUALITY:
+ case MODE_SECONDPASS_BEST: // This is the best quality mode.
cpi->diamond_search_sad = vp9_full_range_search;
break;
- case 1:
+ case MODE_FIRSTPASS:
+ case MODE_GOODQUALITY:
+ case MODE_SECONDPASS:
set_good_speed_feature(cm, sf, speed);
break;
- break;
- case 2:
+ case MODE_REALTIME:
set_rt_speed_feature(cm, sf, speed);
break;
}; /* switch */
// Set rd thresholds based on mode and speed setting
- set_rd_speed_thresholds(cpi, mode);
- set_rd_speed_thresholds_sub8x8(cpi, mode);
+ set_rd_speed_thresholds(cpi);
+ set_rd_speed_thresholds_sub8x8(cpi);
// Slow quant, dct and trellis not worthwhile for first pass
// so make sure they are always turned off.
@@ -1241,24 +1252,24 @@
// Real time and one pass deprecated in test code base
case MODE_GOODQUALITY:
cpi->pass = 0;
- cpi->compressor_speed = 2;
cpi->oxcf.cpu_used = clamp(cpi->oxcf.cpu_used, -5, 5);
break;
case MODE_FIRSTPASS:
cpi->pass = 1;
- cpi->compressor_speed = 1;
break;
case MODE_SECONDPASS:
cpi->pass = 2;
- cpi->compressor_speed = 1;
cpi->oxcf.cpu_used = clamp(cpi->oxcf.cpu_used, -5, 5);
break;
case MODE_SECONDPASS_BEST:
cpi->pass = 2;
- cpi->compressor_speed = 0;
+ break;
+
+ case MODE_REALTIME:
+ cpi->pass = 0;
break;
}
@@ -2732,7 +2743,9 @@
if (cpi->sf.recode_loop != 0) {
vp9_save_coding_context(cpi);
cpi->dummy_packing = 1;
- vp9_pack_bitstream(cpi, dest, size);
+ if (!cpi->sf.super_fast_rtc)
+ vp9_pack_bitstream(cpi, dest, size);
+
cpi->rc.projected_frame_size = (*size) << 3;
vp9_restore_coding_context(cpi);
@@ -3081,11 +3094,22 @@
&frame_under_shoot_limit,
&frame_over_shoot_limit);
- // Decide q and q bounds
+ // Decide q and q bounds.
q = vp9_rc_pick_q_and_adjust_q_bounds(cpi,
&bottom_index,
&top_index);
+ // JBB : This is realtime mode. In real time mode the first frame
+ // should be larger. Q of 0 is disabled because we force tx size to be
+ // 16x16...
+ if (cpi->sf.super_fast_rtc) {
+ if (cpi->common.current_video_frame == 0)
+ q /= 3;
+
+ if (q == 0)
+ q++;
+ }
+
if (!frame_is_intra_only(cm)) {
cm->interp_filter = DEFAULT_INTERP_FILTER;
/* TODO: Decide this more intelligently */
@@ -3227,7 +3251,8 @@
cm->last_height = cm->height;
// reset to normal state now that we are done.
- cm->last_show_frame = cm->show_frame;
+ if (!cm->show_existing_frame)
+ cm->last_show_frame = cm->show_frame;
if (cm->show_frame) {
// current mip will be the prev_mip for the next frame
MODE_INFO *temp = cm->prev_mip;
@@ -3307,6 +3332,7 @@
YV12_BUFFER_CONFIG *sd, int64_t time_stamp,
int64_t end_time) {
VP9_COMP *cpi = (VP9_COMP *) ptr;
+ VP9_COMMON *cm = &cpi->common;
struct vpx_usec_timer timer;
int res = 0;
const int subsampling_x = sd->uv_width < sd->y_width;
@@ -3320,6 +3346,12 @@
vpx_usec_timer_mark(&timer);
cpi->time_receive_data += vpx_usec_timer_elapsed(&timer);
+ if (cm->version == 0 && (subsampling_x != 1 || subsampling_y != 1)) {
+ vpx_internal_error(&cm->error, VPX_CODEC_INVALID_PARAM,
+ "Non-4:2:0 color space requires profile >= 1");
+ res = -1;
+ }
+
return res;
}
diff --git a/vp9/encoder/vp9_onyx_int.h b/vp9/encoder/vp9_onyx_int.h
index a4cd9bb..c3ecd7f 100644
--- a/vp9/encoder/vp9_onyx_int.h
+++ b/vp9/encoder/vp9_onyx_int.h
@@ -181,6 +181,12 @@
} TX_SIZE_SEARCH_METHOD;
typedef enum {
+ NOT_IN_USE = 0,
+ RELAXED_NEIGHBORING_MIN_MAX = 1,
+ STRICT_NEIGHBORING_MIN_MAX = 2
+} AUTO_MIN_MAX_MODE;
+
+typedef enum {
// Values should be powers of 2 so that they can be selected as bits of
// an integer flags field
@@ -337,9 +343,8 @@
BLOCK_SIZE always_this_block_size;
// Sets min and max partition sizes for this 64x64 region based on the
- // same superblock in last encoded frame, and the left and above neighbor
- // in this block.
- int auto_min_max_partition_size;
+ // same 64x64 in last encoded frame, and the left and above neighbor.
+ AUTO_MIN_MAX_MODE auto_min_max_partition_size;
// Min and max partition size we enable (block_size) as per auto
// min max, but also used by adjust partitioning, and pick_partitioning.
@@ -411,6 +416,9 @@
// This feature limits the number of coefficients updates we actually do
// by only looking at counts from 1/2 the bands.
int use_fast_coef_updates; // 0: 2-loop, 1: 1-loop, 2: 1-loop reduced
+
+ // This flag controls the use of the new super fast rtc mode
+ int super_fast_rtc;
} SPEED_FEATURES;
typedef struct VP9_COMP {
@@ -434,7 +442,6 @@
MACROBLOCK mb;
VP9_COMMON common;
VP9_CONFIG oxcf;
- struct rdcost_block_args rdcost_stack;
struct lookahead_ctx *lookahead;
struct lookahead_entry *source;
#if CONFIG_MULTIPLE_ARF
@@ -546,7 +553,6 @@
// for real time encoding
int speed;
- int compressor_speed;
int cpu_used;
int pass;
@@ -751,8 +757,10 @@
static void set_ref_ptrs(VP9_COMMON *cm, MACROBLOCKD *xd,
MV_REFERENCE_FRAME ref0, MV_REFERENCE_FRAME ref1) {
- xd->block_refs[0] = &cm->frame_refs[ref0 - LAST_FRAME];
- xd->block_refs[1] = &cm->frame_refs[ref1 - LAST_FRAME];
+ xd->block_refs[0] = &cm->frame_refs[ref0 >= LAST_FRAME ? ref0 - LAST_FRAME
+ : 0];
+ xd->block_refs[1] = &cm->frame_refs[ref1 >= LAST_FRAME ? ref1 - LAST_FRAME
+ : 0];
}
#ifdef __cplusplus
diff --git a/vp9/encoder/vp9_pickmode.c b/vp9/encoder/vp9_pickmode.c
index 210d15f..bd28ea5 100644
--- a/vp9/encoder/vp9_pickmode.c
+++ b/vp9/encoder/vp9_pickmode.c
@@ -80,7 +80,7 @@
step_param = 6;
further_steps = (cpi->sf.max_step_search_steps - 1) - step_param;
- for (i = LAST_FRAME; i <= ALTREF_FRAME && cpi->common.show_frame; ++i) {
+ for (i = LAST_FRAME; i <= LAST_FRAME && cpi->common.show_frame; ++i) {
if ((x->pred_mv_sad[ref] >> 3) > x->pred_mv_sad[i]) {
tmp_mv->as_int = INVALID_MV;
@@ -124,8 +124,8 @@
stride, 0x7fffffff);
// scale to 1/8 pixel resolution
- tmp_mv->as_mv.row = tmp_mv->as_mv.row << 3;
- tmp_mv->as_mv.col = tmp_mv->as_mv.col << 3;
+ tmp_mv->as_mv.row = tmp_mv->as_mv.row * 8;
+ tmp_mv->as_mv.col = tmp_mv->as_mv.col * 8;
// calculate the bit cost on motion vector
*rate_mv = vp9_mv_bit_cost(&tmp_mv->as_mv, &ref_mv.as_mv,
@@ -142,8 +142,7 @@
int mi_row, int mi_col,
int *returnrate,
int64_t *returndistortion,
- BLOCK_SIZE bsize,
- PICK_MODE_CONTEXT *ctx) {
+ BLOCK_SIZE bsize) {
MACROBLOCKD *xd = &x->e_mbd;
MB_MODE_INFO *mbmi = &xd->mi_8x8[0]->mbmi;
const BLOCK_SIZE block_size = get_plane_block_size(bsize, &xd->plane[0]);
@@ -155,6 +154,7 @@
VP9_ALT_FLAG };
int64_t best_rd = INT64_MAX;
int64_t this_rd;
+ int64_t cost[4]= { 0, 100, 150, 205 };
x->skip_encode = cpi->sf.skip_encode_frame && x->q_index < QIDX_SKIP_THRESH;
@@ -171,7 +171,7 @@
mbmi->tx_size = MIN(max_txsize_lookup[bsize],
tx_mode_to_biggest_tx_size[cpi->common.tx_mode]);
- for (ref_frame = LAST_FRAME; ref_frame <= ALTREF_FRAME; ++ref_frame) {
+ for (ref_frame = LAST_FRAME; ref_frame <= LAST_FRAME ; ++ref_frame) {
x->pred_mv_sad[ref_frame] = INT_MAX;
if (cpi->ref_frame_flags & flag_list[ref_frame]) {
vp9_setup_buffer_inter(cpi, x, tile,
@@ -182,7 +182,7 @@
frame_mv[ZEROMV][ref_frame].as_int = 0;
}
- for (ref_frame = LAST_FRAME; ref_frame <= ALTREF_FRAME; ++ref_frame) {
+ for (ref_frame = LAST_FRAME; ref_frame <= LAST_FRAME ; ++ref_frame) {
int rate_mv = 0;
if (!(cpi->ref_frame_flags & flag_list[ref_frame]))
@@ -191,29 +191,42 @@
// Select prediction reference frames.
xd->plane[0].pre[0] = yv12_mb[ref_frame][0];
-
- x->mode_sad[ref_frame][INTER_OFFSET(NEWMV)] =
- full_pixel_motion_search(cpi, x, tile, bsize, mi_row, mi_col,
- &frame_mv[NEWMV][ref_frame], &rate_mv);
-
- if (frame_mv[NEWMV][ref_frame].as_int == INVALID_MV)
- continue;
-
clamp_mv2(&frame_mv[NEARESTMV][ref_frame].as_mv, xd);
clamp_mv2(&frame_mv[NEARMV][ref_frame].as_mv, xd);
for (this_mode = NEARESTMV; this_mode <= NEWMV; ++this_mode) {
- int rate = x->inter_mode_cost[mbmi->mode_context[ref_frame]]
- [INTER_OFFSET(this_mode)];
- int64_t dist = x->mode_sad[ref_frame][INTER_OFFSET(this_mode)] *
- x->mode_sad[ref_frame][INTER_OFFSET(this_mode)];
- this_rd = RDCOST(x->rdmult, x->rddiv, rate, dist);
+ int rate = cost[this_mode - NEARESTMV];
+ int64_t dist;
+
+ if (this_mode == NEWMV) {
+ if (this_rd < 300)
+ continue;
+
+ x->mode_sad[ref_frame][INTER_OFFSET(NEWMV)] =
+ full_pixel_motion_search(cpi, x, tile, bsize, mi_row, mi_col,
+ &frame_mv[NEWMV][ref_frame], &rate_mv);
+
+ if (frame_mv[NEWMV][ref_frame].as_int == INVALID_MV)
+ continue;
+ }
+
+ dist = x->mode_sad[ref_frame][INTER_OFFSET(this_mode)];
+ this_rd = rate + dist;
if (this_rd < best_rd) {
best_rd = this_rd;
mbmi->mode = this_mode;
mbmi->ref_frame[0] = ref_frame;
mbmi->mv[0].as_int = frame_mv[this_mode][ref_frame].as_int;
+ xd->mi_8x8[0]->bmi[0].as_mv[0].as_int = mbmi->mv[0].as_int;
+ mbmi->interp_filter = EIGHTTAP;
+
+ mbmi->ref_frame[1] = INTRA_FRAME;
+ mbmi->tx_size = max_txsize_lookup[bsize];
+ mbmi->uv_mode = this_mode;
+ mbmi->skip_coeff = 0;
+ mbmi->sb_type = bsize;
+ mbmi->segment_id = 0;
}
}
}
@@ -223,8 +236,5 @@
// TODO(jingning) intra prediction search, if the best SAD is above a certain
// threshold.
- // store mode decisions
- ctx->mic = *xd->mi_8x8[0];
-
return INT64_MAX;
}
diff --git a/vp9/encoder/vp9_pickmode.h b/vp9/encoder/vp9_pickmode.h
index 82904ae..05ff187 100644
--- a/vp9/encoder/vp9_pickmode.h
+++ b/vp9/encoder/vp9_pickmode.h
@@ -22,8 +22,7 @@
int mi_row, int mi_col,
int *returnrate,
int64_t *returndistortion,
- BLOCK_SIZE bsize,
- PICK_MODE_CONTEXT *ctx);
+ BLOCK_SIZE bsize);
#ifdef __cplusplus
} // extern "C"
diff --git a/vp9/encoder/vp9_rdopt.c b/vp9/encoder/vp9_rdopt.c
index 24b41a9..9124880 100644
--- a/vp9/encoder/vp9_rdopt.c
+++ b/vp9/encoder/vp9_rdopt.c
@@ -55,6 +55,22 @@
MV_REFERENCE_FRAME ref_frame[2];
} REF_DEFINITION;
+struct rdcost_block_args {
+ MACROBLOCK *x;
+ ENTROPY_CONTEXT t_above[16];
+ ENTROPY_CONTEXT t_left[16];
+ int rate;
+ int64_t dist;
+ int64_t sse;
+ int this_rate;
+ int64_t this_dist;
+ int64_t this_sse;
+ int64_t this_rd;
+ int64_t best_rd;
+ int skip;
+ const int16_t *scan, *nb;
+};
+
const MODE_DEFINITION vp9_mode_order[MAX_MODES] = {
{NEARESTMV, {LAST_FRAME, NONE}},
{NEARESTMV, {ALTREF_FRAME, NONE}},
@@ -280,22 +296,24 @@
fill_token_costs(x->token_costs, cm->fc.coef_probs);
- for (i = 0; i < PARTITION_CONTEXTS; i++)
- vp9_cost_tokens(x->partition_cost[i], get_partition_probs(cm, i),
- vp9_partition_tree);
+ if (!cpi->sf.super_fast_rtc) {
+ for (i = 0; i < PARTITION_CONTEXTS; i++)
+ vp9_cost_tokens(x->partition_cost[i], get_partition_probs(cm, i),
+ vp9_partition_tree);
- fill_mode_costs(cpi);
+ fill_mode_costs(cpi);
- if (!frame_is_intra_only(cm)) {
- vp9_build_nmv_cost_table(x->nmvjointcost,
- cm->allow_high_precision_mv ? x->nmvcost_hp
- : x->nmvcost,
- &cm->fc.nmvc,
- cm->allow_high_precision_mv, 1, 1);
+ if (!frame_is_intra_only(cm)) {
+ vp9_build_nmv_cost_table(x->nmvjointcost,
+ cm->allow_high_precision_mv ? x->nmvcost_hp
+ : x->nmvcost,
+ &cm->fc.nmvc,
+ cm->allow_high_precision_mv, 1, 1);
- for (i = 0; i < INTER_MODE_CONTEXTS; ++i)
- vp9_cost_tokens((int *)x->inter_mode_cost[i],
- cm->fc.inter_mode_probs[i], vp9_inter_mode_tree);
+ for (i = 0; i < INTER_MODE_CONTEXTS; ++i)
+ vp9_cost_tokens((int *)x->inter_mode_cost[i],
+ cm->fc.inter_mode_probs[i], vp9_inter_mode_tree);
+ }
}
}
@@ -425,7 +443,7 @@
if (i == 0)
x->pred_sse[ref] = sse;
- if (cpi->compressor_speed > 2) {
+ if (cpi->sf.super_fast_rtc) {
dist_sum += (int)sse;
} else {
int rate;
@@ -585,15 +603,15 @@
return cost;
}
-static void dist_block(int plane, int block, TX_SIZE tx_size, void *arg) {
+static void dist_block(int plane, int block, TX_SIZE tx_size,
+ struct rdcost_block_args* args) {
const int ss_txfrm_size = tx_size << 1;
- struct rdcost_block_args* args = arg;
MACROBLOCK* const x = args->x;
MACROBLOCKD* const xd = &x->e_mbd;
struct macroblock_plane *const p = &x->plane[plane];
struct macroblockd_plane *const pd = &xd->plane[plane];
int64_t this_sse;
- int shift = args->tx_size == TX_32X32 ? 0 : 2;
+ int shift = tx_size == TX_32X32 ? 0 : 2;
int16_t *const coeff = BLOCK_OFFSET(p->coeff, block);
int16_t *const dqcoeff = BLOCK_OFFSET(pd->dqcoeff, block);
args->dist = vp9_block_error(coeff, dqcoeff, 16 << ss_txfrm_size,
@@ -610,14 +628,12 @@
}
static void rate_block(int plane, int block, BLOCK_SIZE plane_bsize,
- TX_SIZE tx_size, void *arg) {
- struct rdcost_block_args* args = arg;
-
+ TX_SIZE tx_size, struct rdcost_block_args* args) {
int x_idx, y_idx;
- txfrm_block_to_raster_xy(plane_bsize, args->tx_size, block, &x_idx, &y_idx);
+ txfrm_block_to_raster_xy(plane_bsize, tx_size, block, &x_idx, &y_idx);
args->rate = cost_coeffs(args->x, plane, block, args->t_above + x_idx,
- args->t_left + y_idx, args->tx_size,
+ args->t_left + y_idx, tx_size,
args->scan, args->nb);
}
@@ -694,24 +710,19 @@
}
}
-static void init_rdcost_stack(MACROBLOCK *x, TX_SIZE tx_size,
- const int num_4x4_w, const int num_4x4_h,
- const int64_t ref_rdcost,
+static void init_rdcost_stack(MACROBLOCK *x, const int64_t ref_rdcost,
struct rdcost_block_args *arg) {
vpx_memset(arg, 0, sizeof(struct rdcost_block_args));
arg->x = x;
- arg->tx_size = tx_size;
- arg->bw = num_4x4_w;
- arg->bh = num_4x4_h;
arg->best_rd = ref_rdcost;
}
static void txfm_rd_in_plane(MACROBLOCK *x,
- struct rdcost_block_args *rd_stack,
int *rate, int64_t *distortion,
int *skippable, int64_t *sse,
int64_t ref_best_rd, int plane,
BLOCK_SIZE bsize, TX_SIZE tx_size) {
+ struct rdcost_block_args rd_stack;
MACROBLOCKD *const xd = &x->e_mbd;
struct macroblockd_plane *const pd = &xd->plane[plane];
const BLOCK_SIZE bs = get_plane_block_size(bsize, pd);
@@ -719,30 +730,29 @@
const int num_4x4_h = num_4x4_blocks_high_lookup[bs];
const scan_order *so;
- init_rdcost_stack(x, tx_size, num_4x4_w, num_4x4_h,
- ref_best_rd, rd_stack);
+ init_rdcost_stack(x, ref_best_rd, &rd_stack);
if (plane == 0)
xd->mi_8x8[0]->mbmi.tx_size = tx_size;
- vp9_get_entropy_contexts(tx_size, rd_stack->t_above, rd_stack->t_left,
+ vp9_get_entropy_contexts(tx_size, rd_stack.t_above, rd_stack.t_left,
pd->above_context, pd->left_context,
num_4x4_w, num_4x4_h);
so = get_scan(xd, tx_size, pd->plane_type, 0);
- rd_stack->scan = so->scan;
- rd_stack->nb = so->neighbors;
+ rd_stack.scan = so->scan;
+ rd_stack.nb = so->neighbors;
foreach_transformed_block_in_plane(xd, bsize, plane,
- block_rd_txfm, rd_stack);
- if (rd_stack->skip) {
+ block_rd_txfm, &rd_stack);
+ if (rd_stack.skip) {
*rate = INT_MAX;
*distortion = INT64_MAX;
*sse = INT64_MAX;
*skippable = 0;
} else {
- *distortion = rd_stack->this_dist;
- *rate = rd_stack->this_rate;
- *sse = rd_stack->this_sse;
+ *distortion = rd_stack.this_dist;
+ *rate = rd_stack.this_rate;
+ *sse = rd_stack.this_sse;
*skippable = vp9_is_skippable_in_plane(x, bsize, plane);
}
}
@@ -760,7 +770,7 @@
mbmi->tx_size = MIN(max_tx_size, largest_tx_size);
- txfm_rd_in_plane(x, &cpi->rdcost_stack, rate, distortion, skip,
+ txfm_rd_in_plane(x, rate, distortion, skip,
&sse[mbmi->tx_size], ref_best_rd, 0, bs,
mbmi->tx_size);
cpi->tx_stepdown_count[0]++;
@@ -891,7 +901,7 @@
// Actually encode using the chosen mode if a model was used, but do not
// update the r, d costs
- txfm_rd_in_plane(x, &cpi->rdcost_stack, rate, distortion, skip,
+ txfm_rd_in_plane(x, rate, distortion, skip,
&sse[mbmi->tx_size], ref_best_rd, 0, bs, mbmi->tx_size);
if (max_tx_size == TX_32X32 && best_tx == TX_32X32) {
@@ -914,7 +924,6 @@
int64_t d[TX_SIZES], sse[TX_SIZES];
MACROBLOCKD *xd = &x->e_mbd;
MB_MODE_INFO *const mbmi = &xd->mi_8x8[0]->mbmi;
- struct rdcost_block_args *rdcost_stack = &cpi->rdcost_stack;
const int b_inter_mode = is_inter_block(mbmi);
const TX_SIZE max_tx_size = max_txsize_lookup[bs];
TX_SIZE tx_size;
@@ -944,7 +953,7 @@
skip, sse, ref_best_rd, bs);
} else {
for (tx_size = TX_4X4; tx_size <= max_tx_size; ++tx_size)
- txfm_rd_in_plane(x, rdcost_stack, &r[tx_size][0], &d[tx_size],
+ txfm_rd_in_plane(x, &r[tx_size][0], &d[tx_size],
&s[tx_size], &sse[tx_size],
ref_best_rd, 0, bs, tx_size);
choose_txfm_size_from_rd(cpi, x, r, rate, d, distortion, s,
@@ -1273,7 +1282,7 @@
*skippable = 1;
for (plane = 1; plane < MAX_MB_PLANE; ++plane) {
- txfm_rd_in_plane(x, &cpi->rdcost_stack, &pnrate, &pndist, &pnskip, &pnsse,
+ txfm_rd_in_plane(x, &pnrate, &pndist, &pnskip, &pnsse,
ref_best_rd, plane, bsize, uv_txfm_size);
if (pnrate == INT_MAX)
goto term;
@@ -1759,7 +1768,8 @@
if (best_rd < label_mv_thresh)
break;
- if (cpi->compressor_speed) {
+ if (cpi->oxcf.mode != MODE_SECONDPASS_BEST &&
+ cpi->oxcf.mode != MODE_BESTQUALITY) {
// use previous block's result as next block's MV predictor.
if (i > 0) {
bsi->mvp.as_int = mi->bmi[i - 1].as_mv[0].as_int;
@@ -1823,7 +1833,8 @@
}
// Should we do a full search (best quality only)
- if (cpi->compressor_speed == 0) {
+ if (cpi->oxcf.mode == MODE_BESTQUALITY ||
+ cpi->oxcf.mode == MODE_SECONDPASS_BEST) {
/* Check if mvp_full is within the range. */
clamp_mv(&mvp_full, x->mv_col_min, x->mv_col_max,
x->mv_row_min, x->mv_row_max);
diff --git a/vp9/vp9_cx_iface.c b/vp9/vp9_cx_iface.c
index 897ecd7..6b18171 100644
--- a/vp9/vp9_cx_iface.c
+++ b/vp9/vp9_cx_iface.c
@@ -563,10 +563,21 @@
unsigned int new_qc;
/* Use best quality mode if no deadline is given. */
- if (deadline)
- new_qc = MODE_GOODQUALITY;
- else
- new_qc = MODE_BESTQUALITY;
+ new_qc = MODE_BESTQUALITY;
+
+ if (deadline) {
+ uint64_t duration_us;
+
+ /* Convert duration parameter from stream timebase to microseconds */
+ duration_us = (uint64_t)duration * 1000000
+ * (uint64_t)ctx->cfg.g_timebase.num
+ / (uint64_t)ctx->cfg.g_timebase.den;
+
+ /* If the deadline is more than the duration this frame is to be shown,
+ * use good quality mode. Otherwise use realtime mode.
+ */
+ new_qc = (deadline > duration_us) ? MODE_GOODQUALITY : MODE_REALTIME;
+ }
if (ctx->cfg.g_pass == VPX_RC_FIRST_PASS)
new_qc = MODE_FIRSTPASS;
diff --git a/vp9/vp9dx.mk b/vp9/vp9dx.mk
index b722200..de210f4 100644
--- a/vp9/vp9dx.mk
+++ b/vp9/vp9dx.mk
@@ -21,6 +21,8 @@
VP9_DX_SRCS-yes += decoder/vp9_decodeframe.c
VP9_DX_SRCS-yes += decoder/vp9_decodeframe.h
VP9_DX_SRCS-yes += decoder/vp9_detokenize.c
+VP9_DX_SRCS-yes += decoder/vp9_dthread.c
+VP9_DX_SRCS-yes += decoder/vp9_dthread.h
VP9_DX_SRCS-yes += decoder/vp9_reader.h
VP9_DX_SRCS-yes += decoder/vp9_reader.c
VP9_DX_SRCS-yes += decoder/vp9_read_bit_buffer.h
diff --git a/vp9_spatial_scalable_encoder.c b/vp9_spatial_scalable_encoder.c
index e71094a..50f45c2 100644
--- a/vp9_spatial_scalable_encoder.c
+++ b/vp9_spatial_scalable_encoder.c
@@ -193,8 +193,6 @@
vpx_codec_err_t res;
int pts = 0; /* PTS starts at 0 */
int frame_duration = 1; /* 1 timebase tick per frame */
- vpx_codec_cx_pkt_t packet = {0};
- packet.kind = VPX_CODEC_CX_FRAME_PKT;
memset(&svc_ctx, 0, sizeof(svc_ctx));
svc_ctx.log_print = 1;
@@ -234,9 +232,7 @@
die_codec(&codec, "Failed to encode frame");
}
if (vpx_svc_get_frame_size(&svc_ctx) > 0) {
- packet.data.frame.pts = pts;
- packet.data.frame.sz = vpx_svc_get_frame_size(&svc_ctx);
- ivf_write_frame_header(outfile, &packet);
+ ivf_write_frame_header(outfile, pts, vpx_svc_get_frame_size(&svc_ctx));
(void)fwrite(vpx_svc_get_buffer(&svc_ctx), 1,
vpx_svc_get_frame_size(&svc_ctx), outfile);
}
diff --git a/vpxenc.c b/vpxenc.c
index f1feb47..f772432 100644
--- a/vpxenc.c
+++ b/vpxenc.c
@@ -1323,7 +1323,7 @@
ivf_header_pos = ftello(stream->file);
fsize = pkt->data.frame.sz;
- ivf_write_frame_header(stream->file, pkt);
+ ivf_write_frame_header(stream->file, pkt->data.frame.pts, fsize);
} else {
fsize += pkt->data.frame.sz;