Merge "Replacing (raster_block >> tx_size) with (block >> (tx_size << 1))."
diff --git a/test/vp9_lossless_test.cc b/test/vp9_lossless_test.cc
index 441cc44..30a3118 100644
--- a/test/vp9_lossless_test.cc
+++ b/test/vp9_lossless_test.cc
@@ -35,7 +35,7 @@
}
virtual void BeginPassHook(unsigned int /*pass*/) {
- psnr_ = 0.0;
+ psnr_ = kMaxPsnr;
nframes_ = 0;
}
@@ -65,9 +65,9 @@
init_flags_ = VPX_CODEC_USE_PSNR;
// intentionally changed the dimension for better testing coverage
- libvpx_test::I420VideoSource video("hantro_collage_w352h288.yuv", 356, 284,
+ libvpx_test::I420VideoSource video("hantro_collage_w352h288.yuv", 352, 288,
timebase.den, timebase.num, 0, 30);
-
+ ASSERT_NO_FATAL_FAILURE(RunLoop(&video));
const double psnr_lossless = GetMinPsnr();
EXPECT_GE(psnr_lossless, kMaxPsnr);
}
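
Note on the BeginPassHook change above: the test tracks a running minimum PSNR, so seeding it with 0.0 meant the minimum could never rise and the EXPECT_GE against kMaxPsnr was unsatisfiable. A minimal standalone sketch of the pattern; the sentinel value and helper are assumptions, not the test's actual code:

    /* Min-tracking sketch: a running minimum must start at the maximum
     * representable value, or every comparison keeps the bogus 0.0. */
    #include <stdio.h>

    #define K_MAX_PSNR 100.0  /* assumed sentinel, mirrors kMaxPsnr */

    int main(void) {
      const double frame_psnr[3] = { 102.1, 101.7, 103.4 };
      double min_psnr = K_MAX_PSNR;  /* was 0.0: min() stayed stuck at 0.0 */
      int i;
      for (i = 0; i < 3; ++i) {
        if (frame_psnr[i] < min_psnr)
          min_psnr = frame_psnr[i];
      }
      printf("min PSNR: %.1f\n", min_psnr);  /* 101.7, not 0.0 */
      return 0;
    }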
diff --git a/tools_common.h b/tools_common.h
index 7dfd5ad..068e7b5 100644
--- a/tools_common.h
+++ b/tools_common.h
@@ -12,6 +12,46 @@
#include <stdio.h>
+#include "./vpx_config.h"
+
+#if defined(_MSC_VER)
+/* MSVS doesn't define off_t, and uses _f{seek,tell}i64. */
+typedef __int64 off_t;
+#define fseeko _fseeki64
+#define ftello _ftelli64
+#elif defined(_WIN32)
+/* MinGW defines off_t as long and uses f{seek,tell}o64/off64_t for large
+ * files. */
+#define fseeko fseeko64
+#define ftello ftello64
+#define off_t off64_t
+#endif /* _WIN32 */
+
+#if CONFIG_OS_SUPPORT
+#if defined(_MSC_VER)
+#include <io.h> /* NOLINT */
+#define snprintf _snprintf
+#define isatty _isatty
+#define fileno _fileno
+#else
+#include <unistd.h> /* NOLINT */
+#endif /* _MSC_VER */
+#endif /* CONFIG_OS_SUPPORT */
+
+/* Use 32-bit file operations in WebM file format when building ARM
+ * executables (.axf) with RVCT. */
+#if !CONFIG_OS_SUPPORT
+typedef long off_t; /* NOLINT */
+#define fseeko fseek
+#define ftello ftell
+#endif /* !CONFIG_OS_SUPPORT */
+
+#define LITERALU64(hi, lo) ((((uint64_t)hi) << 32) | lo)
+
+#ifndef PATH_MAX
+#define PATH_MAX 512
+#endif
+
#define VP8_FOURCC (0x30385056)
#define VP9_FOURCC (0x30395056)
#define VP8_FOURCC_MASK (0x00385056)
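
The block added above centralizes the large-file shims that vpxenc.c, vpxdec.c, and webmenc.c previously carried locally (see their deletions further down). With it included, callers can use fseeko()/ftello() unconditionally. A hedged usage sketch; get_file_size() is an illustrative helper, not part of the tree:

    /* With the shims above, offsets beyond 2 GB work on MSVC/MinGW too. */
    #include <stdio.h>

    static long long get_file_size(FILE *f) {
      long long size;
      if (fseeko(f, 0, SEEK_END) != 0)
        return -1;
      size = (long long)ftello(f);
      fseeko(f, 0, SEEK_SET);  /* rewind for the subsequent read */
      return size;
    }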
diff --git a/vp9/common/vp9_blockd.h b/vp9/common/vp9_blockd.h
index d0d4852..f52adfc 100644
--- a/vp9/common/vp9_blockd.h
+++ b/vp9/common/vp9_blockd.h
@@ -82,9 +82,8 @@
#define INTER_MODES (1 + NEWMV - NEARESTMV)
-static INLINE int inter_mode_offset(MB_PREDICTION_MODE mode) {
- return (mode - NEARESTMV);
-}
+#define INTER_OFFSET(mode) ((mode) - NEARESTMV)
+
/* For keyframes, intra block modes are predicted by the (already decoded)
modes for the Y blocks to the left and above us; for interframes, there
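
The inline-function-to-macro change matters because C static initializers require constant expressions: the vp9_inter_mode_tree table in vp9_entropymode.c embeds -INTER_OFFSET(mode) directly, which a function call could not do. A sketch with hypothetical enum values; only the ordering mirrors MB_PREDICTION_MODE (NEARESTMV..NEWMV are consecutive):

    enum { NEARESTMV_ = 10, NEARMV_, ZEROMV_, NEWMV_ };  /* values illustrative */
    #define INTER_OFFSET_(mode) ((mode) - NEARESTMV_)

    static const int leaf = -INTER_OFFSET_(ZEROMV_);  /* ok: constant expression */
    /* static INLINE int inter_mode_offset(int m) { return m - NEARESTMV_; }
     * static const int bad = -inter_mode_offset(ZEROMV_);  -- not constant in C */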
diff --git a/vp9/common/vp9_entropy.c b/vp9/common/vp9_entropy.c
index d3a867c..2676762 100644
--- a/vp9/common/vp9_entropy.c
+++ b/vp9/common/vp9_entropy.c
@@ -332,7 +332,7 @@
if (l >= 3 && k == 0)
continue;
vp9_tree_probs_from_distribution(vp9_coefmodel_tree, branch_ct,
- coef_counts[i][j][k][l], 0);
+ coef_counts[i][j][k][l]);
branch_ct[0][1] = eob_branch_count[i][j][k][l] - branch_ct[0][0];
for (m = 0; m < UNCONSTRAINED_NODES; ++m)
dst_coef_probs[i][j][k][l][m] = merge_probs(
diff --git a/vp9/common/vp9_entropymode.c b/vp9/common/vp9_entropymode.c
index a963d55..3b2510d 100644
--- a/vp9/common/vp9_entropymode.c
+++ b/vp9/common/vp9_entropymode.c
@@ -235,9 +235,9 @@
struct vp9_token vp9_intra_mode_encodings[INTRA_MODES];
const vp9_tree_index vp9_inter_mode_tree[TREE_SIZE(INTER_MODES)] = {
- -ZEROMV, 2,
- -NEARESTMV, 4,
- -NEARMV, -NEWMV
+ -INTER_OFFSET(ZEROMV), 2,
+ -INTER_OFFSET(NEARESTMV), 4,
+ -INTER_OFFSET(NEARMV), -INTER_OFFSET(NEWMV)
};
struct vp9_token vp9_inter_mode_encodings[INTER_MODES];
@@ -343,8 +343,7 @@
vp9_tokens_from_tree(vp9_switchable_interp_encodings,
vp9_switchable_interp_tree);
vp9_tokens_from_tree(vp9_partition_encodings, vp9_partition_tree);
- vp9_tokens_from_tree_offset(vp9_inter_mode_encodings,
- vp9_inter_mode_tree, NEARESTMV);
+ vp9_tokens_from_tree(vp9_inter_mode_encodings, vp9_inter_mode_tree);
}
#define COUNT_SAT 20
@@ -356,9 +355,9 @@
static void adapt_probs(const vp9_tree_index *tree,
const vp9_prob *pre_probs, const unsigned int *counts,
- unsigned int offset, vp9_prob *probs) {
- tree_merge_probs(tree, pre_probs, counts, offset,
- COUNT_SAT, MAX_UPDATE_FACTOR, probs);
+ vp9_prob *probs) {
+ tree_merge_probs(tree, pre_probs, counts, COUNT_SAT, MAX_UPDATE_FACTOR,
+ probs);
}
void vp9_adapt_mode_probs(VP9_COMMON *cm) {
@@ -383,25 +382,24 @@
for (i = 0; i < INTER_MODE_CONTEXTS; i++)
adapt_probs(vp9_inter_mode_tree, pre_fc->inter_mode_probs[i],
- counts->inter_mode[i], NEARESTMV, fc->inter_mode_probs[i]);
+ counts->inter_mode[i], fc->inter_mode_probs[i]);
for (i = 0; i < BLOCK_SIZE_GROUPS; i++)
adapt_probs(vp9_intra_mode_tree, pre_fc->y_mode_prob[i],
- counts->y_mode[i], 0, fc->y_mode_prob[i]);
+ counts->y_mode[i], fc->y_mode_prob[i]);
for (i = 0; i < INTRA_MODES; ++i)
adapt_probs(vp9_intra_mode_tree, pre_fc->uv_mode_prob[i],
- counts->uv_mode[i], 0, fc->uv_mode_prob[i]);
+ counts->uv_mode[i], fc->uv_mode_prob[i]);
for (i = 0; i < PARTITION_CONTEXTS; i++)
adapt_probs(vp9_partition_tree, pre_fc->partition_prob[i],
- counts->partition[i], 0, fc->partition_prob[i]);
+ counts->partition[i], fc->partition_prob[i]);
if (cm->mcomp_filter_type == SWITCHABLE) {
for (i = 0; i < SWITCHABLE_FILTER_CONTEXTS; i++)
adapt_probs(vp9_switchable_interp_tree, pre_fc->switchable_interp_prob[i],
- counts->switchable_interp[i], 0,
- fc->switchable_interp_prob[i]);
+ counts->switchable_interp[i], fc->switchable_interp_prob[i]);
}
if (cm->tx_mode == TX_MODE_SELECT) {
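
With the tree leaves now storing negated 0-based offsets, the generic vp9_tokens_from_tree() machinery applies directly and the offset-correcting variants go away. A simplified traversal sketch; read_bit stands in for vp9_read with per-node probabilities, so this is not the real treed_read():

    /* Leaves hold the negated 0-based token, so the result needs no
     * offset fix-up. */
    static int read_tree_token(const signed char *tree,
                               int (*read_bit)(void *ctx), void *ctx) {
      int i = 0;
      while ((i = tree[i + read_bit(ctx)]) > 0)
        continue;
      return -i;  /* e.g. INTER_OFFSET(NEARMV) for vp9_inter_mode_tree */
    }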
diff --git a/vp9/common/vp9_entropymv.c b/vp9/common/vp9_entropymv.c
index b061cdb..290dcdd 100644
--- a/vp9/common/vp9_entropymv.c
+++ b/vp9/common/vp9_entropymv.c
@@ -196,8 +196,8 @@
static void adapt_probs(const vp9_tree_index *tree, const vp9_prob *pre_probs,
const unsigned int *counts, vp9_prob *probs) {
- tree_merge_probs(tree, pre_probs, counts, 0,
- MV_COUNT_SAT, MV_MAX_UPDATE_FACTOR, probs);
+ tree_merge_probs(tree, pre_probs, counts, MV_COUNT_SAT, MV_MAX_UPDATE_FACTOR,
+ probs);
}
void vp9_adapt_mv_probs(VP9_COMMON *cm, int allow_hp) {
@@ -207,8 +207,7 @@
const nmv_context *pre_fc = &cm->frame_contexts[cm->frame_context_idx].nmvc;
const nmv_context_counts *counts = &cm->counts.mv;
- adapt_probs(vp9_mv_joint_tree, pre_fc->joints, counts->joints,
- fc->joints);
+ adapt_probs(vp9_mv_joint_tree, pre_fc->joints, counts->joints, fc->joints);
for (i = 0; i < 2; ++i) {
nmv_component *comp = &fc->comps[i];
diff --git a/vp9/common/vp9_treecoder.c b/vp9/common/vp9_treecoder.c
index 1805fb4..e2a5b9f 100644
--- a/vp9/common/vp9_treecoder.c
+++ b/vp9/common/vp9_treecoder.c
@@ -35,28 +35,20 @@
tree2tok(p, t, 0, 0, 0);
}
-void vp9_tokens_from_tree_offset(struct vp9_token *p, vp9_tree t,
- int offset) {
- tree2tok(p - offset, t, 0, 0, 0);
-}
-
static unsigned int convert_distribution(unsigned int i, vp9_tree tree,
unsigned int branch_ct[][2],
- const unsigned int num_events[],
- unsigned int tok0_offset) {
+ const unsigned int num_events[]) {
unsigned int left, right;
- if (tree[i] <= 0) {
- left = num_events[-tree[i] - tok0_offset];
- } else {
- left = convert_distribution(tree[i], tree, branch_ct, num_events,
- tok0_offset);
- }
- if (tree[i + 1] <= 0)
- right = num_events[-tree[i + 1] - tok0_offset];
+ if (tree[i] <= 0)
+ left = num_events[-tree[i]];
else
- right = convert_distribution(tree[i + 1], tree, branch_ct, num_events,
- tok0_offset);
+ left = convert_distribution(tree[i], tree, branch_ct, num_events);
+
+ if (tree[i + 1] <= 0)
+ right = num_events[-tree[i + 1]];
+ else
+ right = convert_distribution(tree[i + 1], tree, branch_ct, num_events);
branch_ct[i >> 1][0] = left;
branch_ct[i >> 1][1] = right;
@@ -65,9 +57,8 @@
void vp9_tree_probs_from_distribution(vp9_tree tree,
unsigned int branch_ct[/* n-1 */][2],
- const unsigned int num_events[/* n */],
- unsigned int tok0_offset) {
- convert_distribution(0, tree, branch_ct, num_events, tok0_offset);
+ const unsigned int num_events[/* n */]) {
+ convert_distribution(0, tree, branch_ct, num_events);
}
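
convert_distribution() now indexes num_events[] by the negated leaf value alone. A self-contained worked example for a four-token tree shaped like vp9_inter_mode_tree, with made-up counts:

    #include <stdio.h>

    static unsigned convert(int i, const signed char *tree,
                            unsigned ct[][2], const unsigned *events) {
      const unsigned left = tree[i] <= 0
          ? events[-tree[i]] : convert(tree[i], tree, ct, events);
      const unsigned right = tree[i + 1] <= 0
          ? events[-tree[i + 1]] : convert(tree[i + 1], tree, ct, events);
      ct[i >> 1][0] = left;
      ct[i >> 1][1] = right;
      return left + right;
    }

    int main(void) {
      const signed char tree[6] = { 0, 2, -1, 4, -2, -3 };  /* leaf 0 first */
      const unsigned events[4] = { 10, 20, 30, 40 };  /* per-token counts */
      unsigned ct[3][2];
      convert(0, tree, ct, events);
      printf("%u/%u %u/%u %u/%u\n",  /* prints: 10/90 20/70 30/40 */
             ct[0][0], ct[0][1], ct[1][0], ct[1][1], ct[2][0], ct[2][1]);
      return 0;
    }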
diff --git a/vp9/common/vp9_treecoder.h b/vp9/common/vp9_treecoder.h
index 9c776d6..a79b156 100644
--- a/vp9/common/vp9_treecoder.h
+++ b/vp9/common/vp9_treecoder.h
@@ -42,7 +42,6 @@
/* Construct encoding array from tree. */
void vp9_tokens_from_tree(struct vp9_token*, vp9_tree);
-void vp9_tokens_from_tree_offset(struct vp9_token*, vp9_tree, int offset);
/* Convert array of token occurrence counts into a table of probabilities
for the associated binary encoding tree. Also writes count of branches
@@ -51,8 +50,7 @@
void vp9_tree_probs_from_distribution(vp9_tree tree,
unsigned int branch_ct[ /* n - 1 */ ][2],
- const unsigned int num_events[ /* n */ ],
- unsigned int tok0_offset);
+ const unsigned int num_events[ /* n */ ]);
static INLINE vp9_prob clip_prob(int p) {
@@ -116,10 +114,10 @@
static void tree_merge_probs(const vp9_tree_index *tree,
const vp9_prob *pre_probs,
- const unsigned int *counts, int offset,
+ const unsigned int *counts,
unsigned int count_sat,
unsigned int max_update_factor, vp9_prob *probs) {
- tree_merge_probs_impl(0, tree, pre_probs, &counts[-offset],
+ tree_merge_probs_impl(0, tree, pre_probs, counts,
count_sat, max_update_factor, probs);
}
diff --git a/vp9/common/x86/vp9_idct_intrin_sse2.c b/vp9/common/x86/vp9_idct_intrin_sse2.c
index ccf5aac..2a33844 100644
--- a/vp9/common/x86/vp9_idct_intrin_sse2.c
+++ b/vp9/common/x86/vp9_idct_intrin_sse2.c
@@ -15,6 +15,16 @@
#include "vp9/common/vp9_common.h"
#include "vp9/common/vp9_idct.h"
+#define RECON_AND_STORE4X4(dest, in_x) \
+{ \
+ __m128i d0 = _mm_cvtsi32_si128(*(const int *)(dest)); \
+ d0 = _mm_unpacklo_epi8(d0, zero); \
+ d0 = _mm_add_epi16(in_x, d0); \
+ d0 = _mm_packus_epi16(d0, d0); \
+ *(int *)dest = _mm_cvtsi128_si32(d0); \
+ dest += stride; \
+}
+
void vp9_idct4x4_16_add_sse2(const int16_t *input, uint8_t *dest, int stride) {
const __m128i zero = _mm_setzero_si128();
const __m128i eight = _mm_set1_epi16(8);
@@ -26,21 +36,19 @@
__m128i input0, input1, input2, input3;
// Rows
- input0 = _mm_loadl_epi64((const __m128i *)input);
- input1 = _mm_loadl_epi64((const __m128i *)(input + 4));
- input2 = _mm_loadl_epi64((const __m128i *)(input + 8));
- input3 = _mm_loadl_epi64((const __m128i *)(input + 12));
+ input0 = _mm_load_si128((const __m128i *)input);
+ input2 = _mm_load_si128((const __m128i *)(input + 8));
// Construct i3, i1, i3, i1, i2, i0, i2, i0
input0 = _mm_shufflelo_epi16(input0, 0xd8);
- input1 = _mm_shufflelo_epi16(input1, 0xd8);
+ input0 = _mm_shufflehi_epi16(input0, 0xd8);
input2 = _mm_shufflelo_epi16(input2, 0xd8);
- input3 = _mm_shufflelo_epi16(input3, 0xd8);
+ input2 = _mm_shufflehi_epi16(input2, 0xd8);
+ input1 = _mm_unpackhi_epi32(input0, input0);
input0 = _mm_unpacklo_epi32(input0, input0);
- input1 = _mm_unpacklo_epi32(input1, input1);
+ input3 = _mm_unpackhi_epi32(input2, input2);
input2 = _mm_unpacklo_epi32(input2, input2);
- input3 = _mm_unpacklo_epi32(input3, input3);
// Stage 1
input0 = _mm_madd_epi16(input0, cst);
@@ -59,16 +67,14 @@
input3 = _mm_srai_epi32(input3, DCT_CONST_BITS);
// Stage 2
- input0 = _mm_packs_epi32(input0, zero);
- input1 = _mm_packs_epi32(input1, zero);
- input2 = _mm_packs_epi32(input2, zero);
- input3 = _mm_packs_epi32(input3, zero);
+ input0 = _mm_packs_epi32(input0, input1);
+ input1 = _mm_packs_epi32(input2, input3);
// Transpose
- input1 = _mm_unpacklo_epi16(input0, input1);
- input3 = _mm_unpacklo_epi16(input2, input3);
- input0 = _mm_unpacklo_epi32(input1, input3);
- input1 = _mm_unpackhi_epi32(input1, input3);
+ input2 = _mm_unpacklo_epi16(input0, input1);
+ input3 = _mm_unpackhi_epi16(input0, input1);
+ input0 = _mm_unpacklo_epi32(input2, input3);
+ input1 = _mm_unpackhi_epi32(input2, input3);
// Switch columns 2 and 3; we then have:
// input2: columns 1 and 0; input3: columns 2 and 3.
@@ -78,14 +84,9 @@
// Columns
// Construct i3, i1, i3, i1, i2, i0, i2, i0
- input0 = _mm_shufflelo_epi16(input2, 0xd8);
- input1 = _mm_shufflehi_epi16(input2, 0xd8);
- input2 = _mm_shufflehi_epi16(input3, 0xd8);
- input3 = _mm_shufflelo_epi16(input3, 0xd8);
-
- input0 = _mm_unpacklo_epi32(input0, input0);
- input1 = _mm_unpackhi_epi32(input1, input1);
- input2 = _mm_unpackhi_epi32(input2, input2);
+ input0 = _mm_unpacklo_epi32(input2, input2);
+ input1 = _mm_unpackhi_epi32(input2, input2);
+ input2 = _mm_unpackhi_epi32(input3, input3);
input3 = _mm_unpacklo_epi32(input3, input3);
// Stage 1
@@ -105,16 +106,14 @@
input3 = _mm_srai_epi32(input3, DCT_CONST_BITS);
// Stage 2
- input0 = _mm_packs_epi32(input0, zero);
- input1 = _mm_packs_epi32(input1, zero);
- input2 = _mm_packs_epi32(input2, zero);
- input3 = _mm_packs_epi32(input3, zero);
+ input0 = _mm_packs_epi32(input0, input2);
+ input1 = _mm_packs_epi32(input1, input3);
// Transpose
- input1 = _mm_unpacklo_epi16(input0, input1);
- input3 = _mm_unpacklo_epi16(input2, input3);
- input0 = _mm_unpacklo_epi32(input1, input3);
- input1 = _mm_unpackhi_epi32(input1, input3);
+ input2 = _mm_unpacklo_epi16(input0, input1);
+ input3 = _mm_unpackhi_epi16(input0, input1);
+ input0 = _mm_unpacklo_epi32(input2, input3);
+ input1 = _mm_unpackhi_epi32(input2, input3);
// Switch columns 2 and 3; we then have:
// input2: columns 1 and 0; input3: columns 2 and 3.
@@ -129,23 +128,31 @@
input2 = _mm_srai_epi16(input2, 4);
input3 = _mm_srai_epi16(input3, 4);
-#define RECON_AND_STORE4X4(dest, in_x) \
- { \
- __m128i d0 = _mm_cvtsi32_si128(*(const int *)(dest)); \
- d0 = _mm_unpacklo_epi8(d0, zero); \
- d0 = _mm_add_epi16(in_x, d0); \
- d0 = _mm_packus_epi16(d0, d0); \
- *(int *)dest = _mm_cvtsi128_si32(d0); \
- dest += stride; \
+ // Reconstruction and Store
+ {
+ __m128i d0 = _mm_cvtsi32_si128(*(const int *)(dest));
+ __m128i d2 = _mm_cvtsi32_si128(*(const int *)(dest + stride * 2));
+ d0 = _mm_unpacklo_epi32(d0,
+ _mm_cvtsi32_si128(*(const int *) (dest + stride)));
+ d2 = _mm_unpacklo_epi32(_mm_cvtsi32_si128(
+ *(const int *) (dest + stride * 3)), d2);
+ d0 = _mm_unpacklo_epi8(d0, zero);
+ d2 = _mm_unpacklo_epi8(d2, zero);
+ d0 = _mm_add_epi16(d0, input2);
+ d2 = _mm_add_epi16(d2, input3);
+ d0 = _mm_packus_epi16(d0, d2);
+ // store input0
+ *(int *)dest = _mm_cvtsi128_si32(d0);
+ // store input1
+ d0 = _mm_srli_si128(d0, 4);
+ *(int *)(dest + stride) = _mm_cvtsi128_si32(d0);
+ // store input2
+ d0 = _mm_srli_si128(d0, 4);
+ *(int *)(dest + stride * 3) = _mm_cvtsi128_si32(d0);
+ // store input3
+ d0 = _mm_srli_si128(d0, 4);
+ *(int *)(dest + stride * 2) = _mm_cvtsi128_si32(d0);
}
-
- input0 = _mm_srli_si128(input2, 8);
- input1 = _mm_srli_si128(input3, 8);
-
- RECON_AND_STORE4X4(dest, input2);
- RECON_AND_STORE4X4(dest, input0);
- RECON_AND_STORE4X4(dest, input1);
- RECON_AND_STORE4X4(dest, input3);
}
void vp9_idct4x4_1_add_sse2(const int16_t *input, uint8_t *dest, int stride) {
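
The rewritten tail above replaces four invocations of RECON_AND_STORE4X4 with one batched block that loads all four destination rows, adds the residual, packs with unsigned saturation, and stores row by row. A scalar C reference of what that block computes; a sketch that ignores the SIMD code's 0,1,3,2 store order, which undoes the earlier column swap:

    #include <stdint.h>

    static uint8_t clip_u8(int v) {
      return (uint8_t)(v < 0 ? 0 : v > 255 ? 255 : v);  /* unsigned saturation */
    }

    static void recon_4x4_c(const int16_t *res, uint8_t *dest, int stride) {
      int r, c;
      for (r = 0; r < 4; ++r)
        for (c = 0; c < 4; ++c)
          dest[r * stride + c] = clip_u8(dest[r * stride + c] + res[r * 4 + c]);
    }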
diff --git a/vp9/decoder/vp9_decodemv.c b/vp9/decoder/vp9_decodemv.c
index 9792d2c..14f2ce5 100644
--- a/vp9/decoder/vp9_decodemv.c
+++ b/vp9/decoder/vp9_decodemv.c
@@ -48,12 +48,13 @@
}
static MB_PREDICTION_MODE read_inter_mode(VP9_COMMON *cm, vp9_reader *r,
- uint8_t context) {
- const MB_PREDICTION_MODE mode = treed_read(r, vp9_inter_mode_tree,
- cm->fc.inter_mode_probs[context]);
+ int ctx) {
+ const int mode = treed_read(r, vp9_inter_mode_tree,
+ cm->fc.inter_mode_probs[ctx]);
if (!cm->frame_parallel_decoding_mode)
- ++cm->counts.inter_mode[context][inter_mode_offset(mode)];
- return mode;
+ ++cm->counts.inter_mode[ctx][mode];
+
+ return NEARESTMV + mode;
}
static int read_segment_id(vp9_reader *r, const struct segmentation *seg) {
@@ -260,6 +261,16 @@
mv->col = ref->col + diff.col;
}
+static COMPPREDMODE_TYPE read_reference_mode(VP9_COMMON *cm,
+ const MACROBLOCKD *xd,
+ vp9_reader *r) {
+ const int ctx = vp9_get_pred_context_comp_inter_inter(cm, xd);
+ const int mode = vp9_read(r, cm->fc.comp_inter_prob[ctx]);
+ if (!cm->frame_parallel_decoding_mode)
+ ++cm->counts.comp_inter[ctx][mode];
+ return mode; // SINGLE_PREDICTION_ONLY or COMP_PREDICTION_ONLY
+}
+
// Read the reference frame
static void read_ref_frames(VP9_COMMON *const cm, MACROBLOCKD *const xd,
vp9_reader *r,
@@ -271,27 +282,20 @@
ref_frame[0] = vp9_get_segdata(&cm->seg, segment_id, SEG_LVL_REF_FRAME);
ref_frame[1] = NONE;
} else {
- const int comp_ctx = vp9_get_pred_context_comp_inter_inter(cm, xd);
- int is_comp;
-
- if (cm->comp_pred_mode == HYBRID_PREDICTION) {
- is_comp = vp9_read(r, fc->comp_inter_prob[comp_ctx]);
- if (!cm->frame_parallel_decoding_mode)
- ++counts->comp_inter[comp_ctx][is_comp];
- } else {
- is_comp = cm->comp_pred_mode == COMP_PREDICTION_ONLY;
- }
+ const COMPPREDMODE_TYPE mode = (cm->comp_pred_mode == HYBRID_PREDICTION)
+ ? read_reference_mode(cm, xd, r)
+ : cm->comp_pred_mode;
// FIXME(rbultje) I'm pretty sure this breaks segmentation ref frame coding
- if (is_comp) {
- const int fix_ref_idx = cm->ref_frame_sign_bias[cm->comp_fixed_ref];
- const int ref_ctx = vp9_get_pred_context_comp_ref_p(cm, xd);
- const int b = vp9_read(r, fc->comp_ref_prob[ref_ctx]);
+ if (mode == COMP_PREDICTION_ONLY) {
+ const int idx = cm->ref_frame_sign_bias[cm->comp_fixed_ref];
+ const int ctx = vp9_get_pred_context_comp_ref_p(cm, xd);
+ const int bit = vp9_read(r, fc->comp_ref_prob[ctx]);
if (!cm->frame_parallel_decoding_mode)
- ++counts->comp_ref[ref_ctx][b];
- ref_frame[fix_ref_idx] = cm->comp_fixed_ref;
- ref_frame[!fix_ref_idx] = cm->comp_var_ref[b];
- } else {
+ ++counts->comp_ref[ctx][bit];
+ ref_frame[idx] = cm->comp_fixed_ref;
+ ref_frame[!idx] = cm->comp_var_ref[bit];
+ } else if (mode == SINGLE_PREDICTION_ONLY) {
const int ctx0 = vp9_get_pred_context_single_ref_p1(xd);
const int bit0 = vp9_read(r, fc->single_ref_prob[ctx0][0]);
if (!cm->frame_parallel_decoding_mode)
@@ -299,14 +303,16 @@
if (bit0) {
const int ctx1 = vp9_get_pred_context_single_ref_p2(xd);
const int bit1 = vp9_read(r, fc->single_ref_prob[ctx1][1]);
- ref_frame[0] = bit1 ? ALTREF_FRAME : GOLDEN_FRAME;
if (!cm->frame_parallel_decoding_mode)
++counts->single_ref[ctx1][1][bit1];
+ ref_frame[0] = bit1 ? ALTREF_FRAME : GOLDEN_FRAME;
} else {
ref_frame[0] = LAST_FRAME;
}
ref_frame[1] = NONE;
+ } else {
+ assert(!"Invalid prediction mode.");
}
}
}
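
read_reference_mode() isolates the single-vs-compound decision; read_ref_frames() then dispatches on the effective mode. A hedged sketch of that dispatch, assuming the usual COMPPREDMODE_TYPE ordering (SINGLE_PREDICTION_ONLY == 0, COMP_PREDICTION_ONLY == 1):

    enum { SINGLE_ONLY, COMP_ONLY, HYBRID };  /* values assumed */

    static int effective_pred_mode(int frame_mode, int coded_bit) {
      /* HYBRID frames code one bit per block; otherwise the frame-level
       * mode applies to every block unchanged. */
      return frame_mode == HYBRID ? (coded_bit ? COMP_ONLY : SINGLE_ONLY)
                                  : frame_mode;
    }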
diff --git a/vp9/decoder/vp9_decodframe.c b/vp9/decoder/vp9_decodframe.c
index 86fba05..aad400a 100644
--- a/vp9/decoder/vp9_decodframe.c
+++ b/vp9/decoder/vp9_decodframe.c
@@ -153,47 +153,38 @@
vp9_diff_update_prob(r, &cm->fc.comp_ref_prob[i]);
}
-static void update_mv(vp9_reader *r, vp9_prob *p) {
- if (vp9_read(r, NMV_UPDATE_PROB))
- *p = (vp9_read_literal(r, 7) << 1) | 1;
+static void update_mv_probs(vp9_prob *p, int n, vp9_reader *r) {
+ int i;
+ for (i = 0; i < n; ++i)
+ if (vp9_read(r, NMV_UPDATE_PROB))
+ p[i] = (vp9_read_literal(r, 7) << 1) | 1;
}
-static void read_mv_probs(vp9_reader *r, nmv_context *mvc, int allow_hp) {
- int i, j, k;
+static void read_mv_probs(nmv_context *ctx, int allow_hp, vp9_reader *r) {
+ int i, j;
- for (j = 0; j < MV_JOINTS - 1; ++j)
- update_mv(r, &mvc->joints[j]);
+ update_mv_probs(ctx->joints, MV_JOINTS - 1, r);
for (i = 0; i < 2; ++i) {
- nmv_component *const comp = &mvc->comps[i];
-
- update_mv(r, &comp->sign);
-
- for (j = 0; j < MV_CLASSES - 1; ++j)
- update_mv(r, &comp->classes[j]);
-
- for (j = 0; j < CLASS0_SIZE - 1; ++j)
- update_mv(r, &comp->class0[j]);
-
- for (j = 0; j < MV_OFFSET_BITS; ++j)
- update_mv(r, &comp->bits[j]);
+ nmv_component *const comp_ctx = &ctx->comps[i];
+ update_mv_probs(&comp_ctx->sign, 1, r);
+ update_mv_probs(comp_ctx->classes, MV_CLASSES - 1, r);
+ update_mv_probs(comp_ctx->class0, CLASS0_SIZE - 1, r);
+ update_mv_probs(comp_ctx->bits, MV_OFFSET_BITS, r);
}
for (i = 0; i < 2; ++i) {
- nmv_component *const comp = &mvc->comps[i];
-
+ nmv_component *const comp_ctx = &ctx->comps[i];
for (j = 0; j < CLASS0_SIZE; ++j)
- for (k = 0; k < 3; ++k)
- update_mv(r, &comp->class0_fp[j][k]);
-
- for (j = 0; j < 3; ++j)
- update_mv(r, &comp->fp[j]);
+ update_mv_probs(comp_ctx->class0_fp[j], 3, r);
+ update_mv_probs(comp_ctx->fp, 3, r);
}
if (allow_hp) {
for (i = 0; i < 2; ++i) {
- update_mv(r, &mvc->comps[i].class0_hp);
- update_mv(r, &mvc->comps[i].hp);
+ nmv_component *const comp_ctx = &ctx->comps[i];
+ update_mv_probs(&comp_ctx->class0_hp, 1, r);
+ update_mv_probs(&comp_ctx->hp, 1, r);
}
}
}
@@ -209,20 +200,22 @@
// Allocate storage for each tile column.
// TODO(jzern): when max_threads <= 1 the same storage could be used for each
// tile.
-static void alloc_tile_storage(VP9D_COMP *pbi, int tile_cols) {
+static void alloc_tile_storage(VP9D_COMP *pbi, int tile_rows, int tile_cols) {
VP9_COMMON *const cm = &pbi->common;
const int aligned_mi_cols = mi_cols_aligned_to_sb(cm->mi_cols);
- int i, tile_col;
+ int i, tile_row, tile_col;
CHECK_MEM_ERROR(cm, pbi->mi_streams,
- vpx_realloc(pbi->mi_streams, tile_cols *
+ vpx_realloc(pbi->mi_streams, tile_rows * tile_cols *
sizeof(*pbi->mi_streams)));
- for (tile_col = 0; tile_col < tile_cols; ++tile_col) {
- TileInfo tile;
-
- vp9_tile_init(&tile, cm, 0, tile_col);
- pbi->mi_streams[tile_col] =
- &cm->mi[cm->mi_rows * tile.mi_col_start];
+ for (tile_row = 0; tile_row < tile_rows; ++tile_row) {
+ for (tile_col = 0; tile_col < tile_cols; ++tile_col) {
+ TileInfo tile;
+ vp9_tile_init(&tile, cm, tile_row, tile_col);
+ pbi->mi_streams[tile_row * tile_cols + tile_col] =
+ &cm->mi[tile.mi_row_start * cm->mode_info_stride
+ + tile.mi_col_start];
+ }
}
// 2 contexts per 'mi unit', so that we have one context per 4x4 txfm
@@ -360,16 +353,15 @@
const int bh = num_8x8_blocks_high_lookup[bsize];
const int bw = num_8x8_blocks_wide_lookup[bsize];
const int offset = mi_row * cm->mode_info_stride + mi_col;
-
- xd->mode_info_stride = cm->mode_info_stride;
+ const int tile_offset = tile->mi_row_start * cm->mode_info_stride +
+ tile->mi_col_start;
xd->mi_8x8 = cm->mi_grid_visible + offset;
xd->prev_mi_8x8 = cm->prev_mi_grid_visible + offset;
// we are using the mode info context stream here
- xd->mi_8x8[0] = xd->mi_stream;
+ xd->mi_8x8[0] = xd->mi_stream + offset - tile_offset;
xd->mi_8x8[0]->mbmi.sb_type = bsize;
- ++xd->mi_stream;
// Special case: if prev_mi is NULL, the previous mode info context
// cannot be used.
@@ -768,9 +760,10 @@
}
static void setup_tile_context(VP9D_COMP *const pbi, MACROBLOCKD *const xd,
- int tile_col) {
+ int tile_row, int tile_col) {
int i;
- xd->mi_stream = pbi->mi_streams[tile_col];
+ const int tile_cols = 1 << pbi->common.log2_tile_cols;
+ xd->mi_stream = pbi->mi_streams[tile_row * tile_cols + tile_col];
for (i = 0; i < MAX_MB_PLANE; ++i) {
xd->above_context[i] = pbi->above_context[i];
@@ -874,77 +867,68 @@
return size;
}
-static const uint8_t *decode_tiles(VP9D_COMP *pbi, const uint8_t *data) {
- vp9_reader residual_bc;
+typedef struct TileBuffer {
+ const uint8_t *data;
+ size_t size;
+} TileBuffer;
+static const uint8_t *decode_tiles(VP9D_COMP *pbi, const uint8_t *data) {
VP9_COMMON *const cm = &pbi->common;
MACROBLOCKD *const xd = &pbi->mb;
-
- const uint8_t *const data_end = pbi->source + pbi->source_sz;
- const int aligned_mi_cols = mi_cols_aligned_to_sb(cm->mi_cols);
+ const int aligned_cols = mi_cols_aligned_to_sb(cm->mi_cols);
const int tile_cols = 1 << cm->log2_tile_cols;
const int tile_rows = 1 << cm->log2_tile_rows;
+ TileBuffer tile_buffers[4][1 << 6];
int tile_row, tile_col;
+ const uint8_t *const data_end = pbi->source + pbi->source_sz;
+ const uint8_t *end = NULL;
+ vp9_reader r;
+
+ assert(tile_rows <= 4);
+ assert(tile_cols <= (1 << 6));
// Note: this memset assumes above_context[0], [1] and [2]
// are allocated as part of the same buffer.
vpx_memset(pbi->above_context[0], 0,
- sizeof(*pbi->above_context[0]) * MAX_MB_PLANE *
- 2 * aligned_mi_cols);
+ sizeof(*pbi->above_context[0]) * MAX_MB_PLANE * 2 * aligned_cols);
vpx_memset(pbi->above_seg_context, 0,
- sizeof(*pbi->above_seg_context) * aligned_mi_cols);
+ sizeof(*pbi->above_seg_context) * aligned_cols);
- if (pbi->oxcf.inv_tile_order) {
- const uint8_t *data_ptr2[4][1 << 6];
- vp9_reader bc_bak = {0};
-
- // pre-initialize the offsets, we're going to decode in inverse order
- data_ptr2[0][0] = data;
- for (tile_row = 0; tile_row < tile_rows; tile_row++) {
- for (tile_col = 0; tile_col < tile_cols; tile_col++) {
- const int last_tile =
- tile_row == tile_rows - 1 && tile_col == tile_cols - 1;
- const size_t size = get_tile(data_end, last_tile, &cm->error, &data);
- data_ptr2[tile_row][tile_col] = data;
- data += size;
- }
- }
-
- for (tile_row = 0; tile_row < tile_rows; tile_row++) {
- for (tile_col = tile_cols - 1; tile_col >= 0; tile_col--) {
- TileInfo tile;
-
- vp9_tile_init(&tile, cm, tile_row, tile_col);
- setup_token_decoder(data_ptr2[tile_row][tile_col], data_end,
- data_end - data_ptr2[tile_row][tile_col],
- &cm->error, &residual_bc);
- setup_tile_context(pbi, xd, tile_col);
- decode_tile(pbi, &tile, &residual_bc);
- if (tile_row == tile_rows - 1 && tile_col == tile_cols - 1)
- bc_bak = residual_bc;
- }
- }
- residual_bc = bc_bak;
- } else {
- for (tile_row = 0; tile_row < tile_rows; tile_row++) {
- for (tile_col = 0; tile_col < tile_cols; tile_col++) {
- const int last_tile =
- tile_row == tile_rows - 1 && tile_col == tile_cols - 1;
- const size_t size = get_tile(data_end, last_tile, &cm->error, &data);
- TileInfo tile;
-
- vp9_tile_init(&tile, cm, tile_row, tile_col);
-
- setup_token_decoder(data, data_end, size, &cm->error, &residual_bc);
- setup_tile_context(pbi, xd, tile_col);
- decode_tile(pbi, &tile, &residual_bc);
- data += size;
- }
+ // Load tile data into tile_buffers
+ for (tile_row = 0; tile_row < tile_rows; ++tile_row) {
+ for (tile_col = 0; tile_col < tile_cols; ++tile_col) {
+ const int last_tile = tile_row == tile_rows - 1 &&
+ tile_col == tile_cols - 1;
+ const size_t size = get_tile(data_end, last_tile, &cm->error, &data);
+ TileBuffer *const buf = &tile_buffers[tile_row][tile_col];
+ buf->data = data;
+ buf->size = size;
+ data += size;
}
}
- return vp9_reader_find_end(&residual_bc);
+ // Decode tiles using data from tile_buffers
+ for (tile_row = 0; tile_row < tile_rows; ++tile_row) {
+ for (tile_col = 0; tile_col < tile_cols; ++tile_col) {
+ const int col = pbi->oxcf.inv_tile_order ? tile_cols - tile_col - 1
+ : tile_col;
+ const int last_tile = tile_row == tile_rows - 1 &&
+ col == tile_cols - 1;
+ const TileBuffer *const buf = &tile_buffers[tile_row][col];
+ TileInfo tile;
+
+ vp9_tile_init(&tile, cm, tile_row, col);
+ setup_token_decoder(buf->data, data_end, buf->size, &cm->error, &r);
+ setup_tile_context(pbi, xd, tile_row, col);
+ decode_tile(pbi, &tile, &r);
+
+ if (last_tile)
+ end = vp9_reader_find_end(&r);
+ }
+ }
+
+ return end;
}
static int tile_worker_hook(void *arg1, void *arg2) {
@@ -1023,7 +1007,7 @@
setup_token_decoder(data, data_end, size, &cm->error,
&tile_data->bit_reader);
- setup_tile_context(pbi, &tile_data->xd, tile_col);
+ setup_tile_context(pbi, &tile_data->xd, 0, tile_col);
worker->had_error = 0;
if (i == num_workers - 1 || tile_col == tile_cols - 1) {
@@ -1227,7 +1211,7 @@
for (i = 0; i < PARTITION_TYPES - 1; ++i)
vp9_diff_update_prob(&r, &fc->partition_prob[j][i]);
- read_mv_probs(&r, nmvc, cm->allow_high_precision_mv);
+ read_mv_probs(nmvc, cm->allow_high_precision_mv, &r);
}
return vp9_reader_has_error(&r);
@@ -1323,7 +1307,7 @@
}
}
- alloc_tile_storage(pbi, tile_cols);
+ alloc_tile_storage(pbi, tile_rows, tile_cols);
xd->mi_8x8 = cm->mi_grid_visible;
xd->mode_info_stride = cm->mode_info_stride;
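
decode_tiles() is now two passes: pass 1 walks the bitstream once, recording each tile's (data, size) in tile_buffers; pass 2 decodes from those records in normal or inverse column order, replacing the duplicated inv_tile_order branch. A reduced sketch of the flow; decode_tile_stub stands in for setup_token_decoder plus decode_tile:

    #include <stddef.h>

    typedef struct { const unsigned char *data; size_t size; } Buf;

    static void decode_tile_stub(const Buf *b) { (void)b; }

    static void decode_all(const Buf bufs[4][64], int rows, int cols, int inv) {
      int r, c;
      for (r = 0; r < rows; ++r) {
        for (c = 0; c < cols; ++c) {
          const int col = inv ? cols - 1 - c : c;  /* inverse tile order */
          decode_tile_stub(&bufs[r][col]);
        }
      }
    }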
diff --git a/vp9/encoder/vp9_bitstream.c b/vp9/encoder/vp9_bitstream.c
index 87bd36c..e1978d4 100644
--- a/vp9/encoder/vp9_bitstream.c
+++ b/vp9/encoder/vp9_bitstream.c
@@ -169,10 +169,8 @@
const unsigned int num_events[/* n */]) {
int i = 0;
- vp9_tree_probs_from_distribution(tree, bct, num_events, 0);
- n--;
-
- for (i = 0; i < n; ++i)
+ vp9_tree_probs_from_distribution(tree, bct, num_events);
+ for (i = 0; i < n - 1; ++i)
vp9_cond_prob_diff_update(w, &Pcur[i], bct[i]);
}
@@ -231,7 +229,7 @@
int i, j;
for (j = 0; j < SWITCHABLE_FILTER_CONTEXTS; ++j) {
vp9_tree_probs_from_distribution(vp9_switchable_interp_tree, branch_ct,
- cm->counts.switchable_interp[j], 0);
+ cm->counts.switchable_interp[j]);
for (i = 0; i < SWITCHABLE_FILTERS - 1; ++i)
vp9_cond_prob_diff_update(w, &cm->fc.switchable_interp_prob[j][i],
@@ -250,7 +248,7 @@
for (i = 0; i < INTER_MODE_CONTEXTS; ++i) {
unsigned int branch_ct[INTER_MODES - 1][2];
vp9_tree_probs_from_distribution(vp9_inter_mode_tree, branch_ct,
- cm->counts.inter_mode[i], NEARESTMV);
+ cm->counts.inter_mode[i]);
for (j = 0; j < INTER_MODES - 1; ++j)
vp9_cond_prob_diff_update(w, &cm->fc.inter_mode_probs[i][j],
@@ -321,7 +319,7 @@
const vp9_prob *p) {
assert(is_inter_mode(mode));
write_token(w, vp9_inter_mode_tree, p,
- &vp9_inter_mode_encodings[inter_mode_offset(mode)]);
+ &vp9_inter_mode_encodings[INTER_OFFSET(mode)]);
}
@@ -448,7 +446,7 @@
if (bsize >= BLOCK_8X8) {
write_sb_mv_ref(bc, mode, mv_ref_p);
++cm->counts.inter_mode[mi->mode_context[rf]]
- [inter_mode_offset(mode)];
+ [INTER_OFFSET(mode)];
}
}
@@ -471,7 +469,7 @@
const MB_PREDICTION_MODE blockmode = m->bmi[j].as_mode;
write_sb_mv_ref(bc, blockmode, mv_ref_p);
++cm->counts.inter_mode[mi->mode_context[rf]]
- [inter_mode_offset(blockmode)];
+ [INTER_OFFSET(blockmode)];
if (blockmode == NEWMV) {
#ifdef ENTROPY_STATS
@@ -703,7 +701,7 @@
continue;
vp9_tree_probs_from_distribution(vp9_coef_tree,
coef_branch_ct[i][j][k][l],
- coef_counts[i][j][k][l], 0);
+ coef_counts[i][j][k][l]);
coef_branch_ct[i][j][k][l][0][1] = eob_branch_ct[i][j][k][l] -
coef_branch_ct[i][j][k][l][0][0];
for (m = 0; m < UNCONSTRAINED_NODES; ++m)
@@ -1217,7 +1215,7 @@
for (tile_col = 0; tile_col < tile_cols; tile_col++) {
TileInfo tile;
- vp9_tile_init(&tile, cm, 0, tile_col);
+ vp9_tile_init(&tile, cm, tile_row, tile_col);
tok_end = tok[tile_row][tile_col] + cpi->tok_count[tile_row][tile_col];
if (tile_col < tile_cols - 1 || tile_row < tile_rows - 1)
diff --git a/vp9/encoder/vp9_encodeframe.c b/vp9/encoder/vp9_encodeframe.c
index a45299b..702fc70 100644
--- a/vp9/encoder/vp9_encodeframe.c
+++ b/vp9/encoder/vp9_encodeframe.c
@@ -1765,7 +1765,7 @@
}
static void encode_sb_row(VP9_COMP *cpi, const TileInfo *const tile,
- int mi_row, TOKENEXTRA **tp, int *totalrate) {
+ int mi_row, TOKENEXTRA **tp) {
VP9_COMMON * const cm = &cpi->common;
int mi_col;
@@ -1910,7 +1910,6 @@
MACROBLOCK * const x = &cpi->mb;
VP9_COMMON * const cm = &cpi->common;
MACROBLOCKD * const xd = &x->e_mbd;
- int totalrate;
// fprintf(stderr, "encode_frame_internal frame %d (%d) type %d\n",
// cpi->common.current_video_frame, cpi->common.show_frame,
@@ -1926,8 +1925,6 @@
}
#endif
- totalrate = 0;
-
vp9_zero(cm->counts.switchable_interp);
vp9_zero(cpi->tx_stepdown_count);
@@ -1989,7 +1986,7 @@
vp9_tile_init(&tile, cm, tile_row, tile_col);
for (mi_row = tile.mi_row_start;
mi_row < tile.mi_row_end; mi_row += 8)
- encode_sb_row(cpi, &tile, mi_row, &tp, &totalrate);
+ encode_sb_row(cpi, &tile, mi_row, &tp);
cpi->tok_count[tile_row][tile_col] = (unsigned int)(tp - tp_old);
assert(tp - cpi->tok <= get_token_alloc(cm->mb_rows, cm->mb_cols));
@@ -2015,10 +2012,6 @@
cpi->sf.skip_encode_frame = 0;
}
- // 256 rate units to the bit,
- // projected_frame_size in units of BYTES
- cpi->projected_frame_size = totalrate >> 8;
-
#if 0
// Keep record of the total distortion this time around for future use
cpi->last_frame_distortion = cpi->frame_distortion;
diff --git a/vp9/encoder/vp9_encodemv.c b/vp9/encoder/vp9_encodemv.c
index e2c6c4c..030ca64 100644
--- a/vp9/encoder/vp9_encodemv.c
+++ b/vp9/encoder/vp9_encodemv.c
@@ -155,9 +155,8 @@
unsigned int (*branch_ct_class0_hp)[2],
unsigned int (*branch_ct_hp)[2]) {
int i, j, k;
- vp9_tree_probs_from_distribution(vp9_mv_joint_tree,
- branch_ct_joint,
- nmv_count->joints, 0);
+ vp9_tree_probs_from_distribution(vp9_mv_joint_tree, branch_ct_joint,
+ nmv_count->joints);
for (i = 0; i < 2; ++i) {
const uint32_t s0 = nmv_count->comps[i].sign[0];
const uint32_t s1 = nmv_count->comps[i].sign[1];
@@ -166,10 +165,10 @@
branch_ct_sign[i][1] = s1;
vp9_tree_probs_from_distribution(vp9_mv_class_tree,
branch_ct_classes[i],
- nmv_count->comps[i].classes, 0);
+ nmv_count->comps[i].classes);
vp9_tree_probs_from_distribution(vp9_mv_class0_tree,
branch_ct_class0[i],
- nmv_count->comps[i].class0, 0);
+ nmv_count->comps[i].class0);
for (j = 0; j < MV_OFFSET_BITS; ++j) {
const uint32_t b0 = nmv_count->comps[i].bits[j][0];
const uint32_t b1 = nmv_count->comps[i].bits[j][1];
@@ -182,11 +181,11 @@
for (k = 0; k < CLASS0_SIZE; ++k) {
vp9_tree_probs_from_distribution(vp9_mv_fp_tree,
branch_ct_class0_fp[i][k],
- nmv_count->comps[i].class0_fp[k], 0);
+ nmv_count->comps[i].class0_fp[k]);
}
vp9_tree_probs_from_distribution(vp9_mv_fp_tree,
branch_ct_fp[i],
- nmv_count->comps[i].fp, 0);
+ nmv_count->comps[i].fp);
}
if (usehp) {
for (i = 0; i < 2; ++i) {
diff --git a/vp9/encoder/vp9_quantize.c b/vp9/encoder/vp9_quantize.c
index fca7525..6222394 100644
--- a/vp9/encoder/vp9_quantize.c
+++ b/vp9/encoder/vp9_quantize.c
@@ -22,7 +22,7 @@
extern int enc_debug;
#endif
-void vp9_quantize_b_c(const int16_t *coeff_ptr, intptr_t n_coeffs,
+void vp9_quantize_b_c(const int16_t *coeff_ptr, intptr_t count,
int skip_block,
const int16_t *zbin_ptr, const int16_t *round_ptr,
const int16_t *quant_ptr, const int16_t *quant_shift_ptr,
@@ -30,58 +30,44 @@
const int16_t *dequant_ptr,
int zbin_oq_value, uint16_t *eob_ptr,
const int16_t *scan, const int16_t *iscan) {
- int i, rc, eob;
- int zbins[2], nzbins[2], zbin;
- int x, y, z, sz;
- int zero_flag = n_coeffs;
+ int i, non_zero_count = count, eob = -1;
+ const int zbins[2] = { zbin_ptr[0] + zbin_oq_value,
+ zbin_ptr[1] + zbin_oq_value };
+ const int nzbins[2] = { zbins[0] * -1,
+ zbins[1] * -1 };
- vpx_memset(qcoeff_ptr, 0, n_coeffs*sizeof(int16_t));
- vpx_memset(dqcoeff_ptr, 0, n_coeffs*sizeof(int16_t));
-
- eob = -1;
-
- // Base ZBIN
- zbins[0] = zbin_ptr[0] + zbin_oq_value;
- zbins[1] = zbin_ptr[1] + zbin_oq_value;
- nzbins[0] = zbins[0] * -1;
- nzbins[1] = zbins[1] * -1;
+ vpx_memset(qcoeff_ptr, 0, count * sizeof(int16_t));
+ vpx_memset(dqcoeff_ptr, 0, count * sizeof(int16_t));
if (!skip_block) {
// Pre-scan pass
- for (i = n_coeffs - 1; i >= 0; i--) {
- rc = scan[i];
- z = coeff_ptr[rc];
+ for (i = count - 1; i >= 0; i--) {
+ const int rc = scan[i];
+ const int coeff = coeff_ptr[rc];
- if (z < zbins[rc != 0] && z > nzbins[rc != 0]) {
- zero_flag--;
- } else {
+ if (coeff < zbins[rc != 0] && coeff > nzbins[rc != 0])
+ non_zero_count--;
+ else
break;
- }
}
// Quantization pass: All coefficients with index >= non_zero_count are
// skippable. Note: non_zero_count can be zero.
- for (i = 0; i < zero_flag; i++) {
- rc = scan[i];
- z = coeff_ptr[rc];
+ for (i = 0; i < non_zero_count; i++) {
+ const int rc = scan[i];
+ const int coeff = coeff_ptr[rc];
+ const int coeff_sign = (coeff >> 31);
+ const int abs_coeff = (coeff ^ coeff_sign) - coeff_sign;
- zbin = (zbins[rc != 0]);
+ if (abs_coeff >= zbins[rc != 0]) {
+ int tmp = clamp(abs_coeff + round_ptr[rc != 0], INT16_MIN, INT16_MAX);
+ tmp = ((((tmp * quant_ptr[rc != 0]) >> 16) + tmp) *
+ quant_shift_ptr[rc != 0]) >> 16; // quantization
+ qcoeff_ptr[rc] = (tmp ^ coeff_sign) - coeff_sign;
+ dqcoeff_ptr[rc] = qcoeff_ptr[rc] * dequant_ptr[rc != 0];
- sz = (z >> 31); // sign of z
- x = (z ^ sz) - sz;
-
- if (x >= zbin) {
- x += (round_ptr[rc != 0]);
- x = clamp(x, INT16_MIN, INT16_MAX);
- y = (((int)(((int)(x * quant_ptr[rc != 0]) >> 16) + x)) *
- quant_shift_ptr[rc != 0]) >> 16; // quantize (x)
- x = (y ^ sz) - sz; // get the sign back
- qcoeff_ptr[rc] = x; // write to destination
- dqcoeff_ptr[rc] = x * dequant_ptr[rc != 0]; // dequantized value
-
- if (y) {
- eob = i; // last nonzero coeffs
- }
+ if (tmp)
+ eob = i;
}
}
}
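
The rewritten loop above computes |coeff| with the branchless XOR/subtract idiom and restores the sign the same way after quantization. A tiny standalone check of the round-trip; it relies on an arithmetic right shift for the sign mask, which the codebase already assumes:

    #include <assert.h>

    static int sign_round_trip(int coeff) {
      const int sign = coeff >> 31;            /* all-ones for negatives */
      const int abs_coeff = (coeff ^ sign) - sign;
      return (abs_coeff ^ sign) - sign;        /* restores coeff exactly */
    }

    int main(void) {
      assert(sign_round_trip(-37) == -37);
      assert(sign_round_trip(42) == 42);
      return 0;
    }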
diff --git a/vp9/encoder/vp9_rdopt.c b/vp9/encoder/vp9_rdopt.c
index 993919e..6aaa2c7 100644
--- a/vp9/encoder/vp9_rdopt.c
+++ b/vp9/encoder/vp9_rdopt.c
@@ -268,10 +268,10 @@
MB_PREDICTION_MODE m;
for (m = NEARESTMV; m < MB_MODE_COUNT; m++)
- cpi->mb.inter_mode_cost[i][inter_mode_offset(m)] =
+ cpi->mb.inter_mode_cost[i][INTER_OFFSET(m)] =
cost_token(vp9_inter_mode_tree,
cm->fc.inter_mode_probs[i],
- &vp9_inter_mode_encodings[inter_mode_offset(m)]);
+ &vp9_inter_mode_encodings[INTER_OFFSET(m)]);
}
}
}
@@ -609,7 +609,7 @@
// TODO(jingning): temporarily enabled only for luma component
rd = MIN(rd1, rd2);
- if (plane == 0)
+ if (!xd->lossless && plane == 0)
x->zcoeff_blk[tx_size][block] = rd1 > rd2 || !xd->plane[plane].eobs[block];
args->this_rate += args->rate;
@@ -1416,7 +1416,7 @@
// Don't account for mode here if segment skip is enabled.
if (!vp9_segfeature_active(&cpi->common.seg, segment_id, SEG_LVL_SKIP)) {
assert(is_inter_mode(mode));
- return x->inter_mode_cost[mode_context][inter_mode_offset(mode)];
+ return x->inter_mode_cost[mode_context][INTER_OFFSET(mode)];
} else {
return 0;
}
@@ -1707,7 +1707,7 @@
const struct buf_2d orig_src = x->plane[0].src;
struct buf_2d orig_pre[2];
- mode_idx = inter_mode_offset(this_mode);
+ mode_idx = INTER_OFFSET(this_mode);
bsi->rdstat[i][mode_idx].brdcost = INT64_MAX;
// if we're near/nearest and mv == 0,0, compare to zeromv
@@ -2002,7 +2002,7 @@
return;
}
- mode_idx = inter_mode_offset(mode_selected);
+ mode_idx = INTER_OFFSET(mode_selected);
vpx_memcpy(t_above, bsi->rdstat[i][mode_idx].ta, sizeof(t_above));
vpx_memcpy(t_left, bsi->rdstat[i][mode_idx].tl, sizeof(t_left));
@@ -2078,7 +2078,7 @@
return INT64_MAX;
/* set it to the best */
for (i = 0; i < 4; i++) {
- mode_idx = inter_mode_offset(bsi->modes[i]);
+ mode_idx = INTER_OFFSET(bsi->modes[i]);
mi->bmi[i].as_mv[0].as_int = bsi->rdstat[i][mode_idx].mvs[0].as_int;
if (has_second_ref(mbmi))
mi->bmi[i].as_mv[1].as_int = bsi->rdstat[i][mode_idx].mvs[1].as_int;
diff --git a/vp9/encoder/x86/vp9_subpel_variance.asm b/vp9/encoder/x86/vp9_subpel_variance.asm
index 533456b..1a9e4e8 100644
--- a/vp9/encoder/x86/vp9_subpel_variance.asm
+++ b/vp9/encoder/x86/vp9_subpel_variance.asm
@@ -118,6 +118,14 @@
RET
%endmacro
+%macro INC_SRC_BY_SRC_STRIDE 0
+%if ARCH_X86=1 && CONFIG_PIC=1
+ add srcq, src_stridemp
+%else
+ add srcq, src_strideq
+%endif
+%endmacro
+
%macro SUBPEL_VARIANCE 1-2 0 ; W
%if cpuflag(ssse3)
%define bilin_filter_m bilin_filter_m_ssse3
@@ -129,41 +137,85 @@
; FIXME(rbultje) only bilinear filters use >8 registers, and ssse3 only uses
; 11, not 13, if the registers are ordered correctly. May make a minor speed
; difference on Win64
-%ifdef PIC
-%if %2 == 1 ; avg
-cglobal sub_pixel_avg_variance%1xh, 9, 10, 13, src, src_stride, \
- x_offset, y_offset, \
- dst, dst_stride, \
- sec, sec_stride, height, sse
-%define sec_str sec_strideq
+
+%ifdef PIC ; 64bit PIC
+ %if %2 == 1 ; avg
+ cglobal sub_pixel_avg_variance%1xh, 9, 10, 13, src, src_stride, \
+ x_offset, y_offset, \
+ dst, dst_stride, \
+ sec, sec_stride, height, sse
+ %define sec_str sec_strideq
+ %else
+ cglobal sub_pixel_variance%1xh, 7, 8, 13, src, src_stride, x_offset, \
+ y_offset, dst, dst_stride, height, sse
+ %endif
+ %define h heightd
+ %define bilin_filter sseq
%else
-cglobal sub_pixel_variance%1xh, 7, 8, 13, src, src_stride, x_offset, y_offset, \
- dst, dst_stride, height, sse
+ %if ARCH_X86=1 && CONFIG_PIC=1
+ %if %2 == 1 ; avg
+ cglobal sub_pixel_avg_variance%1xh, 7, 7, 13, src, src_stride, \
+ x_offset, y_offset, \
+ dst, dst_stride, \
+ sec, sec_stride, \
+ height, sse, g_bilin_filter, g_pw_8
+ %define h dword heightm
+ %define sec_str sec_stridemp
+
+  ; Store the bilin_filter and pw_8 locations on the stack
+ GET_GOT eax
+ add esp, 4 ; restore esp
+
+ lea ecx, [GLOBAL(bilin_filter_m)]
+ mov g_bilin_filterm, ecx
+
+ lea ecx, [GLOBAL(pw_8)]
+ mov g_pw_8m, ecx
+
+ LOAD_IF_USED 0, 1 ; load eax, ecx back
+ %else
+ cglobal sub_pixel_variance%1xh, 7, 7, 13, src, src_stride, x_offset, \
+ y_offset, dst, dst_stride, height, sse, \
+ g_bilin_filter, g_pw_8
+ %define h heightd
+
+  ; Store the bilin_filter and pw_8 locations on the stack
+ GET_GOT eax
+ add esp, 4 ; restore esp
+
+ lea ecx, [GLOBAL(bilin_filter_m)]
+ mov g_bilin_filterm, ecx
+
+ lea ecx, [GLOBAL(pw_8)]
+ mov g_pw_8m, ecx
+
+ LOAD_IF_USED 0, 1 ; load eax, ecx back
+ %endif
+ %else
+ %if %2 == 1 ; avg
+ cglobal sub_pixel_avg_variance%1xh, 7 + 2 * ARCH_X86_64, \
+ 7 + 2 * ARCH_X86_64, 13, src, src_stride, \
+ x_offset, y_offset, \
+ dst, dst_stride, \
+ sec, sec_stride, \
+ height, sse
+ %if ARCH_X86_64
+ %define h heightd
+ %define sec_str sec_strideq
+ %else
+ %define h dword heightm
+ %define sec_str sec_stridemp
+ %endif
+ %else
+ cglobal sub_pixel_variance%1xh, 7, 7, 13, src, src_stride, x_offset, \
+ y_offset, dst, dst_stride, height, sse
+ %define h heightd
+ %endif
+
+ %define bilin_filter bilin_filter_m
+ %endif
%endif
-%define h heightd
-%define bilin_filter sseq
-%else
-%if %2 == 1 ; avg
-cglobal sub_pixel_avg_variance%1xh, 7 + 2 * ARCH_X86_64, \
- 7 + 2 * ARCH_X86_64, 13, src, src_stride, \
- x_offset, y_offset, \
- dst, dst_stride, \
- sec, sec_stride, \
- height, sse
-%if ARCH_X86_64
-%define h heightd
-%define sec_str sec_strideq
-%else
-%define h dword heightm
-%define sec_str sec_stridemp
-%endif
-%else
-cglobal sub_pixel_variance%1xh, 7, 7, 13, src, src_stride, x_offset, y_offset, \
- dst, dst_stride, height, sse
-%define h heightd
-%endif
-%define bilin_filter bilin_filter_m
-%endif
+
ASSERT %1 <= 16 ; m6 overflows if w > 16
pxor m6, m6 ; sum
pxor m7, m7 ; sse
@@ -329,11 +381,22 @@
%define filter_y_b m9
%define filter_rnd m10
%else ; x86-32 or mmx
+%if ARCH_X86=1 && CONFIG_PIC=1
+; x_offset == 0, reuse x_offset reg
+%define tempq x_offsetq
+ add y_offsetq, g_bilin_filterm
+%define filter_y_a [y_offsetq]
+%define filter_y_b [y_offsetq+16]
+ mov tempq, g_pw_8m
+%define filter_rnd [tempq]
+%else
add y_offsetq, bilin_filter
%define filter_y_a [y_offsetq]
%define filter_y_b [y_offsetq+16]
%define filter_rnd [pw_8]
%endif
+%endif
+
.x_zero_y_other_loop:
%if %1 == 16
movu m0, [srcq]
@@ -615,12 +678,23 @@
%define filter_y_a m8
%define filter_y_b m9
%define filter_rnd m10
+%else ;x86_32
+%if ARCH_X86=1 && CONFIG_PIC=1
+; x_offset == 0.5. We can reuse x_offset reg
+%define tempq x_offsetq
+ add y_offsetq, g_bilin_filterm
+%define filter_y_a [y_offsetq]
+%define filter_y_b [y_offsetq+16]
+ mov tempq, g_pw_8m
+%define filter_rnd [tempq]
%else
add y_offsetq, bilin_filter
%define filter_y_a [y_offsetq]
%define filter_y_b [y_offsetq+16]
%define filter_rnd [pw_8]
%endif
+%endif
+
%if %1 == 16
movu m0, [srcq]
movu m3, [srcq+1]
@@ -752,12 +826,23 @@
%define filter_x_a m8
%define filter_x_b m9
%define filter_rnd m10
+%else ; x86-32
+%if ARCH_X86=1 && CONFIG_PIC=1
+; y_offset == 0. We can reuse y_offset reg.
+%define tempq y_offsetq
+ add x_offsetq, g_bilin_filterm
+%define filter_x_a [x_offsetq]
+%define filter_x_b [x_offsetq+16]
+ mov tempq, g_pw_8m
+%define filter_rnd [tempq]
%else
add x_offsetq, bilin_filter
%define filter_x_a [x_offsetq]
%define filter_x_b [x_offsetq+16]
%define filter_rnd [pw_8]
%endif
+%endif
+
.x_other_y_zero_loop:
%if %1 == 16
movu m0, [srcq]
@@ -873,12 +958,23 @@
%define filter_x_a m8
%define filter_x_b m9
%define filter_rnd m10
+%else ; x86-32
+%if ARCH_X86=1 && CONFIG_PIC=1
+; y_offset == 0.5. We can reuse y_offset reg.
+%define tempq y_offsetq
+ add x_offsetq, g_bilin_filterm
+%define filter_x_a [x_offsetq]
+%define filter_x_b [x_offsetq+16]
+ mov tempq, g_pw_8m
+%define filter_rnd [tempq]
%else
add x_offsetq, bilin_filter
%define filter_x_a [x_offsetq]
%define filter_x_b [x_offsetq+16]
%define filter_rnd [pw_8]
%endif
+%endif
+
%if %1 == 16
movu m0, [srcq]
movu m1, [srcq+1]
@@ -1057,6 +1153,21 @@
%define filter_y_a m10
%define filter_y_b m11
%define filter_rnd m12
+%else ; x86-32
+%if ARCH_X86=1 && CONFIG_PIC=1
+; In this case there is no unused register, so the src_stride register is
+; reused; src_stride is reloaded from the stack later when it is needed.
+%define tempq src_strideq
+ mov tempq, g_bilin_filterm
+ add x_offsetq, tempq
+ add y_offsetq, tempq
+%define filter_x_a [x_offsetq]
+%define filter_x_b [x_offsetq+16]
+%define filter_y_a [y_offsetq]
+%define filter_y_b [y_offsetq+16]
+
+ mov tempq, g_pw_8m
+%define filter_rnd [tempq]
%else
add x_offsetq, bilin_filter
add y_offsetq, bilin_filter
@@ -1066,6 +1177,8 @@
%define filter_y_b [y_offsetq+16]
%define filter_rnd [pw_8]
%endif
+%endif
+
; x_offset == bilin interpolation && y_offset == bilin interpolation
%if %1 == 16
movu m0, [srcq]
@@ -1093,7 +1206,9 @@
%endif
psraw m0, 4
psraw m2, 4
- add srcq, src_strideq
+
+ INC_SRC_BY_SRC_STRIDE
+
packuswb m0, m2
.x_other_y_other_loop:
%if cpuflag(ssse3)
@@ -1163,7 +1278,7 @@
SUM_SSE m0, m1, m2, m3, m6, m7
mova m0, m4
- add srcq, src_strideq
+ INC_SRC_BY_SRC_STRIDE
add dstq, dst_strideq
%else ; %1 < 16
movh m0, [srcq]
@@ -1184,12 +1299,17 @@
%if cpuflag(ssse3)
packuswb m0, m0
%endif
- add srcq, src_strideq
+
+ INC_SRC_BY_SRC_STRIDE
+
.x_other_y_other_loop:
movh m2, [srcq]
movh m1, [srcq+1]
- movh m4, [srcq+src_strideq]
- movh m3, [srcq+src_strideq+1]
+
+ INC_SRC_BY_SRC_STRIDE
+ movh m4, [srcq]
+ movh m3, [srcq+1]
+
%if cpuflag(ssse3)
punpcklbw m2, m1
punpcklbw m4, m3
@@ -1253,7 +1373,7 @@
SUM_SSE m0, m1, m2, m3, m6, m7
mova m0, m4
- lea srcq, [srcq+src_strideq*2]
+ INC_SRC_BY_SRC_STRIDE
lea dstq, [dstq+dst_strideq*2]
%endif
%if %2 == 1 ; avg
diff --git a/vpxdec.c b/vpxdec.c
index 8e575e1..110e4ac 100644
--- a/vpxdec.c
+++ b/vpxdec.c
@@ -33,21 +33,6 @@
#include "nestegg/include/nestegg/nestegg.h"
#include "third_party/libyuv/include/libyuv/scale.h"
-#if CONFIG_OS_SUPPORT
-#if defined(_MSC_VER)
-#include <io.h>
-#define snprintf _snprintf
-#define isatty _isatty
-#define fileno _fileno
-#else
-#include <unistd.h>
-#endif
-#endif
-
-#ifndef PATH_MAX
-#define PATH_MAX 256
-#endif
-
static const char *exec_name;
static const struct {
diff --git a/vpxenc.c b/vpxenc.c
index df75b85..f0f0df3 100644
--- a/vpxenc.c
+++ b/vpxenc.c
@@ -49,31 +49,6 @@
#include "webmenc.h"
#include "y4minput.h"
-
-/* Need special handling of these functions on Windows */
-#if defined(_MSC_VER)
-/* MSVS doesn't define off_t, and uses _f{seek,tell}i64 */
-typedef __int64 off_t;
-#define fseeko _fseeki64
-#define ftello _ftelli64
-#elif defined(_WIN32)
-/* MinGW defines off_t as long
- and uses f{seek,tell}o64/off64_t for large files */
-#define fseeko fseeko64
-#define ftello ftello64
-#define off_t off64_t
-#endif
-
-#define LITERALU64(hi,lo) ((((uint64_t)hi)<<32)|lo)
-
-/* We should use 32-bit file operations in WebM file format
- * when building ARM executable file (.axf) with RVCT */
-#if !CONFIG_OS_SUPPORT
-typedef long off_t;
-#define fseeko fseek
-#define ftello ftell
-#endif
-
/* Swallow warnings about unused results of fread/fwrite */
static size_t wrap_fread(void *ptr, size_t size, size_t nmemb,
FILE *stream) {
@@ -1409,17 +1384,7 @@
} else
fatal("Unsupported Y4M stream.");
} else if (input->detect.buf_read == 4 && file_is_ivf(input, &fourcc)) {
- input->file_type = FILE_TYPE_IVF;
- switch (fourcc) {
- case 0x32315659:
- input->use_i420 = 0;
- break;
- case 0x30323449:
- input->use_i420 = 1;
- break;
- default:
- fatal("Unsupported fourcc (%08x) in IVF", fourcc);
- }
+ fatal("IVF is not supported as input.");
} else {
input->file_type = FILE_TYPE_RAW;
}
@@ -1433,7 +1398,7 @@
}
static struct stream_state *new_stream(struct global_config *global,
- struct stream_state *prev) {
+ struct stream_state *prev) {
struct stream_state *stream;
stream = calloc(1, sizeof(*stream));
diff --git a/webmenc.c b/webmenc.c
index a584e9d..17bbeec 100644
--- a/webmenc.c
+++ b/webmenc.c
@@ -15,20 +15,6 @@
#include "third_party/libmkv/EbmlWriter.h"
#include "third_party/libmkv/EbmlIDs.h"
-#if defined(_MSC_VER)
-/* MSVS uses _f{seek,tell}i64 */
-#define fseeko _fseeki64
-#define ftello _ftelli64
-#elif defined(_WIN32)
-/* MinGW defines off_t as long, and uses f{seek,tell}o64/off64_t for large
- * files */
-#define fseeko fseeko64
-#define ftello ftello64
-#define off_t off64_t
-#endif
-
-#define LITERALU64(hi, lo) ((((uint64_t)hi) << 32) | lo)
-
void Ebml_Write(struct EbmlGlobal *glob,
const void *buffer_in,
unsigned long len) {