Merge "Allocate tile data adaptively to accommodate the frame size increase"
diff --git a/build/make/Android.mk b/build/make/Android.mk
index 0add523..e971c9d 100644
--- a/build/make/Android.mk
+++ b/build/make/Android.mk
@@ -163,6 +163,7 @@
endif
# Add a dependency to force generation of the RTCD files.
+define rtcd_dep_template
ifeq ($(CONFIG_VP8), yes)
$(foreach file, $(LOCAL_SRC_FILES), $(LOCAL_PATH)/$(file)): vp8_rtcd.h
endif
@@ -175,6 +176,9 @@
ifeq ($(TARGET_ARCH_ABI),x86)
$(foreach file, $(LOCAL_SRC_FILES), $(LOCAL_PATH)/$(file)): vpx_config.asm
endif
+endef
+
+$(eval $(call rtcd_dep_template))
.PHONY: clean
clean:
diff --git a/libs.mk b/libs.mk
index 0f87a8a..0ca8379 100644
--- a/libs.mk
+++ b/libs.mk
@@ -373,6 +373,7 @@
TEST_INTRA_PRED_SPEED_BIN=./test_intra_pred_speed$(EXE_SFX)
TEST_INTRA_PRED_SPEED_SRCS=$(addprefix test/,$(call enabled,TEST_INTRA_PRED_SPEED_SRCS))
+TEST_INTRA_PRED_SPEED_OBJS := $(sort $(call objs,$(TEST_INTRA_PRED_SPEED_SRCS)))
libvpx_test_srcs.txt:
@echo " [CREATE] $@"
@@ -486,7 +487,6 @@
$(LIBVPX_TEST_OBJS) \
-L. -lvpx -lgtest $(extralibs) -lm))
-TEST_INTRA_PRED_SPEED_OBJS := $(sort $(call objs,$(TEST_INTRA_PRED_SPEED_SRCS)))
ifneq ($(strip $(TEST_INTRA_PRED_SPEED_OBJS)),)
$(TEST_INTRA_PRED_SPEED_OBJS) $(TEST_INTRA_PRED_SPEED_OBJS:.o=.d): CXXFLAGS += $(GTEST_INCLUDES)
OBJS-yes += $(TEST_INTRA_PRED_SPEED_OBJS)
diff --git a/test/android/Android.mk b/test/android/Android.mk
index af85634..48872a2 100644
--- a/test/android/Android.mk
+++ b/test/android/Android.mk
@@ -51,4 +51,6 @@
LOCAL_C_INCLUDES := $(BINDINGS_DIR)
FILTERED_SRC := $(sort $(filter %.cc %.c, $(LIBVPX_TEST_SRCS-yes)))
LOCAL_SRC_FILES := $(addprefix ./test/, $(FILTERED_SRC))
+# Some test files depend on *_rtcd.h; ensure they're generated first.
+$(eval $(call rtcd_dep_template))
include $(BUILD_EXECUTABLE)
diff --git a/test/test_intra_pred_speed.cc b/test/test_intra_pred_speed.cc
index a684ea4..d2687b2 100644
--- a/test/test_intra_pred_speed.cc
+++ b/test/test_intra_pred_speed.cc
@@ -16,6 +16,7 @@
#include "./vp9_rtcd.h"
#include "test/acm_random.h"
+#include "test/clear_system_state.h"
#include "test/md5_helper.h"
#include "vpx/vpx_integer.h"
#include "vpx_ports/mem.h"
@@ -66,6 +67,7 @@
for (int num_tests = 0; num_tests < kNumTests; ++num_tests) {
pred_funcs[k](src, kBPS, above, left);
}
+ libvpx_test::ClearSystemState();
vpx_usec_timer_mark(&timer);
const int elapsed_time =
static_cast<int>(vpx_usec_timer_elapsed(&timer) / 1000);
@@ -211,6 +213,14 @@
NULL, NULL, NULL, NULL, vp9_tm_predictor_4x4_neon)
#endif // HAVE_NEON
+#if HAVE_MSA
+INTRA_PRED_TEST(MSA, TestIntraPred4, vp9_dc_predictor_4x4_msa,
+ vp9_dc_left_predictor_4x4_msa, vp9_dc_top_predictor_4x4_msa,
+ vp9_dc_128_predictor_4x4_msa, vp9_v_predictor_4x4_msa,
+ vp9_h_predictor_4x4_msa, NULL, NULL, NULL, NULL, NULL,
+ NULL, vp9_tm_predictor_4x4_msa)
+#endif // HAVE_MSA
+
// -----------------------------------------------------------------------------
// 8x8
@@ -256,6 +266,14 @@
#endif // HAVE_NEON
+#if HAVE_MSA
+INTRA_PRED_TEST(MSA, TestIntraPred8, vp9_dc_predictor_8x8_msa,
+ vp9_dc_left_predictor_8x8_msa, vp9_dc_top_predictor_8x8_msa,
+ vp9_dc_128_predictor_8x8_msa, vp9_v_predictor_8x8_msa,
+ vp9_h_predictor_8x8_msa, NULL, NULL, NULL, NULL, NULL,
+ NULL, vp9_tm_predictor_8x8_msa)
+#endif // HAVE_MSA
+
// -----------------------------------------------------------------------------
// 16x16
@@ -299,6 +317,14 @@
vp9_tm_predictor_16x16_neon)
#endif // HAVE_NEON
+#if HAVE_MSA
+INTRA_PRED_TEST(MSA, TestIntraPred16, vp9_dc_predictor_16x16_msa,
+ vp9_dc_left_predictor_16x16_msa, vp9_dc_top_predictor_16x16_msa,
+ vp9_dc_128_predictor_16x16_msa, vp9_v_predictor_16x16_msa,
+ vp9_h_predictor_16x16_msa, NULL, NULL, NULL, NULL, NULL,
+ NULL, vp9_tm_predictor_16x16_msa)
+#endif // HAVE_MSA
+
// -----------------------------------------------------------------------------
// 32x32
@@ -311,17 +337,20 @@
vp9_d63_predictor_32x32_c, vp9_tm_predictor_32x32_c)
#if HAVE_SSE2
+#if ARCH_X86_64
INTRA_PRED_TEST(SSE2, TestIntraPred32, vp9_dc_predictor_32x32_sse2,
vp9_dc_left_predictor_32x32_sse2,
vp9_dc_top_predictor_32x32_sse2,
vp9_dc_128_predictor_32x32_sse2, vp9_v_predictor_32x32_sse2,
NULL, NULL, NULL, NULL, NULL, NULL, NULL,
-#if ARCH_X86_64
- vp9_tm_predictor_32x32_sse2
+ vp9_tm_predictor_32x32_sse2)
#else
- NULL
-#endif
- )
+INTRA_PRED_TEST(SSE2, TestIntraPred32, vp9_dc_predictor_32x32_sse2,
+ vp9_dc_left_predictor_32x32_sse2,
+ vp9_dc_top_predictor_32x32_sse2,
+ vp9_dc_128_predictor_32x32_sse2, vp9_v_predictor_32x32_sse2,
+ NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL)
+#endif // ARCH_X86_64
#endif // HAVE_SSE2
#if HAVE_SSSE3
@@ -337,4 +366,12 @@
NULL, NULL, NULL, NULL, NULL, vp9_tm_predictor_32x32_neon)
#endif // HAVE_NEON
+#if HAVE_MSA
+INTRA_PRED_TEST(MSA, TestIntraPred32, vp9_dc_predictor_32x32_msa,
+ vp9_dc_left_predictor_32x32_msa, vp9_dc_top_predictor_32x32_msa,
+ vp9_dc_128_predictor_32x32_msa, vp9_v_predictor_32x32_msa,
+ vp9_h_predictor_32x32_msa, NULL, NULL, NULL, NULL, NULL,
+ NULL, vp9_tm_predictor_32x32_msa)
+#endif // HAVE_MSA
+
#include "test/test_libvpx.cc"
diff --git a/vp9/common/mips/msa/vp9_intra_predict_msa.c b/vp9/common/mips/msa/vp9_intra_predict_msa.c
new file mode 100644
index 0000000..2fc6105
--- /dev/null
+++ b/vp9/common/mips/msa/vp9_intra_predict_msa.c
@@ -0,0 +1,737 @@
+/*
+ * Copyright (c) 2015 The WebM project authors. All Rights Reserved.
+ *
+ * Use of this source code is governed by a BSD-style license
+ * that can be found in the LICENSE file in the root of the source
+ * tree. An additional intellectual property rights grant can be found
+ * in the file PATENTS. All contributing project authors may
+ * be found in the AUTHORS file in the root of the source tree.
+ */
+
+#include "./vp9_rtcd.h"
+#include "vp9/common/mips/msa/vp9_macros_msa.h"
+
+#define IPRED_SUBS_UH2_UH(in0, in1, out0, out1) { \
+ out0 = __msa_subs_u_h(out0, in0); \
+ out1 = __msa_subs_u_h(out1, in1); \
+}
+
+static void intra_predict_vert_4x4_msa(const uint8_t *src, uint8_t *dst,
+ int32_t dst_stride) {
+ uint32_t src_data;
+
+ src_data = LW(src);
+
+ SW4(src_data, src_data, src_data, src_data, dst, dst_stride);
+}
+
+static void intra_predict_vert_8x8_msa(const uint8_t *src, uint8_t *dst,
+ int32_t dst_stride) {
+ uint32_t row;
+ uint32_t src_data1, src_data2;
+
+ src_data1 = LW(src);
+ src_data2 = LW(src + 4);
+
+ for (row = 8; row--;) {
+ SW(src_data1, dst);
+ SW(src_data2, (dst + 4));
+ dst += dst_stride;
+ }
+}
+
+static void intra_predict_vert_16x16_msa(const uint8_t *src, uint8_t *dst,
+ int32_t dst_stride) {
+ uint32_t row;
+ v16u8 src0;
+
+ src0 = LD_UB(src);
+
+ for (row = 16; row--;) {
+ ST_UB(src0, dst);
+ dst += dst_stride;
+ }
+}
+
+static void intra_predict_vert_32x32_msa(const uint8_t *src, uint8_t *dst,
+ int32_t dst_stride) {
+ uint32_t row;
+ v16u8 src1, src2;
+
+ src1 = LD_UB(src);
+ src2 = LD_UB(src + 16);
+
+ for (row = 32; row--;) {
+ ST_UB2(src1, src2, dst, 16);
+ dst += dst_stride;
+ }
+}
+
+static void intra_predict_horiz_4x4_msa(const uint8_t *src, uint8_t *dst,
+ int32_t dst_stride) {
+ uint32_t out0, out1, out2, out3;
+ /* Row N = src[N] in all 4 bytes; 'U' avoids signed overflow for >= 128. */
+ out0 = src[0] * 0x01010101U;
+ out1 = src[1] * 0x01010101U;
+ out2 = src[2] * 0x01010101U;
+ out3 = src[3] * 0x01010101U;
+ /* NOTE(review): SW4 presumably stores one word per row, dst_stride apart. */
+ SW4(out0, out1, out2, out3, dst, dst_stride);
+}
+
+static void intra_predict_horiz_8x8_msa(const uint8_t *src, uint8_t *dst,
+ int32_t dst_stride) {
+ uint64_t out0, out1, out2, out3, out4, out5, out6, out7;
+ /* Row N = src[N] in all 8 bytes; the ull constant makes this unsigned. */
+ out0 = src[0] * 0x0101010101010101ull;
+ out1 = src[1] * 0x0101010101010101ull;
+ out2 = src[2] * 0x0101010101010101ull;
+ out3 = src[3] * 0x0101010101010101ull;
+ out4 = src[4] * 0x0101010101010101ull;
+ out5 = src[5] * 0x0101010101010101ull;
+ out6 = src[6] * 0x0101010101010101ull;
+ out7 = src[7] * 0x0101010101010101ull;
+ /* NOTE(review): SD4 presumably stores 4 doublewords, one per row. */
+ SD4(out0, out1, out2, out3, dst, dst_stride);
+ dst += (4 * dst_stride); /* advance to rows 4..7 */
+ SD4(out4, out5, out6, out7, dst, dst_stride);
+}
+
+static void intra_predict_horiz_16x16_msa(const uint8_t *src, uint8_t *dst,
+ int32_t dst_stride) {
+ uint32_t row;
+ uint8_t inp0, inp1, inp2, inp3;
+ v16u8 src0, src1, src2, src3;
+
+ for (row = 4; row--;) {
+ inp0 = src[0];
+ inp1 = src[1];
+ inp2 = src[2];
+ inp3 = src[3];
+ src += 4;
+
+ src0 = (v16u8)__msa_fill_b(inp0);
+ src1 = (v16u8)__msa_fill_b(inp1);
+ src2 = (v16u8)__msa_fill_b(inp2);
+ src3 = (v16u8)__msa_fill_b(inp3);
+
+ ST_UB4(src0, src1, src2, src3, dst, dst_stride);
+ dst += (4 * dst_stride);
+ }
+}
+
+static void intra_predict_horiz_32x32_msa(const uint8_t *src, uint8_t *dst,
+ int32_t dst_stride) {
+ uint32_t row;
+ uint8_t inp0, inp1, inp2, inp3;
+ v16u8 src0, src1, src2, src3;
+
+ for (row = 8; row--;) {
+ inp0 = src[0];
+ inp1 = src[1];
+ inp2 = src[2];
+ inp3 = src[3];
+ src += 4;
+
+ src0 = (v16u8)__msa_fill_b(inp0);
+ src1 = (v16u8)__msa_fill_b(inp1);
+ src2 = (v16u8)__msa_fill_b(inp2);
+ src3 = (v16u8)__msa_fill_b(inp3);
+
+ ST_UB2(src0, src0, dst, 16);
+ dst += dst_stride;
+ ST_UB2(src1, src1, dst, 16);
+ dst += dst_stride;
+ ST_UB2(src2, src2, dst, 16);
+ dst += dst_stride;
+ ST_UB2(src3, src3, dst, 16);
+ dst += dst_stride;
+ }
+}
+
+static void intra_predict_dc_4x4_msa(const uint8_t *src_top,
+ const uint8_t *src_left,
+ uint8_t *dst, int32_t dst_stride) {
+ uint32_t val0, val1;
+ v16i8 store, src = { 0 };
+ v8u16 sum_h;
+ v4u32 sum_w;
+ v2u64 sum_d;
+ /* DC 4x4: fill the block with the rounded mean of 4 top + 4 left pixels. */
+ val0 = LW(src_top);
+ val1 = LW(src_left);
+ INSERT_W2_SB(val0, val1, src);
+ sum_h = __msa_hadd_u_h((v16u8)src, (v16u8)src); /* pairwise byte sums */
+ sum_w = __msa_hadd_u_w(sum_h, sum_h);
+ sum_d = __msa_hadd_u_d(sum_w, sum_w); /* element 0 = sum of all 8 pixels */
+ sum_w = (v4u32)__msa_srari_w((v4i32)sum_d, 3); /* rounded divide by 8 */
+ store = __msa_splati_b((v16i8)sum_w, 0); /* replicate the DC byte */
+ val0 = __msa_copy_u_w((v4i32)store, 0);
+ /* NOTE(review): SW4 presumably stores one word per row, dst_stride apart. */
+ SW4(val0, val0, val0, val0, dst, dst_stride);
+}
+
+static void intra_predict_dc_tl_4x4_msa(const uint8_t *src, uint8_t *dst,
+ int32_t dst_stride) {
+ uint32_t val0;
+ v16i8 store, data = { 0 };
+ v8u16 sum_h;
+ v4u32 sum_w;
+ /* Fill 4x4 with the rounded mean of the 4 pixels at src (top or left). */
+ val0 = LW(src);
+ data = (v16i8)__msa_insert_w((v4i32)data, 0, val0); /* other lanes stay 0 */
+ sum_h = __msa_hadd_u_h((v16u8)data, (v16u8)data); /* pairwise byte sums */
+ sum_w = __msa_hadd_u_w(sum_h, sum_h); /* element 0 = sum of the 4 pixels */
+ sum_w = (v4u32)__msa_srari_w((v4i32)sum_w, 2); /* rounded divide by 4 */
+ store = __msa_splati_b((v16i8)sum_w, 0); /* replicate the DC byte */
+ val0 = __msa_copy_u_w((v4i32)store, 0);
+ /* NOTE(review): SW4 presumably stores one word per row, dst_stride apart. */
+ SW4(val0, val0, val0, val0, dst, dst_stride);
+}
+
+static void intra_predict_128dc_4x4_msa(uint8_t *dst, int32_t dst_stride) {
+ uint32_t out;
+ const v16i8 store = __msa_ldi_b(128);
+ /* No neighbours are read: every byte of 'store' is the constant 128. */
+ out = __msa_copy_u_w((v4i32)store, 0);
+ /* NOTE(review): SW4 presumably stores one word per row, dst_stride apart. */
+ SW4(out, out, out, out, dst, dst_stride);
+}
+
+static void intra_predict_dc_8x8_msa(const uint8_t *src_top,
+ const uint8_t *src_left,
+ uint8_t *dst, int32_t dst_stride) {
+ uint64_t val0, val1;
+ v16i8 store;
+ v16u8 src = { 0 };
+ v8u16 sum_h;
+ v4u32 sum_w;
+ v2u64 sum_d;
+ /* DC 8x8: fill the block with the rounded mean of 8 top + 8 left pixels. */
+ val0 = LD(src_top);
+ val1 = LD(src_left);
+ INSERT_D2_UB(val0, val1, src); /* top in element 0, left in element 1 */
+ sum_h = __msa_hadd_u_h(src, src); /* pairwise byte sums */
+ sum_w = __msa_hadd_u_w(sum_h, sum_h);
+ sum_d = __msa_hadd_u_d(sum_w, sum_w); /* two 64-bit partial sums */
+ sum_w = (v4u32)__msa_pckev_w((v4i32)sum_d, (v4i32)sum_d);
+ sum_d = __msa_hadd_u_d(sum_w, sum_w); /* element 0 = sum of all 16 pixels */
+ sum_w = (v4u32)__msa_srari_w((v4i32)sum_d, 4); /* rounded divide by 16 */
+ store = __msa_splati_b((v16i8)sum_w, 0); /* replicate the DC byte */
+ val0 = __msa_copy_u_d((v2i64)store, 0);
+ /* NOTE(review): SD4 presumably stores 4 doublewords, one per row. */
+ SD4(val0, val0, val0, val0, dst, dst_stride);
+ dst += (4 * dst_stride); /* advance to rows 4..7 */
+ SD4(val0, val0, val0, val0, dst, dst_stride);
+}
+
+static void intra_predict_dc_tl_8x8_msa(const uint8_t *src, uint8_t *dst,
+ int32_t dst_stride) {
+ uint64_t val0;
+ v16i8 store;
+ v16u8 data = { 0 };
+ v8u16 sum_h;
+ v4u32 sum_w;
+ v2u64 sum_d;
+
+ val0 = LD(src);
+ data = (v16u8)__msa_insert_d((v2i64)data, 0, val0);
+ sum_h = __msa_hadd_u_h(data, data);
+ sum_w = __msa_hadd_u_w(sum_h, sum_h);
+ sum_d = __msa_hadd_u_d(sum_w, sum_w);
+ sum_w = (v4u32)__msa_srari_w((v4i32)sum_d, 3);
+ store = __msa_splati_b((v16i8)sum_w, 0);
+ val0 = __msa_copy_u_d((v2i64)store, 0);
+
+ SD4(val0, val0, val0, val0, dst, dst_stride);
+ dst += (4 * dst_stride);
+ SD4(val0, val0, val0, val0, dst, dst_stride);
+}
+
+static void intra_predict_128dc_8x8_msa(uint8_t *dst, int32_t dst_stride) {
+ uint64_t out;
+ const v16i8 store = __msa_ldi_b(128);
+
+ out = __msa_copy_u_d((v2i64)store, 0);
+
+ SD4(out, out, out, out, dst, dst_stride);
+ dst += (4 * dst_stride);
+ SD4(out, out, out, out, dst, dst_stride);
+}
+
+static void intra_predict_dc_16x16_msa(const uint8_t *src_top,
+ const uint8_t *src_left,
+ uint8_t *dst, int32_t dst_stride) {
+ v16u8 top, left, out;
+ v8u16 sum_h, sum_top, sum_left;
+ v4u32 sum_w;
+ v2u64 sum_d;
+ /* DC 16x16: fill with the rounded mean of 16 top + 16 left pixels. */
+ top = LD_UB(src_top);
+ left = LD_UB(src_left);
+ HADD_UB2_UH(top, left, sum_top, sum_left); /* pairwise byte sums */
+ sum_h = sum_top + sum_left;
+ sum_w = __msa_hadd_u_w(sum_h, sum_h);
+ sum_d = __msa_hadd_u_d(sum_w, sum_w); /* two 64-bit partial sums */
+ sum_w = (v4u32)__msa_pckev_w((v4i32)sum_d, (v4i32)sum_d);
+ sum_d = __msa_hadd_u_d(sum_w, sum_w); /* element 0 = sum of all 32 pixels */
+ sum_w = (v4u32)__msa_srari_w((v4i32)sum_d, 5); /* rounded divide by 32 */
+ out = (v16u8)__msa_splati_b((v16i8)sum_w, 0); /* replicate the DC byte */
+ /* NOTE(review): ST_UB8 presumably stores 8 vectors, one per row. */
+ ST_UB8(out, out, out, out, out, out, out, out, dst, dst_stride);
+ dst += (8 * dst_stride); /* advance to rows 8..15 */
+ ST_UB8(out, out, out, out, out, out, out, out, dst, dst_stride);
+}
+
+static void intra_predict_dc_tl_16x16_msa(const uint8_t *src, uint8_t *dst,
+ int32_t dst_stride) {
+ v16u8 data, out;
+ v8u16 sum_h;
+ v4u32 sum_w;
+ v2u64 sum_d;
+
+ data = LD_UB(src);
+ sum_h = __msa_hadd_u_h(data, data);
+ sum_w = __msa_hadd_u_w(sum_h, sum_h);
+ sum_d = __msa_hadd_u_d(sum_w, sum_w);
+ sum_w = (v4u32)__msa_pckev_w((v4i32)sum_d, (v4i32)sum_d);
+ sum_d = __msa_hadd_u_d(sum_w, sum_w);
+ sum_w = (v4u32)__msa_srari_w((v4i32)sum_d, 4);
+ out = (v16u8)__msa_splati_b((v16i8)sum_w, 0);
+
+ ST_UB8(out, out, out, out, out, out, out, out, dst, dst_stride);
+ dst += (8 * dst_stride);
+ ST_UB8(out, out, out, out, out, out, out, out, dst, dst_stride);
+}
+
+static void intra_predict_128dc_16x16_msa(uint8_t *dst, int32_t dst_stride) {
+ const v16u8 out = (v16u8)__msa_ldi_b(128);
+
+ ST_UB8(out, out, out, out, out, out, out, out, dst, dst_stride);
+ dst += (8 * dst_stride);
+ ST_UB8(out, out, out, out, out, out, out, out, dst, dst_stride);
+}
+
+static void intra_predict_dc_32x32_msa(const uint8_t *src_top,
+ const uint8_t *src_left,
+ uint8_t *dst, int32_t dst_stride) {
+ uint32_t row;
+ v16u8 top0, top1, left0, left1, out;
+ v8u16 sum_h, sum_top0, sum_top1, sum_left0, sum_left1;
+ v4u32 sum_w;
+ v2u64 sum_d;
+ /* DC 32x32: fill with the rounded mean of 32 top + 32 left pixels. */
+ LD_UB2(src_top, 16, top0, top1);
+ LD_UB2(src_left, 16, left0, left1);
+ HADD_UB2_UH(top0, top1, sum_top0, sum_top1); /* pairwise byte sums */
+ HADD_UB2_UH(left0, left1, sum_left0, sum_left1);
+ sum_h = sum_top0 + sum_top1;
+ sum_h += sum_left0 + sum_left1;
+ sum_w = __msa_hadd_u_w(sum_h, sum_h);
+ sum_d = __msa_hadd_u_d(sum_w, sum_w); /* two 64-bit partial sums */
+ sum_w = (v4u32)__msa_pckev_w((v4i32)sum_d, (v4i32)sum_d);
+ sum_d = __msa_hadd_u_d(sum_w, sum_w); /* element 0 = sum of all 64 pixels */
+ sum_w = (v4u32)__msa_srari_w((v4i32)sum_d, 6); /* rounded divide by 64 */
+ out = (v16u8)__msa_splati_b((v16i8)sum_w, 0); /* replicate the DC byte */
+ /* 16 iterations x 2 rows; each row is two 16-byte stores (32 bytes). */
+ for (row = 16; row--;) {
+ ST_UB2(out, out, dst, 16);
+ dst += dst_stride;
+ ST_UB2(out, out, dst, 16);
+ dst += dst_stride;
+ }
+}
+
+static void intra_predict_dc_tl_32x32_msa(const uint8_t *src, uint8_t *dst,
+ int32_t dst_stride) {
+ uint32_t row;
+ v16u8 data0, data1, out;
+ v8u16 sum_h, sum_data0, sum_data1;
+ v4u32 sum_w;
+ v2u64 sum_d;
+
+ LD_UB2(src, 16, data0, data1);
+ HADD_UB2_UH(data0, data1, sum_data0, sum_data1);
+ sum_h = sum_data0 + sum_data1;
+ sum_w = __msa_hadd_u_w(sum_h, sum_h);
+ sum_d = __msa_hadd_u_d(sum_w, sum_w);
+ sum_w = (v4u32)__msa_pckev_w((v4i32)sum_d, (v4i32)sum_d);
+ sum_d = __msa_hadd_u_d(sum_w, sum_w);
+ sum_w = (v4u32)__msa_srari_w((v4i32)sum_d, 5);
+ out = (v16u8)__msa_splati_b((v16i8)sum_w, 0);
+
+ for (row = 16; row--;) {
+ ST_UB2(out, out, dst, 16);
+ dst += dst_stride;
+ ST_UB2(out, out, dst, 16);
+ dst += dst_stride;
+ }
+}
+
+static void intra_predict_128dc_32x32_msa(uint8_t *dst, int32_t dst_stride) {
+ uint32_t row;
+ const v16u8 out = (v16u8)__msa_ldi_b(128);
+
+ for (row = 16; row--;) {
+ ST_UB2(out, out, dst, 16);
+ dst += dst_stride;
+ ST_UB2(out, out, dst, 16);
+ dst += dst_stride;
+ }
+}
+
+static void intra_predict_tm_4x4_msa(const uint8_t *src_top_ptr,
+ const uint8_t *src_left,
+ uint8_t *dst, int32_t dst_stride) {
+ uint32_t val;
+ uint8_t top_left = src_top_ptr[-1];
+ v16i8 src_left0, src_left1, src_left2, src_left3, tmp0, tmp1, src_top = { 0 };
+ v16u8 src0, src1, src2, src3;
+ v8u16 src_top_left, vec0, vec1, vec2, vec3;
+ /* TM 4x4: per row r, left[r] + top[c] - top_left, then saturate and pack. */
+ src_top_left = (v8u16)__msa_fill_h(top_left);
+ val = LW(src_top_ptr);
+ src_top = (v16i8)__msa_insert_w((v4i32)src_top, 0, val);
+ /* Broadcast each left pixel so it can be paired with every top pixel. */
+ src_left0 = __msa_fill_b(src_left[0]);
+ src_left1 = __msa_fill_b(src_left[1]);
+ src_left2 = __msa_fill_b(src_left[2]);
+ src_left3 = __msa_fill_b(src_left[3]);
+ /* NOTE(review): ILVR_B4_UB presumably interleaves (left, top) byte pairs. */
+ ILVR_B4_UB(src_left0, src_top, src_left1, src_top, src_left2, src_top,
+ src_left3, src_top, src0, src1, src2, src3);
+ HADD_UB4_UH(src0, src1, src2, src3, vec0, vec1, vec2, vec3); /* left + top */
+ IPRED_SUBS_UH2_UH(src_top_left, src_top_left, vec0, vec1); /* - top_left */
+ IPRED_SUBS_UH2_UH(src_top_left, src_top_left, vec2, vec3);
+ SAT_UH4_UH(vec0, vec1, vec2, vec3, 7); /* presumably clamps to 8 bits */
+ PCKEV_B2_SB(vec1, vec0, vec3, vec2, tmp0, tmp1);
+ ST4x4_UB(tmp0, tmp1, 0, 2, 0, 2, dst, dst_stride);
+}
+
+static void intra_predict_tm_8x8_msa(const uint8_t *src_top_ptr,
+ const uint8_t *src_left,
+ uint8_t *dst, int32_t dst_stride) {
+ uint64_t val;
+ uint8_t top_left = src_top_ptr[-1];
+ uint32_t loop_cnt;
+ v16i8 src_left0, src_left1, src_left2, src_left3, tmp0, tmp1, src_top = { 0 };
+ v8u16 src_top_left, vec0, vec1, vec2, vec3;
+ v16u8 src0, src1, src2, src3;
+ /* TM 8x8: per row r, left[r] + top[c] - top_left, then saturate and pack. */
+ val = LD(src_top_ptr);
+ src_top = (v16i8)__msa_insert_d((v2i64)src_top, 0, val);
+ src_top_left = (v8u16)__msa_fill_h(top_left);
+ /* Two iterations, each consuming 4 left pixels and advancing dst 4 rows. */
+ for (loop_cnt = 2; loop_cnt--;) {
+ src_left0 = __msa_fill_b(src_left[0]);
+ src_left1 = __msa_fill_b(src_left[1]);
+ src_left2 = __msa_fill_b(src_left[2]);
+ src_left3 = __msa_fill_b(src_left[3]);
+ src_left += 4;
+ /* NOTE(review): ILVR_B4_UB presumably interleaves (left, top) byte pairs. */
+ ILVR_B4_UB(src_left0, src_top, src_left1, src_top, src_left2, src_top,
+ src_left3, src_top, src0, src1, src2, src3);
+ HADD_UB4_UH(src0, src1, src2, src3, vec0, vec1, vec2, vec3); /* left + top */
+ IPRED_SUBS_UH2_UH(src_top_left, src_top_left, vec0, vec1); /* - top_left */
+ IPRED_SUBS_UH2_UH(src_top_left, src_top_left, vec2, vec3);
+ SAT_UH4_UH(vec0, vec1, vec2, vec3, 7); /* presumably clamps to 8 bits */
+ PCKEV_B2_SB(vec1, vec0, vec3, vec2, tmp0, tmp1);
+ ST8x4_UB(tmp0, tmp1, dst, dst_stride);
+ dst += (4 * dst_stride);
+ }
+}
+
+static void intra_predict_tm_16x16_msa(const uint8_t *src_top_ptr,
+ const uint8_t *src_left,
+ uint8_t *dst, int32_t dst_stride) {
+ uint8_t top_left = src_top_ptr[-1];
+ uint32_t loop_cnt;
+ v16i8 src_top, src_left0, src_left1, src_left2, src_left3;
+ v8u16 src_top_left, res_r, res_l;
+
+ src_top = LD_SB(src_top_ptr);
+ src_top_left = (v8u16)__msa_fill_h(top_left);
+
+ for (loop_cnt = 4; loop_cnt--;) {
+ src_left0 = __msa_fill_b(src_left[0]);
+ src_left1 = __msa_fill_b(src_left[1]);
+ src_left2 = __msa_fill_b(src_left[2]);
+ src_left3 = __msa_fill_b(src_left[3]);
+ src_left += 4;
+
+ ILVRL_B2_UH(src_left0, src_top, res_r, res_l);
+ HADD_UB2_UH(res_r, res_l, res_r, res_l);
+ IPRED_SUBS_UH2_UH(src_top_left, src_top_left, res_r, res_l);
+
+ SAT_UH2_UH(res_r, res_l, 7);
+ PCKEV_ST_SB(res_r, res_l, dst);
+ dst += dst_stride;
+
+ ILVRL_B2_UH(src_left1, src_top, res_r, res_l);
+ HADD_UB2_UH(res_r, res_l, res_r, res_l);
+ IPRED_SUBS_UH2_UH(src_top_left, src_top_left, res_r, res_l);
+ SAT_UH2_UH(res_r, res_l, 7);
+ PCKEV_ST_SB(res_r, res_l, dst);
+ dst += dst_stride;
+
+ ILVRL_B2_UH(src_left2, src_top, res_r, res_l);
+ HADD_UB2_UH(res_r, res_l, res_r, res_l);
+ IPRED_SUBS_UH2_UH(src_top_left, src_top_left, res_r, res_l);
+ SAT_UH2_UH(res_r, res_l, 7);
+ PCKEV_ST_SB(res_r, res_l, dst);
+ dst += dst_stride;
+
+ ILVRL_B2_UH(src_left3, src_top, res_r, res_l);
+ HADD_UB2_UH(res_r, res_l, res_r, res_l);
+ IPRED_SUBS_UH2_UH(src_top_left, src_top_left, res_r, res_l);
+ SAT_UH2_UH(res_r, res_l, 7);
+ PCKEV_ST_SB(res_r, res_l, dst);
+ dst += dst_stride;
+ }
+}
+
+static void intra_predict_tm_32x32_msa(const uint8_t *src_top,
+ const uint8_t *src_left,
+ uint8_t *dst, int32_t dst_stride) {
+ uint8_t top_left = src_top[-1];
+ uint32_t loop_cnt;
+ v16i8 src_top0, src_top1, src_left0, src_left1, src_left2, src_left3;
+ v8u16 src_top_left, res_r0, res_r1, res_l0, res_l1;
+
+ LD_SB2(src_top, 16, src_top0, src_top1);
+ src_top_left = (v8u16)__msa_fill_h(top_left);
+
+ for (loop_cnt = 8; loop_cnt--;) {
+ src_left0 = __msa_fill_b(src_left[0]);
+ src_left1 = __msa_fill_b(src_left[1]);
+ src_left2 = __msa_fill_b(src_left[2]);
+ src_left3 = __msa_fill_b(src_left[3]);
+ src_left += 4;
+
+ ILVR_B2_UH(src_left0, src_top0, src_left0, src_top1, res_r0, res_r1);
+ ILVL_B2_UH(src_left0, src_top0, src_left0, src_top1, res_l0, res_l1);
+ HADD_UB4_UH(res_r0, res_l0, res_r1, res_l1, res_r0, res_l0, res_r1, res_l1);
+ IPRED_SUBS_UH2_UH(src_top_left, src_top_left, res_r0, res_l0);
+ IPRED_SUBS_UH2_UH(src_top_left, src_top_left, res_r1, res_l1);
+ SAT_UH4_UH(res_r0, res_l0, res_r1, res_l1, 7);
+ PCKEV_ST_SB(res_r0, res_l0, dst);
+ PCKEV_ST_SB(res_r1, res_l1, dst + 16);
+ dst += dst_stride;
+
+ ILVR_B2_UH(src_left1, src_top0, src_left1, src_top1, res_r0, res_r1);
+ ILVL_B2_UH(src_left1, src_top0, src_left1, src_top1, res_l0, res_l1);
+ HADD_UB4_UH(res_r0, res_l0, res_r1, res_l1, res_r0, res_l0, res_r1, res_l1);
+ IPRED_SUBS_UH2_UH(src_top_left, src_top_left, res_r0, res_l0);
+ IPRED_SUBS_UH2_UH(src_top_left, src_top_left, res_r1, res_l1);
+ SAT_UH4_UH(res_r0, res_l0, res_r1, res_l1, 7);
+ PCKEV_ST_SB(res_r0, res_l0, dst);
+ PCKEV_ST_SB(res_r1, res_l1, dst + 16);
+ dst += dst_stride;
+
+ ILVR_B2_UH(src_left2, src_top0, src_left2, src_top1, res_r0, res_r1);
+ ILVL_B2_UH(src_left2, src_top0, src_left2, src_top1, res_l0, res_l1);
+ HADD_UB4_UH(res_r0, res_l0, res_r1, res_l1, res_r0, res_l0, res_r1, res_l1);
+ IPRED_SUBS_UH2_UH(src_top_left, src_top_left, res_r0, res_l0);
+ IPRED_SUBS_UH2_UH(src_top_left, src_top_left, res_r1, res_l1);
+ SAT_UH4_UH(res_r0, res_l0, res_r1, res_l1, 7);
+ PCKEV_ST_SB(res_r0, res_l0, dst);
+ PCKEV_ST_SB(res_r1, res_l1, dst + 16);
+ dst += dst_stride;
+
+ ILVR_B2_UH(src_left3, src_top0, src_left3, src_top1, res_r0, res_r1);
+ ILVL_B2_UH(src_left3, src_top0, src_left3, src_top1, res_l0, res_l1);
+ HADD_UB4_UH(res_r0, res_l0, res_r1, res_l1, res_r0, res_l0, res_r1, res_l1);
+ IPRED_SUBS_UH2_UH(src_top_left, src_top_left, res_r0, res_l0);
+ IPRED_SUBS_UH2_UH(src_top_left, src_top_left, res_r1, res_l1);
+ SAT_UH4_UH(res_r0, res_l0, res_r1, res_l1, 7);
+ PCKEV_ST_SB(res_r0, res_l0, dst);
+ PCKEV_ST_SB(res_r1, res_l1, dst + 16);
+ dst += dst_stride;
+ }
+}
+
+void vp9_v_predictor_4x4_msa(uint8_t *dst, ptrdiff_t y_stride,
+ const uint8_t *above, const uint8_t *left) {
+ (void)left;
+
+ intra_predict_vert_4x4_msa(above, dst, y_stride);
+}
+
+void vp9_v_predictor_8x8_msa(uint8_t *dst, ptrdiff_t y_stride,
+ const uint8_t *above, const uint8_t *left) {
+ (void)left;
+
+ intra_predict_vert_8x8_msa(above, dst, y_stride);
+}
+
+void vp9_v_predictor_16x16_msa(uint8_t *dst, ptrdiff_t y_stride,
+ const uint8_t *above, const uint8_t *left) {
+ (void)left;
+
+ intra_predict_vert_16x16_msa(above, dst, y_stride);
+}
+
+void vp9_v_predictor_32x32_msa(uint8_t *dst, ptrdiff_t y_stride,
+ const uint8_t *above, const uint8_t *left) {
+ (void)left;
+
+ intra_predict_vert_32x32_msa(above, dst, y_stride);
+}
+
+void vp9_h_predictor_4x4_msa(uint8_t *dst, ptrdiff_t y_stride,
+ const uint8_t *above, const uint8_t *left) {
+ (void)above;
+
+ intra_predict_horiz_4x4_msa(left, dst, y_stride);
+}
+
+void vp9_h_predictor_8x8_msa(uint8_t *dst, ptrdiff_t y_stride,
+ const uint8_t *above, const uint8_t *left) {
+ (void)above;
+
+ intra_predict_horiz_8x8_msa(left, dst, y_stride);
+}
+
+void vp9_h_predictor_16x16_msa(uint8_t *dst, ptrdiff_t y_stride,
+ const uint8_t *above, const uint8_t *left) {
+ (void)above;
+
+ intra_predict_horiz_16x16_msa(left, dst, y_stride);
+}
+
+void vp9_h_predictor_32x32_msa(uint8_t *dst, ptrdiff_t y_stride,
+ const uint8_t *above, const uint8_t *left) {
+ (void)above;
+
+ intra_predict_horiz_32x32_msa(left, dst, y_stride);
+}
+
+void vp9_dc_predictor_4x4_msa(uint8_t *dst, ptrdiff_t y_stride,
+ const uint8_t *above, const uint8_t *left) {
+ intra_predict_dc_4x4_msa(above, left, dst, y_stride);
+}
+
+void vp9_dc_predictor_8x8_msa(uint8_t *dst, ptrdiff_t y_stride,
+ const uint8_t *above, const uint8_t *left) {
+ intra_predict_dc_8x8_msa(above, left, dst, y_stride);
+}
+
+void vp9_dc_predictor_16x16_msa(uint8_t *dst, ptrdiff_t y_stride,
+ const uint8_t *above, const uint8_t *left) {
+ intra_predict_dc_16x16_msa(above, left, dst, y_stride);
+}
+
+void vp9_dc_predictor_32x32_msa(uint8_t *dst, ptrdiff_t y_stride,
+ const uint8_t *above, const uint8_t *left) {
+ intra_predict_dc_32x32_msa(above, left, dst, y_stride);
+}
+
+void vp9_dc_top_predictor_4x4_msa(uint8_t *dst, ptrdiff_t y_stride,
+ const uint8_t *above, const uint8_t *left) {
+ (void)left;
+
+ intra_predict_dc_tl_4x4_msa(above, dst, y_stride);
+}
+
+void vp9_dc_top_predictor_8x8_msa(uint8_t *dst, ptrdiff_t y_stride,
+ const uint8_t *above, const uint8_t *left) {
+ (void)left;
+
+ intra_predict_dc_tl_8x8_msa(above, dst, y_stride);
+}
+
+void vp9_dc_top_predictor_16x16_msa(uint8_t *dst, ptrdiff_t y_stride,
+ const uint8_t *above, const uint8_t *left) {
+ (void)left;
+
+ intra_predict_dc_tl_16x16_msa(above, dst, y_stride);
+}
+
+void vp9_dc_top_predictor_32x32_msa(uint8_t *dst, ptrdiff_t y_stride,
+ const uint8_t *above, const uint8_t *left) {
+ (void)left;
+
+ intra_predict_dc_tl_32x32_msa(above, dst, y_stride);
+}
+
+void vp9_dc_left_predictor_4x4_msa(uint8_t *dst, ptrdiff_t y_stride,
+ const uint8_t *above, const uint8_t *left) {
+ (void)above;
+
+ intra_predict_dc_tl_4x4_msa(left, dst, y_stride);
+}
+
+void vp9_dc_left_predictor_8x8_msa(uint8_t *dst, ptrdiff_t y_stride,
+ const uint8_t *above, const uint8_t *left) {
+ (void)above;
+
+ intra_predict_dc_tl_8x8_msa(left, dst, y_stride);
+}
+
+void vp9_dc_left_predictor_16x16_msa(uint8_t *dst, ptrdiff_t y_stride,
+ const uint8_t *above,
+ const uint8_t *left) {
+ (void)above;
+
+ intra_predict_dc_tl_16x16_msa(left, dst, y_stride);
+}
+
+void vp9_dc_left_predictor_32x32_msa(uint8_t *dst, ptrdiff_t y_stride,
+ const uint8_t *above,
+ const uint8_t *left) {
+ (void)above;
+
+ intra_predict_dc_tl_32x32_msa(left, dst, y_stride);
+}
+
+void vp9_dc_128_predictor_4x4_msa(uint8_t *dst, ptrdiff_t y_stride,
+ const uint8_t *above, const uint8_t *left) {
+ (void)above;
+ (void)left;
+
+ intra_predict_128dc_4x4_msa(dst, y_stride);
+}
+
+void vp9_dc_128_predictor_8x8_msa(uint8_t *dst, ptrdiff_t y_stride,
+ const uint8_t *above, const uint8_t *left) {
+ (void)above;
+ (void)left;
+
+ intra_predict_128dc_8x8_msa(dst, y_stride);
+}
+
+void vp9_dc_128_predictor_16x16_msa(uint8_t *dst, ptrdiff_t y_stride,
+ const uint8_t *above, const uint8_t *left) {
+ (void)above;
+ (void)left;
+
+ intra_predict_128dc_16x16_msa(dst, y_stride);
+}
+
+void vp9_dc_128_predictor_32x32_msa(uint8_t *dst, ptrdiff_t y_stride,
+ const uint8_t *above, const uint8_t *left) {
+ (void)above;
+ (void)left;
+
+ intra_predict_128dc_32x32_msa(dst, y_stride);
+}
+
+void vp9_tm_predictor_4x4_msa(uint8_t *dst, ptrdiff_t y_stride,
+ const uint8_t *above, const uint8_t *left) {
+ intra_predict_tm_4x4_msa(above, left, dst, y_stride);
+}
+
+void vp9_tm_predictor_8x8_msa(uint8_t *dst, ptrdiff_t y_stride,
+ const uint8_t *above, const uint8_t *left) {
+ intra_predict_tm_8x8_msa(above, left, dst, y_stride);
+}
+
+void vp9_tm_predictor_16x16_msa(uint8_t *dst, ptrdiff_t y_stride,
+ const uint8_t *above, const uint8_t *left) {
+ intra_predict_tm_16x16_msa(above, left, dst, y_stride);
+}
+
+void vp9_tm_predictor_32x32_msa(uint8_t *dst, ptrdiff_t y_stride,
+ const uint8_t *above, const uint8_t *left) {
+ intra_predict_tm_32x32_msa(above, left, dst, y_stride);
+}
diff --git a/vp9/common/mips/msa/vp9_macros_msa.h b/vp9/common/mips/msa/vp9_macros_msa.h
index 3751e35..f1217d5a 100644
--- a/vp9/common/mips/msa/vp9_macros_msa.h
+++ b/vp9/common/mips/msa/vp9_macros_msa.h
@@ -743,6 +743,26 @@
CLIP_SH2_0_255(in2, in3); \
}
+/* Description : Horizontal addition of unsigned byte vector elements
+ Arguments : Inputs - in0, in1
+ Outputs - out0, out1
+ Return Type - as per RTYPE
+ Details : Each unsigned odd byte element from 'in0' is added to
+ even unsigned byte element from 'in0' (pairwise) and the
+ halfword result is stored in 'out0'; likewise 'in1' -> 'out1'
+*/
+#define HADD_UB2(RTYPE, in0, in1, out0, out1) { \
+ out0 = (RTYPE)__msa_hadd_u_h((v16u8)in0, (v16u8)in0); \
+ out1 = (RTYPE)__msa_hadd_u_h((v16u8)in1, (v16u8)in1); \
+}
+#define HADD_UB2_UH(...) HADD_UB2(v8u16, __VA_ARGS__)
+
+#define HADD_UB4(RTYPE, in0, in1, in2, in3, out0, out1, out2, out3) { \
+ HADD_UB2(RTYPE, in0, in1, out0, out1); \
+ HADD_UB2(RTYPE, in2, in3, out2, out3); \
+}
+#define HADD_UB4_UH(...) HADD_UB4(v8u16, __VA_ARGS__)
+
/* Description : Insert specified word elements from input vectors to 1
destination vector
Arguments : Inputs - in0, in1, in2, in3 (4 input vectors)
@@ -755,6 +775,19 @@
}
#define INSERT_W2_SB(...) INSERT_W2(v16i8, __VA_ARGS__)
+/* Description : Insert two scalar double word values into elements 0 and 1 of
+ 1 destination vector
+ Arguments : Inputs - in0, in1 (2 scalar double word values)
+ Outputs - out (vector, updated in place)
+ Return Type - as per RTYPE
+*/
+#define INSERT_D2(RTYPE, in0, in1, out) { \
+ out = (RTYPE)__msa_insert_d((v2i64)out, 0, in0); \
+ out = (RTYPE)__msa_insert_d((v2i64)out, 1, in1); \
+}
+#define INSERT_D2_UB(...) INSERT_D2(v16u8, __VA_ARGS__)
+#define INSERT_D2_SB(...) INSERT_D2(v16i8, __VA_ARGS__)
+
/* Description : Interleave even byte elements from vectors
Arguments : Inputs - in0, in1, in2, in3
Outputs - out0, out1
diff --git a/vp9/common/vp9_blockd.h b/vp9/common/vp9_blockd.h
index e53e15d..53ae921 100644
--- a/vp9/common/vp9_blockd.h
+++ b/vp9/common/vp9_blockd.h
@@ -50,16 +50,16 @@
} b_mode_info;
// Note that the rate-distortion optimization loop, bit-stream writer, and
-// decoder implementation modules critically rely on the enum entry values
+// decoder implementation modules critically rely on the defined entry values
// specified herein. They should be refactored concurrently.
-typedef enum {
- NONE = -1,
- INTRA_FRAME = 0,
- LAST_FRAME = 1,
- GOLDEN_FRAME = 2,
- ALTREF_FRAME = 3,
- MAX_REF_FRAMES = 4
-} MV_REFERENCE_FRAME;
+
+#define NONE (-1) // parenthesized so the negative value is safe in any expression
+#define INTRA_FRAME 0
+#define LAST_FRAME 1
+#define GOLDEN_FRAME 2
+#define ALTREF_FRAME 3
+#define MAX_REF_FRAMES 4
+typedef int8_t MV_REFERENCE_FRAME; // signed: must be able to hold NONE (-1)
// This structure now relates to 8x8 block regions.
typedef struct {
@@ -75,12 +75,17 @@
PREDICTION_MODE uv_mode;
// Only for INTER blocks
+ INTERP_FILTER interp_filter;
MV_REFERENCE_FRAME ref_frame[2];
+
+ // TODO(slavarnway): Delete and use bmi[3].as_mv[] instead.
int_mv mv[2];
+
+#if CONFIG_VP9_ENCODER
+ // TODO(slavarnway): Move to encoder
int_mv ref_mvs[MAX_REF_FRAMES][MAX_MV_REF_CANDIDATES];
uint8_t mode_context[MAX_REF_FRAMES];
- INTERP_FILTER interp_filter;
-
+#endif
} MB_MODE_INFO;
typedef struct MODE_INFO {
diff --git a/vp9/common/vp9_enums.h b/vp9/common/vp9_enums.h
index 0482025..d089f23 100644
--- a/vp9/common/vp9_enums.h
+++ b/vp9/common/vp9_enums.h
@@ -12,6 +12,7 @@
#define VP9_COMMON_VP9_ENUMS_H_
#include "./vpx_config.h"
+#include "vpx/vpx_integer.h"
#ifdef __cplusplus
extern "C" {
@@ -40,23 +41,22 @@
MAX_PROFILES
} BITSTREAM_PROFILE;
-typedef enum BLOCK_SIZE {
- BLOCK_4X4,
- BLOCK_4X8,
- BLOCK_8X4,
- BLOCK_8X8,
- BLOCK_8X16,
- BLOCK_16X8,
- BLOCK_16X16,
- BLOCK_16X32,
- BLOCK_32X16,
- BLOCK_32X32,
- BLOCK_32X64,
- BLOCK_64X32,
- BLOCK_64X64,
- BLOCK_SIZES,
- BLOCK_INVALID = BLOCK_SIZES
-} BLOCK_SIZE;
+#define BLOCK_4X4 0
+#define BLOCK_4X8 1
+#define BLOCK_8X4 2
+#define BLOCK_8X8 3
+#define BLOCK_8X16 4
+#define BLOCK_16X8 5
+#define BLOCK_16X16 6
+#define BLOCK_16X32 7
+#define BLOCK_32X16 8
+#define BLOCK_32X32 9
+#define BLOCK_32X64 10
+#define BLOCK_64X32 11
+#define BLOCK_64X64 12
+#define BLOCK_SIZES 13
+#define BLOCK_INVALID BLOCK_SIZES
+typedef uint8_t BLOCK_SIZE;
typedef enum PARTITION_TYPE {
PARTITION_NONE,
@@ -72,13 +72,12 @@
#define PARTITION_CONTEXTS (4 * PARTITION_PLOFFSET)
// block transform size
-typedef enum {
- TX_4X4 = 0, // 4x4 transform
- TX_8X8 = 1, // 8x8 transform
- TX_16X16 = 2, // 16x16 transform
- TX_32X32 = 3, // 32x32 transform
- TX_SIZES
-} TX_SIZE;
+typedef uint8_t TX_SIZE;  // the TX_* macros below are cast to this type
+#define TX_4X4 ((TX_SIZE)0) // 4x4 transform
+#define TX_8X8 ((TX_SIZE)1) // 8x8 transform
+#define TX_16X16 ((TX_SIZE)2) // 16x16 transform
+#define TX_32X32 ((TX_SIZE)3) // 32x32 transform
+#define TX_SIZES ((TX_SIZE)4) // count of transform sizes; cast form is not usable in #if
// frame transform mode
typedef enum {
@@ -110,23 +109,22 @@
PLANE_TYPES
} PLANE_TYPE;
-typedef enum {
- DC_PRED, // Average of above and left pixels
- V_PRED, // Vertical
- H_PRED, // Horizontal
- D45_PRED, // Directional 45 deg = round(arctan(1/1) * 180/pi)
- D135_PRED, // Directional 135 deg = 180 - 45
- D117_PRED, // Directional 117 deg = 180 - 63
- D153_PRED, // Directional 153 deg = 180 - 27
- D207_PRED, // Directional 207 deg = 180 + 27
- D63_PRED, // Directional 63 deg = round(arctan(2/1) * 180/pi)
- TM_PRED, // True-motion
- NEARESTMV,
- NEARMV,
- ZEROMV,
- NEWMV,
- MB_MODE_COUNT
-} PREDICTION_MODE;
+#define DC_PRED 0 // Average of above and left pixels
+#define V_PRED 1 // Vertical
+#define H_PRED 2 // Horizontal
+#define D45_PRED 3 // Directional 45 deg = round(arctan(1/1) * 180/pi)
+#define D135_PRED 4 // Directional 135 deg = 180 - 45
+#define D117_PRED 5 // Directional 117 deg = 180 - 63
+#define D153_PRED 6 // Directional 153 deg = 180 - 27
+#define D207_PRED 7 // Directional 207 deg = 180 + 27
+#define D63_PRED 8 // Directional 63 deg = round(arctan(2/1) * 180/pi)
+#define TM_PRED 9 // True-motion
+#define NEARESTMV 10 // first value past the INTRA_MODES (TM_PRED + 1) range
+#define NEARMV 11
+#define ZEROMV 12
+#define NEWMV 13
+#define MB_MODE_COUNT 14 // count of all modes listed above (0..13)
+typedef uint8_t PREDICTION_MODE; // holds one of the mode values above
#define INTRA_MODES (TM_PRED + 1)
diff --git a/vp9/common/vp9_filter.c b/vp9/common/vp9_filter.c
index afcdf22..b256d4a 100644
--- a/vp9/common/vp9_filter.c
+++ b/vp9/common/vp9_filter.c
@@ -12,7 +12,8 @@
#include "vp9/common/vp9_filter.h"
-const InterpKernel vp9_bilinear_filters[SUBPEL_SHIFTS] = {
+DECLARE_ALIGNED(256, const InterpKernel,
+ vp9_bilinear_filters[SUBPEL_SHIFTS]) = {
{ 0, 0, 0, 128, 0, 0, 0, 0 },
{ 0, 0, 0, 120, 8, 0, 0, 0 },
{ 0, 0, 0, 112, 16, 0, 0, 0 },
diff --git a/vp9/common/vp9_filter.h b/vp9/common/vp9_filter.h
index 808a270..13d38af 100644
--- a/vp9/common/vp9_filter.h
+++ b/vp9/common/vp9_filter.h
@@ -27,17 +27,16 @@
#define SUBPEL_SHIFTS (1 << SUBPEL_BITS)
#define SUBPEL_TAPS 8
-typedef enum {
- EIGHTTAP = 0,
- EIGHTTAP_SMOOTH = 1,
- EIGHTTAP_SHARP = 2,
- SWITCHABLE_FILTERS = 3, /* Number of switchable filters */
- BILINEAR = 3,
- // The codec can operate in four possible inter prediction filter mode:
- // 8-tap, 8-tap-smooth, 8-tap-sharp, and switching between the three.
- SWITCHABLE_FILTER_CONTEXTS = SWITCHABLE_FILTERS + 1,
- SWITCHABLE = 4 /* should be the last one */
-} INTERP_FILTER;
+#define EIGHTTAP 0
+#define EIGHTTAP_SMOOTH 1
+#define EIGHTTAP_SHARP 2
+#define SWITCHABLE_FILTERS 3 /* Number of switchable filters */
+#define BILINEAR 3 /* shares the value 3 with SWITCHABLE_FILTERS */
+// The codec can operate in four possible inter prediction filter modes:
+// 8-tap, 8-tap-smooth, 8-tap-sharp, and switching between the three.
+#define SWITCHABLE_FILTER_CONTEXTS (SWITCHABLE_FILTERS + 1)
+#define SWITCHABLE 4 /* should be the last one */
+typedef uint8_t INTERP_FILTER; /* holds one of the filter values above */
typedef int16_t InterpKernel[SUBPEL_TAPS];
diff --git a/vp9/common/vp9_loopfilter.c b/vp9/common/vp9_loopfilter.c
index 484e457..9816728 100644
--- a/vp9/common/vp9_loopfilter.c
+++ b/vp9/common/vp9_loopfilter.c
@@ -267,8 +267,8 @@
for (seg_id = 0; seg_id < MAX_SEGMENTS; seg_id++) {
int lvl_seg = default_filt_lvl;
- if (vp9_segfeature_active(seg, seg_id, SEG_LVL_ALT_LF)) {
- const int data = vp9_get_segdata(seg, seg_id, SEG_LVL_ALT_LF);
+ if (segfeature_active(seg, seg_id, SEG_LVL_ALT_LF)) {
+ const int data = get_segdata(seg, seg_id, SEG_LVL_ALT_LF);
lvl_seg = clamp(seg->abs_delta == SEGMENT_ABSDATA ?
data : default_filt_lvl + data,
0, MAX_LOOP_FILTER);
diff --git a/vp9/common/vp9_mvref_common.c b/vp9/common/vp9_mvref_common.c
index ce69527..5f8ee0f 100644
--- a/vp9/common/vp9_mvref_common.c
+++ b/vp9/common/vp9_mvref_common.c
@@ -18,7 +18,8 @@
MODE_INFO *mi, MV_REFERENCE_FRAME ref_frame,
int_mv *mv_ref_list,
int block, int mi_row, int mi_col,
- find_mv_refs_sync sync, void *const data) {
+ find_mv_refs_sync sync, void *const data,
+ uint8_t *mode_context) {
const int *ref_sign_bias = cm->ref_frame_sign_bias;
int i, refmv_count = 0;
const POSITION *const mv_ref_search = mv_ref_blocks[mi->mbmi.sb_type];
@@ -138,7 +139,7 @@
Done:
- mi->mbmi.mode_context[ref_frame] = counter_to_context[context_counter];
+ mode_context[ref_frame] = counter_to_context[context_counter];
// Clamp vectors
for (i = 0; i < MAX_MV_REF_CANDIDATES; ++i)
@@ -150,9 +151,10 @@
MODE_INFO *mi, MV_REFERENCE_FRAME ref_frame,
int_mv *mv_ref_list,
int mi_row, int mi_col,
- find_mv_refs_sync sync, void *const data) {
+ find_mv_refs_sync sync, void *const data,
+ uint8_t *mode_context) {
find_mv_refs_idx(cm, xd, tile, mi, ref_frame, mv_ref_list, -1,
- mi_row, mi_col, sync, data);
+ mi_row, mi_col, sync, data, mode_context);
}
static void lower_mv_precision(MV *mv, int allow_hp) {
@@ -181,7 +183,8 @@
void vp9_append_sub8x8_mvs_for_idx(VP9_COMMON *cm, MACROBLOCKD *xd,
const TileInfo *const tile,
int block, int ref, int mi_row, int mi_col,
- int_mv *nearest_mv, int_mv *near_mv) {
+ int_mv *nearest_mv, int_mv *near_mv,
+ uint8_t *mode_context) {
int_mv mv_list[MAX_MV_REF_CANDIDATES];
MODE_INFO *const mi = xd->mi[0];
b_mode_info *bmi = mi->bmi;
@@ -190,7 +193,7 @@
assert(MAX_MV_REF_CANDIDATES == 2);
find_mv_refs_idx(cm, xd, tile, mi, mi->mbmi.ref_frame[ref], mv_list, block,
- mi_row, mi_col, NULL, NULL);
+ mi_row, mi_col, NULL, NULL, mode_context);
near_mv->as_int = 0;
switch (block) {
diff --git a/vp9/common/vp9_mvref_common.h b/vp9/common/vp9_mvref_common.h
index f1df521..621dc14 100644
--- a/vp9/common/vp9_mvref_common.h
+++ b/vp9/common/vp9_mvref_common.h
@@ -212,7 +212,8 @@
const TileInfo *const tile,
MODE_INFO *mi, MV_REFERENCE_FRAME ref_frame,
int_mv *mv_ref_list, int mi_row, int mi_col,
- find_mv_refs_sync sync, void *const data);
+ find_mv_refs_sync sync, void *const data,
+ uint8_t *mode_context);
// check a list of motion vectors by sad score using a number rows of pixels
// above and a number cols of pixels in the left to select the one with best
@@ -223,7 +224,8 @@
void vp9_append_sub8x8_mvs_for_idx(VP9_COMMON *cm, MACROBLOCKD *xd,
const TileInfo *const tile,
int block, int ref, int mi_row, int mi_col,
- int_mv *nearest_mv, int_mv *near_mv);
+ int_mv *nearest_mv, int_mv *near_mv,
+ uint8_t *mode_context);
#ifdef __cplusplus
} // extern "C"
diff --git a/vp9/common/vp9_quant_common.c b/vp9/common/vp9_quant_common.c
index 564a3eb..d83f3c1 100644
--- a/vp9/common/vp9_quant_common.c
+++ b/vp9/common/vp9_quant_common.c
@@ -266,8 +266,8 @@
int vp9_get_qindex(const struct segmentation *seg, int segment_id,
int base_qindex) {
- if (vp9_segfeature_active(seg, segment_id, SEG_LVL_ALT_Q)) {
- const int data = vp9_get_segdata(seg, segment_id, SEG_LVL_ALT_Q);
+ if (segfeature_active(seg, segment_id, SEG_LVL_ALT_Q)) {
+ const int data = get_segdata(seg, segment_id, SEG_LVL_ALT_Q);
const int seg_qindex = seg->abs_delta == SEGMENT_ABSDATA ?
data : base_qindex + data;
return clamp(seg_qindex, 0, MAXQ);
diff --git a/vp9/common/vp9_reconintra.c b/vp9/common/vp9_reconintra.c
index 3312f29..650f4ad 100644
--- a/vp9/common/vp9_reconintra.c
+++ b/vp9/common/vp9_reconintra.c
@@ -464,15 +464,17 @@
static INLINE void d45_predictor(uint8_t *dst, ptrdiff_t stride, int bs,
const uint8_t *above, const uint8_t *left) {
const uint8_t above_right = above[bs - 1];
+ const uint8_t *const dst_row0 = dst;
int x, size;
- uint8_t avg[31]; // TODO(jzern): this could be block size specific
(void)left;
for (x = 0; x < bs - 1; ++x) {
- avg[x] = AVG3(above[x], above[x + 1], above[x + 2]);
+ dst[x] = AVG3(above[x], above[x + 1], above[x + 2]);
}
- for (x = 0, size = bs - 1; x < bs; ++x, --size) {
- memcpy(dst, avg + x, size);
+ dst[bs - 1] = above_right;
+ dst += stride;
+ for (x = 1, size = bs - 2; x < bs; ++x, --size) {
+ memcpy(dst, dst_row0 + x, size);
memset(dst + size, above_right, x + 1);
dst += stride;
}
diff --git a/vp9/common/vp9_rtcd_defs.pl b/vp9/common/vp9_rtcd_defs.pl
index bbe200d..5035126 100644
--- a/vp9/common/vp9_rtcd_defs.pl
+++ b/vp9/common/vp9_rtcd_defs.pl
@@ -66,7 +66,7 @@
specialize qw/vp9_d63_predictor_4x4/, "$ssse3_x86inc";
add_proto qw/void vp9_h_predictor_4x4/, "uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left";
-specialize qw/vp9_h_predictor_4x4 neon dspr2/, "$ssse3_x86inc";
+specialize qw/vp9_h_predictor_4x4 neon dspr2 msa/, "$ssse3_x86inc";
add_proto qw/void vp9_d117_predictor_4x4/, "uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left";
specialize qw/vp9_d117_predictor_4x4/;
@@ -78,22 +78,22 @@
specialize qw/vp9_d153_predictor_4x4/, "$ssse3_x86inc";
add_proto qw/void vp9_v_predictor_4x4/, "uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left";
-specialize qw/vp9_v_predictor_4x4 neon/, "$sse_x86inc";
+specialize qw/vp9_v_predictor_4x4 neon msa/, "$sse_x86inc";
add_proto qw/void vp9_tm_predictor_4x4/, "uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left";
-specialize qw/vp9_tm_predictor_4x4 neon dspr2/, "$sse_x86inc";
+specialize qw/vp9_tm_predictor_4x4 neon dspr2 msa/, "$sse_x86inc";
add_proto qw/void vp9_dc_predictor_4x4/, "uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left";
-specialize qw/vp9_dc_predictor_4x4 dspr2/, "$sse_x86inc";
+specialize qw/vp9_dc_predictor_4x4 dspr2 msa/, "$sse_x86inc";
add_proto qw/void vp9_dc_top_predictor_4x4/, "uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left";
-specialize qw/vp9_dc_top_predictor_4x4/, "$sse_x86inc";
+specialize qw/vp9_dc_top_predictor_4x4 msa/, "$sse_x86inc";
add_proto qw/void vp9_dc_left_predictor_4x4/, "uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left";
-specialize qw/vp9_dc_left_predictor_4x4/, "$sse_x86inc";
+specialize qw/vp9_dc_left_predictor_4x4 msa/, "$sse_x86inc";
add_proto qw/void vp9_dc_128_predictor_4x4/, "uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left";
-specialize qw/vp9_dc_128_predictor_4x4/, "$sse_x86inc";
+specialize qw/vp9_dc_128_predictor_4x4 msa/, "$sse_x86inc";
add_proto qw/void vp9_d207_predictor_8x8/, "uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left";
specialize qw/vp9_d207_predictor_8x8/, "$ssse3_x86inc";
@@ -105,7 +105,7 @@
specialize qw/vp9_d63_predictor_8x8/, "$ssse3_x86inc";
add_proto qw/void vp9_h_predictor_8x8/, "uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left";
-specialize qw/vp9_h_predictor_8x8 neon dspr2/, "$ssse3_x86inc";
+specialize qw/vp9_h_predictor_8x8 neon dspr2 msa/, "$ssse3_x86inc";
add_proto qw/void vp9_d117_predictor_8x8/, "uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left";
specialize qw/vp9_d117_predictor_8x8/;
@@ -117,22 +117,22 @@
specialize qw/vp9_d153_predictor_8x8/, "$ssse3_x86inc";
add_proto qw/void vp9_v_predictor_8x8/, "uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left";
-specialize qw/vp9_v_predictor_8x8 neon/, "$sse_x86inc";
+specialize qw/vp9_v_predictor_8x8 neon msa/, "$sse_x86inc";
add_proto qw/void vp9_tm_predictor_8x8/, "uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left";
-specialize qw/vp9_tm_predictor_8x8 neon dspr2/, "$sse2_x86inc";
+specialize qw/vp9_tm_predictor_8x8 neon dspr2 msa/, "$sse2_x86inc";
add_proto qw/void vp9_dc_predictor_8x8/, "uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left";
-specialize qw/vp9_dc_predictor_8x8 dspr2 neon/, "$sse_x86inc";
+specialize qw/vp9_dc_predictor_8x8 dspr2 neon msa/, "$sse_x86inc";
add_proto qw/void vp9_dc_top_predictor_8x8/, "uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left";
-specialize qw/vp9_dc_top_predictor_8x8 neon/, "$sse_x86inc";
+specialize qw/vp9_dc_top_predictor_8x8 neon msa/, "$sse_x86inc";
add_proto qw/void vp9_dc_left_predictor_8x8/, "uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left";
-specialize qw/vp9_dc_left_predictor_8x8 neon/, "$sse_x86inc";
+specialize qw/vp9_dc_left_predictor_8x8 neon msa/, "$sse_x86inc";
add_proto qw/void vp9_dc_128_predictor_8x8/, "uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left";
-specialize qw/vp9_dc_128_predictor_8x8 neon/, "$sse_x86inc";
+specialize qw/vp9_dc_128_predictor_8x8 neon msa/, "$sse_x86inc";
add_proto qw/void vp9_d207_predictor_16x16/, "uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left";
specialize qw/vp9_d207_predictor_16x16/, "$ssse3_x86inc";
@@ -144,7 +144,7 @@
specialize qw/vp9_d63_predictor_16x16/, "$ssse3_x86inc";
add_proto qw/void vp9_h_predictor_16x16/, "uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left";
-specialize qw/vp9_h_predictor_16x16 neon dspr2/, "$ssse3_x86inc";
+specialize qw/vp9_h_predictor_16x16 neon dspr2 msa/, "$ssse3_x86inc";
add_proto qw/void vp9_d117_predictor_16x16/, "uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left";
specialize qw/vp9_d117_predictor_16x16/;
@@ -156,22 +156,22 @@
specialize qw/vp9_d153_predictor_16x16/, "$ssse3_x86inc";
add_proto qw/void vp9_v_predictor_16x16/, "uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left";
-specialize qw/vp9_v_predictor_16x16 neon/, "$sse2_x86inc";
+specialize qw/vp9_v_predictor_16x16 neon msa/, "$sse2_x86inc";
add_proto qw/void vp9_tm_predictor_16x16/, "uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left";
-specialize qw/vp9_tm_predictor_16x16 neon/, "$sse2_x86inc";
+specialize qw/vp9_tm_predictor_16x16 neon msa/, "$sse2_x86inc";
add_proto qw/void vp9_dc_predictor_16x16/, "uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left";
-specialize qw/vp9_dc_predictor_16x16 dspr2 neon/, "$sse2_x86inc";
+specialize qw/vp9_dc_predictor_16x16 dspr2 neon msa/, "$sse2_x86inc";
add_proto qw/void vp9_dc_top_predictor_16x16/, "uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left";
-specialize qw/vp9_dc_top_predictor_16x16 neon/, "$sse2_x86inc";
+specialize qw/vp9_dc_top_predictor_16x16 neon msa/, "$sse2_x86inc";
add_proto qw/void vp9_dc_left_predictor_16x16/, "uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left";
-specialize qw/vp9_dc_left_predictor_16x16 neon/, "$sse2_x86inc";
+specialize qw/vp9_dc_left_predictor_16x16 neon msa/, "$sse2_x86inc";
add_proto qw/void vp9_dc_128_predictor_16x16/, "uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left";
-specialize qw/vp9_dc_128_predictor_16x16 neon/, "$sse2_x86inc";
+specialize qw/vp9_dc_128_predictor_16x16 neon msa/, "$sse2_x86inc";
add_proto qw/void vp9_d207_predictor_32x32/, "uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left";
specialize qw/vp9_d207_predictor_32x32/, "$ssse3_x86inc";
@@ -183,7 +183,7 @@
specialize qw/vp9_d63_predictor_32x32/, "$ssse3_x86inc";
add_proto qw/void vp9_h_predictor_32x32/, "uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left";
-specialize qw/vp9_h_predictor_32x32 neon/, "$ssse3_x86inc";
+specialize qw/vp9_h_predictor_32x32 neon msa/, "$ssse3_x86inc";
add_proto qw/void vp9_d117_predictor_32x32/, "uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left";
specialize qw/vp9_d117_predictor_32x32/;
@@ -195,22 +195,22 @@
specialize qw/vp9_d153_predictor_32x32/;
add_proto qw/void vp9_v_predictor_32x32/, "uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left";
-specialize qw/vp9_v_predictor_32x32 neon/, "$sse2_x86inc";
+specialize qw/vp9_v_predictor_32x32 neon msa/, "$sse2_x86inc";
add_proto qw/void vp9_tm_predictor_32x32/, "uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left";
-specialize qw/vp9_tm_predictor_32x32 neon/, "$sse2_x86_64";
+specialize qw/vp9_tm_predictor_32x32 neon msa/, "$sse2_x86_64";
add_proto qw/void vp9_dc_predictor_32x32/, "uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left";
-specialize qw/vp9_dc_predictor_32x32/, "$sse2_x86inc";
+specialize qw/vp9_dc_predictor_32x32 msa/, "$sse2_x86inc";
add_proto qw/void vp9_dc_top_predictor_32x32/, "uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left";
-specialize qw/vp9_dc_top_predictor_32x32/, "$sse2_x86inc";
+specialize qw/vp9_dc_top_predictor_32x32 msa/, "$sse2_x86inc";
add_proto qw/void vp9_dc_left_predictor_32x32/, "uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left";
-specialize qw/vp9_dc_left_predictor_32x32/, "$sse2_x86inc";
+specialize qw/vp9_dc_left_predictor_32x32 msa/, "$sse2_x86inc";
add_proto qw/void vp9_dc_128_predictor_32x32/, "uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left";
-specialize qw/vp9_dc_128_predictor_32x32/, "$sse2_x86inc";
+specialize qw/vp9_dc_128_predictor_32x32 msa/, "$sse2_x86inc";
#
# Loopfilter
diff --git a/vp9/common/vp9_seg_common.c b/vp9/common/vp9_seg_common.c
index 910200e..471e238 100644
--- a/vp9/common/vp9_seg_common.c
+++ b/vp9/common/vp9_seg_common.c
@@ -25,12 +25,6 @@
// the coding mechanism is still subject to change so these provide a
// convenient single point of change.
-int vp9_segfeature_active(const struct segmentation *seg, int segment_id,
- SEG_LVL_FEATURES feature_id) {
- return seg->enabled &&
- (seg->feature_mask[segment_id] & (1 << feature_id));
-}
-
void vp9_clearall_segfeatures(struct segmentation *seg) {
vp9_zero(seg->feature_data);
vp9_zero(seg->feature_mask);
@@ -60,12 +54,6 @@
seg->feature_data[segment_id][feature_id] = seg_data;
}
-int vp9_get_segdata(const struct segmentation *seg, int segment_id,
- SEG_LVL_FEATURES feature_id) {
- return seg->feature_data[segment_id][feature_id];
-}
-
-
const vp9_tree_index vp9_segment_tree[TREE_SIZE(MAX_SEGMENTS)] = {
2, 4, 6, 8, 10, 12,
0, -1, -2, -3, -4, -5, -6, -7
diff --git a/vp9/common/vp9_seg_common.h b/vp9/common/vp9_seg_common.h
index ff2d66a..95c9918 100644
--- a/vp9/common/vp9_seg_common.h
+++ b/vp9/common/vp9_seg_common.h
@@ -49,9 +49,12 @@
unsigned int feature_mask[MAX_SEGMENTS];
};
-int vp9_segfeature_active(const struct segmentation *seg,
- int segment_id,
- SEG_LVL_FEATURES feature_id);
+// Returns 1 when segmentation is enabled (seg->enabled is nonzero) and bit
+// `feature_id` is set in seg->feature_mask[segment_id]; returns 0 otherwise.
+// Neither index is range-checked here.
+static INLINE int segfeature_active(const struct segmentation *seg,
+ int segment_id,
+ SEG_LVL_FEATURES feature_id) {
+ return seg->enabled &&
+ (seg->feature_mask[segment_id] & (1 << feature_id));
+}
void vp9_clearall_segfeatures(struct segmentation *seg);
@@ -68,9 +71,10 @@
SEG_LVL_FEATURES feature_id,
int seg_data);
-int vp9_get_segdata(const struct segmentation *seg,
- int segment_id,
- SEG_LVL_FEATURES feature_id);
+// Returns the stored value seg->feature_data[segment_id][feature_id].
+// This accessor does not check whether the feature is active for the segment
+// and does not range-check either index.
+static INLINE int get_segdata(const struct segmentation *seg, int segment_id,
+ SEG_LVL_FEATURES feature_id) {
+ return seg->feature_data[segment_id][feature_id];
+}
extern const vp9_tree_index vp9_segment_tree[TREE_SIZE(MAX_SEGMENTS)];
diff --git a/vp9/decoder/vp9_decodemv.c b/vp9/decoder/vp9_decodemv.c
index 7ce3389..d34926d 100644
--- a/vp9/decoder/vp9_decodemv.c
+++ b/vp9/decoder/vp9_decodemv.c
@@ -177,7 +177,7 @@
static int read_skip(VP9_COMMON *cm, const MACROBLOCKD *xd,
int segment_id, vp9_reader *r) {
- if (vp9_segfeature_active(&cm->seg, segment_id, SEG_LVL_SKIP)) {
+ if (segfeature_active(&cm->seg, segment_id, SEG_LVL_SKIP)) {
return 1;
} else {
const int ctx = vp9_get_skip_context(xd);
@@ -307,9 +307,9 @@
FRAME_CONTEXT *const fc = cm->fc;
FRAME_COUNTS *counts = xd->counts;
- if (vp9_segfeature_active(&cm->seg, segment_id, SEG_LVL_REF_FRAME)) {
- ref_frame[0] = (MV_REFERENCE_FRAME)vp9_get_segdata(&cm->seg, segment_id,
- SEG_LVL_REF_FRAME);
+ if (segfeature_active(&cm->seg, segment_id, SEG_LVL_REF_FRAME)) {
+ ref_frame[0] = (MV_REFERENCE_FRAME)get_segdata(&cm->seg, segment_id,
+ SEG_LVL_REF_FRAME);
ref_frame[1] = NONE;
} else {
const REFERENCE_MODE mode = read_block_reference_mode(cm, xd, r);
@@ -444,9 +444,8 @@
static int read_is_inter_block(VP9_COMMON *const cm, MACROBLOCKD *const xd,
int segment_id, vp9_reader *r) {
- if (vp9_segfeature_active(&cm->seg, segment_id, SEG_LVL_REF_FRAME)) {
- return vp9_get_segdata(&cm->seg, segment_id, SEG_LVL_REF_FRAME) !=
- INTRA_FRAME;
+ if (segfeature_active(&cm->seg, segment_id, SEG_LVL_REF_FRAME)) {
+ return get_segdata(&cm->seg, segment_id, SEG_LVL_REF_FRAME) != INTRA_FRAME;
} else {
const int ctx = vp9_get_intra_inter_context(xd);
const int is_inter = vp9_read(r, cm->fc->intra_inter_prob[ctx]);
@@ -473,7 +472,9 @@
const BLOCK_SIZE bsize = mbmi->sb_type;
const int allow_hp = cm->allow_high_precision_mv;
int_mv nearestmv[2], nearmv[2];
- int inter_mode_ctx, ref, is_compound;
+ int_mv ref_mvs[MAX_REF_FRAMES][MAX_MV_REF_CANDIDATES];
+ int ref, is_compound;
+ uint8_t inter_mode_ctx[MAX_REF_FRAMES];
read_ref_frames(cm, xd, r, mbmi->segment_id, mbmi->ref_frame);
is_compound = has_second_ref(mbmi);
@@ -487,13 +488,11 @@
"Reference frame has invalid dimensions");
vp9_setup_pre_planes(xd, ref, ref_buf->buf, mi_row, mi_col,
&ref_buf->sf);
- vp9_find_mv_refs(cm, xd, tile, mi, frame, mbmi->ref_mvs[frame],
- mi_row, mi_col, fpm_sync, (void *)pbi);
+ vp9_find_mv_refs(cm, xd, tile, mi, frame, ref_mvs[frame],
+ mi_row, mi_col, fpm_sync, (void *)pbi, inter_mode_ctx);
}
- inter_mode_ctx = mbmi->mode_context[mbmi->ref_frame[0]];
-
- if (vp9_segfeature_active(&cm->seg, mbmi->segment_id, SEG_LVL_SKIP)) {
+ if (segfeature_active(&cm->seg, mbmi->segment_id, SEG_LVL_SKIP)) {
mbmi->mode = ZEROMV;
if (bsize < BLOCK_8X8) {
vpx_internal_error(xd->error_info, VPX_CODEC_UNSUP_BITSTREAM,
@@ -502,12 +501,13 @@
}
} else {
if (bsize >= BLOCK_8X8)
- mbmi->mode = read_inter_mode(cm, xd, r, inter_mode_ctx);
+ mbmi->mode = read_inter_mode(cm, xd, r,
+ inter_mode_ctx[mbmi->ref_frame[0]]);
}
if (bsize < BLOCK_8X8 || mbmi->mode != ZEROMV) {
for (ref = 0; ref < 1 + is_compound; ++ref) {
- vp9_find_best_ref_mvs(xd, allow_hp, mbmi->ref_mvs[mbmi->ref_frame[ref]],
+ vp9_find_best_ref_mvs(xd, allow_hp, ref_mvs[mbmi->ref_frame[ref]],
&nearestmv[ref], &nearmv[ref]);
}
}
@@ -526,13 +526,16 @@
for (idx = 0; idx < 2; idx += num_4x4_w) {
int_mv block[2];
const int j = idy * 2 + idx;
- b_mode = read_inter_mode(cm, xd, r, inter_mode_ctx);
+ b_mode = read_inter_mode(cm, xd, r, inter_mode_ctx[mbmi->ref_frame[0]]);
- if (b_mode == NEARESTMV || b_mode == NEARMV)
+ if (b_mode == NEARESTMV || b_mode == NEARMV) {
+ uint8_t dummy_mode_ctx[MAX_REF_FRAMES];
for (ref = 0; ref < 1 + is_compound; ++ref)
vp9_append_sub8x8_mvs_for_idx(cm, xd, tile, j, ref, mi_row, mi_col,
&nearest_sub8x8[ref],
- &near_sub8x8[ref]);
+ &near_sub8x8[ref],
+ dummy_mode_ctx);
+ }
if (!assign_mv(cm, xd, b_mode, block, nearestmv,
nearest_sub8x8, near_sub8x8,
diff --git a/vp9/encoder/vp9_aq_cyclicrefresh.c b/vp9/encoder/vp9_aq_cyclicrefresh.c
index 71c1e0b..df70d48 100644
--- a/vp9/encoder/vp9_aq_cyclicrefresh.c
+++ b/vp9/encoder/vp9_aq_cyclicrefresh.c
@@ -95,19 +95,6 @@
return 1;
}
-static void adjust_cyclic_refresh_parameters(VP9_COMP *const cpi) {
- const VP9_COMMON *const cm = &cpi->common;
- const RATE_CONTROL *const rc = &cpi->rc;
- CYCLIC_REFRESH *const cr = cpi->cyclic_refresh;
- // Adjust some parameters, currently only for low resolutions at low bitrates.
- if (cm->width <= 352 &&
- cm->height <= 288 &&
- rc->avg_frame_bandwidth < 3400) {
- cr->motion_thresh = 4;
- cr->rate_boost_fac = 1.25;
- }
-}
-
// Check if this coding block, of size bsize, should be considered for refresh
// (lower-qp coding). Decision can be based on various factors, such as
// size of the coding block (i.e., below min_block size rejected), coding
@@ -435,18 +422,30 @@
cr->sb_index = i;
}
-// Set/update global/frame level cyclic refresh parameters.
+// Set cyclic refresh parameters: writes percent_refresh, max_qdelta_perc,
+// time_for_refresh, rate_ratio_qdelta, motion_thresh and rate_boost_fac of
+// cpi->cyclic_refresh, based on cpi->rc and the frame dimensions in
+// cpi->common.
 void vp9_cyclic_refresh_update_parameters(VP9_COMP *const cpi) {
 const RATE_CONTROL *const rc = &cpi->rc;
+ const VP9_COMMON *const cm = &cpi->common;
 CYCLIC_REFRESH *const cr = cpi->cyclic_refresh;
 cr->percent_refresh = 10;
+ cr->max_qdelta_perc = 50;
+ cr->time_for_refresh = 0;
 // Use larger delta-qp (increase rate_ratio_qdelta) for first few (~4)
- // periods of the refresh cycle, after a key frame. This corresponds to ~40
- // frames with cr->percent_refresh = 10.
- if (rc->frames_since_key < 40)
+ // periods of the refresh cycle, after a key frame. One period is taken to be
+ // 100 / percent_refresh frames, so this is ~40 frames with
+ // cr->percent_refresh = 10. Expressing it as a number of periods keeps the
+ // window correct if percent_refresh is ever changed; the guard avoids a
+ // division by zero and selects the smaller delta-qp in that case.
+ if (cr->percent_refresh > 0 &&
+ rc->frames_since_key < 4 * (100 / cr->percent_refresh))
 cr->rate_ratio_qdelta = 3.0;
 else
 cr->rate_ratio_qdelta = 2.0;
+ // Adjust some parameters for low resolutions at low bitrates.
+ // Both branches assign motion_thresh and rate_boost_fac, so neither field
+ // keeps a stale value from an earlier call.
+ if (cm->width <= 352 &&
+ cm->height <= 288 &&
+ rc->avg_frame_bandwidth < 3400) {
+ cr->motion_thresh = 4;
+ cr->rate_boost_fac = 1.25;
+ } else {
+ cr->motion_thresh = 32;
+ cr->rate_boost_fac = 1.7;
+ }
 }
// Setup cyclic background refresh: set delta q and segmentation map.
@@ -475,9 +474,6 @@
int qindex2;
const double q = vp9_convert_qindex_to_q(cm->base_qindex, cm->bit_depth);
vp9_clear_system_state();
- cr->max_qdelta_perc = 50;
- cr->time_for_refresh = 0;
- cr->rate_boost_fac = 1.7;
// Set rate threshold to some multiple (set to 2 for now) of the target
// rate (target is given by sb64_target_rate and scaled by 256).
cr->thresh_rate_sb = ((int64_t)(rc->sb64_target_rate) << 8) << 2;
@@ -485,9 +481,6 @@
// q will not exceed 457, so (q * q) is within 32bit; see:
// vp9_convert_qindex_to_q(), vp9_ac_quant(), ac_qlookup*[].
cr->thresh_dist_sb = ((int64_t)(q * q)) << 2;
- cr->motion_thresh = 32;
-
- adjust_cyclic_refresh_parameters(cpi);
// Set up segmentation.
// Clear down the segment map.
diff --git a/vp9/encoder/vp9_bitstream.c b/vp9/encoder/vp9_bitstream.c
index d20e067..092d265 100644
--- a/vp9/encoder/vp9_bitstream.c
+++ b/vp9/encoder/vp9_bitstream.c
@@ -93,7 +93,7 @@
static int write_skip(const VP9_COMMON *cm, const MACROBLOCKD *xd,
int segment_id, const MODE_INFO *mi, vp9_writer *w) {
- if (vp9_segfeature_active(&cm->seg, segment_id, SEG_LVL_SKIP)) {
+ if (segfeature_active(&cm->seg, segment_id, SEG_LVL_SKIP)) {
return 1;
} else {
const int skip = mi->mbmi.skip;
@@ -207,10 +207,10 @@
// If segment level coding of this signal is disabled...
// or the segment allows multiple reference frame options
- if (vp9_segfeature_active(&cm->seg, segment_id, SEG_LVL_REF_FRAME)) {
+ if (segfeature_active(&cm->seg, segment_id, SEG_LVL_REF_FRAME)) {
assert(!is_compound);
assert(mbmi->ref_frame[0] ==
- vp9_get_segdata(&cm->seg, segment_id, SEG_LVL_REF_FRAME));
+ get_segdata(&cm->seg, segment_id, SEG_LVL_REF_FRAME));
} else {
// does the feature use compound prediction or not
// (if not specified at the frame/segment level)
@@ -264,7 +264,7 @@
skip = write_skip(cm, xd, segment_id, mi, w);
- if (!vp9_segfeature_active(seg, segment_id, SEG_LVL_REF_FRAME))
+ if (!segfeature_active(seg, segment_id, SEG_LVL_REF_FRAME))
vp9_write(w, is_inter, vp9_get_intra_inter_prob(cm, xd));
if (bsize >= BLOCK_8X8 && cm->tx_mode == TX_MODE_SELECT &&
@@ -293,7 +293,7 @@
write_ref_frames(cm, xd, w);
// If segment skip is not enabled code the mode.
- if (!vp9_segfeature_active(seg, segment_id, SEG_LVL_SKIP)) {
+ if (!segfeature_active(seg, segment_id, SEG_LVL_SKIP)) {
if (bsize >= BLOCK_8X8) {
write_inter_mode(w, mode, inter_probs);
}
@@ -787,10 +787,10 @@
for (i = 0; i < MAX_SEGMENTS; i++) {
for (j = 0; j < SEG_LVL_MAX; j++) {
- const int active = vp9_segfeature_active(seg, i, j);
+ const int active = segfeature_active(seg, i, j);
vp9_wb_write_bit(wb, active);
if (active) {
- const int data = vp9_get_segdata(seg, i, j);
+ const int data = get_segdata(seg, i, j);
const int data_max = vp9_seg_feature_data_max(j);
if (vp9_is_segfeature_signed(j)) {
diff --git a/vp9/encoder/vp9_encodeframe.c b/vp9/encoder/vp9_encodeframe.c
index 7c91da1..f5e3e98 100644
--- a/vp9/encoder/vp9_encodeframe.c
+++ b/vp9/encoder/vp9_encodeframe.c
@@ -370,6 +370,7 @@
static void fill_variance_tree(void *data, BLOCK_SIZE bsize) {
variance_node node;
+ memset(&node, 0, sizeof(node));
tree_to_node(data, bsize, &node);
sum_2_variances(node.split[0], node.split[1], &node.part_variances->horz[0]);
sum_2_variances(node.split[2], node.split[3], &node.part_variances->horz[1]);
@@ -1050,7 +1051,7 @@
if (!output_enabled)
return;
- if (!vp9_segfeature_active(&cm->seg, mbmi->segment_id, SEG_LVL_SKIP)) {
+ if (!segfeature_active(&cm->seg, mbmi->segment_id, SEG_LVL_SKIP)) {
for (i = 0; i < TX_MODES; i++)
rdc->tx_select_diff[i] += ctx->tx_rd_diff[i];
}
@@ -1247,7 +1248,7 @@
vp9_rd_pick_intra_mode_sb(cpi, x, rd_cost, bsize, ctx, best_rd);
} else {
if (bsize >= BLOCK_8X8) {
- if (vp9_segfeature_active(&cm->seg, mbmi->segment_id, SEG_LVL_SKIP))
+ if (segfeature_active(&cm->seg, mbmi->segment_id, SEG_LVL_SKIP))
vp9_rd_pick_inter_mode_sb_seg_skip(cpi, tile_data, x, rd_cost, bsize,
ctx, best_rd);
else
@@ -1290,8 +1291,8 @@
if (!frame_is_intra_only(cm)) {
FRAME_COUNTS *const counts = td->counts;
const int inter_block = is_inter_block(mbmi);
- const int seg_ref_active = vp9_segfeature_active(&cm->seg, mbmi->segment_id,
- SEG_LVL_REF_FRAME);
+ const int seg_ref_active = segfeature_active(&cm->seg, mbmi->segment_id,
+ SEG_LVL_REF_FRAME);
if (!seg_ref_active) {
counts->intra_inter[vp9_get_intra_inter_context(xd)][inter_block]++;
// If the segment reference feature is enabled we have only a single
@@ -1316,7 +1317,7 @@
}
}
if (inter_block &&
- !vp9_segfeature_active(&cm->seg, mbmi->segment_id, SEG_LVL_SKIP)) {
+ !segfeature_active(&cm->seg, mbmi->segment_id, SEG_LVL_SKIP)) {
const int mode_ctx = mbmi->mode_context[mbmi->ref_frame[0]];
if (bsize >= BLOCK_8X8) {
const PREDICTION_MODE mode = mbmi->mode;
@@ -2848,7 +2849,7 @@
const uint8_t *const map = seg->update_map ? cpi->segmentation_map
: cm->last_frame_seg_map;
int segment_id = vp9_get_segment_id(cm, map, BLOCK_64X64, mi_row, mi_col);
- seg_skip = vp9_segfeature_active(seg, segment_id, SEG_LVL_SKIP);
+ seg_skip = segfeature_active(seg, segment_id, SEG_LVL_SKIP);
}
x->source_variance = UINT_MAX;
@@ -2908,7 +2909,7 @@
static int check_dual_ref_flags(VP9_COMP *cpi) {
const int ref_flags = cpi->ref_frame_flags;
- if (vp9_segfeature_active(&cpi->common.seg, 1, SEG_LVL_REF_FRAME)) {
+ if (segfeature_active(&cpi->common.seg, 1, SEG_LVL_REF_FRAME)) {
return 0;
} else {
return (!!(ref_flags & VP9_GOLD_FLAG) + !!(ref_flags & VP9_LAST_FLAG)
@@ -2983,7 +2984,7 @@
if (cm->frame_type == KEY_FRAME)
hybrid_intra_mode_search(cpi, x, rd_cost, bsize, ctx);
- else if (vp9_segfeature_active(&cm->seg, mbmi->segment_id, SEG_LVL_SKIP))
+ else if (segfeature_active(&cm->seg, mbmi->segment_id, SEG_LVL_SKIP))
set_mode_info_seg_skip(x, cm->tx_mode, rd_cost, bsize);
else if (bsize >= BLOCK_8X8)
vp9_pick_inter_mode(cpi, x, tile_data, mi_row, mi_col,
@@ -3598,7 +3599,7 @@
const uint8_t *const map = seg->update_map ? cpi->segmentation_map
: cm->last_frame_seg_map;
int segment_id = vp9_get_segment_id(cm, map, BLOCK_64X64, mi_row, mi_col);
- seg_skip = vp9_segfeature_active(seg, segment_id, SEG_LVL_SKIP);
+ seg_skip = segfeature_active(seg, segment_id, SEG_LVL_SKIP);
if (seg_skip) {
partition_search_type = FIXED_PARTITION;
}
@@ -4160,8 +4161,8 @@
MODE_INFO **mi_8x8 = xd->mi;
MODE_INFO *mi = mi_8x8[0];
MB_MODE_INFO *mbmi = &mi->mbmi;
- const int seg_skip = vp9_segfeature_active(&cm->seg, mbmi->segment_id,
- SEG_LVL_SKIP);
+ const int seg_skip = segfeature_active(&cm->seg, mbmi->segment_id,
+ SEG_LVL_SKIP);
const int mis = cm->mi_stride;
const int mi_width = num_8x8_blocks_wide_lookup[bsize];
const int mi_height = num_8x8_blocks_high_lookup[bsize];
diff --git a/vp9/encoder/vp9_firstpass.c b/vp9/encoder/vp9_firstpass.c
index f801851..3d7843e 100644
--- a/vp9/encoder/vp9_firstpass.c
+++ b/vp9/encoder/vp9_firstpass.c
@@ -124,8 +124,8 @@
stats->pcnt_motion,
stats->pcnt_second_ref,
stats->pcnt_neutral,
- stats->ul_intra_pct,
- stats->image_start_row,
+ stats->intra_skip_pct,
+ stats->inactive_zone_rows,
stats->MVr,
stats->mvr_abs,
stats->MVc,
@@ -162,8 +162,8 @@
section->pcnt_motion = 0.0;
section->pcnt_second_ref = 0.0;
section->pcnt_neutral = 0.0;
- section->ul_intra_pct = 0.0;
- section->image_start_row = 0.0;
+ section->intra_skip_pct = 0.0;
+ section->inactive_zone_rows = 0.0;
section->MVr = 0.0;
section->mvr_abs = 0.0;
section->MVc = 0.0;
@@ -189,8 +189,8 @@
section->pcnt_motion += frame->pcnt_motion;
section->pcnt_second_ref += frame->pcnt_second_ref;
section->pcnt_neutral += frame->pcnt_neutral;
- section->ul_intra_pct += frame->ul_intra_pct;
- section->image_start_row += frame->image_start_row;
+ section->intra_skip_pct += frame->intra_skip_pct;
+ section->inactive_zone_rows += frame->inactive_zone_rows;
section->MVr += frame->MVr;
section->mvr_abs += frame->mvr_abs;
section->MVc += frame->MVc;
@@ -214,8 +214,8 @@
section->pcnt_motion -= frame->pcnt_motion;
section->pcnt_second_ref -= frame->pcnt_second_ref;
section->pcnt_neutral -= frame->pcnt_neutral;
- section->ul_intra_pct -= frame->ul_intra_pct;
- section->image_start_row -= frame->image_start_row;
+ section->intra_skip_pct -= frame->intra_skip_pct;
+ section->inactive_zone_rows -= frame->inactive_zone_rows;
section->MVr -= frame->MVr;
section->mvr_abs -= frame->mvr_abs;
section->MVc -= frame->MVc;
@@ -487,7 +487,7 @@
int second_ref_count = 0;
const int intrapenalty = INTRA_MODE_PENALTY;
double neutral_count;
- int ul_intra_count = 0;
+ int intra_skip_count = 0;
int image_data_start_row = INVALID_ROW;
int new_mv_count = 0;
int sum_in_vectors = 0;
@@ -655,7 +655,7 @@
// common in animations, graphics and screen content, so may be used
// as a signal to detect these types of content.
if (this_error < UL_INTRA_THRESH) {
- ++ul_intra_count;
+ ++intra_skip_count;
} else if ((mb_col > 0) && (image_data_start_row == INVALID_ROW)) {
image_data_start_row = mb_row;
}
@@ -995,8 +995,8 @@
}
// Exclude any image dead zone
if (image_data_start_row > 0) {
- ul_intra_count =
- MAX(0, ul_intra_count - (image_data_start_row * cm->mb_cols * 2));
+ intra_skip_count =
+ MAX(0, intra_skip_count - (image_data_start_row * cm->mb_cols * 2));
}
{
@@ -1023,8 +1023,8 @@
fps.pcnt_inter = (double)intercount / num_mbs;
fps.pcnt_second_ref = (double)second_ref_count / num_mbs;
fps.pcnt_neutral = (double)neutral_count / num_mbs;
- fps.ul_intra_pct = (double)ul_intra_count / num_mbs;
- fps.image_start_row = (double)image_data_start_row;
+ fps.intra_skip_pct = (double)intra_skip_count / num_mbs;
+ fps.inactive_zone_rows = (double)image_data_start_row;
if (mvcount > 0) {
fps.MVr = (double)sum_mvr / mvcount;
@@ -1146,21 +1146,25 @@
static int get_twopass_worst_quality(const VP9_COMP *cpi,
const double section_err,
+ double inactive_zone,
int section_target_bandwidth,
double group_weight_factor) {
const RATE_CONTROL *const rc = &cpi->rc;
const VP9EncoderConfig *const oxcf = &cpi->oxcf;
+ inactive_zone = fclamp(inactive_zone, 0.0, 1.0);
+
if (section_target_bandwidth <= 0) {
return rc->worst_quality; // Highest value allowed
} else {
const int num_mbs = (cpi->oxcf.resize_mode != RESIZE_NONE)
? cpi->initial_mbs : cpi->common.MBs;
- const double err_per_mb = section_err / num_mbs;
+ const int active_mbs = MAX(1, num_mbs - (int)(num_mbs * inactive_zone));
+ const double av_err_per_mb = section_err / active_mbs;
const double speed_term = 1.0 + 0.04 * oxcf->speed;
- const double ediv_size_correction = num_mbs / EDIV_SIZE_FACTOR;
+ const double ediv_size_correction = (double)num_mbs / EDIV_SIZE_FACTOR;
const int target_norm_bits_per_mb = ((uint64_t)section_target_bandwidth <<
- BPER_MB_NORMBITS) / num_mbs;
+ BPER_MB_NORMBITS) / active_mbs;
int q;
int is_svc_upper_layer = 0;
@@ -1173,7 +1177,7 @@
// content at the given rate.
for (q = rc->best_quality; q < rc->worst_quality; ++q) {
const double factor =
- calc_correction_factor(err_per_mb,
+ calc_correction_factor(av_err_per_mb,
ERR_DIVISOR - ediv_size_correction,
is_svc_upper_layer ? SVC_FACTOR_PT_LOW :
FACTOR_PT_LOW, FACTOR_PT_HIGH, q,
@@ -1452,6 +1456,8 @@
const int num_mbs = (cpi->oxcf.resize_mode != RESIZE_NONE)
? cpi->initial_mbs : cpi->common.MBs;
+ // TODO(paulwilkins): correct for dead zone
+
// Underlying boost factor is based on inter error ratio.
frame_boost = (BASELINE_ERR_PER_MB * num_mbs) /
DOUBLE_DIVIDE_CHECK(this_frame->coded_error);
@@ -1817,6 +1823,8 @@
#if GROUP_ADAPTIVE_MAXQ
double gf_group_raw_error = 0.0;
#endif
+ double gf_group_skip_pct = 0.0;
+ double gf_group_inactive_zone_rows = 0.0;
double gf_first_frame_err = 0.0;
double mod_frame_err = 0.0;
@@ -1866,6 +1874,8 @@
#if GROUP_ADAPTIVE_MAXQ
gf_group_raw_error -= this_frame->coded_error;
#endif
+ gf_group_skip_pct -= this_frame->intra_skip_pct;
+ gf_group_inactive_zone_rows -= this_frame->inactive_zone_rows;
}
// Motion breakout threshold for loop below depends on image size.
@@ -1910,6 +1920,8 @@
#if GROUP_ADAPTIVE_MAXQ
gf_group_raw_error += this_frame->coded_error;
#endif
+ gf_group_skip_pct += this_frame->intra_skip_pct;
+ gf_group_inactive_zone_rows += this_frame->inactive_zone_rows;
if (EOF == input_stats(twopass, &next_frame))
break;
@@ -2012,6 +2024,8 @@
#if GROUP_ADAPTIVE_MAXQ
gf_group_raw_error += this_frame->coded_error;
#endif
+ gf_group_skip_pct += this_frame->intra_skip_pct;
+ gf_group_inactive_zone_rows += this_frame->inactive_zone_rows;
}
rc->baseline_gf_interval = new_gf_interval;
}
@@ -2034,6 +2048,12 @@
const int vbr_group_bits_per_frame =
(int)(gf_group_bits / rc->baseline_gf_interval);
const double group_av_err = gf_group_raw_error / rc->baseline_gf_interval;
+ const double group_av_skip_pct =
+ gf_group_skip_pct / rc->baseline_gf_interval;
+ const double group_av_inactive_zone =
+ ((gf_group_inactive_zone_rows * 2) /
+ (rc->baseline_gf_interval * (double)cm->mb_rows));
+
int tmp_q;
// rc factor is a weight factor that corrects for local rate control drift.
double rc_factor = 1.0;
@@ -2045,7 +2065,9 @@
(double)(100 - rc->rate_error_estimate) / 100.0);
}
tmp_q =
- get_twopass_worst_quality(cpi, group_av_err, vbr_group_bits_per_frame,
+ get_twopass_worst_quality(cpi, group_av_err,
+ (group_av_skip_pct + group_av_inactive_zone),
+ vbr_group_bits_per_frame,
twopass->kfgroup_inter_fraction * rc_factor);
twopass->active_worst_quality =
MAX(tmp_q, twopass->active_worst_quality >> 1);
@@ -2584,10 +2606,17 @@
// Special case code for first frame.
const int section_target_bandwidth = (int)(twopass->bits_left /
frames_left);
+ const double section_length = twopass->total_left_stats.count;
const double section_error =
- twopass->total_left_stats.coded_error / twopass->total_left_stats.count;
+ twopass->total_left_stats.coded_error / section_length;
+ const double section_intra_skip =
+ twopass->total_left_stats.intra_skip_pct / section_length;
+ const double section_inactive_zone =
+ (twopass->total_left_stats.inactive_zone_rows * 2) /
+ ((double)cm->mb_rows * section_length);
const int tmp_q =
get_twopass_worst_quality(cpi, section_error,
+ section_intra_skip + section_inactive_zone,
section_target_bandwidth, DEFAULT_GRP_WEIGHT);
twopass->active_worst_quality = tmp_q;
@@ -2604,7 +2633,7 @@
return;
// Set the frame content type flag.
- if (this_frame.ul_intra_pct >= FC_ANIMATION_THRESH)
+ if (this_frame.intra_skip_pct >= FC_ANIMATION_THRESH)
twopass->fr_content_type = FC_GRAPHICS_ANIMATION;
else
twopass->fr_content_type = FC_NORMAL;
diff --git a/vp9/encoder/vp9_firstpass.h b/vp9/encoder/vp9_firstpass.h
index 45f1132..0047932 100644
--- a/vp9/encoder/vp9_firstpass.h
+++ b/vp9/encoder/vp9_firstpass.h
@@ -51,8 +51,8 @@
double pcnt_motion;
double pcnt_second_ref;
double pcnt_neutral;
- double ul_intra_pct;
- double image_start_row;
+ double intra_skip_pct;
+ double inactive_zone_rows; // Inactive MB rows at top; doubled by users.
double MVr;
double mvr_abs;
double MVc;
diff --git a/vp9/encoder/vp9_pickmode.c b/vp9/encoder/vp9_pickmode.c
index 2c78831..2479b6e 100644
--- a/vp9/encoder/vp9_pickmode.c
+++ b/vp9/encoder/vp9_pickmode.c
@@ -1179,7 +1179,8 @@
if (cm->use_prev_frame_mvs)
vp9_find_mv_refs(cm, xd, tile_info, xd->mi[0], ref_frame,
- candidates, mi_row, mi_col, NULL, NULL);
+ candidates, mi_row, mi_col, NULL, NULL,
+ xd->mi[0]->mbmi.mode_context);
else
const_motion[ref_frame] = mv_refs_rt(cm, xd, tile_info,
xd->mi[0],
@@ -1658,7 +1659,8 @@
vp9_setup_pred_block(xd, yv12_mb[ref_frame], yv12, mi_row, mi_col,
sf, sf);
vp9_find_mv_refs(cm, xd, tile_info, xd->mi[0], ref_frame,
- candidates, mi_row, mi_col, NULL, NULL);
+ candidates, mi_row, mi_col, NULL, NULL,
+ xd->mi[0]->mbmi.mode_context);
vp9_find_best_ref_mvs(xd, cm->allow_high_precision_mv, candidates,
&dummy_mv[0], &dummy_mv[1]);
@@ -1690,8 +1692,8 @@
// If the segment reference frame feature is enabled....
// then do nothing if the current ref frame is not allowed..
- if (vp9_segfeature_active(seg, segment_id, SEG_LVL_REF_FRAME) &&
- vp9_get_segdata(seg, segment_id, SEG_LVL_REF_FRAME) != (int)ref_frame)
+ if (segfeature_active(seg, segment_id, SEG_LVL_REF_FRAME) &&
+ get_segdata(seg, segment_id, SEG_LVL_REF_FRAME) != (int)ref_frame)
continue;
mbmi->ref_frame[0] = ref_frame;
@@ -1732,7 +1734,8 @@
b_mv[NEWMV].as_int = INVALID_MV;
vp9_append_sub8x8_mvs_for_idx(cm, xd, tile_info, i, 0, mi_row, mi_col,
&b_mv[NEARESTMV],
- &b_mv[NEARMV]);
+ &b_mv[NEARMV],
+ xd->mi[0]->mbmi.mode_context);
for (this_mode = NEARESTMV; this_mode <= NEWMV; ++this_mode) {
int b_rate = 0;
diff --git a/vp9/encoder/vp9_quantize.c b/vp9/encoder/vp9_quantize.c
index db5460b..e6e17c0 100644
--- a/vp9/encoder/vp9_quantize.c
+++ b/vp9/encoder/vp9_quantize.c
@@ -678,7 +678,7 @@
x->plane[i].quant_thred[1] = x->plane[i].zbin[1] * x->plane[i].zbin[1];
}
- x->skip_block = vp9_segfeature_active(&cm->seg, segment_id, SEG_LVL_SKIP);
+ x->skip_block = segfeature_active(&cm->seg, segment_id, SEG_LVL_SKIP);
x->q_index = qindex;
x->errorperbit = rdmult >> 6;
diff --git a/vp9/encoder/vp9_rdopt.c b/vp9/encoder/vp9_rdopt.c
index 9fa258c..162d4de 100644
--- a/vp9/encoder/vp9_rdopt.c
+++ b/vp9/encoder/vp9_rdopt.c
@@ -1804,7 +1804,8 @@
frame_mv[ZEROMV][frame].as_int = 0;
vp9_append_sub8x8_mvs_for_idx(cm, xd, tile, i, ref, mi_row, mi_col,
&frame_mv[NEARESTMV][frame],
- &frame_mv[NEARMV][frame]);
+ &frame_mv[NEARMV][frame],
+ xd->mi[0]->mbmi.mode_context);
}
// search for the best motion vector on this segment
@@ -2119,8 +2120,8 @@
unsigned int *ref_costs_single,
unsigned int *ref_costs_comp,
vp9_prob *comp_mode_p) {
- int seg_ref_active = vp9_segfeature_active(&cm->seg, segment_id,
- SEG_LVL_REF_FRAME);
+ int seg_ref_active = segfeature_active(&cm->seg, segment_id,
+ SEG_LVL_REF_FRAME);
if (seg_ref_active) {
memset(ref_costs_single, 0, MAX_REF_FRAMES * sizeof(*ref_costs_single));
memset(ref_costs_comp, 0, MAX_REF_FRAMES * sizeof(*ref_costs_comp));
@@ -2220,7 +2221,7 @@
// Gets an initial list of candidate vectors from neighbours and orders them
vp9_find_mv_refs(cm, xd, tile, mi, ref_frame, candidates, mi_row, mi_col,
- NULL, NULL);
+ NULL, NULL, xd->mi[0]->mbmi.mode_context);
// Candidate refinement carried out at encoder and decoder
vp9_find_best_ref_mvs(xd, cm->allow_high_precision_mv, candidates,
@@ -3006,8 +3007,8 @@
}
// If the segment reference frame feature is enabled....
// then do nothing if the current ref frame is not allowed..
- if (vp9_segfeature_active(seg, segment_id, SEG_LVL_REF_FRAME) &&
- vp9_get_segdata(seg, segment_id, SEG_LVL_REF_FRAME) != (int)ref_frame) {
+ if (segfeature_active(seg, segment_id, SEG_LVL_REF_FRAME) &&
+ get_segdata(seg, segment_id, SEG_LVL_REF_FRAME) != (int)ref_frame) {
ref_frame_skip_mask[0] |= (1 << ref_frame);
ref_frame_skip_mask[1] |= SECOND_REF_FRAME_MASK;
}
@@ -3016,7 +3017,7 @@
// Disable this drop out case if the ref frame
// segment level feature is enabled for this segment. This is to
// prevent the possibility that we end up unable to pick any mode.
- if (!vp9_segfeature_active(seg, segment_id, SEG_LVL_REF_FRAME)) {
+ if (!segfeature_active(seg, segment_id, SEG_LVL_REF_FRAME)) {
// Only consider ZEROMV/ALTREF_FRAME for alt ref frame,
// unless ARNR filtering is enabled in which case we want
// an unfiltered alternative. We allow near/nearest as well
@@ -3195,7 +3196,7 @@
// Do not allow compound prediction if the segment level reference frame
// feature is in use as in this case there can only be one reference.
- if (vp9_segfeature_active(seg, segment_id, SEG_LVL_REF_FRAME))
+ if (segfeature_active(seg, segment_id, SEG_LVL_REF_FRAME))
continue;
if ((mode_search_skip_flags & FLAG_SKIP_COMP_BESTINTRA) &&
@@ -3637,7 +3638,7 @@
rd_cost->rate = INT_MAX;
- assert(vp9_segfeature_active(&cm->seg, segment_id, SEG_LVL_SKIP));
+ assert(segfeature_active(&cm->seg, segment_id, SEG_LVL_SKIP));
mbmi->mode = ZEROMV;
mbmi->uv_mode = DC_PRED;
@@ -3849,7 +3850,7 @@
continue;
// Do not allow compound prediction if the segment level reference frame
// feature is in use as in this case there can only be one reference.
- if (vp9_segfeature_active(seg, segment_id, SEG_LVL_REF_FRAME))
+ if (segfeature_active(seg, segment_id, SEG_LVL_REF_FRAME))
continue;
if ((sf->mode_search_skip_flags & FLAG_SKIP_COMP_BESTINTRA) &&
@@ -3874,13 +3875,13 @@
// If the segment reference frame feature is enabled....
// then do nothing if the current ref frame is not allowed..
- if (vp9_segfeature_active(seg, segment_id, SEG_LVL_REF_FRAME) &&
- vp9_get_segdata(seg, segment_id, SEG_LVL_REF_FRAME) != (int)ref_frame) {
+ if (segfeature_active(seg, segment_id, SEG_LVL_REF_FRAME) &&
+ get_segdata(seg, segment_id, SEG_LVL_REF_FRAME) != (int)ref_frame) {
continue;
// Disable this drop out case if the ref frame
// segment level feature is enabled for this segment. This is to
// prevent the possibility that we end up unable to pick any mode.
- } else if (!vp9_segfeature_active(seg, segment_id, SEG_LVL_REF_FRAME)) {
+ } else if (!segfeature_active(seg, segment_id, SEG_LVL_REF_FRAME)) {
// Only consider ZEROMV/ALTREF_FRAME for alt ref frame,
// unless ARNR filtering is enabled in which case we want
// an unfiltered alternative. We allow near/nearest as well
diff --git a/vp9/encoder/vp9_tokenize.c b/vp9/encoder/vp9_tokenize.c
index 3592031..181a99c 100644
--- a/vp9/encoder/vp9_tokenize.c
+++ b/vp9/encoder/vp9_tokenize.c
@@ -484,7 +484,7 @@
static INLINE int get_tx_eob(const struct segmentation *seg, int segment_id,
TX_SIZE tx_size) {
const int eob_max = 16 << (tx_size << 1);
- return vp9_segfeature_active(seg, segment_id, SEG_LVL_SKIP) ? 0 : eob_max;
+ return segfeature_active(seg, segment_id, SEG_LVL_SKIP) ? 0 : eob_max;
}
static void tokenize_b(int plane, int block, BLOCK_SIZE plane_bsize,
@@ -615,8 +615,8 @@
MACROBLOCKD *const xd = &x->e_mbd;
MB_MODE_INFO *const mbmi = &xd->mi[0]->mbmi;
const int ctx = vp9_get_skip_context(xd);
- const int skip_inc = !vp9_segfeature_active(&cm->seg, mbmi->segment_id,
- SEG_LVL_SKIP);
+ const int skip_inc = !segfeature_active(&cm->seg, mbmi->segment_id,
+ SEG_LVL_SKIP);
struct tokenize_b_args arg = {cpi, td, t};
if (mbmi->skip) {
if (!dry_run)
diff --git a/vp9/vp9_common.mk b/vp9/vp9_common.mk
index d018699..b01fdd1 100644
--- a/vp9/vp9_common.mk
+++ b/vp9/vp9_common.mk
@@ -146,6 +146,7 @@
VP9_COMMON_SRCS-$(HAVE_MSA) += common/mips/msa/vp9_idct16x16_msa.c
VP9_COMMON_SRCS-$(HAVE_MSA) += common/mips/msa/vp9_idct32x32_msa.c
VP9_COMMON_SRCS-$(HAVE_MSA) += common/mips/msa/vp9_idct_msa.h
+VP9_COMMON_SRCS-$(HAVE_MSA) += common/mips/msa/vp9_intra_predict_msa.c
VP9_COMMON_SRCS-$(HAVE_MSA) += common/mips/msa/vp9_loopfilter_4_msa.c
VP9_COMMON_SRCS-$(HAVE_MSA) += common/mips/msa/vp9_loopfilter_8_msa.c
VP9_COMMON_SRCS-$(HAVE_MSA) += common/mips/msa/vp9_loopfilter_16_msa.c