Merge "test_intra_pred_speed: add ClearSystemState() call"
diff --git a/test/test_intra_pred_speed.cc b/test/test_intra_pred_speed.cc
index f5ae440..d2687b2 100644
--- a/test/test_intra_pred_speed.cc
+++ b/test/test_intra_pred_speed.cc
@@ -213,6 +213,14 @@
NULL, NULL, NULL, NULL, vp9_tm_predictor_4x4_neon)
#endif // HAVE_NEON
+#if HAVE_MSA
+INTRA_PRED_TEST(MSA, TestIntraPred4, vp9_dc_predictor_4x4_msa,
+ vp9_dc_left_predictor_4x4_msa, vp9_dc_top_predictor_4x4_msa,
+ vp9_dc_128_predictor_4x4_msa, vp9_v_predictor_4x4_msa,
+ vp9_h_predictor_4x4_msa, NULL, NULL, NULL, NULL, NULL,
+ NULL, vp9_tm_predictor_4x4_msa)
+#endif // HAVE_MSA
+
// -----------------------------------------------------------------------------
// 8x8
@@ -258,6 +266,14 @@
#endif // HAVE_NEON
+#if HAVE_MSA
+INTRA_PRED_TEST(MSA, TestIntraPred8, vp9_dc_predictor_8x8_msa,
+ vp9_dc_left_predictor_8x8_msa, vp9_dc_top_predictor_8x8_msa,
+ vp9_dc_128_predictor_8x8_msa, vp9_v_predictor_8x8_msa,
+ vp9_h_predictor_8x8_msa, NULL, NULL, NULL, NULL, NULL,
+ NULL, vp9_tm_predictor_8x8_msa)
+#endif // HAVE_MSA
+
// -----------------------------------------------------------------------------
// 16x16
@@ -301,6 +317,14 @@
vp9_tm_predictor_16x16_neon)
#endif // HAVE_NEON
+#if HAVE_MSA
+INTRA_PRED_TEST(MSA, TestIntraPred16, vp9_dc_predictor_16x16_msa,
+ vp9_dc_left_predictor_16x16_msa, vp9_dc_top_predictor_16x16_msa,
+ vp9_dc_128_predictor_16x16_msa, vp9_v_predictor_16x16_msa,
+ vp9_h_predictor_16x16_msa, NULL, NULL, NULL, NULL, NULL,
+ NULL, vp9_tm_predictor_16x16_msa)
+#endif // HAVE_MSA
+
// -----------------------------------------------------------------------------
// 32x32
@@ -342,4 +366,12 @@
NULL, NULL, NULL, NULL, NULL, vp9_tm_predictor_32x32_neon)
#endif // HAVE_NEON
+#if HAVE_MSA
+INTRA_PRED_TEST(MSA, TestIntraPred32, vp9_dc_predictor_32x32_msa,
+ vp9_dc_left_predictor_32x32_msa, vp9_dc_top_predictor_32x32_msa,
+ vp9_dc_128_predictor_32x32_msa, vp9_v_predictor_32x32_msa,
+ vp9_h_predictor_32x32_msa, NULL, NULL, NULL, NULL, NULL,
+ NULL, vp9_tm_predictor_32x32_msa)
+#endif // HAVE_MSA
+
#include "test/test_libvpx.cc"
diff --git a/vp9/common/mips/msa/vp9_intra_predict_msa.c b/vp9/common/mips/msa/vp9_intra_predict_msa.c
new file mode 100644
index 0000000..2fc6105
--- /dev/null
+++ b/vp9/common/mips/msa/vp9_intra_predict_msa.c
@@ -0,0 +1,737 @@
+/*
+ * Copyright (c) 2015 The WebM project authors. All Rights Reserved.
+ *
+ * Use of this source code is governed by a BSD-style license
+ * that can be found in the LICENSE file in the root of the source
+ * tree. An additional intellectual property rights grant can be found
+ * in the file PATENTS. All contributing project authors may
+ * be found in the AUTHORS file in the root of the source tree.
+ */
+
+#include "./vp9_rtcd.h"
+#include "vp9/common/mips/msa/vp9_macros_msa.h"
+
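+/* Unsigned saturating subtract of 'in0'/'in1' from 'out0'/'out1' (result
+ * clamps at zero).  The TM predictors below use this to form
+ * left[row] + above[col] - top_left without wrapping below zero. */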
+#define IPRED_SUBS_UH2_UH(in0, in1, out0, out1) { \
+ out0 = __msa_subs_u_h(out0, in0); \
+ out1 = __msa_subs_u_h(out1, in1); \
+}
+
+static void intra_predict_vert_4x4_msa(const uint8_t *src, uint8_t *dst,
+ int32_t dst_stride) {
+ uint32_t src_data;
+
+ src_data = LW(src);
+
+ SW4(src_data, src_data, src_data, src_data, dst, dst_stride);
+}
+
+static void intra_predict_vert_8x8_msa(const uint8_t *src, uint8_t *dst,
+ int32_t dst_stride) {
+ uint32_t row;
+ uint32_t src_data1, src_data2;
+
+ src_data1 = LW(src);
+ src_data2 = LW(src + 4);
+
+ for (row = 8; row--;) {
+ SW(src_data1, dst);
+ SW(src_data2, (dst + 4));
+ dst += dst_stride;
+ }
+}
+
+static void intra_predict_vert_16x16_msa(const uint8_t *src, uint8_t *dst,
+ int32_t dst_stride) {
+ uint32_t row;
+ v16u8 src0;
+
+ src0 = LD_UB(src);
+
+ for (row = 16; row--;) {
+ ST_UB(src0, dst);
+ dst += dst_stride;
+ }
+}
+
+static void intra_predict_vert_32x32_msa(const uint8_t *src, uint8_t *dst,
+ int32_t dst_stride) {
+ uint32_t row;
+ v16u8 src1, src2;
+
+ src1 = LD_UB(src);
+ src2 = LD_UB(src + 16);
+
+ for (row = 32; row--;) {
+ ST_UB2(src1, src2, dst, 16);
+ dst += dst_stride;
+ }
+}
+
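+/* H prediction: every pixel in row 'r' is a copy of left[r].  Multiplying a
+ * byte by 0x01010101 (or the 64-bit equivalent) replicates it across the
+ * word stored for that row. */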
+static void intra_predict_horiz_4x4_msa(const uint8_t *src, uint8_t *dst,
+ int32_t dst_stride) {
+ uint32_t out0, out1, out2, out3;
+
+ out0 = src[0] * 0x01010101;
+ out1 = src[1] * 0x01010101;
+ out2 = src[2] * 0x01010101;
+ out3 = src[3] * 0x01010101;
+
+ SW4(out0, out1, out2, out3, dst, dst_stride);
+}
+
+static void intra_predict_horiz_8x8_msa(const uint8_t *src, uint8_t *dst,
+ int32_t dst_stride) {
+ uint64_t out0, out1, out2, out3, out4, out5, out6, out7;
+
+ out0 = src[0] * 0x0101010101010101ull;
+ out1 = src[1] * 0x0101010101010101ull;
+ out2 = src[2] * 0x0101010101010101ull;
+ out3 = src[3] * 0x0101010101010101ull;
+ out4 = src[4] * 0x0101010101010101ull;
+ out5 = src[5] * 0x0101010101010101ull;
+ out6 = src[6] * 0x0101010101010101ull;
+ out7 = src[7] * 0x0101010101010101ull;
+
+ SD4(out0, out1, out2, out3, dst, dst_stride);
+ dst += (4 * dst_stride);
+ SD4(out4, out5, out6, out7, dst, dst_stride);
+}
+
+static void intra_predict_horiz_16x16_msa(const uint8_t *src, uint8_t *dst,
+ int32_t dst_stride) {
+ uint32_t row;
+ uint8_t inp0, inp1, inp2, inp3;
+ v16u8 src0, src1, src2, src3;
+
+ for (row = 4; row--;) {
+ inp0 = src[0];
+ inp1 = src[1];
+ inp2 = src[2];
+ inp3 = src[3];
+ src += 4;
+
+ src0 = (v16u8)__msa_fill_b(inp0);
+ src1 = (v16u8)__msa_fill_b(inp1);
+ src2 = (v16u8)__msa_fill_b(inp2);
+ src3 = (v16u8)__msa_fill_b(inp3);
+
+ ST_UB4(src0, src1, src2, src3, dst, dst_stride);
+ dst += (4 * dst_stride);
+ }
+}
+
+static void intra_predict_horiz_32x32_msa(const uint8_t *src, uint8_t *dst,
+ int32_t dst_stride) {
+ uint32_t row;
+ uint8_t inp0, inp1, inp2, inp3;
+ v16u8 src0, src1, src2, src3;
+
+ for (row = 8; row--;) {
+ inp0 = src[0];
+ inp1 = src[1];
+ inp2 = src[2];
+ inp3 = src[3];
+ src += 4;
+
+ src0 = (v16u8)__msa_fill_b(inp0);
+ src1 = (v16u8)__msa_fill_b(inp1);
+ src2 = (v16u8)__msa_fill_b(inp2);
+ src3 = (v16u8)__msa_fill_b(inp3);
+
+ ST_UB2(src0, src0, dst, 16);
+ dst += dst_stride;
+ ST_UB2(src1, src1, dst, 16);
+ dst += dst_stride;
+ ST_UB2(src2, src2, dst, 16);
+ dst += dst_stride;
+ ST_UB2(src3, src3, dst, 16);
+ dst += dst_stride;
+ }
+}
+
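+/* DC prediction: the block is filled with the rounded average of the
+ * neighbouring samples.  The final __msa_srari_* shift is log2 of the number
+ * of samples summed (e.g. 3 for the 4 top + 4 left samples of a 4x4 block);
+ * the _tl_ variants average a single edge and therefore shift by one less. */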
+static void intra_predict_dc_4x4_msa(const uint8_t *src_top,
+ const uint8_t *src_left,
+ uint8_t *dst, int32_t dst_stride) {
+ uint32_t val0, val1;
+ v16i8 store, src = { 0 };
+ v8u16 sum_h;
+ v4u32 sum_w;
+ v2u64 sum_d;
+
+ val0 = LW(src_top);
+ val1 = LW(src_left);
+ INSERT_W2_SB(val0, val1, src);
+ sum_h = __msa_hadd_u_h((v16u8)src, (v16u8)src);
+ sum_w = __msa_hadd_u_w(sum_h, sum_h);
+ sum_d = __msa_hadd_u_d(sum_w, sum_w);
+ sum_w = (v4u32)__msa_srari_w((v4i32)sum_d, 3);
+ store = __msa_splati_b((v16i8)sum_w, 0);
+ val0 = __msa_copy_u_w((v4i32)store, 0);
+
+ SW4(val0, val0, val0, val0, dst, dst_stride);
+}
+
+static void intra_predict_dc_tl_4x4_msa(const uint8_t *src, uint8_t *dst,
+ int32_t dst_stride) {
+ uint32_t val0;
+ v16i8 store, data = { 0 };
+ v8u16 sum_h;
+ v4u32 sum_w;
+
+ val0 = LW(src);
+ data = (v16i8)__msa_insert_w((v4i32)data, 0, val0);
+ sum_h = __msa_hadd_u_h((v16u8)data, (v16u8)data);
+ sum_w = __msa_hadd_u_w(sum_h, sum_h);
+ sum_w = (v4u32)__msa_srari_w((v4i32)sum_w, 2);
+ store = __msa_splati_b((v16i8)sum_w, 0);
+ val0 = __msa_copy_u_w((v4i32)store, 0);
+
+ SW4(val0, val0, val0, val0, dst, dst_stride);
+}
+
+static void intra_predict_128dc_4x4_msa(uint8_t *dst, int32_t dst_stride) {
+ uint32_t out;
+ const v16i8 store = __msa_ldi_b(128);
+
+ out = __msa_copy_u_w((v4i32)store, 0);
+
+ SW4(out, out, out, out, dst, dst_stride);
+}
+
+static void intra_predict_dc_8x8_msa(const uint8_t *src_top,
+ const uint8_t *src_left,
+ uint8_t *dst, int32_t dst_stride) {
+ uint64_t val0, val1;
+ v16i8 store;
+ v16u8 src = { 0 };
+ v8u16 sum_h;
+ v4u32 sum_w;
+ v2u64 sum_d;
+
+ val0 = LD(src_top);
+ val1 = LD(src_left);
+ INSERT_D2_UB(val0, val1, src);
+ sum_h = __msa_hadd_u_h(src, src);
+ sum_w = __msa_hadd_u_w(sum_h, sum_h);
+ sum_d = __msa_hadd_u_d(sum_w, sum_w);
+ sum_w = (v4u32)__msa_pckev_w((v4i32)sum_d, (v4i32)sum_d);
+ sum_d = __msa_hadd_u_d(sum_w, sum_w);
+ sum_w = (v4u32)__msa_srari_w((v4i32)sum_d, 4);
+ store = __msa_splati_b((v16i8)sum_w, 0);
+ val0 = __msa_copy_u_d((v2i64)store, 0);
+
+ SD4(val0, val0, val0, val0, dst, dst_stride);
+ dst += (4 * dst_stride);
+ SD4(val0, val0, val0, val0, dst, dst_stride);
+}
+
+static void intra_predict_dc_tl_8x8_msa(const uint8_t *src, uint8_t *dst,
+ int32_t dst_stride) {
+ uint64_t val0;
+ v16i8 store;
+ v16u8 data = { 0 };
+ v8u16 sum_h;
+ v4u32 sum_w;
+ v2u64 sum_d;
+
+ val0 = LD(src);
+ data = (v16u8)__msa_insert_d((v2i64)data, 0, val0);
+ sum_h = __msa_hadd_u_h(data, data);
+ sum_w = __msa_hadd_u_w(sum_h, sum_h);
+ sum_d = __msa_hadd_u_d(sum_w, sum_w);
+ sum_w = (v4u32)__msa_srari_w((v4i32)sum_d, 3);
+ store = __msa_splati_b((v16i8)sum_w, 0);
+ val0 = __msa_copy_u_d((v2i64)store, 0);
+
+ SD4(val0, val0, val0, val0, dst, dst_stride);
+ dst += (4 * dst_stride);
+ SD4(val0, val0, val0, val0, dst, dst_stride);
+}
+
+static void intra_predict_128dc_8x8_msa(uint8_t *dst, int32_t dst_stride) {
+ uint64_t out;
+ const v16i8 store = __msa_ldi_b(128);
+
+ out = __msa_copy_u_d((v2i64)store, 0);
+
+ SD4(out, out, out, out, dst, dst_stride);
+ dst += (4 * dst_stride);
+ SD4(out, out, out, out, dst, dst_stride);
+}
+
+static void intra_predict_dc_16x16_msa(const uint8_t *src_top,
+ const uint8_t *src_left,
+ uint8_t *dst, int32_t dst_stride) {
+ v16u8 top, left, out;
+ v8u16 sum_h, sum_top, sum_left;
+ v4u32 sum_w;
+ v2u64 sum_d;
+
+ top = LD_UB(src_top);
+ left = LD_UB(src_left);
+ HADD_UB2_UH(top, left, sum_top, sum_left);
+ sum_h = sum_top + sum_left;
+ sum_w = __msa_hadd_u_w(sum_h, sum_h);
+ sum_d = __msa_hadd_u_d(sum_w, sum_w);
+ sum_w = (v4u32)__msa_pckev_w((v4i32)sum_d, (v4i32)sum_d);
+ sum_d = __msa_hadd_u_d(sum_w, sum_w);
+ sum_w = (v4u32)__msa_srari_w((v4i32)sum_d, 5);
+ out = (v16u8)__msa_splati_b((v16i8)sum_w, 0);
+
+ ST_UB8(out, out, out, out, out, out, out, out, dst, dst_stride);
+ dst += (8 * dst_stride);
+ ST_UB8(out, out, out, out, out, out, out, out, dst, dst_stride);
+}
+
+static void intra_predict_dc_tl_16x16_msa(const uint8_t *src, uint8_t *dst,
+ int32_t dst_stride) {
+ v16u8 data, out;
+ v8u16 sum_h;
+ v4u32 sum_w;
+ v2u64 sum_d;
+
+ data = LD_UB(src);
+ sum_h = __msa_hadd_u_h(data, data);
+ sum_w = __msa_hadd_u_w(sum_h, sum_h);
+ sum_d = __msa_hadd_u_d(sum_w, sum_w);
+ sum_w = (v4u32)__msa_pckev_w((v4i32)sum_d, (v4i32)sum_d);
+ sum_d = __msa_hadd_u_d(sum_w, sum_w);
+ sum_w = (v4u32)__msa_srari_w((v4i32)sum_d, 4);
+ out = (v16u8)__msa_splati_b((v16i8)sum_w, 0);
+
+ ST_UB8(out, out, out, out, out, out, out, out, dst, dst_stride);
+ dst += (8 * dst_stride);
+ ST_UB8(out, out, out, out, out, out, out, out, dst, dst_stride);
+}
+
+static void intra_predict_128dc_16x16_msa(uint8_t *dst, int32_t dst_stride) {
+ const v16u8 out = (v16u8)__msa_ldi_b(128);
+
+ ST_UB8(out, out, out, out, out, out, out, out, dst, dst_stride);
+ dst += (8 * dst_stride);
+ ST_UB8(out, out, out, out, out, out, out, out, dst, dst_stride);
+}
+
+static void intra_predict_dc_32x32_msa(const uint8_t *src_top,
+ const uint8_t *src_left,
+ uint8_t *dst, int32_t dst_stride) {
+ uint32_t row;
+ v16u8 top0, top1, left0, left1, out;
+ v8u16 sum_h, sum_top0, sum_top1, sum_left0, sum_left1;
+ v4u32 sum_w;
+ v2u64 sum_d;
+
+ LD_UB2(src_top, 16, top0, top1);
+ LD_UB2(src_left, 16, left0, left1);
+ HADD_UB2_UH(top0, top1, sum_top0, sum_top1);
+ HADD_UB2_UH(left0, left1, sum_left0, sum_left1);
+ sum_h = sum_top0 + sum_top1;
+ sum_h += sum_left0 + sum_left1;
+ sum_w = __msa_hadd_u_w(sum_h, sum_h);
+ sum_d = __msa_hadd_u_d(sum_w, sum_w);
+ sum_w = (v4u32)__msa_pckev_w((v4i32)sum_d, (v4i32)sum_d);
+ sum_d = __msa_hadd_u_d(sum_w, sum_w);
+ sum_w = (v4u32)__msa_srari_w((v4i32)sum_d, 6);
+ out = (v16u8)__msa_splati_b((v16i8)sum_w, 0);
+
+ for (row = 16; row--;) {
+ ST_UB2(out, out, dst, 16);
+ dst += dst_stride;
+ ST_UB2(out, out, dst, 16);
+ dst += dst_stride;
+ }
+}
+
+static void intra_predict_dc_tl_32x32_msa(const uint8_t *src, uint8_t *dst,
+ int32_t dst_stride) {
+ uint32_t row;
+ v16u8 data0, data1, out;
+ v8u16 sum_h, sum_data0, sum_data1;
+ v4u32 sum_w;
+ v2u64 sum_d;
+
+ LD_UB2(src, 16, data0, data1);
+ HADD_UB2_UH(data0, data1, sum_data0, sum_data1);
+ sum_h = sum_data0 + sum_data1;
+ sum_w = __msa_hadd_u_w(sum_h, sum_h);
+ sum_d = __msa_hadd_u_d(sum_w, sum_w);
+ sum_w = (v4u32)__msa_pckev_w((v4i32)sum_d, (v4i32)sum_d);
+ sum_d = __msa_hadd_u_d(sum_w, sum_w);
+ sum_w = (v4u32)__msa_srari_w((v4i32)sum_d, 5);
+ out = (v16u8)__msa_splati_b((v16i8)sum_w, 0);
+
+ for (row = 16; row--;) {
+ ST_UB2(out, out, dst, 16);
+ dst += dst_stride;
+ ST_UB2(out, out, dst, 16);
+ dst += dst_stride;
+ }
+}
+
+static void intra_predict_128dc_32x32_msa(uint8_t *dst, int32_t dst_stride) {
+ uint32_t row;
+ const v16u8 out = (v16u8)__msa_ldi_b(128);
+
+ for (row = 16; row--;) {
+ ST_UB2(out, out, dst, 16);
+ dst += dst_stride;
+ ST_UB2(out, out, dst, 16);
+ dst += dst_stride;
+ }
+}
+
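+/* TM (TrueMotion) prediction: each output pixel is
+ * clip(left[row] + above[col] - top_left, 0, 255).  The unsigned saturating
+ * subtract clamps the low end at 0 and SAT_UH(..., 7) clamps the result to
+ * 8 bits (255). */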
+static void intra_predict_tm_4x4_msa(const uint8_t *src_top_ptr,
+ const uint8_t *src_left,
+ uint8_t *dst, int32_t dst_stride) {
+ uint32_t val;
+ uint8_t top_left = src_top_ptr[-1];
+ v16i8 src_left0, src_left1, src_left2, src_left3, tmp0, tmp1, src_top = { 0 };
+ v16u8 src0, src1, src2, src3;
+ v8u16 src_top_left, vec0, vec1, vec2, vec3;
+
+ src_top_left = (v8u16)__msa_fill_h(top_left);
+ val = LW(src_top_ptr);
+ src_top = (v16i8)__msa_insert_w((v4i32)src_top, 0, val);
+
+ src_left0 = __msa_fill_b(src_left[0]);
+ src_left1 = __msa_fill_b(src_left[1]);
+ src_left2 = __msa_fill_b(src_left[2]);
+ src_left3 = __msa_fill_b(src_left[3]);
+
+ ILVR_B4_UB(src_left0, src_top, src_left1, src_top, src_left2, src_top,
+ src_left3, src_top, src0, src1, src2, src3);
+ HADD_UB4_UH(src0, src1, src2, src3, vec0, vec1, vec2, vec3);
+ IPRED_SUBS_UH2_UH(src_top_left, src_top_left, vec0, vec1);
+ IPRED_SUBS_UH2_UH(src_top_left, src_top_left, vec2, vec3);
+ SAT_UH4_UH(vec0, vec1, vec2, vec3, 7);
+ PCKEV_B2_SB(vec1, vec0, vec3, vec2, tmp0, tmp1);
+ ST4x4_UB(tmp0, tmp1, 0, 2, 0, 2, dst, dst_stride);
+}
+
+static void intra_predict_tm_8x8_msa(const uint8_t *src_top_ptr,
+ const uint8_t *src_left,
+ uint8_t *dst, int32_t dst_stride) {
+ uint64_t val;
+ uint8_t top_left = src_top_ptr[-1];
+ uint32_t loop_cnt;
+ v16i8 src_left0, src_left1, src_left2, src_left3, tmp0, tmp1, src_top = { 0 };
+ v8u16 src_top_left, vec0, vec1, vec2, vec3;
+ v16u8 src0, src1, src2, src3;
+
+ val = LD(src_top_ptr);
+ src_top = (v16i8)__msa_insert_d((v2i64)src_top, 0, val);
+ src_top_left = (v8u16)__msa_fill_h(top_left);
+
+ for (loop_cnt = 2; loop_cnt--;) {
+ src_left0 = __msa_fill_b(src_left[0]);
+ src_left1 = __msa_fill_b(src_left[1]);
+ src_left2 = __msa_fill_b(src_left[2]);
+ src_left3 = __msa_fill_b(src_left[3]);
+ src_left += 4;
+
+ ILVR_B4_UB(src_left0, src_top, src_left1, src_top, src_left2, src_top,
+ src_left3, src_top, src0, src1, src2, src3);
+ HADD_UB4_UH(src0, src1, src2, src3, vec0, vec1, vec2, vec3);
+ IPRED_SUBS_UH2_UH(src_top_left, src_top_left, vec0, vec1);
+ IPRED_SUBS_UH2_UH(src_top_left, src_top_left, vec2, vec3);
+ SAT_UH4_UH(vec0, vec1, vec2, vec3, 7);
+ PCKEV_B2_SB(vec1, vec0, vec3, vec2, tmp0, tmp1);
+ ST8x4_UB(tmp0, tmp1, dst, dst_stride);
+ dst += (4 * dst_stride);
+ }
+}
+
+static void intra_predict_tm_16x16_msa(const uint8_t *src_top_ptr,
+ const uint8_t *src_left,
+ uint8_t *dst, int32_t dst_stride) {
+ uint8_t top_left = src_top_ptr[-1];
+ uint32_t loop_cnt;
+ v16i8 src_top, src_left0, src_left1, src_left2, src_left3;
+ v8u16 src_top_left, res_r, res_l;
+
+ src_top = LD_SB(src_top_ptr);
+ src_top_left = (v8u16)__msa_fill_h(top_left);
+
+ for (loop_cnt = 4; loop_cnt--;) {
+ src_left0 = __msa_fill_b(src_left[0]);
+ src_left1 = __msa_fill_b(src_left[1]);
+ src_left2 = __msa_fill_b(src_left[2]);
+ src_left3 = __msa_fill_b(src_left[3]);
+ src_left += 4;
+
+ ILVRL_B2_UH(src_left0, src_top, res_r, res_l);
+ HADD_UB2_UH(res_r, res_l, res_r, res_l);
+ IPRED_SUBS_UH2_UH(src_top_left, src_top_left, res_r, res_l);
+
+ SAT_UH2_UH(res_r, res_l, 7);
+ PCKEV_ST_SB(res_r, res_l, dst);
+ dst += dst_stride;
+
+ ILVRL_B2_UH(src_left1, src_top, res_r, res_l);
+ HADD_UB2_UH(res_r, res_l, res_r, res_l);
+ IPRED_SUBS_UH2_UH(src_top_left, src_top_left, res_r, res_l);
+ SAT_UH2_UH(res_r, res_l, 7);
+ PCKEV_ST_SB(res_r, res_l, dst);
+ dst += dst_stride;
+
+ ILVRL_B2_UH(src_left2, src_top, res_r, res_l);
+ HADD_UB2_UH(res_r, res_l, res_r, res_l);
+ IPRED_SUBS_UH2_UH(src_top_left, src_top_left, res_r, res_l);
+ SAT_UH2_UH(res_r, res_l, 7);
+ PCKEV_ST_SB(res_r, res_l, dst);
+ dst += dst_stride;
+
+ ILVRL_B2_UH(src_left3, src_top, res_r, res_l);
+ HADD_UB2_UH(res_r, res_l, res_r, res_l);
+ IPRED_SUBS_UH2_UH(src_top_left, src_top_left, res_r, res_l);
+ SAT_UH2_UH(res_r, res_l, 7);
+ PCKEV_ST_SB(res_r, res_l, dst);
+ dst += dst_stride;
+ }
+}
+
+static void intra_predict_tm_32x32_msa(const uint8_t *src_top,
+ const uint8_t *src_left,
+ uint8_t *dst, int32_t dst_stride) {
+ uint8_t top_left = src_top[-1];
+ uint32_t loop_cnt;
+ v16i8 src_top0, src_top1, src_left0, src_left1, src_left2, src_left3;
+ v8u16 src_top_left, res_r0, res_r1, res_l0, res_l1;
+
+ LD_SB2(src_top, 16, src_top0, src_top1);
+ src_top_left = (v8u16)__msa_fill_h(top_left);
+
+ for (loop_cnt = 8; loop_cnt--;) {
+ src_left0 = __msa_fill_b(src_left[0]);
+ src_left1 = __msa_fill_b(src_left[1]);
+ src_left2 = __msa_fill_b(src_left[2]);
+ src_left3 = __msa_fill_b(src_left[3]);
+ src_left += 4;
+
+ ILVR_B2_UH(src_left0, src_top0, src_left0, src_top1, res_r0, res_r1);
+ ILVL_B2_UH(src_left0, src_top0, src_left0, src_top1, res_l0, res_l1);
+ HADD_UB4_UH(res_r0, res_l0, res_r1, res_l1, res_r0, res_l0, res_r1, res_l1);
+ IPRED_SUBS_UH2_UH(src_top_left, src_top_left, res_r0, res_l0);
+ IPRED_SUBS_UH2_UH(src_top_left, src_top_left, res_r1, res_l1);
+ SAT_UH4_UH(res_r0, res_l0, res_r1, res_l1, 7);
+ PCKEV_ST_SB(res_r0, res_l0, dst);
+ PCKEV_ST_SB(res_r1, res_l1, dst + 16);
+ dst += dst_stride;
+
+ ILVR_B2_UH(src_left1, src_top0, src_left1, src_top1, res_r0, res_r1);
+ ILVL_B2_UH(src_left1, src_top0, src_left1, src_top1, res_l0, res_l1);
+ HADD_UB4_UH(res_r0, res_l0, res_r1, res_l1, res_r0, res_l0, res_r1, res_l1);
+ IPRED_SUBS_UH2_UH(src_top_left, src_top_left, res_r0, res_l0);
+ IPRED_SUBS_UH2_UH(src_top_left, src_top_left, res_r1, res_l1);
+ SAT_UH4_UH(res_r0, res_l0, res_r1, res_l1, 7);
+ PCKEV_ST_SB(res_r0, res_l0, dst);
+ PCKEV_ST_SB(res_r1, res_l1, dst + 16);
+ dst += dst_stride;
+
+ ILVR_B2_UH(src_left2, src_top0, src_left2, src_top1, res_r0, res_r1);
+ ILVL_B2_UH(src_left2, src_top0, src_left2, src_top1, res_l0, res_l1);
+ HADD_UB4_UH(res_r0, res_l0, res_r1, res_l1, res_r0, res_l0, res_r1, res_l1);
+ IPRED_SUBS_UH2_UH(src_top_left, src_top_left, res_r0, res_l0);
+ IPRED_SUBS_UH2_UH(src_top_left, src_top_left, res_r1, res_l1);
+ SAT_UH4_UH(res_r0, res_l0, res_r1, res_l1, 7);
+ PCKEV_ST_SB(res_r0, res_l0, dst);
+ PCKEV_ST_SB(res_r1, res_l1, dst + 16);
+ dst += dst_stride;
+
+ ILVR_B2_UH(src_left3, src_top0, src_left3, src_top1, res_r0, res_r1);
+ ILVL_B2_UH(src_left3, src_top0, src_left3, src_top1, res_l0, res_l1);
+ HADD_UB4_UH(res_r0, res_l0, res_r1, res_l1, res_r0, res_l0, res_r1, res_l1);
+ IPRED_SUBS_UH2_UH(src_top_left, src_top_left, res_r0, res_l0);
+ IPRED_SUBS_UH2_UH(src_top_left, src_top_left, res_r1, res_l1);
+ SAT_UH4_UH(res_r0, res_l0, res_r1, res_l1, 7);
+ PCKEV_ST_SB(res_r0, res_l0, dst);
+ PCKEV_ST_SB(res_r1, res_l1, dst + 16);
+ dst += dst_stride;
+ }
+}
+
+void vp9_v_predictor_4x4_msa(uint8_t *dst, ptrdiff_t y_stride,
+ const uint8_t *above, const uint8_t *left) {
+ (void)left;
+
+ intra_predict_vert_4x4_msa(above, dst, y_stride);
+}
+
+void vp9_v_predictor_8x8_msa(uint8_t *dst, ptrdiff_t y_stride,
+ const uint8_t *above, const uint8_t *left) {
+ (void)left;
+
+ intra_predict_vert_8x8_msa(above, dst, y_stride);
+}
+
+void vp9_v_predictor_16x16_msa(uint8_t *dst, ptrdiff_t y_stride,
+ const uint8_t *above, const uint8_t *left) {
+ (void)left;
+
+ intra_predict_vert_16x16_msa(above, dst, y_stride);
+}
+
+void vp9_v_predictor_32x32_msa(uint8_t *dst, ptrdiff_t y_stride,
+ const uint8_t *above, const uint8_t *left) {
+ (void)left;
+
+ intra_predict_vert_32x32_msa(above, dst, y_stride);
+}
+
+void vp9_h_predictor_4x4_msa(uint8_t *dst, ptrdiff_t y_stride,
+ const uint8_t *above, const uint8_t *left) {
+ (void)above;
+
+ intra_predict_horiz_4x4_msa(left, dst, y_stride);
+}
+
+void vp9_h_predictor_8x8_msa(uint8_t *dst, ptrdiff_t y_stride,
+ const uint8_t *above, const uint8_t *left) {
+ (void)above;
+
+ intra_predict_horiz_8x8_msa(left, dst, y_stride);
+}
+
+void vp9_h_predictor_16x16_msa(uint8_t *dst, ptrdiff_t y_stride,
+ const uint8_t *above, const uint8_t *left) {
+ (void)above;
+
+ intra_predict_horiz_16x16_msa(left, dst, y_stride);
+}
+
+void vp9_h_predictor_32x32_msa(uint8_t *dst, ptrdiff_t y_stride,
+ const uint8_t *above, const uint8_t *left) {
+ (void)above;
+
+ intra_predict_horiz_32x32_msa(left, dst, y_stride);
+}
+
+void vp9_dc_predictor_4x4_msa(uint8_t *dst, ptrdiff_t y_stride,
+ const uint8_t *above, const uint8_t *left) {
+ intra_predict_dc_4x4_msa(above, left, dst, y_stride);
+}
+
+void vp9_dc_predictor_8x8_msa(uint8_t *dst, ptrdiff_t y_stride,
+ const uint8_t *above, const uint8_t *left) {
+ intra_predict_dc_8x8_msa(above, left, dst, y_stride);
+}
+
+void vp9_dc_predictor_16x16_msa(uint8_t *dst, ptrdiff_t y_stride,
+ const uint8_t *above, const uint8_t *left) {
+ intra_predict_dc_16x16_msa(above, left, dst, y_stride);
+}
+
+void vp9_dc_predictor_32x32_msa(uint8_t *dst, ptrdiff_t y_stride,
+ const uint8_t *above, const uint8_t *left) {
+ intra_predict_dc_32x32_msa(above, left, dst, y_stride);
+}
+
+void vp9_dc_top_predictor_4x4_msa(uint8_t *dst, ptrdiff_t y_stride,
+ const uint8_t *above, const uint8_t *left) {
+ (void)left;
+
+ intra_predict_dc_tl_4x4_msa(above, dst, y_stride);
+}
+
+void vp9_dc_top_predictor_8x8_msa(uint8_t *dst, ptrdiff_t y_stride,
+ const uint8_t *above, const uint8_t *left) {
+ (void)left;
+
+ intra_predict_dc_tl_8x8_msa(above, dst, y_stride);
+}
+
+void vp9_dc_top_predictor_16x16_msa(uint8_t *dst, ptrdiff_t y_stride,
+ const uint8_t *above, const uint8_t *left) {
+ (void)left;
+
+ intra_predict_dc_tl_16x16_msa(above, dst, y_stride);
+}
+
+void vp9_dc_top_predictor_32x32_msa(uint8_t *dst, ptrdiff_t y_stride,
+ const uint8_t *above, const uint8_t *left) {
+ (void)left;
+
+ intra_predict_dc_tl_32x32_msa(above, dst, y_stride);
+}
+
+void vp9_dc_left_predictor_4x4_msa(uint8_t *dst, ptrdiff_t y_stride,
+ const uint8_t *above, const uint8_t *left) {
+ (void)above;
+
+ intra_predict_dc_tl_4x4_msa(left, dst, y_stride);
+}
+
+void vp9_dc_left_predictor_8x8_msa(uint8_t *dst, ptrdiff_t y_stride,
+ const uint8_t *above, const uint8_t *left) {
+ (void)above;
+
+ intra_predict_dc_tl_8x8_msa(left, dst, y_stride);
+}
+
+void vp9_dc_left_predictor_16x16_msa(uint8_t *dst, ptrdiff_t y_stride,
+ const uint8_t *above,
+ const uint8_t *left) {
+ (void)above;
+
+ intra_predict_dc_tl_16x16_msa(left, dst, y_stride);
+}
+
+void vp9_dc_left_predictor_32x32_msa(uint8_t *dst, ptrdiff_t y_stride,
+ const uint8_t *above,
+ const uint8_t *left) {
+ (void)above;
+
+ intra_predict_dc_tl_32x32_msa(left, dst, y_stride);
+}
+
+void vp9_dc_128_predictor_4x4_msa(uint8_t *dst, ptrdiff_t y_stride,
+ const uint8_t *above, const uint8_t *left) {
+ (void)above;
+ (void)left;
+
+ intra_predict_128dc_4x4_msa(dst, y_stride);
+}
+
+void vp9_dc_128_predictor_8x8_msa(uint8_t *dst, ptrdiff_t y_stride,
+ const uint8_t *above, const uint8_t *left) {
+ (void)above;
+ (void)left;
+
+ intra_predict_128dc_8x8_msa(dst, y_stride);
+}
+
+void vp9_dc_128_predictor_16x16_msa(uint8_t *dst, ptrdiff_t y_stride,
+ const uint8_t *above, const uint8_t *left) {
+ (void)above;
+ (void)left;
+
+ intra_predict_128dc_16x16_msa(dst, y_stride);
+}
+
+void vp9_dc_128_predictor_32x32_msa(uint8_t *dst, ptrdiff_t y_stride,
+ const uint8_t *above, const uint8_t *left) {
+ (void)above;
+ (void)left;
+
+ intra_predict_128dc_32x32_msa(dst, y_stride);
+}
+
+void vp9_tm_predictor_4x4_msa(uint8_t *dst, ptrdiff_t y_stride,
+ const uint8_t *above, const uint8_t *left) {
+ intra_predict_tm_4x4_msa(above, left, dst, y_stride);
+}
+
+void vp9_tm_predictor_8x8_msa(uint8_t *dst, ptrdiff_t y_stride,
+ const uint8_t *above, const uint8_t *left) {
+ intra_predict_tm_8x8_msa(above, left, dst, y_stride);
+}
+
+void vp9_tm_predictor_16x16_msa(uint8_t *dst, ptrdiff_t y_stride,
+ const uint8_t *above, const uint8_t *left) {
+ intra_predict_tm_16x16_msa(above, left, dst, y_stride);
+}
+
+void vp9_tm_predictor_32x32_msa(uint8_t *dst, ptrdiff_t y_stride,
+ const uint8_t *above, const uint8_t *left) {
+ intra_predict_tm_32x32_msa(above, left, dst, y_stride);
+}
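For reference, this is the behaviour the MSA routines above vectorize, shown as a minimal scalar sketch; the function names and plain loops are illustrative only, but the rounding and clipping match the code in the patch.

#include <stdint.h>

static void dc_predictor_ref(uint8_t *dst, int stride, int bs,
                             const uint8_t *above, const uint8_t *left) {
  int i, j, sum = 0, dc;
  for (i = 0; i < bs; ++i) sum += above[i] + left[i];
  dc = (sum + bs) / (2 * bs);  /* rounded average, as the __msa_srari_* step */
  for (i = 0; i < bs; ++i)
    for (j = 0; j < bs; ++j) dst[i * stride + j] = (uint8_t)dc;
}

static void tm_predictor_ref(uint8_t *dst, int stride, int bs,
                             const uint8_t *above, const uint8_t *left) {
  const int top_left = above[-1];
  int i, j;
  for (i = 0; i < bs; ++i)
    for (j = 0; j < bs; ++j) {
      const int v = left[i] + above[j] - top_left;
      dst[i * stride + j] = (uint8_t)(v < 0 ? 0 : (v > 255 ? 255 : v));
    }
}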
diff --git a/vp9/common/mips/msa/vp9_macros_msa.h b/vp9/common/mips/msa/vp9_macros_msa.h
index 3751e35..f1217d5 100644
--- a/vp9/common/mips/msa/vp9_macros_msa.h
+++ b/vp9/common/mips/msa/vp9_macros_msa.h
@@ -743,6 +743,26 @@
CLIP_SH2_0_255(in2, in3); \
}
+/* Description : Horizontal addition of unsigned byte vector elements
+ Arguments : Inputs - in0, in1
+ Outputs - out0, out1
+ Return Type - as per RTYPE
+ Details : Each unsigned odd-positioned byte element of 'in0' is added
+ to the adjacent even-positioned byte element (pairwise) and the
+ halfword result is stored in 'out0'
+*/
+#define HADD_UB2(RTYPE, in0, in1, out0, out1) { \
+ out0 = (RTYPE)__msa_hadd_u_h((v16u8)in0, (v16u8)in0); \
+ out1 = (RTYPE)__msa_hadd_u_h((v16u8)in1, (v16u8)in1); \
+}
+#define HADD_UB2_UH(...) HADD_UB2(v8u16, __VA_ARGS__)
+
+#define HADD_UB4(RTYPE, in0, in1, in2, in3, out0, out1, out2, out3) { \
+ HADD_UB2(RTYPE, in0, in1, out0, out1); \
+ HADD_UB2(RTYPE, in2, in3, out2, out3); \
+}
+#define HADD_UB4_UH(...) HADD_UB4(v8u16, __VA_ARGS__)
+
/* Description : Insert specified word elements from input vectors to 1
destination vector
Arguments : Inputs - in0, in1, in2, in3 (4 input vectors)
@@ -755,6 +775,19 @@
}
#define INSERT_W2_SB(...) INSERT_W2(v16i8, __VA_ARGS__)
+/* Description : Insert specified double word elements from input vectors to 1
+ destination vector
+ Arguments : Inputs - in0, in1 (2 input vectors)
+ Outputs - out (output vector)
+ Return Type - as per RTYPE
+*/
+#define INSERT_D2(RTYPE, in0, in1, out) { \
+ out = (RTYPE)__msa_insert_d((v2i64)out, 0, in0); \
+ out = (RTYPE)__msa_insert_d((v2i64)out, 1, in1); \
+}
+#define INSERT_D2_UB(...) INSERT_D2(v16u8, __VA_ARGS__)
+#define INSERT_D2_SB(...) INSERT_D2(v16i8, __VA_ARGS__)
+
/* Description : Interleave even byte elements from vectors
Arguments : Inputs - in0, in1, in2, in3
Outputs - out0, out1
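A minimal usage sketch of the two new macro groups (illustrative only; it assumes a row of top pixels and a column of left pixels, as in intra_predict_dc_16x16_msa and intra_predict_dc_8x8_msa above):

v16u8 top  = LD_UB(src_top);               /* 16 'above' pixels              */
v16u8 left = LD_UB(src_left);              /* 16 'left' pixels               */
v8u16 sum_top, sum_left;
HADD_UB2_UH(top, left, sum_top, sum_left); /* pairwise byte sums, 8 halfwords each */

v16u8 both = { 0 };
INSERT_D2_UB(LD(src_top), LD(src_left), both); /* 8 top + 8 left bytes in one vector */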
diff --git a/vp9/common/vp9_filter.c b/vp9/common/vp9_filter.c
index afcdf22..b256d4a 100644
--- a/vp9/common/vp9_filter.c
+++ b/vp9/common/vp9_filter.c
@@ -12,7 +12,8 @@
#include "vp9/common/vp9_filter.h"
-const InterpKernel vp9_bilinear_filters[SUBPEL_SHIFTS] = {
+DECLARE_ALIGNED(256, const InterpKernel,
+ vp9_bilinear_filters[SUBPEL_SHIFTS]) = {
{ 0, 0, 0, 128, 0, 0, 0, 0 },
{ 0, 0, 0, 120, 8, 0, 0, 0 },
{ 0, 0, 0, 112, 16, 0, 0, 0 },
diff --git a/vp9/common/vp9_loopfilter.c b/vp9/common/vp9_loopfilter.c
index 484e457..9816728 100644
--- a/vp9/common/vp9_loopfilter.c
+++ b/vp9/common/vp9_loopfilter.c
@@ -267,8 +267,8 @@
for (seg_id = 0; seg_id < MAX_SEGMENTS; seg_id++) {
int lvl_seg = default_filt_lvl;
- if (vp9_segfeature_active(seg, seg_id, SEG_LVL_ALT_LF)) {
- const int data = vp9_get_segdata(seg, seg_id, SEG_LVL_ALT_LF);
+ if (segfeature_active(seg, seg_id, SEG_LVL_ALT_LF)) {
+ const int data = get_segdata(seg, seg_id, SEG_LVL_ALT_LF);
lvl_seg = clamp(seg->abs_delta == SEGMENT_ABSDATA ?
data : default_filt_lvl + data,
0, MAX_LOOP_FILTER);
diff --git a/vp9/common/vp9_quant_common.c b/vp9/common/vp9_quant_common.c
index 564a3eb..d83f3c1 100644
--- a/vp9/common/vp9_quant_common.c
+++ b/vp9/common/vp9_quant_common.c
@@ -266,8 +266,8 @@
int vp9_get_qindex(const struct segmentation *seg, int segment_id,
int base_qindex) {
- if (vp9_segfeature_active(seg, segment_id, SEG_LVL_ALT_Q)) {
- const int data = vp9_get_segdata(seg, segment_id, SEG_LVL_ALT_Q);
+ if (segfeature_active(seg, segment_id, SEG_LVL_ALT_Q)) {
+ const int data = get_segdata(seg, segment_id, SEG_LVL_ALT_Q);
const int seg_qindex = seg->abs_delta == SEGMENT_ABSDATA ?
data : base_qindex + data;
return clamp(seg_qindex, 0, MAXQ);
diff --git a/vp9/common/vp9_rtcd_defs.pl b/vp9/common/vp9_rtcd_defs.pl
index bbe200d..5035126 100644
--- a/vp9/common/vp9_rtcd_defs.pl
+++ b/vp9/common/vp9_rtcd_defs.pl
@@ -66,7 +66,7 @@
specialize qw/vp9_d63_predictor_4x4/, "$ssse3_x86inc";
add_proto qw/void vp9_h_predictor_4x4/, "uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left";
-specialize qw/vp9_h_predictor_4x4 neon dspr2/, "$ssse3_x86inc";
+specialize qw/vp9_h_predictor_4x4 neon dspr2 msa/, "$ssse3_x86inc";
add_proto qw/void vp9_d117_predictor_4x4/, "uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left";
specialize qw/vp9_d117_predictor_4x4/;
@@ -78,22 +78,22 @@
specialize qw/vp9_d153_predictor_4x4/, "$ssse3_x86inc";
add_proto qw/void vp9_v_predictor_4x4/, "uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left";
-specialize qw/vp9_v_predictor_4x4 neon/, "$sse_x86inc";
+specialize qw/vp9_v_predictor_4x4 neon msa/, "$sse_x86inc";
add_proto qw/void vp9_tm_predictor_4x4/, "uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left";
-specialize qw/vp9_tm_predictor_4x4 neon dspr2/, "$sse_x86inc";
+specialize qw/vp9_tm_predictor_4x4 neon dspr2 msa/, "$sse_x86inc";
add_proto qw/void vp9_dc_predictor_4x4/, "uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left";
-specialize qw/vp9_dc_predictor_4x4 dspr2/, "$sse_x86inc";
+specialize qw/vp9_dc_predictor_4x4 dspr2 msa/, "$sse_x86inc";
add_proto qw/void vp9_dc_top_predictor_4x4/, "uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left";
-specialize qw/vp9_dc_top_predictor_4x4/, "$sse_x86inc";
+specialize qw/vp9_dc_top_predictor_4x4 msa/, "$sse_x86inc";
add_proto qw/void vp9_dc_left_predictor_4x4/, "uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left";
-specialize qw/vp9_dc_left_predictor_4x4/, "$sse_x86inc";
+specialize qw/vp9_dc_left_predictor_4x4 msa/, "$sse_x86inc";
add_proto qw/void vp9_dc_128_predictor_4x4/, "uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left";
-specialize qw/vp9_dc_128_predictor_4x4/, "$sse_x86inc";
+specialize qw/vp9_dc_128_predictor_4x4 msa/, "$sse_x86inc";
add_proto qw/void vp9_d207_predictor_8x8/, "uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left";
specialize qw/vp9_d207_predictor_8x8/, "$ssse3_x86inc";
@@ -105,7 +105,7 @@
specialize qw/vp9_d63_predictor_8x8/, "$ssse3_x86inc";
add_proto qw/void vp9_h_predictor_8x8/, "uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left";
-specialize qw/vp9_h_predictor_8x8 neon dspr2/, "$ssse3_x86inc";
+specialize qw/vp9_h_predictor_8x8 neon dspr2 msa/, "$ssse3_x86inc";
add_proto qw/void vp9_d117_predictor_8x8/, "uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left";
specialize qw/vp9_d117_predictor_8x8/;
@@ -117,22 +117,22 @@
specialize qw/vp9_d153_predictor_8x8/, "$ssse3_x86inc";
add_proto qw/void vp9_v_predictor_8x8/, "uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left";
-specialize qw/vp9_v_predictor_8x8 neon/, "$sse_x86inc";
+specialize qw/vp9_v_predictor_8x8 neon msa/, "$sse_x86inc";
add_proto qw/void vp9_tm_predictor_8x8/, "uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left";
-specialize qw/vp9_tm_predictor_8x8 neon dspr2/, "$sse2_x86inc";
+specialize qw/vp9_tm_predictor_8x8 neon dspr2 msa/, "$sse2_x86inc";
add_proto qw/void vp9_dc_predictor_8x8/, "uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left";
-specialize qw/vp9_dc_predictor_8x8 dspr2 neon/, "$sse_x86inc";
+specialize qw/vp9_dc_predictor_8x8 dspr2 neon msa/, "$sse_x86inc";
add_proto qw/void vp9_dc_top_predictor_8x8/, "uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left";
-specialize qw/vp9_dc_top_predictor_8x8 neon/, "$sse_x86inc";
+specialize qw/vp9_dc_top_predictor_8x8 neon msa/, "$sse_x86inc";
add_proto qw/void vp9_dc_left_predictor_8x8/, "uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left";
-specialize qw/vp9_dc_left_predictor_8x8 neon/, "$sse_x86inc";
+specialize qw/vp9_dc_left_predictor_8x8 neon msa/, "$sse_x86inc";
add_proto qw/void vp9_dc_128_predictor_8x8/, "uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left";
-specialize qw/vp9_dc_128_predictor_8x8 neon/, "$sse_x86inc";
+specialize qw/vp9_dc_128_predictor_8x8 neon msa/, "$sse_x86inc";
add_proto qw/void vp9_d207_predictor_16x16/, "uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left";
specialize qw/vp9_d207_predictor_16x16/, "$ssse3_x86inc";
@@ -144,7 +144,7 @@
specialize qw/vp9_d63_predictor_16x16/, "$ssse3_x86inc";
add_proto qw/void vp9_h_predictor_16x16/, "uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left";
-specialize qw/vp9_h_predictor_16x16 neon dspr2/, "$ssse3_x86inc";
+specialize qw/vp9_h_predictor_16x16 neon dspr2 msa/, "$ssse3_x86inc";
add_proto qw/void vp9_d117_predictor_16x16/, "uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left";
specialize qw/vp9_d117_predictor_16x16/;
@@ -156,22 +156,22 @@
specialize qw/vp9_d153_predictor_16x16/, "$ssse3_x86inc";
add_proto qw/void vp9_v_predictor_16x16/, "uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left";
-specialize qw/vp9_v_predictor_16x16 neon/, "$sse2_x86inc";
+specialize qw/vp9_v_predictor_16x16 neon msa/, "$sse2_x86inc";
add_proto qw/void vp9_tm_predictor_16x16/, "uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left";
-specialize qw/vp9_tm_predictor_16x16 neon/, "$sse2_x86inc";
+specialize qw/vp9_tm_predictor_16x16 neon msa/, "$sse2_x86inc";
add_proto qw/void vp9_dc_predictor_16x16/, "uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left";
-specialize qw/vp9_dc_predictor_16x16 dspr2 neon/, "$sse2_x86inc";
+specialize qw/vp9_dc_predictor_16x16 dspr2 neon msa/, "$sse2_x86inc";
add_proto qw/void vp9_dc_top_predictor_16x16/, "uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left";
-specialize qw/vp9_dc_top_predictor_16x16 neon/, "$sse2_x86inc";
+specialize qw/vp9_dc_top_predictor_16x16 neon msa/, "$sse2_x86inc";
add_proto qw/void vp9_dc_left_predictor_16x16/, "uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left";
-specialize qw/vp9_dc_left_predictor_16x16 neon/, "$sse2_x86inc";
+specialize qw/vp9_dc_left_predictor_16x16 neon msa/, "$sse2_x86inc";
add_proto qw/void vp9_dc_128_predictor_16x16/, "uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left";
-specialize qw/vp9_dc_128_predictor_16x16 neon/, "$sse2_x86inc";
+specialize qw/vp9_dc_128_predictor_16x16 neon msa/, "$sse2_x86inc";
add_proto qw/void vp9_d207_predictor_32x32/, "uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left";
specialize qw/vp9_d207_predictor_32x32/, "$ssse3_x86inc";
@@ -183,7 +183,7 @@
specialize qw/vp9_d63_predictor_32x32/, "$ssse3_x86inc";
add_proto qw/void vp9_h_predictor_32x32/, "uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left";
-specialize qw/vp9_h_predictor_32x32 neon/, "$ssse3_x86inc";
+specialize qw/vp9_h_predictor_32x32 neon msa/, "$ssse3_x86inc";
add_proto qw/void vp9_d117_predictor_32x32/, "uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left";
specialize qw/vp9_d117_predictor_32x32/;
@@ -195,22 +195,22 @@
specialize qw/vp9_d153_predictor_32x32/;
add_proto qw/void vp9_v_predictor_32x32/, "uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left";
-specialize qw/vp9_v_predictor_32x32 neon/, "$sse2_x86inc";
+specialize qw/vp9_v_predictor_32x32 neon msa/, "$sse2_x86inc";
add_proto qw/void vp9_tm_predictor_32x32/, "uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left";
-specialize qw/vp9_tm_predictor_32x32 neon/, "$sse2_x86_64";
+specialize qw/vp9_tm_predictor_32x32 neon msa/, "$sse2_x86_64";
add_proto qw/void vp9_dc_predictor_32x32/, "uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left";
-specialize qw/vp9_dc_predictor_32x32/, "$sse2_x86inc";
+specialize qw/vp9_dc_predictor_32x32 msa/, "$sse2_x86inc";
add_proto qw/void vp9_dc_top_predictor_32x32/, "uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left";
-specialize qw/vp9_dc_top_predictor_32x32/, "$sse2_x86inc";
+specialize qw/vp9_dc_top_predictor_32x32 msa/, "$sse2_x86inc";
add_proto qw/void vp9_dc_left_predictor_32x32/, "uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left";
-specialize qw/vp9_dc_left_predictor_32x32/, "$sse2_x86inc";
+specialize qw/vp9_dc_left_predictor_32x32 msa/, "$sse2_x86inc";
add_proto qw/void vp9_dc_128_predictor_32x32/, "uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left";
-specialize qw/vp9_dc_128_predictor_32x32/, "$sse2_x86inc";
+specialize qw/vp9_dc_128_predictor_32x32 msa/, "$sse2_x86inc";
#
# Loopfilter
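Each `specialize ... msa` line above feeds the RTCD generator; on a MIPS/MSA build it emits the _msa prototype next to the C one and maps the dispatch name onto it. A simplified sketch of the corresponding vp9_rtcd.h output (assuming build-time selection, since MIPS has no runtime CPU detection here):

void vp9_h_predictor_4x4_c(uint8_t *dst, ptrdiff_t y_stride,
                           const uint8_t *above, const uint8_t *left);
void vp9_h_predictor_4x4_msa(uint8_t *dst, ptrdiff_t y_stride,
                             const uint8_t *above, const uint8_t *left);
#define vp9_h_predictor_4x4 vp9_h_predictor_4x4_msa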
diff --git a/vp9/common/vp9_seg_common.c b/vp9/common/vp9_seg_common.c
index 910200e..471e238 100644
--- a/vp9/common/vp9_seg_common.c
+++ b/vp9/common/vp9_seg_common.c
@@ -25,12 +25,6 @@
// the coding mechanism is still subject to change so these provide a
// convenient single point of change.
-int vp9_segfeature_active(const struct segmentation *seg, int segment_id,
- SEG_LVL_FEATURES feature_id) {
- return seg->enabled &&
- (seg->feature_mask[segment_id] & (1 << feature_id));
-}
-
void vp9_clearall_segfeatures(struct segmentation *seg) {
vp9_zero(seg->feature_data);
vp9_zero(seg->feature_mask);
@@ -60,12 +54,6 @@
seg->feature_data[segment_id][feature_id] = seg_data;
}
-int vp9_get_segdata(const struct segmentation *seg, int segment_id,
- SEG_LVL_FEATURES feature_id) {
- return seg->feature_data[segment_id][feature_id];
-}
-
-
const vp9_tree_index vp9_segment_tree[TREE_SIZE(MAX_SEGMENTS)] = {
2, 4, 6, 8, 10, 12,
0, -1, -2, -3, -4, -5, -6, -7
diff --git a/vp9/common/vp9_seg_common.h b/vp9/common/vp9_seg_common.h
index ff2d66a..95c9918 100644
--- a/vp9/common/vp9_seg_common.h
+++ b/vp9/common/vp9_seg_common.h
@@ -49,9 +49,12 @@
unsigned int feature_mask[MAX_SEGMENTS];
};
-int vp9_segfeature_active(const struct segmentation *seg,
- int segment_id,
- SEG_LVL_FEATURES feature_id);
+static INLINE int segfeature_active(const struct segmentation *seg,
+ int segment_id,
+ SEG_LVL_FEATURES feature_id) {
+ return seg->enabled &&
+ (seg->feature_mask[segment_id] & (1 << feature_id));
+}
void vp9_clearall_segfeatures(struct segmentation *seg);
@@ -68,9 +71,10 @@
SEG_LVL_FEATURES feature_id,
int seg_data);
-int vp9_get_segdata(const struct segmentation *seg,
- int segment_id,
- SEG_LVL_FEATURES feature_id);
+static INLINE int get_segdata(const struct segmentation *seg, int segment_id,
+ SEG_LVL_FEATURES feature_id) {
+ return seg->feature_data[segment_id][feature_id];
+}
extern const vp9_tree_index vp9_segment_tree[TREE_SIZE(MAX_SEGMENTS)];
diff --git a/vp9/decoder/vp9_decodemv.c b/vp9/decoder/vp9_decodemv.c
index bc03caf..d34926d 100644
--- a/vp9/decoder/vp9_decodemv.c
+++ b/vp9/decoder/vp9_decodemv.c
@@ -177,7 +177,7 @@
static int read_skip(VP9_COMMON *cm, const MACROBLOCKD *xd,
int segment_id, vp9_reader *r) {
- if (vp9_segfeature_active(&cm->seg, segment_id, SEG_LVL_SKIP)) {
+ if (segfeature_active(&cm->seg, segment_id, SEG_LVL_SKIP)) {
return 1;
} else {
const int ctx = vp9_get_skip_context(xd);
@@ -307,9 +307,9 @@
FRAME_CONTEXT *const fc = cm->fc;
FRAME_COUNTS *counts = xd->counts;
- if (vp9_segfeature_active(&cm->seg, segment_id, SEG_LVL_REF_FRAME)) {
- ref_frame[0] = (MV_REFERENCE_FRAME)vp9_get_segdata(&cm->seg, segment_id,
- SEG_LVL_REF_FRAME);
+ if (segfeature_active(&cm->seg, segment_id, SEG_LVL_REF_FRAME)) {
+ ref_frame[0] = (MV_REFERENCE_FRAME)get_segdata(&cm->seg, segment_id,
+ SEG_LVL_REF_FRAME);
ref_frame[1] = NONE;
} else {
const REFERENCE_MODE mode = read_block_reference_mode(cm, xd, r);
@@ -444,9 +444,8 @@
static int read_is_inter_block(VP9_COMMON *const cm, MACROBLOCKD *const xd,
int segment_id, vp9_reader *r) {
- if (vp9_segfeature_active(&cm->seg, segment_id, SEG_LVL_REF_FRAME)) {
- return vp9_get_segdata(&cm->seg, segment_id, SEG_LVL_REF_FRAME) !=
- INTRA_FRAME;
+ if (segfeature_active(&cm->seg, segment_id, SEG_LVL_REF_FRAME)) {
+ return get_segdata(&cm->seg, segment_id, SEG_LVL_REF_FRAME) != INTRA_FRAME;
} else {
const int ctx = vp9_get_intra_inter_context(xd);
const int is_inter = vp9_read(r, cm->fc->intra_inter_prob[ctx]);
@@ -493,7 +492,7 @@
mi_row, mi_col, fpm_sync, (void *)pbi, inter_mode_ctx);
}
- if (vp9_segfeature_active(&cm->seg, mbmi->segment_id, SEG_LVL_SKIP)) {
+ if (segfeature_active(&cm->seg, mbmi->segment_id, SEG_LVL_SKIP)) {
mbmi->mode = ZEROMV;
if (bsize < BLOCK_8X8) {
vpx_internal_error(xd->error_info, VPX_CODEC_UNSUP_BITSTREAM,
diff --git a/vp9/encoder/vp9_aq_cyclicrefresh.c b/vp9/encoder/vp9_aq_cyclicrefresh.c
index 71c1e0b..df70d48 100644
--- a/vp9/encoder/vp9_aq_cyclicrefresh.c
+++ b/vp9/encoder/vp9_aq_cyclicrefresh.c
@@ -95,19 +95,6 @@
return 1;
}
-static void adjust_cyclic_refresh_parameters(VP9_COMP *const cpi) {
- const VP9_COMMON *const cm = &cpi->common;
- const RATE_CONTROL *const rc = &cpi->rc;
- CYCLIC_REFRESH *const cr = cpi->cyclic_refresh;
- // Adjust some parameters, currently only for low resolutions at low bitrates.
- if (cm->width <= 352 &&
- cm->height <= 288 &&
- rc->avg_frame_bandwidth < 3400) {
- cr->motion_thresh = 4;
- cr->rate_boost_fac = 1.25;
- }
-}
-
// Check if this coding block, of size bsize, should be considered for refresh
// (lower-qp coding). Decision can be based on various factors, such as
// size of the coding block (i.e., below min_block size rejected), coding
@@ -435,18 +422,30 @@
cr->sb_index = i;
}
-// Set/update global/frame level cyclic refresh parameters.
+// Set cyclic refresh parameters.
void vp9_cyclic_refresh_update_parameters(VP9_COMP *const cpi) {
const RATE_CONTROL *const rc = &cpi->rc;
+ const VP9_COMMON *const cm = &cpi->common;
CYCLIC_REFRESH *const cr = cpi->cyclic_refresh;
cr->percent_refresh = 10;
+ cr->max_qdelta_perc = 50;
+ cr->time_for_refresh = 0;
// Use larger delta-qp (increase rate_ratio_qdelta) for first few (~4)
- // periods of the refresh cycle, after a key frame. This corresponds to ~40
- // frames with cr->percent_refresh = 10.
- if (rc->frames_since_key < 40)
+ // periods of the refresh cycle, after a key frame.
+ if (rc->frames_since_key < 4 * cr->percent_refresh)
cr->rate_ratio_qdelta = 3.0;
else
cr->rate_ratio_qdelta = 2.0;
+ // Adjust some parameters for low resolutions at low bitrates.
+ if (cm->width <= 352 &&
+ cm->height <= 288 &&
+ rc->avg_frame_bandwidth < 3400) {
+ cr->motion_thresh = 4;
+ cr->rate_boost_fac = 1.25;
+ } else {
+ cr->motion_thresh = 32;
+ cr->rate_boost_fac = 1.7;
+ }
}
// Setup cyclic background refresh: set delta q and segmentation map.
@@ -475,9 +474,6 @@
int qindex2;
const double q = vp9_convert_qindex_to_q(cm->base_qindex, cm->bit_depth);
vp9_clear_system_state();
- cr->max_qdelta_perc = 50;
- cr->time_for_refresh = 0;
- cr->rate_boost_fac = 1.7;
// Set rate threshold to some multiple (set to 2 for now) of the target
// rate (target is given by sb64_target_rate and scaled by 256).
cr->thresh_rate_sb = ((int64_t)(rc->sb64_target_rate) << 8) << 2;
@@ -485,9 +481,6 @@
// q will not exceed 457, so (q * q) is within 32bit; see:
// vp9_convert_qindex_to_q(), vp9_ac_quant(), ac_qlookup*[].
cr->thresh_dist_sb = ((int64_t)(q * q)) << 2;
- cr->motion_thresh = 32;
-
- adjust_cyclic_refresh_parameters(cpi);
// Set up segmentation.
// Clear down the segment map.
diff --git a/vp9/encoder/vp9_bitstream.c b/vp9/encoder/vp9_bitstream.c
index d20e067..092d265 100644
--- a/vp9/encoder/vp9_bitstream.c
+++ b/vp9/encoder/vp9_bitstream.c
@@ -93,7 +93,7 @@
static int write_skip(const VP9_COMMON *cm, const MACROBLOCKD *xd,
int segment_id, const MODE_INFO *mi, vp9_writer *w) {
- if (vp9_segfeature_active(&cm->seg, segment_id, SEG_LVL_SKIP)) {
+ if (segfeature_active(&cm->seg, segment_id, SEG_LVL_SKIP)) {
return 1;
} else {
const int skip = mi->mbmi.skip;
@@ -207,10 +207,10 @@
// If segment level coding of this signal is disabled...
// or the segment allows multiple reference frame options
- if (vp9_segfeature_active(&cm->seg, segment_id, SEG_LVL_REF_FRAME)) {
+ if (segfeature_active(&cm->seg, segment_id, SEG_LVL_REF_FRAME)) {
assert(!is_compound);
assert(mbmi->ref_frame[0] ==
- vp9_get_segdata(&cm->seg, segment_id, SEG_LVL_REF_FRAME));
+ get_segdata(&cm->seg, segment_id, SEG_LVL_REF_FRAME));
} else {
// does the feature use compound prediction or not
// (if not specified at the frame/segment level)
@@ -264,7 +264,7 @@
skip = write_skip(cm, xd, segment_id, mi, w);
- if (!vp9_segfeature_active(seg, segment_id, SEG_LVL_REF_FRAME))
+ if (!segfeature_active(seg, segment_id, SEG_LVL_REF_FRAME))
vp9_write(w, is_inter, vp9_get_intra_inter_prob(cm, xd));
if (bsize >= BLOCK_8X8 && cm->tx_mode == TX_MODE_SELECT &&
@@ -293,7 +293,7 @@
write_ref_frames(cm, xd, w);
// If segment skip is not enabled code the mode.
- if (!vp9_segfeature_active(seg, segment_id, SEG_LVL_SKIP)) {
+ if (!segfeature_active(seg, segment_id, SEG_LVL_SKIP)) {
if (bsize >= BLOCK_8X8) {
write_inter_mode(w, mode, inter_probs);
}
@@ -787,10 +787,10 @@
for (i = 0; i < MAX_SEGMENTS; i++) {
for (j = 0; j < SEG_LVL_MAX; j++) {
- const int active = vp9_segfeature_active(seg, i, j);
+ const int active = segfeature_active(seg, i, j);
vp9_wb_write_bit(wb, active);
if (active) {
- const int data = vp9_get_segdata(seg, i, j);
+ const int data = get_segdata(seg, i, j);
const int data_max = vp9_seg_feature_data_max(j);
if (vp9_is_segfeature_signed(j)) {
diff --git a/vp9/encoder/vp9_encodeframe.c b/vp9/encoder/vp9_encodeframe.c
index 8864e0e..ddfe69f 100644
--- a/vp9/encoder/vp9_encodeframe.c
+++ b/vp9/encoder/vp9_encodeframe.c
@@ -1051,7 +1051,7 @@
if (!output_enabled)
return;
- if (!vp9_segfeature_active(&cm->seg, mbmi->segment_id, SEG_LVL_SKIP)) {
+ if (!segfeature_active(&cm->seg, mbmi->segment_id, SEG_LVL_SKIP)) {
for (i = 0; i < TX_MODES; i++)
rdc->tx_select_diff[i] += ctx->tx_rd_diff[i];
}
@@ -1248,7 +1248,7 @@
vp9_rd_pick_intra_mode_sb(cpi, x, rd_cost, bsize, ctx, best_rd);
} else {
if (bsize >= BLOCK_8X8) {
- if (vp9_segfeature_active(&cm->seg, mbmi->segment_id, SEG_LVL_SKIP))
+ if (segfeature_active(&cm->seg, mbmi->segment_id, SEG_LVL_SKIP))
vp9_rd_pick_inter_mode_sb_seg_skip(cpi, tile_data, x, rd_cost, bsize,
ctx, best_rd);
else
@@ -1291,8 +1291,8 @@
if (!frame_is_intra_only(cm)) {
FRAME_COUNTS *const counts = td->counts;
const int inter_block = is_inter_block(mbmi);
- const int seg_ref_active = vp9_segfeature_active(&cm->seg, mbmi->segment_id,
- SEG_LVL_REF_FRAME);
+ const int seg_ref_active = segfeature_active(&cm->seg, mbmi->segment_id,
+ SEG_LVL_REF_FRAME);
if (!seg_ref_active) {
counts->intra_inter[vp9_get_intra_inter_context(xd)][inter_block]++;
// If the segment reference feature is enabled we have only a single
@@ -1317,7 +1317,7 @@
}
}
if (inter_block &&
- !vp9_segfeature_active(&cm->seg, mbmi->segment_id, SEG_LVL_SKIP)) {
+ !segfeature_active(&cm->seg, mbmi->segment_id, SEG_LVL_SKIP)) {
const int mode_ctx = mbmi->mode_context[mbmi->ref_frame[0]];
if (bsize >= BLOCK_8X8) {
const PREDICTION_MODE mode = mbmi->mode;
@@ -2849,7 +2849,7 @@
const uint8_t *const map = seg->update_map ? cpi->segmentation_map
: cm->last_frame_seg_map;
int segment_id = vp9_get_segment_id(cm, map, BLOCK_64X64, mi_row, mi_col);
- seg_skip = vp9_segfeature_active(seg, segment_id, SEG_LVL_SKIP);
+ seg_skip = segfeature_active(seg, segment_id, SEG_LVL_SKIP);
}
x->source_variance = UINT_MAX;
@@ -2909,7 +2909,7 @@
static int check_dual_ref_flags(VP9_COMP *cpi) {
const int ref_flags = cpi->ref_frame_flags;
- if (vp9_segfeature_active(&cpi->common.seg, 1, SEG_LVL_REF_FRAME)) {
+ if (segfeature_active(&cpi->common.seg, 1, SEG_LVL_REF_FRAME)) {
return 0;
} else {
return (!!(ref_flags & VP9_GOLD_FLAG) + !!(ref_flags & VP9_LAST_FLAG)
@@ -2984,7 +2984,7 @@
if (cm->frame_type == KEY_FRAME)
hybrid_intra_mode_search(cpi, x, rd_cost, bsize, ctx);
- else if (vp9_segfeature_active(&cm->seg, mbmi->segment_id, SEG_LVL_SKIP))
+ else if (segfeature_active(&cm->seg, mbmi->segment_id, SEG_LVL_SKIP))
set_mode_info_seg_skip(x, cm->tx_mode, rd_cost, bsize);
else if (bsize >= BLOCK_8X8)
vp9_pick_inter_mode(cpi, x, tile_data, mi_row, mi_col,
@@ -3599,7 +3599,7 @@
const uint8_t *const map = seg->update_map ? cpi->segmentation_map
: cm->last_frame_seg_map;
int segment_id = vp9_get_segment_id(cm, map, BLOCK_64X64, mi_row, mi_col);
- seg_skip = vp9_segfeature_active(seg, segment_id, SEG_LVL_SKIP);
+ seg_skip = segfeature_active(seg, segment_id, SEG_LVL_SKIP);
if (seg_skip) {
partition_search_type = FIXED_PARTITION;
}
@@ -4157,8 +4157,8 @@
MODE_INFO **mi_8x8 = xd->mi;
MODE_INFO *mi = mi_8x8[0];
MB_MODE_INFO *mbmi = &mi->mbmi;
- const int seg_skip = vp9_segfeature_active(&cm->seg, mbmi->segment_id,
- SEG_LVL_SKIP);
+ const int seg_skip = segfeature_active(&cm->seg, mbmi->segment_id,
+ SEG_LVL_SKIP);
const int mis = cm->mi_stride;
const int mi_width = num_8x8_blocks_wide_lookup[bsize];
const int mi_height = num_8x8_blocks_high_lookup[bsize];
diff --git a/vp9/encoder/vp9_pickmode.c b/vp9/encoder/vp9_pickmode.c
index 60bff57..2479b6e 100644
--- a/vp9/encoder/vp9_pickmode.c
+++ b/vp9/encoder/vp9_pickmode.c
@@ -1692,8 +1692,8 @@
// If the segment reference frame feature is enabled....
// then do nothing if the current ref frame is not allowed..
- if (vp9_segfeature_active(seg, segment_id, SEG_LVL_REF_FRAME) &&
- vp9_get_segdata(seg, segment_id, SEG_LVL_REF_FRAME) != (int)ref_frame)
+ if (segfeature_active(seg, segment_id, SEG_LVL_REF_FRAME) &&
+ get_segdata(seg, segment_id, SEG_LVL_REF_FRAME) != (int)ref_frame)
continue;
mbmi->ref_frame[0] = ref_frame;
diff --git a/vp9/encoder/vp9_quantize.c b/vp9/encoder/vp9_quantize.c
index db5460b..e6e17c0 100644
--- a/vp9/encoder/vp9_quantize.c
+++ b/vp9/encoder/vp9_quantize.c
@@ -678,7 +678,7 @@
x->plane[i].quant_thred[1] = x->plane[i].zbin[1] * x->plane[i].zbin[1];
}
- x->skip_block = vp9_segfeature_active(&cm->seg, segment_id, SEG_LVL_SKIP);
+ x->skip_block = segfeature_active(&cm->seg, segment_id, SEG_LVL_SKIP);
x->q_index = qindex;
x->errorperbit = rdmult >> 6;
diff --git a/vp9/encoder/vp9_rdopt.c b/vp9/encoder/vp9_rdopt.c
index eacc63f..162d4de 100644
--- a/vp9/encoder/vp9_rdopt.c
+++ b/vp9/encoder/vp9_rdopt.c
@@ -2120,8 +2120,8 @@
unsigned int *ref_costs_single,
unsigned int *ref_costs_comp,
vp9_prob *comp_mode_p) {
- int seg_ref_active = vp9_segfeature_active(&cm->seg, segment_id,
- SEG_LVL_REF_FRAME);
+ int seg_ref_active = segfeature_active(&cm->seg, segment_id,
+ SEG_LVL_REF_FRAME);
if (seg_ref_active) {
memset(ref_costs_single, 0, MAX_REF_FRAMES * sizeof(*ref_costs_single));
memset(ref_costs_comp, 0, MAX_REF_FRAMES * sizeof(*ref_costs_comp));
@@ -3007,8 +3007,8 @@
}
// If the segment reference frame feature is enabled....
// then do nothing if the current ref frame is not allowed..
- if (vp9_segfeature_active(seg, segment_id, SEG_LVL_REF_FRAME) &&
- vp9_get_segdata(seg, segment_id, SEG_LVL_REF_FRAME) != (int)ref_frame) {
+ if (segfeature_active(seg, segment_id, SEG_LVL_REF_FRAME) &&
+ get_segdata(seg, segment_id, SEG_LVL_REF_FRAME) != (int)ref_frame) {
ref_frame_skip_mask[0] |= (1 << ref_frame);
ref_frame_skip_mask[1] |= SECOND_REF_FRAME_MASK;
}
@@ -3017,7 +3017,7 @@
// Disable this drop out case if the ref frame
// segment level feature is enabled for this segment. This is to
// prevent the possibility that we end up unable to pick any mode.
- if (!vp9_segfeature_active(seg, segment_id, SEG_LVL_REF_FRAME)) {
+ if (!segfeature_active(seg, segment_id, SEG_LVL_REF_FRAME)) {
// Only consider ZEROMV/ALTREF_FRAME for alt ref frame,
// unless ARNR filtering is enabled in which case we want
// an unfiltered alternative. We allow near/nearest as well
@@ -3196,7 +3196,7 @@
// Do not allow compound prediction if the segment level reference frame
// feature is in use as in this case there can only be one reference.
- if (vp9_segfeature_active(seg, segment_id, SEG_LVL_REF_FRAME))
+ if (segfeature_active(seg, segment_id, SEG_LVL_REF_FRAME))
continue;
if ((mode_search_skip_flags & FLAG_SKIP_COMP_BESTINTRA) &&
@@ -3638,7 +3638,7 @@
rd_cost->rate = INT_MAX;
- assert(vp9_segfeature_active(&cm->seg, segment_id, SEG_LVL_SKIP));
+ assert(segfeature_active(&cm->seg, segment_id, SEG_LVL_SKIP));
mbmi->mode = ZEROMV;
mbmi->uv_mode = DC_PRED;
@@ -3850,7 +3850,7 @@
continue;
// Do not allow compound prediction if the segment level reference frame
// feature is in use as in this case there can only be one reference.
- if (vp9_segfeature_active(seg, segment_id, SEG_LVL_REF_FRAME))
+ if (segfeature_active(seg, segment_id, SEG_LVL_REF_FRAME))
continue;
if ((sf->mode_search_skip_flags & FLAG_SKIP_COMP_BESTINTRA) &&
@@ -3875,13 +3875,13 @@
// If the segment reference frame feature is enabled....
// then do nothing if the current ref frame is not allowed..
- if (vp9_segfeature_active(seg, segment_id, SEG_LVL_REF_FRAME) &&
- vp9_get_segdata(seg, segment_id, SEG_LVL_REF_FRAME) != (int)ref_frame) {
+ if (segfeature_active(seg, segment_id, SEG_LVL_REF_FRAME) &&
+ get_segdata(seg, segment_id, SEG_LVL_REF_FRAME) != (int)ref_frame) {
continue;
// Disable this drop out case if the ref frame
// segment level feature is enabled for this segment. This is to
// prevent the possibility that we end up unable to pick any mode.
- } else if (!vp9_segfeature_active(seg, segment_id, SEG_LVL_REF_FRAME)) {
+ } else if (!segfeature_active(seg, segment_id, SEG_LVL_REF_FRAME)) {
// Only consider ZEROMV/ALTREF_FRAME for alt ref frame,
// unless ARNR filtering is enabled in which case we want
// an unfiltered alternative. We allow near/nearest as well
diff --git a/vp9/encoder/vp9_tokenize.c b/vp9/encoder/vp9_tokenize.c
index 3592031..181a99c 100644
--- a/vp9/encoder/vp9_tokenize.c
+++ b/vp9/encoder/vp9_tokenize.c
@@ -484,7 +484,7 @@
static INLINE int get_tx_eob(const struct segmentation *seg, int segment_id,
TX_SIZE tx_size) {
const int eob_max = 16 << (tx_size << 1);
- return vp9_segfeature_active(seg, segment_id, SEG_LVL_SKIP) ? 0 : eob_max;
+ return segfeature_active(seg, segment_id, SEG_LVL_SKIP) ? 0 : eob_max;
}
static void tokenize_b(int plane, int block, BLOCK_SIZE plane_bsize,
@@ -615,8 +615,8 @@
MACROBLOCKD *const xd = &x->e_mbd;
MB_MODE_INFO *const mbmi = &xd->mi[0]->mbmi;
const int ctx = vp9_get_skip_context(xd);
- const int skip_inc = !vp9_segfeature_active(&cm->seg, mbmi->segment_id,
- SEG_LVL_SKIP);
+ const int skip_inc = !segfeature_active(&cm->seg, mbmi->segment_id,
+ SEG_LVL_SKIP);
struct tokenize_b_args arg = {cpi, td, t};
if (mbmi->skip) {
if (!dry_run)
diff --git a/vp9/vp9_common.mk b/vp9/vp9_common.mk
index d018699..b01fdd1 100644
--- a/vp9/vp9_common.mk
+++ b/vp9/vp9_common.mk
@@ -146,6 +146,7 @@
VP9_COMMON_SRCS-$(HAVE_MSA) += common/mips/msa/vp9_idct16x16_msa.c
VP9_COMMON_SRCS-$(HAVE_MSA) += common/mips/msa/vp9_idct32x32_msa.c
VP9_COMMON_SRCS-$(HAVE_MSA) += common/mips/msa/vp9_idct_msa.h
+VP9_COMMON_SRCS-$(HAVE_MSA) += common/mips/msa/vp9_intra_predict_msa.c
VP9_COMMON_SRCS-$(HAVE_MSA) += common/mips/msa/vp9_loopfilter_4_msa.c
VP9_COMMON_SRCS-$(HAVE_MSA) += common/mips/msa/vp9_loopfilter_8_msa.c
VP9_COMMON_SRCS-$(HAVE_MSA) += common/mips/msa/vp9_loopfilter_16_msa.c