Merge "Moving vp9_kf_default_bmode_probs to vp9_entropymode.c."
diff --git a/test/fdct4x4_test.cc b/test/fdct4x4_test.cc
index 3538c7b..9dcc078 100644
--- a/test/fdct4x4_test.cc
+++ b/test/fdct4x4_test.cc
@@ -44,10 +44,8 @@
class FwdTrans4x4Test : public ::testing::TestWithParam<int> {
public:
- FwdTrans4x4Test() { SetUpTestTxfm(); }
- ~FwdTrans4x4Test() {}
-
- void SetUpTestTxfm() {
+ virtual ~FwdTrans4x4Test() {}
+ virtual void SetUp() {
tx_type_ = GetParam();
if (tx_type_ == 0) {
fwd_txfm_ = fdct4x4;
diff --git a/test/fdct8x8_test.cc b/test/fdct8x8_test.cc
index eeae208..50e2e9d 100644
--- a/test/fdct8x8_test.cc
+++ b/test/fdct8x8_test.cc
@@ -51,10 +51,8 @@
class FwdTrans8x8Test : public ::testing::TestWithParam<int> {
public:
- FwdTrans8x8Test() { SetUpTestTxfm(); }
- ~FwdTrans8x8Test() {}
-
- void SetUpTestTxfm() {
+ virtual ~FwdTrans8x8Test() {}
+ virtual void SetUp() {
tx_type_ = GetParam();
if (tx_type_ == 0) {
fwd_txfm = fdct8x8;
diff --git a/vp8/common/alloccommon.c b/vp8/common/alloccommon.c
index 8af9e90..54afc13 100644
--- a/vp8/common/alloccommon.c
+++ b/vp8/common/alloccommon.c
@@ -173,7 +173,6 @@
oci->use_bilinear_mc_filter = 0;
oci->full_pixel = 0;
oci->multi_token_partition = ONE_PARTITION;
- oci->clr_type = REG_YUV;
oci->clamp_type = RECON_CLAMP_REQUIRED;
/* Initialize reference frame sign bias structure to defaults */
diff --git a/vp8/common/onyxc_int.h b/vp8/common/onyxc_int.h
index 276dd72..a0c97a1 100644
--- a/vp8/common/onyxc_int.h
+++ b/vp8/common/onyxc_int.h
@@ -72,7 +72,6 @@
int horiz_scale;
int vert_scale;
- YUV_TYPE clr_type;
CLAMP_TYPE clamp_type;
YV12_BUFFER_CONFIG *frame_to_show;
@@ -157,7 +156,6 @@
unsigned int current_video_frame;
- int near_boffset[3];
int version;
TOKEN_PARTITION multi_token_partition;
@@ -165,8 +163,10 @@
#ifdef PACKET_TESTING
VP8_HEADER oh;
#endif
+#if CONFIG_POSTPROC_VISUALIZER
double bitrate;
double framerate;
+#endif
#if CONFIG_MULTITHREAD
int processor_core_count;
diff --git a/vp8/decoder/decodframe.c b/vp8/decoder/decodframe.c
index 44c35ef..51eeb02 100644
--- a/vp8/decoder/decodframe.c
+++ b/vp8/decoder/decodframe.c
@@ -1095,7 +1095,7 @@
vpx_internal_error(&pc->error, VPX_CODEC_MEM_ERROR,
"Failed to allocate bool decoder 0");
if (pc->frame_type == KEY_FRAME) {
- pc->clr_type = (YUV_TYPE)vp8_read_bit(bc);
+ (void)vp8_read_bit(bc); // colorspace
pc->clamp_type = (CLAMP_TYPE)vp8_read_bit(bc);
}
diff --git a/vp8/decoder/onyxd_if.c b/vp8/decoder/onyxd_if.c
index 2db3096..2d9e343 100644
--- a/vp8/decoder/onyxd_if.c
+++ b/vp8/decoder/onyxd_if.c
@@ -430,7 +430,6 @@
*time_stamp = pbi->last_time_stamp;
*time_end_stamp = 0;
- sd->clrtype = pbi->common.clr_type;
#if CONFIG_POSTPROC
ret = vp8_post_proc_frame(&pbi->common, sd, flags);
#else
diff --git a/vp8/encoder/bitstream.c b/vp8/encoder/bitstream.c
index 4707ae5..5f0c1f7 100644
--- a/vp8/encoder/bitstream.c
+++ b/vp8/encoder/bitstream.c
@@ -1322,7 +1322,7 @@
vp8_start_encode(bc, cx_data, cx_data_end);
/* signal clr type */
- vp8_write_bit(bc, pc->clr_type);
+ vp8_write_bit(bc, 0);
vp8_write_bit(bc, pc->clamp_type);
}
diff --git a/vp8/encoder/onyx_if.c b/vp8/encoder/onyx_if.c
index 73f6583..11f1695 100644
--- a/vp8/encoder/onyx_if.c
+++ b/vp8/encoder/onyx_if.c
@@ -4821,8 +4821,10 @@
{
#if HAVE_NEON
int64_t store_reg[8];
-#endif
+#if CONFIG_RUNTIME_CPU_DETECT
VP8_COMMON *cm = &cpi->common;
+#endif
+#endif
struct vpx_usec_timer timer;
int res = 0;
@@ -4848,7 +4850,6 @@
if(vp8_lookahead_push(cpi->lookahead, sd, time_stamp, end_time,
frame_flags, cpi->active_map_enabled ? cpi->active_map : NULL))
res = -1;
- cm->clr_type = sd->clrtype;
vpx_usec_timer_mark(&timer);
cpi->time_receive_data += vpx_usec_timer_elapsed(&timer);
diff --git a/vp8/vp8_cx_iface.c b/vp8/vp8_cx_iface.c
index 4531d5a..9a7b9c5 100644
--- a/vp8/vp8_cx_iface.c
+++ b/vp8/vp8_cx_iface.c
@@ -695,7 +695,6 @@
yv12->uv_stride = img->stride[VPX_PLANE_U];
yv12->border = (img->stride[VPX_PLANE_Y] - img->w) / 2;
- yv12->clrtype = (img->fmt == VPX_IMG_FMT_VPXI420 || img->fmt == VPX_IMG_FMT_VPXYV12);
return res;
}
@@ -1079,11 +1078,7 @@
ctx->preview_img.planes[VPX_PLANE_U] = sd.u_buffer;
ctx->preview_img.planes[VPX_PLANE_V] = sd.v_buffer;
- if (sd.clrtype == REG_YUV)
- ctx->preview_img.fmt = VPX_IMG_FMT_I420;
- else
- ctx->preview_img.fmt = VPX_IMG_FMT_VPXI420;
-
+ ctx->preview_img.fmt = VPX_IMG_FMT_I420;
ctx->preview_img.x_chroma_shift = 1;
ctx->preview_img.y_chroma_shift = 1;
diff --git a/vp8/vp8_dx_iface.c b/vp8/vp8_dx_iface.c
index b552b84..871b8d3 100644
--- a/vp8/vp8_dx_iface.c
+++ b/vp8/vp8_dx_iface.c
@@ -288,8 +288,7 @@
* the Y, U, and V planes, nor other alignment adjustments that
* might be representable by a YV12_BUFFER_CONFIG, so we just
* initialize all the fields.*/
- img->fmt = yv12->clrtype == REG_YUV ?
- VPX_IMG_FMT_I420 : VPX_IMG_FMT_VPXI420;
+ img->fmt = VPX_IMG_FMT_I420;
img->w = yv12->y_stride;
img->h = (yv12->y_height + 2 * VP8BORDERINPIXELS + 15) & ~15;
img->d_w = yv12->y_width;
@@ -721,8 +720,6 @@
yv12->uv_stride = img->stride[VPX_PLANE_U];
yv12->border = (img->stride[VPX_PLANE_Y] - img->d_w) / 2;
- yv12->clrtype = (img->fmt == VPX_IMG_FMT_VPXI420 || img->fmt == VPX_IMG_FMT_VPXYV12);
-
return res;
}
diff --git a/vp9/common/vp9_alloccommon.c b/vp9/common/vp9_alloccommon.c
index 96b27bf..749efe2 100644
--- a/vp9/common/vp9_alloccommon.c
+++ b/vp9/common/vp9_alloccommon.c
@@ -158,10 +158,6 @@
if (!oci->above_context[0])
goto fail;
- for (i = 1; i < MAX_MB_PLANE; i++)
- oci->above_context[i] =
- oci->above_context[0] + i * sizeof(ENTROPY_CONTEXT) * 2 * mi_cols;
-
oci->above_seg_context = vpx_calloc(sizeof(PARTITION_CONTEXT) * mi_cols, 1);
if (!oci->above_seg_context)
goto fail;
@@ -180,7 +176,6 @@
oci->txfm_mode = ONLY_4X4;
oci->comp_pred_mode = HYBRID_PREDICTION;
- oci->clr_type = REG_YUV;
// Initialize reference frame sign bias structure to defaults
vpx_memset(oci->ref_frame_sign_bias, 0, sizeof(oci->ref_frame_sign_bias));
@@ -197,9 +192,15 @@
}
void vp9_update_frame_size(VP9_COMMON *cm) {
+ int i, mi_cols;
const int aligned_width = ALIGN_POWER_OF_TWO(cm->width, LOG2_MI_SIZE);
const int aligned_height = ALIGN_POWER_OF_TWO(cm->height, LOG2_MI_SIZE);
set_mb_mi(cm, aligned_width, aligned_height);
setup_mi(cm);
+
+ mi_cols = mi_cols_aligned_to_sb(cm->mi_cols);
+ for (i = 1; i < MAX_MB_PLANE; i++)
+ cm->above_context[i] =
+ cm->above_context[0] + i * sizeof(ENTROPY_CONTEXT) * 2 * mi_cols;
}
diff --git a/vp9/common/vp9_blockd.h b/vp9/common/vp9_blockd.h
index 2ca9898..f56586a 100644
--- a/vp9/common/vp9_blockd.h
+++ b/vp9/common/vp9_blockd.h
@@ -258,9 +258,6 @@
int mb_to_top_edge;
int mb_to_bottom_edge;
- unsigned int frames_since_golden;
- unsigned int frames_till_alt_ref_frame;
-
int lossless;
/* Inverse transform function pointers. */
void (*inv_txm4x4_1_add)(int16_t *input, uint8_t *dest, int stride);
diff --git a/vp9/common/vp9_onyxc_int.h b/vp9/common/vp9_onyxc_int.h
index ad4471a..1cee34a 100644
--- a/vp9/common/vp9_onyxc_int.h
+++ b/vp9/common/vp9_onyxc_int.h
@@ -137,8 +137,6 @@
int subsampling_x;
int subsampling_y;
- YUV_TYPE clr_type;
-
YV12_BUFFER_CONFIG *frame_to_show;
YV12_BUFFER_CONFIG yv12_fb[NUM_YV12_BUFFERS];
@@ -247,12 +245,8 @@
unsigned int frame_context_idx; /* Context to use/update */
unsigned int current_video_frame;
- int near_boffset[3];
int version;
- double bitrate;
- double framerate;
-
#if CONFIG_POSTPROC
struct postproc_state postproc_state;
#endif
diff --git a/vp9/common/vp9_quant_common.c b/vp9/common/vp9_quant_common.c
index 2b81c2e..48d86c5 100644
--- a/vp9/common/vp9_quant_common.c
+++ b/vp9/common/vp9_quant_common.c
@@ -12,6 +12,79 @@
#include "vp9/common/vp9_quant_common.h"
#include "vp9/common/vp9_seg_common.h"
+#if 1
+static const int16_t dc_qlookup[QINDEX_RANGE] = {
+ 4, 8, 8, 9, 10, 11, 12, 12,
+ 13, 14, 15, 16, 17, 18, 19, 19,
+ 20, 21, 22, 23, 24, 25, 26, 26,
+ 27, 28, 29, 30, 31, 32, 32, 33,
+ 34, 35, 36, 37, 38, 38, 39, 40,
+ 41, 42, 43, 43, 44, 45, 46, 47,
+ 48, 48, 49, 50, 51, 52, 53, 53,
+ 54, 55, 56, 57, 57, 58, 59, 60,
+ 61, 62, 62, 63, 64, 65, 66, 66,
+ 67, 68, 69, 70, 70, 71, 72, 73,
+ 74, 74, 75, 76, 77, 78, 78, 79,
+ 80, 81, 81, 82, 83, 84, 85, 85,
+ 87, 88, 90, 92, 93, 95, 96, 98,
+ 99, 101, 102, 104, 105, 107, 108, 110,
+ 111, 113, 114, 116, 117, 118, 120, 121,
+ 123, 125, 127, 129, 131, 134, 136, 138,
+ 140, 142, 144, 146, 148, 150, 152, 154,
+ 156, 158, 161, 164, 166, 169, 172, 174,
+ 177, 180, 182, 185, 187, 190, 192, 195,
+ 199, 202, 205, 208, 211, 214, 217, 220,
+ 223, 226, 230, 233, 237, 240, 243, 247,
+ 250, 253, 257, 261, 265, 269, 272, 276,
+ 280, 284, 288, 292, 296, 300, 304, 309,
+ 313, 317, 322, 326, 330, 335, 340, 344,
+ 349, 354, 359, 364, 369, 374, 379, 384,
+ 389, 395, 400, 406, 411, 417, 423, 429,
+ 435, 441, 447, 454, 461, 467, 475, 482,
+ 489, 497, 505, 513, 522, 530, 539, 549,
+ 559, 569, 579, 590, 602, 614, 626, 640,
+ 654, 668, 684, 700, 717, 736, 755, 775,
+ 796, 819, 843, 869, 896, 925, 955, 988,
+ 1022, 1058, 1098, 1139, 1184, 1232, 1282, 1336,
+};
+
+static const int16_t ac_qlookup[QINDEX_RANGE] = {
+ 4, 8, 9, 10, 11, 12, 13, 14,
+ 15, 16, 17, 18, 19, 20, 21, 22,
+ 23, 24, 25, 26, 27, 28, 29, 30,
+ 31, 32, 33, 34, 35, 36, 37, 38,
+ 39, 40, 41, 42, 43, 44, 45, 46,
+ 47, 48, 49, 50, 51, 52, 53, 54,
+ 55, 56, 57, 58, 59, 60, 61, 62,
+ 63, 64, 65, 66, 67, 68, 69, 70,
+ 71, 72, 73, 74, 75, 76, 77, 78,
+ 79, 80, 81, 82, 83, 84, 85, 86,
+ 87, 88, 89, 90, 91, 92, 93, 94,
+ 95, 96, 97, 98, 99, 100, 101, 102,
+ 104, 106, 108, 110, 112, 114, 116, 118,
+ 120, 122, 124, 126, 128, 130, 132, 134,
+ 136, 138, 140, 142, 144, 146, 148, 150,
+ 152, 155, 158, 161, 164, 167, 170, 173,
+ 176, 179, 182, 185, 188, 191, 194, 197,
+ 200, 203, 207, 211, 215, 219, 223, 227,
+ 231, 235, 239, 243, 247, 251, 255, 260,
+ 265, 270, 275, 280, 285, 290, 295, 300,
+ 305, 311, 317, 323, 329, 335, 341, 347,
+ 353, 359, 366, 373, 380, 387, 394, 401,
+ 408, 416, 424, 432, 440, 448, 456, 465,
+ 474, 483, 492, 501, 510, 520, 530, 540,
+ 550, 560, 571, 582, 593, 604, 615, 627,
+ 639, 651, 663, 676, 689, 702, 715, 729,
+ 743, 757, 771, 786, 801, 816, 832, 848,
+ 864, 881, 898, 915, 933, 951, 969, 988,
+ 1007, 1026, 1046, 1066, 1087, 1108, 1129, 1151,
+ 1173, 1196, 1219, 1243, 1267, 1292, 1317, 1343,
+ 1369, 1396, 1423, 1451, 1479, 1508, 1537, 1567,
+ 1597, 1628, 1660, 1692, 1725, 1759, 1793, 1828,
+};
+
+void vp9_init_quant_tables(void) { }
+#else
static int16_t dc_qlookup[QINDEX_RANGE];
static int16_t ac_qlookup[QINDEX_RANGE];
@@ -46,6 +119,7 @@
0.5, ac_val));
}
}
+#endif
int16_t vp9_dc_quant(int qindex, int delta) {
return dc_qlookup[clamp(qindex + delta, 0, MAXQ)];
diff --git a/vp9/common/vp9_rtcd_defs.sh b/vp9/common/vp9_rtcd_defs.sh
index 8bab9f0..82e20aa 100644
--- a/vp9/common/vp9_rtcd_defs.sh
+++ b/vp9/common/vp9_rtcd_defs.sh
@@ -328,7 +328,7 @@
specialize vp9_short_iht4x4_add sse2
prototype void vp9_short_iht8x8_add "int16_t *input, uint8_t *dest, int dest_stride, int tx_type"
-specialize vp9_short_iht8x8_add
+specialize vp9_short_iht8x8_add sse2
prototype void vp9_short_iht16x16_add "int16_t *input, uint8_t *output, int pitch, int tx_type"
specialize vp9_short_iht16x16_add
diff --git a/vp9/common/x86/vp9_idct_intrin_sse2.c b/vp9/common/x86/vp9_idct_intrin_sse2.c
index 4495b15..1bf869d 100644
--- a/vp9/common/x86/vp9_idct_intrin_sse2.c
+++ b/vp9/common/x86/vp9_idct_intrin_sse2.c
@@ -638,6 +638,373 @@
RECON_AND_STORE(dest, in7);
}
+// perform 8x8 transpose
+static INLINE void array_transpose_8x8(__m128i *in, __m128i *res) {
+ const __m128i tr0_0 = _mm_unpacklo_epi16(in[0], in[1]);
+ const __m128i tr0_1 = _mm_unpacklo_epi16(in[2], in[3]);
+ const __m128i tr0_2 = _mm_unpackhi_epi16(in[0], in[1]);
+ const __m128i tr0_3 = _mm_unpackhi_epi16(in[2], in[3]);
+ const __m128i tr0_4 = _mm_unpacklo_epi16(in[4], in[5]);
+ const __m128i tr0_5 = _mm_unpacklo_epi16(in[6], in[7]);
+ const __m128i tr0_6 = _mm_unpackhi_epi16(in[4], in[5]);
+ const __m128i tr0_7 = _mm_unpackhi_epi16(in[6], in[7]);
+
+ const __m128i tr1_0 = _mm_unpacklo_epi32(tr0_0, tr0_1);
+ const __m128i tr1_1 = _mm_unpacklo_epi32(tr0_4, tr0_5);
+ const __m128i tr1_2 = _mm_unpackhi_epi32(tr0_0, tr0_1);
+ const __m128i tr1_3 = _mm_unpackhi_epi32(tr0_4, tr0_5);
+ const __m128i tr1_4 = _mm_unpacklo_epi32(tr0_2, tr0_3);
+ const __m128i tr1_5 = _mm_unpacklo_epi32(tr0_6, tr0_7);
+ const __m128i tr1_6 = _mm_unpackhi_epi32(tr0_2, tr0_3);
+ const __m128i tr1_7 = _mm_unpackhi_epi32(tr0_6, tr0_7);
+
+ res[0] = _mm_unpacklo_epi64(tr1_0, tr1_1);
+ res[1] = _mm_unpackhi_epi64(tr1_0, tr1_1);
+ res[2] = _mm_unpacklo_epi64(tr1_2, tr1_3);
+ res[3] = _mm_unpackhi_epi64(tr1_2, tr1_3);
+ res[4] = _mm_unpacklo_epi64(tr1_4, tr1_5);
+ res[5] = _mm_unpackhi_epi64(tr1_4, tr1_5);
+ res[6] = _mm_unpacklo_epi64(tr1_6, tr1_7);
+ res[7] = _mm_unpackhi_epi64(tr1_6, tr1_7);
+}
+
+void idct8_1d_sse2(__m128i *in) {
+ const __m128i rounding = _mm_set1_epi32(DCT_CONST_ROUNDING);
+ const __m128i stg1_0 = pair_set_epi16(cospi_28_64, -cospi_4_64);
+ const __m128i stg1_1 = pair_set_epi16(cospi_4_64, cospi_28_64);
+ const __m128i stg1_2 = pair_set_epi16(-cospi_20_64, cospi_12_64);
+ const __m128i stg1_3 = pair_set_epi16(cospi_12_64, cospi_20_64);
+ const __m128i stg2_0 = pair_set_epi16(cospi_16_64, cospi_16_64);
+ const __m128i stg2_1 = pair_set_epi16(cospi_16_64, -cospi_16_64);
+ const __m128i stg2_2 = pair_set_epi16(cospi_24_64, -cospi_8_64);
+ const __m128i stg2_3 = pair_set_epi16(cospi_8_64, cospi_24_64);
+
+ __m128i in0, in1, in2, in3, in4, in5, in6, in7;
+ __m128i stp1_0, stp1_1, stp1_2, stp1_3, stp1_4, stp1_5, stp1_6, stp1_7;
+ __m128i stp2_0, stp2_1, stp2_2, stp2_3, stp2_4, stp2_5, stp2_6, stp2_7;
+ __m128i tmp0, tmp1, tmp2, tmp3, tmp4, tmp5, tmp6, tmp7;
+
+ in0 = in[0];
+ in1 = in[1];
+ in2 = in[2];
+ in3 = in[3];
+ in4 = in[4];
+ in5 = in[5];
+ in6 = in[6];
+ in7 = in[7];
+
+ // 8x8 Transpose is copied from vp9_short_fdct8x8_sse2()
+ TRANSPOSE_8X8(in0, in1, in2, in3, in4, in5, in6, in7, in0, in1, in2, in3,
+ in4, in5, in6, in7);
+
+ // 4-stage 1D idct8x8
+ IDCT8x8_1D
+ in[0] = in0;
+ in[1] = in1;
+ in[2] = in2;
+ in[3] = in3;
+ in[4] = in4;
+ in[5] = in5;
+ in[6] = in6;
+ in[7] = in7;
+}
+
+void iadst8_1d_sse2(__m128i *in) {
+ const __m128i k__cospi_p02_p30 = pair_set_epi16(cospi_2_64, cospi_30_64);
+ const __m128i k__cospi_p30_m02 = pair_set_epi16(cospi_30_64, -cospi_2_64);
+ const __m128i k__cospi_p10_p22 = pair_set_epi16(cospi_10_64, cospi_22_64);
+ const __m128i k__cospi_p22_m10 = pair_set_epi16(cospi_22_64, -cospi_10_64);
+ const __m128i k__cospi_p18_p14 = pair_set_epi16(cospi_18_64, cospi_14_64);
+ const __m128i k__cospi_p14_m18 = pair_set_epi16(cospi_14_64, -cospi_18_64);
+ const __m128i k__cospi_p26_p06 = pair_set_epi16(cospi_26_64, cospi_6_64);
+ const __m128i k__cospi_p06_m26 = pair_set_epi16(cospi_6_64, -cospi_26_64);
+ const __m128i k__cospi_p08_p24 = pair_set_epi16(cospi_8_64, cospi_24_64);
+ const __m128i k__cospi_p24_m08 = pair_set_epi16(cospi_24_64, -cospi_8_64);
+ const __m128i k__cospi_m24_p08 = pair_set_epi16(-cospi_24_64, cospi_8_64);
+ const __m128i k__cospi_p16_m16 = pair_set_epi16(cospi_16_64, -cospi_16_64);
+ const __m128i k__cospi_p16_p16 = _mm_set1_epi16(cospi_16_64);
+ const __m128i k__const_0 = _mm_set1_epi16(0);
+ const __m128i k__DCT_CONST_ROUNDING = _mm_set1_epi32(DCT_CONST_ROUNDING);
+
+ __m128i u0, u1, u2, u3, u4, u5, u6, u7, u8, u9, u10, u11, u12, u13, u14, u15;
+ __m128i v0, v1, v2, v3, v4, v5, v6, v7, v8, v9, v10, v11, v12, v13, v14, v15;
+ __m128i w0, w1, w2, w3, w4, w5, w6, w7, w8, w9, w10, w11, w12, w13, w14, w15;
+ __m128i s0, s1, s2, s3, s4, s5, s6, s7;
+ __m128i in0, in1, in2, in3, in4, in5, in6, in7;
+
+ // transpose
+ array_transpose_8x8(in, in);
+
+ // properly aligned for butterfly input
+ in0 = in[7];
+ in1 = in[0];
+ in2 = in[5];
+ in3 = in[2];
+ in4 = in[3];
+ in5 = in[4];
+ in6 = in[1];
+ in7 = in[6];
+
+ // column transformation
+ // stage 1
+ // interleave and multiply/add into 32-bit integer
+ s0 = _mm_unpacklo_epi16(in0, in1);
+ s1 = _mm_unpackhi_epi16(in0, in1);
+ s2 = _mm_unpacklo_epi16(in2, in3);
+ s3 = _mm_unpackhi_epi16(in2, in3);
+ s4 = _mm_unpacklo_epi16(in4, in5);
+ s5 = _mm_unpackhi_epi16(in4, in5);
+ s6 = _mm_unpacklo_epi16(in6, in7);
+ s7 = _mm_unpackhi_epi16(in6, in7);
+
+ u0 = _mm_madd_epi16(s0, k__cospi_p02_p30);
+ u1 = _mm_madd_epi16(s1, k__cospi_p02_p30);
+ u2 = _mm_madd_epi16(s0, k__cospi_p30_m02);
+ u3 = _mm_madd_epi16(s1, k__cospi_p30_m02);
+ u4 = _mm_madd_epi16(s2, k__cospi_p10_p22);
+ u5 = _mm_madd_epi16(s3, k__cospi_p10_p22);
+ u6 = _mm_madd_epi16(s2, k__cospi_p22_m10);
+ u7 = _mm_madd_epi16(s3, k__cospi_p22_m10);
+ u8 = _mm_madd_epi16(s4, k__cospi_p18_p14);
+ u9 = _mm_madd_epi16(s5, k__cospi_p18_p14);
+ u10 = _mm_madd_epi16(s4, k__cospi_p14_m18);
+ u11 = _mm_madd_epi16(s5, k__cospi_p14_m18);
+ u12 = _mm_madd_epi16(s6, k__cospi_p26_p06);
+ u13 = _mm_madd_epi16(s7, k__cospi_p26_p06);
+ u14 = _mm_madd_epi16(s6, k__cospi_p06_m26);
+ u15 = _mm_madd_epi16(s7, k__cospi_p06_m26);
+
+ // addition
+ w0 = _mm_add_epi32(u0, u8);
+ w1 = _mm_add_epi32(u1, u9);
+ w2 = _mm_add_epi32(u2, u10);
+ w3 = _mm_add_epi32(u3, u11);
+ w4 = _mm_add_epi32(u4, u12);
+ w5 = _mm_add_epi32(u5, u13);
+ w6 = _mm_add_epi32(u6, u14);
+ w7 = _mm_add_epi32(u7, u15);
+ w8 = _mm_sub_epi32(u0, u8);
+ w9 = _mm_sub_epi32(u1, u9);
+ w10 = _mm_sub_epi32(u2, u10);
+ w11 = _mm_sub_epi32(u3, u11);
+ w12 = _mm_sub_epi32(u4, u12);
+ w13 = _mm_sub_epi32(u5, u13);
+ w14 = _mm_sub_epi32(u6, u14);
+ w15 = _mm_sub_epi32(u7, u15);
+
+ // shift and rounding
+ v0 = _mm_add_epi32(w0, k__DCT_CONST_ROUNDING);
+ v1 = _mm_add_epi32(w1, k__DCT_CONST_ROUNDING);
+ v2 = _mm_add_epi32(w2, k__DCT_CONST_ROUNDING);
+ v3 = _mm_add_epi32(w3, k__DCT_CONST_ROUNDING);
+ v4 = _mm_add_epi32(w4, k__DCT_CONST_ROUNDING);
+ v5 = _mm_add_epi32(w5, k__DCT_CONST_ROUNDING);
+ v6 = _mm_add_epi32(w6, k__DCT_CONST_ROUNDING);
+ v7 = _mm_add_epi32(w7, k__DCT_CONST_ROUNDING);
+ v8 = _mm_add_epi32(w8, k__DCT_CONST_ROUNDING);
+ v9 = _mm_add_epi32(w9, k__DCT_CONST_ROUNDING);
+ v10 = _mm_add_epi32(w10, k__DCT_CONST_ROUNDING);
+ v11 = _mm_add_epi32(w11, k__DCT_CONST_ROUNDING);
+ v12 = _mm_add_epi32(w12, k__DCT_CONST_ROUNDING);
+ v13 = _mm_add_epi32(w13, k__DCT_CONST_ROUNDING);
+ v14 = _mm_add_epi32(w14, k__DCT_CONST_ROUNDING);
+ v15 = _mm_add_epi32(w15, k__DCT_CONST_ROUNDING);
+
+ u0 = _mm_srai_epi32(v0, DCT_CONST_BITS);
+ u1 = _mm_srai_epi32(v1, DCT_CONST_BITS);
+ u2 = _mm_srai_epi32(v2, DCT_CONST_BITS);
+ u3 = _mm_srai_epi32(v3, DCT_CONST_BITS);
+ u4 = _mm_srai_epi32(v4, DCT_CONST_BITS);
+ u5 = _mm_srai_epi32(v5, DCT_CONST_BITS);
+ u6 = _mm_srai_epi32(v6, DCT_CONST_BITS);
+ u7 = _mm_srai_epi32(v7, DCT_CONST_BITS);
+ u8 = _mm_srai_epi32(v8, DCT_CONST_BITS);
+ u9 = _mm_srai_epi32(v9, DCT_CONST_BITS);
+ u10 = _mm_srai_epi32(v10, DCT_CONST_BITS);
+ u11 = _mm_srai_epi32(v11, DCT_CONST_BITS);
+ u12 = _mm_srai_epi32(v12, DCT_CONST_BITS);
+ u13 = _mm_srai_epi32(v13, DCT_CONST_BITS);
+ u14 = _mm_srai_epi32(v14, DCT_CONST_BITS);
+ u15 = _mm_srai_epi32(v15, DCT_CONST_BITS);
+
+ // back to 16-bit and pack 8 integers into __m128i
+ in[0] = _mm_packs_epi32(u0, u1);
+ in[1] = _mm_packs_epi32(u2, u3);
+ in[2] = _mm_packs_epi32(u4, u5);
+ in[3] = _mm_packs_epi32(u6, u7);
+ in[4] = _mm_packs_epi32(u8, u9);
+ in[5] = _mm_packs_epi32(u10, u11);
+ in[6] = _mm_packs_epi32(u12, u13);
+ in[7] = _mm_packs_epi32(u14, u15);
+
+ // stage 2
+ s0 = _mm_add_epi16(in[0], in[2]);
+ s1 = _mm_add_epi16(in[1], in[3]);
+ s2 = _mm_sub_epi16(in[0], in[2]);
+ s3 = _mm_sub_epi16(in[1], in[3]);
+ u0 = _mm_unpacklo_epi16(in[4], in[5]);
+ u1 = _mm_unpackhi_epi16(in[4], in[5]);
+ u2 = _mm_unpacklo_epi16(in[6], in[7]);
+ u3 = _mm_unpackhi_epi16(in[6], in[7]);
+
+ v0 = _mm_madd_epi16(u0, k__cospi_p08_p24);
+ v1 = _mm_madd_epi16(u1, k__cospi_p08_p24);
+ v2 = _mm_madd_epi16(u0, k__cospi_p24_m08);
+ v3 = _mm_madd_epi16(u1, k__cospi_p24_m08);
+ v4 = _mm_madd_epi16(u2, k__cospi_m24_p08);
+ v5 = _mm_madd_epi16(u3, k__cospi_m24_p08);
+ v6 = _mm_madd_epi16(u2, k__cospi_p08_p24);
+ v7 = _mm_madd_epi16(u3, k__cospi_p08_p24);
+
+ w0 = _mm_add_epi32(v0, v4);
+ w1 = _mm_add_epi32(v1, v5);
+ w2 = _mm_add_epi32(v2, v6);
+ w3 = _mm_add_epi32(v3, v7);
+ w4 = _mm_sub_epi32(v0, v4);
+ w5 = _mm_sub_epi32(v1, v5);
+ w6 = _mm_sub_epi32(v2, v6);
+ w7 = _mm_sub_epi32(v3, v7);
+
+ v0 = _mm_add_epi32(w0, k__DCT_CONST_ROUNDING);
+ v1 = _mm_add_epi32(w1, k__DCT_CONST_ROUNDING);
+ v2 = _mm_add_epi32(w2, k__DCT_CONST_ROUNDING);
+ v3 = _mm_add_epi32(w3, k__DCT_CONST_ROUNDING);
+ v4 = _mm_add_epi32(w4, k__DCT_CONST_ROUNDING);
+ v5 = _mm_add_epi32(w5, k__DCT_CONST_ROUNDING);
+ v6 = _mm_add_epi32(w6, k__DCT_CONST_ROUNDING);
+ v7 = _mm_add_epi32(w7, k__DCT_CONST_ROUNDING);
+
+ u0 = _mm_srai_epi32(v0, DCT_CONST_BITS);
+ u1 = _mm_srai_epi32(v1, DCT_CONST_BITS);
+ u2 = _mm_srai_epi32(v2, DCT_CONST_BITS);
+ u3 = _mm_srai_epi32(v3, DCT_CONST_BITS);
+ u4 = _mm_srai_epi32(v4, DCT_CONST_BITS);
+ u5 = _mm_srai_epi32(v5, DCT_CONST_BITS);
+ u6 = _mm_srai_epi32(v6, DCT_CONST_BITS);
+ u7 = _mm_srai_epi32(v7, DCT_CONST_BITS);
+
+ // back to 16-bit intergers
+ s4 = _mm_packs_epi32(u0, u1);
+ s5 = _mm_packs_epi32(u2, u3);
+ s6 = _mm_packs_epi32(u4, u5);
+ s7 = _mm_packs_epi32(u6, u7);
+
+ // stage 3
+ u0 = _mm_unpacklo_epi16(s2, s3);
+ u1 = _mm_unpackhi_epi16(s2, s3);
+ u2 = _mm_unpacklo_epi16(s6, s7);
+ u3 = _mm_unpackhi_epi16(s6, s7);
+
+ v0 = _mm_madd_epi16(u0, k__cospi_p16_p16);
+ v1 = _mm_madd_epi16(u1, k__cospi_p16_p16);
+ v2 = _mm_madd_epi16(u0, k__cospi_p16_m16);
+ v3 = _mm_madd_epi16(u1, k__cospi_p16_m16);
+ v4 = _mm_madd_epi16(u2, k__cospi_p16_p16);
+ v5 = _mm_madd_epi16(u3, k__cospi_p16_p16);
+ v6 = _mm_madd_epi16(u2, k__cospi_p16_m16);
+ v7 = _mm_madd_epi16(u3, k__cospi_p16_m16);
+
+ u0 = _mm_add_epi32(v0, k__DCT_CONST_ROUNDING);
+ u1 = _mm_add_epi32(v1, k__DCT_CONST_ROUNDING);
+ u2 = _mm_add_epi32(v2, k__DCT_CONST_ROUNDING);
+ u3 = _mm_add_epi32(v3, k__DCT_CONST_ROUNDING);
+ u4 = _mm_add_epi32(v4, k__DCT_CONST_ROUNDING);
+ u5 = _mm_add_epi32(v5, k__DCT_CONST_ROUNDING);
+ u6 = _mm_add_epi32(v6, k__DCT_CONST_ROUNDING);
+ u7 = _mm_add_epi32(v7, k__DCT_CONST_ROUNDING);
+
+ v0 = _mm_srai_epi32(u0, DCT_CONST_BITS);
+ v1 = _mm_srai_epi32(u1, DCT_CONST_BITS);
+ v2 = _mm_srai_epi32(u2, DCT_CONST_BITS);
+ v3 = _mm_srai_epi32(u3, DCT_CONST_BITS);
+ v4 = _mm_srai_epi32(u4, DCT_CONST_BITS);
+ v5 = _mm_srai_epi32(u5, DCT_CONST_BITS);
+ v6 = _mm_srai_epi32(u6, DCT_CONST_BITS);
+ v7 = _mm_srai_epi32(u7, DCT_CONST_BITS);
+
+ s2 = _mm_packs_epi32(v0, v1);
+ s3 = _mm_packs_epi32(v2, v3);
+ s6 = _mm_packs_epi32(v4, v5);
+ s7 = _mm_packs_epi32(v6, v7);
+
+ in[0] = s0;
+ in[1] = _mm_sub_epi16(k__const_0, s4);
+ in[2] = s6;
+ in[3] = _mm_sub_epi16(k__const_0, s2);
+ in[4] = s3;
+ in[5] = _mm_sub_epi16(k__const_0, s7);
+ in[6] = s5;
+ in[7] = _mm_sub_epi16(k__const_0, s1);
+}
+
+
+void vp9_short_iht8x8_add_sse2(int16_t *input, uint8_t *dest, int stride,
+ int tx_type) {
+ __m128i in[8];
+ const __m128i zero = _mm_setzero_si128();
+ const __m128i final_rounding = _mm_set1_epi16(1<<4);
+
+ // load input data
+ in[0] = _mm_load_si128((__m128i *)input);
+ in[1] = _mm_load_si128((__m128i *)(input + 8 * 1));
+ in[2] = _mm_load_si128((__m128i *)(input + 8 * 2));
+ in[3] = _mm_load_si128((__m128i *)(input + 8 * 3));
+ in[4] = _mm_load_si128((__m128i *)(input + 8 * 4));
+ in[5] = _mm_load_si128((__m128i *)(input + 8 * 5));
+ in[6] = _mm_load_si128((__m128i *)(input + 8 * 6));
+ in[7] = _mm_load_si128((__m128i *)(input + 8 * 7));
+
+ switch (tx_type) {
+ case 0: // DCT_DCT
+ idct8_1d_sse2(in);
+ idct8_1d_sse2(in);
+ break;
+ case 1: // ADST_DCT
+ idct8_1d_sse2(in);
+ iadst8_1d_sse2(in);
+ break;
+ case 2: // DCT_ADST
+ iadst8_1d_sse2(in);
+ idct8_1d_sse2(in);
+ break;
+ case 3: // ADST_ADST
+ iadst8_1d_sse2(in);
+ iadst8_1d_sse2(in);
+ break;
+ default:
+ assert(0);
+ break;
+ }
+
+ // Final rounding and shift
+ in[0] = _mm_adds_epi16(in[0], final_rounding);
+ in[1] = _mm_adds_epi16(in[1], final_rounding);
+ in[2] = _mm_adds_epi16(in[2], final_rounding);
+ in[3] = _mm_adds_epi16(in[3], final_rounding);
+ in[4] = _mm_adds_epi16(in[4], final_rounding);
+ in[5] = _mm_adds_epi16(in[5], final_rounding);
+ in[6] = _mm_adds_epi16(in[6], final_rounding);
+ in[7] = _mm_adds_epi16(in[7], final_rounding);
+
+ in[0] = _mm_srai_epi16(in[0], 5);
+ in[1] = _mm_srai_epi16(in[1], 5);
+ in[2] = _mm_srai_epi16(in[2], 5);
+ in[3] = _mm_srai_epi16(in[3], 5);
+ in[4] = _mm_srai_epi16(in[4], 5);
+ in[5] = _mm_srai_epi16(in[5], 5);
+ in[6] = _mm_srai_epi16(in[6], 5);
+ in[7] = _mm_srai_epi16(in[7], 5);
+
+ RECON_AND_STORE(dest, in[0]);
+ RECON_AND_STORE(dest, in[1]);
+ RECON_AND_STORE(dest, in[2]);
+ RECON_AND_STORE(dest, in[3]);
+ RECON_AND_STORE(dest, in[4]);
+ RECON_AND_STORE(dest, in[5]);
+ RECON_AND_STORE(dest, in[6]);
+ RECON_AND_STORE(dest, in[7]);
+}
+
void vp9_short_idct10_8x8_add_sse2(int16_t *input, uint8_t *dest, int stride) {
const __m128i zero = _mm_setzero_si128();
const __m128i rounding = _mm_set1_epi32(DCT_CONST_ROUNDING);
diff --git a/vp9/decoder/vp9_onyxd_if.c b/vp9/decoder/vp9_onyxd_if.c
index b9c7f30..f734eae 100644
--- a/vp9/decoder/vp9_onyxd_if.c
+++ b/vp9/decoder/vp9_onyxd_if.c
@@ -413,7 +413,6 @@
*time_stamp = pbi->last_time_stamp;
*time_end_stamp = 0;
- sd->clrtype = pbi->common.clr_type;
#if CONFIG_POSTPROC
ret = vp9_post_proc_frame(&pbi->common, sd, flags);
#else
diff --git a/vp9/encoder/vp9_encodeframe.c b/vp9/encoder/vp9_encodeframe.c
index 8ac46c0..b49f9a3 100644
--- a/vp9/encoder/vp9_encodeframe.c
+++ b/vp9/encoder/vp9_encodeframe.c
@@ -1705,9 +1705,6 @@
xd->mode_info_stride = cm->mode_info_stride;
xd->frame_type = cm->frame_type;
- xd->frames_since_golden = cm->frames_since_golden;
- xd->frames_till_alt_ref_frame = cm->frames_till_alt_ref_frame;
-
// reset intra mode contexts
if (cm->frame_type == KEY_FRAME)
vp9_init_mbmode_probs(cm);
diff --git a/vp9/encoder/vp9_onyx_if.c b/vp9/encoder/vp9_onyx_if.c
index cbb9f87..84879fc 100644
--- a/vp9/encoder/vp9_onyx_if.c
+++ b/vp9/encoder/vp9_onyx_if.c
@@ -3505,7 +3505,6 @@
if (vp9_lookahead_push(cpi->lookahead, sd, time_stamp, end_time, frame_flags,
cpi->active_map_enabled ? cpi->active_map : NULL))
res = -1;
- cm->clr_type = sd->clrtype;
vpx_usec_timer_mark(&timer);
cpi->time_receive_data += vpx_usec_timer_elapsed(&timer);
diff --git a/vp9/vp9_iface_common.h b/vp9/vp9_iface_common.h
index 0c1f373..ed0122c 100644
--- a/vp9/vp9_iface_common.h
+++ b/vp9/vp9_iface_common.h
@@ -74,8 +74,6 @@
yv12->alpha_stride = yv12->alpha_buffer ? img->stride[VPX_PLANE_ALPHA] : 0;
yv12->border = (img->stride[VPX_PLANE_Y] - img->w) / 2;
- yv12->clrtype = REG_YUV;
-
#if CONFIG_ALPHA
// For development purposes, force alpha to hold the same data a Y for now.
yv12->alpha_buffer = yv12->y_buffer;
diff --git a/vpx_scale/yv12config.h b/vpx_scale/yv12config.h
index a919e49..66e587a 100644
--- a/vpx_scale/yv12config.h
+++ b/vpx_scale/yv12config.h
@@ -22,24 +22,6 @@
#define VP9BORDERINPIXELS 160
#define VP9_INTERP_EXTEND 4
- /*************************************
- For INT_YUV:
-
- Y = (R+G*2+B)/4;
- U = (R-B)/2;
- V = (G*2 - R - B)/4;
- And
- R = Y+U-V;
- G = Y+V;
- B = Y-U-V;
- ************************************/
- typedef enum
- {
- REG_YUV = 0, /* Regular yuv */
- INT_YUV = 1 /* The type of yuv that can be tranfer to and from RGB through integer transform */
- }
- YUV_TYPE;
-
typedef struct yv12_buffer_config {
int y_width;
int y_height;
@@ -68,7 +50,6 @@
int buffer_alloc_sz;
int border;
int frame_size;
- YUV_TYPE clrtype;
int corrupted;
int flags;