Merge "configure: use -Werror when testing CXX flags w/clang"
diff --git a/.gitignore b/.gitignore
index a93b6a0..dfeae99 100644
--- a/.gitignore
+++ b/.gitignore
@@ -19,18 +19,14 @@
/config.log
/config.mk
/decode_to_md5
-/decode_to_md5.c
/decode_to_md5.dox
/decode_with_drops
-/decode_with_drops.c
/decode_with_drops.dox
/docs/
/doxyfile
/error_resilient
-/error_resilient.c
/error_resilient.dox
/force_keyframe
-/force_keyframe.c
/force_keyframe.dox
/ivfdec
/ivfdec.dox
@@ -40,27 +36,21 @@
/libvpx.ver
/obj_int_extract
/postproc
-/postproc.c
/postproc.dox
/samples.dox
/simple_decoder
-/simple_decoder.c
/simple_decoder.dox
/simple_encoder
-/simple_encoder.c
/simple_encoder.dox
/test_libvpx
/twopass_encoder
-/twopass_encoder.c
/twopass_encoder.dox
/vp8_api1_migration.dox
/vp8_scalable_patterns
/vp8_scalable_patterns.dox
/vp8_set_maps
-/vp8_set_maps.c
/vp8_set_maps.dox
/vp8cx_set_ref
-/vp8cx_set_ref.c
/vp8cx_set_ref.dox
/vpx.pc
/vpx_config.c
diff --git a/vp9/common/mips/dspr2/vp9_common_dspr2.h b/vp9/common/mips/dspr2/vp9_common_dspr2.h
index 991d3c2..6ebea9f 100644
--- a/vp9/common/mips/dspr2/vp9_common_dspr2.h
+++ b/vp9/common/mips/dspr2/vp9_common_dspr2.h
@@ -85,8 +85,8 @@
);
}
-void vp9_idct32_1d_cols_add_blk_dspr2(int16_t *input, uint8_t *dest,
- int dest_stride);
+void vp9_idct32_cols_add_blk_dspr2(int16_t *input, uint8_t *dest,
+ int dest_stride);
void vp9_convolve2_horiz_dspr2(const uint8_t *src, ptrdiff_t src_stride,
uint8_t *dst, ptrdiff_t dst_stride,
diff --git a/vp9/common/mips/dspr2/vp9_itrans16_dspr2.c b/vp9/common/mips/dspr2/vp9_itrans16_dspr2.c
index 1b2f550..19c582f 100644
--- a/vp9/common/mips/dspr2/vp9_itrans16_dspr2.c
+++ b/vp9/common/mips/dspr2/vp9_itrans16_dspr2.c
@@ -19,8 +19,8 @@
#include "vp9/common/mips/dspr2/vp9_common_dspr2.h"
#if HAVE_DSPR2
-static void idct16_1d_rows_dspr2(const int16_t *input, int16_t *output,
- uint32_t no_rows) {
+static void idct16_rows_dspr2(const int16_t *input, int16_t *output,
+ uint32_t no_rows) {
int i;
int step1_0, step1_1, step1_2, step1_3, step1_4, step1_5, step1_6, step1_7;
int step1_10, step1_11, step1_12, step1_13;
@@ -404,8 +404,8 @@
}
}
-static void idct16_1d_cols_add_blk_dspr2(int16_t *input, uint8_t *dest,
- int dest_stride) {
+static void idct16_cols_add_blk_dspr2(int16_t *input, uint8_t *dest,
+ int dest_stride) {
int i;
int step1_0, step1_1, step1_2, step1_3, step1_4, step1_5, step1_6, step1_7;
int step1_8, step1_9, step1_10, step1_11;
@@ -905,13 +905,13 @@
);
// First transform rows
- idct16_1d_rows_dspr2(input, out, 16);
+ idct16_rows_dspr2(input, out, 16);
// Then transform columns and add to dest
- idct16_1d_cols_add_blk_dspr2(out, dest, dest_stride);
+ idct16_cols_add_blk_dspr2(out, dest, dest_stride);
}
-static void iadst16_1d(const int16_t *input, int16_t *output) {
+static void iadst16(const int16_t *input, int16_t *output) {
int s0, s1, s2, s3, s4, s5, s6, s7, s8, s9, s10, s11, s12, s13, s14, s15;
int x0 = input[15];
@@ -1099,16 +1099,16 @@
switch (tx_type) {
case DCT_DCT: // DCT in both horizontal and vertical
- idct16_1d_rows_dspr2(input, outptr, 16);
- idct16_1d_cols_add_blk_dspr2(out, dest, pitch);
+ idct16_rows_dspr2(input, outptr, 16);
+ idct16_cols_add_blk_dspr2(out, dest, pitch);
break;
case ADST_DCT: // ADST in vertical, DCT in horizontal
- idct16_1d_rows_dspr2(input, outptr, 16);
+ idct16_rows_dspr2(input, outptr, 16);
outptr = out;
for (i = 0; i < 16; ++i) {
- iadst16_1d(outptr, temp_out);
+ iadst16(outptr, temp_out);
for (j = 0; j < 16; ++j)
dest[j * pitch + i] =
@@ -1125,7 +1125,7 @@
/* prefetch row */
vp9_prefetch_load((const uint8_t *)(input + 16));
- iadst16_1d(input, outptr);
+ iadst16(input, outptr);
input += 16;
outptr += 16;
}
@@ -1134,7 +1134,7 @@
for (j = 0; j < 16; ++j)
temp_in[j * 16 + i] = out[i * 16 + j];
- idct16_1d_cols_add_blk_dspr2(temp_in, dest, pitch);
+ idct16_cols_add_blk_dspr2(temp_in, dest, pitch);
}
break;
case ADST_ADST: // ADST in both directions
@@ -1145,7 +1145,7 @@
/* prefetch row */
vp9_prefetch_load((const uint8_t *)(input + 16));
- iadst16_1d(input, outptr);
+ iadst16(input, outptr);
input += 16;
outptr += 16;
}
@@ -1153,7 +1153,7 @@
for (i = 0; i < 16; ++i) {
for (j = 0; j < 16; ++j)
temp_in[j] = out[j * 16 + i];
- iadst16_1d(temp_in, temp_out);
+ iadst16(temp_in, temp_out);
for (j = 0; j < 16; ++j)
dest[j * pitch + i] =
clip_pixel(ROUND_POWER_OF_TWO(temp_out[j], 6)
@@ -1183,7 +1183,7 @@
// First transform rows. Since all non-zero dct coefficients are in
// upper-left 4x4 area, we only need to calculate first 4 rows here.
- idct16_1d_rows_dspr2(input, outptr, 4);
+ idct16_rows_dspr2(input, outptr, 4);
outptr += 4;
for (i = 0; i < 6; ++i) {
@@ -1213,7 +1213,7 @@
}
// Then transform columns
- idct16_1d_cols_add_blk_dspr2(out, dest, dest_stride);
+ idct16_cols_add_blk_dspr2(out, dest, dest_stride);
}
void vp9_idct16x16_1_add_dspr2(const int16_t *input, uint8_t *dest,
diff --git a/vp9/common/mips/dspr2/vp9_itrans32_cols_dspr2.c b/vp9/common/mips/dspr2/vp9_itrans32_cols_dspr2.c
index 5e92db3..132d88c 100644
--- a/vp9/common/mips/dspr2/vp9_itrans32_cols_dspr2.c
+++ b/vp9/common/mips/dspr2/vp9_itrans32_cols_dspr2.c
@@ -18,8 +18,8 @@
#include "vp9/common/mips/dspr2/vp9_common_dspr2.h"
#if HAVE_DSPR2
-void vp9_idct32_1d_cols_add_blk_dspr2(int16_t *input, uint8_t *dest,
- int dest_stride) {
+void vp9_idct32_cols_add_blk_dspr2(int16_t *input, uint8_t *dest,
+ int dest_stride) {
int16_t step1_0, step1_1, step1_2, step1_3, step1_4, step1_5, step1_6;
int16_t step1_7, step1_8, step1_9, step1_10, step1_11, step1_12, step1_13;
int16_t step1_14, step1_15, step1_16, step1_17, step1_18, step1_19;
diff --git a/vp9/common/mips/dspr2/vp9_itrans32_dspr2.c b/vp9/common/mips/dspr2/vp9_itrans32_dspr2.c
index bc67594..74a90b0 100644
--- a/vp9/common/mips/dspr2/vp9_itrans32_dspr2.c
+++ b/vp9/common/mips/dspr2/vp9_itrans32_dspr2.c
@@ -19,8 +19,8 @@
#include "vp9/common/mips/dspr2/vp9_common_dspr2.h"
#if HAVE_DSPR2
-static void idct32_1d_rows_dspr2(const int16_t *input, int16_t *output,
- uint32_t no_rows) {
+static void idct32_rows_dspr2(const int16_t *input, int16_t *output,
+ uint32_t no_rows) {
int16_t step1_0, step1_1, step1_2, step1_3, step1_4, step1_5, step1_6;
int16_t step1_7, step1_8, step1_9, step1_10, step1_11, step1_12, step1_13;
int16_t step1_14, step1_15, step1_16, step1_17, step1_18, step1_19, step1_20;
@@ -882,10 +882,10 @@
);
// Rows
- idct32_1d_rows_dspr2(input, outptr, 32);
+ idct32_rows_dspr2(input, outptr, 32);
// Columns
- vp9_idct32_1d_cols_add_blk_dspr2(out, dest, dest_stride);
+ vp9_idct32_cols_add_blk_dspr2(out, dest, dest_stride);
}
void vp9_idct32x32_34_add_dspr2(const int16_t *input, uint8_t *dest,
@@ -903,7 +903,7 @@
);
// Rows
- idct32_1d_rows_dspr2(input, outptr, 8);
+ idct32_rows_dspr2(input, outptr, 8);
outptr += 8;
__asm__ __volatile__ (
@@ -947,7 +947,7 @@
}
// Columns
- vp9_idct32_1d_cols_add_blk_dspr2(out, dest, stride);
+ vp9_idct32_cols_add_blk_dspr2(out, dest, stride);
}
void vp9_idct32x32_1_add_dspr2(const int16_t *input, uint8_t *dest,
diff --git a/vp9/common/mips/dspr2/vp9_itrans4_dspr2.c b/vp9/common/mips/dspr2/vp9_itrans4_dspr2.c
index 5b7aa5e..1990348 100644
--- a/vp9/common/mips/dspr2/vp9_itrans4_dspr2.c
+++ b/vp9/common/mips/dspr2/vp9_itrans4_dspr2.c
@@ -19,7 +19,7 @@
#include "vp9/common/mips/dspr2/vp9_common_dspr2.h"
#if HAVE_DSPR2
-static void vp9_idct4_1d_rows_dspr2(const int16_t *input, int16_t *output) {
+static void vp9_idct4_rows_dspr2(const int16_t *input, int16_t *output) {
int16_t step_0, step_1, step_2, step_3;
int Temp0, Temp1, Temp2, Temp3;
const int const_2_power_13 = 8192;
@@ -104,7 +104,7 @@
}
}
-static void vp9_idct4_1d_columns_add_blk_dspr2(int16_t *input, uint8_t *dest,
+static void vp9_idct4_columns_add_blk_dspr2(int16_t *input, uint8_t *dest,
int dest_stride) {
int16_t step_0, step_1, step_2, step_3;
int Temp0, Temp1, Temp2, Temp3;
@@ -240,10 +240,10 @@
);
// Rows
- vp9_idct4_1d_rows_dspr2(input, outptr);
+ vp9_idct4_rows_dspr2(input, outptr);
// Columns
- vp9_idct4_1d_columns_add_blk_dspr2(&out[0], dest, dest_stride);
+ vp9_idct4_columns_add_blk_dspr2(&out[0], dest, dest_stride);
}
void vp9_idct4x4_1_add_dspr2(const int16_t *input, uint8_t *dest,
@@ -319,7 +319,7 @@
}
}
-static void iadst4_1d_dspr2(const int16_t *input, int16_t *output) {
+static void iadst4_dspr2(const int16_t *input, int16_t *output) {
int s0, s1, s2, s3, s4, s5, s6, s7;
int x0, x1, x2, x3;
@@ -379,16 +379,16 @@
switch (tx_type) {
case DCT_DCT: // DCT in both horizontal and vertical
- vp9_idct4_1d_rows_dspr2(input, outptr);
- vp9_idct4_1d_columns_add_blk_dspr2(&out[0], dest, dest_stride);
+ vp9_idct4_rows_dspr2(input, outptr);
+ vp9_idct4_columns_add_blk_dspr2(&out[0], dest, dest_stride);
break;
case ADST_DCT: // ADST in vertical, DCT in horizontal
- vp9_idct4_1d_rows_dspr2(input, outptr);
+ vp9_idct4_rows_dspr2(input, outptr);
outptr = out;
for (i = 0; i < 4; ++i) {
- iadst4_1d_dspr2(outptr, temp_out);
+ iadst4_dspr2(outptr, temp_out);
for (j = 0; j < 4; ++j)
dest[j * dest_stride + i] =
@@ -400,7 +400,7 @@
break;
case DCT_ADST: // DCT in vertical, ADST in horizontal
for (i = 0; i < 4; ++i) {
- iadst4_1d_dspr2(input, outptr);
+ iadst4_dspr2(input, outptr);
input += 4;
outptr += 4;
}
@@ -410,11 +410,11 @@
temp_in[i * 4 + j] = out[j * 4 + i];
}
}
- vp9_idct4_1d_columns_add_blk_dspr2(&temp_in[0], dest, dest_stride);
+ vp9_idct4_columns_add_blk_dspr2(&temp_in[0], dest, dest_stride);
break;
case ADST_ADST: // ADST in both directions
for (i = 0; i < 4; ++i) {
- iadst4_1d_dspr2(input, outptr);
+ iadst4_dspr2(input, outptr);
input += 4;
outptr += 4;
}
@@ -422,7 +422,7 @@
for (i = 0; i < 4; ++i) {
for (j = 0; j < 4; ++j)
temp_in[j] = out[j * 4 + i];
- iadst4_1d_dspr2(temp_in, temp_out);
+ iadst4_dspr2(temp_in, temp_out);
for (j = 0; j < 4; ++j)
dest[j * dest_stride + i] =
diff --git a/vp9/common/mips/dspr2/vp9_itrans8_dspr2.c b/vp9/common/mips/dspr2/vp9_itrans8_dspr2.c
index 93a0840..acccaea 100644
--- a/vp9/common/mips/dspr2/vp9_itrans8_dspr2.c
+++ b/vp9/common/mips/dspr2/vp9_itrans8_dspr2.c
@@ -19,8 +19,8 @@
#include "vp9/common/mips/dspr2/vp9_common_dspr2.h"
#if HAVE_DSPR2
-static void idct8_1d_rows_dspr2(const int16_t *input, int16_t *output,
- uint32_t no_rows) {
+static void idct8_rows_dspr2(const int16_t *input, int16_t *output,
+ uint32_t no_rows) {
int step1_0, step1_1, step1_2, step1_3, step1_4, step1_5, step1_6, step1_7;
const int const_2_power_13 = 8192;
int Temp0, Temp1, Temp2, Temp3, Temp4;
@@ -200,8 +200,8 @@
}
}
-static void idct8_1d_columns_add_blk_dspr2(int16_t *input, uint8_t *dest,
- int dest_stride) {
+static void idct8_columns_add_blk_dspr2(int16_t *input, uint8_t *dest,
+ int dest_stride) {
int step1_0, step1_1, step1_2, step1_3, step1_4, step1_5, step1_6, step1_7;
int Temp0, Temp1, Temp2, Temp3;
int i;
@@ -462,13 +462,13 @@
);
// First transform rows
- idct8_1d_rows_dspr2(input, outptr, 8);
+ idct8_rows_dspr2(input, outptr, 8);
// Then transform columns and add to dest
- idct8_1d_columns_add_blk_dspr2(&out[0], dest, dest_stride);
+ idct8_columns_add_blk_dspr2(&out[0], dest, dest_stride);
}
-static void iadst8_1d_dspr2(const int16_t *input, int16_t *output) {
+static void iadst8_dspr2(const int16_t *input, int16_t *output) {
int s0, s1, s2, s3, s4, s5, s6, s7;
int x0, x1, x2, x3, x4, x5, x6, x7;
@@ -563,14 +563,14 @@
switch (tx_type) {
case DCT_DCT: // DCT in both horizontal and vertical
- idct8_1d_rows_dspr2(input, outptr, 8);
- idct8_1d_columns_add_blk_dspr2(&out[0], dest, dest_stride);
+ idct8_rows_dspr2(input, outptr, 8);
+ idct8_columns_add_blk_dspr2(&out[0], dest, dest_stride);
break;
case ADST_DCT: // ADST in vertical, DCT in horizontal
- idct8_1d_rows_dspr2(input, outptr, 8);
+ idct8_rows_dspr2(input, outptr, 8);
for (i = 0; i < 8; ++i) {
- iadst8_1d_dspr2(&out[i * 8], temp_out);
+ iadst8_dspr2(&out[i * 8], temp_out);
for (j = 0; j < 8; ++j)
dest[j * dest_stride + i] =
@@ -580,7 +580,7 @@
break;
case DCT_ADST: // DCT in vertical, ADST in horizontal
for (i = 0; i < 8; ++i) {
- iadst8_1d_dspr2(input, outptr);
+ iadst8_dspr2(input, outptr);
input += 8;
outptr += 8;
}
@@ -590,11 +590,11 @@
temp_in[i * 8 + j] = out[j * 8 + i];
}
}
- idct8_1d_columns_add_blk_dspr2(&temp_in[0], dest, dest_stride);
+ idct8_columns_add_blk_dspr2(&temp_in[0], dest, dest_stride);
break;
case ADST_ADST: // ADST in both directions
for (i = 0; i < 8; ++i) {
- iadst8_1d_dspr2(input, outptr);
+ iadst8_dspr2(input, outptr);
input += 8;
outptr += 8;
}
@@ -603,7 +603,7 @@
for (j = 0; j < 8; ++j)
temp_in[j] = out[j * 8 + i];
- iadst8_1d_dspr2(temp_in, temp_out);
+ iadst8_dspr2(temp_in, temp_out);
for (j = 0; j < 8; ++j)
dest[j * dest_stride + i] =
@@ -631,7 +631,7 @@
);
// First transform rows
- idct8_1d_rows_dspr2(input, outptr, 4);
+ idct8_rows_dspr2(input, outptr, 4);
outptr += 4;
@@ -659,7 +659,7 @@
// Then transform columns and add to dest
- idct8_1d_columns_add_blk_dspr2(&out[0], dest, dest_stride);
+ idct8_columns_add_blk_dspr2(&out[0], dest, dest_stride);
}
void vp9_idct8x8_1_add_dspr2(const int16_t *input, uint8_t *dest,
diff --git a/vp9/common/vp9_pred_common.c b/vp9/common/vp9_pred_common.c
index d5c27a5..487f00c 100644
--- a/vp9/common/vp9_pred_common.c
+++ b/vp9/common/vp9_pred_common.c
@@ -218,27 +218,25 @@
} else { // inter/inter
const int above_has_second = has_second_ref(above_mbmi);
const int left_has_second = has_second_ref(left_mbmi);
+ const MV_REFERENCE_FRAME above0 = above_mbmi->ref_frame[0];
+ const MV_REFERENCE_FRAME above1 = above_mbmi->ref_frame[1];
+ const MV_REFERENCE_FRAME left0 = left_mbmi->ref_frame[0];
+ const MV_REFERENCE_FRAME left1 = left_mbmi->ref_frame[1];
if (above_has_second && left_has_second) {
- pred_context = 1 + (above_mbmi->ref_frame[0] == LAST_FRAME ||
- above_mbmi->ref_frame[1] == LAST_FRAME ||
- left_mbmi->ref_frame[0] == LAST_FRAME ||
- left_mbmi->ref_frame[1] == LAST_FRAME);
+ pred_context = 1 + (above0 == LAST_FRAME || above1 == LAST_FRAME ||
+ left0 == LAST_FRAME || left1 == LAST_FRAME);
} else if (above_has_second || left_has_second) {
- const MV_REFERENCE_FRAME rfs = !above_has_second ?
- above_mbmi->ref_frame[0] : left_mbmi->ref_frame[0];
- const MV_REFERENCE_FRAME crf1 = above_has_second ?
- above_mbmi->ref_frame[0] : left_mbmi->ref_frame[0];
- const MV_REFERENCE_FRAME crf2 = above_has_second ?
- above_mbmi->ref_frame[1] : left_mbmi->ref_frame[1];
+ const MV_REFERENCE_FRAME rfs = !above_has_second ? above0 : left0;
+ const MV_REFERENCE_FRAME crf1 = above_has_second ? above0 : left0;
+ const MV_REFERENCE_FRAME crf2 = above_has_second ? above1 : left1;
if (rfs == LAST_FRAME)
pred_context = 3 + (crf1 == LAST_FRAME || crf2 == LAST_FRAME);
else
pred_context = (crf1 == LAST_FRAME || crf2 == LAST_FRAME);
} else {
- pred_context = 2 * (above_mbmi->ref_frame[0] == LAST_FRAME) +
- 2 * (left_mbmi->ref_frame[0] == LAST_FRAME);
+ pred_context = 2 * (above0 == LAST_FRAME) + 2 * (left0 == LAST_FRAME);
}
}
} else if (has_above || has_left) { // one edge available
diff --git a/vp9/common/x86/vp9_asm_stubs.c b/vp9/common/x86/vp9_asm_stubs.c
index 8ab5fb1..feb0098 100644
--- a/vp9/common/x86/vp9_asm_stubs.c
+++ b/vp9/common/x86/vp9_asm_stubs.c
@@ -23,8 +23,8 @@
const short *filter
);
-#define FUN_CONV_1D(name, step_q4, filter, dir, src_start, avg, opt1, opt2) \
-void vp9_convolve8_##name##_##opt1(const uint8_t *src, ptrdiff_t src_stride, \
+#define FUN_CONV_1D(name, step_q4, filter, dir, src_start, avg, opt) \
+void vp9_convolve8_##name##_##opt(const uint8_t *src, ptrdiff_t src_stride, \
uint8_t *dst, ptrdiff_t dst_stride, \
const int16_t *filter_x, int x_step_q4, \
const int16_t *filter_y, int y_step_q4, \
@@ -32,7 +32,7 @@
if (step_q4 == 16 && filter[3] != 128) { \
if (filter[0] || filter[1] || filter[2]) { \
while (w >= 16) { \
- vp9_filter_block1d16_##dir##8_##avg##opt1(src_start, src_stride, \
+ vp9_filter_block1d16_##dir##8_##avg##opt(src_start, src_stride, \
dst, dst_stride, \
h, filter); \
src += 16; \
@@ -40,7 +40,7 @@
w -= 16; \
} \
while (w >= 8) { \
- vp9_filter_block1d8_##dir##8_##avg##opt1(src_start, src_stride, \
+ vp9_filter_block1d8_##dir##8_##avg##opt(src_start, src_stride, \
dst, dst_stride, \
h, filter); \
src += 8; \
@@ -48,7 +48,7 @@
w -= 8; \
} \
while (w >= 4) { \
- vp9_filter_block1d4_##dir##8_##avg##opt1(src_start, src_stride, \
+ vp9_filter_block1d4_##dir##8_##avg##opt(src_start, src_stride, \
dst, dst_stride, \
h, filter); \
src += 4; \
@@ -57,7 +57,7 @@
} \
} else { \
while (w >= 16) { \
- vp9_filter_block1d16_##dir##2_##avg##opt2(src, src_stride, \
+ vp9_filter_block1d16_##dir##2_##avg##opt(src, src_stride, \
dst, dst_stride, \
h, filter); \
src += 16; \
@@ -65,7 +65,7 @@
w -= 16; \
} \
while (w >= 8) { \
- vp9_filter_block1d8_##dir##2_##avg##opt2(src, src_stride, \
+ vp9_filter_block1d8_##dir##2_##avg##opt(src, src_stride, \
dst, dst_stride, \
h, filter); \
src += 8; \
@@ -73,7 +73,7 @@
w -= 8; \
} \
while (w >= 4) { \
- vp9_filter_block1d4_##dir##2_##avg##opt2(src, src_stride, \
+ vp9_filter_block1d4_##dir##2_##avg##opt(src, src_stride, \
dst, dst_stride, \
h, filter); \
src += 4; \
@@ -136,18 +136,18 @@
filter8_1dfunction vp9_filter_block1d4_v8_avg_ssse3;
filter8_1dfunction vp9_filter_block1d4_h8_avg_ssse3;
-filter8_1dfunction vp9_filter_block1d16_v2_sse2;
-filter8_1dfunction vp9_filter_block1d16_h2_sse2;
-filter8_1dfunction vp9_filter_block1d8_v2_sse2;
-filter8_1dfunction vp9_filter_block1d8_h2_sse2;
-filter8_1dfunction vp9_filter_block1d4_v2_sse2;
-filter8_1dfunction vp9_filter_block1d4_h2_sse2;
-filter8_1dfunction vp9_filter_block1d16_v2_avg_sse2;
-filter8_1dfunction vp9_filter_block1d16_h2_avg_sse2;
-filter8_1dfunction vp9_filter_block1d8_v2_avg_sse2;
-filter8_1dfunction vp9_filter_block1d8_h2_avg_sse2;
-filter8_1dfunction vp9_filter_block1d4_v2_avg_sse2;
-filter8_1dfunction vp9_filter_block1d4_h2_avg_sse2;
+filter8_1dfunction vp9_filter_block1d16_v2_ssse3;
+filter8_1dfunction vp9_filter_block1d16_h2_ssse3;
+filter8_1dfunction vp9_filter_block1d8_v2_ssse3;
+filter8_1dfunction vp9_filter_block1d8_h2_ssse3;
+filter8_1dfunction vp9_filter_block1d4_v2_ssse3;
+filter8_1dfunction vp9_filter_block1d4_h2_ssse3;
+filter8_1dfunction vp9_filter_block1d16_v2_avg_ssse3;
+filter8_1dfunction vp9_filter_block1d16_h2_avg_ssse3;
+filter8_1dfunction vp9_filter_block1d8_v2_avg_ssse3;
+filter8_1dfunction vp9_filter_block1d8_h2_avg_ssse3;
+filter8_1dfunction vp9_filter_block1d4_v2_avg_ssse3;
+filter8_1dfunction vp9_filter_block1d4_h2_avg_ssse3;
// void vp9_convolve8_horiz_ssse3(const uint8_t *src, ptrdiff_t src_stride,
// uint8_t *dst, ptrdiff_t dst_stride,
@@ -169,11 +169,11 @@
// const int16_t *filter_x, int x_step_q4,
// const int16_t *filter_y, int y_step_q4,
// int w, int h);
-FUN_CONV_1D(horiz, x_step_q4, filter_x, h, src, , ssse3, sse2);
-FUN_CONV_1D(vert, y_step_q4, filter_y, v, src - src_stride * 3, , ssse3, sse2);
-FUN_CONV_1D(avg_horiz, x_step_q4, filter_x, h, src, avg_, ssse3, sse2);
+FUN_CONV_1D(horiz, x_step_q4, filter_x, h, src, , ssse3);
+FUN_CONV_1D(vert, y_step_q4, filter_y, v, src - src_stride * 3, , ssse3);
+FUN_CONV_1D(avg_horiz, x_step_q4, filter_x, h, src, avg_, ssse3);
FUN_CONV_1D(avg_vert, y_step_q4, filter_y, v, src - src_stride * 3, avg_,
- ssse3, sse2);
+ ssse3);
// void vp9_convolve8_ssse3(const uint8_t *src, ptrdiff_t src_stride,
// uint8_t *dst, ptrdiff_t dst_stride,
@@ -236,11 +236,10 @@
// const int16_t *filter_x, int x_step_q4,
// const int16_t *filter_y, int y_step_q4,
// int w, int h);
-FUN_CONV_1D(horiz, x_step_q4, filter_x, h, src, , sse2, sse2);
-FUN_CONV_1D(vert, y_step_q4, filter_y, v, src - src_stride * 3, , sse2, sse2);
-FUN_CONV_1D(avg_horiz, x_step_q4, filter_x, h, src, avg_, sse2, sse2);
-FUN_CONV_1D(avg_vert, y_step_q4, filter_y, v, src - src_stride * 3, avg_, sse2,
- sse2);
+FUN_CONV_1D(horiz, x_step_q4, filter_x, h, src, , sse2);
+FUN_CONV_1D(vert, y_step_q4, filter_y, v, src - src_stride * 3, , sse2);
+FUN_CONV_1D(avg_horiz, x_step_q4, filter_x, h, src, avg_, sse2);
+FUN_CONV_1D(avg_vert, y_step_q4, filter_y, v, src - src_stride * 3, avg_, sse2);
// void vp9_convolve8_sse2(const uint8_t *src, ptrdiff_t src_stride,
// uint8_t *dst, ptrdiff_t dst_stride,
diff --git a/vp9/common/x86/vp9_subpixel_bilinear_ssse3.asm b/vp9/common/x86/vp9_subpixel_bilinear_ssse3.asm
new file mode 100644
index 0000000..b5e18fe
--- /dev/null
+++ b/vp9/common/x86/vp9_subpixel_bilinear_ssse3.asm
@@ -0,0 +1,422 @@
+;
+; Copyright (c) 2014 The WebM project authors. All Rights Reserved.
+;
+; Use of this source code is governed by a BSD-style license
+; that can be found in the LICENSE file in the root of the source
+; tree. An additional intellectual property rights grant can be found
+; in the file PATENTS. All contributing project authors may
+; be found in the AUTHORS file in the root of the source tree.
+;
+
+%include "vpx_ports/x86_abi_support.asm"
+
+%macro GET_PARAM_4 0
+ mov rdx, arg(5) ;filter ptr (rdx is reused for out_pitch below)
+ mov rsi, arg(0) ;src_ptr
+ mov rdi, arg(2) ;output_ptr
+ mov rcx, 0x0400040 ;two 16-bit words of 0x0040 (64) in the low dword
+ ; Setup for the 4-wide kernels: tap pair goes to xmm3, rounding words to xmm2.
+ movdqa xmm3, [rdx] ;load filters (aligned 16-byte load)
+ psrldq xmm3, 6 ;shift right 6 bytes: word 3 of the table becomes word 0
+ packsswb xmm3, xmm3 ;narrow the words to signed bytes, with saturation
+ pshuflw xmm3, xmm3, 0b ;k3_k4 byte pair (word 0) copied to the four low words
+
+ movq xmm2, rcx ;rounding
+ pshufd xmm2, xmm2, 0 ;copy the low dword to all four dwords (every word = 64)
+ ; From here on: rax = source stride, rdx = destination stride, rcx = row count.
+ movsxd rax, DWORD PTR arg(1) ;pixels_per_line
+ movsxd rdx, DWORD PTR arg(3) ;out_pitch
+ movsxd rcx, DWORD PTR arg(4) ;output_height
+%endm
+
+%macro APPLY_FILTER_4 1
+ punpcklbw xmm0, xmm1 ;interleave the low bytes of xmm0 and xmm1 into byte pairs
+ pmaddubsw xmm0, xmm3 ;unsigned source bytes * signed tap bytes, each pair summed to a word
+ ; Parameter %1: non-zero averages the result with the bytes already at [rdi].
+ paddsw xmm0, xmm2 ;rounding (saturating add of the xmm2 words)
+ psraw xmm0, 7 ;shift (arithmetic right shift by 7)
+ packuswb xmm0, xmm0 ;pack to byte (unsigned saturation)
+
+%if %1
+ movd xmm1, [rdi] ;4 bytes currently in the destination
+ pavgb xmm0, xmm1 ;rounded byte-wise average with the destination
+%endif
+ movd [rdi], xmm0 ;store 4 output bytes
+ lea rsi, [rsi + rax] ;advance source by rax bytes
+ lea rdi, [rdi + rdx] ;advance destination by rdx bytes
+ dec rcx ;one row done; ZF is consumed by the jnz placed after each expansion
+%endm
+
+%macro GET_PARAM 0
+ mov rdx, arg(5) ;filter ptr (rdx is reused for out_pitch below)
+ mov rsi, arg(0) ;src_ptr
+ mov rdi, arg(2) ;output_ptr
+ mov rcx, 0x0400040 ;two 16-bit words of 0x0040 (64) in the low dword
+ ; Setup for the 8/16-wide kernels: tap pair goes to xmm7, rounding words to xmm6.
+ movdqa xmm7, [rdx] ;load filters (aligned 16-byte load)
+ psrldq xmm7, 6 ;shift right 6 bytes: word 3 of the table becomes word 0
+ packsswb xmm7, xmm7 ;narrow the words to signed bytes, with saturation
+ pshuflw xmm7, xmm7, 0b ;k3_k4 byte pair (word 0) copied to the four low words
+ punpcklwd xmm7, xmm7 ;interleave the low words with themselves: pair now in all 8 words
+
+ movq xmm6, rcx ;rounding
+ pshufd xmm6, xmm6, 0 ;copy the low dword to all four dwords (every word = 64)
+ ; From here on: rax = source stride, rdx = destination stride, rcx = row count.
+ movsxd rax, DWORD PTR arg(1) ;pixels_per_line
+ movsxd rdx, DWORD PTR arg(3) ;out_pitch
+ movsxd rcx, DWORD PTR arg(4) ;output_height
+%endm
+
+%macro APPLY_FILTER_8 1
+ punpcklbw xmm0, xmm1 ;interleave the low 8 bytes of xmm0 and xmm1 into byte pairs
+ pmaddubsw xmm0, xmm7 ;unsigned source bytes * signed tap bytes, each pair summed to a word
+ ; Parameter %1: non-zero averages the result with the bytes already at [rdi].
+ paddsw xmm0, xmm6 ;rounding (saturating add of the xmm6 words)
+ psraw xmm0, 7 ;shift (arithmetic right shift by 7)
+ packuswb xmm0, xmm0 ;pack back to byte (unsigned saturation)
+
+%if %1
+ movq xmm1, [rdi] ;8 bytes currently in the destination
+ pavgb xmm0, xmm1 ;rounded byte-wise average with the destination
+%endif
+ movq [rdi], xmm0 ;store the result (8 bytes)
+
+ lea rsi, [rsi + rax] ;advance source by rax bytes
+ lea rdi, [rdi + rdx] ;advance destination by rdx bytes
+ dec rcx ;one row done; ZF is consumed by the jnz placed after each expansion
+%endm
+
+%macro APPLY_FILTER_16 1
+ punpcklbw xmm0, xmm1 ;byte pairs from the low 8 bytes of xmm0 and xmm1
+ punpckhbw xmm2, xmm1 ;byte pairs from the high 8 bytes (callers copy xmm0 into xmm2 first)
+ pmaddubsw xmm0, xmm7 ;unsigned source bytes * signed tap bytes, each pair summed to a word
+ pmaddubsw xmm2, xmm7 ;same for the high half
+ ; Parameter %1: non-zero averages the result with the bytes already at [rdi].
+ paddsw xmm0, xmm6 ;rounding
+ paddsw xmm2, xmm6 ;rounding, high half
+ psraw xmm0, 7 ;shift
+ psraw xmm2, 7 ;shift, high half
+ packuswb xmm0, xmm2 ;pack back to byte (low half from xmm0, high half from xmm2)
+
+%if %1
+ movdqu xmm1, [rdi] ;16 bytes currently in the destination (unaligned load)
+ pavgb xmm0, xmm1 ;rounded byte-wise average with the destination
+%endif
+ movdqu [rdi], xmm0 ;store the result (16 bytes, unaligned store)
+
+ lea rsi, [rsi + rax] ;advance source by rax bytes
+ lea rdi, [rdi + rdx] ;advance destination by rdx bytes
+ dec rcx ;one row done; ZF is consumed by the jnz placed after each expansion
+%endm
+
+global sym(vp9_filter_block1d4_v2_ssse3) PRIVATE
+sym(vp9_filter_block1d4_v2_ssse3): ;4 bytes per row, each row paired with the row rax bytes below
+ push rbp
+ mov rbp, rsp
+ SHADOW_ARGS_TO_STACK 6
+ push rsi
+ push rdi
+ ; end prolog
+ ; GET_PARAM_4 / APPLY_FILTER_4 touch only xmm0-xmm3, so no SAVE_XMM here.
+ GET_PARAM_4
+.loop:
+ movd xmm0, [rsi] ;load src (4 bytes of the current row)
+ movd xmm1, [rsi + rax] ;same columns, one source stride further on
+ ; Argument 0: store the filtered bytes without averaging.
+ APPLY_FILTER_4 0
+ jnz .loop ;the macro's dec rcx set ZF; loop until the row count reaches 0
+
+ ; begin epilog
+ pop rdi
+ pop rsi
+ UNSHADOW_ARGS
+ pop rbp
+ ret
+
+global sym(vp9_filter_block1d8_v2_ssse3) PRIVATE
+sym(vp9_filter_block1d8_v2_ssse3): ;8 bytes per row, each row paired with the row rax bytes below
+ push rbp
+ mov rbp, rsp
+ SHADOW_ARGS_TO_STACK 6
+ SAVE_XMM 7
+ push rsi
+ push rdi
+ ; end prolog
+ ; GET_PARAM writes xmm6/xmm7; SAVE_XMM/RESTORE_XMM presumably preserve them - confirm in x86_abi_support.asm.
+ GET_PARAM
+.loop:
+ movq xmm0, [rsi] ;row 0: 8 source bytes at rsi
+ movq xmm1, [rsi + rax] ;row 1: same columns, one source stride further on
+ ; Argument 0: store the filtered bytes without averaging.
+ APPLY_FILTER_8 0
+ jnz .loop ;the macro's dec rcx set ZF; loop until the row count reaches 0
+
+ ; begin epilog
+ pop rdi
+ pop rsi
+ RESTORE_XMM
+ UNSHADOW_ARGS
+ pop rbp
+ ret
+
+global sym(vp9_filter_block1d16_v2_ssse3) PRIVATE
+sym(vp9_filter_block1d16_v2_ssse3): ;16 bytes per row, each row paired with the row rax bytes below
+ push rbp
+ mov rbp, rsp
+ SHADOW_ARGS_TO_STACK 6
+ SAVE_XMM 7
+ push rsi
+ push rdi
+ ; end prolog
+ ; GET_PARAM writes xmm6/xmm7; SAVE_XMM/RESTORE_XMM presumably preserve them - confirm in x86_abi_support.asm.
+ GET_PARAM
+.loop:
+ movdqu xmm0, [rsi] ;row 0: 16 source bytes at rsi (unaligned load)
+ movdqu xmm1, [rsi + rax] ;row 1: same columns, one source stride further on
+ movdqa xmm2, xmm0 ;copy of row 0 for the high-half interleave in APPLY_FILTER_16
+ ; Argument 0: store the filtered bytes without averaging.
+ APPLY_FILTER_16 0
+ jnz .loop ;the macro's dec rcx set ZF; loop until the row count reaches 0
+
+ ; begin epilog
+ pop rdi
+ pop rsi
+ RESTORE_XMM
+ UNSHADOW_ARGS
+ pop rbp
+ ret
+
+global sym(vp9_filter_block1d4_v2_avg_ssse3) PRIVATE
+sym(vp9_filter_block1d4_v2_avg_ssse3): ;as the 4-wide v2 kernel, but averaged into the destination
+ push rbp
+ mov rbp, rsp
+ SHADOW_ARGS_TO_STACK 6
+ push rsi
+ push rdi
+ ; end prolog
+ ; GET_PARAM_4 / APPLY_FILTER_4 touch only xmm0-xmm3, so no SAVE_XMM here.
+ GET_PARAM_4
+.loop:
+ movd xmm0, [rsi] ;load src (4 bytes of the current row)
+ movd xmm1, [rsi + rax] ;same columns, one source stride further on
+ ; Argument 1: average the filtered bytes with the bytes already at [rdi].
+ APPLY_FILTER_4 1
+ jnz .loop ;the macro's dec rcx set ZF; loop until the row count reaches 0
+
+ ; begin epilog
+ pop rdi
+ pop rsi
+ UNSHADOW_ARGS
+ pop rbp
+ ret
+
+global sym(vp9_filter_block1d8_v2_avg_ssse3) PRIVATE
+sym(vp9_filter_block1d8_v2_avg_ssse3): ;as the 8-wide v2 kernel, but averaged into the destination
+ push rbp
+ mov rbp, rsp
+ SHADOW_ARGS_TO_STACK 6
+ SAVE_XMM 7
+ push rsi
+ push rdi
+ ; end prolog
+ ; GET_PARAM writes xmm6/xmm7; SAVE_XMM/RESTORE_XMM presumably preserve them - confirm in x86_abi_support.asm.
+ GET_PARAM
+.loop:
+ movq xmm0, [rsi] ;row 0: 8 source bytes at rsi
+ movq xmm1, [rsi + rax] ;row 1: same columns, one source stride further on
+ ; Argument 1: average the filtered bytes with the bytes already at [rdi].
+ APPLY_FILTER_8 1
+ jnz .loop ;the macro's dec rcx set ZF; loop until the row count reaches 0
+
+ ; begin epilog
+ pop rdi
+ pop rsi
+ RESTORE_XMM
+ UNSHADOW_ARGS
+ pop rbp
+ ret
+
+global sym(vp9_filter_block1d16_v2_avg_ssse3) PRIVATE
+sym(vp9_filter_block1d16_v2_avg_ssse3): ;as the 16-wide v2 kernel, but averaged into the destination
+ push rbp
+ mov rbp, rsp
+ SHADOW_ARGS_TO_STACK 6
+ SAVE_XMM 7
+ push rsi
+ push rdi
+ ; end prolog
+ ; GET_PARAM writes xmm6/xmm7; SAVE_XMM/RESTORE_XMM presumably preserve them - confirm in x86_abi_support.asm.
+ GET_PARAM
+.loop:
+ movdqu xmm0, [rsi] ;row 0: 16 source bytes at rsi (unaligned load)
+ movdqu xmm1, [rsi + rax] ;row 1: same columns, one source stride further on
+ movdqa xmm2, xmm0 ;copy of row 0 for the high-half interleave in APPLY_FILTER_16
+ ; Argument 1: average the filtered bytes with the bytes already at [rdi].
+ APPLY_FILTER_16 1
+ jnz .loop ;the macro's dec rcx set ZF; loop until the row count reaches 0
+
+ ; begin epilog
+ pop rdi
+ pop rsi
+ RESTORE_XMM
+ UNSHADOW_ARGS
+ pop rbp
+ ret
+
+global sym(vp9_filter_block1d4_h2_ssse3) PRIVATE
+sym(vp9_filter_block1d4_h2_ssse3): ;4 bytes per row, each byte paired with its right-hand neighbour
+ push rbp
+ mov rbp, rsp
+ SHADOW_ARGS_TO_STACK 6
+ push rsi
+ push rdi
+ ; end prolog
+ ; GET_PARAM_4 / APPLY_FILTER_4 touch only xmm0-xmm3, so no SAVE_XMM here.
+ GET_PARAM_4
+.loop:
+ movdqu xmm0, [rsi] ;load src (16-byte unaligned load)
+ movdqa xmm1, xmm0 ;second copy of the same bytes
+ psrldq xmm1, 1 ;shift the copy right one byte: byte i now holds src[i + 1]
+ ; NOTE(review): 16 bytes are read but only src[0..4] reach the 4 stored bytes - confirm rows are readable that far.
+ APPLY_FILTER_4 0
+ jnz .loop ;the macro's dec rcx set ZF; loop until the row count reaches 0
+
+ ; begin epilog
+ pop rdi
+ pop rsi
+ UNSHADOW_ARGS
+ pop rbp
+ ret
+
+global sym(vp9_filter_block1d8_h2_ssse3) PRIVATE
+sym(vp9_filter_block1d8_h2_ssse3): ;8 bytes per row, each byte paired with its right-hand neighbour
+ push rbp
+ mov rbp, rsp
+ SHADOW_ARGS_TO_STACK 6
+ SAVE_XMM 7
+ push rsi
+ push rdi
+ ; end prolog
+ ; GET_PARAM writes xmm6/xmm7; SAVE_XMM/RESTORE_XMM presumably preserve them - confirm in x86_abi_support.asm.
+ GET_PARAM
+.loop:
+ movdqu xmm0, [rsi] ;load src (16-byte unaligned load)
+ movdqa xmm1, xmm0 ;second copy of the same bytes
+ psrldq xmm1, 1 ;shift the copy right one byte: byte i now holds src[i + 1]
+ ; NOTE(review): 16 bytes are read but only src[0..8] reach the 8 stored bytes - confirm rows are readable that far.
+ APPLY_FILTER_8 0
+ jnz .loop ;the macro's dec rcx set ZF; loop until the row count reaches 0
+
+ ; begin epilog
+ pop rdi
+ pop rsi
+ RESTORE_XMM
+ UNSHADOW_ARGS
+ pop rbp
+ ret
+
+global sym(vp9_filter_block1d16_h2_ssse3) PRIVATE
+sym(vp9_filter_block1d16_h2_ssse3): ;16 bytes per row, each byte paired with its right-hand neighbour
+ push rbp
+ mov rbp, rsp
+ SHADOW_ARGS_TO_STACK 6
+ SAVE_XMM 7
+ push rsi
+ push rdi
+ ; end prolog
+ ; GET_PARAM writes xmm6/xmm7; SAVE_XMM/RESTORE_XMM presumably preserve them - confirm in x86_abi_support.asm.
+ GET_PARAM
+.loop:
+ movdqu xmm0, [rsi] ;load src (bytes 0..15 of the row)
+ movdqu xmm1, [rsi + 1] ;bytes 1..16: the right-hand neighbours, so 17 bytes are read per row
+ movdqa xmm2, xmm0 ;copy of the first load for the high-half interleave in APPLY_FILTER_16
+ ; Argument 0: store the filtered bytes without averaging.
+ APPLY_FILTER_16 0
+ jnz .loop ;the macro's dec rcx set ZF; loop until the row count reaches 0
+
+ ; begin epilog
+ pop rdi
+ pop rsi
+ RESTORE_XMM
+ UNSHADOW_ARGS
+ pop rbp
+ ret
+
+global sym(vp9_filter_block1d4_h2_avg_ssse3) PRIVATE
+sym(vp9_filter_block1d4_h2_avg_ssse3): ;as the 4-wide h2 kernel, but averaged into the destination
+ push rbp
+ mov rbp, rsp
+ SHADOW_ARGS_TO_STACK 6
+ push rsi
+ push rdi
+ ; end prolog
+ ; GET_PARAM_4 / APPLY_FILTER_4 touch only xmm0-xmm3, so no SAVE_XMM here.
+ GET_PARAM_4
+.loop:
+ movdqu xmm0, [rsi] ;load src (16-byte unaligned load)
+ movdqa xmm1, xmm0 ;second copy of the same bytes
+ psrldq xmm1, 1 ;shift the copy right one byte: byte i now holds src[i + 1]
+ ; NOTE(review): 16 bytes are read but only src[0..4] reach the 4 stored bytes - confirm rows are readable that far.
+ APPLY_FILTER_4 1
+ jnz .loop ;the macro's dec rcx set ZF; loop until the row count reaches 0
+
+ ; begin epilog
+ pop rdi
+ pop rsi
+ UNSHADOW_ARGS
+ pop rbp
+ ret
+
+global sym(vp9_filter_block1d8_h2_avg_ssse3) PRIVATE
+sym(vp9_filter_block1d8_h2_avg_ssse3): ;as the 8-wide h2 kernel, but averaged into the destination
+ push rbp
+ mov rbp, rsp
+ SHADOW_ARGS_TO_STACK 6
+ SAVE_XMM 7
+ push rsi
+ push rdi
+ ; end prolog
+ ; GET_PARAM writes xmm6/xmm7; SAVE_XMM/RESTORE_XMM presumably preserve them - confirm in x86_abi_support.asm.
+ GET_PARAM
+.loop:
+ movdqu xmm0, [rsi] ;load src (16-byte unaligned load)
+ movdqa xmm1, xmm0 ;second copy of the same bytes
+ psrldq xmm1, 1 ;shift the copy right one byte: byte i now holds src[i + 1]
+ ; NOTE(review): 16 bytes are read but only src[0..8] reach the 8 stored bytes - confirm rows are readable that far.
+ APPLY_FILTER_8 1
+ jnz .loop ;the macro's dec rcx set ZF; loop until the row count reaches 0
+
+ ; begin epilog
+ pop rdi
+ pop rsi
+ RESTORE_XMM
+ UNSHADOW_ARGS
+ pop rbp
+ ret
+
+global sym(vp9_filter_block1d16_h2_avg_ssse3) PRIVATE
+sym(vp9_filter_block1d16_h2_avg_ssse3): ;as the 16-wide h2 kernel, but averaged into the destination
+ push rbp
+ mov rbp, rsp
+ SHADOW_ARGS_TO_STACK 6
+ SAVE_XMM 7
+ push rsi
+ push rdi
+ ; end prolog
+ ; GET_PARAM writes xmm6/xmm7; SAVE_XMM/RESTORE_XMM presumably preserve them - confirm in x86_abi_support.asm.
+ GET_PARAM
+.loop:
+ movdqu xmm0, [rsi] ;load src (bytes 0..15 of the row)
+ movdqu xmm1, [rsi + 1] ;bytes 1..16: the right-hand neighbours, so 17 bytes are read per row
+ movdqa xmm2, xmm0 ;copy of the first load for the high-half interleave in APPLY_FILTER_16
+ ; Argument 1: average the filtered bytes with the bytes already at [rdi].
+ APPLY_FILTER_16 1
+ jnz .loop ;the macro's dec rcx set ZF; loop until the row count reaches 0
+
+ ; begin epilog
+ pop rdi
+ pop rsi
+ RESTORE_XMM
+ UNSHADOW_ARGS
+ pop rbp
+ ret
diff --git a/vp9/encoder/vp9_encodeframe.c b/vp9/encoder/vp9_encodeframe.c
index eaedf1e..e895d0a 100644
--- a/vp9/encoder/vp9_encodeframe.c
+++ b/vp9/encoder/vp9_encodeframe.c
@@ -491,24 +491,26 @@
} else {
// Note how often each mode chosen as best
cpi->mode_chosen_counts[mb_mode_index]++;
- if (is_inter_block(mbmi) &&
- (mbmi->sb_type < BLOCK_8X8 || mbmi->mode == NEWMV)) {
- int_mv best_mv[2];
- for (i = 0; i < 1 + has_second_ref(mbmi); ++i)
- best_mv[i].as_int = mbmi->ref_mvs[mbmi->ref_frame[i]][0].as_int;
- vp9_update_mv_count(cpi, x, best_mv);
- }
- if (cm->interp_filter == SWITCHABLE && is_inter_mode(mbmi->mode)) {
- const int ctx = vp9_get_pred_context_switchable_interp(xd);
- ++cm->counts.switchable_interp[ctx][mbmi->interp_filter];
+ if (is_inter_block(mbmi)) {
+ if (mbmi->sb_type < BLOCK_8X8 || mbmi->mode == NEWMV) {
+ int_mv best_mv[2];
+ for (i = 0; i < 1 + has_second_ref(mbmi); ++i)
+ best_mv[i].as_int = mbmi->ref_mvs[mbmi->ref_frame[i]][0].as_int;
+ vp9_update_mv_count(cpi, x, best_mv);
+ }
+
+ if (cm->interp_filter == SWITCHABLE) {
+ const int ctx = vp9_get_pred_context_switchable_interp(xd);
+ ++cm->counts.switchable_interp[ctx][mbmi->interp_filter];
+ }
}
cpi->rd_comp_pred_diff[SINGLE_REFERENCE] += ctx->single_pred_diff;
cpi->rd_comp_pred_diff[COMPOUND_REFERENCE] += ctx->comp_pred_diff;
cpi->rd_comp_pred_diff[REFERENCE_MODE_SELECT] += ctx->hybrid_pred_diff;
- for (i = 0; i < SWITCHABLE_FILTER_CONTEXTS; i++)
+ for (i = 0; i < SWITCHABLE_FILTER_CONTEXTS; ++i)
cpi->rd_filter_diff[i] += ctx->best_filter_diff[i];
}
}
@@ -1072,17 +1074,18 @@
} else {
// Note how often each mode chosen as best
cpi->mode_chosen_counts[mb_mode_index]++;
- if (is_inter_block(mbmi) &&
- (mbmi->sb_type < BLOCK_8X8 || mbmi->mode == NEWMV)) {
- int_mv best_mv[2];
- for (i = 0; i < 1 + has_second_ref(mbmi); ++i)
- best_mv[i].as_int = mbmi->ref_mvs[mbmi->ref_frame[i]][0].as_int;
- vp9_update_mv_count(cpi, x, best_mv);
- }
+ if (is_inter_block(mbmi)) {
+ if (mbmi->sb_type < BLOCK_8X8 || mbmi->mode == NEWMV) {
+ int_mv best_mv[2];
+ for (i = 0; i < 1 + has_second_ref(mbmi); ++i)
+ best_mv[i].as_int = mbmi->ref_mvs[mbmi->ref_frame[i]][0].as_int;
+ vp9_update_mv_count(cpi, x, best_mv);
+ }
- if (cm->interp_filter == SWITCHABLE && is_inter_mode(mbmi->mode)) {
- const int ctx = vp9_get_pred_context_switchable_interp(xd);
- ++cm->counts.switchable_interp[ctx][mbmi->interp_filter];
+ if (cm->interp_filter == SWITCHABLE) {
+ const int ctx = vp9_get_pred_context_switchable_interp(xd);
+ ++cm->counts.switchable_interp[ctx][mbmi->interp_filter];
+ }
}
}
}
diff --git a/vp9/encoder/vp9_firstpass.c b/vp9/encoder/vp9_firstpass.c
index dcebefb..838f74e 100644
--- a/vp9/encoder/vp9_firstpass.c
+++ b/vp9/encoder/vp9_firstpass.c
@@ -133,9 +133,9 @@
return 1;
}
-static void output_stats(const VP9_COMP *cpi,
+static void output_stats(const VP9_COMP *cpi,
struct vpx_codec_pkt_list *pktlist,
- FIRSTPASS_STATS *stats) {
+ FIRSTPASS_STATS *stats) {
struct vpx_codec_cx_pkt pkt;
pkt.kind = VPX_CODEC_STATS_PKT;
pkt.data.twopass_stats.buf = stats;
@@ -266,9 +266,9 @@
// Calculate a modified Error used in distributing bits between easier and
// harder frames.
-static double calculate_modified_err(VP9_COMP *cpi,
- FIRSTPASS_STATS *this_frame) {
- struct twopass_rc *const twopass = &cpi->twopass;
+static double calculate_modified_err(const VP9_COMP *cpi,
+ const FIRSTPASS_STATS *this_frame) {
+ const struct twopass_rc *const twopass = &cpi->twopass;
const FIRSTPASS_STATS *const stats = &twopass->total_stats;
const double av_err = stats->ssim_weighted_pred_err / stats->count;
double modified_error = av_err * pow(this_frame->ssim_weighted_pred_err /
@@ -337,7 +337,7 @@
}
// This function returns the maximum target rate per frame.
-static int frame_max_bits(VP9_COMP *cpi) {
+static int frame_max_bits(const VP9_COMP *cpi) {
int64_t max_bits =
((int64_t)cpi->rc.av_per_frame_bandwidth *
(int64_t)cpi->oxcf.two_pass_vbrmax_section) / 100;
@@ -469,7 +469,7 @@
TileInfo tile;
struct macroblock_plane *const p = x->plane;
struct macroblockd_plane *const pd = xd->plane;
- PICK_MODE_CONTEXT *ctx = &x->sb64_context;
+ const PICK_MODE_CONTEXT *ctx = &x->sb64_context;
int i;
int recon_yoffset, recon_uvoffset;
@@ -907,7 +907,7 @@
int q;
const int num_mbs = cpi->common.MBs;
int target_norm_bits_per_mb;
- RATE_CONTROL *const rc = &cpi->rc;
+ const RATE_CONTROL *const rc = &cpi->rc;
const double section_err = fpstats->coded_error / fpstats->count;
const double err_per_mb = section_err / num_mbs;
@@ -1040,12 +1040,10 @@
// Function to test for a condition where a complex transition is followed
// by a static section. For example in slide shows where there is a fade
// between slides. This is to help with more optimal kf and gf positioning.
-static int detect_transition_to_still(
- VP9_COMP *cpi,
- int frame_interval,
- int still_interval,
- double loop_decay_rate,
- double last_decay_rate) {
+static int detect_transition_to_still(VP9_COMP *cpi, int frame_interval,
+ int still_interval,
+ double loop_decay_rate,
+ double last_decay_rate) {
int trans_to_still = 0;
// Break clause to detect very still sections after motion
@@ -1413,7 +1411,7 @@
double mv_in_out_accumulator = 0.0;
double abs_mv_in_out_accumulator = 0.0;
double mv_ratio_accumulator_thresh;
- int max_bits = frame_max_bits(cpi); // Max for a single frame
+ const int max_bits = frame_max_bits(cpi); // Max for a single frame
unsigned int allow_alt_ref = cpi->oxcf.play_alternate &&
cpi->oxcf.lag_in_frames;
@@ -1638,27 +1636,23 @@
#endif
// Calculate the bits to be allocated to the group as a whole
- if ((cpi->twopass.kf_group_bits > 0) &&
- (cpi->twopass.kf_group_error_left > 0)) {
- cpi->twopass.gf_group_bits =
- (int64_t)(cpi->twopass.kf_group_bits *
+ if (twopass->kf_group_bits > 0 && twopass->kf_group_error_left > 0) {
+ twopass->gf_group_bits = (int64_t)(cpi->twopass.kf_group_bits *
(gf_group_err / cpi->twopass.kf_group_error_left));
} else {
- cpi->twopass.gf_group_bits = 0;
+ twopass->gf_group_bits = 0;
}
- cpi->twopass.gf_group_bits =
- (cpi->twopass.gf_group_bits < 0)
- ? 0
- : (cpi->twopass.gf_group_bits > cpi->twopass.kf_group_bits)
- ? cpi->twopass.kf_group_bits : cpi->twopass.gf_group_bits;
+ twopass->gf_group_bits = (twopass->gf_group_bits < 0) ?
+ 0 : (twopass->gf_group_bits > twopass->kf_group_bits) ?
+ twopass->kf_group_bits : twopass->gf_group_bits;
// Clip cpi->twopass.gf_group_bits based on user supplied data rate
// variability limit (cpi->oxcf.two_pass_vbrmax_section)
- if (cpi->twopass.gf_group_bits > (int64_t)max_bits * rc->baseline_gf_interval)
- cpi->twopass.gf_group_bits = (int64_t)max_bits * rc->baseline_gf_interval;
+ if (twopass->gf_group_bits > (int64_t)max_bits * rc->baseline_gf_interval)
+ twopass->gf_group_bits = (int64_t)max_bits * rc->baseline_gf_interval;
// Reset the file position
- reset_fpf_position(&cpi->twopass, start_pos);
+ reset_fpf_position(twopass, start_pos);
// Assign bits to the arf or gf.
for (i = 0; i <= (rc->source_alt_ref_pending &&
@@ -1686,17 +1680,17 @@
// Calculate the number of bits to be spent on the gf or arf based on
// the boost number
- gf_bits = (int)((double)boost * (cpi->twopass.gf_group_bits /
- (double)allocation_chunks));
+ gf_bits = (int)((double)boost * (twopass->gf_group_bits /
+ (double)allocation_chunks));
// If the frame that is to be boosted is simpler than the average for
// the gf/arf group then use an alternative calculation
// based on the error score of the frame itself
if (rc->baseline_gf_interval < 1 ||
mod_frame_err < gf_group_err / (double)rc->baseline_gf_interval) {
- double alt_gf_grp_bits = (double)cpi->twopass.kf_group_bits *
+ double alt_gf_grp_bits = (double)twopass->kf_group_bits *
(mod_frame_err * (double)rc->baseline_gf_interval) /
- DOUBLE_DIVIDE_CHECK(cpi->twopass.kf_group_error_left);
+ DOUBLE_DIVIDE_CHECK(twopass->kf_group_error_left);
int alt_gf_bits = (int)((double)boost * (alt_gf_grp_bits /
(double)allocation_chunks));
@@ -1707,9 +1701,9 @@
// If it is harder than other frames in the group make sure it at
// least receives an allocation in keeping with its relative error
// score, otherwise it may be worse off than an "un-boosted" frame.
- int alt_gf_bits = (int)((double)cpi->twopass.kf_group_bits *
+ int alt_gf_bits = (int)((double)twopass->kf_group_bits *
mod_frame_err /
- DOUBLE_DIVIDE_CHECK(cpi->twopass.kf_group_error_left));
+ DOUBLE_DIVIDE_CHECK(twopass->kf_group_error_left));
if (alt_gf_bits > gf_bits)
gf_bits = alt_gf_bits;
@@ -1720,11 +1714,11 @@
gf_bits = 0;
if (i == 0) {
- cpi->twopass.gf_bits = gf_bits;
+ twopass->gf_bits = gf_bits;
}
if (i == 1 ||
(!rc->source_alt_ref_pending &&
- (cpi->common.frame_type != KEY_FRAME))) {
+ cpi->common.frame_type != KEY_FRAME)) {
// Per frame bit target for this frame
vp9_rc_set_frame_target(cpi, gf_bits);
}
@@ -1732,11 +1726,11 @@
{
// Adjust KF group bits and error remaining
- cpi->twopass.kf_group_error_left -= (int64_t)gf_group_err;
- cpi->twopass.kf_group_bits -= cpi->twopass.gf_group_bits;
+ twopass->kf_group_error_left -= (int64_t)gf_group_err;
+ twopass->kf_group_bits -= twopass->gf_group_bits;
- if (cpi->twopass.kf_group_bits < 0)
- cpi->twopass.kf_group_bits = 0;
+ if (twopass->kf_group_bits < 0)
+ twopass->kf_group_bits = 0;
// If this is an arf update we want to remove the score for the
// overlay frame at the end which will usually be very cheap to code.
@@ -1745,18 +1739,18 @@
// For normal GFs remove the score for the GF itself unless this is
// also a key frame in which case it has already been accounted for.
if (rc->source_alt_ref_pending) {
- cpi->twopass.gf_group_error_left = (int64_t)gf_group_err - mod_frame_err;
+ twopass->gf_group_error_left = (int64_t)gf_group_err - mod_frame_err;
} else if (cpi->common.frame_type != KEY_FRAME) {
- cpi->twopass.gf_group_error_left = (int64_t)(gf_group_err
+ twopass->gf_group_error_left = (int64_t)(gf_group_err
- gf_first_frame_err);
} else {
- cpi->twopass.gf_group_error_left = (int64_t)gf_group_err;
+ twopass->gf_group_error_left = (int64_t)gf_group_err;
}
- cpi->twopass.gf_group_bits -= cpi->twopass.gf_bits;
+ twopass->gf_group_bits -= twopass->gf_bits;
- if (cpi->twopass.gf_group_bits < 0)
- cpi->twopass.gf_group_bits = 0;
+ if (twopass->gf_group_bits < 0)
+ twopass->gf_group_bits = 0;
// This condition could fail if there are two kfs very close together
// despite (MIN_GF_INTERVAL) and would cause a divide by 0 in the
@@ -1765,12 +1759,10 @@
const int boost = rc->source_alt_ref_pending ? b_boost : rc->gfu_boost;
if (boost >= 150) {
- int alt_extra_bits;
- int pct_extra = (boost - 100) / 50;
- pct_extra = (pct_extra > 20) ? 20 : pct_extra;
-
- alt_extra_bits = (int)((cpi->twopass.gf_group_bits * pct_extra) / 100);
- cpi->twopass.gf_group_bits -= alt_extra_bits;
+ const int pct_extra = MIN(20, (boost - 100) / 50);
+ const int alt_extra_bits = (int)((twopass->gf_group_bits * pct_extra) /
+ 100);
+ twopass->gf_group_bits -= alt_extra_bits;
}
}
}
@@ -1779,20 +1771,20 @@
FIRSTPASS_STATS sectionstats;
zero_stats(&sectionstats);
- reset_fpf_position(&cpi->twopass, start_pos);
+ reset_fpf_position(twopass, start_pos);
for (i = 0; i < rc->baseline_gf_interval; i++) {
- input_stats(&cpi->twopass, &next_frame);
+ input_stats(twopass, &next_frame);
accumulate_stats(&sectionstats, &next_frame);
}
avg_stats(&sectionstats);
- cpi->twopass.section_intra_rating = (int)
+ twopass->section_intra_rating = (int)
(sectionstats.intra_error /
DOUBLE_DIVIDE_CHECK(sectionstats.coded_error));
- reset_fpf_position(&cpi->twopass, start_pos);
+ reset_fpf_position(twopass, start_pos);
}
}
@@ -1832,9 +1824,9 @@
}
static int test_candidate_kf(VP9_COMP *cpi,
- FIRSTPASS_STATS *last_frame,
- FIRSTPASS_STATS *this_frame,
- FIRSTPASS_STATS *next_frame) {
+ const FIRSTPASS_STATS *last_frame,
+ const FIRSTPASS_STATS *this_frame,
+ const FIRSTPASS_STATS *next_frame) {
int is_viable_kf = 0;
// Does the frame satisfy the primary criteria of a key frame
diff --git a/vp9/encoder/vp9_onyx_if.c b/vp9/encoder/vp9_onyx_if.c
index fe0a291..b4972be 100644
--- a/vp9/encoder/vp9_onyx_if.c
+++ b/vp9/encoder/vp9_onyx_if.c
@@ -2417,34 +2417,33 @@
// Function to test for conditions that indicate we should loop
// back and recode a frame.
-static int recode_loop_test(VP9_COMP *cpi,
+static int recode_loop_test(const VP9_COMP *cpi,
int high_limit, int low_limit,
int q, int maxq, int minq) {
+ const VP9_COMMON *const cm = &cpi->common;
+ const RATE_CONTROL *const rc = &cpi->rc;
int force_recode = 0;
- VP9_COMMON *cm = &cpi->common;
// Special case trap if maximum allowed frame size exceeded.
- if (cpi->rc.projected_frame_size > cpi->rc.max_frame_bandwidth) {
+ if (rc->projected_frame_size > rc->max_frame_bandwidth) {
force_recode = 1;
// Is frame recode allowed.
// Yes if either recode mode 1 is selected or mode 2 is selected
// and the frame is a key frame, golden frame or alt_ref_frame
} else if ((cpi->sf.recode_loop == 1) ||
- ((cpi->sf.recode_loop == 2) &&
- ((cm->frame_type == KEY_FRAME) ||
- cpi->refresh_golden_frame ||
- cpi->refresh_alt_ref_frame))) {
+ ((cpi->sf.recode_loop == 2) &&
+ (cm->frame_type == KEY_FRAME ||
+ cpi->refresh_golden_frame || cpi->refresh_alt_ref_frame))) {
// General over and under shoot tests
- if (((cpi->rc.projected_frame_size > high_limit) && (q < maxq)) ||
- ((cpi->rc.projected_frame_size < low_limit) && (q > minq))) {
+ if ((rc->projected_frame_size > high_limit && q < maxq) ||
+ (rc->projected_frame_size < low_limit && q > minq)) {
force_recode = 1;
} else if (cpi->oxcf.end_usage == USAGE_CONSTRAINED_QUALITY) {
// Deal with frame undershoot and whether or not we are
// below the automatically set cq level.
if (q > cpi->cq_target_quality &&
- cpi->rc.projected_frame_size <
- ((cpi->rc.this_frame_target * 7) >> 3)) {
+ rc->projected_frame_size < ((rc->this_frame_target * 7) >> 3)) {
force_recode = 1;
}
}
@@ -2918,14 +2917,14 @@
int bottom_index;
SPEED_FEATURES *const sf = &cpi->sf;
- unsigned int max_mv_def = MIN(cpi->common.width, cpi->common.height);
+ unsigned int max_mv_def = MIN(cm->width, cm->height);
struct segmentation *const seg = &cm->seg;
set_ext_overrides(cpi);
/* Scale the source buffer, if required. */
- if (cm->mi_cols * 8 != cpi->un_scaled_source->y_width ||
- cm->mi_rows * 8 != cpi->un_scaled_source->y_height) {
+ if (cm->mi_cols * MI_SIZE != cpi->un_scaled_source->y_width ||
+ cm->mi_rows * MI_SIZE != cpi->un_scaled_source->y_height) {
scale_and_extend_frame_nonnormative(cpi->un_scaled_source,
&cpi->scaled_source);
cpi->Source = &cpi->scaled_source;
@@ -2934,12 +2933,8 @@
}
scale_references(cpi);
- // Clear down mmx registers to allow floating point in what follows.
vp9_clear_system_state();
- // Clear zbin over-quant value and mode boost values.
- cpi->zbin_mode_boost = 0;
-
// Enable or disable mode based tweaking of the zbin.
// For 2 pass only used where GF/ARF prediction quality
// is above a threshold.
@@ -2947,7 +2942,7 @@
cpi->zbin_mode_boost_enabled = 0;
// Current default encoder behavior for the altref sign bias.
- cpi->common.ref_frame_sign_bias[ALTREF_FRAME] = cpi->rc.source_alt_ref_active;
+ cm->ref_frame_sign_bias[ALTREF_FRAME] = cpi->rc.source_alt_ref_active;
// Set default state for segment based loop filter update flags.
cm->lf.mode_ref_delta_update = 0;
@@ -2956,7 +2951,7 @@
cpi->mv_step_param = vp9_init_search_range(cpi, max_mv_def);
// Initialize cpi->max_mv_magnitude and cpi->mv_step_param if appropriate.
if (sf->auto_mv_step_size) {
- if (frame_is_intra_only(&cpi->common)) {
+ if (frame_is_intra_only(cm)) {
// Initialize max_mv_magnitude for use in the first INTER frame
// after a key/intra-only frame.
cpi->max_mv_magnitude = max_mv_def;
@@ -2965,8 +2960,8 @@
// Allow mv_steps to correspond to twice the max mv magnitude found
// in the previous frame, capped by the default max_mv_magnitude based
// on resolution.
- cpi->mv_step_param = vp9_init_search_range(
- cpi, MIN(max_mv_def, 2 * cpi->max_mv_magnitude));
+ cpi->mv_step_param = vp9_init_search_range(cpi, MIN(max_mv_def, 2 *
+ cpi->max_mv_magnitude));
cpi->max_mv_magnitude = 0;
}
}
@@ -3003,9 +2998,8 @@
// static regions if indicated.
// Only allowed in second pass of two pass (as requires lagged coding)
// and if the relevant speed feature flag is set.
- if ((cpi->pass == 2) && (cpi->sf.static_segmentation)) {
+ if (cpi->pass == 2 && cpi->sf.static_segmentation)
configure_static_seg_features(cpi);
- }
// For 1 pass CBR, check if we are dropping this frame.
// Never drop on key frame.
@@ -3066,7 +3060,7 @@
// should be larger. Q of 0 is disabled because we force tx size to be
// 16x16...
if (cpi->sf.super_fast_rtc) {
- if (cpi->common.current_video_frame == 0)
+ if (cm->current_video_frame == 0)
q /= 3;
if (q == 0)
@@ -3079,14 +3073,8 @@
set_high_precision_mv(cpi, (q < HIGH_PRECISION_MV_QTHRESH));
}
- encode_with_recode_loop(cpi,
- size,
- dest,
- &q,
- bottom_index,
- top_index,
- frame_over_shoot_limit,
- frame_under_shoot_limit);
+ encode_with_recode_loop(cpi, size, dest, &q, bottom_index, top_index,
+ frame_over_shoot_limit, frame_under_shoot_limit);
// Special case code to reduce pulsing when key frames are forced at a
// fixed interval. Note the reconstruction error if it is the frame before
@@ -3133,18 +3121,15 @@
update_reference_frames(cpi);
for (t = TX_4X4; t <= TX_32X32; t++)
- full_to_model_counts(cpi->common.counts.coef[t],
- cpi->coef_counts[t]);
- if (!cpi->common.error_resilient_mode &&
- !cpi->common.frame_parallel_decoding_mode) {
- vp9_adapt_coef_probs(&cpi->common);
- }
+ full_to_model_counts(cm->counts.coef[t], cpi->coef_counts[t]);
- if (!frame_is_intra_only(&cpi->common)) {
- if (!cpi->common.error_resilient_mode &&
- !cpi->common.frame_parallel_decoding_mode) {
- vp9_adapt_mode_probs(&cpi->common);
- vp9_adapt_mv_probs(&cpi->common, cpi->common.allow_high_precision_mv);
+ if (!cm->error_resilient_mode && !cm->frame_parallel_decoding_mode)
+ vp9_adapt_coef_probs(cm);
+
+ if (!frame_is_intra_only(cm)) {
+ if (!cm->error_resilient_mode && !cm->frame_parallel_decoding_mode) {
+ vp9_adapt_mode_probs(cm);
+ vp9_adapt_mv_probs(cm, cm->allow_high_precision_mv);
}
}
@@ -3156,14 +3141,14 @@
output_frame_level_debug_stats(cpi);
#endif
if (cpi->refresh_golden_frame == 1)
- cm->frame_flags = cm->frame_flags | FRAMEFLAGS_GOLDEN;
+ cm->frame_flags |= FRAMEFLAGS_GOLDEN;
else
- cm->frame_flags = cm->frame_flags&~FRAMEFLAGS_GOLDEN;
+ cm->frame_flags &= ~FRAMEFLAGS_GOLDEN;
if (cpi->refresh_alt_ref_frame == 1)
- cm->frame_flags = cm->frame_flags | FRAMEFLAGS_ALTREF;
+ cm->frame_flags |= FRAMEFLAGS_ALTREF;
else
- cm->frame_flags = cm->frame_flags&~FRAMEFLAGS_ALTREF;
+ cm->frame_flags &= ~FRAMEFLAGS_ALTREF;
get_ref_frame_flags(cpi);
@@ -3212,6 +3197,7 @@
// reset to normal state now that we are done.
if (!cm->show_existing_frame)
cm->last_show_frame = cm->show_frame;
+
if (cm->show_frame) {
// current mip will be the prev_mip for the next frame
MODE_INFO *temp = cm->prev_mip;
@@ -3232,6 +3218,7 @@
// update not a real frame
++cm->current_video_frame;
}
+
// restore prev_mi
cm->prev_mi = cm->prev_mip + cm->mode_info_stride + 1;
cm->prev_mi_grid_visible = cm->prev_mi_grid_base + cm->mode_info_stride + 1;
diff --git a/vp9/encoder/vp9_sad_c.c b/vp9/encoder/vp9_sad.c
similarity index 100%
rename from vp9/encoder/vp9_sad_c.c
rename to vp9/encoder/vp9_sad.c
diff --git a/vp9/vp9_common.mk b/vp9/vp9_common.mk
index c691411..f9ba41b 100644
--- a/vp9/vp9_common.mk
+++ b/vp9/vp9_common.mk
@@ -76,6 +76,7 @@
VP9_COMMON_SRCS-$(HAVE_SSE2) += common/x86/vp9_subpixel_8t_sse2.asm
VP9_COMMON_SRCS-$(HAVE_SSE2) += common/x86/vp9_subpixel_bilinear_sse2.asm
VP9_COMMON_SRCS-$(HAVE_SSSE3) += common/x86/vp9_subpixel_8t_ssse3.asm
+VP9_COMMON_SRCS-$(HAVE_SSSE3) += common/x86/vp9_subpixel_bilinear_ssse3.asm
ifeq ($(CONFIG_VP9_POSTPROC),yes)
VP9_COMMON_SRCS-$(HAVE_MMX) += common/x86/vp9_postproc_mmx.asm
VP9_COMMON_SRCS-$(HAVE_SSE2) += common/x86/vp9_postproc_sse2.asm
diff --git a/vp9/vp9cx.mk b/vp9/vp9cx.mk
index 131d6c7..c225f54 100644
--- a/vp9/vp9cx.mk
+++ b/vp9/vp9cx.mk
@@ -56,7 +56,7 @@
VP9_CX_SRCS-yes += encoder/vp9_ratectrl.c
VP9_CX_SRCS-yes += encoder/vp9_rdopt.c
VP9_CX_SRCS-yes += encoder/vp9_pickmode.c
-VP9_CX_SRCS-yes += encoder/vp9_sad_c.c
+VP9_CX_SRCS-yes += encoder/vp9_sad.c
VP9_CX_SRCS-yes += encoder/vp9_segmentation.c
VP9_CX_SRCS-yes += encoder/vp9_segmentation.h
VP9_CX_SRCS-yes += encoder/vp9_subexp.c