Merge "Add missing calls to emms in the adaptive quantization code"
diff --git a/test/dct32x32_test.cc b/test/dct32x32_test.cc
index f456abc..5abb9b1 100644
--- a/test/dct32x32_test.cc
+++ b/test/dct32x32_test.cc
@@ -113,8 +113,7 @@
test_input_block[j] = src[j] - dst[j];
}
- const int pitch = 64;
- REGISTER_STATE_CHECK(fwd_txfm_(test_input_block, test_temp_block, pitch));
+ REGISTER_STATE_CHECK(fwd_txfm_(test_input_block, test_temp_block, 32));
REGISTER_STATE_CHECK(inv_txfm_(test_temp_block, dst, 32));
for (int j = 0; j < kNumCoeffs; ++j) {
@@ -150,9 +149,9 @@
for (int j = 0; j < kNumCoeffs; ++j)
input_block[j] = rnd.Rand8() - rnd.Rand8();
- const int pitch = 64;
- vp9_short_fdct32x32_c(input_block, output_ref_block, pitch);
- REGISTER_STATE_CHECK(fwd_txfm_(input_block, output_block, pitch));
+ const int stride = 32;
+ vp9_short_fdct32x32_c(input_block, output_ref_block, stride);
+ REGISTER_STATE_CHECK(fwd_txfm_(input_block, output_block, stride));
if (version_ == 0) {
for (int j = 0; j < kNumCoeffs; ++j)
@@ -188,9 +187,9 @@
for (int j = 0; j < kNumCoeffs; ++j)
input_extreme_block[j] = -255;
- const int pitch = 64;
- vp9_short_fdct32x32_c(input_extreme_block, output_ref_block, pitch);
- REGISTER_STATE_CHECK(fwd_txfm_(input_extreme_block, output_block, pitch));
+ const int stride = 32;
+ vp9_short_fdct32x32_c(input_extreme_block, output_ref_block, stride);
+ REGISTER_STATE_CHECK(fwd_txfm_(input_extreme_block, output_block, stride));
// The minimum quant value is 4.
for (int j = 0; j < kNumCoeffs; ++j) {
diff --git a/vp8/encoder/bitstream.c b/vp8/encoder/bitstream.c
index 8ca4f5f..78e54e2 100644
--- a/vp8/encoder/bitstream.c
+++ b/vp8/encoder/bitstream.c
@@ -1062,7 +1062,7 @@
if (cpi->common.frame_type == KEY_FRAME)
{
/* Reset to default counts/probabilities at key frames */
- vp8_copy(cpi->coef_counts, default_coef_counts);
+ vp8_copy(cpi->mb.coef_counts, default_coef_counts);
}
if (cpi->oxcf.error_resilient_mode & VPX_ERROR_RESILIENT_PARTITIONS)
diff --git a/vp9/common/vp9_onyxc_int.h b/vp9/common/vp9_onyxc_int.h
index 5c8c03e..3111852 100644
--- a/vp9/common/vp9_onyxc_int.h
+++ b/vp9/common/vp9_onyxc_int.h
@@ -146,8 +146,6 @@
TX_MODE tx_mode;
int base_qindex;
- int last_kf_gf_q; /* Q used on the last GF or KF */
-
int y_dc_delta_q;
int uv_dc_delta_q;
int uv_ac_delta_q;
diff --git a/vp9/common/vp9_rtcd_defs.sh b/vp9/common/vp9_rtcd_defs.sh
index 526be87..af96bb3 100644
--- a/vp9/common/vp9_rtcd_defs.sh
+++ b/vp9/common/vp9_rtcd_defs.sh
@@ -701,10 +701,10 @@
prototype void vp9_short_fdct4x4 "int16_t *InputData, int16_t *OutputData, int pitch"
specialize vp9_short_fdct4x4 sse2
-prototype void vp9_short_fdct32x32 "int16_t *InputData, int16_t *OutputData, int pitch"
+prototype void vp9_short_fdct32x32 "int16_t *InputData, int16_t *OutputData, int stride"
specialize vp9_short_fdct32x32 sse2
-prototype void vp9_short_fdct32x32_rd "int16_t *InputData, int16_t *OutputData, int pitch"
+prototype void vp9_short_fdct32x32_rd "int16_t *InputData, int16_t *OutputData, int stride"
specialize vp9_short_fdct32x32_rd sse2
prototype void vp9_short_fdct16x16 "int16_t *InputData, int16_t *OutputData, int pitch"
diff --git a/vp9/common/vp9_seg_common.c b/vp9/common/vp9_seg_common.c
index 6bfd8f8..ef30404 100644
--- a/vp9/common/vp9_seg_common.c
+++ b/vp9/common/vp9_seg_common.c
@@ -76,7 +76,7 @@
}
-const vp9_tree_index vp9_segment_tree[14] = {
+const vp9_tree_index vp9_segment_tree[TREE_SIZE(MAX_SEGMENTS)] = {
2, 4, 6, 8, 10, 12,
0, -1, -2, -3, -4, -5, -6, -7
};
diff --git a/vp9/common/vp9_seg_common.h b/vp9/common/vp9_seg_common.h
index f22239b..eb38c06 100644
--- a/vp9/common/vp9_seg_common.h
+++ b/vp9/common/vp9_seg_common.h
@@ -76,7 +76,7 @@
int segment_id,
SEG_LVL_FEATURES feature_id);
-extern const vp9_tree_index vp9_segment_tree[14];
+extern const vp9_tree_index vp9_segment_tree[TREE_SIZE(MAX_SEGMENTS)];
#endif // VP9_COMMON_VP9_SEG_COMMON_H_
diff --git a/vp9/encoder/vp9_dct.c b/vp9/encoder/vp9_dct.c
index b6555bc..00a2903 100644
--- a/vp9/encoder/vp9_dct.c
+++ b/vp9/encoder/vp9_dct.c
@@ -1315,8 +1315,7 @@
output[31] = dct_32_round(step[31] * cospi_31_64 + step[16] * -cospi_1_64);
}
-void vp9_short_fdct32x32_c(int16_t *input, int16_t *out, int pitch) {
- int shortpitch = pitch >> 1;
+void vp9_short_fdct32x32_c(int16_t *input, int16_t *out, int stride) {
int i, j;
int output[32 * 32];
@@ -1324,7 +1323,7 @@
for (i = 0; i < 32; ++i) {
int temp_in[32], temp_out[32];
for (j = 0; j < 32; ++j)
- temp_in[j] = input[j * shortpitch + i] * 4;
+ temp_in[j] = input[j * stride + i] * 4;
dct32_1d(temp_in, temp_out, 0);
for (j = 0; j < 32; ++j)
output[j * 32 + i] = (temp_out[j] + 1 + (temp_out[j] > 0)) >> 2;
@@ -1344,8 +1343,7 @@
// Note that although we use dct_32_round in dct32_1d computation flow,
// this 2d fdct32x32 for rate-distortion optimization loop is operating
// within 16 bits precision.
-void vp9_short_fdct32x32_rd_c(int16_t *input, int16_t *out, int pitch) {
- int shortpitch = pitch >> 1;
+void vp9_short_fdct32x32_rd_c(int16_t *input, int16_t *out, int stride) {
int i, j;
int output[32 * 32];
@@ -1353,7 +1351,7 @@
for (i = 0; i < 32; ++i) {
int temp_in[32], temp_out[32];
for (j = 0; j < 32; ++j)
- temp_in[j] = input[j * shortpitch + i] * 4;
+ temp_in[j] = input[j * stride + i] * 4;
dct32_1d(temp_in, temp_out, 0);
for (j = 0; j < 32; ++j)
// TODO(cd): see quality impact of only doing
diff --git a/vp9/encoder/vp9_encodemb.c b/vp9/encoder/vp9_encodemb.c
index c1e1a0d..2b5451b 100644
--- a/vp9/encoder/vp9_encodemb.c
+++ b/vp9/encoder/vp9_encodemb.c
@@ -365,9 +365,9 @@
yoff = 32 * (block >> twl);
src_diff = p->src_diff + 4 * bw * yoff + xoff;
if (x->use_lp32x32fdct)
- vp9_short_fdct32x32_rd(src_diff, coeff, bw * 8);
+ vp9_short_fdct32x32_rd(src_diff, coeff, bw * 4);
else
- vp9_short_fdct32x32(src_diff, coeff, bw * 8);
+ vp9_short_fdct32x32(src_diff, coeff, bw * 4);
vp9_quantize_b_32x32(coeff, 1024, x->skip_block, p->zbin, p->round,
p->quant, p->quant_shift, qcoeff, dqcoeff,
pd->dequant, p->zbin_extra, eob, scan, iscan);
@@ -532,9 +532,9 @@
vp9_subtract_block(32, 32, src_diff, bw * 4,
src, p->src.stride, dst, pd->dst.stride);
if (x->use_lp32x32fdct)
- vp9_short_fdct32x32_rd(src_diff, coeff, bw * 8);
+ vp9_short_fdct32x32_rd(src_diff, coeff, bw * 4);
else
- vp9_short_fdct32x32(src_diff, coeff, bw * 8);
+ vp9_short_fdct32x32(src_diff, coeff, bw * 4);
vp9_quantize_b_32x32(coeff, 1024, x->skip_block, p->zbin, p->round,
p->quant, p->quant_shift, qcoeff, dqcoeff,
pd->dequant, p->zbin_extra, eob, scan, iscan);
diff --git a/vp9/encoder/vp9_mbgraph.c b/vp9/encoder/vp9_mbgraph.c
index 2f147a0..ea4c9e8 100644
--- a/vp9/encoder/vp9_mbgraph.c
+++ b/vp9/encoder/vp9_mbgraph.c
@@ -61,6 +61,7 @@
best_err = cpi->find_fractional_mv_step(
x,
&dst_mv->as_mv, &ref_mv->as_mv,
+ xd->allow_high_precision_mv,
x->errorperbit, &v_fn_ptr,
0, cpi->sf.subpel_iters_per_step, NULL, NULL,
& distortion, &sse);
diff --git a/vp9/encoder/vp9_mcomp.c b/vp9/encoder/vp9_mcomp.c
index 561c725..a52f5b1 100644
--- a/vp9/encoder/vp9_mcomp.c
+++ b/vp9/encoder/vp9_mcomp.c
@@ -275,6 +275,7 @@
int vp9_find_best_sub_pixel_iterative(MACROBLOCK *x,
MV *bestmv, const MV *ref_mv,
+ int allow_hp,
int error_per_bit,
const vp9_variance_fn_ptr_t *vfp,
int forced_stop,
@@ -348,8 +349,7 @@
}
}
- if (xd->allow_high_precision_mv && vp9_use_mv_hp(ref_mv) &&
- forced_stop == 0) {
+ if (allow_hp && vp9_use_mv_hp(ref_mv) && forced_stop == 0) {
hstep >>= 1;
while (eighthiters--) {
FIRST_LEVEL_CHECKS;
@@ -373,6 +373,7 @@
int vp9_find_best_sub_pixel_tree(MACROBLOCK *x,
MV *bestmv, const MV *ref_mv,
+ int allow_hp,
int error_per_bit,
const vp9_variance_fn_ptr_t *vfp,
int forced_stop,
@@ -436,8 +437,7 @@
tc = bc;
}
- if (xd->allow_high_precision_mv && vp9_use_mv_hp(ref_mv) &&
- forced_stop == 0) {
+ if (allow_hp && vp9_use_mv_hp(ref_mv) && forced_stop == 0) {
hstep >>= 1;
FIRST_LEVEL_CHECKS;
if (eighthiters > 1) {
@@ -465,6 +465,7 @@
int vp9_find_best_sub_pixel_comp_iterative(MACROBLOCK *x,
MV *bestmv, const MV *ref_mv,
+ int allow_hp,
int error_per_bit,
const vp9_variance_fn_ptr_t *vfp,
int forced_stop,
@@ -544,8 +545,7 @@
}
}
- if (xd->allow_high_precision_mv && vp9_use_mv_hp(ref_mv) &&
- forced_stop == 0) {
+ if (allow_hp && vp9_use_mv_hp(ref_mv) && forced_stop == 0) {
hstep >>= 1;
while (eighthiters--) {
FIRST_LEVEL_CHECKS;
@@ -568,6 +568,7 @@
int vp9_find_best_sub_pixel_comp_tree(MACROBLOCK *x,
MV *bestmv, const MV *ref_mv,
+ int allow_hp,
int error_per_bit,
const vp9_variance_fn_ptr_t *vfp,
int forced_stop,
@@ -642,8 +643,7 @@
tc = bc;
}
- if (xd->allow_high_precision_mv && vp9_use_mv_hp(ref_mv) &&
- forced_stop == 0) {
+ if (allow_hp && vp9_use_mv_hp(ref_mv) && forced_stop == 0) {
hstep >>= 1;
FIRST_LEVEL_CHECKS;
if (eighthiters > 1) {
diff --git a/vp9/encoder/vp9_mcomp.h b/vp9/encoder/vp9_mcomp.h
index 77c157c..bcab679 100644
--- a/vp9/encoder/vp9_mcomp.h
+++ b/vp9/encoder/vp9_mcomp.h
@@ -74,6 +74,7 @@
typedef int (fractional_mv_step_fp) (
MACROBLOCK *x,
MV *bestmv, const MV *ref_mv,
+ int allow_hp,
int error_per_bit,
const vp9_variance_fn_ptr_t *vfp,
int forced_stop, // 0 - full, 1 - qtr only, 2 - half only
@@ -88,6 +89,7 @@
typedef int (fractional_mv_step_comp_fp) (
MACROBLOCK *x,
MV *bestmv, const MV *ref_mv,
+ int allow_hp,
int error_per_bit,
const vp9_variance_fn_ptr_t *vfp,
int forced_stop, // 0 - full, 1 - qtr only, 2 - half only
diff --git a/vp9/encoder/vp9_onyx_if.c b/vp9/encoder/vp9_onyx_if.c
index 2b1caf4..54b3d43 100644
--- a/vp9/encoder/vp9_onyx_if.c
+++ b/vp9/encoder/vp9_onyx_if.c
@@ -3386,11 +3386,6 @@
#if 0
output_frame_level_debug_stats(cpi);
#endif
- // If this was a kf or Gf note the Q
- if ((cm->frame_type == KEY_FRAME)
- || cpi->refresh_golden_frame || cpi->refresh_alt_ref_frame)
- cm->last_kf_gf_q = cm->base_qindex;
-
if (cpi->refresh_golden_frame == 1)
cm->frame_flags = cm->frame_flags | FRAMEFLAGS_GOLDEN;
else
diff --git a/vp9/encoder/vp9_rdopt.c b/vp9/encoder/vp9_rdopt.c
index f192968..7add494 100644
--- a/vp9/encoder/vp9_rdopt.c
+++ b/vp9/encoder/vp9_rdopt.c
@@ -1870,6 +1870,7 @@
cpi->find_fractional_mv_step(x,
&mode_mv[NEWMV].as_mv,
&bsi->ref_mv->as_mv,
+ x->e_mbd.allow_high_precision_mv,
x->errorperbit, v_fn_ptr,
0, cpi->sf.subpel_iters_per_step,
x->nmvjointcost, x->mvcost,
@@ -2450,6 +2451,7 @@
int dis; /* TODO: use dis in distortion calculation later. */
unsigned int sse;
cpi->find_fractional_mv_step(x, &tmp_mv->as_mv, &ref_mv.as_mv,
+ xd->allow_high_precision_mv,
x->errorperbit,
&cpi->fn_ptr[block_size],
0, cpi->sf.subpel_iters_per_step,
@@ -2585,6 +2587,7 @@
bestsme = cpi->find_fractional_mv_step_comp(
x, &tmp_mv.as_mv,
&ref_mv[id].as_mv,
+ xd->allow_high_precision_mv,
x->errorperbit,
&cpi->fn_ptr[block_size],
0, cpi->sf.subpel_iters_per_step,
diff --git a/vp9/encoder/vp9_temporal_filter.c b/vp9/encoder/vp9_temporal_filter.c
index 6ff0de4..5cf8143 100644
--- a/vp9/encoder/vp9_temporal_filter.c
+++ b/vp9/encoder/vp9_temporal_filter.c
@@ -166,6 +166,7 @@
// Ignore mv costing by sending NULL pointer instead of cost array
bestsme = cpi->find_fractional_mv_step(x, &ref_mv->as_mv,
&best_ref_mv1.as_mv,
+ xd->allow_high_precision_mv,
x->errorperbit,
&cpi->fn_ptr[BLOCK_16X16],
0, cpi->sf.subpel_iters_per_step,
diff --git a/vp9/encoder/x86/vp9_dct32x32_sse2.c b/vp9/encoder/x86/vp9_dct32x32_sse2.c
index 11eec7f..de47a5b 100644
--- a/vp9/encoder/x86/vp9_dct32x32_sse2.c
+++ b/vp9/encoder/x86/vp9_dct32x32_sse2.c
@@ -30,11 +30,11 @@
#endif
void FDCT32x32_2D(int16_t *input,
- int16_t *output_org, int pitch) {
+ int16_t *output_org, int stride) {
// Calculate pre-multiplied strides
- const int str1 = pitch >> 1;
- const int str2 = pitch;
- const int str3 = pitch + str1;
+ const int str1 = stride;
+ const int str2 = 2 * stride;
+ const int str3 = 2 * stride + str1;
// We need an intermediate buffer between passes.
DECLARE_ALIGNED(16, int16_t, intermediate[32 * 32]);
// Constants