Merge changes Ifacbf5a0,Ibad7c3dd into experimental

* changes:
  vpxenc: actually report mismatch on stderr.
  Make superblocks independent of macroblock code and data.
diff --git a/vp8/encoder/x86/quantize_sse2.c b/vp8/encoder/x86/quantize_sse2_intrinsics.c similarity index 100% rename from vp8/encoder/x86/quantize_sse2.c rename to vp8/encoder/x86/quantize_sse2_intrinsics.c
diff --git a/vp8/vp8cx.mk b/vp8/vp8cx.mk index f6feafb..2a0e7c5 100644 --- a/vp8/vp8cx.mk +++ b/vp8/vp8cx.mk
@@ -89,13 +89,13 @@ VP8_CX_SRCS-$(HAVE_MMX) += encoder/x86/vp8_enc_stubs_mmx.c VP8_CX_SRCS-$(HAVE_SSE2) += encoder/x86/dct_sse2.asm VP8_CX_SRCS-$(HAVE_SSE2) += encoder/x86/fwalsh_sse2.asm -VP8_CX_SRCS-$(HAVE_SSE2) += encoder/x86/quantize_sse2.c +VP8_CX_SRCS-$(HAVE_SSE2) += encoder/x86/quantize_sse2_intrinsics.c VP8_CX_SRCS-$(HAVE_SSE2) += encoder/x86/quantize_sse2.asm # TODO(johann) make this generic ifeq ($(HAVE_SSE2),yes) -vp8/encoder/x86/quantize_sse2.c.o: CFLAGS += -msse2 -vp8/encoder/x86/quantize_sse2.c.d: CFLAGS += -msse2 +vp8/encoder/x86/quantize_sse2_intrinsics.c.o: CFLAGS += -msse2 +vp8/encoder/x86/quantize_sse2_intrinsics.c.d: CFLAGS += -msse2 endif ifeq ($(CONFIG_TEMPORAL_DENOISING),yes)
diff --git a/vp9/common/vp9_idct.h b/vp9/common/vp9_idct.h index 17d0134..bc79b5c 100644 --- a/vp9/common/vp9_idct.h +++ b/vp9/common/vp9_idct.h
@@ -84,11 +84,4 @@ transform_1d cols, rows; // vertical and horizontal } transform_2d; -#define ROUND_POWER_OF_TWO(value, n) (((value) + (1 << ((n) - 1))) >> (n)) - -/* If we don't want to use ROUND_POWER_OF_TWO macro -static INLINE int16_t round_power_of_two(int16_t value, int n) { - return (value + (1 << (n - 1))) >> n; -}*/ - #endif // VP9_COMMON_VP9_IDCT_H_
diff --git a/vp9/common/vp9_onyx.h b/vp9/common/vp9_onyx.h index d93b7d5..79d0609 100644 --- a/vp9/common/vp9_onyx.h +++ b/vp9/common/vp9_onyx.h
@@ -94,7 +94,7 @@ int Width; // width of data passed to the compressor int Height; // height of data passed to the compressor double frame_rate; // set to passed in framerate - int target_bandwidth; // bandwidth to be used in kilobits per second + int64_t target_bandwidth; // bandwidth to be used in kilobits per second int noise_sensitivity; // parameter used for applying pre processing blur: recommendation 0 int Sharpness; // parameter used for sharpening output: recommendation 0: @@ -135,9 +135,9 @@ int over_shoot_pct; // buffering parameters - int starting_buffer_level; // in seconds - int optimal_buffer_level; - int maximum_buffer_size; + int64_t starting_buffer_level; // in seconds + int64_t optimal_buffer_level; + int64_t maximum_buffer_size; // controlling quality int fixed_q;
diff --git a/vp9/common/vp9_reconinter.c b/vp9/common/vp9_reconinter.c index 30e8951..a516eb3 100644 --- a/vp9/common/vp9_reconinter.c +++ b/vp9/common/vp9_reconinter.c
@@ -20,14 +20,14 @@ void vp9_setup_scale_factors_for_frame(struct scale_factors *scale, YV12_BUFFER_CONFIG *other, int this_w, int this_h) { - int other_w, other_h; + int other_h = other->y_height; + int other_w = other->y_width; - other_h = other->y_height; - other_w = other->y_width; scale->x_num = other_w; scale->x_den = this_w; scale->x_offset_q4 = 0; // calculated per-mb scale->x_step_q4 = 16 * other_w / this_w; + scale->y_num = other_h; scale->y_den = this_h; scale->y_offset_q4 = 0; // calculated per-mb @@ -271,10 +271,8 @@ const struct scale_factors *scale, int w, int h, int do_avg, const struct subpix_fn_table *subpix) { - int_mv32 mv; - - mv = scale_motion_vector_q3_to_q4(mv_q3, scale); - src = src + (mv.as_mv.row >> 4) * src_stride + (mv.as_mv.col >> 4); + int_mv32 mv = scale_motion_vector_q3_to_q4(mv_q3, scale); + src += (mv.as_mv.row >> 4) * src_stride + (mv.as_mv.col >> 4); scale->predict[!!(mv.as_mv.col & 15)][!!(mv.as_mv.row & 15)][do_avg]( src, src_stride, dst, dst_stride, @@ -306,7 +304,7 @@ const int subpel_x = scaled_mv_col_q4 & 15; const int subpel_y = scaled_mv_row_q4 & 15; - src = src + (scaled_mv_row_q4 >> 4) * src_stride + (scaled_mv_col_q4 >> 4); + src += (scaled_mv_row_q4 >> 4) * src_stride + (scaled_mv_col_q4 >> 4); scale->predict[!!subpel_x][!!subpel_y][do_avg]( src, src_stride, dst, dst_stride, subpix->filter_x[subpel_x], scale->x_step_q4, @@ -500,18 +498,15 @@ int which_mv; for (which_mv = 0; which_mv < 1 + use_second_ref; ++which_mv) { - const int clamp_mvs = - which_mv ? xd->mode_info_context->mbmi.need_to_clamp_secondmv - : xd->mode_info_context->mbmi.need_to_clamp_mvs; - uint8_t *base_pre; - int_mv ymv; - int pre_stride; + const int clamp_mvs = which_mv ? + xd->mode_info_context->mbmi.need_to_clamp_secondmv : + xd->mode_info_context->mbmi.need_to_clamp_mvs; + uint8_t *base_pre = which_mv ? xd->second_pre.y_buffer : xd->pre.y_buffer; + int pre_stride = which_mv ? 
xd->second_pre.y_stride : xd->pre.y_stride; + int_mv ymv; ymv.as_int = xd->mode_info_context->mbmi.mv[which_mv].as_int; - base_pre = which_mv ? xd->second_pre.y_buffer - : xd->pre.y_buffer; - pre_stride = which_mv ? xd->second_pre.y_stride - : xd->pre.y_stride; + if (clamp_mvs) clamp_mv_to_umv_border(&ymv.as_mv, xd); @@ -811,93 +806,61 @@ } } -static -void build_4x4uvmvs(MACROBLOCKD *xd) { +static int mv_pred_row(MACROBLOCKD *mb, int off, int idx) { + int temp = mb->mode_info_context->bmi[off + 0].as_mv[idx].as_mv.row + + mb->mode_info_context->bmi[off + 1].as_mv[idx].as_mv.row + + mb->mode_info_context->bmi[off + 4].as_mv[idx].as_mv.row + + mb->mode_info_context->bmi[off + 5].as_mv[idx].as_mv.row; + return (temp < 0 ? temp - 4 : temp + 4) / 8; +} + +static int mv_pred_col(MACROBLOCKD *mb, int off, int idx) { + int temp = mb->mode_info_context->bmi[off + 0].as_mv[idx].as_mv.col + + mb->mode_info_context->bmi[off + 1].as_mv[idx].as_mv.col + + mb->mode_info_context->bmi[off + 4].as_mv[idx].as_mv.col + + mb->mode_info_context->bmi[off + 5].as_mv[idx].as_mv.col; + return (temp < 0 ? 
temp - 4 : temp + 4) / 8; +} + +static void build_4x4uvmvs(MACROBLOCKD *xd) { int i, j; BLOCKD *blockd = xd->block; + const int mask = xd->fullpixel_mask; for (i = 0; i < 2; i++) { for (j = 0; j < 2; j++) { - int yoffset = i * 8 + j * 2; - int uoffset = 16 + i * 2 + j; - int voffset = 20 + i * 2 + j; + const int yoffset = i * 8 + j * 2; + const int uoffset = 16 + i * 2 + j; + const int voffset = 20 + i * 2 + j; - int temp; - - temp = xd->mode_info_context->bmi[yoffset + 0].as_mv[0].as_mv.row - + xd->mode_info_context->bmi[yoffset + 1].as_mv[0].as_mv.row - + xd->mode_info_context->bmi[yoffset + 4].as_mv[0].as_mv.row - + xd->mode_info_context->bmi[yoffset + 5].as_mv[0].as_mv.row; - - if (temp < 0) temp -= 4; - else temp += 4; - - blockd[uoffset].bmi.as_mv[0].as_mv.row = (temp / 8) & - xd->fullpixel_mask; - - temp = xd->mode_info_context->bmi[yoffset + 0].as_mv[0].as_mv.col - + xd->mode_info_context->bmi[yoffset + 1].as_mv[0].as_mv.col - + xd->mode_info_context->bmi[yoffset + 4].as_mv[0].as_mv.col - + xd->mode_info_context->bmi[yoffset + 5].as_mv[0].as_mv.col; - - if (temp < 0) temp -= 4; - else temp += 4; - - blockd[uoffset].bmi.as_mv[0].as_mv.col = (temp / 8) & - xd->fullpixel_mask; + MV *u = &blockd[uoffset].bmi.as_mv[0].as_mv; + MV *v = &blockd[voffset].bmi.as_mv[0].as_mv; + u->row = mv_pred_row(xd, yoffset, 0) & mask; + u->col = mv_pred_col(xd, yoffset, 0) & mask; // if (x->mode_info_context->mbmi.need_to_clamp_mvs) - clamp_uvmv_to_umv_border(&blockd[uoffset].bmi.as_mv[0].as_mv, xd); + clamp_uvmv_to_umv_border(u, xd); // if (x->mode_info_context->mbmi.need_to_clamp_mvs) - clamp_uvmv_to_umv_border(&blockd[uoffset].bmi.as_mv[0].as_mv, xd); + clamp_uvmv_to_umv_border(u, xd); - blockd[voffset].bmi.as_mv[0].as_mv.row = - blockd[uoffset].bmi.as_mv[0].as_mv.row; - blockd[voffset].bmi.as_mv[0].as_mv.col = - blockd[uoffset].bmi.as_mv[0].as_mv.col; + v->row = u->row; + v->col = u->col; if (xd->mode_info_context->mbmi.second_ref_frame > 0) { - temp = 
xd->mode_info_context->bmi[yoffset + 0].as_mv[1].as_mv.row - + xd->mode_info_context->bmi[yoffset + 1].as_mv[1].as_mv.row - + xd->mode_info_context->bmi[yoffset + 4].as_mv[1].as_mv.row - + xd->mode_info_context->bmi[yoffset + 5].as_mv[1].as_mv.row; - - if (temp < 0) { - temp -= 4; - } else { - temp += 4; - } - - blockd[uoffset].bmi.as_mv[1].as_mv.row = (temp / 8) & - xd->fullpixel_mask; - - temp = xd->mode_info_context->bmi[yoffset + 0].as_mv[1].as_mv.col - + xd->mode_info_context->bmi[yoffset + 1].as_mv[1].as_mv.col - + xd->mode_info_context->bmi[yoffset + 4].as_mv[1].as_mv.col - + xd->mode_info_context->bmi[yoffset + 5].as_mv[1].as_mv.col; - - if (temp < 0) { - temp -= 4; - } else { - temp += 4; - } - - blockd[uoffset].bmi.as_mv[1].as_mv.col = (temp / 8) & - xd->fullpixel_mask; + u = &blockd[uoffset].bmi.as_mv[1].as_mv; + v = &blockd[voffset].bmi.as_mv[1].as_mv; + u->row = mv_pred_row(xd, yoffset, 1) & mask; + u->col = mv_pred_col(xd, yoffset, 1) & mask; // if (mbmi->need_to_clamp_mvs) - clamp_uvmv_to_umv_border( - &blockd[uoffset].bmi.as_mv[1].as_mv, xd); + clamp_uvmv_to_umv_border(u, xd); // if (mbmi->need_to_clamp_mvs) - clamp_uvmv_to_umv_border( - &blockd[uoffset].bmi.as_mv[1].as_mv, xd); + clamp_uvmv_to_umv_border(u, xd); - blockd[voffset].bmi.as_mv[1].as_mv.row = - blockd[uoffset].bmi.as_mv[1].as_mv.row; - blockd[voffset].bmi.as_mv[1].as_mv.col = - blockd[uoffset].bmi.as_mv[1].as_mv.col; + v->row = u->row; + v->col = u->col; } } }
diff --git a/vp9/common/vp9_rtcd_defs.sh b/vp9/common/vp9_rtcd_defs.sh index db1b467..dab88a3 100644 --- a/vp9/common/vp9_rtcd_defs.sh +++ b/vp9/common/vp9_rtcd_defs.sh
@@ -260,7 +260,7 @@ specialize vp9_short_idct4x4llm_1 prototype void vp9_short_idct4x4llm "int16_t *input, int16_t *output, int pitch" -specialize vp9_short_idct4x4llm +specialize vp9_short_idct4x4llm sse2 prototype void vp9_short_idct8x8 "int16_t *input, int16_t *output, int pitch" specialize vp9_short_idct8x8
diff --git a/vp9/common/x86/vp9_idct_x86.h b/vp9/common/x86/vp9_idct_x86.h index 8320cf8..bd66d8c 100644 --- a/vp9/common/x86/vp9_idct_x86.h +++ b/vp9/common/x86/vp9_idct_x86.h
@@ -20,23 +20,10 @@ */ #if HAVE_MMX -extern prototype_idct(vp9_short_idct4x4llm_1_mmx); -extern prototype_idct(vp9_short_idct4x4llm_mmx); -extern prototype_idct_scalar_add(vp9_dc_only_idct_add_mmx); - extern prototype_second_order(vp9_short_inv_walsh4x4_mmx); extern prototype_second_order(vp9_short_inv_walsh4x4_1_mmx); #if !CONFIG_RUNTIME_CPU_DETECT -#undef vp9_idct_idct1 -#define vp9_idct_idct1 vp9_short_idct4x4llm_1_mmx - -#undef vp9_idct_idct16 -#define vp9_idct_idct16 vp9_short_idct4x4llm_mmx - -#undef vp9_idct_idct1_scalar_add -#define vp9_idct_idct1_scalar_add vp9_dc_only_idct_add_mmx - #undef vp9_idct_iwalsh16 #define vp9_idct_iwalsh16 vp9_short_inv_walsh4x4_mmx
diff --git a/vp9/common/x86/vp9_idctllm_x86.c b/vp9/common/x86/vp9_idctllm_x86.c index 667f5c1..7b3c579 100644 --- a/vp9/common/x86/vp9_idctllm_x86.c +++ b/vp9/common/x86/vp9_idctllm_x86.c
@@ -73,4 +73,129 @@ p1 = _mm_srli_si128(p1, 4); *(int *)dst_ptr = _mm_cvtsi128_si32(p1); } + +void vp9_short_idct4x4llm_sse2(int16_t *input, int16_t *output, int pitch) { + const __m128i zero = _mm_setzero_si128(); + const __m128i eight = _mm_set1_epi16(8); + const __m128i cst = _mm_setr_epi16((short)cospi_16_64, (short)cospi_16_64, + (short)cospi_16_64, (short)-cospi_16_64, + (short)cospi_24_64, (short)-cospi_8_64, + (short)cospi_8_64, (short)cospi_24_64); + const __m128i rounding = _mm_set1_epi32(DCT_CONST_ROUNDING); + const int half_pitch = pitch >> 1; + __m128i input0, input1, input2, input3; + + // Rows + input0 = _mm_loadl_epi64((__m128i *)input); + input1 = _mm_loadl_epi64((__m128i *)(input + 4)); + input2 = _mm_loadl_epi64((__m128i *)(input + 8)); + input3 = _mm_loadl_epi64((__m128i *)(input + 12)); + + // Construct i3, i1, i3, i1, i2, i0, i2, i0 + input0 = _mm_shufflelo_epi16(input0, 0xd8); + input1 = _mm_shufflelo_epi16(input1, 0xd8); + input2 = _mm_shufflelo_epi16(input2, 0xd8); + input3 = _mm_shufflelo_epi16(input3, 0xd8); + + input0 = _mm_unpacklo_epi32(input0, input0); + input1 = _mm_unpacklo_epi32(input1, input1); + input2 = _mm_unpacklo_epi32(input2, input2); + input3 = _mm_unpacklo_epi32(input3, input3); + + // Stage 1 + input0 = _mm_madd_epi16(input0, cst); + input1 = _mm_madd_epi16(input1, cst); + input2 = _mm_madd_epi16(input2, cst); + input3 = _mm_madd_epi16(input3, cst); + + input0 = _mm_add_epi32(input0, rounding); + input1 = _mm_add_epi32(input1, rounding); + input2 = _mm_add_epi32(input2, rounding); + input3 = _mm_add_epi32(input3, rounding); + + input0 = _mm_srai_epi32(input0, DCT_CONST_BITS); + input1 = _mm_srai_epi32(input1, DCT_CONST_BITS); + input2 = _mm_srai_epi32(input2, DCT_CONST_BITS); + input3 = _mm_srai_epi32(input3, DCT_CONST_BITS); + + // Stage 2 + input0 = _mm_packs_epi32(input0, zero); + input1 = _mm_packs_epi32(input1, zero); + input2 = _mm_packs_epi32(input2, zero); + input3 = _mm_packs_epi32(input3, zero); + + // Transpose 
+ input1 = _mm_unpacklo_epi16(input0, input1); + input3 = _mm_unpacklo_epi16(input2, input3); + input0 = _mm_unpacklo_epi32(input1, input3); + input1 = _mm_unpackhi_epi32(input1, input3); + + // Switch column2, column 3, and then, we got: + // input2: column1, column 0; input3: column2, column 3. + input1 = _mm_shuffle_epi32(input1, 0x4e); + input2 = _mm_add_epi16(input0, input1); + input3 = _mm_sub_epi16(input0, input1); + + // Columns + // Construct i3, i1, i3, i1, i2, i0, i2, i0 + input0 = _mm_shufflelo_epi16(input2, 0xd8); + input1 = _mm_shufflehi_epi16(input2, 0xd8); + input2 = _mm_shufflehi_epi16(input3, 0xd8); + input3 = _mm_shufflelo_epi16(input3, 0xd8); + + input0 = _mm_unpacklo_epi32(input0, input0); + input1 = _mm_unpackhi_epi32(input1, input1); + input2 = _mm_unpackhi_epi32(input2, input2); + input3 = _mm_unpacklo_epi32(input3, input3); + + // Stage 1 + input0 = _mm_madd_epi16(input0, cst); + input1 = _mm_madd_epi16(input1, cst); + input2 = _mm_madd_epi16(input2, cst); + input3 = _mm_madd_epi16(input3, cst); + + input0 = _mm_add_epi32(input0, rounding); + input1 = _mm_add_epi32(input1, rounding); + input2 = _mm_add_epi32(input2, rounding); + input3 = _mm_add_epi32(input3, rounding); + + input0 = _mm_srai_epi32(input0, DCT_CONST_BITS); + input1 = _mm_srai_epi32(input1, DCT_CONST_BITS); + input2 = _mm_srai_epi32(input2, DCT_CONST_BITS); + input3 = _mm_srai_epi32(input3, DCT_CONST_BITS); + + // Stage 2 + input0 = _mm_packs_epi32(input0, zero); + input1 = _mm_packs_epi32(input1, zero); + input2 = _mm_packs_epi32(input2, zero); + input3 = _mm_packs_epi32(input3, zero); + + // Transpose + input1 = _mm_unpacklo_epi16(input0, input1); + input3 = _mm_unpacklo_epi16(input2, input3); + input0 = _mm_unpacklo_epi32(input1, input3); + input1 = _mm_unpackhi_epi32(input1, input3); + + // Switch column2, column 3, and then, we got: + // input2: column1, column 0; input3: column2, column 3. 
+ input1 = _mm_shuffle_epi32(input1, 0x4e); + input2 = _mm_add_epi16(input0, input1); + input3 = _mm_sub_epi16(input0, input1); + + // Final round and shift + input2 = _mm_add_epi16(input2, eight); + input3 = _mm_add_epi16(input3, eight); + + input2 = _mm_srai_epi16(input2, 4); + input3 = _mm_srai_epi16(input3, 4); + + // Store results + _mm_storel_epi64((__m128i *)output, input2); + input2 = _mm_srli_si128(input2, 8); + _mm_storel_epi64((__m128i *)(output + half_pitch), input2); + + _mm_storel_epi64((__m128i *)(output + 3 * half_pitch), input3); + input3 = _mm_srli_si128(input3, 8); + _mm_storel_epi64((__m128i *)(output + 2 * half_pitch), input3); +} #endif
diff --git a/vp9/decoder/vp9_decodframe.c b/vp9/decoder/vp9_decodframe.c index 055e97b..b44d659 100644 --- a/vp9/decoder/vp9_decodframe.c +++ b/vp9/decoder/vp9_decodframe.c
@@ -1191,13 +1191,14 @@ * if we have enough data. Otherwise we will end up with the wrong * size. */ - if (data + 4 < data_end) { - pc->Width = (data[0] | (data[1] << 8)) & 0x3fff; - pc->horiz_scale = data[1] >> 6; - pc->Height = (data[2] | (data[3] << 8)) & 0x3fff; - pc->vert_scale = data[3] >> 6; + if (data + 5 < data_end) { + pc->Width = (data[0] | (data[1] << 8)); + pc->Height = (data[2] | (data[3] << 8)); + + pc->horiz_scale = data[4] >> 4; + pc->vert_scale = data[4] & 0x0F; } - data += 4; + data += 5; if (width != pc->Width || height != pc->Height) { if (pc->Width <= 0) {
diff --git a/vp9/decoder/vp9_dequantize.c b/vp9/decoder/vp9_dequantize.c index 85246d8..eaf9860 100644 --- a/vp9/decoder/vp9_dequantize.c +++ b/vp9/decoder/vp9_dequantize.c
@@ -90,7 +90,7 @@ input[i] *= dq[i]; // the idct halves ( >> 1) the pitch - vp9_short_idct4x4llm_c(input, output, 4 << 1); + vp9_short_idct4x4llm(input, output, 4 << 1); vpx_memset(input, 0, 32); @@ -112,7 +112,7 @@ input[i] *= dq[i]; // the idct halves ( >> 1) the pitch - vp9_short_idct4x4llm_c(input, output, 4 << 1); + vp9_short_idct4x4llm(input, output, 4 << 1); vpx_memset(input, 0, 32); add_residual(output, pred, pitch, dest, stride, 4, 4); }
diff --git a/vp9/decoder/x86/vp9_idct_mmx.h b/vp9/decoder/x86/vp9_idct_mmx.h index c0e9bfd..7d98291 100644 --- a/vp9/decoder/x86/vp9_idct_mmx.h +++ b/vp9/decoder/x86/vp9_idct_mmx.h
@@ -16,9 +16,6 @@ unsigned char *pred, unsigned char *dest, int pitch, int stride, int Dc); -void vp9_dc_only_idct_add_mmx(short input_dc, const unsigned char *pred_ptr, - unsigned char *dst_ptr, int pitch, int stride); - void vp9_dequant_idct_add_mmx(short *input, const short *dq, unsigned char *pred, unsigned char *dest, int pitch, int stride);
diff --git a/vp9/encoder/vp9_bitstream.c b/vp9/encoder/vp9_bitstream.c index 971da05..b05da87 100644 --- a/vp9/encoder/vp9_bitstream.c +++ b/vp9/encoder/vp9_bitstream.c
@@ -1500,17 +1500,20 @@ { int v; - /* TODO(jkoleszar): support arbitrary resolutions */ - v = (pc->horiz_scale << 14) | pc->Width; + // support arbitrary resolutions + v = pc->Width; cx_data[0] = v; cx_data[1] = v >> 8; - v = (pc->vert_scale << 14) | pc->Height; + v = pc->Height; cx_data[2] = v; cx_data[3] = v >> 8; - extra_bytes_packed += 4; - cx_data += 4; + // use a separate byte to store the scale factors, each ranging 0-15 + cx_data[4] = (pc->horiz_scale << 4) | (pc->vert_scale); + + extra_bytes_packed += 5; + cx_data += 5; } vp9_start_encode(&header_bc, cx_data);
diff --git a/vp9/encoder/vp9_encodeframe.c b/vp9/encoder/vp9_encodeframe.c index 3b48f46..a4dbdc5 100644 --- a/vp9/encoder/vp9_encodeframe.c +++ b/vp9/encoder/vp9_encodeframe.c
@@ -1247,8 +1247,8 @@ MACROBLOCKD *const xd = &x->e_mbd; int totalrate; - // printf("encode_frame_internal frame %d (%d)\n", - // cpi->common.current_video_frame, cpi->common.show_frame); +// fprintf(stderr, "encode_frame_internal frame %d (%d)\n", +// cpi->common.current_video_frame, cpi->common.show_frame); // Compute a modified set of reference frame probabilities to use when // prediction fails. These are based on the current general estimates for @@ -1329,12 +1329,11 @@ // Take tiles into account and give start/end MB int tile_col; TOKENEXTRA *tp = cpi->tok; - for (tile_col = 0; tile_col < cm->tile_columns; tile_col++) { TOKENEXTRA *tp_old = tp; - // For each row of SBs in the frame vp9_get_tile_col_offsets(cm, tile_col); + for (mb_row = 0; mb_row < cm->mb_rows; mb_row += 4) { encode_sb_row(cpi, mb_row, &tp, &totalrate); }
diff --git a/vp9/encoder/vp9_onyx_if.c b/vp9/encoder/vp9_onyx_if.c index 5278ac2..6335827 100644 --- a/vp9/encoder/vp9_onyx_if.c +++ b/vp9/encoder/vp9_onyx_if.c
@@ -1086,14 +1086,12 @@ cpi->max_gf_interval = cpi->twopass.static_scene_max_gf_interval; } - -static int -rescale(int val, int num, int denom) { +static int64_t rescale(int val, int64_t num, int denom) { int64_t llnum = num; int64_t llden = denom; int64_t llval = val; - return (int)(llval * llnum / llden); + return (llval * llnum / llden); } static void set_tile_limits(VP9_COMP *cpi) {
diff --git a/vp9/encoder/vp9_onyx_int.h b/vp9/encoder/vp9_onyx_int.h index 50780d0..3dc4772 100644 --- a/vp9/encoder/vp9_onyx_int.h +++ b/vp9/encoder/vp9_onyx_int.h
@@ -485,7 +485,7 @@ int kf_boost; int kf_zeromotion_pct; - int target_bandwidth; + int64_t target_bandwidth; struct vpx_codec_pkt_list *output_pkt_list; #if 0
diff --git a/vp9/encoder/vp9_picklpf.h b/vp9/encoder/vp9_picklpf.h index cb01500..ca3cab6 100644 --- a/vp9/encoder/vp9_picklpf.h +++ b/vp9/encoder/vp9_picklpf.h
@@ -15,12 +15,12 @@ struct yv12_buffer_config; struct VP9_COMP; -extern void vp9_pick_filter_level_fast(struct yv12_buffer_config *sd, - struct VP9_COMP *cpi); +void vp9_pick_filter_level_fast(struct yv12_buffer_config *sd, + struct VP9_COMP *cpi); -extern void vp9_set_alt_lf_level(struct VP9_COMP *cpi, int filt_val); +void vp9_set_alt_lf_level(struct VP9_COMP *cpi, int filt_val); -extern void vp9_pick_filter_level(struct yv12_buffer_config *sd, - struct VP9_COMP *cpi); +void vp9_pick_filter_level(struct yv12_buffer_config *sd, + struct VP9_COMP *cpi); #endif // VP9_ENCODER_VP9_PICKLPF_H_
diff --git a/vp9/encoder/vp9_rdopt.c b/vp9/encoder/vp9_rdopt.c index c5b3e3a..61379b8 100644 --- a/vp9/encoder/vp9_rdopt.c +++ b/vp9/encoder/vp9_rdopt.c
@@ -193,19 +193,17 @@ } static int compute_rd_mult(int qindex) { - int q; - - q = vp9_dc_quant(qindex, 0); + int q = vp9_dc_quant(qindex, 0); return (11 * q * q) >> 6; } -void vp9_initialize_me_consts(VP9_COMP *cpi, int QIndex) { - cpi->mb.sadperbit16 = sad_per_bit16lut[QIndex]; - cpi->mb.sadperbit4 = sad_per_bit4lut[QIndex]; +void vp9_initialize_me_consts(VP9_COMP *cpi, int qindex) { + cpi->mb.sadperbit16 = sad_per_bit16lut[qindex]; + cpi->mb.sadperbit4 = sad_per_bit4lut[qindex]; } -void vp9_initialize_rd_consts(VP9_COMP *cpi, int QIndex) { +void vp9_initialize_rd_consts(VP9_COMP *cpi, int qindex) { int q, i; vp9_clear_system_state(); // __asm emms; @@ -214,16 +212,16 @@ // for key frames, golden frames and arf frames. // if (cpi->common.refresh_golden_frame || // cpi->common.refresh_alt_ref_frame) - QIndex = (QIndex < 0) ? 0 : ((QIndex > MAXQ) ? MAXQ : QIndex); + qindex = (qindex < 0) ? 0 : ((qindex > MAXQ) ? MAXQ : qindex); - cpi->RDMULT = compute_rd_mult(QIndex); + cpi->RDMULT = compute_rd_mult(qindex); if (cpi->pass == 2 && (cpi->common.frame_type != KEY_FRAME)) { if (cpi->twopass.next_iiratio > 31) cpi->RDMULT += (cpi->RDMULT * rd_iifactor[31]) >> 4; else cpi->RDMULT += - (cpi->RDMULT * rd_iifactor[cpi->twopass.next_iiratio]) >> 4; + (cpi->RDMULT * rd_iifactor[cpi->twopass.next_iiratio]) >> 4; } if (cpi->RDMULT < 7) @@ -234,8 +232,8 @@ vp9_set_speed_features(cpi); - q = (int)pow(vp9_dc_quant(QIndex, 0) >> 2, 1.25); - q = q << 2; + q = (int)pow(vp9_dc_quant(qindex, 0) >> 2, 1.25); + q <<= 2; cpi->RDMULT = cpi->RDMULT << 4; if (q < 8)
diff --git a/vp9/encoder/vp9_rdopt.h b/vp9/encoder/vp9_rdopt.h index 01b1560..d1b4777 100644 --- a/vp9/encoder/vp9_rdopt.h +++ b/vp9/encoder/vp9_rdopt.h
@@ -15,34 +15,34 @@ #define RDCOST(RM,DM,R,D) ( ((128+((int64_t)R)*(RM)) >> 8) + ((int64_t)DM)*(D) ) #define RDCOST_8x8(RM,DM,R,D) ( ((128+((int64_t)R)*(RM)) >> 8) + ((int64_t)DM)*(D) ) -extern void vp9_initialize_rd_consts(VP9_COMP *cpi, int Qvalue); +void vp9_initialize_rd_consts(VP9_COMP *cpi, int qindex); -extern void vp9_initialize_me_consts(VP9_COMP *cpi, int QIndex); +void vp9_initialize_me_consts(VP9_COMP *cpi, int qindex); -extern void vp9_rd_pick_intra_mode(VP9_COMP *cpi, MACROBLOCK *x, - int *r, int *d); +void vp9_rd_pick_intra_mode(VP9_COMP *cpi, MACROBLOCK *x, + int *r, int *d); -extern void vp9_rd_pick_intra_mode_sb32(VP9_COMP *cpi, MACROBLOCK *x, - int *r, int *d); +void vp9_rd_pick_intra_mode_sb32(VP9_COMP *cpi, MACROBLOCK *x, + int *r, int *d); -extern void vp9_rd_pick_intra_mode_sb64(VP9_COMP *cpi, MACROBLOCK *x, - int *r, int *d); +void vp9_rd_pick_intra_mode_sb64(VP9_COMP *cpi, MACROBLOCK *x, + int *r, int *d); -extern void vp9_pick_mode_inter_macroblock(VP9_COMP *cpi, MACROBLOCK *x, - int mb_row, int mb_col, - int *r, int *d); +void vp9_pick_mode_inter_macroblock(VP9_COMP *cpi, MACROBLOCK *x, + int mb_row, int mb_col, + int *r, int *d); -extern int64_t vp9_rd_pick_inter_mode_sb32(VP9_COMP *cpi, MACROBLOCK *x, - int mb_row, int mb_col, - int *r, int *d); +int64_t vp9_rd_pick_inter_mode_sb32(VP9_COMP *cpi, MACROBLOCK *x, + int mb_row, int mb_col, + int *r, int *d); -extern int64_t vp9_rd_pick_inter_mode_sb64(VP9_COMP *cpi, MACROBLOCK *x, - int mb_row, int mb_col, - int *r, int *d); +int64_t vp9_rd_pick_inter_mode_sb64(VP9_COMP *cpi, MACROBLOCK *x, + int mb_row, int mb_col, + int *r, int *d); -extern void vp9_init_me_luts(); +void vp9_init_me_luts(); -extern void vp9_set_mbmode_and_mvs(MACROBLOCK *x, - MB_PREDICTION_MODE mb, int_mv *mv); +void vp9_set_mbmode_and_mvs(MACROBLOCK *x, + MB_PREDICTION_MODE mb, int_mv *mv); #endif // VP9_ENCODER_VP9_RDOPT_H_
diff --git a/vp9/vp9_cx_iface.c b/vp9/vp9_cx_iface.c index 2653954..db7a2fd 100644 --- a/vp9/vp9_cx_iface.c +++ b/vp9/vp9_cx_iface.c
@@ -126,8 +126,8 @@ static vpx_codec_err_t validate_config(vpx_codec_alg_priv_t *ctx, const vpx_codec_enc_cfg_t *cfg, const struct vp8_extracfg *vp8_cfg) { - RANGE_CHECK(cfg, g_w, 1, 16383); /* 14 bits available */ - RANGE_CHECK(cfg, g_h, 1, 16383); /* 14 bits available */ + RANGE_CHECK(cfg, g_w, 1, 65535); /* 16 bits available */ + RANGE_CHECK(cfg, g_h, 1, 65535); /* 16 bits available */ RANGE_CHECK(cfg, g_timebase.den, 1, 1000000000); RANGE_CHECK(cfg, g_timebase.num, 1, cfg->g_timebase.den); RANGE_CHECK_HI(cfg, g_profile, 3);
diff --git a/vp9/vp9_dx_iface.c b/vp9/vp9_dx_iface.c index b2ce9aa..f2b80e1 100644 --- a/vp9/vp9_dx_iface.c +++ b/vp9/vp9_dx_iface.c
@@ -229,8 +229,8 @@ if (c[0] != 0x9d || c[1] != 0x01 || c[2] != 0x2a) res = VPX_CODEC_UNSUP_BITSTREAM; - si->w = (c[3] | (c[4] << 8)) & 0x3fff; - si->h = (c[5] | (c[6] << 8)) & 0x3fff; + si->w = (c[3] | (c[4] << 8)); + si->h = (c[5] | (c[6] << 8)); /*printf("w=%d, h=%d\n", si->w, si->h);*/ if (!(si->h | si->w))