Merge "Change rc undershoot/overshoot semantics"
diff --git a/build/make/configure.sh b/build/make/configure.sh
index a48fd9f..3324be3 100755
--- a/build/make/configure.sh
+++ b/build/make/configure.sh
@@ -729,19 +729,18 @@
add_cflags -arch ${tgt_isa}
add_ldflags -arch_only ${tgt_isa}
- add_cflags "-isysroot /Developer/Platforms/iPhoneOS.platform/Developer/SDKs/iPhoneOS4.2.sdk"
+ add_cflags "-isysroot ${SDK_PATH}/SDKs/iPhoneOS4.3.sdk"
# This should be overridable
- alt_libc=${SDK_PATH}/SDKs/iPhoneOS4.2.sdk
+ alt_libc=${SDK_PATH}/SDKs/iPhoneOS4.3.sdk
# Add the paths for the alternate libc
-# for d in usr/include usr/include/gcc/darwin/4.0/; do
- for d in usr/include usr/include/gcc/darwin/4.0/ usr/lib/gcc/arm-apple-darwin10/4.2.1/include/; do
+ for d in usr/include usr/include/gcc/darwin/4.2/ usr/lib/gcc/arm-apple-darwin10/4.2.1/include/; do
try_dir="${alt_libc}/${d}"
[ -d "${try_dir}" ] && add_cflags -I"${try_dir}"
done
- for d in lib usr/lib; do
+ for d in lib usr/lib usr/lib/system; do
try_dir="${alt_libc}/${d}"
[ -d "${try_dir}" ] && add_ldflags -L"${try_dir}"
done
@@ -885,6 +884,8 @@
link_with_cc=gcc
tune_cflags="-march="
setup_gnu_toolchain
+ #for 32 bit x86 builds, -O3 did not turn on this flag
+ enabled optimizations && check_add_cflags -fomit-frame-pointer
;;
esac
diff --git a/build/make/gen_msvs_proj.sh b/build/make/gen_msvs_proj.sh
index c2ef44a..fb948fd 100755
--- a/build/make/gen_msvs_proj.sh
+++ b/build/make/gen_msvs_proj.sh
@@ -447,6 +447,8 @@
obj_int_extract)
tag Tool \
Name="VCCLCompilerTool" \
+ Optimization="2" \
+ FavorSizeorSpeed="1" \
AdditionalIncludeDirectories="$incs" \
PreprocessorDefinitions="WIN32;NDEBUG;_CONSOLE;_CRT_SECURE_NO_WARNINGS;_CRT_SECURE_NO_DEPRECATE" \
RuntimeLibrary="$release_runtime" \
@@ -462,6 +464,8 @@
tag Tool \
Name="VCCLCompilerTool" \
+ Optimization="2" \
+ FavorSizeorSpeed="1" \
AdditionalIncludeDirectories="$incs" \
PreprocessorDefinitions="WIN32;NDEBUG;_CRT_SECURE_NO_WARNINGS;_CRT_SECURE_NO_DEPRECATE;$defines" \
RuntimeLibrary="$release_runtime" \
@@ -476,6 +480,8 @@
tag Tool \
Name="VCCLCompilerTool" \
AdditionalIncludeDirectories="$incs" \
+ Optimization="2" \
+ FavorSizeorSpeed="1" \
PreprocessorDefinitions="WIN32;NDEBUG;_CRT_SECURE_NO_WARNINGS;_CRT_SECURE_NO_DEPRECATE;$defines" \
RuntimeLibrary="$release_runtime" \
UsePrecompiledHeader="0" \
diff --git a/libmkv/EbmlIDs.h b/libmkv/EbmlIDs.h
index 4297470..3418e36 100644
--- a/libmkv/EbmlIDs.h
+++ b/libmkv/EbmlIDs.h
@@ -120,7 +120,7 @@
//video
Video = 0xE0,
FlagInterlaced = 0x9A,
-// StereoMode = 0x53B8,
+ StereoMode = 0x53B8,
PixelWidth = 0xB0,
PixelHeight = 0xBA,
PixelCropBottom = 0x54AA,
diff --git a/vp8/common/blockd.h b/vp8/common/blockd.h
index fc8e072..aef6927 100644
--- a/vp8/common/blockd.h
+++ b/vp8/common/blockd.h
@@ -169,12 +169,8 @@
unsigned char partitioning;
unsigned char mb_skip_coeff; /* does this mb has coefficients at all, 1=no coefficients, 0=need decode tokens */
- unsigned char dc_diff;
unsigned char need_to_clamp_mvs;
-
unsigned char segment_id; /* Which set of segmentation parameters should be used for this MB */
-
- unsigned char force_no_skip; /* encoder only */
} MB_MODE_INFO;
diff --git a/vp8/common/extend.c b/vp8/common/extend.c
index 47207fa..036bafc 100644
--- a/vp8/common/extend.c
+++ b/vp8/common/extend.c
@@ -13,10 +13,12 @@
#include "vpx_mem/vpx_mem.h"
-static void extend_plane_borders
+static void copy_and_extend_plane
(
unsigned char *s, /* source */
- int sp, /* pitch */
+ int sp, /* source pitch */
+ unsigned char *d, /* destination */
+ int dp, /* destination pitch */
int h, /* height */
int w, /* width */
int et, /* extend top border */
@@ -25,7 +27,6 @@
int er /* extend right border */
)
{
-
int i;
unsigned char *src_ptr1, *src_ptr2;
unsigned char *dest_ptr1, *dest_ptr2;
@@ -34,68 +35,73 @@
/* copy the left and right most columns out */
src_ptr1 = s;
src_ptr2 = s + w - 1;
- dest_ptr1 = s - el;
- dest_ptr2 = s + w;
+ dest_ptr1 = d - el;
+ dest_ptr2 = d + w;
- for (i = 0; i < h - 0 + 1; i++)
+ for (i = 0; i < h; i++)
{
- /* Some linkers will complain if we call vpx_memset with el set to a
- * constant 0.
- */
- if (el)
- vpx_memset(dest_ptr1, src_ptr1[0], el);
+ vpx_memset(dest_ptr1, src_ptr1[0], el);
+ vpx_memcpy(dest_ptr1 + el, src_ptr1, w);
vpx_memset(dest_ptr2, src_ptr2[0], er);
src_ptr1 += sp;
src_ptr2 += sp;
- dest_ptr1 += sp;
- dest_ptr2 += sp;
+ dest_ptr1 += dp;
+ dest_ptr2 += dp;
}
- /* Now copy the top and bottom source lines into each line of the respective borders */
- src_ptr1 = s - el;
- src_ptr2 = s + sp * (h - 1) - el;
- dest_ptr1 = s + sp * (-et) - el;
- dest_ptr2 = s + sp * (h) - el;
- linesize = el + er + w + 1;
+ /* Now copy the top and bottom lines into each line of the respective
+ * borders
+ */
+ src_ptr1 = d - el;
+ src_ptr2 = d + dp * (h - 1) - el;
+ dest_ptr1 = d + dp * (-et) - el;
+ dest_ptr2 = d + dp * (h) - el;
+ linesize = el + er + w;
- for (i = 0; i < (int)et; i++)
+ for (i = 0; i < et; i++)
{
vpx_memcpy(dest_ptr1, src_ptr1, linesize);
- dest_ptr1 += sp;
+ dest_ptr1 += dp;
}
- for (i = 0; i < (int)eb; i++)
+ for (i = 0; i < eb; i++)
{
vpx_memcpy(dest_ptr2, src_ptr2, linesize);
- dest_ptr2 += sp;
+ dest_ptr2 += dp;
}
}
-void vp8_extend_to_multiple_of16(YV12_BUFFER_CONFIG *ybf, int width, int height)
+void vp8_copy_and_extend_frame(YV12_BUFFER_CONFIG *src,
+ YV12_BUFFER_CONFIG *dst)
{
- int er = 0xf & (16 - (width & 0xf));
- int eb = 0xf & (16 - (height & 0xf));
+ int et = dst->border;
+ int el = dst->border;
+ int eb = dst->border + dst->y_height - src->y_height;
+ int er = dst->border + dst->y_width - src->y_width;
- /* check for non multiples of 16 */
- if (er != 0 || eb != 0)
- {
- extend_plane_borders(ybf->y_buffer, ybf->y_stride, height, width, 0, 0, eb, er);
+ copy_and_extend_plane(src->y_buffer, src->y_stride,
+ dst->y_buffer, dst->y_stride,
+ src->y_height, src->y_width,
+ et, el, eb, er);
- /* adjust for uv */
- height = (height + 1) >> 1;
- width = (width + 1) >> 1;
- er = 0x7 & (8 - (width & 0x7));
- eb = 0x7 & (8 - (height & 0x7));
+ et = (et + 1) >> 1;
+ el = (el + 1) >> 1;
+ eb = (eb + 1) >> 1;
+ er = (er + 1) >> 1;
- if (er || eb)
- {
- extend_plane_borders(ybf->u_buffer, ybf->uv_stride, height, width, 0, 0, eb, er);
- extend_plane_borders(ybf->v_buffer, ybf->uv_stride, height, width, 0, 0, eb, er);
- }
- }
+ copy_and_extend_plane(src->u_buffer, src->uv_stride,
+ dst->u_buffer, dst->uv_stride,
+ src->uv_height, src->uv_width,
+ et, el, eb, er);
+
+ copy_and_extend_plane(src->v_buffer, src->uv_stride,
+ dst->v_buffer, dst->uv_stride,
+ src->uv_height, src->uv_width,
+ et, el, eb, er);
}
+
/* note the extension is only for the last row, for intra prediction purpose */
void vp8_extend_mb_row(YV12_BUFFER_CONFIG *ybf, unsigned char *YPtr, unsigned char *UPtr, unsigned char *VPtr)
{
diff --git a/vp8/common/extend.h b/vp8/common/extend.h
index fd0a608..9e0be4e 100644
--- a/vp8/common/extend.h
+++ b/vp8/common/extend.h
@@ -14,8 +14,8 @@
#include "vpx_scale/yv12config.h"
-void Extend(YV12_BUFFER_CONFIG *ybf);
void vp8_extend_mb_row(YV12_BUFFER_CONFIG *ybf, unsigned char *YPtr, unsigned char *UPtr, unsigned char *VPtr);
-void vp8_extend_to_multiple_of16(YV12_BUFFER_CONFIG *ybf, int width, int height);
+void vp8_copy_and_extend_frame(YV12_BUFFER_CONFIG *src,
+ YV12_BUFFER_CONFIG *dst);
#endif
diff --git a/vp8/common/loopfilter.c b/vp8/common/loopfilter.c
index 37c5b77..221998b 100644
--- a/vp8/common/loopfilter.c
+++ b/vp8/common/loopfilter.c
@@ -353,6 +353,9 @@
for (mb_col = 0; mb_col < cm->mb_cols; mb_col++)
{
int Segment = (alt_flt_enabled) ? mbd->mode_info_context->mbmi.segment_id : 0;
+ int skip_lf = (mbd->mode_info_context->mbmi.mode != B_PRED &&
+ mbd->mode_info_context->mbmi.mode != SPLITMV &&
+ mbd->mode_info_context->mbmi.mb_skip_coeff);
filter_level = baseline_filter_level[Segment];
@@ -367,14 +370,14 @@
if (mb_col > 0)
cm->lf_mbv(y_ptr, u_ptr, v_ptr, post->y_stride, post->uv_stride, &lfi[filter_level], cm->simpler_lpf);
- if (mbd->mode_info_context->mbmi.dc_diff > 0)
+ if (!skip_lf)
cm->lf_bv(y_ptr, u_ptr, v_ptr, post->y_stride, post->uv_stride, &lfi[filter_level], cm->simpler_lpf);
/* don't apply across umv border */
if (mb_row > 0)
cm->lf_mbh(y_ptr, u_ptr, v_ptr, post->y_stride, post->uv_stride, &lfi[filter_level], cm->simpler_lpf);
- if (mbd->mode_info_context->mbmi.dc_diff > 0)
+ if (!skip_lf)
cm->lf_bh(y_ptr, u_ptr, v_ptr, post->y_stride, post->uv_stride, &lfi[filter_level], cm->simpler_lpf);
}
@@ -457,6 +460,10 @@
for (mb_col = 0; mb_col < cm->mb_cols; mb_col++)
{
int Segment = (alt_flt_enabled) ? mbd->mode_info_context->mbmi.segment_id : 0;
+ int skip_lf = (mbd->mode_info_context->mbmi.mode != B_PRED &&
+ mbd->mode_info_context->mbmi.mode != SPLITMV &&
+ mbd->mode_info_context->mbmi.mb_skip_coeff);
+
filter_level = baseline_filter_level[Segment];
/* Apply any context driven MB level adjustment */
@@ -467,14 +474,14 @@
if (mb_col > 0)
cm->lf_mbv(y_ptr, 0, 0, post->y_stride, 0, &lfi[filter_level], 0);
- if (mbd->mode_info_context->mbmi.dc_diff > 0)
+ if (!skip_lf)
cm->lf_bv(y_ptr, 0, 0, post->y_stride, 0, &lfi[filter_level], 0);
/* don't apply across umv border */
if (mb_row > 0)
cm->lf_mbh(y_ptr, 0, 0, post->y_stride, 0, &lfi[filter_level], 0);
- if (mbd->mode_info_context->mbmi.dc_diff > 0)
+ if (!skip_lf)
cm->lf_bh(y_ptr, 0, 0, post->y_stride, 0, &lfi[filter_level], 0);
}
@@ -565,6 +572,10 @@
for (mb_col = 0; mb_col < mb_cols; mb_col++)
{
int Segment = (alt_flt_enabled) ? mbd->mode_info_context->mbmi.segment_id : 0;
+ int skip_lf = (mbd->mode_info_context->mbmi.mode != B_PRED &&
+ mbd->mode_info_context->mbmi.mode != SPLITMV &&
+ mbd->mode_info_context->mbmi.mb_skip_coeff);
+
filter_level = baseline_filter_level[Segment];
if (filter_level)
@@ -572,12 +583,12 @@
if (mb_col > 0)
cm->lf_mbv(y_ptr, 0, 0, post->y_stride, 0, &lfi[filter_level], 0);
- if (mbd->mode_info_context->mbmi.dc_diff > 0)
+ if (!skip_lf)
cm->lf_bv(y_ptr, 0, 0, post->y_stride, 0, &lfi[filter_level], 0);
cm->lf_mbh(y_ptr, 0, 0, post->y_stride, 0, &lfi[filter_level], 0);
- if (mbd->mode_info_context->mbmi.dc_diff > 0)
+ if (!skip_lf)
cm->lf_bh(y_ptr, 0, 0, post->y_stride, 0, &lfi[filter_level], 0);
}
diff --git a/vp8/common/onyx.h b/vp8/common/onyx.h
index 2e8f804..a059519 100644
--- a/vp8/common/onyx.h
+++ b/vp8/common/onyx.h
@@ -109,6 +109,7 @@
int noise_sensitivity; // parameter used for applying pre processing blur: recommendation 0
int Sharpness; // parameter used for sharpening output: recommendation 0:
int cpu_used;
+ unsigned int rc_max_intra_bitrate_pct;
// mode ->
//(0)=Realtime/Live Encoding. This mode is optimized for realtim encoding (for example, capturing
diff --git a/vp8/common/postproc.c b/vp8/common/postproc.c
index 5bfc7d6..660880b 100644
--- a/vp8/common/postproc.c
+++ b/vp8/common/postproc.c
@@ -804,11 +804,14 @@
for (j = 0; j < mb_cols; j++)
{
char zz[4];
+ int dc_diff = !(mi[mb_index].mbmi.mode != B_PRED &&
+ mi[mb_index].mbmi.mode != SPLITMV &&
+ mi[mb_index].mbmi.mb_skip_coeff);
if (oci->frame_type == KEY_FRAME)
sprintf(zz, "a");
else
- sprintf(zz, "%c", mi[mb_index].mbmi.dc_diff + '0');
+ sprintf(zz, "%c", dc_diff + '0');
vp8_blit_text(zz, y_ptr, post->y_stride);
mb_index ++;
diff --git a/vp8/common/reconinter.c b/vp8/common/reconinter.c
index 7cfab41..6862bae 100644
--- a/vp8/common/reconinter.c
+++ b/vp8/common/reconinter.c
@@ -207,12 +207,12 @@
}
+/*encoder only*/
void vp8_build_inter_predictors_mbuv(MACROBLOCKD *x)
{
int i;
- if (x->mode_info_context->mbmi.ref_frame != INTRA_FRAME &&
- x->mode_info_context->mbmi.mode != SPLITMV)
+ if (x->mode_info_context->mbmi.mode != SPLITMV)
{
unsigned char *uptr, *vptr;
unsigned char *upred_ptr = &x->predictor[256];
@@ -257,69 +257,32 @@
}
/*encoder only*/
-void vp8_build_inter_predictors_mby(MACROBLOCKD *x)
+void vp8_build_inter16x16_predictors_mby(MACROBLOCKD *x)
{
+ unsigned char *ptr_base;
+ unsigned char *ptr;
+ unsigned char *pred_ptr = x->predictor;
+ int mv_row = x->mode_info_context->mbmi.mv.as_mv.row;
+ int mv_col = x->mode_info_context->mbmi.mv.as_mv.col;
+ int pre_stride = x->block[0].pre_stride;
- if (x->mode_info_context->mbmi.ref_frame != INTRA_FRAME &&
- x->mode_info_context->mbmi.mode != SPLITMV)
+ ptr_base = x->pre.y_buffer;
+ ptr = ptr_base + (mv_row >> 3) * pre_stride + (mv_col >> 3);
+
+ if ((mv_row | mv_col) & 7)
{
- unsigned char *ptr_base;
- unsigned char *ptr;
- unsigned char *pred_ptr = x->predictor;
- int mv_row = x->mode_info_context->mbmi.mv.as_mv.row;
- int mv_col = x->mode_info_context->mbmi.mv.as_mv.col;
- int pre_stride = x->block[0].pre_stride;
-
- ptr_base = x->pre.y_buffer;
- ptr = ptr_base + (mv_row >> 3) * pre_stride + (mv_col >> 3);
-
- if ((mv_row | mv_col) & 7)
- {
- x->subpixel_predict16x16(ptr, pre_stride, mv_col & 7, mv_row & 7, pred_ptr, 16);
- }
- else
- {
- RECON_INVOKE(&x->rtcd->recon, copy16x16)(ptr, pre_stride, pred_ptr, 16);
- }
+ x->subpixel_predict16x16(ptr, pre_stride, mv_col & 7, mv_row & 7, pred_ptr, 16);
}
else
{
- int i;
-
- if (x->mode_info_context->mbmi.partitioning < 3)
- {
- for (i = 0; i < 4; i++)
- {
- BLOCKD *d = &x->block[bbb[i]];
- build_inter_predictors4b(x, d, 16);
- }
-
- }
- else
- {
- for (i = 0; i < 16; i += 2)
- {
- BLOCKD *d0 = &x->block[i];
- BLOCKD *d1 = &x->block[i+1];
-
- if (d0->bmi.mv.as_int == d1->bmi.mv.as_int)
- build_inter_predictors2b(x, d0, 16);
- else
- {
- vp8_build_inter_predictors_b(d0, 16, x->subpixel_predict);
- vp8_build_inter_predictors_b(d1, 16, x->subpixel_predict);
- }
-
- }
- }
+ RECON_INVOKE(&x->rtcd->recon, copy16x16)(ptr, pre_stride, pred_ptr, 16);
}
}
void vp8_build_inter_predictors_mb(MACROBLOCKD *x)
{
- if (x->mode_info_context->mbmi.ref_frame != INTRA_FRAME &&
- x->mode_info_context->mbmi.mode != SPLITMV)
+ if (x->mode_info_context->mbmi.mode != SPLITMV)
{
int offset;
unsigned char *ptr_base;
@@ -535,58 +498,58 @@
+void vp8_build_inter16x16_predictors_mb_s(MACROBLOCKD *x)
+{
+ unsigned char *dst_ptr = x->dst.y_buffer;
+
+ int offset;
+ unsigned char *ptr_base;
+ unsigned char *ptr;
+ unsigned char *uptr, *vptr;
+ unsigned char *udst_ptr = x->dst.u_buffer;
+ unsigned char *vdst_ptr = x->dst.v_buffer;
+
+ int mv_row = x->mode_info_context->mbmi.mv.as_mv.row;
+ int mv_col = x->mode_info_context->mbmi.mv.as_mv.col;
+ int pre_stride = x->dst.y_stride; /*x->block[0].pre_stride;*/
+
+ ptr_base = x->pre.y_buffer;
+ ptr = ptr_base + (mv_row >> 3) * pre_stride + (mv_col >> 3);
+
+ if ((mv_row | mv_col) & 7)
+ {
+ x->subpixel_predict16x16(ptr, pre_stride, mv_col & 7, mv_row & 7, dst_ptr, x->dst.y_stride); /*x->block[0].dst_stride);*/
+ }
+ else
+ {
+ RECON_INVOKE(&x->rtcd->recon, copy16x16)(ptr, pre_stride, dst_ptr, x->dst.y_stride); /*x->block[0].dst_stride);*/
+ }
+
+ mv_row = x->block[16].bmi.mv.as_mv.row;
+ mv_col = x->block[16].bmi.mv.as_mv.col;
+ pre_stride >>= 1;
+ offset = (mv_row >> 3) * pre_stride + (mv_col >> 3);
+ uptr = x->pre.u_buffer + offset;
+ vptr = x->pre.v_buffer + offset;
+
+ if ((mv_row | mv_col) & 7)
+ {
+ x->subpixel_predict8x8(uptr, pre_stride, mv_col & 7, mv_row & 7, udst_ptr, x->dst.uv_stride);
+ x->subpixel_predict8x8(vptr, pre_stride, mv_col & 7, mv_row & 7, vdst_ptr, x->dst.uv_stride);
+ }
+ else
+ {
+ RECON_INVOKE(&x->rtcd->recon, copy8x8)(uptr, pre_stride, udst_ptr, x->dst.uv_stride);
+ RECON_INVOKE(&x->rtcd->recon, copy8x8)(vptr, pre_stride, vdst_ptr, x->dst.uv_stride);
+ }
+}
void vp8_build_inter_predictors_mb_s(MACROBLOCKD *x)
{
- /*unsigned char *pred_ptr = x->block[0].predictor;
- unsigned char *dst_ptr = *(x->block[0].base_dst) + x->block[0].dst;*/
- unsigned char *pred_ptr = x->predictor;
unsigned char *dst_ptr = x->dst.y_buffer;
if (x->mode_info_context->mbmi.mode != SPLITMV)
{
- int offset;
- unsigned char *ptr_base;
- unsigned char *ptr;
- unsigned char *uptr, *vptr;
- /*unsigned char *pred_ptr = x->predictor;
- unsigned char *upred_ptr = &x->predictor[256];
- unsigned char *vpred_ptr = &x->predictor[320];*/
- unsigned char *udst_ptr = x->dst.u_buffer;
- unsigned char *vdst_ptr = x->dst.v_buffer;
-
- int mv_row = x->mode_info_context->mbmi.mv.as_mv.row;
- int mv_col = x->mode_info_context->mbmi.mv.as_mv.col;
- int pre_stride = x->dst.y_stride; /*x->block[0].pre_stride;*/
-
- ptr_base = x->pre.y_buffer;
- ptr = ptr_base + (mv_row >> 3) * pre_stride + (mv_col >> 3);
-
- if ((mv_row | mv_col) & 7)
- {
- x->subpixel_predict16x16(ptr, pre_stride, mv_col & 7, mv_row & 7, dst_ptr, x->dst.y_stride); /*x->block[0].dst_stride);*/
- }
- else
- {
- RECON_INVOKE(&x->rtcd->recon, copy16x16)(ptr, pre_stride, dst_ptr, x->dst.y_stride); /*x->block[0].dst_stride);*/
- }
-
- mv_row = x->block[16].bmi.mv.as_mv.row;
- mv_col = x->block[16].bmi.mv.as_mv.col;
- pre_stride >>= 1;
- offset = (mv_row >> 3) * pre_stride + (mv_col >> 3);
- uptr = x->pre.u_buffer + offset;
- vptr = x->pre.v_buffer + offset;
-
- if ((mv_row | mv_col) & 7)
- {
- x->subpixel_predict8x8(uptr, pre_stride, mv_col & 7, mv_row & 7, udst_ptr, x->dst.uv_stride);
- x->subpixel_predict8x8(vptr, pre_stride, mv_col & 7, mv_row & 7, vdst_ptr, x->dst.uv_stride);
- }
- else
- {
- RECON_INVOKE(&x->rtcd->recon, copy8x8)(uptr, pre_stride, udst_ptr, x->dst.uv_stride);
- RECON_INVOKE(&x->rtcd->recon, copy8x8)(vptr, pre_stride, vdst_ptr, x->dst.uv_stride);
- }
+ vp8_build_inter16x16_predictors_mb_s(x);
}
else
{
@@ -599,25 +562,20 @@
{
for (i = 0; i < 4; i++)
{
+ unsigned char *ptr_base;
+ unsigned char *ptr;
BLOCKD *d = &x->block[bbb[i]];
- /*build_inter_predictors4b(x, d, 16);*/
+ ptr_base = *(d->base_pre);
+ ptr = ptr_base + d->pre + (d->bmi.mv.as_mv.row >> 3) * d->pre_stride + (d->bmi.mv.as_mv.col >> 3);
+
+ if (d->bmi.mv.as_mv.row & 7 || d->bmi.mv.as_mv.col & 7)
{
- unsigned char *ptr_base;
- unsigned char *ptr;
- unsigned char *pred_ptr = d->predictor;
-
- ptr_base = *(d->base_pre);
- ptr = ptr_base + d->pre + (d->bmi.mv.as_mv.row >> 3) * d->pre_stride + (d->bmi.mv.as_mv.col >> 3);
-
- if (d->bmi.mv.as_mv.row & 7 || d->bmi.mv.as_mv.col & 7)
- {
- x->subpixel_predict8x8(ptr, d->pre_stride, d->bmi.mv.as_mv.col & 7, d->bmi.mv.as_mv.row & 7, dst_ptr, x->dst.y_stride); /*x->block[0].dst_stride);*/
- }
- else
- {
- RECON_INVOKE(&x->rtcd->recon, copy8x8)(ptr, d->pre_stride, dst_ptr, x->dst.y_stride); /*x->block[0].dst_stride);*/
- }
+ x->subpixel_predict8x8(ptr, d->pre_stride, d->bmi.mv.as_mv.col & 7, d->bmi.mv.as_mv.row & 7, dst_ptr, x->dst.y_stride); /*x->block[0].dst_stride);*/
+ }
+ else
+ {
+ RECON_INVOKE(&x->rtcd->recon, copy8x8)(ptr, d->pre_stride, dst_ptr, x->dst.y_stride); /*x->block[0].dst_stride);*/
}
}
}
@@ -633,7 +591,6 @@
/*build_inter_predictors2b(x, d0, 16);*/
unsigned char *ptr_base;
unsigned char *ptr;
- unsigned char *pred_ptr = d0->predictor;
ptr_base = *(d0->base_pre);
ptr = ptr_base + d0->pre + (d0->bmi.mv.as_mv.row >> 3) * d0->pre_stride + (d0->bmi.mv.as_mv.col >> 3);
@@ -665,7 +622,6 @@
/*build_inter_predictors2b(x, d0, 8);*/
unsigned char *ptr_base;
unsigned char *ptr;
- unsigned char *pred_ptr = d0->predictor;
ptr_base = *(d0->base_pre);
ptr = ptr_base + d0->pre + (d0->bmi.mv.as_mv.row >> 3) * d0->pre_stride + (d0->bmi.mv.as_mv.col >> 3);
diff --git a/vp8/common/reconinter.h b/vp8/common/reconinter.h
index 7c1dee4..688bebe 100644
--- a/vp8/common/reconinter.h
+++ b/vp8/common/reconinter.h
@@ -14,8 +14,9 @@
extern void vp8_build_inter_predictors_mb(MACROBLOCKD *x);
extern void vp8_build_inter_predictors_mb_s(MACROBLOCKD *x);
+extern void vp8_build_inter16x16_predictors_mb_s(MACROBLOCKD *x);
-extern void vp8_build_inter_predictors_mby(MACROBLOCKD *x);
+extern void vp8_build_inter16x16_predictors_mby(MACROBLOCKD *x);
extern void vp8_build_uvmvs(MACROBLOCKD *x, int fullpixel);
extern void vp8_build_inter_predictors_b(BLOCKD *d, int pitch, vp8_subpix_fn_t sppf);
extern void vp8_build_inter_predictors_mbuv(MACROBLOCKD *x);
diff --git a/vp8/common/threading.h b/vp8/common/threading.h
index 44eaf08..b7542b3 100644
--- a/vp8/common/threading.h
+++ b/vp8/common/threading.h
@@ -12,8 +12,6 @@
#ifndef _PTHREAD_EMULATION
#define _PTHREAD_EMULATION
-#define VPXINFINITE 10000 /* 10second. */
-
#if CONFIG_OS_SUPPORT && CONFIG_MULTITHREAD
/* Thread management macros */
@@ -28,7 +26,7 @@
#define pthread_t HANDLE
#define pthread_attr_t DWORD
#define pthread_create(thhandle,attr,thfunc,tharg) (int)((*thhandle=(HANDLE)_beginthreadex(NULL,0,(unsigned int (__stdcall *)(void *))thfunc,tharg,0,NULL))==NULL)
-#define pthread_join(thread, result) ((WaitForSingleObject((thread),VPXINFINITE)!=WAIT_OBJECT_0) || !CloseHandle(thread))
+#define pthread_join(thread, result) ((WaitForSingleObject((thread),INFINITE)!=WAIT_OBJECT_0) || !CloseHandle(thread))
#define pthread_detach(thread) if(thread!=NULL)CloseHandle(thread)
#define thread_sleep(nms) Sleep(nms)
#define pthread_cancel(thread) terminate_thread(thread,0)
@@ -62,7 +60,7 @@
#define sem_t HANDLE
#define pause(voidpara) __asm PAUSE
#define sem_init(sem, sem_attr1, sem_init_value) (int)((*sem = CreateEvent(NULL,FALSE,FALSE,NULL))==NULL)
-#define sem_wait(sem) (int)(WAIT_OBJECT_0 != WaitForSingleObject(*sem,VPXINFINITE))
+#define sem_wait(sem) (int)(WAIT_OBJECT_0 != WaitForSingleObject(*sem,INFINITE))
#define sem_post(sem) SetEvent(*sem)
#define sem_destroy(sem) if(*sem)((int)(CloseHandle(*sem))==TRUE)
#define thread_sleep(nms) Sleep(nms)
diff --git a/vp8/common/x86/idctllm_sse2.asm b/vp8/common/x86/idctllm_sse2.asm
index edee157..34a7e18 100644
--- a/vp8/common/x86/idctllm_sse2.asm
+++ b/vp8/common/x86/idctllm_sse2.asm
@@ -32,9 +32,6 @@
mov rdx, arg(1) ; dequant
mov rax, arg(0) ; qcoeff
- ; Zero out xmm7, for use unpacking
- pxor xmm7, xmm7
-
movd xmm4, [rax]
movd xmm5, [rdx]
@@ -43,9 +40,12 @@
pmullw xmm4, xmm5
+ ; Zero out xmm5, for use unpacking
+ pxor xmm5, xmm5
+
; clear coeffs
- movd [rax], xmm7
- movd [rax+32], xmm7
+ movd [rax], xmm5
+ movd [rax+32], xmm5
;pshufb
pshuflw xmm4, xmm4, 00000000b
pshufhw xmm4, xmm4, 00000000b
@@ -62,10 +62,10 @@
lea rcx, [3*rcx]
movq xmm3, [rax+rcx]
- punpcklbw xmm0, xmm7
- punpcklbw xmm1, xmm7
- punpcklbw xmm2, xmm7
- punpcklbw xmm3, xmm7
+ punpcklbw xmm0, xmm5
+ punpcklbw xmm1, xmm5
+ punpcklbw xmm2, xmm5
+ punpcklbw xmm3, xmm5
mov rax, arg(3) ; dst
movsxd rdx, dword ptr arg(4) ; dst_stride
@@ -77,10 +77,10 @@
paddw xmm3, xmm4
; pack up before storing
- packuswb xmm0, xmm7
- packuswb xmm1, xmm7
- packuswb xmm2, xmm7
- packuswb xmm3, xmm7
+ packuswb xmm0, xmm5
+ packuswb xmm1, xmm5
+ packuswb xmm2, xmm5
+ packuswb xmm3, xmm5
; store blocks back out
movq [rax], xmm0
@@ -102,6 +102,7 @@
push rbp
mov rbp, rsp
SHADOW_ARGS_TO_STACK 7
+ SAVE_XMM 7
GET_GOT rbx
push rsi
push rdi
@@ -347,6 +348,7 @@
pop rdi
pop rsi
RESTORE_GOT
+ RESTORE_XMM
UNSHADOW_ARGS
pop rbp
ret
@@ -377,8 +379,8 @@
mov rdi, arg(3) ; dst
mov rdx, arg(5) ; dc
- ; Zero out xmm7, for use unpacking
- pxor xmm7, xmm7
+ ; Zero out xmm5, for use unpacking
+ pxor xmm5, xmm5
; load up 2 dc words here == 2*16 = doubleword
movd xmm4, [rdx]
@@ -398,10 +400,10 @@
psraw xmm4, 3
; Predict buffer needs to be expanded from bytes to words
- punpcklbw xmm0, xmm7
- punpcklbw xmm1, xmm7
- punpcklbw xmm2, xmm7
- punpcklbw xmm3, xmm7
+ punpcklbw xmm0, xmm5
+ punpcklbw xmm1, xmm5
+ punpcklbw xmm2, xmm5
+ punpcklbw xmm3, xmm5
; Add to predict buffer
paddw xmm0, xmm4
@@ -410,10 +412,10 @@
paddw xmm3, xmm4
; pack up before storing
- packuswb xmm0, xmm7
- packuswb xmm1, xmm7
- packuswb xmm2, xmm7
- packuswb xmm3, xmm7
+ packuswb xmm0, xmm5
+ packuswb xmm1, xmm5
+ packuswb xmm2, xmm5
+ packuswb xmm3, xmm5
; Load destination stride before writing out,
; doesn't need to persist
@@ -441,6 +443,7 @@
push rbp
mov rbp, rsp
SHADOW_ARGS_TO_STACK 7
+ SAVE_XMM 7
GET_GOT rbx
push rsi
push rdi
@@ -692,6 +695,7 @@
pop rdi
pop rsi
RESTORE_GOT
+ RESTORE_XMM
UNSHADOW_ARGS
pop rbp
ret
diff --git a/vp8/common/x86/iwalsh_sse2.asm b/vp8/common/x86/iwalsh_sse2.asm
index 83c97df..1da4fd8 100644
--- a/vp8/common/x86/iwalsh_sse2.asm
+++ b/vp8/common/x86/iwalsh_sse2.asm
@@ -17,7 +17,7 @@
push rbp
mov rbp, rsp
SHADOW_ARGS_TO_STACK 2
- SAVE_XMM
+ SAVE_XMM 6
push rsi
push rdi
; end prolog
@@ -41,7 +41,7 @@
movdqa xmm4, xmm0
punpcklqdq xmm0, xmm3 ;d1 a1
punpckhqdq xmm4, xmm3 ;c1 b1
- movd xmm7, eax
+ movd xmm6, eax
movdqa xmm1, xmm4 ;c1 b1
paddw xmm4, xmm0 ;dl+cl a1+b1 aka op[4] op[0]
@@ -66,7 +66,7 @@
pshufd xmm2, xmm1, 4eh ;ip[8] ip[12]
movdqa xmm3, xmm4 ;ip[4] ip[0]
- pshufd xmm7, xmm7, 0 ;03 03 03 03 03 03 03 03
+ pshufd xmm6, xmm6, 0 ;03 03 03 03 03 03 03 03
paddw xmm4, xmm2 ;ip[4]+ip[8] ip[0]+ip[12] aka b1 a1
psubw xmm3, xmm2 ;ip[4]-ip[8] ip[0]-ip[12] aka c1 d1
@@ -90,8 +90,8 @@
punpcklwd xmm5, xmm0 ; 31 21 11 01 30 20 10 00
punpckhwd xmm1, xmm0 ; 33 23 13 03 32 22 12 02
;~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
- paddw xmm5, xmm7
- paddw xmm1, xmm7
+ paddw xmm5, xmm6
+ paddw xmm1, xmm6
psraw xmm5, 3
psraw xmm1, 3
diff --git a/vp8/common/x86/loopfilter_sse2.asm b/vp8/common/x86/loopfilter_sse2.asm
index 849133d..c2ce1a1 100644
--- a/vp8/common/x86/loopfilter_sse2.asm
+++ b/vp8/common/x86/loopfilter_sse2.asm
@@ -288,7 +288,7 @@
push rbp
mov rbp, rsp
SHADOW_ARGS_TO_STACK 6
- SAVE_XMM
+ SAVE_XMM 7
GET_GOT rbx
push rsi
push rdi
@@ -338,7 +338,7 @@
push rbp
mov rbp, rsp
SHADOW_ARGS_TO_STACK 6
- SAVE_XMM
+ SAVE_XMM 7
GET_GOT rbx
push rsi
push rdi
@@ -584,7 +584,7 @@
push rbp
mov rbp, rsp
SHADOW_ARGS_TO_STACK 6
- SAVE_XMM
+ SAVE_XMM 7
GET_GOT rbx
push rsi
push rdi
@@ -634,7 +634,7 @@
push rbp
mov rbp, rsp
SHADOW_ARGS_TO_STACK 6
- SAVE_XMM
+ SAVE_XMM 7
GET_GOT rbx
push rsi
push rdi
@@ -1024,7 +1024,7 @@
push rbp
mov rbp, rsp
SHADOW_ARGS_TO_STACK 6
- SAVE_XMM
+ SAVE_XMM 7
GET_GOT rbx
push rsi
push rdi
@@ -1091,7 +1091,7 @@
push rbp
mov rbp, rsp
SHADOW_ARGS_TO_STACK 6
- SAVE_XMM
+ SAVE_XMM 7
GET_GOT rbx
push rsi
push rdi
@@ -1249,7 +1249,7 @@
push rbp
mov rbp, rsp
SHADOW_ARGS_TO_STACK 6
- SAVE_XMM
+ SAVE_XMM 7
GET_GOT rbx
push rsi
push rdi
@@ -1318,7 +1318,7 @@
push rbp
mov rbp, rsp
SHADOW_ARGS_TO_STACK 6
- SAVE_XMM
+ SAVE_XMM 7
GET_GOT rbx
push rsi
push rdi
@@ -1386,7 +1386,7 @@
push rbp
mov rbp, rsp
SHADOW_ARGS_TO_STACK 6
- SAVE_XMM
+ SAVE_XMM 7
GET_GOT rbx
push rsi
push rdi
@@ -1503,7 +1503,7 @@
push rbp ; save old base pointer value.
mov rbp, rsp ; set new base pointer value.
SHADOW_ARGS_TO_STACK 6
- SAVE_XMM
+ SAVE_XMM 7
GET_GOT rbx ; save callee-saved reg
push rsi
push rdi
diff --git a/vp8/common/x86/postproc_sse2.asm b/vp8/common/x86/postproc_sse2.asm
index 30b4bf5..06d51ec 100644
--- a/vp8/common/x86/postproc_sse2.asm
+++ b/vp8/common/x86/postproc_sse2.asm
@@ -26,7 +26,7 @@
push rbp
mov rbp, rsp
SHADOW_ARGS_TO_STACK 7
- SAVE_XMM
+ SAVE_XMM 7
GET_GOT rbx
push rsi
push rdi
@@ -256,7 +256,7 @@
push rbp
mov rbp, rsp
SHADOW_ARGS_TO_STACK 5
- SAVE_XMM
+ SAVE_XMM 7
GET_GOT rbx
push rsi
push rdi
@@ -456,7 +456,7 @@
push rbp
mov rbp, rsp
SHADOW_ARGS_TO_STACK 5
- SAVE_XMM
+ SAVE_XMM 7
GET_GOT rbx
push rsi
push rdi
diff --git a/vp8/common/x86/recon_sse2.asm b/vp8/common/x86/recon_sse2.asm
index 4ad3973..67b6420 100644
--- a/vp8/common/x86/recon_sse2.asm
+++ b/vp8/common/x86/recon_sse2.asm
@@ -67,7 +67,7 @@
push rbp
mov rbp, rsp
SHADOW_ARGS_TO_STACK 4
- SAVE_XMM
+ SAVE_XMM 7
push rsi
push rdi
; end prolog
diff --git a/vp8/common/x86/subpixel_sse2.asm b/vp8/common/x86/subpixel_sse2.asm
index b87cad2..83e3b14 100644
--- a/vp8/common/x86/subpixel_sse2.asm
+++ b/vp8/common/x86/subpixel_sse2.asm
@@ -37,7 +37,7 @@
push rbp
mov rbp, rsp
SHADOW_ARGS_TO_STACK 7
- SAVE_XMM
+ SAVE_XMM 7
GET_GOT rbx
push rsi
push rdi
@@ -157,7 +157,7 @@
push rbp
mov rbp, rsp
SHADOW_ARGS_TO_STACK 7
- SAVE_XMM
+ SAVE_XMM 7
GET_GOT rbx
push rsi
push rdi
@@ -333,7 +333,7 @@
push rbp
mov rbp, rsp
SHADOW_ARGS_TO_STACK 8
- SAVE_XMM
+ SAVE_XMM 7
GET_GOT rbx
push rsi
push rdi
@@ -428,7 +428,7 @@
push rbp
mov rbp, rsp
SHADOW_ARGS_TO_STACK 8
- SAVE_XMM
+ SAVE_XMM 7
GET_GOT rbx
push rsi
push rdi
@@ -538,7 +538,7 @@
push rbp
mov rbp, rsp
SHADOW_ARGS_TO_STACK 6
- SAVE_XMM
+ SAVE_XMM 7
GET_GOT rbx
push rsi
push rdi
@@ -651,7 +651,7 @@
push rbp
mov rbp, rsp
SHADOW_ARGS_TO_STACK 6
- SAVE_XMM
+ SAVE_XMM 7
GET_GOT rbx
push rsi
push rdi
@@ -816,7 +816,7 @@
push rbp
mov rbp, rsp
SHADOW_ARGS_TO_STACK 6
- SAVE_XMM
+ SAVE_XMM 7
GET_GOT rbx
push rsi
push rdi
@@ -908,7 +908,6 @@
push rbp
mov rbp, rsp
SHADOW_ARGS_TO_STACK 5
- ;SAVE_XMM ;xmm6, xmm7 are not used here.
GET_GOT rbx
push rsi
push rdi
@@ -948,7 +947,6 @@
pop rdi
pop rsi
RESTORE_GOT
- ;RESTORE_XMM
UNSHADOW_ARGS
pop rbp
ret
@@ -969,7 +967,7 @@
push rbp
mov rbp, rsp
SHADOW_ARGS_TO_STACK 6
- SAVE_XMM
+ SAVE_XMM 7
GET_GOT rbx
push rsi
push rdi
@@ -1238,7 +1236,7 @@
push rbp
mov rbp, rsp
SHADOW_ARGS_TO_STACK 6
- SAVE_XMM
+ SAVE_XMM 7
GET_GOT rbx
push rsi
push rdi
diff --git a/vp8/common/x86/subpixel_ssse3.asm b/vp8/common/x86/subpixel_ssse3.asm
index 7f6fd93..1ddbc54 100644
--- a/vp8/common/x86/subpixel_ssse3.asm
+++ b/vp8/common/x86/subpixel_ssse3.asm
@@ -39,6 +39,7 @@
push rbp
mov rbp, rsp
SHADOW_ARGS_TO_STACK 6
+ SAVE_XMM 7
GET_GOT rbx
push rsi
push rdi
@@ -107,6 +108,7 @@
pop rdi
pop rsi
RESTORE_GOT
+ RESTORE_XMM
UNSHADOW_ARGS
pop rbp
ret
@@ -162,6 +164,7 @@
pop rdi
pop rsi
RESTORE_GOT
+ RESTORE_XMM
UNSHADOW_ARGS
pop rbp
ret
@@ -179,7 +182,7 @@
push rbp
mov rbp, rsp
SHADOW_ARGS_TO_STACK 6
- SAVE_XMM
+ SAVE_XMM 7
GET_GOT rbx
push rsi
push rdi
@@ -194,10 +197,6 @@
mov rdi, arg(2) ;output_ptr
-;;
-;; cmp esi, DWORD PTR [rax]
-;; je vp8_filter_block1d16_h4_ssse3
-
mov rsi, arg(0) ;src_ptr
movdqa xmm4, XMMWORD PTR [rax] ;k0_k5
@@ -271,61 +270,7 @@
pop rdi
pop rsi
RESTORE_GOT
- UNSHADOW_ARGS
- pop rbp
- ret
-
-vp8_filter_block1d16_h4_ssse3:
- movdqa xmm5, XMMWORD PTR [rax+256] ;k2_k4
- movdqa xmm6, XMMWORD PTR [rax+128] ;k1_k3
-
- mov rsi, arg(0) ;src_ptr
- movsxd rax, dword ptr arg(1) ;src_pixels_per_line
- movsxd rcx, dword ptr arg(4) ;output_height
- movsxd rdx, dword ptr arg(3) ;output_pitch
-
-filter_block1d16_h4_rowloop_ssse3:
- movdqu xmm1, XMMWORD PTR [rsi - 2]
-
- movdqa xmm2, xmm1
- pshufb xmm1, [GLOBAL(shuf2b)]
- pshufb xmm2, [GLOBAL(shuf3b)]
- pmaddubsw xmm1, xmm5
-
- movdqu xmm3, XMMWORD PTR [rsi + 6]
-
- pmaddubsw xmm2, xmm6
- movdqa xmm0, xmm3
- pshufb xmm3, [GLOBAL(shuf3b)]
- pshufb xmm0, [GLOBAL(shuf2b)]
-
- paddsw xmm1, [GLOBAL(rd)]
- paddsw xmm1, xmm2
-
- pmaddubsw xmm0, xmm5
- pmaddubsw xmm3, xmm6
-
- psraw xmm1, 7
- packuswb xmm1, xmm1
- lea rsi, [rsi + rax]
- paddsw xmm3, xmm0
- paddsw xmm3, [GLOBAL(rd)]
- psraw xmm3, 7
- packuswb xmm3, xmm3
-
- punpcklqdq xmm1, xmm3
-
- movdqa XMMWORD Ptr [rdi], xmm1
-
- add rdi, rdx
- dec rcx
- jnz filter_block1d16_h4_rowloop_ssse3
-
-
- ; begin epilog
- pop rdi
- pop rsi
- RESTORE_GOT
+ RESTORE_XMM
UNSHADOW_ARGS
pop rbp
ret
@@ -344,6 +289,7 @@
push rbp
mov rbp, rsp
SHADOW_ARGS_TO_STACK 6
+ SAVE_XMM 7
GET_GOT rbx
push rsi
push rdi
@@ -451,6 +397,7 @@
pop rdi
pop rsi
RESTORE_GOT
+ RESTORE_XMM
UNSHADOW_ARGS
pop rbp
ret
@@ -471,6 +418,7 @@
push rbp
mov rbp, rsp
SHADOW_ARGS_TO_STACK 6
+ SAVE_XMM 7
GET_GOT rbx
push rsi
push rdi
@@ -566,6 +514,7 @@
pop rdi
pop rsi
RESTORE_GOT
+ RESTORE_XMM
UNSHADOW_ARGS
pop rbp
ret
@@ -638,6 +587,7 @@
pop rdi
pop rsi
RESTORE_GOT
+ RESTORE_XMM
UNSHADOW_ARGS
pop rbp
ret
@@ -656,6 +606,7 @@
push rbp
mov rbp, rsp
SHADOW_ARGS_TO_STACK 6
+ SAVE_XMM 7
GET_GOT rbx
push rsi
push rdi
@@ -728,6 +679,7 @@
pop rdi
pop rsi
RESTORE_GOT
+ RESTORE_XMM
UNSHADOW_ARGS
pop rbp
ret
@@ -776,6 +728,7 @@
pop rdi
pop rsi
RESTORE_GOT
+ RESTORE_XMM
UNSHADOW_ARGS
pop rbp
ret
@@ -932,7 +885,7 @@
push rbp
mov rbp, rsp
SHADOW_ARGS_TO_STACK 6
- SAVE_XMM
+ SAVE_XMM 7
GET_GOT rbx
push rsi
push rdi
@@ -1195,7 +1148,7 @@
push rbp
mov rbp, rsp
SHADOW_ARGS_TO_STACK 6
- SAVE_XMM
+ SAVE_XMM 7
GET_GOT rbx
push rsi
push rdi
diff --git a/vp8/decoder/decodframe.c b/vp8/decoder/decodframe.c
index 82841e8..9598508 100644
--- a/vp8/decoder/decodframe.c
+++ b/vp8/decoder/decodframe.c
@@ -111,16 +111,15 @@
*/
static void skip_recon_mb(VP8D_COMP *pbi, MACROBLOCKD *xd)
{
- if (xd->frame_type == KEY_FRAME || xd->mode_info_context->mbmi.ref_frame == INTRA_FRAME)
+ if (xd->mode_info_context->mbmi.ref_frame == INTRA_FRAME)
{
-
vp8_build_intra_predictors_mbuv_s(xd);
RECON_INVOKE(&pbi->common.rtcd.recon,
build_intra_predictors_mby_s)(xd);
}
else
{
- vp8_build_inter_predictors_mb_s(xd);
+ vp8_build_inter16x16_predictors_mb_s(xd);
}
}
@@ -195,11 +194,10 @@
clamp_mvs(xd);
}
- xd->mode_info_context->mbmi.dc_diff = 1;
-
- if (xd->mode_info_context->mbmi.mode != B_PRED && xd->mode_info_context->mbmi.mode != SPLITMV && eobtotal == 0)
+ eobtotal |= (xd->mode_info_context->mbmi.mode == B_PRED ||
+ xd->mode_info_context->mbmi.mode == SPLITMV);
+ if (!eobtotal)
{
- xd->mode_info_context->mbmi.dc_diff = 0;
skip_recon_mb(pbi, xd);
return;
}
@@ -208,7 +206,7 @@
mb_init_dequantizer(pbi, xd);
/* do prediction */
- if (xd->frame_type == KEY_FRAME || xd->mode_info_context->mbmi.ref_frame == INTRA_FRAME)
+ if (xd->mode_info_context->mbmi.ref_frame == INTRA_FRAME)
{
vp8_build_intra_predictors_mbuv(xd);
@@ -255,7 +253,7 @@
xd->predictor, xd->dst.y_buffer,
xd->dst.y_stride, xd->eobs, xd->block[24].diff);
}
- else if ((xd->frame_type == KEY_FRAME || xd->mode_info_context->mbmi.ref_frame == INTRA_FRAME) && xd->mode_info_context->mbmi.mode == B_PRED)
+ else if (xd->mode_info_context->mbmi.mode == B_PRED)
{
for (i = 0; i < 16; i++)
{
diff --git a/vp8/decoder/detokenize.c b/vp8/decoder/detokenize.c
old mode 100755
new mode 100644
diff --git a/vp8/decoder/onyxd_if.c b/vp8/decoder/onyxd_if.c
index ef2e00d..1e83ab5 100644
--- a/vp8/decoder/onyxd_if.c
+++ b/vp8/decoder/onyxd_if.c
@@ -76,7 +76,6 @@
pbi->common.current_video_frame = 0;
pbi->ready_for_new_data = 1;
- pbi->CPUFreq = 0; /*vp8_get_processor_freq();*/
#if CONFIG_MULTITHREAD
pbi->max_threads = oxcf->max_threads;
vp8_decoder_create_threads(pbi);
@@ -252,7 +251,6 @@
VP8D_COMP *pbi = (VP8D_COMP *) ptr;
VP8_COMMON *cm = &pbi->common;
int retcode = 0;
- struct vpx_usec_timer timer;
/*if(pbi->ready_for_new_data == 0)
return -1;*/
@@ -317,8 +315,6 @@
pbi->common.error.setjmp = 1;
- vpx_usec_timer_start(&timer);
-
/*cm->current_video_frame++;*/
pbi->Source = source;
pbi->source_sz = size;
@@ -379,15 +375,9 @@
if(pbi->common.filter_level)
{
- struct vpx_usec_timer lpftimer;
- vpx_usec_timer_start(&lpftimer);
/* Apply the loop filter if appropriate. */
-
vp8_loop_filter_frame(cm, &pbi->mb, cm->filter_level);
- vpx_usec_timer_mark(&lpftimer);
- pbi->time_loop_filtering += vpx_usec_timer_elapsed(&lpftimer);
-
cm->last_frame_type = cm->frame_type;
cm->last_filter_type = cm->filter_type;
cm->last_sharpness_level = cm->sharpness_level;
@@ -398,11 +388,6 @@
vp8_clear_system_state();
- vpx_usec_timer_mark(&timer);
- pbi->decode_microseconds = vpx_usec_timer_elapsed(&timer);
-
- pbi->time_decoding += pbi->decode_microseconds;
-
/*vp8_print_modes_and_motion_vectors( cm->mi, cm->mb_rows,cm->mb_cols, cm->current_video_frame);*/
if (cm->show_frame)
diff --git a/vp8/decoder/onyxd_int.h b/vp8/decoder/onyxd_int.h
index ac1e332..9b91756 100644
--- a/vp8/decoder/onyxd_int.h
+++ b/vp8/decoder/onyxd_int.h
@@ -81,12 +81,6 @@
const unsigned char *Source;
unsigned int source_sz;
-
- unsigned int CPUFreq;
- unsigned int decode_microseconds;
- unsigned int time_decoding;
- unsigned int time_loop_filtering;
-
#if CONFIG_MULTITHREAD
/* variable for threading */
diff --git a/vp8/decoder/threading.c b/vp8/decoder/threading.c
index 3d9d428..c92de00 100644
--- a/vp8/decoder/threading.c
+++ b/vp8/decoder/threading.c
@@ -108,12 +108,10 @@
clamp_mvs(xd);
}
- xd->mode_info_context->mbmi.dc_diff = 1;
-
- if (xd->mode_info_context->mbmi.mode != B_PRED && xd->mode_info_context->mbmi.mode != SPLITMV && eobtotal == 0)
+ eobtotal |= (xd->mode_info_context->mbmi.mode == B_PRED ||
+ xd->mode_info_context->mbmi.mode == SPLITMV);
+ if (!eobtotal)
{
- xd->mode_info_context->mbmi.dc_diff = 0;
-
/*mt_skip_recon_mb(pbi, xd, mb_row, mb_col);*/
if (xd->frame_type == KEY_FRAME || xd->mode_info_context->mbmi.ref_frame == INTRA_FRAME)
{
@@ -122,7 +120,7 @@
}
else
{
- vp8_build_inter_predictors_mb_s(xd);
+ vp8_build_inter16x16_predictors_mb_s(xd);
}
return;
}
@@ -322,6 +320,7 @@
if (pbi->common.filter_level)
{
+ int skip_lf;
if( mb_row != pc->mb_rows-1 )
{
/* Save decoded MB last row data for next-row decoding */
@@ -349,6 +348,10 @@
/* update loopfilter info */
Segment = (alt_flt_enabled) ? xd->mode_info_context->mbmi.segment_id : 0;
+ skip_lf = (xd->mode_info_context->mbmi.mode != B_PRED &&
+ xd->mode_info_context->mbmi.mode != SPLITMV &&
+ xd->mode_info_context->mbmi.mb_skip_coeff);
+
filter_level = pbi->mt_baseline_filter_level[Segment];
/* Distance of Mb to the various image edges.
* These are specified to 8th pel as they are always compared to values that are in 1/8th pel units
@@ -362,14 +365,14 @@
if (mb_col > 0)
pc->lf_mbv(xd->dst.y_buffer, xd->dst.u_buffer, xd->dst.v_buffer, recon_y_stride, recon_uv_stride, &lfi[filter_level], pc->simpler_lpf);
- if (xd->mode_info_context->mbmi.dc_diff > 0)
+ if (!skip_lf)
pc->lf_bv(xd->dst.y_buffer, xd->dst.u_buffer, xd->dst.v_buffer, recon_y_stride, recon_uv_stride, &lfi[filter_level], pc->simpler_lpf);
/* don't apply across umv border */
if (mb_row > 0)
pc->lf_mbh(xd->dst.y_buffer, xd->dst.u_buffer, xd->dst.v_buffer, recon_y_stride, recon_uv_stride, &lfi[filter_level], pc->simpler_lpf);
- if (xd->mode_info_context->mbmi.dc_diff > 0)
+ if (!skip_lf)
pc->lf_bh(xd->dst.y_buffer, xd->dst.u_buffer, xd->dst.v_buffer, recon_y_stride, recon_uv_stride, &lfi[filter_level], pc->simpler_lpf);
}
}
@@ -810,6 +813,7 @@
if (pbi->common.filter_level)
{
+ int skip_lf;
/* Save decoded MB last row data for next-row decoding */
if(mb_row != pc->mb_rows-1)
{
@@ -837,6 +841,9 @@
/* update loopfilter info */
Segment = (alt_flt_enabled) ? xd->mode_info_context->mbmi.segment_id : 0;
+ skip_lf = (xd->mode_info_context->mbmi.mode != B_PRED &&
+ xd->mode_info_context->mbmi.mode != SPLITMV &&
+ xd->mode_info_context->mbmi.mb_skip_coeff);
filter_level = pbi->mt_baseline_filter_level[Segment];
/* Distance of Mb to the various image edges.
* These are specified to 8th pel as they are always compared to values that are in 1/8th pel units
@@ -850,14 +857,14 @@
if (mb_col > 0)
pc->lf_mbv(xd->dst.y_buffer, xd->dst.u_buffer, xd->dst.v_buffer, recon_y_stride, recon_uv_stride, &lfi[filter_level], pc->simpler_lpf);
- if (xd->mode_info_context->mbmi.dc_diff > 0)
+ if (!skip_lf)
pc->lf_bv(xd->dst.y_buffer, xd->dst.u_buffer, xd->dst.v_buffer, recon_y_stride, recon_uv_stride, &lfi[filter_level], pc->simpler_lpf);
/* don't apply across umv border */
if (mb_row > 0)
pc->lf_mbh(xd->dst.y_buffer, xd->dst.u_buffer, xd->dst.v_buffer, recon_y_stride, recon_uv_stride, &lfi[filter_level], pc->simpler_lpf);
- if (xd->mode_info_context->mbmi.dc_diff > 0)
+ if (!skip_lf)
pc->lf_bh(xd->dst.y_buffer, xd->dst.u_buffer, xd->dst.v_buffer, recon_y_stride, recon_uv_stride, &lfi[filter_level], pc->simpler_lpf);
}
}
diff --git a/vp8/encoder/block.h b/vp8/encoder/block.h
index 5a2568d..6de4c85 100644
--- a/vp8/encoder/block.h
+++ b/vp8/encoder/block.h
@@ -34,7 +34,7 @@
// 16 Y blocks, 4 U blocks, 4 V blocks each with 16 entries
short *quant;
short *quant_fast;
- short *quant_shift;
+ unsigned char *quant_shift;
short *zbin;
short *zrun_zbin_boost;
short *round;
diff --git a/vp8/encoder/encodeframe.c b/vp8/encoder/encodeframe.c
index ab4071d..f48a1fc 100644
--- a/vp8/encoder/encodeframe.c
+++ b/vp8/encoder/encodeframe.c
@@ -147,7 +147,7 @@
#define EXACT_QUANT
#ifdef EXACT_QUANT
static void vp8cx_invert_quant(int improved_quant, short *quant,
- short *shift, short d)
+ unsigned char *shift, short d)
{
if(improved_quant)
{
@@ -1157,7 +1157,6 @@
x->e_mbd.mode_info_context->mbmi.ref_frame = INTRA_FRAME;
-#if !(CONFIG_REALTIME_ONLY)
if (cpi->sf.RD && cpi->compressor_speed != 2)
{
vp8_rd_pick_intra_mbuv_mode(cpi, x, &rateuv, &rateuv_tokenonly, &distuv);
@@ -1170,7 +1169,6 @@
rate += (Error4x4 < Error16x16) ? rate4x4 : rate16x16;
}
else
-#endif
{
int rate2, best_distortion;
MB_PREDICTION_MODE mode, best_mode = DC_PRED;
@@ -1188,7 +1186,7 @@
(&x->e_mbd);
distortion2 = VARIANCE_INVOKE(&cpi->rtcd.variance, get16x16prederror)(x->src.y_buffer, x->src.y_stride, x->e_mbd.predictor, 16, 0x7fffffff);
rate2 = x->mbmode_cost[x->e_mbd.frame_type][mode];
- this_rd = RD_ESTIMATE(x->rdmult, x->rddiv, rate2, distortion2);
+ this_rd = RDCOST(x->rdmult, x->rddiv, rate2, distortion2);
if (Error16x16 > this_rd)
{
@@ -1242,8 +1240,6 @@
else
x->encode_breakout = cpi->oxcf.encode_breakout;
-#if !(CONFIG_REALTIME_ONLY)
-
if (cpi->sf.RD)
{
int zbin_mode_boost_enabled = cpi->zbin_mode_boost_enabled;
@@ -1270,7 +1266,6 @@
}
else
-#endif
vp8_pick_inter_mode(cpi, x, recon_yoffset, recon_uvoffset, &rate, &distortion, &intra_error);
cpi->prediction_error += distortion;
@@ -1386,7 +1381,7 @@
cpi->MVcount[1][mv_max+((xd->block[0].bmi.mv.as_mv.col - best_ref_mv.col) >> 1)]++;
}
- if (!x->skip && !x->e_mbd.mode_info_context->mbmi.force_no_skip)
+ if (!x->skip)
{
vp8_encode_inter16x16(IF_RTCD(&cpi->rtcd), x);
@@ -1396,7 +1391,7 @@
}
else
- vp8_stuff_inter16x16(x);
+ vp8_build_inter_predictors_mb_s(xd);
}
if (!x->skip)
@@ -1405,11 +1400,6 @@
{
if (cpi->common.mb_no_coeff_skip)
{
- if (xd->mode_info_context->mbmi.mode != B_PRED && xd->mode_info_context->mbmi.mode != SPLITMV)
- xd->mode_info_context->mbmi.dc_diff = 0;
- else
- xd->mode_info_context->mbmi.dc_diff = 1;
-
xd->mode_info_context->mbmi.mb_skip_coeff = 1;
cpi->skip_true_count ++;
vp8_fix_contexts(xd);
diff --git a/vp8/encoder/encodeintra.c b/vp8/encoder/encodeintra.c
index 4400006..7d835a1 100644
--- a/vp8/encoder/encodeintra.c
+++ b/vp8/encoder/encodeintra.c
@@ -75,14 +75,9 @@
vp8_quantize_mby(x);
-#if !(CONFIG_REALTIME_ONLY)
-#if 1
if (x->optimize)
vp8_optimize_mby(x, rtcd);
-#endif
-#endif
-
vp8_inverse_transform_mby(IF_RTCD(&rtcd->common->idct), &x->e_mbd);
RECON_INVOKE(&rtcd->common->recon, recon_mby)
@@ -126,15 +121,9 @@
vp8_quantize_mbuv(x);
-#if !(CONFIG_REALTIME_ONLY)
-#if 1
-
if (x->optimize==2 ||(x->optimize && x->rddiv > 1))
vp8_optimize_mbuv(x, rtcd);
-#endif
-#endif
-
vp8_inverse_transform_mbuv(IF_RTCD(&rtcd->common->idct), &x->e_mbd);
vp8_recon_intra_mbuv(IF_RTCD(&rtcd->common->recon), &x->e_mbd);
diff --git a/vp8/encoder/encodemb.c b/vp8/encoder/encodemb.c
index 463dbca..2509e06 100644
--- a/vp8/encoder/encodemb.c
+++ b/vp8/encoder/encodemb.c
@@ -19,6 +19,7 @@
#include "vp8/common/reconintra.h"
#include "dct.h"
#include "vpx_mem/vpx_mem.h"
+#include "rdopt.h"
#if CONFIG_RUNTIME_CPU_DETECT
#define IF_RTCD(x) (x)
@@ -195,42 +196,7 @@
}
-void vp8_stuff_inter16x16(MACROBLOCK *x)
-{
- vp8_build_inter_predictors_mb_s(&x->e_mbd);
- /*
- // recon = copy from predictors to destination
- {
- BLOCKD *b = &x->e_mbd.block[0];
- unsigned char *pred_ptr = b->predictor;
- unsigned char *dst_ptr = *(b->base_dst) + b->dst;
- int stride = b->dst_stride;
- int i;
- for(i=0;i<16;i++)
- vpx_memcpy(dst_ptr+i*stride,pred_ptr+16*i,16);
-
- b = &x->e_mbd.block[16];
- pred_ptr = b->predictor;
- dst_ptr = *(b->base_dst) + b->dst;
- stride = b->dst_stride;
-
- for(i=0;i<8;i++)
- vpx_memcpy(dst_ptr+i*stride,pred_ptr+8*i,8);
-
- b = &x->e_mbd.block[20];
- pred_ptr = b->predictor;
- dst_ptr = *(b->base_dst) + b->dst;
- stride = b->dst_stride;
-
- for(i=0;i<8;i++)
- vpx_memcpy(dst_ptr+i*stride,pred_ptr+8*i,8);
- }
- */
-}
-
-#if !(CONFIG_REALTIME_ONLY)
-#define RDCOST(RM,DM,R,D) ( ((128+(R)*(RM)) >> 8) + (DM)*(D) )
#define RDTRUNC(RM,DM,R,D) ( (128+(R)*(RM)) & 0xFF )
typedef struct vp8_token_state vp8_token_state;
@@ -608,7 +574,6 @@
ta + vp8_block2above[b], tl + vp8_block2left[b], rtcd);
}
}
-#endif
void vp8_encode_inter16x16(const VP8_ENCODER_RTCD *rtcd, MACROBLOCK *x)
{
@@ -620,10 +585,8 @@
vp8_quantize_mb(x);
-#if !(CONFIG_REALTIME_ONLY)
if (x->optimize)
optimize_mb(x, rtcd);
-#endif
vp8_inverse_transform_mb(IF_RTCD(&rtcd->common->idct), &x->e_mbd);
@@ -635,7 +598,7 @@
/* this funciton is used by first pass only */
void vp8_encode_inter16x16y(const VP8_ENCODER_RTCD *rtcd, MACROBLOCK *x)
{
- vp8_build_inter_predictors_mby(&x->e_mbd);
+ vp8_build_inter16x16_predictors_mby(&x->e_mbd);
ENCODEMB_INVOKE(&rtcd->encodemb, submby)(x->src_diff, x->src.y_buffer, x->e_mbd.predictor, x->src.y_stride);
diff --git a/vp8/encoder/encodemb.h b/vp8/encoder/encodemb.h
index 8c93aa1..47fc72d 100644
--- a/vp8/encoder/encodemb.h
+++ b/vp8/encoder/encodemb.h
@@ -95,8 +95,6 @@
struct VP8_ENCODER_RTCD;
void vp8_encode_inter16x16(const struct VP8_ENCODER_RTCD *rtcd, MACROBLOCK *x);
-extern void vp8_stuff_inter16x16(MACROBLOCK *x);
-
void vp8_build_dcblock(MACROBLOCK *b);
void vp8_transform_mb(MACROBLOCK *mb);
void vp8_transform_mbuv(MACROBLOCK *x);
diff --git a/vp8/encoder/firstpass.c b/vp8/encoder/firstpass.c
index 8f24a11..6f33099 100644
--- a/vp8/encoder/firstpass.c
+++ b/vp8/encoder/firstpass.c
@@ -786,7 +786,8 @@
// TODO: handle the case when duration is set to 0, or something less
// than the full time between subsequent cpi->source_time_stamp s .
- fps.duration = cpi->source_end_time_stamp - cpi->source_time_stamp;
+ fps.duration = cpi->source->ts_end
+ - cpi->source->ts_start;
// don't want to do output stats with a stack variable!
memcpy(cpi->this_frame_stats,
diff --git a/vp8/encoder/generic/csystemdependent.c b/vp8/encoder/generic/csystemdependent.c
index 1d672be..d48c95b 100644
--- a/vp8/encoder/generic/csystemdependent.c
+++ b/vp8/encoder/generic/csystemdependent.c
@@ -89,9 +89,7 @@
cpi->rtcd.quantize.quantb = vp8_regular_quantize_b;
cpi->rtcd.quantize.fastquantb = vp8_fast_quantize_b_c;
-#if !(CONFIG_REALTIME_ONLY)
cpi->rtcd.search.full_search = vp8_full_search_sad;
-#endif
cpi->rtcd.search.diamond_search = vp8_diamond_search_sad;
#if !(CONFIG_REALTIME_ONLY)
cpi->rtcd.temporal.apply = vp8_temporal_filter_apply_c;
diff --git a/vp8/encoder/lookahead.c b/vp8/encoder/lookahead.c
new file mode 100644
index 0000000..3b86d40
--- /dev/null
+++ b/vp8/encoder/lookahead.c
@@ -0,0 +1,157 @@
+/*
+ * Copyright (c) 2011 The WebM project authors. All Rights Reserved.
+ *
+ * Use of this source code is governed by a BSD-style license
+ * that can be found in the LICENSE file in the root of the source
+ * tree. An additional intellectual property rights grant can be found
+ * in the file PATENTS. All contributing project authors may
+ * be found in the AUTHORS file in the root of the source tree.
+ */
+#include <assert.h>
+#include <stdlib.h>
+#include "vpx_config.h"
+#include "lookahead.h"
+#include "vp8/common/extend.h"
+
+#define MAX_LAG_BUFFERS (CONFIG_REALTIME_ONLY? 1 : 25)
+
+struct lookahead_ctx
+{
+ unsigned int max_sz; /* Capacity of the queue, fixed when it is created */
+ unsigned int sz; /* Number of buffers currently in the queue */
+ unsigned int read_idx; /* Slot of the oldest entry (next one dequeued) */
+ unsigned int write_idx; /* Slot the next enqueued frame is written to */
+ struct lookahead_entry *buf; /* Array of max_sz entries used as a ring */
+};
+
+
+/* Hand back the entry at slot *idx and step *idx forward, wrapping at max_sz */
+static struct lookahead_entry *
+pop(struct lookahead_ctx *ctx,
+    unsigned int         *idx)
+{
+    const unsigned int index = *idx;
+    struct lookahead_entry *entry = &ctx->buf[index];
+
+    assert(index < ctx->max_sz);
+    *idx = index + 1;
+    if(*idx >= ctx->max_sz)
+        *idx -= ctx->max_sz;
+    return entry;
+}
+
+/* Free every queued frame buffer, the entry array and ctx; NULL is a no-op */
+void
+vp8_lookahead_destroy(struct lookahead_ctx *ctx)
+{
+    if(ctx)
+    {
+        if(ctx->buf)
+        {
+            unsigned int i;  /* unsigned, like the max_sz it is compared to */
+
+            for(i = 0; i < ctx->max_sz; i++)
+                vp8_yv12_de_alloc_frame_buffer(&ctx->buf[i].img);
+            free(ctx->buf);
+        }
+        free(ctx);
+    }
+}
+
+/* Allocate a queue of `depth` frame buffers; returns NULL on alloc failure */
+struct lookahead_ctx*
+vp8_lookahead_init(unsigned int width,
+                   unsigned int height,
+                   unsigned int depth)
+{
+    struct lookahead_ctx *ctx = NULL;
+    unsigned int i;  /* unsigned: compared against the unsigned depth */
+
+    /* Clamp the lookahead queue depth to [1, MAX_LAG_BUFFERS] */
+    if(depth < 1)
+        depth = 1;
+    else if(depth > MAX_LAG_BUFFERS)
+        depth = MAX_LAG_BUFFERS;
+
+    /* Round the buffer dimensions up to a multiple of 16 */
+    width = (width + 15) & ~15;
+    height = (height + 15) & ~15;
+
+    /* Allocate the context, then the entry array and one image per entry */
+    ctx = calloc(1, sizeof(*ctx));
+    if(ctx)
+    {
+        ctx->max_sz = depth;
+        ctx->buf = calloc(depth, sizeof(*ctx->buf));
+        if(!ctx->buf)
+            goto bail;
+        for(i = 0; i < depth; i++)
+            if(vp8_yv12_alloc_frame_buffer(&ctx->buf[i].img, width, height, 16))
+                goto bail;
+    }
+    return ctx;
+bail:
+    vp8_lookahead_destroy(ctx);
+    return NULL;
+}
+
+/* Copy src into the next free slot; returns 1 without copying if full */
+int
+vp8_lookahead_push(struct lookahead_ctx *ctx,
+                   YV12_BUFFER_CONFIG   *src,
+                   int64_t               ts_start,
+                   int64_t               ts_end,
+                   unsigned int          flags)
+{
+    struct lookahead_entry *slot;
+
+    if(ctx->max_sz < ctx->sz + 1)
+        return 1;
+    slot = pop(ctx, &ctx->write_idx);
+    ctx->sz++;
+    vp8_copy_and_extend_frame(src, &slot->img);
+    slot->flags    = flags;
+    slot->ts_end   = ts_end;
+    slot->ts_start = ts_start;
+    return 0;
+}
+
+/* Dequeue the oldest entry; unless draining, only once the queue is full */
+struct lookahead_entry*
+vp8_lookahead_pop(struct lookahead_ctx *ctx,
+                  int                   drain)
+{
+    struct lookahead_entry *entry;
+
+    /* Nothing queued, or still filling up and the caller is not draining */
+    if(!ctx->sz || (!drain && ctx->sz != ctx->max_sz))
+        return NULL;
+    entry = pop(ctx, &ctx->read_idx);
+    ctx->sz--;
+    return entry;
+}
+
+/* Return the entry `index` slots after the read position; queue unchanged */
+struct lookahead_entry*
+vp8_lookahead_peek(struct lookahead_ctx *ctx,
+                   int                   index)
+{
+    /* Same int -> unsigned conversion the comparisons below apply */
+    unsigned int pos = index;
+
+    assert(index < ctx->max_sz);
+    if(pos >= ctx->sz)
+        return NULL;
+    /* Translate the queue-relative offset into a ring slot */
+    pos += ctx->read_idx;
+    if(pos >= ctx->max_sz)
+        pos -= ctx->max_sz;
+    return ctx->buf + pos;
+}
+
+/* Report how many frames are currently queued (ctx is dereferenced unchecked) */
+unsigned int
+vp8_lookahead_depth(struct lookahead_ctx *ctx)
+{
+ return ctx->sz;
+}
diff --git a/vp8/encoder/lookahead.h b/vp8/encoder/lookahead.h
new file mode 100644
index 0000000..a483d7e
--- /dev/null
+++ b/vp8/encoder/lookahead.h
@@ -0,0 +1,101 @@
+/*
+ * Copyright (c) 2011 The WebM project authors. All Rights Reserved.
+ *
+ * Use of this source code is governed by a BSD-style license
+ * that can be found in the LICENSE file in the root of the source
+ * tree. An additional intellectual property rights grant can be found
+ * in the file PATENTS. All contributing project authors may
+ * be found in the AUTHORS file in the root of the source tree.
+ */
+#ifndef LOOKAHEAD_H
+#define LOOKAHEAD_H
+#include "vpx_scale/yv12config.h"
+#include "vpx/vpx_integer.h"
+
+struct lookahead_entry
+{
+ YV12_BUFFER_CONFIG img; /* Queue-owned copy of the source frame */
+ int64_t ts_start; /* Timestamp for the start of this frame */
+ int64_t ts_end; /* Timestamp for the end of this frame */
+ unsigned int flags; /* Flags set on this frame */
+};
+
+
+struct lookahead_ctx;
+
+/**\brief Initializes the lookahead stage
+ *
+ * The lookahead stage is a queue of frame buffers on which some analysis
+ * may be done when buffers are enqueued.
+ * \param[in] width,height,depth  Frame dimensions and requested queue depth
+ * \retval NULL, if the context or one of its buffers could not be allocated
+ */
+struct lookahead_ctx* vp8_lookahead_init(unsigned int width,
+ unsigned int height,
+ unsigned int depth
+ );
+
+
+/**\brief Destroys the lookahead stage
+ * \param[in] ctx  Context to free, with all its frame buffers; NULL is ignored
+ */
+void vp8_lookahead_destroy(struct lookahead_ctx *ctx);
+
+
+/**\brief Enqueue a source buffer
+ *
+ * Copies the source image into a framebuffer with the expected stride/border.
+ *
+ * \param[in] ctx Pointer to the lookahead context
+ * \param[in] src Pointer to the image to enqueue
+ * \param[in] ts_start Timestamp for the start of this frame
+ * \param[in] ts_end Timestamp for the end of this frame
+ * \param[in] flags Flags set on this frame
+ * \retval 0 on success, 1 if the queue is already full (nothing is copied)
+ */
+int
+vp8_lookahead_push(struct lookahead_ctx *ctx,
+ YV12_BUFFER_CONFIG *src,
+ int64_t ts_start,
+ int64_t ts_end,
+ unsigned int flags);
+
+
+/**\brief Get the next source buffer to encode
+ *
+ * Removes the oldest buffer from the queue and returns it.
+ *
+ * \param[in] ctx Pointer to the lookahead context
+ * \param[in] drain Flag indicating the buffer should be drained
+ * (return a buffer regardless of the current queue depth)
+ *
+ * \retval NULL, if drain set and queue is empty
+ * \retval NULL, if drain not set and queue not of the configured depth
+ */
+struct lookahead_entry*
+vp8_lookahead_pop(struct lookahead_ctx *ctx,
+ int drain);
+
+
+/**\brief Get a future source buffer to encode
+ *
+ * \param[in] ctx Pointer to the lookahead context
+ * \param[in] index Index of the frame to be returned, 0 == next frame;
+ * the frame is left in the queue
+ *
+ * \retval NULL, if no buffer exists at the specified index
+ */
+struct lookahead_entry*
+vp8_lookahead_peek(struct lookahead_ctx *ctx,
+ int index);
+
+
+/**\brief Get the number of frames currently in the lookahead queue
+ *
+ * \param[in] ctx Pointer to the lookahead context (must not be NULL)
+ */
+unsigned int
+vp8_lookahead_depth(struct lookahead_ctx *ctx);
+
+
+#endif
diff --git a/vp8/encoder/mcomp.c b/vp8/encoder/mcomp.c
index 716f514..651890d 100644
--- a/vp8/encoder/mcomp.c
+++ b/vp8/encoder/mcomp.c
@@ -194,13 +194,13 @@
#define DIST(r,c) vfp->svf( PRE(r,c), d->pre_stride, SP(c),SP(r), z,b->src_stride,&sse) // returns subpixel variance error function.
#define IFMVCV(r,c,s,e) if ( c >= minc && c <= maxc && r >= minr && r <= maxr) s else e;
#define ERR(r,c) (MVC(r,c)+DIST(r,c)) // returns distortion + motion vector cost
-#define CHECK_BETTER(v,r,c) IFMVCV(r,c,{if((v = ERR(r,c)) < besterr) { besterr = v; br=r; bc=c; }}, v=INT_MAX;)// checks if (r,c) has better score than previous best
+#define CHECK_BETTER(v,r,c) IFMVCV(r,c,{thismse = DIST(r,c); if((v = (MVC(r,c)+thismse)) < besterr) { besterr = v; br=r; bc=c; *distortion = thismse; *sse1 = sse; }}, v=INT_MAX;)// checks if (r,c) has better score than previous best
#define MIN(x,y) (((x)<(y))?(x):(y))
#define MAX(x,y) (((x)>(y))?(x):(y))
//#define CHECK_BETTER(v,r,c) if((v = ERR(r,c)) < besterr) { besterr = v; br=r; bc=c; }
-int vp8_find_best_sub_pixel_step_iteratively(MACROBLOCK *x, BLOCK *b, BLOCKD *d, MV *bestmv, MV *ref_mv, int error_per_bit, const vp8_variance_fn_ptr_t *vfp, int *mvcost[2])
+int vp8_find_best_sub_pixel_step_iteratively(MACROBLOCK *x, BLOCK *b, BLOCKD *d, MV *bestmv, MV *ref_mv, int error_per_bit, const vp8_variance_fn_ptr_t *vfp, int *mvcost[2], int *distortion, unsigned int *sse1)
{
unsigned char *y = *(d->base_pre) + d->pre + (bestmv->row) * d->pre_stride + bestmv->col;
unsigned char *z = (*(b->base_src) + b->src);
@@ -214,6 +214,7 @@
unsigned int whichdir;
unsigned int halfiters = 4;
unsigned int quarteriters = 4;
+ int thismse;
int minc = MAX(x->mv_col_min << 2, (ref_mv->col >> 1) - ((1 << mvlong_width) - 1));
int maxc = MIN(x->mv_col_max << 2, (ref_mv->col >> 1) + ((1 << mvlong_width) - 1));
@@ -225,7 +226,8 @@
bestmv->col <<= 3;
// calculate central point error
- besterr = vfp->vf(y, d->pre_stride, z, b->src_stride, &sse);
+ besterr = vfp->vf(y, d->pre_stride, z, b->src_stride, sse1);
+ *distortion = besterr;
besterr += mv_err_cost(bestmv, ref_mv, mvcost, error_per_bit);
// TODO: Each subsequent iteration checks at least one point in common with the last iteration could be 2 ( if diag selected)
@@ -314,7 +316,7 @@
#undef CHECK_BETTER
#undef MIN
#undef MAX
-int vp8_find_best_sub_pixel_step(MACROBLOCK *x, BLOCK *b, BLOCKD *d, MV *bestmv, MV *ref_mv, int error_per_bit, const vp8_variance_fn_ptr_t *vfp, int *mvcost[2])
+int vp8_find_best_sub_pixel_step(MACROBLOCK *x, BLOCK *b, BLOCKD *d, MV *bestmv, MV *ref_mv, int error_per_bit, const vp8_variance_fn_ptr_t *vfp, int *mvcost[2], int *distortion, unsigned int *sse1)
{
int bestmse = INT_MAX;
MV startmv;
@@ -325,6 +327,7 @@
int left, right, up, down, diag;
unsigned int sse;
int whichdir ;
+ int thismse;
// Trap uncodable vectors
@@ -332,6 +335,7 @@
{
bestmv->row <<= 3;
bestmv->col <<= 3;
+ *distortion = INT_MAX;
return INT_MAX;
}
@@ -341,51 +345,60 @@
startmv = *bestmv;
// calculate central point error
- bestmse = vfp->vf(y, d->pre_stride, z, b->src_stride, &sse);
+ bestmse = vfp->vf(y, d->pre_stride, z, b->src_stride, sse1);
+ *distortion = bestmse;
bestmse += mv_err_cost(bestmv, ref_mv, mvcost, error_per_bit);
// go left then right and check error
this_mv.row = startmv.row;
this_mv.col = ((startmv.col - 8) | 4);
- left = vfp->svf_halfpix_h(y - 1, d->pre_stride, z, b->src_stride, &sse);
- left += mv_err_cost(&this_mv, ref_mv, mvcost, error_per_bit);
+ thismse = vfp->svf_halfpix_h(y - 1, d->pre_stride, z, b->src_stride, &sse);
+ left = thismse + mv_err_cost(&this_mv, ref_mv, mvcost, error_per_bit);
if (left < bestmse)
{
*bestmv = this_mv;
bestmse = left;
+ *distortion = thismse;
+ *sse1 = sse;
}
this_mv.col += 8;
- right = vfp->svf_halfpix_h(y, d->pre_stride, z, b->src_stride, &sse);
- right += mv_err_cost(&this_mv, ref_mv, mvcost, error_per_bit);
+ thismse = vfp->svf_halfpix_h(y, d->pre_stride, z, b->src_stride, &sse);
+ right = thismse + mv_err_cost(&this_mv, ref_mv, mvcost, error_per_bit);
if (right < bestmse)
{
*bestmv = this_mv;
bestmse = right;
+ *distortion = thismse;
+ *sse1 = sse;
}
// go up then down and check error
this_mv.col = startmv.col;
this_mv.row = ((startmv.row - 8) | 4);
- up = vfp->svf_halfpix_v(y - d->pre_stride, d->pre_stride, z, b->src_stride, &sse);
- up += mv_err_cost(&this_mv, ref_mv, mvcost, error_per_bit);
+ thismse = vfp->svf_halfpix_v(y - d->pre_stride, d->pre_stride, z, b->src_stride, &sse);
+ up = thismse + mv_err_cost(&this_mv, ref_mv, mvcost, error_per_bit);
if (up < bestmse)
{
*bestmv = this_mv;
bestmse = up;
+ *distortion = thismse;
+ *sse1 = sse;
}
this_mv.row += 8;
- down = vfp->svf_halfpix_v(y, d->pre_stride, z, b->src_stride, &sse);
- down += mv_err_cost(&this_mv, ref_mv, mvcost, error_per_bit);
+ thismse = vfp->svf_halfpix_v(y, d->pre_stride, z, b->src_stride, &sse);
+ down = thismse + mv_err_cost(&this_mv, ref_mv, mvcost, error_per_bit);
if (down < bestmse)
{
*bestmv = this_mv;
bestmse = down;
+ *distortion = thismse;
+ *sse1 = sse;
}
@@ -400,32 +413,34 @@
case 0:
this_mv.col = (this_mv.col - 8) | 4;
this_mv.row = (this_mv.row - 8) | 4;
- diag = vfp->svf_halfpix_hv(y - 1 - d->pre_stride, d->pre_stride, z, b->src_stride, &sse);
+ thismse = vfp->svf_halfpix_hv(y - 1 - d->pre_stride, d->pre_stride, z, b->src_stride, &sse);
break;
case 1:
this_mv.col += 4;
this_mv.row = (this_mv.row - 8) | 4;
- diag = vfp->svf_halfpix_hv(y - d->pre_stride, d->pre_stride, z, b->src_stride, &sse);
+ thismse = vfp->svf_halfpix_hv(y - d->pre_stride, d->pre_stride, z, b->src_stride, &sse);
break;
case 2:
this_mv.col = (this_mv.col - 8) | 4;
this_mv.row += 4;
- diag = vfp->svf_halfpix_hv(y - 1, d->pre_stride, z, b->src_stride, &sse);
+ thismse = vfp->svf_halfpix_hv(y - 1, d->pre_stride, z, b->src_stride, &sse);
break;
case 3:
default:
this_mv.col += 4;
this_mv.row += 4;
- diag = vfp->svf_halfpix_hv(y, d->pre_stride, z, b->src_stride, &sse);
+ thismse = vfp->svf_halfpix_hv(y, d->pre_stride, z, b->src_stride, &sse);
break;
}
- diag += mv_err_cost(&this_mv, ref_mv, mvcost, error_per_bit);
+ diag = thismse + mv_err_cost(&this_mv, ref_mv, mvcost, error_per_bit);
if (diag < bestmse)
{
*bestmv = this_mv;
bestmse = diag;
+ *distortion = thismse;
+ *sse1 = sse;
}
// }
@@ -448,30 +463,34 @@
if (startmv.col & 7)
{
this_mv.col = startmv.col - 2;
- left = vfp->svf(y, d->pre_stride, this_mv.col & 7, this_mv.row & 7, z, b->src_stride, &sse);
+ thismse = vfp->svf(y, d->pre_stride, this_mv.col & 7, this_mv.row & 7, z, b->src_stride, &sse);
}
else
{
this_mv.col = (startmv.col - 8) | 6;
- left = vfp->svf(y - 1, d->pre_stride, 6, this_mv.row & 7, z, b->src_stride, &sse);
+ thismse = vfp->svf(y - 1, d->pre_stride, 6, this_mv.row & 7, z, b->src_stride, &sse);
}
- left += mv_err_cost(&this_mv, ref_mv, mvcost, error_per_bit);
+ left = thismse + mv_err_cost(&this_mv, ref_mv, mvcost, error_per_bit);
if (left < bestmse)
{
*bestmv = this_mv;
bestmse = left;
+ *distortion = thismse;
+ *sse1 = sse;
}
this_mv.col += 4;
- right = vfp->svf(y, d->pre_stride, this_mv.col & 7, this_mv.row & 7, z, b->src_stride, &sse);
- right += mv_err_cost(&this_mv, ref_mv, mvcost, error_per_bit);
+ thismse = vfp->svf(y, d->pre_stride, this_mv.col & 7, this_mv.row & 7, z, b->src_stride, &sse);
+ right = thismse + mv_err_cost(&this_mv, ref_mv, mvcost, error_per_bit);
if (right < bestmse)
{
*bestmv = this_mv;
bestmse = right;
+ *distortion = thismse;
+ *sse1 = sse;
}
// go up then down and check error
@@ -480,30 +499,34 @@
if (startmv.row & 7)
{
this_mv.row = startmv.row - 2;
- up = vfp->svf(y, d->pre_stride, this_mv.col & 7, this_mv.row & 7, z, b->src_stride, &sse);
+ thismse = vfp->svf(y, d->pre_stride, this_mv.col & 7, this_mv.row & 7, z, b->src_stride, &sse);
}
else
{
this_mv.row = (startmv.row - 8) | 6;
- up = vfp->svf(y - d->pre_stride, d->pre_stride, this_mv.col & 7, 6, z, b->src_stride, &sse);
+ thismse = vfp->svf(y - d->pre_stride, d->pre_stride, this_mv.col & 7, 6, z, b->src_stride, &sse);
}
- up += mv_err_cost(&this_mv, ref_mv, mvcost, error_per_bit);
+ up = thismse + mv_err_cost(&this_mv, ref_mv, mvcost, error_per_bit);
if (up < bestmse)
{
*bestmv = this_mv;
bestmse = up;
+ *distortion = thismse;
+ *sse1 = sse;
}
this_mv.row += 4;
- down = vfp->svf(y, d->pre_stride, this_mv.col & 7, this_mv.row & 7, z, b->src_stride, &sse);
- down += mv_err_cost(&this_mv, ref_mv, mvcost, error_per_bit);
+ thismse = vfp->svf(y, d->pre_stride, this_mv.col & 7, this_mv.row & 7, z, b->src_stride, &sse);
+ down = thismse + mv_err_cost(&this_mv, ref_mv, mvcost, error_per_bit);
if (down < bestmse)
{
*bestmv = this_mv;
bestmse = down;
+ *distortion = thismse;
+ *sse1 = sse;
}
@@ -525,12 +548,12 @@
if (startmv.col & 7)
{
this_mv.col -= 2;
- diag = vfp->svf(y, d->pre_stride, this_mv.col & 7, this_mv.row & 7, z, b->src_stride, &sse);
+ thismse = vfp->svf(y, d->pre_stride, this_mv.col & 7, this_mv.row & 7, z, b->src_stride, &sse);
}
else
{
this_mv.col = (startmv.col - 8) | 6;
- diag = vfp->svf(y - 1, d->pre_stride, 6, this_mv.row & 7, z, b->src_stride, &sse);;
+ thismse = vfp->svf(y - 1, d->pre_stride, 6, this_mv.row & 7, z, b->src_stride, &sse);;
}
}
else
@@ -540,12 +563,12 @@
if (startmv.col & 7)
{
this_mv.col -= 2;
- diag = vfp->svf(y - d->pre_stride, d->pre_stride, this_mv.col & 7, 6, z, b->src_stride, &sse);
+ thismse = vfp->svf(y - d->pre_stride, d->pre_stride, this_mv.col & 7, 6, z, b->src_stride, &sse);
}
else
{
this_mv.col = (startmv.col - 8) | 6;
- diag = vfp->svf(y - d->pre_stride - 1, d->pre_stride, 6, 6, z, b->src_stride, &sse);
+ thismse = vfp->svf(y - d->pre_stride - 1, d->pre_stride, 6, 6, z, b->src_stride, &sse);
}
}
@@ -556,12 +579,12 @@
if (startmv.row & 7)
{
this_mv.row -= 2;
- diag = vfp->svf(y, d->pre_stride, this_mv.col & 7, this_mv.row & 7, z, b->src_stride, &sse);
+ thismse = vfp->svf(y, d->pre_stride, this_mv.col & 7, this_mv.row & 7, z, b->src_stride, &sse);
}
else
{
this_mv.row = (startmv.row - 8) | 6;
- diag = vfp->svf(y - d->pre_stride, d->pre_stride, this_mv.col & 7, 6, z, b->src_stride, &sse);
+ thismse = vfp->svf(y - d->pre_stride, d->pre_stride, this_mv.col & 7, 6, z, b->src_stride, &sse);
}
break;
@@ -571,36 +594,36 @@
if (startmv.col & 7)
{
this_mv.col -= 2;
- diag = vfp->svf(y, d->pre_stride, this_mv.col & 7, this_mv.row & 7, z, b->src_stride, &sse);
+ thismse = vfp->svf(y, d->pre_stride, this_mv.col & 7, this_mv.row & 7, z, b->src_stride, &sse);
}
else
{
this_mv.col = (startmv.col - 8) | 6;
- diag = vfp->svf(y - 1, d->pre_stride, 6, this_mv.row & 7, z, b->src_stride, &sse);;
+ thismse = vfp->svf(y - 1, d->pre_stride, 6, this_mv.row & 7, z, b->src_stride, &sse);;
}
break;
case 3:
this_mv.col += 2;
this_mv.row += 2;
- diag = vfp->svf(y, d->pre_stride, this_mv.col & 7, this_mv.row & 7, z, b->src_stride, &sse);
+ thismse = vfp->svf(y, d->pre_stride, this_mv.col & 7, this_mv.row & 7, z, b->src_stride, &sse);
break;
}
- diag += mv_err_cost(&this_mv, ref_mv, mvcost, error_per_bit);
+ diag = thismse + mv_err_cost(&this_mv, ref_mv, mvcost, error_per_bit);
if (diag < bestmse)
{
*bestmv = this_mv;
bestmse = diag;
+ *distortion = thismse;
+ *sse1 = sse;
}
-// }
-
return bestmse;
}
-int vp8_find_best_half_pixel_step(MACROBLOCK *mb, BLOCK *b, BLOCKD *d, MV *bestmv, MV *ref_mv, int error_per_bit, const vp8_variance_fn_ptr_t *vfp, int *mvcost[2])
+int vp8_find_best_half_pixel_step(MACROBLOCK *mb, BLOCK *b, BLOCKD *d, MV *bestmv, MV *ref_mv, int error_per_bit, const vp8_variance_fn_ptr_t *vfp, int *mvcost[2], int *distortion, unsigned int *sse1)
{
int bestmse = INT_MAX;
MV startmv;
@@ -610,12 +633,14 @@
unsigned char *z = (*(b->base_src) + b->src);
int left, right, up, down, diag;
unsigned int sse;
+ int thismse;
// Trap uncodable vectors
if ((abs((bestmv->col << 3) - ref_mv->col) > MAX_FULL_PEL_VAL) || (abs((bestmv->row << 3) - ref_mv->row) > MAX_FULL_PEL_VAL))
{
bestmv->row <<= 3;
bestmv->col <<= 3;
+ *distortion = INT_MAX;
return INT_MAX;
}
@@ -625,51 +650,60 @@
startmv = *bestmv;
// calculate central point error
- bestmse = vfp->vf(y, d->pre_stride, z, b->src_stride, &sse);
+ bestmse = vfp->vf(y, d->pre_stride, z, b->src_stride, sse1);
+ *distortion = bestmse;
bestmse += mv_err_cost(bestmv, ref_mv, mvcost, error_per_bit);
// go left then right and check error
this_mv.row = startmv.row;
this_mv.col = ((startmv.col - 8) | 4);
- left = vfp->svf_halfpix_h(y - 1, d->pre_stride, z, b->src_stride, &sse);
- left += mv_err_cost(&this_mv, ref_mv, mvcost, error_per_bit);
+ thismse = vfp->svf_halfpix_h(y - 1, d->pre_stride, z, b->src_stride, &sse);
+ left = thismse + mv_err_cost(&this_mv, ref_mv, mvcost, error_per_bit);
if (left < bestmse)
{
*bestmv = this_mv;
bestmse = left;
+ *distortion = thismse;
+ *sse1 = sse;
}
this_mv.col += 8;
- right = vfp->svf_halfpix_h(y, d->pre_stride, z, b->src_stride, &sse);
- right += mv_err_cost(&this_mv, ref_mv, mvcost, error_per_bit);
+ thismse = vfp->svf_halfpix_h(y, d->pre_stride, z, b->src_stride, &sse);
+ right = thismse + mv_err_cost(&this_mv, ref_mv, mvcost, error_per_bit);
if (right < bestmse)
{
*bestmv = this_mv;
bestmse = right;
+ *distortion = thismse;
+ *sse1 = sse;
}
// go up then down and check error
this_mv.col = startmv.col;
this_mv.row = ((startmv.row - 8) | 4);
- up = vfp->svf_halfpix_v(y - d->pre_stride, d->pre_stride, z, b->src_stride, &sse);
- up += mv_err_cost(&this_mv, ref_mv, mvcost, error_per_bit);
+ thismse = vfp->svf_halfpix_v(y - d->pre_stride, d->pre_stride, z, b->src_stride, &sse);
+ up = thismse + mv_err_cost(&this_mv, ref_mv, mvcost, error_per_bit);
if (up < bestmse)
{
*bestmv = this_mv;
bestmse = up;
+ *distortion = thismse;
+ *sse1 = sse;
}
this_mv.row += 8;
- down = vfp->svf_halfpix_v(y, d->pre_stride, z, b->src_stride, &sse);
- down += mv_err_cost(&this_mv, ref_mv, mvcost, error_per_bit);
+ thismse = vfp->svf_halfpix_v(y, d->pre_stride, z, b->src_stride, &sse);
+ down = thismse + mv_err_cost(&this_mv, ref_mv, mvcost, error_per_bit);
if (down < bestmse)
{
*bestmv = this_mv;
bestmse = down;
+ *distortion = thismse;
+ *sse1 = sse;
}
// somewhat strangely not doing all the diagonals for half pel is slower than doing them.
@@ -713,44 +747,52 @@
#else
this_mv.col = (this_mv.col - 8) | 4;
this_mv.row = (this_mv.row - 8) | 4;
- diag = vfp->svf_halfpix_hv(y - 1 - d->pre_stride, d->pre_stride, z, b->src_stride, &sse);
- diag += mv_err_cost(&this_mv, ref_mv, mvcost, error_per_bit);
+ thismse = vfp->svf_halfpix_hv(y - 1 - d->pre_stride, d->pre_stride, z, b->src_stride, &sse);
+ diag = thismse + mv_err_cost(&this_mv, ref_mv, mvcost, error_per_bit);
if (diag < bestmse)
{
*bestmv = this_mv;
bestmse = diag;
+ *distortion = thismse;
+ *sse1 = sse;
}
this_mv.col += 8;
- diag = vfp->svf_halfpix_hv(y - d->pre_stride, d->pre_stride, z, b->src_stride, &sse);
- diag += mv_err_cost(&this_mv, ref_mv, mvcost, error_per_bit);
+ thismse = vfp->svf_halfpix_hv(y - d->pre_stride, d->pre_stride, z, b->src_stride, &sse);
+ diag = thismse + mv_err_cost(&this_mv, ref_mv, mvcost, error_per_bit);
if (diag < bestmse)
{
*bestmv = this_mv;
bestmse = diag;
+ *distortion = thismse;
+ *sse1 = sse;
}
this_mv.col = (this_mv.col - 8) | 4;
this_mv.row = startmv.row + 4;
- diag = vfp->svf_halfpix_hv(y - 1, d->pre_stride, z, b->src_stride, &sse);
- diag += mv_err_cost(&this_mv, ref_mv, mvcost, error_per_bit);
+ thismse = vfp->svf_halfpix_hv(y - 1, d->pre_stride, z, b->src_stride, &sse);
+ diag = thismse + mv_err_cost(&this_mv, ref_mv, mvcost, error_per_bit);
if (diag < bestmse)
{
*bestmv = this_mv;
bestmse = diag;
+ *distortion = thismse;
+ *sse1 = sse;
}
this_mv.col += 8;
- diag = vfp->svf_halfpix_hv(y, d->pre_stride, z, b->src_stride, &sse);
- diag += mv_err_cost(&this_mv, ref_mv, mvcost, error_per_bit);
+ thismse = vfp->svf_halfpix_hv(y, d->pre_stride, z, b->src_stride, &sse);
+ diag = thismse + mv_err_cost(&this_mv, ref_mv, mvcost, error_per_bit);
if (diag < bestmse)
{
*bestmv = this_mv;
bestmse = diag;
+ *distortion = thismse;
+ *sse1 = sse;
}
#endif
@@ -1190,8 +1232,6 @@
+ mv_err_cost(&this_mv, center_mv, mvcost, error_per_bit);
}
-
-#if !(CONFIG_REALTIME_ONLY)
int vp8_full_search_sad(MACROBLOCK *x, BLOCK *b, BLOCKD *d, MV *ref_mv, int error_per_bit, int distance, vp8_variance_fn_ptr_t *fn_ptr, int *mvcost[2], MV *center_mv)
{
unsigned char *what = (*(b->base_src) + b->src);
@@ -1571,7 +1611,6 @@
else
return INT_MAX;
}
-#endif /* !(CONFIG_REALTIME_ONLY) */
#ifdef ENTROPY_STATS
void print_mode_context(void)
diff --git a/vp8/encoder/mcomp.h b/vp8/encoder/mcomp.h
index 5efcec2..b14cbcb 100644
--- a/vp8/encoder/mcomp.h
+++ b/vp8/encoder/mcomp.h
@@ -49,7 +49,7 @@
typedef int (fractional_mv_step_fp)
(MACROBLOCK *x, BLOCK *b, BLOCKD *d, MV *bestmv, MV *ref_mv,
- int error_per_bit, const vp8_variance_fn_ptr_t *vfp, int *mvcost[2]);
+ int error_per_bit, const vp8_variance_fn_ptr_t *vfp, int *mvcost[2], int *distortion, unsigned int *sse);
extern fractional_mv_step_fp vp8_find_best_sub_pixel_step_iteratively;
extern fractional_mv_step_fp vp8_find_best_sub_pixel_step;
extern fractional_mv_step_fp vp8_find_best_half_pixel_step;
diff --git a/vp8/encoder/onyx_if.c b/vp8/encoder/onyx_if.c
index a18447d..4208f1f 100644
--- a/vp8/encoder/onyx_if.c
+++ b/vp8/encoder/onyx_if.c
@@ -71,7 +71,7 @@
int vp8_estimate_entropy_savings(VP8_COMP *cpi);
int vp8_calc_ss_err(YV12_BUFFER_CONFIG *source, YV12_BUFFER_CONFIG *dest, const vp8_variance_rtcd_vtable_t *rtcd);
-extern void vp8_temporal_filter_prepare_c(VP8_COMP *cpi);
+extern void vp8_temporal_filter_prepare_c(VP8_COMP *cpi, int distance);
static void set_default_lf_deltas(VP8_COMP *cpi);
@@ -287,16 +287,9 @@
vp8_yv12_de_alloc_frame_buffer(&cpi->last_frame_uf);
vp8_yv12_de_alloc_frame_buffer(&cpi->scaled_source);
#if VP8_TEMPORAL_ALT_REF
- vp8_yv12_de_alloc_frame_buffer(&cpi->alt_ref_buffer.source_buffer);
+ vp8_yv12_de_alloc_frame_buffer(&cpi->alt_ref_buffer);
#endif
- {
- int i;
-
- for (i = 0; i < MAX_LAG_BUFFERS; i++)
- vp8_yv12_de_alloc_frame_buffer(&cpi->src_buffer[i].source_buffer);
-
- cpi->source_buffer_count = 0;
- }
+ vp8_lookahead_destroy(cpi->lookahead);
vpx_free(cpi->tok);
cpi->tok = 0;
@@ -1252,35 +1245,23 @@
}
static void alloc_raw_frame_buffers(VP8_COMP *cpi)
{
- int i, buffers;
- /* allocate source_buffer to be multiples of 16 */
int width = (cpi->oxcf.Width + 15) & ~15;
+ int height = (cpi->oxcf.Height + 15) & ~15;
- buffers = cpi->oxcf.lag_in_frames;
-
- if (buffers > MAX_LAG_BUFFERS)
- buffers = MAX_LAG_BUFFERS;
-
- if (buffers < 1)
- buffers = 1;
-
- for (i = 0; i < buffers; i++)
- if (vp8_yv12_alloc_frame_buffer(&cpi->src_buffer[i].source_buffer,
- width, cpi->oxcf.Height,
- 16))
- vpx_internal_error(&cpi->common.error, VPX_CODEC_MEM_ERROR,
- "Failed to allocate lag buffer");
+ cpi->lookahead = vp8_lookahead_init(cpi->oxcf.Width, cpi->oxcf.Height,
+ cpi->oxcf.lag_in_frames);
+ if(!cpi->lookahead)
+ vpx_internal_error(&cpi->common.error, VPX_CODEC_MEM_ERROR,
+ "Failed to allocate lag buffers");
#if VP8_TEMPORAL_ALT_REF
- if (vp8_yv12_alloc_frame_buffer(&cpi->alt_ref_buffer.source_buffer,
- width, cpi->oxcf.Height, 16))
+ if (vp8_yv12_alloc_frame_buffer(&cpi->alt_ref_buffer,
+ width, height, 16))
vpx_internal_error(&cpi->common.error, VPX_CODEC_MEM_ERROR,
"Failed to allocate altref buffer");
#endif
-
- cpi->source_buffer_count = 0;
}
static int vp8_alloc_partition_data(VP8_COMP *cpi)
@@ -1478,10 +1459,6 @@
cpi->avg_frame_qindex = cpi->oxcf.worst_allowed_q;
// Initialise the starting buffer levels
- cpi->oxcf.starting_buffer_level =
- rescale(cpi->oxcf.starting_buffer_level,
- cpi->oxcf.target_bandwidth, 1000);
-
cpi->buffer_level = cpi->oxcf.starting_buffer_level;
cpi->bits_off_target = cpi->oxcf.starting_buffer_level;
@@ -1542,7 +1519,6 @@
break;
-#if !(CONFIG_REALTIME_ONLY)
case MODE_GOODQUALITY:
cpi->pass = 0;
cpi->compressor_speed = 1;
@@ -1583,7 +1559,6 @@
cpi->pass = 2;
cpi->compressor_speed = 0;
break;
-#endif
}
if (cpi->pass == 0)
@@ -1656,6 +1631,10 @@
// Convert target bandwidth from Kbit/s to Bit/s
cpi->oxcf.target_bandwidth *= 1000;
+ cpi->oxcf.starting_buffer_level =
+ rescale(cpi->oxcf.starting_buffer_level,
+ cpi->oxcf.target_bandwidth, 1000);
+
// Set or reset optimal and maximum buffer levels.
if (cpi->oxcf.optimal_buffer_level == 0)
cpi->oxcf.optimal_buffer_level = cpi->oxcf.target_bandwidth / 8;
@@ -1720,9 +1699,6 @@
cm->horiz_scale = cpi->horiz_scale;
cm->vert_scale = cpi->vert_scale ;
- // As per VP8
- cpi->intra_frame_target = (4 * (cm->Width + cm->Height) / 15) * 1000;
-
// VP8 sharpness level mapping 0-7 (vs 0-10 in general VPx dialogs)
if (cpi->oxcf.Sharpness > 7)
cpi->oxcf.Sharpness = 7;
@@ -1752,10 +1728,6 @@
vp8_alloc_compressor_data(cpi);
}
- // Clamp KF frame size to quarter of data rate
- if (cpi->intra_frame_target > cpi->target_bandwidth >> 2)
- cpi->intra_frame_target = cpi->target_bandwidth >> 2;
-
if (cpi->oxcf.fixed_q >= 0)
{
cpi->last_q[0] = cpi->oxcf.fixed_q;
@@ -1774,7 +1746,7 @@
cpi->oxcf.lag_in_frames = MAX_LAG_BUFFERS;
// YX Temp
- cpi->last_alt_ref_sei = -1;
+ cpi->alt_ref_source = NULL;
cpi->is_src_frame_alt_ref = 0;
cpi->is_next_src_alt_ref = 0;
@@ -1981,7 +1953,6 @@
cpi->frames_till_gf_update_due = 0;
cpi->key_frame_count = 1;
- cpi->tot_key_frame_bits = 0;
cpi->ni_av_qi = cpi->oxcf.worst_allowed_q;
cpi->ni_tot_qi = 0;
@@ -2007,7 +1978,6 @@
for (i = 0; i < KEY_FRAME_CONTEXT; i++)
{
- cpi->prior_key_frame_size[i] = cpi->intra_frame_target;
cpi->prior_key_frame_distance[i] = (int)cpi->output_frame_rate;
}
@@ -2117,15 +2087,11 @@
cpi->fn_ptr[BLOCK_4X4].sdx8f = VARIANCE_INVOKE(&cpi->rtcd.variance, sad4x4x8);
cpi->fn_ptr[BLOCK_4X4].sdx4df = VARIANCE_INVOKE(&cpi->rtcd.variance, sad4x4x4d);
-#if !(CONFIG_REALTIME_ONLY)
cpi->full_search_sad = SEARCH_INVOKE(&cpi->rtcd.search, full_search);
-#endif
cpi->diamond_search_sad = SEARCH_INVOKE(&cpi->rtcd.search, diamond_search);
cpi->ready_for_new_frame = 1;
- cpi->source_encode_index = 0;
-
// make sure frame 1 is okay
cpi->error_bins[0] = cpi->common.MBs;
@@ -2173,7 +2139,8 @@
if (cpi->pass != 1)
{
FILE *f = fopen("opsnr.stt", "a");
- double time_encoded = (cpi->source_end_time_stamp - cpi->first_time_stamp_ever) / 10000000.000;
+ double time_encoded = (cpi->last_end_time_stamp_seen
+ - cpi->first_time_stamp_ever) / 10000000.000;
double total_encode_time = (cpi->time_receive_data + cpi->time_compress_data) / 1000.000;
double dr = (double)cpi->bytes * (double) 8 / (double)1000 / time_encoded;
@@ -2186,7 +2153,7 @@
double total_ssim = 100 * pow(cpi->summed_quality / cpi->summed_weights, 8.0);
fprintf(f, "Bitrate\tAVGPsnr\tGLBPsnr\tAVPsnrP\tGLPsnrP\tVPXSSIM\t Time(us)\n");
- fprintf(f, "%7.3f\t%7.3f\t%7.3f\t%7.3f\t%7.3f\t%7.3f %8.0f\n",
+ fprintf(f, "%7.3f\t%7.3f\t%7.3f\t%7.3f\t%7.3f\t%7.3f\t%8.0f\n",
dr, cpi->total / cpi->count, total_psnr, cpi->totalp / cpi->count, total_psnr2, total_ssim,
total_encode_time);
}
@@ -2628,37 +2595,13 @@
vp8_scale_frame(sd, &cpi->scaled_source, cm->temp_scale_frame.y_buffer,
tmp_height, hs, hr, vs, vr, 0);
+ vp8_yv12_extend_frame_borders(&cpi->scaled_source);
cpi->Source = &cpi->scaled_source;
#endif
}
- // we may need to copy to a buffer so we can extend the image...
- else if (cm->Width != cm->yv12_fb[cm->lst_fb_idx].y_width ||
- cm->Height != cm->yv12_fb[cm->lst_fb_idx].y_height)
- {
- //vp8_yv12_copy_frame_ptr(sd, &cpi->scaled_source);
-#if HAVE_ARMV7
-#if CONFIG_RUNTIME_CPU_DETECT
- if (cm->rtcd.flags & HAS_NEON)
-#endif
- {
- vp8_yv12_copy_src_frame_func_neon(sd, &cpi->scaled_source);
- }
-#if CONFIG_RUNTIME_CPU_DETECT
- else
-#endif
-#endif
-#if !HAVE_ARMV7 || CONFIG_RUNTIME_CPU_DETECT
- {
- vp8_yv12_copy_frame_ptr(sd, &cpi->scaled_source);
- }
-#endif
-
- cpi->Source = &cpi->scaled_source;
- }
-
- vp8_extend_to_multiple_of16(cpi->Source, cm->Width, cm->Height);
-
}
+
+
static void resize_key_frame(VP8_COMP *cpi)
{
#if CONFIG_SPATIAL_RESAMPLING
@@ -2718,16 +2661,17 @@
if (cpi->pass == 2)
vp8_calc_auto_iframe_target_size(cpi);
- // 1 Pass there is no information on which to base size so use bandwidth per second * fixed fraction
else
#endif
- cpi->this_frame_target = cpi->oxcf.target_bandwidth / 2;
-
- // in error resilient mode the first frame is bigger since it likely contains
- // all the static background
- if (cpi->oxcf.error_resilient_mode == 1 || (cpi->compressor_speed == 2))
{
- cpi->this_frame_target *= 3; // 5;
+ /* In one-pass mode there is no information on which to base the
+ * keyframe size, so use half of the initial buffer level, capped
+ * at 1.5x the target bandwidth.
+ */
+ cpi->this_frame_target = cpi->oxcf.starting_buffer_level / 2;
+
+ if(cpi->this_frame_target > cpi->oxcf.target_bandwidth * 3 / 2)
+ cpi->this_frame_target = cpi->oxcf.target_bandwidth * 3 / 2;
}
// Key frame from VFW/auto-keyframe/first frame
@@ -2761,6 +2705,19 @@
}
}
+ /* Apply limits on keyframe target.
+ *
+ * TODO: move this after consolidating
+ * vp8_calc_iframe_target_size() and vp8_calc_auto_iframe_target_size()
+ */
+ if (cm->frame_type == KEY_FRAME && cpi->oxcf.rc_max_intra_bitrate_pct)
+ {
+ unsigned int max_rate = cpi->av_per_frame_bandwidth
+ * cpi->oxcf.rc_max_intra_bitrate_pct / 100;
+
+ if (cpi->this_frame_target > max_rate)
+ cpi->this_frame_target = max_rate;
+ }
return 1;
}
@@ -3571,7 +3528,6 @@
if (Adjustment)
{
int buff_lvl_step;
- int tmp_lvl = cpi->buffer_level;
if (cpi->buffer_level < cpi->oxcf.maximum_buffer_size)
{
@@ -4419,9 +4375,9 @@
vp8_clear_system_state(); //__asm emms;
if (cpi->total_coded_error_left != 0.0)
- fprintf(f, "%10d %10d %10d %10d %10d %10d %10d %10d %6ld %6ld"
- "%6ld %6ld %6ld %5ld %5ld %5ld %8ld %8.2f %10d %10.3f"
- "%10.3f %8ld\n",
+ fprintf(f, "%10d %10d %10d %10d %10d %10d %10d %10d %6d %6d"
+ "%6d %6d %6d %5d %5d %5d %8d %8.2f %10d %10.3f"
+ "%10.3f %8d\n",
cpi->common.current_video_frame, cpi->this_frame_target,
cpi->projected_frame_size,
(cpi->projected_frame_size - cpi->this_frame_target),
@@ -4438,9 +4394,9 @@
(double)cpi->bits_left / cpi->total_coded_error_left,
cpi->tot_recode_hits);
else
- fprintf(f, "%10d %10d %10d %10d %10d %10d %10d %10d %6ld %6ld"
- "%6ld %6ld %6ld %5ld %5ld %5ld %8ld %8.2f %10d %10.3f"
- "%8ld\n",
+ fprintf(f, "%10d %10d %10d %10d %10d %10d %10d %10d %6d %6d"
+ "%6d %6d %6d %5d %5d %5d %8d %8.2f %10d %10.3f"
+ "%8d\n",
cpi->common.current_video_frame,
cpi->this_frame_target, cpi->projected_frame_size,
(cpi->projected_frame_size - cpi->this_frame_target),
@@ -4672,17 +4628,17 @@
extern void vp8_push_neon(INT64 *store);
extern void vp8_pop_neon(INT64 *store);
#endif
+
+
int vp8_receive_raw_frame(VP8_PTR ptr, unsigned int frame_flags, YV12_BUFFER_CONFIG *sd, INT64 time_stamp, INT64 end_time)
{
#if HAVE_ARMV7
INT64 store_reg[8];
#endif
- VP8_COMP *cpi = (VP8_COMP *) ptr;
- VP8_COMMON *cm = &cpi->common;
+ VP8_COMP *cpi = (VP8_COMP *) ptr;
+ VP8_COMMON *cm = &cpi->common;
struct vpx_usec_timer timer;
-
- if (!cpi)
- return -1;
+ int res = 0;
#if HAVE_ARMV7
#if CONFIG_RUNTIME_CPU_DETECT
@@ -4694,75 +4650,10 @@
#endif
vpx_usec_timer_start(&timer);
-
- // no more room for frames;
- if (cpi->source_buffer_count != 0 && cpi->source_buffer_count >= cpi->oxcf.lag_in_frames)
- {
-#if HAVE_ARMV7
-#if CONFIG_RUNTIME_CPU_DETECT
- if (cm->rtcd.flags & HAS_NEON)
-#endif
- {
- vp8_pop_neon(store_reg);
- }
-#endif
- return -1;
- }
-
- //printf("in-cpi->source_buffer_count: %d\n", cpi->source_buffer_count);
-
+ if(vp8_lookahead_push(cpi->lookahead, sd, time_stamp, end_time,
+ frame_flags))
+ res = -1;
cm->clr_type = sd->clrtype;
-
- // make a copy of the frame for use later...
-#if !(CONFIG_REALTIME_ONLY)
-
- if (cpi->oxcf.allow_lag)
- {
- int which_buffer = cpi->source_encode_index - 1;
- SOURCE_SAMPLE *s;
-
- if (which_buffer == -1)
- which_buffer = cpi->oxcf.lag_in_frames - 1;
-
- if (cpi->source_buffer_count < cpi->oxcf.lag_in_frames - 1)
- which_buffer = cpi->source_buffer_count;
-
- s = &cpi->src_buffer[which_buffer];
-
- s->source_time_stamp = time_stamp;
- s->source_end_time_stamp = end_time;
- s->source_frame_flags = frame_flags;
- vp8_yv12_copy_frame_ptr(sd, &s->source_buffer);
-
- cpi->source_buffer_count ++;
- }
- else
-#endif
- {
- SOURCE_SAMPLE *s;
- s = &cpi->src_buffer[0];
- s->source_end_time_stamp = end_time;
- s->source_time_stamp = time_stamp;
- s->source_frame_flags = frame_flags;
-#if HAVE_ARMV7
-#if CONFIG_RUNTIME_CPU_DETECT
- if (cm->rtcd.flags & HAS_NEON)
-#endif
- {
- vp8_yv12_copy_src_frame_func_neon(sd, &s->source_buffer);
- }
-#if CONFIG_RUNTIME_CPU_DETECT
- else
-#endif
-#endif
-#if !HAVE_ARMV7 || CONFIG_RUNTIME_CPU_DETECT
- {
- vp8_yv12_copy_frame_ptr(sd, &s->source_buffer);
- }
-#endif
- cpi->source_buffer_count = 1;
- }
-
vpx_usec_timer_mark(&timer);
cpi->time_receive_data += vpx_usec_timer_elapsed(&timer);
@@ -4775,8 +4666,10 @@
}
#endif
- return 0;
+ return res;
}
+
+
int vp8_get_compressed_data(VP8_PTR ptr, unsigned int *frame_flags, unsigned long *size, unsigned char *dest, INT64 *time_stamp, INT64 *time_end, int flush)
{
#if HAVE_ARMV7
@@ -4787,6 +4680,7 @@
struct vpx_usec_timer tsctimer;
struct vpx_usec_timer ticktimer;
struct vpx_usec_timer cmptimer;
+ YV12_BUFFER_CONFIG *force_src_buffer = NULL;
if (!cpi)
return -1;
@@ -4802,95 +4696,24 @@
vpx_usec_timer_start(&cmptimer);
-
- // flush variable tells us that even though we have less than 10 frames
- // in our buffer we need to start producing compressed frames.
- // Probably because we are at the end of a file....
- if ((cpi->source_buffer_count == cpi->oxcf.lag_in_frames && cpi->oxcf.lag_in_frames > 0)
- || (!cpi->oxcf.allow_lag && cpi->source_buffer_count > 0)
- || (flush && cpi->source_buffer_count > 0))
- {
-
- SOURCE_SAMPLE *s;
-
- s = &cpi->src_buffer[cpi->source_encode_index];
- cpi->source_time_stamp = s->source_time_stamp;
- cpi->source_end_time_stamp = s->source_end_time_stamp;
+ cpi->source = NULL;
#if !(CONFIG_REALTIME_ONLY)
-
- // Should we code an alternate reference frame
- if (cpi->oxcf.error_resilient_mode == 0 &&
- cpi->oxcf.play_alternate &&
- cpi->source_alt_ref_pending &&
- (cpi->frames_till_gf_update_due < cpi->source_buffer_count) &&
- cpi->oxcf.lag_in_frames != 0)
+ // Should we code an alternate reference frame
+ if (cpi->oxcf.error_resilient_mode == 0 &&
+ cpi->oxcf.play_alternate &&
+ cpi->source_alt_ref_pending)
+ {
+ if ((cpi->source = vp8_lookahead_peek(cpi->lookahead,
+ cpi->frames_till_gf_update_due)))
{
- cpi->last_alt_ref_sei = (cpi->source_encode_index + cpi->frames_till_gf_update_due) % cpi->oxcf.lag_in_frames;
-
-#if VP8_TEMPORAL_ALT_REF
-
+ cpi->alt_ref_source = cpi->source;
if (cpi->oxcf.arnr_max_frames > 0)
{
-#if 0
- // my attempt at a loop that tests the results of strength filter.
- int start_frame = cpi->last_alt_ref_sei - 3;
-
- int i, besti = -1, pastin = cpi->oxcf.arnr_strength;
-
- int besterr;
-
- if (start_frame < 0)
- start_frame += cpi->oxcf.lag_in_frames;
-
- besterr = calc_low_ss_err(&cpi->src_buffer[cpi->last_alt_ref_sei].source_buffer,
- &cpi->src_buffer[start_frame].source_buffer, IF_RTCD(&cpi->rtcd.variance));
-
- for (i = 0; i < 7; i++)
- {
- int thiserr;
- cpi->oxcf.arnr_strength = i;
- vp8_temporal_filter_prepare_c(cpi);
-
- thiserr = calc_low_ss_err(&cpi->alt_ref_buffer.source_buffer,
- &cpi->src_buffer[start_frame].source_buffer, IF_RTCD(&cpi->rtcd.variance));
-
- if (10 * thiserr < besterr * 8)
- {
- besterr = thiserr;
- besti = i;
- }
- }
-
- if (besti != -1)
- {
- cpi->oxcf.arnr_strength = besti;
- vp8_temporal_filter_prepare_c(cpi);
- s = &cpi->alt_ref_buffer;
-
- // FWG not sure if I need to copy this data for the Alt Ref frame
- s->source_time_stamp = cpi->src_buffer[cpi->last_alt_ref_sei].source_time_stamp;
- s->source_end_time_stamp = cpi->src_buffer[cpi->last_alt_ref_sei].source_end_time_stamp;
- s->source_frame_flags = cpi->src_buffer[cpi->last_alt_ref_sei].source_frame_flags;
- }
- else
- s = &cpi->src_buffer[cpi->last_alt_ref_sei];
-
-#else
- vp8_temporal_filter_prepare_c(cpi);
- s = &cpi->alt_ref_buffer;
-
- // FWG not sure if I need to copy this data for the Alt Ref frame
- s->source_time_stamp = cpi->src_buffer[cpi->last_alt_ref_sei].source_time_stamp;
- s->source_end_time_stamp = cpi->src_buffer[cpi->last_alt_ref_sei].source_end_time_stamp;
- s->source_frame_flags = cpi->src_buffer[cpi->last_alt_ref_sei].source_frame_flags;
-
-#endif
+ vp8_temporal_filter_prepare_c(cpi,
+ cpi->frames_till_gf_update_due);
+ force_src_buffer = &cpi->alt_ref_buffer;
}
- else
-#endif
- s = &cpi->src_buffer[cpi->last_alt_ref_sei];
-
cm->frames_till_alt_ref_frame = cpi->frames_till_gf_update_due;
cm->refresh_alt_ref_frame = 1;
cm->refresh_golden_frame = 0;
@@ -4900,40 +4723,33 @@
cpi->is_src_frame_alt_ref = 0;
cpi->is_next_src_alt_ref = 0;
}
- else
+ }
#endif
+
+ if (!cpi->source)
+ {
+ if ((cpi->source = vp8_lookahead_pop(cpi->lookahead, flush)))
{
cm->show_frame = 1;
-#if !(CONFIG_REALTIME_ONLY)
- if (cpi->oxcf.allow_lag)
- {
- if (cpi->source_encode_index == cpi->last_alt_ref_sei)
- {
- cpi->is_src_frame_alt_ref = 1;
- cpi->last_alt_ref_sei = -1;
- }
- else
- cpi->is_src_frame_alt_ref = 0;
+ cpi->is_src_frame_alt_ref = cpi->alt_ref_source
+ && (cpi->source == cpi->alt_ref_source);
- cpi->source_encode_index = (cpi->source_encode_index + 1) % cpi->oxcf.lag_in_frames;
-
- if(cpi->source_encode_index == cpi->last_alt_ref_sei)
- cpi->is_next_src_alt_ref = 1;
- else
- cpi->is_next_src_alt_ref = 0;
- }
-
-#endif
- cpi->source_buffer_count--;
+ cpi->is_next_src_alt_ref = cpi->alt_ref_source
+ && (vp8_lookahead_peek(cpi->lookahead, 0)
+ == cpi->alt_ref_source);
+ if(cpi->is_src_frame_alt_ref)
+ cpi->alt_ref_source = NULL;
}
+ }
- cpi->un_scaled_source = &s->source_buffer;
- cpi->Source = &s->source_buffer;
- cpi->source_frame_flags = s->source_frame_flags;
-
- *time_stamp = cpi->source_time_stamp;
- *time_end = cpi->source_end_time_stamp;
+ if (cpi->source)
+ {
+ cpi->un_scaled_source =
+ cpi->Source = force_src_buffer ? force_src_buffer : &cpi->source->img;
+ *time_stamp = cpi->source->ts_start;
+ *time_end = cpi->source->ts_end;
+ *frame_flags = cpi->source->flags;
}
else
{
@@ -4959,26 +4775,24 @@
return -1;
}
- *frame_flags = cpi->source_frame_flags;
-
- if (cpi->source_time_stamp < cpi->first_time_stamp_ever)
+ if (cpi->source->ts_start < cpi->first_time_stamp_ever)
{
- cpi->first_time_stamp_ever = cpi->source_time_stamp;
- cpi->last_end_time_stamp_seen = cpi->source_time_stamp;
+ cpi->first_time_stamp_ever = cpi->source->ts_start;
+ cpi->last_end_time_stamp_seen = cpi->source->ts_start;
}
// adjust frame rates based on timestamps given
if (!cm->refresh_alt_ref_frame)
{
- if (cpi->source_time_stamp == cpi->first_time_stamp_ever)
+ if (cpi->source->ts_start == cpi->first_time_stamp_ever)
{
- double this_fps = 10000000.000 / (cpi->source_end_time_stamp - cpi->source_time_stamp);
+ double this_fps = 10000000.000 / (cpi->source->ts_end - cpi->source->ts_start);
vp8_new_frame_rate(cpi, this_fps);
}
else
{
- long long nanosecs = cpi->source_end_time_stamp
+ long long nanosecs = cpi->source->ts_end
- cpi->last_end_time_stamp_seen;
if (nanosecs > 0)
@@ -4989,8 +4803,8 @@
}
- cpi->last_time_stamp_seen = cpi->source_time_stamp;
- cpi->last_end_time_stamp_seen = cpi->source_end_time_stamp;
+ cpi->last_time_stamp_seen = cpi->source->ts_start;
+ cpi->last_end_time_stamp_seen = cpi->source->ts_end;
}
if (cpi->compressor_speed == 2)
@@ -5111,7 +4925,6 @@
if (cpi->b_calculate_psnr)
{
- double y, u, v;
double ye,ue,ve;
double frame_psnr;
YV12_BUFFER_CONFIG *orig = cpi->Source;
@@ -5144,7 +4957,7 @@
cpi->total_sq_error += sq_error;
cpi->total += frame_psnr;
{
- double y2, u2, v2, frame_psnr2, frame_ssim2 = 0;
+ double frame_psnr2, frame_ssim2 = 0;
double weight = 0;
vp8_deblock(cm->frame_to_show, &cm->post_proc_buffer, cm->filter_level * 10 / 6, 1, 0, IF_RTCD(&cm->rtcd.postproc));
diff --git a/vp8/encoder/onyx_int.h b/vp8/encoder/onyx_int.h
index 0e53f68..fdd3b99 100644
--- a/vp8/encoder/onyx_int.h
+++ b/vp8/encoder/onyx_int.h
@@ -29,6 +29,7 @@
#include "mcomp.h"
#include "temporal_filter.h"
#include "vp8/common/findnearmv.h"
+#include "lookahead.h"
//#define SPEEDSTATS 1
#define MIN_GF_INTERVAL 4
@@ -217,14 +218,6 @@
void *ptr1;
} LPFTHREAD_DATA;
-typedef struct
-{
- INT64 source_time_stamp;
- INT64 source_end_time_stamp;
-
- DECLARE_ALIGNED(16, YV12_BUFFER_CONFIG, source_buffer);
- unsigned int source_frame_flags;
-} SOURCE_SAMPLE;
typedef struct VP8_ENCODER_RTCD
{
@@ -251,17 +244,17 @@
{
DECLARE_ALIGNED(16, short, Y1quant[QINDEX_RANGE][16]);
- DECLARE_ALIGNED(16, short, Y1quant_shift[QINDEX_RANGE][16]);
+ DECLARE_ALIGNED(16, unsigned char, Y1quant_shift[QINDEX_RANGE][16]);
DECLARE_ALIGNED(16, short, Y1zbin[QINDEX_RANGE][16]);
DECLARE_ALIGNED(16, short, Y1round[QINDEX_RANGE][16]);
DECLARE_ALIGNED(16, short, Y2quant[QINDEX_RANGE][16]);
- DECLARE_ALIGNED(16, short, Y2quant_shift[QINDEX_RANGE][16]);
+ DECLARE_ALIGNED(16, unsigned char, Y2quant_shift[QINDEX_RANGE][16]);
DECLARE_ALIGNED(16, short, Y2zbin[QINDEX_RANGE][16]);
DECLARE_ALIGNED(16, short, Y2round[QINDEX_RANGE][16]);
DECLARE_ALIGNED(16, short, UVquant[QINDEX_RANGE][16]);
- DECLARE_ALIGNED(16, short, UVquant_shift[QINDEX_RANGE][16]);
+ DECLARE_ALIGNED(16, unsigned char, UVquant_shift[QINDEX_RANGE][16]);
DECLARE_ALIGNED(16, short, UVzbin[QINDEX_RANGE][16]);
DECLARE_ALIGNED(16, short, UVround[QINDEX_RANGE][16]);
@@ -280,19 +273,17 @@
VP8_CONFIG oxcf;
+ struct lookahead_ctx *lookahead;
+ struct lookahead_entry *source;
+ struct lookahead_entry *alt_ref_source;
+
YV12_BUFFER_CONFIG *Source;
YV12_BUFFER_CONFIG *un_scaled_source;
- INT64 source_time_stamp;
- INT64 source_end_time_stamp;
- unsigned int source_frame_flags;
YV12_BUFFER_CONFIG scaled_source;
- int source_buffer_count; // number of src_buffers in use for lagged encoding
- int source_encode_index; // index of buffer in src_buffer to encode
int source_alt_ref_pending; // frame in src_buffers has been identified to be encoded as an alt ref
int source_alt_ref_active; // an alt ref frame has been encoded and is usable
- int last_alt_ref_sei; // index into src_buffers of frame used as alt reference
int is_src_frame_alt_ref; // source of frame to encode is an exact copy of an alt ref frame
int is_next_src_alt_ref; // source of next frame to encode is an exact copy of an alt ref frame
@@ -301,8 +292,6 @@
int gold_is_alt; // don't do both alt and gold search ( just do gold).
//int refresh_alt_ref_frame;
- SOURCE_SAMPLE src_buffer[MAX_LAG_BUFFERS];
-
YV12_BUFFER_CONFIG last_frame_uf;
TOKENEXTRA *tok;
@@ -396,14 +385,11 @@
int active_arnr_frames; // <= cpi->oxcf.arnr_max_frames
INT64 key_frame_count;
- INT64 tot_key_frame_bits;
- int prior_key_frame_size[KEY_FRAME_CONTEXT];
int prior_key_frame_distance[KEY_FRAME_CONTEXT];
int per_frame_bandwidth; // Current section per frame bandwidth target
int av_per_frame_bandwidth; // Average frame size target for clip
int min_frame_bandwidth; // Minimum allocation that should be used for any frame
int last_key_frame_size;
- int intra_frame_target;
int inter_frame_target;
double output_frame_rate;
long long last_time_stamp_seen;
@@ -638,7 +624,7 @@
VP8_ENCODER_RTCD rtcd;
#endif
#if VP8_TEMPORAL_ALT_REF
- SOURCE_SAMPLE alt_ref_buffer;
+ YV12_BUFFER_CONFIG alt_ref_buffer;
YV12_BUFFER_CONFIG *frames[MAX_LAG_BUFFERS];
int fixed_divide[512];
#endif
diff --git a/vp8/encoder/pickinter.c b/vp8/encoder/pickinter.c
index 0759e2d..765ff26 100644
--- a/vp8/encoder/pickinter.c
+++ b/vp8/encoder/pickinter.c
@@ -50,7 +50,7 @@
extern void vp8_set_mbmode_and_mvs(MACROBLOCK *x, MB_PREDICTION_MODE mb, MV *mv);
-int vp8_skip_fractional_mv_step(MACROBLOCK *mb, BLOCK *b, BLOCKD *d, MV *bestmv, MV *ref_mv, int error_per_bit, const vp8_variance_fn_ptr_t *vfp, int *mvcost[2])
+int vp8_skip_fractional_mv_step(MACROBLOCK *mb, BLOCK *b, BLOCKD *d, MV *bestmv, MV *ref_mv, int error_per_bit, const vp8_variance_fn_ptr_t *vfp, int *mvcost[2], int *distortion, unsigned int *sse)
{
(void) b;
(void) d;
@@ -58,6 +58,8 @@
(void) error_per_bit;
(void) vfp;
(void) mvcost;
+ (void) distortion;
+ (void) sse;
bestmv->row <<= 3;
bestmv->col <<= 3;
return 0;
@@ -194,7 +196,7 @@
rate = mode_costs[mode];
vp8_predict_intra4x4(b, mode, b->predictor);
distortion = get_prediction_error(be, b, &rtcd->variance);
- this_rd = RD_ESTIMATE(x->rdmult, x->rddiv, rate, distortion);
+ this_rd = RDCOST(x->rdmult, x->rddiv, rate, distortion);
if (this_rd < best_rd)
{
@@ -212,7 +214,13 @@
}
-int vp8_pick_intra4x4mby_modes(const VP8_ENCODER_RTCD *rtcd, MACROBLOCK *mb, int *Rate, int *best_dist)
+int vp8_pick_intra4x4mby_modes
+(
+ const VP8_ENCODER_RTCD *rtcd,
+ MACROBLOCK *mb,
+ int *Rate,
+ int *best_dist
+)
{
MACROBLOCKD *const xd = &mb->e_mbd;
int i;
@@ -239,20 +247,18 @@
mic->bmi[i].mode = xd->block[i].bmi.mode = best_mode;
- // Break out case where we have already exceeded best so far value that was bassed in
+ // Break out case where we have already exceeded best so far value
+ // that was passed in
if (distortion > *best_dist)
break;
}
- for (i = 0; i < 16; i++)
- xd->block[i].bmi.mv.as_int = 0;
-
*Rate = cost;
if (i == 16)
{
*best_dist = distortion;
- error = RD_ESTIMATE(mb->rdmult, mb->rddiv, cost, distortion);
+ error = RDCOST(mb->rdmult, mb->rddiv, cost, distortion);
}
else
{
@@ -260,6 +266,9 @@
error = INT_MAX;
}
+ for (i = 0; i < 16; i++)
+ xd->block[i].bmi.mv.as_int = 0;
+
return error;
}
@@ -435,7 +444,7 @@
int bestsme;
//int all_rds[MAX_MODES]; // Experimental debug code.
int best_mode_index = 0;
- int sse = INT_MAX;
+ unsigned int sse = INT_MAX;
MV mvp;
int near_sadidx[8] = {0, 1, 2, 3, 4, 5, 6, 7};
@@ -452,6 +461,8 @@
int skip_mode[4] = {0, 0, 0, 0};
+ int have_subp_search = cpi->sf.half_pixel_search; /* In real-time mode, when Speed >= 15, no sub-pixel search. */
+
vpx_memset(mode_mv, 0, sizeof(mode_mv));
vpx_memset(nearest_mv, 0, sizeof(nearest_mv));
vpx_memset(near_mv, 0, sizeof(near_mv));
@@ -632,10 +643,10 @@
switch (this_mode)
{
case B_PRED:
- distortion2 = *returndistortion; // Best so far passed in as breakout value to vp8_pick_intra4x4mby_modes
- vp8_pick_intra4x4mby_modes(IF_RTCD(&cpi->rtcd), x, &rate, &distortion2);
- rate2 += rate;
- distortion2 = VARIANCE_INVOKE(&cpi->rtcd.variance, get16x16prederror)(x->src.y_buffer, x->src.y_stride, x->e_mbd.predictor, 16, 0x7fffffff);
+ // Pass best so far to vp8_pick_intra4x4mby_modes to use as breakout
+ distortion2 = *returndistortion;
+ vp8_pick_intra4x4mby_modes(IF_RTCD(&cpi->rtcd), x,
+ &rate, &distortion2);
if (distortion2 == INT_MAX)
{
@@ -643,7 +654,12 @@
}
else
{
- this_rd = RD_ESTIMATE(x->rdmult, x->rddiv, rate2, distortion2);
+ rate2 += rate;
+ distortion2 = VARIANCE_INVOKE
+ (&cpi->rtcd.variance, get16x16prederror)(
+ x->src.y_buffer, x->src.y_stride,
+ x->e_mbd.predictor, 16, 0x7fffffff);
+ this_rd = RDCOST(x->rdmult, x->rddiv, rate2, distortion2);
if (this_rd < best_intra_rd)
{
@@ -667,7 +683,7 @@
(&x->e_mbd);
distortion2 = VARIANCE_INVOKE(&cpi->rtcd.variance, get16x16prederror)(x->src.y_buffer, x->src.y_stride, x->e_mbd.predictor, 16, 0x7fffffff);
rate2 += x->mbmode_cost[x->e_mbd.frame_type][x->e_mbd.mode_info_context->mbmi.mode];
- this_rd = RD_ESTIMATE(x->rdmult, x->rddiv, rate2, distortion2);
+ this_rd = RDCOST(x->rdmult, x->rddiv, rate2, distortion2);
if (this_rd < best_intra_rd)
{
@@ -781,7 +797,7 @@
}
if (bestsme < INT_MAX)
- cpi->find_fractional_mv_step(x, b, d, &d->bmi.mv.as_mv, &best_ref_mv, x->errorperbit, &cpi->fn_ptr[BLOCK_16X16], cpi->mb.mvcost);
+ cpi->find_fractional_mv_step(x, b, d, &d->bmi.mv.as_mv, &best_ref_mv, x->errorperbit, &cpi->fn_ptr[BLOCK_16X16], cpi->mb.mvcost, &distortion2, &sse);
mode_mv[NEWMV].row = d->bmi.mv.as_mv.row;
mode_mv[NEWMV].col = d->bmi.mv.as_mv.col;
@@ -811,9 +827,10 @@
x->e_mbd.block[0].bmi.mode = this_mode;
x->e_mbd.block[0].bmi.mv.as_int = x->e_mbd.mode_info_context->mbmi.mv.as_int;
- distortion2 = get_inter_mbpred_error(x, &cpi->fn_ptr[BLOCK_16X16], (unsigned int *)(&sse));
+ if((this_mode != NEWMV) || !(have_subp_search))
+ distortion2 = get_inter_mbpred_error(x, &cpi->fn_ptr[BLOCK_16X16], &sse);
- this_rd = RD_ESTIMATE(x->rdmult, x->rddiv, rate2, distortion2);
+ this_rd = RDCOST(x->rdmult, x->rddiv, rate2, distortion2);
if (cpi->active_map_enabled && x->active_ptr[0] == 0)
{
@@ -921,7 +938,6 @@
best_mbmode.uv_mode = 0;
best_mbmode.mb_skip_coeff = (cpi->common.mb_no_coeff_skip) ? 1 : 0;
best_mbmode.partitioning = 0;
- best_mbmode.dc_diff = 0;
vpx_memcpy(&x->e_mbd.mode_info_context->mbmi, &best_mbmode, sizeof(MB_MODE_INFO));
vpx_memcpy(x->partition_info, &best_partition, sizeof(PARTITION_INFO));
@@ -932,6 +948,7 @@
}
x->e_mbd.mode_info_context->mbmi.mv.as_int = 0;
+ return;
}
diff --git a/vp8/encoder/pickinter.h b/vp8/encoder/pickinter.h
index 8fea983..f96fc53 100644
--- a/vp8/encoder/pickinter.h
+++ b/vp8/encoder/pickinter.h
@@ -14,7 +14,6 @@
#include "vpx_ports/config.h"
#include "vp8/common/onyxc_int.h"
-#define RD_ESTIMATE(RM,DM,R,D) ( ((128+(R)*(RM)) >> 8) + (DM)*(D) )
extern int vp8_pick_intra4x4mby_modes(const VP8_ENCODER_RTCD *, MACROBLOCK *mb, int *Rate, int *Distortion);
extern void vp8_pick_intra_mbuv_mode(MACROBLOCK *mb);
extern void vp8_pick_inter_mode(VP8_COMP *cpi, MACROBLOCK *x, int recon_yoffset, int recon_uvoffset, int *returnrate, int *returndistortion, int *returnintra);
diff --git a/vp8/encoder/quantize.c b/vp8/encoder/quantize.c
index 803e3a5..86ed267 100644
--- a/vp8/encoder/quantize.c
+++ b/vp8/encoder/quantize.c
@@ -27,7 +27,7 @@
short *zbin_ptr = b->zbin;
short *round_ptr = b->round;
short *quant_ptr = b->quant_fast;
- short *quant_shift_ptr = b->quant_shift;
+ unsigned char *quant_shift_ptr = b->quant_shift;
short *qcoeff_ptr = d->qcoeff;
short *dqcoeff_ptr = d->dqcoeff;
short *dequant_ptr = d->dequant;
@@ -112,7 +112,7 @@
short *zbin_ptr = b->zbin;
short *round_ptr = b->round;
short *quant_ptr = b->quant;
- short *quant_shift_ptr = b->quant_shift;
+ unsigned char *quant_shift_ptr = b->quant_shift;
short *qcoeff_ptr = d->qcoeff;
short *dqcoeff_ptr = d->dqcoeff;
short *dequant_ptr = d->dequant;
@@ -166,7 +166,7 @@
int sz;
short *coeff_ptr;
short *quant_ptr;
- short *quant_shift_ptr;
+ unsigned char *quant_shift_ptr;
short *qcoeff_ptr;
short *dqcoeff_ptr;
short *dequant_ptr;
diff --git a/vp8/encoder/ratectrl.c b/vp8/encoder/ratectrl.c
index 767b271..8e8854a 100644
--- a/vp8/encoder/ratectrl.c
+++ b/vp8/encoder/ratectrl.c
@@ -1451,86 +1451,85 @@
return (bits_per_mb_at_max_q * cpi->common.MBs) >> BPER_MB_NORMBITS;
}
-void vp8_adjust_key_frame_context(VP8_COMP *cpi)
+
+/* Estimate the number of frames between key frames.
+ *
+ * For the first key frame the estimate is two seconds' worth of frames,
+ * capped at the configured key frame interval when auto_key is set. For
+ * later key frames the prior_key_frame_distance history is shifted down by
+ * one slot, the latest interval (frames_since_key, at least 1) is appended,
+ * and the weighted average of the history is returned.
+ *
+ * The result is always >= 1 so that callers can safely divide by it.
+ */
+static int estimate_keyframe_frequency(VP8_COMP *cpi)
{
int i;
- int av_key_frames_per_second;
- // Average key frame frequency and size
- unsigned int total_weight = 0;
- unsigned int av_key_frame_frequency = 0;
- unsigned int av_key_frame_bits = 0;
+ // Average key frame frequency
+ int av_key_frame_frequency = 0;
- unsigned int output_frame_rate = (unsigned int)(100 * cpi->output_frame_rate);
- unsigned int target_bandwidth = (unsigned int)(100 * cpi->target_bandwidth);
-
- // Clear down mmx registers to allow floating point in what follows
- vp8_clear_system_state(); //__asm emms;
-
- // Update the count of total key frame bits
- cpi->tot_key_frame_bits += cpi->projected_frame_size;
-
- // First key frame at start of sequence is a special case. We have no frequency data.
+ /* First key frame at start of sequence is a special case. We have no
+ * frequency data.
+ */
if (cpi->key_frame_count == 1)
{
- av_key_frame_frequency = (int)cpi->output_frame_rate * 2; // Assume a default of 1 kf every 2 seconds
- av_key_frame_bits = cpi->projected_frame_size;
- av_key_frames_per_second = output_frame_rate / av_key_frame_frequency; // Note output_frame_rate not cpi->output_frame_rate
+ /* Assume a default of 1 kf every 2 seconds, or the max kf interval,
+ * whichever is smaller.
+ */
+ int key_freq = cpi->oxcf.key_freq>0 ? cpi->oxcf.key_freq : 1;
+ av_key_frame_frequency = (int)cpi->output_frame_rate * 2;
+
+ /* Cap with the sanitised key_freq, not the raw config value, which
+ * may be zero or negative.
+ */
+ if (cpi->oxcf.auto_key && av_key_frame_frequency > key_freq)
+ av_key_frame_frequency = key_freq;
+
+ cpi->prior_key_frame_distance[KEY_FRAME_CONTEXT - 1]
+ = av_key_frame_frequency;
}
else
{
+ unsigned int total_weight = 0;
int last_kf_interval =
(cpi->frames_since_key > 0) ? cpi->frames_since_key : 1;
- // reset keyframe context and calculate weighted average of last KEY_FRAME_CONTEXT keyframes
+ /* reset keyframe context and calculate weighted average of last
+ * KEY_FRAME_CONTEXT keyframes
+ */
for (i = 0; i < KEY_FRAME_CONTEXT; i++)
{
if (i < KEY_FRAME_CONTEXT - 1)
- {
- cpi->prior_key_frame_size[i] = cpi->prior_key_frame_size[i+1];
- cpi->prior_key_frame_distance[i] = cpi->prior_key_frame_distance[i+1];
- }
+ cpi->prior_key_frame_distance[i]
+ = cpi->prior_key_frame_distance[i+1];
else
- {
- cpi->prior_key_frame_size[i] = cpi->projected_frame_size;
cpi->prior_key_frame_distance[i] = last_kf_interval;
- }
- av_key_frame_bits += prior_key_frame_weight[i] * cpi->prior_key_frame_size[i];
- av_key_frame_frequency += prior_key_frame_weight[i] * cpi->prior_key_frame_distance[i];
- total_weight += prior_key_frame_weight[i];
+ av_key_frame_frequency += prior_key_frame_weight[i]
+ * cpi->prior_key_frame_distance[i];
+ total_weight += prior_key_frame_weight[i];
}
- av_key_frame_bits /= total_weight;
av_key_frame_frequency /= total_weight;
- av_key_frames_per_second = output_frame_rate / av_key_frame_frequency;
}
+ /* The caller divides by this value. It can reach zero when the frame
+ * rate is below 1 fps or when the weighted average truncates to zero,
+ * so clamp it to at least one frame.
+ */
+ if (av_key_frame_frequency <= 0)
+ av_key_frame_frequency = 1;
+
+ return av_key_frame_frequency;
+}
+
+
+void vp8_adjust_key_frame_context(VP8_COMP *cpi)
+{
+ // Clear down mmx registers to allow floating point in what follows
+ vp8_clear_system_state();
// Do we have any key frame overspend to recover?
- if ((cpi->pass != 2) && (cpi->projected_frame_size > cpi->per_frame_bandwidth))
+ // Two-pass overspend handled elsewhere.
+ if ((cpi->pass != 2)
+ && (cpi->projected_frame_size > cpi->per_frame_bandwidth))
{
- // Update the count of key frame overspend to be recovered in subsequent frames
- // A portion of the KF overspend is treated as gf overspend (and hence recovered more quickly)
- // as the kf is also a gf. Otherwise the few frames following each kf tend to get more bits
- // allocated than those following other gfs.
- cpi->kf_overspend_bits += (cpi->projected_frame_size - cpi->per_frame_bandwidth) * 7 / 8;
- cpi->gf_overspend_bits += (cpi->projected_frame_size - cpi->per_frame_bandwidth) * 1 / 8;
- if(!av_key_frame_frequency)
- av_key_frame_frequency = 60;
+ int overspend;
- // Work out how much to try and recover per frame.
- // For one pass we estimate the number of frames to spread it over based upon past history.
- // For two pass we know how many frames there will be till the next kf.
- if (cpi->pass == 2)
- {
- if (cpi->frames_to_key > 16)
- cpi->kf_bitrate_adjustment = cpi->kf_overspend_bits / (int)cpi->frames_to_key;
- else
- cpi->kf_bitrate_adjustment = cpi->kf_overspend_bits / 16;
- }
- else
- cpi->kf_bitrate_adjustment = cpi->kf_overspend_bits / (int)av_key_frame_frequency;
+ /* Update the count of key frame overspend to be recovered in
+ * subsequent frames. A portion of the KF overspend is treated as gf
+ * overspend (and hence recovered more quickly) as the kf is also a
+ * gf. Otherwise the few frames following each kf tend to get more
+ * bits allocated than those following other gfs.
+ */
+ overspend = (cpi->projected_frame_size - cpi->per_frame_bandwidth);
+ cpi->kf_overspend_bits += overspend * 7 / 8;
+ cpi->gf_overspend_bits += overspend * 1 / 8;
+
+ /* Work out how much to try and recover per frame. */
+ cpi->kf_bitrate_adjustment = cpi->kf_overspend_bits
+ / estimate_keyframe_frequency(cpi);
}
cpi->frames_since_key = 0;
@@ -1539,6 +1538,7 @@
}
void vp8_compute_frame_size_bounds(VP8_COMP *cpi, int *frame_under_shoot_limit, int *frame_over_shoot_limit)
+
{
// Set-up bounds on acceptable frame size:
if (cpi->oxcf.fixed_q >= 0)
diff --git a/vp8/encoder/rdopt.c b/vp8/encoder/rdopt.c
index 863b6d4..1f329d7 100644
--- a/vp8/encoder/rdopt.c
+++ b/vp8/encoder/rdopt.c
@@ -31,7 +31,7 @@
#include "vp8/common/g_common.h"
#include "variance.h"
#include "mcomp.h"
-
+#include "rdopt.h"
#include "vpx_mem/vpx_mem.h"
#include "dct.h"
#include "vp8/common/systemdependent.h"
@@ -46,13 +46,8 @@
extern void vp8cx_mb_init_quantizer(VP8_COMP *cpi, MACROBLOCK *x);
extern void vp8_update_zbin_extra(VP8_COMP *cpi, MACROBLOCK *x);
-
-#define RDCOST(RM,DM,R,D) ( ((128+(R)*(RM)) >> 8) + (DM)*(D) )
-
#define MAXF(a,b) (((a) > (b)) ? (a) : (b))
-
-
static const int auto_speed_thresh[17] =
{
1000,
@@ -480,7 +475,6 @@
}
-#if !(CONFIG_REALTIME_ONLY)
static int cost_coeffs(MACROBLOCK *mb, BLOCKD *b, int type, ENTROPY_CONTEXT *a, ENTROPY_CONTEXT *l)
{
int c = !type; /* start at coef 0, unless Y with Y2 */
@@ -847,7 +841,6 @@
x->e_mbd.mode_info_context->mbmi.uv_mode = mode_selected;
}
-#endif
int vp8_cost_mv_ref(MB_PREDICTION_MODE m, const int near_mv_ref_ct[4])
{
@@ -875,7 +868,6 @@
}
}
-#if !(CONFIG_REALTIME_ONLY)
static int labels2mode(
MACROBLOCK *x,
int const *labelings, int which_label,
@@ -1213,12 +1205,15 @@
if (bestsme < INT_MAX)
{
+ int distortion;
+ unsigned int sse;
+
if (!cpi->common.full_pixel)
cpi->find_fractional_mv_step(x, c, e, &mode_mv[NEW4X4],
- bsi->ref_mv, x->errorperbit / 2, v_fn_ptr, x->mvcost);
+ bsi->ref_mv, x->errorperbit / 2, v_fn_ptr, x->mvcost, &distortion, &sse);
else
vp8_skip_fractional_mv_step(x, c, e, &mode_mv[NEW4X4],
- bsi->ref_mv, x->errorperbit, v_fn_ptr, x->mvcost);
+ bsi->ref_mv, x->errorperbit, v_fn_ptr, x->mvcost, &distortion, &sse);
}
} /* NEW4X4 */
@@ -1437,7 +1432,6 @@
return bsi.segment_rd;
}
-#endif
static void swap(int *x,int *y)
{
@@ -1725,7 +1719,6 @@
}
}
-#if !(CONFIG_REALTIME_ONLY)
void vp8_rd_pick_inter_mode(VP8_COMP *cpi, MACROBLOCK *x, int recon_yoffset, int recon_uvoffset, int *returnrate, int *returndistortion, int *returnintra)
{
BLOCK *b = &x->block[0];
@@ -1760,9 +1753,6 @@
//int intermodecost[MAX_MODES];
MB_PREDICTION_MODE uv_intra_mode;
-
- int force_no_skip = 0;
-
MV mvp;
int near_sadidx[8] = {0, 1, 2, 3, 4, 5, 6, 7};
int saddone=0;
@@ -1865,8 +1855,6 @@
int disable_skip = 0;
int other_cost = 0;
- force_no_skip = 0;
-
// Experimental debug code.
// Record of rd values recorded for this MB. -1 indicates not measured
//all_rds[mode_index] = -1;
@@ -2198,8 +2186,11 @@
x->mv_row_max = tmp_row_max;
if (bestsme < INT_MAX)
- // cpi->find_fractional_mv_step(x,b,d,&d->bmi.mv.as_mv,&best_ref_mv,x->errorperbit/2,cpi->fn_ptr.svf,cpi->fn_ptr.vf,x->mvcost); // normal mvc=11
- cpi->find_fractional_mv_step(x, b, d, &d->bmi.mv.as_mv, &best_ref_mv, x->errorperbit / 4, &cpi->fn_ptr[BLOCK_16X16], x->mvcost);
+ {
+ int dis; /* TODO: use dis in distortion calculation later. */
+ unsigned int sse;
+ cpi->find_fractional_mv_step(x, b, d, &d->bmi.mv.as_mv, &best_ref_mv, x->errorperbit / 4, &cpi->fn_ptr[BLOCK_16X16], x->mvcost, &dis, &sse);
+ }
mode_mv[NEWMV].row = d->bmi.mv.as_mv.row;
mode_mv[NEWMV].col = d->bmi.mv.as_mv.col;
@@ -2230,8 +2221,6 @@
case ZEROMV:
- mv_selected:
-
// Trap vectors that reach beyond the UMV borders
// Note that ALL New MV, Nearest MV Near MV and Zero MV code drops through to this point
// because of the lack of break statements in the previous two cases.
@@ -2240,14 +2229,15 @@
continue;
vp8_set_mbmode_and_mvs(x, this_mode, &mode_mv[this_mode]);
- vp8_build_inter_predictors_mby(&x->e_mbd);
+ vp8_build_inter16x16_predictors_mby(&x->e_mbd);
if (cpi->active_map_enabled && x->active_ptr[0] == 0) {
x->skip = 1;
}
else if (x->encode_breakout)
{
- int sum, sse;
+ int sum;
+ unsigned int sse;
int threshold = (xd->block[0].dequant[1]
* xd->block[0].dequant[1] >>4);
@@ -2256,7 +2246,7 @@
VARIANCE_INVOKE(&cpi->rtcd.variance, get16x16var)
(x->src.y_buffer, x->src.y_stride,
- x->e_mbd.predictor, 16, (unsigned int *)(&sse), &sum);
+ x->e_mbd.predictor, 16, &sse, &sum);
if (sse < threshold)
{
@@ -2280,8 +2270,7 @@
distortion_uv = sse2;
disable_skip = 1;
- this_rd = RDCOST(x->rdmult, x->rddiv, rate2,
- distortion2);
+ this_rd = RDCOST(x->rdmult, x->rddiv, rate2, distortion2);
break;
}
@@ -2376,7 +2365,6 @@
{
// Note index of best mode so far
best_mode_index = mode_index;
- x->e_mbd.mode_info_context->mbmi.force_no_skip = force_no_skip;
if (this_mode <= B_PRED)
{
@@ -2473,7 +2461,6 @@
best_mbmode.uv_mode = 0;
best_mbmode.mb_skip_coeff = (cpi->common.mb_no_coeff_skip) ? 1 : 0;
best_mbmode.partitioning = 0;
- best_mbmode.dc_diff = 0;
vpx_memcpy(&x->e_mbd.mode_info_context->mbmi, &best_mbmode, sizeof(MB_MODE_INFO));
vpx_memcpy(x->partition_info, &best_partition, sizeof(PARTITION_INFO));
@@ -2484,6 +2471,7 @@
}
x->e_mbd.mode_info_context->mbmi.mv.as_int = 0;
+ return;
}
@@ -2507,4 +2495,3 @@
x->e_mbd.mode_info_context->mbmi.mv.as_mv = x->e_mbd.block[15].bmi.mv.as_mv;
}
-#endif
diff --git a/vp8/encoder/rdopt.h b/vp8/encoder/rdopt.h
index 1d1be11..1d5f9a3 100644
--- a/vp8/encoder/rdopt.h
+++ b/vp8/encoder/rdopt.h
@@ -11,6 +11,9 @@
#ifndef __INC_RDOPT_H
#define __INC_RDOPT_H
+
+/* Rate-distortion cost: the rate R is scaled by RM (rounded, then shifted
+ * right by 8) and added to the distortion D scaled by DM.
+ */
+#define RDCOST(RM,DM,R,D) ( ((128+(R)*(RM)) >> 8) + (DM)*(D) )
+
extern void vp8_initialize_rd_consts(VP8_COMP *cpi, int Qvalue);
extern int vp8_rd_pick_intra4x4mby_modes(VP8_COMP *cpi, MACROBLOCK *mb, int *rate, int *rate_to, int *distortion, int best_rd);
extern int vp8_rd_pick_intra16x16mby_mode(VP8_COMP *cpi, MACROBLOCK *x, int *returnrate, int *rate_to, int *returndistortion);
diff --git a/vp8/encoder/ssim.c b/vp8/encoder/ssim.c
index 64d67c6..c78be37 100644
--- a/vp8/encoder/ssim.c
+++ b/vp8/encoder/ssim.c
@@ -290,8 +290,8 @@
}
}
-const static long long c1 = 426148; // (256^2*(.01*255)^2
-const static long long c2 = 3835331; //(256^2*(.03*255)^2
+const static long long cc1 = 26634; // 64^2*(.01*255)^2
+const static long long cc2 = 239708; // 64^2*(.03*255)^2
static double similarity
(
@@ -303,10 +303,19 @@
int count
)
{
- long long ssim_n = (2*sum_s*sum_r+ c1)*(2*count*sum_sxr-2*sum_s*sum_r+c2);
+ long long ssim_n, ssim_d;
+ long long c1, c2;
- long long ssim_d = (sum_s*sum_s +sum_r*sum_r+c1)*
- (count*sum_sq_s-sum_s*sum_s + count*sum_sq_r-sum_r*sum_r +c2) ;
+ //scale the constants by number of pixels
+ c1 = (cc1*count*count)>>12;
+ c2 = (cc2*count*count)>>12;
+
+ ssim_n = (2*sum_s*sum_r+ c1)*((long long) 2*count*sum_sxr-
+ (long long) 2*sum_s*sum_r+c2);
+
+ ssim_d = (sum_s*sum_s +sum_r*sum_r+c1)*
+ ((long long)count*sum_sq_s-(long long)sum_s*sum_s +
+ (long long)count*sum_sq_r-(long long) sum_r*sum_r +c2) ;
return ssim_n * 1.0 / ssim_d;
}
@@ -332,18 +341,33 @@
const vp8_variance_rtcd_vtable_t *rtcd)
{
unsigned long sum_s=0,sum_r=0,sum_sq_s=0,sum_sq_r=0,sum_sxr=0;
- double ssim3;
- long long ssim_n;
- long long ssim_d;
+ long long ssim3;
+ long long ssim_n,ssim_n1,ssim_n2;
+ long long ssim_d,ssim_d1,ssim_d2;
+ long long ssim_t1,ssim_t2;
+ long long c1, c2;
+
+ // normalize by 256/64
+ c1 = cc1*16;
+ c2 = cc2*16;
rtcd->ssimpf(s, sp, r, rp, &sum_s, &sum_r, &sum_sq_s, &sum_sq_r, &sum_sxr);
- ssim_n = (2*sum_s*sum_r+ c1)*(2*256*sum_sxr-2*sum_s*sum_r+c2);
+ ssim_n1 = (2*sum_s*sum_r+ c1);
- ssim_d = (sum_s*sum_s +sum_r*sum_r+c1)*
- (256*sum_sq_s-sum_s*sum_s + 256*sum_sq_r-sum_r*sum_r +c2) ;
+ ssim_n2 =((long long) 2*256*sum_sxr-(long long) 2*sum_s*sum_r+c2);
- ssim3 = 256 * (ssim_d-ssim_n) / ssim_d;
- return (long)( 256*ssim3 * ssim3 );
+ ssim_d1 =((long long)sum_s*sum_s +(long long)sum_r*sum_r+c1);
+
+ ssim_d2 = (256 * (long long) sum_sq_s-(long long) sum_s*sum_s +
+ (long long) 256*sum_sq_r-(long long) sum_r*sum_r +c2) ;
+
+ ssim_t1 = 256 - 256 * ssim_n1 / ssim_d1;
+ ssim_t2 = 256 - 256 * ssim_n2 / ssim_d2;
+
+ ssim3 = 256 *ssim_t1 * ssim_t2;
+ if(ssim3 <0 )
+ ssim3=0;
+ return (long)( ssim3 );
}
// TODO: (jbb) this 8x8 window might be too big + we may want to pick pixels
// such that the window regions overlap block boundaries to penalize blocking
@@ -361,18 +385,20 @@
)
{
int i,j;
-
+ int samples =0;
double ssim_total=0;
- // we can sample points as frequently as we like start with 1 per 8x8
- for(i=0; i < height; i+=8, img1 += stride_img1*8, img2 += stride_img2*8)
+ // we can sample points as frequently as we like start with 1 per 4x4
+ for(i=0; i < height-8; i+=4, img1 += stride_img1*4, img2 += stride_img2*4)
{
- for(j=0; j < width; j+=8 )
+ for(j=0; j < width-8; j+=4 )
{
- ssim_total += ssim_8x8(img1, stride_img1, img2, stride_img2, rtcd);
+ double v = ssim_8x8(img1+j, stride_img1, img2+j, stride_img2, rtcd);
+ ssim_total += v;
+ samples++;
}
}
- ssim_total /= (width/8 * height /8);
+ ssim_total /= samples;
return ssim_total;
}
@@ -405,4 +431,4 @@
*weight = 1;
return ssimv;
-}
+}
\ No newline at end of file
diff --git a/vp8/encoder/temporal_filter.c b/vp8/encoder/temporal_filter.c
index cec9518..b771955 100644
--- a/vp8/encoder/temporal_filter.c
+++ b/vp8/encoder/temporal_filter.c
@@ -208,10 +208,12 @@
// Try sub-pixel MC?
//if (bestsme > error_thresh && bestsme < INT_MAX)
{
+ int distortion;
+ unsigned int sse;
bestsme = cpi->find_fractional_mv_step(x, b, d,
&d->bmi.mv.as_mv, &best_ref_mv1,
x->errorperbit, &cpi->fn_ptr[BLOCK_16X16],
- mvcost);
+ mvcost, &distortion, &sse);
}
#endif
@@ -357,8 +359,8 @@
}
// Normalize filter output to produce AltRef frame
- dst1 = cpi->alt_ref_buffer.source_buffer.y_buffer;
- stride = cpi->alt_ref_buffer.source_buffer.y_stride;
+ dst1 = cpi->alt_ref_buffer.y_buffer;
+ stride = cpi->alt_ref_buffer.y_stride;
byte = mb_y_offset;
for (i = 0,k = 0; i < 16; i++)
{
@@ -377,9 +379,9 @@
byte += stride - 16;
}
- dst1 = cpi->alt_ref_buffer.source_buffer.u_buffer;
- dst2 = cpi->alt_ref_buffer.source_buffer.v_buffer;
- stride = cpi->alt_ref_buffer.source_buffer.uv_stride;
+ dst1 = cpi->alt_ref_buffer.u_buffer;
+ dst2 = cpi->alt_ref_buffer.v_buffer;
+ stride = cpi->alt_ref_buffer.uv_stride;
byte = mb_uv_offset;
for (i = 0,k = 256; i < 8; i++)
{
@@ -422,7 +424,8 @@
void vp8_temporal_filter_prepare_c
(
- VP8_COMP *cpi
+ VP8_COMP *cpi,
+ int distance
)
{
int frame = 0;
@@ -441,12 +444,9 @@
int max_frames = cpi->active_arnr_frames;
- num_frames_backward = cpi->last_alt_ref_sei - cpi->source_encode_index;
-
- if (num_frames_backward < 0)
- num_frames_backward += cpi->oxcf.lag_in_frames;
-
- num_frames_forward = cpi->oxcf.lag_in_frames - (num_frames_backward + 1);
+ num_frames_backward = distance;
+ num_frames_forward = vp8_lookahead_depth(cpi->lookahead)
+ - (num_frames_backward + 1);
switch (blur_type)
{
@@ -498,8 +498,7 @@
break;
}
- start_frame = (cpi->last_alt_ref_sei
- + frames_to_blur_forward) % cpi->oxcf.lag_in_frames;
+ start_frame = distance + frames_to_blur_forward;
#ifdef DEBUGFWG
// DEBUG FWG
@@ -520,12 +519,9 @@
for (frame = 0; frame < frames_to_blur; frame++)
{
int which_buffer = start_frame - frame;
-
- if (which_buffer < 0)
- which_buffer += cpi->oxcf.lag_in_frames;
-
- cpi->frames[frames_to_blur-1-frame]
- = &cpi->src_buffer[which_buffer].source_buffer;
+ struct lookahead_entry* buf = vp8_lookahead_peek(cpi->lookahead,
+ which_buffer);
+ cpi->frames[frames_to_blur-1-frame] = &buf->img;
}
vp8_temporal_filter_iterate_c (
diff --git a/vp8/encoder/tokenize.c b/vp8/encoder/tokenize.c
index e3f423f..1c59238 100644
--- a/vp8/encoder/tokenize.c
+++ b/vp8/encoder/tokenize.c
@@ -224,28 +224,9 @@
int plane_type;
int b;
- TOKENEXTRA *start = *t;
- TOKENEXTRA *tp = *t;
-
- x->mode_info_context->mbmi.dc_diff = 1;
-
-#if 0
-
- if (x->mbmi.force_no_skip)
- {
- x->mbmi.mb_skip_coeff = 1;
- //reset for next_mb.
- x->mbmi.force_no_skip = 0;
- }
-
-#endif
-
-#if 1
-
x->mode_info_context->mbmi.mb_skip_coeff = mb_is_skippable(x);
if (x->mode_info_context->mbmi.mb_skip_coeff)
{
-
cpi->skip_true_count++;
if (!cpi->common.mb_no_coeff_skip)
@@ -255,17 +236,11 @@
vp8_fix_contexts(x);
}
- if (x->mode_info_context->mbmi.mode != B_PRED && x->mode_info_context->mbmi.mode != SPLITMV)
- x->mode_info_context->mbmi.dc_diff = 0;
- else
- x->mode_info_context->mbmi.dc_diff = 1;
-
-
return;
}
cpi->skip_false_count++;
-#endif
+
#if 0
vpx_memcpy(cpi->coef_counts_backup, cpi->coef_counts, sizeof(cpi->coef_counts));
#endif
@@ -292,42 +267,6 @@
A + vp8_block2above[b],
L + vp8_block2left[b], cpi);
-#if 0
-
- if (cpi->common.mb_no_coeff_skip)
- {
- int skip = 1;
-
- while ((tp != *t) && skip)
- {
- skip = (skip && (tp->Token == DCT_EOB_TOKEN));
- tp ++;
- }
-
- if (skip != x->mbmi.mb_skip_coeff)
- skip += 0;
-
- x->mbmi.mb_skip_coeff = skip;
-
- if (x->mbmi.mb_skip_coeff == 1)
- {
- x->mbmi.dc_diff = 0;
- //redo the coutnts
- vpx_memcpy(cpi->coef_counts, cpi->coef_counts_backup, sizeof(cpi->coef_counts));
-
- *t = start;
- cpi->skip_true_count++;
- //skip_true_count++;
- }
- else
- {
-
- cpi->skip_false_count++;
- //skip_false_count++;
- }
- }
-
-#endif
}
@@ -510,13 +449,6 @@
A + vp8_block2above[24], L + vp8_block2left[24], cpi);
plane_type = 0;
-
- if (x->mode_info_context->mbmi.mode != B_PRED && x->mode_info_context->mbmi.mode != SPLITMV)
- x->mode_info_context->mbmi.dc_diff = 0;
- else
- x->mode_info_context->mbmi.dc_diff = 1;
-
-
for (b = 0; b < 16; b++)
stuff1st_order_b(x->block + b, t, plane_type, x->frame_type,
A + vp8_block2above[b],
diff --git a/vp8/encoder/x86/dct_sse2.asm b/vp8/encoder/x86/dct_sse2.asm
index 652dd98..3d52a5d 100644
--- a/vp8/encoder/x86/dct_sse2.asm
+++ b/vp8/encoder/x86/dct_sse2.asm
@@ -33,6 +33,7 @@
%define input rcx
%define output rdx
%define pitch r8
+ SAVE_XMM 7, u
%else
%define input rdi
%define output rsi
@@ -53,6 +54,7 @@
pop rbp
%else
%ifidn __OUTPUT_FORMAT__,x64
+ RESTORE_XMM
%endif
%endif
ret
diff --git a/vp8/encoder/x86/encodeopt.asm b/vp8/encoder/x86/encodeopt.asm
index c0f06bb..9946294 100644
--- a/vp8/encoder/x86/encodeopt.asm
+++ b/vp8/encoder/x86/encodeopt.asm
@@ -22,33 +22,33 @@
; end prologue
mov rsi, arg(0) ;coeff_ptr
-
mov rdi, arg(1) ;dcoef_ptr
- movdqa xmm3, [rsi]
- movdqa xmm4, [rdi]
- movdqa xmm5, [rsi+16]
+ movdqa xmm0, [rsi]
+ movdqa xmm1, [rdi]
- movdqa xmm6, [rdi+16]
- psubw xmm3, xmm4
+ movdqa xmm2, [rsi+16]
+ movdqa xmm3, [rdi+16]
- psubw xmm5, xmm6
- pmaddwd xmm3, xmm3
- pmaddwd xmm5, xmm5
+ psubw xmm0, xmm1
+ psubw xmm2, xmm3
- paddd xmm3, xmm5
+ pmaddwd xmm0, xmm0
+ pmaddwd xmm2, xmm2
- pxor xmm7, xmm7
- movdqa xmm0, xmm3
+ paddd xmm0, xmm2
- punpckldq xmm0, xmm7
- punpckhdq xmm3, xmm7
+ pxor xmm5, xmm5
+ movdqa xmm1, xmm0
- paddd xmm0, xmm3
- movdqa xmm3, xmm0
+ punpckldq xmm0, xmm5
+ punpckhdq xmm1, xmm5
+
+ paddd xmm0, xmm1
+ movdqa xmm1, xmm0
psrldq xmm0, 8
- paddd xmm0, xmm3
+ paddd xmm0, xmm1
movq rax, xmm0
@@ -208,53 +208,54 @@
push rbp
mov rbp, rsp
SHADOW_ARGS_TO_STACK 3
+ SAVE_XMM 6
push rsi
push rdi
; end prolog
mov rsi, arg(0) ;coeff_ptr
- pxor xmm7, xmm7
+ pxor xmm6, xmm6
mov rdi, arg(1) ;dcoef_ptr
- pxor xmm2, xmm2
+ pxor xmm4, xmm4
- movd xmm1, dword ptr arg(2) ;dc
- por xmm1, xmm2
+ movd xmm5, dword ptr arg(2) ;dc
+ por xmm5, xmm4
- pcmpeqw xmm1, xmm7
+ pcmpeqw xmm5, xmm6
mov rcx, 16
mberror_loop:
- movdqa xmm3, [rsi]
- movdqa xmm4, [rdi]
+ movdqa xmm0, [rsi]
+ movdqa xmm1, [rdi]
- movdqa xmm5, [rsi+16]
- movdqa xmm6, [rdi+16]
+ movdqa xmm2, [rsi+16]
+ movdqa xmm3, [rdi+16]
- psubw xmm5, xmm6
- pmaddwd xmm5, xmm5
+ psubw xmm2, xmm3
+ pmaddwd xmm2, xmm2
- psubw xmm3, xmm4
- pand xmm3, xmm1
+ psubw xmm0, xmm1
+ pand xmm0, xmm5
- pmaddwd xmm3, xmm3
+ pmaddwd xmm0, xmm0
add rsi, 32
add rdi, 32
sub rcx, 1
- paddd xmm2, xmm5
+ paddd xmm4, xmm2
- paddd xmm2, xmm3
+ paddd xmm4, xmm0
jnz mberror_loop
- movdqa xmm0, xmm2
- punpckldq xmm0, xmm7
+ movdqa xmm0, xmm4
+ punpckldq xmm0, xmm6
- punpckhdq xmm2, xmm7
- paddd xmm0, xmm2
+ punpckhdq xmm4, xmm6
+ paddd xmm0, xmm4
movdqa xmm1, xmm0
psrldq xmm0, 8
@@ -265,6 +266,7 @@
pop rdi
pop rsi
; begin epilog
+ RESTORE_XMM
UNSHADOW_ARGS
pop rbp
ret
@@ -342,7 +344,7 @@
mov rdi, arg(1) ;d_ptr
mov rcx, 16
- pxor xmm7, xmm7
+ pxor xmm3, xmm3
mbuverror_loop:
@@ -352,7 +354,7 @@
psubw xmm1, xmm2
pmaddwd xmm1, xmm1
- paddd xmm7, xmm1
+ paddd xmm3, xmm1
add rsi, 16
add rdi, 16
@@ -361,7 +363,7 @@
jnz mbuverror_loop
pxor xmm0, xmm0
- movdqa xmm1, xmm7
+ movdqa xmm1, xmm3
movdqa xmm2, xmm1
punpckldq xmm1, xmm0
diff --git a/vp8/encoder/x86/fwalsh_sse2.asm b/vp8/encoder/x86/fwalsh_sse2.asm
index 39439f0..71efd56 100644
--- a/vp8/encoder/x86/fwalsh_sse2.asm
+++ b/vp8/encoder/x86/fwalsh_sse2.asm
@@ -17,7 +17,7 @@
push rbp
mov rbp, rsp
SHADOW_ARGS_TO_STACK 3
- SAVE_XMM
+ SAVE_XMM 7
GET_GOT rbx
push rsi
push rdi
diff --git a/vp8/encoder/x86/quantize_sse2.asm b/vp8/encoder/x86/quantize_sse2.asm
index 5e40dc7..056b64c 100644
--- a/vp8/encoder/x86/quantize_sse2.asm
+++ b/vp8/encoder/x86/quantize_sse2.asm
@@ -20,7 +20,7 @@
sym(vp8_regular_quantize_b_sse2):
push rbp
mov rbp, rsp
- SAVE_XMM
+ SAVE_XMM 7
GET_GOT rbx
%if ABI_IS_32BIT
@@ -142,7 +142,7 @@
movsx edi, WORD PTR[rsp + temp_qcoeff + %1 * 2]
; downshift by quant_shift[rc]
- movsx ecx, WORD PTR[rax + %1 * 2] ; quant_shift_ptr[rc]
+ movsx cx, BYTE PTR[rax + %1] ; quant_shift_ptr[rc]
sar edi, cl ; also sets Z bit
je rq_zigzag_loop_%1 ; !y
mov WORD PTR[rsp + qcoeff + %1 * 2], di ;qcoeff_ptr[rc] = temp_qcoeff[rc]
diff --git a/vp8/encoder/x86/quantize_sse4.asm b/vp8/encoder/x86/quantize_sse4.asm
new file mode 100644
index 0000000..258899e
--- /dev/null
+++ b/vp8/encoder/x86/quantize_sse4.asm
@@ -0,0 +1,254 @@
+;
+; Copyright (c) 2010 The WebM project authors. All Rights Reserved.
+;
+; Use of this source code is governed by a BSD-style license and patent
+; grant that can be found in the LICENSE file in the root of the source
+; tree. All contributing project authors may be found in the AUTHORS
+; file in the root of the source tree.
+;
+
+
+%include "vpx_ports/x86_abi_support.asm"
+%include "asm_enc_offsets.asm"
+
+
+; void vp8_regular_quantize_b_sse4 | arg
+; (BLOCK *b, | 0
+; BLOCKD *d) | 1
+;
+; Quantizes the 16 coefficients of one block. The abs / zbin / round / quant
+; arithmetic is done on all lanes at once; a scalar pass in zig-zag order
+; then applies the running zero-bin boost and the per-coefficient
+; quant_shift. Finally qcoeff, dqcoeff and eob are written to the BLOCKD.
+
+global sym(vp8_regular_quantize_b_sse4)
+sym(vp8_regular_quantize_b_sse4):
+
+%if ABI_IS_32BIT
+ push rbp
+ mov rbp, rsp
+ GET_GOT rbx
+ push rdi
+ push rsi
+
+ ALIGN_STACK 16, rax
+ %define qcoeff 0 ; 32
+ %define stack_size 32
+ sub rsp, stack_size
+%else
+ %ifidn __OUTPUT_FORMAT__,x64
+ SAVE_XMM 8, u
+ push rdi
+ push rsi
+ %endif
+%endif
+ ; end prolog
+
+%if ABI_IS_32BIT
+ mov rdi, arg(0) ; BLOCK *b
+ mov rsi, arg(1) ; BLOCKD *d
+%else
+ %ifidn __OUTPUT_FORMAT__,x64
+ mov rdi, rcx ; BLOCK *b
+ mov rsi, rdx ; BLOCKD *d
+ %else
+ ;mov rdi, rdi ; BLOCK *b
+ ;mov rsi, rsi ; BLOCKD *d
+ %endif
+%endif
+
+ mov rax, [rdi + vp8_block_coeff]
+ mov rcx, [rdi + vp8_block_zbin]
+ mov rdx, [rdi + vp8_block_round]
+ movd xmm7, [rdi + vp8_block_zbin_extra]
+
+ ; z
+ movdqa xmm0, [rax]
+ movdqa xmm1, [rax + 16]
+
+ ; duplicate zbin_oq_value
+ pshuflw xmm7, xmm7, 0
+ punpcklwd xmm7, xmm7
+
+ movdqa xmm2, xmm0
+ movdqa xmm3, xmm1
+
+ ; sz
+ psraw xmm0, 15
+ psraw xmm1, 15
+
+ ; (z ^ sz)
+ pxor xmm2, xmm0
+ pxor xmm3, xmm1
+
+ ; x = abs(z)
+ psubw xmm2, xmm0
+ psubw xmm3, xmm1
+
+ ; zbin
+ movdqa xmm4, [rcx]
+ movdqa xmm5, [rcx + 16]
+
+ ; *zbin_ptr + zbin_oq_value
+ paddw xmm4, xmm7
+ paddw xmm5, xmm7
+
+ ; xmm7 (zbin_oq_value) is no longer needed and is reused below
+ movdqa xmm6, xmm2
+ movdqa xmm7, xmm3
+
+ ; x - (*zbin_ptr + zbin_oq_value)
+ psubw xmm6, xmm4
+ psubw xmm7, xmm5
+
+ ; round
+ movdqa xmm4, [rdx]
+ movdqa xmm5, [rdx + 16]
+
+ ; last reads through rdi (b); the zig-zag pass reuses edi as scratch
+ mov rax, [rdi + vp8_block_quant_shift]
+ mov rcx, [rdi + vp8_block_quant]
+ mov rdx, [rdi + vp8_block_zrun_zbin_boost]
+
+ ; x + round
+ paddw xmm2, xmm4
+ paddw xmm3, xmm5
+
+ ; quant
+ movdqa xmm4, [rcx]
+ movdqa xmm5, [rcx + 16]
+
+ ; y = x * quant_ptr >> 16
+ pmulhw xmm4, xmm2
+ pmulhw xmm5, xmm3
+
+ ; y += x
+ paddw xmm2, xmm4
+ paddw xmm3, xmm5
+
+ ; clear the qcoeff accumulators: stack buffer on 32-bit, xmm4/xmm8 otherwise
+ pxor xmm4, xmm4
+%if ABI_IS_32BIT
+ movdqa [rsp + qcoeff], xmm4
+ movdqa [rsp + qcoeff + 16], xmm4
+%else
+ pxor xmm8, xmm8
+%endif
+
+ ; quant_shift
+ movdqa xmm5, [rax]
+
+ ; zrun_zbin_boost
+ ; rax keeps the table base so the macro can reset rdx after a nonzero output
+ mov rax, rdx
+
+; ZIGZAG_LOOP handles one coefficient of the scan.
+; %1 = coefficient index rc (quant_shift byte, 32-bit stack slot, label suffix)
+; %2 = word lane of that coefficient within its xmm register
+; %3 = register holding the y values
+; %4 = register holding the x - zbin values
+; %5 = register receiving qcoeff (non-32-bit builds only)
+%macro ZIGZAG_LOOP 5
+ ; x
+ pextrw ecx, %4, %2
+
+ ; if (x >= zbin)
+ sub cx, WORD PTR[rdx] ; x - zbin
+ lea rdx, [rdx + 2] ; zbin_boost_ptr++
+ jl rq_zigzag_loop_%1 ; x < zbin
+
+ pextrw edi, %3, %2 ; y
+
+ ; downshift by quant_shift[rc]
+ pextrb ecx, xmm5, %1 ; quant_shift[rc]
+ sar edi, cl ; also sets Z bit
+ je rq_zigzag_loop_%1 ; !y
+%if ABI_IS_32BIT
+ mov WORD PTR[rsp + qcoeff + %1 *2], di
+%else
+ pinsrw %5, edi, %2 ; qcoeff[rc]
+%endif
+ mov rdx, rax ; reset to b->zrun_zbin_boost
+rq_zigzag_loop_%1:
+%endmacro
+; in vp8_default_zig_zag1d order: see vp8/common/entropy.c
+ZIGZAG_LOOP 0, 0, xmm2, xmm6, xmm4
+ZIGZAG_LOOP 1, 1, xmm2, xmm6, xmm4
+ZIGZAG_LOOP 4, 4, xmm2, xmm6, xmm4
+ZIGZAG_LOOP 8, 0, xmm3, xmm7, xmm8
+ZIGZAG_LOOP 5, 5, xmm2, xmm6, xmm4
+ZIGZAG_LOOP 2, 2, xmm2, xmm6, xmm4
+ZIGZAG_LOOP 3, 3, xmm2, xmm6, xmm4
+ZIGZAG_LOOP 6, 6, xmm2, xmm6, xmm4
+ZIGZAG_LOOP 9, 1, xmm3, xmm7, xmm8
+ZIGZAG_LOOP 12, 4, xmm3, xmm7, xmm8
+ZIGZAG_LOOP 13, 5, xmm3, xmm7, xmm8
+ZIGZAG_LOOP 10, 2, xmm3, xmm7, xmm8
+ZIGZAG_LOOP 7, 7, xmm2, xmm6, xmm4
+ZIGZAG_LOOP 11, 3, xmm3, xmm7, xmm8
+ZIGZAG_LOOP 14, 6, xmm3, xmm7, xmm8
+ZIGZAG_LOOP 15, 7, xmm3, xmm7, xmm8
+
+ mov rcx, [rsi + vp8_blockd_dequant]
+ mov rdi, [rsi + vp8_blockd_dqcoeff]
+
+%if ABI_IS_32BIT
+ movdqa xmm4, [rsp + qcoeff]
+ movdqa xmm5, [rsp + qcoeff + 16]
+%else
+ ; the upper eight qcoeff words were built in xmm8; alias it as xmm5 so the
+ ; code below is shared with the 32-bit path (undone in the epilog)
+ %define xmm5 xmm8
+%endif
+
+ ; y ^ sz
+ pxor xmm4, xmm0
+ pxor xmm5, xmm1
+ ; x = (y ^ sz) - sz
+ psubw xmm4, xmm0
+ psubw xmm5, xmm1
+
+ ; dequant
+ movdqa xmm0, [rcx]
+ movdqa xmm1, [rcx + 16]
+
+ mov rcx, [rsi + vp8_blockd_qcoeff]
+
+ pmullw xmm0, xmm4
+ pmullw xmm1, xmm5
+
+ ; store qcoeff
+ movdqa [rcx], xmm4
+ movdqa [rcx + 16], xmm5
+
+ ; store dqcoeff
+ movdqa [rdi], xmm0
+ movdqa [rdi + 16], xmm1
+
+ ; select the last value (in zig_zag order) for EOB
+ ; words become all ones where qcoeff == 0
+ pxor xmm6, xmm6
+ pcmpeqw xmm4, xmm6
+ pcmpeqw xmm5, xmm6
+
+ ; one byte per coefficient, reordered by the zig_zag1d table, then one
+ ; bit per coefficient in edx (set where the coefficient is zero)
+ packsswb xmm4, xmm5
+ pshufb xmm4, [GLOBAL(zig_zag1d)]
+ pmovmskb edx, xmm4
+ xor rdi, rdi
+ mov eax, -1
+ ; invert the low 16 bits so that set bits mark nonzero coefficients
+ xor dx, ax
+ ; eax = position of the highest set bit
+ bsr eax, edx
+ ; edi = all ones when any bit is set, 0 when the block is all zero
+ sub edi, edx
+ sar edi, 31
+ ; eob = position + 1, forced to 0 for an all-zero block
+ add eax, 1
+ and eax, edi
+
+ mov [rsi + vp8_blockd_eob], eax
+
+ ; begin epilog
+%if ABI_IS_32BIT
+ add rsp, stack_size
+ pop rsp
+
+ pop rsi
+ pop rdi
+ RESTORE_GOT
+ pop rbp
+%else
+ %undef xmm5
+ %ifidn __OUTPUT_FORMAT__,x64
+ pop rsi
+ pop rdi
+ RESTORE_XMM
+ %endif
+%endif
+
+ ret
+
+SECTION_RODATA
+align 16
+; vp8/common/entropy.c: vp8_default_zig_zag1d
+zig_zag1d:
+ db 0, 1, 4, 8, 5, 2, 3, 6, 9, 12, 13, 10, 7, 11, 14, 15
diff --git a/vp8/encoder/x86/quantize_x86.h b/vp8/encoder/x86/quantize_x86.h
index f093580..bbe475f 100644
--- a/vp8/encoder/x86/quantize_x86.h
+++ b/vp8/encoder/x86/quantize_x86.h
@@ -51,4 +51,17 @@
#endif /* HAVE_SSSE3 */
+
+#if HAVE_SSE4_1
+extern prototype_quantize_block(vp8_regular_quantize_b_sse4);
+
+#if !CONFIG_RUNTIME_CPU_DETECT
+
+#undef vp8_quantize_quantb
+#define vp8_quantize_quantb vp8_regular_quantize_b_sse4
+
+#endif /* !CONFIG_RUNTIME_CPU_DETECT */
+
+#endif /* HAVE_SSE4_1 */
+
#endif /* QUANTIZE_X86_H */
diff --git a/vp8/encoder/x86/sad_sse2.asm b/vp8/encoder/x86/sad_sse2.asm
index cc6bc3c..04ee72f 100644
--- a/vp8/encoder/x86/sad_sse2.asm
+++ b/vp8/encoder/x86/sad_sse2.asm
@@ -21,6 +21,7 @@
push rbp
mov rbp, rsp
SHADOW_ARGS_TO_STACK 4
+ SAVE_XMM 6
push rsi
push rdi
; end prolog
@@ -34,7 +35,7 @@
lea rcx, [rsi+rax*8]
lea rcx, [rcx+rax*8]
- pxor xmm7, xmm7
+ pxor xmm6, xmm6
x16x16sad_wmt_loop:
@@ -52,32 +53,33 @@
punpcklbw xmm1, xmm3
psadbw xmm0, xmm1
- movq xmm6, QWORD PTR [rsi+rax+8]
+ movq xmm2, QWORD PTR [rsi+rax+8]
movq xmm3, QWORD PTR [rdi+rdx+8]
lea rsi, [rsi+rax*2]
lea rdi, [rdi+rdx*2]
- punpcklbw xmm4, xmm6
+ punpcklbw xmm4, xmm2
punpcklbw xmm5, xmm3
psadbw xmm4, xmm5
- paddw xmm7, xmm0
- paddw xmm7, xmm4
+ paddw xmm6, xmm0
+ paddw xmm6, xmm4
cmp rsi, rcx
jne x16x16sad_wmt_loop
- movq xmm0, xmm7
- psrldq xmm7, 8
+ movq xmm0, xmm6
+ psrldq xmm6, 8
- paddw xmm0, xmm7
+ paddw xmm0, xmm6
movq rax, xmm0
; begin epilog
pop rdi
pop rsi
+ RESTORE_XMM
UNSHADOW_ARGS
pop rbp
ret
diff --git a/vp8/encoder/x86/sad_sse3.asm b/vp8/encoder/x86/sad_sse3.asm
index f0336ab..2dbcc7d 100644
--- a/vp8/encoder/x86/sad_sse3.asm
+++ b/vp8/encoder/x86/sad_sse3.asm
@@ -33,14 +33,15 @@
movsxd rdx, dword ptr arg(3) ; ref_stride
%else
%ifidn __OUTPUT_FORMAT__,x64
+ SAVE_XMM 7, u
%define src_ptr rcx
%define src_stride rdx
%define ref_ptr r8
%define ref_stride r9
%define end_ptr r10
%define ret_var r11
- %define result_ptr [rsp+8+4*8]
- %define max_err [rsp+8+4*8]
+ %define result_ptr [rsp+xmm_stack_space+8+4*8]
+ %define max_err [rsp+xmm_stack_space+8+4*8]
%else
%define src_ptr rdi
%define src_stride rsi
@@ -72,6 +73,7 @@
pop rbp
%else
%ifidn __OUTPUT_FORMAT__,x64
+ RESTORE_XMM
%endif
%endif
ret
@@ -106,6 +108,7 @@
xchg rbx, rax
%else
%ifidn __OUTPUT_FORMAT__,x64
+ SAVE_XMM 7, u
%define src_ptr rcx
%define src_stride rdx
%define r0_ptr rsi
@@ -113,7 +116,7 @@
%define r2_ptr r11
%define r3_ptr r8
%define ref_stride r9
- %define result_ptr [rsp+16+4*8]
+ %define result_ptr [rsp+xmm_stack_space+16+4*8]
push rsi
LOAD_X4_ADDRESSES r8, r0_ptr, r1_ptr, r2_ptr, r3_ptr
@@ -151,6 +154,7 @@
%else
%ifidn __OUTPUT_FORMAT__,x64
pop rsi
+ RESTORE_XMM
%endif
%endif
ret
diff --git a/vp8/encoder/x86/sad_ssse3.asm b/vp8/encoder/x86/sad_ssse3.asm
index 69c5eae..6ecf081 100644
--- a/vp8/encoder/x86/sad_ssse3.asm
+++ b/vp8/encoder/x86/sad_ssse3.asm
@@ -157,6 +157,7 @@
push rbp
mov rbp, rsp
SHADOW_ARGS_TO_STACK 5
+ SAVE_XMM 7
push rsi
push rdi
push rcx
@@ -253,6 +254,7 @@
pop rcx
pop rdi
pop rsi
+ RESTORE_XMM
UNSHADOW_ARGS
pop rbp
ret
@@ -268,6 +270,7 @@
push rbp
mov rbp, rsp
SHADOW_ARGS_TO_STACK 5
+ SAVE_XMM 7
push rsi
push rdi
push rcx
@@ -361,6 +364,7 @@
pop rcx
pop rdi
pop rsi
+ RESTORE_XMM
UNSHADOW_ARGS
pop rbp
ret
diff --git a/vp8/encoder/x86/ssim_opt.asm b/vp8/encoder/x86/ssim_opt.asm
index c267cdb..d5d267a 100644
--- a/vp8/encoder/x86/ssim_opt.asm
+++ b/vp8/encoder/x86/ssim_opt.asm
@@ -16,12 +16,12 @@
paddusw xmm14, xmm4 ; sum_r
movdqa xmm1, xmm3
pmaddwd xmm1, xmm1
- paddq xmm13, xmm1 ; sum_sq_s
+ paddd xmm13, xmm1 ; sum_sq_s
movdqa xmm2, xmm4
pmaddwd xmm2, xmm2
- paddq xmm12, xmm2 ; sum_sq_r
+ paddd xmm12, xmm2 ; sum_sq_r
pmaddwd xmm3, xmm4
- paddq xmm11, xmm3 ; sum_sxr
+ paddd xmm11, xmm3 ; sum_sxr
%endmacro
; Sum across the register %1 starting with q words
@@ -66,6 +66,7 @@
push rbp
mov rbp, rsp
SHADOW_ARGS_TO_STACK 9
+ SAVE_XMM 15
push rsi
push rdi
; end prolog
@@ -115,19 +116,20 @@
SUM_ACROSS_Q xmm11
mov rdi,arg(4)
- movq [rdi], xmm15;
+ movd [rdi], xmm15;
mov rdi,arg(5)
- movq [rdi], xmm14;
+ movd [rdi], xmm14;
mov rdi,arg(6)
- movq [rdi], xmm13;
+ movd [rdi], xmm13;
mov rdi,arg(7)
- movq [rdi], xmm12;
+ movd [rdi], xmm12;
mov rdi,arg(8)
- movq [rdi], xmm11;
+ movd [rdi], xmm11;
; begin epilog
pop rdi
pop rsi
+ RESTORE_XMM
UNSHADOW_ARGS
pop rbp
ret
@@ -154,6 +156,7 @@
push rbp
mov rbp, rsp
SHADOW_ARGS_TO_STACK 9
+ SAVE_XMM 15
push rsi
push rdi
; end prolog
@@ -174,11 +177,8 @@
NextRow2:
;grab source and reference pixels
- movq xmm5, [rsi]
- movq xmm6, [rdi]
-
- movdqa xmm3, xmm5
- movdqa xmm4, xmm6
+ movq xmm3, [rsi]
+ movq xmm4, [rdi]
punpcklbw xmm3, xmm0 ; low_s
punpcklbw xmm4, xmm0 ; low_r
@@ -197,19 +197,20 @@
SUM_ACROSS_Q xmm11
mov rdi,arg(4)
- movq [rdi], xmm15;
+ movd [rdi], xmm15;
mov rdi,arg(5)
- movq [rdi], xmm14;
+ movd [rdi], xmm14;
mov rdi,arg(6)
- movq [rdi], xmm13;
+ movd [rdi], xmm13;
mov rdi,arg(7)
- movq [rdi], xmm12;
+ movd [rdi], xmm12;
mov rdi,arg(8)
- movq [rdi], xmm11;
+ movd [rdi], xmm11;
; begin epilog
pop rdi
pop rsi
+ RESTORE_XMM
UNSHADOW_ARGS
pop rbp
ret
diff --git a/vp8/encoder/x86/subtract_sse2.asm b/vp8/encoder/x86/subtract_sse2.asm
index 3fb23d0..95888f6 100644
--- a/vp8/encoder/x86/subtract_sse2.asm
+++ b/vp8/encoder/x86/subtract_sse2.asm
@@ -77,7 +77,7 @@
push rbp
mov rbp, rsp
SHADOW_ARGS_TO_STACK 4
- SAVE_XMM
+ SAVE_XMM 7
GET_GOT rbx
push rsi
push rdi
diff --git a/vp8/encoder/x86/temporal_filter_apply_sse2.asm b/vp8/encoder/x86/temporal_filter_apply_sse2.asm
index 0127b01..b777ef5 100644
--- a/vp8/encoder/x86/temporal_filter_apply_sse2.asm
+++ b/vp8/encoder/x86/temporal_filter_apply_sse2.asm
@@ -26,7 +26,7 @@
push rbp
mov rbp, rsp
SHADOW_ARGS_TO_STACK 8
- SAVE_XMM
+ SAVE_XMM 7
GET_GOT rbx
push rsi
push rdi
@@ -164,10 +164,10 @@
movdqa xmm6, [rdi+32]
movdqa xmm7, [rdi+48]
; += modifier
- paddw xmm4, xmm0
- paddw xmm5, xmm2
- paddw xmm6, xmm1
- paddw xmm7, xmm3
+ paddd xmm4, xmm0
+ paddd xmm5, xmm2
+ paddd xmm6, xmm1
+ paddd xmm7, xmm3
; write back
movdqa [rdi], xmm4
movdqa [rdi+16], xmm5
diff --git a/vp8/encoder/x86/variance_impl_sse2.asm b/vp8/encoder/x86/variance_impl_sse2.asm
index c2c30de..5becc73 100644
--- a/vp8/encoder/x86/variance_impl_sse2.asm
+++ b/vp8/encoder/x86/variance_impl_sse2.asm
@@ -85,6 +85,7 @@
push rbp
mov rbp, rsp
SHADOW_ARGS_TO_STACK 6
+ SAVE_XMM 7
push rbx
push rsi
push rdi
@@ -206,6 +207,7 @@
pop rdi
pop rsi
pop rbx
+ RESTORE_XMM
UNSHADOW_ARGS
pop rbp
ret
@@ -223,6 +225,7 @@
push rbp
mov rbp, rsp
SHADOW_ARGS_TO_STACK 4
+ SAVE_XMM 7
GET_GOT rbx
push rsi
push rdi
@@ -321,6 +324,7 @@
pop rdi
pop rsi
RESTORE_GOT
+ RESTORE_XMM
UNSHADOW_ARGS
pop rbp
ret
@@ -341,6 +345,7 @@
push rbp
mov rbp, rsp
SHADOW_ARGS_TO_STACK 6
+ SAVE_XMM 7
GET_GOT rbx
push rsi
push rdi
@@ -506,6 +511,7 @@
pop rdi
pop rsi
RESTORE_GOT
+ RESTORE_XMM
UNSHADOW_ARGS
pop rbp
ret
@@ -528,7 +534,7 @@
push rbp
mov rbp, rsp
SHADOW_ARGS_TO_STACK 9
- SAVE_XMM
+ SAVE_XMM 7
GET_GOT rbx
push rsi
push rdi
@@ -805,6 +811,7 @@
push rbp
mov rbp, rsp
SHADOW_ARGS_TO_STACK 7
+ SAVE_XMM 7
GET_GOT rbx
push rsi
push rdi
@@ -906,6 +913,7 @@
pop rdi
pop rsi
RESTORE_GOT
+ RESTORE_XMM
UNSHADOW_ARGS
pop rbp
ret
@@ -925,7 +933,7 @@
push rbp
mov rbp, rsp
SHADOW_ARGS_TO_STACK 7
- SAVE_XMM
+ SAVE_XMM 7
GET_GOT rbx
push rsi
push rdi
@@ -1041,6 +1049,7 @@
push rbp
mov rbp, rsp
SHADOW_ARGS_TO_STACK 7
+ SAVE_XMM 7
GET_GOT rbx
push rsi
push rdi
@@ -1127,6 +1136,7 @@
pop rdi
pop rsi
RESTORE_GOT
+ RESTORE_XMM
UNSHADOW_ARGS
pop rbp
ret
@@ -1146,7 +1156,7 @@
push rbp
mov rbp, rsp
SHADOW_ARGS_TO_STACK 7
- SAVE_XMM
+ SAVE_XMM 7
GET_GOT rbx
push rsi
push rdi
@@ -1254,6 +1264,7 @@
push rbp
mov rbp, rsp
SHADOW_ARGS_TO_STACK 7
+ SAVE_XMM 7
GET_GOT rbx
push rsi
push rdi
@@ -1338,6 +1349,7 @@
pop rdi
pop rsi
RESTORE_GOT
+ RESTORE_XMM
UNSHADOW_ARGS
pop rbp
ret
@@ -1357,7 +1369,7 @@
push rbp
mov rbp, rsp
SHADOW_ARGS_TO_STACK 7
- SAVE_XMM
+ SAVE_XMM 7
GET_GOT rbx
push rsi
push rdi
diff --git a/vp8/encoder/x86/variance_impl_ssse3.asm b/vp8/encoder/x86/variance_impl_ssse3.asm
index 3c0fef9..a582f8d 100644
--- a/vp8/encoder/x86/variance_impl_ssse3.asm
+++ b/vp8/encoder/x86/variance_impl_ssse3.asm
@@ -34,7 +34,7 @@
push rbp
mov rbp, rsp
SHADOW_ARGS_TO_STACK 9
- SAVE_XMM
+ SAVE_XMM 7
GET_GOT rbx
push rsi
push rdi
diff --git a/vp8/encoder/x86/x86_csystemdependent.c b/vp8/encoder/x86/x86_csystemdependent.c
index 8f2774b..b01319f 100644
--- a/vp8/encoder/x86/x86_csystemdependent.c
+++ b/vp8/encoder/x86/x86_csystemdependent.c
@@ -271,9 +271,7 @@
cpi->rtcd.variance.sad8x16x3 = vp8_sad8x16x3_sse3;
cpi->rtcd.variance.sad8x8x3 = vp8_sad8x8x3_sse3;
cpi->rtcd.variance.sad4x4x3 = vp8_sad4x4x3_sse3;
-#if !(CONFIG_REALTIME_ONLY)
cpi->rtcd.search.full_search = vp8_full_search_sadx3;
-#endif
cpi->rtcd.variance.sad16x16x4d = vp8_sad16x16x4d_sse3;
cpi->rtcd.variance.sad16x8x4d = vp8_sad16x8x4d_sse3;
cpi->rtcd.variance.sad8x16x4d = vp8_sad8x16x4d_sse3;
@@ -314,9 +312,9 @@
cpi->rtcd.variance.sad8x16x8 = vp8_sad8x16x8_sse4;
cpi->rtcd.variance.sad8x8x8 = vp8_sad8x8x8_sse4;
cpi->rtcd.variance.sad4x4x8 = vp8_sad4x4x8_sse4;
-#if !(CONFIG_REALTIME_ONLY)
cpi->rtcd.search.full_search = vp8_full_search_sadx8;
-#endif
+
+ cpi->rtcd.quantize.quantb = vp8_regular_quantize_b_sse4;
}
#endif
diff --git a/vp8/vp8_cx_iface.c b/vp8/vp8_cx_iface.c
index 6b13033..db60bfe 100644
--- a/vp8/vp8_cx_iface.c
+++ b/vp8/vp8_cx_iface.c
@@ -137,8 +137,8 @@
const vpx_codec_enc_cfg_t *cfg,
const struct vp8_extracfg *vp8_cfg)
{
- RANGE_CHECK(cfg, g_w, 1, 16384);
- RANGE_CHECK(cfg, g_h, 1, 16384);
+ RANGE_CHECK(cfg, g_w, 1, 16383); /* 14 bits available */
+ RANGE_CHECK(cfg, g_h, 1, 16383); /* 14 bits available */
RANGE_CHECK(cfg, g_timebase.den, 1, 1000000000);
RANGE_CHECK(cfg, g_timebase.num, 1, cfg->g_timebase.den);
RANGE_CHECK_HI(cfg, g_profile, 3);
@@ -175,16 +175,13 @@
"or kf_max_dist instead.");
RANGE_CHECK_BOOL(vp8_cfg, enable_auto_alt_ref);
+ RANGE_CHECK(vp8_cfg, cpu_used, -16, 16);
+
#if !(CONFIG_REALTIME_ONLY)
RANGE_CHECK(vp8_cfg, encoding_mode, VP8_BEST_QUALITY_ENCODING, VP8_REAL_TIME_ENCODING);
- RANGE_CHECK(vp8_cfg, cpu_used, -16, 16);
RANGE_CHECK_HI(vp8_cfg, noise_sensitivity, 6);
#else
RANGE_CHECK(vp8_cfg, encoding_mode, VP8_REAL_TIME_ENCODING, VP8_REAL_TIME_ENCODING);
-
- if (!((vp8_cfg->cpu_used >= -16 && vp8_cfg->cpu_used <= -4) || (vp8_cfg->cpu_used >= 4 && vp8_cfg->cpu_used <= 16)))
- ERROR("cpu_used out of range [-16..-4] or [4..16]");
-
RANGE_CHECK(vp8_cfg, noise_sensitivity, 0, 0);
#endif
@@ -198,8 +195,6 @@
#if !(CONFIG_REALTIME_ONLY)
if (cfg->g_pass == VPX_RC_LAST_PASS)
{
- int mb_r = (cfg->g_h + 15) / 16;
- int mb_c = (cfg->g_w + 15) / 16;
size_t packet_sz = sizeof(FIRSTPASS_STATS);
int n_packets = cfg->rc_twopass_stats_in.sz / packet_sz;
FIRSTPASS_STATS *stats;
@@ -310,6 +305,7 @@
}
oxcf->target_bandwidth = cfg.rc_target_bitrate;
+ oxcf->rc_max_intra_bitrate_pct = cfg.rc_max_intra_bitrate_pct;
oxcf->best_allowed_q = cfg.rc_min_quantizer;
oxcf->worst_allowed_q = cfg.rc_max_quantizer;
@@ -1090,7 +1086,7 @@
{0}, /* rc_twopass_stats_in */
#endif
256, /* rc_target_bandwidth */
-
+ 0, /* rc_max_intra_bitrate_pct */
4, /* rc_min_quantizer */
63, /* rc_max_quantizer */
100, /* rc_undershoot_pct */
diff --git a/vp8/vp8cx.mk b/vp8/vp8cx.mk
index 8037f9a..c178371 100644
--- a/vp8/vp8cx.mk
+++ b/vp8/vp8cx.mk
@@ -52,6 +52,8 @@
VP8_CX_SRCS-yes += encoder/encodemb.h
VP8_CX_SRCS-yes += encoder/encodemv.h
VP8_CX_SRCS-yes += encoder/firstpass.h
+VP8_CX_SRCS-yes += encoder/lookahead.c
+VP8_CX_SRCS-yes += encoder/lookahead.h
VP8_CX_SRCS-yes += encoder/mcomp.h
VP8_CX_SRCS-yes += encoder/modecosts.h
VP8_CX_SRCS-yes += encoder/onyx_int.h
@@ -115,6 +117,7 @@
VP8_CX_SRCS-$(HAVE_SSSE3) += encoder/x86/variance_impl_ssse3.asm
VP8_CX_SRCS-$(HAVE_SSSE3) += encoder/x86/quantize_ssse3.asm
VP8_CX_SRCS-$(HAVE_SSE4_1) += encoder/x86/sad_sse4.asm
+VP8_CX_SRCS-$(HAVE_SSE4_1) += encoder/x86/quantize_sse4.asm
VP8_CX_SRCS-$(ARCH_X86)$(ARCH_X86_64) += encoder/x86/quantize_mmx.asm
VP8_CX_SRCS-$(ARCH_X86)$(ARCH_X86_64) += encoder/x86/encodeopt.asm
VP8_CX_SRCS-$(ARCH_X86_64) += encoder/x86/ssim_opt.asm
diff --git a/vpx/vp8cx.h b/vpx/vp8cx.h
index 5ff6bdc..6fd161b 100644
--- a/vpx/vp8cx.h
+++ b/vpx/vp8cx.h
@@ -138,7 +138,7 @@
* onyx_if.c:vp8_set_speed_features().
* \todo List highlights of the changes at various levels.
*
- * \note Valid range: -16..16 or {-16..-4, 4..16} w/CONFIG_REALTIME_ONLY
+ * \note Valid range: -16..16
*/
VP8E_SET_CPUUSED = 13,
VP8E_SET_ENABLEAUTOALTREF, /**< control function to enable vp8 to automatic set and use altref frame */
diff --git a/vpx/vpx_encoder.h b/vpx/vpx_encoder.h
index 3760241..6dbce0d 100644
--- a/vpx/vpx_encoder.h
+++ b/vpx/vpx_encoder.h
@@ -398,6 +398,21 @@
unsigned int rc_target_bitrate;
+ /*!\brief Max data rate for Intra frames
+ *
+ * This value controls additional clamping on the maximum size of a
+ * keyframe. It is expressed as a percentage of the average
+ * per-frame bitrate, with the special (and default) value 0 meaning
+ * unlimited, or no additional clamping beyond the codec's built-in
+ * algorithm.
+ *
+ * For example, to allocate no more than 4.5 frames worth of bitrate
+ * to a keyframe, set this to 450.
+ *
+ */
+ unsigned int rc_max_intra_bitrate_pct;
+
+
/*
* quantizer settings
*/
diff --git a/vpx_ports/x86.h b/vpx_ports/x86.h
index 190c864..b0130fb 100644
--- a/vpx_ports/x86.h
+++ b/vpx_ports/x86.h
@@ -151,8 +151,8 @@
__asm__ __volatile__ ("pause \n\t")
#else
#if ARCH_X86_64
-/* No pause intrinsic for windows x64 */
-#define x86_pause_hint()
+/* Issue the pause hint through the _mm_pause() intrinsic. The expansion
+ * deliberately has no trailing ';' - the caller's own semicolon ends the
+ * statement, matching the other x86_pause_hint() variants in this file, so
+ * `if (c) x86_pause_hint(); else ...` expands to a single statement. */
+#define x86_pause_hint()\
+ _mm_pause()
#else
#define x86_pause_hint()\
__asm pause
diff --git a/vpx_ports/x86_abi_support.asm b/vpx_ports/x86_abi_support.asm
index be64cd7..37a3205 100644
--- a/vpx_ports/x86_abi_support.asm
+++ b/vpx_ports/x86_abi_support.asm
@@ -255,21 +255,48 @@
%define UNSHADOW_ARGS mov rsp, rbp
%endif
-; must keep XMM6:XMM15 (libvpx uses XMM6 and XMM7) on Win64 ABI
-; rsp register has to be aligned
+; Win64 ABI requires that XMM6:XMM15 are callee saved
+; SAVE_XMM n, [u]
+; store registers 6-n on the stack
+; if u is specified, use unaligned movs.
+; Win64 ABI requires 16 byte stack alignment, but the call then pushes an 8 byte
+; return address. Typically we follow this up with 'push rbp' - re-aligning the
+; stack - but in some cases this is not done and unaligned movs must be used.
%ifidn __OUTPUT_FORMAT__,x64
-%macro SAVE_XMM 0
- sub rsp, 32
- movdqa XMMWORD PTR [rsp], xmm6
- movdqa XMMWORD PTR [rsp+16], xmm7
+%macro SAVE_XMM 1-2 a
+ %if %1 < 6
+ %error Only xmm registers 6-15 must be preserved
+ %else
+ %assign last_xmm %1
+ %define movxmm movdq %+ %2
+ %assign xmm_stack_space ((last_xmm - 5) * 16)
+ sub rsp, xmm_stack_space
+ %assign i 6
+ %rep (last_xmm - 5)
+ movxmm [rsp + ((i - 6) * 16)], xmm %+ i
+ %assign i i+1
+ %endrep
+ %endif
%endmacro
%macro RESTORE_XMM 0
- movdqa xmm6, XMMWORD PTR [rsp]
- movdqa xmm7, XMMWORD PTR [rsp+16]
- add rsp, 32
+ %ifndef last_xmm
+ %error RESTORE_XMM must be paired with SAVE_XMM n
+ %else
+ %assign i last_xmm
+ %rep (last_xmm - 5)
+ movxmm xmm %+ i, [rsp +((i - 6) * 16)]
+ %assign i i-1
+ %endrep
+ add rsp, xmm_stack_space
+ ; there are a couple functions which return from multiple places.
+ ; otherwise, we could uncomment these:
+ ; %undef last_xmm
+ ; %undef xmm_stack_space
+ ; %undef movxmm
+ %endif
%endmacro
%else
-%macro SAVE_XMM 0
+%macro SAVE_XMM 1-2
%endmacro
%macro RESTORE_XMM 0
%endmacro
diff --git a/vpxenc.c b/vpxenc.c
old mode 100755
new mode 100644
index 39256b6..6ea83fb
--- a/vpxenc.c
+++ b/vpxenc.c
@@ -256,6 +256,16 @@
return stats->buf;
}
+/* Stereo 3D packed frame format.
+ *
+ * The selected value is handed to write_webm_file_header(), which serializes
+ * it unchanged as the StereoMode element inside the Video sub-element, so the
+ * numeric values below end up in the output file as-is.
+ * NOTE(review): the numbering (0-3, 11) presumably follows the container's
+ * StereoMode definition - confirm against the spec before renumbering.
+ */
+typedef enum stereo_format
+{
+ STEREO_FORMAT_MONO = 0, /* CLI name: "mono"; default in main() */
+ STEREO_FORMAT_LEFT_RIGHT = 1, /* CLI name: "left-right" */
+ STEREO_FORMAT_BOTTOM_TOP = 2, /* CLI name: "bottom-top" */
+ STEREO_FORMAT_TOP_BOTTOM = 3, /* CLI name: "top-bottom" */
+ STEREO_FORMAT_RIGHT_LEFT = 11 /* CLI name: "right-left" */
+} stereo_format_t;
+
enum video_file_type
{
FILE_TYPE_RAW,
@@ -606,7 +616,8 @@
static void
write_webm_file_header(EbmlGlobal *glob,
const vpx_codec_enc_cfg_t *cfg,
- const struct vpx_rational *fps)
+ const struct vpx_rational *fps,
+ stereo_format_t stereo_fmt)
{
{
EbmlLoc start;
@@ -650,6 +661,7 @@
Ebml_StartSubElement(glob, &videoStart, Video);
Ebml_SerializeUnsigned(glob, PixelWidth, pixelWidth);
Ebml_SerializeUnsigned(glob, PixelHeight, pixelHeight);
+ Ebml_SerializeUnsigned(glob, StereoMode, stereo_fmt);
Ebml_SerializeFloat(glob, FrameRate, frameRate);
Ebml_EndSubElement(glob, &videoStart); //Video
}
@@ -916,6 +928,16 @@
"Frame width");
static const arg_def_t height = ARG_DEF("h", "height", 1,
"Frame height");
+static const struct arg_enum_list stereo_mode_enum[] = { /* names accepted by the stereo-mode option -> stereo_format_t values */
+ {"mono" , STEREO_FORMAT_MONO},
+ {"left-right", STEREO_FORMAT_LEFT_RIGHT},
+ {"bottom-top", STEREO_FORMAT_BOTTOM_TOP},
+ {"top-bottom", STEREO_FORMAT_TOP_BOTTOM},
+ {"right-left", STEREO_FORMAT_RIGHT_LEFT},
+ {NULL, 0} /* NULL-name entry closes the list; presumably the terminator the arg parser stops at - TODO confirm */
+};
+static const arg_def_t stereo_mode = ARG_DEF_ENUM(NULL, "stereo-mode", 1,
+ "Stereo 3D video format", stereo_mode_enum);
static const arg_def_t timebase = ARG_DEF(NULL, "timebase", 1,
"Stream timebase (frame duration)");
static const arg_def_t error_resilient = ARG_DEF(NULL, "error-resilient", 1,
@@ -926,7 +948,7 @@
static const arg_def_t *global_args[] =
{
&use_yv12, &use_i420, &usage, &threads, &profile,
- &width, &height, &timebase, &framerate, &error_resilient,
+ &width, &height, &stereo_mode, &timebase, &framerate, &error_resilient,
&lag_in_frames, NULL
};
@@ -962,11 +984,14 @@
"Client initial buffer size (ms)");
static const arg_def_t buf_optimal_sz = ARG_DEF(NULL, "buf-optimal-sz", 1,
"Client optimal buffer size (ms)");
+static const arg_def_t max_intra_rate_pct = ARG_DEF(NULL, "max-intra-rate", 1,
+ "Max I-frame bitrate (pct)");
static const arg_def_t *rc_args[] =
{
&dropframe_thresh, &resize_allowed, &resize_up_thresh, &resize_down_thresh,
&end_usage, &target_bitrate, &min_quantizer, &max_quantizer,
&undershoot_pct, &overshoot_pct, &buf_sz, &buf_initial_sz, &buf_optimal_sz,
+ &max_intra_rate_pct,
NULL
};
@@ -1084,7 +1109,6 @@
#define ARG_CTRL_CNT_MAX 10
-
int main(int argc, const char **argv_)
{
vpx_codec_ctx_t encoder;
@@ -1120,6 +1144,7 @@
uint64_t psnr_samples_total = 0;
double psnr_totals[4] = {0, 0, 0, 0};
int psnr_count = 0;
+ stereo_format_t stereo_fmt = STEREO_FORMAT_MONO;
exec_name = argv_[0];
ebml.last_pts_ms = -1;
@@ -1259,6 +1284,8 @@
cfg.g_w = arg_parse_uint(&arg);
else if (arg_match(&arg, &height, argi))
cfg.g_h = arg_parse_uint(&arg);
+ else if (arg_match(&arg, &stereo_mode, argi))
+ stereo_fmt = arg_parse_enum_or_int(&arg);
else if (arg_match(&arg, &timebase, argi))
cfg.g_timebase = arg_parse_rational(&arg);
else if (arg_match(&arg, &error_resilient, argi))
@@ -1279,6 +1306,8 @@
cfg.rc_end_usage = arg_parse_enum_or_int(&arg);
else if (arg_match(&arg, &target_bitrate, argi))
cfg.rc_target_bitrate = arg_parse_uint(&arg);
+ else if (arg_match(&arg, &max_intra_rate_pct, argi))
+ cfg.rc_max_intra_bitrate_pct = arg_parse_uint(&arg);
else if (arg_match(&arg, &min_quantizer, argi))
cfg.rc_min_quantizer = arg_parse_uint(&arg);
else if (arg_match(&arg, &max_quantizer, argi))
@@ -1557,7 +1586,7 @@
if(write_webm)
{
ebml.stream = outfile;
- write_webm_file_header(&ebml, &cfg, &arg_framerate);
+ write_webm_file_header(&ebml, &cfg, &arg_framerate, stereo_fmt);
}
else
write_ivf_file_header(outfile, &cfg, codec->fourcc, 0);