Merge "Add static syntax to total_adj_strong_thresh"
diff --git a/test/consistency_test.cc b/test/consistency_test.cc
index 6f5f452..9c2fd55 100644
--- a/test/consistency_test.cc
+++ b/test/consistency_test.cc
@@ -23,11 +23,11 @@
#include "test/clear_system_state.h"
#include "test/register_state_check.h"
#include "test/util.h"
-#include "vp9/encoder/vp9_ssim.h"
+#include "vpx_dsp/ssim.h"
#include "vpx_mem/vpx_mem.h"
extern "C"
-double vp9_get_ssim_metrics(uint8_t *img1, int img1_pitch,
+double vpx_get_ssim_metrics(uint8_t *img1, int img1_pitch,
uint8_t *img2, int img2_pitch,
int width, int height,
Ssimv *sv2, Metrics *m,
@@ -144,7 +144,7 @@
double CheckConsistency(int frame) {
EXPECT_LT(frame, 2)<< "Frame to check has to be less than 2.";
return
- vp9_get_ssim_metrics(source_data_[frame], source_stride_,
+ vpx_get_ssim_metrics(source_data_[frame], source_stride_,
reference_data_[frame], reference_stride_,
width_, height_, ssim_array_, &metrics_, 1);
}
diff --git a/vp8/common/rtcd_defs.pl b/vp8/common/rtcd_defs.pl
index d65ca60..7924ae7 100644
--- a/vp8/common/rtcd_defs.pl
+++ b/vp8/common/rtcd_defs.pl
@@ -255,19 +255,6 @@
}
#
-# Structured Similarity (SSIM)
-#
-if (vpx_config("CONFIG_INTERNAL_STATS") eq "yes") {
- $opts{arch} eq "x86_64" and $sse2_on_x86_64 = "sse2";
-
- add_proto qw/void vp8_ssim_parms_8x8/, "unsigned char *s, int sp, unsigned char *r, int rp, unsigned long *sum_s, unsigned long *sum_r, unsigned long *sum_sq_s, unsigned long *sum_sq_r, unsigned long *sum_sxr";
- specialize qw/vp8_ssim_parms_8x8/, "$sse2_on_x86_64";
-
- add_proto qw/void vp8_ssim_parms_16x16/, "unsigned char *s, int sp, unsigned char *r, int rp, unsigned long *sum_s, unsigned long *sum_r, unsigned long *sum_sq_s, unsigned long *sum_sq_r, unsigned long *sum_sxr";
- specialize qw/vp8_ssim_parms_16x16/, "$sse2_on_x86_64";
-}
-
-#
# Forward DCT
#
add_proto qw/void vp8_short_fdct4x4/, "short *input, short *output, int pitch";
diff --git a/vp8/encoder/onyx_if.c b/vp8/encoder/onyx_if.c
index c2a7ac4..d2fb05a 100644
--- a/vp8/encoder/onyx_if.c
+++ b/vp8/encoder/onyx_if.c
@@ -74,26 +74,7 @@
#if CONFIG_INTERNAL_STATS
#include "math.h"
-
-extern double vp8_calc_ssim
-(
- YV12_BUFFER_CONFIG *source,
- YV12_BUFFER_CONFIG *dest,
- int lumamask,
- double *weight
-);
-
-
-extern double vp8_calc_ssimg
-(
- YV12_BUFFER_CONFIG *source,
- YV12_BUFFER_CONFIG *dest,
- double *ssim_y,
- double *ssim_u,
- double *ssim_v
-);
-
-
+#include "vpx_dsp/ssim.h"
#endif
@@ -5741,8 +5722,8 @@
cpi->total_sq_error2 += sq_error2;
cpi->totalp += frame_psnr2;
- frame_ssim2 = vp8_calc_ssim(cpi->Source,
- &cm->post_proc_buffer, 1, &weight);
+ frame_ssim2 = vpx_calc_ssim(cpi->Source,
+ &cm->post_proc_buffer, &weight);
cpi->summed_quality += frame_ssim2 * weight;
cpi->summed_weights += weight;
@@ -5772,7 +5753,7 @@
if (cpi->b_calculate_ssimg)
{
double y, u, v, frame_all;
- frame_all = vp8_calc_ssimg(cpi->Source, cm->frame_to_show,
+ frame_all = vpx_calc_ssimg(cpi->Source, cm->frame_to_show,
&y, &u, &v);
if (cpi->oxcf.number_of_layers > 1)
diff --git a/vp8/encoder/ssim.c b/vp8/encoder/ssim.c
deleted file mode 100644
index e751608..0000000
--- a/vp8/encoder/ssim.c
+++ /dev/null
@@ -1,233 +0,0 @@
-/*
- * Copyright (c) 2010 The WebM project authors. All Rights Reserved.
- *
- * Use of this source code is governed by a BSD-style license
- * that can be found in the LICENSE file in the root of the source
- * tree. An additional intellectual property rights grant can be found
- * in the file PATENTS. All contributing project authors may
- * be found in the AUTHORS file in the root of the source tree.
- */
-
-
-#include "onyx_int.h"
-
-void vp8_ssim_parms_16x16_c
-(
- unsigned char *s,
- int sp,
- unsigned char *r,
- int rp,
- unsigned long *sum_s,
- unsigned long *sum_r,
- unsigned long *sum_sq_s,
- unsigned long *sum_sq_r,
- unsigned long *sum_sxr
-)
-{
- int i,j;
- for(i=0;i<16;i++,s+=sp,r+=rp)
- {
- for(j=0;j<16;j++)
- {
- *sum_s += s[j];
- *sum_r += r[j];
- *sum_sq_s += s[j] * s[j];
- *sum_sq_r += r[j] * r[j];
- *sum_sxr += s[j] * r[j];
- }
- }
-}
-void vp8_ssim_parms_8x8_c
-(
- unsigned char *s,
- int sp,
- unsigned char *r,
- int rp,
- unsigned long *sum_s,
- unsigned long *sum_r,
- unsigned long *sum_sq_s,
- unsigned long *sum_sq_r,
- unsigned long *sum_sxr
-)
-{
- int i,j;
- for(i=0;i<8;i++,s+=sp,r+=rp)
- {
- for(j=0;j<8;j++)
- {
- *sum_s += s[j];
- *sum_r += r[j];
- *sum_sq_s += s[j] * s[j];
- *sum_sq_r += r[j] * r[j];
- *sum_sxr += s[j] * r[j];
- }
- }
-}
-
-const static int64_t cc1 = 26634; // (64^2*(.01*255)^2
-const static int64_t cc2 = 239708; // (64^2*(.03*255)^2
-
-static double similarity
-(
- unsigned long sum_s,
- unsigned long sum_r,
- unsigned long sum_sq_s,
- unsigned long sum_sq_r,
- unsigned long sum_sxr,
- int count
-)
-{
- int64_t ssim_n, ssim_d;
- int64_t c1, c2;
-
- //scale the constants by number of pixels
- c1 = (cc1*count*count)>>12;
- c2 = (cc2*count*count)>>12;
-
- ssim_n = (2*sum_s*sum_r+ c1)*((int64_t) 2*count*sum_sxr-
- (int64_t) 2*sum_s*sum_r+c2);
-
- ssim_d = (sum_s*sum_s +sum_r*sum_r+c1)*
- ((int64_t)count*sum_sq_s-(int64_t)sum_s*sum_s +
- (int64_t)count*sum_sq_r-(int64_t) sum_r*sum_r +c2) ;
-
- return ssim_n * 1.0 / ssim_d;
-}
-
-static double ssim_16x16(unsigned char *s,int sp, unsigned char *r,int rp)
-{
- unsigned long sum_s=0,sum_r=0,sum_sq_s=0,sum_sq_r=0,sum_sxr=0;
- vp8_ssim_parms_16x16(s, sp, r, rp, &sum_s, &sum_r, &sum_sq_s, &sum_sq_r, &sum_sxr);
- return similarity(sum_s, sum_r, sum_sq_s, sum_sq_r, sum_sxr, 256);
-}
-static double ssim_8x8(unsigned char *s,int sp, unsigned char *r,int rp)
-{
- unsigned long sum_s=0,sum_r=0,sum_sq_s=0,sum_sq_r=0,sum_sxr=0;
- vp8_ssim_parms_8x8(s, sp, r, rp, &sum_s, &sum_r, &sum_sq_s, &sum_sq_r, &sum_sxr);
- return similarity(sum_s, sum_r, sum_sq_s, sum_sq_r, sum_sxr, 64);
-}
-
-// TODO: (jbb) tried to scale this function such that we may be able to use it
-// for distortion metric in mode selection code ( provided we do a reconstruction)
-long dssim(unsigned char *s,int sp, unsigned char *r,int rp)
-{
- unsigned long sum_s=0,sum_r=0,sum_sq_s=0,sum_sq_r=0,sum_sxr=0;
- int64_t ssim3;
- int64_t ssim_n1,ssim_n2;
- int64_t ssim_d1,ssim_d2;
- int64_t ssim_t1,ssim_t2;
- int64_t c1, c2;
-
- // normalize by 256/64
- c1 = cc1*16;
- c2 = cc2*16;
-
- vp8_ssim_parms_16x16(s, sp, r, rp, &sum_s, &sum_r, &sum_sq_s, &sum_sq_r, &sum_sxr);
- ssim_n1 = (2*sum_s*sum_r+ c1);
-
- ssim_n2 =((int64_t) 2*256*sum_sxr-(int64_t) 2*sum_s*sum_r+c2);
-
- ssim_d1 =((int64_t)sum_s*sum_s +(int64_t)sum_r*sum_r+c1);
-
- ssim_d2 = (256 * (int64_t) sum_sq_s-(int64_t) sum_s*sum_s +
- (int64_t) 256*sum_sq_r-(int64_t) sum_r*sum_r +c2) ;
-
- ssim_t1 = 256 - 256 * ssim_n1 / ssim_d1;
- ssim_t2 = 256 - 256 * ssim_n2 / ssim_d2;
-
- ssim3 = 256 *ssim_t1 * ssim_t2;
- if(ssim3 <0 )
- ssim3=0;
- return (long)( ssim3 );
-}
-
-// We are using a 8x8 moving window with starting location of each 8x8 window
-// on the 4x4 pixel grid. Such arrangement allows the windows to overlap
-// block boundaries to penalize blocking artifacts.
-double vp8_ssim2
-(
- unsigned char *img1,
- unsigned char *img2,
- int stride_img1,
- int stride_img2,
- int width,
- int height
-)
-{
- int i,j;
- int samples =0;
- double ssim_total=0;
-
- // sample point start with each 4x4 location
- for(i=0; i < height-8; i+=4, img1 += stride_img1*4, img2 += stride_img2*4)
- {
- for(j=0; j < width-8; j+=4 )
- {
- double v = ssim_8x8(img1+j, stride_img1, img2+j, stride_img2);
- ssim_total += v;
- samples++;
- }
- }
- ssim_total /= samples;
- return ssim_total;
-}
-double vp8_calc_ssim
-(
- YV12_BUFFER_CONFIG *source,
- YV12_BUFFER_CONFIG *dest,
- int lumamask,
- double *weight
-)
-{
- double a, b, c;
- double ssimv;
-
- a = vp8_ssim2(source->y_buffer, dest->y_buffer,
- source->y_stride, dest->y_stride, source->y_width,
- source->y_height);
-
- b = vp8_ssim2(source->u_buffer, dest->u_buffer,
- source->uv_stride, dest->uv_stride, source->uv_width,
- source->uv_height);
-
- c = vp8_ssim2(source->v_buffer, dest->v_buffer,
- source->uv_stride, dest->uv_stride, source->uv_width,
- source->uv_height);
-
- ssimv = a * .8 + .1 * (b + c);
-
- *weight = 1;
-
- return ssimv;
-}
-
-double vp8_calc_ssimg
-(
- YV12_BUFFER_CONFIG *source,
- YV12_BUFFER_CONFIG *dest,
- double *ssim_y,
- double *ssim_u,
- double *ssim_v
-)
-{
- double ssim_all = 0;
- double a, b, c;
-
- a = vp8_ssim2(source->y_buffer, dest->y_buffer,
- source->y_stride, dest->y_stride, source->y_width,
- source->y_height);
-
- b = vp8_ssim2(source->u_buffer, dest->u_buffer,
- source->uv_stride, dest->uv_stride, source->uv_width,
- source->uv_height);
-
- c = vp8_ssim2(source->v_buffer, dest->v_buffer,
- source->uv_stride, dest->uv_stride, source->uv_width,
- source->uv_height);
- *ssim_y = a;
- *ssim_u = b;
- *ssim_v = c;
- ssim_all = (a * 4 + b + c) /6;
-
- return ssim_all;
-}
diff --git a/vp8/vp8cx.mk b/vp8/vp8cx.mk
index 59f9fe8..ea7d472 100644
--- a/vp8/vp8cx.mk
+++ b/vp8/vp8cx.mk
@@ -65,7 +65,6 @@
VP8_CX_SRCS-yes += encoder/rdopt.c
VP8_CX_SRCS-yes += encoder/segmentation.c
VP8_CX_SRCS-yes += encoder/segmentation.h
-VP8_CX_SRCS-$(CONFIG_INTERNAL_STATS) += encoder/ssim.c
VP8_CX_SRCS-yes += encoder/tokenize.c
VP8_CX_SRCS-yes += encoder/dct_value_cost.h
VP8_CX_SRCS-yes += encoder/dct_value_tokens.h
@@ -97,7 +96,6 @@
VP8_CX_SRCS-$(HAVE_SSE2) += encoder/x86/vp8_enc_stubs_sse2.c
VP8_CX_SRCS-$(ARCH_X86)$(ARCH_X86_64) += encoder/x86/quantize_mmx.asm
VP8_CX_SRCS-$(ARCH_X86)$(ARCH_X86_64) += encoder/x86/encodeopt.asm
-VP8_CX_SRCS-$(ARCH_X86_64) += encoder/x86/ssim_opt_x86_64.asm
ifeq ($(CONFIG_REALTIME_ONLY),yes)
VP8_CX_SRCS_REMOVE-$(HAVE_SSE2) += encoder/x86/temporal_filter_apply_sse2.asm
diff --git a/vp9/common/vp9_rtcd_defs.pl b/vp9/common/vp9_rtcd_defs.pl
index f029bbe..737fc56 100644
--- a/vp9/common/vp9_rtcd_defs.pl
+++ b/vp9/common/vp9_rtcd_defs.pl
@@ -261,17 +261,6 @@
specialize qw/vp9_fdct8x8_quant sse2 ssse3 neon/;
}
-#
-# Structured Similarity (SSIM)
-#
-if (vpx_config("CONFIG_INTERNAL_STATS") eq "yes") {
- add_proto qw/void vp9_ssim_parms_8x8/, "uint8_t *s, int sp, uint8_t *r, int rp, unsigned long *sum_s, unsigned long *sum_r, unsigned long *sum_sq_s, unsigned long *sum_sq_r, unsigned long *sum_sxr";
- specialize qw/vp9_ssim_parms_8x8/, "$sse2_x86_64";
-
- add_proto qw/void vp9_ssim_parms_16x16/, "uint8_t *s, int sp, uint8_t *r, int rp, unsigned long *sum_s, unsigned long *sum_r, unsigned long *sum_sq_s, unsigned long *sum_sq_r, unsigned long *sum_sxr";
- specialize qw/vp9_ssim_parms_16x16/, "$sse2_x86_64";
-}
-
# fdct functions
if (vpx_config("CONFIG_VP9_HIGHBITDEPTH") eq "yes") {
@@ -330,14 +319,6 @@
add_proto qw/void vp9_highbd_quantize_fp_32x32/, "const tran_low_t *coeff_ptr, intptr_t n_coeffs, int skip_block, const int16_t *zbin_ptr, const int16_t *round_ptr, const int16_t *quant_ptr, const int16_t *quant_shift_ptr, tran_low_t *qcoeff_ptr, tran_low_t *dqcoeff_ptr, const int16_t *dequant_ptr, uint16_t *eob_ptr, const int16_t *scan, const int16_t *iscan";
specialize qw/vp9_highbd_quantize_fp_32x32/;
- #
- # Structured Similarity (SSIM)
- #
- if (vpx_config("CONFIG_INTERNAL_STATS") eq "yes") {
- add_proto qw/void vp9_highbd_ssim_parms_8x8/, "uint16_t *s, int sp, uint16_t *r, int rp, uint32_t *sum_s, uint32_t *sum_r, uint32_t *sum_sq_s, uint32_t *sum_sq_r, uint32_t *sum_sxr";
- specialize qw/vp9_highbd_ssim_parms_8x8/;
- }
-
# fdct functions
add_proto qw/void vp9_highbd_fht4x4/, "const int16_t *input, tran_low_t *output, int stride, int tx_type";
specialize qw/vp9_highbd_fht4x4/;
diff --git a/vp9/common/vp9_systemdependent.h b/vp9/common/vp9_systemdependent.h
index fc77762..e4178b2 100644
--- a/vp9/common/vp9_systemdependent.h
+++ b/vp9/common/vp9_systemdependent.h
@@ -33,16 +33,6 @@
#define vp9_clear_system_state()
#endif
-#if defined(_MSC_VER) && _MSC_VER < 1800
-// round is not defined in MSVC before VS2013.
-static INLINE int round(double x) {
- if (x < 0)
- return (int)ceil(x - 0.5);
- else
- return (int)floor(x + 0.5);
-}
-#endif
-
// use GNU builtins where available.
#if defined(__GNUC__) && \
((__GNUC__ == 3 && __GNUC_MINOR__ >= 4) || __GNUC__ >= 4)
diff --git a/vp9/encoder/vp9_aq_cyclicrefresh.c b/vp9/encoder/vp9_aq_cyclicrefresh.c
index b619063..a13f0c0 100644
--- a/vp9/encoder/vp9_aq_cyclicrefresh.c
+++ b/vp9/encoder/vp9_aq_cyclicrefresh.c
@@ -459,7 +459,10 @@
cr->time_for_refresh = 0;
// Use larger delta-qp (increase rate_ratio_qdelta) for first few (~4)
// periods of the refresh cycle, after a key frame.
- if (rc->frames_since_key < 4 * cr->percent_refresh)
+ // Account for larger interval on base layer for temporal layers.
+ if (cr->percent_refresh > 0 &&
+ rc->frames_since_key < (4 * cpi->svc.number_temporal_layers) *
+ (100 / cr->percent_refresh))
cr->rate_ratio_qdelta = 3.0;
else
cr->rate_ratio_qdelta = 2.0;
diff --git a/vp9/encoder/vp9_encoder.c b/vp9/encoder/vp9_encoder.c
index 7184801..b4e07a6 100644
--- a/vp9/encoder/vp9_encoder.c
+++ b/vp9/encoder/vp9_encoder.c
@@ -18,6 +18,9 @@
#include "./vpx_scale_rtcd.h"
#include "vpx/internal/vpx_psnr.h"
#include "vpx_dsp/vpx_filter.h"
+#if CONFIG_INTERNAL_STATS
+#include "vpx_dsp/ssim.h"
+#endif
#include "vpx_ports/mem.h"
#include "vpx_ports/vpx_timer.h"
#include "vpx_scale/vpx_scale.h"
@@ -51,9 +54,6 @@
#include "vp9/encoder/vp9_segmentation.h"
#include "vp9/encoder/vp9_skin_detection.h"
#include "vp9/encoder/vp9_speed_features.h"
-#if CONFIG_INTERNAL_STATS
-#include "vp9/encoder/vp9_ssim.h"
-#endif
#include "vp9/encoder/vp9_svc_layercontext.h"
#include "vp9/encoder/vp9_temporal_filter.h"
@@ -4416,13 +4416,13 @@
#if CONFIG_VP9_HIGHBITDEPTH
if (cm->use_highbitdepth) {
- frame_ssim2 = vp9_highbd_calc_ssim(orig, recon, &weight,
+ frame_ssim2 = vpx_highbd_calc_ssim(orig, recon, &weight,
(int)cm->bit_depth);
} else {
- frame_ssim2 = vp9_calc_ssim(orig, recon, &weight);
+ frame_ssim2 = vpx_calc_ssim(orig, recon, &weight);
}
#else
- frame_ssim2 = vp9_calc_ssim(orig, recon, &weight);
+ frame_ssim2 = vpx_calc_ssim(orig, recon, &weight);
#endif // CONFIG_VP9_HIGHBITDEPTH
cpi->worst_ssim= MIN(cpi->worst_ssim, frame_ssim2);
@@ -4431,13 +4431,13 @@
#if CONFIG_VP9_HIGHBITDEPTH
if (cm->use_highbitdepth) {
- frame_ssim2 = vp9_highbd_calc_ssim(
+ frame_ssim2 = vpx_highbd_calc_ssim(
orig, &cm->post_proc_buffer, &weight, (int)cm->bit_depth);
} else {
- frame_ssim2 = vp9_calc_ssim(orig, &cm->post_proc_buffer, &weight);
+ frame_ssim2 = vpx_calc_ssim(orig, &cm->post_proc_buffer, &weight);
}
#else
- frame_ssim2 = vp9_calc_ssim(orig, &cm->post_proc_buffer, &weight);
+ frame_ssim2 = vpx_calc_ssim(orig, &cm->post_proc_buffer, &weight);
#endif // CONFIG_VP9_HIGHBITDEPTH
cpi->summedp_quality += frame_ssim2 * weight;
@@ -4472,7 +4472,7 @@
if (!cm->use_highbitdepth)
#endif
{
- double this_inconsistency = vp9_get_ssim_metrics(
+ double this_inconsistency = vpx_get_ssim_metrics(
cpi->Source->y_buffer, cpi->Source->y_stride,
cm->frame_to_show->y_buffer, cm->frame_to_show->y_stride,
cpi->Source->y_width, cpi->Source->y_height, cpi->ssim_vars,
@@ -4492,14 +4492,14 @@
double y, u, v, frame_all;
#if CONFIG_VP9_HIGHBITDEPTH
if (cm->use_highbitdepth) {
- frame_all = vp9_highbd_calc_ssimg(cpi->Source, cm->frame_to_show, &y,
+ frame_all = vpx_highbd_calc_ssimg(cpi->Source, cm->frame_to_show, &y,
&u, &v, (int)cm->bit_depth);
} else {
- frame_all = vp9_calc_ssimg(cpi->Source, cm->frame_to_show, &y, &u,
+ frame_all = vpx_calc_ssimg(cpi->Source, cm->frame_to_show, &y, &u,
&v);
}
#else
- frame_all = vp9_calc_ssimg(cpi->Source, cm->frame_to_show, &y, &u, &v);
+ frame_all = vpx_calc_ssimg(cpi->Source, cm->frame_to_show, &y, &u, &v);
#endif // CONFIG_VP9_HIGHBITDEPTH
adjust_image_stat(y, u, v, frame_all, &cpi->ssimg);
}
@@ -4508,7 +4508,7 @@
#endif
{
double y, u, v, frame_all;
- frame_all = vp9_calc_fastssim(cpi->Source, cm->frame_to_show, &y, &u,
+ frame_all = vpx_calc_fastssim(cpi->Source, cm->frame_to_show, &y, &u,
&v);
adjust_image_stat(y, u, v, frame_all, &cpi->fastssim);
/* TODO(JBB): add 10/12 bit support */
@@ -4518,7 +4518,7 @@
#endif
{
double y, u, v, frame_all;
- frame_all = vp9_psnrhvs(cpi->Source, cm->frame_to_show, &y, &u, &v);
+ frame_all = vpx_psnrhvs(cpi->Source, cm->frame_to_show, &y, &u, &v);
adjust_image_stat(y, u, v, frame_all, &cpi->psnrhvs);
}
}
diff --git a/vp9/encoder/vp9_encoder.h b/vp9/encoder/vp9_encoder.h
index 4d4da92..78d55e1 100644
--- a/vp9/encoder/vp9_encoder.h
+++ b/vp9/encoder/vp9_encoder.h
@@ -16,6 +16,10 @@
#include "./vpx_config.h"
#include "vpx/internal/vpx_codec_internal.h"
#include "vpx/vp8cx.h"
+#if CONFIG_INTERNAL_STATS
+#include "vpx_dsp/ssim.h"
+#endif
+#include "vpx_dsp/variance.h"
#include "vpx_util/vpx_thread.h"
#include "vp9/common/vp9_alloccommon.h"
@@ -34,13 +38,9 @@
#include "vp9/encoder/vp9_quantize.h"
#include "vp9/encoder/vp9_ratectrl.h"
#include "vp9/encoder/vp9_rd.h"
-#if CONFIG_INTERNAL_STATS
-#include "vp9/encoder/vp9_ssim.h"
-#endif
#include "vp9/encoder/vp9_speed_features.h"
#include "vp9/encoder/vp9_svc_layercontext.h"
#include "vp9/encoder/vp9_tokenize.h"
-#include "vpx_dsp/variance.h"
#if CONFIG_VP9_TEMPORAL_DENOISING
#include "vp9/encoder/vp9_denoiser.h"
diff --git a/vp9/encoder/vp9_mcomp.c b/vp9/encoder/vp9_mcomp.c
index 5d3bc5d..aa3e51c 100644
--- a/vp9/encoder/vp9_mcomp.c
+++ b/vp9/encoder/vp9_mcomp.c
@@ -256,6 +256,27 @@
} \
}
+// TODO(yunqingwang): SECOND_LEVEL_CHECKS_BEST is a rewrite of
+// SECOND_LEVEL_CHECKS, and SECOND_LEVEL_CHECKS should be rewritten
+// later in the same way.
+#define SECOND_LEVEL_CHECKS_BEST \
+ { \
+ unsigned int second; \
+ int br0 = br; \
+ int bc0 = bc; \
+ assert(tr == br || tc == bc); \
+ if (tr == br && tc != bc) { \
+ kc = bc - tc; \
+ } else if (tr != br && tc == bc) { \
+ kr = br - tr; \
+ } \
+ CHECK_BETTER(second, br0 + kr, bc0); \
+ CHECK_BETTER(second, br0, bc0 + kc); \
+ if (br0 != br || bc0 != bc) { \
+ CHECK_BETTER(second, br0 + kr, bc0 + kc); \
+ } \
+ }
+
#define SETUP_SUBPEL_SEARCH \
const uint8_t *const z = x->plane[0].src.buf; \
const int src_stride = x->plane[0].src.stride; \
@@ -636,7 +657,6 @@
const MACROBLOCKD *xd = &x->e_mbd;
unsigned int besterr = INT_MAX;
unsigned int sse;
- unsigned int whichdir = 0;
int thismse;
const int y_stride = xd->plane[0].pre[0].stride;
const int offset = bestmv->row * y_stride + bestmv->col;
@@ -657,6 +677,7 @@
const MV *search_step = search_step_table;
int idx, best_idx = -1;
unsigned int cost_array[5];
+ int kr, kc;
if (!(allow_hp && vp9_use_mv_hp(ref_mv)))
if (round == 3)
@@ -703,8 +724,11 @@
}
// Check diagonal sub-pixel position
- tc = bc + (cost_array[0] <= cost_array[1] ? -hstep : hstep);
- tr = br + (cost_array[2] <= cost_array[3] ? -hstep : hstep);
+ kc = (cost_array[0] <= cost_array[1] ? -hstep : hstep);
+ kr = (cost_array[2] <= cost_array[3] ? -hstep : hstep);
+
+ tc = bc + kc;
+ tr = br + kr;
if (tc >= minc && tc <= maxc && tr >= minr && tr <= maxr) {
const uint8_t *const pre_address = y + (tr >> 3) * y_stride + (tc >> 3);
MV this_mv = {tr, tc};
@@ -736,7 +760,7 @@
}
if (iters_per_step > 1 && best_idx != -1)
- SECOND_LEVEL_CHECKS;
+ SECOND_LEVEL_CHECKS_BEST;
tr = br;
tc = bc;
diff --git a/vp9/encoder/vp9_rdopt.c b/vp9/encoder/vp9_rdopt.c
index da80a99..0e12d8c 100644
--- a/vp9/encoder/vp9_rdopt.c
+++ b/vp9/encoder/vp9_rdopt.c
@@ -2488,9 +2488,8 @@
}
// We don't include the cost of the second reference here, because there
- // are only three options: Last/Golden, ARF/Last or Golden/ARF, or in other
- // words if you present them in that order, the second one is always known
- // if the first is known.
+ // are only two options: Last/ARF or Golden/ARF; the second reference is
+ // always known, which is ARF.
//
// Under some circumstances we discount the cost of new mv mode to encourage
// initiation of a motion field.
diff --git a/vp9/encoder/x86/vp9_ssim_opt_x86_64.asm b/vp9/encoder/x86/vp9_ssim_opt_x86_64.asm
deleted file mode 100644
index 455d10d..0000000
--- a/vp9/encoder/x86/vp9_ssim_opt_x86_64.asm
+++ /dev/null
@@ -1,216 +0,0 @@
-;
-; Copyright (c) 2010 The WebM project authors. All Rights Reserved.
-;
-; Use of this source code is governed by a BSD-style license
-; that can be found in the LICENSE file in the root of the source
-; tree. An additional intellectual property rights grant can be found
-; in the file PATENTS. All contributing project authors may
-; be found in the AUTHORS file in the root of the source tree.
-;
-
-%include "vpx_ports/x86_abi_support.asm"
-
-; tabulate_ssim - sums sum_s,sum_r,sum_sq_s,sum_sq_r, sum_sxr
-%macro TABULATE_SSIM 0
- paddusw xmm15, xmm3 ; sum_s
- paddusw xmm14, xmm4 ; sum_r
- movdqa xmm1, xmm3
- pmaddwd xmm1, xmm1
- paddd xmm13, xmm1 ; sum_sq_s
- movdqa xmm2, xmm4
- pmaddwd xmm2, xmm2
- paddd xmm12, xmm2 ; sum_sq_r
- pmaddwd xmm3, xmm4
- paddd xmm11, xmm3 ; sum_sxr
-%endmacro
-
-; Sum across the register %1 starting with q words
-%macro SUM_ACROSS_Q 1
- movdqa xmm2,%1
- punpckldq %1,xmm0
- punpckhdq xmm2,xmm0
- paddq %1,xmm2
- movdqa xmm2,%1
- punpcklqdq %1,xmm0
- punpckhqdq xmm2,xmm0
- paddq %1,xmm2
-%endmacro
-
-; Sum across the register %1 starting with q words
-%macro SUM_ACROSS_W 1
- movdqa xmm1, %1
- punpcklwd %1,xmm0
- punpckhwd xmm1,xmm0
- paddd %1, xmm1
- SUM_ACROSS_Q %1
-%endmacro
-;void ssim_parms_sse2(
-; unsigned char *s,
-; int sp,
-; unsigned char *r,
-; int rp
-; unsigned long *sum_s,
-; unsigned long *sum_r,
-; unsigned long *sum_sq_s,
-; unsigned long *sum_sq_r,
-; unsigned long *sum_sxr);
-;
-; TODO: Use parm passing through structure, probably don't need the pxors
-; ( calling app will initialize to 0 ) could easily fit everything in sse2
-; without too much hastle, and can probably do better estimates with psadw
-; or pavgb At this point this is just meant to be first pass for calculating
-; all the parms needed for 16x16 ssim so we can play with dssim as distortion
-; in mode selection code.
-global sym(vp9_ssim_parms_16x16_sse2) PRIVATE
-sym(vp9_ssim_parms_16x16_sse2):
- push rbp
- mov rbp, rsp
- SHADOW_ARGS_TO_STACK 9
- SAVE_XMM 15
- push rsi
- push rdi
- ; end prolog
-
- mov rsi, arg(0) ;s
- mov rcx, arg(1) ;sp
- mov rdi, arg(2) ;r
- mov rax, arg(3) ;rp
-
- pxor xmm0, xmm0
- pxor xmm15,xmm15 ;sum_s
- pxor xmm14,xmm14 ;sum_r
- pxor xmm13,xmm13 ;sum_sq_s
- pxor xmm12,xmm12 ;sum_sq_r
- pxor xmm11,xmm11 ;sum_sxr
-
- mov rdx, 16 ;row counter
-.NextRow:
-
- ;grab source and reference pixels
- movdqu xmm5, [rsi]
- movdqu xmm6, [rdi]
- movdqa xmm3, xmm5
- movdqa xmm4, xmm6
- punpckhbw xmm3, xmm0 ; high_s
- punpckhbw xmm4, xmm0 ; high_r
-
- TABULATE_SSIM
-
- movdqa xmm3, xmm5
- movdqa xmm4, xmm6
- punpcklbw xmm3, xmm0 ; low_s
- punpcklbw xmm4, xmm0 ; low_r
-
- TABULATE_SSIM
-
- add rsi, rcx ; next s row
- add rdi, rax ; next r row
-
- dec rdx ; counter
- jnz .NextRow
-
- SUM_ACROSS_W xmm15
- SUM_ACROSS_W xmm14
- SUM_ACROSS_Q xmm13
- SUM_ACROSS_Q xmm12
- SUM_ACROSS_Q xmm11
-
- mov rdi,arg(4)
- movd [rdi], xmm15;
- mov rdi,arg(5)
- movd [rdi], xmm14;
- mov rdi,arg(6)
- movd [rdi], xmm13;
- mov rdi,arg(7)
- movd [rdi], xmm12;
- mov rdi,arg(8)
- movd [rdi], xmm11;
-
- ; begin epilog
- pop rdi
- pop rsi
- RESTORE_XMM
- UNSHADOW_ARGS
- pop rbp
- ret
-
-;void ssim_parms_sse2(
-; unsigned char *s,
-; int sp,
-; unsigned char *r,
-; int rp
-; unsigned long *sum_s,
-; unsigned long *sum_r,
-; unsigned long *sum_sq_s,
-; unsigned long *sum_sq_r,
-; unsigned long *sum_sxr);
-;
-; TODO: Use parm passing through structure, probably don't need the pxors
-; ( calling app will initialize to 0 ) could easily fit everything in sse2
-; without too much hastle, and can probably do better estimates with psadw
-; or pavgb At this point this is just meant to be first pass for calculating
-; all the parms needed for 16x16 ssim so we can play with dssim as distortion
-; in mode selection code.
-global sym(vp9_ssim_parms_8x8_sse2) PRIVATE
-sym(vp9_ssim_parms_8x8_sse2):
- push rbp
- mov rbp, rsp
- SHADOW_ARGS_TO_STACK 9
- SAVE_XMM 15
- push rsi
- push rdi
- ; end prolog
-
- mov rsi, arg(0) ;s
- mov rcx, arg(1) ;sp
- mov rdi, arg(2) ;r
- mov rax, arg(3) ;rp
-
- pxor xmm0, xmm0
- pxor xmm15,xmm15 ;sum_s
- pxor xmm14,xmm14 ;sum_r
- pxor xmm13,xmm13 ;sum_sq_s
- pxor xmm12,xmm12 ;sum_sq_r
- pxor xmm11,xmm11 ;sum_sxr
-
- mov rdx, 8 ;row counter
-.NextRow:
-
- ;grab source and reference pixels
- movq xmm3, [rsi]
- movq xmm4, [rdi]
- punpcklbw xmm3, xmm0 ; low_s
- punpcklbw xmm4, xmm0 ; low_r
-
- TABULATE_SSIM
-
- add rsi, rcx ; next s row
- add rdi, rax ; next r row
-
- dec rdx ; counter
- jnz .NextRow
-
- SUM_ACROSS_W xmm15
- SUM_ACROSS_W xmm14
- SUM_ACROSS_Q xmm13
- SUM_ACROSS_Q xmm12
- SUM_ACROSS_Q xmm11
-
- mov rdi,arg(4)
- movd [rdi], xmm15;
- mov rdi,arg(5)
- movd [rdi], xmm14;
- mov rdi,arg(6)
- movd [rdi], xmm13;
- mov rdi,arg(7)
- movd [rdi], xmm12;
- mov rdi,arg(8)
- movd [rdi], xmm11;
-
- ; begin epilog
- pop rdi
- pop rsi
- RESTORE_XMM
- UNSHADOW_ARGS
- pop rbp
- ret
diff --git a/vp9/vp9cx.mk b/vp9/vp9cx.mk
index 186ce11..84b12d7 100644
--- a/vp9/vp9cx.mk
+++ b/vp9/vp9cx.mk
@@ -33,7 +33,6 @@
VP9_CX_SRCS-yes += encoder/vp9_ethread.h
VP9_CX_SRCS-yes += encoder/vp9_ethread.c
VP9_CX_SRCS-yes += encoder/vp9_extend.c
-VP9_CX_SRCS-$(CONFIG_INTERNAL_STATS) += encoder/vp9_fastssim.c
VP9_CX_SRCS-yes += encoder/vp9_firstpass.c
VP9_CX_SRCS-yes += encoder/vp9_block.h
VP9_CX_SRCS-yes += encoder/vp9_bitstream.h
@@ -57,7 +56,6 @@
VP9_CX_SRCS-yes += encoder/vp9_encoder.c
VP9_CX_SRCS-yes += encoder/vp9_picklpf.c
VP9_CX_SRCS-yes += encoder/vp9_picklpf.h
-VP9_CX_SRCS-$(CONFIG_INTERNAL_STATS) += encoder/vp9_psnrhvs.c
VP9_CX_SRCS-yes += encoder/vp9_quantize.c
VP9_CX_SRCS-yes += encoder/vp9_ratectrl.c
VP9_CX_SRCS-yes += encoder/vp9_rd.c
@@ -72,8 +70,6 @@
VP9_CX_SRCS-yes += encoder/vp9_svc_layercontext.c
VP9_CX_SRCS-yes += encoder/vp9_resize.c
VP9_CX_SRCS-yes += encoder/vp9_resize.h
-VP9_CX_SRCS-$(CONFIG_INTERNAL_STATS) += encoder/vp9_ssim.c
-VP9_CX_SRCS-$(CONFIG_INTERNAL_STATS) += encoder/vp9_ssim.h
VP9_CX_SRCS-$(CONFIG_INTERNAL_STATS) += encoder/vp9_blockiness.c
VP9_CX_SRCS-yes += encoder/vp9_tokenize.c
@@ -113,7 +109,6 @@
VP9_CX_SRCS-$(HAVE_SSSE3) += encoder/x86/vp9_dct_ssse3_x86_64.asm
endif
endif
-VP9_CX_SRCS-$(ARCH_X86_64) += encoder/x86/vp9_ssim_opt_x86_64.asm
VP9_CX_SRCS-$(HAVE_SSE2) += encoder/x86/vp9_dct_sse2.c
VP9_CX_SRCS-$(HAVE_SSSE3) += encoder/x86/vp9_dct_ssse3.c
diff --git a/vp9/encoder/vp9_fastssim.c b/vpx_dsp/fastssim.c
similarity index 98%
rename from vp9/encoder/vp9_fastssim.c
rename to vpx_dsp/fastssim.c
index f1d408c..25f01e5 100644
--- a/vp9/encoder/vp9_fastssim.c
+++ b/vpx_dsp/fastssim.c
@@ -11,10 +11,11 @@
* project.
*/
#include <math.h>
+#include <stdlib.h>
#include <string.h>
#include "./vpx_config.h"
-#include "./vp9_rtcd.h"
-#include "vp9/encoder/vp9_ssim.h"
+#include "./vpx_dsp_rtcd.h"
+#include "vpx_dsp/ssim.h"
/* TODO(jbb): High bit depth version of this code needed */
typedef struct fs_level fs_level;
typedef struct fs_ctx fs_ctx;
@@ -443,10 +444,10 @@
return 10 * (log10(_weight) - log10(_weight - _ssim));
}
-double vp9_calc_fastssim(YV12_BUFFER_CONFIG *source, YV12_BUFFER_CONFIG *dest,
+double vpx_calc_fastssim(YV12_BUFFER_CONFIG *source, YV12_BUFFER_CONFIG *dest,
double *ssim_y, double *ssim_u, double *ssim_v) {
double ssimv;
- vp9_clear_system_state();
+ vpx_clear_system_state();
*ssim_y = calc_ssim(source->y_buffer, source->y_stride, dest->y_buffer,
dest->y_stride, source->y_crop_width,
diff --git a/vp9/encoder/vp9_psnrhvs.c b/vpx_dsp/psnrhvs.c
similarity index 97%
rename from vp9/encoder/vp9_psnrhvs.c
rename to vpx_dsp/psnrhvs.c
index 5104b9a..8aa30f2 100644
--- a/vp9/encoder/vp9_psnrhvs.c
+++ b/vpx_dsp/psnrhvs.c
@@ -15,9 +15,8 @@
#include <math.h>
#include "./vpx_config.h"
-#include "./vp9_rtcd.h"
#include "./vpx_dsp_rtcd.h"
-#include "vp9/encoder/vp9_ssim.h"
+#include "vpx_dsp/ssim.h"
#if !defined(M_PI)
# define M_PI (3.141592653589793238462643)
@@ -201,12 +200,12 @@
ret /= pixels;
return ret;
}
-double vp9_psnrhvs(YV12_BUFFER_CONFIG *source, YV12_BUFFER_CONFIG *dest,
+double vpx_psnrhvs(YV12_BUFFER_CONFIG *source, YV12_BUFFER_CONFIG *dest,
double *y_psnrhvs, double *u_psnrhvs, double *v_psnrhvs) {
double psnrhvs;
double par = 1.0;
int step = 7;
- vp9_clear_system_state();
+ vpx_clear_system_state();
*y_psnrhvs = calc_psnrhvs(source->y_buffer, source->y_stride, dest->y_buffer,
dest->y_stride, par, source->y_crop_width,
source->y_crop_height, step, csf_y);
diff --git a/vp9/encoder/vp9_ssim.c b/vpx_dsp/ssim.c
similarity index 91%
rename from vp9/encoder/vp9_ssim.c
rename to vpx_dsp/ssim.c
index 172de5d..991906f 100644
--- a/vp9/encoder/vp9_ssim.c
+++ b/vpx_dsp/ssim.c
@@ -9,11 +9,11 @@
*/
#include <math.h>
-#include "./vp9_rtcd.h"
+#include "./vpx_dsp_rtcd.h"
+#include "vpx_dsp/ssim.h"
#include "vpx_ports/mem.h"
-#include "vp9/encoder/vp9_ssim.h"
-void vp9_ssim_parms_16x16_c(uint8_t *s, int sp, uint8_t *r,
+void vpx_ssim_parms_16x16_c(uint8_t *s, int sp, uint8_t *r,
int rp, unsigned long *sum_s, unsigned long *sum_r,
unsigned long *sum_sq_s, unsigned long *sum_sq_r,
unsigned long *sum_sxr) {
@@ -28,7 +28,7 @@
}
}
}
-void vp9_ssim_parms_8x8_c(uint8_t *s, int sp, uint8_t *r, int rp,
+void vpx_ssim_parms_8x8_c(uint8_t *s, int sp, uint8_t *r, int rp,
unsigned long *sum_s, unsigned long *sum_r,
unsigned long *sum_sq_s, unsigned long *sum_sq_r,
unsigned long *sum_sxr) {
@@ -45,7 +45,7 @@
}
#if CONFIG_VP9_HIGHBITDEPTH
-void vp9_highbd_ssim_parms_8x8_c(uint16_t *s, int sp, uint16_t *r, int rp,
+void vpx_highbd_ssim_parms_8x8_c(uint16_t *s, int sp, uint16_t *r, int rp,
uint32_t *sum_s, uint32_t *sum_r,
uint32_t *sum_sq_s, uint32_t *sum_sq_r,
uint32_t *sum_sxr) {
@@ -87,7 +87,7 @@
static double ssim_8x8(uint8_t *s, int sp, uint8_t *r, int rp) {
unsigned long sum_s = 0, sum_r = 0, sum_sq_s = 0, sum_sq_r = 0, sum_sxr = 0;
- vp9_ssim_parms_8x8(s, sp, r, rp, &sum_s, &sum_r, &sum_sq_s, &sum_sq_r,
+ vpx_ssim_parms_8x8(s, sp, r, rp, &sum_s, &sum_r, &sum_sq_s, &sum_sq_r,
&sum_sxr);
return similarity(sum_s, sum_r, sum_sq_s, sum_sq_r, sum_sxr, 64);
}
@@ -97,7 +97,7 @@
unsigned int bd) {
uint32_t sum_s = 0, sum_r = 0, sum_sq_s = 0, sum_sq_r = 0, sum_sxr = 0;
const int oshift = bd - 8;
- vp9_highbd_ssim_parms_8x8(s, sp, r, rp, &sum_s, &sum_r, &sum_sq_s, &sum_sq_r,
+ vpx_highbd_ssim_parms_8x8(s, sp, r, rp, &sum_s, &sum_r, &sum_sq_s, &sum_sq_r,
&sum_sxr);
return similarity(sum_s >> oshift,
sum_r >> oshift,
@@ -111,7 +111,7 @@
// We are using a 8x8 moving window with starting location of each 8x8 window
// on the 4x4 pixel grid. Such arrangement allows the windows to overlap
// block boundaries to penalize blocking artifacts.
-double vp9_ssim2(uint8_t *img1, uint8_t *img2, int stride_img1,
+double vpx_ssim2(uint8_t *img1, uint8_t *img2, int stride_img1,
int stride_img2, int width, int height) {
int i, j;
int samples = 0;
@@ -131,7 +131,7 @@
}
#if CONFIG_VP9_HIGHBITDEPTH
-double vp9_highbd_ssim2(uint8_t *img1, uint8_t *img2, int stride_img1,
+double vpx_highbd_ssim2(uint8_t *img1, uint8_t *img2, int stride_img1,
int stride_img2, int width, int height,
unsigned int bd) {
int i, j;
@@ -154,20 +154,20 @@
}
#endif // CONFIG_VP9_HIGHBITDEPTH
-double vp9_calc_ssim(YV12_BUFFER_CONFIG *source, YV12_BUFFER_CONFIG *dest,
+double vpx_calc_ssim(YV12_BUFFER_CONFIG *source, YV12_BUFFER_CONFIG *dest,
double *weight) {
double a, b, c;
double ssimv;
- a = vp9_ssim2(source->y_buffer, dest->y_buffer,
+ a = vpx_ssim2(source->y_buffer, dest->y_buffer,
source->y_stride, dest->y_stride,
source->y_crop_width, source->y_crop_height);
- b = vp9_ssim2(source->u_buffer, dest->u_buffer,
+ b = vpx_ssim2(source->u_buffer, dest->u_buffer,
source->uv_stride, dest->uv_stride,
source->uv_crop_width, source->uv_crop_height);
- c = vp9_ssim2(source->v_buffer, dest->v_buffer,
+ c = vpx_ssim2(source->v_buffer, dest->v_buffer,
source->uv_stride, dest->uv_stride,
source->uv_crop_width, source->uv_crop_height);
@@ -178,20 +178,20 @@
return ssimv;
}
-double vp9_calc_ssimg(YV12_BUFFER_CONFIG *source, YV12_BUFFER_CONFIG *dest,
+double vpx_calc_ssimg(YV12_BUFFER_CONFIG *source, YV12_BUFFER_CONFIG *dest,
double *ssim_y, double *ssim_u, double *ssim_v) {
double ssim_all = 0;
double a, b, c;
- a = vp9_ssim2(source->y_buffer, dest->y_buffer,
+ a = vpx_ssim2(source->y_buffer, dest->y_buffer,
source->y_stride, dest->y_stride,
source->y_crop_width, source->y_crop_height);
- b = vp9_ssim2(source->u_buffer, dest->u_buffer,
+ b = vpx_ssim2(source->u_buffer, dest->u_buffer,
source->uv_stride, dest->uv_stride,
source->uv_crop_width, source->uv_crop_height);
- c = vp9_ssim2(source->v_buffer, dest->v_buffer,
+ c = vpx_ssim2(source->v_buffer, dest->v_buffer,
source->uv_stride, dest->uv_stride,
source->uv_crop_width, source->uv_crop_height);
*ssim_y = a;
@@ -280,12 +280,12 @@
}
void ssimv_parms(uint8_t *img1, int img1_pitch, uint8_t *img2, int img2_pitch,
Ssimv *sv) {
- vp9_ssim_parms_8x8(img1, img1_pitch, img2, img2_pitch,
+ vpx_ssim_parms_8x8(img1, img1_pitch, img2, img2_pitch,
&sv->sum_s, &sv->sum_r, &sv->sum_sq_s, &sv->sum_sq_r,
&sv->sum_sxr);
}
-double vp9_get_ssim_metrics(uint8_t *img1, int img1_pitch,
+double vpx_get_ssim_metrics(uint8_t *img1, int img1_pitch,
uint8_t *img2, int img2_pitch,
int width, int height,
Ssimv *sv2, Metrics *m,
@@ -298,7 +298,7 @@
int c = 0;
double norm;
double old_ssim_total = 0;
- vp9_clear_system_state();
+ vpx_clear_system_state();
// We can sample points as frequently as we like start with 1 per 4x4.
for (i = 0; i < height; i += 4,
img1 += img1_pitch * 4, img2 += img2_pitch * 4) {
@@ -448,21 +448,21 @@
#if CONFIG_VP9_HIGHBITDEPTH
-double vp9_highbd_calc_ssim(YV12_BUFFER_CONFIG *source,
+double vpx_highbd_calc_ssim(YV12_BUFFER_CONFIG *source,
YV12_BUFFER_CONFIG *dest,
double *weight, unsigned int bd) {
double a, b, c;
double ssimv;
- a = vp9_highbd_ssim2(source->y_buffer, dest->y_buffer,
+ a = vpx_highbd_ssim2(source->y_buffer, dest->y_buffer,
source->y_stride, dest->y_stride,
source->y_crop_width, source->y_crop_height, bd);
- b = vp9_highbd_ssim2(source->u_buffer, dest->u_buffer,
+ b = vpx_highbd_ssim2(source->u_buffer, dest->u_buffer,
source->uv_stride, dest->uv_stride,
source->uv_crop_width, source->uv_crop_height, bd);
- c = vp9_highbd_ssim2(source->v_buffer, dest->v_buffer,
+ c = vpx_highbd_ssim2(source->v_buffer, dest->v_buffer,
source->uv_stride, dest->uv_stride,
source->uv_crop_width, source->uv_crop_height, bd);
@@ -473,21 +473,21 @@
return ssimv;
}
-double vp9_highbd_calc_ssimg(YV12_BUFFER_CONFIG *source,
+double vpx_highbd_calc_ssimg(YV12_BUFFER_CONFIG *source,
YV12_BUFFER_CONFIG *dest, double *ssim_y,
double *ssim_u, double *ssim_v, unsigned int bd) {
double ssim_all = 0;
double a, b, c;
- a = vp9_highbd_ssim2(source->y_buffer, dest->y_buffer,
+ a = vpx_highbd_ssim2(source->y_buffer, dest->y_buffer,
source->y_stride, dest->y_stride,
source->y_crop_width, source->y_crop_height, bd);
- b = vp9_highbd_ssim2(source->u_buffer, dest->u_buffer,
+ b = vpx_highbd_ssim2(source->u_buffer, dest->u_buffer,
source->uv_stride, dest->uv_stride,
source->uv_crop_width, source->uv_crop_height, bd);
- c = vp9_highbd_ssim2(source->v_buffer, dest->v_buffer,
+ c = vpx_highbd_ssim2(source->v_buffer, dest->v_buffer,
source->uv_stride, dest->uv_stride,
source->uv_crop_width, source->uv_crop_height, bd);
*ssim_y = a;
diff --git a/vp9/encoder/vp9_ssim.h b/vpx_dsp/ssim.h
similarity index 74%
rename from vp9/encoder/vp9_ssim.h
rename to vpx_dsp/ssim.h
index 10f14c4..b1579f7 100644
--- a/vp9/encoder/vp9_ssim.h
+++ b/vpx_dsp/ssim.h
@@ -8,15 +8,24 @@
* be found in the AUTHORS file in the root of the source tree.
*/
-#ifndef VP9_ENCODER_VP9_SSIM_H_
-#define VP9_ENCODER_VP9_SSIM_H_
+#ifndef VPX_ENCODER_VP9_SSIM_H_
+#define VPX_ENCODER_VP9_SSIM_H_
#ifdef __cplusplus
extern "C" {
#endif
+#include "./vpx_config.h"
#include "vpx_scale/yv12config.h"
+// TODO(aconverse): Unify vp8/vp9_clear_system_state
+#if ARCH_X86 || ARCH_X86_64
+void vpx_reset_mmx_state(void);
+#define vpx_clear_system_state() vpx_reset_mmx_state()
+#else
+#define vpx_clear_system_state()
+#endif
+
// metrics used for calculating ssim, ssim2, dssim, and ssimc
typedef struct {
// source sum ( over 8x8 region )
@@ -59,29 +68,29 @@
double ssimcd;
} Metrics;
-double vp9_get_ssim_metrics(uint8_t *img1, int img1_pitch, uint8_t *img2,
+double vpx_get_ssim_metrics(uint8_t *img1, int img1_pitch, uint8_t *img2,
int img2_pitch, int width, int height, Ssimv *sv2,
Metrics *m, int do_inconsistency);
-double vp9_calc_ssim(YV12_BUFFER_CONFIG *source, YV12_BUFFER_CONFIG *dest,
+double vpx_calc_ssim(YV12_BUFFER_CONFIG *source, YV12_BUFFER_CONFIG *dest,
double *weight);
-double vp9_calc_ssimg(YV12_BUFFER_CONFIG *source, YV12_BUFFER_CONFIG *dest,
+double vpx_calc_ssimg(YV12_BUFFER_CONFIG *source, YV12_BUFFER_CONFIG *dest,
double *ssim_y, double *ssim_u, double *ssim_v);
-double vp9_calc_fastssim(YV12_BUFFER_CONFIG *source, YV12_BUFFER_CONFIG *dest,
+double vpx_calc_fastssim(YV12_BUFFER_CONFIG *source, YV12_BUFFER_CONFIG *dest,
double *ssim_y, double *ssim_u, double *ssim_v);
-double vp9_psnrhvs(YV12_BUFFER_CONFIG *source, YV12_BUFFER_CONFIG *dest,
+double vpx_psnrhvs(YV12_BUFFER_CONFIG *source, YV12_BUFFER_CONFIG *dest,
double *ssim_y, double *ssim_u, double *ssim_v);
#if CONFIG_VP9_HIGHBITDEPTH
-double vp9_highbd_calc_ssim(YV12_BUFFER_CONFIG *source,
+double vpx_highbd_calc_ssim(YV12_BUFFER_CONFIG *source,
YV12_BUFFER_CONFIG *dest,
double *weight,
unsigned int bd);
-double vp9_highbd_calc_ssimg(YV12_BUFFER_CONFIG *source,
+double vpx_highbd_calc_ssimg(YV12_BUFFER_CONFIG *source,
YV12_BUFFER_CONFIG *dest,
double *ssim_y,
double *ssim_u,
@@ -93,4 +102,4 @@
} // extern "C"
#endif
-#endif // VP9_ENCODER_VP9_SSIM_H_
+#endif // VPX_ENCODER_VP9_SSIM_H_
diff --git a/vpx_dsp/vpx_dsp.mk b/vpx_dsp/vpx_dsp.mk
index 468e4c3..5d4ec3e 100644
--- a/vpx_dsp/vpx_dsp.mk
+++ b/vpx_dsp/vpx_dsp.mk
@@ -22,6 +22,10 @@
DSP_SRCS-yes += bitwriter.c
DSP_SRCS-yes += bitwriter_buffer.c
DSP_SRCS-yes += bitwriter_buffer.h
+DSP_SRCS-$(CONFIG_INTERNAL_STATS) += ssim.c
+DSP_SRCS-$(CONFIG_INTERNAL_STATS) += ssim.h
+DSP_SRCS-$(CONFIG_INTERNAL_STATS) += psnrhvs.c
+DSP_SRCS-$(CONFIG_INTERNAL_STATS) += fastssim.c
endif
ifeq ($(CONFIG_DECODERS),yes)
@@ -295,6 +299,10 @@
DSP_SRCS-$(HAVE_AVX2) += x86/variance_avx2.c
DSP_SRCS-$(HAVE_AVX2) += x86/variance_impl_avx2.c
+ifeq ($(ARCH_X86_64),yes)
+DSP_SRCS-$(HAVE_SSE2) += x86/ssim_opt_x86_64.asm
+endif # ARCH_X86_64
+
ifeq ($(CONFIG_USE_X86INC),yes)
DSP_SRCS-$(HAVE_SSE2) += x86/subpel_variance_sse2.asm # Contains SSE2 and SSSE3
endif # CONFIG_USE_X86INC
diff --git a/vpx_dsp/vpx_dsp_rtcd_defs.pl b/vpx_dsp/vpx_dsp_rtcd_defs.pl
index 7326adf..3260227 100644
--- a/vpx_dsp/vpx_dsp_rtcd_defs.pl
+++ b/vpx_dsp/vpx_dsp_rtcd_defs.pl
@@ -990,6 +990,17 @@
add_proto qw/void vpx_sad4x4x4d/, "const uint8_t *src_ptr, int src_stride, const uint8_t * const ref_ptr[], int ref_stride, uint32_t *sad_array";
specialize qw/vpx_sad4x4x4d msa/, "$sse_x86inc";
+#
+# Structured Similarity (SSIM)
+#
+if (vpx_config("CONFIG_INTERNAL_STATS") eq "yes") {  # SSIM sum helpers are declared only when internal stats are enabled.
+ add_proto qw/void vpx_ssim_parms_8x8/, "uint8_t *s, int sp, uint8_t *r, int rp, unsigned long *sum_s, unsigned long *sum_r, unsigned long *sum_sq_s, unsigned long *sum_sq_r, unsigned long *sum_sxr";
+ specialize qw/vpx_ssim_parms_8x8/, "$sse2_x86_64";  # NOTE(review): $sse2_x86_64 is set outside this hunk; presumably sse2 on x86_64 builds only - confirm.
+
+ add_proto qw/void vpx_ssim_parms_16x16/, "uint8_t *s, int sp, uint8_t *r, int rp, unsigned long *sum_s, unsigned long *sum_r, unsigned long *sum_sq_s, unsigned long *sum_sq_r, unsigned long *sum_sxr";
+ specialize qw/vpx_ssim_parms_16x16/, "$sse2_x86_64";  # Uses the same specialization variable as the 8x8 prototype.
+}
+
if (vpx_config("CONFIG_VP9_HIGHBITDEPTH") eq "yes") {
#
# Block subtraction
@@ -1176,6 +1187,13 @@
add_proto qw/void vpx_highbd_sad4x4x4d/, "const uint8_t *src_ptr, int src_stride, const uint8_t* const ref_ptr[], int ref_stride, uint32_t *sad_array";
specialize qw/vpx_highbd_sad4x4x4d/, "$sse2_x86inc";
+ #
+ # Structured Similarity (SSIM)
+ #
+ if (vpx_config("CONFIG_INTERNAL_STATS") eq "yes") {  # High-bitdepth SSIM sum helper, declared only when internal stats are enabled.
+ add_proto qw/void vpx_highbd_ssim_parms_8x8/, "uint16_t *s, int sp, uint16_t *r, int rp, uint32_t *sum_s, uint32_t *sum_r, uint32_t *sum_sq_s, uint32_t *sum_sq_r, uint32_t *sum_sxr";
+ specialize qw/vpx_highbd_ssim_parms_8x8/;  # No optimized variants are listed for this prototype.
+ }
} # CONFIG_VP9_HIGHBITDEPTH
} # CONFIG_ENCODERS
diff --git a/vp8/encoder/x86/ssim_opt_x86_64.asm b/vpx_dsp/x86/ssim_opt_x86_64.asm
similarity index 97%
rename from vp8/encoder/x86/ssim_opt_x86_64.asm
rename to vpx_dsp/x86/ssim_opt_x86_64.asm
index 5964a85..5d05d4f 100644
--- a/vp8/encoder/x86/ssim_opt_x86_64.asm
+++ b/vpx_dsp/x86/ssim_opt_x86_64.asm
@@ -61,8 +61,8 @@
; or pavgb At this point this is just meant to be first pass for calculating
; all the parms needed for 16x16 ssim so we can play with dssim as distortion
; in mode selection code.
-global sym(vp8_ssim_parms_16x16_sse2) PRIVATE
-sym(vp8_ssim_parms_16x16_sse2):
+global sym(vpx_ssim_parms_16x16_sse2) PRIVATE
+sym(vpx_ssim_parms_16x16_sse2):
push rbp
mov rbp, rsp
SHADOW_ARGS_TO_STACK 9
@@ -151,8 +151,8 @@
; or pavgb At this point this is just meant to be first pass for calculating
; all the parms needed for 16x16 ssim so we can play with dssim as distortion
; in mode selection code.
-global sym(vp8_ssim_parms_8x8_sse2) PRIVATE
-sym(vp8_ssim_parms_8x8_sse2):
+global sym(vpx_ssim_parms_8x8_sse2) PRIVATE
+sym(vpx_ssim_parms_8x8_sse2):
push rbp
mov rbp, rsp
SHADOW_ARGS_TO_STACK 9
diff --git a/vpx_ports/msvc.h b/vpx_ports/msvc.h
index 43a36e7..cab7740 100644
--- a/vpx_ports/msvc.h
+++ b/vpx_ports/msvc.h
@@ -18,5 +18,15 @@
# define snprintf _snprintf
# endif // _MSC_VER < 1900
+#if _MSC_VER < 1800 // VS2013 provides round
+#include <math.h>
+static INLINE double round(double x) {  // Fallback round() for MSVC older than VS2013 (see the _MSC_VER guard).
+ if (x < 0)
+ return ceil(x - 0.5);  // Negative input: subtract one half, then take the ceiling (ties go away from zero).
+ else
+ return floor(x + 0.5);  // Zero or positive input: add one half, then take the floor (ties go away from zero).
+}
+#endif // _MSC_VER < 1800
+
#endif // _MSC_VER
#endif // VPX_PORTS_MSVC_H_
diff --git a/vpx_ports/x86.h b/vpx_ports/x86.h
index 0fef6a5..5da346e 100644
--- a/vpx_ports/x86.h
+++ b/vpx_ports/x86.h
@@ -136,7 +136,7 @@
#define xgetbv() 0U // no AVX for older x64 or unrecognized toolchains.
#endif
-#if defined(_MSC_VER)
+#if defined(_MSC_VER) && _MSC_VER >= 1700
#include <windows.h>
#if WINAPI_FAMILY_PARTITION(WINAPI_FAMILY_APP)
#define getenv(x) NULL